Example #1
    def __init__(self, device, max_frames, lr=0.0001, margin=1, scale=1, hard_rank=0, hard_prob=0, model="alexnet50", nOut=512, nSpeakers=1000, optimizer='adam', encoder_type='SAP', normalize=True, trainfunc='contrastive', **kwargs):
        super(SpeakerNet, self).__init__()

        argsdict = {'nOut': nOut, 'encoder_type': encoder_type}

        self.device = device

        SpeakerNetModel = importlib.import_module('models.'+model).__getattribute__(model)
        # @TODO make cuda optional in order to train on dev machines w/o GPUs
        self.__S__ = SpeakerNetModel(**argsdict).to(self.device)

        if trainfunc == 'angleproto':
            self.__L__ = AngleProtoLoss().to(self.device)
            self.__train_normalize__    = True
            self.__test_normalize__     = True
        elif trainfunc == 'ge2e':
            self.__L__ = GE2ELoss().to(self.device)
            self.__train_normalize__    = True
            self.__test_normalize__     = True
        elif trainfunc == 'amsoftmax':
            self.__L__ = AMSoftmax(in_feats=nOut, n_classes=nSpeakers, m=margin, s=scale).to(self.device)
            self.__train_normalize__    = False
            self.__test_normalize__     = True
        elif trainfunc == 'aamsoftmax':
            self.__L__ = AAMSoftmax(in_feats=nOut, n_classes=nSpeakers, m=margin, s=scale).to(self.device)
            self.__train_normalize__    = False
            self.__test_normalize__     = True
        elif trainfunc == 'softmax':
            self.__L__ = SoftmaxLoss(in_feats=nOut, n_classes=nSpeakers).to(self.device)
            self.__train_normalize__    = False
            self.__test_normalize__     = True
        elif trainfunc == 'proto':
            self.__L__ = ProtoLoss().to(self.device)
            self.__train_normalize__    = False
            self.__test_normalize__     = False
        elif trainfunc == 'triplet':
            self.__L__ = PairwiseLoss(loss_func='triplet', hard_rank=hard_rank, hard_prob=hard_prob, margin=margin).to(self.device)
            self.__train_normalize__    = True
            self.__test_normalize__     = True
        elif trainfunc == 'contrastive':
            self.__L__ = PairwiseLoss(loss_func='contrastive', hard_rank=hard_rank, hard_prob=hard_prob, margin=margin).to(self.device)
            self.__train_normalize__    = True
            self.__test_normalize__     = True
        else:
            raise ValueError('Undefined loss.')

        if optimizer == 'adam':
            self.__optimizer__ = torch.optim.Adam(self.parameters(), lr=lr)
        elif optimizer == 'sgd':
            self.__optimizer__ = torch.optim.SGD(self.parameters(), lr=lr, momentum=0.9, weight_decay=5e-5)
        else:
            raise ValueError('Undefined optimizer.')

        self.__max_frames__ = max_frames
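A minimal instantiation sketch for this constructor, assuming the surrounding repository provides the models package and the loss classes imported by this module; the hyperparameter values below are illustrative placeholders, not taken from the example:

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hypothetical call; argument names match the signature above, values are placeholders.
net = SpeakerNet(device=device,
                 max_frames=200,
                 model='alexnet50',        # assumed to exist as models/alexnet50.py
                 nOut=512,
                 trainfunc='angleproto',
                 optimizer='adam',
                 lr=0.0001)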
Example #2
    def __init__(self,
                 lr=0.0001,
                 model="alexnet50",
                 nOut=512,
                 encoder_type='SAP',
                 normalize=True,
                 trainfunc='contrastive',
                 **kwargs):
        super(SpeakerNet, self).__init__()

        argsdict = {'nOut': nOut, 'encoder_type': encoder_type}

        SpeakerNetModel = importlib.import_module(
            'models.' + model).__getattribute__(model)
        self.__S__ = SpeakerNetModel(**argsdict).cuda()

        if trainfunc == 'angleproto':
            self.__L__ = AngleProtoLoss().cuda()
            self.__train_normalize__ = True
            self.__test_normalize__ = True
        elif trainfunc == 'proto':
            self.__L__ = ProtoLoss().cuda()
            self.__train_normalize__ = False
            self.__test_normalize__ = False
        else:
            raise ValueError('Undefined loss.')

        self.__optimizer__ = torch.optim.Adam(list(self.__S__.parameters()) +
                                              list(self.__L__.parameters()),
                                              lr=lr)

        self.torchfb = transforms.MelSpectrogram(sample_rate=16000,
                                                 n_fft=512,
                                                 win_length=400,
                                                 hop_length=160,
                                                 f_min=0.0,
                                                 f_max=8000,
                                                 pad=0,
                                                 n_mels=40).cuda()
        self.instancenorm = nn.InstanceNorm1d(40).cuda()

        print('Initialised network with nOut %d encoder_type %s' %
              (nOut, encoder_type))
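For orientation, a short sketch of how the mel-spectrogram front end defined above might be applied to raw audio; the dummy batch shape, the small epsilon, and the log step are assumptions for illustration (the example itself only constructs the transforms), and the sketch runs on CPU for simplicity:

import torch
import torch.nn as nn
from torchaudio import transforms

torchfb = transforms.MelSpectrogram(sample_rate=16000, n_fft=512, win_length=400,
                                    hop_length=160, f_min=0.0, f_max=8000,
                                    pad=0, n_mels=40)
instancenorm = nn.InstanceNorm1d(40)

wav = torch.randn(2, 16000)            # two 1-second utterances at 16 kHz (dummy data)
mel = torchfb(wav) + 1e-6              # (2, 40, frames); epsilon guards against log(0)
feat = instancenorm(mel.log())         # per-utterance normalisation over the time axis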
Example #3
    def __init__(self,
                 max_frames,
                 lr=0.0001,
                 margin=1,
                 scale=1,
                 hard_rank=0,
                 hard_prob=0,
                 model="alexnet50",
                 nOut=512,
                 nSpeakers=1000,
                 optimizer='adam',
                 encoder_type='SAP',
                 normalize=True,
                 trainfunc='contrastive',
                 **kwargs):
        super(SpeakerNet, self).__init__()

        argsdict = {'nOut': nOut, 'encoder_type': encoder_type}

        self.__S__ = globals()[model](**argsdict).cuda()

        if trainfunc == 'angleproto':
            self.__L__ = AngleProtoLoss().cuda()
            self.__train_normalize__ = True
            self.__test_normalize__ = True
        elif trainfunc == 'ge2e':
            self.__L__ = GE2ELoss().cuda()
            self.__train_normalize__ = True
            self.__test_normalize__ = True
        elif trainfunc == 'amsoftmax':
            self.__L__ = AMSoftmax(in_feats=nOut,
                                   n_classes=nSpeakers,
                                   m=margin,
                                   s=scale).cuda()
            self.__train_normalize__ = False
            self.__test_normalize__ = True
        elif trainfunc == 'aamsoftmax':
            self.__L__ = AAMSoftmax(in_feats=nOut,
                                    n_classes=nSpeakers,
                                    m=margin,
                                    s=scale).cuda()
            self.__train_normalize__ = False
            self.__test_normalize__ = True
        elif trainfunc == 'softmax':
            self.__L__ = SoftmaxLoss(in_feats=nOut, n_classes=nSpeakers).cuda()
            self.__train_normalize__ = False
            self.__test_normalize__ = True
        elif trainfunc == 'proto':
            self.__L__ = ProtoLoss().cuda()
            self.__train_normalize__ = False
            self.__test_normalize__ = False
        elif trainfunc == 'triplet':
            self.__L__ = PairwiseLoss(loss_func='triplet',
                                      hard_rank=hard_rank,
                                      hard_prob=hard_prob,
                                      margin=margin).cuda()
            self.__train_normalize__ = True
            self.__test_normalize__ = True
        elif trainfunc == 'contrastive':
            self.__L__ = PairwiseLoss(loss_func='contrastive',
                                      hard_rank=hard_rank,
                                      hard_prob=hard_prob,
                                      margin=margin).cuda()
            self.__train_normalize__ = True
            self.__test_normalize__ = True
        else:
            raise ValueError('Undefined loss.')

        if optimizer == 'adam':
            self.__optimizer__ = torch.optim.Adam(self.parameters(), lr=lr)
        elif optimizer == 'sgd':
            self.__optimizer__ = torch.optim.SGD(self.parameters(),
                                                 lr=lr,
                                                 momentum=0.9,
                                                 weight_decay=5e-5)
        else:
            raise ValueError('Undefined optimizer.')

        self.__max_frames__ = max_frames
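Note that this variant resolves the model class via globals()[model], whereas Example #1 imports it dynamically with importlib. A small sketch of the two lookup styles, assuming a module models/alexnet50.py that defines a class named alexnet50 (the default model name used above):

import importlib

model = 'alexnet50'

# Example #1 style: import models/alexnet50.py at runtime and fetch the class by name.
SpeakerNetModel = getattr(importlib.import_module('models.' + model), model)

# Example #3 style: the class must already be present in this module's namespace,
# e.g. brought in earlier with 'from models.alexnet50 import alexnet50'.
SpeakerNetModel = globals()[model]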