Esempio n. 1
0
    def __init__(self, device, max_frames, lr = 0.0001, margin = 1, scale = 1, hard_rank = 0, hard_prob = 0, model="alexnet50", nOut = 512, nSpeakers = 1000, optimizer = 'adam', encoder_type = 'SAP', normalize = True, trainfunc='contrastive', **kwargs):
        """Build the embedding model, the training loss and the optimizer.

        Args:
            device: Target handed to ``.to()`` for both the model and the loss;
                also stored as ``self.device``.
            max_frames: Stored as ``self.__max_frames__``; not otherwise used here.
            lr: Learning rate given to the optimizer.
            margin: Forwarded as ``m`` to AMSoftmax/AAMSoftmax and as ``margin``
                to PairwiseLoss.
            scale: Forwarded as ``s`` to AMSoftmax/AAMSoftmax.
            hard_rank: Forwarded to PairwiseLoss ('triplet' / 'contrastive').
            hard_prob: Forwarded to PairwiseLoss ('triplet' / 'contrastive').
            model: Name of a module under ``models``; that module must expose
                an attribute with the same name, which is called to build the
                model.
            nOut: Passed to the model as ``nOut`` and to the softmax-family
                losses as ``in_feats``.
            nSpeakers: Passed to the softmax-family losses as ``n_classes``.
            optimizer: ``'adam'`` or ``'sgd'``.
            encoder_type: Passed to the model as ``encoder_type``.
            normalize: Accepted for interface compatibility; not read here.
            trainfunc: One of 'angleproto', 'ge2e', 'amsoftmax', 'aamsoftmax',
                'softmax', 'proto', 'triplet', 'contrastive'.
            **kwargs: Accepted and ignored by this constructor.

        Raises:
            ValueError: If ``trainfunc`` or ``optimizer`` is not a known name.
        """
        super(SpeakerNet, self).__init__()

        self.device = device

        # The module under ``models`` and the factory inside it share one name.
        model_module = importlib.import_module('models.' + model)
        model_factory = getattr(model_module, model)
        self.__S__ = model_factory(nOut=nOut, encoder_type=encoder_type).to(self.device)

        def pairwise(kind):
            # Shared constructor call for the two PairwiseLoss flavours.
            return PairwiseLoss(loss_func=kind, hard_rank=hard_rank, hard_prob=hard_prob, margin=margin)

        # trainfunc -> (loss factory, normalize while training, normalize while testing).
        # Factories are lambdas so that only the selected loss is ever built
        # (and only its class name is ever resolved).
        loss_table = {
            'angleproto':  (lambda: AngleProtoLoss(), True, True),
            'ge2e':        (lambda: GE2ELoss(), True, True),
            'amsoftmax':   (lambda: AMSoftmax(in_feats=nOut, n_classes=nSpeakers, m=margin, s=scale), False, True),
            'aamsoftmax':  (lambda: AAMSoftmax(in_feats=nOut, n_classes=nSpeakers, m=margin, s=scale), False, True),
            'softmax':     (lambda: SoftmaxLoss(in_feats=nOut, n_classes=nSpeakers), False, True),
            'proto':       (lambda: ProtoLoss(), False, False),
            'triplet':     (lambda: pairwise('triplet'), True, True),
            'contrastive': (lambda: pairwise('contrastive'), True, True),
        }
        if trainfunc not in loss_table:
            raise ValueError('Undefined loss.')

        build_loss, train_normalize, test_normalize = loss_table[trainfunc]
        self.__L__ = build_loss().to(self.device)
        self.__train_normalize__ = train_normalize
        self.__test_normalize__ = test_normalize

        # The optimizer is created after the loss is attached, so
        # self.parameters() is evaluated with both self.__S__ and self.__L__
        # already set.
        if optimizer == 'adam':
            self.__optimizer__ = torch.optim.Adam(self.parameters(), lr=lr)
        elif optimizer == 'sgd':
            self.__optimizer__ = torch.optim.SGD(self.parameters(), lr=lr, momentum=0.9, weight_decay=5e-5)
        else:
            raise ValueError('Undefined optimizer.')

        self.__max_frames__ = max_frames
Esempio n. 2
0
    def __init__(self,
                 max_frames,
                 lr=0.0001,
                 margin=1,
                 scale=1,
                 hard_rank=0,
                 hard_prob=0,
                 model="alexnet50",
                 nOut=512,
                 nSpeakers=1000,
                 optimizer='adam',
                 encoder_type='SAP',
                 normalize=True,
                 trainfunc='contrastive',
                 **kwargs):
        """Create the embedding model, the loss module and the optimizer.

        The model and the loss are both moved with ``.cuda()``.

        Args:
            max_frames: Stored as ``self.__max_frames__``; not otherwise used here.
            lr: Learning rate given to the optimizer.
            margin: Forwarded as ``m`` to AMSoftmax/AAMSoftmax and as ``margin``
                to PairwiseLoss.
            scale: Forwarded as ``s`` to AMSoftmax/AAMSoftmax.
            hard_rank: Forwarded to PairwiseLoss ('triplet' / 'contrastive').
            hard_prob: Forwarded to PairwiseLoss ('triplet' / 'contrastive').
            model: Key looked up in this module's ``globals()``; the object
                found is called to build the model.
            nOut: Passed to the model as ``nOut`` and to the softmax-family
                losses as ``in_feats``.
            nSpeakers: Passed to the softmax-family losses as ``n_classes``.
            optimizer: ``'adam'`` or ``'sgd'``.
            encoder_type: Passed to the model as ``encoder_type``.
            normalize: Accepted but not read by this constructor.
            trainfunc: One of 'angleproto', 'ge2e', 'amsoftmax', 'aamsoftmax',
                'softmax', 'proto', 'triplet', 'contrastive'.
            **kwargs: Accepted and ignored by this constructor.

        Raises:
            ValueError: If ``trainfunc`` or ``optimizer`` is not a known name.
        """
        super(SpeakerNet, self).__init__()

        # Resolve the model factory by name among the module-level globals.
        network_factory = globals()[model]
        self.__S__ = network_factory(nOut=nOut, encoder_type=encoder_type).cuda()

        def pairwise(kind):
            # Both pairwise objectives share every argument except loss_func.
            return PairwiseLoss(loss_func=kind,
                                hard_rank=hard_rank,
                                hard_prob=hard_prob,
                                margin=margin)

        # Arguments common to the two margin-based softmax losses.
        margin_softmax_args = dict(in_feats=nOut,
                                   n_classes=nSpeakers,
                                   m=margin,
                                   s=scale)

        # Each branch picks the loss and its (train, test) normalization flags;
        # placement and attribute assignment happen once, below the chain.
        if trainfunc == 'angleproto':
            criterion, norm_flags = AngleProtoLoss(), (True, True)
        elif trainfunc == 'ge2e':
            criterion, norm_flags = GE2ELoss(), (True, True)
        elif trainfunc == 'amsoftmax':
            criterion, norm_flags = AMSoftmax(**margin_softmax_args), (False, True)
        elif trainfunc == 'aamsoftmax':
            criterion, norm_flags = AAMSoftmax(**margin_softmax_args), (False, True)
        elif trainfunc == 'softmax':
            criterion = SoftmaxLoss(in_feats=nOut, n_classes=nSpeakers)
            norm_flags = (False, True)
        elif trainfunc == 'proto':
            criterion, norm_flags = ProtoLoss(), (False, False)
        elif trainfunc == 'triplet':
            criterion, norm_flags = pairwise('triplet'), (True, True)
        elif trainfunc == 'contrastive':
            criterion, norm_flags = pairwise('contrastive'), (True, True)
        else:
            raise ValueError('Undefined loss.')

        self.__L__ = criterion.cuda()
        self.__train_normalize__, self.__test_normalize__ = norm_flags

        # Choose the optimizer class and its extra keyword arguments, then
        # build it once; self.parameters() is evaluated with both self.__S__
        # and self.__L__ already set.
        if optimizer == 'adam':
            optimizer_cls, optimizer_extra = torch.optim.Adam, {}
        elif optimizer == 'sgd':
            optimizer_cls = torch.optim.SGD
            optimizer_extra = {'momentum': 0.9, 'weight_decay': 5e-5}
        else:
            raise ValueError('Undefined optimizer.')
        self.__optimizer__ = optimizer_cls(self.parameters(), lr=lr, **optimizer_extra)

        self.__max_frames__ = max_frames