Example #1
def start_training(working_dir, pre_training_phase=True):
    ensures_dir(CHECKPOINTS_SOFTMAX_DIR)
    ensures_dir(CHECKPOINTS_TRIPLET_DIR)
    batch_input_shape = [None, NUM_FRAMES, NUM_FBANKS, 1]
    if pre_training_phase:
        logger.info('Softmax pre-training.')
        kc = KerasFormatConverter(working_dir)
        num_speakers_softmax = len(kc.categorical_speakers.speaker_ids)
        dsm = DeepSpeakerModel(batch_input_shape, include_softmax=True, num_speakers_softmax=num_speakers_softmax)
        dsm.m.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        pre_training_checkpoint = load_best_checkpoint(CHECKPOINTS_SOFTMAX_DIR)
        if pre_training_checkpoint is not None:
            initial_epoch = int(pre_training_checkpoint.split('/')[-1].split('.')[0].split('_')[-1])
            logger.info(f'Initial epoch is {initial_epoch}.')
            logger.info(f'Loading softmax checkpoint: {pre_training_checkpoint}.')
            dsm.m.load_weights(pre_training_checkpoint)  # latest one.
        else:
            initial_epoch = 0
        fit_model_softmax(dsm, kc.kx_train, kc.ky_train, kc.kx_test, kc.ky_test, initial_epoch=initial_epoch)
    else:
        logger.info('Training with the triplet loss.')
        dsm = DeepSpeakerModel(batch_input_shape, include_softmax=False)
        triplet_checkpoint = load_best_checkpoint(CHECKPOINTS_TRIPLET_DIR)
        pre_training_checkpoint = load_best_checkpoint(CHECKPOINTS_SOFTMAX_DIR)
        if triplet_checkpoint is not None:
            logger.info(f'Loading triplet checkpoint: {triplet_checkpoint}.')
            dsm.m.load_weights(triplet_checkpoint)
        elif pre_training_checkpoint is not None:
            logger.info(f'Loading pre-training checkpoint: {pre_training_checkpoint}.')
            # If `by_name` is True, weights are loaded into layers only if they share the
            # same name. This is useful for fine-tuning or transfer-learning models where
            # some of the layers have changed.
            dsm.m.load_weights(pre_training_checkpoint, by_name=True)
        dsm.m.compile(optimizer=SGD(), loss=deep_speaker_loss)
        fit_model(dsm, working_dir, NUM_FRAMES)
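
Note: `initial_epoch` above is recovered by splitting the checkpoint path on '/', which assumes POSIX separators and a `*_<epoch>.<ext>` filename scheme. A minimal, platform-independent sketch of the same parse (the filename below is only an assumed example of that convention):

import os

def epoch_from_checkpoint(path):
    # 'ResCNN_checkpoint_42.h5' -> stem 'ResCNN_checkpoint_42' -> epoch 42
    stem = os.path.splitext(os.path.basename(path))[0]
    return int(stem.split('_')[-1])

assert epoch_from_checkpoint('checkpoints-softmax/ResCNN_checkpoint_42.h5') == 42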
Example #2
def load_best_model(self):
    state_dict = utils.load_best_checkpoint(self.checkpoint_dir)
    if state_dict is None:
        print(f'Could not load best checkpoint: none found under {self.checkpoint_dir}.')
        return
    self.model.load_state_dict(state_dict)
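
Here `utils.load_best_checkpoint` returns a PyTorch state dict (or None) rather than a file path. A hypothetical sketch of such a helper, assuming a `model_best.pth` naming convention that the source does not show:

import os

import torch

def load_best_checkpoint(checkpoint_dir):
    # Return the saved state dict if a best snapshot exists, else None.
    best_path = os.path.join(checkpoint_dir, 'model_best.pth')
    if not os.path.isfile(best_path):
        return None
    return torch.load(best_path, map_location='cpu')  # device-agnostic load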
Example #3
def start_training(working_dir):
    ensures_dir(CHECKPOINTS_MTL_DIR)
    batch_input_shape = [None, NUM_FRAMES, NUM_FBANKS, 1]
    logger.info('Started training.')
    kc = KerasFormatConverter(working_dir)
 
    num_speakers_softmax = len(kc.categorical_speakers.speaker_ids)
    logger.info(f'categorical_speakers: {kc.categorical_speakers.speaker_ids}')
    dsm = DeepSpeakerModel(batch_input_shape, include_softmax=False, num_speakers_softmax=num_speakers_softmax)
    base_model = dsm.m
    x = base_model.output
    x = Dense(1024, name='shared')(x)
    y = Dense(1024, name='speaker_task')(x)
    speaker_out = Dense(num_speakers_softmax, activation='softmax', name='speaker_pred')(y)
    gender_out = Dense(1, activation='sigmoid', name='gender_pred')(x)
    model = Model(inputs=base_model.input, outputs=[speaker_out, gender_out])
    
    model.compile(optimizer='adam',
                  loss=['sparse_categorical_crossentropy', 'binary_crossentropy'],
                  metrics={'speaker_pred': 'accuracy', 'gender_pred': 'binary_accuracy'})
    training_checkpoint = load_best_checkpoint(CHECKPOINTS_MTL_DIR)
    if training_checkpoint is not None:
        initial_epoch = int(training_checkpoint.split('/')[-1].split('.')[0].split('_')[-1])
        logger.info(f'Initial epoch is {initial_epoch}.')
        logger.info(f'Loading MTL checkpoint: {training_checkpoint}.')
        model.load_weights(training_checkpoint)  # latest one.
    else:
        initial_epoch = 0
    fit_model_mtl(model, kc.kx_train, kc.ky_train, kc.kg_train, kc.kx_test, kc.ky_test, kc.kg_test,
                  initial_epoch=initial_epoch)
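
Keras matches the two losses positionally to `[speaker_out, gender_out]`. When the tasks deserve different emphasis, `compile` also accepts per-output loss weights; a sketch (the 0.5 weight on the gender head is an arbitrary illustration, not a value from the source):

model.compile(optimizer='adam',
              loss={'speaker_pred': 'sparse_categorical_crossentropy',
                    'gender_pred': 'binary_crossentropy'},
              loss_weights={'speaker_pred': 1.0, 'gender_pred': 0.5},
              metrics={'speaker_pred': 'accuracy', 'gender_pred': 'binary_accuracy'})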
Example #4
def main(args):
    np.random.seed(0)
    torch.manual_seed(0)
    start_time = time.time()
    utils.create_image_records(args.visualization_path)
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # ------------------------- get data loaders -------------------------
    num_feature = 306
    trainlabels = ["mass", "force", "friction"
                   ] if args.baseline else args.train_labels
    passthrough_dict = utils.get_passthrough(trainlabels,
                                             parameter_length=25,
                                             vector_length=num_feature)
    train_loader, shape_test_loader, parameter_test_loader = \
        dataset.getloader(args, labels=trainlabels, inframe=[0, 1, 2, 3], outframe=[4])

    # ------------------------- initialize model and optimizer -------------------------
    select = not args.baseline
    model = net.PhysicsModel(num_feature=num_feature,
                             passthrough=passthrough_dict,
                             select=select)
    if args.cuda:
        model = model.cuda()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=args.lr)
    start_epoch, best_epoch_error, model, optimizer = utils.load_best_checkpoint(
        args.resume, model, optimizer)

    # ------------------------- define criterion -------------------------
    criterion = evaluation.image

    for epoch in range(start_epoch, args.epochs):  # resume from the checkpointed epoch
        print(trainlabels)
        epoch_error = train(train_loader,
                            model,
                            criterion,
                            optimizer,
                            epoch,
                            passthrough_dict,
                            args=args)
        is_best = epoch_error < best_epoch_error
        best_epoch_error = min(epoch_error, best_epoch_error)
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'best_epoch_error': best_epoch_error,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            },
            is_best=is_best,
            checkpoint_path=args.resume,
            epoch=epoch)

    print('Time elapsed: {:.2f}s'.format(time.time() - start_time))
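
The `utils.save_checkpoint` call is the counterpart of `load_best_checkpoint`: the common pattern writes each epoch's full training state and shadow-copies it when `is_best` is set. A hypothetical sketch of that pattern (the filenames are assumptions, not taken from `utils`):

import os
import shutil

import torch

def save_checkpoint(state, is_best, checkpoint_path, epoch):
    # Persist this epoch's state; keep a separate best-so-far snapshot
    # that a later resume can load.
    os.makedirs(checkpoint_path, exist_ok=True)
    latest = os.path.join(checkpoint_path, f'checkpoint_epoch_{epoch}.pth')
    torch.save(state, latest)
    if is_best:
        shutil.copyfile(latest, os.path.join(checkpoint_path, 'model_best.pth'))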
Example #5
def main():
    # Any command-line argument enables hard-negative batch selection.
    select = len(sys.argv) > 1
    print('select', select)

    working_dir = '/media/philippe/8TB/deep-speaker'
    # By construction, these losses should be much higher than the normal losses.
    # We select batches this way.
    batch_input_shape = [None, NUM_FRAMES, NUM_FBANKS, 1]
    print('Testing with the triplet losses.')
    dsm = DeepSpeakerModel(batch_input_shape, include_softmax=False)
    triplet_checkpoint = load_best_checkpoint(CHECKPOINTS_TRIPLET_DIR)
    pre_training_checkpoint = load_best_checkpoint(CHECKPOINTS_SOFTMAX_DIR)
    if triplet_checkpoint is not None:
        print(f'Loading triplet checkpoint: {triplet_checkpoint}.')
        dsm.m.load_weights(triplet_checkpoint)
    elif pre_training_checkpoint is not None:
        print(f'Loading pre-training checkpoint: {pre_training_checkpoint}.')
        # If `by_name` is True, weights are loaded into layers only if they share the
        # same name. This is useful for fine-tuning or transfer-learning models where
        # some of the layers have changed.
        dsm.m.load_weights(pre_training_checkpoint, by_name=True)
    dsm.m.compile(optimizer='adam', loss=deep_speaker_loss)
    kc = KerasFormatConverter(working_dir)
    if select:
        print('TripletBatcherSelectHardNegatives()')
        batcher = TripletBatcherSelectHardNegatives(kc.kx_train, kc.ky_train,
                                                    kc.kx_test, kc.ky_test,
                                                    dsm)
    else:
        print('TripletBatcher()')
        batcher = TripletBatcher(kc.kx_train, kc.ky_train, kc.kx_test,
                                 kc.ky_test)
    batch_size = BATCH_SIZE
    losses = []
    while True:
        _bx, _by = batcher.get_batch(batch_size, is_test=False)
        losses.append(
            dsm.m.evaluate(_bx, _by, verbose=0, batch_size=BATCH_SIZE))
        print(np.mean(losses))
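
The comment above states the invariant this script checks: batches with selected hard negatives should evaluate to a higher triplet loss than randomly assembled ones. A small side-by-side sketch under that assumption, reusing the two batcher classes constructed above (the batch count is arbitrary):

def mean_loss(batcher, model, n_batches=100):
    losses = [model.m.evaluate(*batcher.get_batch(BATCH_SIZE, is_test=False),
                               verbose=0, batch_size=BATCH_SIZE)
              for _ in range(n_batches)]
    return float(np.mean(losses))

random_loss = mean_loss(TripletBatcher(kc.kx_train, kc.ky_train, kc.kx_test, kc.ky_test), dsm)
hard_loss = mean_loss(TripletBatcherSelectHardNegatives(kc.kx_train, kc.ky_train,
                                                        kc.kx_test, kc.ky_test, dsm), dsm)
print(f'random: {random_loss:.4f}  hard-negative: {hard_loss:.4f}')  # expect hard > random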
Example #6
def test(working_dir, checkpoint_file=None):
    batch_input_shape = [None, NUM_FRAMES, NUM_FBANKS, 1]
    dsm = DeepSpeakerModel(batch_input_shape)
    if checkpoint_file is None:
        checkpoint_file = load_best_checkpoint(CHECKPOINTS_TRIPLET_DIR)
    if checkpoint_file is not None:
        logger.info(
            f'Found checkpoint [{checkpoint_file}]. Loading weights...')
        dsm.m.load_weights(checkpoint_file, by_name=True)
    else:
        logger.info(f'Could not find any checkpoint in {CHECKPOINTS_TRIPLET_DIR}.')
        exit(1)

    fm, tpr, acc, eer = eval_model(working_dir, model=dsm)
    logger.info(f'f-measure = {fm:.3f}, true positive rate = {tpr:.3f}, '
                f'accuracy = {acc:.3f}, equal error rate = {eer:.3f}')
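
`eval_model` reports the equal error rate among its metrics. For reference, EER is conventionally derived from raw trial scores as the operating point where the false positive and false negative rates cross; a scikit-learn sketch (not necessarily how `eval_model` computes it):

import numpy as np
from sklearn.metrics import roc_curve

def equal_error_rate(labels, scores):
    # labels: 1 for same-speaker trials, 0 otherwise; scores: similarity scores.
    fpr, tpr, _ = roc_curve(labels, scores)
    idx = np.nanargmin(np.abs(fpr - (1 - tpr)))  # FPR == FNR crossing point
    return (fpr[idx] + (1 - tpr[idx])) / 2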
Example #7
def main(args):
    np.random.seed(0)
    torch.manual_seed(0)
    start_time = time.time()
    utils.create_image_records(args.visualization_path)
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # ------------------------- get data loaders -------------------------
    num_feature, trainlabels = 306, args.train_labels
    passthrough_dict = utils.get_passthrough(trainlabels,
                                             parameter_length=25,
                                             vector_length=num_feature)
    train_loader, shape_test_loader, parameter_test_loader = \
        dataset.getloader(args, labels=trainlabels, inframe=[0, 1, 2, 3], outframe=[4])

    # ------------------------- initialize model and optimizer -------------------------
    model = net.PhysicsModel(num_feature=num_feature,
                             passthrough=passthrough_dict)
    if args.cuda:
        model = model.cuda()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=args.lr)
    start_epoch, best_epoch_error, model, optimizer = utils.load_best_checkpoint(
        args.resume, model, optimizer)

    # ------------------------- define criterion -------------------------
    criterion = evaluation.pixel

    shape_error = interpolate(shape_test_loader,
                              model,
                              criterion,
                              start_epoch,
                              optimizer,
                              args=args)
    parameter_error = interpolate(parameter_test_loader,
                                  model,
                                  criterion,
                                  start_epoch,
                                  optimizer,
                                  args=args)

    print('shape error:', shape_error, '| parameter error:', parameter_error)
    print('Time elapsed: {:.2f}s'.format(time.time() - start_time))
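
Both PyTorch examples seed `numpy` and `torch` at startup. On GPU that alone does not guarantee repeatable runs; a fuller, still best-effort seeding block looks like this:

import random

import numpy as np
import torch

def seed_everything(seed=0):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)           # covers all visible GPUs
    torch.backends.cudnn.deterministic = True  # prefer deterministic kernels
    torch.backends.cudnn.benchmark = False     # disable nondeterministic autotuning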