def main():
    """Train and evaluate the DeepSpeaker model.

    Reads configuration from module-level globals (``args``, ``train_dir``,
    ``test_dir``, ``c``, ``kwargs``, ``num_features``), optionally resumes
    from a checkpoint, then runs the train/test loop for ``args.epochs``
    epochs starting at ``args.start_epoch``.
    """
    # print the experiment configuration
    print('\nparsed options:\n{}\n'.format(vars(args)))
    print('\nNumber of Classes:\n{}\n'.format(len(train_dir.classes)))

    # instantiate model and initialize weights
    # TODO(xin): IMPORTANT load num_classes from checkpoint
    model = DeepSpeakerModel(embedding_size=args.embedding_size,
                             num_classes=len(train_dir.classes),
                             feature_dim=num_features,
                             frame_dim=c.NUM_FRAMES)

    if args.cuda:
        model.cuda()

    from torchsummary import summary
    summary(model, (1, c.NUM_FRAMES, c.NUM_FEATURES))

    optimizer = create_optimizer(model, args.lr)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print('=> loading checkpoint {}'.format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('=> no checkpoint found at {}'.format(args.resume))

    start = args.start_epoch
    end = start + args.epochs

    train_loader = torch.utils.data.DataLoader(train_dir, batch_size=args.batch_size, shuffle=False, **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dir, batch_size=args.test_batch_size, shuffle=False, **kwargs)

    # BUGFIX: this check used to sit inside the epoch loop, so with
    # args.epochs == 0 a test-only run silently did nothing, and the flag was
    # pointlessly re-evaluated every epoch. Hoisted before the loop.
    if args.test_only:
        test(test_loader, model, start)
        return

    for epoch in range(start, end):
        train(train_loader, model, optimizer, epoch)
        test(test_loader, model, epoch)
def main():
    """Load a trained DeepSpeaker model, enroll speakers, and run verification."""
    model = DeepSpeakerModel(embedding_size=256, num_classes=10)

    # Optionally restore weights from a checkpoint supplied on the command line.
    if args.resume:
        if not os.path.isfile(args.resume):
            print('=> no checkpoint found at {}'.format(args.resume))
        else:
            print('=> loading checkpoint {}'.format(args.resume))
            state = torch.load(args.resume)
            model.load_state_dict(state['state_dict'])

    # Build per-speaker reference embeddings, then score the test utterances
    # against them.
    speaker_embeddings = enrollment(model)
    test(model, speaker_embeddings)
def main():
    """Train the DeepSpeaker model, optionally resuming from a checkpoint.

    Configuration comes from module-level globals (``args``, ``train_dir``,
    ``kwargs``, ``LOG_DIR``). Runs training for ``args.epochs`` epochs
    starting at ``args.start_epoch``; evaluation is disabled in this variant.
    """
    # When True, display anchor-positive / anchor-negative distances on the
    # training images after each epoch.
    test_display_triplet_distance = False

    # print the experiment configuration
    print('\nparsed options:\n{}\n'.format(vars(args)))
    print('\nNumber of Classes:\n{}\n'.format(len(train_dir.classes)))

    # instantiate model and initialize weights
    model = DeepSpeakerModel(embedding_size=args.embedding_size,
                             num_classes=len(train_dir.classes))

    if args.cuda:
        model.cuda()

    optimizer = create_optimizer(model, args.lr)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print('=> loading checkpoint {}'.format(args.resume))
            # BUGFIX: the checkpoint was previously deserialized twice
            # (two identical torch.load calls); load once and reuse.
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('=> no checkpoint found at {}'.format(args.resume))

    start = args.start_epoch
    end = start + args.epochs

    train_loader = torch.utils.data.DataLoader(train_dir,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               **kwargs)
    for epoch in range(start, end):
        train(train_loader, model, optimizer, epoch)

        if test_display_triplet_distance:
            display_triplet_distance(model, train_loader,
                                     LOG_DIR + "/train_{}".format(epoch))
# --- Example #4 ---
        # NOTE(review): fragment — the enclosing function header is outside this
        # chunk; `pretrained_dict`, `model_dict`, `model`, and `opt` are defined
        # above it. Code below is kept byte-identical.
        # Keep only the pretrained entries whose name AND tensor shape both
        # match the current model's state dict.
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items()
            if k in model_dict.keys() and v.size() == model_dict[k].size()
        }
        # Model parameters that received no value from the pretrained weights.
        missed_params = [
            k for k, v in model_dict.items()
            if not k in pretrained_dict.keys()
        ]

        print('loaded params/tot params:{}/{}'.format(len(pretrained_dict),
                                                      len(model_dict)))
        print('miss matched params:{}'.format(missed_params))

        # Merge the filtered pretrained weights into the model's state dict
        # and load the result, so unmatched params keep their current values.
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)

    # Build the train and test data loaders from the same data path.
    # NOTE(review): shuffle=True on the *test* loader is unusual — presumably
    # intentional here, but verify against how test_loader is consumed.
    train_dataset = Mydataset(opt.data_path, 'train')
    train_loader = DataLoader(train_dataset,
                              batch_size=opt.batch_size,
                              num_workers=opt.num_workers,
                              drop_last=False,
                              shuffle=True)

    test_dataset = Mydataset(opt.data_path, 'test')
    test_loader = DataLoader(test_dataset,
                             batch_size=opt.batch_size,
                             num_workers=opt.num_workers,
                             drop_last=False,
                             shuffle=True)
# --- Example #5 ---
def load_model(ckp_path, embedding_size=512, num_classes=251):
    """Load a DeepSpeakerModel from a raw state-dict checkpoint.

    Args:
        ckp_path: path to a checkpoint file containing a bare state_dict
            (unlike the resume checkpoints elsewhere in this file, which
            wrap it under an 'state_dict' key).
        embedding_size: embedding dimension the checkpoint was trained with
            (default 512, the previous hard-coded value).
        num_classes: number of speaker classes the checkpoint was trained
            with (default 251, the previous hard-coded value).

    Returns:
        The model with weights restored, mapped onto CPU.
    """
    model = DeepSpeakerModel(embedding_size=embedding_size,
                             num_classes=num_classes)
    # map_location='cpu' lets GPU-trained checkpoints load on CPU-only hosts.
    checkpoint = torch.load(ckp_path, map_location='cpu')
    model.load_state_dict(checkpoint)
    return model