Example #1
File: main.py Project: bliunlpr/speaker
def main():

    # optionally resume: restore hyperparameters and weights from a saved package
    if opt.resume:
        model_path = os.path.join(opt.works_dir, opt.resume)
        if not os.path.isfile(model_path):
            raise Exception("no checkpoint found at {}".format(model_path))

        package = torch.load(model_path,
                             map_location=lambda storage, loc: storage)
        opt.lr = package.get('learning_rate', opt.lr)
        opt.total_steps = int(package.get('total_steps', 0)) - 1
        print('total_steps is {}'.format(opt.total_steps))

        if opt.seq_training == 'true':
            if opt.model_type == 'lstm':
                model = DeepSpeakerSeqModel.load_model(model_path,
                                                       'state_dict')
            elif opt.model_type == 'cnn':
                model = DeepSpeakerCnnSeqModel.load_model(
                    model_path, 'state_dict')
            else:
                raise Exception('wrong model_type {}'.format(opt.model_type))
        else:
            if opt.model_type == 'lstm':
                model = DeepSpeakerModel.load_model(model_path, 'state_dict')
            elif opt.model_type == 'cnn':
                model = DeepSpeakerCnnModel.load_model(model_path,
                                                       'state_dict')
            else:
                raise Exception('wrong model_type {}'.format(opt.model_type))

        logging.info('Loading model {}'.format(model_path))
    else:
        if opt.seq_training == 'true':
            if opt.model_type == 'lstm':
                model = DeepSpeakerSeqModel(opt)
            elif opt.model_type == 'cnn':
                model = DeepSpeakerCnnSeqModel(opt)
            else:
                raise Exception('wrong model_type {}'.format(opt.model_type))
        else:
            if opt.model_type == 'lstm':
                model = DeepSpeakerModel(opt)
            elif opt.model_type == 'cnn':
                model = DeepSpeakerCnnModel(opt)
            else:
                raise Exception('wrong model_type {}'.format(opt.model_type))

    print(model)
    for k, v in model.state_dict().items():
        print(k, v.shape)

    model.to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=opt.lr,
                           betas=(opt.beta1, 0.999),
                           weight_decay=opt.weight_decay)
    train(opt, model, optimizer)
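For reference, a checkpoint package compatible with the resume branch above could be written like this; a minimal sketch, assuming only the 'state_dict', 'learning_rate', and 'total_steps' keys that the code actually reads back:

def save_package(model, opt, path):
    # keys mirror what the resume branch reads: 'state_dict' for the
    # weights, plus the two training hyperparameters
    torch.save({
        'state_dict': model.state_dict(),
        'learning_rate': opt.lr,
        'total_steps': opt.total_steps,
    }, path)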
Example #2

def main():

    model = DeepSpeakerModel(embedding_size=256, num_classes=10)

    if args.resume:
        if os.path.isfile(args.resume):
            print('=> loading checkpoint {}'.format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
        else:
            print('=> no checkpoint found at {}'.format(args.resume))

    # print(calculateOneEmbedding('/home/zinzin/Documents/pytorch/deepspeaker-pytorch/data/test_set/dnl/s1/t1/s1_t1_1.wav', model))

    embeddings = enrollment(model)
    test(model, embeddings)
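`enrollment` and `test` are defined elsewhere in the project; the comparison they imply is typically a cosine-similarity check between embeddings. A minimal sketch, where every name and the threshold are assumptions:

import torch.nn.functional as F

def verify(test_embedding, enrolled_embedding, threshold=0.7):
    # cosine similarity between two speaker embeddings;
    # 0.7 is an arbitrary placeholder threshold
    score = F.cosine_similarity(test_embedding, enrolled_embedding, dim=-1)
    return score.item() >= threshold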
Example #3

def main():
    # Views the training images and displays the distance on anchor-negative and anchor-positive
    test_display_triplet_distance = False

    # print the experiment configuration
    print('\nparsed options:\n{}\n'.format(vars(args)))
    print('\nNumber of Classes:\n{}\n'.format(len(train_dir.classes)))

    # instantiate model and initialize weights
    # TODO(xin): IMPORTANT load num_classes from checkpoint
    model = DeepSpeakerModel(embedding_size=args.embedding_size,
                             num_classes=len(train_dir.classes),
                             feature_dim=num_features,
                             frame_dim=c.NUM_FRAMES)

    if args.cuda:
        model.cuda()

    from torchsummary import summary
    summary(model, (1, c.NUM_FRAMES, c.NUM_FEATURES))
    # more detailed information on the model:
    # print(model)

    optimizer = create_optimizer(model, args.lr)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print('=> loading checkpoint {}'.format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('=> no checkpoint found at {}'.format(args.resume))

    start = args.start_epoch
    end = start + args.epochs

    train_loader = torch.utils.data.DataLoader(train_dir, batch_size=args.batch_size,
                                               shuffle=False, **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dir, batch_size=args.test_batch_size,
                                              shuffle=False, **kwargs)

    # with --test_only, run a single evaluation pass and exit
    if args.test_only:
        test(test_loader, model, start)
        return

    for epoch in range(start, end):
        train(train_loader, model, optimizer, epoch)
        test(test_loader, model, epoch)
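`create_optimizer` comes from the project's training utilities and is not shown; a plausible sketch, where the optimizer choice and its kwargs are assumptions:

import torch.optim as optim

def create_optimizer(model, lr):
    # hypothetical stand-in: the real helper may select the optimizer
    # from a command-line flag and set momentum / weight decay
    return optim.Adam(model.parameters(), lr=lr)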
Example #4
def main(libri_dir=c.DATASET_DIR):


    print('Looking for fbank features [.npy] files in {}.'.format(libri_dir))
    libri = data_catalog(libri_dir)
    #                          filename                                       speaker_id
    #   0    audio/LibriSpeechSamples/train-clean-100-npy/1-100-0001.npy        1
    #   1    audio/LibriSpeechSamples/train-clean-100-npy/1-100-0002.npy        1        
    unique_speakers = libri['speaker_id'].unique()  # e.g. 251 speakers in train-clean-100
    transform = transforms.Compose([transforms.ToTensor()])  # note: unused below
    train_dir = stochastic_mini_batch(libri)
    train_loader = DataLoader(train_dir, batch_size=c.BATCH_SIZE, shuffle=True)
    model = DeepSpeakerModel(embedding_size=c.EMBEDDING_SIZE, num_classes=c.NUM_SPEAKERS)
    
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0)
    model.cuda()
    summary(model, input_size=(1, 160, 64))

    for epoch in range(100):
        model.train()

        for batch_idx, (data_a, data_p, data_n, label_a, label_p, label_n) in enumerate(tqdm(train_loader)):
            # move the anchor / positive / negative batches to the GPU
            data_a, data_p, data_n = data_a.float().cuda(), data_p.float().cuda(), data_n.float().cuda()
            out_a, out_p, out_n = model(data_a), model(data_p), model(data_n)

            triplet_loss = TripletMarginLoss(0.2)(out_a, out_p, out_n)
            loss = triplet_loss
            # compute gradient and update weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print('selected_triplet_loss', triplet_loss.item())
        print("epoch:", epoch)
        torch.save(model.state_dict(), "checkpoint_{}.pt".format(epoch))
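Since each epoch saves a bare state_dict, resuming from one of those files is a single load; a sketch, with a hypothetical filename:

model = DeepSpeakerModel(embedding_size=c.EMBEDDING_SIZE, num_classes=c.NUM_SPEAKERS)
model.load_state_dict(torch.load("checkpoint_99.pt"))  # hypothetical epoch number
model.cuda()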
Example #5

def main():
    # Views the training images and displays the distance on anchor-negative and anchor-positive
    test_display_triplet_distance = False

    # print the experiment configuration
    print('\nparsed options:\n{}\n'.format(vars(args)))
    print('\nNumber of Classes:\n{}\n'.format(len(train_dir.classes)))

    # instantiate model and initialize weights
    model = DeepSpeakerModel(embedding_size=args.embedding_size,
                             num_classes=len(train_dir.classes))

    if args.cuda:
        model.cuda()

    optimizer = create_optimizer(model, args.lr)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print('=> loading checkpoint {}'.format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('=> no checkpoint found at {}'.format(args.resume))

    start = args.start_epoch
    end = start + args.epochs

    train_loader = torch.utils.data.DataLoader(train_dir,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               **kwargs)
    for epoch in range(start, end):

        train(train_loader, model, optimizer, epoch)
        # test(test_loader, model, epoch)  # disabled: no test_loader is built in this variant

        if test_display_triplet_distance:
            display_triplet_distance(model, train_loader,
                                     LOG_DIR + "/train_{}".format(epoch))
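The resume branch above reads 'epoch', 'state_dict', and 'optimizer' keys, so the matching save call, presumably issued inside train() at the end of each epoch, would look roughly like this (the path is an assumption):

torch.save({
    'epoch': epoch + 1,
    'state_dict': model.state_dict(),
    'optimizer': optimizer.state_dict(),
}, '{}/checkpoint_{}.pth'.format(LOG_DIR, epoch))  # hypothetical path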
Example #6
import os
import torch
from model import DeepSpeakerModel

# Real usage would feed fbank features extracted from a wav file, e.g.:
# _, _, rawInput = get_feature(os.path.dirname(os.path.abspath(__file__)) + '/s2_n_8_9.wav')
# tensor = torch.from_numpy(rawInput).float()
# (move the tensor and model to the GPU first if CUDA is available)

cnnModel = DeepSpeakerModel(256, 1)

# smoke test with a random batch shaped (batch, channel, frames, features)
inputs = torch.randn(128, 1, 64, 32, requires_grad=True)

output = cnnModel(inputs)

print('OUTPUT: ', output)
Example #7
import torch
import tqdm
import tensorflow as tf
from torch.utils.tensorboard import SummaryWriter  # assumed; the original import is not shown

import options as opt
from data_load import Mydataset
from model import DeepSpeakerModel
from audio_fbank import read_mfcc, sample_from_mfcc

if __name__ == '__main__':
    # fix seeds for reproducibility
    torch.manual_seed(55)
    torch.cuda.manual_seed_all(55)

    model = DeepSpeakerModel(embedding_size=opt.embedding_size,
                             num_classes=opt.classes).cuda()
    writer = SummaryWriter()

    if hasattr(opt, 'weights'):
        # keep only pretrained tensors whose name and shape match this model
        pretrained_dict = torch.load(opt.weights)
        model_dict = model.state_dict()
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items()
            if k in model_dict and v.size() == model_dict[k].size()
        }
        missed_params = [
            k for k, v in model_dict.items()
            if k not in pretrained_dict
        ]
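The snippet stops after collecting `missed_params`; the usual completion of this partial-loading pattern, a sketch rather than the project's actual code, merges the filtered weights back into the model:

        print('missed params:', missed_params)
        model_dict.update(pretrained_dict)  # overwrite the matching entries
        model.load_state_dict(model_dict)   # load merged weights into the model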
Example #8
def load_model(ckp_path):
    # the checkpoint here is a bare state_dict (no wrapper keys),
    # mapped to CPU so it loads regardless of the saving device
    model = DeepSpeakerModel(embedding_size=512, num_classes=251)
    checkpoint = torch.load(ckp_path, map_location='cpu')
    model.load_state_dict(checkpoint)
    return model
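A minimal usage sketch; the checkpoint filename, the input layout, and what the forward pass returns are assumptions, not taken from the source:

model = load_model('checkpoint_99.pt')  # hypothetical path
model.eval()
with torch.no_grad():
    fbank = torch.randn(1, 1, 160, 64)  # assumed (batch, channel, frames, features)
    embedding = model(fbank)
print(embedding.shape)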
Example #9
                                       pin_memory=True)
logging.info("Building dataset Sucessed")

##  Building Model ##
logging.info("Building Model")
opt.in_size = val_dataset.in_size
if opt.seq_training == 'true':
    if opt.model_type == 'lstm' or opt.model_type == 'blstmp':
        model = DeepSpeakerSeqModel(opt)
    elif opt.model_type == 'cnn':
        model = DeepSpeakerCnnSeqModel(opt)
    else:
        raise Exception('wrong model_type {}'.format(opt.model_type))
else:
    if opt.model_type == 'lstm' or opt.model_type == 'blstmp':
        model = DeepSpeakerModel(opt)
    elif opt.model_type == 'cnn':
        model = DeepSpeakerCnnModel(opt)
    else:
        raise Exception('wrong model_type {}'.format(opt.model_type))

if opt.resume:
    model, opt.steps = load(model, opt.resume)
else:
    raise Exception('wrong opt.resume {}'.format(opt.resume))

model.to(opt.device)

print(model)
logging.info("Building Model Sucessed")