Esempio n. 1
0
elif args.model == 'lcnn9_mfcc':
    model = model_.lcnn_9layers(n_z=args.latent_size,
                                proj_size=len(train_dataset.speakers_list)
                                if args.softmax != 'none' else 0,
                                ncoef=args.ncoef,
                                sm_type=args.softmax)
elif args.model == 'lcnn29_mfcc':
    model = model_.lcnn_29layers_v2(n_z=args.latent_size,
                                    proj_size=len(train_dataset.speakers_list)
                                    if args.softmax != 'none' else 0,
                                    ncoef=args.ncoef,
                                    sm_type=args.softmax)
elif args.model == 'TDNN':
    model = model_.TDNN(n_z=args.latent_size,
                        proj_size=len(train_dataset.speakers_list)
                        if args.softmax != 'none' else 0,
                        ncoef=args.ncoef,
                        sm_type=args.softmax)
elif args.model == 'TDNN_multipool':
    model = model_.TDNN_multipool(n_z=args.latent_size,
                                  proj_size=len(train_dataset.speakers_list)
                                  if args.softmax != 'none' else 0,
                                  ncoef=args.ncoef,
                                  sm_type=args.softmax)
elif args.model == 'FTDNN':
    model = model_.FTDNN(n_z=args.latent_size,
                         proj_size=len(train_dataset.speakers_list)
                         if args.softmax != 'none' else 0,
                         ncoef=args.ncoef,
                         sm_type=args.softmax)
Esempio n. 2
0
                                proj_size=train_dataset.n_speakers if
                                args.softmax != 'none' or args.pretrain else 0,
                                ncoef=args.ncoef,
                                sm_type=args.softmax,
                                delta=args.delta)
elif args.model == 'resnet_2d':
    model = model_.ResNet_2d(n_z=args.latent_size,
                             proj_size=train_dataset.n_speakers
                             if args.softmax != 'none' or args.pretrain else 0,
                             ncoef=args.ncoef,
                             sm_type=args.softmax,
                             delta=args.delta)
elif args.model == 'TDNN':
    model = model_.TDNN(n_z=args.latent_size,
                        proj_size=train_dataset.n_speakers
                        if args.softmax != 'none' or args.pretrain else 0,
                        ncoef=args.ncoef,
                        sm_type=args.softmax,
                        delta=args.delta)
elif args.model == 'TDNN_att':
    model = model_.TDNN_att(n_z=args.latent_size,
                            proj_size=train_dataset.n_speakers
                            if args.softmax != 'none' or args.pretrain else 0,
                            ncoef=args.ncoef,
                            sm_type=args.softmax,
                            delta=args.delta)
elif args.model == 'TDNN_multihead':
    model = model_.TDNN_multihead(
        n_z=args.latent_size,
        proj_size=train_dataset.n_speakers
        if args.softmax != 'none' or args.pretrain else 0,
        ncoef=args.ncoef,
Esempio n. 3
0
                                   ncoef=args.ncoef)
    elif args.model == 'resnet_stats':
        model = model_.ResNet_stats(n_z=args.latent_size,
                                    proj_size=None,
                                    ncoef=args.ncoef)
    elif args.model == 'lcnn9_mfcc':
        model = model_.lcnn_9layers(n_z=args.latent_size,
                                    proj_size=None,
                                    ncoef=args.ncoef)
    elif args.model == 'lcnn29_mfcc':
        model = model_.lcnn_29layers_v2(n_z=args.latent_size,
                                        proj_size=None,
                                        ncoef=args.ncoef)
    elif args.model == 'TDNN':
        model = model_.TDNN(n_z=args.latent_size,
                            proj_size=None,
                            ncoef=args.ncoef)
    elif args.model == 'TDNN_multipool':
        model = model_.TDNN_multipool(n_z=args.latent_size,
                                      proj_size=None,
                                      ncoef=args.ncoef)
    elif args.model == 'FTDNN':
        model = model_.FTDNN(n_z=args.latent_size,
                             proj_size=None,
                             ncoef=args.ncoef)

    # The identity map_location hands each storage back as deserialized
    # instead of moving it to the device recorded in the checkpoint.
    ckpt = torch.load(
        args.cp_path,
        map_location=lambda storage, loc: storage,
    )

    # strict=False: do not require the checkpoint keys to match the model's
    # keys exactly.
    saved_weights = ckpt['model_state']
    model.load_state_dict(saved_weights, strict=False)
    model.eval()
Esempio n. 4
0
                                    ncoef=args.ncoef,
                                    sm_type=args.softmax)
    elif args.model == 'lcnn9_mfcc':
        model = model_.lcnn_9layers(n_z=args.latent_size,
                                    proj_size=len(list(labels_dict.keys())),
                                    ncoef=args.ncoef,
                                    sm_type=args.softmax)
    elif args.model == 'lcnn29_mfcc':
        model = model_.lcnn_29layers_v2(n_z=args.latent_size,
                                        proj_size=len(list(
                                            labels_dict.keys())),
                                        ncoef=args.ncoef,
                                        sm_type=args.softmax)
    elif args.model == 'TDNN':
        model = model_.TDNN(n_z=args.latent_size,
                            proj_size=len(list(labels_dict.keys())),
                            ncoef=args.ncoef,
                            sm_type=args.softmax)
    elif args.model == 'TDNN_multipool':
        model = model_.TDNN_multipool(n_z=args.latent_size,
                                      proj_size=len(list(labels_dict.keys())),
                                      ncoef=args.ncoef,
                                      sm_type=args.softmax)
    elif args.model == 'FTDNN':
        model = model_.FTDNN(n_z=args.latent_size,
                             proj_size=len(list(labels_dict.keys())),
                             ncoef=args.ncoef,
                             sm_type=args.softmax)

    if args.cp_path_2 is not None:
        model_2 = type(model)(n_z=args.latent_size,
                              proj_size=len(list(labels_dict.keys())),
Esempio n. 5
0
    out = model.out_proj(mu, torch.ones(mu.size(0)))
    print('resnet_small', mu.size(), emb.size(), out.size())
# Smoke tests: build each selected architecture, run one random batch through
# it and print the sizes of the two forward outputs and of the projection.
if args.model in ('resnet_2d', 'all'):
    # Unlike the TDNN variants below, the third input dimension is fixed to 43
    # and no ncoef argument is passed to the constructor.
    batch = torch.rand(3, 3 if args.delta else 1, 43, 200)
    model = model_.ResNet_2d(
        n_z=args.latent_size,
        delta=args.delta,
        proj_size=10,
        sm_type='softmax',
    )
    mu, emb = model.forward(batch)
    out = model.out_proj(mu, torch.ones(mu.size(0)))
    print('resnet_2d', *(t.size() for t in (mu, emb, out)))

# TDNN and TDNN_att take the same input shape and constructor arguments; the
# printed tag doubles as the attribute name on model_.
for _tag in ('TDNN', 'TDNN_att'):
    if args.model in (_tag, 'all'):
        batch = torch.rand(3, 3 if args.delta else 1, args.ncoef, 200)
        model = getattr(model_, _tag)(
            n_z=args.latent_size,
            ncoef=args.ncoef,
            delta=args.delta,
            proj_size=10,
            sm_type='softmax',
        )
        mu, emb = model.forward(batch)
        out = model.out_proj(mu, torch.ones(mu.size(0)))
        print(_tag, *(t.size() for t in (mu, emb, out)))
Esempio n. 6
0
                                 proj_size=0,
                                 ncoef=args.ncoef,
                                 delta=args.delta)
 elif args.model == 'resnet_small':
     model = model_.ResNet_small(n_z=args.latent_size,
                                 proj_size=0,
                                 ncoef=args.ncoef,
                                 delta=args.delta)
 elif args.model == 'resnet_2d':
     model = model_.ResNet_2d(n_z=args.latent_size,
                              proj_size=0,
                              ncoef=args.ncoef,
                              delta=args.delta)
 elif args.model == 'TDNN':
     model = model_.TDNN(n_z=args.latent_size,
                         proj_size=0,
                         ncoef=args.ncoef,
                         delta=args.delta)
 elif args.model == 'TDNN_att':
     model = model_.TDNN_att(n_z=args.latent_size,
                             proj_size=0,
                             ncoef=args.ncoef,
                             delta=args.delta)
 elif args.model == 'TDNN_multihead':
     model = model_.TDNN_multihead(n_z=args.latent_size,
                                   proj_size=0,
                                   ncoef=args.ncoef,
                                   delta=args.delta)
 elif args.model == 'TDNN_lstm':
     model = model_.TDNN_lstm(n_z=args.latent_size,
                              proj_size=0,
                              ncoef=args.ncoef,
Esempio n. 7
0
# resnet_50 and TDNN are built with the pase_cfg/pase_cp arguments and are fed
# a (3, 10000) random batch; only the constructor and printed tag differ.
for _tag, _ctor_name in (('resnet_50', 'ResNet_50'), ('TDNN', 'TDNN')):
    if args.model in (_tag, 'all'):
        batch = torch.rand(3, 10000).to(device)
        model = getattr(model_, _ctor_name)(
            pase_cfg=args.pase_cfg,
            pase_cp=args.pase_cp,
            n_z=args.latent_size,
            ncoef=args.ncoef,
            proj_size=10,
            sm_type='softmax',
        ).to(device)
        mu = model.forward(batch)
        # NOTE(review): the ones tensor is created on the default device, not
        # moved to `device` -- confirm out_proj copes with that.
        out = model.out_proj(mu, torch.ones(mu.size(0)))
        print(_tag, mu.size(), out.size())

# TDNN_mfcc takes no pase arguments and is fed a (3, ncoef, 200) batch.
if args.model in ('TDNN_mfcc', 'all'):
    batch = torch.rand(3, args.ncoef, 200).to(device)
    model = model_.TDNN_mfcc(
        n_z=args.latent_size,
        ncoef=args.ncoef,
        proj_size=10,
        sm_type='softmax',
    ).to(device)
    mu = model.forward(batch)
    out = model.out_proj(mu, torch.ones(mu.size(0)))
    print('TDNN_mfcc', mu.size(), out.size())
if args.model == 'MLP' or args.model == 'all':
                                    nh=ckpt['n_hidden'],
                                    n_h=ckpt['hidden_size'],
                                    proj_size=ckpt['r_proj_size'],
                                    ncoef=ckpt['ncoef'],
                                    ndiscriminators=ckpt['ndiscriminators'])
    elif args.model == 'resnet_large':
        model = model_.ResNet_large(n_z=ckpt['latent_size'],
                                    nh=ckpt['n_hidden'],
                                    n_h=ckpt['hidden_size'],
                                    proj_size=ckpt['r_proj_size'],
                                    ncoef=ckpt['ncoef'],
                                    ndiscriminators=ckpt['ndiscriminators'])
    elif args.model == 'TDNN':
        model = model_.TDNN(n_z=ckpt['latent_size'],
                            nh=ckpt['n_hidden'],
                            n_h=ckpt['hidden_size'],
                            proj_size=ckpt['r_proj_size'],
                            ncoef=ckpt['ncoef'],
                            ndiscriminators=ckpt['ndiscriminators'])

    # A RuntimeError from the strict load is reported and execution continues;
    # anything else is reported with its exception class and re-raised.
    try:
        model.load_state_dict(ckpt['model_state'], strict=True)
    except RuntimeError as err:
        print(f"Runtime Error: {err}")
    except BaseException as unexpected:
        print("Unexpected error:", type(unexpected))
        raise

    # Switch to inference mode, then move to the selected device if requested.
    model.eval()
    if args.cuda:
        model = model.to(device)
Esempio n. 9
0
elif args.model == 'resnet_large':
    model = model_.ResNet_large(n_z=args.latent_size,
                                nh=args.n_hidden,
                                n_h=args.hidden_size,
                                proj_size=train_dataset.n_speakers,
                                ncoef=args.ncoef,
                                dropout_prob=args.dropout_prob,
                                sm_type=args.softmax,
                                ndiscriminators=args.ndiscriminators,
                                r_proj_size=args.rproj_size)
elif args.model == 'TDNN':
    model = model_.TDNN(n_z=args.latent_size,
                        nh=args.n_hidden,
                        n_h=args.hidden_size,
                        proj_size=train_dataset.n_speakers,
                        ncoef=args.ncoef,
                        dropout_prob=args.dropout_prob,
                        sm_type=args.softmax,
                        ndiscriminators=args.ndiscriminators,
                        r_proj_size=args.rproj_size)

# With CUDA requested, pick a device via get_freer_gpu() and move the model
# there; otherwise leave the model where it is and record no device.
if args.cuda:
    device = get_freer_gpu()
    model = model.cuda(device)
else:
    device = None

optimizer = TransformerOptimizer(optim.SGD(model.parameters(),
                                           lr=args.lr,
Esempio n. 10
0
	model = model_.ResNet_stats(n_z=args.latent_size, ncoef=args.ncoef)
	mu = model.forward(batch)
	print('resnet_stats', mu.size())
# Every architecture here takes the same constructor arguments and the same
# (3, 1, ncoef, 400) random batch, so one loop covers them all. Each pair is
# (name accepted on the command line / printed, constructor attribute on model_).
for _tag, _ctor_name in (
	('lcnn9_mfcc', 'lcnn_9layers'),
	('lcnn29_mfcc', 'lcnn_29layers_v2'),
	('TDNN', 'TDNN'),
	('TDNN_multipool', 'TDNN_multipool'),
	('FTDNN', 'FTDNN'),
):
	if args.model in (_tag, 'all'):
		batch = torch.rand(3, 1, args.ncoef, 400)
		# Look the constructor up only for selected models.
		model = getattr(model_, _ctor_name)(n_z=args.latent_size, ncoef=args.ncoef)
		mu = model.forward(batch)
		print(_tag, mu.size())

if args.softmax:
	batch = torch.rand(3, mu.size(0))
Esempio n. 11
0
                                ncoef=args.ncoef,
                                ndiscriminators=args.ndiscriminators,
                                r_proj_size=args.rproj_size)
    print('resnet_large')
    mu, emb = model.forward(batch)
    print(mu.size())
    emb = torch.cat([emb, emb], 1)
    print(emb.size())
    pred = model.forward_bin(emb)
    print(pred)
    scores_p = model.forward_bin(emb)
    print(scores_p)
# Smoke test for TDNN: one random batch through forward(), then the
# embedding concatenated with itself through forward_bin().
if args.model in ('TDNN', 'all'):
    batch = torch.rand(3, 1, args.ncoef, 200)
    model = model_.TDNN(
        n_z=args.latent_size,
        nh=args.n_hidden,
        n_h=args.hidden_size,
        proj_size=100,
        ncoef=args.ncoef,
        ndiscriminators=args.ndiscriminators,
        r_proj_size=args.rproj_size,
    )
    print('TDNN')

    mu, emb = model.forward(batch)
    print(mu.size())

    # Pair each embedding with itself along dimension 1.
    emb = torch.cat((emb, emb), dim=1)
    print(emb.size())

    # forward_bin is called twice on the same input, as in the original.
    pred = model.forward_bin(emb)
    print(pred)
    scores_p = model.forward_bin(emb)
    print(scores_p)
Esempio n. 12
0
elif args.model == 'resnet_small':
    model = model_.ResNet_small(n_z=args.latent_size,
                                proj_size=train_dataset.n_speakers,
                                ncoef=args.ncoef,
                                sm_type=args.softmax,
                                delta=args.delta)
elif args.model == 'resnet_2d':
    model = model_.ResNet_2d(n_z=args.latent_size,
                             proj_size=train_dataset.n_speakers,
                             ncoef=args.ncoef,
                             sm_type=args.softmax,
                             delta=args.delta)
elif args.model == 'TDNN':
    model = model_.TDNN(n_z=args.latent_size,
                        proj_size=train_dataset.n_speakers,
                        ncoef=args.ncoef,
                        sm_type=args.softmax,
                        delta=args.delta)
elif args.model == 'TDNN_logpool':
    model = model_.TDNN_logpool(n_z=args.latent_size,
                                proj_size=train_dataset.n_speakers,
                                ncoef=args.ncoef,
                                sm_type=args.softmax,
                                delta=args.delta)
elif args.model == 'TDNN_att':
    model = model_.TDNN_att(n_z=args.latent_size,
                            proj_size=train_dataset.n_speakers,
                            ncoef=args.ncoef,
                            sm_type=args.softmax,
                            delta=args.delta)
elif args.model == 'TDNN_multihead':
Esempio n. 13
0
		# A checkpoint is mandatory: nothing below works without it.
		if args.cp_path is None:
			raise ValueError('There is no checkpoint/model path. Use arg --cp-path to indicate the path!')

		print(f'Cuda Mode is: {args.cuda}')

		if args.cuda:
			device = get_freer_gpu()

		# Command-line model name -> constructor attribute on model_. An
		# unknown name leaves `model` unassigned, exactly like the if/elif
		# chain this replaces.
		_ctor_by_name = {
			'resnet_18': 'ResNet_18',
			'resnet_34': 'ResNet_34',
			'resnet_50': 'ResNet_50',
			'TDNN': 'TDNN',
		}
		if args.model in _ctor_by_name:
			model = getattr(model_, _ctor_by_name[args.model])(
				pase_cfg=args.pase_cfg,
				n_z=args.latent_size,
				proj_size=None,
				ncoef=args.ncoef,
			)

		# The identity map_location hands each storage back as deserialized;
		# strict=False tolerates key mismatches between checkpoint and model.
		ckpt = torch.load(args.cp_path, map_location=lambda storage, loc: storage)
		model.load_state_dict(ckpt['model_state'], strict=False)
		model.eval()

		if args.cuda:
			model = model.to(device)

		# The data arguments are string-concatenated with 'wav.scp', so they
		# are expected to already end with a path separator.
		enroll_utt_data = read_utt2rec(args.enroll_data + 'wav.scp', args.m4a)
		test_utt_data = read_utt2rec(args.test_data + 'wav.scp', args.m4a)

		utterances_enroll, utterances_test, labels = read_trials(args.trials_path)

		print('\nAll data ready. Start of scoring')
Esempio n. 14
0
def train(lr, l2, max_gnorm, momentum, margin, lambda_, swap, latent_size, n_frames, model, ncoef, epochs, batch_size, valid_batch_size, n_workers, cuda, train_hdf_file, valid_hdf_file, cp_path, softmax, delta, logdir):
	"""Train one model configuration and return the training loop's result.

	Builds the training and validation loaders, instantiates the architecture
	named by ``model``, wraps it in an SGD optimizer and a ``TrainLoop``, and
	runs it for ``epochs`` epochs. Only the function's own parameters are
	used; the global ``args`` namespace is not consulted.

	Args:
		lr, momentum, l2: SGD learning rate, momentum and weight decay.
		max_gnorm, margin, lambda_, swap: forwarded unchanged to ``TrainLoop``.
		latent_size: cast to ``int`` and passed to the model as ``n_z``.
		n_frames: cast to ``int`` and passed to both datasets as ``max_nb_frames``.
		model: architecture name (one of the keys of the table below).
		ncoef, softmax, delta: passed to the model constructor as ``ncoef``,
			``sm_type`` and ``delta``; ``delta`` is also passed to both datasets.
		epochs: number of epochs handed to ``trainer.train``.
		batch_size, valid_batch_size: batch sizes of the two loaders.
		n_workers: ``num_workers`` for both loaders.
		cuda: when truthy, a device is chosen with ``get_freer_gpu()`` and the
			model is moved to it; otherwise ``device`` is ``None``.
		train_hdf_file, valid_hdf_file: ``hdf5_name`` of the two datasets.
		cp_path: checkpoint path; ``get_file_name(cp_path)`` names the run.
		logdir: when truthy, a TensorBoard ``SummaryWriter`` is created at
			``logdir + cp_name``; otherwise no logger is used.

	Returns:
		Whatever ``trainer.train(n_epochs=epochs)`` returns.

	Raises:
		ValueError: if ``model`` is not a known architecture name.
	"""

	# Architecture name -> constructor attribute on model_.
	model_ctors = {
		'resnet_mfcc': 'ResNet_mfcc',
		'resnet_34': 'ResNet_34',
		'resnet_lstm': 'ResNet_lstm',
		'resnet_qrnn': 'ResNet_qrnn',
		'resnet_stats': 'ResNet_stats',
		'resnet_large': 'ResNet_large',
		'resnet_small': 'ResNet_small',
		'resnet_2d': 'ResNet_2d',
		'TDNN': 'TDNN',
		'TDNN_att': 'TDNN_att',
		'TDNN_multihead': 'TDNN_multihead',
		'TDNN_lstm': 'TDNN_lstm',
		'TDNN_aspp': 'TDNN_aspp',
		'TDNN_mod': 'TDNN_mod',
		'TDNN_multipool': 'TDNN_multipool',
		'transformer': 'transformer_enc',
	}

	# Fail early with a clear message instead of an AttributeError on a str
	# once the optimizer is built.
	model_name = model
	if model_name not in model_ctors:
		raise ValueError('Unknown model: {!r}. Expected one of: {}'.format(model_name, ', '.join(sorted(model_ctors))))

	device = None
	if cuda:
		device = get_freer_gpu()
		if model_name == 'resnet_qrnn':
			# Make cupy use the GPU index parsed from the selected device.
			import cupy
			cupy.cuda.Device(int(str(device).split(':')[-1])).use()

	cp_name = get_file_name(cp_path)

	if logdir:
		from torch.utils.tensorboard import SummaryWriter
		writer = SummaryWriter(log_dir=logdir+cp_name, comment=model_name, purge_step=True)
	else:
		writer = None

	train_dataset = Loader(hdf5_name=train_hdf_file, max_nb_frames=int(n_frames), delta=delta)
	# Fixed: the original passed `n.workers`, a NameError; the parameter is n_workers.
	train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=n_workers, worker_init_fn=set_np_randomseed)

	valid_dataset = Loader_valid(hdf5_name=valid_hdf_file, max_nb_frames=int(n_frames), delta=delta)
	valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=valid_batch_size, shuffle=True, num_workers=n_workers, worker_init_fn=set_np_randomseed)

	# Every architecture takes the same constructor arguments.
	model = getattr(model_, model_ctors[model_name])(n_z=int(latent_size), proj_size=train_dataset.n_speakers, ncoef=ncoef, sm_type=softmax, delta=delta)

	if cuda:
		model = model.to(device)

	optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=l2)

	trainer = TrainLoop(model, optimizer, train_loader, valid_loader, max_gnorm=max_gnorm, margin=margin, lambda_=lambda_, verbose=-1, device=device, cp_name=cp_name, save_cp=True, checkpoint_path=cp_path, swap=swap, softmax=True, pretrain=False, mining=True, cuda=cuda, logger=writer)

	return trainer.train(n_epochs=epochs)
Esempio n. 15
0
                             pase_cp=args.pase_cp,
                             n_z=args.latent_size,
                             proj_size=train_dataset.n_speakers,
                             ncoef=args.ncoef,
                             sm_type=args.softmax)
elif args.model == 'resnet_50':
    model = model_.ResNet_50(pase_cfg=args.pase_cfg,
                             pase_cp=args.pase_cp,
                             n_z=args.latent_size,
                             proj_size=train_dataset.n_speakers,
                             ncoef=args.ncoef,
                             sm_type=args.softmax)
elif args.model == 'TDNN':
    model = model_.TDNN(pase_cfg=args.pase_cfg,
                        pase_cp=args.pase_cp,
                        n_z=args.latent_size,
                        proj_size=train_dataset.n_speakers,
                        ncoef=args.ncoef,
                        sm_type=args.softmax)
elif args.model == 'TDNN_mfcc':
    model = model_.TDNN_mfcc(n_z=args.latent_size,
                             proj_size=train_dataset.n_speakers,
                             ncoef=args.ncoef,
                             sm_type=args.softmax)
elif args.model == 'MLP':
    model = model_.MLP(pase_cfg=args.pase_cfg,
                       pase_cp=args.pase_cp,
                       n_z=args.latent_size,
                       proj_size=train_dataset.n_speakers,
                       ncoef=args.ncoef,
                       sm_type=args.softmax)
elif args.model == 'global_MLP':