def main():
    """Convert every epoch checkpoint to a new on-disk format.

    For each epoch in [1, args.epochs], loads ``checkpoint_<epoch>.pth``
    (skipping epochs whose file is absent), restores the weights into a
    fresh LocalResNet — dropping BatchNorm ``num_batches_tracked`` buffers
    from the saved state dict — and writes ``<path>.new`` containing the
    whole model object plus the saved criterion.
    """
    # print the experiment configuration
    num_spks = 1211
    print('\nCurrent time is \33[91m{}\33[0m.'.format(str(time.asctime())))
    print('Parsed options: {}'.format(vars(args)))
    print('Number of Speakers: {}.\n'.format(num_spks))

    # instantiate model and initialize weights
    model = LocalResNet(resnet_size=10, embedding_size=args.embedding_size, num_classes=num_spks)

    # swap in the margin-based classifier head when the loss type needs one
    # (choices elsewhere: 'soft', 'asoft', 'center', 'amsoft')
    if args.loss_type == 'asoft':
        model.classifier = AngleLinear(in_features=args.embedding_size, out_features=num_spks, m=args.m)
    elif args.loss_type == 'amsoft':
        model.classifier = AdditiveMarginLinear(feat_dim=args.embedding_size, n_classes=num_spks)

    first_epoch = 1
    print('Start epoch is : ' + str(first_epoch))
    last_epoch = first_epoch + args.epochs

    for ep in range(first_epoch, last_epoch):
        check_path = '{}/checkpoint_{}.pth'.format(args.check_path, ep)
        if not os.path.isfile(check_path):
            print('=> no checkpoint found at {}'.format(check_path))
            continue

        print('=> loading checkpoint {}'.format(check_path))
        ckpt = torch.load(check_path)
        saved_epoch = ckpt['epoch']

        # merge the saved weights into the current state dict, ignoring the
        # BatchNorm bookkeeping buffers that may not match this model
        kept = {key: val for key, val in ckpt['state_dict'].items() if 'num_batches_tracked' not in key}
        merged = model.state_dict()
        merged.update(kept)
        model.load_state_dict(merged)

        criterion = ckpt['criterion']
        torch.save({'epoch': saved_epoch, 'model': model, 'criterion': criterion}, check_path + '.new')
        print('=> Saving new checkpoint at {}'.format(check_path + '.new'))
def main():
    """Evaluate every saved epoch checkpoint on the SITW dev data.

    Restores each ``checkpoint_<epoch>.pth`` into a LocalResNet and runs
    ``sitw_test``; epochs with no checkpoint file are skipped.
    """
    # print the experiment configuration
    num_spks = 1211
    print('\nCurrent time is \33[91m{}\33[0m.'.format(str(time.asctime())))
    print('Parsed options: {}'.format(vars(args)))
    print('Number of Speakers: {}.\n'.format(num_spks))

    # instantiate model and initialize weights
    model = LocalResNet(resnet_size=10, embedding_size=args.embedding_size, num_classes=num_spks)
    if args.loss_type == 'asoft':
        model.classifier = AngleLinear(in_features=args.embedding_size, out_features=num_spks, m=args.m)
    elif args.loss_type == 'amsoft':
        model.classifier = AdditiveMarginLinear(feat_dim=args.embedding_size, n_classes=num_spks)

    if args.cuda:
        model.cuda()

    # NOTE(review): both loaders read from sitw_dev_dir — confirm the test
    # loader was not meant to wrap a separate sitw_test_dir dataset.
    sitw_test_loader = torch.utils.data.DataLoader(sitw_dev_dir, batch_size=args.test_batch_size,
                                                   shuffle=False, **kwargs)
    sitw_dev_loader = torch.utils.data.DataLoader(sitw_dev_dir, batch_size=args.test_batch_size,
                                                  shuffle=False, **kwargs)

    epochs = np.arange(1, args.epochs + 1)
    resume_path = args.check_path + '/checkpoint_{}.pth'

    for epoch in epochs:
        # Load model from Checkpoint file
        if os.path.isfile(resume_path.format(epoch)):
            print('=> loading checkpoint {}'.format(resume_path.format(epoch)))
            checkpoint = torch.load(resume_path.format(epoch))
            start_epoch = checkpoint['epoch']
            filtered = {k: v for k, v in checkpoint['state_dict'].items() if 'num_batches_tracked' not in k}
            # Merge into the current state dict before loading: a strict
            # load_state_dict of the filtered dict alone raises on the
            # missing num_batches_tracked buffers.
            model_dict = model.state_dict()
            model_dict.update(filtered)
            model.load_state_dict(model_dict)
        else:
            print('=> no checkpoint found at %s' % resume_path.format(epoch))
            continue

        sitw_test(sitw_dev_loader, sitw_test_loader, model, start_epoch)

    writer.close()
def main():
    """Extract embeddings from every saved epoch checkpoint.

    Builds the model from command-line options, then for each epoch in
    [args.start_epochs, args.epochs] restores the checkpoint and dumps
    train/valid/veri/test embeddings under ``args.extract_path/epoch_<e>``.
    """
    # print the experiment configuration
    print('\nNumber of Speakers: {}.'.format(train_dir.num_spks))
    print('Current time is \33[91m{}\33[0m.'.format(str(time.asctime())))
    print('Parsed options: {}'.format(vars(args)))

    # instantiate model and initialize weights
    channels = [int(x) for x in args.channels.split(',')]
    kernel_size = [int(x) for x in args.kernel_size.split(',')]
    padding = [int((x - 1) / 2) for x in kernel_size]  # "same"-style padding for odd kernels
    kernel_size = tuple(kernel_size)
    padding = tuple(padding)

    # FIX: the original dict listed 'time_dim' twice; the duplicate is removed.
    model_kwargs = {'input_dim': args.feat_dim, 'kernel_size': kernel_size, 'stride': args.stride,
                    'padding': padding, 'channels': channels, 'alpha': args.alpha,
                    'avg_size': args.avg_size, 'time_dim': args.time_dim,
                    'resnet_size': args.resnet_size, 'embedding_size': args.embedding_size,
                    'num_classes': len(train_dir.speakers), 'dropout_p': args.dropout_p}
    print('Model options: {}'.format(model_kwargs))
    model = create_model(args.model, **model_kwargs)

    if args.loss_type == 'asoft':
        model.classifier = AngleLinear(in_features=args.embedding_size, out_features=train_dir.num_spks, m=args.m)
    elif args.loss_type == 'amsoft':
        model.classifier = AdditiveMarginLinear(feat_dim=args.embedding_size, n_classes=train_dir.num_spks)

    train_loader = DataLoader(train_part, batch_size=args.batch_size, shuffle=False, **kwargs)
    veri_loader = DataLoader(veri_dir, batch_size=args.batch_size, shuffle=False, **kwargs)
    valid_loader = DataLoader(valid_part, batch_size=args.batch_size, shuffle=False, **kwargs)
    test_loader = DataLoader(test_dir, batch_size=args.batch_size, shuffle=False, **kwargs)

    resume_path = args.check_path + '/checkpoint_{}.pth'
    print('=> Saving output in {}\n'.format(args.extract_path))
    epochs = np.arange(args.start_epochs, args.epochs + 1)

    for e in epochs:
        # Load model from Checkpoint file
        if os.path.isfile(resume_path.format(e)):
            print('=> loading checkpoint {}'.format(resume_path.format(e)))
            checkpoint = torch.load(resume_path.format(e))
            if e == 0:
                # epoch-0 files store the whole model object, not a dict
                filtered = checkpoint.state_dict()
            else:
                filtered = {k: v for k, v in checkpoint['state_dict'].items()
                            if 'num_batches_tracked' not in k}
            model_dict = model.state_dict()
            model_dict.update(filtered)
            model.load_state_dict(model_dict)
            try:
                args.dropout_p = model.dropout_p
            except AttributeError:  # model has no dropout layer
                pass
        else:
            print('=> no checkpoint found at %s' % resume_path.format(e))
            continue

        model.cuda()

        file_dir = args.extract_path + '/epoch_%d' % e
        if not os.path.exists(file_dir):
            os.makedirs(file_dir)

        if not args.test_only:
            train_extract(train_loader, model, file_dir, 'vox1_train')
            train_extract(valid_loader, model, file_dir, 'vox1_valid')
        # NOTE(review): test extraction is assumed to run even with
        # --test-only (otherwise the flag would be a no-op) — confirm.
        test_extract(veri_loader, model, file_dir, 'vox1_veri')
        test_extract(test_loader, model, file_dir, 'vox1_test')
def main():
    """Set up a dual-dataset (A/B) speaker model, resume a checkpoint and run the test stage.

    Builds the model from command-line options, optionally resumes from
    ``args.resume``, installs the per-loss classifier heads/criteria, and
    finally scores the enroll/test split via ``test``.
    """
    # print the experiment configuration
    print('\nCurrent time is \33[91m{}\33[0m.'.format(str(time.asctime())))
    print('Parsed options: {}'.format(vars(args)))
    print('Number of Speakers for set A: {}.'.format(args.num_spks_a))
    print('Number of Speakers for set B: {}.\n'.format(args.num_spks_b))

    # instantiate model and initialize weights
    kernel_size = [int(x) for x in args.kernel_size.split(',')]
    if args.padding == '':
        padding = [int((x - 1) / 2) for x in kernel_size]  # derive "same"-style padding
    else:
        padding = [int(x) for x in args.padding.split(',')]

    kernel_size = tuple(kernel_size)
    padding = tuple(padding)
    stride = [int(x) for x in args.stride.split(',')]
    channels = [int(x) for x in args.channels.split(',')]

    model_kwargs = {'input_dim': args.input_dim, 'feat_dim': args.feat_dim, 'kernel_size': kernel_size,
                    'filter': args.filter, 'inst_norm': args.inst_norm, 'input_norm': args.input_norm,
                    'stride': stride, 'fast': args.fast, 'avg_size': args.avg_size,
                    'time_dim': args.time_dim, 'padding': padding, 'encoder_type': args.encoder_type,
                    'vad': args.vad, 'transform': args.transform, 'embedding_size': args.embedding_size,
                    'ince': args.inception, 'resnet_size': args.resnet_size,
                    'num_classes_a': args.num_spks_a, 'num_classes_b': args.num_spks_b,
                    'input_len': args.input_len, 'channels': channels, 'alpha': args.alpha,
                    'dropout_p': args.dropout_p}
    print('Model options: {}'.format(model_kwargs))
    model = create_model(args.model, **model_kwargs)

    start_epoch = 0
    if args.save_init and not args.finetune:
        check_path = '{}/checkpoint_{}.pth'.format(args.check_path, start_epoch)
        torch.save(model, check_path)

    # FIX: initialize epoch so the final test() call cannot hit an unbound
    # name when --resume is not given (previously a NameError).
    epoch = 0
    if args.resume:
        if os.path.isfile(args.resume):
            print('=> loading checkpoint {}'.format(args.resume))
            checkpoint = torch.load(args.resume)
            epoch = checkpoint['epoch']
            filtered = {k: v for k, v in checkpoint['state_dict'].items() if 'num_batches_tracked' not in k}
            model_dict = model.state_dict()
            model_dict.update(filtered)
            model.load_state_dict(model_dict)
        else:
            print('=> no checkpoint found at {}'.format(args.resume))

    # ce_criterion: plain cross-entropy; xe_criterion: the margin/center loss
    ce_criterion = nn.CrossEntropyLoss()
    if args.loss_type == 'soft':
        xe_criterion = None
    elif args.loss_type == 'asoft':
        ce_criterion = None
        model.classifier_a = AngleLinear(in_features=args.embedding_size,
                                         out_features=train_dir_a.num_spks, m=args.m)
        model.classifier_b = AngleLinear(in_features=args.embedding_size,
                                         out_features=train_dir_b.num_spks, m=args.m)
        xe_criterion = AngleSoftmaxLoss(lambda_min=args.lambda_min, lambda_max=args.lambda_max)
    elif args.loss_type == 'center':
        xe_criterion = CenterLoss(num_classes=int(train_dir_a.num_spks + train_dir_b.num_spks),
                                  feat_dim=args.embedding_size)
        if args.resume:
            try:
                criterion = checkpoint['criterion']
                xe_criterion.load_state_dict(criterion[1].state_dict())
            except Exception:  # best effort: older checkpoints may not carry a criterion
                pass
    elif args.loss_type == 'gaussian':
        # NOTE(review): uses args.num_spks while the center branch uses
        # train_dir_*.num_spks — confirm args.num_spks exists for this script.
        xe_criterion = GaussianLoss(num_classes=int(args.num_spks + args.num_spks),
                                    feat_dim=args.embedding_size)
    elif args.loss_type == 'coscenter':
        xe_criterion = CenterCosLoss(num_classes=int(args.num_spks + args.num_spks),
                                     feat_dim=args.embedding_size)
        if args.resume:
            try:
                criterion = checkpoint['criterion']
                xe_criterion.load_state_dict(criterion[1].state_dict())
            except Exception:  # best effort
                pass
    elif args.loss_type == 'mulcenter':
        xe_criterion = MultiCenterLoss(num_classes=int(args.num_spks + args.num_spks),
                                       feat_dim=args.embedding_size, num_center=args.num_center)
    elif args.loss_type == 'amsoft':
        ce_criterion = None
        model.classifier_a = AdditiveMarginLinear(feat_dim=args.embedding_size, n_classes=args.num_spks)
        model.classifier_b = AdditiveMarginLinear(feat_dim=args.embedding_size, n_classes=args.num_spks)
        xe_criterion = AMSoftmaxLoss(margin=args.margin, s=args.s)
    elif args.loss_type == 'arcsoft':
        ce_criterion = None
        model.classifier_a = AdditiveMarginLinear(feat_dim=args.embedding_size, n_classes=args.num_spks)
        model.classifier_b = AdditiveMarginLinear(feat_dim=args.embedding_size, n_classes=args.num_spks)
        xe_criterion = ArcSoftmaxLoss(margin=args.margin, s=args.s)
    elif args.loss_type == 'wasse':
        xe_criterion = Wasserstein_Loss(source_cls=args.source_cls)

    ce = [ce_criterion, xe_criterion]

    start = args.start_epoch + start_epoch
    print('Start epoch is : ' + str(start))

    enroll_loader_a = torch.utils.data.DataLoader(enroll_extract_dir, batch_size=args.batch_size,
                                                  shuffle=False, **kwargs)
    test_loader = torch.utils.data.DataLoader(test_extract_dir, batch_size=args.batch_size,
                                              shuffle=False, **kwargs)

    if args.cuda:
        model = model.cuda()
        for i in range(len(ce)):
            if ce[i] is not None:
                ce[i] = ce[i].cuda()

    try:
        print('Dropout is {}.'.format(model.dropout_p))
    except AttributeError:  # model has no dropout layer
        pass

    xvector_dir = args.check_path.replace('checkpoint', 'xvector')
    # valid_test(train_extract_loader, valid_loader, model, epoch, xvector_dir)
    test(model, epoch, writer, xvector_dir)
def main():
    """Build and optionally resume a speaker model, then extract x-vectors and score trials.

    Configures model/loss/optimizer/scheduler from command-line options
    (the actual training loop lived here once but is disabled), optionally
    runs ``verification_extract`` on the test set, and always scores the
    trials file with ``verification_test``.
    """
    # print the experiment configuration
    print('\nCurrent time is \33[91m{}\33[0m.'.format(str(time.asctime())))
    print('Parsed options: {}'.format(vars(args)))
    print('Number of Speakers: {}.\n'.format(train_dir.num_spks))

    # instantiate model and initialize weights
    kernel_size = [int(x) for x in args.kernel_size.split(',')]
    padding = [int((x - 1) / 2) for x in kernel_size]  # "same"-style padding for odd kernels
    kernel_size = tuple(kernel_size)
    padding = tuple(padding)
    stride = [int(x) for x in args.stride.split(',')]
    channels = [int(x) for x in args.channels.split(',')]

    model_kwargs = {'input_dim': args.input_dim, 'feat_dim': args.feat_dim, 'kernel_size': kernel_size,
                    'filter': args.filter, 'inst_norm': args.inst_norm, 'stride': stride,
                    'fast': args.fast, 'avg_size': args.avg_size, 'time_dim': args.time_dim,
                    'padding': padding, 'encoder_type': args.encoder_type, 'vad': args.vad,
                    'embedding_size': args.embedding_size, 'ince': args.inception,
                    'resnet_size': args.resnet_size, 'num_classes': train_dir.num_spks,
                    'channels': channels, 'alpha': args.alpha, 'dropout_p': args.dropout_p}
    print('Model options: {}'.format(model_kwargs))
    model = create_model(args.model, **model_kwargs)

    start_epoch = 0
    if args.save_init and not args.finetune:
        check_path = '{}/checkpoint_{}.pth'.format(args.check_path, start_epoch)
        torch.save(model, check_path)

    if args.resume:
        if os.path.isfile(args.resume):
            print('=> loading checkpoint {}'.format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            filtered = {k: v for k, v in checkpoint['state_dict'].items() if 'num_batches_tracked' not in k}
            model_dict = model.state_dict()
            model_dict.update(filtered)
            model.load_state_dict(model_dict)
        else:
            print('=> no checkpoint found at {}'.format(args.resume))

    ce_criterion = nn.CrossEntropyLoss()
    if args.loss_type == 'soft':
        xe_criterion = None
    elif args.loss_type == 'asoft':
        ce_criterion = None
        model.classifier = AngleLinear(in_features=args.embedding_size,
                                       out_features=train_dir.num_spks, m=args.m)
        xe_criterion = AngleSoftmaxLoss(lambda_min=args.lambda_min, lambda_max=args.lambda_max)
    elif args.loss_type == 'center':
        xe_criterion = CenterLoss(num_classes=train_dir.num_spks, feat_dim=args.embedding_size)
    elif args.loss_type == 'amsoft':
        ce_criterion = None
        model.classifier = AdditiveMarginLinear(feat_dim=args.embedding_size, n_classes=train_dir.num_spks)
        xe_criterion = AMSoftmaxLoss(margin=args.margin, s=args.s)

    optimizer = create_optimizer(model.parameters(), args.optimizer, **opt_kwargs)
    if args.loss_type == 'center':
        # center-loss parameters learn 5x faster than the backbone
        optimizer = torch.optim.SGD([{'params': xe_criterion.parameters(), 'lr': args.lr * 5},
                                     {'params': model.parameters()}],
                                    lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum)
    if args.finetune:
        if args.loss_type == 'asoft' or args.loss_type == 'amsoft':
            # the fresh classifier head learns 10x faster than the pretrained trunk
            classifier_params = list(map(id, model.classifier.parameters()))
            rest_params = filter(lambda p: id(p) not in classifier_params, model.parameters())
            optimizer = torch.optim.SGD([{'params': model.classifier.parameters(), 'lr': args.lr * 10},
                                         {'params': rest_params}],
                                        lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum)
    if args.filter:
        # the learnable filterbank layer gets a much smaller learning rate
        filter_params = list(map(id, model.filter_layer.parameters()))
        rest_params = filter(lambda p: id(p) not in filter_params, model.parameters())
        optimizer = torch.optim.SGD([{'params': model.filter_layer.parameters(), 'lr': args.lr * 0.05},
                                     {'params': rest_params}],
                                    lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum)

    if args.scheduler == 'exp':
        scheduler = ExponentialLR(optimizer, gamma=args.gamma)
    else:
        milestones = sorted(int(x) for x in args.milestones.split(','))
        scheduler = MultiStepLR(optimizer, milestones=milestones, gamma=0.1)

    ce = [ce_criterion, xe_criterion]
    start = args.start_epoch + start_epoch
    print('Start epoch is : ' + str(start))
    end = start + args.epochs

    train_loader = torch.utils.data.DataLoader(train_dir, batch_size=args.batch_size, shuffle=False, **kwargs)
    valid_loader = torch.utils.data.DataLoader(valid_dir, batch_size=int(args.batch_size / 2),
                                               shuffle=False, **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dir, batch_size=args.test_batch_size,
                                              shuffle=False, **kwargs)

    if args.cuda:
        model = model.cuda()
        for i in range(len(ce)):
            if ce[i] is not None:
                ce[i] = ce[i].cuda()

    try:
        print('Dropout is {}.'.format(model.dropout_p))
    except AttributeError:  # model has no dropout layer
        pass

    xvector_dir = args.check_path.replace('checkpoint', 'xvector')
    if args.extract:
        extract_dir = KaldiExtractDataset(dir=args.test_dir, transform=transform_V, filer_loader=file_loader)
        extract_loader = torch.utils.data.DataLoader(extract_dir, batch_size=1, shuffle=False, **kwargs)
        verification_extract(extract_loader, model, xvector_dir)

    # NOTE(review): scoring is assumed to run whether or not extraction just
    # happened; it reads whatever x-vectors exist under xvector_dir — confirm.
    verify_dir = ScriptVerifyDataset(dir=args.test_dir, trials_file=args.trials,
                                     xvectors_dir=xvector_dir, loader=read_vec_flt)
    verify_loader = torch.utils.data.DataLoader(verify_dir, batch_size=64, shuffle=False, **kwargs)
    verification_test(test_loader=verify_loader, dist_type=('cos' if args.cos_sim else 'l2'),
                      log_interval=args.log_interval, save=args.save_score)
    writer.close()
def main():
    """Train a ResNet20 speaker classifier and evaluate after every epoch."""
    # print the experiment configuration
    print('\nCurrent time is \33[91m{}\33[0m.'.format(str(time.asctime())))
    print('Parsed options: {}'.format(vars(args)))
    print('Number of Speakers: {}.\n'.format(train_dir.num_spks))

    # instantiate model and initialize weights
    model = ResNet20(embedding_size=args.embedding_size, num_classes=train_dir.num_spks,
                     dropout_p=args.dropout_p)

    start_epoch = 0
    if args.save_init:
        # snapshot the untrained weights as epoch 0
        check_path = '{}/checkpoint_{}.pth'.format(args.check_path, start_epoch)
        torch.save({'epoch': 0, 'state_dict': model.state_dict()}, check_path)

    if args.resume:
        if os.path.isfile(args.resume):
            print('=> loading checkpoint {}'.format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            filtered = {k: v for k, v in checkpoint['state_dict'].items() if 'num_batches_tracked' not in k}
            model_dict = model.state_dict()
            model_dict.update(filtered)
            model.load_state_dict(model_dict)
        else:
            print('=> no checkpoint found at {}'.format(args.resume))

    # ce_criterion: plain cross-entropy; xe_criterion: the margin/center loss
    ce_criterion = nn.CrossEntropyLoss()
    if args.loss_type == 'soft':
        xe_criterion = None
    elif args.loss_type == 'asoft':
        ce_criterion = None
        model.classifier = AngleLinear(in_features=args.embedding_size,
                                       out_features=train_dir.num_spks, m=args.m)
        xe_criterion = AngleSoftmaxLoss(lambda_min=args.lambda_min, lambda_max=args.lambda_max)
    elif args.loss_type == 'center':
        xe_criterion = CenterLoss(num_classes=train_dir.num_spks, feat_dim=args.embedding_size)
    elif args.loss_type == 'amsoft':
        ce_criterion = None
        model.classifier = AdditiveMarginLinear(feat_dim=args.embedding_size, n_classes=train_dir.num_spks)
        xe_criterion = AMSoftmaxLoss(margin=args.margin, s=args.s)

    optimizer = create_optimizer(model.parameters(), args.optimizer, **opt_kwargs)
    if args.loss_type == 'center':
        # center-loss parameters learn 5x faster than the backbone
        optimizer = torch.optim.SGD([{'params': xe_criterion.parameters(), 'lr': args.lr * 5},
                                     {'params': model.parameters()}],
                                    lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum)
    if args.finetune:
        if args.loss_type == 'asoft' or args.loss_type == 'amsoft':
            # fresh classifier head learns 5x faster than the pretrained trunk
            classifier_params = list(map(id, model.classifier.parameters()))
            rest_params = filter(lambda p: id(p) not in classifier_params, model.parameters())
            optimizer = torch.optim.SGD([{'params': model.classifier.parameters(), 'lr': args.lr * 5},
                                         {'params': rest_params}],
                                        lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum)

    milestones = sorted(int(x) for x in args.milestones.split(','))
    scheduler = MultiStepLR(optimizer, milestones=milestones, gamma=0.1)

    ce = [ce_criterion, xe_criterion]
    start = args.start_epoch + start_epoch
    print('Start epoch is : ' + str(start))
    end = start + args.epochs

    train_loader = torch.utils.data.DataLoader(train_dir, batch_size=args.batch_size, shuffle=True, **kwargs)
    valid_loader = torch.utils.data.DataLoader(valid_dir, batch_size=int(args.batch_size / 2),
                                               shuffle=False, **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dir, batch_size=args.test_batch_size,
                                              shuffle=False, **kwargs)

    if args.cuda:
        model = model.cuda()
        for i in range(len(ce)):
            if ce[i] is not None:  # FIX: identity comparison with None, not !=
                ce[i] = ce[i].cuda()

    for epoch in range(start, end):
        # show the current learning rate(s) before each epoch
        print('\n\33[1;34m Current \'{}\' learning rate is '.format(args.optimizer), end='')
        for param_group in optimizer.param_groups:
            print('{:.5f} '.format(param_group['lr']), end='')
        print(' \33[0m')

        train(train_loader, model, ce, optimizer, scheduler, epoch)
        test(test_loader, valid_loader, model, epoch)
        scheduler.step()

    writer.close()
def main():
    """Train a speaker classifier built via create_model and evaluate after every epoch."""
    # print the experiment configuration
    print('\nCurrent time is \33[91m{}\33[0m'.format(str(time.asctime())))
    print('Parsed options: {}'.format(vars(args)))
    print('Number of Classes: {}\n'.format(len(train_dir.speakers)))

    # instantiate model and initialize weights
    model_kwargs = {'input_dim': args.feat_dim, 'embedding_size': args.embedding_size,
                    'num_classes': len(train_dir.speakers), 'dropout_p': args.dropout_p}
    print('Model options: {}'.format(model_kwargs))
    model = create_model(args.model, **model_kwargs)

    if args.cuda:
        model.cuda()

    start = 0
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print('=> loading checkpoint {}'.format(args.resume))
            # FIX: the checkpoint was deserialized twice; one load is enough.
            checkpoint = torch.load(args.resume)
            start = checkpoint['epoch']
            filtered = {k: v for k, v in checkpoint['state_dict'].items() if 'num_batches_tracked' not in k}
            # FIX: merge into the fresh state dict — a strict load of the
            # filtered dict alone raises on the missing num_batches_tracked keys.
            model_dict = model.state_dict()
            model_dict.update(filtered)
            model.load_state_dict(model_dict)
        else:
            print('=> no checkpoint found at {}'.format(args.resume))

    ce_criterion = nn.CrossEntropyLoss()
    if args.loss_type == 'soft':
        xe_criterion = None
    elif args.loss_type == 'asoft':
        ce_criterion = None
        model.classifier = AngleLinear(in_features=args.embedding_size,
                                       out_features=train_dir.num_spks, m=args.m)
        xe_criterion = AngleSoftmaxLoss(lambda_min=args.lambda_min, lambda_max=args.lambda_max)
    elif args.loss_type == 'center':
        xe_criterion = CenterLoss(num_classes=train_dir.num_spks, feat_dim=args.embedding_size)
    elif args.loss_type == 'amsoft':
        # NOTE(review): ce_criterion stays active for amsoft here, unlike the
        # sibling scripts which set it to None — confirm this is intended.
        model.classifier = AdditiveMarginLinear(feat_dim=args.embedding_size, n_classes=train_dir.num_spks)
        xe_criterion = AMSoftmaxLoss(margin=args.margin, s=args.s)

    optimizer = create_optimizer(model.parameters(), args.optimizer, **opt_kwargs)
    if args.loss_type == 'center':
        # center-loss parameters learn 5x faster than the backbone
        optimizer = torch.optim.SGD([{'params': xe_criterion.parameters(), 'lr': args.lr * 5},
                                     {'params': model.parameters()}],
                                    lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum)
    if args.finetune:
        if args.loss_type == 'asoft' or args.loss_type == 'amsoft':
            # fresh classifier head learns 5x faster than the pretrained trunk
            classifier_params = list(map(id, model.classifier.parameters()))
            rest_params = filter(lambda p: id(p) not in classifier_params, model.parameters())
            optimizer = torch.optim.SGD([{'params': model.classifier.parameters(), 'lr': args.lr * 5},
                                         {'params': rest_params}],
                                        lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum)

    milestones = sorted(int(x) for x in args.milestones.split(','))
    scheduler = MultiStepLR(optimizer, milestones=milestones, gamma=0.1)

    if args.save_init and not args.finetune:
        # snapshot the initial weights together with optimizer/scheduler state
        check_path = '{}/checkpoint_{}.pth'.format(args.check_path, start)
        torch.save({'epoch': start, 'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(), 'scheduler': scheduler.state_dict()},
                   check_path)

    start += args.start_epoch
    print('Start epoch is : ' + str(start))
    end = args.epochs + 1

    train_loader = torch.utils.data.DataLoader(train_dir, batch_size=args.batch_size, shuffle=True, **kwargs)
    valid_loader = torch.utils.data.DataLoader(valid_dir, batch_size=args.batch_size, shuffle=False, **kwargs)
    test_loader = torch.utils.data.DataLoader(test_part, batch_size=args.test_batch_size,
                                              shuffle=False, **kwargs)

    ce = [ce_criterion, xe_criterion]
    if args.cuda:
        model = model.cuda()
        for i in range(len(ce)):
            if ce[i] is not None:  # FIX: identity comparison with None, not !=
                ce[i] = ce[i].cuda()

    for epoch in range(start, end):
        # show the current learning rate(s) before each epoch
        print('\n\33[1;34m Current \'{}\' learning rate is '.format(args.optimizer), end='')
        for param_group in optimizer.param_groups:
            print('{:.5f} '.format(param_group['lr']), end='')
        print(' \33[0m')

        train(train_loader, model, optimizer, ce, epoch)
        test(test_loader, valid_loader, model, epoch)
        scheduler.step()

    writer.close()
def __init__(self, embedding_size, layers=[1, 1, 1, 0], block=BasicBlock, n_classes=1000, m=3):
    """Shallow residual CNN: three conv/residual stages -> embedding -> angular softmax head.

    Args:
        embedding_size: size of the output speaker embedding.
        layers: residual-block count per stage (layers[3] is unused; the
            fourth stage is commented out below).
        block: residual block class used by ``_make_layer``.
        n_classes: number of speaker classes for the AngleLinear head.
        m: angular margin passed to AngleLinear.
    """
    super(SuperficialResCNN, self).__init__()
    self.embedding_size = embedding_size
    self.relu = ReLU(inplace=True)

    self.in_planes = 64
    self.conv1 = nn.Conv2d(1, 64, kernel_size=5, stride=2, padding=2, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.layer1 = self._make_layer(block, 64, layers[0])

    self.in_planes = 128
    self.conv2 = nn.Conv2d(64, 128, kernel_size=5, stride=2, padding=2, bias=False)
    self.bn2 = nn.BatchNorm2d(128)
    self.layer2 = self._make_layer(block, 128, layers[1])

    self.in_planes = 256
    self.conv3 = nn.Conv2d(128, 256, kernel_size=5, stride=2, padding=2, bias=False)
    self.bn3 = nn.BatchNorm2d(256)
    self.layer3 = self._make_layer(block, 256, layers[2])

    # self.in_planes = 512
    # self.conv4 = nn.Conv2d(256, 512, kernel_size=5, stride=2, padding=2, bias=False)
    # self.bn4 = nn.BatchNorm2d(512)
    # self.layer4 = self._make_layer(block, 512, layers[3])

    self.avg_pool = nn.AdaptiveAvgPool2d((4, 1))
    self.fc = nn.Sequential(nn.Linear(self.in_planes * 4, embedding_size),
                            nn.BatchNorm1d(embedding_size))
    self.angle_linear = AngleLinear(in_features=embedding_size, out_features=n_classes, m=m)

    # Parameter initialization. FIX: the loop variable was named `m`, which
    # shadowed the margin parameter above — renamed to `module`.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            # He initialization: zero-mean normal with std sqrt(2/n)
            n = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
            module.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(module, (nn.BatchNorm2d, nn.BatchNorm1d)):
            # BatchNorm layers: weight = 1, bias = 0
            module.weight.data.fill_(1)
            module.bias.data.zero_()
'num_classes_a': train_dir_a.num_spks, 'num_classes_b': train_dir_b.num_spks, 'channels': channels, 'alpha': args.alpha, 'dropout_p': args.dropout_p } print('Model options: {}'.format(model_kwargs)) dist_type = 'cos' if args.cos_sim else 'l2' print('Testing with %s distance, ' % dist_type) if args.valid or args.extract: model = create_model(args.model, **model_kwargs) if args.loss_type == 'asoft': model.classifier_a = AngleLinear(in_features=args.embedding_size, out_features=train_dir_a.num_spks, m=args.m) model.classifier_b = AngleLinear(in_features=args.embedding_size, out_features=train_dir_b.num_spks, m=args.m) elif args.loss_type in ['amsoft', 'arcsoft']: model.classifier_a = AdditiveMarginLinear( feat_dim=args.embedding_size, n_classes=train_dir_a.num_spks) model.classifier_b = AdditiveMarginLinear( feat_dim=args.embedding_size, n_classes=train_dir_b.num_spks) assert os.path.isfile(args.resume) print('=> loading checkpoint {}'.format(args.resume)) checkpoint = torch.load(args.resume) # start_epoch = checkpoint['epoch']
def main():
    """Validate and/or extract x-vectors for a trained speaker model.

    Builds the model from command-line ``args``, loads the checkpoint named by
    ``args.resume`` (mandatory when ``--valid``/``--extract`` is set), runs
    validation and/or embedding extraction, then scores the trial list from
    the extracted vectors.  Relies on module-level globals: ``args``,
    ``kwargs``, ``train_dir``, ``valid_dir``, ``verfify_dir``, ``valid``,
    ``extract``, ``test``, ``read_vec_flt``, ``ScriptVerifyDataset``.
    """
    # print the experiment configuration
    print('\nCurrent time is \33[91m{}\33[0m.'.format(str(time.asctime())))
    print('Parsed options: {}'.format(vars(args)))

    # instantiate model and initialize weights
    kernel_size = [int(x) for x in args.kernel_size.split(',')]
    # "same"-style padding derived from the kernel size
    padding = [int((x - 1) / 2) for x in kernel_size]
    kernel_size = tuple(kernel_size)
    padding = tuple(padding)
    channels = [int(x) for x in args.channels.split(',')]

    model_kwargs = {
        'embedding_size': args.embedding_size,
        'resnet_size': args.resnet_size,
        'input_dim': args.feat_dim,
        'num_classes': train_dir.num_spks,
        'alpha': args.alpha,
        'channels': channels,
        'stride': args.stride,
        'avg_size': args.avg_size,
        'time_dim': args.time_dim,
        'kernel_size': kernel_size,
        'padding': padding,
        'dropout_p': args.dropout_p
    }
    print('Model options: {}'.format(model_kwargs))

    if args.valid or args.extract:
        model = create_model(args.model, **model_kwargs)
        # Margin-based losses replace the plain classifier head.
        if args.loss_type == 'asoft':
            model.classifier = AngleLinear(in_features=args.embedding_size,
                                           out_features=train_dir.num_spks,
                                           m=args.m)
        elif args.loss_type == 'amsoft':
            model.classifier = AdditiveMarginLinear(
                feat_dim=args.embedding_size, n_classes=train_dir.num_spks)

        # A checkpoint is mandatory for validation / extraction.
        assert os.path.isfile(args.resume)
        print('=> loading checkpoint {}'.format(args.resume))
        checkpoint = torch.load(args.resume)
        # Drop BatchNorm bookkeeping buffers that older/newer torch versions
        # disagree about.
        filtered = {k: v for k, v in checkpoint['state_dict'].items()
                    if 'num_batches_tracked' not in k}
        model.load_state_dict(filtered)
        try:
            model.dropout.p = args.dropout_p
        except AttributeError:
            # Best-effort: not every architecture exposes a `dropout` module.
            pass

        start = args.start_epoch
        print('Epoch is : ' + str(start))

        if args.cuda:
            model.cuda()

    if args.valid:
        valid_loader = torch.utils.data.DataLoader(
            valid_dir, batch_size=args.test_batch_size, shuffle=False, **kwargs)
        valid(valid_loader, model)

    if args.extract:
        # NOTE(review): `verfify_dir` is the (misspelled) module-level global
        # this script actually defines — do not "fix" the name here alone.
        verify_loader = torch.utils.data.DataLoader(
            verfify_dir, batch_size=args.test_batch_size, shuffle=False, **kwargs)
        extract(verify_loader, model, args.xvector_dir)

    # Score the trial list from the extracted x-vectors on disk.
    file_loader = read_vec_flt
    test_dir = ScriptVerifyDataset(dir=args.test_dir, trials_file=args.trials,
                                   xvectors_dir=args.xvector_dir,
                                   loader=file_loader)
    test_loader = torch.utils.data.DataLoader(
        test_dir, batch_size=args.test_batch_size * 64, shuffle=False, **kwargs)
    test(test_loader)
def main():
    """Train a speaker-embedding model jointly on two datasets (A and B).

    Builds the model from ``args``, optionally resumes from a checkpoint,
    sets up the loss (one classifier head per dataset for margin losses),
    optimizer and LR scheduler, then runs the train/valid/test loop,
    checkpointing periodically.  Relies on module-level globals: ``args``,
    ``kwargs``, ``opt_kwargs``, ``train_dir_a/b``, ``valid_dir_a/b``,
    ``train_extract_dir``, ``writer``, and the train/valid/test helpers.
    """
    # print the experiment configuration
    print('\nCurrent time is \33[91m{}\33[0m.'.format(str(time.asctime())))
    opts = vars(args)
    keys = list(opts.keys())
    keys.sort()
    options = []
    for k in keys:
        options.append("\'%s\': \'%s\'" % (str(k), str(opts[k])))
    print('Parsed options: \n{ %s }' % (', '.join(options)))
    print('Number of Speakers for set A: {}.'.format(train_dir_a.num_spks))
    print('Number of Speakers for set B: {}.\n'.format(train_dir_b.num_spks))

    # instantiate model and initialize weights
    kernel_size = [int(x) for x in args.kernel_size.split(',')]
    if args.padding == '':
        # default: "same"-style padding derived from the kernel size
        padding = [int((x - 1) / 2) for x in kernel_size]
    else:
        padding = [int(x) for x in args.padding.split(',')]
    kernel_size = tuple(kernel_size)
    padding = tuple(padding)
    stride = [int(x) for x in args.stride.split(',')]
    channels = [int(x) for x in args.channels.split(',')]

    model_kwargs = {
        'input_dim': args.input_dim,
        'feat_dim': args.feat_dim,
        'kernel_size': kernel_size,
        'mask': args.mask_layer,
        'mask_len': args.mask_len,
        'block_type': args.block_type,
        'filter': args.filter,
        'inst_norm': args.inst_norm,
        'input_norm': args.input_norm,
        'stride': stride,
        'fast': args.fast,
        'avg_size': args.avg_size,
        'time_dim': args.time_dim,
        'padding': padding,
        'encoder_type': args.encoder_type,
        'vad': args.vad,
        'transform': args.transform,
        'embedding_size': args.embedding_size,
        'ince': args.inception,
        'resnet_size': args.resnet_size,
        'num_classes_a': train_dir_a.num_spks,
        'num_classes_b': train_dir_b.num_spks,
        'input_len': args.input_len,
        'channels': channels,
        'alpha': args.alpha,
        'dropout_p': args.dropout_p
    }
    print('Model options: {}'.format(model_kwargs))
    model = create_model(args.model, **model_kwargs)

    start_epoch = 0
    if args.save_init and not args.finetune:
        # Snapshot the randomly-initialized model as checkpoint 0.
        check_path = '{}/checkpoint_{}.pth'.format(args.check_path, start_epoch)
        torch.save(model, check_path)

    if args.resume:
        if os.path.isfile(args.resume):
            print('=> loading checkpoint {}'.format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            checkpoint_state_dict = checkpoint['state_dict']
            if isinstance(checkpoint_state_dict, tuple):
                # Older checkpoints accidentally stored the state dict as a
                # 1-tuple (see the save-side fix below).
                checkpoint_state_dict = checkpoint_state_dict[0]
            # Drop BatchNorm bookkeeping buffers that torch versions disagree on.
            filtered = {k: v for k, v in checkpoint_state_dict.items()
                        if 'num_batches_tracked' not in k}
            model_dict = model.state_dict()
            model_dict.update(filtered)
            model.load_state_dict(model_dict)
        else:
            print('=> no checkpoint found at {}'.format(args.resume))

    # Loss selection.  Margin-based losses replace both classifier heads and
    # take over from plain cross-entropy (ce_criterion is then None).
    ce_criterion = nn.CrossEntropyLoss()
    if args.loss_type == 'soft':
        xe_criterion = None
    elif args.loss_type == 'asoft':
        ce_criterion = None
        model.classifier_a = AngleLinear(in_features=args.embedding_size,
                                         out_features=train_dir_a.num_spks, m=args.m)
        model.classifier_b = AngleLinear(in_features=args.embedding_size,
                                         out_features=train_dir_b.num_spks, m=args.m)
        xe_criterion = AngleSoftmaxLoss(lambda_min=args.lambda_min,
                                        lambda_max=args.lambda_max)
    elif args.loss_type == 'center':
        xe_criterion = CenterLoss(num_classes=int(train_dir_a.num_spks + train_dir_b.num_spks),
                                  feat_dim=args.embedding_size)
        if args.resume:
            # Best-effort restore of the center-loss state from the checkpoint.
            try:
                criterion = checkpoint['criterion']
                xe_criterion.load_state_dict(criterion[1].state_dict())
            except Exception:
                pass
    elif args.loss_type == 'gaussian':
        xe_criterion = GaussianLoss(num_classes=int(train_dir_a.num_spks + train_dir_b.num_spks),
                                    feat_dim=args.embedding_size)
    elif args.loss_type == 'coscenter':
        xe_criterion = CenterCosLoss(num_classes=int(train_dir_a.num_spks + train_dir_b.num_spks),
                                     feat_dim=args.embedding_size)
        if args.resume:
            try:
                criterion = checkpoint['criterion']
                xe_criterion.load_state_dict(criterion[1].state_dict())
            except Exception:
                pass
    elif args.loss_type == 'mulcenter':
        xe_criterion = MultiCenterLoss(num_classes=int(train_dir_a.num_spks + train_dir_b.num_spks),
                                       feat_dim=args.embedding_size,
                                       num_center=args.num_center)
    elif args.loss_type == 'amsoft':
        ce_criterion = None
        model.classifier_a = AdditiveMarginLinear(feat_dim=args.embedding_size,
                                                  n_classes=train_dir_a.num_spks)
        model.classifier_b = AdditiveMarginLinear(feat_dim=args.embedding_size,
                                                  n_classes=train_dir_b.num_spks)
        xe_criterion = AMSoftmaxLoss(margin=args.margin, s=args.s)
    elif args.loss_type == 'arcsoft':
        ce_criterion = None
        model.classifier_a = AdditiveMarginLinear(feat_dim=args.embedding_size,
                                                  n_classes=train_dir_a.num_spks)
        model.classifier_b = AdditiveMarginLinear(feat_dim=args.embedding_size,
                                                  n_classes=train_dir_b.num_spks)
        xe_criterion = ArcSoftmaxLoss(margin=args.margin, s=args.s)
    elif args.loss_type == 'wasse':
        xe_criterion = Wasserstein_Loss(source_cls=args.source_cls)

    optimizer = create_optimizer(model.parameters(), args.optimizer, **opt_kwargs)
    if args.loss_type in ['center', 'mulcenter', 'gaussian', 'coscenter']:
        # Center-style losses train their own parameters at 5x the base LR.
        optimizer = torch.optim.SGD(
            [{'params': xe_criterion.parameters(), 'lr': args.lr * 5},
             {'params': model.parameters()}],
            lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum)
    if args.filter == 'fDLR':
        # Learnable filterbank layer gets a much smaller LR than the rest.
        filter_params = list(map(id, model.filter_layer.parameters()))
        rest_params = filter(lambda p: id(p) not in filter_params, model.parameters())
        optimizer = torch.optim.SGD(
            [{'params': model.filter_layer.parameters(), 'lr': args.lr * 0.05},
             {'params': rest_params}],
            lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum)

    # Save model config txt
    with open(osp.join(args.check_path,
                       'model.%s.cfg' % time.strftime("%Y.%m.%d", time.localtime())),
              'w') as f:
        f.write('model: ' + str(model) + '\n')
        f.write('CrossEntropy: ' + str(ce_criterion) + '\n')
        f.write('Other Loss: ' + str(xe_criterion) + '\n')
        f.write('Optimizer: ' + str(optimizer) + '\n')

    milestones = [int(x) for x in args.milestones.split(',')]
    milestones.sort()
    if args.scheduler == 'exp':
        scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=args.gamma, verbose=True)
    elif args.scheduler == 'rop':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, patience=args.patience,
                                                   min_lr=1e-5, verbose=True)
    else:
        scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones,
                                             gamma=0.1, verbose=True)

    ce = [ce_criterion, xe_criterion]
    start = args.start_epoch + start_epoch
    print('Start epoch is : ' + str(start))
    end = start + args.epochs

    # Split the global batch between A and B proportionally to dataset size.
    batch_size_a = int(args.batch_size * len(train_dir_a) / (len(train_dir_a) + len(train_dir_b)))
    train_loader_a = torch.utils.data.DataLoader(train_dir_a, batch_size=batch_size_a,
                                                 shuffle=False, **kwargs)
    batch_size_b = args.batch_size - batch_size_a
    train_loader_b = torch.utils.data.DataLoader(train_dir_b, batch_size=batch_size_b,
                                                 shuffle=False, **kwargs)
    train_loader = [train_loader_a, train_loader_b]
    train_extract_loader = torch.utils.data.DataLoader(train_extract_dir, batch_size=1,
                                                       shuffle=False, **kwargs)
    print('Batch_size is {} for A, and {} for B.'.format(batch_size_a, batch_size_b))

    # Validation batches are smaller; B is scaled to keep loaders in step.
    batch_size_a = int(args.batch_size / 8)
    valid_loader_a = torch.utils.data.DataLoader(valid_dir_a, batch_size=batch_size_a,
                                                 shuffle=False, **kwargs)
    batch_size_b = int(len(valid_dir_b) / len(valid_dir_a) * batch_size_a)
    valid_loader_b = torch.utils.data.DataLoader(valid_dir_b, batch_size=batch_size_b,
                                                 shuffle=False, **kwargs)
    valid_loader = valid_loader_a, valid_loader_b

    if args.cuda:
        if len(args.gpu_id) > 1:
            print("Continue with gpu: %s ..." % str(args.gpu_id))
            torch.distributed.init_process_group(
                backend="nccl",
                init_method='file:///home/work2020/yangwenhao/project/lstm_speaker_verification/data/sharedfile',
                rank=0,
                world_size=1)
            model = model.cuda()
            model = DistributedDataParallel(model, find_unused_parameters=True)
        else:
            model = model.cuda()
        for i in range(len(ce)):
            if ce[i] is not None:
                ce[i] = ce[i].cuda()

    try:
        print('Dropout is {}.'.format(model.dropout_p))
    except AttributeError:
        # Best-effort: not every architecture exposes `dropout_p`.
        pass

    xvector_dir = args.check_path
    xvector_dir = xvector_dir.replace('checkpoint', 'xvector')
    start_time = time.time()

    for epoch in range(start, end):
        print('\n\33[1;34m Current \'{}\' learning rate is '.format(args.optimizer), end='')
        for param_group in optimizer.param_groups:
            print('{:.5f} '.format(param_group['lr']), end='')
        print(' \33[0m')

        train(train_loader, model, ce, optimizer, epoch)
        valid_loss = valid_class(valid_loader, model, ce, epoch)

        if epoch % 4 == 1 or epoch == (end - 1) or epoch in milestones:
            check_path = '{}/checkpoint_{}.pth'.format(args.check_path, epoch)
            # BUGFIX: the original ended this statement with a trailing comma,
            # wrapping the state dict in a 1-tuple inside every checkpoint
            # (the `isinstance(..., tuple)` guards elsewhere worked around it).
            model_state_dict = model.module.state_dict() \
                if isinstance(model, DistributedDataParallel) else model.state_dict()
            torch.save({'epoch': epoch,
                        'state_dict': model_state_dict,
                        'criterion': ce},
                       check_path)

        if epoch % 2 == 1 or epoch == (end - 1):
            valid_test(train_extract_loader, model, epoch, xvector_dir)

        if epoch != (end - 2) and (epoch % 4 == 1 or epoch in milestones
                                   or epoch == (end - 1)):
            test(model, epoch, writer, xvector_dir)

        if args.scheduler == 'rop':
            scheduler.step(valid_loss)
        else:
            scheduler.step()

    writer.close()
    stop_time = time.time()
    # BUGFIX: original computed start_time - stop_time, printing a negative
    # per-epoch duration.
    t = float(stop_time - start_time)
    print("Running %.4f minutes for each epoch.\n" % (t / 60 / (end - start)))
def main():
    """Train a speaker-embedding model on one dataset with padded-batch loading.

    Builds the model from ``args``, loads a resume checkpoint either before
    (finetune) or after (plain resume) the optimizer is created, configures
    the loss/optimizer/scheduler, then runs the training loop with periodic
    checkpointing, extraction and testing.  Ctrl-C stops training cleanly.
    Relies on module-level globals: ``args``, ``kwargs``, ``extract_kwargs``,
    ``opt_kwargs``, ``train_dir``, ``valid_dir``, ``train_extract_dir``,
    ``writer``, ``PadCollate`` and the train/valid/test helpers.
    """
    # print the experiment configuration
    print('\nCurrent time is \33[91m{}\33[0m.'.format(str(time.asctime())))
    opts = vars(args)
    keys = list(opts.keys())
    keys.sort()
    options = []
    for k in keys:
        options.append("\'%s\': \'%s\'" % (str(k), str(opts[k])))
    print('Parsed options: \n{ %s }' % (', '.join(options)))
    print('Number of Speakers: {}.\n'.format(train_dir.num_spks))

    # instantiate model and initialize weights
    kernel_size = [int(x) for x in args.kernel_size.split(',')]
    context = [int(x) for x in args.context.split(',')]
    if args.padding == '':
        # default: "same"-style padding derived from the kernel size
        padding = [int((x - 1) / 2) for x in kernel_size]
    else:
        padding = [int(x) for x in args.padding.split(',')]
    kernel_size = tuple(kernel_size)
    padding = tuple(padding)
    stride = [int(x) for x in args.stride.split(',')]
    channels = [int(x) for x in args.channels.split(',')]

    model_kwargs = {
        'input_dim': args.input_dim,
        'feat_dim': args.feat_dim,
        'kernel_size': kernel_size,
        'context': context,
        'filter_fix': args.filter_fix,
        'mask': args.mask_layer,
        'mask_len': args.mask_len,
        'block_type': args.block_type,
        'filter': args.filter,
        'exp': args.exp,
        'inst_norm': args.inst_norm,
        'input_norm': args.input_norm,
        'stride': stride,
        'fast': args.fast,
        'avg_size': args.avg_size,
        'time_dim': args.time_dim,
        'padding': padding,
        'encoder_type': args.encoder_type,
        'vad': args.vad,
        'transform': args.transform,
        'embedding_size': args.embedding_size,
        'ince': args.inception,
        'resnet_size': args.resnet_size,
        'num_classes': train_dir.num_spks,
        'channels': channels,
        'alpha': args.alpha,
        'dropout_p': args.dropout_p
    }
    print('Model options: {}'.format(model_kwargs))
    dist_type = 'cos' if args.cos_sim else 'l2'
    print('Testing with %s distance, ' % dist_type)

    model = create_model(args.model, **model_kwargs)
    start_epoch = 0
    if args.save_init and not args.finetune:
        # Snapshot the randomly-initialized model as checkpoint 0.
        check_path = '{}/checkpoint_{}.pth'.format(args.check_path, start_epoch)
        torch.save(model, check_path)

    iteration = 0  # if args.resume else 0

    def _load_resume_checkpoint():
        """Load weights from args.resume into model.

        Returns the epoch stored in the checkpoint, or None when the file is
        missing.  Shared by the finetune (pre-optimizer) and plain resume
        (post-optimizer) paths, which previously duplicated this code.
        """
        if not os.path.isfile(args.resume):
            print('=> no checkpoint found at {}'.format(args.resume))
            return None
        print('=> loading checkpoint {}'.format(args.resume))
        checkpoint = torch.load(args.resume)
        checkpoint_state_dict = checkpoint['state_dict']
        if isinstance(checkpoint_state_dict, tuple):
            # Older checkpoints accidentally stored the state dict as a 1-tuple.
            checkpoint_state_dict = checkpoint_state_dict[0]
        # Drop BatchNorm bookkeeping buffers that torch versions disagree on.
        filtered = {k: v for k, v in checkpoint_state_dict.items()
                    if 'num_batches_tracked' not in k}
        if list(filtered.keys())[0].startswith('module'):
            # Checkpoint was written by DistributedDataParallel: strip the
            # leading 'module.' (7 characters) from every key.
            new_state_dict = OrderedDict()
            for k, v in filtered.items():
                new_state_dict[k[7:]] = v
            model.load_state_dict(new_state_dict)
        else:
            model_dict = model.state_dict()
            model_dict.update(filtered)
            model.load_state_dict(model_dict)
        return checkpoint['epoch']

    if args.finetune and args.resume:
        resumed_epoch = _load_resume_checkpoint()
        if resumed_epoch is not None:
            start_epoch = resumed_epoch

    # Loss selection.  Margin-based losses replace the classifier head and
    # take over from plain cross-entropy (ce_criterion is then None).
    ce_criterion = nn.CrossEntropyLoss()
    if args.loss_type == 'soft':
        xe_criterion = None
    elif args.loss_type == 'asoft':
        ce_criterion = None
        model.classifier = AngleLinear(in_features=args.embedding_size,
                                       out_features=train_dir.num_spks, m=args.m)
        xe_criterion = AngleSoftmaxLoss(lambda_min=args.lambda_min,
                                        lambda_max=args.lambda_max)
    elif args.loss_type == 'center':
        xe_criterion = CenterLoss(num_classes=train_dir.num_spks,
                                  feat_dim=args.embedding_size)
    elif args.loss_type == 'gaussian':
        xe_criterion = GaussianLoss(num_classes=train_dir.num_spks,
                                    feat_dim=args.embedding_size)
    elif args.loss_type == 'coscenter':
        xe_criterion = CenterCosLoss(num_classes=train_dir.num_spks,
                                     feat_dim=args.embedding_size)
    elif args.loss_type == 'mulcenter':
        xe_criterion = MultiCenterLoss(num_classes=train_dir.num_spks,
                                       feat_dim=args.embedding_size,
                                       num_center=args.num_center)
    elif args.loss_type == 'amsoft':
        ce_criterion = None
        model.classifier = AdditiveMarginLinear(feat_dim=args.embedding_size,
                                                n_classes=train_dir.num_spks)
        xe_criterion = AMSoftmaxLoss(margin=args.margin, s=args.s)
    elif args.loss_type == 'arcsoft':
        ce_criterion = None
        model.classifier = AdditiveMarginLinear(feat_dim=args.embedding_size,
                                                n_classes=train_dir.num_spks)
        # NOTE: 'iteraion' is the (misspelled) keyword the project's
        # ArcSoftmaxLoss declares — keep it to match that signature.
        xe_criterion = ArcSoftmaxLoss(margin=args.margin, s=args.s,
                                      iteraion=iteration,
                                      all_iteraion=args.all_iteraion)
    elif args.loss_type == 'wasse':
        xe_criterion = Wasserstein_Loss(source_cls=args.source_cls)
    elif args.loss_type == 'ring':
        xe_criterion = RingLoss(ring=args.ring)
        args.alpha = 0.0

    # Parameter groups: criterion params (center-style losses) or the
    # classifier/filter layers may get a scaled learning rate.
    model_para = model.parameters()
    if args.loss_type in ['center', 'mulcenter', 'gaussian', 'coscenter', 'ring']:
        assert args.lr_ratio > 0
        model_para = [{'params': xe_criterion.parameters(),
                       'lr': args.lr * args.lr_ratio},
                      {'params': model.parameters()}]
    if args.finetune:
        if args.loss_type == 'asoft' or args.loss_type == 'amsoft':
            classifier_params = list(map(id, model.classifier.parameters()))
            rest_params = filter(lambda p: id(p) not in classifier_params,
                                 model.parameters())
            assert args.lr_ratio > 0
            model_para = [{'params': model.classifier.parameters(),
                           'lr': args.lr * args.lr_ratio},
                          {'params': rest_params}]
    if args.filter in ['fDLR', 'fBLayer', 'fLLayer', 'fBPLayer']:
        filter_params = list(map(id, model.filter_layer.parameters()))
        rest_params = filter(lambda p: id(p) not in filter_params,
                             model.parameters())
        model_para = [{'params': model.filter_layer.parameters(),
                       'lr': args.lr * args.lr_ratio},
                      {'params': rest_params}]

    optimizer = create_optimizer(model_para, args.optimizer, **opt_kwargs)

    if not args.finetune and args.resume:
        resumed_epoch = _load_resume_checkpoint()
        if resumed_epoch is not None:
            start_epoch = resumed_epoch

    # Save model config txt
    with open(osp.join(args.check_path,
                       'model.%s.conf' % time.strftime("%Y.%m.%d", time.localtime())),
              'w') as f:
        f.write('model: ' + str(model) + '\n')
        f.write('CrossEntropy: ' + str(ce_criterion) + '\n')
        f.write('Other Loss: ' + str(xe_criterion) + '\n')
        f.write('Optimizer: ' + str(optimizer) + '\n')

    milestones = [int(x) for x in args.milestones.split(',')]
    milestones.sort()
    if args.scheduler == 'exp':
        scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=args.gamma)
    elif args.scheduler == 'rop':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   patience=args.patience,
                                                   min_lr=1e-5)
    else:
        scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones,
                                             gamma=0.1)

    ce = [ce_criterion, xe_criterion]
    start = args.start_epoch + start_epoch
    print('Start epoch is : ' + str(start))
    end = start + args.epochs

    # Training batches are padded to a per-batch random chunk size.
    train_loader = torch.utils.data.DataLoader(
        train_dir,
        batch_size=args.batch_size,
        collate_fn=PadCollate(dim=2,
                              num_batch=int(np.ceil(len(train_dir) / args.batch_size)),
                              min_chunk_size=args.min_chunk_size,
                              max_chunk_size=args.max_chunk_size),
        shuffle=args.shuffle,
        **kwargs)
    valid_loader = torch.utils.data.DataLoader(
        valid_dir,
        batch_size=int(args.batch_size / 2),
        collate_fn=PadCollate(dim=2,
                              fix_len=True,
                              min_chunk_size=args.chunk_size,
                              max_chunk_size=args.chunk_size + 1),
        shuffle=False,
        **kwargs)
    train_extract_loader = torch.utils.data.DataLoader(train_extract_dir,
                                                       batch_size=1,
                                                       shuffle=False,
                                                       **extract_kwargs)

    if args.cuda:
        if len(args.gpu_id) > 1:
            print("Continue with gpu: %s ..." % str(args.gpu_id))
            torch.distributed.init_process_group(
                backend="nccl",
                init_method='file:///home/ssd2020/yangwenhao/lstm_speaker_verification/data/sharedfile',
                rank=0,
                world_size=1)
            model = DistributedDataParallel(model.cuda(),
                                            find_unused_parameters=True)
        else:
            model = model.cuda()
        for i in range(len(ce)):
            if ce[i] is not None:
                ce[i] = ce[i].cuda()

    try:
        print('Dropout is {}.'.format(model.dropout_p))
    except AttributeError:
        # Best-effort: not every architecture exposes `dropout_p`.
        pass

    xvector_dir = args.check_path
    xvector_dir = xvector_dir.replace('checkpoint', 'xvector')
    start_time = time.time()

    try:
        for epoch in range(start, end):
            lr_string = '\n\33[1;34m Current \'{}\' learning rate is '.format(args.optimizer)
            for param_group in optimizer.param_groups:
                lr_string += '{:.6f} '.format(param_group['lr'])
            print('%s \33[0m' % lr_string)

            train(train_loader, model, ce, optimizer, epoch)
            valid_loss = valid_class(valid_loader, model, ce, epoch)

            if (epoch == 1 or epoch != (end - 2)) and (
                    epoch % 4 == 1 or epoch in milestones or epoch == (end - 1)):
                model.eval()
                check_path = '{}/checkpoint_{}.pth'.format(args.check_path, epoch)
                # BUGFIX: the original ended this statement with a trailing
                # comma, wrapping the state dict in a 1-tuple inside every
                # checkpoint (the `isinstance(..., tuple)` guards elsewhere
                # worked around it).
                model_state_dict = model.module.state_dict() \
                    if isinstance(model, DistributedDataParallel) else model.state_dict()
                torch.save({'epoch': epoch,
                            'state_dict': model_state_dict,
                            'criterion': ce},
                           check_path)

                valid_test(train_extract_loader, model, epoch, xvector_dir)
                test(model, epoch, writer, xvector_dir)
                if epoch != (end - 1):
                    # Free disk: drop this epoch's extracted x-vectors.
                    try:
                        shutil.rmtree("%s/train/epoch_%s" % (xvector_dir, epoch))
                        shutil.rmtree("%s/test/epoch_%s" % (xvector_dir, epoch))
                    except Exception as e:
                        print('rm dir xvectors error:', e)

            if args.scheduler == 'rop':
                scheduler.step(valid_loss)
            else:
                scheduler.step()
    except KeyboardInterrupt:
        # Ctrl-C: stop training but still report timing for finished epochs.
        end = epoch

    writer.close()
    stop_time = time.time()
    t = float(stop_time - start_time)
    print("Running %.4f minutes for each epoch.\n" % (t / 60 / (max(end - start, 1))))
    exit(0)
def main():
    # Extract embeddings from every saved checkpoint in a range of epochs.
    # For each epoch's checkpoint it loads the weights, then dumps embeddings
    # for the train/valid/verify/test loaders into per-epoch directories.
    # Depends on module-level globals: args, kwargs, train_dir, train_part,
    # valid_part, veri_dir, test_dir, and the *_extract helpers.
    print('\nNumber of Speakers: {}.'.format(train_dir.num_spks))
    # print the experiment configuration
    print('Current time is \33[91m{}\33[0m.'.format(str(time.asctime())))
    print('Parsed options: {}'.format(vars(args)))

    # instantiate model and initialize weights
    kernel_size = args.kernel_size.split(',')
    kernel_size = [int(x) for x in kernel_size]
    if args.padding == '':
        # default: "same"-style padding derived from the kernel size
        padding = [int((x - 1) / 2) for x in kernel_size]
    else:
        padding = args.padding.split(',')
        padding = [int(x) for x in padding]
    kernel_size = tuple(kernel_size)
    padding = tuple(padding)
    stride = args.stride.split(',')
    stride = [int(x) for x in stride]
    channels = args.channels.split(',')
    channels = [int(x) for x in channels]

    model_kwargs = {
        'input_dim': args.input_dim,
        'feat_dim': args.feat_dim,
        'kernel_size': kernel_size,
        'mask': args.mask_layer,
        'mask_len': args.mask_len,
        'block_type': args.block_type,
        'filter': args.filter,
        'inst_norm': args.inst_norm,
        'input_norm': args.input_norm,
        'stride': stride,
        'fast': args.fast,
        'avg_size': args.avg_size,
        'time_dim': args.time_dim,
        'padding': padding,
        'encoder_type': args.encoder_type,
        'vad': args.vad,
        'transform': args.transform,
        'embedding_size': args.embedding_size,
        'ince': args.inception,
        'resnet_size': args.resnet_size,
        'num_classes': train_dir.num_spks,
        'channels': channels,
        'alpha': args.alpha,
        'dropout_p': args.dropout_p
    }
    print('Model options: {}'.format(model_kwargs))
    model = create_model(args.model, **model_kwargs)

    # Margin-based losses replace the plain classifier head so the checkpoint
    # state dict keys line up when loading below.
    if args.loss_type == 'asoft':
        model.classifier = AngleLinear(in_features=args.embedding_size,
                                       out_features=train_dir.num_spks,
                                       m=args.m)
    elif args.loss_type == 'amsoft' or args.loss_type == 'arcsoft':
        model.classifier = AdditiveMarginLinear(feat_dim=args.embedding_size,
                                                n_classes=train_dir.num_spks)

    train_loader = DataLoader(train_part, batch_size=args.batch_size,
                              shuffle=False, **kwargs)
    veri_loader = DataLoader(veri_dir, batch_size=args.batch_size,
                             shuffle=False, **kwargs)
    valid_loader = DataLoader(valid_part, batch_size=args.batch_size,
                              shuffle=False, **kwargs)
    test_loader = DataLoader(test_dir, batch_size=args.batch_size,
                             shuffle=False, **kwargs)
    # sitw_test_loader = DataLoader(sitw_test_part, batch_size=args.batch_size, shuffle=False, **kwargs)
    # sitw_dev_loader = DataLoader(sitw_dev_part, batch_size=args.batch_size, shuffle=False, **kwargs)

    resume_path = args.check_path + '/checkpoint_{}.pth'
    print('=> Saving output in {}\n'.format(args.extract_path))
    epochs = np.arange(args.start_epochs, args.epochs + 1)

    for e in epochs:
        # Load model from Checkpoint file
        if os.path.isfile(resume_path.format(e)):
            print('=> loading checkpoint {}'.format(resume_path.format(e)))
            checkpoint = torch.load(resume_path.format(e))
            checkpoint_state_dict = checkpoint['state_dict']
            if isinstance(checkpoint_state_dict, tuple):
                # some older checkpoints stored the state dict as a 1-tuple
                checkpoint_state_dict = checkpoint_state_dict[0]
            # epoch = checkpoint['epoch']
            # if e == 0:
            #     filtered = checkpoint.state_dict()
            # else:
            # drop BatchNorm bookkeeping buffers that torch versions disagree on
            filtered = {
                k: v
                for k, v in checkpoint_state_dict.items()
                if 'num_batches_tracked' not in k
            }
            if list(filtered.keys())[0].startswith('module'):
                # checkpoint came from DistributedDataParallel: strip the
                # leading 'module.' (7 characters) from every key
                new_state_dict = OrderedDict()
                for k, v in filtered.items():
                    name = k[7:]  # remove the `module.` prefix
                    new_state_dict[name] = v  # keep each key's value unchanged
                model.load_state_dict(new_state_dict)
            else:
                model_dict = model.state_dict()
                model_dict.update(filtered)
                model.load_state_dict(model_dict)
        else:
            print('=> no checkpoint found at %s' % resume_path.format(e))
            continue
        model.cuda()

        # one output directory per checkpoint epoch
        file_dir = args.extract_path + '/epoch_%d' % e
        if not os.path.exists(file_dir):
            os.makedirs(file_dir)

        if not args.test_only:
            # if args.cuda:
            #     model_conv1 = model.conv1.weight.cpu().detach().numpy()
            #     np.save(file_dir + '/model.conv1.npy', model_conv1)
            train_extract(train_loader, model, file_dir,
                          '%s_train' % args.train_set_name)
            train_extract(valid_loader, model, file_dir,
                          '%s_valid' % args.train_set_name)
            test_extract(veri_loader, model, file_dir,
                         '%s_veri' % args.train_set_name)

        # test-set extraction always runs, even with --test-only
        test_extract(test_loader, model, file_dir,
                     '%s_test' % args.test_set_name)