def main(args):
    # Construct Solver
    # data
    tr_dataset = AudioDataset(args.train_json, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              batch_frames=args.batch_frames)
    cv_dataset = AudioDataset(args.valid_json, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              batch_frames=args.batch_frames)
    tr_loader = AudioDataLoader(tr_dataset, batch_size=1,
                                num_workers=args.num_workers,
                                shuffle=args.shuffle,
                                LFR_m=args.LFR_m, LFR_n=args.LFR_n)
    cv_loader = AudioDataLoader(cv_dataset, batch_size=1,
                                num_workers=args.num_workers,
                                LFR_m=args.LFR_m, LFR_n=args.LFR_n)
    # load dictionary and generate char_list, sos_id, eos_id
    char_list, sos_id, eos_id = process_dict(args.dict)
    vocab_size = len(char_list)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}
    # model
    encoder = Encoder(args.d_input * args.LFR_m, args.n_layers_enc,
                      args.n_head, args.d_k, args.d_v,
                      args.d_model, args.d_inner,
                      dropout=args.dropout, pe_maxlen=args.pe_maxlen)
    decoder = Decoder(sos_id, eos_id, vocab_size,
                      args.d_word_vec, args.n_layers_dec,
                      args.n_head, args.d_k, args.d_v,
                      args.d_model, args.d_inner,
                      dropout=args.dropout,
                      tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
                      pe_maxlen=args.pe_maxlen)
    model = Transformer(encoder, decoder)
    print(model)
    model.cuda()
    # optimizer (wrap the model for multi-GPU training first)
    model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3])
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.k, args.d_model, args.warmup_steps)
    # solver
    solver = Solver(data, model, optimizer, args)
    solver.train()
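# A minimal sketch (not the repo's actual file) of the Noam-style
# TransformerOptimizer assumed above, inferred from its (k, d_model,
# warmup_steps) signature: lr = k * d_model**-0.5 * min(step**-0.5,
# step * warmup**-1.5), i.e. linear warmup then inverse-square-root decay.
class NoamOptimizerSketch:
    def __init__(self, optimizer, k, d_model, warmup_steps=4000):
        self.optimizer = optimizer
        self.k = k
        self.init_lr = d_model ** (-0.5)
        self.warmup_steps = warmup_steps
        self.step_num = 0

    def zero_grad(self):
        self.optimizer.zero_grad()

    def step(self):
        # Update the learning rate of every param group, then take a step.
        self.step_num += 1
        lr = self.k * self.init_lr * min(
            self.step_num ** (-0.5),
            self.step_num * self.warmup_steps ** (-1.5))
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
        self.optimizer.step()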
}
# Fall back to DeepSpeech for unknown model names; the default must be an
# entry of model_dict, not the bare string 'DeepSpeech', or .cuda() fails.
model = model_dict.get(args.model, model_dict['DeepSpeech']).cuda()

train_dataset = SpeechDataset('train.csv', augment=args.augment)
# train_sampler = StochasticBucketSampler(train_dataset,
#                                         batch_size=args.batch_size)
train_loader = SpeechDataloader(train_dataset, num_workers=args.num_worker,
                                batch_size=args.batch_size, shuffle=True)

if args.model == 'SAN':
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        scale_factor=args.k, warmup_step=args.warmup)
    scheduler = None
else:
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=0.8, nesterov=True)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.985)

# Resume from the checkpoint of the previous epoch if requested.
if args.from_epoch != 0:
    model_path = 'checkpoints_{}/model{}.pt'.format(args.model,
                                                    args.from_epoch - 1)
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
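# Hedged sketch of the checkpoint format the resume branch above expects; the
# actual save call lives elsewhere in the repo, so save_checkpoint is a
# hypothetical helper illustrating the assumed layout only.
import os
import torch

def save_checkpoint(model, optimizer, epoch, model_name):
    # Directory layout assumed by the loader: checkpoints_<model>/model<epoch>.pt
    ckpt_dir = 'checkpoints_{}'.format(model_name)
    os.makedirs(ckpt_dir, exist_ok=True)
    torch.save({'model': model.state_dict(),
                'optimizer': optimizer.state_dict()},
               os.path.join(ckpt_dir, 'model{}.pt'.format(epoch)))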
def train(lr, l2, momentum, smoothing, warmup, model, emb_size, n_hidden,
          hidden_size, dropout_prob, epochs, batch_size, valid_batch_size,
          n_workers, cuda, data_path, valid_data_path, hdf_path,
          valid_hdf_path, checkpoint_path, softmax, pretrained,
          pretrained_path, max_gnorm, stats, log_dir, eval_every, ablation):
    args_dict = locals()
    cp_name = get_cp_name(checkpoint_path)
    rproj_size = -1  # default when no pretrained checkpoint provides it

    if pretrained_path != 'none':
        print('\nLoading pretrained model from: {}\n'.format(pretrained_path))
        ckpt = torch.load(pretrained_path,
                          map_location=lambda storage, loc: storage)
        dropout_prob = ckpt['dropout_prob']
        n_hidden = ckpt['n_hidden']
        hidden_size = ckpt['hidden_size']
        emb_size = ckpt['emb_size']
        if 'r_proj_size' in ckpt:
            rproj_size = ckpt['r_proj_size']
        print('\nUsing pretrained config for discriminator. Ignoring args.')
        args_dict['dropout_prob'] = dropout_prob
        args_dict['n_hidden'] = n_hidden
        args_dict['hidden_size'] = hidden_size
        args_dict['emb_size'] = emb_size

    if log_dir != 'none':
        writer = SummaryWriter(log_dir=os.path.join(log_dir, cp_name),
                               comment=model, purge_step=0)
        writer.add_hparams(hparam_dict=args_dict,
                           metric_dict={'best_eer': 0.0})
    else:
        writer = None

    # Per-dataset channel statistics for input normalization.
    if stats == 'cars':
        mean, std = [0.4461, 0.4329, 0.4345], [0.2888, 0.2873, 0.2946]
    elif stats == 'cub':
        mean, std = [0.4782, 0.4925, 0.4418], [0.2330, 0.2296, 0.2647]
    elif stats == 'sop':
        mean, std = [0.5603, 0.5155, 0.4796], [0.2939, 0.2991, 0.3085]
    elif stats == 'imagenet':
        mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

    if hdf_path != 'none':
        transform_train = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.RandomPerspective(p=0.1),
            transforms.RandomGrayscale(p=0.1),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std)
        ])
        trainset = Loader(hdf_path, transform_train)
    else:
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.RandomPerspective(p=0.1),
            transforms.RandomGrayscale(p=0.1),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std)
        ])
        trainset = datasets.ImageFolder(data_path, transform=transform_train)

    train_loader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=n_workers,
        worker_init_fn=set_np_randomseed, pin_memory=True)

    if valid_hdf_path != 'none':
        transform_test = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std)
        ])
        validset = Loader(valid_hdf_path, transform_test)
    else:
        transform_test = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std)
        ])
        validset = datasets.ImageFolder(valid_data_path,
                                        transform=transform_test)

    valid_loader = torch.utils.data.DataLoader(
        validset, batch_size=valid_batch_size, shuffle=True,
        num_workers=n_workers, pin_memory=True)

    nclasses = trainset.n_classes if isinstance(trainset, Loader) \
        else len(trainset.classes)

    if model == 'vgg':
        model_ = vgg.VGG('VGG19', nh=n_hidden, n_h=hidden_size,
                         dropout_prob=dropout_prob, sm_type=softmax,
                         n_classes=nclasses, emb_size=emb_size,
                         r_proj_size=rproj_size)
    elif model == 'resnet':
        model_ = resnet.ResNet50(nh=n_hidden, n_h=hidden_size,
                                 dropout_prob=dropout_prob, sm_type=softmax,
                                 n_classes=nclasses, emb_size=emb_size,
                                 r_proj_size=rproj_size)
    elif model == 'densenet':
        model_ = densenet.DenseNet121(nh=n_hidden, n_h=hidden_size,
                                      dropout_prob=dropout_prob,
                                      sm_type=softmax, n_classes=nclasses,
                                      emb_size=emb_size,
                                      r_proj_size=rproj_size)

    if pretrained_path != 'none':
        # Drop the output projection: its shape depends on the old class count.
        if ckpt['sm_type'] == 'am_softmax':
            del ckpt['model_state']['out_proj.w']
        elif ckpt['sm_type'] == 'softmax':
            del ckpt['model_state']['out_proj.w.weight']
            del ckpt['model_state']['out_proj.w.bias']
        print(model_.load_state_dict(ckpt['model_state'], strict=False))
        print('\n')

    if pretrained:
        print('\nLoading pretrained encoder from torchvision\n')
        if model == 'vgg':
            model_pretrained = torchvision.models.vgg19(pretrained=True)
        elif model == 'resnet':
            model_pretrained = torchvision.models.resnet50(pretrained=True)
        elif model == 'densenet':
            model_pretrained = torchvision.models.densenet121(pretrained=True)
        print(model_.load_state_dict(model_pretrained.state_dict(),
                                     strict=False))
        print('\n')

    if cuda:
        device = get_freer_gpu()
        model_ = model_.cuda(device)
        torch.backends.cudnn.benchmark = True

    optimizer = TransformerOptimizer(
        optim.SGD(model_.parameters(), lr=lr, momentum=momentum,
                  weight_decay=l2, nesterov=True),
        lr=lr, warmup_steps=warmup)

    trainer = TrainLoop(model_, optimizer, train_loader, valid_loader,
                        max_gnorm=max_gnorm, label_smoothing=smoothing,
                        verbose=-1, cp_name=cp_name, save_cp=True,
                        checkpoint_path=checkpoint_path, ablation=ablation,
                        cuda=cuda, logger=writer)

    # Retry training up to 5 times; the surrounding hyperparameter search
    # treats a crash as a failed trial and falls through to a dummy cost.
    for i in range(5):
        print(' ')
        print('Hyperparameters:')
        print('Selected model: {}'.format(model))
        print('Embedding size: {}'.format(emb_size))
        print('Hidden layer size: {}'.format(hidden_size))
        print('Number of hidden layers: {}'.format(n_hidden))
        print('Random projection size: {}'.format(rproj_size))
        print('Dropout rate: {}'.format(dropout_prob))
        print('Batch size: {}'.format(batch_size))
        print('LR: {}'.format(lr))
        print('Momentum: {}'.format(momentum))
        print('l2: {}'.format(l2))
        print('Label smoothing: {}'.format(smoothing))
        print('Warmup iterations: {}'.format(warmup))
        print('Softmax Mode is: {}'.format(softmax))
        print('Pretrained: {}'.format(pretrained))
        print('Pretrained path: {}'.format(pretrained_path))
        print('Evaluate every {} iterations.'.format(eval_every))
        print('Ablation Mode: {}'.format(ablation))
        print(' ')

        if i > 0:
            print(' ')
            print('Trial {}'.format(i + 1))
            print(' ')

        try:
            cost = trainer.train(n_epochs=epochs, save_every=epochs + 10,
                                 eval_every=eval_every)
            print(' ')
            print('Best e2e EER in file ' + cp_name +
                  ' was: {}'.format(cost[0]))
            print('Best cos EER in file ' + cp_name +
                  ' was: {}'.format(cost[1]))
            print(' ')
            if log_dir != 'none':
                writer.add_hparams(hparam_dict=args_dict,
                                   metric_dict={'best_eer': cost[0]})
            return cost[0]
        except Exception:
            print("Error:", sys.exc_info())

    print('Returning dummy cost due to failures while training.')
    return 0.99
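# Hedged sketch of the set_np_randomseed worker_init_fn passed to the
# DataLoader above (the real helper lives elsewhere in the repo): give each
# worker a distinct NumPy seed so random augmentations differ across workers.
import numpy as np
import torch

def set_np_randomseed_sketch(worker_id):
    np.random.seed((torch.initial_seed() + worker_id) % (2 ** 32))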
# Fall back to non-strict loading if the checkpoint and model disagree on some
# parameter names/shapes; anything else is fatal.
try:
    model_mix.load_state_dict(ckpt['model_state'], strict=True)
except RuntimeError as err:
    print("Runtime Error: {0}".format(err))
    model_mix.load_state_dict(ckpt['model_state'], strict=False)
except:
    print("Unexpected error:", sys.exc_info()[0])
    raise

model_la = model_la.to(device)
model_pa = model_pa.to(device)
model_mix = model_mix.to(device)

# One optimizer per model; each can override the shared --lr with its own rate.
optimizer_la = TransformerOptimizer(
    optim.Adam(model_la.parameters(), betas=(args.b1, args.b2),
               weight_decay=args.l2),
    lr=args.lr_la if args.lr_la > 0.0 else args.lr,
    warmup_steps=args.warmup)
optimizer_pa = TransformerOptimizer(
    optim.Adam(model_pa.parameters(), betas=(args.b1, args.b2),
               weight_decay=args.l2),
    lr=args.lr_pa if args.lr_pa > 0.0 else args.lr,
    warmup_steps=args.warmup)
optimizer_mix = TransformerOptimizer(
    optim.Adam(model_mix.parameters(), betas=(args.b1, args.b2),
               weight_decay=args.l2),
    lr=args.lr_mix if args.lr_mix > 0.0 else args.lr,
    warmup_steps=args.warmup)
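# Note the signature difference: the TransformerOptimizer here takes
# (lr, warmup_steps) rather than (k, d_model, warmup_steps) as in the speech
# script above. A minimal sketch under the assumption that lr is the peak rate
# reached exactly at warmup_steps, with inverse-sqrt decay afterwards:
class WarmupOptimizerSketch:
    def __init__(self, optimizer, lr, warmup_steps=4000):
        self.optimizer = optimizer
        self.peak_lr = lr
        self.warmup_steps = warmup_steps
        self.step_num = 0

    def zero_grad(self):
        self.optimizer.zero_grad()

    def step(self):
        self.step_num += 1
        # scale == 1.0 at step_num == warmup_steps, so the peak equals lr.
        scale = self.warmup_steps ** 0.5 * min(
            self.step_num ** (-0.5),
            self.step_num * self.warmup_steps ** (-1.5))
        for group in self.optimizer.param_groups:
            group['lr'] = self.peak_lr * scale
        self.optimizer.step()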
def train(lr, l2, momentum, max_gnorm, warmup, input_size, n_hidden,
          hidden_size, dropout_prob, smoothing, n_cycles, epochs, batch_size,
          valid_batch_size, n_workers, cuda, train_hdf_path, valid_hdf_path,
          cp_path, logdir):
    hp_dict = {
        'lr': lr, 'l2': l2, 'momentum': momentum, 'max_gnorm': max_gnorm,
        'warmup': warmup, 'input_size': input_size, 'n_hidden': n_hidden,
        'hidden_size': hidden_size, 'dropout_prob': dropout_prob,
        'smoothing': smoothing, 'n_cycles': n_cycles, 'epochs': epochs,
        'batch_size': batch_size, 'valid_batch_size': valid_batch_size,
        'n_workers': n_workers, 'cuda': cuda,
        'train_hdf_path': train_hdf_path, 'valid_hdf_path': valid_hdf_path,
        'cp_path': cp_path
    }

    cp_name = get_file_name(cp_path)

    # Use the local logdir parameter, not args, inside this function.
    if logdir:
        from torch.utils.tensorboard import SummaryWriter
        writer = SummaryWriter(log_dir=os.path.join(logdir, cp_name),
                               purge_step=0)
        writer.add_hparams(hparam_dict=hp_dict, metric_dict={'best_eer': 0.5})
    else:
        writer = None

    train_dataset = Loader(hdf5_clean=train_hdf_path + 'train_clean.hdf',
                           hdf5_attack=train_hdf_path + 'train_attack.hdf',
                           label_smoothing=smoothing, n_cycles=n_cycles)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=n_workers)

    valid_dataset = Loader(hdf5_clean=valid_hdf_path + 'valid_clean.hdf',
                           hdf5_attack=valid_hdf_path + 'valid_attack.hdf',
                           n_cycles=1)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batch_size,
                                               shuffle=False,
                                               num_workers=n_workers)

    model = model_.MLP(n_in=input_size, nh=n_hidden, n_h=hidden_size,
                       dropout_prob=dropout_prob)

    if cuda:
        device = get_freer_gpu()
        model = model.cuda(device)

    optimizer = TransformerOptimizer(
        optim.SGD(model.parameters(), lr=lr, momentum=momentum,
                  weight_decay=l2, nesterov=True),
        lr=lr, warmup_steps=warmup)

    trainer = TrainLoop(model, optimizer, train_loader, valid_loader,
                        max_gnorm=max_gnorm, verbose=-1, cp_name=cp_name,
                        save_cp=True, checkpoint_path=cp_path, cuda=cuda,
                        logger=writer)

    # Retry up to 5 times; a crashed trial falls through to a dummy cost so
    # the surrounding hyperparameter search can continue.
    for i in range(5):
        if i > 0:
            print(' ')
            print('Trial {}'.format(i + 1))
            print(' ')
        try:
            cost = trainer.train(n_epochs=epochs, save_every=epochs + 10)
            print(' ')
            print('Best EER in file ' + cp_name + ' was: {}'.format(cost))
            print(' ')
            print('With hyperparameters:')
            print('Hidden layer size: {}'.format(int(hidden_size)))
            print('Number of hidden layers: {}'.format(int(n_hidden)))
            print('Dropout rate: {}'.format(dropout_prob))
            print('Batch size: {}'.format(batch_size))
            print('LR: {}'.format(lr))
            print('Warmup iterations: {}'.format(warmup))
            print('Momentum: {}'.format(momentum))
            print('l2: {}'.format(l2))
            print('Max. Grad. norm: {}'.format(max_gnorm))
            print('Label smoothing: {}'.format(smoothing))
            print(' ')
            if logdir:
                writer.add_hparams(hparam_dict=hp_dict,
                                   metric_dict={'best_eer': cost})
            return cost
        except Exception:
            pass

    print('Returning dummy cost due to failures while training.')
    cost = 0.99
    if logdir:
        writer.add_hparams(hparam_dict=hp_dict, metric_dict={'best_eer': cost})
    return cost
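# Hedged sketch of the get_freer_gpu helper used above (the real one lives
# elsewhere in the repo): pick the visible CUDA device with the most free
# memory so concurrent trials spread across the machine's GPUs.
import torch

def get_freer_gpu_sketch():
    free = [torch.cuda.mem_get_info(i)[0]
            for i in range(torch.cuda.device_count())]
    return torch.device('cuda:{}'.format(free.index(max(free))))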
if args.pretrained_path is not None:
    ckpt = torch.load(args.pretrained_path,
                      map_location=lambda storage, loc: storage)
    # Rebuild the MLP with the architecture stored in the checkpoint.
    model = model_.MLP(n_in=ckpt['input_size'], nh=ckpt['n_hidden'],
                       n_h=ckpt['hidden_size'],
                       dropout_prob=ckpt['dropout_prob'])
    try:
        model.load_state_dict(ckpt['model_state'], strict=True)
    except RuntimeError as err:
        print("Runtime Error: {0}".format(err))
        model.load_state_dict(ckpt['model_state'], strict=False)
    except:
        print("Unexpected error:", sys.exc_info()[0])
        raise

model = model.to(device)

optimizer = TransformerOptimizer(
    optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
              weight_decay=args.l2, nesterov=True),
    lr=args.lr, warmup_steps=args.warmup)

trainer = TrainLoop(model, optimizer, train_loader, valid_loader,
                    max_gnorm=args.max_gnorm, verbose=args.verbose,
                    checkpoint_path=args.checkpoint_path,
                    checkpoint_epoch=args.checkpoint_epoch, cuda=args.cuda,
                    logger=writer)

if args.verbose > 0:
    print('Cuda Mode: {}'.format(args.cuda))
    print('Device: {}'.format(device))
    print('Batch size: {}'.format(args.batch_size))
    print('Validation batch size: {}'.format(args.valid_batch_size))
    print('LR: {}'.format(args.lr))
    print('Momentum: {}'.format(args.momentum))
    print('l2: {}'.format(args.l2))
    print('Max. grad norm: {}'.format(args.max_gnorm))
    print('Warmup iterations: {}'.format(args.warmup))
    print('Inputs dimensionality: {}'.format(args.input_size))
    print('Number of hidden layers: {}'.format(args.n_hidden))
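# Hedged sketch of how TrainLoop presumably applies max_gnorm (the loop itself
# is defined elsewhere): clip the global gradient norm between backward() and
# the optimizer step. Names below are illustrative only.
import torch

def training_step_sketch(model, optimizer, batch, loss_fn, max_gnorm):
    x, y = batch
    optimizer.zero_grad()
    loss = loss_fn(model(x), y)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_gnorm)
    optimizer.step()
    return loss.item()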
def main(args):
    # Construct Solver
    # data
    tr_dataset = AudioDataset(
        args.train_json,
        args.batch_size,
        args.maxlen_in,
        args.maxlen_out,
        batch_frames=args.batch_frames,
    )
    cv_dataset = AudioDataset(
        args.valid_json,
        args.batch_size,
        args.maxlen_in,
        args.maxlen_out,
        batch_frames=args.batch_frames,
    )
    tr_loader = AudioDataLoader(
        tr_dataset,
        batch_size=1,
        num_workers=args.num_workers,
        shuffle=args.shuffle,
        LFR_m=args.LFR_m,
        LFR_n=args.LFR_n,
    )
    cv_loader = AudioDataLoader(
        cv_dataset,
        batch_size=1,
        num_workers=args.num_workers,
        LFR_m=args.LFR_m,
        LFR_n=args.LFR_n,
    )
    # load dictionary and generate char_list, sos_id, eos_id
    char_list, sos_id, eos_id = process_dict(args.dict)
    vocab_size = len(char_list)
    data = {"tr_loader": tr_loader, "cv_loader": cv_loader}
    # model
    encoder = Encoder(
        args.d_input * args.LFR_m,
        args.n_layers_enc,
        args.n_head,
        args.d_k,
        args.d_v,
        args.d_model,
        args.d_inner,
        dropout=args.dropout,
        pe_maxlen=args.pe_maxlen,
    )
    decoder = Decoder(
        sos_id,
        eos_id,
        vocab_size,
        args.d_word_vec,
        args.n_layers_dec,
        args.n_head,
        args.d_k,
        args.d_v,
        args.d_model,
        args.d_inner,
        dropout=args.dropout,
        tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
        pe_maxlen=args.pe_maxlen,
    )
    model = Transformer(encoder, decoder)
    device = flow.device("cuda")
    model.to(device)
    # optimizer (args.step_num lets a resumed run restart the LR schedule
    # from the right step)
    optimizer = TransformerOptimizer(
        flow.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.k,
        args.d_model,
        args.warmup_steps,
        args.step_num,
    )
    # solver
    solver = Solver(data, model, optimizer, device, args)
    solver.train()
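# Quick sanity check of the Noam schedule assumed by both main() variants
# (see the sketch after the first one): lr ramps linearly, peaks at
# step == warmup_steps, then decays as step ** -0.5. Values are illustrative,
# not taken from the repo.
k, d_model, warmup_steps = 1.0, 512, 4000
for step in (1, 1000, 4000, 16000, 64000):
    lr = k * d_model ** (-0.5) * min(step ** (-0.5),
                                     step * warmup_steps ** (-1.5))
    print(step, round(lr, 6))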