def run():
    """Build the a3c launch commands from CLI args; print them, then execute unless dry-run."""
    args = parser.parse_args()
    cmds, notes = create_commands(
        "a3c", args.num_workers, args.remotes, args.env_id, args.log_dir,
        mode=args.mode,
        hparams=hparams.get_hparams(args, ignore_default=True))
    banner = ("Dry-run mode due to -n flag, otherwise the following commands would be executed:"
              if args.dry_run
              else "Executing the following commands:")
    print(banner)
    print("\n".join(cmds))
    print("")
    if not args.dry_run:
        if args.mode == "tmux":
            # Clear TMUX so the generated commands may start a nested tmux session.
            os.environ["TMUX"] = ""
        os.system("\n".join(cmds))
        print('\n'.join(notes))
def test():
    """Qualitative spot-check: translate 10 samples from each test list through
    the A->B path and save the output spectrograms as PNG images.

    Loads all eight sub-networks from checkpoints under
    ./models/{task_name}/{spec_opt}/ at iteration ``hparams.iteration_num``.
    For the first list it writes two images per sample: one generated with a
    random sparse style code ('z-img-*') and one with a constant style code of
    2 ('0-img-*'); for the second list only the random-style image.
    """
    hparams = get_hparams()
    print(hparams.task_name)
    # NOTE(review): model_path is computed but unused below — checkpoints are
    # loaded from the hard-coded './models/...' paths instead. Confirm intent.
    model_path = os.path.join(hparams.model_path, hparams.task_name,
                              hparams.spec_opt)
    # Load Dataset Loader
    root = '../dataset/feat/test'
    list_dir_A = './etc/Test_dt05_real_isolated_1ch_track_list.csv'
    list_dir_B = './etc/Test_dt05_simu_isolated_1ch_track_list.csv'
    output_dir = './output/{}/{}_img'.format(hparams.task_name,
                                             hparams.iteration_num)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Feature normalizers (clean one is unused in this function).
    normalizer_clean = Tanhize('clean')
    normalizer_noisy = Tanhize('noisy')
    test_list_A, speaker_A = testset_list_classifier(root, list_dir_A)
    test_list_B, speaker_B = testset_list_classifier(root, list_dir_B)
    # Instantiate all sub-networks; only generator_B / ContEncoder_A /
    # StEncoder_B are actually exercised below, but all weights are loaded.
    generator_A = Generator()
    generator_B = Generator()
    discriminator_A = Discriminator()
    discriminator_B = Discriminator()
    ContEncoder_A = ContentEncoder()
    ContEncoder_B = ContentEncoder()
    StEncoder_A = StyleEncoder()
    StEncoder_B = StyleEncoder()
    generator_A = nn.DataParallel(generator_A).cuda()
    generator_B = nn.DataParallel(generator_B).cuda()
    discriminator_A = nn.DataParallel(discriminator_A).cuda()
    discriminator_B = nn.DataParallel(discriminator_B).cuda()
    ContEncoder_A = nn.DataParallel(ContEncoder_A).cuda()
    ContEncoder_B = nn.DataParallel(ContEncoder_B).cuda()
    StEncoder_A = nn.DataParallel(StEncoder_A).cuda()
    StEncoder_B = nn.DataParallel(StEncoder_B).cuda()
    # Force checkpoint tensors onto CPU at load time; DataParallel moves them.
    map_location = lambda storage, loc: storage
    generator_A.load_state_dict(
        torch.load('./models/{}/{}/model_gen_A_{}.pth'.format(
            hparams.task_name, hparams.spec_opt, hparams.iteration_num),
            map_location=map_location))
    generator_B.load_state_dict(
        torch.load('./models/{}/{}/model_gen_B_{}.pth'.format(
            hparams.task_name, hparams.spec_opt, hparams.iteration_num),
            map_location=map_location))
    discriminator_A.load_state_dict(
        torch.load('./models/{}/{}/model_dis_A_{}.pth'.format(
            hparams.task_name, hparams.spec_opt, hparams.iteration_num),
            map_location=map_location))
    discriminator_B.load_state_dict(
        torch.load('./models/{}/{}/model_dis_B_{}.pth'.format(
            hparams.task_name, hparams.spec_opt, hparams.iteration_num),
            map_location=map_location))
    ContEncoder_A.load_state_dict(
        torch.load('./models/{}/{}/model_ContEnc_A_{}.pth'.format(
            hparams.task_name, hparams.spec_opt, hparams.iteration_num),
            map_location=map_location))
    ContEncoder_B.load_state_dict(
        torch.load('./models/{}/{}/model_ContEnc_B_{}.pth'.format(
            hparams.task_name, hparams.spec_opt, hparams.iteration_num),
            map_location=map_location))
    StEncoder_A.load_state_dict(
        torch.load('./models/{}/{}/model_StEnc_A_{}.pth'.format(
            hparams.task_name, hparams.spec_opt, hparams.iteration_num),
            map_location=map_location))
    StEncoder_B.load_state_dict(
        torch.load('./models/{}/{}/model_StEnc_B_{}.pth'.format(
            hparams.task_name, hparams.spec_opt, hparams.iteration_num),
            map_location=map_location))
    # First 10 samples of list A: random style vs. constant style code (== 2).
    for i in range(10):
        generator_B.eval()
        ContEncoder_A.eval()
        StEncoder_B.eval()
        feat = testset_loader(root, test_list_A[i], speaker_A,
                              normalizer=normalizer_noisy)
        print(feat['audio_name'])
        A_content = Variable(torch.FloatTensor(feat['sp']).unsqueeze(0)).cuda()
        A_cont = ContEncoder_A(A_content)
        z_st = get_z_random_sparse(1, 8, 1)
        # Constant style code: every style dimension fixed to 2.
        st_0 = torch.ones((1, 8, 1)) * 2
        feature_z = generator_B(A_cont, z_st)
        # Undo the Tanhize normalization before saving.
        feature_z = normalizer_noisy.backward_process(feature_z.squeeze().data)
        feature_z = feature_z.squeeze().data.cpu().numpy()
        feature_0 = generator_B(A_cont, st_0)
        feature_0 = normalizer_noisy.backward_process(feature_0.squeeze().data)
        feature_0 = feature_0.squeeze().data.cpu().numpy()
        imsave(os.path.join(
            output_dir, 'z-img-' + feat['audio_name'].split('.')[0] + '.png'),
            feature_z.transpose(), origin='lower')
        imsave(os.path.join(
            output_dir, '0-img-' + feat['audio_name'].split('.')[0] + '.png'),
            feature_0.transpose(), origin='lower')
    # First 10 samples of list B: random style code only.
    for i in range(10):
        generator_B.eval()
        ContEncoder_A.eval()
        feat = testset_loader(root, test_list_B[i], speaker_B,
                              normalizer=normalizer_noisy)
        print(feat['audio_name'])
        A_content = Variable(torch.FloatTensor(feat['sp']).unsqueeze(0)).cuda()
        A_cont = ContEncoder_A(A_content)
        z_st = get_z_random_sparse(1, 8, 1)
        feature_z = generator_B(A_cont, z_st)
        feature_z = normalizer_noisy.backward_process(feature_z.squeeze().data)
        feature_z = feature_z.squeeze().data.cpu().numpy()
        imsave(os.path.join(
            output_dir, 'z-img-' + feat['audio_name'].split('.')[0] + '.png'),
            feature_z.transpose(), origin='lower')
import hparams

# Smoke-test the hparams module: apply a set of overrides, read the merged
# parameters back, and echo two of the values.
print(hparams)

overrides = {
    'current_loop_index': 0,
    'training_total_loops': 10,
    'training_games_per_loop': 600,
    'training_samples_per_game': 50,
}
hparams.set_hparams(overrides)

merged = hparams.get_hparams()
print(merged['training_total_loops'])
print(merged['training_games_per_loop'])
# NOTE(review): this guard appears to be the tail of a function whose
# definition starts before this chunk (attention_predict / input_file_name
# come from that enclosing scope) — confirm against the full file.
if hparams.plotattn:
    plot_attention(attention_predict, input_file_name)


def plot_attention(attention, file_name):
    """Save each attention head as a grayscale JPEG under
    ``hparams.attention_path + file_name + '_attn/'``.

    Args:
        attention: indexable collection of per-head 2-D matrices, values
            assumed in [0, 1] — TODO confirm shape/dtype against the caller.
        file_name: base name used to build the per-utterance output directory.
    """
    if os.path.exists(hparams.attention_path + file_name + '_attn/') is False:
        os.mkdir(hparams.attention_path + file_name + '_attn/')
    for i in range(hparams.num_heads):
        matrix = attention[i].transpose()
        # Scale [0, 1] attention weights to uint8 [0, 255] for image output.
        # (The comprehension's `i` is comprehension-local in Python 3, so it
        # does not clobber the head index used by str(i) below.)
        matrix = np.array(
            [[int(255 * matrix[i][j]) for j in range(matrix.shape[1])]
             for i in range(matrix.shape[0])],
            dtype='uint8')
        io.imsave(
            hparams.attention_path + file_name + '_attn/head_' + str(i) +
            '.jpg', matrix)


if __name__ == "__main__":
    # Script entry point: build the model in predict mode on the GPU, load the
    # pickled feature scalers, and run prediction over the configured files.
    hparams = get_hparams()
    print(hparams)
    os.environ['CUDA_VISIBLE_DEVICES'] = hparams.gpu
    device = torch.device("cuda")
    mymodel = Utils.create_model(hparams, mode='predict')
    mymodel = mymodel.float()
    mymodel.to(device)
    (ppg_scaler, fwh_scaler, shape_info) = pickle.load(
        open(os.path.join(hparams.dataset, 'scaler.pickle'), 'rb'))
    predict(hparams, Utils.get_predict_file_list(hparams.predict), ppg_scaler,
            fwh_scaler, mymodel, device)
# train and evaluate network on dev split my_trainer.train_and_evaluate(restore_from=restore_dir) utils.set_logger(os.path.join(experiment_dir, 'test.log')) # evaluate the network on test split my_trainer.test(test_dir) if __name__ == '__main__': """Parse command line arguments and start main function.""" args = parser.parse_args() model_name = args.model _model = get_model_class(model_name) problem_name = args.problem _problem = get_problem_class(problem_name) hp_name = args.hparams _hp = get_hparams(hp_name) _experiment_dir = args.experiment_dir _restore_dir = args.restore_dir _test_dir = args.test_dir _overwrite_results = args.overwrite_results _skip_generate_data = args.skip_generate_data main(_problem, _model, _hp, _experiment_dir, _restore_dir, _test_dir, _overwrite_results, _skip_generate_data)
def main():
    """Dispatch training or prediction for one of four network families.

    Reads everything from ``get_hparams()``: ``hparams.network`` selects the
    architecture (BLSTM / WaveNet / Tacotron / CNN) and ``hparams.mode``
    selects train vs. predict. Training compiles a multi-GPU Keras model,
    trains it, and prints the evaluation loss; prediction loads the pickled
    scalers and calls the family-specific predict function.
    """
    hparams = get_hparams()
    print(hparams)
    os.environ['CUDA_VISIBLE_DEVICES'] = hparams.gpu
    if hparams.network == "BLSTM":
        if hparams.mode == 'train':
            ppg_train, ppg_validate, ppg_evaluate, fwh_train, fwh_validate, fwh_evaluate, train_mask, validate_mask, evaluate_mask \
                = Utils.load_data(hparams)
            # Input/output dims come from the first sample's feature width.
            mymodel, recent_epoch = Utils.create_model(hparams,
                                                       ppg_train[0].shape[1],
                                                       fwh_train[0].shape[1])
            my_multi_gpu_model = get_multi_gpu_model(mymodel, hparams.gpu)
            opt = optimizers.Adam(lr=hparams.learning_rate)
            if hparams.add_mean is False:
                my_multi_gpu_model.compile(optimizer=opt,
                                           loss=hparams.loss,
                                           sample_weight_mode='temporal')
            else:
                # Two-headed model: auxiliary mean output weighted separately.
                my_multi_gpu_model.compile(optimizer=opt,
                                           loss={
                                               'output_fwh': hparams.loss,
                                               'output_mean': hparams.loss
                                           },
                                           loss_weights={
                                               'output_fwh': 1.0,
                                               'output_mean': hparams.mean_weight
                                           },
                                           sample_weight_mode='temporal')
            multi_gpu_train(hparams, ppg_train, fwh_train, ppg_validate,
                            fwh_validate, mymodel, my_multi_gpu_model,
                            recent_epoch)
            evaluate_loss = multi_gpu_evaluate(hparams, ppg_evaluate,
                                               fwh_evaluate)
            print("evaluate_loss: {:.4f}".format(evaluate_loss))
        elif hparams.mode == 'predict':
            (ppg_scaler, fwh_scaler, shape_info) = pickle.load(
                open(os.path.join(hparams.dataset, 'scaler.pickle'), 'rb'))
            print("shape_info", shape_info)
            predict(hparams, Utils.get_predict_file_list(hparams.predict),
                    ppg_scaler, fwh_scaler, shape_info)
    elif hparams.network == "WaveNet":
        if hparams.mode == "train":
            ppg_train, ppg_validate, ppg_evaluate, fwh_train, fwh_validate, fwh_evaluate, train_mask, validate_mask, evaluate_mask \
                = Utils.load_data(hparams)
            # WaveNet requires fixed-length (padded) batches.
            assert hparams.batch_pad is True
            mymodel, recent_epoch = Utils.create_model(hparams,
                                                       ppg_train[0].shape[1],
                                                       fwh_train[0].shape[1])
            my_multi_gpu_model = get_multi_gpu_model(mymodel, hparams.gpu)
            opt = optimizers.Adam(lr=hparams.learning_rate)
            my_multi_gpu_model.compile(optimizer=opt, loss=hparams.loss)
            multi_gpu_train(hparams, ppg_train, fwh_train, ppg_validate,
                            fwh_validate, mymodel, my_multi_gpu_model,
                            recent_epoch)
            evaluate_loss = multi_gpu_evaluate(hparams, ppg_evaluate,
                                               fwh_evaluate)
            print("evaluate_loss: {:.4f}".format(evaluate_loss))
        elif hparams.mode == "predict":
            (ppg_scaler, fwh_scaler, shape_info) = pickle.load(
                open(os.path.join(hparams.dataset, 'scaler.pickle'), 'rb'))
            print("shape_info", shape_info)
            wavenet_predict(hparams,
                            Utils.get_predict_file_list(hparams.predict),
                            ppg_scaler, fwh_scaler, shape_info)
    elif hparams.network == "Tacotron":
        if hparams.mode == "train":
            ppg_train, ppg_validate, ppg_evaluate, fwh_train, fwh_validate, fwh_evaluate, train_mask, validate_mask, evaluate_mask \
                = Utils.load_data(hparams)
            assert hparams.batch_pad is True
            if hparams.TF is True:
                # Teacher forcing: plain (stateless) model.
                mymodel, recent_epoch = Utils.create_model(
                    hparams, ppg_train[0].shape[1], fwh_train[0].shape[1])
            else:
                # Free running: stateful RNN with a fixed batch size.
                mymodel, recent_epoch = Utils.create_model(
                    hparams, ppg_train[0].shape[1], fwh_train[0].shape[1],
                    stateful=True, state_batch_size=hparams.batch_size)
                print('\nstateful is', mymodel.layers[-3].stateful, '\n')
            my_multi_gpu_model = get_multi_gpu_model(mymodel, hparams.gpu)
            opt = optimizers.Adam(lr=hparams.learning_rate)
            my_multi_gpu_model.compile(optimizer=opt,
                                       loss=hparams.loss,
                                       sample_weight_mode='temporal')
            multi_gpu_train(hparams, ppg_train, fwh_train, ppg_validate,
                            fwh_validate, mymodel, my_multi_gpu_model,
                            recent_epoch)
            evaluate_loss = multi_gpu_evaluate(hparams, ppg_evaluate,
                                               fwh_evaluate)
            print("evaluate_loss: {:.4f}".format(evaluate_loss))
        elif hparams.mode == 'predict':
            (ppg_scaler, fwh_scaler, shape_info) = pickle.load(
                open(os.path.join(hparams.dataset, 'scaler.pickle'), 'rb'))
            Tacotron_predict(hparams,
                             Utils.get_predict_file_list(hparams.predict),
                             ppg_scaler, fwh_scaler, shape_info)
    elif hparams.network == 'CNN':
        if hparams.mode == 'train':
            ppg_train, ppg_validate, ppg_evaluate, fwh_train, fwh_validate, fwh_evaluate, train_mask, validate_mask, evaluate_mask \
                = Utils.load_data(hparams)
            mymodel, recent_epoch = Utils.create_model(hparams,
                                                       ppg_train[0].shape[1],
                                                       fwh_train[0].shape[1])
            my_multi_gpu_model = get_multi_gpu_model(mymodel, hparams.gpu)
            opt = optimizers.Adam(lr=hparams.learning_rate)
            my_multi_gpu_model.compile(optimizer=opt, loss=hparams.loss)
            multi_gpu_train(hparams, ppg_train, fwh_train, ppg_validate,
                            fwh_validate, mymodel, my_multi_gpu_model,
                            recent_epoch)
            evaluate_loss = multi_gpu_evaluate(hparams, ppg_evaluate,
                                               fwh_evaluate)
            print("evaluate_loss: {:.4f}".format(evaluate_loss))
        elif hparams.mode == 'predict':
            (ppg_scaler, fwh_scaler, shape_info) = pickle.load(
                open(os.path.join(hparams.dataset, 'scaler.pickle'), 'rb'))
            CNN_predict(hparams, Utils.get_predict_file_list(hparams.predict),
                        ppg_scaler, fwh_scaler, shape_info)
def train():
    """Train the two-domain (noisy A / clean B) MUNIT-style translation GAN.

    Builds generators, discriminators, and content/style encoders for both
    domains, then alternates LSGAN discriminator updates with generator
    updates that combine adversarial, reconstruction, content, style, and
    cycle losses. Checkpoints every sub-network to
    ``{hparams.model_path}/{task_name}/{spec_opt}`` every
    ``hparams.model_save_interval`` iterations.
    """
    hparams = get_hparams()
    model_path = os.path.join(hparams.model_path, hparams.task_name,
                              hparams.spec_opt)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    # Dataset loaders: A is the noisy domain, B the clean domain.
    normalizer_clean = Tanhize('clean')
    normalizer_noisy = Tanhize('noisy')
    print('Load dataset2d loader')
    dataset_A_2d = npyDataset2d(hparams.dataset_root,
                                hparams.list_dir_train_A_2d,
                                hparams.frame_len,
                                normalizer=normalizer_noisy)
    dataset_B_2d = npyDataset2d(hparams.dataset_root,
                                hparams.list_dir_train_B_2d,
                                hparams.frame_len,
                                normalizer=normalizer_clean)
    # drop_last keeps every batch exactly hparams.batch_size, which the
    # fixed-size real/fake label tensors below rely on.
    dataloader_A = DataLoader(
        dataset_A_2d,
        batch_size=hparams.batch_size,
        shuffle=True,
        drop_last=True,
    )
    dataloader_B = DataLoader(
        dataset_B_2d,
        batch_size=hparams.batch_size,
        shuffle=True,
        drop_last=True,
    )
    # Load Generator / Discriminator models.
    generator_A = Generator()
    generator_B = Generator()
    discriminator_A = Discriminator()
    discriminator_B = Discriminator()
    ContEncoder_A = ContentEncoder()
    ContEncoder_B = ContentEncoder()
    StEncoder_A = StyleEncoder()
    StEncoder_B = StyleEncoder()
    generator_A.apply(weights_init)
    generator_B.apply(weights_init)
    discriminator_A.apply(weights_init)
    discriminator_B.apply(weights_init)
    ContEncoder_A.apply(weights_init)
    ContEncoder_B.apply(weights_init)
    StEncoder_A.apply(weights_init)
    StEncoder_B.apply(weights_init)
    # Per-batch target tensors for the LSGAN losses.
    real_label = 1
    fake_label = 0
    real_tensor = Variable(torch.FloatTensor(hparams.batch_size))
    _ = real_tensor.data.fill_(real_label)
    fake_tensor = Variable(torch.FloatTensor(hparams.batch_size))
    _ = fake_tensor.data.fill_(fake_label)
    # Define loss functions (NOTE: neither is used below — the LSGAN terms are
    # written out with torch.mean directly; kept for interface stability).
    d = nn.MSELoss()
    bce = nn.BCELoss()
    # CUDA setup.
    if hparams.cuda:
        print('-- Activate with CUDA --')
        generator_A = nn.DataParallel(generator_A).cuda()
        generator_B = nn.DataParallel(generator_B).cuda()
        discriminator_A = nn.DataParallel(discriminator_A).cuda()
        discriminator_B = nn.DataParallel(discriminator_B).cuda()
        ContEncoder_A = nn.DataParallel(ContEncoder_A).cuda()
        ContEncoder_B = nn.DataParallel(ContEncoder_B).cuda()
        StEncoder_A = nn.DataParallel(StEncoder_A).cuda()
        StEncoder_B = nn.DataParallel(StEncoder_B).cuda()
        d.cuda()
        bce.cuda()
        real_tensor = real_tensor.cuda()
        fake_tensor = fake_tensor.cuda()
    else:
        print('-- Activate without CUDA --')
    # Generators and encoders share one optimizer; discriminators another.
    gen_params = chain(
        generator_A.parameters(),
        generator_B.parameters(),
        ContEncoder_A.parameters(),
        ContEncoder_B.parameters(),
        StEncoder_A.parameters(),
        StEncoder_B.parameters(),
    )
    dis_params = chain(
        discriminator_A.parameters(),
        discriminator_B.parameters(),
    )
    optimizer_g = optim.Adam(gen_params, lr=hparams.learning_rate)
    optimizer_d = optim.Adam(dis_params, lr=hparams.learning_rate)
    iters = 0
    for e in range(hparams.epoch_size):
        A_loader, B_loader = iter(dataloader_A), iter(dataloader_B)
        for i in range(len(A_loader) - 1):
            # FIX: iterator.next() is Python 2 only — use the builtin next().
            batch_A = next(A_loader)
            batch_B = next(B_loader)
            A_ = torch.FloatTensor(batch_A)
            B_ = torch.FloatTensor(batch_B)
            if hparams.cuda:
                x_A = Variable(A_.cuda())
                x_B = Variable(B_.cuda())
            else:
                x_A = Variable(A_)
                x_B = Variable(B_)
            real_tensor.data.resize_(hparams.batch_size).fill_(real_label)
            fake_tensor.data.resize_(hparams.batch_size).fill_(fake_label)
            ## Discriminator update step
            discriminator_A.zero_grad()
            discriminator_B.zero_grad()
            # x_A, x_B, x_AB, x_BA: [#_batch, max_time_len, dim]
            # Detach so discriminator gradients do not reach the encoders.
            A_c = ContEncoder_A(x_A).detach()
            B_c = ContEncoder_B(x_B).detach()
            # Random style codes, A,B : N ~ (0,1)
            A_s = Variable(get_z_random(hparams.batch_size, 8))
            B_s = Variable(get_z_random(hparams.batch_size, 8))
            x_AB = generator_B(A_c, B_s).detach()
            x_BA = generator_A(B_c, A_s).detach()
            # LSGAN adversarial loss for both discriminators.
            l_d_A_real = 0.5 * torch.mean(
                (discriminator_A(x_A) - real_tensor)**2)
            l_d_A_fake = 0.5 * torch.mean(
                (discriminator_A(x_BA) - fake_tensor)**2)
            l_d_B_real = 0.5 * torch.mean(
                (discriminator_B(x_B) - real_tensor)**2)
            l_d_B_fake = 0.5 * torch.mean(
                (discriminator_B(x_AB) - fake_tensor)**2)
            l_d_A = l_d_A_real + l_d_A_fake
            l_d_B = l_d_B_real + l_d_B_fake
            l_d = l_d_A + l_d_B
            l_d.backward()
            optimizer_d.step()
            ## Generator / encoder update step
            generator_A.zero_grad()
            generator_B.zero_grad()
            ContEncoder_A.zero_grad()
            ContEncoder_B.zero_grad()
            StEncoder_A.zero_grad()
            StEncoder_B.zero_grad()
            A_c = ContEncoder_A(x_A)
            B_c = ContEncoder_B(x_B)
            A_s_prime = StEncoder_A(x_A)
            B_s_prime = StEncoder_B(x_B)
            # Random style codes, A,B : N ~ (0,1)
            A_s = Variable(get_z_random(hparams.batch_size, 8))
            B_s = Variable(get_z_random(hparams.batch_size, 8))
            # Cross-domain translations and within-domain reconstructions.
            x_BA = generator_A(B_c, A_s)
            x_AB = generator_B(A_c, B_s)
            x_A_recon = generator_A(A_c, A_s_prime)
            x_B_recon = generator_B(B_c, B_s_prime)
            # Re-encode the translations for content/style consistency.
            B_c_recon = ContEncoder_A(x_BA)
            A_s_recon = StEncoder_A(x_BA)
            A_c_recon = ContEncoder_B(x_AB)
            B_s_recon = StEncoder_B(x_AB)
            # Full cycle A -> B -> A and B -> A -> B.
            x_ABA = generator_A(A_c_recon, A_s_prime)
            x_BAB = generator_B(B_c_recon, B_s_prime)
            l_cy_A = recon_criterion(x_ABA, x_A)
            l_cy_B = recon_criterion(x_BAB, x_B)
            l_f_A = recon_criterion(x_A_recon, x_A)
            l_f_B = recon_criterion(x_B_recon, x_B)
            l_c_A = recon_criterion(A_c_recon, A_c)
            l_c_B = recon_criterion(B_c_recon, B_c)
            l_s_A = recon_criterion(A_s_recon, A_s)
            l_s_B = recon_criterion(B_s_recon, B_s)
            # LSGAN generator loss (fool the discriminators).
            l_gan_A = 0.5 * torch.mean(
                (discriminator_A(x_BA) - real_tensor)**2)
            l_gan_B = 0.5 * torch.mean(
                (discriminator_B(x_AB) - real_tensor)**2)
            l_g = l_gan_A + l_gan_B + lambda_f * (l_f_A + l_f_B) + lambda_s * (
                l_s_A + l_s_B) + lambda_c * (l_c_A + l_c_B) + lambda_cy * (
                    l_cy_A + l_cy_B)
            l_g.backward()
            optimizer_g.step()
            if iters % hparams.log_interval == 0:
                print("---------------------")
                print("Gen Loss :{} disc loss :{}".format(
                    l_g / hparams.batch_size, l_d / hparams.batch_size))
                print("epoch :", e, " ", "total ", hparams.epoch_size)
                print("iteration :", iters)
            if iters % hparams.model_save_interval == 0:
                torch.save(
                    generator_A.state_dict(),
                    os.path.join(model_path,
                                 'model_gen_A_{}.pth'.format(iters)))
                torch.save(
                    generator_B.state_dict(),
                    os.path.join(model_path,
                                 'model_gen_B_{}.pth'.format(iters)))
                torch.save(
                    discriminator_A.state_dict(),
                    os.path.join(model_path,
                                 'model_dis_A_{}.pth'.format(iters)))
                torch.save(
                    discriminator_B.state_dict(),
                    os.path.join(model_path,
                                 'model_dis_B_{}.pth'.format(iters)))
                torch.save(
                    ContEncoder_A.state_dict(),
                    os.path.join(model_path,
                                 'model_ContEnc_A_{}.pth'.format(iters)))
                torch.save(
                    ContEncoder_B.state_dict(),
                    os.path.join(model_path,
                                 'model_ContEnc_B_{}.pth'.format(iters)))
                torch.save(
                    StEncoder_A.state_dict(),
                    os.path.join(model_path,
                                 'model_StEnc_A_{}.pth'.format(iters)))
                torch.save(
                    StEncoder_B.state_dict(),
                    os.path.join(model_path,
                                 'model_StEnc_B_{}.pth'.format(iters)))
            iters += 1
def train(args, checkpoint, mid_checkpoint_location, final_checkpoint_location,
          best_checkpoint_location, actfun, curr_seed, outfile_path, filename,
          fieldnames, curr_sample_size, device, num_params,
          curr_k=2, curr_p=1, curr_g=1, perm_method='shuffle'):
    """
    Runs training session for a given randomized model
    :param args: arguments for this job
    :param checkpoint: current checkpoint
    :param checkpoint_location: output directory for checkpoints
    :param actfun: activation function currently being used
    :param curr_seed: seed being used by current job
    :param outfile_path: path to save outputs from training session
    :param fieldnames: column names for output file
    :param device: reference to CUDA device for GPU support
    :param num_params: number of parameters in the network
    :param curr_k: k value for this iteration
    :param curr_p: p value for this iteration
    :param curr_g: g value for this iteration
    :param perm_method: permutation strategy for our network
    :return:
    """
    resnet_ver = args.resnet_ver
    resnet_width = args.resnet_width
    num_epochs = args.num_epochs
    # 1-D activations don't combine units, so k collapses to 1.
    actfuns_1d = ['relu', 'abs', 'swish', 'leaky_relu', 'tanh']
    if actfun in actfuns_1d:
        curr_k = 1

    kwargs = {'num_workers': 1, 'pin_memory': True} if torch.cuda.is_available() else {}

    if args.one_shot:
        # One-shot mode: build a throwaway model/dataset and run the LR finder
        # to pick max_lr automatically.
        util.seed_all(curr_seed)
        model_temp, _ = load_model(args.model, args.dataset, actfun, curr_k,
                                   curr_p, curr_g, num_params=num_params,
                                   perm_method=perm_method, device=device,
                                   resnet_ver=resnet_ver,
                                   resnet_width=resnet_width,
                                   verbose=args.verbose)
        util.seed_all(curr_seed)
        dataset_temp = util.load_dataset(
            args, args.model, args.dataset, seed=curr_seed, validation=True,
            batch_size=args.batch_size, train_sample_size=curr_sample_size,
            kwargs=kwargs)
        curr_hparams = hparams.get_hparams(args.model, args.dataset, actfun,
                                           curr_seed, num_epochs, args.search,
                                           args.hp_idx, args.one_shot)
        optimizer = optim.Adam(model_temp.parameters(),
                               betas=(curr_hparams['beta1'],
                                      curr_hparams['beta2']),
                               eps=curr_hparams['eps'],
                               weight_decay=curr_hparams['wd']
                               )
        start_time = time.time()
        oneshot_fieldnames = fieldnames if args.search else None
        oneshot_outfile_path = outfile_path if args.search else None
        lr = util.run_lr_finder(
            args, model_temp, dataset_temp[0], optimizer,
            nn.CrossEntropyLoss(), val_loader=dataset_temp[3], show=False,
            device=device, fieldnames=oneshot_fieldnames,
            outfile_path=oneshot_outfile_path, hparams=curr_hparams
        )
        # Hparams were only needed for the LR search; clear them for the run.
        curr_hparams = {}
        print("Time to find LR: {}\n LR found: {:3e}".format(
            time.time() - start_time, lr))
    else:
        curr_hparams = hparams.get_hparams(args.model, args.dataset, actfun,
                                           curr_seed, num_epochs, args.search,
                                           args.hp_idx)
        lr = curr_hparams['max_lr']

    criterion = nn.CrossEntropyLoss()
    model, model_params = load_model(args.model, args.dataset, actfun, curr_k,
                                     curr_p, curr_g, num_params=num_params,
                                     perm_method=perm_method, device=device,
                                     resnet_ver=resnet_ver,
                                     resnet_width=resnet_width,
                                     verbose=args.verbose)
    util.seed_all(curr_seed)
    model.apply(util.weights_init)
    util.seed_all(curr_seed)
    dataset = util.load_dataset(
        args, args.model, args.dataset, seed=curr_seed,
        validation=args.validation, batch_size=args.batch_size,
        train_sample_size=curr_sample_size, kwargs=kwargs)
    loaders = {
        'aug_train': dataset[0],
        'train': dataset[1],
        'aug_eval': dataset[2],
        'eval': dataset[3],
    }
    sample_size = dataset[4]
    batch_size = dataset[5]

    if args.one_shot:
        optimizer = optim.Adam(model_params)
        scheduler = OneCycleLR(optimizer,
                               max_lr=lr,
                               epochs=num_epochs,
                               steps_per_epoch=int(math.floor(sample_size / batch_size)),
                               cycle_momentum=False
                               )
    else:
        optimizer = optim.Adam(model_params,
                               betas=(curr_hparams['beta1'],
                                      curr_hparams['beta2']),
                               eps=curr_hparams['eps'],
                               weight_decay=curr_hparams['wd']
                               )
        scheduler = OneCycleLR(optimizer,
                               max_lr=curr_hparams['max_lr'],
                               epochs=num_epochs,
                               steps_per_epoch=int(math.floor(sample_size / batch_size)),
                               pct_start=curr_hparams['cycle_peak'],
                               cycle_momentum=False
                               )

    epoch = 1
    if checkpoint is not None:
        # Resume model/optimizer/scheduler state from a mid-run checkpoint.
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        epoch = checkpoint['epoch']
        model.to(device)
        print("*** LOADED CHECKPOINT ***"
              "\n{}"
              "\nSeed: {}"
              "\nEpoch: {}"
              "\nActfun: {}"
              "\nNum Params: {}"
              "\nSample Size: {}"
              "\np: {}"
              "\nk: {}"
              "\ng: {}"
              "\nperm_method: {}".format(mid_checkpoint_location,
                                         checkpoint['curr_seed'],
                                         checkpoint['epoch'],
                                         checkpoint['actfun'],
                                         checkpoint['num_params'],
                                         checkpoint['sample_size'],
                                         checkpoint['p'], checkpoint['k'],
                                         checkpoint['g'],
                                         checkpoint['perm_method']))

    util.print_exp_settings(curr_seed, args.dataset, outfile_path, args.model,
                            actfun, util.get_model_params(model), sample_size,
                            batch_size, model.k, model.p, model.g, perm_method,
                            resnet_ver, resnet_width, args.optim,
                            args.validation, curr_hparams)

    best_val_acc = 0
    if args.mix_pre_apex:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O2")

    # ---- Start Training
    while epoch <= num_epochs:

        if args.check_path != '':
            torch.save({'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'scheduler': scheduler.state_dict(),
                        'curr_seed': curr_seed,
                        'epoch': epoch,
                        'actfun': actfun,
                        'num_params': num_params,
                        'sample_size': sample_size,
                        'p': curr_p, 'k': curr_k, 'g': curr_g,
                        'perm_method': perm_method
                        }, mid_checkpoint_location)

        util.seed_all((curr_seed * args.num_epochs) + epoch)
        start_time = time.time()
        if args.mix_pre:
            scaler = torch.cuda.amp.GradScaler()

        # ---- Training
        model.train()
        total_train_loss, n, num_correct, num_total = 0, 0, 0, 0
        for batch_idx, (x, targetx) in enumerate(loaders['aug_train']):
            # print(batch_idx)
            x, targetx = x.to(device), targetx.to(device)
            optimizer.zero_grad()
            if args.mix_pre:
                with torch.cuda.amp.autocast():
                    output = model(x)
                    train_loss = criterion(output, targetx)
                total_train_loss += train_loss
                n += 1
                scaler.scale(train_loss).backward()
                scaler.step(optimizer)
                scaler.update()
            elif args.mix_pre_apex:
                output = model(x)
                train_loss = criterion(output, targetx)
                total_train_loss += train_loss
                n += 1
                with amp.scale_loss(train_loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                optimizer.step()
            else:
                output = model(x)
                train_loss = criterion(output, targetx)
                total_train_loss += train_loss
                n += 1
                train_loss.backward()
                optimizer.step()
            if args.optim == 'onecycle' or args.optim == 'onecycle_sgd':
                scheduler.step()
            _, prediction = torch.max(output.data, 1)
            num_correct += torch.sum(prediction == targetx.data)
            num_total += len(prediction)
        epoch_aug_train_loss = total_train_loss / n
        epoch_aug_train_acc = num_correct * 1.0 / num_total

        alpha_primes = []
        alphas = []
        if model.actfun == 'combinact':
            # Record the (softmaxed) per-layer combination weights.
            for i, layer_alpha_primes in enumerate(model.all_alpha_primes):
                curr_alpha_primes = torch.mean(layer_alpha_primes, dim=0)
                curr_alphas = F.softmax(curr_alpha_primes, dim=0).data.tolist()
                curr_alpha_primes = curr_alpha_primes.tolist()
                alpha_primes.append(curr_alpha_primes)
                alphas.append(curr_alphas)

        # ---- Validation (augmented and clean eval splits)
        model.eval()
        with torch.no_grad():
            total_val_loss, n, num_correct, num_total = 0, 0, 0, 0
            for batch_idx, (y, targety) in enumerate(loaders['aug_eval']):
                y, targety = y.to(device), targety.to(device)
                output = model(y)
                val_loss = criterion(output, targety)
                total_val_loss += val_loss
                n += 1
                _, prediction = torch.max(output.data, 1)
                num_correct += torch.sum(prediction == targety.data)
                num_total += len(prediction)
            epoch_aug_val_loss = total_val_loss / n
            epoch_aug_val_acc = num_correct * 1.0 / num_total

            total_val_loss, n, num_correct, num_total = 0, 0, 0, 0
            for batch_idx, (y, targety) in enumerate(loaders['eval']):
                y, targety = y.to(device), targety.to(device)
                output = model(y)
                val_loss = criterion(output, targety)
                total_val_loss += val_loss
                n += 1
                _, prediction = torch.max(output.data, 1)
                num_correct += torch.sum(prediction == targety.data)
                num_total += len(prediction)
            epoch_val_loss = total_val_loss / n
            epoch_val_acc = num_correct * 1.0 / num_total

        lr_curr = 0
        for param_group in optimizer.param_groups:
            lr_curr = param_group['lr']
        print(
            " Epoch {}: LR {:1.5f} ||| aug_train_acc {:1.4f} | val_acc {:1.4f}, aug {:1.4f} ||| "
            "aug_train_loss {:1.4f} | val_loss {:1.4f}, aug {:1.4f} ||| time = {:1.4f}"
            .format(epoch, lr_curr, epoch_aug_train_acc, epoch_val_acc,
                    epoch_aug_val_acc, epoch_aug_train_loss, epoch_val_loss,
                    epoch_aug_val_loss, (time.time() - start_time)),
            flush=True
        )

        if args.hp_idx is None:
            hp_idx = -1
        else:
            hp_idx = args.hp_idx

        epoch_train_loss = 0
        epoch_train_acc = 0
        if epoch == num_epochs:
            # Final epoch: also measure loss/acc on the training splits.
            with torch.no_grad():
                total_train_loss, n, num_correct, num_total = 0, 0, 0, 0
                for batch_idx, (x, targetx) in enumerate(loaders['aug_train']):
                    x, targetx = x.to(device), targetx.to(device)
                    output = model(x)
                    train_loss = criterion(output, targetx)
                    total_train_loss += train_loss
                    n += 1
                    _, prediction = torch.max(output.data, 1)
                    num_correct += torch.sum(prediction == targetx.data)
                    num_total += len(prediction)
                epoch_aug_train_loss = total_train_loss / n
                epoch_aug_train_acc = num_correct * 1.0 / num_total

                total_train_loss, n, num_correct, num_total = 0, 0, 0, 0
                for batch_idx, (x, targetx) in enumerate(loaders['train']):
                    x, targetx = x.to(device), targetx.to(device)
                    output = model(x)
                    train_loss = criterion(output, targetx)
                    total_train_loss += train_loss
                    n += 1
                    _, prediction = torch.max(output.data, 1)
                    num_correct += torch.sum(prediction == targetx.data)
                    num_total += len(prediction)
                # FIX: was `total_val_loss / n`, which reused the stale
                # validation accumulator and corrupted epoch_train_loss
                # (and hence 'gen_gap') in the CSV output.
                epoch_train_loss = total_train_loss / n
                epoch_train_acc = num_correct * 1.0 / num_total

        # Outputting data to CSV at end of epoch
        with open(outfile_path, mode='a') as out_file:
            writer = csv.DictWriter(out_file, fieldnames=fieldnames,
                                    lineterminator='\n')
            writer.writerow({'dataset': args.dataset,
                             'seed': curr_seed,
                             'epoch': epoch,
                             'time': (time.time() - start_time),
                             'actfun': model.actfun,
                             'sample_size': sample_size,
                             'model': args.model,
                             'batch_size': batch_size,
                             'alpha_primes': alpha_primes,
                             'alphas': alphas,
                             'num_params': util.get_model_params(model),
                             'var_nparams': args.var_n_params,
                             'var_nsamples': args.var_n_samples,
                             'k': curr_k,
                             'p': curr_p,
                             'g': curr_g,
                             'perm_method': perm_method,
                             'gen_gap': float(epoch_val_loss - epoch_train_loss),
                             'aug_gen_gap': float(epoch_aug_val_loss - epoch_aug_train_loss),
                             'resnet_ver': resnet_ver,
                             'resnet_width': resnet_width,
                             'epoch_train_loss': float(epoch_train_loss),
                             'epoch_train_acc': float(epoch_train_acc),
                             'epoch_aug_train_loss': float(epoch_aug_train_loss),
                             'epoch_aug_train_acc': float(epoch_aug_train_acc),
                             'epoch_val_loss': float(epoch_val_loss),
                             'epoch_val_acc': float(epoch_val_acc),
                             'epoch_aug_val_loss': float(epoch_aug_val_loss),
                             'epoch_aug_val_acc': float(epoch_aug_val_acc),
                             'hp_idx': hp_idx,
                             'curr_lr': lr_curr,
                             'found_lr': lr,
                             'hparams': curr_hparams,
                             'epochs': num_epochs
                             })

        epoch += 1
        if args.optim == 'rmsprop':
            scheduler.step()

        if args.checkpoints:
            if epoch_val_acc > best_val_acc:
                best_val_acc = epoch_val_acc
                torch.save({'state_dict': model.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'scheduler': scheduler.state_dict(),
                            'curr_seed': curr_seed,
                            'epoch': epoch,
                            'actfun': actfun,
                            'num_params': num_params,
                            'sample_size': sample_size,
                            'p': curr_p, 'k': curr_k, 'g': curr_g,
                            'perm_method': perm_method
                            }, best_checkpoint_location)
            torch.save({'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'scheduler': scheduler.state_dict(),
                        'curr_seed': curr_seed,
                        'epoch': epoch,
                        'actfun': actfun,
                        'num_params': num_params,
                        'sample_size': sample_size,
                        'p': curr_p, 'k': curr_k, 'g': curr_g,
                        'perm_method': perm_method
                        }, final_checkpoint_location)
# NOTE(review): the argument spec below is the tail of a
# parser.add_argument('--n_gpus', ...) call whose opening lies before this
# chunk — confirm against the full file.
                    default=1,
                    required=False,
                    help='number of gpus')
parser.add_argument('--rank', type=int, default=0,
                    required=False, help='rank of current gpu')
parser.add_argument('--group_name', type=str, default='group_name',
                    required=False, help='Distributed group name')

# Register hyperparameter flags, then merge CLI overrides into hparams.
add_hparams(parser)
args = parser.parse_args()
hparams = get_hparams(args, parser)

# Configure cuDNN before any CUDA work happens.
torch.backends.cudnn.enabled = hparams.cudnn_enabled
torch.backends.cudnn.benchmark = hparams.cudnn_benchmark

print("FP16 Run:", hparams.fp16_run)
print("Dynamic Loss Scaling:", hparams.dynamic_loss_scaling)
print("Distributed Run:", hparams.distributed_run)
print("cuDNN Enabled:", hparams.cudnn_enabled)
print("cuDNN Benchmark:", hparams.cudnn_benchmark)

train(args.output_directory, args.log_directory, args.checkpoint_path,
      args.warm_start, args.n_gpus, args.rank, args.group_name, hparams)
def test():
    """Synthesize A->B translated features for the CHiME-4 dt05 test sets.

    Loads the trained generator/encoder (and, for checkpoint-integrity,
    discriminator) networks for the current task/spec/iteration, then runs
    every utterance of the dt05 "real" and "simu" track lists through
    ContentEncoder_A -> generator_B with a random style code, and saves each
    result as ``z-<audio_name>.npy`` under the matching output directory.

    Reads hyper-parameters via ``get_hparams()``; no arguments, no return.
    """
    hparams = get_hparams()
    print(hparams.task_name)

    # Kept for parity with the training script; not used below.
    model_path = os.path.join(hparams.model_path, hparams.task_name,
                              hparams.spec_opt)

    # Test-set feature root and the CHiME-4 dt05 track lists.
    root = '../dataset/feat/test'
    list_dir_A = './etc/Test_dt05_real_isolated_1ch_track_list.csv'
    list_dir_B = './etc/Test_dt05_simu_isolated_1ch_track_list.csv'

    output_dir = './output/{}/{}_AB_dt'.format(hparams.task_name,
                                               hparams.iteration_num)
    output_dir_real = os.path.join(output_dir, 'dt_real')
    output_dir_simu = os.path.join(output_dir, 'dt_simu')
    for d in (output_dir, output_dir_real, output_dir_simu):
        # Replaces the original exists()/makedirs pairs; also race-free.
        os.makedirs(d, exist_ok=True)

    normalizer_clean = Tanhize('clean')
    normalizer_noisy = Tanhize('noisy')

    test_list_A, speaker_A = testset_list_classifier(root, list_dir_A)
    test_list_B, speaker_B = testset_list_classifier(root, list_dir_B)
    # test_list_C, speaker_C = testset_list_classifier(root, list_dir_C, 'clean')

    # Mirror the training-time DataParallel wrapping so the checkpoints'
    # "module."-prefixed state-dict keys line up.
    generator_A = nn.DataParallel(Generator()).cuda()
    generator_B = nn.DataParallel(Generator()).cuda()
    discriminator_A = nn.DataParallel(Discriminator()).cuda()
    discriminator_B = nn.DataParallel(Discriminator()).cuda()
    ContEncoder_A = nn.DataParallel(ContentEncoder()).cuda()
    ContEncoder_B = nn.DataParallel(ContentEncoder()).cuda()
    StEncoder_A = nn.DataParallel(StyleEncoder()).cuda()
    StEncoder_B = nn.DataParallel(StyleEncoder()).cuda()

    # Deserialize onto CPU; the DataParallel modules already live on GPU.
    map_location = lambda storage, loc: storage

    def _load(module, tag):
        # One checkpoint per network:
        #   ./models/<task>/<spec>/model_<tag>_<iteration>.pth
        module.load_state_dict(torch.load(
            './models/{}/{}/model_{}_{}.pth'.format(
                hparams.task_name, hparams.spec_opt, tag,
                hparams.iteration_num),
            map_location=map_location))

    _load(generator_A, 'gen_A')
    _load(generator_B, 'gen_B')
    _load(discriminator_A, 'dis_A')  # loaded but unused at inference time
    _load(discriminator_B, 'dis_B')  # loaded but unused at inference time
    _load(ContEncoder_A, 'ContEnc_A')
    _load(ContEncoder_B, 'ContEnc_B')
    _load(StEncoder_A, 'StEnc_A')
    _load(StEncoder_B, 'StEnc_B')

    # Inference mode for the modules used below — hoisted out of the loops
    # (the original re-set these on every iteration, and only the first loop
    # set StEncoder_B; eval() is an idempotent mode flag, so once suffices).
    generator_B.eval()
    ContEncoder_A.eval()
    StEncoder_B.eval()

    def _synthesize(entry, speaker, out_dir):
        # A -> B translation of one utterance with a random 8-dim style code;
        # saves the de-normalized spectrum as 'z-<audio_name>.npy'.
        feat = testset_loader(root, entry, speaker,
                              normalizer=normalizer_noisy)
        print(feat['audio_name'])
        content = Variable(
            torch.FloatTensor(feat['sp']).unsqueeze(0)).cuda()
        cont_code = ContEncoder_A(content)
        z_st = get_z_random(1, 8)
        out = generator_B(cont_code, z_st)
        out = normalizer_noisy.backward_process(out.squeeze().data)
        out = out.squeeze().data.cpu().numpy()
        np.save(os.path.join(out_dir, 'z-' + feat['audio_name']), out)

    for entry in test_list_A:  # dt05 real
        _synthesize(entry, speaker_A, output_dir_real)
    for entry in test_list_B:  # dt05 simu
        _synthesize(entry, speaker_B, output_dir_simu)
def main():
    """Train a Siamese network for one-shot verification.

    Builds train/valid/test loaders, initializes the network weights, then
    trains with plain SGD and a per-epoch exponential learning-rate decay.
    Validation error (mean per-trial binary cross-entropy) drives a plateau
    counter that early-stops after 20 consecutive identical rounded errors.
    Checkpoints are written every 20 epochs and on early stop.
    """
    hp = get_hparams()
    transform = transforms.Compose([transforms.ToTensor()])

    train_loader = get_loader(hp.bg_data_path, hp.ev_data_path, hp.batch_size,
                              hp.dataset_size, True, transform, mode="train")
    valid_loader = get_loader(hp.bg_data_path, hp.ev_data_path, hp.num_way,
                              hp.valid_trial * hp.num_way, False, transform,
                              mode="valid")
    # Unused in this function; kept because get_loader may have side effects
    # and a test phase presumably consumes it — TODO confirm.
    test_loader = get_loader(hp.bg_data_path, hp.ev_data_path, hp.num_way,
                             hp.test_trial * hp.num_way, False, transform,
                             mode="test")

    model = SiameseNet().to(device)

    def weights_init(m):
        # Gaussian init: small-variance conv weights, biases centered at 0.5.
        if isinstance(m, nn.Conv2d):
            torch.nn.init.normal_(m.weight, 0.0, 1e-2)
            torch.nn.init.normal_(m.bias, 0.5, 1e-2)
        if isinstance(m, nn.Linear):
            torch.nn.init.normal_(m.weight, 0.0, 0.2)
            torch.nn.init.normal_(m.bias, 0.5, 1e-2)

    model.apply(weights_init)

    num_epochs = hp.num_epochs
    total_step = len(train_loader)
    stop_decision = 1          # consecutive epochs with unchanged valid error
    prev_error = 0.0

    for epoch in range(num_epochs):
        # Exponential decay: lr = lr0 * 0.99^epoch.
        # NOTE(review): rebuilding the optimizer every epoch also resets SGD
        # momentum buffers — presumably intentional, confirm.
        lr = hp.learning_rate * pow(0.99, epoch)
        optimizer = torch.optim.SGD(model.parameters(), lr=lr,
                                    momentum=hp.momentum,
                                    weight_decay=hp.reg_scale)

        for i, (images_1, images_2, label) in enumerate(train_loader):
            images_1 = images_1.to(device).float()
            images_2 = images_2.to(device).float()
            label = label.to(device).float()

            prob = model(images_1, images_2)
            # Binary cross-entropy written out explicitly, summed over the
            # batch and normalized by the nominal batch size.
            obj = label * torch.log(prob) + (1. - label) * torch.log(1. - prob)
            loss = -torch.sum(obj) / float(hp.batch_size)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i + 1) % hp.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, i + 1, total_step,
                              loss.item()))

        # Validation. Fix: run under no_grad — the original built an autograd
        # graph for every validation batch for no reason. Values are
        # unchanged; only graph construction is skipped.
        valid_errors = 0.0
        total_sample = 0.0
        with torch.no_grad():
            for images_1, images_2, label in valid_loader:
                images_1 = images_1.to(device).float()
                images_2 = images_2.to(device).float()
                label = label.to(device).float()

                prob = model(images_1, images_2)
                obj = (label * torch.log(prob)
                       + (1. - label) * torch.log(1. - prob))
                valid_errors += (-torch.sum(obj).detach().cpu().numpy()
                                 / float(hp.num_way))
                total_sample += 1.0

        valid_error = np.round(valid_errors / total_sample, 4)
        print('Epoch [{}/{}], Validation Error : {:.4f}'
              .format(epoch + 1, num_epochs, valid_error))

        # NOTE(review): exact float equality on the rounded error — this
        # counts a flat plateau only; confirm ">=" (no improvement) was not
        # the intent.
        if valid_error == prev_error:
            stop_decision += 1
        else:
            stop_decision = 1
        if stop_decision == 20:
            print('Epoch [{}/{}], Early Stopped Training!'
                  .format(epoch + 1, num_epochs))
            torch.save(model.state_dict(), os.path.join(
                hp.model_path, 'siamese-{}.ckpt'.format(epoch + 1)))
            break
        prev_error = valid_error

        # Periodic checkpoint every 20 epochs.
        if (epoch + 1) % 20 == 0:
            torch.save(model.state_dict(), os.path.join(
                hp.model_path, 'siamese-{}.ckpt'.format(epoch + 1)))
def run(args, server):
    """Run one A3C worker: build the env, restore/init TF state, and train.

    Wraps the environment with catastrophe/block recording, optional online
    human labelling, and frame saving, then drives ``A3C.process`` under a
    ``tf.train.Supervisor`` managed session until the supervisor stops.

    Args:
        args: parsed CLI namespace (env_id, task, remotes, log_dir,
            max_episodes, online, ...).
        server: ``tf.train.Server`` whose target hosts the session.
    """
    hparams = get_hparams(args)

    # Per-worker recording directories, keyed by task index.
    catastrophe_dir = os.path.join(args.log_dir, "catastrophes",
                                   "w{}".format(args.task))
    block_dir = os.path.join(args.log_dir, "blocks", "w{}".format(args.task))
    env = create_env(args.env_id,
                     client_id=str(args.task),
                     remotes=args.remotes,
                     catastrophe_dir=catastrophe_dir,
                     block_dir=block_dir,
                     **hparams)
    print(args.env_id)
    print(env)

    # todo - move this wrapper further down the stack (before the AtariWrapper
    # that modifies the observation and scales the rewards)
    max_episodes = args.max_episodes
    if args.online:
        if max_episodes is not None:
            print(
                "WARNING: Setting max_episodes to None because we are in online labelling mode"
            )
        max_episodes = None
        env = frame.HumanLabelWrapper(args.env_id, env, **hparams)
    env = frame.FrameSaverWrapper(
        env,
        os.path.join(args.log_dir, "episodes", "w{}".format(args.task)),
        max_episodes)

    trainer = A3C(env, args.task, **hparams)

    # Variable names that start with "local" are not saved in checkpoints.
    if use_tf12_api:
        variables_to_save = [
            v for v in tf.global_variables() if not v.name.startswith("local")
        ]
        init_op = tf.variables_initializer(variables_to_save)
        init_all_op = tf.global_variables_initializer()
    else:
        variables_to_save = [
            v for v in tf.all_variables() if not v.name.startswith("local")
        ]
        init_op = tf.initialize_variables(variables_to_save)
        init_all_op = tf.initialize_all_variables()
    saver = FastSaver(variables_to_save)

    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 tf.get_variable_scope().name)
    logger.info('Trainable vars:')
    for v in var_list:
        logger.info('  %s %s', v.name, v.get_shape())

    def init_fn(ses):
        logger.info("Initializing all parameters.")
        ses.run(init_all_op)

    # Pin this worker to the PS job and its own CPU device.
    config = tf.ConfigProto(device_filters=[
        "/job:ps", "/job:worker/task:{}/cpu:0".format(args.task)
    ])
    logdir = os.path.join(args.log_dir, 'train')

    if use_tf12_api:
        summary_writer = tf.summary.FileWriter(logdir + "_%d" % args.task)
    else:
        summary_writer = tf.train.SummaryWriter(logdir + "_%d" % args.task)  # pylint: disable=E1101

    logger.info("Events directory: %s_%s", logdir, args.task)
    sv = tf.train.Supervisor(
        is_chief=(args.task == 0),
        logdir=logdir,
        saver=saver,
        summary_op=None,
        init_op=init_op,
        init_fn=init_fn,
        summary_writer=summary_writer,
        ready_op=tf.report_uninitialized_variables(variables_to_save),
        global_step=trainer.global_step,
        save_model_secs=30,
        save_summaries_secs=30)

    # Fix: removed the dead `num_global_steps = 100000000` that was
    # immediately overwritten. 0 is a sentinel meaning "no step limit — run
    # until the supervisor asks us to stop" (see the loop condition below).
    num_global_steps = 0

    logger.info(
        "Starting session. If this hangs, we're mostly likely waiting to connect to the parameter server. "
        +
        "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified."
    )
    with sv.managed_session(server.target, config=config) as sess, sess.as_default():
        sess.run(trainer.sync)
        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        logger.info("Starting training at step=%d", global_step)
        while not sv.should_stop() and (not num_global_steps or
                                        global_step < num_global_steps):
            trainer.process(sess)
            global_step = sess.run(trainer.global_step)

    # Ask for all the services to stop.
    sv.stop()
    logger.info('reached %s steps. worker stopped.', global_step)