layers_per_stack = args.layers, total_samples = total_samples, l2_lambda = args.L2, bias = args.bias, dropout = args.dropout, use_bayesian = args.bayesian, backwards = args.backwards, multi_gpu = True ) print(model.summary()) if args.multi_gpu: model = nn.DataParallel(model) model.to(device) optimizer = optim.Adam(model.parameters(), lr = args.learning_rate) if args.anneal_learning_rates: T_0 = 1 T_mult = 2 scheduler = CosineAnnealingWarmRestarts(optimizer, T_0, T_mult) else: scheduler = None model_save_name = args.results_dir / Path("model.torch") model_save_name_latest = args.results_dir / Path("model_latest.torch") if model_save_name.exists(): print(f"Loading saved model from {model_save_name}...") if args.multi_gpu:
print("Data loaded!")

# Instantiate the WaveNet from the parsed CLI arguments and move it onto
# the training device.
wavenet_kwargs = dict(
    input_channels=NUM_TOKENS,
    residual_channels=args.residual_channels,
    out_channels=NUM_TOKENS,
    stacks=args.stacks,
    layers_per_stack=args.layers,
    total_samples=train_length,
    l2_lambda=args.L2,
    bias=args.bias,
    dropout=args.dropout,
    use_bayesian=args.bayesian,
    backwards=args.backwards,
)
model = WaveNet(**wavenet_kwargs).to(device)
print(model.summary())

optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

# Optional cosine warm-restart LR schedule; otherwise train at a fixed rate.
if args.anneal_learning_rates:
    T_0 = 1
    T_mult = 2
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0, T_mult)
else:
    scheduler = None

# Resume from an existing checkpoint in the results directory, if any.
model_save_name = args.results_dir / Path("model.torch")
if model_save_name.exists():
    print(f"Loading saved model from {model_save_name}...")
    checkpoint = torch.load(model_save_name, map_location=device)
    model.load_state_dict(checkpoint["state_dict"])
    print(f"Model loaded.")
def train(args):
    """Train a conditioned WaveNet on the configured dataset.

    Runs an endless training loop (``while True`` — there is no stopping
    criterion; the process is expected to be interrupted externally),
    evaluating on the validation loader every 1000 iterations and saving a
    checkpoint to ``<workspace>/models/...`` every 10000 iterations.

    Args:
        args: parsed CLI namespace providing ``dataset``, ``dataset_dir``,
            ``workspace``, ``filename``, ``batch_size``, ``condition`` and
            ``cuda``; remaining hyperparameters come from the module-level
            ``config`` object.
    """
    # Arguments & parameters
    dataset = args.dataset
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    filename = args.filename
    # One audio clip per mini-batch; must be 1 if clips have different lengths.
    batch_size = args.batch_size
    condition = args.condition  # whether to use the global condition labels
    cuda = args.cuda
    # NOTE(review): this local is never used below (the model is built from
    # config.quantize_bins directly) — candidate for removal.
    quantize_bins = config.quantize_bins
    dilations = config.dilations

    # Paths — checkpoints go under the workspace, keyed by dataset/condition.
    models_dir = os.path.join(workspace, 'models',
        'dataset={}'.format(dataset), filename,
        'condition={}'.format(condition))
    create_folder(models_dir)

    # Data Generator
    Dataset = get_dataset(dataset)
    train_dataset = Dataset(dataset_dir, data_type='train')
    validate_dataset = Dataset(dataset_dir, data_type='validate')

    train_loader = torch.utils.data.DataLoader(train_dataset,
        batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True)

    validate_loader = torch.utils.data.DataLoader(validate_dataset,
        batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True)

    # Model
    model = WaveNet(
        dilations,
        residual_channels=config.residual_channels,
        dilation_channels=config.dilation_channels,
        skip_channels=config.skip_channels,
        quantize_bins=config.quantize_bins,
        global_condition_channels=config.global_condition_channels,
        global_condition_cardinality=Dataset.global_condition_cardinality,
        use_cuda=cuda)

    if cuda:
        model.cuda()

    # Optimizer (fixed LR; no weight decay)
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
        eps=1e-08, weight_decay=0.)

    train_bgn_time = time.time()
    iteration = 0

    # Endless training loop; re-iterates the train loader forever.
    while True:
        for (batch_x, global_condition) in train_loader:
            '''batch_x: (batch_size, seq_len)
            global_condition: (batch_size,)
            '''
            print('iteration: {}, input size: {}'.format(
                iteration, batch_x.shape))

            # Evaluate every 1000 iterations (including iteration 0).
            if iteration % 1000 == 0:
                train_fin_time = time.time()
                evaluate_bgn_time = time.time()
                loss = evaluate(model, validate_loader, condition, cuda)
                print('-----------------')
                logging.info(
                    'iteration: {}, loss: {:.3f}, train_time: {:.3f}, '
                    'validate time: {:.3f} s'.format(
                        iteration, loss, train_fin_time - train_bgn_time,
                        time.time() - evaluate_bgn_time))
                train_bgn_time = time.time()

            # Save model (state dict + optimizer state) every 10000 iterations.
            if iteration % 10000 == 0:
                save_out_dict = {'iteration': iteration,
                                 'state_dict': model.state_dict(),
                                 'optimizer': optimizer.state_dict()
                                 }
                save_out_path = os.path.join(
                    models_dir, 'md_{}_iters.tar'.format(iteration))
                torch.save(save_out_dict, save_out_path)
                logging.info('Save model to {}'.format(save_out_path))

            # Move data to GPU (condition labels only when conditioning is on).
            if condition:
                global_condition = move_data_to_gpu(global_condition, cuda)
            else:
                global_condition = None

            batch_x = move_data_to_gpu(batch_x, cuda)

            # Prepare input and target data: input is all samples but the last;
            # the target is the trailing window the model can actually predict
            # once its receptive field is accounted for.
            batch_input = batch_x[:, 0:-1]
            output_width = batch_input.shape[-1] - model.receptive_field + 1
            batch_target = batch_x[:, -output_width:]

            # Forward
            model.train()
            batch_output = model(batch_input, global_condition)
            loss = _loss_func(batch_output, batch_target)

            # Backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print('loss: {:.3f}'.format(loss.data.cpu().numpy()))

            iteration += 1