import datetime
import os
from glob import glob

import torch
from torch.utils.tensorboard import SummaryWriter  # or: from tensorboardX import SummaryWriter

# SampleRNN, Predictor, Generator, and the helpers used below
# (default_params, setup_results_dir, tee_stdout, make_data_loader,
# load_last_checkpoint, gradient_clipping, sequence_nll_loss_bits,
# generate_sample) come from the project's own modules.


def main(exp, frame_sizes, dataset, **params):
    params = dict(
        default_params,
        exp=exp, frame_sizes=frame_sizes, dataset=dataset,
        **params
    )
    os.environ['CUDA_VISIBLE_DEVICES'] = params['gpu']

    results_path = setup_results_dir(params)
    tee_stdout(os.path.join(results_path, 'log'))

    model = SampleRNN(
        frame_sizes=params['frame_sizes'],
        n_rnn=params['n_rnn'],
        dim=params['dim'],
        learn_h0=params['learn_h0'],
        q_levels=params['q_levels'],
        weight_norm=params['weight_norm'],
        dropout=params['dropout']
    )
    predictor = Predictor(model)
    if params['cuda']:
        model = model.cuda()
        predictor = predictor.cuda()

    optimizer = gradient_clipping(
        torch.optim.Adam(predictor.parameters(), lr=params['lr']))

    data_loader = make_data_loader(model.lookback, params)
    test_split = 1 - params['test_frac']
    val_split = test_split - params['val_frac']

    criterion = sequence_nll_loss_bits

    checkpoints_path = os.path.join(results_path, 'checkpoints')
    checkpoint_data = load_last_checkpoint(checkpoints_path, params)
    if checkpoint_data is not None:
        (state_dict, epoch, iteration) = checkpoint_data
        # resume from the checkpointed epoch and global step
        start_epoch = int(epoch)
        global_step = iteration
        predictor.load_state_dict(state_dict)
    else:
        start_epoch = 0
        global_step = 0

    writer = SummaryWriter(
        os.path.join(
            results_path,
            "{}-{}".format(
                params['dataset'],
                str(datetime.datetime.now()).split('.')[0].replace(' ', '-'))))

    dataset_train = data_loader(0, val_split, eval=False)
    dataset_val = data_loader(val_split, test_split, eval=True)
    dataset_test = data_loader(test_split, 1, eval=True)

    generator = Generator(predictor.model, params['cuda'])

    def wrap(input):
        # wrap tensors and move them to the GPU when CUDA is enabled
        if torch.is_tensor(input):
            input = torch.autograd.Variable(input)
            if params['cuda']:
                input = input.cuda()
        return input

    best_val_loss = float('inf')
    for e in range(start_epoch, int(params['epoch_limit'])):
        # training: one pass over the training split
        for i, data in enumerate(dataset_train):
            batch_inputs = list(map(wrap, data[:-1]))
            batch_target = torch.autograd.Variable(data[-1])
            if params['cuda']:
                batch_target = batch_target.cuda()

            plugin_data = [None, None]

            def closure():
                batch_output = predictor(*batch_inputs)
                loss = criterion(batch_output, batch_target)
                loss.backward()
                if plugin_data[0] is None:
                    plugin_data[0] = batch_output.data
                    plugin_data[1] = loss.data
                return loss

            optimizer.zero_grad()
            optimizer.step(closure)
            train_loss = plugin_data[1]

            # stats: per iteration
            writer.add_scalar('train/train loss', train_loss, global_step)
            print("E:{:03d}-S{:05d}: Loss={}".format(e, i, train_loss))
            global_step += 1

        # validation: per epoch
        predictor.eval()
        with torch.no_grad():
            loss_sum = 0
            n_examples = 0
            for data in dataset_val:
                batch_inputs = list(map(wrap, data[:-1]))
                batch_target = torch.autograd.Variable(data[-1])
                if params['cuda']:
                    batch_target = batch_target.cuda()
                batch_size = batch_target.size()[0]

                batch_output = predictor(*batch_inputs)
                loss_sum += criterion(batch_output, batch_target).item() * batch_size
                n_examples += batch_size
            val_loss = loss_sum / n_examples

        writer.add_scalar('validation/validation loss', val_loss, global_step)
        print("== Validation Step E:{:03d}: Loss={} ==".format(e, val_loss))
        predictor.train()

        # saver: per epoch
        last_pattern = 'ep{}-it{}'
        best_pattern = 'best-ep{}-it{}'
        if not params['keep_old_checkpoints']:
            pattern = os.path.join(checkpoints_path, last_pattern.format('*', '*'))
            for file_name in glob(pattern):
                os.remove(file_name)
        torch.save(
            predictor.state_dict(),
            os.path.join(checkpoints_path, last_pattern.format(e, global_step)))

        if val_loss < best_val_loss:
            # delete previous best checkpoints only (globbing last_pattern
            # here would delete the "last" checkpoint saved just above)
            pattern = os.path.join(checkpoints_path, best_pattern.format('*', '*'))
            for file_name in glob(pattern):
                os.remove(file_name)
            torch.save(
                predictor.state_dict(),
                os.path.join(checkpoints_path, best_pattern.format(e, global_step)))
            best_val_loss = val_loss

        generate_sample(generator, params, writer, global_step, results_path, e)

    # generate final results
    generate_sample(generator, params, None, global_step, results_path, 0)
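
# -----------------------------------------------------------------
# `gradient_clipping` and `sequence_nll_loss_bits` are imported from
# the project's own modules and not shown above. The sketches below
# are hypothetical reconstructions of what they plausibly do, for
# reference only -- the clip threshold and tensor shapes are
# assumptions, not the project's actual code.

import math

import torch
import torch.nn.functional as F


def gradient_clipping_sketch(optimizer, max_norm=1.0):
    # Wrap an optimizer so every step() clips the global gradient norm
    # first; max_norm=1.0 is an assumed default.
    original_step = optimizer.step

    def step(closure=None):
        loss = closure() if closure is not None else None  # forward + backward
        params = [p for group in optimizer.param_groups for p in group['params']]
        torch.nn.utils.clip_grad_norm_(params, max_norm)
        original_step()
        return loss

    optimizer.step = step
    return optimizer


def sequence_nll_loss_bits_sketch(output, target):
    # NLL of a quantized sample sequence, converted from nats to bits.
    # Assumes output holds per-step log-probabilities with shape
    # (batch, seq_len, q_levels) and target holds class indices.
    (batch, seq_len, q_levels) = output.size()
    nll = F.nll_loss(output.view(batch * seq_len, q_levels), target.view(-1))
    return nll * math.log2(math.e)  # 1 nat = log2(e) bits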
best_test1_acc = 0.0
best_test2_acc = 0.0
best_test3_acc = 0.0
best_epoch_num = 0
total_epoch_num = 0
all_losses = []
all_acc_1 = []
all_acc_2 = []
all_acc_3 = []

for epoch in range(1, num_epochs):
    total_epoch_num += 1
    # regroup the training examples into freshly shuffled mini-batch blocks
    shuffled_id_blocks = get_shuffled_ids(_data['tr'], batch_size)

    running_loss = 0.0
    predictor.train()
    for id_block in shuffled_id_blocks:
        predictor.zero_grad()
        # zero-initialized hidden and cell states, sized to this block
        h0 = torch.zeros(num_of_layers * num_of_directions,
                         id_block.shape[0], lstm_dim)
        c0 = torch.zeros(num_of_layers * num_of_directions,
                         id_block.shape[0], lstm_dim)
        batch_input, batch_len, batch_label = make_batch(
            _data['tr'], _label['tr'], id_block)

        output = predictor(batch_input, batch_len, h0, c0)
        loss = criterion(output, batch_label)
        running_loss += loss.item() * batch_input.size(0)
        loss.backward()
        # clip the gradient norm to mitigate exploding gradients in the LSTM
        _ = torch.nn.utils.clip_grad_norm_(predictor.parameters(), clip)
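
# -----------------------------------------------------------------
# `get_shuffled_ids` and `make_batch` are defined elsewhere. The
# hypothetical sketch below shows the batching they imply: shuffled
# index blocks plus padding of variable-length sequences. All names,
# dtypes, and shapes here are assumptions.

import numpy as np
import torch
from torch.nn.utils.rnn import pad_sequence


def get_shuffled_ids_sketch(data, batch_size):
    # Shuffle example indices and split them into mini-batch blocks.
    ids = np.random.permutation(len(data))
    return [ids[i:i + batch_size] for i in range(0, len(ids), batch_size)]


def make_batch_sketch(data, labels, id_block):
    # Gather one mini-batch, pad sequences to the block's max length,
    # and return (padded inputs, original lengths, labels).
    seqs = [torch.as_tensor(data[i], dtype=torch.float32) for i in id_block]
    batch_len = torch.tensor([len(s) for s in seqs])
    batch_input = pad_sequence(seqs, batch_first=True)  # (B, T_max, feat)
    batch_label = torch.as_tensor([labels[i] for i in id_block])
    return batch_input, batch_len, batch_label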
def main():
    # Data Loader (Input Pipeline)
    print('loading dataset...')
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               num_workers=4,
                                               drop_last=True,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              num_workers=4,
                                              drop_last=True,
                                              shuffle=False)
    # Define models
    print('building model...')
    g = Generator(input_channel=input_channel)
    h = Predictor(n_outputs=num_classes)
    h2 = Predictor(n_outputs=num_classes)
    g.cuda()
    h.cuda()
    h2.cuda()
    print(g.parameters, h.parameters, h2.parameters)

    optimizer_g = torch.optim.Adam(g.parameters(), lr=learning_rate)
    optimizer_h = torch.optim.Adam(h.parameters(), lr=learning_rate)
    optimizer_h2 = torch.optim.Adam(h2.parameters(), lr=learning_rate)

    with open(txtfile, "a") as myfile:
        myfile.write('epoch: train_acc test_acc\n')

    epoch = 0
    train_acc = 0
    # evaluate models with random weights
    test_acc = evaluate(test_loader, g, h)
    print('Epoch [%d/%d], Test Accuracy: %.4f'
          % (epoch + 1, args.n_epoch, test_acc))
    # save results
    with open(txtfile, "a") as myfile:
        myfile.write(str(int(epoch)) + ': '
                     + str(train_acc) + ' ' + str(test_acc) + "\n")

    best_ce_acc = 0
    # training
    for epoch in range(1, args.n_epoch):
        # train models
        g.train()
        h.train()
        h2.train()
        adjust_learning_rate(optimizer_g, epoch)
        adjust_learning_rate(optimizer_h, epoch)
        adjust_learning_rate(optimizer_h2, epoch)
        train_acc = train(train_loader, epoch, g, optimizer_g,
                          h, optimizer_h, h2, optimizer_h2)
        # evaluate models
        test_acc = evaluate(test_loader, g, h)
        print('Epoch [%d/%d], Training Accuracy: %.4f %%, Test Accuracy: %.4f %%'
              % (epoch + 1, args.n_epoch, train_acc * 100, test_acc * 100))
        # save results
        with open(txtfile, "a") as myfile:
            myfile.write(str(int(epoch)) + ': '
                         + str(train_acc) + ' ' + str(test_acc) + "\n")
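
# -----------------------------------------------------------------
# `adjust_learning_rate` is defined elsewhere. A common pattern for
# epoch-based schedules is sketched below; the linear decay after a
# fixed starting epoch and all default values are assumptions, not
# necessarily what this project uses.

def adjust_learning_rate_sketch(optimizer, epoch, base_lr=1e-3,
                                decay_start=80, n_epoch=200):
    # Keep base_lr until decay_start, then decay linearly toward zero.
    if epoch < decay_start:
        lr = base_lr
    else:
        lr = base_lr * float(n_epoch - epoch) / (n_epoch - decay_start)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr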
# ... dim=1)  -- preceding statement truncated; train_data is (N, T, C, H, W)
short_data = train_data[:, short_start:short_end, :, :, :]
long_data = train_data[:, long_start:long_end, :, :, :]
out_gt = train_data[:, out_gt_start:out_gt_end, :, :, :]

# predict only 10 frames in the first few iterations to warm up the model
if (not args.checkpoint_load) and (train_i < args.iterations_warmup):
    train_out_len = 10
    long_data = train_data[:, short_start:out_gt_start + train_out_len, :, :, :]
    out_gt = train_data[:, out_gt_start:out_gt_start + train_out_len, :, :, :]
else:
    train_out_len = args.out_len

pred_model.train()

# training phase 1: with the long-term sequence; memory weights are trained
pred_model.module.memory.memory_w.requires_grad = True
out_pred = pred_model(short_data, long_data, train_out_len, phase=1)
loss_p1 = l1_loss(out_pred, out_gt) + l2_loss(out_pred, out_gt)
optimizer.zero_grad()
loss_p1.backward()
optimizer.step()

# training phase 2: without the long-term sequence; memory weights are frozen
pred_model.module.memory.memory_w.requires_grad = False
out_pred = pred_model(short_data, None, train_out_len, phase=2)
loss_p2 = l1_loss(out_pred, out_gt) + l2_loss(out_pred, out_gt)
optimizer.zero_grad()
loss_p2.backward()
optimizer.step()
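
# -----------------------------------------------------------------
# The two phases above share a single optimizer and alternate by
# toggling requires_grad on the memory weights: phase 1 updates them,
# phase 2 freezes them while the rest of the network keeps training.
# Below is a self-contained illustration of that freezing pattern
# with a toy stand-in model (not the actual prediction model).

import torch
import torch.nn as nn


class ToyMemoryModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.memory_w = nn.Parameter(torch.randn(4, 4))  # stand-in memory weights
        self.head = nn.Linear(4, 4)

    def forward(self, x):
        return self.head(x @ self.memory_w)


model = ToyMemoryModel()
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
x, y = torch.randn(8, 4), torch.randn(8, 4)

# phase 1: memory weights receive gradients and are updated
model.memory_w.requires_grad = True
loss = nn.functional.l1_loss(model(x), y)
opt.zero_grad()
loss.backward()
opt.step()

# phase 2: memory weights are frozen; backward skips them, so only
# the remaining parameters are updated
model.memory_w.requires_grad = False
loss = nn.functional.l1_loss(model(x), y)
opt.zero_grad()
loss.backward()
opt.step()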