def train(features, fea_len, split_frac, out_file, save=False, save_folder=None):
    '''
    hyperparameters:
        features: amount of training data
        fea_len: feature length
    '''
    if isinstance(out_file, str):
        out_file = open(out_file, 'w')
    d = Dataset(features, split_frac, 1, gpu)
    print('defining architecture')
    enc = ChainEncoder(d.get_v_fea_len(), d.get_e_fea_len(), fea_len, 'last')
    predictor = Predictor(fea_len)
    loss = nn.NLLLoss()
    if gpu:
        enc.cuda()
        predictor.cuda()
        loss.cuda()
    optimizer = optim.Adam(list(enc.parameters()) + list(predictor.parameters()))
    print('training')
    test_v_features, test_e_features, test_A_pls, test_B_pls, test_y = d.get_test_pairs()
    test_y = test_y.data.cpu().numpy()
    for train_iter in range(12000):
        v_features, e_features, A_pls, B_pls, y = d.get_train_pairs(100)
        enc.zero_grad()
        predictor.zero_grad()
        A_code, B_code = encode(enc, fea_len, v_features, e_features, A_pls, B_pls)
        softmax_output = predictor(A_code, B_code)
        loss_val = loss(softmax_output, y)
        loss_val.backward()
        optimizer.step()
        enc.zero_grad()
        predictor.zero_grad()
        test_A_code, test_B_code = encode(enc, fea_len, test_v_features,
                                          test_e_features, test_A_pls, test_B_pls)
        softmax_output = predictor(test_A_code, test_B_code).data.cpu().numpy()
        test_y_pred = softmax_output.argmax(axis=1)
        # fraction of correct test predictions (true division, not integer division)
        cur_acc = (test_y_pred == test_y).sum() / float(len(test_y))
        out_file.write('%f\n' % cur_acc)
        out_file.flush()
        if save and train_iter % 50 == 0:
            if save_folder[-1] == '/':
                save_folder = save_folder[:-1]
            torch.save(enc.state_dict(),
                       '%s/%i_enc.model' % (save_folder, train_iter))
            torch.save(predictor.state_dict(),
                       '%s/%i_pred.model' % (save_folder, train_iter))
    out_file.close()
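# A minimal sketch of the encode() helper assumed by train() above; the real
# implementation is not shown in this snippet, and the argument layout is an
# assumption. It runs the shared ChainEncoder over each chain of the pair so
# both codes come from the same weights. fea_len is kept only for signature
# parity with the call sites above.
def encode(enc, fea_len, v_features, e_features, A_pls, B_pls):
    A_code = enc(v_features, e_features, A_pls)  # encode chain A
    B_code = enc(v_features, e_features, B_pls)  # encode chain B
    return A_code, B_code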
def train(features, fea_len, split_frac, out_file):
    if isinstance(out_file, str):
        out_file = open(out_file, 'w')
    d = Dataset(features, split_frac, gpu)
    print('defining architecture')
    enc = ChainEncoder(d.get_v_fea_len(), d.get_e_fea_len(), fea_len, 'last')
    predictor = Predictor(fea_len)
    loss = nn.NLLLoss()
    if gpu:
        enc.cuda()
        predictor.cuda()
        loss.cuda()
    optimizer = optim.Adam(list(enc.parameters()) + list(predictor.parameters()))
    print('training')
    test_chain_A, test_chain_B, test_y = d.get_test_pairs()
    test_y = test_y.data.cpu().numpy()
    for train_iter in range(4000):
        chains_A, chains_B, y = d.get_train_pairs(1000)
        enc.zero_grad()
        predictor.zero_grad()
        output_A = enc(chains_A)
        output_B = enc(chains_B)
        softmax_output = predictor(output_A, output_B)
        loss_val = loss(softmax_output, y)
        loss_val.backward()
        optimizer.step()
        enc.zero_grad()
        predictor.zero_grad()
        output_test_A = enc(test_chain_A)
        output_test_B = enc(test_chain_B)
        softmax_output = predictor(output_test_A, output_test_B).data.cpu().numpy()
        test_y_pred = softmax_output.argmax(axis=1)
        cur_acc = (test_y_pred == test_y).sum() / float(len(test_y))
        print('test acc:', cur_acc)
        out_file.write('%f\n' % cur_acc)
        if train_iter % 50 == 0:
            torch.save(enc.state_dict(), 'ckpt/%i_encoder.model' % train_iter)
            torch.save(predictor.state_dict(), 'ckpt/%i_predictor.model' % train_iter)
    out_file.close()
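# A minimal sketch of the Predictor module used by both train() variants above
# (an assumption, not the source's code). It scores a pair of fea_len-dimensional
# chain codes and returns log-probabilities, which is what nn.NLLLoss expects.
import torch
import torch.nn as nn
import torch.nn.functional as F

class Predictor(nn.Module):
    def __init__(self, fea_len):
        super(Predictor, self).__init__()
        self.fc = nn.Linear(2 * fea_len, 2)  # binary decision over the pair

    def forward(self, code_A, code_B):
        pair = torch.cat([code_A, code_B], dim=1)   # (batch, 2 * fea_len)
        return F.log_softmax(self.fc(pair), dim=1)  # log-probs for NLLLoss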
def main(exp, frame_sizes, dataset, **params):
    params = dict(default_params, exp=exp, frame_sizes=frame_sizes,
                  dataset=dataset, **params)
    os.environ['CUDA_VISIBLE_DEVICES'] = params['gpu']
    results_path = setup_results_dir(params)
    tee_stdout(os.path.join(results_path, 'log'))

    model = SampleRNN(frame_sizes=params['frame_sizes'],
                      n_rnn=params['n_rnn'],
                      dim=params['dim'],
                      learn_h0=params['learn_h0'],
                      q_levels=params['q_levels'],
                      weight_norm=params['weight_norm'],
                      dropout=params['dropout'])
    predictor = Predictor(model)
    if params['cuda']:
        model = model.cuda()
        predictor = predictor.cuda()

    optimizer = gradient_clipping(
        torch.optim.Adam(predictor.parameters(), lr=params['lr']))

    data_loader = make_data_loader(model.lookback, params)
    test_split = 1 - params['test_frac']
    val_split = test_split - params['val_frac']

    criterion = sequence_nll_loss_bits

    checkpoints_path = os.path.join(results_path, 'checkpoints')
    checkpoint_data = load_last_checkpoint(checkpoints_path, params)
    if checkpoint_data is not None:
        (state_dict, epoch, iteration) = checkpoint_data
        start_epoch = int(epoch)
        global_step = iteration
        predictor.load_state_dict(state_dict)
    else:
        start_epoch = 0
        global_step = 0

    writer = SummaryWriter(
        os.path.join(
            results_path,
            "{}-{}".format(
                params['dataset'],
                str(datetime.datetime.now()).split('.')[0].replace(' ', '-'))))

    dataset_train = data_loader(0, val_split, eval=False)
    dataset_val = data_loader(val_split, test_split, eval=True)
    dataset_test = data_loader(test_split, 1, eval=True)

    generator = Generator(predictor.model, params['cuda'])
    best_val_loss = float('inf')

    def wrap(input):
        # move tensors into Variables (and onto the GPU when requested)
        if torch.is_tensor(input):
            input = torch.autograd.Variable(input)
            if params['cuda']:
                input = input.cuda()
        return input

    for e in range(start_epoch, int(params['epoch_limit'])):
        for i, data in enumerate(dataset_train):
            batch_inputs = list(map(wrap, data[:-1]))
            batch_target = torch.autograd.Variable(data[-1])
            if params['cuda']:
                batch_target = batch_target.cuda()

            plugin_data = [None, None]

            def closure():
                batch_output = predictor(*batch_inputs)
                loss = criterion(batch_output, batch_target)
                loss.backward()
                if plugin_data[0] is None:
                    plugin_data[0] = batch_output.data
                    plugin_data[1] = loss.data
                return loss

            optimizer.zero_grad()
            optimizer.step(closure)
            train_loss = plugin_data[1]

            # stats: per iteration
            writer.add_scalar('train/train loss', train_loss, global_step)
            print("E:{:03d}-S{:05d}: Loss={}".format(e, i, train_loss))
            global_step += 1

        # validation: per epoch
        predictor.eval()
        with torch.no_grad():
            loss_sum = 0
            n_examples = 0
            for data in dataset_val:
                batch_inputs = list(map(wrap, data[:-1]))
                batch_target = torch.autograd.Variable(data[-1])
                batch_size = batch_target.size()[0]
                if params['cuda']:
                    batch_target = batch_target.cuda()
                batch_output = predictor(*batch_inputs)
                loss_sum += criterion(batch_output, batch_target).item() * batch_size
                n_examples += batch_size
            val_loss = loss_sum / n_examples
        writer.add_scalar('validation/validation loss', val_loss, global_step)
        print("== Validation Step E:{:03d}: Loss={} ==".format(e, val_loss))
        predictor.train()

        # saver: per epoch
        last_pattern = 'ep{}-it{}'
        best_pattern = 'best-ep{}-it{}'
        if not params['keep_old_checkpoints']:
            pattern = os.path.join(checkpoints_path, last_pattern.format('*', '*'))
            for file_name in glob(pattern):
                os.remove(file_name)
        torch.save(predictor.state_dict(),
                   os.path.join(checkpoints_path,
                                last_pattern.format(e, global_step)))
        if val_loss < best_val_loss:
            # remove stale best-* checkpoints (not the last-* ones just saved)
            pattern = os.path.join(checkpoints_path, best_pattern.format('*', '*'))
            for file_name in glob(pattern):
                os.remove(file_name)
            torch.save(predictor.state_dict(),
                       os.path.join(checkpoints_path,
                                    best_pattern.format(e, global_step)))
            best_val_loss = val_loss

        generate_sample(generator, params, writer, global_step, results_path, e)

    # generate final results
    generate_sample(generator, params, None, global_step, results_path, 0)
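# A hedged sketch of the gradient_clipping() wrapper assumed above (the actual
# helper comes from the surrounding codebase and may differ). The idea: run the
# closure to populate gradients, clip their norm, then apply the update.
import torch

def gradient_clipping(optimizer, max_norm=1.0):
    original_step = optimizer.step

    def step(closure=None):
        loss = closure() if closure is not None else None  # forward/backward
        for group in optimizer.param_groups:
            torch.nn.utils.clip_grad_norm_(group['params'], max_norm)
        original_step()  # apply the (now clipped) update
        return loss

    optimizer.step = step
    return optimizer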
voc_size = len(Sigma)
lstm_dim = 10
batch_size = 128
num_of_layers = 1
num_of_directions = 1
num_epochs = 300
clip = 1.0

predictor = Predictor(voc_size, lstm_dim)
optimizer = optim.Adam(predictor.parameters())
criterion = nn.NLLLoss()

best_dev_acc = 0.0
best_model_wts = copy.deepcopy(predictor.state_dict())
best_test1_acc = 0.0
best_test2_acc = 0.0
best_test3_acc = 0.0
best_epoch_num = 0
total_epoch_num = 0

all_losses = []
all_acc_1 = []
all_acc_2 = []
all_acc_3 = []

for epoch in range(1, num_epochs):
    total_epoch_num += 1
    shuffled_id_blocks = get_shuffled_ids(_data['tr'], batch_size)
    running_loss = 0.0
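# A possible sketch of the get_shuffled_ids() helper called above (hypothetical;
# the source does not show it): shuffle the training indices once per epoch and
# cut them into batch-sized blocks.
import random

def get_shuffled_ids(data, batch_size):
    ids = list(range(len(data)))
    random.shuffle(ids)
    return [ids[i:i + batch_size] for i in range(0, len(ids), batch_size)]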
# (fragment: continues inside the per-batch training loop)
optimizer.zero_grad()
loss_p1.backward()
optimizer.step()

# training phase 2 without long-term sequence
pred_model.module.memory.memory_w.requires_grad = False  # do not train memory weights
out_pred = pred_model(short_data, None, train_out_len, phase=2)
loss_p2 = l1_loss(out_pred, out_gt) + l2_loss(out_pred, out_gt)
optimizer.zero_grad()
loss_p2.backward()
optimizer.step()

train_loss.update(float(loss_p1) + float(loss_p2))

if (train_i + 1) % args.print_freq == 0:
    torch.save(pred_model.state_dict(),
               args.checkpoint_save_dir + '/trained_file_'
               + str(train_i + 1).zfill(6) + '.pt')

    # validation phase
    pred_model.eval()
    with torch.no_grad():
        for valid_data in validloader:
            # define data indexes
            short_start, short_end = 0, args.short_len
            out_gt_start, out_gt_end = short_end, short_end + args.out_len

            # obtain input data and output gt
            valid_data = torch.stack(valid_data).to(device)
            valid_data = valid_data.transpose(dim0=0, dim1=1)  # make (N, T, C, H, W)
            short_data = valid_data[:, short_start:short_end, :, :, :]
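# For context, a hedged reconstruction of the phase-1 step that precedes the
# fragment above (names mirror the fragment; trainloader, args.long_len and the
# phase-1 call signature are assumptions, not the source's code):
for train_i, train_data in enumerate(trainloader):
    train_data = torch.stack(train_data).to(device)
    train_data = train_data.transpose(dim0=0, dim1=1)  # (N, T, C, H, W)
    short_data = train_data[:, :args.short_len]
    long_data = train_data[:, :args.long_len]
    out_gt = train_data[:, args.short_len:args.short_len + args.out_len]

    # training phase 1 with the long-term sequence: memory weights are trainable
    pred_model.module.memory.memory_w.requires_grad = True
    out_pred = pred_model(short_data, long_data, train_out_len, phase=1)
    loss_p1 = l1_loss(out_pred, out_gt) + l2_loss(out_pred, out_gt)
    # ... the fragment above continues from here (zero_grad / backward / step)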