def do_test(config):
    _test_turn_tensor(config)
    data_dict = get_data_dict(config)
    category_lines = data_dict["category_lines"]
    all_categories = data_dict["all_categories"]
    n_categories = data_dict["n_categories"]
    # _random_train_example_test(all_categories, category_lines, config)
    n_letters = config["n_letters"]
    n_hidden = config["n_hidden"]
    rnn = rnn_model.RNN(n_letters, n_hidden, n_categories)
    print(_dataset_test(rnn, all_categories, config))
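
# Example (hypothetical values): a minimal config for do_test(). Only the keys
# read above are shown; _test_turn_tensor(), get_data_dict() and rnn_model are
# assumed to come from the surrounding project and may read further keys.
config = {
    "n_letters": 57,   # e.g. 52 ASCII letters plus " .,;'"
    "n_hidden": 128,
}
do_test(config)
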
def main(args):
    logout = Logger('reslog.txt')

    # read data
    seqlimit = 100
    train_x, train_y = read_data('train', args.granularity, seqlimit)
    valid_x, valid_y = read_data('validation', args.granularity, seqlimit)
    test_x, test_y = read_data('test', args.granularity, seqlimit)
    logout.print('shape of train data:\t{} {}'.format(train_x.shape, train_y.shape))
    logout.print('shape of valid data:\t{} {}'.format(valid_x.shape, valid_y.shape))
    logout.print('shape of test data:\t{} {}'.format(test_x.shape, test_y.shape))

    # sequence-length statistics
    max_seq_length = np.max([
        np.max([len(e) for e in train_x]),
        np.max([len(e) for e in valid_x]),
        np.max([len(e) for e in test_x])
    ])
    min_seq_length = np.min([
        np.min([len(e) for e in train_x]),
        np.min([len(e) for e in valid_x]),
        np.min([len(e) for e in test_x])
    ])
    logout.print('max_seq_length:\t{}'.format(max_seq_length))
    logout.print('min_seq_length:\t{}'.format(min_seq_length))

    batch_size = 256
    logout.print('batch_size:\t{}'.format(batch_size))

    model = rnn_model.RNN(max_seq_length=max_seq_length,
                          input_dim=len(train_x[0][0]),
                          output_n_vocab=args.granularity,
                          num_hidden_layers=1,
                          num_hidden_units=256,
                          input_embedding=512,
                          forget_bias=0.03,
                          learning_rate=0.05)
    logout.print('{}'.format(model))
    model.create_model()

    # train with early stopping on validation accuracy
    show_step = 1
    stop_step = 100
    stop_cnt = 0
    best_valid_acc = -1.0
    best_test_acc = -1.0
    best_result = None
    i = 0
    while True:
        batch_x, batch_y = batch(train_x, train_y, size=batch_size)
        model.train_step(batch_x, batch_y)
        if i % show_step == 0:
            train_acc = model.get_accuracy(train_x, train_y)
            valid_acc = model.get_accuracy(valid_x, valid_y)
            test_acc = model.get_accuracy(test_x, test_y)
            log = '{}\tStep: {:04d}, '.format(datetime.datetime.now(), i)
            log += 'train/test/valid acc = {:.4f}/{:.4f}/{:.4f}'.format(
                train_acc, test_acc, valid_acc)
            if valid_acc > best_valid_acc:
                best_valid_acc = valid_acc
                best_test_acc = test_acc
                best_result = model.get_prediction(test_x, test_y)
                stop_cnt = 0
                log += '*'
            else:
                stop_cnt += show_step
                log += '\t{}'.format(stop_cnt)
            logout.print(log)
            if stop_cnt >= stop_step:
                break
        i += 1

    logout.print('final result: {:.4f}'.format(best_test_acc))
    logout.close()

    # dump the logits of the best model for each split
    for tag in ['train', 'test', 'validation']:
        print('saving: ' + tag)
        x, y = getxy(tag, args.granularity, seqlimit)
        np.savetxt('logits-{}'.format(tag), model.get_logits(x, y), delimiter='\t')
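
# Hypothetical sketch of the batch() helper used in main() above: it is not
# defined in this file, so this assumes a uniform random mini-batch sampler
# over numpy arrays; the real implementation may differ.
import numpy as np

def batch(xs, ys, size=256):
    # draw `size` example indices uniformly at random, with replacement
    idx = np.random.randint(0, len(xs), size=size)
    return xs[idx], ys[idx]
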
def training(feats_all, labels_all, total_length, optimizer, loss_fn, epoch,
             input_feature, train):
    momentum = 0.9
    learning_rate = 0.01
    num_of_rows_per_song = total_length // num_of_songs
    print('num_of_rows_per_song: ', num_of_rows_per_song)
    print('total number of songs: ', num_of_songs)

    # split the dataset into 10 folds for cross-validation
    cut = feats_all.shape[0] // 10
    x = []
    y = []
    start = 0
    end = cut
    for i in range(10):
        x.append(feats_all[start:end, :])
        y.append(labels_all[start:end, 0:2])
        start = end
        end += cut

    model = rnn_model.RNN()

    # default loss function is MSE
    loss_function = nn.MSELoss()
    if loss_fn == 'MSE':
        loss_function = nn.MSELoss()
    elif loss_fn == 'L1':
        loss_function = nn.L1Loss()

    # build the checkpoint filename from the string arguments before the
    # optimizer name is replaced by the optimizer object below
    model_filename = str(optimizer) + '_' + str(loss_fn) + '_' + str(
        epoch) + '_' + str(input_feature) + '.pkt'

    if optimizer == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                              momentum=momentum)

    loss_change = []  # keep track of the loss in the training process

    if train:
        # 10-fold cross-validation: fold k is held out, the rest is training data
        for k in range(10):
            x_in = np.concatenate([x[i] for i in range(10) if i != k], axis=0)
            y_in = np.concatenate([y[i] for i in range(10) if i != k], axis=0)
            x_test = x[k]
            y_test = y[k]

            start = time.time()
            print('validation set number: ', k)
            print('training set shape =', x_in.shape, y_in.shape)
            print('testing set shape = ', x_test.shape, y_test.shape)

            if torch.cuda.is_available():
                model.cuda()
            for ep in range(int(epoch)):
                model.zero_grad()
                model.hidden = model.init_hidden()
                input = prepare_sequence(x_in)
                target = prepare_sequence(y_in)
                #print(input.view(-1,1,12),target)
                # prediction: (batch_size, time_step, size_of(output space))
                prediction = model(input)
                loss = loss_function(prediction, target.view(1, -1, 2))
                loss.backward()
                optimizer.step()
                if ep % 10 == 0:
                    end = time.time()
                    print('epoch: ', ep, 'loss: ', loss.item())
                    print('time/10epoch: ', (end - start))
                    start = time.time()
                loss_change.append(loss.item())

            # evaluate on the held-out fold: accuracy is the fraction of
            # predictions whose absolute error is within each threshold
            test_score = model(prepare_sequence(x_test))
            loss_new = (test_score - prepare_sequence(y_test)).abs()
            if torch.cuda.is_available():
                loss_nparr = loss_new.detach()[0].cpu().numpy()
            else:
                loss_nparr = loss_new.detach()[0].numpy()
            metric_arr = np.linspace(0.1, 0.5, 5)
            print('validation set:', k)
            for metric in metric_arr:
                accuracy = float(loss_nparr[loss_nparr <= metric].size) / float(
                    loss_nparr.size)
                print('metric = ', metric, 'accuracy = ', accuracy)

        torch.save(model, model_filename)
        # print(loss_change)
        # plt.xlabel("time")
        # plt.ylabel("loss")
        # plt.plot(loss_change)
        # plt.show()
    else:
        model = torch.load(model_filename)
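
# Hypothetical sketch of the prepare_sequence() helper used above: it is not
# defined in this file, so this assumes it wraps a (seq_len, dim) numpy array
# as a (1, seq_len, dim) float tensor, moved to the GPU when available.
import torch

def prepare_sequence(arr):
    tensor = torch.as_tensor(arr, dtype=torch.float32).unsqueeze(0)
    return tensor.cuda() if torch.cuda.is_available() else tensor
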
def training(optimizer, loss_fn, learning_rate, momentum, training_size,
             epoch, train):
    num_of_rows_per_song = total_length // num_of_songs
    print('num_of_rows_per_song: ', num_of_rows_per_song)
    print('total number of songs: ', num_of_songs)

    # the first `training_size` songs form the training set, the rest the test set
    number_of_songs_in_training_set = int(training_size)
    cut = num_of_rows_per_song * number_of_songs_in_training_set
    x_in = feats_all[0:cut, :]
    y_in = labels_all[0:cut, 0:2]
    print('size of training set: ', x_in.shape, y_in.shape)
    x_test = feats_all[cut:total_length, :]
    y_test = labels_all[cut:total_length, 0:2]
    print('size of test set: ', x_test.shape, y_test.shape)

    # build the checkpoint filename from the string arguments before the
    # optimizer name is replaced by the optimizer object below
    model_filename = str(optimizer) + '_' + str(loss_fn) + '_' + str(
        learning_rate) + '_' + str(momentum) + '_' + str(
        training_size) + '_' + str(epoch) + '.pkt'

    loss_change = []  # keep track of the loss in the training process

    model = rnn_model.RNN()
    if loss_fn == 'MSE':
        loss_function = nn.MSELoss()
    elif loss_fn == 'L1':
        loss_function = nn.L1Loss()

    if optimizer == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                              momentum=momentum)
    elif optimizer == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate,
                                  momentum=momentum)

    #plt.close()
    #fig=plt.figure()
    #plt.grid(True)
    #plt.ylim(0,1)
    #plt.legend()

    if train:
        start = time.time()
        for ep in range(int(epoch)):
            model.zero_grad()
            model.hidden = model.init_hidden()
            input = prepare_sequence(x_in)
            target = prepare_sequence(y_in)
            #print(input.view(-1,1,12),target)
            # prediction: (batch_size, time_step, size_of(output space))
            prediction = model(input)
            loss = loss_function(prediction, target.view(1, -1, 2))
            loss.backward()
            optimizer.step()
            if ep % 10 == 0:
                end = time.time()
                print('epoch: ', ep, 'loss: ', loss.item())
                print('time/10epoch: ', (end - start))
                start = time.time()
                torch.save(model, model_filename)
            loss_change.append(loss.item())
            #plt.cla()
            #plt.plot(score_nparr[:,1],'x-',label = 'logits')
            #plt.plot(target_nparr[:,1],'+-',label = 'targets')
            #plt.legend()
            #plt.show()
            #print(dataset.vec2word(word_vec))
    else:
        model = torch.load(model_filename)

    # evaluate: accuracy is the fraction of predictions whose absolute error
    # is within each threshold
    test_score = model(prepare_sequence(x_test))
    loss_new = (test_score - prepare_sequence(y_test)).abs()
    loss_nparr = loss_new.detach()[0].numpy()
    metric_arr = np.linspace(0.1, 0.5, 5)
    for metric in metric_arr:
        accuracy = float(loss_nparr[loss_nparr <= metric].size) / float(
            loss_nparr.size)
        print('metric = ', metric, 'accuracy = ', accuracy)

    print(loss_change)
    plt.xlabel("time")
    plt.ylabel("loss")
    plt.plot(loss_change)
    plt.show()
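
# Example entry point (hypothetical argument values); assumes the module-level
# globals feats_all, labels_all, total_length and num_of_songs used by
# training() are loaded elsewhere in this file.
if __name__ == '__main__':
    training(optimizer='SGD', loss_fn='MSE', learning_rate=0.01,
             momentum=0.9, training_size=8, epoch=100, train=True)
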
def run(experiment):
    save_path = "checkpoints/" + experiment.name
    log_path = "tensorboard/train/" + experiment.name

    # create the checkpoint and log directories, or clean them if they exist
    for path in [save_path, log_path]:
        if not os.path.exists(path):
            os.makedirs(path)
        else:
            shutil.rmtree(path)
            os.makedirs(path)
    save_path += "/dev"

    # record the git commit hash as an empty marker file in the log directory
    repo = git.Repo(search_parent_directories=True)
    sha = repo.head.object.hexsha
    open(log_path + "/git_commit_" + sha, 'w').close()

    (epochs, input_batch_size, rnn_size, num_layers, encoding_embedding_size,
     decoding_embedding_size, learning_rate, keep_probability, num_samples,
     reward) = map(experiment.hyperparams.get,
                   ('epochs', 'input_batch_size', 'rnn_size', 'num_layers',
                    'encoding_embedding_size', 'decoding_embedding_size',
                    'learning_rate', 'keep_probability', 'num_samples',
                    'reward'))

    ### prepare data ###
    ((train_source_int_text, train_target_int_text),
     (valid_source_int_text, valid_target_int_text),
     (source_vocab_to_int, target_vocab_to_int),
     (source_int_to_vocab, target_int_to_vocab)) = data_preprocessing.get_data(
        experiment.data["dataset"], experiment.data["folder"],
        experiment.data["train_source_file"], experiment.data["train_target_file"],
        experiment.data["dev_source_file"], experiment.data["dev_target_file"],
        experiment.tokenization)

    max_source_sentence_length = max(len(sentence) for sentence in train_source_int_text)

    train_source = train_source_int_text
    train_target = train_target_int_text
    valid_source = valid_source_int_text
    valid_target = valid_target_int_text

    # shuffle with a fixed seed for reproducibility
    rnd = random.Random(1234)
    train_combined = list(zip(train_source, train_target))
    rnd.shuffle(train_combined)
    train_source, train_target = zip(*train_combined)
    valid_combined = list(zip(valid_source, valid_target))
    rnd.shuffle(valid_combined)
    valid_source, valid_target = zip(*valid_combined)

    # set the reward function; distances are negated so that a larger reward
    # is better (Jaro-Winkler is already a similarity)
    if reward == "levenshtein":
        reward_func = lambda ref_hyp: -textdistance.levenshtein(ref_hyp[0], ref_hyp[1])
    elif reward == "jaro-winkler":
        reward_func = lambda ref_hyp: textdistance.JaroWinkler()(ref_hyp[0], ref_hyp[1])
    elif reward == "hamming":
        reward_func = lambda ref_hyp: -textdistance.hamming(ref_hyp[0], ref_hyp[1])
    else:
        raise ValueError('unknown reward: {}'.format(reward))

    # REINFORCE builds the graph around `num_samples` sampled hypotheses per step
    if experiment.train_method == 'MLE':
        graph_batch_size = input_batch_size
    elif experiment.train_method in ('reinforce', 'reinforce_test'):
        graph_batch_size = num_samples

    ### prepare model ###
    tf.reset_default_graph()  # maybe need?
    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
        model = rnn_model.RNN(graph_batch_size, max_source_sentence_length,
                              source_vocab_to_int, target_vocab_to_int,
                              encoding_embedding_size, decoding_embedding_size,
                              rnn_size, num_layers)
    eval_batch_size = 128
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        eval_model = rnn_model.RNN(eval_batch_size, max_source_sentence_length,
                                   source_vocab_to_int, target_vocab_to_int,
                                   encoding_embedding_size, decoding_embedding_size,
                                   rnn_size, num_layers, False)
    early_stopping = True

    ### train model ###
    if experiment.train_method == 'reinforce_test':
        train.reinforce_test(model, experiment.start_checkpoint,
                             source_vocab_to_int, learning_rate,
                             keep_probability, graph_batch_size,
                             target_int_to_vocab, source_int_to_vocab,
                             valid_source, valid_target)
    else:
        train.train(experiment.name, experiment.train_method, model, epochs,
                    input_batch_size, train_source, train_target, valid_source,
                    valid_target, learning_rate, keep_probability, save_path,
                    experiment.start_checkpoint, target_int_to_vocab,
                    source_int_to_vocab, source_vocab_to_int, log_path,
                    graph_batch_size, experiment.max_hours, eval_model,
                    eval_batch_size, reward_func, early_stopping)
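
# Hypothetical example of the experiment object run() expects. The attribute
# and key names are taken from the accesses above; all values are illustrative
# only, and the real experiment class in this project may differ.
from types import SimpleNamespace

experiment = SimpleNamespace(
    name='mle_baseline',
    train_method='MLE',            # 'MLE', 'reinforce', or 'reinforce_test'
    start_checkpoint=None,
    max_hours=12,
    tokenization='char',
    data={'dataset': 'demo', 'folder': 'data/',
          'train_source_file': 'train.src', 'train_target_file': 'train.tgt',
          'dev_source_file': 'dev.src', 'dev_target_file': 'dev.tgt'},
    hyperparams={'epochs': 10, 'input_batch_size': 64, 'rnn_size': 256,
                 'num_layers': 2, 'encoding_embedding_size': 128,
                 'decoding_embedding_size': 128, 'learning_rate': 0.001,
                 'keep_probability': 0.8, 'num_samples': 16,
                 'reward': 'levenshtein'},
)
run(experiment)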