def get_average_loss_value_of_test_sample(test_order_list, nn, dictionary, source_file_name,
                                          n_best_list_file_name, sbleu_score_list_file_name,
                                          smoothing_factor):
    """Return the average loss (negated xBleu) over the given test sentences.

    For every index in ``test_order_list`` the per-sentence data is fetched
    with ``get_everything`` and scored with ``xbleu``.  The negated score is
    written to stdout (comma separated, flushed after every sample) and
    accumulated into the total loss.

    Args:
        test_order_list: indices of the source sentences to evaluate.
        nn: network object, passed through to ``xbleu``.
        dictionary: dictionary object, passed through to ``xbleu``.
        source_file_name: passed through to ``get_everything``.
        n_best_list_file_name: passed through to ``get_everything``.
        sbleu_score_list_file_name: passed through to ``get_everything``.
        smoothing_factor: passed through to ``xbleu``.

    Returns:
        The sum of ``-xbleu`` over all samples divided by the number of
        samples, or ``float('nan')`` when ``test_order_list`` is empty.
    """
    # Float accumulator so the final division is a true division on every
    # interpreter version, whatever numeric type xbleu hands back.
    loss_value_test_set = 0.0
    for t_i in test_order_list:
        # The first value returned by get_everything is not used here.
        (_, phrase_pair_dict_n_list, total_base_score_list, sbleu_score_list) = get_everything(
            t_i, source_file_name, n_best_list_file_name, sbleu_score_list_file_name)
        xbleu_t_i, _ = xbleu(nn, total_base_score_list, sbleu_score_list,
                             phrase_pair_dict_n_list, dictionary, smoothing_factor)
        sys.stdout.write(str(-xbleu_t_i) + ", ")
        sys.stdout.flush()
        loss_value_test_set -= xbleu_t_i
    # Terminate the progress line.  An explicit write is used because a bare
    # ``print`` only emits the newline when parsed as a Python 2 statement.
    sys.stdout.write("\n")
    if len(test_order_list) > 0:
        return loss_value_test_set / len(test_order_list)
    return float('nan')
def get_average_loss_value_of_test_sample(test_order_list, nn, dictionary, source_file_name,
                                          n_best_list_file_name, sbleu_score_list_file_name,
                                          smoothing_factor):
    """Return the average loss (negated xBleu) over the given test sentences.

    For every index in ``test_order_list`` the per-sentence data is fetched
    with ``get_everything`` and scored with ``xbleu``.  The negated score is
    written to stdout (comma separated, flushed after every sample) and
    accumulated into the total loss.

    Args:
        test_order_list: indices of the source sentences to evaluate.
        nn: network object, passed through to ``xbleu``.
        dictionary: dictionary object, passed through to ``xbleu``.
        source_file_name: passed through to ``get_everything``.
        n_best_list_file_name: passed through to ``get_everything``.
        sbleu_score_list_file_name: passed through to ``get_everything``.
        smoothing_factor: passed through to ``xbleu``.

    Returns:
        The sum of ``-xbleu`` over all samples divided by the number of
        samples, or ``float('nan')`` when ``test_order_list`` is empty.
    """
    # Float accumulator so the final division is a true division on every
    # interpreter version, whatever numeric type xbleu hands back.
    loss_value_test_set = 0.0
    for t_i in test_order_list:
        # The first value returned by get_everything is not used here.
        (_, phrase_pair_dict_n_list, total_base_score_list, sbleu_score_list) = get_everything(
            t_i, source_file_name, n_best_list_file_name, sbleu_score_list_file_name)
        xbleu_t_i, _ = xbleu(nn, total_base_score_list, sbleu_score_list,
                             phrase_pair_dict_n_list, dictionary, smoothing_factor)
        sys.stdout.write(str(-xbleu_t_i) + ", ")
        sys.stdout.flush()
        loss_value_test_set -= xbleu_t_i
    # Terminate the progress line.  An explicit write is used because a bare
    # ``print`` only emits the newline when parsed as a Python 2 statement.
    sys.stdout.write("\n")
    if len(test_order_list) > 0:
        return loss_value_test_set / len(test_order_list)
    return float('nan')
def main(source_file_name, n_best_list_file_name, sbleu_score_list_file_name, learning_rate, smoothing_factor):
    """Train the network sentence by sentence and track the loss on a held-out set.

    Steps performed:
      1. Build a ``CPTMNeuralNetwork`` from the initial weights in
         ``data/weight_initialization.gz`` and a 100x100 identity matrix, and
         load the dictionary from ``data/dictionary.dict``.
      2. Count the lines of ``source_file_name`` (minus one for the trailing
         empty line) and hold out the last ``max(10, 10%)`` sentences as the
         test set.
      3. Repeatedly run one epoch over the training sentences, updating the
         network after every sentence, then evaluate the average loss on the
         test set and print the loss history.

    The convergence test is currently disabled (``if False``), so the epoch
    loop does not terminate on its own.

    Args:
        source_file_name: file with one source sentence per line, ending with
            an empty line.
        n_best_list_file_name: passed through to ``get_everything``.
        sbleu_score_list_file_name: passed through to ``get_everything``.
        learning_rate: passed through to ``nn.update_mini_batch``.
        smoothing_factor: passed through to ``xbleu``.
    """
    import copy  # local import: only needed for the per-epoch weight snapshot

    # All output uses single-argument print(...) calls, which produce the same
    # text whether parsed as a Python 2 print statement or a Python 3 call.
    print("Loading and initializing neural network")
    W1 = np.loadtxt("data/weight_initialization.gz")
    W2 = np.identity(100)
    nn = CPTMNeuralNetwork([W1.shape[0], 100, 100], [W1, W2])
    dictionary = corpora.Dictionary.load("data/dictionary.dict")

    # Each line in source_file is a source sentence.
    # source_file should end with an empty line
    with open(source_file_name, 'r') as source_file:
        training_set_size = sum(1 for _ in source_file)
    training_set_size -= 1  # ends with empty line
    # uncomment to manually set training_set_size for testing purposes
    #training_set_size = 30

    # A real list, so that it can be shuffled and sliced on any Python version.
    training_order_list = list(range(training_set_size))
    # uncomment to randomize training samples
    # (should be done for deployment, however reproducible results might be good while testing)
    #random.shuffle(training_order_list)

    requested_test_set_size = max(10, int(0.1 * training_set_size))
    # uncomment to manually set the test set size for testing purposes
    #requested_test_set_size = 5
    test_order_list = training_order_list[-requested_test_set_size:]
    training_order_list = training_order_list[:-requested_test_set_size]
    # With fewer sentences than requested the slice is shorter than asked for;
    # report and use the number of sentences actually held out.
    test_set_size = len(test_order_list)
    print("Training sample size: %d (nr of source sentences)" % training_set_size)
    print("Test sample size: %d (nr of source sentences)" % test_set_size)

    # initialize variables
    d_theta_old = [0, 0]  # momentum terms

    # calculate average loss function value of test samples
    print("Calculating average loss function value of test samples using initial weights")
    initial_loss_value_test_set = get_average_loss_value_of_test_sample(
        test_order_list, nn, dictionary, source_file_name,
        n_best_list_file_name, sbleu_score_list_file_name, smoothing_factor)
    print("Average loss function value: %s" % (initial_loss_value_test_set,))
    loss_value_history = [initial_loss_value_test_set]

    converged = False
    epoch_count = 0
    # For debug
    xBleu_history = []
    xBleu_change_history = []

    # train until overfit (early stop)
    print("")
    print("Start training...")
    while not converged:
        # Snapshot (not alias) the weights at the start of the epoch so that
        # "weights from previous epoch" below really are the pre-epoch values.
        # NOTE(review): whether update_mini_batch modifies nn.weights in place
        # is not visible here; the deep copy is correct in either case.
        theta_previous = copy.deepcopy(nn.weights)
        for list_index, i in enumerate(training_order_list):
            (phrase_pair_dict_all, phrase_pair_dict_n_list,
             total_base_score_list, sbleu_score_list) = get_everything(
                i, source_file_name, n_best_list_file_name, sbleu_score_list_file_name)
            xblue_i, Ej_translation_probability_list = xbleu(
                nn, total_base_score_list, sbleu_score_list,
                phrase_pair_dict_n_list, dictionary, smoothing_factor)
            error_term_dict_i = get_error_term_dict(
                phrase_pair_dict_all, phrase_pair_dict_n_list,
                sbleu_score_list, xblue_i, Ej_translation_probability_list)
            d_theta_old = nn.update_mini_batch(
                phrase_pair_dict_all, learning_rate, dictionary,
                error_term_dict_i, d_theta_old)
            if debug_mode:
                debug_print_weights_after_update(nn, d_theta_old, error_term_dict_i)

            # check if xBleu increases after each iteration for testing purposes
            # change to >> if True: << if you want to make this check and see output
            if False:
                xblue_i_after, _ = xbleu(nn, total_base_score_list, sbleu_score_list,
                                         phrase_pair_dict_n_list, dictionary, smoothing_factor)
                if np.isnan(xblue_i_after):
                    converged = True
                xBleu_history.append((i, xblue_i))
                xBleu_change_history.append(xblue_i_after - xblue_i)
                print("-------------------------------------------------------------")
                print("xBleu history: [(xBleu_before_gradient_descent, xBleu_after_gradient_descent)]")
                print(xBleu_history)
                print("")
                print("xBleu_change_history [xBleu_after - xBleu_before]")
                print(xBleu_change_history)
                print("-------------------------------------------------------------")

            # NOTE(review): per-sample progress line; assumed to belong to the
            # sample loop rather than to the disabled debug block above.
            print("Finished epoch nr %d training sample nr %d (of %d) | source sentence nr %d"
                  % (epoch_count, list_index + 1, len(training_order_list), i + 1))

        epoch_count += 1
        print("=====================================")
        print("Finished epoch number: %d" % epoch_count)
        print("=====================================")

        # calculate loss function on test set after each epoch using updated weights
        print("Calculating loss function value on test set (%d samples) using new weights"
              % test_set_size)
        loss_value_test_set = get_average_loss_value_of_test_sample(
            test_order_list, nn, dictionary, source_file_name,
            n_best_list_file_name, sbleu_score_list_file_name, smoothing_factor)
        loss_value_history.append(loss_value_test_set)
        print_loss_value_history(loss_value_history, test_set_size)

        # TODO: CONVERGENCE TEST, a simple approach is to stop once the loss function value
        # of this iteration is worse than the previous one
        #if loss_value_history[-2] > loss_value_history[-1]:
        if False:  # as of now, run forever. Change this once you have determined a good convergence criterion
            converged = True
            print("CONVERGED!!!!!!!!!!!!")
            print("Saving weights from previous epoch to file")
            np.savetxt('W1.gz', theta_previous[0])
            np.savetxt('W2.gz', theta_previous[1])
        else:
            print("No overfitting, keep training...")
def main(source_file_name, n_best_list_file_name, sbleu_score_list_file_name, learning_rate, smoothing_factor):
    """Train the network sentence by sentence and track the loss on a held-out set.

    Steps performed:
      1. Build a ``CPTMNeuralNetwork`` from the initial weights in
         ``data/weight_initialization.gz`` and a 100x100 identity matrix, and
         load the dictionary from ``data/dictionary.dict``.
      2. Count the lines of ``source_file_name`` (minus one for the trailing
         empty line) and hold out the last ``max(10, 10%)`` sentences as the
         test set.
      3. Repeatedly run one epoch over the training sentences, updating the
         network after every sentence, then evaluate the average loss on the
         test set and print the loss history.

    The convergence test is currently disabled (``if False``), so the epoch
    loop does not terminate on its own.

    Args:
        source_file_name: file with one source sentence per line, ending with
            an empty line.
        n_best_list_file_name: passed through to ``get_everything``.
        sbleu_score_list_file_name: passed through to ``get_everything``.
        learning_rate: passed through to ``nn.update_mini_batch``.
        smoothing_factor: passed through to ``xbleu``.
    """
    import copy  # local import: only needed for the per-epoch weight snapshot

    # All output uses single-argument print(...) calls, which produce the same
    # text whether parsed as a Python 2 print statement or a Python 3 call.
    print("Loading and initializing neural network")
    W1 = np.loadtxt("data/weight_initialization.gz")
    W2 = np.identity(100)
    nn = CPTMNeuralNetwork([W1.shape[0], 100, 100], [W1, W2])
    dictionary = corpora.Dictionary.load("data/dictionary.dict")

    # Each line in source_file is a source sentence.
    # source_file should end with an empty line
    with open(source_file_name, 'r') as source_file:
        training_set_size = sum(1 for _ in source_file)
    training_set_size -= 1  # ends with empty line
    # uncomment to manually set training_set_size for testing purposes
    #training_set_size = 30

    # A real list, so that it can be shuffled and sliced on any Python version.
    training_order_list = list(range(training_set_size))
    # uncomment to randomize training samples
    # (should be done for deployment, however reproducible results might be good while testing)
    #random.shuffle(training_order_list)

    requested_test_set_size = max(10, int(0.1 * training_set_size))
    # uncomment to manually set the test set size for testing purposes
    #requested_test_set_size = 5
    test_order_list = training_order_list[-requested_test_set_size:]
    training_order_list = training_order_list[:-requested_test_set_size]
    # With fewer sentences than requested the slice is shorter than asked for;
    # report and use the number of sentences actually held out.
    test_set_size = len(test_order_list)
    print("Training sample size: %d (nr of source sentences)" % training_set_size)
    print("Test sample size: %d (nr of source sentences)" % test_set_size)

    # initialize variables
    d_theta_old = [0, 0]  # momentum terms

    # calculate average loss function value of test samples
    print("Calculating average loss function value of test samples using initial weights")
    initial_loss_value_test_set = get_average_loss_value_of_test_sample(
        test_order_list, nn, dictionary, source_file_name,
        n_best_list_file_name, sbleu_score_list_file_name, smoothing_factor)
    print("Average loss function value: %s" % (initial_loss_value_test_set,))
    loss_value_history = [initial_loss_value_test_set]

    converged = False
    epoch_count = 0
    # For debug
    xBleu_history = []
    xBleu_change_history = []

    # train until overfit (early stop)
    print("")
    print("Start training...")
    while not converged:
        # Snapshot (not alias) the weights at the start of the epoch so that
        # "weights from previous epoch" below really are the pre-epoch values.
        # NOTE(review): whether update_mini_batch modifies nn.weights in place
        # is not visible here; the deep copy is correct in either case.
        theta_previous = copy.deepcopy(nn.weights)
        for list_index, i in enumerate(training_order_list):
            (phrase_pair_dict_all, phrase_pair_dict_n_list,
             total_base_score_list, sbleu_score_list) = get_everything(
                i, source_file_name, n_best_list_file_name, sbleu_score_list_file_name)
            xblue_i, Ej_translation_probability_list = xbleu(
                nn, total_base_score_list, sbleu_score_list,
                phrase_pair_dict_n_list, dictionary, smoothing_factor)
            error_term_dict_i = get_error_term_dict(
                phrase_pair_dict_all, phrase_pair_dict_n_list,
                sbleu_score_list, xblue_i, Ej_translation_probability_list)
            d_theta_old = nn.update_mini_batch(
                phrase_pair_dict_all, learning_rate, dictionary,
                error_term_dict_i, d_theta_old)
            if debug_mode:
                debug_print_weights_after_update(nn, d_theta_old, error_term_dict_i)

            # check if xBleu increases after each iteration for testing purposes
            # change to >> if True: << if you want to make this check and see output
            if False:
                xblue_i_after, _ = xbleu(nn, total_base_score_list, sbleu_score_list,
                                         phrase_pair_dict_n_list, dictionary, smoothing_factor)
                if np.isnan(xblue_i_after):
                    converged = True
                xBleu_history.append((i, xblue_i))
                xBleu_change_history.append(xblue_i_after - xblue_i)
                print("-------------------------------------------------------------")
                print("xBleu history: [(xBleu_before_gradient_descent, xBleu_after_gradient_descent)]")
                print(xBleu_history)
                print("")
                print("xBleu_change_history [xBleu_after - xBleu_before]")
                print(xBleu_change_history)
                print("-------------------------------------------------------------")

            # NOTE(review): per-sample progress line; assumed to belong to the
            # sample loop rather than to the disabled debug block above.
            print("Finished epoch nr %d training sample nr %d (of %d) | source sentence nr %d"
                  % (epoch_count, list_index + 1, len(training_order_list), i + 1))

        epoch_count += 1
        print("=====================================")
        print("Finished epoch number: %d" % epoch_count)
        print("=====================================")

        # calculate loss function on test set after each epoch using updated weights
        print("Calculating loss function value on test set (%d samples) using new weights"
              % test_set_size)
        loss_value_test_set = get_average_loss_value_of_test_sample(
            test_order_list, nn, dictionary, source_file_name,
            n_best_list_file_name, sbleu_score_list_file_name, smoothing_factor)
        loss_value_history.append(loss_value_test_set)
        print_loss_value_history(loss_value_history, test_set_size)

        # TODO: CONVERGENCE TEST, a simple approach is to stop once the loss function value
        # of this iteration is worse than the previous one
        #if loss_value_history[-2] > loss_value_history[-1]:
        if False:  # as of now, run forever. Change this once you have determined a good convergence criterion
            converged = True
            print("CONVERGED!!!!!!!!!!!!")
            print("Saving weights from previous epoch to file")
            np.savetxt('W1.gz', theta_previous[0])
            np.savetxt('W2.gz', theta_previous[1])
        else:
            print("No overfitting, keep training...")