def run_baseline():
    """ Main function which loads in data, runs the model, and prints out statistics """
    # Get train dataset for task 6
    train_total = get_task_6_train()
    train, validation = split_training_data(train_total)

    # Get test dataset for task 6
    test = get_task_6_test()

    # Get word to glove vectors dictionary
    glove_dict = load_glove_vectors()

    # Split data into batches
    validation_batches = batch_data(validation, BATCH_SIZE)
    test_batches = batch_data(test, BATCH_SIZE)

    # Convert batches into vectors
    val_batched_input_vecs, val_batched_input_lengths, val_batched_end_of_sentences, \
        val_batched_num_sentences, val_batched_question_vecs, val_batched_question_lengths, \
        val_batched_answer_vecs = convert_to_vectors_with_sentences(
            validation_batches, glove_dict, MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)
    test_batched_input_vecs, test_batched_input_lengths, test_batched_end_of_sentences, \
        test_batched_num_sentences, test_batched_question_vecs, test_batched_question_lengths, \
        test_batched_answer_vecs = convert_to_vectors_with_sentences(
            test_batches, glove_dict, MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)

    # Print summary statistics
    print "Training samples: {}".format(len(train))
    print "Validation samples: {}".format(len(validation))
    print "Testing samples: {}".format(len(test))
    print "Batch size: {}".format(BATCH_SIZE)
    print "Validation number of batches: {}".format(len(validation_batches))
    print "Test number of batches: {}".format(len(test_batches))

    # Add placeholders
    input_placeholder, input_length_placeholder, end_of_sentences_placeholder, \
        num_sentences_placeholder, question_placeholder, question_length_placeholder, \
        labels_placeholder = add_placeholders()

    # Input module
    with tf.variable_scope("input"):
        sentence_states, all_outputs = input_module(
            input_placeholder, input_length_placeholder, end_of_sentences_placeholder)

    # Question module
    with tf.variable_scope("question"):
        question_state = question_module(question_placeholder, question_length_placeholder)

    # Episodic memory module
    with tf.variable_scope("episode"):
        episodic_memory_state = episodic_memory_module(
            sentence_states, num_sentences_placeholder, question_state)

    # Answer module
    with tf.variable_scope("answer"):
        projections = answer_module(episodic_memory_state)

    prediction_probs = tf.nn.softmax(projections)

    # Compute loss
    cross_entropy_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(projections, labels_placeholder))
    l2_loss = compute_regularization_penalty()
    cost = cross_entropy_loss + REG * l2_loss

    # Add optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(cost)

    # Initialize all variables
    init = tf.initialize_all_variables()
    saver = tf.train.Saver()

    # Train over multiple epochs
    with tf.Session() as sess:
        best_validation_accuracy = 0.0
        best_val_epoch = 0

        sess.run(init)

        # Train until we reach the maximum number of epochs
        for epoch in range(MAX_EPOCHS):
            print 'Epoch {}'.format(epoch)
            start = time.time()

            total_training_loss = 0
            sum_training_accuracy = 0

            train_batches = batch_data(train, BATCH_SIZE)
            train_batched_input_vecs, train_batched_input_lengths, train_batched_end_of_sentences, \
                train_batched_num_sentences, train_batched_question_vecs, train_batched_question_lengths, \
                train_batched_answer_vecs = convert_to_vectors_with_sentences(
                    train_batches, glove_dict, MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)

            # Compute average loss on training data
            for i in range(len(train_batches)):
                loss, _, batch_prediction_probs, input_outputs, sentence_states_out = sess.run(
                    [cost, optimizer, prediction_probs, all_outputs, sentence_states],
                    feed_dict={input_placeholder: train_batched_input_vecs[i],
                               input_length_placeholder: train_batched_input_lengths[i],
                               end_of_sentences_placeholder: train_batched_end_of_sentences[i],
                               num_sentences_placeholder: train_batched_num_sentences[i],
                               question_placeholder: train_batched_question_vecs[i],
                               question_length_placeholder: train_batched_question_lengths[i],
                               labels_placeholder: train_batched_answer_vecs[i]})

                total_training_loss += loss
                batch_accuracy = np.equal(np.argmax(batch_prediction_probs, axis=1),
                                          np.argmax(train_batched_answer_vecs[i], axis=1)).mean()
                sum_training_accuracy += batch_accuracy

                # Print a training update
                if i % UPDATE_LENGTH == 0:
                    print "Current average training loss: {}".format(total_training_loss / (i + 1))
                    print "Current training accuracy: {}".format(sum_training_accuracy / (i + 1))

            average_training_loss = total_training_loss / len(train_batches)
            training_accuracy = sum_training_accuracy / len(train_batches)

            total_validation_loss = 0
            sum_validation_accuracy = 0

            # Compute average loss on validation data
            for i in range(len(validation_batches)):
                loss, batch_prediction_probs = sess.run(
                    [cost, prediction_probs],
                    feed_dict={input_placeholder: val_batched_input_vecs[i],
                               input_length_placeholder: val_batched_input_lengths[i],
                               end_of_sentences_placeholder: val_batched_end_of_sentences[i],
                               num_sentences_placeholder: val_batched_num_sentences[i],
                               question_placeholder: val_batched_question_vecs[i],
                               question_length_placeholder: val_batched_question_lengths[i],
                               labels_placeholder: val_batched_answer_vecs[i]})

                total_validation_loss += loss
                batch_accuracy = np.equal(np.argmax(batch_prediction_probs, axis=1),
                                          np.argmax(val_batched_answer_vecs[i], axis=1)).mean()
                sum_validation_accuracy += batch_accuracy

            average_validation_loss = total_validation_loss / len(validation_batches)
            validation_accuracy = sum_validation_accuracy / len(validation_batches)

            print 'Training loss: {}'.format(average_training_loss)
            print 'Training accuracy: {}'.format(training_accuracy)
            print 'Validation loss: {}'.format(average_validation_loss)
            print 'Validation accuracy: {}'.format(validation_accuracy)

            if validation_accuracy > best_validation_accuracy:
                best_validation_accuracy = validation_accuracy
                best_val_epoch = epoch
                saver.save(sess, '../data/weights/dmn_' + OUTFILE_STRING + '.weights')
                print "Weights saved"

            print 'Total time: {}'.format(time.time() - start)

            outfile = './outputs/dmn/' + OUTFILE_STRING + '.txt'
            f = open(outfile, "a")
            f.write('train_acc, ' + str(training_accuracy) + '\n')
            f.write('train_loss, ' + str(average_training_loss) + '\n')
            f.write('val_acc, ' + str(validation_accuracy) + '\n')
            f.write('val_loss, ' + str(average_validation_loss) + '\n')
            f.close()

        # Compute average loss on testing data with best weights
        saver.restore(sess, '../data/weights/dmn_' + OUTFILE_STRING + '.weights')

        total_test_loss = 0
        sum_test_accuracy = 0

        # Compute average loss on test data
        for i in range(len(test_batches)):
            loss, batch_prediction_probs = sess.run(
                [cost, prediction_probs],
                feed_dict={input_placeholder: test_batched_input_vecs[i],
                           input_length_placeholder: test_batched_input_lengths[i],
                           end_of_sentences_placeholder: test_batched_end_of_sentences[i],
                           num_sentences_placeholder: test_batched_num_sentences[i],
                           question_placeholder: test_batched_question_vecs[i],
                           question_length_placeholder: test_batched_question_lengths[i],
                           labels_placeholder: test_batched_answer_vecs[i]})

            total_test_loss += loss
            batch_accuracy = np.equal(np.argmax(batch_prediction_probs, axis=1),
                                      np.argmax(test_batched_answer_vecs[i], axis=1)).mean()
            sum_test_accuracy += batch_accuracy

        average_test_loss = total_test_loss / len(test_batches)
        test_accuracy = sum_test_accuracy / len(test_batches)

        print '=-=' * 5
        print 'Test accuracy: {}'.format(test_accuracy)
        print '=-=' * 5
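
# The per-batch accuracy expression in run_baseline() is repeated verbatim in the
# training, validation, and test loops. The helper below is NOT part of the original
# code; it is a hypothetical refactoring of that same expression, shown only to make
# the metric explicit (argmax of the softmax output compared against the one-hot
# label). It assumes numpy is imported as np at module level, as in the code above.
def batch_accuracy_from_probs(batch_prediction_probs, batch_answer_vecs):
    """Fraction of examples whose argmax prediction matches the one-hot answer."""
    predicted = np.argmax(batch_prediction_probs, axis=1)
    actual = np.argmax(batch_answer_vecs, axis=1)
    return np.equal(predicted, actual).mean()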
def run_dmn(input, question):
    # Runs a single (passage, question) pair through an already-built DMN graph.
    # The trained graph, `session`, the placeholders, and the index dictionaries
    # are read from module-level globals.
    if input is None or question is None:
        return None, None, None, None, None

    # Tokenize the passage and the question
    input_split = re.findall(r"[\w']+|[.,!?;]", input)
    print input_split
    question_split = re.findall(r"[\w']+|[.,!?;]", question)
    print question_split

    # Dummy answer must be in the dataset
    input_question_answer = (input_split, question_split, unicode('hallway'))
    print input_question_answer

    # Replicate the example so it fills at least one full batch
    dummy_data = []
    for i in range(2 * BATCH_SIZE):
        dummy_data.append(input_question_answer)

    # Convert data into a batch
    dummy_batch = batch_data(dummy_data, BATCH_SIZE)

    # Convert words into indices
    val_batched_input_vecs, val_batched_input_lengths, val_batched_end_of_sentences, \
        val_batched_num_sentences, val_batched_question_vecs, val_batched_question_lengths, \
        val_batched_answers = convert_to_indices(
            dummy_batch, word_to_index, answer_to_index,
            MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)

    print "Running DMN"

    # Run the DMN on the first (fully replicated) batch
    probs, episode_1_gates, episode_2_gates, episode_3_gates = session.run(
        [prediction_probs, gates_for_episodes[0], gates_for_episodes[1], gates_for_episodes[2]],
        feed_dict={input_placeholder: val_batched_input_vecs[0],
                   input_length_placeholder: val_batched_input_lengths[0],
                   end_of_sentences_placeholder: val_batched_end_of_sentences[0],
                   num_sentences_placeholder: val_batched_num_sentences[0],
                   question_placeholder: val_batched_question_vecs[0],
                   question_length_placeholder: val_batched_question_lengths[0],
                   dropout_placeholder: 1.0})

    print "DMN finished"

    # Convert the most probable answer index back into a word
    answer_probs = probs[0]
    index_answer = np.argmax(answer_probs)
    answer = index_to_answer[index_answer]
    print "Answer is", answer

    # Get sentences
    sentences = re.split(r"[.]+", input)
    sentences.pop()
    print "Sentences", sentences
    num_sentences = len(sentences)
    print num_sentences

    print episode_1_gates[0]
    print episode_2_gates[0]
    print episode_3_gates[0]

    # Get the attention gates for each episode, one value per input sentence
    gates_1 = episode_1_gates[0][:num_sentences]
    gates_2 = episode_2_gates[0][:num_sentences]
    gates_3 = episode_3_gates[0][:num_sentences]
    print gates_1
    print gates_2
    print gates_3

    return answer, sentences, gates_1, gates_2, gates_3
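
# Example call (illustrative only; not part of the original code). It assumes the
# graph, `session`, and the index dictionaries have already been built, since
# run_dmn(input, question) reads them from globals, and the passage/question strings
# below are made up.
# answer, sentences, gates_1, gates_2, gates_3 = run_dmn(
#     "Mary went to the kitchen. John moved to the hallway.",
#     "Where is John?")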
def run_dmn():
    """ Main function which loads in the bAbI data for TASK, trains the model, and prints out statistics """
    print "Task", TASK

    # Get train dataset for task
    train_total = get_task_train(TASK)
    train_total = remove_long_sentences(train_total, MAX_INPUT_SENTENCES)
    train, validation = split_training_data(train_total)

    # Get all tokens from answers in training
    answer_to_index = answer_tokens_to_index(train_total)
    print answer_to_index
    number_of_answers = len(answer_to_index)
    print number_of_answers

    # Get test dataset for task
    test = get_task_test(TASK)
    test = remove_long_sentences(test, MAX_INPUT_SENTENCES)

    # Get word to glove vectors dictionary
    word_to_index, embedding_mat = load_glove_embedding()

    def initialize_word_vectors(shape, dtype):
        return embedding_mat

    # Create L tensor from embedding_mat
    with tf.variable_scope("Embedding") as scope:
        # L = tf.get_variable("L", shape=np.shape(embedding_mat), initializer=initialize_word_vectors)
        L = tf.get_variable("L", shape=np.shape(embedding_mat),
                            initializer=tf.random_uniform_initializer(minval=-np.sqrt(3), maxval=np.sqrt(3)))

    # Split data into batches
    validation_batches = batch_data(validation, BATCH_SIZE)
    test_batches = batch_data(test, BATCH_SIZE)

    # Convert batches into indices
    val_batched_input_vecs, val_batched_input_lengths, val_batched_end_of_sentences, \
        val_batched_num_sentences, val_batched_question_vecs, val_batched_question_lengths, \
        val_batched_answers = convert_to_indices(
            validation_batches, word_to_index, answer_to_index,
            MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)
    test_batched_input_vecs, test_batched_input_lengths, test_batched_end_of_sentences, \
        test_batched_num_sentences, test_batched_question_vecs, test_batched_question_lengths, \
        test_batched_answers = convert_to_indices(
            test_batches, word_to_index, answer_to_index,
            MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)

    # Print summary statistics
    print "Training samples: {}".format(len(train))
    print "Validation samples: {}".format(len(validation))
    print "Testing samples: {}".format(len(test))
    print "Batch size: {}".format(BATCH_SIZE)
    print "Validation number of batches: {}".format(len(validation_batches))
    print "Test number of batches: {}".format(len(test_batches))

    # Add placeholders
    input_placeholder, input_length_placeholder, end_of_sentences_placeholder, \
        num_sentences_placeholder, question_placeholder, question_length_placeholder, \
        labels_placeholder, dropout_placeholder = add_placeholders()

    # Input module
    sentence_states, all_outputs = input_module(
        input_placeholder, input_length_placeholder, end_of_sentences_placeholder, dropout_placeholder)

    # Question module
    question_state = question_module(question_placeholder, question_length_placeholder, dropout_placeholder)

    # Episodic memory module
    episodic_memory_state, gates_for_episodes = episodic_memory_module(
        sentence_states, num_sentences_placeholder, question_state)

    # Answer module
    projections = answer_module(episodic_memory_state, number_of_answers, dropout_placeholder)

    prediction_probs = tf.nn.softmax(projections)

    # Compute loss
    cross_entropy_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(projections, labels_placeholder))
    l2_loss = compute_regularization_penalty()
    cost = cross_entropy_loss + REG * l2_loss

    # Add optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(cost)

    # Initialize all variables
    init = tf.initialize_all_variables()
    saver = tf.train.Saver()

    # Train over multiple epochs
    with tf.Session() as sess:
        best_validation_accuracy = 0.0
        best_val_epoch = 0

        sess.run(init)

        # Train until we reach the maximum number of epochs
        for epoch in range(MAX_EPOCHS):
            print 'Epoch {}'.format(epoch)
            start = time.time()

            total_training_loss = 0
            sum_training_accuracy = 0

            train_batches = batch_data(train, BATCH_SIZE)
            train_batched_input_vecs, train_batched_input_lengths, train_batched_end_of_sentences, \
                train_batched_num_sentences, train_batched_question_vecs, train_batched_question_lengths, \
                train_batched_answers = convert_to_indices(
                    train_batches, word_to_index, answer_to_index,
                    MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)

            # Compute average loss on training data
            for i in range(len(train_batches)):
                loss, _, batch_prediction_probs, input_outputs, sentence_states_out, \
                    episode_1_gates, episode_2_gates, episode_3_gates = sess.run(
                        [cost, optimizer, prediction_probs, all_outputs, sentence_states,
                         gates_for_episodes[0], gates_for_episodes[1], gates_for_episodes[2]],
                        feed_dict={input_placeholder: train_batched_input_vecs[i],
                                   input_length_placeholder: train_batched_input_lengths[i],
                                   end_of_sentences_placeholder: train_batched_end_of_sentences[i],
                                   num_sentences_placeholder: train_batched_num_sentences[i],
                                   question_placeholder: train_batched_question_vecs[i],
                                   question_length_placeholder: train_batched_question_lengths[i],
                                   labels_placeholder: train_batched_answers[i],
                                   dropout_placeholder: DROPOUT})

                total_training_loss += loss
                batch_accuracy = np.equal(np.argmax(batch_prediction_probs, axis=1),
                                          train_batched_answers[i]).mean()
                sum_training_accuracy += batch_accuracy

                # Print a training update
                if i % UPDATE_LENGTH == 0:
                    print "Current average training loss: {}".format(total_training_loss / (i + 1))
                    print "Current training accuracy: {}".format(sum_training_accuracy / (i + 1))

            average_training_loss = total_training_loss / len(train_batches)
            training_accuracy = sum_training_accuracy / len(train_batches)

            total_validation_loss = 0
            sum_validation_accuracy = 0

            # Compute average loss on validation data
            for i in range(len(validation_batches)):
                loss, batch_prediction_probs = sess.run(
                    [cost, prediction_probs],
                    feed_dict={input_placeholder: val_batched_input_vecs[i],
                               input_length_placeholder: val_batched_input_lengths[i],
                               end_of_sentences_placeholder: val_batched_end_of_sentences[i],
                               num_sentences_placeholder: val_batched_num_sentences[i],
                               question_placeholder: val_batched_question_vecs[i],
                               question_length_placeholder: val_batched_question_lengths[i],
                               labels_placeholder: val_batched_answers[i],
                               dropout_placeholder: 1.0})

                total_validation_loss += loss
                batch_accuracy = np.equal(np.argmax(batch_prediction_probs, axis=1),
                                          val_batched_answers[i]).mean()
                sum_validation_accuracy += batch_accuracy

            average_validation_loss = total_validation_loss / len(validation_batches)
            validation_accuracy = sum_validation_accuracy / len(validation_batches)

            print 'Training loss: {}'.format(average_training_loss)
            print 'Training accuracy: {}'.format(training_accuracy)
            print 'Validation loss: {}'.format(average_validation_loss)
            print 'Validation accuracy: {}'.format(validation_accuracy)

            if validation_accuracy >= best_validation_accuracy:
                best_validation_accuracy = validation_accuracy
                best_val_epoch = epoch
                saver.save(sess, '../data/weights/dmn_' + OUTFILE_STRING + '.weights')
                print "Weights saved to " + '../data/weights/dmn_' + OUTFILE_STRING + '.weights'

            print 'Total time: {}'.format(time.time() - start)

            outfile = './outputs/dmn/' + OUTFILE_STRING + '.txt'
            f = open(outfile, "a")
            f.write('epoch, ' + str(epoch) + '\n')
            f.write('train_acc, ' + str(training_accuracy) + '\n')
            f.write('train_loss, ' + str(average_training_loss) + '\n')
            f.write('val_acc, ' + str(validation_accuracy) + '\n')
            f.write('val_loss, ' + str(average_validation_loss) + '\n')
            f.close()

        # Compute average loss on testing data with best weights
        saver.restore(sess, '../data/weights/dmn_' + OUTFILE_STRING + '.weights')

        total_test_loss = 0
        sum_test_accuracy = 0

        # Compute average loss on test data
        for i in range(len(test_batches)):
            loss, batch_prediction_probs = sess.run(
                [cost, prediction_probs],
                feed_dict={input_placeholder: test_batched_input_vecs[i],
                           input_length_placeholder: test_batched_input_lengths[i],
                           end_of_sentences_placeholder: test_batched_end_of_sentences[i],
                           num_sentences_placeholder: test_batched_num_sentences[i],
                           question_placeholder: test_batched_question_vecs[i],
                           question_length_placeholder: test_batched_question_lengths[i],
                           labels_placeholder: test_batched_answers[i],
                           dropout_placeholder: 1.0})

            total_test_loss += loss
            batch_accuracy = np.equal(np.argmax(batch_prediction_probs, axis=1),
                                      test_batched_answers[i]).mean()
            sum_test_accuracy += batch_accuracy

        average_test_loss = total_test_loss / len(test_batches)
        test_accuracy = sum_test_accuracy / len(test_batches)

        print '=-=' * 5
        print 'Test accuracy: {}'.format(test_accuracy)
        print '=-=' * 5

        f = open(outfile, "a")
        f.write('test_acc, ' + str(test_accuracy) + '\n')
        f.close()
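
# remove_long_sentences is defined elsewhere in the project. The function below is a
# hypothetical sketch (note the _sketch suffix) of what it is assumed to do, inferred
# from how it is called above: drop examples whose passage has more sentences than the
# episodic memory module can attend over. Counting '.' tokens as sentence boundaries is
# an assumption; examples are taken to be (input_tokens, question_tokens, answer)
# tuples, matching the dummy example built in run_dmn(input, question).
def remove_long_sentences_sketch(data, max_sentences):
    kept = []
    for input_tokens, question_tokens, answer in data:
        num_sentences = sum(1 for token in input_tokens if token == '.')
        if num_sentences <= max_sentences:
            kept.append((input_tokens, question_tokens, answer))
    return kept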
def run_dmn():
    """ Main function which loads in the reading-comprehension data, trains the model, and prints out statistics """
    # Get train dataset
    train_total = get_rc_train()
    train_total = remove_long_sentences(train_total, MAX_INPUT_SENTENCES)
    train, validation = split_training_data(train_total)

    # Get all tokens from answers in training
    answer_to_index = answer_tokens_to_index(train_total)
    number_of_answers = len(answer_to_index)

    # Get test dataset
    test = get_rc_val()
    test = remove_long_sentences(test, MAX_INPUT_SENTENCES)

    # Build the vocabulary from the training data
    # (alternative: word_to_index, embedding_mat = load_glove_embedding())
    word_to_index, vocab_size = create_word_to_index_from_vocab(train)
    print "vocab_size: "
    print vocab_size

    # def initialize_word_vectors(shape, dtype):
    #     return embedding_mat

    # Create L tensor (the embedding matrix)
    with tf.variable_scope("Embedding") as scope:
        # L = tf.get_variable("L", shape=np.shape(embedding_mat), initializer=initialize_word_vectors)
        L = tf.get_variable("L", shape=(vocab_size, WORD_VECTOR_LENGTH),
                            initializer=tf.random_uniform_initializer(minval=-np.sqrt(3), maxval=np.sqrt(3)))

    # Split data into batches
    validation_batches = batch_data(validation, BATCH_SIZE)
    test_batches = batch_data(test, BATCH_SIZE)

    # Convert batches into indices
    val_batched_input_vecs, val_batched_input_lengths, val_batched_end_of_sentences, \
        val_batched_num_sentences, val_batched_question_vecs, val_batched_question_lengths, \
        val_batched_answers = convert_to_indices(
            validation_batches, word_to_index, answer_to_index,
            MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)
    test_batched_input_vecs, test_batched_input_lengths, test_batched_end_of_sentences, \
        test_batched_num_sentences, test_batched_question_vecs, test_batched_question_lengths, \
        test_batched_answers = convert_to_indices(
            test_batches, word_to_index, answer_to_index,
            MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)

    # Print summary statistics
    print "Training samples: {}".format(len(train))
    print "Validation samples: {}".format(len(validation))
    print "Testing samples: {}".format(len(test))
    print "Batch size: {}".format(BATCH_SIZE)
    print "Validation number of batches: {}".format(len(validation_batches))
    print "Test number of batches: {}".format(len(test_batches))

    # Add placeholders
    input_placeholder, input_length_placeholder, end_of_sentences_placeholder, \
        num_sentences_placeholder, question_placeholder, question_length_placeholder, \
        labels_placeholder, dropout_placeholder = add_placeholders()

    # Input module
    sentence_states, all_outputs = input_module(
        input_placeholder, input_length_placeholder, end_of_sentences_placeholder, dropout_placeholder)

    # Question module
    question_state = question_module(question_placeholder, question_length_placeholder, dropout_placeholder)

    # Episodic memory module
    episodic_memory_state, gates_for_episodes = episodic_memory_module(
        sentence_states, num_sentences_placeholder, question_state)

    # Answer module
    projections = answer_module(episodic_memory_state, number_of_answers, dropout_placeholder)

    prediction_probs = tf.nn.softmax(projections)

    # Compute loss
    cross_entropy_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(projections, labels_placeholder))
    l2_loss = compute_regularization_penalty()
    cost = cross_entropy_loss + REG * l2_loss

    # Add optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(cost)

    # Initialize all variables
    init = tf.initialize_all_variables()
    saver = tf.train.Saver()

    # Train over multiple epochs
    with tf.Session() as sess:
        best_validation_accuracy = 0.0
        best_val_epoch = 0

        sess.run(init)

        # Train until we reach the maximum number of epochs
        for epoch in range(MAX_EPOCHS):
            print 'Epoch {}'.format(epoch)
            start = time.time()

            total_training_loss = 0
            sum_training_accuracy = 0

            train_batches = batch_data(train, BATCH_SIZE)
            train_batched_input_vecs, train_batched_input_lengths, train_batched_end_of_sentences, \
                train_batched_num_sentences, train_batched_question_vecs, train_batched_question_lengths, \
                train_batched_answers = convert_to_indices(
                    train_batches, word_to_index, answer_to_index,
                    MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)

            # Compute average loss on training data
            for i in range(len(train_batches)):
                loss, _, batch_prediction_probs, input_outputs, sentence_states_out, \
                    episode_1_gates, episode_2_gates, episode_3_gates = sess.run(
                        [cost, optimizer, prediction_probs, all_outputs, sentence_states,
                         gates_for_episodes[0], gates_for_episodes[1], gates_for_episodes[2]],
                        feed_dict={input_placeholder: train_batched_input_vecs[i],
                                   input_length_placeholder: train_batched_input_lengths[i],
                                   end_of_sentences_placeholder: train_batched_end_of_sentences[i],
                                   num_sentences_placeholder: train_batched_num_sentences[i],
                                   question_placeholder: train_batched_question_vecs[i],
                                   question_length_placeholder: train_batched_question_lengths[i],
                                   labels_placeholder: train_batched_answers[i],
                                   dropout_placeholder: DROPOUT})

                total_training_loss += loss
                batch_accuracy = np.equal(np.argmax(batch_prediction_probs, axis=1),
                                          train_batched_answers[i]).mean()
                sum_training_accuracy += batch_accuracy

                # Print a training update
                if i % UPDATE_LENGTH == 0:
                    print "Current average training loss: {}".format(total_training_loss / (i + 1))
                    print "Current training accuracy: {}".format(sum_training_accuracy / (i + 1))

            average_training_loss = total_training_loss / len(train_batches)
            training_accuracy = sum_training_accuracy / len(train_batches)

            total_validation_loss = 0
            sum_validation_accuracy = 0

            # Compute average loss on validation data
            for i in range(len(validation_batches)):
                loss, batch_prediction_probs = sess.run(
                    [cost, prediction_probs],
                    feed_dict={input_placeholder: val_batched_input_vecs[i],
                               input_length_placeholder: val_batched_input_lengths[i],
                               end_of_sentences_placeholder: val_batched_end_of_sentences[i],
                               num_sentences_placeholder: val_batched_num_sentences[i],
                               question_placeholder: val_batched_question_vecs[i],
                               question_length_placeholder: val_batched_question_lengths[i],
                               labels_placeholder: val_batched_answers[i],
                               dropout_placeholder: 1.0})

                total_validation_loss += loss
                batch_accuracy = np.equal(np.argmax(batch_prediction_probs, axis=1),
                                          val_batched_answers[i]).mean()
                sum_validation_accuracy += batch_accuracy

            average_validation_loss = total_validation_loss / len(validation_batches)
            validation_accuracy = sum_validation_accuracy / len(validation_batches)

            print 'Training loss: {}'.format(average_training_loss)
            print 'Training accuracy: {}'.format(training_accuracy)
            print 'Validation loss: {}'.format(average_validation_loss)
            print 'Validation accuracy: {}'.format(validation_accuracy)

            if validation_accuracy > best_validation_accuracy:
                best_validation_accuracy = validation_accuracy
                best_val_epoch = epoch
                saver.save(sess, '../data/weights/dmn_' + OUTFILE_STRING + '.weights')
                print "Weights saved to " + '../data/weights/dmn_' + OUTFILE_STRING + '.weights'

            print 'Total time: {}'.format(time.time() - start)

            outfile = './outputs/dmn/' + OUTFILE_STRING + '.txt'
            f = open(outfile, "a")
            f.write('epoch, ' + str(epoch) + '\n')
            f.write('train_acc, ' + str(training_accuracy) + '\n')
            f.write('train_loss, ' + str(average_training_loss) + '\n')
            f.write('val_acc, ' + str(validation_accuracy) + '\n')
            f.write('val_loss, ' + str(average_validation_loss) + '\n')
            f.close()

        # Compute average loss on testing data with best weights
        saver.restore(sess, '../data/weights/dmn_' + OUTFILE_STRING + '.weights')

        total_test_loss = 0
        sum_test_accuracy = 0

        # Compute average loss on test data
        for i in range(len(test_batches)):
            loss, batch_prediction_probs = sess.run(
                [cost, prediction_probs],
                feed_dict={input_placeholder: test_batched_input_vecs[i],
                           input_length_placeholder: test_batched_input_lengths[i],
                           end_of_sentences_placeholder: test_batched_end_of_sentences[i],
                           num_sentences_placeholder: test_batched_num_sentences[i],
                           question_placeholder: test_batched_question_vecs[i],
                           question_length_placeholder: test_batched_question_lengths[i],
                           labels_placeholder: test_batched_answers[i],
                           dropout_placeholder: 1.0})

            total_test_loss += loss
            batch_accuracy = np.equal(np.argmax(batch_prediction_probs, axis=1),
                                      test_batched_answers[i]).mean()
            sum_test_accuracy += batch_accuracy

        average_test_loss = total_test_loss / len(test_batches)
        test_accuracy = sum_test_accuracy / len(test_batches)

        print '=-=' * 5
        print 'Test accuracy: {}'.format(test_accuracy)
        print '=-=' * 5

        f = open(outfile, "a")
        f.write('test_acc, ' + str(test_accuracy) + '\n')
        f.close()