def main():
    # assemble the training, training-evaluation and test dialogue sets;
    # optionally keep only the last turn of each dialogue
    dialogues_train = map(lambda x: x, train_babi)
    dialogues_train_eval = (map(lambda x: [x[-1]], train_babi)
                            if args.predict_last_turn_only
                            else map(lambda x: x, train_babi))
    dialogues_test = (map(lambda x: [x[-1]], test_plus)
                      if args.predict_last_turn_only
                      else map(lambda x: x, test_plus))

    # flatten the per-dialogue lists into flat lists of stories
    data_train = reduce(lambda x, y: x + y, dialogues_train, [])
    data_train_eval = reduce(lambda x, y: x + y, dialogues_train_eval, [])
    data_test = reduce(lambda x, y: x + y, dialogues_test, [])

    # vectorize stories, questions and answers
    train_s, train_q, train_a = vectorize_data_dialog(
        data_train, word_idx, answer_idx, sentence_size, memory_size)
    train_eval_s, train_eval_q, train_eval_a = vectorize_data_dialog(
        data_train_eval, word_idx, answer_idx, sentence_size, memory_size)
    test_s, test_q, test_a = vectorize_data_dialog(
        data_test, word_idx, answer_idx, sentence_size, memory_size)

    print("Training Size (dialogues)", len(dialogues_train))
    print("Training/Evaluation Size (dialogues)", len(dialogues_train_eval))
    print("Testing Size (dialogues)", len(dialogues_test))
    print("Training Size (stories)", len(data_train))
    print("Training/Evaluation Size (stories)", len(data_train_eval))
    print("Testing Size (stories)", len(data_test))

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=FLAGS.learning_rate)

    # mini-batch (start, end) boundaries; the trailing partial batch is dropped
    batches = zip(range(0, len(data_train) - batch_size, batch_size),
                  range(batch_size, len(data_train), batch_size))
    batches = [(start, end) for start, end in batches]

    with tf.Session() as sess:
        model = MemN2N(batch_size, vocab_size, sentence_size, memory_size,
                       FLAGS.embedding_size, answers_vectorized,
                       session=sess, hops=FLAGS.hops,
                       max_grad_norm=FLAGS.max_grad_norm,
                       optimizer=optimizer)
        best_accuracy_per_epoch = train_model(
            model,
            (train_s, train_q, train_a),
            (train_eval_s, train_eval_q, train_eval_a),
            (test_s, test_q, test_a),
            batches)
    return best_accuracy_per_epoch
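# Illustration (not part of the original scripts): every main() variant in
# this section builds its mini-batch boundaries with the same zip-of-ranges
# pattern, which produces consecutive (start, end) slices and silently drops
# the trailing partial batch. A minimal standalone check of that behaviour:
def _demo_batch_boundaries(n_stories=10, batch_size=3):
    # with 10 stories and batch_size 3 this returns [(0, 3), (3, 6), (6, 9)],
    # i.e. the last story (index 9) never enters a training batch
    pairs = zip(range(0, n_stories - batch_size, batch_size),
                range(batch_size, n_stories, batch_size))
    return [(start, end) for start, end in pairs]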
def main(in_train_dialogue_name, in_testset_size, in_fold_number,
         in_dataset_shuffle):
    # train on the single named dialogue; test on the in_fold_number-th slice
    # of the shuffled dataset, skipping the training dialogue itself
    trainset_idx = [get_global_dialogue_index(in_train_dialogue_name)]
    testset_idx = []
    testset_counter = in_fold_number * in_testset_size
    while len(testset_idx) != in_testset_size:
        current_idx = in_dataset_shuffle[testset_counter]
        if current_idx not in trainset_idx:
            testset_idx.append(current_idx)
        testset_counter += 1
    dialogues_train = map(lambda x: all_dialogues[x], trainset_idx)
    dialogues_test = map(lambda x: all_dialogues[x], testset_idx)

    # flatten dialogues into stories and vectorize them
    data_train = reduce(lambda x, y: x + y, dialogues_train, [])
    data_test = reduce(lambda x, y: x + y, dialogues_test, [])
    train_s, train_q, train_a = vectorize_data_dialog(
        data_train, word_idx, answer_idx, sentence_size, memory_size)
    test_s, test_q, test_a = vectorize_data_dialog(
        data_test, word_idx, answer_idx, sentence_size, memory_size)

    print("Training Size (dialogues)", len(dialogues_train))
    print("Testing Size (dialogues)", len(dialogues_test))
    print("Training Size (stories)", len(data_train))
    print("Testing Size (stories)", len(data_test))

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size
    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate,
                                       epsilon=FLAGS.epsilon)

    batches = zip(range(0, len(data_train) - batch_size, batch_size),
                  range(batch_size, len(data_train), batch_size))
    batches = [(start, end) for start, end in batches]

    with tf.Session() as sess:
        model = MemN2N(batch_size, vocab_size, sentence_size, memory_size,
                       FLAGS.embedding_size,
                       answer_vocab_size=answer_vocab_size,
                       session=sess, hops=FLAGS.hops,
                       max_grad_norm=FLAGS.max_grad_norm,
                       optimizer=optimizer)
        best_accuracy_per_epoch = train_model(model,
                                              (train_s, train_q, train_a),
                                              (test_s, test_q, test_a),
                                              batches)
    return best_accuracy_per_epoch
def main(in_trainset_size, in_split_number):
    # train/validation/test sets
    all_dialogues_idx = reduce(lambda x, y: x + [y],
                               range(len(all_dialogues)), [])
    random.shuffle(all_dialogues_idx)
    trainset_idx = all_dialogues_idx[
        in_split_number * in_trainset_size:
        in_split_number * in_trainset_size + in_trainset_size]
    testset_idx = filter(lambda x: x not in trainset_idx, all_dialogues_idx)
    dialogues_train = map(lambda x: all_dialogues[x], trainset_idx)
    dialogues_test = map(lambda x: all_dialogues[x], testset_idx)

    data_train = reduce(lambda x, y: x + y, dialogues_train, [])
    data_test = reduce(lambda x, y: x + y, dialogues_test, [])
    train_s, train_q, train_a = vectorize_data_dialog(
        data_train, word_idx, answer_idx, sentence_size, memory_size)
    test_s, test_q, test_a = vectorize_data_dialog(
        data_test, word_idx, answer_idx, sentence_size, memory_size)

    print("Training Size (dialogues)", len(dialogues_train))
    print("Testing Size (dialogues)", len(dialogues_test))
    print("Training Size (stories)", len(data_train))
    print("Testing Size (stories)", len(data_test))

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size
    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate,
                                       epsilon=FLAGS.epsilon)

    batches = zip(range(0, len(data_train) - batch_size, batch_size),
                  range(batch_size, len(data_train), batch_size))
    batches = [(start, end) for start, end in batches]

    with tf.Session() as sess:
        model = MemN2N(batch_size, vocab_size, sentence_size, memory_size,
                       FLAGS.embedding_size,
                       answer_vocab_size=answer_vocab_size,
                       session=sess, hops=FLAGS.hops,
                       max_grad_norm=FLAGS.max_grad_norm,
                       optimizer=optimizer)
        best_accuracy_per_epoch = train_model(model,
                                              (train_s, train_q, train_a),
                                              (test_s, test_q, test_a),
                                              batches)
    return best_accuracy_per_epoch
def main():
    data_train = reduce(lambda x, y: x + y, train, [])
    data_test = reduce(lambda x, y: x + y, test, [])
    train_s, train_q, train_a = vectorize_data_dialog(
        data_train, word_idx, answer_idx, sentence_size, memory_size)
    test_s, test_q, test_a = vectorize_data_dialog(
        data_test, word_idx, answer_idx, sentence_size, memory_size)

    print("Training Size (dialogues)", len(train))
    print("Testing Size (dialogues)", len(test))
    print("Training Size (stories)", len(data_train))
    print("Testing Size (stories)", len(data_test))

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size
    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate,
                                       epsilon=FLAGS.epsilon)

    batches = zip(range(0, len(data_train) - batch_size, batch_size),
                  range(batch_size, len(data_train), batch_size))
    batches = [(start, end) for start, end in batches]

    with tf.Session() as sess:
        model = MemN2N(batch_size, vocab_size, sentence_size, memory_size,
                       FLAGS.embedding_size,
                       answer_vocab_size=answer_vocab_size,
                       session=sess, hops=FLAGS.hops,
                       max_grad_norm=FLAGS.max_grad_norm,
                       optimizer=optimizer)
        best_accuracy_per_epoch = train_model(model,
                                              (train_s, train_q, train_a),
                                              (test_s, test_q, test_a),
                                              batches)
    return best_accuracy_per_epoch
def main(in_trainset_size, in_testset_size, in_fold_number,
         in_dataset_shuffle):
    # training set: fold-sized slice cycled over the maximal-length bAbI
    # dialogues; test set: fold-sized slice cycled over the shuffled dataset
    max_dialogue_length = max(map(len, all_dialogues_babi))
    longer_dialogues_idx = filter(
        lambda x: len(all_dialogues_babi[x]) == max_dialogue_length,
        in_dataset_shuffle)
    trainset_idx = []
    for train_dialogue_counter in range(
            in_trainset_size * in_fold_number,
            in_trainset_size * (in_fold_number + 1)):
        trainset_idx.append(
            longer_dialogues_idx[train_dialogue_counter
                                 % len(longer_dialogues_idx)])
    testset_idx = []
    for test_dialogue_counter in range(
            in_testset_size * in_fold_number,
            in_testset_size * (in_fold_number + 1)):
        testset_idx.append(
            in_dataset_shuffle[test_dialogue_counter
                               % len(in_dataset_shuffle)])

    dialogues_train = map(lambda x: all_dialogues_babi[x], trainset_idx)
    # testing on API calls only?
    dialogues_test = map(lambda x: [all_dialogues_babi_plus[x][-1]],
                         testset_idx)

    data_train = reduce(lambda x, y: x + y, dialogues_train, [])
    data_test = reduce(lambda x, y: x + y, dialogues_test, [])
    train_s, train_q, train_a = vectorize_data_dialog(
        data_train, word_idx, answer_idx, sentence_size, memory_size)
    test_s, test_q, test_a = vectorize_data_dialog(
        data_test, word_idx, answer_idx, sentence_size, memory_size)

    print("Training Size (dialogues)", len(dialogues_train))
    print("Testing Size (dialogues)", len(dialogues_test))
    print("Training Size (stories)", len(data_train))
    print("Testing Size (stories)", len(data_test))

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=FLAGS.learning_rate
        # , epsilon=FLAGS.epsilon
    )

    batches = zip(range(0, len(data_train) - batch_size, batch_size),
                  range(batch_size, len(data_train), batch_size))
    batches = [(start, end) for start, end in batches]

    with tf.Session() as sess:
        model = MemN2N(batch_size, vocab_size, sentence_size, memory_size,
                       FLAGS.embedding_size, answers_vectorized,
                       session=sess, hops=FLAGS.hops,
                       max_grad_norm=FLAGS.max_grad_norm,
                       optimizer=optimizer)
        best_accuracy_per_epoch = train_model(model,
                                              (train_s, train_q, train_a),
                                              (test_s, test_q, test_a),
                                              batches)
    return best_accuracy_per_epoch
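# Hypothetical driver (an assumption, not part of the original scripts): one
# plausible way to invoke the fold-based main() above over several
# cross-validation folds with a fixed dataset shuffle. The fold sizes below
# are made-up example values, and all_dialogues_babi is assumed to be loaded
# at module level as in the script itself.
if __name__ == '__main__':
    import random

    TRAINSET_SIZE, TESTSET_SIZE, N_FOLDS = 10, 100, 5
    dataset_shuffle = list(range(len(all_dialogues_babi)))
    random.shuffle(dataset_shuffle)
    for fold in range(N_FOLDS):
        best_accuracy = main(TRAINSET_SIZE, TESTSET_SIZE, fold,
                             dataset_shuffle)
        print('fold', fold, 'best accuracy:', best_accuracy)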