def generate_RDL_data(FLAGS, testset=None):
    """Generate RDL template files for the training tasks or for test sets.

    Args:
        FLAGS: Parsed flags object. It is forwarded to ``load_task`` and, in
            training mode, its ``temp_path`` attribute prefixes the output
            location handed to ``store_template_json``.
        testset: ``None`` processes every entry of ``TASK_NAME`` under
            ``DATASET_PATH``. ``'all'`` processes the ``tst4/`` .. ``tst1/``
            directories under ``TEST_DATASET_PATH``. Any other value ``x``
            processes only ``tst<x>/``.
    """
    if testset is None:
        kb_path = DATASET_PATH + "extendedkb1.txt"
        for task_name in TASK_NAME:
            inputtaskfile = (DATASET_PATH + task_name +
                             '-kb1_atmosphere-distr0.5-trn10000.json')
            print("Generating RDL data for ", inputtaskfile)
            # load_task returns a 9-tuple; only the first element is needed.
            task_data, _, _, _, _, _, _, _, _ = load_task(
                [inputtaskfile], FLAGS, testing_ratio=0.0)
            store_template_json(FLAGS.temp_path + task_name, kb_path,
                                task_data)
        return

    if testset == 'all':
        test_dirs = ['tst4/', 'tst3/', 'tst2/', 'tst1/']
    else:
        test_dirs = ['tst{}/'.format(testset)]

    # Collect every source path first, then process them, so directory
    # listing is finished before any output is written.
    source_groups = [_get_source_paths(TEST_DATASET_PATH + test_dir)
                     for test_dir in test_dirs]

    for sources in source_groups:
        for source in sources:
            inputtaskfile = source + '.json'
            # The knowledge base is chosen from the source path itself.
            kb_name = ('extendedkb1.txt' if 'kb1' in source
                       else 'extendedkb2.txt')
            kb_path = TEST_DATASET_PATH + kb_name
            print("Generating RDL data for ", source)
            # A dedicated name is used here: the original rebound `data`
            # while still iterating over it.
            task_data, _, _, _, _, _, _, _, _ = load_task(
                [inputtaskfile], FLAGS, testing_ratio=0.0, testing=True)
            store_template_json(source.replace('.json', ''), kb_path,
                                task_data)
def get_F1(log_dir, task_id):
    """Return the mean of the non-NaN F1 values for one task.

    Predictions and ground truth are read from ``task_<task_id>_pred.npy``
    and ``task_<task_id>_truth.npy`` inside `log_dir`. Test examples are
    grouped by question text, one confusion matrix is built per distinct
    question, and the matrices are summed before ``f1_score`` is applied.

    Reads the module-level names ``data_dir`` and ``plot``.
    """
    file_prefix = 'task_' + str(task_id)
    predicted_mat = np.load(os.path.join(log_dir, file_prefix + '_pred.npy'))
    truth_mat = np.load(os.path.join(log_dir, file_prefix + '_truth.npy'))

    # Only the test split of the dataset is needed.
    _, test_data = load_task(data_dir, task_id)

    # Map each distinct question string to the example indices that ask it,
    # and gather every answer seen in the truth data or the predictions.
    rows_by_question = defaultdict(list)
    answers = Set()
    for row, example in enumerate(test_data):
        _story, question, answer, _support = example
        question_text = ' '.join(question)
        answers.add(answer[0])
        rows_by_question[question_text].append(row)
    for predicted in predicted_mat:
        answers.add(predicted)
    answer_map = {answer: position for position, answer in enumerate(answers)}

    # Sum the per-question confusion matrices.
    size = len(answer_map)
    net_confusion_mat = np.zeros((size, size))
    for question_text, rows in rows_by_question.items():
        per_question = confusion_question(truth_mat[rows],
                                          predicted_mat[rows], answer_map)
        if plot:
            print_confusion_matrix(per_question, answer_map.keys(),
                                   question_text + '?')
        net_confusion_mat += per_question

    scores = [score for score in f1_score(net_confusion_mat)
              if not math.isnan(score)]
    return np.mean(scores)
def getdata():
    """Load all 20 tasks, build the vocabulary and construct the model graph.

    Side effects: seeds TensorFlow with ``FLAGS.random_state``, binds the
    module-level ``model`` to a new ``Hashed_Mem_Nw`` and adds a
    ``train_op`` to the graph.

    Returns:
        Tuple ``(data, vocab, word_idx, sentence_size, memory_size,
        vocab_size)``. ``data`` is the flat list of ``(story, query,
        answer)`` examples from every train and test split.
    """
    # `global` has to precede the first binding of `model` in this scope;
    # declaring it after the assignment is a SyntaxError on Python 3.
    global model

    ids = range(1, 21)
    train, test = [], []
    for i in ids:
        tr, te = load_task(FLAGS.data_dir, i)
        train.append(tr)
        test.append(te)
    data = list(chain.from_iterable(train + test))

    vocab = sorted(
        reduce(lambda x, y: x | y,
               (set(list(chain.from_iterable(s)) + q + a)
                for s, q, a in data)))
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

    max_story_size = max(map(len, (s for s, _, _ in data)))
    sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data)))
    query_size = max(map(len, (q for _, q, _ in data)))
    memory_size = min(FLAGS.memory_size, max_story_size)
    vocab_size = len(word_idx) + 1  # +1 for nil word
    sentence_size = max(query_size, sentence_size)  # for the position

    tf.set_random_seed(FLAGS.random_state)

    global_step = tf.Variable(0, name="global_step", trainable=False)
    learning_rate = tf.train.exponential_decay(
        FLAGS.learning_rate, global_step, 90000, 0.96, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       epsilon=FLAGS.epsilon)

    model = Hashed_Mem_Nw(vocab_size=vocab_size,
                          query_size=sentence_size,
                          story_size=sentence_size,
                          memory_key_size=memory_size,
                          memory_value_size=memory_size,
                          embedding_size=FLAGS.embedding_size,
                          reader=FLAGS.reader,
                          l2_lambda=FLAGS.l2_lambda)

    # Clip, add noise, then zero the nil-slot gradient where applicable.
    grads_and_vars = optimizer.compute_gradients(model.loss_op)
    grads_and_vars = [(tf.clip_by_norm(g, FLAGS.max_grad_norm), v)
                      for g, v in grads_and_vars if g is not None]
    grads_and_vars = [(add_gradient_noise(g), v) for g, v in grads_and_vars]
    nil_grads_and_vars = [
        (zero_nil_slot(g), v) if v.name in model._nil_vars else (g, v)
        for g, v in grads_and_vars
    ]
    # The op is registered in the graph under the name "train_op"; the
    # Python handle is not returned.
    optimizer.apply_gradients(nil_grads_and_vars, name="train_op",
                              global_step=global_step)

    return data, vocab, word_idx, sentence_size, memory_size, vocab_size
def test(tasks, vocab_tasks, device, mix=False, task_id=None):
    """Evaluate a saved RoutedNetwork on test data and write a CSV summary.

    With ``mix`` set, the train/test splits of every entry in `tasks` are
    concatenated; otherwise only ``tasks[0]`` is loaded. The vocabulary comes
    from `vocab_tasks`. The weights are read from ``STATE_PATH`` formatted
    with `task_id` and the module-level ``try_n``.

    Also reads the module-level names ``data_dir``, ``train_str``,
    ``test_str``, ``basedir`` and ``verbose``.
    """
    train, test = list(), list()
    if not mix:
        task = tasks[0]
        train, test = load_task(data_dir, task, valid=False)
    else:
        for task in tasks:
            part_train, part_test = load_task(data_dir, task, valid=False)
            train = train + part_train
            test = test + part_test

    vocab, vocab_size = get_vocab(vocab_tasks)

    print_start_test_message(task_id)

    # Sentence length is measured over both splits.
    len_max_sentence = get_len_max_sentence(train + test)
    # Index 0 is left unused; tokens are numbered from 1.
    token_to_idx = {token: index for index, token in enumerate(vocab, 1)}
    vec_test = indexize_data(test, token_to_idx, len_max_sentence)

    routed_network = RoutedNetwork(vocab_size, len_max_sentence, 3, device)
    routed_network.to(device)
    routed_network = routed_network.float()
    state_file = STATE_PATH.format(task_id, try_n)
    routed_network.load_state_dict(torch.load(state_file))

    loss, correct = eval(task_id, device, routed_network, vec_test,
                         len_max_sentence)

    n_examples = len(vec_test)
    best_results = {
        "train tasks": [train_str],
        "test_tasks": [test_str],
        "try": [try_n],
        "accuracy": [float(correct) / n_examples],
        "loss": [loss]
    }
    csv_name = "results/csv_doc/train{}_test{}_try_{}.csv".format(
        train_str, test_str, try_n)
    pd.DataFrame(best_results).to_csv(os.path.join(basedir, csv_name))

    if not verbose:
        summary = [
            "Finished Testing task {}\n".format(task_id),
            "loss is: {}\n".format(loss),
            "correct: {} out of {}\n".format(correct, n_examples),
        ]
        print("".join(summary))
def test(tasks, vocab_tasks, device, mix=False, name=None):
    """Evaluate a saved EntNet on test data and optionally print a summary.

    With ``mix`` set, the splits of every entry in `tasks` are loaded first,
    and then replaced by the concatenated splits of `vocab_tasks` before
    vectorization. Otherwise only ``tasks[0]`` is used. The weights are read
    from ``STATE_PATH`` formatted with `name` and ``0``.

    Sets the module-level ``n_memories`` to the number of entities when it is
    falsy. Also reads ``data_dir``, ``tie_keys`` and ``verbose``.
    """
    global n_memories

    def gather(task_ids):
        # Concatenate the train/test splits of all listed tasks.
        all_train, all_test = list(), list()
        for task_id in task_ids:
            part_train, part_test = load_task(data_dir, task_id)
            all_train = all_train + part_train
            all_test = all_test + part_test
        return all_train, all_test

    if mix:
        train, test = gather(tasks)
    else:
        train, test = load_task(data_dir, tasks[0])

    vocab, vocab_size, entities = get_vocab_and_entities(vocab_tasks)

    if not n_memories:
        n_memories = len(entities)

    print_start_test_message(name)

    embeddings_matrix, token_to_idx = init_embedding_matrix(vocab, device)
    keys = get_key_tensors(entities, embeddings_matrix, token_to_idx, device,
                           tie_keys)

    data = train + test
    if mix:
        # In mixed mode the evaluation data comes from the vocabulary tasks.
        train, test = gather(vocab_tasks)
        data = train + test

    len_max_sentence = get_len_max_sentence(data)
    vec_test = indexize_data(test, token_to_idx, len_max_sentence)

    entnet = EntNet(vocab_size, keys, len_max_sentence, embeddings_matrix,
                    device)
    entnet.to(device)
    entnet = entnet.float()
    state_file = STATE_PATH.format(name, 0)
    entnet.load_state_dict(torch.load(state_file))

    loss, correct = eval(name, device, entnet, vec_test, len_max_sentence)

    if not verbose:
        summary = [
            "Finished Testing task {}\n".format(name),
            "loss is: {}\n".format(loss),
            "correct: {} out of {}\n".format(correct, len(vec_test)),
        ]
        print("".join(summary))
def load_data(self):
    """Load a single bAbI task and build the train/validation/test arrays.

    Reads the module-level ``data_dir`` and ``task_id``. Stores the size
    statistics (``memory_size``, ``max_story_size``, ``mean_story_size``,
    ``sentence_size``, ``query_size``, ``vocab_size``), the vectorized
    splits (``S/Q/A``, ``train*``, ``val*``, ``test*``) and their sizes
    (``n_train``, ``n_val``, ``n_test``) on ``self``.
    """
    # TODO: refactor all this running elsewhere
    train, test = load_task(data_dir, task_id)
    data = train + test

    vocab = sorted(
        reduce(lambda x, y: x | y,
               (set(list(chain.from_iterable(s)) + q + a)
                for s, q, a in data)))
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

    story_lengths = [len(s) for s, _, _ in data]

    self.memory_size = 50
    self.max_story_size = max(story_lengths)
    # A concrete list is required: on Python 3 `map` is lazy and np.mean
    # cannot average a map object.
    self.mean_story_size = int(np.mean(story_lengths))
    self.sentence_size = max(
        map(len, chain.from_iterable(s for s, _, _ in data)))
    self.query_size = max(map(len, (q for _, q, _ in data)))
    self.memory_size = min(self.memory_size, self.max_story_size)
    self.vocab_size = len(word_idx) + 1  # +1 for nil word
    self.sentence_size = max(self.query_size,
                             self.sentence_size)  # for the position

    print("Longest sentence length", self.sentence_size)
    print("Longest story length", self.max_story_size)
    print("Average story length", self.mean_story_size)

    # train/validation/test sets
    self.S, self.Q, self.A = vectorize_data(train, word_idx,
                                            self.sentence_size,
                                            self.memory_size)
    # TODO: randomstate
    (self.trainS, self.valS, self.trainQ, self.valQ, self.trainA,
     self.valA) = cross_validation.train_test_split(self.S, self.Q, self.A,
                                                    test_size=.1)
    self.testS, self.testQ, self.testA = vectorize_data(
        test, word_idx, self.sentence_size, self.memory_size)

    print(self.testS[0])
    print("Training set shape", self.trainS.shape)

    # params
    self.n_train = self.trainS.shape[0]
    self.n_test = self.testS.shape[0]
    self.n_val = self.valS.shape[0]

    print("Training Size", self.n_train)
    print("Validation Size", self.n_val)
    print("Testing Size", self.n_test)
def __init__(self, dataset_dir, task_id=1, memory_size=50, train=True):
    """Load one task and expose its vectorized split as LongTensors.

    Args:
        dataset_dir: Directory handed to ``load_task``.
        task_id: Task number handed to ``load_task``.
        memory_size: Upper bound on the memory size; the effective value is
            capped by the longest story.
        train: When true the training split is vectorized, otherwise the
            test split.
    """
    self.train = train
    self.task_id = task_id
    self.dataset_dir = dataset_dir

    train_data, test_data = load_task(self.dataset_dir, task_id)
    data = train_data + test_data

    # Vocabulary: every token seen in any story, query or answer.
    words = set()
    for story, query, answer in data:
        words |= set(list(chain.from_iterable(story)) + query + answer)
    self.vocab = sorted(words)
    word_idx = {word: position
                for position, word in enumerate(self.vocab, 1)}

    self.max_story_size = max(len(story) for story, _, _ in data)
    self.query_size = max(len(query) for _, query, _ in data)
    self.sentence_size = max(
        len(sentence) for story, _, _ in data for sentence in story)
    self.memory_size = min(memory_size, self.max_story_size)

    # Add time words/indexes (each time word maps to its own label).
    for slot in range(1, self.memory_size + 1):
        label = "time{}".format(slot)
        word_idx[label] = label

    self.num_vocab = len(word_idx) + 1  # +1 for nil word
    # Sentences must fit the longest query (for the position), plus one
    # extra position for the time word.
    self.sentence_size = max(self.query_size, self.sentence_size) + 1
    self.word_idx = word_idx

    self.mean_story_size = int(np.mean([len(s) for s, _, _ in data]))

    chosen_split = train_data if train else test_data
    story, query, answer = vectorize_data(chosen_split, self.word_idx,
                                          self.sentence_size,
                                          self.memory_size)

    self.data_story = torch.LongTensor(story)
    self.data_query = torch.LongTensor(query)
    # Answers are stored as class indices (argmax over axis 1).
    self.data_answer = torch.LongTensor(np.argmax(answer, axis=1))
"Embedding size for embedding matrices.") tf.flags.DEFINE_integer("memory_size", 50, "Maximum size of memory.") tf.flags.DEFINE_integer("random_state", None, "Random state.") tf.flags.DEFINE_string("data_dir", "data/tasks_1-20_v1-2/en/", "Directory containing bAbI tasks") tf.flags.DEFINE_string("output_file", "scores.csv", "Name of output file for final bAbI accuracy scores.") FLAGS = tf.flags.FLAGS print("Started Joint Model") # load all train/test data ids = range(1, 21) train, test = [], [] for i in ids: tr, te = load_task(FLAGS.data_dir, i) train.append(tr) test.append(te) data = list(chain.from_iterable(train + test)) vocab = sorted( reduce(lambda x, y: x | y, (set(list(chain.from_iterable(s)) + q + a) for s, q, a in data))) word_idx = dict((c, i + 1) for i, c in enumerate(vocab)) max_story_size = max(map(len, (s for s, _, _ in data))) mean_story_size = int(np.mean(map(len, (s for s, _, _ in data)))) sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data))) query_size = max(map(len, (q for _, q, _ in data))) memory_size = min(FLAGS.memory_size, max_story_size) vocab_size = len(word_idx) + 1 # +1 for nil word
tf.flags.DEFINE_integer("hops", 3, "Number of hops in the Memory Network.")
tf.flags.DEFINE_integer("epochs", 100, "Number of epochs to train for.")
tf.flags.DEFINE_integer("embedding_size", 40,
                        "Embedding size for embedding matrices.")
tf.flags.DEFINE_integer("memory_size", 50, "Maximum size of memory.")
tf.flags.DEFINE_integer("random_state", None, "Random state.")
tf.flags.DEFINE_string("data_dir", "data/tasks_1-20_v1-2/en/",
                       "Directory containing bAbI tasks")
tf.flags.DEFINE_string("output_file", "scores.csv",
                       "Name of output file for final bAbI accuracy scores.")
FLAGS = tf.flags.FLAGS

print("Started Joint Model")

# load all train/test data
ids = range(1, 21)
train, test = [], []
for i in ids:
    tr, te = load_task(FLAGS.data_dir, i)
    train.append(tr)
    test.append(te)
data = list(chain.from_iterable(train + test))

# Vocabulary over every story, query and answer; index 0 is left for nil.
vocab = sorted(
    reduce(lambda x, y: x | y,
           (set(list(chain.from_iterable(s)) + q + a) for s, q, a in data)))
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

max_story_size = max(map(len, (s for s, _, _ in data)))
# A list is passed on purpose: on Python 3 `map` is lazy and np.mean cannot
# average a map object.
mean_story_size = int(np.mean([len(s) for s, _, _ in data]))
sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data)))
query_size = max(map(len, (q for _, q, _ in data)))
memory_size = min(FLAGS.memory_size, max_story_size)
vocab_size = len(word_idx) + 1  # +1 for nil word
sentence_size = max(query_size, sentence_size)  # for the position
def learn(task):
    """Train an EntNet on `task` until the loss stops improving.

    Each training sample is processed on its own: every story sentence is fed
    through the network, the query is decoded and one optimizer step is
    taken. Training stops once more than ``max_stuck_epochs`` epochs improved
    the last-sample loss by less than ``epsilon``. The learning rate is
    halved every 25 epochs during the first 200 epochs.

    Reads the module-level names ``data_dir``, ``n_input_words``,
    ``embedding_dim`` and ``gradient_clip_value``.
    """
    train, test = load_task(data_dir, task)
    vocab, vocab_size = get_vocab(train, test)
    embeddings_matrix, token_to_idx = init_embedding_matrix(vocab)
    keys = get_key_tensors(vocab, embeddings_matrix, token_to_idx, True)
    vec_train = vectorize_data(train, token_to_idx, embeddings_matrix)
    # Kept from the original; the test vectors are not used in this function.
    vec_test = vectorize_data(test, token_to_idx, embeddings_matrix)

    entnet = EntNet(vocab_size, keys)

    ##### Define Loss and Optimizer #####
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(entnet.parameters(), lr=0.01)

    ##### Train Model #####
    epoch = 0
    prev_loss = None
    loss = None
    stuck_epochs = 0
    max_stuck_epochs = 3
    epsilon = 0.1

    while True:
        running_loss = 0.0
        for i, sample in enumerate(vec_train):
            story, query, answer = sample

            # zero the parameter gradients
            optimizer.zero_grad()

            for sentence in story:
                entnet(sentence.view(1, n_input_words, embedding_dim))
            output = entnet.decode(query)
            loss = criterion(output, answer)

            loss.backward()
            nn.utils.clip_grad_value_(entnet.parameters(),
                                      gradient_clip_value)
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 50 == 49:  # print every 50 samples
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 50))
                running_loss = 0.0

        # Early stopping on the loss of the epoch's last sample.
        if epoch == 0:
            prev_loss = loss
        elif prev_loss - loss < epsilon:
            stuck_epochs += 1
        prev_loss = loss
        if stuck_epochs > max_stuck_epochs:
            break

        # adjust learning rate every 25 epochs until 200 epochs
        if epoch < 200 and epoch % 25 == 24:
            # torch optimizers keep the learning rate per parameter group;
            # there is no `optimizer.lr` attribute to read or assign.
            for param_group in optimizer.param_groups:
                param_group['lr'] = param_group['lr'] / 2

        epoch += 1

    print('Finished Training')
evaluation_interval = 50
batch_size = 32
feature_size = 40
hops = 3
epochs = 100
embedding_size = 30
memory_size = 20
task_id = 1
data_dir = "data/tasks_1-20_v1-2/en/"
reader = "bow"  # bow / simple_gru
allow_soft_placement = True
log_device_placement = False
output_file = 'single_scores.csv'

# Load the train and test data for one task in tokenized form; each example
# is a (story, question, answer) tuple.
train, test = load_task(data_dir, task_id)
data = train + test

# `vocab` is the sorted list of distinct words in the dataset; `word_idx`
# maps each word to its 1-based position in that list.
vocab = sorted(
    reduce(lambda x, y: x | y,
           (set(list(chain.from_iterable(s)) + q + a) for s, q, a in data)))
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

# Maximum story and sentence sizes give the dimensions of the vectors built
# for the bag-of-words representation.
max_story_size = max(map(len, (s for s, _, _ in data)))
# A list is passed on purpose: on Python 3 `map` is lazy and np.mean cannot
# average a map object.
mean_story_size = int(np.mean([len(s) for s, _, _ in data]))
sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data)))
query_size = max(map(len, (q for _, q, _ in data)))
memory_size = min(memory_size, max_story_size)
vocab_size = len(word_idx) + 1  # +1 for nil word
def _evaluate_batches(sess, model, stories, queries, answers, batch_size):
    """Run `model` over the examples in mini-batches without training.

    Returns ``(mean batch loss, accuracy)``. Accuracy is the number of
    correct predictions divided by the real number of examples, so a
    shorter final batch is not counted as a full one.
    """
    total_cost = 0.0
    total_corrects = 0.0
    batchs = 0
    for start in range(0, stories.shape[0], batch_size):
        end = start + batch_size
        feed_dict = {model.storys: stories[start:end],
                     model.querys: queries[start:end],
                     model.labels: answers[start:end]}
        cost, corrects = sess.run([model.loss, model.num_corrects],
                                  feed_dict=feed_dict)
        total_cost += cost
        total_corrects += corrects
        batchs += 1
    return total_cost / batchs, total_corrects / stories.shape[0]


def main(_):
    """Train a PTRModel on one task, or evaluate a saved checkpoint.

    When ``FLAGS.is_train`` is set the model is trained for ``FLAGS.epochs``
    epochs, reporting train and test loss/accuracy after each epoch and
    saving a checkpoint every fifth epoch and after the last one. Otherwise
    the ``'QA'`` checkpoint is loaded and evaluated once on the test split.
    """
    print("Started Task:", FLAGS.task_id)

    train, test = load_task(FLAGS.data_dir, FLAGS.task_id)
    data = train + test

    vocab = sorted(
        reduce(lambda x, y: x | y,
               (set(list(chain.from_iterable(s)) + q + a)
                for s, q, a in data)))
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

    max_story_size = max(len(s) for s, _, _ in data)
    sentence_size = max(len(x) for x in
                        chain.from_iterable(s for s, _, _ in data))
    query_size = max(len(q) for _, q, _ in data)
    vocab_size = len(word_idx) + 1  # +1 for nil word
    sentence_size = max(query_size, sentence_size)  # for the position

    S, Q, A = vectorize_data(train, word_idx, sentence_size, max_story_size)
    trainS, valS, trainQ, valQ, trainA, valA = \
        cross_validation.train_test_split(S, Q, A, test_size=.1)
    testS, testQ, testA = vectorize_data(test, word_idx, sentence_size,
                                         max_story_size)

    # N is raised to at least the longest story.
    FLAGS.N = max(max_story_size, FLAGS.N)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = PTRModel(sentence_size, vocab_size, FLAGS.N, FLAGS.W,
                         FLAGS.G, sess,
                         batch_size=FLAGS.batch_size,
                         story_size=max_story_size,
                         lr=FLAGS.learning_rate,
                         epsilon=FLAGS.epsilon,
                         controller_layer_size=FLAGS.controller_layer_size,
                         controller_hidden_size=FLAGS.controller_hidden_size)

        print(" [*] Initalize all variables")
        tf.initialize_all_variables().run()
        print(" [*] Initialization finished")

        start_time = time.time()
        if FLAGS.is_train:
            for t in range(FLAGS.epochs):
                total_corrects = 0.0
                total_cost = 0.0
                batchs = 0
                for start in range(0, trainS.shape[0], FLAGS.batch_size):
                    end = start + FLAGS.batch_size
                    feed_dict = {model.storys: trainS[start:end],
                                 model.querys: trainQ[start:end],
                                 model.labels: trainA[start:end]}
                    _, cost, corrects, step = sess.run(
                        [model.optim, model.loss, model.num_corrects,
                         model.global_step],
                        feed_dict=feed_dict)
                    total_cost += cost
                    total_corrects += corrects
                    batchs += 1
                # Divide by the real example count: the last batch may hold
                # fewer than FLAGS.batch_size examples.
                print("[epoch %5d]: train_loss:%.2f acc:%.2f (%.1fs)" %
                      (t, total_cost / batchs,
                       total_corrects / trainS.shape[0],
                       time.time() - start_time))

                test_cost, test_acc = _evaluate_batches(
                    sess, model, testS, testQ, testA, FLAGS.batch_size)
                print("[epoch %5d]: test_loss:%.2f acc:%.2f (%.1fs)" %
                      (t, test_cost, test_acc, time.time() - start_time))
                sys.stdout.flush()

                if t % 5 == 0 or t == FLAGS.epochs - 1:
                    model.save(FLAGS.checkpoint_dir, 'QA', step)
        else:
            model.load(FLAGS.checkpoint_dir, 'QA')
            test_cost, test_acc = _evaluate_batches(
                sess, model, testS, testQ, testA, FLAGS.batch_size)
            print("Test cost:%.2f acc:%.2f (%.1fs)" %
                  (test_cost, test_acc, time.time() - start_time))
def load_data(data_dir, task_ids, memory_size, num_caches, random_seed):
    """Load the listed training tasks plus all test sets and vectorize them.

    Each training task is split 90/10 into train and validation parts with
    `random_seed`, and the parts of all tasks are stacked vertically.

    Returns:
        ``(train_data, val_data, test_data, word_idx, reverse_word_idx,
        vocab_size, sentence_size, memory_size)``. ``train_data`` and
        ``val_data`` are 5-tuples ``(S, O, Q, A, L)``; ``test_data`` maps
        each key of the loaded test dictionary to its vectorized data.
    """
    # Load all train and test data
    train = [load_task(data_dir, task_id) for task_id in task_ids]
    te = load_task(data_dir, None, load_test=True)
    test = list(te.values())
    data = list(chain.from_iterable(train + test))

    # Vocabulary over stories, queries and answers, plus the '.' token.
    word_sets = (set(list(chain.from_iterable(s)) + q + a + ['.'])
                 for s, _, q, a, _ in data)
    vocab = sorted(reduce(lambda x, y: x | y, word_sets))
    word_idx = {word: position for position, word in enumerate(vocab, 1)}
    # Index-ordered word list; position 0 is the NIL word.
    reverse_word_idx = ['NIL'] + list(vocab)

    story_lengths = [len(s) for s, _, _, _, _ in data]
    max_story_size = max(story_lengths)
    mean_story_size = int(np.mean(story_lengths))
    sentence_size = max(len(sentence)
                        for s, _, _, _, _ in data for sentence in s)
    query_size = max(len(q) for _, _, q, _, _ in data)
    memory_size = min(memory_size, max_story_size)
    vocab_size = len(word_idx) + 1  # +1 for the NIL word
    sentence_size = max(query_size, sentence_size)  # for the position

    logging.info("Longest sentence length: %d" % sentence_size)
    logging.info("Longest story length: %d" % max_story_size)
    logging.info("Average story length: %d" % mean_story_size)

    # Train/validation/test splits; one bucket per array kind, in the
    # order S, O, Q, A, L.
    train_buckets = [[] for _ in range(5)]
    val_buckets = [[] for _ in range(5)]
    for task in train:
        S, O, Q, A, L = vectorize_data(task, word_idx, sentence_size,
                                       memory_size, num_caches)
        pieces = cross_validation.train_test_split(
            S, O, Q, A, L, test_size=0.1, random_state=random_seed)
        # train_test_split interleaves its output: train, val, train, ...
        for bucket, piece in zip(train_buckets, pieces[0::2]):
            bucket.append(piece)
        for bucket, piece in zip(val_buckets, pieces[1::2]):
            bucket.append(piece)

    def stack(parts):
        # Fold the per-task arrays together with pairwise np.vstack.
        return reduce(lambda a, b: np.vstack((a, b)), parts)

    trainS, trainO, trainQ, trainA, trainL = [stack(b) for b in train_buckets]
    valS, valO, valQ, valA, valL = [stack(b) for b in val_buckets]

    test_data = {
        f: vectorize_data(te[f], word_idx, sentence_size, memory_size,
                          num_caches)
        for f in te
    }

    logging.info("Training set shape: %s" % str(trainS.shape))

    train_data = trainS, trainO, trainQ, trainA, trainL
    val_data = valS, valO, valQ, valA, valL
    return (train_data, val_data, test_data, word_idx, reverse_word_idx,
            vocab_size, sentence_size, memory_size)
def main(_):
    """Run caption inference (``FLAGS.eval``) or train the caption model.

    Evaluation mode restores the vocabulary and the checkpoint, decodes the
    test features greedily and writes a JSON list of ``{"caption", "id"}``
    records to ``FLAGS.output``.

    Training mode either preprocesses the text data (``FLAGS.prepro``) or
    loads the pickled preprocessing results, generates training and
    validation data, optionally loads extra task data from
    ``FLAGS.task_dir``, then builds and trains a ``CapGenModel``.
    """
    print("\nParameters: ")
    for k, v in sorted(FLAGS.__flags.items()):
        print("{} = {}".format(k, v))

    if not os.path.exists("./prepro/"):
        os.makedirs("./prepro/")

    if FLAGS.eval:
        print("Evaluation...")
        feats, test_id = data_utils.load_test_data(FLAGS.test_id,
                                                   FLAGS.test_dir)
        vocab_processor = VocabularyProcessor.restore(FLAGS.vocab)

        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            model = load_model(sess, FLAGS.checkpoint_file, vocab_processor)
            sentences = greedy_inference(sess, model, feats, vocab_processor)
            # sentences = beam_search(sess, model, feats, vocab_processor)
            ans = [{"caption": sentence, "id": test_id[idx]}
                   for idx, sentence in enumerate(sentences)]
            # Context manager so the output file is flushed and closed.
            with open(FLAGS.output, 'w') as out_file:
                json.dump(ans, out_file)
    else:
        if FLAGS.prepro:
            print("Start preprocessing data...")
            vocab_processor, train_dict = data_utils.load_text_data(
                train_lab=FLAGS.train_lab,
                prepro_train_p=FLAGS.prepro_train,
                vocab_path=FLAGS.vocab)
            print("Vocabulary size: {}".format(
                len(vocab_processor._reverse_mapping)))

            print("Start dumping word2vec matrix...")
            w2v_W = data_utils.build_w2v_matrix(vocab_processor,
                                                FLAGS.w2v_data,
                                                FLAGS.vector_file,
                                                FLAGS.embedding_dim)
        else:
            # Pickles are read from paths given by flags; the handles are
            # closed as soon as loading finishes.
            with open(FLAGS.prepro_train, 'rb') as train_file:
                train_dict = cPickle.load(train_file)
            vocab_processor = VocabularyProcessor.restore(FLAGS.vocab)
            with open(FLAGS.w2v_data, 'rb') as w2v_file:
                w2v_W = cPickle.load(w2v_file)

        print("Start generating training data...")
        feats, encoder_in_idx, decoder_in = data_utils.gen_train_data(
            FLAGS.train_dir, FLAGS.train_lab, train_dict)

        print("Start generating validation data...")
        v_encoder_in, truth_captions = data_utils.load_valid(
            FLAGS.valid_dir, FLAGS.valid_lab)

        t_encoder_in = None
        files = None
        if FLAGS.task_dir is not None:
            t_encoder_in, files = data_utils.load_task(FLAGS.task_dir)

        print('feats size: {}, training size: {}'.format(
            len(feats), len(encoder_in_idx)))
        print(encoder_in_idx.shape, decoder_in.shape)
        print(v_encoder_in.shape, len(truth_captions))

        data = Data(feats, encoder_in_idx, decoder_in, v_encoder_in,
                    truth_captions, t_encoder_in, files)

        model = CapGenModel(data, w2v_W, vocab_processor)
        model.build_model()
        model.train()
# Flag definitions for this run (the excerpt starts partway through the
# script's flag list).
tf.flags.DEFINE_string("data_dir", "my_data_rename", "Directory containing bAbI tasks")
tf.flags.DEFINE_boolean('visual', True, 'whether visualize the embedding')
tf.flags.DEFINE_boolean('joint', False, 'whether to train all tasks')
tf.flags.DEFINE_boolean('trained_emb', False, 'whether use trained embedding, such as Glove')
tf.flags.DEFINE_boolean('introspect', True, 'whether use the introspect unit')
FLAGS = tf.flags.FLAGS

print("Started Task:", FLAGS.task_id)
if FLAGS.joint:
    # Joint mode: load tasks 1..20 and accumulate their training examples
    # and training tags.
    ids = range(1, 21)
    train, test, train_tags, test_tags = [], [], [], []
    # pdb.set_trace()
    for i in ids:
        tr, te, tr_tag, te_tag = load_task(FLAGS.data_dir, i, joint=True)
        train += tr
        train_tags += tr_tag
        # NOTE(review): unlike the training data, the test split and its tags
        # are overwritten on every iteration, so only the last task's values
        # survive the loop -- confirm this is intended.
        test = te
        test_tags = te_tag
    # pdb.set_trace()
else:
    # Single-task mode: load only the task selected by FLAGS.task_id.
    train, test, train_tags, test_tags = load_task(FLAGS.data_dir, FLAGS.task_id)
data = train + test
# pdb.set_trace()
# vocab = sorted(reduce(lambda x, y: x | y, (set(list(chain.from_iterable(s)) + q + a) for s, q, a in data)))
# The loop below presumably fills vocab_my; its body lies outside this
# excerpt.
vocab_my = []
for s, q, a in data:
# NOTE(review): `key` and `value` are bound before this excerpt, presumably
# by a loop over the parsed arguments -- confirm against the full file.
print(key + ": " + str(value))
print()

# detect whether cuda is available
use_cuda = torch.cuda.is_available()
if use_cuda:
    print("cuda is used!!!")
else:
    print("cuda is not supported, use cpu")

# We use task 1 as default.
task_id = 1
print("Started Task:", task_id)

# task data: both splits are combined so the vocabulary and the size
# statistics below cover every example
train, test = load_task(args.data_dir, task_id)
data = train + test

# 's' is list of list, 'chain.from_iterable(s)' gets all the words from them
# '+ q + a' makes a big list of all the words in [s, q, a]; 'set' removes all the same words
# 'x | y' denotes union, so 'reduce' iterates each [s, q, a] to union all the words
# 'sorted' rearrange these words to give 'vocab'
vocab = sorted(
    reduce(lambda x, y: x | y,
           (set(list(chain.from_iterable(s)) + q + a) for s, q, a in data)))
# word -> 1-based position in 'vocab'
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

# max sentence number in 's' for all [s, q, a]
max_story_size = max(map(len, (s for s, _, _ in data)))
# mean sentence number in 's', truncated to an int
mean_story_size = int(np.mean([len(s) for s, _, _ in data]))
# max token number in a sentence
# Hyper-parameters and paths for this run.
embedding_size = 40
memory_size = 100
random_state = None
data_dir = "data/tasks_1-20_v1-2/en/"
# The output file name encodes the run settings; `hop` and `jac` are defined
# before this excerpt.
file_name = 'output_hop_' + str(hop) + '_jac_' + str(
    jac) + '_memory_' + str(memory_size) + '_gradient_001.csv'
print(file_name)
output_file = file_name
FLAGS = tf.flags.FLAGS

# load all train/test data
ids = range(1, 21)
train, test = [], []
for i in ids:
    tr, te = load_task(data_dir, i)
    train.append(tr)
    test.append(te)
# Flat list of every example from all train and test splits.
# NOTE(review): `data` is built before the optional Jaccard cutting below, so
# it holds the uncut examples -- confirm this is intended.
data = list(chain.from_iterable(train + test))

if jac == 1:
    # Apply jaccard_cutting to each task's train and test split.
    temp_train = []
    for t in train:
        temp_t = jaccard_cutting(t)
        temp_train.append(temp_t)
    temp_test = []
    for t in test:
        temp_t = jaccard_cutting(t)
        temp_test.append(temp_t)
    train = temp_train
# Flag definitions for this run.
tf.flags.DEFINE_integer("batch_size", 32, "Batch size for training.")
tf.flags.DEFINE_integer("hops", 3, "Number of hops in the Memory Network.")
tf.flags.DEFINE_integer("epochs", 200, "Number of epochs to train for.")
tf.flags.DEFINE_integer("embedding_size", 20, "Embedding size for embedding matrices.")
tf.flags.DEFINE_integer("memory_size", 50, "Maximum size of memory.")
tf.flags.DEFINE_integer("task_id", 1, "bAbI task id, 1 <= id <= 20")
tf.flags.DEFINE_integer("random_state", None, "Random state.")
tf.flags.DEFINE_string("data_dir", "data/tasks_1-20_v1-2/en/", "Directory containing bAbI tasks")
tf.flags.DEFINE_string("test_pred", "test_pred.txt", "Output file containing the predcited results")
# NOTE(review): this flag is a string whose default is the text "True", not
# a boolean flag.
tf.flags.DEFINE_string("target_word_from_context", "True", "Choose the target word from the context")
FLAGS = tf.flags.FLAGS

print("Started Task:", FLAGS.task_id)

# task data
train, test = load_task(FLAGS.data_dir, FLAGS.task_id)
_data = train + test
data = []

# Count how often a word drawn uniformly at random from the example's own
# story and question tokens equals the first answer token.
correct_count, total_count = 0, 0
for d in _data:
    # d[0] is the story (a list of sentences); flatten it into one token list
    context = [item for sublist in d[0] for item in sublist]
    question = [item for item in d[1]]
    # NOTE: `vocab` here is only this example's candidate words.
    vocab = context + question
    target_word = d[2][0]
    rand_word = vocab[random.randint(0, len(vocab)-1)]
    if target_word == rand_word:
        correct_count += 1
    total_count += 1
    # data.append([vocab, d[2][0]])

print("correct labels: ")
def train(tasks, vocab_tasks, device, mix=False, name=None):
    """Train an EntNet and save the best model/optimizer state for ``name``.

    Up to ``n_tries`` freshly initialised networks are trained. A try ends
    when the oldest test loss in a ``max_stuck_epochs``-long window beats the
    best newer one by less than ``min_improvement``, when the fail-rate
    threshold of ``tasks[0]`` is reached (only with ``mix`` and
    ``tasks[0] <= 20``), or once the epoch counter reaches 200. The epoch
    with the lowest test loss inside the window represents the try, and the
    try with the lowest test loss overall is written with ``torch.save`` to
    ``STATE_PATH.format(name)`` and ``OPTIM_PATH.format(name)``.

    Args:
        tasks: task ids; all of them are loaded when ``mix`` is set,
            otherwise only ``tasks[0]``.
        vocab_tasks: task ids handed to ``get_vocab_and_entities``; with
            ``mix`` set they are also (re)loaded as the train/test data.
        device: device the model, loss and batches are moved to.
        mix: whether several tasks are combined (see above).
        name: label used in log messages and in the checkpoint paths.

    Returns:
        None. Results are printed and saved to disk.
    """
    # Function-scope import: deepcopy is needed to snapshot state dicts.
    import copy

    global n_memories

    def build_optimizer(parameters, lr):
        # Single place that honours optimizer_name, both for the initial
        # optimizer and for every learning-rate decay rebuild.
        if optimizer_name == 'sgd':
            return optim.SGD(parameters, lr=lr)
        return optim.Adam(parameters, lr=lr)

    def threshold_met(fail_rate):
        # The per-task fail-rate threshold only applies to mixed training on
        # tasks with an entry in entnet_threshold (ids up to 20).
        if mix and tasks[0] <= 20:
            return fail_rate <= entnet_threshold[tasks[0] - 1]
        return False

    train, test = list(), list()
    if mix:
        for task in tasks:
            task_train, task_test = load_task(data_dir, task)
            train, test = train + task_train, test + task_test
    else:
        task = tasks[0]
        train, test = load_task(data_dir, task)

    vocab, vocab_size, entities = get_vocab_and_entities(vocab_tasks)
    data = train + test

    if mix:
        # NOTE(review): this replaces the train/test data loaded from `tasks`
        # above with the data of `vocab_tasks`; kept as in the original --
        # confirm that is intended.
        train, test = list(), list()
        for task in vocab_tasks:
            task_train, task_test = load_task(data_dir, task)
            train, test = train + task_train, test + task_test
        data = train + test
    len_max_sentence = get_len_max_sentence(data)

    if not n_memories:
        n_memories = len(entities)

    print_start_train_message(name)

    # One slot per try; np.inf / 0 mark tries that never ran.
    models = [None] * n_tries
    optims = [None] * n_tries
    model_scores = [np.inf] * n_tries
    model_test_scores = [np.inf] * n_tries
    model_correct_scores = [0] * n_tries
    model_test_correct_scores = [0] * n_tries

    for try_idx in range(n_tries):
        embeddings_matrix, token_to_idx = init_embedding_matrix(vocab, device)
        keys = get_key_tensors(entities, embeddings_matrix, token_to_idx,
                               device, tie_keys)
        vec_train = indexize_data(train, token_to_idx, len_max_sentence)
        vec_test = indexize_data(test, token_to_idx, len_max_sentence)

        entnet = EntNet(vocab_size, keys, len_max_sentence,
                        embeddings_matrix, device)
        entnet.to(device)
        entnet = entnet.float()

        ##### Define Loss and Optimizer #####
        criterion = nn.CrossEntropyLoss().to(device)
        learning_rate = 0.01
        optimizer = build_optimizer(entnet.parameters(), learning_rate)

        ##### Train Model #####
        epoch = 0
        permute_data = 'no' if teach else 'full'

        # Sliding windows over the last max_stuck_epochs epochs.
        loss_history = [np.inf] * max_stuck_epochs
        test_loss_history = [np.inf] * max_stuck_epochs
        correct_history = [0] * max_stuck_epochs
        test_correct_history = [0] * max_stuck_epochs
        net_history = [None] * max_stuck_epochs
        optim_history = [None] * max_stuck_epochs

        while True:
            epoch_loss = 0.0
            running_loss = 0.0
            correct_batch = 0
            correct_epoch = 0
            start_time = time.time()

            if teach and (epoch > 1 or loss_history[-1] < 0.3):
                permute_data = 'full'

            for i, batch in enumerate(batch_generator(
                    vec_train, len_max_sentence, batch_size, permute_data)):
                batch_stories, batch_queries, batch_answers = batch
                batch_stories = batch_stories.clone().detach().to(device)
                batch_queries = batch_queries.clone().detach().to(device)
                batch_answers = batch_answers.clone().detach().to(device)

                entnet(batch_stories)
                output = entnet.decode(batch_queries)
                loss = criterion(output, batch_answers)
                loss.backward()

                batch_loss = loss.item()
                running_loss += batch_loss
                epoch_loss += batch_loss

                nn.utils.clip_grad_value_(entnet.parameters(),
                                          gradient_clip_value)
                optimizer.step()
                # zero the parameter gradients
                optimizer.zero_grad()

                # Count correct predictions for the whole batch at once.
                predicted = output.detach().argmax(dim=1)
                n_correct = int((predicted == batch_answers).sum().item())
                correct_batch += n_correct
                correct_epoch += n_correct

                if verbose and i % 50 == 49:
                    # print statistics
                    print('[%d, %5d] loss: %.3f'
                          % (epoch + 1, i + 1, running_loss / 50))
                    running_loss = 0.0
                    print('[%d, %5d] correct: %d out of %d'
                          % (epoch + 1, i + 1, correct_batch, 50 * batch_size))
                    correct_batch = 0

            # very loose approximation for the average loss over the epoch
            epoch_loss = epoch_loss / (len(vec_train) / batch_size)

            # print epoch time
            end_time = time.time()
            if verbose:
                print("###################################################################################################")
                print(end_time - start_time)
                print('epoch loss: %.3f' % epoch_loss)
                print("###################################################################################################")

            test_loss, test_correct = eval(name, device, entnet, vec_test,
                                           len_max_sentence)
            test_fail_rate = 100 - (float(test_correct) / len(vec_test)) * 100

            # state_dict() hands out references to the live tensors, which
            # later optimizer steps overwrite in place. Deep-copy so that each
            # history slot really holds that epoch's weights/optimizer state.
            net_history.append(copy.deepcopy(entnet.state_dict()))
            optim_history.append(copy.deepcopy(optimizer.state_dict()))
            loss_history.append(epoch_loss)
            test_loss_history.append(test_loss)
            correct_history.append(correct_epoch)
            test_correct_history.append(test_correct)

            # Drop the oldest entry to keep the window length constant.
            net_history = net_history[1:]
            optim_history = optim_history[1:]
            loss_history = loss_history[1:]
            test_loss_history = test_loss_history[1:]
            correct_history = correct_history[1:]
            test_correct_history = test_correct_history[1:]

            stalled = (test_loss_history[0] - min(test_loss_history[1:])
                       < min_improvement)
            if stalled or threshold_met(test_fail_rate) or epoch == 200:
                # Keep the epoch with the lowest test loss in the window.
                best_idx = np.argmin(test_loss_history)
                models[try_idx] = net_history[best_idx]
                optims[try_idx] = optim_history[best_idx]
                model_scores[try_idx] = loss_history[best_idx]
                model_test_scores[try_idx] = test_loss_history[best_idx]
                model_correct_scores[try_idx] = correct_history[best_idx]
                model_test_correct_scores[try_idx] = test_correct_history[best_idx]
                break

            # adjust learning rate every 25 epochs until 200 epochs
            if epoch < 200 and epoch % 25 == 24:
                learning_rate = learning_rate / 2
                # Rebuilt with the configured optimizer type; the original
                # always switched to Adam here, even for 'sgd'.
                optimizer = build_optimizer(entnet.parameters(), learning_rate)

            epoch += 1

        model_fail_rate = 100 - (
            float(model_test_correct_scores[try_idx]) / len(vec_test)) * 100
        # Stop retrying once the threshold is met. The original compared the
        # fail rate against the boolean condition itself; a 0% fail rate still
        # ends the tries, as it did before.
        if threshold_met(model_fail_rate) or model_fail_rate <= 0:
            break

    best_idx = np.argmin(model_test_scores)
    torch.save(models[best_idx], STATE_PATH.format(name))
    torch.save(optims[best_idx], OPTIM_PATH.format(name))

    print("Finished Training task {}\n".format(name) +
          "try {} was best\n".format(best_idx) +
          "loss is: {}\n".format(model_scores[best_idx]) +
          "correct: {} out of {}\n".format(model_correct_scores[best_idx], len(vec_train)) +
          "test loss is: {}\n".format(model_test_scores[best_idx]) +
          "test correct: {} out of {}\n".format(model_test_correct_scores[best_idx], len(vec_test)))
def train(tasks, vocab_tasks, device, mix=False, task_id=None):
    """Train a RoutedNetwork for 200 epochs and save its best epoch.

    The training data is every task in ``tasks`` when ``mix`` is set,
    otherwise only ``tasks[0]``; the vocabulary comes from ``vocab_tasks``.
    ``train_network`` is called once per epoch with the history lists, which
    are read afterwards to pick the entry with the lowest test loss. That
    entry's model and optimizer state are written with ``torch.save`` to
    ``STATE_PATH`` / ``OPTIM_PATH`` formatted with ``task_id`` and ``try_n``.
    """
    total_epochs = 200

    # ----- data -----
    if mix:
        train_set, test_set = [], []
        for task in tasks:
            part_train, part_test = load_task(data_dir, task)
            train_set = train_set + part_train
            test_set = test_set + part_test
    else:
        task = tasks[0]
        train_set, test_set = load_task(data_dir, task)

    vocab, vocab_size = get_vocab(vocab_tasks)
    len_max_sentence = get_len_max_sentence(train_set + test_set)

    print_start_train_message(task_id)

    # Token ids start at 1.
    token_to_idx = {token: idx for idx, token in enumerate(vocab, 1)}
    vec_train = indexize_data(train_set, token_to_idx, len_max_sentence)
    vec_test = indexize_data(test_set, token_to_idx, len_max_sentence)

    # ----- model -----
    routed_network = RoutedNetwork(vocab_size, len_max_sentence, 3, device)
    routed_network.to(device)
    routed_network = routed_network.float()

    # ----- loss and optimizers -----
    criterion = nn.CrossEntropyLoss().to(device)
    learning_rate = 0.01
    use_adam = optimizer_name == 'adam'
    if use_adam:
        network_optimizer = optim.Adam(routed_network.parameters(),
                                       lr=learning_rate)
    else:
        network_optimizer = optim.SGD(routed_network.parameters(),
                                      lr=learning_rate)
    # The router starts with SGD whatever optimizer_name says.
    router_optimizer = optim.SGD(routed_network.router.parameters(),
                                 lr=learning_rate)

    # ----- training -----
    permute_data = 'full'
    loss_history, test_loss_history = [], []
    correct_history, test_correct_history = [], []
    net_history, optim_history = [], []
    train_acc_history, train_loss_history = [], []
    test_acc_history, full_test_loss_history = [], []

    for epoch in range(total_epochs):
        train_network(vec_train, vec_test, len_max_sentence, permute_data,
                      epoch, task_id,
                      train_acc_history, train_loss_history,
                      test_acc_history, full_test_loss_history,
                      net_history, optim_history,
                      loss_history, test_loss_history,
                      correct_history, test_correct_history,
                      learning_rate, device, routed_network,
                      network_optimizer, criterion, router_optimizer)

        epochs_done = epoch + 1
        # adjust learning rate every 25 epochs until 200 epochs
        if epochs_done < total_epochs and epochs_done % 25 == 24:
            learning_rate = learning_rate / 2
            optimizer_cls = optim.Adam if use_adam else optim.SGD
            network_optimizer = optimizer_cls(
                routed_network.parameters(), lr=learning_rate)
            router_optimizer = optimizer_cls(
                routed_network.router.parameters(), lr=learning_rate)

    # Pick the epoch with the lowest recorded test loss.
    best_idx = np.argmin(test_loss_history)
    best_model = net_history[best_idx]
    best_optim = optim_history[best_idx]
    model_score = loss_history[best_idx]
    model_test_score = test_loss_history[best_idx]
    model_correct_score = correct_history[best_idx]
    model_test_correct_score = test_correct_history[best_idx]

    torch.save(best_model, STATE_PATH.format(task_id, try_n))
    torch.save(best_optim, OPTIM_PATH.format(task_id, try_n))

    summary = (
        "Finished Training task {}\n".format(task_id) +
        "try {} was best\n".format(best_idx) +
        "loss is: {}\n".format(model_score) +
        "correct: {} out of {}\n".format(model_correct_score, len(vec_train)) +
        "test loss is: {}\n".format(model_test_score) +
        "test correct: {} out of {}\n".format(model_test_correct_score, len(vec_test)))
    print(summary)
record=FLAGS.record, taskchosen=FLAGS.task, testset=FLAGS.testset, temp_path=FLAGS.temp_path) # create dircotory if not os.path.exists(FLAGS.model_path): os.mkdir(FLAGS.model_path) if not os.path.exists(FLAGS.model_path + '{}/'.format(taskchosen)): os.mkdir(FLAGS.model_path + '{}/'.format(taskchosen)) if not os.path.exists(FLAGS.log_path): os.mkdir(FLAGS.log_path) if FLAGS.train: # load task data and contextInfo train, val, test, candidates, train_cand, val_cand, test_cand, cand_idx, idx_cand = utils.load_task( inputtaskfile, FLAGS, testing_ratio=testing_ratio, template=FLAGS.record, buildtestset=(officialtestfile == [])) # get the contextInfo data stored trainInfo = [d['a'].pop() for d in train] valInfo = [d['a'].pop() for d in val] testInfo = [d['a'].pop() for d in test] if (len(officialtestfile) == 0) else [] # get vocab and sentence information from data vocab, word_idx, max_story_size, mean_story_size, sentence_size, query_size = utils.data_information( train, candidates) vocab_size = len(word_idx) + 1 # +1 for nil word (0 for nil) sentence_size = max(query_size, sentence_size) + 5 # add some space for testing data memory_size = min(FLAGS.memory_size, max_story_size)
#print("\nParameters:")
#with open(FLAGS.param_output_file, 'w') as f:
#    for attr, value in sorted(FLAGS.__flags.items()):
#        line = "{}={}".format(attr.upper(), value)
#        f.write(line + '\n')
#        print(line)
#    print("")

print("Started Joint Model")

# load all train/test data: one (train, test) pair per task id 1..20
PATH = 'data/tasks_1-20_v1-2/en'
ids = range(1, 21)
train, test = [], []
for i in ids:
    tr, te = load_task(PATH, i)
    train.append(tr)
    test.append(te)
data = list(chain.from_iterable(train + test))

# A leftover `import pdb; pdb.set_trace()` stood here; it dropped every run
# into the interactive debugger before the vocabulary was built, so it has
# been removed.

# Vocabulary: every distinct token in any story, query or answer, sorted so
# that the word -> id mapping is deterministic. Id 0 is kept for the nil word.
vocab = sorted(reduce(lambda x, y: x | y,
                      (set(list(chain.from_iterable(s)) + q + a)
                       for s, q, a in data)))
word_idx = {word: idx for idx, word in enumerate(vocab, 1)}

# Size statistics over all stories and queries.
max_story_size = max(len(s) for s, _, _ in data)
mean_story_size = int(np.mean([len(s) for s, _, _ in data]))
sentence_size = max(len(sentence) for s, _, _ in data for sentence in s)
query_size = max(len(q) for _, q, _ in data)
memory_size = min(FLAGS.memory_size, max_story_size)
vocab_size = len(word_idx) + 1  # +1 for nil word
sentence_size = max(query_size, sentence_size)  # for the position
"Directory containing bAbI tasks") tf.flags.DEFINE_string("reader", "simple_gru", "Reader for the model (bow, simple_gru)") tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement") tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices") tf.flags.DEFINE_string("output_file", "single_scores.csv", "Name of output file for final bAbI accuracy scores.") FLAGS = tf.flags.FLAGS print("Started Task:", FLAGS.task_id) # task data train, test = load_task(FLAGS.data_dir, FLAGS.task_id) data = train + test vocab = sorted( reduce(lambda x, y: x | y, (set(list(chain.from_iterable(s)) + q + a) for s, q, a in data))) word_idx = dict((c, i + 1) for i, c in enumerate(vocab)) max_story_size = max(map(len, (s for s, _, _ in data))) mean_story_size = int(np.mean(list(map(len, (s for s, _, _ in data))))) sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data))) query_size = max(map(len, (q for _, q, _ in data))) memory_size = min(FLAGS.memory_size, max_story_size) vocab_size = len(word_idx) + 1 # +1 for nil word sentence_size = max(query_size, sentence_size) # for the position
# run_task(task_id): train and evaluate a MemN2N model on a single task.
#
# Steps visible in the code below:
#   * loads the train/test split with load_task(FLAGS.data_dir, task_id);
#   * builds a sorted vocabulary over all stories, queries and answers,
#     a 1-based word_idx, its inverse reverse_lookup, and lookup_vocab
#     (a list starting with 'nil' followed by the words in id order);
#   * adds one 'time{i}' key per memory slot to word_idx, which enlarges
#     vocab_size; sentence_size is increased by 1 for the time word;
#   * vectorizes the data and holds out 10% of the training examples for
#     validation (train_test_split with test_size=.1);
#   * trains FLAGS.NoOfRuns models, each in its own tf.Session, dividing
#     FLAGS.learning_rate by 2**((t - 1) // FLAGS.anneal_rate) and freezing
#     the divisor after FLAGS.anneal_stop_epoch;
#   * whenever a test accuracy exceeds the best one seen so far, remembers
#     that run's attention/vocabulary probabilities, predicted and true
#     words, A1/C values and lookup_vocab;
#   * saves those best_* values with np.save under logs_dir as
#     task_<id>_attention, _vocab_prob, _pred, _truth, _A, _C, _lookupvocab.
#
# Returns (max_trainAccuracy, max_valAccuracy, max_testAccuracy, iou), where
# iou = calculate_iou(best_test_prob_hops, test, task_id).
#
# NOTE(review): line breaks and indentation were lost in this copy. It is not
# possible to tell from the text whether the test-set prediction runs inside
# the `val_acc > max_epoch_valAccuracy` branch or after the epoch loop --
# restore the original layout before changing the logic.
# NOTE(review): the 'time{i}' entries map to strings, not integer ids;
# presumably they only serve to size the vocabulary -- confirm against
# vectorize_data.
# NOTE(review): train_accuracies / validation_accuracies receive
# max_trainAccuracy / max_valAccuracy (best over earlier runs) rather than the
# per-run max_epoch_* values, and are not read again in this function --
# confirm intent.
def run_task(task_id): print("Started Task:", task_id) # task data train, test = load_task(FLAGS.data_dir, task_id) data = train + test vocab = sorted( reduce(lambda x, y: x | y, (set(list(chain.from_iterable(s)) + q + a) for s, q, a in data))) word_idx = dict((c, i + 1) for i, c in enumerate(vocab)) reverse_lookup = {v: k for (k, v) in word_idx.items()} lookup_vocab = ['nil'] print(reverse_lookup) print(word_idx) for i in range(1, len(reverse_lookup) + 1): lookup_vocab.append(reverse_lookup[i]) max_story_size = max(map(len, (s for s, _, _ in data))) mean_story_size = int(np.mean([len(s) for s, _, _ in data])) sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data))) query_size = max(map(len, (q for _, q, _ in data))) memory_size = min(FLAGS.memory_size, max_story_size) # Add time words/indexes for i in range(memory_size): word_idx['time{}'.format(i + 1)] = 'time{}'.format(i + 1) print(len(word_idx)) print(word_idx) vocab_size = len(word_idx) + 1 # +1 for nil word sentence_size = max(query_size, sentence_size) # for the position sentence_size += 1 # +1 for time words print("Longest sentence length", sentence_size) print("Longest story length", max_story_size) print("Average story length", mean_story_size) # train/validation/test sets S, Q, A = vectorize_data(train, word_idx, sentence_size, memory_size) trainS, valS, trainQ, valQ, trainA, valA = cross_validation.train_test_split( S, Q, A, test_size=.1, random_state=FLAGS.random_state) testS, testQ, testA = vectorize_data(test, word_idx, sentence_size, memory_size) #print(testS[0]) print("Training set shape", trainS.shape) # params n_train = trainS.shape[0] n_test = testS.shape[0] n_val = valS.shape[0] print("Training Size", n_train) print("Validation Size", n_val) print("Testing Size", n_test) train_labels = np.argmax(trainA, axis=1) val_labels = np.argmax(valA, axis=1) test_labels = np.argmax(testA, axis=1) tf.set_random_seed(FLAGS.random_state) batch_size = FLAGS.batch_size batches = zip(range(0, 
n_train - batch_size, batch_size), range(batch_size, n_train, batch_size)) batches = [(start, end) for start, end in batches] train_accuracies = [] validation_accuracies = [] test_accuracies = [] max_testAccuracy = 0 max_trainAccuracy = 0 max_valAccuracy = 0 best_test_prob_hops = 0 best_test_prob_vocab = 0 best_pred_word = 0 best_true_word = 0 best_test_A1 = 0 best_test_C = 0 best_lookup_vocab = 0 for run_id in range(FLAGS.NoOfRuns): print('Run Number: ' + str(run_id)) max_epoch_trainAccuracy = 0 max_epoch_valAccuracy = 0 with tf.Session() as sess: model = MemN2N(batch_size, vocab_size, sentence_size, memory_size, FLAGS.embedding_size, session=sess, hops=FLAGS.hops, max_grad_norm=FLAGS.max_grad_norm, regularization=FLAGS.regularization, nonlin=FLAGS.nonlin) for t in range(1, FLAGS.epochs + 1): # Stepped learning rate if t - 1 <= FLAGS.anneal_stop_epoch: anneal = 2.0**((t - 1) // FLAGS.anneal_rate) else: anneal = 2.0**(FLAGS.anneal_stop_epoch // FLAGS.anneal_rate) lr = FLAGS.learning_rate / anneal np.random.shuffle(batches) total_cost = 0.0 for start, end in batches: s = trainS[start:end] q = trainQ[start:end] a = trainA[start:end] cost_t = model.batch_fit(s, q, a, lr) total_cost += cost_t if t % FLAGS.evaluation_interval == 0: train_preds = [] for start in range(0, n_train, batch_size): end = start + batch_size s = trainS[start:end] q = trainQ[start:end] pred = model.predict(s, q) train_preds += list(pred) val_preds, valid_prob_vocab, valid_prob_hops, valid_A1, valid_C = model.predict_prob_instrument( valS, valQ) train_acc = metrics.accuracy_score(np.array(train_preds), train_labels) val_acc = metrics.accuracy_score(val_preds, val_labels) print('-----------------------') print('Epoch', t) print('Total Cost:', total_cost) print('Training Accuracy:', train_acc) print('Validation Accuracy:', val_acc) print('-----------------------') if (val_acc > max_epoch_valAccuracy): max_epoch_trainAccuracy = train_acc max_epoch_valAccuracy = val_acc test_preds, test_prob_vocab, 
test_prob_hops, test_A1, test_C = model.predict_prob_instrument( testS, testQ) pred_word = [reverse_lookup[i] for i in test_preds] true_word = [reverse_lookup[i] for i in test_labels] test_acc = metrics.accuracy_score(test_preds, test_labels) train_accuracies.append(max_trainAccuracy) validation_accuracies.append(max_valAccuracy) test_accuracies.append(test_acc) if (test_acc > max_testAccuracy): max_testAccuracy = test_acc max_trainAccuracy = max_epoch_trainAccuracy max_valAccuracy = max_epoch_valAccuracy best_test_prob_hops = test_prob_hops best_test_prob_vocab = test_prob_vocab best_pred_word = pred_word best_true_word = true_word best_test_A1 = test_A1 best_test_C = test_C best_lookup_vocab = lookup_vocab print("Test Accuracy: ", test_acc) iou = calculate_iou(test_prob_hops, test, task_id) print('IoU: ' + str(iou)) print("Best Testing Accuracy:", max_testAccuracy) # save test files np.save(logs_dir + 'task_' + str(task_id) + '_attention', best_test_prob_hops) np.save(logs_dir + 'task_' + str(task_id) + '_vocab_prob', best_test_prob_vocab) np.save(logs_dir + 'task_' + str(task_id) + '_pred', best_pred_word) np.save(logs_dir + 'task_' + str(task_id) + '_truth', best_true_word) np.save(logs_dir + 'task_' + str(task_id) + '_A', best_test_A1) np.save(logs_dir + 'task_' + str(task_id) + '_C', np.array(best_test_C)) np.save(logs_dir + 'task_' + str(task_id) + '_lookupvocab', best_lookup_vocab) iou = calculate_iou(best_test_prob_hops, test, task_id) print('Best IoU: ' + str(iou)) return max_trainAccuracy, max_valAccuracy, max_testAccuracy, iou