def get_minibatch_input(minibatch, tokenized_paras, tokenized_paras_chars,
                        ques_to_para):
  """Assemble padded word-level and character-level model inputs for a batch.

  Args:
    minibatch: list of examples; each example holds (question word ids,
      answer span pair, question id, question per-word character ids).
    tokenized_paras: list of tokenized paragraphs (word ids).
    tokenized_paras_chars: per-paragraph list of per-word character ids.
    ques_to_para: maps a question id to its paragraph index.

  Returns:
    Tuple of padded inputs. Word arrays are sequence-major with shape
    (seq_len, batch); character arrays have shape (seq_len, batch, word_len);
    the answer array has shape (2, batch).
  """
  def char_batch(seqs, word_len, seq_len):
    # Pad every word to word_len characters, then pad the word sequence to
    # seq_len using an all-zero word.
    filler = [0] * word_len
    return np.array([pad([pad(w, 0, word_len) for w in seq], filler, seq_len)
                     for seq in seqs])

  # Variable-length question and paragraph word sequences for the batch.
  ques_lens_in = [len(ex[0]) for ex in minibatch]
  paras_in = [tokenized_paras[ques_to_para[ex[2]]] for ex in minibatch]
  paras_chars_in = [tokenized_paras_chars[ques_to_para[ex[2]]]
                    for ex in minibatch]
  # Characters reversed within each word, for the backward character LSTM.
  paras_chars_in_b = [[word[::-1] for word in para] for para in paras_chars_in]
  paras_lens_in = [len(para) for para in paras_in]
  max_ques_len = max(ques_lens_in)
  max_para_len = max(paras_lens_in)

  ques_chars_forward = [ex[3] for ex in minibatch]
  ques_chars_backward = [[word[::-1] for word in ques]
                         for ques in ques_chars_forward]
  ques_chars_lens_in = [[len(w) for w in ques] for ques in ques_chars_forward]
  paras_chars_lens_in = [[len(w) for w in para] for para in paras_chars_in]
  max_ques_wordlen = max(max(lens) for lens in ques_chars_lens_in)
  max_paras_wordlen = max(max(lens) for lens in paras_chars_lens_in)
  # Word-length vectors are padded with 1 (not 0); presumably a zero length
  # would break the downstream character RNN — TODO confirm.
  ques_chars_lens_in = [pad(lens, 1, max_ques_len)
                        for lens in ques_chars_lens_in]
  paras_chars_lens_in = [pad(lens, 1, max_para_len)
                         for lens in paras_chars_lens_in]

  # Forward/backward character LSTM inputs for questions and passages.
  ques_chars_forward_in = char_batch(ques_chars_forward, max_ques_wordlen,
                                     max_ques_len)
  ques_chars_backward_in = char_batch(ques_chars_backward, max_ques_wordlen,
                                      max_ques_len)
  paras_chars_forward_in = char_batch(paras_chars_in, max_paras_wordlen,
                                      max_para_len)
  paras_chars_backward_in = char_batch(paras_chars_in_b, max_paras_wordlen,
                                       max_para_len)

  # ans_in.shape = (2, batch).
  ans_in = np.array([ex[1] for ex in minibatch]).T

  # Fixed-length (padded) word sequences, shape (seq_len, batch); the *_b
  # variants are token-reversed for the backward word LSTM.
  ques_in_f = np.array([pad(ex[0], 0, max_ques_len) for ex in minibatch]).T
  paras_in_f = np.array([pad(para, 0, max_para_len) for para in paras_in]).T
  ques_in_b = np.array([pad(ex[0][::-1], 0, max_ques_len)
                        for ex in minibatch]).T
  paras_in_b = np.array([pad(para[::-1], 0, max_para_len)
                         for para in paras_in]).T

  # Character tensors go out as (seq_len, batch, word_len); char-length
  # matrices as (seq_len, batch).
  return (paras_in_f, paras_in_b, ques_in_f, ques_in_b,
          paras_lens_in, ques_lens_in,
          np.transpose(paras_chars_forward_in, (1, 0, 2)),
          np.transpose(paras_chars_backward_in, (1, 0, 2)),
          np.transpose(ques_chars_forward_in, (1, 0, 2)),
          np.transpose(ques_chars_backward_in, (1, 0, 2)),
          np.transpose(np.array(paras_chars_lens_in)),
          np.transpose(np.array(ques_chars_lens_in)),
          ans_in)
def test_model(args):
  """Restore a TF answer-ranking model from a checkpoint, run one pass over
  the test set, print loss/error statistics, and write the best-scoring
  answer per question id to args.predictions_output_json."""
  # Read and process data
  train, dev, test, batch_size, test_batch_size, train_ques_to_para,\
    dev_ques_to_para, test_ques_to_para, train_tokenized_paras,\
    dev_tokenized_paras, test_tokenized_paras, train_1_examples, dev_1_examples,\
    test_1_examples, train_order, dev_order, test_order, train_data, dev_data,\
    test_data = read_and_process_data(args)

  # Build model
  print "Building model."
  model, config = build_model(args, train_data.dictionary.size(),
                              train_data.dictionary.index_to_word)
  print "Done."

  #------------------------- Reload and test model ----------------------------#
  tf_config = tf.ConfigProto()
  # Grow GPU memory on demand instead of pre-allocating it all.
  tf_config.gpu_options.allow_growth = True
  with tf.Session(config=tf_config) as sess:
    print "Initializing variables."
    tf.global_variables_initializer().run()
    print "Done."

    # A checkpoint number and an output path are mandatory in test mode.
    assert not args.ckpt == 0
    assert not args.predictions_output_json is None
    saver = tf.train.Saver(max_to_keep=args.epochs)
    print "Loading model from checkpoint."
    saver.restore(sess, args.model_dir + 'model' + str(args.ckpt) + '.ckpt')
    print "Done."

    # Run pass over test data to compute stats
    test_start_t = time.time()
    test_loss_sum = 0.0
    test_error_sum = 0
    test_error0_sum = 0
    test_error1_sum = 0
    all_predictions = {}  # qid -> list of [answer string, probability]
    for i, num in enumerate(test_order):
      # Progress line with estimated remaining time for the test pass.
      print "\rTest: %.2f s (Done %d of %d) " %\
          ((time.time()-test_start_t)*(len(test_order)-i-1)/(i+1), i+1,
           len(test_order)),
      sys.stdout.flush()

      # Prepare test batch by computing lengths and padding
      test_batch = test[num:num+test_batch_size]
      ans_lens_in = [ len(example[1]) for example in test_batch ]
      ques_lens_in = [ len(example[0]) for example in test_batch ]
      paras_in = [ test_tokenized_paras[test_ques_to_para[example[3]]] \
                     for example in test_batch ]
      paras_lens_in = [ len(para) for para in paras_in ]
      max_ans_len = max(ans_lens_in)
      max_ques_len = max(ques_lens_in)
      max_para_len = max(paras_lens_in)
      ans_in = [ pad(example[1], 0, max_ans_len) for example in test_batch ]
      ques_in = [ pad(example[0], 0, max_ques_len) for example in test_batch ]
      paras_in = [ pad(para_in, 0, max_para_len) for para_in in paras_in ]
      labels = [ example[2] for example in test_batch ]

      # Add all batch qids to predictions dict, if they don't already exist
      qids = [ example[3] for example in test_batch ]
      # Candidate answer strings, reconstructed from the answer word ids.
      answers = [ " ".join([ test_data.dictionary.get_word(idx) for idx in example[1] ]) \
                    for example in test_batch ]
      for qid in qids:
        if not qid in all_predictions:
          all_predictions[qid] = []

      # Forward pass only; dropout disabled via keep_prob = 1.0.
      test_loss, predictions =\
        sess.run([model.loss, model.predictions],
                 feed_dict = { model.ans_input: ans_in,
                               model.ans_lens: ans_lens_in,
                               model.ques_input: ques_in,
                               model.ques_lens: ques_lens_in,
                               model.passage_input: paras_in,
                               model.passage_lens: paras_lens_in,
                               model.labels: labels,
                               model.keep_prob: 1.0 })
      test_loss_sum += test_loss
      print "[Average loss : %.5f]" % (test_loss_sum/(i+1)),

      # Record every candidate answer with its predicted probability.
      for qid, answer, prob in zip(qids, answers, predictions):
        all_predictions[qid].append([answer, prob])

      # Compute overall prediction-error, error for 0s, and error for 1s
      predictions = np.round(predictions)
      test_errors = np.abs(predictions-labels)
      test_error_sum += np.sum(test_errors)
      test_error0_sum += np.sum((1-np.array(labels)) * test_errors)
      test_error1_sum += np.sum(np.array(labels) * test_errors)

    # Print test stats for epoch
    print "\nTest Loss: %.4f (in time: %.2f s)" %\
        (test_loss_sum/len(test_order), (time.time() - test_start_t))
    print ("Total error: %d/%d (%.2f%%), 1 errors: %d/%d (%.2f%%), " +\
           "0 errors: %d/%d (%.2f%%)") %\
        (test_error_sum, len(test), 100 * float(test_error_sum)/len(test),
         test_error1_sum, test_1_examples,
         100 * float(test_error1_sum)/test_1_examples,
         test_error0_sum, len(test)-test_1_examples,
         100 * float(test_error0_sum)/(len(test)-test_1_examples))

    # Select the best answer for each question (highest probability)
    print "Getting best answers."
    for qid in all_predictions:
      all_predictions[qid] = max(all_predictions[qid], key=itemgetter(1))[0]
    print "Done."

    # Dump the results json in the required format
    print "Dumping prediction results."
    with open(args.predictions_output_json, "w") as predictions_out:
      json.dump(all_predictions, predictions_out)
      # NOTE(review): close() is redundant inside the `with` block.
      predictions_out.close()
    print "Done."
def train_model(args):
  """Train the TF answer-ranking model, printing per-batch error stats,
  evaluating on dev after each epoch, and checkpointing parameters to
  args.model_dir after every epoch."""
  # Read and process data
  train, dev, test, batch_size, test_batch_size, train_ques_to_para,\
    dev_ques_to_para, test_ques_to_para, train_tokenized_paras,\
    dev_tokenized_paras, test_tokenized_paras, train_1_examples, dev_1_examples,\
    test_1_examples, train_order, dev_order, test_order, train_data, dev_data,\
    test_data = read_and_process_data(args)

  # Build model
  model, config = build_model(args, train_data.dictionary.size(),
                              train_data.dictionary.index_to_word)

  #------------------------------ Train System ----------------------------------#
  tf_config = tf.ConfigProto()
  # Grow GPU memory on demand instead of pre-allocating it all.
  tf_config.gpu_options.allow_growth = True
  with tf.Session(config=tf_config) as sess:
    tf.global_variables_initializer().run()

    # Keep model parameters for all epochs
    saver = tf.train.Saver(max_to_keep=args.epochs)

    # Should we resume running from an existing checkpoint?
    last_done_epoch = config['cont']
    if last_done_epoch > 0:
      print "Continue from ckpt", last_done_epoch + 1
      saver.restore(sess, args.model_dir + 'model' +
                    str(last_done_epoch) + '.ckpt')

    start_time = time.time()
    print "Starting training."
    for EPOCH in range(last_done_epoch+1, args.epochs):
      start_t = time.time()
      random.shuffle(train_order)
      train_loss_sum = 0.0
      # NOTE(review): train_order appears to hold (start, end) index pairs
      # into `train`; the loop variable `num` is unused in this loop.
      for i, num in enumerate(train_order):
        print "\rTrain epoch %d, %.2f s - (Done %d of %d) " %\
            (EPOCH, (time.time()-start_t)*(len(train_order)-i-1)/(i+1), i+1,
             len(train_order)),

        # Create next batch by getting lengths and padding
        train_batch = train[train_order[i][0]:train_order[i][1]]
        random.shuffle(train_batch)
        ans_lens_in = [ len(example[1]) for example in train_batch ]
        ques_lens_in = [ len(example[0]) for example in train_batch ]
        paras_in = [ train_tokenized_paras[train_ques_to_para[example[3]]] \
                       for example in train_batch ]
        paras_lens_in = [ len(para) for para in paras_in ]
        max_ans_len = max(ans_lens_in)
        max_ques_len = max(ques_lens_in)
        max_para_len = max(paras_lens_in)
        ans_in = [ pad(example[1], 0, max_ans_len) for example in train_batch ]
        ques_in = [ pad(example[0], 0, max_ques_len) for example in train_batch ]
        paras_in = [ pad(para, 0, max_para_len) for para in paras_in ]
        labels = [ example[2] for example in train_batch ]

        # Forward + backward pass; dropout keep-prob comes from the config.
        train_loss, predictions, _ =\
          sess.run([model.loss, model.predictions, model.optimizer],
                   feed_dict = { model.ans_input: ans_in,
                                 model.ans_lens: ans_lens_in,
                                 model.ques_input: ques_in,
                                 model.ques_lens: ques_lens_in,
                                 model.passage_input: paras_in,
                                 model.passage_lens: paras_lens_in,
                                 model.labels: labels,
                                 model.keep_prob: config['drop_emb'] })

        # Batch error rates, split by gold label (0 vs 1).
        predictions = np.round(predictions)
        train_errors = np.abs(predictions-labels)
        train_error_sum = np.sum(train_errors)
        train_error0_sum = np.sum((1-np.array(labels)) * train_errors)
        train_error1_sum = np.sum(np.array(labels) * train_errors)
        train_1_examples_batch = sum([ example[2] for example in train_batch ])
        # The 1+ below guards against division by zero when a batch has no
        # positive (label 1) examples.
        print ("Total error: %.2f%%, 1 errors: %.2f%%, " +\
               "0 errors: %.2f%%, ") %\
            (100 * float(train_error_sum)/len(train_batch),
             100 * float(train_error1_sum)/(1+train_1_examples_batch),
             100 * float(train_error0_sum)/(len(train_batch)-train_1_examples_batch)),
        sys.stdout.flush()
        train_loss_sum += train_loss
        print "Loss, %.5f" % (train_loss_sum/(i+1)),

      # Print train stats for epoch
      print "\nEpoch %d: Train Loss: %.4f (in time: %.2f s)" %\
          (EPOCH, train_loss_sum/len(train_order), (time.time() - start_t))

      # Run pass over dev data to compute stats
      dev_start_t = time.time()
      dev_loss_sum = 0.0
      dev_error_sum = 0
      dev_error0_sum = 0
      dev_error1_sum = 0
      for i, num in enumerate(dev_order):
        print "\rDev: %.2f s (Done %d of %d) " %\
            ((time.time()-dev_start_t)*(len(dev_order)-i-1)/(i+1), i+1,
             len(dev_order)),
        sys.stdout.flush()

        # Prepare dev batch by computing lengths and padding
        dev_batch = dev[num:num+test_batch_size]
        ans_lens_in = [ len(example[1]) for example in dev_batch ]
        ques_lens_in = [ len(example[0]) for example in dev_batch ]
        paras_in = [ dev_tokenized_paras[dev_ques_to_para[example[3]]] \
                       for example in dev_batch ]
        paras_lens_in = [ len(para) for para in paras_in ]
        max_ans_len = max(ans_lens_in)
        max_ques_len = max(ques_lens_in)
        max_para_len = max(paras_lens_in)
        ans_in = [ pad(example[1], 0, max_ans_len) for example in dev_batch ]
        ques_in = [ pad(example[0], 0, max_ques_len) for example in dev_batch ]
        paras_in = [ pad(para_in, 0, max_para_len) for para_in in paras_in ]
        labels = [ example[2] for example in dev_batch ]

        # Forward pass only; dropout disabled via keep_prob = 1.0.
        dev_loss, predictions =\
          sess.run([model.loss, model.predictions],
                   feed_dict = { model.ans_input: ans_in,
                                 model.ans_lens: ans_lens_in,
                                 model.ques_input: ques_in,
                                 model.ques_lens: ques_lens_in,
                                 model.passage_input: paras_in,
                                 model.passage_lens: paras_lens_in,
                                 model.labels: labels,
                                 model.keep_prob: 1.0 })
        dev_loss_sum += dev_loss
        print "[Average loss : %.5f]" % (dev_loss_sum/(i+1)),

        # Compute overall prediction-error, error for 0s, and error for 1s
        predictions = np.round(predictions)
        dev_errors = np.abs(predictions-labels)
        dev_error_sum += np.sum(dev_errors)
        dev_error0_sum += np.sum((1-np.array(labels)) * dev_errors)
        dev_error1_sum += np.sum(np.array(labels) * dev_errors)

      # Print dev stats for epoch
      print "\nDev Loss: %.4f (in time: %.2f s)" %\
          (dev_loss_sum/len(dev_order), (time.time() - dev_start_t))
      print ("Total error: %d/%d (%.2f%%), 1 errors: %d/%d (%.2f%%), " +\
             "0 errors: %d/%d (%.2f%%)") %\
          (dev_error_sum, len(dev), 100 * float(dev_error_sum)/len(dev),
           dev_error1_sum, dev_1_examples,
           100 * float(dev_error1_sum)/dev_1_examples,
           dev_error0_sum, len(dev)-dev_1_examples,
           100 * float(dev_error0_sum)/(len(dev)-dev_1_examples))

      # Save model parameters from this epoch.
      save_path = saver.save(sess, args.model_dir + 'model' +
                             str(EPOCH) + '.ckpt')
      print "Model saved."
def test_model(args): # Read and process data train, dev, test, batch_size, test_batch_size, train_ques_to_para,\ dev_ques_to_para, test_ques_to_para, train_tokenized_paras,\ dev_tokenized_paras, test_tokenized_paras, train_order, dev_order, test_order,\ train_data, dev_data, test_data = read_and_process_data(args) # Build model model, config = build_model(args, train_data.dictionary.size(), train_data.dictionary.index_to_word, train_data.dictionary.word_to_index) print(model) #------------------------- Reload and test model ----------------------------# if args.model_file is not None: model = model.load_from_file(args.model_file) print "Loaded model from %s." % args.model_file else: last_done_epoch = config['ckpt'] model = model.load(args.model_dir, last_done_epoch) print "Loaded model." if not args.disable_glove: print "Embedding shape:", model.embedding.shape test_start_t = time.time() test_loss_sum = 0.0 all_predictions = {} attention_starts = {} attention_ends = {} model.eval() for i, num in enumerate(test_order): print "\rTest: %.2f s (Done %d of %d) " %\ ((time.time()-test_start_t)*(len(test_order)-i-1)/(i+1), i+1, len(test_order)), test_batch = test[num:num+test_batch_size] batch_size = len(test_batch) # Variable length question, answer and paragraph sequences for batch. ques_lens_in = [ len(example[0]) for example in test_batch ] paras_in = [ test_tokenized_paras[test_ques_to_para[example[2]]] \ for example in test_batch ] paras_lens_in = [ len(para) for para in paras_in ] max_ques_len = max(ques_lens_in) max_para_len = max(paras_lens_in) # ans_in.shape = (2, batch) ans_in = np.array([ example[1] for example in test_batch ]).T # Fixed-length (padded) input sequences with shape=(seq_len, batch). 
ques_in = np.array([ pad(example[0], 0, max_ques_len)\ for example in test_batch ]).T paras_in = np.array([ pad(para, 0, max_para_len) for para in paras_in ]).T passage_input = (paras_in, paras_lens_in) question_input = (ques_in, ques_lens_in) answer_input = ans_in # distributions[{0,1}].shape = (batch, max_passage_len) distributions = model(passage_input, question_input, answer_input) distributions[0] = distributions[0].data.cpu().numpy() distributions[1] = distributions[1].data.cpu().numpy() # Add all batch qids to predictions dict, if they don't already exist. qids = [ example[2] for example in test_batch ] for qid in qids: if not qid in all_predictions: all_predictions[qid] = [] # Search, or be greedy? if not args.use_greedy: best_idxs = [] for idx in range(len(test_batch)): best_prob = -1 best = [0, 0] max_end = paras_lens_in[idx] for j, start_prob in enumerate(distributions[0][idx][:max_end]): cur_end_idx = max_end if args.max_answer_span == -1 \ else j + args.max_answer_span end_idx = np.argmax(distributions[1][idx][j:cur_end_idx]) prob = distributions[1][idx][j+end_idx] * start_prob if prob > best_prob: best_prob = prob best = [j, j+end_idx] best_idxs.append(best) else: best_idxs = [] for idx in range(len(test_batch)): start = np.argmax(distributions[0][idx]) end_idx = paras_lens_in[idx] if args.max_answer_span == -1 \ else start + args.max_answer_span end = np.argmax(distributions[1][idx][start:end_idx]) best_idxs.append([start, start+end]) tokenized_paras = test_data.tokenized_paras answers = [ tokenized_paras[test_ques_to_para[qids[idx]]][start:end+1] \ for idx, (start, end) in enumerate(best_idxs) ] answers = [ " ".join([ test_data.dictionary.get_word(idx) for idx in ans ]) \ for ans in answers ] for qid, answer in zip(qids, answers): all_predictions[qid] = answer # Dump start and end attention distributions. 
for idx in range(batch_size): if qids[idx] in attention_starts: attention_starts[qids[idx]][1].append(ans_in[0][idx]) else: attention_starts[qids[idx]] = (distributions[0][idx], [ans_in[0][idx]]) if qids[idx] in attention_ends: attention_ends[qids[idx]][1].append(ans_in[0][idx]) else: attention_ends[qids[idx]] = (distributions[1][idx], [ans_in[1][idx]]) test_loss_sum += model.loss.data[0] print "[Average loss : %.5f]" % (test_loss_sum/(i+1)), sys.stdout.flush() # Print stats print "\nTest Loss: %.4f (in time: %.2f s)" %\ (test_loss_sum/len(test_order), (time.time() - test_start_t)) # Dump the results json in the required format print "Dumping prediction results." json.dump(all_predictions, open(args.predictions_output_json, "w")) # Dump attention start and end distributions. pickle.dump(attention_starts, open(args.predictions_output_json + "_starts.p", "wb")) pickle.dump(attention_ends, open(args.predictions_output_json + "_ends.p", "wb")) print "Done."
def train_model(args):
  """Train the span-prediction model: per-epoch LR decay, model (and Adamax
  optimizer) checkpointing, and a dev pass with prediction dumps after
  every epoch."""
  # Read and process data
  train, dev, test, batch_size, test_batch_size, train_ques_to_para,\
    dev_ques_to_para, test_ques_to_para, train_tokenized_paras,\
    dev_tokenized_paras, test_tokenized_paras, train_order, dev_order, test_order,\
    train_data, dev_data, test_data = read_and_process_data(args)

  # Build model
  model, config = build_model(args, train_data.dictionary.size(),
                              train_data.dictionary.index_to_word,
                              train_data.dictionary.word_to_index)
  if not os.path.exists(args.model_dir):
    os.mkdir(args.model_dir)

  #------------------------------ Train System ----------------------------------#
  # Should we resume running from an existing checkpoint?
  last_done_epoch = config['ckpt']
  if last_done_epoch > 0:
    model = model.load(args.model_dir, last_done_epoch)
    print "Loaded model."
    if not args.disable_glove:
      print "Embedding shape:", model.embedding.shape

  # An explicit model file (if given) overrides the epoch checkpoint above.
  if args.model_file is not None:
    model = model.load_from_file(args.model_file)
    print "Loaded model from %s." % args.model_file

  start_time = time.time()
  print "Starting training."
  if args.optimizer == "SGD":
    print "Using SGD optimizer."
    optimizer = SGD(model.parameters(), lr = args.learning_rate)
  elif args.optimizer == "Adamax":
    print "Using Adamax optimizer."
    optimizer = Adamax(model.parameters(), lr = args.learning_rate)
    # When resuming, restore saved optimizer state if available (only
    # Adamax state is ever checkpointed below).
    if last_done_epoch > 0:
      if os.path.exists(args.model_dir + "/optim_%d.pt" % last_done_epoch):
        optimizer = torch.load(args.model_dir + "/optim_%d.pt" % last_done_epoch)
      else:
        print "Optimizer saved state not found. Not loading optimizer."
  else:
    assert False, "Unrecognized optimizer."
  print(model)

  for EPOCH in range(last_done_epoch+1, args.epochs):
    start_t = time.time()
    train_loss_sum = 0.0
    model.train()
    for i, num in enumerate(train_order):
      print "\rTrain epoch %d, %.2f s - (Done %d of %d)" %\
          (EPOCH, (time.time()-start_t)*(len(train_order)-i-1)/(i+1), i+1,
           len(train_order)),

      # Create next batch by getting lengths and padding
      train_batch = train[num:num+batch_size]

      # Variable length question, answer and paragraph sequences for batch.
      ques_lens_in = [ len(example[0]) for example in train_batch ]
      paras_in = [ train_tokenized_paras[train_ques_to_para[example[2]]] \
                     for example in train_batch ]
      paras_lens_in = [ len(para) for para in paras_in ]
      max_ques_len = max(ques_lens_in)
      max_para_len = max(paras_lens_in)

      # ans_in.shape = (2, batch)
      ans_in = np.array([ example[1] for example in train_batch ]).T

      # Fixed-length (padded) input sequences with shape=(seq_len, batch).
      ques_in = np.array([ pad(example[0], 0, max_ques_len)\
                             for example in train_batch ]).T
      paras_in = np.array([ pad(para, 0, max_para_len) for para in paras_in ]).T
      passage_input = (paras_in, paras_lens_in)
      question_input = (ques_in, ques_lens_in)
      answer_input = ans_in

      # Zero previous gradient.
      model.zero_grad()
      # Forward pass populates model.loss (old-PyTorch style).
      model(passage_input, question_input, answer_input)
      model.loss.backward()
      optimizer.step()
      train_loss_sum += model.loss.data[0]
      print "Loss: %.5f (in time %.2fs)" % \
          (train_loss_sum/(i+1), time.time() - start_t),
      sys.stdout.flush()
    print "\nLoss: %.5f (in time %.2fs)" % \
        (train_loss_sum/len(train_order), time.time() - start_t)

    # End of epoch: reshuffle batch order and checkpoint the model.
    random.shuffle(train_order)
    model.zero_grad()
    model.save(args.model_dir, EPOCH)

    # Updating LR for optimizer
    for param in optimizer.param_groups:
      param['lr'] *= config['decay_rate']
    if args.optimizer == "Adamax":
      torch.save(optimizer, args.model_dir + "/optim_%d.pt" % EPOCH)

    # Run pass over dev data.
    dev_start_t = time.time()
    dev_loss_sum = 0.0
    all_predictions = {}  # qid -> predicted answer string
    print "\nRunning on Dev."
    model.eval()
    for i, num in enumerate(dev_order):
      print "\rDev: %.2f s (Done %d of %d)" %\
          ((time.time()-dev_start_t)*(len(dev_order)-i-1)/(i+1), i+1,
           len(dev_order)),
      dev_batch = dev[num:num+test_batch_size]

      # Variable length question, answer and paragraph sequences for batch.
      ques_lens_in = [ len(example[0]) for example in dev_batch ]
      paras_in = [ dev_tokenized_paras[dev_ques_to_para[example[2]]] \
                     for example in dev_batch ]
      paras_lens_in = [ len(para) for para in paras_in ]
      max_ques_len = max(ques_lens_in)
      max_para_len = max(paras_lens_in)

      # ans_in.shape = (2, batch)
      ans_in = np.array([ example[1] for example in dev_batch ]).T

      # Fixed-length (padded) input sequences with shape=(seq_len, batch).
      ques_in = np.array([ pad(example[0], 0, max_ques_len)\
                             for example in dev_batch ]).T
      paras_in = np.array([ pad(para, 0, max_para_len) for para in paras_in ]).T
      passage_input = (paras_in, paras_lens_in)
      question_input = (ques_in, ques_lens_in)
      answer_input = ans_in

      # distributions[{0,1}].shape = (batch, max_passage_len)
      distributions = model(passage_input, question_input, answer_input)
      distributions[0] = distributions[0].data.cpu().numpy()
      distributions[1] = distributions[1].data.cpu().numpy()

      # Add all batch qids to predictions dict, if they don't already exist.
      qids = [ example[2] for example in dev_batch ]
      for qid in qids:
        if not qid in all_predictions:
          all_predictions[qid] = []

      # Exhaustive span search: maximize P(start) * P(end), with the end
      # clamped to both the max answer span and the passage length.
      best_idxs = []
      for idx in range(len(dev_batch)):
        best_prob = -1
        best = [0, 0]
        max_end = paras_lens_in[idx]
        for j, start_prob in enumerate(distributions[0][idx][:max_end]):
          cur_end_idx = min(j + args.max_answer_span, max_end)
          end_idx = np.argmax(distributions[1][idx][j:cur_end_idx])
          prob = distributions[1][idx][j+end_idx] * start_prob
          if prob > best_prob:
            best_prob = prob
            best = [j, j+end_idx]
        best_idxs.append(best)

      # Map token spans back to answer strings.
      tokenized_paras = dev_data.tokenized_paras
      answers = [ tokenized_paras[dev_ques_to_para[qids[idx]]][start:end+1] \
                    for idx, (start, end) in enumerate(best_idxs) ]
      answers = [ " ".join([ dev_data.dictionary.get_word(idx) for idx in ans ]) \
                    for ans in answers ]
      for qid, answer in zip(qids, answers):
        all_predictions[qid] = answer

      dev_loss_sum += model.loss.data[0]
      print "[Average loss : %.5f]" % (dev_loss_sum/(i+1)),
      sys.stdout.flush()

    # Print dev stats for epoch
    print "\nDev Loss: %.4f (in time: %.2f s)" %\
        (dev_loss_sum/len(dev_order), (time.time() - dev_start_t))

    # Dump the results json in the required format
    print "Dumping prediction results."
    json.dump(
        all_predictions,
        open(args.model_dir + "/dev_predictions_" + str(EPOCH) + ".json", "w"))
    print "Done."
def get_batch(batch, ques_to_para, tokenized_paras, paras_pos_tags,
              paras_ner_tags, question_pos_tags, question_ner_tags,
              num_pos_tags, num_ner_tags):
  """Assemble one padded batch: word ids, one-hot POS/NER tags, answer
  spans, answer-sentence indices and an F1 reward matrix.

  Word arrays come out sequence-major with shape (seq_len, batch); tag
  arrays have shape (seq_len, batch, num_tags); the answer and sentence
  arrays have shape (2, batch) and (batch,)-transposed respectively.
  """
  def one_hot_tag_tensor(tag_seqs, num_tags, seq_len):
    # One-hot encode each tag, pad every sequence to seq_len with the
    # encoding of tag -1, and return shape (seq_len, batch, num_tags).
    pad_vec = one_hot(-1, num_tags)
    stacked = np.array([pad([one_hot(tag, num_tags) for tag in seq],
                            pad_vec, seq_len)
                        for seq in tag_seqs])
    return np.transpose(stacked, (1, 0, 2))

  # Paragraph index for each question in the batch.
  para_ids = [ques_to_para[example[2]] for example in batch]

  # Variable length question, answer and paragraph sequences for batch.
  ques_lens_in = [len(example[0]) for example in batch]
  paras_in = [tokenized_paras[pid] for pid in para_ids]
  paras_pos_tags_in = [paras_pos_tags[pid] for pid in para_ids]
  paras_ner_tags_in = [paras_ner_tags[pid] for pid in para_ids]
  ques_pos_tags_in = [question_pos_tags[example[2]] for example in batch]
  ques_ner_tags_in = [question_ner_tags[example[2]] for example in batch]
  paras_lens_in = [len(para) for para in paras_in]
  max_ques_len = max(ques_lens_in)
  max_para_len = max(paras_lens_in)

  # ans_in.shape = (2, batch); sent_in carries the answer-sentence field.
  ans_in = np.array([example[1] for example in batch]).T
  sent_in = np.array([example[4] for example in batch]).T

  # f1_mat_in.shape = (batch, seq_len, seq_len)
  f1_mat_in = np.array([create2d(example[3], 0, max_para_len, example[1][0])
                        for example in batch])

  # Fixed-length (padded) input sequences with shape=(seq_len, batch).
  ques_in = np.array([pad(example[0], 0, max_ques_len)
                      for example in batch]).T
  paras_in = np.array([pad(para, 0, max_para_len) for para in paras_in]).T

  # Fixed-length (padded) one-hot pos-tag and ner-tag inputs.
  question_pos = one_hot_tag_tensor(ques_pos_tags_in, num_pos_tags,
                                    max_ques_len)
  question_ner = one_hot_tag_tensor(ques_ner_tags_in, num_ner_tags,
                                    max_ques_len)
  paragraph_pos = one_hot_tag_tensor(paras_pos_tags_in, num_pos_tags,
                                     max_para_len)
  paragraph_ner = one_hot_tag_tensor(paras_ner_tags_in, num_ner_tags,
                                     max_para_len)

  return ((paras_in, paras_lens_in), (ques_in, ques_lens_in), ans_in,
          f1_mat_in, question_pos, question_ner, paragraph_pos,
          paragraph_ner, sent_in)