import cPickle as p
import numpy as np

# Entry point for full-model training: loads embeddings, vocab, and a small
# slice of the data, then hands everything to run_model(). The helpers
# (read_data, reverse_dict, run_model) are defined elsewhere in this module.
def main(args):
    # Load pretrained word embeddings and the vocabulary mapping.
    word_embeddings = p.load(open(args.word_embeddings, 'rb'))
    word_embeddings = np.array(word_embeddings)
    word2index = p.load(open(args.vocab, 'rb'))
    index2word = reverse_dict(word2index)

    # Read only the first batch_size * 5 training examples; for the tune set,
    # use the id-filtered split if tune_ids is given, else a batch_size * 2 slice.
    train_data = read_data(args.train_context, args.train_question, args.train_answer,
                           None, args.max_post_len, args.max_ques_len, args.max_ans_len,
                           count=args.batch_size * 5)
    if args.tune_ids is not None:
        test_data = read_data(args.tune_context, args.tune_question, args.tune_answer,
                              args.tune_ids, args.max_post_len, args.max_ques_len, args.max_ans_len)
    else:
        test_data = read_data(args.tune_context, args.tune_question, args.tune_answer,
                              None, args.max_post_len, args.max_ques_len, args.max_ans_len,
                              count=args.batch_size * 2)

    print 'No. of train_data %d' % len(train_data)
    print 'No. of test_data %d' % len(test_data)

    run_model(train_data, test_data, word_embeddings, word2index, index2word, args)
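# --- Hedged sketch: reverse_dict and the argparse-driven entry point are
# defined elsewhere in the repo; a minimal version consistent with how main()
# uses them might look like the following. The flag names mirror the args.*
# attributes read above, but the defaults and required/optional choices are
# assumptions, not the repo's actual configuration.

import argparse

def reverse_dict(word2index):
    # Invert the vocab mapping so index sequences can be decoded back into
    # tokens. Assumes the mapping is one-to-one.
    return dict((index, word) for word, index in word2index.items())

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--word_embeddings', required=True)
    parser.add_argument('--vocab', required=True)
    parser.add_argument('--train_context')
    parser.add_argument('--train_question')
    parser.add_argument('--train_answer')
    parser.add_argument('--tune_context')
    parser.add_argument('--tune_question')
    parser.add_argument('--tune_answer')
    parser.add_argument('--tune_ids', default=None)
    parser.add_argument('--max_post_len', type=int, default=100)
    parser.add_argument('--max_ques_len', type=int, default=20)
    parser.add_argument('--max_ans_len', type=int, default=20)
    parser.add_argument('--batch_size', type=int, default=128)
    main(parser.parse_args())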
# Entry point for pretraining: preprocesses the data into per-sub-model
# tuples and dispatches to exactly one of the question, answer, or utility
# pretraining routines (run_seq2seq / run_utility, defined elsewhere).
def main(args):
    # Load pretrained word embeddings and the vocabulary mapping.
    word_embeddings = p.load(open(args.word_embeddings, 'rb'))
    word_embeddings = np.array(word_embeddings)
    word2index = p.load(open(args.vocab, 'rb'))
    # word_embeddings = update_embs(word2index, word_embeddings)
    # NOTE: updating the embeddings gave poor utility results (~0.5 accuracy).
    index2word = reverse_dict(word2index)

    train_data = read_data(args.train_context, args.train_question, args.train_answer,
                           args.train_ids, args.max_post_len, args.max_ques_len, args.max_ans_len)
    if args.tune_ids is not None:
        test_data = read_data(args.tune_context, args.tune_question, args.tune_answer,
                              args.tune_ids, args.max_post_len, args.max_ques_len, args.max_ans_len)
    else:
        test_data = read_data(args.tune_context, args.tune_question, args.tune_answer,
                              None, args.max_post_len, args.max_ques_len, args.max_ans_len)

    print 'No. of train_data %d' % len(train_data)
    print 'No. of test_data %d' % len(test_data)

    # Index and pad the raw text, then regroup the tensors per sub-model:
    # the question model maps post -> question, the answer model maps
    # post+question -> answer, and the utility model sees all three.
    ids_seqs, post_seqs, post_lens, ques_seqs, ques_lens, \
        post_ques_seqs, post_ques_lens, ans_seqs, ans_lens = \
        preprocess_data(train_data, word2index, args.max_post_len, args.max_ques_len, args.max_ans_len)
    q_train_data = ids_seqs, post_seqs, post_lens, ques_seqs, ques_lens
    a_train_data = ids_seqs, post_ques_seqs, post_ques_lens, ans_seqs, ans_lens
    u_train_data = ids_seqs, post_seqs, post_lens, ques_seqs, ques_lens, ans_seqs, ans_lens

    ids_seqs, post_seqs, post_lens, ques_seqs, ques_lens, \
        post_ques_seqs, post_ques_lens, ans_seqs, ans_lens = \
        preprocess_data(test_data, word2index, args.max_post_len, args.max_ques_len, args.max_ans_len)
    q_test_data = ids_seqs, post_seqs, post_lens, ques_seqs, ques_lens
    a_test_data = ids_seqs, post_ques_seqs, post_ques_lens, ans_seqs, ans_lens
    u_test_data = ids_seqs, post_seqs, post_lens, ques_seqs, ques_lens, ans_seqs, ans_lens

    # Pretrain exactly one sub-model per invocation.
    if args.pretrain_ques:
        run_seq2seq(q_train_data, q_test_data, word2index, word_embeddings,
                    args.q_encoder_params, args.q_decoder_params,
                    args.max_ques_len, args.n_epochs, args.batch_size, n_layers=2)
    elif args.pretrain_ans:
        run_seq2seq(a_train_data, a_test_data, word2index, word_embeddings,
                    args.a_encoder_params, args.a_decoder_params,
                    args.max_ans_len, args.n_epochs, args.batch_size, n_layers=2)
    elif args.pretrain_util:
        run_utility(u_train_data, u_test_data, word_embeddings, index2word, args, n_layers=1)
    else:
        print 'Please specify model to pretrain'
        return
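# --- Hedged sketch: preprocess_data is defined elsewhere in the repo. The
# version below only illustrates the contract main() relies on -- index each
# sequence with word2index, truncate/pad to the given max lengths, record the
# true lengths, and build the concatenated post+question input for the answer
# model. The (id, post, question, answer) tuple layout of `data` and the
# <unk>/<pad> token names are assumptions.

def _index_and_pad(tokens, word2index, max_len, unk='<unk>', pad='<pad>'):
    # Map tokens to indices, truncating to max_len and right-padding.
    idxs = [word2index.get(w, word2index[unk]) for w in tokens[:max_len]]
    length = len(idxs)
    idxs += [word2index[pad]] * (max_len - length)
    return idxs, length

def preprocess_data_sketch(data, word2index, max_post_len, max_ques_len, max_ans_len):
    ids, posts, post_lens, quess, ques_lens = [], [], [], [], []
    post_quess, post_ques_lens, anss, ans_lens = [], [], [], []
    for item_id, post, ques, ans in data:
        p_idx, p_len = _index_and_pad(post, word2index, max_post_len)
        q_idx, q_len = _index_and_pad(ques, word2index, max_ques_len)
        a_idx, a_len = _index_and_pad(ans, word2index, max_ans_len)
        # The answer model conditions on the post followed by the question.
        pq_idx, pq_len = _index_and_pad(post + ques, word2index,
                                        max_post_len + max_ques_len)
        ids.append(item_id)
        posts.append(p_idx); post_lens.append(p_len)
        quess.append(q_idx); ques_lens.append(q_len)
        post_quess.append(pq_idx); post_ques_lens.append(pq_len)
        anss.append(a_idx); ans_lens.append(a_len)
    return (ids, posts, post_lens, quess, ques_lens,
            post_quess, post_ques_lens, anss, ans_lens)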