def main(_):
    """Interactive demo: repeatedly read a context paragraph and a question
    from stdin, run the trained QA model, and print the predicted answer span.

    Args:
        _: unused positional arg (tf.app.run passes argv here).

    Loops forever; interrupt with Ctrl-C to exit.
    """
    vocab, rev_vocab = initialize_vocab(FLAGS.vocab)

    # GPU setting: grow memory on demand instead of reserving it all up front.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    tf.reset_default_graph()
    # Encoder/decoder widths are 2x the LSTM hidden size (bidirectional outputs).
    encoder = Encoder(size=2 * cfg.lstm_num_hidden)
    decoder = Decoder(output_size=2 * cfg.lstm_num_hidden)
    qa = QASystem(encoder, decoder, FLAGS.embed)

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        load_train_dir = get_normalized_train_dir(FLAGS.ckpt)
        # Restores pretrained weights from the checkpoint dir (or keeps the
        # fresh initialization if none is found — behavior of initialize_model).
        initialize_model(sess, qa, load_train_dir)

        print('*********************************************************************')
        print("Welcome! You can use this to explore the behavior of the model.")
        print('*********************************************************************')

        while True:
            print('-------------------')
            print('Input the context: ')
            print('-------------------')
            sentence = raw_input()
            print('-------------------')
            print('Input the question: ')
            print('-------------------')
            query = raw_input()

            # Keep the raw tokens so the answer can be reconstructed verbatim
            # from the same tokenization used for the id conversion.
            raw_context = nltk.word_tokenize(sentence)
            context = sentence_to_token_ids(sentence, vocab,
                                            tokenizer=nltk.word_tokenize)
            question = sentence_to_token_ids(query, vocab,
                                             tokenizer=nltk.word_tokenize)

            # Pad/truncate to the fixed lengths the graph expects.
            context_in = mask_input(context, cfg.context_max_len)
            question_in = mask_input(question, cfg.question_max_len)

            start, end = qa.answer(sess, [context_in], [question_in])
            # `end` is an inclusive index, hence the +1 in the slice.
            answer = ' '.join(raw_context[start[0]:end[0] + 1])
            print('==========================================')
            print('ANSWER: {}'.format(answer))
            print('==========================================')
def run_func():
    """Interactive demo: load the SQuAD model, then loop reading a question
    and a context from stdin and printing the predicted answer span.

    Type 'exit' at either prompt to quit.
    """
    config = Config()
    # Datasets are built here so the same config paths are validated up front;
    # training/evaluation calls are intentionally left disabled below.
    train = squad_dataset(config.question_train, config.context_train,
                          config.answer_train)
    dev = squad_dataset(config.question_dev, config.context_dev,
                        config.answer_dev)

    vocab, rev_vocab = initialize_vocab(config.vocab_path)
    embeddings = get_trimmed_glove_vectors(config.embed_path)

    encoder = Encoder(config.hidden_size)
    decoder = Decoder(config.hidden_size)
    qa = QASystem(encoder, decoder, embeddings, config)

    with tf.Session() as sess:
        # Load a pretrained model if one exists, else create a new one.
        qa.initialize_model(sess, config.train_dir)
        # train process (disabled):
        # qa.train(sess, [train, dev], config.train_dir)
        # em = qa.evaluate_model(sess, dev)

        while True:
            question = input('please input question: ')
            if question == 'exit':
                break
            raw_context = input('please input context: ')
            if raw_context == 'exit':
                break

            # Map out-of-vocabulary tokens to id 2 (presumably <unk> — verify
            # against the vocab builder). dict.get does one lookup instead of
            # the original `x in vocab.keys()` test plus a second indexing.
            question_ids = [vocab.get(x, 2) for x in question.split()]
            context_tokens = raw_context.split()
            context_ids = [vocab.get(x, 2) for x in context_tokens]

            # Third element is a dummy answer span required by the feed format.
            test = [[question_ids], [context_ids], [[1, 2]]]
            a_s, a_e = qa.answer(sess, test)
            if a_e == a_s:
                # Single-token answer: slice notation would also work, but the
                # original prints the bare token in this case.
                print("answer: ", context_tokens[a_s[0]])
            else:
                # a_e is inclusive, hence the +1.
                print("answer: ", ' '.join(context_tokens[a_s[0]:a_e[0] + 1]))
def main(_):
    """Interactive demo: repeatedly read a context and a question from stdin,
    run the trained QA model, and print the predicted answer span.

    Args:
        _: unused positional arg (tf.app.run passes argv here).

    Loops forever; interrupt with Ctrl-C to exit.
    """
    vocab, rev_vocab = initialize_vocab(FLAGS.vocab)

    # GPU setting: allocate GPU memory on demand rather than all at once.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    tf.reset_default_graph()
    # Bidirectional LSTM outputs double the hidden size, hence the factor 2.
    encoder = Encoder(size=2 * cfg.lstm_num_hidden)
    decoder = Decoder(output_size=2 * cfg.lstm_num_hidden)
    qa = QASystem(encoder, decoder, FLAGS.embed)

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        load_train_dir = get_normalized_train_dir(FLAGS.ckpt)
        # Restore pretrained weights from the checkpoint directory.
        initialize_model(sess, qa, load_train_dir)

        print('*********************************************************************')
        print("Welcome! You can use this to explore the behavior of the model.")
        print('*********************************************************************')

        while True:
            print('-------------------')
            print('Input the context: ')
            print('-------------------')
            sentence = raw_input()
            print('-------------------')
            print('Input the question: ')
            print('-------------------')
            query = raw_input()

            # Tokenize once for display and once (inside the helper) for ids,
            # both with the same tokenizer, so span indices line up.
            raw_context = nltk.word_tokenize(sentence)
            context = sentence_to_token_ids(sentence, vocab,
                                            tokenizer=nltk.word_tokenize)
            question = sentence_to_token_ids(query, vocab,
                                             tokenizer=nltk.word_tokenize)

            # Pad/truncate to the fixed lengths the model graph expects.
            context_in = mask_input(context, cfg.context_max_len)
            question_in = mask_input(question, cfg.question_max_len)

            start, end = qa.answer(sess, [context_in], [question_in])
            # `end` is inclusive, so slice to end[0] + 1.
            answer = ' '.join(raw_context[start[0]:end[0] + 1])
            print('==========================================')
            print('ANSWER: {}'.format(answer))
            print('==========================================')