예제 #1
0
    def preprocess_test(self, sentence):
        """Parse a sentence into splits and return padded word-vector sequences.

        The sentence is parsed into parts, each part is mapped to a sequence
        of word vectors, and every sequence is padded to the length of the
        longest one so they can be batched together.

        Args:
            sentence: Raw input accepted by ``self.parse_sentence``.

        Returns:
            A list with one padded vector sequence per parsed split, each
            padded to the common maximum length using
            ``word2vec_mapped_size``-sized padding entries.
        """
        splits = self.parse_sentence(sentence)
        mapped_vectors = self.map_vector(splits)
        # Generator feeds max() directly (no throwaway list); iterating over
        # all splits generalizes the original hard-coded [0]/[1] handling
        # and behaves identically when there are exactly two splits.
        pad_len = max(len(vectors) for vectors in mapped_vectors)
        return [
            pad_sequence(vectors, pad_len, word2vec_mapped_size)
            for vectors in mapped_vectors
        ]
예제 #2
0
def do_shell(model, dev, input_model=None):
    """Run an interactive question-answering shell against a trained model.

    Restores the latest checkpoint under ``FLAGS.train_dir/FLAGS.model_name``
    and loops over paragraphs drawn from *dev*. For each paragraph the user
    may:

    * type a question  -> the model's predicted answer span is printed;
    * type ``next``    -> advance to the next paragraph;
    * press Enter      -> reuse the dataset's own question, printing both the
      model's answer and the ground-truth (human) answer for comparison.

    Args:
        model: QA model exposing ``fill_feed_dict(questions, paragraphs,
            question_lengths, paragraph_lengths)`` and an ``answer`` fetch
            that yields (start, end) index arrays into the paragraph.
        dev: Development set providing ``get_batch(batch_size)``.
        input_model: Optional secondary model. NOTE(review): this code path
            references an undefined name ``feed_dict_inputs`` and would raise
            ``NameError`` if taken — it looks unfinished; confirm before
            passing a non-None value.
    """
    # what is is_training if import_meta_graph
    checkpoint_dir = os.path.join(FLAGS.train_dir, FLAGS.model_name)
    vocab_path = FLAGS.vocab_path or pjoin(FLAGS.data_dir, "vocab.dat")
    vocab, rev_vocab = initialize_vocab(vocab_path)
    # TODO no logs
    saver = tf.train.Saver()
    with tf.Session() as session:
        if False:  # load_meta — dead code: would rebuild the graph from the
            # newest .meta file instead of using the in-memory graph.
            # NOTE(review): delete or gate behind a real flag.
            last_meta = next(
                reversed(
                    [f for f in os.listdir(checkpoint_dir) if '.meta' in f]))
            saver = tf.train.import_meta_graph(os.path.join(last_meta))
        saver.restore(session, tf.train.latest_checkpoint(checkpoint_dir))
        print('HINT: Input as question "next" for next paragraph')
        while True:
            # One paragraph (batch of size 1) per outer-loop iteration.
            original_question, paragraphs, question_lengths, paragraph_lengths, answers = dev.get_batch(
                1)
            for i in itertools.count():
                paragraph = reverse_indices(paragraphs[0], rev_vocab)
                if not i:
                    # Print the paragraph text only on the first pass.
                    print('\n')
                    print(paragraph, end='\n\n')

                question_input = input('QUESTION: ')

                if question_input == 'next':
                    break
                elif question_input:
                    # Tokenize the typed question and map each word to its
                    # vocab id, falling back to UNK for unknown words.
                    question = [
                        vocab.get(word, UNK_ID)
                        for word in tokenize(question_input)
                    ]
                    question, question_length = pad_sequence(
                        question, FLAGS.max_question_length)
                    questions, question_lengths = [question], [question_length]
                else:
                    # Blank input: fall back to the dataset's own question
                    # (question_lengths from get_batch is reused unchanged).
                    question_words = reverse_indices(original_question[0],
                                                     rev_vocab)
                    questions = original_question
                    print(question_words)

                if input_model:
                    #feed into siamese model instead
                    # NOTE(review): `feed_dict_inputs` is not defined in this
                    # function — this branch raises NameError, and `question`
                    # is not used by fill_feed_dict below anyway. The path
                    # looks unfinished; confirm intent before enabling.
                    question = feed_dict_inputs[0]
                    question = input_model.run(question)
                feed_dict = model.fill_feed_dict(questions, paragraphs,
                                                 question_lengths,
                                                 paragraph_lengths)

                if False:  #load_meta — dead code: fetch predictions by tensor
                    # name from an imported meta-graph instead of model.answer.
                    start, end = session.run([
                        'prediction/answer_start:0', 'prediction/answer_end:0'
                    ], feed_dict)
                    start, end = start[0], end[0]
                else:
                    start, end = session.run(model.answer, feed_dict)
                    start, end = start[0], end[0]

                # Answer span is inclusive of `end`, hence the +1 slice bound.
                # NOTE(review): tokens are joined with no separator — verify
                # reverse_indices yields character-level (not word-level)
                # pieces, otherwise a ' '.join was probably intended.
                answer_idxs = paragraphs[0][start:end + 1]
                answer_words = ''.join(reverse_indices(answer_idxs, rev_vocab))
                print(f'COMPUTER: {answer_words}')

                if not question_input:
                    # Blank input also prints the ground-truth answer span.
                    start, end = answers[0]
                    correct_answer_idxs = paragraphs[0][start:end + 1]
                    correct_answer = ''.join(
                        reverse_indices(correct_answer_idxs, rev_vocab))
                    print(f'HUMAN: {correct_answer}')
                print()