# Example n. 1  (scraper artifact "Esempio n. 1" / "0" commented out so the file parses)
# 0
def main():
    """Interactively answer bAbI-style questions with the trained LSTM model.

    Loads the tokenizer fitted at training time and the GloVe embedding
    matrix, asks the user for a task number (1-20), then repeatedly reads
    a context/question pair from stdin and prints the model's predicted
    answer word until the user enters 'q'.
    """
    # Restore the tokenizer fitted at training time.
    with open(os.path.join(DATA_PATH, 'special/tokenizer.p'), 'rb') as f:
        tokenizer = pickle.load(f)
    # Reverse dictionary: token index -> word, used to decode predictions.
    reverse_word_map = dict(map(reversed, tokenizer.word_index.items()))

    # NOTE(review): `embeddings` is loaded but never used below — kept so
    # behavior (reading the file) is unchanged; confirm whether it is needed.
    with open(os.path.join(DATA_PATH, 'special/embedding_matrix.npy'),
              'rb') as f:
        embeddings = np.load(f, allow_pickle=True)

    print("Please enter task number")
    task_num = int(input())
    # Guard clause: bail out early on an invalid task number.
    if task_num <= 0 or task_num > 20:
        print("Wrong task number")
        return

    model = load_model('lstm_glove_train.h5')
    # summary() prints the table itself and returns None, so call it
    # directly instead of wrapping it in print() (which also printed "None").
    model.summary()
    while True:
        # Get input from user.
        print("Please enter context or type 'q' for quit the program: ")
        context = input()
        if context == 'q':
            break
        print("Please enter question based on the context: ")
        question = input()

        # Pad/encode the raw text to the fixed model input lengths.
        context = transform(context,
                            max_len=MAX_CONTEXT,
                            tokenizer=tokenizer)
        question = transform(question,
                             max_len=MAX_QUESTION,
                             tokenizer=tokenizer)
        answer = model.predict([context, question])

        # argmax turns the softmax/one-hot output into a token index.
        correct_tag_id = np.argmax(answer)
        # TODO: prediction appears biased to a single index (10, "bedroom");
        # investigate whether the model or the decoding step is at fault.
        word = reverse_word_map.get(correct_tag_id)
        print(word)
EMBED_HIDDEN_SIZE = 300  # dimensionality of the word-embedding vector space

###################################
#       Loading dataset           #
###################################
# Restore the tokenizer fitted at training time; the +1 reserves index 0
# for Keras padding.
with open(os.path.join(data_path, 'special/tokenizer.p'), 'rb') as f:
    tokenizer = pickle.load(f)
vocab_size = len(tokenizer.word_index) + 1

# Restore the model trained on this task.
model = load_model('lstm_train{}.h5'.format(TASK_NBR))


def _read_test_lines(name_template):
    """Read one test file for the current task and split it into lines."""
    path = os.path.join(data_path, name_template.format(TASK_NBR))
    with open(path, 'r') as fh:
        return fh.read().strip().split('\n')


# Test split: one sample per line in each of the three files.
context = _read_test_lines('Context_Test_{}.txt')
question = _read_test_lines('Question_Test_{}.txt')
answer = _read_test_lines('Answer_Test_{}.txt')

# Encode/pad raw text to the fixed model input lengths.
context = transform(context, max_len=MAX_CONTEXT, tokenizer=tokenizer)
question = transform(question, max_len=MAX_QUESTION, tokenizer=tokenizer)
answer = transform(answer, max_len=1, tokenizer=tokenizer)
# Drop the length-1 axis and one-hot encode the single-token answers.
encoded_answer = to_categorical(tf.squeeze(answer, axis=1), num_classes=vocab_size)

loss, acc = model.evaluate([context, question], encoded_answer, batch_size=BATCH_SIZE)
print("Test Task {}: loss/accuracy = {:.4f}, {:.4f}".format(TASK_NBR, loss, acc))
# Test Task 1: loss/accuracy = 3.9904, 0.4360
# Test Task 2: loss/accuracy = 3.6211, 0.2090
# Test Task 3: loss/accuracy = 3.1279, 0.1610