import argparse
import os
import sys

import numpy as np

# Append the parent of this file's directory to sys.path
# (presumably so the `cakechat` imports below resolve when run as a script).
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from cakechat.utils.env import init_theano_env

# Runs before the remaining cakechat modules are imported.
# NOTE(review): presumably the environment has to be initialised before those
# modules load -- confirm before reordering these statements.
init_theano_env()

from cakechat.config import QUESTIONS_CORPUS_NAME, INPUT_SEQUENCE_LENGTH, INPUT_CONTEXT_SIZE, \
    PREDICTION_MODES, PREDICTION_MODE_FOR_TESTS, DEFAULT_CONDITION, RANDOM_SEED
from cakechat.utils.text_processing import get_tokens_sequence, replace_out_of_voc_tokens
from cakechat.utils.dataset_loader import get_tokenized_test_lines
from cakechat.dialog_model.model_utils import transform_context_token_ids_to_sentences, \
    transform_contexts_to_token_ids, lines_to_context
from cakechat.dialog_model.inference import get_nn_responses
from cakechat.dialog_model.factory import get_trained_model

# Seed NumPy's global random generator with the configured RANDOM_SEED.
np.random.seed(seed=RANDOM_SEED)


def load_corpus(nn_model, corpus_name):
    """
    Load the tokenized test lines of the corpus named `corpus_name`.

    :param nn_model: model object exposing an `index_to_token` mapping; the set of
        its values is passed to `get_tokenized_test_lines` as the second argument
        (presumably the set of tokens known to the model -- confirm against
        `get_tokenized_test_lines`)
    :param corpus_name: name of the corpus, forwarded to `get_tokenized_test_lines`
    :return: whatever `get_tokenized_test_lines` returns for these arguments
    """
    return get_tokenized_test_lines(corpus_name, set(nn_model.index_to_token.values()))


def process_text(nn_model, text):
    tokenized_line = get_tokens_sequence(text)
import os import sys import unittest import numpy as np sys.path.append( os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))) from cakechat.utils.env import init_theano_env init_theano_env() from cakechat.dialog_model.factory import get_trained_model from cakechat.dialog_model.inference import get_sequence_log_probs from cakechat.dialog_model.inference.utils import get_next_token_log_prob_one_step from cakechat.config import DECODER_DEPTH, HIDDEN_LAYER_DIMENSION, RANDOM_SEED np.random.seed(seed=RANDOM_SEED) class TestPredict(unittest.TestCase): @staticmethod def _predict_log_probabilities_one_step(nn_model, x_batch, y_batch): """ Predict answers for every sequence token by token until EOS_TOKEN occurred in the sequence using sampling with temperature. All the rest of the sequence is filled with PAD_TOKENs. """ thought_vectors_batch = nn_model.get_thought_vectors(x_batch) hidden_states_batch = np.zeros((x_batch.shape[0], DECODER_DEPTH, HIDDEN_LAYER_DIMENSION), dtype=np.float32) total_log_probs = np.zeros((y_batch.shape[0], y_batch.shape[1] - 1, nn_model.vocab_size))