from qanta import logging
from qanta.config import conf
from qanta.guesser.abstract import AbstractGuesser
from qanta.datasets.abstract import TrainingData, Answer, QuestionText
from qanta.datasets.wikipedia import WikipediaDataset
from qanta.preprocess import preprocess_dataset, tokenize_question
from qanta.guesser.nn import create_load_embeddings_function, convert_text_to_embeddings_indices, compute_n_classes
from qanta.torch import (BaseLogger, TerminateOnNaN, EarlyStopping, ModelCheckpoint, MaxEpochStopping, TrainingManager, Tensorboard)

# NOTE(review): `torch` and `np` are used below, but `import torch` and
# `import numpy as np` are not visible in this chunk — confirm they appear
# elsewhere in the file.

log = logging.get(__name__)

# Scratch path used during training vs. the artifact name stored with the guesser.
PTDAN_WE_TMP = '/tmp/qanta/deep/pt_dan_we.pickle'
PTDAN_WE = 'pt_dan_we.pickle'
load_embeddings = create_load_embeddings_function(PTDAN_WE_TMP, PTDAN_WE, log)

# Evaluated once at import time; True when a CUDA device is available.
CUDA = torch.cuda.is_available()


def create_save_model(model):
    """Return a closure that saves `model`'s state dict to a given path."""
    def save_model(path):
        torch.save(model.state_dict(), path)
    return save_model


def flatten_and_offset(x_batch):
    # Concatenate the rows of `x_batch` into one flat numpy array.
    # NOTE(review): this definition is truncated in this chunk — the offset
    # computation implied by the name is not visible here; do not rely on
    # this view for its full contract.
    flat_x_batch = []
    for r in x_batch:
        flat_x_batch.extend(r)
    flat_x_batch = np.array(flat_x_batch)
from qanta.preprocess import preprocess_dataset, tokenize_question
from qanta.guesser.abstract import AbstractGuesser
from qanta.guesser.nn import create_load_embeddings_function, convert_text_to_embeddings_indices, compute_n_classes
from qanta.torch import (
    BaseLogger, TerminateOnNaN, Tensorboard,
    EarlyStopping, ModelCheckpoint, MaxEpochStopping, TrainingManager
)
from qanta.guesser.torch.util import create_save_model

# NOTE(review): `logging`, `connections`, `DocType`, `Text`, and `Keyword`
# are used below but their imports (qanta logging and, presumably,
# elasticsearch_dsl) are not visible in this chunk — confirm they appear
# elsewhere in the file.

log = logging.get(__name__)

# Scratch path used during training vs. the artifact name stored with the guesser.
MEM_WE_TMP = '/tmp/qanta/deep/mem_we.pickle'
MEM_WE = 'mem_we.pickle'
load_embeddings = create_load_embeddings_function(MEM_WE_TMP, MEM_WE, log)

# Module-level side effect: registers the default Elasticsearch connection
# against a local instance at import time.
connections.create_connection(hosts='localhost')


class Answer(DocType):
    # Elasticsearch document mapping: a page title plus its text content.
    # `page` is indexed both analyzed and as a raw keyword sub-field, so it
    # supports full-text search and exact matching.
    page = Text(fields={'raw': Keyword()})
    content = Text()

    class Meta:
        # Documents are stored in the 'mem' index.
        index = 'mem'


def paragraph_tokenize(page):
    # The first element is always just the wikipedia page title
    # NOTE(review): function truncated in this chunk; the body continues
    # beyond the visible source.
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import numpy as np

# NOTE(review): `logging`, `conf`, `nn`, and `AbstractGuesser` are used below
# but their imports are not visible in this chunk — confirm they appear
# elsewhere in the file.

log = logging.get(__name__)

# Scratch paths used during training vs. artifact names stored with the guesser.
CNN_WE_TMP = '/tmp/qanta/deep/cnn_we.pickle'
CNN_WE = 'cnn_we.pickle'
CNN_MODEL_TMP_TARGET = '/tmp/qanta/deep/final_cnn.keras'
CNN_MODEL_TARGET = 'final_cnn.keras'
CNN_PARAMS_TARGET = 'cnn_params.pickle'

load_embeddings = nn.create_load_embeddings_function(CNN_WE_TMP, CNN_WE, log)


class CNNGuesser(AbstractGuesser):
    """Guesser backed by a convolutional neural network over word embeddings."""

    def __init__(self):
        super().__init__()
        # All hyperparameters come from the 'CNN' section of the project config.
        guesser_conf = conf['guessers']['CNN']
        self.expand_we = guesser_conf['expand_we']
        self.n_filter_list = guesser_conf['n_filter_list']
        self.filter_sizes = guesser_conf['filter_sizes']
        self.nn_dropout_rate = guesser_conf['nn_dropout_rate']
        self.batch_size = guesser_conf['batch_size']
        self.learning_rate = guesser_conf['learning_rate']
        self.max_n_epochs = guesser_conf['max_n_epochs']
        self.max_patience = guesser_conf['max_patience']
        self.activation_function = guesser_conf['activation_function']
        # NOTE(review): class truncated in this chunk; __init__ and further
        # methods continue beyond the visible source.