Example #1
import numpy as np
import torch

from qanta import logging
from qanta.config import conf
from qanta.guesser.abstract import AbstractGuesser
from qanta.datasets.abstract import TrainingData, Answer, QuestionText
from qanta.datasets.wikipedia import WikipediaDataset
from qanta.preprocess import preprocess_dataset, tokenize_question
from qanta.guesser.nn import create_load_embeddings_function, convert_text_to_embeddings_indices, compute_n_classes
from qanta.torch import (BaseLogger, TerminateOnNaN, EarlyStopping,
                         ModelCheckpoint, MaxEpochStopping, TrainingManager,
                         Tensorboard)

log = logging.get(__name__)

PTDAN_WE_TMP = '/tmp/qanta/deep/pt_dan_we.pickle'
PTDAN_WE = 'pt_dan_we.pickle'
load_embeddings = create_load_embeddings_function(PTDAN_WE_TMP, PTDAN_WE, log)
CUDA = torch.cuda.is_available()


def create_save_model(model):
    def save_model(path):
        torch.save(model.state_dict(), path)

    return save_model
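
A brief usage note: the returned closure bakes in the model reference, so checkpointing hooks can persist weights given only a path (the path below is illustrative):

save_model = create_save_model(model)
save_model('/tmp/qanta/deep/pt_dan.pt')  # writes model.state_dict() to this path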


def flatten_and_offset(x_batch):
    # Flatten a batch of variable-length index rows into a single array.
    flat_x_batch = []
    for r in x_batch:
        flat_x_batch.extend(r)
    flat_x_batch = np.array(flat_x_batch)
    # The snippet is truncated here; a plausible completion also returns
    # the start offset of each row within the flattened array.
    offsets = np.cumsum([0] + [len(r) for r in x_batch[:-1]])
    return flat_x_batch, offsets
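
Flattened indices plus row offsets match the input contract of torch.nn.EmbeddingBag, which pools each row's word vectors in a single call. A minimal sketch, assuming the reconstructed return value above (the batch contents are made up):

emb = torch.nn.EmbeddingBag(num_embeddings=1000, embedding_dim=50, mode='mean')
x_batch = [[1, 5, 9], [2, 7], [4, 4, 4, 8]]      # three variable-length rows
flat, offsets = flatten_and_offset(x_batch)
pooled = emb(torch.from_numpy(flat).long(),
             torch.from_numpy(offsets).long())   # shape: (3, 50)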
Example #2
from elasticsearch_dsl import DocType, Text, Keyword
from elasticsearch_dsl.connections import connections

from qanta import logging
from qanta.preprocess import preprocess_dataset, tokenize_question
from qanta.guesser.abstract import AbstractGuesser
from qanta.guesser.nn import create_load_embeddings_function, convert_text_to_embeddings_indices, compute_n_classes
from qanta.torch import (
    BaseLogger, TerminateOnNaN, Tensorboard,
    EarlyStopping, ModelCheckpoint, MaxEpochStopping, TrainingManager
)
from qanta.guesser.torch.util import create_save_model


log = logging.get(__name__)


MEM_WE_TMP = '/tmp/qanta/deep/mem_we.pickle'
MEM_WE = 'mem_we.pickle'
load_embeddings = create_load_embeddings_function(MEM_WE_TMP, MEM_WE, log)


connections.create_connection(hosts='localhost')  # default connection used by the Answer DocType below


class Answer(DocType):
    page = Text(fields={'raw': Keyword()})
    content = Text()

    class Meta:
        index = 'mem'
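
For orientation, retrieval against this mapping typically goes through the DocType's search helper; a hedged sketch (the query text is made up):

results = Answer.search().query('match', content='this athenian philosopher').execute()
guesses = [(hit.page, hit.meta.score) for hit in results]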


def paragraph_tokenize(page):
    # The first element is always just the wikipedia page title,
    # so drop it and keep the remaining non-empty paragraphs.
    # (The original snippet is cut off after the comment above; this
    # body is a plausible completion, not the file's verbatim code.)
    return [p for p in page.content.split('\n') if p != ''][1:]
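
Indexing then pairs this tokenizer with the Answer DocType; a minimal sketch, assuming a wikipedia page object with title and content fields:

Answer.init()  # create the 'mem' index and its mapping if missing
for paragraph in paragraph_tokenize(page):
    Answer(page=page.title, content=paragraph).save()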
Example #3
File: cnn.py Project: amit2014/qb
from qanta import logging
from qanta.config import conf
from qanta.guesser import nn
from qanta.guesser.abstract import AbstractGuesser
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

import numpy as np


log = logging.get(__name__)

CNN_WE_TMP = '/tmp/qanta/deep/cnn_we.pickle'
CNN_WE = 'cnn_we.pickle'
CNN_MODEL_TMP_TARGET = '/tmp/qanta/deep/final_cnn.keras'
CNN_MODEL_TARGET = 'final_cnn.keras'
CNN_PARAMS_TARGET = 'cnn_params.pickle'


load_embeddings = nn.create_load_embeddings_function(CNN_WE_TMP, CNN_WE, log)


class CNNGuesser(AbstractGuesser):
    def __init__(self):
        super().__init__()
        guesser_conf = conf['guessers']['CNN']
        self.expand_we = guesser_conf['expand_we']
        self.n_filter_list = guesser_conf['n_filter_list']
        self.filter_sizes = guesser_conf['filter_sizes']
        self.nn_dropout_rate = guesser_conf['nn_dropout_rate']
        self.batch_size = guesser_conf['batch_size']
        self.learning_rate = guesser_conf['learning_rate']
        self.max_n_epochs = guesser_conf['max_n_epochs']
        self.max_patience = guesser_conf['max_patience']
        self.activation_function = guesser_conf['activation_function']
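
These hyperparameters feed keras training in the usual way; a hedged sketch of how the imported callbacks are commonly wired up (train_model and the data arguments are illustrative names, not from the original file):

    def train_model(self, model, x_train, y_train, x_val, y_val):
        callbacks = [
            TensorBoard(),
            EarlyStopping(monitor='val_loss', patience=self.max_patience),
            ModelCheckpoint(CNN_MODEL_TMP_TARGET, save_best_only=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2),
        ]
        model.compile(optimizer=Adam(lr=self.learning_rate),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        model.fit(x_train, y_train,
                  batch_size=self.batch_size,
                  epochs=self.max_n_epochs,
                  validation_data=(x_val, y_val),
                  callbacks=callbacks)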