コード例 #1
0
import yaml
from jinja2 import Environment, PackageLoader
import tqdm

from qanta import qlogging
from qanta.guesser.abstract import AbstractGuesser
from qanta.guesser.elasticsearch import elasticsearch_cli
from qanta.util.environment import ENVIRONMENT
from qanta.util.io import safe_open, shell, get_tmp_filename
from qanta.util.constants import QANTA_SQL_DATASET_PATH, GUESSER_GENERATION_FOLDS
from qanta.hyperparam import expand_config
from qanta.wikipedia.categories import categorylinks_cli
from qanta.wikipedia.vital import vital_cli
from qanta.ingestion.trickme import trick_cli

# Logger for this module, obtained from qlogging under the name 'cli'.
log = qlogging.get('cli')

# Context settings handed to click: accept -h in addition to --help.
CONTEXT_SETTINGS = {'help_option_names': ['-h', '--help']}


# Root command group of the CLI. Described with comments rather than a
# docstring on purpose: click would surface a docstring as the group's help text.
@click.group(context_settings=CONTEXT_SETTINGS)
def main():
    # Log a header line, then each ENVIRONMENT entry as key=value.
    log.info("QANTA starting with configuration:")
    for name, value in ENVIRONMENT.items():
        setting = "{0}={1}".format(name, value)
        log.info(setting)


# Attach the imported command objects to the root `main` group, each under the
# name given by its `name=` argument.
main.add_command(categorylinks_cli, name='categories')
main.add_command(vital_cli, name='vital')
main.add_command(elasticsearch_cli, name='elasticsearch')
main.add_command(trick_cli, name='trick')
コード例 #2
0
import pickle
import os
import time

import luigi
from luigi import LocalTarget, Task, WrapperTask

from qanta.config import conf
from qanta.util import constants as c
from qanta.util.io import shell
from qanta.guesser.abstract import AbstractGuesser, get_class
from qanta.pipeline.preprocess import DownloadData
from qanta import qlogging

# Logger for this module, requested from qlogging by the module's import name.
log = qlogging.get(__name__)


class EmptyTask(luigi.Task):
    """Luigi task whose ``complete`` check always succeeds."""

    def complete(self):
        """Return True unconditionally."""
        return True


class TrainGuesser(Task):
    """Luigi task parameterized by a guesser, a dependency, and a config number."""

    # Names identifying the guesser (string parameters).
    # NOTE(review): presumably a module path and class name to be resolved dynamically — confirm.
    guesser_module = luigi.Parameter()  # type: str
    guesser_class = luigi.Parameter()  # type: str
    # Names identifying the dependency (string parameters) — presumably another task; confirm.
    dependency_module = luigi.Parameter()  # type: str
    dependency_class = luigi.Parameter()  # type: str
    # Integer configuration number — presumably an index into a set of configs; confirm.
    config_num = luigi.IntParameter()  # type: int

    def requires(self):
        """Yield ``DownloadData`` as an upstream requirement of this task."""
        yield DownloadData()
コード例 #3
0
ファイル: elasticsearch.py プロジェクト: Pinafore/qb
from elasticsearch_dsl.connections import connections
import elasticsearch
import tqdm
from nltk.tokenize import word_tokenize
from jinja2 import Environment, PackageLoader

from qanta.wikipedia.cached_wikipedia import Wikipedia
from qanta.datasets.abstract import QuestionText
from qanta.guesser.abstract import AbstractGuesser
from qanta.spark import create_spark_context
from qanta.config import conf
from qanta.util.io import get_tmp_dir, safe_path
from qanta import qlogging


# Logger for this module, requested from qlogging by the module's import name.
log = qlogging.get(__name__)
# File name constant. NOTE(review): presumably the pickle holding guesser parameters — confirm at use sites.
ES_PARAMS = 'es_params.pickle'
# Import-time side effect: registers an elasticsearch_dsl connection targeting localhost.
connections.create_connection(hosts=['localhost'])


def create_es_config(output_path, host='localhost', port=9200, tmp_dir=None):
    if tmp_dir is None:
        tmp_dir = get_tmp_dir()
    data_dir = safe_path(os.path.join(tmp_dir, 'elasticsearch/data/'))
    log_dir = safe_path(os.path.join(tmp_dir, 'elasticsearch/log/'))
    env = Environment(loader=PackageLoader('qanta', 'templates'))
    template = env.get_template('elasticsearch.yml')
    config_content = template.render({
        'host': host,
        'port': port,
        'log_dir': log_dir,
コード例 #4
0
ファイル: cli.py プロジェクト: Pinafore/qb
from jinja2 import Environment, PackageLoader
import tqdm

from qanta import qlogging
from qanta.guesser.abstract import AbstractGuesser
from qanta.guesser.elasticsearch import elasticsearch_cli
from qanta.util.environment import ENVIRONMENT
from qanta.util.io import safe_open, shell, get_tmp_filename
from qanta.util.constants import QANTA_SQL_DATASET_PATH, GUESSER_GENERATION_FOLDS
from qanta.hyperparam import expand_config
from qanta.wikipedia.categories import categorylinks_cli
from qanta.wikipedia.vital import vital_cli
from qanta.ingestion.trickme import trick_cli
from qanta.ingestion.command import ingestion_cli

# Logger for this module, obtained from qlogging under the name 'cli'.
log = qlogging.get('cli')

# Context settings handed to click: accept -h in addition to --help.
CONTEXT_SETTINGS = {'help_option_names': ['-h', '--help']}


# Root command group of the CLI. Described with comments rather than a
# docstring on purpose: click would surface a docstring as the group's help text.
@click.group(context_settings=CONTEXT_SETTINGS)
def main():
    # Log a header line, then each ENVIRONMENT entry as key=value.
    log.info("QANTA starting with configuration:")
    for name, value in ENVIRONMENT.items():
        setting = "{0}={1}".format(name, value)
        log.info(setting)


# Attach the imported command objects to the root `main` group, each under the
# name given by its `name=` argument.
main.add_command(categorylinks_cli, name='categories')
main.add_command(vital_cli, name='vital')
main.add_command(elasticsearch_cli, name='elasticsearch')
main.add_command(trick_cli, name='trick')
コード例 #5
0
from qanta import qlogging
from qanta.ingestion.answer_mapping import read_wiki_titles
from qanta.ingestion.annotated_mapping import PageAssigner

# Logger for this script, obtained from qlogging under the name 'validate_annotations'.
log = qlogging.get('validate_annotations')


def normalize(title):
    """Return ``title`` with every space character replaced by an underscore."""
    # Splitting on a literal ' ' keeps empty pieces, so runs of spaces and
    # leading/trailing spaces each map to one underscore per space.
    return '_'.join(title.split(' '))


def check_page(page, titles):
    """Log an error if the normalized form of ``page`` is not in ``titles``.

    The lookup uses ``normalize(page)``; the log message reports the
    original, un-normalized ``page``.
    """
    if normalize(page) in titles:
        return
    log.error(f'Title not found: {page}')


def main():
    """Run ``check_page`` over the pages of three ``PageAssigner`` mappings.

    Titles come from ``read_wiki_titles``. The protobowl-direct,
    quizdb-direct, and unambiguous mappings are processed in that order,
    each preceded by an info log line.
    """
    titles = read_wiki_titles()
    assigner = PageAssigner()
    # (log message, PageAssigner attribute name) pairs, handled in order.
    stages = (
        ('Checking direct protobowl mappings...', 'protobowl_direct'),
        ('Checking direct quizdb mappings...', 'quizdb_direct'),
        ('Checking unambiguous mappings...', 'unambiguous'),
    )
    for message, attr_name in stages:
        log.info(message)
        # The attribute is looked up only after its message has been logged.
        for page in getattr(assigner, attr_name).values():
            check_page(page, titles)
コード例 #6
0
ファイル: cli.py プロジェクト: nhatsmrt/qb
from jinja2 import Environment, PackageLoader
import tqdm

from qanta import qlogging
from qanta.guesser.abstract import AbstractGuesser
from qanta.guesser.elasticsearch import elasticsearch_cli
from qanta.util.environment import ENVIRONMENT
from qanta.util.io import safe_open, shell, get_tmp_filename
from qanta.util.constants import QANTA_SQL_DATASET_PATH, GUESSER_GENERATION_FOLDS
from qanta.hyperparam import expand_config
from qanta.wikipedia.categories import categorylinks_cli
from qanta.wikipedia.vital import vital_cli
from qanta.ingestion.trickme import trick_cli
from qanta.ingestion.command import ingestion_cli

# Logger for this module, obtained from qlogging under the name "cli".
log = qlogging.get("cli")

# Context settings handed to click: accept -h in addition to --help.
CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}


# Root command group of the CLI. Described with comments rather than a
# docstring on purpose: click would surface a docstring as the group's help text.
@click.group(context_settings=CONTEXT_SETTINGS)
def main():
    # Log a header line, then each ENVIRONMENT entry as key=value.
    log.info("QANTA starting with configuration:")
    for name, value in ENVIRONMENT.items():
        setting = "{0}={1}".format(name, value)
        log.info(setting)


# Attach the imported command objects to the root `main` group, each under the
# name given by its `name=` argument.
main.add_command(categorylinks_cli, name="categories")
main.add_command(vital_cli, name="vital")
main.add_command(elasticsearch_cli, name="elasticsearch")
main.add_command(trick_cli, name="trick")
コード例 #7
0
from qanta import qlogging
from qanta.ingestion.answer_mapping import read_wiki_titles
from qanta.ingestion.annotated_mapping import PageAssigner

# Logger for this script, obtained from qlogging under the name "validate_annotations".
log = qlogging.get("validate_annotations")


def normalize(title):
    """Return ``title`` with every space character replaced by an underscore."""
    # Splitting on a literal " " keeps empty pieces, so runs of spaces and
    # leading/trailing spaces each map to one underscore per space.
    return "_".join(title.split(" "))


def check_page(page, titles):
    """Log an error if the normalized form of ``page`` is not in ``titles``.

    The lookup uses ``normalize(page)``; the log message reports the
    original, un-normalized ``page``.
    """
    if normalize(page) in titles:
        return
    log.error(f"Title not found: {page}")


def main():
    """Run ``check_page`` over the pages of three ``PageAssigner`` mappings.

    Titles come from ``read_wiki_titles``. The protobowl-direct,
    quizdb-direct, and unambiguous mappings are processed in that order,
    each preceded by an info log line.
    """
    titles = read_wiki_titles()
    assigner = PageAssigner()
    # (log message, PageAssigner attribute name) pairs, handled in order.
    stages = (
        ("Checking direct protobowl mappings...", "protobowl_direct"),
        ("Checking direct quizdb mappings...", "quizdb_direct"),
        ("Checking unambiguous mappings...", "unambiguous"),
    )
    for message, attr_name in stages:
        log.info(message)
        # The attribute is looked up only after its message has been logged.
        for page in getattr(assigner, attr_name).values():
            check_page(page, titles)