Ejemplo n.º 1
0
    def __init__(self, regex, settings, semantics, printout=None):
        """
        Build the application from the `regex`, `settings` and `semantics`
        modules, plus an optional `printout` module.

        Every `RegexTemplate` subclass found in `regex` (the base class
        itself excluded) is instantiated and stored in `self.rules`, ordered
        from the highest to the lowest `weight`.
        """

        for module in (regex, settings, semantics):
            assert isinstance(module, ModuleType)

        self._regex_module = regex
        self._settings_module = settings
        self._semantics_module = semantics
        self._printout_module = printout

        # Save the settings values as soon as the settings module is attached
        self._save_settings_values()

        self.tagger = get_tagger()

        self.rules = []
        for name in dir(self._regex_module):
            candidate = getattr(self._regex_module, name)

            try:
                if candidate is RegexTemplate or \
                        not issubclass(candidate, RegexTemplate):
                    continue
                self.rules.append(candidate())
            except TypeError:
                # issubclass() raises TypeError for attributes that are not
                # classes; a TypeError raised while instantiating a template
                # is skipped the same way.
                continue

        self.rules.sort(key=lambda rule: rule.weight, reverse=True)
Ejemplo n.º 2
0
    def __init__(self, parsing, settings):
        """
        Build the application from the `parsing` and `settings` modules.

        The `LANGUAGE` attribute of `settings` is required; a missing or
        falsy value raises `ValueError`. Every `QuestionTemplate` subclass
        found in `parsing` (the base class itself excluded) is instantiated
        and stored in `self.rules`, ordered from the highest to the lowest
        `weight`.
        """

        for module in (parsing, settings):
            assert isinstance(module, ModuleType)

        self._parsing_module = parsing
        self._settings_module = settings

        # Save the settings values as soon as the settings module is attached
        self._save_settings_values()

        self.tagger = get_tagger()

        language = getattr(self._settings_module, "LANGUAGE", None)
        self.language = language
        if not language:
            raise ValueError("Missing configuration for language")

        self.rules = []
        for name in dir(self._parsing_module):
            candidate = getattr(self._parsing_module, name)

            try:
                if candidate is QuestionTemplate or \
                        not issubclass(candidate, QuestionTemplate):
                    continue
                self.rules.append(candidate())
            except TypeError:
                # issubclass() raises TypeError for attributes that are not
                # classes; a TypeError raised while instantiating a template
                # is skipped the same way.
                continue

        self.rules.sort(key=lambda rule: rule.weight, reverse=True)
Ejemplo n.º 3
0
    def __init__(self, regex, settings, semantics):
        """
        Build the application from the `regex`, `settings` and `semantics`
        modules.

        Every `RegexTemplate` subclass found in `regex` (the base class
        itself excluded) is instantiated and stored in `self.rules`, ordered
        from the highest to the lowest `weight`.
        """

        for module in (regex, settings, semantics):
            assert isinstance(module, ModuleType)

        self._regex_module = regex
        self._settings_module = settings
        self._semantics_module = semantics

        # Save the settings values as soon as the settings module is attached
        self._save_settings_values()

        self.tagger = get_tagger()

        self.rules = []
        for name in dir(self._regex_module):
            candidate = getattr(self._regex_module, name)

            try:
                if candidate is RegexTemplate or \
                        not issubclass(candidate, RegexTemplate):
                    continue
                self.rules.append(candidate())
            except TypeError:
                # issubclass() raises TypeError for attributes that are not
                # classes; a TypeError raised while instantiating a template
                # is skipped the same way.
                continue

        self.rules.sort(key=lambda rule: rule.weight, reverse=True)
Ejemplo n.º 4
0
    def __init__(self, parsing, settings):
        """
        Build the application from the `parsing` and `settings` modules.

        The `LANGUAGE` attribute of `settings` is required; a missing or
        falsy value raises `ValueError`. Every `QuestionTemplate` subclass
        found in `parsing` (the base class itself excluded) is instantiated
        and stored in `self.rules`, ordered from the highest to the lowest
        `weight`.
        """

        for module in (parsing, settings):
            assert isinstance(module, ModuleType)

        self._parsing_module = parsing
        self._settings_module = settings

        # Save the settings values as soon as the settings module is attached
        self._save_settings_values()

        self.tagger = get_tagger()

        language = getattr(self._settings_module, "LANGUAGE", None)
        self.language = language
        if not language:
            raise ValueError("Missing configuration for language")

        self.rules = []
        for name in dir(self._parsing_module):
            candidate = getattr(self._parsing_module, name)

            try:
                if candidate is QuestionTemplate or \
                        not issubclass(candidate, QuestionTemplate):
                    continue
                self.rules.append(candidate())
            except TypeError:
                # issubclass() raises TypeError for attributes that are not
                # classes; a TypeError raised while instantiating a template
                # is skipped the same way.
                continue

        self.rules.sort(key=lambda rule: rule.weight, reverse=True)
Ejemplo n.º 5
0
    def _iter_compiled_forms(self, question):
        """
        Yield every compiled form of `question`.

        The question is tagged and the resulting words are offered to each
        rule in `self.rules`, in order. For every rule whose
        `get_interpretation` returns a truthy expression, the pair
        `(expression, userdata)` is yielded.

        If tagging raises `TaggingError`, a warning is logged and nothing
        is yielded.
        """
        try:
            tagger = get_tagger()
            words = list(tagger(question))
        except TaggingError:
            logger.warning(u"Can't parse tagger's output for: '%s'", question)
            return

        # One tab-indented line per tagged word. The generator has to wrap
        # the whole `format` call: handing the generator *to* `format` would
        # log the generator object's repr instead of the words.
        logger.debug(u"Tagged question:\n" +
                     u"\n".join(u"\t{}".format(w) for w in words))

        for rule in self.rules:
            expression, userdata = rule.get_interpretation(words)
            if expression:
                yield expression, userdata
Ejemplo n.º 6
0
    def _iter_compiled_forms(self, question):
        """
        Yield every compiled form of `question`.

        The question is tagged and the resulting words are offered to each
        rule in `self.rules`, in order. For every rule whose
        `get_interpretation` returns a truthy expression, the pair
        `(expression, userdata)` is yielded.

        If tagging raises `TaggingError`, a warning is logged and nothing
        is yielded.
        """
        try:
            tagger = get_tagger()
            words = list(tagger(question))
        except TaggingError:
            logger.warning(u"Can't parse tagger's output for: '%s'",
                           question)
            return

        # One tab-indented line per tagged word. The generator has to wrap
        # the whole `format` call: handing the generator *to* `format` would
        # log the generator object's repr instead of the words.
        logger.debug(u"Tagged question:\n" +
                     u"\n".join(u"\t{}".format(w) for w in words))

        for rule in self.rules:
            expression, userdata = rule.get_interpretation(words)
            if expression:
                yield expression, userdata
Ejemplo n.º 7
0
 def _get_subquery_by_subquestion_and_rule(self, subquestion, rule):
     """
     Build the core SPARQL expression for a sub-question using `rule`.

     The items of `subquestion` are joined with single spaces, passed
     through `encoding_flexible_conversion`, tagged, and handed to
     `rule.get_interpretation`. The expression part of that result is
     passed to `get_core_sparql_expression`, whose return value is
     returned; the second part of the interpretation is discarded.
     """
     text = encoding_flexible_conversion(' '.join(subquestion))
     tag = get_tagger()
     expression, _unused_meta = rule.get_interpretation(list(tag(text)))
     return get_core_sparql_expression(expression)
Ejemplo n.º 8
0
 def _get_subquery_by_subquestion_and_rule(self, subquestion, rule):
     """
     Build the core SPARQL expression for a sub-question using `rule`.

     The items of `subquestion` are joined with single spaces, passed
     through `encoding_flexible_conversion`, tagged, and handed to
     `rule.get_interpretation`. The expression part of that result is
     passed to `get_core_sparql_expression`, whose return value is
     returned; the second part of the interpretation is discarded.
     """
     text = encoding_flexible_conversion(' '.join(subquestion))
     tag = get_tagger()
     expression, _unused_meta = rule.get_interpretation(list(tag(text)))
     return get_core_sparql_expression(expression)
Ejemplo n.º 9
0
    def semantics(self, match):
        """Return `match` unchanged."""
        return match


###
### Begin boilerplate
###

import inspect
from quepy.tagger import get_tagger
from quepy import settings
from refo import Question, Plus

# Put your nltk path here:
# NOTE(review): machine-specific absolute path. Presumably it has to be set
# before get_tagger() is called so the tagger can locate the NLTK data --
# confirm against quepy's settings documentation.
settings.NLTK_DATA_PATH = ["/home/rafael/deploy/nltk/data"]
# Module-level tagger, obtained when this module is executed.
tagger = get_tagger()


class Thing(Particle):
    """Particle whose regex is `Any()` and whose meaning is the token text.

    NOTE(review): `Any()` presumably matches any single word -- confirm
    against the definition of `Any` used by this file.
    """
    regex = Any()

    def semantics(self, match):
        """Return `match.words.tokens`."""
        return match.words.tokens


class Movie(Particle):
    """Particle for a movie name.

    The regex is `Question(Pos("DT"))` followed by a `Plus` over words
    tagged NN, NNS, NNP or NNPS. NOTE(review): with refo semantics this
    reads as an optional determiner followed by one or more nouns --
    confirm against the refo documentation.
    """
    regex = (
        Question(Pos("DT")) +
        Plus(Pos("NN") | Pos("NNS") | Pos("NNP") | Pos("NNPS"))
    )

    def semantics(self, match):
        """Return `match.words.tokens`."""
        tokens = match.words.tokens
        return tokens
Ejemplo n.º 10
0
Applies the quepy tagger to the questions of the corpus.

Usage:
    POS_analisys.py <input_filename> <output_filename>

The input file must contain a pickled list of dictionaries, each with the key 'question'.

The output file will have the same structure, with each question replaced by a
list of instances of quepy.tagger.Word.
"""
import pickle

from docopt import docopt
from quepy.tagger import get_tagger

tagger = get_tagger()


def process_instaces(instances):
    """Runs the tagger in each 'question' of instances.

    Args:
        instances: a list of dictionaries. Each dictionaries must have the
        key question with the instance to be tagged.

    Returns:
        A list of dictionaries equal to the one passed as argument but with
        the value of question tagged.
    """
    for instance in original_corpus:
        question = instance['question']