def __init__(self, regex, settings, semantics, printout=None):
    """
    Creates the application based on `regex`, `settings`, `semantics`
    and `printout` modules.

    `regex`, `settings` and `semantics` must be actual module objects;
    `printout` is optional and stored as-is.
    """
    assert isinstance(regex, ModuleType)
    assert isinstance(settings, ModuleType)
    assert isinstance(semantics, ModuleType)

    self._regex_module = regex
    self._settings_module = settings
    self._semantics_module = semantics
    self._printout_module = printout

    # Save the settings right after loading settings module
    self._save_settings_values()

    self.tagger = get_tagger()

    # Collect an instance of every concrete RegexTemplate subclass
    # defined (or re-exported) by the regex module.
    self.rules = []
    for name in dir(self._regex_module):
        candidate = getattr(self._regex_module, name)
        try:
            # issubclass raises TypeError for non-class attributes;
            # those are simply skipped.
            is_rule = issubclass(candidate, RegexTemplate)
        except TypeError:
            continue
        # FIX: instantiation moved outside the try block so a TypeError
        # raised by a broken template's __init__ is no longer silently
        # swallowed (the original try wrapped the append too).
        if is_rule and candidate is not RegexTemplate:
            self.rules.append(candidate())

    # Higher-weight rules are tried first.
    self.rules.sort(key=lambda rule: rule.weight, reverse=True)
def __init__(self, parsing, settings):
    """
    Creates the application based on `parsing`, `settings` modules.

    Both arguments must be actual module objects. Raises ValueError if
    the settings module does not define a truthy LANGUAGE value.
    """
    assert isinstance(parsing, ModuleType)
    assert isinstance(settings, ModuleType)

    self._parsing_module = parsing
    self._settings_module = settings

    # Save the settings right after loading settings module
    self._save_settings_values()

    self.tagger = get_tagger()

    # The tagger needs to know which language the questions are in.
    self.language = getattr(self._settings_module, "LANGUAGE", None)
    if not self.language:
        raise ValueError("Missing configuration for language")

    # Collect an instance of every concrete QuestionTemplate subclass
    # defined (or re-exported) by the parsing module.
    self.rules = []
    for name in dir(self._parsing_module):
        candidate = getattr(self._parsing_module, name)
        try:
            # issubclass raises TypeError for non-class attributes;
            # those are simply skipped.
            is_rule = issubclass(candidate, QuestionTemplate)
        except TypeError:
            continue
        # FIX: instantiation moved outside the try block so a TypeError
        # raised by a broken template's __init__ is no longer silently
        # swallowed (the original try wrapped the append too).
        if is_rule and candidate is not QuestionTemplate:
            self.rules.append(candidate())

    # Higher-weight rules are tried first.
    self.rules.sort(key=lambda rule: rule.weight, reverse=True)
def __init__(self, regex, settings, semantics):
    """
    Creates the application based on `regex`, `settings` and
    `semantics` modules.

    All three arguments must be actual module objects.
    """
    assert isinstance(regex, ModuleType)
    assert isinstance(settings, ModuleType)
    assert isinstance(semantics, ModuleType)

    self._regex_module = regex
    self._settings_module = settings
    self._semantics_module = semantics

    # Save the settings right after loading settings module
    self._save_settings_values()

    self.tagger = get_tagger()

    # Collect an instance of every concrete RegexTemplate subclass
    # defined (or re-exported) by the regex module.
    self.rules = []
    for name in dir(self._regex_module):
        candidate = getattr(self._regex_module, name)
        try:
            # issubclass raises TypeError for non-class attributes;
            # those are simply skipped.
            is_rule = issubclass(candidate, RegexTemplate)
        except TypeError:
            continue
        # FIX: instantiation moved outside the try block so a TypeError
        # raised by a broken template's __init__ is no longer silently
        # swallowed (the original try wrapped the append too).
        if is_rule and candidate is not RegexTemplate:
            self.rules.append(candidate())

    # Higher-weight rules are tried first.
    self.rules.sort(key=lambda rule: rule.weight, reverse=True)
def __init__(self, parsing, settings):
    """
    Creates the application based on `parsing`, `settings` modules.

    Both arguments must be actual module objects. Raises ValueError if
    the settings module does not define a truthy LANGUAGE value.
    """
    assert isinstance(parsing, ModuleType)
    assert isinstance(settings, ModuleType)

    self._parsing_module = parsing
    self._settings_module = settings

    # Save the settings right after loading settings module
    self._save_settings_values()

    self.tagger = get_tagger()

    # The tagger needs to know which language the questions are in.
    self.language = getattr(self._settings_module, "LANGUAGE", None)
    if not self.language:
        raise ValueError("Missing configuration for language")

    # Collect an instance of every concrete QuestionTemplate subclass
    # defined (or re-exported) by the parsing module.
    self.rules = []
    for name in dir(self._parsing_module):
        candidate = getattr(self._parsing_module, name)
        try:
            # issubclass raises TypeError for non-class attributes;
            # those are simply skipped.
            is_rule = issubclass(candidate, QuestionTemplate)
        except TypeError:
            continue
        # FIX: instantiation moved outside the try block so a TypeError
        # raised by a broken template's __init__ is no longer silently
        # swallowed (the original try wrapped the append too).
        if is_rule and candidate is not QuestionTemplate:
            self.rules.append(candidate())

    # Higher-weight rules are tried first.
    self.rules.sort(key=lambda rule: rule.weight, reverse=True)
def _iter_compiled_forms(self, question):
    """
    Returns all the compiled form of the question.

    Tags `question`, then yields (expression, userdata) pairs for every
    rule whose interpretation of the tagged words is truthy. Yields
    nothing if the tagger output cannot be parsed.
    """
    try:
        tagger = get_tagger()
        words = list(tagger(question))
    except TaggingError:
        logger.warning(u"Can't parse tagger's output for: '%s'", question)
        return

    # BUG FIX: the original passed the generator itself to .format()
    # -- u"\t{}".format(w for w in words) -- logging a single line like
    # "\t<generator object ...>" instead of one tagged word per line.
    # Also use lazy %-style args so the join only runs when DEBUG is on.
    logger.debug(u"Tagged question:\n%s",
                 u"\n".join(u"\t{}".format(w) for w in words))

    for rule in self.rules:
        expression, userdata = rule.get_interpretation(words)
        if expression:
            yield expression, userdata
def _get_subquery_by_subquestion_and_rule(self, subquestion, rule):
    """
    Builds the core SPARQL expression for `rule`'s interpretation of
    `subquestion` (a sequence of tokens joined into one question).
    """
    joined = ' '.join(subquestion)
    question = encoding_flexible_conversion(joined)
    tagged_words = list(get_tagger()(question))
    subquery_expression, _metadata = rule.get_interpretation(tagged_words)
    return get_core_sparql_expression(subquery_expression)
def semantics(self, match): return match ### ### Begin boilerplate ### import inspect from quepy.tagger import get_tagger from quepy import settings from refo import Question, Plus # Put your nltk path here: settings.NLTK_DATA_PATH = ["/home/rafael/deploy/nltk/data"] tagger = get_tagger() class Thing(Particle): regex = Any() def semantics(self, match): return match.words.tokens class Movie(Particle): regex = Question(Pos("DT")) + \ Plus(Pos("NN") | Pos("NNS") | Pos("NNP") | Pos("NNPS")) def semantics(self, match): return match.words.tokens
Applies the quepy tagger to the questions of the corpus. Usage: POS_analisys.py <input_filename> <output_filename> The input file must have a pickled list of dictionaries with keys 'questions'. The output file will have the same structure replacing each questions by a list of instances of quepy.tagger.Word """ import pickle from docopt import docopt from quepy.tagger import get_tagger tagger = get_tagger() def process_instaces(instances): """Runs the tagger in each 'question' of instances. Args: instances: a list of dictionaries. Each dictionaries must have the key question with the instance to be tagged. Returns: A list of dictionaries equal to the one passed as argument but with the value of question tagged. """ for instance in original_corpus: question = instance['question']