Example #1
def load(filename: str, lexicon, model, **kwargs) -> 'Analyzer':
    # Skip compilation: the transducer is read from a file instead.
    kwargs['compile'] = False
    analyzer = Analyzer(lexicon, model, **kwargs)
    analyzer.fst = FST.load_transducer(filename)
    rules_tr = FST.load_transducer(shared.filenames['rules-tr'])
    # Copy before inverting so the original rules transducer stays intact.
    analyzer.inv_rules_tr = hfst.HfstTransducer(rules_tr)
    analyzer.inv_rules_tr.invert()
    return analyzer
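The copy-then-invert idiom above (wrap in hfst.HfstTransducer before calling invert()) keeps the original rules transducer intact. A self-contained sketch of the same idiom with a toy transducer built via hfst.regex; the regex and words are made up for illustration:

import hfst

# Toy transducer mapping 'cat' -> 'cats' (illustrative only).
rules_tr = hfst.regex('{cat}:{cats}')
inv_rules_tr = hfst.HfstTransducer(rules_tr)  # copy; rules_tr is untouched
inv_rules_tr.invert()                         # now maps 'cats' -> 'cat'
inv_rules_tr.convert(hfst.ImplementationType.HFST_OL_TYPE)
print(inv_rules_tr.lookup('cats'))            # roughly: (('cat', 0.0),)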
Example #2
def _compile_fst(self) -> None:
    rules_tr = FST.load_transducer(shared.filenames['rules-tr'])
    # Keep an inverted copy of the rules transducer for analysis.
    self.inv_rules_tr = hfst.HfstTransducer(rules_tr)
    self.inv_rules_tr.invert()
    logging.getLogger('main').info('Building lexicon transducer...')
    lexicon_tr = FST.load_transducer(shared.filenames['lexicon-tr'])
    self.fst = hfst.HfstTransducer(lexicon_tr)
    logging.getLogger('main').info('Composing with rules...')
    self.fst.compose(rules_tr)
    self.fst.minimize()
    self.fst.invert()
    # Convert to weighted optimized-lookup format for fast lookup.
    self.fst.convert(hfst.ImplementationType.HFST_OLW_TYPE)
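The whole pipeline (load, compose, minimize, invert, convert to optimized lookup) can be reproduced with toy data; a minimal sketch, with made-up lexicon and rule regexes standing in for the project's transducers:

import hfst

# Toy stand-ins for the project's lexicon and rules transducers.
lexicon_tr = hfst.regex('{walk} | {talk}')
rules_tr = hfst.regex('{walk}:{walked} | {talk}:{talked}')

fst = hfst.HfstTransducer(lexicon_tr)
fst.compose(rules_tr)   # lexicon entries mapped to inflected forms
fst.minimize()
fst.invert()            # analysis direction: surface form -> lemma
fst.convert(hfst.ImplementationType.HFST_OLW_TYPE)
print(fst.lookup('walked'))  # roughly: (('walk', 0.0),)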
Example #3
def load(filename: str, **kwargs) -> 'AlergiaRootModel':
    # TODO saving/loading smoothing and parameters
    result = AlergiaRootModel(**kwargs)
    result.automaton = FST.load_transducer(filename)
    if result.smoothing > 0:
        result.smoothing_model = \
            UnigramRootModel.load(filename + '.smoothing')
    return result
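FST.load_transducer is the project's own wrapper; with the plain hfst API, round-tripping a single transducer through a file looks roughly like this. A sketch only: the file name is a placeholder and the wrapper's actual internals are an assumption.

import hfst

tr = hfst.regex('{cat}:{cats}')  # toy transducer

# Write one transducer per file.
out = hfst.HfstOutputStream(filename='root-model.fst')
out.write(tr)
out.close()

# Read it back; FST.load_transducer presumably does something similar.
in_stream = hfst.HfstInputStream('root-model.fst')
loaded = in_stream.read()
in_stream.close()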
Example #4
def similar_words_with_pylookup_static(words, transducer_path):
    '''Not really feasible because of astronomical memory consumption.
       Implemented only for comparison.'''
    t = FST.load_transducer(transducer_path)
    t.minimize()
    # Convert to (unweighted) optimized-lookup format for fast queries.
    t.convert(hfst.ImplementationType.HFST_OL_TYPE)
    for word in words:
        # lookup() yields (output_string, weight) pairs; keep unique outputs.
        similar_words = set(w for w, c in t.lookup(word))
        yield (word, list(similar_words))
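A hedged usage sketch for the generator above; the word list and the transducer path are placeholders:

# Hypothetical call; 'words.fst' must be an existing transducer file.
for word, similar in similar_words_with_pylookup_static(['cat', 'dog'],
                                                        'words.fst'):
    print(word, similar)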
Example #5
def run():
    lexicon = Lexicon.load(shared.filenames['wordlist'])
    lexicon_tr = FST.load_transducer(shared.filenames['lexicon-tr'])
    rules_tr = FST.load_transducer(shared.filenames['rules-tr'])
    rules_tr.convert(hfst.ImplementationType.HFST_OLW_TYPE)
    alphabet = lexicon_tr.get_alphabet()
    model = ModelSuite.load()
    max_results = shared.config['inflect'].getint('max_results')

    if shared.options['interactive']:
        for line in sys.stdin:
            try:
                lemma_str, tag = line.rstrip().split()
                lemma = LexiconEntry(lemma_str)
                for analysis in inflect_word(lemma,
                                             tag,
                                             rules_tr,
                                             model,
                                             max_results=max_results):
                    print(*analysis, sep='\t')
            except Exception as e:
                logging.getLogger('main').warning(e)
    else:
        pairs = []
        # FIXME is there a better solution for creating lists of LexiconEntry
        # objects and skipping the ones for which exceptions are thrown?
        for lemma, tag in read_tsv_file(shared.filenames['analyze.wordlist']):
            try:
                pairs.append((LexiconEntry(lemma), tag))
            except Exception as e:
                logging.getLogger('main').warning(e)
        for lemma, tag in tqdm.tqdm(pairs):
            for analysis in inflect_word(lemma,
                                         tag,
                                         rules_tr,
                                         model,
                                         max_results=max_results):
                print(*analysis, sep='\t')
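For a single word, the interactive branch above boils down to the following sketch; the tag string is a made-up placeholder, and rules_tr, model, and max_results are set up exactly as at the top of run():

# Hypothetical one-off inflection, mirroring the interactive branch.
lemma = LexiconEntry('walk')
for analysis in inflect_word(lemma, '<VBD>', rules_tr, model,
                             max_results=max_results):
    print(*analysis, sep='\t')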