Example #1
0
def main():
    """Annotate a gzipped JSON-lines file with Apertium tokens and lemmas.

    Reads four positional command-line arguments from ``sys.argv``:

    1. path of the gzipped input file (one JSON document per line),
    2. path of the gzipped output file (opened in append mode),
    3. pair path passed to ``apertium.append_pair_path``,
    4. language code used to build the ``apertium.Analyzer``; it is also
       embedded in the name of the ``exceptions.<lang>.txt`` report.

    For every input line the document's ``text`` is run through
    ``ApertiumAnalyzer.compute_lemmas()``. Documents that produce at least
    one lemma get ``annotations['token']`` and ``annotations['lemma']`` set
    and are written to the output; for the others only the 1-based line
    number is recorded in the exceptions file.

    Exits with status 1 after printing the usage line when fewer than four
    arguments are supplied.
    """
    # All four positional arguments are required: argv[0] is the script name,
    # so anything shorter than 5 entries would fail on the indexing below.
    if len(sys.argv) < 5:
        print("Usage: <nlpdoc-path> <output_path> <pair_path> <lang>")
        sys.exit(1)
    nlpdocpath, output_path, pair_path, language = sys.argv[1:5]

    apertium.append_pair_path(pair_path)
    apert = apertium.Analyzer(language)

    # Explicit UTF-8 keeps the gzip text streams independent of the platform
    # locale, consistent with the exceptions file.
    with gzip.open(nlpdocpath, 'rt', encoding='utf-8') as r, \
            gzip.open(output_path, 'at', encoding='utf-8') as wr, \
            open("exceptions.{}.txt".format(language), 'w', encoding='utf-8') as ex:
        for counter, line in enumerate(r, start=1):
            nlpdoc_dict = json.loads(line)
            text = nlpdoc_dict['text']
            apertium_analyzer = ApertiumAnalyzer(apert, text)

            # Each item yielded by compute_lemmas() is itself a collection of
            # lemma entries; flatten them into a single list.
            lemmas = []
            for lemma_group in apertium_analyzer.compute_lemmas():
                lemmas.extend(lemma_group)

            if not lemmas:
                # Nothing could be analysed: remember which line it was.
                ex.write(str(counter) + "\n")
                continue

            tokens = [{"start": lemma["start"], "end": lemma["end"]} for lemma in lemmas]
            nlpdoc_dict["annotations"]['token'] = tokens
            nlpdoc_dict["annotations"]['lemma'] = lemmas
            wr.write(json.dumps(nlpdoc_dict) + "\n")
Example #2
0
 def test_analyzer_en(self):
     """Analysing 'cats' with the 'en' analyzer yields a known plural noun 'cat'."""
     # Only the first lexical unit of the analysis is inspected.
     first_unit = apertium.Analyzer('en').analyze('cats')[0]
     expected_readings = [[SReading(baseform='cat', tags=['n', 'pl'])]]

     self.assertListEqual(first_unit.readings, expected_readings)
     self.assertEqual(first_unit.wordform, 'cats')
     self.assertEqual(first_unit.knownness, known)
Example #3
0
 def test_uninstalled_mode(self):
     """Building an Analyzer for 'spa' must raise apertium.ModeNotInstalled."""
     # Callable form of assertRaises: invokes apertium.Analyzer('spa') itself.
     self.assertRaises(apertium.ModeNotInstalled, apertium.Analyzer, 'spa')
Example #4
0
def translate(data):
    """Return the Apertium 'en' analysis of ``data``.

    Despite its name, this function performs no translation: it only builds
    an English analyzer and returns whatever ``analyze`` produces for the
    given input.
    """
    return apertium.Analyzer('en').analyze(data)