Example 1
import codecs
import json
# DetailedDictionary, MarginNgramsCollector and CorpusFeatures come from the
# surrounding apps.vnlp.training package (assumed, not shown in the snippet).

@classmethod
def load_from_file(cls, path: str):  # method of CorpusFeatures
    # Read the serialized corpus features back from a JSON file
    with codecs.open(path, 'rb') as fr:
        data = json.load(fr)
    # Rebuild the fully qualified alphabet class path
    alphabet = f'apps.vnlp.training.alphabet.{data["alphabet"]}'
    cf = CorpusFeatures(data['language'], alphabet, data['path'])
    cf.version = data['version']
    # Restore the nested components from their JSON representations
    cf.dictionary = DetailedDictionary.json_deserialize(data['dictionary'])
    cf.ngrams_collector = MarginNgramsCollector.json_deserialize(data['ngrams_collector'])
    return cf
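The listing only shows the loading side. A possible counterpart for writing the same JSON layout back out might look like the sketch below; save_to_file and the attribute names read from cf are assumptions inferred from the loader, and whether json_serialize() returns a dict or a string is not shown in the snippet.

import codecs
import json

def save_to_file(cf, path: str):
    # Hypothetical counterpart to load_from_file above (not part of the shown code).
    # Assumes cf exposes the attributes the loader reads and that json_serialize()
    # on the nested objects produces JSON-compatible values.
    data = {
        'language': cf.language,
        'alphabet': cf.alphabet.split('.')[-1],  # loader re-prefixes the module path
        'path': cf.path,
        'version': cf.version,
        'dictionary': cf.dictionary.json_serialize(),
        'ngrams_collector': cf.ngrams_collector.json_serialize(),
    }
    with codecs.open(path, 'w', encoding='utf-8') as fw:
        json.dump(data, fw, ensure_ascii=False)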
Example 2
# unittest.TestCase method; DetailedDictionary and WordCard are assumed to come
# from the same apps.vnlp.training package as the example above.
def test_serialize_deserialize(self):
    # Build a small dictionary with a few word cards and word n-grams
    dd = DetailedDictionary()
    dd.files_processed = 1
    dd.words_processed = 4
    dd.words.append(WordCard('detail', 12, 'tail'))
    dd.words[0].prefix = 'de'
    dd.words.append(WordCard('corpus', 2, ''))
    dd.words.append(WordCard('plural', 1, ''))
    dd.words.append(WordCard('omnis', 1, ''))
    dd.words_total = len(dd.words)
    dd.word_grams = {(2, 'corpus omins'): 24,
                     (3, 'plural corpus omins'): 21}
    # Serialize to JSON and sanity-check the output length
    jsn = dd.json_serialize()
    self.assertGreater(len(jsn), 10)

    # Deserialize and verify the round trip preserves counters and collections
    rd = DetailedDictionary.json_deserialize(jsn)
    self.assertEqual(dd.files_processed, rd.files_processed)
    self.assertEqual(dd.words_processed, rd.words_processed)
    self.assertEqual(dd.words_total, rd.words_total)
    self.assertEqual(len(dd.words), len(rd.words))
    self.assertEqual(len(dd.word_grams), len(rd.word_grams))
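Example 2 checks the serialize/deserialize contract through a unittest round trip. For readers who want to try the same pattern outside the project, here is a minimal, self-contained sketch of that contract; WordCardLike and its field names are purely illustrative stand-ins, not the project's WordCard class.

import json

class WordCardLike:
    # Minimal stand-in with the same three constructor arguments as WordCard
    # in the example above; illustrative only.
    def __init__(self, word, count, suffix):
        self.word, self.count, self.suffix = word, count, suffix
        self.prefix = ''

    def json_serialize(self):
        return {'word': self.word, 'count': self.count,
                'suffix': self.suffix, 'prefix': self.prefix}

    @classmethod
    def json_deserialize(cls, data):
        card = cls(data['word'], data['count'], data['suffix'])
        card.prefix = data['prefix']
        return card

# Round trip through a JSON string, mirroring the checks in the test method
card = WordCardLike('detail', 12, 'tail')
card.prefix = 'de'
restored = WordCardLike.json_deserialize(json.loads(json.dumps(card.json_serialize())))
assert restored.prefix == 'de' and restored.count == 12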