예제 #1
0
 def setUpClass(cls):
     """
     Create the stress predictor used by the tests and store it on ``cls``.

     The predictor is built from the default stress model, the Zaliznyak and
     CMU dictionaries, and the grapheme stress dictionary / trie paths.
     """
     predictor_options = dict(
         stress_model_path=RU_STRESS_DEFAULT_MODEL,
         zalyzniak_dict=ZALYZNYAK_DICT,
         cmu_dict=CMU_DICT,
         raw_stress_dict_path=RU_GRAPHEME_STRESS_PATH,
         stress_trie_path=RU_GRAPHEME_STRESS_TRIE_PATH,
     )
     cls.stress_predictor = CombinedStressPredictor(**predictor_options)
예제 #2
0
 def inflate_vocab(self, top_n=None) -> None:
     """
     Build a stress-annotated vocabulary from this dictionary's word forms.

     Every word form's text is passed to a ``CombinedStressPredictor``; the
     resulting ``Word`` is added to a ``Vocabulary`` opened at
     ``GENERATOR_VOCAB_PATH`` under the form's index, and the vocabulary is
     saved once all forms have been processed.

     :param top_n: how many leading word forms to take; ``None`` takes all.
     """
     vocab = Vocabulary(GENERATOR_VOCAB_PATH)
     stress_predictor = CombinedStressPredictor()
     forms = self.word_forms
     if top_n is not None:
         forms = forms[:top_n]
     # Wrap the sequence itself, not enumerate(): enumerate() hides the length,
     # which would leave the progress bar without a total or an ETA.
     for index, word_form in enumerate(tqdm(forms, desc="Accenting words")):
         text = word_form.text
         stresses = stress_predictor.predict(text)
         # The two leading -1 arguments are passed through unchanged from the
         # original call; their meaning is defined by Word, not visible here.
         word = Word(-1, -1, text, Graphemes.get_syllables(text))
         word.set_stresses(stresses)
         vocab.add_word(word, index)
     vocab.save()
예제 #3
0
 def setUpClass(cls):
     """
     Create the shared stress predictor and route debug logging to stdout.

     The predictor is stored on ``cls`` first; logging is configured
     afterwards at DEBUG level.
     """
     predictor_options = dict(
         stress_model_path=RU_STRESS_DEFAULT_MODEL,
         zalyzniak_dict=ZALYZNYAK_DICT,
         cmu_dict=CMU_DICT,
         raw_stress_dict_path=RU_GRAPHEME_STRESS_PATH,
         stress_trie_path=RU_GRAPHEME_STRESS_TRIE_PATH,
     )
     cls.stress_predictor = CombinedStressPredictor(**predictor_options)
     logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
예제 #4
0
 def setUpClass(cls):
     """
     Create the stress predictor used by the tests and store it on ``cls``.

     This variant passes the stress and G2P models, the Zaliznyak, ru-wiki
     and CMU dictionaries, the grapheme stress dictionary / trie paths, the
     aligner dump path and the G2P dictionary path.
     """
     predictor_options = dict(
         stress_model_path=RU_STRESS_DEFAULT_MODEL,
         g2p_model_path=RU_G2P_DEFAULT_MODEL,
         zalyzniak_dict=ZALYZNYAK_DICT,
         ru_wiki_dict=RU_WIKI_DICT,
         cmu_dict=CMU_DICT,
         raw_stress_dict_path=RU_GRAPHEME_STRESS_PATH,
         stress_trie_path=RU_GRAPHEME_STRESS_TRIE_PATH,
         aligner_dump_path=RU_ALIGNER_DEFAULT_PATH,
         g2p_dict_path=RU_G2P_DICT_PATH,
     )
     cls.stress_predictor = CombinedStressPredictor(**predictor_options)
예제 #5
0
 def inflate_vocab(self, dump_path, top_n=None) -> None:
     """
     Build a stress-annotated vocabulary from this dictionary's word forms.

     Every word form's text is passed to a ``CombinedStressPredictor``; the
     resulting ``Word`` is added to a ``Vocabulary`` created from
     ``dump_path`` under the form's index, and the vocabulary is saved once
     all forms have been processed.

     :param dump_path: path where the vocabulary is stored.
     :param top_n: how many leading word forms to take; ``None`` takes all.
     """
     # Imported locally, as in the original; presumably to avoid a circular
     # import between these modules -- confirm before hoisting.
     from rupo.main.vocabulary import Vocabulary
     from rupo.stress.predictor import CombinedStressPredictor
     vocab = Vocabulary(dump_path)
     stress_predictor = CombinedStressPredictor()
     forms = self.word_forms
     if top_n is not None:
         forms = forms[:top_n]
     # Wrap the sequence itself, not enumerate(): enumerate() hides the length,
     # which would leave the progress bar without a total or an ETA.
     for index, word_form in enumerate(tqdm(forms, desc="Accenting words")):
         text = word_form.text
         stresses = stress_predictor.predict(text)
         # The two leading -1 arguments are passed through unchanged from the
         # original call; their meaning is defined by Word, not visible here.
         word = Word(-1, -1, text, Graphemes.get_syllables(text))
         word.set_stresses(stresses)
         vocab.add_word(word, index)
     vocab.save()
예제 #6
0
파일: api.py 프로젝트: che1974/rupo
 def get_stress_predictor(self,
                          language="ru",
                          stress_model_path: str = None,
                          raw_stress_dict_path=None,
                          stress_trie_path=None,
                          zalyzniak_dict=ZALYZNYAK_DICT,
                          cmu_dict=CMU_DICT):
     """
     Return the stress predictor for ``language``, creating it on first use.

     Predictors are kept in ``self.stress_predictors`` keyed by language.
     All arguments other than ``language`` are only used when no predictor
     is stored for that language yet; on later calls they are ignored and
     the stored predictor is returned.

     :param language: key under which the predictor is stored.
     :param stress_model_path: forwarded to ``CombinedStressPredictor``.
     :param raw_stress_dict_path: forwarded to ``CombinedStressPredictor``.
     :param stress_trie_path: forwarded to ``CombinedStressPredictor``.
     :param zalyzniak_dict: forwarded to ``CombinedStressPredictor``.
     :param cmu_dict: forwarded to ``CombinedStressPredictor``.
     :return: the predictor stored for ``language``.
     """
     existing = self.stress_predictors.get(language)
     if existing is not None:
         return existing
     # Arguments are passed positionally, in the same order as the original.
     created = CombinedStressPredictor(
         language,
         stress_model_path,
         raw_stress_dict_path,
         stress_trie_path,
         zalyzniak_dict,
         cmu_dict,
     )
     self.stress_predictors[language] = created
     return created
예제 #7
0
 def inflate_vocab(self, dump_path, top_n=None) -> None:
     """
     Build a stress-annotated vocabulary from this dictionary's word forms.

     Every word form's text is passed to a ``CombinedStressPredictor``; each
     predicted position becomes a primary ``Stress``, the resulting
     ``StressedWord`` is added to a ``StressVocabulary`` created from
     ``dump_path`` under the form's index, and the vocabulary is saved once
     all forms have been processed.

     :param dump_path: path where the vocabulary is stored.
     :param top_n: how many leading word forms to take; ``None`` takes all.
     """
     # Imported locally, as in the original; presumably to avoid a circular
     # import between these modules -- confirm before hoisting.
     from rupo.main.vocabulary import StressVocabulary
     from rupo.stress.word import StressedWord, Stress
     from rupo.stress.predictor import CombinedStressPredictor
     vocab = StressVocabulary(dump_path)
     stress_predictor = CombinedStressPredictor()
     forms = self.word_forms
     if top_n is not None:
         forms = forms[:top_n]
     # Wrap the sequence itself, not enumerate(): enumerate() hides the length,
     # which would leave the progress bar without a total or an ETA.
     for index, word_form in enumerate(tqdm(forms, desc="Accenting words")):
         text = word_form.text
         stresses = [Stress(pos, Stress.Type.PRIMARY) for pos in stress_predictor.predict(text)]
         word = StressedWord(text, set(stresses))
         vocab.add_word(word, index)
     vocab.save()
예제 #8
0
 def get_stress_predictor(self,
                          language="ru",
                          stress_model_path: str = None,
                          g2p_model_path: str = None,
                          grapheme_set=RU_GRAPHEME_SET,
                          g2p_dict_path=None,
                          aligner_dump_path=None,
                          raw_stress_dict_path=None,
                          stress_trie_path=None,
                          zalyzniak_dict=ZALYZNYAK_DICT,
                          ru_wiki_dict=RU_WIKI_DICT,
                          cmu_dict=CMU_DICT):
     """
     Return the stress predictor for ``language``, creating it on first use.

     Predictors are kept in ``self.stress_predictors`` keyed by language.
     All arguments other than ``language`` are only used when no predictor
     is stored for that language yet; on later calls they are ignored and
     the stored predictor is returned.

     :param language: key under which the predictor is stored.
     :return: the predictor stored for ``language``.

     The remaining parameters are forwarded positionally to
     ``CombinedStressPredictor``.
     """
     existing = self.stress_predictors.get(language)
     if existing is not None:
         return existing
     # NOTE(review): cmu_dict is passed before ru_wiki_dict, which differs from
     # this method's own parameter order; kept as in the original -- confirm
     # against the CombinedStressPredictor signature.
     created = CombinedStressPredictor(
         language,
         stress_model_path,
         g2p_model_path,
         grapheme_set,
         g2p_dict_path,
         aligner_dump_path,
         raw_stress_dict_path,
         stress_trie_path,
         zalyzniak_dict,
         cmu_dict,
         ru_wiki_dict,
     )
     self.stress_predictors[language] = created
     return created
예제 #9
0
 def setUpClass(cls):
     """Create a default-configured stress predictor and store it on ``cls``."""
     default_predictor = CombinedStressPredictor()
     cls.stress_predictor = default_predictor