def setUpClass(cls):
    """
    Build a ``CombinedStressPredictor`` and store it on the class.

    The predictor is configured with the default Russian stress model, the
    Zaliznyak and CMU dictionaries, and the grapheme stress dictionary/trie
    paths, and is exposed as ``cls.stress_predictor``.
    """
    # NOTE(review): unittest calls setUpClass on the class itself, so it is
    # normally decorated with @classmethod; no decorator is visible here — confirm.
    predictor_options = {
        "stress_model_path": RU_STRESS_DEFAULT_MODEL,
        "zalyzniak_dict": ZALYZNYAK_DICT,
        "cmu_dict": CMU_DICT,
        "raw_stress_dict_path": RU_GRAPHEME_STRESS_PATH,
        "stress_trie_path": RU_GRAPHEME_STRESS_TRIE_PATH,
    }
    cls.stress_predictor = CombinedStressPredictor(**predictor_options)
def inflate_vocab(self, top_n=None) -> None:
    """
    Build a stress-annotated vocabulary from this vocabulary's word forms.

    Each selected word form is passed through a default-constructed
    ``CombinedStressPredictor``; the resulting ``Word`` is added to a
    ``Vocabulary`` created from ``GENERATOR_VOCAB_PATH``, which is saved
    once all forms have been processed.

    :param top_n: how many leading entries of ``self.word_forms`` to take;
        ``None`` takes all of them.
    """
    vocab = Vocabulary(GENERATOR_VOCAB_PATH)
    stress_predictor = CombinedStressPredictor()
    forms = self.word_forms if top_n is None else self.word_forms[:top_n]
    # Wrap the sequence itself (not the enumerate object) in tqdm so the
    # progress bar can read its length and display a total.
    for index, word_form in enumerate(tqdm(forms, desc="Accenting words")):
        text = word_form.text
        stresses = stress_predictor.predict(text)
        # The first two Word arguments are passed as -1, as in the original code.
        word = Word(-1, -1, text, Graphemes.get_syllables(text))
        word.set_stresses(stresses)
        vocab.add_word(word, index)
    vocab.save()
def setUpClass(cls):
    """
    Build a ``CombinedStressPredictor`` on the class and enable debug logging.

    The predictor is stored as ``cls.stress_predictor``; afterwards the root
    logger is configured to write DEBUG-level records to standard output.
    """
    # NOTE(review): unittest calls setUpClass on the class itself, so it is
    # normally decorated with @classmethod; no decorator is visible here — confirm.
    predictor_options = {
        "stress_model_path": RU_STRESS_DEFAULT_MODEL,
        "zalyzniak_dict": ZALYZNYAK_DICT,
        "cmu_dict": CMU_DICT,
        "raw_stress_dict_path": RU_GRAPHEME_STRESS_PATH,
        "stress_trie_path": RU_GRAPHEME_STRESS_TRIE_PATH,
    }
    cls.stress_predictor = CombinedStressPredictor(**predictor_options)
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
def setUpClass(cls):
    """
    Build a fully configured ``CombinedStressPredictor`` and store it on the class.

    Besides the stress model and dictionaries, this variant also passes the
    g2p model, the ru-wiki dictionary, the aligner dump and the g2p dictionary
    path. The predictor is exposed as ``cls.stress_predictor``.
    """
    # NOTE(review): unittest calls setUpClass on the class itself, so it is
    # normally decorated with @classmethod; no decorator is visible here — confirm.
    predictor_options = {
        "stress_model_path": RU_STRESS_DEFAULT_MODEL,
        "g2p_model_path": RU_G2P_DEFAULT_MODEL,
        "zalyzniak_dict": ZALYZNYAK_DICT,
        "ru_wiki_dict": RU_WIKI_DICT,
        "cmu_dict": CMU_DICT,
        "raw_stress_dict_path": RU_GRAPHEME_STRESS_PATH,
        "stress_trie_path": RU_GRAPHEME_STRESS_TRIE_PATH,
        "aligner_dump_path": RU_ALIGNER_DEFAULT_PATH,
        "g2p_dict_path": RU_G2P_DICT_PATH,
    }
    cls.stress_predictor = CombinedStressPredictor(**predictor_options)
def inflate_vocab(self, dump_path, top_n=None) -> None:
    """
    Build a stress-annotated vocabulary from this vocabulary's word forms.

    Each selected word form is passed through a default-constructed
    ``CombinedStressPredictor``; the resulting ``Word`` is added to a
    ``Vocabulary`` created from ``dump_path``, which is saved once all forms
    have been processed.

    :param dump_path: path where the vocabulary is to be saved.
    :param top_n: how many leading entries of ``self.word_forms`` to take;
        ``None`` takes all of them.
    """
    # Imported at call time, as in the original code
    # (presumably to avoid a circular import — confirm).
    from rupo.main.vocabulary import Vocabulary
    from rupo.stress.predictor import CombinedStressPredictor

    vocab = Vocabulary(dump_path)
    stress_predictor = CombinedStressPredictor()
    forms = self.word_forms if top_n is None else self.word_forms[:top_n]
    # Wrap the sequence itself (not the enumerate object) in tqdm so the
    # progress bar can read its length and display a total.
    for index, word_form in enumerate(tqdm(forms, desc="Accenting words")):
        text = word_form.text
        stresses = stress_predictor.predict(text)
        # The first two Word arguments are passed as -1, as in the original code.
        word = Word(-1, -1, text, Graphemes.get_syllables(text))
        word.set_stresses(stresses)
        vocab.add_word(word, index)
    vocab.save()
def get_stress_predictor(self, language="ru", stress_model_path: str = None, raw_stress_dict_path=None,
                         stress_trie_path=None, zalyzniak_dict=ZALYZNYAK_DICT, cmu_dict=CMU_DICT):
    """
    Return the stress predictor for ``language``, creating it on first use.

    Predictors are kept in ``self.stress_predictors`` keyed by language. The
    remaining arguments are only used when no predictor is stored yet for that
    language; they are forwarded positionally to ``CombinedStressPredictor``.

    :param language: key under which the predictor is stored.
    :param stress_model_path: forwarded to the predictor constructor.
    :param raw_stress_dict_path: forwarded to the predictor constructor.
    :param stress_trie_path: forwarded to the predictor constructor.
    :param zalyzniak_dict: forwarded to the predictor constructor.
    :param cmu_dict: forwarded to the predictor constructor.
    :return: the stored (or newly created) predictor for ``language``.
    """
    predictor = self.stress_predictors.get(language)
    if predictor is None:
        predictor = CombinedStressPredictor(
            language,
            stress_model_path,
            raw_stress_dict_path,
            stress_trie_path,
            zalyzniak_dict,
            cmu_dict,
        )
        self.stress_predictors[language] = predictor
    return predictor
def inflate_vocab(self, dump_path, top_n=None) -> None:
    """
    Build a stress-annotated vocabulary from this vocabulary's word forms.

    Each selected word form is passed through a default-constructed
    ``CombinedStressPredictor``; every predicted position becomes a primary
    ``Stress``, and the resulting ``StressedWord`` is added to a
    ``StressVocabulary`` created from ``dump_path``, which is saved once all
    forms have been processed.

    :param dump_path: path where the vocabulary is to be saved.
    :param top_n: how many leading entries of ``self.word_forms`` to take;
        ``None`` takes all of them.
    """
    # Imported at call time, as in the original code
    # (presumably to avoid a circular import — confirm).
    from rupo.main.vocabulary import StressVocabulary
    from rupo.stress.word import StressedWord, Stress
    from rupo.stress.predictor import CombinedStressPredictor

    vocab = StressVocabulary(dump_path)
    stress_predictor = CombinedStressPredictor()
    forms = self.word_forms if top_n is None else self.word_forms[:top_n]
    # Wrap the sequence itself (not the enumerate object) in tqdm so the
    # progress bar can read its length and display a total.
    for index, word_form in enumerate(tqdm(forms, desc="Accenting words")):
        text = word_form.text
        stresses = {Stress(pos, Stress.Type.PRIMARY) for pos in stress_predictor.predict(text)}
        vocab.add_word(StressedWord(text, stresses), index)
    vocab.save()
def get_stress_predictor(self, language="ru", stress_model_path: str = None, g2p_model_path: str = None,
                         grapheme_set=RU_GRAPHEME_SET, g2p_dict_path=None, aligner_dump_path=None,
                         raw_stress_dict_path=None, stress_trie_path=None, zalyzniak_dict=ZALYZNYAK_DICT,
                         ru_wiki_dict=RU_WIKI_DICT, cmu_dict=CMU_DICT):
    """
    Return the stress predictor for ``language``, creating it on first use.

    Predictors are kept in ``self.stress_predictors`` keyed by language. The
    remaining arguments are only used when no predictor is stored yet for that
    language; they are forwarded positionally to ``CombinedStressPredictor``.

    :param language: key under which the predictor is stored.
    :param stress_model_path: forwarded to the predictor constructor.
    :param g2p_model_path: forwarded to the predictor constructor.
    :param grapheme_set: forwarded to the predictor constructor.
    :param g2p_dict_path: forwarded to the predictor constructor.
    :param aligner_dump_path: forwarded to the predictor constructor.
    :param raw_stress_dict_path: forwarded to the predictor constructor.
    :param stress_trie_path: forwarded to the predictor constructor.
    :param zalyzniak_dict: forwarded to the predictor constructor.
    :param ru_wiki_dict: forwarded to the predictor constructor.
    :param cmu_dict: forwarded to the predictor constructor.
    :return: the stored (or newly created) predictor for ``language``.
    """
    predictor = self.stress_predictors.get(language)
    if predictor is None:
        # NOTE(review): cmu_dict is passed before ru_wiki_dict here, unlike
        # the order of this method's own parameters — verify against the
        # CombinedStressPredictor signature.
        predictor = CombinedStressPredictor(
            language,
            stress_model_path,
            g2p_model_path,
            grapheme_set,
            g2p_dict_path,
            aligner_dump_path,
            raw_stress_dict_path,
            stress_trie_path,
            zalyzniak_dict,
            cmu_dict,
            ru_wiki_dict,
        )
        self.stress_predictors[language] = predictor
    return predictor
def setUpClass(cls):
    """
    Build a default-constructed ``CombinedStressPredictor`` and store it on
    the class as ``cls.stress_predictor``.
    """
    # NOTE(review): unittest calls setUpClass on the class itself, so it is
    # normally decorated with @classmethod; no decorator is visible here — confirm.
    predictor = CombinedStressPredictor()
    cls.stress_predictor = predictor