def test_generate_pretrained_threshold(
    english_g2p_model, basic_corpus_dir, temp_dir, generated_dir
):
    """Run ``g2p`` with ``--g2p_threshold 0.95`` and check the output loads.

    Skipped when ``G2P_DISABLED`` is truthy. The generated file must exist and
    must yield a non-empty word mapping once loaded as a dictionary.
    """
    if G2P_DISABLED:
        pytest.skip("No Pynini found")

    output_path = os.path.join(generated_dir, "g2p_out.txt")

    # Positional arguments first, then the option flags.
    positional = ["g2p", english_g2p_model, basic_corpus_dir, output_path]
    options = ["-t", temp_dir, "-q", "--clean", "--g2p_threshold", "0.95"]
    parsed_args, extra_args = parser.parse_known_args(positional + options)
    run_g2p(parsed_args, extra_args)

    assert os.path.exists(output_path)

    dictionary = MultispeakerDictionary(output_path, temporary_directory=temp_dir)
    dictionary.dictionary_setup()
    assert len(dictionary.word_mapping(1)) > 0
def test_generate_dict_textgrid(
    multilingual_ipa_tg_corpus_dir,
    english_g2p_model,
    generated_dir,
    temp_dir,
    g2p_config_path,
):
    """Run ``g2p`` on the ``multilingual_ipa_tg_corpus_dir`` fixture with a config file.

    Skipped when ``G2P_DISABLED`` is truthy. The output is written to
    ``tg_g2pped.dict`` under ``generated_dir``; it must exist and produce a
    non-empty word mapping when loaded as a dictionary.
    """
    if G2P_DISABLED:
        pytest.skip("No Pynini found")

    output_file = os.path.join(generated_dir, "tg_g2pped.dict")

    positional = ["g2p", english_g2p_model, multilingual_ipa_tg_corpus_dir, output_file]
    options = [
        "-t",
        temp_dir,
        "-q",
        "--clean",
        "--debug",
        "--config_path",
        g2p_config_path,
    ]
    parsed_args, extra_args = parser.parse_known_args([*positional, *options])
    run_g2p(parsed_args, extra_args)

    assert os.path.exists(output_file)

    dictionary = MultispeakerDictionary(
        dictionary_path=output_file, temporary_directory=temp_dir
    )
    dictionary.dictionary_setup()
    assert len(dictionary.word_mapping()) > 0
def test_generate_dict_text_only(
    basic_split_dir,
    basic_g2p_model_path,
    g2p_basic_output,
    temp_dir,
    g2p_config_path,
):
    """Run ``g2p`` on the second entry of ``basic_split_dir`` with a config file.

    Skipped when ``G2P_DISABLED`` is truthy. The file at ``g2p_basic_output``
    must exist afterwards and produce a non-empty word mapping when loaded as
    a dictionary.
    """
    if G2P_DISABLED:
        pytest.skip("No Pynini found")

    # Index 1 of the split fixture is used as the input directory.
    text_only_dir = basic_split_dir[1]

    cli_arguments = ["g2p", basic_g2p_model_path, text_only_dir, g2p_basic_output]
    cli_arguments += ["-t", temp_dir]
    cli_arguments += ["-q", "--clean", "--debug"]
    cli_arguments += ["--config_path", g2p_config_path]

    parsed_args, extra_args = parser.parse_known_args(cli_arguments)
    run_g2p(parsed_args, extra_args)

    assert os.path.exists(g2p_basic_output)

    dictionary = MultispeakerDictionary(
        dictionary_path=g2p_basic_output, temporary_directory=temp_dir
    )
    dictionary.dictionary_setup()
    assert len(dictionary.word_mapping()) > 0
def test_generate_orthography_dict(basic_corpus_dir, orth_basic_output, temp_dir):
    """Run ``g2p`` with only a corpus and an output path (no model argument).

    Skipped when ``G2P_DISABLED`` is truthy. The command also passes
    ``--use_mp False``. The file at ``orth_basic_output`` must exist afterwards
    and produce a non-empty word mapping when loaded as a dictionary.
    """
    if G2P_DISABLED:
        pytest.skip("No Pynini found")

    # No model path is given here: just the corpus and the output location.
    positional = ["g2p", basic_corpus_dir, orth_basic_output]
    options = ["-t", temp_dir, "-q", "--clean", "--debug", "--use_mp", "False"]
    parsed_args, extra_args = parser.parse_known_args(positional + options)
    run_g2p(parsed_args, extra_args)

    assert os.path.exists(orth_basic_output)

    dictionary = MultispeakerDictionary(
        dictionary_path=orth_basic_output, temporary_directory=temp_dir
    )
    dictionary.dictionary_setup()
    assert len(dictionary.word_mapping()) > 0