Example #1
0
def test_generate_pretrained_threshold(
    english_g2p_model, basic_corpus_dir, temp_dir, generated_dir
):
    """Run the ``g2p`` command with ``--g2p_threshold 0.95`` and check its output.

    The test is skipped when ``G2P_DISABLED`` is truthy. It passes when the
    output file exists and a ``MultispeakerDictionary`` built from that file
    reports a non-empty word mapping.
    """
    if G2P_DISABLED:
        pytest.skip("No Pynini found")

    output_path = os.path.join(generated_dir, "g2p_out.txt")

    # Positional arguments first, then the option flags.
    positional = ["g2p", english_g2p_model, basic_corpus_dir, output_path]
    options = ["-t", temp_dir, "-q", "--clean", "--g2p_threshold", "0.95"]
    args, unknown = parser.parse_known_args(positional + options)
    run_g2p(args, unknown)

    assert os.path.exists(output_path)

    dictionary = MultispeakerDictionary(output_path, temporary_directory=temp_dir)
    dictionary.dictionary_setup()
    # NOTE(review): the argument 1 presumably selects a dictionary id -- confirm
    # against MultispeakerDictionary.word_mapping.
    assert len(dictionary.word_mapping(1)) > 0
Example #2
0
def test_generate_dict_text_only(
    basic_split_dir,
    basic_g2p_model_path,
    g2p_basic_output,
    temp_dir,
    g2p_config_path,
):
    """Run the ``g2p`` command on the second entry of ``basic_split_dir``.

    The test is skipped when ``G2P_DISABLED`` is truthy. It passes when
    ``g2p_basic_output`` exists afterwards and a ``MultispeakerDictionary``
    built from it reports a non-empty word mapping.
    """
    if G2P_DISABLED:
        pytest.skip("No Pynini found")

    # Index 1 of the fixture is used as the input directory.
    text_dir = basic_split_dir[1]

    command = ["g2p", basic_g2p_model_path, text_dir, g2p_basic_output]
    command += ["-t", temp_dir]
    command += ["-q", "--clean", "--debug"]
    command += ["--config_path", g2p_config_path]

    args, unknown = parser.parse_known_args(command)
    run_g2p(args, unknown)

    assert os.path.exists(g2p_basic_output)

    dictionary = MultispeakerDictionary(
        dictionary_path=g2p_basic_output, temporary_directory=temp_dir
    )
    dictionary.dictionary_setup()
    assert len(dictionary.word_mapping()) > 0
Example #3
0
def test_generate_dict_textgrid(
    multilingual_ipa_tg_corpus_dir,
    english_g2p_model,
    generated_dir,
    temp_dir,
    g2p_config_path,
):
    """Run the ``g2p`` command on ``multilingual_ipa_tg_corpus_dir``.

    The test is skipped when ``G2P_DISABLED`` is truthy. The result is written
    to ``tg_g2pped.dict`` inside ``generated_dir``; the test passes when that
    file exists and a ``MultispeakerDictionary`` built from it reports a
    non-empty word mapping.
    """
    if G2P_DISABLED:
        pytest.skip("No Pynini found")

    output_file = os.path.join(generated_dir, "tg_g2pped.dict")

    flags = ["-t", temp_dir, "-q", "--clean", "--debug", "--config_path", g2p_config_path]
    command = ["g2p", english_g2p_model, multilingual_ipa_tg_corpus_dir, output_file, *flags]

    args, unknown = parser.parse_known_args(command)
    run_g2p(args, unknown)

    assert os.path.exists(output_file)

    generated = MultispeakerDictionary(
        dictionary_path=output_file, temporary_directory=temp_dir
    )
    generated.dictionary_setup()
    assert len(generated.word_mapping()) > 0
Example #4
0
def test_generate_orthography_dict(basic_corpus_dir, orth_basic_output, temp_dir):
    """Run the ``g2p`` command without a model argument and with ``--use_mp False``.

    The test is skipped when ``G2P_DISABLED`` is truthy. It passes when
    ``orth_basic_output`` exists afterwards and a ``MultispeakerDictionary``
    built from it reports a non-empty word mapping.
    """
    if G2P_DISABLED:
        pytest.skip("No Pynini found")

    # Only the corpus directory and the output path are given positionally here.
    command = ["g2p", basic_corpus_dir, orth_basic_output]
    command.extend(["-t", temp_dir])
    command.extend(["-q", "--clean", "--debug"])
    command.extend(["--use_mp", "False"])

    args, unknown = parser.parse_known_args(command)
    run_g2p(args, unknown)

    assert os.path.exists(orth_basic_output)

    orth_dictionary = MultispeakerDictionary(
        dictionary_path=orth_basic_output, temporary_directory=temp_dir
    )
    orth_dictionary.dictionary_setup()
    assert len(orth_dictionary.word_mapping()) > 0
Example #5
0
def test_check_bracketed(basic_dict_path):
    """Check that ``check_bracketed`` flags bracketed words and copes with an empty string.

    Words wrapped in ``()``, ``<>``, ``[]`` or ``{}`` are expected to be flagged;
    plain words and the empty string are expected to pass through.
    """
    candidates = ["uh", "(the)", "sick", "<corpus>", "[a]", "{cold}", ""]
    dictionary = MultispeakerDictionary(dictionary_path=basic_dict_path)

    # Keep, in order, every candidate that is not reported as bracketed.
    kept = []
    for word in candidates:
        if not dictionary.check_bracketed(word):
            kept.append(word)

    assert kept == ["uh", "sick", ""]