Exemple #1
0
def test_basic_tri(basic_dict_path, basic_corpus_dir, generated_dir,
                   tri_train_config_path):
    """Run training with the parameters parsed from ``tri_train_config_path``.

    The temporary directory is removed (best effort) before the aligner is
    built. The test has no explicit assertions: it passes when ``train()``
    completes without raising.
    """
    temp_dir = os.path.join(generated_dir, "temp", "tri_test")
    shutil.rmtree(temp_dir, ignore_errors=True)

    config = TrainableAligner.parse_parameters(tri_train_config_path)
    aligner = TrainableAligner(
        corpus_directory=basic_corpus_dir,
        dictionary_path=basic_dict_path,
        temporary_directory=temp_dir,
        debug=True,
        verbose=True,
        **config,
    )
    aligner.train()
Exemple #2
0
def test_basic_lda(basic_dict_path, basic_corpus_dir, generated_dir,
                   lda_train_config_path):
    """Train with the config at ``lda_train_config_path`` and check iterations.

    After training, the final training configuration must list at least one
    realignment iteration and more than one MLLT iteration.
    """
    temp_dir = os.path.join(generated_dir, "temp", "lda_test")
    shutil.rmtree(temp_dir, ignore_errors=True)

    config = TrainableAligner.parse_parameters(lda_train_config_path)
    aligner = TrainableAligner(
        corpus_directory=basic_corpus_dir,
        dictionary_path=basic_dict_path,
        temporary_directory=temp_dir,
        debug=True,
        verbose=True,
        **config,
    )
    aligner.train()

    final_config = aligner.training_configs[aligner.final_identifier]
    assert len(final_config.realignment_iterations) > 0
    assert len(final_config.mllt_iterations) > 1
Exemple #3
0
def test_trainer(basic_dict_path, basic_corpus_dir, generated_dir):
    """Check the settings of an aligner built without a training config file.

    No training is run; only the final identifier and three attributes of
    the final training configuration are inspected.
    """
    temp_dir = os.path.join(generated_dir, "temp", "train_test")
    aligner = TrainableAligner(
        corpus_directory=basic_corpus_dir,
        dictionary_path=basic_dict_path,
        temporary_directory=temp_dir,
    )

    assert aligner.final_identifier == "sat_4"
    final_config = aligner.training_configs[aligner.final_identifier]
    assert final_config.subset == 0
    assert final_config.num_leaves == 7000
    assert final_config.max_gaussians == 150000
Exemple #4
0
def test_pitch_feature_training(basic_dict_path, basic_corpus_dir,
                                generated_dir, pitch_train_config_path):
    """Train with the pitch config and check the resulting feature dimension.

    The aligner must report ``use_pitch`` before training, and
    ``get_feat_dim()`` must return 48 once training has finished.
    """
    temp_dir = os.path.join(generated_dir, "temp", "tri_pitch_test")
    shutil.rmtree(temp_dir, ignore_errors=True)

    config = TrainableAligner.parse_parameters(pitch_train_config_path)
    aligner = TrainableAligner(
        corpus_directory=basic_corpus_dir,
        dictionary_path=basic_dict_path,
        temporary_directory=temp_dir,
        debug=True,
        verbose=True,
        **config,
    )
    assert aligner.use_pitch

    aligner.train()
    assert aligner.get_feat_dim() == 48
def test_alternate_punctuation(punctuated_dir, generated_dir, basic_dict_path,
                               different_punctuation_config_path):
    """Load the punctuated corpus with the alternate punctuation config.

    The text of the first utterance of file ``punctuated`` must equal the
    expected string, punctuation included.
    """
    from montreal_forced_aligner.acoustic_modeling.trainer import TrainableAligner

    expected_text = (
        "oh yes, they - they, you know, they love her' and so' 'i mean... ‘you"
    )

    output_directory = os.path.join(generated_dir, "corpus_tests", "alternate")
    if os.path.exists(output_directory):
        shutil.rmtree(output_directory, ignore_errors=True)

    parsed = TrainableAligner.parse_parameters(different_punctuation_config_path)
    # The second element of the returned pair is not needed by this test.
    params, _skipped = AcousticCorpusWithPronunciations.extract_relevant_parameters(
        parsed)
    params["use_mp"] = True

    corpus = AcousticCorpusWithPronunciations(
        corpus_directory=punctuated_dir,
        dictionary_path=basic_dict_path,
        temporary_directory=output_directory,
        **params,
    )
    corpus.load_corpus()

    utterance = corpus.get_utterances(file="punctuated")[0]
    assert utterance.text == expected_text
def test_xsampa_corpus(xsampa_corpus_dir, xsampa_dict_path, generated_dir,
                       different_punctuation_config_path):
    """Load the X-SAMPA corpus and check its utterance text is kept verbatim.

    The first utterance of file ``xsampa`` must equal the expected raw
    string, backslashes, braces and backticks included.
    """
    from montreal_forced_aligner.acoustic_modeling.trainer import TrainableAligner

    expected_text = (
        r"@bUr\tOU {bstr\{kt {bSaIr\ Abr\utseIzi {br\@geItIN @bor\n {b3kr\Ambi {bI5s@`n Ar\g thr\Ip@5eI Ar\dvAr\k"
    )

    output_directory = os.path.join(generated_dir, "corpus_tests", "xsampa")
    if os.path.exists(output_directory):
        shutil.rmtree(output_directory, ignore_errors=True)

    parsed = TrainableAligner.parse_parameters(different_punctuation_config_path)
    # The second element of the returned pair is not needed by this test.
    params, _skipped = AcousticCorpusWithPronunciations.extract_relevant_parameters(
        parsed)
    params["use_mp"] = True

    corpus = AcousticCorpusWithPronunciations(
        corpus_directory=xsampa_corpus_dir,
        dictionary_path=xsampa_dict_path,
        temporary_directory=output_directory,
        **params,
    )
    # Diagnostic output, shown by pytest when the test fails.
    print(corpus.quote_markers)
    corpus.load_corpus()

    utterance = corpus.get_utterances(file="xsampa")[0]
    assert utterance.text == expected_text
Exemple #7
0
def test_basic_sat(basic_dict_path, basic_corpus_dir, generated_dir,
                   sat_train_config_path):
    """Train with the SAT config, export the model and check the outputs.

    After training, the final configuration must list at least one
    realignment iteration and more than one fMLLR iteration. The exported
    archive and the ``trans.1.0.ark`` file under the ``sat`` working
    directory must both exist.
    """
    temp_dir = os.path.join(generated_dir, "temp", "sat_test")
    model_archive = os.path.join(temp_dir, "sat_model.zip")
    shutil.rmtree(temp_dir, ignore_errors=True)

    cli_args = argparse.Namespace(use_mp=True, debug=True, verbose=True)
    config = TrainableAligner.parse_parameters(sat_train_config_path,
                                               args=cli_args)
    aligner = TrainableAligner(
        **config,
        corpus_directory=basic_corpus_dir,
        dictionary_path=basic_dict_path,
        temporary_directory=temp_dir,
        disable_mp=False,
    )
    aligner.train()

    final_config = aligner.training_configs[aligner.final_identifier]
    assert len(final_config.realignment_iterations) > 0
    assert len(final_config.fmllr_iterations) > 1

    aligner.export_model(model_archive)
    assert os.path.exists(model_archive)

    transform_path = os.path.join(temp_dir, "basic_train_acoustic_model",
                                  "sat", "trans.1.0.ark")
    assert os.path.exists(transform_path)
def test_no_punctuation(punctuated_dir, generated_dir, basic_dict_path,
                        no_punctuation_config_path):
    """Load the punctuated corpus with the no-punctuation config.

    The corpus must report empty punctuation, compound and clitic markers.
    For the ``punctuated`` and ``weird_words`` files, both the raw text and
    the whitespace-split normalized text of the first utterance must match
    the expected values, with every punctuation character still attached.
    """
    from montreal_forced_aligner.acoustic_modeling.trainer import TrainableAligner

    expected_punctuated_text = (
        "oh yes, they - they, you know, they love her' and so' 'i mean... ‘you"
    )
    expected_punctuated_tokens = [
        "oh",
        "yes,",
        "they",
        "-",
        "they,",
        "you",
        "know,",
        "they",
        "love",
        "her'",
        "and",
        "so'",
        "'i",
        "mean...",
        "‘you",
    ]
    expected_weird_text = (
        "i’m talking-ajfish me-really [me-really] [me'really] [me_??_really] asds-asda sdasd-me <s> </s>"
    )
    expected_weird_tokens = [
        "i’m",
        "talking-ajfish",
        "me-really",
        "[me-really]",
        "[me'really]",
        "[me_??_really]",
        "asds-asda",
        "sdasd-me",
        "<s>",
        "</s>",
    ]

    output_directory = os.path.join(generated_dir, "corpus_tests",
                                    "no_punctuation")
    if os.path.exists(output_directory):
        shutil.rmtree(output_directory, ignore_errors=True)

    parsed = TrainableAligner.parse_parameters(no_punctuation_config_path)
    # The second element of the returned pair is not needed by this test.
    params, _skipped = AcousticCorpusWithPronunciations.extract_relevant_parameters(
        parsed)
    params["use_mp"] = False

    corpus = AcousticCorpusWithPronunciations(
        corpus_directory=punctuated_dir,
        dictionary_path=basic_dict_path,
        temporary_directory=output_directory,
        **params,
    )
    assert not corpus.punctuation
    assert not corpus.compound_markers
    assert not corpus.clitic_markers

    corpus.load_corpus()
    punctuated = corpus.get_utterances(file="punctuated")[0]
    # Diagnostic output, shown by pytest when the test fails.
    print(corpus.punctuation)
    print(corpus.word_break_markers)
    assert punctuated.text == expected_punctuated_text
    assert punctuated.normalized_text.split() == expected_punctuated_tokens

    weird_words = corpus.get_utterances(file="weird_words")[0]
    assert weird_words.text == expected_weird_text
    assert weird_words.normalized_text.split() == expected_weird_tokens
Exemple #9
0
def test_pronunciation_training(mixed_dict_path, basic_corpus_dir,
                                generated_dir, pron_train_config_path):
    """Train twice with the pronunciation config and check export behaviour.

    First run: train and clean up without exporting. Neither the model
    archive nor a copy of the dictionary may exist in the export directory.
    Second run: train and export. The model archive and the dictionary
    (base name with ``.txt`` replaced by ``.dict``) must both exist.
    """
    temp_dir = os.path.join(generated_dir, "temp", "pron_train_test")
    export_dir = os.path.join(generated_dir, "pron_train_test_export")
    export_path = os.path.join(export_dir, "model.zip")
    dictionary_name = os.path.basename(mixed_dict_path)
    shutil.rmtree(temp_dir, ignore_errors=True)
    cli_args = argparse.Namespace(use_mp=True, debug=False, verbose=True)

    def build_aligner():
        # Parameters are re-parsed for every aligner, as two separate runs.
        return TrainableAligner(
            corpus_directory=basic_corpus_dir,
            dictionary_path=mixed_dict_path,
            temporary_directory=temp_dir,
            **TrainableAligner.parse_parameters(pron_train_config_path,
                                                args=cli_args),
        )

    aligner = build_aligner()
    aligner.train()

    aligner.cleanup()
    assert not os.path.exists(export_path)
    assert not os.path.exists(os.path.join(export_dir, dictionary_name))

    aligner = build_aligner()
    aligner.train()
    aligner.export_model(export_path)
    assert os.path.exists(export_path)
    exported_dictionary = os.path.join(
        export_dir, dictionary_name.replace(".txt", ".dict"))
    assert os.path.exists(exported_dictionary)
Exemple #10
0
def test_basic_mono(
    mixed_dict_path,
    basic_corpus_dir,
    generated_dir,
    mono_train_config_path,
    mono_align_model_path,
    mono_output_directory,
):
    """Train with the mono config, export the model, then align with it.

    The model exported to ``mono_align_model_path`` is loaded by a
    ``PretrainedAligner`` running in a separate temporary directory, and the
    alignment output is written to ``mono_output_directory``. There are no
    explicit assertions: the test passes when every step completes.
    """
    cli_args = argparse.Namespace(use_mp=True, debug=False, verbose=True)

    # Stage 1: train and export the acoustic model.
    train_dir = os.path.join(generated_dir, "temp", "mono_train_test")
    shutil.rmtree(train_dir, ignore_errors=True)
    train_config = TrainableAligner.parse_parameters(mono_train_config_path,
                                                     args=cli_args)
    trainer = TrainableAligner(
        corpus_directory=basic_corpus_dir,
        dictionary_path=mixed_dict_path,
        temporary_directory=train_dir,
        **train_config,
    )
    trainer.train()
    trainer.export_model(mono_align_model_path)

    # Stage 2: align the same corpus with the exported model.
    align_dir = os.path.join(generated_dir, "temp", "mono_align_test")
    shutil.rmtree(align_dir, ignore_errors=True)
    align_config = PretrainedAligner.parse_parameters(args=cli_args)
    aligner = PretrainedAligner(
        corpus_directory=basic_corpus_dir,
        dictionary_path=mixed_dict_path,
        acoustic_model_path=mono_align_model_path,
        temporary_directory=align_dir,
        **align_config,
    )
    aligner.align()
    aligner.export_files(mono_output_directory)