Ejemplo n.º 1
0
def test_determine_labels(tmpdir):
    """Check that determine_labels returns every label present in a corpus.

    Four single-line transcription files (two utterances, each with a
    ``phonemes`` and a ``phonemes_and_tones`` variant) are written into a
    ``label`` sub-directory of ``tmpdir``. For each label type, the set
    returned by ``determine_labels`` must be non-empty and equal to the union
    of the space-separated tokens of the two matching transcriptions.

    Args:
        tmpdir: pytest's temporary directory fixture, used as the corpus root.
    """
    from pathlib import Path

    label_dir = tmpdir.mkdir("label")

    utt1_phonemes = 'ɖ ɯ ɕ i k v̩'
    utt1_phonemes_tones = 'ɖ ɯ ˧ ɕ i ˧ k v̩ ˧˥'
    utt2_phonemes = 'g v̩ tsʰ i g v̩ k v̩'
    utt2_phonemes_tones = 'g v̩ ˧ tsʰ i ˩ g v̩ ˩ k v̩ ˩'

    # (file name, file content) pairs; the extension is the label type.
    label_files = [
        ("test1.phonemes", utt1_phonemes),
        ("test1.phonemes_and_tones", utt1_phonemes_tones),
        ("test2.phonemes", utt2_phonemes),
        ("test2.phonemes_and_tones", utt2_phonemes_tones),
    ]
    for file_name, transcription in label_files:
        label_dir.join(file_name).write(transcription)

    from persephone.corpus import determine_labels

    # determine_labels is given a pathlib.Path rather than the fixture object.
    corpus_root = Path(str(tmpdir))

    expected_phonemes = set(utt1_phonemes.split(' ')).union(
        utt2_phonemes.split(' '))
    found_phonemes = determine_labels(corpus_root, "phonemes")
    assert found_phonemes
    assert found_phonemes == expected_phonemes

    expected_phonemes_tones = set(utt1_phonemes_tones.split(' ')).union(
        utt2_phonemes_tones.split(' '))
    found_phonemes_tones = determine_labels(corpus_root, "phonemes_and_tones")
    assert found_phonemes_tones
    assert found_phonemes_tones == expected_phonemes_tones
Ejemplo n.º 2
0
def test_determine_labels_throws():
    """Check that determine_labels raises for a non-existent directory.

    Calling it with a path that does not exist is expected to raise
    ``FileNotFoundError``.
    """
    from pathlib import Path
    from persephone.corpus import determine_labels

    missing_dir = Path("thispathdoesntexist")
    with pytest.raises(FileNotFoundError):
        determine_labels(missing_dir, "phonemes")
Ejemplo n.º 3
0
def test_fast():
    """Quick integration test: train a simple model on a tiny dataset.

    Any existing copy of the tiny example is removed and the data is
    downloaded again. A filterbank/phoneme corpus is built from it, and a
    simple model is trained with ``min_epochs=2`` and ``max_epochs=5``. The
    test label error rate is then read back and only checked loosely.

    Note (carried over from the original description): ffmpeg is not run to
    normalize the WAVs because Travis doesn't have it installed; the
    normalized WAVs are shipped in the feat/ directory instead.
    """

    # 4 utterance toy set
    tiny_example_url = "https://cloudstor.aarnet.edu.au/plus/s/g2GreDNlDKUq9rz/download"
    tiny_example_path = Path(join(DATA_BASE_DIR, "tiny_example/"))

    # Start from a clean slate so the download below is what gets tested.
    rm_dir(tiny_example_path)
    download_example_data(tiny_example_url)

    phoneme_labels = corpus.determine_labels(tiny_example_path, "phonemes")
    tiny_corpus = corpus.Corpus(
        "fbank", "phonemes", tiny_example_path, labels=phoneme_labels)

    exp_dir = experiment.prep_exp_dir(directory=EXP_BASE_DIR)
    simple_model = experiment.get_simple_model(exp_dir, tiny_corpus)
    simple_model.train(min_epochs=2, max_epochs=5)

    # A decent score can't be expected from so little data; just check that
    # a test label error rate was produced and is below a loose bound.
    test_ler = get_test_ler(exp_dir)
    assert test_ler < 2.0
Ejemplo n.º 4
0
def test_tutorial():
    """Run the example described in the tutorial in README.md.

    Any existing copy of the example data is removed and the data is
    downloaded again. A filterbank/phoneme corpus is built from it and passed
    to ``experiment.train_ready``. The test label error rate read from the
    resulting experiment directory must be below 0.3.
    """

    # 1024 utterance sample set.
    na_example_url = "https://cloudstor.aarnet.edu.au/plus/s/YJXTLHkYvpG85kX/download"
    na_example_path = Path(join(DATA_BASE_DIR, "na_example/"))

    # Remove any earlier copy before downloading the data again.
    rm_dir(na_example_path)
    download_example_data(na_example_url)

    # The first setup encouraged in the tutorial.
    phoneme_labels = corpus.determine_labels(na_example_path, "phonemes")
    na_corpus = corpus.Corpus(
        "fbank", "phonemes", na_example_path, phoneme_labels)

    exp_dir = experiment.train_ready(na_corpus, directory=EXP_BASE_DIR)

    # Check the result of training by reading back the test scores.
    test_ler = get_test_ler(exp_dir)
    assert test_ler < 0.3