def test_determine_labels(tmpdir):
    """Test the function that determines what labels exist in a directory.

    Builds a ``label/`` directory under pytest's ``tmpdir`` fixture holding
    two utterances, each transcribed both as plain phonemes and as phonemes
    with tones, and checks that ``determine_labels`` returns exactly the set
    of space-separated tokens found across the files of the requested type.
    """
    from pathlib import Path

    from persephone.corpus import determine_labels

    base_dir = Path(str(tmpdir))
    label_dir = base_dir / "label"
    label_dir.mkdir()

    test_1_phonemes = 'ɖ ɯ ɕ i k v̩'
    test_1_phonemes_and_tones = 'ɖ ɯ ˧ ɕ i ˧ k v̩ ˧˥'
    test_2_phonemes = 'g v̩ tsʰ i g v̩ k v̩'
    test_2_phonemes_and_tones = 'g v̩ ˧ tsʰ i ˩ g v̩ ˩ k v̩ ˩'

    label_files = {
        "test1.phonemes": test_1_phonemes,
        "test1.phonemes_and_tones": test_1_phonemes_and_tones,
        "test2.phonemes": test_2_phonemes,
        "test2.phonemes_and_tones": test_2_phonemes_and_tones,
    }
    for file_name, transcription in label_files.items():
        # The transcriptions are IPA text, so write them with an explicit
        # encoding instead of the locale default; otherwise the test breaks
        # on machines whose default encoding cannot represent them.
        # NOTE(review): assumes determine_labels decodes label files as
        # UTF-8 -- confirm against persephone.corpus.
        (label_dir / file_name).write_text(transcription, encoding="utf-8")

    all_phonemes = (set(test_1_phonemes.split(' '))
                    | set(test_2_phonemes.split(' ')))
    phoneme_labels = determine_labels(base_dir, "phonemes")
    assert phoneme_labels
    assert phoneme_labels == all_phonemes

    all_phonemes_and_tones = (set(test_1_phonemes_and_tones.split(' '))
                              | set(test_2_phonemes_and_tones.split(' ')))
    phoneme_and_tones_labels = determine_labels(base_dir,
                                                "phonemes_and_tones")
    assert phoneme_and_tones_labels
    assert phoneme_and_tones_labels == all_phonemes_and_tones
def test_determine_labels_throws():
    """Test that a nonexistent directory makes determine_labels raise."""
    from pathlib import Path

    from persephone.corpus import determine_labels

    # A relative path that is not expected to exist in the working directory.
    missing_dir = Path("thispathdoesntexist")

    with pytest.raises(FileNotFoundError):
        determine_labels(missing_dir, "phonemes")
def test_fast():
    """Fast integration test that briefly trains a model on a tiny dataset.

    Training is requested with ``min_epochs=2`` and ``max_epochs=5``.

    Note that this does not run ffmpeg to normalize the WAVs since Travis
    doesn't have that installed. The normalized WAVs are therefore shipped in
    the feat/ directory so that the normalization step isn't run.
    """
    # Download location of the 4 utterance toy set.
    TINY_EXAMPLE_LINK = "https://cloudstor.aarnet.edu.au/plus/s/g2GreDNlDKUq9rz/download"

    # Remove any previous copy of the data before downloading it again.
    tiny_example_path = Path(join(DATA_BASE_DIR, "tiny_example/"))
    rm_dir(tiny_example_path)
    download_example_data(TINY_EXAMPLE_LINK)

    phoneme_labels = corpus.determine_labels(tiny_example_path, "phonemes")
    tiny_corpus = corpus.Corpus(
        "fbank", "phonemes", tiny_example_path, labels=phoneme_labels)

    exp_dir = experiment.prep_exp_dir(directory=EXP_BASE_DIR)
    simple_model = experiment.get_simple_model(exp_dir, tiny_corpus)
    simple_model.train(min_epochs=2, max_epochs=5)

    # Read back the test score written by the run. A decent score can't be
    # expected from so little data, so only check that there is something.
    test_ler = get_test_ler(exp_dir)
    assert test_ler < 2.0
def test_tutorial():
    """Test running the example described in the tutorial in README.md."""
    # Download location of the 1024 utterance sample set.
    NA_EXAMPLE_LINK = "https://cloudstor.aarnet.edu.au/plus/s/YJXTLHkYvpG85kX/download"

    # Remove any previous copy of the data before downloading it again.
    na_example_path = Path(join(DATA_BASE_DIR, "na_example/"))
    rm_dir(na_example_path)
    download_example_data(NA_EXAMPLE_LINK)

    # The first setup encouraged in the tutorial.
    phoneme_labels = corpus.determine_labels(na_example_path, "phonemes")
    na_corpus = corpus.Corpus(
        "fbank", "phonemes", na_example_path, phoneme_labels)
    exp_dir = experiment.train_ready(na_corpus, directory=EXP_BASE_DIR)

    # Check convergence of the model by reading the test score it produced.
    test_ler = get_test_ler(exp_dir)
    assert test_ler < 0.3