Example #1
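All of the examples below assume the following imports are in scope. The elpis module paths are assumptions based on a typical layout of the project rather than something confirmed by this section; adjust them to the real package structure.

import json
from pathlib import Path

import pytest

# Assumed module paths for the objects under test.
from elpis.wrappers.objects.interface import KaldiInterface
from elpis.wrappers.objects.dataset import Dataset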
def test_new_dataset_using_use_existing(tmpdir):
    """
    Using use_existing when the dataset does not already exist is okay.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x', use_existing=True)

    # Black-box property testing
    assert ds.files == []
    assert ds.processed_labels == []
    assert ds.importer is None
    assert ds.has_been_processed == False

    assert ds.state == json.loads(f"""
    {{
        "name": "dataset_x",
        "hash": "{ds.hash}",
        "date": "{ds.date}",
        "has_been_processed": false,
        "files": [],
        "processed_labels": [],
        "importer": null
    }}
    """)
    return
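The same state assertion can also be written with a plain dict, which avoids having to double the braces inside the f-string; a minimal equivalent sketch using only names from the test above:

# Equivalent check with a dict literal instead of json.loads on an f-string.
assert ds.state == {
    "name": "dataset_x",
    "hash": ds.hash,
    "date": ds.date,
    "has_been_processed": False,
    "files": [],
    "processed_labels": [],
    "importer": None,
}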
Example #2
def test_select_importer_when_already_selected(tmpdir):
    """
    Re-selecting the importer clears its settings back to the defaults.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')
    ds.select_importer('Elan')
    ds.importer.context["tier"] = "Shift"
    ds.select_importer('Elan')
    assert ds.state == json.loads(f"""
    {{
        "name": "dataset_x",
        "hash": "{ds.hash}",
        "date": "{ds.date}",
        "has_been_processed": false,
        "files": [],
        "processed_labels": [],
        "importer": {{
            "name": "Elan",
            "context": {{
                "tier": "Phrase",
                "graphic": "elan.png"
            }}
        }}
    }}
    """)
    return
Example #3
def test_select_importer(tmpdir):
    """
    Select an existing importer.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')
    ds.select_importer('Elan')
    assert ds.importer is not None
    assert ds.has_been_processed == False
    assert ds.state == json.loads(f"""
    {{
        "name": "dataset_x",
        "hash": "{ds.hash}",
        "date": "{ds.date}",
        "has_been_processed": false,
        "files": [],
        "processed_labels": [],
        "importer": {{
            "name": "Elan",
            "context": {{
                "tier": "Phrase",
                "graphic": "elan.png"
            }}
        }}
    }}
    """)
    return
Example #4
def test_change_importer_setting(tmpdir):
    """
    Change a property of the importer.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')
    ds.select_importer('Elan')
    ds.importer.context["tier"] = "Shift"
    assert ds.state == json.loads(f"""
    {{
        "name": "dataset_x",
        "hash": "{ds.hash}",
        "date": "{ds.date}",
        "has_been_processed": false,
        "files": [],
        "processed_labels": [],
        "importer": {{
            "name": "Elan",
            "context": {{
                "tier": "Shift",
                "graphic": "elan.png"
            }}
        }}
    }}
    """)
    return
Example #5
def test_new_dataset(tmpdir):
    """
    Check the state of a dataset without adding any files, selecting an
    importer or processing.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')

    # Black-box property testing
    assert ds.files == []
    assert ds.processed_labels == []
    assert ds.importer is None
    assert ds.has_been_processed == False

    assert ds.state == json.loads(f"""
    {{
        "name": "dataset_x",
        "hash": "{ds.hash}",
        "date": "{ds.date}",
        "has_been_processed": false,
        "files": [],
        "processed_labels": [],
        "importer": null
    }}
    """)

    # White-box testing: the dataset directory contains empty child
    # directories "original" and "resampled".
    path_to_original = Path(f'{tmpdir}/state/datasets/{ds.hash}/original')
    assert path_to_original.is_dir()
    assert list(path_to_original.iterdir()) == []
    path_to_resampled = Path(f'{tmpdir}/state/datasets/{ds.hash}/resampled')
    assert path_to_resampled.is_dir()
    assert list(path_to_resampled.iterdir()) == []
    return
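For reference, the on-disk layout these white-box checks expect is summarised below; this is derived from the assertions above, with names in braces as placeholders:

# {tmpdir}/state/
#     datasets/
#         {ds.hash}/
#             original/      (empty)
#             resampled/     (empty)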
Example #6
def test_new_pron_dict_using_use_existing(tmpdir):
    """
    Using use_existing when the pron dict does not already exist is okay.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    kaldi.new_pron_dict('p', use_existing=True)
    return
Example #7
def test_add_directory(tmpdir):
    """
    Add all files in a directory and see the state change.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')
    ds.add_directory('/recordings/transcribed')
    assert set(ds.files) == {
        "1_1_1.wav", "1_1_1.eaf", "1_1_2.wav", "1_1_2.eaf", "1_1_3.wav",
        "1_1_3.eaf", "1_1_4.wav", "1_1_4.eaf"
    }
    assert set(ds.state['files']) == set(
        json.loads(f"""
    [
        "1_1_1.wav",
        "1_1_1.eaf",
        "1_1_2.wav",
        "1_1_2.eaf",
        "1_1_3.wav",
        "1_1_3.eaf",
        "1_1_4.wav",
        "1_1_4.eaf"
    ]
    """))
    return
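If the fixture directory is available in the test environment, the expected set could also be derived from the directory itself instead of hard-coding the file names; a sketch, at the cost of being less explicit about the expected contents:

# Build the expectation from the same fixture directory that was added.
expected = {p.name for p in Path('/recordings/transcribed').iterdir()
            if p.suffix in {'.wav', '.eaf'}}
assert set(ds.files) == expected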
Example #8
def test_new_transcription_using_use_existing(tmpdir):
    """
    Using use_existing when the transcription does not already exist is okay.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    kaldi.new_transcription('transcription_w', use_existing=True)
    return
Example #9
def test_override_and_use_existing(tmpdir):
    """
    Cannot have both the override and use_existing parameters set to True.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    with pytest.raises(ValueError):
        kaldi.new_pron_dict('x', override=True, use_existing=True)
    return
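One plausible guard for these mutually exclusive flags is sketched below; this is illustrative only, not elpis's actual implementation, and the method body and error message are assumptions:

# Hypothetical sketch of the KaldiInterface method's guard clause.
def new_pron_dict(self, name, override=False, use_existing=False):
    if override and use_existing:
        raise ValueError("override and use_existing cannot both be True")
    ...  # create a new pron dict, or load/replace an existing one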
Example #10
def test_select_nonexistent_importer(tmpdir):
    """
    The importer name must exist.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')
    with pytest.raises(ValueError):
        ds.select_importer('this_importer_name_does_not_exist')
    return
Example #11
def test_set_l2s_missing_path(tmpdir):
    """
    If the path does not exist, a FileNotFoundError is raised.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    pd = kaldi.new_pron_dict('pronunciation dictionary')
    with pytest.raises(FileNotFoundError):
        pd.set_l2s_path('/missing/letter_to_sound.txt')
    return
Example #12
def test_process_empty_dataset(tmpdir):
    """
    Attempting to process a dataset with no files will not produce an error.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')
    ds.select_importer('Elan')
    ds.process()
    return
Example #13
def test_change_setting_before_selecting_importer(tmpdir):
    """
    If an attempt is made to change an importer setting before an importer
    has been selected, an error will be raised.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')
    with pytest.raises(AttributeError):  # importer is None
        ds.importer.context["tier"] = "Shift"
    return
Example #14
def test_remove_file_that_does_not_exist(tmpdir):
    """
    Raise an error when there is an attempt to remove a file that has not been
    added.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')
    with pytest.raises(ValueError):
        ds.remove_file("1_1_1.wav")
    return
Example #15
def test_process_without_importer(tmpdir):
    """
    Running the process() function without specifying an importer from the
    available data transformers will raise an error.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')
    with pytest.raises(RuntimeError):
        ds.process()
    return
Example #16
def test_load(tmpdir):
    """
    Load an already existing dataset.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds1 = kaldi.new_dataset('dataset_x')
    ds2 = Dataset.load(ds1.path)

    assert ds2.importer is None
    assert ds2.has_been_processed == False
    return
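A natural extra check here, assuming the stored state round-trips unchanged on load (an assumption, not something this section confirms), is to compare the two objects directly:

# Hypothetical additional assertions: the reloaded dataset mirrors the original.
assert ds2.hash == ds1.hash
assert ds2.state == ds1.state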
Example #17
def test_lexicon_before_linking(tmpdir):
    """
    The pron dict must be linked to a dataset before attempting to generate
    the lexicon, otherwise an error is raised.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    pd = kaldi.new_pron_dict('pronunciation dictionary')
    pd.set_l2s_path('/recordings/letter_to_sound.txt')
    with pytest.raises(RuntimeError):
        pd.generate_lexicon()
    return
Example #18
def test_lexicon_before_dataset_processing(tmpdir):
    """
    An attempt to generate the lexicon before processing the dataset will
    raise an error.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')
    pd = kaldi.new_pron_dict('pronunciation dictionary')
    pd.link(ds)
    pd.set_l2s_path('/recordings/letter_to_sound.txt')
    with pytest.raises(RuntimeError):
        pd.generate_lexicon()
    return
Example #19
def test_existing_pron_dict_using_use_existing(tmpdir):
    """
    Use the use_existing parameter to load the configuration from an existing pron dict.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    pd1 = kaldi.new_pron_dict('x')
    pd1_hash = pd1.hash
    pd2 = kaldi.new_pron_dict('x', use_existing=True)
    assert len(kaldi.list_pron_dicts()) == 1
    assert pd1_hash == pd2.hash
    assert pd1.path == pd2.path
    return
Example #20
def test_annotations_before_processing(tmpdir):
    """
    If there is an attempt to collect the annotations before processing the
    data (the annotations cannot exist yet), an error will be raised.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')
    ds.add_directory('/recordings/transcribed')
    ds.select_importer('Elan')
    with pytest.raises(RuntimeError):
        _ = ds.annotations
    return
Example #21
def test_existing_dataset_using_use_existing(tmpdir):
    """
    Use the use_existing parameter to load the configuration from an existing dataset.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds1 = kaldi.new_dataset('dataset_x')
    ds1_hash = ds1.hash
    ds2 = kaldi.new_dataset('dataset_x', use_existing=True)
    assert len(kaldi.list_datasets()) == 1
    assert ds1_hash == ds2.hash
    assert ds1.path == ds2.path
    return
Example #22
def test_existing_dataset_using_override(tmpdir):
    """
    Use override to delete a dataset with the same name and create a totally
    new dataset with the same name.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds1 = kaldi.new_dataset('dataset_x')
    ds1_hash = ds1.hash
    ds2 = kaldi.new_dataset('dataset_x', override=True)
    # note ds1 can no longer be used
    assert len(kaldi.list_datasets()) == 1
    assert ds1_hash != ds2.hash
    return
Example #23
def test_existing_transcription_using_override(tmpdir):
    """
    Use override to delete a transcription with the same name and create a totally
    new transcription with the same name.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    t1 = kaldi.new_transcription('transcription_w')
    t1_hash = t1.hash
    t2 = kaldi.new_transcription('transcription_w', override=True)
    # note t1 can no longer be used
    assert len(kaldi.list_transcriptions()) == 1
    assert t1_hash != t2.hash
    return
Example #24
def test_existing_pron_dict_using_override(tmpdir):
    """
    Use override to delete a pron dict with the same name and create a totally
    new pron dict with the same name.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    pd1 = kaldi.new_pron_dict('pronunciation dictionary')
    pd1_hash = pd1.hash
    pd2 = kaldi.new_pron_dict('pronunciation dictionary', override=True)
    # note pd1 can no longer be used
    assert len(kaldi.list_pron_dicts()) == 1
    assert pd1_hash != pd2.hash
    return
Example #25
def test_remove_file(tmpdir):
    """
    Test that a file can be removed.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')
    ds.add_directory('/recordings/transcribed')
    ds.remove_file("1_1_1.wav")
    assert set(ds.files) == {
        "1_1_1.eaf", "1_1_2.wav", "1_1_2.eaf", "1_1_3.wav", "1_1_3.eaf",
        "1_1_4.wav", "1_1_4.eaf"
    }
    return
Example #26
def test_process_then_delete_file(tmpdir):
    """
    Test that when a file is removed, the has_been_processed flag is
    switched back to false, since the dataset has changed.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')
    ds.add_directory('/recordings/transcribed')
    ds.select_importer('Elan')
    ds.process()
    ds.remove_file("1_1_1.wav")
    assert ds.has_been_processed == False
    return
Example #27
def test_label_reset(tmpdir):
    """
    Test that when a file is removed and the has_been_processed flag is
    switched back to false, the processed labels are also cleared.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')
    ds.add_directory('/recordings/transcribed')
    ds.select_importer('Elan')
    ds.process()
    ds.remove_file("1_1_1.wav")
    assert ds.processed_labels == []
    return
Example #28
def test_annotations_after_processing(tmpdir):
    """
    Ensure annotations are retrievable.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    ds = kaldi.new_dataset('dataset_x')
    ds.add_directory('/recordings/transcribed')
    ds.select_importer('Elan')
    ds.process()
    annotations = ds.annotations
    # Ensure no errors are raised; annotations should be JSON-serialisable.
    assert len(annotations) != 0
    return
Example #29
def test_error_when_writing_to_protected_property(tmpdir):
    """
    An error is raised when there is an attempt to write to a protected
    property.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    t = kaldi.new_transcription('transcription_w')

    with pytest.raises(AttributeError):
        t.has_been_transcribed = True
    with pytest.raises(AttributeError):
        t.exporter = "some obj"
    with pytest.raises(AttributeError):
        t.state = "Not a valid"
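Read-only attributes like these are typically exposed as properties without setters, which is what makes assignment raise AttributeError; a generic Python sketch of the pattern, not elpis's actual code:

class ReadOnlyExample:
    def __init__(self):
        self._has_been_transcribed = False

    @property
    def has_been_transcribed(self):
        # No setter is defined, so assigning to the attribute raises AttributeError.
        return self._has_been_transcribed


obj = ReadOnlyExample()
# obj.has_been_transcribed = True  # AttributeError: can't set attribute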
Example #30
def test_construction(tmpdir):
    """
    Constructing a KaldiInterface creates the expected state directory structure.
    """
    KaldiInterface(f'{tmpdir}/state')

    # White-box testing
    path_to_datasets = Path(f'{tmpdir}/state/datasets')
    path_to_pron_dicts = Path(f'{tmpdir}/state/pron_dicts')
    path_to_models = Path(f'{tmpdir}/state/models')
    path_to_transcriptions = Path(f'{tmpdir}/state/transcriptions')

    # Creates empty child directories:
    #   datasets/
    #   pron_dicts/
    #   models/
    #   transcriptions/
    assert path_to_datasets.is_dir()
    assert path_to_datasets.exists()
    assert list(path_to_datasets.iterdir()) == []
    assert path_to_pron_dicts.is_dir()
    assert path_to_pron_dicts.exists()
    assert list(path_to_pron_dicts.iterdir()) == []
    assert path_to_models.is_dir()
    assert path_to_models.exists()
    assert list(path_to_models.iterdir()) == []
    assert path_to_transcriptions.is_dir()
    assert path_to_transcriptions.exists()
    assert list(path_to_transcriptions.iterdir()) == []


# TODO: much more testing here
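Since every example starts with the same KaldiInterface(f'{tmpdir}/state') setup, a shared pytest fixture could remove that repetition; a minimal sketch, assuming the import path suggested at the top of this section:

@pytest.fixture
def kaldi(tmpdir):
    """Provide a fresh KaldiInterface rooted in a temporary state directory."""
    return KaldiInterface(f'{tmpdir}/state')


def test_new_dataset_with_fixture(kaldi):
    ds = kaldi.new_dataset('dataset_x')
    assert ds.files == []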