def test_new_transcription_using_use_existing(tmpdir):
    """
    Asking for ``use_existing=True`` when no transcription with that name
    exists yet is accepted: the call must complete without raising.
    """
    interface = KaldiInterface(f'{tmpdir}/state')
    interface.new_transcription('transcription_w', use_existing=True)
def test_override_and_use_existing(tmpdir):
    """
    ``override`` and ``use_existing`` are mutually exclusive: setting both
    to True must raise a ValueError.
    """
    interface = KaldiInterface(f'{tmpdir}/state')
    conflicting_flags = {'override': True, 'use_existing': True}
    with pytest.raises(ValueError):
        interface.new_transcription('transcription_w', **conflicting_flags)
def test_two_new_transcription_with_same_name(tmpdir):
    """
    Creating a second transcription under a name that is already taken,
    with neither ``override`` nor ``use_existing`` set, raises a ValueError.
    """
    interface = KaldiInterface(f'{tmpdir}/state')
    name = 'transcription_w'
    # The first creation claims the name and must succeed.
    interface.new_transcription(name)
    # The second creation with the same name must be refused.
    with pytest.raises(ValueError):
        interface.new_transcription(name)
def test_existing_transcription_using_override(tmpdir):
    """
    ``override=True`` replaces an existing transcription of the same name
    with a brand-new one: only one transcription is listed afterwards and
    its hash differs from the replaced one's.
    """
    interface = KaldiInterface(f'{tmpdir}/state')
    original = interface.new_transcription('transcription_w')
    # Capture the hash up front: the original object is not to be used
    # once it has been overridden.
    original_hash = original.hash
    replacement = interface.new_transcription('transcription_w', override=True)
    transcriptions = interface.list_transcriptions()
    assert len(transcriptions) == 1
    assert original_hash != replacement.hash
def test_error_when_writing_to_protected_property(tmpdir):
    """
    Assigning to any of the protected properties of a transcription
    (``has_been_transcribed``, ``exporter``, ``state``) raises an
    AttributeError.
    """
    interface = KaldiInterface(f'{tmpdir}/state')
    transcription = interface.new_transcription('transcription_w')
    # (property name, value we try to assign), checked in this order
    forbidden_writes = (
        ('has_been_transcribed', True),
        ('exporter', "some obj"),
        ('state', "Not a valid"),
    )
    for attribute, value in forbidden_writes:
        with pytest.raises(AttributeError):
            setattr(transcription, attribute, value)
def test_new_transcription(tmpdir):
    """
    Check the state of a new transcription.

    A freshly created transcription reports that it has not been
    transcribed, has no exporter, and exposes a ``state`` holding its name,
    hash and date with ``model`` and ``exporter`` unset.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    t = kaldi.new_transcription('transcription_w')
    # Compare against the singletons by identity rather than with ``==``
    # (PEP 8): equality can be satisfied by other falsy/overloaded values.
    assert t.has_been_transcribed is False
    assert t.exporter is None
    # The hash and date are generated at creation time, so they are
    # interpolated into the expected state instead of being hard-coded.
    assert t.state == json.loads(f"""
    {{
        "name": "transcription_w",
        "hash": "{t.hash}",
        "date": "{t.date}",
        "model": null,
        "has_been_transcribed": false,
        "exporter": null
    }}
    """)
def test_new_transcription_using_override(tmpdir):
    """
    Using override has no effect when a transcription with the same name
    does not exist.

    The result must look exactly like a plain new transcription: not yet
    transcribed, no exporter, and a ``state`` holding its name, hash and
    date with ``model`` and ``exporter`` unset.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    t = kaldi.new_transcription('transcription_w', override=True)
    # Compare against the singletons by identity rather than with ``==``
    # (PEP 8): equality can be satisfied by other falsy/overloaded values.
    assert t.has_been_transcribed is False
    assert t.exporter is None
    # The hash and date are generated at creation time, so they are
    # interpolated into the expected state instead of being hard-coded.
    assert t.state == json.loads(f"""
    {{
        "name": "transcription_w",
        "hash": "{t.hash}",
        "date": "{t.date}",
        "model": null,
        "has_been_transcribed": false,
        "exporter": null
    }}
    """)
ds.add_directory('/recordings/transcribed', filter=['eaf', 'wav'])
ds.process()

# Step 2
# ======
# Create a pronunciation dictionary, link it to the dataset, point it at the
# letter-to-sound file and generate the lexicon.
pd = kaldi.new_pron_dict('pd')
pd.link(ds)
pd.set_l2s_path('/recordings/letter_to_sound.txt')
pd.generate_lexicon()

# Step 3
# ======
# Create a model linked to both the dataset and the pronunciation
# dictionary, build its Kaldi structure, then train it.
m = kaldi.new_model('mx')
m.link(ds, pd)
m.build_kaldi_structure()
m.train()  # may take a while

# Step 4
# ======
# Create a transcription linked to the trained model, feed it unseen audio
# and print the transcribed text.
t = kaldi.new_transcription('tx')
t.link(m)
with open('/recordings/untranscribed/audio.wav', 'rb') as audio_file:
    t.prepare_audio(audio_file)
# t.transcribe_align()
t.transcribe()
# print(t.elan().decode('utf-8'))
print(t.text().decode('utf-8'))