def mock_model(tmpdir_factory):
    """Return a ``(KaldiInterface, model)`` pair with a trained model for tests.

    On the first run this builds the full pipeline — dataset import, pronunciation
    dictionary, model construction, and training — under a temp ``state`` dir.
    If the state directory already exists, the saved interface is reloaded and
    the existing model is reused instead of retraining.

    Args:
        tmpdir_factory: pytest ``tmpdir_factory`` fixture; provides the base
            temporary directory for the pipeline state.

    Returns:
        tuple: ``(kaldi, m)`` — the interface and the (possibly reloaded) model.
    """
    base_path = Path(tmpdir_factory.mktemp("pipeline"))
    # BUG FIX: the original used base_path.joinpath('/state'). An absolute
    # component makes pathlib discard base_path entirely, yielding '/state'
    # at the filesystem root — so the exists() check never looked at the
    # intended per-test state directory. Join with a relative component.
    state_path = base_path / 'state'
    if not state_path.exists():
        kaldi = KaldiInterface(str(state_path))
        # Dataset: import Elan-transcribed recordings.
        ds = kaldi.new_dataset('dataset_x')
        ds.add_directory('/recordings/transcribed')
        ds.select_importer('Elan')
        ds.process()
        # Pronunciation dictionary from letter-to-sound rules.
        pd = kaldi.new_pron_dict('pron_dict_y')
        pd.link(ds)
        pd.set_l2s_path('/recordings/letter_to_sound.txt')
        pd.generate_lexicon()
        # Model: link data + lexicon, build Kaldi file structure, train.
        m = kaldi.new_model('model_z')
        m.link(ds, pd)
        m.build_kaldi_structure()  # TODO: remove this line
        m.train()  # may take a while
    else:
        kaldi = KaldiInterface.load(str(state_path))
        m = kaldi.new_model('model_z', use_existing=True)
    return (kaldi, m)
# Build the dataset from transcribed recordings ("kaldi" is assumed to be
# an already-constructed KaldiInterface from earlier in the script).
dataset = kaldi.new_dataset('dsy')
dataset.add_directory('/recordings/transcribed', filter=['eaf', 'wav'])
dataset.process()

# Step 2
# ======
# Generate the pronunciation dictionary from the letter-to-sound mapping.
pron_dict = kaldi.new_pron_dict('pd')
pron_dict.link(dataset)
pron_dict.set_l2s_path('/recordings/letter_to_sound.txt')
pron_dict.generate_lexicon()

# Step 3
# ======
# Wire the dataset and pronunciation dictionary into a fresh model, lay out
# the Kaldi directory structure, and run training.
model = kaldi.new_model('mx')
model.link(dataset, pron_dict)
model.build_kaldi_structure()
model.train()  # may take a while

# Step 4
# ======
# Transcribe previously unseen audio with the trained model.
transcription = kaldi.new_transcription('tx')
transcription.link(model)
with open('/recordings/untranscribed/audio.wav', 'rb') as faudio:
    transcription.prepare_audio(faudio)
# transcription.transcribe_align()
transcription.transcribe()
# print(transcription.elan().decode('utf-8'))
print(transcription.text().decode('utf-8'))