Example 1
def mock_model(tmpdir_factory):
    """Build (or reload) a trained Kaldi model rooted in a per-session temp dir.

    Runs the full dataset -> pronunciation-dict -> model pipeline on first use,
    then reloads the persisted state on subsequent calls.

    Args:
        tmpdir_factory: pytest ``tmpdir_factory`` fixture used to create the
            session-scoped working directory.

    Returns:
        tuple: ``(kaldi, m)`` — the KaldiInterface and the trained model.
    """
    base_path = Path(tmpdir_factory.mktemp("pipeline"))
    # BUG FIX: joinpath('/state') passed an *absolute* component, which makes
    # pathlib discard base_path entirely and test for '/state' at the
    # filesystem root — so the "already built" branch could never trigger.
    # Join the relative component instead so we check <base_path>/state,
    # the same directory KaldiInterface writes to below.
    state_path = base_path.joinpath('state')
    if not state_path.exists():
        kaldi = KaldiInterface(f'{base_path}/state')

        # Ingest the transcribed recordings via the Elan importer.
        ds = kaldi.new_dataset('dataset_x')
        ds.add_directory('/recordings/transcribed')
        ds.select_importer('Elan')
        ds.process()

        # Derive a lexicon from the letter-to-sound rules.
        pd = kaldi.new_pron_dict('pron_dict_y')
        pd.link(ds)
        pd.set_l2s_path('/recordings/letter_to_sound.txt')
        pd.generate_lexicon()

        # Train a model from the dataset + pronunciation dictionary.
        m = kaldi.new_model('model_z')
        m.link(ds, pd)
        m.build_kaldi_structure()  # TODO: remove this line
        m.train()  # may take a while
    else:
        # State already exists from an earlier run: reload instead of retraining.
        kaldi = KaldiInterface.load(f'{base_path}/state')
        m = kaldi.new_model('model_z', use_existing=True)
    return (kaldi, m)
Example 2
# Step 1
# ======
# Ingest transcribed recordings into a new dataset, keeping only the
# Elan transcription (.eaf) and audio (.wav) files.
# NOTE(review): `kaldi` is assumed to be a KaldiInterface created earlier
# in this file — confirm against the preceding lines.
ds = kaldi.new_dataset('dsy')
ds.add_directory('/recordings/transcribed', filter=['eaf', 'wav'])
ds.process()

# Step 2
# ======
# Build pronunciation dictionary from the letter-to-sound rules.
pd = kaldi.new_pron_dict('pd')
pd.link(ds)
pd.set_l2s_path('/recordings/letter_to_sound.txt')
pd.generate_lexicon()

# Step 3
# ======
# Link dataset and pd to a new model, then train the model.
m = kaldi.new_model('mx')
m.link(ds, pd)
m.build_kaldi_structure()
m.train() # may take a while

# Step 4
# ======
# Make a transcription interface and transcribe unseen audio to elan.
t = kaldi.new_transcription('tx')
t.link(m)
# Open in binary mode — prepare_audio expects a raw byte stream.
with open('/recordings/untranscribed/audio.wav', 'rb') as faudio:
    t.prepare_audio(faudio)
# t.transcribe_align()
t.transcribe()
# print(t.elan().decode('utf-8'))
# Transcription output is bytes; decode for display.
print(t.text().decode('utf-8'))