def _bake_gmm_decode_align(self):
    """Render the GMM decode/align script into ``self.path`` and run it.

    The template ``gmm-decode-align.sh`` is read, its relative
    ``kaldi_helpers/output`` script references are rewritten to absolute
    paths under ``/elpis/elpis/wrappers/output``, and the result is written
    to ``self.path / 'gmm-decode-align.sh'``. The script is then made
    executable and run with ``sh`` from the model's ``kaldi`` directory.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero
            status (``check=True``).
    """
    with open('/elpis/elpis/wrappers/inference/gmm-decode-align.sh', 'r') as fin:
        content: str = fin.read()

    # Point the helper-script references at their absolute locations.
    for script_name in ('ctm_to_textgrid.py', 'textgrid_to_elan.py'):
        content = content.replace(
            f'../../../../kaldi_helpers/output/{script_name}',
            f'/elpis/elpis/wrappers/output/{script_name}')

    decode_file_path = self.path.joinpath('gmm-decode-align.sh')
    with decode_file_path.open(mode='w') as file_:
        file_.write(content)

    # Add the execute bits directly instead of shelling out to ``chmod +x``;
    # this keeps working when the path contains spaces or shell
    # metacharacters.
    decode_file_path.chmod(decode_file_path.stat().st_mode | 0o111)

    # Pass argv as a list: splitting a formatted command string on
    # whitespace would break a script path that contains a space.
    subprocess.run(
        ['sh', str(decode_file_path)],
        cwd=str(self.model.path.joinpath('kaldi')),
        check=True)
def _cook_generate_infer_files(self):
    """Instantiate the infer-file generator script for this run and execute it.

    The ``generate-infer-files.sh`` template is read, its ``working_dir``
    placeholders are replaced with ``self.path`` and the model's
    ``kaldi/data/test`` and ``kaldi/data/infer`` directories, and the result
    is written to ``self.path / 'gen-infer-files.sh'``. The script is then
    made executable and invoked, both through the project ``run`` helper.
    """
    # cook the infer file generator
    # TODO fix below
    template_location = '/elpis/elpis/wrappers/inference/generate-infer-files.sh'
    with open(template_location, 'r') as template_file:
        script_text: str = template_file.read()

    # (placeholder in the template, concrete path), applied in this order.
    substitutions = (
        ('working_dir/input/infer',
         f'{self.path}'),
        ('working_dir/input/output/kaldi/data/test',
         f"{self.model.path.joinpath('kaldi', 'data', 'test')}"),
        ('working_dir/input/output/kaldi/data/infer',
         f"{self.model.path.joinpath('kaldi', 'data', 'infer')}"),
    )
    for placeholder, concrete in substitutions:
        script_text = script_text.replace(placeholder, concrete)

    script_path = self.path.joinpath('gen-infer-files.sh')
    with script_path.open(mode='w') as script_file:
        script_file.write(script_text)

    run(f'chmod +x {script_path}')
    run(f'{script_path}')
def transcribe(self, audio):
    """Transcribe ``audio`` with the model's GMM decoder.

    Steps, in order: process the audio file, generate the infer files, copy
    ``self.path`` into the model's ``kaldi/data/infer`` directory, copy the
    audio to ``kaldi/audio.wav``, run ``gmm-decode.sh`` from the ``kaldi``
    directory, then copy ``one-best-hypothesis.txt`` and the audio file
    named on the first line of the test ``wav.scp`` back into ``self.path``.

    Args:
        audio: passed straight to ``self._process_audio_file``.

    Raises:
        subprocess.CalledProcessError: if the decode script exits non-zero.
    """
    self._process_audio_file(audio)
    self._cook_generate_infer_files()

    kaldi_path = self.model.path.joinpath('kaldi')
    kaldi_infer_path = self.model.path.joinpath('kaldi', 'data', 'infer')
    kaldi_test_path = self.model.path.joinpath('kaldi', 'data', 'test')

    # run gmm-decoder
    # NOTE: shutil.copytree is called without dirs_exist_ok, so it raises
    # FileExistsError if the infer directory already exists.
    shutil.copytree(f'{self.path}', f"{kaldi_infer_path}")
    shutil.copy(f'{self.audio_file_path}',
                f"{self.model.path.joinpath('kaldi', 'audio.wav')}")
    # The script lives under wrappers/inference/; the previous path had the
    # directory name doubled ("inferenceinference"), which does not match
    # the location every other wrapper script is loaded from.
    subprocess.run(
        ['sh', '/elpis/elpis/wrappers/inference/gmm-decode.sh'],
        cwd=f'{kaldi_path}',
        check=True)

    # move results
    cmd = f"cp {kaldi_infer_path}/one-best-hypothesis.txt {self.path}/ && "
    # Second field of the first wav.scp line, minus its first two characters.
    cmd += f"infer_audio_filename=$(head -n 1 {kaldi_test_path}/wav.scp | awk '{{print $2}}' | cut -c 3- ) && "
    cmd += f"cp \"{kaldi_path}/$infer_audio_filename\" {self.path}"
    run(cmd)
def transcribe(self, on_complete: Callable=None):
    """Run the GMM decoder on the prepared audio and collect the results.

    Sets ``self.status`` to ``"transcribing"`` and ``self.type`` to
    ``"text"``, copies ``self.path`` into the model's ``kaldi/data/infer``
    directory and the audio file to ``kaldi/audio.wav``, then runs
    ``gmm-decode.sh`` from the ``kaldi`` directory. Afterwards
    ``one-best-hypothesis.txt`` and the audio file named on the first line of
    the test ``wav.scp`` are copied into ``self.path``, the status becomes
    ``"transcribed"`` and ``on_complete`` is called if one was given.

    Args:
        on_complete: optional zero-argument callback invoked at the end.

    Raises:
        subprocess.CalledProcessError: if the decode script exits non-zero.
    """
    self.status = "transcribing"
    self.type = "text"

    kaldi_dir = self.model.path.joinpath('kaldi')
    infer_dir = kaldi_dir.joinpath('data', 'infer')
    test_dir = kaldi_dir.joinpath('data', 'test')

    # Stage the inference inputs inside the model's kaldi tree.
    os.makedirs(str(infer_dir), exist_ok=True)
    distutils.dir_util.copy_tree(str(self.path), str(infer_dir))
    distutils.file_util.copy_file(str(self.audio_file_path),
                                  str(kaldi_dir.joinpath('audio.wav')))

    decode_argv = ['sh', '/elpis/elpis/wrappers/inference/gmm-decode.sh']
    subprocess.run(decode_argv, cwd=str(kaldi_dir), check=True)

    # move results
    steps = [
        f"cp {infer_dir}/one-best-hypothesis.txt {self.path}/",
        f"infer_audio_filename=$(head -n 1 {test_dir}/wav.scp | awk '{{print $2}}' | cut -c 3- )",
        f"cp \"{kaldi_dir}/$infer_audio_filename\" {self.path}",
    ]
    run(" && ".join(steps))

    self.status = "transcribed"
    if on_complete is None:
        return
    on_complete()
def train():
    """Run the model's ``run.sh`` training script inside its kaldi directory.

    Takes no arguments; ``self`` is read from the enclosing scope. The
    script's standard output is redirected by the shell to
    ``/elpis/state/tmp_log.txt``.
    """
    local_kaldi_path = self.path.joinpath('kaldi')

    # Setup for Training complete
    ######################################################################

    # task _test-train
    # Chain with ``&&`` so that ./run.sh is only started when the ``cd``
    # succeeded; with ``;`` a failed ``cd`` would still try to execute a
    # run.sh relative to whatever the current directory happens to be.
    p = run(
        f"cd {local_kaldi_path} && ./run.sh > /elpis/state/tmp_log.txt")
    # stdout of run.sh goes to the log file via the redirection above.
    print(p.stdout)
    print('double done.')
def train():
    """Run the model's ``run.sh`` training script inside its kaldi directory.

    Takes no arguments; ``self`` is read from the enclosing scope. Any
    existing ``/elpis/state/tmp_log.txt`` is removed first, then the script
    is run with its standard output redirected to that file.
    """
    local_kaldi_path = self.path.joinpath('kaldi')

    # Setup for Training complete
    ######################################################################

    # task _test-train
    tmp_log_path = '/elpis/state/tmp_log.txt'
    # Start from a clean log so stale output from an earlier run is gone
    # even if the command below never gets as far as the redirection.
    if os.path.isfile(tmp_log_path):
        os.remove(tmp_log_path)
    # Chain with ``&&`` so that ./run.sh is only started when the ``cd``
    # succeeded; with ``;`` a failed ``cd`` would still try to execute a
    # run.sh relative to whatever the current directory happens to be.
    p = run(f"cd {local_kaldi_path} && ./run.sh > {tmp_log_path}")
    # stdout of run.sh goes to the log file via the redirection above.
    print(p.stdout)
    print('train double done.')
def prepare_for_training():
    """Lay out the kaldi working directory for a training run.

    Takes no arguments; ``self`` is read from the enclosing scope. In order:

    1. create the kaldi sub-directories under ``self.path / 'kaldi'``;
    2. write ``nonsilence_phones.txt`` from the letter-to-sound file;
    3. render ``path.sh``, ``mfcc.conf`` and ``decode.config`` from the
       templates with pystache;
    4. move the generated corpus/test/train files out of
       ``self.path / 'output'``, write the silence phone lists, copy the
       helper scripts, the wsj ``steps``/``utils`` trees and the resampled
       audio into the kaldi directory.

    Step 4 is best-effort: any ``Exception`` raised there is reported with
    ``print`` and not propagated.
    """
    # task make-kaldi-subfolders
    kaldi_structure = KaldiPathStructure(self.path)

    # Not used further in this function; the directory is still created
    # because other code may expect it to exist.
    temporary_path = Path('/tmp', self.hash)
    temporary_path.mkdir(parents=True, exist_ok=True)

    local_kaldi_path = self.path.joinpath('kaldi')
    kaldi_data_local_dict = local_kaldi_path.joinpath('data', 'local', 'dict')
    kaldi_data_local = local_kaldi_path.joinpath('data', 'local')
    kaldi_data_test = local_kaldi_path.joinpath('data', 'test')
    kaldi_data_train = local_kaldi_path.joinpath('data', 'train')
    kaldi_conf = local_kaldi_path.joinpath('conf')
    kaldi_local = local_kaldi_path.joinpath('local')
    for directory in (local_kaldi_path, kaldi_data_local_dict,
                      kaldi_data_local, kaldi_data_test, kaldi_data_train,
                      kaldi_conf, kaldi_local):
        directory.mkdir(parents=True, exist_ok=True)

    # task generate-kaldi-configs
    path_file_path = kaldi_structure.path.joinpath('path.sh')
    mfcc_file_path = kaldi_structure.conf.joinpath('mfcc.conf')
    decode_config_file_path = kaldi_structure.conf.joinpath('decode.config')

    template_path = Path('/elpis/elpis/wrappers/templates')
    path_resource = template_path.joinpath('path.sh')
    mfcc_resource = template_path.joinpath('mfcc.conf')
    decode_config_resource = template_path.joinpath('decode.config')

    # task make-nonsil-phones
    # Second space-separated field of every non-comment line, blank entries
    # dropped, sorted and de-duplicated by the shell pipeline.
    nonsilence_phones_path = kaldi_data_local_dict.joinpath(
        'nonsilence_phones.txt')
    cmd = f"grep -v '^#' < {self.l2s_path} | cut -d' ' -f2 | grep -v '^$' | sort -u"
    p = run(cmd)
    # p.stdout is written in binary mode, unchanged.
    with nonsilence_phones_path.open(mode='wb') as fout:
        fout.write(p.stdout)

    # (file to write, template to read, values for the template)
    renderings = (
        (path_file_path, path_resource, {
            'KALDI_ROOT': '/kaldi',
            'HELPERS_PATH': '/kaldi-helpers',
            'CORPUS_PATH': f'..{self.dataset.pathto.original}',
        }),
        (mfcc_file_path, mfcc_resource, {
            'MFCC_SAMPLE_FREQUENCY': '44100',
            'MFCC_FRAME_LENGTH': '25',
            'MFCC_LOW_FREQ': '20',
            'MFCC_HIGH_FREQ': '22050',
            'MFCC_NUM_CEPS': '7',
        }),
        (decode_config_file_path, decode_config_resource, {
            'DECODE_BEAM': '11.0',
            'DECODE_FIRST_BEAM': '8.0',
        }),
    )
    for target, resource, context in renderings:
        with target.open(mode='w') as fout:
            with resource.open() as fin:
                fout.write(pystache.render(fin.read(), context))

    try:
        # task copy-generated-files
        output_path = self.path.joinpath('output')
        output_path.mkdir(parents=True, exist_ok=True)

        # training/corpus.txt -> kaldi/data/local/
        shutil.move(
            f"{output_path.joinpath('training', 'corpus.txt')}",
            f"{kaldi_data_local}")

        # testing/{segments,text,utt2spk,wav.scp} -> kaldi/data/test/
        # training/{segments,text,utt2spk,wav.scp} -> kaldi/data/train/
        for split, destination in (('testing', kaldi_data_test),
                                   ('training', kaldi_data_train)):
            for name in ('segments', 'text', 'utt2spk', 'wav.scp'):
                shutil.move(f"{output_path.joinpath(split, name)}",
                            f"{destination.joinpath(name)}")

        # task copy-phones-configs
        optional_silence_file_path = kaldi_data_local_dict.joinpath(
            'optional_silence.txt')
        silence_phones_file_path = kaldi_data_local_dict.joinpath(
            'silence_phones.txt')
        with optional_silence_file_path.open(mode='w') as fout:
            fout.write('SIL\n')
        with silence_phones_file_path.open(mode='w') as fout:
            fout.write('SIL\nsil\nspn\n')

        # task copy-helper-scripts
        # templates/cmd.sh -> kaldi/
        shutil.copy(f"{template_path.joinpath('cmd.sh')}",
                    f"{local_kaldi_path}")
        # templates/run.sh -> kaldi/, with the n-gram order substituted
        with open(f"{template_path.joinpath('run.sh')}", 'r') as fin, \
                open(f"{local_kaldi_path.joinpath('run.sh')}", 'w') as fout:
            fout.write(fin.read().replace('lm_order=1',
                                          f"lm_order={self.ngram}"))
        os.chmod(f"{local_kaldi_path.joinpath('run.sh')}", 0o774)
        # templates/score.sh -> kaldi/local/
        shutil.copy(f"{template_path.joinpath('score.sh')}",
                    f"{kaldi_local}")
        # cp -L dereferences symlinks while copying the wsj recipe helpers.
        run(f"cp -L -r /kaldi/egs/wsj/s5/steps {local_kaldi_path}/steps")
        run(f"cp -L -r /kaldi/egs/wsj/s5/utils {local_kaldi_path}/utils")

        # modified extract-wavs
        for audio_file in os.listdir(self.dataset.pathto.resampled):
            src = f'{self.dataset.pathto.resampled.joinpath(audio_file)}'
            dst = f'{local_kaldi_path}'
            shutil.copy(src, dst)
        print('done.')
    except Exception as e:
        # Stay best-effort, but no longer swallow KeyboardInterrupt or
        # SystemExit, and report what actually went wrong.
        print('couldnt prepare kaldi dirs: ', e)
def prepare_for_training():
    """Lay out the kaldi working directory for a training run.

    Takes no arguments; ``self`` is read from the enclosing scope. In order:

    1. create the kaldi sub-directories under ``self.path / 'kaldi'``;
    2. copy the pronunciation lexicon to ``data/local/dict/lexicon.txt``;
    3. write ``nonsilence_phones.txt`` from the letter-to-sound file,
       keeping first-seen order;
    4. render ``path.sh``, ``mfcc.conf`` and ``decode.config`` from the
       templates with pystache;
    5. move the generated corpus/test/train files out of
       ``self.path / 'output'``, write the silence phone lists, copy the
       helper scripts, the wsj ``steps``/``utils`` trees and the resampled
       audio into the kaldi directory.

    Step 5 is best-effort: any ``Exception`` raised there is reported with
    ``print`` and not propagated.
    """
    print("prepare_for_training")
    # task make-kaldi-subfolders
    kaldi_structure = KaldiPathStructure(self.path)

    local_kaldi_path = self.path.joinpath('kaldi')
    kaldi_data_local_dict = local_kaldi_path.joinpath('data', 'local', 'dict')
    kaldi_data_local = local_kaldi_path.joinpath('data', 'local')
    kaldi_data_test = local_kaldi_path.joinpath('data', 'test')
    kaldi_data_train = local_kaldi_path.joinpath('data', 'train')
    kaldi_conf = local_kaldi_path.joinpath('conf')
    kaldi_local = local_kaldi_path.joinpath('local')
    for directory in (local_kaldi_path, kaldi_data_local_dict,
                      kaldi_data_local, kaldi_data_test, kaldi_data_train,
                      kaldi_conf, kaldi_local):
        directory.mkdir(parents=True, exist_ok=True)

    # copy the pron dict
    shutil.copy(f"{self.pron_dict.lexicon_txt}",
                f"{kaldi_data_local_dict.joinpath('lexicon.txt')}")

    # task generate-kaldi-configs
    path_file_path = kaldi_structure.path.joinpath('path.sh')
    mfcc_file_path = kaldi_structure.conf.joinpath('mfcc.conf')
    decode_config_file_path = kaldi_structure.conf.joinpath('decode.config')

    template_path = Path('/elpis/elpis/wrappers/templates')
    path_resource = template_path.joinpath('path.sh')
    mfcc_resource = template_path.joinpath('mfcc.conf')
    decode_config_resource = template_path.joinpath('decode.config')

    # task make-nonsil-phones
    nonsilence_phones_path = kaldi_data_local_dict.joinpath(
        'nonsilence_phones.txt')
    # Build a unique, non-sorted list of the phone symbols. Sorting is
    # avoided because the rules may have order significance. Comment lines
    # beginning with '#' are ignored, as are lines without a second field.
    seen = OrderedDict()
    # ``with`` closes the file; iterating a bare open() left it to the GC.
    with open(self.pron_dict.l2s_path, "r") as l2s_file:
        for line in l2s_file:
            if line.startswith("#"):
                continue
            fields = line.split()[1:]
            if fields:
                # Only the keys matter; re-assigning keeps first-seen order.
                seen[fields[0]] = None
    with nonsilence_phones_path.open(mode='w') as fout:
        fout.writelines(f"{phone}\n" for phone in seen)

    # (file to write, template to read, values for the template)
    renderings = (
        (path_file_path, path_resource, {
            'KALDI_ROOT': '/kaldi',
            'HELPERS_PATH': '/kaldi-helpers',
            'CORPUS_PATH': f'..{self.dataset.pathto.original}',
        }),
        (mfcc_file_path, mfcc_resource, {
            'MFCC_SAMPLE_FREQUENCY': '44100',
            'MFCC_FRAME_LENGTH': '25',
            'MFCC_LOW_FREQ': '20',
            'MFCC_HIGH_FREQ': '22050',
            'MFCC_NUM_CEPS': '7',
        }),
        (decode_config_file_path, decode_config_resource, {
            'DECODE_BEAM': '11.0',
            'DECODE_FIRST_BEAM': '8.0',
        }),
    )
    for target, resource, context in renderings:
        with target.open(mode='w') as fout:
            with resource.open() as fin:
                fout.write(pystache.render(fin.read(), context))

    try:
        # task copy-generated-files
        output_path = self.path.joinpath('output')
        output_path.mkdir(parents=True, exist_ok=True)

        # training/corpus.txt -> kaldi/data/local/
        shutil.move(f"{output_path.joinpath('training', 'corpus.txt')}",
                    f"{kaldi_data_local}")

        # testing/{segments,text,utt2spk,wav.scp} -> kaldi/data/test/
        # training/{segments,text,utt2spk,wav.scp} -> kaldi/data/train/
        for split, destination in (('testing', kaldi_data_test),
                                   ('training', kaldi_data_train)):
            for name in ('segments', 'text', 'utt2spk', 'wav.scp'):
                shutil.move(f"{output_path.joinpath(split, name)}",
                            f"{destination.joinpath(name)}")

        # task copy-phones-configs
        optional_silence_file_path = kaldi_data_local_dict.joinpath(
            'optional_silence.txt')
        silence_phones_file_path = kaldi_data_local_dict.joinpath(
            'silence_phones.txt')
        with optional_silence_file_path.open(mode='w') as fout:
            fout.write('SIL\n')
        with silence_phones_file_path.open(mode='w') as fout:
            fout.write('SIL\nsil\nspn\n')

        # task copy-helper-scripts
        # templates/cmd.sh -> kaldi/
        shutil.copy(f"{template_path.joinpath('cmd.sh')}",
                    f"{local_kaldi_path}")
        # templates/run.sh -> kaldi/, with the n-gram order substituted
        with open(f"{template_path.joinpath('run.sh')}", 'r') as fin, \
                open(f"{local_kaldi_path.joinpath('run.sh')}", 'w') as fout:
            fout.write(fin.read().replace('lm_order=1',
                                          f"lm_order={self.ngram}"))
        os.chmod(f"{local_kaldi_path.joinpath('run.sh')}", 0o774)
        # templates/score.sh -> kaldi/local/
        shutil.copy(f"{template_path.joinpath('score.sh')}",
                    f"{kaldi_local}")
        # cp -L dereferences symlinks while copying the wsj recipe helpers.
        run(f"cp -L -r /kaldi/egs/wsj/s5/steps {local_kaldi_path}/steps")
        run(f"cp -L -r /kaldi/egs/wsj/s5/utils {local_kaldi_path}/utils")

        # modified extract-wavs
        for audio_file in os.listdir(self.dataset.pathto.resampled):
            src = f'{self.dataset.pathto.resampled.joinpath(audio_file)}'
            dst = f'{local_kaldi_path}'
            shutil.copy(src, dst)
        print('kaldi dirs preparation done.')
    except Exception as e:
        # Stay best-effort, but let KeyboardInterrupt / SystemExit through
        # instead of catching BaseException.
        print('couldnt prepare kaldi dirs: ', e)