def determinize(self, acwt=1.0, beam=6):
    '''
    Determinize the lattice.

    Args:
        <acwt>: acoustic scale, a non-negative int or float.
        <beam>: prune beam, a non-negative int or float.
    Return:
        A new Lattice object.
    Raises:
        WrongOperation if the lattice holds no data;
        KaldiProcessError if the kaldi command fails.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if self.is_void:
        raise WrongOperation('No any lattice data.')

    # Kaldi's lattice-determinize-pruned accepts float values for both
    # --acoustic-scale and --beam, so ints and floats are allowed here
    # (the original rejected int acwt and float beam).
    assert isinstance(acwt, (int, float)) and acwt >= 0, "Expected <acwt> is positive int or float value."
    assert isinstance(beam, (int, float)) and beam >= 0, "Expected <beam> is positive int or float value."

    cmd = f"lattice-determinize-pruned --acoustic-scale={acwt} --beam={beam} ark:- ark:-"

    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=self.data)

    if cod != 0 or out == b'':
        print(err.decode())
        raise KaldiProcessError("Failed to determinize lattice.")
    else:
        newName = f"determinize({self.name})"
        return Lattice(data=out, wordSymbolTable=self.wordSymbolTable, hmm=self.hmm, name=newName)
def add_penalty(self, penalty=0):
    '''
    Apply a word insertion penalty to this lattice.

    Args:
        <penalty>: a non-negative int or float penalty value.
    Return:
        An new Lattice object.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if self.is_void:
        raise WrongOperation('No any lattice to scale.')

    assert isinstance(penalty, (int,float)) and penalty >= 0, "Expected <penalty> is positive int or float value."

    cmd = f"lattice-add-penalty --word-ins-penalty={penalty} ark:- ark:-"

    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=self.data)

    # Success path first: a zero exit code with non-empty output.
    if cod == 0 and out != b'':
        return Lattice(data=out,
                       wordSymbolTable=self.wordSymbolTable,
                       hmm=self.hmm,
                       name=f"add_penalty({self.name})")

    print(err.decode())
    raise KaldiProcessError("Failed to add penalty.")
def compute_postprob_norm(ali, posrProbDim):
    '''
    Compute alignment counts in order to normalize acoustic model posterior probability.
    For more help information, look at the Kaldi <analyze-counts> command.

    Args:
        <ali>: exkaldi NumpyAlignmentPhone or NumpyAlignmentPdf object.
        <posrProbDim>: the dimensionality of posterior probability.
    Return:
        A numpy array of the normalization (log of the relative frequencies).
    Raises:
        UnsupportedType for any other alignment type;
        KaldiProcessError if the kaldi command fails.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    # Guard clause instead of the original "pass / else raise"; also repairs
    # the garbled error message ("Expected exkaldi AlignmentPhone or but got").
    if type_name(ali) not in ["NumpyAlignmentPhone", "NumpyAlignmentPdf"]:
        raise UnsupportedType(
            f'Expected exkaldi NumpyAlignmentPhone or NumpyAlignmentPdf object but got a {type_name(ali)}.')

    cmd = f"analyze-counts --print-args=False --verbose=0 --binary=false --counts-dim={posrProbDim} ark:- -"
    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=ali.data)
    if (isinstance(cod, int) and cod != 0) or out == b"":
        print(err.decode())
        raise KaldiProcessError('Analyze counts failed.')
    else:
        # analyze-counts prints a bracketed vector; strip the brackets and parse.
        out = out.decode().strip().strip("[]").strip().split()
        counts = np.array(out, dtype=np.int32)
        # NOTE(review): a unit with zero occupancy yields log(0) = -inf here —
        # presumably callers expect that; confirm before changing.
        countBias = np.log(counts / np.sum(counts))
        return countBias
def compute_cmvn_stats(feat, spk2utt=None, name="cmvn"):
    '''
    Compute CMVN (cepstral mean and variance normalization) statistics.

    Args:
        <feat>: exkaldi feature object.
        <spk2utt>: spk2utt file or exkaldi ScriptTable object. If None,
                   per-utterance statistics are computed.
        <name>: a string.
    Return:
        A exkaldi CMVN statistics object.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    featType = type_name(feat)
    if featType == "BytesFeature":
        feat = feat.sort("utt")
    elif featType == "NumpyFeature":
        feat = feat.sort("utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected <feat> is a exkaldi feature object but got {type_name(feat)}."
        )

    spk2uttTemp = tempfile.NamedTemporaryFile("w+", encoding="utf-8")
    try:
        if spk2utt is None:
            cmd = 'compute-cmvn-stats ark:- ark:-'
        else:
            # Sort the speaker map and dump it to a temp file for kaldi to read.
            if isinstance(spk2utt, str):
                if not os.path.isfile(spk2utt):
                    raise WrongPath(f"No such file:{spk2utt}.")
                ScriptTable(name="spk2utt").load(spk2utt).sort().save(spk2uttTemp)
            elif isinstance(spk2utt, ScriptTable):
                spk2utt.sort().save(spk2uttTemp)
            else:
                raise UnsupportedType(
                    f"<spk2utt> should be a file path or ScriptTable object but got {type_name(spk2utt)}."
                )
            spk2uttTemp.seek(0)
            cmd = f'compute-cmvn-stats --spk2utt=ark:{spk2uttTemp.name} ark:- ark:-'

        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=feat.data)

        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to compute CMVN statistics.')

        return BytesCMVNStatistics(out, name, indexTable=None)
    finally:
        spk2uttTemp.close()
def am_rescore(self, hmm, feat):
    '''
    Replace the acoustic scores of this lattice with scores computed
    from another HMM-GMM model (kaldi gmm-rescore-lattice).

    Args:
        <hmm>: exkaldi HMM object or file path.
        <feat>: exkaldi feature object matching the lattice utterances.
    Return:
        An new Lattice object.
    Raises:
        WrongOperation if the lattice holds no data;
        UnsupportedType for bad <hmm>/<feat>;
        KaldiProcessError if the kaldi command fails.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if self.is_void:
        raise WrongOperation('No any lattice data.')

    hmmTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")
    # Fixed: the feature temp file had suffix ".mdl" (copy-paste slip).
    featTemp = tempfile.NamedTemporaryFile("wb+", suffix=".ark")
    try:
        if isinstance(hmm, str):
            assert os.path.isfile(hmm), f"No such file: {hmm}."
            hmmFile = hmm
        elif type_name(hmm) in ["BaseHMM", "MonophoneHMM", "TriphoneHMM"]:
            hmmTemp.write(hmm.data)
            hmmTemp.seek(0)
            hmmFile = hmmTemp.name
        else:
            raise UnsupportedType(f"<hmm> should be file path or exkaldi HMM object but got: {type_name(hmm)}.")

    if True:
        pass
    
def use_cmvn_sliding(feat, windowsSize=None, std=False):
    '''
    Apply sliding-window CMVN statistics.

    Args:
        <feat>: exkaldi feature object.
        <windowsSize>: windows size. If None, use windows size larger than the frames of feature.
        <std>: a bool value. If True, also normalize the variance.
    Return:
        An exkaldi feature object.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if isinstance(feat, BytesFeature):
        pass
    elif type_name(feat) == "NumpyFeature":
        feat = feat.to_bytes()
    else:
        raise UnsupportedType(
            f"Expected <feat> is an exkaldi feature object but got {type_name(feat)}."
        )

    if windowsSize is None:  # fixed idiom: was "== None"
        featLen = feat.lens[1]
        maxLen = max([length for utt, length in featLen])
        # Round the longest utterance up to the next multiple of 100 frames
        # so a single window always covers the whole utterance.
        windowsSize = math.ceil(maxLen / 100) * 100
    else:
        assert isinstance(windowsSize, int), "Expected <windowsSize> is an int value."

    std = 'true' if std else 'false'

    cmd = f'apply-cmvn-sliding --cmn-window={windowsSize} --min-cmn-window=100 --norm-vars={std} ark:- ark:-'
    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=feat.data)
    if (isinstance(cod, int) and cod != 0) or out == b'':
        print(err.decode())
        raise KaldiProcessError('Failed to use sliding CMVN.')
    else:
        newName = f"cmvn({feat.name},{windowsSize})"
        return BytesFeature(out, newName, indexTable=None)
def loadArkScpFile(fileName, suffix):
    '''
    Load a feature archive (.ark) or script (.scp) file via kaldi copy-feats.

    Args:
        <fileName>: file path.
        <suffix>: "ark" for archive files; anything else is read as scp.
    Return:
        A BytesMatrix object holding the binary feature data.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    readSpecifier = 'ark:' if suffix == "ark" else 'scp:'
    cmd = 'copy-feats ' + readSpecifier + '{} ark:-'.format(fileName)

    out, err, cod = run_shell_command(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    if (isinstance(cod, int) and cod != 0) or out == b'':
        print(err.decode())
        raise KaldiProcessError('Copy feat defeated.')

    return BytesMatrix(out)
def scale(self, acwt=1, invAcwt=1, ac2lm=0, lmwt=1, lm2ac=0):
    '''
    Scale the lattice scores.

    Args:
        <acwt>: acoustic scale.
        <invAcwt>: inverse acoustic scale.
        <ac2lm>: acoustic to lm scale.
        <lmwt>: language lm scale.
        <lm2ac>: lm scale to acoustic.
    Return:
        An new Lattice object.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if self.is_void:
        raise WrongOperation('No any lattice to scale.')

    for x in [acwt, invAcwt, ac2lm, lmwt, lm2ac]:
        assert x >= 0, "Expected scale is positive value."

    # Assemble the whole command in one expression (same option order as before).
    cmd = ('lattice-scale'
           + ' --acoustic-scale={}'.format(acwt)
           + ' --acoustic2lm-scale={}'.format(ac2lm)
           + ' --inv-acoustic-scale={}'.format(invAcwt)
           + ' --lm-scale={}'.format(lmwt)
           + ' --lm2acoustic-scale={}'.format(lm2ac)
           + ' ark:- ark:-')

    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=self.data)

    if cod == 0 and out != b'':
        return Lattice(data=out,
                       wordSymbolTable=self.wordSymbolTable,
                       hmm=self.hmm,
                       name=f"scale({self.name})")

    print(err.decode())
    raise KaldiProcessError("Failed to scale lattice.")
def load_ali(target, aliType=None, name="ali", hmm=None):
    '''
    Load alignment data.

    Args:
        <target>: Python dict object, bytes object, exkaldi alignment object, kaldi alignment file or .npy file.
        <aliType>: None, or one of 'transitionID', 'phoneID', 'pdfID'. It will return different alignment object.
        <name>: a string.
        <hmm>: file path or exkaldi HMM object. Needed when <aliType> is 'phoneID'
               or 'pdfID' and <target> is a kaldi alignment file.
    Return:
        exkaldi alignment data objects.
    '''
    assert isinstance(
        name, str) and len(name) > 0, "Name shoud be a string avaliable."

    ExkaldiInfo.vertify_kaldi_existed()

    def transform(data, cmd):
        # Run a kaldi command over the alignment stream and parse its
        # text-format output (one utterance per line) into {utt: int32 array}.
        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=data)
        # Fixed: the original used "and", which let a failed command with
        # partial output slip through (siblings in this file all use "or").
        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to transform alignment.')
        else:
            result = {}
            sp = BytesIO(out)
            for line in sp.readlines():
                line = line.decode()
                line = line.strip().split()
                utt = line[0]
                matrix = np.array(line[1:], dtype=np.int32)
                result[utt] = matrix
            # Fixed: the original returned the undefined name "results" (NameError).
            return result

    if isinstance(target, dict):
        if aliType is None:
            result = NumpyAlignment(target, name)
        elif aliType == "transitionID":
            result = NumpyAlignmentTrans(target, name)
        elif aliType == "phoneID":
            result = NumpyAlignmentPhone(target, name)
        elif aliType == "pdfID":
            result = NumpyAlignmentPdf(target, name)
        else:
            raise WrongOperation(
                f"<aliType> should be None, 'transitionID', 'phoneID' or 'pdfID' but got {aliType}."
            )
        result.check_format()
        return result

    elif type_name(target) in [
            "NumpyAlignment", "NumpyAlignmentTrans", "NumpyAlignmentPhone",
            "NumpyAlignmentPdf", "BytesAlignmentTrans"
    ]:
        result = copy.deepcopy(target)
        result.rename(name)
        return result

    elif isinstance(target, str):
        allFiles = list_files(target)
        # Accumulators, one per possible result type; only non-void ones are returned.
        results = {
            "NumpyAlignment": NumpyAlignment(),
            "NumpyAlignmentTrans": NumpyAlignmentTrans(),
            "NumpyAlignmentPhone": NumpyAlignmentPhone(),
            "NumpyAlignmentPdf": NumpyAlignmentPdf(),
            "BytesAlignmentTrans": BytesAlignmentTrans(),
        }

        for fileName in allFiles:
            fileName = os.path.abspath(fileName)

            if fileName.endswith(".npy"):
                temp = __read_data_from_file(fileName, "npy")
                if aliType is None:
                    temp = NumpyAlignment(temp.data)
                    results["NumpyAlignment"] += temp
                elif aliType == "transitionID":
                    temp = NumpyAlignmentTrans(temp.data)
                    results["NumpyAlignmentTrans"] += temp
                elif aliType == "phoneID":
                    temp = NumpyAlignmentPhone(temp.data)
                    results["NumpyAlignmentPhone"] += temp
                elif aliType == "pdfID":
                    temp = NumpyAlignmentPdf(temp.data)
                    results["NumpyAlignmentPdf"] += temp
                else:
                    raise WrongOperation(
                        f"<aliType> should be None, 'transitionID','phoneID' or 'pdfID' but got {aliType}."
                    )
            else:
                # Kaldi alignment file (possibly gzipped): stream it through a pipe.
                if fileName.endswith('.gz'):
                    cmd = f'gunzip -c {fileName}'
                else:
                    cmd = f'cat {fileName}'

                if aliType is None or aliType == "transitionID":
                    out, err, cod = run_shell_command(cmd,
                                                      stdout=subprocess.PIPE,
                                                      stderr=subprocess.PIPE)
                    if (isinstance(cod, int) and cod != 0) or out == b'':
                        print(err.decode())
                        raise ShellProcessError(
                            "Failed to get the alignment data from file.")
                    else:
                        temp = BytesAlignmentTrans(out)
                        results["BytesAlignmentTrans"] += temp
                else:
                    # A model file is needed to map transition-ids to phones/pdfs.
                    hmmTemp = tempfile.NamedTemporaryFile("wb+")
                    try:
                        # "BaseHMM" added for consistency with the other
                        # functions in this file; "HMM" kept for compatibility.
                        if type_name(hmm) in ("HMM", "BaseHMM", "MonophoneHMM", "TriphoneHMM"):
                            hmm.save(hmmTemp)
                            hmmFileName = hmmTemp.name
                        elif isinstance(hmm, str):
                            if not os.path.isfile(hmm):
                                raise WrongPath(f"No such file:{hmm}.")
                            hmmFileName = hmm
                        else:
                            raise UnsupportedType(
                                f"<hmm> should be a filePath or exkaldi HMM and its sub-class object. but got {type_name(hmm)}."
                            )

                        if aliType == "phoneID":
                            cmd += f" | ali-to-phones --per-frame=true {hmmFileName} ark:- ark,t:-"
                            ali = transform(None, cmd)
                            results["NumpyAlignmentPhone"] += NumpyAlignmentPhone(ali)
                        elif aliType == "pdfID":
                            # Fixed: the original tested "target" (the file path)
                            # instead of <aliType>, and rebound cmd with "=" so the
                            # gunzip/cat stage was lost.
                            cmd += f" | ali-to-pdf {hmmFileName} ark:- ark,t:-"
                            ali = transform(None, cmd)
                            results["NumpyAlignmentPdf"] += NumpyAlignmentPdf(ali)
                        else:
                            raise WrongOperation(
                                f"<aliType> should be 'transitionID', 'phoneID' or 'pdfID' but got {aliType}."
                            )
                    finally:
                        # Fixed: the original rebound the temp-file variable to the
                        # transformed alignment, so close() hit the wrong object.
                        hmmTemp.close()

        finalResult = []
        for obj in results.values():
            if not obj.is_void:
                obj.rename(name)
                finalResult.append(obj)

        if len(finalResult) == 0:
            raise WrongOperation(
                "<target> dose not include any data avaliable.")
        elif len(finalResult) == 1:
            finalResult = finalResult[0]

        return finalResult

    else:
        # Fixed: the original fell off the end and returned None silently.
        raise UnsupportedType(
            f"<target> should be dict, exkaldi alignment object or file path but got {type_name(target)}.")
def use_cmvn(feat, cmvn, utt2spk=None, std=False):
    '''
    Apply CMVN statistics to feature.

    Args:
        <feat>: exkaldi feature object.
        <cmvn>: exkaldi CMVN statistics object.
        <utt2spk>: utt2spk file path or ScriptTable object.
        <std>: If true, apply std normalization.
    Return:
        A new feature object: numpy format if <feat> was numpy format,
        otherwise bytes format.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    # Remember the input format now: <feat> is converted to bytes below, so
    # the original's final type_name(feat) check could never see "NumpyFeature"
    # and numpy input was never converted back.
    featSourceType = type_name(feat)

    if featSourceType == "BytesFeature":
        feat = feat.sort(by="utt")
    elif featSourceType == "NumpyFeature":
        feat = feat.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected exkaldi feature but got {type_name(feat)}.")

    if type_name(cmvn) == "BytesCMVNStatistics":
        cmvn = cmvn.sort(by="utt")
    elif type_name(cmvn) == "NumpyCMVNStatistics":
        cmvn = cmvn.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected exkaldi CMVN statistics but got {type_name(cmvn)}.")

    cmvnTemp = tempfile.NamedTemporaryFile('wb+', suffix='_cmvn.ark')
    utt2spkTemp = tempfile.NamedTemporaryFile('w+',
                                              suffix="_utt2spk",
                                              encoding="utf-8")
    try:
        cmvnTemp.write(cmvn.data)
        cmvnTemp.seek(0)

        if std is True:
            stdOption = " --norm-vars true"
        else:
            stdOption = ""

        if utt2spk is None:
            cmd = f'apply-cmvn{stdOption} ark:{cmvnTemp.name} ark:- ark:-'
        else:
            if isinstance(utt2spk, str):
                if not os.path.isfile(utt2spk):
                    raise WrongPath(f"No such file:{utt2spk}.")
                utt2spkSorted = ScriptTable(
                    name="utt2spk").load(utt2spk).sort()
                utt2spkSorted.save(utt2spkTemp)
            elif isinstance(utt2spk, ScriptTable):
                utt2spkSorted = utt2spk.sort()
                utt2spkSorted.save(utt2spkTemp)
            else:
                raise UnsupportedType(
                    f"<utt2spk> should be a file path or ScriptTable object but got {type_name(utt2spk)}."
                )
            utt2spkTemp.seek(0)
            cmd = f'apply-cmvn{stdOption} --utt2spk=ark:{utt2spkTemp.name} ark:{cmvnTemp.name} ark:- ark:-'

        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=feat.data)

        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to apply CMVN statistics.')
        else:
            newName = f"cmvn({feat.name},{cmvn.name})"
            # Convert back to numpy format when the caller supplied numpy input.
            if featSourceType == "NumpyFeature":
                return BytesFeature(out, newName, indexTable=None).to_numpy()
            else:
                return BytesFeature(out, newName, indexTable=None)
    finally:
        cmvnTemp.close()
        utt2spkTemp.close()
def __compute_feature(wavFile, kaldiTool, useSuffix=None, name="feat"):
    '''
    Internal helper: run a prepared kaldi feature-extraction command over
    wav or scp input and merge the results into one feature object.

    Args:
        <wavFile>: wav file path (shell glob allowed), scp file path, or ScriptTable object.
        <kaldiTool>: the kaldi command prefix, e.g. "compute-mfcc-feats ...".
        <useSuffix>: "scp" or "wav", to declare the type of files whose suffix is ambiguous.
        <name>: name of the returned feature object.
    Return:
        A BytesFeature object.
    '''
    if useSuffix is not None:
        assert isinstance(useSuffix, str), "Expected <useSuffix> is a string."
        useSuffix = useSuffix.strip().lower()[-3:]
    else:
        useSuffix = ""
    assert useSuffix in ["", "scp",
                         "wav"], 'Expected <useSuffix> is "scp" or "wav".'

    ExkaldiInfo.vertify_kaldi_existed()

    wavFileTemp = tempfile.NamedTemporaryFile("w+",
                                              suffix=".scp",
                                              encoding="utf-8")
    try:
        if isinstance(wavFile, str):
            if os.path.isdir(wavFile):
                raise WrongOperation(
                    f'Expected <wavFile> is file path but got a directory:{wavFile}.'
                )
            else:
                # "ls" expands shell glob patterns in <wavFile>.
                out, err, cod = run_shell_command(f'ls {wavFile}',
                                                  stdout=subprocess.PIPE,
                                                  stderr=subprocess.PIPE)
                if out == b'':
                    raise WrongPath(f"No such file:{wavFile}.")
                else:
                    allFiles = out.decode().strip().split('\n')
        elif isinstance(wavFile, ScriptTable):
            wavFile = wavFile.sort()
            wavFile.save(wavFileTemp)
            allFiles = [
                wavFileTemp.name,
            ]
        else:
            raise UnsupportedType(
                f'Expected filename-like string but got a {type_name(wavFile)}.'
            )

        results = []
        for wavFile in allFiles:
            wavFile = os.path.abspath(wavFile)
            if wavFile[-3:].lower() == "wav":
                # Utterance ID is the base name without suffix, dots removed.
                fileName = os.path.basename(wavFile)
                uttID = "".join(fileName[0:-4].split("."))
                cmd = f"echo {uttID} {wavFile} | {kaldiTool} scp,p:- ark:-"
            elif wavFile[-3:].lower() == 'scp':
                cmd = f"{kaldiTool} scp,p:{wavFile} ark:-"
            elif "wav" in useSuffix:
                fileName = os.path.basename(wavFile)
                uttID = "".join(fileName[0:-4].split("."))
                cmd = f"echo {uttID} {wavFile} | {kaldiTool} scp,p:- ark:-"
            elif "scp" in useSuffix:
                cmd = f"{kaldiTool} scp,p:{wavFile} ark:-"
            else:
                raise UnsupportedType(
                    'Unknown file suffix. You can declare it by making <useSuffix> "wav" or "scp".'
                )

            out, err, cod = run_shell_command(cmd,
                                              stdout=subprocess.PIPE,
                                              stderr=subprocess.PIPE)
            # Fixed: the original tested isinstance(out, int); the exit status is <cod>.
            if (isinstance(cod, int) and cod != 0) or out == b'':
                print(err.decode())
                raise KaldiProcessError(f'Failed to compute feature:{name}.')
            else:
                results.append(BytesFeature(out))
    finally:
        wavFileTemp.close()

    if len(results) == 0:
        raise WrongOperation("No any feature date in file path.")
    else:
        # Merge all per-file features into a single object.
        result = results[0]
        for i in results[1:]:
            result += i
        result.rename(name)
        return result
def get_1best(self, wordSymbolTable=None, hmm=None, lmwt=1, acwt=1.0, phoneLevel=False):
    '''
    Get 1 best result with text formation.

    Args:
        <wordSymbolTable>: None or file path or ListTable object or LexiconBank object.
        <hmm>: None or file path or HMM object.
        <lmwt>: language model weight (non-negative int).
        <acwt>: acoustic model weight.
        <phoneLevel>: If True, return phone results.
    Return:
        An exkaldi Transcription object.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if self.is_void:
        raise WrongOperation('No any data in lattice.')

    assert isinstance(lmwt, int) and lmwt >=0, "Expected <lmwt> is a non-negative int number."

    # Fall back to the table/model attached to this lattice when not given.
    if wordSymbolTable is None:
        assert self.wordSymbolTable is not None, "<wordSymbolTable> is necessary because no wordSymbol table is avaliable."
        wordSymbolTable = self.wordSymbolTable

    if hmm is None:
        assert self.hmm is not None, "<hmm> is necessary because no wordSymbol table is avaliable."
        hmm = self.hmm

    modelTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")
    wordSymbolTemp = tempfile.NamedTemporaryFile("w+", suffix="_words.txt", encoding="utf-8")

    try:
        # Resolve <wordSymbolTable> to a concrete file path for kaldi.
        if isinstance(wordSymbolTable, str):
            assert os.path.isfile(wordSymbolTable), f"No such file: {wordSymbolTable}."
            wordsFile = wordSymbolTable
        elif type_name(wordSymbolTable) == "LexiconBank":
            # Phone-level output needs the phone symbol table instead of words.
            if phoneLevel:
                wordSymbolTable.dump_dict("phones", wordSymbolTemp)
            else:
                wordSymbolTable.dump_dict("words", wordSymbolTemp)
            wordsFile = wordSymbolTemp.name
        elif type_name(wordSymbolTable) == "ListTable":
            wordSymbolTable.save(wordSymbolTemp)
            wordSymbolTemp.seek(0)
            wordsFile = wordSymbolTemp.name
        else:
            raise UnsupportedType(f"<wordSymbolTable> should be file name, LexiconBank object or ListTable object but got: {type_name(wordSymbolTable)}.")

        # Resolve <hmm> to a concrete file path for kaldi.
        if isinstance(hmm, str):
            assert os.path.isfile(hmm), f"No such file: {hmm}."
            hmmFile = hmm
        elif type_name(hmm) in ["MonophoneHMM","TriphoneHMM"]:
            # NOTE(review): no seek(0)/flush after save() here, unlike the
            # write()+seek(0) pattern used elsewhere in this file — presumably
            # HMM.save flushes to disk itself; confirm.
            hmm.save(modelTemp)
            hmmFile = modelTemp.name
        else:
            raise UnsupportedType(f"<hmm> should be file name, exkaldi HMM object but got: {type_name(hmm)}.")

        # For phone-level output, first align the lattice to phones, then
        # pipe it into lattice-best-path.
        if phoneLevel:
            cmd0 = f'lattice-align-phones --replace-output-symbols=true {hmmFile} ark:- ark:- | '
        else:
            cmd0 = ""

        cmd1 = f"lattice-best-path --lm-scale={lmwt} --acoustic-scale={acwt} --word-symbol-table={wordsFile} --verbose=2 ark:- ark,t:- "
        cmd = cmd0 + cmd1

        out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)

        if cod != 0 or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to get 1-best from lattice.')
        else:
            # Each output line is "<uttID> <word sequence>".
            out = out.decode().strip().split("\n")
            if phoneLevel:
                newName = "1-best-phones"
            else:
                newName = "1-best-words"
            results = Transcription(name=newName)
            # NOTE(review): the loop variable "re" shadows the regex module
            # within this loop body.
            for re in out:
                re = re.strip().split(maxsplit=1)
                if len(re) == 0:
                    continue
                elif len(re) == 1:
                    # Utterance decoded to an empty word sequence: keep a
                    # single-space placeholder.
                    results[re[0]] = " "
                else:
                    results[re[0]] = re[1]
            return results
    finally:
        modelTemp.close()
        wordSymbolTemp.close()
def gmm_decode(feat, hmm, HCLGFile, wordSymbolTable, beam=10, latBeam=8, acwt=1, minActive=200, maxActive=7000, maxMem=50000000, config=None, maxThreads=1):
    '''
    Decode by generating lattice from feature and GMM model.

    Args:
        <feat>: An exkaldi feature object.
        <hmm>: An exkaldi HMM object or file path.
        <HCLGFile>: HCLG file path.
        <wordSymbolTable>: words.txt file path or exkaldi LexiconBank object or exkaldi ListTable object.
        <beam>: beam size.
        <latBeam>: lattice beam size.
        <acwt>: acoustic model weight.
        <minActive>: minimum number of active states.
        <maxActive>: maximum number of active states.
        <maxMem>: maximum approximate memory usage in determinization.
        <config>: decode configure file.
        <maxThreads>: the number of mutiple threads.

        Some usual options can be assigned directly. If you want use more, set <config> = your-configure.
        Note that if you do this, these usual configures we provided will be ignored.
        You can use .check_config('gmm_decode') function to get configure information you could set.
        Also run shell command "gmm-latgen-faster" to look their meaning.
    Return:
        An exkaldi Lattice object.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    # Decoding consumes bytes-format feature data.
    if type_name(feat) == "BytesFeature":
        pass
    elif type_name(feat) == "NumpyFeature":
        feat = feat.to_bytes()
    else:
        raise UnsupportedType(f"Expected <feat> is an exkaldi feature object but got: {type_name(feat)}.")

    assert isinstance(HCLGFile, str), "<HCLGFile> should be a file path."
    if not os.path.isfile(HCLGFile):
        raise WrongPath(f"No such file:{HCLGFile}")

    # Choose the single- or multi-threaded decoder binary.
    if maxThreads > 1:
        kaldiTool = f"gmm-latgen-faster-parallel --num-threads={maxThreads} "
    else:
        kaldiTool = "gmm-latgen-faster "

    kaldiTool += f'--allow-partial=true '
    kaldiTool += f'--min-active={minActive} '
    kaldiTool += f'--max-active={maxActive} '
    # NOTE(review): option is written "--max_mem" with an underscore while all
    # the others use hyphens — presumably kaldi's option parser normalizes
    # underscores to hyphens; confirm.
    kaldiTool += f'--max_mem={maxMem} '
    kaldiTool += f'--beam={beam} '
    kaldiTool += f'--lattice-beam={latBeam} '
    kaldiTool += f'--acoustic-scale={acwt} '

    wordsTemp = tempfile.NamedTemporaryFile("w+", suffix="_words.txt", encoding="utf-8")
    modelTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")

    try:
        # Resolve <wordSymbolTable> to a concrete file path.
        if type_name(wordSymbolTable) == "LexiconBank":
            wordSymbolTable.dump_dict("words", wordsTemp)
            wordsFile = wordsTemp.name
        elif type_name(wordSymbolTable) == "ListTable":
            wordSymbolTable.save(wordsTemp)
            wordsTemp.seek(0)
            wordsFile = wordsTemp.name
        elif isinstance(wordSymbolTable, str):
            if not os.path.isfile(wordSymbolTable):
                raise WrongPath(f"No such file:{wordSymbolTable}.")
            else:
                wordsFile = wordSymbolTable
        else:
            raise UnsupportedType(f"<wordSymbolTable> should be a file path or exkaldi LexiconBank object but got {type_name(wordSymbolTable)}.")

        kaldiTool += f'--word-symbol-table={wordsFile} '

        # Extra user-supplied options: boolean True appends the bare flag,
        # other values append "key=value".
        if config is not None:
            if check_config(name='gmm_decode', config=config):
                for key,value in config.items():
                    if isinstance(value, bool):
                        if value is True:
                            kaldiTool += f"{key} "
                    else:
                        kaldiTool += f" {key}={value}"

        # Resolve <hmm> to a concrete model file path.
        if type_name(hmm) in ["MonophoneHMM", "TriphoneHMM"]:
            modelTemp.write(hmm.data)
            modelTemp.seek(0)
            hmmFile = modelTemp.name
        elif isinstance(hmm, str):
            if not os.path.isfile(hmm):
                raise WrongPath(f"No such file:{hmm}.")
            else:
                hmmFile = hmm
        else:
            raise UnsupportedType(f"<hmm> should be exkaldi HMM object or file path but got {type_name(hmm)}.")

        cmd = f'{kaldiTool} {hmmFile} {HCLGFile} ark:- ark:-'
        out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=feat.data)

        if cod != 0 or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to generate lattice.')
        else:
            newName = f"lat({feat.name})"
            return Lattice(data=out, name=newName)
    finally:
        wordsTemp.close()
        modelTemp.close()
def wer(ref, hyp, ignore=None, mode='all'):
    '''
    Compute WER (word error rate) between <ref> and <hyp>.

    Args:
        <ref>, <hyp>: exkaldi transcription object or file path.
        <ignore>: ignore a symbol (it is removed from both texts before scoring).
        <mode>: "all" or "present".
    Return:
        a namedtuple of score information.
    '''
    assert mode in ['all', 'present'], 'Expected <mode> to be "present" or "all".'
    ExkaldiInfo.vertify_kaldi_existed()

    hypTemp = tempfile.NamedTemporaryFile("w+", suffix=".txt", encoding="utf-8")
    refTemp = tempfile.NamedTemporaryFile("w+", suffix=".txt", encoding="utf-8")

    try:
        if ignore is None:
            # No filtering needed: resolve both inputs to file paths directly.
            if type_name(hyp) == "Transcription":
                hyp.save(hypTemp)
                hypTemp.seek(0)
                hypFileName = hypTemp.name
            elif isinstance(hyp, str):
                if not os.path.isfile(hyp):
                    raise WrongPath(f"No such file:{hyp}.")
                else:
                    hypFileName = hyp
            else:
                raise UnsupportedType(
                    '<hyp> should be exkaldi Transcription object or file path.'
                )

            if type_name(ref) == "Transcription":
                ref.save(refTemp)
                refTemp.seek(0)
                refFileName = refTemp.name
            elif isinstance(ref, str):
                if not os.path.isfile(ref):
                    raise WrongPath(f"No such file:{ref}.")
                else:
                    refFileName = ref
            else:
                raise UnsupportedType(
                    '<ref> should be exkaldi Transcription object or file path.'
                )

            cmd = f'compute-wer --text --mode={mode} ark:{refFileName} ark,p:{hypFileName}'
            scoreOut, scoreErr, _ = run_shell_command(cmd,
                                                      stdout=subprocess.PIPE,
                                                      stderr=subprocess.PIPE)
        else:
            # Load both texts into memory, filter the ignored symbol with sed,
            # then score the filtered temp files.
            if type_name(hyp) == "Transcription":
                hyp = hyp.save()
            elif isinstance(hyp, str):
                if not os.path.isfile(hyp):
                    raise WrongPath(f"No such file:{hyp}.")
                else:
                    with open(hyp, "r", encoding="utf-8") as fr:
                        hyp = fr.read()
            else:
                raise UnsupportedType(
                    '<hyp> should be exkaldi Transcription object or file path.'
                )

            # Fixed: the original redirected sed's stdout into the temp file
            # ("> file"), which left the captured stdout empty so the emptiness
            # check below always raised. Capture stdout and write it ourselves.
            cmd = f'sed "s/{ignore} //g"'
            hypOut, err, cod = run_shell_command(cmd,
                                                 stdin=subprocess.PIPE,
                                                 stdout=subprocess.PIPE,
                                                 stderr=subprocess.PIPE,
                                                 inputs=hyp.encode())
            if cod != 0 or len(hypOut) == 0:
                print(err.decode())
                raise WrongDataFormat("<hyp> has wrong data formation.")
            hypTemp.write(hypOut.decode())
            hypTemp.seek(0)

            if type_name(ref) == "Transcription":
                ref = ref.save()
            elif isinstance(ref, str):
                if not os.path.isfile(ref):
                    raise WrongPath(f"No such file:{ref}.")
                else:
                    with open(ref, "r", encoding="utf-8") as fr:
                        ref = fr.read()
            else:
                raise UnsupportedType(
                    '<ref> should be exkaldi Transcription object or file path.'
                )

            # Fixed: the original passed inputs=hyp.encode() here, so <ref>
            # was never actually filtered.
            cmd = f'sed "s/{ignore} //g"'
            refOut, err, cod = run_shell_command(cmd,
                                                 stdin=subprocess.PIPE,
                                                 stdout=subprocess.PIPE,
                                                 stderr=subprocess.PIPE,
                                                 inputs=ref.encode())
            if cod != 0 or len(refOut) == 0:
                print(err.decode())
                raise WrongDataFormat("<ref> has wrong data formation.")
            refTemp.write(refOut.decode())
            refTemp.seek(0)

            cmd = f'compute-wer --text --mode={mode} ark:{refTemp.name} ark,p:{hypTemp.name}'
            scoreOut, scoreErr, _ = run_shell_command(cmd,
                                                      stdout=subprocess.PIPE,
                                                      stderr=subprocess.PIPE)
    finally:
        hypTemp.close()
        refTemp.close()

    if len(scoreOut) == 0:
        print(scoreErr.decode())
        raise KaldiProcessError("Failed to compute WER.")
    else:
        # Parse compute-wer's three report lines.
        out = scoreOut.decode().split("\n")
        pattern1 = '%WER (.*) \[ (.*) \/ (.*), (.*) ins, (.*) del, (.*) sub \]'
        pattern2 = "%SER (.*) \[ (.*) \/ (.*) \]"
        pattern3 = "Scored (.*) sentences, (.*) not present in hyp."
        s1 = re.findall(pattern1, out[0])[0]
        s2 = re.findall(pattern2, out[1])[0]
        s3 = re.findall(pattern3, out[2])[0]

        return namedtuple("Score", [
            "WER", "words", "insErr", "delErr", "subErr", "SER", "sentences",
            "wrongSentences", "missedSentences"
        ])(
            float(s1[0]),  #WER
            int(s1[2]),  #words
            int(s1[3]),  #ins
            int(s1[4]),  #del
            int(s1[5]),  #sub
            float(s2[0]),  #SER
            int(s2[1]),  #sentences
            int(s2[2]),  #wrong sentences
            int(s3[1])  #missed sentences
        )
def prepare_data():
    '''
    Prepare TIMIT data: generate wav.scp, utt2spk, spk2utt and text files
    for the train, dev and test sets under exp/data.

    Depends on the module-level <timitRoot> pointing at the TIMIT corpus root.
    '''
    dataOutDir = os.path.join("exp", "data")
    exkaldi.utils.make_dependent_dirs(dataOutDir, pathIsFile=False)

    # Prepare tools
    ExkaldiInfo.vertify_kaldi_existed()
    sph2pipeTool = os.path.join(ExkaldiInfo.KALDI_ROOT, "tools",
                                "sph2pipe_v2.5", "sph2pipe")
    # Fixed: the original tested os.path.join(sph2pipeTool), which is always
    # truthy, so a missing sph2pipe binary was never detected.
    if not os.path.isfile(sph2pipeTool):
        raise Exception(f"Expected sph2pipe tool existed.")

    # Check TIMIT data format
    if not os.path.isdir(timitRoot):
        raise Exception(f"No such directory: {timitRoot}.")
    dirNames = os.listdir(timitRoot)
    if "TRAIN" in dirNames and "TEST" in dirNames:
        uppercaseFlag = True
        trainResourceDir = "TRAIN"
        testResourceDir = "TEST"
        testWavFile = os.path.join(timitRoot, "TRAIN", "DR1", "FCJF0", "SA1.WAV")
        wavFileSuffix = "WAV"
        txtFileSuffix = "PHN"
    elif "train" in dirNames and "test" in dirNames:
        uppercaseFlag = False
        trainResourceDir = "train"
        testResourceDir = "test"
        testWavFile = os.path.join(timitRoot, "train", "dr1", "fcjf0", "sa1.wav")
        wavFileSuffix = "wav"
        txtFileSuffix = "phn"
    else:
        raise Exception(f"Wrong format of train or test data directories.")

    # Probe one known file: if sph2pipe can read it, the corpus is stored in
    # SPHERE format and every wav.scp entry must go through sph2pipe.
    formatCheckCmd = f"{sph2pipeTool} -f wav {testWavFile}"
    out, err, cod = exkaldi.utils.run_shell_command(formatCheckCmd,
                                                    stdout=subprocess.PIPE,
                                                    stderr=subprocess.PIPE)
    if cod == 0:
        sphFlag = True
    else:
        sphFlag = False

    # Transform phones from 60 categories to 48 catagories and generate the
    # 48 to 39 transform dictionary.
    phoneMap_60_to_48 = exkaldi.ListTable(name="60-48")  # fixed: was "69-48"
    phoneMap_48_to_39 = exkaldi.ListTable(name="48-39")
    with open(os.path.join(ExkaldiInfo.KALDI_ROOT, "egs", "timit", "s5",
                           "conf", "phones.60-48-39.map"),
              "r",
              encoding="utf-8") as fr:
        lines = fr.readlines()
    for line in lines:
        line = line.strip().split()
        # Lines without a full 60/48/39 mapping (e.g. the dropped "q") are skipped.
        if len(line) < 3:
            continue
        phoneMap_60_to_48[line[0]] = line[1]
        phoneMap_48_to_39[line[1]] = line[2]
    # NOTE(review): assumes "exp/dict" exists or ListTable.save creates it — confirm.
    phoneMap_48_to_39.save(os.path.join("exp", "dict", "phones.48_to_39.map"))

    # A function to generate wav.scp, spk2utt, utt2spk, text files.
    def generate_data(wavFiles, outDir):
        wavScp = exkaldi.ListTable(name="wavScp")
        utt2spk = exkaldi.ListTable(name="utt2spk")
        spk2utt = exkaldi.ListTable(name="spk2utt")
        transcription = exkaldi.ListTable(name="trans")
        for Name in wavFiles:
            # SA1/SA2 dialect sentences are excluded, as in the kaldi TIMIT recipe.
            if Name[-7:].upper() in [
                    "SA1.WAV", "SA2.WAV", "sa1.wav", "sa2.wav"
            ]:
                continue
            speaker = os.path.basename(os.path.dirname(Name))
            uttID = speaker + "_" + os.path.basename(Name)[0:-4]
            wavFilePath = os.path.abspath(Name)
            # wav.scp
            if sphFlag:
                wavScp[uttID] = f"{sph2pipeTool} -f wav {wavFilePath} |"
            else:
                wavScp[uttID] = wavFilePath
            # utt2spk
            utt2spk[uttID] = speaker
            # spk2utt
            if speaker not in spk2utt.keys():
                spk2utt[speaker] = f"{uttID}"
            else:
                spk2utt[speaker] += f" {uttID}"
            # transcription
            txtFile = Name[:-3] + txtFileSuffix
            phones = []
            with open(txtFile, "r", encoding="utf-8") as fr:
                lines = fr.readlines()
            for line in lines:
                line = line.strip()
                if len(line) == 0:
                    continue
                phone = line.split()[-1]
                # The glottal stop "q" is dropped entirely; other phones are
                # mapped from 60 to 48 categories.
                if phone == "q":
                    continue
                else:
                    phone = phoneMap_60_to_48[phone]
                phones.append(phone)
            transcription[uttID] = " ".join(phones)
        # Save to files
        wavScp.save(os.path.join(outDir, "wav.scp"))
        utt2spk.save(os.path.join(outDir, "utt2spk"))
        spk2utt.save(os.path.join(outDir, "spk2utt"))
        transcription.save(os.path.join(outDir, "text"))
        print(f"Generate data done: {outDir}.")

    # generate train data
    wavFiles = glob.glob(
        os.path.join(timitRoot, trainResourceDir, "*", "*",
                     f"*.{wavFileSuffix}"))
    generate_data(
        wavFiles=wavFiles,
        outDir=os.path.join(dataOutDir, "train"),
    )

    # generate dev and test data: speakers are picked from the recipe's
    # dev/test speaker lists.
    for Name in ["dev", "test"]:
        spkListFile = os.path.join(ExkaldiInfo.KALDI_ROOT, "egs", "timit",
                                   "s5", "conf", f"{Name}_spk.list")
        with open(spkListFile, "r", encoding="utf-8") as fr:
            spkList = fr.readlines()
        wavFiles = []
        for spk in spkList:
            spk = spk.strip()
            if len(spk) == 0:
                continue
            if uppercaseFlag:
                spk = spk.upper()
            wavFiles.extend(
                glob.glob(
                    os.path.join(timitRoot, testResourceDir, "*", spk,
                                 f"*.{wavFileSuffix}")))
        generate_data(
            wavFiles=wavFiles,
            outDir=os.path.join(dataOutDir, Name),
        )