def paste_feature(feats):
    '''
    Paste feature in feature dimension.

    Args:
        <feats>: a list of feature objects.

    Return:
        a new feature object.
    '''
    declare.kaldi_existed()
    assert isinstance(feats,(list,tuple)) and len(feats) > 0
    for fe in feats:
        declare.is_feature("feats", fe)

    readSpecifiers = []
    componentNames = []
    with FileHandleManager() as fhm:
        # Dump every feature into a temporary ark/scp file so that the
        # Kaldi paste-feats tool can read all of them at once.
        for feat in feats:
            if isinstance(feat, BytesFeat):
                handle = fhm.create("wb+", suffix=".ark")
                feat.sort(by="utt").save(handle)
                readSpecifiers.append(f"ark:{handle.name}")
            elif isinstance(feat, NumpyFeat):
                handle = fhm.create("wb+", suffix=".ark")
                feat.sort(by="utt").to_bytes().save(handle)
                readSpecifiers.append(f"ark:{handle.name}")
            else:
                # Index tables (and other scp-representable objects) go to .scp.
                handle = fhm.create("w+", suffix=".scp")
                feat.sort(by="utt").save(handle)
                readSpecifiers.append(f"scp:{handle.name}")
            componentNames.append(feat.name)

        cmd = f"paste-feats {' '.join(readSpecifiers)} ark:-"
        out, err, cod = run_shell_command(cmd, stdin="PIPE", stdout="PIPE", stderr="PIPE")

        if cod != 0 or out == b'':
            raise KaldiProcessError("Failed to paste feature.", err.decode())

        newName = f"paste({','.join(componentNames)})"
        # New index table need to be generated later.
        return BytesFeat(out, name=newName, indexTable=None)
def loadArkScpFile(fileName, suffix):
    '''
    Read a Kaldi archive (.ark) or script (.scp) table into raw ark bytes
    by piping it through the copy-feats tool.
    '''
    declare.kaldi_existed()

    # Choose the read specifier from the file suffix.
    readSpec = ("ark:" if suffix == "ark" else "scp:") + fileName
    cmd = f"copy-feats {readSpec} ark:-"

    out, err, cod = run_shell_command(cmd, stdout="PIPE", stderr="PIPE")
    if (isinstance(cod, int) and cod != 0) or out == b'':
        raise KaldiProcessError('Failed to read archive table.', err.decode())
    return out
def compute_postprob_norm(ali, probDims):
    '''
    Compute alignment counts in order to normalize acoustic model posterior probability.
    For more help information, look at the Kaldi <analyze-counts> command.

    Args:
        <ali>: exkaldi NumpyAlignmentTrans, NumpyAlignmentPhone or NumpyAlignmentPdf object.
        <probDims>: the dimensionality of posterior probability.

    Return:
        A numpy array of the normalization (log of normalized counts).
    '''
    declare.kaldi_existed()
    declare.is_classes(
        "ali", ali,
        ["NumpyAlignmentTrans", "NumpyAlignmentPhone", "NumpyAlignmentPdf"])
    declare.is_positive_int("probDims", probDims)

    # Serialize the alignment into Kaldi text-archive lines: "<utt> <id> <id> ...".
    txt = []
    for key, value in ali.items():  # fixed local-name typo: was "vlaue"
        txt.append(key + " " + " ".join(map(str, value.tolist())))
    txt = "\n".join(txt)

    cmd = f"analyze-counts --binary=false --counts-dim={probDims} ark:- -"
    out, err, cod = run_shell_command(cmd, stdin="PIPE", stdout="PIPE", stderr="PIPE", inputs=txt)
    if (isinstance(cod, int) and cod != 0) or out == b"":
        # Fixed: message typo ("defailed") and attach stderr to the exception
        # instead of printing it, consistent with the other wrappers in this file.
        raise KaldiProcessError('Analyze counts failed.', err.decode())
    else:
        # Output looks like "[ c1 c2 ... ]": strip brackets, parse the counts,
        # then take log of the normalized distribution.
        out = out.decode().strip().strip("[]").strip().split()
        counts = np.array(out, dtype=np.float32)
        countBias = np.log(counts / np.sum(counts))
        return countBias
def load_ali(target, aliType="transitionID", name="ali", hmm=None):
    '''
    Load alignment data.

    Args:
        <target>: Python dict object, bytes object, exkaldi alignment object, kaldi alignment file or .npy file.
        <aliType>: None, or one of 'transitionID', 'phoneID', 'pdfID'. It will return different alignment object.
        <name>: a string.
        <hmm>: file path or exkaldi HMM object (required for 'phoneID'/'pdfID' file loading).

    Return:
        exkaldi alignment objects.
    '''
    declare.is_valid_string("name", name)
    declare.is_instances("aliType", aliType, [None, "transitionID", "phoneID", "pdfID"])
    declare.kaldi_existed()

    def transform(data, cmd):
        # Run a Kaldi pipeline that writes text-format alignments to stdout and
        # parse each "<utt> <id> <id> ..." line into a numpy int32 vector.
        out, err, cod = run_shell_command(cmd, stdin="PIPE", stdout="PIPE", stderr="PIPE", inputs=data)
        if (isinstance(cod, int) and cod != 0) or out == b'':
            # Bug fix: this condition used "and", so a failed command that still
            # produced partial output (or empty output with exit code 0) was
            # silently accepted. Use "or" like every other check in this module.
            raise KaldiProcessError('Failed to transform alignment.', err.decode())
        else:
            result = {}
            sp = BytesIO(out)
            for line in sp.readlines():
                line = line.decode()
                line = line.strip().split()
                utt = line[0]
                matrix = np.array(line[1:], dtype=np.int32)
                result[utt] = matrix
            return result

    if isinstance(target, dict):
        # Raw python dict: wrap it in the numpy alignment class matching <aliType>.
        if aliType is None:
            result = NumpyAli(target, name)
        elif aliType == "transitionID":
            result = NumpyAliTrans(target, name)
        elif aliType == "phoneID":
            result = NumpyAliPhone(target, name)
        elif aliType == "pdfID":
            result = NumpyAliPdf(target, name)
        else:
            raise WrongOperation(
                f"<aliType> should be None,'transitionID','phoneID' or 'pdfID' but got {aliType}.")
        result.check_format()
        return result

    elif isinstance(target, (NumpyAli, NumpyAliTrans, BytesAliTrans)):
        # Already an exkaldi alignment object: copy and rename.
        result = copy.deepcopy(target)
        result.rename(name)
        return result

    elif isinstance(target, IndexTable):
        result = target.fetch(arkType="ali")
        if aliType in ["phoneID", "pdfID"]:
            result = result.to_numpy(aliType, hmm)
        result.rename(name)
        return result

    elif isinstance(target, str):
        allFiles = list_files(target)
        numpyAli = {}
        bytesAli = []
        for fileName in allFiles:
            fileName = fileName.strip()
            if fileName.endswith(".npy"):
                try:
                    temp = np.load(fileName, allow_pickle=True)
                    numpyAli.update(temp)
                except:
                    raise UnsupportedType(
                        f'This is not a valid Exkaldi npy file: {fileName}.')
            else:
                if fileName.endswith('.gz'):
                    cmd = f'gunzip -c {fileName}'
                else:
                    cmd = f'cat {fileName}'
                if aliType is None or aliType == "transitionID":
                    # Transition-ID alignments can be kept in binary form directly.
                    out, err, cod = run_shell_command(cmd, stdout="PIPE", stderr="PIPE")
                    if (isinstance(cod, int) and cod != 0) or out == b'':
                        raise ShellProcessError(
                            f"Failed to get the alignment data from file: {fileName}.",
                            err.decode())
                    else:
                        bytesAli.append(out)
                else:
                    # phoneID / pdfID need an HMM model to convert transition-IDs.
                    with FileHandleManager() as fhm:
                        declare.is_potential_hmm("hmm", hmm)
                        if not isinstance(hmm, str):
                            hmmTemp = fhm.create("wb+")
                            hmm.save(hmmTemp)
                            hmm = hmmTemp.name
                        if aliType == "phoneID":
                            cmd += f" | ali-to-phones --per-frame=true {hmm} ark:- ark,t:-"
                            temp = transform(None, cmd)
                        else:
                            cmd += f" | ali-to-pdf {hmm} ark:- ark,t:-"
                            temp = transform(None, cmd)
                        numpyAli.update(temp)

        bytesAli = b"".join(bytesAli)
        if aliType is None:
            if len(numpyAli) == 0:
                return BytesAliTrans(bytesAli, name=name)
            elif len(bytesAli) == 0:
                return NumpyAli(numpyAli, name=name)
            else:
                result = NumpyAliTrans(numpyAli) + BytesAliTrans(bytesAli)
                result.rename(name)
                return result
        elif aliType == "transitionID":
            if len(numpyAli) == 0:
                return BytesAliTrans(bytesAli, name=name)
            elif len(bytesAli) == 0:
                return NumpyAliTrans(numpyAli, name=name)
            else:
                result = NumpyAliTrans(numpyAli) + BytesAliTrans(bytesAli)
                result.rename(name)
                return result
        elif aliType == "phoneID":
            return NumpyAliPhone(numpyAli, name=name)
        else:
            return NumpyAliPdf(numpyAli, name=name)

    else:
        raise UnsupportedType(
            f"<target> should be dict,file name or exkaldi alignment or index table object but got: {type_name(target)}."
        )
def __read_data_from_file(fileName, useSuffix=None):
    '''
    Read data from file. If the file suffix is unknown, <useSuffix> is necessary.

    Args:
        <fileName>: file path (may contain patterns accepted by list_files).
        <useSuffix>: "ark", "scp" or "npy"; used for files whose own suffix
                     is not one of these.

    Return:
        a triple: (bytes data, numpy data dict, "bytes" or "numpy").
    '''
    declare.kaldi_existed()

    if useSuffix != None:
        declare.is_valid_string("useSuffix", useSuffix)
        useSuffix = useSuffix.strip().lower()[-3:]
        declare.is_instances("useSuffix", useSuffix, ["ark", "scp", "npy"])
    else:
        useSuffix = ""

    allFiles = list_files(fileName)

    allData_bytes = []
    allData_numpy = {}

    def loadNpyFile(fileName):
        # A valid exkaldi npy file stores an array of (uttID, matrix) pairs.
        try:
            temp = np.load(fileName, allow_pickle=True)
            data = {}
            for utt_mat in temp:
                assert isinstance(utt_mat[0], str) and isinstance(
                    utt_mat[1], np.ndarray)
                data[utt_mat[0]] = utt_mat[1]
        except:
            raise UnsupportedType(
                f'This is not a valid Exkaldi npy file: {fileName}.')
        else:
            return data

    def loadArkScpFile(fileName, suffix):
        # Pipe an ark/scp table through copy-feats and return raw ark bytes.
        declare.kaldi_existed()
        if suffix == "ark":
            cmd = 'copy-feats ark:'
        else:
            cmd = 'copy-feats scp:'
        cmd += '{} ark:-'.format(fileName)
        out, err, cod = run_shell_command(cmd, stdout="PIPE", stderr="PIPE")
        if (isinstance(cod, int) and cod != 0) or out == b'':
            raise KaldiProcessError('Failed to read archive table.', err.decode())
        else:
            return out

    for fileName in allFiles:
        sfx = fileName.strip()[-3:].lower()
        if sfx == "npy":
            allData_numpy.update(loadNpyFile(fileName))
        elif sfx in ["ark", "scp"]:
            allData_bytes.append(loadArkScpFile(fileName, sfx))
        elif useSuffix == "npy":
            allData_numpy.update(loadNpyFile(fileName))
        elif useSuffix in ["ark", "scp"]:
            # Bug fix: the real suffix <sfx> is unrecognized in this branch, so
            # the declared <useSuffix> must decide the copy-feats read specifier.
            # The old code passed <sfx>, which always fell into the scp branch.
            allData_bytes.append(loadArkScpFile(fileName, useSuffix))
        else:
            raise UnsupportedType(
                'Unknown file suffix. You can appoint the <useSuffix> option with "scp","ark" or "npy".'
            )

    allData_bytes = b"".join(allData_bytes)

    # Decide the dominant data type from the first file when no suffix was declared.
    if useSuffix == "":
        useSuffix = allFiles[0].strip()[-3:].lower()

    if useSuffix == "npy":
        dataType = "numpy"
    else:
        dataType = "bytes"

    return allData_bytes, allData_numpy, dataType
def __compute_feature(target,kaldiTool,useSuffix=None,name="feat",outFile=None):
    '''
    The base function to compute feature.

    Args:
        <target>: wav file path(s), ListTable, or WavSegment object(s); may be
                  a list when computing multiple resources in parallel.
        <kaldiTool>: the Kaldi feature-computation command line (without IO specifiers).
        <useSuffix>: "wav" or "scp"; only needed for files with unknown suffix.
        <name>: a string, name of the generated feature archive(s).
        <outFile>: output file path(s), or None.

    Return:
        the result of run_kaldi_commands_parallel (feature archives or index tables).
    '''
    declare.kaldi_existed()

    if useSuffix != None:
        declare.is_valid_string("useSuffix",useSuffix)
        useSuffix = useSuffix.strip().lower()[-3:]
        declare.is_instances("useSuffix",useSuffix,["scp","wav"])
    else:
        useSuffix = ""

    # Expand every argument into parallel lists of equal length.
    targets,kaldiTools,useSuffixs,names,outFiles = check_multiple_resources(target,kaldiTool,useSuffix,name,outFile=outFile)
    # pretreatment
    fromSegment = False
    with FileHandleManager() as fhm:

        segments = []
        for index,kaldiTool,target,useSuffix,name in zip(range(len(outFiles)),kaldiTools,targets,useSuffixs,names):

            declare.is_classes("target",target,["str","ListTable","WavSegment"])
            declare.is_valid_string("name",name)

            if isinstance(target,str):
                # A path (or glob): collect wav files / scp tables into one ListTable.
                allFiles = list_files(target)
                target = ListTable()
                for filePath in allFiles:
                    filePath = filePath.strip()
                    if filePath[-4:].lower() == ".wav":
                        # Utterance ID = file name without ".wav", dots removed.
                        fileName = os.path.basename(filePath)
                        uttID = fileName[0:-4].replace(".","")
                        target[uttID] = filePath
                    elif filePath[-4:].lower() == '.scp':
                        target += load_list_table(filePath)
                    elif "wav" == useSuffix:
                        # Unknown suffix but declared as wav: keep full name (dots removed).
                        fileName = os.path.basename(filePath)
                        uttID = fileName.replace(".","")
                        target[uttID] = filePath
                    elif "scp" == useSuffix:
                        target += load_list_table(filePath)
                    else:
                        raise UnsupportedType('Unknown file suffix. You can declare whether <useSuffix> is "wav" or "scp".')

                if len(target) == 0:
                    raise WrongDataFormat("There did not include any data to compute data in target.")

                targets[index] = target

            elif type_name(target) == "WavSegment":
                # Segment info is dumped to a temp .seg file; the wav list is
                # detached so extract-segments can cut the pieces on the fly.
                segTemp = fhm.create("w+",suffix=".seg",encode="utf-8")
                target.save(segTemp)
                segments.append(segTemp.name)

                targets[index] = target.detach_wav()
                fromSegment = True

        if fromSegment:
            # define the command pattern
            cmdPattern = "extract-segments scp:{wavFile} {segment} ark:- | {kaldiTool} ark:- ark:{outFile}"
            # define resources
            resources = {"wavFile":targets,"segment":segments,"kaldiTool":kaldiTools,"outFile":outFiles}
        else:
            # define the command pattern
            cmdPattern = "{kaldiTool} scp:{wavFile} ark:{outFile}"
            # define resources
            resources = {"wavFile":targets,"kaldiTool":kaldiTools,"outFile":outFiles}

        # Run
        return run_kaldi_commands_parallel(resources,cmdPattern,analyzeResult=True,generateArchive="feat",archiveNames=names)
def wer(ref, hyp, ignore=None, mode='all'):
    '''
    Compute WER (word error rate) between <ref> and <hyp>.

    Args:
        <ref>,<hyp>: exkaldi transcription object or file path.
        <ignore>: ignore a symbol.
        <mode>: "all" or "present".

    Return:
        a namedtuple of score information.
    '''
    declare.is_potential_transcription("ref", ref)
    declare.is_potential_transcription("hyp", hyp)
    declare.is_instances("mode", mode, ['all', 'present'])
    declare.kaldi_existed()

    if ignore is not None:
        declare.is_valid_string("ignore", ignore)

    with FileHandleManager() as fhm:

        if ignore is None:
            # Save transcriptions to temp files and score them directly.
            if type_name(hyp) == "Transcription":
                hypTemp = fhm.create("w+", suffix=".txt", encoding="utf-8")
                hyp.save(hypTemp)
                hyp = hypTemp.name

            if type_name(ref) == "Transcription":
                refTemp = fhm.create("w+", suffix=".txt", encoding="utf-8")
                ref.save(refTemp)
                ref = refTemp.name

            cmd = f'compute-wer --text --mode={mode} ark:{ref} ark,p:{hyp}'
            scoreOut, scoreErr, _ = run_shell_command(cmd, stdout="PIPE", stderr="PIPE")

        else:
            # remove the ignored symbol in hyp
            if type_name(hyp) == "Transcription":
                hyp = hyp.save()
            else:
                with open(hyp, "r", encoding="utf-8") as fr:
                    hyp = fr.read()
            hypTemp = fhm.create("w+", suffix=".txt", encoding="utf-8")
            cmd = f'sed "s/{ignore} //g" > {hypTemp.name}'
            _, err, cod = run_shell_command(cmd, stdin="PIPE", stdout="PIPE", stderr="PIPE", inputs=hyp)
            # Bug fix: sed's stdout is shell-redirected into the temp file, so
            # the captured stdout is always empty and the old "len(hypOut)==0"
            # test raised unconditionally. Check the exit code and the size of
            # the file actually produced instead.
            if cod != 0 or os.path.getsize(hypTemp.name) == 0:
                raise WrongDataFormat("<hyp> has wrong data formation.", err.decode())

            # remove the ignored symbol in ref
            if type_name(ref) == "Transcription":
                ref = ref.save()
            else:
                with open(ref, "r", encoding="utf-8") as fr:
                    ref = fr.read()
            refTemp = fhm.create("w+", suffix=".txt", encoding="utf-8")
            cmd = f'sed "s/{ignore} //g" > {refTemp.name}'
            _, err, cod = run_shell_command(cmd, stdin="PIPE", stdout="PIPE", stderr="PIPE", inputs=ref)
            # Same fix as above for the reference side.
            if cod != 0 or os.path.getsize(refTemp.name) == 0:
                raise WrongDataFormat("<ref> has wrong data formation.", err.decode())

            # score
            cmd = f'compute-wer --text --mode={mode} ark:{refTemp.name} ark,p:{hypTemp.name}'
            scoreOut, scoreErr, _ = run_shell_command(cmd, stdout="PIPE", stderr="PIPE")

        if len(scoreOut) == 0:
            raise KaldiProcessError("Failed to compute WER.", scoreErr.decode())

        out = scoreOut.decode().split("\n")
        # Raw strings for the regexes (the old plain strings relied on
        # deprecated invalid escape sequences like "\[").
        pattern1 = r'%WER (.*) \[ (.*) \/ (.*),(.*) ins,(.*) del,(.*) sub \]'
        pattern2 = r"%SER (.*) \[ (.*) \/ (.*) \]"
        pattern3 = r"Scored (.*) sentences,(.*) not present in hyp."
        s1 = re.findall(pattern1, out[0])[0]
        s2 = re.findall(pattern2, out[1])[0]
        s3 = re.findall(pattern3, out[2])[0]

        return namedtuple("Score", [
            "WER", "words", "insErr", "delErr", "subErr", "SER", "sentences",
            "wrongSentences", "missedSentences"
        ])(
            float(s1[0]),  #WER
            int(s1[2]),    #words
            int(s1[3]),    #ins
            int(s1[4]),    #del
            int(s1[5]),    #sub
            float(s2[0]),  #SER
            int(s2[1]),    #sentences
            int(s2[2]),    #wrong sentences
            int(s3[1])     #missed sentences
        )
def run_kaldi_commands_parallel(resources,cmdPattern,analyzeResult=True,timeout=ExKaldiInfo.timeout,generateArchive=None,archiveNames=None):
    '''
    Map resources to command pattern and run this command parallelly.

    Args:
        <resources>: a dict whose keys are the name of resource and values are lists of resources objects.
                     For example: {"feat": [BytesFeat01,BytesFeat02,... ],"outFile":{"newFeat01.ark","newFeat02.ark",...} }.
                     The "outFile" resource is necessary.
                     When there is only one process to run,"outFile" can be "-" which means the standard output stream.
        <cmdPattern>: a string needed to map the resources.
                      For example: "copy-feat {feat} ark:{outFile}".
        <analyzeResult>: if True, raise on any non-zero exit code.
        <timeout>: seconds before the parallel shell commands are killed.
        <generateArchive>: None, or one of "feat","cmvn","ali","fmllr" — load the
                           outputs back as that archive type (forces analyzeResult).
        <archiveNames>: name(s) for the generated archives.

    Return:
        a list of triples: (return code,error info,output file or buffer),
        or archive/index-table objects when <generateArchive> is used.
    '''
    declare.kaldi_existed()
    declare.is_classes("resources",resources,dict)
    declare.is_classes("cmdPattern",cmdPattern,str)
    assert "outFile" in resources.keys(),"<outFile> key and value is necessary in recources."

    declare.members_are_classes("the values of resources",resources.values(),[list,tuple])
    if generateArchive is not None:
        analyzeResult = True #forcely analyze the result

    # check the format of cmomand pattern
    # Collect the positions of all braces; each "{name}" placeholder must have
    # a matching resource, and we record how many times each name appears plus
    # the character immediately before it (used later to validate prefixes
    # like "ark:", "scp:" or "--option=").
    nameIndexs = [ i for i,c in enumerate(cmdPattern) if c == "{" or c == "}" ]
    assert len(nameIndexs)%2 == 0,f"The numbers of braces do not match in command pattern: '{cmdPattern}'. "
    auxiliaryInfo = {}
    for i in range(0,len(nameIndexs),2):
        name = cmdPattern[nameIndexs[i]+1:nameIndexs[i+1]]
        if name not in resources:
            raise WrongDataFormat(f"Resource is necessary but has not been provided: {name}.")
        prefix = "" if nameIndexs[i] == 0 else cmdPattern[nameIndexs[i]-1]
        if name in auxiliaryInfo.keys():
            auxiliaryInfo[name][0] += 1
            if not prefix in auxiliaryInfo[name][1]:
                auxiliaryInfo[name][1] += prefix
        else:
            auxiliaryInfo[name] = [1,prefix]

    assert "outFile" in auxiliaryInfo.keys(),"Key: <outFile> is necessary in command pattern."
    _outFileCountInfo = auxiliaryInfo.pop("outFile")
    assert _outFileCountInfo[0] == 1,f"Only allow <outFile> appear one time in command pattern but: {_outFileCountInfo[0]}."
    outFiles = resources.pop("outFile")

    for outFile in outFiles:
        if outFile != "-":
            make_dependent_dirs(outFile,pathIsFile=True)
    # The degree of parallelism equals the number of output files.
    parallel = len(outFiles)

    if generateArchive is not None:
        declare.is_instances("generateArchive",generateArchive,["feat","cmvn","ali","fmllr"])
        if archiveNames is None:
            archiveNames = [ generateArchive for i in range(parallel)]
        elif isinstance(archiveNames,str):
            archiveNames = [ archiveNames for i in range(parallel)]
        elif isinstance(archiveNames,(list,tuple)):
            declare.equal("the number of achieve names",len(archiveNames),"parallel",parallel)
        else:
            raise UnsupportedType(f"<archiveNames> should be string or list or tuple but got: {type_name(archiveNames)}.")

    # regulate resources and run
    with FileHandleManager() as fhm:

        newResources = {}
        if parallel == 1:
            # Detect whether there is PIPE in command pattern.
            # Input may be passed via stdin only when the command is not a
            # multi-stage shell pipeline.
            testPlaceholder = dict( (key,value[0]) if isinstance(value[0],str) else (key,"placeholder") for key,value in resources.items() )
            testPlaceholder["outFile"] = "placeholder"
            testCmd = cmdPattern.format(**testPlaceholder)
            if "|" in testCmd:
                inputsBuffer = False
            else:
                inputsBuffer = True
            del testPlaceholder

            # regularate resources
            for key,countPrefix in auxiliaryInfo.items():
                count,prefix = countPrefix
                target = resources[key][0]

                # If target is a list-table,we can not automatically decide whether it is scp-format or ark-format.
                # So you should appoint it in the command parttern.
                if type_name(target) in ["ListTable","Transcription"]:
                    if prefix not in [":","="]:
                        errMes = f"There might miss prefix such as 'ark:' or 'scp:' or '--option=' in command pattern before resource: {key}."
                        errMes += "Check the command line please. If you still think there dose not need the prefix,"
                        errMes += "save this ListTable or Transcription into file and instead it will this file name."
                        errMes += "In that case,we will skip checking the prefix."
                        raise WrongOperation(errMes)

                    target = target.sort()
                    if (inputsBuffer is True) and count == 1:
                        # Feed through stdin; "-" stands for the stream.
                        inputsBuffer = target.save()
                        newResources[key] = "-"
                    else:
                        targetTemp = fhm.create("w+",encoding="utf-8")
                        target.save(targetTemp)
                        newResources[key] = f"{targetTemp.name}"

                # If target is an index-table,we automatically recognize it as scp-file,so you do not need appoint it.
                elif type_name(target) == "IndexTable":
                    if prefix != " ":
                        errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
                        errMes += f"Because we will decide the prefix depending on its data type."
                        raise WrongOperation(errMes)

                    target = target.sort()
                    if (inputsBuffer is True) and count == 1:
                        inputsBuffer = target.save()
                        newResources[key] = "scp:-"
                    else:
                        targetTemp = fhm.create("w+",suffix=".scp",encoding="utf-8")
                        target.save(targetTemp)
                        newResources[key] = f"scp:{targetTemp.name}"

                elif isinstance(target,(str,int,float)):
                    # file or other value parameter
                    newResources[key] = f"{target}"

                elif isinstance(target,(BytesMatrix,BytesVector)):
                    if prefix != " ":
                        errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
                        errMes += f"Because we will decide the prefix depending on its data type."
                        raise WrongOperation(errMes)

                    target = target.sort()
                    if (inputsBuffer is True) and count == 1:
                        inputsBuffer = target.data
                        newResources[key] = "ark:-"
                    else:
                        targetTemp = fhm.create("wb+",suffix=".ark")
                        target.save(targetTemp)
                        newResources[key] = f"ark:{targetTemp.name}"

                elif isinstance(target,(NumpyMatrix,NumpyVector)):
                    if prefix != " ":
                        errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
                        errMes += f"Because we will decide the prefix depending on its data type."
                        raise WrongOperation(errMes)

                    # Numpy archives are converted to bytes form before feeding Kaldi.
                    target = target.sort()
                    if (inputsBuffer is True) and count == 1:
                        inputsBuffer = target.to_bytes().data
                        newResources[key] = "ark:-"
                    else:
                        target = target.to_bytes()
                        targetTemp = fhm.create("wb+",suffix=".ark")
                        target.save(targetTemp)
                        newResources[key] = f"ark:{targetTemp.name}"

                elif isinstance(target,BytesArchive):
                    if (inputsBuffer is True) and count == 1:
                        inputsBuffer = target.data
                        newResources[key] = "-"
                    else:
                        targetTemp = fhm.create("wb+")
                        target.save(targetTemp)
                        newResources[key] = f"{targetTemp.name}"

                else:
                    raise UnsupportedType(f"<target> should be IndexTable,ListTable,file name,int or float value,or exkaldi achieve object but got: {type_name(target)}.")

            # Then,process output stream
            outFile = outFiles[0]
            newResources["outFile"] = outFile

            # A bool here means no resource was buffered for stdin.
            inputsBuffer = None if isinstance(inputsBuffer,bool) else inputsBuffer
            # Then rum command
            finalCmd = cmdPattern.format(**newResources)
            out,err,cod = run_shell_command(finalCmd,stdin="PIPE",stdout="PIPE",stderr="PIPE",inputs=inputsBuffer)

            if analyzeResult:
                if cod != 0:
                    # Keep only the tool names of each pipeline stage for the message.
                    finalCmd = ",".join([cmd.strip().split(maxsplit=1)[0] for cmd in finalCmd.split("|")])
                    raise KaldiProcessError(f"Failed to run Kaldi command: {finalCmd}.",err.decode())

            if outFile == "-":
                if generateArchive is not None:
                    if generateArchive == "feat":
                        out = BytesFeat(data=out,name=archiveNames[0])
                    elif generateArchive == "ali":
                        out = BytesAliTrans(data=out,name=archiveNames[0])
                    elif generateArchive == "cmvn":
                        out = BytesCMVN(data=out,name=archiveNames[0])
                    else:
                        out = BytesFmllr(data=out,name=archiveNames[0])
                    return out
                else:
                    return (cod,err,out)
            else:
                if generateArchive is not None:
                    return load_index_table(outFile,name=archiveNames[0],useSuffix="ark")
                else:
                    return (cod,err,outFile)

        else:
            # In this case,all input IO stream must be files.
            for key,countPrefix in auxiliaryInfo.items():
                count,prefix = countPrefix
                values = resources[key]
                newValues = []
                for target in values:

                    # If target is scp resource
                    if type_name(target) in ["ListTable","Transcription"]:
                        if prefix not in [":","="]:
                            errMes = f"There might miss prefix such as 'ark:' or 'scp:' or '--option=' in command pattern before resource: {key}."
                            errMes += "Check the command line please. If you still think there dose not need the prefix,"
                            errMes += "save this ListTable or Transcription into file and instead it will this file name."
                            errMes += "In that case,we will skip checking the prefix."
                            raise WrongOperation(errMes)

                        target = target.sort()
                        targetTemp = fhm.create("w+",encoding="utf-8")
                        target.save(targetTemp)
                        newValues.append(f"{targetTemp.name}")

                    elif type_name(target) == "IndexTable":
                        if prefix != " ":
                            errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
                            errMes += f"Because we will decide the prefix depending on its data type."
                            raise WrongOperation(errMes)

                        target = target.sort()
                        targetTemp = fhm.create("w+",suffix=".scp",encoding="utf-8")
                        target.save(targetTemp)
                        newValues.append(f"scp:{targetTemp.name}")

                    elif isinstance(target,(str,float,int)):
                        # file name or other value parameters
                        newValues.append(f"{target}")

                    elif isinstance(target,(BytesMatrix,BytesVector)):
                        if prefix != " ":
                            errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
                            errMes += f"Because we will decide the prefix depending on its data type."
                            raise WrongOperation(errMes)

                        target = target.sort()
                        targetTemp = fhm.create("wb+",suffix=".ark")
                        target.save(targetTemp)
                        newValues.append(f"ark:{targetTemp.name}")

                    elif isinstance(target,(NumpyMatrix,NumpyVector)):
                        if prefix != " ":
                            errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
                            errMes += f"Because we will decide the prefix depending on its data type."
                            raise WrongOperation(errMes)

                        target = target.sort().to_bytes()
                        targetTemp = fhm.create("wb+",suffix=".ark")
                        target.save(targetTemp)
                        newValues.append(f"ark:{targetTemp.name}")

                    elif isinstance(target,BytesArchive):
                        targetTemp = fhm.create("wb+")
                        target.save(targetTemp)
                        newValues.append(f"{targetTemp.name}")

                    else:
                        raise UnsupportedType(f"<target> should be IndexTable,ListTable,Transcription,file,int or float values or exkaldi achieve object but got: {type_name(target)}.")

                newResources[key] = newValues

            newResources["outFile"] = outFiles

            # assign these resources to each process and generate multiple commands
            parallelResources = []
            for i in range(parallel):
                parallelResources.append({})
                for key,items in newResources.items():
                    parallelResources[-1][key] = items[i]
            # NOTE: "re" here is a throwaway loop variable, shadowing the regex
            # module name inside this comprehension only.
            cmds = [ cmdPattern.format(**re) for re in parallelResources ]

            # run
            flags = run_shell_command_parallel(cmds,timeout=timeout)

            finalResult = []
            done = True
            for index,info in enumerate(flags):
                cod,err = info
                if analyzeResult and cod != 0:
                    print(f"{index}/{len(flags)} error tracking")
                    print(err.decode())
                    done = False
                finalResult.append( (cod,err,outFiles[index]) )

            if analyzeResult and (not done):
                finalCmd = ",".join([cmd.strip().split(maxsplit=1)[0] for cmd in cmds[0].split("|")])
                raise KaldiProcessError(f"Failed to run Kaldi command: {finalCmd}. Look the error messages above.")
            else:
                if generateArchive is not None:
                    # Replace the raw triples with index tables over the outputs.
                    for i,fileName in enumerate(outFiles):
                        finalResult[i] = load_index_table(fileName,name=archiveNames[i],useSuffix="ark")
                return finalResult