Example #1
def load_ngrams(target, name="gram"):
    '''
	Load an N-Grams language model from an ARPA or binary LM file.

	Args:
		<target>: a file path with the suffix .arpa or .binary.
	
	Return:
		a KenNGrams object.
	'''
    declare.is_file("target", target)
    target = target.strip()

    with FileHandleManager() as fhm:

        if target.endswith(".arpa"):
            modelTemp = fhm.create("wb+", suffix=".binary")
            arpa_to_binary(target, modelTemp.name)
            modelTemp.seek(0)
            model = KenNGrams(modelTemp.name, name=name)
            model._path = target

        elif target.endswith(".binary"):
            model = KenNGrams(target, name=name)

        else:
            raise UnsupportedType(
                f"Unknown suffix. Language model file should has a suffix .arpa or .binary but got: {target}."
            )

        return model
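
A minimal usage sketch (the model paths below are hypothetical):

# an .arpa file is converted to a temporary binary file internally
lm = load_ngrams("lm/3gram.arpa", name="trigram")
# a pre-binarized model is loaded directly
lm = load_ngrams("lm/3gram.binary", name="trigram")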
Example #2
def paste_feature(feats):
	'''
	Paste features together along the feature dimension.

	Args:
		<feats>: a list of feature objects.

	Return:
		a new feature object.
	''' 
	declare.kaldi_existed()
	assert isinstance(feats,(list,tuple)) and len(feats) > 0
	for fe in feats:
		declare.is_feature("feats", fe)

	allResp = []
	pastedName = []
	
	with FileHandleManager() as fhm:

		for ot in feats:

			if isinstance(ot,BytesFeat):
				temp = fhm.create("wb+",suffix=".ark")
				ot.sort(by="utt").save(temp)
				allResp.append( f"ark:{temp.name}" )

			elif isinstance(ot,NumpyFeat):
				temp = fhm.create("wb+",suffix=".ark")
				ot.sort(by="utt").to_bytes().save(temp)
				allResp.append( f"ark:{temp.name}" )

			else:
				temp = fhm.create("w+",suffix=".scp")
				ot.sort(by="utt").save(temp)
				allResp.append( f"scp:{temp.name}" )

			pastedName.append( ot.name )	
		
		allResp = " ".join(allResp)
		cmd = f"paste-feats {allResp} ark:-"
		
		out,err,cod = run_shell_command(cmd,stdin="PIPE",stdout="PIPE",stderr="PIPE")

		if cod != 0 or out == b'':
			raise KaldiProcessError("Failed to paste feature.",err.decode())
		else:
			pastedName = ",".join(pastedName)
			pastedName = f"paste({pastedName})"
			# New index table need to be generated later.
			return BytesFeat(out,name=pastedName,indexTable=None)
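
A hedged usage sketch (mfcc and pitch stand for hypothetical exkaldi feature objects, BytesFeat or NumpyFeat, covering the same utterance IDs):

# concatenate two feature streams along the feature dimension
combined = paste_feature([mfcc, pitch])
print(combined.name)  # e.g. "paste(mfcc,pitch)"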
Example #3
def __read_index_table_from_scp_file(fileName):
    '''
	Read index table from scp file.
	'''
    newTable = IndexTable()

    with FileHandleManager() as fhm:

        fr = fhm.open(fileName, "r", encoding="utf-8")
        lines = fr.readlines()

        for lineID, lineTxt in enumerate(lines):
            line = lineTxt.strip().split()
            if len(line) == 0:
                continue
            elif len(line) == 1:
                raise WrongDataFormat(
                    f"line {lineID}: {lineTxt}\n" +
                    "Missed complete utterance-filepath information.")
            elif len(line) > 2:
                raise WrongDataFormat(
                    "We don't support reading index table from binary data generated via PIPE line. The second value should be ark file path and the shift."
                )
            else:
                uttID = line[0]
                line = line[1].split(":")
                if len(line) != 2:
                    raise WrongDataFormat(
                        f"line {lineID}: {lineTxt}\n" +
                        "Missed complete file path and shift value information."
                    )
                arkFileName = line[0]
                startIndex = int(line[1]) - 1 - len(uttID)

                fr = fhm.call(arkFileName)
                if fr is None:
                    fr = fhm.open(arkFileName, "rb")

                fr.seek(startIndex)
                _, frames, dataSize = __read_one_record_from_ark(fr)
                arkFileName = os.path.abspath(arkFileName)
                newTable[uttID] = newTable.spec(frames, startIndex, dataSize,
                                                arkFileName)

    return newTable
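
For reference, each scp line pairs an utterance ID with an ark path and a byte offset, e.g. "utt-001 /data/raw.ark:127". The offset points just past the ID and the following space, so the function rewinds to the start of the record. A small sketch of that arithmetic, with hypothetical values:

uttID = "utt-001"
offset = 127                           # the value after ":" in the scp line
startIndex = offset - 1 - len(uttID)   # rewind over the space and the utterance ID
# seeking to startIndex positions the file pointer at the first byte of the record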
Example #4
def load_ali(target, aliType="transitionID", name="ali", hmm=None):
    '''
	Load alignment data.

	Args:
		<target>: a Python dict object, bytes object, exkaldi alignment object, Kaldi alignment file, or .npy file.
		<aliType>: None, or one of 'transitionID', 'phoneID', 'pdfID'. It determines which alignment object is returned.
		<name>: a string.
		<hmm>: a file path or an exkaldi HMM object.

	Return:
		an exkaldi alignment object.
	'''
    declare.is_valid_string("name", name)
    declare.is_instances("aliType", aliType,
                         [None, "transitionID", "phoneID", "pdfID"])
    declare.kaldi_existed()

    def transform(data, cmd):
        out, err, cod = run_shell_command(cmd,
                                          stdin="PIPE",
                                          stdout="PIPE",
                                          stderr="PIPE",
                                          inputs=data)
        if (isinstance(cod, int) and cod != 0) or out == b'':  # fail on an error code or empty output
            raise KaldiProcessError('Failed to transform alignment.',
                                    err.decode())
        else:
            result = {}
            sp = BytesIO(out)
            for line in sp.readlines():
                line = line.decode()
                line = line.strip().split()
                utt = line[0]
                matrix = np.array(line[1:], dtype=np.int32)
                result[utt] = matrix
            return result

    if isinstance(target, dict):
        if aliType is None:
            result = NumpyAli(target, name)
        elif aliType == "transitionID":
            result = NumpyAliTrans(target, name)
        elif aliType == "phoneID":
            result = NumpyAliPhone(target, name)
        elif aliType == "pdfID":
            result = NumpyAliPdf(target, name)
        else:
            raise WrongOperation(
                f"<aliType> should be None,'transitionID','phoneID' or 'pdfID' but got {aliType}."
            )
        result.check_format()
        return result

    elif isinstance(target, (NumpyAli, NumpyAliTrans, BytesAliTrans)):
        result = copy.deepcopy(target)
        result.rename(name)
        return result

    elif isinstance(target, IndexTable):
        result = target.fetch(arkType="ali")
        if aliType in ["phoneID", "pdfID"]:
            result = result.to_numpy(aliType, hmm)
        result.rename(name)
        return result

    elif isinstance(target, str):
        allFiles = list_files(target)
        numpyAli = {}
        bytesAli = []

        for fileName in allFiles:
            fileName = fileName.strip()
            if fileName.endswith(".npy"):
                try:
                    temp = np.load(fileName, allow_pickle=True)
                    numpyAli.update(temp)
                except Exception:
                    raise UnsupportedType(
                        f'This is not a valid Exkaldi npy file: {fileName}.')
            else:
                if fileName.endswith('.gz'):
                    cmd = f'gunzip -c {fileName}'
                else:
                    cmd = f'cat {fileName}'

                if aliType is None or aliType == "transitionID":
                    out, err, cod = run_shell_command(cmd,
                                                      stdout="PIPE",
                                                      stderr="PIPE")
                    if (isinstance(cod, int) and cod != 0) or out == b'':
                        raise ShellProcessError(
                            f"Failed to get the alignment data from file: {fileName}.",
                            err.decode())
                    else:
                        bytesAli.append(out)

                else:
                    with FileHandleManager() as fhm:
                        declare.is_potential_hmm("hmm", hmm)
                        if not isinstance(hmm, str):
                            hmmTemp = fhm.create("wb+")
                            hmm.save(hmmTemp)
                            hmm = hmmTemp.name

                        if aliType == "phoneID":
                            cmd += f" | ali-to-phones --per-frame=true {hmm} ark:- ark,t:-"
                            temp = transform(None, cmd)

                        else:
                            cmd += f" | ali-to-pdf {hmm} ark:- ark,t:-"
                            temp = transform(None, cmd)

                    numpyAli.update(temp)

        bytesAli = b"".join(bytesAli)
        if aliType is None:
            if len(numpyAli) == 0:
                return BytesAliTrans(bytesAli, name=name)
            elif len(bytesAli) == 0:
                return NumpyAli(numpyAli, name=name)
            else:
                result = NumpyAliTrans(numpyAli) + BytesAliTrans(bytesAli)
                result.rename(name)
                return result
        elif aliType == "transitionID":
            if len(numpyAli) == 0:
                return BytesAliTrans(bytesAli, name=name)
            elif len(bytesAli) == 0:
                return NumpyAliTrans(numpyAli, name=name)
            else:
                result = NumpyAliTrans(numpyAli) + BytesAliTrans(bytesAli)
                result.rename(name)
                return result
        elif aliType == "phoneID":
            return NumpyAliPhone(numpyAli, name=name)
        else:
            return NumpyAliPdf(numpyAli, name=name)

    else:
        raise UnsupportedType(
            f"<target> should be dict,file name or exkaldi alignment or index table object but got: {type_name(target)}."
        )
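
A minimal usage sketch (the paths are hypothetical; converting to phone or pdf IDs requires the HMM model, e.g. Kaldi's final.mdl):

# load transition-ID alignments from a gzipped Kaldi alignment file
ali = load_ali("exp/tri1/ali.1.gz", aliType="transitionID")
# converting to phone-ID alignments needs the HMM model
phoneAli = load_ali("exp/tri1/ali.1.gz", aliType="phoneID", hmm="exp/tri1/final.mdl")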
Example #5
def __compute_feature(target,kaldiTool,useSuffix=None,name="feat",outFile=None):
	'''
	The base function for computing features.
	'''
	declare.kaldi_existed()

	if useSuffix is not None:
		declare.is_valid_string("useSuffix",useSuffix)
		useSuffix = useSuffix.strip().lower()[-3:]
		declare.is_instances("useSuffix",useSuffix,["scp","wav"])
	else:
		useSuffix = ""	

	targets,kaldiTools,useSuffixs,names,outFiles = check_multiple_resources(target,kaldiTool,useSuffix,name,outFile=outFile)
	# pretreatment
	fromSegment = False
	with FileHandleManager() as fhm:

		segments = []
		for index,kaldiTool,target,useSuffix,name in zip(range(len(outFiles)),kaldiTools,targets,useSuffixs,names):
			
			declare.is_classes("target",target,["str","ListTable","WavSegment"])
			declare.is_valid_string("name",name)

			if isinstance(target,str):		
		
				allFiles = list_files(target)
				target = ListTable()

				for filePath in allFiles:
					filePath = filePath.strip()
					if filePath[-4:].lower() == ".wav":
						fileName = os.path.basename(filePath)
						uttID = fileName[0:-4].replace(".","")
						target[uttID] = filePath
					
					elif filePath[-4:].lower() == '.scp':
						target += load_list_table(filePath)
					
					elif "wav" == useSuffix:
						fileName = os.path.basename(filePath)
						uttID = fileName.replace(".","")
						target[uttID] = filePath

					elif "scp" == useSuffix:
						target += load_list_table(filePath)

					else:
						raise UnsupportedType('Unknown file suffix. You can specify <useSuffix> as "wav" or "scp".')
				
				if len(target) == 0:
					raise WrongDataFormat("There did not include any data to compute data in target.")

				targets[index] = target
			
			elif type_name(target) == "WavSegment":

				segTemp = fhm.create("w+",suffix=".seg",encoding="utf-8")
				target.save(segTemp)
				segments.append(segTemp.name)

				targets[index] = target.detach_wav()
				fromSegment = True

		if fromSegment:
			# define the command pattern
			cmdPattern = "extract-segments scp:{wavFile} {segment} ark:- | {kaldiTool} ark:- ark:{outFile}"
			# define resources
			resources = {"wavFile":targets,"segment":segments,"kaldiTool":kaldiTools,"outFile":outFiles}
		else:
			# define the command pattern
			cmdPattern = "{kaldiTool} scp:{wavFile} ark:{outFile}"
			# define resources
			resources = {"wavFile":targets,"kaldiTool":kaldiTools,"outFile":outFiles}

		# Run inside the file handle manager so the temporary segment files still exist.
		return run_kaldi_commands_parallel(resources,cmdPattern,analyzeResult=True,generateArchive="feat",archiveNames=names)
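
Since this is a private base function, the public feature extractors presumably wrap it. A hedged sketch of such a wrapper (compute_mfcc and the exact option string are illustrative assumptions, not the library's verified API):

def compute_mfcc(target, rate=16000, name="mfcc", outFile=None):
	# build the Kaldi tool invocation and delegate to the base function
	kaldiTool = f"compute-mfcc-feats --sample-frequency={rate}"
	return __compute_feature(target, kaldiTool, useSuffix=None, name=name, outFile=outFile)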
Example #6
def wer(ref, hyp, ignore=None, mode='all'):
    '''
	Compute WER (word error rate) between <ref> and <hyp>. 

	Args:
		<ref>,<hyp>: exkaldi transcription object or file path.
		<ignore>: a symbol to ignore.
		<mode>: "all" or "present".

	Return:
		a namedtuple of score information.
	'''
    declare.is_potential_transcription("ref", ref)
    declare.is_potential_transcription("hyp", hyp)
    declare.is_instances("mode", mode, ['all', 'present'])
    declare.kaldi_existed()

    if ignore is not None:
        declare.is_valid_string("ignore", ignore)

    with FileHandleManager() as fhm:

        if ignore is None:

            if type_name(hyp) == "Transcription":
                hypTemp = fhm.create("w+", suffix=".txt", encoding="utf-8")
                hyp.save(hypTemp)
                hyp = hypTemp.name

            if type_name(ref) == "Transcription":
                refTemp = fhm.create("w+", suffix=".txt", encoding="utf-8")
                ref.save(refTemp)
                ref = refTemp.name

            cmd = f'compute-wer --text --mode={mode} ark:{ref} ark,p:{hyp}'
            scoreOut, scoreErr, _ = run_shell_command(cmd,
                                                      stdout="PIPE",
                                                      stderr="PIPE")

        else:
            # remove the ignored symbol from hyp
            if type_name(hyp) == "Transcription":
                hyp = hyp.save()
            else:
                with open(hyp, "r", encoding="utf-8") as fr:
                    hyp = fr.read()
            hypTemp = fhm.create("w+", suffix=".txt", encoding="utf-8")
            cmd = f'sed "s/{ignore} //g" > {hypTemp.name}'
            # stdout is redirected into the temp file, so check the exit code instead
            _, err, cod = run_shell_command(cmd,
                                            stdin="PIPE",
                                            stdout="PIPE",
                                            stderr="PIPE",
                                            inputs=hyp)
            if cod != 0:
                raise WrongDataFormat("<hyp> has a wrong data format.",
                                      err.decode())
            # remove the ignored symbol from ref
            if type_name(ref) == "Transcription":
                ref = ref.save()
            else:
                with open(ref, "r", encoding="utf-8") as fr:
                    ref = fr.read()
            refTemp = fhm.create("w+", suffix=".txt", encoding="utf-8")
            cmd = f'sed "s/{ignore} //g" > {refTemp.name}'
            # as above, stdout goes to the temp file, so rely on the exit code
            _, err, cod = run_shell_command(cmd,
                                            stdin="PIPE",
                                            stdout="PIPE",
                                            stderr="PIPE",
                                            inputs=ref)
            if cod != 0:
                raise WrongDataFormat("<ref> has a wrong data format.",
                                      err.decode())
            # score
            cmd = f'compute-wer --text --mode={mode} ark:{refTemp.name} ark,p:{hypTemp.name}'
            scoreOut, scoreErr, _ = run_shell_command(cmd,
                                                      stdout="PIPE",
                                                      stderr="PIPE")

    if len(scoreOut) == 0:
        raise KaldiProcessError("Failed to compute WER.", scoreErr.decode())
    else:
        out = scoreOut.decode().split("\n")
        pattern1 = r'%WER (.*) \[ (.*) \/ (.*),(.*) ins,(.*) del,(.*) sub \]'
        pattern2 = r"%SER (.*) \[ (.*) \/ (.*) \]"
        pattern3 = r"Scored (.*) sentences,(.*) not present in hyp."
        s1 = re.findall(pattern1, out[0])[0]
        s2 = re.findall(pattern2, out[1])[0]
        s3 = re.findall(pattern3, out[2])[0]

        return namedtuple("Score", [
            "WER", "words", "insErr", "delErr", "subErr", "SER", "sentences",
            "wrongSentences", "missedSentences"
        ])(
            float(s1[0]),  #WER
            int(s1[2]),  #words
            int(s1[3]),  #ins
            int(s1[4]),  #del
            int(s1[5]),  #sub
            float(s2[0]),  #SER
            int(s2[1]),  #sentences
            int(s2[2]),  #wrong sentences
            int(s3[1])  #missed sentences
        )
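
A minimal usage sketch (the transcription paths are hypothetical):

score = wer("data/test/text", "exp/decode/hyp.txt", mode="all")
# the fields follow the namedtuple built above
print(score.WER, score.insErr, score.delErr, score.subErr)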
Example #7
def train_ngrams_srilm(lexicons, order, text, outFile, config=None):
    '''
	Train an N-Grams language model with the SRILM toolkit.
	If you don't specify a discounting method via the <config> option, "kndiscount" is used by default.

	Args:
		<lexicons>: an exkaldi LexiconBank object.
		<order>: the maximum order of N-Grams.
		<text>: a text corpus file or an exkaldi transcription object.
		<outFile>: output file name of arpa LM.
		<config>: extra configurations, a Python dict object.

	You can use .check_config("train_ngrams_srilm") function to get a reference of extra configurations.
	Also you can run shell command "ngram-count" to look their usage.
	'''
    declare.is_lexicon_bank("lexicons", lexicons)
    declare.is_positive_int("order", order)
    declare.is_potential_transcription("text", text)
    declare.is_valid_file_name("outFile", outFile)
    # verify the max order
    declare.less_equal("order", order, "max order", 9)
    # prepare srilm tool
    ExkaldiInfo.prepare_srilm()

    with FileHandleManager() as fhm:
        # check whether this is a reasonable text corpus that is split by spaces.
        if isinstance(text, str):
            cmd = f"shuf {text} -n 100"
            out, err, cod = run_shell_command(cmd,
                                              stdout="PIPE",
                                              stderr="PIPE")
            if (isinstance(cod, int) and cod != 0):
                print(err.decode())
                raise ShellProcessError(
                    f"Failed to sample from text file:{text}.")
            elif out == b'':
                raise WrongDataFormat(f"Void text file:{text}.")
            else:
                out = out.decode().strip().split("\n")
                spaceCount = 0
                for line in out:
                    spaceCount += line.count(" ")
                if spaceCount < len(out) // 2:
                    raise WrongDataFormat(
                        "The text file doesn't seem to be separated by spaces or sentences are extremely short."
                    )

        else:
            sampleText = text.subset(nRandom=100)
            spaceCount = 0
            for key, value in sampleText.items():
                assert isinstance(
                    value, str
                ), f"Transcription must be string but got: {type_name(value)}."
                spaceCount += value.count(" ")
            if spaceCount < len(sampleText) // 2:
                raise WrongDataFormat(
                    "The text file doesn't seem to be separated by spaces or sentences are extremely short."
                )
            textTemp = fhm.create("a+", suffix=".txt", encoding="utf-8")
            text.save(textTemp, discardUttID=True)
            text = textTemp.name

        unkSymbol = lexicons("oov")

        wordlistTemp = fhm.create("w+", encoding='utf-8', suffix=".txt")
        words = lexicons("words")
        words = "\n".join(words.keys())
        wordlistTemp.write(words)
        wordlistTemp.seek(0)

        extraConfig = " "
        specifyDiscount = False
        if config is not None:
            if check_config(name='train_ngrams_srilm', config=config):
                for key, value in config.items():
                    if isinstance(value, bool):
                        if value is True:
                            extraConfig += f"{key} "
                        if key.endswith("discount"):
                            specifyDiscount = True
                    else:
                        extraConfig += f" {key} {value}"

        cmd = f'ngram-count -text {text} -order {order} -limit-vocab -vocab {wordlistTemp.name} -unk -map-unk "{unkSymbol}" '
        # append the extra configurations collected above
        cmd += extraConfig.strip() + " "
        if specifyDiscount is False:
            cmd += "-kndiscount "
        cmd += "-interpolate "

        if not outFile.rstrip().endswith(".arpa"):
            outFile += ".arpa"
        make_dependent_dirs(outFile, pathIsFile=True)
        cmd += f" -lm {outFile}"

        out, err, cod = run_shell_command(cmd, stderr="PIPE")

        if (isinstance(cod, int) and cod != 0) or (
                not os.path.isfile(outFile)) or os.path.getsize(outFile) == 0:
            print(err.decode())
            if os.path.isfile(outFile):
                os.remove(outFile)
            raise KaldiProcessError(
                'Failed to generate the N-Grams language model.')

        return outFile
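
A minimal usage sketch (lexicons stands for a previously prepared exkaldi LexiconBank; the paths are hypothetical):

arpaFile = train_ngrams_srilm(lexicons, order=3,
                              text="data/train/text.txt",
                              outFile="lm/3gram.arpa")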
Example #8
def train_ngrams_kenlm(lexicons, order, text, outFile, config=None):
    '''
	Train an N-Grams language model with the KenLM toolkit.

	Args:
		<lexicons>: an exkaldi LexiconBank object.
		<order>: the maximum order of N-Grams.
		<text>: a text corpus file or an exkaldi transcription object.
		<outFile>: output file name of arpa LM.
		<config>: extra configurations, a Python dict object.

	You can use .check_config("train_ngrams_kenlm") function to get a reference of extra configurations.
	Also you can run shell command "lmplz" to look their usage.
	'''
    declare.is_lexicon_bank("lexicons", lexicons)
    declare.is_positive_int("order", order)
    declare.is_potential_transcription("text", text)
    declare.is_valid_file_name("outFile", outFile)

    declare.less_equal("order", order, "max order", 9)

    with FileHandleManager() as fhm:
        # check whether this is a reasonable text corpus that is split by spaces.
        if isinstance(text, str):
            cmd = f"shuf {text} -n 100"
            out, err, cod = run_shell_command(cmd,
                                              stdout="PIPE",
                                              stderr="PIPE")
            if (isinstance(cod, int) and cod != 0):
                print(err.decode())
                raise ShellProcessError(
                    f"Failed to sample from text file:{text}.")
            elif out == b'':
                raise WrongDataFormat(f"Void text file:{text}.")
            else:
                out = out.decode().strip().split("\n")
                spaceCount = 0
                for line in out:
                    spaceCount += line.count(" ")
                if spaceCount < len(out) // 2:
                    raise WrongDataFormat(
                        "The text file doesn't seem to be separated by spaces or sentences are extremely short."
                    )

        else:
            sampleText = text.subset(nRandom=100)
            spaceCount = 0
            for key, value in sampleText.items():
                assert isinstance(
                    value, str
                ), f"Transcription must be string but got: {type_name(value)}."
                spaceCount += value.count(" ")
            if spaceCount < len(sampleText) // 2:
                raise WrongDataFormat(
                    "The text file doesn't seem to be separated by spaces or sentences are extremely short."
                )
            textTemp = fhm.create("a+", suffix=".txt", encoding="utf-8")
            text.save(textTemp, discardUttID=True)
            text = textTemp.name

        extraConfig = " "
        if config is not None:
            if check_config(name='train_ngrams_kenlm', config=config):
                if "--temp_prefix" in config.keys() and "-T" in config.keys():
                    raise WrongOperation(
                        f'"--temp_prefix" and "-T" is the same configuration so only one of them is expected.'
                    )
                if "--memory" in config.keys() and "-S" in config.keys():
                    raise WrongOperation(
                        f'"--memory" and "-S" is the same configuration so only one of them is expected.'
                    )
                for key, value in config.items():
                    if isinstance(value, bool):
                        if value is True:
                            extraConfig += f"{key} "
                    else:
                        extraConfig += f"{key} {value} "

        if not outFile.rstrip().endswith(".arpa"):
            outFile += ".arpa"
        make_dependent_dirs(outFile, pathIsFile=True)

        wordlistTemp = fhm.create("w+", encoding='utf-8', suffix=".txt")
        words = lexicons("words")
        words_count = math.ceil(len(words) / 10) * 10
        words = "\n".join(words.keys())
        wordlistTemp.write(words)
        wordlistTemp.seek(0)

        KenLMTool = os.path.join(sys.prefix, "exkaldisrc", "tools", "lmplz")

        cmd = f"{KenLMTool}{extraConfig}-o {order} --vocab_estimate {words_count} --text {text} --arpa {outFile} --limit_vocab_file {wordlistTemp.name}"
        out, err, cod = run_shell_command(cmd, stderr="PIPE")

        if (isinstance(cod, int) and cod != 0) or (
                not os.path.isfile(outFile)) or (os.path.getsize(outFile)
                                                 == 0):
            print(err.decode())
            if os.path.isfile(outFile):
                os.remove(outFile)
            raise KenlmProcessError("Failed to generate arpa file.")

        return outFile
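
A minimal usage sketch (again, lexicons is a prepared LexiconBank and the paths are hypothetical):

arpaFile = train_ngrams_kenlm(lexicons, order=4,
                              text="data/train/text.txt",
                              outFile="lm/4gram.arpa")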
Example #9
def run_kaldi_commands_parallel(resources,cmdPattern,analyzeResult=True,timeout=ExKaldiInfo.timeout,generateArchive=None,archiveNames=None):
	'''
	Map resources onto a command pattern and run the resulting commands in parallel.

	Args:
		<resources>: a dict whose keys are resource names and whose values are lists of resource objects.
				For example: {"feat": [BytesFeat01, BytesFeat02, ...], "outFile": ["newFeat01.ark", "newFeat02.ark", ...]}.
				The "outFile" resource is necessary.
				When there is only one process to run, "outFile" can be "-", which means the standard output stream.

		<cmdPattern>: a string needed to map the resources.
					For example: "copy-feat {feat} ark:{outFile}".
	
	Return:
		a list of triples (return code, error info, output file or buffer); a single result when only one process runs.
	'''
	declare.kaldi_existed()
	declare.is_classes("resources",resources,dict)
	declare.is_classes("cmdPattern",cmdPattern,str)
	assert "outFile" in resources.keys(),"<outFile> key and value is necessary in recources."

	declare.members_are_classes("the values of resources",resources.values(),[list,tuple])
	if generateArchive is not None:
		analyzeResult = True # forcibly analyze the result

	# check the format of the command pattern
	nameIndexs = [ i for i,c in enumerate(cmdPattern) if c == "{" or c == "}" ]
	assert len(nameIndexs)%2 == 0,f"The numbers of braces do not match in command pattern: '{cmdPattern}'. "
	auxiliaryInfo = {}
	for i in range(0,len(nameIndexs),2):
		name = cmdPattern[nameIndexs[i]+1:nameIndexs[i+1]]
		if name not in resources:
			raise WrongDataFormat(f"Resource is necessary but has not been provided: {name}.")
		prefix = "" if nameIndexs[i] == 0 else cmdPattern[nameIndexs[i]-1]
		if name in auxiliaryInfo.keys():
			auxiliaryInfo[name][0] += 1
			if not prefix in auxiliaryInfo[name][1]:
				auxiliaryInfo[name][1] += prefix
		else:
			auxiliaryInfo[name] = [1,prefix]

	assert "outFile" in auxiliaryInfo.keys(),"Key: <outFile> is necessary in command pattern."
	_outFileCountInfo = auxiliaryInfo.pop("outFile")
	assert _outFileCountInfo[0] == 1, f"<outFile> is only allowed to appear once in the command pattern, but it appears {_outFileCountInfo[0]} times."
	outFiles = resources.pop("outFile")

	for outFile in outFiles:
		if outFile != "-":
			make_dependent_dirs(outFile,pathIsFile=True)
	parallel = len(outFiles)

	if generateArchive is not None:
		declare.is_instances("generateArchive",generateArchive,["feat","cmvn","ali","fmllr"])
		if archiveNames is None:
			archiveNames = [ generateArchive for i in range(parallel)]
		elif isinstance(archiveNames,str):
			archiveNames = [ archiveNames for i in range(parallel)]
		elif isinstance(archiveNames,(list,tuple)):
			declare.equal("the number of achieve names",len(archiveNames),"parallel",parallel)
		else:
			raise UnsupportedType(f"<archiveNames> should be string or list or tuple but got: {type_name(archiveNames)}.")

	# regulate resources and run
	with FileHandleManager() as fhm:

		newResources = {}
		if parallel == 1:
			# Detect whether there is PIPE in command pattern.
			testPlaceholder = dict( (key,value[0]) if isinstance(value[0],str) else (key,"placeholder") for key,value in resources.items() )
			testPlaceholder["outFile"] = "placeholder"
			testCmd = cmdPattern.format(**testPlaceholder)
			if "|" in testCmd:
				inputsBuffer = False
			else:
				inputsBuffer = True
			del testPlaceholder
			# regulate resources
			for key,countPrefix in auxiliaryInfo.items():
				count,prefix = countPrefix
				target = resources[key][0]

				# If target is a list-table, we cannot automatically decide whether it is scp-format or ark-format,
				# so you should specify it in the command pattern.
				if type_name(target) in ["ListTable","Transcription"]:
					if prefix not in [":","="]:
						errMes = f"There might miss prefix such as 'ark:' or 'scp:' or '--option=' in command pattern before resource: {key}."
						errMes += "Check the command line please. If you still think there dose not need the prefix,"
						errMes += "save this ListTable or Transcription into file and instead it will this file name."
						errMes += "In that case,we will skip checking the prefix."
						raise WrongOperation(errMes)

					target = target.sort()
					if (inputsBuffer is True) and count == 1:
						inputsBuffer = target.save()
						newResources[key] = "-"
					else:
						targetTemp = fhm.create("w+",encoding="utf-8")
						target.save(targetTemp)
						newResources[key] = f"{targetTemp.name}"

				# If target is an index-table,we automatically recognize it as scp-file,so you do not need appoint it.
				elif type_name(target) == "IndexTable":
					if prefix != " ":
						errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
						errMes += f"Because we will decide the prefix depending on its data type."
						raise WrongOperation(errMes)
						
					target = target.sort()
					if (inputsBuffer is True) and count == 1:
						inputsBuffer = target.save()
						newResources[key] = "scp:-"
					else:
						targetTemp = fhm.create("w+",suffix=".scp",encoding="utf-8")
						target.save(targetTemp)
						newResources[key] = f"scp:{targetTemp.name}"
				
				elif isinstance(target,(str,int,float)):
					# file or other value parameter
					newResources[key] = f"{target}"
			
				elif isinstance(target,(BytesMatrix,BytesVector)):
					if prefix != " ":
						errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
						errMes += f"Because we will decide the prefix depending on its data type."						
						raise WrongOperation(errMes)

					target = target.sort()
					if (inputsBuffer is True) and count == 1:
						inputsBuffer = target.data
						newResources[key] = "ark:-"
					else:					
						targetTemp = fhm.create("wb+",suffix=".ark")
						target.save(targetTemp)
						newResources[key] = f"ark:{targetTemp.name}"		

				elif isinstance(target,(NumpyMatrix,NumpyVector)):
					if prefix != " ":
						errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
						errMes += f"Because we will decide the prefix depending on its data type."		
						raise WrongOperation(errMes)

					target = target.sort()
					if (inputsBuffer is True) and count == 1:
						inputsBuffer = target.to_bytes().data
						newResources[key] = "ark:-"
					else:
						target = target.to_bytes()
						targetTemp = fhm.create("wb+",suffix=".ark")
						target.save(targetTemp)
						newResources[key] = f"ark:{targetTemp.name}"	

				elif isinstance(target,BytesArchive):
					if (inputsBuffer is True) and count == 1:
						inputsBuffer = target.data
						newResources[key] = "-"
					else:
						targetTemp = fhm.create("wb+")
						target.save(targetTemp)
						newResources[key] = f"{targetTemp.name}"

				else:
					raise UnsupportedType(f"<target> should be IndexTable,ListTable,file name,int or float value,or exkaldi achieve object but got: {type_name(target)}.")
			
			# Then, process the output stream
			outFile = outFiles[0]
			newResources["outFile"] = outFile
			inputsBuffer = None if isinstance(inputsBuffer,bool) else inputsBuffer
			# Then run the command
			finalCmd = cmdPattern.format(**newResources)
			out,err,cod = run_shell_command(finalCmd,stdin="PIPE",stdout="PIPE",stderr="PIPE",inputs=inputsBuffer)
			
			if analyzeResult:
				if cod != 0:
					finalCmd = ",".join([cmd.strip().split(maxsplit=1)[0] for cmd in finalCmd.split("|")])
					raise KaldiProcessError(f"Failed to run Kaldi command: {finalCmd}.",err.decode())
			
			if outFile == "-":
				if generateArchive is not None:
					if generateArchive == "feat":
						out = BytesFeat(data=out,name=archiveNames[0])
					elif generateArchive == "ali":
						out = BytesAliTrans(data=out,name=archiveNames[0])
					elif generateArchive == "cmvn":
						out = BytesCMVN(data=out,name=archiveNames[0])
					else:
						out = BytesFmllr(data=out,name=archiveNames[0])
					return out
				else:
					return (cod,err,out)
			else:
				if generateArchive is not None:
					return load_index_table(outFile,name=archiveNames[0],useSuffix="ark")
				else:
					return (cod,err,outFile)

		else:
			# In this case, all input IO streams must be files.
			for key,countPrefix in auxiliaryInfo.items():
				count,prefix = countPrefix
				values = resources[key]
				newValues = []
				for target in values:

					# If target is scp resource
					if type_name(target) in ["ListTable","Transcription"]:
						if prefix not in [":","="]:
							errMes = f"There might miss prefix such as 'ark:' or 'scp:' or '--option=' in command pattern before resource: {key}."
							errMes += "Check the command line please. If you still think there dose not need the prefix,"
							errMes += "save this ListTable or Transcription into file and instead it will this file name."
							errMes += "In that case,we will skip checking the prefix."
							raise WrongOperation(errMes)		

						target = target.sort()
						targetTemp = fhm.create("w+",encoding="utf-8")
						target.save(targetTemp)
						newValues.append(f"{targetTemp.name}")						

					elif type_name(target) == "IndexTable":
						if prefix != " ":
							errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
							errMes += f"Because we will decide the prefix depending on its data type."
							raise WrongOperation(errMes)		

						target = target.sort()
						targetTemp = fhm.create("w+",suffix=".scp",encoding="utf-8")
						target.save(targetTemp)
						newValues.append(f"scp:{targetTemp.name}")
				
					elif isinstance(target,(str,float,int)):
						# file name or other value parameters
						newValues.append(f"{target}")
				
					elif isinstance(target,(BytesMatrix,BytesVector)):
						if prefix != " ":
							errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
							errMes += f"Because we will decide the prefix depending on its data type."						
							raise WrongOperation(errMes)	

						target = target.sort()
						targetTemp = fhm.create("wb+",suffix=".ark")
						target.save(targetTemp)
						newValues.append(f"ark:{targetTemp.name}")			

					elif isinstance(target,(NumpyMatrix,NumpyVector)):
						if prefix != " ":
							errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
							errMes += f"Because we will decide the prefix depending on its data type."						
							raise WrongOperation(errMes)

						target = target.sort().to_bytes()
						targetTemp = fhm.create("wb+",suffix=".ark")
						target.save(targetTemp)
						newValues.append(f"ark:{targetTemp.name}")

					elif isinstance(target,BytesArchive):
						targetTemp = fhm.create("wb+")
						target.save(targetTemp)	
						newValues.append(f"{targetTemp.name}")

					else:
						raise UnsupportedType(f"<target> should be IndexTable,ListTable,Transcription,file,int or float values or exkaldi achieve object but got: {type_name(target)}.")
				
				newResources[key] = newValues
			
			newResources["outFile"] = outFiles
			# assign these resources to each process and generate multiple commands
			parallelResources = []
			for i in range(parallel):
				parallelResources.append({})
				for key,items in newResources.items():
					parallelResources[-1][key] = items[i]
			cmds = [ cmdPattern.format(**re) for re in parallelResources ]
			# run
			flags = run_shell_command_parallel(cmds,timeout=timeout)

			finalResult = []
			done = True
			for index,info in enumerate(flags):
				cod,err = info
				if analyzeResult and cod != 0:
					print(f"{index}/{len(flags)} error tracking")
					print(err.decode())
					done = False	
				finalResult.append( (cod,err,outFiles[index]) )

			if analyzeResult and (not done):
				finalCmd = ",".join([cmd.strip().split(maxsplit=1)[0] for cmd in cmds[0].split("|")])
				raise KaldiProcessError(f"Failed to run Kaldi command: {finalCmd}. Look the error messages above.")
			else:
				if generateArchive is not None:
					for i,fileName in enumerate(outFiles):
						finalResult[i] = load_index_table(fileName,name=archiveNames[i],useSuffix="ark")

			return finalResult
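
A hedged usage sketch of a two-process run, mirroring the docstring above (feat01 and feat02 stand for hypothetical exkaldi feature objects):

resources = {
	"feat": [feat01, feat02],
	"outFile": ["mfcc01.ark", "mfcc02.ark"],
}
# each process effectively runs "copy-feats ark:<tempfile> ark:<outFile>"
results = run_kaldi_commands_parallel(resources, "copy-feats {feat} ark:{outFile}")
for cod, err, outFile in results:
	print(cod, outFile)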