Example #1
def merge_archives(archives):
	'''
	Merge multiple archives to one.
	In particular, exkaldi Lattice objects also support this operation.
	The archives are combined with the "+" operation.

	Args:
		<archives>: a list or tuple of multiple exkaldi archive objects which are the same class.
	
	Return:
		a new archive object.
	'''
	declare.is_classes("archives",archives,(list,tuple))
	declare.not_void("archives",archives)
	
	if type_name(archives[0]) != "Lattice":
		declare.belong_classes("archives",archives[0],[BytesMatrix,BytesVector,ListTable,NumpyMatrix,NumpyVector])

	result = archives[0]
	typeName = type_name(archives[0])
	names = [archives[0].name]

	for ark in archives[1:]:
		assert type_name(ark) == typeName,f"All archives needed to be merged must be the same class but got: {typeName}!={type_name(ark)}."
		result += ark
		names.append(ark.name)
	
	names = ",".join(names)
	result.rename(f"merge({names})")
	return result
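
# A minimal usage sketch of merge_archives (assumes the exkaldi package is
# installed so that load_list_table and merge_archives above are importable;
# the wav paths are hypothetical). Two ListTable archives of the same class
# are merged and the result is renamed automatically.
tabA = load_list_table({"utt1": "/path/a.wav"}, name="tabA")
tabB = load_list_table({"utt2": "/path/b.wav"}, name="tabB")
merged = merge_archives([tabA, tabB])
print(merged.name)            # -> "merge(tabA,tabB)"
print(list(merged.keys()))    # -> ["utt1", "utt2"]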
Example #2
def spk_to_utt(spks,spk2utt):
	'''
	Accept a list of speaker IDs and return their corresponding utterance IDs.

	Args:
		<spks>: a string or list or tuple of speaker IDs.
		<spk2utt>: spk2utt file or ListTable object.
	
	Return:
		a list of utterance IDs.
	'''
	declare.is_classes("speaker IDs",spks,(str,tuple,list))

	if not isinstance(spks,str):
		declare.members_are_valid_strings("speaker IDs",spks)
	else:
		spks = [spks,]
		
	declare.is_potential_list_table("spk2utt",spk2utt)
	if isinstance(spk2utt,str):
		spk2utt = load_list_table(spk2utt)
	
	utts = []
	for spk in spks:
		try:
			utt = spk2utt[spk]
		except KeyError:
			raise WrongOperation(f"Miss speaker ID {spk} in spk2utt map.")
		else:
			declare.is_valid_string("The value of spk2utt",utt)
			utts.extend(utt.strip().split())
	
	return sorted(list(set(utts)))
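
# A small usage sketch (assumes exkaldi is installed). The spk2utt mapping can
# be a file path or, as here, a ListTable built from a dict; each value is a
# space-separated list of utterance IDs.
spk2utt = load_list_table({"spk1": "utt1 utt2", "spk2": "utt3"}, name="spk2utt")
print(spk_to_utt(["spk1", "spk2"], spk2utt))   # -> ['utt1', 'utt2', 'utt3']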
Example #3
def log_softmax(data, axis=1):
    '''
	The log-softmax function.

	Args:
		<data>: a Numpy array.
		<axis>: the dimension to softmax.
	Return:
		A new array.
	'''
    declare.is_classes("data", data, np.ndarray)
    if len(data.shape) == 1:
        axis = 0
    declare.in_boundary("axis", axis, 0, len(data.shape) - 1)

    dataShape = list(data.shape)
    dataShape[axis] = 1
    maxValue = data.max(axis, keepdims=True)
    dataNor = data - maxValue

    dataExp = np.exp(dataNor)
    dataExpSum = np.sum(dataExp, axis)
    dataExpSumLog = np.log(dataExpSum) + maxValue.reshape(dataExpSum.shape)

    return data - dataExpSumLog.reshape(dataShape)
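
# Quick numeric check of log_softmax (assumes numpy has been imported as np and
# the exkaldi declare helpers used above are available).
x = np.array([[1.0, 2.0, 3.0], [0.0, 0.0, 0.0]], dtype=np.float32)
logp = log_softmax(x, axis=1)
print(np.exp(logp).sum(axis=1))   # each row sums to (approximately) 1.0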
Example #4
def use_cmvn_sliding(feat,windowSize=None,std=False):
	'''
	Apply sliding CMVN statistics.

	Args:
		<feat>: exkaldi feature object.
		<windowSize>: windows size,If None,use windows size greater_equal than the frames of feature.
		<std>: a bool value.
	
	Return:
		exkaldi feature object.
	'''
	declare.is_classes("feat",feat, ["BytesFeature","NumpyFeature"])
	declare.is_bool("std",std)

	if windowSize is None:
		featLen = feat.lens[1]
		maxLen = max([length for utt,length in featLen])
		windowSize = math.ceil(maxLen/100)*100
	else:
		declare.is_positive_int("windowSize",windowSize)

	if std:
		std='true'
	else:
		std='false'

	cmd = f'apply-cmvn-sliding --cmn-window={windowSize} --min-cmn-window=100 --norm-vars={std} ark:- ark:-'
	out,err,cod = run_shell_command(cmd,stdin="PIPE",stderr="PIPE",stdout="PIPE",inputs=feat.data)
	if cod != 0:
		print(err.decode())
		raise KaldiProcessError("Failed to compute sliding cmvn.")
	
	newName = f"cmvn({feat.name},{windowSize})"
	return BytesFeature(out,name=newName,indexTable=None)
Example #5
def utt_to_spk(utts,utt2spk):
	'''
	Accept a list of utterance IDs and return their corresponding speaker IDs.

	Args:
		<utts>: a string or list or tuple of utterance IDs.
		<utt2spk>: utt2spk file or ListTable object.
	
	Return:
		a list of speaker IDs.
	'''
	declare.is_classes("utterance IDs",utts,(str,tuple,list))
	if not isinstance(utts,str):
		declare.members_are_valid_strings("utterance IDs",utts)
	else:
		utts = [utts,]	

	declare.is_potential_list_table("utt2spk",utt2spk)
	if isinstance(utt2spk,str):
		utt2spk = load_list_table(utt2spk)
	
	spks = []
	for utt in utts:
		try:
			spk = utt2spk[utt]
		except KeyError:
			raise WrongOperation(f"Miss utterance ID {utt} in utt2spk map.")
		else:
			declare.is_valid_string("The value of utt2spk",utt)
			spktemp = spk.strip().split(maxsplit=1)
			assert len(spktemp) == 1,f"speaker ID in utt2spk has unexpected space: {spk}."
			spks.append(spktemp[0])
	
	return sorted(list(set(spks)))
Example #6
def load_list_table(target, name="listTable"):
    '''
	Generate a list table object from dict object or file.

	Args:
		<target>: dict object or a file path.
	
	Return:
		a ListTable object.
	'''
    declare.is_classes("target", target, [dict, ListTable, str])

    newTable = ListTable(name=name)
    if type_name(target) in ["dict", "ListTable"]:
        newTable.update(target)
        return newTable

    else:
        files = list_files(target)
        for filePath in files:
            with open(filePath, "r", encoding="utf-8") as fr:
                lines = fr.readlines()
            for index, line in enumerate(lines, start=1):
                t = line.strip().split(maxsplit=1)
                if len(t) < 2:
                    raise WrongDataFormat(
                        f"Line Number: {index}\n" + f"Line Content: {line}\n" +
                        f"Missing paired key and value information in file:{filePath}."
                    )
                else:
                    newTable[t[0]] = t[1]

        return newTable
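
# Usage sketch: a ListTable can be built from a dict, or from a text file whose
# lines look like "<key> <value>" (the file paths below are hypothetical).
table = load_list_table({"utt1": "/data/utt1.wav", "utt2": "/data/utt2.wav"})
print(table["utt1"])
# table = load_list_table("data/wav.scp")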
Example #7
def test_is_classes_and_belong_classes():
    class A:
        def __init__(self):
            pass

    class B(A):
        def __init__(self):
            pass

    b = B()

    declare.is_classes("test object", b, B)
    declare.belong_classes("test object", b, A)
Example #8
def load_transcription(target, name="transcription", checkSpace=True):
    '''
	Load transcription from file.

	Args:
		<target>: transcription file path.
		<name>: a string.
		<checkSpace>: a bbol value. If True,we will check the validity of the number of spaces.

	Return:
		An exkaldi Transcription object.
	'''
    declare.is_classes("target", target,
                       ["dict", "Transcription", "ListTable", "str"])
    declare.is_bool("checkSpace", checkSpace)

    if isinstance(target, str):
        declare.is_file("target", target)
        with open(target, "r", encoding="utf-8") as fr:
            lines = fr.readlines()
        result = Transcription(name=name)
        for index, line in enumerate(lines, start=1):
            t = line.strip().split(maxsplit=1)
            if len(t) < 2:
                print(f"Line Number: {index}")
                print(f"Line Content: {line}")
                raise WrongDataFormat(
                    "Missing entire key and value information.")
            else:
                result[t[0]] = t[1]
    else:
        for utt, utterance in target.items():
            declare.is_valid_string("utterance ID", utt)
            declare.is_valid_string("utterance", utterance)
        result = Transcription(target, name=name)

    if checkSpace:
        sampleText = result.subset(nRandom=100)
        spaceCount = 0
        for key, value in sampleText.items():
            spaceCount += value.count(" ")
        if spaceCount < len(sampleText) // 2:
            errMes = "The transcription doesn't seem to be separated by spaces or extremely short."
            errMes += "If it actually has right format, set the <checkSpace>=False and run this function again."
            raise WrongDataFormat(errMes)

    return result
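
# Usage sketch (assumes exkaldi is installed). A dict works directly; a file
# with "<uttID> <word1> <word2> ..." lines is loaded the same way. <checkSpace>
# guards against transcriptions that are not space-separated.
trans = load_transcription({"utt1": "hello world", "utt2": "good morning"})
print(trans["utt1"])   # -> "hello world"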
Example #9
def run_shell_command_parallel(cmds, env=None, timeout=ExkaldiInfo.timeout):
    '''
	Run shell commands with multiple processes.
	In this mode, the input and output streams are not allowed to be PIPEs.
	If you mistakenly assign a buffer as the input or output stream, a timeout error is raised to avoid deadlock.
	You can therefore increase the timeout value to deal with a large corpus, as long as files are correctly used as the input and output streams.

	Args:
		<cmds>: a list of strings. Each string should be a command and its options.
		<env>: If None,use exkaldi.version.ENV defaultly.
		<timeout>: a int value. Its the total timeout value of all processes.

	Return:
		a list of pairs: return code and error information.
	'''
    declare.is_classes("cmds", cmds, [tuple, list])
    declare.is_positive_int("timeout", timeout)

    if env is None:
        env = ExkaldiInfo.ENV

    processManager = {}
    for index, cmd in enumerate(cmds):
        declare.is_valid_string("cmd", cmd)
        processManager[index] = subprocess.Popen(cmd,
                                                 shell=True,
                                                 stderr=subprocess.PIPE,
                                                 env=env)

    runningProcess = len(processManager)
    if runningProcess == 0:
        raise WrongOperation("<cmds> has not any command to run.")
    dtimeout = timeout // runningProcess
    assert dtimeout >= 1, f"<timeout> is extremely short: {timeout}."
    for ID, p in processManager.items():
        try:
            out, err = p.communicate(timeout=dtimeout)
        except subprocess.TimeoutExpired:
            p.kill()
            errMes = b"Time Out Error: Process was killed! If you are exactly running the right program,"
            errMes += b"you can set a greater timeout value by exkaldi.info.set_timeout()."
            processManager[ID] = (-9, errMes)
        else:
            processManager[ID] = (p.returncode, err)

    return list(processManager.values())
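
# Usage sketch: run two independent shell commands concurrently. In this mode
# the input and output streams must be files (or nothing), never PIPEs.
flags = run_shell_command_parallel(["sleep 1", "sleep 2"], timeout=60)
for returnCode, errInfo in flags:
    print(returnCode)   # 0 on success, -9 on timeout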
Example #10
def run_shell_command(cmd,
                      stdin=None,
                      stdout=None,
                      stderr=None,
                      inputs=None,
                      env=None):
    '''
	Run a shell command with Python subprocess.

	Args:
		<cmd>: a string including a shell command and its options.
		<stdin>,<stdout>,<stderr>: IO streams. If "PIPE",use subprocess.PIPE.
		<inputs>: a string or bytes to send to input stream.
		<env>: If None,use exkaldi.version.ENV defaultly.

	Return:
		out,err,returnCode
	'''
    declare.is_valid_string("cmd", cmd)

    if env is None:
        env = ExkaldiInfo.ENV

    if inputs is not None:
        declare.is_classes("inputs", inputs, [str, bytes])
        if isinstance(inputs, str):
            inputs = inputs.encode()

    if stdin == "PIPE":
        stdin = subprocess.PIPE
    if stdout == "PIPE":
        stdout = subprocess.PIPE
    if stderr == "PIPE":
        stderr = subprocess.PIPE

    p = subprocess.Popen(cmd,
                         shell=True,
                         stdin=stdin,
                         stdout=stdout,
                         stderr=stderr,
                         env=env)
    (out, err) = p.communicate(input=inputs)

    return out, err, p.returncode
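
# Usage sketch: capture the stdout of a simple command.
out, err, cod = run_shell_command("echo hello", stdout="PIPE")
if cod == 0:
    print(out.decode().strip())   # -> "hello"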
Example #11
def match_utterances(archives):
	'''
	Pick out utterances whose IDs exist in all provided archives.

	Args:
		<archives>: a list of exkaldi archive objects.
	
	Return:
		a list of new exkaldi archive objects.
	'''
	declare.is_classes("archives",archives,[list,tuple])

	shareKeys = None
	for ark in archives:

		declare.belong_classes("archives",ark,[ListTable,BytesMatrix,BytesVector,NumpyMatrix,NumpyVector] )
		keys = set(ark.keys())

		if shareKeys is None:
			shareKeys = keys
		else:
			shareKeys &= keys

	shareKeys = list(shareKeys)
	if len(shareKeys) == 0:
		raise WrongOperation("Utterance IDs completely missed. We don't think it is reasonable. Please check these archives.")

	results = []
	for ark in archives:
		if len(ark.keys()) == len(shareKeys):
			results.append( ark )
		else:
			oname = ark.name
			ark = ark.subset(keys=shareKeys)
			ark.rename(oname)
			results.append( ark )

	
	return results
Example #12
    def judge(self, key, condition, threshold, byDeltaRatio=False):
        '''
		Usage:  obj.judge('train_loss','<',0.0001,byDeltaRatio=True) or obj.judge('epoch','>=',10)
		
		Check if condition is true. 
		Only collected information will be used to check the condition. So data collecting is expected beforehand.

		Args:
			<key>: the name reported.
			<condition>: a string,condition operators such as ">" or "=="
			<threshold>: a int or float value.
			<byDeltaRatio>: bool value,if true,threshold should be a delta ratio value.
								deltaRatio = abs((value-value_pre)/value) 

		Return:
			True or False. 
		'''
        declare.is_instance("condition operator", condition,
                            ['>', '>=', '<=', '<', '==', '!='])
        declare.is_classes("threshold", threshold, (int, float))

        #if self.currentField != {}:
        #	self.collect_report(plot=False)

        if byDeltaRatio is True:
            p = []
            for i in range(len(self.globalField) - 1, -1, -1):
                if key in self.globalField[i].keys():
                    p.append(self.globalField[i][key])
                if len(p) == 2:
                    value = str(abs((p[0] - p[1]) / p[0]))
                    return eval(value + condition + str(threshold))
            return False
        else:
            for i in range(len(self.globalField) - 1, -1, -1):
                if key in self.globalField[i].keys():
                    value = str(self.globalField[i][key])
                    return eval(value + condition + str(threshold))
            return False
Example #13
def compute_postprob_norm(ali, probDims):
    '''
	Compute alignment counts in order to normalize acoustic model posterior probability.
	For more information, see the Kaldi <analyze-counts> command.

	Args:
		<ali>: exkaldi NumpyAlignmentTrans,NumpyAlignmentPhone or NumpyAlignmentPdf object.
		<probDims>: the dimensionality of posterior probability.
		
	Return:
		A numpy array of the normalization.
	'''
    declare.kaldi_existed()
    declare.is_classes(
        "ali", ali,
        ["NumpyAlignmentTrans", "NumpyAlignmentPhone", "NumpyAlignmentPdf"])
    declare.is_positive_int("probDims", probDims)

    txt = []
    for key, value in ali.items():
        value = " ".join(map(str, value.tolist()))
        txt.append(key + " " + value)
    txt = "\n".join(txt)

    cmd = f"analyze-counts --binary=false --counts-dim={probDims} ark:- -"
    out, err, cod = run_shell_command(cmd,
                                      stdin="PIPE",
                                      stdout="PIPE",
                                      stderr="PIPE",
                                      inputs=txt)
    if (isinstance(cod, int) and cod != 0) or out == b"":
        print(err.decode())
        raise KaldiProcessError('Analyze counts failed.')
    else:
        out = out.decode().strip().strip("[]").strip().split()
        counts = np.array(out, dtype=np.float32)
        countBias = np.log(counts / np.sum(counts))
        return countBias
Example #14
def softmax(data, axis=1):
    '''
	The softmax function.

	Args:
		<data>: a Numpy array.
		<axis>: the dimension to softmax.
		
	Return:
		A new array.
	'''
    declare.is_classes("data", data, np.ndarray)
    if len(data.shape) == 1:
        axis = 0
    declare.in_boundary("axis", axis, 0, len(data.shape) - 1)

    maxValue = data.max(axis, keepdims=True)
    dataNor = data - maxValue

    dataExp = np.exp(dataNor)
    dataExpSum = np.sum(dataExp, axis, keepdims=True)

    return dataExp / dataExpSum
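
# Quick numeric check of softmax (assumes numpy has been imported as np).
x = np.array([[1.0, 2.0, 3.0]], dtype=np.float32)
y = softmax(x, axis=1)
print(y.sum(axis=1))   # -> [1.0]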
Example #15
def list_files(filePaths):
    '''
	List files matching a wildcard (glob) pattern string.

	Args:
		<filePaths>: a string or list or tuple object.
	
	Return:
		A list of file paths.
	'''
    declare.is_classes("filePaths", filePaths, [str, list, tuple])

    def list_one_record(target):
        declare.is_valid_string("filePaths", target)
        cmd = f"ls {target}"
        out, err, cod = run_shell_command(cmd, stdout=subprocess.PIPE)
        if len(out) == 0:
            return []
        else:
            out = out.decode().strip().split("\n")
            newOut = [o for o in out if os.path.isfile(o)]
            return newOut

    if isinstance(filePaths, str):
        outFiles = list_one_record(filePaths)
    else:
        outFiles = []
        for m in filePaths:
            outFiles.extend(list_one_record(m))

    if len(outFiles) == 0:
        raise WrongPath(
            f"No any files have been found through the provided file paths: {filePaths}."
        )

    return outFiles
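
# Usage sketch (the pattern below is a hypothetical path): wildcards are
# expanded through "ls" and only regular files are returned; WrongPath is
# raised if nothing matches.
wavFiles = list_files("data/wav/*.wav")
print(len(wavFiles))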
Example #16
    def send_report(self, info):
        '''
		Send information; it will be retained until you compute the statistics by using .collect_report().

		Args:
			<info>: a Python dict object includiing names and their values with int or float type.
					such as {"epoch":epoch,"train_loss":loss,"train_acc":acc}
					The value can be Python int,float object,Numpy int,float object or NUmpy ndarray with only one value.
		'''
        declare.is_classes("info", info, dict)

        for name, value in info.items():
            assert isinstance(name, str) and len(
                name
            ) > 0, f"The name of info should be string avaliable but got {type_name(name)}."
            valueDtype = type_name(value)
            if valueDtype.startswith(
                    "int"):  # Python int object,Numpy int object
                pass

            elif valueDtype.startswith(
                    "float"):  # Python float object,Numpy float object
                self.currentFieldIsFloat[name] = True

            elif valueDtype == "ndarray" and value.shape == (
            ):  # Numpy ndarray with only one value
                if value.dtype == "float":
                    self.currentFieldIsFloat[name] = True
            else:
                raise UnsupportedType(
                    f"Expected int or float value but got {type_name(value)}.")

            name = name.lower()
            if not name in self.currentField.keys():
                self.currentField[name] = []
            self.currentField[name].append(value)
Example #17
def ctc_greedy_search(prob, vocabs, blankID=None):
    '''
    The best path decoding algorithm.

    Args:
        <prob>: An exkaldi probability object. This probalility should be an output of Neural Network with CTC loss fucntion.
        <vocabs>: a list of vocabulary.
        <blankID>: specify the ID of blank symbol. If None, use the last dimentionality of <prob>.
    Return:
        An exkaldi Transcription object of decoding results.  
    '''
    declare.is_classes("vocabs", vocabs, list)

    declare.is_probability("prob", prob)
    if type_name(prob) == "BytesProb":
        prob = prob.to_numpy()
    elif type_name(prob) == "IndexTable":
        prob = prob.read_record("prob").to_numpy()

    probDim = prob.dim
    if len(vocabs) == probDim:
        if blankID is None:
            blankID = probDim - 1
        declare.is_positive_int("blankID", blackID)
        declare.in_boundary("blankID", blackID, 0, probDim - 1)
    elif len(vocabs) == probDim - 1:
        if blankID is None:
            blankID = probDim - 1
        else:
            assert blankID == probDim - 1, f"The dimensionality of probability is {probDim} but there are only {len(vocabs)} words. In this case, the blank ID must be {probDim-1} but got {blankID}."
    else:
        raise WrongDataFormat(
            f"The dimensibality of probability {probDim} does not match the numbers of words {len(vocabs)}."
        )

    results = Transcription(name="bestPathResult")
    for utt, pb in prob.items:
        declare.is_classes("prob", prob, np.ndarray)
        declare.is_classes("the rank of matrix shape", len(pb.shape),
                           "expected rank", 2)
        best_path = np.argmax(pb, 1)
        best_chars_collapsed = [
            vocabs[ID] for ID, _ in groupby(best_path) if ID != blankID
        ]
        try:
            results[utt] = " ".join(best_chars_collapsed)
        except Exception as e:
            e.args = ("<vocab> might has non-string items.\n" + e.args[0], )
            raise e
    return results
Example #18
def decompress_feat(feat,name="decompressedFeat"):
	'''
	Decompress a kaldi compressed feature whose data type is "CM".
	
	Args:
		<feat>: a bytes object.
		
	Return:
		A new exkaldi feature object.

	This function is a wrapper of the kaldi-io-for-python tools.
	For more information, see https://github.com/vesis84/kaldi-io-for-python/blob/master/kaldi_io/kaldi_io.py
	'''
	declare.is_classes("feat",feat,bytes)

	def _read_compressed_mat(fd):

		# Format of header 'struct',
		global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written,
		per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')])

		# Read global header,
		globmin,globrange,rows,cols = np.frombuffer(fd.read(16),dtype=global_header,count=1)[0]
		cols = int(cols)
		rows = int(rows)

		# The data is structed as [Colheader,... ,Colheader,Data,Data ,.... ]
		#                         {           cols           }{     size         }
		col_headers = np.frombuffer(fd.read(cols*8),dtype=per_col_header,count=cols)
		col_headers = np.array([np.array([x for x in y]) * globrange * 1.52590218966964e-05 + globmin for y in col_headers],dtype=np.float32)
		data = np.reshape(np.frombuffer(fd.read(cols*rows),dtype='uint8',count=cols*rows),newshape=(cols,rows)) # stored as col-major,

		mat = np.zeros((cols,rows),dtype='float32')
		p0 = col_headers[:,0].reshape(-1,1)
		p25 = col_headers[:,1].reshape(-1,1)
		p75 = col_headers[:,2].reshape(-1,1)
		p100 = col_headers[:,3].reshape(-1,1)
		mask_0_64 = (data <= 64)
		mask_193_255 = (data > 192)
		mask_65_192 = (~(mask_0_64 | mask_193_255))

		mat += (p0  + (p25 - p0) / 64. * data) * mask_0_64.astype(np.float32)
		mat += (p25 + (p75 - p25) / 128. * (data - 64)) * mask_65_192.astype(np.float32)
		mat += (p75 + (p100 - p75) / 63. * (data - 192)) * mask_193_255.astype(np.float32)

		return mat.T,rows,cols        
	
	with BytesIO(feat) as sp:
		newData = []

		while True:
			data = b''
			utt = ''
			while True:
				char = sp.read(1)
				data += char
				char = char.decode()
				if (char == '') or (char == ' '):break
				utt += char
			utt = utt.strip()
			if utt == '':break
			binarySymbol = sp.read(2)
			data += binarySymbol
			binarySymbol = binarySymbol.decode()
			if binarySymbol == '\0B':
				dataType = sp.read(3).decode()
				if dataType == 'CM ':
					data += 'FM '.encode()
					matrix,rows,cols = _read_compressed_mat(sp)
					data += '\04'.encode()
					data += struct.pack(np.dtype('uint32').char,rows)
					data += '\04'.encode()
					data += struct.pack(np.dtype('uint32').char,cols)
					data += matrix.tobytes()
					newData.append(data)
				else:
					raise UnsupportedType("This is not a compressed binary data.")
			else:
				raise WrongDataFormat('Miss right binary symbol.')

	return BytesFeature(b''.join(newData),name=name)
Example #19
def check_multiple_resources(*resources,outFile=None):
	'''
	This function is used to check whether to use multiple processes and to verify the resources.

	Args:
		<resources>: objects.
		<outFile>: None, a file name, or a list of None objects / file names.
				If None, it means the standard output stream.
	
	Return:
		lists of resources.
	'''
	# check the number of parallels
	multipleFlag = [ len(re) if isinstance(re,(list,tuple)) else 1 for re in resources ]
	multipleFlag = list(set(multipleFlag))

	if len(multipleFlag) == 0:
		raise WrongOperation(f"No any resource has been received.")
	elif len(multipleFlag) > 2:
		raise WrongOperation(f"The number of resources has various sizes:{multipleFlag}. We hope they have the same amount if their size are not 1.")
	multipleFlag = max(multipleFlag)

	# check and modify the amount of each resource
	resources = list(resources)
	for index,target in enumerate(resources):
		if isinstance(target,(list,tuple)):
			if len(target) == 1:
				resources[index] = [ target[0] for i in range(multipleFlag) ]
			else:
				exType = None
				for t in target:
					if exType is None:
						exType = type_name(t)
					elif type_name(t) != exType:
						raise WrongDataFormat(f"Elements of one group should be the same data class,but got: {exType} != {type_name(t)}.")
		else:
			resources[index] = [ target for i in range(multipleFlag) ]

	# check output file format
	if multipleFlag > 1:
		assert outFile is not None,"When apply parallel processes,output file name is necessary."
		outFiles = []
		declare.is_classes("outFile",outFile,[str,list,tuple])
		if isinstance(outFile,str):
			declare.is_valid_file_name("outFile",outFile)
			outFile = os.path.abspath(outFile)
			dirName = os.path.dirname(outFile)
			fileName = os.path.basename(outFile)
			namePattern = f"nj%0{len(str(multipleFlag))}d_{fileName}"
			outFiles = [ os.path.join(dirName,namePattern%i) for i in range(multipleFlag) ]
		else:
			declare.equal("the number of output files",len(outFile),"the number of parallel processes",multipleFlag)
			outFiles = []
			for f in outFile:
				declare.is_valid_file_name("outFile",f)
				outFiles.append(f)
		
		resources.append(outFiles)

	else:
		if outFile is None:
			outFile = "-"
		else:
			declare.is_valid_file_name("outFile",outFile)

		resources.append([outFile,])

	return resources
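
# Usage sketch: single resources are broadcast to match the largest group, and
# numbered output file names are generated for the parallel processes.
wavs, tools, outFiles = check_multiple_resources(
		"raw.scp", ["compute-mfcc-feats", "compute-fbank-feats"], outFile="feat.ark")
print(wavs)       # -> ["raw.scp", "raw.scp"]
print(outFiles)   # -> e.g. [".../nj0_feat.ark", ".../nj1_feat.ark"]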
Example #20
def __compute_feature(target,kaldiTool,useSuffix=None,name="feat",outFile=None):
	'''
	The base function to compute features.
	'''
	declare.kaldi_existed()

	if useSuffix is not None:
		declare.is_valid_string("useSuffix",useSuffix)
		useSuffix = useSuffix.strip().lower()[-3:]
		declare.is_instances("useSuffix",useSuffix,["scp","wav"])
	else:
		useSuffix = ""	

	targets,kaldiTools,useSuffixs,names,outFiles = check_multiple_resources(target,kaldiTool,useSuffix,name,outFile=outFile)
	# pretreatment
	fromSegment = False
	with FileHandleManager() as fhm:

		segments = []
		for index,kaldiTool,target,useSuffix,name in zip(range(len(outFiles)),kaldiTools,targets,useSuffixs,names):
			
			declare.is_classes("target",target,["str","ListTable","WavSegment"])
			declare.is_valid_string("name",name)

			if isinstance(target,str):		
		
				allFiles = list_files(target)
				target = ListTable()

				for filePath in allFiles:
					filePath = filePath.strip()
					if filePath[-4:].lower() == ".wav":
						fileName = os.path.basename(filePath)
						uttID = fileName[0:-4].replace(".","")
						target[uttID] = filePath
					
					elif filePath[-4:].lower() == '.scp':
						target += load_list_table(filePath)
					
					elif "wav" == useSuffix:
						fileName = os.path.basename(filePath)
						uttID = fileName.replace(".","")
						target[uttID] = filePath

					elif "scp" == useSuffix:
						target += load_list_table(filePath)

					else:
						raise UnsupportedType('Unknown file suffix. You can specify <useSuffix> as "wav" or "scp".')
				
				if len(target) == 0:
					raise WrongDataFormat("There did not include any data to compute data in target.")

				targets[index] = target
			
			elif type_name(target) == "WavSegment":

				segTemp = fhm.create("w+",suffix=".seg",encode="utf-8")
				target.save(segTemp)
				segments.append(segTemp.name)

				targets[index] = target.detach_wav()
				fromSegment = True

	if fromSegment:
		# define the command pattern
		cmdPattern = "extract-segments scp:{wavFile} {segment} ark:- | {kaldiTool} ark:- ark:{outFile}"
		# define resources
		resources = {"wavFile":targets,"segment":segments,"kaldiTool":kaldiTools,"outFile":outFiles}
	else:
		# define the command pattern
		cmdPattern = "{kaldiTool} scp:{wavFile} ark:{outFile}"
		# define resources
		resources = {"wavFile":targets,"kaldiTool":kaldiTools,"outFile":outFiles}

	# Run
	return run_kaldi_commands_parallel(resources,cmdPattern,analyzeResult=True,generateArchive="feat",archiveNames=names)
Example #21
def convert_field(prob, originVocabs, targetVocabs, retainOOV=False):
    '''
    Transform the dimensions of probability to the target field.

    Args:
        <prob>: An exkaldi probability object. This probalility should be an output of Neural Network.
        <originVocabs>: list of original field vocabulary.
        <originVocabs>: list of target field vocabulary.
        <retainOOV>: If True, target words which are not in original vocabulary will be retained in minimum probability of each frame. 
    Return:
        An new exkaldi probability object and a list of new target vocabulary.  
    '''
    declare.is_classes("originVocabs", originVocabs, list)
    declare.is_classes("targetVocabs", targetVocabs, list)
    assert len(targetVocabs) > 0, f"Target vocabulary is void."

    declare.is_probability("prob", prob)
    if type_name(prob) == "BytesProb":
        prob = prob.to_numpy()
    elif type_name(prob) == "IndexTable":
        prob = prob.read_record("prob").to_numpy()

    probDim = prob.dim
    declare.equal("the dimension of probability", probdim,
                  "the number of words", len(originVocabs))

    origin_w2i = dict((w, i) for i, w in enumerate(originVocabs))

    retainIDs = []
    newTargetVocabs = []
    for w in targetVocabs:
        try:
            ID = origin_w2i[w]
        except KeyError:
            if retainOOV is True:
                newTargetVocabs.append(w)
                retainIDs.append(None)
            else:
                pass
        else:
            newTargetVocabs.append(w)
            retainIDs.append(ID)

    results = {}
    for utt, pb in prob.items:
        declare.is_classes("prob", prob, np.ndarray)
        declare.is_classes("the rank of matrix shape", len(pb.shape),
                           "expected rank", 2)
        if retainOOV is True:
            padding = np.min(pb, axis=1)
        new = np.zeros(shape=(pb.shape[0], len(retainIDs)), dtype=np.float32)
        for index, i in enumerate(retainIDs):
            if i is None:
                new[:, index] = padding
            else:
                new[:, index] = pb[:, i]

        results[utt] = new

    newName = f"convert({prob.name})"
    return NumpyProb(data=results, name=newName), newTargetVocabs
Example #22
	def add(self,name,dtype,abbr=None,default=None,choices=None,minV=None,maxV=None,discription=None):
		'''
		Add a new option.

		Args:
			_name_: a string which must have a format such as "--exkaldi" ("--help" is not available).  
			_dtype_: float, int, str or bool.  
			_abbr_: None or an abbreviation of the name which must have a format such as "-e" ("-h" is not available).  
			_default_: the default value or a list/tuple of values.  
			_choices_: a list/tuple of values.  
			_minV_: set the minimum value if dtype is int or float. Enabled only when _choices_ is None.  
			_maxV_: set the maximum value if dtype is int or float. Enabled only when _choices_ is None.  
			_discription_: a string to describe this option.
		'''
		self.__capture()

		# check option name
		declare.is_valid_string("name",name)
		name = name.strip()
		self.__detect_special_char(name)
		assert name[0:2] == "--" and name[2:3] != "-", f"Option name must start with '--' but got: {name}."
		assert name != "--help", "Option name is inavaliable: --help."
		if name in self.__arguments.keys():
			raise WrongOperation(f"Option name has existed: {name}.")
		
		# check dtype
		declare.is_instances("option dtype", dtype, (float,int,bool,str))

		# check abbreviation
		if abbr is not None:
			declare.is_valid_string("abbr",abbr)
			abbr = abbr.strip()
			self.__detect_special_char(abbr)
			assert abbr[0:1] == "-" and abbr[1:2] != "-", f"Abbreviation must start with '-' but got: {abbr}."
			assert abbr != "-h", "Abbreviation is inavaliable: -h."
			if abbr in self.__abb2Name.keys():
				raise WrongOperation(f"Abbreviation has existed: {abbr}.")

		# check default value
		if default is not None:
			if isinstance(default,(list,tuple)):
				declare.members_are_classes(f"Default value of {name}", default, dtype)
			else:
				declare.is_classes(f"Default value of {name}", default, dtype)
			if dtype == str:
				self.__detect_special_char(default)

		# check choices
		if choices is not None:
			declare.is_classes(f"Choices of {name}", choices, (list,tuple))
			declare.members_are_classes(f"Choices of {name}", choices, dtype)
			if dtype == str:
				self.__detect_special_char(choices)
			if default is not None:
				if isinstance(default,(list,tuple)):
					declare.members_are_instances(f"Default value of {name}", default, choices)
				else:
					declare.is_instances(f"Default value of {name}", default, choices)
		
		# check boundary values
		if minV is not None or maxV is not None:
			assert dtype in [float,int], f"Only float and int options can set boundaries but {name} is {dtype.__name__}."
			assert choices is None, f"Cannot set choices and boundary concurrently: {name}."
			if minV is not None:
				declare.is_classes(f"Minimum value of {name}", minV, dtype)
				if default is not None:
					if isinstance(default, (list,tuple)):
						for v in default:
							declare.greater_equal(f"Default value of {name}", v, "minimum expected value", minV)
					else:
						declare.greater_equal(f"Default of {name}", default, "minimum expected value", minV)
			if maxV is not None:
				declare.is_classes(f"Maximum value of {name}", maxV, dtype)
				if default is not None:
					if isinstance(default,(list,tuple)):
						for v in default:					
							declare.less_equal(f"Default value of {name}", v, "maximum expected value", maxV)
					else:
						declare.less_equal(f"Default value of {name}", default, "maximum expected value", maxV)
			if minV is not None and maxV is not None:
				declare.less_equal(f"Minimum value of {name}", minV, f"maximum value", maxV)

		# check discription
		if discription is not None:
			declare.is_valid_string(f"Discription of {name}", discription)
			self.__detect_special_char(discription)

		self.__arguments[name] = self.spec(dtype,default,choices,minV,maxV,discription)
		self.__name2Abb[name] = abbr
		if abbr is not None:
			self.__abb2Name[abbr] = name
Example #23
def run_kaldi_commands_parallel(resources,cmdPattern,analyzeResult=True,timeout=ExkaldiInfo.timeout,generateArchive=None,archiveNames=None):
	'''
	Map resources to command pattern and run this command parallelly.

	Args:
		<resources>: a dict whose keys are the name of resource and values are lists of resources objects.
					For example: {"feat": [BytesFeat01,BytesFeat02,... ],"outFile":{"newFeat01.ark","newFeat02.ark",...} }.
					The "outFile" resource is necessary.
					When there is only one process to run,"outFile" can be "-" which means the standard output stream.

		<cmdPattern>: a string needed to map the resources.
					For example: "copy-feat {feat} ark:{outFile}".
	
	Return:
		a list of triples: (return code,error info,output file or buffer)
	'''
	declare.kaldi_existed()
	declare.is_classes("resources",resources,dict)
	declare.is_classes("cmdPattern",cmdPattern,str)
	assert "outFile" in resources.keys(),"<outFile> key and value is necessary in recources."

	declare.members_are_classes("the values of resources",resources.values(),[list,tuple])
	if generateArchive is not None:
		analyzeResult = True #forcely analyze the result

	# check the format of command pattern
	nameIndexs = [ i for i,c in enumerate(cmdPattern) if c == "{" or c == "}" ]
	assert len(nameIndexs)%2 == 0,f"The numbers of braces do not match in command pattern: '{cmdPattern}'. "
	auxiliaryInfo = {}
	for i in range(0,len(nameIndexs),2):
		name = cmdPattern[nameIndexs[i]+1:nameIndexs[i+1]]
		if name not in resources:
			raise WrongDataFormat(f"Resource is necessary but has not been provided: {name}.")
		prefix = "" if nameIndexs[i] == 0 else cmdPattern[nameIndexs[i]-1]
		if name in auxiliaryInfo.keys():
			auxiliaryInfo[name][0] += 1
			if not prefix in auxiliaryInfo[name][1]:
				auxiliaryInfo[name][1] += prefix
		else:
			auxiliaryInfo[name] = [1,prefix]

	assert "outFile" in auxiliaryInfo.keys(),"Key: <outFile> is necessary in command pattern."
	_outFileCountInfo = auxiliaryInfo.pop("outFile")
	assert _outFileCountInfo[0] == 1,f"Only allow <outFile> appear one time in command pattern but: {_outFileCountInfo[0]}."
	outFiles = resources.pop("outFile")

	for outFile in outFiles:
		if outFile != "-":
			make_dependent_dirs(outFile,pathIsFile=True)
	parallel = len(outFiles)

	if generateArchive is not None:
		declare.is_instances("generateArchive",generateArchive,["feat","cmvn","ali","fmllr"])
		if archiveNames is None:
			archiveNames = [ generateArchive for i in range(parallel)]
		elif isinstance(archiveNames,str):
			archiveNames = [ archiveNames for i in range(parallel)]
		elif isinstance(archiveNames,(list,tuple)):
			declare.equal("the number of achieve names",len(archiveNames),"parallel",parallel)
		else:
			raise UnsupportedType(f"<archiveNames> should be string or list or tuple but got: {type_name(archiveNames)}.")

	# regulate resources and run
	with FileHandleManager() as fhm:

		newResources = {}
		if parallel == 1:
			# Detect whether there is PIPE in command pattern.
			testPlaceholder = dict( (key,value[0]) if isinstance(value[0],str) else (key,"placeholder") for key,value in resources.items() )
			testPlaceholder["outFile"] = "placeholder"
			testCmd = cmdPattern.format(**testPlaceholder)
			if "|" in testCmd:
				inputsBuffer = False
			else:
				inputsBuffer = True
			del testPlaceholder
			# regulate resources
			for key,countPrefix in auxiliaryInfo.items():
				count,prefix = countPrefix
				target = resources[key][0]

				# If target is a list-table, we cannot automatically decide whether it is scp-format or ark-format.
				# So you should specify it in the command pattern.
				if type_name(target) in ["ListTable","Transcription"]:
					if prefix not in [":","="]:
						errMes = f"There might miss prefix such as 'ark:' or 'scp:' or '--option=' in command pattern before resource: {key}."
						errMes += "Check the command line please. If you still think there dose not need the prefix,"
						errMes += "save this ListTable or Transcription into file and instead it will this file name."
						errMes += "In that case,we will skip checking the prefix."
						raise WrongOperation(errMes)

					target = target.sort()
					if (inputsBuffer is True) and count == 1:
						inputsBuffer = target.save()
						newResources[key] = "-"
					else:
						targetTemp = fhm.create("w+",encoding="utf-8")
						target.save(targetTemp)
						newResources[key] = f"{targetTemp.name}"

				# If target is an index-table,we automatically recognize it as scp-file,so you do not need appoint it.
				elif type_name(target) == "IndexTable":
					if prefix != " ":
						errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
						errMes += f"Because we will decide the prefix depending on its data type."
						raise WrongOperation(errMes)
						
					target = target.sort()
					if (inputsBuffer is True) and count == 1:
						inputsBuffer = target.save()
						newResources[key] = "scp:-"
					else:
						targetTemp = fhm.create("w+",suffix=".scp",encoding="utf-8")
						target.save(targetTemp)
						newResources[key] = f"scp:{targetTemp.name}"
				
				elif isinstance(target,(str,int,float)):
					# file or other value parameter
					newResources[key] = f"{target}"
			
				elif isinstance(target,(BytesMatrix,BytesVector)):
					if prefix != " ":
						errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
						errMes += f"Because we will decide the prefix depending on its data type."						
						raise WrongOperation(errMes)

					target = target.sort()
					if (inputsBuffer is True) and count == 1:
						inputsBuffer = target.data
						newResources[key] = "ark:-"
					else:					
						targetTemp = fhm.create("wb+",suffix=".ark")
						target.save(targetTemp)
						newResources[key] = f"ark:{targetTemp.name}"		

				elif isinstance(target,(NumpyMatrix,NumpyVector)):
					if prefix != " ":
						errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
						errMes += f"Because we will decide the prefix depending on its data type."		
						raise WrongOperation(errMes)

					target = target.sort()
					if (inputsBuffer is True) and count == 1:
						inputsBuffer = target.to_bytes().data
						newResources[key] = "ark:-"
					else:
						target = target.to_bytes()
						targetTemp = fhm.create("wb+",suffix=".ark")
						target.save(targetTemp)
						newResources[key] = f"ark:{targetTemp.name}"	

				elif isinstance(target,BytesArchive):
					if (inputsBuffer is True) and count == 1:
						inputsBuffer = target.data
						newResources[key] = "-"
					else:
						targetTemp = fhm.create("wb+")
						target.save(targetTemp)
						newResources[key] = f"{targetTemp.name}"

				else:
					raise UnsupportedType(f"<target> should be IndexTable,ListTable,file name,int or float value,or exkaldi achieve object but got: {type_name(target)}.")
			
			# Then,process output stream
			outFile = outFiles[0]
			newResources["outFile"] = outFile
			inputsBuffer = None if isinstance(inputsBuffer,bool) else inputsBuffer
			# Then run the command
			finalCmd = cmdPattern.format(**newResources)
			out,err,cod = run_shell_command(finalCmd,stdin="PIPE",stdout="PIPE",stderr="PIPE",inputs=inputsBuffer)
			
			if analyzeResult:
				if cod != 0:
					finalCmd = ",".join([cmd.strip().split(maxsplit=1)[0] for cmd in finalCmd.split("|")])
					raise KaldiProcessError(f"Failed to run Kaldi command: {finalCmd}.",err.decode())
			
			if outFile == "-":
				if generateArchive is not None:
					if generateArchive == "feat":
						out = BytesFeat(data=out,name=archiveNames[0])
					elif generateArchive == "ali":
						out = BytesAliTrans(data=out,name=archiveNames[0])
					elif generateArchive == "cmvn":
						out = BytesCMVN(data=out,name=archiveNames[0])
					else:
						out = BytesFmllr(data=out,name=archiveNames[0])
					return out
				else:
					return (cod,err,out)
			else:
				if generateArchive is not None:
					return load_index_table(outFile,name=archiveNames[0],useSuffix="ark")
				else:
					return (cod,err,outFile)

		else:
			# In this case,all input IO stream must be files.
			for key,countPrefix in auxiliaryInfo.items():
				count,prefix = countPrefix
				values = resources[key]
				newValues = []
				for target in values:

					# If target is scp resource
					if type_name(target) in ["ListTable","Transcription"]:
						if prefix not in [":","="]:
							errMes = f"There might miss prefix such as 'ark:' or 'scp:' or '--option=' in command pattern before resource: {key}."
							errMes += "Check the command line please. If you still think there dose not need the prefix,"
							errMes += "save this ListTable or Transcription into file and instead it will this file name."
							errMes += "In that case,we will skip checking the prefix."
							raise WrongOperation(errMes)		

						target = target.sort()
						targetTemp = fhm.create("w+",encoding="utf-8")
						target.save(targetTemp)
						newValues.append(f"{targetTemp.name}")						

					elif type_name(target) == "IndexTable":
						if prefix != " ":
							errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
							errMes += f"Because we will decide the prefix depending on its data type."
							raise WrongOperation(errMes)		

						target = target.sort()
						targetTemp = fhm.create("w+",suffix=".scp",encoding="utf-8")
						target.save(targetTemp)
						newValues.append(f"scp:{targetTemp.name}")
				
					elif isinstance(target,(str,float,int)):
						# file name or other value parameters
						newValues.append(f"{target}")
				
					elif isinstance(target,(BytesMatrix,BytesVector)):
						if prefix != " ":
							errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
							errMes += f"Because we will decide the prefix depending on its data type."						
							raise WrongOperation(errMes)	

						target = target.sort()
						targetTemp = fhm.create("wb+",suffix=".ark")
						target.save(targetTemp)
						newValues.append(f"ark:{targetTemp.name}")			

					elif isinstance(target,(NumpyMatrix,NumpyVector)):
						if prefix != " ":
							errMes = f"Do not need prefix such as 'ark:' or 'scp:' in command pattern before: {key}."
							errMes += f"Because we will decide the prefix depending on its data type."						
							raise WrongOperation(errMes)

						target = target.sort().to_bytes()
						targetTemp = fhm.create("wb+",suffix=".ark")
						target.save(targetTemp)
						newValues.append(f"ark:{targetTemp.name}")

					elif isinstance(target,BytesArchive):
						targetTemp = fhm.create("wb+")
						target.save(targetTemp)	
						newValues.append(f"{targetTemp.name}")

					else:
						raise UnsupportedType(f"<target> should be IndexTable,ListTable,Transcription,file,int or float values or exkaldi achieve object but got: {type_name(target)}.")
				
				newResources[key] = newValues
			
			newResources["outFile"] = outFiles
			# assign these resources to each process and generate multiple commands
			parallelResources = []
			for i in range(parallel):
				parallelResources.append({})
				for key,items in newResources.items():
					parallelResources[-1][key] = items[i]
			cmds = [ cmdPattern.format(**re) for re in parallelResources ]
			# run
			flags = run_shell_command_parallel(cmds,timeout=timeout)

			finalResult = []
			done = True
			for index,info in enumerate(flags):
				cod,err = info
				if analyzeResult and cod != 0:
					print(f"{index}/{len(flags)} error tracking")
					print(err.decode())
					done = False	
				finalResult.append( (cod,err,outFiles[index]) )

			if analyzeResult and (not done):
				finalCmd = ",".join([cmd.strip().split(maxsplit=1)[0] for cmd in cmds[0].split("|")])
				raise KaldiProcessError(f"Failed to run Kaldi command: {finalCmd}. Look the error messages above.")
			else:
				if generateArchive is not None:
					for i,fileName in enumerate(outFiles):
						finalResult[i] = load_index_table(fileName,name=archiveNames[i],useSuffix="ark")

			return finalResult
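
# A hedged usage sketch (requires working Kaldi and exkaldi installations; the
# "wav.scp" file below is hypothetical). Every value in <resources> is a list,
# placeholders in <cmdPattern> are filled per process, and with
# generateArchive="feat" an index table of the produced features is returned.
wavTable = load_list_table("wav.scp", name="wav")
featIndex = run_kaldi_commands_parallel(
		resources={"wav": [wavTable], "outFile": ["mfcc.ark"]},
		cmdPattern="compute-mfcc-feats scp:{wav} ark:{outFile}",
		generateArchive="feat",
)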
Example #24
def pad_sequence(data,
                 dim=0,
                 maxLength=None,
                 dtype='float32',
                 padding='tail',
                 truncating='tail',
                 value=0.0):
    '''
	Pad sequence.

	Args:
		<data>: a list of NumPy arrays.
		<dim>: which dimmension to pad. All other dimmensions should be the same size.
		<maxLength>: If larger than this theshold,truncate it.
		<dtype>: target dtype.
		<padding>: padding position,"head","tail" or "random".
		<truncating>: truncating position,"head","tail".
		<value>: padding value.
	
	Return:
		a two-tuple: (a Numpy array,a list of padding positions). 
	'''
    declare.is_classes("data", data, (list, tuple))
    declare.is_non_negative_int("dim", dim)
    declare.not_void("data", data)
    declare.is_classes("value", value, (int, float))
    declare.is_instances("padding", padding, ["head", "tail", "random"])
    declare.is_instances("truncating", padding, ["head", "tail"])
    if maxLength is not None:
        declare.is_positive_int("maxLength", maxLength)

    lengths = []
    newData = []
    exRank = None
    exOtherDims = None
    for i in data:

        # verify
        declare.is_classes("data", i, np.ndarray)
        shape = i.shape
        if exRank is None:
            exRank = len(shape)
            assert dim < exRank, f"<dim> is out of range: {dim}>{exRank-1}."
        else:
            assert len(
                shape
            ) == exRank, f"Arrays in <data> has different rank: {exRank}!={len(shape)}."

        if dim != 0:
            # transpose
            rank = [r for r in range(exRank)]
            rank[0] = dim
            rank[dim] = 0
            i = i.transpose(rank)

        if exOtherDims is None:
            exOtherDims = i.shape[1:]
        else:
            assert exOtherDims == i.shape[
                1:], f"Expect for sequential dimmension,All arrays in <data> has same shape but got: {exOtherDims}!={i.shape[1:]}."

        length = len(i)
        if maxLength is not None and length > maxLength:
            if truncating == "head":
                i = i[length - maxLength:, ...]
            else:
                i = i[0:maxLength, ...]

        lengths.append(len(i))
        newData.append(i)

    maxLength = max(lengths)
    batchSize = len(newData)

    result = np.array(value, dtype=dtype) * np.ones(
        [batchSize, maxLength, *exOtherDims], dtype=dtype)

    pos = []
    for i in range(batchSize):
        length = lengths[i]
        if padding == "tail":
            result[i][0:length] = newData[i]
            pos.append((0, length))
        elif padding == "head":
            start = maxLength - length
            result[i][start:] = newData[i]
            pos.append((start, maxLength))
        else:
            start = random.randint(0, maxLength - length)
            end = start + length
            result[i][start:end] = newData[i]
            pos.append((start, end))

    if dim != 0:
        exRank = len(result.shape)
        rank = [r for r in range(exRank)]
        rank[1] = dim + 1
        rank[dim + 1] = 1
        result = result.transpose(rank)

    return result, pos
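
# Quick check of pad_sequence (assumes numpy has been imported as np): two
# sequences of different lengths are padded at the tail to the same length.
a = np.ones((3, 2), dtype="float32")
b = np.ones((5, 2), dtype="float32")
batch, positions = pad_sequence([a, b], dim=0, padding="tail", value=0.0)
print(batch.shape)   # -> (2, 5, 2)
print(positions)     # -> [(0, 3), (0, 5)]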
Example #25
def ctc_prefix_beam_search(prob,
                           vocabs,
                           blankID=None,
                           beam=5,
                           cutoff=0.999,
                           strick=1.0,
                           lmFile=None,
                           alpha=1.0,
                           beta=0):
    '''
    Prefix beam search decoding algorithm. Lm score is supported.

    Args:
        <prob>: An exkaldi postprobability object. This probalility should be an output of Neural Network with CTC loss fucntion.
                We expect the probability didn't pass any activation function, or it may generate wrong results.
        <vocabs>: a list of vocabulary.
        <blankID>: specify the ID of blank symbol. If None, use the last dimentionality of <prob>.
        <beam>: the beam size.
        <cutoff>: the sum threshold to cut off dimensions whose probability is extremely small.  
        <strick>: When the decoding results of two adjacent frames are the same, the probability of latter will be reduced.
        <lmFile>: If not None, add language model score to beam.
        <alpha>: the weight of LM score.
        <beta>: the length normaoliztion weight of LM score.
    Return:
        An exkaldi Transcription object of decoding results.  
    '''
    declare.is_classes("vocabs", vocabs, [tuple, list])

    declare.is_probability("prob", prob)
    if type_name(prob) == "BytesProb":
        prob = prob.to_numpy()
    elif type_name(prob) == "IndexTable":
        prob = prob.read_record("prob").to_numpy()

    if lmFile is not None:
        declare.is_file("lmFile", lmFile)
    else:
        lmFile = "none"

    probDim = prob.dim
    if len(vocabs) == probDim:
        if blankID is None:
            blankID = probDim - 1
        declare.is_positive_int("blankID", blackID)
        declare.in_boundary("blankID", blackID, 0, probDim - 1)

    elif len(vocabs) == probDim - 1:
        if blankID is None:
            blankID = probDim - 1
        else:
            assert blankID == probDim - 1, f"The dimensionality of probability is {probDim} but there are only {len(vocabs)} words. In this case, the blank ID must be {probDim-1} but got {blankID}."
    else:
        raise WrongDataFormat(
            f"The dimensibality of probability {probDim} does not match the numbers of words {len(vocabs)}."
        )

    for ID, word in enumerate(vocabs):
        if len(word.strip()) == 0:
            raise WrongDataFormat(f"Found a vocab {word} unavaliable.")

    num_classes = len(vocabs)
    vocabs = " ".join(vocabs)

    sources = [
        vocabs.encode(),
    ]
    uttTemp = []
    for utt, pb in prob.items:
        uttTemp.append(utt)
        declare.is_classes("prob", pb, np.ndarray)
        declare.equal("the rank of matrix shape", len(pb.shape),
                      "expected rank", 2)
        pb = softmax(pb, axis=1)
        sources.append(f" {pb.shape[0]} ".encode() +
                       pb.astype("float32").tobytes())

    sources = b"".join(sources)

    cmd = os.path.join(sys.prefix, "exkaldisrc", "tools",
                       "prefix_beam_search_decode")
    cmd += " --num_files {}".format(prob.lens[0])
    cmd += " --num_classes {}".format(num_classes)
    cmd += " --blank_id {}".format(blankID)
    cmd += " --lm_model {}".format(lmFile)
    cmd += " --beam_size {}".format(beam)
    cmd += " --cutoff_prob {}".format(cutoff)
    cmd += " --alpha {}".format(alpha)
    cmd += " --beta {}".format(beta)

    out, err, _ = run_shell_command(cmd,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    inputs=sources)

    if len(out) == 0:
        raise Exception("Failed to beam search decode.", err.decode())
    else:
        results = Transcription(name="beamSearchResults")
        out = out.decode().strip().split("file")
        results = []
        for index, re in enumerate(out[1:]):
            re = re.strip().split("\n")
            if len(re) <= 1:
                results.append([
                    "",
                ])
            else:
                results[uttTemp[index]] = " ".join(re[1].strip().split()[1:])

        return results
Example #26
def tuple_dataset(archives,frameLevel=False):
	'''
	Tuple feature or alignment archives in "utterance" level or "frame" level.

	Args:
		<archives>: exkaldi feature or alignment objects.
		<framelevel>: If True,tuple data in frame level. Or in utterance level.

	Return:
		List of tupled data.
	'''
	declare.is_classes("archives",archives,(tuple,list))
	assert len(archives) > 1,"<archives> should has multiple items."
	declare.is_bool("frameLevel",frameLevel)
	
	archives = match_utterances(archives)

	fields = {}
	for index,ark in enumerate(archives):
		if frameLevel is True:
			declare.belong_classes("achieves",ark,(BytesMatrix,BytesVector,NumpyMatrix,NumpyVector))
		else:
			declare.belong_classes("achieves",ark,(BytesMatrix,BytesVector,NumpyMatrix,NumpyVector,ListTable))
		
		if isinstance(ark,(BytesMatrix,BytesVector)):
			ark = ark.to_numpy()

		if ark.name not in fields.keys():
			fields[ark.name] = []
		fields[ark.name].append(ark)

	fieldNames = list(fields.keys())

	try:
		if frameLevel:
			templet = namedtuple(typename="TupledData",field_names=["key","frameID",]+fieldNames)
		else:
			templet = namedtuple(typename="TupledData",field_names=["key",]+fieldNames)
	except ValueError as e:
		e.args = ('When tupling data, the "name" of each archive is used as its identity ID, so it must be a valid Python identifier. '+
							'You can use ".rename()" method to rename it and try this function again.'+"\n"+
							e.args[0],)
		raise e

	def align_tuple_data_to_frame(key,record,templet):

		if isinstance(record[0],list):
			frameSize = len(record[0][0])
		else:
			frameSize = len(record[0])

		for re in record[1:]:
			if isinstance(re,list):
				for sr in re:
					if len(sr) != frameSize:
						raise WrongOperation(f"Cannot tuple data with different frame length to frame level: {frameSize}!={len(sr)}.")
			else:
				if len(re) != frameSize:
					raise WrongOperation(f"Cannot tuple data with different frame length to frame level: {frameSize}!={len(re)}.")				
		
		result = []
		for frameIndex in range(frameSize):
			new = []
			for re in record:
				if isinstance(re,list):
					fieldR = []
					for sr in re:
						fieldR.append( sr[frameIndex] )
					new.append(fieldR)
				else:
					new.append( re[frameIndex:frameIndex+1] )
					
			result.append(templet( key,frameIndex,*new  ))

		return result

	result = []
	for key in archives[0].keys():
		oneRecord = []
		for field in fieldNames:
			fieldData = []
			for ark in fields[field]:
				fieldData.append( ark.data[key] )
			if len(fieldData) == 1:
				fieldData = fieldData[0]
			oneRecord.append( fieldData )

		if frameLevel:
			result.extend( align_tuple_data_to_frame(key,oneRecord,templet) )
		else:
			result.append( templet(key,*oneRecord))
	
	return result