Esempio n. 1
0
def add_delta(feat,order=2,outFile=None):
	'''
	Append delta coefficients of the requested order to feature(s).

	Share Args:
		Null

	Parallel Args:
		<feat>: exkaldi feature objects.
		<order>: the delta order. It is checked with declare.is_positive_int().
		<outFile>: output file name.

	Return:
		exkaldi feature or index table object.
	'''
	feats,orders,outFiles = check_multiple_resources(feat,order,outFile=outFile)

	# validate each (feature,order) pair and derive the name of its output archive
	archiveNames = []
	for oneFeat,oneOrder in zip(feats,orders):
		declare.is_feature("feat",oneFeat)
		declare.is_positive_int("order",oneOrder)
		archiveNames.append(f"add_delta({oneFeat.name},{oneOrder})")

	# the placeholders in the command pattern are filled from the resources of the same key
	return run_kaldi_commands_parallel(
				{"feat":feats,"order":orders,"outFile":outFiles},
				"add-deltas --delta-order={order} {feat} ark:{outFile}",
				analyzeResult=True,
				generateArchive="feat",
				archiveNames=archiveNames,
			)
Esempio n. 2
0
def compute_cmvn_stats(feat,spk2utt=None,name="cmvn",outFile=None):
	'''
	Compute CMVN statistics from feature(s).

	Share Args:
		Null

	Parallel Args:
		<feat>: exkaldi feature object or index table object.
		<spk2utt>: spk2utt file or exkaldi ListTable object. None is accepted.
		<name>: name of output CMVN object.
		<outFile>: output file name.

	Note:
		Only the first <spk2utt> decides whether the "--spk2utt" option is used for all jobs.

	Return:
		exkaldi CMVN statistics or index table object.
	'''
	feats,spk2utts,names,outFiles = check_multiple_resources(feat,spk2utt,name,outFile=outFile)

	for oneFeat,oneSpk2utt in zip(feats,spk2utts):
		declare.is_feature("feat",oneFeat)
		# spk2utt is optional, so there is nothing to verify when it is absent
		if oneSpk2utt is None:
			continue
		declare.is_potential_list_table("spk2utt",oneSpk2utt)

	withSpk2utt = spk2utts[0] is not None
	if withSpk2utt:
		cmdPattern = 'compute-cmvn-stats --spk2utt=ark:{spk2utt} {feat} ark:{outFile}'
		resources = {"feat":feats,"spk2utt":spk2utts,"outFile":outFiles}
	else:
		cmdPattern = 'compute-cmvn-stats {feat} ark:{outFile}'
		resources = {"feat":feats,"outFile":outFiles}

	return run_kaldi_commands_parallel(
				resources,
				cmdPattern,
				analyzeResult=True,
				generateArchive="cmvn",
				archiveNames=names,
			)
Esempio n. 3
0
def use_fmllr(feat,fmllrMat,utt2spk,outFile=None):
	'''
	Transform feature(s) to fMLLR feature(s).

	Share Args:
		Null

	Parallel Args:
		<feat>: exkaldi feature or index table object.
		<fmllrMat>: exkaldi fMLLR transform matrix or index table object.
		<utt2spk>: file name or ListTable object.
		<outFile>: output file name.

	Return:
		exkaldi feature or index table object.
	'''
	feats,fmllrMats,utt2spks,outFiles = check_multiple_resources(feat,fmllrMat,utt2spk,outFile=outFile)

	# verify every group of inputs and name the archive it will produce
	archiveNames = []
	for _,oneFeat,oneMat,oneUtt2spk in zip(range(len(outFiles)),feats,fmllrMats,utt2spks):
		declare.is_feature("feat",oneFeat)
		declare.is_fmllr_matrix("fmllrMat",oneMat)
		declare.is_potential_list_table("utt2spk",oneUtt2spk)
		archiveNames.append(f"fmllr({oneFeat.name},{oneMat.name})")

	# the fMLLR matrices are exposed to the command pattern under the key "transMat"
	resources = {"feat":feats,"transMat":fmllrMats,"utt2spk":utt2spks,"outFile":outFiles}

	return run_kaldi_commands_parallel(
				resources,
				'transform-feats --utt2spk=ark:{utt2spk} {transMat} {feat} ark:{outFile}',
				analyzeResult=True,
				generateArchive="feat",
				archiveNames=archiveNames,
			)
Esempio n. 4
0
def transform_feat(feat,matFile,outFile=None):
	'''
	Transform feature(s) by a transform matrix. Typically, LDA or MLLT matrices.
	Note that if you want to apply an fMLLR transform, use the exkaldi.use_fmllr() function.

	Share Args:
		Null

	Parallel Args:
		<feat>: exkaldi feature or index table object.
		<matFile>: file name of the transform matrix.
		<outFile>: output file name.

	Return:
		exkaldi feature or index table object.
	'''
	feats,matFiles,outFiles = check_multiple_resources(feat,matFile,outFile=outFile)

	names = []
	for feat,matFile in zip(feats,matFiles):
		declare.is_feature("feat",feat)
		declare.is_file("matFile",matFile)
		# the archive name was previously misspelled as "tansform(...)"
		names.append(f"transform({feat.name})")

	cmdPattern = 'transform-feats {matFile} {feat} ark:{outFile}'
	resources = {"feat":feats,"matFile":matFiles,"outFile":outFiles}

	return run_kaldi_commands_parallel(resources,cmdPattern,analyzeResult=True,generateArchive="feat",archiveNames=names)
Esempio n. 5
0
def splice_feature(feat,left,right=None,outFile=None):
	'''
	Splice left-right N frames to generate new feature.
	The dimensions will become original-dim * (1 + left + right)

	Share Args:
		Null

	Parallel Args:
		<feat>: feature or index table object.
		<left>: the left N-frames to splice. A non-negative int.
		<right>: the right N-frames to splice. A non-negative int. If None, right = left.
		<outFile>: output file name.

	Return:
		exkaldi feature object or index table object.

	Raise:
		AssertionError if both <left> and <right> are 0.
	'''
	feats,lefts,rights,outFiles = check_multiple_resources(feat,left,right,outFile=outFile)

	names = []
	for index,(feat,left,right) in enumerate(zip(feats,lefts,rights)):
		# check feature
		declare.is_feature("feat",feat)
		# check left
		declare.is_non_negative_int("left",left)
		# check right
		if right is None:
			# resolve the default here so that both the command and the archive name use the real value
			right = left
			rights[index] = right
		else:
			declare.is_non_negative_int("right",right)
		# a one-sided context (for example left=0,right=3) is valid; only an empty context is rejected
		assert not (left == 0 and right == 0),f"At least one of <left> or <right> is valid but got:{left},{right}."

		names.append(f"splice({feat.name},{left},{right})")

	# prepare command pattern and resources
	cmdPattern = "splice-feats --left-context={left} --right-context={right} {feat} ark:{outFile}"
	resources = {"feat":feats,"left":lefts,"right":rights,"outFile":outFiles}
	# run
	return run_kaldi_commands_parallel(resources,cmdPattern,analyzeResult=True,generateArchive="feat",archiveNames=names)
Esempio n. 6
0
def use_cmvn(feat,cmvn,utt2spk=None,std=False,outFile=None):
	'''
	Apply CMVN statistics to feature(s).

	Share Args:
		Null

	Parallel Args:
		<feat>: exkaldi feature or index table object.
		<cmvn>: exkaldi CMVN statistics or index object.
		<utt2spk>: file path or ListTable object. None is accepted.
		<std>: a bool value. If true, apply std normalization.
		<outFile>: out file name.

	Note:
		Only the first <utt2spk> decides whether the "--utt2spk" option is used for all jobs.

	Return:
		feature or index table object.
	'''
	feats,cmvns,utt2spks,stds,outFiles = check_multiple_resources(feat,cmvn,utt2spk,std,outFile=outFile)

	archiveNames = []
	for _,oneFeat,oneCmvn,oneUtt2spk,oneStd in zip(range(len(outFiles)),feats,cmvns,utt2spks,stds):
		# verify feature and cmvn
		declare.is_feature("feat",oneFeat)
		declare.is_cmvn("cmvn",oneCmvn)
		# utt2spk is optional and only verified when it is given
		if oneUtt2spk is not None:
			declare.is_potential_list_table("utt2spk",oneUtt2spk)
		# <std> is only verified here; it is placed in the resources unchanged
		declare.is_bool("std",oneStd)
		archiveNames.append(f"cmvn({oneFeat.name},{oneCmvn.name})")

	perSpeaker = utt2spks[0] is not None
	if perSpeaker:
		cmdPattern = 'apply-cmvn --norm-vars={std} --utt2spk=ark:{utt2spk} {cmvn} {feat} ark:{outFile}'
		resources = {"feat":feats,"cmvn":cmvns,"utt2spk":utt2spks,"std":stds,"outFile":outFiles}
	else:
		cmdPattern = 'apply-cmvn --norm-vars={std} {cmvn} {feat} ark:{outFile}'
		resources = {"feat":feats,"cmvn":cmvns,"std":stds,"outFile":outFiles}

	return run_kaldi_commands_parallel(
				resources,
				cmdPattern,
				analyzeResult=True,
				generateArchive="feat",
				archiveNames=archiveNames,
			)
Esempio n. 7
0
def __compute_feature(target,kaldiTool,useSuffix=None,name="feat",outFile=None):
	'''
	The base function to compute feature.

	Args:
		<target>: a str (it is expanded with list_files()), a ListTable object or a WavSegment object.
		<kaldiTool>: the Kaldi command placed in the command pattern to compute the feature.
		<useSuffix>: "wav" or "scp" (only the last three characters are used, case-insensitively).
					It tells how to read a file whose name ends with neither ".wav" nor ".scp".
		<name>: name of the output archive.
		<outFile>: output file name.

	Return:
		the result of run_kaldi_commands_parallel() called with generateArchive="feat".

	Raise:
		UnsupportedType if a listed file has an unknown suffix and <useSuffix> does not tell how to read it.
		WrongDataFormat if a str target does not yield any entry.
	'''
	declare.kaldi_existed()

	if useSuffix is not None:
		declare.is_valid_string("useSuffix",useSuffix)
		useSuffix = useSuffix.strip().lower()[-3:]
		declare.is_instances("useSuffix",useSuffix,["scp","wav"])
	else:
		useSuffix = ""

	targets,kaldiTools,useSuffixs,names,outFiles = check_multiple_resources(target,kaldiTool,useSuffix,name,outFile=outFile)
	# pretreatment
	fromSegment = False
	with FileHandleManager() as fhm:

		# NOTE(review): a segment file is appended for WavSegment targets only, so if WavSegment and
		# other targets are mixed, <segments> holds fewer entries than <targets> - confirm this never happens.
		segments = []
		for index,target,useSuffix,name in zip(range(len(outFiles)),targets,useSuffixs,names):

			declare.is_classes("target",target,["str","ListTable","WavSegment"])
			declare.is_valid_string("name",name)

			if isinstance(target,str):

				# collect every listed file into one ListTable
				allFiles = list_files(target)
				target = ListTable()

				for filePath in allFiles:
					filePath = filePath.strip()
					suffix = filePath[-4:].lower()
					if suffix == ".wav":
						# the utterance ID is the file name without ".wav" and without any other dot
						fileName = os.path.basename(filePath)
						uttID = fileName[0:-4].replace(".","")
						target[uttID] = filePath

					elif suffix == '.scp':
						target += load_list_table(filePath)

					elif "wav" == useSuffix:
						# unknown suffix: the whole file name (dots removed) is used as the utterance ID
						fileName = os.path.basename(filePath)
						uttID = fileName.replace(".","")
						target[uttID] = filePath

					elif "scp" == useSuffix:
						target += load_list_table(filePath)

					else:
						raise UnsupportedType('Unknown file suffix. You can declare whether <useSuffix> is "wav" or "scp".')

				if len(target) == 0:
					raise WrongDataFormat("There did not include any data to compute data in target.")

				targets[index] = target

			elif type_name(target) == "WavSegment":

				# save the segment information to a temporary file and compute from the detached wav table
				segTemp = fhm.create("w+",suffix=".seg",encode="utf-8")
				target.save(segTemp)
				segments.append(segTemp.name)

				targets[index] = target.detach_wav()
				fromSegment = True

		if fromSegment:
			# define the command pattern
			cmdPattern = "extract-segments scp:{wavFile} {segment} ark:- | {kaldiTool} ark:- ark:{outFile}"
			# define resources
			resources = {"wavFile":targets,"segment":segments,"kaldiTool":kaldiTools,"outFile":outFiles}
		else:
			# define the command pattern
			cmdPattern = "{kaldiTool} scp:{wavFile} ark:{outFile}"
			# define resources
			resources = {"wavFile":targets,"kaldiTool":kaldiTools,"outFile":outFiles}

		# Run while the file handle manager is still open: the commands read the temporary segment
		# files created above. NOTE(review): presumably the manager releases them on exit - confirm.
		return run_kaldi_commands_parallel(resources,cmdPattern,analyzeResult=True,generateArchive="feat",archiveNames=names)