Exemple #1
0
def use_cmvn(feat,cmvn,utt2spk=None,std=False,outFile=None):
	'''
	Normalize feature with CMVN statistics through the Kaldi "apply-cmvn" tool.

	Share Args:
		Null

	Parallel Args:
		<feat>: exkaldi feature or index table object.
		<cmvn>: exkaldi CMVN statistics or index object.
		<utt2spk>: file path or ListTable object. If None,the "--utt2spk" option is not used.
		<std>: a bool value. It is passed to the "--norm-vars" option.
		<outFile>: out file name.

	Return:
		the result of run_kaldi_commands_parallel: feature or index table object.
	'''
	feats,cmvns,utt2spks,stds,outFiles = check_multiple_resources(feat,cmvn,utt2spk,std,outFile=outFile)

	# Validate every parallel group and prepare one archive name for each of them.
	archiveNames = []
	for _,oneFeat,oneCmvn,oneUtt2spk,oneStd in zip(outFiles,feats,cmvns,utt2spks,stds):
		declare.is_feature("feat",oneFeat)
		declare.is_cmvn("cmvn",oneCmvn)
		if oneUtt2spk is not None:
			declare.is_potential_list_table("utt2spk",oneUtt2spk)
		declare.is_bool("std",oneStd)
		archiveNames.append( f"cmvn({oneFeat.name},{oneCmvn.name})" )

	# Only the first group decides whether the utt2spk option is used for all groups.
	withUtt2spk = utt2spks[0] is not None
	if withUtt2spk:
		cmdPattern = 'apply-cmvn --norm-vars={std} --utt2spk=ark:{utt2spk} {cmvn} {feat} ark:{outFile}'
		resources = {"feat":feats,"cmvn":cmvns,"utt2spk":utt2spks,"std":stds,"outFile":outFiles}
	else:
		cmdPattern = 'apply-cmvn --norm-vars={std} {cmvn} {feat} ark:{outFile}'
		resources = {"feat":feats,"cmvn":cmvns,"std":stds,"outFile":outFiles}

	return run_kaldi_commands_parallel(
						resources,
						cmdPattern,
						analyzeResult=True,
						generateArchive="feat",
						archiveNames=archiveNames,
					)
Exemple #2
0
def _collect_wav_table(path,useSuffix):
	'''
	Gather the wave files found by list_files(<path>) into one ListTable.

	Args:
		<path>: a string handed to list_files().
		<useSuffix>: "wav","scp" or "". It decides how to treat a file whose suffix is neither .wav nor .scp.

	Return:
		a ListTable object which maps utterance ID to wave file path.

	Raise:
		UnsupportedType: a file has an unknown suffix and <useSuffix> does not tell how to read it.
		WrongDataFormat: no record was collected.
	'''
	table = ListTable()

	for filePath in list_files(path):
		filePath = filePath.strip()
		suffix = filePath[-4:].lower()

		if suffix == ".wav":
			# The utterance ID is the file name without ".wav" and without any other dot.
			fileName = os.path.basename(filePath)
			table[fileName[0:-4].replace(".","")] = filePath

		elif suffix == ".scp":
			table += load_list_table(filePath)

		elif useSuffix == "wav":
			# Unknown suffix declared as wave: the whole file name (dots removed) is the utterance ID.
			fileName = os.path.basename(filePath)
			table[fileName.replace(".","")] = filePath

		elif useSuffix == "scp":
			table += load_list_table(filePath)

		else:
			raise UnsupportedType('Unknown file suffix. You can declare whether <useSuffix> is "wav" or "scp".')

	if len(table) == 0:
		raise WrongDataFormat("There did not include any data to compute data in target.")

	return table

def __compute_feature(target,kaldiTool,useSuffix=None,name="feat",outFile=None):
	'''
	The base function to compute feature.

	Args:
		<target>: a path string,ListTable object or WavSegment object (or several of them for parallel running).
		<kaldiTool>: the Kaldi feature extraction command (with its options).
		<useSuffix>: None or a string ending with "scp" or "wav". It is checked as a single string before the resources are grouped.
		<name>: the name of output feature object.
		<outFile>: output file name.

	Return:
		the result of run_kaldi_commands_parallel.

	Raise:
		UnsupportedType,WrongDataFormat: see _collect_wav_table().
	'''
	declare.kaldi_existed()

	if useSuffix is not None:
		declare.is_valid_string("useSuffix",useSuffix)
		useSuffix = useSuffix.strip().lower()[-3:]
		declare.is_instances("useSuffix",useSuffix,["scp","wav"])
	else:
		useSuffix = ""

	targets,kaldiTools,useSuffixs,names,outFiles = check_multiple_resources(target,kaldiTool,useSuffix,name,outFile=outFile)

	# The Kaldi commands are launched inside this context so that the temporary segment files
	# still exist while they are read.
	# NOTE(review): this assumes FileHandleManager closes/removes its files on exit - confirm.
	with FileHandleManager() as fhm:

		# pretreatment
		segments = []
		for index,(_,_,oneTarget,oneSuffix,oneName) in enumerate(zip(outFiles,kaldiTools,targets,useSuffixs,names)):

			declare.is_classes("target",oneTarget,["str","ListTable","WavSegment"])
			declare.is_valid_string("name",oneName)

			if isinstance(oneTarget,str):
				targets[index] = _collect_wav_table(oneTarget,oneSuffix)

			elif type_name(oneTarget) == "WavSegment":
				# Dump the segment information to a temporary file and keep the wave table only.
				segTemp = fhm.create("w+",suffix=".seg",encode="utf-8")
				oneTarget.save(segTemp)
				segments.append(segTemp.name)
				targets[index] = oneTarget.detach_wav()

		# NOTE(review): if WavSegment targets are mixed with other kinds,<segments> is shorter
		# than <targets>; verify how run_kaldi_commands_parallel handles that.
		if segments:
			# define the command pattern
			cmdPattern = "extract-segments scp:{wavFile} {segment} ark:- | {kaldiTool} ark:- ark:{outFile}"
			# define resources
			resources = {"wavFile":targets,"segment":segments,"kaldiTool":kaldiTools,"outFile":outFiles}
		else:
			# define the command pattern
			cmdPattern = "{kaldiTool} scp:{wavFile} ark:{outFile}"
			# define resources
			resources = {"wavFile":targets,"kaldiTool":kaldiTools,"outFile":outFiles}

		# Run
		return run_kaldi_commands_parallel(resources,cmdPattern,analyzeResult=True,generateArchive="feat",archiveNames=names)
Exemple #3
0
def compute_plp(target,rate=16000,frameWidth=25,frameShift=10,
				melBins=23,featDim=13,windowType='povey',useSuffix=None,
				config=None,name="plp",outFile=None):
	'''
	Compute PLP feature with the Kaldi "compute-plp-feats" tool.

	Share Args:
		Null

	Parallel Args:
		<target>: wave file,scp file,exkaldi ListTable object or WavSegment object. If it is a wave file,its file name is used as utterance ID.
		<rate>: sample rate. A positive int value.
		<frameWidth>: frame window width (ms). A positive int value not less than <frameShift>.
		<frameShift>: frame shift width (ms). A positive int value.
		<melBins>: the number of mel filter banks. A positive int value.
		<featDim>: the output dimensionality of PLP feature. A positive int value.
		<windowType>: sample window type. One of "hamming","hanning","povey","rectangular","blackmann".
		<useSuffix>: If the suffix of file is not .scp and .wav,use this to specify it.
		<config>: extra optional configurations. A bool option is added as a bare key when it is True and skipped when it is False. Any other option is added as "key=value".
		<name>: the name of output feature object.
		<outFile>: output file name.

		Some usual options can be specified directly. If you want to use more,set <config> = your-configure.
		You can use exkaldi.check_config('compute_plp') function to get the reference of extra configurations.
		Also you can run shell command "compute-plp-feats" to look up their usage.

	Return:
		exkaldi feature or index table object.
	'''
	# Group the basic parameters so that one base command is built for each parallel job.
	paramGroups = check_multiple_resources(rate,frameWidth,frameShift,melBins,featDim,windowType,config)

	baseCmds = []
	for oneRate,oneWidth,oneShift,oneBins,oneDim,oneWindow,oneConfig,_ in zip(*paramGroups):
		declare.is_positive_int("rate",oneRate)
		declare.is_positive_int("frameWidth",oneWidth)
		declare.is_positive_int("frameShift",oneShift)
		declare.is_positive_int("melBins",oneBins)
		declare.is_positive_int("featDim",oneDim)
		declare.greater_equal("frameWidth",oneWidth,"frameShift",oneShift)
		declare.is_instances("windowType",oneWindow,["hamming","hanning","povey","rectangular","blackmann"])

		# Every option is followed by one space in the final command,the last one included.
		options = [
			'compute-plp-feats --allow-downsample --allow-upsample',
			f'--sample-frequency={oneRate}',
			f'--frame-length={oneWidth}',
			f'--frame-shift={oneShift}',
			f'--num-mel-bins={oneBins}',
			f'--num-ceps={oneDim}',
			f'--window-type={oneWindow}',
		]

		if oneConfig is not None and check_config(name='compute_plp',config=oneConfig):
			# Keys are written as they are given - presumably they already carry the leading "--".
			for key,value in oneConfig.items():
				if not isinstance(value,bool):
					options.append(f"{key}={value}")
				elif value:
					options.append(f"{key}")

		baseCmds.append(" ".join(options) + " ")

	# run the common function
	return __compute_feature(target,baseCmds,useSuffix,name,outFile)