Esempio n. 1
0
def compute_fbank(target,rate=16000,frameWidth=25,frameShift=10,
					melBins=23,windowType='povey',useSuffix=None,
					config=None,name="fbank",outFile=None):
	'''
	Compute fbank feature.

	One "compute-fbank-feats" command line is assembled for every parameter
	combination and the actual work is delegated to the shared feature routine.

	Share Args:
		Null

	Parallel Args:
		<target>: wave file,scp file,exkaldi ListTable object or WavSegment object. If it is wave file,its file name is used as utterance ID.
		<rate>: sample rate (positive int).
		<frameWidth>: window width in ms (positive int, not smaller than <frameShift>).
		<frameShift>: window shift in ms (positive int).
		<melBins>: the number of mel filter banks (positive int).
		<windowType>: window type, one of "hamming","hanning","povey","rectangular","blackmann".
		<useSuffix>: If the suffix of file is not .scp or .wav,use this to specify it.
		<config>: extra optional configurations.
		<name>: the name of output feature.
		<outFile>: output file name.

		Some usual options can be assigned directly. If you want use more,set <config> = your-configure.
		You can use exkaldi.check_config('compute_fbank') function to get the reference of extra configurations.
		Also you can run shell command "compute-fbank-feats" to look their usage.

	Return:
		exkaldi feature or index table object.
	'''
	supportedWindows = ["hamming","hanning","povey","rectangular","blackmann"]

	# normalize the basic parameters so they can be walked in lockstep
	resources = check_multiple_resources(rate,frameWidth,frameShift,melBins,windowType,config)

	commands = []
	for sampleRate,width,shift,bins,window,extra,_ in zip(*resources):
		# validate this parameter combination
		declare.is_positive_int("rate",sampleRate)
		declare.is_positive_int("frameWidth",width)
		declare.is_positive_int("frameShift",shift)
		declare.is_positive_int("melBins",bins)
		declare.greater_equal("frameWidth",width,"frameShift",shift)
		declare.is_instances("windowType",window,supportedWindows)

		# every piece carries its own trailing space
		pieces = [
			'compute-fbank-feats --allow-downsample --allow-upsample ',
			f'--sample-frequency={sampleRate} ',
			f'--frame-length={width} ',
			f'--frame-shift={shift} ',
			f'--num-mel-bins={bins} ',
			f'--window-type={window} ',
		]

		if extra is not None and check_config(name='compute_fbank',config=extra):
			for option,setting in extra.items():
				if not isinstance(setting,bool):
					pieces.append(f"{option}={setting} ")
				elif setting is True:
					# boolean switches are emitted bare, and only when enabled
					pieces.append(f"{option} ")

		commands.append("".join(pieces))

	# run the common function
	return __compute_feature(target,commands,useSuffix,name,outFile)
Esempio n. 2
0
def split_txt_file(filePath, chunks=2):
    '''
    Split a text file into N chunks by average number of lines.

    The first (lines % chunks) chunks receive one extra line. If the file has
    fewer lines than <chunks>, one file per line is written instead (so an
    empty file yields an empty list).

    Args:
        <filePath>: text file path.
        <chunks>: an int value (>= 2). How many chunks to split.

    Return:
        a list of paths of generated chunk files, written next to the source file.
        each file name has a prefix such as "ck0_" in which 0 is the zero-padded chunk ID.
    '''
    declare.is_file("filePath", filePath)
    declare.greater_equal("chunks", chunks, "minimum chunk size", 2)

    with open(filePath, 'r', encoding='utf-8') as fr:
        data = fr.readlines()

    lines = len(data)
    chunkLines = lines // chunks

    if chunkLines == 0:
        # fewer lines than requested chunks: fall back to one line per chunk
        chunkLines = 1
        chunks = lines
        extra = 0
    else:
        # number of leading chunks that take one additional line
        extra = lines - chunkLines * chunks

    # zero-pad the chunk ID to the width of the chunk count
    width = len(str(chunks))

    filePath = os.path.abspath(filePath)
    dirName = os.path.dirname(filePath)
    fileName = os.path.basename(filePath)

    newFiles = []
    start = 0
    for i in range(chunks):
        end = start + chunkLines + (1 if i < extra else 0)
        # Format only the index: applying "%" formatting to a pattern that embeds
        # the directory and file name breaks when the path contains a "%".
        newFileName = os.path.join(dirName, f"ck{i:0{width}d}_{fileName}")
        with open(newFileName, 'w', encoding='utf-8') as fw:
            fw.writelines(data[start:end])

        newFiles.append(newFileName)
        start = end

    return newFiles
Esempio n. 3
0
	def __setattr__(self, name, value):
		'''
		Set an attribute, validating it first when it mirrors a declared option.

		When the option table already exists and <name> is an attribute that is
		already set and corresponds to the declared option "--<name>", the new
		value (or every member of a list/tuple value) is checked against the
		option's dtype, choices and boundaries, and the stored option record is
		updated with the new value. Any other attribute is set without checks.

		Args:
			_name_: attribute name (an option name without the leading "--").
			_value_: a single value or a list/tuple of values.

		Raises:
			AssertionError: if a value has a wrong type or is not one of the choices.
			Boundary violations are reported by declare.greater_equal / declare.less_equal.
		'''
		# the option table is absent until it has been assigned for the first time
		if '_ArgumentParser__arguments' in self.__dict__:
			# options are stored under their "--" prefixed name
			key = "--" + name
			if key in self.__arguments and name in self.__dict__:
				# verify value
				proto = self.__arguments[key]
				candidates = value if isinstance(value, (list,tuple)) else [value,]
				for v in candidates:
					assert isinstance(v, proto.dtype), f"<{name}> need {proto.dtype.__name__} type value but got: {value}."
					if proto.choices is not None:
						assert v in proto.choices, f"<{name}> should be one of {proto.choices} but got: {value}."
					if proto.minV is not None:
						declare.greater_equal(f"option value of {name}", v, "minimum expected value", proto.minV)
					if proto.maxV is not None:
						declare.less_equal(f"option value of {name}", v, "maximum expected value", proto.maxV)
				# modify the backup (under the same prefixed key that was looked up)
				self.__arguments[key] = proto._replace(value=value)

		super().__setattr__(name, value)
Esempio n. 4
0
	def add(self,name,dtype,abbr=None,default=None,choices=None,minV=None,maxV=None,discription=None):
		'''
		Register a new option.

		Args:
			_name_: a string which must have a format such as "--exkaldi" ("--help" is reserved).
			_dtype_: float, int, str or bool.
			_abbr_: None or an abbreviation of name which must have a format such as "-e" ("-h" is reserved).
			_default_: the default value or a list/tuple of values.
			_choices_: a list/tuple of values.
			_minV_: the minimum value if dtype is int or float. Cannot be combined with _choices_.
			_maxV_: the maximum value if dtype is int or float. Cannot be combined with _choices_.
			_discription_: a string to describe this option.

		Raises:
			WrongOperation: if the option name or the abbreviation has been registered already.
			AssertionError: if the name/abbreviation format is wrong, or boundaries are
				used with a non-numeric dtype or together with choices.
		'''
		self.__capture()

		# ---- option name ----
		declare.is_valid_string("name",name)
		name = name.strip()
		self.__detect_special_char(name)
		assert name.startswith("--") and not name.startswith("---"), f"Option name must start with '--' but got: {name}."
		assert name != "--help", "Option name is inavaliable: --help."
		if name in self.__arguments:
			raise WrongOperation(f"Option name has existed: {name}.")

		# ---- dtype ----
		declare.is_instances("option dtype", dtype, (float,int,bool,str))
		isText = dtype == str

		# ---- abbreviation ----
		if abbr is not None:
			declare.is_valid_string("abbr",abbr)
			abbr = abbr.strip()
			self.__detect_special_char(abbr)
			assert abbr.startswith("-") and not abbr.startswith("--"), f"Abbreviation must start with '-' but got: {abbr}."
			assert abbr != "-h", "Abbreviation is inavaliable: -h."
			if abbr in self.__abb2Name:
				raise WrongOperation(f"Abbreviation has existed: {abbr}.")

		# ---- default value ----
		hasDefault = default is not None
		multiDefault = isinstance(default,(list,tuple))
		if hasDefault:
			if multiDefault:
				declare.members_are_classes(f"Default value of {name}", default, dtype)
			else:
				declare.is_classes(f"Default value of {name}", default, dtype)
			if isText:
				self.__detect_special_char(default)

		# ---- choices ----
		if choices is not None:
			declare.is_classes(f"Choices of {name}", choices, (list,tuple))
			declare.members_are_classes(f"Choices of {name}", choices, dtype)
			if isText:
				self.__detect_special_char(choices)
			# the default (each member of it) has to be one of the choices
			if hasDefault and multiDefault:
				declare.members_are_instances(f"Default value of {name}", default, choices)
			elif hasDefault:
				declare.is_instances(f"Default value of {name}", default, choices)

		# ---- boundary values ----
		if not (minV is None and maxV is None):
			assert dtype in (float,int), f"Only float and int option can set the boundary but {name} is {dtype.__name__}."
			assert choices is None, f"Cannot set choices and boundary concurrently: {name}."

			if minV is not None:
				declare.is_classes(f"Minimum value of {name}", minV, dtype)
				if hasDefault and multiDefault:
					for member in default:
						declare.greater_equal(f"Default value of {name}", member, "minimum expected value", minV)
				elif hasDefault:
					declare.greater_equal(f"Default of {name}", default, "minimum expected value", minV)

			if maxV is not None:
				declare.is_classes(f"Maximum value of {name}", maxV, dtype)
				if hasDefault and multiDefault:
					for member in default:
						declare.less_equal(f"Default value of {name}", member, "maximum expected value", maxV)
				elif hasDefault:
					declare.less_equal(f"Default value of {name}", default, "maximum expected value", maxV)

			# the two boundaries have to be ordered
			if minV is not None and maxV is not None:
				declare.less_equal(f"Minimum value of {name}", minV, f"maximum value", maxV)

		# ---- description ----
		if discription is not None:
			declare.is_valid_string(f"Discription of {name}", discription)
			self.__detect_special_char(discription)

		# ---- register ----
		self.__arguments[name] = self.spec(dtype,default,choices,minV,maxV,discription)
		self.__name2Abb[name] = abbr
		if abbr is not None:
			self.__abb2Name[abbr] = name
Esempio n. 5
0
	def load(self, filePath):
		'''
		Load arguments from file.

		The parser is reset first. The file content is split on blank lines: the
		first block is stored as the description, and every following block
		declares one option with lines of the form "key = value". Each option
		block must give all of these keys: name, abbr, dtype, default, choices,
		minV, maxV, discription, value. Several values are separated by "|" and
		the text "None" (or "none") stands for a missing value.

		Args:
			_filePath_: args file path.

		Raises:
			WrongOperation: if the file is empty or a value cannot be converted to the option dtype.
			WrongDataFormat: if a boundary value is not a valid int/float for the option dtype.
			AssertionError: if a line is not "key = value", a key is missing, or boundaries
				are combined with choices or with a non-numeric dtype.
		'''
		declare.is_file("filePath", filePath)
		self.reset()

		with open(filePath, "r", encoding="utf-8") as fr:
			lines = fr.read()
		lines = lines.strip()
		if len(lines) == 0:
			raise WrongOperation(f"This is a void file: {filePath}.")
		
		blocks = lines.split("\n\n")

		# dtype names allowed in the file, mapped to the real types (no eval on file content)
		dtypeTable = {"float":float, "int":int, "bool":bool, "str":str}
		
		def parse_text(name, value, dtype):
			# Convert one textual value to dtype; str values are returned unchanged.
			if dtype in [float,int]:
				try:
					value = dtype(value)
				except ValueError:
					raise WrongOperation(f"Option <{name}> need a {dtype.__name__} value but choices got: {value}.")
			elif dtype == bool:
				lowered = value.lower()
				if lowered == "true":
					value = True
				elif lowered == "false":
					value = False
				else:
					raise WrongOperation(f"Option <{name}> need a bool value but choices got: {value}.")

			return value  

		self.__discription = blocks[0].strip()
		for blockNo, block in enumerate(blocks[1:], start=1):
			block = block.strip()
			if len(block) == 0:
				continue
			block = block.split("\n")
			# 1. match options
			values = {"name":None,"abbr":None,"dtype":None,"default":None,"choices":None,"minV":None,"maxV":None,"discription":None,"value":None}
			for m in block:
				m = m.strip()
				parts = m.split("=")
				# exactly one "=" is accepted per line
				assert len(parts) == 2, f"Augument should has format: key = value, but got: {m}."
				key = parts[0].strip()
				text = parts[1].strip()
				declare.is_instances("Option key", key, list(values.keys()))
				values[key] = text

			for key, text in values.items():
				assert text is not None, f"Missed {key} information in block: {blockNo}."
			# 2. parse
			name = values["name"]
			# parse the dtype firstly
			declare.is_instances("dtype", values["dtype"], list(dtypeTable))
			values["dtype"] = dtypeTable[values["dtype"]]
			dtype = values["dtype"]	
			# then parse the choices
			choices = values["choices"]
			if choices in ["none", "None"]:
				choices = None
			else:
				choices = choices.split("|")
				for i, c in enumerate(choices):
					choices[i] = parse_text(name, c, dtype)
			values["choices"] = choices
			# then parse the boundary value
			boundary = {"minV":None, "maxV":None}
			for i in boundary.keys():
				V = values[i]
				if V not in ["none", "None"]:
					assert dtype in [float,int], f"Only float and int option can set the boundary but {name} is {dtype.__name__}:"
					assert choices is None, f"{name} cannot set choices and boundary concurrently."
					
					# find out whether the text can be read as an int and/or as a float
					toIntFlag = True
					toFloatFlag = True
					try:
						float(V)
					except ValueError:
						toFloatFlag= False
					try:
						int(V)
					except ValueError:
						toIntFlag= False
					
					if toIntFlag is False and toFloatFlag is False:
						raise WrongDataFormat(f"Boundary values of {name} should be a int or float value but got: {V}.")
					elif toIntFlag is False and toFloatFlag is True: # the text is a float literal
						if dtype != float:
							raise WrongDataFormat(f"{name}'s dtype is int but try to set boundary value with a float value: {V}.")
						else:
							V = float(V)
					elif toIntFlag is True and toFloatFlag is True: # the text fits both, follow the dtype
						V = dtype(V)
					else:
						raise WrongDataFormat(f"Failed to set {name}'s boundary value: {V}.")
				
					boundary[i] = V
			values["minV"] = boundary["minV"]
			values["maxV"] = boundary["maxV"]
			# then parse the default and value
			if values["default"].lower() == "none":
				values["default"] = None
			else:
				default = values["default"].split("|")
				for i, v in enumerate(default):
					default[i] = parse_text(name, v, dtype)
				values["default"] = default if len(default) > 1 else default[0]
			
			# "default" is verified by .add() below, so here we only verify "value"
			if values["value"].lower() == "none":
				values["value"] = None
			else:
				value = values["value"].split("|")
				for i, v in enumerate(value):
					v = parse_text(name, v, dtype)
					if values["choices"] is not None:
						declare.is_instances("Option value", v, values["choices"])
					else:
						if values["minV"] is not None:
							declare.greater_equal("Option value", v, "minimum expected value", values["minV"])
						if values["maxV"] is not None:
							declare.less_equal("Option value", v, "maximum expected value", values["maxV"])
					value[i] = v
				if len(value) == 1:
					value = value[0]
				values["value"] = value
			
			# check abbreviation
			if values["abbr"] in ["none", "None"]:
				values["abbr"] = None

			# add this options
			self.add(name=values["name"], 
							 dtype=values["dtype"], 
							 abbr=values["abbr"], 
							 default=values["default"], 
					 		 choices=values["choices"], 
							 minV=values["minV"], 
							 maxV=values["maxV"], 
							 discription=values["discription"]
							)
			
			# finally, modify the "value"
			self.__arguments[values["name"]] = self.__arguments[values["name"]]._replace(value=values["value"])
			if values["value"] is not None:
				# NOTE(review): the attribute is set under the full option name (as written in
				# the file, normally with the leading "--"), while parse() uses name[2:] — confirm
				# which one is intended.
				self.__setattr__(values["name"], values["value"])
Esempio n. 6
0
	def parse(self):
		'''
		Start to parse arguments.

		The captured command line is scanned from its end back to the first token
		ending with ".py". Tokens of the form "option=value" are split on "=".
		Every option (full name or abbreviation) must be followed by its value;
		several values are written as "v1|v2". Values are converted to the option
		dtype and checked against its choices or boundaries. Options which are not
		given keep their default. Each final value is stored in the option table
		and set as an attribute named after the option without the leading "--".

		A value that starts with "-" and reads as a number (for example "-1" or
		"-0.5|-1") is accepted as a value instead of being rejected as an unknown
		abbreviation.

		Raises:
			WrongOperation: if an option is undefined, has no value, has a value of
				a wrong type, or ends up without any value (no default given).
		'''
		self.__capture()
		
		# extract arguments (kept in reversed order: each value precedes its option)
		reversedArgv = self.__argv.copy()
		reversedArgv.reverse()
		newArgv = []
		for a in reversedArgv:
			if a.endswith(".py"):
				break
			a = a.split("=")
			a.reverse()
			newArgv.extend( a )

		def is_numeric_value(token):
			# True if every "|" separated part of the token reads as a number.
			try:
				for part in token.split("|"):
					float(part)
			except ValueError:
				return False
			return True

		# match these arguments
		result = dict( (key, proto.default) for key, proto in self.__arguments.items() )
		for i, op in enumerate(newArgv):

			if op[0:1] == "-" and op[1:2] != "-":
				if op == "-h":
					self.print_help_and_exit()
				if op in self.__abb2Name.keys():
					op = self.__abb2Name[op]
				elif i%2 == 0 and is_numeric_value(op):
					# a negative number sitting in a value slot, not an abbreviation;
					# it is consumed when the option that follows it is handled
					continue
				else:
					raise WrongOperation(f"Option has not been defined: {op}.")

			if op.startswith("--"):
				if op == "--help":
					self.print_help_and_exit()
				if op not in self.__arguments.keys():
					raise WrongOperation(f"Option has not been defined: {op}.")
				# in the reversed list options sit on odd positions, right after their value
				if i%2 == 0:
					raise WrongOperation(f"Missed value for option: {op}.")

				# option value might has a format such as: 1|2
				vs = newArgv[i-1].split("|")
				proto = self.__arguments[op]

				if proto.dtype in [float,int]:
					try:
						for j,v in enumerate(vs):
							vs[j] = proto.dtype(v)
					except ValueError:
						raise WrongOperation(f"Option <{op}> need a {proto.dtype.__name__} value but got: {v}.")

				elif proto.dtype == bool:
					for j,v in enumerate(vs):
						if v.lower() == "true":
							v = True
						elif v.lower() == "false":
							v = False
						else:
							raise WrongOperation(f"Option <{op}> need a bool value but got: {v}.")
						vs[j] = v
				
				# vs is a list of converted values here
				if proto.choices is not None:
					declare.members_are_instances(f"Option value of {op}", vs, proto.choices)
				else:
					if proto.minV is not None:
						for v in vs:
							declare.greater_equal(f"Option value of {op}", v, "minimum expected value", proto.minV)
					if proto.maxV is not None:
						for v in vs:
							declare.less_equal(f"Option value of {op}", v, "maximum expected value", proto.maxV)

				result[op] = vs if len(vs) > 1 else vs[0]

		# set attributes
		for name, value in result.items():
			if value is None:
				raise WrongOperation(f"Missed value for option: {name}.")
			else:
				self.__arguments[name] = self.__arguments[name]._replace(value=value)
				self.__setattr__(name[2:], value)