Example #1
0
    def get_retained_data(self,
                          processFunc=None,
                          batchSize=None,
                          chunks='auto',
                          otherArgs=None,
                          shuffle=False,
                          retainData=0.0):
        '''
        Build a new DataIterator over the retained evaluation table (self.evalTable).

        Args:
            <processFunc>: passed to DataIterator. If None, self.fileProcessFunc is used.
            <batchSize>: passed to DataIterator. If None, self._batchSize is used.
            <chunks>: a positive int value or 'auto'.
            <otherArgs>: passed to DataIterator. If None, self.otherArgs is used.
            <shuffle>: passed to DataIterator.
            <retainData>: passed to DataIterator.

        Return:
            a new DataIterator object.
        '''
        if self.evalTable.is_void:
            raise WrongOperation('No retained validation data.')

        # Fall back on the settings of this iterator for everything left unset
        processFunc = self.fileProcessFunc if processFunc is None else processFunc
        batchSize = self._batchSize if batchSize is None else batchSize

        # <chunks> is either a positive int or the literal 'auto'
        if isinstance(chunks, int):
            assert chunks > 0, "Expected <chunks> is a positive int number."
        elif chunks != 'auto':
            raise WrongOperation(
                f'Expected <chunks> is a positive int number or <auto> but got {chunks}.'
            )

        otherArgs = self.otherArgs if otherArgs is None else otherArgs

        return DataIterator(self.evalTable, processFunc, batchSize, chunks,
                            otherArgs, shuffle, retainData)
Example #2
0
    def align_tuple_data_to_frame(utt, record, templet):
        '''
        Split utterance-level tuple data into one tuple per frame.

        Args:
            <utt>: utterance ID, passed to <templet> unchanged.
            <record>: a sequence of fields. Each field is either a sliceable sequence
                      with one entry per frame, or a list of such sequences.
            <templet>: a callable invoked as templet(utt, frameIndex, *fields).

        Return:
            a list with one <templet> result per frame.

        Raise:
            WrongOperation if any sequence has a different frame length from the first one.
        '''
        first = record[0]
        frameSize = len(first[0]) if isinstance(first, list) else len(first)

        # Check every field, including the remaining members of a list-valued first field,
        # so that a mismatch is reported here instead of failing (or truncating) below.
        for field in record:
            members = field if isinstance(field, list) else [field]
            for seq in members:
                if len(seq) != frameSize:
                    raise WrongOperation(
                        f"Cannot tuple data with different frame length to frame level: {frameSize}!={len(seq)}."
                    )

        result = []
        for frameIndex in range(frameSize):
            new = []
            for field in record:
                if isinstance(field, list):
                    new.append([seq[frameIndex] for seq in field])
                else:
                    # Slice (not index) so the frame keeps the container type of the field
                    new.append(field[frameIndex:frameIndex + 1])
            result.append(templet(utt, frameIndex, *new))

        return result
Example #3
0
def check_config(name, config=None):
    '''
    Check the user's configures or get the default configures of some functions.

    Args:
        <name>: function name. The module exkaldi.config.<name> is imported to get the reference configure.
        <config>: a dict object whose keys are configure names and values are their configure values.
                  If None, return the default configure.

    Return:
        If no configure module exists for <name>: print a warning and return None.
        If <config> is None:
            Return a dict object of example configure of <name>.
            If the value is a tuple, it stands for multiple types of value you can set.
        else:
            Return True after every key has been checked, or raise error.

    Raise:
        WrongOperation if <config> is not a dict or has an unknown key.
        WrongDataFormat if a value does not have one of the expected types.
    '''
    declare.is_valid_string("name", name)

    try:
        module = importlib.import_module(f'exkaldi.config.{name}')
    except ModuleNotFoundError:
        print(f"Warning: no default configure for name '{name}'.")
        return None
    c = module.config

    if config is None:
        # Every reference entry alternates value, type, value, type, ...
        # The even slots are the example values.
        defaults = {}
        for key, value in c.items():
            value = tuple(value[i] for i in range(0, len(value), 2))
            defaults[key] = value if len(value) > 1 else value[0]
        return defaults

    if not isinstance(config, dict):
        raise WrongOperation(
            f"<config> has a wrong format. You can use check_config('{name}') to get expected configure format."
        )

    for k, v in config.items():
        if k not in c.keys():
            raise WrongOperation(f"No such configure name: <{k}> in {name}.")

        # The odd slots of the reference entry are the accepted types
        protos = tuple(c[k][i] for i in range(1, len(c[k]), 2))
        if not isinstance(v, protos):
            if isinstance(v, bool):
                raise WrongDataFormat(
                    f"Configure <{k}> is bool value: {v},but we expected str value like 'true' or 'false'."
                )
            raise WrongDataFormat(
                f"Configure <{k}> should be in {protos} but got {type_name(v)}."
            )

    # Report success only after all keys have been validated
    return True
Example #4
0
    def connect_from(self, targetHost):
        '''
        Usage:  client.connect_from(targetHost="192.168.1.1")

        Wait for a connection request coming from <targetHost>.
        At most 5 incoming requests are examined. With TCP, a connection is accepted and
        kept only if its peer address is <targetHost>. With UDP, a b'hello world' datagram
        from <targetHost> is expected. In both cases b'hello world' is sent back.

        Return:
            True if the remote client connected.

        Raise:
            WrongOperation if the object is not used properly.
            NetworkError if the socket timed out or no request came from <targetHost>.
        '''
        if self.safeFlag is False:
            raise WrongOperation(
                'We only allow user to use client under <with> grammar.')

        if self.bindHost is None:
            raise WrongOperation(
                'Please bind host IP and Port by using .bind() method.')

        if self.client != None:
            raise WrongOperation('Another connection is running right now.')

        connected = False
        try:
            for _ in range(5):
                if self.proto == 'TCP':
                    self.server.listen(1)
                    self.client, peer = self.server.accept()
                    if peer[0] == targetHost:
                        self.client.send(b'hello world')
                        self.targetAddr = peer
                        connected = True
                        break
                    # Not the expected peer: drop it and wait for the next one
                    self.client.close()
                else:
                    greeting, peer = self.server.recvfrom(32)
                    if greeting == b'hello world' and peer[0] == targetHost:
                        self.client = self.server
                        self.client.sendto(b'hello world', peer)
                        self.targetAddr = peer
                        connected = True
                        break
        except Exception as e:
            self.localErrFlag = True
            if isinstance(e, socket.timeout):
                raise NetworkError(
                    'No connect-application from any remote client.')
            else:
                raise e

        if connected:
            return True

        # All attempts were used up without a request from <targetHost>
        self.client = None
        self.localErrFlag = True
        raise NetworkError("Cannot connect from {}.".format(targetHost))
Example #5
0
		def __parse(name, value, dtype):
			'''
			Convert the string <value> of option <name> to <dtype>.

			Args:
				<name>: option name, only used in error messages.
				<value>: the raw string value.
				<dtype>: float, int or bool are converted. Any other type returns <value> unchanged.

			Return:
				the converted value.

			Raise:
				WrongOperation if <value> cannot be converted.
			'''
			if dtype in (float, int):
				try:
					value = dtype(value)
				except ValueError:
					raise WrongOperation(f"Option <{name}> need a {dtype.__name__} value but choices got: {value}.")
			elif dtype == bool:
				# Compare case-insensitively. The original tested an undefined name in the "false" branch.
				lowered = value.lower()
				if lowered == "true":
					value = True
				elif lowered == "false":
					value = False
				else:
					raise WrongOperation(f"Option <{name}> need a bool value but choices got: {value}.")

			return value
Example #6
0
    def dump(self, keepItems=False):
        '''
        Usage:  product = obj.dump()
        Return everything reported so far.
        Values still waiting in the current field are collected first (without printing).

        Args:
            <keepItems>: If True, return a dict object mapping each key to the list of its collected values.
                         Else, return the list of collected dict objects.

        Return:
            A dict object or list object.

        Raise:
            WrongOperation if nothing has been collected.
        '''
        if self.currentField != {}:
            self.collect_report(plot=False)

        if self.globalField == []:
            raise WrongOperation('Not any information to dump.')

        history = self.globalField
        if keepItems is not True:
            return history

        # Regroup the per-collection records by key
        grouped = {}
        for entry in history:
            for key, value in entry.items():
                grouped.setdefault(key, []).append(value)
        return grouped
Example #7
0
    def open(self, filePath, mode, encoding=None, name=None):
        '''
        Open a regular file, register the handle in the inventory and return it.

        Args:
            <filePath>: path of an existing file.
            <mode>: mode given to the built-in open().
            <encoding>: encoding given to the built-in open().
            <name>: a string. An exclusive name under which the handle is registered.
                    If None, the file path is used as the name.
                    The same file can be opened several times as long as each handle gets a different name.

        Return:
            a file handle.
        '''
        self.verify_safety()

        inventory = self.__inventory

        if name is None:
            # Unnamed handles are registered under the file path, which must still be free
            if filePath in inventory.keys():
                raise WrongOperation(
                    f"File has been opened already: {filePath}. If you still want to open it to get another handle,please give it an exclusive name."
                )
            name = filePath
        else:
            declare.is_valid_string("name", name)
            assert name not in inventory.keys(
            ), f"<name> has been existed. We hope it be exclusive: {name}."

        declare.is_file("filePath", filePath)

        inventory[name] = handle = open(filePath, mode, encoding=encoding)
        return handle
Example #8
0
    def bind(self, proto='TCP', bindHost=None, bindPort=9509):
        '''
        Usage:  client.bind(proto='TCP', bindHost="192.168.1.1", bindPort=9509)

        Create the server socket (stream socket for 'TCP', datagram socket for 'UDP')
        and bind it to (<bindHost>, <bindPort>).
        '''
        assert proto in ['TCP', 'UDP'
                         ], 'Expected <proto> is "TCP" or "UDP" but got {}.'.format(proto)

        if self.bindHost != None:
            alreadyBound = (self.bindHost, self.bindPort)
            raise WrongOperation(
                'Server has already bound to {}.'.format(alreadyBound))

        assert bindHost != None, 'Expected <bindHost> is not None.'

        socketType = socket.SOCK_STREAM if proto == 'TCP' else socket.SOCK_DGRAM
        self.server = socket.socket(socket.AF_INET, socketType)
        self.server.bind((bindHost, bindPort))

        # Record the binding only after the socket was bound successfully
        self.proto = proto
        self.bindHost = bindHost
        self.bindPort = bindPort
Example #9
0
	def add_penalty(self, penalty=0):
		'''
		Add a word insertion penalty to the lattice with the "lattice-add-penalty" command.

		Args:
			<penalty>: a non-negative int or float value given to --word-ins-penalty.
		Return:
			An new Lattice object.
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No any lattice to scale.')

		isNumber = isinstance(penalty, (int,float))
		assert isNumber and penalty >= 0, "Expected <penalty> is positive int or float value."

		cmd = "lattice-add-penalty --word-ins-penalty={} ark:- ark:-".format(penalty)

		# The lattice bytes are piped through the command
		out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)

		if cod != 0 or out == b'':
			print(err.decode())
			raise KaldiProcessError("Failed to add penalty.")

		return Lattice(data=out, wordSymbolTable=self.wordSymbolTable, hmm=self.hmm, name=f"add_penalty({self.name})")
Example #10
0
def decompress_gz_file(filePath, overWrite=False):
    '''
    Decompress a gz file with the "gzip -d" command.

    Args:
        <filePath>: file path. It must name an existing file ending with ".gz".
        <overWrite>: If True, an existing decompressed file is removed first.
    Return:
        the absolute path of decompressed file.
    '''
    assert isinstance(
        filePath,
        str), f"<filePath> must be a string but got {type_name(filePath)}."

    filePath = filePath.rstrip()
    if not os.path.isfile(filePath):
        raise WrongPath(f"No such file:{filePath}.")
    if not filePath.endswith(".gz"):
        raise WrongOperation(f"{filePath}: Unknown suffix.")

    # The decompressed file is the source path without its ".gz" suffix
    outFile = filePath[:-3]
    if overWrite is True and os.path.isfile(outFile):
        os.remove(outFile)

    out, err, cod = run_shell_command(f"gzip -d {filePath}",
                                      stderr=subprocess.PIPE)

    if cod != 0:
        print(err.decode())
        raise ShellProcessError("Failed to decompress file.")

    return os.path.abspath(outFile)
Example #11
0
	def determinize(self, acwt=1.0, beam=6):
		'''
		Determinize the lattice with the "lattice-determinize-pruned" command.

		Args:
			<acwt>: acoustic scale, a non-negative float value.
			<beam>: prune beam, a non-negative int value.
		Return:
			An new Lattice object.
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No any lattice data.')

		assert isinstance(acwt, float) and acwt >= 0, "Expected <acwt> is positive float value."
		assert isinstance(beam, int) and beam >= 0, "Expected <beam> is positive int value."

		cmd = "lattice-determinize-pruned --acoustic-scale={} --beam={} ark:- ark:-".format(acwt, beam)

		# The lattice bytes are piped through the command
		out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)

		if cod != 0 or out == b'':
			print(err.decode())
			raise KaldiProcessError("Failed to determinize lattice.")

		return Lattice(data=out, wordSymbolTable=self.wordSymbolTable, hmm=self.hmm, name=f"determinize({self.name})")
Example #12
0
    def get(self):
        '''
        Usage:  client.get()

        Take the next message out of the result queue and return the whole text recognized so far.
        While the queue is empty and the 'recognize' or 'receive' thread is alive, keep waiting.
        Return None if the queue is empty with no such thread alive, or if 'endFlag' was taken out.
        '''
        if not self.safeFlag:
            raise WrongOperation(
                'We only allow user to use client under <with> grammar.')

        def producerAlive():
            # True while a thread that can still fill the queue is running
            for worker in ('recognize', 'receive'):
                if worker in self.threadManager.keys() and self.threadManager[
                        worker].is_alive():
                    return True
            return False

        while self.resultQueue.empty():
            if not producerAlive():
                return None
            time.sleep(0.01)

        message = self.resultQueue.get()
        if message == 'endFlag':
            return None

        # message is (isFinal, text): the text replaces the last segment,
        # and a final one opens a new empty segment
        self.finalRecognizedResult[-1] = message[1]
        if message[0] is True:
            self.finalRecognizedResult.append("")
        return "".join(self.finalRecognizedResult)
Example #13
0
	def am_rescore(self, hmm, feat):
		'''
		Replace the acoustic scores of the lattice with those of a new HMM-GMM model,
		using the "gmm-rescore-lattice" command.

		Args:
			<hmm>: exkaldi HMM object or file path.
			<feat>: exkaldi feature object (BytesFeature or NumpyFeature).

		Return:
			An new Lattice object.

		Raise:
			WrongOperation if the lattice is void.
			UnsupportedType if <hmm> or <feat> has an unexpected type.
			KaldiProcessError if the command failed.
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No any lattice data.')

		hmmTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")
		featTemp = tempfile.NamedTemporaryFile("wb+", suffix=".ark")
		try:
			if isinstance(hmm, str):
				assert os.path.isfile(hmm), f"No such file: {hmm}."
				hmmFile = hmm
			elif type_name(hmm) in ["BaseHMM", "MonophoneHMM", "TriphoneHMM"]:
				hmmTemp.write(hmm.data)
				hmmTemp.seek(0)
				hmmFile = hmmTemp.name
			else:
				raise UnsupportedType(f"<hmm> should be file path or exkaldi HMM object but got: {type_name(hmm)}.")

			if type_name(feat) == "BytesFeature":
				feat = feat.sort(by="utt")
			elif type_name(feat) == "NumpyFeature":
				# The data is written to a binary temp file below, so it must be in bytes format.
				# NOTE(review): assumes NumpyFeature provides to_bytes() - confirm against the feature class.
				feat = feat.sort(by="utt").to_bytes()
			else:
				raise UnsupportedType(f"<feat> should be exkaldi feature object but got: {type_name(feat)}.")

			featTemp.write(feat.data)
			featTemp.seek(0)
			featFile = featTemp.name

			cmd = f"gmm-rescore-lattice\t{hmmFile} ark:- ark:{featFile} ark:-"

			out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)

			if cod != 0 or out == b'':
				print(err.decode())
				raise KaldiProcessError("Failed to rescore lattice.")
			else:
				newName = f"am_rescore({self.name})"
				return Lattice(data=out, wordSymbolTable=self.wordSymbolTable, hmm=self.hmm, name=newName)
		finally:
			# Closing the temp files also deletes them
			hmmTemp.close()
			featTemp.close()
Example #14
0
def compress_gz_file(filePath, overWrite=False, keepSource=False):
    '''
    Compress a file to gz file with the "gzip" command.

    Args:
        <filePath>: file path.
        <overWrite>: If True, overwrite gz file when it has existed.
        <keepSource>: If True, retain source file (gzip -k).

    Return:
        the path of compressed file.
    '''
    declare.is_file("filePath", filePath)
    declare.is_bool("overWrite", overWrite)
    declare.is_bool("keepSource", keepSource)

    filePath = os.path.abspath(filePath)
    if filePath.endswith(".gz"):
        raise WrongOperation(f"Cannot compress a .gz file:{filePath}.")
    outFile = filePath + ".gz"

    # An existing target is only replaced on request
    if os.path.isfile(outFile):
        if overWrite is not True:
            raise WrongOperation(
                f"File has existed:{outFile}. If overwrite it,set option <overWrite>=True."
            )
        os.remove(outFile)

    keepFlag = "-k " if keepSource else ""
    cmd = f"gzip {keepFlag}{filePath}"

    out, err, cod = run_shell_command(cmd, stderr=subprocess.PIPE)

    if cod != 0:
        print(err.decode())
        raise ShellProcessError("Failed to compress file.")

    return outFile
Example #15
0
    def close(self):
        '''
        Usage:  client.close()

        Close this client object: raise the end flag, then run the same
        clean-up as leaving the <with> block.
        '''
        if self.safeFlag is False:
            raise WrongOperation(
                'We only allow user to use client under <with> grammar.')

        self.endFlag = True

        # No exception is in flight, so all three arguments are None
        noException = (None, None, None)
        self.__exit__(*noException)
Example #16
0
    def collect_report(self, keys=None, plot=True):
        '''
        Average the values reported since the last collection.
        The averages are stored as a new entry of the global field, one summary line is
        appended to self.logFile, and the current field is cleared.

        Args:
            <keys>: Specify the data wanted to be collected. If "None", collect all data reported.
            <plot>: If "True", print the statistics result to standard output.
        '''
        if keys is None:
            keys = list(self.currentField)
        elif isinstance(keys, str):
            keys = [keys]
        elif not isinstance(keys, (list, tuple)):
            raise WrongOperation("Expected <keys> is string,list or tuple.")

        summary = {}
        self.globalField.append(summary)

        self._allKeys.extend(self.currentField.keys())
        self._allKeys = list(set(self._allKeys))

        pieces = []
        for name in keys:
            if name not in self.currentField.keys():
                # Nothing was reported under this key
                pieces.append(f'{name}:-----    ')
                continue

            values = self.currentField[name]
            mn = 0. if len(values) == 0 else sum(values) / len(values)

            if name in self.currentFieldIsFloat.keys():
                pieces.append(f'{name}:{mn:.5f}    ')
            else:
                mn = int(mn)
                pieces.append(f'{name}:{mn}    ')

            summary[name] = mn

        message = ''.join(pieces)

        with open(self.logFile, 'a', encoding='utf-8') as fw:
            fw.write(message + '\n')

        if plot is True:
            print(message)

        # Start the next collection from scratch
        self.currentField = {}
        self.currentFieldIsFloat = {}
Example #17
0
    def wait(self):
        '''
        Usage:  client.wait()

        Block until every thread in the thread manager that is still alive has finished.
        '''
        if self.safeFlag is False:
            raise WrongOperation(
                'We only allow user to use client under <with> grammar.')

        for worker in self.threadManager.values():
            if not worker.is_alive():
                continue
            worker.join()
Example #18
0
    def config_wave_format(self,
                           Format=None,
                           Width=None,
                           Channels=1,
                           Rate=16000,
                           ChunkFrames=1024):
        '''
        Usage:  client.config_wave_format(Format="int32")

        Set the wave parameters of this client.
        Exactly one of <Format> ('int8', 'int16' or 'int32') and <Width> (1, 2 or 4 bytes)
        must be given; the other one is derived from it.
        The chunk size in bytes is width * channels * frames per chunk.
        '''
        if self.safeFlag is False:
            raise WrongOperation(
                'We only allow user to use client under <with> grammar.')

        assert Channels in [
            1, 2
        ], "Expected <Channels> is 1 or 2 but got {}.".format(Channels)

        widthOfFormat = {'int8': 1, 'int16': 2, 'int32': 4}
        formatOfWidth = {1: 'int8', 2: 'int16', 4: 'int32'}

        if Format != None:
            assert Format in [
                'int8', 'int16', 'int32'
            ], "Expected <Format> is int8, int16 or int32 but got{}.".format(
                Format)
            assert Width == None, 'Only one of <Format> and <Width> is expected to be assigned but both two are gotten.'
            self.formats = Format
            self.width = widthOfFormat[Format]
        else:
            assert Width != None, 'Expected to assign one value of <Format> aor <Width> but got two None.'
            assert Width in [
                1, 2, 4
            ], "Expected <Width> is 1, 2 or 4 but got{}.".format(Width)
            self.width = Width
            self.formats = formatOfWidth[Width]

        self.channels = Channels
        self.rate = Rate
        self.chunkFrames = ChunkFrames
        self.chunkSize = self.width * Channels * ChunkFrames
Example #19
0
        def readWave(wavFile, dataQueue):
            '''
            Read <wavFile> chunk by chunk and put the raw frames into <dataQueue>.

            A 32-byte, space-padded header "format,channels,rate,chunkFrames" is queued first.
            Full chunks are then queued one per chunk duration (the loop sleeps between reads).
            A shorter final chunk is padded with spaces up to the chunk size.
            The string 'endFlag' is queued at the end unless an error flag is set.
            Any exception sets self.localErrFlag and is re-raised.
            '''
            try:
                self._counter = 0
                # The context manager closes the wave file on every exit path
                with wave.open(wavFile, 'rb') as wf:
                    wfRate = wf.getframerate()
                    wfChannels = wf.getnchannels()
                    wfWidth = wf.getsampwidth()
                    if wfWidth not in [1, 2, 4]:
                        raise WrongOperation(
                            'Only these wav file with a data type of "int8", "int16" or "int32" can be accepted.'
                        )

                    self.config_wave_format(None, wfWidth, wfChannels, wfRate,
                                            1024)

                    secPerRead = self.chunkFrames / self.rate

                    firstMessage = "{},{},{},{}".format(self.formats,
                                                        self.channels,
                                                        self.rate,
                                                        self.chunkFrames)
                    firstMessage = firstMessage + " " * (32 - len(firstMessage))
                    dataQueue.put(firstMessage.encode())

                    data = wf.readframes(self.chunkFrames)
                    while len(data) == self.chunkSize:
                        self._counter += secPerRead
                        dataQueue.put(data)
                        if True in [
                                self.localErrFlag, self.remoteErrFlag,
                                self.endFlag
                        ]:
                            # Stop early and skip the trailing partial chunk
                            data = b""
                            break
                        time.sleep(secPerRead)
                        data = wf.readframes(self.chunkFrames)
                    if data != b"":
                        self._counter += len(
                            data) / self.width / self.channels / self.rate
                        lastChunkData = data + b" " * (self.chunkSize -
                                                       len(data))
                        dataQueue.put(lastChunkData)
            except Exception:
                self.localErrFlag = True
                raise
            else:
                if True not in [self.remoteErrFlag, self.localErrFlag]:
                    dataQueue.put('endFlag')
Example #20
0
    def _config_wave_format(self,
                            Format=None,
                            Width=None,
                            Channels=1,
                            Rate=16000,
                            ChunkFrames=1024):
        '''
        Set the wave data format of this object.
        Exactly one of <Format> ('int8', 'int16' or 'int32') and <Width> (1, 2 or 4 bytes)
        must be given; the other one is derived from it.
        The chunk size in bytes is width * channels * frames per chunk.

        Raise:
            WrongOperation if the object is not used under <with> grammar.
            AssertionError if an argument has an unexpected value.
        '''
        if self.safeFlag is False:
            raise WrongOperation(
                'We only allow user to use client under <with> grammar.')

        assert Channels == 1 or Channels == 2, "Expected <Channels> is 1 or 2 but got {}.".format(
            Channels)

        if Format != None:
            assert Format in [
                'int8', 'int16', 'int32'
            ], "Expected <Format> is int8, int16 or int32 but got{}.".format(
                Format)
            assert Width == None, 'Only one of <Format> and <Width> is expected to assigned but both two.'
            self.formats = Format
            if Format == 'int8':
                self.width = 1
            elif Format == 'int16':
                self.width = 2
            else:
                self.width = 4
        else:
            assert Width != None, 'Expected one value in <Format> and <Width> but got two None.'
            # Report the offending <Width>; <Format> is always None in this branch
            assert Width in [
                1, 2, 4
            ], "Expected <Width> is 1, 2 or 4 but got{}.".format(Width)
            self.width = Width
            if Width == 1:
                self.formats = 'int8'
            elif Width == 2:
                self.formats = 'int16'
            else:
                self.formats = 'int32'

        self.channels = Channels
        self.rate = Rate
        self.chunkFrames = ChunkFrames
        self.chunkSize = self.width * Channels * ChunkFrames
Example #21
0
def run_shell_command_parallel(cmds, env=None, timeout=ExkaldiInfo.timeout):
    '''
    Run shell commands with multiple processes.
    All commands are started first, then waited for one after another.
    Only the standard error of each process is captured.
    Each process gets an equal share of <timeout>; a process that exceeds its share is killed
    and reported with return code -9.

    Args:
        <cmds>: a list or tuple of strings. Each string should be a command and its options.
        <env>: If None, use ExkaldiInfo.ENV defaultly.
        <timeout>: a positive int value. Its the total timeout value of all processes.

    Return:
        a list of pairs: return code and error information.
    '''
    declare.is_classes("cmds", cmds, [tuple, list])
    declare.is_positive_int("timeout", timeout)

    if env is None:
        env = ExkaldiInfo.ENV

    processes = []
    for cmd in cmds:
        declare.is_valid_string("cmd", cmd)
        processes.append(
            subprocess.Popen(cmd,
                             shell=True,
                             stderr=subprocess.PIPE,
                             env=env))

    if len(processes) == 0:
        raise WrongOperation("<cmds> has not any command to run.")

    # Share the total timeout equally between the processes
    dtimeout = timeout // len(processes)
    assert dtimeout >= 1, f"<timeout> is extremely short: {timeout}."

    results = []
    for p in processes:
        try:
            _, err = p.communicate(timeout=dtimeout)
        except subprocess.TimeoutExpired:
            p.kill()
            errMes = (
                b"Time Out Error: Process was killed! If you are exactly running the right program,"
                b"you can set a greater timeout value by exkaldi.info.set_timeout()."
            )
            results.append((-9, errMes))
        else:
            results.append((p.returncode, err))

    return results
Example #22
0
def load_lat(target, name="lat"):
	'''
	Load lattice data.

	Args:
		<target>: bytes object, or a file path of lattice file(s). Files ending with ".gz" are decompressed with gunzip.
		<name>: a string.
	Return:
		A exkaldi lattice object.
	Raise:
		WrongDataFormat if a gz file gave no data.
		UnsupportedType if <target> is neither bytes nor a string.
	'''
	if isinstance(target, bytes):
		# Pass by keyword so that <name> cannot land in another positional slot of Lattice
		return Lattice(data=target, name=name)

	elif isinstance(target, str):
		files = list_files(target)
		allData = []
		for fileName in files:
			if fileName.endswith('.gz'):
				cmd = 'gunzip -c {}'.format(fileName)
				out, err, _ = run_shell_command(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
				if out == b'':
					print(err.decode())
					raise WrongDataFormat('Failed to load Lattice.')
				else:
					allData.append(out)
			else:
				try:
					with open(fileName, 'rb') as fr:
						out = fr.read()
				except Exception as e:
					print("Load lattice file defeated. Please make sure it is a lattice file avaliable.")
					raise e
				else:
					allData.append(out)
		try:
			allData = b"".join(allData)
		except Exception:
			raise WrongOperation("Only support binary format lattice file.")
		else:
			return Lattice(data=allData, name=name)

	else:
		raise UnsupportedType(f"Expected bytes object or lattice file but got: {type_name(target)}.")
Example #23
0
        def sendWave(dataQueue):
            '''
            Take messages out of <dataQueue> and send them to the remote side until
            'endFlag' is taken out or an error flag is set.
            A closing message is always sent at the end unless the remote error flag is set.
            '''
            try:
                while True:
                    # Stop as soon as either side flagged an error
                    if True in [self.localErrFlag, self.remoteErrFlag]:
                        break
                    if dataQueue.empty():
                        # Keep polling only while the 'read' or 'record' thread can still produce data
                        if ('read' in self.threadManager.keys()
                                and self.threadManager['read'].is_alive()
                            ) or ('record' in self.threadManager.keys()
                                  and self.threadManager['record'].is_alive()):
                            time.sleep(0.01)
                        else:
                            raise WrongOperation(
                                'Excepted data input from Read(file) or Record(microphone).'
                            )
                    else:
                        message = dataQueue.get()
                        if message == 'endFlag':
                            break
                        elif self.proto == 'TCP':
                            self.client.send(message)
                        else:
                            # Any other protocol value sends a datagram to the target address
                            self.client.sendto(
                                message, (self.targetHost, self.targetPort))
            except Exception as e:
                # Record the local failure so the closing message below reports it
                self.localErrFlag = True
                raise e
            finally:
                # Runs on every exit path, including after an exception
                if self.remoteErrFlag == True:
                    pass
                else:
                    # Tell the remote side whether this end stopped on an error or normally
                    if self.localErrFlag == True:
                        lastMessage = 'errFlag'.encode()
                    else:
                        lastMessage = 'endFlag'.encode()

                    if self.proto == 'TCP':
                        self.client.send(lastMessage)
                    else:
                        self.client.sendto(lastMessage,
                                           (self.targetHost, self.targetPort))
Example #24
0
	def save(self, fileName):
		'''
		Save lattice as .lat file.

		Args:
			<fileName>: file name. The ".lat" suffix is appended if the name does not end with it.
		Return:
			the absolute path of the saved file.
		'''
		assert isinstance(fileName, str) and len(fileName) > 0, "file name is unavaliable."

		if self.is_void:
			raise WrongOperation('No any data to save.')

		hasSuffix = fileName.rstrip().endswith(".lat")
		if not hasSuffix:
			fileName = fileName + ".lat"

		# Create the parent directories before writing
		make_dependent_dirs(fileName)

		with open(fileName, "wb") as fw:
			fw.write(self.data)

		return os.path.abspath(fileName)
Example #25
0
def unpack_padded_sequence(data, lengths, batchSizeDim=1):
    '''
    Usage:  listData = unpack_padded_sequence(data,lengths)

    Cut a padded batch back into a list of sequences (the reverse of padding).
    The batch-size dimension is moved to the front, then each batch member is sliced along
    its first remaining dimension.

    Args:
        <data>: a NumPy array.
        <lengths>: a list with one entry per batch member: an int (keep [0:length]) or
                   a pair of ints (keep [start:end]).
        <batchSizeDim>: the dimension of batch-size in <data>. Defaultly 1.

    Return:
        a list whose members are the unpadded sequences.
    '''
    assert isinstance(
        data, np.ndarray
    ), f"Expected <data> is NumPy array but got {type_name(data)}."
    assert isinstance(
        lengths, list
    ), "Expected <lengths> is list whose members are padded start position ( and end position)."
    assert isinstance(
        batchSizeDim, int
    ) and batchSizeDim >= 0, "<batchSizeDim> should be a non-negative int value."
    assert batchSizeDim < len(
        data.shape), "<batchSizeDim> is out of the dimensions of <data>."

    if batchSizeDim != 0:
        # Bring the batch dimension to the front, keeping the other dimensions in order
        others = [d for d in range(len(data.shape)) if d != batchSizeDim]
        data = data.transpose([batchSizeDim] + others)

    assert len(data) <= len(lengths), "<lengths> is shorter than batch size."

    def cut(sequence, span):
        # An int keeps the head; a pair keeps the [start:end] span
        if isinstance(span, int):
            return sequence[0:span]
        if isinstance(span, (list, tuple)) and len(span) == 2:
            return sequence[span[0]:span[1]]
        raise WrongOperation("<lengths> has wrong format.")

    return [cut(sequence, span) for sequence, span in zip(data, lengths)]
Example #26
0
	def scale(self, acwt=1, invAcwt=1, ac2lm=0, lmwt=1, lm2ac=0):
		'''
		Scale lattice with the "lattice-scale" command.

		Args:
			<acwt>: acoustic scale.
			<invAcwt>: inverse acoustic scale.
			<ac2lm>: acoustic to lm scale.
			<lmwt>: language lm scale.
			<lm2ac>: lm scale to acoustic.
			All of them must be non-negative.
		Return:
			An new Lattice object.
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No any lattice to scale.')

		for x in [acwt, invAcwt, ac2lm, lmwt, lm2ac]:
			assert x >= 0, "Expected scale is positive value."

		# Options in the order they appear on the command line
		options = [
			('--acoustic-scale', acwt),
			('--acoustic2lm-scale', ac2lm),
			('--inv-acoustic-scale', invAcwt),
			('--lm-scale', lmwt),
			('--lm2acoustic-scale', lm2ac),
		]
		cmd = 'lattice-scale'
		cmd += ''.join(' {}={}'.format(option, value) for option, value in options)
		cmd += ' ark:- ark:-'

		# The lattice bytes are piped through the command
		out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)

		if cod != 0 or out == b'':
			print(err.decode())
			raise KaldiProcessError("Failed to scale lattice.")

		return Lattice(data=out,wordSymbolTable=self.wordSymbolTable,hmm=self.hmm,name=f"scale({self.name})")
Example #27
0
def __read_data_from_file(fileName, useSuffix=None):
    '''
    Read data from file. If the file suffix is unknown, <useSuffix> should be assigned.

    Args:
        <fileName>: a file-name-like string (not a directory). It is passed to
                    list_files() to obtain the list of files to read.
        <useSuffix>: None, or a string ending with "ark", "scp" or "npy"
                     (case-insensitive). It is only used for files whose own
                     last three characters are not one of these suffixes.
    Return:
        A NumpyMatrix or BytesMatrix object holding the data of all files.
        If <useSuffix> is "npy" the result is a NumpyMatrix, if it is "ark" or
        "scp" the result is a BytesMatrix; without <useSuffix> the suffix of
        the first file decides.
    Raise:
        AssertionError if <useSuffix> is not valid.
        WrongOperation if <fileName> is a directory.
        UnsupportedType if <fileName> is not a string, a file suffix is unknown,
        or a .npy file can not be parsed.
        KaldiProcessError if the Kaldi copy-feats command fails.
    '''
    if useSuffix is not None:
        assert isinstance(useSuffix, str), "Expected <useSuffix> is a string."
        useSuffix = useSuffix.strip().lower()[-3:]
    else:
        useSuffix = ""

    assert useSuffix in [
        "", "scp", "ark", "npy"
    ], f'Expected <useSuffix> is "ark", "scp" or "npy" but got "{useSuffix}".'

    if not isinstance(fileName, str):
        raise UnsupportedType(
            f'Expected <fileName> is file name-like string but got a {type_name(fileName)}.'
        )
    if os.path.isdir(fileName):
        raise WrongOperation(
            f"Expected file name but got a directory:{fileName}.")
    allFiles = list_files(fileName)

    allData_bytes = BytesMatrix()
    allData_numpy = NumpyMatrix()

    def loadNpyFile(path):
        # NOTE(review): allow_pickle=True makes np.load unpickle arbitrary
        # objects, so only trusted .npy files should be loaded here.
        try:
            records = np.load(path, allow_pickle=True)
            # Each record is expected to be an (utterance ID, matrix) pair.
            data = {record[0]: record[1] for record in records}
        except Exception as err:
            # Only real errors are converted; KeyboardInterrupt and SystemExit
            # propagate untouched, and the original cause stays attached.
            raise UnsupportedType(
                f'Expected "npy" data with exkaldi format but got {path}.'
            ) from err
        return NumpyMatrix(data)

    def loadArkScpFile(path, suffix):
        ExkaldiInfo.vertify_kaldi_existed()

        # copy-feats needs to know whether it reads an archive or a script file.
        reader = 'ark' if suffix == "ark" else 'scp'
        cmd = f'copy-feats {reader}:{path} ark:-'
        out, err, cod = run_shell_command(cmd,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE)
        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Copy feat defeated.')
        return BytesMatrix(out)

    for path in allFiles:
        # The file's own suffix has priority; <useSuffix> is only the fallback.
        sfx = path[-3:].lower()
        if sfx == "npy":
            allData_numpy += loadNpyFile(path)
        elif sfx in ["ark", "scp"]:
            allData_bytes += loadArkScpFile(path, sfx)
        elif useSuffix == "npy":
            allData_numpy += loadNpyFile(path)
        elif useSuffix in ["ark", "scp"]:
            allData_bytes += loadArkScpFile(path, useSuffix)
        else:
            raise UnsupportedType(
                'Unknown file suffix. You can assign the <useSuffix> with "scp", "ark" or "npy".'
            )

    if useSuffix == "":
        # Without <useSuffix> every file had a known suffix (otherwise the loop
        # above raised), so the first file decides the format of the result.
        asNumpy = allFiles[0][-3:].lower() == "npy"
    else:
        asNumpy = useSuffix == "npy"

    if asNumpy:
        result = allData_numpy + allData_bytes.to_numpy()
    else:
        result = allData_bytes + allData_numpy.to_bytes()

    result.check_format()
    return result
Example #28
0
def load_ali(target, aliType=None, name="ali", hmm=None):
    '''
    Load alignment data.

    Args:
        <target>: Python dict object, exkaldi alignment object, kaldi alignment file or .npy file.
                  NOTE(review): a bytes object was also listed here before, but no branch of this
                  function handles it; it is rejected with UnsupportedType.
        <aliType>: None, or one of 'transitionID', 'phoneID', 'pdfID'. It will return different alignment object.
        <name>: a non-empty string.
        <hmm>: file path or exkaldi HMM object. It is required when a kaldi alignment file
               is loaded with <aliType> 'phoneID' or 'pdfID'.
    Return:
        An exkaldi alignment object. If the files of <target> produced more than one kind
        of alignment object (for example .npy files mixed with kaldi files), a list of
        these objects is returned.
    Raise:
        AssertionError if <name> is not a non-empty string.
        WrongOperation if <aliType> is not valid or no data was found.
        UnsupportedType if <target> or <hmm> has an unexpected type.
        WrongPath if <hmm> is a path that is not an existing file.
        ShellProcessError / KaldiProcessError if a shell command fails.
    '''
    assert isinstance(
        name, str) and len(name) > 0, "Name shoud be a string avaliable."

    ExkaldiInfo.vertify_kaldi_existed()

    def transform(data, cmd):
        # Run <cmd> and parse its text output ("uttID id id id ..." per line)
        # into a dict of int32 arrays keyed by utterance ID.
        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=data)
        # Either a non-zero return code or an empty output means failure.
        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to transform alignment.')

        parsed = {}
        for line in BytesIO(out).readlines():
            fields = line.decode().strip().split()
            if not fields:
                # Skip blank lines instead of failing on fields[0].
                continue
            parsed[fields[0]] = np.array(fields[1:], dtype=np.int32)
        return parsed

    if isinstance(target, dict):
        if aliType is None:
            result = NumpyAlignment(target, name)
        elif aliType == "transitionID":
            result = NumpyAlignmentTrans(target, name)
        elif aliType == "phoneID":
            result = NumpyAlignmentPhone(target, name)
        elif aliType == "pdfID":
            result = NumpyAlignmentPdf(target, name)
        else:
            raise WrongOperation(
                f"<aliType> should be None, 'transitionID', 'phoneID' or 'pdfID' but got {aliType}."
            )
        result.check_format()
        return result

    elif type_name(target) in [
            "NumpyAlignment", "NumpyAlignmentTrans", "NumpyAlignmentPhone",
            "NumpyAlignmentPdf", "BytesAlignmentTrans"
    ]:
        # An existing alignment object is copied, so the caller's object keeps its name.
        result = copy.deepcopy(target)
        result.rename(name)
        return result

    elif isinstance(target, str):

        allFiles = list_files(target)

        # One accumulator per kind of alignment object that can be produced.
        results = {
            "NumpyAlignment": NumpyAlignment(),
            "NumpyAlignmentTrans": NumpyAlignmentTrans(),
            "NumpyAlignmentPhone": NumpyAlignmentPhone(),
            "NumpyAlignmentPdf": NumpyAlignmentPdf(),
            "BytesAlignmentTrans": BytesAlignmentTrans(),
        }

        for fileName in allFiles:
            fileName = os.path.abspath(fileName)

            if fileName.endswith(".npy"):
                loaded = __read_data_from_file(fileName, "npy")
                if aliType is None:
                    results["NumpyAlignment"] += NumpyAlignment(loaded.data)
                elif aliType == "transitionID":
                    results["NumpyAlignmentTrans"] += NumpyAlignmentTrans(
                        loaded.data)
                elif aliType == "phoneID":
                    results["NumpyAlignmentPhone"] += NumpyAlignmentPhone(
                        loaded.data)
                elif aliType == "pdfID":
                    results["NumpyAlignmentPdf"] += NumpyAlignmentPdf(
                        loaded.data)
                else:
                    raise WrongOperation(
                        f"<aliType> should be None, 'transitionID','phoneID' or 'pdfID' but got {aliType}."
                    )

            else:
                if fileName.endswith('.gz'):
                    cmd = f'gunzip -c {fileName}'
                else:
                    cmd = f'cat {fileName}'

                if aliType is None or aliType == "transitionID":
                    out, err, cod = run_shell_command(cmd,
                                                      stdout=subprocess.PIPE,
                                                      stderr=subprocess.PIPE)
                    if (isinstance(cod, int) and cod != 0) or out == b'':
                        print(err.decode())
                        raise ShellProcessError(
                            "Failed to get the alignment data from file.")
                    results["BytesAlignmentTrans"] += BytesAlignmentTrans(out)

                else:
                    # The temporary file only holds the model when <hmm> is an
                    # HMM object. The "with" block guarantees that it is closed
                    # even if the conversion fails.
                    with tempfile.NamedTemporaryFile("wb+") as hmmTemp:
                        if type_name(hmm) in ("HMM", "MonophoneHMM",
                                              "TriphoneHMM"):
                            hmm.save(hmmTemp)
                            hmmFileName = hmmTemp.name
                        elif isinstance(hmm, str):
                            if not os.path.isfile(hmm):
                                raise WrongPath(f"No such file:{hmm}.")
                            hmmFileName = hmm
                        else:
                            raise UnsupportedType(
                                f"<hmm> should be a filePath or exkaldi HMM and its sub-class object. but got {type_name(hmm)}."
                            )

                        # The converter is appended to the cat/gunzip command,
                        # so the alignment is piped into it.
                        if aliType == "phoneID":
                            cmd += f" | ali-to-phones --per-frame=true {hmmFileName} ark:- ark,t:-"
                            results["NumpyAlignmentPhone"] += NumpyAlignmentPhone(
                                transform(None, cmd))
                        elif aliType == "pdfID":
                            cmd += f" | ali-to-pdf {hmmFileName} ark:- ark,t:-"
                            results["NumpyAlignmentPdf"] += NumpyAlignmentPdf(
                                transform(None, cmd))
                        else:
                            raise WrongOperation(
                                f"<aliType> should be None, 'transitionID', 'phoneID' or 'pdfID' but got {aliType}."
                            )

        # Only the accumulators that actually received data are returned.
        finalResult = []
        for obj in results.values():
            if not obj.is_void:
                obj.rename(name)
                finalResult.append(obj)

        if len(finalResult) == 0:
            raise WrongOperation(
                "<target> dose not include any data avaliable.")
        elif len(finalResult) == 1:
            finalResult = finalResult[0]

        return finalResult

    else:
        # Fail loudly instead of silently returning None.
        raise UnsupportedType(
            f"<target> should be a dict, exkaldi alignment object or file path but got {type_name(target)}."
        )
Example #29
0
        def recognizeWave(dataQueue, func, args, resultQueue, interval):
            '''
            Consume wave chunks from <dataQueue>, recognize them with <func> and
            put (flag, result) tuples into <resultQueue>.

            <func> is called with the path of a temporary wav file (and <args>
            if it is not None). <interval> is the seconds of audio collected
            before each call. When the loop ends without any error flag set,
            'endFlag' is put into <resultQueue>.
            '''
            class VAD(object):
                # Returns True when the same result arrives for the third time
                # in a row (and then on every second further repeat).
                def __init__(self):
                    self.lastRe = None
                    self.c = 0

                def __call__(self, re):
                    if re == self.lastRe:
                        self.c += 1
                        if self.c == 2:
                            self.c = 0
                            return True
                        else:
                            return False
                    self.lastRe = re
                    self.c = 0
                    return False

            vad = VAD()

            dataPerReco = []
            timesPerReco = None
            count = 0

            try:
                while True:
                    if True in [self.localErrFlag, self.remoteErrFlag]:
                        break

                    if dataQueue.empty():
                        # NOTE(review): the receive thread could put its last
                        # item and exit between empty() and is_alive(); confirm
                        # whether that race matters for callers.
                        if not ('receive' in self.threadManager
                                and self.threadManager['receive'].is_alive()):
                            raise WrongOperation(
                                'Excepted data input by Receive() from remote client.'
                            )
                        time.sleep(0.01)
                        continue

                    chunkData = dataQueue.get()
                    if timesPerReco is None:
                        #Compute timesPerReco and Throw the first message
                        timesPerReco = math.ceil(self.rate * interval /
                                                 self.chunkFrames)
                        continue

                    if chunkData == 'endFlag':
                        # Force a final recognition of whatever is buffered.
                        count = timesPerReco + 1
                    else:
                        dataPerReco.append(chunkData)
                        count += 1

                    if count < timesPerReco:
                        continue

                    if dataPerReco:
                        with tempfile.NamedTemporaryFile(
                                'w+b', suffix='.wav') as waveFile:
                            # The context manager closes the wave writer even
                            # if writing the frames fails.
                            with wave.open(waveFile.name, 'wb') as wf:
                                wf.setsampwidth(self.width)
                                wf.setnchannels(self.channels)
                                wf.setframerate(self.rate)
                                wf.writeframes(b''.join(dataPerReco))
                            # "is not None" also works for array-like <args>,
                            # whose "!=" is not a plain boolean.
                            if args is not None:
                                result = func(waveFile.name, args)
                            else:
                                result = func(waveFile.name)
                    else:
                        result = " "

                    if count > timesPerReco:
                        # Only reached after 'endFlag': emit the last result.
                        resultQueue.put((True, result))
                        break

                    sof = vad(result)
                    resultQueue.put((sof, result))
                    if sof is True:
                        # The result is stable, start a new utterance buffer.
                        dataPerReco = []
                    count = 0
            except Exception:
                self.localErrFlag = True
                raise
            else:
                if True not in [self.localErrFlag, self.remoteErrFlag]:
                    resultQueue.put('endFlag')
Example #30
0
    def recognize(self, func, args=None, interval=0.3):
        '''
        Usage:  client.recognize(recogFunc)

        Recognize wav in a background thread.
        <func> received path name of chunk wav file (and <args> if it is not None).
        <interval> is the seconds of each chunk wav data.

        The thread is stored in self.threadManager['recognize']. It reads chunks
        from self.dataQueue and puts (flag, result) tuples into self.resultQueue,
        followed by 'endFlag' when it finishes without any error flag set.
        Raises WrongOperation if the client is not used under <with> grammar or
        another recognition thread is still alive.
        '''
        if not self.safeFlag:
            raise WrongOperation(
                'We only allow user to use client under <with> grammar.')

        if ('recognize' in self.threadManager
                and self.threadManager['recognize'].is_alive()):
            raise WrongOperation('Another recognition task is running now.')

        def recognizeWave(dataQueue, func, args, resultQueue, interval):
            class VAD(object):
                # Returns True when the same result arrives for the third time
                # in a row (and then on every second further repeat).
                def __init__(self):
                    self.lastRe = None
                    self.c = 0

                def __call__(self, re):
                    if re == self.lastRe:
                        self.c += 1
                        if self.c == 2:
                            self.c = 0
                            return True
                        else:
                            return False
                    self.lastRe = re
                    self.c = 0
                    return False

            vad = VAD()

            dataPerReco = []
            timesPerReco = None
            count = 0

            try:
                while True:
                    if True in [self.localErrFlag, self.remoteErrFlag]:
                        break

                    if dataQueue.empty():
                        # NOTE(review): the receive thread could put its last
                        # item and exit between empty() and is_alive(); confirm
                        # whether that race matters for callers.
                        if not ('receive' in self.threadManager
                                and self.threadManager['receive'].is_alive()):
                            raise WrongOperation(
                                'Excepted data input by Receive() from remote client.'
                            )
                        time.sleep(0.01)
                        continue

                    chunkData = dataQueue.get()
                    if timesPerReco is None:
                        #Compute timesPerReco and Throw the first message
                        timesPerReco = math.ceil(self.rate * interval /
                                                 self.chunkFrames)
                        continue

                    if chunkData == 'endFlag':
                        # Force a final recognition of whatever is buffered.
                        count = timesPerReco + 1
                    else:
                        dataPerReco.append(chunkData)
                        count += 1

                    if count < timesPerReco:
                        continue

                    if dataPerReco:
                        with tempfile.NamedTemporaryFile(
                                'w+b', suffix='.wav') as waveFile:
                            # The context manager closes the wave writer even
                            # if writing the frames fails.
                            with wave.open(waveFile.name, 'wb') as wf:
                                wf.setsampwidth(self.width)
                                wf.setnchannels(self.channels)
                                wf.setframerate(self.rate)
                                wf.writeframes(b''.join(dataPerReco))
                            # "is not None" also works for array-like <args>,
                            # whose "!=" is not a plain boolean.
                            if args is not None:
                                result = func(waveFile.name, args)
                            else:
                                result = func(waveFile.name)
                    else:
                        result = " "

                    if count > timesPerReco:
                        # Only reached after 'endFlag': emit the last result.
                        resultQueue.put((True, result))
                        break

                    sof = vad(result)
                    resultQueue.put((sof, result))
                    if sof is True:
                        # The result is stable, start a new utterance buffer.
                        dataPerReco = []
                    count = 0
            except Exception:
                self.localErrFlag = True
                raise
            else:
                if True not in [self.localErrFlag, self.remoteErrFlag]:
                    resultQueue.put('endFlag')

        self.threadManager['recognize'] = threading.Thread(
            target=recognizeWave,
            args=(
                self.dataQueue,
                func,
                args,
                self.resultQueue,
                interval,
            ))
        self.threadManager['recognize'].start()