Esempio n. 1
0
  def __prepare_chunk_frame(self):
    '''
    Collect up to `batchSize` 1-d vectors from incoming packets into the work buffer.

    The work buffer is allocated lazily from the first vector received
    (its length and dtype define the buffer's columns and dtype).
    The loop stops early when an endpoint packet is received (endpoint flag is set)
    or when decide_action() returns None (final flag is set); in both cases the
    number of rows filled so far is stored as the tail index.
    Rows that were not filled are reset to zero.

    Returns:
      True if a chunk (possibly partial or empty) was prepared,
      False if decide_action() returned anything other than True or None.
    '''

    pos = 0
    while pos < self.__batchSize:
      action = self.decide_action()
      if action is True:
        pack = self.get_packet()
        if not pack.is_empty():
          iKey = pack.mainKey if self.iKey is None else self.iKey
          vec = pack[iKey]
          assert isinstance(vec, np.ndarray) and len(vec.shape) == 1
          if self.__workBuffer is None:
            # First vector ever seen: it fixes the row width and dtype.
            self.__workBuffer = np.zeros([self.__batchSize, len(vec)], dtype=vec.dtype)
          self.__workBuffer[pos] = vec
          pos += 1
        # An endpoint packet may itself carry data, so it is checked after storing.
        if is_endpoint(pack):
          self.__endpointStep = True
          self.__tailIndex = pos
          break
      elif action is None:
        self.__finalStep = True
        self.__tailIndex = pos
        break
      else:
        return False

    # Pad the tail with zero. The buffer is still None when the stream ended
    # (endpoint / final step) before any vector arrived, so there is nothing to pad.
    if self.__workBuffer is not None:
      self.__workBuffer[pos:] = 0

    return True
Esempio n. 2
0
 def __prepare_chunk_packet(self, inPIPE):
     '''
     Gather up to one batch of packets from <inPIPE> into the element buffer.

     Stops early when the PIPE is exhausted (final flag is set) or when an
     endpoint packet is received (endpoint flag is set).

     Returns:
       False if <inPIPE> is in a wrong state or waiting on an empty PIPE
       exceeded info.TIMEOUT (this component is killed in both cases),
       otherwise True.
     '''
     waited = 0
     collected = 0
     while collected < self.__batchSize:
         # Upstream failure: give up immediately.
         if inPIPE.is_wrong():
             self.kill()
             return False

         # No more data will ever come.
         if inPIPE.is_exhausted():
             self.__finalStep = True
             break

         # Nothing available yet: wait, but not forever.
         if inPIPE.is_empty():
             time.sleep(info.TIMESCALE)
             waited += info.TIMESCALE
             if waited > info.TIMEOUT:
                 print(
                     f"{self.name}: Timeout! Did not receive any data for a long time!"
                 )
                 inPIPE.kill()
                 self.kill()
                 return False
             continue

         # Blocked PIPE: wait without counting towards the timeout.
         if inPIPE.is_blocked():
             time.sleep(info.TIMESCALE)
             continue

         pac = inPIPE.get()
         if is_endpoint(pac):
             self.__endpointStep = True
             break
         self.__elementBuffer.append(pac)
         collected += 1

     return True
Esempio n. 3
0
 def core_loop(self):
     '''
     Core thread function: cut every incoming 2-d matrix into `nChunk`
     equally sized row blocks and forward each block as its own packet.

     The number of rows must be divisible by the chunk count.
     An endpoint packet is answered with a fresh Endpoint after its data
     (if any) has been forwarded. The loop ends as soon as decide_action()
     returns anything other than True.
     '''
     while self.decide_action() is True:
         pack = self.get_packet()

         if not pack.is_empty():
             iKey = pack.mainKey if self.iKey is None else self.iKey
             mat = pack[iKey]
             assert isinstance(mat, np.ndarray) and len(mat.shape) == 2
             cSize = len(mat) // self.__nChunk
             assert cSize * self.__nChunk == len(mat)

             # Emit the row blocks in order.
             for idx in range(self.__nChunk):
                 begin = idx * cSize
                 piece = Packet(items={self.oKey[0]: mat[begin:begin + cSize]},
                                cid=self.__id_count,
                                idmaker=pack.idmaker)
                 self.put_packet(piece)

         # Propagate the endpoint downstream.
         if is_endpoint(pack):
             marker = Endpoint(cid=self.__id_count, idmaker=pack.idmaker)
             self.put_packet(marker)
Esempio n. 4
0
 def __prepare_frame_stream(self, streamPIPE):
     '''
     Fill the stream buffer with one window of elements read from <streamPIPE>.

     On the very first call writing starts at index 0; on later calls the
     buffer content from `shift` onwards is moved to the front and writing
     resumes at `cover`. Reading stops early on exhaustion (final flag) or
     on an endpoint packet (endpoint flag); the write position is then
     recorded as the tail index. Unfilled entries are zeroed.

     Returns:
       False if <streamPIPE> is in a wrong state or waiting on an empty PIPE
       exceeded info.TIMEOUT (this component is killed in both cases),
       otherwise True.
     '''
     waited = 0

     if not self.__zerothStep:
         # Keep the overlapping part of the previous window.
         self.__streamBuffer[0:self.__cover] = self.__streamBuffer[self.__shift:]
         cursor = self.__cover
         self.__firstStep = False
     else:
         cursor = 0
         self.__zerothStep = False
         self.__firstStep = True

     while cursor < self.__width:
         # Error occurred in the stream PIPE.
         if streamPIPE.is_wrong():
             self.kill()
             return False

         # No more data.
         if streamPIPE.is_exhausted():
             self.__finalStep = True
             self.__tailIndex = cursor
             break

         # Nothing received yet: wait, bounded by the timeout.
         if streamPIPE.is_empty():
             time.sleep(info.TIMESCALE)
             waited += info.TIMESCALE
             if waited > info.TIMEOUT:
                 print(
                     f"{self.name}: Timeout! Did not receive any data for a long time!"
                 )
                 # Try to kill the stream PIPE, then this component.
                 streamPIPE.kill()
                 self.kill()
                 return False
             continue

         # Blocked: wait without counting towards the timeout.
         if streamPIPE.is_blocked():
             time.sleep(info.TIMESCALE)
             continue

         # Data is available.
         ele = streamPIPE.get()
         if is_endpoint(ele):
             self.__endpointStep = True
             self.__tailIndex = cursor
             break
         assert isinstance(
             ele, Element
         ), f"{self.name}: Need Element packet but got: {type(ele).__name__}"
         self.__streamBuffer[cursor] = ele.data
         cursor += 1

     # Zero everything that was not filled.
     self.__streamBuffer[cursor:] = 0
     return True
Esempio n. 5
0
    def __prepare_chunk_stream(self):
        '''
        Fill the (batchSize x width) stream buffer row by row with scalar elements.

        Except on the very first row ever processed, each row starts with a copy
        of the previous row from `shift` onwards (for row 0 the "previous" row is
        index -1, i.e. the last row of the buffer) and new elements are written
        from `cover`. The buffer is allocated lazily from the first element's dtype.
        Filling stops at an endpoint packet (endpoint flag) or when
        decide_action() returns None (final flag); unfilled cells and rows are zeroed.

        Returns:
          False if decide_action() returned anything other than True or None,
          otherwise True.
        '''
        self.__hadData = False

        for row in range(self.__batchSize):

            # Decide where new data starts in this row.
            if not self.__zerothStep:
                self.__streamBuffer[row, 0:self.__cover] = self.__streamBuffer[
                    row - 1, self.__shift:]
                col = self.__cover
            else:
                col = 0
                self.__zerothStep = False

            # Read new elements until the row is complete.
            while col < self.__width:
                action = self.decide_action()

                if action is None:
                    self.__finalStep = True
                    break
                if action is not True:
                    return False

                pack = self.get_packet()
                if not pack.is_empty():
                    iKey = pack.mainKey if self.iKey is None else self.iKey
                    ele = pack[iKey]
                    assert isinstance(ele, (np.signedinteger, np.floating))
                    if self.__streamBuffer is None:
                        shape = [self.__batchSize, self.__width]
                        self.__streamBuffer = np.zeros(shape, dtype=ele.dtype)
                    self.__streamBuffer[row, col] = ele
                    self.__hadData = True
                    col += 1
                if is_endpoint(pack):
                    self.__endpointStep = True
                    break

            bufferReady = self.__streamBuffer is not None

            # Zero the unfilled part of this row.
            if bufferReady:
                self.__streamBuffer[row, col:] = 0

            if self.__endpointStep or self.__finalStep:
                break

        # Zero all rows after the last one touched.
        if self.__streamBuffer is not None:
            self.__streamBuffer[row + 1:] = 0

        return True
Esempio n. 6
0
	def __prepare_chunk_feature(self,featurePIPE):
		'''
		Fill the feature buffer with one window of vectors read from <featurePIPE>.

		On the very first call writing starts at row `left`; on later calls rows
		`center:width` are moved to the front and writing resumes at row `cover`.
		Every stored vector also increments the duration counter.
		Reading stops early on exhaustion (final flag) or on an endpoint packet
		(endpoint flag); the write position is then recorded as the tail index.
		Rows up to `width` that were not filled are zeroed.

		Returns:
			False if <featurePIPE> is in a wrong state or waiting on an empty PIPE
			exceeded info.TIMEOUT (this component is killed in both cases),
			otherwise True.
		'''
		waited = 0

		if not self.__zeroStep:
			# Carry the overlapping rows of the previous window to the front.
			self.__featureBuffer[0:self.__cover,:] = self.__featureBuffer[ self.__center:self.__width,: ]
			cursor = self.__cover
		else:
			cursor = self.__left
			self.__zeroStep = False

		while cursor < self.__width:
			# The feature PIPE had an error.
			if featurePIPE.is_wrong():
				self.kill()
				return False

			# No more data.
			if featurePIPE.is_exhausted():
				self.__tailIndex = cursor
				self.__finalStep = True
				break

			# Nothing received yet: wait, bounded by the timeout.
			if featurePIPE.is_empty():
				time.sleep(info.TIMESCALE)
				waited += info.TIMESCALE
				if waited > info.TIMEOUT:
					print(f"{self.name}: Timeout! Did not receive any data for a long time!")
					# Try to kill the feature PIPE, then this component.
					featurePIPE.kill()
					self.kill()
					return False
				continue

			# Blocked: wait without counting towards the timeout.
			if featurePIPE.is_blocked():
				time.sleep(info.TIMESCALE)
				continue

			# Data is available.
			vec = featurePIPE.get()
			if is_endpoint(vec):
				self.__endpointStep = True
				self.__tailIndex = cursor
				break
			assert isinstance(vec,Vector), f"{self.name}: Need Vector packet but got: {type(vec).__name__}."
			self.__featureBuffer[cursor] = vec.data
			cursor += 1
			self.__duration += 1

		# Zero the rows that were not filled.
		self.__featureBuffer[cursor:self.__width,:] = 0
		return True
Esempio n. 7
0
    def core_loop(self):
        '''
        Core loop: compute the output for every incoming matrix packet.

        Without a context object each packet is processed and forwarded directly.
        With a context object, wrap() may return None for a packet; that packet is
        then held back (`lastPacket`). While a packet is held back, the result
        computed when the next packet arrives is attached to the held-back packet,
        which is then forwarded, and the newly arrived packet becomes the
        held-back one. At an endpoint the held-back packet is flushed by wrapping
        a zero matrix of the same shape.

        The loop ends as soon as decide_action() returns anything other than True.
        '''
        lastPacket = None
        self.__firstComputing = True
        while True:

            action = self.decide_action()
            if action is not True:
                break

            packet = self.get_packet()

            if not packet.is_empty():
                iKey = packet.mainKey if self.iKey is None else self.iKey
                mat = packet[iKey]
                if self.__context is None:
                    probs = self.__compute_and_postprocess(mat, mat.shape[0])
                    packet.add(self.oKey[0], probs, asMainKey=True)
                    self.put_packet(packet)
                else:
                    newMat = self.__context.wrap(mat)
                    if newMat is None:
                        # Nothing to compute yet: hold this packet back.
                        lastPacket = packet
                    else:
                        probs = self.__compute_and_postprocess(
                            newMat, mat.shape[0])
                        if lastPacket is None:
                            packet.add(self.oKey[0], probs, asMainKey=True)
                            self.put_packet(packet)
                        else:
                            # The result is attached to the held-back packet, so
                            # that is the packet that must be forwarded. Forwarding
                            # `packet` here would drop the first held-back packet
                            # and emit every packet before its result is attached.
                            lastPacket.add(self.oKey[0],
                                           probs,
                                           asMainKey=True)
                            self.put_packet(lastPacket)
                            lastPacket = packet

            if is_endpoint(packet):
                if lastPacket is not None:
                    # Flush the held-back packet using a zero matrix as input.
                    iKey = lastPacket.mainKey if self.iKey is None else self.iKey
                    mat = np.zeros_like(lastPacket[iKey])
                    newMat = self.__context.wrap(mat)
                    probs = self.__compute_and_postprocess(
                        newMat, mat.shape[0])
                    lastPacket.add(self.oKey[0], probs, asMainKey=True)
                    self.put_packet(lastPacket)
                    # Already forwarded: never flush or re-emit it again.
                    lastPacket = None
                # An empty endpoint carries no data and is forwarded as-is.
                if packet.is_empty():
                    self.put_packet(packet)
Esempio n. 8
0
    def __prepare_batch_stream(self):
        '''
        Fill the stream buffer with one window of 1-d vectors taken from incoming packets.

        On the very first call writing starts at row `left`; on later calls the
        rows from `center` onwards are moved to the front and writing resumes at
        row `cover`. The buffer is allocated lazily as (width x dim) from the
        first vector received. Filling stops at an endpoint packet (endpoint flag)
        or when decide_action() returns neither True nor False (final flag).
        Rows that were not filled are zeroed.

        Returns:
          False if decide_action() returned False, otherwise True.
        '''
        self.__hadData = False

        # Decide where new data starts.
        if not self.__zerothStep:
            self.__streamBuffer[0:self.__cover] = self.__streamBuffer[self.__center:]
            cursor = self.__cover
            self.__firstStep = False
        else:
            cursor = self.__left
            self.__zerothStep = False
            self.__firstStep = True

        # Read new vectors until the window is complete.
        while cursor < self.__width:
            action = self.decide_action()

            if action is False:
                return False
            if action is not True:
                self.__finalStep = True
                break

            pack = self.get_packet()
            if not pack.is_empty():
                iKey = pack.mainKey if self.iKey is None else self.iKey
                vec = pack[iKey]
                assert isinstance(vec, np.ndarray) and len(vec.shape) == 1
                if self.__streamBuffer is None:
                    shape = [self.__width, len(vec)]
                    self.__streamBuffer = np.zeros(shape, dtype=vec.dtype)
                self.__streamBuffer[cursor] = vec
                self.__hadData = True
                cursor += 1
            if is_endpoint(pack):
                self.__endpointStep = True
                break

        # Zero the rows that were not filled (if a buffer exists at all).
        if self.__streamBuffer is not None:
            self.__streamBuffer[cursor:] = 0

        return True
Esempio n. 9
0
    def core_loop(self):
        '''
        Core loop: apply the map function to the items of every non-empty packet.

        The mapped items are forwarded in a new Packet, or a new Endpoint when
        the incoming packet is an endpoint, keeping the original cid and idmaker.
        Empty endpoints are forwarded unchanged; other empty packets are dropped.
        The loop ends as soon as decide_action() returns anything other than True.
        '''
        while self.decide_action() is True:
            incoming = self.get_packet()

            if incoming.is_empty():
                if is_endpoint(incoming):
                    self.put_packet(incoming)
                continue

            mapped = self.__map_function(dict(incoming.items()))
            # Preserve the endpoint status of the incoming packet.
            wrapper = Endpoint if is_endpoint(incoming) else Packet
            outgoing = wrapper(items=mapped,
                               cid=incoming.cid,
                               idmaker=incoming.idmaker)
            self.put_packet(outgoing)
Esempio n. 10
0
    def __prepare_chunk_frame(self, framePIPE):
        '''
        Fill the frame buffer with up to one batch of vectors read from <framePIPE>.

        Reading stops early on exhaustion (final flag) or on an endpoint packet
        (endpoint flag); the number of rows filled is then recorded as the tail
        index. Rows that were not filled are zeroed.

        Returns:
          False if <framePIPE> is in a wrong state or waiting on an empty PIPE
          exceeded info.TIMEOUT (this component is killed in both cases),
          otherwise True.
        '''
        waited = 0
        row = 0
        while row < self.__batchSize:
            # The frame PIPE had an error.
            if framePIPE.is_wrong():
                self.kill()
                return False

            # No more data.
            if framePIPE.is_exhausted():
                self.__tailIndex = row
                self.__finalStep = True
                break

            # Nothing received yet: wait, bounded by the timeout.
            if framePIPE.is_empty():
                time.sleep(info.TIMESCALE)
                waited += info.TIMESCALE
                if waited > info.TIMEOUT:
                    print(
                        f"{self.name}: Timeout! Did not receive any data for a long time!"
                    )
                    # Try to kill the frame PIPE, then this component.
                    framePIPE.kill()
                    self.kill()
                    return False
                continue

            # Blocked: wait without counting towards the timeout.
            if framePIPE.is_blocked():
                time.sleep(info.TIMESCALE)
                continue

            # Data is available.
            vec = framePIPE.get()
            if is_endpoint(vec):
                self.__endpointStep = True
                self.__tailIndex = row
                break
            assert isinstance(
                vec, Vector
            ), f"{self.name}: Need vector packet but got: {type(vec).__name__}."
            self.__frameBuffer[row, :] = vec.data
            row += 1

        # Pad the tail with zero.
        self.__frameBuffer[row:, :] = 0

        return True
Esempio n. 11
0
	def __prepare_chunk_probability(self,probabilityPIPE):
		'''
		Fill the probability buffer with up to one batch of vectors read from <probabilityPIPE>.

		Reading stops early on exhaustion (final flag) or on an endpoint packet
		(endpoint flag); the number of rows filled is then recorded as the tail
		index. Rows that were not filled are zeroed.

		Returns:
			False if <probabilityPIPE> is in a wrong state or waiting on an empty
			PIPE exceeded info.TIMEOUT (this component is killed in both cases),
			otherwise True.
		'''
		waited = 0
		row = 0

		while row < self.__batchSize:
			# The previous PIPE had errors.
			if probabilityPIPE.is_wrong():
				self.kill()
				return False

			# No more data.
			if probabilityPIPE.is_exhausted():
				self.__tailIndex = row
				self.__finalStep = True
				break

			# Nothing received yet: wait, bounded by the timeout.
			if probabilityPIPE.is_empty():
				time.sleep(info.TIMESCALE)
				waited += info.TIMESCALE
				if waited > info.TIMEOUT:
					print(f"{self.name}: Timeout! Did not receive any data for a long time!")
					# Try to kill the probability PIPE, then this component.
					probabilityPIPE.kill()
					self.kill()
					return False
				continue

			# Blocked: wait without counting towards the timeout.
			if probabilityPIPE.is_blocked():
				time.sleep(info.TIMESCALE)
				continue

			# Data is available.
			vec = probabilityPIPE.get()
			if is_endpoint(vec):
				self.__endpointStep = True
				self.__tailIndex = row
				break
			assert isinstance(vec,Vector)
			self.__probabilityBuffer[row] = vec.data
			row += 1

		# Pad the rest with zero.
		self.__probabilityBuffer[row:,:] = 0
		return True
Esempio n. 12
0
def dump_text_PIPE(pipe, key=None, allowPartial=True, endSymbol="\n"):
    '''
    Drain a wrong or terminated text PIPE and build a transcription from it.

    Only the most recent text seen before each endpoint is kept; one entry is
    produced per endpoint that was preceded by some text.

    Args:
      pipe: the PIPE to drain; must be in wrong or terminated state and not out-locked.
      key: item key to read from each packet; the packet's main key is used when None.
      allowPartial: if True, text still pending when the PIPE runs empty
        (not closed by an endpoint) is appended as well.
      endSymbol: separator used to join the collected entries.

    Returns:
      The collected entries joined with <endSymbol>.
    '''
    assert isinstance(allowPartial, bool)
    assert isinstance(endSymbol, str)
    assert pipe.state_is_(
        mark.wrong,
        mark.terminated), "<pipe> must be wrong or terminated PIPE."
    assert not pipe.is_outlocked()
    assert key is None or isinstance(key, str)

    collected = []
    pending = None

    while not pipe.is_empty():
        packet = pipe.get()

        # Remember the latest text carried by a packet.
        if not packet.is_empty():
            text = packet[packet.mainKey if key is None else key]
            assert isinstance(text, str)
            pending = text

        # An endpoint closes the current entry, if there is one.
        if is_endpoint(packet) and pending is not None:
            collected.append(pending)
            pending = None

    if allowPartial and pending is not None:
        collected.append(pending)

    return endSymbol.join(collected)
Esempio n. 13
0
    def core_loop(self):
        '''
        Core loop: dissolve every incoming array into one packet per element.

        The array is flattened and each element is forwarded in its own Packet.
        An endpoint packet is answered with a fresh Endpoint after its data
        (if any) has been forwarded. The loop ends as soon as decide_action()
        returns anything other than True.
        '''
        while self.decide_action() is True:
            packet = self.get_packet()

            if not packet.is_empty():
                iKey = packet.mainKey if self.iKey is None else self.iKey
                data = packet[iKey]
                assert isinstance(
                    data, np.ndarray
                ), f"{self.name}: Can only dissolve vector and matrix packet but got: {type(data)}."
                flat = data.reshape(-1)
                for element in flat:
                    single = Packet({self.oKey[0]: element},
                                    cid=self.__id_count,
                                    idmaker=packet.idmaker)
                    self.put_packet(single)

            # Propagate the endpoint downstream.
            if is_endpoint(packet):
                marker = Endpoint(cid=self.__id_count, idmaker=packet.idmaker)
                self.put_packet(marker)
Esempio n. 14
0
def dump_text_PIPE(textPIPE,allowPartial=True,endSymbol="\n"):
	'''
	Read a text PIPE until it ends and build a transcription from it.

	Only the most recent text seen before each endpoint is kept; one entry is
	produced per endpoint that was preceded by some text. Reading stops when
	the PIPE is wrong or exhausted, or when the accumulated waiting time on an
	empty PIPE exceeds info.TIMEOUT.

	Args:
		textPIPE: the PIPE to read; must be alive or terminated.
		allowPartial: if True, text still pending when reading stops
			(not closed by an endpoint) is appended as well.
		endSymbol: separator used to join the collected entries.

	Returns:
		The collected entries joined with <endSymbol>.
	'''
	assert isinstance(allowPartial,bool)
	assert isinstance(endSymbol,str)
	assert textPIPE.is_alive() or textPIPE.is_terminated(), "<textPIPE> must be ALIVE or TERMINATION PIPE."

	collected = []
	pending = None
	waited = 0

	while True:
		if textPIPE.is_wrong() or textPIPE.is_exhausted():
			break

		# Nothing available yet: wait, bounded by the timeout.
		if textPIPE.is_empty():
			time.sleep(info.TIMESCALE)
			waited += info.TIMESCALE
			if waited > info.TIMEOUT:
				break
			continue

		packet = textPIPE.get()
		if not is_endpoint(packet):
			assert isinstance(packet,Text), "This is not a Text PIPE."
			pending = packet.data
		elif pending is not None:
			# An endpoint closes the current entry.
			collected.append( pending )
			pending = None

	if allowPartial and (pending is not None):
		collected.append( pending )

	return endSymbol.join(collected)
Esempio n. 15
0
    def core_loop(self):
        '''
        Core loop: feed incoming probability matrices to the decoding subprocess.

        Messages written to the subprocess stdin:
          " -1 <rows> " + data : a regular (non-endpoint) matrix,
          " -2 <rows> " + data : an endpoint carrying a matrix,
          " -2 0 "             : an empty endpoint,
          " -3 "               : final step (decide_action() returned None).
        Every packet whose message was sent is also put into the packet cache.
        Empty non-endpoint packets are skipped. After the loop the result-reading
        thread is joined; the subprocess stdout is always closed and the
        subprocess killed on exit.
        '''

        def send(payload):
            # Write to the decoder's stdin; on failure dump its stderr and re-raise.
            try:
                self.__decodeProcess.stdin.write(payload)
                self.__decodeProcess.stdin.flush()
            except Exception as e:
                print(self.__decodeProcess.stderr.read().decode())
                raise e

        def scaled_matrix(source):
            # Validate the packet's matrix and apply the acoustic scale.
            iKey = source.mainKey if self.iKey is None else self.iKey
            mat = source[iKey]
            assert isinstance(mat, np.ndarray) and len(mat.shape) == 2
            assert mat.shape[
                0] <= self.__max_batch_size, "The chunk size of matrix > max allowable batch size of this decoder."
            assert mat.shape[
                1] == self.__pdfs, "The dim. of probability does not match the PDFs."
            return self.__acoustic_scale * mat

        try:
            while True:
                action = self.decide_action()

                if action is False:
                    break

                if action is None:
                    # Final step.
                    send(b" -3 ")
                    break

                packet = self.get_packet()

                if is_endpoint(packet):
                    if packet.is_empty():
                        send(b" -2 0 ")
                    else:
                        mat = scaled_matrix(packet)
                        header = f" -2 {mat.shape[0]} ".encode()
                        send(header + encode_vector_temp(mat.reshape(-1)))
                    self.__packetCache.put(packet)
                elif not packet.is_empty():
                    mat = scaled_matrix(packet)
                    header = f" -1 {mat.shape[0]} ".encode()
                    send(header + encode_vector_temp(mat.reshape(-1)))
                    self.__packetCache.put(packet)

            # Wait until all results have been received.
            self.__readResultThread.join()
            # Tell the decoding process to finish.
            self.__decodeProcess.stdin.write(b"over")
        finally:
            self.__decodeProcess.stdout.close()
            self.__decodeProcess.kill()
Esempio n. 16
0
    def __read_result_from_subprocess(self):
        '''
        Thread target: read decoding results from the decoding subprocess's stdout.

        Every non-empty line starts with a flag:
          "-1": partial result; the ids are attached to the next cached packet.
          "-2": endpoint result; either nothing, or one or more hypotheses each
                introduced by "-1". With several hypotheses they are stored under
                oKey[0], oKey[0]-2, oKey[0]-3, ... (optionally after rescoring).
          "-3": termination; the loop ends.
        Empty lines count towards a timeout of info.TIMEOUT.

        On any exception the input PIPE is killed (if not already wrong or
        terminated) and the exception is re-raised; on normal exit the input
        PIPE is terminated. The subprocess stdout is always closed and the
        subprocess killed.

        Raises:
          Exception: on timeout or when a line starts with an unknown flag.
        '''

        def nbest_key(rank):
            # The best hypothesis uses the plain output key, the others get a suffix.
            return self.oKey[0] if rank == 0 else (self.oKey[0] + f"-{rank+1}")

        timecost = 0
        try:
            while True:
                # Decide state and action.
                master, state = self.decide_state()

                if state == mark.wrong:
                    break
                elif state == mark.stranded:
                    time.sleep(info.TIMESCALE)
                    continue
                elif state == mark.terminated:
                    if master == mark.outPIPE:
                        break

                # State is active, or terminated with another master:
                # keep reading results.
                line = self.__decodeProcess.stdout.readline().decode().strip()

                # Nothing was received.
                if line == "":
                    time.sleep(info.TIMESCALE)
                    timecost += info.TIMESCALE
                    if timecost > info.TIMEOUT:
                        raise Exception(
                            f"{self.name}: Timeout! Receiving thread has not received any data for a long time!"
                        )

                # Partial result.
                elif line.startswith("-1"):
                    packet = self.__packetCache.get()
                    tokens = line[2:].strip().split()  # discard the flag "-1"
                    if len(tokens) > 0:
                        packet.add(self.oKey[0],
                                   self.ids_to_words(tokens),
                                   asMainKey=True)
                    else:
                        packet.add(self.oKey[0], " ", asMainKey=True)
                    self.put_packet(packet)

                # Endpoint.
                elif line.startswith("-2"):
                    packet = self.__packetCache.get()
                    body = line[2:].strip()  # discard the flag "-2"
                    if len(body) == 0:
                        self.put_packet(packet)
                    else:
                        # Discard the leading "-1", then split the remaining
                        # hypotheses on "-1". Each hypothesis becomes a list
                        # of id tokens.
                        hypotheses = [
                            chunk.strip().split()
                            for chunk in body[2:].strip().split("-1")
                            if len(chunk.strip()) > 0
                        ]
                        if len(hypotheses) == 0:
                            packet.add(self.oKey[0], " ", asMainKey=True)
                        elif len(hypotheses) == 1:
                            packet.add(self.oKey[0],
                                       self.ids_to_words(hypotheses[0]),
                                       asMainKey=True)
                        elif self.rescore_function is None:
                            # No rescoring needed: keep the N-best order as received.
                            for i, tokens in enumerate(hypotheses):
                                packet.add(nbest_key(i),
                                           self.ids_to_words(tokens),
                                           asMainKey=True)
                        else:
                            # Each hypothesis is already a token list, so the
                            # tokens are converted directly (calling .split()
                            # on the list would raise AttributeError).
                            nbestsInt = [[int(ID) for ID in tokens]
                                         for tokens in hypotheses]
                            nResults = self.rescore_function(nbestsInt)
                            # Validate what the rescoring function returned.
                            assert isinstance(
                                nResults,
                                (list, tuple)) and len(nResults) > 0
                            for i, result in enumerate(nResults):
                                assert isinstance(
                                    result,
                                    (list, tuple)) and len(result) > 0
                                packet.add(nbest_key(i),
                                           self.ids_to_words(result),
                                           asMainKey=True)

                        if not is_endpoint(packet):
                            self.put_packet(packet)
                        else:
                            self.put_packet(
                                Endpoint(items=dict(packet.items()),
                                         cid=packet.cid,
                                         idmaker=packet.idmaker))

                # Final step.
                elif line.startswith("-3"):
                    break

                else:
                    raise Exception(
                        f"{self.name}: Expected flag (-1 -> partial) (-2 endpoint) (-3 termination) but got: {line}"
                    )

        except Exception as e:
            # NOTE(review): the original repeated this inPIPE check twice verbatim;
            # the second copy possibly meant the output PIPE - confirm.
            if not self.inPIPE.state_is_(mark.wrong, mark.terminated):
                self.inPIPE.kill()
            raise e
        else:
            # NOTE(review): same verbatim duplication as above in the original.
            if not self.inPIPE.state_is_(mark.wrong, mark.terminated):
                self.inPIPE.terminate()
        finally:
            self.__decodeProcess.stdout.close()
            self.__decodeProcess.kill()