def __prepare_chunk_frame(self):
    '''
    Fill the work buffer with up to `self.__batchSize` 1-d vectors taken from incoming packets.

    Packets are pulled with `self.get_packet()` while `self.decide_action()` returns True.
    Filling stops early when an endpoint packet is seen (`__endpointStep` is set) or when
    `decide_action()` returns None (`__finalStep` is set); in both cases `__tailIndex`
    records how many rows were filled. The unfilled rows are set to zero.

    Returns:
        True if the (possibly partial) batch was prepared,
        False if `decide_action()` returned anything other than True or None.
    '''
    pos = 0
    while pos < self.__batchSize:
        action = self.decide_action()
        if action is True:
            pack = self.get_packet()
            if not pack.is_empty():
                iKey = pack.mainKey if self.iKey is None else self.iKey
                vec = pack[iKey]
                assert isinstance(vec, np.ndarray) and len(vec.shape) == 1
                # The buffer is allocated lazily because the vector dim and dtype
                # are only known once the first vector has arrived.
                if self.__workBuffer is None:
                    dim = len(vec)
                    self.__workBuffer = np.zeros([self.__batchSize, dim], dtype=vec.dtype)
                self.__workBuffer[pos] = vec
                pos += 1
            # An endpoint packet may or may not carry data; either way it closes the batch.
            if is_endpoint(pack):
                self.__endpointStep = True
                self.__tailIndex = pos
                break
        elif action is None:
            self.__finalStep = True
            self.__tailIndex = pos
            break
        else:
            return False
    # Pad the tail with zeros. The buffer is still None when no non-empty packet has
    # ever been received (e.g. an empty endpoint arrives first), so guard the access
    # instead of failing with a TypeError.
    if self.__workBuffer is not None:
        self.__workBuffer[pos:] = 0
    return True
def __prepare_chunk_packet(self, inPIPE):
    '''
    Collect up to `self.__batchSize` packets from <inPIPE> into `self.__elementBuffer`.

    Collection stops early when the PIPE is exhausted (`__finalStep` is set) or an
    endpoint packet is received (`__endpointStep` is set; the endpoint is not buffered).

    Returns:
        True on success, False if <inPIPE> is in a wrong state or the accumulated
        waiting time on an empty PIPE exceeded `info.TIMEOUT` (this component is
        killed in both cases, and <inPIPE> as well on timeout).
    '''
    waited = 0
    collected = 0
    while collected < self.__batchSize:
        # Upstream error: give up.
        if inPIPE.is_wrong():
            self.kill()
            return False
        # No more data will ever come.
        if inPIPE.is_exhausted():
            self.__finalStep = True
            break
        # Nothing available yet: wait, counting the time spent waiting in this call.
        if inPIPE.is_empty():
            time.sleep(info.TIMESCALE)
            waited += info.TIMESCALE
            if waited > info.TIMEOUT:
                print(f"{self.name}: Timeout! Did not receive any data for a long time!")
                inPIPE.kill()
                self.kill()
                return False
            continue
        # PIPE is blocked: wait without counting towards the timeout.
        if inPIPE.is_blocked():
            time.sleep(info.TIMESCALE)
            continue
        # Data is available.
        item = inPIPE.get()
        if is_endpoint(item):
            self.__endpointStep = True
            break
        self.__elementBuffer.append(item)
        collected += 1
    return True
def core_loop(self):
    '''
    The core thread function: split each incoming matrix into `self.__nChunk` row blocks.

    Every non-empty packet must hold a 2-d array whose row count is divisible by
    `self.__nChunk`; one new Packet is emitted per block under `self.oKey[0]`.
    When the incoming packet is an endpoint, an empty Endpoint is emitted afterwards.
    The loop ends as soon as `decide_action()` returns anything other than True.
    '''
    while self.decide_action() is True:
        source = self.get_packet()
        if not source.is_empty():
            iKey = source.mainKey if self.iKey is None else self.iKey
            matrix = source[iKey]
            assert isinstance(matrix, np.ndarray) and len(matrix.shape) == 2
            rows = len(matrix) // self.__nChunk
            assert rows * self.__nChunk == len(matrix)
            # Emit the blocks in order.
            for index in range(self.__nChunk):
                begin = index * rows
                block = {self.oKey[0]: matrix[begin:begin + rows]}
                self.put_packet(
                    Packet(items=block, cid=self.__id_count, idmaker=source.idmaker)
                )
        # Propagate the endpoint marker after any data it carried.
        if is_endpoint(source):
            self.put_packet(Endpoint(cid=self.__id_count, idmaker=source.idmaker))
def __prepare_frame_stream(self, streamPIPE):
    '''
    Fill `self.__streamBuffer` with the next window of elements read from <streamPIPE>.

    On the very first call the window is filled from position 0; afterwards the tail
    of the previous window (from `__shift` on) is moved to the front and filling
    resumes at `__cover`. Filling stops early on exhaustion (`__finalStep`) or on an
    endpoint (`__endpointStep`); `__tailIndex` then marks the first unfilled position.
    The unfilled part of the buffer is zeroed.

    Returns:
        True on success, False if <streamPIPE> is wrong or waiting timed out.
    '''
    waited = 0
    # Reuse the overlapping part of the previous window unless this is the first one.
    if self.__zerothStep:
        self.__zerothStep = False
        self.__firstStep = True
        pos = 0
    else:
        self.__streamBuffer[0:self.__cover] = self.__streamBuffer[self.__shift:]
        self.__firstStep = False
        pos = self.__cover
    # Pull new elements until the window is full.
    while pos < self.__width:
        # Error in the stream PIPE.
        if streamPIPE.is_wrong():
            self.kill()
            return False
        # No more data.
        if streamPIPE.is_exhausted():
            self.__finalStep = True
            self.__tailIndex = pos
            break
        # Nothing received yet: wait and count towards the timeout.
        if streamPIPE.is_empty():
            time.sleep(info.TIMESCALE)
            waited += info.TIMESCALE
            if waited > info.TIMEOUT:
                print(f"{self.name}: Timeout! Did not receive any data for a long time!")
                # Try to kill the stream PIPE, then this component.
                streamPIPE.kill()
                self.kill()
                return False
            continue
        # Blocked: just wait.
        if streamPIPE.is_blocked():
            time.sleep(info.TIMESCALE)
            continue
        # Data available.
        ele = streamPIPE.get()
        if is_endpoint(ele):
            self.__endpointStep = True
            self.__tailIndex = pos
            break
        assert isinstance(
            ele, Element
        ), f"{self.name}: Need Element packet but got: {type(ele).__name__}"
        self.__streamBuffer[pos] = ele.data
        pos += 1
    # Zero whatever was not filled.
    self.__streamBuffer[pos:] = 0
    return True
def __prepare_chunk_stream(self):
    '''
    Fill the 2-d stream buffer (`__batchSize` x `__width`) with scalar elements from packets.

    Each row starts with the overlap copied from the previous row (row -1, i.e. the
    last row, for row 0) unless this is the very first step, and is then completed
    with new elements. `__hadData` tells whether at least one element was stored.
    Filling stops at an endpoint packet (`__endpointStep`) or when `decide_action()`
    returns None (`__finalStep`); the rest of the buffer is zeroed.

    Returns:
        True on success, False if `decide_action()` returned something other
        than True or None.
    '''
    self.__hadData = False
    for row in range(self.__batchSize):
        # Seed the row with the overlap of the preceding one when needed.
        if self.__zerothStep:
            self.__zerothStep = False
            col = 0
        else:
            self.__streamBuffer[row, 0:self.__cover] = self.__streamBuffer[row - 1, self.__shift:]
            col = self.__cover
        # Complete the row with fresh elements.
        while col < self.__width:
            action = self.decide_action()
            if action is None:
                self.__finalStep = True
                break
            if action is not True:
                return False
            pack = self.get_packet()
            if not pack.is_empty():
                iKey = pack.mainKey if self.iKey is None else self.iKey
                ele = pack[iKey]
                assert isinstance(ele, (np.signedinteger, np.floating))
                # Allocate lazily: the dtype is taken from the first element.
                if self.__streamBuffer is None:
                    self.__streamBuffer = np.zeros(
                        [self.__batchSize, self.__width], dtype=ele.dtype
                    )
                self.__streamBuffer[row, col] = ele
                self.__hadData = True
                col += 1
            if is_endpoint(pack):
                self.__endpointStep = True
                break
        # Zero the unfilled part of this row.
        if self.__streamBuffer is not None:
            self.__streamBuffer[row, col:] = 0
        if self.__endpointStep or self.__finalStep:
            break
    # Zero the rows that were never reached.
    if self.__streamBuffer is not None:
        self.__streamBuffer[row + 1:] = 0
    return True
def __prepare_chunk_feature(self,featurePIPE):
    '''
    Fill `self.__featureBuffer` with the next window of vectors read from <featurePIPE>.

    On the first call filling starts at `__left`; afterwards rows `__center:__width`
    of the previous window are moved to the front and filling resumes at `__cover`.
    `__duration` is incremented once per stored vector. Filling stops early on
    exhaustion (`__finalStep`) or endpoint (`__endpointStep`), with `__tailIndex`
    marking the first unfilled row; rows up to `__width` that were not filled are zeroed.

    Returns:
        True on success, False if <featurePIPE> is wrong or waiting timed out.
    '''
    waited = 0
    # Copy old data
    if self.__zeroStep:
        self.__zeroStep = False
        pos = self.__left
    else:
        self.__featureBuffer[0:self.__cover,:] = self.__featureBuffer[self.__center:self.__width,:]
        pos = self.__cover
    while pos < self.__width:
        # The feature PIPE had an error.
        if featurePIPE.is_wrong():
            self.kill()
            return False
        # No more data.
        if featurePIPE.is_exhausted():
            self.__tailIndex = pos
            self.__finalStep = True
            break
        # Nothing received yet: wait and count towards the timeout.
        if featurePIPE.is_empty():
            time.sleep(info.TIMESCALE)
            waited += info.TIMESCALE
            if waited > info.TIMEOUT:
                print(f"{self.name}: Timeout! Did not receive any data for a long time!")
                # Try to kill the feature PIPE, then this component.
                featurePIPE.kill()
                self.kill()
                return False
            continue
        # Blocked: just wait.
        if featurePIPE.is_blocked():
            time.sleep(info.TIMESCALE)
            continue
        # Data available.
        vec = featurePIPE.get()
        if is_endpoint(vec):
            self.__endpointStep = True
            self.__tailIndex = pos
            break
        assert isinstance(vec,Vector), f"{self.name}: Need Vector packet but got: {type(vec).__name__}."
        self.__featureBuffer[pos] = vec.data
        pos += 1
        self.__duration += 1
    # Set the rest with zero
    self.__featureBuffer[pos:self.__width,:] = 0
    return True
def core_loop(self):
    '''
    Core loop: compute probabilities for every incoming matrix and forward the packets.

    Without a context (`self.__context is None`) each packet gets its result under
    `self.oKey[0]` immediately. With a context, `self.__context.wrap()` may return
    None for a matrix, in which case the packet is held back in `lastPacket`; the
    next computed result is then attached to that held-back packet, which is
    forwarded, and the current packet becomes the held-back one. On an endpoint, a
    zero matrix is pushed through the context to obtain the result for the packet
    still held back. An empty endpoint packet is forwarded as it is.

    The loop ends when `decide_action()` returns anything other than True.
    '''
    lastPacket = None
    self.__firstComputing = True
    while True:
        action = self.decide_action()
        if action is not True:
            break
        packet = self.get_packet()
        if not packet.is_empty():
            iKey = packet.mainKey if self.iKey is None else self.iKey
            mat = packet[iKey]
            if self.__context is not None:
                newMat = self.__context.wrap(mat)
                if newMat is None:
                    # No result can be computed yet: hold this packet back.
                    lastPacket = packet
                else:
                    probs = self.__compute_and_postprocess(newMat, mat.shape[0])
                    if lastPacket is None:
                        packet.add(self.oKey[0], probs, asMainKey=True)
                        self.put_packet(packet)
                    else:
                        # The result is attached to the held-back packet, so that
                        # is the packet to forward (the original forwarded the
                        # current packet, which carries no result yet).
                        lastPacket.add(self.oKey[0], probs, asMainKey=True)
                        self.put_packet(lastPacket)
                        lastPacket = packet
            else:
                probs = self.__compute_and_postprocess(mat, mat.shape[0])
                packet.add(self.oKey[0], probs, asMainKey=True)
                self.put_packet(packet)
        if is_endpoint(packet):
            # Flush the packet still waiting for its result by feeding zeros.
            if lastPacket is not None:
                iKey = lastPacket.mainKey if self.iKey is None else self.iKey
                mat = np.zeros_like(lastPacket[iKey])
                newMat = self.__context.wrap(mat)
                probs = self.__compute_and_postprocess(newMat, mat.shape[0])
                lastPacket.add(self.oKey[0], probs, asMainKey=True)
                self.put_packet(lastPacket)
            # An empty endpoint has not been forwarded above.
            if packet.is_empty():
                self.put_packet(packet)
def __prepare_batch_stream(self):
    '''
    Fill `self.__streamBuffer` (`__width` rows) with 1-d vectors taken from packets.

    On the first call filling starts at `__left`; afterwards the rows from
    `__center` on are moved to the front and filling resumes at `__cover`.
    `__hadData` tells whether at least one vector was stored. Filling stops at an
    endpoint packet (`__endpointStep`) or when `decide_action()` returns something
    that is neither True nor False (`__finalStep`); unfilled rows are zeroed.

    Returns:
        True on success, False if `decide_action()` returned False.
    '''
    self.__hadData = False
    # Reuse old data unless this is the very first window.
    if self.__zerothStep:
        self.__zerothStep = False
        self.__firstStep = True
        row = self.__left
    else:
        self.__streamBuffer[0:self.__cover] = self.__streamBuffer[self.__center:]
        self.__firstStep = False
        row = self.__cover
    # Pull new vectors.
    while row < self.__width:
        action = self.decide_action()
        if action is False:
            return False
        if action is not True:
            self.__finalStep = True
            break
        pack = self.get_packet()
        if not pack.is_empty():
            iKey = pack.mainKey if self.iKey is None else self.iKey
            vec = pack[iKey]
            assert isinstance(vec, np.ndarray) and len(vec.shape) == 1
            # Allocate lazily: dim and dtype come from the first vector.
            if self.__streamBuffer is None:
                self.__streamBuffer = np.zeros([self.__width, len(vec)], dtype=vec.dtype)
            self.__streamBuffer[row] = vec
            self.__hadData = True
            row += 1
        if is_endpoint(pack):
            self.__endpointStep = True
            break
    # Zero the unfilled rows.
    if self.__streamBuffer is not None:
        self.__streamBuffer[row:] = 0
    return True
def core_loop(self):
    '''
    Core loop: apply `self.__map_function` to the items of every non-empty packet.

    The mapped items are re-wrapped in a new Packet (or Endpoint, if the source was
    an endpoint) keeping the source `cid` and `idmaker`. Empty endpoints are
    forwarded unchanged; other empty packets are dropped. The loop ends when
    `decide_action()` returns anything other than True.
    '''
    while self.decide_action() is True:
        incoming = self.get_packet()
        if incoming.is_empty():
            # Only the endpoint marker is worth forwarding.
            if is_endpoint(incoming):
                self.put_packet(incoming)
            continue
        mapped = self.__map_function(dict(incoming.items()))
        wrapper = Endpoint if is_endpoint(incoming) else Packet
        self.put_packet(
            wrapper(items=mapped, cid=incoming.cid, idmaker=incoming.idmaker)
        )
def __prepare_chunk_frame(self, framePIPE):
    '''
    Fill `self.__frameBuffer` with up to `self.__batchSize` vectors read from <framePIPE>.

    Filling stops early on exhaustion (`__finalStep`) or on an endpoint
    (`__endpointStep`); `__tailIndex` then holds the number of rows filled.
    Unfilled rows are zeroed.

    Returns:
        True on success, False if <framePIPE> is wrong or waiting timed out.
    '''
    waited = 0
    row = 0
    while row < self.__batchSize:
        # Upstream error.
        if framePIPE.is_wrong():
            self.kill()
            return False
        # No more data.
        if framePIPE.is_exhausted():
            self.__tailIndex = row
            self.__finalStep = True
            break
        # Nothing received yet: wait and count towards the timeout.
        if framePIPE.is_empty():
            time.sleep(info.TIMESCALE)
            waited += info.TIMESCALE
            if waited > info.TIMEOUT:
                print(f"{self.name}: Timeout! Did not receive any data for a long time!")
                # Try to kill the frame PIPE, then this component.
                framePIPE.kill()
                self.kill()
                return False
            continue
        # Blocked: just wait.
        if framePIPE.is_blocked():
            time.sleep(info.TIMESCALE)
            continue
        # Data available.
        vec = framePIPE.get()
        if is_endpoint(vec):
            self.__endpointStep = True
            self.__tailIndex = row
            break
        assert isinstance(
            vec, Vector
        ), f"{self.name}: Need vector packet but got: {type(vec).__name__}."
        self.__frameBuffer[row, :] = vec.data
        row += 1
    # Pad the tail with zeros.
    self.__frameBuffer[row:, :] = 0
    return True
def __prepare_chunk_probability(self,probabilityPIPE):
    '''
    Fill `self.__probabilityBuffer` with up to `self.__batchSize` vectors from <probabilityPIPE>.

    Filling stops early on exhaustion (`__finalStep`) or on an endpoint
    (`__endpointStep`); `__tailIndex` then holds the number of rows filled.
    Unfilled rows are zeroed.

    Returns:
        True on success, False if <probabilityPIPE> is wrong or waiting timed out.
    '''
    waited = 0
    row = 0
    while row < self.__batchSize:
        # The previous PIPE had errors.
        if probabilityPIPE.is_wrong():
            self.kill()
            return False
        # No more data.
        if probabilityPIPE.is_exhausted():
            self.__tailIndex = row
            self.__finalStep = True
            break
        # Nothing received yet: wait and count towards the timeout.
        if probabilityPIPE.is_empty():
            time.sleep(info.TIMESCALE)
            waited += info.TIMESCALE
            if waited > info.TIMEOUT:
                print(f"{self.name}: Timeout! Did not receive any data for a long time!")
                # Try to kill the probability PIPE, then this component.
                probabilityPIPE.kill()
                self.kill()
                return False
            continue
        # Blocked: just wait.
        if probabilityPIPE.is_blocked():
            time.sleep(info.TIMESCALE)
            continue
        # Data available.
        vec = probabilityPIPE.get()
        if is_endpoint(vec):
            self.__endpointStep = True
            self.__tailIndex = row
            break
        assert isinstance(vec,Vector)
        self.__probabilityBuffer[row] = vec.data
        row += 1
    # Pad the rest with zeros.
    self.__probabilityBuffer[row:,:] = 0
    return True
def dump_text_PIPE(pipe, key=None, allowPartial=True, endSymbol="\n"):
    '''
    Dump a text PIPE to a transcription.

    Drains <pipe> until it is empty. The text of the latest non-empty packet seen
    before each endpoint is kept as one entry; entries are joined with <endSymbol>.

    Args:
        pipe: a PIPE in wrong or terminated state that is not out-locked.
        key: the packet key to read; the packet's main key is used when None.
        allowPartial: if True, text still pending after the last endpoint is kept too.
        endSymbol: the separator placed between entries.

    Returns:
        The joined transcription (str).
    '''
    assert isinstance(allowPartial, bool)
    assert isinstance(endSymbol, str)
    assert pipe.state_is_(
        mark.wrong, mark.terminated), "<pipe> must be wrong or terminated PIPE."
    assert not pipe.is_outlocked()
    assert key is None or isinstance(key, str)

    sentences = []
    pending = None
    while not pipe.is_empty():
        packet = pipe.get()
        if not packet.is_empty():
            text = packet[packet.mainKey if key is None else key]
            assert isinstance(text, str)
            # Later packets overwrite earlier ones until an endpoint arrives.
            pending = text
        if is_endpoint(packet) and pending is not None:
            sentences.append(pending)
            pending = None

    if allowPartial and pending is not None:
        sentences.append(pending)
    return endSymbol.join(sentences)
def core_loop(self):
    '''
    Core loop: dissolve every incoming array into one packet per element.

    The array of each non-empty packet is flattened and each element is emitted in
    its own Packet under `self.oKey[0]`. When the incoming packet is an endpoint,
    an empty Endpoint is emitted afterwards. The loop ends when `decide_action()`
    returns anything other than True.
    '''
    while self.decide_action() is True:
        packet = self.get_packet()
        if not packet.is_empty():
            iKey = packet.mainKey if self.iKey is None else self.iKey
            data = packet[iKey]
            assert isinstance(
                data, np.ndarray
            ), f"{self.name}: Can only dissolve vector and matrix packet but got: {type(data)}."
            flat = data.reshape(-1)
            for element in flat:
                single = Packet({self.oKey[0]: element},
                                cid=self.__id_count,
                                idmaker=packet.idmaker)
                self.put_packet(single)
        # Propagate the endpoint marker after any data it carried.
        if is_endpoint(packet):
            self.put_packet(Endpoint(cid=self.__id_count, idmaker=packet.idmaker))
def dump_text_PIPE(textPIPE,allowPartial=True,endSymbol="\n"):
    '''
    Dump a text PIPE to a transcription.

    Reads Text packets from <textPIPE> until it is wrong or exhausted, or until the
    accumulated waiting time on an empty PIPE exceeds `info.TIMEOUT`. The latest
    text seen before each endpoint is kept as one entry; entries are joined with
    <endSymbol>.

    Args:
        textPIPE: an alive or terminated PIPE carrying Text packets.
        allowPartial: if True, text still pending after the last endpoint is kept too.
        endSymbol: the separator placed between entries.

    Returns:
        The joined transcription (str).
    '''
    assert isinstance(allowPartial,bool)
    assert isinstance(endSymbol,str)
    assert textPIPE.is_alive() or textPIPE.is_terminated(), "<textPIPE> must be ALIVE or TERMINATION PIPE."

    sentences = []
    pending = None
    waited = 0
    while True:
        if textPIPE.is_wrong() or textPIPE.is_exhausted():
            break
        if textPIPE.is_empty():
            time.sleep(info.TIMESCALE)
            waited += info.TIMESCALE
            if waited > info.TIMEOUT:
                break
            continue
        packet = textPIPE.get()
        if not is_endpoint(packet):
            assert isinstance(packet,Text), "This is not a Text PIPE."
            # Later packets overwrite earlier ones until an endpoint arrives.
            pending = packet.data
        elif pending is not None:
            sentences.append(pending)
            pending = None

    if allowPartial and pending is not None:
        sentences.append(pending)
    return endSymbol.join(sentences)
def core_loop(self):
    '''
    Core loop: stream probability matrices to the decoding subprocess through its stdin.

    Messages written to the decoder:
        " -1 <rows> <data>"  a regular chunk,
        " -2 <rows> <data>"  an endpoint chunk (" -2 0 " for an empty endpoint),
        " -3 "               final step (`decide_action()` returned None).
    Every packet sent is also stored in `self.__packetCache`. After the loop, the
    result-reading thread is joined and b"over" is written to the decoder. The
    decoder's stdout is closed and the process killed in every case.
    '''

    def send(payload):
        # Write to the decoder and flush; on failure print the decoder's stderr first.
        try:
            self.__decodeProcess.stdin.write(payload)
            self.__decodeProcess.stdin.flush()
        except Exception as e:
            print(self.__decodeProcess.stderr.read().decode())
            raise e

    def scaled_matrix(source):
        # Fetch, validate and acoustically scale the probability matrix of a packet.
        iKey = source.mainKey if self.iKey is None else self.iKey
        mat = source[iKey]
        assert isinstance(mat, np.ndarray) and len(mat.shape) == 2
        assert mat.shape[
            0] <= self.__max_batch_size, "The chunk size of matrix > max allowable batch size of this decoder."
        assert mat.shape[
            1] == self.__pdfs, "The dim. of probability does not match the PDFs."
        return self.__acoustic_scale * mat

    try:
        while True:
            action = self.decide_action()
            if action is False:
                break
            if action is None:
                # final step
                send(b" -3 ")
                break

            packet = self.get_packet()
            if is_endpoint(packet):
                if packet.is_empty():
                    send(b" -2 0 ")
                else:
                    mat = scaled_matrix(packet)
                    send(f" -2 {mat.shape[0]} ".encode() + encode_vector_temp(mat.reshape(-1)))
                # Endpoints are cached even when empty.
                self.__packetCache.put(packet)
            elif not packet.is_empty():
                mat = scaled_matrix(packet)
                send(f" -1 {mat.shape[0]} ".encode() + encode_vector_temp(mat.reshape(-1)))
                self.__packetCache.put(packet)

        # Wait until all results have been gotten.
        self.__readResultThread.join()
        # Close the decoding process
        self.__decodeProcess.stdin.write(b"over")
    finally:
        self.__decodeProcess.stdout.close()
        self.__decodeProcess.kill()
def __read_result_from_subprocess(self):
    '''
    Thread target: read results from the decoding subprocess and forward them as packets.

    One line is read from the decoder's stdout per iteration and dispatched on its flag:
        "-1 ..."  partial result: the ids are converted with `ids_to_words()` and
                  added to the next cached packet under `self.oKey[0]`.
        "-2 ..."  endpoint result: zero, one or several hypotheses separated by "-1".
                  Several hypotheses are stored under `oKey[0]`, `oKey[0]-2`,
                  `oKey[0]-3`, ..., optionally after `self.rescore_function`.
        "-3"      termination.
    The loop also stops when `decide_state()` reports a wrong state, or a
    terminated state whose master is the out PIPE.

    Raises:
        Exception: on an unknown flag, or when the accumulated waiting time on empty
            reads exceeds `info.TIMEOUT`. The in PIPE is killed before re-raising.
    '''
    timecost = 0
    try:
        while True:
            # Decide state and action.
            master, state = self.decide_state()
            if state == mark.wrong:
                break
            elif state == mark.stranded:
                time.sleep(info.TIMESCALE)
                continue
            elif state == mark.terminated:
                if master == mark.outPIPE:
                    break
            # State is active, or terminated with the in PIPE as master: read a line.
            line = self.__decodeProcess.stdout.readline().decode().strip()

            # Nothing was received.
            if line == "":
                time.sleep(info.TIMESCALE)
                timecost += info.TIMESCALE
                if timecost > info.TIMEOUT:
                    raise Exception(
                        f"{self.name}: Timeout! Receiving thread has not received any data for a long time!"
                    )
                continue

            ## Partial result
            if line.startswith("-1"):
                packet = self.__packetCache.get()
                ids = line[2:].strip().split()  # discard the flag "-1"
                if len(ids) > 0:
                    packet.add(self.oKey[0], self.ids_to_words(ids), asMainKey=True)
                else:
                    packet.add(self.oKey[0], " ", asMainKey=True)
                self.put_packet(packet)

            ## Endpoint
            elif line.startswith("-2"):
                packet = self.__packetCache.get()
                content = line[2:].strip()
                if len(content) == 0:
                    self.put_packet(packet)
                else:
                    # Discard the leading "-1" (flag "-2 -1"), then split the hypotheses.
                    pieces = content[2:].strip().split("-1")
                    hyps = [
                        piece.strip().split()
                        for piece in pieces
                        if len(piece.strip()) > 0
                    ]
                    if len(hyps) == 0:
                        packet.add(self.oKey[0], " ", asMainKey=True)
                    elif len(hyps) == 1:
                        packet.add(self.oKey[0], self.ids_to_words(hyps[0]), asMainKey=True)
                    else:
                        # NOTE(review): every hypothesis is added with asMainKey=True,
                        # so the last one added presumably ends up as main key - confirm
                        # this is intended.
                        if self.rescore_function is None:
                            # No rescoring needed.
                            for i, hyp in enumerate(hyps):
                                outKey = self.oKey[0] if i == 0 else (
                                    self.oKey[0] + f"-{i+1}")
                                packet.add(outKey, self.ids_to_words(hyp), asMainKey=True)
                        else:
                            # Each hypothesis is already a list of tokens, so convert the
                            # tokens directly (the original called .split() on the lists,
                            # which always raised AttributeError).
                            nbestsInt = [[int(ID) for ID in hyp] for hyp in hyps]
                            nResults = self.rescore_function(nbestsInt)
                            # Validate what the rescoring function returned (the original
                            # asserts re-checked its input instead).
                            assert isinstance(
                                nResults, (list, tuple)) and len(nResults) > 0
                            for i, rescored in enumerate(nResults):
                                assert isinstance(rescored, (list, tuple))
                                outKey = self.oKey[0] if i == 0 else (
                                    self.oKey[0] + f"-{i+1}")
                                packet.add(outKey, self.ids_to_words(rescored), asMainKey=True)

                    if not is_endpoint(packet):
                        self.put_packet(packet)
                    else:
                        # Re-wrap the items so that the result leaves as an Endpoint.
                        self.put_packet(
                            Endpoint(items=dict(packet.items()),
                                     cid=packet.cid,
                                     idmaker=packet.idmaker))

            ## Final step
            elif line.startswith("-3"):
                break

            else:
                raise Exception(
                    f"{self.name}: Expected flag (-1 -> partial) (-2 endpoint) (-3 termination) but got: {line}"
                )
    except Exception:
        # NOTE(review): the check below is performed twice on the in PIPE; the second
        # one was possibly meant for the out PIPE - confirm before changing.
        if not self.inPIPE.state_is_(mark.wrong, mark.terminated):
            self.inPIPE.kill()
        if not self.inPIPE.state_is_(mark.wrong, mark.terminated):
            self.inPIPE.kill()
        raise
    else:
        # NOTE(review): same duplicated in-PIPE check as in the except branch.
        if not self.inPIPE.state_is_(mark.wrong, mark.terminated):
            self.inPIPE.terminate()
        if not self.inPIPE.state_is_(mark.wrong, mark.terminated):
            self.inPIPE.terminate()
    finally:
        self.__decodeProcess.stdout.close()
        self.__decodeProcess.kill()