def generate_batch_from_buffer(self, batch_size: int, num_chunks=-1) -> BatchedProcessedInputs:
    """Assemble one batch of processed inputs drawn from the internal buffer.

    Refills the buffer when it holds fewer than ``batch_size`` items and
    raises ``EndOfDatasetException`` if it still comes up short afterwards.

    :param batch_size: total number of contexts to draw from the buffer
    :param num_chunks: forwarded to ``utils.generate_chunks``; -1 lets the
        helper pick the number of chunks itself
    :return: a ``BatchedProcessedInputs`` wrapping one processed input per chunk
    :raises EndOfDatasetException: when the buffer cannot supply ``batch_size``
        items even after a refill attempt
    """
    if self.buffer.length() < batch_size:
        self._fill_buffer()
    if self.buffer.length() < batch_size:
        raise EndOfDatasetException()
    logger.debug("Buffer is filled...")
    # Split batch_size into random chunk sizes bounded by [smin, smax].
    chunk_sizes = utils.generate_chunks(
        batch_size, min_val=self.smin, max_val=self.smax, num_chunks=num_chunks)
    logger.debug("Nrs of contexts: {}".format(chunk_sizes))
    # One raw input per chunk, each pulling `size` contexts from the buffer.
    raw_inputs = [self.buffer.get(size) for size in chunk_sizes]  # type: List[RawInput]
    logger.debug('Done creating raw inputs')
    processed = [
        self.generate_processed_input(raw, with_target=True) for raw in raw_inputs
    ]
    logger.debug('Done creating processed inputs')
    return BatchedProcessedInputs(processed)
def parse_audio_files_predict(audio_path, name_list, nn_type, file_ext="*.wav"):
    """Extract features for prediction from the given audio files, in parallel.

    Splits ``name_list`` into chunks of ``CHUNK_SIZE`` files, processes each
    chunk in a worker thread, stacks the per-thread feature arrays, and
    z-normalizes the result.

    :param audio_path: directory containing the audio files
    :param name_list: file names to process
    :param nn_type: 0 selects the flat-feature pipeline (``p_predict_thread``);
        any other value selects the CNN pipeline (``p_predict_cnn_thread``)
    :param file_ext: unused here; kept for interface compatibility
    :return: tuple ``(features, name_list)`` where ``features`` is the
        normalized, vertically stacked feature matrix
    """
    # Select the worker target and initialize the accumulator ONCE, before the
    # loop. (Previously both happened inside the loop, so `features` was
    # re-created every iteration and was left undefined when name_list was
    # empty, causing a NameError at the stacking stage.)
    if nn_type == 0:
        features = np.empty((0, FEATURE_SIZE))
        thread_target = p_predict_thread
    else:
        features = np.empty(shape=(0, N_MFCC, AUDIO_LENGTH, 1))
        thread_target = p_predict_cnn_thread
    # split the filename list into chunks of 'CHUNK_SIZE' files each;
    # each chunk is the amount of data processed by a single thread
    data = utils.generate_chunks(name_list, CHUNK_SIZE)
    thread_pool = [
        utils.ThreadWithReturnValue(target=thread_target, args=(audio_path, chunk))
        for chunk in data
    ]
    # print a log message for status update
    utils.write_log_msg("PREDICT: creating a total of {0} threads...".format(
        len(thread_pool)))
    # start the entire thread pool
    for single_thread in thread_pool:
        single_thread.start()
    # wait for the thread pool and stack each thread's result
    for single_thread in thread_pool:
        ft = single_thread.join()
        features = np.vstack([features, ft])
    # z-normalize the data (NOTE(review): std can contain zeros for constant
    # columns, which would yield inf/nan here — confirm upstream guarantees)
    mean = np.mean(features, axis=0)
    std = np.std(features, axis=0)
    features = (features - mean) / std
    # return the extracted features to the calling program
    return features, name_list
def stream(self, chunkIterator, config=None):
    """Forward an iterator of audio chunks to the recognition backend.

    :param chunkIterator: iterable yielding raw audio chunks
    :param config: recognition configuration; currently unused by this method
        (NOTE(review): it is accepted but never forwarded — confirm intent)
    """
    # Fixed the logging call: the original format string had two placeholders
    # ("%s" and "%d") but only one argument, which makes the logging module
    # raise a formatting error when the record is emitted. Dead locals
    # (is_final, last_transcript, last_confidence, continuous_transcript)
    # that nothing read have been removed.
    logger.debug("%s: sending to google", str(chunkIterator))
    self.request_stream(chunkIterator)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-in', action='store', dest='filename',
                        default='audio/test1.raw', help='audio file')
    args = parser.parse_args()
    config = {
        "language": "en-US",
        "encoding": "LINEAR16",
        "sampling_rate": RATE,
        "max_alternatives": 5,
        "interim_results": True,
        "profanity_filter": True,
        "continuous": False,
    }
    W = worker('123456')
    responses = W.stream(
        utils.generate_chunks(args.filename, grpc_on=False, chunkSize=3072),
        config)
    # for response in responses:
    #     print response
True, 'confidence': last_confidence } logger.info('%s: finished', self.token) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('-in', action='store', dest='filename', default='audio/test1.raw', help='audio file') args = parser.parse_args() config = { "language": "en-US", "encoding": "LINEAR16", "sampling_rate": RATE, "max_alternatives": 5, "interim_results": True, "profanity_filter": True, "continuous": False, } W = worker('123456') responses = W.stream( utils.generate_chunks(args.filename, grpc_on=False, chunkSize=3072), config) for response in responses: print response