def run(self, model, lmbin, trie):
    """Worker loop: load a DeepSpeech model and service queued commands.

    Args:
        model: path to the acoustic model file, handed to ``Model``.
        lmbin: optional path to the language-model binary; when truthy the
            LM decoder is enabled together with ``trie``.
        trie: path to the language-model trie (used only when ``lmbin`` is set).

    Commands are read from ``self._in_queue`` as ``(cmd, *data)`` tuples.
    Emits ``self.finished`` with the transcript after each 'finish' command.
    Returns when ``self._should_quit`` is set and the queue is drained.
    """
    model = Model(model, BEAM_WIDTH)
    if lmbin:
        model.enableDecoderWithLM(lmbin, trie, LM_ALPHA, LM_BETA)
    stream = None
    while True:
        # Try to get the next command from our queue, use a timeout to check
        # periodically for a quit signal so the application doesn't hang on
        # exit.
        try:
            cmd, *data = self._in_queue.get(timeout=0.3)
        except queue.Empty:
            if self._should_quit:
                break
            # If we haven't received a quit signal just continue trying to
            # get a command from the queue indefinitely
            continue

        if cmd == 'start':
            # 'start' means create a new stream
            stream = model.createStream()
        elif cmd == 'data':
            # 'data' means we received more audio data from the recorder
            if stream:
                model.feedAudioContent(
                    stream, np.frombuffer(data[0].data(), np.int16))
        elif cmd == 'finish':
            # 'finish' means the caller wants the result of the current stream.
            # Guard against 'finish' arriving before 'start': calling
            # finishStream(None) would raise and kill the worker thread.
            if stream:
                transcript = model.finishStream(stream)
                self.finished.emit(transcript)
                stream = None
class InferenceThread(QObject):
    """Background thread running DeepSpeech inference on queued audio.

    Commands are pushed with :meth:`send_cmd` as ``(name, *payload)`` tuples
    ('start' / 'data' / 'finish'); the final transcript is reported through
    the ``finished`` signal.
    """

    # Emitted with the transcript once a 'finish' command is processed.
    finished = Signal(str)

    def __init__(self):
        super(InferenceThread, self).__init__()
        # Queue feeding commands from the GUI thread to the worker.
        self.in_queue = queue.Queue()
        # Cooperative shutdown flag polled by run().
        self.should_quit = False
        self.worker = threading.Thread(target=self.run)

    def send_cmd(self, cmd):
        ''' Insert command in queue to be processed by the thread '''
        self.in_queue.put(cmd)

    def setQuit(self):
        ''' Signal to the thread that it should stop running '''
        self.should_quit = True

    def start(self):
        """Launch the worker thread (run() executes in that thread)."""
        self.worker.start()

    def run(self):
        """Load the model and process commands until told to quit."""
        # Creating the model
        self.model = Model(
            os.path.join(os.path.dirname(__file__),
                         "deepspeech-0.6.1-models/output_graph.pbmm"),
            BEAM_WIDTH)
        self.model.enableDecoderWithLM(
            os.path.join(os.path.dirname(__file__),
                         "deepspeech-0.6.1-models/lm.binary"),
            os.path.join(os.path.dirname(__file__),
                         "deepspeech-0.6.1-models/trie"),
            LM_ALPHA, LM_BETA)

        stream = None
        while True:
            # Try to get the next command from our queue, use a timeout to check
            # periodically for a quit signal so the application doesn't hang on
            # exit.
            try:
                cmd, *data = self.in_queue.get(timeout=0.3)
            except queue.Empty:
                if self.should_quit:
                    break
                # If we haven't received a quit signal just continue trying to
                # get a command from the queue indefinitely
                continue

            if cmd == "start":
                # "start" means create a new stream
                stream = self.model.createStream()
                logging.debug("Starts to process sound")
            elif cmd == "data":
                # "data" means we received more audio data from the recorder
                if stream:
                    self.model.feedAudioContent(
                        stream, np.frombuffer(data[0].data(), np.int16))
            elif cmd == "finish":
                # "finish" means the caller wants the result of the current
                # stream. Guard against a 'finish' without a prior 'start':
                # finishStream(None) would raise and kill this thread.
                if stream:
                    transcript = self.model.finishStream(stream)
                    self.finished.emit(transcript)
                    stream = None
                    logging.debug("Finishes to process sound")
def main():
    """Feed two WAV files through two interleaved DeepSpeech streams and
    print both transcripts."""

    def load_samples(path):
        # Read an entire WAV file; return (sample_rate, int16 sample array).
        with wave.open(path, 'rb') as wav:
            rate = wav.getframerate()
            samples = np.frombuffer(wav.readframes(wav.getnframes()), np.int16)
        return rate, samples

    parser = argparse.ArgumentParser(
        description='Running DeepSpeech inference.')
    parser.add_argument('--model', required=True,
                        help='Path to the model (protocol buffer binary file)')
    parser.add_argument(
        '--alphabet', required=True,
        help='Path to the configuration file specifying the alphabet used by the network')
    parser.add_argument('--lm', nargs='?',
                        help='Path to the language model binary file')
    parser.add_argument(
        '--trie', nargs='?',
        help='Path to the language model trie file created with native_client/generate_trie')
    parser.add_argument('--audio1', required=True,
                        help='First audio file to use in interleaved streams')
    parser.add_argument('--audio2', required=True,
                        help='Second audio file to use in interleaved streams')
    args = parser.parse_args()

    ds = Model(args.model, N_FEATURES, N_CONTEXT, args.alphabet, BEAM_WIDTH)
    if args.lm and args.trie:
        ds.enableDecoderWithLM(args.alphabet, args.lm, args.trie,
                               LM_ALPHA, LM_BETA)

    fs1, audio1 = load_samples(args.audio1)
    fs2, audio2 = load_samples(args.audio2)

    stream1 = ds.setupStream(sample_rate=fs1)
    stream2 = ds.setupStream(sample_rate=fs2)

    # Alternate 1/10th chunks between the two streams to exercise
    # concurrent use of independent streams on one model.
    for chunk1, chunk2 in zip(np.array_split(audio1, 10),
                              np.array_split(audio2, 10)):
        ds.feedAudioContent(stream1, chunk1)
        ds.feedAudioContent(stream2, chunk2)

    print(ds.finishStream(stream1))
    print(ds.finishStream(stream2))