def main(ARGS):
    """Run the voice-assistant loop: stream mic audio through VAD into
    DeepSpeech and hand each recognized utterance to check_input().

    ARGS is an argparse-style namespace; this function mutates ARGS.model
    and ARGS.scorer in place when ARGS.model points at a model directory.
    Runs until interrupted (ctrl-C) or check_input() exits the app.
    """
    # Load DeepSpeech model. If a directory was given, resolve the graph
    # and scorer files inside it.
    if os.path.isdir(ARGS.model):
        model_dir = ARGS.model
        ARGS.model = os.path.join(model_dir, 'output_graph.pb')
        # Only join when a scorer was actually supplied; os.path.join
        # raises TypeError on None.
        if ARGS.scorer:
            ARGS.scorer = os.path.join(model_dir, ARGS.scorer)

    print(cf.bold_coral('Initializing model...'))
    logging.info("ARGS.model: %s", ARGS.model)
    model = deepspeech.Model(ARGS.model)
    if ARGS.scorer:
        logging.info("ARGS.scorer: %s", ARGS.scorer)
        model.enableExternalScorer(ARGS.scorer)

    # Start audio capture with voice-activity detection.
    vad_audio = VADAudio(aggressiveness=ARGS.vad_aggressiveness,
                         device=ARGS.device,
                         input_rate=ARGS.rate)
    print(cf.bold_coral("Listening (ctrl-C to exit)..."))
    frames = vad_audio.vad_collector()

    # Stream from microphone to DeepSpeech using VAD. vad_collector()
    # yields audio frames during speech and a single None at end of
    # utterance.
    spinner = None
    if not ARGS.nospinner:
        spinner = Halo(spinner='line')
    stream_context = model.createStream()
    wav_data = bytearray()
    for frame in frames:
        if frame is not None:
            # Speech in progress: feed PCM samples into the live stream.
            if spinner:
                spinner.start()
            logging.debug("streaming frame")
            stream_context.feedAudioContent(np.frombuffer(frame, np.int16))
            if ARGS.savewav:
                wav_data.extend(frame)
        else:
            # End of utterance: finalize recognition and reset for the next one.
            if spinner:
                spinner.stop()
            logging.debug("end utterance")
            if ARGS.savewav:
                vad_audio.write_wav(
                    os.path.join(
                        ARGS.savewav,
                        datetime.now().strftime(
                            "savewav_%Y-%m-%d_%H-%M-%S_%f.wav")),
                    wav_data)
                wav_data = bytearray()
            text = stream_context.finishStream()
            check_input(text, vad_audio)
            stream_context = model.createStream()
def show_art():
    """Print the "Atop Mount Zeitmore" heading, a banner line, and the
    title rendered as ASCII art, all in coral."""
    heading = "ATOP MOUNT ZEITMORE"
    banner = "#" * 20
    print(colorful.bold_coral(heading))
    print(colorful.coral(banner))
    print(" " * 20)
    # Render each word of the title as its own block of ASCII art.
    for word in ("Atop", "Mount", "Zeitmore"):
        print(colorful.coral(art.text2art(word)))
def check_input(input_text, vad_audio):
    """Dispatch a recognized utterance to the matching voice command.

    Pauses audio capture while the command runs, then restarts it.

    Commands:
      'stop'            -> exit the application
      'tell me about …' -> read the matching Wikipedia summary aloud
      'pause'           -> sleep briefly, then resume listening
      anything else     -> echo the text back
    """
    vad_audio.pause()
    # Normalize once instead of calling .lower() in every branch.
    command = input_text.lower()
    if command == 'stop':
        exit_app()
    elif command.startswith('tell me about'):
        read_wikipedia(input_text)
    elif command.startswith('pause'):
        t = 20
        echo_line(f'Pausing for {t} seconds', False)
        # Sleep for t (was a hard-coded 20 that could drift from the
        # message above).
        time.sleep(t)
        print(cf.bold_coral("Listening (ctrl-C to exit)..."))
    else:
        echo_line(input_text)
    vad_audio.restart()
    return
def main(ARGS):
    """Run the voice-assistant loop against the legacy DeepSpeech API
    (setupStream/enableDecoderWithLM/finishStream(stream)).

    NOTE(review): this is a second definition of main() in the same file;
    being defined later, it shadows the earlier createStream-based main().
    Confirm which API version is intended and remove the other.
    """
    #p = pyaudio.PyAudio()
    play_wav('robot_noise.wav')
    #p.terminate()

    # Load DeepSpeech model. If a directory was given, resolve the graph,
    # alphabet, LM, and trie files inside it.
    if os.path.isdir(ARGS.model):
        model_dir = ARGS.model
        # Prefer the memory-mapped graph; fall back to the protobuf graph.
        ARGS.model = os.path.join(model_dir, 'output_graph.pbmm')
        if not Path(ARGS.model).is_file():
            ARGS.model = os.path.join(model_dir, 'output_graph.pb')
        ARGS.alphabet = os.path.join(
            model_dir, ARGS.alphabet if ARGS.alphabet else 'alphabet.txt')
        ARGS.lm = os.path.join(model_dir, ARGS.lm)
        ARGS.trie = os.path.join(model_dir, ARGS.trie)

    print(cf.bold_coral('Initializing model...'))
    logging.info("ARGS.model: %s", ARGS.model)
    logging.info("ARGS.alphabet: %s", ARGS.alphabet)
    logging.info("ARGS.beam_width: %s", ARGS.beam_width)
    model = deepspeech.Model(ARGS.model, ARGS.n_features, ARGS.n_context,
                             ARGS.alphabet, ARGS.beam_width)
    # Enable the external language model only when both LM and trie paths
    # were supplied.
    if ARGS.lm and ARGS.trie:
        logging.info("ARGS.lm: %s", ARGS.lm)
        logging.info("ARGS.trie: %s", ARGS.trie)
        logging.info("ARGS.lm_alpha: %s", ARGS.lm_alpha)
        logging.info("ARGS.lm_beta: %s", ARGS.lm_beta)
        model.enableDecoderWithLM(ARGS.alphabet, ARGS.lm, ARGS.trie,
                                  ARGS.lm_alpha, ARGS.lm_beta)

    # Start audio capture with voice-activity detection.
    vad_audio = VADAudio(aggressiveness=ARGS.vad_aggressiveness,
                         device=ARGS.device,
                         input_rate=ARGS.rate)
    print(cf.bold_coral("Listening (ctrl-C to exit)..."))
    frames = vad_audio.vad_collector()

    # Stream from microphone to DeepSpeech using VAD. vad_collector()
    # yields audio frames during speech and None at end of utterance.
    spinner = None
    if not ARGS.nospinner:
        spinner = Halo(spinner='line')
    stream_context = model.setupStream()
    wav_data = bytearray()
    for frame in frames:
        if frame is not None:
            # Speech in progress: feed PCM samples into the live stream.
            if spinner:
                spinner.start()
            logging.debug("streaming frame")
            model.feedAudioContent(stream_context,
                                   np.frombuffer(frame, np.int16))
            if ARGS.savewav:
                wav_data.extend(frame)
        else:
            # End of utterance: finalize recognition, dispatch the text,
            # and reset the stream for the next utterance.
            if spinner:
                spinner.stop()
            logging.debug("end utterence")
            if ARGS.savewav:
                vad_audio.write_wav(
                    os.path.join(
                        ARGS.savewav,
                        datetime.now().strftime(
                            "savewav_%Y-%m-%d_%H-%M-%S_%f.wav")),
                    wav_data)
                wav_data = bytearray()
            text = model.finishStream(stream_context)
            check_input(text, vad_audio)
            #print(cf.slateGray("Recognized: {0}".format(cf.bold_white(text))))
            stream_context = model.setupStream()