# NOTE(review): this fragment has lost its line breaks. As stored, the statements
# before the first `#` (the `#main` marker) are run together on one line and
# everything after that `#` is lexically a comment, so it cannot execute as-is.
# The leading `else:` pairs with an `if` that is outside this view; with the
# indentation gone it is not possible to tell from here which of the following
# statements belong to the `else` suite -- restore the layout from the original file.
#
# Visible intent, in order:
#   * configure logging at INFO level and set the "requests" logger to WARNING;
#   * read source, volume and aggressiveness from `options`;
#   * build a PulseRecorder and a VAD from those values, sampleRate and MAX_UTT_LENGTH;
#   * start recording with frames = int(sampleRate * BUFFER_DURATION / 1000)
#     frames per buffer and print "Please speak.";
#   * loop forever: fetch samples from the recorder, pass them to
#     vad.process_audio, skip the iteration when `audio` is falsy, and store
#     len(samples) in buff_size.
# NOTE(review): time_start and total are assigned but never read in the visible part.
else: logging.basicConfig(level=logging.INFO) logging.getLogger("requests").setLevel(logging.WARNING) source = options.source volume = options.volume aggressiveness = options.aggressiveness rec = PulseRecorder(source_name=source, volume=volume, rate=sampleRate) vad = VAD(aggressiveness=aggressiveness, sample_rate=sampleRate, max_utt_length=MAX_UTT_LENGTH) #main frames = int(sampleRate * BUFFER_DURATION / 1000) #BUFFER_DURATION 30 rec.start_recording(frames_per_buffer=frames) time_start = time() print("Please speak.") total, buff_size, finalize = 0, 0, 0 while True: samples = rec.get_samples() audio, finalize = vad.process_audio(samples) #print(len(samples),total,audio) if not audio: continue #logging.debug ('decoding audio len=%d finalize=%s audio=%s'% (len(audio), repr(finalize), audio[0].__class__)) # decoding stop while finalize is true, max_length 12s buff_size = len(samples) #buff = struct.unpack_from('<%dh' % buff_size, audio)
tts = TTS(engine="espeak", voice="en") # # main loop # print(chr(27) + "[2J") while True: # # record audio, run VAD # print "Please speak.", rec.start_recording() finalize = False recording = [] while not finalize: samples = rec.get_samples() audio, finalize = vad.process_audio(samples) if not audio: continue recording.extend(audio) user_utt, confidence = asr.decode(audio, finalize)
# Voice-activity detector configured from the module-level constants.
vad = VAD(aggressiveness=AGGRESSIVENESS, sample_rate=SAMPLE_RATE)

#
# ASR
#

print("Loading model from %s ..." % MODEL_DIR)
# Optional decoder arguments that were left disabled in the original call:
#   , acoustic_scale=ACOUSTIC_SCALE, beam=BEAM, frame_subsampling_factor=FRAME_SUBSAMPLING_FACTOR)
asr = KaldiNNet3OnlineModel(MODEL_DIR, MODEL)
print("Loading model from %s, done ..." % MODEL_DIR)

#
# main
#

print("Start recording")
rec.start_recording(FRAMES_PER_BUFFER)
print("Please speak.")

while True:
    samples = rec.get_samples()

    # Pass the values as arguments so the message is only formatted when
    # DEBUG logging is enabled; the second value is the buffer length in
    # seconds (sample count divided by SAMPLE_RATE).
    logging.debug(
        "%d samples, %5.2f s",
        len(samples),
        len(samples) / float(SAMPLE_RATE),
    )

    audio, finalize = vad.process_audio(samples)

    # Nothing to decode for this buffer; fetch the next one.
    if not audio:
        continue
# NOTE(review): this fragment has lost its line breaks. Because the stored line
# starts with `#`, all of it is lexically a comment and nothing on it executes
# as-is -- restore the layout from the original file.
#
# Visible intent, in order:
#   * announce model loading from model_dir (Python 2 print statement);
#   * build an ASR with engine ASR_ENGINE_NNET3, model_dir, and the DEFAULT_BEAM /
#     DEFAULT_ACOUSTIC_SCALE / DEFAULT_FRAME_SUBSAMPLING_FACTOR settings;
#   * initialise complete_utterance to '' and finalize to False, start recording;
#   * loop forever: fetch samples from the recorder, pass them to
#     vad.process_audio, skip the iteration when `audio` is falsy, log the chunk
#     length / finalize flag / element class at DEBUG level, decode with
#     asr.decode(audio, finalize, stream_id=STREAM_ID), and print user_utt after
#     a carriage return with no trailing newline.
# The trailing `if finalize:` has its body outside this view.
# # ASR # print "Loading model from %s ..." % model_dir asr = ASR(engine = ASR_ENGINE_NNET3, model_dir = model_dir, kaldi_beam = DEFAULT_BEAM, kaldi_acoustic_scale = DEFAULT_ACOUSTIC_SCALE, kaldi_frame_subsampling_factor = DEFAULT_FRAME_SUBSAMPLING_FACTOR) # # main # complete_utterance='' rec.start_recording() finalize = False while True: samples = rec.get_samples() audio, finalize = vad.process_audio(samples) if not audio: continue logging.debug ('decoding audio len=%d finalize=%s audio=%s' % (len(audio), repr(finalize), audio[0].__class__)) user_utt, confidence = asr.decode(audio, finalize, stream_id=STREAM_ID) print "\r%s " % user_utt, if finalize: