Ejemplo n.º 1
0
else:
    logging.basicConfig(level=logging.INFO)
# Only let WARNING and above through from the "requests" logger.
logging.getLogger("requests").setLevel(logging.WARNING)

# Copy the settings used below out of `options` (defined outside this
# excerpt).
source = options.source
volume = options.volume
aggressiveness = options.aggressiveness

# Build the recorder and the VAD; both are given the same sample rate.
# NOTE(review): PulseRecorder and VAD are defined elsewhere -- presumably
# audio capture and voice-activity detection; confirm against their
# definitions.
rec = PulseRecorder(source_name=source, volume=volume, rate=sampleRate)
vad = VAD(aggressiveness=aggressiveness,
          sample_rate=sampleRate,
          max_utt_length=MAX_UTT_LENGTH)

# main
# Buffer size passed to the recorder: sampleRate * BUFFER_DURATION / 1000,
# truncated to an int.
# NOTE(review): the division by 1000 suggests BUFFER_DURATION is in
# milliseconds -- confirm where it is defined.
frames = int(sampleRate * BUFFER_DURATION / 1000)  # original note: BUFFER_DURATION is 30
rec.start_recording(frames_per_buffer=frames)

# time_start, total and buff_size are not read anywhere in this excerpt;
# they may be used by code that follows it.
time_start = time()
print("Please speak.")
total, buff_size, finalize = 0, 0, 0
while True:

    # Fetch the next buffer and hand it to the VAD, which returns the audio
    # to process together with a finalize flag.
    samples = rec.get_samples()
    audio, finalize = vad.process_audio(samples)
    # Disabled debug output:
    #print(len(samples),total,audio)
    # Nothing to process for this buffer; fetch the next one.
    if not audio:
        continue
    # Disabled debug output:
    #logging.debug ('decoding audio len=%d finalize=%s audio=%s'% (len(audio), repr(finalize), audio[0].__class__))
    # Original note: decoding stops once finalize is true; maximum length is
    # 12 s (neither is visible in this excerpt -- TODO confirm).
    buff_size = len(samples)
    # Disabled: unpack `audio` as buff_size little-endian 16-bit integers.
    #buff = struct.unpack_from('<%dh' % buff_size, audio)
Ejemplo n.º 2
0
# Create the TTS object with the "espeak" engine and the "en" voice.
tts = TTS(engine="espeak", voice="en")

#
# main loop
#

# chr(27) is ESC; ESC followed by "[2J" is the ANSI "erase display"
# sequence, so this clears the terminal before the loop starts.
print(chr(27) + "[2J")
while True:

    #
    # record audio, run VAD
    #

    # Python 2 print statement; the trailing comma suppresses the newline.
    print "Please speak.",

    rec.start_recording()

    # Per-utterance state: reset the finalize flag and the sample buffer.
    finalize  = False
    recording  = []

    # Keep pulling buffers until the VAD reports a truthy finalize flag.
    while not finalize:

        samples = rec.get_samples()

        # process_audio returns the audio to handle plus the finalize flag.
        audio, finalize = vad.process_audio(samples)
        # Nothing to process for this buffer; fetch the next one.
        if not audio:
            continue

        # Accumulate every non-empty chunk of the current utterance.
        recording.extend(audio)

        # Decode the chunk, passing the finalize flag along; decode returns
        # a pair that is unpacked into user_utt and confidence.
        user_utt, confidence = asr.decode(audio, finalize)
Ejemplo n.º 3
0
# Build the VAD from the module-level AGGRESSIVENESS and SAMPLE_RATE values.
vad = VAD(aggressiveness=AGGRESSIVENESS, sample_rate=SAMPLE_RATE)

#
# ASR
#

# Announce the model load, construct the model, then announce completion.
print("Loading model from %s ..." % MODEL_DIR)
asr = KaldiNNet3OnlineModel(MODEL_DIR, MODEL)
# Disabled extra constructor arguments:
#, acoustic_scale=ACOUSTIC_SCALE, beam=BEAM, frame_subsampling_factor=FRAME_SUBSAMPLING_FACTOR)
print("Loading model from %s, done ..." % MODEL_DIR)
#
# main
#

# `rec` is created outside this excerpt.
print("Start recording")
rec.start_recording(FRAMES_PER_BUFFER)

print("Please speak.")

while True:

    # Fetch the next buffer from the recorder.
    samples = rec.get_samples()

    # Pass the values as logging arguments instead of pre-formatting with
    # `%`, so the message string is only built when a DEBUG record is
    # actually emitted. The emitted text is unchanged.
    logging.debug("%d samples, %5.2f s",
                  len(samples), float(len(samples)) / float(SAMPLE_RATE))

    # process_audio returns the audio to handle plus a finalize flag.
    audio, finalize = vad.process_audio(samples)

    # Nothing to process for this buffer; fetch the next one.
    if not audio:
        continue
Ejemplo n.º 4
0
#
# ASR
#

# Python 2 print statement announcing which model directory is loaded.
print "Loading model from %s ..." % model_dir

# Construct the ASR object with the NNET3 engine constant and the default
# beam, acoustic-scale and frame-subsampling constants (all defined outside
# this excerpt).
asr = ASR(engine = ASR_ENGINE_NNET3, model_dir = model_dir,
          kaldi_beam = DEFAULT_BEAM, kaldi_acoustic_scale = DEFAULT_ACOUSTIC_SCALE,
          kaldi_frame_subsampling_factor = DEFAULT_FRAME_SUBSAMPLING_FACTOR)


#
# main
#
# Initial state for the loop that follows: an empty utterance string, the
# recorder started, and the finalize flag cleared.
complete_utterance=''
rec.start_recording()
finalize = False
while True:
    samples = rec.get_samples()

    audio, finalize = vad.process_audio(samples)
    if not audio:
        continue

    logging.debug ('decoding audio len=%d finalize=%s audio=%s' % (len(audio), repr(finalize), audio[0].__class__))
    
    user_utt, confidence = asr.decode(audio, finalize, stream_id=STREAM_ID)
    
    print "\r%s                     " % user_utt,
    
    if finalize: