if __name__ == "__main__":
    # Demo driver: stream 'eleven.wav' (expected next to this script) through
    # the decoder in chunks of 8000 frames, printing the running best
    # hypothesis after each chunk, then fetch the lattice.
    decoder = Decoder(MODEL_PATH)
    file_name = os.path.join(os.path.dirname(__file__), 'eleven.wav')

    # Start from an empty hypothesis so the final print below cannot raise a
    # NameError when nothing was ever decoded (e.g. an empty wav file).
    word_ids = []
    n_decoded = 0

    data = wave.open(file_name)
    try:
        while True:
            frames = data.readframes(8000)
            if not frames:
                # readframes() returns an empty buffer once the file is exhausted.
                break
            decoder.accept_audio(frames)
            n_decoded += decoder.decode(8000)
            if n_decoded > 0:
                prob, word_ids = decoder.get_best_path()
                # ivec = decoder.get_ivector()
                print('Hypothesis: "%s" (speaker finished speaking: %s)' % (
                    word_ids_to_str_hyp(decoder, word_ids),
                    decoder.endpoint_detected(),
                ))
    finally:
        # Always release the wav file handle, even if decoding raises.
        data.close()

    decoder.input_finished()
    # NOTE(review): this prints the last hypothesis obtained inside the loop;
    # it is not refreshed after input_finished() — confirm that is intended.
    print('Final hypothesis: "%s"' % word_ids_to_str_hyp(decoder, word_ids))
    decoder.finalize_decoding()
    p, lat = decoder.get_lattice()
if __name__ == "__main__": decoder = Decoder(MODEL_PATH) file_name = os.path.join(os.path.dirname(__file__), 'eleven.wav') data = wave.open(file_name) n_decoded = 0 while True: frames = data.readframes(8000) if len(frames) == 0: break decoder.accept_audio(frames) n_decoded += decoder.decode(8000) if n_decoded > 0: prob, word_ids = decoder.get_best_path() # ivec = decoder.get_ivector() print('Hypothesis: "%s" (speaker finished speaking: %s)' % (word_ids_to_str_hyp(decoder, word_ids), decoder.endpoint_detected(), )) decoder.input_finished() print('Final hypothesis: "%s"' % word_ids_to_str_hyp(decoder, word_ids)) decoder.finalize_decoding() p, lat = decoder.get_lattice() print ('Resulting lattice:') for state in lat.states:
import struct
import subprocess
from os.path import basename

# Command-line arguments:
#   1: directory containing the input *.wav files
#   2: scratch directory for the resampled audio
#   3: resource directory containing "asr_model_dir_nnet3"
#   4: output directory for the per-file transcripts
inputdir = sys.argv[1]
scratchdir = sys.argv[2]
resourcedir = sys.argv[3]
outdir = sys.argv[4]

# Equivalent of `mkdir -p`, without routing the path through a shell.
if not os.path.isdir(scratchdir):
    os.makedirs(scratchdir)

for inputfile in glob.glob(os.path.join(inputdir, '*.wav')):
    # Load speech recognition model from the "asr_model_dir_nnet3" directory.
    # A fresh decoder is built for every input file, as in the original.
    decoder = Decoder(os.path.join(resourcedir, "asr_model_dir_nnet3"))

    # file_id keeps the original extension, so outputs are named
    # "<name>.wav.wav" (scratch audio) and "<name>.wav.txt" (transcript).
    file_id = basename(inputfile)
    # os.path.join keeps the converted file inside scratchdir even when the
    # argument was given without a trailing slash.
    converted = os.path.join(scratchdir, file_id + '.wav')

    # Convert to 16 kHz, 16-bit, mono, signed-integer PCM. Passing an argument
    # list (no shell) keeps file names with spaces or shell metacharacters
    # intact; check_call fails loudly instead of silently decoding a stale or
    # missing scratch file.
    subprocess.check_call([
        'sox', inputfile,
        '-e', 'signed-integer', '-r', '16000', '-b', '16', '-c', '1',
        converted,
    ])

    # Load audio frames from the converted wav file.
    data = wave.open(converted)
    try:
        n_frames = data.getnframes()
        frames = data.readframes(n_frames)
    finally:
        data.close()

    # Feed the audio data to the decoder.
    decoder.accept_audio(frames)
    decoder.decode(n_frames)
    decoder.input_finished()

    # Get the best hypothesis and store it as one line of text.
    prob, word_ids = decoder.get_best_path()
    output_string = " ".join(map(decoder.get_word, word_ids))

    # Write the transcript directly rather than through `echo ... > file`, so
    # quotes or other shell-special characters in the words cannot break or
    # alter the output. The trailing newline matches what `echo` produced.
    with open(os.path.join(outdir, file_id + '.txt'), 'w') as transcript:
        transcript.write(output_string + '\n')