def run(self):
    """Main decoding loop for the live demo.

    Repeatedly feeds buffered audio to the decoder; whenever an
    utterance (or the whole dialog) ends, extracts the best hypothesis
    from the lattice, prints it, and resets the decoder.
    """
    while True:
        time.sleep(0.1)
        self._user_control()
        # Drain the decoder: keep pulling until no new frames come out.
        n_decoded = self.d.decode(max_frames=10)
        while n_decoded > 0:
            self.utt_frames += n_decoded
            n_decoded = self.d.decode(max_frames=10)
        if self.utt_end or self.dialog_end:
            start = time.time()
            self.d.prune_final()
            prob, lat = self.d.get_lattice()
            # lat.write('live-demo-recorded.fst')
            nbest = lattice_to_nbest(lat, n=10)
            if nbest:
                best_prob, best_path = nbest[0]
                decoded = ' '.join([wst[w] for w in best_path])
            else:
                decoded = 'Empty hypothesis'
            print("%s secs, frames: %d, prob: %f, %s " % (
                str(time.time() - start), self.utt_frames, prob, decoded))
            self.utt_frames = 0
            self.d.reset(keep_buffer_data=False)
        if self.dialog_end:
            self.save_wav()
            break
def run(self):
    """Poll audio, decode incrementally, and emit a hypothesis per utterance.

    Loops forever: sleeps briefly, lets the user-control hook run, then
    decodes all currently available frames. At each utterance or dialog
    boundary the lattice is finalized, the single best path is printed,
    and the decoder state is cleared. A dialog end also saves the
    recorded audio and exits the loop.
    """
    while True:
        time.sleep(0.1)
        self._user_control()
        frames = self.d.decode(max_frames=10)
        while frames > 0:
            self.utt_frames += frames
            frames = self.d.decode(max_frames=10)
        if self.utt_end or self.dialog_end:
            start = time.time()
            self.d.prune_final()
            prob, lat = self.d.get_lattice()
            # lat.write('live-demo-recorded.fst')
            nbest = lattice_to_nbest(lat, n=10)
            if nbest:
                best_prob, best_path = nbest[0]
                decoded = ' '.join([wst[w] for w in best_path])
            else:
                decoded = 'Empty hypothesis'
            elapsed = time.time() - start
            print(
                "%s secs, frames: %d, prob: %f, %s " %
                (str(elapsed), self.utt_frames, prob, decoded))
            self.utt_frames = 0
            self.d.reset(keep_buffer_data=False)
        if self.dialog_end:
            self.save_wav()
            break
def nbest_hypotheses(n=10):
    """Finalize decoding and return the n best hypotheses.

    Args:
        n: number of hypotheses to extract from the lattice (default 10).

    Returns:
        List of ``(log_prob, text)`` tuples, best first.
    """
    global recogniser
    recogniser.prune_final()
    # Utterance-level likelihood is discarded; only the lattice is used.
    utt_lik, lat = recogniser.get_lattice()
    recogniser.reset()
    # Bug fix: honour the caller-supplied n; it was hard-coded to 10 before,
    # silently ignoring the parameter.
    return [(prob, path_to_text(path))
            for (prob, path) in lattice_to_nbest(lat, n=n)]
def get_lat():
    """Finalize the current utterance and return a JSON n-best response.

    Reads the module-level decoder ``d``, word symbol table ``wst`` and
    frame counter ``dec_frames``; resets both the counter and the decoder
    before returning.
    """
    global dec_frames
    d.prune_final()
    lik, lat = d.get_lattice()
    nbest = lattice_to_nbest(lat, n=10)
    nbest_s = '\n'.join(["%0.3f %s" % (prob, ' '.join([wst[i] for i in ids]))
                         for (prob, ids) in nbest])
    result = jsonify(nbest=nbest_s, dec_frames=dec_frames)
    # Bug fix: print the real frame count; previously dec_frames was zeroed
    # in the same statement, so the DEBUG line always showed 0.
    print('DEBUG %d' % dec_frames)
    dec_frames = 0
    d.reset(keep_buffer_data=False)
    return result
def get_lat():
    """Build the n-best JSON response for the finished utterance.

    Uses module-level state: decoder ``d``, word symbol table ``wst`` and
    frame counter ``dec_frames``. Resets the counter and decoder so the
    next request starts fresh.
    """
    global dec_frames
    d.prune_final()
    lik, lat = d.get_lattice()
    nbest = lattice_to_nbest(lat, n=10)
    nbest_s = '\n'.join([
        "%0.3f %s" % (prob, ' '.join([wst[i] for i in ids]))
        for (prob, ids) in nbest
    ])
    result = jsonify(nbest=nbest_s, dec_frames=dec_frames)
    # Bug fix: log the count before it is reset — the original zeroed
    # dec_frames in the same tuple assignment, so DEBUG always printed 0.
    print('DEBUG %d' % dec_frames)
    dec_frames = 0
    d.reset(keep_buffer_data=False)
    return result
def decode_wrap(argv, audio_batch_size, wav_paths, file_output, wst_path=None):
    """Batch-decode wav files and write the n-best hypotheses to file_output.

    Args:
        argv: decoder setup arguments passed through to the recogniser.
        audio_batch_size: kept for interface compatibility (unused here).
        wav_paths: iterable of (wav_name, wav_path) pairs.
        file_output: open handle the decoded hypotheses are written to.
        wst_path: path to the word symbol table.
    """
    wst = wst2dict(wst_path)
    d = PyOnlineLatgenRecogniser()
    d.setup(argv)
    for wav_name, wav_path in wav_paths:
        sw, sr = 2, 16000  # 16-bit audio so 1 sample_width = 2 chars
        pcm = load_wav(wav_path, def_sample_width=sw, def_sample_rate=sr)
        print('%s has %f sec' % (wav_name, (float(len(pcm)) / sw) / sr))
        lat, lik, decoded_frames = decode(d, pcm)
        lat.isyms = lat.osyms = fst.read_symbols_text(wst_path)
        if DEBUG:
            with open('pykaldi_%s.svg' % wav_name, 'w') as f:
                f.write(lat._repr_svg_())
            lat.write('%s_pykaldi.fst' % wav_name)
        # Bug fix: do not truncate with int() — the %f format expects the
        # float per-frame log-likelihood (consistent with the sibling
        # implementation of this function).
        print(
            "Log-likelihood per frame for utterance %s is %f over %d frames"
            % (wav_name, lik / decoded_frames, decoded_frames))
        word_ids = lattice_to_nbest(lat, n=10)
        write_decoded(file_output, wav_name, word_ids, wst)
def decode_wrap(argv, audio_batch_size, wav_paths, file_output, wst_path=None):
    """Decode every wav file in wav_paths and record the hypotheses.

    Sets up one recogniser, then for each (name, path) pair loads the
    PCM audio, decodes it, optionally dumps debug artifacts, reports the
    per-frame log-likelihood, and writes the 10-best word sequences.
    """
    wst = wst2dict(wst_path)
    recogniser = PyOnlineLatgenRecogniser()
    recogniser.setup(argv)
    for wav_name, wav_path in wav_paths:
        sample_width, sample_rate = 2, 16000  # 16-bit audio so 1 sample_width = 2 chars
        pcm = load_wav(wav_path,
                       def_sample_width=sample_width,
                       def_sample_rate=sample_rate)
        duration = (float(len(pcm)) / sample_width) / sample_rate
        print('%s has %f sec' % (wav_name, duration))
        lat, lik, decoded_frames = decode(recogniser, pcm)
        lat.isyms = lat.osyms = fst.read_symbols_text(wst_path)
        if DEBUG:
            with open('pykaldi_%s.svg' % wav_name, 'w') as svg_file:
                svg_file.write(lat._repr_svg_())
            lat.write('%s_pykaldi.fst' % wav_name)
        print("Log-likelihood per frame for utterance %s is %f over %d frames" % (
            wav_name, (lik / decoded_frames), decoded_frames))
        word_ids = lattice_to_nbest(lat, n=10)
        write_decoded(file_output, wav_name, word_ids, wst)