class ASR:
    """Speech-to-text pipeline chaining an acoustic model (AM) and a language model (LM)."""

    def __init__(self, am_config, lm_config):
        """Build the AM and LM from their configs and load each one.

        Both ``load_model`` calls receive ``False``; the meaning of that flag
        is defined by ``AM`` / ``LM`` and is not visible here.
        """
        self.am = AM(am_config)
        self.am.load_model(False)
        self.lm = LM(lm_config)
        self.lm.load_model(False)

    def decode_am_result(self, result):
        """Delegate decoding of ``result`` to the acoustic model's ``decode_result``."""
        return self.am.decode_result(result)

    def stt(self, wav_path):
        """Run the AM on ``wav_path`` and feed its decoded output to the LM.

        Returns:
            A ``(am_result, lm_result)`` pair: the decoded AM output and the
            decoded LM output.
        """
        raw_am = self.am.predict(wav_path)
        # A 'Transducer' model's output is trimmed of its first and last
        # entries; every other model type uses only the first element.
        is_transducer = self.am.model_type == 'Transducer'
        am_result = self.decode_am_result(raw_am[1:-1] if is_transducer else raw_am[0])
        # The LM stage is the same for every acoustic model type.
        lm_prediction = self.lm.predict(am_result)
        lm_result = self.lm.decode(lm_prediction[0].numpy(), self.lm.lm_featurizer)
        return am_result, lm_result

    def am_test(self, wav_path):
        """Run only the acoustic model on ``wav_path`` and return its decoded output."""
        # The prediction holds token ids; decoding maps them to vocabulary entries.
        token_ids = self.am.predict(wav_path)
        if self.am.model_type == 'Transducer':
            selected = token_ids[1:-1]
        else:
            selected = token_ids[0]
        return self.decode_am_result(selected)

    def lm_test(self, txt):
        """Run only the language model on ``txt`` after converting it to pinyin.

        The text is converted with ``pypinyin.pinyin``; the first candidate of
        each returned entry is used as the LM input.
        """
        if self.lm.config['am_token']['for_multi_task']:
            # NOTE(review): 8 looks like pypinyin's Style.TONE3 (tone number
            # appended to the syllable) -- confirm against the pypinyin docs.
            syllables = pypinyin.pinyin(txt, 8, neutral_tone_with_five=True)
        else:
            syllables = pypinyin.pinyin(txt)
        input_py = [candidates[0] for candidates in syllables]
        # The prediction holds token ids; decoding maps them to vocabulary entries.
        prediction = self.lm.predict(input_py)
        return self.lm.decode(prediction[0].numpy(), self.lm.lm_featurizer)
class ASR:
    """Acoustic-model-only speech recognizer."""

    def __init__(self, am_config):
        """Build the acoustic model from ``am_config`` and load it.

        ``load_model`` receives ``False``; the meaning of that flag is defined
        by ``AM`` and is not visible here.
        """
        self.am = AM(am_config)
        self.am.load_model(False)

    def decode_am_result(self, result):
        """Delegate decoding of ``result`` to the acoustic model's ``decode_result``."""
        return self.am.decode_result(result)

    def am_test(self, wav_path):
        """Run the acoustic model on ``wav_path`` and return its decoded output."""
        # The prediction holds token ids; decoding maps them to vocabulary entries.
        token_ids = self.am.predict(wav_path)
        # A 'Transducer' model's output is trimmed of its first and last
        # entries; every other model type uses only the first element.
        is_transducer = self.am.model_type == 'Transducer'
        selected = token_ids[1:-1] if is_transducer else token_ids[0]
        return self.decode_am_result(selected)
class ASR:
    """Speech-to-text pipeline that passes decoded acoustic-model output to a language model."""

    def __init__(self, am_config, lm_config):
        """Build the AM and LM from their configs and load each one.

        The AM is loaded with ``load_model(False)`` and the LM with
        ``load_model()``; what the argument controls is defined by those
        classes and is not visible here.
        """
        self.am = AM(am_config)
        self.am.load_model(False)
        self.lm = LM(lm_config)
        self.lm.load_model()

    def decode_am_result(self, result):
        """Decode the first element of ``result`` with the acoustic model."""
        first = result[0]
        return self.am.decode_result(first)

    def stt(self, wav_path):
        """Run the AM on ``wav_path`` and the LM on the decoded AM output.

        Returns:
            A ``(am_result, lm_result)`` pair. ``am_result`` is the raw value
            returned by ``self.am.predict`` (not the decoded form) and
            ``lm_result`` is the raw value returned by ``self.lm.predict``.
        """
        am_result = self.am.predict(wav_path)
        decoded = self.decode_am_result(am_result)
        lm_result = self.lm.predict(decoded)
        return am_result, lm_result