            batches.append((x, y))
        return batches

    def test(self):
        while 1:
            eval_batches = self.make_eval_batch_data()
            # print('now', self.dg.offset)
            self.runner.run(eval_batches)
            if self.dg.offset > len(self.dg.test_texts) - 1:
                break


if __name__ == '__main__':
    parse = argparse.ArgumentParser()
    parse.add_argument('--data_config', type=str, required=True,
                       help='the lm data config path')
    parse.add_argument('--model_config', type=str, required=True,
                       help='the lm model config path')
    args = parse.parse_args()
    config = UserConfig(args.data_config, args.model_config)
    tester = LM_Tester(config)
    tester.test()
            am_result = self.decode_am_result(am_result[1:-1])
        else:
            am_result = self.decode_am_result(am_result[0])
        return am_result

    def lm_test(self, txt):
        py = pypinyin.pinyin(txt)
        input_py = [i[0] for i in py]
        # now lm_result is token ids
        lm_result = self.lm.predict(input_py)
        # token to vocab
        lm_result = self.lm.decode(lm_result[0].numpy(), self.lm.word_featurizer)
        return lm_result


if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = '2'
    am_config = UserConfig(r'./conformer-transducer-logs/am_data.yml',
                           r'./conformer-transducer-logs/conformer.yml')
    lm_config = UserConfig(r'./transformer-logs/lm_data.yml',
                           r'./transformer-logs/transformer.yml')
    asr = ASR(am_config, lm_config)
    a, b = asr.stt('BAC009S0724W0121.wav')
    print(a)
    print(b)
    print(asr.am_test('BAC009S0724W0121.wav'))
    print(asr.lm_test('中介协会'))
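# --- Illustrative sketch (not from the original file) ---
# Why `[i[0] for i in py]` in lm_test above: pypinyin.pinyin() returns one
# candidate list per character, and the comprehension keeps only the first
# candidate for each. A minimal standalone check:
import pypinyin

py = pypinyin.pinyin('中介协会')
print(py)                  # [['zhōng'], ['jiè'], ['xié'], ['huì']]
print([i[0] for i in py])  # ['zhōng', 'jiè', 'xié', 'huì']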
"""Load checkpoint.""" self.checkpoint_dir = os.path.join( config['learning_config']['running_config']["outdir"], "checkpoints") files = os.listdir(self.checkpoint_dir) files.sort(key=lambda x: int(x.split('_')[-1].replace('.h5', ''))) self.model.load_weights(os.path.join(self.checkpoint_dir, files[-1])) self.init_steps = int(files[-1].split('_')[-1].replace('.h5', '')) if __name__ == '__main__': from utils.user_config import UserConfig import tensorflow as tf os.environ['CUDA_VISIBLE_DEVICES'] = '1' am_config = UserConfig(r'D:\TF2-ASR\configs\am_data.yml', r'D:\TF2-ASR\configs\conformer.yml') am = AM(am_config) print('load model') am.load_model(False) print('convert here') am.model.return_pb_function(80, 4) concere = am.model.recognize_pb.get_concrete_function() converter = tf.lite.TFLiteConverter.from_concrete_functions([concere]) converter.experimental_new_converter = True # converter.optimizations = [tf.lite.Optimize.DEFAULT] converter.target_spec.supported_ops = [ tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS ] converter.convert() # am.convert_to_pb('./test_model')
""" with tf.name_scope("index_to_unicode_points"): def map_fn(arr): def sub_map_fn(index): return self.index_to_unicode_points[index] return tf.map_fn(sub_map_fn, arr, dtype=tf.int32) # filter -1 value to avoid outofrange minus_one = -1 * tf.ones_like(feat, dtype=tf.int32) blank_like = self.blank * tf.ones_like(feat, dtype=tf.int32) feat = tf.where(feat == minus_one, blank_like, feat) return tf.map_fn( map_fn, feat, dtype=tf.int32, ) if __name__ == '__main__': from utils.user_config import UserConfig import pypinyin import numpy as np config = UserConfig('../config.yml', '../config.yml', False) print(config) test = TextFeaturizer(config['decoder_config']) print(test.num_classes, test.vocab_array) # print(test.extract(pypinyin.lazy_pinyin('我爱你',1))) print(test.iextract(tf.constant(np.random.random([4, test.num_classes]))))
            while i < step:
                enc = tf.reshape(x[:, i], [1, 1, -1])
                y = self.am.model.predict_net(
                    inputs=tf.reshape(self.decoded, [1, -1]),
                    p_memory_states=None,
                    training=False)
                y = y[:, -1:]
                z = self.am.model.joint_net([enc, y], training=False)
                logits = tf.squeeze(tf.nn.log_softmax(z))
                pred = tf.argmax(logits, axis=-1, output_type=tf.int32)
                pred = tf.reshape(pred, [1])
                # emit only non-padding, non-blank predictions
                if pred != 0 and pred != self.text_featurizer.blank:
                    self.decoded = tf.concat([self.decoded, pred], axis=0)
                    print('buffer_step: {}, step: {}, pred: {}'.format(
                        j, i, self.text_featurizer.index_to_token[pred.numpy().tolist()[0]]))
                i += 1
            j += 1


if __name__ == "__main__":
    am_config = UserConfig(r'./pre_train/rnnt/am_data.yml',
                           r'./pre_train/rnnt/conformer.yml')
    # am_config = UserConfig(r'./configs/am_data_back.yml', r'./configs/tdnn.yml')
    model = StreamingASR(am_config)
    model.stream_detect('CppInference/test.wav')
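# --- Illustrative sketch (not from the original file) ---
# The decoding rule above, distilled: per encoder frame, take the argmax of
# the joint logits and extend the prefix only for non-padding, non-blank ids.
# The blank id, start token, and logits below are made-up stand-ins.
import tensorflow as tf

blank = 1
decoded = tf.constant([2], dtype=tf.int32)       # hypothetical start token
logits = tf.math.log(tf.constant([0.1, 0.2, 0.3, 0.4]))
pred = tf.argmax(logits, output_type=tf.int32)   # -> 3
if int(pred) not in (0, blank):
    decoded = tf.concat([decoded, tf.reshape(pred, [1])], axis=0)
print(decoded.numpy())  # [2 3]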
    def punc_test(self, txt):
        return self.lm.punc_predict(list(txt))


if __name__ == '__main__':
    import time

    # USE CPU:
    # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    # USE one GPU:
    # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # Limit CPU to 1 core:
    # import tensorflow as tf
    # tf.config.threading.set_inter_op_parallelism_threads(1)
    # tf.config.threading.set_intra_op_parallelism_threads(1)
    am_config = UserConfig(r'./conformerCTC(M)/am_data.yml',
                           r'./conformerCTC(M)/conformerM.yml')
    lm_config = UserConfig(r'./transformer-logs/lm_data.yml',
                           r'./transformer-logs/transformerO2OE.yml')
    punc_config = UserConfig(r'./punc_model/punc_settings.yml',
                             r'./punc_model/punc_settings.yml')
    asr = ASR(am_config, lm_config, punc_config)
    # The first inference is slow while the graph is built; this is normal.
    s = time.time()
    a, b = asr.stt(r'BAC009S0764W0121.wav')
    e = time.time()
    print(a)
    print(b)
    print('asr.stt first inference cost time:', e - s)
    # Subsequent calls run at full speed.
        _, _, stop_flag, decoded = tf.while_loop(
            _cond, _body,
            loop_vars=(b_i, B, stop_flag, decoded),
            shape_invariants=(tf.TensorShape([]),
                              tf.TensorShape([]),
                              tf.TensorShape([None]),
                              tf.TensorShape([None, None])))
        return decoded


if __name__ == '__main__':
    from utils.user_config import UserConfig
    from utils.text_featurizers import TextFeaturizer
    import time

    config = UserConfig(r'D:\TF2-ASR\configs\lm_data.yml',
                        r'D:\TF2-ASR\configs\transformer.yml')
    vocab_featurizer = TextFeaturizer(config['lm_vocab'])
    word_featurizer = TextFeaturizer(config['lm_word'])
    model_config = config['model_config']
    model_config.update({
        'input_vocab_size': vocab_featurizer.num_classes,
        'target_vocab_size': word_featurizer.num_classes,
    })
    model = Transformer(**model_config)
    model._build()
    # warm-up call: the first run traces the graph and is slow
    model.recognize(np.ones([2, 10]))
    s = time.time()
    c = model.recognize(np.ones([2, 10]))
    e = time.time()
    print(c, e - s)
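# --- Illustrative sketch (not from the original file) ---
# Why shape_invariants are passed above: `decoded` grows inside the loop, so
# its static shape must be relaxed to TensorShape([None, None]) or TensorFlow
# rejects the tf.concat. A self-contained toy version:
import tensorflow as tf

def _toy_cond(i, dec):
    return i < 3

def _toy_body(i, dec):
    step = tf.fill([tf.shape(dec)[0], 1], i)
    return i + 1, tf.concat([dec, step], axis=-1)

_, grown = tf.while_loop(
    _toy_cond, _toy_body,
    loop_vars=(tf.constant(0), tf.zeros([2, 1], tf.int32)),
    shape_invariants=(tf.TensorShape([]), tf.TensorShape([None, None])))
print(grown.shape)  # (2, 4)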
        # token to vocab
        lm_result = self.lm.decode(lm_result[0].numpy(), self.lm.lm_featurizer)
        return lm_result


if __name__ == '__main__':
    import time

    # USE CPU:
    # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    # USE one GPU:
    # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # Limit CPU to 1 core:
    # import tensorflow as tf
    # tf.config.threading.set_inter_op_parallelism_threads(1)
    # tf.config.threading.set_intra_op_parallelism_threads(1)
    am_config = UserConfig(r'./conformerCTC(M)/am_data.yml',
                           r'./conformerCTC(M)/conformerM.yml')
    lm_config = UserConfig(r'./transformer-logs/lm_data.yml',
                           r'./transformer-logs/transformerO2OE.yml')
    asr = ASR(am_config, lm_config)
    # The first inference is slow while the graph is built; this is normal.
    s = time.time()
    a, b = asr.stt(r'BAC009S0764W0121.wav')
    e = time.time()
    print(a)
    print(b)
    print('asr.stt first inference cost time:', e - s)
    # Subsequent calls run at full speed.
    s = time.time()
    a, b = asr.stt(r'BAC009S0764W0121.wav')
        if self.runner._finished():
            self.runner.save_checkpoint()
            logging.info('Finish training!')
            break
        if self.runner.steps % self.config['running_config']['save_interval_steps'] == 0:
            self.dg.save_state(self.config['running_config']['outdir'])


if __name__ == '__main__':
    import argparse

    parse = argparse.ArgumentParser()
    parse.add_argument('--data_config', type=str, default='./configs/lm_data.yml',
                       help='the lm data config path')
    parse.add_argument('--lm_config', type=str, default='./configs/transfomer.yml',
                       help='the lm model config path')
    parse.add_argument('--punc_config', type=str, default='./configs/punc_settings.yml',
                       help='the punctuation config path')
    args = parse.parse_args()
    lm_config = UserConfig(args.data_config, args.lm_config)
    punc_config = UserConfig(args.punc_config, args.punc_config)
    train = Punc_Trainer(lm_config, punc_config)
    train.train()
    # Limit CPU to 1 core:
    # import tensorflow as tf
    # import os
    # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    # tf.config.threading.set_inter_op_parallelism_threads(1)
    # tf.config.threading.set_intra_op_parallelism_threads(1)
    parse = argparse.ArgumentParser()
    parse.add_argument('--common_config', type=str, default='./configs/common.yml',
                       help='the common config path')
    parse.add_argument('--acoustic_config', type=str, default='./configs/fastspeech.yml',
                       help='the acoustic model config path')
    parse.add_argument('--vocoder_config', type=str, default='./configs/vocoder.yml',
                       help='the vocoder config path')
    args = parse.parse_args()
    acoustic_config = UserConfig(args.common_config, args.acoustic_config)
    vocoder_config = UserConfig(args.common_config, args.vocoder_config)
    tts = TTS(acoustic_config, vocoder_config)
    # warm-up call: the first synthesis is slow while the graph is built
    wav = tts.tts('来一句长一点的话儿试一试呢。', 'spk1')
    s = time.time()
    wav = tts.tts('来一句长一点的话儿试一试呢。', 'spk1')
    e = time.time()
    sf.write('test.wav', wav, 8000)
    print('wav length:', wav.shape[0] / 8000, 'tts cost time:', e - s)
            old_epoch = self.dg.epochs
            for batch in train_datasets:
                try:
                    self.strategy.run(self._train_step, args=(batch,))
                except tf.errors.OutOfRangeError:
                    continue
                new_epoch = self.dg.epochs
                # stop once the data generator has wrapped a full epoch
                if new_epoch - old_epoch >= 1:
                    break
            loss = self.train_metrics['loss'].result().numpy()
            self._save_model(epoch, loss)
            print('epoch:{} loss:{}'.format(epoch, loss))
            self.train_metrics['loss'].reset_states()


if __name__ == '__main__':
    parse = argparse.ArgumentParser()
    parse.add_argument('--common_config', type=str,
                       default='/home/jiangjing/project/my/config/common.yml')
    args = parse.parse_args()
    config = UserConfig(args.common_config)
    train = Sub_LM_Trainer(config)
    train.train()
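# --- Illustrative sketch (not from the original file) ---
# The strategy.run() pattern above, in miniature. MirroredStrategy is an
# assumption; the trainer may configure a different tf.distribute strategy.
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()

@tf.function
def _toy_train_step(batch):
    return tf.reduce_sum(batch)

dataset = tf.data.Dataset.from_tensor_slices(tf.ones([8, 2])).batch(4)
for batch in strategy.experimental_distribute_dataset(dataset):
    strategy.run(_toy_train_step, args=(batch,))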
    def am_test(self, wav_path):
        # am_result is token ids
        am_result = self.am.predict(wav_path)
        # token to vocab
        if self.am.model_type == 'Transducer':
            am_result = self.decode_am_result(am_result[1:-1])
        else:
            am_result = self.decode_am_result(am_result[0])
        return am_result


if __name__ == '__main__':
    import time

    # USE CPU:
    # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    # USE one GPU:
    # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # Limit CPU to 1 core:
    # import tensorflow as tf
    # tf.config.threading.set_inter_op_parallelism_threads(1)
    # tf.config.threading.set_intra_op_parallelism_threads(1)
    am_config = UserConfig(r'./streaming-logs/am_data.yml',
                           r'./streaming-logs/Streaming_ConformerS.yml')
    asr = ASR(am_config)
    # The first inference is slow while the graph is built; this is normal.
    print(asr.am_test(r'BAC009S0764W0121.wav'))