def load_am():
    """Build the CNN-CTC acoustic model and load its trained weights.

    Fix: the original built and loaded the model into a local variable and
    then discarded it, making the function a no-op for callers.  It now
    returns the loaded ``Am`` instance (backward-compatible: callers that
    ignored the previous ``None`` result are unaffected).

    Relies on module-level ``train_data`` for the vocabulary; the vocab size
    must match the one used at training time.
    """
    # 1. Acoustic model -----------------------------------
    from model_speech.cnn_ctc import Am, am_hparams
    am_args = am_hparams()
    am_args.vocab_size = len(train_data.am_vocab)
    am = Am(am_args)
    print('loading acoustic model...')
    am.ctc_model.load_weights('logs_am/model.h5')
    return am
def train_am(x=None, y=None, fit_epoch=10):
    """Train (or fine-tune) the CNN-CTC acoustic model and export it.

    Two modes:
      * ``x``/``y`` given: fine-tune on real-time audio samples for
        per-user customization, running ``fit_epoch`` epochs.
      * ``x`` is None: train from the module-level ``train_data`` /
        ``dev_data`` generators with early stopping and checkpointing.

    After training the weights are saved, the graph is frozen to a .pb
    file, and a TF-Serving SavedModel is written.

    NOTE(review): relies on module-level names ``utils``, ``train_data``,
    ``dev_data``, ``batch_num``, ``epochs`` and ``modelVersion`` — confirm
    they are defined at module scope.
    """
    from model_speech.cnn_ctc import Am, am_hparams
    am_args = am_hparams()
    # Vocab size must match the vocabulary used when the weights were saved.
    am_args.vocab_size = len(utils.pny_vocab)
    am_args.gpu_nums = 1
    am_args.lr = 0.0008
    am_args.is_training = True
    am = Am(am_args)
    # Resume from previously saved weights when they exist.
    if os.path.exists(os.path.join(utils.cur_path, 'logs_am', 'model.h5')):
        print('加载声学模型...')
        am.ctc_model.load_weights(os.path.join(utils.cur_path, 'logs_am', 'model.h5'))
    checkpoint = ModelCheckpoint(os.path.join(utils.cur_path, 'checkpoint', "model_{epoch:02d}-{val_loss:.2f}.h5"),
                                 monitor='val_loss', save_best_only=True)
    eStop = EarlyStopping()  # stop after `patience` epochs without val-loss improvement
    # tensorboard --logdir=/media/yangjinming/DATA/GitHub/AboutPython/AboutDL/语音识别/logs_am/tbLog/ --host=127.0.0.1
    # tensbrd = TensorBoard(log_dir=os.path.join(utils.cur_path,'logs_am','tbLog'))
    if x is not None:  # fine-tune on live audio to customize the model
        size = 1
        if type(x) == np.ndarray:
            # Single sample: wrap in lists before feature conversion.
            x, y = utils.real_time2data([x], [y])
        else:
            # Batch of samples: batch size follows the input length.
            size = len(x)
            x, y = utils.real_time2data(x, y)
        am.ctc_model.fit(x=x, y=y, batch_size=size, epochs=fit_epoch)
    else:  # train from the prepared training data generators
        batch = train_data.get_am_batch()  # a generator, not a materialized dataset
        dev_batch = dev_data.get_am_batch()
        validate_step = 200  # average validation over N batches
        history = am.ctc_model.fit_generator(batch, steps_per_epoch=batch_num, epochs=epochs,
                                             callbacks=[eStop, checkpoint], workers=1,
                                             use_multiprocessing=False, verbose=1,
                                             validation_data=dev_batch, validation_steps=validate_step)
    am.ctc_model.save_weights(os.path.join(utils.cur_path, 'logs_am', 'model.h5'))
    # Write the frozen, serialized PB file.
    # with keras.backend.get_session() as sess:
    sess = keras.backend.get_session()
    # Freeze variables into constants; node names must match the model graph.
    constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(
        sess, sess.graph_def, output_node_names=['the_inputs', 'dense_2/truediv'])
    with tf.gfile.GFile(os.path.join(utils.cur_path, 'logs_am', 'amModel.pb'), mode='wb') as f:
        f.write(constant_graph.SerializeToString())
    # Save the TF-Serving SavedModel bundle.
    builder = tf.saved_model.builder.SavedModelBuilder(os.path.join(utils.cur_path, 'logs_am', modelVersion))
    model_signature = tf.saved_model.signature_def_utils.predict_signature_def(
        inputs={'input': am.inputs}, outputs={'output': am.outputs})
    builder.add_meta_graph_and_variables(sess, [tf.saved_model.tag_constants.SERVING],
                                         {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: model_signature})
    builder.save()
    # Only close the shared Keras session in the full-training path;
    # the fine-tune path may keep using the session afterwards.
    if x is None:
        sess.close()
def _model_init_keras(self, model_dir, config):
    """Build the CNN-CTC acoustic model and wrap it as a tf.Estimator.

    Hyper-parameters mirror training: lr=0.0008, CPU only (gpu_nums=0),
    vocabulary size taken from ``self.label_vocabulary``.
    The resulting estimator is stored on ``self.ctc_model``.
    """
    # 1. Acoustic model setup -----------------------------------
    from model_speech.cnn_ctc import Am, am_hparams

    hparams = am_hparams()
    hparams.vocab_size = len(self.label_vocabulary)
    hparams.gpu_nums = 0
    hparams.lr = 0.0008
    hparams.is_training = True

    acoustic = Am(hparams)
    self.ctc_model = tf.keras.estimator.model_to_estimator(
        acoustic.ctc_model, model_dir=model_dir, config=config)
def __init__(self, test_flag=True):
    """Load the acoustic and language models for decoding.

    Depending on the module flags ``K_usePB`` / ``tf_usePB``, each model is
    restored either from a frozen .pb graph or from Keras weights / a TF
    checkpoint.  The decoding vocabularies (``pny_vocab``/``han_vocab``)
    must be identical to the ones used at training time.
    """
    # 0. Prepare the decoding dictionaries; their parameters must match
    #    training (they could also be serialized and read from disk).
    self.test_flag = test_flag
    # print('加载声学模型中...')
    if K_usePB:
        # Acoustic model from a frozen PB graph.
        self.AM_sess = tf.Session()
        with tf.gfile.GFile(os.path.join(cur_path, 'logs_am', 'amModel.pb'), 'rb') as f:  # load the model
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            self.AM_sess.graph.as_default()
            tf.import_graph_def(graph_def, name='')  # import the computation graph
        self.AM_sess.run(tf.global_variables_initializer())  # an explicit init pass is required
        # Tensor names must exactly match the names used when the graph was saved!
        self.AM_x = self.AM_sess.graph.get_tensor_by_name('the_inputs:0')
        self.AM_preds = self.AM_sess.graph.get_tensor_by_name('dense_2/truediv:0')
    else:
        # Acoustic model from Keras weights.
        from model_speech.cnn_ctc import Am, am_hparams
        am_args = am_hparams()
        # Pitfall: vocab size must equal the training-time length — watch this closely!
        am_args.vocab_size = len(pny_vocab)
        self.am = Am(am_args)
        self.am.ctc_model.load_weights(os.path.join(cur_path, 'logs_am', 'model.h5'))
    # print('加载语言模型中...')
    if tf_usePB:
        # Language model from a frozen PB graph.
        self.sess = tf.Session()
        with tf.gfile.GFile(os.path.join(cur_path, 'logs_lm', 'lmModel.pb'), 'rb') as f:  # load the model
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            self.sess.graph.as_default()
            tf.import_graph_def(graph_def, name='')  # import the computation graph
        self.sess.run(tf.global_variables_initializer())  # an explicit init pass is required
        # Tensor names must exactly match the names used when the graph was saved!
        self.x = self.sess.graph.get_tensor_by_name('x:0')
        self.preds = self.sess.graph.get_tensor_by_name('preds:0')
    else:  # restore the language model from a TF checkpoint
        from model_language.transformer import Lm, lm_hparams
        lm_args = lm_hparams()
        lm_args.input_vocab_size = len(pny_vocab)
        lm_args.label_vocab_size = len(han_vocab)
        lm_args.dropout_rate = 0.  # inference: disable dropout
        self.lm = Lm(lm_args)
        self.sess = tf.Session(graph=self.lm.graph)
        with self.lm.graph.as_default():
            saver = tf.train.Saver()
        with self.sess.as_default():
            lmPath = tf.train.latest_checkpoint(os.path.join(cur_path, 'logs_lm'))
            saver.restore(self.sess, lmPath)
def train_am(epochs):
    """Train the CNN-CTC acoustic model for ``epochs`` epochs.

    Resumes from the first checkpoint found in ``logs_am`` when one exists,
    trains from the module-level ``train_data``/``dev_data`` generators with
    val-loss checkpointing, and finally saves weights to logs_am/model.h5.

    Args:
        epochs: number of training epochs to run.
    """
    # 1. Acoustic model training -----------------------------------
    from model_speech.cnn_ctc import Am, am_hparams
    am_args = am_hparams()
    am_args.vocab_size = len(train_data.am_vocab)
    am_args.gpu_nums = 1
    am_args.lr = 0.0008
    am_args.is_training = True
    am = Am(am_args)

    # BUG FIX: the original condition was `and not os.listdir('logs_am')`,
    # which only entered the branch when the directory was EMPTY and then
    # crashed on `os.listdir(...)[0]` (IndexError). Resume only when a
    # checkpoint file actually exists.
    if os.path.exists('logs_am') and os.listdir('logs_am'):
        model = os.listdir('logs_am')[0]
        am.ctc_model.load_weights(os.path.join('logs_am', model))

    batch_num = len(train_data.wav_lst) // train_data.batch_size

    # Checkpoint the best model (by validation loss) each epoch.
    ckpt = "model_{val_loss:.3f}_{epoch:04d}.h5"
    checkpoint = ModelCheckpoint(os.path.join('logs_am', ckpt), monitor='val_loss',
                                 save_weights_only=True, verbose=1, save_best_only=True)

    batch = train_data.get_am_batch()       # training-batch generator
    dev_batch = dev_data.get_am_batch()     # validation-batch generator
    # BUG FIX: `epochs` was hard-coded to 10, silently ignoring the parameter.
    am.ctc_model.fit_generator(batch, steps_per_epoch=batch_num, epochs=epochs,
                               callbacks=[checkpoint], workers=1, use_multiprocessing=False,
                               validation_data=dev_batch, validation_steps=2)
    am.ctc_model.save_weights('logs_am/model.h5')
import difflib
import tensorflow as tf
import numpy as np
from utils import decode_ctc, GetEditDistance

# 0. Prepare the dictionaries needed for decoding. Parameters must match
#    training; the vocabularies could also be saved locally and read back.
from utils import get_data, data_hparams
data_args = data_hparams()
train_data = get_data(data_args)

# 1. Acoustic model -----------------------------------
from model_speech.cnn_ctc import Am, am_hparams
am_args = am_hparams()  # parameter initialization, e.g. learning rate
# am_args.vocab_size = 230
am_args.vocab_size = len(train_data.am_vocab)  # set the vocabulary size
am = Am(am_args)  # build a model from the configured parameters
print('loading acoustic model...')
am.ctc_model.load_weights('logs_am/model.h5')
am.ctc_model.summary()

# 2. Language model -------------------------------------------
from model_language.transformer import Lm, lm_hparams
lm_args = lm_hparams()
lm_args.input_vocab_size = len(train_data.pny_vocab)
lm_args.label_vocab_size = len(train_data.han_vocab)
lm_args.dropout_rate = 0.  # inference: disable dropout
print('loading language model...')
import numpy as np
from utils import decode_ctc, GetEditDistance, cal_ctc_acc

# 0. Prepare the dictionaries needed for decoding. Parameters must match
#    training; the vocabularies could also be saved locally and read back.
from utils import get_data, data_hparams
data_args = data_hparams()
data_args.data_length = 20000
train_data = get_data(data_args)
print("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
print("=====train_data_amvocab", len(train_data.pny_vocab))

# 1. Acoustic model -----------------------------------
from model_speech.cnn_ctc import Am, am_hparams
# from model_speech.gru_ctc import Am, am_hparams
am_args = am_hparams()
am_args.vocab_size = len(train_data.am_vocab)
am = Am(am_args)
print('loading acoustic model...')
am.ctc_model.load_weights('logs_am/model.h5')
# am.ctc_model.load_weights('checkpoint/model_01-0.00.hdf5')

# 2. Language model -------------------------------------------

# 3. Prepare the test data. It need not match the training data; pick via
#    data_args.data_type. It should be 'test' here — 'train' was used only
#    because the demo model is small: with 'test' the effect is invisible
#    and unseen words appear.
data_args = data_hparams()
data_args.data_type = 'test'
data_args.zanghua = True
data_args.data_length = 20000
test_data = get_data(data_args)
def build_model(self, vocab_size=230):
    """Build and return the CTC acoustic model.

    Generalization: the vocabulary size was hard-coded to 230 (with
    `len(train_data.am_vocab)` left as a comment); it is now a keyword
    parameter whose default preserves the old behavior.

    Args:
        vocab_size: output vocabulary size; must match the vocabulary used
            at training time (pass len(train_data.am_vocab) when available).

    Returns:
        The Keras CTC model of the constructed acoustic model.
    """
    am_args = am_hparams()
    am_args.vocab_size = vocab_size
    am = Am(am_args)
    return am.ctc_model