def _build_train_op(self):
    """Build the training graph: loss, LR schedule, optimizer, and decoder.

    Attributes set on ``self``: ``global_step``, ``loss``, ``cost``,
    ``lrn_rate``, ``optimizer``, ``train_op``, ``dense_decoded``.

    Raises:
        ValueError: if the configured loss function or optimizer is not one
            of the supported enum values (previously these cases fell
            through silently and left attributes undefined, causing a
            confusing AttributeError later).
    """
    # Global step counter, incremented by the optimizer on every step.
    self.global_step = tf.train.get_or_create_global_step()

    # Loss function
    if self.model_conf.loss_func == LossFunction.CTC:
        self.loss = Loss.ctc(
            labels=self.labels,
            logits=self.outputs,
            sequence_length=self.seq_len,
        )
    elif self.model_conf.loss_func == LossFunction.CrossEntropy:
        self.loss = Loss.cross_entropy(
            labels=self.labels,
            logits=self.outputs,
        )
    else:
        # Fail fast instead of leaving self.loss undefined.
        raise ValueError(
            "Unsupported loss function: {}".format(self.model_conf.loss_func))

    self.cost = tf.reduce_mean(self.loss)
    tf.compat.v1.summary.scalar('cost', self.cost)

    # Learning rate with exponential decay; staircase=True decays the rate
    # in discrete steps every `decay_steps` rather than continuously.
    self.lrn_rate = tf.compat.v1.train.exponential_decay(
        self.model_conf.trains_learning_rate,
        self.global_step,
        staircase=True,
        decay_steps=self.decay_steps,
        decay_rate=0.98,
    )
    tf.compat.v1.summary.scalar('learning_rate', self.lrn_rate)

    # Optimizer selection from configuration.
    if self.model_conf.neu_optimizer == Optimizer.AdaBound:
        self.optimizer = AdaBoundOptimizer(
            learning_rate=self.lrn_rate,
            final_lr=0.001,
            beta1=0.9,
            beta2=0.999,
            amsbound=True,
        )
    elif self.model_conf.neu_optimizer == Optimizer.Adam:
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.lrn_rate)
    elif self.model_conf.neu_optimizer == Optimizer.RAdam:
        self.optimizer = RAdamOptimizer(
            learning_rate=self.lrn_rate,
            warmup_proportion=0.1,
            min_lr=1e-6,
        )
    elif self.model_conf.neu_optimizer == Optimizer.Momentum:
        self.optimizer = tf.train.MomentumOptimizer(
            learning_rate=self.lrn_rate,
            use_nesterov=True,
            momentum=0.9,
        )
    elif self.model_conf.neu_optimizer == Optimizer.SGD:
        self.optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=self.lrn_rate,
        )
    elif self.model_conf.neu_optimizer == Optimizer.AdaGrad:
        self.optimizer = tf.train.AdagradOptimizer(
            learning_rate=self.lrn_rate,
        )
    elif self.model_conf.neu_optimizer == Optimizer.RMSProp:
        self.optimizer = tf.train.RMSPropOptimizer(
            learning_rate=self.lrn_rate,
        )
    else:
        # Fail fast instead of leaving self.optimizer undefined.
        raise ValueError(
            "Unsupported optimizer: {}".format(self.model_conf.neu_optimizer))

    # Batch-norm update ops (moving_mean, moving_variance). Use the
    # compat.v1 GraphKeys accessor consistently with the rest of this
    # method (the original mixed tf.GraphKeys with tf.compat.v1 calls).
    update_ops = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.UPDATE_OPS)
    # Fuse train_op with update_ops so BN statistics refresh every step.
    with tf.control_dependencies(update_ops):
        self.train_op = self.optimizer.minimize(
            loss=self.cost,
            global_step=self.global_step,
        )

    # Transcription-layer decoder, matched to the configured loss function.
    if self.model_conf.loss_func == LossFunction.CTC:
        self.dense_decoded = self.decoder.ctc(
            inputs=self.outputs,
            sequence_length=self.seq_len,
        )
    elif self.model_conf.loss_func == LossFunction.CrossEntropy:
        self.dense_decoded = self.decoder.cross_entropy(
            inputs=self.outputs)
def _build_train_op(self):
    """Build the training graph: loss, LR schedule, optimizer, and decoder.

    Attributes set on ``self``: ``global_step``, ``loss``, ``cost``,
    ``lrn_rate``, ``train_op``, ``dense_decoded``.

    Raises:
        ValueError: if the configured loss function or optimizer is not one
            of the supported enum values (previously these cases fell
            through silently and left attributes undefined).
    """
    # Global step counter, incremented by the optimizer on every step.
    self.global_step = tf.train.get_or_create_global_step()

    # Loss function
    if self.model_conf.loss_func == LossFunction.CTC:
        self.loss = Loss.ctc(
            labels=self.labels,
            logits=self.outputs,
            sequence_length=self.seq_len,
        )
    elif self.model_conf.loss_func == LossFunction.CrossEntropy:
        self.loss = Loss.cross_entropy(
            labels=self.labels,
            logits=self.outputs,
        )
    else:
        # Fail fast instead of leaving self.loss undefined.
        raise ValueError(
            "Unsupported loss function: {}".format(self.model_conf.loss_func))

    self.cost = tf.reduce_mean(self.loss)
    tf.compat.v1.summary.scalar('cost', self.cost)

    # Learning rate with exponential decay. NOTE(review): decay_steps is
    # hard-coded to 10000 here while a sibling implementation reads it
    # from self.decay_steps — confirm which is intended for this class.
    self.lrn_rate = tf.compat.v1.train.exponential_decay(
        self.model_conf.trains_learning_rate,
        self.global_step,
        staircase=True,
        decay_steps=10000,
        decay_rate=0.98,
    )
    tf.compat.v1.summary.scalar('learning_rate', self.lrn_rate)

    # Batch-norm update ops (moving_mean, moving_variance).
    update_ops = tf.compat.v1.get_collection(tf.GraphKeys.UPDATE_OPS)

    # Optimizer dispatch table (resolves the old TODO about the clumsy
    # if/elif ladder). Factories are lazy so only the selected optimizer
    # is ever constructed.
    optimizer_factories = {
        Optimizer.AdaBound: lambda: AdaBoundOptimizer(
            learning_rate=self.lrn_rate,
            final_lr=0.001,
            beta1=0.9,
            beta2=0.999,
            amsbound=True),
        Optimizer.Adam: lambda: tf.train.AdamOptimizer(
            learning_rate=self.lrn_rate),
        Optimizer.RAdam: lambda: RAdamOptimizer(
            learning_rate=self.lrn_rate,
            warmup_proportion=0.1,
            min_lr=1e-6),
        Optimizer.Momentum: lambda: tf.train.MomentumOptimizer(
            learning_rate=self.lrn_rate,
            use_nesterov=True,
            momentum=0.9),
        Optimizer.SGD: lambda: tf.train.GradientDescentOptimizer(
            learning_rate=self.lrn_rate),
        Optimizer.AdaGrad: lambda: tf.train.AdagradOptimizer(
            learning_rate=self.lrn_rate),
        Optimizer.RMSProp: lambda: tf.train.RMSPropOptimizer(
            learning_rate=self.lrn_rate),
    }
    try:
        make_optimizer = optimizer_factories[self.model_conf.neu_optimizer]
    except KeyError:
        # Fail fast instead of leaving self.train_op undefined.
        raise ValueError(
            "Unsupported optimizer: {}".format(self.model_conf.neu_optimizer))

    # Fuse train_op with update_ops so BN moving statistics refresh every
    # step. The optimizer is built inside the context, matching the
    # original op placement.
    with tf.control_dependencies(update_ops):
        self.train_op = make_optimizer().minimize(
            self.cost, global_step=self.global_step)

    # Transcription-layer decoder, matched to the configured loss function.
    if self.model_conf.loss_func == LossFunction.CTC:
        self.dense_decoded = self.decoder.ctc(
            inputs=self.outputs,
            sequence_length=self.seq_len,
        )
    elif self.model_conf.loss_func == LossFunction.CrossEntropy:
        self.dense_decoded = self.decoder.cross_entropy(
            inputs=self.outputs)
def _build_train_op(self):
    """Build the training graph: CTC loss, LR schedule, optimizer, decoder.

    Attributes set on ``self``: ``global_step``, ``loss``, ``cost``,
    ``lrn_rate``, ``train_op``, ``decoded``, ``log_prob``,
    ``dense_decoded``.

    Raises:
        ValueError: if ``OPTIMIZER_MAP[NEU_OPTIMIZER]`` is not one of the
            supported optimizers (previously this case fell through
            silently and left ``self.train_op`` undefined).
    """
    self.global_step = tf.train.get_or_create_global_step()

    # CTC loss, using forward/backward algorithms and maximum likelihood.
    if WARP_CTC:
        # Route CTCLoss through the warp-ctc kernel.
        # NOTE(review): _kernel_label_map is a private TF graph API and
        # may break across TF versions — confirm before upgrading TF.
        import_module('warpctc_tensorflow')
        with tf.get_default_graph()._kernel_label_map(
                {"CTCLoss": "WarpCTC"}):
            self.loss = tf.nn.ctc_loss(inputs=self.predict,
                                       labels=self.labels,
                                       sequence_length=self.seq_len)
    else:
        self.loss = tf.nn.ctc_loss(
            labels=self.labels,
            inputs=self.predict,
            sequence_length=self.seq_len,
            ctc_merge_repeated=CTC_MERGE_REPEATED,
            preprocess_collapse_repeated=PREPROCESS_COLLAPSE_REPEATED,
            ignore_longer_outputs_than_inputs=False,
            time_major=CTC_LOSS_TIME_MAJOR)

    self.cost = tf.reduce_mean(self.loss)
    tf.summary.scalar('cost', self.cost)

    # Exponentially decayed learning rate (staircase decay).
    self.lrn_rate = tf.train.exponential_decay(TRAINS_LEARNING_RATE,
                                               self.global_step,
                                               DECAY_STEPS,
                                               DECAY_RATE,
                                               staircase=True)
    tf.summary.scalar('learning_rate', self.lrn_rate)

    # Batch-norm update ops: refresh the smoothed mean/variance with
    # every training step by fusing them into train_op.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        if OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.AdaBound:
            self.train_op = AdaBoundOptimizer(
                learning_rate=self.lrn_rate,
                final_lr=0.1,
                beta1=0.9,
                beta2=0.999,
                amsbound=True).minimize(loss=self.cost,
                                        global_step=self.global_step)
        elif OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.Adam:
            self.train_op = tf.train.AdamOptimizer(
                learning_rate=self.lrn_rate).minimize(
                self.cost, global_step=self.global_step)
        elif OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.Momentum:
            self.train_op = tf.train.MomentumOptimizer(
                learning_rate=self.lrn_rate,
                use_nesterov=True,
                momentum=MOMENTUM,
            ).minimize(self.cost, global_step=self.global_step)
        elif OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.SGD:
            self.train_op = tf.train.GradientDescentOptimizer(
                learning_rate=self.lrn_rate,
            ).minimize(self.cost, global_step=self.global_step)
        elif OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.AdaGrad:
            self.train_op = tf.train.AdagradOptimizer(
                learning_rate=self.lrn_rate,
            ).minimize(self.cost, global_step=self.global_step)
        elif OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.RMSProp:
            self.train_op = tf.train.RMSPropOptimizer(
                learning_rate=self.lrn_rate,
                decay=DECAY_RATE,
            ).minimize(self.cost, global_step=self.global_step)
        else:
            # Fail fast instead of leaving self.train_op undefined.
            raise ValueError(
                "Unsupported optimizer: {}".format(NEU_OPTIMIZER))

    # Beam-search decoding of the best transcription path (slower than
    # tf.nn.ctc_greedy_decoder, but yields better results).
    self.decoded, self.log_prob = tf.nn.ctc_beam_search_decoder(
        inputs=self.predict,
        sequence_length=self.seq_len,
        merge_repeated=False,
        beam_width=CTC_BEAM_WIDTH,
        top_paths=CTC_TOP_PATHS,
    )
    # tf.sparse.to_dense replaced tf.sparse_tensor_to_dense in TF 1.12.
    if StrictVersion(tf.__version__) >= StrictVersion('1.12.0'):
        self.dense_decoded = tf.sparse.to_dense(self.decoded[0],
                                                default_value=-1,
                                                name="dense_decoded")
    else:
        self.dense_decoded = tf.sparse_tensor_to_dense(
            self.decoded[0],
            default_value=-1,
            name="dense_decoded")
def _build_train_op(self):
    """Build the training graph: CTC loss, LR schedule, AdaBound, decoder.

    Attributes set on ``self``: ``global_step``, ``loss``, ``cost``,
    ``lrn_rate``, ``train_op``, ``decoded``, ``log_prob``,
    ``dense_decoded``.

    This variant hard-wires the AdaBound optimizer; the optimizer is not
    configurable here.
    """
    self.global_step = tf.train.get_or_create_global_step()

    # CTC loss, using forward/backward algorithms and maximum likelihood.
    self.loss = tf.nn.ctc_loss(
        labels=self.labels,
        inputs=self.predict,
        sequence_length=self.seq_len,
        ctc_merge_repeated=CTC_MERGE_REPEATED,
        preprocess_collapse_repeated=PREPROCESS_COLLAPSE_REPEATED,
        ignore_longer_outputs_than_inputs=False,
        time_major=True)

    self.cost = tf.reduce_mean(self.loss)
    tf.summary.scalar('cost', self.cost)

    # Exponentially decayed learning rate (staircase decay).
    self.lrn_rate = tf.train.exponential_decay(TRAINS_LEARNING_RATE,
                                               self.global_step,
                                               DECAY_STEPS,
                                               DECAY_RATE,
                                               staircase=True)
    tf.summary.scalar('learning_rate', self.lrn_rate)

    # Batch-norm update ops (smoothed mean/variance); fused into train_op
    # so the moving statistics refresh every step. The debug
    # print(update_ops) that used to live here was removed.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        self.train_op = AdaBoundOptimizer(learning_rate=self.lrn_rate,
                                          final_lr=0.1,
                                          beta1=0.9,
                                          beta2=0.999,
                                          amsbound=True).minimize(
            loss=self.cost,
            global_step=self.global_step)

    # Beam-search decoding of the best transcription path (slower than
    # tf.nn.ctc_greedy_decoder, but yields better results).
    self.decoded, self.log_prob = tf.nn.ctc_beam_search_decoder(
        self.predict,
        self.seq_len,
        merge_repeated=False,
        beam_width=CTC_BEAM_WIDTH,
        top_paths=CTC_TOP_PATHS,
    )
    # tf.sparse.to_dense replaced tf.sparse_tensor_to_dense in TF 1.12.
    if StrictVersion(tf.__version__) >= StrictVersion('1.12.0'):
        self.dense_decoded = tf.sparse.to_dense(self.decoded[0],
                                                default_value=-1,
                                                name="dense_decoded")
    else:
        self.dense_decoded = tf.sparse_tensor_to_dense(
            self.decoded[0],
            default_value=-1,
            name="dense_decoded")