Example #1
    def _build_train_op(self):
        """构建训练操作符"""

        # Global step counter
        self.global_step = tf.train.get_or_create_global_step()

        # Loss function
        if self.model_conf.loss_func == LossFunction.CTC:
            self.loss = Loss.ctc(labels=self.labels,
                                 logits=self.outputs,
                                 sequence_length=self.seq_len)
        elif self.model_conf.loss_func == LossFunction.CrossEntropy:
            self.loss = Loss.cross_entropy(labels=self.labels,
                                           logits=self.outputs)

        self.cost = tf.reduce_mean(self.loss)

        tf.compat.v1.summary.scalar('cost', self.cost)

        # Learning rate with exponential decay
        self.lrn_rate = tf.compat.v1.train.exponential_decay(
            self.model_conf.trains_learning_rate,
            self.global_step,
            staircase=True,
            decay_steps=self.decay_steps,
            decay_rate=0.98,
        )
        tf.compat.v1.summary.scalar('learning_rate', self.lrn_rate)

        if self.model_conf.neu_optimizer == Optimizer.AdaBound:
            self.optimizer = AdaBoundOptimizer(learning_rate=self.lrn_rate,
                                               final_lr=0.001,
                                               beta1=0.9,
                                               beta2=0.999,
                                               amsbound=True)
        elif self.model_conf.neu_optimizer == Optimizer.Adam:
            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=self.lrn_rate)
        elif self.model_conf.neu_optimizer == Optimizer.RAdam:
            self.optimizer = RAdamOptimizer(learning_rate=self.lrn_rate,
                                            warmup_proportion=0.1,
                                            min_lr=1e-6)
        elif self.model_conf.neu_optimizer == Optimizer.Momentum:
            self.optimizer = tf.train.MomentumOptimizer(
                learning_rate=self.lrn_rate,
                use_nesterov=True,
                momentum=0.9,
            )
        elif self.model_conf.neu_optimizer == Optimizer.SGD:
            self.optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=self.lrn_rate, )
        elif self.model_conf.neu_optimizer == Optimizer.AdaGrad:
            self.optimizer = tf.train.AdagradOptimizer(
                learning_rate=self.lrn_rate, )
        elif self.model_conf.neu_optimizer == Optimizer.RMSProp:
            self.optimizer = tf.train.RMSPropOptimizer(
                learning_rate=self.lrn_rate, )

        # Batch-norm update ops (moving_mean, moving_variance)
        update_ops = tf.compat.v1.get_collection(tf.GraphKeys.UPDATE_OPS)

        # Run train_op together with the update_ops
        with tf.control_dependencies(update_ops):
            self.train_op = self.optimizer.minimize(
                loss=self.cost,
                global_step=self.global_step,
            )

        # Transcription layer: decoder matching the loss function
        if self.model_conf.loss_func == LossFunction.CTC:
            self.dense_decoded = self.decoder.ctc(inputs=self.outputs,
                                                  sequence_length=self.seq_len)
        elif self.model_conf.loss_func == LossFunction.CrossEntropy:
            self.dense_decoded = self.decoder.cross_entropy(
                inputs=self.outputs)
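The graph built above is typically driven from a TF1-style session loop that feeds batches and runs train_op, cost and global_step. A minimal sketch, assuming the surrounding model exposes an inputs placeholder (hypothetical name) alongside the labels and seq_len placeholders used above, and a next_batch() iterator that is likewise assumed for illustration:

# Hypothetical training loop; only train_op, cost, global_step, labels and
# seq_len come from the example itself. model.inputs and next_batch() are
# assumptions for illustration.
import tensorflow as tf

def train(model, next_batch, steps=1000):
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        for _ in range(steps):
            batch_x, batch_y, batch_len = next_batch()
            feed = {
                model.inputs: batch_x,     # image batch
                model.labels: batch_y,     # sparse labels for CTC, dense for cross-entropy
                model.seq_len: batch_len,  # sequence lengths consumed by the CTC loss
            }
            _, cost, step = sess.run(
                [model.train_op, model.cost, model.global_step],
                feed_dict=feed)
            if step % 100 == 0:
                print('step {}, cost {:.4f}'.format(step, cost))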
Example #2
    def _build_train_op(self):
        """操作符生成器"""
        # Global step counter
        self.global_step = tf.train.get_or_create_global_step()
        # Loss function
        if self.model_conf.loss_func == LossFunction.CTC:
            self.loss = Loss.ctc(labels=self.labels,
                                 logits=self.outputs,
                                 sequence_length=self.seq_len)
        elif self.model_conf.loss_func == LossFunction.CrossEntropy:
            self.loss = Loss.cross_entropy(labels=self.labels,
                                           logits=self.outputs)

        self.cost = tf.reduce_mean(self.loss)

        tf.compat.v1.summary.scalar('cost', self.cost)

        # Learning rate
        self.lrn_rate = tf.compat.v1.train.exponential_decay(
            self.model_conf.trains_learning_rate,
            self.global_step,
            staircase=True,
            decay_steps=10000,
            decay_rate=0.98,
        )
        tf.compat.v1.summary.scalar('learning_rate', self.lrn_rate)

        # Training parameter updates
        update_ops = tf.compat.v1.get_collection(tf.GraphKeys.UPDATE_OPS)

        # Run the batch-norm moving mean/variance updates together with the train op
        with tf.control_dependencies(update_ops):

            # TODO: this if/else chain is clumsy; a dict-based optimizer selector
            # (see the sketch after this example) would be cleaner
            if self.model_conf.neu_optimizer == Optimizer.AdaBound:
                self.train_op = AdaBoundOptimizer(
                    learning_rate=self.lrn_rate,
                    final_lr=0.001,
                    beta1=0.9,
                    beta2=0.999,
                    amsbound=True).minimize(loss=self.cost,
                                            global_step=self.global_step)
            elif self.model_conf.neu_optimizer == Optimizer.Adam:
                self.train_op = tf.train.AdamOptimizer(
                    learning_rate=self.lrn_rate).minimize(
                        self.cost, global_step=self.global_step)
            elif self.model_conf.neu_optimizer == Optimizer.RAdam:
                self.train_op = RAdamOptimizer(
                    learning_rate=self.lrn_rate,
                    warmup_proportion=0.1,
                    min_lr=1e-6).minimize(self.cost,
                                          global_step=self.global_step)
            elif self.model_conf.neu_optimizer == Optimizer.Momentum:
                self.train_op = tf.train.MomentumOptimizer(
                    learning_rate=self.lrn_rate,
                    use_nesterov=True,
                    momentum=0.9,
                ).minimize(self.cost, global_step=self.global_step)
            elif self.model_conf.neu_optimizer == Optimizer.SGD:
                self.train_op = tf.train.GradientDescentOptimizer(
                    learning_rate=self.lrn_rate, ).minimize(
                        self.cost, global_step=self.global_step)
            elif self.model_conf.neu_optimizer == Optimizer.AdaGrad:
                self.train_op = tf.train.AdagradOptimizer(
                    learning_rate=self.lrn_rate, ).minimize(
                        self.cost, global_step=self.global_step)
            elif self.model_conf.neu_optimizer == Optimizer.RMSProp:
                self.train_op = tf.train.RMSPropOptimizer(
                    learning_rate=self.lrn_rate, ).minimize(
                        self.cost, global_step=self.global_step)

        # Transcription layer: decoder matching the loss function
        if self.model_conf.loss_func == LossFunction.CTC:
            self.dense_decoded = self.decoder.ctc(inputs=self.outputs,
                                                  sequence_length=self.seq_len)
        elif self.model_conf.loss_func == LossFunction.CrossEntropy:
            self.dense_decoded = self.decoder.cross_entropy(
                inputs=self.outputs)
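The TODO in Example #2 flags the if/else optimizer selection as clumsy. A common refactor is a table that maps the Optimizer enum to a constructor, so the selection collapses to a single lookup. A sketch, assuming the same Optimizer, AdaBoundOptimizer and RAdamOptimizer names used by the examples are importable; the hyperparameters mirror the values above:

# Table-driven optimizer selection; one entry per Optimizer enum member.
# Optimizer, AdaBoundOptimizer and RAdamOptimizer are assumed to be imported
# from the same project as the examples above.
import tensorflow as tf

OPTIMIZER_FACTORY = {
    Optimizer.AdaBound: lambda lr: AdaBoundOptimizer(
        learning_rate=lr, final_lr=0.001, beta1=0.9, beta2=0.999, amsbound=True),
    Optimizer.Adam: lambda lr: tf.train.AdamOptimizer(learning_rate=lr),
    Optimizer.RAdam: lambda lr: RAdamOptimizer(
        learning_rate=lr, warmup_proportion=0.1, min_lr=1e-6),
    Optimizer.Momentum: lambda lr: tf.train.MomentumOptimizer(
        learning_rate=lr, momentum=0.9, use_nesterov=True),
    Optimizer.SGD: lambda lr: tf.train.GradientDescentOptimizer(learning_rate=lr),
    Optimizer.AdaGrad: lambda lr: tf.train.AdagradOptimizer(learning_rate=lr),
    Optimizer.RMSProp: lambda lr: tf.train.RMSPropOptimizer(learning_rate=lr),
}

# Usage inside _build_train_op(), within the control_dependencies(update_ops) block:
#     optimizer = OPTIMIZER_FACTORY[self.model_conf.neu_optimizer](self.lrn_rate)
#     self.train_op = optimizer.minimize(self.cost, global_step=self.global_step)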
Example #3
    def _build_train_op(self):
        self.global_step = tf.train.get_or_create_global_step()
        # CTC loss: forward-backward algorithm with maximum-likelihood training.
        if WARP_CTC:
            import_module('warpctc_tensorflow')
            with tf.get_default_graph()._kernel_label_map(
                {"CTCLoss": "WarpCTC"}):
                self.loss = tf.nn.ctc_loss(inputs=self.predict,
                                           labels=self.labels,
                                           sequence_length=self.seq_len)
        else:
            self.loss = tf.nn.ctc_loss(
                labels=self.labels,
                inputs=self.predict,
                sequence_length=self.seq_len,
                ctc_merge_repeated=CTC_MERGE_REPEATED,
                preprocess_collapse_repeated=PREPROCESS_COLLAPSE_REPEATED,
                ignore_longer_outputs_than_inputs=False,
                time_major=CTC_LOSS_TIME_MAJOR)

        self.cost = tf.reduce_mean(self.loss)
        tf.summary.scalar('cost', self.cost)
        self.lrn_rate = tf.train.exponential_decay(TRAINS_LEARNING_RATE,
                                                   self.global_step,
                                                   DECAY_STEPS,
                                                   DECAY_RATE,
                                                   staircase=True)
        tf.summary.scalar('learning_rate', self.lrn_rate)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # print(update_ops)
        # Run the batch-norm moving mean/variance updates together with the train op
        with tf.control_dependencies(update_ops):
            if OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.AdaBound:
                self.train_op = AdaBoundOptimizer(
                    learning_rate=self.lrn_rate,
                    final_lr=0.1,
                    beta1=0.9,
                    beta2=0.999,
                    amsbound=True).minimize(loss=self.cost,
                                            global_step=self.global_step)
            elif OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.Adam:
                self.train_op = tf.train.AdamOptimizer(
                    learning_rate=self.lrn_rate).minimize(
                        self.cost, global_step=self.global_step)
            elif OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.Momentum:
                self.train_op = tf.train.MomentumOptimizer(
                    learning_rate=self.lrn_rate,
                    use_nesterov=True,
                    momentum=MOMENTUM,
                ).minimize(self.cost, global_step=self.global_step)
            elif OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.SGD:
                self.train_op = tf.train.GradientDescentOptimizer(
                    learning_rate=self.lrn_rate, ).minimize(
                        self.cost, global_step=self.global_step)
            elif OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.AdaGrad:
                self.train_op = tf.train.AdagradOptimizer(
                    learning_rate=self.lrn_rate, ).minimize(
                        self.cost, global_step=self.global_step)
            elif OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.RMSProp:
                self.train_op = tf.train.RMSPropOptimizer(
                    learning_rate=self.lrn_rate,
                    decay=DECAY_RATE,
                ).minimize(self.cost, global_step=self.global_step)

        # Alternative: tf.nn.ctc_greedy_decoder
        # (faster, but the beam search below usually gives better results)
        # self.decoded, self.log_prob = tf.nn.ctc_greedy_decoder(
        #     self.predict,
        #     self.seq_len,
        #     merge_repeated=False
        # )

        # Beam search for the most probable label sequence
        self.decoded, self.log_prob = tf.nn.ctc_beam_search_decoder(
            inputs=self.predict,
            sequence_length=self.seq_len,
            merge_repeated=False,
            beam_width=CTC_BEAM_WIDTH,
            top_paths=CTC_TOP_PATHS,
        )

        if StrictVersion(tf.__version__) >= StrictVersion('1.12.0'):
            self.dense_decoded = tf.sparse.to_dense(self.decoded[0],
                                                    default_value=-1,
                                                    name="dense_decoded")
        else:
            self.dense_decoded = tf.sparse_tensor_to_dense(
                self.decoded[0], default_value=-1, name="dense_decoded")
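The beam-search output above is converted to a dense matrix padded with -1 (dense_decoded). Recovering readable text from it is a matter of dropping the padding and mapping each index through the character set; a minimal sketch, where decode_maps is a hypothetical {index: character} dict and not part of the original code:

# Map rows of character indices (padded with -1) back to strings.
def decoded_to_text(dense_decoded, decode_maps):
    texts = []
    for row in dense_decoded:
        texts.append(''.join(decode_maps[i] for i in row if i != -1))
    return texts

# e.g. with decode_maps = {0: 'a', 1: 'b', 2: 'c'}:
# decoded_to_text([[0, 2, -1, -1]], decode_maps) -> ['ac']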
Example #4
    def _build_train_op(self):
        self.global_step = tf.train.get_or_create_global_step()
        # CTC loss: forward-backward algorithm with maximum-likelihood training.

        self.loss = tf.nn.ctc_loss(
            labels=self.labels,
            inputs=self.predict,
            sequence_length=self.seq_len,
            ctc_merge_repeated=CTC_MERGE_REPEATED,
            preprocess_collapse_repeated=PREPROCESS_COLLAPSE_REPEATED,
            ignore_longer_outputs_than_inputs=False,
            time_major=True)

        self.cost = tf.reduce_mean(self.loss)
        tf.summary.scalar('cost', self.cost)

        self.lrn_rate = tf.train.exponential_decay(TRAINS_LEARNING_RATE,
                                                   self.global_step,
                                                   DECAY_STEPS,
                                                   DECAY_RATE,
                                                   staircase=True)
        tf.summary.scalar('learning_rate', self.lrn_rate)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # print(update_ops)
        with tf.control_dependencies(update_ops):
            self.train_op = AdaBoundOptimizer(learning_rate=self.lrn_rate,
                                              final_lr=0.1,
                                              beta1=0.9,
                                              beta2=0.999,
                                              amsbound=True).minimize(
                                                  loss=self.cost,
                                                  global_step=self.global_step)
            # self.optimizer = tf.train.AdamOptimizer(
            #     learning_rate=self.lrn_rate
            # ).minimize(
            #     self.cost,
            #     global_step=self.global_step
            # )
            # self.optimizer = tf.train.MomentumOptimizer(
            #     learning_rate=self.lrn_rate,
            #     use_nesterov=True,
            #     momentum=MOMENTUM,
            # ).minimize(
            #     self.cost,
            #     global_step=self.global_step
            # )
        # Storing adjusted smoothed mean and smoothed variance operations
        # update_ops = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS))
        # train_ops = [self.optimizer] + self.utils.extra_train_ops
        # self.train_op = tf.group(self.optimizer, update_ops)
        # self.train_op = self.optimizer
        # self.train_op = tf.group(*train_ops)

        # Alternative: tf.nn.ctc_greedy_decoder
        # (faster, but the beam search below usually gives better results)
        # self.decoded, self.log_prob = tf.nn.ctc_greedy_decoder(
        #     self.predict,
        #     self.seq_len,
        #     merge_repeated=False
        # )

        # Beam search for the most probable label sequence
        self.decoded, self.log_prob = tf.nn.ctc_beam_search_decoder(
            self.predict,
            self.seq_len,
            merge_repeated=False,
            beam_width=CTC_BEAM_WIDTH,
            top_paths=CTC_TOP_PATHS,
        )

        if StrictVersion(tf.__version__) >= StrictVersion('1.12.0'):
            self.dense_decoded = tf.sparse.to_dense(self.decoded[0],
                                                    default_value=-1,
                                                    name="dense_decoded")
        else:
            self.dense_decoded = tf.sparse_tensor_to_dense(
                self.decoded[0], default_value=-1, name="dense_decoded")
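Example #4 calls tf.nn.ctc_loss with time_major=True, which requires self.predict to be shaped [max_time, batch_size, num_classes]. If the network emits batch-major logits ([batch_size, max_time, num_classes]), they must be transposed before the loss; a minimal sketch:

# tf.nn.ctc_loss with time_major=True expects logits shaped
# [max_time, batch_size, num_classes]; batch-major output must be transposed.
import tensorflow as tf

def to_time_major(batch_major_logits):
    # Swap the batch and time axes; the class axis stays last.
    return tf.transpose(batch_major_logits, perm=[1, 0, 2])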