def model_fn(features, labels, mode, params):
        """Build the `tf.estimator.EstimatorSpec` for the requested mode.

        `create_model`, `optimization`, `num_labels`, `learning_rate`,
        `num_train_steps` and `num_warmup_steps` are not defined in this
        function; they are resolved from the surrounding scope.

        Args:
            features: Mapping that provides the "input_ids", "input_mask",
                "segment_ids" and "label_ids" entries.
            labels: Unused; labels are read from `features["label_ids"]`.
            mode: One of the `tf.estimator.ModeKeys` values.
            params: Unused.

        Returns:
            An `EstimatorSpec` holding the loss and train op (TRAIN), the
            loss and eval metrics (EVAL), or the predictions (PREDICT).
        """
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

        if is_predicting:
            # In predict mode `create_model` is unpacked into two values only.
            (predicted_labels,
             log_probs) = create_model(is_training, is_predicting, input_ids,
                                       input_mask, segment_ids, label_ids,
                                       num_labels)

            # NOTE(review): the "probabilities" key carries `log_probs`; by
            # its name this looks like log-probabilities - confirm with
            # `create_model` before exponentiating downstream.
            predictions = {
                'probabilities': log_probs,
                'labels': predicted_labels
            }
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

        # TRAIN and EVAL share the same three-value model call.
        (loss, predicted_labels,
         log_probs) = create_model(is_training, is_predicting, input_ids,
                                   input_mask, segment_ids, label_ids,
                                   num_labels)

        if is_training:
            # Only build the optimizer when it is going to be used; it used
            # to be added to the evaluation graph as well.
            train_op = optimization.create_optimizer(loss,
                                                     learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps,
                                                     use_tpu=False)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        # EVAL: the metric ops are only needed here, so they are no longer
        # created while training.
        accuracy = tf.metrics.accuracy(label_ids, predicted_labels)

        # Emitted while the graph is built, so this shows the metric's
        # (value, update_op) pair rather than a computed accuracy.
        print('***** Accuracy {}'.format(accuracy))

        eval_metrics = {
            "eval_accuracy": accuracy,
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metrics)
Exemple #2
0
    def model_fn(features, labels, mode, params):
        """Build the `TPUEstimatorSpec` for TRAIN, EVAL or PREDICT mode.

        `bert_config`, `num_labels`, `use_one_hot_embeddings`,
        `init_checkpoint`, `use_tpu`, `learning_rate`, `num_train_steps` and
        `num_warmup_steps` are not defined in this function; they are
        resolved from the surrounding scope.

        Args:
            features: Mapping that provides the "input_ids", "input_mask",
                "segment_ids" and "label_ids" entries.
            labels: Unused; labels are read from `features["label_ids"]`.
            mode: One of the `tf.estimator.ModeKeys` values.
            params: Unused.

        Returns:
            A `tf.contrib.tpu.TPUEstimatorSpec` for the given mode.
        """
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits,
         predicts) = create_model(bert_config, is_training, input_ids,
                                  input_mask, segment_ids, label_ids,
                                  num_labels, use_one_hot_embeddings)
        tvars = tf.trainable_variables()
        # Default to "nothing restored" so the logging loop below also works
        # when no checkpoint is given (this name used to be unbound then).
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            # Register the checkpoint initialisation exactly once: inside
            # the scaffold function when `use_tpu` is set, directly
            # otherwise.
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        tf.logging.info("**** Trainable Variables ****")

        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits):
                """Return macro precision, recall and F1 as metric ops.

                `per_example_loss` is accepted because it is part of the
                tensor list handed to `eval_metrics`, but it is not used.
                """
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # NOTE(review): 13 and the index list are hard-coded here;
                # presumably the class count and the label ids to score -
                # confirm against the label map whenever it changes.
                precision = tf_metrics.precision(label_ids,
                                                 predictions,
                                                 13, [1, 2, 4, 5, 6, 7, 8, 9],
                                                 average="macro")
                recall = tf_metrics.recall(label_ids,
                                           predictions,
                                           13, [1, 2, 4, 5, 6, 7, 8, 9],
                                           average="macro")
                f = tf_metrics.f1(label_ids,
                                  predictions,
                                  13, [1, 2, 4, 5, 6, 7, 8, 9],
                                  average="macro")
                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                }

            eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
        return output_spec
Exemple #3
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Return the `TPUEstimatorSpec` matching `mode`.

        Configuration such as `bert_config`, `num_labels`, `init_checkpoint`,
        `use_tpu` and the optimizer settings is not defined in this function
        and is resolved from the surrounding scope.
        """

        tf.logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s", feature_name,
                            features[feature_name].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        # Built for parity with the disabled accuracy metric; nothing below
        # currently reads it.
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        train_mode = tf.estimator.ModeKeys.TRAIN
        eval_mode = tf.estimator.ModeKeys.EVAL

        model_outputs = create_model(bert_config, mode == train_mode,
                                     input_ids, input_mask, segment_ids,
                                     label_ids, num_labels,
                                     use_one_hot_embeddings)
        (total_loss, per_example_loss, logits, predicts) = model_outputs

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            checkpoint_info = modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint)
            (assignment_map, initialized_variable_names) = checkpoint_info
            if not use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            else:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold

        tf.logging.info("**** Trainable Variables ****")
        for variable in tvars:
            restored = variable.name in initialized_variable_names
            init_string = ", *INIT_FROM_CKPT*" if restored else ""
            tf.logging.info("  name = %s, shape = %s%s", variable.name,
                            variable.shape, init_string)

        if mode == train_mode:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   loss=total_loss,
                                                   train_op=train_op,
                                                   scaffold_fn=scaffold_fn)

        if mode != eval_mode:
            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   predictions=predicts,
                                                   scaffold_fn=scaffold_fn)

        # The evaluation metrics are task specific and have to be adapted
        # by hand.
        def metric_fn(per_example_loss, label_ids, logits):
            """Return macro precision, recall and F1 as metric ops."""
            # If the label set changes, update the numbers below: 10 is the
            # total number of classes and 1-6 are the useful classes
            # (B, I, E).
            num_classes = 10
            useful_classes = [1, 2, 3, 4, 5, 6]

            predicted_ids = tf.argmax(logits, axis=-1, output_type=tf.int32)
            precision = tf_metrics.precision(label_ids, predicted_ids,
                                             num_classes, useful_classes,
                                             average="macro")
            recall = tf_metrics.recall(label_ids, predicted_ids,
                                       num_classes, useful_classes,
                                       average="macro")
            f_score = tf_metrics.f1(label_ids, predicted_ids,
                                    num_classes, useful_classes,
                                    average="macro")
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f_score,
            }

        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, [per_example_loss, label_ids, logits]),
            scaffold_fn=scaffold_fn)
Exemple #4
0
    def model_fn(features, labels, mode, params):
        """Return the `EstimatorSpec` matching `mode`.

        `bert_config`, `num_labels`, `init_checkpoint`, `args` and the
        optimizer settings are not defined in this function; they are
        resolved from the surrounding scope.
        """
        tf.logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s", feature_name,
                            features[feature_name].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        print('shape of input_ids', input_ids.shape)

        train_mode = tf.estimator.ModeKeys.TRAIN

        # Build the model from the configuration: input_ids is the index
        # representation of the input samples, label_ids that of the labels.
        # The call yields the total loss, the scores, `trans` and the
        # predicted classes.
        model_outputs = create_model(
            bert_config, mode == train_mode, input_ids, input_mask,
            segment_ids, label_ids, num_labels, False, args.dropout_rate,
            args.lstm_size, args.cell, args.num_layers)
        total_loss, logits, trans, pred_ids = model_outputs

        # Every variable that takes part in training.
        tvars = tf.trainable_variables()
        # Load the pre-trained BERT values described by the assignment map.
        if init_checkpoint:
            assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        if mode == train_mode:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, False)
            logged_tensors = {
                'loss': total_loss,
                'global_steps': tf.train.get_or_create_global_step(),
            }
            logging_hook = tf.train.LoggingTensorHook(
                logged_tensors, every_n_iter=args.save_summary_steps)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=total_loss,
                                              train_op=train_op,
                                              training_hooks=[logging_hook])

        if mode == tf.estimator.ModeKeys.EVAL:
            # Adapted for NER: the reported "eval_loss" is the mean squared
            # error between the label ids and the predicted ids.
            eval_metrics = {
                "eval_loss":
                tf.metrics.mean_squared_error(labels=label_ids,
                                              predictions=pred_ids),
            }
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=total_loss,
                                              eval_metric_ops=eval_metrics)

        return tf.estimator.EstimatorSpec(mode=mode, predictions=pred_ids)
Exemple #5
0
    def model_fn(features, labels, mode, params):
        """Build the `EstimatorSpec` for the joint domain/intent/slot model.

        `bert_config`, `num_domain`, `num_intent`, `num_slot`, `domain_w`,
        `intent_w`, `use_one_hot_embeddings`, `init_checkpoint`, `use_tpu`
        and the optimizer settings are not defined in this function; they
        are resolved from the surrounding scope.

        Args:
            features: Mapping that provides the "input_ids", "input_mask",
                "segment_ids", "domain_id", "intent_id" and "slot_id"
                entries.
            labels: Unused; the targets are read from `features`.
            mode: One of the `tf.estimator.ModeKeys` values.
            params: Unused.

        Returns:
            An `EstimatorSpec` with the loss and train op (TRAIN), the loss
            and predictions (EVAL), or the predictions only (PREDICT).
        """
        tf.logging.info("***features***")
        input_ids = features['input_ids']
        input_mask = features['input_mask']
        segment_ids = features['segment_ids']
        domain_id = features['domain_id']
        intent_id = features['intent_id']
        slot_id = features['slot_id']

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (domain_loss, intent_loss, slot_loss,
         domain_pred, intent_pred, slot_pred) = create_model(
             bert_config, is_training, input_ids, input_mask, segment_ids,
             domain_id, intent_id, slot_id, num_domain, num_intent, num_slot,
             use_one_hot_embeddings,
             np.array(domain_w, dtype=np.float32),
             np.array(intent_w, dtype=np.float32))

        # Initialise the trainable variables from the checkpoint, if any.
        if init_checkpoint:
            tvars = tf.trainable_variables()
            (assignment_map, _) = \
                modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        predictions = {
            "domain_pred": domain_pred,
            "intent_pred": intent_pred,
            "slot_pred": slot_pred
        }

        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)

        # The slot loss is weighted twice as heavily as the other two.
        total_loss = domain_loss + intent_loss + 2 * slot_loss

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=total_loss,
                                              train_op=train_op)

        # EVAL: `EstimatorSpec` requires a loss in this mode, so the
        # predictions-only spec that used to be returned here was rejected.
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=total_loss,
                                          predictions=predictions)
Exemple #6
0
    def model_fn(features, labels, mode, params):
        """Build the `TPUEstimatorSpec` for TRAIN, EVAL or PREDICT mode.

        `bert_config`, `num_labels`, `valid_labels`,
        `use_one_hot_embeddings`, `init_checkpoint`, `use_tpu` and the
        optimizer settings are not defined in this function; they are
        resolved from the surrounding scope.

        Args:
            features: Mapping that provides the "input_ids", "input_mask",
                "segment_ids" and "label_ids" entries.
            labels: Unused; labels are read from `features["label_ids"]`.
            mode: One of the `tf.estimator.ModeKeys` values.
            params: Unused.

        Returns:
            A `tf.contrib.tpu.TPUEstimatorSpec` for the given mode.
        """
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # Build the model through the BERT interface.
        (total_loss, logits, predicts) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids,
            label_ids, num_labels, use_one_hot_embeddings)
        tvars = tf.trainable_variables()
        # Default to "nothing restored" so the logging loop below also works
        # when no checkpoint is given (this name used to be unbound then).
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            # Only the pre-trained BERT weights are loaded at this point.
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            # Register the checkpoint initialisation exactly once: inside
            # the scaffold function when `use_tpu` is set, directly
            # otherwise.
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        tf.logging.info("**** Trainable Variables ****")

        for var in tvars:
            init_string = ""
            # Mark the variables whose values come from the checkpoint.
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps,
                                                     use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(label_ids, predicts, valid_labels):
                """Return macro precision, recall and F1 as metric ops.

                Each metric compares `label_ids` with `predicts` and is
                restricted to the label indices in `valid_labels`.
                """
                precision = tf_metrics.precision(label_ids,
                                                 predicts,
                                                 num_labels,
                                                 valid_labels,
                                                 average="macro")
                recall = tf_metrics.recall(label_ids,
                                           predicts,
                                           num_labels,
                                           valid_labels,
                                           average="macro")
                f = tf_metrics.f1(label_ids,
                                  predicts,
                                  num_labels,
                                  valid_labels,
                                  average="macro")
                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                }

            eval_metrics = (metric_fn, [label_ids, predicts, valid_labels])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
        return output_spec
    def model_fn(features, labels, mode, params):
        """Return the `EstimatorSpec` matching `mode`.

        `bert_config`, `num_labels`, `init_checkpoint` and the optimizer
        settings are not defined in this function; they are resolved from
        the surrounding scope.
        """
        tf.logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s", feature_name,
                            features[feature_name].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        print('shape of input_ids', input_ids.shape)

        train_mode = tf.estimator.ModeKeys.TRAIN

        # Build the model from the configuration: input_ids is the index
        # representation of the input samples, label_ids that of the labels.
        model_outputs = create_model(bert_config, mode == train_mode,
                                     input_ids, input_mask, segment_ids,
                                     label_ids, num_labels, False)
        total_loss, logits, trans, pred_ids = model_outputs

        # Load the BERT checkpoint into the trainable variables.
        tvars = tf.trainable_variables()
        if init_checkpoint:
            assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        if mode == train_mode:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, False)
            logged_tensors = {
                'loss': total_loss,
                'global_steps': tf.train.get_or_create_global_step(),
            }
            logging_hook = tf.train.LoggingTensorHook(logged_tensors,
                                                      every_n_iter=100)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=total_loss,
                                              train_op=train_op,
                                              training_hooks=[logging_hook])

        if mode != tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode=mode, predictions=pred_ids)

        # Adapted for NER: report macro precision, recall and F1.
        def metric_fn(label_ids, pred_ids):
            """Return macro precision, recall and F1 as metric ops."""
            # NOTE(review): 11 and the index list are hard-coded;
            # presumably the class count and the label ids to score -
            # confirm against the label map.
            num_classes = 11
            scored_classes = [2, 3, 4, 5, 6, 7]

            precision = tf_metrics.precision(label_ids, pred_ids,
                                             num_classes, scored_classes,
                                             average="macro")
            recall = tf_metrics.recall(label_ids, pred_ids,
                                       num_classes, scored_classes,
                                       average="macro")
            f_score = tf_metrics.f1(label_ids, pred_ids,
                                    num_classes, scored_classes,
                                    average="macro")
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f_score,
            }

        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metric_ops=metric_fn(label_ids, pred_ids))
Exemple #8
0
        def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
            """Build the `EstimatorSpec` for TRAIN, EVAL or PREDICT mode.

            `self`, `bert_config`, `num_labels`, `use_one_hot_embeddings`,
            `init_checkpoint`, `use_tpu` and the optimizer settings are not
            defined in this function; they are resolved from the surrounding
            scope.

            Args:
                features: Mapping that provides the "input_ids",
                    "input_mask", "segment_ids" and "label_ids" entries and,
                    optionally, "is_real_example".
                labels: Unused; labels are read from `features["label_ids"]`.
                mode: One of the `tf.estimator.ModeKeys` values.
                params: Unused.

            Returns:
                A `tf.estimator.EstimatorSpec` for the given mode.
            """

            input_ids = features["input_ids"]
            input_mask = features["input_mask"]
            segment_ids = features["segment_ids"]
            label_ids = features["label_ids"]
            if "is_real_example" in features:
                is_real_example = tf.cast(features["is_real_example"],
                                          dtype=tf.float32)
            else:
                is_real_example = tf.ones(tf.shape(label_ids),
                                          dtype=tf.float32)

            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            (total_loss, per_example_loss, logits,
             probabilities) = self.create_model(bert_config, is_training,
                                                input_ids, input_mask,
                                                segment_ids, label_ids,
                                                num_labels,
                                                use_one_hot_embeddings)

            tvars = tf.trainable_variables()
            initialized_variable_names = {}
            if init_checkpoint:
                (assignment_map, initialized_variable_names
                 ) = modeling.get_assignment_map_from_checkpoint(
                     tvars, init_checkpoint)
                # The specs returned below are plain `EstimatorSpec` objects,
                # whose `scaffold` must be a `Scaffold` instance or None.
                # The earlier TPU branch handed over a scaffold *function*
                # instead, which `EstimatorSpec` rejects, and never applied
                # the checkpoint. Initialise directly in every case.
                tf.train.init_from_checkpoint(init_checkpoint,
                                              assignment_map)

            tf.logging.info("**** Trainable Variables ****")
            for var in tvars:
                init_string = ""
                if var.name in initialized_variable_names:
                    init_string = ", *INIT_FROM_CKPT*"
                # Emit the per-variable line; the marker computed above was
                # previously thrown away.
                tf.logging.info("  name = %s, shape = %s%s", var.name,
                                var.shape, init_string)

            output_spec = None
            if mode == tf.estimator.ModeKeys.TRAIN:

                train_op = optimization.create_optimizer(
                    total_loss, learning_rate, num_train_steps,
                    num_warmup_steps, use_tpu)

                output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                         loss=total_loss,
                                                         train_op=train_op)
            elif mode == tf.estimator.ModeKeys.EVAL:

                def metric_fn(per_example_loss, label_ids, probabilities,
                              is_real_example):
                    """Return one AUC metric per class plus the mean loss.

                    `probabilities` and `label_ids` are split into
                    `num_labels` pieces along the last axis; piece `j` is
                    reported under the key `str(j)`. `is_real_example` is
                    accepted but not applied as a weight.
                    """
                    probs_split = tf.split(probabilities, num_labels, axis=-1)
                    label_ids_split = tf.split(label_ids, num_labels, axis=-1)
                    eval_dict = {}

                    for j, class_probs in enumerate(probs_split):
                        label_id_ = tf.cast(label_ids_split[j], dtype=tf.int32)

                        current_auc, update_op_auc = tf.metrics.auc(
                            label_id_, class_probs)
                        eval_dict[str(j)] = (current_auc, update_op_auc)

                    eval_dict['eval_loss'] = tf.metrics.mean(
                        values=per_example_loss)

                    return eval_dict

                eval_metrics = metric_fn(per_example_loss, label_ids,
                                         probabilities, is_real_example)

                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metric_ops=eval_metrics)
            else:
                print("mode:", mode, "probabilities:", probabilities)
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    predictions={"probabilities": probabilities})
            return output_spec
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Builds a BERT encoder, feeds it to `model_builder.create_model` to get
        the ordering loss/logits, optionally adds the masked-LM loss when the
        enclosing `add_masking` flag is set, and wraps the result in a
        `TPUEstimatorSpec` for the requested mode.

        Args:
            features: dict of input tensors. Reads "input_ids", "input_mask",
                "segment_ids", "order", "decoder_input" and "sep_positions";
                when `add_masking` is set also "mask_indices",
                "target_token_ids" and "target_token_weights".
            labels: unused.
            mode: a `tf.estimator.ModeKeys` value.
            params: unused.

        Returns:
            A `contrib_tpu.TPUEstimatorSpec` carrying a train op (TRAIN),
            accuracy/loss eval metrics (EVAL), or a "probabilities"
            prediction dict (any other mode).
        """

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["order"]
        decoder_inputs = features["decoder_input"]
        sep_positions = features["sep_positions"]
        if add_masking:
            masked_lm_positions = features["mask_indices"]
            masked_lm_ids = features["target_token_ids"]
            masked_lm_weights = features["target_token_weights"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        # The train batch size is used only in TRAIN; EVAL and PREDICT both
        # fall back to the eval batch size.
        (order_loss, per_example_loss, logits,
         probabilities) = model_builder.create_model(
             model, label_ids, decoder_inputs,
             FLAGS.train_batch_size if is_training else FLAGS.eval_batch_size,
             FLAGS.order_model_type, sep_positions)

        total_loss = order_loss
        if add_masking:
            # Only the scalar masked-LM loss is needed here; the other two
            # return values are ignored.
            (masked_lm_loss, _, _) = model_builder.get_masked_lm_output(
                bert_config, model.get_sequence_output(),
                model.get_embedding_table(), masked_lm_positions,
                masked_lm_ids, masked_lm_weights)
            total_loss = order_loss + masked_lm_loss

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            # Exclude the token-type embeddings from the checkpoint restore.
            # The keys are collected first: deleting entries while iterating
            # the live key view raises RuntimeError on Python 3.
            # NOTE (from the original author): this probably shouldn't be
            # done after the first time.
            skipped_keys = [
                key for key in assignment_map
                if "bert/embeddings/token_type_embeddings" in key
            ]
            for key in skipped_keys:
                del assignment_map[key]
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       loss=total_loss,
                                                       train_op=train_op,
                                                       scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits):
                """Return streaming accuracy and mean loss for evaluation."""
                # Logits are viewed as a 5x5 grid per example and the argmax
                # is taken over the last axis.
                packed_logits = tf.reshape(
                    tf.convert_to_tensor(logits, dtype=tf.float32), (-1, 5, 5))
                predictions = tf.argmax(packed_logits,
                                        axis=-1,
                                        output_type=tf.int32)
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions)
                loss = tf.metrics.mean(values=per_example_loss)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={"probabilities": probabilities},
                scaffold_fn=scaffold_fn)
        return output_spec
Exemple #10
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Build the BERT pre-training graph and return a TPUEstimatorSpec.

        The total loss is the sum of the masked-LM loss and the next-sentence
        loss. TRAIN yields a spec with a train op, EVAL yields a spec with
        accuracy/loss metrics for both heads; any other mode raises
        ValueError.
        """

        tf.logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (feature_name, features[feature_name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        next_sentence_labels = features["next_sentence_labels"]

        bert = modeling.BertModel(
            config=bert_config,
            is_training=(mode == tf.estimator.ModeKeys.TRAIN),
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        # Masked-LM head on the per-token outputs.
        mlm_outputs = get_masked_lm_output(
            bert_config, bert.get_sequence_output(),
            bert.get_embedding_table(), masked_lm_positions, masked_lm_ids,
            masked_lm_weights)
        masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs = mlm_outputs

        # Next-sentence head on the pooled output.
        nsp_outputs = get_next_sentence_output(
            bert_config, bert.get_pooled_output(), next_sentence_labels)
        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = nsp_outputs

        total_loss = masked_lm_loss + next_sentence_loss

        trainable_vars = tf.trainable_variables()

        scaffold_fn = None
        initialized_variable_names = {}
        if init_checkpoint:
            assignment_map, initialized_variable_names = (
                modeling.get_assignment_map_from_checkpoint(
                    trainable_vars, init_checkpoint))
            if not use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            else:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold

        tf.logging.info("**** Trainable Variables ****")
        for variable in trainable_vars:
            restored = variable.name in initialized_variable_names
            init_string = ", *INIT_FROM_CKPT*" if restored else ""
            tf.logging.info("  name = %s, shape = %s%s", variable.name,
                            variable.shape, init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        if mode != tf.estimator.ModeKeys.EVAL:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                      masked_lm_ids, masked_lm_weights,
                      next_sentence_example_loss, next_sentence_log_probs,
                      next_sentence_labels):
            """Computes the loss and accuracy of the model."""
            # Masked-LM: flatten everything to one row per masked position.
            flat_mlm_log_probs = tf.reshape(
                masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
            mlm_predictions = tf.argmax(flat_mlm_log_probs,
                                        axis=-1,
                                        output_type=tf.int32)
            flat_mlm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
            flat_mlm_ids = tf.reshape(masked_lm_ids, [-1])
            flat_mlm_weights = tf.reshape(masked_lm_weights, [-1])
            mlm_accuracy = tf.metrics.accuracy(
                labels=flat_mlm_ids,
                predictions=mlm_predictions,
                weights=flat_mlm_weights)
            mlm_mean_loss = tf.metrics.mean(values=flat_mlm_example_loss,
                                            weights=flat_mlm_weights)

            # Next-sentence: unweighted accuracy and mean loss.
            flat_nsp_log_probs = tf.reshape(
                next_sentence_log_probs,
                [-1, next_sentence_log_probs.shape[-1]])
            nsp_predictions = tf.argmax(flat_nsp_log_probs,
                                        axis=-1,
                                        output_type=tf.int32)
            flat_nsp_labels = tf.reshape(next_sentence_labels, [-1])
            nsp_accuracy = tf.metrics.accuracy(labels=flat_nsp_labels,
                                               predictions=nsp_predictions)
            nsp_mean_loss = tf.metrics.mean(values=next_sentence_example_loss)

            return {
                "masked_lm_accuracy": mlm_accuracy,
                "masked_lm_loss": mlm_mean_loss,
                "next_sentence_accuracy": nsp_accuracy,
                "next_sentence_loss": nsp_mean_loss,
            }

        metric_inputs = [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights, next_sentence_example_loss,
            next_sentence_log_probs, next_sentence_labels
        ]
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_inputs),
            scaffold_fn=scaffold_fn)
Exemple #11
0
def train():
    """Train the chat model using the settings in `params.ini`.

    Reads the `chat_model` section of `params.ini`, builds the input
    pipeline, the `ChatModel` graph and the optimizer, then runs the
    training loop inside a `MonitoredTrainingSession` that checkpoints to
    the configured log directory. Every 100 steps the loss and up to four
    example question / ground-truth / prediction triples are printed.

    The loop ends when the session reports it should stop, on Ctrl-C, or on
    any exception raised inside the loop; the exception is printed rather
    than re-raised.
    """
    parser = configparser.ConfigParser()
    parser.read('params.ini')
    max_x_len = int(parser.get('chat_model', 'max_x_len'))
    max_y_len = int(parser.get('chat_model', 'max_y_len'))
    decode_max_len = int(parser.get('chat_model', 'decode_max_len'))
    vocab_file = parser.get('chat_model', 'vocab_file')
    config_file = parser.get('chat_model', 'config_file')
    ckpt_file = parser.get('chat_model', 'ckpt_file')
    beam_width = int(parser.get('chat_model', 'beam_width'))
    batch_size = int(parser.get('chat_model', 'batch_size'))
    lr = float(parser.get('chat_model', 'lr'))
    dropout_rate = float(parser.get('chat_model', 'dropout_rate'))
    train_nums = int(parser.get('chat_model', 'train_data_size'))
    warmup_proportion = float(parser.get('chat_model', 'warmup_proportion'))
    epochs = int(parser.get('chat_model', 'epochs'))
    length_penalty_weight = float(parser.get('chat_model', 'length_penalty_weight'))
    coverage_penalty_weight = float(parser.get('chat_model', 'coverage_penalty_weight'))
    log_dir = parser.get('chat_model', 'log_dir')
    data_dir = parser.get('chat_model', 'data_dir')
    train_file = parser.get('chat_model', 'train_file')
    train_file = os.path.join(data_dir, train_file)
    tokenizer = tokenization.FullTokenizer(vocab_file)
    chatmodel_config = ChatModelConfig(
        max_x_len, max_y_len, decode_max_len,
        tokenizer.vocab, config_file, dropout_rate, ckpt_file, beam_width,
        coverage_penalty_weight, length_penalty_weight
    )
    os.makedirs(log_dir, exist_ok=True)
    graph = tf.Graph()
    step = 0
    eval_log = []
    # At most this many examples of a batch are printed at each report.
    max_examples_shown = 4
    with graph.as_default():
        input_fn = file_based_input_fn_builder(train_file, max_x_len, max_y_len, True, True)
        ds = input_fn(batch_size)
        iterator = ds.make_one_shot_iterator()
        batch_inputs = iterator.get_next()
        chat_model = ChatModel(chatmodel_config)
        loss, _, predictions, train_predictions = chat_model.loss()
        num_train_steps = int(train_nums/batch_size*epochs)
        num_warmup_steps = int(num_train_steps * warmup_proportion)
        train_op = optimization.create_optimizer(
            loss, lr, num_train_steps, num_warmup_steps, False)
        # No separate tf.Session is opened to run the global initializer:
        # MonitoredTrainingSession initializes (or restores) the variables
        # itself, and a throwaway session would never be closed.
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        hooks = [tf.train.StopAtStepHook(last_step=num_train_steps),
                 tf.train.NanTensorHook(loss)]
        with tf.train.MonitoredTrainingSession(checkpoint_dir=log_dir,
                                               hooks=hooks,
                                               config=session_config) as sess:
            try:
                while not sess.should_stop():
                    # The raw session is used for every run call, exactly as
                    # in the original loop.
                    # NOTE(review): this presumably bypasses the hooks'
                    # per-step callbacks - confirm this is intended.
                    raw_sess = sess._tf_sess()
                    trainDatas = raw_sess.run(batch_inputs)
                    feed_dict = make_feed_dict(chat_model, trainDatas, 0.1)
                    train_loss, _ = raw_sess.run(
                        [loss, train_op], feed_dict=feed_dict
                    )
                    if step % 100 == 0:
                        print('====> step:{:06d}|{}\t[train loss:{:.3f}]'.format(
                            step, num_train_steps, train_loss))
                        eval_val, _ = raw_sess.run([predictions, train_predictions], feed_dict)
                        # Bound the sample count by the actual batch size so
                        # a batch with fewer than four rows no longer raises
                        # IndexError and ends training.
                        num_inputs = min(max_examples_shown, len(trainDatas['x']))
                        for i in range(num_inputs):
                            print('question:\t', ''.join(tokenizer.convert_ids_to_tokens(trainDatas['x'][i])))
                            print('groud truth:\t', ''.join(tokenizer.convert_ids_to_tokens(trainDatas['y'][i])))
                        num_preds = min(max_examples_shown, len(eval_val))
                        for i in range(num_preds):
                            print('predictions:\t', ''.join(tokenizer.convert_ids_to_tokens(eval_val[i])))
                        print(eval_val.shape)
                        print('\n')
                        eval_log.append(tokenizer.convert_ids_to_tokens(eval_val[0]))
                        eval_log.append(eval_val[0])
                    step += 1
            except KeyboardInterrupt as e:
                # Manual interruption: report and fall through so the
                # monitored session closes cleanly.
                print(e)
            except Exception as e:
                # Best-effort loop: any error inside it (including the input
                # iterator running out of data) is printed and ends training
                # instead of propagating.
                print(e)
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Builds a BERT encoder with only the masked-LM head (the next-sentence
        head is disabled, so the total loss is the masked-LM loss), attaches
        logging hooks for training and evaluation, and returns a
        `TPUEstimatorSpec`.

        Args:
            features: dict of input tensors. Reads "input_ids", "input_mask",
                "segment_ids", "masked_lm_positions", "masked_lm_ids",
                "masked_lm_weights" and "next_sentence_labels".
            labels: unused.
            mode: a `tf.estimator.ModeKeys` value.
            params: unused.

        Returns:
            A `tf.contrib.tpu.TPUEstimatorSpec` for TRAIN or EVAL.

        Raises:
            ValueError: if `mode` is neither TRAIN nor EVAL.
        """

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        # Unused while the next-sentence head is disabled; the lookup is kept
        # so the set of required feature keys is unchanged.
        next_sentence_labels = features["next_sentence_labels"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        # The next-sentence loss is intentionally left out of the total.
        total_loss = masked_lm_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names) = \
                modeling.get_assignment_map_from_checkpoint(tvars,
                                                            init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                      masked_lm_ids, masked_lm_weights):
            """Computes the loss and accuracy of the model."""
            masked_lm_log_probs = tf.reshape(
                masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
            masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                              axis=-1,
                                              output_type=tf.int32)
            masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
            masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
            masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
            masked_lm_accuracy = tf.metrics.accuracy(
                labels=masked_lm_ids,
                predictions=masked_lm_predictions,
                weights=masked_lm_weights)
            masked_lm_mean_loss = tf.metrics.mean(
                values=masked_lm_example_loss, weights=masked_lm_weights)

            return {
                "masked_lm_accuracy": masked_lm_accuracy,
                "masked_lm_loss": masked_lm_mean_loss
            }

        # Since the tf.metrics are rolling averages, it does not make
        # sense to have them for training. The method below instead
        # calculates the per-batch accuracy, which is used for training.
        def metric_fn_train_batch(masked_lm_log_probs, masked_lm_ids,
                                  masked_lm_weights):
            """Return the (optionally weighted) accuracy of the current batch.

            `masked_lm_weights` may be None, in which case every position
            counts equally.
            """
            masked_lm_log_probs = tf.reshape(
                masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
            masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                              axis=-1,
                                              output_type=tf.int32)
            masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
            if masked_lm_ids.dtype != masked_lm_predictions.dtype:
                masked_lm_predictions = tf.cast(masked_lm_predictions,
                                                masked_lm_ids.dtype)
            is_correct = tf.to_float(
                math_ops.equal(masked_lm_predictions, masked_lm_ids))
            # The None check must come before the reshape: a reshaped tensor
            # is never None, which previously made this branch unreachable.
            if masked_lm_weights is None:
                count = tf.to_float(tf.size(is_correct))
            else:
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_weights = weights_broadcast_ops.broadcast_weights(
                    tf.to_float(masked_lm_weights), is_correct)
                is_correct = tf.math.multiply(is_correct, masked_lm_weights)
                count = tf.math.reduce_sum(masked_lm_weights)
            acc_value = tf.math.reduce_sum(is_correct) / count
            return dict(masked_lm_accuracy_train_batch=acc_value)

        # train hooks
        tensors_logging_hook_train = dict(
            batch_loss=total_loss,
            step=tf.train.get_global_step(),
        )
        metrics_train_batch = metric_fn_train_batch(masked_lm_log_probs,
                                                    masked_lm_ids,
                                                    masked_lm_weights)
        tensors_logging_hook_train.update(metrics_train_batch)
        logging_hook_train = tf.train.LoggingTensorHook(
            tensors_logging_hook_train, every_n_iter=10)

        # eval hooks
        tensors_logging_hook_eval = dict(
            eval_batch_loss=total_loss,
            step=tf.train.get_global_step(),
        )
        logging_hook_eval = tf.train.LoggingTensorHook(
            tensors_logging_hook_eval, every_n_iter=10)
        eval_hooks = [logging_hook_eval]
        if eval_saving_path:
            # Hand the raw per-batch eval tensors to SavingTensorHook,
            # together with the configured saving path.
            eval_hooks.append(
                SavingTensorHook(dict(
                    masked_lm_example_loss=masked_lm_example_loss,
                    masked_lm_log_probs=masked_lm_log_probs,
                    masked_lm_ids=masked_lm_ids,
                    masked_lm_weights=masked_lm_weights,
                    total_loss=total_loss,
                    step=tf.train.get_global_step(),
                ),
                                 eval_saving_path,
                                 every_n_iter=1))

        # tensorboard summaries for train metrics
        for metric_name, metric_value in metrics_train_batch.items():
            tf.summary.scalar(metric_name, metric_value)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                training_hooks=[logging_hook_train],
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (
                metric_fn,
                [
                    masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                    masked_lm_weights
                ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                evaluation_hooks=eval_hooks,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             mode)

        return output_spec
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Builds the classifier graph via `create_model`, optionally
        initializes variables from `init_checkpoint`, and returns a
        `TPUEstimatorSpec`.

        Args:
            features: dict of input tensors. Reads "input_ids", "input_mask",
                "segment_ids" and "label_ids".
            labels: unused.
            mode: a `tf.estimator.ModeKeys` value.
            params: unused.

        Returns:
            A `tf.contrib.tpu.TPUEstimatorSpec` with a train op (TRAIN) or
            accuracy/loss eval metrics (EVAL).

        Raises:
            ValueError: if `mode` is neither TRAIN nor EVAL.
        """

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
            num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()

        # Must be bound even when no checkpoint is given: the logging loop
        # below reads it unconditionally and would otherwise raise NameError.
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits):
                """Return streaming accuracy and mean loss for evaluation."""
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(label_ids, predictions)
                loss = tf.metrics.mean(per_example_loss)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

        return output_spec
Exemple #14
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Builds the `TPUEstimatorSpec` for the token-label / predicate-head model.

        Configuration such as `bert_config`, `init_checkpoint`, `use_tpu`,
        `num_token_labels` and the optimizer settings is read from the
        enclosing scope.

        Args:
          features: dict of input tensors. Reads "input_ids", "input_mask",
            "segment_ids", "token_label_ids", "predicate_matrix_ids" and,
            when present, "is_real_example".
          labels: unused.
          mode: a `tf.estimator.ModeKeys` value selecting which spec to build.
          params: unused.

        Returns:
          A `tf.contrib.tpu.TPUEstimatorSpec` carrying a train op (TRAIN),
          eval metrics (EVAL) or a predictions dict (any other mode).
        """
        tf.logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (feature_name, features[feature_name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        token_label_ids = features["token_label_ids"]
        predicate_matrix_ids = features["predicate_matrix_ids"]

        # Use the supplied real-example marker when there is one; otherwise
        # fall back to all-ones weights shaped like `token_label_ids`.
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(token_label_ids), dtype=tf.float32)  # TO DO

        training = (mode == tf.estimator.ModeKeys.TRAIN)

        model_outputs = create_model(
            bert_config, training, input_ids, input_mask, segment_ids,
            token_label_ids, predicate_matrix_ids, num_token_labels, num_predicate_labels,
            use_one_hot_embeddings)
        (total_loss,
         predicate_head_select_loss, predicate_head_probabilities, predicate_head_predictions,
         token_label_loss, token_label_per_example_loss, token_label_logits,
         token_label_predictions) = model_outputs

        trainable_vars = tf.trainable_variables()
        restored_names = {}
        scaffold_fn = None
        if init_checkpoint:
            assignment_map, restored_names = modeling.get_assignment_map_from_checkpoint(
                trainable_vars, init_checkpoint)
            if not use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            else:
                # On TPU the checkpoint restore is deferred to the scaffold.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold

        tf.logging.info("**** Trainable Variables ****")
        for variable in trainable_vars:
            suffix = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
            tf.logging.info("  name = %s, shape = %s%s", variable.name, variable.shape,
                            suffix)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        if mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(predicate_head_select_loss, token_label_per_example_loss, token_label_ids, token_label_logits,
                          is_real_example):
                """Returns macro/micro precision, recall and F1 plus mean losses."""
                predicted_ids = tf.argmax(token_label_logits, axis=-1, output_type=tf.int32)
                # Score label ids 4 .. num_token_labels - 2. Per the original
                # author's notes, the first four ids are the special labels
                # ["[Padding]", "[##WordPiece]", "[CLS]", "[SEP]"] and the last
                # id is "O", which is not scored.
                scored_label_ids = list(range(4, num_token_labels - 1))

                def score(metric, average):
                    return metric(token_label_ids, predicted_ids, num_token_labels,
                                  scored_label_ids, average=average)

                precision_macro = score(tf_metrics.precision, "macro")
                recall_macro = score(tf_metrics.recall, "macro")
                f_macro = score(tf_metrics.f1, "macro")
                precision_micro = score(tf_metrics.precision, "micro")
                recall_micro = score(tf_metrics.recall, "micro")
                f_micro = score(tf_metrics.f1, "micro")
                mean_token_label_loss = tf.metrics.mean(
                    values=token_label_per_example_loss, weights=is_real_example)
                mean_predicate_loss = tf.metrics.mean(values=predicate_head_select_loss)
                return {
                    "predicate_head_select_loss": mean_predicate_loss,
                    "eval_token_label_precision(macro)": precision_macro,
                    "eval_token_label_recall(macro)": recall_macro,
                    "eval_token_label_f(macro)": f_macro,
                    "eval_token_label_precision(micro)": precision_micro,
                    "eval_token_label_recall(micro)": recall_micro,
                    "eval_token_label_f(micro)": f_micro,
                    "eval_token_label_loss": mean_token_label_loss,
                }

            metric_inputs = [predicate_head_select_loss, token_label_per_example_loss,
                             token_label_ids, token_label_logits, is_real_example]
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=(metric_fn, metric_inputs),
                scaffold_fn=scaffold_fn)

        # Any other mode: expose the model outputs as predictions.
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={
                "predicate_head_probabilities": predicate_head_probabilities,
                "predicate_head_predictions": predicate_head_predictions,
                "token_label_predictions": token_label_predictions},
            scaffold_fn=scaffold_fn)
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Builds a BERT model with a masked-LM head and, when
        `FLAGS.use_next_sentence_prediction` is set, a next-sentence head.
        Configuration such as `bert_config`, `init_checkpoint`, `use_tpu` and
        the optimizer settings is read from the enclosing scope.

        Args:
          features: dict of input tensors. Reads "input_ids", "input_mask",
            "segment_ids", "masked_lm_positions", "masked_lm_ids",
            "masked_lm_weights" and "next_sentence_labels".
          labels: unused.
          mode: a `tf.estimator.ModeKeys` value.
          params: unused.

        Returns:
          A `TPUEstimatorSpec` for TRAIN (with a summary `host_call`) or for
          EVAL (with `eval_metrics`).

        Raises:
          ValueError: if `mode` is neither TRAIN nor EVAL.
        """

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        next_sentence_labels = features["next_sentence_labels"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        if FLAGS.use_next_sentence_prediction:
            sample_weights = None
            if FLAGS.no_nsp_while_masking:
                # NOTE(review): this mask is 1.0 wherever the per-example sum
                # of `masked_lm_weights` is >= 0. If those weights are never
                # negative the mask is all ones -- confirm the intended
                # comparison against the flag's purpose.
                sample_weights = tf.cast(
                    tf.math.greater_equal(
                        tf.reduce_sum(masked_lm_weights, axis=1), 0.0),
                    tf.float32)
            (next_sentence_loss, next_sentence_example_loss,
             next_sentence_log_probs) = get_next_sentence_output(
                 bert_config, model.get_pooled_output(), next_sentence_labels,
                 sample_weights)

            # Compute total weighted loss:
            #   if mlm_loss_weight=1, this amounts to summing up the losses.
            total_loss = (bert_config.mlm_loss_weight * masked_lm_loss +
                          next_sentence_loss) / (
                              1 + bert_config.mlm_loss_weight) * 2

            next_sentence_log_probs = tf.reshape(
                next_sentence_log_probs,
                [-1, next_sentence_log_probs.shape[-1]])
            next_sentence_predictions = tf.argmax(next_sentence_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
            next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
            next_sentence_accuracy = tf.metrics.accuracy(
                labels=next_sentence_labels,
                predictions=next_sentence_predictions)
        else:
            # Without the next-sentence head none of the `next_sentence_*`
            # outputs exist; every later use must be guarded by the same flag.
            total_loss = masked_lm_loss

        masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
                                         [-1, masked_lm_log_probs.shape[-1]])
        masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                          axis=-1,
                                          output_type=tf.int32)
        masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
        masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
        masked_lm_accuracy = tf.metrics.accuracy(
            labels=masked_lm_ids,
            predictions=masked_lm_predictions,
            weights=masked_lm_weights)

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.logging.info("**** Assignment map **** %s" % assignment_map)
            for x in assignment_map:
                tf.logging.info(x)

            if use_tpu:

                # On TPU the checkpoint restore is deferred to the scaffold.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            # Host function for saving summaries.
            def _host_fn(**kwargs):
                """Writes one scalar summary per keyword tensor under output_dir/train."""
                global_step = kwargs.pop("global_step")[0]
                with tf.compat.v2.summary.create_file_writer(
                        os.path.join(FLAGS.output_dir, "train")).as_default():
                    with tf.compat.v2.summary.record_summaries_every_n_global_steps(
                            FLAGS.steps_per_summary, global_step):
                        for name, tensor in kwargs.items():
                            tf.compat.v2.summary.scalar(name,
                                                        tf.reduce_mean(tensor),
                                                        step=global_step)
                    return tf.summary.all_v2_summary_ops()

            global_step = tf.train.get_or_create_global_step()
            if FLAGS.use_next_sentence_prediction:
                host_inputs = {
                    "global_step":
                    tf.expand_dims(global_step, 0),
                    "loss/mlm_loss":
                    tf.expand_dims(masked_lm_loss, 0),
                    "loss/cls_loss":
                    tf.expand_dims(next_sentence_loss, 0),
                    "loss/total_loss":
                    tf.expand_dims(total_loss, 0),
                    "accuracy/mlm_accuracy":
                    tf.expand_dims(masked_lm_accuracy, 0),
                    "accuracy/cls_accuracy":
                    tf.expand_dims(next_sentence_accuracy, 0),
                }
            else:
                host_inputs = {
                    "global_step": tf.expand_dims(global_step, 0),
                    "loss/mlm_loss": tf.expand_dims(masked_lm_loss, 0),
                    "loss/total_loss": tf.expand_dims(total_loss, 0),
                    "accuracy/mlm_accuracy":
                    tf.expand_dims(masked_lm_accuracy, 0),
                }
            host_call = (_host_fn, host_inputs)
            output_spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                host_call=host_call,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def mlm_metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                              masked_lm_ids, masked_lm_weights):
                """Computes the masked-LM loss and accuracy metrics."""
                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(masked_lm_example_loss,
                                                    [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)
                return {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                }

            def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                          masked_lm_ids, masked_lm_weights,
                          next_sentence_example_loss, next_sentence_log_probs,
                          next_sentence_labels):
                """Computes the loss and accuracy of the model."""
                metrics = mlm_metric_fn(masked_lm_example_loss,
                                        masked_lm_log_probs, masked_lm_ids,
                                        masked_lm_weights)

                next_sentence_log_probs = tf.reshape(
                    next_sentence_log_probs,
                    [-1, next_sentence_log_probs.shape[-1]])
                next_sentence_predictions = tf.argmax(next_sentence_log_probs,
                                                      axis=-1,
                                                      output_type=tf.int32)
                next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
                metrics["next_sentence_accuracy"] = tf.metrics.accuracy(
                    labels=next_sentence_labels,
                    predictions=next_sentence_predictions)
                metrics["next_sentence_loss"] = tf.metrics.mean(
                    values=next_sentence_example_loss)
                return metrics

            # The next-sentence tensors only exist when that head was built,
            # so choose the metric function and its inputs together. The
            # tensor list is kept the same length as the function's argument
            # list (assumed to be required by TPUEstimator -- TODO confirm).
            if FLAGS.use_next_sentence_prediction:
                eval_metrics = (metric_fn, [
                    masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                    masked_lm_weights, next_sentence_example_loss,
                    next_sentence_log_probs, next_sentence_labels
                ])
            else:
                eval_metrics = (mlm_metric_fn, [
                    masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                    masked_lm_weights
                ])
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Builds the `TPUEstimatorSpec` for the classification model.

        Configuration such as `bert_config`, `init_checkpoint`, `use_tpu`,
        `num_labels` and the optimizer settings is read from the enclosing
        scope.

        Args:
          features: dict of input tensors. Reads "input_ids", "input_mask",
            "segment_ids", "label_ids" and, when present, "is_real_example".
          labels: unused.
          mode: a `tf.estimator.ModeKeys` value selecting which spec to build.
          params: unused.

        Returns:
          A `tf.contrib.tpu.TPUEstimatorSpec` carrying a train op (TRAIN),
          eval metrics (EVAL) or a "probabilities" prediction (any other mode).
        """
        tf.logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (feature_name, features[feature_name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        # Use the supplied real-example marker when there is one; otherwise
        # weight every example with 1.0.
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        training = (mode == tf.estimator.ModeKeys.TRAIN)

        model_outputs = create_model(bert_config, training, input_ids,
                                     input_mask, segment_ids, label_ids,
                                     num_labels, use_one_hot_embeddings)
        total_loss, per_example_loss, logits, probabilities = model_outputs

        trainable_vars = tf.trainable_variables()
        restored_names = {}
        scaffold_fn = None
        if init_checkpoint:
            assignment_map, restored_names = (
                modeling.get_assignment_map_from_checkpoint(
                    trainable_vars, init_checkpoint))
            if not use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            else:
                # On TPU the checkpoint restore is deferred to the scaffold.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold

        tf.logging.info("**** Trainable Variables ****")
        for variable in trainable_vars:
            suffix = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
            tf.logging.info("  name = %s, shape = %s%s", variable.name,
                            variable.shape, suffix)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        if mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                """Returns accuracy and mean loss, both weighted by `is_real_example`."""
                predicted_ids = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predicted_ids,
                                               weights=is_real_example)
                mean_loss = tf.metrics.mean(values=per_example_loss,
                                            weights=is_real_example)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": mean_loss,
                }

            metric_inputs = [per_example_loss, label_ids, logits, is_real_example]
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=(metric_fn, metric_inputs),
                scaffold_fn=scaffold_fn)

        # Any other mode: expose the class probabilities as predictions.
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={"probabilities": probabilities},
            scaffold_fn=scaffold_fn)
Exemple #17
0
    def model_fn(features, labels, mode, params):
        """Builds the `tf.estimator.EstimatorSpec` for the sequence-labelling model.

        Configuration such as `bert_config`, `init_checkpoint`, `num_labels`,
        `args` and the optimizer settings is read from the enclosing scope.

        Args:
          features: dict of input tensors. Reads "input_ids", "input_mask",
            "segment_ids" and "label_ids".
          labels: unused.
          mode: a `tf.estimator.ModeKeys` value selecting which spec to build.
          params: unused.

        Returns:
          A `tf.estimator.EstimatorSpec` carrying a train op and logging hook
          (TRAIN), eval metric ops (EVAL) or `pred_ids` as predictions (any
          other mode).
        """

        # 1. Extract the contents of `features`.
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        print('shape of input_ids', input_ids.shape)

        # 2. create_model
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        total_loss, logits, trans, pred_ids = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids,
            label_ids, num_labels, False, args.dropout_rate, args.lstm_size,
            args.cell, args.num_layers)

        tvars = tf.trainable_variables()
        # Must be bound even without a checkpoint: the logging loop below
        # reads it unconditionally.
        initialized_variable_names = {}
        if init_checkpoint:
            (assignment_map, initialized_variable_names) = \
                 modeling.get_assignment_map_from_checkpoint(tvars,
                                                             init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        # 3. Return the EstimatorSpec.
        output_spec = None

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, False)

            hook_dict = {}  # hook_dict records the loss and the step count
            hook_dict['loss'] = total_loss
            hook_dict['global_steps'] = tf.train.get_or_create_global_step()
            logging_hook = tf.train.LoggingTensorHook(
                hook_dict, every_n_iter=args.save_summary_steps)

            output_spec = tf.estimator.EstimatorSpec(  # required arguments: loss, train_op
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                training_hooks=[logging_hook])

        elif mode == tf.estimator.ModeKeys.EVAL:
            # PROBLEM REMAIN: choice of evaluation metrics for eval
            def metric_fn(label_ids, pred_ids):
                """Returns precision, recall and F1 over the label ids in `indices`."""

                indices = [2, 3, 4]  # PROBLEM REMAIN: do these match the NERProcessor label indices?
                # NOTE(review): the mask is built from the single scalar
                # `args.max_seq_length`, not from per-example lengths or
                # `input_mask`; presumably it does not exclude padding --
                # confirm. It is handed to tf_metrics as the fifth positional
                # argument.
                weight = tf.sequence_mask(args.max_seq_length)
                precision = tf_metrics.precision(label_ids, pred_ids,
                                                 num_labels, indices, weight)
                recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                                           indices, weight)
                f1 = tf_metrics.f1(label_ids, pred_ids, num_labels, indices,
                                   weight)

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f1
                }

            eval_metrics = metric_fn(label_ids, pred_ids)
            output_spec = tf.estimator.EstimatorSpec(  # required argument: loss
                mode=mode,
                loss=total_loss,
                eval_metric_ops=eval_metrics)

        else:
            output_spec = tf.estimator.EstimatorSpec(  # required argument: predictions
                mode=mode, predictions=pred_ids)

        return output_spec
Exemple #18
0
    def model_fn(features, labels, mode, params):
        """Builds the `TPUEstimatorSpec` for the sequence-labelling model.

        Configuration such as `bert_config`, `init_checkpoint`, `use_tpu`,
        `num_labels`, `FLAGS` and the optimizer settings is read from the
        enclosing scope.

        Args:
          features: dict of input tensors. Reads "input_ids", "input_mask",
            "segment_ids" and "label_ids".
          labels: unused.
          mode: a `tf.estimator.ModeKeys` value selecting which spec to build.
          params: unused.

        Returns:
          A `tf.contrib.tpu.TPUEstimatorSpec` carrying a train op (TRAIN),
          eval metrics (EVAL) or `pred_ids` as predictions (any other mode).
        """
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        print('shape of input_ids', input_ids.shape)
        # label_mask = features["label_mask"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # Build the model from the arguments: input_ids is the index
        # representation of the input samples, label_ids that of the labels.
        (total_loss, logits, trans, pred_ids) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
            num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        scaffold_fn = None
        # Load the BERT checkpoint. The restore is wired up exactly once:
        # through the scaffold on TPU, directly otherwise.
        if init_checkpoint:
            (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars,
                                                                                                       init_checkpoint)
            if use_tpu:
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")

        # Per-variable logging of the loaded parameters is disabled:
        #         for var in tvars:
        #             init_string = ""
        #             if var.name in initialized_variable_names:
        #                 init_string = ", *INIT_FROM_CKPT*"
        #             tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
        #                             init_string)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)  # hook: uses BERT's parameters as this model's initial values
        elif mode == tf.estimator.ModeKeys.EVAL:
            # Modified for NER.
            def metric_fn(label_ids, pred_ids):
                """Returns precision, recall and F1 over the label ids in `indices`."""
                # First Viterbi-decode the result
                # CRF decoding

                indices = [2, 3, 4, 5, 6, 7]  # tells the metrics which labels to evaluate; corresponds to label_list
                # NOTE(review): the mask is built from the single scalar
                # `FLAGS.max_seq_length`, not from per-example lengths or
                # `input_mask`; presumably it does not exclude padding --
                # confirm. It is handed to tf_metrics as the fifth positional
                # argument.
                weight = tf.sequence_mask(FLAGS.max_seq_length)
                precision = tf_metrics.precision(label_ids, pred_ids, num_labels, indices, weight)
                recall = tf_metrics.recall(label_ids, pred_ids, num_labels, indices, weight)
                f = tf_metrics.f1(label_ids, pred_ids, num_labels, indices, weight)

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [label_ids, pred_ids])
            # eval_metrics = (metric_fn, [label_ids, logits])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)  #
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions=pred_ids,
                scaffold_fn=scaffold_fn
            )
        return output_spec
Exemple #19
0
    def model_fn(features, labels, mode, params):
        """The `model_fn` for TPUEstimator."""

        # The function signature is fixed as part of the estimator interface.
        # We pass task-specific labels as part of `features` and hence `labels` is
        # unused. `params` is for runtime parameters passed around by the estimator
        # framework and they are not used by us.
        # The unused parameters are deleted below.
        del labels, params

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s", name,
                            features[name].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        candidate_mask = features["candidate_mask"]
        error_location_mask = features["error_location_mask"]
        target_mask = features["target_mask"]

        sequence_length = tf.shape(input_ids)[1]

        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(input_ids)[0], dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, _,
         probabilities) = (create_original_varmisuse_model(
             bert_config=bert_config,
             is_training=is_training,
             enable_sequence_masking=enable_sequence_masking,
             input_ids=input_ids,
             input_mask=input_mask,
             segment_ids=segment_ids,
             candidate_mask=candidate_mask,
             target_mask=target_mask,
             error_location_mask=error_location_mask,
             use_one_hot_embeddings=use_one_hot_embeddings))

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names) = (
                modeling.get_assignment_map_from_checkpoint(
                    tvars, init_checkpoint))
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                       loss=total_loss,
                                                       train_op=train_op,
                                                       scaffold_fn=scaffold_fn)
            return output_spec

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, probabilities, error_location_mask,
                          target_mask, is_real_example):
                """Build the evaluation metrics for classification, localization and repair.

                An example is treated as buggy when `error_location_mask[:, 0]`
                is 0, and a prediction as non-buggy when the arg-max location is 0.

                Args:
                  per_example_loss: Loss values averaged into `eval_loss`.
                  probabilities: Tensor unstacked along axis 2 into location and
                    repair probabilities; the arg-max is taken over axis 1.
                  error_location_mask: Mask marking the true error location.
                  target_mask: Mask marking the valid repair locations.
                  is_real_example: Weights applied to every metric.

                Returns:
                  Dict mapping metric names to `tf.metrics` results.
                """
                has_bug = tf.equal(error_location_mask[:, 0], 0)
                no_bug = tf.logical_not(has_bug)

                location_probs, repair_probs = tf.unstack(probabilities, axis=2)
                location_guess = tf.argmax(
                    location_probs, axis=1, output_type=tf.int32)
                repair_guess = tf.argmax(
                    repair_probs, axis=1, output_type=tf.int32)

                no_bug_guess = tf.equal(location_guess, 0)

                location_guess_one_hot = tf.one_hot(
                    location_guess, sequence_length, dtype=tf.int32)
                repair_guess_one_hot = tf.one_hot(
                    repair_guess, sequence_length, dtype=tf.int32)

                def _accuracy_on_buggy(hits):
                    """Accuracy of `hits`, weighted so only buggy examples count."""
                    return tf.metrics.accuracy(
                        labels=tf.cast(has_bug, tf.int32),
                        predictions=hits,
                        weights=is_real_example * tf.cast(has_bug, tf.float32))

                results = {}
                results["eval_accuracy_classification"] = tf.metrics.accuracy(
                    labels=no_bug,
                    predictions=no_bug_guess,
                    weights=is_real_example)
                results["eval_true_positive_rate"] = tf.metrics.accuracy(
                    labels=no_bug,
                    predictions=no_bug_guess,
                    weights=is_real_example * tf.cast(no_bug, tf.float32))

                location_hits = tf.reduce_sum(
                    tf.multiply(location_guess_one_hot, error_location_mask),
                    axis=1)
                # `target_mask` may hold several ones because more than one
                # repair location can be valid; the sum is 1 exactly when the
                # predicted repair location is one of them.
                repair_hits = tf.reduce_sum(
                    tf.multiply(repair_guess_one_hot, target_mask), axis=1)
                joint_hits = location_hits * repair_hits

                results["eval_accuracy_localization"] = _accuracy_on_buggy(
                    location_hits)
                results["eval_accuracy_repair"] = _accuracy_on_buggy(
                    repair_hits)
                results["eval_accuracy_localization_repair"] = (
                    _accuracy_on_buggy(joint_hits))
                results["eval_loss"] = tf.metrics.mean(
                    values=per_example_loss, weights=is_real_example)
                return results

            eval_metrics = (metric_fn, [
                per_example_loss, probabilities, error_location_mask,
                target_mask, is_real_example
            ])
            output_spec = contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
            return output_spec

        else:
            output_spec = contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={"probabilities": probabilities},
                scaffold_fn=scaffold_fn)
            return output_spec
    def model_fn(features, labels, mode, params):
        """Build the `TPUEstimatorSpec` for the NER model.

        Args:
            features: Dict of input tensors returned by `input_fn`; must hold
                `input_ids`, `input_mask`, `segment_ids` and `label_ids`.
            labels: Unused; the labels are carried inside `features`.
            mode: A `tf.estimator.ModeKeys` value.
            params: Unused.

        Returns:
            A `tf.contrib.tpu.TPUEstimatorSpec` for the requested mode.
        """
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s", name, features[name].shape)
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        # Real tokens are 1 and padding is 0 in `input_mask`, so summing the
        # sign of the mask along axis 1 yields one length per sequence.
        used = tf.sign(tf.abs(input_mask))
        sequence_lens = tf.reduce_sum(used, 1)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # `logits` and `trans_params` are returned by `create_model` but are
        # not needed below; only the loss and the predictions are used.
        (loss, logits, trans_params, pred_y) = create_model(
            bert_config, is_training, input_ids, input_mask, sequence_lens, segment_ids, label_ids,
            num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None

        # Initialise the trainable variables from the BERT checkpoint.
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    # On TPU the checkpoint loading is deferred to the scaffold.
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # Log every trainable variable and whether it came from the checkpoint.
        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

            # `scaffold_fn` carries the BERT checkpoint initialisation.
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(label_ids, pred_y, sequence_lens):
                """Compute weighted precision, recall and F1 over the entity labels.

                Every tensor used here is received as an argument (via
                `eval_metrics`) rather than captured from the enclosing
                scope, which is what `TPUEstimatorSpec` expects of `metric_fn`.
                """
                # Label ids that count as positives for the metrics.
                positive_label_ids = [2, 3, 4, 5, 6, 7]
                weight = tf.sequence_mask(sequence_lens, FLAGS.max_seq_length)
                precision = tf_metrics.precision(
                    label_ids, pred_y, num_labels, positive_label_ids, weight)
                recall = tf_metrics.recall(
                    label_ids, pred_y, num_labels, positive_label_ids, weight)
                f1 = tf_metrics.f1(
                    label_ids, pred_y, num_labels, positive_label_ids, weight)

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f1": f1,
                }

            # `eval_metrics` must be a (metric_fn, tensors) tuple.
            eval_metrics = (metric_fn, [label_ids, pred_y, sequence_lens])

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)

        else:
            # Prediction only returns the predicted label ids.
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions=pred_y,
                scaffold_fn=scaffold_fn
            )
        return output_spec
Exemple #21
0
    def model_fn(features, labels, mode, params):
        """Build the `TPUEstimatorSpec` for the sequence-labelling model.

        Args:
            features: Dict of input tensors; must hold `input_ids`, `mask`,
                `segment_ids` and `label_ids`.
            labels: Unused; the labels are read from `features`.
            mode: A `tf.estimator.ModeKeys` value.
            params: Unused.

        Returns:
            A `tf.contrib.tpu.TPUEstimatorSpec` for the requested mode.
        """
        logging.info("*** Features ***")
        for name in sorted(features.keys()):
            logging.info("  name = %s, shape = %s", name, features[name].shape)
        input_ids = features["input_ids"]
        mask = features["mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        (total_loss, logits,
         predicts) = create_model(bert_config, is_training, input_ids, mask,
                                  segment_ids, label_ids, num_labels,
                                  use_one_hot_embeddings, FLAGS.use_lstm)

        tvars = tf.trainable_variables()

        scaffold_fn = None
        initialized_variable_names = {}
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            # Load the checkpoint exactly once: through the scaffold on TPU,
            # directly otherwise.
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            logging.info("**** Trainable Variables ****")
            logging.info(
                "=======================================variables to fine tune============================================="
            )
            # Only the variables from index `FLAGS.layer` onwards are listed.
            tvars = tvars[FLAGS.layer:]
            for var in tvars:
                init_string = ""
                if var.name in initialized_variable_names:
                    init_string = ", *INIT_FROM_CKPT*"
                logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                             init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu,
                                                     FLAGS.layer)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(label_ids, logits, mask):
                """Accumulate a confusion matrix of arg-max predictions, weighted by `mask`."""
                predictions = tf.math.argmax(logits,
                                             axis=-1,
                                             output_type=tf.int32)
                # `num_labels` is a plain Python value taken from the
                # enclosing scope; only tensors go through `eval_metrics`.
                cm = metrics.streaming_confusion_matrix(label_ids,
                                                        predictions,
                                                        num_labels - 1,
                                                        weights=mask)
                return {"confusion_matrix": cm}

            eval_metrics = (metric_fn, [label_ids, logits, mask])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
        return output_spec
        def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
            """Create the `TPUEstimatorSpec` for the requested estimator mode."""

            tf.logging.info("*** Features ***")
            for feature_name in sorted(features.keys()):
                tf.logging.info("  name = %s, shape = %s", feature_name,
                                features[feature_name].shape)

            input_ids = features["input_ids"]
            input_mask = features["input_mask"]
            segment_ids = features["segment_ids"]

            # Labels are only read from `features` outside of prediction.
            labels = None
            labels_mask = None
            if mode != tf.estimator.ModeKeys.PREDICT:
                labels = features["labels"]
                if self._config.use_t2t_decoder:
                    # Account for the begin and end tokens used by Transformer.
                    labels = labels + 2
                labels_mask = tf.cast(features["labels_mask"], tf.float32)

            model_outputs = self._create_model(mode, input_ids, input_mask,
                                               segment_ids, labels,
                                               labels_mask)
            total_loss, per_example_loss, predictions = model_outputs

            tvars = tf.trainable_variables()
            initialized_variable_names = {}
            scaffold_fn = None
            if self._init_checkpoint:
                checkpoint_info = modeling.get_assignment_map_from_checkpoint(
                    tvars, self._init_checkpoint)
                assignment_map, initialized_variable_names = checkpoint_info
                if not self._use_tpu:
                    tf.train.init_from_checkpoint(self._init_checkpoint,
                                                  assignment_map)
                else:

                    def tpu_scaffold():
                        tf.train.init_from_checkpoint(self._init_checkpoint,
                                                      assignment_map)
                        return tf.train.Scaffold()

                    scaffold_fn = tpu_scaffold

            # Only the header is logged; the per-variable listing is disabled.
            tf.logging.info("**** Trainable Variables ****")

            if mode == tf.estimator.ModeKeys.TRAIN:
                train_op = optimization.create_optimizer(
                    total_loss, self._learning_rate, self._num_train_steps,
                    self._num_warmup_steps, self._use_tpu)
                return tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    train_op=train_op,
                    scaffold_fn=scaffold_fn)

            if mode == tf.estimator.ModeKeys.EVAL:

                def metric_fn(per_example_loss, labels, labels_mask,
                              predictions):
                    """Compute eval metrics."""
                    # A position counts as correct when the prediction equals
                    # the label or the position is masked out.
                    position_ok = tf.logical_or(
                        tf.equal(labels, predictions),
                        tf.logical_not(tf.cast(labels_mask, tf.bool)))
                    # `reduce_all` is a logical AND along axis 1: a sentence is
                    # correct only if every position is correct.
                    sentence_ok = tf.reduce_all(position_ok, axis=1)
                    accuracy = tf.cast(sentence_ok, tf.float32)
                    return {
                        # This is equal to the Exact score if the final realization step
                        # doesn't introduce errors.
                        "sentence_level_acc": tf.metrics.mean(accuracy),
                        "eval_loss": tf.metrics.mean(per_example_loss),
                    }

                metric_inputs = [
                    per_example_loss, labels, labels_mask, predictions
                ]
                return tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metrics=(metric_fn, metric_inputs),
                    scaffold_fn=scaffold_fn)

            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={"pred": predictions},
                scaffold_fn=scaffold_fn)
Exemple #23
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Build the `TPUEstimatorSpec` for the multiple-choice BERT model."""

        tf.logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s", feature_name,
                            features[feature_name].shape)

        num_choices = 4
        # Feature slots 0..num_choices are read, one more than `num_choices`.
        slots = range(0, num_choices + 1)

        input_ids = [features["input_ids" + str(slot)] for slot in slots]
        input_mask = [features["input_mask" + str(slot)] for slot in slots]
        segment_ids = [features["segment_ids" + str(slot)] for slot in slots]
        label_ids = features["labels"][:, 4]

        # Stack the per-slot tensors on axis 1, then flatten to rows of
        # `seq_length`.
        seq_length = input_ids[0].shape[-1]
        flat_shape = [-1, seq_length]
        input_ids = tf.reshape(tf.stack(input_ids, axis=1), flat_shape)
        input_mask = tf.reshape(tf.stack(input_mask, axis=1), flat_shape)
        segment_ids = tf.reshape(tf.stack(segment_ids, axis=1), flat_shape)

        model = modeling.BertModel(
            config=bert_config,
            is_training=(mode == tf.estimator.ModeKeys.TRAIN),
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if FLAGS.bilin_preproc:
            build_head = model_builder.create_model_bilin
        else:
            build_head = model_builder.create_model
        (total_loss, per_example_loss, logits,
         probabilities) = build_head(model, label_ids, num_choices)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            checkpoint_info = modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint)
            assignment_map, initialized_variable_names = checkpoint_info
            if not use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            else:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            from_ckpt = var.name in initialized_variable_names
            init_string = ", *INIT_FROM_CKPT*" if from_ckpt else ""
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                loss=total_loss,
                                                train_op=train_op,
                                                scaffold_fn=scaffold_fn)

        if mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits):
                """Return accuracy of the arg-max prediction and the mean loss."""
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                return {
                    "eval_accuracy": tf.metrics.accuracy(
                        labels=label_ids, predictions=predictions),
                    "eval_loss": tf.metrics.mean(values=per_example_loss),
                }

            return contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=(metric_fn,
                              [per_example_loss, label_ids, logits]),
                scaffold_fn=scaffold_fn)

        return contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={"probabilities": probabilities},
            scaffold_fn=scaffold_fn)
Exemple #24
0
    def model_fn(features, labels, mode, params):
        """Build the `TPUEstimatorSpec` for the BERT NER model.

        Args:
            features: Dict of input tensors; must hold `input_ids`,
                `input_mask`, `segment_ids` and `label_ids`.
            labels: Unused; the labels are read from `features`.
            mode: A `tf.estimator.ModeKeys` value.
            params: Unused.

        Returns:
            A `tf.contrib.tpu.TPUEstimatorSpec` for the requested mode.

        Raises:
            AssertionError: In EVAL mode, if `eval_ids_list.txt` is missing
                from `FLAGS.output_dir` or contains no ids.
        """
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s", name,
                            features[name].shape)
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        print('shape of input_ids', input_ids.shape)
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # `logits` and `trans` are returned by `create_model` but are not
        # needed below; only the loss and the predicted ids are used.
        (total_loss, logits, trans,
         pred_ids) = create_model(bert_config, is_training, input_ids,
                                  input_mask, segment_ids, label_ids,
                                  num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        # Default to an empty mapping so the logging loop below also works
        # when no checkpoint is configured.
        initialized_variable_names = {}
        scaffold_fn = None
        # Initialise the trainable variables from the BERT checkpoint, exactly
        # once: through the scaffold on TPU, directly otherwise.
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")

        # Log every trainable variable and whether it came from the checkpoint.
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            # `scaffold_fn` carries the BERT checkpoint initialisation.
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(label_ids, pred_ids):
                """Compute precision, recall and F over the label ids listed on disk.

                Both tensors arrive through `eval_metrics` instead of being
                captured from the enclosing scope.
                """
                eval_ids_path = os.path.join(FLAGS.output_dir,
                                             "eval_ids_list.txt")
                assert os.path.exists(eval_ids_path)
                # One integer label id per line; the metrics below expect a
                # list of ints. The handle is closed as soon as it is read.
                with open(eval_ids_path, 'r') as list_file:
                    eval_list = [int(item.strip()) for item in list_file]
                assert eval_list
                print("eval_list:", eval_list)
                # NOTE(review): `sequence_mask` receives only the scalar
                # max length here, not per-example lengths - confirm intent.
                weight = tf.sequence_mask(FLAGS.max_seq_length)
                precision = tf_metrics.precision(label_ids, pred_ids,
                                                 num_labels, eval_list, weight)
                recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                                           eval_list, weight)
                f = tf_metrics.f1(label_ids, pred_ids, num_labels, eval_list,
                                  weight)

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                }

            eval_metrics = (metric_fn, [label_ids, pred_ids])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=pred_ids, scaffold_fn=scaffold_fn)
        return output_spec
Exemple #25
0
	def model_fn(features, labels, mode, params):
		"""Build the `TPUEstimatorSpec` for the start/end span model.

		Args:
			features: Dict of input tensors; must hold `unique_ids`,
				`input_ids`, `input_mask` and `segment_ids`, plus
				`start_positions` and `end_positions` in TRAIN and EVAL.
			labels: Unused; the targets are read from `features`.
			mode: A `tf.estimator.ModeKeys` value.
			params: Unused.

		Returns:
			A `tf.contrib.tpu.TPUEstimatorSpec` for the requested mode.

		Raises:
			ValueError: If `mode` is not TRAIN, EVAL or PREDICT.
		"""
		tf.logging.info("*******Features*******")
		for name in sorted(features.keys()):
			tf.logging.info("name = %s, shape = %s", name, features[name].shape)

		unique_ids = features["unique_ids"]
		input_ids = features["input_ids"]
		input_mask = features["input_mask"]
		segment_ids = features["segment_ids"]

		is_training = (mode == tf.estimator.ModeKeys.TRAIN)

		(start_logits, end_logits) = create_model(
			bert_config=bert_config,
			is_training=is_training,
			input_ids=input_ids,
			input_mask=input_mask,
			segment_ids=segment_ids,
			use_one_hot_embeddings=use_one_hot_embeddings)

		tvars = tf.trainable_variables()
		initialized_variable_names = {}
		scaffold_fn = None

		if init_checkpoint:
			(assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
			if use_tpu:
				def tpu_scaffold():
					tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
					return tf.train.Scaffold()
				scaffold_fn = tpu_scaffold
			else:
				tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

		tf.logging.info("**** Trainable Variables ****")
		for var in tvars:
			init_string = ""
			if var.name in initialized_variable_names:
				init_string = ", *INIT_FROM_CKPT*"
			tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape, init_string)

		output_spec = None
		if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
			seq_length = modeling.get_shape_list(input_ids)[1]

			def compute_loss(logits, positions):
				"""Return (mean loss, per-example loss) of the cross-entropy against `positions`."""
				one_hot_positions = tf.one_hot(
					positions, depth=seq_length, dtype=tf.float32)
				log_probs = tf.nn.log_softmax(logits, axis=-1)
				per_example_loss = -tf.reduce_sum(log_probs * one_hot_positions, axis=[-1])
				# The mean loss is the mean of the per-example losses, so
				# it is derived from them rather than recomputed.
				loss = tf.reduce_mean(per_example_loss)
				return loss, per_example_loss

			start_positions = features["start_positions"]
			end_positions = features["end_positions"]
			start_loss, per_example_start_loss = compute_loss(start_logits, start_positions)
			end_loss, per_example_end_loss = compute_loss(end_logits, end_positions)
			total_loss = (start_loss + end_loss) / 2.0
			per_example_total_loss = (per_example_start_loss + per_example_end_loss) / 2.0

			if mode == tf.estimator.ModeKeys.TRAIN:
				# The optimizer is only needed for training, so it is not
				# built in EVAL mode.
				train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
				output_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn)
			else:
				def metric_fn(per_example_total_loss):
					"""Report the mean of the flattened per-example loss."""
					per_example_total_loss = tf.reshape(per_example_total_loss, [-1])
					eval_loss = tf.metrics.mean(values=per_example_total_loss)
					return dict(eval_loss=eval_loss)
				output_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode, eval_metrics=(metric_fn, [per_example_total_loss]), loss=total_loss, scaffold_fn=scaffold_fn)

		elif mode == tf.estimator.ModeKeys.PREDICT:
			predictions = {
				"unique_ids": unique_ids,
				"start_logits": start_logits,
				"end_logits": end_logits
			}
			output_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
		else:
			raise ValueError("Only TRAIN, EVAL and PREDICT modes are supported : %s" % (mode))

		return output_spec
Exemple #26
0
def train(args):
    """Build the BERT NER graph and run a manual, session-based training loop.

    The function loads the train/dev examples, serialises them to TFRecord
    files under ``args.output_dir``, builds the model behind an
    ``is_training`` placeholder and then runs a fixed number of optimisation
    steps. Every ten steps it writes a summary to ``./log`` and reports the
    accuracy on the current train batch and on one cached eval batch.

    Args:
        args: parsed command-line namespace. The attributes read here are
            ``device_map``, ``bert_config_file``, ``max_seq_length``,
            ``clean``, ``do_train``, ``do_eval``, ``output_dir``, ``ner``,
            ``vocab_file``, ``do_lower_case``, ``data_dir``, ``batch_size``,
            ``num_train_epochs``, ``warmup_proportion``, ``init_checkpoint``,
            ``learning_rate``, ``dropout_rate``, ``lstm_size``, ``cell`` and
            ``num_layers``.

    Raises:
        ValueError: if ``args.max_seq_length`` exceeds the BERT config's
            ``max_position_embeddings``.
        AttributeError: if the training set yields fewer than one step.
        SystemExit: if cleaning the output directory fails.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.device_map

    # Registry of data-processing classes (training-data input etc.) by task.
    processors = {"ner": NerProcessor}
    # Load the BERT configuration file.
    bert_config = modeling.BertConfig.from_json_file(args.bert_config_file)

    # Check that the maximum sequence length is within BERT's supported range.
    if args.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (args.max_seq_length, bert_config.max_position_embeddings))

    # Only when re-training are the previous run's outputs deleted; nothing
    # is cleaned for prediction runs.
    if args.clean and args.do_train:
        if os.path.exists(args.output_dir):

            def del_file(path):
                """Recursively remove the files under `path`; directories stay."""
                for entry in os.listdir(path):
                    c_path = os.path.join(path, entry)
                    if os.path.isdir(c_path):
                        del_file(c_path)
                    else:
                        os.remove(c_path)

            try:
                del_file(args.output_dir)
            except Exception as e:
                print(e)
                print('pleace remove the files of output dir and data.conf')
                # Same effect as the interactive exit() helper, without
                # depending on the `site` module having installed it.
                raise SystemExit(-1)

    # Make sure the output directory exists.
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    # Initialise the data processor from the output directory.
    processor = processors[args.ner](args.output_dir)
    # Initialise BERT's own tokenizer from the BERT vocabulary.
    tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab_file,
                                           do_lower_case=args.do_lower_case)

    train_examples = None
    eval_examples = None
    num_train_steps = None
    num_warmup_steps = None

    # Usually both flags are True.
    # NOTE(review): when they are not, the *_examples and step counts stay
    # None and the graph/TFRecord code below receives None -- confirm callers
    # always set both.
    if args.do_train and args.do_eval:
        # Load the training data. Per the original note, the processor joins
        # the data directory with train.txt itself and returns a list whose
        # elements hold two strings: space-separated characters and
        # space-separated tags.
        train_examples = processor.get_train_examples(args.data_dir)
        # Number of training steps.
        num_train_steps = int(
            len(train_examples) * 1.0 / args.batch_size *
            args.num_train_epochs)
        if num_train_steps < 1:
            raise AttributeError('training data is so small...')
        num_warmup_steps = int(num_train_steps * args.warmup_proportion)

        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", args.batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        # Read the validation set.
        eval_examples = processor.get_dev_examples(args.data_dir)

        # Log the validation-set information.
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d", len(eval_examples))
        tf.logging.info("  Batch size = %d", args.batch_size)

    # Fetch the label set as a list. If the labels are supplied by hand they
    # must be right, because they determine the classes that are predicted.
    # Per the original note, label_list generally holds all tags plus
    # [CLS], [SEP] and O.
    label_list = processor.get_labels()
    # label_list=["O", 'B-TIM', 'I-TIM', "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC", "X", "[CLS]", "[SEP]"]
    # label_list=['B-PER', '[SEP]', 'I-ORG', 'O', 'I-LOC', 'I-PER', 'B-ORG', 'B-LOC', '[CLS]', 'X']
    # One more than the label count: label_map below numbers labels from 1,
    # presumably leaving id 0 for padding -- TODO confirm against the
    # feature writer.
    num_labels = len(label_list) + 1
    init_checkpoint = args.init_checkpoint
    learning_rate = args.learning_rate

    with tf.name_scope('input'):
        input_ids = tf.placeholder(tf.int32, [None, args.max_seq_length])
        input_mask = tf.placeholder(tf.int32, [None, args.max_seq_length])
        segment_ids = tf.placeholder(tf.int32, [None, args.max_seq_length])
        label_ids = tf.placeholder(tf.int32, [None, args.max_seq_length])
        is_training = tf.placeholder(tf.bool)

    # The boolean passed right after bert_config is create_model's
    # is_training flag (per the original note).
    def train_model():
        return create_model(bert_config,
                            True,
                            input_ids,
                            input_mask,
                            segment_ids,
                            label_ids,
                            num_labels,
                            False,
                            args.dropout_rate,
                            args.lstm_size,
                            args.cell,
                            args.num_layers,
                            reuse=False)

    def eval_model():
        return create_model(bert_config,
                            False,
                            input_ids,
                            input_mask,
                            segment_ids,
                            label_ids,
                            num_labels,
                            False,
                            args.dropout_rate,
                            args.lstm_size,
                            args.cell,
                            args.num_layers,
                            reuse=True)

    total_loss, logits, trans, pred_ids = tf.cond(tf.equal(
        is_training, tf.constant(True)),
                                                  true_fn=train_model,
                                                  false_fn=eval_model)

    # Streaming accuracy; pred_ids is the predicted tag sequence.
    _, acc_op = tf.metrics.accuracy(labels=label_ids, predictions=pred_ids)
    # Emit summaries for the loss and the accuracy.
    tf.summary.scalar('total_loss', total_loss)
    tf.summary.scalar('accuracy', acc_op)

    # Load the pre-trained variables.
    tvars = tf.trainable_variables()
    # assignment_map maps checkpoint variables onto the graph's variables.
    if init_checkpoint:  # only runs once
        assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    # Optimise the loss.
    train_op = optimization.create_optimizer(total_loss, learning_rate,
                                             num_train_steps, num_warmup_steps,
                                             False)

    # 1. Convert the training data to TFRecord, i.e. serialise it to a file.
    train_file = os.path.join(args.output_dir, "train.tf_record")
    if not os.path.exists(train_file):
        filed_based_convert_examples_to_features(train_examples, label_list,
                                                 args.max_seq_length,
                                                 tokenizer, train_file,
                                                 args.output_dir)

    # 2. Read the records written in the previous step back as batches.
    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=args.max_seq_length,
        is_training=True,
        drop_remainder=True,
        batch_size=args.batch_size)

    eval_file = os.path.join(args.output_dir, "eval.tf_record")
    if not os.path.exists(eval_file):
        filed_based_convert_examples_to_features(eval_examples, label_list,
                                                 args.max_seq_length,
                                                 tokenizer, eval_file,
                                                 args.output_dir)
    # Build the validation-set input.
    eval_input_fn = file_based_input_fn_builder(input_file=eval_file,
                                                seq_length=args.max_seq_length,
                                                is_training=False,
                                                drop_remainder=False,
                                                batch_size=args.batch_size)
    train_input = train_input_fn.make_one_shot_iterator()
    eval_input = eval_input_fn.make_one_shot_iterator()
    meta_train_data = train_input.get_next()
    meta_eval_data = eval_input.get_next()  # fetches validation batches

    # Fixed number of loop iterations; num_train_steps above is only handed
    # to create_optimizer.
    max_step = 1500
    merged = tf.summary.merge_all()

    # Original author's note: the batch_size argument is 64 and
    # train_batch_size is 32; it is unclear what train_batch_size is for.
    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    config.gpu_options.per_process_gpu_memory_fraction = 0.5
    # config.gpu_options.allow_growth = True  # allocate GPU memory on demand

    # Exactly one session is created, and it carries the GPU options above.
    # An extra unconfigured session created earlier would be leaked and would
    # be opened without the memory-fraction limit.
    sess = tf.Session(config=config)
    train_writer = tf.summary.FileWriter('./log', sess.graph)

    def feed_for(batch):
        """Map one fetched batch onto the input placeholders.

        is_training is fed False on purpose. Original author's note: it
        controls dropout in BERT and in the LSTM; mixing True and False
        involves shared variables and seemed to trigger a bug, so dropout is
        disabled for now. The note adds that reusing the later
        variable_scopes does not seem to cause problems either.
        """
        return {
            input_ids: batch['input_ids'],
            input_mask: batch['input_mask'],
            segment_ids: batch['segment_ids'],
            label_ids: batch['label_ids'],
            is_training: False
        }

    try:
        # Initialise every variable to avoid FailedPreconditionError.
        # init_from_checkpoint has already replaced the initialisers of the
        # mapped variables, so this is what loads the pre-trained values.
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        # A single eval batch is fetched once and reused for every report.
        eval_data = sess.run(meta_eval_data)
        print(label_list)
        # Labels are indexed starting from 1.
        label_map = {label: i for (i, label) in enumerate(label_list, 1)}
        print(label_map)

        for step in range(max_step):
            # Materialise the next batch as numpy arrays for feeding.
            train_data = sess.run(meta_train_data)
            sess.run(train_op, feed_dict=feed_for(train_data))
            if step % 10 == 1:
                train_summary, acco, _ = sess.run(
                    [merged, acc_op, pred_ids],
                    feed_dict=feed_for(train_data))
                acco_evl, prediction_eval = sess.run(
                    [acc_op, pred_ids], feed_dict=feed_for(eval_data))
                train_writer.add_summary(train_summary, step)
                print('saving summary at %s, accuracy %s, accuracy_eval %s' %
                      (step, acco, acco_evl))
                mymetrics.compute(prediction_eval, eval_data['label_ids'],
                                  label_list)
    finally:
        # Release the event file and the session even if a step fails.
        train_writer.close()
        sess.close()
Exemple #27
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Return the `TPUEstimatorSpec` for the mention-representation model.

        Logs the incoming features, builds the model with
        `create_zeshel_model`, optionally maps variables from
        `init_checkpoint`, and produces a spec for TRAIN (loss + train op),
        EVAL (loss only) or any other mode (`uid` / `mention_rep` predictions
        together with the loss). `labels` and `params` are unused.
        """

        tf.logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (feature_name, features[feature_name].shape))

        # Pull the model inputs out of the feature dict in a fixed order.
        (input_ids, input_mask, segment_ids, mention_ids, uids, start_token,
         end_token) = (features[key]
                       for key in ("input_ids", "input_mask", "segment_ids",
                                   "mention_id", "uid", "start_token",
                                   "end_token"))

        training = mode == tf.estimator.ModeKeys.TRAIN

        model_outputs = create_zeshel_model(
            bert_config, output_dim, num_cands, margin, training, input_ids,
            input_mask, segment_ids, mention_ids, start_token, end_token,
            use_one_hot_embeddings)
        total_loss, _, mention_rep = model_outputs

        trainable_vars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            assignment_map, initialized_variable_names = (
                bert.modeling.get_assignment_map_from_checkpoint(
                    trainable_vars, init_checkpoint))
            if not use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            else:
                # On TPU the checkpoint mapping is deferred to the scaffold.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold

        tf.logging.info("**** Trainable Variables ****")
        for var in trainable_vars:
            suffix = (", *INIT_FROM_CKPT*"
                      if var.name in initialized_variable_names else "")
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            suffix)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   loss=total_loss,
                                                   train_op=train_op,
                                                   scaffold_fn=scaffold_fn)

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   loss=total_loss,
                                                   scaffold_fn=scaffold_fn)

        # Every remaining mode exposes the per-mention representation.
        predictions = {"uid": uids, "mention_rep": mention_rep}
        return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                               predictions=predictions,
                                               loss=total_loss,
                                               scaffold_fn=scaffold_fn)
        def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
            """Return the `EstimatorSpec` for the BertSim classifier.

            Logs the incoming features, builds the model through
            `BertSim.create_model`, optionally initialises variables from
            `init_checkpoint`, and produces a spec for TRAIN (loss + train
            op), EVAL (accuracy, AUC and mean loss) or any other mode (the
            class probabilities as predictions). `labels` and `params` are
            unused.
            """
            from tensorflow.python.estimator.model_fn import EstimatorSpec

            tf.logging.info("*** Features ***")
            for feature_name in sorted(features.keys()):
                tf.logging.info("  name = %s, shape = %s" %
                                (feature_name, features[feature_name].shape))

            # Pull the model inputs out of the feature dict in a fixed order.
            input_ids, input_mask, segment_ids, label_ids = (
                features[key] for key in ("input_ids", "input_mask",
                                          "segment_ids", "label_ids"))

            training = mode == tf.estimator.ModeKeys.TRAIN

            model_outputs = BertSim.create_model(bert_config, training,
                                                 input_ids, input_mask,
                                                 segment_ids, label_ids,
                                                 num_labels,
                                                 use_one_hot_embeddings)
            total_loss, per_example_loss, logits, probabilities = model_outputs

            trainable_vars = tf.trainable_variables()
            initialized_variable_names = {}

            if init_checkpoint:
                assignment_map, initialized_variable_names = (
                    modeling.get_assignment_map_from_checkpoint(
                        trainable_vars, init_checkpoint))
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            tf.logging.info("**** Trainable Variables ****")
            for var in trainable_vars:
                suffix = (", *INIT_FROM_CKPT*"
                          if var.name in initialized_variable_names else "")
                tf.logging.info("  name = %s, shape = %s%s", var.name,
                                var.shape, suffix)

            if mode == tf.estimator.ModeKeys.TRAIN:
                train_op = optimization.create_optimizer(
                    total_loss, learning_rate, num_train_steps,
                    num_warmup_steps, False)
                return EstimatorSpec(mode=mode,
                                     loss=total_loss,
                                     train_op=train_op)

            if mode == tf.estimator.ModeKeys.EVAL:
                # Hard class decisions feed both the accuracy and the AUC.
                predictions = tf.argmax(logits,
                                        axis=-1,
                                        output_type=tf.int32)
                eval_metric_ops = {
                    "eval_accuracy": tf.metrics.accuracy(label_ids,
                                                         predictions),
                    "eval_auc": tf.metrics.auc(label_ids, predictions),
                    "eval_loss": tf.metrics.mean(per_example_loss),
                }
                return EstimatorSpec(mode=mode,
                                     loss=total_loss,
                                     eval_metric_ops=eval_metric_ops)

            return EstimatorSpec(mode=mode, predictions=probabilities)
Exemple #29
0
    def compile(self):
        """Define operations for loss, measures and optimization, then
        create the session and initialize variables.

        Sets ``self.loss``, ``self.accuracy``, ``self.precision``,
        ``self.recall``, ``self.f1``, ``self.global_step``,
        ``self.learning_rate``, ``self.train_op`` and ``self.sess``.

        Three optimisation set-ups are supported, selected from
        ``self.config``:

        * ``'bert' in emb_class`` and ``use_bert_optimization``: BERT's own
          ``optimization.create_optimizer``.
        * ``'bert' in emb_class`` otherwise: ``AdamWeightDecayOptimizer`` with
          the decay/warmup schedule below and gradient clipping.
        * anything else: ``tf.train.AdamOptimizer`` with the same schedule and
          gradient clipping.
        """
        config = self.config

        def decayed_lr_with_warmup():
            """Staircase exponential decay, with linear warmup if configured."""
            decayed = tf.train.exponential_decay(
                config.starter_learning_rate,
                self.global_step,
                config.decay_steps,
                config.decay_rate,
                staircase=True)
            # Without warmup steps the warmup term would be 0 * (step / 0),
            # i.e. NaN, and would poison the learning rate; skip it entirely.
            if not config.num_warmup_steps:
                return decayed
            # linear warmup, if global_step < num_warmup_steps, then
            # learning rate = (global_step / num_warmup_steps) * starter_learning_rate
            global_steps_int = tf.cast(self.global_step, tf.int32)
            warmup_steps_int = tf.constant(config.num_warmup_steps,
                                           dtype=tf.int32)
            global_steps_float = tf.cast(global_steps_int, tf.float32)
            warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)
            warmup_percent_done = global_steps_float / warmup_steps_float
            warmup_learning_rate = config.starter_learning_rate * warmup_percent_done
            is_warmup = tf.cast(global_steps_int < warmup_steps_int,
                                tf.float32)
            return ((1.0 - is_warmup) * decayed +
                    is_warmup * warmup_learning_rate)

        # define operations for loss, measures, optimization
        self.loss = self.__compute_loss()
        self.accuracy, self.precision, self.recall, self.f1 = self.__compute_measures(
        )
        with tf.variable_scope('optimization'):
            self.global_step = tf.train.get_or_create_global_step()
            if 'bert' in config.emb_class:
                from bert import optimization
                if config.use_bert_optimization:
                    self.learning_rate = tf.constant(
                        value=config.starter_learning_rate,
                        shape=[],
                        dtype=tf.float32)
                    self.train_op = optimization.create_optimizer(
                        self.loss, config.starter_learning_rate,
                        config.num_train_steps, config.num_warmup_steps, False)
                else:
                    self.learning_rate = decayed_lr_with_warmup()
                    # Adam optimizer with correct L2 weight decay
                    optimizer = optimization.AdamWeightDecayOptimizer(
                        learning_rate=self.learning_rate,
                        weight_decay_rate=0.01,
                        beta_1=0.9,
                        beta_2=0.999,
                        epsilon=1e-6,
                        exclude_from_weight_decay=[
                            "LayerNorm", "layer_norm", "bias"
                        ])
                    tvars = tf.trainable_variables()
                    grads, _ = tf.clip_by_global_norm(
                        tf.gradients(self.loss, tvars), config.clip_norm)
                    train_op = optimizer.apply_gradients(
                        zip(grads, tvars), global_step=self.global_step)
                    # The global step is advanced explicitly here and grouped
                    # with the update op.
                    new_global_step = self.global_step + 1
                    self.train_op = tf.group(
                        train_op, [self.global_step.assign(new_global_step)])
            else:
                self.learning_rate = decayed_lr_with_warmup()
                # Adam optimizer
                optimizer = tf.train.AdamOptimizer(self.learning_rate)
                tvars = tf.trainable_variables()
                grads, _ = tf.clip_by_global_norm(
                    tf.gradients(self.loss, tvars), config.clip_norm)
                self.train_op = optimizer.apply_gradients(
                    zip(grads, tvars), global_step=self.global_step)
                # An alternative that was tried here: Adam with a triangular
                # cyclical learning rate
                # (https://github.com/mhmoodlan/cyclic-learning-rate).

        # create session, initialize variables. this should be placed at the end of graph definitions.
        session_conf = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False,
                                      inter_op_parallelism_threads=0,
                                      intra_op_parallelism_threads=0)
        session_conf.gpu_options.allow_growth = True
        sess = tf.Session(config=session_conf)
        feed_dict = {self.wrd_embeddings_init: config.embvec.wrd_embeddings}
        sess.run(tf.global_variables_initializer(),
                 feed_dict=feed_dict)  # feed large embedding data
        sess.run(tf.local_variables_initializer())  # for tf_metrics
        self.sess = sess
Exemple #30
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Return the `TPUEstimatorSpec` for the bilinear multiple-choice model.

        Reshapes the token features to `[-1, FLAGS.max_seq_length]`, runs
        `modeling.BertModel`, scores it with
        `model_builder.create_model_bilin`, optionally maps variables from
        `init_checkpoint`, and produces a spec for TRAIN (loss + train op),
        EVAL (accuracy and mean loss) or any other mode (probabilities as
        predictions). `labels` and `params` are unused.
        """

        tf.logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (feature_name, features[feature_name].shape))

        def as_sequences(key):
            """Reshape one feature to rows of FLAGS.max_seq_length entries."""
            return tf.reshape(features[key], [-1, FLAGS.max_seq_length])

        input_ids = as_sequences("input_ids")
        input_mask = as_sequences("input_mask")
        segment_ids = as_sequences("segment_ids")

        label_ids = features["label"]

        training = mode == tf.estimator.ModeKeys.TRAIN

        model = modeling.BertModel(
            config=bert_config,
            is_training=training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        bilin_outputs = model_builder.create_model_bilin(
            model, label_ids, num_choices)
        cpc_loss, _, logits, probabilities = bilin_outputs

        # The reported loss is the model loss, unmodified.
        total_loss = cpc_loss

        trainable_vars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            assignment_map, initialized_variable_names = (
                modeling.get_assignment_map_from_checkpoint(
                    trainable_vars, init_checkpoint))

            if not use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            else:
                # On TPU the checkpoint mapping is deferred to the scaffold.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold

        tf.logging.info("**** Trainable Variables ****")
        for var in trainable_vars:
            suffix = (", *INIT_FROM_CKPT*"
                      if var.name in initialized_variable_names else "")
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            suffix)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                loss=total_loss,
                                                train_op=train_op,
                                                scaffold_fn=scaffold_fn)

        if mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(cpc_loss, label_ids, logits):
                """Build the accuracy and mean-loss evaluation metrics."""
                predicted = tf.argmax(logits, axis=-1, output_type=tf.int32)
                return {
                    "eval_accuracy":
                    tf.metrics.accuracy(labels=label_ids,
                                        predictions=predicted),
                    "eval_cpc_loss":
                    tf.metrics.mean(values=cpc_loss),
                }

            return contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=(metric_fn, [cpc_loss, label_ids, logits]),
                scaffold_fn=scaffold_fn)

        return contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={"probabilities": probabilities},
            scaffold_fn=scaffold_fn)
    def model_fn(features, labels, mode, params):
        """Return the `TPUEstimatorSpec` for the NER tagging model.

        Logs the incoming features, builds the model with `create_model`,
        optionally maps variables from `init_checkpoint`, and produces a spec
        for TRAIN (loss + train op), EVAL (precision / recall / F1 over the
        selected label ids) or any other mode (`pred_ids` as predictions).

        Args:
            features: dict holding "input_ids", "input_mask", "segment_ids"
                and "label_ids".
            labels: unused.
            mode: a `tf.estimator.ModeKeys` value.
            params: unused.
        """
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        print('shape of input_ids', input_ids.shape)
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # Build the model: input_ids is the index representation of the input
        # samples, label_ids the index representation of the tags.
        (total_loss, _logits, _trans, pred_ids) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
            num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        # Bound up front: the logging loop below reads it even when no
        # checkpoint is given.
        initialized_variable_names = {}
        scaffold_fn = None
        # Load the BERT weights. The mapping is registered exactly once:
        # through the scaffold on TPU, directly otherwise.
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")

        # Log which model parameters were loaded from the checkpoint.
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
            # The scaffold hook is what seeds our model with BERT's
            # parameters on TPU.
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        if mode == tf.estimator.ModeKeys.EVAL:
            # Adapted for NER.
            def metric_fn(label_ids, pred_ids):
                """Precision / recall / F1 of the predicted tag ids.

                pred_ids arrives through eval_metrics instead of being
                captured from the enclosing scope, so metric_fn only touches
                the tensors it is handed.
                """
                # Label ids that count as positive classes.
                positive_label_ids = [2, 3, 4, 5, 6, 7]
                # NOTE(review): sequence_mask of a scalar length yields an
                # all-True vector, so this weight masks nothing; input_mask
                # may have been intended -- confirm.
                weight = tf.sequence_mask(FLAGS.max_seq_length)
                precision = tf_metrics.precision(
                    label_ids, pred_ids, num_labels, positive_label_ids, weight)
                recall = tf_metrics.recall(
                    label_ids, pred_ids, num_labels, positive_label_ids, weight)
                f = tf_metrics.f1(
                    label_ids, pred_ids, num_labels, positive_label_ids, weight)

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                }

            eval_metrics = (metric_fn, [label_ids, pred_ids])
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)

        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=pred_ids,
            scaffold_fn=scaffold_fn
        )