Example #1
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        unique_ids = features["unique_ids"]
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        input_type_ids = features["input_type_ids"]

        model = modeling.BertModel(
            config=bert_config,
            is_training=False,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=input_type_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError("Only PREDICT modes are supported: %s" % (mode))

        tvars = tf.trainable_variables()
        scaffold_fn = None
        (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)

        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape, init_string)

        all_layers = model.get_all_encoder_layers()

        predictions = {
            "unique_id": unique_ids,
        }

        for (i, layer_index) in enumerate(layer_indexes):
            predictions["layer_output_%d" % i] = all_layers[layer_index]

        output_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        return output_spec
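A hedged wiring sketch for the predict-only model_fn above, following the pattern used in BERT's extract_features.py; it is not part of the example, `model_fn`/`input_fn` are assumed to be built as shown above, and all numeric values are placeholders.

import tensorflow as tf

# Using the TPU classes with use_tpu=False and master=None falls back to CPU/GPU.
run_config = tf.contrib.tpu.RunConfig(
    master=None,
    tpu_config=tf.contrib.tpu.TPUConfig(num_shards=8))

estimator = tf.contrib.tpu.TPUEstimator(
    use_tpu=False,
    model_fn=model_fn,          # the closure defined above (assumed in scope)
    config=run_config,
    predict_batch_size=32)

# Each prediction is a dict holding "unique_id" plus one "layer_output_%d"
# entry per element of layer_indexes.
# for result in estimator.predict(input_fn, yield_single_examples=True):
#     unique_id = int(result["unique_id"])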
Example #2
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        # label_ids = features["label_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (logits, probabilities) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids,
            num_labels)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={"probabilities": probabilities},
            scaffold_fn=scaffold_fn)
        return output_spec
Example #3
    def load_model(bert_config, init_checkpoint: Union[str, None], layer_indexes: List[int], input_ids, input_mask,
                   input_type_ids, is_training: bool = False, use_one_hot_embeddings: bool = False, scope: str = None):
        # Load the Bert Model
        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=input_type_ids,
            use_one_hot_embeddings=use_one_hot_embeddings,
            scope=scope
        )
        tvars = tf.trainable_variables()
        initialized_variable_names = {}

        # Load the checkpoint
        if init_checkpoint is None:
            tf.logging.info("No checkpoint was loaded.")
        else:
            (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars,
                                                                                                       init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # Get the defined output layer of the model (or concat multiple layers if specified)
        if len(layer_indexes) == 1:
            output_layer = model.get_all_encoder_layers()[layer_indexes[0]]
        else:
            all_layers = [model.get_all_encoder_layers()[l] for l in layer_indexes]
            output_layer = tf.concat(all_layers, -1)

        # Log the trainable variables to verify that the checkpoint init worked
        if init_checkpoint is not None:
            tf.logging.info("*** Trainable Variables ***")
            for var in tvars:
                init_string = ""
                if var.name in initialized_variable_names:
                    init_string = ", *INIT_FROM_CKPT*"
                tf.logging.info("   name = %s, shape = %s%s", var.name, var.shape, init_string)

        return output_layer
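A small shape sketch for the layer-concatenation branch in load_model above (plain TF 1.x, no BERT checkpoint needed; the sizes are illustrative assumptions): concatenating k encoder layers of hidden size H along the last axis gives a [batch, seq_len, k * H] tensor.

import tensorflow as tf

last_layer = tf.zeros([8, 128, 768])      # stand-in for encoder layer -1
second_to_last = tf.zeros([8, 128, 768])  # stand-in for encoder layer -2
merged = tf.concat([last_layer, second_to_last], -1)
print(merged.shape)  # (8, 128, 1536) -> downstream layers must expect 2 * hidden_size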
Example #4
    def model_fn(features, labels, mode, params):
        # logging.info("*** Features ***")
        # for name in sorted(features.keys()):
        #     logging.info("  name = {}, shape = {}".format(name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, pred_ids) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids,
            label_ids, num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint)
            if use_tpu:
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()
                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        logging.info("**** Trainable Variables ****")

        # for var in tvars:
        #     init_string = ""
        #     if var.name in initialized_variable_names:
        #         init_string = ", *INIT_FROM_CKPT*"
        #     logging.info("  name = {}, shape = {} {}".format(var.name, var.shape, init_string))

        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=pred_ids, scaffold_fn=scaffold_fn)
        return output_spec
Example #5
    def model_fn(features, labels, mode, params):

        input_ids = features['input_ids']
        input_mask = features['input_mask']
        segment_ids = features['segment_ids']
        label_ids = features['label_ids']

        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        tf.logging.info("label_ids tensor: %s", label_ids)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        (total_loss, per_example_loss, logits,
         probabilities) = create_model(bert_config, is_training, input_ids,
                                       input_mask, segment_ids, label_ids,
                                       num_labels, use_one_hot_embeddings)

        if init_checkpoint:
            tvars = tf.trainable_variables()
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     loss=total_loss,
                                                     train_op=train_op)
        else:
            raise ValueError("Only TRAIN mode is supported: %s" % mode)
        return output_spec
Example #6
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        unique_ids = features["unique_ids"]
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        input_type_ids = features["input_type_ids"]

        model = modeling.BertModel(config=bert_config,
                                   is_training=False,
                                   input_ids=input_ids,
                                   input_mask=input_mask,
                                   token_type_ids=input_type_ids,
                                   use_one_hot_embeddings=False)

        if mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError("Only  PREDICT  modes  are  supported:  %s" %
                             (mode))

        tvars = tf.trainable_variables()
        scaffold_fn = None
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("****  Trainable  Variables  ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ",  *INIT_FROM_CKPT*"
            tf.logging.info("    name  =  %s,  shape  =  %s%s", var.name,
                            var.shape, init_string)
        output_layer = model.get_pooled_output()
        predictions = {"unique_id": unique_ids, "output_layer": output_layer}
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                      predictions=predictions,
                                                      scaffold_fn=scaffold_fn)
        return output_spec
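A minimal shape check related to the example above; it assumes the open-source `bert` package (as used elsewhere on this page) and TF 1.x, and builds a tiny randomly initialised model purely to contrast the two output accessors: get_pooled_output() is [batch, hidden], while get_sequence_output() is [batch, seq_len, hidden].

import tensorflow as tf
from bert import modeling

tiny_config = modeling.BertConfig(
    vocab_size=32, hidden_size=16, num_hidden_layers=2,
    num_attention_heads=2, intermediate_size=32)
dummy_ids = tf.zeros([4, 10], dtype=tf.int32)
tiny_model = modeling.BertModel(
    config=tiny_config, is_training=False, input_ids=dummy_ids)
print(tiny_model.get_pooled_output().shape)    # (4, 16)
print(tiny_model.get_sequence_output().shape)  # (4, 10, 16)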
Example #7
    def __init__(self, bert_config, num_labels, seq_length, init_checkpoint):
        self.bert_config = bert_config
        self.num_labels = num_labels
        self.seq_length = seq_length

        self.input_ids = tf.placeholder(tf.int32, [None, self.seq_length],
                                        name='input_ids')
        self.input_mask = tf.placeholder(tf.int32, [None, self.seq_length],
                                         name='input_mask')
        self.segment_ids = tf.placeholder(tf.int32, [None, self.seq_length],
                                          name='segment_ids')
        self.labels = tf.placeholder(tf.int32, [None], name='labels')
        self.is_training = tf.placeholder(tf.bool, name='is_training')
        self.learning_rate = tf.placeholder(tf.float32, name='learn_rate')

        self.model = modeling.BertModel(config=self.bert_config,
                                        is_training=self.is_training,
                                        input_ids=self.input_ids,
                                        input_mask=self.input_mask,
                                        token_type_ids=self.segment_ids)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)

            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        self.inference()
Example #8
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        client_id = features["client_id"]
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        input_type_ids = features["input_type_ids"]

        model = modeling.BertModel(
            config=bert_config,
            is_training=False,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=input_type_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError("Only PREDICT modes are supported: %s" % (mode))

        tvars = tf.trainable_variables()
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)

        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # tf.logging.info("**** Trainable Variables ****")
        # for var in tvars:
        #     init_string = ""
        #     if var.name in initialized_variable_names:
        #         init_string = ", *INIT_FROM_CKPT*"
        #     tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
        #                     init_string)

        predictions = {
            'client_id': client_id,
            'encodes': model.get_sentence_encoding()
        }

        return EstimatorSpec(mode=mode, predictions=predictions)
Example #9
    def build_model(self):
        tvars = tf.trainable_variables()
        initialized_variable_names = {}

        # Load the BERT model and initialize variable names. assignment_map and
        # initialized_variable_names are ordered dicts; assignment_map collects all of
        # the variable names in tvars, with both keys and values being variable names.
        if self.init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, self.init_checkpoint)

            tf.train.init_from_checkpoint(self.init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        # Build the model from the parameters; input_idx is the id representation of the
        # input samples and label_ids is the id representation of the labels.
        (loss, logits, pred_y) = self.create_model()

        self.loss = loss
        self.pred_y = pred_y
        tf.logging.info("loss tensor: %s", loss)
        tf.logging.info("learning_rate = %s, num_train_steps = %s, num_warmup_steps = %s",
                        FLAGS.learning_rate, self.num_train_steps,
                        self.num_warmup_steps)
        self.train_op = optimization.create_optimizer(loss,
                                                      FLAGS.learning_rate,
                                                      self.num_train_steps,
                                                      self.num_warmup_steps,
                                                      use_tpu=False)

        self.saver = tf.train.Saver(tf.global_variables())
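A self-contained sketch (plain TF 1.x, temporary files only) of the name-based restore that get_assignment_map_from_checkpoint and tf.train.init_from_checkpoint rely on in the examples above; note that init_from_checkpoint only registers the mapping, which is applied when the session initialises variables.

import os
import tempfile

import tensorflow as tf

ckpt_dir = tempfile.mkdtemp()

# 1) Write a checkpoint containing a variable named "demo/w".
with tf.Graph().as_default():
    w = tf.get_variable("demo/w", initializer=tf.constant([1.0, 2.0, 3.0]))
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ckpt_path = tf.train.Saver().save(sess, os.path.join(ckpt_dir, "model.ckpt"))

# 2) In a fresh graph, map the checkpoint name onto a like-named variable.
with tf.Graph().as_default():
    w2 = tf.get_variable("demo/w", shape=[3])
    tf.train.init_from_checkpoint(ckpt_path, {"demo/w": "demo/w"})
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(w2))  # [1. 2. 3.] -- the value came from the checkpoint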
Example #10
    def __bert_embedding(self, token_ids, token_masks, segment_ids, masks, keep_prob=0.8):
        """Compute BERT embeddings."""
        from bert import modeling
        bert_model = modeling.BertModel(
            config=self.bert_config,
            is_training=self.is_training,
            input_ids=token_ids,
            input_mask=token_masks,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=False)
        bert_embeddings = bert_model.get_sequence_output()  # (batch_size, bert_max_seq_length, bert_embedding_size)
        # initialize pre-trained bert
        if self.is_training and self.bert_init_checkpoint:
            tvars = tf.trainable_variables()
            (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars, self.bert_init_checkpoint)
            tf.train.init_from_checkpoint(self.bert_init_checkpoint, assignment_map)
            tf.logging.info("**** Trainable Variables ****")
            for var in tvars:
                init_string = ""
                if var.name in initialized_variable_names:
                    init_string = ", *INIT_FROM_CKPT*"
                tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape, init_string)
        return tf.nn.dropout(bert_embeddings, keep_prob)
Example #11
    def __init__(self, bert_config):
        self.bert_config = bert_config
        self.input_ids = \
            tf.placeholder(shape=[None, SEQ_LEN], dtype=tf.int32, name="input_ids")
        self.input_mask = \
            tf.placeholder(shape=[None, SEQ_LEN], dtype=tf.int32, name="input_mask")
        self.token_type = \
            tf.placeholder(shape=[None, SEQ_LEN], dtype=tf.int32, name="segment_ids")
        self.masked_lm_positions = \
            tf.placeholder(shape=[None, MAX_PREDICTIONS_PER_SEQ], dtype=tf.int32, name="masked_lm_positions")
        self.masked_lm_ids = \
            tf.placeholder(shape=[None, MAX_PREDICTIONS_PER_SEQ], dtype=tf.int32, name="masked_lm_ids")

        model = modeling.BertModel(config=self.bert_config,
                                   is_training=False,
                                   input_ids=self.input_ids,
                                   input_mask=self.input_mask,
                                   token_type_ids=self.token_type,
                                   use_one_hot_embeddings=False)

        self.input_tensor = model.get_sequence_output()
        self.output_weights = model.get_embedding_table()

        self.masked_lm_example_loss = self.get_masked_lm_output()
        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        (assignment_map, initialized_variable_names) \
            = modeling.get_assignment_map_from_checkpoint(tvars, BERT_INIT_CHKPNT)
        tf.train.init_from_checkpoint(BERT_INIT_CHKPNT, assignment_map)
        tf.compat.v1.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.compat.v1.logging.info("  name = %s, shape = %s%s", var.name,
                                      var.shape, init_string)
Example #12
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = tf.reshape(features["input_ids"], [-1, FLAGS.max_seq_length])
    input_mask = tf.reshape(features["input_mask"], [-1, FLAGS.max_seq_length])
    segment_ids = tf.reshape(features["segment_ids"],
                             [-1, FLAGS.max_seq_length])

    label_types = features["label_types"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_real_example = tf.reduce_sum(
        tf.one_hot(label_types, FLAGS.k_size * 2), axis=1)

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    (cpc_loss, _, logits, probabilities) = bilin_model_builder.create_model(
        model, label_ids, label_types, num_choices, k_size=FLAGS.k_size)

    if add_masking:
      mask_rate = FLAGS.mask_rate  # search alternatives?
      max_predictions_per_seq = int(math.ceil(FLAGS.max_seq_length * mask_rate))
      masked_lm_positions = tf.reshape(features["mask_indices"],
                                       [-1, max_predictions_per_seq])
      masked_lm_ids = tf.reshape(features["target_token_ids"],
                                 [-1, max_predictions_per_seq])
      masked_lm_weights = tf.reshape(features["target_token_weights"],
                                     [-1, max_predictions_per_seq])
      (masked_lm_loss, _, _) = bilin_model_builder.get_masked_lm_output(
          bert_config, model.get_sequence_output(), model.get_embedding_table(),
          masked_lm_positions, masked_lm_ids, masked_lm_weights)
      total_loss = cpc_loss + masked_lm_loss
    else:
      total_loss = cpc_loss
      masked_lm_loss = tf.constant([0])

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)

      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:

      train_op = optimization.create_optimizer(total_loss, learning_rate,
                                               num_train_steps,
                                               num_warmup_steps, use_tpu)

      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)

    elif mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(cpc_loss, mlm_loss, label_ids, logits, is_real_example):
        """Collect metrics for function."""

        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        accuracy = tf.metrics.accuracy(
            labels=label_ids, predictions=predictions, weights=is_real_example)
        cpc_loss_metric = tf.metrics.mean(values=cpc_loss)
        mlm_loss_metric = tf.metrics.mean(values=mlm_loss)
        metric_dict = {
            "eval_accuracy": accuracy,
            "eval_cpc_loss": cpc_loss_metric,
            "eval_mlm_loss": mlm_loss_metric
        }
        for i in range(FLAGS.k_size * 2):
          metric_dict["acc" + str(i)] = tf.metrics.accuracy(
              labels=label_ids[:, i],
              predictions=predictions[:, i],
              weights=is_real_example[:, i])
        return metric_dict

      eval_metrics = (metric_fn, [
          cpc_loss, masked_lm_loss, label_ids, logits, is_real_example
      ])
      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
    else:
      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={"probabilities": probabilities},
          scaffold_fn=scaffold_fn)
    return output_spec
Example #13
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        token_label_ids = features["token_label_ids"]
        predicate_label_id = features["predicate_label_id"]
        is_real_example = None
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(token_label_ids),
                                      dtype=tf.float32)  # TO DO

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, predicate_loss, predicate_per_example_loss,
         predicate_probabilities, predicate_prediction, token_label_loss,
         token_label_per_example_loss,
         token_label_logits, token_label_predictions) = create_model(
             bert_config, is_training, input_ids, input_mask, segment_ids,
             token_label_ids, predicate_label_id, num_token_labels,
             num_predicate_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(predicate_loss, token_label_per_example_loss,
                          predicate_probabilities, token_label_ids,
                          token_label_logits, is_real_example):
                predicate_prediction = tf.argmax(predicate_probabilities,
                                                 axis=-1,
                                                 output_type=tf.int32)
                token_label_predictions = tf.argmax(token_label_logits,
                                                    axis=-1,
                                                    output_type=tf.int32)
                # ["[Padding]", "[##WordPiece]", "[CLS]", "[SEP]"] + seq_out_set
                token_label_pos_indices_list = list(range(num_token_labels))[4:]
                # do not care about "O"
                pos_indices_list = token_label_pos_indices_list[:-1]
                token_label_precision_macro = tf_metrics.precision(
                    token_label_ids,
                    token_label_predictions,
                    num_token_labels,
                    pos_indices_list,
                    average="macro")
                token_label_recall_macro = tf_metrics.recall(
                    token_label_ids,
                    token_label_predictions,
                    num_token_labels,
                    pos_indices_list,
                    average="macro")
                token_label_f_macro = tf_metrics.f1(token_label_ids,
                                                    token_label_predictions,
                                                    num_token_labels,
                                                    pos_indices_list,
                                                    average="macro")
                token_label_precision_micro = tf_metrics.precision(
                    token_label_ids,
                    token_label_predictions,
                    num_token_labels,
                    pos_indices_list,
                    average="micro")
                token_label_recall_micro = tf_metrics.recall(
                    token_label_ids,
                    token_label_predictions,
                    num_token_labels,
                    pos_indices_list,
                    average="micro")
                token_label_f_micro = tf_metrics.f1(token_label_ids,
                                                    token_label_predictions,
                                                    num_token_labels,
                                                    pos_indices_list,
                                                    average="micro")
                token_label_loss = tf.metrics.mean(
                    values=token_label_per_example_loss,
                    weights=is_real_example)
                predicate_loss = tf.metrics.mean(values=predicate_loss)
                return {
                    "eval_predicate_loss": predicate_loss,
                    "predicate_prediction": predicate_prediction,
                    "eval_token_label_precision(macro)":
                    token_label_precision_macro,
                    "eval_token_label_recall(macro)": token_label_recall_macro,
                    "eval_token_label_f(macro)": token_label_f_macro,
                    "eval_token_label_precision(micro)":
                    token_label_precision_micro,
                    "eval_token_label_recall(micro)": token_label_recall_micro,
                    "eval_token_label_f(micro)": token_label_f_micro,
                    "eval_token_label_loss": token_label_loss,
                }

            eval_metrics = (metric_fn, [
                predicate_loss, token_label_per_example_loss,
                predicate_probabilities, token_label_ids, token_label_logits,
                is_real_example
            ])

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={
                    "predicate_probabilities": predicate_probabilities,
                    "predicate_prediction": predicate_prediction,
                    "token_label_predictions": token_label_predictions
                },
                scaffold_fn=scaffold_fn)

        return output_spec
Example #14
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_masks = features["input_masks"]
        segment_ids = features["segment_ids"]
        token_label_ids = features["token_label_ids"] if mode in [
            tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL
        ] else None
        sent_label_ids = features["sent_label_ids"] if mode in [
            tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL
        ] else None

        loss, token_predict_ids, sent_predict_ids = create_model(
            bert_config, input_ids, input_masks, segment_ids, token_label_ids,
            sent_label_ids, token_label_list, sent_label_list, mode, use_tpu)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None

        if init_checkpoint:
            assignment_map, initialized_variable_names = modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint)

            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"

            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(token_label_ids, sent_label_ids, token_predict_ids,
                          sent_predict_ids):
                token_precision = tf.metrics.precision(
                    labels=token_label_ids, predictions=token_predict_ids)
                token_recall = tf.metrics.recall(labels=token_label_ids,
                                                 predictions=token_predict_ids)
                sent_accuracy = tf.metrics.accuracy(
                    labels=sent_label_ids, predictions=sent_predict_ids)

                metric = {
                    "token_precision": token_precision,
                    "token_recall": token_recall,
                    "sent_accuracy": sent_accuracy,
                }

                return metric

            masked_token_label_ids = get_masked_data(token_label_ids,
                                                     token_label_list)
            masked_token_predict_ids = get_masked_data(token_predict_ids,
                                                       token_label_list)
            eval_metrics = (metric_fn, [
                masked_token_label_ids, sent_label_ids,
                masked_token_predict_ids, sent_predict_ids
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={
                    "token_predict": token_predict_ids,
                    "sent_predict": sent_predict_ids
                },
                scaffold_fn=scaffold_fn)

        return output_spec
Example #15
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    if FLAGS.input_file_processor == "run_classifier":
        processors = {
            "sst-2": rc.SST2Processor,
            "mnli": rc.MnliProcessor,
        }
    elif FLAGS.input_file_processor == "run_classifier_distillation":
        processors = {
            "sst-2": rc.SST2ProcessorDistillation,
            "mnli": rc.MNLIProcessorDistillation,
        }
    else:
        raise ValueError("Invalid --input_file_processor flag value")

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)
    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    task_name = FLAGS.task_name.lower()
    processor = processors[task_name]()
    label_list = processor.get_labels()
    num_labels = len(label_list)

    input_ids_placeholder = tf.placeholder(dtype=tf.int32,
                                           shape=[None, FLAGS.max_seq_length])

    bert_input_mask_placeholder = tf.placeholder(
        dtype=tf.int32, shape=[None, FLAGS.max_seq_length])

    token_type_ids_placeholder = tf.placeholder(
        dtype=tf.int32, shape=[None, FLAGS.max_seq_length])

    prob_vector_placeholder = tf.placeholder(dtype=tf.float32,
                                             shape=[None, num_labels])

    one_hot_input_ids = tf.one_hot(input_ids_placeholder,
                                   depth=bert_config.vocab_size)

    input_tensor, _ = em_util.run_one_hot_embeddings(
        one_hot_input_ids=one_hot_input_ids, config=bert_config)

    flex_input_obj, per_eg_obj, probs = em_util.model_fn(
        input_tensor=input_tensor,
        bert_input_mask=bert_input_mask_placeholder,
        token_type_ids=token_type_ids_placeholder,
        bert_config=bert_config,
        num_labels=num_labels,
        obj_type=FLAGS.obj_type,
        prob_vector=prob_vector_placeholder)

    if FLAGS.obj_type.startswith("min"):
        final_obj = -1 * flex_input_obj
    elif FLAGS.obj_type.startswith("max"):
        final_obj = flex_input_obj

    # Calculate the gradient of the final loss function with respect to
    # the one-hot input space
    grad_obj_one_hot = tf.gradients(ys=final_obj, xs=one_hot_input_ids)[0]

    # gradients at the one-hot positions that are currently 1 (the original tokens);
    # this is one term in the directional derivative of HotFlip,
    # Eq1 in https://arxiv.org/pdf/1712.06751.pdf
    #
    # grad_obj_one_hot.shape = [batch_size, seq_length, vocab_size]
    # input_ids_placeholder.shape = [batch_size, seq_length]
    # original_token_gradients.shape = [batch_size, seq_length]
    original_token_gradients = tf.gather(params=grad_obj_one_hot,
                                         indices=tf.expand_dims(
                                             input_ids_placeholder, -1),
                                         batch_dims=2)
    original_token_gradients = tf.tile(original_token_gradients,
                                       multiples=[1, 1, FLAGS.beam_size])

    # These are the gradients / indices whose one-hot position has the largest
    # gradient magnitude; this performs part of the max calculation in Eq10 of
    # https://arxiv.org/pdf/1712.06751.pdf
    biggest_gradients, biggest_indices = tf.nn.top_k(input=grad_obj_one_hot,
                                                     k=FLAGS.beam_size)

    # Eq10 of https://arxiv.org/pdf/1712.06751.pdf
    grad_difference = biggest_gradients - original_token_gradients

    tvars = tf.trainable_variables()

    assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
        tvars, FLAGS.init_checkpoint)

    tf.logging.info("Variables mapped = %d / %d", len(assignment_map),
                    len(tvars))

    tf.train.init_from_checkpoint(FLAGS.init_checkpoint, assignment_map)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    if FLAGS.input_file:
        custom_examples = processor.get_custom_examples(FLAGS.input_file)
        custom_templates = [
            em_util.input_to_template(x, label_list) for x in custom_examples
        ]
    else:
        prob_vector = [float(x) for x in FLAGS.prob_vector.split(",")]
        custom_templates = [(FLAGS.input_template, prob_vector)]

    num_input_sequences = custom_templates[0][0].count("[SEP]")

    if FLAGS.flipping_mode == "beam_search":
        FLAGS.batch_size = 1

    detok_partial = functools.partial(em_util.detokenize, tokenizer=tokenizer)

    # Since input files will often be quite large, this flag allows processing
    # only a slice of the input file
    if FLAGS.input_file_range:
        start_index, end_index = FLAGS.input_file_range.split("-")
        if start_index == "start":
            start_index = 0
        if end_index == "end":
            end_index = len(custom_templates)
        start_index, end_index = int(start_index), int(end_index)
    else:
        start_index = 0
        end_index = len(custom_templates)

    tf.logging.info("Processing examples in range %d, %d", start_index,
                    end_index)

    all_elements = []

    too_long = 0

    for ip_num, (ip_template, prob_vector) in enumerate(
            custom_templates[start_index:end_index]):
        # Parse the input template into a list of IDs and the corresponding mask.
        # Different segments in template are separated by " <piece> "
        # Each segment is associated with a word piece (or [EMPTY] to get flex
        # inputs) and a frequency. (which is separated by "<freq>"). * can be used
        # to choose a frequency till the end of the string
        #
        # Here is an example 2-sequence template for tasks like MNLI to optimize
        # 20 vectors, (10 for each sequence)
        # [CLS]<freq>1 <piece> [EMPTY]<freq>10 <piece> [SEP]<freq>1 <piece> \
        # [EMPTY]<freq>10 <piece> [SEP]<freq>1 <piece> [PAD]<freq>*
        (input_ids, input_mask, bert_input_mask,
         token_type_ids) = em_util.template_to_ids(
             template=ip_template,
             config=bert_config,
             tokenizer=tokenizer,
             max_seq_length=FLAGS.max_seq_length)

        if len(input_ids) > FLAGS.max_seq_length:
            # truncate them!
            input_ids = input_ids[:FLAGS.max_seq_length]
            input_mask = input_mask[:FLAGS.max_seq_length]
            bert_input_mask = bert_input_mask[:FLAGS.max_seq_length]
            token_type_ids = token_type_ids[:FLAGS.max_seq_length]
            too_long += 1

        all_elements.append({
            "input_ids": input_ids,
            "original_input_ids": [ii for ii in input_ids],
            "ip_num": start_index + ip_num,
            "score": 0.0,
            "bert_input_mask": bert_input_mask,
            "input_mask": input_mask,
            "token_type_ids": token_type_ids,
            "prob_vector": prob_vector,
            "stopped": False,
            "steps_taken": 0
        })

    tf.logging.info("%d / %d were too long and hence truncated.", too_long,
                    len(all_elements))

    iteration_number = 0
    consistent_output_sequences = []

    while all_elements and iteration_number < 10:

        steps_taken = []
        output_sequences = []
        failures = []
        zero_step_instances = 0

        iteration_number += 1
        tf.logging.info("Starting iteration number %d", iteration_number)
        tf.logging.info("Pending items = %d / %d", len(all_elements),
                        len(custom_templates[start_index:end_index]))

        batch_elements = []
        for ip_num, input_object in enumerate(all_elements):
            batch_elements.append(input_object)
            # wait until the input has populated up to the batch size
            if (len(batch_elements) < FLAGS.batch_size
                    and ip_num < len(all_elements) - 1):
                continue

            # optimize a part of the flex_input (depending on the template)
            for step_num in range(FLAGS.total_steps):
                feed_dict = {
                    input_ids_placeholder:
                    np.array([x["input_ids"] for x in batch_elements]),
                    bert_input_mask_placeholder:
                    np.array([x["bert_input_mask"] for x in batch_elements]),
                    token_type_ids_placeholder:
                    np.array([x["token_type_ids"] for x in batch_elements]),
                    prob_vector_placeholder:
                    np.array([x["prob_vector"] for x in batch_elements])
                }

                if FLAGS.flipping_mode == "random":
                    # Avoiding the gradient computation when the flipping mode is random
                    peo, pr = sess.run([per_eg_obj, probs],
                                       feed_dict=feed_dict)
                else:
                    peo, gd, bi, pr = sess.run(
                        [per_eg_obj, grad_difference, biggest_indices, probs],
                        feed_dict=feed_dict)

                if FLAGS.print_flips:
                    output_log = "\n" + "\n".join([
                        "Objective = %.4f, Score = %.4f, Element %d = %s" %
                        (obj, elem["score"], kk,
                         detok_partial(elem["input_ids"]))
                        for kk, (obj,
                                 elem) in enumerate(zip(peo, batch_elements))
                    ])
                    tf.logging.info("Step = %d %s\n", step_num, output_log)

                should_stop = evaluate_stopping(
                    stopping_criteria=FLAGS.stopping_criteria,
                    obj_prob_vector=np.array(
                        [x["prob_vector"] for x in batch_elements]),
                    curr_prob_vector=pr,
                    per_example_objective=peo)

                for elem, stop_bool in zip(batch_elements, should_stop):
                    if stop_bool and (not elem["stopped"]):
                        if step_num == 0:
                            # don't actually stop the perturbation since we want a new input
                            zero_step_instances += 1
                        else:
                            elem["stopped"] = True
                            elem["steps_taken"] = step_num

                if np.all([elem["stopped"] for elem in batch_elements]):
                    steps_taken.extend(
                        [elem["steps_taken"] for elem in batch_elements])
                    output_sequences.extend([elem for elem in batch_elements])
                    batch_elements = []
                    break

                if step_num == FLAGS.total_steps - 1:
                    failures.extend([
                        elem for elem in batch_elements if not elem["stopped"]
                    ])
                    steps_taken.extend([
                        elem["steps_taken"] for elem in batch_elements
                        if elem["stopped"]
                    ])
                    output_sequences.extend(
                        [elem for elem in batch_elements if elem["stopped"]])
                    batch_elements = []
                    break

                # Flip a token / word-piece either systematically or randomly
                # For instances where hotflip was not successful, do some random
                # perturbations before doing hotflip
                if (FLAGS.flipping_mode == "random" or
                    (iteration_number > 1 and step_num < iteration_number)):
                    for element in batch_elements:
                        # don't perturb elements which have stopped
                        if element["stopped"]:
                            continue

                        random_seq_index = np.random.choice([
                            ii
                            for ii, mask_id in enumerate(element["input_mask"])
                            if mask_id > 0.5
                        ])

                        random_token_id = np.random.randint(
                            len(tokenizer.vocab))
                        while (tokenizer.inv_vocab[random_token_id][0] == "["
                               and tokenizer.inv_vocab[random_token_id][-1]
                               == "]"):
                            random_token_id = np.random.randint(
                                len(tokenizer.vocab))

                        element["input_ids"][
                            random_seq_index] = random_token_id

                elif FLAGS.flipping_mode == "greedy":
                    batch_elements = greedy_updates(
                        old_elements=batch_elements,
                        grad_difference=gd,
                        biggest_indices=bi,
                        max_seq_length=FLAGS.max_seq_length)

                elif FLAGS.flipping_mode == "beam_search":
                    # only supported with a batch size of 1!
                    batch_elements = beam_search(
                        old_beams=batch_elements,
                        grad_difference=gd,
                        biggest_indices=bi,
                        beam_size=FLAGS.beam_size,
                        accumulate_scores=FLAGS.accumulate_scores,
                        max_seq_length=FLAGS.max_seq_length)

                else:
                    raise ValueError("Invalid --flipping_mode flag value")

            tf.logging.info("steps = %.4f (%d failed, %d non-zero, %d zero)",
                            np.mean([float(x) for x in steps_taken if x > 0]),
                            len(failures),
                            len([x for x in steps_taken if x > 0]),
                            zero_step_instances)

        # Measure consistency of the final dataset - run a forward pass through the
        # entire final dataset and verify it satisfies the original objective. If
        # the code runs correctly, total_inconsistent = 0.
        tf.logging.info("Measuring consistency of final dataset")

        total_inconsistent = 0
        total_lossy = 0

        for i in range(0, len(output_sequences), FLAGS.batch_size):
            batch_elements = output_sequences[i:i + FLAGS.batch_size]
            feed_dict = {
                input_ids_placeholder:
                np.array([x["input_ids"] for x in batch_elements]),
                bert_input_mask_placeholder:
                np.array([x["bert_input_mask"] for x in batch_elements]),
                token_type_ids_placeholder:
                np.array([x["token_type_ids"] for x in batch_elements]),
                prob_vector_placeholder:
                np.array([x["prob_vector"] for x in batch_elements])
            }
            peo, pr = sess.run([per_eg_obj, probs], feed_dict=feed_dict)
            consistency_flags = evaluate_stopping(
                stopping_criteria=FLAGS.stopping_criteria,
                obj_prob_vector=np.array(
                    [x["prob_vector"] for x in batch_elements]),
                curr_prob_vector=pr,
                per_example_objective=peo)
            total_inconsistent += len(batch_elements) - np.sum(
                consistency_flags)

            # Next, apply a lossy perturbation to the input (conversion to a string)
            # This is often lossy since it eliminates impossible sequences and
            # incorrect tokenizations. We check how many consistencies still hold true
            all_detok_strings = [
                em_util.ids_to_strings(elem["input_ids"], tokenizer)
                for elem in batch_elements
            ]

            all_ip_examples = []
            if num_input_sequences == 1:
                for ds, be in zip(all_detok_strings, batch_elements):
                    prob_vector_labels = be["prob_vector"].tolist()
                    all_ip_examples.append(
                        rc.InputExample(text_a=ds[0],
                                        text_b=None,
                                        label=prob_vector_labels,
                                        guid=None))
            else:
                for ds, be in zip(all_detok_strings, batch_elements):
                    prob_vector_labels = be["prob_vector"].tolist()
                    all_ip_examples.append(
                        rc.InputExample(text_a=ds[0],
                                        text_b=ds[1],
                                        label=prob_vector_labels,
                                        guid=None))

            all_templates = [
                em_util.input_to_template(aie, label_list)
                for aie in all_ip_examples
            ]
            all_new_elements = []
            for ip_template, prob_vector in all_templates:
                (input_ids, input_mask, bert_input_mask,
                 token_type_ids) = em_util.template_to_ids(
                     template=ip_template,
                     config=bert_config,
                     tokenizer=tokenizer,
                     max_seq_length=FLAGS.max_seq_length)

                if len(input_ids) > FLAGS.max_seq_length:
                    input_ids = input_ids[:FLAGS.max_seq_length]
                    input_mask = input_mask[:FLAGS.max_seq_length]
                    bert_input_mask = bert_input_mask[:FLAGS.max_seq_length]
                    token_type_ids = token_type_ids[:FLAGS.max_seq_length]

                all_new_elements.append({
                    "input_ids": input_ids,
                    "input_mask": input_mask,
                    "bert_input_mask": bert_input_mask,
                    "token_type_ids": token_type_ids,
                    "prob_vector": prob_vector
                })
            feed_dict = {
                input_ids_placeholder:
                np.array([x["input_ids"] for x in all_new_elements]),
                bert_input_mask_placeholder:
                np.array([x["bert_input_mask"] for x in all_new_elements]),
                token_type_ids_placeholder:
                np.array([x["token_type_ids"] for x in all_new_elements]),
                prob_vector_placeholder:
                np.array([x["prob_vector"] for x in all_new_elements])
            }
            peo, pr = sess.run([per_eg_obj, probs], feed_dict=feed_dict)
            lossy_consistency_flags = evaluate_stopping(
                stopping_criteria=FLAGS.stopping_criteria,
                obj_prob_vector=np.array(
                    [x["prob_vector"] for x in all_new_elements]),
                curr_prob_vector=pr,
                per_example_objective=peo)

            total_lossy += len(all_new_elements) - np.sum(
                lossy_consistency_flags)

            net_consistency_flags = np.logical_and(consistency_flags,
                                                   lossy_consistency_flags)

            for elem, ncf in zip(batch_elements, net_consistency_flags):
                if ncf:
                    consistent_output_sequences.append(elem)
                else:
                    failures.append(elem)

        tf.logging.info("Total inconsistent found = %d / %d",
                        total_inconsistent, len(output_sequences))
        tf.logging.info("Total lossy inconsistent found = %d / %d",
                        total_lossy, len(output_sequences))
        tf.logging.info("Total consistent outputs so far = %d / %d",
                        len(consistent_output_sequences),
                        len(custom_templates[start_index:end_index]))

        # Getting ready for next iteration of processing
        if iteration_number < 10:
            for elem in failures:
                elem["input_ids"] = [x for x in elem["original_input_ids"]]
                elem["stopped"] = False
                elem["steps_taken"] = 0
                elem["score"] = 0.0
            all_elements = failures

    tf.logging.info("Giving up on %d instances!", len(failures))
    for elem in failures:
        consistent_output_sequences.append(elem)

    if FLAGS.output_file:
        final_output = []
        for op_num, elem in enumerate(consistent_output_sequences):
            detok_strings = em_util.ids_to_strings(elem["input_ids"],
                                                   tokenizer)

            if num_input_sequences == 1:
                final_output.append("%d\t%d\t%s" %
                                    (op_num, elem["ip_num"], detok_strings[0]))
            elif num_input_sequences == 2:
                final_output.append("%d\t%d\t%s\t%s" %
                                    (op_num, elem["ip_num"], detok_strings[0],
                                     detok_strings[1]))

        if num_input_sequences == 1:
            header = "index\toriginal_index\tsentence"
        elif num_input_sequences == 2:
            header = "index\toriginal_index\tsentence1\tsentence2"

        final_output = [header] + final_output

        with tf.gfile.Open(FLAGS.output_file, "w") as f:
            f.write("\n".join(final_output) + "\n")

    return
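A toy NumPy walk-through of the gradient-difference score computed in main() above (Eq10 of https://arxiv.org/pdf/1712.06751.pdf), with the batch dimension dropped and made-up shapes and values; it mirrors the tf.gather / tf.nn.top_k logic without building a graph.

import numpy as np

vocab_size, seq_length, beam_size = 5, 3, 2
rng = np.random.RandomState(0)

grad_obj_one_hot = rng.randn(seq_length, vocab_size)  # d(objective) / d(one-hot input)
input_ids = np.array([4, 0, 2])                       # current token at each position

# Gradient at the slots that are currently 1 (the original tokens).
original_token_gradients = grad_obj_one_hot[np.arange(seq_length), input_ids]
original_token_gradients = np.tile(original_token_gradients[:, None], [1, beam_size])

# Top-k candidate slots by gradient value, as with tf.nn.top_k above.
biggest_indices = np.argsort(-grad_obj_one_hot, axis=-1)[:, :beam_size]
biggest_gradients = np.take_along_axis(grad_obj_one_hot, biggest_indices, axis=-1)

# Eq10: a large positive entry means that flipping the position to that candidate
# token is predicted (to first order) to increase the objective the most.
grad_difference = biggest_gradients - original_token_gradients
print(grad_difference.shape)  # (3, 2)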
Example #16
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        num_choices = 2

        read_size = num_choices + 1
        input_ids = [
            features["input_ids" + str(i)] for i in range(0, read_size)
        ]
        input_mask = [
            features["input_mask" + str(i)] for i in range(0, read_size)
        ]
        segment_ids = [
            features["segment_ids" + str(i)] for i in range(0, read_size)
        ]
        label_ids = features["labels"]
        label_ids = label_ids[:, 4]

        seq_length = input_ids[0].shape[-1]
        input_ids = tf.reshape(tf.stack(input_ids, axis=1), [-1, seq_length])
        input_mask = tf.reshape(tf.stack(input_mask, axis=1), [-1, seq_length])
        segment_ids = tf.reshape(tf.stack(segment_ids, axis=1),
                                 [-1, seq_length])

        is_training = (mode == tf_estimator.ModeKeys.TRAIN)

        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if FLAGS.bilin_preproc:
            (total_loss, per_example_loss, logits,
             probabilities) = model_builder.create_model_bilin(
                 model, label_ids, num_choices)
        else:
            (total_loss, per_example_loss, logits,
             probabilities) = model_builder.create_model(
                 model, label_ids, num_choices)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf_estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       loss=total_loss,
                                                       train_op=train_op,
                                                       scaffold_fn=scaffold_fn)

        elif mode == tf_estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions)
                loss = tf.metrics.mean(values=per_example_loss)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={"probabilities": probabilities},
                scaffold_fn=scaffold_fn)
        return output_spec
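The stack-and-reshape above is the usual multiple-choice trick: the per-choice feature columns are stacked on a new axis and then flattened so that all choices of one example occupy consecutive rows of the [batch * read_size, seq_length] batch fed to BERT. A small numpy sketch (shapes are illustrative, not taken from the code above) shows the interleaving:

import numpy as np

batch, read_size, seq_length = 2, 3, 4
# One [batch, seq_length] array per choice, filled with the choice index.
per_choice = [np.full((batch, seq_length), i) for i in range(read_size)]

stacked = np.stack(per_choice, axis=1)        # [batch, read_size, seq_length]
flat = stacked.reshape(-1, seq_length)        # [batch * read_size, seq_length]
print(flat[:, 0])                             # -> [0 1 2 0 1 2]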
Beispiel #17
0
    def model_fn(features, labels, mode, params):
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        print('shape of input_ids', input_ids.shape)
        # label_mask = features["label_mask"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # Build the model from these inputs; input_ids is the id representation of the input tokens, label_ids the id representation of the labels
        (total_loss, logits, trans, pred_ids) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
            num_labels, use_one_hot_embeddings)
        print("total_loss=", total_loss)
        print("shape of pred_ids", pred_ids.shape)
        print(trans)
        tvars = tf.trainable_variables()
        scaffold_fn = None
        # Load the pre-trained BERT model
        if init_checkpoint:
            (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars,
                                                                                                       init_checkpoint)
            # Initialize variables with the parameters obtained from the pre-trained model
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            if use_tpu:
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")

        # Print the variables loaded from the checkpoint (logging loop left commented out)
        #         for var in tvars:
        #             init_string = ""
        #             if var.name in initialized_variable_names:
        #                 init_string = ", *INIT_FROM_CKPT*"
        #             tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
        #                             init_string)
        output_spec = None
        ## Training branch
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)  # scaffold hook that seeds the model with the pre-trained BERT parameters
        elif mode == tf.estimator.ModeKeys.EVAL:
            # Modified for NER
            def metric_fn(label_ids, logits, trans):
                # First Viterbi-decode the outputs (CRF decoding)

                weight = tf.sequence_mask(FLAGS.max_seq_length)
                precision = tf_metrics.precision(label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)
                recall = tf_metrics.recall(label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)
                f = tf_metrics.f1(label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [label_ids, logits, trans])
            # eval_metrics = (metric_fn, [label_ids, logits])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)  #
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions=pred_ids,
                scaffold_fn=scaffold_fn
            )
        return output_spec
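For context, a model_fn closure like the one above is normally handed to a TPUEstimator. The sketch below shows one plausible TF 1.x wiring with use_tpu=False; the directory, batch sizes and step counts are placeholders, not values from the original code.

import tensorflow as tf

def build_estimator(model_fn):
    """Sketch: wrap a model_fn closure in a TPUEstimator (CPU/GPU fallback)."""
    run_config = tf.contrib.tpu.RunConfig(
        model_dir="/tmp/bert_ner_model",      # placeholder output directory
        save_checkpoints_steps=1000,
        tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=1000))
    return tf.contrib.tpu.TPUEstimator(
        use_tpu=False,                        # the tpu_scaffold branch is skipped
        model_fn=model_fn,
        config=run_config,
        train_batch_size=32,
        eval_batch_size=8,
        predict_batch_size=8)

# estimator = build_estimator(model_fn)
# estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)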
Beispiel #18
0
    def __init__(self, config):

        self.config = config
        self.lr = config["lr"]
        self.lstm_dim = config["lstm_dim"]
        self.num_tags = config["num_tags"]

        self.global_step = tf.Variable(0, trainable=False)
        self.best_dev_f1 = tf.Variable(0.0, trainable=False)
        self.best_test_f1 = tf.Variable(0.0, trainable=False)
        self.initializer = initializers.xavier_initializer()

        # add placeholders for the model
        self.input_ids = tf.placeholder(dtype=tf.int32,
                                        shape=[None, None],
                                        name="input_ids")
        self.input_mask = tf.placeholder(dtype=tf.int32,
                                         shape=[None, None],
                                         name="input_mask")
        self.segment_ids = tf.placeholder(dtype=tf.int32,
                                          shape=[None, None],
                                          name="segment_ids")
        self.targets = tf.placeholder(dtype=tf.int32,
                                      shape=[None, None],
                                      name="Targets")
        # dropout keep prob
        self.dropout = tf.placeholder(dtype=tf.float32, name="Dropout")

        used = tf.sign(tf.abs(self.input_ids))
        length = tf.reduce_sum(used, reduction_indices=1)
        self.lengths = tf.cast(length, tf.int32)
        self.batch_size = tf.shape(self.input_ids)[0]
        self.num_steps = tf.shape(self.input_ids)[-1]

        # embeddings for chinese character and segmentation representation
        embedding = self.bert_embedding()

        # apply dropout before feed to lstm layer
        lstm_inputs = tf.nn.dropout(embedding, self.dropout)

        # bi-directional lstm layer
        lstm_outputs = self.biLSTM_layer(lstm_inputs, self.lstm_dim,
                                         self.lengths)

        # logits for tags
        self.logits = self.project_layer(lstm_outputs)

        # loss of the model
        self.loss = self.loss_layer(self.logits, self.lengths)

        # Where the BERT model parameters are initialized from
        init_checkpoint = "/home/ubuntu/zzp/bertNER/pretrain/new_bert/model.ckpt-400000"
        # Collect all trainable variables in the model.
        tvars = tf.trainable_variables()
        # Load the BERT checkpoint
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        print("**** Trainable Variables ****")
        # 打印加载模型的参数
        train_vars = []
        for var in tvars:
            init_string = ""
            train_vars.append(var)
            print("  name = %s, shape = %s%s", var.name, var.shape,
                  init_string)
        with tf.variable_scope("optimizer"):
            optimizer = self.config["optimizer"]
            if optimizer == "adam":
                self.opt = tf.train.AdamOptimizer(self.lr)
            else:
                raise KeyError

            grads = tf.gradients(self.loss, train_vars)
            (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)

            self.train_op = self.opt.apply_gradients(
                zip(grads, train_vars), global_step=self.global_step)
            #capped_grads_vars = [[tf.clip_by_value(g, -self.config["clip"], self.config["clip"]), v]
            #                     for g, v in grads_vars if g is not None]
            #self.train_op = self.opt.apply_gradients(capped_grads_vars, self.global_step, )

        # saver of the model
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
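A graph built this way is driven by explicit session calls. The method below is a minimal sketch of one training step and is not part of the original class; the batch layout and the keep-probability value are assumptions.

    def run_step(self, sess, batch, dropout_keep=0.5):
        """Run one optimization step; batch = (input_ids, input_mask, segment_ids, targets).

        Sketch only: shows how the placeholders defined above would be fed.
        """
        input_ids, input_mask, segment_ids, targets = batch
        feed_dict = {
            self.input_ids: input_ids,
            self.input_mask: input_mask,
            self.segment_ids: segment_ids,
            self.targets: targets,
            self.dropout: dropout_keep,   # keep probability for tf.nn.dropout above
        }
        step, loss, _ = sess.run(
            [self.global_step, self.loss, self.train_op], feed_dict=feed_dict)
        return step, loss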
Beispiel #19
0
    def model_fn(features, labels, mode, params):
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        #label_mask = features["label_mask"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits,
         predicts) = create_model(bert_config, is_training, input_ids,
                                  input_mask, segment_ids, label_ids,
                                  num_labels, use_one_hot_embeddings)
        tvars = tf.trainable_variables()
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        tf.logging.info("**** Trainable Variables ****")

        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits):
                # def metric_fn(label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                precision = tf_metrics.precision(label_ids,
                                                 predictions,
                                                 11, [2, 3, 4, 5, 6, 7],
                                                 average="macro")
                recall = tf_metrics.recall(label_ids,
                                           predictions,
                                           11, [2, 3, 4, 5, 6, 7],
                                           average="macro")
                f = tf_metrics.f1(label_ids,
                                  predictions,
                                  11, [2, 3, 4, 5, 6, 7],
                                  average="macro")
                #
                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    #"eval_loss": loss,
                }

            eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            # eval_metrics = (metric_fn, [label_ids, logits])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
        return output_spec
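The tf_metrics calls above return (value, update_op) pairs, just like tf.metrics. A small sketch of evaluating the macro-averaged precision outside an Estimator, assuming the guillaumegenthial/tf_metrics package and made-up label ids:

import tensorflow as tf
import tf_metrics  # assumed: github.com/guillaumegenthial/tf_metrics

labels = tf.constant([2, 3, 4, 0, 2])
predictions = tf.constant([2, 3, 5, 0, 2])

# Same calling convention as metric_fn above: labels, predictions,
# number of classes, positive class indices, averaging mode.
prec, prec_op = tf_metrics.precision(labels, predictions, 11,
                                     [2, 3, 4, 5, 6, 7], average="macro")
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # metric accumulators are local vars
    sess.run(prec_op)                           # accumulate this batch
    print(sess.run(prec))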
    def model_fn(features, labels, mode, params):
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        print('shape of input_ids', input_ids.shape)
        # label_mask = features["label_mask"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # Build the model from these inputs; input_ids is the id representation of the input tokens, label_ids the id representation of the labels
        (total_loss, logits, trans, pred_ids) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
            num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        scaffold_fn = None
        # Load the pre-trained BERT model
        if init_checkpoint:
            (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars,
                                                                                                       init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            if use_tpu:
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")

        # Print the variables loaded from the checkpoint
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)  # scaffold hook that seeds the model with the pre-trained BERT parameters
        elif mode == tf.estimator.ModeKeys.EVAL:
            # Modified for NER
            def metric_fn(label_ids, logits, trans):
                # First Viterbi-decode the outputs (CRF decoding)

                weight = tf.sequence_mask(FLAGS.max_seq_length)
                precision = tf_metrics.precision(label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)
                recall = tf_metrics.recall(label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)
                f = tf_metrics.f1(label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [label_ids, logits, trans])
            # eval_metrics = (metric_fn, [label_ids, logits])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)  #
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions=pred_ids,
                scaffold_fn=scaffold_fn
            )
        return output_spec
Beispiel #21
0
    def model_fn(features, labels, mode, params):
        tf.logging.info("*** Features ***")  # logging 用来记录日志
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        pos_embedding = features["pos_embedding"]  # added: fetch the position embedding
        dp_embedding = features["dp_embedding"]  # added: fetch the extra embedding feature
        # label_mask = features["label_mask"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, logits, predicts) = create_model(  # build the model via the BERT interface
            bert_config, is_training, input_ids, input_mask, segment_ids,
            label_ids, pos_embedding, dp_embedding, num_labels,
            use_one_hot_embeddings)
        tvars = tf.trainable_variables()  # get all variables to be trained
        scaffold_fn = None
        if init_checkpoint:  # pre-load from BERT; only the pre-trained BERT weights are loaded here
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint,
                                          assignment_map)  # use the pre-trained model
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        tf.logging.info("**** Trainable Variables ****")

        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:  # mark variables initialized from the pre-loaded checkpoint
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:  # training branch
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps,
                                                     use_tpu)  # create the Adam-based optimizer
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(  # the TPU-specific EstimatorSpec
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:  # evaluation branch

            def metric_fn(label_ids, predicts, valid_labels):
                # def metric_fn(label_ids, logits):
                # predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)      # take the argmax over the last dimension as the prediction
                precision = tf_metrics.precision(
                    label_ids,
                    predicts,
                    num_labels,
                    valid_labels,
                    average="macro")  # 对比实际值和预测值计算正确率
                recall = tf_metrics.recall(label_ids,
                                           predicts,
                                           num_labels,
                                           valid_labels,
                                           average="macro")  # 对比实际值和预测值计算召回率
                f = tf_metrics.f1(label_ids,
                                  predicts,
                                  num_labels,
                                  valid_labels,
                                  average="macro")  # 对比实际值和预测值计算F值
                #
                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [label_ids, predicts, valid_labels])
            # eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
        return output_spec
    def model_fn(features, labels, mode, params):
        """The `model_fn` for TPUEstimator."""
        del labels, params

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        unique_ids = features["unique_ids"]
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]

        start_logits, end_logits, answer_type_logits = create_model(
            bert_config=bert_config,
            is_training=False,
            input_ids=input_ids,
            input_mask=input_mask,
            segment_ids=segment_ids,
            span_encoding=span_encoding,
            max_answer_length=max_answer_length,
            use_one_hot_embeddings=use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        scaffold_fn = None
        if init_checkpoint:
            assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        predictions = {
            "unique_ids": tf.identity(unique_ids),
            "start_logits": start_logits,
            "end_logits": end_logits,
            "answer_type_logits": answer_type_logits
        }

        # Input features need to be present in tf.Example output.
        predictions.update({
            "input_ids":
            tf.identity(input_ids),
            "input_mask":
            tf.identity(input_mask),
            "segment_ids":
            tf.identity(segment_ids),
            "start_positions":
            tf.identity(features["start_positions"]),
            "end_positions":
            tf.identity(features["end_positions"]),
            "answer_types":
            tf.identity(features["answer_types"])
        })

        output_spec = tf_estimator.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)

        return output_spec
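The tf.identity pass-through above makes the raw input features come back alongside the logits at prediction time. A minimal sketch of consuming those dicts (estimator and predict_input_fn are assumed to exist; field handling is illustrative):

def collect_predictions(estimator, predict_input_fn):
    """Sketch: gather per-example prediction dicts emitted by the spec above."""
    results = []
    for p in estimator.predict(predict_input_fn, yield_single_examples=True):
        results.append({
            "unique_id": int(p["unique_ids"]),
            "start_logits": p["start_logits"].tolist(),
            "end_logits": p["end_logits"].tolist(),
            "answer_type_logits": p["answer_type_logits"].tolist(),
        })
    return results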
Beispiel #23
0
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for Estimator."""

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, output_layer, logits, probabilities) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
        num_labels, multilabel, sent_rels, sentiment, entailment_rels,
        entailment, corr_rels, correlation)

    # Debug print to a file (unused); no data is loaded at graph-construction time
    # with open('debug_text.txt', 'a+') as infile:
    # 	print(logits, probabilities, file=infile)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint,
                                                      FLAGS.transfer_learning)
      tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Initialized Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:

      freeze_layer_fn = (None
                         if not FLAGS.freeze_layers else lambda x: "bert" in x)
      train_op = optimization.create_optimizer(
          total_loss,
          learning_rate,
          num_train_steps,
          num_warmup_steps,
          use_tpu=False,
          freeze_layer_fn=freeze_layer_fn)

      output_spec = tf.estimator.EstimatorSpec(
          mode=mode, loss=total_loss, train_op=train_op, scaffold=scaffold_fn)

    elif mode == tf.estimator.ModeKeys.EVAL:

      # Create dictionary for evaluation metrics
      eval_dict = {}

      def metric_fn_single(per_example_loss, label_ids, logits):
        """Compute accuracy for the single-label case."""
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        true_labels = tf.argmax(
            label_ids, axis=-1,
            output_type=tf.int32)  # Get ids from one hot labels
        accuracy = tf.metrics.accuracy(
            labels=true_labels, predictions=predictions)
        loss = tf.metrics.mean(values=per_example_loss)
        eval_dict["eval_accuracy"] = accuracy
        eval_dict["eval_loss"] = loss

      def get_f1(precision, recall):
        """Calculate F1 score based on precision and recall."""
        return (2 * precision[0] * recall[0] /
                (precision[0] + recall[0] + 1e-5),
                tf.group(precision[1], recall[1]))

      def get_threshold_based_scores(y_true, y_pred):
        """Compute precision, recall and F1 at thresholds."""
        thresholds = [float(v) for v in FLAGS.eval_thresholds.split(",")]
        (prec_t, prec_t_op) = tf.metrics.precision_at_thresholds(
            y_true, y_pred, thresholds=thresholds)
        (rec_t, rec_t_op) = tf.metrics.recall_at_thresholds(
            y_true, y_pred, thresholds=thresholds)
        for i, v in enumerate(thresholds):
          eval_dict["precision_at_threshold_%.2f" % v] = (prec_t[i], prec_t_op)
          eval_dict["recall_at_threshold_%.2f" % v] = (rec_t[i], rec_t_op)
          eval_dict["F1_at_threshold_%.2f" % v] = get_f1((prec_t[i], prec_t_op),
                                                         (rec_t[i], rec_t_op))

      def get_relation_based_scores(y_true, y_pred, relations, name):
        """Measure performance based on label relations."""

        def expand_labels(labels):
          """Expand the set of labels based on label relations."""

          def check_relations(rels):
            """Check whether a relation applies to a particular label set."""
            is_in_category = tf.reduce_any((labels + rels) > 1)
            return tf.cond(is_in_category, lambda: labels + rels,
                           lambda: labels)

          new_labels = tf.reduce_sum(
              tf.map_fn(check_relations, relations), axis=0)
          return tf.cast(new_labels >= 1, tf.int64)

        pred = tf.map_fn(expand_labels, y_pred)
        true = tf.map_fn(expand_labels, y_true)
        precision = tf.metrics.precision(true, pred)
        recall = tf.metrics.recall(true, pred)
        eval_dict[name + "_precision"] = precision
        eval_dict[name + "_recall"] = recall
        eval_dict[name + "_f1"] = get_f1(precision, recall)
        eval_dict[name + "_accuracy"] = tf.metrics.accuracy(true, pred)

      def metric_fn_multi(per_example_loss, label_ids, probabilities):
        """Compute class-level accuracies for the multi-label case."""
        label_ids = tf.cast(label_ids, tf.int64)
        logits_split = tf.split(probabilities, num_labels, axis=-1)
        label_ids_split = tf.split(label_ids, num_labels, axis=-1)
        pred_ind = tf.cast(probabilities >= FLAGS.eval_prob_threshold, tf.int64)
        pred_ind_split = tf.split(pred_ind, num_labels, axis=-1)
        weights = tf.reduce_sum(label_ids, axis=0)

        eval_dict["per_example_eval_loss"] = tf.metrics.mean(
            values=per_example_loss)

        # Calculate accuracy, precision and recall
        get_threshold_based_scores(label_ids, probabilities)

        # Calculate values at the emotion level
        auc_vals = []
        accuracies = []
        for j, logits in enumerate(logits_split):
          current_auc, update_op_auc = tf.metrics.auc(label_ids_split[j],
                                                      logits)
          eval_dict[idx2emotion[j] + "_auc"] = (current_auc, update_op_auc)
          current_acc, update_op_acc = tf.metrics.accuracy(
              label_ids_split[j], pred_ind_split[j])
          eval_dict[idx2emotion[j] + "_accuracy"] = (current_acc, update_op_acc)
          eval_dict[idx2emotion[j] + "_precision"] = tf.metrics.precision(
              label_ids_split[j], pred_ind_split[j])
          eval_dict[idx2emotion[j] + "_recall"] = tf.metrics.recall(
              label_ids_split[j], pred_ind_split[j])
          auc_vals.append(current_auc)
          accuracies.append(current_acc)
        auc_vals = tf.convert_to_tensor(auc_vals, dtype=tf.float32)
        accuracies = tf.convert_to_tensor(accuracies, dtype=tf.float32)
        eval_dict["auc"] = tf.metrics.mean(values=auc_vals)
        eval_dict["auc_weighted"] = tf.metrics.mean(
            values=auc_vals, weights=weights)
        eval_dict["accuracy"] = tf.metrics.mean(values=accuracies)
        eval_dict["accuracy_weighted"] = tf.metrics.mean(
            values=accuracies, weights=weights)

        # Calculate sentiment-based performance
        get_relation_based_scores(label_ids, pred_ind,
                                  tf.constant(sentiment_groups, dtype=tf.int64),
                                  "sentiment")

        # Calculate emotion-intensity based performance
        get_relation_based_scores(label_ids, pred_ind,
                                  tf.constant(intensity_groups, dtype=tf.int64),
                                  "emotion_intensity")

      if multilabel:
        metric_fn_multi(per_example_loss, label_ids, probabilities)
      else:
        metric_fn_single(per_example_loss, label_ids, logits)

      output_spec = tf.estimator.EstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metric_ops=eval_dict,
          scaffold=scaffold_fn)
    else:
      print("mode:", mode, "probabilities:", probabilities)
      output_spec = tf.estimator.EstimatorSpec(
          mode=mode,
          predictions={"output_layer":output_layer, "logits":logits, "probabilities": probabilities},
          scaffold=scaffold_fn)
    return output_spec
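metric_fn_multi above first thresholds the per-class probabilities and then scores the resulting 0/1 predictions; get_f1 combines a precision and a recall metric. The same arithmetic in plain numpy (numbers are illustrative only):

import numpy as np

probabilities = np.array([[0.9, 0.2, 0.6],
                          [0.1, 0.8, 0.4]])
label_ids = np.array([[1, 0, 1],
                      [0, 1, 0]])

threshold = 0.5                                    # stand-in for FLAGS.eval_prob_threshold
pred_ind = (probabilities >= threshold).astype(np.int64)

tp = np.sum((pred_ind == 1) & (label_ids == 1))
precision = tp / max(np.sum(pred_ind == 1), 1)
recall = tp / max(np.sum(label_ids == 1), 1)
f1 = 2 * precision * recall / (precision + recall + 1e-5)  # same form as get_f1
print(pred_ind, precision, recall, f1)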
Beispiel #24
0
    def model_fn(features, labels, mode, params):
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        # label_mask = features["label_mask"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # Build the model on the batch inputs
        (total_loss, logits, trans, pred_ids) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids,
            label_ids, num_labels, use_one_hot_embeddings, FLAGS.dropout_rate,
            FLAGS.lstm_size, FLAGS.cell, FLAGS.num_layers)
        tvars = tf.trainable_variables()
        # Load the BERT checkpoint
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            # if use_tpu:
            #     def tpu_scaffold():
            #         tf.train.init_from_checkpoint(
            #             init_checkpoint, assignment_map)
            #         return tf.train.Scaffold()
            #
            #     scaffold_fn = tpu_scaffold
            # else:
            #     tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        tf.logging.info("**** Trainable Variables ****")

        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            tf.summary.scalar('loss', total_loss)
            # Modified for NER
            hook_dict = {}
            hook_dict['loss'] = total_loss
            hook_dict['global_steps'] = tf.train.get_or_create_global_step()
            logging_hook = tf.train.LoggingTensorHook(
                hook_dict, every_n_iter=FLAGS.save_summary_steps)

            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                training_hooks=[logging_hook])
            # output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            #     mode=mode,
            #     loss=total_loss,
            #     train_op=train_op,
            #     scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            # Modified for NER
            # def metric_fn(label_ids, pred_ids):
            #     return {
            #         "eval_loss": tf.metrics.mean_squared_error(labels=label_ids, predictions=pred_ids),
            #     }

            # eval_metrics = metric_fn(label_ids, pred_ids)
            # output_spec = tf.estimator.EstimatorSpec(
            #     mode=mode,
            #     loss=total_loss,
            #     eval_metric_ops=eval_metrics
            # )
            # hook_dict = {}

            def metric_fn(label_ids, pred_ids, num_labels):
                # predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                pos_indices = list(range(2, num_labels - 3))
                # pos_indices = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                #                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
                # pos_indices = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                precision = tf_metrics.precision(label_ids,
                                                 pred_ids,
                                                 num_labels,
                                                 pos_indices,
                                                 average="micro")
                recall = tf_metrics.recall(label_ids,
                                           pred_ids,
                                           num_labels,
                                           pos_indices,
                                           average="micro")
                f = tf_metrics.f1(label_ids,
                                  pred_ids,
                                  num_labels,
                                  pos_indices,
                                  average="micro")
                # hook_dict['precision'] = precision
                # hook_dict['recall'] = recall
                # hook_dict['f'] = f
                # tf.summary.scalar('precision', precision)
                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }

            # eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            eval_metrics = (metric_fn, [label_ids, pred_ids, num_labels])

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                          predictions=pred_ids)
        return output_spec
Beispiel #25
0
    def model_fn(features, labels, mode, params):
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        print('shape of input_ids', input_ids.shape)
        print('shape of label_ids', label_ids.shape)
        # label_mask = features["label_mask"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits, trans, pred_ids) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
            num_labels, use_one_hot_embeddings)

        print('shape of pred_ids', pred_ids.shape)

        global_step = tf.train.get_or_create_global_step()
        # add summary
        tf.summary.scalar('loss', total_loss)

        tvars = tf.trainable_variables()
        scaffold_fn = None
        if init_checkpoint and is_training:
            (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars,
                                                                                                       init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            if use_tpu:
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()
                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            tf.logging.info("**** Trainable Variables ****")
            for var in tvars:
                init_string = ""
                if var.name in initialized_variable_names:
                    init_string = ", *INIT_FROM_CKPT*"
                tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                                init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.PREDICT:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions=pred_ids,
                scaffold_fn=scaffold_fn
            )
        else:
            if mode == tf.estimator.ModeKeys.TRAIN:
                '''
                train_op = optimization.create_optimizer(
                    total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
                '''
                lr = tf.train.exponential_decay(learning_rate, global_step, 5000, 0.9, staircase=True)
                optimizer = tf.train.AdamOptimizer(lr)
                grads, _ = tf.clip_by_global_norm(tf.gradients(total_loss, tvars), 1.5)
                train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)
                if FLAGS.use_feature_based:
                    train_op = tf.train.AdamOptimizer(learning_rate).minimize(total_loss, global_step=global_step)
                logging_hook = tf.train.LoggingTensorHook({"batch_loss" : total_loss}, every_n_iter=10)
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    train_op=train_op,
                    training_hooks = [logging_hook],
                    scaffold_fn=scaffold_fn)
            else: # mode == tf.estimator.ModeKeys.EVAL:
                def metric_fn(label_ids, pred_ids, per_example_loss, input_mask):
                    # ['<pad>'] + ["O", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC", "B-MISC", "I-MISC", "X"]
                    indices = [2, 3, 4, 5, 6, 7, 8, 9]
                    precision = tf_metrics.precision(label_ids, pred_ids, num_labels, indices, input_mask)
                    recall = tf_metrics.recall(label_ids, pred_ids, num_labels, indices, input_mask)
                    f = tf_metrics.f1(label_ids, pred_ids, num_labels, indices, input_mask)
                    accuracy = tf.metrics.accuracy(label_ids, pred_ids, input_mask)
                    loss = tf.metrics.mean(per_example_loss)
                    return {
                        'eval_precision': precision,
                        'eval_recall': recall,
                        'eval_f': f,
                        'eval_accuracy': accuracy,
                        'eval_loss': loss,
                    }
                eval_metrics = (metric_fn, [label_ids, pred_ids, per_example_loss, input_mask])
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metrics=eval_metrics,
                    scaffold_fn=scaffold_fn)
        return output_spec
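The hand-rolled training branch above decays the learning rate every 5000 steps with staircase=True, i.e. the decay exponent is the number of completed 5000-step intervals. A quick numpy check of that closed form (the base rate is illustrative):

import numpy as np

base_lr, decay_steps, decay_rate = 5e-5, 5000, 0.9

def staircase_lr(step):
    # Closed form of tf.train.exponential_decay(..., staircase=True).
    return base_lr * decay_rate ** np.floor(step / decay_steps)

for step in (0, 4999, 5000, 12500):
    print(step, staircase_lr(step))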
Beispiel #26
0
    def model_fn(features, labels, mode, params):
        logging.info("*** Features ***")
        for name in sorted(features.keys()):
            logging.info("  name = {}, shape = {}".format(name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, pred_ids) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids,
            label_ids, num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            if use_tpu:
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()
                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        logging.info("**** Trainable Variables ****")

        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            logging.info("  name = {}, shape = {} {}".format(var.name, var.shape, init_string))

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            # Modified for NER
            def metric_fn(label_ids, pred_ids):
                try:
                    # confusion matrix
                    cm = tf_metrics.streaming_confusion_matrix(label_ids, pred_ids, num_labels, weights=input_mask)
                    return {
                        "confusion_matrix": cm
                    }
                except Exception as e:
                    logging.error(str(e))

            eval_metrics = (metric_fn, [label_ids, pred_ids])

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn
            )
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=pred_ids, scaffold_fn=scaffold_fn)
        return output_spec
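The confusion-matrix metric above accumulates a num_labels x num_labels count matrix over the eval set; per-class precision and recall then fall out of its columns and rows. A plain numpy sketch with toy counts:

import numpy as np

# cm[i, j] = number of tokens with gold label i predicted as label j (toy values).
cm = np.array([[50, 2, 1],
               [3, 40, 5],
               [0, 4, 45]], dtype=np.float64)

tp = np.diag(cm)
precision = tp / np.maximum(cm.sum(axis=0), 1)  # column sums = predicted counts
recall = tp / np.maximum(cm.sum(axis=1), 1)     # row sums = gold counts
print(precision, recall)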
Beispiel #27
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits,
         probabilities) = create_model(bert_config, is_training, input_ids,
                                       input_mask, segment_ids, label_ids,
                                       num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:
            initialized_variable_names = []

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       loss=total_loss,
                                                       train_op=train_op,
                                                       scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(label_ids, predictions)
                loss = tf.metrics.mean(per_example_loss)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {"probabilities": probabilities}
            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       predictions=predictions,
                                                       scaffold_fn=scaffold_fn)

        else:
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode, predictions=probabilities, scaffold_fn=scaffold_fn)
        return output_spec
    def __init__(self, bert_config, num_labels, seq_length, init_checkpoint):
        self.bert_config = bert_config
        self.num_labels = num_labels
        self.seq_length = seq_length
        self.tower_grads = []
        self.losses = []

        self.input_ids = tf.placeholder(tf.int32, [None, self.seq_length],
                                        name='input_ids')
        self.input_mask = tf.placeholder(tf.int32, [None, self.seq_length],
                                         name='input_mask')
        self.segment_ids = tf.placeholder(tf.int32, [None, self.seq_length],
                                          name='segment_ids')
        self.labels = tf.placeholder(tf.int32, [None], name='labels')
        self.batch_size = tf.placeholder(tf.int32, shape=[], name='batch_size')
        self.is_training = tf.placeholder(tf.bool,
                                          shape=[],
                                          name='is_training')
        print(self.batch_size)
        self.gpu_step = self.batch_size // gpu_nums

        global_step = tf.train.get_or_create_global_step()

        learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32)

        # Implements linear decay of the learning rate.
        learning_rate = tf.train.polynomial_decay(learning_rate,
                                                  global_step,
                                                  num_train_steps,
                                                  end_learning_rate=0.0,
                                                  power=1.0,
                                                  cycle=False)

        if num_warmup_steps:
            global_steps_int = tf.cast(global_step, tf.int32)
            warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32)

            global_steps_float = tf.cast(global_steps_int, tf.float32)
            warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)

            warmup_percent_done = global_steps_float / warmup_steps_float
            warmup_learning_rate = init_lr * warmup_percent_done

            is_warmup = tf.cast(global_steps_int < warmup_steps_int,
                                tf.float32)
            learning_rate = ((1.0 - is_warmup) * learning_rate +
                             is_warmup * warmup_learning_rate)

        optimizer = optimization.AdamWeightDecayOptimizer(
            learning_rate=learning_rate,
            weight_decay_rate=0.01,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-6,
            exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])

        with tf.variable_scope(tf.get_variable_scope()) as outer_scope:
            pred = []
            label = []
            for d in range(gpu_nums):
                with tf.device("/gpu:%s" % d), tf.name_scope("%s_%s" %
                                                             ("tower", d)):
                    self.model = modeling.BertModel(
                        config=self.bert_config,
                        is_training=self.is_training,
                        input_ids=self.input_ids[d * self.gpu_step:(d + 1) *
                                                 self.gpu_step],
                        input_mask=self.input_mask[d * self.gpu_step:(d + 1) *
                                                   self.gpu_step],
                        token_type_ids=self.segment_ids[d *
                                                        self.gpu_step:(d + 1) *
                                                        self.gpu_step])
                    print("GPU:", d)

                    tvars = tf.trainable_variables()
                    initialized_variable_names = {}
                    if init_checkpoint:
                        (assignment_map, initialized_variable_names
                         ) = modeling.get_assignment_map_from_checkpoint(
                             tvars, init_checkpoint)
                        tf.train.init_from_checkpoint(init_checkpoint,
                                                      assignment_map)

                    logging.info("**** Trainable Variables ****")
                    for var in tvars:
                        init_string = ""
                        if var.name in initialized_variable_names:
                            init_string = ", *INIT_FROM_CKPT*"
                        logging.info("  name = %s, shape = %s%s", var.name,
                                     var.shape, init_string)

                    output_layer = self.model.get_pooled_output()
                    logging.info(output_layer)

                    # self.is_training is a bool placeholder; a Python `==`
                    # comparison does not inspect its runtime value, so
                    # condition the dropout on the tensor instead.
                    output_layer = tf.layers.dropout(output_layer,
                                                     rate=0.1,
                                                     training=self.is_training)

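                    # The batch is assumed to hold the two sentences of each
                    # pair in adjacent rows: even rows (stride 2 from index 0)
                    # and odd rows (stride 2 from index 1) are sliced out and
                    # concatenated into a single pair representation.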
                    match_1 = tf.strided_slice(output_layer, [0],
                                               [self.gpu_step], [2])
                    match_2 = tf.strided_slice(output_layer, [1],
                                               [self.gpu_step], [2])

                    match = tf.concat([match_1, match_2], 1)

                    self.logits = tf.layers.dense(match,
                                                  self.num_labels,
                                                  name='fc',
                                                  reuse=tf.AUTO_REUSE)

                    # predicted labels
                    self.y_pred_cls = tf.argmax(tf.nn.softmax(self.logits),
                                                1,
                                                name="pred")
                    logging.info(self.y_pred_cls)

                    # ground-truth labels
                    self.r_labels = tf.strided_slice(
                        self.labels[d * self.gpu_step:(d + 1) * self.gpu_step],
                        [0], [self.gpu_step], [2])
                    logging.info(self.r_labels)

                    one_hot_labels = tf.one_hot(self.r_labels,
                                                depth=self.num_labels,
                                                dtype=tf.float32)

                    log_probs = tf.nn.log_softmax(self.logits, axis=-1)
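                    # Hand-weighted cross entropy: the five classes are scaled
                    # by (30, 9, 2, 2, 9), presumably to counter class
                    # imbalance; the 1e-10 term is a small constant offset.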
                    per_example_loss = -(30 * one_hot_labels[:, 0] * log_probs[:, 0]) \
                                       - (9 * one_hot_labels[:, 1] * log_probs[:, 1]) \
                                       - (2 * one_hot_labels[:, 2] * log_probs[:, 2]) \
                                       - (2 * one_hot_labels[:, 3] * log_probs[:, 3]) \
                                       - (9 * one_hot_labels[:, 4] * log_probs[:, 4]) \
                                       + 1e-10

                    self.loss = tf.reduce_mean(per_example_loss)

                    #self.optim = optimization.create_optimizer(self.loss, learning_rate, num_train_steps, num_warmup_steps, False)

                    tvars = tf.trainable_variables()
                    grads = tf.gradients(self.loss, tvars)

                    (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)

                    self.tower_grads.append(list(zip(grads, tvars)))
                    self.losses.append(self.loss)
                    label.append(self.r_labels)
                    pred.append(self.y_pred_cls)
                outer_scope.reuse_variables()

        with tf.name_scope("apply_gradients"), tf.device("/cpu:0"):
            gradients = self.average_gradients(self.tower_grads)
            train_op = optimizer.apply_gradients(gradients,
                                                 global_step=global_step)
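            # AdamWeightDecayOptimizer does not update the global step inside
            # apply_gradients, so it is incremented manually here.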
            new_global_step = global_step + 1
            self.train_op = tf.group(train_op,
                                     [global_step.assign(new_global_step)])
            self.losses = tf.reduce_mean(self.losses)
            self.pred = tf.concat(pred, 0)
            self.label = tf.concat(label, 0)
            logging.info(self.pred)
            logging.info(self.label)
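
    # Editor's sketch (not from the original source): the apply_gradients
    # block above calls self.average_gradients, which is not shown here. A
    # typical implementation averages each variable's gradient across the
    # per-GPU towers before the single apply_gradients call:
    def average_gradients(self, tower_grads):
        """Averages gradients across towers.

        tower_grads: list with one entry per GPU, each a list of
        (gradient, variable) pairs as built in the tower loop above.
        """
        averaged = []
        for grads_and_vars in zip(*tower_grads):
            # Every tower refers to the same variable, so stack the gradients
            # and take their element-wise mean.
            grads = tf.stack([g for g, _ in grads_and_vars], axis=0)
            mean_grad = tf.reduce_mean(grads, axis=0)
            averaged.append((mean_grad, grads_and_vars[0][1]))
        return averaged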
Example #29
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        logging.info("*** Features ***")
        for name in sorted(features.keys()):
            logging.info("  name = %s, shape = %s", name, features[name].shape)

        unique_ids = features["unique_ids"]
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (start_logits, end_logits, answer_type_logits) = create_model(
            bert_config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            segment_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = bert_modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                         init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            seq_length = bert_modeling.get_shape_list(input_ids)[1]

            # Computes the loss for positions.
            def compute_loss(logits, positions):
                one_hot_positions = tf.one_hot(positions,
                                               depth=seq_length,
                                               dtype=tf.float32)
                log_probs = tf.nn.log_softmax(logits, axis=-1)
                loss = -tf.reduce_mean(
                    tf.reduce_sum(one_hot_positions * log_probs, axis=-1))
                return loss

            # Computes the loss for labels.
            def compute_label_loss(logits, labels):
                one_hot_labels = tf.one_hot(labels,
                                            depth=len(data.AnswerType),
                                            dtype=tf.float32)
                log_probs = tf.nn.log_softmax(logits, axis=-1)
                loss = -tf.reduce_mean(
                    tf.reduce_sum(one_hot_labels * log_probs, axis=-1))
                return loss

            start_positions = features["start_positions"]
            end_positions = features["end_positions"]
            answer_types = features["answer_types"]

            start_loss = compute_loss(start_logits, start_positions)
            end_loss = compute_loss(end_logits, end_positions)

            answer_type_loss = compute_label_loss(answer_type_logits,
                                                  answer_types)

            total_loss = (start_loss + end_loss + answer_type_loss) / 3.0

            train_op = bert_optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps,
                use_tpu)

            output_spec = tf_contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                "unique_ids": unique_ids,
                "start_logits": start_logits,
                "end_logits": end_logits,
                "answer_type_logits": answer_type_logits,
            }
            output_spec = tf_contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and PREDICT modes are supported: %s" %
                             (mode))

        return output_spec
    def model_fn(features, labels, mode, params):
        """The `model_fn` for TPUEstimator."""

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits, probabilities,
         predicts) = create_model(bert_config, is_training, input_ids,
                                  input_mask, segment_ids, label_ids,
                                  num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(
                loss=total_loss,
                init_lr=learning_rate,
                num_train_steps=num_train_steps,
                num_warmup_steps=num_warmup_steps,
                use_tpu=False)

            hook_dict = dict()
            hook_dict["loss"] = total_loss
            hook_dict["global_steps"] = tf.train.get_or_create_global_step()
            logging_hook = tf.train.LoggingTensorHook(hook_dict,
                                                      every_n_iter=100)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                training_hooks=[logging_hook])

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss_, label_ids_, logits_,
                          is_real_example_):
                predictions = tf.argmax(logits_, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(labels=label_ids_,
                                               predictions=predictions,
                                               weights=is_real_example_)
                loss = tf.metrics.mean(values=per_example_loss_,
                                       weights=is_real_example_)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [
                per_example_loss, label_ids, logits, is_real_example
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, loss=total_loss, eval_metrics=eval_metrics)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={
                    "probabilities": probabilities,
                    "predictions": predicts,
                })

        return output_spec
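
# Each of these model_fn closures is consumed the same way by the estimator
# framework. Below is a minimal wiring sketch (TF 1.x, not part of the original
# examples; model_dir, batch sizes, and input_fn names are placeholders):
run_config = tf.contrib.tpu.RunConfig(
    model_dir="/tmp/model_dir",          # placeholder output directory
    save_checkpoints_steps=1000,
    tpu_config=tf.contrib.tpu.TPUConfig(
        iterations_per_loop=1000,
        num_shards=8))

estimator = tf.contrib.tpu.TPUEstimator(
    use_tpu=False,                       # runs on CPU/GPU when False
    model_fn=model_fn,
    config=run_config,
    train_batch_size=32,
    eval_batch_size=8,
    predict_batch_size=8)

# estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
# estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)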
Example #31
0
    def model_fn(features, labels, mode, params):
        logging.info("*** Features ***")
        for name in sorted(features.keys()):
            logging.info("  name = %s, shape = %s" %
                         (name, features[name].shape))
        input_ids = features["input_ids"]
        mask = features["mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        # NOTE: both branches below currently call the same create_model; the
        # CRF-specific variant is not shown in this snippet.
        if FLAGS.crf:
            (total_loss, logits,
             predicts) = create_model(bert_config, is_training, input_ids,
                                      mask, segment_ids, label_ids, num_labels,
                                      use_one_hot_embeddings)
        else:
            (total_loss, logits,
             predicts) = create_model(bert_config, is_training, input_ids,
                                      mask, segment_ids, label_ids, num_labels,
                                      use_one_hot_embeddings)
        tvars = tf.trainable_variables()
        scaffold_fn = None
        initialized_variable_names = {}
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                         init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(label_ids, logits, num_labels, mask):
                predictions = tf.math.argmax(logits,
                                             axis=-1,
                                             output_type=tf.int32)
                cm = metrics.streaming_confusion_matrix(label_ids,
                                                        predictions,
                                                        num_labels - 1,
                                                        weights=mask)
                return {"confusion_matrix": cm}

            eval_metrics = (metric_fn, [label_ids, logits, num_labels, mask])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
        return output_spec
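
# The eval branch above only reports a confusion matrix. A small
# post-processing sketch (hypothetical helper, not part of the original code)
# that derives per-class precision and recall from an accumulated matrix:
import numpy as np

def precision_recall_from_confusion(cm):
    """cm: square array where cm[i, j] counts true class i predicted as j."""
    cm = np.asarray(cm, dtype=np.float64)
    true_pos = np.diag(cm)
    precision = true_pos / np.maximum(cm.sum(axis=0), 1e-12)
    recall = true_pos / np.maximum(cm.sum(axis=1), 1e-12)
    return precision, recall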
  def model_fn(features, labels, mode, params):
    """The `model_fn` for TPUEstimator."""

    # The function signature is fixed as part of the estimator interface.
    # We pass task-specific labels as part of `features` and hence `labels` is
    # unused. `params` is for runtime parameters passed around by the estimator
    # framework and they are not used by us.
    # The unused parameters are deleted below.
    del labels, params

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s", name, features[name].shape)

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    candidate_mask = features["candidate_mask"]
    error_location_mask = features["error_location_mask"]
    target_mask = features["target_mask"]

    sequence_length = tf.shape(input_ids)[1]

    if "is_real_example" in features:
      is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
      is_real_example = tf.ones(tf.shape(input_ids)[0], dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, _, probabilities) = (
        create_original_varmisuse_model(
            bert_config=bert_config,
            is_training=is_training,
            enable_sequence_masking=enable_sequence_masking,
            input_ids=input_ids,
            input_mask=input_mask,
            segment_ids=segment_ids,
            candidate_mask=candidate_mask,
            target_mask=target_mask,
            error_location_mask=error_location_mask,
            use_one_hot_embeddings=use_one_hot_embeddings))

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names) = (
          modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint))
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:

      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

      output_spec = contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)
      return output_spec

    elif mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(
          per_example_loss,
          probabilities,
          error_location_mask,
          target_mask,
          is_real_example):
        """Metric function."""

        buggy_mask = tf.equal(error_location_mask[:, 0], 0)
        non_buggy_mask = tf.logical_not(buggy_mask)

        location_probabilities, repair_probabilities = tf.unstack(
            probabilities, axis=2)
        predicted_error_locations = tf.argmax(
            location_probabilities, axis=1, output_type=tf.int32)
        predicted_repair_locations = tf.argmax(
            repair_probabilities, axis=1, output_type=tf.int32)

        non_buggy_predictions = tf.equal(predicted_error_locations, 0)

        predicted_error_locations_one_hot = tf.one_hot(
            predicted_error_locations, sequence_length, dtype=tf.int32)
        predicted_repair_locations_one_hot = tf.one_hot(
            predicted_repair_locations, sequence_length, dtype=tf.int32)

        classification_accuracy = tf.metrics.accuracy(
            labels=non_buggy_mask,
            predictions=non_buggy_predictions,
            weights=is_real_example)

        true_positive_rate = tf.metrics.accuracy(
            labels=non_buggy_mask,
            predictions=non_buggy_predictions,
            weights=is_real_example * tf.cast(non_buggy_mask, tf.float32))

        correct_location_predictions = tf.reduce_sum(
            tf.multiply(
                predicted_error_locations_one_hot, error_location_mask), axis=1)
        # We can have more than one valid repair locations, so `target_mask`
        # can have multiple ones in it. The following calculation yields 1
        # if the predicted repair location is one of the valid repair locations.
        correct_repair_predictions = tf.reduce_sum(
            tf.multiply(
                predicted_repair_locations_one_hot, target_mask), axis=1)
        correct_localization_repair_predictions = (
            correct_location_predictions * correct_repair_predictions)

        localization_accuracy = tf.metrics.accuracy(
            labels=tf.cast(buggy_mask, tf.int32),
            predictions=correct_location_predictions,
            weights=is_real_example * tf.cast(buggy_mask, tf.float32))

        repair_accuracy = tf.metrics.accuracy(
            labels=tf.cast(buggy_mask, tf.int32),
            predictions=correct_repair_predictions,
            weights=is_real_example * tf.cast(buggy_mask, tf.float32))

        localization_repair_accuracy = tf.metrics.accuracy(
            labels=tf.cast(buggy_mask, tf.int32),
            predictions=correct_localization_repair_predictions,
            weights=is_real_example * tf.cast(buggy_mask, tf.float32))

        loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example)

        return {
            "eval_accuracy_classification": classification_accuracy,
            "eval_true_positive_rate": true_positive_rate,
            "eval_accuracy_localization": localization_accuracy,
            "eval_accuracy_repair": repair_accuracy,
            "eval_accuracy_localization_repair": localization_repair_accuracy,
            "eval_loss": loss,
        }

      eval_metrics = (metric_fn,
                      [per_example_loss, probabilities, error_location_mask,
                       target_mask, is_real_example])
      output_spec = contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
      return output_spec

    else:
      output_spec = contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={"probabilities": probabilities},
          scaffold_fn=scaffold_fn)
      return output_spec