def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Builds the model graph with `create_model`, optionally initializes the
    trainable variables from `init_checkpoint`, and returns the
    `TPUEstimatorSpec` matching `mode`.

    Names that are not defined in this function (`albert_config`,
    `num_labels`, `use_one_hot_embeddings`, `task_name`, `hub_module`,
    `init_checkpoint`, `use_tpu`, `learning_rate`, `num_train_steps`,
    `num_warmup_steps`, `optimizer`) are read from the surrounding scope.

    Args:
      features: dict of tensors. Must contain "input_ids", "input_mask",
        "segment_ids" and "label_ids"; may contain "is_real_example".
      labels: unused; labels are read from `features["label_ids"]`.
      mode: a `tf.estimator.ModeKeys` value.
      params: unused.

    Returns:
      A `TPUEstimatorSpec` carrying `train_op` (TRAIN), `eval_metrics` (EVAL)
      or a `predictions` dict with "probabilities" and "predictions" (any
      other mode).
    """

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      # Lazy %-args, consistent with the trainable-variable logging below.
      tf.logging.info("  name = %s, shape = %s", name, features[name].shape)

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    if "is_real_example" in features:
      is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
      # No per-example weights supplied: give every example weight 1.
      is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, probabilities, logits, predictions) = \
        create_model(albert_config, is_training, input_ids, input_mask,
                     segment_ids, label_ids, num_labels,
                     use_one_hot_embeddings, task_name, hub_module)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if use_tpu:

        # On TPU the checkpoint initialization is deferred to the scaffold
        # function handed to TPUEstimatorSpec instead of being run here.
        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:

      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps,
          use_tpu, optimizer)

      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
      # Exactly one of the three branches below defines `metric_fn`.
      if task_name not in ["sts-b", "cola"]:
        def metric_fn(per_example_loss, label_ids, logits, is_real_example):
          """Compute weighted accuracy and mean loss."""
          predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
          accuracy = tf.metrics.accuracy(
              labels=label_ids, predictions=predictions,
              weights=is_real_example)
          loss = tf.metrics.mean(
              values=per_example_loss, weights=is_real_example)
          return {
              "eval_accuracy": accuracy,
              "eval_loss": loss,
          }
      elif task_name == "sts-b":
        def metric_fn(per_example_loss, label_ids, logits, is_real_example):
          """Compute Pearson correlation, MSE and mean loss for STS-B."""
          # Accumulate the raw predictions and labels so they are reported
          # alongside the scalar metrics.
          concat1 = contrib_metrics.streaming_concat(logits)
          concat2 = contrib_metrics.streaming_concat(label_ids)

          # Compute Pearson correlation
          pearson = contrib_metrics.streaming_pearson_correlation(
              logits, label_ids, weights=is_real_example)

          # Compute MSE
          mse = tf.metrics.mean_squared_error(
              label_ids, logits, weights=is_real_example)

          loss = tf.metrics.mean(
              values=per_example_loss,
              weights=is_real_example)

          return {"pred": concat1, "label_ids": concat2, "pearson": pearson,
                  "MSE": mse, "eval_loss": loss,}
      elif task_name == "cola":
        def metric_fn(per_example_loss, label_ids, logits, is_real_example):
          """Compute Matthews correlation, accuracy and mean loss for CoLA."""
          predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
          # https://en.wikipedia.org/wiki/Matthews_correlation_coefficient
          # Keyword arguments keep labels/predictions in the order the
          # tf.metrics API expects; passed positionally the wrong way round,
          # `fp` and `fn` would be swapped (MCC itself is symmetric in them).
          tp, tp_op = tf.metrics.true_positives(
              labels=label_ids, predictions=predictions,
              weights=is_real_example)
          tn, tn_op = tf.metrics.true_negatives(
              labels=label_ids, predictions=predictions,
              weights=is_real_example)
          fp, fp_op = tf.metrics.false_positives(
              labels=label_ids, predictions=predictions,
              weights=is_real_example)
          fn, fn_op = tf.metrics.false_negatives(
              labels=label_ids, predictions=predictions,
              weights=is_real_example)

          # Compute Matthew's correlation; div_no_nan yields 0 when the
          # denominator is 0.
          mcc = tf.div_no_nan(
              tp * tn - fp * fn,
              tf.pow((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn), 0.5))

          # Compute accuracy
          accuracy = tf.metrics.accuracy(
              labels=label_ids, predictions=predictions,
              weights=is_real_example)

          loss = tf.metrics.mean(
              values=per_example_loss,
              weights=is_real_example)

          # The MCC value is derived from the four counters, so its update op
          # is the group of their update ops.
          return {"matthew_corr": (mcc, tf.group(tp_op, tn_op, fp_op, fn_op)),
                  "eval_accuracy": accuracy, "eval_loss": loss,}

      # The tensors in the list are passed to `metric_fn` positionally.
      eval_metrics = (metric_fn,
                      [per_example_loss, label_ids, logits, is_real_example])
      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
    else:
      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={
              "probabilities": probabilities,
              "predictions": predictions
          },
          scaffold_fn=scaffold_fn)
    return output_spec
Esempio n. 2
0
    def __init__(self, is_training):
        """Build the ALBERT + TextCNN multi-label classification graph.

        Creates the input placeholders, the ALBERT encoder, the TextCNN head
        (`cell_textcnn`), a fully connected output layer with sigmoid
        probabilities and thresholded predictions. When `is_training` is
        true it also builds the accuracy summary, loss, global step,
        optimizer op and merged summaries.

        Relies on names defined outside this method: `hp`, `bert_config`,
        `num_labels`, `modeling`, `optimization`, `processor`,
        `cell_textcnn` and `time_now_string`.

        Args:
            is_training: truthy to build the training-only parts of the
                graph; also forwarded to `AlbertModel` and `cell_textcnn`.

        Attributes set:
            input_ids, input_masks, segment_ids, label_ids: placeholders.
            model: the `modeling.AlbertModel` instance.
            probabilities: sigmoid of the output logits.
            predictions: 0/1 tensor, 1 where probability >= 0.5.
            accuracy, global_step, loss, optimizer, merged: only when
                `is_training` is true.
        """
        # Training or not
        self.is_training = is_training
        # Placeholder
        self.input_ids = tf.placeholder(tf.int32,
                                        shape=[None, hp.sequence_length],
                                        name='input_ids')
        self.input_masks = tf.placeholder(tf.int32,
                                          shape=[None, hp.sequence_length],
                                          name='input_masks')
        self.segment_ids = tf.placeholder(tf.int32,
                                          shape=[None, hp.sequence_length],
                                          name='segment_ids')
        self.label_ids = tf.placeholder(tf.float32,
                                        shape=[None, hp.num_labels],
                                        name='label_ids')
        # Load ALBERT model
        self.model = modeling.AlbertModel(config=bert_config,
                                          is_training=self.is_training,
                                          input_ids=self.input_ids,
                                          input_mask=self.input_masks,
                                          token_type_ids=self.segment_ids,
                                          use_one_hot_embeddings=False)
        # Sequence output of the ALBERT encoder
        output_layer_init = self.model.get_sequence_output()
        # Cell TextCNN
        output_layer = cell_textcnn(output_layer_init, self.is_training)
        # Hidden size
        hidden_size = output_layer.shape[-1].value
        # Full-connection
        with tf.name_scope("Full-connection"):
            output_weights = tf.get_variable(
                "output_weights", [num_labels, hidden_size],
                initializer=tf.truncated_normal_initializer(stddev=0.02))
            output_bias = tf.get_variable("output_bias", [num_labels],
                                          initializer=tf.zeros_initializer())
            logits = tf.nn.bias_add(
                tf.matmul(output_layer, output_weights, transpose_b=True),
                output_bias)
            # Prediction sigmoid(Multi-label)
            self.probabilities = tf.nn.sigmoid(logits)
        with tf.variable_scope("Prediction"):
            # Threshold the sigmoid probabilities (not the raw logits) at
            # 0.5: a label is predicted when its probability is >= 0.5,
            # i.e. when its logit is >= 0.
            zero = tf.zeros_like(logits)
            one = tf.ones_like(logits)
            self.predictions = tf.where(self.probabilities < 0.5,
                                        x=zero, y=one)
        with tf.variable_scope("loss"):
            # Summary for tensorboard
            if self.is_training:
                # Element-wise accuracy over every (example, label) entry.
                self.accuracy = tf.reduce_mean(
                    tf.to_float(tf.equal(self.predictions, self.label_ids)))
                tf.summary.scalar('accuracy', self.accuracy)

            # Initialize from the pretrained checkpoint only when no saved
            # model checkpoint exists under hp.saved_model_path.
            ckpt = tf.train.get_checkpoint_state(hp.saved_model_path)
            checkpoint_suffix = ".index"
            if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path +
                                        checkpoint_suffix):
                print('=' * 10, 'Restoring model from checkpoint!', '=' * 10)
                print("%s - Restoring model from checkpoint ~%s" %
                      (time_now_string(), ckpt.model_checkpoint_path))
            else:
                print('=' * 10, 'First time load BERT model!', '=' * 10)
                tvars = tf.trainable_variables()
                if hp.init_checkpoint:
                    # Only the assignment map is needed here.
                    (assignment_map, _) = \
                      modeling.get_assignment_map_from_checkpoint(tvars,
                                                                  hp.init_checkpoint)
                    tf.train.init_from_checkpoint(hp.init_checkpoint,
                                                  assignment_map)

            # Loss and Optimizer
            if self.is_training:
                # Global_step
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=self.label_ids, logits=logits)
                self.loss = tf.reduce_mean(per_example_loss)
                # Optimizer: the schedule length is derived from the size of
                # the training set, the batch size and the number of epochs.
                train_examples = processor.get_train_examples(hp.data_dir)
                num_train_steps = int(
                    len(train_examples) / hp.batch_size * hp.num_train_epochs)
                num_warmup_steps = int(num_train_steps * hp.warmup_proportion)
                print('num_train_steps', num_train_steps)
                self.optimizer = optimization.create_optimizer(
                    self.loss,
                    hp.learning_rate,
                    num_train_steps,
                    num_warmup_steps,
                    hp.use_tpu,
                    Global_step=self.global_step)

                # Summary for tensorboard
                tf.summary.scalar('loss', self.loss)
                self.merged = tf.summary.merge_all()