def create_model(albert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings, task_name, entity_a, entity_b):
  """Creates a classification model."""
  model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  output_layer = model.get_pooled_output()

  tf.logging.info("entities type: %s, %s", entity_a.dtype, entity_b.dtype)
  # Element-wise product of the two entity embeddings, concatenated onto the
  # pooled output as an extra interaction feature.
  entity_cos = tf.multiply(entity_a, entity_b)
  output_layer = tf.concat([output_layer, entity_cos], 1)

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    epsilon = tf.constant(1e-8)
    logits = logits + epsilon 

    if task_name != "regression":
      probabilities = tf.nn.softmax(logits, axis=-1)
      predictions = tf.argmax(probabilities, axis=-1, output_type=tf.int32)
      log_probs = tf.nn.log_softmax(logits, axis=-1)
      one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

      per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    else:
      probabilities = logits
      logits = tf.squeeze(logits, [-1])
      predictions = logits
      per_example_loss = tf.square(logits - labels)
    loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, probabilities, logits, predictions)
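A minimal wiring sketch (not part of the original snippet) of how this variant might be called, assuming `albert_config` has already been loaded with `modeling.AlbertConfig.from_json_file(...)`; the sequence length, entity-embedding width, label count, and task name below are placeholder assumptions.

# Hypothetical usage sketch for the create_model variant above; all shapes
# and the task/label settings are assumptions.
input_ids = tf.placeholder(tf.int32, [None, 128], "input_ids")
input_mask = tf.placeholder(tf.int32, [None, 128], "input_mask")
segment_ids = tf.placeholder(tf.int32, [None, 128], "segment_ids")
labels = tf.placeholder(tf.int32, [None], "labels")
entity_a = tf.placeholder(tf.float32, [None, 768], "entity_a")  # assumed width
entity_b = tf.placeholder(tf.float32, [None, 768], "entity_b")  # assumed width

(loss, per_example_loss, probabilities, logits, predictions) = create_model(
    albert_config, is_training=True, input_ids=input_ids,
    input_mask=input_mask, segment_ids=segment_ids, labels=labels,
    num_labels=5, use_one_hot_embeddings=False, task_name="classification",
    entity_a=entity_a, entity_b=entity_b)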
Example #2
def create_model(albert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings,
                 max_seq_length, dropout_prob):
    """Creates a classification model."""
    bsz_per_core = tf.shape(input_ids)[0]

    model = modeling.AlbertModel(
        config=albert_config,
        is_training=is_training,
        input_ids=tf.reshape(input_ids,
                             [bsz_per_core * num_labels, max_seq_length]),
        input_mask=tf.reshape(input_mask,
                              [bsz_per_core * num_labels, max_seq_length]),
        token_type_ids=tf.reshape(segment_ids,
                                  [bsz_per_core * num_labels, max_seq_length]),
        use_one_hot_embeddings=use_one_hot_embeddings)

    # In the demo, we are doing a simple classification task on the entire
    # segment.
    #
    # If you want to use the token-level output, use model.get_sequence_output()
    # instead.
    output_layer = model.get_pooled_output()

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [1, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [1],
                                  initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer,
                                         keep_prob=1 - dropout_prob)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        logits = tf.reshape(logits, [bsz_per_core, num_labels])
        probabilities = tf.nn.softmax(logits, axis=-1)
        predictions = tf.argmax(probabilities, axis=-1, output_type=tf.int32)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        one_hot_labels = tf.one_hot(labels,
                                    depth=tf.cast(num_labels, dtype=tf.int32),
                                    dtype=tf.float32)

        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, probabilities, logits, predictions)
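This variant scores `num_labels` candidate sequences per example (multiple-choice style), so each batch must carry `batch_size * num_labels * max_seq_length` input elements for the reshape to `[batch_size * num_labels, max_seq_length]` to succeed. A minimal calling sketch, with all shape values assumed:

# Hypothetical input placeholders for the multiple-choice variant above;
# num_labels and max_seq_length are assumed values.
num_labels = 4
max_seq_length = 128
input_ids = tf.placeholder(
    tf.int32, [None, num_labels * max_seq_length], "input_ids")
input_mask = tf.placeholder(
    tf.int32, [None, num_labels * max_seq_length], "input_mask")
segment_ids = tf.placeholder(
    tf.int32, [None, num_labels * max_seq_length], "segment_ids")
labels = tf.placeholder(tf.int32, [None], "labels")  # index of the correct choice

(loss, per_example_loss, probabilities, logits, predictions) = create_model(
    albert_config, is_training=True, input_ids=input_ids,
    input_mask=input_mask, segment_ids=segment_ids, labels=labels,
    num_labels=num_labels, use_one_hot_embeddings=False,
    max_seq_length=max_seq_length, dropout_prob=0.1)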
Example #3
def _create_model_from_scratch(albert_config, is_training, input_ids,
                               input_mask, segment_ids,
                               use_one_hot_embeddings):
    """Creates an ALBERT model from scratch/config."""
    model = modeling.AlbertModel(config=albert_config,
                                 is_training=is_training,
                                 input_ids=input_ids,
                                 input_mask=input_mask,
                                 token_type_ids=segment_ids,
                                 use_one_hot_embeddings=use_one_hot_embeddings)
    return (model.get_pooled_output(), model.get_sequence_output())
Example #4
def _create_model_from_scratch(albert_config, is_training, input_ids,
                               input_mask, segment_ids,
                               use_one_hot_embeddings):
    """Creates an ALBERT model from scratch (as opposed to hub)."""
    model = modeling.AlbertModel(
        config=albert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)
    output_layer = model.get_pooled_output()
    return output_layer
Example #5
def create_model(albert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  if FLAGS.use_pooled_output:
    tf.logging.info("using pooled output")
    output_layer = model.get_pooled_output()
  else:
    tf.logging.info("using mean-pooled sequence output")
    output_layer = tf.reduce_mean(model.get_sequence_output(), axis=1)

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    probabilities = tf.nn.softmax(logits, axis=-1)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, probabilities, predictions)
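This variant reads `FLAGS.use_pooled_output`, which is defined elsewhere in the surrounding script; a minimal flag definition of the kind it assumes (the default value and help text here are guesses) would be:

# Hypothetical flag definition assumed by the snippet above.
from absl import flags

flags.DEFINE_bool(
    "use_pooled_output", True,
    "Whether to classify from the pooled [CLS] output or from the mean of "
    "the sequence output.")
FLAGS = flags.FLAGS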
Example #6
def module_fn(is_training):
    """Module function."""
    input_ids = tf.placeholder(tf.int32, [None, None], "input_ids")
    input_mask = tf.placeholder(tf.int32, [None, None], "input_mask")
    segment_ids = tf.placeholder(tf.int32, [None, None], "segment_ids")
    mlm_positions = tf.placeholder(tf.int32, [None, None], "mlm_positions")

    albert_config = modeling.AlbertConfig.from_json_file(
        os.path.join(FLAGS.albert_directory, "albert_config.json"))
    model = modeling.AlbertModel(config=albert_config,
                                 is_training=is_training,
                                 input_ids=input_ids,
                                 input_mask=input_mask,
                                 token_type_ids=segment_ids,
                                 use_one_hot_embeddings=False)

    mlm_logits = get_mlm_logits(model, albert_config, mlm_positions)

    assert tf.gfile.Exists(FLAGS.vocab_path)
    vocab_file = tf.constant(value=FLAGS.vocab_path,
                             dtype=tf.string,
                             name="vocab_file")

    # By adding `vocab_file` to the ASSET_FILEPATHS collection, TF-Hub will
    # rewrite this tensor so that this asset is portable.
    tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, vocab_file)

    hub.add_signature(name="tokens",
                      inputs=dict(input_ids=input_ids,
                                  input_mask=input_mask,
                                  segment_ids=segment_ids),
                      outputs=dict(sequence_output=model.get_sequence_output(),
                                   pooled_output=model.get_pooled_output()))

    hub.add_signature(name="mlm",
                      inputs=dict(input_ids=input_ids,
                                  input_mask=input_mask,
                                  segment_ids=segment_ids,
                                  mlm_positions=mlm_positions),
                      outputs=dict(sequence_output=model.get_sequence_output(),
                                   pooled_output=model.get_pooled_output(),
                                   mlm_logits=mlm_logits))

    hub.add_signature(name="tokenization_info",
                      inputs={},
                      outputs=dict(vocab_file=vocab_file,
                                   do_lower_case=tf.constant(
                                       FLAGS.do_lower_case)))
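For context, a hedged sketch of how a `module_fn` like this is typically exported as a TF-Hub module; the tags/args pattern mirrors the standard ALBERT export script, and the export path and checkpoint file name are assumptions.

# Hypothetical export sketch for the module_fn above.
tags_and_args = []
for is_training in (True, False):
    tags = set()
    if is_training:
        tags.add("train")
    tags_and_args.append((tags, dict(is_training=is_training)))

spec = hub.create_module_spec(module_fn, tags_and_args=tags_and_args)
spec.export("/tmp/albert_hub_module",  # assumed export path
            checkpoint_path=os.path.join(FLAGS.albert_directory,
                                         "model.ckpt-best"))  # assumed name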
Example #7
def build_model(sess):
    """Module function."""
    input_ids = tf.placeholder(tf.int32, [None, None], "input_ids")
    input_mask = tf.placeholder(tf.int32, [None, None], "input_mask")
    segment_ids = tf.placeholder(tf.int32, [None, None], "segment_ids")
    mlm_positions = tf.placeholder(tf.int32, [None, None], "mlm_positions")

    albert_config_path = os.path.join(FLAGS.albert_directory,
                                      "albert_config.json")
    albert_config = modeling.AlbertConfig.from_json_file(albert_config_path)
    model = modeling.AlbertModel(
        config=albert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=False,
    )

    get_mlm_logits(
        model.get_sequence_output(),
        albert_config,
        mlm_positions,
        model.get_embedding_table(),
    )
    get_sentence_order_logits(model.get_pooled_output(), albert_config)

    checkpoint_path = os.path.join(FLAGS.albert_directory,
                                   FLAGS.checkpoint_name)
    tvars = tf.trainable_variables()
    (
        assignment_map,
        initialized_variable_names,
    ) = modeling.get_assignment_map_from_checkpoint(tvars, checkpoint_path)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)
    tf.train.init_from_checkpoint(checkpoint_path, assignment_map)
    init = tf.global_variables_initializer()
    sess.run(init)
    return sess
Example #8
    def create_model(self):
      input_ids = AlbertModelTest.ids_tensor([self.batch_size, self.seq_length],
                                             self.vocab_size)

      input_mask = None
      if self.use_input_mask:
        input_mask = AlbertModelTest.ids_tensor(
            [self.batch_size, self.seq_length], vocab_size=2)

      token_type_ids = None
      if self.use_token_type_ids:
        token_type_ids = AlbertModelTest.ids_tensor(
            [self.batch_size, self.seq_length], self.type_vocab_size)

      config = modeling.AlbertConfig(
          vocab_size=self.vocab_size,
          embedding_size=self.embedding_size,
          hidden_size=self.hidden_size,
          num_hidden_layers=self.num_hidden_layers,
          num_attention_heads=self.num_attention_heads,
          intermediate_size=self.intermediate_size,
          hidden_act=self.hidden_act,
          hidden_dropout_prob=self.hidden_dropout_prob,
          attention_probs_dropout_prob=self.attention_probs_dropout_prob,
          max_position_embeddings=self.max_position_embeddings,
          type_vocab_size=self.type_vocab_size,
          initializer_range=self.initializer_range)

      model = modeling.AlbertModel(
          config=config,
          is_training=self.is_training,
          input_ids=input_ids,
          input_mask=input_mask,
          token_type_ids=token_type_ids,
          scope=self.scope)

      outputs = {
          "embedding_output": model.get_embedding_output(),
          "sequence_output": model.get_sequence_output(),
          "pooled_output": model.get_pooled_output(),
          "all_encoder_layers": model.get_all_encoder_layers(),
      }
      return outputs
Example #9
def create_model(bert_config,
                 is_training,
                 input_ids,
                 input_mask,
                 segment_ids,
                 labels,
                 num_labels,
                 use_one_hot_embeddings,
                 dropout_rate=1.0,
                 lstm_size=1,
                 cell='lstm',
                 num_layers=1):
    """create a ner model."""

    model = modeling.AlbertModel(config=bert_config,
                                 is_training=is_training,
                                 input_ids=input_ids,
                                 input_mask=input_mask,
                                 token_type_ids=segment_ids,
                                 use_one_hot_embeddings=use_one_hot_embeddings)
    # embedding.shape = [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value
    # Compute the true (unpadded) length of each sequence.
    used = tf.sign(tf.abs(input_ids))
    # A [batch_size] vector holding the real length of each sequence in the
    # current batch.
    lengths = tf.reduce_sum(used, axis=1)
    # add CRF output layer
    blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                          hidden_unit=lstm_size,
                          cell_type=cell,
                          num_layers=num_layers,
                          dropout_rate=dropout_rate,
                          initializers=initializers,
                          num_labels=num_labels,
                          seq_length=max_seq_length,
                          labels=labels,
                          lengths=lengths,
                          is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer(crf_only=True)
    return rst
Example #10
def build_model(topk, albert_config_path, checkpoint_path):
    """Module function."""
    input_ids = tf.placeholder(tf.int32, [None, None], "input_ids")
    input_mask = tf.placeholder(tf.int32, [None, None], "input_mask")
    segment_ids = tf.placeholder(tf.int32, [None, None], "segment_ids")
    mlm_positions = tf.placeholder(tf.int32, [None, None], "mlm_positions")

    albert_config = modeling.AlbertConfig.from_json_file(albert_config_path)
    model = modeling.AlbertModel(config=albert_config,
                                 is_training=False,
                                 input_ids=input_ids,
                                 input_mask=input_mask,
                                 token_type_ids=segment_ids,
                                 use_one_hot_embeddings=False)

    mlm_logits = get_mlm_logits(model.get_sequence_output(), albert_config,
                                mlm_positions, model.get_embedding_table())
    nsp_logits = get_sentence_order_logits(model.get_pooled_output(),
                                           albert_config)

    mlm_scores = tf.nn.softmax(mlm_logits)
    nsp_scores = tf.nn.softmax(nsp_logits)

    mlm_topk_scores, mlm_topk_indices = tf.math.top_k(mlm_scores, k=topk)
    nsp_predictions = nsp_scores[:, 0]

    tvars = tf.trainable_variables()
    (assignment_map,
     initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
         tvars, checkpoint_path)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)
    tf.train.init_from_checkpoint(checkpoint_path, assignment_map)
    init = tf.global_variables_initializer()
    return init, (mlm_topk_scores, mlm_topk_indices), nsp_predictions
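A minimal driver sketch for the graph built above; the config/checkpoint paths and the fed token ids are placeholder assumptions only, since real inputs would come from the ALBERT tokenizer.

# Hypothetical driver sketch for build_model above; paths and token ids are
# assumed values.
init, (mlm_topk_scores, mlm_topk_indices), nsp_predictions = build_model(
    topk=5, albert_config_path="albert_config.json",
    checkpoint_path="model.ckpt-best")

with tf.Session() as sess:
    sess.run(init)
    scores, indices = sess.run(
        [mlm_topk_scores, mlm_topk_indices],
        feed_dict={
            "input_ids:0": [[2, 13, 9, 3]],   # assumed token ids
            "input_mask:0": [[1, 1, 1, 1]],
            "segment_ids:0": [[0, 0, 0, 0]],
            "mlm_positions:0": [[2]],         # position to predict
        })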
Example #11
def build_model():
    """Module function."""

    input_ids = tf.placeholder(tf.int32, [None, None], "input_ids")
    # input_mask = tf.placeholder(tf.int32, [None, None], "input_mask")
    # segment_ids = tf.placeholder(tf.int32, [None, None], "segment_ids")
    mlm_positions = tf.placeholder(tf.int32, [None, None], "mlm_positions")
    mlm_ids = tf.placeholder(tf.int32, [None, None], "mlm_ids")
    mlm_weights = tf.placeholder(tf.float32, [None, None], "mlm_weights")
    albert_config_path = os.path.join(FLAGS.config_dir, "albert_config.json")
    albert_config = modeling.AlbertConfig.from_json_file(albert_config_path)
    model = modeling.AlbertModel(
        config=albert_config,
        is_training=False,
        input_ids=input_ids,
        # input_mask=input_mask,
        # token_type_ids=segment_ids,
        use_one_hot_embeddings=False)

    loss = get_mlm_output(model.get_sequence_output(),
                          albert_config, mlm_positions,
                          model.get_embedding_table(), mlm_ids, mlm_weights)
    return loss
Example #12
def module_fn(is_training):
    """Module function."""
    input_ids = tf.placeholder(tf.int32, [None, None], "input_ids")
    input_mask = tf.placeholder(tf.int32, [None, None], "input_mask")
    segment_ids = tf.placeholder(tf.int32, [None, None], "segment_ids")
    mlm_positions = tf.placeholder(tf.int32, [None, None], "mlm_positions")

    albert_config_path = os.path.join(FLAGS.albert_directory,
                                      "albert_config.json")
    albert_config = modeling.AlbertConfig.from_json_file(albert_config_path)
    model = modeling.AlbertModel(config=albert_config,
                                 is_training=is_training,
                                 input_ids=input_ids,
                                 input_mask=input_mask,
                                 token_type_ids=segment_ids,
                                 use_one_hot_embeddings=False,
                                 use_einsum=FLAGS.use_einsum)

    mlm_logits = get_mlm_logits(model, albert_config, mlm_positions)
    sop_log_probs = get_sop_log_probs(model, albert_config)

    vocab_model_path = os.path.join(FLAGS.albert_directory, "30k-clean.model")
    vocab_file_path = os.path.join(FLAGS.albert_directory, "30k-clean.vocab")

    config_file = tf.constant(value=albert_config_path,
                              dtype=tf.string,
                              name="config_file")
    vocab_model = tf.constant(value=vocab_model_path,
                              dtype=tf.string,
                              name="vocab_model")
    # This is only for visualization purposes.
    vocab_file = tf.constant(value=vocab_file_path,
                             dtype=tf.string,
                             name="vocab_file")

    # By adding `config_file`, `vocab_model`, and `vocab_file` to the
    # ASSET_FILEPATHS collection, TF-Hub will rewrite these tensors so that
    # the assets are portable.
    tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, config_file)
    tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, vocab_model)
    tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, vocab_file)

    hub.add_signature(name="tokens",
                      inputs=dict(input_ids=input_ids,
                                  input_mask=input_mask,
                                  segment_ids=segment_ids),
                      outputs=dict(sequence_output=model.get_sequence_output(),
                                   pooled_output=model.get_pooled_output()))

    hub.add_signature(name="sop",
                      inputs=dict(input_ids=input_ids,
                                  input_mask=input_mask,
                                  segment_ids=segment_ids),
                      outputs=dict(sequence_output=model.get_sequence_output(),
                                   pooled_output=model.get_pooled_output(),
                                   sop_log_probs=sop_log_probs))

    hub.add_signature(name="mlm",
                      inputs=dict(input_ids=input_ids,
                                  input_mask=input_mask,
                                  segment_ids=segment_ids,
                                  mlm_positions=mlm_positions),
                      outputs=dict(sequence_output=model.get_sequence_output(),
                                   pooled_output=model.get_pooled_output(),
                                   mlm_logits=mlm_logits))

    hub.add_signature(name="tokenization_info",
                      inputs={},
                      outputs=dict(vocab_file=vocab_model,
                                   do_lower_case=tf.constant(
                                       FLAGS.do_lower_case)))
Example #13
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        # Note: We keep this feature name `next_sentence_labels` to be compatible
        # with the original data created by lanzhzh@. However, in the ALBERT case
        # it does represent sentence_order_labels.
        sentence_order_labels = features["next_sentence_labels"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.AlbertModel(
            config=albert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             albert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        (sentence_order_loss, sentence_order_example_loss,
         sentence_order_log_probs) = get_sentence_order_output(
             albert_config, model.get_pooled_output(), sentence_order_labels)

        total_loss = masked_lm_loss + sentence_order_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            tf.logging.info("number of hidden group %d to initialize",
                            albert_config.num_hidden_groups)
            num_of_initialize_group = 1
            if FLAGS.init_from_group0:
                num_of_initialize_group = albert_config.num_hidden_groups
                if albert_config.net_structure_type > 0:
                    num_of_initialize_group = albert_config.num_hidden_layers
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint, num_of_initialize_group)
            if use_tpu:

                def tpu_scaffold():
                    for gid in range(num_of_initialize_group):
                        tf.logging.info("initialize the %dth layer", gid)
                        tf.logging.info(assignment_map[gid])
                        tf.train.init_from_checkpoint(init_checkpoint,
                                                      assignment_map[gid])
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                for gid in range(num_of_initialize_group):
                    tf.logging.info("initialize the %dth layer", gid)
                    tf.logging.info(assignment_map[gid])
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map[gid])

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu,
                                                     optimizer, poly_power,
                                                     start_warmup_step)
            logging_hook = tf.train.LoggingTensorHook({"loss": total_loss},
                                                      every_n_iter=10)

            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                training_hooks=[logging_hook],
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(*args):
                """Computes the loss and accuracy of the model."""
                (masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                 masked_lm_weights, sentence_order_example_loss,
                 sentence_order_log_probs, sentence_order_labels) = args[:7]

                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(masked_lm_example_loss,
                                                    [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                metrics = {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                }

                sentence_order_log_probs = tf.reshape(
                    sentence_order_log_probs,
                    [-1, sentence_order_log_probs.shape[-1]])
                sentence_order_predictions = tf.argmax(
                    sentence_order_log_probs, axis=-1, output_type=tf.int32)
                sentence_order_labels = tf.reshape(sentence_order_labels, [-1])
                sentence_order_accuracy = tf.metrics.accuracy(
                    labels=sentence_order_labels,
                    predictions=sentence_order_predictions)
                sentence_order_mean_loss = tf.metrics.mean(
                    values=sentence_order_example_loss)
                metrics.update({
                    "sentence_order_accuracy": sentence_order_accuracy,
                    "sentence_order_loss": sentence_order_mean_loss
                })
                return metrics

            metric_values = [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights, sentence_order_example_loss,
                sentence_order_log_probs, sentence_order_labels
            ]

            eval_metrics = (metric_fn, metric_values)

            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
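For context, a `model_fn` like this is normally returned by a `model_fn_builder` and handed to a `TPUEstimator`; a hedged wiring sketch follows, with the run config, batch sizes, model directory, and `train_input_fn` all assumed.

# Hypothetical wiring sketch: wrapping the model_fn above in a TPUEstimator.
run_config = contrib_tpu.RunConfig(
    model_dir="/tmp/albert_pretrain",  # assumed output directory
    tpu_config=contrib_tpu.TPUConfig(iterations_per_loop=1000))

estimator = contrib_tpu.TPUEstimator(
    use_tpu=False,              # assumed: run on CPU/GPU instead of TPU
    model_fn=model_fn,
    config=run_config,
    train_batch_size=32,        # assumed batch sizes
    eval_batch_size=8)

# `train_input_fn` is an assumed input pipeline; `num_train_steps` matches the
# variable the model_fn closes over.
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)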
Example #14
    def __init__(self,is_training):
        # Training or not
        self.is_training = is_training    
        
        # Placeholder       
        self.input_ids = tf.placeholder(tf.int32, shape=[None, hp.sequence_length], name='input_ids')
        self.input_masks = tf.placeholder(tf.int32, shape=[None,  hp.sequence_length], name='input_masks')
        self.segment_ids = tf.placeholder(tf.int32, shape=[None,  hp.sequence_length], name='segment_ids')
        self.label_ids = tf.placeholder(tf.float32, shape=[None,hp.num_labels], name='label_ids')
               
        # Load the pretrained ALBERT model
        self.model = modeling.AlbertModel(
                                    config=bert_config,
                                    is_training=self.is_training,
                                    input_ids=self.input_ids,
                                    input_mask=self.input_masks,
                                    token_type_ids=self.segment_ids,
                                    use_one_hot_embeddings=False)

        # Get the pooled feature vector from ALBERT
        output_layer = self.model.get_pooled_output()
        print("***********************")
        print(output_layer.shape)
        print("****************************")
        print(self.model.get_sequence_output().shape)
        # Hidden size 
        hidden_size = output_layer.shape[-1].value                            

        with tf.name_scope("Full-connection"):  
            output_weights = tf.get_variable(
                  "output_weights", [num_labels, hidden_size],
                  initializer=tf.truncated_normal_initializer(stddev=0.02))            
            output_bias = tf.get_variable(
                  "output_bias", [num_labels], initializer=tf.zeros_initializer())   
            logits = tf.nn.bias_add(tf.matmul(output_layer, output_weights, transpose_b=True), output_bias)            
            # Sigmoid probabilities (multi-label prediction)
            self.probabilities = tf.nn.sigmoid(logits)
 

        with tf.variable_scope("Prediction"):             
            # Prediction               
            zero = tf.zeros_like(self.probabilities)
            one = tf.ones_like(self.probabilities)
            self.predictions = tf.where(self.probabilities < 0.5, x=zero, y=one)
            
        with tf.variable_scope("loss"):            
            # Summary for tensorboard
            if self.is_training:
                self.accuracy = tf.reduce_mean(
                    tf.to_float(tf.equal(self.predictions, self.label_ids)))
                tf.summary.scalar('accuracy', self.accuracy)
                                               
            # Initialize the encoder weights from the pretrained checkpoint
            ckpt = tf.train.get_checkpoint_state(hp.saved_model_path)
            checkpoint_suffix = ".index"
            if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path + checkpoint_suffix):
                print('='*10,'Restoring model from checkpoint!','='*10)
                print("%s - Restoring model from checkpoint ~%s" % (time_now_string(),
                                                                    ckpt.model_checkpoint_path))
            else:                   
                print('='*10,'First time load BERT model!','='*10)
                tvars = tf.trainable_variables()
                if hp.init_checkpoint:
                   (assignment_map, initialized_variable_names) = \
                     modeling.get_assignment_map_from_checkpoint(tvars,
                                                                 hp.init_checkpoint)
                   tf.train.init_from_checkpoint(hp.init_checkpoint, assignment_map)
                                
            # Loss and Optimizer
            if self.is_training:
                # Global_step
                self.global_step = tf.Variable(0, name='global_step', trainable=False)             
                per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=self.label_ids,logits=logits)
                self.loss = tf.reduce_mean(per_example_loss)

                # Optimizer BERT
                train_examples = processor.get_train_examples(hp.data_dir)
                num_train_steps = int(
                    len(train_examples) / hp.batch_size * hp.num_train_epochs)
                #num_train_steps = 10000
                num_warmup_steps = int(num_train_steps * hp.warmup_proportion)
                print('num_train_steps',num_train_steps)
                self.optimizer = optimization.create_optimizer(self.loss,
                                                                hp.learning_rate, 
                                                                num_train_steps, 
                                                                num_warmup_steps,
                                                                hp.use_tpu,
                                                                Global_step=self.global_step)    

                # Summary for tensorboard                 
                tf.summary.scalar('loss', self.loss)
                self.merged = tf.summary.merge_all()
Example #15
    def __init__(self, is_training):
        self.is_training = is_training
        self.input_ids = tf.compat.v1.placeholder(
            tf.int32, shape=[None, hp.sequence_length], name='input_ids')
        self.input_masks = tf.compat.v1.placeholder(
            tf.int32, shape=[None, hp.sequence_length], name='input_masks')
        self.segment_ids = tf.compat.v1.placeholder(
            tf.int32, shape=[None, hp.sequence_length], name='segment_ids')
        self.label_ids = tf.compat.v1.placeholder(tf.int32,
                                                  shape=[None],
                                                  name='label_ids')
        # Load the pretrained ALBERT LM
        self.model = modeling.AlbertModel(config=bert_config,
                                          is_training=self.is_training,
                                          input_ids=self.input_ids,
                                          input_mask=self.input_masks,
                                          token_type_ids=self.segment_ids,
                                          use_one_hot_embeddings=False)

        # Get the sequence output, shape (batch_size, sequence_length, hidden_size)
        output_layer_init = self.model.get_sequence_output()
        # Cell textcnn
        output_layer = cell_textcnn(output_layer_init, self.is_training)
        # Hidden size
        #hidden_size = output_layer.shape[-1].value
        hidden_size = output_layer.shape[-1]
        # Dense
        with tf.name_scope("Full-connection"):
            output_weights = tf.compat.v1.get_variable(
                "output_weights", [num_labels, hidden_size],
                initializer=tf.compat.v1.truncated_normal_initializer(
                    stddev=0.02))

            output_bias = tf.compat.v1.get_variable(
                "output_bias", [num_labels],
                initializer=tf.zeros_initializer())
            # Logit
            logits = tf.matmul(output_layer, output_weights, transpose_b=True)
            self.logits = tf.nn.bias_add(logits, output_bias)
            print('logits: ', self.logits)
            self.probabilities = tf.nn.softmax(self.logits, axis=-1)
        # Prediction
        with tf.compat.v1.variable_scope("Prediction"):
            self.preds = tf.argmax(self.logits, axis=-1, output_type=tf.int32)
            print('preds:', self.preds)
        # Summary for tensorboard
        with tf.compat.v1.variable_scope("Loss"):
            if self.is_training:
                self.accuracy = tf.reduce_mean(
                    tf.compat.v1.to_float(tf.equal(self.preds,
                                                   self.label_ids)))
                tf.summary.scalar('Accuracy', self.accuracy)

                # Check whether has loaded model
            ckpt = tf.train.get_checkpoint_state(hp.saved_model_path)
            checkpoint_suffix = ".index"
            if ckpt and tf.compat.v1.gfile.Exists(ckpt.model_checkpoint_path +
                                                  checkpoint_suffix):
                print('=' * 10, 'Restoring model from checkpoint!', '=' * 10)
                print("%s - Restoring model from checkpoint ~%s" %
                      (time_now_string(), ckpt.model_checkpoint_path))
            else:
                # Load BERT Pre-training LM
                print('=' * 10, 'First time load BERT model!', '=' * 10)
                tvars = tf.compat.v1.trainable_variables()
                if hp.init_checkpoint:
                    (assignment_map, initialized_variable_names) = \
                        modeling.get_assignment_map_from_checkpoint(tvars,
                                                                    hp.init_checkpoint)
                    tf.compat.v1.train.init_from_checkpoint(
                        hp.init_checkpoint, assignment_map)

            # Optimization
            if self.is_training:
                # Global_step
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                # Loss
                # Log-probabilities of the predictions.
                log_probs = tf.nn.log_softmax(self.logits, axis=-1)
                # One-hot labels, used below for the loss and accuracy.
                one_hot_labels = tf.one_hot(
                    self.label_ids, depth=num_labels, dtype=tf.float32)
                per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs,
                                                  axis=-1)
                self.loss = tf.reduce_mean(per_example_loss)
                # Optimizer
                train_examples = processor.get_train_examples(hp.data_dir)
                num_train_steps = int(
                    len(train_examples) / hp.batch_size * hp.num_train_epochs)
                num_warmup_steps = int(num_train_steps * hp.warmup_proportion)
                print('num_train_steps', num_train_steps)
                self.optimizer = optimization.create_optimizer(
                    self.loss,
                    hp.learning_rate,
                    num_train_steps,
                    num_warmup_steps,
                    hp.use_tpu,
                )
                # Summary for tensorboard
                tf.summary.scalar('loss', self.loss)
                testvalue = tf.compat.v1.summary.merge_all()
                self.merged = tf.compat.v1.summary.merge_all()

        # Count the model parameters
        count_model_params()
        vs = tf.compat.v1.trainable_variables()
        for l in vs:
            print(l)
        print('=' * 40)
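A hedged sketch of driving one training step with a classifier class like the one above; the class name, the batch arrays, and any `hp.*` hyperparameters are assumptions, not part of the original snippet.

# Hypothetical single training step; `ClassifierModel` is an assumed name for
# the class defined above, and the batch_* arrays are assumed numpy inputs.
model = ClassifierModel(is_training=True)
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    feed = {model.input_ids: batch_ids,
            model.input_masks: batch_masks,
            model.segment_ids: batch_segments,
            model.label_ids: batch_labels}
    _, step_loss = sess.run([model.optimizer, model.loss], feed_dict=feed)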
Example #16
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    masked_lm_positions = features["masked_lm_positions"]
    masked_lm_ids = features["masked_lm_ids"]
    hydrophobicities = features["hydrophobicities"]
    solubilities = features["solubilities"]
    charges = features["charges"]
    pks = features["pks"]
    masked_lm_weights = features["masked_lm_weights"]
    hydrophobicity_weights = features["hydrophobicity_weights"]
    solubility_weights = features["solubility_weights"]
    charge_weights = features["charge_weights"]
    pk_weights = features["pk_weights"]


    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    k = albert_config.k  # NOTE: this is the length of the k-mer

    model = modeling.AlbertModel(
        config=albert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output(albert_config,
                                                 model.get_sequence_output(),
                                                 model.get_embedding_table(),
                                                 masked_lm_positions,
                                                 masked_lm_ids,
                                                 masked_lm_weights)

    if do_hydro:
      (hydrophobicity_loss, hydrophobicity_example_loss, hydrophobicity_log_probs) = get_hydrophobicity_output(
          albert_config, model.get_sequence_output(),
          masked_lm_positions, hydrophobicities, hydrophobicity_weights)
    else:
      (hydrophobicity_loss, hydrophobicity_example_loss, hydrophobicity_log_probs) = (0, 0, None)

    if do_charge:
      (charge_loss, charge_example_loss, charge_log_probs) = get_charge_output(
          albert_config, model.get_sequence_output(), 
          masked_lm_positions, charges, charge_weights)
    else:
      (charge_loss, charge_example_loss, charge_log_probs) = (0, 0, None)

    if do_pks:
      (pk_loss, pk_example_loss, pk_log_probs) = get_pk_output(
          albert_config, model.get_sequence_output(), 
          masked_lm_positions, pks, pk_weights)
    else:
      (pk_loss, pk_example_loss, pk_log_probs) = (0, 0, None)

    if do_solubility:
      (solubility_loss, solubility_example_loss, solubility_log_probs) = get_solubility_output(
          albert_config, model.get_sequence_output(),
          masked_lm_positions, solubilities, solubility_weights)
    else:
      (solubility_loss, solubility_example_loss, solubility_log_probs) = (0, 0, None)

    total_loss = masked_lm_loss + hydrophobicity_loss + charge_loss + pk_loss + solubility_loss

    tvars = tf.trainable_variables()

    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      tf.logging.info("number of hidden group %d to initialize",
                      albert_config.num_hidden_groups)
      num_of_initialize_group = 1
      if FLAGS.init_from_group0:
        num_of_initialize_group = albert_config.num_hidden_groups
        if albert_config.net_structure_type > 0:
          num_of_initialize_group = albert_config.num_hidden_layers
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(
              tvars, init_checkpoint, num_of_initialize_group)
      if use_tpu:

        def tpu_scaffold():
          for gid in range(num_of_initialize_group):
            tf.logging.info("initialize the %dth layer", gid)
            tf.logging.info(assignment_map[gid])
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map[gid])
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        for gid in range(num_of_initialize_group):
          tf.logging.info("initialize the %dth layer", gid)
          tf.logging.info(assignment_map[gid])
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map[gid])

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps,
          use_tpu, optimizer, poly_power, start_warmup_step)

      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(*args):
        """Computes the loss and accuracy of the model."""
        (masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights, 
            hydrophobicity_example_loss, hydrophobicity_log_probs, hydrophobicities, hydrophobicity_weights,
            charge_example_loss, charge_log_probs, charges, charge_weights,
            pk_example_loss, pk_log_probs, pks, pk_weights,
            solubility_example_loss, solubility_log_probs, solubilities, solubility_weights) = args[:20]

        masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
                                         [-1, masked_lm_log_probs.shape[-1]])
        masked_lm_predictions = tf.argmax(
            masked_lm_log_probs, axis=-1, output_type=tf.int32)
        masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
        masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
        masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
        masked_lm_accuracy = tf.metrics.accuracy(
            labels=masked_lm_ids,
            predictions=masked_lm_predictions,
            weights=masked_lm_weights)
        masked_lm_mean_loss = tf.metrics.mean(
            values=masked_lm_example_loss, weights=masked_lm_weights)

        if do_hydro:
          hydrophobicity_log_probs = tf.reshape(hydrophobicity_log_probs,
                                          [-1, hydrophobicity_log_probs.shape[-1]])
          hydrophobicity_predictions = tf.argmax(
              hydrophobicity_log_probs, axis=-1, output_type=tf.int32)
          hydrophobicity_example_loss = tf.reshape(hydrophobicity_example_loss, [-1])
          hydrophobicities = tf.reshape(hydrophobicities, [-1])
          hydrophobicity_weights = tf.reshape(hydrophobicity_weights, [-1])
          hydrophobicity_accuracy = tf.metrics.accuracy(
              labels=hydrophobicities,
              predictions=hydrophobicity_predictions,
              weights=hydrophobicity_weights)
          hydrophobicity_mean_loss = tf.metrics.mean(
              values=hydrophobicity_example_loss, weights=hydrophobicity_weights)
        else:
          hydrophobicity_accuracy = 0
          hydrophobicity_mean_loss = 0

        if do_charge:
          charge_log_probs = tf.reshape(charge_log_probs,
                                          [-1, charge_log_probs.shape[-1]])
          charge_predictions = tf.argmax(
              charge_log_probs, axis=-1, output_type=tf.int32)
          charge_example_loss = tf.reshape(charge_example_loss, [-1])
          charges = tf.reshape(charges, [-1])
          charge_weights = tf.reshape(charge_weights, [-1])
          charge_accuracy = tf.metrics.accuracy(
              labels=charges,
              predictions=charge_predictions,
              weights=charge_weights)
          charge_mean_loss = tf.metrics.mean(
              values=charge_example_loss, weights=charge_weights)
        else:
          charge_accuracy = 0
          charge_mean_loss = 0

        if do_pks:
          pk_log_probs = tf.reshape(pk_log_probs,
                                          [-1, pk_log_probs.shape[-1]])
          pk_predictions = tf.argmax(
              pk_log_probs, axis=-1, output_type=tf.int32)
          pk_example_loss = tf.reshape(pk_example_loss, [-1])
          pks = tf.reshape(pks, [-1])
          pk_weights = tf.reshape(pk_weights, [-1])
          pk_accuracy = tf.metrics.accuracy(
              labels=pks,
              predictions=pk_predictions,
              weights=pk_weights)
          pk_mean_loss = tf.metrics.mean(
              values=pk_example_loss, weights=pk_weights)
        else:
          pk_accuracy = 0
          pk_mean_loss = 0

        if do_solubility:
          solubility_log_probs = tf.reshape(solubility_log_probs,
                                          [-1, solubility_log_probs.shape[-1]])
          solubility_predictions = tf.argmax(
              solubility_log_probs, axis=-1, output_type=tf.int32)
          solubility_example_loss = tf.reshape(solubility_example_loss, [-1])
          solubilities = tf.reshape(solubilities, [-1])
          solubility_weights = tf.reshape(solubility_weights, [-1])
          solubility_accuracy = tf.metrics.accuracy(
              labels=solubilities,
              predictions=solubility_predictions,
              weights=solubility_weights)
          solubility_mean_loss = tf.metrics.mean(
              values=solubility_example_loss, weights=solubility_weights)
        else:
          solubility_accuracy = 0
          solubility_mean_loss = 0

        metrics = {
            "masked_lm_accuracy": masked_lm_accuracy,
            "masked_lm_loss": masked_lm_mean_loss,
            "hydrophobicity_accuracy": hydrophobicity_accuracy,
            "hydrophobicity_loss": hydrophobicity_mean_loss,
            "charge_accuracy": charge_accuracy,
            "charge_loss": charge_mean_loss,
            "pk_accuracy": pk_accuracy,
            "pk_loss": pk_mean_loss,
            "solubility_accuracy": solubility_accuracy,
            "solubility_loss": solubility_mean_loss
        }

        return metrics

      metric_values = [
          masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights, 
          hydrophobicity_example_loss, hydrophobicity_log_probs, hydrophobicities, hydrophobicity_weights,
          charge_example_loss, charge_log_probs, charges, charge_weights,
          pk_example_loss, pk_log_probs, pks, pk_weights,
          solubility_example_loss, solubility_log_probs, solubilities, solubility_weights
      ]

      eval_metrics = (metric_fn, metric_values)

      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
    else:
      raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

    return output_spec