Example 1
    def __init__(self, config: configure_finetuning.FinetuningConfig, tasks,
                 is_training, features, num_train_steps):
        # Create a shared transformer encoder
        bert_config = training_utils.get_bert_config(config)
        self.bert_config = bert_config
        if config.debug:
            bert_config.num_hidden_layers = 3
            bert_config.hidden_size = 144
            bert_config.intermediate_size = 144 * 4
            bert_config.num_attention_heads = 4
        assert config.max_seq_length <= bert_config.max_position_embeddings
        bert_model = modeling.BertModel(bert_config=bert_config,
                                        is_training=is_training,
                                        input_ids=features["input_ids"],
                                        input_mask=features["input_mask"],
                                        token_type_ids=features["segment_ids"],
                                        use_one_hot_embeddings=config.use_tpu,
                                        embedding_size=config.embedding_size)
        percent_done = (
            tf.cast(tf.train.get_or_create_global_step(), tf.float32) /
            tf.cast(num_train_steps, tf.float32))

        # Add specific tasks
        self.outputs = {"task_id": features["task_id"]}
        losses = []
        for task in tasks:
            with tf.variable_scope("task_specific/" + task.name,
                                   reuse=tf.AUTO_REUSE):
                task_losses, task_outputs = task.get_prediction_module(
                    bert_model, features, is_training, percent_done)

            grad, = tf.gradients(task_losses, bert_model.token_embeddings)
            grad = tf.stop_gradient(grad)
            perturb = self._scale_l2(grad, 0.125)

            adv_token_embeddings = bert_model.token_embeddings + perturb

            bert_model_adv = modeling.BertModel(
                bert_config=bert_config,
                is_training=is_training,
                input_ids=features["input_ids"],
                input_mask=features["input_mask"],
                token_type_ids=features["segment_ids"],
                use_one_hot_embeddings=config.use_tpu,
                embedding_size=config.embedding_size,
                input_embeddings=adv_token_embeddings)

            with tf.variable_scope("task_specific/" + task.name,
                                   reuse=tf.AUTO_REUSE):
                task_adv_losses, task_adv_outputs = task.get_prediction_module(
                    bert_model_adv, features, is_training, percent_done)

            total_loss = 0.875 * task_losses + 0.125 * task_adv_losses
            losses.append(total_loss)
            self.outputs[task.name] = task_outputs
        self.loss = tf.reduce_sum(
            tf.stack(losses, -1) *
            tf.one_hot(features["task_id"], len(config.task_names)))
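Example 1 relies on a `_scale_l2` helper that is not shown above. A minimal sketch of such a helper, assuming FGM-style adversarial training in which the token-embedding gradient is rescaled to a fixed L2 norm, could look like this (assumed implementation, not the original):

    # Hedged sketch of the _scale_l2 helper used above (assumed, not the
    # original implementation). It rescales a gradient tensor of shape
    # [batch, seq_len, emb_dim] to have L2 norm `norm_length` per example.
    def _scale_l2(self, x, norm_length):
        # Divide by max(abs(x)) first for numerical stability.
        alpha = tf.reduce_max(tf.abs(x), axis=(1, 2), keepdims=True) + 1e-12
        l2_norm = alpha * tf.sqrt(
            tf.reduce_sum(tf.pow(x / alpha, 2), axis=(1, 2), keepdims=True) + 1e-6)
        return norm_length * x / l2_norm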
Example 2
    def __init__(self, config: configure_finetuning.FinetuningConfig, tasks,
                 is_training, features, num_train_steps):
        # Create a shared transformer encoder
        bert_config = training_utils.get_bert_config(config)
        self.bert_config = bert_config
        if config.debug:
            bert_config.num_hidden_layers = 3
            bert_config.hidden_size = 144
            bert_config.intermediate_size = 144 * 4
            bert_config.num_attention_heads = 4

        # multiple-choice MRC (machine reading comprehension)
        if any([isinstance(x, qa_tasks.MQATask) for x in tasks]):
            seq_len = config.max_seq_length
            assert seq_len <= bert_config.max_position_embeddings
            bs, total_len = modeling.get_shape_list(features["input_ids"],
                                                    expected_rank=2)
            to_shape = [
                bs * config.max_options_num * config.evidences_top_k, seq_len
            ]
            bert_model = modeling.BertModel(
                bert_config=bert_config,
                is_training=is_training,
                input_ids=tf.reshape(features["input_ids"], to_shape),
                input_mask=tf.reshape(features["input_mask"], to_shape),
                token_type_ids=tf.reshape(features["segment_ids"], to_shape),
                use_one_hot_embeddings=config.use_tpu,
                embedding_size=config.embedding_size)
        else:
            assert config.max_seq_length <= bert_config.max_position_embeddings
            bert_model = modeling.BertModel(
                bert_config=bert_config,
                is_training=is_training,
                input_ids=features["input_ids"],
                input_mask=features["input_mask"],
                token_type_ids=features["segment_ids"],
                use_one_hot_embeddings=config.use_tpu,
                embedding_size=config.embedding_size)
        percent_done = (
            tf.cast(tf.train.get_or_create_global_step(), tf.float32) /
            tf.cast(num_train_steps, tf.float32))

        # Add specific tasks
        self.outputs = {"task_id": features["task_id"]}
        losses = []
        for task in tasks:
            with tf.variable_scope("task_specific/" + task.name):
                task_losses, task_outputs = task.get_prediction_module(
                    bert_model, features, is_training, percent_done)
                losses.append(task_losses)
                self.outputs[task.name] = task_outputs
        self.loss = tf.reduce_sum(
            tf.stack(losses, -1) *
            tf.one_hot(features["task_id"], len(config.task_names)))
Example 3
  def __init__(self, config: configure_finetuning.FinetuningConfig, tasks,
               is_training, features, num_train_steps):
    # Create a shared transformer encoder
    bert_config = training_utils.get_bert_config(config)
    self.bert_config = bert_config
    assert config.max_seq_length <= bert_config.max_position_embeddings
    bert_model = modeling.BertModel(
        bert_config=bert_config,
        is_training=is_training,
        input_ids=features["input_ids"],
        input_mask=features["input_mask"],
        token_type_ids=features["segment_ids"],
        use_one_hot_embeddings=config.use_tpu,
        embedding_size=config.embedding_size)
    percent_done = (tf.cast(tf.train.get_or_create_global_step(), tf.float32) /
                    tf.cast(num_train_steps, tf.float32))

    # Add specific tasks
    self.outputs = {"task_id": features["task_id"]}
    losses = []
    for task in tasks:
      with tf.variable_scope("task_specific/" + task.name):
        task_losses, task_outputs = task.get_prediction_module(
            bert_model, features, is_training, percent_done)
        losses.append(task_losses)
        self.outputs[task.name] = task_outputs
    # The one-hot mask zeroes out the losses of every task except the
    # example's task_id, so this sum keeps only the loss of the active task.
    self.loss = tf.reduce_sum(
        tf.stack(losses, -1) *
        tf.one_hot(features["task_id"], len(config.task_names)))
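The loss selection used in Examples 1-3 can be checked with a toy snippet (assumed values, for illustration only): the one-hot mask keeps only the column that matches each example's task_id.

# Toy check of the task-selection loss (assumed values, TF1 graph mode).
losses = tf.constant([[0.7, 1.2]])   # per-task losses for one example, tasks [A, B]
task_id = tf.constant([1])           # this example belongs to task B
mask = tf.one_hot(task_id, depth=2)  # [[0., 1.]]
selected = tf.reduce_sum(losses * mask)  # evaluates to 1.2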
Example 4
def create_classification_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                                labels, num_labels, use_one_hot_embeddings, multi_label=False):
    """Creates a classification model."""
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    # This head classifies the whole input but builds its logits from the
    # token-level output (model.get_sequence_output()): per-token scores are
    # masked and then projected across the sequence dimension below.
    output_layer = model.get_sequence_output()

    hidden_size = output_layer.shape[-1].value
    sequence_length = output_layer.shape[-2].value

    W_1 = tf.get_variable('dense_W1', [hidden_size],
                          initializer=tf.truncated_normal_initializer(stddev=0.02))
    b_1 = tf.get_variable('dense_b1', [], initializer=tf.zeros_initializer())
    W_2 = tf.get_variable('dense_W2', [sequence_length, num_labels],
                          initializer=tf.truncated_normal_initializer(stddev=0.02))
    b_2 = tf.get_variable('dense_b2', [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):

        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.reduce_sum(tf.multiply(output_layer, W_1), -1)
        logits = tf.add(logits, b_1)
        input_mask = tf.cast(input_mask, tf.float32)
        logits = tf.multiply(logits, input_mask)
        logits = tf.nn.relu(logits)
        logits = tf.nn.xw_plus_b(logits, W_2, b_2)

        if multi_label:
            probabilities = tf.nn.sigmoid(logits)
            labels = tf.cast(labels, tf.float32)
            per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
        else:
            probabilities = tf.nn.softmax(logits, axis=-1)
            log_probs = tf.nn.log_softmax(logits, axis=-1)

            one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
            per_example_loss = -(one_hot_labels * log_probs)

        per_example_loss = tf.reduce_sum(per_example_loss, axis=-1)
        loss = tf.reduce_mean(per_example_loss, name='train_loss')

        return loss, per_example_loss, logits, probabilities
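A minimal usage sketch for this head (hypothetical 128-token placeholders and a default BertConfig, chosen only for illustration):

# Hypothetical usage of create_classification_model (TF1 graph mode); the
# shapes and config below are assumptions made for illustration.
bert_config = modeling.BertConfig(vocab_size=30522)
input_ids = tf.placeholder(tf.int32, [None, 128], name="input_ids")
input_mask = tf.placeholder(tf.int32, [None, 128], name="input_mask")
segment_ids = tf.placeholder(tf.int32, [None, 128], name="segment_ids")
labels = tf.placeholder(tf.int32, [None], name="labels")
loss, per_example_loss, logits, probabilities = create_classification_model(
    bert_config, is_training=True, input_ids=input_ids, input_mask=input_mask,
    segment_ids=segment_ids, labels=labels, num_labels=2,
    use_one_hot_embeddings=False)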
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    unique_ids = features["unique_ids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    input_type_ids = features["input_type_ids"]

    model = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=input_type_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    if mode != tf.estimator.ModeKeys.PREDICT:
      raise ValueError("Only PREDICT modes are supported: %s" % (mode))

    tvars = tf.trainable_variables()
    scaffold_fn = None
    (assignment_map,
     initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
         tvars, init_checkpoint)
    if use_tpu:

      def tpu_scaffold():
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        return tf.train.Scaffold()

      scaffold_fn = tpu_scaffold
    else:
      tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    all_layers = model.get_all_encoder_layers()

    predictions = {
        "unique_id": unique_ids,
    }

    for (i, layer_index) in enumerate(layer_indexes):
      predictions["layer_output_%d" % i] = all_layers[layer_index]

    output_spec = tf.estimator.tpu.TPUEstimatorSpec(
        mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    return output_spec
Example 6
    def bert_module_fn(is_training):
        """Spec function for a token embedding module."""

        input_ids = tf.placeholder(shape=[None, None],
                                   dtype=tf.int32,
                                   name="input_ids")
        input_mask = tf.placeholder(shape=[None, None],
                                    dtype=tf.int32,
                                    name="input_mask")
        token_type = tf.placeholder(shape=[None, None],
                                    dtype=tf.int32,
                                    name="segment_ids")

        bert_config = training_utils.get_bert_config(config)

        model = modeling.BertModel(bert_config=bert_config,
                                   is_training=is_training,
                                   input_ids=input_ids,
                                   input_mask=input_mask,
                                   token_type_ids=token_type,
                                   use_one_hot_embeddings=use_tpu,
                                   embedding_size=config.embedding_size)

        seq_output = model.sequence_output
        pool_output = model.pooled_output

        vocab_file = tf.constant(value=vocab_path,
                                 dtype=tf.string,
                                 name="vocab_file")
        lower_case = tf.constant(do_lower_case)

        tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, vocab_file)

        input_map = {
            "input_ids": input_ids,
            "input_mask": input_mask,
            "segment_ids": token_type
        }

        output_map = {
            "pooled_output": pool_output,
            "sequence_output": seq_output
        }

        output_info_map = {
            "vocab_file": vocab_file,
            "do_lower_case": lower_case
        }

        hub.add_signature(name="tokens", inputs=input_map, outputs=output_map)
        hub.add_signature(name="tokenization_info",
                          inputs={},
                          outputs=output_info_map)
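bert_module_fn is intended to be wrapped into a TF Hub module spec by its enclosing export code. A hedged export sketch, assuming the TF1 tensorflow_hub API and placeholder paths:

# Hedged export sketch (assumed TF1 tensorflow_hub API; paths are placeholders).
# One graph variant is built for training and one for inference.
import tensorflow_hub as hub

tags_and_args = [({"train"}, {"is_training": True}),
                 (set(), {"is_training": False})]
spec = hub.create_module_spec(bert_module_fn, tags_and_args=tags_and_args)
spec.export("/path/to/export_dir", checkpoint_path="/path/to/model.ckpt")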
Example 7
def create_sequence_binary_tagging_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                                         labels, num_labels, use_one_hot_embeddings):
    """Sequence tagging model When num_labels==2,
        the fine-tuning layers can be simpler than
        'create_sequence_tagging_model'  """
    if num_labels != 2:
        raise ValueError('num_labels must be 2. If not ,create_sequence_tagging_model should be used.')

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    # This is a per-token tagging head, so it uses the token-level output
    # (model.get_sequence_output()) rather than the pooled output.
    output_layer = model.get_sequence_output()

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "output_bias", [], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.reduce_sum(tf.multiply(output_layer, output_weights), -1)
        logits = tf.add(logits, output_bias)

        probabilities = tf.sigmoid(logits)
        input_mask = tf.cast(input_mask, tf.float32)
        probabilities = tf.multiply(probabilities, input_mask)

        labels = tf.cast(labels, dtype=tf.float32)

        per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
        per_example_loss = tf.multiply(per_example_loss, input_mask)
        per_example_loss = tf.reduce_sum(per_example_loss, axis=-1)
        loss = tf.reduce_mean(per_example_loss, name='train_loss')

        return loss, per_example_loss, logits, probabilities
Example 8
def create_sequence_tagging_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                                  labels, num_labels, use_one_hot_embeddings):
    """Creates a sequence tagging model."""
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    # This is a per-token tagging head, so it uses the token-level output
    # (model.get_sequence_output()) rather than the pooled output.
    output_layer = model.get_sequence_output()

    hidden_size = output_layer.shape[-1].value
    sequence_length = output_layer.shape[-2].value

    output_weights = tf.get_variable(
        "output_weights", [hidden_size, num_labels],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(tf.reshape(output_layer, [-1, hidden_size]),
                           output_weights)  # [batch_size*sequence_length, num_labels]
        logits = tf.reshape(logits, [-1, sequence_length, num_labels])  # [batch_size, sequence_length, num_labels]
        logits = tf.add(logits, output_bias)

        probabilities = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        input_mask = tf.cast(input_mask, tf.float32)  # [batch_size, sequence_length]
        probabilities = tf.multiply(probabilities,
                                    tf.expand_dims(input_mask, axis=-1))  # [batch_size, sequence_length, num_labels]

        labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)  # [batch_size, sequence_length, num_labels]
        per_example_loss = -tf.multiply(log_probs, labels)  # [batch_size, sequence_length, num_labels]
        per_example_loss = tf.reduce_sum(per_example_loss, axis=-1)  # [batch_size, sequence_length]
        per_example_loss = tf.multiply(per_example_loss, input_mask)
        per_example_loss = tf.reduce_sum(per_example_loss, axis=-1)  # [batch_size]
        loss = tf.reduce_mean(per_example_loss, name='train_loss')

        return loss, per_example_loss, logits, probabilities
Example 9
  def _build_transformer(self, inputs: pretrain_data.Inputs, is_training,
                         bert_config=None, name="electra", reuse=False, **kwargs):
    """Build a transformer encoder network."""
    if bert_config is None:
      bert_config = self._bert_config
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
      return modeling.BertModel(
          bert_config=bert_config,
          is_training=is_training,
          input_ids=inputs.input_ids,
          input_mask=inputs.input_mask,
          token_type_ids=inputs.segment_ids,
          use_one_hot_embeddings=self._config.use_tpu,
          scope=name,
          **kwargs)
Example 10
def build_transformer(config: configure_pretraining.PretrainingConfig,
                      inputs: pretrain_data.Inputs,
                      is_training,
                      bert_config,
                      reuse=False,
                      **kwargs):
    """Build a transformer encoder network."""
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
        return modeling.BertModel(bert_config=bert_config,
                                  is_training=is_training,
                                  input_ids=inputs.input_ids,
                                  input_mask=inputs.input_mask,
                                  token_type_ids=inputs.segment_ids,
                                  use_one_hot_embeddings=config.use_tpu,
                                  **kwargs)
    def create_model(self, bert_config, is_training, input_ids, input_mask, segment_ids,
                     labels, num_labels, use_one_hot_embeddings):
        """Creates a classification model."""
        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        # In the demo, we are doing a simple classification task on the entire
        # segment.
        #
        # If you want to use the token-level output, use model.get_sequence_output()
        # instead.
        output_layer = model.get_pooled_output()
        print('output_layer: {}'.format(output_layer.shape))

        hidden_size = output_layer.shape[-1].value

        output_weights = tf.get_variable(
            "output_weights", [num_labels, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))

        output_bias = tf.get_variable(
            "output_bias", [num_labels], initializer=tf.zeros_initializer())

        with tf.variable_scope("loss"):
            if is_training:
                # I.e., 0.1 dropout
                output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

            logits = tf.matmul(output_layer, output_weights, transpose_b=True)
            logits = tf.nn.bias_add(logits, output_bias)
            probabilities = tf.nn.softmax(logits, axis=-1)
            log_probs = tf.nn.log_softmax(logits, axis=-1)

            one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

            per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
            loss = tf.reduce_mean(per_example_loss)

            return (loss, per_example_loss, logits, probabilities)
Example 12
    def _build_transformer(self,
                           name,
                           inputs: pretrain_data.Inputs,
                           is_training,
                           use_fp16=False,
                           bert_config=None,
                           **kwargs):
        """Build a transformer encoder network."""
        if bert_config is None:
            bert_config = self._bert_config
        return modeling.BertModel(bert_config=bert_config,
                                  is_training=is_training,
                                  input_ids=inputs.input_ids,
                                  input_mask=inputs.input_mask,
                                  token_type_ids=inputs.segment_ids,
                                  use_one_hot_embeddings=self._config.use_tpu,
                                  scope=name,
                                  use_fp16=use_fp16,
                                  **kwargs)
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    # In the demo, we are doing a simple classification task on the entire
    # segment.
    #
    # If you want to use the token-level output, use model.get_sequence_output()
    # instead.
    output_layer = model.get_pooled_output()

    hidden_size = output_layer.shape[-1].value

    return output_layer, hidden_size
Example 14
        def create_model(self):
            input_ids = BertModelTest.ids_tensor(
                [self.batch_size, self.seq_length], self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = BertModelTest.ids_tensor(
                    [self.batch_size, self.seq_length], vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = BertModelTest.ids_tensor(
                    [self.batch_size, self.seq_length], self.type_vocab_size)

            config = modeling.BertConfig(
                vocab_size=self.vocab_size,
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
                intermediate_size=self.intermediate_size,
                hidden_act=self.hidden_act,
                hidden_dropout_prob=self.hidden_dropout_prob,
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range)

            model = modeling.BertModel(config=config,
                                       is_training=self.is_training,
                                       input_ids=input_ids,
                                       input_mask=input_mask,
                                       token_type_ids=token_type_ids,
                                       scope=self.scope)

            outputs = {
                "embedding_output": model.get_embedding_output(),
                "sequence_output": model.get_sequence_output(),
                "pooled_output": model.get_pooled_output(),
                "all_encoder_layers": model.get_all_encoder_layers(),
            }
            return outputs
def main():
    tf.set_random_seed(1234)
    np.random.seed(0)
    batch_size = 1
    tf_datatype = tf.int32
    np_datatype = np.int32
    iterations = 10

    features_ph = {}
    features_ph["input_ids"] = tf.placeholder(dtype=tf_datatype,
                                                shape=[batch_size, 128],
                                                name="input_ids")
    features_ph["input_mask"] = tf.placeholder(dtype=tf_datatype,
                                                shape=[batch_size, 128],
                                                name="input_mask")
    features_ph["token_type_ids"] = tf.placeholder(dtype=tf_datatype,
                                                    shape=[batch_size, 128],
                                                    name="token_type_ids")

    features_data = {}
    features_data["input_ids"] = np.random.rand(batch_size,
                                                128).astype(np_datatype)
    features_data["input_mask"] = np.random.rand(batch_size,
                                                    128).astype(np_datatype)
    features_data["token_type_ids"] = np.random.rand(
        batch_size, 128).astype(np_datatype)

    features_feed_dict = {
        features_ph[key]: features_data[key]
        for key in features_ph
    }

    finetuning_config = configure_finetuning.FinetuningConfig("ConvBert", "./")
    bert_config = training_utils.get_bert_config(finetuning_config)
    bert_model = modeling.BertModel(
        bert_config=bert_config,
        is_training=False,
        input_ids=features_ph["input_ids"],
        input_mask=features_ph["input_mask"],
        token_type_ids=features_ph["token_type_ids"])

    #outputs_names = "discriminator_predictions/Sigmoid:0,discriminator_predictions/truediv:0,discriminator_predictions/Cast_2:0,discriminator_predictions/truediv_1:0"
    graph_outputs = bert_model.get_sequence_output()
    outputs_names = graph_outputs.name
    print("graph output: ", graph_outputs)
    run_op_list = []
    outputs_names_with_port = outputs_names.split(",")
    outputs_names_without_port = [ name.split(":")[0] for name in outputs_names_with_port ]
    for index in range(len(outputs_names_without_port)):
        run_op_list.append(outputs_names_without_port[index])
    inputs_names_with_port = [features_ph[key].name for key in features_ph]

    # define saver
    #saver = tf.train.Saver(var_list=tf.trainable_variables())
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        for i in range(iterations):
            sess.run(run_op_list, feed_dict=features_feed_dict)
        tf_time_sum = 0
        a = datetime.now()
        for i in range(iterations):
            tf_result = sess.run(run_op_list, feed_dict=features_feed_dict)
        b = datetime.now()
        tf_time_sum = (b - a).total_seconds()
        tf_time = "[INFO] TF  execution time: " + str(
            tf_time_sum * 1000 / iterations) + " ms"
        # tf_result = tf_result.flatten()

        frozen_graph = tf.graph_util.convert_variables_to_constants(
            sess, sess.graph_def, outputs_names_without_port)
        # frozen_graph = tf.graph_util.remove_training_nodes(frozen_graph)
        # save frozen model
        with open("ConvBert.pb", "wb") as ofile:
            ofile.write(frozen_graph.SerializeToString())

    # tf.reset_default_graph()
    # tf.import_graph_def(frozen_graph, name='')

    # #with tf.Session(config=config) as sess:
    # sess = tf.Session(config=config)
    # graph_def = tf_optimize(inputs_names_with_port, outputs_names_without_port,
    #                         sess.graph_def, True)

    # with open("ConvBert_optimized_model.pb", "wb") as ofile:
    #     ofile.write(graph_def.SerializeToString())

    onnx_model_file = "ConvBert.onnx"
    command = "python3 -m tf2onnx.convert --input ConvBert.pb --output %s --fold_const --opset 12 --verbose" % onnx_model_file
    command += " --inputs "
    for name in inputs_names_with_port:
        command += "%s," % name
    command = command[:-1] + " --outputs "
    for name in outputs_names_with_port:
        command += "%s," % name
    command = command[:-1]
    os.system(command)
    print(command)
    #exit(0)

    command = "trtexec - -onnx = ConvBert.onnx - -verbose"
    os.system(command)
    print(command)
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    masked_lm_positions = features["masked_lm_positions"]
    masked_lm_ids = features["masked_lm_ids"]
    masked_lm_weights = features["masked_lm_weights"]
    next_sentence_labels = features["next_sentence_labels"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    (masked_lm_loss,
     masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output(
         bert_config, model.get_sequence_output(), model.get_embedding_table(),
         masked_lm_positions, masked_lm_ids, masked_lm_weights)

    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = get_next_sentence_output(
         bert_config, model.get_pooled_output(), next_sentence_labels)

    total_loss = masked_lm_loss + next_sentence_loss

    tvars = tf.trainable_variables()

    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

      output_spec = tf.estimator.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                    masked_lm_weights, next_sentence_example_loss,
                    next_sentence_log_probs, next_sentence_labels):
        """Computes the loss and accuracy of the model."""
        masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
                                         [-1, masked_lm_log_probs.shape[-1]])
        masked_lm_predictions = tf.argmax(
            masked_lm_log_probs, axis=-1, output_type=tf.int32)
        masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
        masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
        masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
        masked_lm_accuracy = tf.metrics.accuracy(
            labels=masked_lm_ids,
            predictions=masked_lm_predictions,
            weights=masked_lm_weights)
        masked_lm_mean_loss = tf.metrics.mean(
            values=masked_lm_example_loss, weights=masked_lm_weights)

        next_sentence_log_probs = tf.reshape(
            next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
        next_sentence_predictions = tf.argmax(
            next_sentence_log_probs, axis=-1, output_type=tf.int32)
        next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
        next_sentence_accuracy = tf.metrics.accuracy(
            labels=next_sentence_labels, predictions=next_sentence_predictions)
        next_sentence_mean_loss = tf.metrics.mean(
            values=next_sentence_example_loss)

        return {
            "masked_lm_accuracy": masked_lm_accuracy,
            "masked_lm_loss": masked_lm_mean_loss,
            "next_sentence_accuracy": next_sentence_accuracy,
            "next_sentence_loss": next_sentence_mean_loss,
        }

      eval_metrics = (metric_fn, [
          masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
          masked_lm_weights, next_sentence_example_loss,
          next_sentence_log_probs, next_sentence_labels
      ])
      output_spec = tf.estimator.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
    else:
      raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

    return output_spec
Example 17
    def create_model(self):
        self.input_y = tf.placeholder(dtype=tf.float32,
                                      shape=[None, 10, 4],
                                      name='input_y')
        self.input_y2 = tf.placeholder(dtype=tf.float32,
                                       shape=[None, n_sub, 4],
                                       name='input_y2')
        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                name='dropout_keep_prob')
        self.output_keep_prob = tf.placeholder(dtype=tf.float32,
                                               name='output_keep_prob')

        self.input_ids = tf.placeholder(dtype=tf.int32,
                                        shape=[None, 190],
                                        name='input_ids')
        self.mask_ids = tf.placeholder(dtype=tf.int32,
                                       shape=[None, 190],
                                       name='mask_ids')
        self.type_ids = tf.placeholder(dtype=tf.int32,
                                       shape=[None, 190],
                                       name='type_ids')
        self.is_training = tf.placeholder(dtype=tf.bool, name='is_training')

        #  bert_hidden_size = bert_output_layer.shape[-1].value
        #  hidden_size = output_layer.shape[-1].value

        if self.main_feature.lower() in ['word', 'char']:
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.max_len],
                                          name='input_x')
            self.word_embedding = tf.get_variable(initializer=self.embedding,
                                                  name='word_embedding')
            self.word_encoding = tf.nn.embedding_lookup(
                self.embedding, self.input_x)
            self.word_encoding = tf.nn.dropout(self.word_encoding,
                                               self.dropout_keep_prob)  # new

        elif self.main_feature.lower() in [
                'elmo_word', 'elmo_char', 'elmo_qiuqiu'
        ]:
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.max_len + 2],
                                          name='input_x')
            if self.main_feature == 'elmo_word':
                options_file = self.config.elmo_word_options_file
                weight_file = self.config.elmo_word_weight_file
                embed_file = self.config.elmo_word_embed_file
            elif self.main_feature == 'elmo_char':
                options_file = self.config.elmo_char_options_file
                weight_file = self.config.elmo_char_weight_file
                embed_file = self.config.elmo_char_embed_file
            elif self.main_feature == 'elmo_qiuqiu':
                options_file = self.config.elmo_qiuqiu_options_file
                weight_file = self.config.elmo_qiuqiu_weight_file
                embed_file = self.config.elmo_qiuqiu_embed_file

            self.bilm = BidirectionalLanguageModel(
                options_file,
                weight_file,
                use_character_inputs=False,
                embedding_weight_file=embed_file,
                max_batch_size=self.batch_size)
            bilm_embedding_op = self.bilm(self.input_x)
            bilm_embedding = weight_layers('output',
                                           bilm_embedding_op,
                                           l2_coef=0.0)
            self.word_encoding = bilm_embedding['weighted_op']
            self.word_encoding = tf.nn.dropout(self.word_encoding,
                                               self.dropout_keep_prob)  # new

        else:
            exit('wrong feature')

        self.layer_embedding = tf.get_variable(shape=[10, self.hidden_dim],
                                               name='layer_embedding')

        self.forward = self.LSTM()
        self.backwad = self.LSTM()
        # self.forward2 = self.LSTM()
        # self.backwad2 = self.LSTM()

        # add point
        self.forward2 = self.GRU()
        self.backwad2 = self.GRU()

        # use BERT
        bert_config = modeling.BertConfig.from_json_file(
            self.config.BERT_CONFIG_FILES)

        bert_model = modeling.BertModel(config=bert_config,
                                        is_training=self.is_training,
                                        input_ids=self.input_ids,
                                        input_mask=self.mask_ids,
                                        token_type_ids=self.type_ids)
        if self.is_training is not None:
            print('bert config hidden dropout:',
                  bert_config.hidden_dropout_prob)
            print('bert config attention dropout:',
                  bert_config.attention_probs_dropout_prob)
        self.word_encoding = bert_model.get_sequence_output()
        all_layer_output = bert_model.get_all_encoder_layers()
        self.word_encoding = (all_layer_output[0] + all_layer_output[1] +
                              all_layer_output[2] + all_layer_output[3]) / 4
        with tf.variable_scope('sentence_encode'):
            all_output_words, _ = tf.nn.bidirectional_dynamic_rnn(
                self.forward,
                self.backwad,
                self.word_encoding,
                dtype=tf.float32)
        # output_sentence = 0.5*(all_output_words[0] + all_output_words[1])
        output_sentence = tf.concat(axis=2, values=all_output_words)

        with tf.variable_scope('sentence_encode2'):
            all_output_words, _ = tf.nn.bidirectional_dynamic_rnn(
                self.forward2,
                self.backwad2,
                output_sentence,
                dtype=tf.float32)
        # output_sentence = 0.5*(all_output_words[0] + all_output_words[1])
        output_sentence = tf.concat(axis=2, values=all_output_words)
        output_sentence = tf.layers.dense(output_sentence,
                                          self.hidden_dim,
                                          activation=tf.nn.tanh)
        sentence_reshape = tf.reshape(output_sentence,
                                      [-1, 1, self.max_len, self.hidden_dim])
        sentence_reshape_tile = tf.tile(sentence_reshape,
                                        [1, 10, 1, 1])  # replicate the sentence 10 times

        layer_reshape = tf.reshape(self.layer_embedding,
                                   [1, 10, 1, self.hidden_dim])
        layer_reshape_tile = tf.tile(layer_reshape,
                                     [self.batch_size, 1, self.max_len, 1])

        embed_concat = tf.reshape(
            tf.concat(axis=3,
                      values=[sentence_reshape_tile, layer_reshape_tile]),
            [-1, 2 * self.hidden_dim])

        self.att_w = tf.get_variable(
            shape=[2 * self.hidden_dim, self.hidden_dim], name='att_w')
        self.att_b = tf.get_variable(shape=[self.hidden_dim], name='att_b')
        self.att_v = tf.get_variable(shape=[self.hidden_dim, 1], name='att_v')

        score = tf.reshape(
            tf.matmul(
                tf.nn.tanh(tf.matmul(embed_concat, self.att_w) + self.att_b),
                self.att_v), [-1, 10, self.max_len])
        alpha = tf.nn.softmax(score, axis=2)
        layer_sentence = tf.matmul(alpha, output_sentence)

        layer_reshape2 = tf.reshape(self.layer_embedding,
                                    [1, 10, self.hidden_dim])
        layer_reshape2_tile = tf.tile(layer_reshape2, [self.batch_size, 1, 1])
        layer_sentence = tf.concat(
            axis=2, values=[layer_sentence, layer_reshape2_tile])
        layer_sentence = tf.reshape(layer_sentence, [-1, 2 * self.hidden_dim])

        layer_sentence = tf.layers.dense(layer_sentence,
                                         self.hidden_dim,
                                         activation=tf.nn.relu)

        # add point
        layer_sentence = tf.nn.dropout(layer_sentence, self.dropout_keep_prob)

        self.logits = tf.layers.dense(layer_sentence, 4, activation=None)
        y_ = tf.nn.softmax(self.logits, axis=1)
        self.prob = tf.reshape(y_, [-1, 10, 4])
        self.prediction = tf.argmax(self.prob, 2, name="prediction")

        if not self.config.balance:
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                        labels=tf.reshape(
                                                            self.input_y,
                                                            [-1, 4])))
            self.loss += tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                        labels=tf.reshape(
                                                            self.input_y2,
                                                            [-1, 4])))
        else:
            #  class0_weight = 0.882 * self.n_classes  # weight for class 0
            #  class1_weight = 0.019 * self.n_classes  # weight for class 1
            #  class2_weight = 0.080 * self.n_classes  # weight for class 2
            #  class3_weight = 0.019 * self.n_classes  # weight for class 3
            class0_weight = 1  # weight for class 0
            class1_weight = 3  # weight for class 1
            class2_weight = 3  # weight for class 2
            class3_weight = 3  # weight for class 3
            #  coe = tf.constant([1., 1., 1., 1.])
            #  y = tf.reshape(self.input_y, [-1, 4]) * coe
            #  self.loss = -tf.reduce_mean(y * tf.log(y_))

            y = tf.reshape(self.input_y, [-1, 4])
            self.loss = tf.reduce_mean(-class0_weight *
                                       (y[:, 0] * tf.log(y_[:, 0])) -
                                       class1_weight *
                                       (y[:, 1] * tf.log(y_[:, 1])) -
                                       class2_weight *
                                       (y[:, 2] * tf.log(y_[:, 2])) -
                                       class3_weight *
                                       (y[:, 3] * tf.log(y_[:, 3])))
            #  tf.reduce_mean(-class1_weight*tf.reduce_sum(y_[:,0] * tf.log(y[:,0])-class2_weight*tf.reduce_sum(y_[:,1] * tf.log(y[:,1])-class3_weight*tf.reduce_sum(y_[:,2] * tf.log(y[:,2]))

        return self