Example 1
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    final_hidden = model.get_sequence_output()

    final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3)
    batch_size = final_hidden_shape[0]
    seq_length = final_hidden_shape[1]
    hidden_size = final_hidden_shape[2]

    output_weights = tf.get_variable(
        "cls/squad/output_weights", [2, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("cls/squad/output_bias", [2],
                                  initializer=tf.zeros_initializer())

    final_hidden_matrix = tf.reshape(final_hidden,
                                     [batch_size * seq_length, hidden_size])
    logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    logits = tf.reshape(logits, [batch_size, seq_length, 2])
    logits = tf.transpose(logits, [2, 0, 1])

    unstacked_logits = tf.unstack(logits, axis=0)

    (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])

    return (start_logits, end_logits)
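The start/end logits returned above are typically fed into a SQuAD-style span loss. A minimal sketch, assuming start_positions/end_positions come from the input pipeline and seq_length is the padded sequence length (names other than the logits are assumptions):

def compute_span_loss(start_logits, end_logits, start_positions, end_positions,
                      seq_length):
    # Cross-entropy between the predicted start/end distributions and the
    # gold positions (sketch).
    def position_loss(logits, positions):
        one_hot_positions = tf.one_hot(positions, depth=seq_length, dtype=tf.float32)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        return -tf.reduce_sum(one_hot_positions * log_probs, axis=-1)

    start_loss = tf.reduce_mean(position_loss(start_logits, start_positions))
    end_loss = tf.reduce_mean(position_loss(end_logits, end_positions))
    return (start_loss + end_loss) / 2.0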
Example 2
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    output_layer = model.get_sequence_output()

    hidden_size = output_layer.shape[-1].value

    output_weight = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        output_layer = tf.reshape(output_layer, [-1, hidden_size])
        logits = tf.matmul(output_layer, output_weight, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        logits = tf.reshape(logits, [-1, FLAGS.max_seq_length, num_labels])

        log_probs = tf.nn.log_softmax(logits, axis=-1)
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_sum(per_example_loss)
        probabilities = tf.nn.softmax(logits, axis=-1)
        predict = tf.argmax(probabilities, axis=-1)
        return (loss, per_example_loss, logits, predict)
Example 3
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids, labels, num_labels, use_one_hot_embeddings, pos_weight=None):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  output_layer = model.get_pooled_output()
  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))
  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    probabilities = tf.nn.softmax(logits, axis=-1)
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, per_example_loss, logits, probabilities)
Example 4
def init(max_sequence_length, bert_config_file, model_path, vocab_file):
    sess = tf.Session()
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=True)
    bert_config = modeling.BertConfig.from_json_file(bert_config_file)

    input_ids = tf.placeholder(tf.int32, shape=[None, max_sequence_length], name='input_ids')
    input_mask = tf.placeholder(tf.int32, shape=[None, max_sequence_length], name='input_mask')
    segment_ids = tf.placeholder(tf.int32, shape=[None, max_sequence_length], name='segment_ids')

    with sess.as_default():
        model = modeling.BertModel(
            config=bert_config,
            is_training=False,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=False)

        output_layer = model.get_pooled_output()

        with tf.variable_scope("cls/seq_relationship"):
            output_weights = tf.get_variable(
                "output_weights",
                shape=[2, bert_config.hidden_size],
                initializer=modeling.create_initializer(bert_config.initializer_range))
            output_bias = tf.get_variable(
                "output_bias", shape=[2], initializer=tf.zeros_initializer())

            logits = tf.matmul(output_layer, output_weights, transpose_b=True)
            logits = tf.nn.bias_add(logits, output_bias)
            probs = tf.nn.softmax(logits, axis=-1, name='probs')

        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, model_path)

    return sess, tokenizer
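A hedged usage sketch for the session and tokenizer returned by init; the sentence-pair packing below is illustrative (no truncation handling), and the tensor names follow the placeholders and the 'probs' op created above:

def predict_next_sentence(sess, tokenizer, text_a, text_b, max_sequence_length):
    # Pack one example in the usual [CLS] A [SEP] B [SEP] layout (illustrative).
    tokens = ["[CLS]"] + tokenizer.tokenize(text_a) + ["[SEP]"]
    segment_ids = [0] * len(tokens)
    tokens_b = tokenizer.tokenize(text_b) + ["[SEP]"]
    tokens += tokens_b
    segment_ids += [1] * len(tokens_b)

    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    input_mask = [1] * len(input_ids)
    pad = max_sequence_length - len(input_ids)
    input_ids += [0] * pad
    input_mask += [0] * pad
    segment_ids += [0] * pad

    # The softmax was named 'probs' inside the "cls/seq_relationship" scope.
    return sess.run('cls/seq_relationship/probs:0',
                    feed_dict={'input_ids:0': [input_ids],
                               'input_mask:0': [input_mask],
                               'segment_ids:0': [segment_ids]})[0]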
Example 5
            def model_fn(features, labels, mode, params):
                unique_id = features["unique_id"]
                input_ids = features["input_ids"]
                input_mask = features["input_mask"]
                input_type_ids = features["input_type_ids"]
                tokens = features["tokens"]
                model = modeling.BertModel(config=bert_config,
                                           is_training=False,
                                           input_ids=input_ids,
                                           input_mask=input_mask,
                                           token_type_ids=input_type_ids,
                                           use_one_hot_embeddings=False)
                if mode != tf.estimator.ModeKeys.PREDICT:
                    raise ValueError("Only PREDICT modes are supported: %s" %
                                     (mode))
                tvars = tf.trainable_variables()
                scaffold_fn = None
                (assignment_map, initialized_variable_names
                 ) = modeling.get_assignment_map_from_checkpoint(
                     tvars, init_checkpoint)
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                all_layers = model.get_all_encoder_layers()

                predictions = {
                    "unique_id": unique_id,
                    "tokens": tokens,
                }

                for (i, layer_index) in enumerate(layer_indexes):
                    predictions["layer_output_%d" %
                                i] = all_layers[layer_index]

                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions=predictions,
                    scaffold_fn=scaffold_fn)
                return output_spec
Example 6
def Convert2BertPb(args):

    output_op_names = args.output_ops.split(',')

    pathname = os.path.join(args.dir, "bert_model.ckpt")  # model checkpoint path
    bert_config = modeling.BertConfig.from_json_file(
        os.path.join(args.dir, "bert_config.json"))  # config file path

    configsession = tf.ConfigProto()
    configsession.gpu_options.allow_growth = True
    sess = tf.Session(config=configsession)
    input_ids = tf.placeholder(shape=[None, args.seq_len],
                               dtype=tf.int32,
                               name="input_ids")
    input_mask = tf.placeholder(shape=[None, args.seq_len],
                                dtype=tf.int32,
                                name="input_mask")
    segment_ids = tf.placeholder(shape=[None, args.seq_len],
                                 dtype=tf.int32,
                                 name="segment_ids")

    with sess.as_default():
        model = modeling.BertModel(config=bert_config,
                                   is_training=False,
                                   input_ids=input_ids,
                                   input_mask=input_mask,
                                   token_type_ids=segment_ids,
                                   use_one_hot_embeddings=True)
        saver = tf.train.Saver()
        # Note in particular: run the variable initializer first, then restore the checkpoint; otherwise the BERT weights would be re-initialized. This differs from demo 1.
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, pathname)

        # frozen_graph = freeze_session(sess, output_names=['bert/encoder/Reshape_3'])
        frozen_graph = freeze_session(sess, output_names=output_op_names)
        # Save
        tf.train.write_graph(frozen_graph, ".", args.out_file, as_text=False)
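freeze_session is referenced above but not shown in this excerpt; a common minimal implementation, given only as an assumption about what it does:

def freeze_session(session, output_names):
    # Fold the variables reachable from the given output ops into constants so
    # the graph can be written to a standalone .pb (typical helper; assumed).
    graph_def = session.graph.as_graph_def()
    return tf.graph_util.convert_variables_to_constants(
        session, graph_def, output_names)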
Example 7
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 num_tags, osentences_len):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids)

    # In the demo, we are doing a simple classification task on the entire
    # segment.
    #
    # If you want to use the token-level output, use model.get_sequence_output()
    # instead.
    # output_layer = model.get_pooled_output()
    output_layer = model.get_sequence_output()

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        _, sentence_len, _ = output_layer.shape.as_list()

        # Ignore the [cls] token in the head of the sentence.
        output_layer = output_layer[:, 1:, :]

        # FC layer
        logits = tf.layers.dense(output_layer, num_tags)

        # crf layer
        crf_params = tf.get_variable(name='crf',
                                     shape=[num_tags, num_tags],
                                     dtype=tf.float32)
        pred_id, _ = tf.contrib.crf.crf_decode(logits, crf_params,
                                               osentences_len)
        return logits, crf_params, pred_id, sentence_len
Example 8
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    # Classification task on the entire segment.
    output_layer = model.get_pooled_output()

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.sigmoid(logits)

        sigmoid_cross_entropy_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.cast(labels, tf.float32), logits=logits)
        per_example_loss = tf.reduce_sum(sigmoid_cross_entropy_loss, axis=-1)
        loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, logits, probabilities)
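Because this variant scores each label with an independent sigmoid rather than a softmax, multi-label predictions are usually obtained by thresholding the probabilities; a one-line sketch with an assumed threshold of 0.5:

# probabilities as returned by create_model above; 0.5 is an assumed threshold
predicted_labels = tf.cast(tf.greater_equal(probabilities, 0.5), tf.int32)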
Example 9
def create_model(bert_config, is_training, input_ids, mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    output_layer = model.get_sequence_output()
    '''
    output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.contrib.layers.xavier_initializer)
    '''
    # output_layer shape is [batch_size, seq_length, hidden_size]
    if is_training:
        output_layer = tf.keras.layers.Dropout(rate=0.1)(output_layer)
    logits = hidden2tag(output_layer, num_labels)
    # TODO test shape
    logits = tf.reshape(logits, [-1, FLAGS.max_seq_length, num_labels])
    '''
    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    '''
    if FLAGS.crf:
        mask2len = tf.reduce_sum(mask, axis=1)
        loss, trans = crf_loss(logits, labels, mask, num_labels, mask2len)
        predict, viterbi_score = tf.contrib.crf.crf_decode(
            logits, trans, mask2len)
        return (loss, logits, predict)

    else:
        loss, predict = softmax_layer(logits, labels, num_labels, mask)

        return (loss, logits, predict)
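hidden2tag is not defined in this excerpt; a minimal sketch consistent with how it is called above (a dense projection from hidden states to per-tag logits), offered as an assumption:

def hidden2tag(hidden_states, num_labels):
    # Project [batch, seq_len, hidden] token representations to per-tag logits.
    return tf.layers.dense(hidden_states, num_labels, activation=None)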
Example 10
input_ids = tokenizer.convert_tokens_to_ids(tokens)

def create_int_feature(values):
    feature = tf.train.Feature(
        int64_list=tf.train.Int64List(value=list(values)))
    return feature

print(input_ids)

bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

model = modeling.BertModel(
      config=bert_config,  # presumably BERT is instantiated according to this config
      is_training=False,
      input_ids=tf.constant([input_ids,input_ids]),
      input_mask=tf.constant([[0,0,0,0,0],[0,0,0,0,0]]),
      token_type_ids=tf.constant([[0,0,0,0,0],[0,0,0,0,0]]),
      use_one_hot_embeddings=True)

final_hidden = model.get_sequence_output()
eout = model.get_embedding_output()
pout = model.get_pooled_output()
etab = model.get_embedding_table()


init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    # [works] result = sess.run(final_hidden)
Example 11
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        ##########    Argument values:
        ##########    features: {'input_ids': <tf.Tensor 'IteratorGetNext:0' shape=(8, 128) dtype=int32>,
        ##########    'input_mask': <tf.Tensor 'IteratorGetNext:1' shape=(8, 128) dtype=int32>,
        ##########    'masked_lm_ids': <tf.Tensor 'IteratorGetNext:2' shape=(8, 20) dtype=int32>,
        ##########    'masked_lm_positions': <tf.Tensor 'IteratorGetNext:3' shape=(8, 20) dtype=int32>,
        ##########    'masked_lm_weights': <tf.Tensor 'IteratorGetNext:4' shape=(8, 20) dtype=float32>,
        ##########    'next_sentence_labels': <tf.Tensor 'IteratorGetNext:5' shape=(8, 1) dtype=int32>,
        ##########    'segment_ids': <tf.Tensor 'IteratorGetNext:6' shape=(8, 128) dtype=int32>}
        ##########    labels: None
        ##########    mode: eval
        ##########    params {'batch_size': 8, 'use_tpu': False}

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        #input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        next_sentence_labels = features["next_sentence_labels"]
        hist_len = features["hist_len"]
        input_ids = features["input_ids"]
        next_item = features['next_item']

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings,
            hist_len=hist_len,
            next_item=next_item)

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        # Changed 3/1
        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = get_next_item_output(
             bert_config, model.get_decoder_layer(), next_sentence_labels,
             hist_len)

        total_loss = masked_lm_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                          masked_lm_ids, masked_lm_weights,
                          next_sentence_example_loss, next_sentence_log_probs,
                          next_sentence_labels):
                """Computes the loss and accuracy of the model."""
                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(masked_lm_example_loss,
                                                    [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                next_sentence_log_probs = tf.reshape(
                    next_sentence_log_probs,
                    [-1, next_sentence_log_probs.shape[-1]])
                next_sentence_predictions = tf.argmax(next_sentence_log_probs,
                                                      axis=-1,
                                                      output_type=tf.int32)
                next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
                next_sentence_accuracy = tf.metrics.accuracy(
                    labels=next_sentence_labels,
                    predictions=next_sentence_predictions)
                next_sentence_mean_loss = tf.metrics.mean(
                    values=next_sentence_example_loss)

                return {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                    "next_sentence_accuracy": next_sentence_accuracy,
                    "next_sentence_loss": next_sentence_mean_loss,
                }

            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights, next_sentence_example_loss,
                next_sentence_log_probs, next_sentence_labels
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
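A hedged sketch of how a model_fn like the one above is usually handed to a TPUEstimator; train_input_fn and the flag values are assumptions:

run_config = tf.contrib.tpu.RunConfig(
    model_dir=FLAGS.output_dir,
    save_checkpoints_steps=FLAGS.save_checkpoints_steps)
estimator = tf.contrib.tpu.TPUEstimator(
    use_tpu=use_tpu,
    model_fn=model_fn,
    config=run_config,
    train_batch_size=FLAGS.train_batch_size,
    eval_batch_size=FLAGS.eval_batch_size)
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)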
Example 12
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        word_ids = features["word_ids"]
        mention_ids = features["mention_id"]

        random_mask = tf.random_uniform(input_ids.shape)
        masked_lm_positions = tf.cast(random_mask < FLAGS.mask_lm_rate,
                                      tf.int32)
        masked_lm_positions *= word_ids
        masked_lm_input_ids = masked_lm_positions * FLAGS.mask_word_id + (
            1 - masked_lm_positions) * input_ids
        masked_lm_weights = masked_lm_positions
        masked_lm_ids = input_ids

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=masked_lm_input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            mention_ids=mention_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, input_ids,
             masked_lm_weights)

        total_loss = masked_lm_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = bert.modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
        #   if use_tpu:

        #     def tpu_scaffold():
        #       tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        #       return tf.train.Scaffold()

        #     scaffold_fn = tpu_scaffold
        #   else:
        #     tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                if FLAGS.max_seq_length > 1024:
                    pass
                    tf.train.init_from_checkpoint(
                        init_checkpoint, {
                            "bert/embeddings/position_embeddings":
                            "bert/embeddings/position_embeddings_first"
                        })
                    tf.train.init_from_checkpoint(
                        init_checkpoint, {
                            "bert/embeddings/position_embeddings":
                            "bert/embeddings/position_embeddings_second"
                        })
                    tf.train.init_from_checkpoint(
                        init_checkpoint, {
                            "bert/embeddings/position_embeddings":
                            "bert/embeddings/position_embeddings_third"
                        })
                elif FLAGS.max_seq_length > 512:
                    pass
                    tf.train.init_from_checkpoint(
                        init_checkpoint, {
                            "bert/embeddings/position_embeddings":
                            "bert/embeddings/position_embeddings_former"
                        })
                    tf.train.init_from_checkpoint(
                        init_checkpoint, {
                            "bert/embeddings/position_embeddings":
                            "bert/embeddings/position_embeddings_latter"
                        })
                else:
                    pass
                    tf.train.init_from_checkpoint(
                        init_checkpoint, {
                            "bert/embeddings/position_embeddings":
                            "bert/embeddings/position_embeddings"
                        })
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            if FLAGS.max_seq_length > 1024:
                pass
                tf.train.init_from_checkpoint(
                    init_checkpoint, {
                        "bert/embeddings/position_embeddings":
                        "bert/embeddings/position_embeddings_first"
                    })
                tf.train.init_from_checkpoint(
                    init_checkpoint, {
                        "bert/embeddings/position_embeddings":
                        "bert/embeddings/position_embeddings_second"
                    })
                tf.train.init_from_checkpoint(
                    init_checkpoint, {
                        "bert/embeddings/position_embeddings":
                        "bert/embeddings/position_embeddings_third"
                    })
            elif FLAGS.max_seq_length > 512:
                pass
                tf.train.init_from_checkpoint(
                    init_checkpoint, {
                        "bert/embeddings/position_embeddings":
                        "bert/embeddings/position_embeddings_former"
                    })
                tf.train.init_from_checkpoint(
                    init_checkpoint, {
                        "bert/embeddings/position_embeddings":
                        "bert/embeddings/position_embeddings_latter"
                    })
            else:
                pass
                tf.train.init_from_checkpoint(
                    init_checkpoint, {
                        "bert/embeddings/position_embeddings":
                        "bert/embeddings/position_embeddings"
                    })

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                          masked_lm_ids, masked_lm_weights):
                """Computes the loss and accuracy of the model."""
                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(masked_lm_example_loss,
                                                    [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                return {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                }

            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
Example 13
def main():
    processors = {
        "cola": ColaProcessor,
        "mnli": MnliProcessor,
        "mrpc": MrpcProcessor,
    }

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_print_test:
        raise ValueError("At least one of `do_train` or `do_eval` "
                         "or `do_print_test` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    if not os.path.isdir(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None

    # TODO: use special Adam from "optimization.py"
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    bert = modeling.BertModel(config=bert_config)
    model = modeling.BertClassifier(bert, num_labels=len(label_list))
    chainer.serializers.load_npz(FLAGS.init_checkpoint,
                                 model,
                                 ignore_names=['output/W', 'output/b'])

    if FLAGS.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(FLAGS.gpu).use()
        model.to_gpu()

    if FLAGS.do_train:
        # TODO: use special Adam from "optimization.py"
        optimizer = chainer.optimizers.Adam(alpha=FLAGS.learning_rate)
        optimizer.setup(model)
        train_iter = chainer.iterators.SerialIterator(train_examples,
                                                      FLAGS.train_batch_size)
        converter = Converter(label_list, FLAGS.max_seq_length, tokenizer)
        updater = training.updaters.StandardUpdater(train_iter,
                                                    optimizer,
                                                    converter=converter,
                                                    device=FLAGS.gpu)
        trainer = training.Trainer(updater, (num_train_steps, 'iteration'),
                                   out=FLAGS.output_dir)
        trainer.extend(extensions.snapshot(),
                       trigger=(num_train_steps, 'iteration'))
        trainer.extend(extensions.LogReport(trigger=(50, 'iteration')))
        trainer.extend(
            extensions.PrintReport(
                ['iteration', 'main/loss', 'main/accuracy', 'elapsed_time']))
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.run()

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        test_iter = chainer.iterators.SerialIterator(eval_examples,
                                                     FLAGS.train_batch_size *
                                                     2,
                                                     repeat=False,
                                                     shuffle=False)
        converter = Converter(label_list, FLAGS.max_seq_length, tokenizer)
        evaluator = extensions.Evaluator(test_iter,
                                         model,
                                         converter=converter,
                                         device=FLAGS.gpu)
        results = evaluator()
        print(results)

    # If you want to see some output arrays for debugging
    if FLAGS.do_print_test:
        short_eval_examples = processor.get_dev_examples(FLAGS.data_dir)[:3]
        short_eval_examples = short_eval_examples[:FLAGS.eval_batch_size]
        short_test_iter = chainer.iterators.SerialIterator(
            short_eval_examples,
            FLAGS.eval_batch_size,
            repeat=False,
            shuffle=False)
        converter = Converter(label_list, FLAGS.max_seq_length, tokenizer)
        evaluator = extensions.Evaluator(short_test_iter,
                                         model,
                                         converter=converter,
                                         device=FLAGS.gpu)

        with chainer.using_config('train', False):
            with chainer.no_backprop_mode():
                data = short_test_iter.__next__()
                out = model.bert.get_pooled_output(
                    *converter(data, FLAGS.gpu)[:-1])
                print(out)
                print(out.shape)
            print(converter(data, -1))
Example 14
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info('*** Features ***')
        for name in sorted(features.keys()):
            tf.logging.info('  name = %s, shape = %s' %
                            (name, features[name].shape))

        input_ids = features['input_ids']
        input_mask = features['input_mask']
        segment_ids = features['segment_ids']
        masked_lm_positions = features['masked_lm_positions']
        masked_lm_ids = features['masked_lm_ids']
        masked_lm_weights = features['masked_lm_weights']
        next_sentence_labels = features['next_sentence_labels']

        is_training = mode == tf.estimator.ModeKeys.TRAIN

        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings,
        )

        (
            masked_lm_loss,
            masked_lm_example_loss,
            masked_lm_log_probs,
        ) = get_masked_lm_output(
            bert_config,
            model.get_sequence_output(),
            model.get_embedding_table(),
            masked_lm_positions,
            masked_lm_ids,
            masked_lm_weights,
        )

        (
            next_sentence_loss,
            next_sentence_example_loss,
            next_sentence_log_probs,
        ) = get_next_sentence_output(bert_config, model.get_pooled_output(),
                                     next_sentence_labels)

        total_loss = masked_lm_loss + next_sentence_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (
                assignment_map,
                initialized_variable_names,
            ) = modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info('**** Trainable Variables ****')
        for var in tvars:
            init_string = ''
            if var.name in initialized_variable_names:
                init_string = ', *INIT_FROM_CKPT*'
            tf.logging.info('  name = %s, shape = %s%s', var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            if FLAGS.use_gpu and int(FLAGS.num_gpu_cores) >= 2:
                # train_op = custom_optimization.create_optimizer(
                #     total_loss, learning_rate, num_train_steps, num_warmup_steps
                # )
                train_op = optimization.create_optimizer(
                    total_loss,
                    learning_rate,
                    num_train_steps,
                    num_warmup_steps,
                    use_tpu,
                )
            else:
                train_op = optimization.create_optimizer(
                    total_loss,
                    learning_rate,
                    num_train_steps,
                    num_warmup_steps,
                    use_tpu,
                )
            if FLAGS.use_gpu and int(FLAGS.num_gpu_cores) >= 2:
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    train_op=train_op,
                    scaffold=scaffold_fn,
                )
            else:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    train_op=train_op,
                    scaffold_fn=scaffold_fn,
                )
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(
                masked_lm_example_loss,
                masked_lm_log_probs,
                masked_lm_ids,
                masked_lm_weights,
                next_sentence_example_loss,
                next_sentence_log_probs,
                next_sentence_labels,
            ):
                """Computes the loss and accuracy of the model."""
                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(masked_lm_example_loss,
                                                    [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights,
                )
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                next_sentence_log_probs = tf.reshape(
                    next_sentence_log_probs,
                    [-1, next_sentence_log_probs.shape[-1]],
                )
                next_sentence_predictions = tf.argmax(next_sentence_log_probs,
                                                      axis=-1,
                                                      output_type=tf.int32)
                next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
                next_sentence_accuracy = tf.metrics.accuracy(
                    labels=next_sentence_labels,
                    predictions=next_sentence_predictions,
                )
                next_sentence_mean_loss = tf.metrics.mean(
                    values=next_sentence_example_loss)

                return {
                    'masked_lm_accuracy': masked_lm_accuracy,
                    'masked_lm_loss': masked_lm_mean_loss,
                    'next_sentence_accuracy': next_sentence_accuracy,
                    'next_sentence_loss': next_sentence_mean_loss,
                }

            eval_metrics = (
                metric_fn,
                [
                    masked_lm_example_loss,
                    masked_lm_log_probs,
                    masked_lm_ids,
                    masked_lm_weights,
                    next_sentence_example_loss,
                    next_sentence_log_probs,
                    next_sentence_labels,
                ],
            )
            if FLAGS.use_gpu and int(FLAGS.num_gpu_cores) >= 2:
                # tf.estimator.EstimatorSpec has no `eval_metrics` argument; it
                # expects a dict of metric ops, so call metric_fn directly here.
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metric_ops=metric_fn(*eval_metrics[1]),
                    scaffold=scaffold_fn,
                )
            else:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metrics=eval_metrics,
                    scaffold_fn=scaffold_fn,
                )
        else:
            raise ValueError('Only TRAIN and EVAL modes are supported: %s' %
                             (mode))

        return output_spec
Example 15
def optimize_graph(logger=None, verbose=False):
    if not logger:
        logger = set_logger(colored('BERT_VEC', 'yellow'), verbose)
    try:
        # we don't need GPU for optimizing the graph
        from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference
        tf.gfile.MakeDirs(args.output_dir)

        config_fp = args.config_name
        logger.info('model config: %s' % config_fp)

        # Load the BERT config file
        with tf.gfile.GFile(config_fp, 'r') as f:
            bert_config = modeling.BertConfig.from_dict(json.load(f))

        logger.info('build graph...')
        # input placeholders, not sure if they are friendly to XLA
        input_ids = tf.placeholder(tf.int32, (None, args.max_seq_len),
                                   'input_ids')
        input_mask = tf.placeholder(tf.int32, (None, args.max_seq_len),
                                    'input_mask')
        input_type_ids = tf.placeholder(tf.int32, (None, args.max_seq_len),
                                        'input_type_ids')

        jit_scope = tf.contrib.compiler.jit.experimental_jit_scope

        with jit_scope():
            input_tensors = [input_ids, input_mask, input_type_ids]

            model = modeling.BertModel(config=bert_config,
                                       is_training=False,
                                       input_ids=input_ids,
                                       input_mask=input_mask,
                                       token_type_ids=input_type_ids,
                                       use_one_hot_embeddings=False)

            # Get all trainable variables
            tvars = tf.trainable_variables()

            init_checkpoint = args.ckpt_name
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)

            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            # Shared convolution kernels
            with tf.variable_scope("pooling"):
                # If only one layer is requested, just take that layer's output
                if len(args.layer_indexes) == 1:
                    encoder_layer = model.all_encoder_layers[
                        args.layer_indexes[0]]
                else:
                    # Otherwise iterate over the requested layers, take each layer's output and concatenate them; shape: 768 * num_layers
                    all_layers = [
                        model.all_encoder_layers[l] for l in args.layer_indexes
                    ]
                    encoder_layer = tf.concat(all_layers, -1)

            mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
            masked_reduce_mean = lambda x, m: tf.reduce_sum(
                mul_mask(x, m), axis=1) / (tf.reduce_sum(
                    m, axis=1, keepdims=True) + 1e-10)

            input_mask = tf.cast(input_mask, tf.float32)
            # The code below generates the sentence vector; it can be thought of as a convolution-like operation whose kernel is input_mask, except the results are not summed up
            pooled = masked_reduce_mean(encoder_layer, input_mask)
            pooled = tf.identity(pooled, 'final_encodes')

            output_tensors = [pooled]
            tmp_g = tf.get_default_graph().as_graph_def()

        # allow_soft_placement: automatically choose the device to run on
        config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=config) as sess:
            logger.info('load parameters from checkpoint...')
            sess.run(tf.global_variables_initializer())
            logger.info('freeze...')
            tmp_g = tf.graph_util.convert_variables_to_constants(
                sess, tmp_g, [n.name[:-2] for n in output_tensors])
            dtypes = [n.dtype for n in input_tensors]
            logger.info('optimize...')
            tmp_g = optimize_for_inference(
                tmp_g, [n.name[:-2] for n in input_tensors],
                [n.name[:-2] for n in output_tensors],
                [dtype.as_datatype_enum for dtype in dtypes], False)
        tmp_file = tempfile.NamedTemporaryFile('w',
                                               delete=False,
                                               dir=args.output_dir).name
        logger.info('write graph to a tmp file: %s' % tmp_file)
        with tf.gfile.GFile(tmp_file, 'wb') as f:
            f.write(tmp_g.SerializeToString())
        return tmp_file
    except Exception as e:
        logger.error('fail to optimize the graph!')
        logger.error(e)
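A hedged sketch of serving the temporary graph file returned by optimize_graph; the tensor names match the placeholders and the 'final_encodes' identity op defined above, while ids/mask/type_ids are assumed to be padded int arrays of shape [batch, max_seq_len]:

graph_def = tf.GraphDef()
with tf.gfile.GFile(graph_path, 'rb') as f:  # graph_path: the file returned by optimize_graph
    graph_def.ParseFromString(f.read())
with tf.Graph().as_default() as g, tf.Session(graph=g) as sess:
    tf.import_graph_def(graph_def, name='')
    vecs = sess.run('final_encodes:0',
                    feed_dict={'input_ids:0': ids,
                               'input_mask:0': mask,
                               'input_type_ids:0': type_ids})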
Example 16
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    masked_lm_positions = features["masked_lm_positions"]
    masked_lm_ids = features["masked_lm_ids"]
    masked_lm_weights = features["masked_lm_weights"]
    next_sentence_labels = features["next_sentence_labels"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        compute_type=tf.float16 if FLAGS.manual_fp16 else tf.float32)

    (masked_lm_loss,
     masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output(
         bert_config, model.get_sequence_output(), model.get_embedding_table(), 
         masked_lm_positions, masked_lm_ids, 
         masked_lm_weights)

    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = get_next_sentence_output(
         bert_config, model.get_pooled_output(), next_sentence_labels)

    masked_lm_loss = tf.identity(masked_lm_loss, name="mlm_loss")
    next_sentence_loss = tf.identity(next_sentence_loss, name="nsp_loss")
    total_loss = masked_lm_loss + next_sentence_loss
    total_loss = tf.identity(total_loss, name='total_loss')

    tvars = tf.trainable_variables()

    initialized_variable_names = {}
    if init_checkpoint and (hvd is None or hvd.rank() == 0):
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)

      tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  %d :: name = %s, shape = %s%s", 0 if hvd is None else hvd.rank(), var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps,
          hvd, FLAGS.manual_fp16, FLAGS.use_fp16)

      output_spec = tf.estimator.EstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op)
    elif mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                    masked_lm_weights, next_sentence_example_loss,
                    next_sentence_log_probs, next_sentence_labels):
        """Computes the loss and accuracy of the model."""
        masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
                                         [-1, masked_lm_log_probs.shape[-1]])
        masked_lm_predictions = tf.argmax(
            masked_lm_log_probs, axis=-1, output_type=tf.int32)
        masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
        masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
        masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
        masked_lm_accuracy = tf.metrics.accuracy(
            labels=masked_lm_ids,
            predictions=masked_lm_predictions,
            weights=masked_lm_weights)
        masked_lm_mean_loss = tf.metrics.mean(
            values=masked_lm_example_loss, weights=masked_lm_weights)

        next_sentence_log_probs = tf.reshape(
            next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
        next_sentence_predictions = tf.argmax(
            next_sentence_log_probs, axis=-1, output_type=tf.int32)
        next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
        next_sentence_accuracy = tf.metrics.accuracy(
            labels=next_sentence_labels, predictions=next_sentence_predictions)
        next_sentence_mean_loss = tf.metrics.mean(
            values=next_sentence_example_loss)

        return {
            "masked_lm_accuracy": masked_lm_accuracy,
            "masked_lm_loss": masked_lm_mean_loss,
            "next_sentence_accuracy": next_sentence_accuracy,
            "next_sentence_loss": next_sentence_mean_loss,
        }

      eval_metric_ops = metric_fn(
          masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
          masked_lm_weights, next_sentence_example_loss,
          next_sentence_log_probs, next_sentence_labels
      )
      output_spec = tf.estimator.EstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metric_ops=eval_metric_ops)
    else:
      raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

    return output_spec
Example 17
def create_model(bert_config, is_training, slot_list, features,
                 num_class_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    # In the demo, we are doing a simple classification task on the entire
    # segment.
    #
    # If you want to use the token-level output, use model.get_sequence_output()
    # instead.
    class_output_layer = model.get_pooled_output()
    token_output_layer = model.get_sequence_output()

    token_output_shape = modeling.get_shape_list(token_output_layer,
                                                 expected_rank=3)
    batch_size = token_output_shape[0]
    seq_length = token_output_shape[1]
    hidden_size = token_output_shape[2]

    # Define prediction variables
    class_proj_layer_dim = [hidden_size]
    for idx in range(FLAGS.num_class_hidden_layer):
        class_proj_layer_dim.append(64)
    class_proj_layer_dim.append(num_class_labels)

    token_proj_layer_dim = [hidden_size]
    for idx in range(FLAGS.num_token_hidden_layer):
        token_proj_layer_dim.append(64)
    token_proj_layer_dim.append(2)

    if is_training:
        # I.e., 0.1 dropout
        class_output_layer = tf.nn.dropout(class_output_layer,
                                           keep_prob=(1 - FLAGS.dropout_rate))
        token_output_layer = tf.nn.dropout(token_output_layer,
                                           keep_prob=(1 - FLAGS.dropout_rate))
    total_loss = 0
    per_slot_per_example_loss = {}
    per_slot_class_logits = {}
    per_slot_start_logits = {}
    per_slot_end_logits = {}
    for slot in slot_list:
        start_pos = features["start_pos_%s" % slot]
        end_pos = features["end_pos_%s" % slot]
        class_label_id = features["class_label_id_%s" % slot]
        slot_scope_name = "slot_%s" % slot
        if slot == 'price range':
            slot_scope_name = "slot_price"
        with tf.variable_scope(slot_scope_name):
            class_list_output_weights = []
            class_list_output_bias = []

            for l_idx in range(len(class_proj_layer_dim) - 1):
                dim_in = class_proj_layer_dim[l_idx]
                dim_out = class_proj_layer_dim[l_idx + 1]
                class_list_output_weights.append(
                    tf.get_variable(
                        "class/output_weights_%d" % l_idx, [dim_in, dim_out],
                        initializer=tf.truncated_normal_initializer(
                            stddev=0.02)))
                class_list_output_bias.append(
                    tf.get_variable("class/output_bias_%d" % l_idx, [dim_out],
                                    initializer=tf.zeros_initializer()))

            token_list_output_weights = []
            token_list_output_bias = []

            for l_idx in range(len(token_proj_layer_dim) - 1):
                dim_in = token_proj_layer_dim[l_idx]
                dim_out = token_proj_layer_dim[l_idx + 1]
                token_list_output_weights.append(
                    tf.get_variable(
                        "token/output_weights_%d" % l_idx, [dim_in, dim_out],
                        initializer=tf.truncated_normal_initializer(
                            stddev=0.02)))
                token_list_output_bias.append(
                    tf.get_variable("token/output_bias_%d" % l_idx, [dim_out],
                                    initializer=tf.zeros_initializer()))

            with tf.variable_scope("loss"):
                class_logits = util.fully_connect_layers(
                    class_output_layer, class_list_output_weights,
                    class_list_output_bias)
                one_hot_class_labels = tf.one_hot(class_label_id,
                                                  depth=num_class_labels,
                                                  dtype=tf.float32)
                class_loss = tf.losses.softmax_cross_entropy(
                    one_hot_class_labels,
                    class_logits,
                    reduction=tf.losses.Reduction.NONE)

                token_is_pointable = tf.cast(tf.equal(class_label_id, 2),
                                             dtype=tf.float32)

                token_output_layer = tf.reshape(
                    token_output_layer, [batch_size * seq_length, hidden_size])
                token_logits = util.fully_connect_layers(
                    token_output_layer, token_list_output_weights,
                    token_list_output_bias)
                token_logits = tf.reshape(token_logits,
                                          [batch_size, seq_length, 2])
                token_logits = tf.transpose(token_logits, [2, 0, 1])
                unstacked_token_logits = tf.unstack(token_logits, axis=0)
                (start_logits, end_logits) = (unstacked_token_logits[0],
                                              unstacked_token_logits[1])

                def compute_loss(logits, positions):
                    one_hot_positions = tf.one_hot(positions,
                                                   depth=seq_length,
                                                   dtype=tf.float32)
                    log_probs = tf.nn.log_softmax(logits, axis=1)
                    loss = -tf.reduce_sum(one_hot_positions * log_probs,
                                          axis=1)
                    return loss

                token_loss = (
                    compute_loss(start_logits, start_pos) +
                    compute_loss(end_logits, end_pos)) / 2.0  # per example
                if not FLAGS.location_loss_for_nonpointable:
                    token_loss *= token_is_pointable

                per_example_loss = FLAGS.class_loss_ratio * class_loss + (
                    1 - FLAGS.class_loss_ratio) * token_loss

                total_loss += tf.reduce_sum(per_example_loss)
                per_slot_per_example_loss[slot] = per_example_loss
                per_slot_class_logits[slot] = class_logits
                per_slot_start_logits[slot] = start_logits
                per_slot_end_logits[slot] = end_logits
    return (total_loss, per_slot_per_example_loss, per_slot_class_logits,
            per_slot_start_logits, per_slot_end_logits)
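For context, a hedged sketch of how this per-slot model might be driven from an Estimator model_fn; the model_fn_sketch name, the params keys, and the use of optimization.create_optimizer are assumptions rather than part of the original example.
# Hypothetical usage sketch (not from the original example): wiring the
# per-slot outputs into a TRAIN-mode EstimatorSpec. `bert_config`,
# `optimization`, and the `params` keys are assumed to come from the
# surrounding script.
def model_fn_sketch(features, labels, mode, params):
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    (total_loss, per_slot_per_example_loss, per_slot_class_logits,
     per_slot_start_logits, per_slot_end_logits) = create_model(
         bert_config, is_training, params["slot_list"], features,
         params["num_class_labels"], use_one_hot_embeddings=False)
    train_op = optimization.create_optimizer(
        total_loss, params["learning_rate"], params["num_train_steps"],
        params["num_warmup_steps"], use_tpu=False)
    return tf.estimator.EstimatorSpec(mode=mode, loss=total_loss,
                                      train_op=train_op)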
Ejemplo n.º 18
0
def create_ner_model(bert_config, is_training, input_ids, input_mask,
                     segment_ids, num_token_labels, num_predicate_labels,
                     max_seq_length):
    """
    :param bert_config: BertConfig instance
    :param is_training: whether the model is built for training
    :param input_ids: token id tensor, [batch_size, seq_length]
    :param input_mask: attention mask tensor, [batch_size, seq_length]
    :param segment_ids: segment (token type) id tensor, [batch_size, seq_length]
    :param num_token_labels: number of token-level (sequence labeling) labels
    :param num_predicate_labels: number of sentence-level predicate labels
    :param max_seq_length: maximum sequence length
    :return: (predicate_probabilities, token_label_probabilities)
    """

    # import tensorflow as tf
    # import modeling

    # Build the BERT representation from the input training data
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
    )

    # We "pool" the model by simply taking the hidden state corresponding
    # to the first token. float Tensor of shape [batch_size, hidden_size]
    predicate_output_layer = model.get_pooled_output()

    intent_hidden_size = predicate_output_layer.shape[-1].value

    predicate_output_weights = tf.get_variable(
        "predicate_output_weights", [num_predicate_labels, intent_hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    predicate_output_bias = tf.get_variable("predicate_output_bias",
                                            [num_predicate_labels],
                                            initializer=tf.zeros_initializer())

    with tf.variable_scope("predicate_loss"):
        if is_training:
            # I.e., 0.1 dropout
            predicate_output_layer = tf.nn.dropout(predicate_output_layer,
                                                   keep_prob=0.9)

        predicate_logits = tf.matmul(predicate_output_layer,
                                     predicate_output_weights,
                                     transpose_b=True)
        predicate_logits = tf.nn.bias_add(predicate_logits,
                                          predicate_output_bias)
        predicate_probabilities = tf.nn.softmax(predicate_logits, axis=-1)
        predicate_prediction = tf.argmax(predicate_probabilities,
                                         axis=-1,
                                         output_type=tf.int32)
        # predicate_labels = tf.one_hot(predicate_label_id, depth=num_predicate_labels, dtype=tf.float32)
        # predicate_per_example_loss = tf.reduce_sum(
        #     tf.nn.sigmoid_cross_entropy_with_logits(logits=predicate_logits, labels=predicate_labels), -1)
        # predicate_loss = tf.reduce_mean(predicate_per_example_loss)

    #     """Gets final hidden layer of encoder.
    #
    #     Returns:
    #       float Tensor of shape [batch_size, seq_length, hidden_size] corresponding
    #       to the final hidden of the transformer encoder.
    #     """
    token_label_output_layer = model.get_sequence_output()

    token_label_hidden_size = token_label_output_layer.shape[-1].value

    token_label_output_weight = tf.get_variable(
        "token_label_output_weights",
        [num_token_labels, token_label_hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    token_label_output_bias = tf.get_variable(
        "token_label_output_bias", [num_token_labels],
        initializer=tf.zeros_initializer())
    with tf.variable_scope("token_label_loss"):
        if is_training:
            token_label_output_layer = tf.nn.dropout(token_label_output_layer,
                                                     keep_prob=0.9)
        token_label_output_layer = tf.reshape(token_label_output_layer,
                                              [-1, token_label_hidden_size])
        token_label_logits = tf.matmul(token_label_output_layer,
                                       token_label_output_weight,
                                       transpose_b=True)
        token_label_logits = tf.nn.bias_add(token_label_logits,
                                            token_label_output_bias)

        token_label_logits = tf.reshape(token_label_logits,
                                        [-1, max_seq_length, num_token_labels])
        # token_label_log_probs = tf.nn.log_softmax(token_label_logits, axis=-1)
        # token_label_one_hot_labels = tf.one_hot(token_label_ids, depth=num_token_labels, dtype=tf.float32)
        # token_label_per_example_loss = -tf.reduce_sum(token_label_one_hot_labels * token_label_log_probs, axis=-1)
        # token_label_loss = tf.reduce_sum(token_label_per_example_loss)
        token_label_probabilities = tf.nn.softmax(token_label_logits, axis=-1)
        # token_label_predictions = tf.argmax(token_label_probabilities, axis=-1)
        # return (token_label_loss, token_label_per_example_loss, token_label_logits, token_label_predict)

    # loss = 0.5 * predicate_loss + token_label_loss
    # return (loss,
    #         predicate_loss, predicate_per_example_loss, predicate_probabilities, predicate_prediction,
    #         token_label_loss, token_label_per_example_loss, token_label_logits, token_label_predictions)
    return (predicate_probabilities, token_label_probabilities)
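A hedged usage sketch (the input tensors and label counts are assumed to be defined as in the surrounding examples): the two returned probability tensors can be turned into discrete predictions with an argmax.
# Hypothetical usage sketch (not from the original source).
predicate_probs, token_label_probs = create_ner_model(
    bert_config, is_training=False, input_ids=input_ids,
    input_mask=input_mask, segment_ids=segment_ids,
    num_token_labels=num_token_labels,
    num_predicate_labels=num_predicate_labels,
    max_seq_length=max_seq_length)

predicate_pred = tf.argmax(predicate_probs, axis=-1)      # [batch_size]
token_label_pred = tf.argmax(token_label_probs, axis=-1)  # [batch_size, max_seq_length]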
Ejemplo n.º 19
0
#token_type_ids = tf.constant([[0, 0, 1], [0, 2, 0]])
#token_type_ids = tf.constant(np.random.randint(0,2, [2, 3]))
token_type_ids = tf.placeholder(shape=[2, 3],
                                dtype=tf.int32,
                                name='token_type_ids')

config = modeling.BertConfig(vocab_size=32000,
                             hidden_size=768,
                             num_hidden_layers=8,
                             num_attention_heads=6,
                             intermediate_size=1024)

model = modeling.BertModel(config=config,
                           is_training=True,
                           input_ids=input_ids,
                           input_mask=input_mask,
                           token_type_ids=token_type_ids)

label_embeddings = tf.get_variable(
    name="word_embeddings",
    shape=[768, 12],
    initializer=tf.truncated_normal_initializer(0.02))
pooled_output = model.get_pooled_output()
logits = tf.matmul(pooled_output, label_embeddings)

with tf.compat.v1.Session() as sess:
    sess.run(tf.global_variables_initializer())
    rand_array = np.random.randint(0, 2, [2, 3])  # random token type ids in {0, 1}
    print(
        sess.run(logits,
                 feed_dict={token_type_ids: rand_array}))
    # Note: the input_ids and input_mask placeholders are defined elsewhere in
    # the original snippet and would also need to be fed here.
Ejemplo n.º 20
0
def create_classification_model(bert_config, is_training, input_ids,
                                input_mask, segment_ids, labels, num_labels):
    """
    :param bert_config:
    :param is_training:
    :param input_ids:
    :param input_mask:
    :param segment_ids:
    :param labels:
    :param num_labels:
    :param use_one_hot_embedding:
    :return:
    """

    # import tensorflow as tf
    # import modeling

    # Build the BERT representation from the input training data
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
    )

    embedding_layer = model.get_sequence_output()
    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value

    # predict = CNN_Classification(embedding_chars=embedding_layer,
    #                                labels=labels,
    #                                num_tags=num_labels,
    #                                sequence_length=FLAGS.max_seq_length,
    #                                embedding_dims=embedding_layer.shape[-1].value,
    #                                vocab_size=0,
    #                                filter_sizes=[3, 4, 5],
    #                                num_filters=3,
    #                                dropout_keep_prob=FLAGS.dropout_keep_prob,
    #                                l2_reg_lambda=0.001)
    # loss, predictions, probabilities = predict.add_cnn_layer()

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.sigmoid(logits)
        if labels is not None:
            label_ids = tf.cast(labels, tf.float32)
            per_example_loss = tf.reduce_sum(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                        labels=label_ids),
                axis=-1)
            loss = tf.reduce_mean(per_example_loss)
        else:
            loss, per_example_loss = None, None
    return (loss, per_example_loss, logits, probabilities)
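Because this head applies a per-label sigmoid, predictions come from thresholding rather than an argmax. A hedged sketch, assuming the usual input tensors are already defined:
# Hypothetical usage sketch (not from the original source): multi-label
# decisions at a 0.5 threshold on the sigmoid probabilities.
loss, per_example_loss, logits, probabilities = create_classification_model(
    bert_config, is_training=False, input_ids=input_ids,
    input_mask=input_mask, segment_ids=segment_ids,
    labels=None, num_labels=num_labels)

predictions = tf.cast(probabilities > 0.5, tf.int32)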
Ejemplo n.º 21
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The model function returned to the Estimator.

        :param features: dict of input tensors
        :param labels: labels (unused here)
        :param mode: TRAIN or EVAL mode
        :param params: hyperparameters (unused here)
        :return: the EstimatorSpec for the given mode
        """

        # Log feature names and shapes
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        # Unpack the inputs
        input_ids = features["input_ids"]  # token id tensor, [batch_size, seq_len]
        input_mask = features["input_mask"]  # attention mask tensor, [batch_size, seq_len]
        segment_ids = features["segment_ids"]  # first/second sentence ids, [batch_size, seq_len]
        masked_lm_positions = features[
            "masked_lm_positions"]  # positions masked by the LM, [batch_size, masked_len]
        masked_lm_ids = features[
            "masked_lm_ids"]  # masked LM labels, [batch_size, masked_len]
        masked_lm_weights = features[
            "masked_lm_weights"]  # weights of the masked LM labels, [batch_size, masked_len]
        next_sentence_labels = features[
            "next_sentence_labels"]  # next sentence prediction labels, [batch_size]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)  # whether the model is in training mode

        # Build the BERT model; see modeling.py for details
        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        # Downstream task: masked language model head
        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        # Downstream task: next sentence prediction head
        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = get_next_sentence_output(
             bert_config, model.get_pooled_output(), next_sentence_labels)

        total_loss = masked_lm_loss + next_sentence_loss  # combined loss of the two tasks

        tvars = tf.trainable_variables()  # trainable variables of the model

        initialized_variable_names = {}  # names of variables initialized from the checkpoint
        scaffold_fn = None
        # Initialize parameters from an existing checkpoint
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:
                # TPU variable initialization
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                # non-TPU variable initialization
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # Log each variable's name and shape
        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:  # training mode
            # Create the optimizer
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            # Output spec for training
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:  # evaluation mode

            def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                          masked_lm_ids, masked_lm_weights,
                          next_sentence_example_loss, next_sentence_log_probs,
                          next_sentence_labels):
                """
        度量函数,计算模型的loss与acc
        :param masked_lm_example_loss:  遮蔽语言模型的样本loss, [batch_size, masked_len]
        :param masked_lm_log_probs:  遮蔽语言模型的对数概率值, [batch_size*masked_len, voc_size]
        :param masked_lm_ids:  遮蔽语言模型的标签id, [batch_size, masked_len]
        :param masked_lm_weights: 遮蔽语言模型的标签权重, [batch_size, masked_len]
        :param next_sentence_example_loss:  下一句预测的样本loss, [batch_size]
        :param next_sentence_log_probs:  下一句预测的对数概率值, [batch_size, 2]
        :param next_sentence_labels:  下一句预测的标签, [batch_size]
        :return:
        """
                # Flatten all but the last dimension, [batch_size*masked_len, voc_size]
                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                # Take the argmax as the prediction, [batch_size*masked_len]
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(
                    masked_lm_example_loss,
                    [-1])  # flatten the loss, [batch_size*masked_len]
                masked_lm_ids = tf.reshape(masked_lm_ids,
                                           [-1])  # flatten, [batch_size*masked_len]
                masked_lm_weights = tf.reshape(
                    masked_lm_weights, [-1])  # flatten, [batch_size*masked_len]
                # Accuracy from labels and predictions
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                # Weighted mean of the per-example losses
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                # Flatten all but the last dimension, [batch_size, 2]
                next_sentence_log_probs = tf.reshape(
                    next_sentence_log_probs,
                    [-1, next_sentence_log_probs.shape[-1]])
                # Take the argmax as the prediction, [batch_size]
                next_sentence_predictions = tf.argmax(next_sentence_log_probs,
                                                      axis=-1,
                                                      output_type=tf.int32)
                next_sentence_labels = tf.reshape(next_sentence_labels,
                                                  [-1])  # flatten, [batch_size]
                # Accuracy from labels and predictions
                next_sentence_accuracy = tf.metrics.accuracy(
                    labels=next_sentence_labels,
                    predictions=next_sentence_predictions)
                # Mean next sentence prediction loss
                next_sentence_mean_loss = tf.metrics.mean(
                    values=next_sentence_example_loss)

                return {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                    "next_sentence_accuracy": next_sentence_accuracy,
                    "next_sentence_loss": next_sentence_mean_loss,
                }

            # Metric function and its arguments for evaluation
            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights, next_sentence_example_loss,
                next_sentence_log_probs, next_sentence_labels
            ])
            # Output spec for evaluation
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec  # return the output spec
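This model_fn is typically returned from a builder closure that captures bert_config, init_checkpoint, and the optimizer settings; a minimal sketch of that wrapper, assuming the standard BERT pre-training pattern:
# Hypothetical wrapper sketch (assuming the standard BERT pre-training layout):
# the model_fn above is returned from a closure that captures its configuration.
def model_fn_builder(bert_config, init_checkpoint, learning_rate,
                     num_train_steps, num_warmup_steps, use_tpu,
                     use_one_hot_embeddings):

    def model_fn(features, labels, mode, params):
        ...  # the body shown above

    return model_fn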
Ejemplo n.º 22
0
def main(_):
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)

    processors = {
        "cola": ColaProcessor,
        "mnlim": MnliMProcessor,
        "mnlimm": MnliMMProcessor,
        "mrpc": MrpcProcessor,
        "qnli": QnliProcessor,
        "qqp": QqpProcessor,
        "rte": RteProcessor,
        "sst2": Sst2Processor,
        "stsb": StsbProcessor,
        "wnli": WnliProcessor,
        "ax": AxProcessor,
        "mnlimdevastest": MnliMDevAsTestProcessor
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_eval:
        raise ValueError("At least 'do_eval' must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.io.gfile.makedirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()
    print("Current task", task_name)

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    # special handling for mnlimdevastest
    if task_name == 'mnlimdevastest':
        task_name = 'mnlim'

    label_list = processor.get_labels()
    print("Label list of current task", label_list)

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    num_actual_eval_examples = len(eval_examples)
    print("num_actual_eval_examples", num_actual_eval_examples)

    batch_size = FLAGS.eval_batch_size
    embed_dim = FLAGS.hidden_size  # hidden size, 768 for BERT-base, 512 for BERT-small
    seq_length = FLAGS.max_seq_length
    num_labels = len(label_list)

    # Define some placeholders for the input
    input_ids_ph = tf.compat.v1.placeholder(tf.int32,
                                            shape=[None, seq_length],
                                            name='input_ids')
    input_mask_ph = tf.compat.v1.placeholder(tf.int32,
                                             shape=[None, seq_length],
                                             name='input_mask')
    segment_ids_ph = tf.compat.v1.placeholder(tf.int32,
                                              shape=[None, seq_length],
                                              name='segment_ids')
    label_ids_ph = tf.compat.v1.placeholder(tf.int32,
                                            shape=[
                                                None,
                                            ],
                                            name='label_ids')

    tf.compat.v1.logging.info(
        "Masking out individual heads and evaluating directly!")

    # Mask out each head individually and then evaluate, giving 12 layers * 12 heads results.
    n_layers = 12
    n_heads = 12
    folder = FLAGS.output_dir
    save_file = 'single_head_mask.pickle'
    output = np.zeros((n_layers, n_heads))

    # two placeholders for the head coordinates, layer, head
    head_mask_ph = tf.compat.v1.placeholder(tf.int32,
                                            shape=[
                                                None,
                                            ],
                                            name='head_mask')
    layer_mask_ph = tf.compat.v1.placeholder(tf.int32,
                                             shape=[
                                                 None,
                                             ],
                                             name='layer_mask')

    model = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids_ph,  # input_ids,
        input_mask=input_mask_ph,  # input_mask,
        token_type_ids=segment_ids_ph,  # segment_ids,
        use_one_hot_embeddings=False,
        head_mask=head_mask_ph,
        layer_mask=layer_mask_ph)

    output_layer = model.get_pooled_output()
    output_weights = tf.get_variable(
        "output_weights", [num_labels, embed_dim],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())
    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    with tf.compat.v1.variable_scope("loss"):
        # for stsb
        if num_labels == 1:
            logits = tf.squeeze(logits, [-1])
            per_example_loss = tf.square(logits - label_ids_ph)
            loss = tf.reduce_mean(per_example_loss)
        else:
            log_probs = tf.nn.log_softmax(logits, axis=-1)
            one_hot_labels = tf.one_hot(label_ids_ph,
                                        depth=num_labels,
                                        dtype=tf.float32)
            per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs,
                                              axis=-1)
            loss = tf.reduce_mean(per_example_loss)
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)

    # metric and summary
    # metric is tf.metric object, (val, op)
    metric = metric_fn(per_example_loss, label_ids_ph, logits, num_labels,
                       task_name)
    metric_name = list(metric.keys())
    metric_val = [m[0] for m in metric.values()]
    metric_op = [m[1] for m in metric.values()]

    init_checkpoint = FLAGS.init_checkpoint
    tvars = tf.compat.v1.trainable_variables()
    saver_init = tf.train.Saver(tvars)

    # Isolate the variables stored behind the scenes by the metric operation
    var_metric = []
    for key in metric.keys():
        var_metric.extend(
            tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope=key))
    # Define initializer to initialize/reset running variables
    metric_vars_initializer = tf.variables_initializer(var_list=var_metric)

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.compat.v1.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        saver_init.restore(sess, init_checkpoint)

        # If there are at most 1000 eval examples, load them all at once; otherwise load by batch.
        if num_actual_eval_examples <= 1000:
            eval_input_ids, eval_input_mask, eval_segment_ids, \
            eval_label_ids, eval_is_real_example = generate_ph_input(batch_size=num_actual_eval_examples,
                                                                     seq_length=seq_length,
                                                                     examples=eval_examples,
                                                                     label_list=label_list,
                                                                     tokenizer=tokenizer)

        # loop over layers, then loop over heads
        for l in range(n_layers):
            for h in range(n_heads):

                cur_l, cur_h = l, h
                head_mask = [h]
                layer_mask = [l]

                # If there are at most 1000 eval examples, load them all at once; otherwise load by batch.
                if num_actual_eval_examples <= 1000:
                    sess.run(metric_vars_initializer)
                    sess.run(metric_op,
                             feed_dict={
                                 input_ids_ph: eval_input_ids,
                                 input_mask_ph: eval_input_mask,
                                 segment_ids_ph: eval_segment_ids,
                                 label_ids_ph: eval_label_ids,
                                 head_mask_ph: head_mask,
                                 layer_mask_ph: layer_mask
                             })
                    eval_metric_val = sess.run(metric_val)
                else:
                    num_batch_eval = num_actual_eval_examples // batch_size \
                        if num_actual_eval_examples % batch_size == 0 \
                        else num_actual_eval_examples // batch_size + 1
                    id_eval = 0
                    sess.run(metric_vars_initializer)
                    for _ in range(num_batch_eval):
                        eval_input_ids, eval_input_mask, eval_segment_ids, \
                        eval_label_ids, eval_is_real_example = generate_ph_input(batch_size=batch_size,
                                                                                 seq_length=seq_length,
                                                                                 examples=eval_examples,
                                                                                 label_list=label_list,
                                                                                 tokenizer=tokenizer,
                                                                                 train_idx_offset=id_eval)
                        id_eval += batch_size
                        sess.run(metric_op,
                                 feed_dict={
                                     input_ids_ph: eval_input_ids,
                                     input_mask_ph: eval_input_mask,
                                     segment_ids_ph: eval_segment_ids,
                                     label_ids_ph: eval_label_ids,
                                     head_mask_ph: head_mask,
                                     layer_mask_ph: layer_mask
                                 })
                    eval_metric_val = sess.run(metric_val)

                for name, val in zip(metric_name, eval_metric_val):
                    if name == 'accuracy':
                        output[cur_l][cur_h] = val
                        print(
                            "Mask out the head in (Layer {}, Head {}) | {}: {}"
                            .format(cur_l, cur_h, name, val))

        joblib.dump(output, folder + save_file)
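A hedged follow-up sketch (not in the original): load the saved 12x12 accuracy matrix and rank heads by how much masking them hurts accuracy; treating the best cell as the reference accuracy is an assumption of this sketch.
# Hypothetical analysis sketch reusing `folder` and `save_file` from above.
scores = joblib.load(folder + save_file)  # [n_layers, n_heads] accuracies with one head masked
drops = scores.max() - scores             # assumption: best cell serves as the reference accuracy
worst_layer, worst_head = np.unravel_index(np.argmax(drops), drops.shape)
print("Masking layer %d, head %d hurts accuracy the most" % (worst_layer, worst_head))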
Ejemplo n.º 23
0
def main(_):
    mode = tf.estimator.ModeKeys.TRAIN
    use_one_hot_embeddings = FLAGS.use_tpu

    tf.logging.set_verbosity(tf.logging.INFO)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    input_files = []
    for input_pattern in FLAGS.input_file.split(","):
        input_files.extend(tf.gfile.Glob(input_pattern))

    tf.logging.info("*** Input Files ***")
    for input_file in input_files:
        tf.logging.info("  %s" % input_file)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    # model_fn = model_fn_builder(
    #     bert_config=bert_config,
    #     init_checkpoint=FLAGS.init_checkpoint,
    #     learning_rate=FLAGS.learning_rate,
    #     num_train_steps=FLAGS.num_train_steps,
    #     num_warmup_steps=FLAGS.num_warmup_steps,
    #     use_tpu=FLAGS.use_tpu,
    #     use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    # estimator = tf.contrib.tpu.TPUEstimator(
    #     use_tpu=FLAGS.use_tpu,
    #     model_fn=model_fn,
    #     config=run_config,
    #     train_batch_size=FLAGS.train_batch_size,
    #     eval_batch_size=FLAGS.eval_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        n_gpus = 4
        batch_size = FLAGS.train_batch_size
        d = input_fn(input_files, FLAGS.train_batch_size * n_gpus,
                     FLAGS.max_seq_length, FLAGS.max_predictions_per_seq, True)
        features, iterator = parse_input_fn_result(d)
        # train_input_fn = input_fn_builder(
        #     input_files=input_files,
        #     max_seq_length=FLAGS.max_seq_length,
        #     max_predictions_per_seq=FLAGS.max_predictions_per_seq,
        #     is_training=True)
        # estimator.train(input_fn=train_input_fn, max_steps=FLAGS.num_train_steps)

        input_ids_list = tf.split(features["input_ids"], n_gpus, axis=0)
        input_mask_list = tf.split(features["input_mask"], n_gpus, axis=0)
        segment_ids_list = tf.split(features["segment_ids"], n_gpus, axis=0)
        masked_lm_positions_list = tf.split(features["masked_lm_positions"],
                                            n_gpus,
                                            axis=0)
        masked_lm_ids_list = tf.split(features["masked_lm_ids"],
                                      n_gpus,
                                      axis=0)
        masked_lm_weights_list = tf.split(features["masked_lm_weights"],
                                          n_gpus,
                                          axis=0)
        next_sentence_labels_list = tf.split(features["next_sentence_labels"],
                                             n_gpus,
                                             axis=0)

        # multi-gpu train
        with tf.device('/cpu:0'):
            optimizer = optimization_gpu.create_optimizer(
                None, FLAGS.learning_rate, FLAGS.num_train_steps,
                FLAGS.num_warmup_steps, False)

            global_step = tf.train.get_or_create_global_step()
            # calculate the gradients on each GPU
            tower_grads = []
            models = []
            train_perplexity = tf.get_variable(
                'train_perplexity', [],
                initializer=tf.constant_initializer(0.0),
                trainable=False)
            for k in range(n_gpus):
                with tf.device('/gpu:%d' % k):
                    with tf.variable_scope('lm', reuse=k > 0):
                        # calculate the loss for one model replica and get
                        #   lstm states

                        input_ids = input_ids_list[k]
                        input_mask = input_mask_list[k]
                        segment_ids = segment_ids_list[k]
                        masked_lm_positions = masked_lm_positions_list[k]
                        masked_lm_ids = masked_lm_ids_list[k]
                        masked_lm_weights = masked_lm_weights_list[k]
                        next_sentence_labels = next_sentence_labels_list[k]

                        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

                        model = modeling.BertModel(
                            config=bert_config,
                            is_training=is_training,
                            input_ids=input_ids,
                            input_mask=input_mask,
                            token_type_ids=segment_ids,
                            use_one_hot_embeddings=use_one_hot_embeddings)

                        (masked_lm_loss, masked_lm_example_loss,
                         masked_lm_log_probs) = get_masked_lm_output(
                             bert_config, model.get_sequence_output(),
                             model.get_embedding_table(), masked_lm_positions,
                             masked_lm_ids, masked_lm_weights)

                        (next_sentence_loss, next_sentence_example_loss,
                         next_sentence_log_probs) = get_next_sentence_output(
                             bert_config, model.get_pooled_output(),
                             next_sentence_labels)

                        total_loss = masked_lm_loss + next_sentence_loss

                        loss = total_loss
                        models.append(model)
                        # get gradients
                        grads = optimizer.compute_gradients(
                            loss,
                            aggregation_method=tf.AggregationMethod.
                            EXPERIMENTAL_TREE,
                        )
                        tower_grads.append(grads)
                        # keep track of loss across all GPUs
                        train_perplexity += loss

            average_grads = average_gradients(tower_grads, None, None)
            average_grads, norm_summary_ops = clip_grads(
                average_grads, 10.0, True, global_step)
            train_perplexity = tf.exp(train_perplexity / n_gpus)
            train_op = optimizer.apply_gradients(average_grads,
                                                 global_step=global_step)
            init = tf.global_variables_initializer()
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            sess.run(init)
            sess.run(iterator.initializer)
            running_loss = 0.0
            count = 0
            t0 = time.time()
            while True:

                _, train_perplexity_ = sess.run([train_op, train_perplexity])

                running_loss += train_perplexity_
                count += 1
                if count % 100 == 0:
                    print("------------")
                    print(time.time() - t0, " s")  # seconds for the last 100 steps
                    t0 = time.time()
                    print("loss ", running_loss / 100)  # mean over the last 100 steps
                    running_loss = 0.0

                if count % 10000 == 0:
                    checkpoint_path = os.path.join(FLAGS.output_dir,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=global_step)

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

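        # NOTE: this eval branch still relies on the TPUEstimator construction
        # that is commented out above; `estimator` is not defined in this snippet.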
        eval_input_fn = input_fn_builder(
            input_files=input_files,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=False)

        result = estimator.evaluate(input_fn=eval_input_fn,
                                    steps=FLAGS.max_eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
Ejemplo n.º 24
0
def gec_create_model(bert_config, is_training, input_sequence, 
  input_mask, segment_ids, edit_sequence, 
  use_one_hot_embeddings, mode, 
  copy_weight, 
  use_bert_more, 
  insert_ids,
  multitoken_insert_ids,
  subtract_replaced_from_replacement):
  """Creates a classification model."""
  # insert_ids: word ids of unigram inserts (list)
  # multitoken_insert_ids: word_ids of bigram inserts (list of tuples of length 2)
  # Defining the space of all possible edits: 
  # unk, sos and eos are dummy edits mapped to 0, 1 and 2 respectively
  # copy is mapped to 3
  # del is mapped to 4
  num_appends = len(insert_ids) + len(multitoken_insert_ids)
  num_replaces = num_appends # appends and replacements come from the same set (inserts and multitoken_inserts)
  append_begin = 5 # First append edit (mapped to 5)
  append_end = append_begin + num_appends - 1 #Last append edit
  rep_begin = append_end + 1 # First replace edit
  rep_end = rep_begin + num_replaces - 1 #Last replace edit  
  num_suffix_transforms = 58 #num of transformation edits
  num_labels = 5 + num_appends + num_replaces + num_suffix_transforms # total number of edits
  print("************ num of labels : {} ***************".format(num_labels))

  config = bert_config
  input_sequence_shape = modeling.get_shape_list(input_sequence,2)
  batch_size = input_sequence_shape[0]
  seq_len = input_sequence_shape[1]

  if not use_bert_more:  #default use of bert (without logit factorisation)
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_sequence,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    output_layer = model.get_sequence_output()
  else:                 # LOGIT FACTORISATION is On!
    model = modified_modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_sequence,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    output_layer = model.get_sequence_output()
    replace_layer = output_layer[:,seq_len:2*seq_len,:]  #representation of replacement slots as described in paper
    append_layer = output_layer[:,2*seq_len:3*seq_len,:] #representation of append slots as described in paper
    output_layer = output_layer[:,0:seq_len,:]

  output_layer_shape = modeling.get_shape_list(output_layer,3)
  hidden_size = output_layer_shape[-1]

  flattened_output_layer = tf.reshape(output_layer,[-1, hidden_size])

  h_edit = flattened_output_layer

  if use_bert_more:
    h_word = flattened_output_layer
    flattened_replace_layer = tf.reshape(replace_layer,[-1, hidden_size])
    flattened_append_layer = tf.reshape(append_layer, [-1, hidden_size])

    m_replace = flattened_replace_layer    
    m_append = flattened_append_layer

    
    with tf.variable_scope("cls/predictions"):
      with tf.variable_scope("transform"):
        h_word = tf.layers.dense(
            h_word,
            units=bert_config.hidden_size,
            activation=modeling.get_activation(bert_config.hidden_act),
            kernel_initializer=modeling.create_initializer(
                bert_config.initializer_range))
        h_word = modeling.layer_norm(h_word)

    with tf.variable_scope("cls/predictions",reuse=True):
      with tf.variable_scope("transform",reuse=True):
        m_replace = tf.layers.dense(
            m_replace,
            units=bert_config.hidden_size,
            activation=modeling.get_activation(bert_config.hidden_act),
            kernel_initializer=modeling.create_initializer(
                bert_config.initializer_range))
        m_replace = modeling.layer_norm(m_replace)

    with tf.variable_scope("cls/predictions",reuse=True):
      with tf.variable_scope("transform",reuse=True):
        m_append = tf.layers.dense(
            m_append,
            units=bert_config.hidden_size,
            activation=modeling.get_activation(bert_config.hidden_act),
            kernel_initializer=modeling.create_initializer(
                bert_config.initializer_range))
        m_append = modeling.layer_norm(m_append)
    
    word_embedded_input = model.word_embedded_input
    flattened_word_embedded_input = tf.reshape(word_embedded_input, [-1, hidden_size])    

  labels = edit_sequence
  
  edit_weights = tf.get_variable(
      "edit_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  if is_training:
    h_edit = tf.nn.dropout(h_edit, keep_prob=0.9) 

  if use_bert_more:
      # append/replace weight vector for a given append or replace operation
      # correspond to word embedding for its token argument
      # for multitoken append/replace (e.g. has been)
      # weight vector is sum of word embeddings of token arguments

      append_weights = edit_word_embedding_lookup(model.embedding_table, insert_ids,
       use_one_hot_embeddings, config.vocab_size, config.hidden_size)
      replace_weights = append_weights #tokens in replace and append vocab are same 
                                       #(i.e. inserts and multitoken_inserts)

      multitoken_append_weights = wem_utils.edit_embedding_loopkup(model.embedding_table, multitoken_insert_ids,
                        use_one_hot_embeddings, config.vocab_size, config.hidden_size)
      multitoken_replace_weights = multitoken_append_weights #tokens in replace and append vocab are same 
                                                             #(i.e. inserts and multitoken_inserts)

      append_weights = tf.concat([append_weights, multitoken_append_weights],0)
      replace_weights = tf.concat([replace_weights, multitoken_replace_weights],0)

  with tf.variable_scope("loss"):
    edit_logits = tf.matmul(h_edit, edit_weights, transpose_b=True) #first term in eq3 in paper
    logits = edit_logits
    if use_bert_more:

      #=============== inplace_word_logits==============# #2nd term in eq3 in paper
      inplace_logit = tf.reduce_sum(h_word * flattened_word_embedded_input, axis=1, keepdims=True) #copy
      #inplace_logit = tf.reduce_sum(m_replace * flattened_word_embedded_input, axis=1, keepdims=True) #copy
      inplace_logit_appends = tf.tile(inplace_logit,[1,num_appends])
      inplace_logit_transforms = tf.tile(inplace_logit,[1,num_suffix_transforms])
      zero_3_logits = tf.zeros([batch_size*seq_len,3]) #unk sos eos 
      zero_1_logits = tf.zeros([batch_size*seq_len,1]) # del
      zero_replace_logits = tf.zeros([batch_size*seq_len,num_replaces])

      concat_list = [zero_3_logits, inplace_logit, zero_1_logits]\
                  + [inplace_logit_appends]\
                  + [zero_replace_logits]\
                  + [inplace_logit_transforms]

      inplace_word_logits = tf.concat(concat_list,1)

      #======additional (insert,replace) logits ====# #3rd term in eqn3 in paper
      zero_5_logits = tf.zeros([batch_size*seq_len,5])      
      append_logits = tf.matmul(m_append, append_weights, transpose_b=True)

      if subtract_replaced_from_replacement:
        replace_logits = replacement_minus_replaced_logits(m_replace, 
          flattened_word_embedded_input, replace_weights)
      else:
        replace_logits = tf.matmul(m_replace, replace_weights, transpose_b=True)
      
      suffix_logits  = tf.zeros([batch_size*seq_len,num_suffix_transforms])
      
      concat_list = [zero_5_logits, append_logits, replace_logits, suffix_logits]
      additional_logits = tf.concat(concat_list,1)
      #====================================================#

      logits = edit_logits + inplace_word_logits + additional_logits
      logits_bias = tf.get_variable("output_bias", shape=[num_labels], initializer=tf.zeros_initializer())
      logits += logits_bias
    
    logits = tf.reshape(logits, [output_layer_shape[0], output_layer_shape[1], num_labels])
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    probs = tf.nn.softmax(logits,axis=-1)
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    per_token_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    per_token_loss = per_token_loss * tf.to_float(input_mask)
    mask = copy_weight*tf.to_float(tf.equal(labels,3)) +  tf.to_float(tf.not_equal(labels,3))
    masked_per_token_loss = per_token_loss * mask
    per_example_loss = tf.reduce_sum(masked_per_token_loss, axis=-1)
    loss = tf.reduce_mean(per_example_loss)            

    return (loss, per_example_loss, logits, probs)
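A hedged usage sketch (the input tensors and edit vocabularies are assumed to be defined elsewhere): the most likely edit per token can be read off the returned probabilities, with padding positions zeroed out via the input mask.
# Hypothetical usage sketch (not from the original source).
loss, per_example_loss, logits, probs = gec_create_model(
    bert_config, is_training=False, input_sequence=input_sequence,
    input_mask=input_mask, segment_ids=segment_ids,
    edit_sequence=edit_sequence, use_one_hot_embeddings=False,
    mode=tf.estimator.ModeKeys.PREDICT, copy_weight=1.0,
    use_bert_more=True, insert_ids=insert_ids,
    multitoken_insert_ids=multitoken_insert_ids,
    subtract_replaced_from_replacement=False)

predicted_edits = tf.argmax(probs, axis=-1) * tf.cast(input_mask, tf.int64)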
Ejemplo n.º 25
0
def create_model(bert_config, is_training, input_ids, head_ids, tail_ids,
                 position1_ids, position2_ids, input_mask, segment_ids, labels,
                 num_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               head_ids=head_ids,
                               tail_ids=tail_ids,
                               position1_ids=position1_ids,
                               position2_ids=position2_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    # [ batch_size, seq_length, hidden_size ]
    encoder_layer = model.get_all_encoder_layers()[-1]

    mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
    masked_reduce_mean = lambda x, m: tf.reduce_sum(mul_mask(x, m), axis=1) / (
        tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10)

    sentence_embedding = masked_reduce_mean(encoder_layer, input_mask)

    pos_head_embedding = model.get_head_embedding()
    pos_tail_embedding = model.get_tail_embedding()
    neg_head_embedding = tf.random_shuffle(pos_head_embedding)
    neg_tail_embedding = tf.random_shuffle(pos_tail_embedding)

    hidden_size = encoder_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            sentence_embedding = tf.nn.dropout(sentence_embedding,
                                               keep_prob=0.9)

        pos = tf.reduce_sum(tf.abs(pos_tail_embedding + pos_head_embedding -
                                   sentence_embedding),
                            axis=1,
                            keep_dims=True)
        neg = tf.reduce_sum(tf.maximum(
            tf.abs(neg_tail_embedding + pos_head_embedding - sentence_embedding),
            tf.abs(pos_tail_embedding + neg_head_embedding - sentence_embedding)),
                            axis=1,
                            keep_dims=True)

        pre_trans_loss = tf.maximum(pos - neg + FLAGS.marign, 0)
        loss = tf.reduce_mean(pre_trans_loss)

        logits = tf.matmul(sentence_embedding,
                           output_weights,
                           transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1)
        # log_probs = tf.nn.log_softmax(logits, axis=-1)
        #
        # one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        #
        # per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        # loss = tf.reduce_mean(per_example_loss)

        # return (loss, per_example_loss, logits, probabilities)
        return loss, pre_trans_loss, logits, probabilities
Ejemplo n.º 26
0
param['num_class'] = len(index2label)

# bert input
input_ids = tf.placeholder (shape = [None, param['sentence_len']], dtype = tf.int32, name = 'input_ids')
input_mask = tf.placeholder (shape = [None, param['sentence_len']], dtype = tf.int32, name = 'input_mask')
segment_ids = tf.placeholder (shape = [None, param['sentence_len']], dtype = tf.int32, name = 'segment_ids')
input_labels = tf.placeholder (shape = [None, param['num_class']], dtype = tf.float32, name = 'input_labels')
train_flag = tf.placeholder (dtype = tf.bool, name = 'is_training')
dropout_keep_prob = tf.placeholder(dtype = tf.float32, name = 'dropout_keep_prob')
learning_rate = tf.placeholder(dtype = tf.float32, name = 'learning_rate')

bert_config = modeling.BertConfig.from_json_file(param['bert_config_path'])
model = modeling.BertModel(
    config = bert_config,
    is_training = train_flag,
    input_ids = input_ids,
    input_mask = input_mask,
    token_type_ids = segment_ids,
    use_one_hot_embeddings = False # set to True when running on TPU, otherwise False
)

output_layer = model.get_pooled_output() 
hidden_size = output_layer.shape[-1].value # 768

# your own fully connected layer
output_weights = tf.get_variable('output_weights', [hidden_size, param['num_class']], initializer = tf.truncated_normal_initializer(stddev = 0.02))
output_bias = tf.get_variable('output_bias', [param['num_class']], initializer = tf.zeros_initializer())
with tf.variable_scope('loss'):
    output_layer = tf.nn.dropout(output_layer, keep_prob = dropout_keep_prob)
    logits = tf.matmul(output_layer, output_weights)
    logits = tf.nn.bias_add(logits, output_bias)
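    # --- Hypothetical continuation (not in the original snippet): a softmax
    # cross-entropy loss over the one-hot input_labels defined above. ---
    probabilities = tf.nn.softmax(logits, axis=-1)
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=input_labels, logits=logits)
    loss = tf.reduce_mean(per_example_loss)

# Hypothetical training step using the learning_rate placeholder defined above.
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)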
Ejemplo n.º 27
0
configsession = tf.ConfigProto()
configsession.gpu_options.allow_growth = True
sess = tf.Session(config=configsession)
input_ids = tf.placeholder(shape=[None, None],
                           dtype=tf.int32,
                           name="input_ids")
input_mask = tf.placeholder(shape=[None, None],
                            dtype=tf.int32,
                            name="input_mask")
segment_ids = tf.placeholder(shape=[None, None],
                             dtype=tf.int32,
                             name="segment_ids")

with sess.as_default():
    model = modeling.BertModel(config=bert_config,
                               is_training=True,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=False)
    encoder_last_layer = model.get_sequence_output()
    encoder_last2_layer = model.all_encoder_layers[-2]
    #saver = tf.train.Saver()
    # Note: run the initializer first and only then restore the checkpoint;
    # otherwise the restored BERT weights would be overwritten. This differs from demo1.
    sess.run(tf.global_variables_initializer())
    #saver.restore(sess, pathname)
    #print(1)
    token = tokenization.CharTokenizer(
        vocab_file="chinese_L-12_H-768_A-12/vocab.txt")
    query = u'美国大选,特朗普到底是咋想的,难道美国人民眼睛有问题吗?'
    split_tokens = token.tokenize(query)
    print(split_tokens)
    word_ids = token.convert_tokens_to_ids(split_tokens)
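    # --- Hypothetical continuation (not in the original snippet): pad the ids
    # to a fixed length and feed the placeholders to get the last-layer
    # encoding (a real run would also add [CLS]/[SEP] tokens). ---
    max_len = 64  # assumed fixed length for this sketch
    trimmed = word_ids[:max_len]
    ids = trimmed + [0] * (max_len - len(trimmed))
    mask = [1] * len(trimmed) + [0] * (max_len - len(trimmed))
    segments = [0] * max_len
    last_layer = sess.run(encoder_last_layer,
                          feed_dict={input_ids: [ids],
                                     input_mask: [mask],
                                     segment_ids: [segments]})
    print(last_layer.shape)  # (1, max_len, 768)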
Ejemplo n.º 28
0
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 label_ids, seq_length, num_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    embedding = model.get_sequence_output()
    embeddings = tf.layers.dropout(embedding,
                                   rate=FLAGS.dropout_rate,
                                   training=is_training)
    with tf.variable_scope('Graph', reuse=None, custom_getter=None):
        # LSTM
        t = tf.transpose(embeddings, perm=[1, 0, 2])
        lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(
            128)  # in sequence labeling the number of LSTM units typically matches max_seq_length
        lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(128)
        lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
        output_fw, _ = lstm_cell_fw(t,
                                    dtype=tf.float32,
                                    sequence_length=seq_length)
        output_bw, _ = lstm_cell_bw(t,
                                    dtype=tf.float32,
                                    sequence_length=seq_length)
        output = tf.concat([output_fw, output_bw], axis=-1)
        output = tf.transpose(output, perm=[1, 0, 2])
        tf.logging.info(output.shape)
        output = tf.layers.dropout(output, rate=0.5, training=is_training)
        output = tf.reshape(output, [-1, 128 * 256])
        output_weights = tf.get_variable(
            "output_weights", [num_labels, 128 * 256],
            initializer=tf.truncated_normal_initializer(stddev=0.02))

        output_bias = tf.get_variable("output_bias", [num_labels],
                                      initializer=tf.zeros_initializer())
        logits = tf.matmul(output, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        tf.logging.info("*****shape of label_ids******")
        tf.logging.info(label_ids.shape)
        tf.logging.info(logits.shape)

        correctPred = tf.equal(
            tf.argmax(logits, 1), tf.argmax(label_ids, 1)
        )  # tf.argmax: Returns the index with the largest value across axes of a tensor.
        accuracy = tf.reduce_mean(tf.cast(correctPred, tf.float32))
        tf.summary.scalar('Accuracy', accuracy)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=label_ids))
        # optimizer = tf.train.AdamOptimizer().minimize(loss)

        # crf_params = tf.get_variable("crf", [num_labels, num_labels], dtype=tf.float32)
        # trans = tf.get_variable(
        #     "transitions",
        #     shape=[num_labels, num_labels],
        #     initializer=initializers.xavier_initializer())
        # pred_ids, trans = tf.contrib.crf.crf_decode(logits, crf_params, seq_length)
        # log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
        #     logits, label_ids, seq_length, crf_params)
        # loss = tf.reduce_mean(-log_likelihood)
        # if mode == tf.estimator.ModeKeys.EVAL:
        #     return tf.estimator.EstimatorSpec(
        #         mode, loss=loss, eval_metric_ops=metrics)

        # elif mode == tf.estimator.ModeKeys.TRAIN:

        #   return loss, logits, trans, pred_ids
        return loss, tf.argmax(logits, 1)
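The commented-out lines above hint at a CRF head instead of the flattened softmax. A CRF needs per-token logits of shape [batch, max_seq_length, num_labels] rather than the single per-sequence vector built here, so the sketch below assumes `seq_output` is the [batch, max_seq_length, 256] tensor taken right after the transpose back to batch-major (before the flattening reshape), that `label_ids` holds integer tags of shape [batch, max_seq_length], and that `seq_length` gives the true length of each sequence:

# Assumption: `seq_output` is the batch-major bi-LSTM output described above.
# Project every timestep's 256 bi-LSTM features to the label space.
token_logits = tf.layers.dense(seq_output, num_labels)  # [batch, seq, num_labels]
trans = tf.get_variable("transitions", [num_labels, num_labels],
                        initializer=tf.truncated_normal_initializer(stddev=0.02))
log_likelihood, trans = tf.contrib.crf.crf_log_likelihood(
    token_logits, label_ids, seq_length, transition_params=trans)
crf_loss = tf.reduce_mean(-log_likelihood)
pred_ids, _ = tf.contrib.crf.crf_decode(token_logits, trans, seq_length)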
Ejemplo n.º 29
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        next_sentence_labels = features["next_sentence_labels"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = get_next_sentence_output(
             bert_config, model.get_pooled_output(), next_sentence_labels)

        total_loss = masked_lm_loss + next_sentence_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        # output_spec = None
        # if mode == tf.estimator.ModeKeys.TRAIN:
        #   train_op = optimization.create_optimizer(
        #       total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
        #
        #   output_spec = tf.contrib.tpu.TPUEstimatorSpec(
        #       mode=mode,
        #       loss=total_loss,
        #       train_op=train_op,
        #       scaffold_fn=scaffold_fn)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     loss=total_loss,
                                                     train_op=train_op)

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                          masked_lm_ids, masked_lm_weights,
                          next_sentence_example_loss, next_sentence_log_probs,
                          next_sentence_labels):
                """Computes the loss and accuracy of the model."""
                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(masked_lm_example_loss,
                                                    [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                next_sentence_log_probs = tf.reshape(
                    next_sentence_log_probs,
                    [-1, next_sentence_log_probs.shape[-1]])
                next_sentence_predictions = tf.argmax(next_sentence_log_probs,
                                                      axis=-1,
                                                      output_type=tf.int32)
                next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
                next_sentence_accuracy = tf.metrics.accuracy(
                    labels=next_sentence_labels,
                    predictions=next_sentence_predictions)
                next_sentence_mean_loss = tf.metrics.mean(
                    values=next_sentence_example_loss)

                return {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                    "next_sentence_accuracy": next_sentence_accuracy,
                    "next_sentence_loss": next_sentence_mean_loss,
                }

            # eval_metrics = (metric_fn, [
            #     masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            #     masked_lm_weights, next_sentence_example_loss,
            #     next_sentence_log_probs, next_sentence_labels
            # ])
            # output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            #     mode=mode,
            #     loss=total_loss,
            #     eval_metrics=eval_metrics,
            #     scaffold_fn=scaffold_fn)
            eval_metrics = metric_fn(masked_lm_example_loss,
                                     masked_lm_log_probs, masked_lm_ids,
                                     masked_lm_weights,
                                     next_sentence_example_loss,
                                     next_sentence_log_probs,
                                     next_sentence_labels)

            output_spec = tf.estimator.EstimatorSpec(
                mode=mode, loss=total_loss, eval_metric_ops=eval_metrics)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
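Since the TPUEstimatorSpec branches are commented out, this model_fn plugs into a plain Estimator. A hedged sketch of that wiring; the flags, input functions and step counts below are assumptions standing in for whatever the surrounding script defines, not part of the original code:

    # Assumptions: FLAGS.output_dir / FLAGS.max_eval_steps, train_input_fn and
    # eval_input_fn are placeholders defined elsewhere in the script.
    run_config = tf.estimator.RunConfig(model_dir=FLAGS.output_dir,
                                        save_checkpoints_steps=1000)
    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    estimator.evaluate(input_fn=eval_input_fn, steps=FLAGS.max_eval_steps)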
Ejemplo n.º 30
0
 parser.add_argument("--ckpt_dir", default='/home/chandler/mask-bert/model/ew_75/model.ckpt-24643', type=str, 
                     help="model ckpt dir. E.g., MNLI/model_0/model.ckpt-24643")
 parser.add_argument("--output_dir", default='./fig/masked_weight.png', type=str, 
                     help="output png file dir")
 parser.add_argument("--layer_index", default=0, type=int, 
                     help="layer to plot")  
 parser.add_argument("--matrix_name", default="wq", type=str, 
                     help="name of the matrix to plot. (wq, wk, wv, fc1, fc2, fc3)")
 parser.add_argument("--fig_type", default="pdf", type=str, 
                     help="figure file extension type")
 args = parser.parse_args()
 
 bert_config = modeling.BertConfig.from_json_file('../Model/uncased_L-12_H-768_A-12/bert_config.json')
 input_ids = tf.placeholder(tf.int32, (8, 256))  # fixed batch of 8, sequence length 256
 model = modeling.BertModel(
     config=bert_config,
     is_training=False,
     input_ids=input_ids)
 saver = tf.train.Saver()
 with tf.Session() as sess:
   sess.run(tf.global_variables_initializer())
   ckpt_dir = args.ckpt_dir
   saver.restore(sess, ckpt_dir)
   print(f'parsing file {ckpt_dir}')
   
   results = []
   for probe_layer in range(0,12):
   
     mask_wq = sess.graph.get_tensor_by_name(f'bert/encoder/layer_{probe_layer}/attention/self/query/mask-o:0')
     mask_wk = sess.graph.get_tensor_by_name(f'bert/encoder/layer_{probe_layer}/attention/self/key/mask-o:0')
     mask_wv = sess.graph.get_tensor_by_name(f'bert/encoder/layer_{probe_layer}/attention/self/value/mask-o:0')
     mask_fc1 = sess.graph.get_tensor_by_name(f'bert/encoder/layer_{probe_layer}/attention/output/dense/mask-o:0')