Code example #1
    def create_model(self, bert_config, is_training, input_ids, input_mask,
                     segment_ids, labels, num_labels, use_one_hot_embeddings):
        """
        构建分类模型
        :param bert_config:
        :param is_training:
        :param input_ids:
        :param input_mask:
        :param segment_ids:
        :param labels:
        :param num_labels:
        :return:
        """
        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        # In the demo, we are doing a simple classification task on the entire segment.
        #
        # If you want to use the token-level output, use model.get_sequence_output() instead.
        # embedding_layer = model.get_sequence_output()  # token-level embeddings (like embedding_lookup); a CRF layer can be stacked on top
        output_layer = model.get_pooled_output()

        hidden_size = output_layer.shape[-1].value

        output_weights = tf.get_variable(
            "output_weights", [num_labels, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))

        output_bias = tf.get_variable("output_bias", [num_labels],
                                      initializer=tf.zeros_initializer())

        with tf.variable_scope("loss"):
            if is_training:
                # I.e., 0.1 dropout
                output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

            logits = tf.matmul(output_layer, output_weights, transpose_b=True)
            logits = tf.nn.bias_add(logits, output_bias)
            probabilities = tf.nn.softmax(logits, axis=-1)

            # per-class weighting to handle label imbalance could be applied to
            # the cross-entropy below (see the sketch after this example)
            log_probs = tf.nn.log_softmax(logits, axis=-1)

            one_hot_labels = tf.one_hot(labels,
                                        depth=num_labels,
                                        dtype=tf.float32)

            per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs,
                                              axis=-1)
            loss = tf.reduce_mean(per_example_loss)

            return (loss, per_example_loss, logits, probabilities)
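
The comment above mentions weighting samples to deal with class imbalance, but the snippet itself computes an unweighted cross-entropy. Below is a minimal sketch of how per-class weights could be folded into that loss; class_weights and weighted_cross_entropy are hypothetical names, and TensorFlow 1.x is assumed, as in the snippets on this page.

# Hedged sketch, not part of the original snippet: scale each example's loss by
# the weight of its true class. class_weights is a hypothetical Python list
# with one float per label.
import tensorflow as tf  # TensorFlow 1.x assumed

def weighted_cross_entropy(logits, labels, num_labels, class_weights):
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    # look up the weight of each example's gold label and rescale its loss
    weights = tf.gather(tf.constant(class_weights, dtype=tf.float32), labels)
    return tf.reduce_mean(per_example_loss * weights)
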
Code example #2
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, keep_prob, num_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings,
                               scope='bert')
    # token-level output: [batch_size, seq_length, hidden_size]
    output_layer = model.get_sequence_output()
    hidden_size = output_layer.shape[-1].value
    seq_length = output_layer.shape[-2].value
    print(output_layer.shape)

    output_weight = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())
    with tf.variable_scope("loss"):
        if is_training:
            # apply dropout using the keep_prob argument passed to the function
            output_layer = tf.nn.dropout(output_layer, keep_prob=keep_prob)

        output_layer = tf.reshape(output_layer, [-1, hidden_size])
        logits = tf.matmul(output_layer, output_weight, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        # restore the [batch_size, seq_length, num_labels] shape expected by the CRF
        logits = tf.reshape(logits, [-1, seq_length, num_labels])

        # actual sequence lengths (count of non-padding tokens per example)
        input_m = tf.count_nonzero(input_mask, -1)

        log_likelihood, transition_matrix = tf.contrib.crf.crf_log_likelihood(
            logits, labels, input_m)
        loss = tf.reduce_mean(-log_likelihood)
        # inference
        viterbi_sequence, viterbi_score = tf.contrib.crf.crf_decode(
            logits, transition_matrix, input_m)
        # accuracy computed only over non-padding positions
        equals = tf.reduce_sum(
            tf.cast(tf.equal(tf.cast(viterbi_sequence, tf.int64), labels),
                    tf.float32) * tf.cast(input_mask, tf.float32))
        acc = equals / tf.cast(tf.reduce_sum(input_mask), tf.float32)
        return (loss, acc, logits, viterbi_sequence)
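
A small post-processing sketch for the CRF output, assuming viterbi_sequence and input_mask have been fetched from a session as NumPy arrays; trim_padding is a hypothetical helper name, not from the source.

# Hedged usage sketch: drop padding positions from the decoded tag sequences
# using the same input_mask that fed the model.
import numpy as np

def trim_padding(viterbi_batch, mask_batch):
    """viterbi_batch, mask_batch: NumPy arrays of shape [batch, seq_length]."""
    results = []
    for tags, mask in zip(viterbi_batch, mask_batch):
        length = int(np.count_nonzero(mask))  # number of real (non-pad) tokens
        results.append(tags[:length].tolist())
    return results
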
Code example #3
    def create_classification_model(self, bert_config, is_training, input_ids,
                                    input_mask, segment_ids, labels,
                                    num_labels):
        # run the input batch through BERT to obtain its representation
        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
        )

        embedding_layer = model.get_sequence_output()  # token-level output (unused below)
        output_layer = model.get_pooled_output()  # pooled [CLS] output used for classification
        hidden_size = output_layer.shape[-1].value

        output_weights = tf.get_variable(
            "output_weights", [num_labels, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))

        output_bias = tf.get_variable("output_bias", [num_labels],
                                      initializer=tf.zeros_initializer())

        with tf.variable_scope("loss"):
            if is_training:
                # I.e., 0.1 dropout
                output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

            logits = tf.matmul(output_layer, output_weights, transpose_b=True)
            logits = tf.nn.bias_add(logits, output_bias)
            probabilities = tf.nn.softmax(logits, axis=-1)
            log_probs = tf.nn.log_softmax(logits, axis=-1)

            if labels is not None:
                one_hot_labels = tf.one_hot(labels,
                                            depth=num_labels,
                                            dtype=tf.float32)

                per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs,
                                                  axis=-1)
                loss = tf.reduce_mean(per_example_loss)
            else:
                loss, per_example_loss = None, None
        return (loss, per_example_loss, logits, probabilities)
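
Because create_classification_model returns (None, None, logits, probabilities) when labels is None, inference only needs the probabilities tensor. Below is a minimal sketch, assuming sess, the feed dict, and label_list are built elsewhere; the helper name predict_labels is hypothetical, not from the source.

# Hedged inference sketch, not part of the original snippet.
import numpy as np

def predict_labels(sess, probabilities, feed_dict, label_list):
    """Return (label, probability) for the most likely class of each example."""
    probs = sess.run(probabilities, feed_dict=feed_dict)  # [batch, num_labels]
    best = np.argmax(probs, axis=-1)
    return [(label_list[i], float(probs[n, i])) for n, i in enumerate(best)]
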