def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """Creates a multi-label classification model on top of BERT.

    Args:
        bert_config: `modeling.BertConfig` instance for the backbone.
        is_training: bool; enables dropout on the pooled output when True.
        input_ids: int Tensor [batch, seq_len] of token ids.
        input_mask: int Tensor [batch, seq_len]; 1 for real tokens, 0 for padding.
        segment_ids: int Tensor [batch, seq_len] of segment (token type) ids.
        labels: multi-hot label Tensor [batch, num_labels] (cast to float32 here).
        num_labels: number of independent binary labels.
        use_one_hot_embeddings: bool, passed through to `modeling.BertModel`.

    Returns:
        A tuple `(loss, per_example_loss, logits, probabilities)` where `loss`
        is the mean sigmoid cross-entropy over all elements, `per_example_loss`
        is the unreduced [batch, num_labels] cross-entropy, and `probabilities`
        are per-label sigmoid outputs.
    """
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    # Classify the entire segment from the pooled [CLS] representation.
    # For token-level output, use model.get_sequence_output() instead.
    output_layer = model.get_pooled_output()

    hidden_size = output_layer.shape[-1].value

    # Task-specific classification-head parameters.
    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "output_bias", [num_labels],
        initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout.
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)

        # Multi-label classification: labels are multi-hot, so each label is
        # an independent Bernoulli — use sigmoid (not softmax) probabilities
        # and sigmoid cross-entropy against the float-cast labels.
        probabilities = tf.nn.sigmoid(logits)
        labels = tf.cast(labels, tf.float32)
        per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels, logits=logits)
        loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, logits, probabilities)
# Example #2: reference single-label (softmax) implementation below.
def create_model_original(bert_config, is_training, input_ids, input_mask,
                          segment_ids, labels, num_labels,
                          use_one_hot_embeddings):
    """Creates a single-label (softmax) classification model on top of BERT.

    Builds the BERT backbone, takes the pooled [CLS] output, applies a linear
    classifier head, and computes softmax cross-entropy against integer labels.

    Returns:
        A tuple `(loss, per_example_loss, logits, probabilities)`.
    """
    bert = modeling.BertModel(config=bert_config,
                              is_training=is_training,
                              input_ids=input_ids,
                              input_mask=input_mask,
                              token_type_ids=segment_ids,
                              use_one_hot_embeddings=use_one_hot_embeddings)

    # Whole-segment classification uses the pooled [CLS] vector; switch to
    # bert.get_sequence_output() for token-level tasks.
    pooled = bert.get_pooled_output()
    dim = pooled.shape[-1].value

    # Parameters of the task-specific classifier head.
    head_weights = tf.get_variable(
        "output_weights", [num_labels, dim],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    head_bias = tf.get_variable(
        "output_bias", [num_labels],
        initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # 10% dropout during training.
            pooled = tf.nn.dropout(pooled, keep_prob=0.9)

        logits = tf.nn.bias_add(
            tf.matmul(pooled, head_weights, transpose_b=True), head_bias)
        probabilities = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

        # Per-example cross-entropy, then mean over the batch.
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, logits, probabilities)