def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
  """Creates a multi-label classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  output_layer = model.get_pooled_output()  # pooled [CLS] output from the backbone model

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(  # weights of the task-specific classification layer
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(  # bias of the task-specific classification layer
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)  # task-specific classification layer
    logits = tf.nn.bias_add(logits, output_bias)

    # Multi-label classification: labels are multi-hot vectors, so apply a
    # sigmoid to each logit independently to get per-label probabilities.
    probabilities = tf.nn.sigmoid(logits)
    labels = tf.cast(labels, tf.float32)

    # For single-label classification, use a softmax over classes instead:
    #   probabilities = tf.nn.softmax(logits, axis=-1)
    #   log_probs = tf.nn.log_softmax(logits, axis=-1)
    #   one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    #   per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)

    # Element-wise sigmoid cross-entropy; per_example_loss has shape
    # [batch_size, num_labels], and reduce_mean averages over both axes.
    per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=labels, logits=logits)
    loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, logits, probabilities)
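
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original training code): a NumPy
# reference for what tf.nn.sigmoid_cross_entropy_with_logits computes per
# label, using the numerically stable form documented for that op:
#   max(x, 0) - x * z + log(1 + exp(-|x|))
# The helper name `_sigmoid_ce_reference` is hypothetical.
def _sigmoid_ce_reference(logits, labels):
  """NumPy reference for the per-label sigmoid cross-entropy used above."""
  import numpy as np  # local import so this sketch stays self-contained
  x = np.asarray(logits, dtype=np.float64)
  z = np.asarray(labels, dtype=np.float64)
  # One loss value per (example, label) pair, exactly like the TF op.
  return np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x)))

# Example: one example with three labels and multi-hot target [1, 0, 1] yields
# a loss per label; tf.reduce_mean then averages over batch and label axes.
#   _sigmoid_ce_reference([[2.0, -1.0, 0.5]], [[1.0, 0.0, 1.0]])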
def create_model_original(bert_config, is_training, input_ids, input_mask,
                          segment_ids, labels, num_labels,
                          use_one_hot_embeddings):
  """Creates a classification model (original single-label version)."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  output_layer = model.get_pooled_output()  # pooled [CLS] output from the backbone model

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(  # weights of the task-specific classification layer
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(  # bias of the task-specific classification layer
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)  # task-specific classification layer
    logits = tf.nn.bias_add(logits, output_bias)
    probabilities = tf.nn.softmax(logits, axis=-1)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    # Cross-entropy loss: negative log-probability of the true class,
    # averaged over the batch.
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, logits, probabilities)
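
# ---------------------------------------------------------------------------
# Hypothetical usage sketch (TF 1.x): wiring feed placeholders into
# create_model for the multi-label case. The shapes and values here
# (max_seq_length=128, num_labels=6, the bert_config.json path) are
# illustrative assumptions, not values fixed by this file.
def _build_multilabel_graph_example():
  """Builds (but does not run) a multi-label classification graph."""
  bert_config = modeling.BertConfig.from_json_file("bert_config.json")  # assumed path
  max_seq_length, num_labels = 128, 6
  input_ids = tf.placeholder(tf.int32, [None, max_seq_length], name="input_ids")
  input_mask = tf.placeholder(tf.int32, [None, max_seq_length], name="input_mask")
  segment_ids = tf.placeholder(tf.int32, [None, max_seq_length], name="segment_ids")
  labels = tf.placeholder(tf.float32, [None, num_labels], name="labels")  # multi-hot targets
  return create_model(bert_config, True, input_ids, input_mask, segment_ids,
                      labels, num_labels, use_one_hot_embeddings=False)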