def create_model(self, bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
    """Builds the classification model.

    :param bert_config: BERT configuration object
    :param is_training: whether the model is in training mode
    :param input_ids: token id tensor, shape [batch_size, seq_length]
    :param input_mask: mask marking real tokens vs. padding
    :param segment_ids: segment (token type) id tensor
    :param labels: label tensor, shape [batch_size]
    :param num_labels: number of classification labels
    :param use_one_hot_embeddings: whether to use one-hot word embeddings
    :return: (loss, per_example_loss, logits, probabilities)
    """
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    # In the demo, we are doing a simple classification task on the entire
    # segment. If you want the token-level output, use
    # model.get_sequence_output() instead, e.g.:
    #   embedding_layer = model.get_sequence_output()
    # That returns per-token embeddings (similar to an embedding_lookup),
    # on top of which a CRF layer can be stacked.
    output_layer = model.get_pooled_output()

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1)
        # The per-example losses could be reweighted here to handle class
        # imbalance (see the sketch after this function); as written, the
        # code uses plain unweighted cross-entropy.
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, logits, probabilities)
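
# The comment above mentions weighting samples to handle class imbalance, but
# the function itself computes an unweighted cross-entropy. A minimal sketch of
# one way to add that weighting follows; `class_weights` is a hypothetical
# per-class weight list (one entry per label) and is not part of the original
# code.
def weighted_per_example_loss(log_probs, labels, num_labels, class_weights):
    """Cross-entropy weighted per class; assumes len(class_weights) == num_labels."""
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    # Look up each example's weight from its gold label and scale its loss.
    weights = tf.gather(tf.constant(class_weights, dtype=tf.float32), labels)
    return per_example_loss * weights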
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, keep_prob, num_labels, use_one_hot_embeddings):
    """Creates a token-level classification model with a CRF output layer."""
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        scope='bert')

    # Token-level output: [batch_size, seq_length, hidden_size].
    output_layer = model.get_sequence_output()

    hidden_size = output_layer.shape[-1].value
    seq_length = output_layer.shape[-2].value

    output_weight = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=keep_prob)

        # Project each token representation to the label space.
        output_layer = tf.reshape(output_layer, [-1, hidden_size])
        logits = tf.matmul(output_layer, output_weight, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        logits = tf.reshape(logits, [-1, seq_length, num_labels])

        # True (unpadded) length of each sequence.
        input_m = tf.count_nonzero(input_mask, -1)
        log_likelihood, transition_matrix = tf.contrib.crf.crf_log_likelihood(
            logits, labels, input_m)
        loss = tf.reduce_mean(-log_likelihood)

        # Inference: Viterbi decoding over the CRF.
        viterbi_sequence, viterbi_score = tf.contrib.crf.crf_decode(
            logits, transition_matrix, input_m)

        # Exclude padding positions from the accuracy computation.
        equals = tf.reduce_sum(
            tf.cast(tf.equal(tf.cast(viterbi_sequence, tf.int64), labels),
                    tf.float32) * tf.cast(input_mask, tf.float32))
        acc = equals / tf.cast(tf.reduce_sum(input_mask), tf.float32)

    return (loss, acc, logits, viterbi_sequence)
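
# A minimal sketch of how this CRF variant might be wired up with feed-dict
# placeholders. `MAX_SEQ_LEN`, `NUM_LABELS`, and `my_bert_config` are assumed
# to be defined elsewhere and are not part of the original code; plain Adam is
# used here purely for illustration (the BERT codebase ships its own optimizer
# with warmup).
input_ids = tf.placeholder(tf.int32, [None, MAX_SEQ_LEN], name="input_ids")
input_mask = tf.placeholder(tf.int32, [None, MAX_SEQ_LEN], name="input_mask")
segment_ids = tf.placeholder(tf.int32, [None, MAX_SEQ_LEN], name="segment_ids")
# int64 labels, matching the cast inside create_model's accuracy computation.
labels = tf.placeholder(tf.int64, [None, MAX_SEQ_LEN], name="labels")

loss, acc, logits, viterbi_sequence = create_model(
    my_bert_config, is_training=True, input_ids=input_ids,
    input_mask=input_mask, segment_ids=segment_ids, labels=labels,
    keep_prob=0.9, num_labels=NUM_LABELS, use_one_hot_embeddings=False)
train_op = tf.train.AdamOptimizer(learning_rate=5e-5).minimize(loss)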
def create_classification_model(self, bert_config, is_training, input_ids,
                                input_mask, segment_ids, labels, num_labels):
    # Run the input batch through BERT to obtain its representation.
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
    )

    # Token-level output (unused here, but available for sequence tasks).
    embedding_layer = model.get_sequence_output()
    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        # Loss is only defined when labels are provided, so this variant can
        # also be built for inference with labels=None.
        if labels is not None:
            one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
            per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
            loss = tf.reduce_mean(per_example_loss)
        else:
            loss, per_example_loss = None, None

    return (loss, per_example_loss, logits, probabilities)
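
# Because this variant tolerates labels=None, it can be built purely for
# inference. A minimal sketch, assuming a `classifier` instance, a loaded
# `bert_config`, a `NUM_LABELS` constant, and prepared input tensors (all
# hypothetical names, not from the original code):
_, _, logits, probabilities = classifier.create_classification_model(
    bert_config, is_training=False, input_ids=input_ids,
    input_mask=input_mask, segment_ids=segment_ids,
    labels=None, num_labels=NUM_LABELS)
pred_ids = tf.argmax(probabilities, axis=-1)  # predicted class per example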