# These snippets use the TF1-style API found throughout ALBERT's fine-tuning
# code; the fine_tuning_utils import path below is an assumption and may
# differ in your checkout.
import tensorflow.compat.v1 as tf

from albert import fine_tuning_utils


def create_model(albert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings,
                 max_seq_length, dropout_prob, hub_module):
  """Creates a multiple-choice classification model.

  Each example carries `num_labels` candidate sequences; every candidate is
  scored with a single shared output vector, and a softmax over candidates
  selects the answer.
  """
  bsz_per_core = tf.shape(input_ids)[0]

  # Flatten [batch, num_labels, seq_len] inputs so each candidate is encoded
  # as an independent sequence.
  input_ids = tf.reshape(input_ids,
                         [bsz_per_core * num_labels, max_seq_length])
  input_mask = tf.reshape(input_mask,
                          [bsz_per_core * num_labels, max_seq_length])
  token_type_ids = tf.reshape(segment_ids,
                              [bsz_per_core * num_labels, max_seq_length])

  (output_layer, _) = fine_tuning_utils.create_albert(
      albert_config=albert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=token_type_ids,
      use_one_hot_embeddings=use_one_hot_embeddings,
      use_einsum=True,
      hub_module=hub_module)

  hidden_size = output_layer.shape[-1].value

  # A single shared scoring vector: each candidate gets exactly one logit.
  output_weights = tf.get_variable(
      "output_weights", [1, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [1], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # Apply dropout at rate `dropout_prob` (e.g. 0.1) during training only.
      output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    # Regroup the per-candidate scores into one row per original example.
    logits = tf.reshape(logits, [bsz_per_core, num_labels])
    probabilities = tf.nn.softmax(logits, axis=-1)
    predictions = tf.argmax(probabilities, axis=-1, output_type=tf.int32)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    one_hot_labels = tf.one_hot(
        labels, depth=tf.cast(num_labels, dtype=tf.int32), dtype=tf.float32)

    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)

  return (loss, per_example_loss, probabilities, logits, predictions)
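# --- Usage sketch -----------------------------------------------------------
# A minimal, self-contained sketch (toy shapes, no ALBERT encoder) of the
# multiple-choice scoring trick above: every (example, candidate) pair is
# scored with one shared weight vector, and the flat scores are reshaped back
# to [batch, num_choices] before the softmax. All names and sizes here are
# illustrative assumptions, not part of the ALBERT API.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

batch, num_choices, hidden = 2, 4, 8
# Stand-in for the pooled [CLS] embeddings of batch * num_choices sequences.
pooled = tf.random.normal([batch * num_choices, hidden])
w = tf.get_variable("demo_w", [1, hidden],
                    initializer=tf.truncated_normal_initializer(stddev=0.02))
b = tf.get_variable("demo_b", [1], initializer=tf.zeros_initializer())

scores = tf.nn.bias_add(tf.matmul(pooled, w, transpose_b=True), b)  # [B*C, 1]
scores = tf.reshape(scores, [batch, num_choices])                   # [B, C]
probs = tf.nn.softmax(scores, axis=-1)  # one distribution per example

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  print(sess.run(probs))  # each row sums to 1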
def create_model(albert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings,
                 task_name, hub_module):
  """Creates a classification model.

  Handles both standard classification tasks and the "sts-b" regression
  task, which uses a single logit and a squared-error loss.
  """
  (output_layer, _) = fine_tuning_utils.create_albert(
      albert_config=albert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings,
      use_einsum=True,
      hub_module=hub_module)

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    if task_name != "sts-b":
      # Classification: softmax over num_labels classes, cross-entropy loss.
      probabilities = tf.nn.softmax(logits, axis=-1)
      predictions = tf.argmax(probabilities, axis=-1, output_type=tf.int32)
      log_probs = tf.nn.log_softmax(logits, axis=-1)
      one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
      per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    else:
      # Regression (sts-b): the single squeezed logit is the prediction, and
      # the loss is the squared error against the gold score.
      probabilities = logits
      logits = tf.squeeze(logits, [-1])
      predictions = logits
      per_example_loss = tf.square(logits - labels)
    loss = tf.reduce_mean(per_example_loss)

  return (loss, per_example_loss, probabilities, logits, predictions)
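# --- Usage sketch -----------------------------------------------------------
# A toy illustration (hard-coded values, no ALBERT call) contrasting the two
# loss branches above: softmax cross-entropy for classification tasks versus
# squared error on a single squeezed logit for the "sts-b" regression task.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# Classification branch: logits [batch, num_labels], integer class labels.
logits_cls = tf.constant([[2.0, 0.5, -1.0], [0.1, 0.2, 3.0]])
labels_cls = tf.constant([0, 2])
log_probs = tf.nn.log_softmax(logits_cls, axis=-1)
one_hot = tf.one_hot(labels_cls, depth=3, dtype=tf.float32)
ce = -tf.reduce_sum(one_hot * log_probs, axis=-1)  # per-example CE loss

# Regression branch (sts-b): one logit per example, squared-error loss.
logits_reg = tf.squeeze(tf.constant([[2.3], [4.1]]), [-1])  # [batch]
labels_reg = tf.constant([2.0, 4.5])
mse = tf.square(logits_reg - labels_reg)

with tf.Session() as sess:
  print(sess.run([ce, mse]))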
def create_model(albert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings,
                 task_name, hub_module):
  """Creates a multi-label classification model.

  Labels are multi-hot vectors: each class is scored independently with a
  sigmoid, and the loss is an element-wise sigmoid cross-entropy.
  """
  (output_layer, _) = fine_tuning_utils.create_albert(
      albert_config=albert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings,
      use_einsum=True,
      hub_module=hub_module)

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    # Multi-label classification: labels are multi-hot, so an element-wise
    # sigmoid (not a softmax) converts each logit into an independent
    # per-class probability.
    probabilities = tf.nn.sigmoid(logits)
    labels = tf.cast(labels, tf.float32)
    # Note: argmax only reports the single highest-scoring class; a per-class
    # threshold (e.g. probabilities > 0.5) is the usual multi-label decision
    # rule, shown in the sketch below.
    predictions = tf.argmax(probabilities, axis=-1, output_type=tf.int32)

    # Sigmoid cross-entropy is computed per (example, class); averaging over
    # the class axis yields a [batch]-shaped per-example loss, matching the
    # other create_model variants, and leaves the scalar loss unchanged.
    per_label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=labels, logits=logits)
    per_example_loss = tf.reduce_mean(per_label_loss, axis=-1)
    loss = tf.reduce_mean(per_example_loss)

  return (loss, per_example_loss, probabilities, logits, predictions)
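# --- Usage sketch -----------------------------------------------------------
# A small illustration (toy values, no ALBERT call) of the multi-label setup
# above: labels are multi-hot, an element-wise sigmoid yields independent
# per-class probabilities, and a 0.5 threshold gives the predicted label set.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

logits = tf.constant([[2.0, -1.0, 0.5], [-3.0, 4.0, 0.0]])
labels = tf.constant([[1.0, 0.0, 1.0], [0.0, 1.0, 0.0]])  # multi-hot targets

probs = tf.nn.sigmoid(logits)
preds = tf.cast(probs > 0.5, tf.int32)  # independent per-class decisions
per_label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
    labels=labels, logits=logits)                           # [batch, classes]
per_example_loss = tf.reduce_mean(per_label_loss, axis=-1)  # [batch]
loss = tf.reduce_mean(per_example_loss)

with tf.Session() as sess:
  print(sess.run([preds, per_example_loss, loss]))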