import tensorflow as tf

# Project-local helpers (focal loss, center loss, weighted loss ratio).
# The import path is assumed here; adjust it to the actual repo layout.
import loss_utils


def classifier(config, pooled_output, num_labels, labels,
               dropout_prob, ratio_weight=None, **kwargs):
    output_layer = pooled_output
    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias", [num_labels],
        initializer=tf.zeros_initializer())

    output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    if config.get("label_type", "single_label") == "single_label":
        if config.get("loss", "entropy") == "entropy":
            tf.logging.info("==standard cross entropy==")
            per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=tf.stop_gradient(labels))
        elif config.get("loss", "entropy") == "focal_loss":
            tf.logging.info("==multi-class focal loss==")
            per_example_loss, _ = loss_utils.focal_loss_multi_v1(
                config, logits=logits, labels=labels)

        # Fall back to a plain mean when per-class ratio weighting is not
        # configured (weighted_loss_ratio raises in that case).
        try:
            per_example_loss = loss_utils.weighted_loss_ratio(
                config, per_example_loss, labels, ratio_weight)
            loss = tf.reduce_sum(per_example_loss)
            tf.logging.info("==applying weighted loss==")
        except Exception:
            loss = tf.reduce_mean(per_example_loss)

        if config.get("with_center_loss", "no") == "center_loss":
            tf.logging.info("==apply with center loss==")
            center_loss, _ = loss_utils.center_loss_v2(
                config, features=pooled_output, labels=labels)
            loss += center_loss * config.get("center_loss_coef", 1e-3)

        return (loss, per_example_loss, logits)

    elif config.get("label_type", "single_label") == "multi_label":
        # sigmoid_cross_entropy_with_logits expects *raw* logits; the original
        # applied tf.log_sigmoid first, which double-counts the sigmoid.
        # Labels are cast to float as the op requires.
        per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits,
            labels=tf.stop_gradient(tf.cast(labels, tf.float32)))
        per_example_loss = tf.reduce_sum(per_example_loss, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, per_example_loss, logits)
    else:
        raise NotImplementedError()
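# --- Usage sketch (illustrative, not from the original repo) ---
# A minimal sketch of wiring `classifier` into a TF 1.x graph. The config
# keys mirror the .get() calls above; the placeholder shapes, scope name,
# and hyperparameters are assumptions for demonstration only.
def _classifier_usage_sketch():
    config = {"label_type": "single_label", "loss": "entropy"}
    pooled_output = tf.placeholder(
        tf.float32, [None, 768], name="pooled_output")  # e.g. a BERT [CLS] vector
    labels = tf.placeholder(tf.int32, [None], name="labels")
    with tf.variable_scope("classifier_sketch"):
        loss, per_example_loss, logits = classifier(
            config, pooled_output, num_labels=3,
            labels=labels, dropout_prob=0.1)
    return loss, per_example_loss, logits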
def order_classifier_v1(config, output_lst, num_labels, labels,
                        dropout_prob, ratio_weight=None):
    assert len(output_lst) == 2
    seq_output_a = output_lst[0]
    seq_output_b = output_lst[1]

    # Merge the two pooled representations by summation; the commented-out
    # alternative is a batch x (hidden * 2) concatenation.
    # repres = tf.concat([seq_output_a, seq_output_b], axis=-1)
    repres = seq_output_a + seq_output_b
    hidden_size = repres.shape[-1].value

    repres = tf.layers.dense(repres, hidden_size,
                             activation=tf.nn.tanh,
                             name="output_dense")

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias", [num_labels],
        initializer=tf.zeros_initializer())

    output_layer = tf.nn.dropout(repres, keep_prob=1 - dropout_prob)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    if config.get("label_type", "single_label") == "single_label":
        if config.get("loss", "entropy") == "entropy":
            per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=tf.stop_gradient(labels))
        elif config.get("loss", "entropy") == "focal_loss":
            tf.logging.info("===apply multi-class focal loss===")
            # focal_loss_multi_v1 returns (per_example_loss, weights), as in
            # `classifier` above; the original forgot to unpack the tuple here.
            per_example_loss, _ = loss_utils.focal_loss_multi_v1(
                config, logits=logits, labels=labels)

        # Same weighted-loss fallback as in `classifier` above.
        try:
            per_example_loss = loss_utils.weighted_loss_ratio(
                config, per_example_loss, labels, ratio_weight)
            loss = tf.reduce_sum(per_example_loss)
        except Exception:
            loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, logits)
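# --- Usage sketch (illustrative, not from the original repo) ---
# A minimal sketch for `order_classifier_v1`, which scores a pair of pooled
# sequence vectors (e.g. sentence A / sentence B for an ordering task).
# Shapes, scope name, and label count are assumptions for demonstration.
def _order_classifier_usage_sketch():
    config = {"label_type": "single_label", "loss": "entropy"}
    seq_output_a = tf.placeholder(tf.float32, [None, 768], name="seq_output_a")
    seq_output_b = tf.placeholder(tf.float32, [None, 768], name="seq_output_b")
    labels = tf.placeholder(tf.int32, [None], name="labels")
    with tf.variable_scope("order_sketch"):
        return order_classifier_v1(
            config, [seq_output_a, seq_output_b],
            num_labels=2, labels=labels, dropout_prob=0.1)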