# Note: this excerpt assumes TF 1.x-style APIs; `modeling` is the BERT
# modeling module and `FLAGS` is defined elsewhere in the surrounding file.
import tensorflow as tf


def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, multilabel):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids)

  # Here, we are doing a classification task on the entire segment. For
  # token-level output, use model.get_sequence_output() instead.
  output_layer = model.get_pooled_output()

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [FLAGS.original_emotion_size, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [FLAGS.original_emotion_size],
      initializer=tf.zeros_initializer())

  new_output_weights = tf.get_variable(
      "new_output_weights", [num_labels, FLAGS.original_emotion_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  new_output_bias = tf.get_variable(
      "new_output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    # Project from the hidden size to the original emotion space, then from
    # the original emotion space to the new label set.
    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    logits = tf.matmul(logits, new_output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, new_output_bias)

    # Cast labels to float for both single- and multi-label losses.
    labels = tf.cast(labels, tf.float32)

    if multilabel:
      probabilities = tf.nn.sigmoid(logits)
      tf.logging.info("num_labels:{};logits:{};labels:{}".format(
          num_labels, logits, labels))
      per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
          labels=labels, logits=logits)
    else:
      probabilities = tf.nn.softmax(logits, axis=-1)
      per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
          labels=labels, logits=logits)
    loss = tf.reduce_mean(per_example_loss)
    tf.summary.scalar("loss", loss)

    return (loss, per_example_loss, logits, probabilities)
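# ---------------------------------------------------------------------------
# Hypothetical illustration (not part of the original file): a minimal NumPy
# sketch of the stacked output head used above, i.e. projecting the pooled
# BERT vector into the original emotion space and then into the new label
# set. The shapes and names (hidden_size=4, original_emotion_size=3,
# num_labels=2, `_stacked_head_sketch`) are assumptions for illustration.
def _stacked_head_sketch():
  import numpy as np

  batch, hidden_size, original_emotion_size, num_labels = 2, 4, 3, 2
  pooled = np.random.randn(batch, hidden_size)  # stands in for get_pooled_output()

  # Analogue of output_weights / output_bias.
  w1 = np.random.randn(original_emotion_size, hidden_size)
  b1 = np.zeros(original_emotion_size)
  # Analogue of new_output_weights / new_output_bias.
  w2 = np.random.randn(num_labels, original_emotion_size)
  b2 = np.zeros(num_labels)

  # matmul(..., transpose_b=True) corresponds to multiplying by the transpose.
  original_logits = pooled @ w1.T + b1      # [batch, original_emotion_size]
  new_logits = original_logits @ w2.T + b2  # [batch, num_labels]
  assert new_logits.shape == (batch, num_labels)
  return new_logits
# ---------------------------------------------------------------------------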
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, multilabel, sent_rels, sentiment,
                 entailment_rels, entailment, corr_rels, correlation):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids)

  # Here, we are doing a classification task on the entire segment. For
  # token-level output, use model.get_sequence_output() instead.
  output_layer = model.get_pooled_output()

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    # Cast labels to float for both single- and multi-label losses.
    labels = tf.cast(labels, tf.float32)

    if multilabel:
      probabilities = tf.nn.sigmoid(logits)
      tf.logging.info("num_labels:{};logits:{};labels:{}".format(
          num_labels, logits, labels))
      per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
          labels=labels, logits=logits)
    else:
      probabilities = tf.nn.softmax(logits, axis=-1)
      per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
          labels=labels, logits=logits)
    loss = tf.reduce_mean(per_example_loss)

    # Add regularization based on the label relation priors.
    probs_exp = tf.expand_dims(probabilities, 1)
    m = tf.tile(probs_exp, [1, num_labels, 1])
    probs_exp_t = tf.transpose(probs_exp, perm=[0, 2, 1])

    # Subtract each prediction from all others.
    # Example (with batch size = 1):
    #   tiled predictions:  [0.1] [0.1] [0.1]
    #                       [0.2] [0.2] [0.2]
    #                       [0.3] [0.3] [0.3]
    #   subtract [0.1, 0.2, 0.3] row-wise
    #   result:             [0.0] [-.1] [-.2]  --> row represents the difference
    #                       [0.1] [0.0] [-.1]      between emotion 1 and all
    #                       [0.2] [0.1] [0.0]      other emotions
    dists = tf.square(tf.subtract(m, probs_exp_t))  # squared distances
    dists = tf.transpose(dists, perm=[0, 2, 1])

    # Sentiment-based regularization
    sent_reg = tf.multiply(
        tf.constant(sentiment),
        tf.reduce_mean(
            tf.multiply(dists, tf.constant(sent_rels, dtype=tf.float32))))
    tf.summary.scalar("sentiment_regularization", sent_reg)
    loss += sent_reg

    # Entailment-based regularization
    ent_reg = tf.multiply(
        tf.constant(entailment),
        tf.reduce_mean(
            tf.multiply(dists,
                        tf.constant(entailment_rels, dtype=tf.float32))))
    tf.summary.scalar("entailment_regularization", ent_reg)
    loss += ent_reg

    # Correlation-based regularization
    corr_reg = tf.multiply(
        tf.constant(correlation),
        tf.reduce_mean(
            tf.multiply(dists, tf.constant(corr_rels, dtype=tf.float32))))
    tf.summary.scalar("correlation_regularization", corr_reg)
    loss += corr_reg

    tf.summary.scalar("loss", loss)

    return (loss, per_example_loss, logits, probabilities)
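# ---------------------------------------------------------------------------
# Hypothetical illustration (not part of the original file): a minimal NumPy
# sketch of the relation-based regularizer above. It builds the same pairwise
# squared-distance matrix, dists[b, i, j] = (p_i - p_j) ** 2, and weights it
# by a relation prior. The toy values and the name `_relation_reg_sketch` are
# assumptions for illustration only.
def _relation_reg_sketch():
  import numpy as np

  probs = np.array([[0.1, 0.2, 0.3]])                # [batch=1, num_labels=3]
  probs_exp = probs[:, np.newaxis, :]                # [1, 1, 3]
  m = np.tile(probs_exp, (1, probs.shape[1], 1))     # [1, 3, 3]
  probs_exp_t = np.transpose(probs_exp, (0, 2, 1))   # [1, 3, 1]

  dists = np.square(m - probs_exp_t)                 # squared pairwise diffs
  dists = np.transpose(dists, (0, 2, 1))             # dists[0, i, j] = (p_i - p_j)**2

  # Toy relation prior: labels 0 and 1 are related, so their predictions are
  # pulled together; unrelated pairs contribute nothing to the penalty.
  relation_prior = np.array([[0., 1., 0.],
                             [1., 0., 0.],
                             [0., 0., 0.]], dtype=np.float32)
  reg_weight = 0.1                                   # analogue of `sentiment` etc.
  reg = reg_weight * np.mean(dists * relation_prior)
  return reg
# ---------------------------------------------------------------------------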