import tensorflow as tf  # TF 1.x API (tf.get_variable, keep_prob-style dropout)
import modeling  # BERT modeling module from the google-research/bert codebase

FLAGS = tf.flags.FLAGS  # assumes --original_target_size is defined as a flag elsewhere


def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, multilabel):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids)

    # Here, we are doing a classification task on the entire segment. For
    # token-level output, use model.get_sequence_output() instead.
    output_layer = model.get_pooled_output()

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [FLAGS.original_target_size, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [FLAGS.original_target_size],
                                  initializer=tf.zeros_initializer())

    new_output_weights = tf.get_variable(
        "new_output_weights", [num_labels, FLAGS.original_target_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    new_output_bias = tf.get_variable("new_output_bias", [num_labels],
                                      initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        logits = tf.matmul(logits, new_output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, new_output_bias)

        # Cast labels to float for both single-label and multi-label classification
        labels = tf.cast(labels, tf.float32)

        if multilabel:
            probabilities = tf.nn.sigmoid(logits)
            tf.logging.info("num_labels:{};logits:{};labels:{}".format(
                num_labels, logits, labels))
            per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=labels, logits=logits)
        else:
            probabilities = tf.nn.softmax(logits, axis=-1)
            per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=labels, logits=logits)
        loss = tf.reduce_mean(per_example_loss)

        tf.summary.scalar("loss", loss)

        return (loss, per_example_loss, logits, probabilities)
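
This first variant uses a factored output head: the pooled BERT output is first projected to logits over an original label inventory of size FLAGS.original_target_size, and those logits are then linearly remapped onto the new label set of size num_labels. Below is a minimal NumPy sketch of that factorization; the concrete sizes (768, 28, 6) are assumptions chosen for illustration only.

import numpy as np

batch, hidden_size = 2, 768                 # assumed BERT-base pooled-output width
original_target_size, num_labels = 28, 6    # assumed original and new label counts

pooled = np.random.randn(batch, hidden_size)  # stands in for model.get_pooled_output()
output_weights = 0.02 * np.random.randn(original_target_size, hidden_size)
output_bias = np.zeros(original_target_size)
new_output_weights = 0.02 * np.random.randn(num_labels, original_target_size)
new_output_bias = np.zeros(num_labels)

# First projection: pooled representation -> logits over the original label set
original_logits = pooled @ output_weights.T + output_bias               # [batch, original_target_size]
# Second projection: remap the original-label logits onto the new label set
new_logits = original_logits @ new_output_weights.T + new_output_bias   # [batch, num_labels]
print(new_logits.shape)  # (2, 6)
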
Example #2
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, multilabel, sent_rels, sentiment,
                 entailment_rels, entailment, corr_rels, correlation):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids)

    # Here, we are doing a classification task on the entire segment. For
    # token-level output, use model.get_sequence_output() instead.
    output_layer = model.get_pooled_output()

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)

        # Cast labels to float for both single-label and multi-label classification
        labels = tf.cast(labels, tf.float32)

        if multilabel:
            probabilities = tf.nn.sigmoid(logits)
            tf.logging.info("num_labels:{};logits:{};labels:{}".format(
                num_labels, logits, labels))
            per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=labels, logits=logits)
        else:
            probabilities = tf.nn.softmax(logits, axis=-1)
            per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=labels, logits=logits)
        loss = tf.reduce_mean(per_example_loss)

        # Add regularization based on the label-relation priors
        probs_exp = tf.expand_dims(probabilities, 1)
        m = tf.tile(probs_exp, [1, num_labels, 1])
        probs_exp_t = tf.transpose(probs_exp, perm=[0, 2, 1])

        # Subtract each prediction from all others:
        # Example (with batch size=1):
        #     tiled predictions: [0.1] [0.1] [0.1]
        #                        [0.2] [0.2] [0.2]
        #                        [0.3] [0.3] [0.3]
        #     subtract [0.1, 0.2, 0.3] row-wise
        #     result:   [0.0] [-.1] [-.2] --> row represents difference between
        #                                     emotion 1 and all other emotions
        #               [0.1] [0.0] [-.1]
        #               [0.2] [0.1] [0.0]
        dists = tf.square(tf.subtract(m, probs_exp_t))  # square distances
        dists = tf.transpose(dists, perm=[0, 2, 1])

        # Sentiment-based regularization
        sent_reg = tf.multiply(
            tf.constant(sentiment),
            tf.reduce_mean(
                tf.multiply(dists, tf.constant(sent_rels, dtype=tf.float32))))
        tf.summary.scalar("sentiment_regularization", sent_reg)
        loss += sent_reg

        # Entailment-based regularization
        ent_reg = tf.multiply(
            tf.constant(entailment),
            tf.reduce_mean(
                tf.multiply(dists,
                            tf.constant(entailment_rels, dtype=tf.float32))))
        tf.summary.scalar("entailment_regularization", ent_reg)
        loss += ent_reg

        # Correlation-based regularization
        corr_reg = tf.multiply(
            tf.constant(correlation),
            tf.reduce_mean(
                tf.multiply(dists, tf.constant(corr_rels, dtype=tf.float32))))
        tf.summary.scalar("correlation_regularization", corr_reg)
        loss += corr_reg

        tf.summary.scalar("loss", loss)

        return (loss, per_example_loss, logits, probabilities)
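
The three regularizers above all share the same building block: a per-example matrix of squared differences between the predicted probabilities of every label pair, weighted by a fixed relation matrix (sentiment, entailment, or correlation) and averaged. Below is a small NumPy sketch that reproduces the tile/transpose construction and the relation-weighted penalty; the shapes and the coefficient value are assumptions for illustration only.

import numpy as np

batch, num_labels = 2, 3
probs = np.random.rand(batch, num_labels)           # stand-in for the sigmoid/softmax probabilities
sent_rels = np.random.rand(num_labels, num_labels)  # stand-in for the label-relation prior matrix
sentiment_coef = 0.1                                # assumed weighting coefficient

# Pairwise squared differences, built the way the TF graph does it:
probs_exp = probs[:, None, :]                            # [B, 1, N], like tf.expand_dims(probabilities, 1)
m = np.tile(probs_exp, (1, num_labels, 1))               # [B, N, N], each row repeats the prediction vector
dists = np.transpose((m - np.transpose(probs_exp, (0, 2, 1))) ** 2, (0, 2, 1))

# Equivalent direct formulation: dists[b, i, j] == (probs[b, i] - probs[b, j]) ** 2
assert np.allclose(dists, (probs[:, :, None] - probs[:, None, :]) ** 2)

# Relation-weighted penalty, as in the sentiment/entailment/correlation terms
sent_reg = sentiment_coef * np.mean(dists * sent_rels)
print(sent_reg)
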