Example #1
def classifier(config,
               pooled_output,
               num_labels,
               labels,
               dropout_prob,
               ratio_weight=None,
               **kargs):

    output_layer = pooled_output

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    if config.get("label_type", "single_label") == "single_label":
        if config.get("loss", "entropy") == "entropy":
            print("==standard cross entropy==")
            per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=tf.stop_gradient(labels))
        elif config.get("loss", "entropy") == "focal_loss":
            print("==multi_label focal loss==")
            per_example_loss, _ = loss_utils.focal_loss_multi_v1(config,
                                                                 logits=logits,
                                                                 labels=labels)

        try:
            per_example_loss = loss_utils.weighted_loss_ratio(
                config, per_example_loss, labels, ratio_weight)
            loss = tf.reduce_sum(per_example_loss)
            print("==applying weighted loss==")
        except Exception:
            # fall back to an unweighted mean when ratio weighting is unavailable
            loss = tf.reduce_mean(per_example_loss)

        if config.get("with_center_loss", "no") == "center_loss":
            print("==apply with center loss==")
            center_loss, _ = loss_utils.center_loss_v2(config,
                                                       features=pooled_output,
                                                       labels=labels)
            loss += center_loss * config.get("center_loss_coef", 1e-3)

        return (loss, per_example_loss, logits)
    elif config.get("label_type", "single_label") == "multi_label":
        # sigmoid cross entropy expects raw logits and float multi-hot labels;
        # the log-sigmoid probabilities are only built for the return value
        per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits,
            labels=tf.stop_gradient(tf.cast(labels, tf.float32)))
        per_example_loss = tf.reduce_sum(per_example_loss, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        logits = tf.log_sigmoid(logits)
        return (loss, per_example_loss, logits)
    else:
        raise NotImplementedError()
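
A minimal usage sketch for the single-label path of classifier (illustrative, not taken from the original repo). It relies on the module's own tensorflow-as-tf import and a dict-like config; the 768-dim pooled output, the placeholder names, and the "cls" scope are assumptions made for the example.

def _classifier_usage_sketch():
    # hypothetical pooled encoder output and integer class labels
    pooled_output = tf.placeholder(tf.float32, [None, 768], name="pooled_output")
    labels = tf.placeholder(tf.int32, [None], name="labels")
    config = {"label_type": "single_label", "loss": "entropy"}
    with tf.variable_scope("cls"):
        # returns (scalar loss, per-example loss, [batch, num_labels] logits)
        return classifier(config, pooled_output, num_labels=2,
                          labels=labels, dropout_prob=0.1)
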
def multi_choice_classifier(config, pooled_output, num_labels, labels,
                            dropout_prob):
    output_layer = pooled_output

    final_hidden_shape = bert_utils.get_shape_list(output_layer,
                                                   expected_rank=2)

    print(final_hidden_shape, "====multi-choice shape====")

    output_layer = tf.reshape(output_layer,
                              [-1, num_labels, final_hidden_shape[-1]
                               ])  # batch x num_choices x hidden_dim

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)
    logits = tf.einsum("abc,c->ab", output_layer, output_weights)
    logits = tf.nn.bias_add(logits, output_bias)  # batch x num_labels

    if config.get("loss_type", "entropy") == "focal_loss":
        per_example_loss = loss_utils.focal_loss_multi_v1(logits=logits,
                                                          labels=labels)
    else:
        per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=tf.stop_gradient(labels))
    loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, logits)
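
A sketch of how the multiple-choice head might be fed (illustrative assumptions, not from the original repo): the encoder is assumed to emit one pooled vector per (example, choice) pair, stacked along the batch axis, and labels holds the index of the correct choice for each example.

def _multi_choice_usage_sketch(num_choices=4, hidden=768):
    # pooled vectors flattened to [batch * num_choices, hidden]
    choice_pooled = tf.placeholder(tf.float32, [None, hidden], name="choice_pooled")
    # index of the correct choice per example, shape [batch]
    choice_labels = tf.placeholder(tf.int32, [None], name="choice_labels")
    config = {"loss_type": "entropy"}
    return multi_choice_classifier(config, choice_pooled, num_choices,
                                   choice_labels, dropout_prob=0.1)
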
def order_classifier_v1(config,
                        output_lst,
                        num_labels,
                        labels,
                        dropout_prob,
                        ratio_weight=None):

    assert len(output_lst) == 2

    seq_output_a = output_lst[0]
    seq_output_b = output_lst[1]

    # batch x (hidden x 2)
    # repres = tf.concat([seq_output_a, seq_output_b],
    # 					axis=-1)

    repres = seq_output_a + seq_output_b

    hidden_size = repres.shape[-1].value

    repres = tf.layers.dense(repres,
                             hidden_size,
                             activation=tf.nn.tanh,
                             name="output_dense")

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    output_layer = tf.nn.dropout(repres, keep_prob=1 - dropout_prob)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    if config.get("label_type", "single_label") == "single_label":
        if config.get("loss", "entropy") == "entropy":
            per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=tf.stop_gradient(labels))

        elif config.get("loss", "entropy") == "focal_loss":
            tf.logging.info("===apply multi-class focal loss===")
            print("===apply multi-class focal loss===")
            per_example_loss, _ = loss_utils.focal_loss_multi_v1(config,
                                                                  logits=logits,
                                                                  labels=labels)
        try:
            per_example_loss = loss_utils.weighted_loss_ratio(
                config, per_example_loss, labels, ratio_weight)
            loss = tf.reduce_sum(per_example_loss)
        except Exception:
            # fall back to an unweighted mean when ratio weighting is unavailable
            loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, logits)
    else:
        # only the single-label case is implemented for this head
        raise NotImplementedError()

def siamese_classifier(config,
                       pooled_output,
                       num_labels,
                       labels,
                       dropout_prob,
                       ratio_weight=None):

    if config.get("output_layer", "interaction") == "interaction":
        print("==apply interaction layer==")
        repres_a = pooled_output[0]
        repres_b = pooled_output[1]

        output_layer = tf.concat([
            repres_a, repres_b,
            tf.abs(repres_a - repres_b), repres_a * repres_b
        ],
                                 axis=-1)
        hidden_size = output_layer.shape[-1].value

        output_weights = tf.get_variable(
            "output_weights", [num_labels, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))

        output_bias = tf.get_variable("output_bias", [num_labels],
                                      initializer=tf.zeros_initializer())

        output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)

        print("==logits shape==", logits.get_shape())

        if config.get("label_type", "single_label") == "single_label":
            if config.get("loss", "entropy") == "entropy":
                per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=tf.stop_gradient(labels))
            elif config.get("loss", "entropy") == "focal_loss":
                per_example_loss, _ = loss_utils.focal_loss_multi_v1(
                    config, logits=logits, labels=labels)
            print("==per_example_loss shape==", per_example_loss.get_shape())
            loss = tf.reduce_mean(per_example_loss)

            return (loss, per_example_loss, logits)
        elif config.get("label_type", "single_label") == "multi_label":
            logits = tf.log_sigmoid(logits)
            per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=logits, labels=tf.stop_gradient(labels))
            per_example_loss = tf.reduce_mean(per_example_loss, axis=-1)
            loss = tf.reduce_mean(per_example_loss)
            return (loss, per_example_loss, logits)
        else:
            raise NotImplementedError()
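
The siamese head combines the two pooled representations into the usual interaction features [a, b, |a - b|, a * b] before the output layer. A usage sketch under the same assumptions as the earlier ones (TF 1.x, dict-like config, hypothetical 768-dim encoder outputs):

def _siamese_usage_sketch(hidden=768):
    # pooled outputs of the two text inputs from a shared (siamese) encoder
    repres_a = tf.placeholder(tf.float32, [None, hidden], name="repres_a")
    repres_b = tf.placeholder(tf.float32, [None, hidden], name="repres_b")
    pair_labels = tf.placeholder(tf.int32, [None], name="pair_labels")
    config = {"output_layer": "interaction",
              "label_type": "single_label",
              "loss": "entropy"}
    return siamese_classifier(config, [repres_a, repres_b], num_labels=2,
                              labels=pair_labels, dropout_prob=0.1)
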
def distributed_classifier(config,
                           pooled_output,
                           num_labels,
                           labels,
                           dropout_prob,
                           ratio_weight=None):

    output_layer = pooled_output

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    if config.get("label_type", "single_label") == "single_label":
        if config.get("loss", "entropy") == "entropy":
            per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=tf.stop_gradient(labels))
        elif config.get("loss", "entropy") == "focal_loss":
            per_example_loss = loss_utils.focal_loss_multi_v1(config,
                                                              logits=logits,
                                                              labels=labels)
        loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, logits)
    elif config.get("label_type", "single_label") == "multi_label":
        logits = tf.log_sigmoid(logits)
        per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits, labels=tf.stop_gradient(labels))
        per_example_loss = tf.reduce_mean(per_example_loss, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, per_example_loss, logits)
    else:
        raise NotImplementedError()
Example #6
def multi_position_classifier(config, features, sequence_output, num_labels,
                              dropout_prob):

    final_hidden_shape = bert_utils.get_shape_list(sequence_output,
                                                   expected_rank=3)

    print(final_hidden_shape, "====multi-position shape====")

    answer_pos = tf.cast(features['label_positions'], tf.int32)
    cls_pos = tf.zeros_like(answer_pos)
    input_tensor = bert_utils.gather_indexes(sequence_output, answer_pos)
    cls_tensor = bert_utils.gather_indexes(sequence_output, cls_pos)

    answer_cls_tensor = tf.concat([cls_tensor, input_tensor], axis=-1)

    input_tensor = tf.layers.dense(
        answer_cls_tensor,
        units=config.hidden_size,
        activation=bert_modules.get_activation(config.hidden_act),
        kernel_initializer=bert_modules.create_initializer(
            config.initializer_range))
    input_tensor = bert_modules.layer_norm(input_tensor)

    # match the width of input_tensor, which was projected to config.hidden_size
    output_weights = tf.get_variable(
        "output_weights", [num_labels, config.hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias",
                                  shape=[num_labels],
                                  initializer=tf.zeros_initializer())
    logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    label_ids = tf.reshape(tf.cast(features['label_ids'], tf.int32), [-1])
    label_weights = tf.reshape(tf.cast(features['label_weights'], tf.float32),
                               [-1])

    # class_weights must be provided in the config when the
    # class-balanced focal loss below is selected
    if config.get('class_weights', None):
        class_weights = tf.constant(
            np.array(config.class_weights).astype(np.float32))
    else:
        class_weights = None

    if config.get("loss", "entropy") == "focal_loss":
        per_example_loss, _ = loss_utils.focal_loss_multi_v1(
            config, logits=logits, labels=tf.stop_gradient(label_ids))
    elif config.get("loss", "smoothed_ce") == 'smoothed_ce':
        per_example_loss = loss_utils.ce_label_smoothing(
            config, logits=logits, labels=tf.stop_gradient(label_ids))
    elif config.get('loss', 'class_balanced_focal') == 'class_balanced_focal':
        per_example_loss, _ = loss_utils.class_balanced_focal_loss_multi_v1(
            config,
            logits=logits,
            labels=label_ids,
            label_weights=class_weights)
    else:
        per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.stop_gradient(label_ids), logits=logits)

    numerator = tf.reduce_sum(label_weights * per_example_loss)
    denominator = tf.reduce_sum(label_weights) + 1e-5
    loss = numerator / denominator

    return (loss, per_example_loss, logits)
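
The loss above is a label_weights-weighted mean over the gathered label positions, so zero-weight (padding) positions drop out of the average. A tiny NumPy illustration of that reduction, with made-up numbers:

import numpy as np

per_position_loss = np.array([0.7, 1.2, 0.3, 0.9], dtype=np.float32)
weights = np.array([1.0, 1.0, 0.0, 0.0], dtype=np.float32)  # last two are padding
weighted_mean = np.sum(weights * per_position_loss) / (np.sum(weights) + 1e-5)
# weighted_mean is approximately (0.7 + 1.2) / 2 = 0.95; padded positions contribute nothing
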
Example #7
def get_masked_lm_output(config,
                         input_tensor,
                         output_weights,
                         positions,
                         label_ids,
                         label_weights,
                         reuse=None):
    """Get loss and log probs for the masked LM."""
    input_tensor = tf.cast(input_tensor, tf.float32)
    positions = tf.cast(positions, tf.int32)
    label_ids = tf.cast(label_ids, tf.int32)
    label_weights = tf.cast(label_weights, tf.float32)

    input_tensor = bert_utils.gather_indexes(input_tensor, positions)
    """
	flatten masked lm ids with positions
	"""
    with tf.variable_scope("cls/predictions", reuse=reuse):
        # We apply one more non-linear transformation before the output layer.
        # This matrix is not used after pre-training.
        with tf.variable_scope("transform"):
            input_tensor = tf.layers.dense(
                input_tensor,
                units=config.hidden_size,
                activation=bert_modules.get_activation(config.hidden_act),
                kernel_initializer=bert_modules.create_initializer(
                    config.initializer_range))
            input_tensor = bert_modules.layer_norm(input_tensor)

        # The output weights are the same as the input embeddings, but there is
        # an output-only bias for each token.
        output_bias = tf.get_variable("output_bias",
                                      shape=[config.vocab_size],
                                      initializer=tf.zeros_initializer())
        logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        label_ids = tf.reshape(label_ids, [-1])
        label_weights = tf.cast(tf.reshape(label_weights, [-1]), tf.float32)

        per_example_loss, target_predictions = loss_utils.focal_loss_multi_v1(
            config, logits, label_ids)

        # one_hot_labels = tf.one_hot(
        # 		label_ids, depth=config.vocab_size, dtype=tf.float32)

        # tsa_start = 0.5 / config.vocab_size
        # tsa_threshold = tsa.get_tsa_threshold(
        # 					  config.tsa,
        # 					  tf.train.get_or_create_global_step(),
        # 					  config.num_train_steps,
        # 					  tsa_start, end=1)

        # larger_than_threshold = tf.greater(
        # 					target_predictions, tsa_threshold)
        # loss_mask = label_weights * (1 - tf.cast(larger_than_threshold, tf.float32))

        # per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        # 											labels=tf.stop_gradient(label_ids),
        # 											logits=logits)
        # per_example_loss = -tf.reduce_sum(log_probs * one_hot_labels, axis=[-1])

        # The `positions` tensor might be zero-padded (if the sequence is too
        # short to have the maximum number of predictions). The `label_weights`
        # tensor has a value of 1.0 for every real prediction and 0.0 for the
        # padding predictions.
        numerator = tf.reduce_sum(label_weights * per_example_loss)
        denominator = tf.reduce_sum(label_weights) + 1e-5
        loss = numerator / denominator

    return (loss, per_example_loss, log_probs, label_weights)
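
bert_utils.gather_indexes is used above to pull out the hidden vectors at the masked positions before the transform layer. A sketch of what such a helper typically does, based on the standard BERT implementation; the actual bert_utils version may differ in detail:

def _gather_indexes_sketch(sequence_tensor, positions):
    # sequence_tensor: [batch, seq_len, hidden]; positions: [batch, num_positions] int32
    hidden = sequence_tensor.shape[-1].value
    dyn_shape = tf.shape(sequence_tensor)
    batch_size, seq_len = dyn_shape[0], dyn_shape[1]
    # offset each example's positions into the flattened [batch * seq_len] axis
    flat_offsets = tf.reshape(tf.range(batch_size, dtype=tf.int32) * seq_len, [-1, 1])
    flat_positions = tf.reshape(positions + flat_offsets, [-1])
    flat_sequence = tf.reshape(sequence_tensor, [-1, hidden])
    # result: [batch * num_positions, hidden]
    return tf.gather(flat_sequence, flat_positions)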