Example #1
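All five snippets below are OneFlow (0.x lazy-mode) BERT example code. They assume imports along the following lines; `bert_util` refers to the repo-local bert.py helpers, and the exact module paths are an assumption:

import oneflow as flow
import oneflow.core.operator.op_conf_pb2 as op_conf_util  # provides op_conf_util.kNone
import bert as bert_util  # repo-local BERT helper module (assumed name)
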
def _AddNextSentenceOutput(input_blob, label_blob, hidden_size,
                           initializer_range):
    with flow.scope.namespace("cls-seq_relationship"):
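        # Binary (is-next / not-next) classification head; the [2, hidden_size]
        # weight is applied with transpose_b=True in the matmul below.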
        output_weight_blob = flow.get_variable(
            name="output_weights",
            shape=[2, hidden_size],
            dtype=input_blob.dtype,
            model_name="weight",
            initializer=bert_util.CreateInitializer(initializer_range),
        )
        output_bias_blob = flow.get_variable(
            name="output_bias",
            shape=[2],
            dtype=input_blob.dtype,
            model_name="bias",
            initializer=flow.constant_initializer(0.0),
        )
        logit_blob = flow.matmul(input_blob,
                                 output_weight_blob,
                                 transpose_b=True)
        logit_blob = flow.nn.bias_add(logit_blob, output_bias_blob)
        pre_example_loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logit_blob, labels=label_blob)
        loss = pre_example_loss
        return (loss, pre_example_loss, logit_blob)
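
For orientation, a minimal call sketch, assuming a pooled [CLS] blob such as the one produced by PooledOutput in Example #3 (blob names are illustrative, not from the source):

# pooled_output: [batch, hidden_size]; next_sentence_labels: [batch], values in {0, 1}
loss, per_example_loss, logits = _AddNextSentenceOutput(
    pooled_output, next_sentence_labels, hidden_size=768, initializer_range=0.02
)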
Example #2
def _AddMaskedLanguageModelLoss(
    input_blob,
    output_weights_blob,
    positions_blob,
    label_id_blob,
    label_weight_blob,
    seq_length,
    hidden_size,
    vocab_size,
    max_predictions_per_seq,
    hidden_act,
    initializer_range,
):
    with flow.scope.namespace("other"):
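        # Average total mask weight per example: `ones` has one entry per
        # example, so reduce_sum(ones) recovers the batch size.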
        sum_label_weight_blob = flow.math.reduce_sum(label_weight_blob, axis=[-1])
        ones = sum_label_weight_blob * 0.0 + 1.0
        sum_label_weight_blob = flow.math.reduce_sum(sum_label_weight_blob)
        batch_size = flow.math.reduce_sum(ones)
        sum_label_weight_blob = sum_label_weight_blob / batch_size
    with flow.scope.namespace("cls-predictions"):
        input_blob = _GatherIndexes(input_blob, positions_blob, seq_length, hidden_size)
        with flow.scope.namespace("transform"):
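            # When hidden_act is a Python callable, build the dense layer with no
            # built-in activation and apply the callable afterwards.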
            if callable(hidden_act):
                act_fn = op_conf_util.kNone
            else:
                act_fn = hidden_act
            input_blob = bert_util._FullyConnected(
                input_blob,
                input_size=hidden_size,
                units=hidden_size,
                activation=act_fn,
                weight_initializer=bert_util.CreateInitializer(initializer_range),
                name="dense",
            )
            if callable(hidden_act):
                input_blob = hidden_act(input_blob)
            # LayerNorm applies on both paths, matching BERT's transform block.
            input_blob = bert_util._LayerNorm(input_blob, hidden_size)
        output_bias = flow.get_variable(
            name="output_bias",
            shape=[vocab_size],
            dtype=input_blob.dtype,
            initializer=flow.constant_initializer(1.0),
        )
        logit_blob = flow.matmul(input_blob, output_weights_blob, transpose_b=True)
        logit_blob = flow.nn.bias_add(logit_blob, output_bias)
        label_id_blob = flow.reshape(label_id_blob, [-1])
        pre_example_loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logit_blob, labels=label_id_blob
        )
        pre_example_loss = flow.reshape(pre_example_loss, [-1, max_predictions_per_seq])
        numerator = pre_example_loss * label_weight_blob
        with flow.scope.namespace("loss"):
            numerator = flow.math.reduce_sum(numerator, axis=[-1])
            denominator = sum_label_weight_blob + 1e-5
            loss = numerator / denominator
        return loss, pre_example_loss, logit_blob
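
A rough call-site sketch under the usual BERT pre-training setup, where the input embedding table doubles as the output projection (weight tying); all blob names are illustrative:

# sequence_output: [batch, seq_length, hidden_size] from the encoder
loss, per_example_loss, logits = _AddMaskedLanguageModelLoss(
    sequence_output,
    embedding_table,       # [vocab_size, hidden_size], tied with the input embedding
    masked_lm_positions,   # [batch, max_predictions_per_seq]
    masked_lm_ids,         # [batch, max_predictions_per_seq]
    masked_lm_weights,     # 1.0 for real predictions, 0.0 for padding slots
    seq_length=128,
    hidden_size=768,
    vocab_size=30522,
    max_predictions_per_seq=20,
    hidden_act="gelu",
    initializer_range=0.02,
)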
Example #3
def PooledOutput(sequence_output, hidden_size, initializer_range):
    with flow.scope.namespace("bert-pooler"):
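        # Take the hidden state of the first ([CLS]) token:
        # [batch, 1, hidden_size] -> [batch, hidden_size].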
        first_token_tensor = flow.slice(sequence_output, [None, 0, 0], [None, 1, -1])
        first_token_tensor = flow.reshape(first_token_tensor, [-1, hidden_size])
        pooled_output = bert_util._FullyConnected(
            first_token_tensor,
            input_size=hidden_size,
            units=hidden_size,
            weight_initializer=bert_util.CreateInitializer(initializer_range),
            name="dense",
        )
        pooled_output = flow.math.tanh(pooled_output)
    return pooled_output
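
This mirrors the pooler of the original BERT: the encoder's hidden state at the first ([CLS]) token, passed through a tanh dense layer. Its output is the natural input for the next-sentence head in Example #1.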
Example #4
def SQuAD(
    input_ids_blob,
    input_mask_blob,
    token_type_ids_blob,
    vocab_size,
    seq_length=512,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=16,
    initializer_range=0.02,
):

    backbone = bert_util.BertBackbone(
        input_ids_blob=input_ids_blob,
        input_mask_blob=input_mask_blob,
        token_type_ids_blob=token_type_ids_blob,
        vocab_size=vocab_size,
        seq_length=seq_length,
        hidden_size=hidden_size,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act=hidden_act,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        max_position_embeddings=max_position_embeddings,
        type_vocab_size=type_vocab_size,
        initializer_range=initializer_range,
    )

    with flow.scope.namespace("cls-squad"):
        final_hidden = backbone.sequence_output()
        final_hidden_matrix = flow.reshape(final_hidden, [-1, hidden_size])
        logits = bert_util._FullyConnected(
            final_hidden_matrix,
            hidden_size,
            units=2,
            weight_initializer=bert_util.CreateInitializer(initializer_range),
            name="output",
        )
        # [batch * seq_length, 2] -> [batch, seq_length, 2]
        logits = flow.reshape(logits, [-1, seq_length, 2])

        # Split the last dimension into start- and end-position logits.
        start_logits = flow.slice(logits, [None, None, 0], [None, None, 1])
        end_logits = flow.slice(logits, [None, None, 1], [None, None, 1])

    return start_logits, end_logits
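
A rough call sketch; the vocab size, sequence length, and blob names are assumptions, not from the source:

# input blobs: [batch, seq_length] int32 tensors from the data pipeline
start_logits, end_logits = SQuAD(
    input_ids_blob, input_mask_blob, token_type_ids_blob,
    vocab_size=30522, seq_length=384,
)
# each output: [batch, seq_length, 1] scores for the answer-span start / end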
Example #5
def _AddMaskedLanguageModel(
    input_blob,
    output_weights_blob,
    positions_blob,
    seq_length,
    hidden_size,
    vocab_size,
    hidden_act,
    initializer_range,
):
    with flow.scope.namespace("cls-predictions"):
        # Gather the encoder outputs at the masked positions
        input_blob = _GatherIndexes(input_blob, positions_blob, seq_length,
                                    hidden_size)
        # Non-linear transform before the output layer; only active during pre-training
        with flow.scope.namespace("transform"):
            if callable(hidden_act):
                act_fn = op_conf_util.kNone
            else:
                act_fn = hidden_act
            input_blob = bert_util._FullyConnected(
                input_blob,
                input_size=hidden_size,
                units=hidden_size,
                activation=act_fn,
                weight_initializer=bert_util.CreateInitializer(
                    initializer_range),
                name="dense",
            )
            if callable(hidden_act):
                input_blob = hidden_act(input_blob)
            # LayerNorm applies on both paths, matching BERT's transform block.
            input_blob = bert_util._LayerNorm(input_blob, hidden_size)
        # output_weights is the same matrix as the input word embedding;
        # a separate output bias is added on top of the tied weights.
        output_bias = flow.get_variable(
            name="output_bias",
            shape=[vocab_size],
            dtype=input_blob.dtype,
            initializer=flow.constant_initializer(1.0),
        )
        logit_blob = flow.matmul(input_blob,
                                 output_weights_blob,
                                 transpose_b=True)
        logit_blob = flow.nn.bias_add(logit_blob, output_bias)

        return logit_blob
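
This is the logits-only counterpart of Example #2: the same transform block and tied output weights, with the loss left to the caller. A sketch of closing that loop, reusing the loss pattern from Example #2 (blob names are illustrative):

logit_blob = _AddMaskedLanguageModel(
    sequence_output, embedding_table, masked_lm_positions,
    seq_length=128, hidden_size=768, vocab_size=30522,
    hidden_act="gelu", initializer_range=0.02,
)
labels = flow.reshape(masked_lm_ids, [-1])
per_position_loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
    logits=logit_blob, labels=labels
)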