# Example no. 1 (score: 0)
def maskedBert(
    input_ids_blob,
    input_mask_blob,
    token_type_ids_blob,
    masked_lm_positions_blob,
    vocab_size,
    seq_length=512,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=16,
    initializer_range=0.02,
):
    """Build a BERT encoder with a masked-language-model prediction head.

    Constructs a ``bert_util.BertBackbone`` over the input blobs, then attaches
    the masked-LM head via ``_AddMaskedLanguageModel`` and returns its
    prediction blob.

    Args:
        input_ids_blob: token-id input blob for the encoder.
        input_mask_blob: attention-mask input blob.
        token_type_ids_blob: segment-id input blob.
        masked_lm_positions_blob: positions of masked tokens to predict.
        vocab_size: vocabulary size shared by embedding table and LM head.
        seq_length..initializer_range: standard BERT hyperparameters, passed
            through to the backbone unchanged.

    Returns:
        The masked-LM prediction blob produced by ``_AddMaskedLanguageModel``.
    """
    encoder = bert_util.BertBackbone(
        input_ids_blob=input_ids_blob,
        input_mask_blob=input_mask_blob,
        token_type_ids_blob=token_type_ids_blob,
        vocab_size=vocab_size,
        seq_length=seq_length,
        hidden_size=hidden_size,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act=hidden_act,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        max_position_embeddings=max_position_embeddings,
        type_vocab_size=type_vocab_size,
        initializer_range=initializer_range,
    )

    sequence_output = encoder.sequence_output()
    mlm_predictions = _AddMaskedLanguageModel(
        input_blob=sequence_output,
        output_weights_blob=encoder.embedding_table(),
        positions_blob=masked_lm_positions_blob,
        seq_length=seq_length,
        hidden_size=hidden_size,
        vocab_size=vocab_size,
        hidden_act=bert_util.GetActivation(hidden_act),
        initializer_range=initializer_range,
    )
    # NOTE(review): the pooled output is built but never returned or used.
    # Presumably kept so its model variables are created alongside the LM
    # head (e.g. for checkpoint compatibility) — confirm before removing.
    _unused_pooled = PooledOutput(sequence_output, hidden_size,
                                  initializer_range)
    return mlm_predictions
# Example no. 2 (score: 0)
def PreTrain(
    input_ids_blob,
    input_mask_blob,
    token_type_ids_blob,
    masked_lm_positions_blob,
    masked_lm_ids_blob,
    masked_lm_weights_blob,
    next_sentence_label_blob,
    vocab_size,
    seq_length=512,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=16,
    max_predictions_per_seq=20,
    initializer_range=0.02,
    use_fp16=False,
):
    """Build the BERT pre-training graph: masked-LM loss + next-sentence loss.

    Wires a ``bert_util.BertBackbone`` to two heads — the masked-LM loss over
    the sequence output and the next-sentence classifier over the pooled
    output — then reduces each loss to a scalar and sums them under the
    ``cls-loss`` namespace.

    Args:
        input_ids_blob / input_mask_blob / token_type_ids_blob: encoder inputs.
        masked_lm_positions_blob / masked_lm_ids_blob / masked_lm_weights_blob:
            masked-LM targets and per-prediction weights.
        next_sentence_label_blob: binary next-sentence labels.
        vocab_size..initializer_range: standard BERT hyperparameters.
        use_fp16: accepted for interface compatibility; not referenced in this
            body (NOTE(review): appears unused — confirm against callers).

    Returns:
        Tuple ``(total_loss, lm_loss, ns_loss)`` of scalar loss blobs.
    """
    encoder = bert_util.BertBackbone(
        input_ids_blob=input_ids_blob,
        input_mask_blob=input_mask_blob,
        token_type_ids_blob=token_type_ids_blob,
        vocab_size=vocab_size,
        seq_length=seq_length,
        hidden_size=hidden_size,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act=hidden_act,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        max_position_embeddings=max_position_embeddings,
        type_vocab_size=type_vocab_size,
        initializer_range=initializer_range,
    )

    sequence_output = encoder.sequence_output()

    # Masked-LM head: per-prediction loss over the masked positions.
    lm_loss, _, _ = _AddMaskedLanguageModelLoss(
        input_blob=sequence_output,
        output_weights_blob=encoder.embedding_table(),
        positions_blob=masked_lm_positions_blob,
        label_id_blob=masked_lm_ids_blob,
        label_weight_blob=masked_lm_weights_blob,
        seq_length=seq_length,
        hidden_size=hidden_size,
        vocab_size=vocab_size,
        max_predictions_per_seq=max_predictions_per_seq,
        hidden_act=bert_util.GetActivation(hidden_act),
        initializer_range=initializer_range,
    )

    # Next-sentence head: binary classifier on the pooled [CLS] representation.
    pooled = PooledOutput(sequence_output, hidden_size, initializer_range)
    ns_loss, _, _ = _AddNextSentenceOutput(
        input_blob=pooled,
        label_blob=next_sentence_label_blob,
        hidden_size=hidden_size,
        initializer_range=initializer_range,
    )

    with flow.scope.namespace("cls-loss"):
        lm_loss = flow.math.reduce_mean(lm_loss)
        ns_loss = flow.math.reduce_mean(ns_loss)
        total_loss = lm_loss + ns_loss
    return total_loss, lm_loss, ns_loss