def get_next_sentence_output(bert_config, input_tensor, labels,
                             label_weights=None):
  """Get loss and log probs for the next sentence prediction."""

  # Simple binary classification. Note that 0 is "next sentence" and 1 is
  # "random sentence". This weight matrix is not used after pre-training.
  num_labels = FLAGS.num_cls_labels
  with tf.variable_scope("cls/seq_relationship"):
    output_weights = tf.get_variable(
        "output_weights_" + str(num_labels),
        shape=[num_labels, bert_config.hidden_size],
        initializer=modeling.create_initializer(bert_config.initializer_range))
    output_bias = tf.get_variable(
        "output_bias_" + str(num_labels),
        shape=[num_labels],
        initializer=tf.zeros_initializer())

    logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    labels = tf.reshape(labels, [-1])
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    if label_weights is not None:
      numerator = tf.reduce_sum(label_weights * per_example_loss)
      denominator = tf.reduce_sum(label_weights) + 1e-5
      loss = numerator / denominator
    else:
      loss = tf.reduce_mean(per_example_loss)
    return (loss, per_example_loss, log_probs)
def get_masked_lm_output(bert_config, input_tensor, output_weights, positions,
                         label_ids, label_weights):
  """Get loss and log probs for the masked LM."""
  input_tensor = gather_indexes(input_tensor, positions)

  with tf.variable_scope("cls/predictions"):
    # We apply one more non-linear transformation before the output layer.
    # This matrix is not used after pre-training.
    with tf.variable_scope("transform"):
      input_tensor = tf.layers.dense(
          input_tensor,
          units=bert_config.hidden_size,
          activation=modeling.get_activation(bert_config.hidden_act),
          kernel_initializer=modeling.create_initializer(
              bert_config.initializer_range))
      input_tensor = modeling.layer_norm(input_tensor)

    # The output weights are the same as the input embeddings, but there is
    # an output-only bias for each token.
    output_bias_pretrained = tf.get_variable(
        "output_bias",
        shape=[bert_config.pretrained_vocab_size],
        initializer=tf.zeros_initializer())
    if bert_config.vocab_size == bert_config.pretrained_vocab_size:
      output_bias = output_bias_pretrained
    elif bert_config.vocab_size > bert_config.pretrained_vocab_size:
      output_bias_entities = tf.get_variable(
          "output_bias_entities_0",
          shape=[bert_config.vocab_size - bert_config.pretrained_vocab_size],
          initializer=tf.zeros_initializer())
      output_bias = tf.concat(
          [output_bias_pretrained, output_bias_entities], 0)
    else:  # pretrained_vocab_size > vocab_size
      raise Exception("Pretrained vocab cannot be larger than actual vocab!")

    logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    label_ids = tf.reshape(label_ids, [-1])
    label_weights = tf.reshape(label_weights, [-1])

    one_hot_labels = tf.one_hot(
        label_ids, depth=bert_config.vocab_size, dtype=tf.float32)

    # The `positions` tensor might be zero-padded (if the sequence is too
    # short to have the maximum number of predictions). The `label_weights`
    # tensor has a value of 1.0 for every real prediction and 0.0 for the
    # padding predictions.
    per_example_loss = -tf.reduce_sum(log_probs * one_hot_labels, axis=[-1])
    numerator = tf.reduce_sum(label_weights * per_example_loss)
    denominator = tf.reduce_sum(label_weights) + 1e-5
    loss = numerator / denominator

  return (loss, per_example_loss, log_probs)
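

# Minimal usage sketch (illustrative only, not part of the original module):
# how the two loss functions above are typically wired together inside a
# pre-training `model_fn`. It assumes the surrounding module provides
# `modeling` (the standard BERT modeling library) and that the `features`
# dict uses the usual BERT pre-training keys; the helper name
# `_pretraining_losses_sketch` and the exact feature names are assumptions,
# so adapt them to the actual input pipeline if they differ.
def _pretraining_losses_sketch(bert_config, features, is_training):
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=features["input_ids"],
      input_mask=features["input_mask"],
      token_type_ids=features["segment_ids"])

  # Masked LM head: reuses the input embedding table as the output weights.
  (masked_lm_loss, _, _) = get_masked_lm_output(
      bert_config,
      model.get_sequence_output(),
      model.get_embedding_table(),
      features["masked_lm_positions"],
      features["masked_lm_ids"],
      features["masked_lm_weights"])

  # Sentence-level head: operates on the pooled [CLS] representation.
  (next_sentence_loss, _, _) = get_next_sentence_output(
      bert_config, model.get_pooled_output(),
      features["next_sentence_labels"])

  # The total pre-training objective is the sum of the two losses.
  return masked_lm_loss + next_sentence_loss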