def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)

    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.input_ids,
                               input_mask=self.input_masks,
                               token_type_ids=self.segment_ids,
                               use_one_hot_embeddings=False)

    # Take the output of BERT's last encoder layer:
    # [batch_size, seq_length, hidden_size].
    output_layer = model.get_sequence_output()
    hidden_size = output_layer.shape[-1].value

    if self.__is_training:
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    # Feed the contextual token embeddings into a BiLSTM-CRF tagging layer.
    ner_model = BiLSTMCRF(embedded_chars=output_layer,
                          hidden_sizes=self.__ner_hidden_sizes,
                          layers=self.__ner_layers,
                          keep_prob=self.keep_prob,
                          num_labels=self.__num_classes,
                          max_len=self.__max_len,
                          labels=self.label_ids,
                          sequence_lens=self.sequence_len,
                          is_training=self.__is_training)

    self.loss, self.true_y, self.predictions = ner_model.construct_graph()

    with tf.name_scope('train_op'):
        self.train_op = optimization.create_optimizer(
            self.loss, self.__learning_rate, self.__num_train_step,
            self.__num_warmup_step, use_tpu=False)
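# For context, a minimal, hypothetical sketch of the feed tensors this method
# assumes were built elsewhere in the class. The names mirror the attribute
# usages above; the shapes, dtypes, and max_len value are assumptions.
import tensorflow as tf

max_len = 128  # assumed maximum sequence length

input_ids = tf.placeholder(tf.int32, [None, max_len], name="input_ids")
input_masks = tf.placeholder(tf.int32, [None, max_len], name="input_masks")
segment_ids = tf.placeholder(tf.int32, [None, max_len], name="segment_ids")
label_ids = tf.placeholder(tf.int32, [None, max_len], name="label_ids")
sequence_len = tf.placeholder(tf.int32, [None], name="sequence_len")  # true lengths for the CRF
keep_prob = tf.placeholder(tf.float32, name="keep_prob")  # LSTM dropout keep probability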
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)

    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.input_ids,
                               input_mask=self.input_masks,
                               token_type_ids=self.segment_ids,
                               use_one_hot_embeddings=False)

    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value

    if self.__is_training:
        # I.e., 0.1 dropout
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    with tf.name_scope("output"):
        output_weights = tf.get_variable(
            "output_weights", [self.__num_classes, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable(
            "output_bias", [self.__num_classes],
            initializer=tf.zeros_initializer())

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)

        if self.__num_classes == 1:
            self.predictions = tf.cast(tf.greater_equal(logits, 0.0),
                                       dtype=tf.int32, name="predictions")
        else:
            self.predictions = tf.argmax(logits, axis=-1, name="predictions")

    if self.__is_training:
        with tf.name_scope("loss"):
            if self.__num_classes == 1:
                losses = tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=tf.reshape(logits, [-1]),
                    labels=tf.cast(self.label_ids, dtype=tf.float32))
            else:
                losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=self.label_ids)
            self.loss = tf.reduce_mean(losses, name="loss")

        with tf.name_scope('train_op'):
            self.train_op = optimization.create_optimizer(
                self.loss, self.__learning_rate, self.__num_train_step,
                self.__num_warmup_step, use_tpu=False)
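# Why tf.greater_equal(logits, 0.0) works for the binary branch above:
# sigmoid(0) = 0.5, so thresholding the raw logit at 0 is identical to
# thresholding the sigmoid probability at 0.5. A small NumPy check
# (illustrative only, not part of the model graph):
import numpy as np

logits = np.array([-2.3, -0.1, 0.0, 1.7])
probs = 1.0 / (1.0 + np.exp(-logits))  # [0.091, 0.475, 0.500, 0.846]
assert np.array_equal(logits >= 0.0, probs >= 0.5)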
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    guids = features["guids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    jit_scope = tf.contrib.compiler.jit.experimental_jit_scope
    with jit_scope():
        model = modeling.BertModel(config=bert_config,
                                   is_training=False,
                                   input_ids=input_ids,
                                   input_mask=input_mask,
                                   token_type_ids=segment_ids)

        if mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError("Only PREDICT modes are supported: %s" % (mode))

        tvars = tf.trainable_variables()
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        # Actually load the pre-trained weights; without this call the
        # assignment map is computed but never applied.
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        all_layers = model.get_all_encoder_layers()

        predictions = {
            "guid": guids,
        }
        for (i, layer_index) in enumerate(layer_indexes):
            predictions["layer_output_%d" % i] = all_layers[layer_index]

        output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                 predictions=predictions)
        return output_spec
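# A hedged sketch of how a PREDICT-only model_fn like the one above is
# typically driven. The input_fn is assumed to yield the four feature
# tensors consumed above; estimator.predict() emits one dict per example,
# keyed by "guid" and "layer_output_%d".
def extract_features(input_fn):
    """Sketch: stream per-layer encoder outputs for each input example."""
    estimator = tf.estimator.Estimator(model_fn=model_fn)
    for result in estimator.predict(input_fn, yield_single_examples=True):
        yield result["guid"], [result["layer_output_%d" % i]
                               for i in range(len(layer_indexes))]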
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)

    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.concat_input_ids,
                               input_mask=self.concat_input_masks,
                               token_type_ids=self.concat_segment_ids,
                               use_one_hot_embeddings=False)

    concat_output = model.get_pooled_output()
    output_a, output_b = tf.split(concat_output, [self.__batch_size] * 2,
                                  axis=0)

    # -------------------------------------------------------------------
    # Cosine similarity + contrastive loss
    # -------------------------------------------------------------------
    with tf.name_scope("cosine_similarity"):
        # [batch_size]
        norm_a = tf.sqrt(tf.reduce_sum(tf.square(output_a), axis=-1))
        # [batch_size]
        norm_b = tf.sqrt(tf.reduce_sum(tf.square(output_b), axis=-1))
        # [batch_size]
        dot = tf.reduce_sum(tf.multiply(output_a, output_b), axis=-1)
        # [batch_size]
        norm = norm_a * norm_b
        # [batch_size]
        self.similarity = tf.div(dot, norm, name="similarity")
        self.predictions = tf.cast(
            tf.greater_equal(self.similarity, self.__neg_threshold),
            tf.int32, name="predictions")

    with tf.name_scope("loss"):
        # Penalty when the pair is labeled positive
        pred_pos_prob = tf.square(1 - self.similarity)
        # Negative pairs are only penalized above the threshold
        cond = (self.similarity > self.__neg_threshold)
        zeros = tf.zeros_like(self.similarity, dtype=tf.float32)
        pred_neg_prob = tf.where(cond, tf.square(self.similarity), zeros)
        self.label_ids = tf.cast(self.label_ids, dtype=tf.float32)
        losses = (self.label_ids * pred_pos_prob
                  + (1. - self.label_ids) * pred_neg_prob)
        self.loss = tf.reduce_mean(losses, name="loss")

    # -------------------------------------------------------------------
    # Manhattan distance + binary cross-entropy (alternative head)
    # -------------------------------------------------------------------
    # with tf.name_scope("manhattan_distance"):
    #     man_distance = tf.reduce_sum(tf.abs(output_a - output_b), -1)
    #     self.similarity = tf.exp(-man_distance)
    #     self.predictions = tf.cast(tf.greater_equal(self.similarity, 0.5),
    #                                tf.int32, name="predictions")
    #
    # with tf.name_scope("loss"):
    #     losses = self.label_ids * tf.log(self.similarity) + \
    #         (1 - self.label_ids) * tf.log(1 - self.similarity)
    #     self.loss = tf.reduce_mean(-losses, name="loss")

    with tf.name_scope('train_op'):
        self.train_op = optimization.create_optimizer(
            self.loss, self.__learning_rate, self.__num_train_step,
            self.__num_warmup_step, use_tpu=False)
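# A worked NumPy example of the contrastive loss above (illustrative numbers;
# neg_threshold assumed 0.5). Positive pairs (label 1) are pulled toward
# similarity 1 via (1 - s)^2; negative pairs (label 0) are only penalized by
# s^2 once their similarity exceeds the threshold.
import numpy as np

neg_threshold = 0.5
similarity = np.array([0.9, 0.3, 0.8, 0.2])
labels = np.array([1.0, 1.0, 0.0, 0.0])

pred_pos_prob = (1 - similarity) ** 2                  # [0.01, 0.49, 0.04, 0.64]
pred_neg_prob = np.where(similarity > neg_threshold,
                         similarity ** 2, 0.0)         # [0.81, 0.00, 0.64, 0.00]
losses = labels * pred_pos_prob + (1 - labels) * pred_neg_prob
print(losses.mean())  # (0.01 + 0.49 + 0.64 + 0.0) / 4 = 0.285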
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)

    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.input_ids,
                               input_mask=self.input_masks,
                               token_type_ids=self.segment_ids,
                               use_one_hot_embeddings=False)

    final_hidden = model.get_sequence_output()
    final_hidden_shape = modeling.get_shape_list(final_hidden,
                                                 expected_rank=3)
    seq_length = final_hidden_shape[1]
    hidden_size = final_hidden_shape[2]

    with tf.name_scope("output"):
        output_weights = tf.get_variable(
            "output_weights", [2, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable("output_bias", [2],
                                      initializer=tf.zeros_initializer())

        final_hidden_matrix = tf.reshape(final_hidden, [-1, hidden_size])
        logits = tf.matmul(final_hidden_matrix, output_weights,
                           transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)

        logits = tf.reshape(logits, [-1, seq_length, 2])
        logits = tf.transpose(logits, [2, 0, 1])
        unstacked_logits = tf.unstack(logits, axis=0)

        # [batch_size, seq_length]
        start_logits, end_logits = (unstacked_logits[0], unstacked_logits[1])
        self.start_logits = start_logits
        self.end_logits = end_logits

    if self.__is_training:
        with tf.name_scope("loss"):
            start_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=start_logits, labels=self.start_position)
            end_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=end_logits, labels=self.end_position)
            losses = tf.concat([start_losses, end_losses], axis=0)
            self.loss = tf.reduce_mean(losses, name="loss")

        with tf.name_scope('train_op'):
            self.train_op = optimization.create_optimizer(
                self.loss, self.__learning_rate, self.__num_train_step,
                self.__num_warmup_step, use_tpu=False)
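# A hedged sketch of span decoding at inference time (not from the source):
# pick the (start, end) pair with the highest combined logit, subject to
# start <= end and an assumed maximum answer length.
import numpy as np

def best_span(start_logits, end_logits, max_answer_len=30):
    """start_logits, end_logits: [seq_length] arrays for one example."""
    best = (0, 0, -np.inf)
    for s in range(len(start_logits)):
        for e in range(s, min(s + max_answer_len, len(end_logits))):
            score = start_logits[s] + end_logits[e]
            if score > best[2]:
                best = (s, e, score)
    return best[:2]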
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)

    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.concat_input_ids,
                               input_mask=self.concat_input_masks,
                               token_type_ids=self.concat_segment_ids,
                               use_one_hot_embeddings=False)

    concat_output = model.get_pooled_output()
    output_a, output_b = tf.split(
        concat_output,
        [self.__batch_size, self.__batch_size * self.__num_samples],
        axis=0)

    with tf.name_scope("reshape_output_b"):
        # batch_size tensors, each of shape [num_samples, hidden_size]
        split_output_b = tf.split(output_b,
                                  [self.__num_samples] * self.__batch_size,
                                  axis=0)
        # batch_size tensors, each of shape [1, num_samples, hidden_size]
        expand_output_b = [tf.expand_dims(tensor, 0)
                           for tensor in split_output_b]
        # [batch_size, num_samples, hidden_size]
        reshape_output_b = tf.concat(expand_output_b, axis=0)

    with tf.name_scope("cosine_similarity"):
        # [batch_size, 1, hidden_size]
        expand_output_a = tf.expand_dims(output_a, 1)
        # [batch_size, 1]
        norm_a = tf.sqrt(tf.reduce_sum(tf.square(expand_output_a), -1))
        # [batch_size, num_samples]
        norm_b = tf.sqrt(tf.reduce_sum(tf.square(reshape_output_b), -1))
        # [batch_size, num_samples]
        dot = tf.reduce_sum(tf.multiply(expand_output_a, reshape_output_b),
                            axis=-1)
        # [batch_size, num_samples]
        norm = norm_a * norm_b
        self.similarity = tf.div(dot, norm, name="similarity")
        self.predictions = tf.argmax(self.similarity, -1, name="predictions")

    with tf.name_scope("loss"):
        if self.__num_samples == 2:
            # One positive and one negative per anchor: hinge (triplet) loss.
            pos_similarity = tf.reshape(
                tf.slice(self.similarity, [0, 0], [self.__batch_size, 1]),
                [self.__batch_size])
            neg_similarity = tf.reshape(
                tf.slice(self.similarity, [0, 1],
                         [self.__batch_size, self.__num_samples - 1]),
                [self.__batch_size])
            distance = self.__margin - pos_similarity + neg_similarity
            zeros = tf.zeros_like(distance, dtype=tf.float32)
            cond = (distance >= zeros)
            losses = tf.where(cond, distance, zeros)
            self.loss = tf.reduce_mean(losses, name="loss")
        else:
            # Several negatives per anchor: softmax-style loss, normalizing
            # the positive similarity by the summed negative similarities.
            pos_similarity = tf.exp(
                tf.reshape(
                    tf.slice(self.similarity, [0, 0],
                             [self.__batch_size, 1]),
                    [self.__batch_size]))
            neg_similarity = tf.exp(
                tf.slice(self.similarity, [0, 1],
                         [self.__batch_size, self.__num_samples - 1]))
            norm_neg_similarity = tf.reduce_sum(neg_similarity, axis=-1)
            pos_prob = tf.div(pos_similarity, norm_neg_similarity)
            self.loss = tf.reduce_mean(-tf.log(pos_prob), name="loss")

    with tf.name_scope('train_op'):
        self.train_op = optimization.create_optimizer(
            self.loss, self.__learning_rate, self.__num_train_step,
            self.__num_warmup_step, use_tpu=False)
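# A worked example of the num_samples == 2 branch above: hinge loss
# max(0, margin - pos + neg), with an assumed margin of 0.3. The loss is
# zero once the positive beats the negative by at least the margin.
import numpy as np

margin = 0.3
pos_similarity = np.array([0.9, 0.6])
neg_similarity = np.array([0.2, 0.5])
distance = margin - pos_similarity + neg_similarity  # [-0.4, 0.2]
losses = np.maximum(distance, 0.0)                   # [0.0, 0.2]
print(losses.mean())                                 # 0.1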
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 token_label_ids, predicate_matrix_ids, num_token_labels,
                 num_predicate_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    # We could "pool" the model by simply taking the hidden state of the
    # first token, a float Tensor of shape [batch_size, hidden_size]:
    # model_pooled_output = model.get_pooled_output()
    #
    # Here we instead take the final hidden layer of the encoder: a float
    # Tensor of shape [batch_size, seq_length, hidden_size].
    sequence_bert_encode_output = model.get_sequence_output()
    if is_training:
        sequence_bert_encode_output = tf.nn.dropout(
            sequence_bert_encode_output, keep_prob=0.9)

    with tf.variable_scope("predicate_head_select_loss"):
        bert_sequence_length = sequence_bert_encode_output.shape[-2].value
        # shape [batch_size, sequence_length, sequence_length, num_predicate_labels]
        predicate_score_matrix = getHeadSelectionScores(
            encode_input=sequence_bert_encode_output,
            hidden_size_n1=100,
            label_number=num_predicate_labels)
        predicate_head_probabilities = tf.nn.sigmoid(predicate_score_matrix)
        # predicate_head_prediction = tf.argmax(predicate_head_probabilities, axis=3)
        predicate_head_predictions_round = tf.round(
            predicate_head_probabilities)
        predicate_head_predictions = tf.cast(predicate_head_predictions_round,
                                             tf.int32)
        # shape [batch_size, sequence_length, sequence_length]
        predicate_matrix = tf.reshape(
            predicate_matrix_ids,
            [-1, bert_sequence_length, bert_sequence_length])
        gold_predicate_matrix_one_hot = tf.one_hot(
            predicate_matrix, depth=num_predicate_labels, dtype=tf.float32)
        # shape [batch_size, sequence_length, sequence_length, num_predicate_labels]
        predicate_sigmoid_cross_entropy_with_logits = \
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=predicate_score_matrix,
                labels=gold_predicate_matrix_one_hot)

        def batch_sequence_matrix_max_sequence_length(batch_sequence_matrix):
            """Get the longest effective length of the input sequences (excluding padding)."""
            mask = tf.math.logical_not(tf.math.equal(batch_sequence_matrix, 0))
            mask = tf.cast(mask, tf.float32)
            mask_length = tf.reduce_sum(mask, axis=1)
            mask_length = tf.cast(mask_length, tf.int32)
            mask_max_length = tf.reduce_max(mask_length)
            return mask_max_length

        mask_max_length = batch_sequence_matrix_max_sequence_length(
            token_label_ids)
        # Only score positions inside the longest real sequence in the batch.
        predicate_sigmoid_cross_entropy_with_logits = \
            predicate_sigmoid_cross_entropy_with_logits[
                :, :mask_max_length, :mask_max_length, :]
        # shape []
        predicate_head_select_loss = tf.reduce_sum(
            predicate_sigmoid_cross_entropy_with_logits)

    with tf.variable_scope("token_label_loss"):
        bert_encode_hidden_size = sequence_bert_encode_output.shape[-1].value
        token_label_output_weight = tf.get_variable(
            "token_label_output_weights",
            [num_token_labels, bert_encode_hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        token_label_output_bias = tf.get_variable(
            "token_label_output_bias", [num_token_labels],
            initializer=tf.zeros_initializer())
        sequence_bert_encode_output = tf.reshape(
            sequence_bert_encode_output, [-1, bert_encode_hidden_size])
        token_label_logits = tf.matmul(sequence_bert_encode_output,
                                       token_label_output_weight,
                                       transpose_b=True)
        token_label_logits = tf.nn.bias_add(token_label_logits,
                                            token_label_output_bias)
        token_label_logits = tf.reshape(
            token_label_logits, [-1, FLAGS.max_seq_length, num_token_labels])
        token_label_log_probs = tf.nn.log_softmax(token_label_logits, axis=-1)
        token_label_one_hot_labels = tf.one_hot(token_label_ids,
                                                depth=num_token_labels,
                                                dtype=tf.float32)
        token_label_per_example_loss = -tf.reduce_sum(
            token_label_one_hot_labels * token_label_log_probs, axis=-1)
        token_label_loss = tf.reduce_sum(token_label_per_example_loss)
        token_label_probabilities = tf.nn.softmax(token_label_logits, axis=-1)
        token_label_predictions = tf.argmax(token_label_probabilities,
                                            axis=-1)
        # return (token_label_loss, token_label_per_example_loss,
        #         token_label_logits, token_label_predict)

    loss = predicate_head_select_loss + token_label_loss
    return (loss, predicate_head_select_loss, predicate_head_probabilities,
            predicate_head_predictions, token_label_loss,
            token_label_per_example_loss, token_label_logits,
            token_label_predictions)
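# getHeadSelectionScores is defined elsewhere in the source. Below is a
# plausible sketch of the head-selection scoring it implies (output shape
# [batch, seq, seq, label_number], per the comments above), in the additive
# style of Bekoulis et al.'s joint entity-relation model; the variable names
# and exact formulation are assumptions, not the author's implementation.
def head_selection_scores_sketch(encode_input, hidden_size_n1, label_number):
    # encode_input: [batch, seq, hidden]
    left = tf.layers.dense(encode_input, hidden_size_n1)   # token i as head
    right = tf.layers.dense(encode_input, hidden_size_n1)  # token j as dependent
    # Broadcast-add to a [batch, seq, seq, hidden_size_n1] pair tensor.
    pair = tf.tanh(tf.expand_dims(left, 2) + tf.expand_dims(right, 1))
    # Project each (i, j) pair to one logit per predicate label.
    return tf.layers.dense(pair, label_number)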