def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)
    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.input_ids,
                               input_mask=self.input_masks,
                               token_type_ids=self.segment_ids,
                               use_one_hot_embeddings=False)

    # Sequence output of BERT's last layer
    output_layer = model.get_sequence_output()

    hidden_size = output_layer.shape[-1].value

    if self.__is_training:
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    ner_model = BiLSTMCRF(embedded_chars=output_layer,
                          hidden_sizes=self.__ner_hidden_sizes,
                          layers=self.__ner_layers,
                          keep_prob=self.keep_prob,
                          num_labels=self.__num_classes,
                          max_len=self.__max_len,
                          labels=self.label_ids,
                          sequence_lens=self.sequence_len,
                          is_training=self.__is_training)

    self.loss, self.true_y, self.predictions = ner_model.construct_graph()

    with tf.name_scope('train_op'):
        self.train_op = optimization.create_optimizer(
            self.loss, self.__learning_rate, self.__num_train_step,
            self.__num_warmup_step, use_tpu=False)
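The BiLSTMCRF graph is defined elsewhere in the project, so its internals are not shown here. As a rough, self-contained illustration of the CRF step such a model typically ends with, the following TF 1.x sketch uses tf.contrib.crf with made-up shapes; it is an assumption about the general technique, not the repo's actual BiLSTMCRF implementation.

import tensorflow as tf

# Hypothetical shapes: batch of 2 sequences, max length 5, 4 labels.
batch_size, max_len, num_labels = 2, 5, 4

logits = tf.random_normal([batch_size, max_len, num_labels])        # emission scores per token
labels = tf.constant([[1, 2, 0, 0, 0], [3, 1, 2, 2, 0]], tf.int32)  # gold tag ids (padded)
seq_lens = tf.constant([3, 5], tf.int32)                            # true sequence lengths

# CRF log-likelihood; a learnable transition matrix is created internally.
log_likelihood, transition = tf.contrib.crf.crf_log_likelihood(
    inputs=logits, tag_indices=labels, sequence_lengths=seq_lens)
loss = tf.reduce_mean(-log_likelihood)

# Viterbi decoding yields the predicted tag sequence for each example.
pred_ids, _ = tf.contrib.crf.crf_decode(logits, transition, seq_lens)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([loss, pred_ids]))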
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)
    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.input_ids,
                               input_mask=self.input_masks,
                               token_type_ids=self.segment_ids,
                               use_one_hot_embeddings=False)

    # Pooled [CLS] output of the last layer
    output_layer = model.get_pooled_output()

    hidden_size = output_layer.shape[-1].value

    if self.__is_training:
        # I.e., 0.1 dropout
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    with tf.name_scope("output"):
        output_weights = tf.get_variable(
            "output_weights", [self.__num_classes, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable(
            "output_bias", [self.__num_classes],
            initializer=tf.zeros_initializer())

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        self.predictions = tf.argmax(logits, axis=-1, name="predictions")

    if self.__is_training:
        with tf.name_scope("loss"):
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=self.label_ids)
            self.loss = tf.reduce_mean(losses, name="loss")

        with tf.name_scope('train_op'):
            self.train_op = optimization.create_optimizer(
                self.loss, self.__learning_rate, self.__num_train_step,
                self.__num_warmup_step, use_tpu=False)
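To make the shapes concrete, here is a small numpy sketch of what the classification head above computes: a matmul against the transposed weight matrix, a bias add, an argmax for predictions, and sparse softmax cross-entropy for the loss. All sizes and values are made up for illustration.

import numpy as np

# Hypothetical sizes: hidden_size=4, 3 classes, batch of 2 pooled [CLS] vectors.
pooled = np.array([[0.1, -0.2, 0.3, 0.5],
                   [0.4,  0.1, -0.3, 0.2]])              # [batch, hidden]
W = np.random.RandomState(0).normal(0.0, 0.02, (3, 4))   # [num_classes, hidden]
b = np.zeros(3)

logits = pooled @ W.T + b                 # tf.matmul(..., transpose_b=True) + bias_add
predictions = logits.argmax(axis=-1)      # argmax over the class dimension

# Sparse softmax cross-entropy against gold labels [2, 0]
labels = np.array([2, 0])
log_probs = logits - np.log(np.exp(logits).sum(axis=-1, keepdims=True))
loss = -log_probs[np.arange(len(labels)), labels].mean()
print(predictions, loss)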
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)
    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.concat_input_ids,
                               input_mask=self.concat_input_masks,
                               token_type_ids=self.concat_segment_ids,
                               use_one_hot_embeddings=False)

    concat_output = model.get_pooled_output()
    output_a, output_b = tf.split(concat_output, [self.__batch_size] * 2, axis=0)

    # -------------------------------------------------------------------------
    # Cosine similarity + contrastive loss
    # -------------------------------------------------------------------------
    with tf.name_scope("cosine_similarity"):
        # [batch_size]
        norm_a = tf.sqrt(tf.reduce_sum(tf.square(output_a), axis=-1))
        # [batch_size]
        norm_b = tf.sqrt(tf.reduce_sum(tf.square(output_b), axis=-1))
        # [batch_size]
        dot = tf.reduce_sum(tf.multiply(output_a, output_b), axis=-1)
        # [batch_size]
        norm = norm_a * norm_b
        # [batch_size]
        self.similarity = tf.div(dot, norm, name="similarity")
        self.predictions = tf.cast(
            tf.greater_equal(self.similarity, self.__neg_threshold),
            tf.int32, name="predictions")

    with tf.name_scope("loss"):
        # Loss term for pairs labelled positive (pushes similarity toward 1)
        pred_pos_prob = tf.square(1 - self.similarity)
        cond = (self.similarity > self.__neg_threshold)
        zeros = tf.zeros_like(self.similarity, dtype=tf.float32)
        pred_neg_prob = tf.where(cond, tf.square(self.similarity), zeros)
        self.label_ids = tf.cast(self.label_ids, dtype=tf.float32)
        losses = self.label_ids * pred_pos_prob + (1. - self.label_ids) * pred_neg_prob
        self.loss = tf.reduce_mean(losses, name="loss")

    # -------------------------------------------------------------------------
    # Manhattan distance + binary cross-entropy (alternative, kept commented out)
    # -------------------------------------------------------------------------
    # with tf.name_scope("manhattan_distance"):
    #     man_distance = tf.reduce_sum(tf.abs(output_a - output_b), -1)
    #     self.similarity = tf.exp(-man_distance)
    #     self.predictions = tf.cast(tf.greater_equal(self.similarity, 0.5), tf.int32, name="predictions")
    #
    # with tf.name_scope("loss"):
    #     losses = self.label_ids * tf.log(self.similarity) + (1 - self.label_ids) * tf.log(1 - self.similarity)
    #     self.loss = tf.reduce_mean(-losses, name="loss")

    with tf.name_scope('train_op'):
        self.train_op = optimization.create_optimizer(
            self.loss, self.__learning_rate, self.__num_train_step,
            self.__num_warmup_step, use_tpu=False)
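For reference, this is a minimal numpy sketch of the contrastive loss wired up above: positive pairs are pulled toward similarity 1, and negative pairs are only penalised while their similarity is still above the threshold. The similarity values, labels, and the contrastive_loss helper are made up for illustration and are not part of the class.

import numpy as np

def contrastive_loss(similarity, labels, neg_threshold=0.5):
    # Mirrors label * (1 - sim)^2 + (1 - label) * (sim^2 if sim > threshold else 0)
    pos_term = np.square(1.0 - similarity)
    neg_term = np.where(similarity > neg_threshold, np.square(similarity), 0.0)
    return np.mean(labels * pos_term + (1.0 - labels) * neg_term)

sims = np.array([0.9, 0.2, 0.7, 0.4])   # cosine similarities of four sentence pairs
labels = np.array([1., 1., 0., 0.])     # 1 = same meaning, 0 = different
# per-pair terms: [0.01, 0.64, 0.49, 0.0]
print(contrastive_loss(sims, labels))   # 0.285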
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)
    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.input_ids,
                               input_mask=self.input_masks,
                               token_type_ids=self.segment_ids,
                               use_one_hot_embeddings=False)

    final_hidden = model.get_sequence_output()

    final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3)
    seq_length = final_hidden_shape[1]
    hidden_size = final_hidden_shape[2]

    with tf.name_scope("output"):
        output_weights = tf.get_variable(
            "output_weights", [2, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable(
            "output_bias", [2], initializer=tf.zeros_initializer())

        final_hidden_matrix = tf.reshape(final_hidden, [-1, hidden_size])
        logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)

        logits = tf.reshape(logits, [-1, seq_length, 2])
        logits = tf.transpose(logits, [2, 0, 1])

        unstacked_logits = tf.unstack(logits, axis=0)

        # Each is [batch_size, seq_length]
        start_logits, end_logits = (unstacked_logits[0], unstacked_logits[1])

        self.start_logits = start_logits
        self.end_logits = end_logits

    if self.__is_training:
        with tf.name_scope("loss"):
            start_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=start_logits, labels=self.start_position)
            end_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=end_logits, labels=self.end_position)
            losses = tf.concat([start_losses, end_losses], axis=0)
            self.loss = tf.reduce_mean(losses, name="loss")

        with tf.name_scope('train_op'):
            self.train_op = optimization.create_optimizer(
                self.loss, self.__learning_rate, self.__num_train_step,
                self.__num_warmup_step, use_tpu=False)
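The graph only exposes start_logits and end_logits; turning them into an answer span usually happens outside the graph at inference time. The hypothetical best_span helper below shows one common way to consume these logits (pick the (start, end) pair with start <= end that maximises the summed logits, capped at a maximum answer length); it is an illustration, not part of this class.

import numpy as np

def best_span(start_logits, end_logits, max_answer_len=30):
    # Exhaustive search over valid (start, end) pairs within max_answer_len tokens.
    best, best_score = (0, 0), -np.inf
    for i, s in enumerate(start_logits):
        for j in range(i, min(i + max_answer_len, len(end_logits))):
            score = s + end_logits[j]
            if score > best_score:
                best_score, best = score, (i, j)
    return best, best_score

start = np.array([0.1, 2.3, 0.2, -1.0, 0.5])
end   = np.array([0.0, 0.4, 1.9,  0.3, 0.1])
print(best_span(start, end))   # ((1, 2), 4.2): tokens 1..2 form the predicted answer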
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)
    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.concat_input_ids,
                               input_mask=self.concat_input_masks,
                               token_type_ids=self.concat_segment_ids,
                               use_one_hot_embeddings=False)

    concat_output = model.get_pooled_output()

    output_a, output_b = tf.split(
        concat_output,
        [self.__batch_size, self.__batch_size * self.__num_samples],
        axis=0)

    with tf.name_scope("reshape_output_b"):
        # batch_size tensors, each [num_samples, hidden_size]
        split_output_b = tf.split(output_b,
                                  [self.__num_samples] * self.__batch_size,
                                  axis=0)
        # batch_size tensors, each [1, num_samples, hidden_size]
        expand_output_b = [tf.expand_dims(tensor, 0) for tensor in split_output_b]
        # [batch_size, num_samples, hidden_size]
        reshape_output_b = tf.concat(expand_output_b, axis=0)

    with tf.name_scope("cosine_similarity"):
        # [batch_size, 1, hidden_size]
        expand_output_a = tf.expand_dims(output_a, 1)
        # [batch_size, 1]
        norm_a = tf.sqrt(tf.reduce_sum(tf.square(expand_output_a), -1))
        # [batch_size, num_samples]
        norm_b = tf.sqrt(tf.reduce_sum(tf.square(reshape_output_b), -1))
        # [batch_size, num_samples]
        dot = tf.reduce_sum(tf.multiply(expand_output_a, reshape_output_b), axis=-1)
        # [batch_size, num_samples]
        norm = norm_a * norm_b
        self.similarity = tf.div(dot, norm, name="similarity")
        self.predictions = tf.argmax(self.similarity, -1, name="predictions")

    with tf.name_scope("loss"):
        if self.__num_samples == 2:
            # One positive and one negative per query: margin (hinge) loss
            pos_similarity = tf.reshape(
                tf.slice(self.similarity, [0, 0], [self.__batch_size, 1]),
                [self.__batch_size])
            neg_similarity = tf.reshape(
                tf.slice(self.similarity, [0, 1],
                         [self.__batch_size, self.__num_samples - 1]),
                [self.__batch_size])
            distance = self.__margin - pos_similarity + neg_similarity
            zeros = tf.zeros_like(distance, dtype=tf.float32)
            cond = (distance >= zeros)
            losses = tf.where(cond, distance, zeros)
            self.loss = tf.reduce_mean(losses, name="loss")
        else:
            # Several negatives per query: softmax-style loss over the candidates
            pos_similarity = tf.exp(
                tf.reshape(
                    tf.slice(self.similarity, [0, 0], [self.__batch_size, 1]),
                    [self.__batch_size]))
            neg_similarity = tf.exp(
                tf.slice(self.similarity, [0, 1],
                         [self.__batch_size, self.__num_samples - 1]))
            norm_seg_similarity = tf.reduce_sum(neg_similarity, axis=-1)
            pos_prob = tf.div(pos_similarity, norm_seg_similarity)
            self.loss = tf.reduce_mean(-tf.log(pos_prob), name="loss")

    with tf.name_scope('train_op'):
        self.train_op = optimization.create_optimizer(
            self.loss, self.__learning_rate, self.__num_train_step,
            self.__num_warmup_step, use_tpu=False)
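To see what the two loss branches compute, here is a numpy sketch with a made-up similarity matrix in which column 0 is the positive candidate and the remaining columns are sampled negatives. Note that, as written in the graph above, the softmax-style branch normalises exp(positive) by the summed exp(negatives) only, rather than by all candidates.

import numpy as np

# Hypothetical [batch_size, num_samples] similarities (num_samples = 4 here).
similarity = np.array([[0.8, 0.3, 0.1, 0.4],
                       [0.5, 0.6, 0.2, 0.1]])
margin = 0.5

# num_samples == 2 branch: pairwise hinge loss on (positive, single negative)
pos, neg = similarity[:, 0], similarity[:, 1]
hinge = np.maximum(0.0, margin - pos + neg)          # per-row: [0.0, 0.6]
print(hinge.mean())                                  # 0.3

# num_samples > 2 branch: exp(positive) over the summed exp(negatives)
pos_exp = np.exp(similarity[:, 0])
neg_exp_sum = np.exp(similarity[:, 1:]).sum(axis=-1)
loss = np.mean(-np.log(pos_exp / neg_exp_sum))
print(loss)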