    def build_key(self):
        """Embed the inputs, run the first `layers_before_key_pooling` layers,
        and project/pool the last intermediate output into key vectors."""
        with tf.compat.v1.variable_scope("embeddings"):
            input_tensor = self.get_embeddings(self.input_ids,
                                               self.segment_ids)
            self.input_shape = bc.get_shape_list(input_tensor, expected_rank=3)

        with tf.compat.v1.variable_scope("encoder"):
            self.attention_mask = bc.create_attention_mask_from_input_mask(
                input_tensor, self.input_mask)
            prev_output = bc.reshape_to_matrix(input_tensor)

            for layer_idx in range(self.layers_before_key_pooling):
                with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
                    intermediate_output, prev_output = self.forward_layer(
                        prev_output)
                    intermediate_output = tf.reshape(intermediate_output, [
                        self.batch_size * self.seq_length,
                        self.config.intermediate_size
                    ])
                    final_output = bc.reshape_from_matrix(
                        prev_output, self.input_shape)
                    self.all_layer_outputs.append(final_output)

        self.last_intermediate_output = intermediate_output

        self.last_key_layer = prev_output
        with tf.compat.v1.variable_scope("mr_key"):
            key_vectors = bc.dense(self.key_dimension,
                                   self.initializer)(intermediate_output)
            self.debug1 = key_vectors
            key_vectors = tf.reshape(
                key_vectors,
                [self.batch_size, self.seq_length, self.key_dimension])
            key_output = self.key_pooling(key_vectors)
        return key_output

    def build_by_attention(self, key):
        """Prepend `key` (as `num_key_tokens` pseudo-tokens) to the embedded
        input, run the encoder, and return (scores, info_output)."""
        hidden_size = self.config.hidden_size
        with tf.compat.v1.variable_scope("embeddings"):
            lexical_tensor = self.get_lexical_lookup()
            self.embedding_output = self.embedding_postprocessor(
                d_input_ids=self.input_ids,
                input_tensor=lexical_tensor,
                use_token_type=True,
                token_type_ids=self.segment_ids,
                token_type_vocab_size=self.config.type_vocab_size,
                token_type_embedding_name="token_type_embeddings",
                use_position_embeddings=True,
                position_embedding_name="position_embeddings",
                initializer_range=self.config.initializer_range,
                max_position_embeddings=self.config.max_position_embeddings,
                dropout_prob=self.config.hidden_dropout_prob)
            input_tensor = self.embedding_output
            # [def_per_batch, seq_length, hidden_size]

        with tf.compat.v1.variable_scope("encoder"):
            num_key_tokens = self.ssdr_config.num_key_tokens
            project_dim = hidden_size * num_key_tokens
            raw_key = bc.dense(project_dim, self.initializer)(key)
            key_tokens = tf.reshape(
                raw_key, [self.batch_size, num_key_tokens, hidden_size])

            input_tensor = tf.concat([key_tokens, input_tensor], axis=1)
            input_shape = bc.get_shape_list(input_tensor, expected_rank=3)

            mask_for_key = tf.ones([self.batch_size, num_key_tokens],
                                   dtype=tf.int64)
            self.input_mask = tf.cast(self.input_mask, tf.int64)
            self.input_mask = tf.concat([mask_for_key, self.input_mask],
                                        axis=1)
            self.seq_length = self.seq_length + num_key_tokens

            self.attention_mask = bc.create_attention_mask_from_input_mask(
                input_tensor, self.input_mask)
            prev_output = bc.reshape_to_matrix(input_tensor)
            for layer_idx in range(self.ssdr_config.num_hidden_layers):
                with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
                    intermediate_output, prev_output = self.forward_layer(
                        prev_output)
                    self.all_layer_outputs.append(prev_output)

            final_output = bc.reshape_from_matrix(prev_output, input_shape)
            self.scores = bc.dense(1, self.initializer)(final_output[:, 0, :])

            if self.ssdr_config.info_pooling_method == "first_tokens":
                self.info_output = final_output[:, :num_key_tokens, :]
            elif self.ssdr_config.info_pooling_method == "max_pooling":
                self.info_output = tf.reduce_max(final_output, axis=1)

        return self.scores, self.info_output

    def __init__(self, config, input_ids, input_mask, segment_ids,
                 use_one_hot_embeddings):
        self.config = config
        self.use_one_hot_embeddings = use_one_hot_embeddings
        self.input_ids = input_ids
        self.input_mask = input_mask
        self.segment_ids = segment_ids
        self.batch_size, self.seq_length = get_batch_and_seq_length(
            input_ids, 2)
        self.initializer = base.create_initializer(config.initializer_range)
        self.attention_mask = bc.create_attention_mask_from_input_mask(
            input_ids, self.input_mask)
Example #4
    def build(self, value_out, locations):
        """Embed the inputs, scatter `value_out` into the rows given by
        `locations`, run all encoder layers, and return per-layer outputs."""
        with tf.compat.v1.variable_scope("embeddings"):
            input_tensor = self.get_embeddings(self.input_ids,
                                               self.segment_ids)
            self.input_shape = bc.get_shape_list(input_tensor, expected_rank=3)

        with tf.compat.v1.variable_scope("encoder"):
            self.attention_mask = bc.create_attention_mask_from_input_mask(
                input_tensor, self.input_mask)
            prev_output = bc.reshape_to_matrix(input_tensor)
            prev_output = tf.tensor_scatter_nd_update(prev_output, locations,
                                                      value_out)

            for layer_idx in range(self.config.num_hidden_layers):
                with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
                    intermediate_output, prev_output = self.forward_layer(
                        prev_output)
                    final_output = bc.reshape_from_matrix(
                        prev_output, self.input_shape)
                    self.all_layer_outputs.append(final_output)

        return self.all_layer_outputs
Example #5
    def build(self):
        """Embed the inputs, extend the mask for `num_key_tokens` key tokens,
        run the encoder, and return (scores, info_output)."""
        with tf.compat.v1.variable_scope("dict"):
            with tf.compat.v1.variable_scope("embeddings"):
                input_tensor = self.get_embeddings(self.input_ids,
                                                   self.segment_ids)

            with tf.compat.v1.variable_scope("encoder"):
                num_key_tokens = self.ssdr_config.num_key_tokens
                input_shape = bc.get_shape_list(input_tensor, expected_rank=3)

                mask_for_key = tf.ones([self.batch_size, num_key_tokens],
                                       dtype=tf.int64)
                self.input_mask = tf.cast(self.input_mask, tf.int64)
                self.input_mask = tf.concat([mask_for_key, self.input_mask],
                                            axis=1)
                self.seq_length = self.seq_length + num_key_tokens

                self.attention_mask = bc.create_attention_mask_from_input_mask(
                    input_tensor, self.input_mask)
                prev_output = bc.reshape_to_matrix(input_tensor)
                for layer_idx in range(self.ssdr_config.num_hidden_layers):
                    with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
                        intermediate_output, prev_output = self.forward_layer(
                            prev_output)
                        self.all_layer_outputs.append(prev_output)

                final_output = bc.reshape_from_matrix(prev_output, input_shape)
                self.scores = bc.dense(1, self.initializer)(
                    final_output[:, 0, :])

                if self.ssdr_config.info_pooling_method == "first_tokens":
                    self.info_output = final_output[:, :num_key_tokens, :]
                elif self.ssdr_config.info_pooling_method == "max_pooling":
                    self.info_output = tf.reduce_max(final_output, axis=1)

            return self.scores, self.info_output
Example #6
File: patch_v2.py, Project: clover3/Chair
    def __init__(self,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None,
                 use_one_hot_embeddings=True,
                 scope=None):
        """Constructor for BertModel.

        Args:
            config: `BertConfig` instance.
            is_training: bool. true for training model, false for eval model. Controls
                whether dropout will be applied.
            input_ids: int32 Tensor of shape [batch_size, seq_length].
            input_mask: (optional) int32 Tensor of shape [batch_size, seq_length].
            token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
            use_one_hot_embeddings: (optional) bool. Whether to use one-hot word
                embeddings or tf.embedding_lookup() for the word embeddings. On the TPU,
                it is much faster if this is True; on the CPU or GPU, it is faster if
                this is False.
            scope: (optional) variable scope. Defaults to "bert".

        Raises:
            ValueError: The config is invalid or one of the input tensor shapes
                is invalid.
        """
        config = copy.deepcopy(config)
        if not is_training:
            config.hidden_dropout_prob = 0.0
            config.attention_probs_dropout_prob = 0.0

        input_shape = get_shape_list(input_ids, expected_rank=2)
        batch_size = input_shape[0]
        seq_length = input_shape[1]

        if input_mask is None:
            input_mask = tf.ones(shape=[batch_size, seq_length], dtype=tf.int32)

        if token_type_ids is None:
            token_type_ids = tf.zeros(shape=[batch_size, seq_length], dtype=tf.int32)

        with tf.compat.v1.variable_scope(scope, default_name="bert"):
            with tf.compat.v1.variable_scope("embeddings"):
                # Perform embedding lookup on the word ids.
                (self.embedding_output, self.embedding_table) = embedding_lookup(
                        input_ids=input_ids,
                        vocab_size=config.vocab_size,
                        embedding_size=config.hidden_size,
                        initializer_range=config.initializer_range,
                        word_embedding_name="word_embeddings",
                        use_one_hot_embeddings=use_one_hot_embeddings)

                # Add positional embeddings and token type embeddings, then layer
                # normalize and perform dropout.
                self.embedding_output = embedding_postprocessor(
                        input_tensor=self.embedding_output,
                        use_token_type=True,
                        token_type_ids=token_type_ids,
                        token_type_vocab_size=config.type_vocab_size,
                        token_type_embedding_name="token_type_embeddings",
                        use_position_embeddings=True,
                        position_embedding_name="position_embeddings",
                        initializer_range=config.initializer_range,
                        max_position_embeddings=config.max_position_embeddings,
                        dropout_prob=config.hidden_dropout_prob)

            with tf.compat.v1.variable_scope("encoder"):
                # This converts a 2D mask of shape [batch_size, seq_length] to a 3D
                # mask of shape [batch_size, seq_length, seq_length] which is used
                # for the attention scores.
                attention_mask = create_attention_mask_from_input_mask(
                        input_ids, input_mask)

                # Run the stacked transformer.
                # `sequence_output` shape = [batch_size, seq_length, hidden_size].
                self.all_encoder_layers, key = transformer_model(
                        input_tensor=self.embedding_output,
                        attention_mask=attention_mask,
                        input_mask=input_mask,
                        hidden_size=config.hidden_size,
                        num_hidden_layers=config.num_hidden_layers,
                        num_attention_heads=config.num_attention_heads,
                        is_training=is_training,
                        #mr_layer=config.mr_layer,
                        mr_num_route=config.mr_num_route,
                        #mr_key_layer=config.mr_key_layer,
                        intermediate_size=config.intermediate_size,
                        intermediate_act_fn=get_activation(config.hidden_act),
                        hidden_dropout_prob=config.hidden_dropout_prob,
                        attention_probs_dropout_prob=config.attention_probs_dropout_prob,
                        initializer_range=config.initializer_range,
                        do_return_all_layers=True)

            self.key = key
            self.sequence_output = self.all_encoder_layers[-1]
            # The "pooler" converts the encoded sequence tensor of shape
            # [batch_size, seq_length, hidden_size] to a tensor of shape
            # [batch_size, hidden_size]. This is necessary for segment-level
            # (or segment-pair-level) classification tasks where we need a fixed
            # dimensional representation of the segment.
            with tf.compat.v1.variable_scope("pooler"):
                # We "pool" the model by simply taking the hidden state corresponding
                # to the first token. We assume that this has been pre-trained
                first_token_tensor = tf.squeeze(self.sequence_output[:, 0:1, :], axis=1)
                self.pooled_output = tf.keras.layers.Dense(
                    config.hidden_size,
                    activation=tf.keras.activations.tanh,
                    kernel_initializer=create_initializer(
                        config.initializer_range))(first_token_tensor)
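
The constructor above builds the whole graph (embeddings, patched encoder, pooler) at construction time, so a caller only feeds the id tensors and reads the output attributes. Below is a minimal usage sketch, not code from the repository: the import path for BertModel and the concrete `config` object are not shown in this excerpt and are assumed to be in scope, and the config must also carry the extra `mr_num_route` field this patched `transformer_model` expects.

# Hedged usage sketch (assumption: `BertModel` as defined above and a populated
# BERT-style `config`, including `mr_num_route`, are already importable).
import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # the model uses TF1 compat graph mode

seq_length = 128
input_ids = tf.compat.v1.placeholder(tf.int32, [None, seq_length])
input_mask = tf.compat.v1.placeholder(tf.int32, [None, seq_length])
segment_ids = tf.compat.v1.placeholder(tf.int32, [None, seq_length])

model = BertModel(config=config,
                  is_training=False,             # zeroes both dropout probs (see constructor)
                  input_ids=input_ids,
                  input_mask=input_mask,
                  token_type_ids=segment_ids,
                  use_one_hot_embeddings=False)  # per the docstring, False is faster on CPU/GPU

sequence_output = model.sequence_output  # [batch_size, seq_length, hidden_size]
pooled_output = model.pooled_output      # [batch_size, hidden_size], tanh over the first token
key = model.key                          # extra key tensor returned by this patched transformer_model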