Ejemplo n.º 1
0
def gather_indexes(sequence_tensor, positions):
    """Pick out per-example vectors from a rank-3 tensor by position.

    The input is flattened to two dimensions so that a single `tf.gather`
    serves the whole minibatch; every example's positions are shifted by
    the row at which that example starts in the flattened tensor.

    Args:
        sequence_tensor: Rank-3 tensor, read here as
            [batch_size, seq_length, width].
        positions: Integer tensor of indices along the seq_length axis.
            Presumably shaped [batch_size, num_positions] so that it
            broadcasts against the [batch_size, 1] offsets -- TODO confirm
            against callers.

    Returns:
        A rank-2 tensor holding one `width`-sized row per entry of
        `positions`.
    """
    batch_size, seq_length, width = modeling.get_shape_list(
        sequence_tensor, expected_rank=3)

    # Row at which each example begins once batch and sequence axes merge.
    example_starts = tf.range(0, batch_size, dtype=tf.int32) * seq_length
    column_offsets = tf.reshape(example_starts, [-1, 1])
    absolute_positions = tf.reshape(positions + column_offsets, [-1])

    flattened = tf.reshape(sequence_tensor, [batch_size * seq_length, width])
    return tf.gather(flattened, absolute_positions)
Ejemplo n.º 2
0
    def built_model(self):
        """Assemble the BERT encoder, the two-way token scorer and training ops.

        Every token's final hidden vector is projected to two scores. The
        scores are split into `self.start_logits` and `self.end_logits`,
        each shaped [batch_size, seq_length].

        When the instance is in training mode, this additionally sets:
            self.loss: mean of the sparse softmax cross-entropy of the start
                logits against `self.start_position` and of the end logits
                against `self.end_position`.
            self.train_op: result of `optimization.create_optimizer` for
                that loss.

        Returns:
            None. Results are exposed as attributes on `self`.
        """
        config = modeling.BertConfig.from_json_file(self.__bert_config_path)
        encoder = modeling.BertModel(
            config=config,
            is_training=self.__is_training,
            input_ids=self.input_ids,
            input_mask=self.input_masks,
            token_type_ids=self.segment_ids,
            use_one_hot_embeddings=False)

        token_states = encoder.get_sequence_output()
        _, seq_length, hidden_size = modeling.get_shape_list(
            token_states, expected_rank=3)

        # NOTE: tf.name_scope groups ops only; per the TF1 docs it does not
        # prefix variables created through tf.get_variable.
        with tf.name_scope("output"):
            weights_init = tf.truncated_normal_initializer(stddev=0.02)
            output_weights = tf.get_variable(
                "output_weights", [2, hidden_size], initializer=weights_init)
            output_bias = tf.get_variable(
                "output_bias", [2], initializer=tf.zeros_initializer())

            # Score all tokens of all examples with one matmul.
            flat_states = tf.reshape(token_states, [-1, hidden_size])
            scores = tf.matmul(flat_states, output_weights, transpose_b=True)
            scores = tf.nn.bias_add(scores, output_bias)

            # Restore the batch/sequence axes, then move the two-way score
            # axis to the front so it can be split apart.
            scores = tf.reshape(scores, [-1, seq_length, 2])
            scores = tf.transpose(scores, [2, 0, 1])

            # Each of the two tensors is [batch_size, seq_length].
            start_logits, end_logits = tf.unstack(scores, axis=0)
            self.start_logits = start_logits
            self.end_logits = end_logits

        if not self.__is_training:
            return

        with tf.name_scope("loss"):
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits
            start_losses = cross_entropy(
                logits=start_logits, labels=self.start_position)
            end_losses = cross_entropy(
                logits=end_logits, labels=self.end_position)

            # Averaging the concatenation weights both ends equally.
            combined = tf.concat([start_losses, end_losses], axis=0)
            self.loss = tf.reduce_mean(combined, name="loss")

        with tf.name_scope('train_op'):
            self.train_op = optimization.create_optimizer(
                self.loss,
                self.__learning_rate,
                self.__num_train_step,
                self.__num_warmup_step,
                use_tpu=False)