Example #1
    def __init__(self, old_config: JsonConfig):
        self.n_layers = 3
        self.all_layer_outputs = []
        self.last_key_layer = None
        self.old_config = old_config

        self.inner_config = self.build_config(old_config,
                                              old_config.mid_expanding_factor)
        self.embedding = None
        self.layer_list = []
        self.initializer = base.create_initializer(
            self.inner_config.initializer_range)
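        # Learned lookup tables for token-type (segment) and position
        # embeddings, sized from the derived inner config.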
        self.token_type_table = tf.compat.v1.get_variable(
            name="token_type_embeddings",
            shape=[
                self.inner_config.type_vocab_size,
                self.inner_config.hidden_size
            ],
            initializer=self.initializer)
        self.full_position_embeddings = tf.compat.v1.get_variable(
            name="position_embeddings",
            shape=[
                self.inner_config.max_position_embeddings,
                self.inner_config.hidden_size
            ],
            initializer=self.initializer)
        with tf.compat.v1.variable_scope("mid"):
            for layer_idx in range(self.n_layers):
                with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
                    layer = ForwardLayer(self.inner_config, self.initializer)
                    self.layer_list.append(layer)
Example #2
    def __init__(self, config, n_layers, use_one_hot_embeddings):
        super(TopicVectorBert, self).__init__()
        self.n_layers = n_layers
        self.all_layer_outputs = []
        self.last_key_layer = None
        self.config = config
        self.embedding = None
        self.layer_list = []
        self.initializer = base.create_initializer(config.initializer_range)
        self.attention_mask = None
        self.use_one_hot_embeddings = use_one_hot_embeddings
        for layer_idx in range(self.n_layers):
            layer = ForwardLayer(self.config, self.initializer)
            self.layer_list.append(layer)

        self.n_topics = config.n_topics
        self.use_topic_all_layer = config.use_topic_all_layer
        self.hidden_size = config.hidden_size
        topic_emb_len = 4

        self.topic_embedding_size = self.hidden_size * topic_emb_len
        self.topic_emb_len = topic_emb_len
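        # Each topic id maps to `topic_emb_len` hidden-size vectors, stored as
        # one flat (n_topics, hidden_size * topic_emb_len) embedding matrix.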
        self.topic_embedding = tf.Variable(
            lambda: self.initializer(
                shape=(self.n_topics, self.topic_embedding_size),
                dtype=tf.float32),
            name="topic_embedding")
Example #3
    def __init__(self, config, is_training, input_ids, input_ids2, input_mask,
                 input_mask2, token_type_ids, segment_ids2,
                 use_one_hot_embeddings):
        with tf.compat.v1.variable_scope(dual_model_prefix1):
            model_1 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
        with tf.compat.v1.variable_scope(dual_model_prefix2):
            model_2 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids2,
                input_mask=input_mask2,
                token_type_ids=segment_ids2,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
        # Concatenate the two encoders' [CLS] vectors for pooling, and their
        # full sequence outputs along the hidden dimension.
        model_1_first_token = model_1.get_sequence_output()[:, 0, :]
        model_2_first_token = model_2.get_sequence_output()[:, 0, :]
        rep = tf.concat([model_1_first_token, model_2_first_token], axis=1)
        self.sequence_output = tf.concat(
            [model_1.get_sequence_output(),
             model_2.get_sequence_output()],
            axis=2)
        dense_layer = tf.keras.layers.Dense(
            config.hidden_size,
            activation=tf.keras.activations.tanh,
            kernel_initializer=create_initializer(config.initializer_range))
        pooled_output = dense_layer(rep)
        self.pooled_output = pooled_output
Example #4
    def apply_binary_dense(vector):
        output = tf.keras.layers.Dense(
            2,
            activation=tf.keras.activations.softmax,
            name="cls_dense",
            kernel_initializer=create_initializer(
                config.initializer_range))(vector)
        return output

    def __init__(self, config, input_ids, input_mask, segment_ids,
                 use_one_hot_embeddings):
        self.config = config
        self.use_one_hot_embeddings = use_one_hot_embeddings
        self.input_ids = input_ids
        self.input_mask = input_mask
        self.segment_ids = segment_ids
        self.batch_size, self.seq_length = get_batch_and_seq_length(
            input_ids, 2)
        self.initializer = base.create_initializer(config.initializer_range)
        self.attention_mask = bc.create_attention_mask_from_input_mask(
            input_ids, self.input_mask)
Example #6
    def __init__(self, config, use_one_hot_embeddings, **kwargs):
        kwargs['autocast'] = False
        super(SharedTransformer, self).__init__(**kwargs)
        self.all_layer_outputs = []
        self.last_key_layer = None
        self.config = config

        self.initializer = base.create_initializer(config.initializer_range)
        self.attention_mask = None
        self.use_one_hot_embeddings = use_one_hot_embeddings
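        # Only a single ForwardLayer is built; the model reuses it rather than
        # stacking distinct per-layer weights.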
        with tf.compat.v1.variable_scope("layer"):
            self.layer = ForwardLayer(self.config, self.initializer)
Example #7
    def __init__(self,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None,
                 use_one_hot_embeddings=True,
                 features=None,
                 scope=None):
        super(DualBertTwoInputIgnoreSecondModel, self).__init__()

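        # The second branch receives all-zero inputs, so model_2 contributes a
        # constant signal and the second input is effectively ignored.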
        input_ids2 = tf.zeros_like(input_ids)
        input_mask2 = tf.zeros_like(input_mask)
        segment_ids2 = tf.zeros_like(token_type_ids)

        with tf.compat.v1.variable_scope(dual_model_prefix1):
            model_1 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        with tf.compat.v1.variable_scope(dual_model_prefix2):
            model_2 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids2,
                input_mask=input_mask2,
                token_type_ids=segment_ids2,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        model_1_first_token = model_1.get_sequence_output()[:, 0, :]
        model_2_first_token = model_2.get_sequence_output()[:, 0, :]

        rep = tf.concat([model_1_first_token, model_2_first_token], axis=1)

        self.sequence_output = tf.concat(
            [model_1.get_sequence_output(),
             model_2.get_sequence_output()],
            axis=2)
        dense_layer = tf.keras.layers.Dense(
            config.hidden_size,
            activation=tf.keras.activations.tanh,
            kernel_initializer=create_initializer(config.initializer_range))
        pooled_output = dense_layer(rep)
        self.pooled_output = pooled_output
Example #8
    def __init__(self, config, n_layers, **kwargs):
        kwargs['autocast'] = False
        super(UpperTransformer, self).__init__(**kwargs)
        self.n_layers = n_layers
        self.all_layer_outputs = []
        self.last_key_layer = None
        self.config = config
        self.embedding = None
        self.layer_list = []
        self.initializer = base.create_initializer(config.initializer_range)
        self.layer_idx_base = 0

        for layer_idx in range(self.n_layers):
            with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
                layer = ForwardLayer(self.config, self.initializer)
                self.layer_list.append(layer)
Example #9
    def __init__(self,
                 config,  # this differs from the standard BERT config
                 is_training,
                 input_ids,
                 input_mask,
                 token_type_ids,
                 use_one_hot_embeddings,
                 features,
                 ):
        super(MultiContextEncoder, self).__init__()
        self.config = config
        if not is_training:
            config.set_attrib("hidden_dropout_prob", 0.0)
            config.set_attrib("attention_probs_dropout_prob", 0.0)

        def reform_context(context):
            return tf.reshape(context, [-1, config.max_context, config.max_context_length])

        batch_size, _ = get_shape_list(input_ids)
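        # Tile the main input across `max_context` copies, prepend it to each
        # reshaped context window, and flatten to a (batch_size * max_context)
        # batch of 2D inputs.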
        def combine(input_ids, context_input_ids):
            a = tf.tile(tf.expand_dims(input_ids, 1), [1, config.max_context, 1])
            b = reform_context(context_input_ids)
            rep_3d = tf.concat([a, b], 2)
            return tf.reshape(rep_3d, [batch_size * config.max_context, -1])

        context_input_ids = features["context_input_ids"]
        context_input_mask = features["context_input_mask"]
        context_segment_ids = features["context_segment_ids"]
        context_segment_ids = tf.ones_like(context_segment_ids, tf.int32) * 2
        self.module = BertModel(config=config,
                                is_training=is_training,
                                input_ids=combine(input_ids, context_input_ids),
                                input_mask=combine(input_mask, context_input_mask),
                                token_type_ids=combine(token_type_ids, context_segment_ids),
                                use_one_hot_embeddings=use_one_hot_embeddings,
                                )
        dense_layer_setup = tf.keras.layers.Dense(config.hidden_size,
                                                  activation=tf.keras.activations.tanh,
                                                  kernel_initializer=create_initializer(config.initializer_range))
        h1 = self.module.get_pooled_output()
        h2 = dense_layer_setup(h1)
        h2 = tf.reshape(h2, [batch_size, config.max_context, -1])
        h2 = h2[:, :config.num_context]
        h3 = tf.reduce_mean(h2, axis=1)
        h4 = dense_layer_setup(h3)
        self.pooled_output = h4
Example #10
    def __init__(self,
                 sero_config,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None,
                 use_one_hot_embeddings=True,
                 scope=None):
        super(DualSeroBertModel, self).__init__()

        with tf.compat.v1.variable_scope(dual_model_prefix1):
            model_1 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
        with tf.compat.v1.variable_scope(dual_model_prefix2):
            with tf.compat.v1.variable_scope("sero"):
                model = SeroEpsilon(sero_config, is_training,
                                    use_one_hot_embeddings)

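                # SeroEpsilon consumes inputs with an extra segment axis, so
                # add a singleton dimension before calling network_stacked.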
                batch_size, _ = get_shape_list(input_mask)
                use_context = tf.ones([batch_size, 1], tf.int32)
                input_ids = tf.expand_dims(input_ids, 1)
                input_mask = tf.expand_dims(input_mask, 1)
                segment_ids = tf.expand_dims(token_type_ids, 1)
                sequence_output2 = model.network_stacked(
                    input_ids, input_mask, segment_ids, use_context)

        model_1_first_token = model_1.get_sequence_output()[:, 0, :]
        model_2_first_token = sequence_output2[:, 0, :]

        rep = tf.concat([model_1_first_token, model_2_first_token], axis=1)
        dense_layer = tf.keras.layers.Dense(
            config.hidden_size,
            activation=tf.keras.activations.tanh,
            kernel_initializer=create_initializer(config.initializer_range))
        pooled_output = dense_layer(rep)
        self.pooled_output = pooled_output
Example #11
    def __init__(
        self,
        config,
        use_one_hot_embeddings,
        is_training,
        masked_input_ids,
        input_mask,
        segment_ids,
        tt_input_ids,
        tt_input_mask,
        tt_segment_ids,
    ):

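        # Run the masked-LM batch and the tt batch through a single BertModel
        # by concatenating them along the batch axis; the tt half is split back
        # out below using lm_batch_size.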
        all_input_ids = tf.concat([masked_input_ids, tt_input_ids], axis=0)
        all_input_mask = tf.concat([input_mask, tt_input_mask], axis=0)
        all_segment_ids = tf.concat([segment_ids, tt_segment_ids], axis=0)
        self.config = config
        self.lm_batch_size, _ = get_shape_list2(masked_input_ids)
        self.model = BertModel(config, is_training, all_input_ids,
                               all_input_mask, all_segment_ids,
                               use_one_hot_embeddings)
        initializer = base.create_initializer(config.initializer_range)
        self.tt_layer = ForwardLayer(config, initializer)

        self.tt_input_mask = tt_input_mask
        seq_output = self.model.get_sequence_output()[self.lm_batch_size:]
        tt_batch_size, seq_length = get_shape_list2(tt_input_ids)
        tt_attention_mask = create_attention_mask_from_input_mask2(
            seq_output, self.tt_input_mask)

        print('tt_attention_mask', tt_attention_mask.shape)
        print("seq_output", seq_output.shape)
        seq_output = self.tt_layer.apply_3d(seq_output, tt_batch_size,
                                            seq_length, tt_attention_mask)
        self.tt_feature = mimic_pooling(seq_output, self.config.hidden_size,
                                        self.config.initializer_range)
Example #12
    def __init__(self,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None,
                 use_one_hot_embeddings=True,
                 features=None,
                 scope=None):
        super(TripleBertMasking, self).__init__()

        input_ids2 = features["input_ids2"]
        input_mask2 = features["input_mask2"]
        segment_ids2 = features["segment_ids2"]

        input_ids3 = features["input_ids3"]
        input_mask3 = features["input_mask3"]
        segment_ids3 = features["segment_ids3"]

        with tf.compat.v1.variable_scope(triple_model_prefix1):
            model_1 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        with tf.compat.v1.variable_scope(triple_model_prefix2):
            model_2 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids2,
                input_mask=input_mask2,
                token_type_ids=segment_ids2,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        with tf.compat.v1.variable_scope(triple_model_prefix3):
            model_3 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids3,
                input_mask=input_mask3,
                token_type_ids=segment_ids3,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        model_1_first_token = model_1.get_sequence_output()[:, 0, :]
        model_2_first_token = model_2.get_sequence_output()[:, 0, :]

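        # model_3 acts as a gate: its class-1 probability scales model_2's
        # [CLS] vector before the two representations are concatenated.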
        pooled3 = model_3.get_pooled_output()
        probs3 = tf.keras.layers.Dense(2,
                                       activation=tf.keras.activations.softmax,
                                       kernel_initializer=create_initializer(
                                           config.initializer_range))(pooled3)
        mask_scalar = probs3[:, 1:2]
        self.rel_score = mask_scalar

        model_2_first_token = mask_scalar * model_2_first_token

        rep = tf.concat([model_1_first_token, model_2_first_token], axis=1)

        self.sequence_output = tf.concat(
            [model_1.get_sequence_output(),
             model_2.get_sequence_output()],
            axis=2)
        dense_layer = tf.keras.layers.Dense(
            config.hidden_size,
            activation=tf.keras.activations.tanh,
            kernel_initializer=create_initializer(config.initializer_range))
        pooled_output = dense_layer(rep)
        self.pooled_output = pooled_output
Example #13
    def __init__(self,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None,
                 use_one_hot_embeddings=True,
                 features=None,
                 scope=None):
        super(TripleBertWeighted, self).__init__()

        input_ids2 = features["input_ids2"]
        input_mask2 = features["input_mask2"]
        segment_ids2 = features["segment_ids2"]

        input_ids3 = features["input_ids3"]
        input_mask3 = features["input_mask3"]
        segment_ids3 = features["segment_ids3"]

        def apply_binary_dense(vector):
            output = tf.keras.layers.Dense(
                2,
                activation=tf.keras.activations.softmax,
                name="cls_dense",
                kernel_initializer=create_initializer(
                    config.initializer_range))(vector)
            return output

        with tf.compat.v1.variable_scope(triple_model_prefix1):
            model_1 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
            model_1_pred = tf.keras.layers.Dense(
                3,
                activation=tf.keras.activations.softmax,
                name="cls_dense",
                kernel_initializer=create_initializer(
                    config.initializer_range))(model_1.get_pooled_output())
            model_1_pred = model_1_pred[:, :2]

        with tf.compat.v1.variable_scope(triple_model_prefix2):
            model_2 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids2,
                input_mask=input_mask2,
                token_type_ids=segment_ids2,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
            model_2_pred = apply_binary_dense(model_2.get_pooled_output())

        with tf.compat.v1.variable_scope(triple_model_prefix3):
            model_3 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids3,
                input_mask=input_mask3,
                token_type_ids=segment_ids3,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
            model_3_pred = apply_binary_dense(model_3.get_pooled_output())

        # Option: initialize dense

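        # Blend the two branch predictions, weighted by model_3's softmax over
        # which branch to trust.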
        combined_pred = model_1_pred * model_3_pred[:, 0:1] \
                        + model_2_pred * model_3_pred[:, 1:2]

        self.rel_score = model_3_pred[:, 1:2]
        self.pooled_output = combined_pred
Example #14
    def __init__(self,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None,
                 use_one_hot_embeddings=True,
                 features=None,
                 scope=None):
        super(DualBertTwoInputModelEx, self).__init__()

        input_ids2 = features["input_ids2"]
        input_mask2 = features["input_mask2"]
        segment_ids2 = features["segment_ids2"]

        modeling_option = config.model_option

        with tf.compat.v1.variable_scope(dual_model_prefix1):
            model_1 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        with tf.compat.v1.variable_scope(dual_model_prefix2):
            model_2 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids2,
                input_mask=input_mask2,
                token_type_ids=segment_ids2,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        model_1_first_token = model_1.get_sequence_output()[:, 0, :]
        model_2_first_token = model_2.get_sequence_output()[:, 0, :]
        print('model_2_first_token', model_2_first_token)
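        # `model_option` selects how much of model_2's [CLS] vector to keep:
        # "0" drops it, "1" keeps it, and "random" samples one scalar at graph
        # construction time.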
        mask_scalar = {
            "0": 0.,
            "1": 1.,
            "random": tf.random.uniform(shape=[], minval=0., maxval=1.)
        }[modeling_option]
        print("Mask_scalar:", mask_scalar)
        model_2_first_token = mask_scalar * model_2_first_token
        print('model_2_first_token', model_2_first_token)

        rep = tf.concat([model_1_first_token, model_2_first_token], axis=1)

        self.sequence_output = tf.concat(
            [model_1.get_sequence_output(),
             model_2.get_sequence_output()],
            axis=2)
        dense_layer = tf.keras.layers.Dense(
            config.hidden_size,
            activation=tf.keras.activations.tanh,
            kernel_initializer=create_initializer(config.initializer_range))
        pooled_output = dense_layer(rep)
        self.pooled_output = pooled_output
Example #15
    def __init__(self):
        config = BertConfig.from_json_file(
            os.path.join(data_path, "bert_config.json"))
        self.attention_probs_list = []

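        # A fixed dummy batch: one length-512 sequence of [CLS] (id 101)
        # followed by 511 [UNK] (id 100) tokens.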
        input_ids = tf.constant([[101] + [100] * 511])
        token_type_ids = tf.constant([[0] * 512])
        input_mask = tf.constant([[1] * 512])
        attention_mask = create_attention_mask_from_input_mask(
            input_ids, input_mask)
        initializer = create_initializer(config.initializer_range)

        scope = None
        with tf.compat.v1.variable_scope(scope, default_name="bert"):
            with tf.compat.v1.variable_scope("embeddings"):
                # Perform embedding lookup on the word ids.
                (self.embedding_output,
                 self.embedding_table) = embedding_lookup(
                     input_ids=input_ids,
                     vocab_size=config.vocab_size,
                     embedding_size=config.hidden_size,
                     initializer_range=config.initializer_range,
                     word_embedding_name="word_embeddings",
                     use_one_hot_embeddings=False)

                # Add positional embeddings and token type embeddings, then layer
                # normalize and perform dropout.
                self.embedding_output = embedding_postprocessor(
                    input_tensor=self.embedding_output,
                    use_token_type=True,
                    token_type_ids=token_type_ids,
                    token_type_vocab_size=config.type_vocab_size,
                    token_type_embedding_name="token_type_embeddings",
                    use_position_embeddings=True,
                    position_embedding_name="position_embeddings",
                    initializer_range=config.initializer_range,
                    max_position_embeddings=config.max_position_embeddings,
                    dropout_prob=config.hidden_dropout_prob)
            prev_output = reshape_to_matrix(self.embedding_output)
            with tf.compat.v1.variable_scope("encoder"):

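                # Manually unrolled 12-layer transformer encoder: each layer
                # runs self-attention, a projection with residual + layer norm,
                # a GELU feed-forward block, and a second residual + layer norm.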
                for layer_idx in range(12):
                    with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
                        layer_input = prev_output

                        with tf.compat.v1.variable_scope("attention"):
                            attention_heads = []
                            with tf.compat.v1.variable_scope("self"):
                                attention_head = self.attention_fn(layer_input)
                                attention_heads.append(attention_head)

                            attention_output = None
                            if len(attention_heads) == 1:
                                attention_output = attention_heads[0]
                            else:
                                # In the case where we have other sequences, we just concatenate
                                # them to the self-attention head before the projection.
                                attention_output = tf.concat(attention_heads,
                                                             axis=-1)

                            # Run a linear projection of `hidden_size` then add a residual
                            # with `layer_input`.
                            with tf.compat.v1.variable_scope("output"):
                                attention_output = dense(
                                    config.hidden_size, initializer)(attention_output)
                                attention_output = layer_norm(
                                    attention_output + layer_input)

                        # The activation is only applied to the "intermediate" hidden layer.
                        with tf.compat.v1.variable_scope("intermediate"):
                            intermediate_output = dense(
                                config.intermediate_size,
                                initializer,
                                activation=gelu)(attention_output)

                        # Down-project back to `hidden_size` then add the residual.
                        with tf.compat.v1.variable_scope("output"):
                            layer_output = dense(
                                config.hidden_size, initializer)(intermediate_output)
                            layer_output = layer_norm(layer_output +
                                                      attention_output)
                            prev_output = layer_output