Code example #1
import tensorflow as tf

# compute_attention and compute_glimpse are assumed to come from the
# surrounding attention module of this repository.


def get_attention(feature_map, lstm, config, dropout_keep=1, reuse=False):
    """Dispatch to the attention mechanism selected by config["mode"]."""
    attention_mode = config.get("mode", None)

    if attention_mode == "none":
        image_out = feature_map

    elif attention_mode == "mean":
        image_out = tf.reduce_mean(feature_map, axis=(1, 2))

    elif attention_mode == "classic":
        image_out = compute_attention(feature_map,
                                      lstm,
                                      no_mlp_units=config['no_attention_mlp'],
                                      reuse=reuse)

    elif attention_mode == "glimpse":
        image_out = compute_glimpse(
            feature_map,
            lstm,
            no_glimpse=config['no_glimpses'],
            glimpse_embedding_size=config['no_attention_mlp'],
            keep_dropout=dropout_keep,
            reuse=reuse)

    else:
        assert False, "Wrong attention mode: {}".format(attention_mode)

    return image_out
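
The helper compute_attention is not shown in this listing. A minimal sketch of classic soft attention over feature-map positions, conditioned on the LSTM state, might look like the following; the signature mirrors the call above, but the body is an assumption for illustration, not the repository's implementation.

import tensorflow as tf
import tensorflow.contrib.layers as tfc_layers


def compute_attention(feature_map, lstm, no_mlp_units, reuse=False):
    # Assumed sketch: score every spatial position of the feature map with a
    # small MLP conditioned on the LSTM state, then return the weighted sum.
    with tf.variable_scope("classic_attention", reuse=reuse):
        h, w, c = feature_map.get_shape().as_list()[1:4]

        # Flatten the spatial grid: [B, H*W, C]
        feat = tf.reshape(feature_map, [-1, h * w, c])

        # Broadcast the LSTM state over every position: [B, H*W, D]
        lstm_tiled = tf.tile(tf.expand_dims(lstm, axis=1), [1, h * w, 1])

        # Two-layer MLP producing one score per position
        hidden = tfc_layers.fully_connected(
            tf.concat([feat, lstm_tiled], axis=-1),
            num_outputs=no_mlp_units,
            activation_fn=tf.nn.relu,
            scope="hidden",
            reuse=reuse)
        scores = tfc_layers.fully_connected(
            hidden, num_outputs=1, activation_fn=None,
            scope="scores", reuse=reuse)

        # Softmax over positions, then weighted sum of the features: [B, C]
        alpha = tf.nn.softmax(scores, axis=1)
        return tf.reduce_sum(alpha * feat, axis=1)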
Code example #2
    def forward(self, image_feat):
        """Update the memory cell by attending over the recurrent states."""

        # Should we reuse the attention weights from one level to the next?
        reuse = (self.already_forward and self.shared_attention) or self.reuse

        with tf.variable_scope(self.scope, reuse=reuse) as scope:

            self.memory_cell = attention.compute_attention(
                self.states,
                seq_length=self.seq_length,
                context=self.memory_cell,
                no_mlp_units=self.no_units,
                reuse=reuse)

            self.memory_cell = tfc_layers.layer_norm(self.memory_cell,
                                                     reuse=self.reuse)

            output = self.memory_cell

            if self.shared_attention:
                self.scope = scope
                self.already_forward = True

        if self.feedback_loop:
            image_feat = tf.reduce_mean(image_feat, axis=[1, 2])
            output = tf.concat([output, image_feat], axis=-1)

        return output
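
The already_forward / shared_attention bookkeeping above is the standard TF 1.x weight-sharing pattern: the first call creates the variables, later calls re-enter the same scope with reuse=True. A minimal, self-contained sketch of that pattern (the names shared_block and proj are illustrative, not from the repository):

import tensorflow as tf
import tensorflow.contrib.layers as tfc_layers


def shared_projection(x, first_call):
    # First call creates the variables; every later call reuses them.
    with tf.variable_scope("shared_block", reuse=not first_call):
        return tfc_layers.fully_connected(x, num_outputs=64, scope="proj")


x = tf.placeholder(tf.float32, [None, 128])
out_step1 = shared_projection(x, first_call=True)   # creates shared_block/proj
out_step2 = shared_projection(x, first_call=False)  # reuses the same weights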
Code example #3
    def forward(self, image_feature):
        """One reasoning step: optionally inject image features, attend over the states, update the memory."""

        if self.inject_img_before:
            with tf.variable_scope("feedback_loop", reuse=self.reuse):
                image_feat = tf.reduce_mean(image_feature, axis=[1, 2])

                new_memory = tf.concat([self.memory_cell, image_feat], axis=-1)
                new_memory = tfc_layers.fully_connected(new_memory,
                                                        num_outputs=int(self.memory_cell.get_shape()[1]),
                                                        scope='hidden_layer',
                                                        reuse=self.reuse)  # reuse: multi-gpu computation

                self.memory_cell = tfc_layers.layer_norm(new_memory, reuse=self.reuse)

        if self.inject_img_before2:
            with tf.variable_scope("feedback_loop", reuse=self.reuse):
                image_feat = tf.reduce_mean(image_feature, axis=[1, 2])

                image_emb = tfc_layers.fully_connected(image_feat,
                                                       num_outputs=int(self.memory_cell.get_shape()[1]),
                                                       scope='hidden_layer',
                                                       reuse=self.reuse)  # reuse: multi-gpu computation
                image_emb = tf.nn.relu(image_emb)

                self.memory_cell += image_emb

        # Should we reuse attention from one level to another
        reuse = (self.already_forward and self.shared_attention) or self.reuse

        with tf.variable_scope(self.scope, reuse=reuse) as scope:
            new_memory_cell = attention.compute_attention(self.states,
                                                          seq_length=self.seq_length,
                                                          context=self.memory_cell,
                                                          no_mlp_units=self.attention_hidden_units,
                                                          fuse_mode="dot",
                                                          reuse=reuse)

        if self.sum_memory:
            # Residual-style update: add the new attention read to the previous memory
            self.memory_cell = self.memory_cell + new_memory_cell
        else:
            self.memory_cell = new_memory_cell

        # Normalise the updated memory cell (not only the raw attention output)
        self.memory_cell = tfc_layers.layer_norm(self.memory_cell, reuse=self.reuse)

        output = self.memory_cell

        if self.shared_attention:
            self.scope = scope
            self.already_forward = True

        if self.inject_img_after:
            image_feat = tf.reduce_mean(image_feature, axis=[1, 2])
            output = tf.concat([output, image_feat], axis=-1)

        return output
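
Design note: when sum_memory is set, the new attention read is added to the previous memory cell (a residual-style update) before layer normalisation; otherwise the memory is simply replaced. The inject_img_before, inject_img_before2 and inject_img_after flags control whether mean-pooled image features are concatenated into the memory and re-projected before attention, added as a ReLU-projected embedding, or concatenated to the output after attention.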
Code example #4
import tensorflow as tf

# compute_attention, compute_glimpse and compute_convolution_pooling are
# assumed to come from the surrounding attention module of this repository.


def get_attention(feature_map,
                  context,
                  config,
                  is_training,
                  dropout_keep,
                  reuse=False):
    """Dispatch to the attention/pooling mechanism selected by config["mode"]."""
    attention_mode = config.get("mode", None)

    if attention_mode == "none":
        image_out = feature_map

    elif attention_mode == "max":
        image_out = tf.reduce_max(feature_map, axis=(1, 2))

    elif attention_mode == "mean":
        image_out = tf.reduce_mean(feature_map, axis=(1, 2))

    elif attention_mode == "classic":
        image_out = compute_attention(feature_map,
                                      context,
                                      no_mlp_units=config['no_attention_mlp'],
                                      fuse_mode=config['fuse_mode'],
                                      keep_dropout=dropout_keep,
                                      reuse=reuse)

    elif attention_mode == "glimpse":
        image_out = compute_glimpse(
            feature_map,
            context,
            no_glimpse=config['no_glimpses'],
            glimpse_embedding_size=config['no_attention_mlp'],
            keep_dropout=dropout_keep,
            reuse=reuse)

    elif attention_mode == "conv_pooling":
        image_out = compute_convolution_pooling(
            feature_map,
            no_mlp_units=config['no_attention_mlp'],
            is_training=is_training,
            reuse=reuse)

    else:
        assert False, "Wrong attention mode: {}".format(attention_mode)

    return image_out
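
A hypothetical call site, just to illustrate the configuration keys the dispatcher reads; the tensor shapes and config values below are assumptions for illustration, not defaults from the repository.

feature_map = tf.placeholder(tf.float32, [None, 7, 7, 2048])  # CNN feature map
context = tf.placeholder(tf.float32, [None, 1024])            # e.g. question/dialogue encoding

config = {
    "mode": "classic",           # one of: none, max, mean, classic, glimpse, conv_pooling
    "no_attention_mlp": 256,
    "fuse_mode": "dot",
    "no_glimpses": 2,
}

image_out = get_attention(feature_map,
                          context,
                          config,
                          is_training=True,
                          dropout_keep=0.9,
                          reuse=False)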