def get_attention(feature_map, lstm, config, dropout_keep=1, reuse=False):
    """Apply the configured attention mechanism to an image feature map.

    Args:
        feature_map: image features; presumably (batch, h, w, channels) — TODO confirm.
        lstm: language/context state fed to the attention modules.
        config: dict whose "mode" key selects the mechanism
            ("none" | "mean" | "classic" | "glimpse"), plus the
            mode-specific keys 'no_attention_mlp' and 'no_glimpses'.
        dropout_keep: keep probability forwarded to the glimpse attention.
        reuse: reuse TF variable scopes (e.g. weight sharing across towers).

    Returns:
        The attended image embedding, or the raw feature map for mode "none".

    Raises:
        ValueError: if config["mode"] is not a supported attention mode.
    """
    attention_mode = config.get("mode", None)

    if attention_mode == "none":
        image_out = feature_map
    elif attention_mode == "mean":
        # Global average pooling over the two spatial dimensions.
        image_out = tf.reduce_mean(feature_map, axis=(1, 2))
    elif attention_mode == "classic":
        image_out = compute_attention(feature_map,
                                      lstm,
                                      no_mlp_units=config['no_attention_mlp'],
                                      reuse=reuse)
    elif attention_mode == "glimpse":
        image_out = compute_glimpse(feature_map,
                                    lstm,
                                    no_glimpse=config['no_glimpses'],
                                    glimpse_embedding_size=config['no_attention_mlp'],
                                    keep_dropout=dropout_keep,
                                    reuse=reuse)
    else:
        # Raise instead of `assert False`: asserts are stripped under -O,
        # which would make this function silently return None.
        raise ValueError("Wrong attention mode: {}".format(attention_mode))

    return image_out
def forward(self, image_feat):
    """Run one attention step over the stored states and update the memory.

    Attends over ``self.states`` with the current memory cell as context,
    layer-normalizes the result, and — when ``self.feedback_loop`` is set —
    concatenates the mean-pooled image features to the output.

    Args:
        image_feat: image feature map; mean-pooled over axes 1 and 2 when
            the feedback loop is enabled.

    Returns:
        The new memory cell, optionally concatenated with pooled image features.
    """
    # Reuse variables when the attention is shared across levels and has
    # already been built once, or when reuse is forced (multi-GPU).
    reuse_vars = self.reuse or (self.already_forward and self.shared_attention)

    with tf.variable_scope(self.scope, reuse=reuse_vars) as current_scope:
        attended = attention.compute_attention(self.states,
                                               seq_length=self.seq_length,
                                               context=self.memory_cell,
                                               no_mlp_units=self.no_units,
                                               reuse=reuse_vars)
        self.memory_cell = tfc_layers.layer_norm(attended, reuse=self.reuse)
        output = self.memory_cell

        if self.shared_attention:
            # Remember the scope so later levels rebind to the same variables.
            self.scope = current_scope
        self.already_forward = True

        if self.feedback_loop:
            pooled = tf.reduce_mean(image_feat, axis=[1, 2])
            output = tf.concat([output, pooled], axis=-1)

        return output
def forward(self, image_feature):
    """Run one attention step, optionally injecting image features.

    Depending on configuration flags, the mean-pooled image features are
    injected into the memory cell before attention (by concatenation+FC or
    by an additive ReLU embedding), and/or concatenated to the output after
    attention.

    Args:
        image_feature: image feature map, mean-pooled over axes 1 and 2
            wherever it is injected.

    Returns:
        The updated memory cell, optionally concatenated with pooled
        image features.
    """
    if self.inject_img_before:
        with tf.variable_scope("feedback_loop", reuse=self.reuse):
            image_feat = tf.reduce_mean(image_feature, axis=[1, 2])
            new_memory = tf.concat([self.memory_cell, image_feat], axis=-1)
            new_memory = tfc_layers.fully_connected(
                new_memory,
                num_outputs=int(self.memory_cell.get_shape()[1]),
                scope='hidden_layer',
                reuse=self.reuse)  # reuse: multi-gpu computation
            self.memory_cell = tfc_layers.layer_norm(new_memory, reuse=self.reuse)

    if self.inject_img_before2:
        # NOTE(review): this branch reuses the same "feedback_loop/hidden_layer"
        # scope as inject_img_before; if both flags are enabled the two FC
        # layers share (or collide on) variables — confirm this is intended.
        with tf.variable_scope("feedback_loop", reuse=self.reuse):
            image_feat = tf.reduce_mean(image_feature, axis=[1, 2])
            image_emb = tfc_layers.fully_connected(
                image_feat,
                num_outputs=int(self.memory_cell.get_shape()[1]),
                scope='hidden_layer',
                reuse=self.reuse)  # reuse: multi-gpu computation
            image_emb = tf.nn.relu(image_emb)
            self.memory_cell += image_emb

    # Should we reuse attention from one level to another
    reuse = (self.already_forward and self.shared_attention) or self.reuse

    with tf.variable_scope(self.scope, reuse=reuse) as scope:
        new_memory_cell = attention.compute_attention(
            self.states,
            seq_length=self.seq_length,
            context=self.memory_cell,
            no_mlp_units=self.attention_hidden_units,
            fuse_mode="dot",
            reuse=reuse)

        if self.sum_memory:
            self.memory_cell = self.memory_cell + new_memory_cell
        else:
            self.memory_cell = new_memory_cell

        # BUG FIX: previously this normalized `new_memory_cell`, which
        # discarded the residual sum above and made `sum_memory` a no-op.
        # Normalize the (possibly summed) memory cell instead.
        self.memory_cell = tfc_layers.layer_norm(self.memory_cell, reuse=self.reuse)
        output = self.memory_cell

        if self.shared_attention:
            self.scope = scope
        self.already_forward = True

        if self.inject_img_after:
            image_feat = tf.reduce_mean(image_feature, axis=[1, 2])
            output = tf.concat([output, image_feat], axis=-1)

        return output
def get_attention(feature_map, context, config, is_training, dropout_keep, reuse=False):
    """Apply the configured attention mechanism to an image feature map.

    Args:
        feature_map: image features; presumably (batch, h, w, channels) — TODO confirm.
        context: language/context state fed to the attention modules.
        config: dict whose "mode" key selects the mechanism
            ("none" | "max" | "mean" | "classic" | "glimpse" | "conv_pooling"),
            plus the mode-specific keys 'no_attention_mlp', 'fuse_mode'
            and 'no_glimpses'.
        is_training: training flag forwarded to the convolution pooling.
        dropout_keep: keep probability for the classic/glimpse attention.
        reuse: reuse TF variable scopes (e.g. weight sharing across towers).

    Returns:
        The attended image embedding, or the raw feature map for mode "none".

    Raises:
        ValueError: if config["mode"] is not a supported attention mode.
    """
    attention_mode = config.get("mode", None)

    if attention_mode == "none":
        image_out = feature_map
    elif attention_mode == "max":
        # Global max pooling over the two spatial dimensions.
        image_out = tf.reduce_max(feature_map, axis=(1, 2))
    elif attention_mode == "mean":
        # Global average pooling over the two spatial dimensions.
        image_out = tf.reduce_mean(feature_map, axis=(1, 2))
    elif attention_mode == "classic":
        image_out = compute_attention(feature_map,
                                      context,
                                      no_mlp_units=config['no_attention_mlp'],
                                      fuse_mode=config['fuse_mode'],
                                      keep_dropout=dropout_keep,
                                      reuse=reuse)
    elif attention_mode == "glimpse":
        image_out = compute_glimpse(feature_map,
                                    context,
                                    no_glimpse=config['no_glimpses'],
                                    glimpse_embedding_size=config['no_attention_mlp'],
                                    keep_dropout=dropout_keep,
                                    reuse=reuse)
    elif attention_mode == "conv_pooling":
        image_out = compute_convolution_pooling(
            feature_map,
            no_mlp_units=config['no_attention_mlp'],
            is_training=is_training,
            reuse=reuse)
    else:
        # Raise instead of `assert False`: asserts are stripped under -O,
        # which would make this function silently return None.
        raise ValueError("Wrong attention mode: {}".format(attention_mode))

    return image_out