def test_masked_sum(self):
  tf.reset_default_graph()

  data = tf.placeholder(tf.float32, shape=[None, None])
  mask = tf.placeholder(tf.float32, shape=[None, None])
  masked_sums = utils.masked_sum(data, mask)

  with self.test_session() as sess:
    result = sess.run(
        masked_sums,
        feed_dict={
            data: [[1, 2, 3], [4, 5, 6]],
            mask: [[1, 0, 1], [0, 1, 0]]
        })
    self.assertAllClose(result, [[4], [5]])

    result = sess.run(
        masked_sums,
        feed_dict={
            data: [[1, 2, 3], [4, 5, 6]],
            mask: [[0, 1, 0], [1, 0, 1]]
        })
    self.assertAllClose(result, [[2], [10]])
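# The test above pins down the contract of `utils.masked_sum`: masked-out
# entries contribute nothing, and the reduced dimension is kept with size 1
# (hence the [[4], [5]] shape). A minimal sketch of a compatible helper,
# assuming TF >= 1.5 for the `keepdims` argument; the real implementation
# lives in utils.py and may differ.

def _masked_sum_sketch(data, mask, dim=-1):
  """Sums `data` along `dim`, zeroing out entries where `mask` is 0."""
  # Hypothetical helper for illustration; it mirrors the behavior the test
  # asserts, not necessarily the repo's actual code.
  return tf.reduce_sum(tf.multiply(data, mask), axis=dim, keepdims=True)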
def _build_latent_network(self,
                          num_proposals,
                          proposal_features,
                          num_classes=20,
                          num_latent_factors=20,
                          proba_h_given_c=None,
                          proba_h_use_sigmoid=False):
  """Builds the Multiple Instance Detection Network.

  MIDN: An attention network.

  Args:
    num_proposals: A [batch] int tensor.
    proposal_features: A [batch, max_num_proposals, features_dims] float
      tensor.
    num_classes: Number of classes.
    num_latent_factors: Number of latent factors.
    proba_h_given_c: A [num_latent_factors, num_classes] float tensor. If
      None, the distribution is learned within this network.
    proba_h_use_sigmoid: If True, use sigmoid instead of softmax to compute
      proba_h_given_c from the logits.

  Returns:
    logits: A [batch, num_classes] float tensor.
    proposal_scores: A [batch, max_num_proposals, num_classes] float tensor.
    proba_r_given_c: A [batch, max_num_proposals, num_classes] float tensor.
    proba_h_given_c: A [num_latent_factors, num_classes] float tensor.
  """
  if proba_h_given_c is not None:
    assert proba_h_given_c.get_shape()[0].value == num_latent_factors

  batch, max_num_proposals, _ = utils.get_tensor_shape(proposal_features)

  mask = tf.sequence_mask(
      num_proposals, maxlen=max_num_proposals, dtype=tf.float32)
  mask = tf.expand_dims(mask, axis=-1)

  # Calculates the values of the following tensors:
  #   logits_c_given_r shape = [batch, max_num_proposals, num_classes].
  #   logits_r_given_h shape = [batch, max_num_proposals, num_latent_factors].
  #   logits_h_given_c shape = [num_latent_factors, num_classes].

  with tf.variable_scope('midn'):
    logits_c_given_r = slim.fully_connected(
        proposal_features,
        num_outputs=num_classes,
        activation_fn=None,
        scope='proba_c_given_r')
    logits_r_given_h = slim.fully_connected(
        proposal_features,
        num_outputs=num_latent_factors,
        activation_fn=None,
        scope='proba_r_given_h')

    if proba_h_given_c is None:
      logits_h_given_c = slim.fully_connected(
          tf.diag(tf.ones([num_classes])),
          num_outputs=num_latent_factors,
          activation_fn=None,
          scope='proba_h_given_c')
      logits_h_given_c = tf.transpose(logits_h_given_c)
      tf.summary.histogram('logits_h_given_c', logits_h_given_c)

      if proba_h_use_sigmoid:
        proba_h_given_c = tf.nn.sigmoid(logits_h_given_c)
      else:
        proba_h_given_c = tf.nn.softmax(logits_h_given_c, axis=0)

  # Marginalizes out the latent factors:
  #   logits_r_given_c = logits_r_given_h x proba_h_given_c.
  logits_r_given_c = tf.matmul(
      tf.reshape(logits_r_given_h, [-1, num_latent_factors]), proba_h_given_c)
  logits_r_given_c = tf.reshape(logits_r_given_c,
                                [batch, max_num_proposals, num_classes])

  proba_r_given_c = utils.masked_softmax(
      data=logits_r_given_c, mask=mask, dim=1)
  proba_r_given_c = tf.multiply(mask, proba_r_given_c)

  # Aggregates the logits.
  class_logits = tf.multiply(logits_c_given_r, proba_r_given_c)
  class_logits = utils.masked_sum(data=class_logits, mask=mask, dim=1)

  proposal_scores = tf.multiply(tf.nn.sigmoid(class_logits), proba_r_given_c)
  #proposal_scores = tf.multiply(
  #    tf.nn.softmax(class_logits), proba_r_given_c)

  tf.summary.histogram('midn/logits_c_given_r', logits_c_given_r)
  tf.summary.histogram('midn/logits_r_given_h', logits_r_given_h)
  tf.summary.histogram('midn/class_logits', class_logits)

  return (tf.squeeze(class_logits, axis=1), proposal_scores, proba_r_given_c,
          proba_h_given_c)
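# `_build_latent_network` leans on `utils.masked_softmax` to normalize the
# attention over the proposal axis (dim=1) while ignoring padded proposals.
# A minimal sketch of such a helper (an assumption; the actual utils.py
# implementation may differ):

def _masked_softmax_sketch(data, mask, dim=1):
  """Softmax over `dim`, treating masked-out logits as a large negative."""
  # Hypothetical helper for illustration. Adding a large negative number to
  # masked positions drives their softmax weight to ~0 without altering the
  # logits that are kept.
  big_neg = -1e9
  return tf.nn.softmax(data + big_neg * (1.0 - mask), axis=dim)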
def _build_midn_network(self,
                        num_proposals,
                        proposal_features,
                        num_classes=20):
  """Builds the Multiple Instance Detection Network.

  MIDN: An attention network.

  Args:
    num_proposals: A [batch] int tensor.
    proposal_features: A [batch, max_num_proposals, features_dims] float
      tensor.
    num_classes: Number of classes.

  Returns:
    logits: A [batch, num_classes] float tensor.
    proposal_scores: A [batch, max_num_proposals, num_classes] float tensor.
    proba_r_given_c: A [batch, max_num_proposals, num_classes] float tensor.
  """
  with tf.name_scope('multi_instance_detection'):
    batch, max_num_proposals, _ = utils.get_tensor_shape(proposal_features)

    mask = tf.sequence_mask(
        num_proposals, maxlen=max_num_proposals, dtype=tf.float32)
    mask = tf.expand_dims(mask, axis=-1)

    # Calculates the values of the following tensors:
    #   logits_r_given_c shape = [batch, max_num_proposals, num_classes].
    #   logits_c_given_r shape = [batch, max_num_proposals, num_classes].
    with tf.variable_scope('midn'):
      logits_r_given_c = slim.fully_connected(
          proposal_features,
          num_outputs=num_classes,
          activation_fn=None,
          scope='proba_r_given_c')
      logits_c_given_r = slim.fully_connected(
          proposal_features,
          num_outputs=num_classes,
          activation_fn=None,
          scope='proba_c_given_r')

    # Calculates the detection scores.
    proba_r_given_c = utils.masked_softmax(
        data=tf.multiply(mask, logits_r_given_c), mask=mask, dim=1)
    proba_r_given_c = tf.multiply(mask, proba_r_given_c)

    # Aggregates the logits.
    class_logits = tf.multiply(logits_c_given_r, proba_r_given_c)
    class_logits = utils.masked_sum(data=class_logits, mask=mask, dim=1)

    proposal_scores = tf.multiply(
        tf.nn.sigmoid(class_logits), proba_r_given_c)
    #proposal_scores = tf.multiply(
    #    tf.nn.softmax(class_logits), proba_r_given_c)

    tf.summary.histogram('midn/logits_r_given_c', logits_r_given_c)
    tf.summary.histogram('midn/logits_c_given_r', logits_c_given_r)
    tf.summary.histogram('midn/proposal_scores', proposal_scores)
    tf.summary.histogram('midn/class_logits', class_logits)

  return tf.squeeze(class_logits, axis=1), proposal_scores, proba_r_given_c
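# A tiny NumPy walk-through of the aggregation above, to make the attention
# math concrete. Illustrative only: 1 image, 3 proposals, 2 classes, with
# made-up logits; no padding, so the mask is all ones and omitted.

import numpy as np

def _midn_aggregation_demo():
  """Computes class_logits[b, c] = sum_r proba[b, r, c] * logits[b, r, c]."""
  logits_c_given_r = np.array([[[2.0, 0.0], [0.5, 1.0], [1.0, 3.0]]])
  logits_r_given_c = np.array([[[1.0, 0.0], [0.0, 2.0], [3.0, 1.0]]])

  # Softmax over the proposal axis (axis=1): each class spreads its
  # attention across proposals.
  exp = np.exp(logits_r_given_c)
  proba_r_given_c = exp / exp.sum(axis=1, keepdims=True)

  # Attention-weighted sum of the per-proposal class logits.
  class_logits = (logits_c_given_r * proba_r_given_c).sum(axis=1)
  return class_logits  # Shape [1, 2]: one logit per class per image.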