def call(self, u):
    """Dynamic routing-by-agreement over the input capsules.

    Args:
        u: input capsule activations, shape (batch_size, in_caps, in_dim).

    Returns:
        Output capsule vectors, shape (batch_size, out_caps, out_dim).
    """
    batch_size = tf.shape(u)[0]

    # Broadcast inputs to (batch, out_caps, in_caps, out_dim, in_dim) so
    # every output capsule sees every input capsule.
    expanded = tf.expand_dims(tf.expand_dims(u, 1), 3)
    tiled = tf.tile(expanded, [1, self.out_capsules, 1, self.out_dim, 1])

    # One copy of the transformation matrices per batch element.
    weights = tf.tile(self.W, [batch_size, 1, 1, 1, 1])

    # Prediction vectors ("votes"): inner product along in_dim.
    u_hat = tf.reduce_sum(tiled * weights, axis=-1)

    if self.use_bias:
        bias = tf.tile(self.bias, [batch_size, 1, 1])
    else:
        bias = 0.0

    # Routing logits start at zero (uniform coupling after softmax).
    b_ij = tf.zeros(
        shape=[batch_size, self.out_capsules, self.in_capsules, 1])

    for iteration in range(self.routing_iterations):
        # Coupling coefficients: softmax across the output capsules.
        c_ij = tf.nn.softmax(b_ij, axis=1)
        weighted = tf.tile(c_ij, [1, 1, 1, self.out_dim]) * u_hat
        s_j = tf.reduce_sum(weighted, axis=2) + bias
        v_j = squash(s_j)
        if iteration < self.routing_iterations - 1:
            # Raise the logits of votes that agree with the output.
            v_j = tf.tile(tf.expand_dims(v_j, 2),
                          [1, 1, self.in_capsules, 1])
            agreement = tf.reduce_sum(v_j * u_hat, axis=-1)
            b_ij = b_ij + tf.expand_dims(agreement, axis=-1)

    return v_j
def _body(i, logits, activations):
    """One iteration of scaled-distance-agreement (SDA) routing.

    Args:
        i: loop counter.
        logits: routing log-priors over the output capsules.
        activations: TensorArray collecting each iteration's outputs.

    Returns:
        (i + 1, updated logits, activations) for the while loop.
    """
    route = tf.nn.softmax(logits, axis=-1)
    preactivate_unrolled = route * votes_trans
    preact_trans = tf.transpose(preactivate_unrolled, r_t_shape)
    preactivate = tf.reduce_sum(preact_trans, axis=1) + biases
    activation = squash(preactivate)
    activations = activations.write(i, activation)
    act_3d = tf.expand_dims(activation, 1)
    act_replicated = tf.tile(act_3d, [1, input_dim, 1, 1, 1, 1])
    # Scale factor t, chosen so a vote at half the mean distance (d_p)
    # receives routing probability p_p after the softmax.
    p_p = 0.9
    d = tf.norm(act_replicated - votes, axis=-1, keepdims=True)
    # Mean over all axes; the previous double reduce_mean was redundant.
    d_o = tf.reduce_mean(d)
    d_p = d_o * 0.5
    t = tf.constant(np.log(p_p * (out_capsules - 1)) - np.log(1 - p_p),
                    dtype=tf.float32) \
        / (d_p - d_o + 1e-12)
    t = tf.expand_dims(t, axis=-1)
    # Log priors inversely related to distance: t is negative (d_p < d_o),
    # so closer votes get larger logits. BUG FIX: tf.norm(t * d, axis=-1)
    # discarded the sign of t, rewarding *farther* votes; summing over the
    # size-1 last axis keeps the sign and yields the same shape (this also
    # matches the layer-level SDA implementation, which uses t * d).
    logits = tf.reduce_sum(t * d, axis=-1)
    return i + 1, logits, activations
def call(self, u):
    """Transform the input capsules and route them by agreement.

    Args:
        u: input capsule activations, shape (batch_size, in_caps, in_dim).

    Returns:
        Tuple (v_j, c_ij): output capsule vectors of shape
        (batch_size, out_caps, out_dim) and the coupling coefficients
        from the final routing iteration.
    """
    batch_size = tf.shape(u)[0]

    # Broadcast inputs to (batch, out_caps, in_caps, out_dim, in_dim).
    expanded = tf.expand_dims(tf.expand_dims(u, 1), 3)
    tiled = tf.tile(expanded, [1, self.out_capsules, 1, self.out_dim, 1])

    # Per-batch copy of the transformation matrices.
    weights = tf.tile(self.W, [batch_size, 1, 1, 1, 1])

    # Votes: inner product along the input dimension.
    u_hat = tf.reduce_sum(tiled * weights, axis=-1)

    # Routing-by-agreement.
    bias = tf.tile(self.bias, [batch_size, 1, 1])
    b_ij = tf.zeros(
        shape=[batch_size, self.out_capsules, self.in_capsules, 1])
    for iteration in range(self.routing_iterations):
        # Coupling coefficients: softmax over the output capsules.
        c_ij = tf.nn.softmax(b_ij, axis=1)
        weighted = tf.tile(c_ij, [1, 1, 1, self.out_dim]) * u_hat
        s_j = tf.reduce_sum(weighted, axis=2) + bias
        v_j = squash(s_j)
        if iteration < self.routing_iterations - 1:
            # Raise the logits of votes that agree with the output.
            v_j = tf.tile(tf.expand_dims(v_j, 2),
                          [1, 1, self.in_capsules, 1])
            agreement = tf.reduce_sum(v_j * u_hat, axis=-1)
            b_ij = b_ij + tf.expand_dims(agreement, axis=-1)
    return v_j, c_ij
def call(self, u):
    """Scaled-distance-agreement (SDA) routing over the input capsules.

    Args:
        u: input capsule activations, shape (batch_size, in_caps, in_dim).

    Returns:
        Tuple (v_j, c_ij): output capsule vectors of shape
        (batch_size, out_caps, out_dim) and the coupling coefficients
        from the final routing iteration.
    """
    batch_size = tf.shape(u)[0]
    u_norm = tf.norm(u, axis=-1)  # (batch_size, in_caps)

    # Reshape u into (batch_size, out_caps, in_caps, out_dim, in_dim)
    u = tf.expand_dims(u, 1)
    u = tf.expand_dims(u, 3)
    u = tf.tile(u, [1, self.out_capsules, 1, 1, 1])
    u = tf.tile(u, [1, 1, 1, self.out_dim, 1])

    # Duplicate transformation matrix for each batch
    w = tf.tile(self.W, [batch_size, 1, 1, 1, 1])

    # Dotwise product between u and w to get all votes
    # shape = (batch_size, out_caps, in_caps, out_dim)
    u_hat = tf.reduce_sum(u * w, axis=-1)

    # Ensure that ||u_hat|| <= ||v_i||: rescale any vote that is longer
    # than the input capsule it came from.
    u_hat_norm = tf.norm(u_hat, axis=-1, keepdims=True)
    u_norm = tf.expand_dims(u_norm, axis=1)
    u_norm = tf.expand_dims(u_norm, axis=3)
    u_norm = tf.tile(u_norm, [1, self.out_capsules, 1, self.out_dim])
    new_u_hat_norm = tf.math.minimum(u_hat_norm, u_norm)
    # BUG FIX: a plain division produced NaNs whenever a vote had zero
    # norm; divide_no_nan yields 0 there, which is the correct rescaling
    # of a zero vote.
    u_hat = tf.math.divide_no_nan(u_hat, u_hat_norm) * new_u_hat_norm

    # Scaled-distance-agreement routing
    bias = tf.tile(self.bias, [batch_size, 1, 1])
    b_ij = tf.zeros(
        shape=[batch_size, self.out_capsules, self.in_capsules, 1])
    for r in range(self.routing_iterations):
        # Coupling coefficients: softmax over the output capsules.
        c_ij = tf.nn.softmax(b_ij, axis=1)
        c_ij_tiled = tf.tile(c_ij, [1, 1, 1, self.out_dim])
        s_j = tf.reduce_sum(c_ij_tiled * u_hat, axis=2) + bias
        v_j = squash(s_j)
        if (r < self.routing_iterations - 1):
            v_j = tf.expand_dims(v_j, 2)
            v_j = tf.tile(v_j, [1, 1, self.in_capsules, 1
                                ])  # (batch_size, out_caps, in_caps, out_dim)
            # Scale factor t, chosen so a vote at half the mean distance
            # (d_p) receives routing probability p_p after the softmax.
            p_p = 0.9
            d = tf.norm(v_j - u_hat, axis=-1, keepdims=True)
            # Mean over all axes; the double reduce_mean was redundant.
            d_o = tf.reduce_mean(d)
            d_p = d_o * 0.5
            t = tf.constant(np.log(p_p * (self.out_capsules - 1)) -
                            np.log(1 - p_p),
                            dtype=tf.float32) \
                / (d_p - d_o + 1e-12)
            t = tf.expand_dims(t, axis=-1)
            # New log priors are inversely related to distance (t < 0).
            b_ij = t * d
    return v_j, c_ij
def _body(i, logits, activations):
    """Single iteration of dynamic routing-by-agreement."""
    # Coupling coefficients from the current logits.
    coupling = tf.nn.softmax(logits, axis=-1)
    # Weight the votes, then restore the original axis order.
    weighted = tf.transpose(coupling * votes_trans, r_t_shape)
    # Sum over the input capsules, add the bias, and squash.
    out = squash(tf.reduce_sum(weighted, axis=1) + biases)
    activations = activations.write(i, out)
    # Agreement between each vote and the capsule output it voted for.
    replicated = tf.tile(tf.expand_dims(out, 1),
                         [1, input_dim, 1, 1, 1, 1])
    agreement = tf.reduce_sum(votes * replicated, axis=-1)
    return i + 1, logits + agreement, activations
def call(self, inputs):
    """Primary-capsule forward pass: convolve, group into capsules, squash."""
    features = self.conv1(inputs)
    capsules = self.reshape(features)
    return squash(capsules)
def call(self, inputs):
    """Forward pass: convolution, then group activations into capsule vectors."""
    # Conv features reshaped to (batch_size, 1152, 8) before squashing.
    return squash(self.reshape(self.conv1(inputs)))