def primary_capsules(self, encoded, seq_length):
    '''build the primary capsules by attention-pooling the encoded sequence

    every time step is softly assigned to the capsules (softmax) and gated
    by a scalar sigmoid weight; the gated assignments are used to pool the
    encoded features per capsule, after which a shared bias-free projection
    and a squash produce the capsule vectors

    args:
        encoded: encoded sequences [batch_size x time x dim]
        seq_length: the sequence lengths [batch_size]
            (only re-exposed as a named tensor, not used for masking here)

    returns:
        the primary capsules
            [batch_size x num_capsules x capsule_dim]
        the contribution of the time steps in the primary capsules
            [batch_size x time x num_capsules x capsule_dim]
    '''

    with tf.variable_scope('primary_capsules'):

        # named pass-through nodes for the two inputs
        encoded = tf.identity(encoded, name='encoded')
        seq_length = tf.identity(seq_length, name='seq_length')

        n_caps = int(self.conf['num_capsules'])
        caps_dim = int(self.conf['capsule_dim'])

        # soft assignment of every time step to the primary capsules
        assignment = tf.layers.dense(
            inputs=encoded,
            units=n_caps,
            activation=tf.nn.softmax)

        # one scalar gate in (0, 1) per time step
        gate = tf.layers.dense(
            inputs=encoded,
            units=1,
            activation=tf.nn.sigmoid)

        # gated assignment, the gate broadcasts over the capsule axis
        weights = gate*assignment

        # weighted sum of the encoded features for every capsule
        pooled = tf.matmul(weights, encoded, transpose_a=True)

        # the projection layer is kept as an object because it is applied
        # twice with the same weights: on the pooled features and on the
        # individual time steps
        projection = tf.layers.Dense(
            units=caps_dim,
            use_bias=False,
            name='orientations')
        capsules = ops.squash(projection(pooled))

        # NOTE(review): the tensor is looked up by a hard-coded graph name,
        # so this only works when the method is built under a 'model' scope
        # and ops.squash creates its 'div_1' op as expected -- presumably
        # this is the scaling factor applied by the squash, confirm in ops
        graph = tf.get_default_graph()
        squash_factor = graph.get_tensor_by_name(
            'model/primary_capsules/squash/div_1:0')

        # per time step orientations, scaled the same way as the capsules
        step_orientations = projection(encoded)
        contrib = (
            tf.expand_dims(squash_factor, 1)
            * tf.expand_dims(step_orientations, 2)
            * tf.expand_dims(weights, 3))

        # expose the weights through the 'image' collection
        tf.add_to_collection(
            'image',
            tf.expand_dims(weights, axis=3, name='prim_weights'))

        capsules = tf.identity(capsules, name='primary_capsules')

        return capsules, contrib
def call(self, input_tensor, training=None):
    """Compute the output capsules with routing-by-agreement.

    The prediction vectors returned by ``self.get_predictions`` are combined
    with coupling coefficients obtained from a softmax over the routing
    logits ``b``. After every iteration except the last, the logits are
    increased by the agreement (dot product) between the current output
    capsules and the prediction vectors, so that later iterations route
    more strongly to the capsules that agree.

    Args:
        input_tensor: Input passed unchanged to ``self.get_predictions``.
        training: Unused; accepted for compatibility with the layer API.

    Returns:
        The squashed output capsules of the final routing iteration
        ([None, 10, 16] according to the original shape comment).

    Raises:
        AssertionError: If ``self.routings`` is not positive.
    """
    assert self.routings > 0, 'routing should be > 0.'

    # NOTE(review): the contractions below assume inputs_hat is laid out as
    # [batch_size, num_caps, num_in_caps, dim_caps] -- confirm against
    # get_predictions.
    inputs_hat = self.get_predictions(input_tensor)

    # Routing logits start at zero, i.e. uniform coupling.
    b = tf.zeros(
        shape=[K.shape(inputs_hat)[0], self.num_caps, self.num_in_caps])

    for i in range(self.routings):
        # c.shape=[batch_size, num_caps, num_in_caps]
        c = tf.nn.softmax(b, dim=1)

        # Coupling-weighted sum of the predictions, then squash.
        activations = squash(K.batch_dot(c, inputs_hat, [2, 2]))

        # Without this update every iteration would rebuild the exact same
        # output from the all-zero logits. The last iteration skips it
        # because the updated logits would never be used.
        if i < self.routings - 1:
            b += K.batch_dot(activations, inputs_hat, [2, 3])

    return activations
def build_network(self):
    """Build the capsule network graph and store its outputs on ``self``.

    Stages, each in its own variable scope: a convolution, the primary
    capsules, the routed digit capsules, masking of the digit capsules and
    a three-layer dense decoder.

    Reads ``self.X`` (network input), ``self.Y`` (targets used for masking
    when ``self.mask_with_labels`` is true) and ``self.mask_with_labels``.
    Relies on names defined outside this method: ``conv1_params``,
    ``conv2_params``, ``args.batch_size``, ``caps1_n_caps``,
    ``caps1_n_dims``, ``caps2_n_caps``, ``caps2_n_dims``, ``n_hidden1``,
    ``n_hidden2``, ``n_output``, ``routing`` and ``squash``.

    Sets:
        self.caps2_output: routed capsules [batch_size, 10, 16, 1]
        self.v_length: capsule lengths [batch_size, 10, 1, 1]
        self.y_pred: predicted labels [batch_size]
        self.decoder_output: output of the sigmoid decoder layer
    """
    with tf.variable_scope('Conv1_layer'):
        # [batch_size, 20, 20, 256]
        conv1 = tf.layers.conv2d(self.X, name="conv1", **conv1_params)

    with tf.variable_scope('PrimaryCaps_layer'):
        # [batch_size, 6, 6, 256]
        conv2 = tf.layers.conv2d(conv1, name="conv2", **conv2_params)

        # [batch_size, 1152, 8, 1]
        caps1_raw = tf.reshape(
            conv2,
            (args.batch_size, caps1_n_caps, caps1_n_dims, 1),
            name="caps1_raw")

        # [batch_size, 1152, 8, 1]
        caps1_output = squash(caps1_raw, name="caps1_output")

    # DigitCaps layer, return [batch_size, 10, 16, 1]
    with tf.variable_scope('DigitCaps_layer'):
        # [batch_size, 1152, 1, 8, 1]
        caps2_input = tf.reshape(
            caps1_output,
            shape=(args.batch_size, caps1_n_caps, 1, caps1_n_dims, 1))

        # routing logits, [batch_size, 1152, 10, 1, 1]
        b_IJ = tf.zeros(
            [args.batch_size, caps1_n_caps, caps2_n_caps, 1, 1],
            dtype=np.float32,
            name="b_ij")

        # [batch_size, 10, 16, 1]
        self.caps2_output = routing(caps2_input, b_IJ, caps2_n_dims)

    # Decoder
    with tf.variable_scope('Masking'):
        # keeps the square root away from an exact zero
        epsilon = 1e-9

        # [batch_size, 10, 1, 1]
        self.v_length = tf.sqrt(
            tf.reduce_sum(
                tf.square(self.caps2_output), axis=2, keep_dims=True)
            + epsilon)

        # index of the longest capsule, [batch_size, 1, 1]
        y_prob_argmax = tf.to_int32(tf.argmax(self.v_length, axis=1))

        # [batch_size] (predicted labels)
        self.y_pred = tf.reshape(y_prob_argmax, shape=(args.batch_size,))

        # [batch_size, 10] (one-hot-encoded predicted labels)
        y_pred_ohe = tf.one_hot(self.y_pred, depth=caps2_n_caps)

        # [batch_size, 10]
        reconst_targets = tf.cond(
            self.mask_with_labels,  # condition
            lambda: self.Y,         # if True (Training)
            lambda: y_pred_ohe,     # if False (Test)
            name="reconstruction_targets")

        # Only the trailing singleton axis is removed. A squeeze without an
        # axis would also drop the capsule-count or capsule-dimension axis
        # whenever one of them happens to be 1, after which the mask below
        # would broadcast against the wrong axes.
        # [batch_size, 10, 16]
        caps2_output_masked = tf.multiply(
            tf.squeeze(self.caps2_output, axis=[3]),
            tf.expand_dims(reconst_targets, -1))

        # [batch_size, 160]
        decoder_input = tf.reshape(
            caps2_output_masked, [args.batch_size, -1])

    with tf.variable_scope('Decoder'):
        # [batch_size, 512]
        fc1 = tf.layers.dense(
            decoder_input, n_hidden1, activation=tf.nn.relu, name="FC1")

        # [batch_size, 1024]
        fc2 = tf.layers.dense(
            fc1, n_hidden2, activation=tf.nn.relu, name="FC2")

        self.decoder_output = tf.layers.dense(
            fc2, n_output, activation=tf.nn.sigmoid, name="FC3")
def primary_capsules(self, encoded, seq_length):
    '''compute the primary capsules

    every time step of the encoded sequence is projected with a bias-free
    dense layer, split into num_capsules vectors of capsule_dim elements
    and squashed

    the capsule layout follows from the configuration:
        r = capsule_ratio ** num_tc_layers
        num_capsules = num_tc_capsules * r
        capsule_dim = tc_capsule_dim // r

    args:
        encoded: encoded sequences [batch_size x time x dim]
        seq_length: the sequence lengths [batch_size]
            (only re-exposed as a named tensor, not used for masking here)

    returns:
        the primary capsules
            [batch_size x time x num_capsules x capsule_dim]
    '''

    with tf.variable_scope('primary_capsules'):

        encoded = tf.identity(encoded, 'encoded')
        seq_length = tf.identity(seq_length, 'seq_length')

        r = int(self.conf['capsule_ratio'])**int(self.conf['num_tc_layers'])
        num_capsules = int(self.conf['num_tc_capsules'])*r

        # floor division: a plain '/' yields a float on python 3, which is
        # not a valid layer size or reshape dimension; on python 2 the
        # result is the same as before
        capsule_dim = int(self.conf['tc_capsule_dim'])//r
        output_dim = num_capsules*capsule_dim

        primary_capsules = tf.layers.dense(
            encoded,
            output_dim,
            use_bias=False
        )

        # use the static batch size when it is known, otherwise fall back
        # to the dynamic one so that None never ends up in the target shape
        batch_size = encoded.shape[0].value
        if batch_size is None:
            batch_size = tf.shape(encoded)[0]

        # split the projected features into capsules
        primary_capsules = tf.reshape(
            primary_capsules,
            [batch_size,
             tf.shape(encoded)[1],
             num_capsules,
             capsule_dim]
        )

        primary_capsules = ops.squash(primary_capsules)

        # expose the capsule norms through the 'image' collection
        # (presumably safe_norm reduces the capsule_dim axis -- confirm in
        # ops)
        prim_norm = ops.safe_norm(primary_capsules)
        tf.add_to_collection('image', tf.expand_dims(prim_norm, 3))

        primary_capsules = tf.identity(primary_capsules, 'primary_capsules')

        return primary_capsules