def capsule(input_tensor, input_dim, output_dim, layer_name, input_atoms=8,
            output_atoms=8, **routing_args):
  """Builds a fully connected capsule layer.

  Given an input tensor of shape `[batch, input_dim, input_atoms]`, this op
  performs the following:

    1. For each input capsule, multiplies it with the weight variable to get
       votes of shape `[batch, input_dim, output_dim, output_atoms]`.
    2. Scales the votes for each output capsule by iterative routing.
    3. Squashes the output of each capsule to have norm less than one.

  Each capsule of this layer has one weight tensor for each capsule of the
  layer below. Therefore, this layer has the following number of trainable
  variables:
    w: [input_dim * num_in_atoms, output_dim * num_out_atoms]
    b: [output_dim * num_out_atoms]

  Args:
    input_tensor: tensor, activation output of the layer below.
    input_dim: scalar, number of capsules in the layer below.
    output_dim: scalar, number of capsules in this layer.
    layer_name: string, name of this layer.
    input_atoms: scalar, number of units in each capsule of the input layer.
    output_atoms: scalar, number of units in each capsule of the output layer.
    **routing_args: dictionary {leaky, num_routing}, args for the routing
      function.

  Returns:
    Tensor of activations for this layer of shape
      `[batch, output_dim, output_atoms]`.
  """
  with tf.variable_scope(layer_name):
    # weights variable will hold the state of the weights for the layer
    weights = variables.weight_variable(
        [input_dim, input_atoms, output_dim * output_atoms])
    biases = variables.bias_variable([output_dim, output_atoms])
    with tf.name_scope('Wx_plus_b'):
      # Depthwise matmul: [b, d, c] ** [d, c, o_c] = [b, d, o_c]
      # To do this: tile input, do element-wise multiplication and reduce
      # sum over the input_atoms dimension.
      input_tiled = tf.tile(
          tf.expand_dims(input_tensor, -1),
          [1, 1, 1, output_dim * output_atoms])
      votes = tf.reduce_sum(input_tiled * weights, axis=2)
      votes_reshaped = tf.reshape(votes,
                                  [-1, input_dim, output_dim, output_atoms])
    with tf.name_scope('routing'):
      input_shape = tf.shape(input_tensor)
      logit_shape = tf.stack([input_shape[0], input_dim, output_dim])
      activations = _update_routing(
          votes=votes_reshaped,
          biases=biases,
          logit_shape=logit_shape,
          num_dims=4,
          input_dim=input_dim,
          output_dim=output_dim,
          **routing_args)
    return activations
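# Hedged usage sketch for capsule(); `primary_caps` and the literal sizes are
# illustrative assumptions (chosen to mirror a CapsNet-style MNIST setup), not
# values defined by this module. Only num_routing and leaky are the routing
# args the docstring names.
primary_caps = ...  # assumed shape [batch, 1152, 8]: 1152 capsules, 8 atoms.
digit_caps = capsule(
    input_tensor=primary_caps,
    input_dim=1152,
    output_dim=10,
    layer_name='digit_caps',
    input_atoms=8,
    output_atoms=16,
    num_routing=3,
    leaky=False)
# digit_caps: [batch, 10, 16]; the capsule lengths can serve as class logits.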
def inference(self, features):
  """Adds the inference graph ops.

  Builds the architecture of the neural net to derive logits from features.
  The inference graph includes a series of convolution and fully connected
  layers and outputs a [batch, 10] tensor as the logits.

  Args:
    features: Dictionary of batched feature tensors like images and labels.
  Returns:
    A model.Inferred named tuple of expected outputs of the model like
    'logits' and 'remakes' for the reconstructions (to be added).
  """
  image = features['images']
  image_dim = features['height']
  image_depth = features['depth']
  image_4d = tf.reshape(image, [-1, image_depth, image_dim, image_dim])
  conv = self._add_convs(image_4d, [image_depth, 512, 256])
  hidden1 = tf.contrib.layers.flatten(conv)

  with tf.variable_scope('fc1') as scope:
    dim = hidden1.get_shape()[1].value
    weights = variables.weight_variable(
        shape=[dim, 1024], stddev=0.1, verbose=self._hparams.verbose)
    biases = variables.bias_variable(
        shape=[1024], verbose=self._hparams.verbose)
    pre_activation = tf.matmul(hidden1, weights) + biases
    hidden2 = tf.nn.relu(pre_activation, name=scope.name)

  with tf.variable_scope('softmax_layer') as scope:
    weights = variables.weight_variable(
        shape=[1024, features['num_classes']],
        stddev=0.1,
        verbose=self._hparams.verbose)
    biases = variables.bias_variable(
        shape=[features['num_classes']], verbose=self._hparams.verbose)
    logits = tf.matmul(hidden2, weights) + biases

  return model.Inferred(logits, None)
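# Hedged sketch of the `features` dictionary this method expects, assuming
# MNIST-style 28x28 grayscale inputs; the literal sizes and the names
# `image_batch` and `baseline_model` are illustrative assumptions, not defined
# by this module.
features = {
    'images': image_batch,   # any tensor reshapeable to [-1, 1, 28, 28]
    'height': 28,
    'depth': 1,
    'num_classes': 10,
}
inferred = baseline_model.inference(features)
# inferred.logits: [batch, 10]; the reconstruction field is None for this
# baseline.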
def testVariableDeclaration(self):
  """Checks the value, shape and registration of the declared variables."""
  with tf.Graph().as_default():
    with self.test_session() as sess:
      weights = variables.weight_variable((1, 2), stddev=0.1)
      bias = variables.bias_variable((1,))
      sess.run(tf.global_variables_initializer())
      w_value, b_value = sess.run([weights, bias])
      self.assertNear(w_value[0][0], 0.0, 0.2)
      self.assertNear(w_value[0][1], 0.0, 0.2)
      self.assertEqual(b_value, 0.1)
      trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
      self.assertEqual(len(trainable_vars), 2)
      self.assertStartsWith(trainable_vars[0].name, 'weights')
      self.assertStartsWith(trainable_vars[1].name, 'biases')
def inference(self, features):
  """Adds the inference graph ops.

  Builds the architecture of the neural net to derive logits from features.
  The inference graph includes a convolution layer, a primary capsule layer
  and a 10-capsule final layer. Optionally, it also adds the reconstruction
  network on top of the 10-capsule final layer.

  Args:
    features: Dictionary of batched feature tensors like images and labels.
  Returns:
    A model.Inferred named tuple of expected outputs of the model like
    'logits' and 'recons' for the reconstructions.
  """
  image_dim = features['height']
  image_depth = features['depth']
  image = features['images']
  image_4d = tf.reshape(image, [-1, image_depth, image_dim, image_dim])

  # ReLU Convolution
  with tf.variable_scope('conv1') as scope:
    kernel = variables.weight_variable(
        shape=[9, 9, image_depth, 256],
        stddev=5e-2,
        verbose=self._hparams.verbose)
    biases = variables.bias_variable([256], verbose=self._hparams.verbose)
    conv1 = tf.nn.conv2d(
        image_4d, kernel, [1, 1, 1, 1],
        padding=self._hparams.padding,
        data_format='NCHW')
    pre_activation = tf.nn.bias_add(conv1, biases, data_format='NCHW')
    relu1 = tf.nn.relu(pre_activation, name=scope.name)
    if self._hparams.verbose:
      tf.summary.histogram('activation', relu1)
  hidden1 = tf.expand_dims(relu1, 1)

  # Capsules
  capsule_output = self._build_capsule(hidden1, features['num_classes'])
  logits = tf.norm(capsule_output, axis=-1)

  # Reconstruction
  if self._hparams.remake:
    remake = self._remake(features, capsule_output)
  else:
    remake = None
  return model.Inferred(logits, remake)
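# Hedged shape walk-through of the graph above, assuming 28x28 grayscale
# inputs, padding='VALID' and 16 output atoms in the final capsule layer (the
# atom count is set inside _build_capsule, not here):
#   image_4d:        [batch, 1, 28, 28]
#   relu1:           [batch, 256, 20, 20]   (9x9 kernel, stride 1, VALID)
#   hidden1:         [batch, 1, 256, 20, 20]
#   capsule_output:  [batch, num_classes, 16]
#   logits:          [batch, num_classes]   (capsule lengths via tf.norm)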
def _add_convs(self, input_tensor, channels):
  """Adds the convolution layers.

  Adds a series of convolution layers with ReLU nonlinearity and pooling
  after each of them.

  Args:
    input_tensor: a 4D float tensor as the input to the first convolution.
    channels: A list of channel sizes for input_tensor and following
      convolution layers. Number of channels in input tensor should be
      equal to channels[0].
  Returns:
    A 4D tensor as the output of the last pooling layer.
  """
  for i in range(1, len(channels)):
    with tf.variable_scope('conv{}'.format(i)) as scope:
      kernel = variables.weight_variable(
          shape=[5, 5, channels[i - 1], channels[i]],
          stddev=5e-2,
          verbose=self._hparams.verbose)
      conv = tf.nn.conv2d(
          input_tensor,
          kernel, [1, 1, 1, 1],
          padding=self._hparams.padding,
          data_format='NCHW')
      biases = variables.bias_variable([channels[i]],
                                       verbose=self._hparams.verbose)
      pre_activation = tf.nn.bias_add(conv, biases, data_format='NCHW')
      relu = tf.nn.relu(pre_activation, name=scope.name)
      if self._hparams.verbose:
        tf.summary.histogram('activation', relu)
      input_tensor = tf.contrib.layers.max_pool2d(
          relu, kernel_size=2, stride=2, data_format='NCHW', padding='SAME')
  return input_tensor
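# Hedged usage sketch matching the caller in the baseline inference() above;
# `image_4d` is assumed to be an NCHW batch whose channel count equals
# channels[0].
conv_out = self._add_convs(image_4d, [image_depth, 512, 256])
# conv_out: [batch, 256, h, w], with each stride-2 max-pool roughly halving
# the spatial dimensions of its input.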
def conv_slim_capsule(input_tensor,
                      input_dim,
                      output_dim,
                      layer_name,
                      input_atoms=8,
                      output_atoms=8,
                      stride=2,
                      kernel_size=5,
                      padding='SAME',
                      **routing_args):
  """Builds a slim convolutional capsule layer.

  This layer performs 2D convolution given a 5D input tensor of shape
  `[batch, input_dim, input_atoms, input_height, input_width]`. Then it refines
  the votes with routing and applies the squash nonlinearity for each capsule.

  Each capsule in this layer is a convolutional unit and shares its kernel over
  the position grid and different capsules of the layer below. Therefore, the
  number of trainable variables in this layer is:

    kernel: [kernel_size, kernel_size, input_atoms, output_dim * output_atoms]
    bias: [output_dim, output_atoms]

  Output of a conv2d layer is a single capsule with channel number of atoms.
  Therefore conv_slim_capsule is suitable to be added on top of a conv2d layer
  with num_routing=1, input_dim=1 and input_atoms=conv_channels.

  Args:
    input_tensor: tensor, of rank 5. Last two dimensions represent the height
      and width position grid.
    input_dim: scalar, number of capsules in the layer below.
    output_dim: scalar, number of capsules in this layer.
    layer_name: string, name of this layer.
    input_atoms: scalar, number of units in each capsule of the input layer.
    output_atoms: scalar, number of units in each capsule of the output layer.
    stride: scalar, stride of the convolutional kernel.
    kernel_size: scalar, convolutional kernels are [kernel_size, kernel_size].
    padding: 'SAME' or 'VALID', padding mechanism for convolutional kernels.
    **routing_args: dictionary {leaky, num_routing}, args to be passed to the
      update_routing function.

  Returns:
    Tensor of activations for this layer of shape
      `[batch, output_dim, output_atoms, out_height, out_width]`. If padding is
      'SAME', out_height = in_height and out_width = in_width. Otherwise,
      height and width are adjusted with the same rules as 'VALID' in
      tf.nn.conv2d.
  """
  with tf.variable_scope(layer_name):
    kernel = variables.weight_variable(shape=[
        kernel_size, kernel_size, input_atoms, output_dim * output_atoms
    ])
    biases = variables.bias_variable([output_dim, output_atoms, 1, 1])
    votes, votes_shape, input_shape = _depthwise_conv3d(
        input_tensor, kernel, input_dim, output_dim, input_atoms, output_atoms,
        stride, padding)

    with tf.name_scope('routing'):
      logit_shape = tf.stack([
          input_shape[0], input_dim, output_dim, votes_shape[2], votes_shape[3]
      ])
      biases_replicated = tf.tile(biases,
                                  [1, 1, votes_shape[2], votes_shape[3]])
      activations = _update_routing(
          votes=votes,
          biases=biases_replicated,
          logit_shape=logit_shape,
          num_dims=6,
          input_dim=input_dim,
          output_dim=output_dim,
          **routing_args)
    return activations
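# Hedged sketch of the primary-capsule pattern the docstring describes:
# wrapping a conv2d/ReLU output as a single input capsule with
# input_atoms=conv_channels and num_routing=1. `relu1` and the literal sizes
# below are assumptions for illustration.
conv_caps_in = tf.expand_dims(relu1, 1)     # [batch, 1, 256, height, width]
primary_caps = conv_slim_capsule(
    conv_caps_in,
    input_dim=1,
    output_dim=32,
    layer_name='primary_caps',
    input_atoms=256,
    output_atoms=8,
    stride=2,
    kernel_size=9,
    padding='VALID',
    num_routing=1,
    leaky=False)
# primary_caps: [batch, 32, 8, out_height, out_width]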