  def _build_capsule(self, input_tensor, num_classes):
    """Adds the capsule layers.

    A slim convolutional capsule layer transforms the input tensor into
    capsule format. The nonlinearity of the slim convolutional capsule is the
    squash function, but there is no routing: each spatial instantiation of a
    capsule is derived as in a traditional convolutional layer.
    To connect the convolutional capsule layer to the top fully connected
    capsule layer, the grid positions of the convolutional capsules are merged
    with the capsule-type dimension, and capsule2 learns a different
    transformation for each of them.

    Args:
      input_tensor: 5D input tensor, shape [batch, 1, 256, height, width].
      num_classes: Number of object categories, used as the output dimension.

    Returns:
      A 3D tensor of the top capsule layer with num_classes capsule
      embeddings.
    """
    # PrimaryCaps layer start.
    capsule1 = layers.conv_slim_capsule(
        input_tensor,
        input_dim=1,
        output_dim=self._hparams.num_prime_capsules,
        layer_name='conv_capsule1',
        num_routing=1,
        input_atoms=256,
        output_atoms=8,
        stride=2,
        kernel_size=9,
        padding=self._hparams.padding,
        leaky=self._hparams.leaky,
    )
    capsule1_atom_last = tf.transpose(capsule1, [0, 1, 3, 4, 2])
    # PrimaryCaps layer end: capsule1_atom_last is [batch_size, 32, 6, 6, 8] in
    # format [batch_size, capsule_channels, height, width, capsule_dims].

    # Since the DigitCaps layer is a fully connected capsule layer, reshaping
    # to [batch_size, 1152, 8] makes it easier to work with.
    capsule1_3d = tf.reshape(capsule1_atom_last,
                             [tf.shape(input_tensor)[0], -1, 8])
    _, _, _, height, width = capsule1.get_shape()

    # 1152: the number of capsules in the PrimaryCaps layer.
    input_dim = self._hparams.num_prime_capsules * height.value * width.value

    # DigitCaps layer: returns [batch_size, 10, 16].
    return layers.capsule(
        input_tensor=capsule1_3d,
        input_dim=input_dim,
        output_dim=num_classes,
        layer_name='capsule2',
        input_atoms=8,
        output_atoms=16,
        num_routing=self._hparams.routing,
        leaky=self._hparams.leaky,
    )
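
The docstring above refers to the squash nonlinearity. For reference, here is a
minimal sketch of that function following the formula from the original capsule
paper (Sabour et al., 2017), applied along the last (atom) axis as in the
[batch, capsules, atoms] layout of capsule1_3d. The epsilon term and the name
_squash are assumptions, not necessarily what the layers module implements.

import tensorflow as tf

def _squash(input_tensor, epsilon=1e-9):
  """v = (||s||^2 / (1 + ||s||^2)) * (s / ||s||), along the last axis."""
  # Keep the reduced axis so the norm broadcasts over the atom dimension.
  norm_squared = tf.reduce_sum(
      tf.square(input_tensor), axis=-1, keepdims=True)
  norm = tf.sqrt(norm_squared + epsilon)  # epsilon guards against ||s|| = 0
  return input_tensor * norm_squared / ((1.0 + norm_squared) * norm)
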
Example #2
  def testCapsule(self):
    """Tests the correct output and variable declaration of layers.capsule."""
    input_tensor = tf.random_uniform((4, 3, 2))
    output = layers.capsule(
        input_tensor=input_tensor,
        input_dim=3,
        output_dim=2,
        layer_name='capsule',
        input_atoms=2,
        output_atoms=5,
        num_routing=3,
        leaky=False)
    self.assertListEqual(output.get_shape().as_list(), [4, 2, 5])
    trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    self.assertEqual(len(trainable_vars), 2)
    self.assertStartsWith(trainable_vars[0].name, 'capsule')
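
testCapsule runs layers.capsule with num_routing=3. To show roughly what those
iterations do, here is a stripped-down sketch of routing-by-agreement over
precomputed prediction vectors ("votes"). _routing_sketch is a hypothetical
helper that reuses the _squash sketch above; the real layers.capsule also adds
biases, supports leaky routing, and runs the loop inside a tf.while_loop.

def _routing_sketch(votes, num_routing):
  # votes: [batch, input_dim, output_dim, output_atoms], the per-pair
  # predictions u_hat_{j|i} of each input capsule for each output capsule.
  logits = tf.zeros(tf.shape(votes)[:3])   # b_ij, start with uniform routing
  for _ in range(num_routing):
    route = tf.nn.softmax(logits, axis=2)  # c_ij, normalized over outputs j
    # s_j = sum_i c_ij * u_hat_{j|i}
    preactivate = tf.reduce_sum(route[..., None] * votes, axis=1)
    activation = _squash(preactivate)      # v_j: [batch, output_dim, atoms]
    # Agreement update: b_ij += u_hat_{j|i} . v_j
    logits += tf.reduce_sum(votes * activation[:, None], axis=-1)
  return activation
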
Example #3
  def _build_capsule(self, input_tensor, num_classes):
    """Adds the capsule layers.

    A slim convolutional capsule layer transforms the input tensor into
    capsule format. The nonlinearity of the slim convolutional capsule is the
    squash function, but there is no routing: each spatial instantiation of a
    capsule is derived as in a traditional convolutional layer.
    To connect the convolutional capsule layer to the top fully connected
    capsule layer, the grid positions of the convolutional capsules are merged
    with the capsule-type dimension, and capsule2 learns a different
    transformation for each of them.

    Args:
      input_tensor: 5D input tensor, shape [batch, 1, 256, height, width].
      num_classes: Number of object categories, used as the output dimension.

    Returns:
      A 3D tensor of the top capsule layer with num_classes capsule
      embeddings.
    """
    capsule1 = layers.conv_slim_capsule(
        input_tensor,
        input_dim=1,
        output_dim=self._hparams.num_prime_capsules,
        layer_name='conv_capsule1',
        num_routing=1,
        input_atoms=self._hparams.conv1_channel,
        output_atoms=self._hparams.prime_capsule_dim,
        stride=2,
        kernel_size=9,
        padding=self._hparams.padding,
        leaky=self._hparams.leaky,
    )
    capsule1_atom_last = tf.transpose(capsule1, [0, 1, 3, 4, 2])
    capsule1_3d = tf.reshape(
        capsule1_atom_last,
        [tf.shape(input_tensor)[0], -1, self._hparams.prime_capsule_dim])
    _, _, _, height, width = capsule1.get_shape()
    input_dim = self._hparams.num_prime_capsules * height.value * width.value
    return layers.capsule(
        input_tensor=capsule1_3d,
        input_dim=input_dim,
        output_dim=num_classes,
        layer_name='capsule2',
        input_atoms=self._hparams.prime_capsule_dim,
        output_atoms=self._hparams.digit_capsule_dim,
        num_routing=self._hparams.routing,
        leaky=self._hparams.leaky,
    )
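
The transpose-then-reshape bookkeeping in these functions is easy to get wrong,
so here is a quick NumPy check using the MNIST-sized shapes from the comments
in the first example (32 primary capsule types, a 6x6 grid, 8 atoms; the batch
size of 4 is arbitrary and purely illustrative):

import numpy as np

capsule1 = np.zeros((4, 32, 8, 6, 6))          # [batch, channels, atoms, h, w]
atom_last = capsule1.transpose(0, 1, 3, 4, 2)  # [batch, channels, h, w, atoms]
capsule1_3d = atom_last.reshape(4, -1, 8)      # merge channels with grid cells
assert capsule1_3d.shape == (4, 32 * 6 * 6, 8)  # 1152 primary capsules, 8 atoms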