Code example #1
File: layers.py  Project: AI-RG/baselines
import tensorflow as tf  # TF 1.x API

import variables  # project-local helpers providing weight_variable / bias_variable


def capsule(input_tensor,
            input_dim,
            output_dim,
            layer_name,
            input_atoms=8,
            output_atoms=8,
            **routing_args):
    """Builds a fully connected capsule layer.

  Given an input tensor of shape `[batch, input_dim, input_atoms]`, this op
  performs the following:

    1. For each input capsule, multiples it with the weight variable to get
      votes of shape `[batch, input_dim, output_dim, output_atoms]`.
    2. Scales the votes for each output capsule by iterative routing.
    3. Squashes the output of each capsule to have norm less than one.

  Each capsule of this layer has one weight tensor for each capsules of layer
  below. Therefore, this layer has the following number of trainable variables:
    w: [input_dim * num_in_atoms, output_dim * num_out_atoms]
    b: [output_dim * num_out_atoms]

  Args:
    input_tensor: tensor, activation output of the layer below.
    input_dim: scalar, number of capsules in the layer below.
    output_dim: scalar, number of capsules in this layer.
    layer_name: string, Name of this layer.
    input_atoms: scalar, number of units in each capsule of input layer.
    output_atoms: scalar, number of units in each capsule of output layer.
    **routing_args: dictionary {leaky, num_routing}, args for routing function.

  Returns:
    Tensor of activations for this layer of shape
      `[batch, output_dim, output_atoms]`.
  """
    with tf.variable_scope(layer_name):
        # weights variable will hold the state of the weights for the layer
        weights = variables.weight_variable(
            [input_dim, input_atoms, output_dim * output_atoms])
        biases = variables.bias_variable([output_dim, output_atoms])
        with tf.name_scope('Wx_plus_b'):
            # Depthwise matmul: [b, d, c] * [d, c, o_c] -> [b, d, o_c].
            # To do this: tile the input, multiply element-wise, and
            # reduce-sum over the input_atoms dimension.
            input_tiled = tf.tile(tf.expand_dims(input_tensor, -1),
                                  [1, 1, 1, output_dim * output_atoms])
            votes = tf.reduce_sum(input_tiled * weights, axis=2)
            votes_reshaped = tf.reshape(
                votes, [-1, input_dim, output_dim, output_atoms])
        with tf.name_scope('routing'):
            input_shape = tf.shape(input_tensor)
            logit_shape = tf.stack([input_shape[0], input_dim, output_dim])
            activations = _update_routing(votes=votes_reshaped,
                                          biases=biases,
                                          logit_shape=logit_shape,
                                          num_dims=4,
                                          input_dim=input_dim,
                                          output_dim=output_dim,
                                          **routing_args)
        return activations
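
Step 3 of the docstring above is the squash nonlinearity from Sabour et al. (2017), "Dynamic Routing Between Capsules". In this project the squashing happens inside _update_routing, whose source is not shown in this listing; the following standalone sketch of the formula is illustrative (the name _squash and the epsilon are assumptions, not taken from the project):

def _squash(input_tensor):
    """Squashes each capsule vector to norm < 1, preserving its direction.

    v = (|s|^2 / (1 + |s|^2)) * (s / |s|), applied over the last axis.
    """
    norm_squared = tf.reduce_sum(tf.square(input_tensor),
                                 axis=-1, keep_dims=True)
    norm = tf.sqrt(norm_squared + 1e-9)  # epsilon avoids division by zero
    return (norm_squared / (1.0 + norm_squared)) * (input_tensor / norm)

Short vectors are driven toward zero and long vectors toward unit norm, which is what lets a capsule's length act as an activation probability.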
Code example #2
File: conv_model.py  Project: AI-RG/baselines
    def inference(self, features):
        """Adds the inference graph ops.

    Builds the architecture of the neural net to drive logits from features.
    The inference graph includes a series of convolution and fully connected
    layers and outputs a [batch, 10] tensor as the logits.

    Args:
      features: Dictionary of batched feature tensors like images and labels.
    Returns:
      A model.Inferred named tuple of expected outputs of the model like
      'logits' and 'remakes' for the reconstructions (to be added).
    """
        image = features['images']
        image_dim = features['height']
        image_depth = features['depth']
        image_4d = tf.reshape(image, [-1, image_depth, image_dim, image_dim])
        conv = self._add_convs(image_4d, [image_depth, 512, 256])
        hidden1 = tf.contrib.layers.flatten(conv)

        with tf.variable_scope('fc1') as scope:
            dim = hidden1.get_shape()[1].value
            weights = variables.weight_variable(shape=[dim, 1024],
                                                stddev=0.1,
                                                verbose=self._hparams.verbose)
            biases = variables.bias_variable(shape=[1024],
                                             verbose=self._hparams.verbose)
            pre_activation = tf.matmul(hidden1, weights) + biases
            hidden2 = tf.nn.relu(pre_activation, name=scope.name)

        with tf.variable_scope('softmax_layer') as scope:
            weights = variables.weight_variable(
                shape=[1024, features['num_classes']],
                stddev=0.1,
                verbose=self._hparams.verbose)
            biases = variables.bias_variable(shape=[features['num_classes']],
                                             verbose=self._hparams.verbose)
            logits = tf.matmul(hidden2, weights) + biases

        return model.Inferred(logits, None)
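
A hedged driver for inference, in TF 1.x graph mode. Only the dictionary keys come from the code above; the concrete values (28x28 grayscale images, 10 classes) are assumptions for illustration, and NCHW convolutions generally require a GPU build of TF 1.x:

# Illustrative only: keys match the code above, values are assumptions.
features = {
    'images': tf.placeholder(tf.float32, [None, 28 * 28]),  # flattened pixels
    'height': 28,   # used as image_dim in the reshape above
    'depth': 1,     # grayscale, so the NCHW tensor is [batch, 1, 28, 28]
    'num_classes': 10,
}
inferred = model.inference(features)  # model: an instance of this class
logits = inferred.logits              # shape [batch, 10]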
Code example #3
File: variables_test.py  Project: AI-RG/baselines
    def testVariableDeclaration(self):
        """Checks the shape, initialization, and naming of declared variables."""
        with tf.Graph().as_default():
            with self.test_session() as sess:
                weights = variables.weight_variable((1, 2), stddev=0.1)
                # (1,) is a one-element shape tuple; a bare (1) is just the int 1.
                bias = variables.bias_variable((1,))
                sess.run(tf.global_variables_initializer())
                w_value, b_value = sess.run([weights, bias])
                self.assertNear(w_value[0][0], 0.0, 0.2)
                self.assertNear(w_value[0][1], 0.0, 0.2)
                self.assertEqual(b_value, 0.1)
                trainable_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES)
                self.assertEqual(len(trainable_vars), 2)
                self.assertStartsWith(trainable_vars[0].name, 'weights')
                self.assertStartsWith(trainable_vars[1].name, 'biases')
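
The variables module itself is not part of this listing, but the assertions above pin down its behavior: weights cluster within 0.2 of zero for stddev=0.1 (consistent with a truncated normal, which resamples beyond two standard deviations), biases initialize to 0.1, and the variables are named 'weights' and 'biases'. A minimal sketch consistent with those assertions (the verbose flag and exact initializers are assumptions):

def weight_variable(shape, stddev=0.1, verbose=False):
    # A truncated normal keeps every sample within 2 * stddev of the mean,
    # which is what bounds each weight by 0.2 in the test above.
    weights = tf.get_variable(
        'weights', shape,
        initializer=tf.truncated_normal_initializer(stddev=stddev))
    if verbose:
        tf.summary.histogram('weights', weights)
    return weights

def bias_variable(shape, verbose=False):
    # Constant 0.1 matches assertEqual(b_value, 0.1) in the test above.
    biases = tf.get_variable(
        'biases', shape, initializer=tf.constant_initializer(0.1))
    if verbose:
        tf.summary.histogram('biases', biases)
    return biases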
Code example #4
File: conv_model.py  Project: AI-RG/baselines
    def _add_convs(self, input_tensor, channels):
        """Adds the convolution layers.

    Adds a series of convolution layers with ReLU nonlinearity and pooling
    after each of them.

    Args:
      input_tensor: a 4D float tensor as the input to the first convolution.
      channels: A list of channel sizes for input_tensor and following
        convolution layers. Number of channels in input tensor should be
        equal to channels[0].
    Returns:
      A 4D tensor as the output of the last pooling layer.
    """
        for i in range(1, len(channels)):
            with tf.variable_scope('conv{}'.format(i)) as scope:
                kernel = variables.weight_variable(
                    shape=[5, 5, channels[i - 1], channels[i]],
                    stddev=5e-2,
                    verbose=self._hparams.verbose)
                conv = tf.nn.conv2d(input_tensor,
                                    kernel, [1, 1, 1, 1],
                                    padding=self._hparams.padding,
                                    data_format='NCHW')
                biases = variables.bias_variable([channels[i]],
                                                 verbose=self._hparams.verbose)
                pre_activation = tf.nn.bias_add(conv,
                                                biases,
                                                data_format='NCHW')
                relu = tf.nn.relu(pre_activation, name=scope.name)
                if self._hparams.verbose:
                    tf.summary.histogram('activation', relu)
                input_tensor = tf.contrib.layers.max_pool2d(relu,
                                                            kernel_size=2,
                                                            stride=2,
                                                            data_format='NCHW',
                                                            padding='SAME')

        return input_tensor
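
For concreteness, a shape trace of the stack that inference builds above with channels [1, 512, 256]. The 28x28 MNIST-style input size and 'SAME' padding are assumptions; only the channel list appears in the code:

# NCHW shape trace, assuming 28x28 inputs and 'SAME' padding:
#   image_4d:            [batch,   1, 28, 28]
#   conv1 (5x5) + ReLU:  [batch, 512, 28, 28]   ('SAME' keeps H and W)
#   max_pool 2x2, /2:    [batch, 512, 14, 14]
#   conv2 (5x5) + ReLU:  [batch, 256, 14, 14]
#   max_pool 2x2, /2:    [batch, 256,  7,  7]
#   flatten (fc1 input): [batch, 256 * 7 * 7] = [batch, 12544]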
Code example #5
File: layers.py  Project: AI-RG/baselines
def conv_slim_capsule(input_tensor,
                      input_dim,
                      output_dim,
                      layer_name,
                      input_atoms=8,
                      output_atoms=8,
                      stride=2,
                      kernel_size=5,
                      padding='SAME',
                      **routing_args):
    """Builds a slim convolutional capsule layer.

  This layer performs 2D convolution given 5D input tensor of shape
  `[batch, input_dim, input_atoms, input_height, input_width]`. Then refines
  the votes with routing and applies Squash non linearity for each capsule.

  Each capsule in this layer is a convolutional unit and shares its kernel over
  the position grid and different capsules of layer below. Therefore, number
  of trainable variables in this layer is:

    kernel: [kernel_size, kernel_size, input_atoms, output_dim * output_atoms]
    bias: [output_dim, output_atoms]

  Output of a conv2d layer is a single capsule with channel number of atoms.
  Therefore conv_slim_capsule is suitable to be added on top of a conv2d layer
  with num_routing=1, input_dim=1 and input_atoms=conv_channels.

  Args:
    input_tensor: tensor, of rank 5. Last two dimmensions representing height
      and width position grid.
    input_dim: scalar, number of capsules in the layer below.
    output_dim: scalar, number of capsules in this layer.
    layer_name: string, Name of this layer.
    input_atoms: scalar, number of units in each capsule of input layer.
    output_atoms: scalar, number of units in each capsule of output layer.
    stride: scalar, stride of the convolutional kernel.
    kernel_size: scalar, convolutional kernels are [kernel_size, kernel_size].
    padding: 'SAME' or 'VALID', padding mechanism for convolutional kernels.
    **routing_args: dictionary {leaky, num_routing}, args to be passed to the
      update_routing function.

  Returns:
    Tensor of activations for this layer of shape
      `[batch, output_dim, output_atoms, out_height, out_width]`. If padding is
      'SAME', out_height = in_height and out_width = in_width. Otherwise, height
      and width is adjusted with same rules as 'VALID' in tf.nn.conv2d.
  """
    with tf.variable_scope(layer_name):
        kernel = variables.weight_variable(shape=[
            kernel_size, kernel_size, input_atoms, output_dim * output_atoms
        ])
        biases = variables.bias_variable([output_dim, output_atoms, 1, 1])
        votes, votes_shape, input_shape = _depthwise_conv3d(
            input_tensor, kernel, input_dim, output_dim, input_atoms,
            output_atoms, stride, padding)

        with tf.name_scope('routing'):
            logit_shape = tf.stack([
                input_shape[0], input_dim, output_dim, votes_shape[2],
                votes_shape[3]
            ])
            biases_replicated = tf.tile(biases,
                                        [1, 1, votes_shape[2], votes_shape[3]])
            activations = _update_routing(votes=votes,
                                          biases=biases_replicated,
                                          logit_shape=logit_shape,
                                          num_dims=6,
                                          input_dim=input_dim,
                                          output_dim=output_dim,
                                          **routing_args)
        return activations
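
As the docstring notes, conv_slim_capsule composes naturally with a plain conv2d layer: treat the conv output as a single capsule whose atoms are its channels. A hedged sketch tying code examples #1 and #5 together; conv_out, all sizes, the kernel/stride choices, and the routing arguments are illustrative, and static spatial dimensions are assumed:

# conv_out: output of an ordinary conv2d + ReLU, NCHW shape [batch, 256, H, W].
# Treat it as one capsule (input_dim=1) with 256 atoms.
conv_capsule_in = tf.expand_dims(conv_out, 1)   # [batch, 1, 256, H, W]
primary = conv_slim_capsule(conv_capsule_in,
                            input_dim=1,
                            output_dim=32,
                            layer_name='primary_caps',
                            input_atoms=256,
                            output_atoms=8,
                            stride=2,
                            kernel_size=9,
                            padding='VALID',
                            num_routing=1,  # per the docstring above
                            leaky=False)
# primary: [batch, 32, 8, h, w]. Fold the position grid into the capsule
# dimension before the fully connected capsule layer of code example #1.
_, _, _, h, w = primary.get_shape().as_list()
primary_flat = tf.reshape(tf.transpose(primary, [0, 1, 3, 4, 2]),
                          [-1, 32 * h * w, 8])
digit_caps = capsule(primary_flat,            # result: [batch, 10, 16]
                     input_dim=32 * h * w,
                     output_dim=10,
                     layer_name='digit_caps',
                     input_atoms=8,
                     output_atoms=16,
                     num_routing=3,
                     leaky=False)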