Exemple #1
0
    def build_arch(self):
        with tf.variable_scope('Conv1_layer'):
            # Conv1, [batch, 20, 20, 256]
            conv1 = tf.contrib.layers.conv2d(self.X,
                                             num_outputs=256,
                                             kernel_size=9,
                                             stride=1,
                                             padding='VALID')
            assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

        # Primary Capsules, [batch_size, 1152, 8, 1]
        with tf.variable_scope('Primary_Caps_layer'):
            primaryCaps = CapsConv(num_units=8, with_routing=False)
            caps1 = primaryCaps(conv1, num_outputs=32, kernel_size=9, stride=2)
            assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

        # DigitCaps layer, [batchj_size, 10, 16, 1]
        with tf.variable_scope('DigitCaps_layer'):
            digitCaps = CapsConv(num_units=16, with_routing=True)
            self.caps2 = digitCaps(caps1, num_outputs=10)

        # 1. Masking
        with tf.variable_scope('Masking'):

            # a). calc ||v_c||, then do softmax(||v_c||)
            # [batch_size, 10, 16, 1] => [batch_size, 10, 1, 1]
            self.v_length = tf.sqrt(
                tf.reduce_sum(tf.square(self.caps2), axis=2, keep_dims=True))
            self.softmax_v = tf.nn.softmax(self.v_length, dim=1)
            assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

            # b). pick out the index of the max softmax val of the 10 caps
            # [batch_size, 10, 1, 1] => [batch_size] (index)
            argmax_idx = tf.argmax(self.softmax_v,
                                   axis=1,
                                   output_type=tf.int32)
            assert argmax_idx.get_shape() == [cfg.batch_size, 10, 1, 1]

            # c). indexing
            masked_v = []
            argmax_idx = tf.reshape(argmax_idx, shape=(cfg.batch_size, ))
            for batch_size in range(cfg.batch_size):
                v = self.caps2[batch_size][argmax_idx[batch_size], :]
                masked_v.append(tf.reshape(v, shape=(1, 1, 16, 1)))

            self.masked_v = tf.concat(masked_v, axis=0)
            assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]

        # 2. reconstruct the MNIST images with 3 FC layers
        # [batch_size, 1, 16, 1] => [batch_size, 16] => [batch_size, 512]
        with tf.variable_scope('Decoder'):
            vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
            fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)
            assert fc1.get_shape() == [cfg.batch_size, 512]
            fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
            assert fc2.get_shape() == [cfg.batch_size, 1024]
            self.decoded = tf.contrib.layers.fully_connected(
                fc2, num_outputs=784, activation_fn=tf.sigmoid)
    def build_arch(self):
        with tf.variable_scope('Conv1_layer'):
            # Conv1, [batch_size, 20, 20, 256]
            conv1 = tf.contrib.layers.conv2d(self.X, num_outputs=256,
                                             kernel_size=9, stride=1,
                                             padding='VALID')
            assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

        # TODO: Rewrite the 'CapsConv' class as a function, the capsLay
        # function should be encapsulated into two functions, one like conv2d
        # and another is fully_connected in Tensorflow.
        # Primary Capsules, [batch_size, 1152, 8, 1]
        with tf.variable_scope('PrimaryCaps_layer'):
            primaryCaps = CapsConv(num_units=8, with_routing=False)
            caps1 = primaryCaps(conv1, num_outputs=32, kernel_size=9, stride=2)
            assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

        # DigitCaps layer, [batch_size, 10, 16, 1]
        with tf.variable_scope('DigitCaps_layer'):
            digitCaps = CapsConv(num_units=16, with_routing=True)
            self.caps2 = digitCaps(caps1, num_outputs=10)

        # Decoder structure in Fig. 2
        # 1. Do masking, how:
        with tf.variable_scope('Masking'):
            # a). calc ||v_c||, then do softmax(||v_c||)
            # [batch_size, 10, 16, 1] => [batch_size, 10, 1, 1]
            self.v_length = tf.sqrt(tf.reduce_sum(tf.square(self.caps2),
                                                  axis=2, keep_dims=True))
            self.softmax_v = tf.nn.softmax(self.v_length, dim=1)
            assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

            # b). pick out the index of max softmax val of the 10 caps
            # [batch_size, 10, 1, 1] => [batch_size] (index)
            argmax_idx = tf.argmax(self.softmax_v, axis=1, output_type=tf.int32)
            assert argmax_idx.get_shape() == [cfg.batch_size, 1, 1]

            # c). indexing
            # It's not easy to understand the indexing process with argmax_idx
            # as we are 3-dim animal
            masked_v = []
            argmax_idx = tf.reshape(argmax_idx, shape=(cfg.batch_size, ))
            for batch_size in range(cfg.batch_size):
                v = self.caps2[batch_size][argmax_idx[batch_size], :]
                masked_v.append(tf.reshape(v, shape=(1, 1, 16, 1)))

            self.masked_v = tf.concat(masked_v, axis=0)
            assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]

        # 2. Reconstructe the MNIST images with 3 FC layers
        # [batch_size, 1, 16, 1] => [batch_size, 16] => [batch_size, 512]
        with tf.variable_scope('Decoder'):
            vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
            fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)
            assert fc1.get_shape() == [cfg.batch_size, 512]
            fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
            assert fc2.get_shape() == [cfg.batch_size, 1024]
            self.decoded = tf.contrib.layers.fully_connected(fc2, num_outputs=784, activation_fn=tf.sigmoid)
    def build_arch(self):
        with tf.variable_scope('Conv1_layer'):
            # Conv1, [batch_size, 20, 20, 256]
            conv1 = tf.contrib.layers.conv2d(self.X,
                                             num_outputs=256,
                                             kernel_size=9,
                                             stride=1,
                                             padding='VALID')
            assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

        # TODO: Rewrite the 'CapsConv' class as a function
        # Primary Capsules, [batch_size, 1152, 8, 1]
        with tf.variable_scope('PrimaryCaps_layer'):
            primaryCaps = CapsConv(num_units=8, with_routing=False)
            caps1 = primaryCaps(conv1, num_outputs=32, kernel_size=9, stride=2)
            assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

        # DigitCaps layer, [batch_size, 10, 16, 1]
        with tf.variable_scope('DigitCaps_layer'):
            digitCaps = CapsConv(num_units=16, with_routing=True)
            self.caps2 = digitCaps(caps1, num_outputs=10)

        with tf.variable_scope('Masking'):
            # [batch_size, 10, 16, 1] => [batch_size, 10, 1, 1]
            self.v_length = tf.sqrt(
                tf.reduce_sum(tf.square(self.caps2), axis=2, keep_dims=True))
            self.softmax_v = tf.nn.softmax(self.v_length, dim=1)
            assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

            # [batch_size, 10, 1, 1] => [batch_size] (index)
            argmax_idx = tf.argmax(self.softmax_v,
                                   axis=1,
                                   output_type=tf.int32)
            assert argmax_idx.get_shape() == [cfg.batch_size, 1, 1]

            # as we are 3-dim animal
            masked_v = []
            argmax_idx = tf.reshape(argmax_idx, shape=(cfg.batch_size, ))
            for batch_size in range(cfg.batch_size):
                v = self.caps2[batch_size][argmax_idx[batch_size], :]
                masked_v.append(tf.reshape(v, shape=(1, 1, 16, 1)))

            self.masked_v = tf.concat(masked_v, axis=0)
            assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]

        # [batch_size, 1, 16, 1] => [batch_size, 16] => [batch_size, 512]
        with tf.variable_scope('Decoder'):
            vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
            fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)
            assert fc1.get_shape() == [cfg.batch_size, 512]
            fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
            assert fc2.get_shape() == [cfg.batch_size, 1024]
            self.decoded = tf.contrib.layers.fully_connected(
                fc2, num_outputs=784, activation_fn=tf.sigmoid)
    def build_arch(self):
        # Conv1
        conv1 = tf.contrib.layers.conv2d(self.input,
                                         num_outputs=256,
                                         kernel_size=9,
                                         strides=1)

        # Primary Capsules
        primaryCaps = CapsConv(num_units=8)
        caps1 = primaryCaps(conv1, num_outputs=32, kernel_size=9, strides=2)

        # DigitCaps layer
        digitCaps = CapsConv(num_units=16)
        caps2 = digitCaps(caps1, num_outputs=10, kernel_size=9, strides=2)

        # Decoder structure in Fig. 2
        # TODO: before reconstruction the input caps2 should do masking to pick
        # out the activity vector of the correct digit capsule.
        fc1 = tf.contrib.layers.fully_connected(caps2, num_outputs=512)
        fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
        self.decoded = tf.contrib.layers.fully_connected(fc2, num_outputs=784)
Exemple #5
0
    def build_arch(self):
        # Conv1, [batch_size, 20, 20, 256]
        conv1 = tf.contrib.layers.conv2d(self.X,
                                         num_outputs=256,
                                         kernel_size=9,
                                         stride=1)

        # Primary Capsules, [batch_size, 1152, 8, 1]
        primaryCaps = CapsConv(num_units=8, with_routing=False)
        caps1 = primaryCaps(conv1, num_outputs=32, kernel_size=9, strides=2)

        # DigitCaps layer, [batch_size, 10, 16, 1]
        digitCaps = CapsConv(num_units=16, with_routing=True)
        self.caps2 = digitCaps(caps1, num_outputs=10)

        # Decoder structure in Fig. 2
        # TODO: before reconstruction the input caps2 should do masking to pick
        # out the activity vector of the correct digit capsule.
        fc1 = tf.contrib.layers.fully_connected(self.caps2, num_outputs=512)
        fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
        self.decoded = tf.contrib.layers.fully_connected(fc2, num_outputs=784)