Beispiel #1
0
class CapsuleNet(nn.Module):
    """Capsule network: conv stem, primary capsules, class capsules, MLP decoder.

    Args:
        use_cuda: Forwarded unchanged to both ``CapsuleLayer`` instances.
        image_size: Dimensions of the image to reconstruct; the decoder's
            last linear layer emits the product of these values.
        unit_size: Length of each class-capsule output vector.
        num_classes: Number of class capsules.
        fc1_size: Width of the first decoder layer.
        fc2_size: Width of the second decoder layer.
    """

    def __init__(self, use_cuda=False, image_size=(3, 32, 32), unit_size=16, num_classes=10,
                 fc1_size=512, fc2_size=1024):
        super(CapsuleNet, self).__init__()
        self.use_reconstruction_loss = True
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=256, kernel_size=9, stride=1)
        self.primary = CapsuleLayer(in_capsules=-1, in_channels=256, out_capsules=8,
                                    unit_size=-1, use_routing=False, num_iters=0,
                                    use_cuda=use_cuda)
        # NOTE(review): relies on CapsuleLayer exposing ``size(dim)`` -- confirm.
        pch = self.primary.size(1)
        self.classes = CapsuleLayer(in_capsules=8, in_channels=pch, out_capsules=num_classes,
                                    unit_size=unit_size, use_routing=True, num_iters=3,
                                    use_cuda=use_cuda)

        # Number of values the decoder must emit (product of image_size).
        num_pixels = 1
        for dim in image_size:
            num_pixels *= dim

        self.decoder = nn.Sequential(
            nn.Linear(unit_size * num_classes, fc1_size),
            nn.ReLU(inplace=True),
            nn.Linear(fc1_size, fc2_size),
            nn.ReLU(inplace=True),
            nn.Linear(fc2_size, num_pixels),
            nn.Sigmoid(),
        )

    def forward(self, x, y=None, return_reconstruction=False):
        """Run the capsule stack and, on request, the reconstruction decoder.

        Args:
            x: Input batch fed to ``conv1``.
            y: Optional mask over dim 1 of the class-capsule output (the
                original comment describes it as ``[batch_size, 10]``). When
                ``None``, a one-hot mask selecting the capsule with the
                largest L2 norm is built.
            return_reconstruction: When true, also return the decoder output.

        Returns:
            The class-capsule output, or ``(capsules, reconstruction)`` when
            ``return_reconstruction`` is true.
        """
        x = F.relu(self.conv1(x))
        x = self.primary(x)
        x = self.classes(x)

        if not return_reconstruction:
            return x

        if y is None:
            # Pick, per sample, the capsule along dim 1 with the largest norm.
            _, max_length_indices = torch.norm(x, p=2, dim=3).squeeze(2).max(dim=1)
            # The one-hot width must match dim 1, the axis the argmax ran over.
            one_hot = torch.eye(x.size(1), dtype=x.dtype, device=x.device)
            y = one_hot.index_select(dim=0, index=max_length_indices)

        mask = y.unsqueeze(2).unsqueeze(3)
        masked = mask * x

        # Decode from the masked capsules, not the raw ones.
        reconstruction = self.decoder(masked.view(masked.size(0), -1))
        return x, reconstruction
Beispiel #2
0
    def __init__(self,
                 num_conv_in_channel,
                 num_conv_out_channel,
                 num_primary_unit,
                 primary_unit_size,
                 num_classes,
                 output_unit_size,
                 num_routing,
                 use_reconstruction_loss,
                 regularization_scale,
                 cuda_enabled,
                 caps_channels=32,
                 kernel_size=9,
                 stride=2):
        """
        Assemble the network: a truncated pretrained AlexNet feature stack,
        two CapsuleLayer modules (primary and routing) and, when
        `use_reconstruction_loss` is set, a three-layer fully connected decoder.

        `num_conv_in_channel` is accepted but not read by this constructor.
        """
        super(Net, self).__init__()

        self.cuda_enabled = cuda_enabled

        # Settings kept for image reconstruction.
        # NOTE(review): the original comments called these "MNIST" sizes, but
        # the values are 224x224x3 -- confirm which dataset is intended.
        self.use_reconstruction_loss = use_reconstruction_loss
        self.image_width = 224
        self.image_height = 224
        self.image_channel = 3
        self.regularization_scale = regularization_scale

        # Layer 1: AlexNet feature extractor with its last three modules dropped.
        alexnet_features = models.alexnet(pretrained=True).features
        self.conv1 = nn.Sequential(*list(alexnet_features)[:-3])

        # Layer 2 (PrimaryCaps): capsule layer without routing.
        primary_config = dict(
            in_unit=0,
            in_channel=num_conv_out_channel,
            num_unit=num_primary_unit,
            unit_size=primary_unit_size,
            use_routing=False,
            num_routing=num_routing,
            cuda_enabled=cuda_enabled,
            caps_channels=caps_channels,
            kernel_size=kernel_size,
            stride=stride,
        )
        self.primary = CapsuleLayer(**primary_config)

        # Final layer (DigitCaps): capsule layer with routing enabled.
        digits_config = dict(
            in_unit=num_primary_unit,
            in_channel=primary_unit_size,
            num_unit=num_classes,
            unit_size=output_unit_size,
            use_routing=True,
            num_routing=num_routing,
            cuda_enabled=cuda_enabled,
        )
        self.digits = CapsuleLayer(**digits_config)

        # The decoder is only built when reconstruction loss is requested.
        if not use_reconstruction_loss:
            return

        # Three fully connected layers fed with the flattened capsule output.
        hidden_sizes = (512, 1024)
        self.fc1 = nn.Linear(num_classes * output_unit_size, hidden_sizes[0])
        self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1])
        # NOTE(review): 784 (= 28 * 28) does not match the 224x224x3 image
        # settings stored above -- confirm the intended reconstruction size.
        self.fc3 = nn.Linear(hidden_sizes[1], 784)
        # Activation functions.
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()
Beispiel #3
0
 def __init__(self, roi_size, spatial_scale, num_input_primary_capsule_channel,
              num_output_primary_capsule_channel, num_primary_unit,
              primary_unit_size, num_predictions, output_unit_size, num_routing,
              model_path=None, K=1, network_status='Train',
              regrouping_type=None, group_attention=True, high_cap_conv=False,
              single_conv=None, noTM=False,
              penalty_attention=True, fc=False, all_fc=False):
     """Build the conv backbone, the RoI layer and either capsule or FC heads.

     With ``all_fc`` false, a primary ``CapsuleLayer`` and ``K`` routing
     ``CapsuleLayer`` branches are created. With ``all_fc`` true, ``conv3``
     gains a ``BatchNorm2d`` and a single linear head ``cap_fc`` replaces the
     capsule parts (``primary`` and ``branches`` are then not defined).

     Args:
         roi_size: Output height and width passed to ``RoIAlignAvg``.
         spatial_scale: Scale passed to ``RoIAlignAvg``.
         model_path: Optional weights file; ``.pth`` is loaded through
             ``load_model`` and ``.mat`` through ``load_mat_model``.
         K: Number of routing branches.
         all_fc: Selects the fully connected variant described above.

     The remaining arguments are forwarded to the ``CapsuleLayer``
     constructors as keyword arguments.

     Raises:
         RuntimeError: If ``model_path`` has an extension other than
             ``.pth`` or ``.mat``.
     """
     super(capMDNet, self).__init__()
     self.K = K
     self.roi_size = roi_size
     self.num_predictions = num_predictions
     self.all_fc = all_fc
     self.spatial_scale = spatial_scale

     # Backbone shared by both variants. Layers are created in conv1, conv2,
     # conv3 order so seeded weight initialisation matches the original.
     conv1 = nn.Sequential(nn.Conv2d(3, 96, kernel_size=7, stride=1, padding=3),
                           nn.ReLU(),
                           LRN(),
                           nn.MaxPool2d(kernel_size=2, stride=2, padding=0))
     conv2 = nn.Sequential(nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
                           nn.ReLU(),
                           LRN(),
                           nn.MaxPool2d(kernel_size=2, stride=2, padding=0))
     conv3_layers = [nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)]
     if self.all_fc:
         # The only backbone difference between the two variants.
         conv3_layers.append(nn.BatchNorm2d(512))
     conv3_layers.append(nn.ReLU())
     conv3_layers.append(nn.Dropout(0.5))
     self.layers = nn.Sequential(OrderedDict([
         ('conv1', conv1),
         ('conv2', conv2),
         ('conv3', nn.Sequential(*conv3_layers))]))

     # RoI extraction from the feature maps.
     self.roi = RoIAlignAvg(self.roi_size, self.roi_size, self.spatial_scale)

     if self.all_fc:
         self.cap_fc = nn.Sequential(OrderedDict([
             ('cap_fc', nn.Sequential(nn.Linear(512 * 7 * 7, 2)))]))
     else:
         # Capsule parts: one primary layer without routing ...
         self.primary = nn.Sequential(OrderedDict([
             ('capPrimary', CapsuleLayer(in_unit=0,
                                         in_channel=num_input_primary_capsule_channel,
                                         num_unit=num_primary_unit,
                                         unit_size=primary_unit_size,
                                         use_routing=False,
                                         num_routing=num_routing,
                                         out_channel=num_output_primary_capsule_channel,
                                         status=network_status,
                                         regrouping_type=regrouping_type,
                                         group_attention=group_attention))]))

         # ... followed by K independent routing branches.
         self.branches = nn.ModuleList([
             nn.Sequential(CapsuleLayer(in_unit=num_primary_unit,
                                        in_channel=primary_unit_size,
                                        num_unit=num_predictions,
                                        unit_size=output_unit_size,
                                        use_routing=True,
                                        num_routing=num_routing,
                                        status=network_status,
                                        high_cap_conv=high_cap_conv,
                                        single_conv=single_conv,
                                        noTM=noTM,
                                        penalty_attention=penalty_attention,
                                        fc=fc))
             for _ in range(K)])

     if model_path is not None:
         extension = os.path.splitext(model_path)[1]
         if extension == '.pth':
             self.load_model(model_path)
         elif extension == '.mat':
             self.load_mat_model(model_path)
         else:
             raise RuntimeError("Unknown model format: %s" % (model_path))
     self.build_param_dict()
    def __init__(self, num_conv_in_channel, num_conv_out_channel,
                 num_primary_unit, primary_unit_size, num_classes,
                 output_unit_size, num_routing, use_reconstruction_loss,
                 regularization_scale, input_width, input_height,
                 cuda_enabled):
        """
        Create one ConvLayer, two CapsuleLayer modules, a linear + sigmoid
        head and, when `use_reconstruction_loss` is set, a Decoder, storing
        each as a member of the module.
        """
        super(Net, self).__init__()

        self.cuda_enabled = cuda_enabled

        # Image reconstruction settings: the input geometry is taken from the
        # arguments, with the channel count reusing the conv input channels.
        self.use_reconstruction_loss = use_reconstruction_loss
        self.image_width, self.image_height = input_width, input_height
        self.image_channel = num_conv_in_channel

        # Weight of the reconstruction term (the original author's note gives
        # 0.0005 as the usual value and mentions a sum-of-squared-errors loss).
        self.regularization_scale = regularization_scale

        # Layer 1: plain convolution with a 9x9 kernel.
        self.conv1 = ConvLayer(in_channel=num_conv_in_channel,
                               out_channel=num_conv_out_channel,
                               kernel_size=9)

        # Keyword arguments common to both capsule layers.
        shared_caps_args = dict(num_routing=num_routing,
                                cuda_enabled=cuda_enabled)

        # Layer 2 (PrimaryCaps): capsule layer with routing switched off.
        self.primary = CapsuleLayer(in_unit=0,
                                    in_channel=num_conv_out_channel,
                                    num_unit=num_primary_unit,
                                    unit_size=primary_unit_size,
                                    use_routing=False,
                                    **shared_caps_args)

        # Final capsule layer (DigitCaps): routing switched on, one output
        # capsule of length `output_unit_size` per class.
        self.digits = CapsuleLayer(in_unit=num_primary_unit,
                                   in_channel=primary_unit_size,
                                   num_unit=num_classes,
                                   unit_size=output_unit_size,
                                   use_routing=True,
                                   **shared_caps_args)

        # Linear layer from a capsule vector to one value, plus a sigmoid.
        self.fc = nn.Linear(output_unit_size, 1)
        self.sig = nn.Sigmoid()

        # Reconstruction network, only built on request.
        if use_reconstruction_loss:
            self.decoder = Decoder(num_classes, output_unit_size, input_width,
                                   input_height, num_conv_in_channel,
                                   cuda_enabled)

        self.out_features = None
Beispiel #5
0
import torch as th

from capsule_layer import CapsuleLayer
from dgl_capsule_batch import DGLBatchCapsuleLayer

# Run two capsule-layer implementations on the same seeded input and the same
# weight tensor.
device = 'cuda'
th.manual_seed(12)
x = th.randn((128, 8, 1152)).to(device)
W = th.randn(1152, 10, 16, 8).to(device)

# Both layers are constructed with identical settings.
capsule_kwargs = dict(in_unit=8,
                      in_channel=1152,
                      num_unit=10,
                      use_routing=True,
                      unit_size=16,
                      num_routing=3,
                      cuda_enabled=True)

# Reference implementation: its weight takes an extra leading dimension.
model2 = CapsuleLayer(**capsule_kwargs).to(device)
model2.weight.data = W.clone().unsqueeze(0)
kkk2 = model2(x)
print(kkk2.norm())

# DGL-based implementation: uses W as-is.
model1 = DGLBatchCapsuleLayer(**capsule_kwargs).to(device)
model1.weight.data = W.clone()
kkk1 = model1(x)
Beispiel #6
0
    def build(self):
        """Build the graph: conv layer, two capsule layers, masking, decoder.

        Reads ``self.x``, ``self.y``, ``self.batch_size`` and ``self.epsilon``.
        Sets ``self.caps2``, ``self.v_length``, ``self.arg_max_idx``,
        ``self.masked_v`` and ``self.decoded``. The static shape of each
        intermediate tensor is printed while the graph is built.
        """
        with tf.variable_scope("c1_layer"):
            # 100 output channels, 9x9 kernel, stride 1, no padding.
            c1 = tf.contrib.layers.conv2d(self.x,
                                          100,
                                          9,
                                          stride=1,
                                          padding='Valid')
            print('c1')
            print(c1.get_shape())

        with tf.variable_scope('PrimaryCaps_layer'):
            # NOTE(review): the meaning of the positional 16 and 2 depends on
            # the CapsuleLayer signature, which is not visible here -- confirm.
            primary_caps = CapsuleLayer(self.batch_size,
                                        self.epsilon,
                                        16,
                                        2,
                                        l_type='CONVOLUTION',
                                        with_routing=False)
            caps1 = primary_caps(c1, 3, kernel_size=4, stride=2)
            print('caps1: ')
            print(caps1.get_shape())

        with tf.variable_scope('FC_Caps_layer'):
            fc_caps = CapsuleLayer(self.batch_size,
                                   self.epsilon,
                                   2,
                                   16,
                                   l_type='FC',
                                   with_routing=True)
            self.caps2 = fc_caps(caps1, 3)
            print('caps2')
            print(self.caps2.get_shape())

        with tf.variable_scope('Masking'):
            # Capsule lengths: L2 norm over axis 2, with epsilon added under
            # the square root. Computed once; the original rebuilt the same
            # expression a second time further down.
            self.v_length = tf.sqrt(
                tf.reduce_sum(tf.square(self.caps2), axis=2, keep_dims=True) +
                self.epsilon)
            sm_v = tf.nn.softmax(self.v_length)
            print('sm_v')
            print(sm_v.get_shape())

            self.arg_max_idx = tf.to_int32(tf.argmax(sm_v, axis=1))
            print('arg_max_idx')
            print(self.arg_max_idx.get_shape())
            self.arg_max_idx = tf.reshape(self.arg_max_idx,
                                          shape=(self.batch_size, ))

            # Mask the capsule outputs with the labels.
            # NOTE(review): tf.squeeze without an axis drops every size-1
            # dimension, including a batch dimension of 1 -- confirm that
            # batch_size is always > 1.
            self.masked_v = tf.multiply(tf.squeeze(self.caps2),
                                        tf.reshape(self.y, (-1, 2, 1)))

        with tf.variable_scope('Decoder'):
            v_j = tf.reshape(self.masked_v, shape=(self.batch_size, -1))
            fc1 = tf.contrib.layers.fully_connected(v_j, num_outputs=512)
            assert fc1.get_shape() == [self.batch_size, 512]
            fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
            assert fc2.get_shape() == [self.batch_size, 1024]
            self.decoded = tf.contrib.layers.fully_connected(
                fc2, num_outputs=9660, activation_fn=tf.nn.relu)
    def __init__(
            self,
            image_width,  # original author's example value: 28
            image_height,  # 28
            image_channels,  # 1
            conv_inputs,  # 1
            conv_outputs,  # 256
            num_primary_units,  # 8
            primary_unit_size,  # 32*6*6
            num_output_units,  # 3
            output_unit_size):  # 32
        """Create the two conv stems, both capsule layers and the upsampling decoder.

        Args:
            image_width: Stored on the instance as ``image_width``.
            image_height: Stored on the instance as ``image_height``.
            image_channels: Stored on the instance as ``image_channels``.
            conv_inputs: Accepted but not read by this constructor.
            conv_outputs: Accepted but not read by this constructor.
            num_primary_units: Unit count of the primary capsule layer and
                input unit count of the output capsule layer.
            primary_unit_size: Unit size of the primary capsule layer and
                input channel count of the output capsule layer.
            num_output_units: Unit count of the output capsule layer.
            output_unit_size: Unit size of the output capsule layer.
        """
        super(CapsuleNetwork, self).__init__()

        self.reconstructed_image_count = 0

        self.image_channels = image_channels
        self.image_width = image_width
        self.image_height = image_height

        self.max_pool = nn.MaxPool2d(4, stride=4)

        # The `images` and `corp_images` stems use the same two conv
        # configurations but separate modules (separate weights).
        # fixme constant: the kernel sizes below are hard-coded.
        # NOTE(review): in_channels=1 is fixed even though `image_channels`
        # is a parameter -- confirm inputs are always single-channel.
        first_conv_args = dict(in_channels=1,
                               out_channels=32,
                               kernel_size=8,
                               stride=2,
                               padding=3,
                               bias=True)
        second_conv_args = dict(in_channels=32,
                                out_channels=32,
                                kernel_size=9,
                                stride=1,
                                padding=0,
                                bias=True)

        # First and second convolution layers for `images`.
        self.images_conv1 = CreateConv(**first_conv_args)
        self.images_conv2 = CreateConv(**second_conv_args)

        # First and second convolution layers for `corp_images`.
        self.corp_images_conv1 = CreateConv(**first_conv_args)
        self.corp_images_conv2 = CreateConv(**second_conv_args)

        # Primary capsules take 64 input channels, twice the 32 channels
        # each stem's second conv outputs.
        self.primary = CapsuleLayer(in_units=0,
                                    in_channels=64,
                                    num_units=num_primary_units,
                                    unit_size=primary_unit_size,
                                    use_routing=False)

        self.digits = CapsuleLayer(in_units=num_primary_units,
                                   in_channels=primary_unit_size,
                                   num_units=num_output_units,
                                   unit_size=output_unit_size,
                                   use_routing=True)

        # Reconstruction path: one linear layer, three upsampling conv
        # layers, then a 3x3 conv down to a single channel.
        self.reconstruct0 = nn.Linear(32, 256)
        self.reconstruct1 = CapsuleUpsampleConvLayer(16, 4, 8, 'nearest')
        self.reconstruct2 = CapsuleUpsampleConvLayer(4, 8, 4, 'nearest')
        self.reconstruct3 = CapsuleUpsampleConvLayer(8, 16, 4, 'nearest')
        self.compact_layer = nn.Conv2d(
            in_channels=16,
            out_channels=1,
            kernel_size=3,  # fixme constant
            stride=1,
            padding=1,
            bias=True)

        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()
        self.mish = Mish()

        self.dropout = nn.Dropout(p=0.7)