import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import reduce

from capsule_layer import CapsuleLayer  # project-local capsule implementation


class CapsuleNet(nn.Module):
    def __init__(self, use_cuda=False, image_size=[3, 32, 32], unit_size=16,
                 num_classes=10, fc1_size=512, fc2_size=1024):
        super(CapsuleNet, self).__init__()
        self.use_reconstruction_loss = True
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=256,
                               kernel_size=9, stride=1)
        self.primary = CapsuleLayer(in_capsules=-1, in_channels=256,
                                    out_capsules=8, unit_size=-1,
                                    use_routing=False, num_iters=0,
                                    use_cuda=use_cuda)
        # Flattened primary-capsule count per unit. nn.Module has no .size(),
        # so derive it from the feature-map geometry instead: 32x32 input ->
        # 24x24 after conv1 (9x9, stride 1) -> 8x8 after the 9x9 stride-2
        # primary convolutions, with the standard 32 primary-capsule channels.
        pch = 32 * 8 * 8
        self.classes = CapsuleLayer(in_capsules=8, in_channels=pch,
                                    out_capsules=num_classes,
                                    unit_size=unit_size, use_routing=True,
                                    num_iters=3, use_cuda=use_cuda)
        self.decoder = nn.Sequential(
            nn.Linear(unit_size * num_classes, fc1_size),
            nn.ReLU(inplace=True),
            nn.Linear(fc1_size, fc2_size),
            nn.ReLU(inplace=True),
            nn.Linear(fc2_size, reduce(lambda x, y: x * y, image_size, 1)),
            nn.Sigmoid())

    def forward(self, x, y=None):
        # x: [batch_size, 3, 32, 32]; y: [batch_size, 10] one-hot (optional)
        x = F.relu(self.conv1(x))
        x = self.primary(x)
        x = self.classes(x)  # [batch_size, num_classes, 1, unit_size]
        # Reconstruction: at test time (y is None) mask with the longest
        # capsule; at train time mask with the ground-truth one-hot label.
        if y is None:
            _, max_length_indices = torch.norm(x, p=2, dim=3).squeeze(2).max(dim=1)
            y = torch.eye(x.size(1), device=x.device).index_select(
                dim=0, index=max_length_indices)
        mask = y.unsqueeze(2).unsqueeze(3)
        masked = mask * x
        r = self.decoder(masked.view(x.size(0), -1))
        return x, r
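# A minimal smoke test, assuming CapsuleNet and the project-local
# CapsuleLayer are importable as above; the expected output shapes follow
# the capsule layout used in forward() and are an assumption.
if __name__ == "__main__":
    net = CapsuleNet(use_cuda=False)
    images = torch.randn(4, 3, 32, 32)
    labels = torch.eye(10)[torch.tensor([1, 3, 5, 7])]  # one-hot targets
    caps, recon = net(images, labels)
    print(caps.shape, recon.shape)  # expect [4, 10, 1, 16] and [4, 3072]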
def __init__(self, num_conv_in_channel, num_conv_out_channel, num_primary_unit,
             primary_unit_size, num_classes, output_unit_size, num_routing,
             use_reconstruction_loss, regularization_scale, cuda_enabled,
             caps_channels=32, kernel_size=9, stride=2):
    """
    In the constructor we instantiate one ConvLayer module and two
    CapsuleLayer modules and assign them as member variables.
    """
    super(Net, self).__init__()
    self.cuda_enabled = cuda_enabled

    # Configurations used for image reconstruction.
    self.use_reconstruction_loss = use_reconstruction_loss
    self.image_width = 224   # input image width (AlexNet-style RGB input)
    self.image_height = 224  # input image height
    self.image_channel = 3   # input image channels
    self.regularization_scale = regularization_scale

    # Layer 1: pretrained AlexNet feature extractor, used in place of the
    # conventional ConvLayer(in_channel=num_conv_in_channel,
    # out_channel=num_conv_out_channel, kernel_size=9). Dropping the last
    # three modules (conv5, its ReLU and the final max-pool) leaves
    # 256-channel feature maps.
    features = list(models.alexnet(pretrained=True).features)[:-3]
    self.conv1 = nn.Sequential(*features)

    # PrimaryCaps
    # Layer 2: Conv2D layer with `squash` activation.
    self.primary = CapsuleLayer(
        in_unit=0,
        in_channel=num_conv_out_channel,
        num_unit=num_primary_unit,
        unit_size=primary_unit_size,  # capsule outputs
        use_routing=False,
        num_routing=num_routing,
        cuda_enabled=cuda_enabled,
        caps_channels=caps_channels,
        kernel_size=kernel_size,
        stride=stride)

    # DigitCaps
    # Final layer: capsule layer where the routing algorithm is.
    self.digits = CapsuleLayer(
        in_unit=num_primary_unit,
        in_channel=primary_unit_size,
        num_unit=num_classes,
        unit_size=output_unit_size,  # 16D capsule per digit class
        use_routing=True,
        num_routing=num_routing,
        cuda_enabled=cuda_enabled)

    # Reconstruction network
    if use_reconstruction_loss:
        # The output of the digit capsules is fed into a decoder consisting
        # of 3 fully connected layers that model the pixel intensities.
        # Note: fc3 still emits 784 values (a 28x28 reconstruction kept from
        # the original MNIST decoder), not 224*224*3.
        fc1_output_size = 512
        fc2_output_size = 1024
        self.fc1 = nn.Linear(num_classes * output_unit_size, fc1_output_size)
        self.fc2 = nn.Linear(fc1_output_size, fc2_output_size)
        self.fc3 = nn.Linear(fc2_output_size, 784)
        # Activation functions
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()
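# A minimal sketch of the reconstruction path through fc1-fc3, assuming
# `masked` holds the digit capsules with all but the target class zeroed
# out; the method name and flow are an assumption (the forward pass is not
# shown in the original).
def reconstruct(self, masked):
    v = masked.view(masked.size(0), -1)  # flatten to [batch, classes * 16]
    v = self.relu(self.fc1(v))
    v = self.relu(self.fc2(v))
    return self.sigmoid(self.fc3(v))     # [batch, 784] pixel intensities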
def __init__(self, roi_size, spatial_scale, num_input_primary_capsule_channel,
             num_output_primary_capsule_channel, num_primary_unit,
             primary_unit_size, num_predictions, output_unit_size, num_routing,
             model_path=None, K=1, network_status='Train', regrouping_type=None,
             group_attention=True, high_cap_conv=False, single_conv=None,
             noTM=False, penalty_attention=True, fc=False, all_fc=False):
    super(capMDNet, self).__init__()
    self.K = K
    self.roi_size = roi_size
    self.num_predictions = num_predictions
    self.all_fc = all_fc
    self.spatial_scale = spatial_scale

    if not self.all_fc:
        # Shared conv backbone; a Dropout layer is added after conv3.
        # Another conv layer could be added here to reduce the map dimension.
        self.layers = nn.Sequential(OrderedDict([
            ('conv1', nn.Sequential(nn.Conv2d(3, 96, kernel_size=7, stride=1, padding=3),
                                    nn.ReLU(), LRN(),
                                    nn.MaxPool2d(kernel_size=2, stride=2, padding=0))),
            ('conv2', nn.Sequential(nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
                                    nn.ReLU(), LRN(),
                                    nn.MaxPool2d(kernel_size=2, stride=2, padding=0))),
            ('conv3', nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
                                    nn.ReLU(), nn.Dropout(0.5)))]))

        # ROI extraction from the feature maps given bbox coordinates in the
        # image. Alternatives: RoIPooling2D or _RoIPooling (another RoI
        # pooling version).
        self.roi = RoIAlignAvg(self.roi_size, self.roi_size, self.spatial_scale)

        # Capsule parts
        self.primary = nn.Sequential(OrderedDict([
            ('capPrimary', CapsuleLayer(in_unit=0,
                                        in_channel=num_input_primary_capsule_channel,
                                        num_unit=num_primary_unit,
                                        unit_size=primary_unit_size,
                                        use_routing=False,
                                        num_routing=num_routing,
                                        out_channel=num_output_primary_capsule_channel,
                                        status=network_status,
                                        regrouping_type=regrouping_type,
                                        group_attention=group_attention))]))
        # One routed capsule branch per domain (K branches).
        self.branches = nn.ModuleList([
            nn.Sequential(CapsuleLayer(in_unit=num_primary_unit,
                                       in_channel=primary_unit_size,
                                       num_unit=num_predictions,
                                       unit_size=output_unit_size,
                                       use_routing=True,
                                       num_routing=num_routing,
                                       status=network_status,
                                       high_cap_conv=high_cap_conv,
                                       single_conv=single_conv,
                                       noTM=noTM,
                                       penalty_attention=penalty_attention,
                                       fc=fc))
            for _ in range(K)])
    else:
        # Fully connected head instead of capsules; conv3 additionally uses
        # BatchNorm (the difference from the branch above).
        self.layers = nn.Sequential(OrderedDict([
            ('conv1', nn.Sequential(nn.Conv2d(3, 96, kernel_size=7, stride=1, padding=3),
                                    nn.ReLU(), LRN(),
                                    nn.MaxPool2d(kernel_size=2, stride=2, padding=0))),
            ('conv2', nn.Sequential(nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
                                    nn.ReLU(), LRN(),
                                    nn.MaxPool2d(kernel_size=2, stride=2, padding=0))),
            ('conv3', nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
                                    nn.BatchNorm2d(512),
                                    nn.ReLU(), nn.Dropout(0.5)))]))
        self.roi = RoIAlignAvg(self.roi_size, self.roi_size, self.spatial_scale)
        self.cap_fc = nn.Sequential(OrderedDict([
            ('cap_fc', nn.Sequential(nn.Linear(512 * 7 * 7, 2)))]))

    if model_path is not None:
        if os.path.splitext(model_path)[1] == '.pth':
            self.load_model(model_path)
        elif os.path.splitext(model_path)[1] == '.mat':
            self.load_mat_model(model_path)
        else:
            raise RuntimeError("Unknown model format: %s" % (model_path))
    self.build_param_dict()
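# A minimal sketch of the forward data flow, assuming an image tensor and a
# [num_rois, 5] rois tensor (batch index + box coordinates); the method body
# is an assumption built only from the members defined above, with branch k
# selecting the per-domain capsule head.
def forward(self, image, rois, k=0):
    feat = self.layers(image)                  # shared conv features
    pooled = self.roi(feat, rois)              # [num_rois, 512, 7, 7]
    if self.all_fc:
        return self.cap_fc(pooled.view(pooled.size(0), -1))
    caps = self.primary(pooled)                # primary capsules
    return self.branches[k](caps)              # routed prediction capsules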
def __init__(self, num_conv_in_channel, num_conv_out_channel, num_primary_unit,
             primary_unit_size, num_classes, output_unit_size, num_routing,
             use_reconstruction_loss, regularization_scale, input_width,
             input_height, cuda_enabled):
    """
    In the constructor we instantiate one ConvLayer module and two
    CapsuleLayer modules and assign them as member variables.
    """
    super(Net, self).__init__()
    self.cuda_enabled = cuda_enabled

    # Configurations used for image reconstruction.
    self.use_reconstruction_loss = use_reconstruction_loss

    # Input image size and number of channels.
    # By default, for MNIST, the image width and height is 28x28
    # and 1 channel for black/white.
    self.image_width = input_width
    self.image_height = input_height
    self.image_channel = num_conv_in_channel

    # Also known as lambda reconstruction. Default value is 0.0005.
    # We use the sum of squared errors (SSE), similar to the paper.
    self.regularization_scale = regularization_scale

    # Layer 1: Conventional Conv2d layer.
    self.conv1 = ConvLayer(in_channel=num_conv_in_channel,
                           out_channel=num_conv_out_channel,
                           kernel_size=9)

    # PrimaryCaps
    # Layer 2: Conv2D layer with `squash` activation.
    self.primary = CapsuleLayer(
        in_unit=0,
        in_channel=num_conv_out_channel,
        num_unit=num_primary_unit,
        unit_size=primary_unit_size,  # capsule outputs
        use_routing=False,
        num_routing=num_routing,
        cuda_enabled=cuda_enabled)

    # DigitCaps
    # Final layer: Capsule layer where the routing algorithm is.
    self.digits = CapsuleLayer(
        in_unit=num_primary_unit,
        in_channel=primary_unit_size,
        num_unit=num_classes,
        unit_size=output_unit_size,  # 16D capsule per digit class
        use_routing=True,
        num_routing=num_routing,
        cuda_enabled=cuda_enabled)

    # Maps each output capsule to a single score in (0, 1).
    self.fc = nn.Linear(output_unit_size, 1)
    self.sig = nn.Sigmoid()

    # Reconstruction network
    if use_reconstruction_loss:
        self.decoder = Decoder(num_classes, output_unit_size, input_width,
                               input_height, num_conv_in_channel, cuda_enabled)

    # Holds the digit-capsule outputs of the most recent forward pass.
    self.out_features = None
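# A hypothetical instantiation with the standard MNIST CapsNet
# hyperparameters (8 primary units of size 32*6*6 = 1152, 16D digit
# capsules, 3 routing iterations); the concrete values are assumptions
# taken from the layer comments above, not from the original source.
net = Net(num_conv_in_channel=1, num_conv_out_channel=256,
          num_primary_unit=8, primary_unit_size=1152,
          num_classes=10, output_unit_size=16, num_routing=3,
          use_reconstruction_loss=True, regularization_scale=0.0005,
          input_width=28, input_height=28, cuda_enabled=False)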
import torch as th

from capsule_layer import CapsuleLayer
from dgl_capsule_batch import DGLBatchCapsuleLayer

device = 'cuda'
th.manual_seed(12)
# th.cuda.seed(12)

# Shared random input (a batch of 1152 8D primary capsules) and a shared
# weight tensor, so both implementations route with identical parameters.
x = th.randn((128, 8, 1152)).to(device)
W = th.randn(1152, 10, 16, 8).to(device)

# Reference capsule-layer implementation.
model2 = CapsuleLayer(in_unit=8, in_channel=1152, num_unit=10,
                      use_routing=True, unit_size=16, num_routing=3,
                      cuda_enabled=True).to("cuda")
model2.weight.data = W.clone().unsqueeze(0)
kkk2 = model2(x)
print(kkk2.norm())

# DGL batched implementation with the same weights.
model1 = DGLBatchCapsuleLayer(in_unit=8, in_channel=1152, num_unit=10,
                              use_routing=True, unit_size=16, num_routing=3,
                              cuda_enabled=True).to("cuda")
model1.weight.data = W.clone()
kkk1 = model1(x)
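# A natural follow-up check (an assumption, not in the original script):
# with identical weights and input, the two layers should agree numerically,
# up to singleton dimensions and floating-point tolerance.
print(kkk1.norm())
print(th.allclose(kkk1.squeeze(), kkk2.squeeze(), atol=1e-5))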
def build(self):
    with tf.variable_scope("c1_layer"):
        c1 = tf.contrib.layers.conv2d(self.x, 100, 9, stride=1,
                                      padding='VALID')
        print('c1')
        print(c1.get_shape())

    with tf.variable_scope('PrimaryCaps_layer'):
        primary_caps = CapsuleLayer(self.batch_size, self.epsilon, 16, 2,
                                    l_type='CONVOLUTION', with_routing=False)
        caps1 = primary_caps(c1, 3, kernel_size=4, stride=2)
        print('caps1: ')
        print(caps1.get_shape())

    with tf.variable_scope('FC_Caps_layer'):
        fc_caps = CapsuleLayer(self.batch_size, self.epsilon, 2, 16,
                               l_type='FC', with_routing=True)
        self.caps2 = fc_caps(caps1, 3)
        print('caps2')
        print(self.caps2.get_shape())
        # assert self.caps2.get_shape() == [self.batch_size, 2, 16, 1]

    with tf.variable_scope('Masking'):
        # Capsule lengths, stabilized with epsilon.
        self.v_length = tf.sqrt(
            tf.reduce_sum(tf.square(self.caps2), axis=2, keep_dims=True)
            + self.epsilon)
        sm_v = tf.nn.softmax(self.v_length)
        print('sm_v')
        print(sm_v.get_shape())
        # assert sm_v.get_shape() == [self.batch_size, 2, 1, 1]

        self.arg_max_idx = tf.to_int32(tf.argmax(sm_v, axis=1))
        print('arg_max_idx')
        print(self.arg_max_idx.get_shape())
        # assert self.arg_max_idx.get_shape() == [self.batch_size, 1, 1]
        self.arg_max_idx = tf.reshape(self.arg_max_idx,
                                      shape=(self.batch_size, ))

        # Mask the output capsules with the ground-truth labels.
        self.masked_v = tf.multiply(tf.squeeze(self.caps2),
                                    tf.reshape(self.y, (-1, 2, 1)))

    with tf.variable_scope('Decoder'):
        v_j = tf.reshape(self.masked_v, shape=(self.batch_size, -1))
        fc1 = tf.contrib.layers.fully_connected(v_j, num_outputs=512)
        assert fc1.get_shape() == [self.batch_size, 512]
        fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
        assert fc2.get_shape() == [self.batch_size, 1024]
        self.decoded = tf.contrib.layers.fully_connected(
            fc2, num_outputs=9660, activation_fn=tf.nn.relu)
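# A sketch of the margin loss that typically pairs with this graph (an
# assumption; no loss appears in the original build()). The m+ = 0.9,
# m- = 0.1 margins and the 0.5 down-weighting of absent classes follow
# Sabour et al. (2017), here with the 2 classes used above.
def build_loss(self):
    v_length = tf.reshape(self.v_length, (self.batch_size, 2))
    t = self.y                                 # one-hot labels, [batch, 2]
    l_present = tf.square(tf.maximum(0., 0.9 - v_length))
    l_absent = tf.square(tf.maximum(0., v_length - 0.1))
    margin = t * l_present + 0.5 * (1. - t) * l_absent
    self.margin_loss = tf.reduce_mean(tf.reduce_sum(margin, axis=1))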
def __init__(self,
             image_width,        # 28
             image_height,       # 28
             image_channels,     # 1
             conv_inputs,        # 1
             conv_outputs,       # 256
             num_primary_units,  # 8
             primary_unit_size,  # 32*6*6
             num_output_units,   # 3
             output_unit_size):  # 32
    super(CapsuleNetwork, self).__init__()
    self.reconstructed_image_count = 0
    self.image_channels = image_channels
    self.image_width = image_width
    self.image_height = image_height
    self.max_pool = nn.MaxPool2d(4, stride=4)

    # First conv layer of the images branch.
    self.images_conv1 = CreateConv(
        in_channels=1,
        out_channels=32,
        kernel_size=8,  # fixme constant
        stride=2,
        padding=3,
        bias=True)
    # Second conv layer of the images branch.
    self.images_conv2 = CreateConv(
        in_channels=32,
        out_channels=32,
        kernel_size=9,  # fixme constant
        stride=1,
        padding=0,
        bias=True)
    # First conv layer of the corp_images (cropped images) branch.
    self.corp_images_conv1 = CreateConv(
        in_channels=1,
        out_channels=32,
        kernel_size=8,  # fixme constant
        stride=2,
        padding=3,
        bias=True)
    # Second conv layer of the corp_images branch.
    self.corp_images_conv2 = CreateConv(
        in_channels=32,
        out_channels=32,
        kernel_size=9,  # fixme constant
        stride=1,
        padding=0,
        bias=True)
    # self.merge_conv = CapsuleConvLayer(in_channels=64,
    #                                    out_channels=conv_outputs)

    self.primary = CapsuleLayer(in_units=0,
                                in_channels=64,
                                num_units=num_primary_units,
                                unit_size=primary_unit_size,
                                use_routing=False)
    self.digits = CapsuleLayer(in_units=num_primary_units,
                               in_channels=primary_unit_size,
                               num_units=num_output_units,
                               unit_size=output_unit_size,
                               use_routing=True)

    # Reconstruction head: a linear expansion followed by three upsampling
    # conv layers and a 1-channel compacting conv.
    reconstruction_size = image_width * image_height * image_channels
    self.reconstruct0 = nn.Linear(32, 256)
    self.reconstruct1 = CapsuleUpsampleConvLayer(16, 4, 8, 'nearest')
    self.reconstruct2 = CapsuleUpsampleConvLayer(4, 8, 4, 'nearest')
    self.reconstruct3 = CapsuleUpsampleConvLayer(8, 16, 4, 'nearest')
    self.compact_layer = nn.Conv2d(
        in_channels=16,
        out_channels=1,
        kernel_size=3,  # fixme constant
        stride=1,
        padding=1,
        bias=True)

    self.relu = nn.ReLU(inplace=True)
    self.sigmoid = nn.Sigmoid()
    self.mish = Mish()
    self.dropout = nn.Dropout(p=0.7)
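# A minimal sketch of how the two conv branches could merge into the capsule
# stack (an assumption; the forward pass is not shown above, and CreateConv
# is assumed to be a bare convolution without its own activation). Both
# branches emit 32 channels, which concatenate to the 64 input channels
# expected by `primary`. Assumes `import torch`.
def forward(self, images, corp_images):
    a = self.relu(self.images_conv2(self.relu(self.images_conv1(images))))
    b = self.relu(self.corp_images_conv2(
        self.relu(self.corp_images_conv1(corp_images))))
    merged = torch.cat([a, b], dim=1)  # [batch, 64, H, W]
    return self.digits(self.primary(merged))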