def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
    norm_layer = self._norm_layer
    downsample = None
    previous_dilation = self.dilation
    if dilate:
        self.dilation *= stride
        stride = 1
    if stride != 1 or self.inplanes != planes * block.expansion:
        downsample = nn.Sequential(
            conv1x1(self.inplanes, planes * block.expansion, stride),
            norm_layer(planes * block.expansion),
        )

    layers = []
    layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                        self.base_width, previous_dilation, norm_layer))
    self.inplanes = planes * block.expansion
    for _ in range(1, blocks):
        layers.append(block(self.inplanes, planes, groups=self.groups,
                            base_width=self.base_width, dilation=self.dilation,
                            norm_layer=norm_layer))

    return nn.Sequential(*layers)
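This method appears to be torchvision's stock ResNet._make_layer, which the ResNet constructor calls once per stage. A quick, runnable way to see it in action is through that constructor; a minimal sketch:

import torch
from torchvision.models.resnet import ResNet, BasicBlock

# resnet18-shaped network: _make_layer is invoked for each of the four stages.
net = ResNet(BasicBlock, [2, 2, 2, 2], num_classes=10)
x = torch.randn(1, 3, 64, 64)
print(net(x).shape)  # torch.Size([1, 10])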
def _make_res_layer(self, block, planes, blocks, stride=1, expansion=4, dilation=1):
    downsample = None
    # A projection shortcut is only needed on the first block of the layer:
    # when it strides, or when its input width differs from planes * expansion.
    assert dilation == 1 or dilation % 2 == 0
    if stride != 1 or self.inplanes != planes * expansion:
        downsample = nn.Sequential(
            conv1x1(self.inplanes, planes * expansion, stride),
            nn.BatchNorm2d(planes * expansion),
        )

    layers = []
    # First block: the only one that can downsample (stride != 1) or dilate.
    layers.append(block(self.inplanes, planes, stride, downsample, expansion,
                        dilation=(dilation, dilation)))
    # The remaining blocks keep the expanded width.
    self.inplanes = planes * expansion
    for _ in range(1, blocks):
        layers.append(block(self.inplanes, planes))

    return nn.Sequential(*layers)
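The projection shortcut built here is what keeps the residual addition valid on that first block: it matches both the expanded channel count and the stride of the main path. A standalone shape check, with hypothetical sizes:

import torch
import torch.nn as nn

def conv1x1(in_planes, out_planes, stride=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)

inplanes, planes, expansion, stride = 64, 64, 4, 2
downsample = nn.Sequential(
    conv1x1(inplanes, planes * expansion, stride),
    nn.BatchNorm2d(planes * expansion),
)
x = torch.randn(1, inplanes, 56, 56)
print(downsample(x).shape)  # torch.Size([1, 256, 28, 28])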
def __init__(self, inplanes, planes, stride=1, downsample=None, kernel_size=3):
    super(BottleneckNoBn, self).__init__()
    # BatchNorm layers are intentionally omitted (hence "NoBn").
    self.conv1 = conv1x1(inplanes, planes)
    if kernel_size == 3:
        self.conv2 = conv3x3(planes, planes, stride)
    else:
        self.conv2 = conv1x1(planes, planes, stride)
    self.conv3 = conv1x1(planes, planes * self.expansion)
    self.relu = nn.ReLU(inplace=True)
    self.downsample = downsample
    self.stride = stride
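The forward pass is not shown above; a plausible sketch would mirror the torchvision Bottleneck minus the BatchNorm calls (and assumes the class defines an `expansion` attribute, as self.conv3 relies on it):

def forward(self, x):
    identity = x
    out = self.relu(self.conv1(x))
    out = self.relu(self.conv2(out))
    out = self.conv3(out)
    # Project the identity when the shapes differ.
    if self.downsample is not None:
        identity = self.downsample(x)
    out += identity
    return self.relu(out)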
def __init__(self, in_channels, out_channels, block=BasicBlock):
    assert in_channels == out_channels
    super(Masking1, self).__init__()
    filters = [in_channels, in_channels * 2, in_channels * 4, in_channels * 8]

    self.downsample1 = nn.Sequential(
        conv1x1(filters[0], filters[1], 1),
        nn.BatchNorm2d(filters[1]),
    )
    self.conv1 = block(filters[0], filters[1], downsample=self.downsample1)

    self.downsample2 = nn.Sequential(
        conv1x1(filters[1], filters[0], 1),
        nn.BatchNorm2d(filters[0]),
    )
    self.conv2 = block(filters[1], filters[0], downsample=self.downsample2)

    # Initialize weights.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

    # Zero-initialize the last BN in each residual branch, so that each
    # residual block starts as an identity. This improves the model by
    # 0.2~0.3% according to https://arxiv.org/abs/1706.02677.
    for m in self.modules():
        if isinstance(m, Bottleneck):
            nn.init.constant_(m.bn3.weight, 0)
        elif isinstance(m, BasicBlock):
            nn.init.constant_(m.bn2.weight, 0)
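Masking1's forward is not shown; given the two projections above, a plausible routing is simply the two blocks in sequence, expanding and then restoring the channel count (which is why in_channels == out_channels is asserted):

def forward(self, x):
    out = self.conv1(x)    # C  -> 2C, shortcut projected by downsample1
    out = self.conv2(out)  # 2C -> C,  projected back by downsample2
    return out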
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
             base_width=64, dilation=1, norm_layer=None):
    super(CustomBlock, self).__init__()
    if norm_layer is None:
        norm_layer = nn.BatchNorm2d
    width = int(planes * (base_width / 64.)) * groups
    # Both self.conv2 and self.downsample layers downsample the input when stride != 1.
    self.conv1 = conv1x1(inplanes, width)
    self.bn1 = norm_layer(width)
    self.conv2 = conv3x3(width, width, stride, groups, dilation)
    self.bn2 = norm_layer(width)
    self.conv3 = conv1x1(width, planes * self.expansion)
    self.bn3 = norm_layer(planes * self.expansion)
    # GELU swapped in for the usual nn.ReLU(inplace=True); the attribute keeps
    # its old name, so the forward pass needs no changes.
    self.relu = nn.functional.gelu
    self.downsample = downsample
    self.stride = stride
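A sketch of the usual bottleneck forward this block implies; `self.relu` is only ever called like a function, so it dispatches to F.gelu transparently:

def forward(self, x):
    identity = x
    out = self.relu(self.bn1(self.conv1(x)))
    out = self.relu(self.bn2(self.conv2(out)))
    out = self.bn3(self.conv3(out))
    if self.downsample is not None:
        identity = self.downsample(x)
    return self.relu(out + identity)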
def __init__(self, in_channels, out_channels, block=BasicBlock):
    assert in_channels == out_channels
    super(Masking4, self).__init__()
    filters = [in_channels, in_channels * 2, in_channels * 4,
               in_channels * 8, in_channels * 16]

    # Encoder: each block doubles the channel count; the 1x1 projection
    # shortcuts keep the residual additions shape-consistent.
    self.downsample1 = nn.Sequential(
        conv1x1(filters[0], filters[1], 1),
        nn.BatchNorm2d(filters[1]),
    )
    self.downsample2 = nn.Sequential(
        conv1x1(filters[1], filters[2], 1),
        nn.BatchNorm2d(filters[2]),
    )
    self.downsample3 = nn.Sequential(
        conv1x1(filters[2], filters[3], 1),
        nn.BatchNorm2d(filters[3]),
    )
    self.downsample4 = nn.Sequential(
        conv1x1(filters[3], filters[4], 1),
        nn.BatchNorm2d(filters[4]),
    )
    self.conv1 = block(filters[0], filters[1], downsample=self.downsample1)
    self.conv2 = block(filters[1], filters[2], downsample=self.downsample2)
    self.conv3 = block(filters[2], filters[3], downsample=self.downsample3)
    self.conv4 = block(filters[3], filters[4], downsample=self.downsample4)
    self.down_pooling = nn.MaxPool2d(kernel_size=2)

    # Decoder: mirror the encoder, halving the channel count at each step.
    self.downsample5 = nn.Sequential(
        conv1x1(filters[4], filters[3], 1),
        nn.BatchNorm2d(filters[3]),
    )
    self.downsample6 = nn.Sequential(
        conv1x1(filters[3], filters[2], 1),
        nn.BatchNorm2d(filters[2]),
    )
    self.downsample7 = nn.Sequential(
        conv1x1(filters[2], filters[1], 1),
        nn.BatchNorm2d(filters[1]),
    )
    self.downsample8 = nn.Sequential(
        conv1x1(filters[1], filters[0], 1),
        nn.BatchNorm2d(filters[0]),
    )
    self.up_pool5 = up_pooling(filters[4], filters[3])
    self.conv5 = block(filters[4], filters[3], downsample=self.downsample5)
    self.up_pool6 = up_pooling(filters[3], filters[2])
    self.conv6 = block(filters[3], filters[2], downsample=self.downsample6)
    self.up_pool7 = up_pooling(filters[2], filters[1])
    self.conv7 = block(filters[2], filters[1], downsample=self.downsample7)
    self.conv8 = block(filters[1], filters[0], downsample=self.downsample8)

    # Initialize weights.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

    # Zero-initialize the last BN in each residual branch, so that each
    # residual block starts as an identity. This improves the model by
    # 0.2~0.3% according to https://arxiv.org/abs/1706.02677.
    for m in self.modules():
        if isinstance(m, Bottleneck):
            nn.init.constant_(m.bn3.weight, 0)
        elif isinstance(m, BasicBlock):
            nn.init.constant_(m.bn2.weight, 0)
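The forward for Masking4 is not shown. Assuming `up_pooling(in, out)` upsamples 2x spatially while mapping in -> out channels, the channel bookkeeping lines up exactly with a U-Net-style routing in which decoder inputs are the concatenation of the upsampled feature and the matching encoder skip; a speculative sketch:

def forward(self, x):
    # Encoder (C = in_channels).
    x1 = self.conv1(x)                              # C   -> 2C
    x2 = self.conv2(self.down_pooling(x1))          # 2C  -> 4C
    x3 = self.conv3(self.down_pooling(x2))          # 4C  -> 8C
    x4 = self.conv4(self.down_pooling(x3))          # 8C  -> 16C
    # Decoder: upsample, concatenate the skip, then fuse with a block.
    x5 = self.conv5(torch.cat([self.up_pool5(x4), x3], dim=1))  # 8C+8C -> 8C
    x6 = self.conv6(torch.cat([self.up_pool6(x5), x2], dim=1))  # 4C+4C -> 4C
    x7 = self.conv7(torch.cat([self.up_pool7(x6), x1], dim=1))  # 2C+2C -> 2C
    return self.conv8(x7)                           # 2C -> C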
train_data = datasets.CIFAR10(root='data/cifar10', transform=train_process,
                              train=True, download=True)
test_data = datasets.CIFAR10(root='data/cifar10', transform=test_process,
                             train=False, download=True)
num_classes = 10

train_dataLoader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_dataLoader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Load the student model and attach 1x1 branch adapters.
if s_name == 'resnet18':
    Student = resnet.resnet18(num_classes=num_classes)
    Student.fc = nn.Linear(128, num_classes)
    Student.branch1 = resnet.conv1x1(16, 128)
    Student.branch2 = resnet.conv1x1(32, 256)
    Student.branch3 = resnet.conv1x1(64, 512)
    Student.branch4 = resnet.conv1x1(128, 1024)
elif s_name == 'resnet34':
    Student = resnet.resnet34(num_classes=num_classes)
    Student.fc = nn.Linear(128, num_classes)
    Student.branch1 = resnet.conv1x1(16, 128)
    Student.branch2 = resnet.conv1x1(32, 256)
    Student.branch3 = resnet.conv1x1(64, 512)
    Student.branch4 = resnet.conv1x1(128, 1024)
elif s_name == 'resnet50':
    Student = resnet.resnet50(num_classes=num_classes)
    Student.fc = nn.Linear(512, num_classes)
    Student.branch1 = resnet.conv1x1(64, 128)
    Student.branch2 = resnet.conv1x1(128, 256)
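The `resnet` module here appears to be project-local, CIFAR-width variant (stage widths 16/32/64/128 for resnet18/34), not torchvision's; the branch layers read as 1x1 adapters that lift student feature maps to larger, presumably teacher-sized, channel widths for feature distillation. A standalone shape check using torchvision's conv1x1 as a stand-in:

import torch
from torchvision.models.resnet import conv1x1

branch1 = conv1x1(16, 128)          # lift 16-channel student features to 128
feat = torch.randn(8, 16, 32, 32)   # hypothetical stage-1 feature map
print(branch1(feat).shape)          # torch.Size([8, 128, 32, 32])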
def conv_layer(self, in_channel, out_channel):
    from resnet import Bottleneck, conv1x1
    # Bottleneck expands its output to 4 * out_channel, so the shortcut
    # must project the input to the same width.
    downsample = nn.Sequential(
        conv1x1(in_channel, 4 * out_channel, stride=1),
        nn.BatchNorm2d(4 * out_channel))
    return Bottleneck(in_channel, out_channel, downsample=downsample)
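The same construction works with torchvision's Bottleneck (expansion = 4), which the local `resnet` module presumably mirrors; a runnable sketch:

import torch
import torch.nn as nn
from torchvision.models.resnet import Bottleneck, conv1x1

in_ch, out_ch = 64, 64
downsample = nn.Sequential(conv1x1(in_ch, 4 * out_ch, stride=1),
                           nn.BatchNorm2d(4 * out_ch))
block = Bottleneck(in_ch, out_ch, downsample=downsample)
x = torch.randn(1, in_ch, 28, 28)
print(block(x).shape)  # torch.Size([1, 256, 28, 28])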