def __init__(self, block=BasicBlock, layers=[2, 2, 2, 2], zero_init_residual=False):
    super(ResNet, self).__init__()
    self.in_planes = 64
    self.conv1 = MetaConv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = MetaBatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
    # self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self._init_conv()

    # Zero-initialize the last BN in each residual branch, so that the
    # residual branch starts with zeros and each residual block behaves
    # like an identity. This improves the model by 0.2~0.3% according to
    # https://arxiv.org/abs/1706.02677
    if zero_init_residual:
        for m in self.modules():
            if isinstance(m, Bottleneck):
                nn.init.constant_(m.bn3.weight, 0)
            elif isinstance(m, BasicBlock):
                nn.init.constant_(m.bn2.weight, 0)
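# Why zeroing the last BN weight makes the block an identity: BatchNorm
# computes weight * x_hat + bias, so with weight = 0 it outputs only its
# bias (zeros by default) and the residual branch contributes nothing.
# A minimal sanity check with plain PyTorch layers (not the Meta* wrappers
# above); the tensor shapes are arbitrary illustration values.
import torch
import torch.nn as nn

bn = nn.BatchNorm2d(8)
nn.init.constant_(bn.weight, 0)   # mirrors the zero_init_residual branch
x = torch.randn(4, 8, 16, 16)
out = bn(x)                       # = bn.bias broadcast over the feature map
print(torch.allclose(out, bn.bias.view(1, -1, 1, 1).expand_as(out)))  # True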
def conv_block(in_channels, out_channels, bias=True, activation=nn.ReLU(inplace=True),
               use_dropout=False, p=0.1):
    res = MetaSequential(OrderedDict([
        ('conv', MetaConv2d(int(in_channels), int(out_channels), kernel_size=3,
                            padding=1, bias=bias)),
        ('norm', MetaBatchNorm2d(int(out_channels), momentum=1.,
                                 track_running_stats=False)),
        ('relu', activation),
        ('pool', nn.MaxPool2d(2)),
    ]))
    if use_dropout:
        res.add_module('dropout', nn.Dropout2d(p))
    return res
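# conv_block is the building unit of the standard Conv-4 few-shot backbone.
# A hedged sketch of how four of them typically stack; the Conv4 class name,
# the torchmeta imports (assuming the Meta* layers come from there), and the
# 84x84 input size are illustrative assumptions, not taken from this file.
from torchmeta.modules import MetaModule, MetaSequential, MetaLinear

class Conv4(MetaModule):
    def __init__(self, in_channels=3, num_classes=5, hidden=64):
        super().__init__()
        self.features = MetaSequential(
            conv_block(in_channels, hidden),
            conv_block(hidden, hidden),
            conv_block(hidden, hidden),
            conv_block(hidden, hidden))
        # Four MaxPool2d(2) stages shrink 84x84 inputs to 5x5 feature maps.
        self.classifier = MetaLinear(hidden * 5 * 5, num_classes)

    def forward(self, x, params=None):
        out = self.features(x, params=self.get_subdict(params, 'features'))
        out = out.view(out.size(0), -1)
        return self.classifier(out, params=self.get_subdict(params, 'classifier'))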
def conv3x3(in_channels, out_channels, **kwargs):
    return MetaSequential(
        MetaConv2d(in_channels, out_channels, kernel_size=3, padding=1, **kwargs),
        MetaBatchNorm2d(out_channels, momentum=1., track_running_stats=False),
        nn.ReLU(),
        nn.MaxPool2d(2))
def __init__(self, nc, num_classes, block, num_blocks):
    super(ResNet, self).__init__()
    self.in_planes = 64
    self.conv1 = MetaConv2d(nc, 64, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = MetaBatchNorm2d(64)
    self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
    self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
    self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
    self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
    self.linear = MetaLinear(512 * block.expansion, num_classes)
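# A hedged MAML-style usage sketch for the network above. It assumes the
# Meta* layers come from torchmeta (so forward() accepts a params
# OrderedDict) and that support_x/support_y/query_x are task tensors you
# already have; the step size and 5-way setup are illustrative values.
import torch.nn.functional as F
from torchmeta.utils.gradient_based import gradient_update_parameters

model = ResNet(nc=3, num_classes=5, block=BasicBlock, num_blocks=[2, 2, 2, 2])
inner_loss = F.cross_entropy(model(support_x), support_y)
fast_params = gradient_update_parameters(model, inner_loss, step_size=0.01)
query_logits = model(query_x, params=fast_params)   # forward with adapted weights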
def __init__(self, inplanes, planes, stride=1, downsample=None,
             drop_rate=0.0, drop_block=False, block_size=1):
    super(BasicBlock, self).__init__()
    # Three conv-BN-LeakyReLU stages (ResNet-12 style block).
    self.conv1 = MetaConv2d(inplanes, planes, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = MetaBatchNorm2d(planes, track_running_stats=False)
    self.relu1 = nn.LeakyReLU(0.1)
    self.conv2 = MetaConv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn2 = MetaBatchNorm2d(planes, track_running_stats=False)
    self.relu2 = nn.LeakyReLU(0.1)
    self.conv3 = MetaConv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn3 = MetaBatchNorm2d(planes, track_running_stats=False)
    self.relu3 = nn.LeakyReLU(0.1)
    # Downsampling happens by pooling after the residual add, not by
    # strided convolutions inside the block.
    self.maxpool = nn.MaxPool2d(stride)
    self.downsample = downsample
    self.stride = stride
    self.drop_rate = drop_rate
    self.num_batches_tracked = 0
    self.drop_block = drop_block
    self.block_size = block_size
    self.DropBlock = DropBlock(block_size=self.block_size)
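# A hedged sketch of how this block's pieces usually compose in forward():
# three conv-BN-LeakyReLU stages, a residual add before the last activation,
# pooling for downsampling, then dropout. The DropBlock call signature is
# not shown in this excerpt, so plain F.dropout stands in for the
# drop_block branch here.
import torch.nn.functional as F

def forward(self, x):
    residual = x
    out = self.relu1(self.bn1(self.conv1(x)))
    out = self.relu2(self.bn2(self.conv2(out)))
    out = self.bn3(self.conv3(out))          # no activation before the add
    if self.downsample is not None:
        residual = self.downsample(x)        # match channels for the sum
    out = self.relu3(out + residual)
    out = self.maxpool(out)                  # downsample by self.stride
    if self.drop_rate > 0:
        out = F.dropout(out, p=self.drop_rate, training=self.training)
    return out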
def _make_layer(self, block, planes, num_blocks, stride=1):
    downsample = None
    # Project the residual with a 1x1 conv whenever the spatial size or
    # channel count changes across the layer.
    if stride != 1 or self.in_planes != planes * block.expansion:
        downsample = MetaSequential(
            conv1x1(self.in_planes, planes * block.expansion, stride),
            MetaBatchNorm2d(planes * block.expansion))
    layers = []
    layers.append(block(self.in_planes, planes, stride, downsample))
    self.in_planes = planes * block.expansion  # later blocks see the widened input
    for _ in range(1, num_blocks):
        layers.append(block(self.in_planes, planes))
    return MetaSequential(*layers)
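# conv1x1 is used above but not defined in this excerpt. A standard
# definition, mirroring torchvision's resnet.py with the Meta layer
# swapped in, would be:
def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution; projects the residual to the target width."""
    return MetaConv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)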