def __init__(self, feature_levels=(3, 4), pretrained=True, l2_norm=True, **kwargs): super().__init__() assert feature_levels == (3, 4) assert pretrained, "Only pretrained VGG16 is provided." _check_levels(feature_levels) self.forward_levels = tuple(range(1, feature_levels[-1] + 1)) self.feature_levels = feature_levels from torchvision.models import vgg16 backbone = vgg16(pretrained=True) f = backbone.features f[4].ceil_mode = True f[9].ceil_mode = True f[16].ceil_mode = True f[23].ceil_mode = True f[30].kernel_size = (3, 3) f[30].stride = (1, 1) f[30].padding = (1, 1) conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6) fc6 = backbone.classifier[0] fc6_weight = fc6.weight.data.view(4096, 512, 7, 7) fc6_bias = fc6.bias.data conv6.weight.data = decimate(fc6_weight, m=[4, None, 3, 3]) conv6.bias.data = decimate(fc6_bias, m=[4]) conv7 = nn.Conv2d(1024, 1024, kernel_size=1) fc7 = backbone.classifier[3] fc7_weight = fc7.weight.data.view(4096, 4096, 1, 1) fc7_bias = fc7.bias.data conv7.weight.data = decimate(fc7_weight, m=[4, 4, None, None]) conv7.bias.data = decimate(fc7_bias, m=[4]) self.layer1 = f[:9] self.layer2 = f[9:16] self.layer3 = f[16:23] self.l2_norm = L2Norm(512, 20) if l2_norm else nn.Identity() self.layer4 = nn.Sequential( *f[23:], conv6, nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True), ) self.out_channels = [512, 1024]
def __init__(self, feature_levels=(3, 4), pretrained=True): super().__init__() assert pretrained, "Only pretrained VGG16 is provided." self.feature_levels = feature_levels from torchvision.models import vgg16_bn backbone = vgg16_bn(pretrained=True) f = backbone.features f[6].ceil_mode = True f[13].ceil_mode = True f[23].ceil_mode = True f[33].ceil_mode = True f[43].kernel_size = (3, 3) f[43].stride = (1, 1) f[43].padding = (1, 1) conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6) fc6 = backbone.classifier[0] fc6_weight = fc6.weight.data.view(4096, 512, 7, 7) fc6_bias = fc6.bias.data conv6.weight.data = decimate(fc6_weight, m=[4, None, 3, 3]) conv6.bias.data = decimate(fc6_bias, m=[4]) conv7 = nn.Conv2d(1024, 1024, kernel_size=1) fc7 = backbone.classifier[3] fc7_weight = fc7.weight.data.view(4096, 4096, 1, 1) fc7_bias = fc7.bias.data conv7.weight.data = decimate(fc7_weight, m=[4, 4, None, None]) conv7.bias.data = decimate(fc7_bias, m=[4]) self.stage0 = f[:6] self.stage1 = f[6:13] self.stage2 = f[13:23] self.stage3 = f[23:33] self.stage4 = nn.Sequential( *f[33:], conv6, nn.BatchNorm2d(1024), nn.ReLU(inplace=True), conv7, nn.BatchNorm2d(1024), nn.ReLU(inplace=True), ) self.out_channels = [ get_out_channels(getattr(self, ("stage%d" % i))) for i in feature_levels ]