def __init__(self, n_classes=21, pretrained=True):
    """Build a ResNet34-backbone FCN-16s segmentation head.

    Args:
        n_classes: number of output segmentation classes.
        pretrained: load ImageNet weights for the ResNet34 backbone.
            Previously this was hard-coded to True; the parameter is added
            for consistency with FCN16 and defaults to the old behavior.
    """
    super(RES16, self).__init__()
    resnet = torchvision.models.resnet34(pretrained=pretrained)

    # Reuse the ResNet stem and residual stages as the encoder.
    self.conv = resnet.conv1
    self.bn = resnet.bn1
    self.relu = resnet.relu
    self.maxpool = resnet.maxpool
    # ceil_mode keeps feature maps from collapsing on small inputs.
    self.maxpool.ceil_mode = True
    self.layer1 = resnet.layer1
    self.layer2 = resnet.layer2
    self.layer3 = resnet.layer3
    self.layer4 = resnet.layer4

    # 1x1 score layer on layer3 features (256 channels); zero-initialized
    # so the skip connection starts as a no-op (FCN-paper convention).
    self.score_layer3 = nn.Conv2d(256, n_classes, kernel_size=1)
    self.score_layer3.weight.data.zero_()
    self.score_layer3.bias.data.zero_()

    # 2x learned upsampling of coarse scores, initialized to bilinear.
    self.upscore_layer3 = nn.ConvTranspose2d(
        n_classes, n_classes, kernel_size=4, stride=2, bias=False)
    self.upscore_layer3.weight.data.copy_(
        get_upsampling_weight(n_classes, n_classes, 4))

    # Final 16x upsampling back toward input resolution.
    self.upscore16 = nn.ConvTranspose2d(
        n_classes, n_classes, 32, 16, bias=False)
    self.upscore16.weight.data.copy_(
        get_upsampling_weight(n_classes, n_classes, 32))

    # 1x1 score layer on layer4 features (512 channels), zero-initialized.
    self.score_fr = nn.Conv2d(512, n_classes, 1)
    self.score_fr.weight.data.zero_()
    self.score_fr.bias.data.zero_()
def __init__(self, n_classes=21, pretrained=True):
    """Build a VGG16-backbone FCN-16s network.

    Args:
        n_classes: number of output segmentation classes.
        pretrained: load ImageNet weights for the VGG16 backbone.

    Bug fix: the ``pretrained`` flag was previously ignored —
    ``vgg16(pretrained=True)`` was hard-coded, so passing False still
    downloaded and loaded ImageNet weights.
    """
    super(FCN16, self).__init__()
    # Honor the caller's flag instead of hard-coding True.
    vgg = torchvision.models.vgg16(pretrained=pretrained)
    features, classifier = (list(vgg.features.children()),
                            list(vgg.classifier.children()))

    # 100-pixel padding (original FCN trick): supports tiny inputs and
    # leaves room to crop feature maps to matching sizes later.
    features[0].padding = (100, 100)
    for f in features:
        if 'MaxPool' in f.__class__.__name__:
            f.ceil_mode = True
        elif 'ReLU' in f.__class__.__name__:
            f.inplace = True

    # Split the VGG feature extractor at pool4 / pool5.
    self.features4 = nn.Sequential(*features[:24])
    self.features5 = nn.Sequential(*features[24:])

    # Zero-initialized 1x1 score layer on the pool4 skip path.
    self.score_pool4 = nn.Conv2d(512, n_classes, kernel_size=1)
    self.score_pool4.weight.data.zero_()
    self.score_pool4.bias.data.zero_()

    # Convert VGG's fully-connected classifier into convolutions,
    # copying the pretrained fc6/fc7 weights.
    fc6 = nn.Conv2d(512, 4096, kernel_size=7)
    fc6.weight.data.copy_(classifier[0].weight.data.view(4096, 512, 7, 7))
    fc6.bias.data.copy_(classifier[0].bias.data)
    fc7 = nn.Conv2d(4096, 4096, kernel_size=1)
    fc7.weight.data.copy_(classifier[3].weight.data.view(4096, 4096, 1, 1))
    fc7.bias.data.copy_(classifier[3].bias.data)
    score_fr = nn.Conv2d(4096, n_classes, kernel_size=1)
    score_fr.weight.data.zero_()
    score_fr.bias.data.zero_()
    self.score_fr = nn.Sequential(
        fc6, nn.ReLU(inplace=True), nn.Dropout(),
        fc7, nn.ReLU(inplace=True), nn.Dropout(), score_fr)

    # Learned upsampling layers, initialized to bilinear interpolation.
    self.upscore2 = nn.ConvTranspose2d(
        n_classes, n_classes, kernel_size=4, stride=2, bias=False)
    self.upscore16 = nn.ConvTranspose2d(
        n_classes, n_classes, kernel_size=32, stride=16, bias=False)
    self.upscore2.weight.data.copy_(
        get_upsampling_weight(n_classes, n_classes, 4))
    self.upscore16.weight.data.copy_(
        get_upsampling_weight(n_classes, n_classes, 32))
def _initialize_weights(self): for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') if m.bias is not None: nn.init.constant_(m.bias, 0) if isinstance(m, nn.ConvTranspose2d): assert m.kernel_size[0] == m.kernel_size[1] initial_weight = get_upsampling_weight( m.in_channels, m.out_channels, m.kernel_size[0]) m.weight.data.copy_(initial_weight)
def __init__(self, n_classes=21, pretrained=True):
    """Build a VGG16-backbone FCN-8s network.

    Args:
        n_classes: number of output segmentation classes.
        pretrained: load ImageNet weights for the VGG16 backbone.
            Previously hard-coded to True; added for consistency with
            FCN16, with a default that preserves the old behavior.
    """
    super(FCN8, self).__init__()
    vgg = torchvision.models.vgg16(pretrained=pretrained)
    features, classifier = (list(vgg.features.children()),
                            list(vgg.classifier.children()))

    # 100-pixel padding, for 2 reasons:
    #   1) support very small input sizes;
    #   2) allow cropping so feature maps of different layers match in size.
    # The cropped part corresponds to part of the 100 padding, so spatial
    # information across layers cannot be aligned exactly — a known drawback.
    features[0].padding = (100, 100)
    for f in features:
        if 'MaxPool' in f.__class__.__name__:
            f.ceil_mode = True
        elif 'ReLU' in f.__class__.__name__:
            f.inplace = True

    # Split the VGG feature extractor at pool3 / pool4 / pool5.
    self.features3 = nn.Sequential(*features[:17])
    self.features4 = nn.Sequential(*features[17:24])
    self.features5 = nn.Sequential(*features[24:])

    # Zero-initialized 1x1 score layers on the pool3/pool4 skip paths,
    # so the skip connections start as no-ops (FCN-paper convention).
    self.score_pool3 = nn.Conv2d(256, n_classes, kernel_size=1)
    self.score_pool4 = nn.Conv2d(512, n_classes, kernel_size=1)
    self.score_pool3.weight.data.zero_()
    self.score_pool3.bias.data.zero_()
    self.score_pool4.weight.data.zero_()
    self.score_pool4.bias.data.zero_()

    # Convert VGG's fully-connected classifier into convolutions,
    # copying the pretrained fc6/fc7 weights.
    fc6 = nn.Conv2d(512, 4096, kernel_size=7)
    fc6.weight.data.copy_(classifier[0].weight.data.view(4096, 512, 7, 7))
    fc6.bias.data.copy_(classifier[0].bias.data)
    fc7 = nn.Conv2d(4096, 4096, kernel_size=1)
    fc7.weight.data.copy_(classifier[3].weight.data.view(4096, 4096, 1, 1))
    fc7.bias.data.copy_(classifier[3].bias.data)
    score_fr = nn.Conv2d(4096, n_classes, kernel_size=1)
    score_fr.weight.data.zero_()
    score_fr.bias.data.zero_()
    self.score_fr = nn.Sequential(
        fc6, nn.ReLU(inplace=True), nn.Dropout(),
        fc7, nn.ReLU(inplace=True), nn.Dropout(), score_fr)

    # Learned upsampling layers, initialized to bilinear interpolation.
    self.upscore2 = nn.ConvTranspose2d(
        n_classes, n_classes, kernel_size=4, stride=2, bias=False)
    self.upscore_pool4 = nn.ConvTranspose2d(
        n_classes, n_classes, kernel_size=4, stride=2, bias=False)
    self.upscore8 = nn.ConvTranspose2d(
        n_classes, n_classes, kernel_size=16, stride=8, bias=False)
    self.upscore2.weight.data.copy_(
        get_upsampling_weight(n_classes, n_classes, 4))
    self.upscore_pool4.weight.data.copy_(
        get_upsampling_weight(n_classes, n_classes, 4))
    self.upscore8.weight.data.copy_(
        get_upsampling_weight(n_classes, n_classes, 16))
def __init__(self, n_classes=21, learned_billinear=True):
    """VGG16-style FCN-8s built layer-by-layer (no torchvision backbone).

    Args:
        n_classes: number of output segmentation classes.
        learned_billinear: if True, create learnable transposed-conv
            upsampling layers initialized to bilinear interpolation.
    """
    super(fcn8s, self).__init__()
    self.learned_billinear = learned_billinear
    self.n_classes = n_classes
    self.loss = functools.partial(cross_entropy2d, size_average=False)

    def make_stage(channels, first_padding=1):
        # 3x3 conv + ReLU for each consecutive channel pair, then a
        # 2x2 ceil-mode max-pool, mirroring a VGG16 stage.
        layers = []
        pad = first_padding
        for cin, cout in zip(channels[:-1], channels[1:]):
            layers.append(nn.Conv2d(cin, cout, 3, padding=pad))
            layers.append(nn.ReLU(inplace=True))
            pad = 1
        layers.append(nn.MaxPool2d(2, stride=2, ceil_mode=True))
        return nn.Sequential(*layers)

    # The very first conv pads by 100 pixels (original FCN trick to
    # support small inputs and later cropping).
    self.conv_block1 = make_stage([3, 64, 64], first_padding=100)
    self.conv_block2 = make_stage([64, 128, 128])
    self.conv_block3 = make_stage([128, 256, 256, 256])
    self.conv_block4 = make_stage([256, 512, 512, 512])
    self.conv_block5 = make_stage([512, 512, 512, 512])

    # fc6/fc7 as convolutions, followed by the per-class score layer.
    self.classifier = nn.Sequential(
        nn.Conv2d(512, 4096, 7),
        nn.ReLU(inplace=True),
        nn.Dropout2d(),
        nn.Conv2d(4096, 4096, 1),
        nn.ReLU(inplace=True),
        nn.Dropout2d(),
        nn.Conv2d(4096, self.n_classes, 1),
    )

    # 1x1 score layers on the pool4 / pool3 skip paths.
    self.score_pool4 = nn.Conv2d(512, self.n_classes, 1)
    self.score_pool3 = nn.Conv2d(256, self.n_classes, 1)

    if self.learned_billinear:
        self.upscore2 = nn.ConvTranspose2d(
            self.n_classes, self.n_classes, 4, stride=2, bias=False)
        self.upscore4 = nn.ConvTranspose2d(
            self.n_classes, self.n_classes, 4, stride=2, bias=False)
        self.upscore8 = nn.ConvTranspose2d(
            self.n_classes, self.n_classes, 16, stride=8, bias=False)
        # Initialize every transposed conv to bilinear upsampling.
        for m in self.modules():
            if isinstance(m, nn.ConvTranspose2d):
                m.weight.data.copy_(get_upsampling_weight(
                    m.in_channels, m.out_channels, m.kernel_size[0]))