def __init__(self, num_classes, pretrained=True):
    """Build an FCN-8s segmentation head on top of a (optionally pretrained) VGG-16."""
    super(FCN8s, self).__init__()
    backbone = models.vgg16(pretrained=pretrained)
    conv_layers = list(backbone.features.children())
    fc_layers = list(backbone.classifier.children())

    # Use ceil_mode on every pooling layer and make all ReLUs in-place.
    for layer in conv_layers:
        cls_name = layer.__class__.__name__
        if 'MaxPool' in cls_name:
            layer.ceil_mode = True
        elif 'ReLU' in cls_name:
            layer.inplace = True

    # Split the backbone after pool3 and pool4 so skip connections can be tapped.
    self.features3 = nn.Sequential(*conv_layers[:17])
    self.features4 = nn.Sequential(*conv_layers[17:24])
    self.features5 = nn.Sequential(*conv_layers[24:])

    # 1x1 scoring layers for the skip connections, zero-initialised.
    self.score_pool3 = nn.Conv2d(256, num_classes, kernel_size=1)
    self.score_pool4 = nn.Conv2d(512, num_classes, kernel_size=1)
    for scorer in (self.score_pool3, self.score_pool4):
        scorer.weight.data.zero_()
        scorer.bias.data.zero_()

    # Convolutionalise VGG's fully-connected fc6/fc7 by reshaping their weights.
    fc6 = nn.Conv2d(512, 4096, kernel_size=7)
    fc6.weight.data.copy_(fc_layers[0].weight.data.view(4096, 512, 7, 7))
    fc6.bias.data.copy_(fc_layers[0].bias.data)
    fc7 = nn.Conv2d(4096, 4096, kernel_size=1)
    fc7.weight.data.copy_(fc_layers[3].weight.data.view(4096, 4096, 1, 1))
    fc7.bias.data.copy_(fc_layers[3].bias.data)
    score_fr = nn.Conv2d(4096, num_classes, kernel_size=1)
    score_fr.weight.data.zero_()
    score_fr.bias.data.zero_()
    self.score_fr = nn.Sequential(
        fc6, nn.ReLU(inplace=True), nn.Dropout(),
        fc7, nn.ReLU(inplace=True), nn.Dropout(),
        score_fr,
    )

    # Learned upsampling: 2x, 2x, then 8x; initialised to bilinear interpolation.
    self.upscore2 = nn.ConvTranspose2d(num_classes, num_classes,
                                       kernel_size=4, stride=2, bias=False)
    self.upscore_pool4 = nn.ConvTranspose2d(num_classes, num_classes,
                                            kernel_size=4, stride=2, bias=False)
    self.upscore8 = nn.ConvTranspose2d(num_classes, num_classes,
                                       kernel_size=16, stride=8, bias=False)
    self.upscore2.weight.data.copy_(
        get_upsampling_weight(num_classes, num_classes, 4))
    self.upscore_pool4.weight.data.copy_(
        get_upsampling_weight(num_classes, num_classes, 4))
    self.upscore8.weight.data.copy_(
        get_upsampling_weight(num_classes, num_classes, 16))
def __init__(self, num_classes=[21, 20], pretrained=False, depth=18):
    """FCN-32s with a ResNet backbone and two independent prediction heads.

    Args:
        num_classes: two-element sequence with the class count of each head.
            NOTE(review): the mutable list default is kept for interface
            compatibility; it is only read, never mutated.
        pretrained: load ImageNet weights for the backbone.
        depth: one of 18/34/50/101/152.

    Raises:
        TypeError: if ``depth`` is not a supported ResNet depth.
    """
    super(FCN32RESNET_MULTI, self).__init__()
    builders = {18: models.resnet18, 34: models.resnet34, 50: models.resnet50,
                101: models.resnet101, 152: models.resnet152}
    if depth not in builders:
        raise TypeError('Invalid Resnet depth')
    resnet = builders[depth](pretrained=pretrained)

    layers = [*resnet.children()]
    # Width of the final fc layer = channel count of the last conv stage.
    num_channels = layers[-1].in_features
    layers = layers[0:-1]  # remove the original 1000-dimension Linear layer
    for f in layers:
        if 'MaxPool' in f.__class__.__name__ or 'AvgPool' in f.__class__.__name__:
            f.ceil_mode = True
        elif 'ReLU' in f.__class__.__name__:
            f.inplace = True
    self.features = nn.Sequential(*layers)

    def _make_head(n_cls):
        # One prediction head: zero-initialised 1x1 scorer followed by a
        # 32x learned upsampling layer initialised to bilinear interpolation.
        score = nn.Conv2d(num_channels, n_cls, kernel_size=1)
        score.weight.data.zero_()
        score.bias.data.zero_()
        upscore = nn.ConvTranspose2d(n_cls, n_cls, kernel_size=64,
                                     stride=32, bias=False)
        upscore.weight.data.copy_(get_upsampling_weight(n_cls, n_cls, 64))
        return nn.Sequential(
            OrderedDict([('conv6', score), ('tconv7', upscore)]))

    self.final1 = _make_head(num_classes[0])
    self.final2 = _make_head(num_classes[1])
def __init__(self, num_classes, pretrained=True):
    """FCN-16s head on VGG-16, loading Caffe-converted weights when pretrained."""
    super(FCN16VGG, self).__init__()
    backbone = models.vgg16()
    if pretrained:
        backbone.load_state_dict(torch.load(vgg16_caffe_path))
    conv_layers = list(backbone.features.children())
    fc_layers = list(backbone.classifier.children())

    # Pad the first conv by 100 so very small inputs survive the downsampling
    # and later crops can be aligned between layers.
    conv_layers[0].padding = (100, 100)
    for layer in conv_layers:
        cls_name = layer.__class__.__name__
        if 'MaxPool' in cls_name:
            layer.ceil_mode = True
        elif 'ReLU' in cls_name:
            layer.inplace = True

    # Backbone split after pool4 for the single skip connection FCN-16s uses.
    self.features4 = nn.Sequential(*conv_layers[:24])
    self.features5 = nn.Sequential(*conv_layers[24:])

    self.score_pool4 = nn.Conv2d(512, num_classes, kernel_size=1)
    self.score_pool4.weight.data.zero_()
    self.score_pool4.bias.data.zero_()

    # Convolutionalised VGG fc6/fc7 with weights copied via reshape.
    fc6 = nn.Conv2d(512, 4096, kernel_size=7)
    fc6.weight.data.copy_(fc_layers[0].weight.data.view(4096, 512, 7, 7))
    fc6.bias.data.copy_(fc_layers[0].bias.data)
    fc7 = nn.Conv2d(4096, 4096, kernel_size=1)
    fc7.weight.data.copy_(fc_layers[3].weight.data.view(4096, 4096, 1, 1))
    fc7.bias.data.copy_(fc_layers[3].bias.data)
    score_fr = nn.Conv2d(4096, num_classes, kernel_size=1)
    score_fr.weight.data.zero_()
    score_fr.bias.data.zero_()
    self.score_fr = nn.Sequential(
        fc6, nn.ReLU(inplace=True), nn.Dropout(),
        fc7, nn.ReLU(inplace=True), nn.Dropout(),
        score_fr,
    )

    # Learned 2x and 16x upsampling, bilinear-initialised.
    self.upscore2 = nn.ConvTranspose2d(num_classes, num_classes,
                                       kernel_size=4, stride=2, bias=False)
    self.upscore16 = nn.ConvTranspose2d(num_classes, num_classes,
                                        kernel_size=32, stride=16, bias=False)
    self.upscore2.weight.data.copy_(
        get_upsampling_weight(num_classes, num_classes, 4))
    self.upscore16.weight.data.copy_(
        get_upsampling_weight(num_classes, num_classes, 32))
def __init__(self, num_classes, pretrained=True, feat=64, **kwargs):
    """FCN-8s head on a width-configurable VGG-16 (``feat`` = base channel count)."""
    super(FCN8s, self).__init__()
    backbone = vgg16(pretrained=pretrained, feat=feat, **kwargs)
    conv_layers = list(backbone.features.children())
    fc_layers = list(backbone.classifier.children())

    # 100 padding for 2 reasons:
    #   1) support very small input size
    #   2) allow cropping in order to match size of different layers'
    #      feature maps
    # Note that the cropped part corresponds to a part of the 100 padding,
    # so spatial information of different layers' feature maps cannot be
    # aligned exactly because of cropping, which is bad.
    conv_layers[0].padding = (100, 100)

    ft = feat
    for layer in conv_layers:
        cls_name = layer.__class__.__name__
        if 'MaxPool' in cls_name:
            layer.ceil_mode = True
        elif 'ReLU' in cls_name:
            layer.inplace = True

    # Backbone split after pool3 and pool4 for the two skip connections.
    self.features3 = nn.Sequential(*conv_layers[:17])
    self.features4 = nn.Sequential(*conv_layers[17:24])
    self.features5 = nn.Sequential(*conv_layers[24:])

    # Channel widths scale with `feat` (pool3 -> 4*ft, pool4 -> 8*ft).
    self.score_pool3 = nn.Conv2d(ft * 4, num_classes, kernel_size=1)
    self.score_pool4 = nn.Conv2d(ft * 8, num_classes, kernel_size=1)
    self.score_pool3.weight.data.zero_()
    self.score_pool3.bias.data.zero_()
    self.score_pool4.weight.data.zero_()
    self.score_pool4.bias.data.zero_()

    # Convolutionalised fc6/fc7 with weights copied via reshape.
    fc6 = nn.Conv2d(ft * 8, ft * 64, kernel_size=7)
    fc6.weight.data.copy_(fc_layers[0].weight.data.view(ft * 64, ft * 8, 7, 7))
    fc6.bias.data.copy_(fc_layers[0].bias.data)
    fc7 = nn.Conv2d(ft * 64, ft * 64, kernel_size=1)
    fc7.weight.data.copy_(fc_layers[3].weight.data.view(ft * 64, ft * 64, 1, 1))
    fc7.bias.data.copy_(fc_layers[3].bias.data)
    score_fr = nn.Conv2d(ft * 64, num_classes, kernel_size=1)
    score_fr.weight.data.zero_()
    score_fr.bias.data.zero_()
    self.score_fr = nn.Sequential(
        fc6, nn.ReLU(inplace=True), nn.Dropout(),
        fc7, nn.ReLU(inplace=True), nn.Dropout(),
        score_fr,
    )

    # Learned 2x / 2x / 8x upsampling, bilinear-initialised.
    self.upscore2 = nn.ConvTranspose2d(num_classes, num_classes,
                                       kernel_size=4, stride=2, bias=False)
    self.upscore_pool4 = nn.ConvTranspose2d(num_classes, num_classes,
                                            kernel_size=4, stride=2, bias=False)
    self.upscore8 = nn.ConvTranspose2d(num_classes, num_classes,
                                       kernel_size=16, stride=8, bias=False)
    self.upscore2.weight.data.copy_(
        get_upsampling_weight(num_classes, num_classes, 4))
    self.upscore_pool4.weight.data.copy_(
        get_upsampling_weight(num_classes, num_classes, 4))
    self.upscore8.weight.data.copy_(
        get_upsampling_weight(num_classes, num_classes, 16))
def __init__(self, num_classes=21, pretrained=False, depth=18, dprob=0.1):
    """FCN-32s with a ResNet backbone of configurable depth.

    NOTE(review): ``dprob`` is currently unused — it belonged to a removed
    experiment that inserted Dropout after every conv layer; it is kept for
    interface compatibility.

    Raises:
        TypeError: if ``depth`` is not one of 18/34/50/101/152.
    """
    super(FCN32RESNET, self).__init__()
    print('pretrained = {}, depth = {}'.format(pretrained, depth))
    builders = {18: models.resnet18, 34: models.resnet34, 50: models.resnet50,
                101: models.resnet101, 152: models.resnet152}
    if depth not in builders:
        raise TypeError('Invalid Resnet depth')
    resnet = builders[depth](pretrained=pretrained)

    layers = [*resnet.children()]
    # Width of the final fc layer = channel count of the last conv stage.
    num_channels = layers[-1].in_features
    layers = layers[0:-1]  # remove the original 1000-dimension Linear layer
    for f in layers:
        if 'MaxPool' in f.__class__.__name__ or 'AvgPool' in f.__class__.__name__:
            f.ceil_mode = True
        elif 'ReLU' in f.__class__.__name__:
            f.inplace = True
    self.features = nn.Sequential(*layers)

    # Prediction head: zero-initialised 1x1 scorer + 32x bilinear-initialised
    # learned upsampling.
    final = nn.Conv2d(num_channels, num_classes, kernel_size=1)
    final.weight.data.zero_()
    final.bias.data.zero_()
    upscore = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=64,
                                 stride=32, bias=False)
    upscore.weight.data.copy_(
        get_upsampling_weight(num_classes, num_classes, 64))
    self.final = nn.Sequential(
        OrderedDict([('conv6', final), ('tconv7', upscore)]))
def __init__(self, num_classes=21, pretrained=False):
    """FCN-32s on VGG-16, transplanting the fc layers via reshaped state_dicts."""
    super(FCN32VGG, self).__init__()
    backbone = models.vgg16(pretrained=pretrained)
    conv_layers = list(backbone.features.children())
    fc_layers = list(backbone.classifier.children())

    # Why one might pad the input (deliberately left disabled here):
    # https://github.com/shelhamer/fcn.berkeleyvision.org#frequently-asked-questions
    for layer in conv_layers:
        cls_name = layer.__class__.__name__
        if 'MaxPool' in cls_name:
            layer.ceil_mode = True
        elif 'ReLU' in cls_name:
            layer.inplace = True
    self.features5 = nn.Sequential(*conv_layers)

    # The Linear weights must be reshaped into conv kernels, so we copy through
    # each layer's state_dict instead of load_state_dict on the whole classifier.
    fc6 = nn.Conv2d(512, 4096, kernel_size=7)
    state6 = fc_layers[0].state_dict()
    state6['weight'] = state6['weight'].view(4096, 512, 7, 7)
    fc6.load_state_dict(state6, strict=True)
    fc7 = nn.Conv2d(4096, 4096, kernel_size=1)
    state7 = fc_layers[3].state_dict()
    state7['weight'] = state7['weight'].view(4096, 4096, 1, 1)
    fc7.load_state_dict(state7, strict=True)

    # Zero-initialised scorer + 32x bilinear-initialised learned upsampling.
    final = nn.Conv2d(4096, num_classes, kernel_size=1)
    final.weight.data.zero_()
    final.bias.data.zero_()
    upscore = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=64,
                                 stride=32, bias=False)
    upscore.weight.data.copy_(
        get_upsampling_weight(num_classes, num_classes, 64))
    self.final = nn.Sequential(OrderedDict([
        ('conv0', fc6),
        ('relu1', nn.ReLU(inplace=True)),
        ('dropout2', nn.Dropout()),
        ('conv3', fc7),
        ('relu4', nn.ReLU(inplace=True)),
        ('dropout5', nn.Dropout()),
        ('conv6', final),
        ('tconv7', upscore),
    ]))
def __init__(self, num_classes, pretrained=True):
    """FCN-32s on a frozen VGG-16 backbone; only the new head layers train."""
    super(FCN32s, self).__init__()
    backbone = models.vgg16(pretrained=pretrained)
    # Freeze every pretrained backbone parameter.
    for param in backbone.parameters():
        param.requires_grad = False
    conv_layers = list(backbone.features.children())
    fc_layers = list(backbone.classifier.children())

    conv_layers[0].padding = (100, 100)
    for layer in conv_layers:
        cls_name = layer.__class__.__name__
        if 'MaxPool' in cls_name:
            layer.ceil_mode = True
        elif 'ReLU' in cls_name:
            layer.inplace = True
    self.features5 = nn.Sequential(*conv_layers)

    # fc6/fc7 are fresh Conv2d modules (and therefore trainable) whose weights
    # are copied from the frozen VGG classifier.
    fc6 = nn.Conv2d(512, 4096, kernel_size=7)
    fc6.weight.data.copy_(fc_layers[0].weight.data.view(4096, 512, 7, 7))
    fc6.bias.data.copy_(fc_layers[0].bias.data)
    fc7 = nn.Conv2d(4096, 4096, kernel_size=1)
    fc7.weight.data.copy_(fc_layers[3].weight.data.view(4096, 4096, 1, 1))
    fc7.bias.data.copy_(fc_layers[3].bias.data)
    score_fr = nn.Conv2d(4096, num_classes, kernel_size=1)
    score_fr.weight.data.zero_()
    score_fr.bias.data.zero_()
    # Dropout is deliberately omitted in this variant.
    self.score_fr = nn.Sequential(
        fc6, nn.ReLU(inplace=True), fc7, nn.ReLU(inplace=True), score_fr)

    # 32x learned upsampling, initialised to bilinear interpolation.
    self.upscore = nn.ConvTranspose2d(num_classes, num_classes,
                                      kernel_size=64, stride=32, bias=False)
    self.upscore.weight.data.copy_(
        get_upsampling_weight(num_classes, num_classes, 64))
def __init__(self, num_inputs, action_space):
    """A3C actor-critic: conv encoder, LSTM core, and a deconv decoder head."""
    super(A3Clstm, self).__init__()
    # Convolutional encoder; each stage is followed by a 2x2 max-pool.
    self.conv1 = nn.Conv2d(num_inputs, 32, 5, stride=1, padding=2)
    self.maxp1 = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(32, 32, 5, stride=1, padding=1)
    self.maxp2 = nn.MaxPool2d(2, 2)
    self.conv3 = nn.Conv2d(32, 64, 4, stride=1, padding=1)
    self.maxp3 = nn.MaxPool2d(2, 2)
    self.conv4 = nn.Conv2d(64, 64, 3, stride=1, padding=1)
    self.maxp4 = nn.MaxPool2d(2, 2)

    # 2249 input features — presumably the flattened conv output plus extra
    # inputs; TODO(review): confirm against the forward() that feeds this cell.
    self.lstm = nn.LSTMCell(2249, 512)

    num_outputs = action_space
    self.critic_linear = nn.Linear(512, 1)
    self.actor_linear = nn.Linear(512, num_outputs)

    # Custom initialisation: base init, then scale conv weights by ReLU gain.
    self.apply(weights_init)
    relu_gain = nn.init.calculate_gain('relu')
    for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
        conv.weight.data.mul_(relu_gain)
    self.actor_linear.weight.data = norm_col_init(
        self.actor_linear.weight.data, 0.01)
    self.actor_linear.bias.data.fill_(0)
    self.critic_linear.weight.data = norm_col_init(
        self.critic_linear.weight.data, 1.0)
    self.critic_linear.bias.data.fill_(0)
    self.lstm.bias_ih.data.fill_(0)
    self.lstm.bias_hh.data.fill_(0)

    # Fully convolutional decoder; upscore2..4 are bilinear-initialised.
    self.upscore1 = nn.ConvTranspose2d(64, 1, kernel_size=3, stride=2,
                                       output_padding=1, bias=False)
    self.upscore1.weight.data.normal_(0.0, 0.02)
    self.upscore2 = nn.ConvTranspose2d(1, 1, kernel_size=4, stride=2,
                                       output_padding=1, bias=False)
    self.upscore2.weight.data.copy_(get_upsampling_weight(1, 1, 4))
    self.upscore3 = nn.ConvTranspose2d(1, 1, kernel_size=5, stride=2,
                                       bias=False)
    self.upscore3.weight.data.copy_(get_upsampling_weight(1, 1, 5))
    self.upscore4 = nn.ConvTranspose2d(1, 1, kernel_size=5, stride=2,
                                       bias=False)
    self.upscore4.weight.data.copy_(get_upsampling_weight(1, 1, 5))

    self.train()
def __init__(self, num_classes, pretrained=True, caffe=False):
    """FCN-8s on VGG-16; ``caffe=True`` loads the paper authors' Caffe weights."""
    super(FCN8s, self).__init__()
    backbone = models.vgg16()
    if pretrained:
        # The Caffe snapshot is the pretrained VGG-16 used by the paper's authors.
        weight_path = vgg16_caffe_path if caffe else vgg16_path
        backbone.load_state_dict(torch.load(weight_path))
    conv_layers = list(backbone.features.children())
    fc_layers = list(backbone.classifier.children())

    # 100 padding for 2 reasons:
    #   1) support very small input size
    #   2) allow cropping in order to match size of different layers'
    #      feature maps
    # Note that the cropped part corresponds to a part of the 100 padding,
    # so spatial information of different layers' feature maps cannot be
    # aligned exactly because of cropping, which is bad.
    conv_layers[0].padding = (100, 100)
    for layer in conv_layers:
        cls_name = layer.__class__.__name__
        if 'MaxPool' in cls_name:
            layer.ceil_mode = True
        elif 'ReLU' in cls_name:
            layer.inplace = True

    # Backbone split after pool3 and pool4 for the two skip connections.
    self.features3 = nn.Sequential(*conv_layers[:17])
    self.features4 = nn.Sequential(*conv_layers[17:24])
    self.features5 = nn.Sequential(*conv_layers[24:])

    self.score_pool3 = nn.Conv2d(256, num_classes, kernel_size=1)
    self.score_pool4 = nn.Conv2d(512, num_classes, kernel_size=1)
    self.score_pool3.weight.data.zero_()
    self.score_pool3.bias.data.zero_()
    self.score_pool4.weight.data.zero_()
    self.score_pool4.bias.data.zero_()

    # Convolutionalised fc6/fc7 with weights copied via reshape.
    fc6 = nn.Conv2d(512, 4096, kernel_size=7)
    fc6.weight.data.copy_(fc_layers[0].weight.data.view(4096, 512, 7, 7))
    fc6.bias.data.copy_(fc_layers[0].bias.data)
    fc7 = nn.Conv2d(4096, 4096, kernel_size=1)
    fc7.weight.data.copy_(fc_layers[3].weight.data.view(4096, 4096, 1, 1))
    fc7.bias.data.copy_(fc_layers[3].bias.data)
    score_fr = nn.Conv2d(4096, num_classes, kernel_size=1)
    score_fr.weight.data.zero_()
    score_fr.bias.data.zero_()
    self.score_fr = nn.Sequential(
        fc6, nn.ReLU(inplace=True), nn.Dropout(),
        fc7, nn.ReLU(inplace=True), nn.Dropout(),
        score_fr,
    )

    # Learned 2x / 2x / 8x upsampling, bilinear-initialised.
    self.upscore2 = nn.ConvTranspose2d(num_classes, num_classes,
                                       kernel_size=4, stride=2, bias=False)
    self.upscore_pool4 = nn.ConvTranspose2d(num_classes, num_classes,
                                            kernel_size=4, stride=2, bias=False)
    self.upscore8 = nn.ConvTranspose2d(num_classes, num_classes,
                                       kernel_size=16, stride=8, bias=False)
    self.upscore2.weight.data.copy_(
        get_upsampling_weight(num_classes, num_classes, 4))
    self.upscore_pool4.weight.data.copy_(
        get_upsampling_weight(num_classes, num_classes, 4))
    self.upscore8.weight.data.copy_(
        get_upsampling_weight(num_classes, num_classes, 16))
def __init__(self, n_classes=21, learned_billinear=True):
    """FCN-8s built from scratch (VGG-16 layout) with an attached loss function."""
    super(fcn8s, self).__init__()
    self.learned_billinear = learned_billinear
    self.n_classes = n_classes
    self.loss = functools.partial(cross_entropy2d, size_average=False)

    def _vgg_block(in_ch, out_ch, n_convs, first_padding=1):
        # A VGG stage: n_convs (Conv2d + ReLU) pairs, then a 2x2 ceil-mode
        # max-pool. Only the very first conv may use the 100-pixel padding.
        layers = []
        ch, pad = in_ch, first_padding
        for _ in range(n_convs):
            layers.append(nn.Conv2d(ch, out_ch, 3, padding=pad))
            layers.append(nn.ReLU(inplace=True))
            ch, pad = out_ch, 1
        layers.append(nn.MaxPool2d(2, stride=2, ceil_mode=True))
        return nn.Sequential(*layers)

    # 100-pixel padding on the first conv supports small inputs and later crops.
    self.conv_block1 = _vgg_block(3, 64, 2, first_padding=100)
    self.conv_block2 = _vgg_block(64, 128, 2)
    self.conv_block3 = _vgg_block(128, 256, 3)
    self.conv_block4 = _vgg_block(256, 512, 3)
    self.conv_block5 = _vgg_block(512, 512, 3)

    # Convolutionalised fc6/fc7 + final scorer.
    self.classifier = nn.Sequential(
        nn.Conv2d(512, 4096, 7),
        nn.ReLU(inplace=True),
        nn.Dropout2d(),
        nn.Conv2d(4096, 4096, 1),
        nn.ReLU(inplace=True),
        nn.Dropout2d(),
        nn.Conv2d(4096, self.n_classes, 1),
    )

    # 1x1 scorers for the pool4 and pool3 skip connections.
    self.score_pool4 = nn.Conv2d(512, self.n_classes, 1)
    self.score_pool3 = nn.Conv2d(256, self.n_classes, 1)

    if self.learned_billinear:
        self.upscore2 = nn.ConvTranspose2d(self.n_classes, self.n_classes, 4,
                                           stride=2, bias=False)
        self.upscore4 = nn.ConvTranspose2d(self.n_classes, self.n_classes, 4,
                                           stride=2, bias=False)
        self.upscore8 = nn.ConvTranspose2d(self.n_classes, self.n_classes, 16,
                                           stride=8, bias=False)
        # Initialise every transposed conv to bilinear interpolation.
        for m in self.modules():
            if isinstance(m, nn.ConvTranspose2d):
                m.weight.data.copy_(
                    get_upsampling_weight(m.in_channels, m.out_channels,
                                          m.kernel_size[0]))