def __init__(self, pretrained, num_classes, num_maps, num_boxes, img_size, feat_size):
    """Build the weakly-supervised detection pipeline on a VGG16 backbone.

    Args:
        pretrained: if truthy, try to load VGG16 weights from a local checkpoint.
        num_classes: number of output classes.
        num_maps: channel count of the soft-proposal feature maps.
        num_boxes: number of boxes handled by the detection head.
        img_size: unpacked as (batch, channels, height, width) below.
        feat_size: feature-map size descriptor; feat_size[2], feat_size[3] are
            used as the spatial dims of the response maps — presumably
            (batch, channels, h, w); TODO confirm against callers.
    """
    super(WSLDetPipe, self).__init__()
    self.num_boxes = num_boxes
    self.num_classes = num_classes
    model = models.vgg16(pretrained=False)
    if pretrained:
        # NOTE(review): machine-specific absolute path — should be made configurable.
        model_path = '/home/jshi31/SPN.pytorch/demo/models/vgg16_official.pth'
        # model_path = './models/vgg16_official.pth'
        # FIX: Python 2 print statements converted to print() calls so the
        # module parses under Python 3 (the block already mixed both forms).
        print('syspath', sys.path)
        print('env', os.path.isfile(model_path))
        if os.path.isfile(model_path):
            print('loading pretrained model...')
            state_dict = torch.load(model_path)
            model.load_state_dict(state_dict)
        else:
            print('Please download the pretrained VGG16 into ./models')
    # model.features[28] is the last conv layer in VGG16
    num_features = model.features[28].out_channels
    # pooling head: conv -> relu -> soft proposal (spatial sum kept separate below)
    pooling = nn.Sequential()
    pooling.add_module(
        'adconv',
        nn.Conv2d(num_features, num_maps, kernel_size=3, stride=1, padding=1, groups=2, bias=True))
    pooling.add_module('maps', nn.ReLU())
    sp_layer = SoftProposal()
    # Forward hook captures the soft-proposal output for later inspection.
    model.sp_hook = sp_layer.register_forward_hook(_sp_hook)
    pooling.add_module('sp', sp_layer)
    self.summing = nn.Sequential(SpatialSumOverMap())
    # Drop the final layer of model.features (the last max-pool), so the
    # soft-proposal input keeps the conv5 spatial resolution.
    self.features = nn.Sequential(*list(model.features.children())[:-1])
    self.spatial_pooling = pooling
    # classification layer
    self.classifier = nn.Sequential(nn.Dropout(0.5), nn.Linear(num_maps, num_classes))
    # per-channel image normalization means — look like Caffe-style BGR means; verify preprocessing order
    self.image_normalization_mean = [103.939, 116.779, 123.68]
    self.feat_offset = Feat_offset(num_boxes, feat_size, img_size[2:])
    self.WSLdetpred = nn.Sequential(WSLDetPred(num_boxes, num_classes, feat_size))
    self.batch_num, _, self.h, self.w = img_size
    # Register the hook that exposes the SPN intermediate variable.
    self.hook_spn()
    # Pre-allocated buffer for per-class spatial response maps.
    self.class_response_maps = torch.zeros(
        [self.batch_num, self.num_classes, feat_size[2], feat_size[3]])
def vgg16_sp(num_classes, pretrained=True, num_maps=1024):
    """Build an SPNetWSL classifier from a VGG16 backbone.

    NOTE(review): several functions in this file share the name ``vgg16_sp``;
    if they live in one module the later definition shadows this one.

    Args:
        num_classes: number of output classes.
        pretrained: if True, load weights from ./models/vgg16_official.pth.
        num_maps: output channels of the soft-proposal head.

    Returns:
        SPNetWSL wrapping the backbone and the conv->relu->sp->sum head.
    """
    model = models.vgg16(pretrained=False)
    # FIX: Python 2 print statements converted to print() calls (Python 3 syntax).
    print(model)
    if pretrained:
        model_path = './models/vgg16_official.pth'
        if os.path.isfile(model_path):
            print('loading pretrained model...')
            state_dict = torch.load(model_path)
            model.load_state_dict(state_dict)
        else:
            print('Please download the pretrained VGG16 into ./models')
    # model.features[28] is the last conv layer in VGG16
    num_features = model.features[28].out_channels
    # pooling head: conv -> relu -> soft proposal -> spatial sum
    pooling = nn.Sequential()
    pooling.add_module(
        'adconv',
        nn.Conv2d(num_features, num_maps, kernel_size=3, stride=1, padding=1, groups=2, bias=True))
    pooling.add_module('maps', nn.ReLU())
    pooling.add_module('sp', SoftProposal())
    # spatial sum collapses (batch, num_maps, h, w) -> (batch, num_maps)
    pooling.add_module('sum', SpatialSumOverMap())
    return SPNetWSL(model, num_classes, num_maps, pooling)
def vgg16_sp(num_classes, pretrained=True, num_maps=1024):
    """Assemble an SPNetWSL model around a VGG16 backbone.

    Optionally restores backbone weights from models/VGG16_ImageNet.pt,
    then attaches the conv -> relu -> soft-proposal -> spatial-sum head.
    """
    backbone = models.vgg16(pretrained=False)
    if pretrained:
        checkpoint = 'models/VGG16_ImageNet.pt'
        if not os.path.isfile(checkpoint):
            print('Please download the pretrained VGG16 into ./models')
        else:
            backbone.load_state_dict(torch.load(checkpoint))
    # Channel count of the last conv layer in the VGG16 feature stack.
    in_channels = backbone.features[28].out_channels
    head = nn.Sequential()
    head.add_module(
        'adconv',
        nn.Conv2d(in_channels, num_maps, kernel_size=3, stride=1, padding=1, groups=2, bias=True))
    head.add_module('maps', nn.ReLU())
    head.add_module('sp', SoftProposal())
    head.add_module('sum', SpatialSumOverMap())
    return SPNetWSL(backbone, num_classes, num_maps, head)
def alexnet_sp(num_classes, num_maps=1024, **kwargs):
    """Wrap an AlexNet backbone with the soft-proposal pooling head."""
    backbone = alexnet(pretrained=False, **kwargs)
    # Channel count of the last conv layer in AlexNet's feature stack.
    in_channels = backbone.features[10].out_channels
    head = nn.Sequential()
    head.add_module(
        'adconv',
        nn.Conv2d(in_channels, num_maps, kernel_size=3, stride=1, padding=1, groups=2, bias=True))
    head.add_module('maps', nn.ReLU())
    head.add_module('sp', SoftProposal())
    head.add_module('sum', SpatialSumOverMap())
    return SPNetWSL(backbone, num_classes, num_maps, head)
def vgg16_sp(num_classes, batch_norm=False, num_maps=1024, **kwargs):
    """Build SPNetWSL on VGG16 (optionally the batch-norm variant)."""
    backbone = vgg16(pretrained=False, batch_norm=batch_norm, **kwargs)
    # The last conv sits at index 40 with batch norm, 28 without.
    last_conv_idx = 40 if batch_norm else 28
    in_channels = backbone.features[last_conv_idx].out_channels
    head = nn.Sequential()
    head.add_module(
        'adconv',
        nn.Conv2d(in_channels, num_maps, kernel_size=3, stride=1, padding=1, groups=2, bias=True))
    head.add_module('maps', nn.ReLU())
    head.add_module('sp', SoftProposal())
    head.add_module('sum', SpatialSumOverMap())
    return SPNetWSL(backbone, num_classes, num_maps, head)
def resnet34_sp(num_classes, num_maps=1024, **kwargs):
    """Build SPNetWSL on a ResNet-34 backbone."""
    backbone = resnet34(pretrained=False, **kwargs)
    # Dig out the channel count of the relevant conv: child index 3 of the
    # second module inside the second-to-last entry of the feature stack.
    stage = list(backbone.features.children())[-2]
    block_children = list(stage[1].children())
    in_channels = block_children[3].out_channels
    head = nn.Sequential()
    head.add_module(
        'adconv',
        nn.Conv2d(in_channels, num_maps, kernel_size=3, stride=1, padding=1, groups=2, bias=True))
    head.add_module('maps', nn.ReLU())
    head.add_module('sp', SoftProposal())
    head.add_module('sum', SpatialSumOverMap())
    return SPNetWSL(backbone, num_classes, num_maps, head)
def resnet50_sp(num_classes, pretrained=True, num_maps=1024):
    """Build SPNetWSL on a ResNet-50 backbone (optionally torchvision-pretrained)."""
    backbone = models.resnet50(pretrained)
    # ResNet-50's final stage outputs 2048 channels.
    in_channels = 2048
    head = nn.Sequential()
    head.add_module(
        'adconv',
        nn.Conv2d(in_channels, num_maps, kernel_size=3, stride=1, padding=1, groups=2, bias=True))
    head.add_module('maps', nn.ReLU())
    head.add_module('sp', SoftProposal())
    head.add_module('sum', spatialpooling.SpatialSumOverMap())
    return SPNetWSL(backbone, num_classes, num_maps, head)
def __init__(self, num_classes=20, num_maps=1024):
    """WSL model: VGG16 features -> adconv -> ReLU -> soft proposal -> sum -> classifier."""
    super(WSL, self).__init__()
    backbone = models.vgg16(pretrained=True)
    in_channels = backbone.features[28].out_channels
    # Backbone feature stack without its final layer.
    self.features = nn.Sequential(*list(backbone.features.children())[:-1])
    self.addconv = nn.Conv2d(in_channels, num_maps, kernel_size=3, stride=1, padding=1, groups=2, bias=True)
    self.maps = nn.ReLU()
    self.sp = SoftProposal()
    self.sum = spatialpooling.SpatialSumOverMap()
    # classification layer
    self.classifier = nn.Sequential(nn.Dropout(0.5), nn.Linear(num_maps, num_classes))