def __init__(self, model, return_layers, in_channels_list=None, out_channels=None, fpn=False, classifier=False):
    """Wrap *model* in a BackboneWithFPN, optionally keeping its classifier.

    Args:
        model: the full model to adapt (resnet-family expected).
        return_layers: module-name -> output-name mapping for the body.
        in_channels_list: channel widths of the returned feature maps.
        out_channels: FPN output channels.
        fpn: when False the FPN sub-module is removed and only the body runs.
        classifier: when True and *model* has an ``fc`` head, keep it.
    """
    super(Backbone, self).__init__()
    # BUG FIX: the default was a mutable list (`in_channels_list=[]`),
    # shared across calls; use None and materialize a fresh list instead.
    if in_channels_list is None:
        in_channels_list = []
    self.backbone = BackboneWithFPN(model, return_layers, in_channels_list, out_channels)
    self.classifier = None
    if classifier and hasattr(model, 'fc'):
        # XXX hardcoded for resnet family models
        self.classifier = model.fc
    if not fpn:
        # Drop the FPN sub-module so only the body is used in forward().
        del self.backbone._modules['fpn']
def resnet_fpn_backbone(backbone_name, pretrained):
    """Build a ResNet-101 backbone (optionally anti-aliased) with an FPN.

    Args:
        backbone_name (str): "resnet101_lpf" selects the low-pass-filtered
            (anti-aliasing) variant loaded from a local checkpoint; any
            other value selects torchvision's resnet101.
        pretrained (bool): load ImageNet weights for the torchvision variant.

    Returns:
        BackboneWithFPN: backbone emitting four 256-channel FPN levels.
    """
    if backbone_name == "resnet101_lpf":
        print(
            "**************Adding Resnet 101 AntiAliaing backbone***************"
        )
        backbone = resnet.resnet101(filter_size=5)
        backbone.load_state_dict(
            torch.load('./checkpoints/resnet101_lpf5.pth.tar')['state_dict'])
    else:
        print("**************Adding Resnet 101 backbone***************")
        # BUG FIX: `pretrained` was previously ignored (hard-coded to True).
        backbone = torchvision.models.resnet101(pretrained=pretrained)
    # Freeze the stem and layer1; only layer2-4 stay trainable.
    for name, parameter in backbone.named_parameters():
        if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
            parameter.requires_grad_(False)
    return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}
    in_channels_stage2 = backbone.inplanes // 8
    in_channels_list = [
        in_channels_stage2,
        in_channels_stage2 * 2,
        in_channels_stage2 * 4,
        in_channels_stage2 * 8,
    ]
    out_channels = 256
    return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)
def resnest_fpn_backbone(pretrained, norm_layer=misc_nn_ops.FrozenBatchNorm2d, trainable_layers=3):
    """ResNeSt-101e backbone wrapped with a 256-channel FPN.

    The last *trainable_layers* stages, counted from layer4 backwards,
    remain trainable; every other parameter is frozen.
    """
    # resnet_backbone = resnet.__dict__['resnet152'](pretrained=pretrained,norm_layer=norm_layer)
    backbone = resnest101e(pretrained=pretrained)
    assert 0 <= trainable_layers <= 5
    unfrozen = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]
    for param_name, param in backbone.named_parameters():
        if not any(param_name.startswith(prefix) for prefix in unfrozen):
            param.requires_grad_(False)
    stage2_width = backbone.inplanes // 8
    fpn_inputs = [stage2_width * (2 ** i) for i in range(4)]
    return BackboneWithFPN(
        backbone,
        {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'},
        fpn_inputs,
        256,
    )
def fasterrcnn_resnetxx_fpnxx(cfg):
    """Build a FasterRCNN entirely from a config dict.

    Expected keys: backbone_name, backbone_pretrained, fpn.return_layers,
    fpn.out_channels, anchor_generator, box_roi_pool, num_classes,
    fasterrcnn_pretrained (optional local checkpoint path).
    """
    backbone = resnet.__dict__[cfg['backbone_name']](
        pretrained=cfg['backbone_pretrained'],
        norm_layer=misc_nn_ops.FrozenBatchNorm2d)
    # Freeze everything outside layer2/3/4.
    trainable_tags = ('layer2', 'layer3', 'layer4')
    for name, param in backbone.named_parameters():
        if not any(tag in name for tag in trainable_tags):
            param.requires_grad_(False)
    # Wrap the trunk with an FPN.
    return_layers = cfg['fpn']['return_layers']
    base_width = backbone.inplanes // 8
    in_channels_list = [base_width * (2 ** i) for i in range(len(return_layers))]
    backbone_fpn = BackboneWithFPN(backbone, return_layers, in_channels_list,
                                   cfg['fpn']['out_channels'])
    anchor_generator = AnchorGenerator(**cfg['anchor_generator'])
    # print(anchor_generator.num_anchors_per_location())
    roi_pooler = MultiScaleRoIAlign(**cfg['box_roi_pool'])
    model = FasterRCNN(backbone_fpn,
                       num_classes=cfg['num_classes'],
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    # Warm-start from a local checkpoint when one exists.
    if os.path.exists(cfg['fasterrcnn_pretrained']):
        model.load_state_dict(torch.load(cfg['fasterrcnn_pretrained']))
    return model
def get_backbone(model_name="resnet101", pretrained=False, usize=256, use_FPN=False):
    """Return a ResNet trunk (conv1..layer4), optionally wrapped with an FPN.

    Args:
        model_name: any torchvision resnet/resnext/wide-resnet variant.
        pretrained: load ImageNet weights.
        usize: FPN output channels (only used when use_FPN is True).
        use_FPN: wrap the trunk with BackboneWithFPN.
    """
    # Final-stage channel width of each supported variant.
    model_dict = {
        'resnet18': 512,
        'resnet34': 512,
        'resnet50': 2048,
        'resnet101': 2048,
        'resnet152': 2048,
        'resnext50_32x4d': 2048,
        'resnext101_32x8d': 2048,
        'wide_resnet50_2': 2048,
        'wide_resnet101_2': 2048
    }
    assert model_name in model_dict, "%s must be in %s" % (model_name, model_dict.keys())
    backbone_size = model_dict[model_name]
    _model = torchvision.models.resnet.__dict__[model_name](
        pretrained=pretrained)
    # backbone = resnet.__dict__[model_name](
    #     pretrained=pretrained,
    #     norm_layer=misc_nn_ops.FrozenBatchNorm2d)
    # Strip the classification head: keep only the convolutional trunk.
    backbone = nn.Sequential(OrderedDict([
        ('conv1', _model.conv1),
        ('bn1', _model.bn1),
        ('relu1', _model.relu),
        ('maxpool1', _model.maxpool),
        ('layer1', _model.layer1),
        ('layer2', _model.layer2),
        ('layer3', _model.layer3),
        ('layer4', _model.layer4),
    ]))
    if use_FPN:
        # Freeze the stem and layer1; only layer2-4 stay trainable.
        for name, param in backbone.named_parameters():
            if not ('layer2' in name or 'layer3' in name or 'layer4' in name):
                param.requires_grad_(False)
        # NOTE(review): torchvision's detection heads key FPN levels by the
        # *string* names '0'..'3' (see the commented alternative); confirm
        # that downstream featmap_names match these integer keys.
        return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}
        # return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
        in_channels_list = [
            backbone_size // 8,   # layer1 output channels
            backbone_size // 4,   # layer2 output channels
            backbone_size // 2,   # layer3 output channels
            backbone_size,        # layer4 output channels
        ]
        # FPN output width per level (not fixed; 64, 512, ... also work).
        backbone = BackboneWithFPN(backbone, return_layers, in_channels_list, usize)
    return backbone
def get_mask_rcnn_model(layers, num_classes, out_channels=256, cfg=None):
    """Build a MaskRCNN on top of a custom ResNet backbone with FPN.

    Args:
        layers: block counts per ResNet stage.
        num_classes: number of detection classes.
        out_channels: FPN output channels.
        cfg: optional per-stage channel config passed to ResNet; the
            effective config is read back from ``backbone.cfg``.
    """
    backbone = ResNet(layers, cfg)
    return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
    # FPN input widths: the last channel count of each non-stem stage of the
    # effective config (cfg[0] is the stem, so it is skipped).
    # BUG FIX: removed a leftover debug print and stopped shadowing the
    # `cfg` parameter with the backbone's effective config.
    backbone_cfg = backbone.cfg
    in_channels_list = [stage[len(stage) - 1] for stage in backbone_cfg[1:]]
    backbone = BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)
    model = models.detection.MaskRCNN(backbone, num_classes)
    return model
def resnet_fpn_backbone_DCN(backbone_name, pretrained, norm_layer=misc_nn_ops.FrozenBatchNorm2d, trainable_layers=5):
    """
    Constructs a specified ResNet backbone with FPN on top, replacing the
    3x3 convolutions of layer4 blocks 1 and 2 with deformable convolutions.
    Freezes the specified number of layers in the backbone.

    Examples::

        >>> from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
        >>> backbone = resnet_fpn_backbone('resnet50', pretrained=True, trainable_layers=3)
        >>> # get some dummy image
        >>> x = torch.rand(1,3,64,64)
        >>> # compute the output
        >>> output = backbone(x)
        >>> print([(k, v.shape) for k, v in output.items()])
        >>> # returns
        >>> [('0', torch.Size([1, 256, 16, 16])),
        >>>  ('1', torch.Size([1, 256, 8, 8])),
        >>>  ('2', torch.Size([1, 256, 4, 4])),
        >>>  ('3', torch.Size([1, 256, 2, 2])),
        >>>  ('pool', torch.Size([1, 256, 1, 1]))]

    Arguments:
        backbone_name (string): resnet architecture. Possible values are 'ResNet', 'resnet18', 'resnet34',
            'resnet50', 'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
            'wide_resnet50_2', 'wide_resnet101_2'
        norm_layer (torchvision.ops): it is recommended to use the default value. For details visit:
            (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)
        pretrained (bool): If True, returns a model with backbone pre-trained on Imagenet
        trainable_layers (int): number of trainable (not frozen) resnet layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
    """
    # BUG FIX: this docstring previously sat *after* the first statements,
    # so it was a dead string expression and the function's __doc__ was None.
    backbone = resnet.__dict__[backbone_name](
        pretrained=pretrained,
        norm_layer=norm_layer)
    # Swap the 3x3 conv of layer4 blocks 1 and 2 for deformable convolutions
    # (assumes a 512-channel conv2, as in Bottleneck layer4 — TODO confirm
    # for basic-block architectures).
    for i in range(1, 3):
        backbone.layer4[i].conv2 = DeformConvUnit(512, 512)
    # select layers that wont be frozen
    assert trainable_layers <= 5 and trainable_layers >= 0
    layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]
    # freeze layers only if pretrained backbone is used
    for name, parameter in backbone.named_parameters():
        if all([not name.startswith(layer) for layer in layers_to_train]):
            parameter.requires_grad_(False)
    return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
    in_channels_stage2 = backbone.inplanes // 8
    in_channels_list = [
        in_channels_stage2,
        in_channels_stage2 * 2,
        in_channels_stage2 * 4,
        in_channels_stage2 * 8,
    ]
    out_channels = 256
    return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)
def resnet_fpn_backbone(backbone_name, pretrained):
    """ResNet backbone (frozen batch-norm) with a 256-channel FPN on top.

    Per-stage input widths are taken from ``get_out_channels(backbone)``.
    """
    backbone = resnet.__dict__[backbone_name](
        pretrained=pretrained,
        norm_layer=misc_nn_ops.FrozenBatchNorm2d)
    # Freeze the stem and layer1; only layer2-4 stay trainable.
    trainable_tags = ('layer2', 'layer3', 'layer4')
    for param_name, param in backbone.named_parameters():
        if not any(tag in param_name for tag in trainable_tags):
            param.requires_grad_(False)
    stage_names = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}
    fpn_inputs = get_out_channels(backbone)
    return BackboneWithFPN(backbone, stage_names, fpn_inputs, 256)
def get_model_instance(num_classes):
    """FasterRCNN on an (untrained) ResNet-50 + FPN backbone.

    Args:
        num_classes: number of detection classes (including background).

    Note: image_mean/image_std override torchvision's ImageNet defaults.
    """
    resnet50 = torchvision.models.resnet50(pretrained=False)
    # BUG FIX: the level names must be the *strings* '0'..'3' — FasterRCNN's
    # default MultiScaleRoIAlign looks features up by featmap_names
    # ['0', '1', '2', '3'], so integer keys would never match any level.
    return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
    in_channels_stage2 = resnet50.inplanes // 8
    in_channels_list = [
        in_channels_stage2,
        in_channels_stage2 * 2,
        in_channels_stage2 * 4,
        in_channels_stage2 * 8,
    ]
    out_channels = 256
    backbone = BackboneWithFPN(resnet50, return_layers, in_channels_list, out_channels)
    model = torchvision.models.detection.FasterRCNN(backbone,
                                                    num_classes,
                                                    image_mean=[0.5, 0.5, 0.5],
                                                    image_std=[1, 1, 1])
    return model
def get_fpn_backbone(backbone, freeze_layers):
    """Wrap a resnet-style trunk with a 256-channel FPN.

    Args:
        backbone: model exposing layer1..layer4 and ``inplanes``.
        freeze_layers: when true, freeze everything outside layer2-4.
    """
    if freeze_layers:
        trainable_tags = ('layer2', 'layer3', 'layer4')
        for param_name, param in backbone.named_parameters():
            if not any(tag in param_name for tag in trainable_tags):
                param.requires_grad_(False)
    stage2_width = backbone.inplanes // 8
    fpn_inputs = [stage2_width * (2 ** i) for i in range(4)]
    return BackboneWithFPN(
        backbone,
        {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3},
        fpn_inputs,
        256,
    )
def resnet_fpn_backbone(backbone):
    """Attach a 256-channel FPN over stages layer1-layer4 of *backbone*."""
    stage2_width = backbone.inplanes // 8
    fpn_inputs = [stage2_width, stage2_width * 2, stage2_width * 4, stage2_width * 8]
    # Stage i of the trunk becomes FPN output name str(i - 1), i.e. '0'..'3'.
    stage_names = {'layer%d' % i: str(i - 1) for i in range(1, 5)}
    return BackboneWithFPN(backbone, stage_names, fpn_inputs, 256)
def custom_resnet_fpn_backbone(backbone_name, backbone_params_config, norm_layer=misc_nn_ops.FrozenBatchNorm2d):
    """ResNet + FPN backbone whose layer1 may be swapped for a
    bottleneck-injected variant (with optional compressor/decompressor).

    Args:
        backbone_name: either a plain torchvision resnet name or one
            prefixed with 'custom_' to enable the layer1 replacement.
        backbone_params_config: dict with optional keys 'layer1',
            'pretrained', 'trainable_backbone_layers', 'returned_layers'.
        norm_layer: norm layer passed to the resnet constructor.
    """
    layer1 = None
    layer1_config = backbone_params_config.get('layer1', None)
    if layer1_config is not None:
        # Optional bottleneck compressor/decompressor processors.
        compressor_config = layer1_config.get('compressor', None)
        if compressor_config is None:
            compressor = None
        else:
            compressor = get_bottleneck_processor(compressor_config['name'],
                                                  **compressor_config['params'])
        decompressor_config = layer1_config.get('decompressor', None)
        if decompressor_config is None:
            decompressor = None
        else:
            decompressor = get_bottleneck_processor(decompressor_config['name'],
                                                    **decompressor_config['params'])
        # The replacement type must match the architecture family.
        layer1_type = layer1_config['type']
        small_set = {'custom_resnet18', 'custom_resnet34'}
        large_set = {'custom_resnet50', 'custom_resnet101', 'custom_resnet152'}
        if layer1_type == 'Bottleneck4SmallResNet' and backbone_name in small_set:
            layer1 = Bottleneck4SmallResNet(layer1_config['bottleneck_channel'],
                                            compressor, decompressor)
        elif layer1_type == 'Bottleneck4LargeResNet' and backbone_name in large_set:
            layer1 = Bottleneck4LargeResNet(layer1_config['bottleneck_channel'],
                                            compressor, decompressor)
    # Strip the 'custom_' prefix to recover the torchvision constructor name.
    prefix = 'custom_'
    if backbone_name.startswith(prefix):
        org_backbone_name = backbone_name[backbone_name.find(prefix) + len(prefix):]
    else:
        org_backbone_name = backbone_name
    backbone = resnet.__dict__[org_backbone_name](
        pretrained=backbone_params_config.get('pretrained', False),
        norm_layer=norm_layer
    )
    if layer1 is not None:
        backbone.layer1 = layer1
    # Freeze everything outside the last N trainable stages.
    trainable_layers = backbone_params_config.get('trainable_backbone_layers', 3)
    assert 0 <= trainable_layers <= 5
    layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]
    for name, parameter in backbone.named_parameters():
        if not any(name.startswith(layer) for layer in layers_to_train):
            parameter.requires_grad_(False)
    returned_layers = backbone_params_config.get('returned_layers', [1, 2, 3, 4])
    return_layers = {'layer{}'.format(k): str(v)
                     for v, k in enumerate(returned_layers)}
    stage2_width = backbone.inplanes // 8
    in_channels_list = [stage2_width * 2 ** (i - 1) for i in returned_layers]
    return BackboneWithFPN(backbone, return_layers, in_channels_list, 256)
def mobilenetV3_fpn_backbone(pretrained_path=False):
    '''
    This function builds FPN on mobileNetV3 using torchvision utils
    '''
    from torchvision.models.detection.backbone_utils import BackboneWithFPN
    backbone = MobileNetV3_forFPN()
    if pretrained_path:
        load_pretrained_fpn(backbone, pretrained_path)
    # TO-DO: it should be possible to choose which layers to use
    feature_maps = {
        'layers_os4': 0,
        'layers_os8': 1,
        'layers_os16': 2,
        'layers_os32': 3,
    }
    feature_widths = [16, 24, 48, 96]
    fpn_width = 100
    return BackboneWithFPN(backbone, feature_maps, feature_widths, fpn_width)
def resnet50_fpn(pretrained=True, num_classes=2, **kwargs):
    """Pretrained FPN-ResNet-50 with FasterRCNN.

    BUG FIX: the docstring said "ResNet-18" although resnet50 is built, and a
    stray trailing triple-quote after ``return model`` (an unterminated
    string literal) has been removed.
    """
    norm_layer = misc_nn_ops.FrozenBatchNorm2d
    # BackboneWithFPN only gets layers specified in return_layers (below)
    # using IntermediateLayerGetter, so avgpool, fc, etc. aren't important
    backbone = resnet.resnet50(pretrained=pretrained, norm_layer=norm_layer)
    # freeze the stem and layer1; only layer2-4 stay trainable
    for name, parameter in backbone.named_parameters():
        if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
            parameter.requires_grad_(False)
    return_layers = {
        'layer1': '0',
        'layer2': '1',
        'layer3': '2',
        'layer4': '3'
    }
    in_channels_stage2 = backbone.inplanes // 8
    in_channels_list = [
        in_channels_stage2,
        in_channels_stage2 * 2,
        in_channels_stage2 * 4,
        in_channels_stage2 * 8,
    ]
    out_channels = 256
    backbone = BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)
    # backbone = resnet_fpn_backbone('resnet18', pretrained=pretrained)
    # out channels is already defined as 256
    # attach FasterRCNN head
    model = FasterRCNN(backbone, num_classes=num_classes)
    return model
def my_resnet_fpn_backbone(backbone, pretrained, norm_layer=misc_nn_ops.FrozenBatchNorm2d, trainable_layers=3, returned_layers=None, extra_blocks=None):
    """Wrap an already-constructed resnet with an FPN.

    Freezes everything outside the last *trainable_layers* stages (counted
    from layer4 backwards); with 5, the stem batch-norm is trainable too.
    """
    assert 0 <= trainable_layers <= 5
    trainable = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]
    if trainable_layers == 5:
        trainable.append('bn1')  # also keep the stem batch-norm trainable
    for param_name, param in backbone.named_parameters():
        if not any(param_name.startswith(stage) for stage in trainable):
            param.requires_grad_(False)
    extra_blocks = LastLevelMaxPool() if extra_blocks is None else extra_blocks
    if returned_layers is None:
        returned_layers = [1, 2, 3, 4]
    assert min(returned_layers) > 0 and max(returned_layers) < 5
    # Map each returned stage to the FPN output name '0', '1', ...
    stage_names = {'layer%d' % stage: str(idx)
                   for idx, stage in enumerate(returned_layers)}
    stage2_width = backbone.inplanes // 8
    fpn_inputs = [stage2_width * 2 ** (stage - 1) for stage in returned_layers]
    return BackboneWithFPN(backbone,
                           stage_names,
                           fpn_inputs,
                           256,
                           extra_blocks=extra_blocks)
def resnest_fpn_backbone(pretrained, norm_layer=misc_nn_ops.FrozenBatchNorm2d, trainable_layers=3):
    """ResNeSt-101e backbone with a 256-channel FPN.

    Args:
        pretrained: load pretrained ResNeSt weights.
        norm_layer: kept for signature compatibility (not forwarded to
            resnest101e here).
        trainable_layers: number of stages, counted from layer4 backwards
            (0-5), left trainable; the rest are frozen.
    """
    # resnet_backbone = resnet.__dict__['resnet152'](pretrained=pretrained,norm_layer=norm_layer)
    backbone = resnest101e(pretrained=pretrained)
    # select layers that wont be frozen
    assert trainable_layers <= 5 and trainable_layers >= 0
    # BUG FIX: trainable_layers was validated but never applied, so no
    # parameter was ever frozen.  Freeze everything outside the selected
    # stages (same scheme as the sibling resnest builder in this file).
    layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]
    for name, parameter in backbone.named_parameters():
        if all([not name.startswith(layer) for layer in layers_to_train]):
            parameter.requires_grad_(False)
    return_layers = {
        'layer1': '0',
        'layer2': '1',
        'layer3': '2',
        'layer4': '3'
    }
    in_channels_stage2 = backbone.inplanes // 8
    in_channels_list = [
        in_channels_stage2,
        in_channels_stage2 * 2,
        in_channels_stage2 * 4,
        in_channels_stage2 * 8,
    ]
    out_channels = 256
    return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)
def customMaskRcnn():
    """Build a MaskRCNN over a custom pruned ResNet and smoke-test it once.

    Returns:
        The constructed MaskRCNN.  BUG FIX: the model was previously built,
        run on a random image and then discarded (implicit None return);
        it is now returned to the caller.
    """
    # Per-stage channel config of the pruned backbone (cfg[0] is the stem).
    cfg = [[32], [27, 24, 10, 5, 4, 2, 1, 9, 2], [23, 23, 12, 4, 6, 1, 2, 6, 1],
           [9, 2, 3, 5, 19, 2, 5, 9, 10], [1, 2, 3, 4, 5, 6, 7, 8, 9]]
    # backbone = Myresnet([3,4,23,3])
    backbone = Myresnet([3, 3, 3, 3], cfg)
    return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
    # cfg = backbone.cfg
    # FPN input widths: last channel count of each non-stem stage.
    in_channels_list = []
    for i in range(1, len(cfg)):
        layer_size = len(cfg[i])
        in_channels_list.append(cfg[i][layer_size - 1])
    print(in_channels_list)
    out_channels = 256
    backbone = BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)
    num_classes = 91
    model = models.detection.MaskRCNN(backbone, num_classes)
    # Smoke test: one forward pass on a random image.
    x = torch.rand(1, 3, 200, 200)
    model.eval()
    with torch.no_grad():
        out = model(x)
    print(out)
    return model
def timm_resnet_fpn_backbone(backbone_name, pretrained=True, trainable_layers=None):
    """Constructs a fpn backbone for fasterrcnn from a timm resnet.

    Args:
        backbone_name: timm model name (must expose layer1..layer4 and
            ``inplanes`` like a torchvision resnet).
        pretrained: load timm pretrained weights.
        trainable_layers: number of stages, counted from layer4 backwards
            (0-5), left trainable.  None keeps every parameter trainable,
            which preserves the previous behaviour.
    """
    backbone = timm.create_model(backbone_name, pretrained=pretrained)
    # BUG FIX: trainable_layers was accepted but never used.  Apply the
    # standard torchvision freezing scheme when a value is given.
    if trainable_layers is not None:
        assert 0 <= trainable_layers <= 5
        layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]
        for name, parameter in backbone.named_parameters():
            if all(not name.startswith(layer) for layer in layers_to_train):
                parameter.requires_grad_(False)
    return_layers = {
        "layer1": "0",
        "layer2": "1",
        "layer3": "2",
        "layer4": "3",
    }
    in_channels_stage2 = backbone.inplanes // 8
    in_channels_list = [
        in_channels_stage2,
        in_channels_stage2 * 2,
        in_channels_stage2 * 4,
        in_channels_stage2 * 8,
    ]
    out_channels = 256
    return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)
def faster_rcnn_resnet_fpn(backbone_name, image_size, num_classes, max_objs_per_image, backbone_pretrained=False, logger=None, obj_thresh=0.1):
    """FasterRCNN (custom wrapper) over a torchvision resnet + FPN backbone.

    Feature maps are exported under the names c2..c5; anchor sizes, strides
    and ratios are derived from *image_size* for five pyramid levels.
    """
    trunk = models.resnet.__dict__[backbone_name](
        pretrained=backbone_pretrained)
    feature_names = {
        'layer1': 'c2',
        'layer2': 'c3',
        'layer3': 'c4',
        'layer4': 'c5'
    }
    c2_width = trunk.inplanes // 8
    fpn_in_channels = [c2_width, c2_width * 2, c2_width * 4, c2_width * 8]
    out_channels = 256
    from torchvision.models.detection.backbone_utils import BackboneWithFPN
    backbone = BackboneWithFPN(trunk, feature_names, fpn_in_channels, out_channels)
    rpn_in_channels = out_channels
    roi_pooling_output_size = 7
    dim_roi_features = 1024  # length of the ROI feature vector
    from torchvision.models.detection.faster_rcnn import TwoMLPHead
    roi_head = TwoMLPHead(out_channels * roi_pooling_output_size ** 2,
                          dim_roi_features)
    # roi_head = nn.Sequential()
    # roi_head.add_module("0", nn.Conv2d(out_channels, out_channels, 3, 2, padding=1))
    # roi_head.add_module("1", nn.BatchNorm2d(out_channels))
    # roi_head.add_module("2", nn.ReLU())
    # roi_head.add_module("3", TwoMLPHead(out_channels * floor(roi_pooling_output_size / 2) ** 2, dim_roi_features))
    strides = (2 ** 2, 2 ** 3, 2 ** 4, 2 ** 5, 2 ** 6)  # strides of the P* levels
    sizes = tuple((ceil(image_size[0] / s), ceil(image_size[1] / s))
                  for s in strides)
    scales = ((32 ** 2,), (64 ** 2,), (128 ** 2,), (256 ** 2,), (512 ** 2,))
    ratios = ((0.5, 1, 2),) * len(scales)
    return FasterRCNN(
        backbone=backbone,
        roi_head=roi_head,
        dim_roi_features=dim_roi_features,
        image_size=image_size,
        num_classes=num_classes,
        strides=strides,
        sizes=sizes,
        scales=scales,
        ratios=ratios,
        rpn_in_channels=rpn_in_channels,
        max_objs_per_image=max_objs_per_image,
        roi_pooling="roi_align",
        roi_pooling_output_size=roi_pooling_output_size,
        obj_thresh=obj_thresh,
        logger=logger,
    )
class Backbone(nn.Module):
    r"""Select a subset of feature maps from a model to make a backbone with
    or without FPN.

    Internally, it uses torchvision.models._utils.IntermediateLayerGetter to
    extract a submodel that returns the feature maps specified in
    return_layers. The same limitations of IntermediateLayerGetter apply
    here. This adaptation allows not to use FPN, in which case the last
    feature map is flattened to two dimensions.

    Args:
        backbone (nn.Module): the full-fledged model to adapt
        return_layers (Dict[name, new_name]): a dict containing the names of
            the modules for which the activations will be returned as the
            key of the dict, and the value of the dict is the name of the
            returned activation (which the user can specify).
        in_channels_list (List[int]): number of channels for each feature
            map that is returned, in the order they are present in the
            OrderedDict
        out_channels (int): number of channels in the FPN.
        fpn (bool): whether to add FPN or not.
    Attributes:
        out_channels (int): the number of channels in the FPN
    """

    def __init__(self, model, return_layers, in_channels_list=[], out_channels=None, fpn=False, classifier=False):
        super(Backbone, self).__init__()
        self.backbone = BackboneWithFPN(model, return_layers, in_channels_list, out_channels)
        # Optional classification head taken from the wrapped model.
        self.classifier = None
        if classifier and hasattr(model, 'fc'):
            # XXX hardcoded for resnet family models
            self.classifier = model.fc
        if not fpn:
            # Remove the FPN sub-module so only the body runs in forward().
            del self.backbone._modules['fpn']

    def __getattr__(self, name):
        # Fall back to the wrapped backbone for any attribute not found on
        # this module itself (__getattr__ fires only after normal lookup
        # fails, so nn.Module's own attributes take precedence).
        try:
            return super(Backbone, self).__getattr__(name)
        except AttributeError:
            return getattr(self.backbone, name)

    def freeze(self, exceptions=None):
        # Freeze all body parameters except those whose name contains one of
        # the given substrings; the classifier (if any) is always frozen.
        exceptions = [] if exceptions is None else exceptions
        for name, parameter in self.backbone.body.named_parameters():
            if all(map(lambda k: k not in name, exceptions)):
                parameter.requires_grad = False
        if self.classifier is not None:
            for parameter in self.classifier.parameters():
                parameter.requires_grad = False

    def forward(self, x):
        # torchvision>=0.5
        if 'fpn' in self.backbone._modules:
            # FPN mode: body -> FPN feature pyramid.
            x = self.backbone.body(x)
            return self.backbone.fpn(x)
        else:
            # XXX explict shape necessarily for trt
            # No-FPN mode: flatten the last feature map to 2 dims and return
            # all maps as a plain tuple.
            outputs = self.backbone.body(x)
            last = len(outputs) - 1
            avgpool = outputs[last]
            outputs[last] = avgpool.flatten(1)
            # logging.info(f"output shapes={[(k, tuple(v.shape)) for k, v in outputs.items()]}")
            return tuple(outputs.values())

    def load_state_dict(self, state_dict, strict=True):
        # Delegate to the body so checkpoints of the raw model load directly.
        return self.backbone.body.load_state_dict(state_dict, strict=strict)

    def state_dict(self, destination=None, prefix='', keep_vars=False):
        # Save some checkpoint: only the body's weights are serialised.
        return self.backbone.body.state_dict(destination=destination,
                                             prefix=prefix,
                                             keep_vars=keep_vars)
def resnest_fpn_backbone(
        backbone_fn,
        pretrained,
        # norm_layer=misc_nn_ops.FrozenBatchNorm2d,
        trainable_layers=3,
        returned_layers=None,
        extra_blocks=None,
):
    """
    Constructs a ResNeSt backbone with FPN on top.

    Freezes the specified number of layers in the backbone.

    Example output shapes (cf. torchvision's resnet_fpn_backbone)::

        >>> backbone = resnest_fpn_backbone(resnest50, pretrained=True, trainable_layers=3)
        >>> # get some dummy image
        >>> x = torch.rand(1,3,64,64)
        >>> # compute the output
        >>> output = backbone(x)
        >>> print([(k, v.shape) for k, v in output.items()])
        >>> # returns
        >>> [('0', torch.Size([1, 256, 16, 16])),
        >>>  ('1', torch.Size([1, 256, 8, 8])),
        >>>  ('2', torch.Size([1, 256, 4, 4])),
        >>>  ('3', torch.Size([1, 256, 2, 2])),
        >>>  ('pool', torch.Size([1, 256, 1, 1]))]

    Arguments:
        backbone_fn (callable): constructor of the backbone (e.g. a resnest
            model function) accepting a ``pretrained=`` keyword.
        pretrained (bool): If True, returns a model with backbone pre-trained
            on Imagenet
        trainable_layers (int): number of trainable (not frozen) layers
            starting from final block. Valid values are between 0 and 5,
            with 5 meaning all backbone layers are trainable.
        returned_layers (list[int], optional): which stages (1-4) to return;
            defaults to all four.
        extra_blocks (optional): extra block appended to the FPN; defaults
            to LastLevelMaxPool.
    """
    backbone = backbone_fn(pretrained=pretrained)
    # Drop the classification head; only the feature maps are needed.
    backbone.fc = Identity()
    backbone.avgpool = Identity()
    # select layers that wont be frozen
    assert trainable_layers <= 5 and trainable_layers >= 0
    layers_to_train = ["layer4", "layer3", "layer2", "layer1", "conv1"][:trainable_layers]
    # freeze layers only if pretrained backbone is used
    for name, parameter in backbone.named_parameters():
        if all([not name.startswith(layer) for layer in layers_to_train]):
            parameter.requires_grad_(False)
    if extra_blocks is None:
        extra_blocks = LastLevelMaxPool()
    if returned_layers is None:
        returned_layers = [1, 2, 3, 4]
    assert min(returned_layers) > 0 and max(returned_layers) < 5
    # Map each returned stage to an FPN output name '0', '1', ...
    return_layers = {
        f"layer{k}": str(v)
        for v, k in enumerate(returned_layers)
    }
    in_channels_stage2 = backbone.inplanes // 8
    # Stage i of a resnet has in_channels_stage2 * 2**(i-1) output channels.
    in_channels_list = [
        in_channels_stage2 * 2**(i - 1) for i in returned_layers
    ]
    out_channels = 256
    return BackboneWithFPN(
        backbone,
        return_layers,
        in_channels_list,
        out_channels,
        extra_blocks=extra_blocks,
    )
def FasterRCNN1(num_classes=2, model_name="resnet101", pretrained=False, usize=256, use_FPN=False):
    """Build a FasterRCNN on a torchvision resnet trunk, with or without FPN.

    Args:
        num_classes: number of detection classes (including background).
        model_name: torchvision resnet/resnext/wide-resnet variant.
        pretrained: load ImageNet weights for the trunk.
        usize: FPN output channels (FPN mode only).
        use_FPN: wrap the trunk with BackboneWithFPN before the heads.
    """
    # Final-stage channel width of each supported variant.
    model_dict = {
        'resnet18': 512,
        'resnet34': 512,
        'resnet50': 2048,
        'resnet101': 2048,
        'resnet152': 2048,
        'resnext50_32x4d': 2048,
        'resnext101_32x8d': 2048,
        'wide_resnet50_2': 2048,
        'wide_resnet101_2': 2048
    }
    assert model_name in model_dict, "%s must be in %s" % (model_name, model_dict.keys())
    backbone_size = model_dict[model_name]
    _model = torchvision.models.resnet.__dict__[model_name](
        pretrained=pretrained)
    # Strip the classification head: keep only the convolutional trunk.
    backbone = nn.Sequential(
        OrderedDict([
            ('conv1', _model.conv1),
            ('bn1', _model.bn1),
            ('relu1', _model.relu),
            ('maxpool1', _model.maxpool),
            ("layer1", _model.layer1),
            ("layer2", _model.layer2),
            ("layer3", _model.layer3),
            ("layer4", _model.layer4),
        ]))
    if use_FPN:
        # Freeze the stem and layer1; only layer2-4 stay trainable.
        for name, parameter in backbone.named_parameters():
            if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
                parameter.requires_grad_(False)
        # BUG FIX: FPN level names must be the *strings* '0'..'3' —
        # FasterRCNN's default MultiScaleRoIAlign looks levels up by
        # featmap_names ['0','1','2','3'], so integer keys made ROI pooling
        # miss every level.
        return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
        in_channels_list = [
            backbone_size // 8,   # layer1 output channels
            backbone_size // 4,   # layer2 output channels
            backbone_size // 2,   # layer3 output channels
            backbone_size,        # layer4 output channels
        ]
        # FPN output width per level (not fixed; 64, 512, ... also work).
        out_channels = usize
        backbone = BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)
        model = FasterRCNN(backbone, num_classes)
    else:
        backbone.out_channels = model_dict[model_name]  # trunk output width
        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                           aspect_ratios=((0.5, 1.0, 2.0), ))
        # BUG FIX: featmap_names must be the string '0' — torchvision wraps a
        # plain-tensor backbone output as OrderedDict([('0', features)]), so
        # the integer 0 would never match.
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                        output_size=7,
                                                        sampling_ratio=2)
        model = FasterRCNN(backbone,
                           num_classes=num_classes,
                           rpn_anchor_generator=anchor_generator,
                           box_roi_pool=roi_pooler)
    return model