def decom_resnet50():
    if opt.caffe_pretrain:
        model = resnet_fpn_backbone(backbone_name="resnet50", pretrained=False)
        if not opt.load_path:
            model.load_state_dict(t.load(opt.caffe_pretrain_path))
    else:
        model = resnet_fpn_backbone(backbone_name="resnet50", pretrained=False)

    # resnet_fpn_backbone returns a BackboneWithFPN, so the raw resnet layers
    # live under model.body rather than directly on the model
    body = model.body

    # freeze the stem conv/bn layers and the first residual stage
    for p in body.conv1.parameters():
        p.requires_grad = False
    for p in body.layer1.parameters():
        p.requires_grad = False
    model.apply(set_bn_fix)

    # conv1 through layer3 form the feature extractor
    features_extractor = nn.Sequential(body.conv1, body.bn1, body.relu,
                                       body.maxpool, body.layer1,
                                       body.layer2, body.layer3)
    # layer4 is kept for the classifier head
    features_classifier = nn.Sequential(body.layer4)
    return features_extractor, features_classifier

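# A hedged smoke test for decom_resnet50 above (assumes torch is imported as
# `t` as in the snippet and that `opt` is already configured by the
# surrounding project; the dummy input size is an illustrative assumption):
def _check_decom_resnet50():
    extractor, classifier = decom_resnet50()
    x = t.rand(1, 3, 224, 224)
    feat = extractor(x)     # stride-16 features: [1, 1024, 14, 14]
    out = classifier(feat)  # stride-32 features: [1, 2048, 7, 7]
    assert feat.shape[1] == 1024 and out.shape[1] == 2048
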
def maskrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=91,
                          pretrained_backbone=True, is_double_backbone=True,
                          **kwargs):
    """
    Constructs a Mask R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape
    ``[C, H, W]``, one for each image, and should be in ``0-1`` range. Different
    images can have different sizes.

    The behavior of the model changes depending on whether it is in training or
    evaluation mode.

    During training, the model expects both the input tensors and the targets
    (a list of dictionaries), containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, with values of ``x`` between ``0`` and
          ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box
        - masks (``UInt8Tensor[N, 1, H, W]``): the segmentation binary masks for
          each instance

    The model returns a ``Dict[Tensor]`` during training, containing the
    classification and regression losses for both the RPN and the R-CNN, and
    the mask loss.

    During inference, the model requires only the input tensors, and returns
    the post-processed predictions as a ``List[Dict[Tensor]]``, one for each
    input image. The fields of the ``Dict`` are as follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in
          ``[x1, y1, x2, y2]`` format, with values of ``x`` between ``0`` and
          ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction
        - masks (``UInt8Tensor[N, 1, H, W]``): the predicted masks for each
          instance, in ``0-1`` range. In order to obtain the final segmentation
          masks, the soft masks can be thresholded, generally with a value of
          0.5 (``mask >= 0.5``)

    Example::

        >>> model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
    backbone2 = resnet_fpn_backbone('resnet50', pretrained_backbone)
    model = MaskRCNN(backbone, backbone2, is_double_backbone, num_classes,
                     **kwargs)
    return model

def get_model_instance_detection(num_classes, backbone_name='resnet101',
                                 pretrained_backbone=True, trainable_layers=3):
    if backbone_name == 'shape-resnet50':
        url_resnet50_trained_on_SIN_and_IN_then_finetuned_on_IN = 'https://bitbucket.org/robert_geirhos/texture-vs-shape-pretrained-models/raw/60b770e128fffcbd8562a3ab3546c1a735432d03/resnet50_finetune_60_epochs_lr_decay_after_30_start_resnet50_train_45_epochs_combined_IN_SF-ca06340c.pth.tar'
        try:
            checkpoint = model_zoo.load_url(
                url_resnet50_trained_on_SIN_and_IN_then_finetuned_on_IN)
        except RuntimeError:
            # fall back to CPU when no CUDA device is available
            checkpoint = model_zoo.load_url(
                url_resnet50_trained_on_SIN_and_IN_then_finetuned_on_IN,
                map_location=torch.device('cpu'))
        # Rename the keys so that the checkpoint loads into resnet_fpn_backbone,
        # e.g. 'module.conv1.weight' -> 'body.conv1.weight'
        state_dict_body = dict([('.'.join(['body'] + k.split('.')[1:]), v)
                                for k, v in checkpoint["state_dict"].items()])
        # This is to resolve the issue of NaNs coming up in training.
        # See
        fbn = partial(FrozenBatchNorm2d, eps=1E-5)
        try:
            backbone = resnet_fpn_backbone(
                'resnet50', pretrained=pretrained_backbone, norm_layer=fbn,
                trainable_layers=trainable_layers).cuda()
        except (RuntimeError, AssertionError):
            backbone = resnet_fpn_backbone(
                'resnet50', pretrained=pretrained_backbone, norm_layer=fbn,
                trainable_layers=trainable_layers).cpu()
        missing, unexpected = backbone.load_state_dict(state_dict_body,
                                                       strict=False)
        print('When creating shape-resnet50...\n'
              'Missing states: {}\nUnexpected states: {}'.format(missing,
                                                                 unexpected))
    else:
        backbone = resnet_fpn_backbone(backbone_name,
                                       pretrained=pretrained_backbone,
                                       trainable_layers=trainable_layers)
    model = FasterRCNN(backbone, num_classes=num_classes)
    return model

def get_instance_segmentation_model(num_classes, backbone, dropout=False):
    # load an instance segmentation model whose backbone is pretrained on ImageNet
    backbone = resnet_fpn_backbone(backbone, pretrained=True)
    model = MaskRCNN(backbone, num_classes)
    if dropout:
        # rebuild the box head and mask head; note that as written these are
        # the standard torchvision heads (see the hedged dropout sketch after
        # this function)
        resolution = model.roi_heads.box_roi_pool.output_size[0]
        representation_size = 1024
        model.roi_heads.box_head = TwoMLPHead(
            backbone.out_channels * resolution ** 2, representation_size)
        mask_layers = (256, 256, 256, 256)
        mask_dilation = 1
        model.roi_heads.mask_head = MaskRCNNHeads(backbone.out_channels,
                                                  mask_layers, mask_dilation)
    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
    return model

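# The dropout flag above rebuilds the standard heads, which contain no dropout
# in stock torchvision; presumably the original project used patched
# TwoMLPHead / MaskRCNNHeads classes. A hedged sketch of one way to actually
# add dropout to the box head (the 0.5 rate and the Sequential wrapping are
# assumptions, not the original code):
from torch import nn

def add_box_head_dropout(model, p=0.5):
    # RoIHeads calls box_head(pooled_features), so wrapping the existing head
    # in nn.Sequential with a trailing Dropout keeps the interface intact
    model.roi_heads.box_head = nn.Sequential(model.roi_heads.box_head,
                                             nn.Dropout(p=p))
    return model
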
def get_model(pretrained=False, progress=True, nr_class=91,
              attr_score_thresh=0.7, pos_weight=100.,
              pretrained_backbone=True, **kwargs):
    # model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    if pretrained:
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
    model = TripleMaskRCNN(backbone, nr_class,
                           attr_score_thresh=attr_score_thresh,
                           pos_weight=pos_weight, **kwargs)
    # self.attr_predictor = AttrPredictor(
    #     in_channels=self.roi_heads.box_head.fc6.in_features,
    #     num_classes=259,
    #     hidden_layer=1024)
    # TODO: check whether to use strict=False
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['maskrcnn_resnet50_fpn_coco'], progress=progress)
        model.load_state_dict(state_dict)
    return model

def fasterrcnn_resnet34(num_classes, **kwargs):
    backbone = resnet_fpn_backbone("resnet34", pretrained=True)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    # FasterRCNN already builds a predictor for num_classes; replacing it here
    # simply reinitializes the head weights
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

def test_resnet50_fpn_backbone(self):
    device = torch.device('cpu')
    x = torch.rand(1, 3, 300, 300, dtype=self.dtype, device=device)
    resnet50_fpn = resnet_fpn_backbone(backbone_name='resnet50',
                                       pretrained=False)
    y = resnet50_fpn(x)
    self.assertEqual(list(y.keys()), ['0', '1', '2', '3', 'pool'])

def test_resnet50_fpn_backbone(self):
    device = torch.device('cpu')
    x = torch.rand(1, 3, 300, 300, dtype=self.dtype, device=device)
    resnet50_fpn = resnet_fpn_backbone(backbone_name='resnet50',
                                       pretrained=False)
    y = resnet50_fpn(x)
    # the FPN returns an OrderedDict keyed by strings, not ints
    assert list(y.keys()) == ['0', '1', '2', '3', 'pool']

def add_resnet18_backbone(num_classes):
    # load a pre-trained model for classification and return only the features
    # (note: despite the function name, this builds a resnet34 FPN backbone)
    backbone = resnet_fpn_backbone(backbone_name='resnet34', pretrained=True)
    # FasterRCNN needs to know the number of output channels in a backbone;
    # resnet_fpn_backbone already sets backbone.out_channels = 256 (for a
    # plain backbone such as mobilenet_v2 it would have to be set by hand,
    # e.g. backbone.out_channels = 1280)

    # the FPN returns five feature maps ('0'-'3' and 'pool'), so the anchor
    # generator needs one sizes tuple and one aspect_ratios tuple per map;
    # we have a Tuple[Tuple[int]] because each feature map can have different
    # sizes and aspect ratios
    anchor_generator = AnchorGenerator(
        sizes=((32,), (64,), (128,), (256,), (512,)),
        aspect_ratios=((0.5, 1.0, 2.0),) * 5)

    # define which feature maps are used for the region-of-interest cropping,
    # as well as the size of the crop after rescaling. The backbone returns an
    # OrderedDict[Tensor], and featmap_names selects which feature maps to use.
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)

    # put the pieces together inside a FasterRCNN model
    model = FasterRCNN(backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    return model

def FasterRCNN_resnet101():
    backbone = resnet_fpn_backbone('resnet101', pretrained=True)
    # net = torchvision.models.resnet101(pretrained=True)
    # modules = list(net.children())[:-2]
    # backbone = nn.Sequential(*modules)
    # backbone = torchvision.models.resnet101(pretrained=True).features
    # test_backbone(backbone)

    # resnet_fpn_backbone already sets out_channels = 256
    backbone.out_channels = 256
    # one sizes/aspect_ratios tuple per FPN level ('0'-'3' and 'pool')
    anchor_generator = AnchorGenerator(
        sizes=((32,), (64,), (128,), (256,), (512,)),
        aspect_ratios=((0.5, 1.0, 2.0),) * 5)
    # featmap_names must be strings matching the backbone's output dict keys
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    model = FasterRCNN(backbone,
                       num_classes=config.CLASSES,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    return model

def __init__(self, n_classes, box_size=7, backbone_name='resnet50',
             pretrained_backbone=True):
    super().__init__()
    _dum = set(dir(self))
    self.n_classes = n_classes
    self.pretrained_backbone = pretrained_backbone
    self.box_size = box_size
    self.half_box_size = box_size // 2
    # record the names of the fields set above so they can be accessed later
    # if necessary
    self._input_names = list(set(dir(self)) - _dum)

    anchor_sizes = ((4,), (8,), (16,), (32,), (64,))
    aspect_ratios = ((1.0,),) * len(anchor_sizes)
    rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    backbone = resnet_fpn_backbone(backbone_name, pretrained_backbone)
    # use n_classes + 1 because label 0 corresponds to the background in the
    # torchvision FasterRCNN convention
    self.fasterrcnn = FasterRCNN(backbone, n_classes + 1,
                                 rpn_anchor_generator=rpn_anchor_generator)

def fasterrcnn_resnet_fpn(resnet_name='resnet50', pretrained_backbone=True,
                          progress=True, num_classes=2, **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape
    ``[C, H, W]``, one for each image, and should be in ``0-1`` range. Different
    images can have different sizes.

    The behavior of the model changes depending on whether it is in training or
    evaluation mode.

    During training, the model expects both the input tensors and the targets
    (a list of dictionaries), containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, with values of ``x`` between ``0`` and
          ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the
    classification and regression losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns
    the post-processed predictions as a ``List[Dict[Tensor]]``, one for each
    input image. The fields of the ``Dict`` are as follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in
          ``[x1, y1, x2, y2]`` format, with values of ``x`` between ``0`` and
          ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction
    """
    backbone = resnet_fpn_backbone(resnet_name, pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, box_detections_per_img=100,
                       **kwargs)
    return model

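# Hedged usage sketch for fasterrcnn_resnet_fpn above (the input sizes are
# illustrative assumptions; pretrained_backbone=False avoids the weight
# download):
def _demo_fasterrcnn_resnet_fpn():
    model = fasterrcnn_resnet_fpn('resnet50', pretrained_backbone=False)
    model.eval()
    images = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    with torch.no_grad():
        predictions = model(images)  # one Dict per image
    print(predictions[0].keys())     # boxes, labels, scores
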
def resnet_fpn_backbone_test():
    from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

    # ResNet paper | torchvision.models.ResNet     | BackboneWithFPN output key
    # ------------ | ----------------------------- | --------------------------
    # conv1  1/2   | conv1 + bn1 + relu + maxpool  |
    # conv2  1/4   | layer1                        | '0'
    # conv3  1/8   | layer2                        | '1'
    # conv4  1/16  | layer3                        | '2'
    # conv5  1/32  | layer4                        | '3'
    backbone = resnet_fpn_backbone("resnet50", pretrained=True,
                                   trainable_layers=5)
    # backbone = resnet_fpn_backbone("resnet50", weights=ResNet50_Weights.DEFAULT, trainable_layers=5)
    # print(backbone)

    # Get some dummy image.
    x = torch.rand(1, 3, 64, 64)
    # x = torch.rand(5, 3, 224, 27)
    # x = [torch.rand(3, 64, 64), torch.rand(3, 128, 128)]  # Error: the backbone expects a single batched tensor.

    # Compute the output.
    backbone_outputs = backbone(x)
    print([(name, outp.shape) for name, outp in backbone_outputs.items()])

def maskrcnn_resnet_fpn(pretrained=False, backbone_name='resnet50',
                        num_classes=91, pretrained_backbone=True,
                        trainable_backbone_layers=3, progress=True, **kwargs):
    """
    Constructs a Mask R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape
    ``[C, H, W]``, one for each image, and should be in ``0-1`` range. Different
    images can have different sizes.

    The behavior of the model changes depending on whether it is in training or
    evaluation mode.

    During training, the model expects both the input tensors and the targets
    (a list of dictionaries), containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, with values of ``x`` between ``0`` and
          ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box
        - masks (``UInt8Tensor[N, H, W]``): the segmentation binary masks for
          each instance

    The model returns a ``Dict[Tensor]`` during training, containing the
    classification and regression losses for both the RPN and the R-CNN, and
    the mask loss.

    During inference, the model requires only the input tensors, and returns
    the post-processed predictions as a ``List[Dict[Tensor]]``, one for each
    input image. The fields of the ``Dict`` are as follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in
          ``[x1, y1, x2, y2]`` format, with values of ``x`` between ``0`` and
          ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction
        - masks (``UInt8Tensor[N, 1, H, W]``): the predicted masks for each
          instance, in ``0-1`` range. In order to obtain the final segmentation
          masks, the soft masks can be thresholded, generally with a value of
          0.5 (``mask >= 0.5``)

    Mask R-CNN is exportable to ONNX for a fixed batch size with input images
    of fixed size.

    Example::

        >>> model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)
        >>>
        >>> # optionally, if you want to export the model to ONNX:
        >>> torch.onnx.export(model, x, "mask_rcnn.onnx", opset_version=11)

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        pretrained_backbone (bool): If True, returns a model with backbone
            pre-trained on Imagenet
        num_classes (int): number of output classes of the model (including the
            background)
        trainable_backbone_layers (int): number of trainable (not frozen) resnet
            layers starting from final block. Valid values are between 0 and 5,
            with 5 meaning all backbone layers are trainable.
    """
    assert 0 <= trainable_backbone_layers <= 5
    # don't freeze any layers if pretrained model or backbone is not used
    if not (pretrained or pretrained_backbone):
        trainable_backbone_layers = 5
    backbone = resnet_fpn_backbone(backbone_name, pretrained_backbone,
                                   trainable_layers=trainable_backbone_layers)
    model = MaskRCNN(backbone, num_classes, **kwargs)
    # if pretrained:
    #     state_dict = load_state_dict_from_url(
    #         model_urls['maskrcnn_resnet50_fpn_coco'], progress=progress)
    #     model.load_state_dict(state_dict)
    return model

def resnet_frcnn_model(pretrained, num_classes=91):
    # the backbone is always built without ImageNet weights here; in the usual
    # torchvision pattern, pretrained_backbone would only be switched off when
    # COCO-pretrained weights are loaded on top
    backbone = resnet_fpn_backbone('resnet50', False)
    model = FasterRCNNExtractor(backbone, num_classes)
    return model

def create_fasterrcnn_backbone(backbone: str, fpn: bool = True,
                               pretrained: Optional[str] = None,
                               trainable_backbone_layers: int = 3,
                               **kwargs: Any) -> nn.Module:
    """
    Args:
        backbone: Supported backbones are: "resnet18", "resnet34", "resnet50",
            "resnet101", "resnet152", "resnext50_32x4d", "resnext101_32x8d",
            "wide_resnet50_2", "wide_resnet101_2", as resnets with fpn backbones.
            Without fpn the supported backbones are: "resnet18", "resnet34",
            "resnet50", "resnet101", "resnet152", "resnext101_32x8d",
            "mobilenet_v2", "vgg11", "vgg13", "vgg16", "vgg19",
        fpn: If True then constructs fpn as well.
        pretrained: If None, creates a backbone with imagenet weights.
        trainable_backbone_layers: number of trainable resnet layers starting
            from final block.
    """
    if fpn:
        # Creates a torchvision resnet model with fpn added;
        # note that this branch always uses imagenet weights
        backbone = resnet_fpn_backbone(backbone, pretrained=True,
                                       trainable_layers=trainable_backbone_layers,
                                       **kwargs)
    else:
        # This does not create an fpn backbone; it is supported for all models
        backbone, _ = create_torchvision_backbone(backbone, pretrained)
    return backbone

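# Hedged usage sketch for create_fasterrcnn_backbone above (resnet18 and
# num_classes=2 are illustrative; FasterRCNN is assumed imported from
# torchvision.models.detection, and the fpn branch downloads imagenet weights):
def _demo_create_fasterrcnn_backbone():
    backbone = create_fasterrcnn_backbone("resnet18", fpn=True,
                                          trainable_backbone_layers=3)
    return FasterRCNN(backbone, num_classes=2)
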
def __init__(self, f):
    trainable_backbone_layers = 5
    backbone = resnet_fpn_backbone('resnet50', True,
                                   trainable_layers=trainable_backbone_layers)
    self.model = FasterRCNN(backbone,
                            num_classes=10,
                            max_size=3840,
                            min_size=2160,
                            rpn_pre_nms_top_n_train=2000,
                            rpn_pre_nms_top_n_test=2000,
                            rpn_post_nms_top_n_train=2000,
                            rpn_post_nms_top_n_test=2000,
                            box_detections_per_img=100,
                            rpn_nms_thresh=0.01,
                            box_nms_thresh=0.01)
    # num_classes = 10
    # self.model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # in_features = self.model.roi_heads.box_predictor.cls_score.in_features
    # self.model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    device = torch.device('cuda') if torch.cuda.is_available() \
        else torch.device('cpu')
    self.model.to(device)
    if isinstance(f, str):
        # local file
        print("Loading model from local file at {}".format(f))
        self.model.load_state_dict(torch.load(f, map_location=device))
    elif isinstance(f, io.BytesIO):
        # stream: left unimplemented in the original code
        print("Loading model from stream")
        pass

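# Hedged sketch of how the unimplemented stream branch above could be
# completed: torch.load accepts file-like objects, so the io.BytesIO case can
# mirror the local-file case (the seek(0) and map_location choices are
# assumptions, not the original code):
#
#     elif isinstance(f, io.BytesIO):
#         print("Loading model from stream")
#         f.seek(0)
#         self.model.load_state_dict(torch.load(f, map_location=device))
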
def create_fastercnn_backbone(backbone: str, fpn: bool = True,
                              pretrained: str = None,
                              trainable_backbone_layers: int = 3,
                              **kwargs) -> nn.Module:
    """
    Args:
        backbone (str): Supported backbones are: "resnet18", "resnet34",
            "resnet50", "resnet101", "resnet152", "resnext50_32x4d",
            "resnext101_32x8d", "wide_resnet50_2", "wide_resnet101_2", as
            resnets with fpn backbones. Without fpn the supported backbones
            are: "resnet18", "resnet34", "resnet50", "resnet101", "resnet152",
            "resnext101_32x8d", "mobilenet_v2", "vgg11", "vgg13", "vgg16",
            "vgg19",
        fpn (bool): If True then constructs fpn as well.
        pretrained (str): If None, creates a backbone with imagenet weights.
    """
    if fpn:
        # Creates a torchvision resnet model with fpn added.
        print("Resnet FPN backbones work only with imagenet weights")
        backbone = resnet_fpn_backbone(backbone, pretrained=True,
                                       trainable_layers=trainable_backbone_layers,
                                       **kwargs)
    else:
        # This does not create an fpn backbone; it is supported for all models
        print("FPN is not supported for non-resnet backbones")
        backbone, _ = create_torchvision_backbone(backbone, pretrained)
    return backbone

def __init__(self):
    backbone = resnet_fpn_backbone('resnet50', False)
    super(Countor_NN, self).__init__(backbone, 91)
    # keep only the car, truck and bus COCO classes
    self.selected_classes = [3, 6, 8]
    # minimal area
    self.det_min_area = 100
    self.tck_min_area = 100
    # maximal area
    self.det_max_area = 1000000
    # score thresholds
    # print(self.roi_heads.score_thresh)
    self.det_score_thresh = 0.5
    self.tck_score_thresh = 0.05
    # nms thresholds
    # print(self.roi_heads.nms_thresh)
    self.det_nms_thresh = 0.5
    self.tck_nms_thresh = 0.5
    # self.empty_var = torch.empty(0, device=device)
    # minimum fraction of the box inside the ROI
    self.det_min_ROI_in = 0.3
    self.tck_min_ROI_in = 0.1

def __init__(self, backbone=None, architecture=None, detector=None,
             num_classes=None, device='cpu', *args, **kwargs):
    assert backbone is not None, 'backbone cannot be None'
    assert architecture is not None, 'architecture cannot be None'
    assert detector is not None, 'detector cannot be None'
    assert num_classes is not None, 'num_classes cannot be None'
    assert device is not None, 'device cannot be None'
    super().__init__()
    self.device = device
    if backbone == 'efficientnet':
        backbone = EfficientNet.from_pretrained(architecture)
        backbone.out_channels = 1280
    elif backbone == 'fishnet':
        if architecture == 'fishnet99':
            backbone = fishnet99()
        elif architecture == 'fishnet150':
            backbone = fishnet150()
        else:
            backbone = fishnet201()
        backbone.out_channels = 1000
    elif backbone == 'resnet':
        backbone = resnet_fpn_backbone(architecture, pretrained=True)
    self.model = MaskRCNN(backbone, num_classes=num_classes)
    self.model.to(device)

def fasterrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=91,
                            pretrained_backbone=True, **kwargs):
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone("resnet50", pretrained_backbone)
    anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
    rpn_anchor_generator = CachelessAnchorGenerator(anchor_sizes, aspect_ratios)
    model = FasterRCNN(backbone, num_classes,
                       rpn_anchor_generator=rpn_anchor_generator, **kwargs)

    # min_size = 300
    # max_size = 400
    # anchor_sizes = ((12,), (24,), (48,), (96,), (192,))
    # aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
    # rpn_anchor_generator = CachelessAnchorGenerator(anchor_sizes, aspect_ratios)
    # model = FasterRCNN(backbone, num_classes,
    #                    rpn_anchor_generator=rpn_anchor_generator,
    #                    min_size=min_size, max_size=max_size, **kwargs)

    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls["fasterrcnn_resnet50_fpn_coco"], progress=progress)
        model.load_state_dict(state_dict)
    return model

def __init__(self, num_classes):
    backbone = resnet_fpn_backbone('resnet50', False)
    super(FRCNN_FPN, self).__init__(backbone, num_classes)
    # these values are cached to allow for feature reuse
    self.original_image_sizes = None
    self.preprocessed_images = None
    self.features = None

def _resnet_fpn(name: str, pretrained: bool = True, **kwargs):
    model = resnet_fpn_backbone(backbone_name=name, pretrained=pretrained,
                                **kwargs)
    patch_param_groups(model)
    return model

def __init__(self, num_class=10, snap=None, trainable_layers=5):
    backbone = resnet_fpn_backbone('resnet101', True,
                                   trainable_layers=trainable_layers)
    anchor_sizes = ((8, 16, 32, 64, 128),)
    aspect_ratios = [(0.5, 1.0, 2.0) for _ in range(len(anchor_sizes))]
    rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    super(MaskRCNN, self).__init__(
        backbone,
        num_class,
        # rpn_anchor_generator=rpn_anchor_generator,
    )
    if snap is not None:
        state_dict = torch.load(open(snap, 'rb'))
        # drop checkpoint tensors whose shapes no longer match the model
        # (e.g. predictor heads after the class count changed)
        for k in list(state_dict.keys()):
            if k not in self.state_dict():
                continue
            if self.state_dict()[k].shape != state_dict[k].shape:
                print(f'removing key {k}')
                del state_dict[k]
        # del state_dict['roi_heads.box_predictor.cls_score.weight']
        # del state_dict['roi_heads.box_predictor.cls_score.bias']
        # del state_dict['roi_heads.box_predictor.bbox_pred.weight']
        # del state_dict['roi_heads.box_predictor.bbox_pred.bias']
        # del state_dict['roi_heads.mask_predictor.mask_fcn_logits.weight']
        # del state_dict['roi_heads.mask_predictor.mask_fcn_logits.bias']
        unused = self.load_state_dict(state_dict, strict=False)

def maskkeypointrcnn_resnet50_fpn(pretrained=False, progress=True,
                                  num_classes=2, num_keypoints=7,
                                  pretrained_backbone=True, **kwargs):
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
    model = MaskKeypointRCNN(backbone, num_classes=num_classes,
                             num_keypoints=num_keypoints, **kwargs)
    if pretrained:
        # TODO: merge parameters from pretrained maskrcnn and keypointrcnn
        # load mask_rcnn pretrained weights
        state_dict_mask = load_state_dict_from_url(
            model_urls['maskrcnn_resnet50_fpn_coco'], progress=progress)
        delete_predictor_weights(state_dict_mask)
        # load keypoint_rcnn pretrained weights
        state_dict_keypoint = load_state_dict_from_url(
            model_urls['keypointrcnn_resnet50_fpn_coco'], progress=progress)
        delete_predictor_weights(state_dict_keypoint)
        model.load_state_dict(state_dict_mask, strict=False)
        model.load_state_dict(state_dict_keypoint, strict=False)
    return model

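# delete_predictor_weights is referenced above but not defined in this
# snippet. A hedged sketch of what it plausibly does: drop the head weights
# whose shapes depend on the class/keypoint count, so that the subsequent
# strict=False loads only transfer the shared trunk (the 'predictor'
# substring match is an assumption):
def delete_predictor_weights(state_dict):
    for k in list(state_dict.keys()):
        # e.g. roi_heads.box_predictor.*, roi_heads.mask_predictor.*,
        # roi_heads.keypoint_predictor.*
        if 'predictor' in k:
            del state_dict[k]
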
def fasterrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=91,
                            pretrained_backbone=True, **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape
    ``[C, H, W]``, one for each image, and should be in ``0-1`` range. Different
    images can have different sizes.

    The behavior of the model changes depending on whether it is in training or
    evaluation mode.

    During training, the model expects both the input tensors and the targets
    (a list of dictionaries), containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, with values of ``x`` between ``0`` and
          ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the
    classification and regression losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns
    the post-processed predictions as a ``List[Dict[Tensor]]``, one for each
    input image. The fields of the ``Dict`` are as follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in
          ``[x1, y1, x2, y2]`` format, with values of ``x`` between ``0`` and
          ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction

    Faster R-CNN is exportable to ONNX for a fixed batch size with input
    images of fixed size.

    Example::

        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        >>> # For training
        >>> images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
        >>> labels = torch.randint(1, 91, (4, 11))
        >>> images = list(image for image in images)
        >>> targets = []
        >>> for i in range(len(images)):
        >>>     d = {}
        >>>     d['boxes'] = boxes[i]
        >>>     d['labels'] = labels[i]
        >>>     targets.append(d)
        >>> output = model(images, targets)
        >>> # For inference
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)
        >>>
        >>> # optionally, if you want to export the model to ONNX:
        >>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version=11)

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'], progress=progress)
        model.load_state_dict(state_dict)
    return model

def __init__(self, backbone):
    super().__init__()
    backbone = resnet_fpn_backbone(backbone, pretrained=True)
    # make sure every backbone parameter stays trainable
    for _, p in backbone.named_parameters():
        p.requires_grad = True
    self.backbone = backbone

def create_model_with_backbone(arch='resnet101', pretrained=True, num_classes=2):
    """Creates a model with a backbone of the specified arch, with weights pretrained on ImageNet."""
    backbone = resnet_fpn_backbone(arch, pretrained=pretrained)
    model = FasterRCNN(backbone, num_classes=num_classes)
    return model

def fasterrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=91,
                            pretrained_backbone=True, **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape
    ``[C, H, W]``, one for each image, and should be in ``0-1`` range. Different
    images can have different sizes.

    The behavior of the model changes depending on whether it is in training or
    evaluation mode.

    During training, the model expects both the input tensors and the targets
    (a list of dictionaries), containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, with values of ``x`` between ``0`` and
          ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the
    classification and regression losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns
    the post-processed predictions as a ``List[Dict[Tensor]]``, one for each
    input image. The fields of the ``Dict`` are as follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in
          ``[x1, y1, x2, y2]`` format, with values of ``x`` between ``0`` and
          ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction

    Example::

        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'], progress=progress)
        model.load_state_dict(state_dict)
    return model

def fasterrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=91,
                            pretrained_backbone=True,
                            trainable_backbone_layers=3, model_dir=None,
                            **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape
    ``[C, H, W]``, one for each image, and should be in ``0-1`` range. Different
    images can have different sizes.

    The behavior of the model changes depending on whether it is in training or
    evaluation mode.

    During training, the model expects both the input tensors and the targets
    (a list of dictionaries), containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, with values of ``x`` between ``0`` and
          ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the
    classification and regression losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns
    the post-processed predictions as a ``List[Dict[Tensor]]``, one for each
    input image. The fields of the ``Dict`` are as follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in
          ``[x1, y1, x2, y2]`` format, with values of ``x`` between ``0`` and
          ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction

    Faster R-CNN is exportable to ONNX for a fixed batch size with input
    images of fixed size.

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        pretrained_backbone (bool): If True, returns a model with backbone
            pre-trained on Imagenet
        num_classes (int): number of output classes of the model (including the
            background)
        trainable_backbone_layers (int): number of trainable (not frozen) resnet
            layers starting from final block. Valid values are between 0 and 5,
            with 5 meaning all backbone layers are trainable.
    """
    assert 0 <= trainable_backbone_layers <= 5
    # don't freeze any layers if pretrained model or backbone is not used
    if not (pretrained or pretrained_backbone):
        trainable_backbone_layers = 5
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone,
                                   trainable_layers=trainable_backbone_layers)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'], progress=progress,
            model_dir=model_dir)
        model.load_state_dict(state_dict)
    return model

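# Hedged check of the trainable_backbone_layers behaviour documented above
# (the helper and the comparison are illustrative; building both models
# downloads the imagenet backbone weights):
def _demo_trainable_layers():
    frozen = fasterrcnn_resnet50_fpn(pretrained_backbone=True,
                                     trainable_backbone_layers=0)
    full = fasterrcnn_resnet50_fpn(pretrained_backbone=True,
                                   trainable_backbone_layers=5)
    n_frozen = sum(p.requires_grad for p in frozen.backbone.parameters())
    n_full = sum(p.requires_grad for p in full.backbone.parameters())
    assert n_full > n_frozen  # more resnet stages left trainable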