Example 1
def test_validate_resnet_inputs_detection(self):
    # default number of backbone layers to train
    ret = backbone_utils._validate_trainable_layers(
        pretrained=True,
        trainable_backbone_layers=None,
        max_value=5,
        default_value=3)
    assert ret == 3
    # can't go beyond 5
    with pytest.raises(
            ValueError,
            match=r"Trainable backbone layers should be in the range"):
        ret = backbone_utils._validate_trainable_layers(
            pretrained=True,
            trainable_backbone_layers=6,
            max_value=5,
            default_value=3)
    # if not pretrained, should use all trainable layers and warn
    with pytest.warns(UserWarning):
        ret = backbone_utils._validate_trainable_layers(
            pretrained=False,
            trainable_backbone_layers=0,
            max_value=5,
            default_value=3)
    assert ret == 5
Example 2
def test_validate_resnet_inputs_detection(self):
    # default number of backbone layers to train
    ret = backbone_utils._validate_trainable_layers(
        pretrained=True, trainable_backbone_layers=None, max_value=5, default_value=3)
    self.assertEqual(ret, 3)
    # can't go beyond 5
    with self.assertRaises(AssertionError):
        ret = backbone_utils._validate_trainable_layers(
            pretrained=True, trainable_backbone_layers=6, max_value=5, default_value=3)
    # if not pretrained, should use all trainable layers and warn
    with self.assertWarns(UserWarning):
        ret = backbone_utils._validate_trainable_layers(
            pretrained=False, trainable_backbone_layers=0, max_value=5, default_value=3)
    self.assertEqual(ret, 5)
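Both test variants pin down the same contract, so it helps to see what the helper is expected to do. Below is a minimal sketch of `_validate_trainable_layers` reconstructed from the behavior the tests assert; torchvision's real implementation differs in details across versions (older releases raised `AssertionError` where newer ones raise `ValueError`, as the two tests above reflect):

import warnings

def _validate_trainable_layers(pretrained, trainable_backbone_layers,
                               max_value, default_value):
    # Without pretrained weights there is nothing to protect by freezing:
    # warn if the caller asked for a specific value, then train everything.
    if not pretrained:
        if trainable_backbone_layers is not None:
            warnings.warn(
                "Changing trainable_backbone_layers has no effect if "
                "neither pretrained nor pretrained_backbone are True; "
                f"falling back to trainable_backbone_layers={max_value}")
        trainable_backbone_layers = max_value
    # Fall back to the caller-supplied default when nothing was requested.
    if trainable_backbone_layers is None:
        trainable_backbone_layers = default_value
    # Reject out-of-range values.
    if not (0 <= trainable_backbone_layers <= max_value):
        raise ValueError(
            f"Trainable backbone layers should be in the range [0,{max_value}], "
            f"got {trainable_backbone_layers}")
    return trainable_backbone_layers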
Example 3
def _yolov5_mobilenet_v3_small_fpn(
    weights_name: str,
    pretrained: bool = False,
    progress: bool = True,
    num_classes: int = 80,
    pretrained_backbone: bool = True,
    trainable_backbone_layers: Optional[int] = None,
    **kwargs,
):
    trainable_backbone_layers = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 6, 3)

    if pretrained:
        pretrained_backbone = False
    backbone = mobilenet_backbone(
        "mobilenet_v3_small",
        pretrained_backbone,
        trainable_layers=trainable_backbone_layers,
    )
    strides = [8, 16, 32, 64]
    anchor_grids = [
        [19, 27, 44, 40, 38, 94],
        [96, 68, 86, 152, 180, 137],
        [140, 301, 303, 264, 238, 542],
        [436, 615, 739, 380, 925, 792],
    ]
    anchor_generator = AnchorGenerator(strides, anchor_grids)
    head = YOLOHead(
        backbone.out_channels,
        anchor_generator.num_anchors,
        anchor_generator.strides,
        num_classes,
    )
    model = YOLO(backbone,
                 num_classes,
                 anchor_generator=anchor_generator,
                 head=head,
                 **kwargs)
    if pretrained:
        if model_urls.get(weights_name, None) is None:
            raise ValueError(
                f"No checkpoint is available for model {weights_name}")
        state_dict = load_state_dict_from_url(model_urls[weights_name],
                                              progress=progress)
        model.load_state_dict(state_dict)
    return model
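A hedged usage sketch follows; the weights key below is hypothetical, since whatever keys are valid live in this module's `model_urls` dict:

# Hypothetical call: train from scratch on a 20-class dataset, keeping the
# default of 3 trainable backbone layers. The weights_name is illustrative only.
model = _yolov5_mobilenet_v3_small_fpn(
    "yolov5_mobilenet_v3_small_fpn_coco",
    pretrained=False,
    num_classes=20,
    trainable_backbone_layers=3,
)
model.train()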
Example 4
    def backbone1(self,
                  pretrained_backbone,
                  pretrained=False,
                  trainable_backbone_layers=None):
        trainable_backbone_layers = _validate_trainable_layers(
            pretrained or pretrained_backbone, trainable_backbone_layers, 5, 3)

        if pretrained:
            # no need to download the backbone if pretrained is set
            pretrained_backbone = False
        # skip P2 because it generates too many anchors (according to their paper)
        backbone = resnet_fpn_backbone(
            'resnet18',
            pretrained_backbone,
            returned_layers=[2, 3, 4],
            extra_blocks=LastLevelP6P7(256, 256),
            trainable_layers=trainable_backbone_layers)
        return backbone
Example 5
def my_fasterrcnn_resnet50_fpn(pretrained=False,
                               progress=True,
                               num_classes=21,
                               pretrained_backbone=True,
                               trainable_backbone_layers=None,
                               **kwargs):

    trainable_backbone_layers = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 5, 3)

    # SimCLRModel and my_resnet_fpn_backbone are user-defined helpers from this
    # project: presumably a self-supervised SimCLR encoder, wrapped in an FPN so
    # that FasterRCNN can consume it.
    backbone = SimCLRModel()
    backbone = my_resnet_fpn_backbone(
        backbone,
        pretrained_backbone,
        trainable_layers=trainable_backbone_layers)
    # FasterRCNN reads out_channels off the backbone; set it manually since the
    # custom backbone does not expose it.
    backbone.out_channels = 256
    model = FasterRCNN(backbone, num_classes, **kwargs)
    return model
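A quick sketch of calling this builder (the default `num_classes=21` suggests Pascal VOC plus background; note that `pretrained=True` only affects layer freezing here, since this function never downloads a detector checkpoint):

# Sketch: fine-tune on top of the SimCLR backbone with the default of
# 3 trainable backbone layers.
model = my_fasterrcnn_resnet50_fpn(pretrained_backbone=True)
model.train()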
Example 6
def _fasterrcnn_mobilenet_v3_large_fpn(weights_name,
                                       pretrained=False,
                                       progress=True,
                                       num_classes=91,
                                       pretrained_backbone=True,
                                       trainable_backbone_layers=None,
                                       **kwargs):
    trainable_backbone_layers = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 6, 3)

    if pretrained:
        pretrained_backbone = False
    backbone = mobilenet_backbone("mobilenet_v3_large",
                                  pretrained_backbone,
                                  fpn=True,
                                  trainable_layers=trainable_backbone_layers)

    anchor_sizes = ((32, 64, 128, 256, 512),) * 3
    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)

    model = FasterRCNN(backbone,
                       num_classes,
                       rpn_anchor_generator=AnchorGenerator(
                           anchor_sizes, aspect_ratios),
                       **kwargs)
    if pretrained:
        if model_urls.get(weights_name, None) is None:
            raise ValueError(
                f"No checkpoint is available for model {weights_name}")
        state_dict = load_state_dict_from_url(model_urls[weights_name],
                                              progress=progress)
        model.load_state_dict(state_dict)
    return model
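For reference, a usage sketch; the weights key is an assumption here, matching the one torchvision uses for this variant:

# Sketch: build the COCO-pretrained model and switch to inference mode.
# "fasterrcnn_mobilenet_v3_large_fpn_coco" is assumed to be a key in model_urls.
model = _fasterrcnn_mobilenet_v3_large_fpn(
    "fasterrcnn_mobilenet_v3_large_fpn_coco", pretrained=True)
model.eval()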
Example 7
def retinanet_resnet50_fpn(pretrained=False,
                           progress=True,
                           num_classes=91,
                           pretrained_backbone=True,
                           trainable_backbone_layers=None,
                           **kwargs):
    """
    Constructs a RetinaNet model with a ResNet-50-FPN backbone.

    Reference: `"Focal Loss for Dense Object Detection" <https://arxiv.org/abs/1708.02002>`_.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
    image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending on whether it is in training or evaluation mode.

    During training, the model expects both the input tensors and targets (a list of dictionaries),
    containing:

        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
    follows, where ``N`` is the number of detections:

        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (``Int64Tensor[N]``): the predicted labels for each detection
        - scores (``Tensor[N]``): the scores of each detection

    For more details on the output, you may refer to :ref:`instance_seg_output`.

    Example::

        >>> model = torchvision.models.detection.retinanet_resnet50_fpn(pretrained=True)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Args:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int): number of output classes of the model (including the background)
        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
        trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
    """
    trainable_backbone_layers = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 5, 3)

    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    # skip P2 because it generates too many anchors (according to their paper)
    backbone = resnet_fpn_backbone('resnet50',
                                   pretrained_backbone,
                                   returned_layers=[2, 3, 4],
                                   extra_blocks=LastLevelP6P7(256, 256),
                                   trainable_layers=trainable_backbone_layers)
    model = RetinaNet(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['retinanet_resnet50_fpn_coco'], progress=progress)
        model.load_state_dict(state_dict)
        # the pretrained weights were produced with FrozenBatchNorm eps=0.0;
        # overwrite it for numerical compatibility with the checkpoint
        overwrite_eps(model, 0.0)
    return model
Example 8
def get_model(config):

    model = None
    # input_size = 0

    if config.model_name == "resnet":
        """ Resnet34
        """
        model = models.resnet18(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)

        n_features = model.fc.in_features
        model.fc = nn.Linear(n_features, config.n_classes)
        # input_size = 224
    elif config.model_name == "alexnet":
        """ Alexnet
        """
        model = models.alexnet(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)

        n_features = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(n_features, config.n_classes)
        # input_size = 224
    elif config.model_name == "vgg":
        """ VGG16_bn
        """
        model = models.vgg16_bn(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)

        n_features = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(n_features, config.n_classes)
        # input_size = 224
    elif config.model_name == "densenet":
        """ Densenet
        """
        model = models.densenet121(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)

        n_features = model.classifier.in_features
        model.classifier = nn.Linear(n_features, config.n_classes)
        # input_size = 224
        
    elif config.model_name == 'mobilenet':
        model = models.mobilenet_v2(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)

        n_features = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(n_features, config.n_classes)

    elif config.model_name == "KeypointRCNN":
      backbone = models.mobilenet_v2(pretrained=True).features
      backbone.out_channels = 1280
      roi_pooler = MultiScaleRoIAlign(
          featmap_names=['0'],
          output_size=7,
          sampling_ratio=2
      )
      anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                    aspect_ratios=((0.5, 1.0, 2.0),))
      keypoint_roi_pooler = MultiScaleRoIAlign(
          featmap_names=['0'],
          output_size=14,
          sampling_ratio=2
      )

      model = KeypointRCNN(
          backbone, 
          num_classes=2,
          num_keypoints=24,
          box_roi_pool=roi_pooler,
          keypoint_roi_pool=keypoint_roi_pooler,rpn_anchor_generator=anchor_generator
      )

    elif config.model_name == "keypointrcnn_resnet50":
      model = models.detection.keypointrcnn_resnet50_fpn(pretrained=config.use_pretrained, progress=False)
      model.roi_heads.keypoint_predictor.kps_score_lowres = nn.ConvTranspose2d(512, 24, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    
    elif config.model_name == "keypointrcnn_resnet101":
      pretrained_backbone = True
      pretrained = False
      trainable_backbone_layers = None
      trainable_backbone_layers = _validate_trainable_layers(
              pretrained or pretrained_backbone, trainable_backbone_layers, 5, 3)

      backbone = resnet_fpn_backbone('resnet101', pretrained_backbone, trainable_layers=trainable_backbone_layers)

      model = KeypointRCNN(
          backbone, 
          num_classes=2,
          num_keypoints=24)

    else:
        raise NotImplementedError(
            f"Unknown model name: {config.model_name!r}")

    return model
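Since `config` is just an attribute bag, a minimal sketch of calling this factory follows; the field names are taken from the accesses above, and `SimpleNamespace` stands in for whatever config object the project actually uses:

from types import SimpleNamespace

# Hypothetical config; only the fields get_model actually reads are set.
config = SimpleNamespace(
    model_name="resnet",   # selects one of the branches above
    use_pretrained=True,   # load ImageNet weights where applicable
    freeze=True,           # forwarded to set_parameter_requires_grad
    n_classes=10,          # size of the replaced classifier head
)
model = get_model(config)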
Example 9
def fasterrcnn_resnet50_fpn(pretrained=False,
                            progress=True,
                            num_classes=91,
                            pretrained_backbone=True,
                            trainable_backbone_layers=None,
                            **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.

    Reference: `"Faster R-CNN: Towards Real-Time Object Detection with
    Region Proposal Networks" <https://arxiv.org/abs/1506.01497>`_.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
    image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending on whether it is in training or evaluation mode.

    During training, the model expects both the input tensors and targets (a list of dictionaries),
    containing:

        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
    follows, where ``N`` is the number of detections:

        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (``Int64Tensor[N]``): the predicted labels for each detection
        - scores (``Tensor[N]``): the scores of each detection

    For more details on the output, you may refer to :ref:`instance_seg_output`.

    Faster R-CNN is exportable to ONNX for a fixed batch size with input images of fixed size.

    Example::

        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        >>> # For training
        >>> images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
        >>> boxes[:, :, 2:4] = boxes[:, :, 0:2] + boxes[:, :, 2:4]  # make boxes valid: x1 < x2, y1 < y2
        >>> labels = torch.randint(1, 91, (4, 11))
        >>> images = list(image for image in images)
        >>> targets = []
        >>> for i in range(len(images)):
        >>>     d = {}
        >>>     d['boxes'] = boxes[i]
        >>>     d['labels'] = labels[i]
        >>>     targets.append(d)
        >>> output = model(images, targets)
        >>> # For inference
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)
        >>>
        >>> # optionally, if you want to export the model to ONNX:
        >>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version = 11)

    Args:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int): number of output classes of the model (including the background)
        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
        trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
    """
    trainable_backbone_layers = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 5, 3)

    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50',
                                   pretrained_backbone,
                                   trainable_layers=trainable_backbone_layers)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'], progress=progress)
        model.load_state_dict(state_dict)
        overwrite_eps(model, 0.0)
    return model
Example 10
def maskrcnn_resnet50_fpn(pretrained=False,
                          progress=True,
                          num_classes=91,
                          pretrained_backbone=True,
                          trainable_backbone_layers=None,
                          **kwargs):
    """
    Constructs a Mask R-CNN model with a ResNet-50-FPN backbone.

    Reference: `"Mask R-CNN" <https://arxiv.org/abs/1703.06870>`_.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
    image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending on whether it is in training or evaluation mode.

    During training, the model expects both the input tensors and targets (a list of dictionaries),
    containing:

        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box
        - masks (``UInt8Tensor[N, H, W]``): the segmentation binary masks for each instance

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses for both the RPN and the R-CNN, and the mask loss.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
    follows, where ``N`` is the number of detected instances:

        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (``Int64Tensor[N]``): the predicted labels for each instance
        - scores (``Tensor[N]``): the scores of each instance
        - masks (``UInt8Tensor[N, 1, H, W]``): the predicted masks for each instance, in ``0-1`` range. In order to
          obtain the final segmentation masks, the soft masks can be thresholded, generally
          with a value of 0.5 (``mask >= 0.5``)

    For more details on the output and on how to plot the masks, you may refer to :ref:`instance_seg_output`.

    Mask R-CNN is exportable to ONNX for a fixed batch size with input images of fixed size.

    Example::

        >>> model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)
        >>>
        >>> # optionally, if you want to export the model to ONNX:
        >>> torch.onnx.export(model, x, "mask_rcnn.onnx", opset_version = 11)

    Args:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int): number of output classes of the model (including the background)
        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
        trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
    """
    trainable_backbone_layers = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 5, 3)

    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50',
                                   pretrained_backbone,
                                   trainable_layers=trainable_backbone_layers)
    model = MaskRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['maskrcnn_resnet50_fpn_coco'], progress=progress)
        model.load_state_dict(state_dict)
        overwrite_eps(model, 0.0)
    return model