def decom_resnet50():
    """Build a ResNet-50 based model and split it into two sequential stages.

    The model is always created with ``pretrained=False``. When
    ``opt.caffe_pretrain`` is truthy and ``opt.load_path`` is falsy, the
    state dict stored at ``opt.caffe_pretrain_path`` is loaded into it.

    Returns:
        tuple: ``(features_extractor, features_classifier)``, two
        ``nn.Sequential`` modules. The first chains ``conv1``, ``bn1``,
        ``relu``, ``maxpool`` and ``layer1``..``layer3``; the second wraps
        ``layer4`` only.
    """
    # Read the flag first so attribute access on ``opt`` happens before the
    # model is constructed, exactly as in the branching form.
    use_caffe_weights = opt.caffe_pretrain
    model = resnet_fpn_backbone(backbone_name="resnet50", pretrained=False)
    if use_caffe_weights and not opt.load_path:
        model.load_state_dict(t.load(opt.caffe_pretrain_path))

    # Freeze the stem convolution and the first residual stage.
    # NOTE(review): these attributes are read directly off the object
    # returned by resnet_fpn_backbone -- confirm that object exposes
    # conv1/bn1/layer1.. at top level.
    for stage_name in ("conv1", "layer1"):
        for param in getattr(model, stage_name).parameters():
            param.requires_grad = False
    model.apply(set_bn_fix)

    # Stem plus layer1..layer3 form the feature extractor.
    extractor_parts = [
        model.conv1, model.bn1, model.relu, model.maxpool,
        model.layer1, model.layer2, model.layer3,
    ]
    features_extractor = nn.Sequential(*extractor_parts)

    # layer4 alone acts as the classifier head.
    features_classifier = nn.Sequential(model.layer4)

    return features_extractor, features_classifier
Ejemplo n.º 2
0
def maskrcnn_resnet50_fpn(pretrained=False,
                          progress=True,
                          num_classes=91,
                          pretrained_backbone=True,
                          is_double_backbone=True,
                          **kwargs):
    """
    Construct a ``MaskRCNN`` fed by two ResNet-50-FPN backbones.

    Two independent ``resnet50`` FPN backbones are always created and handed
    to ``MaskRCNN`` together with ``is_double_backbone``; how the second
    backbone is used is decided inside ``MaskRCNN``.

    The conventions below are those of torchvision's Mask R-CNN and are
    assumed to hold for the ``MaskRCNN`` class used here (TODO confirm):

    Input is a list of tensors, each of shape ``[C, H, W]`` and in ``0-1``
    range; images may differ in size.

    During training the model also expects targets (list of dictionary) with:
        - boxes (``FloatTensor[N, 4]``): ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, within ``0..H`` and ``0..W``
        - labels (``Int64Tensor[N]``): class label of each ground-truth box
        - masks (``UInt8Tensor[N, 1, H, W]``): binary mask of each instance
    and returns a ``Dict[Tensor]`` with the RPN and R-CNN classification and
    regression losses plus the mask loss.

    During inference only the images are needed and the result is a
    ``List[Dict[Tensor]]`` (one entry per image) with:
        - boxes (``FloatTensor[N, 4]``): predicted boxes, ``[x1, y1, x2, y2]``
        - labels (``Int64Tensor[N]``): predicted labels
        - scores (``Tensor[N]``): score of each prediction
        - masks (``UInt8Tensor[N, 1, H, W]``): soft masks in ``0-1`` range,
          usually thresholded at 0.5 (``mask >= 0.5``)

    Arguments:
        pretrained (bool): If truthy, the backbones are built without their
            own pretrained weights. No detector checkpoint is loaded by this
            function.
        progress (bool): Accepted for signature compatibility; not used here.
        num_classes (int): forwarded to ``MaskRCNN``.
        pretrained_backbone (bool): forwarded to ``resnet_fpn_backbone``
            unless ``pretrained`` is truthy.
        is_double_backbone (bool): forwarded to ``MaskRCNN``.
        **kwargs: forwarded to ``MaskRCNN``.
    """
    # A fully pretrained detector would overwrite the backbone weights, so
    # the backbone download is skipped in that case.
    backbone_weights = False if pretrained else pretrained_backbone

    primary = resnet_fpn_backbone('resnet50', backbone_weights)
    secondary = resnet_fpn_backbone('resnet50', backbone_weights)

    return MaskRCNN(primary, secondary, is_double_backbone, num_classes,
                    **kwargs)
Ejemplo n.º 3
0
def get_model_instance_detection(num_classes,
                                 backbone_name='resnet101',
                                 pretrained_backbone=True,
                                 trainable_layers=3):
    """Build a ``FasterRCNN`` detector on a ResNet-FPN backbone.

    Args:
        num_classes: forwarded to ``FasterRCNN``.
        backbone_name: name handed to ``resnet_fpn_backbone``. The special
            value ``'shape-resnet50'`` builds a ``resnet50`` FPN backbone and
            then overlays a checkpoint downloaded from a fixed URL.
        pretrained_backbone: forwarded as ``pretrained`` to
            ``resnet_fpn_backbone``.
        trainable_layers: forwarded to ``resnet_fpn_backbone``.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    if backbone_name != 'shape-resnet50':
        plain_backbone = resnet_fpn_backbone(backbone_name,
                                             pretrained=pretrained_backbone,
                                             trainable_layers=trainable_layers)
        return FasterRCNN(plain_backbone, num_classes=num_classes)

    checkpoint_url = 'https://bitbucket.org/robert_geirhos/texture-vs-shape-pretrained-models/raw/60b770e128fffcbd8562a3ab3546c1a735432d03/resnet50_finetune_60_epochs_lr_decay_after_30_start_resnet50_train_45_epochs_combined_IN_SF-ca06340c.pth.tar'

    # Retry with an explicit CPU mapping if the default load fails.
    try:
        checkpoint = model_zoo.load_url(checkpoint_url)
    except RuntimeError:
        checkpoint = model_zoo.load_url(checkpoint_url,
                                        map_location=torch.device('cpu'))

    # Swap the first dot-separated component of every key for 'body' so the
    # names line up with the FPN backbone's state dict.
    renamed_weights = {
        '.'.join(['body'] + key.split('.')[1:]): tensor
        for key, tensor in checkpoint["state_dict"].items()
    }

    # eps is pinned explicitly; the original author notes this resolves NaNs
    # appearing during training.
    frozen_bn = partial(FrozenBatchNorm2d, eps=1E-5)

    def build_backbone():
        # One fresh resnet50 FPN backbone with the frozen batch-norm layers.
        return resnet_fpn_backbone('resnet50',
                                   pretrained=pretrained_backbone,
                                   norm_layer=frozen_bn,
                                   trainable_layers=trainable_layers)

    # Prefer the GPU; on failure rebuild the backbone and keep it on the CPU.
    try:
        backbone = build_backbone().cuda()
    except (RuntimeError, AssertionError):
        backbone = build_backbone().cpu()

    missing, unexpected = backbone.load_state_dict(renamed_weights,
                                                   strict=False)
    print(
        'When creating shape-resnet50...\nMissing states: {}\nUnexpected states: {}'
        .format(missing, unexpected))

    return FasterRCNN(backbone, num_classes=num_classes)
Ejemplo n.º 4
0
def get_instance_segmentation_model(num_classes, backbone, dropout=False):
    """Assemble a ``MaskRCNN`` with freshly created box and mask predictors.

    Args:
        num_classes: number of classes for the model and both predictors.
        backbone: backbone name passed to ``resnet_fpn_backbone`` (always
            requested with ``pretrained=True``).
        dropout: when truthy, the box head and the mask head are rebuilt
            with ``TwoMLPHead`` / ``MaskRCNNHeads``. Whether those classes
            actually apply dropout depends on their definitions, which are
            not visible here.

    Returns:
        The configured ``MaskRCNN`` model.
    """
    fpn_backbone = resnet_fpn_backbone(backbone, pretrained=True)
    model = MaskRCNN(fpn_backbone, num_classes)
    heads = model.roi_heads

    if dropout:
        # Box head: input size is channels * (pooled side length)^2.
        pooled_side = heads.box_roi_pool.output_size[0]
        heads.box_head = TwoMLPHead(
            fpn_backbone.out_channels * pooled_side**2, 1024)
        # Mask head: four 256-channel layers, dilation 1.
        heads.mask_head = MaskRCNNHeads(fpn_backbone.out_channels,
                                        (256, 256, 256, 256), 1)

    # New box predictor sized from the existing classifier's input width.
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # New mask predictor sized from the existing one, 256 hidden channels.
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
    heads.mask_predictor = MaskRCNNPredictor(mask_in_channels, 256,
                                             num_classes)

    return model
Ejemplo n.º 5
0
def get_model(pretrained=False,
              progress=True,
              nr_class=91,
              attr_score_thresh=0.7,
              pos_weight=100.,
              pretrained_backbone=True,
              **kwargs):
    """Create a ``TripleMaskRCNN`` on a ResNet-50-FPN backbone.

    Args:
        pretrained: when truthy, the backbone is built without its own
            pretrained weights and the ``maskrcnn_resnet50_fpn_coco``
            checkpoint is loaded into the whole model afterwards.
        progress: forwarded to ``load_state_dict_from_url``.
        nr_class: class count passed to ``TripleMaskRCNN``.
        attr_score_thresh: forwarded to ``TripleMaskRCNN``.
        pos_weight: forwarded to ``TripleMaskRCNN``.
        pretrained_backbone: forwarded to ``resnet_fpn_backbone`` unless
            ``pretrained`` is truthy.
        **kwargs: forwarded to ``TripleMaskRCNN``.

    Returns:
        The constructed model.
    """
    # The detector checkpoint would overwrite backbone weights anyway.
    backbone_weights = False if pretrained else pretrained_backbone

    model = TripleMaskRCNN(resnet_fpn_backbone('resnet50', backbone_weights),
                           nr_class,
                           attr_score_thresh=attr_score_thresh,
                           pos_weight=pos_weight,
                           **kwargs)

    if not pretrained:
        return model

    # TODO: check whether use strict=False?
    coco_weights = load_state_dict_from_url(
        model_urls['maskrcnn_resnet50_fpn_coco'], progress=progress)
    model.load_state_dict(coco_weights)
    return model
Ejemplo n.º 6
0
def fasterrcnn_resnet34(num_classes, **kwargs):
    """Return a ``FasterRCNN`` on a pretrained ResNet-34 FPN backbone.

    The box predictor is replaced by a new ``FastRCNNPredictor`` whose input
    width is taken from the predictor the model was created with.

    Args:
        num_classes: class count for the model and the new predictor.
        **kwargs: forwarded to ``FasterRCNN``.
    """
    detector = FasterRCNN(resnet_fpn_backbone("resnet34", pretrained=True),
                          num_classes, **kwargs)

    roi_heads = detector.roi_heads
    predictor_width = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(predictor_width, num_classes)

    return detector
Ejemplo n.º 7
0
 def test_resnet50_fpn_backbone(self):
     """Check the output keys of an untrained ResNet-50 FPN backbone on CPU."""
     cpu = torch.device('cpu')
     # One random 300x300 RGB image in the dtype configured on the test case.
     batch = torch.rand(1, 3, 300, 300, dtype=self.dtype, device=cpu)
     backbone = resnet_fpn_backbone(backbone_name='resnet50',
                                    pretrained=False)
     feature_maps = backbone(batch)
     expected_keys = ['0', '1', '2', '3', 'pool']
     self.assertEqual(list(feature_maps.keys()), expected_keys)
 def test_resnet50_fpn_backbone(self):
     """Check the output keys of an untrained ResNet-50 FPN backbone on CPU.

     Keys are compared after conversion to ``str`` so the check passes
     whether the backbone labels its pyramid levels with integers
     (``0..3``) or with strings (``'0'..'3'``).
     """
     device = torch.device('cpu')
     x = torch.rand(1, 3, 300, 300, dtype=self.dtype, device=device)
     resnet50_fpn = resnet_fpn_backbone(backbone_name='resnet50',
                                        pretrained=False)
     y = resnet50_fpn(x)
     # Normalise key types; only the level names and their order matter.
     assert [str(key) for key in y.keys()] == ['0', '1', '2', '3', 'pool']
Ejemplo n.º 9
0
def add_resent18_backbone(num_classes):
    """Build a ``FasterRCNN`` on a pretrained ResNet-FPN backbone.

    Despite the function name, the backbone requested here is ``resnet34``;
    that choice is kept unchanged.

    Args:
        num_classes: number of classes passed to ``FasterRCNN``.

    Returns:
        The assembled ``FasterRCNN`` model.
    """
    # The FPN backbone already carries ``out_channels``, so nothing has to be
    # set by hand.
    backbone = resnet_fpn_backbone(backbone_name='resnet34', pretrained=True)

    # The RPN generates 5 x 3 anchors per spatial location (5 sizes, 3 aspect
    # ratios). The anchor generator needs ONE sizes/ratios tuple PER feature
    # map: with its default settings resnet_fpn_backbone yields five maps
    # ('0'..'3' plus 'pool'), so the tuple is repeated five times. A single
    # tuple does not match the number of feature maps and breaks the RPN.
    num_feature_maps = 5
    anchor_generator = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512),) * num_feature_maps,
        aspect_ratios=((0.5, 1.0, 2.0),) * num_feature_maps)

    # Feature maps used for region-of-interest cropping and the crop size
    # after rescaling. The backbone returns an OrderedDict[Tensor];
    # ``featmap_names`` selects which of its entries are pooled from.
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)

    # Put the pieces together inside a FasterRCNN model.
    model = FasterRCNN(backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    return model
Ejemplo n.º 10
0
def FasterRCNN_resnet101():
    """Build a ``FasterRCNN`` on a pretrained ResNet-101 FPN backbone.

    The number of classes is read from ``config.CLASSES``.

    Returns:
        The assembled ``FasterRCNN`` model.
    """
    backbone = resnet_fpn_backbone('resnet101', pretrained=True)

    # FasterRCNN reads the channel count of the feature maps from here.
    backbone.out_channels = 256

    # The anchor generator needs one sizes/ratios tuple per feature map.
    # With its default settings resnet_fpn_backbone yields five maps (four
    # pyramid levels plus 'pool'), so the 5-size x 3-ratio tuple is repeated
    # five times; a single tuple does not match the number of feature maps
    # and breaks the RPN.
    num_feature_maps = 5
    anchor_generator = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512),) * num_feature_maps,
        aspect_ratios=((0.5, 1.0, 2.0),) * num_feature_maps)

    # NOTE(review): the integer key 0 only matches backbones that label
    # their feature maps with integers; torchvision releases that use string
    # keys need featmap_names=['0'] -- confirm against the installed version.
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                    output_size=7,
                                                    sampling_ratio=2)

    model = FasterRCNN(backbone,
                       num_classes=config.CLASSES,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    return model
Ejemplo n.º 11
0
    def __init__(self,
                 n_classes,
                 box_size=7,
                 backbone_name='resnet50',
                 pretrained_backbone=True):
        """Set up a ``FasterRCNN`` with one square anchor size per level.

        Args:
            n_classes: number of foreground classes; the detector is built
                with ``n_classes + 1`` outputs.
            box_size: stored on the instance together with its floor half.
            backbone_name: name passed to ``resnet_fpn_backbone``.
            pretrained_backbone: passed to ``resnet_fpn_backbone`` as its
                second positional argument.
        """
        super().__init__()

        # Snapshot the attribute names before and after storing the inputs;
        # the difference is kept so those fields can be looked up by name.
        names_before = set(dir(self))
        self.n_classes = n_classes
        self.pretrained_backbone = pretrained_backbone
        self.box_size = box_size
        self.half_box_size = box_size // 2
        self._input_names = list(set(dir(self)).difference(names_before))

        # Five anchor sizes, one per entry, each with aspect ratio 1.0 only.
        anchor_sizes = tuple((side,) for side in (4, 8, 16, 32, 64))
        aspect_ratios = tuple((1.0,) for _ in anchor_sizes)
        anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

        fpn_backbone = resnet_fpn_backbone(backbone_name, pretrained_backbone)
        # n_classes + 1: index 0 is the background class in the torchvision
        # Faster R-CNN convention.
        self.fasterrcnn = FasterRCNN(fpn_backbone,
                                     n_classes + 1,
                                     rpn_anchor_generator=anchor_generator)
Ejemplo n.º 12
0
def fasterrcnn_resnet_fpn(resnet_name='resnet50',
                          pretrained_backbone=True,
                          progress=True,
                          num_classes=2,
                          **kwargs):
    """
    Construct a Faster R-CNN model with a ResNet-FPN backbone.

    The detector is created with ``box_detections_per_img=100``. The
    conventions below are those of torchvision's Faster R-CNN and are assumed
    to hold for the ``FasterRCNN`` class used here (TODO confirm):

    Input is a list of tensors, each of shape ``[C, H, W]`` and in ``0-1``
    range; images may differ in size.

    During training the model also expects targets (list of dictionary) with:
        - boxes (``FloatTensor[N, 4]``): ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, within ``0..H`` and ``0..W``
        - labels (``Int64Tensor[N]``): class label of each ground-truth box
    and returns a ``Dict[Tensor]`` with the classification and regression
    losses of both the RPN and the R-CNN.

    During inference only the images are needed and the result is a
    ``List[Dict[Tensor]]`` (one entry per image) with:
        - boxes (``FloatTensor[N, 4]``): predicted boxes, ``[x1, y1, x2, y2]``
        - labels (``Int64Tensor[N]``): predicted labels
        - scores (``Tensor[N]``): score of each prediction

    Arguments:
        resnet_name (str): backbone name passed to ``resnet_fpn_backbone``.
        pretrained_backbone (bool): passed to ``resnet_fpn_backbone``.
        progress (bool): accepted for signature compatibility; not used here.
        num_classes (int): passed to ``FasterRCNN``.
        **kwargs: forwarded to ``FasterRCNN``.
    """
    fpn_backbone = resnet_fpn_backbone(resnet_name, pretrained_backbone)
    return FasterRCNN(fpn_backbone,
                      num_classes,
                      box_detections_per_img=100,
                      **kwargs)
Ejemplo n.º 13
0
def resnet_fpn_backbone_test():
    """Run a dummy image through a ResNet-50 FPN backbone and print shapes.

    Prints a list of ``(name, shape)`` pairs, one per returned feature map.
    """
    from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

    # Stage correspondence as recorded by the original author:
    #
    # ResNet paper   | torchvision.models.ResNet      | BackboneWithFPN key
    # ---------------+--------------------------------+--------------------
    # conv1  (1/2)   | conv1 + bn1 + relu + maxpool   |
    # conv2  (1/4)   | layer1                         | 0
    # conv3  (1/8)   | layer2                         | 1
    # conv4  (1/16)  | layer3                         | 2
    # conv5  (1/32)  | layer4                         | 3

    # Pretrained backbone with all five layers left trainable.
    fpn = resnet_fpn_backbone("resnet50",
                              pretrained=True,
                              trainable_layers=5)

    # A single random 64x64 RGB image as a batched tensor. (The original
    # author notes that passing a list of differently sized images errors.)
    dummy_batch = torch.rand(1, 3, 64, 64)

    feature_maps = fpn(dummy_batch)
    shapes_by_name = [(key, fmap.shape) for key, fmap in feature_maps.items()]
    print(shapes_by_name)
Ejemplo n.º 14
0
def maskrcnn_resnet_fpn(pretrained=False,
                        backbone_name='resnet50',
                        num_classes=91,
                        pretrained_backbone=True,
                        trainable_backbone_layers=3,
                        **kwargs):
    """
    Construct a Mask R-CNN model with a ResNet-FPN backbone.

    Loading of COCO detector weights is commented out in this function, so
    ``pretrained`` only influences how many backbone layers stay trainable.

    The conventions below are those of torchvision's Mask R-CNN and are
    assumed to hold for the ``MaskRCNN`` class used here (TODO confirm):

    Input is a list of tensors, each of shape ``[C, H, W]`` and in ``0-1``
    range; images may differ in size.

    During training the model also expects targets (list of dictionary) with:
        - boxes (``FloatTensor[N, 4]``): ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, ``x`` within ``0..W``, ``y`` within
          ``0..H``
        - labels (``Int64Tensor[N]``): class label of each ground-truth box
        - masks (``UInt8Tensor[N, H, W]``): binary mask of each instance
    and returns a ``Dict[Tensor]`` with the RPN and R-CNN classification and
    regression losses plus the mask loss.

    During inference only the images are needed and the result is a
    ``List[Dict[Tensor]]`` (one entry per image) with:
        - boxes (``FloatTensor[N, 4]``): predicted boxes, ``[x1, y1, x2, y2]``
        - labels (``Int64Tensor[N]``): predicted labels
        - scores (``Tensor[N]``): score of each prediction
        - masks (``UInt8Tensor[N, 1, H, W]``): soft masks in ``0-1`` range,
          usually thresholded at 0.5 (``mask >= 0.5``)

    Arguments:
        pretrained (bool): see the note above; no checkpoint is loaded.
        backbone_name (str): name passed to ``resnet_fpn_backbone``.
        num_classes (int): number of output classes of the model (including
            the background).
        pretrained_backbone (bool): passed to ``resnet_fpn_backbone``.
        trainable_backbone_layers (int): number of trainable (not frozen)
            resnet layers starting from the final block. Must lie between 0
            and 5; 5 means all backbone layers are trainable.
        **kwargs: forwarded to ``MaskRCNN``.
    """
    assert 0 <= trainable_backbone_layers <= 5

    # Nothing pretrained is in play, so there is nothing worth freezing.
    nothing_pretrained = not pretrained and not pretrained_backbone
    if nothing_pretrained:
        trainable_backbone_layers = 5

    fpn_backbone = resnet_fpn_backbone(
        backbone_name,
        pretrained_backbone,
        trainable_layers=trainable_backbone_layers)

    # Disabled by the original author:
    # if pretrained:
    #     state_dict = load_state_dict_from_url(model_urls['maskrcnn_resnet50_fpn_coco'],
    #                                           progress=progress)
    #     model.load_state_dict(state_dict)
    return MaskRCNN(fpn_backbone, num_classes, **kwargs)
Ejemplo n.º 15
0
def resnet_frcnn_model(pretrained, num_classes=91):
    """Build a ``FasterRCNNExtractor`` on an untrained ResNet-50 FPN backbone.

    Args:
        pretrained: kept for call compatibility. It has no effect: the
            backbone is always created without pretrained weights and no
            checkpoint is loaded here.
        num_classes: class count passed to ``FasterRCNNExtractor``.

    Returns:
        The constructed ``FasterRCNNExtractor`` model.
    """
    # The former ``if pretrained:`` branch only assigned a local that was
    # never read, so it has been removed.
    backbone = resnet_fpn_backbone('resnet50', False)
    model = FasterRCNNExtractor(backbone, num_classes)
    return model
Ejemplo n.º 16
0
def create_fasterrcnn_backbone(
    backbone: str,
    fpn: bool = True,
    pretrained: Optional[str] = None,
    trainable_backbone_layers: int = 3,
    **kwargs: Any
) -> nn.Module:
    """
    Create a backbone module for Faster R-CNN, with or without an FPN.

    Args:
        backbone:
            Supported backbones with fpn are: "resnet18", "resnet34",
            "resnet50", "resnet101", "resnet152", "resnext50_32x4d",
            "resnext101_32x8d", "wide_resnet50_2", "wide_resnet101_2".
            Without fpn supported are: "resnet18", "resnet34", "resnet50",
            "resnet101", "resnet152", "resnext101_32x8d", "mobilenet_v2",
            "vgg11", "vgg13", "vgg16", "vgg19".
        fpn: If True, builds a resnet backbone with an fpn on top.
        pretrained: Only forwarded to ``create_torchvision_backbone`` (the
            non-fpn path); the fpn path always requests ``pretrained=True``.
        trainable_backbone_layers: number of trainable resnet layers starting
            from the final block (fpn path only).
        **kwargs: forwarded to ``resnet_fpn_backbone`` (fpn path only).
    """
    if not fpn:
        # Plain backbone; the second element of the returned pair is unused.
        plain_backbone, _ = create_torchvision_backbone(backbone, pretrained)
        return plain_backbone

    # torchvision resnet with an fpn added.
    return resnet_fpn_backbone(
        backbone,
        pretrained=True,
        trainable_layers=trainable_backbone_layers,
        **kwargs)
Ejemplo n.º 17
0
    def __init__(self, f):
        """Build the detector and load its weights from ``f``.

        A ``FasterRCNN`` with 10 classes is created on a pretrained
        ResNet-50 FPN backbone (all five backbone layers trainable), moved to
        CUDA when available (CPU otherwise), and then initialised from ``f``.

        Args:
            f: either a ``str`` path to a saved state dict or an
                ``io.BytesIO`` stream containing one. Any other type leaves
                the model with the weights it was constructed with.
        """
        trainable_backbone_layers = 5
        backbone = resnet_fpn_backbone(
            'resnet50', True, trainable_layers=trainable_backbone_layers)
        self.model = FasterRCNN(backbone,
                                num_classes=10,
                                max_size=3840,
                                min_size=2160,
                                rpn_pre_nms_top_n_train=2000,
                                rpn_pre_nms_top_n_test=2000,
                                rpn_post_nms_top_n_train=2000,
                                rpn_post_nms_top_n_test=2000,
                                box_detections_per_img=100,
                                rpn_nms_thresh=0.01,
                                box_nms_thresh=0.01)

        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.model.to(device)
        if isinstance(f, str):  # local file
            print("Loading model from local file at {}".format(f))
            self.model.load_state_dict(torch.load(f, map_location=device))
        elif isinstance(f, io.BytesIO):  # stream
            print("Loading model from stream")
            # torch.load accepts file-like objects, so the stream is loaded
            # the same way as a path. Previously this branch announced the
            # load but left the weights untouched.
            self.model.load_state_dict(torch.load(f, map_location=device))
Ejemplo n.º 18
0
def create_fastercnn_backbone(backbone: str, fpn: bool = True, pretrained: str = None,
                              trainable_backbone_layers: int = 3, **kwargs) -> nn.Module:

    """
    Create a backbone module for Faster R-CNN, with or without an FPN.

    Args:
        backbone (str):
            Supported backbones with fpn are: "resnet18", "resnet34",
            "resnet50", "resnet101", "resnet152", "resnext50_32x4d",
            "resnext101_32x8d", "wide_resnet50_2", "wide_resnet101_2".
            Without fpn supported are: "resnet18", "resnet34", "resnet50",
            "resnet101", "resnet152", "resnext101_32x8d", "mobilenet_v2",
            "vgg11", "vgg13", "vgg16", "vgg19".
        fpn (bool): If True, builds a resnet backbone with an fpn on top.
        pretrained (str): Only forwarded to ``create_torchvision_backbone``
            (the non-fpn path); the fpn path always requests
            ``pretrained=True``.
        trainable_backbone_layers (int): forwarded to ``resnet_fpn_backbone``
            as ``trainable_layers`` (fpn path only).
        **kwargs: forwarded to ``resnet_fpn_backbone`` (fpn path only).
    """

    if not fpn:
        # Plain backbone; the second element of the returned pair is unused.
        print("FPN is not supported for Non Resnet Backbones")
        plain_backbone, _ = create_torchvision_backbone(backbone, pretrained)
        return plain_backbone

    # torchvision resnet with an fpn added.
    print("Resnet FPN Backbones works only for imagenet weights")
    return resnet_fpn_backbone(backbone, pretrained=True,
                               trainable_layers=trainable_backbone_layers, **kwargs)
    def __init__(self):
        """Initialise the parent with an untrained ResNet-50 FPN backbone.

        The parent is set up for 91 classes; the remaining attributes are
        filter thresholds, each given once with a ``det_`` prefix and once
        with a ``tck_`` prefix (presumably detection and tracking -- confirm
        against the code that reads them).
        """
        fpn_backbone = resnet_fpn_backbone('resnet50', False)
        super(Countor_NN, self).__init__(fpn_backbone, 91)

        # Class ids kept from the predictions (cars, trucks and buses
        # according to the original author).
        self.selected_classes = [3, 6, 8]

        # Minimum box area.
        self.det_min_area = self.tck_min_area = 100

        # Maximum box area.
        self.det_max_area = 1000000

        # Score thresholds.
        self.det_score_thresh = 0.5
        self.tck_score_thresh = 0.05

        # Non-maximum-suppression thresholds.
        self.det_nms_thresh = self.tck_nms_thresh = 0.5

        # Minimum fraction of a box that has to lie inside the ROI.
        self.det_min_ROI_in = 0.3
        self.tck_min_ROI_in = 0.1
Ejemplo n.º 20
0
    def __init__(self,
                 backbone=None,
                 architecture=None,
                 detector=None,
                 num_classes=None,
                 device='cpu',
                 *args,
                 **kwargs):
        """Build a ``MaskRCNN`` on the requested backbone family.

        Args:
            backbone: ``'efficientnet'``, ``'fishnet'`` or ``'resnet'``. Any
                other value is handed to ``MaskRCNN`` unchanged.
            architecture: concrete variant within the family. For
                ``'fishnet'`` the values ``'fishnet99'`` and ``'fishnet150'``
                are recognised; anything else selects ``fishnet201``.
            detector: must not be None; not otherwise used here.
            num_classes: class count passed to ``MaskRCNN``.
            device: stored on the instance and used to place the model.
            *args, **kwargs: accepted but unused.

        Raises:
            AssertionError: if any of the required arguments is None.
        """
        assert backbone is not None, ValueError('backbone can not None')
        assert architecture is not None, ValueError(
            'architecture can not None')
        assert detector is not None, ValueError('detector can not None')
        assert num_classes is not None, ValueError('num_classes can not None')
        assert device is not None, ValueError('device can not None.')
        self.device = device

        # ``super`` has to be called to obtain the proxy object;
        # ``super.__init__()`` without the parentheses raises TypeError.
        super().__init__()
        if backbone == 'efficientnet':
            backbone = EfficientNet.from_pretrained(architecture)
            # MaskRCNN reads the feature channel count from the backbone.
            backbone.out_channels = 1280
        elif backbone == 'fishnet':
            if architecture == 'fishnet99':
                backbone = fishnet99()
            elif architecture == 'fishnet150':
                backbone = fishnet150()
            else:
                backbone = fishnet201()

            backbone.out_channels = 1000
        elif backbone == 'resnet':
            backbone = resnet_fpn_backbone(architecture, pretrained=True)

        self.model = MaskRCNN(backbone, num_classes=num_classes)
        self.model.to(device)
Ejemplo n.º 21
0
def fasterrcnn_resnet50_fpn(
    pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, **kwargs
):
    """Build a ResNet-50-FPN ``FasterRCNN`` with a ``CachelessAnchorGenerator``.

    Args:
        pretrained: when truthy, the backbone is built without its own
            pretrained weights and the ``fasterrcnn_resnet50_fpn_coco``
            checkpoint is loaded into the whole model afterwards.
        progress: forwarded to ``load_state_dict_from_url``.
        num_classes: class count passed to ``FasterRCNN``.
        pretrained_backbone: forwarded to ``resnet_fpn_backbone`` unless
            ``pretrained`` is truthy.
        **kwargs: forwarded to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    # The detector checkpoint would overwrite backbone weights anyway.
    backbone_weights = False if pretrained else pretrained_backbone
    fpn_backbone = resnet_fpn_backbone("resnet50", backbone_weights)

    # One anchor size per entry, each with the same three aspect ratios.
    # (The original author also experimented with sizes 12..192 and
    # min_size=300 / max_size=400; that variant is not active.)
    level_sizes = tuple((side,) for side in (32, 64, 128, 256, 512))
    level_ratios = ((0.5, 1.0, 2.0),) * len(level_sizes)
    anchor_generator = CachelessAnchorGenerator(level_sizes, level_ratios)

    detector = FasterRCNN(
        fpn_backbone, num_classes, rpn_anchor_generator=anchor_generator, **kwargs
    )
    if not pretrained:
        return detector

    coco_weights = load_state_dict_from_url(
        model_urls["fasterrcnn_resnet50_fpn_coco"], progress=progress
    )
    detector.load_state_dict(coco_weights)
    return detector
Ejemplo n.º 22
0
 def __init__(self, num_classes):
     """Initialise the parent detector on an untrained ResNet-50 FPN backbone.

     Args:
         num_classes: class count passed to the parent constructor.
     """
     fpn_backbone = resnet_fpn_backbone('resnet50', False)
     super(FRCNN_FPN, self).__init__(fpn_backbone, num_classes)
     # Cache slots that allow features to be reused; all start out empty.
     self.original_image_sizes = self.preprocessed_images = self.features = None
Ejemplo n.º 23
0
def _resnet_fpn(name: str, pretrained: bool = True, **kwargs):
    """Create a ResNet-FPN backbone and run ``patch_param_groups`` on it.

    Args:
        name: backbone name passed to ``resnet_fpn_backbone``.
        pretrained: passed to ``resnet_fpn_backbone``.
        **kwargs: forwarded to ``resnet_fpn_backbone``.

    Returns:
        The backbone after ``patch_param_groups`` has been applied to it.
    """
    fpn_backbone = resnet_fpn_backbone(
        backbone_name=name, pretrained=pretrained, **kwargs)
    patch_param_groups(fpn_backbone)
    return fpn_backbone
Ejemplo n.º 24
0
 def __init__(self, num_class=10, snap=None, trainable_layers=5):
     """Build the model on a pretrained ResNet-101 FPN backbone.

     Args:
         num_class: class count passed to the parent constructor.
         snap: optional path to a saved state dict. Entries whose shape
             differs from the matching parameter of this model are dropped
             before a non-strict load.
         trainable_layers: forwarded to ``resnet_fpn_backbone``.
     """
     backbone = resnet_fpn_backbone('resnet101', True, trainable_layers=trainable_layers)
     # A custom RPN anchor generator (sizes 8..128, ratios 0.5/1/2) used to
     # be built here but was never passed on; the parent's default is used.
     super(MaskRCNN, self).__init__(
         backbone,
         num_class,
     )
     if snap is None:
         return

     # Use the ``snap`` argument: it is never stored on the instance, so
     # reading ``self.snap`` fails. The context manager closes the file.
     with open(snap, 'rb') as snapshot_file:
         state_dict = torch.load(snapshot_file)

     # Build this model's state dict once instead of once per key.
     own_state = self.state_dict()
     for k in list(state_dict.keys()):
         if k not in own_state:
             continue
         if own_state[k].shape != state_dict[k].shape:
             print(f'removing key {k}')
             del state_dict[k]

     # Non-strict: keys dropped above or absent from the snapshot are
     # tolerated.
     self.load_state_dict(state_dict, strict=False)
Ejemplo n.º 25
0
def maskkeypointrcnn_resnet50_fpn(pretrained=False,
                                  progress=True,
                                  num_classes=2,
                                  num_keypoints=7,
                                  pretrained_backbone=True,
                                  **kwargs):
    """Build a ``MaskKeypointRCNN`` on a ResNet-50-FPN backbone.

    Args:
        pretrained: when truthy, the backbone is built without its own
            pretrained weights, and the Mask R-CNN and Keypoint R-CNN COCO
            checkpoints are loaded non-strictly (in that order) after
            ``delete_predictor_weights`` has been applied to each.
        progress: forwarded to ``load_state_dict_from_url``.
        num_classes: forwarded to ``MaskKeypointRCNN``.
        num_keypoints: forwarded to ``MaskKeypointRCNN``.
        pretrained_backbone: forwarded to ``resnet_fpn_backbone`` unless
            ``pretrained`` is truthy.
        **kwargs: forwarded to ``MaskKeypointRCNN``.

    Returns:
        The constructed model.
    """
    # The detector checkpoints would overwrite backbone weights anyway.
    backbone_weights = False if pretrained else pretrained_backbone
    model = MaskKeypointRCNN(resnet_fpn_backbone('resnet50', backbone_weights),
                             num_classes=num_classes,
                             num_keypoints=num_keypoints,
                             **kwargs)
    if not pretrained:
        return model

    # TODO: merge parameters from pretrained maskrcnn and keypointrcnn
    # Fetch and prepare both checkpoints first, then apply them in order.
    prepared = []
    for url_key in ('maskrcnn_resnet50_fpn_coco',
                    'keypointrcnn_resnet50_fpn_coco'):
        weights = load_state_dict_from_url(model_urls[url_key],
                                           progress=progress)
        delete_predictor_weights(weights)
        prepared.append(weights)

    for weights in prepared:
        model.load_state_dict(weights, strict=False)
    return model
Ejemplo n.º 26
0
def fasterrcnn_resnet50_fpn(pretrained=False,
                            progress=True,
                            num_classes=91,
                            pretrained_backbone=True,
                            **kwargs):
    """
    Build a Faster R-CNN detector that uses a ResNet-50-FPN backbone.

    Input is a list of image tensors, each shaped ``[C, H, W]`` with values in the ``0-1`` range;
    the images in one list may differ in size.

    What the model expects and returns depends on whether it is in training or evaluation mode.

    In training mode the model takes the images plus a list of target dictionaries, each holding:
        - boxes (``FloatTensor[N, 4]``): ground-truth boxes as ``[x1, y1, x2, y2]``, where ``x`` lies
          between ``0`` and ``W`` and ``y`` lies between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): class label of every ground-truth box
    and returns a ``Dict[Tensor]`` with the classification and regression losses of both the RPN
    and the R-CNN.

    In evaluation mode only the images are needed, and the post-processed predictions come back as a
    ``List[Dict[Tensor]]`` with one entry per input image. Each entry contains:
        - boxes (``FloatTensor[N, 4]``): predicted boxes as ``[x1, y1, x2, y2]``, where ``x`` lies
          between ``0`` and ``W`` and ``y`` lies between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): predicted label of every box
        - scores (``Tensor[N]``): score of every prediction

    Faster R-CNN is exportable to ONNX for a fixed batch size with input images of fixed size.

    Example::
        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        >>> # For training
        >>> images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
        >>> labels = torch.randint(1, 91, (4, 11))
        >>> images = list(images)
        >>> targets = [{'boxes': b, 'labels': l} for b, l in zip(boxes, labels)]
        >>> output = model(images, targets)
        >>> # For inference
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)
        >>>
        >>> # optionally, if you want to export the model to ONNX:
        >>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version = 11)

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int): forwarded to ``FasterRCNN`` as its second positional argument
        pretrained_backbone (bool): forwarded to ``resnet_fpn_backbone``; replaced by ``False``
            whenever ``pretrained`` is set
        **kwargs: any further keyword arguments are forwarded to ``FasterRCNN``
    """
    # no need to download the backbone if the full checkpoint is loaded below
    backbone_pretrained = False if pretrained else pretrained_backbone
    fpn = resnet_fpn_backbone('resnet50', backbone_pretrained)
    detector = FasterRCNN(fpn, num_classes, **kwargs)
    if not pretrained:
        return detector

    coco_weights = load_state_dict_from_url(
        model_urls['fasterrcnn_resnet50_fpn_coco'], progress=progress)
    detector.load_state_dict(coco_weights)
    return detector
Ejemplo n.º 27
0
    def __init__(self, backbone):
        """
        Create the module and attach a pretrained FPN backbone to it.

        Arguments:
            backbone: value handed to ``resnet_fpn_backbone`` as its first
                argument (presumably a ResNet architecture name such as
                ``'resnet50'`` -- confirm against callers).
        """
        super().__init__()
        fpn = resnet_fpn_backbone(backbone, pretrained=True)

        # Explicitly mark every backbone parameter as trainable.
        for _name, weight in fpn.named_parameters():
            weight.requires_grad = True

        self.backbone = fpn
Ejemplo n.º 28
0
def create_model_with_backbone(arch='resnet101',
                               pretrained=True,
                               num_classes=2):
    """
    Build a ``FasterRCNN`` model whose backbone comes from ``resnet_fpn_backbone``.

    Arguments:
        arch (str): backbone architecture name, passed to ``resnet_fpn_backbone``
        pretrained (bool): passed to ``resnet_fpn_backbone`` as ``pretrained``
            (ImageNet weights, per the original description of this helper)
        num_classes (int): passed to ``FasterRCNN`` as ``num_classes``

    Returns:
        The constructed ``FasterRCNN`` instance.
    """
    return FasterRCNN(resnet_fpn_backbone(arch, pretrained=pretrained),
                      num_classes=num_classes)
Ejemplo n.º 29
0
def fasterrcnn_resnet50_fpn(pretrained=False,
                            progress=True,
                            num_classes=91,
                            pretrained_backbone=True,
                            **kwargs):
    """
    Build a Faster R-CNN detector that uses a ResNet-50-FPN backbone.

    Input is a list of image tensors, each shaped ``[C, H, W]`` with values in the ``0-1`` range;
    the images in one list may differ in size.

    What the model expects and returns depends on whether it is in training or evaluation mode.

    In training mode the model takes the images plus a list of target dictionaries, each holding:
        - boxes (``FloatTensor[N, 4]``): ground-truth boxes as ``[x1, y1, x2, y2]``, with values
          between ``0`` and ``H`` and ``0`` and ``W``
        - labels (``Int64Tensor[N]``): class label of every ground-truth box

    and returns a ``Dict[Tensor]`` with the classification and regression losses of both the RPN
    and the R-CNN.

    In evaluation mode only the images are needed, and the post-processed predictions come back as a
    ``List[Dict[Tensor]]`` with one entry per input image. Each entry contains:
        - boxes (``FloatTensor[N, 4]``): predicted boxes as ``[x1, y1, x2, y2]``, with values between
          ``0`` and ``H`` and ``0`` and ``W``
        - labels (``Int64Tensor[N]``): predicted label of every box
        - scores (``Tensor[N]``): score of every prediction

    Example::

        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)  # noqa
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)  # noqa

    Parameters
    ----------
    pretrained : bool
        If True, returns a model pre-trained on COCO train2017.
    progress : bool
        If True, displays a progress bar of the download to stderr.
    pretrained_backbone : bool
        Forwarded to ``resnet_fpn_backbone``; replaced by ``False`` whenever ``pretrained`` is set.
    num_classes : int
        Forwarded to ``FasterRCNN`` as its second positional argument.
    **kwargs
        Any further keyword arguments are forwarded to ``FasterRCNN``.
    """
    # The COCO checkpoint is loaded over the whole model, so the backbone is
    # not asked for its own pretrained weights in that case.
    use_pretrained_backbone = pretrained_backbone
    if pretrained:
        use_pretrained_backbone = False

    detector = FasterRCNN(
        resnet_fpn_backbone('resnet50', use_pretrained_backbone),
        num_classes,
        **kwargs)

    if pretrained:
        checkpoint_url = model_urls['fasterrcnn_resnet50_fpn_coco']
        detector.load_state_dict(
            load_state_dict_from_url(checkpoint_url, progress=progress))
    return detector
Ejemplo n.º 30
0
def fasterrcnn_resnet50_fpn(pretrained=False,
                            progress=True,
                            num_classes=91,
                            pretrained_backbone=True,
                            trainable_backbone_layers=3,
                            model_dir=None,
                            **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
    image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending if it is in training or evaluation mode.

    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
    containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
          between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
    follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
          between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction

    Faster R-CNN is exportable to ONNX for a fixed batch size with inputs images of fixed size.

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet.
            Replaced by ``False`` whenever ``pretrained`` is set.
        num_classes (int): number of output classes of the model (including the background)
        trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
            Forced to 5 when neither ``pretrained`` nor ``pretrained_backbone`` is set.
        model_dir (str, optional): forwarded to ``load_state_dict_from_url`` as ``model_dir`` when the
            COCO checkpoint is fetched; only used when ``pretrained`` is True.
        **kwargs: any further keyword arguments are forwarded to ``FasterRCNN``.

    Raises:
        AssertionError: if ``trainable_backbone_layers`` is outside ``[0, 5]``.
    """
    assert 0 <= trainable_backbone_layers <= 5, (
        "trainable_backbone_layers must be between 0 and 5")
    # dont freeze any layers if pretrained model or backbone is not used
    if not (pretrained or pretrained_backbone):
        trainable_backbone_layers = 5
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    # Hand the (possibly adjusted) layer count to the backbone builder;
    # without this the argument was validated but never had any effect.
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone,
                                   trainable_layers=trainable_backbone_layers)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'],
            progress=progress,
            model_dir=model_dir)
        model.load_state_dict(state_dict)
    return model