def retinanet_resnet50_fpn(pretrained=False, progress=True, num_classes=91,
                           pretrained_backbone=True, trainable_backbone_layers=None,
                           tfidf=None, **kwargs):
    """
    Constructs a RetinaNet model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
    image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending on whether it is in training or evaluation mode.

    During training, the model expects both the input tensors and targets (a list of dictionaries),
    each containing:

        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
    follows:

        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores for each prediction

    Example::

        >>> model = torchvision.models.detection.retinanet_resnet50_fpn(pretrained=True)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Args:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int): number of output classes of the model (including the background)
        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
        trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
        tfidf: optional extra argument that this variant forwards to the ``RetinaNet`` constructor
            (not part of the upstream torchvision signature)
    """
    trainable_backbone_layers = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 5, 3)

    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    # skip P2 because it generates too many anchors (according to their paper)
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone, returned_layers=[2, 3, 4],
                                   extra_blocks=LastLevelP6P7(256, 256), trainable_layers=trainable_backbone_layers)
    model = RetinaNet(backbone, num_classes, tfidf, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['retinanet_resnet50_fpn_coco'],
                                              progress=progress)
        model.load_state_dict(state_dict)
        overwrite_eps(model, 0.0)
    return model
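The docstring above also describes the training-mode contract. Below is a minimal sketch (not part of the original file) of exercising that path, assuming the function above and its torchvision dependencies are in scope, and leaving the custom tfidf argument at its default since its meaning is not documented here:

import torch

# build without downloading any weights; target format follows the docstring
model = retinanet_resnet50_fpn(pretrained=False, pretrained_backbone=False, num_classes=3)
model.train()

images = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
targets = [
    {"boxes": torch.tensor([[10.0, 20.0, 100.0, 150.0]]), "labels": torch.tensor([1])},
    {"boxes": torch.tensor([[30.0, 40.0, 200.0, 250.0]]), "labels": torch.tensor([2])},
]

loss_dict = model(images, targets)   # dict of classification and box-regression losses
sum(loss_dict.values()).backward()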
def retinanet_resnet50_fpn(
    pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, **kwargs
):
    """
    Constructs a RetinaNet model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
    image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending on whether it is in training or evaluation mode.

    During training, the model expects both the input tensors and targets (a list of dictionaries),
    each containing:

        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
    follows:

        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores for each prediction

    Example::

        >>> model = torchvision.models.detection.retinanet_resnet50_fpn(pretrained=True)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int): number of output classes of the model (including the background)
        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
    """
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    # skip P2 because it generates too many anchors (according to their paper)
    backbone = resnet_fpn_backbone(
        "resnet50",
        pretrained_backbone,
        returned_layers=[2, 3, 4],
        extra_blocks=LastLevelP6P7(256, 256),
    )
    model = RetinaNet(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls["retinanet_resnet50_fpn_coco"], progress=progress
        )
        model.load_state_dict(state_dict)
    return model
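Building on the inference description above, here is a small sketch (an assumption, not part of the original file) of thresholding the returned scores per image:

import torch

model = retinanet_resnet50_fpn(pretrained=True)
model.eval()

with torch.no_grad():
    predictions = model([torch.rand(3, 300, 400)])

score_threshold = 0.5  # arbitrary cut-off, chosen only for illustration
for pred in predictions:
    keep = pred["scores"] >= score_threshold
    print(pred["boxes"][keep], pred["labels"][keep])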
Example #3
def create_model(backbone_name: str, num_classes: int = 10, **kwargs):
    """
    backbone_name (string): resnet architecture. Possible values are 'ResNet', 'resnet18', 'resnet34', 'resnet50',
             'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2'
    """
    backbone = resnet_fpn_backbone(backbone_name, False,
                                   returned_layers=[2, 3, 4],
                                   extra_blocks=LastLevelP6P7(256, 256),
                                   trainable_layers=5)
    return RetinaNet(backbone, num_classes, **kwargs)
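A minimal usage sketch for create_model (assuming resnet_fpn_backbone, LastLevelP6P7 and RetinaNet are imported from torchvision as in the snippet's source file):

import torch

model = create_model("resnet50", num_classes=10)
model.eval()

with torch.no_grad():
    detections = model([torch.rand(3, 512, 512)])
print(detections[0].keys())  # boxes, scores, labels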
Example #4
def resnext50_32x4d_fpn(
    pretrained: bool = True,
    returned_layers=(2, 3, 4),
    extra_blocks=LastLevelP6P7(256, 256),
    **kwargs,
):
    return resnet_fpn.resnext50_32x4d_fpn(
        pretrained=pretrained,
        returned_layers=returned_layers,
        extra_blocks=extra_blocks,
        **kwargs,
    )
Example #5
def wide_resnet101_2_fpn(
    pretrained: bool = True,
    returned_layers=(2, 3, 4),
    extra_blocks=LastLevelP6P7(256, 256),
    **kwargs,
):
    return resnet_fpn.wide_resnet101_2_fpn(
        pretrained=pretrained,
        returned_layers=returned_layers,
        extra_blocks=extra_blocks,
        **kwargs,
    )
Example #6
    def __init__(
        self,
        backbone_fn,
        returned_layers=(2, 3, 4),
        extra_blocks=LastLevelP6P7(256, 256),
        **backbone_fn_kwargs,
    ):
        super().__init__(
            backbone_fn=backbone_fn,
            returned_layers=returned_layers,
            extra_blocks=extra_blocks,
            **backbone_fn_kwargs,
        )
Example #7
    def backbone1(self,
                  pretrained_backbone,
                  pretrained=False,
                  trainable_backbone_layers=None):
        trainable_backbone_layers = _validate_trainable_layers(
            pretrained or pretrained_backbone, trainable_backbone_layers, 5, 3)

        if pretrained:
            # no need to download the backbone if pretrained is set
            pretrained_backbone = False
        # skip P2 because it generates too many anchors (according to their paper)
        backbone = resnet_fpn_backbone(
            'resnet18',
            pretrained_backbone,
            returned_layers=[2, 3, 4],
            extra_blocks=LastLevelP6P7(256, 256),
            trainable_layers=trainable_backbone_layers)
        return backbone
Example #8
    def __init__(self,
                 backbone_name,
                 pretrained=False,
                 norm_layer=misc_nn_ops.FrozenBatchNorm2d,
                 trainable_layers=3,
                 out_channels=256):
        super().__init__()
        # Get ResNet
        backbone = resnet.__dict__[backbone_name](pretrained=pretrained,
                                                  norm_layer=norm_layer)
        # select layers that won't be frozen
        assert 0 <= trainable_layers <= 5
        layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1',
                           'conv1'][:trainable_layers]
        # freeze layers only if pretrained backbone is used
        for name, parameter in backbone.named_parameters():
            if all([not name.startswith(layer) for layer in layers_to_train]):
                parameter.requires_grad_(False)

        return_layers = {
            'layer1': '0',
            'layer2': '1',
            'layer3': '2',
            'layer4': '3'
        }

        in_channels_stage2 = backbone.inplanes // 8
        self.in_channels_list = [
            0,
            in_channels_stage2 * 2,
            in_channels_stage2 * 4,
            in_channels_stage2 * 8,
        ]

        self.body = IntermediateLayerGetter(backbone,
                                            return_layers=return_layers)
        self.fpn = FeaturePyramidNetwork(
            in_channels_list=self.in_channels_list[1:],  # nonzero only
            out_channels=out_channels,
            extra_blocks=LastLevelP6P7(out_channels, out_channels),
        )
        self.out_channels = out_channels
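To make the body/FPN pairing above concrete, here is a self-contained sketch (not part of the original class) of the same IntermediateLayerGetter + FeaturePyramidNetwork combination for a resnet50; the import paths are torchvision internals and can shift between versions:

import torch
from torchvision.models import resnet50
from torchvision.models._utils import IntermediateLayerGetter
from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelP6P7

backbone = resnet50()
body = IntermediateLayerGetter(backbone, return_layers={"layer2": "0", "layer3": "1", "layer4": "2"})
fpn = FeaturePyramidNetwork(
    in_channels_list=[512, 1024, 2048],  # layer2/3/4 widths for resnet50 (inplanes // 8 * {2, 4, 8})
    out_channels=256,
    extra_blocks=LastLevelP6P7(256, 256),
)

with torch.no_grad():
    features = body(torch.rand(1, 3, 224, 224))
    pyramid = fpn(features)

for name, feat in pyramid.items():
    print(name, tuple(feat.shape))  # five pyramid levels, each with 256 channels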
Example #9
    def __init__(self,
                 backbone,
                 return_layers,
                 in_channels_list,
                 out_channels,
                 last_level='pool'):

        body = IntermediateLayerGetter(backbone, return_layers=return_layers)

        if last_level == 'pool':
            extra_blocks = LastLevelMaxPool()
        elif last_level == 'p6p7':
            extra_blocks = LastLevelP6P7(in_channels_list[-1], out_channels)
        else:
            raise ValueError(f'Not implemented {last_level}.')

        fpn = FeaturePyramidNetwork(
            in_channels_list=in_channels_list,
            out_channels=out_channels,
            extra_blocks=extra_blocks,
        )
        super(BackboneWithFPN,
              self).__init__(OrderedDict([("body", body), ("fpn", fpn)]))
        self.out_channels = out_channels
Example #10
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.p6p7 = LastLevelP6P7(in_channels, out_channels)

        # separate fragment, apparently from the forward() of a modified RetinaNet:
        losses = {}
        detections: List[Dict[str, Tensor]] = []
        if self.training:
            assert targets is not None
            losses = self.compute_loss(targets, head_outputs, anchors, orig_targets, seg_features)

# Standalone walk-through of the same construction, with the defaults spelled out.
# Import locations are torchvision internals (circa 0.9/0.10) and may differ across versions.
from torch.hub import load_state_dict_from_url
from torchvision.models.detection.retinanet import RetinaNet, model_urls
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection._utils import overwrite_eps
from torchvision.ops.feature_pyramid_network import LastLevelP6P7

pretrained = True
progress = True
num_classes = 91
pretrained_backbone = True

if pretrained:
    # no need to download the backbone if pretrained is set
    pretrained_backbone = False

# skip P2 because it generates too many anchors (according to the paper)
backbone = resnet_fpn_backbone('resnet50', pretrained_backbone, returned_layers=[2, 3, 4],
                               extra_blocks=LastLevelP6P7(256, 256))
model = RetinaNet(backbone, num_classes)

if pretrained:
    state_dict = load_state_dict_from_url(model_urls['retinanet_resnet50_fpn_coco'], progress=progress)
    model.load_state_dict(state_dict)
    overwrite_eps(model, 0.0)

print(model)