예제 #1
0
def test():
    """Smoke-test a Faster R-CNN detector built on MobileNetV2 features.

    Builds the model with ``pretrained=False``, switches it to eval mode on
    the GPU, runs it on two random images of different sizes and prints the
    number of predictions followed by the keys of the first prediction.
    A CUDA device is required because model and inputs are moved with
    ``.cuda()``.
    """
    features = torchvision.models.mobilenet_v2(pretrained=False).features
    # The channel count of the backbone output is declared explicitly.
    features.out_channels = 1280

    # A single tuple of sizes / aspect ratios: anchors for one feature map.
    anchors = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512), ),
        aspect_ratios=((0.5, 1.0, 2.0), ),
    )

    # No ``box_roi_pool`` is passed, so FasterRCNN falls back to its default.
    detector = FasterRCNN(features,
                          num_classes=2,
                          rpn_anchor_generator=anchors)
    detector.eval().cuda()

    image_shapes = ((3, 300, 400), (3, 500, 400))
    batch = [torch.rand(*shape).float().cuda() for shape in image_shapes]

    outputs = detector(batch)
    print(len(outputs))
    print(outputs[0].keys())
def predict_and_show(model: FasterRCNN,
                     im_pil: Image.Image,
                     im_tensor: Tensor,
                     score_th: float = 0.5) -> None:
    """Run the detector on one image and plot the filtered detections.

    Three panels are drawn: the input image, the image with the kept boxes
    drawn by ``draw_boxes``, and the class legend from ``show_legend``.

    Args:
        model: detector; it is switched to eval mode by this call.
        im_pil: image shown in the first panel and used as drawing canvas.
        im_tensor: tensor form of the image that is fed to the model.
        score_th: detections whose score is not strictly above this value
            are discarded.
    """
    model.eval()

    # Inference only: no autograd bookkeeping is needed for plotting.
    with torch.no_grad():
        pred = model([im_tensor])[0]

    # Keep the boxes that survive NMS *and* exceed the score threshold.
    # (The previous ``[:0]`` slice was always empty, so ``score_th`` was
    # silently ignored.)
    keep = nms(pred['boxes'], pred['scores'], iou_threshold=0.5)
    keep = keep[pred['scores'][keep] > score_th]
    pred = {key: val[keep] for key, val in pred.items()}

    plt.figure(figsize=(14, 14))

    # Panel 1: the untouched input image.
    plt.subplot(1, 3, 1)
    plt.imshow(im_pil)
    plt.axis('image')
    plt.axis('off')

    # Panel 2: the image with the filtered predictions drawn on it.
    plt.subplot(1, 3, 2)
    plt.imshow(draw_boxes(img=im_pil, annot=pred))
    plt.title('predict')
    plt.axis('off')

    # Panel 3: legend for the selected classes.
    plt.subplot(1, 3, 3)
    show_legend(CLS_SELECT, COLORS)
    plt.show()
예제 #3
0
def custom_fasterrcnn_resnet_fpn(backbone,
                                 pretrained=True,
                                 progress=True,
                                 num_classes=91,
                                 pretrained_backbone=True,
                                 trainable_backbone_layers=3,
                                 **kwargs):
    """Build a Faster R-CNN on a configurable ResNet-FPN backbone.

    Args:
        backbone: mapping with a ``'name'`` entry (backbone name) and a
            ``'params'`` entry (dict of backbone options). The mapping is
            not modified.
        pretrained: if true, the backbone's ``'pretrained'`` option is
            switched off and, for names ending in ``'resnet50'``, the
            ``fasterrcnn_resnet50_fpn_coco`` checkpoint is loaded with
            ``strict=False``.
        progress: forwarded to ``load_state_dict_from_url``.
        num_classes: forwarded to ``FasterRCNN``.
        pretrained_backbone: only consulted together with ``pretrained`` to
            decide whether all backbone layers are made trainable.
        trainable_backbone_layers: must lie in ``[0, 5]``.
            NOTE(review): the value is validated but not written into the
            backbone options here - confirm whether that is intended.
        **kwargs: forwarded to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    backbone_name = backbone['name']
    # Work on a copy so the caller's configuration dict is left untouched.
    backbone_params_config = dict(backbone['params'])
    assert 0 <= trainable_backbone_layers <= 5
    # dont freeze any layers if pretrained model or backbone is not used
    if not (pretrained or pretrained_backbone):
        backbone_params_config['trainable_backbone_layers'] = 5
    if pretrained:
        # no need to download the backbone if pretrained is set
        backbone_params_config['pretrained'] = False

    backbone_model = custom_resnet_fpn_backbone(backbone_name,
                                                backbone_params_config)

    # With exactly four returned layers no explicit pooler is passed;
    # otherwise the pooler is built over all returned feature maps.
    num_feature_maps = len(backbone_model.body.return_layers)
    if num_feature_maps == 4:
        box_roi_pool = None
    else:
        box_roi_pool = MultiScaleRoIAlign(
            featmap_names=[str(i) for i in range(num_feature_maps)],
            output_size=7,
            sampling_ratio=2)

    model = FasterRCNN(backbone_model,
                       num_classes,
                       box_roi_pool=box_roi_pool,
                       **kwargs)
    if pretrained and backbone_name.endswith('resnet50'):
        state_dict = load_state_dict_from_url(
            fasterrcnn_model_urls['fasterrcnn_resnet50_fpn_coco'],
            progress=progress)
        # strict=False: keys that do not match the checkpoint are skipped.
        model.load_state_dict(state_dict, strict=False)
    return model
예제 #4
0
    def __init__(self, f):
        """Build the detector and optionally load saved weights.

        Args:
            f: either a ``str`` path to a saved state dict or an
                ``io.BytesIO`` stream containing one. Any other value
                leaves the freshly constructed weights untouched.
        """
        trainable_backbone_layers = 5
        pretrained = True
        backbone = resnet_fpn_backbone(
            'resnet50', pretrained, trainable_layers=trainable_backbone_layers)
        self.model = FasterRCNN(backbone,
                                num_classes=10,
                                max_size=3840,
                                min_size=2160,
                                rpn_pre_nms_top_n_train=2000,
                                rpn_pre_nms_top_n_test=2000,
                                rpn_post_nms_top_n_train=2000,
                                rpn_post_nms_top_n_test=2000,
                                box_detections_per_img=100,
                                rpn_nms_thresh=0.01,
                                box_nms_thresh=0.01)

        # Use the GPU when one is available, otherwise stay on the CPU.
        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.model.to(device)
        if isinstance(f, str):  # local file
            print("Loading model from local file at {}".format(f))
            self.model.load_state_dict(torch.load(f, map_location=device))
        elif isinstance(f, io.BytesIO):  # stream
            print("Loading model from stream")
            # torch.load accepts file-like objects; previously this branch
            # announced the load but never performed it.
            self.model.load_state_dict(torch.load(f, map_location=device))
예제 #5
0
def fasterrcnn_resnet50_fpn(
    pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, **kwargs
):
    """Build a Faster R-CNN (ResNet-50 FPN) using a ``CachelessAnchorGenerator``.

    Args:
        pretrained: if true, the ``fasterrcnn_resnet50_fpn_coco`` state dict
            is downloaded and loaded into the model; backbone pre-training
            is then skipped.
        progress: forwarded to ``load_state_dict_from_url``.
        num_classes: forwarded to ``FasterRCNN``.
        pretrained_backbone: forwarded to ``resnet_fpn_backbone`` unless
            ``pretrained`` is set.
        **kwargs: forwarded to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    # The full checkpoint already contains backbone weights, so the
    # separate backbone download is skipped in that case.
    use_pretrained_backbone = False if pretrained else pretrained_backbone
    backbone = resnet_fpn_backbone("resnet50", use_pretrained_backbone)

    # One anchor size per feature level, three aspect ratios on each level.
    # (For small inputs, e.g. min_size=300 / max_size=400, sizes of
    # 12, 24, 48, 96, 192 were tried as an alternative.)
    sizes_per_level = tuple((size,) for size in (32, 64, 128, 256, 512))
    ratios_per_level = tuple((0.5, 1.0, 2.0) for _ in sizes_per_level)
    anchor_generator = CachelessAnchorGenerator(sizes_per_level, ratios_per_level)

    model = FasterRCNN(
        backbone, num_classes, rpn_anchor_generator=anchor_generator, **kwargs
    )

    if not pretrained:
        return model

    weights = load_state_dict_from_url(
        model_urls["fasterrcnn_resnet50_fpn_coco"], progress=progress
    )
    model.load_state_dict(weights)
    return model
예제 #6
0
def custom_fasterrcnn_resnet_fpn(backbone,
                                 pretrained=True,
                                 progress=True,
                                 num_classes=91,
                                 pretrained_backbone=True,
                                 trainable_backbone_layers=3,
                                 **kwargs):
    """Build a Faster R-CNN on a configurable ResNet-FPN backbone.

    Args:
        backbone: mapping with a ``'name'`` entry (backbone name) and a
            ``'params'`` entry (dict of backbone options). The mapping is
            not modified.
        pretrained: if true, the backbone's ``'pretrained'`` option is
            switched off and, for names ending in ``'resnet50'``, the
            ``fasterrcnn_resnet50_fpn_coco`` checkpoint is loaded with
            ``strict=False``.
        progress: forwarded to ``load_state_dict_from_url``.
        num_classes: forwarded to ``FasterRCNN``.
        pretrained_backbone: only consulted together with ``pretrained`` to
            decide whether all backbone layers are made trainable.
        trainable_backbone_layers: must lie in ``[0, 5]``.
            NOTE(review): the value is validated but not written into the
            backbone options here - confirm whether that is intended.
        **kwargs: forwarded to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    backbone_name = backbone['name']
    # Work on a copy so the caller's configuration dict is left untouched.
    backbone_params_config = dict(backbone['params'])
    assert 0 <= trainable_backbone_layers <= 5
    # dont freeze any layers if pretrained model or backbone is not used
    if not (pretrained or pretrained_backbone):
        backbone_params_config['trainable_backbone_layers'] = 5
    if pretrained:
        # no need to download the backbone if pretrained is set
        backbone_params_config['pretrained'] = False

    backbone_model = custom_resnet_fpn_backbone(backbone_name,
                                                backbone_params_config)
    model = FasterRCNN(backbone_model, num_classes, **kwargs)
    if pretrained and backbone_name.endswith('resnet50'):
        state_dict = load_state_dict_from_url(
            fasterrcnn_model_urls['fasterrcnn_resnet50_fpn_coco'],
            progress=progress)
        # strict=False: keys that do not match the checkpoint are skipped.
        model.load_state_dict(state_dict, strict=False)
    return model
예제 #7
0
def fasterrcnn_resnet50_fpn(pretrained=False,
                            progress=True,
                            num_classes=91,
                            pretrained_backbone=True,
                            trainable_backbone_layers=3,
                            model_dir=None,
                            **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
    image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending if it is in training or evaluation mode.

    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
    containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
          between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
    follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
          between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction

    Faster R-CNN is exportable to ONNX for a fixed batch size with inputs images of fixed size.

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
        num_classes (int): number of output classes of the model (including the background)
        trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
        model_dir (str, optional): forwarded to ``load_state_dict_from_url`` when ``pretrained`` is set.
        **kwargs: forwarded to ``FasterRCNN``.
    """
    assert trainable_backbone_layers <= 5 and trainable_backbone_layers >= 0
    # dont freeze any layers if pretrained model or backbone is not used
    if not (pretrained or pretrained_backbone):
        trainable_backbone_layers = 5
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    # Forward the (validated, possibly adjusted) number of trainable layers;
    # it was previously computed but never handed to the backbone builder.
    backbone = resnet_fpn_backbone('resnet50',
                                   pretrained_backbone,
                                   trainable_layers=trainable_backbone_layers)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'],
            progress=progress,
            model_dir=model_dir)
        model.load_state_dict(state_dict)
    return model
예제 #8
0
def fasterrcnn_resnet101_fpn(pretrained=False,
                             progress=True,
                             num_classes=91,
                             pretrained_backbone=True,
                             model_dir=None,
                             **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-101-FPN backbone.
    Note that it is NOT an official model.

    Inputs are a list of ``[C, H, W]`` tensors in ``0-1`` range, one per
    image; images may differ in size.

    In training mode the model additionally expects a list of target
    dictionaries with
        - boxes (``FloatTensor[N, 4]``): ground-truth boxes as ``[x1, y1, x2, y2]``
        - labels (``Int64Tensor[N]``): class label of each ground-truth box
    and returns a ``Dict[Tensor]`` with the classification and regression
    losses of both the RPN and the R-CNN.

    In evaluation mode it returns a ``List[Dict[Tensor]]``, one entry per
    image, with
        - boxes (``FloatTensor[N, 4]``): predicted boxes as ``[x1, y1, x2, y2]``
        - labels (``Int64Tensor[N]``): predicted labels
        - scores (``Tensor[N]``): score of each prediction

    Arguments:
        pretrained (bool): If True, downloads the checkpoint registered as
            ``fasterrcnn_resnet101_fpn_coco`` and loads its ``'model'`` entry
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int): forwarded to ``FasterRCNN``
        pretrained_backbone (bool): forwarded to ``resnet_fpn_backbone``
            unless ``pretrained`` is set
        model_dir (str, optional): forwarded to ``load_state_dict_from_url``
        **kwargs: forwarded to ``FasterRCNN``
    """
    # The full checkpoint already covers the backbone, so its separate
    # download is skipped when ``pretrained`` is requested.
    use_pretrained_backbone = False if pretrained else pretrained_backbone
    backbone = resnet_fpn_backbone('resnet101', use_pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)

    if not pretrained:
        return model

    checkpoint = load_state_dict_from_url(
        model_urls['fasterrcnn_resnet101_fpn_coco'],
        progress=progress,
        model_dir=model_dir)
    # The weights live under the 'model' key of this checkpoint.
    model.load_state_dict(checkpoint['model'])
    return model
예제 #9
0
def faster_rcnn_mobile(self_pretrained, num_classes):
    """Build a Faster R-CNN whose backbone is the MobileNetV2 feature stack.

    Args:
        self_pretrained: when falsy, the MobileNetV2 backbone is created with
            ``pretrained=True`` (and a message is printed); when truthy, it is
            created with ``pretrained=False``.
        num_classes: forwarded to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    if not self_pretrained:
        print('load mobilenet_v2 backbone pretrained on ImageNet')

    use_imagenet_weights = not self_pretrained  # pretrain on ImageNet
    features = torchvision.models.mobilenet_v2(
        pretrained=use_imagenet_weights).features
    features.out_channels = 1280

    # Consider adding more elongated shapes.
    # todo: add 0.3 4.0
    anchors = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                              aspect_ratios=((0.5, 1.0, 2.0), ))

    # An explicit pooler is needed because mobilenet yields a single feature
    # level. More names can be listed for multi-level backbones.
    # NOTE(review): whether the feature map is keyed by 0 or '0' depends on
    # the torchvision version - confirm against the installed one.
    pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)

    # The model normalizes and resizes its inputs itself;
    # box_nms_thresh is used during inference.
    return FasterRCNN(features,
                      num_classes=num_classes,
                      rpn_anchor_generator=anchors,
                      box_roi_pool=pooler)
    def __init__(self,
                 n_classes,
                 box_size=7,
                 backbone_name='resnet50',
                 pretrained_backbone=True):
        """Set up a Faster R-CNN with one small square anchor per FPN level.

        Args:
            n_classes: number of object classes, background excluded.
            box_size: stored as ``box_size``; its integer half is stored as
                ``half_box_size``.
            backbone_name: forwarded to ``resnet_fpn_backbone``.
            pretrained_backbone: forwarded to ``resnet_fpn_backbone``.
        """
        super().__init__()

        # Snapshot the attribute names before and after storing the inputs;
        # the difference is kept so these fields can be looked up later.
        names_before = set(dir(self))
        self.n_classes = n_classes
        self.pretrained_backbone = pretrained_backbone
        self.box_size = box_size
        self.half_box_size = box_size // 2
        names_after = set(dir(self))
        self._input_names = list(names_after - names_before)

        # One anchor size per level, each with a single 1:1 aspect ratio.
        level_sizes = ((4, ), (8, ), (16, ), (32, ), (64, ))
        level_ratios = ((1.0, ), ) * len(level_sizes)
        anchors = AnchorGenerator(level_sizes, level_ratios)

        fpn_backbone = resnet_fpn_backbone(backbone_name, pretrained_backbone)
        # n_classes + 1: label 0 is the background in the torchvision
        # Faster R-CNN convention.
        self.fasterrcnn = FasterRCNN(fpn_backbone,
                                     n_classes + 1,
                                     rpn_anchor_generator=anchors)
예제 #11
0
def fasterrcnn_resnet_fpn(resnet_name='resnet50',
                          pretrained_backbone=True,
                          progress=True,
                          num_classes=2,
                          **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-FPN backbone.
    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
    image, and should be in ``0-1`` range. Different images can have different sizes.
    The behavior of the model changes depending if it is in training or evaluation mode.
    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
    containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values
          between ``0`` and ``H`` and ``0`` and ``W``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box
    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses for both the RPN and the R-CNN.
    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
    follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values between
          ``0`` and ``H`` and ``0`` and ``W``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction

    Arguments:
        resnet_name (str): backbone name forwarded to ``resnet_fpn_backbone``
        pretrained_backbone (bool): forwarded to ``resnet_fpn_backbone``
        progress (bool): accepted for signature compatibility; not used here
        num_classes (int): forwarded to ``FasterRCNN``
        **kwargs: forwarded to ``FasterRCNN``. ``box_detections_per_img``
            defaults to 100 but may be overridden by the caller.
    """
    backbone = resnet_fpn_backbone(resnet_name, pretrained_backbone)
    # Use 100 detections per image as the default without colliding with a
    # caller-supplied value (which previously raised a duplicate-keyword
    # TypeError).
    kwargs.setdefault('box_detections_per_img', 100)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    return model
예제 #12
0
def faster_rcnn_mobile(input_size, num_classes, self_pretrained):
    """Build a Faster R-CNN whose backbone is the MobileNetV2 feature stack.

    Args:
        input_size: indexable; element 0 is passed as ``min_size`` and
            element 1 as ``max_size`` to ``FasterRCNN``.
        num_classes: forwarded to ``FasterRCNN``.
        self_pretrained: when falsy, the MobileNetV2 backbone is created with
            ``pretrained=True`` (and a message is printed); when truthy, it is
            created with ``pretrained=False``.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    if not self_pretrained:
        print('load mobilenet_v2 backbone pretrained on ImageNet')
    backbone = torchvision.models.mobilenet_v2(
        pretrained=not self_pretrained).features
    backbone.out_channels = 1280

    # The backbone yields a single feature map, so the generator must get
    # exactly one tuple of sizes and one tuple of aspect ratios. The previous
    # call passed five size tuples with a single aspect-ratio tuple, which do
    # not match in length.
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                       aspect_ratios=((0.5, 1.0, 2.0), ))

    # need this, as mobilenet out 1 level features
    # NOTE(review): whether the feature map is keyed by 0 or '0' depends on
    # the torchvision version - confirm against the installed one.
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=[0],  # can be multi
        output_size=7,
        sampling_ratio=2)

    # model will do normalize and resize itself
    # box_nms_thresh used during inference
    model = FasterRCNN(backbone,
                       num_classes=num_classes,
                       min_size=input_size[0],
                       max_size=input_size[1],
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    return model
예제 #13
0
def get_mobilenet_model(num_classes):
    """Build a Faster R-CNN on MobileNetV2 features for small images.

    Follows the example in
    https://github.com/pytorch/vision/blob/master/torchvision/models/detection/faster_rcnn.py

    Args:
        num_classes: forwarded to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` model (``min_size=100``,
        ``max_size=300``).
    """
    features = torchvision.models.mobilenet_v2(pretrained=True).features
    features.out_channels = 1280

    anchors = AnchorGenerator(
        sizes=((32, 64, 128), ),
        aspect_ratios=((0.5, 1.0, 2.0), ),
    )
    # NOTE(review): whether the feature map is keyed by 0 or '0' depends on
    # the torchvision version - confirm against the installed one.
    pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=[0],
        output_size=7,
        sampling_ratio=2,
    )

    # Statistics of the test images that motivated the size limits below:
    #   original width : avg 172.58, std_dev 122.58, min 31, max 1083
    #   original height: avg 105.00, std_dev  52.75, min 13, max  516
    detector_options = dict(
        num_classes=num_classes,
        min_size=100,
        max_size=300,
        rpn_anchor_generator=anchors,
        box_roi_pool=pooler,
    )
    return FasterRCNN(features, **detector_options)
예제 #14
0
def create_fastercnn(
    num_classes: int = 91,
    backbone: nn.Module = None,
    **kwargs,
):
    """
    Creates Faster RCNN implementation based on torchvision library.

    Args:
        num_classes (int) : number of classes, i.e. number of classes to
            label + 1 for background. Class id "0" is reserved for the
            background and must not be used for a real class.
        backbone (nn.Module) : custom backbone. When ``None``, the default
            ``fasterrcnn_resnet50_fpn(pretrained=True)`` is created and its
            box predictor is replaced to output ``num_classes`` classes.
        **kwargs : forwarded to ``fasterrcnn_resnet50_fpn`` or ``FasterRCNN``.

    Returns:
        The constructed detection model.
    """
    if backbone is not None:
        # Custom backbone: build the detector directly around it.
        return FasterRCNN(backbone, num_classes=num_classes, **kwargs)

    # Default detector as provided by torchvision, created with
    # ``pretrained=True``.
    detector = fasterrcnn_resnet50_fpn(pretrained=True, **kwargs)

    # Swap the box predictor for one with the requested number of classes.
    predictor_inputs = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(predictor_inputs,
                                                         num_classes)
    return detector
예제 #15
0
def model(num_classes: int,
          backbone: Optional[nn.Module] = None,
          remove_internal_transforms: bool = True,
          **faster_rcnn_kwargs) -> nn.Module:
    """ FasterRCNN model given by torchvision

    Args:
        num_classes (int): Number of classes.
        backbone (nn.Module): Backbone model to use. Defaults to a resnet50_fpn model.
        remove_internal_transforms (bool): If True, ``remove_internal_model_transforms``
            is applied to the model before it is returned.
        **faster_rcnn_kwargs: Forwarded to ``fasterrcnn_resnet50_fpn`` or ``FasterRCNN``.

    Return:
        nn.Module
    """
    if backbone is not None:
        # Custom backbone: it supplies its own parameter groups.
        detector = FasterRCNN(backbone,
                              num_classes=num_classes,
                              **faster_rcnn_kwargs)
        param_groups = backbone.param_groups()
    else:
        detector = fasterrcnn_resnet50_fpn(pretrained=True,
                                           **faster_rcnn_kwargs)

        # Replace the box predictor so it outputs ``num_classes`` classes.
        predictor_inputs = detector.roi_heads.box_predictor.cls_score.in_features
        detector.roi_heads.box_predictor = FastRCNNPredictor(
            predictor_inputs, num_classes)

        param_groups = resnet_fpn.param_groups(detector.backbone)

    patch_param_groups(model=detector, backbone_param_groups=param_groups)

    if remove_internal_transforms:
        remove_internal_model_transforms(detector)

    return detector
def get_faster_rcnn(
    backbone: torch.nn.Module,
    anchor_generator: AnchorGenerator,
    roi_pooler: MultiScaleRoIAlign,
    num_classes: int,
    image_mean: List[float] = [0.485, 0.456, 0.406],
    image_std: List[float] = [0.229, 0.224, 0.225],
    min_size: int = 512,
    max_size: int = 1024,
    **kwargs,
) -> FasterRCNN:
    """Returns the Faster-RCNN model. Default normalization: ImageNet

    Args:
        backbone: passed to ``FasterRCNN`` as ``backbone``.
        anchor_generator: passed as ``rpn_anchor_generator``.
        roi_pooler: passed as ``box_roi_pool``.
        num_classes: passed as ``num_classes``.
        image_mean: per-channel mean used for normalization.
        image_std: per-channel standard deviation used for normalization.
        min_size: passed as ``min_size``.
        max_size: passed as ``max_size``.
        **kwargs: forwarded to ``FasterRCNN``.

    Returns:
        The model, with the construction parameters also stored on it as
        ``num_classes``, ``image_mean``, ``image_std``, ``min_size`` and
        ``max_size`` attributes.
    """
    # Copy the normalization lists: the defaults are shared module-level
    # objects, and they end up stored on the returned model, so a later
    # in-place edit must not leak into other models or future calls.
    image_mean = list(image_mean)
    image_std = list(image_std)

    model = FasterRCNN(
        backbone=backbone,
        rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pooler,
        num_classes=num_classes,
        image_mean=image_mean,  # ImageNet
        image_std=image_std,  # ImageNet
        min_size=min_size,
        max_size=max_size,
        **kwargs,
    )
    # Keep the construction parameters accessible on the model itself.
    model.num_classes = num_classes
    model.image_mean = image_mean
    model.image_std = image_std
    model.min_size = min_size
    model.max_size = max_size

    return model
예제 #17
0
    def __init__(
        self,
        learning_rate: float = 0.0001,
        num_classes: int = 91,
        backbone: str = None,
        fpn: bool = True,
        pretrained_backbone: str = None,
        trainable_backbone_layers: int = 3,
        **kwargs,
    ):
        """
        Args:
            learning_rate: the learning rate
            num_classes: number of detection classes (including background)
            backbone: name of a custom backbone; when ``None`` the default
                ``fasterrcnn_resnet50_fpn(pretrained=True)`` model is used and
                its box predictor is replaced
            fpn: forwarded to ``create_fastercnn_backbone`` (custom backbone only)
            pretrained_backbone (str): if "imagenet", returns a model with backbone pre-trained on Imagenet
            trainable_backbone_layers: number of trainable resnet layers starting from final block
            **kwargs: forwarded to both ``create_fastercnn_backbone`` and
                ``FasterRCNN`` (custom backbone only)
        """
        super().__init__()
        self.learning_rate = learning_rate
        self.num_classes = num_classes
        self.backbone = backbone

        if backbone is not None:
            # Custom backbone: build it, then wrap it in a detector.
            custom_backbone = create_fastercnn_backbone(
                backbone,
                fpn,
                pretrained_backbone,
                trainable_backbone_layers,
                **kwargs,
            )
            self.model = FasterRCNN(custom_backbone,
                                    num_classes=num_classes,
                                    **kwargs)
            return

        # Default detector with a new box predictor for ``num_classes``.
        detector = fasterrcnn_resnet50_fpn(
            pretrained=True,
            trainable_backbone_layers=trainable_backbone_layers,
        )
        self.model = detector
        predictor_inputs = detector.roi_heads.box_predictor.cls_score.in_features
        detector.roi_heads.box_predictor = FastRCNNPredictor(
            predictor_inputs, num_classes)
예제 #18
0
    def get_model(
        model_name,
        num_classes,
        backbone,
        fpn,
        pretrained,
        pretrained_backbone,
        trainable_backbone_layers,
        anchor_generator,
        **kwargs,
    ):
        """Build a Faster R-CNN or RetinaNet detector.

        Args:
            model_name: ``"fasterrcnn"`` selects Faster R-CNN; any other value
                takes the RetinaNet path.
            num_classes: number of output classes of the detection head.
            backbone: key into ``ObjectDetector.backbones``; ``None`` selects
                the prebuilt model registered in ``_models``.
            fpn: not used by this function.
            pretrained: forwarded to the prebuilt model constructor.
            pretrained_backbone: forwarded to the prebuilt model constructor
                or to the backbone factory.
            trainable_backbone_layers: forwarded to the prebuilt Faster R-CNN
                constructor or to the backbone factory.
            anchor_generator: optional anchor generator for custom backbones.
            **kwargs: forwarded to ``RetinaNetHead`` (prebuilt RetinaNet) or
                to the backbone factory (custom backbone).

        Returns:
            The constructed detection model.
        """
        wants_fasterrcnn = model_name == "fasterrcnn"

        if backbone is None:
            # No backbone given: start from the prebuilt model and swap in a
            # head sized for ``num_classes``.
            if wants_fasterrcnn:
                detector = _models[model_name](
                    pretrained=pretrained,
                    pretrained_backbone=pretrained_backbone,
                    trainable_backbone_layers=trainable_backbone_layers,
                )
                predictor_inputs = detector.roi_heads.box_predictor.cls_score.in_features
                detector.roi_heads.box_predictor = FastRCNNPredictor(
                    predictor_inputs, num_classes)
                return detector

            detector = _models[model_name](
                pretrained=pretrained,
                pretrained_backbone=pretrained_backbone)
            detector.head = RetinaNetHead(
                in_channels=detector.backbone.out_channels,
                num_anchors=detector.head.classification_head.num_anchors,
                num_classes=num_classes,
                **kwargs)
            return detector

        # Custom backbone: the registered factory returns the module together
        # with its number of output features.
        backbone_factory = ObjectDetector.backbones.get(backbone)
        backbone_model, num_features = backbone_factory(
            pretrained=pretrained_backbone,
            trainable_layers=trainable_backbone_layers,
            **kwargs,
        )
        backbone_model.out_channels = num_features

        # Without an explicit generator, a single-level one is created only
        # for backbones that have no ``fpn`` attribute; otherwise the
        # generator stays ``None``.
        if anchor_generator is None and not hasattr(backbone_model, "fpn"):
            anchor_generator = AnchorGenerator(
                sizes=((32, 64, 128, 256, 512), ),
                aspect_ratios=((0.5, 1.0, 2.0), ))

        if wants_fasterrcnn:
            return FasterRCNN(backbone_model,
                              num_classes=num_classes,
                              rpn_anchor_generator=anchor_generator)
        return RetinaNet(backbone_model,
                         num_classes=num_classes,
                         anchor_generator=anchor_generator)
def rcnn_model():
    """Build a ResNet-50 FPN Faster R-CNN with custom anchors and class weights.

    The class-weight tensor is placed on the CUDA device, so a GPU is
    required.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    # One anchor size per feature level, three aspect ratios on each level.
    level_sizes = ((52,), (95,), (245,), (348,), (457,))
    level_ratios = ((0.58, 1.0, 2.0),) * len(level_sizes)
    anchors = AnchorGenerator(level_sizes, level_ratios)

    # Per-class weights handed to the model as ``cross_weights``.
    weight_values = [1, 1.38, 1., 1.84, 6.73, 12.55, 72.34, 56.89]
    cls_weights = torch.tensor(weight_values).to(torch.device('cuda'))

    backbone = resnet_fpn_backbone('resnet50', pretrained=False)
    return FasterRCNN(
        backbone,
        91,
        rpn_anchor_generator=anchors,
        box_nms_thresh=0.3,
        cross_weights=cls_weights,
    )
예제 #20
0
def _init_faster_rcnn(backbone='ResNet', num_classes=91, **kwargs):
    """Build a ResNet-50 FPN Faster R-CNN from locally stored weight files.

    Both the backbone weights (``resnet50-19c8e357.pth``) and the detector
    weights (``MODEL_NAME['Faster_RCNN']``) are read from
    ``PRETRAINED_DEEP_MODEL_DIR``.

    Args:
        backbone: ignored; the value is replaced by the ResNet-50 FPN
            backbone built here.
        num_classes: forwarded to ``FasterRCNN``.
        **kwargs: forwarded to ``FasterRCNN``.

    Returns:
        The model with the stored detector weights loaded.
    """
    # Tensors are mapped to the first GPU when available, else to the CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    resnet_weights_path = os.path.join(PRETRAINED_DEEP_MODEL_DIR,
                                       'resnet50-19c8e357.pth')
    resnet_builder = models.resnet.__dict__['resnet50']
    res50 = resnet_builder(pretrained=False,
                           norm_layer=misc_nn_ops.FrozenBatchNorm2d)
    res50.load_state_dict(torch.load(resnet_weights_path,
                                     map_location=device))
    backbone = _resnet_fpn_backbone(res50)

    model = FasterRCNN(backbone, num_classes, **kwargs)
    detector_weights_path = os.path.join(PRETRAINED_DEEP_MODEL_DIR,
                                         MODEL_NAME['Faster_RCNN'])
    model.load_state_dict(torch.load(detector_weights_path,
                                     map_location=device))
    return model
예제 #21
0
def fasterrcnn_resnet101_fpn(pretrained=False,
                             progress=True,
                             num_classes=91,
                             pretrained_backbone=True,
                             trainable_backbone_layers=3,
                             **kwargs):
    """Construct a Faster R-CNN model with a ResNet-101-FPN backbone.

    Args:
        pretrained: if true, weights are loaded from the local file
            ``resnet101_7a82fa4a.pth`` and backbone pre-training is skipped.
        progress: accepted for signature compatibility; not used here.
        num_classes: forwarded to ``FasterRCNN``.
        pretrained_backbone: forwarded to ``resnet_fpn_backbone`` unless
            ``pretrained`` is set.
        trainable_backbone_layers: number of trainable backbone layers, in
            ``[0, 5]``; forced to 5 when neither ``pretrained`` nor
            ``pretrained_backbone`` is set.
        **kwargs: forwarded to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    assert trainable_backbone_layers <= 5 and trainable_backbone_layers >= 0
    # dont freeze any layers if pretrained model or backbone is not used
    if not (pretrained or pretrained_backbone):
        trainable_backbone_layers = 5
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet101',
                                   pretrained_backbone,
                                   trainable_layers=trainable_backbone_layers)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        # Local import: this block did not otherwise reference torch.
        import torch

        # load_state_dict expects a mapping of tensors, not a file name, so
        # the checkpoint has to be deserialized first.
        # NOTE(review): assumes the file stores a plain state dict - confirm
        # the checkpoint layout.
        state_dict = torch.load('resnet101_7a82fa4a.pth', map_location='cpu')
        model.load_state_dict(state_dict)
    return model
예제 #22
0
    def __init__(self, channels=3, num_classes=2, lr=1e-3, pretrained=True):
        """Set up a Faster R-CNN with a fully trainable ResNet-101 FPN backbone.

        Args:
            channels: not used by this constructor.
            num_classes: forwarded to ``FasterRCNN``.
            lr: stored as ``self.lr``.
            pretrained: forwarded to ``resnet_fpn_backbone``.
        """
        super().__init__()
        self.lr = lr

        device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device_name)

        # An FPN backbone is used here; a plain ResNet-101 trunk (all
        # children but the last, out_channels = 2048) was the earlier
        # alternative.
        build_backbone = torchvision.models.detection.backbone_utils.resnet_fpn_backbone
        fpn_backbone = build_backbone('resnet101',
                                      pretrained=pretrained,
                                      trainable_layers=5)

        self.model = FasterRCNN(fpn_backbone, num_classes=num_classes)
        self.t = transforms.ToTensor()
예제 #23
0
    def __init__(self, backbone_arch, num_labels, img_sz, pretrained=True):
        """Wrap a ResNet-FPN Faster R-CNN that works at one fixed image size.

        Args:
            backbone_arch: backbone name forwarded to ``resnet_fpn_backbone``.
            num_labels: forwarded to ``FasterRCNN`` as the class count.
            img_sz: used as both ``min_size`` and ``max_size``.
            pretrained: forwarded to ``resnet_fpn_backbone``.
        """
        super().__init__()

        fpn_backbone = resnet_fpn_backbone(backbone_arch, pretrained)
        size_limits = dict(min_size=img_sz, max_size=img_sz)
        self.model = FasterRCNN(fpn_backbone, num_labels, **size_limits)

        # Names of the losses tracked for this model.
        self.subloss_names = [
            'total_loss',
            'loss_box_reg',
            'loss_classifier',
            'loss_objectness',
            'loss_rpn_box_reg',
        ]
예제 #24
0
def build_model_vgg16(config, device):
    """Build a two-class Faster R-CNN on a VGG16 backbone.

    Args:
        config: object providing ``backbone`` (must be ``'vgg16'``),
            ``anchor_sizes`` and ``aspect_ratios``.
        device: not used by this function.

    Returns:
        The constructed ``FasterRCNN`` model with re-initialized box
        predictor weights.

    Raises:
        ValueError: if ``config.backbone`` is not ``'vgg16'``.
    """
    if config.backbone != 'vgg16':
        # Previously this fell through and crashed later on an unbound
        # ``vgg`` variable; fail early with a clear message instead.
        raise ValueError(
            "build_model_vgg16 only supports backbone 'vgg16', got {!r}".
            format(config.backbone))
    vgg = vgg16(pretrained=True)

    # Drop the last feature layer and freeze the first ten layers.
    backbone = vgg.features[:-1]
    for layer in backbone[:10]:
        for p in layer.parameters():
            p.requires_grad = False
    backbone.out_channels = 512

    class BoxHead(torch.nn.Module):
        """Box head built from the VGG classifier without its last layer."""

        def __init__(self, vgg, dropout=False):
            super().__init__()
            classifier = list(vgg.classifier._modules.values())[:-1]
            if not dropout:
                # Remove entries 5 and 2 (higher index first so positions
                # stay valid) - presumably the Dropout layers; confirm
                # against the vgg16 classifier layout.
                del classifier[5]
                del classifier[2]
            self.classifier = torch.nn.Sequential(*classifier)

        def forward(self, x):
            # Flatten everything but the batch dimension.
            x = x.flatten(start_dim=1)
            x = self.classifier(x)
            return x

    box_head = BoxHead(vgg)

    anchor_generator = AnchorGenerator(sizes=config.anchor_sizes,
                                       aspect_ratios=config.aspect_ratios)

    # Head - Box RoI pooling
    roi_pooler = MultiScaleRoIAlign(featmap_names=['0'],
                                    output_size=7,
                                    sampling_ratio=2)

    # Faster RCNN - Model
    model = FasterRCNN(backbone=backbone,
                       min_size=224,
                       max_size=224,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler,
                       box_head=box_head,
                       box_predictor=FastRCNNPredictor(4096, num_classes=2))

    # Init weights
    torch.nn.init.normal_(model.roi_heads.box_predictor.cls_score.weight,
                          std=0.01)
    torch.nn.init.constant_(model.roi_heads.box_predictor.cls_score.bias, 0)
    torch.nn.init.normal_(model.roi_heads.box_predictor.bbox_pred.weight,
                          std=0.001)
    torch.nn.init.constant_(model.roi_heads.box_predictor.bbox_pred.bias, 0)

    return model
예제 #25
0
def FasterRCNN_VGG():
    """Create a Faster R-CNN model backed by pretrained VGG16 features.

    The module registered under key ``'30'`` of the VGG16 feature stack
    (the final max-pool layer) is removed before the stack is handed to
    ``FasterRCNN``.

    Returns:
        A ``FasterRCNN`` model configured with ``opt.num_classes + 1``
        classes (``opt`` is a module-level object; the extra class is
        presumably the background - confirm against the caller).
    """
    features = vgg16(pretrained=True).features
    # Remove the last max-pool layer.
    features._modules.pop('30')

    # NOTE: freezing the first 10 layers (those before conv3) and using a
    # resnet50 FPN backbone were both tried by the original author and are
    # currently disabled.

    # Advertise the channel count of the backbone's output feature map.
    features.out_channels = 512

    return FasterRCNN(features, num_classes=opt.num_classes + 1)
예제 #26
0
def get_model(num_classes, backbone=None):
    '''
    Build a Faster R-CNN network for the requested backbone.

    Args:
        num_classes: number of classes (total_classes + 1 for background).
        backbone: ``None`` to use the pretrained ``fasterrcnn_resnet50_fpn``
                  with a replaced box predictor, or one of ``'mobile_net'``,
                  ``'vgg19'``, ``'resnet101'`` to build a single-feature-map
                  Faster R-CNN on that backbone.
    Returns:
        model.
    Raises:
        ValueError: if ``backbone`` is not ``None`` and not one of the
                    supported names.
    '''

    if backbone is None:
        # Default path: start from the pretrained detector and swap in a
        # predictor sized for ``num_classes``.
        model = fasterrcnn_resnet50_fpn(pretrained=True)
        # Per the original author's note, in_features is 1024 here.
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)
        return model

    if backbone == 'mobile_net':
        backbone_ft = mobilenet_v2(pretrained=True).features
        backbone_ft.out_channels = 1280

    elif backbone == 'vgg19':
        backbone_ft = vgg19(pretrained=True).features
        backbone_ft.out_channels = 512

    # https://stackoverflow.com/questions/58362892/resnet-18-as-backbone-in-faster-r-cnn
    elif backbone == 'resnet101':
        net = resnet101(pretrained=True)
        # Drop both the global average pool and the fc layer. Keeping the
        # average pool (as ``[:-1]`` did) collapses the feature map to 1x1,
        # which leaves the RPN and RoI pooling with no spatial information.
        # Neither dropped module has parameters before it in the sequence,
        # so existing state_dict keys are unaffected.
        modules = list(net.children())[:-2]
        backbone_ft = nn.Sequential(*modules)
        backbone_ft.out_channels = 2048

    else:
        # Previously an unknown name fell through and failed later with a
        # NameError on ``backbone_ft``.
        raise ValueError(
            "Unsupported backbone {!r}; expected None, 'mobile_net', "
            "'vgg19' or 'resnet101'".format(backbone))

    anchor_gen = AnchorGenerator(sizes=((32, 64, 128), ))
    # featmap_names = [0] gives list index out of range error.
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    return FasterRCNN(backbone_ft,
                      num_classes,
                      rpn_anchor_generator=anchor_gen,
                      box_roi_pool=roi_pooler)
예제 #27
0
파일: exp01.py 프로젝트: amoshyc/afed
def get_model():
    """Build a Faster R-CNN with a pretrained ResNet-34 FPN backbone.

    One anchor size is assigned to each of five feature levels
    (16, 32, 64, 128, 256), each paired with aspect ratios 0.5, 1.0 and 2.0.
    The model is created with ``min_size`` and ``max_size`` both set to 512
    and ``1 + 3`` classes.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    fpn_backbone = resnet_fpn_backbone('resnet34', pretrained=True)

    level_sizes = (16, 32, 64, 128, 256)
    level_ratios = (0.5, 1.0, 2.0)
    anchors = AnchorGenerator(
        sizes=tuple((size, ) for size in level_sizes),
        aspect_ratios=tuple(level_ratios for _ in level_sizes))

    return FasterRCNN(
        fpn_backbone,
        num_classes=1 + 3,
        min_size=512,
        max_size=512,
        rpn_anchor_generator=anchors,
    )
예제 #28
0
def _fasterrcnn_resnet_fpn(backbone='resnet50',
                           num_classes=91,
                           pretrained_backbone=True,
                           **kwargs):
    """Build a Faster R-CNN model with a ResNet-FPN backbone.

    Args:
        backbone: Name of the ResNet variant passed to
            ``resnet_fpn_backbone``.
        num_classes: Number of classes passed to ``FasterRCNN``.
        pretrained_backbone: Forwarded to ``resnet_fpn_backbone`` as its
            second positional argument.
        **kwargs: Extra keyword arguments forwarded to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` model.

    Raises:
        Exception: Re-raises the module-level ``import_error`` when it is
            not ``None`` (presumably recorded when an optional import
            failed - confirm at the top of the file).
    """
    if import_error is not None:
        raise import_error

    from torchvision.models.detection.faster_rcnn import FasterRCNN, resnet_fpn_backbone

    # Use a separate local name rather than rebinding the ``backbone``
    # string parameter to a module.
    fpn_backbone = resnet_fpn_backbone(backbone, pretrained_backbone)

    # The original also called ``detection.fasterrcnn_resnet50_fpn()`` here
    # and discarded the result, needlessly building a second model; that
    # stray call has been removed.
    return FasterRCNN(fpn_backbone, num_classes, **kwargs)
def fasterrcnn_resnet18_fpn(num_classes=2, pretrained_backbone=True, **kwargs):
    """
    Build a Faster R-CNN detector on a ResNet-18-FPN backbone.

    Inputs are a list of image tensors, each shaped ``[C, H, W]`` with values
    in the ``0-1`` range; images in one list may differ in size.

    Training mode: the model additionally takes a list of target
    dictionaries, each holding
        - boxes (``FloatTensor[N, 4]``): ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, with values between ``0`` and ``H``
          and ``0`` and ``W``
        - labels (``Int64Tensor[N]``): class label of each ground-truth box
    and returns a ``Dict[Tensor]`` with the classification and regression
    losses of both the RPN and the R-CNN.

    Evaluation mode: only the images are required, and the model returns
    post-processed predictions as a ``List[Dict[Tensor]]`` (one entry per
    image) with the fields
        - boxes (``FloatTensor[N, 4]``): predicted boxes in
          ``[x1, y1, x2, y2]`` format, with values between ``0`` and ``H``
          and ``0`` and ``W``
        - labels (``Int64Tensor[N]``): predicted label of each box
        - scores (``Tensor[N]``): score of each prediction

    Args:
        num_classes: number of classes passed to ``FasterRCNN``.
        pretrained_backbone: forwarded to ``resnet_fpn_backbone``.
        **kwargs: extra keyword arguments forwarded to ``FasterRCNN``.

    Example::
        >>> model = fasterrcnn_resnet18_fpn(num_classes=2)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)
    """
    resnet18_fpn = resnet_fpn_backbone('resnet18', pretrained_backbone)
    detector = FasterRCNN(resnet18_fpn, num_classes=num_classes, **kwargs)
    # NOTE: an earlier experiment shrank the model to lessen overfitting
    # (emptying backbone.body.layer3/layer4 and replacing the FPN, RPN head
    # and box-head layers with narrower 128/256-wide ones). It is disabled;
    # the stock architecture is returned unchanged.
    return detector
예제 #30
0
def model(basic_layers, rpn_anchor_generator, box_roi_pool, box_predictor):
    """
    Assemble a Faster R-CNN model from its pre-built components.

    :param basic_layers: Object providing the ``backbone`` (e.g. VGG16) and
        the ``classifier`` module that is used as the box head.
    :param rpn_anchor_generator: Anchor generator handed to the RPN.
    :param box_roi_pool: The roi pool must be a (sub-)type of
        MultiScaleRoIAlign.
    :param box_predictor: Box predictor module handed to ``FasterRCNN``.
    :return: The faster r-cnn model.
    """
    # Fixed settings: 512 for both image-size bounds, and 32 for both the
    # RPN batch size per image and the detections kept per image.
    detector_options = dict(
        backbone=basic_layers.backbone,
        min_size=512,
        max_size=512,
        rpn_anchor_generator=rpn_anchor_generator,
        rpn_batch_size_per_image=32,
        box_roi_pool=box_roi_pool,
        box_head=basic_layers.classifier,
        box_predictor=box_predictor,
        box_detections_per_img=32,
    )
    return FasterRCNN(**detector_options)