Beispiel #1
0
def fasterrcnn_resnet50_fpn(
    pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, **kwargs
):
    """Build a Faster R-CNN detector on a ResNet-50 FPN backbone.

    The RPN anchors come from a ``CachelessAnchorGenerator`` configured with
    one anchor size per level (32, 64, 128, 256, 512) and the aspect ratios
    0.5, 1.0 and 2.0 at every level.

    Arguments:
        pretrained (bool): if True, load the ``fasterrcnn_resnet50_fpn_coco``
            checkpoint referenced by ``model_urls`` into the detector.
        progress (bool): forwarded to ``load_state_dict_from_url``.
        num_classes (int): forwarded to ``FasterRCNN``.
        pretrained_backbone (bool): forwarded to ``resnet_fpn_backbone``;
            replaced by False whenever ``pretrained`` is True.
        **kwargs: forwarded unchanged to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    # The detector checkpoint is loaded into the whole model below, so a
    # separate backbone download is skipped in that case.
    use_pretrained_backbone = False if pretrained else pretrained_backbone
    fpn_backbone = resnet_fpn_backbone("resnet50", use_pretrained_backbone)

    level_sizes = tuple((size,) for size in (32, 64, 128, 256, 512))
    level_ratios = tuple((0.5, 1.0, 2.0) for _ in level_sizes)
    anchor_generator = CachelessAnchorGenerator(level_sizes, level_ratios)
    detector = FasterRCNN(
        fpn_backbone, num_classes, rpn_anchor_generator=anchor_generator, **kwargs
    )

    # NOTE: a small-image alternative (min_size=300, max_size=400, anchor
    # sizes 12/24/48/96/192) used to be kept here as commented-out code.

    if not pretrained:
        return detector

    checkpoint_url = model_urls["fasterrcnn_resnet50_fpn_coco"]
    weights = load_state_dict_from_url(checkpoint_url, progress=progress)
    detector.load_state_dict(weights)
    return detector
def custom_fasterrcnn_resnet_fpn(backbone,
                                 pretrained=True,
                                 progress=True,
                                 num_classes=91,
                                 pretrained_backbone=True,
                                 trainable_backbone_layers=3,
                                 **kwargs):
    """Build a Faster R-CNN model on a configurable ResNet-FPN backbone.

    Arguments:
        backbone (dict): mapping with a ``'name'`` entry (backbone name) and
            a ``'params'`` entry (dict handed to
            ``custom_resnet_fpn_backbone``). The mapping is not modified.
        pretrained (bool): if True and the backbone name ends with
            ``'resnet50'``, load the ``fasterrcnn_resnet50_fpn_coco``
            checkpoint (non-strictly) into the model.
        progress (bool): forwarded to ``load_state_dict_from_url``.
        num_classes (int): forwarded to ``FasterRCNN``.
        pretrained_backbone (bool): only consulted to decide whether every
            backbone layer is made trainable (see below).
        trainable_backbone_layers (int): must lie in ``[0, 5]``. It is only
            validated here; the value handed to the backbone comes from
            ``backbone['params']``.
        **kwargs: forwarded unchanged to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    backbone_name = backbone['name']
    # Work on a copy: the overrides below must not leak into the caller's
    # config, which may be reused to build other models.
    backbone_params_config = dict(backbone['params'])
    assert 0 <= trainable_backbone_layers <= 5

    # The COCO detector checkpoint is only loaded for resnet50 backbones, so
    # "pretrained" is only effective in that case.
    load_detector_weights = pretrained and backbone_name.endswith('resnet50')

    # dont freeze any layers if pretrained model or backbone is not used
    if not (load_detector_weights or pretrained_backbone):
        backbone_params_config['trainable_backbone_layers'] = 5
    if load_detector_weights:
        # no need to download the backbone when the detector checkpoint
        # is going to be loaded into the model anyway
        backbone_params_config['pretrained'] = False

    backbone_model = custom_resnet_fpn_backbone(backbone_name,
                                                backbone_params_config)
    # Passing None lets FasterRCNN pick its own RoI pooler when the backbone
    # body returns exactly four layers; otherwise build one whose
    # featmap_names are '0'..'n-1' for the n returned layers.
    num_feature_maps = len(backbone_model.body.return_layers)
    if num_feature_maps == 4:
        box_roi_pool = None
    else:
        box_roi_pool = MultiScaleRoIAlign(
            featmap_names=[str(i) for i in range(num_feature_maps)],
            output_size=7,
            sampling_ratio=2)
    model = FasterRCNN(backbone_model,
                       num_classes,
                       box_roi_pool=box_roi_pool,
                       **kwargs)
    if load_detector_weights:
        state_dict = load_state_dict_from_url(
            fasterrcnn_model_urls['fasterrcnn_resnet50_fpn_coco'],
            progress=progress)
        # strict=False: keys that do not match the custom model are skipped.
        model.load_state_dict(state_dict, strict=False)
    return model
Beispiel #3
0
def custom_fasterrcnn_resnet_fpn(backbone,
                                 pretrained=True,
                                 progress=True,
                                 num_classes=91,
                                 pretrained_backbone=True,
                                 trainable_backbone_layers=3,
                                 **kwargs):
    """Build a Faster R-CNN model on a configurable ResNet-FPN backbone.

    Arguments:
        backbone (dict): mapping with a ``'name'`` entry (backbone name) and
            a ``'params'`` entry (dict handed to
            ``custom_resnet_fpn_backbone``). The mapping is not modified.
        pretrained (bool): if True and the backbone name ends with
            ``'resnet50'``, load the ``fasterrcnn_resnet50_fpn_coco``
            checkpoint (non-strictly) into the model.
        progress (bool): forwarded to ``load_state_dict_from_url``.
        num_classes (int): forwarded to ``FasterRCNN``.
        pretrained_backbone (bool): only consulted to decide whether every
            backbone layer is made trainable (see below).
        trainable_backbone_layers (int): must lie in ``[0, 5]``. It is only
            validated here; the value handed to the backbone comes from
            ``backbone['params']``.
        **kwargs: forwarded unchanged to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    backbone_name = backbone['name']
    # Work on a copy: the overrides below must not leak into the caller's
    # config, which may be reused to build other models.
    backbone_params_config = dict(backbone['params'])
    assert 0 <= trainable_backbone_layers <= 5

    # The COCO detector checkpoint is only loaded for resnet50 backbones, so
    # "pretrained" is only effective in that case.
    load_detector_weights = pretrained and backbone_name.endswith('resnet50')

    # dont freeze any layers if pretrained model or backbone is not used
    if not (load_detector_weights or pretrained_backbone):
        backbone_params_config['trainable_backbone_layers'] = 5
    if load_detector_weights:
        # no need to download the backbone when the detector checkpoint
        # is going to be loaded into the model anyway
        backbone_params_config['pretrained'] = False

    backbone_model = custom_resnet_fpn_backbone(backbone_name,
                                                backbone_params_config)
    model = FasterRCNN(backbone_model, num_classes, **kwargs)
    if load_detector_weights:
        state_dict = load_state_dict_from_url(
            fasterrcnn_model_urls['fasterrcnn_resnet50_fpn_coco'],
            progress=progress)
        # strict=False: keys that do not match the custom model are skipped.
        model.load_state_dict(state_dict, strict=False)
    return model
Beispiel #4
0
def fasterrcnn_resnet50_fpn(pretrained=False,
                            progress=True,
                            num_classes=91,
                            pretrained_backbone=True,
                            trainable_backbone_layers=3,
                            model_dir=None,
                            **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
    image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending if it is in training or evaluation mode.

    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
    containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
          between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
    follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
          between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction

    Faster R-CNN is exportable to ONNX for a fixed batch size with inputs images of fixed size.

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
        num_classes (int): number of output classes of the model (including the background)
        trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
        model_dir (str, optional): forwarded to ``load_state_dict_from_url`` as the directory
            in which the downloaded checkpoint is stored.
        **kwargs: forwarded unchanged to ``FasterRCNN``.
    """
    assert 0 <= trainable_backbone_layers <= 5
    # dont freeze any layers if pretrained model or backbone is not used
    if not (pretrained or pretrained_backbone):
        trainable_backbone_layers = 5
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    # The validated layer count has to reach the backbone builder; without
    # it the argument would be silently ignored.
    backbone = resnet_fpn_backbone('resnet50',
                                   pretrained_backbone,
                                   trainable_layers=trainable_backbone_layers)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'],
            progress=progress,
            model_dir=model_dir)
        model.load_state_dict(state_dict)
    return model
Beispiel #5
0
class PyTorchModel:
    """Faster R-CNN (ResNet-50 FPN, 10 classes) wrapper producing ``Label`` lists."""

    def __init__(self, f):
        """Build the detector and load its weights from ``f``.

        Arguments:
            f: either a ``str`` path to a local checkpoint file or an
                ``io.BytesIO`` stream holding one. The checkpoint must be a
                state dict accepted by ``FasterRCNN.load_state_dict``. Any
                other type leaves the freshly constructed model's weights
                untouched.
        """
        trainable_backbone_layers = 5
        backbone = resnet_fpn_backbone(
            'resnet50', True, trainable_layers=trainable_backbone_layers)
        self.model = FasterRCNN(backbone,
                                num_classes=10,
                                max_size=3840,
                                min_size=2160,
                                rpn_pre_nms_top_n_train=2000,
                                rpn_pre_nms_top_n_test=2000,
                                rpn_post_nms_top_n_train=2000,
                                rpn_post_nms_top_n_test=2000,
                                box_detections_per_img=100,
                                rpn_nms_thresh=0.01,
                                box_nms_thresh=0.01)

        # Remember the device so predict() can move its input next to the
        # model's parameters.
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)

        # NOTE(security): torch.load unpickles its input; only feed it
        # checkpoints from a trusted source.
        if isinstance(f, str):  # local file
            print("Loading model from local file at {}".format(f))
            self.model.load_state_dict(
                torch.load(f, map_location=self.device))
        elif isinstance(f, io.BytesIO):  # stream
            print("Loading model from stream")
            # torch.load accepts file-like objects, so the stream is loaded
            # exactly like a path.
            self.model.load_state_dict(
                torch.load(f, map_location=self.device))

    def predict(self, image) -> List[Label]:
        """Run the detector on one image and return its detections.

        Arguments:
            image: input accepted by ``torchvision.transforms.ToTensor``.

        Returns:
            One ``Label`` per predicted box, built as
            ``Label(index, category, xmin, xmax, ymin, ymax, color, score)``
            (note the x-min, x-max, y-min, y-max order). Coordinates are
            truncated to ``int``; category and color come from the
            ``classes`` table keyed by the stringified predicted label.
        """
        frame = torchvision.transforms.ToTensor()(image)
        # Add the batch dimension and move the input to the model's device.
        frame = frame.unsqueeze(0).to(self.device)
        self.model.eval()
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            prediction = self.model(frame)
        print(prediction)
        detections = prediction[0]
        ret = []
        for i, (box, label, score) in enumerate(
                zip(detections["boxes"], detections["labels"],
                    detections["scores"])):
            xmin, ymin, xmax, ymax = (int(v) for v in box.tolist())
            class_info = classes[str(label.item())]
            ret.append(
                Label(i, class_info["category"], xmin, xmax, ymin, ymax,
                      class_info["color"], float(score.item())))
        return ret
Beispiel #6
0
def fasterrcnn_resnet101_fpn(pretrained=False,
                             progress=True,
                             num_classes=91,
                             pretrained_backbone=True,
                             model_dir=None,
                             **kwargs):
    """
    Build a Faster R-CNN detector on top of a ResNet-101-FPN backbone.
    This is NOT an official model.

    Inputs are a list of ``[C, H, W]`` tensors (one per image) with values in the ``0-1`` range;
    images may differ in size. The model behaves differently in training and evaluation mode.

    Training: besides the images, a list of target dictionaries is expected, each holding
        - boxes (``FloatTensor[N, 4]``): ground-truth boxes as ``[x1, y1, x2, y2]``, with ``x`` in
          ``[0, W]`` and ``y`` in ``[0, H]``
        - labels (``Int64Tensor[N]``): class label of every ground-truth box
    and the model returns a ``Dict[Tensor]`` with the classification and regression losses of
    both the RPN and the R-CNN.

    Inference: only the images are needed; the result is a ``List[Dict[Tensor]]`` with one entry
    per image containing
        - boxes (``FloatTensor[N, 4]``): predicted boxes as ``[x1, y1, x2, y2]``, with ``x`` in
          ``[0, W]`` and ``y`` in ``[0, H]``
        - labels (``Int64Tensor[N]``): predicted labels
        - scores (``Tensor[N]``): score of each prediction

    Faster R-CNN is exportable to ONNX for a fixed batch size with inputs images of fixed size.

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int): forwarded to ``FasterRCNN``
        pretrained_backbone (bool): forwarded to ``resnet_fpn_backbone``; replaced by False
            whenever ``pretrained`` is True
        model_dir (str, optional): forwarded to ``load_state_dict_from_url``
        **kwargs: forwarded unchanged to ``FasterRCNN``
    """
    # The detector checkpoint is loaded into the whole model below, so a
    # separate backbone download is skipped in that case.
    use_pretrained_backbone = False if pretrained else pretrained_backbone
    fpn_backbone = resnet_fpn_backbone('resnet101', use_pretrained_backbone)
    detector = FasterRCNN(fpn_backbone, num_classes, **kwargs)

    if not pretrained:
        return detector

    checkpoint = load_state_dict_from_url(
        model_urls['fasterrcnn_resnet101_fpn_coco'],
        progress=progress,
        model_dir=model_dir)
    # The downloaded checkpoint keeps the weights under its 'model' key.
    detector.load_state_dict(checkpoint['model'])
    return detector
Beispiel #7
0
def fasterrcnn_resnet101_fpn(pretrained=False,
                             progress=True,
                             num_classes=91,
                             pretrained_backbone=True,
                             trainable_backbone_layers=3,
                             **kwargs):
    """Construct a Faster R-CNN model with a ResNet-101-FPN backbone.

    Arguments:
        pretrained (bool): if True, load the weights stored in the local
            file ``resnet101_7a82fa4a.pth`` (resolved against the current
            working directory) into the model.
        progress (bool): unused here; kept for signature compatibility.
        num_classes (int): forwarded to ``FasterRCNN``.
        pretrained_backbone (bool): forwarded to ``resnet_fpn_backbone``;
            replaced by False whenever ``pretrained`` is True.
        trainable_backbone_layers (int): number of trainable backbone
            layers, between 0 and 5; forced to 5 when neither ``pretrained``
            nor ``pretrained_backbone`` is set.
        **kwargs: forwarded unchanged to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    assert 0 <= trainable_backbone_layers <= 5
    # dont freeze any layers if pretrained model or backbone is not used
    if not (pretrained or pretrained_backbone):
        trainable_backbone_layers = 5
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet101',
                                   pretrained_backbone,
                                   trainable_layers=trainable_backbone_layers)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        import torch

        # load_state_dict needs the deserialized mapping, not the file name.
        # map_location='cpu' keeps the load working on machines without the
        # device the checkpoint was saved from; load_state_dict then copies
        # the values onto the model's own parameters.
        # NOTE(review): the file presumably holds a complete Faster R-CNN
        # state dict (strict loading fails otherwise) - confirm.
        state_dict = torch.load('resnet101_7a82fa4a.pth', map_location='cpu')
        model.load_state_dict(state_dict)
    return model
Beispiel #8
0
def _init_faster_rcnn(backbone='ResNet', num_classes=91, **kwargs):
    """Build a Faster R-CNN model from locally stored checkpoints.

    A ResNet-50 with ``FrozenBatchNorm2d`` normalisation layers is created,
    filled from ``resnet50-19c8e357.pth`` and wrapped by
    ``_resnet_fpn_backbone``. The detector built on top of it is then filled
    from the file named by ``MODEL_NAME['Faster_RCNN']``. Both files are
    looked up in ``PRETRAINED_DEEP_MODEL_DIR``.

    Arguments:
        backbone: ignored; the name is rebound to the ResNet-50 FPN backbone
            built here.
        num_classes (int): forwarded to ``FasterRCNN``.
        **kwargs: forwarded unchanged to ``FasterRCNN``.

    Returns:
        The ``FasterRCNN`` model with both checkpoints loaded.
    """
    target_device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")

    resnet = models.resnet.resnet50(pretrained=False,
                                    norm_layer=misc_nn_ops.FrozenBatchNorm2d)
    resnet_weights_path = os.path.join(PRETRAINED_DEEP_MODEL_DIR,
                                       'resnet50-19c8e357.pth')
    resnet.load_state_dict(
        torch.load(resnet_weights_path, map_location=target_device))
    backbone = _resnet_fpn_backbone(resnet)

    detector = FasterRCNN(backbone, num_classes, **kwargs)
    detector_weights_path = os.path.join(PRETRAINED_DEEP_MODEL_DIR,
                                         MODEL_NAME['Faster_RCNN'])
    detector.load_state_dict(
        torch.load(detector_weights_path, map_location=target_device))
    return detector
Beispiel #9
0
def fasterrcnn_resnet50_fpn(pretrained=False,
                            progress=True,
                            num_classes=91,
                            pretrained_backbone=True,
                            *,
                            weights_path='D:/Models/torchpretrainedmodels/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth',
                            **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
    image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending if it is in training or evaluation mode.

    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
    containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
          between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
    follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
          between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction

    Faster R-CNN is exportable to ONNX for a fixed batch size with inputs images of fixed size.

    Example::

        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        >>> # For training
        >>> images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
        >>> labels = torch.randint(1, 91, (4, 11))
        >>> images = list(image for image in images)
        >>> targets = []
        >>> for i in range(len(images)):
        ...     d = {}
        ...     d['boxes'] = boxes[i]
        ...     d['labels'] = labels[i]
        ...     targets.append(d)
        >>> output = model(images, targets)
        >>> # For inference
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)
        >>>
        >>> # optionally, if you want to export the model to ONNX:
        >>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version = 11)

    Arguments:
        pretrained (bool): If True, loads the checkpoint stored at ``weights_path`` into the model
        progress (bool): unused here because nothing is downloaded; kept for signature compatibility
        num_classes (int): forwarded to ``FasterRCNN``
        pretrained_backbone (bool): forwarded to ``resnet_fpn_backbone``; replaced by False
            whenever ``pretrained`` is True
        weights_path (str): keyword-only location of the local checkpoint read when ``pretrained``
            is True. Defaults to the path this function previously had hard-coded.
        **kwargs: forwarded unchanged to ``FasterRCNN``
    """
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        # The checkpoint comes from the local filesystem, not a download.
        state_dict = torch.load(weights_path)
        model.load_state_dict(state_dict)
    return model
Beispiel #10
0
import sys
import time

import torch
import torchvision
from PIL import Image
from torch.autograd import Variable
from torchvision import transforms
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.faster_rcnn import FasterRCNN

from class_labels import GROCERY_LIST_V0

model = FasterRCNN(resnet_fpn_backbone("resnet50", False), num_classes=64)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_path = "synthdet_faster_rcnn.pth"
model_save = torch.load(model_path, map_location=device) # no CUDA on macOS   
model.load_state_dict(model_save["model"])
model.to(device)
model.eval()

# preprocess on test image
image_path = sys.argv[1]
image = Image.open(image_path)
image_to_tensor = transforms.Compose([
    transforms.ToTensor()
])
tensor = image_to_tensor(image)

# inference
threshold = 0.5

# Start timing