class ResNet50_FasterRCNN:
    def __init__(self, pretrained=False):
        # Build a Faster R-CNN model for object detection
        backbone = resnet_fpn_backbone('resnet50', pretrained=pretrained)
        num_classes = 4 + 1  # 4 object classes + background

        anchor_generator = AnchorGenerator(sizes=(40, 60, 150, 200, 250),
                                           aspect_ratios=(0.7, 1.0, 1.3))
        self.model = FRCNN(backbone,
                           num_classes=num_classes,
                           rpn_anchor_generator=anchor_generator)

    def train(self):
        self.model.train()

    def to(self, device):
        self.model.to(device)

    def eval(self):
        self.model.eval()

    def parameters(self):
        return self.model.parameters()

    def get_state_dict(self):
        return self.model.state_dict()

    def set_state_dict(self, state_dict):
        self.model.load_state_dict(state_dict)

    def fit_batch(self, images, target):
        return self.model(images, target)

    def predict_batch(self, images):
        return self.model(images)
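In training mode the wrapped model returns a dict of RPN/ROI losses; in eval mode it returns per-image predictions. A minimal usage sketch (assuming the snippet's `FRCNN`, `resnet_fpn_backbone`, and `AnchorGenerator` imports from torchvision):
import torch

wrapper = ResNet50_FasterRCNN(pretrained=True)
wrapper.train()
images = [torch.rand(3, 600, 800)]
targets = [{'boxes': torch.tensor([[10., 20., 200., 300.]]),
            'labels': torch.tensor([1])}]
loss_dict = wrapper.fit_batch(images, targets)  # dict of classifier/box/RPN losses

wrapper.eval()
with torch.no_grad():
    detections = wrapper.predict_batch(images)  # list of {boxes, labels, scores} dicts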
Example #2
def get_model(
    backbone_name="resnet50",
    detector_name="fasterrcnn",
    trainable_layers=3,
    model_ckpt=None,
):
    """Constructs a fasterrcnn or maskrcnn detector with the given backbone"""
    num_classes = 2  # 1 class (wheat) + background
    if model_ckpt:
        # backbone = resnet_fpn_backbone('resnet101', True)
        backbone = timm_resnet_fpn_backbone(backbone_name, False,
                                            trainable_layers)
    else:
        backbone = timm_resnet_fpn_backbone(backbone_name, True,
                                            trainable_layers)
    if detector_name == "fasterrcnn":
        model = FasterRCNN(backbone, num_classes)
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)
    elif detector_name == "maskrcnn":
        model = MaskRCNN(backbone, num_classes)
        in_features_mask = (
            model.roi_heads.mask_predictor.conv5_mask.in_channels)
        hidden_layer = 256
        model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_features_mask, hidden_layer, num_classes)
    else:
        raise ValueError(f"{detector_name} is not supported")
    if model_ckpt is not None:
        model.load_state_dict(torch.load(model_ckpt)["model_state_dict"])
        print("loaded ckpt")
    return model
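A sketch of calling this factory (assuming the `timm_resnet_fpn_backbone` helper from the snippet's context, and a checkpoint saved with a `model_state_dict` key as the loading code above expects):
model = get_model(backbone_name="resnet50", detector_name="fasterrcnn",
                  trainable_layers=3, model_ckpt=None)
model.eval()
with torch.no_grad():
    preds = model([torch.rand(3, 512, 512)])  # list with one dict of boxes/labels/scores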
Example #3
def fasterrcnn_resnetxx_fpnxx(cfg):
    backbone = resnet.__dict__[cfg['backbone_name']](
        pretrained=cfg['backbone_pretrained'],
        norm_layer=misc_nn_ops.FrozenBatchNorm2d)
    # freeze layers
    for name, parameter in backbone.named_parameters():
        if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
            parameter.requires_grad_(False)

    # wrap the backbone with an FPN
    return_layers = cfg['fpn']['return_layers']
    in_channels_stage2 = backbone.inplanes // 8
    in_channels_list = [in_channels_stage2 * 2**i for i in range(len(return_layers))]
    out_channels = cfg['fpn']['out_channels']
    backbone_fpn = BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)

    anchor_generator = AnchorGenerator(**cfg['anchor_generator'])
    # print(anchor_generator.num_anchors_per_location())

    roi_pooler = MultiScaleRoIAlign(**cfg['box_roi_pool'])
    model = FasterRCNN(backbone_fpn, num_classes=cfg['num_classes'], rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    if os.path.exists(cfg['fasterrcnn_pretrained']):
        state_dict = torch.load(cfg['fasterrcnn_pretrained'])
        model.load_state_dict(state_dict)

    return model
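The function is driven entirely by `cfg`; one plausible layout of that dict (illustrative values, not from the source -- note that BackboneWithFPN appends a pooled level, so the anchor generator needs one size tuple per FPN output):
cfg = {
    'backbone_name': 'resnet50',
    'backbone_pretrained': True,
    'fpn': {
        'return_layers': {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'},
        'out_channels': 256,
    },
    'anchor_generator': {
        'sizes': ((32,), (64,), (128,), (256,), (512,)),  # 4 FPN levels + pooled level
        'aspect_ratios': ((0.5, 1.0, 2.0),) * 5,
    },
    'box_roi_pool': {
        'featmap_names': ['0', '1', '2', '3'],
        'output_size': 7,
        'sampling_ratio': 2,
    },
    'num_classes': 2,
    'fasterrcnn_pretrained': '',  # optional path to a saved state_dict
}
model = fasterrcnn_resnetxx_fpnxx(cfg)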
Example #4
class TorchDetector:
    """
    Torch object detector
    """
    def __init__(self, config, logger):
        self._logger = logger
        self._threshold = config['threshold']
        modelfile = config['model']
        self._device = config['device']  # cpu, cuda, cuda:0
        backbone = resnet_fpn_backbone('resnet50', False)
        self._model = FasterRCNN(backbone, 8)  # 8 classes
        checkpoint = torch.load(modelfile, map_location=self._device)
        self._model.load_state_dict(checkpoint['model_state_dict'])
        device = torch.device(self._device)
        self._model.to(device)
        self._model.eval()

    def stop(self):
        """
        Destruction
        """

    def detectObjects(self, img) -> List[e.DetectedObject]:
        """
        Implementation of detector interface
        """
        wsize = 1600
        hsize = 800
        _pretransform = A.Compose([
            A.Resize(hsize, wsize),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ])

        image_tensor = _pretransform(image=img)['image']

        tstart = time.time()

        with torch.no_grad():
            outputs = self._model(
                image_tensor.unsqueeze(0).float().to(device=self._device))

        classes = outputs[0]['labels'].detach().cpu().numpy()
        scores = outputs[0]['scores'].detach().cpu().numpy()
        boxes = outputs[0]['boxes'].detach().cpu().numpy()

        self._logger.debug(
            f'Torch model inferring time: {time.time() - tstart}')

        result = zip(classes, scores, boxes)

        h, w, _ = img.shape
        wscale = w / wsize
        hscale = h / hsize
        #print(f'h,w:{h},{w}; wsc,hsc:{wscale},{hscale}')
        #print(list(result))

        return ObjectDetector.getDetectedObjectsCollection(
            result, hscale, wscale, self._threshold, False)
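A hypothetical wiring of the class (the config keys match what `__init__` reads; `e.DetectedObject` and `ObjectDetector` come from the snippet's surrounding package):
import logging
import cv2

config = {'threshold': 0.5, 'model': 'frcnn_checkpoint.pth', 'device': 'cpu'}
detector = TorchDetector(config, logging.getLogger('detector'))
objects = detector.detectObjects(cv2.imread('frame.jpg'))  # HxWxC image array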
Example #5
class Detect:
    def __init__(self):
        super().__init__()
        backbone = torchvision.models.vgg16(pretrained=False).features
        backbone.out_channels = 512
        anchor_sizes = ((8, 16, 32, 64, 128, 256, 512), )
        aspect_ratios = ((1 / 2, 1 / 3, 1 / 4, 1 / 5, 1 / 6, 1 / math.sqrt(2),
                          1, 2, math.sqrt(2), 3, 4, 5, 6, 7, 8), )
        anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                           aspect_ratios=aspect_ratios)
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(
            featmap_names=['0', '1', '2', '3', '4'],
            output_size=7,
            sampling_ratio=2)
        self.model = FasterRCNN(backbone,
                                num_classes=7,
                                rpn_anchor_generator=anchor_generator,
                                box_roi_pool=roi_pooler)
        self.device = torch.device('cpu')
        self.model.load_state_dict(torch.load('2.pth'))
        self.model.to(self.device)
        self.model.eval()

    def forward(self, img):
        img = torch.tensor(img, dtype=torch.float32) / 255
        img = img.permute((2, 0, 1))
        output = self.model([img.to(self.device)])
        boxes = output[0]['boxes']
        labels = output[0]['labels']
        scores = output[0]['scores']
        last = dict.fromkeys(
            ('send', 'number', 'date', 'quote', 'header', 'motto'), 0)
        result = {}
        for i, v in enumerate(labels):
            if v == 1 and scores[i] > last['send']:
                last['send'] = scores[i]
                result['send'] = boxes[i]
            elif v == 2 and scores[i] > last['number']:
                last['number'] = scores[i]
                result['number'] = boxes[i]
            elif v == 3 and scores[i] > last['date']:
                last['date'] = scores[i]
                result['date'] = boxes[i]
            elif v == 4 and scores[i] > last['quote']:
                last['quote'] = scores[i]
                result['quote'] = boxes[i]
            elif v == 5 and scores[i] > last['header']:
                last['header'] = scores[i]
                result['header'] = boxes[i]
            elif v == 6 and scores[i] > last['motto']:
                last['motto'] = scores[i]
                result['motto'] = boxes[i]
            # elif v == 7 and scores[i] > last['secrete']:
            #     last['secrete'] = scores[i]
            #     result['secrete'] = boxes[i]
            # elif v == 8 and scores[i] > last['sign']:
            #     last['sign'] = scores[i]
            #     result['sign'] = boxes[i]
        return result
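The per-label bookkeeping above can be collapsed into one best-score-per-label pass; a sketch of equivalent logic over the method's `labels`, `scores`, and `boxes` (the label-to-name mapping is assumed from the snippet):
names = {1: 'send', 2: 'number', 3: 'date', 4: 'quote', 5: 'header', 6: 'motto'}
best = {}  # name -> (score, box)
for label, score, box in zip(labels.tolist(), scores.tolist(), boxes):
    name = names.get(label)
    if name is not None and score > best.get(name, (0.0, None))[0]:
        best[name] = (score, box)
result = {name: box for name, (score, box) in best.items()}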
Example #6
def fasterrcnn_resnet_fpn(backbone_name,
                          pretrained=False,
                          trainable_backbone_layers=None,
                          num_classes=81,
                          pretrained_backbone=True,
                          **kwargs):
    """
    Construct Faster R-CNN with a ResNet-FPN backbone

    Arguments:
        backbone_name(str): Name of the backbone.
            Refer to torchvision.models.resnet.__dict__ for details
        pretrained(bool, optional): If True, load weights for the detector
            pretrained on MS COCO. Only ResNet50-FPN is supported for the moment.
        trainable_backbone_layers(int, optional): Number of trainable (not frozen)
            resnet layers starting from final block.
        num_classes(int, optional): Number of target classes.
        pretrained_backbone(bool, optional): If True, load weights for backbone
            pre-trained on ImageNet

        Refer to torchvision.models.detection.FasterRCNN for kwargs
    """
    trainable_backbone_layers = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 5, 3)

    if pretrained and backbone_name == 'resnet50':
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone(backbone_name, pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained and backbone_name == 'resnet50':
        state_dict = models.utils.load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'])
        if num_classes == 81:
            # Remove the parameters for the additional classes
            state_dict['roi_heads.box_predictor.cls_score.weight'] = \
                state_dict['roi_heads.box_predictor.cls_score.weight'][KEEP]
            state_dict['roi_heads.box_predictor.cls_score.bias'] = \
                state_dict['roi_heads.box_predictor.cls_score.bias'][KEEP]
            state_dict['roi_heads.box_predictor.bbox_pred.weight'] = \
                state_dict['roi_heads.box_predictor.bbox_pred.weight'][KEEPX4.flatten()]
            state_dict['roi_heads.box_predictor.bbox_pred.bias'] = \
                state_dict['roi_heads.box_predictor.bbox_pred.bias'][KEEPX4.flatten()]

        model.load_state_dict(state_dict)
    elif pretrained:
        print(
            "WARNING: No pretrained detector on MS COCO with {}.".format(
                backbone_name),
            "Proceed with only pretrained backbone on ImageNet.")
    return model
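`KEEP` and `KEEPX4` are defined elsewhere in the source; plausibly `KEEP` indexes the 81 retained class rows of the 91-class COCO heads and `KEEPX4` expands that to the four box-regression outputs per class. An illustrative construction under that assumption:
import torch

# hypothetical: keep the first 81 of the 91 COCO head rows
keep_ids = list(range(81))
KEEP = torch.tensor(keep_ids)                 # shape [81], indexes cls_score rows
KEEPX4 = KEEP[:, None] * 4 + torch.arange(4)  # shape [81, 4], one row per box coordinate
# state_dict[...][KEEPX4.flatten()] then selects the matching bbox_pred rows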
Example #7
class FasterRCNN_Encoder(nn.Module):
    def __init__(self, out_dim=None, fine_tune=False):
        super(FasterRCNN_Encoder, self).__init__()
        backbone = resnet_fpn_backbone('resnet50', False)
        self.faster_rcnn = FasterRCNN(backbone,
                                      num_classes=91,
                                      rpn_post_nms_top_n_train=200,
                                      rpn_post_nms_top_n_test=100)
        state_dict = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'], progress=True)
        self.faster_rcnn.load_state_dict(state_dict)

        # modify the last linear layer of the ROI pooling if there is
        # a special requirement of output size
        if out_dim is not None:
            self.faster_rcnn.roi_heads.box_head.fc7 = nn.Linear(
                in_features=1024, out_features=out_dim)

        # in a captioning task, we may not want to fine-tune the faster-rcnn model
        if not fine_tune:
            for param in self.faster_rcnn.parameters():
                param.requires_grad = False

    def forward(self, images, targets=None):
        '''
        Forward propagation of faster-rcnn encoder
        Args:
            images: List[Tensor], a list of image data
            targets: List[Tensor], a list of ground-truth bounding box data,
                     used only when fine-tuning
        Returns:
            proposal features after ROI pooling and RPN loss
        '''
        images, targets = self.faster_rcnn.transform(images, targets)
        # the base features produced by backbone network, i.e. resnet50
        features = self.faster_rcnn.backbone(images.tensors)
        if isinstance(features, torch.Tensor):
            features = OrderedDict([('0', features)])
        # proposals produced by RPN, i.e. the coordinates of bounding box
        # which contain foreground objects
        proposals, proposal_losses = self.faster_rcnn.rpn(
            images, features, targets)
        # get the corresponding features of the proposals produced by RPN and perform roi pooling
        box_features = self.faster_rcnn.roi_heads.box_roi_pool(
            features, proposals, images.image_sizes)
        # project the features to shape (batch_size, num_boxes, feature_dim)
        box_features = self.faster_rcnn.roi_heads.box_head(box_features)
        return box_features, proposal_losses
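A usage sketch of the encoder (the `out_dim` value is illustrative): in eval mode with no targets the RPN loss dict comes back empty, and `box_features` holds one pooled feature row per kept proposal.
encoder = FasterRCNN_Encoder(out_dim=512)
encoder.eval()
images = [torch.rand(3, 480, 640), torch.rand(3, 520, 600)]
with torch.no_grad():
    box_features, rpn_losses = encoder(images)
# box_features: Tensor[num_proposals_total, 512]; rpn_losses: {} in eval mode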
Example #8
def fasterrcnn_resnet_fpn(pretrained=False,
                          progress=True,
                          resnet='resnet50',
                          num_classes=91,
                          pretrained_backbone=True,
                          **kwargs):
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone(resnet, pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        target_url = model_urls['fasterrcnn_' + resnet + '_fpn_coco']
        state_dict = load_state_dict_from_url(target_url, progress=progress)
        model.load_state_dict(state_dict)
    return model
Example #9
def fasterrcnn_resnet101_fpn(pretrained=False, progress=False,
                            num_classes=91, pretrained_backbone=True, **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-101-FPN backbone.
    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
    image, and should be in ``0-1`` range. Different images can have different sizes.
    The behavior of the model changes depending on whether it is in training or evaluation mode.
    During training, the model expects both the input tensors and the targets (a list of dictionaries),
    containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values
          of ``x`` between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box
    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses for both the RPN and the R-CNN.
    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
    follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values
          of ``x`` between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction
    Example::
        >>> model = torchvision.models.detection.fasterrcnn_resnet101_fpn(pretrained=False)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)
    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017 (no pre-trained weights are currently available for this variant)
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    if pretrained:
        # no pre-trained detector weights exist for resnet101, so there is
        # nothing to download and the call fails fast
        raise ValueError("resnet101 cannot use pre-trained models")
    backbone = resnet_fpn_backbone('resnet101', pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    return model
Example #10
def fasterrcnn_resnet50_fpn(pretrained=False,
                            progress=True,
                            num_classes=91,
                            pretrained_backbone=True,
                            **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
    image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending on whether it is in training or evaluation mode.

    During training, the model expects both the input tensors and the targets (a list of dictionaries),
    containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
          between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
    follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
          between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction

    Faster R-CNN is exportable to ONNX for a fixed batch size with input images of fixed size.

    Example::

        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        >>> # For training
        >>> images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
        >>> labels = torch.randint(1, 91, (4, 11))
        >>> images = list(image for image in images)
        >>> targets = []
        >>> for i in range(len(images)):
        >>>     d = {}
        >>>     d['boxes'] = boxes[i]
        >>>     d['labels'] = labels[i]
        >>>     targets.append(d)
        >>> output = model(images, targets)
        >>> # For inference
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)
        >>>
        >>> # optionally, if you want to export the model to ONNX:
        >>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version = 11)

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'], progress=progress)
        model.load_state_dict(state_dict)
    return model
Example #11
data_loader_validation = torch.utils.data.DataLoader(
    dataset_validation, batch_size=1, shuffle=False, num_workers=0,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=0,
    collate_fn=utils.collate_fn)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# move model to the right device
model.to(device)

model_path = './model_mixed_epoch10.pt'
model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
# evaluate(model, data_loader_validation, device=device)
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
# optimizer = torch.optim.Adam(params)
# # torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)
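A minimal sketch of the epoch loop these objects would drive (it assumes a training `data_loader` exists alongside the validation/test loaders above):
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for images, targets in data_loader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)  # classification + regression losses
        losses = sum(loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
    lr_scheduler.step()  # 10x decay every 3 epochs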
Example #12
    allFileList.sort()
    allFileList.sort(key=lambda x: int(x[:-4]))

    # Load model
    backbone = backboneNet_efficient()  # use efficientnet as our backbone
    backboneFPN = backboneWithFPN(backbone)  # add FPN

    anchor_generator = AnchorGenerator(cfg.anchor_sizes, cfg.aspect_ratios)

    model_ft = FasterRCNN(backboneFPN,
                          num_classes=cfg.num_classes,
                          rpn_anchor_generator=anchor_generator,
                          min_size=cfg.min_size,
                          max_size=cfg.max_size)

    model_ft.load_state_dict(torch.load(cfg.model_name).state_dict())
    model_ft.to(device)

    with open(cfg.json_name, 'w', encoding='utf-8') as json_f:
        for file in allFileList:
            if os.path.isfile(cfg.test_path + file):
                print(file)
                output_dict = {}
                path = cfg.test_path + file
                img = Image.open(path).convert('RGB')
                img = data_transforms(img)
                img = img.unsqueeze(0)
                with torch.no_grad():
                    model_ft.eval()
                    img = img.to(device)
                    output = model_ft(img)
Example #13
def get_fasterrcnn_model(arch_str,
                         num_classes,
                         pretrained=True,
                         pretrained_backbone=True,
                         trainable_layers=5,
                         **kwargs):
    """Creates FasterRCNN model with resnet backbone"""

    #if pretrained == True: pretrained_backbone=False

    backbone = resnet_fpn_backbone(arch_str,
                                   pretrained=pretrained_backbone,
                                   trainable_layers=trainable_layers)

    anchor_sizes = (
        (16, ),
        (32, ),
        (64, ),
        (128, ),
        (256, ),
    )
    aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)

    anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                       aspect_ratios=aspect_ratios)

    model = FasterRCNN(
        backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchor_generator,
        box_fg_iou_thresh=0.5,
        box_bg_iou_thresh=0.5,
        image_mean=[0.0, 0.0, 0.0],  # already normalized by fastai
        image_std=[1.0, 1.0, 1.0],
        #min_size = 1,
        #box_score_thresh=0.6,
        **kwargs)

    if pretrained:
        try:
            pretrained_dict = load_state_dict_from_url(
                _model_urls['fasterrcnn_' + arch_str + '_fpn_coco'],
                progress=True)
            model_dict = model.state_dict()

            pretrained_dict = {
                k: v
                for k, v in pretrained_dict.items() if (k in model_dict) and (
                    model_dict[k].shape == pretrained_dict[k].shape)
            }

            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)
            #overwrite_eps(model, 0.0)
            for module in model.modules():
                if isinstance(module, FrozenBatchNorm2d):
                    module.eps = 0.0

        except Exception as e:
            #print(e)
            print("No pretrained coco model found for fasterrcnn_" + arch_str)
            print("This does not affect the backbone.")

    return model.train()
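The shape-filtered `state_dict` update above is what lets the COCO checkpoint load even when `num_classes` differs from 91: predictor-head tensors whose shapes no longer match are simply skipped, and only the backbone/RPN/head weights that still fit are copied. A usage sketch:
# 3-class detector that still inherits every shape-compatible COCO weight
model = get_fasterrcnn_model('resnet50', num_classes=3, pretrained=True)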
Example #14
def main():
    parser = argparse.ArgumentParser(
        description='VISUM 2019 competition - baseline inference script',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-d',
                        '--data_path',
                        default='/home/master/dataset/test',
                        metavar='',
                        help='test data directory path')
    parser.add_argument('-m',
                        '--model_path',
                        default='./model.pth',
                        metavar='',
                        help='model file')
    parser.add_argument('-o',
                        '--output',
                        default='./predictions.csv',
                        metavar='',
                        help='output CSV file name')
    args = vars(parser.parse_args())

    NMS_THR = 0.1  # non-maximum suppression threshold
    REJECT_THR_KNOWN = 0.9  # rejection threshold to classify as unknown class (naive approach!)
    REJECT_THR = 0.17  # rejection threshold to classify as unknown class (naive approach!)

    def get_transform(train):
        transforms = []
        # converts the image, a PIL image, into a PyTorch Tensor
        transforms.append(T.ToTensor())
        if train:
            # during training, randomly flip the training images
            # and ground-truth for data augmentation
            transforms.append(T.RandomHorizontalFlip(0.5))
        return T.Compose(transforms)

    # Load datasets
    test_data = VisumData(args['data_path'],
                          'rgb',
                          mode='test',
                          transforms=get_transform(False))

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # initial
    # model = torch.load(args['model_path'])
    # new
    backbone = torchvision.models.detection.backbone_utils.resnet_fpn_backbone(
        'resnet50', True)
    backbone.out_channels = 256
    anchor_generator = AnchorGenerator(sizes=(8, 16, 32, 64, 128),
                                       aspect_ratios=(0.5, 1.0, 2.0))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                    output_size=7,
                                                    sampling_ratio=2)
    # put the pieces together inside a FasterRCNN model
    model = FasterRCNN(backbone,
                       num_classes=11,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    model.load_state_dict(
        torch.load(args['model_path'], map_location=device))
    model.to(device)

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=4,
                                              collate_fn=utils.collate_fn)

    predictions = list()
    for i, (imgs, _, file_names) in enumerate(test_loader):
        # set the model to evaluation mode
        model.eval()
        with torch.no_grad():
            prediction = model(list(img.to(device) for img in imgs))

        boxes = np.array(prediction[0]['boxes'].cpu())
        labels = list(prediction[0]['labels'].cpu())
        scores = list(prediction[0]['scores'].cpu())

        nms_boxes, nms_labels, nms_scores = nms(boxes, labels, scores, NMS_THR)

        for bb in range(len(nms_labels)):
            if nms_scores[bb] >= REJECT_THR:
                pred = np.concatenate(
                    (list(file_names), list(nms_boxes[bb, :])))  # bounding box
                if nms_scores[bb] >= REJECT_THR_KNOWN:
                    pred = np.concatenate(
                        (pred, [nms_labels[bb] - 1]))  # object label
                else:
                    pred = np.concatenate((pred, [-1]))  # Rejects to classify
                pred = np.concatenate(
                    (pred, [nms_scores[bb]]))  # BEST CLASS SCORE
                pred = list(pred)
                predictions.append(pred)

    with open(args['output'], 'w') as f:
        for pred in predictions:
            f.write("{},{},{},{},{},{},{}\n".format(pred[0], float(pred[1]),
                                                    float(pred[2]),
                                                    float(pred[3]),
                                                    float(pred[4]),
                                                    int(pred[5]),
                                                    float(pred[6])))
Example #15
api = Namespace('frcnn', description='Model related operations')
image_upload = reqparse.RequestParser()
image_upload.add_argument('image',
                          location='files',
                          type=FileStorage,
                          required=True,
                          help='Image')

categories = os.getenv('CATEGORIES').split(',')
device = int(os.getenv('GPU_DEVICE'))

logger.info('Loading model')
backbone = resnet_fpn_backbone(os.getenv('BACKBONE'), False)
model = FasterRCNN(backbone, len(categories))
state_dict = torch.load(os.getenv('MODEL_PATH'))
model.load_state_dict(state_dict['model'])
model.to(device)
model.eval()
logger.info('Model ready')

logger.info('Loading Detectron model')
cfg = get_cfg()
cfg.merge_from_file(os.getenv('DETECTRON_CONFIG'))
cfg.MODEL.WEIGHTS = os.getenv('DETECTRON_MODEL')
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
cfg.freeze()
predictor = DefaultPredictor(cfg)
logger.info('Detectron model ready')


@api.route('/vision')
Example #16
def get_result_from_model(test_img, thresh):

    test_data = torchvision.datasets.ImageFolder('C:/Users/skyho/Desktop/test_image_folder/',loader = plt.imread,transform=transforms.ToTensor())

    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 200),), aspect_ratios=((0.5, 1.0, 2.0),))
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    backbone.out_channels = 1280
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0], output_size=7, sampling_ratio=2)
    rcnn_v1 = FasterRCNN(backbone, num_classes=32, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler)

    try:
        # model_path = "bs{0}_lr{1}_epoch{2}_checkpoint_{3}".format(8,0.00005,2,15)
        checkpoint = torch.load('C:/Users/skyho/Desktop/final_model.pth', map_location='cpu') #, map_location='cpu'
        rcnn_v1.load_state_dict(checkpoint['model_state_dict'])
        rcnn_v1.eval()
    except IOError:
        print("Can't find saved model~")

    # result = []
    #data = torchvision.datasets.ImageFolder(img_path, loader=plt.imread, transform=transforms.ToTensor())
    result = []
    with torch.no_grad():
        result.append(rcnn_v1([test_data[0][0]]))

    # plot the boxes on the result image
    # print labels
    # save the image somewhere and return the path
    # cv2_im = cv2.imread(test_img)

    font                   = cv2.FONT_HERSHEY_SIMPLEX
    fontScale              = 1
    fontColor              = (255,255,255)
    lineType               = 2

    cv2_im = []
    #for i in range(len(test_data)):
    cv2_im.append(cv2.imread(test_data.imgs[0][0]))

    #for i in range(len(test_data)):
    i = 0
    first_box = result[i][0]['boxes'][0].unsqueeze(0)
    box_id = 0

    for box in result[i][0]['boxes']:

        if (box_id==0 or jaccard(first_box,box.unsqueeze(0)).tolist()[0][0]<0.6):
            if result[i][0]['scores'].tolist()[box_id]>=thresh:
                x1 = int(box[0])
                y1 = int(box[1])
                x2 = int(box[2])
                y2 = int(box[3])
                #check other boxes

                flag = True
                for each in range(result[i][0]['boxes'].shape[0]):
                    if each != box_id and result[i][0]['scores'].tolist()[each] >= thresh and jaccard(first_box, result[i][0]['boxes'][each].unsqueeze(0)).tolist()[0][0] < 0.6:
                        o_x1 = int(result[i][0]['boxes'][each][0])
                        o_y1 = int(result[i][0]['boxes'][each][1])
                        o_x2 = int(result[i][0]['boxes'][each][2])
                        o_y2 = int(result[i][0]['boxes'][each][3])
                        # skip this box if it sits inside another box of the same label
                        if x1 >= o_x1 - 3 and y1 >= o_y1 - 3 and x2 <= o_x2 + 3 and y2 <= o_y2 + 3 and result[i][0]['labels'][box_id] == result[i][0]['labels'][each]:
                            flag = False
                            break
                # draw only after the box has been checked against all others
                if flag:
                    cv2_im[i] = cv2.rectangle(cv2_im[i], (x1, y1), (x2, y2), (0, 255, 0), 3)
                    cv2.putText(cv2_im[i], classes[result[i][0]['labels'][box_id]],
                                (x1, y2),
                                font,
                                fontScale,
                                fontColor,
                                lineType)
        box_id += 1

    detection_result = test_img[:-4] + '_result.png'
    #for i in range(len(test_data)):
    cv2.imwrite(detection_result, cv2_im[i])
    return detection_result
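The manual `jaccard`-based suppression above approximates per-class NMS, which torchvision ships directly; a sketch of the library route over this function's `result` (same 0.6 IoU cut-off):
import torchvision

det = result[0][0]
keep = torchvision.ops.batched_nms(det['boxes'], det['scores'],
                                   det['labels'], iou_threshold=0.6)
boxes, labels, scores = det['boxes'][keep], det['labels'][keep], det['scores'][keep]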
Example #17
            optimizer.step()

            # model.eval()
            # prediction = model(images)
            # print(prediction[0])

        torch.save(model.state_dict(), "./weight_2/{}.pt".format(ep))
    # img_array = predictions.permute(1,2,0).detach().cpu().numpy().astype(uint8)
    # cv2.imshow("img", cv2.fromarray(img_array))

if cfg.predict:
    img_path = "../../data/SUMIT/rs_images_sampled/"
    dataset = os.listdir(img_path)
    indices = torch.randperm(len(dataset)).tolist()

    model.load_state_dict(torch.load("./weight_2/9.pt"), strict=False)
    model.to(device)
    model.eval()

    for idi in indices[-10:]:
        img = Image.open(img_path + dataset[idi]).convert("RGB")
        # print(np.array(img).shape)
        img = torch.tensor(np.array(img)).float().permute(
            2, 0, 1).unsqueeze(0).to(device)
        predict = model(img)
        boxes_list = predict[0]["boxes"].data.cpu().numpy()
        print(predict[0]["boxes"].data.cpu().numpy())

        iimg = cv2.imread(img_path + dataset[idi])
        for box in boxes_list:
            print(box)
Example #18
device = torch.device('cuda:0')

backbone = torchvision.models.vgg16(pretrained=False).features
backbone.out_channels = 512
anchor_sizes = ((8, 16, 32, 64, 128, 256, 512), )
aspect_ratios = ((1 / 2, 1 / 3, 1 / 4, 1 / 5, 1 / 6, 1 / math.sqrt(2), 1, 2,
                  math.sqrt(2), 3, 4, 5, 6, 7, 8), )
anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                   aspect_ratios=aspect_ratios)
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0', '1', '2', '3', '4'], output_size=7, sampling_ratio=2)
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)
model.load_state_dict(torch.load('1.pth'))
model.to(device)
model.eval()
# real_img = cv2.imread(
#     '/home/dung/DocData/cp/145/110.png')
real_img = cv2.imread(
    '/home/dung/Project/Python/keras-frcnn/result/0_19_0.png')
img = torch.tensor(real_img, dtype=torch.float32) / 255
img = img.permute((2, 0, 1))

output = model([img.to(device)])

boxes = output[0]['boxes']

a = output[0]['boxes'].detach().to('cpu').numpy()
a = np.round(a)
Example #19
ears = ["right ear/", "left ear/"]

transform_img = transforms.Compose([transforms.ToTensor()])
EPOCH = 250
CLASSES = 3
DEVICE = torch.device("cuda")
BATCH_SIZE = 10

anchor_generator = AnchorGenerator(sizes=((32, 64), ),
                                   aspect_ratios=((0.6, 1.0, 1.6), ))
backbone = torchvision.models.vgg19(pretrained=False).features
backbone.out_channels = 512
model = FasterRCNN(backbone,
                   num_classes=CLASSES,
                   rpn_anchor_generator=anchor_generator)
model.load_state_dict(
    torch.load('models_new/' + 'model_' + str(EPOCH) + '.pth'))
model.to(DEVICE)
model.eval()
start_time = time.time()
ear_count = 0
for T in types:
    for E in ears:
        CTs = os.listdir(data_path + dataset_name + T + E)
        for CT in CTs:
            print('current path:{}'.format(data_path + dataset_name + T + E +
                                           CT))
            ear_count += 1
            img_names = glob.glob(data_path + dataset_name + T + E + CT +
                                  '/*.jpg')
            img_names.sort(key=lambda x: x.split('\\')[-1])  # sorted() alone would discard its result
            with torch.no_grad():
Example #20
# feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)

# put the pieces together inside a FasterRCNN model
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   min_size=800,
                   max_size=1200,
                   box_roi_pool=roi_pooler,
                   box_detections_per_img=200)

model.cuda()
model.load_state_dict(torch.load('./checkpoint/efficient_model_L_7.pth'))
model.eval()

start = time.time()
print(img.size())
results = model([img.cuda()])
open_cv_image = np.array(imge)
open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
for box in results[0]['boxes']:
    x1, y1, x2, y2 = (int(v) for v in box[:4].tolist())
    # cv2.rectangle also accepts an (x, y, w, h) rect plus color and thickness
    cv2.rectangle(open_cv_image, (x1, y1, x2 - x1, y2 - y1), (255, 225, 0), 2)
cv2.imshow("sd", open_cv_image)
cv2.imwrite("demo.jpg", open_cv_image)
cv2.waitKey(30000)