Example #1
    def predict_boxes(self, images, boxes):
        device = list(self.parameters())[0].device
        images = images.to(device)
        boxes = boxes.to(device)

        targets = None
        original_image_sizes = [img.shape[-2:] for img in images]

        images, targets = self.transform(images, targets)

        features = self.backbone(images.tensors)
        if isinstance(features, torch.Tensor):
            features = collections.OrderedDict([(0, features)])

        # proposals, proposal_losses = self.rpn(images, features, targets)
        from torchvision.models.detection.transform import resize_boxes
        boxes = resize_boxes(boxes, original_image_sizes[0],
                             images.image_sizes[0])
        proposals = [boxes]

        box_features = self.roi_heads.box_roi_pool(features, proposals,
                                                   images.image_sizes)
        box_features = self.roi_heads.box_head(box_features)
        class_logits, box_regression = self.roi_heads.box_predictor(
            box_features)

        pred_boxes = self.roi_heads.box_coder.decode(box_regression, proposals)
        pred_scores = F.softmax(class_logits, -1)

        pred_boxes = pred_boxes[:, 1:].squeeze(dim=1).detach()
        pred_boxes = resize_boxes(pred_boxes, images.image_sizes[0],
                                  original_image_sizes[0])
        pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()
        return pred_boxes, pred_scores
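The step all of these `predict_boxes` variants share is mapping box coordinates between the original image size and the size produced by the detector's `GeneralizedRCNNTransform`. A minimal, self-contained sketch of that round trip (the sizes below are illustrative, not taken from the example):

import torch
from torchvision.models.detection.transform import resize_boxes

boxes = torch.tensor([[50., 60., 200., 220.]])   # (x1, y1, x2, y2)
original_size = (480, 640)                       # (H, W) of the raw frame
transformed_size = (600, 800)                    # (H, W) after self.transform

# scale into the transformed coordinate system and back again
scaled = resize_boxes(boxes, original_size, transformed_size)
restored = resize_boxes(scaled, transformed_size, original_size)
assert torch.allclose(boxes, restored)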
    def predict_boxes(self, boxes, box_head=None, box_predictor=None):
        device = list(self.parameters())[0].device
        boxes = boxes.to(device)

        if isinstance(self.fpn_features, torch.Tensor):
            self.fpn_features = OrderedDict([(0, self.fpn_features)])

        from torchvision.models.detection.transform import resize_boxes
        boxes = resize_boxes(boxes, self.original_image_size[0],
                             self.image_size[0])
        proposals = [boxes]

        box_features = self.roi_heads.box_roi_pool(self.fpn_features,
                                                   proposals, self.image_size)
        if box_head is None:
            box_head = self.roi_heads.box_head
        box_features = box_head(box_features)
        if box_predictor is None:
            box_predictor = self.roi_heads.box_predictor
        class_logits, box_regression = box_predictor(box_features)

        pred_boxes = self.roi_heads.box_coder.decode(box_regression, proposals)
        pred_scores = F.softmax(class_logits, -1)

        pred_boxes = pred_boxes[:, 1:].squeeze(dim=1).detach()
        pred_boxes = resize_boxes(pred_boxes, self.image_size[0],
                                  self.original_image_size[0])
        pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()
        return pred_boxes, pred_scores
Example #3
    def predict_boxes(self, images, boxes):
        device = list(self.parameters())[0].device
        images = images.to(device)
        boxes = boxes.to(device)

        targets = None
        original_image_sizes = [img.shape[-2:] for img in images]

        images, targets = self.transform(images, targets)

        features = self.backbone(images.tensors)
        if isinstance(features, torch.Tensor):
            features = OrderedDict([(0, features)])

        # proposals, proposal_losses = self.rpn(images, features, targets)
        from torchvision.models.detection.transform import resize_boxes
        boxes = resize_boxes(boxes, original_image_sizes[0],
                             images.image_sizes[0])
        proposals = [boxes]

        box_features = self.roi_heads.box_roi_pool(features, proposals,
                                                   images.image_sizes)
        box_features = self.roi_heads.box_head(box_features)
        class_logits, box_regression = self.roi_heads.box_predictor(
            box_features)

        pred_boxes = self.roi_heads.box_coder.decode(box_regression, proposals)
        pred_scores = F.softmax(class_logits, -1)

        # score_thresh = self.roi_heads.score_thresh
        # nms_thresh = self.roi_heads.nms_thresh

        # self.roi_heads.score_thresh = self.roi_heads.nms_thresh = 1.0
        # self.roi_heads.score_thresh = 0.0
        # self.roi_heads.nms_thresh = 1.0
        # detections, detector_losses = self.roi_heads(
        #     features, [boxes.squeeze(dim=0)], images.image_sizes, targets)

        # self.roi_heads.score_thresh = score_thresh
        # self.roi_heads.nms_thresh = nms_thresh

        # detections = self.transform.postprocess(
        #     detections, images.image_sizes, original_image_sizes)

        # detections = detections[0]
        # return detections['boxes'].detach().cpu(), detections['scores'].detach().cpu()

        #pred_masks = class_logits.argmax(1).eq(1)

        pred_boxes = pred_boxes[:, 1, :].squeeze(dim=1).detach()

        pred_boxes = resize_boxes(pred_boxes, images.image_sizes[0],
                                  original_image_sizes[0])

        pred_scores = torch.max(pred_scores[:, 1:],
                                1)[0].detach()  # .squeeze(dim=1)

        return pred_boxes, pred_scores  # [pred_masks]
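Compared with Example #1, this variant indexes the class-1 boxes explicitly and scores each box by its best foreground class. A standalone sketch of the two selections with illustrative tensors:

import torch

pred_boxes = torch.rand(4, 3, 4)                      # (num_boxes, num_classes, 4)
pred_scores = torch.softmax(torch.rand(4, 3), dim=-1)

boxes_cls1 = pred_boxes[:, 1, :]                      # boxes regressed for class 1 only
best_scores, best_cls = torch.max(pred_scores[:, 1:], dim=1)
best_cls = best_cls + 1                               # shift past the background column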
Example #4
    def predict_boxes(self, boxes):
        device = list(self.parameters())[0].device

        boxes = boxes.to(device)

        try:
            boxes = resize_boxes(boxes, self.original_image_sizes[0],
                                 self.preprocessed_images.image_sizes[0])
        except IndexError:
            print(boxes.size())
            raise IndexError
        proposals = [boxes]

        box_features = self.roi_heads.box_roi_pool(
            self.features, proposals, self.preprocessed_images.image_sizes)
        box_features = self.roi_heads.box_head(box_features)
        class_logits, box_regression = self.roi_heads.box_predictor(
            box_features)

        pred_boxes = self.roi_heads.box_coder.decode(box_regression, proposals)
        pred_scores = F.softmax(class_logits, -1)

        # score_thresh = self.roi_heads.score_thresh
        # nms_thresh = self.roi_heads.nms_thresh

        # self.roi_heads.score_thresh = self.roi_heads.nms_thresh = 1.0
        # self.roi_heads.score_thresh = 0.0
        # self.roi_heads.nms_thresh = 1.0
        # detections, detector_losses = self.roi_heads(
        #     features, [boxes.squeeze(dim=0)], images.image_sizes, targets)

        # self.roi_heads.score_thresh = score_thresh
        # self.roi_heads.nms_thresh = nms_thresh

        # detections = self.transform.postprocess(
        #     detections, images.image_sizes, original_image_sizes)

        # detections = detections[0]
        # return detections['boxes'].detach().cpu(), detections['scores'].detach().cpu()

        pred_boxes = pred_boxes[:, 1:].squeeze(dim=1).detach()
        pred_boxes = resize_boxes(pred_boxes,
                                  self.preprocessed_images.image_sizes[0],
                                  self.original_image_sizes[0])
        pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()
        pred_boxes = box_ops.clip_boxes_to_image(pred_boxes,
                                                 self.original_image_sizes[0])
        if self.version == 'v2':
            for box, box_feature in zip(pred_boxes, box_features):
                self.box_features[str(int(box[0])) + ',' + str(int(box[1])) +
                                  ',' + str(int(box[2])) + ',' +
                                  str(int(box[3]))] = box_feature
        return pred_boxes, pred_scores
    def predict_boxes(self, images, boxes):
        self.eval()
        device = list(self.parameters())[0].device
        images = images.to(device)
        boxes = boxes.to(device)

        targets = None
        original_image_sizes = [img.shape[-2:] for img in images]

        images, targets = self.transform(images, targets)

        features = self.backbone(images.tensors)
        if isinstance(features, torch.Tensor):
            features = OrderedDict([(0, features)])

        # proposals, proposal_losses = self.rpn(images, features, targets)
        from torchvision.models.detection.transform import resize_boxes

        boxes = resize_boxes(boxes, original_image_sizes[0],
                             images.image_sizes[0])
        proposals = [boxes]

        box_feats = self.roi_heads.box_roi_pool(features, proposals,
                                                images.image_sizes)
        box_features = self.roi_heads.box_head(box_feats)
        class_logits, box_regression = self.roi_heads.box_predictor(
            box_features)

        pred_boxes = self.roi_heads.box_coder.decode(box_regression, proposals)
        pred_scores = F.softmax(class_logits, -1)

        pred_boxes = pred_boxes[:, 1:].squeeze(dim=1).detach()
        pred_boxes = resize_boxes(pred_boxes, images.image_sizes[0],
                                  original_image_sizes[0])
        pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()

        mask_features = self.roi_heads.mask_roi_pool(features, proposals,
                                                     images.image_sizes)
        cropped_features = self.roi_heads.mask_head(mask_features)
        mask_logits = self.roi_heads.mask_predictor(cropped_features)

        switch_channel_masks = torch.zeros(mask_logits.size())
        switch_channel_masks[:, 0, :, :] = mask_logits[:, 1, :, :]

        # workaround that only works with 2 classes. otherwise try to get maskrcnn_inference running
        # or manually filter out the class with highest score here
        switch_channel_masks = torch.sigmoid(switch_channel_masks)
        pred_masks = paste_masks_in_image(switch_channel_masks, pred_boxes,
                                          original_image_sizes[0]).detach()

        return pred_boxes, pred_scores, pred_masks
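The workaround noted in the comments above hard-wires mask channel 1 and therefore only works with two classes. A standalone sketch of the multi-class alternative it hints at, selecting each box's mask channel by its highest-scoring foreground class (shapes are illustrative stand-ins, not taken from the example):

import torch
from torchvision.models.detection.roi_heads import paste_masks_in_image

num_boxes, num_classes = 3, 5
mask_logits = torch.randn(num_boxes, num_classes, 28, 28)   # per-class mask logits
fg_scores = torch.rand(num_boxes, num_classes - 1)          # background column removed
boxes = torch.tensor([[10., 10., 60., 80.]] * num_boxes)

labels = fg_scores.argmax(dim=1) + 1                        # best foreground class per box
idx = torch.arange(num_boxes)
masks = mask_logits[idx, labels][:, None].sigmoid()         # (N, 1, 28, 28)
pasted = paste_masks_in_image(masks, boxes, (120, 160))     # full-image masks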
Example #6
def ml_collate(elems):
    """
    Multi-level version of the `collate` function defined above.
    """
    boxes_in, boxes_target, boxes_all, image_features, orig_image_sizes, image_sizes, lengths, feat_trans = zip(*elems)
    boxes_in = default_collate(boxes_in)
    lengths = default_collate(lengths)
    boxes_target = default_collate(boxes_target)

    orig_image_sizes = torch.cat(orig_image_sizes)
    image_sizes = torch.cat(image_sizes)

    # get resized bounding boxes for later RoI pooling
    first_idc = [int(sum(lengths[:i])) for i in range(0, len(lengths))]
    boxes_resized = []
    feat_translation_resized = []
    for seq_start, boxes, trans in zip(first_idc, boxes_all, feat_trans):
        boxes_resized.append(resize_boxes(boxes, orig_image_sizes[seq_start], image_sizes[seq_start]))
        feat_translation_resized.append(
            resize_boxes(trans.repeat(1, 2), orig_image_sizes[seq_start], image_sizes[seq_start])[:, :2]
        )
    boxes_resized = torch.cat(boxes_resized)

    # calculate feature translation in feature scale
    scales = [infer_scale(feat, image_sizes[0]) for feat in image_features]
    feat_trans = [(t_resized * scale).round() for t_resized, scale in zip(feat_translation_resized, scales)]

    # apply translation to feature map
    all_feat_out = []
    for i, feat in enumerate(image_features):
        pad_w = int(feat_trans[i][:, 0].abs().max())
        pad_h = int(feat_trans[i][:, 1].abs().max())
        if pad_w == 0 and pad_h == 0:
            feat_out = feat
        else:
            feat_padded = F.pad(feat, [pad_w, pad_w, pad_h, pad_h])
            origin = torch.tensor([pad_w, pad_h])
            new_coords = (origin - feat_trans[i]).long()

            h, w = feat.shape[-2:]
            feat_out = []
            for j in range(feat_padded.shape[0]):
                x, y = new_coords[j]
                feat_out.append(feat_padded[j, :, y:(y + h), x:(x + w)])
            feat_out = torch.stack(feat_out)

        all_feat_out.append(feat_out)

    levels = [roi_scales.index(s) for s in scales]
    return boxes_in, boxes_target, boxes_resized, all_feat_out, image_sizes, lengths, levels
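`infer_scale` is not shown in this snippet; it presumably computes the feature-map stride relative to the image, analogous to `torchvision.ops.poolers.MultiScaleRoIAlign.infer_scale`. A standalone sketch of that computation with illustrative sizes:

import torch

image_size = (800, 1216)                 # (H, W) of the transformed image
feature = torch.zeros(1, 256, 100, 152)  # e.g. a stride-8 FPN level

approx = feature.shape[-2] / float(image_size[0])          # ~= 1 / 8
scale = 2.0 ** torch.tensor(approx).log2().round().item()  # snap to a power of two
assert scale == 0.125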
Example #7
    def postprocess(self, results, image_shapes, original_image_sizes):
        if self.training:
            loss = results.pop()

        for pred, im_s, o_im_s in zip(results, image_shapes,
                                      original_image_sizes):
            boxes_h, boxes_o = pred['boxes_h'], pred['boxes_o']
            boxes_h = transform.resize_boxes(boxes_h, im_s, o_im_s)
            boxes_o = transform.resize_boxes(boxes_o, im_s, o_im_s)
            pred['boxes_h'], pred['boxes_o'] = boxes_h, boxes_o

        if self.training:
            results.append(loss)

        return results
def get_features(obj_detect, img_list, curr_frame_offset, curr_gt_app):
    """
    Input:
        -img_list: list (len=clip_len) of (3, w, h). Can be different sizes. 
        -curr_frame_offset: (batch,)
        -curr_gt_app: (batch, 4)
    Output:
        -box_features: (batch, 256, 7, 7) CUDA
        -box_head_features: (batch, 1024) CUDA
    """
    box_features_list = []
    box_head_features_list = []

    with torch.no_grad():
        gts = curr_gt_app.cuda()
        for i, frame_idx in enumerate(curr_frame_offset):
            obj_detect.load_image(img_list[frame_idx].unsqueeze(0))

            gt = gts[i].unsqueeze(0)
            gt = clip_boxes_to_image(gt, img_list[frame_idx].shape[-2:])
            gt = resize_boxes(gt, obj_detect.original_image_sizes[0], obj_detect.preprocessed_images.image_sizes[0])
            gt = [gt]

            box_features = obj_detect.roi_heads.box_roi_pool(obj_detect.features, gt, obj_detect.preprocessed_images.image_sizes)
            box_head_features = obj_detect.roi_heads.box_head(box_features)
            box_features_list.append(box_features.squeeze(0))
            box_head_features_list.append(box_head_features.squeeze(0))

        return torch.stack(box_features_list, 0), torch.stack(box_head_features_list, 0)
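The clip-then-resize preprocessing above keeps the ground-truth boxes inside the raw frame before mapping them into the transformed coordinate system. A standalone sketch with illustrative values:

import torch
from torchvision.ops import clip_boxes_to_image
from torchvision.models.detection.transform import resize_boxes

gt = torch.tensor([[-5., 10., 700., 500.]])     # box partly outside a 480x640 frame
gt = clip_boxes_to_image(gt, (480, 640))        # clamp to the raw (H, W)
gt = resize_boxes(gt, (480, 640), (600, 800))   # map into the transformed frame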
Example #9
    def resize(self, image, target=None):
        # type: (Tensor, Optional[Dict[str, Tensor]])
        h, w = image.shape[-2:]
        im_shape = torch.tensor(image.shape[-2:])
        min_size = float(torch.min(im_shape))
        max_size = float(torch.max(im_shape))
#         if self.training:  ### COMMENTING OUT FOR NOW
#             size = float(self.torch_choice(self.min_size))
#         else:
            # FIXME assume for now that testing uses the largest scale
        size = float(self.model.cfg['INPUT']['MIN_SIZE_TEST'])
        scale_factor = size / min_size
        if max_size * scale_factor > self.model.cfg['INPUT']['MAX_SIZE_TEST']:
            scale_factor = self.model.cfg['INPUT']['MAX_SIZE_TEST'] / max_size
        image = torch.nn.functional.interpolate(
            image, size = 1024, mode='bilinear', align_corners=False)[0] # had to remove the [None] part
        # removed scale_factor=scale_factor

        if target is None:
            return image #, target

        bbox = target["boxes"]
        bbox = resize_boxes(bbox, (h, w), image.shape[-2:])
        target["boxes"] = bbox

        if "masks" in target:
            mask = target["masks"]
            mask = misc_nn_ops.interpolate(mask[None].float(), scale_factor=scale_factor)[0].byte()
            target["masks"] = mask

        if "keypoints" in target:
            keypoints = target["keypoints"]
            keypoints = resize_keypoints(keypoints, (h, w), image.shape[-2:])
            target["keypoints"] = keypoints
        return image #, target
Example #10
    def get_roi_features(self, obj_detect, img_list, gts):
        """
        Input:
            -img_list: list of (1, 3, w, h). Can be different sizes. 
            -gts: (batch, 4)
        Output:
            -box_features: (batch, 256, 7, 7)
        """
        box_features_list = []

        with torch.no_grad():
            for i, img in enumerate(img_list):
                obj_detect.load_image(img)

                gt = gts[i].unsqueeze(0)
                gt = clip_boxes_to_image(gt, img.shape[-2:])
                gt = resize_boxes(
                    gt, obj_detect.original_image_sizes[0],
                    obj_detect.preprocessed_images.image_sizes[0])
                gt = [gt]

                box_features = obj_detect.roi_heads.box_roi_pool(
                    obj_detect.features, gt,
                    obj_detect.preprocessed_images.image_sizes)
                box_features_list.append(box_features.squeeze(0))

            return torch.stack(box_features_list, 0)
    def detect_with_proposal(self, img, t_1_proposal):
        """
            https://github.com/pytorch/vision/blob/master/torchvision/models/detection/generalized_rcnn.py
            https://github.com/pytorch/vision/blob/master/torchvision/models/detection/roi_heads.py
        """
        images = img

        device = list(self.parameters())[0].device
        images = images.to(device)

        original_image_sizes = []
        for img in images:
            val = img.shape[-2:]
            assert len(val) == 2
            original_image_sizes.append((val[0], val[1]))

        images, _ = self.transform(images, None)
        features = self.backbone(images.tensors)
        if isinstance(features, torch.Tensor):
            features = OrderedDict([('0', features)])

        if not len(t_1_proposal):
            return torch.Tensor([]), torch.Tensor([])

        tt = resize_boxes(t_1_proposal.to(device), original_image_sizes[0],
                          images.image_sizes[0])

        # detections, _ = self.roi_heads(features, tt, images.image_sizes, None)
        # detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)

        # detections = detections[0]

        box_features = self.roi_heads.box_roi_pool(features, [tt],
                                                   images.image_sizes)
        box_features = self.roi_heads.box_head(box_features)
        class_logits, box_regression = self.roi_heads.box_predictor(
            box_features)

        pred_boxes = self.roi_heads.box_coder.decode(box_regression, [tt])
        pred_scores = F.softmax(class_logits, -1)

        pred_boxes = pred_boxes[:, 1:].squeeze(dim=1).detach()
        pred_boxes = resize_boxes(pred_boxes, images.image_sizes[0],
                                  original_image_sizes[0])
        pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()

        return pred_boxes, pred_scores
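For reference, the same re-detection idea can be reproduced with a stock torchvision model outside of any tracker class. A self-contained sketch with random weights and illustrative inputs (depending on your torchvision version, `weights=None` may need to be `pretrained=False`):

import torch
import torchvision
from torchvision.models.detection.transform import resize_boxes

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=None)
model.eval()

image = torch.rand(3, 480, 640)
prev_boxes = torch.tensor([[50., 60., 200., 220.]])

with torch.no_grad():
    images, _ = model.transform([image], None)
    features = model.backbone(images.tensors)
    proposals = [resize_boxes(prev_boxes, (480, 640), images.image_sizes[0])]
    box_feats = model.roi_heads.box_roi_pool(features, proposals, images.image_sizes)
    box_feats = model.roi_heads.box_head(box_feats)
    class_logits, box_regression = model.roi_heads.box_predictor(box_feats)
    pred_boxes = model.roi_heads.box_coder.decode(box_regression, proposals)
    pred_scores = torch.softmax(class_logits, dim=-1)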
Example #12
    def bbox_regression(self, img, boxes):
        """
        Tracking of the objects from previous frame with the bounding box regressor of the FRCNN_FPN
        """

        # Move image and boxes to the device
        device = list(self.parameters())[0].device
        img = img.to(device)
        boxes = boxes.to(device)

        # Perform input transformation before feeding the image into a GeneralizedRCNN model of torchvision
        img_size = img.shape[-2:]
        img_transformed, targets = self.transform(img)
        img_transformed_size = img_transformed.image_sizes[0]

        # Calculate the backbone features and put them in a compatible format with RoIHeads and RPN classes of torchvision
        backbone_features = self.backbone(img_transformed.tensors)
        if isinstance(backbone_features, torch.Tensor):
            backbone_features = OrderedDict([('0', backbone_features)])

        # Resize boxes to img_transformed size
        boxes = resize_boxes(boxes, img_size, img_transformed_size)

        # Forward pass of the RoIHeads class of torchvision
        box_features = self.roi_heads.box_roi_pool(backbone_features, [boxes],
                                                   [img_transformed_size])
        box_features = self.roi_heads.box_head(box_features)
        class_logits, box_regression = self.roi_heads.box_predictor(
            box_features)

        # Post-process the detections
        boxes = self.roi_heads.box_coder.decode(box_regression, [boxes])
        scores = F.softmax(class_logits, -1)

        # Remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]

        # Put the tensors in the correct shape for the Track class
        boxes = boxes.squeeze(dim=1)
        scores = scores.squeeze(dim=1)

        # Resize to img size
        boxes = resize_boxes(boxes, img_transformed_size, img_size)

        return boxes.detach().cpu(), scores.detach().cpu()
Example #13
def collate(elems):
    """
    Collate function for PyTorch `DataLoader` that handles efficient batching of image features.
    """
    boxes_in, boxes_target, boxes_all, image_features, orig_image_sizes, image_sizes, lengths, feat_trans = zip(*elems)
    boxes_in = default_collate(boxes_in)
    lengths = default_collate(lengths)
    boxes_target = default_collate(boxes_target)
    image_features = torch.cat(image_features)
    orig_image_sizes = torch.cat(orig_image_sizes)
    image_sizes = torch.cat(image_sizes)

    # get resized bounding boxes for later RoI pooling
    first_idc = [int(sum(lengths[:i])) for i in range(0, len(lengths))]
    boxes_resized = []
    feat_translation_resized = []
    for seq_start, boxes, trans in zip(first_idc, boxes_all, feat_trans):
        boxes_resized.append(resize_boxes(boxes, orig_image_sizes[seq_start], image_sizes[seq_start]))
        feat_translation_resized.append(
            resize_boxes(trans.repeat(1, 2), orig_image_sizes[seq_start], image_sizes[seq_start])[:, :2]
        )
    boxes_resized = torch.cat(boxes_resized)
    feat_translation_resized = torch.cat(feat_translation_resized)

    # calculate feature translation in feature scale
    scale = infer_scale(image_features, image_sizes[0])
    feat_trans = (feat_translation_resized * scale).round()

    # apply translation to feature map
    pad_w = int(feat_trans[:, 0].abs().max())
    pad_h = int(feat_trans[:, 1].abs().max())
    if pad_w == 0 and pad_h == 0:
        feat_out = image_features
    else:
        feat_padded = F.pad(image_features, [pad_w, pad_w, pad_h, pad_h])
        origin = torch.tensor([pad_w, pad_h])
        new_coords = (origin - feat_trans).long()

        h, w = image_features.shape[-2:]
        feat_out = []
        for i in range(feat_padded.shape[0]):
            x, y = new_coords[i]
            feat_out.append(feat_padded[i, :, y:(y + h), x:(x + w)])
        feat_out = torch.stack(feat_out)

    return boxes_in, boxes_target, boxes_resized, feat_out, image_sizes, lengths, None
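The pad-then-crop logic above translates a feature map by whole cells: pad symmetrically by the largest shift, then re-crop each sample at an offset origin. A standalone sketch with illustrative values:

import torch
import torch.nn.functional as F

feat = torch.arange(16.).reshape(1, 1, 4, 4)     # (N, C, H, W)
tx, ty = 1, -1                                   # shift right by 1 cell, up by 1 cell
pad_w, pad_h = abs(tx), abs(ty)

padded = F.pad(feat, [pad_w, pad_w, pad_h, pad_h])
x, y = pad_w - tx, pad_h - ty                    # crop origin after the shift
shifted = padded[..., y:y + 4, x:x + 4]          # same (4, 4) spatial size as feat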
    def predict_boxes(self, boxes):
        device = list(self.parameters())[0].device
        boxes = boxes.to(device)

        boxes = resize_boxes(boxes, self.original_image_sizes[0],
                             self.preprocessed_images.image_sizes[0])
        proposals = [boxes]

        box_features = self.roi_heads.box_roi_pool(
            self.features, proposals, self.preprocessed_images.image_sizes)
        box_features = self.roi_heads.box_head(box_features)
        class_logits, box_regression = self.roi_heads.box_predictor(
            box_features)

        pred_boxes = self.roi_heads.box_coder.decode(box_regression, proposals)
        pred_scores = F.softmax(class_logits, -1)

        # score_thresh = self.roi_heads.score_thresh
        # nms_thresh = self.roi_heads.nms_thresh

        # self.roi_heads.score_thresh = self.roi_heads.nms_thresh = 1.0
        # self.roi_heads.score_thresh = 0.0
        # self.roi_heads.nms_thresh = 1.0
        # detections, detector_losses = self.roi_heads(
        #     features, [boxes.squeeze(dim=0)], images.image_sizes, targets)

        # self.roi_heads.score_thresh = score_thresh
        # self.roi_heads.nms_thresh = nms_thresh

        # detections = self.transform.postprocess(
        #     detections, images.image_sizes, original_image_sizes)

        # detections = detections[0]
        # return detections['boxes'].detach().cpu(), detections['scores'].detach().cpu()
        #print("Pred Boxes 1")
        #print(pred_boxes.shape)
        pred_boxes = pred_boxes[:, 10, :].detach()
        #print("Pred Boxes 2")
        #print(pred_boxes.shape)
        pred_boxes = resize_boxes(pred_boxes,
                                  self.preprocessed_images.image_sizes[0],
                                  self.original_image_sizes[0])
        pred_scores = pred_scores[:, 10].detach()
        return pred_boxes, pred_scores
    def predict_with_correlation(self, prev_boxes, current_boxes,
                                 boxes_to_shift):

        prev_boxes_features, current_boxes_features = self.get_feature_patches(
            prev_boxes, current_boxes)

        boxes_deltas = self.correlation_head(prev_boxes_features,
                                             current_boxes_features)

        boxes_to_shift = resize_boxes(boxes_to_shift,
                                      self.original_image_sizes[0],
                                      self.preprocessed_images.image_sizes[0])
        pred_boxes = self.roi_heads.box_coder.decode(
            boxes_deltas, [boxes_to_shift]).squeeze(dim=1)
        pred_boxes = resize_boxes(pred_boxes,
                                  self.preprocessed_images.image_sizes[0],
                                  self.original_image_sizes[0])

        return pred_boxes
Example #16
    def forward(self, images, targets=None):
        """
        Arguments:
            images (list[Tensor]): images to be processed
            targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).

        """
        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")
        original_image_sizes = [img.shape[-2:] for img in images]
        images, targets = self.transform(images, targets)
        features = self.backbone(images.tensors)

        if self.n_channel_backbone < 5:
            in_channels = [(i, features[i])
                           for i in range(self.n_channel_backbone)]
            features = OrderedDict(in_channels)

        if self.n_channel_backbone > 5:
            in_channels = [(i, features[i]) for i in range(5)]
            features = OrderedDict(in_channels)

        if isinstance(features, torch.Tensor):
            features = OrderedDict([(0, features)])

        proposals, scores, proposal_losses = self.rpn(images, features,
                                                      targets)

        detections, detector_losses = self.roi_heads(features, proposals,
                                                     images.image_sizes,
                                                     targets)

        detections = self.transform.postprocess(detections, images.image_sizes,
                                                original_image_sizes)

        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)

        for i, (pred, im_s, o_im_s) in enumerate(
                zip(proposals, images.image_sizes, original_image_sizes)):
            boxes = resize_boxes(pred, im_s, o_im_s)
            proposals[i] = boxes

        if self.training:
            return losses

        #return detections, proposals
        return detections, features
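`n_channel_backbone` controls how many feature levels are forwarded to the RPN and RoI heads; the two `if` branches above just rebuild the feature dict with the first n levels. A standalone sketch of that selection with illustrative tensors:

import torch
from collections import OrderedDict

features = OrderedDict(
    (i, torch.rand(1, 256, 64 // 2 ** i, 64 // 2 ** i)) for i in range(6))
n_channel_backbone = 4
features = OrderedDict((i, features[i]) for i in range(n_channel_backbone))
assert list(features.keys()) == [0, 1, 2, 3]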
    def preprocess(self, images, detections, targets=None):
        original_image_sizes = [img.shape[-2:] for img in images]
        images, targets = self.transform(images, targets)

        for det, o_im_s, im_s in zip(detections, original_image_sizes,
                                     images.image_sizes):
            boxes = det['boxes']
            boxes = transform.resize_boxes(boxes, o_im_s, im_s)
            det['boxes'] = boxes

        return images, detections, targets, original_image_sizes
    def get_feature_patches(self, prev_boxes, current_boxes):
        device = list(self.parameters())[0].device
        prev_boxes = prev_boxes.to(device)
        current_boxes = current_boxes.to(device)

        prev_boxes = resize_boxes(prev_boxes,
                                  self.prev_original_image_sizes[0],
                                  self.prev_preprocessed_images.image_sizes[0])
        current_boxes = resize_boxes(current_boxes,
                                     self.original_image_sizes[0],
                                     self.preprocessed_images.image_sizes[0])

        prev_boxes_features = self.roi_heads.box_roi_pool(
            self.prev_features, [prev_boxes],
            self.prev_preprocessed_images.image_sizes)
        current_boxes_features = self.roi_heads.box_roi_pool(
            self.features, [current_boxes],
            self.preprocessed_images.image_sizes)

        return prev_boxes_features, current_boxes_features
    def losses(self, batch, loss):

        patch1, patch2, gt_boxes, prev_boxes, _, _, _, preprocessed_image_sizes, original_image_sizes = batch

        patch1 = Variable(patch1).cuda()
        patch2 = Variable(patch2).cuda()

        gt_boxes = gt_boxes.cuda()
        prev_boxes = prev_boxes.cuda()

        # print("fmap:")
        # print(patch1*100)
        # print("fmap_enlarged:")
        # print(patch2*100)
        # print("labels:")
        # print(gt_boxes)

        boxes_deltas = self.forward(patch1, patch2)

        prev_boxes = resize_boxes(prev_boxes, original_image_sizes[0],
                                  preprocessed_image_sizes[0])
        pred_boxes = self.roi_heads.box_coder.decode(
            boxes_deltas, [prev_boxes]).squeeze(dim=1)
        pred_boxes = resize_boxes(pred_boxes, preprocessed_image_sizes[0],
                                  original_image_sizes[0])

        if loss == "GIoU":
            total_loss = self.giou_loss(pred_boxes, gt_boxes)
        elif loss == "IoU":
            total_loss = box_iou(pred_boxes, gt_boxes).diag()
            total_loss = torch.mean(total_loss)
        elif loss == "MSE":
            total_loss = F.mse_loss(pred_boxes, gt_boxes)
        elif loss == "fasterRCNN":
            total_loss = self.smooth_l1_loss(pred_boxes, gt_boxes)
            total_loss /= len(gt_boxes)
        else:
            raise NotImplementedError("Loss: {}".format(loss))

        return total_loss
Example #20
    def postprocess(self, result, image_shapes, original_image_sizes):
        if self.training:
            return result
        for i, (pred, im_s, o_im_s) in enumerate(
                zip(result, image_shapes, original_image_sizes)):
            boxes = pred["boxes"]
            boxes = resize_boxes(boxes, im_s, o_im_s)
            result[i]["boxes"] = boxes
            for k in ['pose2d', 'body_pose2d', 'hand_pose2d', 'face_pose2d']:
                if k in pred and pred[k] is not None:
                    pose2d = pred[k]
                    pose2d = resize_keypoints(pose2d, im_s, o_im_s)
                    result[i][k] = pose2d
        return result
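Assuming `resize_keypoints` here is torchvision's helper, it rescales (x, y) coordinates with the same per-axis ratios that `resize_boxes` applies to boxes, so keypoints stay aligned with their boxes after postprocessing. A standalone sketch with illustrative values:

import torch
from torchvision.models.detection.transform import resize_boxes, resize_keypoints

kps = torch.tensor([[[100., 50., 1.]]])            # (N, K, 3): x, y, visibility
boxes = torch.tensor([[90., 40., 110., 60.]])

kps_out = resize_keypoints(kps, (480, 640), (960, 1280))
boxes_out = resize_boxes(boxes, (480, 640), (960, 1280))
assert torch.allclose(kps_out[0, 0, :2], torch.tensor([200., 100.]))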
Example #21
    def resize(self, image, target):
        """
        Override method to resize box pairs
        """
        h, w = image.shape[-2:]
        min_size = float(min(image.shape[-2:]))
        max_size = float(max(image.shape[-2:]))
        scale_factor = min(self.min_size[0] / min_size,
                           self.max_size / max_size)

        image = nn.functional.interpolate(image[None],
                                          scale_factor=scale_factor,
                                          mode='bilinear',
                                          align_corners=False)[0]
        if target is None:
            return image, target

        target['boxes_h'] = transform.resize_boxes(target['boxes_h'], (h, w),
                                                   image.shape[-2:])
        target['boxes_o'] = transform.resize_boxes(target['boxes_o'], (h, w),
                                                   image.shape[-2:])

        return image, target
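The scale factor above is chosen to hit the `min_size` target unless that would push the longer side past `max_size`. A quick numeric sketch with the usual torchvision defaults (800/1333) and an illustrative frame:

min_size, max_size = 800, 1333
h, w = 480, 640

scale_factor = min(min_size / min(h, w), max_size / max(h, w))
# 800 / 480 = 1.667 wins over 1333 / 640 = 2.083, so the short side lands on 800
assert round(scale_factor * min(h, w)) == 800
assert max(h, w) * scale_factor <= max_size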
    def bounding_box_regression(self, image, prev_boxes):
        original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
        original_image_sizes.append((image.size()[2], image.size()[3]))

        images, targets = self.obj_detect.transform(image.cuda(), None)
        prev_boxes = torch.Tensor(prev_boxes)
        # plot_boxes(image,prev_boxes)

        prev_boxes = resize_boxes(prev_boxes.squeeze(1), original_image_sizes[0], images.image_sizes[0])
        feats = self.obj_detect.backbone(images.tensors)
        roi_heads = self.obj_detect.roi_heads
        box_features = roi_heads.box_roi_pool(feats, [prev_boxes.cuda()], images.image_sizes)
        box_features = roi_heads.box_head(box_features)
        class_logits, box_regression = roi_heads.box_predictor(box_features)

        pred_boxes = roi_heads.box_coder.decode(box_regression, [prev_boxes.cuda()])
        pred_boxes = pred_boxes[:, 1:].squeeze(dim=1).detach()  # new boxes
        pred_boxes = resize_boxes(pred_boxes, images.image_sizes[0], original_image_sizes[0])

        pred_scores = F.softmax(class_logits, -1)  # classification scores for new boxes
        pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()

        return pred_boxes, pred_scores
Example #23
    def forward(self, images, targets=None):
        """
        Arguments:
            images (list[Tensor]): images to be processed
            targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields.
        """
        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")
        original_image_sizes = [img.shape[-2:] for img in images]
        images, targets = self.transform(images, targets)
        features = self.backbone(images.tensors)
        if isinstance(features, torch.Tensor):
            features = OrderedDict([(0, features)])
        proposals, scores, proposal_losses = self.rpn(images, features,
                                                      targets)
        boxes, scores = self.filter_proposals(proposals, scores)
        result = []
        for i in range(len(scores)):
            score = scores[i].cpu().numpy()
            eps = 0.05
            score = (score -
                     np.min(score)) / abs(np.max(score) - np.min(score)) + eps

            #score = np.ones(len(score))

            score = torch.tensor(score)
            result.append({
                "boxes": boxes[i],
                "scores": score,
                "labels": torch.tensor([1] * len(scores[i]))
            })
        detections = self.transform.postprocess(result, images.image_sizes,
                                                original_image_sizes)

        for i, (pred, im_s, o_im_s) in enumerate(
                zip(proposals, images.image_sizes, original_image_sizes)):
            boxes = resize_boxes(pred, im_s, o_im_s)
            proposals[i] = boxes

        losses = {}
        losses.update(proposal_losses)
        if self.training:
            return losses

        return detections, proposals
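The per-image score handling above min-max normalises the proposal scores into [eps, 1 + eps] so every proposal keeps a non-zero weight. A standalone sketch with illustrative scores:

import numpy as np
import torch

score = np.array([2.0, 5.0, 3.5])
eps = 0.05
score = (score - np.min(score)) / abs(np.max(score) - np.min(score)) + eps
score = torch.tensor(score)   # -> tensor([0.05, 1.05, 0.55], dtype=torch.float64)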
Example #24
    def predict_boxes(self, boxes):
        device = self.model.cfg['MODEL']['DEVICE']
        boxes = boxes.to(device)

        boxes = resize_boxes(boxes, self.original_image_sizes[0], self.preprocessed_images.shape)
        proposals = [box_class(box) for box in boxes]
        boxes = box_class(boxes)
        proposals = [boxes]  # proposals look good
        
#         print('proposals: ',proposals[0].tensor)
#         print('feature keys: ',self.features.keys())
#         print('features: ',self.features)
#         print('image_shape: ',self.preprocessed_images.shape)
        try:
            box_features = self.model.model.roi_heads.box_pooler(self.features, proposals) # image_sizes  # self.preprocessed_images.shape
        except Exception:
#             print('trying pure list')
            feat_list = [self.features[k] for k in self.features][:-1]
#             for box in boxes: 
#                 boxc = box_class(box)
            box_features = self.model.model.roi_heads.box_pooler(feat_list, proposals) # image_sizes  # self.preprocessed_images.shape
        #box_features = self.roi_heads.box_roi_pool(self.features, proposals, self.preprocessed_images.image_sizes)

        #self.features, # removed self.features
        box_features = self.model.model.roi_heads.box_head(box_features)
        class_logits, box_regression = self.model.model.roi_heads.box_predictor(box_features)
#         print(box_regression)

#         pred_boxes = self.model.model.roi_heads.box_coder.decode(box_regression, proposals) # failing here 
        pred_boxes = proposals[0].tensor #.detach()
        pred_scores = F.softmax(class_logits, -1)

#         pred_boxes = pred_boxes[:, 1:].squeeze(dim=1).detach()
        pred_boxes = resize_boxes(pred_boxes, self.preprocessed_images.shape, self.original_image_sizes[0]) # image_sizes[0]
        pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()
        return pred_boxes, pred_scores
Example #25
def get_features(obj_detect, img, gts):
    with torch.no_grad():
        obj_detect.load_image(img)

        gts = gts.squeeze(0).cuda()
        gts = resize_boxes(gts, obj_detect.original_image_sizes[0],
                           obj_detect.preprocessed_images.image_sizes[0])
        gts = [gts]

        box_features = obj_detect.roi_heads.box_roi_pool(
            obj_detect.features, gts,
            obj_detect.preprocessed_images.image_sizes)
        box_head_features = obj_detect.roi_heads.box_head(box_features)

    return box_features.cpu(), box_head_features.cpu()
Example #26
    def preprocess(
        self,
        images: List[Tensor],
        detections: List[dict],
        targets: Optional[List[dict]] = None
    ) -> Tuple[List[Tensor], List[dict], List[dict], List[Tuple[int, int]]]:
        original_image_sizes = [img.shape[-2:] for img in images]
        images, targets = self.transform(images, targets)

        for det, o_im_s, im_s in zip(detections, original_image_sizes,
                                     images.image_sizes):
            boxes = det['boxes']
            boxes = transform.resize_boxes(boxes, o_im_s, im_s)
            det['boxes'] = boxes

        return images, detections, targets, original_image_sizes
    def __call__(self, image, target):
        h, w = image.shape[-2:]
        min_size = float(min(image.shape[-2:]))
        max_size = float(max(image.shape[-2:]))

        size = self.min_side
        scale_factor = size / min_size
        if max_size * scale_factor > self.max_side:
            scale_factor = self.max_side / max_size
        image = torch.nn.functional.interpolate(
            image[None], scale_factor=scale_factor, mode='bilinear', align_corners=False)[0]

        if target is None or target["boxes"].nelement() == 0:
            return image, target

        bbox = target["boxes"]
        bbox = resize_boxes(bbox, (h, w), image.shape[-2:])
        target["boxes"] = bbox

        return image, target
    def get_pooled_features(self, bboxs):
        """
        Get roi-pooled features from backbone of object detector, if not using BackboneMotionModel.
        Input:
            -bboxs: (N, 4)
        Output:
            -box_features: (N, 256, 7, 7)
            -box_head_features: (N, 1024)
        """
        bboxs = resize_boxes(
            bboxs, self.obj_detect.original_image_sizes[0],
            self.obj_detect.preprocessed_images.image_sizes[0])
        bboxs = [bboxs]

        box_features = self.obj_detect.roi_heads.box_roi_pool(
            self.obj_detect.features, bboxs,
            self.obj_detect.preprocessed_images.image_sizes)
        box_head_features = self.obj_detect.roi_heads.box_head(box_features)

        return box_features, box_head_features
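The `(N, 256, 7, 7)` shape in the docstring comes from the detector's `box_roi_pool` (a `MultiScaleRoIAlign` with a 7x7 output in torchvision's Faster R-CNN). A standalone sketch that reproduces that shape directly (feature-map size and names are illustrative):

import torch
from collections import OrderedDict
from torchvision.ops import MultiScaleRoIAlign

pooler = MultiScaleRoIAlign(featmap_names=['0'], output_size=7, sampling_ratio=2)
features = OrderedDict([('0', torch.rand(1, 256, 100, 152))])
boxes = [torch.tensor([[50., 60., 200., 220.]])]

pooled = pooler(features, boxes, image_shapes=[(800, 1216)])
assert pooled.shape == (1, 256, 7, 7)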
Example #29
    def forward(self, images, features, targets=None):

        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")

        num_images = len(images.tensors)
        device = images.tensors.device

        proposals = []
        for idx in range(num_images):
            image_id = int(targets[idx]['image_id'].item())
            orig_size = targets[idx]["size"]
            new_size = images.image_sizes[idx]
            box = self.edgeboxes[image_id]
            box = torch.Tensor(box).float()
            box = resize_boxes(box, orig_size, new_size)
            box = box.to(device)
            proposals.append(box)

        boxes = proposals
        losses = {}
        return boxes, losses
Example #30
    def resize(self, image, target, fixed_size=None):
        h, w = image.shape[-2:]
        im_shape = torch.tensor(image.shape[-2:])
        min_size = float(torch.min(im_shape))
        max_size = float(torch.max(im_shape))
        if fixed_size is not None:
            size = fixed_size
        elif self.training:
            size = random.choice(self.min_size)
        else:
            # FIXME assume for now that testing uses the largest scale
            size = self.min_size[-1]
        scale_factor = size / min_size
        if max_size * scale_factor > self.max_size:
            scale_factor = self.max_size / max_size
        image = torch.nn.functional.interpolate(image[None],
                                                scale_factor=scale_factor,
                                                mode='bilinear',
                                                align_corners=False)[0]

        if target is None:
            return image, target

        bbox = target["boxes"]
        bbox = resize_boxes(bbox, (h, w), image.shape[-2:])
        target["boxes"] = bbox

        if "masks" in target:
            mask = target["masks"]
            mask = misc_nn_ops.interpolate(
                mask[None].float(), scale_factor=scale_factor)[0].byte()
            target["masks"] = mask

        if "keypoints" in target:
            keypoints = target["keypoints"]
            keypoints = resize_keypoints(keypoints, (h, w), image.shape[-2:])
            target["keypoints"] = keypoints
        return image, target