Example #1
    def forward(self,
                image: torch.Tensor,  # (batch_size, c, h, w)
                image_sizes: torch.Tensor,  # (batch_size, 2)
                boxes: torch.Tensor = None,  # (batch_size, max_boxes_in_batch, 4)
                box_classes: torch.Tensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        im_sizes = [(x[1].item(), x[0].item()) for x in image_sizes]
        image_list = ImageList(image, im_sizes)
        features = self.backbone.forward(image)
        objectness, rpn_box_regression = self._rpn_head(features)
        anchors: List[torch.Tensor] = self.anchor_generator(image_list, features)
        num_anchors_per_level = [o[0].numel() for o in objectness]
        objectness, rpn_box_regression = \
            concat_box_prediction_layers(objectness, rpn_box_regression)

        out = {'features': features,
               'objectness': objectness,
               'rpn_box_regression': rpn_box_regression,
               'anchors': anchors,
               'sizes': image_sizes,
               'num_anchors_per_level': num_anchors_per_level}
        if boxes is not None:
            labels, matched_gt_boxes = self.assign_targets_to_anchors(
                    anchors, object_utils.unpad(boxes))
            regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)

            sampled_pos_inds, sampled_neg_inds = self.sampler(labels)
            sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
            sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)

            sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

            objectness = objectness.flatten()

            labels = torch.cat(labels, dim=0)
            regression_targets = torch.cat(regression_targets, dim=0)

            loss_rpn_box_reg = F.l1_loss(
                    rpn_box_regression[sampled_pos_inds],
                    regression_targets[sampled_pos_inds],
                    reduction="sum",
            ) / (sampled_inds.numel())

            loss_objectness = F.binary_cross_entropy_with_logits(
                    objectness[sampled_inds], labels[sampled_inds]
            )
            self._loss_meters['rpn_cls_loss'](loss_objectness.item())
            self._loss_meters['rpn_reg_loss'](loss_rpn_box_reg.item())
            out["loss_objectness"] = loss_objectness
            out["loss_rpn_box_reg"] = loss_rpn_box_reg
            out["loss"] = loss_objectness + 10*loss_rpn_box_reg
        return out
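Every example on this page revolves around the same ImageList convention: one padded batch tensor plus the list of sizes each image actually occupies before padding. A minimal sketch, assuming torchvision's torchvision.models.detection.image_list.ImageList (the values below are illustrative only):

import torch
from torchvision.models.detection.image_list import ImageList

# Two images padded into a single (batch, channels, height, width) tensor ...
batch = torch.zeros(2, 3, 800, 1088)
# ... plus the (height, width) each image occupies inside that padding.
sizes = [(800, 1066), (800, 1088)]

image_list = ImageList(batch, sizes)
print(image_list.tensors.shape)   # torch.Size([2, 3, 800, 1088])
print(image_list.image_sizes)     # [(800, 1066), (800, 1088)]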
Example #2
    def forward(
        self,
        images,  # type: List[Tensor]
        targets=None  # type: Optional[List[Dict[str, Tensor]]]
    ):
        # type: (...) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]
        images = [img for img in images]
        if targets is not None:
            # make a copy of targets to avoid modifying it in-place
            # once torchscript supports dict comprehension
            # this can be simplified as follows
            # targets = [{k: v for k,v in t.items()} for t in targets]
            targets_copy: List[Dict[str, Tensor]] = []
            for t in targets:
                data: Dict[str, Tensor] = {}
                for k, v in t.items():
                    data[k] = v
                targets_copy.append(data)
            targets = targets_copy
        for i in range(len(images)):
            image = images[i]
            target_index = targets[i] if targets is not None else None

            if image.dim() != 3:
                raise ValueError(
                    "images is expected to be a list of 3d tensors "
                    "of shape [C, H, W], got {}".format(image.shape))
            image = self.normalize(image)
            image, target_index = self.resize(image, target_index)
            images[i] = image
            if targets is not None and target_index is not None:
                targets[i] = target_index

        image_sizes = [img.shape[-2:] for img in images]
        images = self.batch_images(images)
        image_sizes_list = torch.jit.annotate(List[Tuple[int, int]], [])
        for image_size in image_sizes:
            assert len(image_size) == 2
            image_sizes_list.append((image_size[0], image_size[1]))

        image_list = ImageList(images, image_sizes_list)
        return image_list, targets
Example #3
def forward(self, images, targets=None):
    for i in range(len(images)):
        image = images[i]
        target = targets[i] if targets is not None else targets
        if image.dim() != 3:
            raise ValueError("images is expected to be a list of 3d tensors "
                             "of shape [C, H, W], got {}".format(image.shape))
        # image = self.normalize(image)
        # image, target = self.resize(image, target)
        images[i] = image
        if targets is not None:
            targets[i] = target
    image_sizes = [img.shape[-2:] for img in images]
    images = self.batch_images(images)
    image_list = ImageList(images, image_sizes)
    return image_list, targets
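The self.batch_images call in Examples #2 and #3 is where differently sized images become one tensor. A rough, stand-alone sketch of that step, assuming the usual behaviour of padding every image to the batch-wide maximum height and width rounded up to a size_divisible multiple (the name batch_images_sketch is ours, not the project's):

import torch

def batch_images_sketch(images, size_divisible=32):
    # Pad each (C, H, W) image with zeros to the batch-wide maximum H and W,
    # rounded up to a multiple of size_divisible.
    max_h = max(img.shape[-2] for img in images)
    max_w = max(img.shape[-1] for img in images)
    max_h = ((max_h + size_divisible - 1) // size_divisible) * size_divisible
    max_w = ((max_w + size_divisible - 1) // size_divisible) * size_divisible

    batched = images[0].new_zeros((len(images), images[0].shape[0], max_h, max_w))
    for img, padded in zip(images, batched):
        padded[:, :img.shape[-2], :img.shape[-1]].copy_(img)
    return batched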
Example #4
def inference(model, input, transform, device="cuda"):
    input_image = Image.open(input)
    data = {'image': input_image}
    # data pre-processing
    data = transform(data)
    with torch.no_grad():
        input_image_list = ImageList([data['image'].to(device)], image_sizes=[input_image.size[::-1]])
        panoptic_result, _ = model.forward(input_image_list)
        semseg_logics = [o.to('cpu') for o in panoptic_result["semantic_segmentation_result"]]
        # Export the result
        output = input.replace("/data/", "/output/")
        os.makedirs(parent(output), exist_ok=True)
        assert os.path.exists(parent(output))
        semseg_prob = [torch.argmax(semantic_logit, dim=0) for semantic_logit in semseg_logics]
        seg_vis = visualize_segmentation_image(semseg_prob[0], input_image, cityscapes_colormap_sky)
        Image.fromarray(seg_vis.astype('uint8')).save(output)
Example #5
 def forward(self, images, targets=None):
     for i in range(len(images)):
         if targets is None:
             img, target = self.rescale(images[i])
         else:
             img, target = self.rescale(images[i], targets[i])
         img = self.normalize(img)
         images[i] = img
         if targets:
             targets[i] = target
     images_batched = self.batchimages(images)
     imgs_size = [img.shape[-2:] for img in images]
     imgs_list = ImageList(images_batched, imgs_size)
     if targets:
         return imgs_list, targets
     return imgs_list
Example #6
 def forward(self, images, targets=None):
     # # type: (List[Tensor], Optional[List[Dict[str, Tensor]]])
     images = [img for img in images]
     for idx, img in enumerate(images):
         target_index = targets[idx] if targets is not None else None
         img = self.normalize(img)
         img, target_index = self.resize(img, target_index)
         images[idx] = img
         if targets is not None and target_index is not None:
             assert img.shape[-2:] == target_index['masks'].shape[-2:]
             targets[idx] = target_index
     image_sizes = [img.shape[-2:] for img in images]
     images = self.batch_images(images)
     # image_sizes_list = torch.jit.annotate(List[Tuple[int, int]], [])
     # for image_size in image_sizes:
     # assert len(image_size) == 2
     # image_sizes_list.append((image_size[0], image_size[1]))
     image_list = ImageList(images, image_sizes)
     return image_list, targets
Example #7
def box_validation(box_head, test_loader,optimizer,epoch,backbone,rpn,keep_topK):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    epoch_loss = 0
    epoch_clas_loss = 0
    epoch_regr_loss = 0

    # TODO: double-check the following two values, just placeholders for now
    l = 5
    effective_batch = 32  # suggested 150 for 4 images

    for i,data in enumerate(test_loader):
        # imgs, label_list, mask_list, bbox_list, index_list = [data[key] for key in data.keys()]
        images = data['images'].to(device)
        boxes = data['bbox']
        labels = data['labels']
        with torch.no_grad():
            backout = backbone(images)
            im_lis = ImageList(images, [(800, 1088)]*images.shape[0])
            rpnout = rpn(im_lis, backout)
            proposals=[proposal[0:keep_topK,:] for proposal in rpnout[0]]
            fpn_feat_list= list(backout.values())
            gt_labels, gt_regressor_target = box_head.create_ground_truth(proposals, labels, boxes)
            roi_align_result = box_head.MultiScaleRoiAlign(fpn_feat_list, proposals)  # This is the input to Box head
            clas_out, regr_out = box_head.forward(roi_align_result.to(device))
            loss, loss_c, loss_r = box_head.compute_loss(clas_out, regr_out, gt_labels, gt_regressor_target, l,
                                                         effective_batch)
        epoch_loss += loss.item()
        epoch_clas_loss += loss_c.item()
        epoch_regr_loss += loss_r.item()

        #delete variables after usage to free GPU ram, double check if these variables are needed for future!!!!!!!
        del loss ,loss_c , loss_r
        del images, labels, boxes
        del clas_out, regr_out
        del gt_labels, gt_regressor_target
        torch.cuda.empty_cache()

    # enumerate starts at 0, so the number of batches is i + 1
    epoch_loss /= (i + 1)
    epoch_clas_loss /= (i + 1)
    epoch_regr_loss /= (i + 1)

    return epoch_loss, epoch_clas_loss, epoch_regr_loss
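One plausible way to wire up box_validation, reusing the pretrained_models_680 loader and BoxHead constructor that Example #22 shows; the checkpoint path and test_loader are placeholders, not values from the original project:

import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# pretrained_models_680 and BoxHead come from the surrounding project (see Example #22).
backbone, rpn = pretrained_models_680('../pretrained/checkpoint680.pth')
backbone, rpn = backbone.to(device), rpn.to(device)
box_head = BoxHead(Classes=3, P=7, device=device).to(device)

# test_loader is assumed to yield dicts with 'images', 'bbox' and 'labels' keys,
# as the loop inside box_validation expects.
val_loss, val_cls_loss, val_reg_loss = box_validation(
    box_head, test_loader, optimizer=None, epoch=0,
    backbone=backbone, rpn=rpn, keep_topK=200)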
Example #8
    def test_defaultbox_generator(self):
        images = torch.zeros(2, 3, 15, 15)
        features = [torch.zeros(2, 8, 1, 1)]
        image_shapes = [i.shape[-2:] for i in images]
        images = ImageList(images, image_shapes)

        model = self._init_test_defaultbox_generator()
        model.eval()
        dboxes = model(images, features)

        dboxes_output = torch.tensor([[6.3750, 6.3750, 8.6250, 8.6250],
                                      [4.7443, 4.7443, 10.2557, 10.2557],
                                      [5.9090, 6.7045, 9.0910, 8.2955],
                                      [6.7045, 5.9090, 8.2955, 9.0910]])

        self.assertEqual(len(dboxes), 2)
        self.assertEqual(tuple(dboxes[0].shape), (4, 4))
        self.assertEqual(tuple(dboxes[1].shape), (4, 4))
        self.assertTrue(dboxes[0].allclose(dboxes_output))
        self.assertTrue(dboxes[1].allclose(dboxes_output))
Example #9
    def test_defaultbox_generator(self):
        images = torch.zeros(2, 3, 15, 15)
        features = [torch.zeros(2, 8, 1, 1)]
        image_shapes = [i.shape[-2:] for i in images]
        images = ImageList(images, image_shapes)

        model = self._init_test_defaultbox_generator()
        model.eval()
        dboxes = model(images, features)

        dboxes_output = torch.tensor([[6.9750, 6.9750, 8.0250, 8.0250],
                                      [6.7315, 6.7315, 8.2685, 8.2685],
                                      [6.7575, 7.1288, 8.2425, 7.8712],
                                      [7.1288, 6.7575, 7.8712, 8.2425]])

        self.assertEqual(len(dboxes), 2)
        self.assertEqual(tuple(dboxes[0].shape), (4, 4))
        self.assertEqual(tuple(dboxes[1].shape), (4, 4))
        self.assertTrue(dboxes[0].allclose(dboxes_output))
        self.assertTrue(dboxes[1].allclose(dboxes_output))
Example #10
    def test_anchor_generator(self):
        images = torch.randn(2, 3, 16, 32)
        features = self.get_features(images)
        features = list(features.values())
        image_shapes = [i.shape[-2:] for i in images]
        images = ImageList(images, image_shapes)

        model = self._init_test_anchor_generator()
        model.eval()
        anchors = model(images, features)

        # Compute the expected number of anchors
        grid_sizes = [f.shape[-2:] for f in features]
        num_anchors_estimated = 0
        for sizes, num_anchors_per_loc in zip(
                grid_sizes, model.num_anchors_per_location()):
            num_anchors_estimated += sizes[0] * sizes[1] * num_anchors_per_loc

        self.assertEqual(num_anchors_estimated, 126)
        self.assertEqual(len(anchors), 2)
        self.assertEqual(tuple(anchors[0].shape), (num_anchors_estimated, 4))
        self.assertEqual(tuple(anchors[1].shape), (num_anchors_estimated, 4))
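The counting loop above is just grid_height * grid_width * anchors_per_location summed over the feature levels. A hedged, self-contained illustration with made-up level sizes (not the configuration used by _init_test_anchor_generator):

# Hypothetical feature-map grids and per-location anchor counts.
grid_sizes = [(4, 8), (2, 4)]        # (H, W) of two pyramid levels
anchors_per_location = [3, 3]        # e.g. 3 sizes x 1 aspect ratio on each level

expected = sum(h * w * a for (h, w), a in zip(grid_sizes, anchors_per_location))
print(expected)  # 4*8*3 + 2*4*3 = 120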
Example #11
    def test_defaultbox_generator(self):
        images = torch.zeros(2, 3, 15, 15)
        features = [torch.zeros(2, 8, 1, 1)]
        image_shapes = [i.shape[-2:] for i in images]
        images = ImageList(images, image_shapes)

        model = self._init_test_defaultbox_generator()
        model.eval()
        dboxes = model(images, features)

        dboxes_output = torch.tensor([
            [6.3750, 6.3750, 8.6250, 8.6250],
            [4.7443, 4.7443, 10.2557, 10.2557],
            [5.9090, 6.7045, 9.0910, 8.2955],
            [6.7045, 5.9090, 8.2955, 9.0910]
        ])

        assert len(dboxes) == 2
        assert tuple(dboxes[0].shape) == (4, 4)
        assert tuple(dboxes[1].shape) == (4, 4)
        torch.testing.assert_close(dboxes[0], dboxes_output, rtol=1e-5, atol=1e-8)
        torch.testing.assert_close(dboxes[1], dboxes_output, rtol=1e-5, atol=1e-8)
Example #12
        with torch.no_grad():

            # if epoch > 0:
            #     box_head.gt_dict.clear()
            #     gt_path = os.path.join('.', 'drive', 'My Drive', 'CIS 680', 'Faster_RCNN', 'saved_dicts3',
            #                            'gt_val_dict.h5')
            #     box_head.gt_dict = torch.load(gt_path)

            for j, data in enumerate(test_loader, 0):
                images, label, bbox, index = data

                # Take the features from the backbone
                backout = backbone(images.float())

                # The RPN implementation takes as first argument the following image list
                im_lis = ImageList(images.float(), [(800, 1088)] * images.shape[0])
                # Then we pass the image list and the backbone output through the rpn
                rpnout = rpn(im_lis, backout)

                # The final output is
                # A list of proposal tensors: list:len(bz){(keep_topK,4)}
                proposals = [proposal[0:keep_topK, :] for proposal in rpnout[0]]
                # A list of features produced by the backbone's FPN levels: list:len(FPN){(bz,256,H_feat,W_feat)}

                fpn_feat_list = list(backout.values())

                del images, backout, rpnout, im_lis
                torch.cuda.empty_cache()

                feature_vectors = box_head.MultiScaleRoiAlign(fpn_feat_list, proposals, index)
Example #13
def run_inference(images, index, backbone, rpn, boxHead):
    """
    Run inference and visualization for one image
    :param images:
    :param index:
    :param backbone:
    :param rpn:
    :param boxHead:
    :return:
    """
    with torch.no_grad():
        # Take the features from the backbone
        backout = backbone(images)

        # The RPN implementation takes as first argument the following image list
        im_lis = ImageList(images, [(800, 1088)] * images.shape[0])
        # Then we pass the image list and the backbone output through the rpn
        rpnout = rpn(im_lis, backout)

        # The final output is
        # A list of proposal tensors: list:len(bz){(keep_topK,4)}
        proposals = [proposal[0:keep_topK, :] for proposal in rpnout[0]]
        # A list of features produced by the backbone's FPN levels: list:len(FPN){(bz,256,H_feat,W_feat)}
        fpn_feat_list = list(backout.values())

        feature_vectors = boxHead.MultiScaleRoiAlign(fpn_feat_list, proposals)

        class_logits, box_pred = boxHead(feature_vectors)
        class_logits = torch.softmax(
            class_logits, dim=1)  # todo: check softmax is applied everywhere

        # convert proposal to xywh
        proposal_torch = torch.cat(proposals, dim=0)  # x1 y1 x2 y2
        proposal_xywh = torch.zeros_like(proposal_torch, device=proposal_torch.device)
        proposal_xywh[:, 0] = (proposal_torch[:, 0] + proposal_torch[:, 2]) / 2
        proposal_xywh[:, 1] = (proposal_torch[:, 1] + proposal_torch[:, 3]) / 2
        proposal_xywh[:, 2] = torch.abs(proposal_torch[:, 2] - proposal_torch[:, 0])
        proposal_xywh[:, 3] = torch.abs(proposal_torch[:, 3] - proposal_torch[:, 1])

        # decode output
        prob_simp, class_simp, box_simp = utils.simplifyOutputs(
            class_logits, box_pred)
        # box_decoded: format x1, y1, x2, y2
        box_decoded = utils.decode_output(proposal_xywh, box_simp)

        # Do whatever post-processing you find performs best
        post_nms_prob, post_nms_class, post_nms_box = boxHead.postprocess_detections(
            prob_simp,
            class_simp,
            box_decoded,
            conf_thresh=0.8,
            keep_num_preNMS=200,
            keep_num_postNMS=3,
            IOU_thresh=0.5)

        # visualization: PostNMS
        assert post_nms_class.dim() == 1
        assert post_nms_box.dim() == 2
        return post_nms_prob, post_nms_class, post_nms_box
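The inline corner-to-center conversion above (x1, y1, x2, y2 into cx, cy, w, h) also appears in Examples #17 and #22. A compact restatement of the same arithmetic as a helper (the name xyxy_to_cxcywh is ours):

import torch

def xyxy_to_cxcywh(boxes):
    # boxes: (N, 4) tensor in x1, y1, x2, y2 order.
    x1, y1, x2, y2 = boxes.unbind(dim=1)
    return torch.stack([(x1 + x2) / 2,       # cx
                        (y1 + y2) / 2,       # cy
                        (x2 - x1).abs(),     # w
                        (y2 - y1).abs()],    # h
                       dim=1)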
Example #14
    def forward(self, _images, targets=None, return_result=False):
        bs = _images.size(0)
        assert bs == 1

        # Process images
        device = _images.device
        images = torch.zeros(1, 6, 3, 400, 400)
        for i in range(6):
            images[0, i] = self.img_transform(_images[0, i].cpu())
        del _images
        images = images.to(device)

        # Process targets
        #         label_index = targets[0]['labels'] == 2
        #         targets[0]['boxes'] = targets[0]['boxes'][label_index]
        #         targets[0]['labels'] = targets[0]['labels'][label_index]

        targets = [{k: v for k, v in t.items()} for t in targets]
        targets[0]['old_boxes'] = targets[0]['boxes'] / 2.
        min_coordinates, _ = torch.min(targets[0]['boxes'], 2)
        max_coordinates, _ = torch.max(targets[0]['boxes'], 2)
        targets[0]['boxes'] = torch.cat([min_coordinates, max_coordinates], 1)
        temp_tensor = torch.zeros(1, 3, 800, 800)
        _, targets = self.target_transform(temp_tensor, targets)

        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")
        original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
        for img in images:
            val = img.shape[-2:]
            assert len(val) == 2
            original_image_sizes.append((val[0], val[1]))

        # images, targets = self.transform(images, targets)
        # HACK
        images = ImageList(images, ((400, 400), ) * images.size(0))
        targets = [{
            k: v.to(images.tensors.device)
            for k, v in t.items() if k != 'masks'
        } for t in targets]

        # Pass images from 6 camera angle to different backbone
        features_list = torch.stack([
            self.backbone(images.tensors[:, i])['0']
            for i in range(self.input_img_num)
        ],
                                    dim=1)

        feature_h, feature_w = features_list.size()[-2:]
        features_list = features_list.view(
            bs, self.backbone_out_channels * self.input_img_num, feature_h,
            feature_w)

        features = OrderedDict([('0', features_list)])
        #         if isinstance(features, torch.Tensor):
        #             features = OrderedDict([('0', features)])

        proposals, proposal_losses = self.rpn(images, features, targets)
        detections, detector_losses = self.roi_heads(features, proposals,
                                                     images.image_sizes,
                                                     targets)
        detections = self.transform.postprocess(detections, images.image_sizes,
                                                original_image_sizes)

        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)
        losses.update(
            {'loss_mask': torch.zeros(1, device=images.tensors.device)})

        mask_ts = 0.
        mask_ts_numerator = 0
        mask_ts_denominator = 1

        with torch.no_grad():

            # Get object detection threat score
            cpu_detections = [{k: v.cpu()
                               for k, v in t.items()} for t in detections]
            # TODO: add threshold more than 0.5
            detection_ts, detection_ts_numerator, detection_ts_denominator =\
                get_detection_threat_score(cpu_detections, targets, 0.5)

        if return_result:
            # DEBUG
            masks = 0
            #             return losses, mask_ts, mask_ts_numerator,\
            #                    mask_ts_denominator, detection_ts, detection_ts_numerator,\
            #                    detection_ts_denominator, detections, masks
            return mask_ts, mask_ts_numerator,\
                   mask_ts_denominator, detection_ts, detection_ts_numerator,\
                   detection_ts_denominator, detections, masks
        else:
            #             return losses, mask_ts, mask_ts_numerator, mask_ts_denominator,\
            #                    detection_ts, detection_ts_numerator, detection_ts_denominator
            return losses
Example #15
def box_train(box_head, train_loader,optimizer,epoch,backbone,rpn,keep_topK,l):
    start_time = time.time()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    epoch_loss = 0
    epoch_clas_loss = 0
    epoch_regr_loss = 0
    running_loss =0
    running_clas_loss =0
    running_regr_loss =0

    # TODO: double-check the following two values, just placeholders for now

    effective_batch = 32  # used in TA's test case
    batch_loss = []
    batch_loss_c = []
    batch_loss_r = []
    for i,data in enumerate(train_loader):

        optimizer.zero_grad()
        # images, labels, mask, boxes, indexes = [data[key] for key in data.keys()]
        # images = images.to(device)
        images = data['images'].to(device)
        indexes = data['index']
        boxes = data['bbox']
        labels = data['labels']
        mask = data['masks']

        # Take the features from the backbone
        with torch.no_grad():
            backout = backbone(images)
            # The RPN implementation takes as first argument the following image list
            im_lis = ImageList(images, [(800, 1088)]*images.shape[0])
            # Then we pass the image list and the backbone output through the rpn
            rpnout = rpn(im_lis, backout)
            #The final output is
            # A list of proposal tensors: list:len(bz){(keep_topK,4)}
            proposals=[proposal[0:keep_topK,:] for proposal in rpnout[0]]
            # A list of features produced by the backbone's FPN levels: list:len(FPN){(bz,256,H_feat,W_feat)}
            fpn_feat_list= list(backout.values())
        # fpn_feat_list =  [item.to('cpu') for item in fpn_feat_list]

        gt_labels, gt_regressor_target = box_head.create_ground_truth(proposals,labels,boxes)

        # TODO: check this line

        # proposals_roi = copy.deepcopy(proposals)
        roi_align_result = box_head.MultiScaleRoiAlign(fpn_feat_list,proposals) #This is the input to Box head
        clas_out, regr_out = box_head.forward(roi_align_result.to(device))

        loss, loss_c, loss_r = box_head.compute_loss(clas_out,regr_out,gt_labels,gt_regressor_target,l,effective_batch)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_clas_loss += loss_c.item()
        epoch_regr_loss += loss_r.item()
        running_loss += loss.item()
        running_clas_loss += loss_c.item()
        running_regr_loss += loss_r.item()

        # batch_loss.append(loss.item())
        # batch_loss_c.append(loss_c.item())
        # batch_loss_r.append(loss_r.item())
        #print results every log_iter batch:
        # log_iter = 100
        # if i % log_iter == (log_iter-1):  # print every 100 mini-batches
        #     print('[%d, %5d] total_loss: %.5f clas_loss: %.5f  regr_loss: %.5f' %
        #           (epoch + 1, i + 1,
        #           running_loss / log_iter,
        #           running_clas_loss / log_iter,
        #           running_regr_loss / log_iter))
        #
        #     running_loss = 0
        #     running_clas_loss = 0
        #     running_regr_loss = 0
        #     print("--- %s minutes ---" % ((time.time() - start_time)/60))
        #     start_time = time.time()


        #delete variables after usage to free GPU ram, double check if these variables are needed for future!!!!!!!
        # del loss ,loss_c , loss_r
        # del images, labels, mask, boxes, indexes
        # del clas_out, regr_out
        # del gt_labels, gt_regressor_target
        # torch.cuda.empty_cache()

    # plt.figure()
    # plt.plot(batch_loss, label='Training')
    # plt.figure()
    # plt.plot(batch_loss_c, label='Training')
    # plt.figure()
    # plt.plot(batch_loss_r, label='Training')
    # plt.show()
    # enumerate starts at 0, so the number of batches is i + 1
    epoch_loss /= (i + 1)
    epoch_clas_loss /= (i + 1)
    epoch_regr_loss /= (i + 1)
    # print('finished one epoch ')
    # exit()
    return epoch_loss, epoch_clas_loss, epoch_regr_loss
Example #16
    def forward(self,
                _images,
                _targets=None,
                return_result=False,
                return_losses=False):
        bs = _images.size(0)
        assert bs == 1

        device = _images.device

        # Process images
        images = torch.zeros(1, 6, 3, 400, 400)
        depths = torch.zeros(1, 6, 3, 128, 416)
        for i in range(6):
            images[0, i] = self.img_transform(_images[0, i].cpu())
            depths[0, i] = self.depth_transform(_images[0, i].cpu())
        del _images
        images = images.to(device)
        depths = depths.to(device)

        # Get depth map
        depths = self.depth_estimator(depths.squeeze(0))[0]
        depths = self.depth_resize(depths.unsqueeze(1))
        depths = depths.view(1, 6, 1, 400, 400)
        images = torch.cat((images, depths), dim=2)
        del depths

        # Process targets
        dis = torch.mean(_targets[0]['boxes'], dim=2) - torch.tensor(
            [400., 400.])
        index_1 = torch.sqrt(torch.sum(torch.pow(dis, 2), dim=1)) < 300.
        index_2 = (_targets[0]['labels'] == 0) | (_targets[0]['labels'] == 2) |\
            (_targets[0]['labels'] == 4) | (_targets[0]['labels'] == 5)
        label_index = index_1 * index_2

        targets = [copy.deepcopy(_targets[0])]
        targets[0]['boxes'] = targets[0]['boxes'][label_index]
        targets[0]['labels'] = targets[0]['labels'][label_index]

        targets = [{k: v for k, v in t.items()} for t in targets]
        # targets[0]['old_boxes'] = targets[0]['boxes'] / 2.
        min_coordinates, _ = torch.min(targets[0]['boxes'], 2)
        max_coordinates, _ = torch.max(targets[0]['boxes'], 2)
        targets[0]['boxes'] = torch.cat([min_coordinates, max_coordinates], 1)
        temp_tensor = torch.zeros(1, 3, 800, 800)
        _, targets = self.target_transform(temp_tensor, targets)

        # type: (List[Tensor], Optional[List[Dict[str, Tensor]]])
        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")
        original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
        for img in images:
            val = img.shape[-2:]
            assert len(val) == 2
            original_image_sizes.append((val[0], val[1]))

        # images, targets = self.transform(images, targets)
        device = images.device
        images = ImageList(images, ((400, 400), ) * images.size(0))
        target_masks = torch.stack(
            [t['masks'].float().to(device) for t in targets])
        targets = [{k: v.to(device)
                    for k, v in t.items() if k != 'masks'} for t in targets]

        # Mask backbone
        features_list = torch.stack([
            self.backbone(images.tensors[:, i])
            for i in range(self.input_img_num)
        ],
                                    dim=1)

        feature_h, feature_w = features_list.size()[-2:]
        combined_feature_map = features_list.view(bs, self.input_img_num,
                                                  feature_h, feature_w)

        masks, mask_losses = self.mask_net(combined_feature_map, target_masks)

        del features_list
        torch.cuda.empty_cache()

        # Detection backbone
        features_list = torch.stack([
            self.backbone_(images.tensors[:, i])
            for i in range(self.input_img_num)
        ],
                                    dim=1)

        feature_h, feature_w = features_list.size()[-2:]
        detection_combined_feature_map = features_list.view(
            bs, 64 * self.input_img_num, 400, 400)
        del features_list
        torch.cuda.empty_cache()

        road_map_features = OrderedDict([('0', combined_feature_map)])
        detection_features = OrderedDict([('0', detection_combined_feature_map)
                                          ])

        proposals, proposal_losses = self.rpn(images, road_map_features,
                                              targets)
        # try:
        #     detections, detector_losses = self.roi_heads(detection_features, proposals, images.image_sizes, targets)
        #     detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)
        # except RuntimeError as e:
        #     print(e)
        #     detections = None
        #     detector_losses = {
        #         'loss_box_reg': torch.zeros(1),
        #         'loss_classifier': torch.zeros(1)}
        detections, detector_losses = self.roi_heads(detection_features,
                                                     proposals,
                                                     images.image_sizes,
                                                     targets)
        detections = self.transform.postprocess(detections, images.image_sizes,
                                                original_image_sizes)

        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)
        losses.update(mask_losses)

        if return_result:
            return masks, detections
        else:
            return losses
Example #17
        num_bbox_class.append(num_class2)
        num_class3 = torch.count_nonzero(bbox_list[0] == 3)
        num_bbox_class.append(num_class3)

        image = transforms.functional.normalize(img[0].cpu().detach(),
                                                [-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225],
                                                [1 / 0.229, 1 / 0.224, 1 / 0.225], inplace=False)

        image_vis = image.permute(1, 2, 0).cpu().detach().numpy()
        num_grnd_box = len(bbox_list)

        # Take the features from the backbone
        backout = backbone(img)

        # The RPN implementation takes as first argument the following image list
        im_lis = ImageList(img, [(800, 1088)] * img.shape[0])
        rpnout = rpn(im_lis, backout)

        # The final output is a list of proposal tensors: list:len(bz){(keep_topK,4)}
        proposals = [proposal[0:keep_topK_check, :] for proposal in rpnout[0]]
        # generate gt labels
        labels, regressor_target = box_head.create_ground_truth(proposals, label_list, bbox_list)      # tx, ty, tw, th
        labels = labels.flatten()
        # A list of features produced by the backbone's FPN levels: list:len(FPN){(bz,256,H_feat,W_feat)}
        fpn_feat_list = list(backout.values())
        proposal_torch = torch.cat(proposals, dim=0)  # x1 y1 x2 y2
        proposal_xywh = torch.zeros_like(proposal_torch, device=proposal_torch.device)
        proposal_xywh[:, 0] = ((proposal_torch[:, 0] + proposal_torch[:, 2]) / 2)
        proposal_xywh[:, 1] = ((proposal_torch[:, 1] + proposal_torch[:, 3]) / 2)
        proposal_xywh[:, 2] = torch.abs(proposal_torch[:, 2] - proposal_torch[:, 0])
        proposal_xywh[:, 3] = torch.abs(proposal_torch[:, 3] - proposal_torch[:, 1])
Example #18
def proposal_confusion_matrix(loader):
    """Returns mean loss per sample in loader"""
        
    TP, FP, TN, FN = 0, 0, 0, 0

    with torch.no_grad():

        for idx, (batch, pad_lengths) in enumerate(loader):

            images, masks, bboxes, labels = batch

            images = images.to(device=DEVICE, dtype=torch.float)
            bboxes = bboxes.to(device=DEVICE, dtype=torch.float)
            labels = labels.to(device=DEVICE, dtype=torch.float)
            
            backbone_out = BACKBONE(images)

            img_list = ImageList(
                images, list(itertools.repeat((TARGET_HEIGHT, TARGET_WIDTH), len(images))))

            rpn_proposals = RPN(img_list, backbone_out)[0]

            sel_pos_proposals = [1]*len(images)
            sel_pos_bboxes = [1]*len(images)
            sel_pos_labels = [1]*len(images)

            sel_neg_proposals = [1]*len(images)
            sel_neg_bboxes = [1]*len(images)
            sel_neg_labels = [1]*len(images)
            
            # Sample rpn proposals for positive and negative proposals
            for ix, proposals in enumerate(rpn_proposals):
                ground_truth = sample_ground_truth(
                    proposals,
                    bboxes[ix][:pad_lengths["bboxes"][ix]],
                    labels[ix][:pad_lengths["labels"][ix]],
                    iou_thresh=0.5)

                positive_proposals, positive_bboxes, positive_labels, negative_proposals, negative_bboxes, negative_labels = ground_truth
                
                # Positive samples
                sel_pos_proposals[ix] = positive_proposals
                sel_pos_bboxes[ix] = positive_bboxes
                sel_pos_labels[ix] = positive_labels
                
                # Negatives samples
                sel_neg_proposals[ix] = negative_proposals
                sel_neg_bboxes[ix] = negative_bboxes
                sel_neg_labels[ix] = negative_labels

            sel_proposals = sel_pos_proposals + filter_none(sel_neg_proposals)
            sel_bboxes = sel_pos_bboxes + filter_none(sel_neg_bboxes)
            sel_labels = sel_pos_labels + filter_none(sel_neg_labels)

            # ROI Align
            roi_aligned_proposals = torchvision.ops.roi_align(
                backbone_out[0],
                sel_proposals,
                (7,7),
                spatial_scale=1./4.,
                sampling_ratio=4)

            sel_proposals = torch.cat(sel_proposals, dim=0)
            sel_bboxes = torch.cat(sel_bboxes, dim=0)
            sel_labels = torch.cat(sel_labels, dim=0)
        
            sel_pos_proposals = torch.cat(sel_pos_proposals, dim=0)
            sel_pos_bboxes = torch.cat(sel_pos_bboxes, dim=0)
            sel_pos_labels = torch.cat(sel_pos_labels, dim=0)
            
            # the total num. of positive proposals
            n_pos_proposals = len(sel_pos_proposals) 

            # Roi Aligned into Intermediate then Regressor/Classifier
            roi_out = ROI_NET(roi_aligned_proposals)
            class_out = CLASS_NET(roi_out)

            pred_probs, pred_classes = torch.softmax(class_out, dim=1).max(dim=1)

            tp = (pred_classes[(sel_labels != 0).nonzero().squeeze()] == sel_labels[(sel_labels != 0).nonzero().squeeze()]).sum(0).item()
            fp = (pred_classes != 0).sum(0).item() - tp

            tn = (pred_classes[(sel_labels == 0).nonzero().squeeze()] == sel_labels[(sel_labels == 0).nonzero().squeeze()]).sum(0).item()
            fn = (pred_classes == 0).sum(0).item() - tn

            TP += tp
            FP += fp
            TN += tn
            FN += fn

    accuracy = (TP + TN) / (TP + FP + TN + FN)
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    
    return accuracy, precision, recall
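For reference, a tiny worked instance of the three ratios returned above, with made-up counts:

# Made-up counts, purely to illustrate the formulas above.
TP, FP, TN, FN = 40, 10, 30, 20
accuracy  = (TP + TN) / (TP + FP + TN + FN)   # 70 / 100 = 0.70
precision = TP / (TP + FP)                    # 40 / 50  = 0.80
recall    = TP / (TP + FN)                    # 40 / 60  = 0.67 (approx.)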
Example #19
    for idx, (batch, pad_lengths) in enumerate(tiny_loader):

        # zero the parameter gradients
        optimizer.zero_grad()

        images, masks, bboxes, labels = batch

        images = images.to(device=DEVICE, dtype=torch.float)
        bboxes = bboxes.to(device=DEVICE, dtype=torch.float)
        labels = labels.to(device=DEVICE, dtype=torch.float)
        masks = masks.to(device=DEVICE, dtype=torch.float)
        
        backbone_out = BACKBONE(images)

        img_list = ImageList(
            images, list(itertools.repeat((TARGET_HEIGHT, TARGET_WIDTH), len(images))))

        rpn_proposals = RPN(img_list, backbone_out)[0]

        sel_pos_proposals = [1]*len(images)
        sel_pos_bboxes = [1]*len(images)
        sel_pos_labels = [1]*len(images)

        POS_CT = [1]*len(images)

        sel_neg_proposals = [1]*len(images)
        sel_neg_bboxes = [1]*len(images)
        sel_neg_labels = [1]*len(images)

        # Sample rpn proposals for positive and negative proposals
        for ix, proposals in enumerate(rpn_proposals):
Example #20
    test_loader = test_build_loader.loader()

    # Here we keep the top 20, but during training you
    #   should keep around 200 boxes from the 1000 proposals
    keep_topK = 20

    with torch.no_grad():
        for iter, batch in enumerate(test_loader, 0):
            images, *other = batch
            images = images.to(device)

            # Take the features from the backbone
            backout = backbone(images)

            # The RPN implementation takes as first argument the following image list
            im_lis = ImageList(images, [(800, 1088)] * images.shape[0])
            # Then we pass the image list and the backbone output through the rpn
            rpnout = rpn(im_lis, backout)

            # The final output is
            # A list of proposal tensors:
            #   list:len(bz){(keep_topK,4)}
            proposals = [proposal[0:keep_topK, :] for proposal in rpnout[0]]
            # A list of features produced by the backbone's FPN levels:
            #   list:len(FPN){(bz,256,H_feat,W_feat)}
            fpn_feat_list = list(backout.values())

            print("For the proposals We have a list containing " +
                  str(len(proposals)) + " tensors")
            print("Each one with shape " + str(proposals[0].shape))
            print("")
Example #21
    def __init__(self,
                 model,
                 loss,
                 metrics,
                 optimizer,
                 config,
                 data_loader,
                 valid_data_loader=None,
                 lr_scheduler=None,
                 len_epoch=None):
        super().__init__(model, loss, metrics, optimizer, config)
        self.config = config
        self.data_loader = data_loader
        if len_epoch is None:
            # epoch-based training
            self.len_epoch = len(self.data_loader)
        else:
            # iteration-based training
            self.data_loader = inf_loop(data_loader)
            self.len_epoch = len_epoch
        self.valid_data_loader = valid_data_loader
        self.do_validation = self.valid_data_loader is not None
        self.lr_scheduler = lr_scheduler
        self.log_step = int(np.sqrt(data_loader.batch_size))

        # conv_size = [5, 10, 20, 40, 80, 160]
        conv_size = [160, 80, 40, 20, 10, 5]
        anchors = []
        # stride = 128
        stride = 4

        colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0),
                  (0, 255, 255), (255, 0, 255)]
        col_idx = 0
        image = np.zeros((640, 640, 3), np.uint8)
        img_list = ImageList(image, [(640, 640)])

        for fm_size in conv_size:
            # anchor_gene = AnchorGenerator(sizes=((stride * 4),), aspect_ratios=((1.0),))
            anchor = Generate_Anchors(stride, fm_size)
            anchors.append(anchor)  # collect this level's anchors so torch.cat below has data
            # anchor = anchor_gene(img_list, torch.randn(1, fm_size, fm_size))[0]
            # anchors.append(anchor_gene(img_list, torch.randn(1, fm_size, fm_size))[0])
            stride *= 2
            for box in anchor:
                # x1 = box[1]
                # y1 = box[0]
                # x2 = box[3]
                # y2 = box[2]
                x1 = box[1]
                y1 = box[0]
                x2 = box[3]
                y2 = box[2]
                print(x1, y1, x2, y2)
                cv2.rectangle(image, (x1, y1), (x2, y2), colors[col_idx], 2)
            col_idx += 1

        anchors = torch.cat(anchors, 0)
        anchors[0]
        gt_box = torch.tensor([71., 42., 90., 62.], dtype=torch.float32)
        jaccard(gt_box, anchors)

        img = cv2.resize(image, (1000, 1000))
        cv2.imshow("anchors", img)
        cv2.waitKey()
        cv2.destroyAllWindows()
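The jaccard(gt_box, anchors) call above presumably measures overlap between the ground-truth box and every anchor. A generic IoU sketch for corner-format boxes, not necessarily the project's own jaccard implementation:

import torch

def box_iou_sketch(box, anchors):
    # box: (4,) tensor, anchors: (N, 4) tensor, both in x1, y1, x2, y2 order.
    top_left = torch.max(box[:2], anchors[:, :2])
    bottom_right = torch.min(box[2:], anchors[:, 2:])
    wh = (bottom_right - top_left).clamp(min=0)
    intersection = wh[:, 0] * wh[:, 1]
    area_box = (box[2] - box[0]) * (box[3] - box[1])
    area_anchors = (anchors[:, 2] - anchors[:, 0]) * (anchors[:, 3] - anchors[:, 1])
    return intersection / (area_box + area_anchors - intersection)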
Example #22
def do_eval(dataloader,
            checkpoint_file,
            device,
            result_dir=None,
            keep_topK=200,
            keep_num_preNMS=50,
            keep_num_postNMS=5):

    if result_dir is not None:
        os.makedirs(result_dir, exist_ok=True)
        os.makedirs("PreNMS", exist_ok=True)
        os.makedirs("PostNMS", exist_ok=True)

    # =========================== Pretrained ===============================
    # Put the path were you save the given pretrained model
    pretrained_path = '../pretrained/checkpoint680.pth'
    backbone, rpn = pretrained_models_680(pretrained_path)
    backbone = backbone.to(device)
    rpn = rpn.to(device)
    # ========================= Loading Model ==============================
    boxHead = BoxHead(Classes=3, P=7, device=device).to(device)
    if torch.cuda.is_available():
        checkpoint = torch.load(checkpoint_file)
    else:
        checkpoint = torch.load(checkpoint_file,
                                map_location=torch.device('cpu'))
    print("[INFO] Weight loaded from checkpoint file: {}".format(
        checkpoint_file))
    boxHead.load_state_dict(checkpoint['model_state_dict'])
    boxHead.eval()  # set to eval mode
    # ============================ Eval ================================
    for iter, data in enumerate(tqdm(dataloader), 0):
        img = data['images'].to(device)
        batch_size = img.shape[0]
        label_list = [x.to(device) for x in data['labels']]
        mask_list = [x.to(device) for x in data['masks']]
        bbox_list = [x.to(device) for x in data['bbox']]
        # index_list = data['index']
        img_shape = (img.shape[2], img.shape[3])
        with torch.no_grad():
            backout = backbone(img)
            im_lis = ImageList(img, [(800, 1088)] * img.shape[0])
            rpnout = rpn(im_lis, backout)
            proposals = [proposal[0:keep_topK, :] for proposal in rpnout[0]]
            fpn_feat_list = list(backout.values())
            feature_vectors = boxHead.MultiScaleRoiAlign(
                fpn_feat_list, proposals)
            class_logits, box_pred = boxHead(feature_vectors)
            class_logits = torch.softmax(class_logits, dim=1)
            proposal_torch = torch.cat(proposals, dim=0)  # x1 y1 x2 y2
            proposal_xywh = torch.zeros_like(proposal_torch, device=proposal_torch.device)
            proposal_xywh[:, 0] = (proposal_torch[:, 0] + proposal_torch[:, 2]) / 2
            proposal_xywh[:, 1] = (proposal_torch[:, 1] + proposal_torch[:, 3]) / 2
            proposal_xywh[:, 2] = torch.abs(proposal_torch[:, 2] - proposal_torch[:, 0])
            proposal_xywh[:, 3] = torch.abs(proposal_torch[:, 3] - proposal_torch[:, 1])
            result_prob, result_class, result_box = simplifyOutputs(
                class_logits, box_pred)
            box_decoded = decode_output(proposal_xywh, result_box)
            post_nms_prob, post_nms_class, post_nms_box = boxHead.postprocess_detections(
                result_prob,
                result_class,
                box_decoded,
                IOU_thresh=0.5,
                conf_thresh=0.5,
                keep_num_preNMS=keep_num_preNMS,
                keep_num_postNMS=keep_num_postNMS)
Example #23
 def forward(self_module, images, features):
     images = ImageList(images, [i.shape[-2:] for i in images])
     return self_module.rpn(images, features)
Example #24
 def first_filter(self, x, images):
     image_sizes = [img.shape[-2:] for img in images]
     scores = self.rpn.first_filter(ImageList(images, image_sizes),
                                    OrderedDict([(0, x)]))  #.reshape(-1,6)
     return scores
Example #25
 def __init__(self_module, images):
     super(RPNModule, self_module).__init__()
     self_module.rpn = self._init_test_rpn()
     self_module.images = ImageList(images,
                                    [i.shape[-2:] for i in images])
Example #26
 def forward(self, x, images, first=None):
     image_sizes = [img.shape[-2:] for img in images]
     proposals, scores = self.rpn(ImageList(images, image_sizes),
                                  OrderedDict([(0, x)]),
                                  first=first)  #.reshape(-1,6)
     return proposals, scores