Ejemplo n.º 1
0
class PlaneNetDetector():
    """Adapter that runs the external PlaneNet model on a data-loader sample.

    The PlaneNet prediction is repackaged as a list of dictionaries holding
    CUDA tensors under the keys ``'depth'``, ``'mask'``, ``'masks'`` and
    ``'detection'``.
    """

    def __init__(self, options, config, checkpoint_dir=''):
        """Store the settings and build the underlying PlaneNet detector.

        Args:
            options: Run options; stored on the instance and not read here.
            config: Configuration object; ``detect`` reads
                ``config.MEAN_PIXEL`` from it.
            checkpoint_dir: Accepted for interface compatibility; not used by
                this class.
        """
        self.options = options
        self.config = config
        # PlaneNet is imported from outside this package. Register its parent
        # directory only once so repeated instantiation does not keep
        # appending duplicate entries to sys.path.
        methods_dir = '../../existing_methods/'
        if methods_dir not in sys.path:
            sys.path.append(methods_dir)
        # Aliased so the imported class is not confused with this wrapper,
        # which has the same name.
        from PlaneNet.planenet_inference import PlaneNetDetector as PlaneNetInference
        self.detector = PlaneNetInference(predictNYU=False)
        return

    def detect(self, sample):
        """Run PlaneNet on the first image of ``sample``.

        Args:
            sample: Indexable batch; only ``sample[0]`` (the image tensor) is
                read. ``sample[0][0]`` is transposed with ``(1, 2, 0)``, so it
                is treated as channel-first.

        Returns:
            A one-element list containing a dict of CUDA float tensors:
            ``'depth'`` (padded depth map with a leading singleton axis),
            ``'masks'`` (one binary mask per predicted plane), ``'mask'``
            (sum of ``'masks'`` over the plane axis, axis kept) and
            ``'detection'`` (six columns of ones followed by the plane
            parameters returned by PlaneNet).
        """
        detection_pair = []
        for indexOffset in [0, ]:
            # Only the image is needed. It is consumed as a NumPy array, so it
            # is not moved to the GPU first.
            images = sample[indexOffset + 0]

            # Undo mean subtraction and keep rows 80..559.
            # NOTE(review): presumably this strips the vertical padding of a
            # 640x640 network input around a 480x640 image - confirm.
            image = (images[0].detach().cpu().numpy().transpose((1, 2, 0)) + self.config.MEAN_PIXEL)[80:560]

            pred_dict = self.detector.detect(image)

            # Re-pad the segmentation to the full height; -1 matches no plane
            # index, so padded rows end up in none of the masks below.
            border = np.full((80, 640), fill_value=-1, dtype=np.int32)
            segmentation = np.concatenate([border, pred_dict['segmentation'], border], axis=0)

            planes = pred_dict['plane']

            # One binary mask per plane index: (num_planes, H, W).
            plane_ids = np.arange(len(planes), dtype=np.int32).reshape((-1, 1, 1))
            masks = (segmentation == plane_ids).astype(np.float32)

            # Pad the depth map with zero rows (float zeros, as in
            # TraditionalDetector.detect).
            zero_rows = np.zeros((80, 640))
            depth = np.concatenate([zero_rows, pred_dict['depth'], zero_rows], axis=0)

            # Six columns of ones act as placeholders ahead of the plane
            # parameters.
            # NOTE(review): presumably these stand in for box / class / score
            # fields expected downstream - confirm against the consumer.
            detections = np.concatenate([np.ones((len(planes), 4)), np.ones((len(planes), 2)), planes], axis=-1)

            detections = torch.from_numpy(detections).float().cuda()
            depth = torch.from_numpy(depth).unsqueeze(0).float().cuda()
            masks = torch.from_numpy(masks).float().cuda()
            detection_pair.append({'depth': depth, 'mask': masks.sum(0, keepdim=True), 'masks': masks, 'detection': detections})
        return detection_pair
Ejemplo n.º 2
0
class TraditionalDetector():
    """Plane detector that delegates fitting to the ``plane_utils.py`` script.

    Inputs are written to ``test/input_dict.npy``, the script is run as a
    child process, and its result is read back from ``test/output_dict.npy``.
    Depth and semantics come from ground truth when ``'gt'`` is in
    ``modelType``; otherwise they come from a PlaneNet prediction.
    """

    def __init__(self, options, config, modelType=''):
        """Store the settings and, for ``'pred'`` model types, load PlaneNet.

        Args:
            options: Run options; stored on the instance and not read here.
            config: Configuration object; ``detect`` reads
                ``config.MEAN_PIXEL`` from it.
            modelType: Mode string. ``'pred'`` triggers construction of the
                PlaneNet detector; the string is also forwarded to
                ``plane_utils.py`` on the command line.
        """
        self.options = options
        self.config = config
        self.modelType = modelType
        # NOTE(review): self.detector is only created for 'pred' model types,
        # while detect() uses it whenever 'gt' is absent from modelType. A
        # modelType containing neither substring fails there with
        # AttributeError.
        if 'pred' in modelType:
            # Register the import root only once to avoid duplicate sys.path
            # entries on repeated instantiation.
            if '../../' not in sys.path:
                sys.path.append('../../')
            from PlaneNet.planenet_inference import PlaneNetDetector
            self.detector = PlaneNetDetector(predictSemantics=True)
        return

    def detect(self, sample):
        """Detect planes for the first image of ``sample``.

        Args:
            sample: Indexable batch. Reads ``sample[0]`` (images),
                ``sample[30]`` (camera) and, in ``'gt'`` mode, ``sample[8]``
                (depth) and ``sample[12]`` (semantics).

        Returns:
            A one-element list containing a dict of CUDA float tensors:
            ``'depth'`` (padded depth map with a leading singleton axis),
            ``'masks'`` (one binary mask per plane), ``'mask'`` (sum of
            ``'masks'`` over the plane axis, axis kept) and ``'detection'``
            (six columns of ones followed by the plane parameters).

        Raises:
            FileNotFoundError: From ``np.load`` when ``plane_utils.py`` did
                not produce ``test/output_dict.npy``.
        """
        # Local import: only this method needs it.
        import subprocess

        detection_pair = []
        for indexOffset in [0, ]:
            # Everything read from the sample is consumed as NumPy arrays on
            # the CPU, so no tensor is moved to the GPU here.
            images = sample[indexOffset + 0]

            # Undo mean subtraction and keep rows 80..559.
            # NOTE(review): presumably this strips the vertical padding of a
            # 640x640 network input around a 480x640 image - confirm.
            image = (images[0].detach().cpu().numpy().transpose((1, 2, 0)) + self.config.MEAN_PIXEL)[80:560]

            input_dict = {'image': cv2.resize(image, (256, 192))}

            if 'gt' in self.modelType:
                gt_depth = sample[indexOffset + 8]
                gt_semantics = sample[indexOffset + 12]
                input_dict['depth'] = cv2.resize(gt_depth[0].detach().cpu().numpy()[80:560], (256, 192))
                semantics = gt_semantics[0].detach().cpu().numpy()[80:560]
                # Nearest-neighbour keeps label values intact when resizing.
                input_dict['semantics'] = cv2.resize(semantics, (256, 192), interpolation=cv2.INTER_NEAREST)
            else:
                pred_dict = self.detector.detect(image)
                input_dict['depth'] = pred_dict['non_plane_depth'].squeeze()
                input_dict['semantics'] = pred_dict['semantics'].squeeze().argmax(-1)

            # NOTE(review): the camera is read from the fixed index 30
            # regardless of indexOffset - confirm this is intended.
            camera = sample[30][0].numpy()
            # Twenty values in the layout plane_utils.py expects.
            # NOTE(review): looks like a flattened 4x4 intrinsics matrix
            # followed by four extra values - confirm against plane_utils.py.
            input_dict['info'] = np.array([camera[0], 0, camera[2], 0, 0, camera[1], camera[3], 0, 0, 0, 1, 0, 0, 0, 0, 1, camera[4], camera[5], 1000, 0])
            np.save('test/input_dict.npy', input_dict)

            # Remove any stale result first, so that a failed run surfaces as
            # a missing file instead of silently reusing old output.
            try:
                os.remove('test/output_dict.npy')
            except FileNotFoundError:
                pass

            # Argument list instead of a shell string. modelType is split on
            # whitespace to match how the shell tokenised the old command.
            subprocess.call(['python', 'plane_utils.py'] + self.modelType.split())

            # The file holds a pickled dict inside a 0-d object array.
            # allow_pickle=True is required since NumPy 1.16.3; the file is
            # produced locally by plane_utils.py, so it is trusted.
            output_dict = np.load('test/output_dict.npy', encoding='latin1', allow_pickle=True)[()]

            segmentation = cv2.resize(output_dict['segmentation'], (640, 480), interpolation=cv2.INTER_NEAREST)
            # Re-pad to the full height; -1 matches no plane index, so padded
            # rows end up in none of the masks below.
            border = np.full((80, 640), fill_value=-1, dtype=np.int32)
            segmentation = np.concatenate([border, segmentation, border], axis=0)

            planes = output_dict['plane']
            # One binary mask per plane index: (num_planes, H, W).
            plane_ids = np.arange(len(planes), dtype=np.int32).reshape((-1, 1, 1))
            masks = (segmentation == plane_ids).astype(np.float32)

            # Per-plane depth at the working resolution; at each pixel only
            # the plane selected by the low-resolution segmentation is kept.
            plane_depths = calcPlaneDepths(planes, 256, 192, camera, max_depth=10)
            depth = (plane_depths * (np.expand_dims(output_dict['segmentation'], -1) == np.arange(len(planes)))).sum(-1)
            depth = cv2.resize(depth, (640, 480), interpolation=cv2.INTER_LINEAR)
            zero_rows = np.zeros((80, 640))
            depth = np.concatenate([zero_rows, depth, zero_rows], axis=0)

            # Six columns of ones act as placeholders ahead of the plane
            # parameters.
            # NOTE(review): presumably these stand in for box / class / score
            # fields expected downstream - confirm against the consumer.
            detections = np.concatenate([np.ones((len(planes), 4)), np.ones((len(planes), 2)), planes], axis=-1)

            detections = torch.from_numpy(detections).float().cuda()
            depth = torch.from_numpy(depth).unsqueeze(0).float().cuda()
            masks = torch.from_numpy(masks).float().cuda()
            detection_pair.append({'depth': depth, 'mask': masks.sum(0, keepdim=True), 'masks': masks, 'detection': detections})
        return detection_pair