Example #1
    def generate_semantic_mask(self, split):
        output_dir = os.path.join(self.kitti_path, 'training/semantic_mask')
        with open(os.path.join(self.kitti_path, split + '.txt')) as f:
            frame_ids = [line.rstrip('\n') for line in f]
        depth_map_dir = os.path.join(self.kitti_path, 'training/depth_map_dense')
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)
        for fid in frame_ids:
            fid = int(fid)
            depth_map = np.load(os.path.join(depth_map_dir, '{:06}.npy'.format(fid)))
            pc_dense = self.to_point_cloud(depth_map, fid)
            objects = self.kitti_dataset.get_label_objects(fid)
            image = self.kitti_dataset.get_image(fid)
            calib = self.kitti_dataset.get_calibration(fid) # 3 by 4 matrix
            # materialize the filter so it can be safely reused
            objects = list(
                filter(
                    lambda obj: obj.type in type_whitelist and obj.difficulty
                    in difficulties_whitelist, objects))
            pts_mask = {
                'NonObject': np.zeros((len(pc_dense),), dtype=bool),
                'Car': np.zeros((len(pc_dense),), dtype=bool),
                'Pedestrian': np.zeros((len(pc_dense),), dtype=bool),
                'Cyclist': np.zeros((len(pc_dense),), dtype=bool)
            }
            for obj in objects:
                _,obj_box_3d = utils.compute_box_3d(obj, calib.P)
                _,mask = extract_pc_in_box3d(pc_dense, obj_box_3d)
                pts_mask[obj.type] = np.logical_or(mask, pts_mask[obj.type])
            pts_mask['NonObject'] = (pts_mask['Car'] + pts_mask['Pedestrian'] + pts_mask['Cyclist']) == 0

            semantic_mask = np.ones(image.shape[0:2], dtype=np.int32) * 255
            # disp_mask = np.ones(image.shape, dtype=np.int32) * 255
            # color_map = {
            #     'NonObject': (0,0,0),
            #     'Car': (0,0,142),
            #     'Pedestrian': (220, 20, 60),
            #     'Cyclist': (20,  255,  60)
            # }
            for cls_name, cls_mask in pts_mask.items():
                pts_2d = calib.project_rect_to_image(pc_dense[cls_mask, :3])
                for u, v in pts_2d.astype(np.int32):
                    # skip projections that fall outside the image
                    if u < 0 or v < 0 or u >= image.shape[1] or v >= image.shape[0]:
                        continue
                    semantic_mask[v, u] = g_type2onehotclass[cls_name]
                    # disp_mask[v, u] = color_map[cls_name]
            # cv2.imshow('semantic_mask', disp_mask/255.0)
            # cv2.waitKey(0)
            fout = os.path.join(output_dir, '{:06}.png'.format(fid))
            # np.save(fout, semantic_mask)
            cv2.imwrite(fout, semantic_mask)
            print('save', fid)
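
Note: every example on this page calls extract_pc_in_box3d to select the points lying inside an oriented 3D box. A minimal sketch of such a helper, assuming the convex-hull test used by the Frustum PointNets codebase (the repository's actual implementation may differ):

import numpy as np
from scipy.spatial import Delaunay

def in_hull(p, hull):
    # boolean mask: True where a point of p lies inside the convex hull
    if not isinstance(hull, Delaunay):
        hull = Delaunay(hull)
    return hull.find_simplex(p) >= 0

def extract_pc_in_box3d(pc, box3d):
    # pc: (N, C) array with xyz in the first three columns
    # box3d: (8, 3) corners of an oriented 3D box
    mask = in_hull(pc[:, 0:3], box3d)
    return pc[mask, :], mask
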
Example #2
    def expand_points(self, pc_rect, proposal, calib, seed_ind, data_idx_str):
        prop = copy.deepcopy(proposal)
        prop.l += 1
        prop.w += 1
        prop_corners_image_2d, prop_corners_3d = utils.compute_box_3d(
            prop, calib.P)
        _, local_ind = extract_pc_in_box3d(pc_rect, prop_corners_3d)
        local_points = pc_rect[local_ind]
        local_seg = self.pc_seg[data_idx_str][local_ind]
        fg_ind = local_seg == 1
        keypoints = pc_rect[seed_ind]
        # print(np.sum(seed_ind))
        # print(np.sum(local_ind))
        # print(np.sum(fg_ind))
        print('before: ', keypoints.shape[0])

        candidates_ind = np.logical_and(np.logical_not(seed_ind), local_ind)
        candidates_ind = np.logical_and(candidates_ind,
                                        self.pc_seg[data_idx_str] == 1)

        candidates_points = pc_rect[candidates_ind]
        print('candidates size: ', candidates_points.shape[0])
        while True:
            tree = KDTree(keypoints[:, :3], leaf_size=2)
            new_kp = []
            selected = np.zeros((len(candidates_points), ))
            for i in range(len(candidates_points)):
                # accept a candidate once it has at least 3 existing
                # keypoints within a 0.3 m radius; mark it as consumed
                if tree.query_radius(np.expand_dims(candidates_points[i, :3],
                                                    axis=0),
                                     r=0.3,
                                     count_only=True) >= 3:
                    new_kp.append(candidates_points[i])
                    selected[i] = 1
            if len(new_kp) == 0:
                break
            keypoints = np.concatenate((keypoints, new_kp), axis=0)
            candidates_points = candidates_points[selected == 0]
        print('after: ', keypoints.shape[0])
        return keypoints
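
The growing criterion above uses sklearn.neighbors.KDTree.query_radius with count_only=True, which returns an array with one neighbor count per query point. A small standalone illustration:

import numpy as np
from sklearn.neighbors import KDTree

points = np.random.rand(100, 3)
tree = KDTree(points, leaf_size=2)
query = np.random.rand(1, 3)
# number of stored points within radius 0.3 of the query point
count = tree.query_radius(query, r=0.3, count_only=True)[0]
print(count)
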
Example #3
def visualize(dataset,
              frame_id,
              prediction,
              seg_mask=None,
              show_3d=False,
              output_dir=None):
    fig_size = (12.42, 3.75)
    is_video = type(dataset).__name__ == 'kitti_object_video'
    # pred_fig, pred_2d_axes, pred_3d_axes = \
    #     vis_utils.visualization(dataset.image_dir,
    #                             int(frame_id),
    #                             display=False,
    #                             fig_size=fig_size)

    pred_fig, pred_3d_axes = vis_utils.visualize_single_plot(dataset.image_dir,
                                                             int(frame_id),
                                                             is_video,
                                                             flipped=False,
                                                             display=False,
                                                             fig_size=fig_size)
    calib = dataset.get_calibration(frame_id)  # 3 by 4 matrix

    # 2d visualization
    # draw groundtruth
    # labels = dataset.get_label_objects(frame_id)
    # labels = filter(lambda obj: obj.type in type_whitelist and obj.difficulty in difficulties_whitelist, labels)
    # draw_boxes(labels, calib, pred_2d_axes)
    # draw prediction on second image
    pred_corners = draw_boxes(prediction, calib, pred_3d_axes)
    if output_dir:
        filename = os.path.join(output_dir,
                                'result_2d_image/%06d.png' % frame_id)
        plt.savefig(filename)
        plt.close(pred_fig)
    else:
        plt.show()
        input()  # pause until Enter is pressed (raw_input in Python 2)

    if seg_mask is not None and output_dir:
        img = dataset.get_image(frame_id)
        img = cv2.resize(img, (1200, 360))
        seg_img = add_mask_to_img(img, seg_mask)
        filename = os.path.join(output_dir,
                                'result_seg_image/%06d.png' % frame_id)
        cv2.imwrite(filename, seg_img)

    if show_3d:
        # 3d visualization
        pc_velo = dataset.get_lidar(frame_id)
        image = dataset.get_image(frame_id)
        img_height, img_width = image.shape[0:2]
        _, _, img_fov_inds = get_lidar_in_image_fov(pc_velo[:, 0:3], calib, 0,
                                                    0, img_width, img_height,
                                                    True)
        pc_velo = pc_velo[img_fov_inds, :]
        boxes3d_velo = []
        mask = np.zeros((len(pc_velo), ))
        for corners in pred_corners:
            pts_velo = calib.project_rect_to_velo(corners)
            boxes3d_velo.append(pts_velo)
            _, obj_mask = extract_pc_in_box3d(pc_velo, pts_velo)
            mask = np.logical_or(mask, obj_mask)
        fig = draw_lidar(pc_velo, pts_color=(1, 1, 1))
        fig = draw_lidar(pc_velo[mask == 1], fig=fig, pts_color=(1, 0, 0))
        fig = draw_gt_boxes3d(boxes3d_velo,
                              fig,
                              draw_text=False,
                              color=(1, 1, 1))
        # raw_input()
        if output_dir:
            filename = os.path.join(output_dir,
                                    'result_3d_image/%06d.png' % frame_id)
            mlab.savefig(filename, figure=fig)
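
add_mask_to_img is not shown in these examples. A plausible sketch, assuming seg_mask is a per-pixel class-id map aligned with the resized image; the color table is illustrative, not taken from the repository:

import numpy as np

def add_mask_to_img(img, seg_mask, alpha=0.5):
    # blend one BGR color per class id over the image; class 0 stays as-is
    colors = {1: (142, 0, 0), 2: (60, 20, 220), 3: (60, 255, 20)}
    overlay = img.copy()
    for cls_id, color in colors.items():
        overlay[seg_mask == cls_id] = color
    return (alpha * overlay + (1 - alpha) * img).astype(np.uint8)
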
Example #4
    def load_frame_data(self,
                        data_idx_str,
                        random_flip=False,
                        random_rotate=False,
                        random_shift=False,
                        pca_jitter=False):
        '''load one frame'''
        data_idx = int(data_idx_str)
        # print(data_idx_str)
        calib = self.kitti_dataset.get_calibration(data_idx)  # 3 by 4 matrix
        if self.is_training:
            objects = self.kitti_dataset.get_label_objects(data_idx)
        else:
            objects = []
        # materialize the filter so objects can be iterated more than once
        objects = list(
            filter(
                lambda obj: obj.type in self.types_list and obj.difficulty in
                self.difficulties_list, objects))
        gt_boxes = []  # ground truth boxes
        image = self.kitti_dataset.get_image(data_idx)
        pc_velo = self.kitti_dataset.get_lidar(data_idx)
        img_height, img_width = image.shape[0:2]
        # data augmentation
        if pca_jitter:
            image = apply_pca_jitter(image)[0]

        # if random_flip and np.random.random()>0.5: # 50% chance flipping
        if random_flip and False:
            pc_velo[:, 1] *= -1
            # FIXME: to flip the scene, the optical center also needs to be
            # adjusted; don't use together with the image for now
            calib.P[0, 2] = img_width - calib.P[0, 2]
            image = np.flip(image, axis=1)
            for obj in objects:
                obj.t = [-obj.t[0], obj.t[1], obj.t[2]]
                # ensure that ry is [-pi, pi]
                if obj.ry >= 0:
                    obj.ry = np.pi - obj.ry
                else:
                    obj.ry = -np.pi - obj.ry

        # use original point cloud for rpn
        if not self.train_img_seg:
            _, pc_image_coord, img_fov_inds = get_lidar_in_image_fov(
                pc_velo[:, 0:3], calib, 0, 0, img_width, img_height, True)
            pc_velo = pc_velo[img_fov_inds, :]
            choice = np.random.choice(pc_velo.shape[0],
                                      self.npoints,
                                      replace=True)
            point_set = pc_velo[choice, :]
            pc_rect = np.zeros_like(point_set)
            pc_rect[:, 0:3] = calib.project_velo_to_rect(point_set[:, 0:3])
            pc_rect[:, 3] = point_set[:, 3]
        else:
            # use dense point cloud for training image segmentation
            pc_dense = self.dense_points.load_dense_points(data_idx)
            # get_lidar_in_image_fov
            pts_2d = calib.project_rect_to_image(pc_dense)
            fov_inds = (pts_2d[:, 0] < img_width) & (pts_2d[:, 0] >= 0) & \
                (pts_2d[:, 1] < img_height) & (pts_2d[:, 1] >= 0)
            # fov_inds = fov_inds & (pc_dense[:,2]>2.0)
            pc_dense = pc_dense[fov_inds, :]
            obj_mask = {
                'Car': np.zeros((len(pc_dense), ), dtype=bool),
                'Pedestrian': np.zeros((len(pc_dense), ), dtype=bool),
                'Cyclist': np.zeros((len(pc_dense), ), dtype=bool)
            }
            for obj in objects:
                _, obj_box_3d = utils.compute_box_3d(obj, calib.P)
                _, mask = extract_pc_in_box3d(pc_dense, obj_box_3d)
                obj_mask[obj.type] = np.logical_or(mask, obj_mask[obj.type])
            bg_mask = (obj_mask['Car'] + obj_mask['Pedestrian'] +
                       obj_mask['Cyclist']) == 0
            car = np.where(obj_mask['Car'])[0]
            # oversampling
            ped = np.where(obj_mask['Pedestrian'])[0]
            if ped.shape[0] > 0:
                #ped = ped[np.random.choice(ped.shape[0], car.shape[0], replace=True)]
                ped = ped[np.random.choice(ped.shape[0],
                                           ped.shape[0] * 5,
                                           replace=True)]
            cyc = np.where(obj_mask['Cyclist'])[0]
            if cyc.shape[0] > 0:
                #cyc = cyc[np.random.choice(cyc.shape[0], car.shape[0], replace=True)]
                cyc = cyc[np.random.choice(cyc.shape[0],
                                           cyc.shape[0] * 10,
                                           replace=True)]
            fg_num = car.shape[0] + ped.shape[0] + cyc.shape[0]
            #print('total points: ', len(pc_dense))
            #print('fg_num: {0}, car: {1}, ped: {2}, cyc: {3}'.format(fg_num, car.shape[0], ped.shape[0], cyc.shape[0]))
            bg = np.where(bg_mask)[0]
            bg = bg[np.random.choice(bg.shape[0],
                                     max(self.npoints - fg_num, 0),
                                     replace=True)]
            choice = np.concatenate((car, ped, cyc, bg), axis=0)[:self.npoints]
            #choice = np.random.choice(pc_dense.shape[0], self.npoints, replace=True)
            pc_rect = np.zeros((self.npoints, 4))
            pc_rect[:, :3] = pc_dense[choice, :]

        #start = time.time()
        seg_mask = np.zeros((pc_rect.shape[0]))
        # data augmentation
        # dont use with image now
        if random_rotate and False:
            ry = (np.random.random() - 0.5) * math.radians(
                20)  # -10~10 degrees
            pc_rect[:, 0:3] = rotate_points_along_y(pc_rect[:, 0:3], ry)
            for obj in objects:
                obj.t = rotate_points_along_y(obj.t, ry)
                obj.ry -= ry
                # ensure that ry is [-pi, pi]
                if obj.ry > np.pi:
                    obj.ry -= 2 * np.pi
                elif obj.ry < -np.pi:
                    obj.ry += 2 * np.pi
        proposal_of_point = {}  # point index to proposal vector
        gt_box_of_point = {}  # point index to corners_3d
        for obj in objects:
            _, obj_box_3d = utils.compute_box_3d(obj, calib.P)
            _, obj_mask = extract_pc_in_box3d(pc_rect, obj_box_3d)
            if np.sum(obj_mask) == 0:
                # label without 3d points
                # print('skip object without points')
                continue
            seg_mask[obj_mask] = g_type2onehotclass[obj.type]
            #seg_mask[obj_mask] = 1
            gt_boxes.append(obj_box_3d)
            obj_idxs = np.where(obj_mask)[0]
            # data augmentation
            # FIXME: jitter point will make valid loss growing
            # Also may go out of image view
            if random_shift and False:  # jitter object points
                pc_rect[obj_idxs, :3] = shift_point_cloud(
                    pc_rect[obj_idxs, :3], 0.02)
            for idx in obj_idxs:
                # to save time
                if self.train_img_seg:
                    proposal_of_point[idx] = [0, [0, 0, 0], 0, 0, 0, [0, 0, 0]]
                else:
                    proposal_of_point[idx] = box_encoder.encode(
                        obj, pc_rect[idx, :3])
                gt_box_of_point[idx] = obj_box_3d
        # self.viz_frame(pc_rect, seg_mask, gt_boxes)
        # return pc_rect, seg_mask, proposal_of_point, gt_box_of_point, gt_boxes
        calib_matrix = np.copy(calib.P)
        calib_matrix[0, :] *= (1200.0 / image.shape[1])
        calib_matrix[1, :] *= (360.0 / image.shape[0])
        #print('construct', time.time() - start)
        return {
            'pointcloud': pc_rect,
            'image': cv2.resize(image, (1200, 360)),
            'calib': calib_matrix,
            'mask_label': seg_mask,
            'proposal_of_point': self.get_proposal_out(proposal_of_point),
            'gt_box_of_point': self.get_gt_box_of_points(gt_box_of_point),
            'gt_boxes': gt_boxes,
            'pc_choice': choice
        }
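
rotate_points_along_y, used above for rotation augmentation, is the standard rotation about the camera's vertical (y) axis. A sketch assuming the usual KITTI rectified-camera convention; the repository's helper may apply the opposite sign:

import numpy as np

def rotate_points_along_y(points, ry):
    # points: (N, 3) or (3,) in the rectified camera frame; ry in radians
    c, s = np.cos(ry), np.sin(ry)
    R = np.array([[c, 0.0, s],
                  [0.0, 1.0, 0.0],
                  [-s, 0.0, c]])
    return np.dot(points, R.T)
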
Example #5
    def get_one_sample(self, proposal, pc_rect, image, calib, iou, gt_box_3d,
                       gt_object, data_idx_str, img_seg_map):
        '''convert to frustum sample format'''
        prop_corners_image_2d, prop_corners_3d = utils.compute_box_3d(
            proposal, calib.P)
        if prop_corners_image_2d is None:
            print('skip proposal behind camera')
            return False
        # get points within proposal box
        # expand proposal
        proposal_expand = copy.deepcopy(proposal)
        proposal_expand.l += 0.5
        proposal_expand.w += 0.5
        proposal_expand.h += 0.5
        _, prop_corners_3d = utils.compute_box_3d(proposal_expand, calib.P)
        _, prop_inds = extract_pc_in_box3d(pc_rect, prop_corners_3d)
        pc_in_prop_box = pc_rect[prop_inds, :]
        seg_mask = np.zeros((pc_in_prop_box.shape[0]))
        if len(pc_in_prop_box) == 0:
            print('Reject proposal with no point')
            return False

        # Get frustum angle
        box2d_center = calib.project_rect_to_image(np.array([proposal.t]))[0]

        uvdepth = np.zeros((1, 3))
        uvdepth[0, 0:2] = box2d_center
        uvdepth[0, 2] = 20  # an arbitrary depth just to back-project the 2D center
        box2d_center_rect = calib.project_image_to_rect(uvdepth)
        frustum_angle = -1 * np.arctan2(box2d_center_rect[0, 2],
                                        box2d_center_rect[0, 0])

        if gt_object is not None:
            obj_type = gt_object.type

            _, inds = extract_pc_in_box3d(pc_in_prop_box, gt_box_3d)
            seg_mask[inds] = 1
            # Reject objects with too few points
            if np.sum(seg_mask) < 5:
                print('Reject object with too few points')
                return False

            # Get 3D BOX heading
            heading_angle = gt_object.ry
            # Get 3D BOX size
            box3d_size = np.array([gt_object.l, gt_object.w, gt_object.h])
        else:
            obj_type = 'NonObject'
            gt_box_3d = np.zeros((8, 3))
            heading_angle = 0
            box3d_size = np.zeros((1, 3))
            #frustum_angle = 0

        rot_angle = self.get_center_view_rot_angle(frustum_angle)

        # Get point cloud
        if self.rotate_to_center:
            point_set = self.get_center_view_point_set(pc_in_prop_box,
                                                       rot_angle)
        else:
            point_set = pc_in_prop_box

        # ------------------------------ LABELS ----------------------------
        # classification
        # assert(obj_type in ['Car', 'Pedestrian', 'Cyclist', 'NonObject'])
        assert (obj_type in type_whitelist)
        cls_label = g_type2onehotclass[obj_type]

        # Get center point of 3D box
        if self.rotate_to_center:
            box3d_center = self.get_center_view_box3d_center(
                gt_box_3d, rot_angle)
        else:
            box3d_center = self.get_box3d_center(gt_box_3d)

        # Heading
        if self.rotate_to_center:
            heading_angle = heading_angle - rot_angle

        # Size
        size_class, size_residual = size2class(box3d_size, obj_type)

        angle_class, angle_residual = angle2class(heading_angle,
                                                  NUM_HEADING_BIN)

        self.sample_id_counter += 1
        return Sample(self.sample_id_counter, point_set, seg_mask,
                      box3d_center, angle_class, angle_residual, size_class,
                      size_residual, rot_angle, cls_label, proposal,
                      heading_angle, iou, img_seg_map, calib.P)
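
size2class and angle2class discretize a continuous size or heading into a class bin plus a residual. A sketch of angle2class consistent with the Frustum PointNets reference implementation (assumed, not verified against this repository):

import numpy as np

def angle2class(angle, num_class):
    # map a heading angle to one of num_class equal bins plus a residual
    angle = angle % (2 * np.pi)
    angle_per_class = 2 * np.pi / float(num_class)
    shifted_angle = (angle + angle_per_class / 2) % (2 * np.pi)
    class_id = int(shifted_angle / angle_per_class)
    residual = shifted_angle - (class_id * angle_per_class +
                                angle_per_class / 2)
    return class_id, residual
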
Example #6
    def get_one_sample(self, proposal, pc_rect, image, calib, iou, gt_box_3d,
                       gt_object, data_idx_str, img_seg_map):
        '''convert to frustum sample format'''
        prop_corners_image_2d, prop_corners_3d = utils.compute_box_3d(
            proposal, calib.P)
        if prop_corners_image_2d is None:
            print('skip proposal behind camera')
            return False
        # get points within proposal box
        _, prop_inds = extract_pc_in_box3d(pc_rect, prop_corners_3d)
        pc_in_prop_box = pc_rect[prop_inds, :]
        seg_mask = np.zeros((pc_in_prop_box.shape[0]))
        if len(pc_in_prop_box) == 0:
            print('Reject proposal with no point')
            return False

        # Get frustum angle
        image_points = calib.project_rect_to_image(pc_in_prop_box[:, :3])
        expand_image_points = np.concatenate(
            (prop_corners_image_2d, image_points), axis=0)
        xmin, ymin = expand_image_points.min(0)
        xmax, ymax = expand_image_points.max(0)
        # TODO: frustum angle is important, make use of image
        # use gt angle for testing
        if gt_object is not None:
            xmin, ymin, xmax, ymax = gt_object.box2d

        box2d_center = np.array([(xmin + xmax) / 2.0, (ymin + ymax) / 2.0])
        uvdepth = np.zeros((1, 3))
        uvdepth[0, 0:2] = box2d_center
        uvdepth[0, 2] = 20  # an arbitrary depth just to back-project the 2D center
        box2d_center_rect = calib.project_image_to_rect(uvdepth)
        frustum_angle = -1 * np.arctan2(box2d_center_rect[0, 2],
                                        box2d_center_rect[0, 0])

        if gt_object is not None:
            obj_type = gt_object.type
            # TODO: use dbscan instead of ground truth
            #_,gt_inds = extract_pc_in_box3d(pc_rect, gt_box_3d)
            #prop_inds = np.logical_or(prop_inds, gt_inds)

            _, inds = extract_pc_in_box3d(pc_in_prop_box, gt_box_3d)
            seg_mask[inds] = 1
            # Reject objects with too few points
            if np.sum(seg_mask) < 5:
                print('Reject object with too few points')
                return False

            # Get 3D BOX heading
            heading_angle = gt_object.ry
            # Get 3D BOX size
            box3d_size = np.array([gt_object.l, gt_object.w, gt_object.h])
        else:
            obj_type = 'NonObject'
            gt_box_3d = np.zeros((8, 3))
            heading_angle = 0
            box3d_size = np.zeros((1, 3))
            #frustum_angle = 0

        rot_angle = self.get_center_view_rot_angle(frustum_angle)

        # Get point cloud
        if self.rotate_to_center:
            point_set = self.get_center_view_point_set(pc_in_prop_box,
                                                       rot_angle)
        else:
            point_set = pc_in_prop_box

        # ------------------------------ LABELS ----------------------------
        # classification
        # assert(obj_type in ['Car', 'Pedestrian', 'Cyclist', 'NonObject'])
        assert (obj_type in type_whitelist)
        cls_label = g_type2onehotclass[obj_type]

        # Get center point of 3D box
        if self.rotate_to_center:
            box3d_center = self.get_center_view_box3d_center(
                gt_box_3d, rot_angle)
        else:
            box3d_center = self.get_box3d_center(gt_box_3d)

        # Heading
        if self.rotate_to_center:
            heading_angle = heading_angle - rot_angle

        # Size
        size_class, size_residual = size2class(box3d_size, obj_type)

        angle_class, angle_residual = angle2class(heading_angle,
                                                  NUM_HEADING_BIN)

        self.sample_id_counter += 1
        return Sample(self.sample_id_counter, point_set, seg_mask,
                      box3d_center, angle_class, angle_residual, size_class,
                      size_residual, rot_angle, cls_label, proposal,
                      heading_angle, iou, img_seg_map, calib.P)
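
The rotate_to_center path normalizes each frustum so that its central ray points along the +z (depth) axis before regression. A sketch of the two helpers, assuming the Frustum PointNets convention and the rotate_points_along_y helper sketched after Example #4:

import numpy as np

def get_center_view_rot_angle(frustum_angle):
    # angle that rotates the frustum's center ray onto the +z axis
    return np.pi / 2.0 + frustum_angle

def get_center_view_point_set(points, rot_angle):
    # rotate only the xyz channels; keep intensity/extra channels intact
    point_set = np.copy(points)
    point_set[:, 0:3] = rotate_points_along_y(point_set[:, 0:3], rot_angle)
    return point_set
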
Example #7
    def get_sample(self, pc_rect, image, img_seg_map, calib, proposal_,
                   max_iou, max_idx, objects):
        thres_low = 0.3
        thres_high = 0.55
        label = None  # below thres_low the proposal is treated as background
        if max_iou >= thres_high:
            label = objects[max_idx]
        # during training, skip ambiguous proposals between the thresholds
        if self.is_training and thres_low <= max_iou < thres_high:
            return False
        # expand proposal boxes
        proposal_expand = copy.deepcopy(proposal_)
        proposal_expand.l += 0.5
        proposal_expand.w += 0.5
        _, box_3d = utils.compute_box_3d(proposal_expand, calib.P)
        _, mask = extract_pc_in_box3d(pc_rect, box_3d)
        # ignore proposal with no points
        if (np.sum(mask) == 0):
            return False

        points = pc_rect[mask, :]
        points_with_feats = np.zeros((points.shape[0], self.num_channel))
        points_with_feats[:, :6] = points  # xyz, intensity, seg_one_hot
        # pooled points canonical transformation
        points_with_feats[:, :3] -= proposal_.t
        points_with_feats[:, :3] = rotate_points_along_y(
            points_with_feats[:, :3], proposal_.ry)

        sample = {}
        self.sample_id_counter += 1
        sample['id'] = self.sample_id_counter
        sample['class'] = 0
        sample['pointcloud'] = points_with_feats
        sample['image'] = cv2.resize(image, (1200, 360))
        sample['img_seg_map'] = img_seg_map
        sample['calib'] = np.copy(calib.P)
        # scale projection matrix
        sample['calib'][0, :] *= (1200.0 / image.shape[1])
        sample['calib'][1, :] *= (360.0 / image.shape[0])
        sample['proposal_box'] = np.array([
            proposal_.t[0], proposal_.t[1], proposal_.t[2], proposal_.l,
            proposal_.h, proposal_.w, proposal_.ry
        ])
        sample['center_cls'] = np.zeros((2, ), dtype=np.int32)
        sample['center_res'] = np.zeros((3, ))
        sample['angle_cls'] = 0
        sample['angle_res'] = 0
        sample['size_cls'] = 0
        sample['size_res'] = np.zeros((3, ))
        sample['gt_box'] = np.zeros((8, 3))
        sample['train_regression'] = max_iou >= thres_high
        if label is not None:
            sample['class'] = g_type2onehotclass[label.type]
            # rotation canonical transformation
            label_norm = copy.deepcopy(label)
            label_norm.ry = label.ry - proposal_.ry
            obj_vec = box_encoder.encode(label_norm, proposal_.t)
            sample['center_cls'] = obj_vec[0]
            sample['center_res'] = obj_vec[1]
            sample['angle_cls'] = obj_vec[2]
            sample['angle_res'] = obj_vec[3]
            sample['size_cls'] = obj_vec[4]
            sample['size_res'] = obj_vec[5]
            _, gt_box_3d = utils.compute_box_3d(label, calib.P)
            sample['gt_box'] = gt_box_3d
            #self.viz_sample(sample)
        return sample
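
The canonical transformation above expresses the pooled points relative to the proposal's center and heading, so the regression targets are proposal-local. Predictions made in that frame would be mapped back with the inverse transform; a hypothetical decode step, using the rotate_points_along_y convention assumed earlier:

import numpy as np

def decanonicalize(points_canonical, proposal):
    # invert the canonical transform: undo the rotation, then the translation
    points = rotate_points_along_y(points_canonical, -proposal.ry)
    return points + np.asarray(proposal.t)
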
Example #8
    def load_frame_data(self, data_idx_str, aug):
        '''load one frame'''
        start = time.time()
        data_idx = int(data_idx_str)

        try:
            with open(os.path.join(self.data_dir, data_idx_str + '.pkl'),
                      'rb') as fin:
                rpn_out = pickle.load(fin)
            # load image segmentation output
            img_seg_map = np.load(
                os.path.join(self.data_dir, data_idx_str + '_seg.npy'))
        except Exception as e:
            print(e)
            return []
        proposals = self.get_proposals(rpn_out)
        #print(data_idx_str)
        calib = self.kitti_dataset.get_calibration(data_idx)  # 3 by 4 matrix
        if self.is_training:
            objects = self.kitti_dataset.get_label_objects(data_idx)
        else:
            # while testing, all proposals will have class 0
            objects = []
        image = self.kitti_dataset.get_image(data_idx)
        pc_velo = self.kitti_dataset.get_lidar(data_idx)
        img_height, img_width = image.shape[0:2]
        _, pc_image_coord, img_fov_inds = get_lidar_in_image_fov(
            pc_velo[:, 0:3], calib, 0, 0, img_width, img_height, True)
        pc_velo = pc_velo[img_fov_inds, :]
        # Same point sampling as RPN
        choice = rpn_out['pc_choices']
        point_set = pc_velo[choice, :]
        # point_set = pc_velo
        pc_rect = np.zeros_like(point_set)
        pc_rect[:, 0:3] = calib.project_velo_to_rect(point_set[:, 0:3])
        pc_rect[:, 3] = point_set[:, 3]
        # Segmentation results from RPN
        seg_one_hot = np.zeros((pc_rect.shape[0], 2))
        bg_ind = rpn_out['segmentation'] == 0
        fg_ind = rpn_out['segmentation'] == 1
        seg_one_hot[bg_ind, 0] = 1
        seg_one_hot[fg_ind, 1] = 1
        pc_rect = np.concatenate((pc_rect, seg_one_hot), axis=-1)  # 6 channels
        # materialize the filter: objects is indexed and measured below
        objects = list(
            filter(
                lambda obj: obj.type in self.types_list and obj.difficulty in
                self.difficulties_list, objects))
        gt_boxes = []  # ground truth boxes
        gt_boxes_xy = []
        recall = np.zeros((len(objects), ), dtype=np.int32)
        for obj in objects:
            _, obj_box_3d = utils.compute_box_3d(obj, calib.P)
            # skip label with no point
            _, obj_mask = extract_pc_in_box3d(pc_rect, obj_box_3d)
            if np.sum(obj_mask) == 0:
                continue
            gt_boxes.append(obj_box_3d)
            gt_boxes_xy.append(obj_box_3d[:4, [0, 2]])
        positive_samples = []
        negative_samples = []
        show_boxes = []

        # boxes_2d = []
        def process_proposal(prop):
            b2d, prop_box_3d = utils.compute_box_3d(prop, calib.P)
            prop_box_xy = prop_box_3d[:4, [0, 2]]
            max_idx, max_iou = find_match_label(prop_box_xy, gt_boxes_xy)
            sample = self.get_sample(pc_rect, image, img_seg_map, calib, prop,
                                     max_iou, max_idx, objects)
            # print(max_iou)
            if not sample:
                return -1
            if sample['class'] == 0:
                show_boxes.append(prop_box_3d)
                negative_samples.append(sample)
            else:
                positive_samples.append(sample)
                show_boxes.append(prop_box_3d)
                recall[max_idx] = 1
            return sample['class']

        aug_proposals = []
        AUG_X = {1: 1, 2: 2, 3: 2}
        for prop in proposals:
            cls = process_proposal(prop)
            if not aug or cls <= 0:
                continue
            for x in range(AUG_X[cls]):
                prop_ = random_shift_box3d(copy.deepcopy(prop), 0.1)
                aug_proposals.append(prop_)
        # add more proposals using label to increase training samples
        '''
        if self.split == 'train':
            miss_objs = [objects[i] for i in range(len(objects)) if recall[i]==0]
            aug_proposals += self.fill_proposals_with_gt(miss_objs)
        '''
        for prop in aug_proposals:
            process_proposal(prop)
        if self.is_training:
            random.shuffle(negative_samples)
            samples = (positive_samples +
                       negative_samples[:len(positive_samples)])
        else:
            samples = positive_samples + negative_samples
        random.shuffle(samples)
        # self.viz_frame(pc_rect, np.zeros((pc_rect.shape[0],)), gt_boxes, show_boxes)
        return samples
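
find_match_label matches a proposal to ground truth by the IoU of the boxes' bird's-eye-view footprints (the first four box corners taken in the x-z plane). A plausible sketch using shapely; the repository may compute the polygon overlap differently:

from shapely.geometry import Polygon

def find_match_label(prop_box_xy, gt_boxes_xy):
    # return (index, IoU) of the best-overlapping ground-truth footprint,
    # or (-1, 0.0) when nothing overlaps
    best_idx, best_iou = -1, 0.0
    prop_poly = Polygon(prop_box_xy)
    for i, gt_xy in enumerate(gt_boxes_xy):
        gt_poly = Polygon(gt_xy)
        inter = prop_poly.intersection(gt_poly).area
        union = prop_poly.area + gt_poly.area - inter
        iou = inter / union if union > 0 else 0.0
        if iou > best_iou:
            best_idx, best_iou = i, iou
    return best_idx, best_iou
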
Example #9
    def stat_proposal(self):
        '''statistics of proposals'''
        total_iou_3d = 0
        total_iou_2d = 0
        total_angle_res = 0
        total = 0
        total_num = 0
        total_recall = 0
        total_gt = 0
        for frame_id in self.frame_ids:
            print(frame_id)
            try:
                with open(os.path.join(self.data_dir, frame_id + '.pkl'),
                          'rb') as fin:
                    rpn_out = pickle.load(fin)
            except Exception as e:
                print(e)
                continue
            data_idx = int(frame_id)
            pc_velo = self.kitti_dataset.get_lidar(data_idx)
            image = self.kitti_dataset.get_image(data_idx)
            calib = self.kitti_dataset.get_calibration(data_idx)  # 3 by 4 matrix
            img_height, img_width = image.shape[0:2]
            _, pc_image_coord, img_fov_inds = get_lidar_in_image_fov(
                pc_velo[:, 0:3], calib, 0, 0, img_width, img_height, True)
            pc_velo = pc_velo[img_fov_inds, :]
            pc_rect = np.zeros_like(pc_velo)
            pc_rect[:, 0:3] = calib.project_velo_to_rect(pc_velo[:, 0:3])
            pc_rect[:, 3] = pc_velo[:, 3]
            objects = self.kitti_dataset.get_label_objects(data_idx)
            # materialize the filter so objects can be counted and indexed
            objects = list(
                filter(
                    lambda obj: obj.type in self.types_list and obj.difficulty
                    in self.difficulties_list, objects))
            proposals = self.get_proposals(rpn_out)
            gt_boxes = []  # ground truth boxes
            gt_boxes_xy = []
            recall = np.zeros((len(objects), ))
            for obj in objects:
                _, obj_box_3d = utils.compute_box_3d(obj, calib.P)
                # skip labels with no points
                _, obj_mask = extract_pc_in_box3d(pc_rect, obj_box_3d)
                if np.sum(obj_mask) == 0:
                    continue
                gt_boxes.append(obj_box_3d)
                gt_boxes_xy.append(obj_box_3d[:4, [0, 2]])
            for prop in proposals:
                b2d, prop_box_3d = utils.compute_box_3d(prop, calib.P)
                prop_box_xy = prop_box_3d[:4, [0, 2]]
                # NOTE: gt_idx indexes gt_boxes_xy; if a label was skipped
                # above for having no points, objects[gt_idx] may not match
                gt_idx, gt_iou = find_match_label(prop_box_xy, gt_boxes_xy)
                if gt_idx == -1:
                    continue
                if gt_iou >= 0.5:
                    recall[gt_idx] = 1
                total_iou_2d += gt_iou
                ry_residual = abs(prop.ry - objects[gt_idx].ry)
                total_angle_res += ry_residual
                total += 1
            total_recall += np.sum(recall)
            total_gt += len(objects)
            total_num += len(proposals)
        print('Average IoU 2D: {0}'.format(total_iou_2d / total))
        print('Average angle residual: {0}'.format(total_angle_res / total))
        print('Average proposal number: {0}'.format(
            total_num / len(self.frame_ids)))
        print('Recall: {0}'.format(float(total_recall) / total_gt))
Example #10
def stat(kitti_path, split, data_dir, type_whitelist, difficulties_whitelist):
    kitti_dataset = kitti_object(kitti_path, 'training')
    with open(os.path.join(kitti_path, split + '.txt')) as f:
        frame_ids = [line.rstrip('\n') for line in f]

    missed_sample = 0
    missed_seg = 0
    missed_seg_sample = 0
    missed_obj_point = 0
    tp = 0
    fp = 0
    fn = 0
    total_recall = {10: 0, 20: 0, 30: 0, 40: 0, 50: 0, 100: 0, 300: 0}
    label_count = 0
    for frame_id in frame_ids:
        print(frame_id)
        sys.stdout.flush()
        try:
            with open(os.path.join(data_dir, frame_id + '.pkl'), 'rb') as fin:
                rpn_out = pickle.load(fin)
        except Exception as e:
            print(e)
            continue
        data_idx = int(frame_id)
        pc_choices = rpn_out['pc_choices']
        seg_pred_point = rpn_out['segmentation']  # point seg
        seg_pred = rpn_out['segmentation_fuse']  # point+img seg
        fg_indices = rpn_out['fg_indices']  # after sampling

        seg_pred = seg_pred_point

        pc_velo = kitti_dataset.get_lidar(data_idx)
        image = kitti_dataset.get_image(data_idx)
        calib = kitti_dataset.get_calibration(data_idx)  # 3 by 4 matrix
        img_height, img_width = image.shape[0:2]
        _, pc_image_coord, img_fov_inds = get_lidar_in_image_fov(
            pc_velo[:, 0:3], calib, 0, 0, img_width, img_height, True)
        pc_velo = pc_velo[img_fov_inds, :3]
        pc_velo_sampled = pc_velo[pc_choices]
        pc_rect = calib.project_velo_to_rect(pc_velo)
        pc_rect_sampled = calib.project_velo_to_rect(pc_velo_sampled)  # 16384
        pc_seg = pc_rect_sampled[seg_pred > 0]  # all seg fg points
        if pc_seg.shape[0] > 0:
            pc_seg_sampled = pc_rect_sampled[
                fg_indices]  # sampled fg points 2048
        else:
            pc_seg_sampled = pc_seg
        objects = kitti_dataset.get_label_objects(data_idx)
        # materialize the filter so objects can be counted and indexed
        objects = list(
            filter(
                lambda obj: obj.type in type_whitelist and obj.difficulty in
                difficulties_whitelist, objects))
        gt_boxes = []  # ground truth boxes
        gt_boxes_xy = []
        seg_mask = np.zeros((pc_rect_sampled.shape[0], ))
        for obj in objects:
            _, obj_box_3d = utils.compute_box_3d(obj, calib.P)
            # skip label with no point
            _, obj_mask = extract_pc_in_box3d(pc_rect, obj_box_3d)
            if np.sum(obj_mask) == 0:
                continue
            gt_boxes.append(obj_box_3d)
            gt_boxes_xy.append(obj_box_3d[:4, [0, 2]])
            label_count += 1
            _, obj_mask = extract_pc_in_box3d(pc_rect_sampled, obj_box_3d)
            seg_mask[obj_mask] = 1
            if np.sum(obj_mask) == 0:
                missed_sample += 1
            _, mask = extract_pc_in_box3d(pc_seg, obj_box_3d)
            if np.sum(mask) == 0:
                missed_seg += 1
                missed_obj_point += np.sum(obj_mask)
            _, mask = extract_pc_in_box3d(pc_seg_sampled, obj_box_3d)
            if np.sum(mask) == 0:
                missed_seg_sample += 1

        tp += np.sum(np.logical_and(seg_pred == seg_mask, seg_mask != 0))
        fp += np.sum(np.logical_and(seg_pred != seg_mask, seg_mask == 0))
        fn += np.sum(np.logical_and(seg_pred != seg_mask, seg_mask != 0))
        for max_keep in total_recall.keys():
            recall = np.zeros((len(objects), ))
            proposals = get_proposals(rpn_out, 0.7, max_keep)
            for prop in proposals:
                if np.sum(recall) == len(objects):
                    break
                b2d, prop_box_3d = utils.compute_box_3d(prop, calib.P)
                prop_box_xy = prop_box_3d[:4, [0, 2]]
                gt_idx, gt_iou = find_match_label(prop_box_xy, gt_boxes_xy)
                if gt_idx == -1:
                    continue
                if gt_iou >= 0.5:
                    recall[gt_idx] = 1
                #if objects[gt_idx].type != 'Car' and gt_iou >= 0.5:
                #    print(objects[gt_idx].type)
            total_recall[max_keep] += np.sum(recall)

    print('Missed sample ratio: ', float(missed_sample) / label_count)
    print('Missed seg ratio: ', float(missed_seg) / label_count)
    if missed_seg > 0:
        print('Missed object average point: ',
              missed_obj_point / missed_seg)
    print('Missed seg sample ratio: ', float(missed_seg_sample) / label_count)
    for max_keep in total_recall.keys():
        total_recall[max_keep] = float(total_recall[max_keep]) / label_count
    print('Recall on proposal num: ', total_recall)
    print('Seg recall: ', float(tp) / (tp + fn))
    print('Seg precision: ', float(tp) / (tp + fp))
    sys.stdout.flush()
Example #11
    def load_frame_data(self,
                        data_idx_str,
                        random_flip=False,
                        random_rotate=False,
                        random_shift=False,
                        pca_jitter=False):
        '''load one frame'''
        if self.use_aug_scene:
            data_idx = int(data_idx_str[2:])
        else:
            data_idx = int(data_idx_str)
        # print(data_idx_str)
        calib = self.kitti_dataset.get_calibration(data_idx)  # 3 by 4 matrix
        image = self.kitti_dataset.get_image(data_idx)
        img_height, img_width = image.shape[0:2]
        # data augmentation
        if pca_jitter:
            image = apply_pca_jitter(image)[0]

        objects = []
        if not self.use_aug_scene or data_idx_str[:2] == '00':
            pc_velo = self.kitti_dataset.get_lidar(data_idx)
            _, pc_image_coord, img_fov_inds = get_lidar_in_image_fov(
                pc_velo[:, 0:3], calib, 0, 0, img_width, img_height, True)
            pc_velo = pc_velo[img_fov_inds, :]
            choice = np.random.choice(pc_velo.shape[0],
                                      self.npoints,
                                      replace=True)
            point_set = pc_velo[choice, :]
            pc_rect = np.zeros_like(point_set)
            pc_rect[:, 0:3] = calib.project_velo_to_rect(point_set[:, 0:3])
            pc_rect[:, 3] = point_set[:, 3]
            if self.is_training:
                objects = self.kitti_dataset.get_label_objects(data_idx)
        else:
            pc_rect = utils.load_velo_scan(
                os.path.join(
                    self.kitti_path,
                    'aug_scene/rectified_data/{0}.bin'.format(data_idx_str)))
            choice = np.random.choice(pc_rect.shape[0],
                                      self.npoints,
                                      replace=True)
            pc_rect = pc_rect[choice, :]
            if self.is_training:
                objects = utils.read_label(
                    os.path.join(
                        self.kitti_path,
                        'aug_scene/aug_label/{0}.txt'.format(data_idx_str)))
        # materialize the filter so objects can be iterated more than once
        objects = list(
            filter(
                lambda obj: obj.type in self.types_list and obj.difficulty in
                self.difficulties_list, objects))
        gt_boxes = []  # ground truth boxes

        #start = time.time()
        seg_mask = np.zeros((pc_rect.shape[0]))
        # data augmentation
        if random_flip and np.random.random() > 0.5:  # 50% chance flipping
            pc_rect[:, 0] *= -1
            for obj in objects:
                obj.t = [-obj.t[0], obj.t[1], obj.t[2]]
                # ensure that ry is [-pi, pi]
                if obj.ry >= 0:
                    obj.ry = np.pi - obj.ry
                else:
                    obj.ry = -np.pi - obj.ry
        if random_rotate:
            ry = (np.random.random() - 0.5) * math.radians(
                20)  # -10~10 degrees
            pc_rect[:, 0:3] = rotate_points_along_y(pc_rect[:, 0:3], ry)
            for obj in objects:
                obj.t = rotate_points_along_y(obj.t, ry)
                obj.ry -= ry
                # ensure that ry is [-pi, pi]
                if obj.ry > np.pi:
                    obj.ry -= 2 * np.pi
                elif obj.ry < -np.pi:
                    obj.ry += 2 * np.pi
        proposal_of_point = {}  # point index to proposal vector
        gt_box_of_point = {}  # point index to corners_3d
        for obj in objects:
            _, obj_box_3d = utils.compute_box_3d(obj, calib.P)
            _, obj_mask = extract_pc_in_box3d(pc_rect, obj_box_3d)
            if np.sum(obj_mask) == 0:
                # label without 3d points
                # print('skip object without points')
                continue
            # IMPORTANT: this must match with NUM_SEG_CLASSES
            #seg_mask[obj_mask] = g_type2onehotclass[obj.type]
            seg_mask[obj_mask] = 1
            gt_boxes.append(obj_box_3d)
            obj_idxs = np.where(obj_mask)[0]
            # data augmentation
            # FIXME: jitter point will make valid loss growing
            # Also may go out of image view
            if random_shift and False:  # jitter object points
                pc_rect[obj_idxs, :3] = shift_point_cloud(
                    pc_rect[obj_idxs, :3], 0.02)
            for idx in obj_idxs:
                proposal_of_point[idx] = box_encoder.encode(
                    obj, pc_rect[idx, :3])
                gt_box_of_point[idx] = obj_box_3d
        # self.viz_frame(pc_rect, seg_mask, gt_boxes)
        # return pc_rect, seg_mask, proposal_of_point, gt_box_of_point, gt_boxes
        calib_matrix = np.copy(calib.P)
        calib_matrix[0, :] *= (1200.0 / image.shape[1])
        calib_matrix[1, :] *= (360.0 / image.shape[0])
        #print('construct', time.time() - start)
        return {
            'pointcloud': pc_rect,
            'image': cv2.resize(image, (1200, 360)),
            'calib': calib_matrix,
            'mask_label': seg_mask,
            'proposal_of_point': self.get_proposal_out(proposal_of_point),
            'gt_box_of_point': self.get_gt_box_of_points(gt_box_of_point),
            'gt_boxes': gt_boxes,
            'pc_choice': choice
        }
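
shift_point_cloud, the (currently disabled) jitter used above, would translate a point set by a small random offset. A sketch modeled on the PointNet-style provider utilities (an assumption, not the repository's code):

import numpy as np

def shift_point_cloud(points, shift_range=0.1):
    # apply one random xyz offset to the whole point set
    shift = np.random.uniform(-shift_range, shift_range, (1, 3))
    return points + shift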