def _calculate_num_points_in_gt(data_path, infos, relative_path, remove_outside=True, num_features=4): for info in infos: if relative_path: v_path = str(pathlib.Path(data_path) / info["velodyne_path"]) else: v_path = info["velodyne_path"] points_v = np.fromfile( v_path, dtype=np.float32, count=-1).reshape([-1, num_features]) rect = info['calib/R0_rect'] Trv2c = info['calib/Tr_velo_to_cam'] P2 = info['calib/P2'] if remove_outside: points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2, info["img_shape"]) # points_v = points_v[points_v[:, 0] > 0] annos = info['annos'] num_obj = len([n for n in annos['name'] if n != 'DontCare']) # annos = kitti.filter_kitti_anno(annos, ['DontCare']) dims = annos['dimensions'][:num_obj] loc = annos['location'][:num_obj] rots = annos['rotation_y'][:num_obj] gt_boxes_camera = np.concatenate( [loc, dims, rots[..., np.newaxis]], axis=1) gt_boxes_lidar = box_np_ops.box_camera_to_lidar( gt_boxes_camera, rect, Trv2c) indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar) num_points_in_gt = indices.sum(0) num_ignored = len(annos['dimensions']) - num_obj num_points_in_gt = np.concatenate( [num_points_in_gt, -np.ones([num_ignored])]) annos["num_points_in_gt"] = num_points_in_gt.astype(np.int32)
def create_reduced_point_cloud(data_path, save_path=None, back=False): info_path = pathlib.Path(data_path) / 'ikg_infos_test.pkl' with open(info_path, 'rb') as f: kitti_infos = pickle.load(f) for info in prog_bar(kitti_infos): v_path = info['velodyne_path'] #training/velodyne/000001.pcd v_path = pathlib.Path( data_path ) / v_path #./data/IKG/object/training/velodyne/000001.pcd pcd = pypcd.PointCloud.from_path(v_path) points = np.zeros((pcd.points, 4), dtype=np.float32) points[:, 0] = np.transpose(pcd.pc_data['x']) points[:, 1] = np.transpose(pcd.pc_data['y']) points[:, 2] = np.transpose(pcd.pc_data['z']) points[:, 3] = np.transpose(pcd.pc_data['intensity']) points[:, 3] /= 255 #kitti's intensity is normalized to 0-1, so ikg's intensity need to be normalized points_v = points points_v = box_np_ops.remove_outside_points( points_v, rect, Trv2c, P2, img_shape ) #img_shape :rgb img's shape, so that points not outside image v_path_bin = info['image_idx'] + '.bin' if save_path is None: save_filename = v_path.parent.parent / ( v_path.parent.stem + "_reduced") / v_path_bin #v_path.name else: save_filename = str(pathlib.Path(save_path) / v_path.name) with open(save_filename, 'w') as f: points_v.tofile(f)
def _create_reduced_point_cloud(data_path, info_path, save_path=None, back=False): with open(info_path, 'rb') as f: kitti_infos = pickle.load(f) for info in prog_bar(kitti_infos): v_path = info['velodyne_path'] v_path = pathlib.Path(data_path) / v_path points_v = np.fromfile(str(v_path), dtype=np.float32, count=-1).reshape([-1, 4]) rect = info['calib/R0_rect'] P2 = info['calib/P2'] Trv2c = info['calib/Tr_velo_to_cam'] # first remove z < 0 points # keep = points_v[:, -1] > 0 # points_v = points_v[keep] # then remove outside. if back: points_v[:, 0] = -points_v[:, 0] points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2, info["img_shape"]) if save_path is None: save_filename = v_path.parent.parent / (v_path.parent.stem + "_reduced") / v_path.name # save_filename = str(v_path) + '_reduced' if back: save_filename += "_back" else: save_filename = str(pathlib.Path(save_path) / v_path.name) if back: save_filename += "_back" with open(save_filename, 'w') as f: points_v.tofile(f)
def inference_by_idx(): global BACKEND instance = request.json response = {"status": "normal"} if BACKEND.root_path is None: return error_response("root path is not set") if BACKEND.kitti_infos is None: return error_response("kitti info is not loaded") if BACKEND.inference_ctx is None: return error_response("inference_ctx is not loaded") image_idx = instance["image_idx"] idx = BACKEND.image_idxes.index(image_idx) kitti_info = BACKEND.kitti_infos[idx] v_path = str(Path(BACKEND.root_path) / kitti_info['velodyne_path']) num_features = 4 points = np.fromfile( str(v_path), dtype=np.float32, count=-1).reshape([-1, num_features]) rect = kitti_info['calib/R0_rect'] P2 = kitti_info['calib/P2'] Trv2c = kitti_info['calib/Tr_velo_to_cam'] if 'img_shape' in kitti_info: image_shape = kitti_info['img_shape'] points = box_np_ops.remove_outside_points( points, rect, Trv2c, P2, image_shape) print(points.shape[0]) t = time.time() inputs = BACKEND.inference_ctx.get_inference_input_dict( kitti_info, points) print("input preparation time:", time.time() - t) t = time.time() with BACKEND.inference_ctx.ctx(): dt_annos = BACKEND.inference_ctx.inference(inputs)[0] print("detection time:", time.time() - t) dims = dt_annos['dimensions'] num_obj = dims.shape[0] loc = dt_annos['location'] rots = dt_annos['rotation_y'] labels = dt_annos['name'] dt_boxes_camera = np.concatenate( [loc, dims, rots[..., np.newaxis]], axis=1) dt_boxes = box_np_ops.box_camera_to_lidar( dt_boxes_camera, rect, Trv2c) box_np_ops.change_box3d_center_(dt_boxes, src=[0.5, 0.5, 0], dst=[0.5, 0.5, 0.5]) locs = dt_boxes[:, :3] dims = dt_boxes[:, 3:6] rots = np.concatenate([np.zeros([num_obj, 2], dtype=np.float32), -dt_boxes[:, 6:7]], axis=1) response["dt_locs"] = locs.tolist() response["dt_dims"] = dims.tolist() response["dt_rots"] = rots.tolist() response["dt_labels"] = labels.tolist() response["dt_scores"] = dt_annos["score"].tolist() response = jsonify(results=[response]) response.headers['Access-Control-Allow-Headers'] = '*' return response
def _create_reduced_point_cloud( data_path='/mrtstorage/datasets/kitti/object_detection', info_path='/home/zhwang/second.pytorch/second/data/sets/kitti_second' '/kitti_infos_train.pkl', save_path='/home/zhwang/second.pytorch/second/data/sets/kitti_second/training' '/velodyne_reduced', back=False): with open(info_path, 'rb') as f: kitti_infos = pickle.load(f) for info in prog_bar(kitti_infos): v_path = info['velodyne_path'] v_path = pathlib.Path(data_path) / v_path points_v = np.fromfile(str(v_path), dtype=np.float32, count=-1).reshape([-1, 4]) rect = info['calib/R0_rect'] P2 = info['calib/P2'] Trv2c = info['calib/Tr_velo_to_cam'] # first remove z < 0 points # keep = points_v[:, -1] > 0 # points_v = points_v[keep] # then remove outside. if back: points_v[:, 0] = -points_v[:, 0] points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2, info["img_shape"]) # print(points_v.dtype) """ fusion camera data to lidar """ image_idx = info['image_idx'] image_label = _get_semantic_segmentation_result( image_idx, data_dir='/home/zhwang/semantic-segmentation' '/kitti_train_results') points_v = _add_class_score(image_label, points_v, rect, Trv2c, P2) # print(points_v.dtype) if save_path is None: save_filename = v_path.parent.parent / (v_path.parent.stem + "_reduced") / v_path.name # save_filename = str(v_path) + '_reduced' if back: save_filename += "_back" else: save_filename = str(pathlib.Path(save_path) / v_path.name) if back: save_filename += "_back" with open(save_filename, 'w') as f: points_v.tofile(f)
def _create_reduced_point_cloud(data_path, info_path, save_path=None, tracking=False, back=False): with open(info_path, 'rb') as f: kitti_infos = pickle.load(f) for info in prog_bar(kitti_infos): pc_info = info["point_cloud"] image_info = info["image"] calib = info["calib"] v_path = pc_info['velodyne_path'] v_path = Path(data_path) / v_path points_v = np.fromfile( str(v_path), dtype=np.float32, count=-1).reshape([-1, 4]) rect = calib['R0_rect'] P2 = calib['P2'] Trv2c = calib['Tr_velo_to_cam'] # first remove z < 0 points # keep = points_v[:, -1] > 0 # points_v = points_v[keep] # then remove outside. if back: points_v[:, 0] = -points_v[:, 0] points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2, image_info["image_shape"]) if save_path is None: if tracking: # tracking index is 00XX/XXXXX, not XXXXX as detection save_filename = v_path.parent.parent.parent / ( v_path.parent.parent.stem + "_reduced") / v_path.parent.stem / v_path.name else: save_filename = v_path.parent.parent / ( v_path.parent.stem + "_reduced") / v_path.name # save_filename = str(v_path) + '_reduced' if back: save_filename += "_back" else: if tracking: save_filename = str(Path(save_path) / v_path.name) else: save_filename = str(Path(save_path) / v_path.name) if back: save_filename += "_back" with open(save_filename, 'w') as f: points_v.tofile(f)
def prep_remove_outside_points(input_dict, reference_detections, remove_outside_points, points): calib = None if "calib" in input_dict: calib = input_dict["calib"] if reference_detections is not None: assert calib is not None and "image" in input_dict C, R, T = box_np_ops.projection_matrix_to_CRT_kitti(P2) frustums = box_np_ops.get_frustum_v2(reference_detections, C) frustums -= T frustums = np.einsum('ij, akj->aki', np.linalg.inv(R), frustums) frustums = box_np_ops.camera_to_lidar(frustums, rect, Trv2c) surfaces = box_np_ops.corner_to_surfaces_3d_jit(frustums) masks = points_in_convex_polygon_3d_jit(points, surfaces) points = points[masks.any(-1)] if remove_outside_points: assert calib is not None image_shape = input_dict["image"]["image_shape"] points = box_np_ops.remove_outside_points(points, calib["rect"], calib["Trv2c"], calib["P2"], image_shape)
def prep_pointcloud(input_dict, root_path, voxel_generator, target_assigner, db_sampler=None, max_voxels=20000, class_names=['Car'], remove_outside_points=False, training=True, create_targets=True, shuffle_points=False, reduce_valid_area=False, remove_unknown=False, gt_rotation_noise=[-np.pi / 3, np.pi / 3], gt_loc_noise_std=[1.0, 1.0, 1.0], global_rotation_noise=[-np.pi / 4, np.pi / 4], global_scaling_noise=[0.95, 1.05], global_random_rot_range=[0.78, 2.35], generate_bev=False, without_reflectivity=False, num_point_features=4, anchor_area_threshold=1, gt_points_drop=0.0, gt_drop_max_keep=10, remove_points_after_sample=True, anchor_cache=None, remove_environment=False, random_crop=False, reference_detections=None, add_rgb_to_points=False, lidar_input=False, unlabeled_db_sampler=None, out_size_factor=2, min_gt_point_dict=None, bev_only=False, use_group_id=False, out_dtype=np.float32): """convert point cloud to voxels, create targets if ground truths exists. """ points = input_dict["points"] if training: gt_boxes = input_dict["gt_boxes"] gt_names = input_dict["gt_names"] difficulty = input_dict["difficulty"] group_ids = None if use_group_id and "group_ids" in input_dict: group_ids = input_dict["group_ids"] rect = input_dict["rect"] Trv2c = input_dict["Trv2c"] P2 = input_dict["P2"] unlabeled_training = unlabeled_db_sampler is not None #image_idx = input_dict["image_idx"] if reference_detections is not None: C, R, T = box_np_ops.projection_matrix_to_CRT_kitti(P2) frustums = box_np_ops.get_frustum_v2(reference_detections, C) frustums -= T # frustums = np.linalg.inv(R) @ frustums.T frustums = np.einsum('ij, akj->aki', np.linalg.inv(R), frustums) frustums = box_np_ops.camera_to_lidar(frustums, rect, Trv2c) surfaces = box_np_ops.corner_to_surfaces_3d_jit(frustums) masks = points_in_convex_polygon_3d_jit(points, surfaces) points = points[masks.any(-1)] if remove_outside_points and not lidar_input: image_shape = input_dict["image_shape"] points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2, image_shape) if remove_environment is True and training: selected = kitti.keep_arrays_by_name(gt_names, class_names) gt_boxes = gt_boxes[selected] gt_names = gt_names[selected] difficulty = difficulty[selected] if group_ids is not None: group_ids = group_ids[selected] points = prep.remove_points_outside_boxes(points, gt_boxes) if training: # print(gt_names) selected = kitti.drop_arrays_by_name(gt_names, ["DontCare"]) gt_boxes = gt_boxes[selected] gt_names = gt_names[selected] difficulty = difficulty[selected] if group_ids is not None: group_ids = group_ids[selected] gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes, rect, Trv2c) if remove_unknown: remove_mask = difficulty == -1 """ gt_boxes_remove = gt_boxes[remove_mask] gt_boxes_remove[:, 3:6] += 0.25 points = prep.remove_points_in_boxes(points, gt_boxes_remove) """ keep_mask = np.logical_not(remove_mask) gt_boxes = gt_boxes[keep_mask] gt_names = gt_names[keep_mask] difficulty = difficulty[keep_mask] if group_ids is not None: group_ids = group_ids[keep_mask] gt_boxes_mask = np.array([n in class_names for n in gt_names], dtype=np.bool_) if db_sampler is not None: sampled_dict = db_sampler.sample_all(root_path, gt_boxes, gt_names, num_point_features, random_crop, gt_group_ids=group_ids, rect=rect, Trv2c=Trv2c, P2=P2) if sampled_dict is not None: sampled_gt_names = sampled_dict["gt_names"] sampled_gt_boxes = sampled_dict["gt_boxes"] sampled_points = sampled_dict["points"] sampled_gt_masks = sampled_dict["gt_masks"] # gt_names = gt_names[gt_boxes_mask].tolist() gt_names = np.concatenate([gt_names, sampled_gt_names], axis=0) # gt_names += [s["name"] for s in sampled] gt_boxes = np.concatenate([gt_boxes, sampled_gt_boxes]) gt_boxes_mask = np.concatenate( [gt_boxes_mask, sampled_gt_masks], axis=0) if group_ids is not None: sampled_group_ids = sampled_dict["group_ids"] group_ids = np.concatenate([group_ids, sampled_group_ids]) if remove_points_after_sample: points = prep.remove_points_in_boxes( points, sampled_gt_boxes) points = np.concatenate([sampled_points, points], axis=0) # unlabeled_mask = np.zeros((gt_boxes.shape[0], ), dtype=np.bool_) if without_reflectivity: used_point_axes = list(range(num_point_features)) used_point_axes.pop(3) points = points[:, used_point_axes] pc_range = voxel_generator.point_cloud_range if bev_only: # set z and h to limits gt_boxes[:, 2] = pc_range[2] gt_boxes[:, 5] = pc_range[5] - pc_range[2] prep.noise_per_object_v3_( gt_boxes, points, gt_boxes_mask, rotation_perturb=gt_rotation_noise, center_noise_std=gt_loc_noise_std, global_random_rot_range=global_random_rot_range, group_ids=group_ids, num_try=100) # should remove unrelated objects after noise per object gt_boxes = gt_boxes[gt_boxes_mask] gt_names = gt_names[gt_boxes_mask] if group_ids is not None: group_ids = group_ids[gt_boxes_mask] gt_classes = np.array([class_names.index(n) + 1 for n in gt_names], dtype=np.int32) gt_boxes, points = prep.random_flip(gt_boxes, points) gt_boxes, points = prep.global_rotation(gt_boxes, points, rotation=global_rotation_noise) gt_boxes, points = prep.global_scaling_v2(gt_boxes, points, *global_scaling_noise) bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]] mask = prep.filter_gt_box_outside_range(gt_boxes, bv_range) gt_boxes = gt_boxes[mask] gt_classes = gt_classes[mask] gt_names = gt_names[mask] if group_ids is not None: group_ids = group_ids[mask] # limit rad to [-pi, pi] gt_boxes[:, 6] = box_np_ops.limit_period(gt_boxes[:, 6], offset=0.5, period=2 * np.pi) if shuffle_points: # shuffle is a little slow. np.random.shuffle(points) # [0, -40, -3, 70.4, 40, 1] voxel_size = voxel_generator.voxel_size pc_range = voxel_generator.point_cloud_range grid_size = voxel_generator.grid_size # [352, 400] voxels, coordinates, num_points = voxel_generator.generate( points, max_voxels) example = { 'voxels': voxels, 'num_points': num_points, 'coordinates': coordinates, "num_voxels": np.array([voxels.shape[0]], dtype=np.int64) } example.update({ 'rect': rect, 'Trv2c': Trv2c, 'P2': P2, }) # if not lidar_input: feature_map_size = grid_size[:2] // out_size_factor feature_map_size = [*feature_map_size, 1][::-1] if anchor_cache is not None: anchors = anchor_cache["anchors"] anchors_bv = anchor_cache["anchors_bv"] matched_thresholds = anchor_cache["matched_thresholds"] unmatched_thresholds = anchor_cache["unmatched_thresholds"] anchors_dict = anchor_cache["anchors_dict"] else: ret = target_assigner.generate_anchors(feature_map_size) anchors = ret["anchors"] anchors = anchors.reshape([-1, 7]) matched_thresholds = ret["matched_thresholds"] unmatched_thresholds = ret["unmatched_thresholds"] anchors_dict = target_assigner.generate_anchors_dict(feature_map_size) anchors_bv = box_np_ops.rbbox2d_to_near_bbox(anchors[:, [0, 1, 3, 4, 6]]) example["anchors"] = anchors # print("debug", anchors.shape, matched_thresholds.shape) # anchors_bv = anchors_bv.reshape([-1, 4]) anchors_mask = None if anchor_area_threshold >= 0: coors = coordinates dense_voxel_map = box_np_ops.sparse_sum_for_anchors_mask( coors, tuple(grid_size[::-1][1:])) dense_voxel_map = dense_voxel_map.cumsum(0) dense_voxel_map = dense_voxel_map.cumsum(1) anchors_area = box_np_ops.fused_get_anchors_area( dense_voxel_map, anchors_bv, voxel_size, pc_range, grid_size) anchors_mask = anchors_area > anchor_area_threshold # example['anchors_mask'] = anchors_mask.astype(np.uint8) example['anchors_mask'] = anchors_mask if not training: return example if create_targets: targets_dict = target_assigner.assign_v2(anchors_dict, gt_boxes, anchors_mask, gt_classes=gt_classes, gt_names=gt_names) example.update({ 'labels': targets_dict['labels'], 'reg_targets': targets_dict['bbox_targets'], 'reg_weights': targets_dict['bbox_outside_weights'], }) return example
def create_groundtruth_database(data_path, info_path=None, used_classes=None, database_save_path=None, db_info_save_path=None, relative_path=True, lidar_only=False, bev_only=False, coors_range=None): RGB_embedding = True root_path = pathlib.Path(data_path) if info_path is None: info_path = root_path / 'kitti_infos_train.pkl' if database_save_path is None: database_save_path = root_path / 'gt_database' else: database_save_path = pathlib.Path(database_save_path) if db_info_save_path is None: if RGB_embedding: db_info_save_path = root_path / "kitti_dbinfos_train_RGB.pkl" else: db_info_save_path = root_path / "kitti_dbinfos_train.pkl" database_save_path.mkdir(parents=True, exist_ok=True) with open(info_path, 'rb') as f: kitti_infos = pickle.load(f) all_db_infos = {} if used_classes is None: used_classes = list(kitti.get_classes()) used_classes.pop(used_classes.index('DontCare')) for name in used_classes: all_db_infos[name] = [] group_counter = 0 for info in prog_bar(kitti_infos): velodyne_path = info['velodyne_path'] if relative_path: # velodyne_path = str(root_path / velodyne_path) + "_reduced" velodyne_path = str(root_path / velodyne_path) num_features = 4 if 'pointcloud_num_features' in info: num_features = info['pointcloud_num_features'] points = np.fromfile(velodyne_path, dtype=np.float32, count=-1).reshape([-1, num_features]) image_idx = info["image_idx"] rect = info['calib/R0_rect'] P2 = info['calib/P2'] Trv2c = info['calib/Tr_velo_to_cam'] if not lidar_only: points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2, info["img_shape"]) annos = info["annos"] names = annos["name"] bboxes = annos["bbox"] difficulty = annos["difficulty"] gt_idxes = annos["index"] num_obj = np.sum(annos["index"] >= 0) rbbox_cam = kitti.anno_to_rbboxes(annos)[:num_obj] rbbox_lidar = box_np_ops.box_camera_to_lidar(rbbox_cam, rect, Trv2c) if bev_only: # set z and h to limits assert coors_range is not None rbbox_lidar[:, 2] = coors_range[2] rbbox_lidar[:, 5] = coors_range[5] - coors_range[2] if RGB_embedding: RGB_image = cv2.imread(str(root_path / info['img_path'])) points_camera = box_np_ops.box_lidar_to_camera( points[:, :3], rect, Trv2c) points_to_image_idx = box_np_ops.project_to_image( points_camera, P2) points_to_image_idx = points_to_image_idx.astype(int) mask = box_np_ops.remove_points_outside_image( RGB_image, points_to_image_idx) points = points[mask] points_to_image_idx = points_to_image_idx[mask] BGR = RGB_image[points_to_image_idx[:, 1], points_to_image_idx[:, 0]] points = np.concatenate((points, BGR), axis=1) group_dict = {} group_ids = np.full([bboxes.shape[0]], -1, dtype=np.int64) if "group_ids" in annos: group_ids = annos["group_ids"] else: group_ids = np.arange(bboxes.shape[0], dtype=np.int64) point_indices = box_np_ops.points_in_rbbox(points, rbbox_lidar) for i in range(num_obj): if RGB_embedding: filename = f"{image_idx}_{names[i]}_{gt_idxes[i]}_RGB.bin" else: filename = f"{image_idx}_{names[i]}_{gt_idxes[i]}.bin" filepath = database_save_path / filename gt_points = points[point_indices[:, i]] gt_points[:, :3] -= rbbox_lidar[i, :3] with open(filepath, 'w') as f: gt_points.tofile(f) if names[i] in used_classes: if relative_path: db_path = str(database_save_path.stem + "/" + filename) else: db_path = str(filepath) db_info = { "name": names[i], "path": db_path, "image_idx": image_idx, "gt_idx": gt_idxes[i], "box3d_lidar": rbbox_lidar[i], "num_points_in_gt": gt_points.shape[0], "difficulty": difficulty[i], # "group_id": -1, # "bbox": bboxes[i], } local_group_id = group_ids[i] # if local_group_id >= 0: if local_group_id not in group_dict: group_dict[local_group_id] = group_counter group_counter += 1 db_info["group_id"] = group_dict[local_group_id] if "score" in annos: db_info["score"] = annos["score"][i] all_db_infos[names[i]].append(db_info) for k, v in all_db_infos.items(): print(f"load {len(v)} {k} database infos") with open(db_info_save_path, 'wb') as f: pickle.dump(all_db_infos, f)
def prep_pointcloud( input_dict, root_path, # voxel_generator, fv_generator, target_assigner, db_sampler=None, max_voxels=20000, class_names=['Car'], remove_outside_points=False, training=True, create_targets=True, shuffle_points=False, reduce_valid_area=False, remove_unknown=False, gt_rotation_noise=[-np.pi / 3, np.pi / 3], gt_loc_noise_std=[1.0, 1.0, 1.0], global_rotation_noise=[-np.pi / 4, np.pi / 4], global_scaling_noise=[0.95, 1.05], global_loc_noise_std=(0.2, 0.2, 0.2), global_random_rot_range=[0.78, 2.35], generate_bev=False, without_reflectivity=False, num_point_features=4, anchor_area_threshold=1, gt_points_drop=0.0, gt_drop_max_keep=10, remove_points_after_sample=False, anchor_cache=None, remove_environment=False, random_crop=False, reference_detections=None, add_rgb_to_points=False, lidar_input=False, unlabeled_db_sampler=None, out_size_factor=2, min_gt_point_dict=None, bev_only=False, use_group_id=False, out_dtype=np.float32, num_classes=1, RGB_embedding=False): """convert point cloud to voxels, create targets if ground truths exists. """ # prep_pointcloud_start = time.time() points = input_dict["points"] # if training: gt_boxes = input_dict["gt_boxes"] gt_names = input_dict["gt_names"] difficulty = input_dict["difficulty"] group_ids = None if use_group_id and "group_ids" in input_dict: group_ids = input_dict["group_ids"] rect = input_dict["rect"] Trv2c = input_dict["Trv2c"] P2 = input_dict["P2"] unlabeled_training = unlabeled_db_sampler is not None image_idx = input_dict["image_idx"] # t1 = time.time() - prep_pointcloud_start if shuffle_points: # shuffle is a little slow. np.random.shuffle(points) # t2 = time.time() - prep_pointcloud_start # print("t2-t1: ", t2-t1) # 0.035 if reference_detections is not None: C, R, T = box_np_ops.projection_matrix_to_CRT_kitti(P2) frustums = box_np_ops.get_frustum_v2(reference_detections, C) frustums -= T # frustums = np.linalg.inv(R) @ frustums.T frustums = np.einsum('ij, akj->aki', np.linalg.inv(R), frustums) frustums = box_np_ops.camera_to_lidar(frustums, rect, Trv2c) surfaces = box_np_ops.corner_to_surfaces_3d_jit(frustums) masks = points_in_convex_polygon_3d_jit(points, surfaces) points = points[masks.any(-1)] if remove_outside_points: # and not lidar_input: image_shape = input_dict["image_shape"] points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2, image_shape) if remove_environment is True: # and training: selected = kitti.keep_arrays_by_name(gt_names, class_names) gt_boxes = gt_boxes[selected] gt_names = gt_names[selected] difficulty = difficulty[selected] if group_ids is not None: group_ids = group_ids[selected] points = prep.remove_points_outside_boxes(points, gt_boxes) # if training: # print(gt_names) selected = kitti.drop_arrays_by_name(gt_names, ["DontCare"]) gt_boxes = gt_boxes[selected] gt_names = gt_names[selected] difficulty = difficulty[selected] if group_ids is not None: group_ids = group_ids[selected] # t3 = time.time() - prep_pointcloud_start # print("t3-t2: ", t3 - t2) # 0.0002 gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes, rect, Trv2c) if remove_unknown: remove_mask = difficulty == -1 """ gt_boxes_remove = gt_boxes[remove_mask] gt_boxes_remove[:, 3:6] += 0.25 points = prep.remove_points_in_boxes(points, gt_boxes_remove) """ keep_mask = np.logical_not(remove_mask) gt_boxes = gt_boxes[keep_mask] gt_names = gt_names[keep_mask] difficulty = difficulty[keep_mask] if group_ids is not None: group_ids = group_ids[keep_mask] gt_boxes_mask = np.array([n in class_names for n in gt_names], dtype=np.bool_) # t4 = time.time() - prep_pointcloud_start # print("t4-t3: ", t4 - t3) # 0.001 if RGB_embedding: RGB_image = cv2.imread(input_dict['image_path']) points_camera = box_np_ops.box_lidar_to_camera(points[:, :3], rect, Trv2c) points_to_image_idx = box_np_ops.project_to_image(points_camera, P2) points_to_image_idx = points_to_image_idx.astype(int) mask = box_np_ops.remove_points_outside_image(RGB_image, points_to_image_idx) points = points[mask] points_to_image_idx = points_to_image_idx[mask] BGR = RGB_image[points_to_image_idx[:, 1], points_to_image_idx[:, 0]] points = np.concatenate((points, BGR), axis=1) # test_mask = points_camera[mask][:, 0] < 0 # test_image_idx = points_to_image_idx[test_mask] # RGB_image[test_image_idx[:, 1], test_image_idx[:, 0]] = [255, 0, 0] # test_mask = points_camera[mask][:, 0] >= 0 # test_image_idx = points_to_image_idx[test_mask] # RGB_image[test_image_idx[:, 1], test_image_idx[:, 0]] = [0, 0, 255] # print() # t5 = time.time() - prep_pointcloud_start # print("t5-t4: ", t5 - t4) # 0.019 # TODO if db_sampler is not None and training: # and not RGB_embedding: if RGB_embedding: num_point_features += 3 fg_points_mask = box_np_ops.points_in_rbbox(points, gt_boxes) fg_points_list = [] bg_points_mask = np.zeros((points.shape[0]), dtype=bool) for i in range(fg_points_mask.shape[1]): fg_points_list.append(points[fg_points_mask[:, i]]) bg_points_mask = np.logical_or(bg_points_mask, fg_points_mask[:, i]) bg_points_mask = np.logical_not(bg_points_mask) sampled_dict = db_sampler.sample_all(root_path, points[bg_points_mask], gt_boxes, gt_names, fg_points_list, num_point_features, random_crop, gt_group_ids=group_ids, rect=rect, Trv2c=Trv2c, P2=P2) # sampled_dict = db_sampler.sample_all( # root_path, # gt_boxes, # gt_names, # num_point_features, # random_crop, # gt_group_ids=group_ids, # rect=rect, # Trv2c=Trv2c, # P2=P2) # t_sample_all = time.time() - prep_pointcloud_start # print("t_sample_all - t5: ", t_sample_all - t5) # 3.83 if sampled_dict is not None: sampled_gt_names = sampled_dict["gt_names"] sampled_gt_boxes = sampled_dict["gt_boxes"] points = sampled_dict["points"] sampled_gt_masks = sampled_dict["gt_masks"] remained_boxes_idx = sampled_dict["remained_boxes_idx"] # gt_names = gt_names[gt_boxes_mask].tolist() gt_names = np.concatenate([gt_names, sampled_gt_names], axis=0) # gt_names += [s["name"] for s in sampled] gt_boxes = np.concatenate([gt_boxes, sampled_gt_boxes]) gt_boxes_mask = np.concatenate([gt_boxes_mask, sampled_gt_masks], axis=0) gt_names = gt_names[remained_boxes_idx] gt_boxes = gt_boxes[remained_boxes_idx] gt_boxes_mask = gt_boxes_mask[remained_boxes_idx] if group_ids is not None: sampled_group_ids = sampled_dict["group_ids"] group_ids = np.concatenate([group_ids, sampled_group_ids]) group_ids = group_ids[remained_boxes_idx] # if remove_points_after_sample: # # points = prep.remove_points_in_boxes( # # points, sampled_gt_boxes) # locs = sampled_gt_boxes[:, 0:3] # dims = sampled_gt_boxes[:, 3:6] # angles = sampled_gt_boxes[:, 6] # camera_box_origin = [0.5, 0.5, 0] # # box_corners = box_np_ops.center_to_corner_box3d( # locs, dims, angles, camera_box_origin, axis=2) # box_corners_in_image = box_np_ops.project_to_fv_image( # box_corners, example['grid_size'][i], example['meta'][i]) # box_centers_in_image = box_np_ops.project_to_fv_image( # locs, example['grid_size'][i], example['meta'][i]) # t_sample_all2 = time.time() - prep_pointcloud_start # print("t_sample_all2 - t_sample_all: ", t_sample_all2 - t_sample_all) # 0.0002 # unlabeled_mask = np.zeros((gt_boxes.shape[0], ), dtype=np.bool_) # if without_reflectivity and training: # used_point_axes = list(range(num_point_features)) # used_point_axes.pop(3) # points = points[:, used_point_axes] # pc_range = voxel_generator.point_cloud_range # bev_only = False # if bev_only: # set z and h to limits # gt_boxes[:, 2] = pc_range[2] # gt_boxes[:, 5] = pc_range[5] - pc_range[2] if training: gt_loc_noise_std = [0.0, 0.0, 0.0] prep.noise_per_object_v3_( gt_boxes, points, gt_boxes_mask, rotation_perturb=gt_rotation_noise, center_noise_std=gt_loc_noise_std, global_random_rot_range=global_random_rot_range, group_ids=group_ids, num_try=100) # t_noise = time.time() - prep_pointcloud_start # print("t_noise - t_sample_all2: ", t_noise - t_sample_all2) # 12.01 # should remove unrelated objects after noise per object gt_boxes = gt_boxes[gt_boxes_mask] gt_names = gt_names[gt_boxes_mask] if group_ids is not None: group_ids = group_ids[gt_boxes_mask] gt_classes = np.array([class_names.index(n) + 1 for n in gt_names], dtype=np.int32) # t6 = time.time() - prep_pointcloud_start # print("t6-t5: ", t6 - t5) # 16.0 if training: gt_boxes, points = prep.random_flip(gt_boxes, points) # gt_boxes, points = prep.global_rotation( # gt_boxes, points, rotation=global_rotation_noise) gt_boxes, points = prep.global_scaling_v2(gt_boxes, points, *global_scaling_noise) # Global translation # gt_boxes, points = prep.global_translate(gt_boxes, points, global_loc_noise_std) # bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]] bv_range = [0, -40, 70.4, 40] mask = prep.filter_gt_box_outside_range(gt_boxes, bv_range) gt_boxes = gt_boxes[mask] gt_classes = gt_classes[mask] if group_ids is not None: group_ids = group_ids[mask] # limit rad to [-pi, pi] gt_boxes[:, 6] = box_np_ops.limit_period(gt_boxes[:, 6], offset=0.5, period=2 * np.pi) # TODO # if shuffle_points: # # shuffle is a little slow. # np.random.shuffle(points) # # t7 = time.time() - prep_pointcloud_start # # print("t7-t6: ", t7 - t6) # 1.95 # voxels, coordinates, num_points = voxel_generator.generate( # points, max_voxels, RGB_embedding=RGB_embedding) # # t8 = time.time() - prep_pointcloud_start # # print("t8-t7: ", t8 - t7) # 2.0 # voxel_size = voxel_generator.voxel_size # grid_size = voxel_generator.grid_size # pc_range = copy.deepcopy(voxel_generator.point_cloud_range) # grid_size = voxel_generator.grid_size # phi_min = voxel_generator.phi_min # theta_min = voxel_generator.theta_min # image_h, image_w = grid_size[1], grid_size[0] # c = np.array([image_w / 2., image_h / 2.]) # s = np.array([image_w, image_h], dtype=np.int32) # meta = {'c': c, 's': s, 'calib': P2, 'phi_min': phi_min, 'theta_min': theta_min} # t7 = time.time() - prep_pointcloud_start # print("t7-t6: ", t7 - t6) # 1.95 fv_image, points_mask = fv_generator.generate(points, RGB_embedding=RGB_embedding, occupancy_embedding=False) # t8 = time.time() - prep_pointcloud_start # print("t8-t7: ", t8 - t7) # 2.0 fv_dim = fv_generator.fv_dim pc_range = copy.deepcopy(fv_generator.spherical_coord_range) grid_size = fv_generator.grid_size phi_min = fv_generator.phi_min theta_min = fv_generator.theta_min image_h, image_w = fv_dim[1], fv_dim[0] c = np.array([image_w / 2., image_h / 2.]) s = np.array([image_w, image_h], dtype=np.int32) meta = { 'c': c, 's': s, 'calib': P2, 'phi_min': phi_min, 'theta_min': theta_min } fv_image = np.transpose(fv_image, [2, 1, 0]) max_objs = 50 num_objs = min(gt_boxes.shape[0], max_objs) box_np_ops.change_box3d_center_(gt_boxes, src=[0.5, 0.5, 0], dst=[0.5, 0.5, 0.5]) spherical_gt_boxes = np.zeros((max_objs, gt_boxes.shape[1])) spherical_gt_boxes[:num_objs, :] = gt_boxes[:num_objs, :] spherical_gt_boxes[:num_objs, :] = convert_to_spherical_coor( gt_boxes[:num_objs, :]) spherical_gt_boxes[:num_objs, 0] -= phi_min spherical_gt_boxes[:num_objs, 1] -= theta_min spherical_gt_boxes[:num_objs, 0] /= grid_size[0] spherical_gt_boxes[:num_objs, 1] /= grid_size[1] spherical_gt_boxes, num_objs = filter_outside_range( spherical_gt_boxes, num_objs, fv_dim) # t9 = time.time() - prep_pointcloud_start # print("t9-t8: ", t9 - t8) # 0.0005 example = { 'fv_image': fv_image, 'grid_size': grid_size, 'pc_range': pc_range, 'meta': meta, 'spherical_gt_boxes': spherical_gt_boxes, 'resized_image_shape': grid_size } example.update({'rect': rect, 'Trv2c': Trv2c, 'P2': P2}) if RGB_embedding: RGB_image = cv2.resize(RGB_image, (image_w, image_h)) example.update({'RGB_image': RGB_image}) if training: hm = np.zeros((num_classes, image_h, image_w), dtype=np.float32) reg = np.zeros((max_objs, 2), dtype=np.float32) dep = np.zeros((max_objs, 1), dtype=np.float32) rotbin = np.zeros((max_objs, 2), dtype=np.int64) rotres = np.zeros((max_objs, 2), dtype=np.float32) dim = np.zeros((max_objs, 3), dtype=np.float32) ind = np.zeros((max_objs), dtype=np.int64) reg_mask = np.zeros((max_objs), dtype=np.uint8) rot_mask = np.zeros((max_objs), dtype=np.uint8) # # hm = np.zeros((num_classes, image_h, image_w), dtype=np.float32) # reg = np.zeros((image_w, image_h, 2), dtype=np.float32) # dep = np.zeros((image_w, image_h, 1), dtype=np.float32) # rotbin = np.zeros((image_w, image_h, 2), dtype=np.int64) # rotres = np.zeros((image_w, image_h, 2), dtype=np.float32) # dim = np.zeros((image_w, image_h, 3), dtype=np.float32) # # ind = np.zeros((max_objs), dtype=np.int64) # fg_mask = np.zeros((image_w, image_h), dtype=np.uint8) # # rot_mask = np.zeros((max_objs), dtype=np.uint8) draw_gaussian = draw_umich_gaussian # center heatmap radius = int(image_h / 30) # radius = int(image_h / 25) for k in range(num_objs): gt_3d_box = spherical_gt_boxes[k] cls_id = 0 # print('heatmap gaussian radius: ' + str(radius)) ct = np.array([gt_3d_box[0], gt_3d_box[1]], dtype=np.float32) ct_int = ct.astype(np.int32) draw_gaussian(hm[cls_id], ct, radius) # depth(distance), wlh dep[k] = gt_3d_box[2] dim[k] = gt_3d_box[3:6] # dep[ct_int[0], ct_int[1], 0] = gt_3d_box[2] # dim[ct_int[0], ct_int[1], :] = gt_3d_box[3:6] # reg, ind, mask reg[k] = ct - ct_int ind[k] = ct_int[1] * image_w + ct_int[0] reg_mask[k] = rot_mask[k] = 1 # fg_mask[ct_int[0], ct_int[1]] = 1 # ry ry = gt_3d_box[6] if ry < np.pi / 6. or ry > 5 * np.pi / 6.: rotbin[k, 0] = 1 rotres[k, 0] = ry - (-0.5 * np.pi) # rotbin[ct_int[0], ct_int[1], 0] = 1 # rotres[ct_int[0], ct_int[1], 0] = ry - (-0.5 * np.pi) if ry > -np.pi / 6. or ry < -5 * np.pi / 6.: rotbin[k, 1] = 1 rotres[k, 1] = ry - (0.5 * np.pi) # rotbin[ct_int[0], ct_int[1], 1] = 1 # rotres[ct_int[0], ct_int[1], 1] = ry - (0.5 * np.pi) example.update({ 'hm': hm, 'dep': dep, 'dim': dim, 'ind': ind, 'rotbin': rotbin, 'rotres': rotres, 'reg_mask': reg_mask, 'rot_mask': rot_mask, 'reg': reg }) # example.update({ # 'hm': hm, 'dep': dep, 'dim': dim, # 'rotbin': rotbin, 'rotres': rotres, # 'fg_mask': fg_mask, 'reg': reg # }) # t10 = time.time() - prep_pointcloud_start # print("total: ", t10) # 19.58 return example
def _create_reduced_point_cloud(data_path, info_path, save_path=None, color=True, back=False): bgr_path = None with open(info_path, 'rb') as f: kitti_infos = pickle.load(f) for info in prog_bar(kitti_infos): pc_info = info["point_cloud"] image_info = info["image"] image_path = image_info['image_path'] calib = info["calib"] v_path = pc_info['velodyne_path'] v_path = Path(data_path) / v_path points_v = np.fromfile(str(v_path), dtype=np.float32, count=-1).reshape([-1, 4]) rect = calib['R0_rect'] P2 = calib['P2'] Trv2c = calib['Tr_velo_to_cam'] # first remove z < 0 points # keep = points_v[:, -1] > 0 # points_v = points_v[keep] # then remove outside. if back: points_v[:, 0] = -points_v[:, 0] points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2, image_info["image_shape"]) points_xyz = lidar_to_camera(points_v, rect, Trv2c) points_xy = project_to_image(points_xyz, P2) image_path = v_path.parent.parent.parent / image_path BGR_image = cv2.imread(str(image_path)) points_bgr = np.zeros((len(points_v), 3)) image_shape = image_info["image_shape"] for i in range(len(points_bgr)): new_y = int(points_xy[i][0]) if new_y >= image_shape[1]: new_y = image_shape[1] - 1 new_x = int(points_xy[i][1]) if new_x >= image_shape[0]: new_x = image_shape[0] - 1 points_bgr[i] = BGR_image[new_x][new_y] / 255 if bgr_path is None: save_bgr_filename = v_path.parent.parent / (v_path.parent.stem + "_bgr") / v_path.name # save_filename = str(v_path) + '_reduced' if back: save_filename += "_bgr_back" else: save_bgr_filename = str(Path(bgr_path) / v_path.name) if back: save_bgr_filename += "_bgr_back" with open(save_bgr_filename, 'w') as f: points_bgr.tofile(f) if save_path is None: save_filename = v_path.parent.parent / (v_path.parent.stem + "_bgr") / v_path.name # save_filename = str(v_path) + '_reduced' if back: save_filename += "_back" else: save_filename = str(Path(save_path) / v_path.name) if back: save_filename += "_back" with open(save_filename, 'w') as f: points_v.tofile(f)
def get_pointcloud(): global BACKEND instance = request.json response = {"status": "normal"} if BACKEND.root_path is None: return error_response("root path is not set") if BACKEND.kitti_infos is None: return error_response("kitti info is not loaded") image_idx = instance["image_idx"] enable_int16 = instance["enable_int16"] idx = BACKEND.image_idxes.index(image_idx) kitti_info = BACKEND.kitti_infos[idx] pc_info = kitti_info["point_cloud"] image_info = kitti_info["image"] calib = kitti_info["calib"] rect = calib['R0_rect'] Trv2c = calib['Tr_velo_to_cam'] P2 = calib['P2'] img_shape = image_info["image_shape"] # hw wh = np.array(img_shape[::-1]) whwh = np.tile(wh, 2) if 'annos' in kitti_info: annos = kitti_info['annos'] labels = annos['name'] num_obj = len([n for n in annos['name'] if n != 'DontCare']) dims = annos['dimensions'][:num_obj] loc = annos['location'][:num_obj] rots = annos['rotation_y'][:num_obj] bbox = annos['bbox'][:num_obj] / whwh gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]], axis=1) gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes_camera, rect, Trv2c) box_np_ops.change_box3d_center_(gt_boxes, src=[0.5, 0.5, 0], dst=[0.5, 0.5, 0.5]) locs = gt_boxes[:, :3] dims = gt_boxes[:, 3:6] rots = np.concatenate( [np.zeros([num_obj, 2], dtype=np.float32), -gt_boxes[:, 6:7]], axis=1) frontend_annos = {} response["locs"] = locs.tolist() response["dims"] = dims.tolist() response["rots"] = rots.tolist() response["bbox"] = bbox.tolist() response["labels"] = labels[:num_obj].tolist() response["num_features"] = pc_info["num_features"] v_path = str(Path(BACKEND.root_path) / pc_info['velodyne_path']) points = np.fromfile(v_path, dtype=np.float32, count=-1).reshape([-1, pc_info["num_features"]]) if instance['remove_outside']: if 'image_shape' in image_info: image_shape = image_info['image_shape'] points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2, image_shape) else: points = points[points[..., 2] > 0] if enable_int16: int16_factor = instance["int16_factor"] points *= int16_factor points = points.astype(np.int16) pc_str = base64.b64encode(points.tobytes()) response["pointcloud"] = pc_str.decode("utf-8") if "with_det" in instance and instance["with_det"]: if BACKEND.dt_annos is None: return error_response("det anno is not loaded") dt_annos = BACKEND.dt_annos[idx] dims = dt_annos['dimensions'] num_obj = dims.shape[0] loc = dt_annos['location'] rots = dt_annos['rotation_y'] bbox = dt_annos['bbox'] / whwh labels = dt_annos['name'] dt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]], axis=1) dt_boxes = box_np_ops.box_camera_to_lidar(dt_boxes_camera, rect, Trv2c) box_np_ops.change_box3d_center_(dt_boxes, src=[0.5, 0.5, 0], dst=[0.5, 0.5, 0.5]) locs = dt_boxes[:, :3] dims = dt_boxes[:, 3:6] rots = np.concatenate( [np.zeros([num_obj, 2], dtype=np.float32), -dt_boxes[:, 6:7]], axis=1) response["dt_locs"] = locs.tolist() response["dt_dims"] = dims.tolist() response["dt_rots"] = rots.tolist() response["dt_labels"] = labels.tolist() response["dt_bbox"] = bbox.tolist() response["dt_scores"] = dt_annos["score"].tolist() # if "score" in annos: # response["score"] = score.tolist() response = jsonify(results=[response]) response.headers['Access-Control-Allow-Headers'] = '*' print("send response with size {}!".format(len(pc_str))) return response
def prep_pointcloud(input_dict, root_path, voxel_generator, target_assigner, db_sampler=None, max_voxels=20000, remove_outside_points=False, training=True, create_targets=True, shuffle_points=False, remove_unknown=False, gt_rotation_noise=(-np.pi / 3, np.pi / 3), gt_loc_noise_std=(1.0, 1.0, 1.0), global_rotation_noise=(-np.pi / 4, np.pi / 4), global_scaling_noise=(0.95, 1.05), global_random_rot_range=(0.78, 2.35), global_translate_noise_std=(0, 0, 0), num_point_features=4, anchor_area_threshold=1, gt_points_drop=0.0, gt_drop_max_keep=10, remove_points_after_sample=True, anchor_cache=None, remove_environment=False, random_crop=False, reference_detections=None, out_size_factor=2, use_group_id=False, out_dtype=np.float32): """convert point cloud to voxels, create targets if ground truths exists. input_dict format: dataset.get_sensor_data format """ # t = time.time() class_names = target_assigner.classes points = input_dict["lidar"]["points"] if training: anno_dict = input_dict["lidar"]["annotations"] gt_dict = { "gt_boxes": anno_dict["boxes"], "gt_names": anno_dict["names"], } if "difficulty" not in anno_dict: difficulty = np.zeros([anno_dict["boxes"].shape[0]], dtype=np.int32) gt_dict["difficulty"] = difficulty else: gt_dict["difficulty"] = anno_dict["difficulty"] if use_group_id and "group_ids" in anno_dict: group_ids = anno_dict["group_ids"] gt_dict["group_ids"] = group_ids calib = None if "calib" in input_dict: calib = input_dict["calib"] if reference_detections is not None: assert calib is not None and "image" in input_dict C, R, T = box_np_ops.projection_matrix_to_CRT_kitti(P2) frustums = box_np_ops.get_frustum_v2(reference_detections, C) frustums -= T frustums = np.einsum('ij, akj->aki', np.linalg.inv(R), frustums) frustums = box_np_ops.camera_to_lidar(frustums, rect, Trv2c) surfaces = box_np_ops.corner_to_surfaces_3d_jit(frustums) masks = points_in_convex_polygon_3d_jit(points, surfaces) points = points[masks.any(-1)] if remove_outside_points: assert calib is not None image_shape = input_dict["image"]["image_shape"] points = box_np_ops.remove_outside_points(points, calib["rect"], calib["Trv2c"], calib["P2"], image_shape) if remove_environment is True and training: selected = kitti.keep_arrays_by_name(gt_names, target_assigner.classes) _dict_select(gt_dict, selected) masks = box_np_ops.points_in_rbbox(points, gt_dict["gt_boxes"]) points = points[masks.any(-1)] if training: # boxes_lidar = gt_dict["gt_boxes"] # bev_map = simplevis.nuscene_vis(points, boxes_lidar) # cv2.imshow('pre-noise', bev_map) selected = kitti.drop_arrays_by_name(gt_dict["gt_names"], ["DontCare"]) _dict_select(gt_dict, selected) if remove_unknown: remove_mask = gt_dict["difficulty"] == -1 """ gt_boxes_remove = gt_boxes[remove_mask] gt_boxes_remove[:, 3:6] += 0.25 points = prep.remove_points_in_boxes(points, gt_boxes_remove) """ keep_mask = np.logical_not(remove_mask) _dict_select(gt_dict, keep_mask) gt_dict.pop("difficulty") gt_boxes_mask = np.array( [n in class_names for n in gt_dict["gt_names"]], dtype=np.bool_) if db_sampler is not None: group_ids = None if "group_ids" in gt_dict: group_ids = gt_dict["group_ids"] sampled_dict = db_sampler.sample_all(root_path, gt_dict["gt_boxes"], gt_dict["gt_names"], num_point_features, random_crop, gt_group_ids=group_ids, calib=calib) if sampled_dict is not None: sampled_gt_names = sampled_dict["gt_names"] sampled_gt_boxes = sampled_dict["gt_boxes"] sampled_points = sampled_dict["points"] sampled_gt_masks = sampled_dict["gt_masks"] gt_dict["gt_names"] = np.concatenate( [gt_dict["gt_names"], sampled_gt_names], axis=0) gt_dict["gt_boxes"] = np.concatenate( [gt_dict["gt_boxes"], sampled_gt_boxes]) gt_boxes_mask = np.concatenate( [gt_boxes_mask, sampled_gt_masks], axis=0) if group_ids is not None: sampled_group_ids = sampled_dict["group_ids"] gt_dict["group_ids"] = np.concatenate( [gt_dict["group_ids"], sampled_group_ids]) if remove_points_after_sample: masks = box_np_ops.points_in_rbbox(points, sampled_gt_boxes) points = points[np.logical_not(masks.any(-1))] points = np.concatenate([sampled_points, points], axis=0) pc_range = voxel_generator.point_cloud_range group_ids = None if "group_ids" in gt_dict: group_ids = gt_dict["group_ids"] prep.noise_per_object_v3_( gt_dict["gt_boxes"], points, gt_boxes_mask, rotation_perturb=gt_rotation_noise, center_noise_std=gt_loc_noise_std, global_random_rot_range=global_random_rot_range, group_ids=group_ids, num_try=100) # should remove unrelated objects after noise per object # for k, v in gt_dict.items(): # print(k, v.shape) _dict_select(gt_dict, gt_boxes_mask) gt_classes = np.array( [class_names.index(n) + 1 for n in gt_dict["gt_names"]], dtype=np.int32) gt_dict["gt_classes"] = gt_classes gt_dict["gt_boxes"], points = prep.random_flip(gt_dict["gt_boxes"], points) gt_dict["gt_boxes"], points = prep.global_rotation( gt_dict["gt_boxes"], points, rotation=global_rotation_noise) gt_dict["gt_boxes"], points = prep.global_scaling_v2( gt_dict["gt_boxes"], points, *global_scaling_noise) prep.global_translate_(gt_dict["gt_boxes"], points, global_translate_noise_std) bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]] mask = prep.filter_gt_box_outside_range(gt_dict["gt_boxes"], bv_range) _dict_select(gt_dict, mask) # limit rad to [-pi, pi] gt_dict["gt_boxes"][:, 6] = box_np_ops.limit_period( gt_dict["gt_boxes"][:, 6], offset=0.5, period=2 * np.pi) # boxes_lidar = gt_dict["gt_boxes"] # bev_map = simplevis.nuscene_vis(points, boxes_lidar) # cv2.imshow('post-noise', bev_map) # cv2.waitKey(0) if shuffle_points: # shuffle is a little slow. np.random.shuffle(points) # [0, -40, -3, 70.4, 40, 1] voxel_size = voxel_generator.voxel_size pc_range = voxel_generator.point_cloud_range grid_size = voxel_generator.grid_size # [352, 400] voxels, coordinates, num_points = voxel_generator.generate( points, max_voxels) example = { 'voxels': voxels, 'num_points': num_points, 'coordinates': coordinates, "num_voxels": np.array([voxels.shape[0]], dtype=np.int64) } if calib is not None: example["calib"] = calib feature_map_size = grid_size[:2] // out_size_factor feature_map_size = [*feature_map_size, 1][::-1] if anchor_cache is not None: anchors = anchor_cache["anchors"] anchors_bv = anchor_cache["anchors_bv"] anchors_dict = anchor_cache["anchors_dict"] else: ret = target_assigner.generate_anchors(feature_map_size) anchors = ret["anchors"] anchors = anchors.reshape([-1, 7]) anchors_dict = target_assigner.generate_anchors_dict(feature_map_size) anchors_bv = box_np_ops.rbbox2d_to_near_bbox(anchors[:, [0, 1, 3, 4, 6]]) example["anchors"] = anchors anchors_mask = None if anchor_area_threshold >= 0: # slow with high resolution. recommend disable this forever. coors = coordinates dense_voxel_map = box_np_ops.sparse_sum_for_anchors_mask( coors, tuple(grid_size[::-1][1:])) dense_voxel_map = dense_voxel_map.cumsum(0) dense_voxel_map = dense_voxel_map.cumsum(1) anchors_area = box_np_ops.fused_get_anchors_area( dense_voxel_map, anchors_bv, voxel_size, pc_range, grid_size) anchors_mask = anchors_area > anchor_area_threshold # example['anchors_mask'] = anchors_mask.astype(np.uint8) example['anchors_mask'] = anchors_mask if not training: return example # voxel_labels = box_np_ops.assign_label_to_voxel(gt_boxes, coordinates, # voxel_size, coors_range) """ example.update({ 'gt_boxes': gt_boxes.astype(out_dtype), 'num_gt': np.array([gt_boxes.shape[0]]), # 'voxel_labels': voxel_labels, }) """ if create_targets: targets_dict = target_assigner.assign_v2( anchors_dict, gt_dict["gt_boxes"], anchors_mask, gt_classes=gt_dict["gt_classes"], gt_names=gt_dict["gt_names"]) example.update({ 'labels': targets_dict['labels'], 'reg_targets': targets_dict['bbox_targets'], 'reg_weights': targets_dict['bbox_outside_weights'], }) return example
def prep_pointcloud(input_dict, root_path, voxel_generator, target_assigner, db_sampler=None, max_voxels=20000, class_names=['Car', "Cyclist", "Pedestrian"], remove_outside_points=False, training=True, create_targets=True, shuffle_points=False, reduce_valid_area=False, remove_unknown=False, gt_rotation_noise=[-np.pi / 3, np.pi / 3], gt_loc_noise_std=[1.0, 1.0, 1.0], global_rotation_noise=[-np.pi / 4, np.pi / 4], global_scaling_noise=[0.95, 1.05], global_loc_noise_std=(0.2, 0.2, 0.2), global_random_rot_range=[0.78, 2.35], generate_bev=False, without_reflectivity=False, num_point_features=4, anchor_area_threshold=1, gt_points_drop=0.0, gt_drop_max_keep=10, remove_points_after_sample=True, anchor_cache=None, remove_environment=False, random_crop=False, reference_detections=None, add_rgb_to_points=False, lidar_input=False, unlabeled_db_sampler=None, out_size_factor=2, min_gt_point_dict=None, bev_only=False, use_group_id=False, out_dtype=np.float32, max_objs=300, length=248, width=216): """convert point cloud to voxels, create targets if ground truths exists. """ points = input_dict["points"] pc_range = voxel_generator.point_cloud_range hist, bin_edges = np.histogram(points[:, 2], bins=10, range=(pc_range[2], pc_range[5])) idx = np.argmax(hist) ground = (bin_edges[idx] + bin_edges[idx + 1]) / 2 if training: gt_boxes = input_dict["gt_boxes"] gt_names = input_dict["gt_names"] difficulty = input_dict["difficulty"] group_ids = None if use_group_id and "group_ids" in input_dict: group_ids = input_dict["group_ids"] rect = input_dict["rect"] Trv2c = input_dict["Trv2c"] P2 = input_dict["P2"] unlabeled_training = unlabeled_db_sampler is not None image_idx = input_dict["image_idx"] if reference_detections is not None: C, R, T = box_np_ops.projection_matrix_to_CRT_kitti(P2) frustums = box_np_ops.get_frustum_v2(reference_detections, C) frustums -= T # frustums = np.linalg.inv(R) @ frustums.T frustums = np.einsum('ij, akj->aki', np.linalg.inv(R), frustums) frustums = box_np_ops.camera_to_lidar(frustums, rect, Trv2c) surfaces = box_np_ops.corner_to_surfaces_3d_jit(frustums) masks = points_in_convex_polygon_3d_jit(points, surfaces) points = points[masks.any(-1)] if remove_outside_points and not lidar_input: image_shape = input_dict["image_shape"] points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2, image_shape) if remove_environment is True and training: selected = kitti.keep_arrays_by_name(gt_names, class_names) gt_boxes = gt_boxes[selected] gt_names = gt_names[selected] difficulty = difficulty[selected] if group_ids is not None: group_ids = group_ids[selected] points = prep.remove_points_outside_boxes(points, gt_boxes) if training: # print(gt_names) selected = kitti.drop_arrays_by_name(gt_names, ["DontCare"]) gt_boxes = gt_boxes[selected] gt_names = gt_names[selected] difficulty = difficulty[selected] if group_ids is not None: group_ids = group_ids[selected] gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes, rect, Trv2c) if remove_unknown: remove_mask = difficulty == -1 """ gt_boxes_remove = gt_boxes[remove_mask] gt_boxes_remove[:, 3:6] += 0.25 points = prep.remove_points_in_boxes(points, gt_boxes_remove) """ keep_mask = np.logical_not(remove_mask) gt_boxes = gt_boxes[keep_mask] gt_names = gt_names[keep_mask] difficulty = difficulty[keep_mask] if group_ids is not None: group_ids = group_ids[keep_mask] gt_boxes_mask = np.array([n in class_names for n in gt_names], dtype=np.bool_) if db_sampler is not None: sampled_dict = db_sampler.sample_all(root_path, gt_boxes, gt_names, num_point_features, random_crop, gt_group_ids=group_ids, rect=rect, Trv2c=Trv2c, P2=P2) if sampled_dict is not None: sampled_gt_names = sampled_dict["gt_names"] sampled_gt_boxes = sampled_dict["gt_boxes"] sampled_points = sampled_dict["points"] sampled_gt_masks = sampled_dict["gt_masks"] # gt_names = gt_names[gt_boxes_mask].tolist() gt_names = np.concatenate([gt_names, sampled_gt_names], axis=0) # gt_names += [s["name"] for s in sampled] gt_boxes = np.concatenate([gt_boxes, sampled_gt_boxes]) gt_boxes_mask = np.concatenate( [gt_boxes_mask, sampled_gt_masks], axis=0) if group_ids is not None: sampled_group_ids = sampled_dict["group_ids"] group_ids = np.concatenate([group_ids, sampled_group_ids]) if remove_points_after_sample: points = prep.remove_points_in_boxes( points, sampled_gt_boxes) points = np.concatenate([sampled_points, points], axis=0) # unlabeled_mask = np.zeros((gt_boxes.shape[0], ), dtype=np.bool_) if without_reflectivity: used_point_axes = list(range(num_point_features)) used_point_axes.pop(3) points = points[:, used_point_axes] if bev_only: # set z and h to limits gt_boxes[:, 2] = pc_range[2] gt_boxes[:, 5] = pc_range[5] - pc_range[2] prep.noise_per_object_v3_( gt_boxes, points, gt_boxes_mask, rotation_perturb=gt_rotation_noise, center_noise_std=gt_loc_noise_std, global_random_rot_range=global_random_rot_range, group_ids=group_ids, num_try=100) # should remove unrelated objects after noise per object gt_boxes = gt_boxes[gt_boxes_mask] gt_names = gt_names[gt_boxes_mask] if group_ids is not None: group_ids = group_ids[gt_boxes_mask] gt_classes = np.array([class_names.index(n) + 1 for n in gt_names], dtype=np.int32) gt_boxes, points = prep.random_flip(gt_boxes, points) gt_boxes, points = prep.global_rotation(gt_boxes, points, rotation=global_rotation_noise) gt_boxes, points = prep.global_scaling_v2(gt_boxes, points, *global_scaling_noise) # Global translation gt_boxes, points = prep.global_translate(gt_boxes, points, global_loc_noise_std) bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]] mask = prep.filter_gt_box_outside_range(gt_boxes, bv_range) gt_boxes = gt_boxes[mask] gt_classes = gt_classes[mask] if group_ids is not None: group_ids = group_ids[mask] # limit rad to [-pi, pi] gt_boxes[:, 6] = box_np_ops.limit_period(gt_boxes[:, 6], offset=0.5, period=2 * np.pi) if shuffle_points: # shuffle is a little slow. np.random.shuffle(points) voxel_size = voxel_generator.voxel_size pc_range = voxel_generator.point_cloud_range grid_size = voxel_generator.grid_size voxels, coordinates, num_points = voxel_generator.generate( points, max_voxels) example = { 'voxels': voxels, 'num_points': num_points, 'coordinates': coordinates, "num_voxels": np.array([voxels.shape[0]], dtype=np.int64), "ground": ground } example.update({ 'rect': rect, 'Trv2c': Trv2c, 'P2': P2, }) if generate_bev: bev_vxsize = voxel_size.copy() bev_vxsize[:2] /= 2 bev_vxsize[2] *= 2 bev_map = points_to_bev(points, bev_vxsize, pc_range, without_reflectivity) example["bev_map"] = bev_map #============================ NEW CODE =================================== if training: num_classes = len(class_names) hm = np.zeros((num_classes, length, width), dtype=np.float32) # wh = np.zeros((max_objs, 2), dtype=np.float32) reg = np.zeros((max_objs, 2), dtype=np.float32) rotbin = np.zeros((max_objs, 2), dtype=np.int64) rotres = np.zeros((max_objs, 2), dtype=np.float32) dim = np.zeros((max_objs, 3), dtype=np.float32) ind = np.zeros((max_objs), dtype=np.int64) reg_mask = np.zeros((max_objs), dtype=np.uint8) rot_mask = np.zeros((max_objs), dtype=np.uint8) num_objs = min(len(gt_boxes), max_objs) draw_gaussian = draw_msra_gaussian # if self.opt.mse_loss else draw_umich_gaussian gt_det = [] xmin, ymin, _, xmax, ymax, _ = pc_range for k in range(num_objs): box = gt_boxes[k] box[0] = np.clip(box[0], xmin, xmax) box[1] = np.clip(box[1], ymin, ymax) alpha = box[6] - np.arctan2(-box[1], box[0]) cls_id = gt_classes[k] - 1 cx = (box[0] - xmin) * (width - 1) / (xmax - xmin) cy = (box[1] - ymin) * (length - 1) / (ymax - ymin) lx = box[4] * (width - 1) / (xmax - xmin) ly = box[3] * (length - 1) / (ymax - ymin) if lx > 0 and ly > 0: radius = gaussian_radius((ly, lx)) radius = max(0, int(radius)) ct = np.array([cx, cy], dtype=np.float32) ct_int = ct.astype(np.int32) if cls_id < 0: ignore_id = [_ for _ in range(num_classes)] \ if cls_id == - 1 else [- cls_id - 2] for cc in ignore_id: draw_gaussian(hm[cc], ct, radius) hm[ignore_id, ct_int[1], ct_int[0]] = 0.9999 continue draw_gaussian(hm[cls_id], ct, radius) if alpha < np.pi / 6. or alpha > 5 * np.pi / 6.: rotbin[k, 0] = 1 rotres[k, 0] = alpha - (-0.5 * np.pi) if alpha > -np.pi / 6. or alpha < -5 * np.pi / 6.: rotbin[k, 1] = 1 rotres[k, 1] = alpha - (0.5 * np.pi) dim[k] = box[3:6] #w,l,h ind[k] = ct_int[1] * width + ct_int[0] reg[k] = ct - ct_int reg_mask[k] = 1 #if not training else 0 rot_mask[k] = 1 example.update({ 'hm': hm, 'dim': dim, 'ind': ind, 'rotbin': rotbin, 'rotres': rotres, 'reg_mask': reg_mask, 'rot_mask': rot_mask, 'reg': reg }) #============================ NEW CODE =================================== return example
def _create_reduced_point_cloud(data_path, info_path, save_path=None, color=True, back=False): with open(info_path, 'rb') as f: kitti_infos = pickle.load(f) #se_mo = seg_model() se_mo = None for info in prog_bar(kitti_infos): pc_info = info["point_cloud"] image_info = info["image"] calib = info["calib"] v_path = pc_info['velodyne_path'] v_path = Path(data_path) / v_path points_v = np.fromfile(str(v_path), dtype=np.float32, count=-1).reshape([-1, 4]) rect = calib['R0_rect'] P2 = calib['P2'] Trv2c = calib['Tr_velo_to_cam'] # first remove z < 0 points # keep = points_v[:, -1] > 0 # points_v = points_v[keep] # then remove outside. if back: points_v[:, 0] = -points_v[:, 0] points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2, image_info["image_shape"]) ############## feature path load ########################### # f_path = image_info['image_path'].split("\\") # feature_file = f_path[0]+"/"+"feature"+"/"+f_path[-1].split(".")[0]+".pkl" # feature_file = v_path.parent.parent.parent / feature_file # with open(str(feature_file), 'rb') as f: # feature = pickle.load(f) ################################################################ ##################### add bgr to lidar ############# points_xyz = lidar_to_camera(points_v, rect, Trv2c) points_xy = project_to_image(points_xyz, P2) image_path = image_info['image_path'] image_path = v_path.parent.parent.parent / image_path BGR_image = cv2.imread(str(image_path)) points_bgr = np.zeros((len(points_v), 3)) #points_feature = np.zeros((len(points_v),256)) # seg_feature = np.zeros((len(points_v),19)) image_shape = image_info["image_shape"] #seg_out = se_mo.predict(image_path) for i in range(len(points_bgr)): new_y = int(points_xy[i][0]) if new_y >= image_shape[1]: new_y = image_shape[1] - 1 new_x = int(points_xy[i][1]) if new_x >= image_shape[0]: new_x = image_shape[0] - 1 points_bgr[i] = BGR_image[new_x][new_y] / 255 #seg_feature[i] = seg_out[new_x][new_y] # points_bgr = points_bgr.astype(np.float32) # points_v = points_v.astype(np.float32) # seg_feature = seg_feature.astype(np.float32) points_v = np.concatenate([points_v, points_bgr], axis=-1) #points_v = np.concatenate([points_v,points_bgr,points_feature],axis=-1) #points_v = np.concatenate([points_v,points_bgr,seg_feature],axis=-1) ################################################################### points_v = points_v.astype(np.float32) # print("##############################") # print(seg_out[0,1:5,:]) # print(points_v.shape) # exit() if save_path is None: save_filename = v_path.parent.parent / (v_path.parent.stem + "_reduced") / v_path.name # save_filename = str(v_path) + '_reduced' if back: save_filename += "_back" else: save_filename = str(Path(save_path) / v_path.name) if back: save_filename += "_back" with open(save_filename, 'w') as f: points_v.tofile(f)
def create_groundtruth_database( data_path='/mrtstorage/datasets/kitti/object_detection', info_path='/home/zhwang/second.pytorch/second/data/sets/kitti_second' '/kitti_infos_train.pkl', used_classes=None, database_save_path='/home/zhwang/second.pytorch/second/data/sets/kitti_second' '/gt_database', db_info_save_path='/home/zhwang/second.pytorch/second/data/sets/kitti_second' '/kitti_dbinfos_train.pkl', relative_path=True, lidar_only=False, bev_only=False, coors_range=None): root_path = pathlib.Path(data_path) if info_path is None: info_path = root_path / 'kitti_infos_train.pkl' if database_save_path is None: database_save_path = root_path / 'gt_database' else: database_save_path = pathlib.Path(database_save_path) if db_info_save_path is None: db_info_save_path = root_path / "kitti_dbinfos_train.pkl" database_save_path.mkdir(parents=True, exist_ok=True) with open(info_path, 'rb') as f: kitti_infos = pickle.load(f) all_db_infos = {} if used_classes is None: used_classes = list(kitti.get_classes()) used_classes.pop(used_classes.index('DontCare')) for name in used_classes: all_db_infos[name] = [] group_counter = 0 for info in prog_bar(kitti_infos): velodyne_path = info['velodyne_path'] if relative_path: # velodyne_path = str(root_path / velodyne_path) + "_reduced" velodyne_path = str(root_path / velodyne_path) num_features = 4 if 'pointcloud_num_features' in info: num_features = info['pointcloud_num_features'] points = np.fromfile(velodyne_path, dtype=np.float32, count=-1).reshape([-1, num_features]) image_idx = info["image_idx"] rect = info['calib/R0_rect'] P2 = info['calib/P2'] Trv2c = info['calib/Tr_velo_to_cam'] if not lidar_only: points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2, info["img_shape"]) """ fusion camera data to lidar """ image_label = _get_semantic_segmentation_result( image_idx, data_dir='/home/zhwang/semantic-segmentation' '/kitti_train_results') points = _add_class_score(image_label, points, rect, Trv2c, P2) annos = info["annos"] names = annos["name"] bboxes = annos["bbox"] difficulty = annos["difficulty"] gt_idxes = annos["index"] num_obj = np.sum(annos["index"] >= 0) rbbox_cam = kitti.anno_to_rbboxes(annos)[:num_obj] rbbox_lidar = box_np_ops.box_camera_to_lidar(rbbox_cam, rect, Trv2c) if bev_only: # set z and h to limits assert coors_range is not None rbbox_lidar[:, 2] = coors_range[2] rbbox_lidar[:, 5] = coors_range[5] - coors_range[2] group_dict = {} group_ids = np.full([bboxes.shape[0]], -1, dtype=np.int64) if "group_ids" in annos: group_ids = annos["group_ids"] else: group_ids = np.arange(bboxes.shape[0], dtype=np.int64) point_indices = box_np_ops.points_in_rbbox(points, rbbox_lidar) for i in range(num_obj): filename = f"{image_idx}_{names[i]}_{gt_idxes[i]}.bin" filepath = database_save_path / filename gt_points = points[point_indices[:, i]] gt_points[:, :3] -= rbbox_lidar[i, :3] with open(filepath, 'w') as f: gt_points.tofile(f) if names[i] in used_classes: if relative_path: db_path = str(database_save_path.stem + "/" + filename) else: db_path = str(filepath) db_info = { "name": names[i], "path": db_path, "image_idx": image_idx, "gt_idx": gt_idxes[i], "box3d_lidar": rbbox_lidar[i], "num_points_in_gt": gt_points.shape[0], "difficulty": difficulty[i], # "group_id": -1, # "bbox": bboxes[i], } local_group_id = group_ids[i] # if local_group_id >= 0: if local_group_id not in group_dict: group_dict[local_group_id] = group_counter group_counter += 1 db_info["group_id"] = group_dict[local_group_id] if "score" in annos: db_info["score"] = annos["score"][i] all_db_infos[names[i]].append(db_info) for k, v in all_db_infos.items(): print(f"load {len(v)} {k} database infos") with open(db_info_save_path, 'wb') as f: pickle.dump(all_db_infos, f)
def prep_pointcloud(input_dict, root_path, voxel_generator, target_assigner, use_quadrant=False, db_sampler=None, max_voxels=20000, remove_outside_points=False, training=True, create_targets=True, shuffle_points=False, remove_unknown=False, gt_rotation_noise=(-np.pi / 3, np.pi / 3), gt_loc_noise_std=(1.0, 1.0, 1.0), global_rotation_noise=(-np.pi / 4, np.pi / 4), global_scaling_noise=(0.95, 1.05), global_random_rot_range=(0.78, 2.35), global_translate_noise_std=(0, 0, 0), num_point_features=4, remove_points_after_sample=True, remove_environment=False, random_crop=False, reference_detections=None, out_size_factor=2, use_group_id=False, multi_gpu=False, min_points_in_gt=-1, random_flip_x=True, random_flip_y=True, sample_importance=1.0, dataset_name = 'KITTI'): """convert point cloud to voxels, create targets if ground truths exists. input_dict format: dataset.get_sensor_data format """ t = time.time() class_names = target_assigner.classes points = input_dict["lidar"]["points"] if training: anno_dict = input_dict["lidar"]["annotations"] gt_dict = { "gt_boxes": anno_dict["boxes"], "gt_names": anno_dict["names"], "gt_importance": np.ones([anno_dict["boxes"].shape[0]], dtype=anno_dict["boxes"].dtype), } if "difficulty" not in anno_dict: difficulty = np.zeros([anno_dict["boxes"].shape[0]], dtype=np.int32) gt_dict["difficulty"] = difficulty else: gt_dict["difficulty"] = anno_dict["difficulty"] if use_group_id and "group_ids" in anno_dict: group_ids = anno_dict["group_ids"] gt_dict["group_ids"] = group_ids calib = None if "calib" in input_dict: calib = input_dict["calib"] if reference_detections is not None: assert calib is not None and "image" in input_dict C, R, T = box_np_ops.projection_matrix_to_CRT_kitti(P2) frustums = box_np_ops.get_frustum_v2(reference_detections, C) frustums -= T frustums = np.einsum('ij, akj->aki', np.linalg.inv(R), frustums) frustums = box_np_ops.camera_to_lidar(frustums, rect, Trv2c) surfaces = box_np_ops.corner_to_surfaces_3d_jit(frustums) masks = points_in_convex_polygon_3d_jit(points, surfaces) points = points[masks.any(-1)] if remove_outside_points: assert calib is not None image_shape = input_dict["image"]["image_shape"] points = box_np_ops.remove_outside_points( points, calib["rect"], calib["Trv2c"], calib["P2"], image_shape) if remove_environment and training: selected = kitti.keep_arrays_by_name(gt_names, target_assigner.classes) _dict_select(gt_dict, selected) masks = box_np_ops.points_in_rbbox(points, gt_dict["gt_boxes"]) points = points[masks.any(-1)] metrics = {} if training: # boxes_lidar = gt_dict["gt_boxes"] # bev_map = simplevis.nuscene_vis(points, boxes_lidar) # cv2.imshow('pre-noise', bev_map) selected = kitti.drop_arrays_by_name(gt_dict["gt_names"], ["DontCare"]) _dict_select(gt_dict, selected) if remove_unknown: remove_mask = gt_dict["difficulty"] == -1 """ gt_boxes_remove = gt_boxes[remove_mask] gt_boxes_remove[:, 3:6] += 0.25 points = prep.remove_points_in_boxes(points, gt_boxes_remove) """ keep_mask = np.logical_not(remove_mask) _dict_select(gt_dict, keep_mask) gt_dict.pop("difficulty") if min_points_in_gt > 0: # points_count_rbbox takes 10ms with 10 sweeps nuscenes data point_counts = box_np_ops.points_count_rbbox(points, gt_dict["gt_boxes"]) mask = point_counts >= min_points_in_gt _dict_select(gt_dict, mask) # select the gt_box in the specified classes gt_boxes_mask = np.array( [n in class_names for n in gt_dict["gt_names"]], dtype=np.bool_) # data augmentation if db_sampler is not None: group_ids = None if "group_ids" in gt_dict: group_ids = gt_dict["group_ids"] # data augmentation, using sample to add target sampled_dict = db_sampler.sample_all( root_path, gt_dict["gt_boxes"], gt_dict["gt_names"], num_point_features, random_crop, gt_group_ids=group_ids, calib=calib) if sampled_dict is not None: sampled_gt_names = sampled_dict["gt_names"] sampled_gt_boxes = sampled_dict["gt_boxes"] sampled_points = sampled_dict["points"] sampled_gt_masks = sampled_dict["gt_masks"] gt_dict["gt_names"] = np.concatenate( [gt_dict["gt_names"], sampled_gt_names], axis=0) gt_dict["gt_boxes"] = np.concatenate( [gt_dict["gt_boxes"], sampled_gt_boxes]) gt_boxes_mask = np.concatenate( [gt_boxes_mask, sampled_gt_masks], axis=0) sampled_gt_importance = np.full([sampled_gt_boxes.shape[0]], sample_importance, dtype=sampled_gt_boxes.dtype) gt_dict["gt_importance"] = np.concatenate( [gt_dict["gt_importance"], sampled_gt_importance]) if group_ids is not None: sampled_group_ids = sampled_dict["group_ids"] gt_dict["group_ids"] = np.concatenate( [gt_dict["group_ids"], sampled_group_ids]) # remove the raw points in the added box, to avoid overlap if remove_points_after_sample: masks = box_np_ops.points_in_rbbox(points, sampled_gt_boxes) points = points[np.logical_not(masks.any(-1))] points = np.concatenate([sampled_points, points], axis=0) group_ids = None if "group_ids" in gt_dict: group_ids = gt_dict["group_ids"] prep.noise_per_object_v3_(gt_dict["gt_boxes"], points, gt_boxes_mask, rotation_perturb=gt_rotation_noise, center_noise_std=gt_loc_noise_std, global_random_rot_range=global_random_rot_range, group_ids=group_ids, num_try=100) # should remove unrelated objects after noise per object # for k, v in gt_dict.items(): # print(k, v.shape) _dict_select(gt_dict, gt_boxes_mask) gt_classes = np.array( [class_names.index(n) + 1 for n in gt_dict["gt_names"]], dtype=np.int32) gt_dict["gt_classes"] = gt_classes gt_dict["gt_boxes"], points = prep.random_flip(gt_dict["gt_boxes"], points, 0.5, random_flip_x, random_flip_y) gt_dict["gt_boxes"], points = prep.global_rotation_v2( gt_dict["gt_boxes"], points, *global_rotation_noise) gt_dict["gt_boxes"], points = prep.global_scaling_v2( gt_dict["gt_boxes"], points, *global_scaling_noise) prep.global_translate_(gt_dict["gt_boxes"], points, global_translate_noise_std) bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]] mask = prep.filter_gt_box_outside_range_by_center(gt_dict["gt_boxes"], bv_range) _dict_select(gt_dict, mask) # limit rad to [-pi, pi] gt_dict["gt_boxes"][:, 6] = box_np_ops.limit_period( gt_dict["gt_boxes"][:, 6], offset=0.5, period=2 * np.pi) # boxes_lidar = gt_dict["gt_boxes"] # bev_map = simplevis.nuscene_vis(points, boxes_lidar) # cv2.imshow('post-noise', bev_map) # cv2.waitKey(0) if shuffle_points: # shuffle is a little slow. np.random.shuffle(points) # [0, -40, -3, 70.4, 40, 1] grid_size = voxel_generator.grid_size feature_map_size = grid_size[:2] // out_size_factor feature_map_size = [*feature_map_size, 1][::-1] # [352, 400] t1 = time.time() if not multi_gpu: res = voxel_generator.generate(points, max_voxels) voxels = res["voxels"] coordinates = res["coordinates"] num_points = res["num_points_per_voxel"] num_voxels = np.array([voxels.shape[0]], dtype=np.int64) else: res = voxel_generator.generate_multi_gpu(points, max_voxels) voxels = res["voxels"] coordinates = res["coordinates"] num_points = res["num_points_per_voxel"] num_voxels = np.array([res["voxel_num"]], dtype=np.int64) metrics["voxel_gene_time"] = time.time() - t1 anchors_all = generate_anchors(target_assigner.classes_cfg, feature_map_size, use_quadrant) if use_quadrant: split_res = split_voxel_into_quadrants(res, grid_size) voxels = split_res["voxels"] coordinates = split_res["coordinates"] example = { 'voxels': voxels, 'num_points': num_points, 'coordinates': coordinates, "num_voxels": num_voxels, "metrics": metrics , 'anchors': anchors_all["anchors"]} if calib is not None: example["calib"] = calib metrics["prep_time"] = time.time() - t if not training: return example # voxel_labels = box_np_ops.assign_label_to_voxel(gt_boxes, coordinates, # voxel_size, coors_range) if create_targets: if target_assigner.name == 'LabelAssigner': if use_quadrant: res = [] gt_box = split_dict["gt_boxes"] gt_classes = split_dict["gt_classes"] gt_names = split_dict["gt_name"] gt_importance = split_dict["gt_importance"] for i in range(4): d = target_assigner.assign( gt_boxes=gt_box[i], feature_map_size=feature_map_size, gt_classes=gt_classes[i], gt_names=gt_names[i], importance=gt_importance[i], training=training, dataset_name=dataset_name ) res.append(d) else: targets_dict = target_assigner.assign( gt_dict["gt_boxes"], feature_map_size, gt_classes=gt_dict["gt_classes"], gt_names=gt_dict["gt_names"], importance=gt_dict["gt_importance"], training=training, dataset_name= dataset_name ) example.update({ 'gt_dict': targets_dict['gt_dict'], 'hm': targets_dict['targets']['hm'], 'anno_box': targets_dict['targets']['anno_box'], 'ind': targets_dict['targets']['ind'], 'mask': targets_dict['targets']['mask'], 'cat': targets_dict['targets']['cat'] }) else: if use_quadrant: split_dict = split_gt_into_quadrants(gt_dict) example['use_quadrant']= use_quadrant targets_dict = {} gt_box = split_dict["gt_boxes"] gt_classes = split_dict["gt_classes"] gt_names = split_dict["gt_names"] gt_importance = split_dict["gt_importance"] labels=[] bbox_targets=[] importance = [] for i in range(4): d = target_assigner.assign( anchors_all, gt_boxes=gt_box[i], gt_classes=gt_classes[i], gt_names=gt_names[i], importance=gt_importance[i] ) labels.append(d['labels']) bbox_targets.append(d['bbox_targets']) importance.append(d['importance']) targets_dict['labels']=np.stack(labels) targets_dict['bbox_targets'] = np.stack(bbox_targets) targets_dict['importance'] = np.stack(importance) else: targets_dict = target_assigner.assign( anchors_all, gt_dict["gt_boxes"], gt_classes=gt_dict["gt_classes"], gt_names=gt_dict["gt_names"], importance=gt_dict["gt_importance"]) example.update({'targets': targets_dict}) return example
def create_groundtruth_database(data_path, train_or_test, info_path=None, used_classes=None, database_save_path=None, db_info_save_path=None, relative_path=True, lidar_only=False, bev_only=False, coors_range=None): # custom dataset parameter custom_dataset = True # if true, it indicates that we are not operating on the kitti data sample_val_dataset_mode = True if train_or_test == "test" else False # if true, we are creating a gt database for the test set (instead of the train set) # ------------------------------------------------------------------------------------------------------ # create path where the gt boxes are stored # ----------------------------------------------------------------------------------------------------- root_path = pathlib.Path(data_path) if info_path is None: if sample_val_dataset_mode: info_path = root_path / 'kitti_infos_val.pkl' else: info_path = root_path / 'kitti_infos_train.pkl' if database_save_path is None: if sample_val_dataset_mode: database_save_path = root_path / 'gt_database_val' else: database_save_path = root_path / 'gt_database' else: database_save_path = pathlib.Path(database_save_path) if db_info_save_path is None: if sample_val_dataset_mode: db_info_save_path = root_path / "kitti_dbinfos_val.pkl" else: db_info_save_path = root_path / "kitti_dbinfos_train.pkl" database_save_path.mkdir(parents=True, exist_ok=True) # ------------------------------------------------------------------------------------------------------ # load kitti infos # ----------------------------------------------------------------------------------------------------- with open(info_path, 'rb') as f: kitti_infos = pickle.load(f) all_db_infos = {} # get the classnames we are intered in if used_classes is None: used_classes = list(kitti.get_classes()) used_classes.pop(used_classes.index('DontCare')) for name in used_classes: all_db_infos[name] = [] group_counter = 0 # ------------------------------------------------------------------------------------------------------ # iterate over kitti_infos # ----------------------------------------------------------------------------------------------------- for info in prog_bar(kitti_infos): # ------------------------------------------------------------------------------------------------------ # load pc # ----------------------------------------------------------------------------------------------------- velodyne_path = info['velodyne_path'] if relative_path: # velodyne_path = str(root_path / velodyne_path) + "_reduced" velodyne_path = str(root_path / velodyne_path) num_features = 4 if 'pointcloud_num_features' in info: num_features = info['pointcloud_num_features'] if custom_dataset: with open(str(velodyne_path)[:-3] + "pkl", 'rb') as file: points = pickle.load(file, encoding='latin1') else: points = np.fromfile(str(velodyne_path), dtype=np.float32, count=-1).reshape([-1, 4]) image_idx = info["image_idx"] rect = info['calib/R0_rect'] P2 = info['calib/P2'] Trv2c = info['calib/Tr_velo_to_cam'] # ------------------------------------------------------------------------------------------------------ # remove boxes outside the frustum of the image # ----------------------------------------------------------------------------------------------------- if not lidar_only and not custom_dataset: points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2, info["img_shape"]) # ------------------------------------------------------------------------------------------------------ # get the bboxes and transform (annos not points) # ----------------------------------------------------------------------------------------------------- annos = info["annos"] names = annos["name"] bboxes = annos["bbox"] difficulty = annos["difficulty"] gt_idxes = annos["index"] num_obj = np.sum(annos["index"] >= 0) rbbox_cam = kitti.anno_to_rbboxes(annos)[:num_obj] rbbox_lidar = box_np_ops.box_camera_to_lidar(rbbox_cam, rect, Trv2c) if bev_only: # set z and h to limits assert coors_range is not None rbbox_lidar[:, 2] = coors_range[2] rbbox_lidar[:, 5] = coors_range[5] - coors_range[2] # ------------------------------------------------------------------------------------------------------ # other stuff # ----------------------------------------------------------------------------------------------------- group_dict = {} group_ids = np.full([bboxes.shape[0]], -1, dtype=np.int64) if "group_ids" in annos: group_ids = annos["group_ids"] else: group_ids = np.arange(bboxes.shape[0], dtype=np.int64) # ------------------------------------------------------------------------------------------------------ # test which points are in bboxes # ----------------------------------------------------------------------------------------------------- point_indices = box_np_ops.points_in_rbbox(points, rbbox_lidar) # ------------------------------------------------------------------------------------------------------ # iterate over all objects in the annos # ----------------------------------------------------------------------------------------------------- for i in range(num_obj): filename = f"{image_idx}_{names[i]}_{gt_idxes[i]}.bin" filepath = database_save_path / filename gt_points = points[point_indices[:, i]] gt_points[:, :3] -= rbbox_lidar[i, :3] # ------------------------------------------------------------------------------------------------------ # save points of gt boxes to files # ----------------------------------------------------------------------------------------------------- with open(str(filepath)[:-3] + "pkl", 'wb') as file: pickle.dump(np.array(gt_points), file, 2) # debug # with open("/home/makr/Documents/uni/TU/3.Master/experiments/own/tf_3dRGB_pc/debug_rviz/points/bbox_pixels.pkl", 'wb') as file: # pickle.dump(np.array(gt_points), file, 2) # ------------------------------------------------------------------------------------------------------ # save infos of gt boxes to single file # ----------------------------------------------------------------------------------------------------- if names[i] in used_classes: if relative_path: db_path = str(database_save_path.stem + "/" + filename) else: db_path = str(filepath) db_info = { "name": names[i], "path": db_path, "image_idx": image_idx, "gt_idx": gt_idxes[i], "box3d_lidar": rbbox_lidar[i], "num_points_in_gt": gt_points.shape[0], "difficulty": difficulty[i], # "group_id": -1, # "bbox": bboxes[i], } local_group_id = group_ids[i] # if local_group_id >= 0: if local_group_id not in group_dict: group_dict[local_group_id] = group_counter group_counter += 1 db_info["group_id"] = group_dict[local_group_id] if "score" in annos: db_info["score"] = annos["score"][i] all_db_infos[names[i]].append(db_info) for k, v in all_db_infos.items(): print(f"load {len(v)} {k} database infos") with open(db_info_save_path, 'wb') as f: pickle.dump(all_db_infos, f)
def _calculate_num_points_in_gt(data_path, infos, relative_path, remove_outside=True, num_features=4): # custom dataset parameter custom_dataset = True # comment strating form here import pickle num_features = 3 # iterate over the datapoints for info in infos: if relative_path: v_path = str(pathlib.Path(data_path) / info["velodyne_path"]) else: v_path = info["velodyne_path"] if custom_dataset: with open(v_path[:-3] + "pkl", 'rb') as file: points_v = pickle.load(file, encoding='latin1') else: points_v = np.fromfile(v_path, dtype=np.float32, count=-1).reshape([-1, num_features]) rect = info['calib/R0_rect'] Trv2c = info['calib/Tr_velo_to_cam'] P2 = info['calib/P2'] # debug # with open("/home/makr/Documents/uni/TU/3.Master/experiments/own/tf_3dRGB_pc/debug_rviz/points/points.pkl", 'wb') as file: # pickle.dump(np.array(points_v), file, 2) # remove points outside a frustum defined by the shape of the image if remove_outside and not custom_dataset: points_v = box_np_ops.remove_outside_points( points_v, rect, Trv2c, P2, info["img_shape"]) # points_v = points_v[points_v[:, 0] > 0] annos = info['annos'] # filter DontCare num_obj = len([n for n in annos['name'] if n != 'DontCare']) # annos = kitti.filter_kitti_anno(annos, ['DontCare']) dims = annos['dimensions'][:num_obj] loc = annos['location'][:num_obj] rots = annos['rotation_y'][:num_obj] # get gt boxes in cameras and lidar coords gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]], axis=1) gt_boxes_lidar = box_np_ops.box_camera_to_lidar( gt_boxes_camera, rect, Trv2c) # check which points are in the gt boxes indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar) num_points_in_gt = indices.sum(0) # fill the list up with -1s for the DontCares num_ignored = len(annos['dimensions']) - num_obj num_points_in_gt = np.concatenate( [num_points_in_gt, -np.ones([num_ignored])]) annos["num_points_in_gt"] = num_points_in_gt.astype(np.int32)
def prep_pointcloud(input_dict, root_path, voxel_generator, target_assigner, db_sampler=None, max_voxels=20000, remove_outside_points=False, training=True, create_targets=True, shuffle_points=False, remove_unknown=False, gt_rotation_noise=(-np.pi / 3, np.pi / 3), gt_loc_noise_std=(1.0, 1.0, 1.0), global_rotation_noise=(-np.pi / 4, np.pi / 4), global_scaling_noise=(0.95, 1.05), global_random_rot_range=(0.78, 2.35), global_translate_noise_std=(0, 0, 0), num_point_features=4, anchor_area_threshold=1, gt_points_drop=0.0, gt_drop_max_keep=10, remove_points_after_sample=True, anchor_cache=None, remove_environment=False, random_crop=False, reference_detections=None, out_size_factor=2, use_group_id=False, multi_gpu=False, min_points_in_gt=-1, random_flip_x=True, random_flip_y=True, sample_importance=1.0, out_dtype=np.float32): """convert point cloud to voxels, create targets if ground truths exists. input_dict format: dataset.get_sensor_data format """ t = time.time() class_names = target_assigner.classes points = input_dict["lidar"]["points"] if training: anno_dict = input_dict["lidar"]["annotations"] gt_dict = { "gt_boxes": anno_dict["boxes"], "gt_names": anno_dict["names"], "gt_importance": np.ones([anno_dict["boxes"].shape[0]], dtype=anno_dict["boxes"].dtype), } if "difficulty" not in anno_dict: difficulty = np.zeros([anno_dict["boxes"].shape[0]], dtype=np.int32) gt_dict["difficulty"] = difficulty else: gt_dict["difficulty"] = anno_dict["difficulty"] if use_group_id and "group_ids" in anno_dict: group_ids = anno_dict["group_ids"] gt_dict["group_ids"] = group_ids calib = None if "calib" in input_dict: calib = input_dict["calib"] if reference_detections is not None: assert calib is not None and "image" in input_dict C, R, T = box_np_ops.projection_matrix_to_CRT_kitti(P2) frustums = box_np_ops.get_frustum_v2(reference_detections, C) frustums -= T frustums = np.einsum('ij, akj->aki', np.linalg.inv(R), frustums) frustums = box_np_ops.camera_to_lidar(frustums, rect, Trv2c) surfaces = box_np_ops.corner_to_surfaces_3d_jit(frustums) masks = points_in_convex_polygon_3d_jit(points, surfaces) points = points[masks.any(-1)] if remove_outside_points: assert calib is not None image_shape = input_dict["image"]["image_shape"] points = box_np_ops.remove_outside_points( points, calib["rect"], calib["Trv2c"], calib["P2"], image_shape) if remove_environment is True and training: selected = kitti.keep_arrays_by_name(gt_names, target_assigner.classes) _dict_select(gt_dict, selected) masks = box_np_ops.points_in_rbbox(points, gt_dict["gt_boxes"]) points = points[masks.any(-1)] metrics = {} if training: """ boxes_lidar = gt_dict["gt_boxes"] bev_map = simplevis.nuscene_vis(points, boxes_lidar) cv2.imshow('pre-noise', bev_map) """ selected = kitti.drop_arrays_by_name(gt_dict["gt_names"], ["DontCare"]) _dict_select(gt_dict, selected) if remove_unknown: remove_mask = gt_dict["difficulty"] == -1 """ gt_boxes_remove = gt_boxes[remove_mask] gt_boxes_remove[:, 3:6] += 0.25 points = prep.remove_points_in_boxes(points, gt_boxes_remove) """ keep_mask = np.logical_not(remove_mask) _dict_select(gt_dict, keep_mask) gt_dict.pop("difficulty") if min_points_in_gt > 0: # points_count_rbbox takes 10ms with 10 sweeps nuscenes data point_counts = box_np_ops.points_count_rbbox(points, gt_dict["gt_boxes"]) mask = point_counts >= min_points_in_gt _dict_select(gt_dict, mask) gt_boxes_mask = np.array( [n in class_names for n in gt_dict["gt_names"]], dtype=np.bool_) if db_sampler is not None: group_ids = None if "group_ids" in gt_dict: group_ids = gt_dict["group_ids"] sampled_dict = db_sampler.sample_all( root_path, gt_dict["gt_boxes"], gt_dict["gt_names"], num_point_features, random_crop, gt_group_ids=group_ids, calib=calib) if sampled_dict is not None: sampled_gt_names = sampled_dict["gt_names"] sampled_gt_boxes = sampled_dict["gt_boxes"] sampled_points = sampled_dict["points"] sampled_gt_masks = sampled_dict["gt_masks"] gt_dict["gt_names"] = np.concatenate( [gt_dict["gt_names"], sampled_gt_names], axis=0) gt_dict["gt_boxes"] = np.concatenate( [gt_dict["gt_boxes"], sampled_gt_boxes]) gt_boxes_mask = np.concatenate( [gt_boxes_mask, sampled_gt_masks], axis=0) sampled_gt_importance = np.full([sampled_gt_boxes.shape[0]], sample_importance, dtype=sampled_gt_boxes.dtype) gt_dict["gt_importance"] = np.concatenate( [gt_dict["gt_importance"], sampled_gt_importance]) if group_ids is not None: sampled_group_ids = sampled_dict["group_ids"] gt_dict["group_ids"] = np.concatenate( [gt_dict["group_ids"], sampled_group_ids]) if remove_points_after_sample: masks = box_np_ops.points_in_rbbox(points, sampled_gt_boxes) points = points[np.logical_not(masks.any(-1))] points = np.concatenate([sampled_points, points], axis=0) pc_range = voxel_generator.point_cloud_range group_ids = None if "group_ids" in gt_dict: group_ids = gt_dict["group_ids"] prep.noise_per_object_v3_( gt_dict["gt_boxes"], points, gt_boxes_mask, rotation_perturb=gt_rotation_noise, center_noise_std=gt_loc_noise_std, global_random_rot_range=global_random_rot_range, group_ids=group_ids, num_try=100) # should remove unrelated objects after noise per object # for k, v in gt_dict.items(): # print(k, v.shape) _dict_select(gt_dict, gt_boxes_mask) gt_classes = np.array( [class_names.index(n) + 1 for n in gt_dict["gt_names"]], dtype=np.int32) gt_dict["gt_classes"] = gt_classes gt_dict["gt_boxes"], points = prep.random_flip(gt_dict["gt_boxes"], points, 0.5, random_flip_x, random_flip_y) gt_dict["gt_boxes"], points = prep.global_rotation_v2( gt_dict["gt_boxes"], points, *global_rotation_noise) gt_dict["gt_boxes"], points = prep.global_scaling_v2( gt_dict["gt_boxes"], points, *global_scaling_noise) prep.global_translate_(gt_dict["gt_boxes"], points, global_translate_noise_std) bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]] mask = prep.filter_gt_box_outside_range_by_center(gt_dict["gt_boxes"], bv_range) _dict_select(gt_dict, mask) # limit rad to [-pi, pi] gt_dict["gt_boxes"][:, 6] = box_np_ops.limit_period( gt_dict["gt_boxes"][:, 6], offset=0.5, period=2 * np.pi) # boxes_lidar = gt_dict["gt_boxes"] # bev_map = simplevis.nuscene_vis(points, boxes_lidar) # cv2.imshow('post-noise', bev_map) # cv2.waitKey(0) if shuffle_points: # shuffle is a little slow. np.random.shuffle(points) # [0, -40, -3, 70.4, 40, 1] voxel_size = voxel_generator.voxel_size pc_range = voxel_generator.point_cloud_range grid_size = voxel_generator.grid_size # [352, 400] t1 = time.time() if not multi_gpu: res = voxel_generator.generate( points, max_voxels) voxels = res["voxels"] coordinates = res["coordinates"] num_points = res["num_points_per_voxel"] num_voxels = np.array([voxels.shape[0]], dtype=np.int64) else: res = voxel_generator.generate_multi_gpu( points, max_voxels) voxels = res["voxels"] coordinates = res["coordinates"] num_points = res["num_points_per_voxel"] num_voxels = np.array([res["voxel_num"]], dtype=np.int64) metrics["voxel_gene_time"] = time.time() - t1 example = { 'voxels': voxels, 'num_points': num_points, 'coordinates': coordinates, "num_voxels": num_voxels, "metrics": metrics, } if calib is not None: example["calib"] = calib feature_map_size = grid_size[:2] // out_size_factor feature_map_size = [*feature_map_size, 1][::-1] if anchor_cache is not None: anchors = anchor_cache["anchors"] anchors_bv = anchor_cache["anchors_bv"] anchors_dict = anchor_cache["anchors_dict"] matched_thresholds = anchor_cache["matched_thresholds"] unmatched_thresholds = anchor_cache["unmatched_thresholds"] else: ret = target_assigner.generate_anchors(feature_map_size) anchors = ret["anchors"] anchors = anchors.reshape([-1, target_assigner.box_ndim]) anchors_dict = target_assigner.generate_anchors_dict(feature_map_size) anchors_bv = box_np_ops.rbbox2d_to_near_bbox( anchors[:, [0, 1, 3, 4, 6]]) matched_thresholds = ret["matched_thresholds"] unmatched_thresholds = ret["unmatched_thresholds"] example["anchors"] = anchors anchors_mask = None if anchor_area_threshold >= 0: # slow with high resolution. recommend disable this forever. coors = coordinates dense_voxel_map = box_np_ops.sparse_sum_for_anchors_mask( coors, tuple(grid_size[::-1][1:])) dense_voxel_map = dense_voxel_map.cumsum(0) dense_voxel_map = dense_voxel_map.cumsum(1) anchors_area = box_np_ops.fused_get_anchors_area( dense_voxel_map, anchors_bv, voxel_size, pc_range, grid_size) anchors_mask = anchors_area > anchor_area_threshold # example['anchors_mask'] = anchors_mask.astype(np.uint8) example['anchors_mask'] = anchors_mask # print("prep time", time.time() - t) metrics["prep_time"] = time.time() - t if not training: return example example["gt_names"] = gt_dict["gt_names"] # voxel_labels = box_np_ops.assign_label_to_voxel(gt_boxes, coordinates, # voxel_size, coors_range) if create_targets: t1 = time.time() targets_dict = target_assigner.assign( anchors, anchors_dict, gt_dict["gt_boxes"], anchors_mask, gt_classes=gt_dict["gt_classes"], gt_names=gt_dict["gt_names"], matched_thresholds=matched_thresholds, unmatched_thresholds=unmatched_thresholds, importance=gt_dict["gt_importance"]) """ boxes_lidar = gt_dict["gt_boxes"] bev_map = simplevis.nuscene_vis(points, boxes_lidar, gt_dict["gt_names"]) assigned_anchors = anchors[targets_dict['labels'] > 0] ignored_anchors = anchors[targets_dict['labels'] == -1] bev_map = simplevis.draw_box_in_bev(bev_map, [-50, -50, 3, 50, 50, 1], ignored_anchors, [128, 128, 128], 2) bev_map = simplevis.draw_box_in_bev(bev_map, [-50, -50, 3, 50, 50, 1], assigned_anchors, [255, 0, 0]) cv2.imshow('anchors', bev_map) cv2.waitKey(0) boxes_lidar = gt_dict["gt_boxes"] pp_map = np.zeros(grid_size[:2], dtype=np.float32) voxels_max = np.max(voxels[:, :, 2], axis=1, keepdims=False) voxels_min = np.min(voxels[:, :, 2], axis=1, keepdims=False) voxels_height = voxels_max - voxels_min voxels_height = np.minimum(voxels_height, 4) # sns.distplot(voxels_height) # plt.show() pp_map[coordinates[:, 1], coordinates[:, 2]] = voxels_height / 4 pp_map = (pp_map * 255).astype(np.uint8) pp_map = cv2.cvtColor(pp_map, cv2.COLOR_GRAY2RGB) pp_map = simplevis.draw_box_in_bev(pp_map, [-50, -50, 3, 50, 50, 1], boxes_lidar, [128, 0, 128], 1) cv2.imshow('heights', pp_map) cv2.waitKey(0) """ example.update({ 'labels': targets_dict['labels'], 'reg_targets': targets_dict['bbox_targets'], # 'reg_weights': targets_dict['bbox_outside_weights'], 'importance': targets_dict['importance'], }) return example
def prep_pointcloud(input_dict, root_path, voxel_generator, target_assigner, db_sampler=None, max_voxels=20000, class_names=['Car'], remove_outside_points=False, training=True, create_targets=True, shuffle_points=False, reduce_valid_area=False, remove_unknown=False, gt_rotation_noise=[-np.pi / 3, np.pi / 3], gt_loc_noise_std=[1.0, 1.0, 1.0], global_rotation_noise=[-np.pi / 4, np.pi / 4], global_scaling_noise=[0.95, 1.05], global_loc_noise_std=(0.2, 0.2, 0.2), global_random_rot_range=[0.78, 2.35], generate_bev=False, without_reflectivity=False, num_point_features=4, anchor_area_threshold=1, gt_points_drop=0.0, gt_drop_max_keep=10, remove_points_after_sample=True, anchor_cache=None, remove_environment=False, random_crop=False, reference_detections=None, add_rgb_to_points=False, lidar_input=False, unlabeled_db_sampler=None, out_size_factor=2, min_gt_point_dict=None, bev_only=False, use_group_id=False, out_dtype=np.float32): """convert point cloud to voxels, create targets if ground truths exists. """ # 这部分用来读取某一帧数据 points = input_dict["points"] # velodyne_reduced, array(N*4) if training: gt_boxes = input_dict["gt_boxes"] # 真值框,位置,尺寸,绝对转角,N*1,一个真值框一行 gt_names = input_dict["gt_names"] difficulty = input_dict["difficulty"] group_ids = None if use_group_id and "group_ids" in input_dict: # False group_ids = input_dict["group_ids"] rect = input_dict["rect"] Trv2c = input_dict["Trv2c"] P2 = input_dict["P2"] unlabeled_training = unlabeled_db_sampler is not None image_idx = input_dict["image_idx"] if reference_detections is not None: # None C, R, T = box_np_ops.projection_matrix_to_CRT_kitti(P2) frustums = box_np_ops.get_frustum_v2(reference_detections, C) frustums -= T # frustums = np.linalg.inv(R) @ frustums.T frustums = np.einsum('ij, akj->aki', np.linalg.inv(R), frustums) frustums = box_np_ops.camera_to_lidar(frustums, rect, Trv2c) surfaces = box_np_ops.corner_to_surfaces_3d_jit(frustums) masks = points_in_convex_polygon_3d_jit(points, surfaces) points = points[masks.any(-1)] if remove_outside_points and not lidar_input: # False image_shape = input_dict["image_shape"] points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2, image_shape) if remove_environment is True and training: # False selected = kitti.keep_arrays_by_name(gt_names, class_names) gt_boxes = gt_boxes[selected] gt_names = gt_names[selected] difficulty = difficulty[selected] if group_ids is not None: # None group_ids = group_ids[selected] points = prep.remove_points_outside_boxes(points, gt_boxes) if training: # 先去掉真值内的DontCare selected = kitti.drop_arrays_by_name(gt_names, ["DontCare"]) # 去掉DontCare gt_boxes = gt_boxes[selected] gt_names = gt_names[selected] difficulty = difficulty[selected] if group_ids is not None: # None group_ids = group_ids[selected] gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes, rect, Trv2c) # 相机坐标下的真值框转换成激光雷达坐标下[xyz_lidar,w,l,h,r],一个对象一行 if remove_unknown: # False remove_mask = difficulty == -1 """ gt_boxes_remove = gt_boxes[remove_mask] gt_boxes_remove[:, 3:6] += 0.25 points = prep.remove_points_in_boxes(points, gt_boxes_remove) """ keep_mask = np.logical_not(remove_mask) gt_boxes = gt_boxes[keep_mask] gt_names = gt_names[keep_mask] difficulty = difficulty[keep_mask] if group_ids is not None: # None group_ids = group_ids[keep_mask] gt_boxes_mask = np.array( # 目标类别的对象标签,布尔类型,同样一行一个对象 [n in class_names for n in gt_names], dtype=np.bool_) # 下面用来对去掉DontCare的真值进行采样补充 if db_sampler is not None: # not None # 数据库预处理类里的方法,返回的是该帧数据内各类别用来补充的采样真值数据,包括真值内出现的非目标类别 sampled_dict = db_sampler.sample_all( root_path, gt_boxes, gt_names, num_point_features, random_crop, gt_group_ids=group_ids, # None rect=rect, Trv2c=Trv2c, P2=P2) if sampled_dict is not None: # 下面将原始数据与补充采样数据合并 sampled_gt_names = sampled_dict["gt_names"] sampled_gt_boxes = sampled_dict["gt_boxes"] sampled_points = sampled_dict["points"] sampled_gt_masks = sampled_dict["gt_masks"] # gt_names = gt_names[gt_boxes_mask].tolist() gt_names = np.concatenate([gt_names, sampled_gt_names], axis=0) # gt_names += [s["name"] for s in sampled] gt_boxes = np.concatenate([gt_boxes, sampled_gt_boxes]) gt_boxes_mask = np.concatenate( [gt_boxes_mask, sampled_gt_masks], axis=0) # 真值框是目标类别的标志 if group_ids is not None: # None sampled_group_ids = sampled_dict["group_ids"] group_ids = np.concatenate([group_ids, sampled_group_ids]) if remove_points_after_sample: # False,将采样框所占位置点云去除 points = prep.remove_points_in_boxes( points, sampled_gt_boxes) points = np.concatenate([sampled_points, points], axis=0) # 合并原始点云与采样点云 # unlabeled_mask = np.zeros((gt_boxes.shape[0], ), dtype=np.bool_) if without_reflectivity: # False used_point_axes = list(range(num_point_features)) used_point_axes.pop(3) points = points[:, used_point_axes] pc_range = voxel_generator.point_cloud_range if bev_only: # set z and h to limits, False gt_boxes[:, 2] = pc_range[2] gt_boxes[:, 5] = pc_range[5] - pc_range[2] prep.noise_per_object_v3_( # 每一个对象添加扰动,实际上就是修改每个对象真值框坐标与内部点云坐标 gt_boxes, # 补充过采样数据的真值框 points, # 补充过采样数据的点云 gt_boxes_mask, # 真值框标志(不区分原始数据与采样数据) rotation_perturb=gt_rotation_noise, # 旋转角度范围,平均分布 center_noise_std=gt_loc_noise_std, # 位置正态分布方差 global_random_rot_range=global_random_rot_range, # 全局旋转[0.0,0.0] group_ids=group_ids, # None num_try=100) # should remove unrelated objects after noise per object gt_boxes = gt_boxes[gt_boxes_mask] # 添加扰动后筛选目标类别真值框 gt_names = gt_names[gt_boxes_mask] if group_ids is not None: # Fasle group_ids = group_ids[gt_boxes_mask] gt_classes = np.array( # (gt_num) [class_names.index(n) + 1 for n in gt_names], dtype=np.int32) gt_boxes, points = prep.random_flip(gt_boxes, points) # 随机翻转 gt_boxes, points = prep.global_rotation( # 全局旋转 gt_boxes, points, rotation=global_rotation_noise) gt_boxes, points = prep.global_scaling_v2(gt_boxes, points, # 全局缩放 *global_scaling_noise) # Global translation # 全局定位扰动 gt_boxes, points = prep.global_translate(gt_boxes, points, global_loc_noise_std) bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]] mask = prep.filter_gt_box_outside_range(gt_boxes, bv_range) # 过滤超出鸟瞰图范围的真值框 gt_boxes = gt_boxes[mask] gt_classes = gt_classes[mask] if group_ids is not None: # None group_ids = group_ids[mask] # limit rad to [-pi, pi] gt_boxes[:, 6] = box_np_ops.limit_period( # 调整真值转角至目标范围 gt_boxes[:, 6], offset=0.5, period=2 * np.pi) if shuffle_points: # True,打乱全局点云数据 # shuffle is a little slow. np.random.shuffle(points) voxel_size = voxel_generator.voxel_size # [0.16, 0.16, 4] pc_range = voxel_generator.point_cloud_range # [0, -39.68, -3, 69.12, 39.68, 1] grid_size = voxel_generator.grid_size # [432, 496, 1] x,y,z # 生成体素 """ Returns: voxels:[num_voxels, 100, 4] 体素索引映射全局体素特征 coordinates:[num_voxels, 3] 体素索引映射体素坐标 num_points:(num_voxels,) 体素索引映射体素内的点数 """ voxels, coordinates, num_points = voxel_generator.generate( points, max_voxels) example = { 'voxels': voxels, 'num_points': num_points, 'coordinates': coordinates, "num_voxels": np.array([voxels.shape[0]], dtype=np.int64) } example.update({ 'rect': rect, 'Trv2c': Trv2c, 'P2': P2, }) # if not lidar_input: feature_map_size = grid_size[:2] // out_size_factor # [216 248] feature_map_size = [*feature_map_size, 1][::-1] # [1,248,216] if anchor_cache is not None: anchors = anchor_cache["anchors"] anchors_bv = anchor_cache["anchors_bv"] matched_thresholds = anchor_cache["matched_thresholds"] unmatched_thresholds = anchor_cache["unmatched_thresholds"] else: ret = target_assigner.generate_anchors(feature_map_size) anchors = ret["anchors"] anchors = anchors.reshape([-1, 7]) matched_thresholds = ret["matched_thresholds"] unmatched_thresholds = ret["unmatched_thresholds"] anchors_bv = box_np_ops.rbbox2d_to_near_bbox( anchors[:, [0, 1, 3, 4, 6]]) example["anchors"] = anchors # print("debug", anchors.shape, matched_thresholds.shape) # anchors_bv = anchors_bv.reshape([-1, 4]) anchors_mask = None if anchor_area_threshold >= 0: # True coors = coordinates dense_voxel_map = box_np_ops.sparse_sum_for_anchors_mask( # 某处体素是否被采样,否为0,是为1.array[496,432] y,x coors, tuple(grid_size[::-1][1:])) dense_voxel_map = dense_voxel_map.cumsum(0) # 元素值为按列累加到元素的和 dense_voxel_map = dense_voxel_map.cumsum(1) # 元素值为按行累加到元素的和,相当于统计了元素两个维度上之前元素的和 anchors_area = box_np_ops.fused_get_anchors_area( # 统计每个锚框内的体素数量 dense_voxel_map, anchors_bv, voxel_size, pc_range, grid_size) anchors_mask = anchors_area > anchor_area_threshold # 标记超过一个体素的锚框 # example['anchors_mask'] = anchors_mask.astype(np.uint8) example['anchors_mask'] = anchors_mask if generate_bev: # False bev_vxsize = voxel_size.copy() bev_vxsize[:2] /= 2 bev_vxsize[2] *= 2 bev_map = points_to_bev(points, bev_vxsize, pc_range, without_reflectivity) example["bev_map"] = bev_map if not training: return example # 测试数据集不需要创建训练目标 if create_targets: # True targets_dict = target_assigner.assign( anchors, # (248*216*2,7) gt_boxes, # (gt_num, 7) anchors_mask, # (248*216*2,) gt_classes=gt_classes, # (gt_num,) matched_thresholds=matched_thresholds, # (248*216*2,) unmatched_thresholds=unmatched_thresholds) # (248*216*2,) example.update({ 'labels': targets_dict['labels'], # (total_anchors,),所有锚框对应真值类别(1,2...),无对应真值设为0,dontcare设为-1 'reg_targets': targets_dict['bbox_targets'], # (total_anchors, 7),所有锚框对应真值相对锚框的偏差编码,无对应真值设为[0,0,0,0,0,0,0] 'reg_weights': targets_dict['bbox_outside_weights'], # (total_anchors,),所有锚框的外部权重,有对应真值设为1,无对应真值设为0 }) return example
def prep_pointcloud( input_dict, root_path, voxel_generator, target_assigner, db_sampler=None, max_voxels=20000, remove_outside_points=False, training=True, create_targets=True, shuffle_points=False, remove_unknown=False, gt_rotation_noise=(-np.pi / 3, np.pi / 3), gt_loc_noise_std=(1.0, 1.0, 1.0), global_rotation_noise=(-np.pi / 4, np.pi / 4), global_scaling_noise=(0.95, 1.05), global_random_rot_range=(0.78, 2.35), global_translate_noise_std=(0, 0, 0), num_point_features=4, anchor_area_threshold=1, gt_points_drop=0.0, gt_drop_max_keep=10, remove_points_after_sample=True, anchor_cache=None, remove_environment=False, random_crop=False, reference_detections=None, out_size_factor=2, use_group_id=False, multi_gpu=False, min_points_in_gt=-1, random_flip_x=True, random_flip_y=True, sample_importance=1.0, out_dtype=np.float32, bcl_keep_voxels=6500, #6000~8000 pillar seg_keep_points=8000, points_per_voxel=200, feature_map_size=[1, 200, 176], num_anchor_per_loc=2, segmentation=False, object_detection=True): """convert point cloud to voxels, create targets if ground truths exists. input_dict format: dataset.get_sensor_data format """ class_names = target_assigner.classes points = input_dict["lidar"]["points"] if training or segmentation: anno_dict = input_dict["lidar"]["annotations"] gt_dict = { "gt_boxes": anno_dict["boxes"], "gt_names": anno_dict["names"], "gt_importance": np.ones([anno_dict["boxes"].shape[0]], dtype=anno_dict["boxes"].dtype), } if "difficulty" not in anno_dict: difficulty = np.zeros([anno_dict["boxes"].shape[0]], dtype=np.int32) gt_dict["difficulty"] = difficulty else: gt_dict["difficulty"] = anno_dict["difficulty"] if use_group_id and "group_ids" in anno_dict: group_ids = anno_dict["group_ids"] gt_dict["group_ids"] = group_ids calib = None if "calib" in input_dict: calib = input_dict["calib"] if reference_detections is not None: assert calib is not None and "image" in input_dict C, R, T = box_np_ops.projection_matrix_to_CRT_kitti(P2) frustums = box_np_ops.get_frustum_v2(reference_detections, C) frustums -= T frustums = np.einsum('ij, akj->aki', np.linalg.inv(R), frustums) frustums = box_np_ops.camera_to_lidar(frustums, rect, Trv2c) surfaces = box_np_ops.corner_to_surfaces_3d_jit(frustums) masks = points_in_convex_polygon_3d_jit(points, surfaces) points = points[masks.any(-1)] if remove_outside_points: assert calib is not None image_shape = input_dict["image"]["image_shape"] points = box_np_ops.remove_outside_points(points, calib["rect"], calib["Trv2c"], calib["P2"], image_shape) if remove_environment is True and training: selected = kitti.keep_arrays_by_name(gt_names, target_assigner.classes) _dict_select(gt_dict, selected) masks = box_np_ops.points_in_rbbox(points, gt_dict["gt_boxes"]) points = points[masks.any(-1)] if training: boxes_lidar = gt_dict["gt_boxes"] selected = kitti.drop_arrays_by_name(gt_dict["gt_names"], ["DontCare"]) _dict_select(gt_dict, selected) if remove_unknown: remove_mask = gt_dict["difficulty"] == -1 """ gt_boxes_remove = gt_boxes[remove_mask] gt_boxes_remove[:, 3:6] += 0.25 points = prep.remove_points_in_boxes(points, gt_boxes_remove) """ keep_mask = np.logical_not(remove_mask) _dict_select(gt_dict, keep_mask) gt_dict.pop("difficulty") if min_points_in_gt > 0: # points_count_rbbox takes 10ms with 10 sweeps nuscenes data point_counts = box_np_ops.points_count_rbbox( points, gt_dict["gt_boxes"]) mask = point_counts >= min_points_in_gt _dict_select(gt_dict, mask) gt_boxes_mask = np.array( [n in class_names for n in gt_dict["gt_names"]], dtype=np.bool_) if db_sampler is not None: group_ids = None if "group_ids" in gt_dict: group_ids = gt_dict["group_ids"] sampled_dict = db_sampler.sample_all(root_path, gt_dict["gt_boxes"], gt_dict["gt_names"], num_point_features, random_crop, gt_group_ids=group_ids, calib=calib) if sampled_dict is not None: sampled_gt_names = sampled_dict["gt_names"] sampled_gt_boxes = sampled_dict["gt_boxes"] sampled_points = sampled_dict["points"] sampled_gt_masks = sampled_dict["gt_masks"] gt_dict["gt_names"] = np.concatenate( [gt_dict["gt_names"], sampled_gt_names], axis=0) gt_dict["gt_boxes"] = np.concatenate( [gt_dict["gt_boxes"], sampled_gt_boxes]) gt_boxes_mask = np.concatenate( [gt_boxes_mask, sampled_gt_masks], axis=0) sampled_gt_importance = np.full([sampled_gt_boxes.shape[0]], sample_importance, dtype=sampled_gt_boxes.dtype) gt_dict["gt_importance"] = np.concatenate( [gt_dict["gt_importance"], sampled_gt_importance]) if group_ids is not None: sampled_group_ids = sampled_dict["group_ids"] gt_dict["group_ids"] = np.concatenate( [gt_dict["group_ids"], sampled_group_ids]) if remove_points_after_sample: masks = box_np_ops.points_in_rbbox(points, sampled_gt_boxes) points = points[np.logical_not(masks.any(-1))] points = np.concatenate([sampled_points, points], axis=0) pc_range = voxel_generator.point_cloud_range group_ids = None if "group_ids" in gt_dict: group_ids = gt_dict["group_ids"] prep.noise_per_object_v3_( gt_dict["gt_boxes"], points, gt_boxes_mask, rotation_perturb=gt_rotation_noise, center_noise_std=gt_loc_noise_std, global_random_rot_range=global_random_rot_range, group_ids=group_ids, num_try=100) # should remove unrelated objects after noise per object # for k, v in gt_dict.items(): # print(k, v.shape) _dict_select(gt_dict, gt_boxes_mask) gt_classes = np.array( [class_names.index(n) + 1 for n in gt_dict["gt_names"]], dtype=np.int32) gt_dict["gt_classes"] = gt_classes gt_dict["gt_boxes"], points = prep.random_flip(gt_dict["gt_boxes"], points, 0.5, random_flip_x, random_flip_y) gt_dict["gt_boxes"], points = prep.global_rotation_v2( gt_dict["gt_boxes"], points, *global_rotation_noise) gt_dict["gt_boxes"], points = prep.global_scaling_v2( gt_dict["gt_boxes"], points, *global_scaling_noise) prep.global_translate_(gt_dict["gt_boxes"], points, global_translate_noise_std) bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]] mask = prep.filter_gt_box_outside_range_by_center( gt_dict["gt_boxes"], bv_range) _dict_select(gt_dict, mask) # limit rad to [-pi, pi] gt_dict["gt_boxes"][:, 6] = box_np_ops.limit_period( gt_dict["gt_boxes"][:, 6], offset=0.5, period=2 * np.pi) # add depth for point feature and remove intensity # points = points[...,:3] # points = AddDepthFeature(points, num_point_features) # num_point_features = points.shape[-1] #update point shape #remove points out of PC rannge pc_range = voxel_generator.point_cloud_range # [0, -40, -3, 70.4, 40, 1] xmin,ymin.zmin. xmax. ymax, zmax points = box_np_ops.remove_out_pc_range_points(points, pc_range) if shuffle_points and not segmentation: np.random.shuffle(points) # shuffle is a little slow. if not training and segmentation: #Keep Car Only gt_boxes_mask = np.array( [n in class_names for n in gt_dict["gt_names"]], dtype=np.bool_) _dict_select(gt_dict, gt_boxes_mask) points_in_box, points_out_box = box_np_ops.split_points_in_boxes( points, gt_dict["gt_boxes"]) #xyzr points_in_box, points_out_box = SamplePointsKeepALLPositive( points_in_box, points_out_box, seg_keep_points, num_point_features) #fixed points data, label = PrepDataAndLabel(points_in_box, points_out_box) example = { 'seg_points': data, #data 'seg_labels': label, #label 'gt_boxes': gt_dict["gt_boxes"], 'image_idx': input_dict['metadata']['image_idx'], } ################# For feature map Focs # # # NOTE: For feature map Focs point_cloud_range = np.array(voxel_generator.point_cloud_range) anchor_strides = (point_cloud_range[3:] - point_cloud_range[:3]) / feature_map_size[::-1] anchor_offsets = point_cloud_range[:3] + anchor_strides / 2 centers = box_np_ops.create_anchors_3d_stride( feature_map_size, anchor_strides=anchor_strides, anchor_offsets=anchor_offsets, rotations=[0]) centers = centers.squeeze()[..., :3].reshape(-1, 3) example.update({ 'coords_center': centers, # if anchors free the 0 is the horizontal/vertical anchors }) ############## ################ Fcos & points to voxel Test # NOTE: For voxel seg net # _, coords, coords_center, p2voxel_idx = box_np_ops.points_to_3dvoxel(data, # feat_size=[100,80,10], # max_voxels=bcl_keep_voxels, # num_p_voxel=points_per_voxel) # example = { # 'seg_points': data, #data # 'coords': coords, # 'coords_center': coords_center, # 'p2voxel_idx': p2voxel_idx, # 'gt_boxes' : gt_dict["gt_boxes"], # 'image_idx' : input_dict['metadata']['image_idx'], # "gt_num" : len(gt_dict["gt_boxes"]), # 'gt_boxes' : gt_dict["gt_boxes"], # 'seg_labels': label # } ################ Fcos & points to voxel if anchor_cache is not None: example.update({ "gt_num": len(gt_dict["gt_boxes"]), #how many objects in eval GT "anchors": anchor_cache["anchors"] }) return example ################################Car point segmentation##################### if training and segmentation: # points_in_box = box_np_ops.points_in_rbbox(points, gt_dict["gt_boxes"]) #xyzr # enlarge bouding box # enlarge_size = 0.2 # gt_dict["gt_boxes"][:, 3:6] = gt_dict["gt_boxes"][:, 3:6] + enlarge_size #xyzhwlr # masks = box_np_ops.points_in_rbbox(points, gt_dict["gt_boxes"]) # points = points[np.logical_not(masks.any(-1))] # points = np.concatenate((points, points_in_box), axis=0) #above and below bouding box should have no points # gt_dict["gt_boxes"][:,3] += 2 #random sample # points = SamplePoints(points, seg_keep_points, num_point_features) #Sample zero # points = PointRandomChoice(points, seg_keep_points) #Repeat sample points = PointRandomChoiceV2( points, seg_keep_points) #Repeat sample according points distance points_in_box, points_out_box = box_np_ops.split_points_in_boxes( points, gt_dict["gt_boxes"]) #xyzr data, label = PrepDataAndLabel(points_in_box, points_out_box) #keep positive sample # points_in_box, points_out_box = box_np_ops.split_points_in_boxes(points, gt_dict["gt_boxes"]) #xyzr # points_in_box, points_out_box = SamplePointsKeepALLPositive(points_in_box, points_out_box, seg_keep_points, num_point_features) #fixed 18888 points # data, label = PrepDataAndLabel(points_in_box, points_out_box) """shuffle car seg points""" indices = np.arange(data.shape[0]) np.random.shuffle(indices) data = data[indices] label = label[indices] example = { 'seg_points': data, #data 'seg_labels': label, #label 'gt_boxes': gt_dict["gt_boxes"], } ################# For feature map Focs # # NOTE: For feature map Focs point_cloud_range = np.array(voxel_generator.point_cloud_range) anchor_strides = (point_cloud_range[3:] - point_cloud_range[:3]) / feature_map_size[::-1] anchor_offsets = point_cloud_range[:3] + anchor_strides / 2 centers = box_np_ops.create_anchors_3d_stride( feature_map_size, anchor_strides=anchor_strides, anchor_offsets=anchor_offsets, rotations=[0]) centers = centers.squeeze()[..., :3].reshape(-1, 3) targets_dict = box_np_ops.fcos_box_encoder_v2(centers, gt_dict["gt_boxes"]) # bbox = box_np_ops.fcos_box_decoder_v2(np.expand_dims(centers, 0), # np.expand_dims(targets_dict["bbox_targets"], 0)) # labels = targets_dict["labels"] # with open(os.path.join('./debug_tool',"points.pkl") , 'wb') as f: # pickle.dump(data,f) # with open(os.path.join('./debug_tool',"seg_points.pkl") , 'wb') as f: # pickle.dump(centers[labels==1],f) # with open(os.path.join('./debug_tool',"pd_boxes.pkl") , 'wb') as f: # pickle.dump(bbox.squeeze()[labels==1],f) # with open(os.path.join('./debug_tool',"gt_boxes.pkl") , 'wb') as f: # pickle.dump(gt_dict["gt_boxes"],f) # exit() example.update({ 'labels': targets_dict[ 'labels'], # if anchors free the 0 is the horizontal/vertical anchors # 'seg_labels': targets_dict['labels'], # if anchors free the 0 is the horizontal/vertical anchors 'reg_targets': targets_dict['bbox_targets'], # target assign get offsite 'importance': targets_dict['importance'], # 'reg_weights': targets_dict['bbox_outside_weights'], }) ############## ################ Fcos & points to voxel # NOTE: For voxel seg net # _, coords, coords_center, p2voxel_idx = box_np_ops.points_to_3dvoxel(data, # feat_size=[200,176,10], # max_voxels=bcl_keep_voxels, # num_p_voxel=points_per_voxel) # # targets_dict = box_np_ops.fcos_box_encoder_v2(coords_center, # gt_dict["gt_boxes"]) # # Jim added # example.update({ # 'coords': coords, # 'p2voxel_idx': p2voxel_idx, # 'cls_labels': targets_dict['labels'], # if anchors free the 0 is the horizontal/vertical anchors # 'reg_targets': targets_dict['bbox_targets'], # target assign get offsite # 'importance': targets_dict['importance'], # }) ################ Fcos & points to voxel ################ Fcos & points to voxel if anchor_cache is not None: anchors = anchor_cache["anchors"] anchors_bv = anchor_cache["anchors_bv"] anchors_dict = anchor_cache["anchors_dict"] matched_thresholds = anchor_cache["matched_thresholds"] unmatched_thresholds = anchor_cache["unmatched_thresholds"] targets_dict = target_assigner.assign( anchors, anchors_dict, #this is the key to control the number of anchors (input anchors) ['anchors, unmatch,match'] gt_dict["gt_boxes"], anchors_mask=None, gt_classes=gt_dict["gt_classes"], gt_names=gt_dict["gt_names"], matched_thresholds=matched_thresholds, unmatched_thresholds=unmatched_thresholds, importance=gt_dict["gt_importance"]) example.update({ 'labels': targets_dict[ 'labels'], # if anchors free the 0 is the horizontal/vertical anchors 'reg_targets': targets_dict['bbox_targets'], # target assign get offsite #'importance': targets_dict['importance'], }) # boxes_lidar = gt_dict["gt_boxes"] # bev_map = simplevis.kitti_vis(points, boxes_lidar, gt_dict["gt_names"]) # assigned_anchors = anchors[targets_dict['labels'] > 0] # ignored_anchors = anchors[targets_dict['labels'] == -1] # bev_map = simplevis.draw_box_in_bev(bev_map, [0, -40, -3, 70.4, 40, 1], ignored_anchors, [128, 128, 128], 2) # bev_map = simplevis.draw_box_in_bev(bev_map, [0, -40, -3, 70.4, 40, 1], assigned_anchors, [255, 0, 0]) # cv2.imwrite('./visualization/anchors/anchors_{}.png'.format(input_dict['metadata']['image_idx']),bev_map) return example #################################voxel_generator############################ ''' voxel_size = voxel_generator.voxel_size # [0, -40, -3, 70.4, 40, 1] pc_range = voxel_generator.point_cloud_range grid_size = voxel_generator.grid_size # [352, 400] max_num_points_per_voxel = voxel_generator.max_num_points_per_voxel if not multi_gpu: res = voxel_generator.generate( points, max_voxels) voxels = res["voxels"] coordinates = res["coordinates"] num_points = res["num_points_per_voxel"] num_voxels = np.array([voxels.shape[0]], dtype=np.int64) else: res = voxel_generator.generate_multi_gpu( points, max_voxels) voxels = res["voxels"] coordinates = res["coordinates"] num_points = res["num_points_per_voxel"] num_voxels = np.array([res["voxel_num"]], dtype=np.int64) example = { 'voxels': voxels, #'num_points': num_points, 'coordinates': coordinates, "num_voxels": num_voxels, } ## WARNING: For Simplex voxel Testing if bug comment this voxels= SimpleVoxel(voxels, num_points) #(V,100,C) -> (B, C, V, N) #For Second, if Pillar comment it max_num_points_per_voxel=1 #If SimpleVoxel max_num_points_per_voxel=1 voxels, coordinates = VoxelRandomChoice(voxels, coordinates, bcl_keep_voxels, num_point_features, max_num_points_per_voexl=max_num_points_per_voxel) example['voxels']=voxels example['coordinates']=coordinates ''' ############################################################################ # if calib is not None: # example["calib"] = calib if anchor_cache is not None: anchors = anchor_cache["anchors"] anchors_bv = anchor_cache["anchors_bv"] anchors_dict = anchor_cache["anchors_dict"] matched_thresholds = anchor_cache["matched_thresholds"] unmatched_thresholds = anchor_cache["unmatched_thresholds"] else: # generate anchors from ground truth """ voxels= SimpleVoxel(voxels, num_points) #(V,100,C) -> (B, C, V, N) voxels, coordinates, num_points = VoxelRandomChoice(voxels, coordinates, num_points, bcl_keep_voxels) example['voxels']=voxels example['num_points']=num_points example['coordinates']=coordinates example['num_voxels']=bcl_keep_voxels if training: # for anchor free gt_boxes_coords = gt_dict["gt_boxes"][:,:3] #original gt xyz example['gt_boxes_coords']=gt_boxes_coords #GT save to example gt_boxes_coords = np.round(gt_dict["gt_boxes"][:,:3]).astype(int) #round xyz gt_boxes_coords = gt_boxes_coords[:,::-1] #zyx reverse ret = target_assigner.generate_anchors_from_gt(gt_boxes_coords) #for GT generate anchors anchors = ret["anchors"] anchors_dict = target_assigner.generate_anchors_dict_from_gt(gt_boxes_coords) #for GT generate anchors if not training: # for anchor free feature_map_size = grid_size[:2] // out_size_factor feature_map_size = [*feature_map_size, 1][::-1] ret = target_assigner.generate_anchors(feature_map_size) anchors_dict = target_assigner.generate_anchors_dict(feature_map_size) anchors = ret["anchors"] """ # # generate anchors from anchor free (Voxel-wise) # ret = target_assigner.generate_anchors_from_voxels(coordinates) #for coordinates generate anchors # anchors_dict = target_assigner.generate_anchors_dict_from_voxels(coordinates) #this is the key to control the number of anchors (input anchors) # anchors = ret["anchors"] # matched_thresholds = ret["matched_thresholds"] # unmatched_thresholds = ret["unmatched_thresholds"] # generate anchors from voxel + anchor free """ gt_boxes_coords = gt_dict["gt_boxes"][:,:3] #original gt xyz #gt_boxes_coords = np.round(gt_dict["gt_boxes"][:,:3]).astype(int) #round xyz gt_boxes_coords = gt_boxes_coords[:,::-1] #zyx reverse #stack ret and ret_gt ret = target_assigner.generate_anchors_from_voxels(coordinates) #for coordinates generate anchors ret_gt = target_assigner.generate_anchors_from_gt(gt_boxes_coords) #for GT generate anchors for k in ret.keys(): ret[k] = np.concatenate((ret[k], ret_gt[k])) anchors = ret["anchors"] #stack anchors_dict and anchors_dict_gt anchors_dict = target_assigner.generate_anchors_dict_from_voxels(coordinates) #this is the key to control the number of anchors (input anchors) ['anchors, unmatch,match'] anchors_dict_gt = target_assigner.generate_anchors_dict_from_gt(gt_boxes_coords) #for GT generate anchors for order_k in anchors_dict.keys(): for k in anchors_dict[order_k].keys(): anchors_dict[order_k][k] = np.concatenate((anchors_dict[order_k][k], anchors_dict_gt[order_k][k])) """ # generate anchors from groundtruth """ if training: # generate anchors from car points points_in_box = points_in_box[:,:3] #xyz points_in_box = points_in_box[:,::-1] #zyx ret = target_assigner.generate_anchors_from_gt(points_in_box) #for GT generate anchors anchors = ret["anchors"] anchors_dict = target_assigner.generate_anchors_dict_from_gt(points_in_box) #for GT generate anchors anchors_bv = box_np_ops.rbbox2d_to_near_bbox( anchors[:, [0, 1, 3, 4, 6]]) matched_thresholds = ret["matched_thresholds"] unmatched_thresholds = ret["unmatched_thresholds"] """ # Fcos points sampling points = SamplePoints(points, bcl_keep_voxels, num_point_features) example = { 'voxels': np.expand_dims(points, 0), #'num_points': num_points, 'coordinates': points, # "num_voxels": None, } if not training: anno_dict = input_dict["lidar"]["annotations"] gt_dict = { "gt_boxes": anno_dict["boxes"], "gt_names": anno_dict["names"], "gt_importance": np.ones([anno_dict["boxes"].shape[0]], dtype=anno_dict["boxes"].dtype), } targets_dict = box_np_ops.fcos_box_encoder_v2(points, gt_dict["gt_boxes"]) # targets_dict = box_np_ops.fcos_box_encoder(points, gt_dict["gt_boxes"]) example.update({ 'labels': targets_dict[ 'labels'], # if anchors free the 0 is the horizontal/vertical anchors 'seg_labels': targets_dict[ 'labels'], # if anchors free the 0 is the horizontal/vertical anchors 'reg_targets': targets_dict['bbox_targets'], # target assign get offsite 'importance': targets_dict['importance'], # 'reg_weights': targets_dict['bbox_outside_weights'], }) # example["anchors"] = anchors # anchors_mask = None # if anchor_area_threshold >= 0: # # slow with high resolution. recommend disable this forever. # coors = coordinates # dense_voxel_map = box_np_ops.sparse_sum_for_anchors_mask( # coors, tuple(grid_size[::-1][1:])) # dense_voxel_map = dense_voxel_map.cumsum(0) # dense_voxel_map = dense_voxel_map.cumsum(1) # anchors_area = box_np_ops.fused_get_anchors_area( # dense_voxel_map, anchors_bv, voxel_size, pc_range, grid_size) # anchors_mask = anchors_area > anchor_area_threshold # # example['anchors_mask'] = anchors_mask.astype(np.uint8) # example['anchors_mask'] = anchors_mask if not training: # Use it when debuging eval nms for good eval_classes = input_dict["lidar"]["annotations"]["names"] eval_gt_dict = {"gt_names": eval_classes} gt_boxes_mask = np.array([n in class_names for n in eval_classes], dtype=np.bool_) _dict_select(eval_gt_dict, gt_boxes_mask) example["gt_num"] = len( eval_gt_dict["gt_names"]) #how many objects in eval GT return example example["gt_names"] = gt_dict["gt_names"] # voxel_labels = box_np_ops.assign_label_to_voxel(gt_boxes, coordinates, # voxel_size, coors_range) """ # bev anchors without screening boxes_lidar = gt_dict["gt_boxes"] bev_map = simplevis.kitti_vis(points, boxes_lidar, gt_dict["gt_names"]) bev_map = simplevis.draw_box_in_bev(bev_map, [0, -40, -3, 70.4, 40, 1], anchors, [255, 0, 0]) #assigned_anchors blue cv2.imwrite('anchors/anchors_{}.png'.format(input_dict['metadata']['image_idx']),bev_map) # cv2.imshow('anchors', bev_map) # cv2.waitKey(0) """ if create_targets: # No particular use where = None # Fcos target generator and encoder # targets_dict = target_assigner.assign( # anchors, # anchors_dict, #this is the key to control the number of anchors (input anchors) ['anchors, unmatch,match'] # gt_dict["gt_boxes"], # anchors_mask, # gt_classes=gt_dict["gt_classes"], # gt_names=gt_dict["gt_names"], # matched_thresholds=matched_thresholds, # unmatched_thresholds=unmatched_thresholds, # importance=gt_dict["gt_importance"]) ################################Visualaiziton########################### """ bev anchors with points boxes_lidar = gt_dict["gt_boxes"] bev_map = simplevis.kitti_vis(points, boxes_lidar, gt_dict["gt_names"]) assigned_anchors = anchors[targets_dict['labels'] > 0] ignored_anchors = anchors[targets_dict['labels'] == -1] bev_map = simplevis.draw_box_in_bev(bev_map, [0, -40, -3, 70.4, 40, 1], ignored_anchors, [128, 128, 128], 2) #ignored_anchors gray #[0, -30, -3, 64, 30, 1] for kitti bev_map = simplevis.draw_box_in_bev(bev_map, [0, -40, -3, 70.4, 40, 1], assigned_anchors, [255, 0, 0]) #assigned_anchors blue cv2.imwrite('anchors/anchors_{}.png'.format(input_dict['metadata']['image_idx']),bev_map) cv2.imshow('anchors', bev_map) cv2.waitKey(0) """ """ # bev boxes_lidar with voxels (put z in to the plane) boxes_lidar = gt_dict["gt_boxes"] pp_map = np.zeros(grid_size[:2], dtype=np.float32) # (1408, 1600) #print(voxels.shape) #(16162, 5, 4) $ 4=bzyx voxels_max = np.max(voxels[:, :, 1], axis=1, keepdims=False) voxels_min = np.min(voxels[:, :, 1], axis=1, keepdims=False) voxels_height = voxels_max - voxels_min voxels_height = np.minimum(voxels_height, 4) #keep every voxels length less than 4 # sns.distplot(voxels_height) # plt.show() pp_map[coordinates[:, 2], coordinates[:, 1]] = voxels_height / 4 #coordinates bzyx pp_map = (pp_map * 255).astype(np.uint8) pp_map = cv2.cvtColor(pp_map, cv2.COLOR_GRAY2RGB) pp_map = simplevis.draw_box_in_bev(pp_map, [0, -30, -3, 64, 30, 1], boxes_lidar, [128, 0, 128], 2) # for kitti 0, -30, -3, 64, 30, 1 cv2.imwrite('bev_pp_map/pp_map{}.png'.format(input_dict['metadata']['image_idx']),pp_map) # cv2.imshow('heights', pp_map) # cv2.waitKey(0) """ # example.update({ # 'labels': targets_dict['labels'], # if anchors free the 0 is the horizontal/vertical anchors # 'reg_targets': targets_dict['bbox_targets'], # target assign get offsite # 'importance': targets_dict['importance'], # # 'reg_weights': targets_dict['bbox_outside_weights'], # }) return example
def evaluation_from_kitti_dets(self, dt_annos, output_dir): if "annos" not in self._kitti_infos[0]: return None gt_annos = [info["annos"] for info in self._kitti_infos] # firstly convert standard detection to kitti-format dt annos z_axis = 1 # KITTI camera format use y as regular "z" axis. z_center = 1.0 # KITTI camera box's center is [0.5, 1, 0.5] # for regular raw lidar data, z_axis = 2, z_center = 0.5. result_official_dict = get_official_eval_result( gt_annos, dt_annos, self._class_names, z_axis=z_axis, z_center=z_center) result_coco = get_coco_eval_result( gt_annos, dt_annos, self._class_names, z_axis=z_axis, z_center=z_center) # feature extraction for info, det in tqdm(zip(self._kitti_infos, dt_annos), desc="feature", total=len(dt_annos)): pc_info = info["point_cloud"] image_info = info["image"] calib = info["calib"] num_features = pc_info["num_features"] v_path = self._root_path / pc_info["velodyne_path"] v_path = str(v_path.parent.parent / (v_path.parent.stem + "_reduced") / v_path.name) points_v = np.fromfile( v_path, dtype=np.float32, count=-1).reshape([-1, num_features]) rect = calib['R0_rect'] Trv2c = calib['Tr_velo_to_cam'] P2 = calib['P2'] if False: # No longer you need remove outside image-rect (*_reduced pointcloud is already filtered.) points_v = box_np_ops.remove_outside_points( points_v, rect, Trv2c, P2, image_info["image_shape"]) annos = det num_obj = len([n for n in annos['name'] if n != 'DontCare']) # annos = kitti.filter_kitti_anno(annos, ['DontCare']) dims = annos['dimensions'][:num_obj] loc = annos['location'][:num_obj] rots = annos['rotation_y'][:num_obj] gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]], axis=1) gt_boxes_lidar = box_np_ops.box_camera_to_lidar( gt_boxes_camera, rect, Trv2c) indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar) num_points_in_gt = indices.sum(0) num_ignored = len(annos['dimensions']) - num_obj num_points_in_gt = np.concatenate( [num_points_in_gt, -np.ones([num_ignored])]) annos["num_points_in_det"] = num_points_in_gt.astype(np.int32) return { "results": { "official": result_official_dict["result"], "coco": result_coco["result"], }, "detail": { "eval.kitti": { "official": result_official_dict["detail"], "coco": result_coco["detail"] } }, "result_kitti": result_official_dict["detections"], }