def __call__(self, res, info):
    """Preprocess one LiDAR sample and, in train mode, augment it.

    Steps, in order: read the point cloud; in train mode build the
    ground-truth dict, drop "DontCare"/"ignore" boxes, optionally remove
    boxes with difficulty -1 together with the points inside them,
    optionally drop boxes with too few points, paste database-sampled
    objects, apply per-object noise, keep only boxes named in
    ``self.class_names`` and apply global flip / rotation / scaling. Then,
    as configured, shuffle, sub-sample and shift the last point column.

    Args:
        res: Sample dict, updated in place. ``res["type"]`` and
            ``res["lidar"]["points"]`` are always read; ``res["calib"]`` is
            used when present. In train mode
            ``res["lidar"]["annotations"]`` (keys ``boxes``, ``names``,
            optional ``difficulty``) is read, and ``res["metadata"]`` is
            read when ``self.db_sampler`` is set.
        info: Returned unchanged.

    Returns:
        tuple: ``(res, info)``. ``res["mode"]`` and
        ``res["lidar"]["points"]`` are always written; in train mode
        ``res["lidar"]["annotations"]`` is replaced by the processed
        ground-truth dict.

    Raises:
        NotImplementedError: If ``res["type"]`` is not ``"KittiDataset"``.
    """
    # get points
    res["mode"] = self.mode
    if res["type"] not in ["KittiDataset"]:
        # Fail here with a clear message; otherwise `points` would stay
        # unbound and only blow up later as an UnboundLocalError.
        raise NotImplementedError(
            "Unsupported dataset type: {!r}".format(res["type"])
        )
    points = res["lidar"]["points"]

    # get gt_boxes (x,y,z(velo), w, l, h, ry), gt_names and difficulty levels
    if self.mode == "train":
        anno_dict = res["lidar"]["annotations"]
        gt_dict = {
            "gt_boxes": anno_dict["boxes"],
            "gt_names": np.array(anno_dict["names"]).reshape(-1),
        }
        if "difficulty" not in anno_dict:
            # No difficulty annotation: treat every box as difficulty 0.
            gt_dict["difficulty"] = np.zeros(
                [anno_dict["boxes"].shape[0]], dtype=np.int32
            )
        else:
            gt_dict["difficulty"] = anno_dict["difficulty"]

    # get calib
    calib = res["calib"] if "calib" in res else None

    if self.mode == "train":
        # todo (original): where is the definition of "ignore"?
        selected = kitti.drop_arrays_by_name(
            gt_dict["gt_names"], ["DontCare", "ignore"]
        )
        _dict_select(gt_dict, selected)

        # Original note: False in the reference config.
        if self.remove_unknown:
            remove_mask = gt_dict["difficulty"] == -1
            # Boolean indexing copies, so enlarging the size columns (3:6)
            # does not touch the boxes stored in gt_dict.
            gt_boxes_remove = gt_dict["gt_boxes"][remove_mask]
            gt_boxes_remove[:, 3:6] += 0.25
            masks = box_np_ops.points_in_rbbox(points, gt_boxes_remove)
            points = points[np.logical_not(masks.any(-1))]
            keep_mask = np.logical_not(remove_mask)
            _dict_select(gt_dict, keep_mask)

        # Difficulty is not needed past this point.
        gt_dict.pop("difficulty")

        # Original note: False in the reference config.
        if self.min_points_in_gt > 0:
            point_counts = box_np_ops.points_count_rbbox(
                points, gt_dict["gt_boxes"]
            )
            mask = point_counts >= self.min_points_in_gt
            _dict_select(gt_dict, mask)

        # Marks boxes of targeted categories; applied after per-object noise.
        gt_boxes_mask = np.array(
            [n in self.class_names for n in gt_dict["gt_names"]],
            dtype=np.bool_,
        )

        # perform gt-augmentation
        if self.db_sampler:
            sampled_dict = self.db_sampler.sample_all(
                res["metadata"]["image_prefix"],
                gt_dict["gt_boxes"],
                gt_dict["gt_names"],
                res["metadata"]["num_point_features"],
                self.random_crop,
                gt_group_ids=None,
                calib=calib,
                targeted_class_names=self.class_names,
            )

            if sampled_dict is not None:
                sampled_gt_names = sampled_dict["gt_names"]
                sampled_gt_boxes = sampled_dict["gt_boxes"]
                sampled_points = sampled_dict["points"]
                sampled_gt_masks = sampled_dict["gt_masks"]

                gt_dict["gt_names"] = np.concatenate(
                    [gt_dict["gt_names"], sampled_gt_names], axis=0
                )
                gt_dict["gt_boxes"] = np.concatenate(
                    [gt_dict["gt_boxes"], sampled_gt_boxes]
                )
                gt_boxes_mask = np.concatenate(
                    [gt_boxes_mask, sampled_gt_masks], axis=0
                )

                # Remove scene points located where sampled boxes are pasted.
                if self.remove_points_after_sample:
                    masks = box_np_ops.points_in_rbbox(points, sampled_gt_boxes)
                    points = points[np.logical_not(masks.any(-1))]

                # Sampled object points go in front of the scene points.
                points = np.concatenate([sampled_points, points], axis=0)

        # per-object augmentation
        prep.noise_per_object_v4_(
            gt_dict["gt_boxes"],
            points,
            gt_boxes_mask,
            rotation_perturb=self.gt_rotation_noise,
            center_noise_std=self.gt_loc_noise_std,
            global_random_rot_range=self.global_random_rot_range,
            group_ids=None,
            num_try=100,
            data_aug_with_context=self.data_aug_with_context,
            data_aug_random_drop=self.data_aug_random_drop,
        )

        # Keep only the targeted categories.
        _dict_select(gt_dict, gt_boxes_mask)
        # Class ids are 1-based positions in self.class_names.
        gt_dict["gt_classes"] = np.array(
            [self.class_names.index(n) + 1 for n in gt_dict["gt_names"]],
            dtype=np.int32,
        )

        # global augmentation
        gt_dict["gt_boxes"], points = prep.random_flip(gt_dict["gt_boxes"], points)
        gt_dict["gt_boxes"], points = prep.global_rotation(
            gt_dict["gt_boxes"], points, rotation=self.global_rotation_noise
        )
        gt_dict["gt_boxes"], points = prep.global_scaling_v2(
            gt_dict["gt_boxes"], points, *self.global_scaling_noise
        )

    if self.shuffle_points:
        # Index-based shuffle; leaves the incoming array untouched.
        choice = np.random.choice(
            np.arange(points.shape[0]), points.shape[0], replace=False
        )
        points = points[choice]

    # points sampling
    if self.mode == "train" and self.random_select:
        if self.npoints < points.shape[0]:
            # TODO (original note): depth should be points[:, 0]
            # (x-axis in velo coord); behaviour kept as-is here.
            pts_depth = points[:, 2]
            pts_near_flag = pts_depth < 40.0
            far_idxs_choice = np.where(pts_near_flag == 0)[0]
            near_idxs = np.where(pts_near_flag == 1)[0]
            near_idxs_choice = np.random.choice(
                near_idxs, self.npoints - len(far_idxs_choice), replace=False
            )
            choice = (
                np.concatenate((near_idxs_choice, far_idxs_choice), axis=0)
                if len(far_idxs_choice) > 0
                else near_idxs_choice
            )
            np.random.shuffle(choice)
        else:
            choice = np.arange(0, len(points), dtype=np.int32)
            if self.npoints > len(points):
                num_missing = self.npoints - len(points)
                # Sampling without replacement fails when more indices are
                # needed than exist, so allow repeats only in that case.
                extra_choice = np.random.choice(
                    choice, num_missing, replace=num_missing > len(choice)
                )
                choice = np.concatenate((choice, extra_choice), axis=0)
            np.random.shuffle(choice)
        points = points[choice]

    # uniformize intensity (original note: False in the reference config)
    if self.symmetry_intensity:
        points[:, -1] -= 0.5  # translate intensity to [-0.5, 0.5]

    res["lidar"]["points"] = points
    if self.mode == "train":
        res["lidar"]["annotations"] = gt_dict

    return res, info
def __call__(self, res, info):
    """Preprocess one LiDAR sample (KITTI, Lyft or nuScenes) and augment it.

    Steps, in order: pick the point cloud for the dataset type; in train
    mode build the ground-truth dict; optionally append RGB, filter by
    reference-detection frustums, drop points outside the image and keep
    only points inside ground-truth boxes; in train mode filter boxes,
    paste database-sampled objects, apply per-object noise, keep boxes named
    in ``self.class_names`` and apply global flip / rotation / scaling;
    finally shuffle, sub-sample and shift the last point column as
    configured.

    Args:
        res: Sample dict, updated in place. ``res["type"]`` selects the
            point source (``res["lidar"]["points"]`` or
            ``res["lidar"]["combined"]``). ``res["calib"]`` and
            ``res["image"]`` are read by the optional image-based stages.
            In train mode ``res["lidar"]["annotations"]`` is read, and
            ``res["metadata"]`` is read when ``self.db_sampler`` is set.
        info: Returned unchanged.

    Returns:
        tuple: ``(res, info)``. ``res["mode"]`` and
        ``res["lidar"]["points"]`` are always written; in train mode
        ``res["lidar"]["annotations"]`` is replaced by the processed
        ground-truth dict.

    Raises:
        NotImplementedError: If ``res["type"]`` is not one of the three
            supported dataset names.
        AssertionError: If an image-based stage is enabled but calibration
            or image data is missing from ``res``.
    """
    res["mode"] = self.mode

    if res["type"] in ["KittiDataset", "LyftDataset"]:
        points = res["lidar"]["points"]
    elif res["type"] == "NuScenesDataset":
        points = res["lidar"]["combined"]
    else:
        # Fail here with a clear message; otherwise `points` would stay
        # unbound and only blow up later as an UnboundLocalError.
        raise NotImplementedError(
            "Unsupported dataset type: {!r}".format(res["type"])
        )

    if self.mode == "train":
        anno_dict = res["lidar"]["annotations"]
        gt_dict = {
            "gt_boxes": anno_dict["boxes"],
            "gt_names": np.array(anno_dict["names"]).reshape(-1),
        }
        if "difficulty" not in anno_dict:
            # No difficulty annotation: treat every box as difficulty 0.
            gt_dict["difficulty"] = np.zeros(
                [anno_dict["boxes"].shape[0]], dtype=np.int32
            )
        else:
            gt_dict["difficulty"] = anno_dict["difficulty"]

    if "calib" in res:
        calib = res["calib"]
    else:
        calib = None

    if self.add_rgb_to_points:
        assert calib is not None and "image" in res
        image_path = res["image"]["image_path"]
        # NOTE(review): `root_path` is not defined in this method; it must
        # resolve at module level for this stage to work — confirm.
        image = (
            imgio.imread(str(pathlib.Path(root_path) / image_path)).astype(
                np.float32
            )
            / 255
        )
        points_rgb = box_np_ops.add_rgb_to_points(
            points, image, calib["rect"], calib["Trv2c"], calib["P2"]
        )
        points = np.concatenate([points, points_rgb], axis=1)
        # The original incremented a never-assigned local
        # `num_point_features` here, which always raised UnboundLocalError.
        # NOTE(review): res["metadata"]["num_point_features"] is left
        # untouched; confirm whether downstream needs the widened count.

    if self.reference_detections is not None:
        assert calib is not None and "image" in res
        # Calibration matrices come from `calib`, as in the stages around
        # this one; the bare names used before were unbound.
        C, R, T = box_np_ops.projection_matrix_to_CRT_kitti(calib["P2"])
        frustums = box_np_ops.get_frustum_v2(self.reference_detections, C)
        frustums -= T
        frustums = np.einsum("ij, akj->aki", np.linalg.inv(R), frustums)
        frustums = box_np_ops.camera_to_lidar(
            frustums, calib["rect"], calib["Trv2c"]
        )
        surfaces = box_np_ops.corner_to_surfaces_3d_jit(frustums)
        # NOTE(review): expects `points_in_convex_polygon_3d_jit` to be
        # imported at module level — confirm.
        masks = points_in_convex_polygon_3d_jit(points, surfaces)
        points = points[masks.any(-1)]

    if self.remove_outside_points:
        assert calib is not None
        image_shape = res["image"]["image_shape"]
        points = box_np_ops.remove_outside_points(
            points, calib["rect"], calib["Trv2c"], calib["P2"], image_shape
        )

    if self.remove_environment is True and self.mode == "train":
        # Keep only target-class boxes and the points inside them.
        selected = kitti.keep_arrays_by_name(
            gt_dict["gt_names"], self.class_names
        )
        _dict_select(gt_dict, selected)
        masks = box_np_ops.points_in_rbbox(points, gt_dict["gt_boxes"])
        points = points[masks.any(-1)]

    if self.mode == "train":
        selected = kitti.drop_arrays_by_name(
            gt_dict["gt_names"], ["DontCare", "ignore"]
        )
        _dict_select(gt_dict, selected)

        if self.remove_unknown:
            # Only the boxes are dropped; the code that also removed the
            # points inside them was disabled in the original.
            remove_mask = gt_dict["difficulty"] == -1
            keep_mask = np.logical_not(remove_mask)
            _dict_select(gt_dict, keep_mask)

        # Difficulty is not needed past this point.
        gt_dict.pop("difficulty")

        if self.min_points_in_gt > 0:
            # Original note: points_count_rbbox takes 10ms with 10 sweeps
            # nuscenes data.
            point_counts = box_np_ops.points_count_rbbox(
                points, gt_dict["gt_boxes"]
            )
            mask = point_counts >= self.min_points_in_gt
            _dict_select(gt_dict, mask)

        # Marks boxes of targeted categories; applied after per-object noise.
        gt_boxes_mask = np.array(
            [n in self.class_names for n in gt_dict["gt_names"]],
            dtype=np.bool_,
        )

        if self.db_sampler:
            sampled_dict = self.db_sampler.sample_all(
                res["metadata"]["image_prefix"],
                gt_dict["gt_boxes"],
                gt_dict["gt_names"],
                res["metadata"]["num_point_features"],
                self.random_crop,
                gt_group_ids=None,
                calib=calib,
            )

            if sampled_dict is not None:
                sampled_gt_names = sampled_dict["gt_names"]
                sampled_gt_boxes = sampled_dict["gt_boxes"]
                sampled_points = sampled_dict["points"]
                sampled_gt_masks = sampled_dict["gt_masks"]

                gt_dict["gt_names"] = np.concatenate(
                    [gt_dict["gt_names"], sampled_gt_names], axis=0
                )
                gt_dict["gt_boxes"] = np.concatenate(
                    [gt_dict["gt_boxes"], sampled_gt_boxes]
                )
                gt_boxes_mask = np.concatenate(
                    [gt_boxes_mask, sampled_gt_masks], axis=0
                )

                # Remove scene points located where sampled boxes are pasted.
                if self.remove_points_after_sample:
                    masks = box_np_ops.points_in_rbbox(points, sampled_gt_boxes)
                    points = points[np.logical_not(masks.any(-1))]

                # Sampled object points go in front of the scene points.
                points = np.concatenate([sampled_points, points], axis=0)

        prep.noise_per_object_v3_(
            gt_dict["gt_boxes"],
            points,
            gt_boxes_mask,
            rotation_perturb=self.gt_rotation_noise,
            center_noise_std=self.gt_loc_noise_std,
            global_random_rot_range=self.global_random_rot_range,
            group_ids=None,
            num_try=100,
        )

        # Keep only the targeted categories.
        _dict_select(gt_dict, gt_boxes_mask)
        # Class ids are 1-based positions in self.class_names.
        gt_dict["gt_classes"] = np.array(
            [self.class_names.index(n) + 1 for n in gt_dict["gt_names"]],
            dtype=np.int32,
        )

        if res["type"] in ["NuScenesDataset"]:
            # Original note: double flip gives 3 map improvement for
            # pointppillars on nuScenes.
            gt_dict["gt_boxes"], points = prep.random_flip_both(
                gt_dict["gt_boxes"], points
            )
        else:
            gt_dict["gt_boxes"], points = prep.random_flip(
                gt_dict["gt_boxes"], points
            )

        gt_dict["gt_boxes"], points = prep.global_rotation(
            gt_dict["gt_boxes"], points, rotation=self.global_rotation_noise
        )
        gt_dict["gt_boxes"], points = prep.global_scaling_v2(
            gt_dict["gt_boxes"], points, *self.global_scaling_noise
        )

    if self.shuffle_points:
        # shuffle is a little slow.
        np.random.shuffle(points)

    if self.mode == "train" and self.random_select:
        if self.npoints < points.shape[0]:
            pts_depth = points[:, 2]
            pts_near_flag = pts_depth < 40.0
            far_idxs_choice = np.where(pts_near_flag == 0)[0]
            near_idxs = np.where(pts_near_flag == 1)[0]
            near_idxs_choice = np.random.choice(
                near_idxs, self.npoints - len(far_idxs_choice), replace=False
            )
            choice = (
                np.concatenate((near_idxs_choice, far_idxs_choice), axis=0)
                if len(far_idxs_choice) > 0
                else near_idxs_choice
            )
            np.random.shuffle(choice)
        else:
            choice = np.arange(0, len(points), dtype=np.int32)
            if self.npoints > len(points):
                num_missing = self.npoints - len(points)
                # Sampling without replacement fails when more indices are
                # needed than exist, so allow repeats only in that case.
                extra_choice = np.random.choice(
                    choice, num_missing, replace=num_missing > len(choice)
                )
                choice = np.concatenate((choice, extra_choice), axis=0)
            np.random.shuffle(choice)
        points = points[choice]

    if self.symmetry_intensity:
        points[:, -1] -= 0.5  # translate intensity to [-0.5, 0.5]

    res["lidar"]["points"] = points
    if self.mode == "train":
        res["lidar"]["annotations"] = gt_dict

    return res, info
def __call__(self, res, info):
    """Preprocess one sample, with separate paths for labeled and unlabeled
    training data.

    For a labeled training sample the ground truth is filtered, database
    objects are pasted, per-object noise is applied, a copy of the points
    and annotations is stored before global augmentation, and the global
    flip / rotation / scaling parameters are recorded. For an unlabeled
    training sample only the global augmentation is applied to the points
    and its parameters recorded. In any other mode the points pass through
    (apart from the optional shuffle).

    Args:
        res: Sample dict, updated in place. ``res["lidar"]["points"]`` is
            always read; ``res["labeled"]`` is read in train mode only.
        info: Returned unchanged.

    Returns:
        tuple: ``(res, info)``. ``res["mode"]`` and
        ``res["lidar"]["points"]`` are always written. In train mode
        ``res["lidar"]["transformation"]`` is written; for labeled samples
        also ``points_raw``, ``annotations_raw`` and ``annotations`` under
        ``res["lidar"]``.
    """
    res["mode"] = self.mode
    cloud = res["lidar"]["points"]

    training = self.mode == "train"
    labeled = training and res['labeled']

    # gt_boxes layout per original note: (x,y,z(velo), w, l, h, ry)
    if labeled:
        annos = res["lidar"]["annotations"]
        gt_dict = {
            "gt_boxes": annos["boxes"],
            "gt_names": np.array(annos["names"]).reshape(-1),
        }

        calib = None
        if "calib" in res:
            calib = res["calib"]

        keep = kitti.drop_arrays_by_name(
            gt_dict["gt_names"], ["DontCare", "ignore"]
        )
        _dict_select(gt_dict, keep)

        # Marks boxes of targeted categories; applied after per-object noise.
        gt_boxes_mask = np.array(
            [name in self.class_names for name in gt_dict["gt_names"]],
            dtype=np.bool_,
        )

        # gt-augmentation: paste objects drawn from the database sampler
        if self.db_sampler:
            meta = res["metadata"]
            sampled = self.db_sampler.sample_all(
                meta["image_prefix"],
                gt_dict["gt_boxes"],
                gt_dict["gt_names"],
                meta["num_point_features"],
                self.random_crop,
                gt_group_ids=None,
                calib=calib,
                targeted_class_names=self.class_names,
            )

            if sampled is not None:
                new_names, new_boxes, new_points, new_masks = (
                    sampled[key]
                    for key in ("gt_names", "gt_boxes", "points", "gt_masks")
                )
                gt_dict["gt_names"] = np.concatenate(
                    [gt_dict["gt_names"], new_names], axis=0
                )
                gt_dict["gt_boxes"] = np.concatenate(
                    [gt_dict["gt_boxes"], new_boxes]
                )
                gt_boxes_mask = np.concatenate(
                    [gt_boxes_mask, new_masks], axis=0
                )

                # Clear scene points from the spots the pasted boxes occupy.
                if self.remove_points_after_sample:
                    occupied = box_np_ops.points_in_rbbox(cloud, new_boxes)
                    inside_any = occupied.any(-1)
                    cloud = cloud[np.logical_not(inside_any)]

                # Pasted object points are placed before the scene points.
                cloud = np.concatenate([new_points, cloud], axis=0)

        # per-object augmentation
        prep.noise_per_object_v4_(
            gt_dict["gt_boxes"],
            cloud,
            gt_boxes_mask,
            rotation_perturb=self.gt_rotation_noise,
            center_noise_std=self.gt_loc_noise_std,
            global_random_rot_range=self.global_random_rot_range,
            group_ids=None,
            num_try=100,
            data_aug_with_context=self.data_aug_with_context,
            data_aug_random_drop=self.data_aug_random_drop,
        )

        # Restrict ground truth to the targeted categories.
        _dict_select(gt_dict, gt_boxes_mask)
        # Class ids are 1-based positions in self.class_names.
        gt_dict["gt_classes"] = np.array(
            [self.class_names.index(name) + 1 for name in gt_dict["gt_names"]],
            dtype=np.int32,
        )

        # Snapshot before global augmentation. Each array is copied so later
        # augmentation cannot alter the stored state.
        lidar = res["lidar"]
        lidar["points_raw"] = cloud.copy()
        lidar["annotations_raw"] = {
            key: value.copy() for key, value in gt_dict.items()
        }

        # Global augmentation; the drawn parameters are recorded below.
        boxes, cloud, flipped = prep.random_flip_v2(gt_dict["gt_boxes"], cloud)
        boxes, cloud, noise_rotation = prep.global_rotation_v3(
            boxes, cloud, self.global_rotation_noise
        )
        boxes, cloud, noise_scale = prep.global_scaling_v3(
            boxes, cloud, *self.global_scaling_noise
        )
        gt_dict["gt_boxes"] = boxes
        lidar["transformation"] = {
            "flipped": flipped,
            "noise_rotation": noise_rotation,
            "noise_scale": noise_scale,
        }

        # Settings the original labels "for car: default setting". It also
        # notes an alternative for cyclist & ped: dropout 0.2,
        # sparsity [0.1, 25], swap [0.1, 10].
        cloud = sa_da_v2.pyramid_augment_v0(
            boxes,
            cloud,
            enable_sa_dropout=0.25,
            enable_sa_sparsity=[0.05, 50],
            enable_sa_swap=[0.1, 50],
        )

    # NOTE(review): assumed to run for every mode, not only for labeled
    # training samples — confirm against the intended nesting.
    if self.shuffle_points:
        num_points = cloud.shape[0]
        order = np.random.choice(np.arange(num_points), num_points, replace=False)
        cloud = cloud[order]

    if training and not labeled:
        # Unlabeled sample: no boxes, so None is passed in their place.
        _, cloud, flipped = prep.random_flip_v2(None, cloud)
        _, cloud, noise_rotation = prep.global_rotation_v3(
            None, cloud, self.global_rotation_noise
        )
        _, cloud, noise_scale = prep.global_scaling_v3(
            None, cloud, *self.global_scaling_noise
        )
        res["lidar"]["transformation"] = {
            "flipped": flipped,
            "noise_rotation": noise_rotation,
            "noise_scale": noise_scale,
        }

    res["lidar"]["points"] = cloud
    if labeled:
        res["lidar"]["annotations"] = gt_dict

    return res, info
def __call__(self, res, info):
    """Preprocess one KITTI LiDAR sample and, in train mode, augment it.

    Steps, in order: read the point cloud; in train mode build the
    ground-truth dict, drop "DontCare"/"ignore" boxes, optionally drop
    boxes with difficulty -1, optionally drop boxes with too few points,
    paste database-sampled objects, apply per-object noise, keep only boxes
    named in ``self.class_names`` and apply global flip / rotation /
    scaling. Then, as configured, shuffle, sub-sample and shift the last
    point column.

    Args:
        res: Sample dict, updated in place. ``res["type"]`` and
            ``res["lidar"]["points"]`` are always read; ``res["calib"]`` is
            used when present. In train mode
            ``res["lidar"]["annotations"]`` (keys ``boxes``, ``names``,
            optional ``difficulty``) is read, and ``res["metadata"]`` is
            read when ``self.db_sampler`` is set.
        info: Returned unchanged.

    Returns:
        tuple: ``(res, info)``. ``res["mode"]`` and
        ``res["lidar"]["points"]`` are always written; in train mode
        ``res["lidar"]["annotations"]`` is replaced by the processed
        ground-truth dict.

    Raises:
        NotImplementedError: If ``res["type"]`` is not ``"KittiDataset"``.
    """
    # get points
    res["mode"] = self.mode  # train or val
    if res["type"] not in ["KittiDataset"]:
        # Fail here with a clear message; otherwise `points` would stay
        # unbound and only blow up later as an UnboundLocalError.
        raise NotImplementedError(
            "Unsupported dataset type: {!r}".format(res["type"])
        )
    points = res["lidar"]["points"]

    # get gt_boxes (x,y,z(velo), w, l, h, ry), class_names and difficulty levels
    if self.mode == "train":
        anno_dict = res["lidar"]["annotations"]
        gt_dict = {
            "gt_boxes": anno_dict["boxes"],
            "gt_names": np.array(anno_dict["names"]).reshape(-1),
        }
        if "difficulty" not in anno_dict:
            # No difficulty annotation: treat every box as difficulty 0.
            gt_dict["difficulty"] = np.zeros(
                [anno_dict["boxes"].shape[0]], dtype=np.int32
            )
        else:
            gt_dict["difficulty"] = anno_dict["difficulty"]

    # get calib
    if "calib" in res:
        calib = res["calib"]
    else:
        calib = None

    # The RGB-fusion, reference-detection frustum filter, outside-image
    # point removal and environment removal stages are not part of this
    # variant; the original carried them only as a disabled string literal.

    if self.mode == "train":
        # Original note: redundant; discards DontCare and ignore gt.
        selected = kitti.drop_arrays_by_name(
            gt_dict["gt_names"], ["DontCare", "ignore"]
        )
        _dict_select(gt_dict, selected)

        # Original note: False in the reference config.
        if self.remove_unknown:
            # Only the boxes are dropped; the code that also removed the
            # points inside them was disabled in the original.
            remove_mask = gt_dict["difficulty"] == -1
            keep_mask = np.logical_not(remove_mask)
            _dict_select(gt_dict, keep_mask)

        # Difficulty is not needed past this point.
        gt_dict.pop("difficulty")

        # Original note: False in the reference config.
        if self.min_points_in_gt > 0:
            point_counts = box_np_ops.points_count_rbbox(
                points, gt_dict["gt_boxes"]
            )
            mask = point_counts >= self.min_points_in_gt
            _dict_select(gt_dict, mask)

        # Marks boxes of targeted categories; applied after per-object noise.
        gt_boxes_mask = np.array(
            [n in self.class_names for n in gt_dict["gt_names"]],
            dtype=np.bool_,
        )

        # perform gt-augmentation
        if self.db_sampler:
            sampled_dict = self.db_sampler.sample_all(
                res["metadata"]["image_prefix"],
                gt_dict["gt_boxes"],
                gt_dict["gt_names"],
                res["metadata"]["num_point_features"],
                self.random_crop,
                gt_group_ids=None,
                calib=calib,
            )

            if sampled_dict is not None:
                sampled_gt_names = sampled_dict["gt_names"]
                sampled_gt_boxes = sampled_dict["gt_boxes"]
                sampled_points = sampled_dict["points"]
                sampled_gt_masks = sampled_dict["gt_masks"]

                gt_dict["gt_names"] = np.concatenate(
                    [gt_dict["gt_names"], sampled_gt_names], axis=0
                )
                gt_dict["gt_boxes"] = np.concatenate(
                    [gt_dict["gt_boxes"], sampled_gt_boxes]
                )
                gt_boxes_mask = np.concatenate(
                    [gt_boxes_mask, sampled_gt_masks], axis=0
                )

                # Remove scene points located where sampled boxes are pasted.
                if self.remove_points_after_sample:
                    masks = box_np_ops.points_in_rbbox(points, sampled_gt_boxes)
                    points = points[np.logical_not(masks.any(-1))]

                # Sampled object points go in front of the scene points.
                points = np.concatenate([sampled_points, points], axis=0)

        prep.noise_per_object_v3_(
            gt_dict["gt_boxes"],
            points,
            gt_boxes_mask,
            rotation_perturb=self.gt_rotation_noise,
            center_noise_std=self.gt_loc_noise_std,
            global_random_rot_range=self.global_random_rot_range,
            group_ids=None,
            num_try=100,
        )

        # Keep only the targeted categories.
        _dict_select(gt_dict, gt_boxes_mask)
        # Class ids are 1-based positions in self.class_names.
        gt_dict["gt_classes"] = np.array(
            [self.class_names.index(n) + 1 for n in gt_dict["gt_names"]],
            dtype=np.int32,
        )

        # global data augmentation
        gt_dict["gt_boxes"], points = prep.random_flip(gt_dict["gt_boxes"], points)
        gt_dict["gt_boxes"], points = prep.global_rotation(
            gt_dict["gt_boxes"], points, rotation=self.global_rotation_noise
        )
        gt_dict["gt_boxes"], points = prep.global_scaling_v2(
            gt_dict["gt_boxes"], points, *self.global_scaling_noise
        )

    if self.shuffle_points:
        # todo (original): not efficient, use choice. Shuffles rows in place.
        np.random.shuffle(points)

    # points sampling (original note: False in the reference config)
    if self.mode == "train" and self.random_select:
        if self.npoints < points.shape[0]:
            pts_depth = points[:, 2]
            pts_near_flag = pts_depth < 40.0
            far_idxs_choice = np.where(pts_near_flag == 0)[0]
            near_idxs = np.where(pts_near_flag == 1)[0]
            near_idxs_choice = np.random.choice(
                near_idxs, self.npoints - len(far_idxs_choice), replace=False
            )
            choice = (
                np.concatenate((near_idxs_choice, far_idxs_choice), axis=0)
                if len(far_idxs_choice) > 0
                else near_idxs_choice
            )
            np.random.shuffle(choice)
        else:
            choice = np.arange(0, len(points), dtype=np.int32)
            if self.npoints > len(points):
                num_missing = self.npoints - len(points)
                # Sampling without replacement fails when more indices are
                # needed than exist, so allow repeats only in that case.
                extra_choice = np.random.choice(
                    choice, num_missing, replace=num_missing > len(choice)
                )
                choice = np.concatenate((choice, extra_choice), axis=0)
            np.random.shuffle(choice)
        points = points[choice]

    # uniformize intensity
    if self.symmetry_intensity:
        points[:, -1] -= 0.5  # translate intensity to [-0.5, 0.5]

    res["lidar"]["points"] = points
    if self.mode == "train":
        res["lidar"]["annotations"] = gt_dict

    return res, info