def filter_eval_boxes(nusc: NuScenes, eval_boxes: EvalBoxes, max_dist: Dict[str, float], verbose: bool = False) -> EvalBoxes: """ Applies filtering to boxes. Distance, bike-racks and points per box. :param nusc: An instance of the NuScenes class. :param eval_boxes: An instance of the EvalBoxes class. :param max_dist: Maps the detection name to the eval distance threshold for that class. :param verbose: Whether to print to stdout. """ # Accumulators for number of filtered boxes. total, dist_filter, point_filter, bike_rack_filter = 0, 0, 0, 0 for ind, sample_token in enumerate(eval_boxes.sample_tokens): # Filter on distance first total += len(eval_boxes[sample_token]) eval_boxes.boxes[sample_token] = [ box for box in eval_boxes[sample_token] if box.ego_dist < max_dist[box.detection_name] ] dist_filter += len(eval_boxes[sample_token]) # Then remove boxes with zero points in them. Eval boxes have -1 points by default. eval_boxes.boxes[sample_token] = [ box for box in eval_boxes[sample_token] if not box.num_pts == 0 ] point_filter += len(eval_boxes[sample_token]) # Perform bike-rack filtering sample_anns = nusc.get('sample', sample_token)['anns'] bikerack_recs = [ nusc.get('sample_annotation', ann) for ann in sample_anns if nusc.get('sample_annotation', ann)['category_name'] == 'static_object.bicycle_rack' ] bikerack_boxes = [ Box(rec['translation'], rec['size'], Quaternion(rec['rotation'])) for rec in bikerack_recs ] filtered_boxes = [] for box in eval_boxes[sample_token]: if box.detection_name in ['bicycle', 'motorcycle']: in_a_bikerack = False for bikerack_box in bikerack_boxes: if np.sum( points_in_box( bikerack_box, np.expand_dims(np.array(box.translation), axis=1))) > 0: in_a_bikerack = True if not in_a_bikerack: filtered_boxes.append(box) else: filtered_boxes.append(box) eval_boxes.boxes[sample_token] = filtered_boxes bike_rack_filter += len(eval_boxes.boxes[sample_token]) if verbose: print("=> Original number of boxes: %d" % total) print("=> After distance based filtering: %d" % dist_filter) print("=> After LIDAR points based filtering: %d" % point_filter) print("=> After bike rack filtering: %d" % bike_rack_filter) return eval_boxes
def calculate_object_box(box_aligned, box_inclined, image_ground, image_zmax, object_class, score, p): # Grid Map data resolution = p['pointcloud_grid_map_interface']['grids']['cartesian'][ 'resolution']['x'] if resolution - p['pointcloud_grid_map_interface']['grids']['cartesian'][ 'resolution']['y'] > 0.001: raise ValueError( 'Grid Map resolution in x and y direction need to be equal') if FLAGS.range is None: grid_map_data_origin_idx = np.array([ p['pointcloud_grid_map_interface']['grids']['cartesian']['range'] ['x'] / 2 + p['pointcloud_grid_map_interface']['grids'] ['cartesian']['offset']['x'], p['pointcloud_grid_map_interface'] ['grids']['cartesian']['range']['y'] / 2 ]) else: grid_map_data_origin_idx = np.array( [float(FLAGS.range) / 2, float(FLAGS.range) / 2]) image_to_velo = np.array([[0, -1, grid_map_data_origin_idx[0]], [-1, 0, grid_map_data_origin_idx[1]], [0, 0, 1]]) heigth_diff = (p['pointcloud_grid_map_interface']['z_max'] - p['pointcloud_grid_map_interface']['z_min']) height_offset = p['pointcloud_grid_map_interface']['z_min'] # Convert box height, width = image_zmax.shape y_min = box_aligned[0] * height x_min = box_aligned[1] * width y_max = box_aligned[2] * height x_max = box_aligned[3] * width x_c = box_inclined[0] * width y_c = box_inclined[1] * height w_s = box_inclined[2] h_s = box_inclined[3] sin_angle = box_inclined[4] cos_angle = box_inclined[5] angle_rad = math.atan2(sin_angle, cos_angle) / 2 # Tranform angle from kitti camera to kitti lidar: # angle_rad = - angle_rad - math.pi / 2 vec_s_x = math.cos(angle_rad) vec_s_y = math.sin(angle_rad) object_width = w_s * resolution / \ math.sqrt(vec_s_x * vec_s_x / (height * height) + vec_s_y * vec_s_y / (width * width)) object_length = h_s * resolution / \ math.sqrt(vec_s_x * vec_s_x / (width * width) + vec_s_y * vec_s_y / (height * height)) image_ground_box = image_ground[int(y_min):math.ceil(y_max), int(x_min):math.ceil(x_max)] mean_ground = image_ground_box.mean() image_height_max_box = image_zmax[int(y_min):math.ceil(y_max), int(x_min):math.ceil(x_max)] height_max = image_height_max_box.max() height_max_m = heigth_diff * height_max / 255.0 + height_offset mean_ground_m = heigth_diff * mean_ground / 255.0 + height_offset if height_max_m - mean_ground_m > 0: object_height = height_max_m - mean_ground_m else: object_height = 0.1 # Box output in box coordinate system object_wlh = [object_width, object_length, object_height] object_center = np.array([x_c * resolution, y_c * resolution]) object_center_velo = image_to_velo.dot(np.append(object_center, 1)) object_center_velo[2] = (height_max_m + mean_ground_m) / 2 object_quat = Quaternion(axis=(0, 0, 1), angle=angle_rad) return Box(object_center_velo, object_wlh, object_quat, score=score, velocity=(0, 0, 0), name=object_class)
# Test iou3d (camera coord) box_1 = compute_box_3d(dim=[1.599275, 1.9106505, 4.5931444], location=[7.1180778, 2.1364648, 41.784885], rotation_y=-1.1312813047259618) box_2 = compute_box_3d(dim=[1.599275, 1.9106505, 4.5931444], location=[7.1180778, 2.1364648, 41.784885], rotation_y=-1.1312813047259618) iou = iou3d(box_1, box_2) print("Results should be almost 1.0: ", iou) # # Test iou3d (global coord) translation1 = [634.7540893554688, 1620.952880859375, 0.4360223412513733] size1 = [1.9073231220245361, 4.5971598625183105, 1.5940513610839844] rotation1 = [ -0.6379619591303222, 0.6256341359192967, -0.320485847319929, 0.31444441216651253 ] translation2 = [634.7540893554688, 1620.952880859375, 0.4360223412513733] size2 = [1.9073231220245361, 4.5971598625183105, 1.5940513610839844] rotation2 = [ -0.6379619591303222, 0.6256341359192967, -0.320485847319929, 0.31444441216651253 ] box_1 = Box(translation1, size1, Quaternion(rotation1)) box_2 = Box(translation2, size2, Quaternion(rotation2)) iou, iou_2d = iou3d_global(box_1.corners(), box_2.corners()) print(iou, iou_2d)
def gen_2d_grid_gt_for_visualization(data_dict: dict, grid_size: np.array, extents: np.array = None, frame_skip: int = 0, reordered: bool = False, proportion_thresh: float = 0.5, category_num: int = 5, one_hot_thresh: float = 0.8): """ Generate the 2d grid ground-truth for the input point cloud. The ground-truth is: the displacement vectors of the occupied pixels in BEV image. The displacement is computed w.r.t the current time and the future time The difference between this function and gen_2d_grid_gt: the input is "data_dict" instead of sample file path. :param data_dict: The dictionary storing point cloud data and annotations :param grid_size: The size of each pixel :param extents: The extents of the point cloud on the 2D xy plane. Shape (3, 2) :param frame_skip: The number of sample frames that need to be skipped :param reordered: Whether need to reorder the results, so that the first element corresponds to the oldest record. :param proportion_thresh: Within a given pixel, only when the proportion of foreground points exceeds this threshold will we compute the displacement vector for this pixel. :param category_num: The number of categories for points. :param one_hot_thresh: When the proportion of the majority points within a cell exceeds this threshold, we compute the (hard) one-hot category vector for this cell, otherwise compute the soft category vector. :return: The ground-truth displacement field. Shape (num_sweeps, image height, image width, 2). """ num_sweeps = data_dict['num_sweeps'] times = data_dict['times'] num_past_sweeps = len(np.where(times >= 0)[0]) num_future_sweeps = len(np.where(times < 0)[0]) assert num_past_sweeps + num_future_sweeps == num_sweeps, "The number of sweeps is incorrect!" pc_list = [] for i in range(num_sweeps): pc = data_dict['pc_' + str(i)] pc_list.append(pc.T) # Retrieve the instance boxes num_instances = data_dict['num_instances'] instance_box_list = list() instance_cat_list = list() # for instance categories for i in range(num_instances): instance = data_dict['instance_boxes_' + str(i)] category = data_dict['category_' + str(i)] instance_box_list.append(instance) instance_cat_list.append(category) # ---------------------------------------------------- # Filter and sort the reference point cloud refer_pc = pc_list[0] refer_pc = refer_pc[:, 0:3] if extents is not None: if extents.shape != (3, 2): raise ValueError("Extents are the wrong shape {}".format( extents.shape)) filter_idx = np.where((extents[0, 0] < refer_pc[:, 0]) & (refer_pc[:, 0] < extents[0, 1]) & (extents[1, 0] < refer_pc[:, 1]) & (refer_pc[:, 1] < extents[1, 1]) & (extents[2, 0] < refer_pc[:, 2]) & (refer_pc[:, 2] < extents[2, 1]))[0] refer_pc = refer_pc[filter_idx] # -- Discretize pixel coordinates to given quantization size discrete_pts = np.floor(refer_pc[:, 0:2] / grid_size).astype(np.int32) # -- Use Lex Sort, sort by x, then y x_col = discrete_pts[:, 0] y_col = discrete_pts[:, 1] sorted_order = np.lexsort((y_col, x_col)) refer_pc = refer_pc[sorted_order] discrete_pts = discrete_pts[sorted_order] contiguous_array = np.ascontiguousarray(discrete_pts).view( np.dtype( (np.void, discrete_pts.dtype.itemsize * discrete_pts.shape[1]))) # -- The new coordinates are the discretized array with its unique indexes _, unique_indices = np.unique(contiguous_array, return_index=True) # -- Sort unique indices to preserve order unique_indices.sort() pixel_coords = discrete_pts[unique_indices] # -- Number of points per voxel, last voxel calculated separately num_points_in_pixel = np.diff(unique_indices) num_points_in_pixel = np.append(num_points_in_pixel, discrete_pts.shape[0] - unique_indices[-1]) # -- Compute the minimum and maximum voxel coordinates if extents is not None: min_pixel_coord = np.floor(extents.T[0, 0:2] / grid_size) max_pixel_coord = np.ceil(extents.T[1, 0:2] / grid_size) - 1 else: min_pixel_coord = np.amin(pixel_coords, axis=0) max_pixel_coord = np.amax(pixel_coords, axis=0) # -- Get the voxel grid dimensions num_divisions = ((max_pixel_coord - min_pixel_coord) + 1).astype(np.int32) # -- Bring the min voxel to the origin pixel_indices = (pixel_coords - min_pixel_coord).astype(int) # ---------------------------------------------------- # ---------------------------------------------------- # Get the point cloud subsets, which are inside different instance bounding boxes refer_box_list = list() refer_pc_idx_per_bbox = list() points_category = np.zeros(refer_pc.shape[0], dtype=np.int) # store the point categories for i in range(num_instances): instance_cat = instance_cat_list[i] instance_box = instance_box_list[i] instance_box_data = instance_box[0] assert not np.isnan(instance_box_data).any( ), "In the keyframe, there should not be NaN box annotation!" tmp_box = Box(center=instance_box_data[:3], size=instance_box_data[3:6], orientation=Quaternion(instance_box_data[6:])) idx = point_in_hull_fast(refer_pc[:, 0:3], tmp_box) refer_pc_idx_per_bbox.append(idx) refer_box_list.append(tmp_box) points_category[idx] = instance_cat if len(refer_pc_idx_per_bbox) > 0: refer_pc_idx_inside_box = np.concatenate( refer_pc_idx_per_bbox).tolist() else: refer_pc_idx_inside_box = [] refer_pc_idx_outside_box = set(range( refer_pc.shape[0])) - set(refer_pc_idx_inside_box) refer_pc_idx_outside_box = list(refer_pc_idx_outside_box) # Compute pixel (cell) categories pixel_cat = np.zeros([unique_indices.shape[0], category_num], dtype=np.float32) most_freq_info = [] for h, v in enumerate(zip(unique_indices, num_points_in_pixel)): pixel_elements_categories = points_category[v[0]:v[0] + v[1]] elements_freq = np.bincount(pixel_elements_categories, minlength=category_num) assert np.sum( elements_freq) == v[1], "The frequency count is incorrect." elements_freq = elements_freq / float(v[1]) most_freq_cat, most_freq = np.argmax(elements_freq), np.max( elements_freq) most_freq_info.append([most_freq_cat, most_freq]) if most_freq >= one_hot_thresh: one_hot_cat = np.zeros(category_num, dtype=np.float32) one_hot_cat[most_freq_cat] = 1.0 pixel_cat[h] = one_hot_cat else: pixel_cat[ h] = elements_freq # we use soft category probability vector. pixel_cat_map = np.zeros( (num_divisions[0], num_divisions[1], category_num), dtype=np.float32) pixel_cat_map[pixel_indices[:, 0], pixel_indices[:, 1]] = pixel_cat[:] # Set the non-zero pixels to 1.0 # Note that the non-zero pixels correspond to the foreground and background objects non_empty_map = np.zeros((num_divisions[0], num_divisions[1]), dtype=np.float32) non_empty_map[pixel_indices[:, 0], pixel_indices[:, 1]] = 1.0 # Compute the displacement vectors w.r.t. the other sweeps all_disp_field_gt_list = list() zero_disp_field = np.zeros((num_divisions[0], num_divisions[1], 2), dtype=np.float32) all_disp_field_gt_list.append(zero_disp_field) all_valid_pixel_maps_list = list( ) # valid pixel map will be used for masking the computation of loss all_valid_pixel_maps_list.append(non_empty_map) # -- Skip some frames if necessary past_part = list(range(0, num_past_sweeps, frame_skip + 1)) future_part = list( range(num_past_sweeps + frame_skip, num_sweeps, frame_skip + 1)) frame_considered = np.asarray(past_part + future_part) for i in frame_considered[1:]: curr_disp_vectors = np.zeros_like(refer_pc, dtype=np.float32) curr_disp_vectors.fill(np.nan) curr_disp_vectors[refer_pc_idx_outside_box, ] = 0.0 # First, for each instance, compute the corresponding points displacement. for j in range(num_instances): instance_box = instance_box_list[j] instance_box_data = instance_box[i] # This is for the i-th sweep if np.isnan(instance_box_data).any( ): # It is possible that in this sweep there is no annotation continue tmp_box = Box(center=instance_box_data[:3], size=instance_box_data[3:6], orientation=Quaternion(instance_box_data[6:])) pc_in_bbox_idx = refer_pc_idx_per_bbox[j] disp_vectors = calc_displace_vector(refer_pc[pc_in_bbox_idx], refer_box_list[j], tmp_box) curr_disp_vectors[pc_in_bbox_idx] = disp_vectors[:] # Second, compute the mean displacement vector and category for each non-empty pixel disp_field = np.zeros( [unique_indices.shape[0], 2], dtype=np.float32) # we only consider the 2D field # We only compute loss for valid pixels where there are corresponding box annotations between two frames valid_pixels = np.zeros(unique_indices.shape[0], dtype=np.bool) for h, v in enumerate(zip(unique_indices, num_points_in_pixel)): # Only when the number of majority points exceeds predefined proportion, we compute # the displacement vector for this pixel. Otherwise, We consider it is background (possibly ground plane) # and has zero displacement. pixel_elements_categories = points_category[v[0]:v[0] + v[1]] most_freq_cat, most_freq = most_freq_info[h] if most_freq >= proportion_thresh: most_freq_cat_idx = np.where( pixel_elements_categories == most_freq_cat)[0] most_freq_cat_disp_vectors = curr_disp_vectors[v[0]:v[0] + v[1], :3] most_freq_cat_disp_vectors = most_freq_cat_disp_vectors[ most_freq_cat_idx] if np.isnan(most_freq_cat_disp_vectors).any( ): # contains invalid disp vectors valid_pixels[h] = 0.0 else: mean_disp_vector = np.mean(most_freq_cat_disp_vectors, axis=0) disp_field[h] = mean_disp_vector[ 0:2] # ignore the z direction valid_pixels[h] = 1.0 # Finally, assemble to a 2D image disp_field_sparse = np.zeros((num_divisions[0], num_divisions[1], 2), dtype=np.float32) disp_field_sparse[pixel_indices[:, 0], pixel_indices[:, 1]] = disp_field[:] valid_pixel_map = np.zeros((num_divisions[0], num_divisions[1]), dtype=np.float32) valid_pixel_map[pixel_indices[:, 0], pixel_indices[:, 1]] = valid_pixels[:] all_disp_field_gt_list.append(disp_field_sparse) all_valid_pixel_maps_list.append(valid_pixel_map) all_disp_field_gt_list = np.stack(all_disp_field_gt_list, axis=0) all_valid_pixel_maps_list = np.stack(all_valid_pixel_maps_list, axis=0) if reordered: num_past = len(past_part) all_disp_field_gt_list[0:num_past] = all_disp_field_gt_list[(num_past - 1)::-1] all_valid_pixel_maps_list[0:num_past] = all_valid_pixel_maps_list[( num_past - 1)::-1] return all_disp_field_gt_list, all_valid_pixel_maps_list, non_empty_map, pixel_cat_map
def vis_model_per_sample_data(bev_input_data, data_dict, frame_skip=3, voxel_size=(0.25, 0.25, 0.4), loaded_models=None, which_model="MotionNet", model_path=None, img_save_dir=None, use_adj_frame_pred=False, use_motion_state_pred_masking=False, frame_idx=0, disp=True): """ Visualize the prediction (ie, displacement field) results. bev_ipput_data: the preprocessed sparse bev data data_dict: a dictionary storing the point cloud data and annotations frame_skip: how many frames we want to skip for future frames voxel_size: the size of each voxel loaded_models: the model which has loaded the pretrained weights which_model: which model to apply ['MotionNet'/'MotionNetMGDA'] model_path: the path to the pretrained model img_save_dir: the directory for saving the predicted image use_adj_frame_pred: whether to predict the relative offsets between two adjacent frames use_motion_state_pred_masking: whether to threshold the prediction with motion state estimation results frame_idx: used for specifying the name of saved image frames disp: whether to immediately show the predicted results """ if model_path is None: raise ValueError("Need to specify saved model path.") if img_save_dir is None: raise ValueError("Need to specify image save path.") device = torch.device("cuda" if torch.cuda.is_available() else "cpu") fig, ax = plt.subplots(1, 3, figsize=(20, 8)) # Load pre-trained network weights if which_model == "MotionNet": model = loaded_models[0] else: model_encoder = loaded_models[0] model_head = loaded_models[1] # Prepare data for the network padded_voxel_points, all_disp_field_gt, all_valid_pixel_maps,\ non_empty_map, pixel_cat_map_gt, num_past_pcs, num_future_pcs = bev_input_data padded_voxel_points = torch.unsqueeze(padded_voxel_points, 0).to(device) # Make prediction if which_model == "MotionNet": model.eval() else: model_encoder.eval() model_head.eval() with torch.no_grad(): if which_model == "MotionNet": disp_pred, cat_pred, motion_pred = model(padded_voxel_points) else: shared_feats = model_encoder(padded_voxel_points) disp_pred, cat_pred, motion_pred = model_head(shared_feats) disp_pred = disp_pred.cpu().numpy() disp_pred = np.transpose(disp_pred, (0, 2, 3, 1)) cat_pred = np.squeeze(cat_pred.cpu().numpy(), 0) if use_adj_frame_pred: # The prediction are the displacement between adjacent frames for c in range(1, disp_pred.shape[0]): disp_pred[c, ...] = disp_pred[c, ...] + disp_pred[c - 1, ...] if use_motion_state_pred_masking: motion_pred_numpy = motion_pred.cpu().numpy() motion_pred_numpy = np.argmax(motion_pred_numpy, axis=1) motion_mask = motion_pred_numpy == 0 cat_pred_numpy = np.argmax(cat_pred, axis=0) cat_mask = np.logical_and(cat_pred_numpy == 0, non_empty_map == 1) cat_mask = np.expand_dims(cat_mask, 0) cat_weight_map = np.ones_like(motion_pred_numpy, dtype=np.float32) cat_weight_map[motion_mask] = 0.0 cat_weight_map[cat_mask] = 0.0 cat_weight_map = cat_weight_map[:, :, :, np.newaxis] # (1, h, w. 1) disp_pred = disp_pred * cat_weight_map # ------------------------- Visualization ------------------------- # --- Load the point cloud data and annotations --- num_sweeps = data_dict['num_sweeps'] times = data_dict['times'] num_past_sweeps = len(np.where(times >= 0)[0]) num_future_sweeps = len(np.where(times < 0)[0]) assert num_past_sweeps + num_future_sweeps == num_sweeps, "The number of sweeps is incorrect!" # Load point cloud pc_list = [] for i in range(num_sweeps): pc = data_dict['pc_' + str(i)] pc_list.append(pc) # Reorder the pc, and skip sample frames if wanted tmp_pc_list_1 = pc_list[0:num_past_sweeps:(frame_skip + 1)] tmp_pc_list_1 = tmp_pc_list_1[::-1] tmp_pc_list_2 = pc_list[(num_past_sweeps + frame_skip)::(frame_skip + 1)] pc_list = tmp_pc_list_1 + tmp_pc_list_2 num_past_pcs = len(tmp_pc_list_1) # Load box annotations, and reorder and skip some annotations if wanted num_instances = data_dict['num_instances'] instance_box_list = list() for i in range(num_instances): instance = data_dict['instance_boxes_' + str(i)] # Reorder the boxes tmp_instance = np.zeros((len(pc_list), instance.shape[1]), dtype=np.float32) tmp_instance[(num_past_pcs - 1)::-1] = instance[0:num_past_sweeps:(frame_skip + 1)] tmp_instance[num_past_pcs:] = instance[(num_past_sweeps + frame_skip)::(frame_skip + 1)] instance = tmp_instance[:] instance_box_list.append(instance) # Draw the LIDAR and quiver plots # The distant points are very sparse and not reliable. We do not show them. border_meter = 4 border_pixel = border_meter * 4 x_lim = [-(32 - border_meter), (32 - border_meter)] y_lim = [-(32 - border_meter), (32 - border_meter)] # We only show the cells having one-hot category vectors max_prob = np.amax(pixel_cat_map_gt, axis=-1) filter_mask = max_prob == 1.0 pixel_cat_map = np.argmax( pixel_cat_map_gt, axis=-1) + 1 # category starts from 1 (background), etc pixel_cat_map = (pixel_cat_map * non_empty_map * filter_mask).astype( np.int) cat_pred = np.argmax(cat_pred, axis=0) + 1 cat_pred = (cat_pred * non_empty_map * filter_mask).astype(np.int) # --- Visualization --- idx = num_past_pcs - 1 points = pc_list[idx] ax[0].scatter(points[0, :], points[1, :], c=points[2, :], s=1) ax[0].set_xlim(x_lim[0], x_lim[1]) ax[0].set_ylim(y_lim[0], y_lim[1]) ax[0].axis('off') ax[0].set_aspect('equal') ax[0].title.set_text('LIDAR data') for j in range(num_instances): inst = instance_box_list[j] box_data = inst[idx] if np.isnan(box_data).any(): continue box = Box(center=box_data[0:3], size=box_data[3:6], orientation=Quaternion(box_data[6:])) box.render(ax[0]) # Plot quiver. We only show non-empty vectors. Plot each category. field_gt = all_disp_field_gt[-1] idx_x = np.arange(field_gt.shape[0]) idx_y = np.arange(field_gt.shape[1]) idx_x, idx_y = np.meshgrid(idx_x, idx_y, indexing='ij') qk = [None] * len(color_map) # for quiver key for k in range(len(color_map)): # ------------------------ Ground-truth ------------------------ mask = pixel_cat_map == (k + 1) # For cells with very small movements, we threshold them to be static field_gt_norm = np.linalg.norm(field_gt, ord=2, axis=-1) # out: (h, w) thd_mask = field_gt_norm <= 0.4 field_gt[thd_mask, :] = 0 # Get the displacement field X = idx_x[mask] Y = idx_y[mask] U = field_gt[:, :, 0][mask] / voxel_size[ 0] # the distance between pixels is w.r.t. grid size (e.g., 0.2m) V = field_gt[:, :, 1][mask] / voxel_size[1] qk[k] = ax[1].quiver(X, Y, U, V, angles='xy', scale_units='xy', scale=1, color=color_map[k]) ax[1].quiverkey(qk[k], X=0.0 + k / 5.0, Y=1.1, U=20, label=cat_names[k], labelpos='E') ax[1].set_xlim(border_pixel, field_gt.shape[0] - border_pixel) ax[1].set_ylim(border_pixel, field_gt.shape[1] - border_pixel) ax[1].set_aspect('equal') ax[1].title.set_text('Ground-truth') ax[1].axis('off') # ------------------------ Prediction ------------------------ # Show the prediction results. We show the cells corresponding to the non-empty one-hot gt cells. mask_pred = cat_pred == (k + 1) field_pred = disp_pred[ -1] # Show last prediction, ie., the 20-th frame # For cells with very small movements, we threshold them to be static field_pred_norm = np.linalg.norm(field_pred, ord=2, axis=-1) # out: (h, w) thd_mask = field_pred_norm <= 0.4 field_pred[thd_mask, :] = 0 # We use the same indices as the ground-truth, since we are currently focused on the foreground X_pred = idx_x[mask_pred] Y_pred = idx_y[mask_pred] U_pred = field_pred[:, :, 0][mask_pred] / voxel_size[0] V_pred = field_pred[:, :, 1][mask_pred] / voxel_size[1] ax[2].quiver(X_pred, Y_pred, U_pred, V_pred, angles='xy', scale_units='xy', scale=1, color=color_map[k]) ax[2].set_xlim(border_pixel, field_pred.shape[0] - border_pixel) ax[2].set_ylim(border_pixel, field_pred.shape[1] - border_pixel) ax[2].set_aspect('equal') ax[2].title.set_text('Prediction') ax[2].axis('off') print("finish sample {}".format(frame_idx)) plt.savefig(os.path.join(img_save_dir, str(frame_idx) + '.png')) if disp: plt.pause(0.02) ax[0].clear() ax[1].clear() ax[2].clear() if disp: plt.show()
def car(self) -> 'Box': ''' return a Box object representing the vehicle in sensor cordinates ''' A_car_2_cs = self.A_car_2_cs return Box(center=A_car_2_cs[:3, 3], size=self._CAR_SIZE, orientation=Quaternion(matrix=A_car_2_cs))
def convert_eval_format(self, results): from nuscenes.utils.data_classes import Box ret = { "meta": { "use_camera": True, "use_lidar": False, "use_radar": False, "use_map": False, "use_external": False, }, "results": {}, } print("Converting nuscenes format...") for image_id in self.images: if not (image_id in results): continue image_info = self.coco.loadImgs(ids=[image_id])[0] sample_token = image_info["sample_token"] trans_matrix = np.array(image_info["trans_matrix"], np.float32) sensor_id = image_info["sensor_id"] sample_results = [] for item in results[image_id]: class_name = ( self.class_name[int(item["class"] - 1)] if not ("detection_name" in item) else item["detection_name"] ) if self.opt.tracking and class_name in self._tracking_ignored_class: continue score = ( float(item["score"]) if not ("detection_score" in item) else item["detection_score"] ) if "size" in item: size = item["size"] else: size = [ float(item["dim"][1]), float(item["dim"][2]), float(item["dim"][0]), ] if "translation" in item: translation = item["translation"] else: translation = np.dot( trans_matrix, np.array( [ item["loc"][0], item["loc"][1] - size[2], item["loc"][2], 1, ], np.float32, ), ) det_id = item["det_id"] if "det_id" in item else -1 tracking_id = item["tracking_id"] if "tracking_id" in item else 1 if not ("rotation" in item): rot_cam = Quaternion(axis=[0, 1, 0], angle=item["rot_y"]) loc = np.array( [item["loc"][0], item["loc"][1], item["loc"][2]], np.float32 ) box = Box(loc, size, rot_cam, name="2", token="1") box.translate(np.array([0, -box.wlh[2] / 2, 0])) box.rotate(Quaternion(image_info["cs_record_rot"])) box.translate(np.array(image_info["cs_record_trans"])) box.rotate(Quaternion(image_info["pose_record_rot"])) box.translate(np.array(image_info["pose_record_trans"])) rotation = box.orientation rotation = [ float(rotation.w), float(rotation.x), float(rotation.y), float(rotation.z), ] else: rotation = item["rotation"] nuscenes_att = ( np.array(item["nuscenes_att"], np.float32) if "nuscenes_att" in item else np.zeros(8, np.float32) ) att = "" if class_name in self._cycles: att = self.id_to_attribute[np.argmax(nuscenes_att[0:2]) + 1] elif class_name in self._pedestrians: att = self.id_to_attribute[np.argmax(nuscenes_att[2:5]) + 3] elif class_name in self._vehicles: att = self.id_to_attribute[np.argmax(nuscenes_att[5:8]) + 6] if "velocity" in item and len(item["velocity"]) == 2: velocity = item["velocity"] else: velocity = item["velocity"] if "velocity" in item else [0, 0, 0] velocity = np.dot( trans_matrix, np.array( [velocity[0], velocity[1], velocity[2], 0], np.float32 ), ) velocity = [float(velocity[0]), float(velocity[1])] result = { "sample_token": sample_token, "translation": [ float(translation[0]), float(translation[1]), float(translation[2]), ], "size": size, "rotation": rotation, "velocity": velocity, "detection_name": class_name, "attribute_name": att if not ("attribute_name" in item) else item["attribute_name"], "detection_score": score, "tracking_name": class_name, "tracking_score": score, "tracking_id": tracking_id, "sensor_id": sensor_id, "det_id": det_id, } sample_results.append(result) if sample_token in ret["results"]: ret["results"][sample_token] = ( ret["results"][sample_token] + sample_results ) else: ret["results"][sample_token] = sample_results for sample_token in ret["results"].keys(): confs = sorted( [ (-d["detection_score"], ind) for ind, d in enumerate(ret["results"][sample_token]) ] ) ret["results"][sample_token] = [ ret["results"][sample_token][ind] for _, ind in confs[: min(500, len(confs))] ] return ret
def convert_eval_format(self, results): from nuscenes.utils.data_classes import Box ret = {'meta': {'use_camera': True, 'use_lidar': False, 'use_radar': False, 'use_map': False, 'use_external': False}, 'results': {}} print('Converting nuscenes format...') for image_id in self.images: if not (image_id in results): continue image_info = self.coco.loadImgs(ids=[image_id])[0] sample_token = image_info['sample_token'] trans_matrix = np.array(image_info['trans_matrix'], np.float32) sensor_id = image_info['sensor_id'] sample_results = [] for item in results[image_id]: class_name = self.class_name[int(item['class'] - 1)] \ if not ('detection_name' in item) else item['detection_name'] if self.opt.tracking and class_name in self._tracking_ignored_class: continue score = float(item['score']) \ if not ('detection_score' in item) else item['detection_score'] if 'size' in item: size = item['size'] else: size = [float(item['dim'][1]), float(item['dim'][2]), \ float(item['dim'][0])] for i in range(3): size[i] = max(0.001, size[i]) if 'translation' in item: translation = item['translation'] else: translation = np.dot(trans_matrix, np.array( [item['loc'][0], item['loc'][1] - size[2], item['loc'][2], 1], np.float32)) det_id = item['det_id'] if 'det_id' in item else -1 tracking_id = item['tracking_id'] if 'tracking_id' in item else 1 if not ('rotation' in item): rot_cam = Quaternion( axis=[0, 1, 0], angle=item['rot_y']) loc = np.array( [item['loc'][0], item['loc'][1], item['loc'][2]], np.float32) box = Box(loc, size, rot_cam, name='2', token='1') box.translate(np.array([0, - box.wlh[2] / 2, 0])) box.rotate(Quaternion(image_info['cs_record_rot'])) box.translate(np.array(image_info['cs_record_trans'])) box.rotate(Quaternion(image_info['pose_record_rot'])) box.translate(np.array(image_info['pose_record_trans'])) rotation = box.orientation rotation = [float(rotation.w), float(rotation.x), \ float(rotation.y), float(rotation.z)] else: rotation = item['rotation'] nuscenes_att = np.array(item['nuscenes_att'], np.float32) \ if 'nuscenes_att' in item else np.zeros(8, np.float32) att = '' if class_name in self._cycles: att = self.id_to_attribute[np.argmax(nuscenes_att[0:2]) + 1] elif class_name in self._pedestrians: att = self.id_to_attribute[np.argmax(nuscenes_att[2:5]) + 3] elif class_name in self._vehicles: att = self.id_to_attribute[np.argmax(nuscenes_att[5:8]) + 6] if 'velocity' in item and len(item['velocity']) == 2: velocity = item['velocity'] else: velocity = item['velocity'] if 'velocity' in item else [0, 0, 0] velocity = np.dot(trans_matrix, np.array( [velocity[0], velocity[1], velocity[2], 0], np.float32)) velocity = [float(velocity[0]), float(velocity[1])] result = { 'sample_token': sample_token, 'translation': [float(translation[0]), float(translation[1]), \ float(translation[2])], 'size': size, 'rotation': rotation, 'velocity': velocity, 'detection_name': class_name, 'attribute_name': att \ if not ('attribute_name' in item) else item['attribute_name'], 'detection_score': score, 'tracking_name': class_name, 'tracking_score': score, 'tracking_id': tracking_id, 'sensor_id': sensor_id, 'det_id': det_id} sample_results.append(result) if sample_token in ret['results']: ret['results'][sample_token] = ret['results'][sample_token] + \ sample_results else: ret['results'][sample_token] = sample_results for sample_token in ret['results'].keys(): confs = sorted([(-d['detection_score'], ind) \ for ind, d in enumerate(ret['results'][sample_token])]) ret['results'][sample_token] = [ret['results'][sample_token][ind] \ for _, ind in confs[:min(500, len(confs))]] return ret
def box_nuscenes_to_kitti(box: Box, velo_to_cam_rot: Quaternion, velo_to_cam_trans: np.ndarray, r0_rect: Quaternion, kitti_to_nu_lidar_inv: Quaternion = Quaternion(axis=(0, 0, 1), angle=np.pi / 2).inverse) \ -> Box: """ Transform from nuScenes lidar frame to KITTI reference frame. :param box: Instance in nuScenes lidar frame. :param velo_to_cam_rot: Quaternion to rotate from lidar to camera frame. :param velo_to_cam_trans: <np.float: 3>. Translate from lidar to camera frame. :param r0_rect: Quaternion to rectify camera frame. :param kitti_to_nu_lidar_inv: Quaternion to rotate nuScenes to KITTI LIDAR. :return: Box instance in KITTI reference frame. """ # Copy box to avoid side-effects. box = box.copy() # Rotate to KITTI lidar. box.rotate(kitti_to_nu_lidar_inv) # Transform to KITTI camera. box.rotate(velo_to_cam_rot) box.translate(velo_to_cam_trans) # Rotate to KITTI rectified camera. box.rotate(r0_rect) # KITTI defines the box center as the bottom center of the object. # We use the true center, so we need to adjust half height in y direction. box.translate(np.array([0, box.wlh[2] / 2, 0])) return box
def evaluation(self, det_annos, class_names, **kwargs): eval_det_annos = copy.deepcopy(det_annos) # Create NuScenes JSON output file nusc_annos = {} for sample in eval_det_annos: try: sample_idx = sample['sample_idx'][0] except: continue sample_results = [] calib = self.get_calib(sample_idx) sample['boxes_lidar'] = np.array(sample['boxes_lidar']) positions = sample['boxes_lidar'][:, :3] dimensions = sample['boxes_lidar'][:, 3:6] rotations = sample['boxes_lidar'][:, 6] for center, dimension, yaw, label, score in zip( positions, dimensions, rotations, sample['name'], sample['score']): quaternion = Quaternion(axis=[0, 0, 1], radians=yaw) box = Box(center, dimension, quaternion) # Move box to ego vehicle coord system box.rotate(Quaternion(calib.lidar_calibrated['rotation'])) box.translate(np.array(calib.lidar_calibrated['translation'])) # Move box to global coord system box.rotate(Quaternion(calib.ego_pose['rotation'])) box.translate(np.array(calib.ego_pose['translation'])) if (float(score) < 0): score = 0 if (float(score) > 1): score = 1 if (label == 'Cyclist'): label = 'bicycle' sample_results.append({ "sample_token": sample_idx, "translation": box.center.tolist(), "size": box.wlh.tolist(), "rotation": box.orientation.elements.tolist(), "lidar_yaw": float(yaw), "velocity": (0, 0), "detection_name": label.lower(), "detection_score": float(score), "attribute_name": self.DefaultAttribute[label.lower()], }) nusc_annos[sample_idx] = sample_results for sample_id in self.sample_id_list: if sample_id not in nusc_annos: nusc_annos[sample_id] = [] nusc_submission = { "meta": { "use_camera": False, "use_lidar": True, "use_radar": False, "use_map": False, "use_external": False, }, "results": nusc_annos, } eval_file = os.path.join(kwargs['output_dir'], 'nusc_results.json') with open(eval_file, "w") as f: json.dump(nusc_submission, f, indent=2) # Call NuScenes evaluation cfg = config_factory('detection_cvpr_2019') nusc_eval = DetectionEval(self.nusc, config=cfg, result_path=eval_file, eval_set=self.split, output_dir=kwargs['output_dir'], verbose=True) metric_summary = nusc_eval.main(plot_examples=10, render_curves=True) # Reformat the metrics summary a bit for the tensorboard logger err_name_mapping = { 'trans_err': 'mATE', 'scale_err': 'mASE', 'orient_err': 'mAOE', 'vel_err': 'mAVE', 'attr_err': 'mAAE' } result = {} result['mean_ap'] = metric_summary['mean_ap'] for tp_name, tp_val in metric_summary['tp_errors'].items(): result[tp_name] = tp_val class_aps = metric_summary['mean_dist_aps'] class_tps = metric_summary['label_tp_errors'] for class_name in class_aps.keys(): result['mAP_' + class_name] = class_aps[class_name] for key, val in err_name_mapping.items(): result[val + '_' + class_name] = class_tps[class_name][key] return str(result), result
def get_binimg(self, rec): egopose = self.nusc.get( 'ego_pose', self.nusc.get('sample_data', rec['data']['LIDAR_TOP'])['ego_pose_token']) trans = -np.array(egopose['translation']) rot = Quaternion(egopose['rotation']).inverse #vehicle label img_vehicle = np.zeros((self.nx[0], self.nx[1])) for tok in rec['anns']: inst = self.nusc.get('sample_annotation', tok) # add category for lyft if not inst['category_name'].split('.')[0] == 'vehicle': continue box = Box(inst['translation'], inst['size'], Quaternion(inst['rotation'])) box.translate(trans) box.rotate(rot) pts = box.bottom_corners()[:2].T pts = np.round((pts - self.bx[:2] + self.dx[:2] / 2.) / self.dx[:2]).astype(np.int32) pts[:, [1, 0]] = pts[:, [0, 1]] cv2.fillPoly(img_vehicle, [pts], 1.0) # cv2.imwrite('./output/vehicle{}.png'.format(rec['timestamp']),img_vehicle*255) #map label map_name = self.scene2map[self.nusc.get('scene', rec['scene_token'])['name']] rot_map = Quaternion(egopose['rotation']).rotation_matrix rot_map = np.arctan2(rot_map[1, 0], rot_map[0, 0]) center = np.array([ egopose['translation'][0], egopose['translation'][1], np.cos(rot_map), np.sin(rot_map) ]) lmap = get_local_map(self.nusc_maps[map_name], center, 50.0, ['road_segment', 'lane'], ['lane_divider', 'road_divider']) #road_segment img_road_segment = np.zeros((self.nx[0], self.nx[1])) arr_pts = [] for pts in lmap['road_segment']: pts = np.round((pts - self.bx[:2] + self.dx[:2] / 2.) / self.dx[:2]).astype(np.int32) pts[:, [1, 0]] = pts[:, [0, 1]] arr_pts.append(pts) cv2.fillPoly(img_road_segment, arr_pts, 1.0) #lane #lane = np.zeros((self.nx[0], self.nx[1])) arr_pts = [] for pts in lmap['lane']: pts = np.round((pts - self.bx[:2] + self.dx[:2] / 2.) / self.dx[:2]).astype(np.int32) pts[:, [1, 0]] = pts[:, [0, 1]] arr_pts.append(pts) # cv2.fillPoly(lane,arr_pts,1.0) cv2.fillPoly(img_road_segment, arr_pts, 1.0) #road_divider # img_road_divider = np.zeros((self.nx[0], self.nx[1])) # arr_pts=[] # for pts in lmap['road_divider']: # pts = np.round( # (pts - self.bx[:2] + self.dx[:2]/2.) / self.dx[:2] # ).astype(np.int32) # pts[:, [1, 0]] = pts[:, [0, 1]] # arr_pts.append(pts) # cv2.polylines(img_road_divider,arr_pts,False,1.0,1) #lane_divider img_lane_divider = np.zeros((self.nx[0], self.nx[1])) arr_pts = [] for pts in lmap['lane_divider']: pts = np.round((pts - self.bx[:2] + self.dx[:2] / 2.) / self.dx[:2]).astype(np.int32) pts[:, [1, 0]] = pts[:, [0, 1]] arr_pts.append(pts) cv2.polylines(img_lane_divider, arr_pts, False, 1.0, 2) # cv2.imwrite('./output/lane_divider{}.png'.format(rec['timestamp']),img_lane_divider*255) #plt.plot(pts[:, 1], pts[:, 0], c=(159./255., 0.0, 1.0), alpha=0.5) return torch.Tensor( np.stack([img_vehicle, img_road_segment, img_lane_divider]))
i = 0 while sample_token != end_token: my_sample = nusc.get('sample', my_sample['next']) img = get_image(my_sample, root_dir) sample_data = nusc.get_sample_data(my_sample['data']['CAM_FRONT']) labels = sample_data[1] # this label is according to camera, what we have is lidar # converts top lidar intrinsic = sample_data[2] for label in labels: c2d = project_cam_coords_to_pixel([label.center], intrinsic)[0] cv2.circle(img, (int(c2d[0]), int(c2d[1])), 3, (0, 255, 255), -1) # convert center and wlh to 3d box box = Box(center=label.center, size=label.wlh, orientation=label.orientation) corners3d = box.corners() corners3d_2d = project_cam_coords_to_pixel(corners3d, intrinsic) idx = all_category_db.index(label.name) c = get_unique_color_by_id(idx) draw_3d_box(corners3d_2d, img, c) if len(corners3d_2d) > 4: cv2.putText(img, '{0}'.format(all_category[idx]), (int(corners3d_2d[1][0]), int(corners3d_2d[1][1])), cv2.FONT_HERSHEY_PLAIN, .9, (255, 255, 255)) # show some image and lidar points cv2.imshow('aa', img) cv2.imwrite('results/{}.png'.format(i), img)
def box_gl(self, ann_token: str) -> 'Box': ann_rec = self.nusc.get('sample_annotation', ann_token) return Box(center=ann_rec['translation'], size=ann_rec['size'], orientation=Quaternion(ann_rec['rotation']), token=ann_token)
def car_gl(self) -> 'Box': return Box(center=self.A_car_2_gl[:3, 3], size=self._CAR_SIZE, orientation=Quaternion(matrix=self.A_car_2_gl))
def render(self, events: DataFrame, timestamp: int, frame_gt: List[TrackingBox], frame_pred: List[TrackingBox], points=None, pose_record=None, cs_record=None, ifplotgt=False,\ threshold=0.1, ifpltsco=False, outscen_class=False,nusc=None, ifplthis=False, pltve=False) \ -> None: """ Render function for a given scene timestamp :param events: motmetrics events for that particular :param timestamp: timestamp for the rendering :param frame_gt: list of ground truth boxes :param frame_pred: list of prediction boxes """ # Init. #print('Rendering {}'.format(timestamp)) switches = events[events.Type == 'SWITCH'] switch_ids = switches.HId.values #对应frame_pred的tracking_id switch_gt_ids = switches.OId.values #对应GT的tracking_id FN = events[events.Type == 'MISS'] #FN = FN.HId.values FN = FN.OId.values #对应GT的tracking_id FP = events[events.Type == 'FP'] FP = FP.HId.values #对应frame_pred的tracking_id fig, ax = plt.subplots() #plt.style.use('dark_background') # 黑 背景颜色 plt.style.use('classic') # 白 背景颜色 points.render_height(ax, view=np.eye(4), x_lim=(-50, 50), y_lim=(-50, 50), marker_size=0.1) #BEV #points.render_height(ax, view=np.eye(4) )#BEV #points = points.rotate(Quaternion( pose_record["rotation"]).inverse) sam_anno = [] if len(frame_gt) > 0: sample = nusc.get('sample', frame_gt[0].sample_token) sample_annotation_tokens = sample['anns'] #标注 for anno_token in sample_annotation_tokens: sam_anno.append( nusc.get('sample_annotation', anno_token)["instance_token"]) vislev = {'v0-40': 0, 'v40-60': 1, 'v60-80': 2, 'v80-100': 3} #points.render_intensity(ax) # Plot GT boxes. if ifplotgt: for i, b in enumerate(frame_gt): color = 'k' #qua = tuple(self.list_add(list(b.rotation),[0.924,0.0,0.0,0.383])) box = Box(np.array(b.ego_translation, dtype=float), b.size, Quaternion(b.rotation), name=b.tracking_name, token=b.tracking_id) #qua = tuple(self.list_add(list(pose_record["rotation"]),[ 0.831,0.0,0.0,0.556])) #box.translate(-np.array(pose_record["translation"])) box.rotate(Quaternion(pose_record["rotation"]).inverse) # move box to sensor coord system box.translate(-np.array(cs_record["translation"])) box.rotate(Quaternion(cs_record["rotation"]).inverse) if outscen_class: #####TrackingRenderer.gt_ptsnumrange = {'0-5nums':0, '5-10nums':0, '10-50nums':0, '50-200nums':0, '>200nums': 0 } #car lidar点云数 #####TrackingRenderer.gt_ptsnumrange = {'0-5nums':0, '5-10nums':0, '10-20nums':0, '20-30nums':0, '>30nums': 0 } #ped lidar点云数 num_pts = frame_gt[i].num_pts #####car if TrackingRenderer.outscen == 'car': if num_pts > 0 and num_pts <= 5: TrackingRenderer.gt_ptsnumrange['0-5nums'] += 1 elif num_pts > 5 and num_pts <= 10: TrackingRenderer.gt_ptsnumrange['5-10nums'] += 1 elif num_pts > 10 and num_pts <= 50: TrackingRenderer.gt_ptsnumrange['10-50nums'] += 1 elif num_pts > 50 and num_pts <= 200: TrackingRenderer.gt_ptsnumrange['50-200nums'] += 1 elif num_pts > 200: TrackingRenderer.gt_ptsnumrange['>200nums'] += 1 else: ####ped if num_pts > 0 and num_pts <= 5: TrackingRenderer.gt_ptsnumrange['0-5nums'] += 1 elif num_pts > 5 and num_pts <= 10: TrackingRenderer.gt_ptsnumrange['5-10nums'] += 1 elif num_pts > 10 and num_pts <= 20: TrackingRenderer.gt_ptsnumrange['10-20nums'] += 1 elif num_pts > 20 and num_pts <= 30: TrackingRenderer.gt_ptsnumrange['20-30nums'] += 1 elif num_pts > 30: TrackingRenderer.gt_ptsnumrange['>30nums'] += 1 if box.token in FN: #####distance dis = math.sqrt(box.center[0]**2 + box.center[1]**2) if dis > 0 and dis <= 15: TrackingRenderer.fn_disrange[ '<15m'] = TrackingRenderer.fn_disrange[ '<15m'] + 1 elif dis > 15 and dis <= 30: TrackingRenderer.fn_disrange[ '15-30m'] = TrackingRenderer.fn_disrange[ '15-30m'] + 1 elif dis > 30 and dis <= 45: TrackingRenderer.fn_disrange[ '30-45m'] = TrackingRenderer.fn_disrange[ '30-45m'] + 1 elif dis > 45 and dis <= 54: TrackingRenderer.fn_disrange[ '45-54m'] = TrackingRenderer.fn_disrange[ '45-54m'] + 1 else: TrackingRenderer.fn_disrange[ '-1'] = TrackingRenderer.fn_disrange['-1'] + 1 #####ve TrackingRenderer.fn_verange = {'0-0.1m/s':0, '0.1-2.5m/s':0, '2.5-5m/s':0, '5-10m/s':0, '>10m/s': 0 } #car 绝对速度 ##### TrackingRenderer.fn_verange = {'0-0.1m/s':0, '0.1-1.0m/s':0, '1.0-1.5m/s':0, '1.5-2m/s':0, '>2m/s': 0 } #ped 绝对速度 ve = math.sqrt(frame_gt[i].velocity[0]**2 + frame_gt[i].velocity[1]**2) if TrackingRenderer.outscen == 'car': if ve > 0 and ve <= 0.1: TrackingRenderer.fn_verange['0-0.1m/s'] += 1 elif ve > 0.1 and ve <= 2.5: TrackingRenderer.fn_verange['0.1-2.5m/s'] += 1 elif ve > 2.5 and ve <= 5: TrackingRenderer.fn_verange['2.5-5m/s'] += 1 elif ve > 5 and ve <= 10: TrackingRenderer.fn_verange['5-10m/s'] += 1 else: TrackingRenderer.fn_verange['>10m/s'] += 1 else: if ve > 0 and ve <= 0.1: TrackingRenderer.fn_verange['0-0.1m/s'] += 1 elif ve > 0.1 and ve <= 1.0: TrackingRenderer.fn_verange['0.1-1.0m/s'] += 1 elif ve > 1.0 and ve <= 1.5: TrackingRenderer.fn_verange['1.0-1.5m/s'] += 1 elif ve > 1.5 and ve <= 2: TrackingRenderer.fn_verange['1.5-2m/s'] += 1 else: TrackingRenderer.fn_verange['>2m/s'] += 1 #####num_pts TrackingRenderer.fn_ptsnumrange = {'0-5nums':0, '5-10nums':0, '10-50nums':0, '50-200nums':0, '>200nums': 0 } #car lidar点云数 抽样参考比例:0.21 0.23 0.26,0.2,0.1 #############TrackingRenderer.fn_ptsnumrange = {'0-5nums':0, '5-10nums':0, '10-20nums':0, '20-30nums':0, '>30nums': 0 } #ped lidar点云数 num_pts = frame_gt[i].num_pts if TrackingRenderer.outscen == 'car': if num_pts > 0 and num_pts <= 5: TrackingRenderer.fn_ptsnumrange['0-5nums'] += 1 elif num_pts > 5 and num_pts <= 10: TrackingRenderer.fn_ptsnumrange[ '5-10nums'] += 1 elif num_pts > 10 and num_pts <= 50: TrackingRenderer.fn_ptsnumrange[ '10-50nums'] += 1 elif num_pts > 50 and num_pts <= 200: TrackingRenderer.fn_ptsnumrange[ '50-200nums'] += 1 elif num_pts > 200: TrackingRenderer.fn_ptsnumrange[ '>200nums'] += 1 else: TrackingRenderer.fn_ptsnumrange['-1'] += 1 else: if num_pts > 0 and num_pts <= 5: TrackingRenderer.fn_ptsnumrange['0-5nums'] += 1 elif num_pts > 5 and num_pts <= 10: TrackingRenderer.fn_ptsnumrange[ '5-10nums'] += 1 elif num_pts > 10 and num_pts <= 20: TrackingRenderer.fn_ptsnumrange[ '10-20nums'] += 1 elif num_pts > 20 and num_pts <= 30: TrackingRenderer.fn_ptsnumrange[ '20-30nums'] += 1 elif num_pts > 200: TrackingRenderer.fn_ptsnumrange['>30nums'] += 1 else: TrackingRenderer.fn_ptsnumrange['-1'] += 1 ######读取 #sample = nusc.get('sample', frame_gt[i].sample_token) #sample_annotation_tokens = sample['anns'] #标注 try: ind = sam_anno.index(b.tracking_id) sample_annotation = nusc.get( 'sample_annotation', sample_annotation_tokens[ind]) ####TrackingRenderer.vis_ratio = {'0-0.4':0, '0.4-0.6':0, '0.6-0.8':0, '0.8-1.0':0} #相机视角 0-40%, 40-60%, 60-80% and 80-100% The visibility of an instance is the fraction of annotation visible in all 6 images. visibility = nusc.get( 'visibility', sample_annotation['visibility_token']) vis_level = vislev[visibility["level"]] if vis_level == 0: TrackingRenderer.vis_ratio['0-0.4'] += 1 elif vis_level == 1: TrackingRenderer.vis_ratio['0.4-0.6'] += 1 elif vis_level == 2: TrackingRenderer.vis_ratio['0.6-0.8'] += 1 elif vis_level == 3: TrackingRenderer.vis_ratio['0.8-1.0'] += 1 ####TrackingRenderer.gt_ratio = {'ang_muta':0, 've_muta':0, 'aug_other':0, 've_other':0, 'firfn_trk':0, 'nonfirfn_trk':0} #仅包含与上一帧持续追踪的样本,角度和速度突变分别与other相并 pre_token = sample_annotation['prev'] if pre_token == '': #仅作为first_FN TrackingRenderer.gt_ratio['firfn_trk'] += 1 else: TrackingRenderer.gt_ratio['nonfirfn_trk'] += 1 pre_annotation = nusc.get( 'sample_annotation', pre_token) vari_ang = abs( R.from_quat(list(frame_gt[i].rotation)). as_euler('zxy', degrees=False)[0] - R.from_quat( list(pre_annotation['rotation']) ).as_euler('zxy', degrees=False)[0]) if vari_ang > 0.52: #30度 TrackingRenderer.gt_ratio['angvar>30'] += 1 elif 0.52 > vari_ang and vari_ang > 0.35: TrackingRenderer.gt_ratio[ '30>angvar>20'] += 1 elif 0.35 > vari_ang and vari_ang > 0.17: TrackingRenderer.gt_ratio[ '20>angvar>10'] += 1 elif vari_ang < 0.17: TrackingRenderer.gt_ratio['10>angvar'] += 1 else: pass pre_ve = nusc.box_velocity( pre_annotation['token']) ve_varity = abs(ve - math.sqrt(pre_ve[0]**2 + pre_ve[1]**2)) if ve_varity > TrackingRenderer.mutave_thr[0]: TrackingRenderer.gt_ratio[ 'vevari>%s' % (TrackingRenderer.mutave_thr[0])] += 1 elif ve_varity < TrackingRenderer.mutave_thr[ 0] and ve_varity >= TrackingRenderer.mutave_thr[ 1]: TrackingRenderer.gt_ratio[ '%s<vevari<%s' % (TrackingRenderer.mutave_thr[1], TrackingRenderer.mutave_thr[0])] += 1 elif ve_varity < TrackingRenderer.mutave_thr[ 1] and ve_varity >= TrackingRenderer.mutave_thr[ 2]: TrackingRenderer.gt_ratio[ '%s<vevari<%s' % (TrackingRenderer.mutave_thr[2], TrackingRenderer.mutave_thr[1])] += 1 else: TrackingRenderer.gt_ratio[ 'vevari<%s' % TrackingRenderer.mutave_thr[2]] += 1 except ValueError: #标注错误 TrackingRenderer.fault_datas += 1 box.render(ax, view=np.eye(4), colors=(color, color, color), linewidth=1) else: pass # Plot predicted boxes. pred_trackid = [] for i, b in enumerate(frame_pred): box = Box( b.ego_translation, b.size, Quaternion(b.rotation), name=b.tracking_name, token=b.tracking_id, score=b.tracking_score, ) # move box to ego vehicle coord system before has done the translation box.rotate(Quaternion(pose_record["rotation"]).inverse) # move box to sensor coord system box.translate(-np.array(cs_record["translation"])) box.rotate(Quaternion(cs_record["rotation"]).inverse) pred_trackid.append(b.tracking_id) if outscen_class: if b.tracking_id in FP: #####distance TrackingRenderer.fp_disrange = {'<15m':0, '15-30m':0, '30-45m':0, '45-54m':0} dis = math.sqrt(box.center[0]**2 + box.center[1]**2) if dis > 0 and dis <= 15: TrackingRenderer.fp_disrange[ '<15m'] = TrackingRenderer.fp_disrange['<15m'] + 1 elif dis > 15 and dis <= 30: TrackingRenderer.fp_disrange[ '15-30m'] = TrackingRenderer.fp_disrange[ '15-30m'] + 1 elif dis > 30 and dis <= 45: TrackingRenderer.fp_disrange[ '30-45m'] = TrackingRenderer.fp_disrange[ '30-45m'] + 1 elif dis > 45 and dis <= 54: TrackingRenderer.fp_disrange[ '45-54m'] = TrackingRenderer.fp_disrange[ '45-54m'] + 1 else: TrackingRenderer.fp_disrange[ '-1'] = TrackingRenderer.fp_disrange['-1'] + 1 #####ve TrackingRenderer.fp_verange = {'0-0.1m/s':0, '0.1-2.5m/s':0, '2.5-5m/s':0, '5-10m/s':0, '>10m/s': 0 } #car 绝对速度 #####TrackingRenderer.fp_verange = {'0-0.1m/s':0, '0.1-1.0m/s':0, '1.0-1.5m/s':0, '1.5-2m/s':0, '>2m/s': 0 } #ped 绝对速度 ve = math.sqrt(frame_pred[i].velocity[0]**2 + frame_pred[i].velocity[1]**2) if TrackingRenderer.outscen == 'car': if ve > 0 and ve <= 0.1: TrackingRenderer.fp_verange['0-0.1m/s'] += 1 elif ve > 0.1 and ve <= 2.5: TrackingRenderer.fp_verange['0.1-2.5m/s'] += 1 elif ve > 2.5 and ve <= 5: TrackingRenderer.fp_verange['2.5-5m/s'] += 1 elif ve > 5 and ve <= 10: TrackingRenderer.fp_verange['5-10m/s'] += 1 else: TrackingRenderer.fp_verange['>10m/s'] += 1 else: if ve > 0 and ve <= 0.1: TrackingRenderer.fp_verange['0-0.1m/s'] += 1 elif ve > 0.1 and ve <= 1.0: TrackingRenderer.fp_verange['0.1-1.0m/s'] += 1 elif ve > 1.0 and ve <= 1.5: TrackingRenderer.fp_verange['1.0-1.5m/s'] += 1 elif ve > 1.5 and ve <= 2: TrackingRenderer.fp_verange['1.5-2m/s'] += 1 else: TrackingRenderer.fp_verange['>2m/s'] += 1 #####num_pts TrackingRenderer.fp_ptsnumrange = {'0-5nums':0, '5-10nums':0, '10-50nums':0, '50-200nums':0, '>200nums': 0 } #car lidar点云数 抽样参考比例:0.21 0.23 0.26,0.2,0.1 ########## TrackingRenderer.fp_ptsnumrange = {'0-5nums':0, '5-10nums':0, '10-20nums':0, '20-30nums':0, '>30nums': 0 } #ped lidar点云数 points_xyzr = np.stack(points.points, 1) points_xyz = points_xyzr[:, :3] points_mask = points_in_box(box, points.points[:3]) mask_indx = np.arange(points_mask.shape[0]) mask_indx = mask_indx[points_mask] num_pts = mask_indx.shape[0] if TrackingRenderer.outscen == 'car': if num_pts > 0 and num_pts <= 5: TrackingRenderer.fp_ptsnumrange['0-5nums'] += 1 elif num_pts > 5 and num_pts <= 10: TrackingRenderer.fp_ptsnumrange['5-10nums'] += 1 elif num_pts > 10 and num_pts <= 50: TrackingRenderer.fp_ptsnumrange['10-50nums'] += 1 elif num_pts > 50 and num_pts <= 200: TrackingRenderer.fp_ptsnumrange['50-200nums'] += 1 elif num_pts > 200: TrackingRenderer.fp_ptsnumrange['>200nums'] += 1 else: if num_pts > 0 and num_pts <= 5: TrackingRenderer.fp_ptsnumrange['0-5nums'] += 1 elif num_pts > 5 and num_pts <= 10: TrackingRenderer.fp_ptsnumrange['5-10nums'] += 1 elif num_pts > 10 and num_pts <= 20: TrackingRenderer.fp_ptsnumrange['10-20nums'] += 1 elif num_pts > 20 and num_pts <= 30: TrackingRenderer.fp_ptsnumrange['20-30nums'] += 1 elif num_pts > 30: TrackingRenderer.fp_ptsnumrange['>30nums'] += 1 #####TrackingRenderer.fpscorrange = {'0-0.1':0, '0.2-0.4':0, '0.4-0.6':0,'0.6-1.0':0} score = box.score if score >= 0 and score <= 0.1: TrackingRenderer.fpscorrange['0-0.1'] += 1 if score >= 0.2 and score <= 0.4: TrackingRenderer.fpscorrange['0.2-0.4'] += 1 if score >= 0.4 and score <= 0.6: TrackingRenderer.fpscorrange['0.4-0.6'] += 1 if score >= 0.6 and score <= 1.0: TrackingRenderer.fpscorrange['0.6-1.0'] += 1 #####TrackingRenderer.trk_ratio = {'ang_muta':0, 've_muta':0, 'aug_other':0, 've_other':0} #仅包含与上一帧持续追踪的样本,角度和速度突变分别与other相并 if box.token in TrackingRenderer.his_trackid: pre_box = TrackingRenderer.his_track[ TrackingRenderer.his_trackid.index(box.token)] vari_ang = abs( (R.from_quat(list(frame_pred[i].rotation)). as_euler('zxy', degrees=False)[0]) - (R.from_quat(list(pre_box.rotation)).as_euler( 'zxy', degrees=False)[0])) if vari_ang > 0.52: #30度 TrackingRenderer.trk_ratio['angvar>30'] += 1 elif 0.52 > vari_ang > 0.35: TrackingRenderer.trk_ratio['30>angvar>20'] += 1 elif 0.35 > vari_ang > 0.17: TrackingRenderer.trk_ratio['20>angvar>10'] += 1 elif vari_ang < 0.17: TrackingRenderer.trk_ratio['10>angvar'] += 1 pre_ve = pre_box.velocity ve = frame_pred[i].velocity ve_varity = abs( math.sqrt(ve[0]**2 + ve[1]**2) - math.sqrt(pre_ve[0]**2 + pre_ve[1]**2)) if ve_varity > TrackingRenderer.mutave_thr[ 0]: # car均匀加速度为2.778m/s^2 3*0.5s=1.5 TrackingRenderer.trk_ratio[ 'vevari>%s' % TrackingRenderer.mutave_thr[0]] += 1 elif ve_varity < TrackingRenderer.mutave_thr[ 0] and ve_varity >= TrackingRenderer.mutave_thr[ 1]: TrackingRenderer.trk_ratio[ '%s<vevari<%s' % (TrackingRenderer.mutave_thr[1], TrackingRenderer.mutave_thr[0])] += 1 elif ve_varity < TrackingRenderer.mutave_thr[ 1] and ve_varity >= TrackingRenderer.mutave_thr[ 2]: TrackingRenderer.trk_ratio[ '%s<vevari<%s' % (TrackingRenderer.mutave_thr[2], TrackingRenderer.mutave_thr[1])] += 1 else: TrackingRenderer.trk_ratio[ 'vevari<%s' % TrackingRenderer.mutave_thr[2]] += 1 # Determine color for this tracking id. if b.tracking_id not in self.id2color.keys(): self.id2color[b.tracking_id] = ( float(hash(b.tracking_id + 'r') % 256) / 255, float(hash(b.tracking_id + 'g') % 256) / 255, float(hash(b.tracking_id + 'b') % 256) / 255) if ifplthis: box_for_path = copy.deepcopy(box) box_for_path.rotate(Quaternion(cs_record["rotation"])) box_for_path.translate(np.array(cs_record["translation"])) box_for_path.rotate(Quaternion(pose_record["rotation"])) box_for_path.translate(np.array( pose_record["translation"])) #到全局 # 记录轨迹坐标 if b.tracking_id in self.track_path.keys(): self.track_path[b.tracking_id].append(box_for_path) else: self.track_path[b.tracking_id] = [box_for_path] # Render box. Highlight identity switches in red. if b.tracking_id in switch_ids: color = self.id2color[b.tracking_id] box.render(ax, view=np.eye(4), colors=('r', 'r', color)) if outscen_class: ###TrackingRenderer.ids_verange = {'0-0.1m/s':0, '0.1-2.5m/s':0, '2.5-5m/s':0, '5-10m/s':0, '>10m/s': 0 } #car 绝对速度 ###TrackingRenderer.ids_verange = {'0-0.1m/s':0, '0.1-1.0m/s':0, '1.0-1.5m/s':0, '1.5-2m/s':0, '>2m/s': 0 } #ped 绝对速度 ve = math.sqrt(frame_pred[i].velocity[0]**2 + frame_pred[i].velocity[1]**2) if TrackingRenderer.outscen == 'car': if ve > 0 and ve <= 0.1: TrackingRenderer.ids_verange['0-0.1m/s'] += 1 elif ve > 0.1 and ve <= 2.5: TrackingRenderer.ids_verange['0.1-2.5m/s'] += 1 elif ve > 2.5 and ve <= 5: TrackingRenderer.ids_verange['2.5-5m/s'] += 1 elif ve > 5 and ve <= 10: TrackingRenderer.ids_verange['5-10m/s'] += 1 else: TrackingRenderer.ids_verange['>10m/s'] += 1 else: if ve > 0 and ve <= 0.1: TrackingRenderer.ids_verange['0-0.1m/s'] += 1 elif ve > 0.1 and ve <= 1.0: TrackingRenderer.ids_verange['0.1-1.0m/s'] += 1 elif ve > 1.0 and ve <= 1.5: TrackingRenderer.ids_verange['1.0-1.5m/s'] += 1 elif ve > 1.5 and ve <= 2: TrackingRenderer.ids_verange['1.5-2m/s'] += 1 else: TrackingRenderer.ids_verange['>2m/s'] += 1 ####TrackingRenderer.ids_factratio = {'delay_trk':0, 'del_oth_trk':0, 'reappear':0, 'reapother':0, 've_muta':0, 've_other':0, 'reapdeltrk':0 } indx = np.where(switch_ids == b.tracking_id) gt_id = switch_gt_ids[indx] try: x = sam_anno.index(gt_id) #sample = nusc.get('sample', frame_gt[x].sample_token) #sample_annotation_tokens = sample['anns'] #标注 sample_annotation = nusc.get( 'sample_annotation', sample_annotation_tokens[x]) if sample_annotation['prev'] == '': #参考意义不大 TrackingRenderer.ids_factratio['del_oth_trk'] += 1 else: TrackingRenderer.ids_factratio['delay_trk'] += 1 visibility = nusc.get( 'visibility', sample_annotation['visibility_token']) vis_level = vislev[visibility["level"]] pre_annotation = nusc.get( "sample_annotation", sample_annotation['prev']) pre_vis = nusc.get( 'visibility', pre_annotation['visibility_token']) pre_vislevel = vislev[pre_vis["level"]] if vis_level > pre_vislevel or (vis_level == pre_vislevel and vis_level < 3): TrackingRenderer.ids_factratio['reappear'] += 1 elif vis_level == pre_vislevel: TrackingRenderer.ids_factratio[ 'reapdeltrk'] += 1 else: TrackingRenderer.ids_factratio[ 'reapother'] += 1 pre_ve = nusc.box_velocity(pre_annotation['token']) ve = nusc.box_velocity(sample_annotation['token']) ve_varity = abs( math.sqrt(ve[0]**2 + ve[1]**2) - math.sqrt(pre_ve[0]**2 + pre_ve[1]**2)) if ve_varity > TrackingRenderer.mutave_thr[ 0]: # car均匀加速度为2.778m/s^2 3*0.5s=1.5 TrackingRenderer.ids_factratio[ 'vevari>%s' % (TrackingRenderer.mutave_thr[0])] += 1 elif ve_varity < TrackingRenderer.mutave_thr[ 0] and ve_varity >= TrackingRenderer.mutave_thr[ 1]: TrackingRenderer.ids_factratio[ '%s<vevari<%s' % (TrackingRenderer.mutave_thr[1], TrackingRenderer.mutave_thr[0])] += 1 elif ve_varity < TrackingRenderer.mutave_thr[ 1] and ve_varity >= TrackingRenderer.mutave_thr[ 2]: TrackingRenderer.ids_factratio[ '%s<vevari<%s' % (TrackingRenderer.mutave_thr[2], TrackingRenderer.mutave_thr[1])] += 1 else: TrackingRenderer.ids_factratio[ 'vevari<%s' % TrackingRenderer.mutave_thr[2]] += 1 except: #标注错误 pass else: color = self.id2color[b.tracking_id] box.render(ax, view=np.eye(4), colors=(color, color, color)) # Render other infos if ifpltsco: corners = view_points(box.corners(), view=np.eye(4), normalize=False)[:2, :] # ax.text(4,5,"hhaa0.8", fontsize=5) # ax.text(4,5,"%.2f\n%.2f,%.2f"%(b.tracking_score,b.velocity[0],b.velocity[1]), fontsize=5) ax.text(box.center[0], box.center[1], "%.2f\nvx=%.2f,vy=%.2f" % (b.tracking_score, b.velocity[0], b.velocity[1]), fontdict={ 'size': '6', 'color': 'b' }) #ax.text(box.center[0],box.center[1],"%.2f\n%.2f,%.2f"%(b.tracking_score,b.velocity[0],b.velocity[1]), fontsize=5) #if pltve: #删去当前帧多余轨迹线 keys = list(self.track_path.keys()) for key in keys: if key not in pred_trackid: self.track_path.pop(key) # 画历史轨迹线: if ifplthis: for id in self.track_path.keys(): color = self.id2color[id] for box_path in self.track_path[id]: #转到当前帧局部 # move box to ego vehicle coord system before has done the translation box_path.translate(-np.array(pose_record["translation"])) box_path.rotate( Quaternion(pose_record["rotation"]).inverse) # move box to sensor coord system box_path.translate(-np.array(cs_record["translation"])) box_path.rotate(Quaternion(cs_record["rotation"]).inverse) ax.scatter(box_path.center[0], box_path.center[1], 10, color) #转回全局 box_path.rotate(Quaternion(cs_record["rotation"])) box_path.translate(np.array(cs_record["translation"])) box_path.rotate(Quaternion(pose_record["rotation"])) box_path.translate(np.array(pose_record["translation"])) TrackingRenderer.his_track = frame_pred TrackingRenderer.his_trackid = pred_trackid # Plot MOTA metrics. ly # 目标位置 左上角,距离 Y 轴 0.01 倍距离,距离 X 轴 0.95倍距离 self.cursumfp += len(FP) #当前场景累积值 self.cursumfn += len(FN) #print("FN=%d,FP=%d,switch_ids=%d,cursumfp=%d,cursumfn=%d" % ( len(FN), len(FP), len(switch_ids), self.cursumfp, self.cursumfn )) #ax.text(0.01, 0.95, "IDS:%d\nFP:%d\nFN:%d\ncur_sce sumfp:%d sumfn:%d\nthreshold:%f"%(len(switch_ids),len(FP),len(FN),self.cursumfp,self.cursumfn,threshold), transform=ax.transAxes, fontdict={'size': '10', 'color': 'b'}) # Plot ego pose. plt.scatter(0, 0, s=96, facecolors='none', edgecolors='k', marker='o') plt.xlim(-50, 50) plt.ylim(-50, 50) plt.axis('off') # Save to disk and close figure. fig.savefig(os.path.join(self.save_path, '{}.png'.format(timestamp))) plt.close(fig)
def get_boxes(self, token: str, filter_classes: List[str] = None, max_dist: float = None) -> List[Box]: """ Load up all the boxes associated with a sample. Boxes are in nuScenes lidar frame. :param token: KittiDB unique id. :param filter_classes: List of Kitti classes to use or None to use all. :param max_dist: Maximum distance in m to still draw a box. :return: Boxes in nuScenes lidar reference frame. """ # Get transforms for this sample transforms = self.get_transforms(token, root=self.root) boxes = [] if token.startswith('test_'): # No boxes to return for the test set. return boxes with open(KittiDB.get_filepath(token, 'label_2', root=self.root), 'r') as f: for line in f: # Parse this line into box information. parsed_line = self.parse_label_line(line) if parsed_line['name'] in {'DontCare', 'Misc'}: continue center = parsed_line['xyz_camera'] wlh = parsed_line['wlh'] yaw_camera = parsed_line['yaw_camera'] name = parsed_line['name'] score = parsed_line['score'] # Optional: Filter classes. if filter_classes is not None and name not in filter_classes: continue # The Box class coord system is oriented the same way as as KITTI LIDAR: x forward, y left, z up. # For orientation confer: http://www.cvlibs.net/datasets/kitti/setup.php. # 1: Create box in Box coordinate system with center at origin. # The second quaternion in yaw_box transforms the coordinate frame from the object frame # to KITTI camera frame. The equivalent cannot be naively done afterwards, as it's a rotation # around the local object coordinate frame, rather than the camera frame. quat_box = Quaternion(axis=(0, 1, 0), angle=yaw_camera) * Quaternion( axis=(1, 0, 0), angle=np.pi / 2) box = Box([0.0, 0.0, 0.0], wlh, quat_box, name=name) # 2: Translate: KITTI defines the box center as the bottom center of the vehicle. We use true center, # so we need to add half height in negative y direction, (since y points downwards), to adjust. The # center is already given in camera coord system. box.translate(center + np.array([0, -wlh[2] / 2, 0])) # 3: Transform to KITTI LIDAR coord system. First transform from rectified camera to camera, then # camera to KITTI lidar. box.rotate(Quaternion(matrix=transforms['r0_rect']).inverse) box.translate(-transforms['velo_to_cam']['T']) box.rotate( Quaternion(matrix=transforms['velo_to_cam']['R']).inverse) # 4: Transform to nuScenes LIDAR coord system. box.rotate(self.kitti_to_nu_lidar) # Set score or NaN. box.score = score # Set dummy velocity. box.velocity = np.array((0.0, 0.0, 0.0)) # Optional: Filter by max_dist if max_dist is not None: dist = np.sqrt(np.sum(box.center[:2]**2)) if dist > max_dist: continue boxes.append(box) return boxes
def run(self, image_or_path_or_tensor, meta={}, image_info=None): load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0 merge_time, track_time, tot_time, display_time = 0, 0, 0, 0 self.debugger.clear() start_time = time.time() # read image pre_processed = False if isinstance(image_or_path_or_tensor, np.ndarray): image = image_or_path_or_tensor elif type(image_or_path_or_tensor) == type(""): image = cv2.imread(image_or_path_or_tensor) else: image = image_or_path_or_tensor["image"][0].numpy() pre_processed_images = image_or_path_or_tensor pre_processed = True loaded_time = time.time() load_time += loaded_time - start_time detections = [] # for multi-scale testing for scale in self.opt.test_scales: scale_start_time = time.time() if not pre_processed: # not prefetch testing or demo images, meta = self.pre_process(image, scale, meta) else: # prefetch testing images = pre_processed_images["images"][scale][0] meta = pre_processed_images["meta"][scale] meta = {k: v.numpy()[0] for k, v in meta.items()} if "pre_dets" in pre_processed_images["meta"]: meta["pre_dets"] = pre_processed_images["meta"]["pre_dets"] if "cur_dets" in pre_processed_images["meta"]: meta["cur_dets"] = pre_processed_images["meta"]["cur_dets"] images = images.to(self.opt.device, non_blocking=self.opt.non_block_test) # initializing tracker pre_hms, pre_inds = None, None pre_process_time = time.time() pre_time += pre_process_time - scale_start_time # run the network # output: the output feature maps, only used for visualizing # dets: output tensors after extracting peaks output, dets, forward_time, FeatureMaps = self.process( images, self.pre_images, pre_hms, pre_inds, return_time=True) net_time += forward_time - pre_process_time decode_time = time.time() dec_time += decode_time - forward_time # convert the cropped and 4x downsampled output coordinate system # back to the input image coordinate system result = self.post_process(dets, meta, scale) post_process_time = time.time() post_time += post_process_time - decode_time detections.append(result) if self.opt.debug >= 2: self.debug( self.debugger, images, result, output, scale, pre_images=self.pre_images if not self.opt.no_pre_img else None, pre_hms=pre_hms, ) # merge multi-scale testing results results = self.merge_outputs(detections) torch.cuda.synchronize() end_time = time.time() merge_time += end_time - post_process_time # public detection mode in MOT challenge if self.opt.public_det: results = (pre_processed_images["meta"]["cur_dets"] if self.opt.public_det else None) if self.dataset == "nuscenes": trans_matrix = np.array(image_info["trans_matrix"], np.float32) results_by_class = {} ddd_boxes_by_class = {} depths_by_class = {} ddd_boxes_by_class2 = {} ddd_org_boxes_by_class = {} ddd_box_submission1 = {} ddd_box_submission2 = {} for class_name in NUSCENES_TRACKING_NAMES: results_by_class[class_name] = [] ddd_boxes_by_class2[class_name] = [] ddd_boxes_by_class[class_name] = [] depths_by_class[class_name] = [] ddd_org_boxes_by_class[class_name] = [] ddd_box_submission1[class_name] = [] ddd_box_submission2[class_name] = [] for det in results: cls_id = int(det["class"]) class_name = nuscenes_class_name[cls_id - 1] if class_name not in NUSCENES_TRACKING_NAMES: continue if det["score"] < 0.3: continue if class_name == "pedestrian" and det["score"] < 0.35: continue results_by_class[class_name].append(det["bbox"].tolist() + [det["score"]]) size = [ float(det["dim"][1]), float(det["dim"][2]), float(det["dim"][0]), ] rot_cam = Quaternion(axis=[0, 1, 0], angle=det["rot_y"]) translation_submission1 = np.dot( trans_matrix, np.array( [ det["loc"][0], det["loc"][1] - size[2], det["loc"][2], 1 ], np.float32, ), ).copy() loc = np.array([det["loc"][0], det["loc"][1], det["loc"][2]], np.float32) depths_by_class[class_name].append([float(det["loc"][2]) ].copy()) trans = [det["loc"][0], det["loc"][1], det["loc"][2]] ddd_org_boxes_by_class[class_name].append([ float(det["dim"][0]), float(det["dim"][1]), float(det["dim"][2]) ] + trans + [det["rot_y"]]) box = Box(loc, size, rot_cam, name="2", token="1") box.translate(np.array([0, -box.wlh[2] / 2, 0])) box.rotate(Quaternion(image_info["cs_record_rot"])) box.translate(np.array(image_info["cs_record_trans"])) box.rotate(Quaternion(image_info["pose_record_rot"])) box.translate(np.array(image_info["pose_record_trans"])) rotation = box.orientation rotation = [ float(rotation.w), float(rotation.x), float(rotation.y), float(rotation.z), ] ddd_box_submission1[class_name].append([ float(translation_submission1[0]), float(translation_submission1[1]), float(translation_submission1[2]), ].copy() + size.copy() + rotation.copy()) q = Quaternion(rotation) angle = q.angle if q.axis[2] > 0 else -q.angle ddd_boxes_by_class[class_name].append([ size[2], size[0], size[1], box.center[0], box.center[1], box.center[2], angle, ].copy()) online_targets = [] for class_name in NUSCENES_TRACKING_NAMES: if len(results_by_class[class_name]) > 0 and NMS: boxess = torch.from_numpy( np.array(results_by_class[class_name])[:, :4]) scoress = torch.from_numpy( np.array(results_by_class[class_name])[:, -1]) if class_name == "bus" or class_name == "truck": ovrlp = 0.7 else: ovrlp = 0.8 keep, count = nms(boxess, scoress, overlap=ovrlp) keep = keep.data.numpy().tolist() keep = sorted(set(keep)) results_by_class[class_name] = np.array( results_by_class[class_name])[keep] ddd_boxes_by_class[class_name] = np.array( ddd_boxes_by_class[class_name])[keep] depths_by_class[class_name] = np.array( depths_by_class[class_name])[keep] ddd_org_boxes_by_class[class_name] = np.array( ddd_org_boxes_by_class[class_name])[keep] ddd_box_submission1[class_name] = np.array( ddd_box_submission1[class_name])[keep] online_targets += self.tracker[class_name].update( results_by_class[class_name], FeatureMaps, ddd_boxes=ddd_boxes_by_class[class_name], depths_by_class=depths_by_class[class_name], ddd_org_boxes=ddd_org_boxes_by_class[class_name], submission=ddd_box_submission1[class_name], classe=class_name, ) else: online_targets = self.tracker.update(results, FeatureMaps) return online_targets
def _get_bboxes(self, anns, image_info=None): bboxes, track_ids, classes = [], [], [] if self.dataset == "nuscenes": for ann in anns: trans_matrix = np.array(image_info["trans_matrix"], np.float32) cls_id = int(self.cat_ids[ann["category_id"]]) class_name = self.class_name[cls_id - 1] if class_name not in NUSCENES_TRACKING_NAMES: continue center_location = ann["location"] wlh = ann["dim"] rotation = ann["rotation_y"] size = [float(wlh[1]), float(wlh[2]), float(wlh[0])] rot_cam = Quaternion(axis=[0, 1, 0], angle=rotation) loc = np.array( [ center_location[0], center_location[1], center_location[2] ], np.float32, ).copy() translation = np.dot( trans_matrix, np.array( [ center_location[0], center_location[1] - size[2] / 2, center_location[2], 1, ], np.float32, ), ).copy() box = Box(loc, size, rot_cam, name="2", token="1") box.translate(np.array([0, -box.wlh[2] / 2, 0])) box.rotate(Quaternion(image_info["cs_record_rot"])) box.translate(np.array(image_info["cs_record_trans"])) box.rotate(Quaternion(image_info["pose_record_rot"])) box.translate(np.array(image_info["pose_record_trans"])) rotation = box.orientation rotation = [ float(rotation.w), float(rotation.x), float(rotation.y), float(rotation.z), ] q = Quaternion(rotation) angle = q.angle if q.axis[2] > 0 else -q.angle bboxes.append([ size[2], size[0], size[1], box.center[0], box.center[1], box.center[2], angle, ].copy()) track_ids.append(ann["track_id"]) classes.append(class_name) else: for ann in anns: cls_id = int(self.cat_ids[ann["category_id"]]) if (cls_id > self.opt.num_classes or cls_id <= -99 or ("iscrowd" in ann and ann["iscrowd"] > 0)): continue bbox = self._coco_box_to_bbox(ann["bbox"]) h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] if h > 0 and w > 0: bboxes.append([bbox[0], bbox[1], bbox[2], bbox[3]].copy()) track_ids.append(ann["track_id"] if "track_id" in ann else -1) return bboxes, track_ids, classes