def convert_gt_to_kitti_annos(self):
    """Convert ground-truth lidar boxes into KITTI-format annotation dicts."""
    annos = []
    for i in range(len(self.cloud_and_label_list)):
        annotation_path, _ = self.cloud_and_label_list[i]
        gt_boxes, gt_names = self.read_annotations_data(annotation_path)
        anno = kitti.get_start_result_anno()
        num_example = 0
        box3d_lidar = gt_boxes
        for j in range(box3d_lidar.shape[0]):
            # No camera data: fill the 2D fields with placeholders that
            # survive the KITTI bbox-height filter (100 px box, alpha = -10).
            anno["bbox"].append(np.array([0, 0, 100, 100]))
            anno["alpha"].append(-10)
            anno["dimensions"].append(box3d_lidar[j, 3:6])
            anno["location"].append(box3d_lidar[j, :3])
            anno["rotation_y"].append(box3d_lidar[j, 6])
            anno["name"].append(gt_names[j])
            anno["truncated"].append(0.0)
            anno["occluded"].append(0)
            anno["score"].append(0)
            num_example += 1
        if num_example != 0:
            anno = {n: np.stack(v) for n, v in anno.items()}
            annos.append(anno)
        else:
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["metadata"] = {'image_idx': i}
    return annos
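# For reference, a minimal sketch of the two `kitti` helpers every converter
# in this file relies on. The field set is inferred from the usage above; the
# real implementations live in the project's kitti_common module and may
# differ in detail:
def get_start_result_anno():
    """Empty per-frame annotation dict with one list per KITTI field."""
    return {
        'name': [], 'truncated': [], 'occluded': [], 'alpha': [],
        'bbox': [], 'dimensions': [], 'location': [], 'rotation_y': [],
        'score': [],
    }

def empty_result_anno():
    """Annotation dict for a frame with zero detections (empty arrays)."""
    return {
        'name': np.array([]), 'truncated': np.array([]),
        'occluded': np.array([]), 'alpha': np.array([]),
        'bbox': np.zeros([0, 4]), 'dimensions': np.zeros([0, 3]),
        'location': np.zeros([0, 3]), 'rotation_y': np.array([]),
        'score': np.array([]),
    }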
def convert_detection_to_kitti_annos(self, detection):
    class_names = self._class_names
    annos = []
    for i in range(len(detection)):
        det = detection[i]
        final_box_preds = det["box3d_lidar"].detach().cpu().numpy()
        label_preds = det["label_preds"].detach().cpu().numpy()
        scores = det["scores"].detach().cpu().numpy()
        anno = kitti.get_start_result_anno()
        num_example = 0
        box3d_lidar = final_box_preds
        for j in range(box3d_lidar.shape[0]):
            anno["bbox"].append(np.array([0, 0, 100, 100]))
            anno["alpha"].append(-10)
            anno["dimensions"].append(box3d_lidar[j, 3:6])
            anno["location"].append(box3d_lidar[j, :3])
            anno["rotation_y"].append(box3d_lidar[j, 6])
            anno["name"].append(class_names[int(label_preds[j])])
            anno["truncated"].append(0.0)
            anno["occluded"].append(0)
            anno["score"].append(scores[j])
            num_example += 1
        if num_example != 0:
            anno = {n: np.stack(v) for n, v in anno.items()}
            annos.append(anno)
        else:
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["metadata"] = det["metadata"]
    return annos
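# Hypothetical end-to-end usage of the two converters above, assuming the
# eval helpers imported elsewhere in this module; the names `dataset` and
# `detections` are assumptions, not original code:
gt_annos = dataset.convert_gt_to_kitti_annos()
dt_annos = dataset.convert_detection_to_kitti_annos(detections)
results = get_official_eval_result(
    gt_annos, dt_annos, dataset._class_names, z_axis=2, z_center=0.5)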
def evaluation_kitti(self, detections, output_dir):
    """Evaluate with the KITTI evaluation tool.

    num_lidar_pts defines the difficulty split:
    easy: num > 15, mod: num > 7, hard: num > 0.
    """
    print("++++++++NuScenes KITTI unofficial Evaluation:")
    print(
        "++++++++easy: num_lidar_pts>15, mod: num_lidar_pts>7, hard: num_lidar_pts>0"
    )
    print("++++++++The bbox AP is invalid. Don't forget to ignore it.")
    class_names = self._class_names
    gt_annos = self.ground_truth_annotations
    if gt_annos is None:
        return None
    gt_annos = deepcopy(gt_annos)
    detections = deepcopy(detections)
    dt_annos = []
    for det in detections:
        final_box_preds = det["box3d_lidar"].detach().cpu().numpy()
        label_preds = det["label_preds"].detach().cpu().numpy()
        scores = det["scores"].detach().cpu().numpy()
        anno = kitti.get_start_result_anno()
        num_example = 0
        box3d_lidar = final_box_preds
        for j in range(box3d_lidar.shape[0]):
            anno["bbox"].append(np.array([0, 0, 50, 50]))
            anno["alpha"].append(-10)
            anno["dimensions"].append(box3d_lidar[j, 3:6])
            anno["location"].append(box3d_lidar[j, :3])
            anno["rotation_y"].append(box3d_lidar[j, 6])
            anno["name"].append(class_names[int(label_preds[j])])
            anno["truncated"].append(0.0)
            anno["occluded"].append(0)
            anno["score"].append(scores[j])
            num_example += 1
        if num_example != 0:
            anno = {n: np.stack(v) for n, v in anno.items()}
            dt_annos.append(anno)
        else:
            dt_annos.append(kitti.empty_result_anno())
        num_example = dt_annos[-1]["name"].shape[0]
        dt_annos[-1]["metadata"] = det["metadata"]
    # Map raw class names onto their KITTI equivalents for ground truth,
    # detections, and the class list (see the helper sketch below).
    for anno in gt_annos:
        names = anno["name"].tolist()
        mapped_names = []
        for n in names:
            if n in self.NameMapping:
                mapped_names.append(self.NameMapping[n])
            else:
                mapped_names.append(n)
        anno["name"] = np.array(mapped_names)
    for anno in dt_annos:
        names = anno["name"].tolist()
        mapped_names = []
        for n in names:
            if n in self.NameMapping:
                mapped_names.append(self.NameMapping[n])
            else:
                mapped_names.append(n)
        anno["name"] = np.array(mapped_names)
    mapped_class_names = []
    for n in self._class_names:
        if n in self.NameMapping:
            mapped_class_names.append(self.NameMapping[n])
        else:
            mapped_class_names.append(n)
    # For regular raw lidar data, z_axis = 2 and z_center = 0.5.
    z_axis = 2
    z_center = 0.5
    result_official_dict = get_official_eval_result(
        gt_annos, dt_annos, mapped_class_names,
        z_axis=z_axis, z_center=z_center)
    result_coco = get_coco_eval_result(
        gt_annos, dt_annos, mapped_class_names,
        z_axis=z_axis, z_center=z_center)
    return {
        "results": {
            "official": result_official_dict["result"],
            "coco": result_coco["result"],
        },
        "detail": {
            "official": result_official_dict["detail"],
            "coco": result_coco["detail"],
        },
    }
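# The three identical NameMapping loops in evaluation_kitti could be replaced
# by one small helper. A sketch, not part of the original codebase:
def _apply_name_mapping(names, name_mapping):
    """Map each class name through name_mapping; unknown names pass through."""
    return np.array([name_mapping.get(n, n) for n in names])

# e.g. anno["name"] = _apply_name_mapping(anno["name"].tolist(), self.NameMapping)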
def predict_kitti_to_anno(net,
                          example,
                          class_names,
                          center_limit_range=None,
                          lidar_input=False,
                          global_set=None):
    batch_image_shape = example['image_shape']
    batch_imgidx = example['image_idx']
    predictions_dicts = net(example)
    annos = []
    for i, preds_dict in enumerate(predictions_dicts):
        image_shape = batch_image_shape[i]
        img_idx = preds_dict["image_idx"]
        if preds_dict["bbox"] is not None:
            box_2d_preds = preds_dict["bbox"].detach().cpu().numpy()
            box_preds = preds_dict["box3d_camera"].detach().cpu().numpy()
            scores = preds_dict["scores"].detach().cpu().numpy()
            box_preds_lidar = preds_dict["box3d_lidar"].detach().cpu().numpy()
            label_preds = preds_dict["label_preds"].detach().cpu().numpy()
            anno = kitti.get_start_result_anno()
            num_example = 0
            for box, box_lidar, bbox, score, label in zip(
                    box_preds, box_preds_lidar, box_2d_preds, scores,
                    label_preds):
                if not lidar_input:
                    # Drop boxes whose top-left corner lies beyond the image
                    # or whose bottom-right corner is negative.
                    if bbox[0] > image_shape[1] or bbox[1] > image_shape[0]:
                        continue
                    if bbox[2] < 0 or bbox[3] < 0:
                        continue
                if center_limit_range is not None:
                    limit_range = np.array(center_limit_range)
                    if (np.any(box_lidar[:3] < limit_range[:3])
                            or np.any(box_lidar[:3] > limit_range[3:])):
                        continue
                # Clip the 2D box to the image: max corner to (W, H),
                # min corner to (0, 0).
                bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
                bbox[:2] = np.maximum(bbox[:2], [0, 0])
                anno["name"].append(class_names[int(label)])
                anno["truncated"].append(0.0)
                anno["occluded"].append(0)
                anno["alpha"].append(-np.arctan2(-box_lidar[1], box_lidar[0])
                                     + box[6])
                anno["bbox"].append(bbox)
                anno["dimensions"].append(box[3:6])
                anno["location"].append(box[:3])
                anno["rotation_y"].append(box[6])
                if global_set is not None:
                    # Nudge the score down until it is globally unique, so
                    # downstream bookkeeping can key on score values.
                    for _ in range(100000):
                        if score in global_set:
                            score -= 1 / 100000
                        else:
                            global_set.add(score)
                            break
                anno["score"].append(score)
                num_example += 1
            if num_example != 0:
                anno = {n: np.stack(v) for n, v in anno.items()}
                annos.append(anno)
            else:
                annos.append(kitti.empty_result_anno())
        else:
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["image_idx"] = np.array(
            [img_idx] * num_example, dtype=np.int64)
    return annos
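# Hypothetical evaluation-loop usage of predict_kitti_to_anno; the loader and
# range names are assumptions, not original code:
global_set = set()
dt_annos = []
for example in eval_dataloader:
    dt_annos += predict_kitti_to_anno(
        net, example, class_names,
        center_limit_range=center_limit_range,
        global_set=global_set)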
def convert_detection_to_kitti_annos(self, detection):
    class_names = self._class_names
    det_image_idxes = [det["metadata"]["image_idx"] for det in detection]
    gt_image_idxes = [
        info["image"]["image_idx"] for info in self._kitti_infos
    ]
    annos = []
    for i in range(len(detection)):
        det_idx = det_image_idxes[i]
        det = detection[i]
        # info = self._kitti_infos[gt_image_idxes.index(det_idx)]
        info = self._kitti_infos[i]
        calib = info["calib"]
        rect = calib["R0_rect"]
        Trv2c = calib["Tr_velo_to_cam"]
        P2 = calib["P2"]
        final_box_preds = det["box3d_lidar"].detach().cpu().numpy()
        label_preds = det["label_preds"].detach().cpu().numpy()
        scores = det["scores"].detach().cpu().numpy()
        if final_box_preds.shape[0] != 0:
            # Move the box origin from gravity center to bottom center
            # (KITTI convention), then transform into the camera frame.
            final_box_preds[:, 2] -= final_box_preds[:, 5] / 2
            box3d_camera = box_np_ops.box_lidar_to_camera(
                final_box_preds, rect, Trv2c)
            locs = box3d_camera[:, :3]
            dims = box3d_camera[:, 3:6]
            angles = box3d_camera[:, 6]
            camera_box_origin = [0.5, 1.0, 0.5]
            box_corners = box_np_ops.center_to_corner_box3d(
                locs, dims, angles, camera_box_origin, axis=1)
            box_corners_in_image = box_np_ops.project_to_image(
                box_corners, P2)
            # box_corners_in_image: [N, 8, 2]
            minxy = np.min(box_corners_in_image, axis=1)
            maxxy = np.max(box_corners_in_image, axis=1)
            bbox = np.concatenate([minxy, maxxy], axis=1)
        anno = kitti.get_start_result_anno()
        num_example = 0
        box3d_lidar = final_box_preds
        for j in range(box3d_lidar.shape[0]):
            image_shape = info["image"]["image_shape"]
            if bbox[j, 0] > image_shape[1] or bbox[j, 1] > image_shape[0]:
                continue
            if bbox[j, 2] < 0 or bbox[j, 3] < 0:
                continue
            bbox[j, 2:] = np.minimum(bbox[j, 2:], image_shape[::-1])
            bbox[j, :2] = np.maximum(bbox[j, :2], [0, 0])
            anno["bbox"].append(bbox[j])
            anno["alpha"].append(
                -np.arctan2(-box3d_lidar[j, 1], box3d_lidar[j, 0])
                + box3d_camera[j, 6])
            anno["dimensions"].append(box3d_camera[j, 3:6])
            anno["location"].append(box3d_camera[j, :3])
            anno["rotation_y"].append(box3d_camera[j, 6])
            anno["name"].append(class_names[int(label_preds[j])])
            anno["truncated"].append(0.0)
            anno["occluded"].append(0)
            anno["score"].append(scores[j])
            num_example += 1
        if num_example != 0:
            anno = {n: np.stack(v) for n, v in anno.items()}
            annos.append(anno)
        else:
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["metadata"] = det["metadata"]
    return annos
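# Worked example of the gravity-center to bottom-center conversion performed
# before box_lidar_to_camera above (illustrative numbers; only indices 2 and
# 5, the z coordinate and the height, matter here):
box = np.array([[10.0, 2.0, 1.0, 1.6, 3.9, 1.5, 0.0]])
box[:, 2] -= box[:, 5] / 2  # z = 1.0 - 1.5 / 2 = 0.25 (bottom face)
assert box[0, 2] == 0.25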
def _process_output(predictions_dicts,
                    batch_image_shape,
                    class_names,
                    center_limit_range=None,
                    lidar_input=False,
                    global_set=None):
    '''Reformat raw network output into KITTI-style annotation dicts.'''
    annos = []
    # Process each lidar/camera scan in the batch.
    for i, preds_dict in enumerate(predictions_dicts):
        image_shape = batch_image_shape[i]
        img_idx = preds_dict["image_idx"]
        # If the net made at least one prediction, process the output.
        if preds_dict["bbox"] is not None:
            # Detach from the graph and move tensors to CPU numpy arrays.
            bbox = preds_dict["bbox"].detach().cpu().numpy()
            box3d_camera = preds_dict["box3d_camera"].detach().cpu().numpy()
            box3d_lidar = preds_dict["box3d_lidar"].detach().cpu().numpy()
            label_preds = preds_dict["label_preds"].detach().cpu().numpy()
            scores = preds_dict["scores"].detach().cpu().numpy()
            anno = kitti.get_start_result_anno()
            num_example = 0  # number of bounding boxes kept for this scan
            # Append annotations for each bounding-box detection.
            for bbox_camera, bbox_lidar, bbox_2d, score, label in zip(
                    box3d_camera, box3d_lidar, bbox, scores, label_preds):
                if not lidar_input:
                    # Camera data is available: bbox_2d is
                    # [xmin, ymin, xmax, ymax] and image_shape is (H, W).
                    if bbox_2d[0] > image_shape[1] or bbox_2d[1] > image_shape[0]:
                        continue  # top-left corner beyond the image bounds
                    if bbox_2d[2] < 0 or bbox_2d[3] < 0:
                        continue  # bottom-right corner before the image origin
                if center_limit_range is not None:
                    limit_range = np.array(center_limit_range)
                    if (np.any(bbox_lidar[:3] < limit_range[:3])
                            or np.any(bbox_lidar[:3] > limit_range[3:])):
                        continue  # box center outside the limit range
                # Clip the 2D box to the image: max corner to (W, H),
                # min corner to (0, 0).
                bbox_2d[2:] = np.minimum(bbox_2d[2:], image_shape[::-1])
                bbox_2d[:2] = np.maximum(bbox_2d[:2], [0, 0])
                anno["name"].append(class_names[int(label)])  # e.g. 'Car'
                # truncated/occluded are KITTI label fields; without camera
                # ground truth they are set to benign defaults.
                anno["truncated"].append(0.0)
                anno["occluded"].append(0)
                anno["alpha"].append(-np.arctan2(-bbox_lidar[1], bbox_lidar[0])
                                     + bbox_camera[6])
                anno["bbox"].append(bbox_2d)  # [xmin, ymin, xmax, ymax]
                anno["location"].append(bbox_camera[:3])     # x, y, z
                anno["dimensions"].append(bbox_camera[3:6])  # box dimensions
                anno["rotation_y"].append(bbox_camera[6])    # yaw angle
                # Same score-uniqueness trick as predict_kitti_to_anno above.
                if global_set is not None:
                    for _ in range(100000):
                        if score in global_set:
                            score -= 1 / 100000
                        else:
                            global_set.add(score)
                            break
                anno["score"].append(score)
                num_example += 1
            if num_example != 0:
                anno = {n: np.stack(v) for n, v in anno.items()}
                annos.append(anno)
            else:
                annos.append(kitti.empty_result_anno())
        else:
            # No predictions: append an empty set of annotations.
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["image_idx"] = np.array(
            [img_idx] * num_example, dtype=np.int64)
    return annos
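# Worked example of the 2D clipping used throughout this file: with
# image_shape = (H, W) = (375, 1242), image_shape[::-1] = (1242, 375), so a
# box [-5, 10, 1300, 400] clamps to [0, 10, 1242, 375]:
bbox_2d = np.array([-5.0, 10.0, 1300.0, 400.0])  # xmin, ymin, xmax, ymax
image_shape = np.array([375, 1242])
bbox_2d[2:] = np.minimum(bbox_2d[2:], image_shape[::-1])
bbox_2d[:2] = np.maximum(bbox_2d[:2], [0, 0])
assert np.allclose(bbox_2d, [0.0, 10.0, 1242.0, 375.0])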
def predict_kitti_to_anno(net,
                          example,
                          class_names,
                          center_limit_range=None,
                          lidar_input=False,
                          global_set=None):
    # eval example : [0: 'voxels', 1: 'num_points', 2: 'coordinates', 3: 'rect'
    #                 4: 'Trv2c', 5: 'P2', 6: 'anchors', 7: 'anchors_mask'
    #                 8: 'image_idx', 9: 'image_shape']
    batch_image_shape = example[9]
    batch_imgidx = example[8]
    # Split the per-voxel features into one tensor per channel, shaped for
    # the exported PointPillars network.
    pillar_x = example[0][:, :, 0].unsqueeze(0).unsqueeze(0)
    pillar_y = example[0][:, :, 1].unsqueeze(0).unsqueeze(0)
    pillar_z = example[0][:, :, 2].unsqueeze(0).unsqueeze(0)
    pillar_i = example[0][:, :, 3].unsqueeze(0).unsqueeze(0)
    num_points_per_pillar = example[1].float().unsqueeze(0)
    # Find the distance of x, y, and z from the pillar center,
    # assuming xyres_16.proto (0.16 m grid).
    coors_x = example[2][:, 3].float()
    coors_y = example[2][:, 2].float()
    x_sub = coors_x.unsqueeze(1) * 0.16 + 0.1
    y_sub = coors_y.unsqueeze(1) * 0.16 - 39.9
    # Broadcast the per-pillar offsets across the (up to) 100 point slots
    # a pillar can hold.
    ones = torch.ones([1, 100], dtype=torch.float32, device=pillar_x.device)
    x_sub_shaped = torch.mm(x_sub, ones).unsqueeze(0).unsqueeze(0)
    y_sub_shaped = torch.mm(y_sub, ones).unsqueeze(0).unsqueeze(0)
    num_points_for_a_pillar = pillar_x.size()[3]
    # Mask out zero-padded point slots in each pillar (see the sketch of
    # get_paddings_indicator after this function).
    mask = get_paddings_indicator(num_points_per_pillar,
                                  num_points_for_a_pillar,
                                  axis=0)
    mask = mask.permute(0, 2, 1)
    mask = mask.unsqueeze(1)
    mask = mask.type_as(pillar_x)
    coors = example[2]
    anchors = example[6]
    anchors_mask = example[7]
    anchors_mask = torch.as_tensor(
        anchors_mask, dtype=torch.uint8, device=pillar_x.device)
    anchors_mask = anchors_mask.byte()
    rect = example[3]
    Trv2c = example[4]
    P2 = example[5]
    image_idx = example[8]
    net_input = [
        pillar_x, pillar_y, pillar_z, pillar_i, num_points_per_pillar,
        x_sub_shaped, y_sub_shaped, mask, coors, anchors, anchors_mask,
        rect, Trv2c, P2, image_idx
    ]
    predictions_dicts = net(net_input)
    annos = []
    for i, preds_dict in enumerate(predictions_dicts):
        image_shape = batch_image_shape[i]
        img_idx = preds_dict[5]
        if preds_dict[0] is not None:  # bbox list
            box_2d_preds = preds_dict[0].detach().cpu().numpy()    # bbox
            box_preds = preds_dict[1].detach().cpu().numpy()       # box3d_camera
            scores = preds_dict[3].detach().cpu().numpy()          # scores
            box_preds_lidar = preds_dict[2].detach().cpu().numpy() # box3d_lidar
            label_preds = preds_dict[4].detach().cpu().numpy()     # label_preds
            anno = kitti.get_start_result_anno()
            num_example = 0
            for box, box_lidar, bbox, score, label in zip(
                    box_preds, box_preds_lidar, box_2d_preds, scores,
                    label_preds):
                if not lidar_input:
                    if bbox[0] > image_shape[1] or bbox[1] > image_shape[0]:
                        continue
                    if bbox[2] < 0 or bbox[3] < 0:
                        continue
                if center_limit_range is not None:
                    limit_range = np.array(center_limit_range)
                    if (np.any(box_lidar[:3] < limit_range[:3])
                            or np.any(box_lidar[:3] > limit_range[3:])):
                        continue
                image_shape = [image_shape[0], image_shape[1]]
                bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
                bbox[:2] = np.maximum(bbox[:2], [0, 0])
                anno["name"].append(class_names[int(label)])
                anno["truncated"].append(0.0)
                anno["occluded"].append(0)
                anno["alpha"].append(-np.arctan2(-box_lidar[1], box_lidar[0])
                                     + box[6])
                anno["bbox"].append(bbox)
                anno["dimensions"].append(box[3:6])
                anno["location"].append(box[:3])
                anno["rotation_y"].append(box[6])
                if global_set is not None:
                    for _ in range(100000):
                        if score in global_set:
                            score -= 1 / 100000
                        else:
                            global_set.add(score)
                            break
                anno["score"].append(score)
                num_example += 1
            if num_example != 0:
                anno = {n: np.stack(v) for n, v in anno.items()}
                annos.append(anno)
            else:
                annos.append(kitti.empty_result_anno())
        else:
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["image_idx"] = np.array(
            [img_idx] * num_example, dtype=np.int64)
    return annos
def predict_to_kitti_label(net,
                           example,
                           class_names,
                           center_limit_range=None,
                           lidar_input=False):
    predictions_dicts = net(example)
    limit_range = None
    if center_limit_range is not None:
        limit_range = np.array(center_limit_range)
    annos = []
    for i, preds_dict in enumerate(predictions_dicts):
        box3d_lidar = preds_dict["box3d_lidar"].detach().cpu().numpy()
        box3d_camera = None
        scores = preds_dict["scores"].detach().cpu().numpy()
        label_preds = preds_dict["label_preds"].detach().cpu().numpy()
        if "box3d_camera" in preds_dict:
            box3d_camera = preds_dict["box3d_camera"].detach().cpu().numpy()
        bbox = None
        if "bbox" in preds_dict:
            bbox = preds_dict["bbox"].detach().cpu().numpy()
        anno = kitti.get_start_result_anno()
        num_example = 0
        for j in range(box3d_lidar.shape[0]):
            if limit_range is not None:
                if (np.any(box3d_lidar[j, :3] < limit_range[:3])
                        or np.any(box3d_lidar[j, :3] > limit_range[3:])):
                    continue
            if "bbox" in preds_dict:
                assert "image_shape" in preds_dict["metadata"]["image"]
                image_shape = preds_dict["metadata"]["image"]["image_shape"]
                if bbox[j, 0] > image_shape[1] or bbox[j, 1] > image_shape[0]:
                    continue
                if bbox[j, 2] < 0 or bbox[j, 3] < 0:
                    continue
                bbox[j, 2:] = np.minimum(bbox[j, 2:], image_shape[::-1])
                bbox[j, :2] = np.maximum(bbox[j, :2], [0, 0])
                anno["bbox"].append(bbox[j])
                # convert center format to kitti format
                # box3d_lidar[j, 2] -= box3d_lidar[j, 5] / 2
                anno["alpha"].append(
                    -np.arctan2(-box3d_lidar[j, 1], box3d_lidar[j, 0])
                    + box3d_camera[j, 6])
                anno["dimensions"].append(box3d_camera[j, 3:6])
                anno["location"].append(box3d_camera[j, :3])
                anno["rotation_y"].append(box3d_camera[j, 6])
                # added for mmmot compatibility:
                # anno["image_idx"] = preds_dict["metadata"]["image"]["image_idx"]
            else:
                # The bbox height must exceed 25 px, otherwise the box is
                # filtered out during evaluation.
                anno["bbox"].append(np.array([0, 0, 50, 50]))
                # Note: evaluating on raw lidar data gives odd numbers
                # because the standard KITTI eval filters instances with
                # small bbox heights, which cannot be replicated without
                # camera data.
                anno["alpha"].append(0.0)
                anno["dimensions"].append(box3d_lidar[j, 3:6])
                anno["location"].append(box3d_lidar[j, :3])
                anno["rotation_y"].append(box3d_lidar[j, 6])
            anno["name"].append(class_names[int(label_preds[j])])
            anno["truncated"].append(0.0)
            anno["occluded"].append(0)
            anno["score"].append(scores[j])
            num_example += 1
        if num_example != 0:
            anno = {n: np.stack(v) for n, v in anno.items()}
            annos.append(anno)
        else:
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["metadata"] = preds_dict["metadata"]
    return annos
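# Hypothetical writer (not from the original codebase) showing how one anno
# dict from predict_to_kitti_label maps onto KITTI label-file lines. Note
# that KITTI label files store dimensions as (h, w, l); reorder from the
# in-memory camera-box layout if your downstream tools require it:
def anno_to_kitti_lines(anno):
    lines = []
    for j in range(anno["name"].shape[0]):
        fields = [
            anno["name"][j], anno["truncated"][j], anno["occluded"][j],
            anno["alpha"][j],
            *anno["bbox"][j],        # left, top, right, bottom
            *anno["dimensions"][j],  # see dimension-order caveat above
            *anno["location"][j],    # x, y, z in the camera frame
            anno["rotation_y"][j], anno["score"][j],
        ]
        lines.append(" ".join(str(f) for f in fields))
    return lines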
def predict_kitti_to_anno(net,
                          detection_2d_path,
                          fusion_layer,
                          example,
                          class_names,
                          center_limit_range=None,
                          lidar_input=False,
                          global_set=None):
    focal_loss_val = SigmoidFocalClassificationLoss()
    batch_image_shape = example['image_shape']
    batch_imgidx = example['image_idx']
    (all_3d_output_camera_dict, all_3d_output, top_predictions,
     fusion_input, torch_index) = net(example, detection_2d_path)
    t_start = time.time()
    fusion_cls_preds, flag = fusion_layer(fusion_input.cuda(),
                                          torch_index.cuda())
    t_end = time.time()
    t_fusion = t_end - t_start
    # Replace the lidar-only classification scores with the fused ones:
    # 200 x 176 anchor grid with 2 anchors per location (= 70400 anchors).
    fusion_cls_preds_reshape = fusion_cls_preds.reshape(1, 200, 176, 2)
    all_3d_output.update({'cls_preds': fusion_cls_preds_reshape})
    predictions_dicts = predict_v2(net, example, all_3d_output)
    test_mode = False
    if not test_mode:
        # Build focal-loss targets for the fusion layer from 3D IoU with
        # ground truth (see the worked example after this function).
        d3_gt_boxes = example["d3_gt_boxes"][0, :, :]
        if d3_gt_boxes.shape[0] == 0:
            target_for_fusion = np.zeros((1, 70400, 1))
            positives = torch.zeros(1, 70400).type(torch.float32).cuda()
            negatives = torch.zeros(1, 70400).type(torch.float32).cuda()
            negatives[:, :] = 1
        else:
            d3_gt_boxes_camera = box_torch_ops.box_lidar_to_camera(
                d3_gt_boxes, example['rect'][0, :], example['Trv2c'][0, :])
            d3_gt_boxes_camera_bev = d3_gt_boxes_camera[:, [0, 2, 3, 5, 6]]
            # Predicted BEV boxes.
            pred_3d_box = all_3d_output_camera_dict[0]["box3d_camera"]
            pred_bev_box = pred_3d_box[:, [0, 2, 3, 5, 6]]
            # iou_bev = bev_box_overlap(
            #     d3_gt_boxes_camera_bev.detach().cpu().numpy(),
            #     pred_bev_box.detach().cpu().numpy(), criterion=-1)
            iou_bev = d3_box_overlap(
                d3_gt_boxes_camera.detach().cpu().numpy(),
                pred_3d_box.squeeze().detach().cpu().numpy(),
                criterion=-1)
            iou_bev_max = np.amax(iou_bev, axis=0)
            target_for_fusion = ((iou_bev_max >= 0.7) * 1).reshape(1, -1, 1)
            positive_index = ((iou_bev_max >= 0.7) * 1).reshape(1, -1)
            positives = torch.from_numpy(positive_index).type(
                torch.float32).cuda()
            negative_index = ((iou_bev_max <= 0.5) * 1).reshape(1, -1)
            negatives = torch.from_numpy(negative_index).type(
                torch.float32).cuda()
        cls_preds = fusion_cls_preds
        one_hot_targets = torch.from_numpy(target_for_fusion).type(
            torch.float32).cuda()
        negative_cls_weights = negatives.type(torch.float32) * 1.0
        cls_weights = negative_cls_weights + 1.0 * positives.type(
            torch.float32)
        pos_normalizer = positives.sum(1, keepdim=True).type(torch.float32)
        cls_weights /= torch.clamp(pos_normalizer, min=1.0)
        cls_losses = focal_loss_val._compute_loss(
            cls_preds, one_hot_targets, cls_weights.cuda())  # [N, M]
        cls_losses_reduced = cls_losses.sum() / example['labels'].shape[0]
        cls_losses_reduced = cls_losses_reduced.detach().cpu().numpy()
    else:
        cls_losses_reduced = 1000
    annos = []
    for i, preds_dict in enumerate(predictions_dicts):
        image_shape = batch_image_shape[i]
        img_idx = preds_dict["image_idx"]
        # The original condition used `or` with `.size.numel()`, which raises
        # on a None bbox; `and` with `.numel()` is the evident intent.
        if preds_dict["bbox"] is not None and preds_dict["bbox"].numel() != 0:
            box_2d_preds = preds_dict["bbox"].detach().cpu().numpy()
            box_preds = preds_dict["box3d_camera"].detach().cpu().numpy()
            scores = preds_dict["scores"].detach().cpu().numpy()
            box_preds_lidar = preds_dict["box3d_lidar"].detach().cpu().numpy()
            label_preds = preds_dict["label_preds"].detach().cpu().numpy()
            anno = kitti.get_start_result_anno()
            num_example = 0
            for box, box_lidar, bbox, score, label in zip(
                    box_preds, box_preds_lidar, box_2d_preds, scores,
                    label_preds):
                if not lidar_input:
                    if bbox[0] > image_shape[1] or bbox[1] > image_shape[0]:
                        continue
                    if bbox[2] < 0 or bbox[3] < 0:
                        continue
                if center_limit_range is not None:
                    limit_range = np.array(center_limit_range)
                    if (np.any(box_lidar[:3] < limit_range[:3])
                            or np.any(box_lidar[:3] > limit_range[3:])):
                        continue
                bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
                bbox[:2] = np.maximum(bbox[:2], [0, 0])
                anno["name"].append(class_names[int(label)])
                anno["truncated"].append(0.0)
                anno["occluded"].append(0)
                anno["alpha"].append(-np.arctan2(-box_lidar[1], box_lidar[0])
                                     + box[6])
                anno["bbox"].append(bbox)
                anno["dimensions"].append(box[3:6])
                anno["location"].append(box[:3])
                anno["rotation_y"].append(box[6])
                if global_set is not None:
                    for _ in range(100000):
                        if score in global_set:
                            score -= 1 / 100000
                        else:
                            global_set.add(score)
                            break
                anno["score"].append(score)
                num_example += 1
            if num_example != 0:
                anno = {n: np.stack(v) for n, v in anno.items()}
                annos.append(anno)
            else:
                annos.append(kitti.empty_result_anno())
        else:
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["image_idx"] = np.array(
            [img_idx] * num_example, dtype=np.int64)
    return annos, cls_losses_reduced
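# Tiny worked example of the fusion target construction above, assuming a
# 1-D array of per-anchor max IoUs against ground truth (values illustrative):
iou_bev_max = np.array([0.82, 0.64, 0.31])
positives = (iou_bev_max >= 0.7).astype(np.float32)  # -> [1., 0., 0.]
negatives = (iou_bev_max <= 0.5).astype(np.float32)  # -> [0., 0., 1.]
# Anchors with IoU in (0.5, 0.7) are neither positive nor negative; their
# cls_weight is 0, so they do not contribute to the focal loss.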