def convert_gt_to_kitti_annos(self):
    """Convert ground-truth lidar boxes into KITTI-format annotation dicts."""
    annos = []
    for i in range(len(self.cloud_and_label_list)):
        annotation_path, _ = self.cloud_and_label_list[i]
        gt_boxes, gt_names = self.read_annotations_data(annotation_path)
        anno = kitti.get_start_result_anno()
        num_example = 0
        box3d_lidar = gt_boxes
        for j in range(box3d_lidar.shape[0]):
            # No camera data: fill the 2D fields with placeholders that
            # survive the KITTI bbox-height filter (100 px box, alpha = -10).
            anno["bbox"].append(np.array([0, 0, 100, 100]))
            anno["alpha"].append(-10)
            anno["dimensions"].append(box3d_lidar[j, 3:6])
            anno["location"].append(box3d_lidar[j, :3])
            anno["rotation_y"].append(box3d_lidar[j, 6])
            anno["name"].append(gt_names[j])
            anno["truncated"].append(0.0)
            anno["occluded"].append(0)
            anno["score"].append(0)
            num_example += 1
        if num_example != 0:
            anno = {n: np.stack(v) for n, v in anno.items()}
            annos.append(anno)
        else:
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["metadata"] = {'image_idx': i}
    return annos
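# For reference, a minimal sketch of the two `kitti` helpers every converter
# in this file relies on. The field set is inferred from the usage above; the
# real implementations live in the project's kitti_common module and may
# differ in detail:
def get_start_result_anno():
    """Empty per-frame annotation dict with one list per KITTI field."""
    return {
        'name': [], 'truncated': [], 'occluded': [], 'alpha': [],
        'bbox': [], 'dimensions': [], 'location': [], 'rotation_y': [],
        'score': [],
    }

def empty_result_anno():
    """Annotation dict for a frame with zero detections (empty arrays)."""
    return {
        'name': np.array([]), 'truncated': np.array([]),
        'occluded': np.array([]), 'alpha': np.array([]),
        'bbox': np.zeros([0, 4]), 'dimensions': np.zeros([0, 3]),
        'location': np.zeros([0, 3]), 'rotation_y': np.array([]),
        'score': np.array([]),
    }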
def convert_detection_to_kitti_annos(self, detection):
    class_names = self._class_names
    annos = []
    for i in range(len(detection)):
        det = detection[i]
        final_box_preds = det["box3d_lidar"].detach().cpu().numpy()
        label_preds = det["label_preds"].detach().cpu().numpy()
        scores = det["scores"].detach().cpu().numpy()
        anno = kitti.get_start_result_anno()
        num_example = 0
        box3d_lidar = final_box_preds
        for j in range(box3d_lidar.shape[0]):
            anno["bbox"].append(np.array([0, 0, 100, 100]))
            anno["alpha"].append(-10)
            anno["dimensions"].append(box3d_lidar[j, 3:6])
            anno["location"].append(box3d_lidar[j, :3])
            anno["rotation_y"].append(box3d_lidar[j, 6])
            anno["name"].append(class_names[int(label_preds[j])])
            anno["truncated"].append(0.0)
            anno["occluded"].append(0)
            anno["score"].append(scores[j])
            num_example += 1
        if num_example != 0:
            anno = {n: np.stack(v) for n, v in anno.items()}
            annos.append(anno)
        else:
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["metadata"] = det["metadata"]
    return annos
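# Hypothetical end-to-end usage of the two converters above, assuming the
# eval helpers imported elsewhere in this module; the names `dataset` and
# `detections` are assumptions, not original code:
gt_annos = dataset.convert_gt_to_kitti_annos()
dt_annos = dataset.convert_detection_to_kitti_annos(detections)
results = get_official_eval_result(
    gt_annos, dt_annos, dataset._class_names, z_axis=2, z_center=0.5)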
def evaluation_kitti(self, detections, output_dir):
    """Evaluate with the KITTI evaluation tool.

    num_lidar_pts defines the difficulty split:
    easy: num > 15, mod: num > 7, hard: num > 0.
    """
    print("++++++++NuScenes KITTI unofficial Evaluation:")
    print(
        "++++++++easy: num_lidar_pts>15, mod: num_lidar_pts>7, hard: num_lidar_pts>0"
    )
    print("++++++++The bbox AP is invalid. Don't forget to ignore it.")
    class_names = self._class_names
    gt_annos = self.ground_truth_annotations
    if gt_annos is None:
        return None
    gt_annos = deepcopy(gt_annos)
    detections = deepcopy(detections)
    dt_annos = []
    for det in detections:
        final_box_preds = det["box3d_lidar"].detach().cpu().numpy()
        label_preds = det["label_preds"].detach().cpu().numpy()
        scores = det["scores"].detach().cpu().numpy()
        anno = kitti.get_start_result_anno()
        num_example = 0
        box3d_lidar = final_box_preds
        for j in range(box3d_lidar.shape[0]):
            anno["bbox"].append(np.array([0, 0, 50, 50]))
            anno["alpha"].append(-10)
            anno["dimensions"].append(box3d_lidar[j, 3:6])
            anno["location"].append(box3d_lidar[j, :3])
            anno["rotation_y"].append(box3d_lidar[j, 6])
            anno["name"].append(class_names[int(label_preds[j])])
            anno["truncated"].append(0.0)
            anno["occluded"].append(0)
            anno["score"].append(scores[j])
            num_example += 1
        if num_example != 0:
            anno = {n: np.stack(v) for n, v in anno.items()}
            dt_annos.append(anno)
        else:
            dt_annos.append(kitti.empty_result_anno())
        num_example = dt_annos[-1]["name"].shape[0]
        dt_annos[-1]["metadata"] = det["metadata"]
    # Map raw class names onto their KITTI equivalents for ground truth,
    # detections, and the class list (see the helper sketch below).
    for anno in gt_annos:
        names = anno["name"].tolist()
        mapped_names = []
        for n in names:
            if n in self.NameMapping:
                mapped_names.append(self.NameMapping[n])
            else:
                mapped_names.append(n)
        anno["name"] = np.array(mapped_names)
    for anno in dt_annos:
        names = anno["name"].tolist()
        mapped_names = []
        for n in names:
            if n in self.NameMapping:
                mapped_names.append(self.NameMapping[n])
            else:
                mapped_names.append(n)
        anno["name"] = np.array(mapped_names)
    mapped_class_names = []
    for n in self._class_names:
        if n in self.NameMapping:
            mapped_class_names.append(self.NameMapping[n])
        else:
            mapped_class_names.append(n)
    # For regular raw lidar data, z_axis = 2 and z_center = 0.5.
    z_axis = 2
    z_center = 0.5
    result_official_dict = get_official_eval_result(
        gt_annos, dt_annos, mapped_class_names,
        z_axis=z_axis, z_center=z_center)
    result_coco = get_coco_eval_result(
        gt_annos, dt_annos, mapped_class_names,
        z_axis=z_axis, z_center=z_center)
    return {
        "results": {
            "official": result_official_dict["result"],
            "coco": result_coco["result"],
        },
        "detail": {
            "official": result_official_dict["detail"],
            "coco": result_coco["detail"],
        },
    }
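# The three identical NameMapping loops in evaluation_kitti could be replaced
# by one small helper. A sketch, not part of the original codebase:
def _apply_name_mapping(names, name_mapping):
    """Map each class name through name_mapping; unknown names pass through."""
    return np.array([name_mapping.get(n, n) for n in names])

# e.g. anno["name"] = _apply_name_mapping(anno["name"].tolist(), self.NameMapping)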
def predict_kitti_to_anno(net,
                          example,
                          class_names,
                          center_limit_range=None,
                          lidar_input=False,
                          global_set=None):
    batch_image_shape = example['image_shape']
    batch_imgidx = example['image_idx']
    predictions_dicts = net(example)
    annos = []
    for i, preds_dict in enumerate(predictions_dicts):
        image_shape = batch_image_shape[i]
        img_idx = preds_dict["image_idx"]
        if preds_dict["bbox"] is not None:
            box_2d_preds = preds_dict["bbox"].detach().cpu().numpy()
            box_preds = preds_dict["box3d_camera"].detach().cpu().numpy()
            scores = preds_dict["scores"].detach().cpu().numpy()
            box_preds_lidar = preds_dict["box3d_lidar"].detach().cpu().numpy()
            label_preds = preds_dict["label_preds"].detach().cpu().numpy()
            anno = kitti.get_start_result_anno()
            num_example = 0
            for box, box_lidar, bbox, score, label in zip(
                    box_preds, box_preds_lidar, box_2d_preds, scores,
                    label_preds):
                if not lidar_input:
                    # Drop boxes whose top-left corner lies beyond the image
                    # or whose bottom-right corner is negative.
                    if bbox[0] > image_shape[1] or bbox[1] > image_shape[0]:
                        continue
                    if bbox[2] < 0 or bbox[3] < 0:
                        continue
                if center_limit_range is not None:
                    limit_range = np.array(center_limit_range)
                    if (np.any(box_lidar[:3] < limit_range[:3])
                            or np.any(box_lidar[:3] > limit_range[3:])):
                        continue
                # Clip the 2D box to the image: max corner to (W, H),
                # min corner to (0, 0).
                bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
                bbox[:2] = np.maximum(bbox[:2], [0, 0])
                anno["name"].append(class_names[int(label)])
                anno["truncated"].append(0.0)
                anno["occluded"].append(0)
                anno["alpha"].append(-np.arctan2(-box_lidar[1], box_lidar[0])
                                     + box[6])
                anno["bbox"].append(bbox)
                anno["dimensions"].append(box[3:6])
                anno["location"].append(box[:3])
                anno["rotation_y"].append(box[6])
                if global_set is not None:
                    # Nudge the score down until it is globally unique, so
                    # downstream bookkeeping can key on score values.
                    for _ in range(100000):
                        if score in global_set:
                            score -= 1 / 100000
                        else:
                            global_set.add(score)
                            break
                anno["score"].append(score)
                num_example += 1
            if num_example != 0:
                anno = {n: np.stack(v) for n, v in anno.items()}
                annos.append(anno)
            else:
                annos.append(kitti.empty_result_anno())
        else:
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["image_idx"] = np.array(
            [img_idx] * num_example, dtype=np.int64)
    return annos
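# Hypothetical evaluation-loop usage of predict_kitti_to_anno; the loader and
# range names are assumptions, not original code:
global_set = set()
dt_annos = []
for example in eval_dataloader:
    dt_annos += predict_kitti_to_anno(
        net, example, class_names,
        center_limit_range=center_limit_range,
        global_set=global_set)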
def convert_detection_to_kitti_annos(self, detection):
    class_names = self._class_names
    det_image_idxes = [det["metadata"]["image_idx"] for det in detection]
    gt_image_idxes = [
        info["image"]["image_idx"] for info in self._kitti_infos
    ]
    annos = []
    for i in range(len(detection)):
        det_idx = det_image_idxes[i]
        det = detection[i]
        # info = self._kitti_infos[gt_image_idxes.index(det_idx)]
        info = self._kitti_infos[i]
        calib = info["calib"]
        rect = calib["R0_rect"]
        Trv2c = calib["Tr_velo_to_cam"]
        P2 = calib["P2"]
        final_box_preds = det["box3d_lidar"].detach().cpu().numpy()
        label_preds = det["label_preds"].detach().cpu().numpy()
        scores = det["scores"].detach().cpu().numpy()
        if final_box_preds.shape[0] != 0:
            # Move the box origin from gravity center to bottom center
            # (KITTI convention), then transform into the camera frame.
            final_box_preds[:, 2] -= final_box_preds[:, 5] / 2
            box3d_camera = box_np_ops.box_lidar_to_camera(
                final_box_preds, rect, Trv2c)
            locs = box3d_camera[:, :3]
            dims = box3d_camera[:, 3:6]
            angles = box3d_camera[:, 6]
            camera_box_origin = [0.5, 1.0, 0.5]
            box_corners = box_np_ops.center_to_corner_box3d(
                locs, dims, angles, camera_box_origin, axis=1)
            box_corners_in_image = box_np_ops.project_to_image(
                box_corners, P2)
            # box_corners_in_image: [N, 8, 2]
            minxy = np.min(box_corners_in_image, axis=1)
            maxxy = np.max(box_corners_in_image, axis=1)
            bbox = np.concatenate([minxy, maxxy], axis=1)
        anno = kitti.get_start_result_anno()
        num_example = 0
        box3d_lidar = final_box_preds
        for j in range(box3d_lidar.shape[0]):
            image_shape = info["image"]["image_shape"]
            if bbox[j, 0] > image_shape[1] or bbox[j, 1] > image_shape[0]:
                continue
            if bbox[j, 2] < 0 or bbox[j, 3] < 0:
                continue
            bbox[j, 2:] = np.minimum(bbox[j, 2:], image_shape[::-1])
            bbox[j, :2] = np.maximum(bbox[j, :2], [0, 0])
            anno["bbox"].append(bbox[j])
            anno["alpha"].append(
                -np.arctan2(-box3d_lidar[j, 1], box3d_lidar[j, 0])
                + box3d_camera[j, 6])
            anno["dimensions"].append(box3d_camera[j, 3:6])
            anno["location"].append(box3d_camera[j, :3])
            anno["rotation_y"].append(box3d_camera[j, 6])
            anno["name"].append(class_names[int(label_preds[j])])
            anno["truncated"].append(0.0)
            anno["occluded"].append(0)
            anno["score"].append(scores[j])
            num_example += 1
        if num_example != 0:
            anno = {n: np.stack(v) for n, v in anno.items()}
            annos.append(anno)
        else:
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["metadata"] = det["metadata"]
    return annos
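# Worked example of the gravity-center to bottom-center conversion performed
# before box_lidar_to_camera above (illustrative numbers; only indices 2 and
# 5, the z coordinate and the height, matter here):
box = np.array([[10.0, 2.0, 1.0, 1.6, 3.9, 1.5, 0.0]])
box[:, 2] -= box[:, 5] / 2  # z = 1.0 - 1.5 / 2 = 0.25 (bottom face)
assert box[0, 2] == 0.25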
def _process_output(predictions_dicts,
                    batch_image_shape,
                    class_names,
                    center_limit_range=None,
                    lidar_input=False,
                    global_set=None):
    '''Reformat raw network output into KITTI-style annotation dicts.'''
    annos = []
    # Process each lidar/camera scan in the batch.
    for i, preds_dict in enumerate(predictions_dicts):
        image_shape = batch_image_shape[i]
        img_idx = preds_dict["image_idx"]
        # If the net made at least one prediction, process the output.
        if preds_dict["bbox"] is not None:
            # Detach from the graph and move tensors to CPU numpy arrays.
            bbox = preds_dict["bbox"].detach().cpu().numpy()
            box3d_camera = preds_dict["box3d_camera"].detach().cpu().numpy()
            box3d_lidar = preds_dict["box3d_lidar"].detach().cpu().numpy()
            label_preds = preds_dict["label_preds"].detach().cpu().numpy()
            scores = preds_dict["scores"].detach().cpu().numpy()
            anno = kitti.get_start_result_anno()
            num_example = 0  # number of bounding boxes kept for this scan
            # Append annotations for each bounding-box detection.
            for bbox_camera, bbox_lidar, bbox_2d, score, label in zip(
                    box3d_camera, box3d_lidar, bbox, scores, label_preds):
                if not lidar_input:
                    # Camera data is available: bbox_2d is
                    # [xmin, ymin, xmax, ymax] and image_shape is (H, W).
                    if bbox_2d[0] > image_shape[1] or bbox_2d[1] > image_shape[0]:
                        continue  # top-left corner beyond the image bounds
                    if bbox_2d[2] < 0 or bbox_2d[3] < 0:
                        continue  # bottom-right corner before the image origin
                if center_limit_range is not None:
                    limit_range = np.array(center_limit_range)
                    if (np.any(bbox_lidar[:3] < limit_range[:3])
                            or np.any(bbox_lidar[:3] > limit_range[3:])):
                        continue  # box center outside the limit range
                # Clip the 2D box to the image: max corner to (W, H),
                # min corner to (0, 0).
                bbox_2d[2:] = np.minimum(bbox_2d[2:], image_shape[::-1])
                bbox_2d[:2] = np.maximum(bbox_2d[:2], [0, 0])
                anno["name"].append(class_names[int(label)])  # e.g. 'Car'
                # truncated/occluded are KITTI label fields; without camera
                # ground truth they are set to benign defaults.
                anno["truncated"].append(0.0)
                anno["occluded"].append(0)
                anno["alpha"].append(-np.arctan2(-bbox_lidar[1], bbox_lidar[0])
                                     + bbox_camera[6])
                anno["bbox"].append(bbox_2d)  # [xmin, ymin, xmax, ymax]
                anno["location"].append(bbox_camera[:3])     # x, y, z
                anno["dimensions"].append(bbox_camera[3:6])  # box dimensions
                anno["rotation_y"].append(bbox_camera[6])    # yaw angle
                # Same score-uniqueness trick as predict_kitti_to_anno above.
                if global_set is not None:
                    for _ in range(100000):
                        if score in global_set:
                            score -= 1 / 100000
                        else:
                            global_set.add(score)
                            break
                anno["score"].append(score)
                num_example += 1
            if num_example != 0:
                anno = {n: np.stack(v) for n, v in anno.items()}
                annos.append(anno)
            else:
                annos.append(kitti.empty_result_anno())
        else:
            # No predictions: append an empty set of annotations.
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["image_idx"] = np.array(
            [img_idx] * num_example, dtype=np.int64)
    return annos
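# Worked example of the 2D clipping used throughout this file: with
# image_shape = (H, W) = (375, 1242), image_shape[::-1] = (1242, 375), so a
# box [-5, 10, 1300, 400] clamps to [0, 10, 1242, 375]:
bbox_2d = np.array([-5.0, 10.0, 1300.0, 400.0])  # xmin, ymin, xmax, ymax
image_shape = np.array([375, 1242])
bbox_2d[2:] = np.minimum(bbox_2d[2:], image_shape[::-1])
bbox_2d[:2] = np.maximum(bbox_2d[:2], [0, 0])
assert np.allclose(bbox_2d, [0.0, 10.0, 1242.0, 375.0])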
def predict_kitti_to_anno(net,
                          example,
                          class_names,
                          center_limit_range=None,
                          lidar_input=False,
                          global_set=None):
    # eval example : [0: 'voxels', 1: 'num_points', 2: 'coordinates', 3: 'rect'
    #                 4: 'Trv2c', 5: 'P2', 6: 'anchors', 7: 'anchors_mask'
    #                 8: 'image_idx', 9: 'image_shape']
    batch_image_shape = example[9]
    batch_imgidx = example[8]
    # Split the per-voxel features into one tensor per channel, shaped for
    # the exported PointPillars network.
    pillar_x = example[0][:, :, 0].unsqueeze(0).unsqueeze(0)
    pillar_y = example[0][:, :, 1].unsqueeze(0).unsqueeze(0)
    pillar_z = example[0][:, :, 2].unsqueeze(0).unsqueeze(0)
    pillar_i = example[0][:, :, 3].unsqueeze(0).unsqueeze(0)
    num_points_per_pillar = example[1].float().unsqueeze(0)
    # Find the distance of x, y, and z from the pillar center,
    # assuming xyres_16.proto (0.16 m grid).
    coors_x = example[2][:, 3].float()
    coors_y = example[2][:, 2].float()
    x_sub = coors_x.unsqueeze(1) * 0.16 + 0.1
    y_sub = coors_y.unsqueeze(1) * 0.16 - 39.9
    # Broadcast the per-pillar offsets across the (up to) 100 point slots
    # a pillar can hold.
    ones = torch.ones([1, 100], dtype=torch.float32, device=pillar_x.device)
    x_sub_shaped = torch.mm(x_sub, ones).unsqueeze(0).unsqueeze(0)
    y_sub_shaped = torch.mm(y_sub, ones).unsqueeze(0).unsqueeze(0)
    num_points_for_a_pillar = pillar_x.size()[3]
    # Mask out zero-padded point slots in each pillar (see the sketch of
    # get_paddings_indicator after this function).
    mask = get_paddings_indicator(num_points_per_pillar,
                                  num_points_for_a_pillar,
                                  axis=0)
    mask = mask.permute(0, 2, 1)
    mask = mask.unsqueeze(1)
    mask = mask.type_as(pillar_x)
    coors = example[2]
    anchors = example[6]
    anchors_mask = example[7]
    anchors_mask = torch.as_tensor(
        anchors_mask, dtype=torch.uint8, device=pillar_x.device)
    anchors_mask = anchors_mask.byte()
    rect = example[3]
    Trv2c = example[4]
    P2 = example[5]
    image_idx = example[8]
    net_input = [
        pillar_x, pillar_y, pillar_z, pillar_i, num_points_per_pillar,
        x_sub_shaped, y_sub_shaped, mask, coors, anchors, anchors_mask,
        rect, Trv2c, P2, image_idx
    ]
    predictions_dicts = net(net_input)
    annos = []
    for i, preds_dict in enumerate(predictions_dicts):
        image_shape = batch_image_shape[i]
        img_idx = preds_dict[5]
        if preds_dict[0] is not None:  # bbox list
            box_2d_preds = preds_dict[0].detach().cpu().numpy()    # bbox
            box_preds = preds_dict[1].detach().cpu().numpy()       # box3d_camera
            scores = preds_dict[3].detach().cpu().numpy()          # scores
            box_preds_lidar = preds_dict[2].detach().cpu().numpy() # box3d_lidar
            label_preds = preds_dict[4].detach().cpu().numpy()     # label_preds
            anno = kitti.get_start_result_anno()
            num_example = 0
            for box, box_lidar, bbox, score, label in zip(
                    box_preds, box_preds_lidar, box_2d_preds, scores,
                    label_preds):
                if not lidar_input:
                    if bbox[0] > image_shape[1] or bbox[1] > image_shape[0]:
                        continue
                    if bbox[2] < 0 or bbox[3] < 0:
                        continue
                if center_limit_range is not None:
                    limit_range = np.array(center_limit_range)
                    if (np.any(box_lidar[:3] < limit_range[:3])
                            or np.any(box_lidar[:3] > limit_range[3:])):
                        continue
                image_shape = [image_shape[0], image_shape[1]]
                bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
                bbox[:2] = np.maximum(bbox[:2], [0, 0])
                anno["name"].append(class_names[int(label)])
                anno["truncated"].append(0.0)
                anno["occluded"].append(0)
                anno["alpha"].append(-np.arctan2(-box_lidar[1], box_lidar[0])
                                     + box[6])
                anno["bbox"].append(bbox)
                anno["dimensions"].append(box[3:6])
                anno["location"].append(box[:3])
                anno["rotation_y"].append(box[6])
                if global_set is not None:
                    for _ in range(100000):
                        if score in global_set:
                            score -= 1 / 100000
                        else:
                            global_set.add(score)
                            break
                anno["score"].append(score)
                num_example += 1
            if num_example != 0:
                anno = {n: np.stack(v) for n, v in anno.items()}
                annos.append(anno)
            else:
                annos.append(kitti.empty_result_anno())
        else:
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["image_idx"] = np.array(
            [img_idx] * num_example, dtype=np.int64)
    return annos
def predict_to_kitti_label(net,
                           example,
                           class_names,
                           center_limit_range=None,
                           lidar_input=False):
    predictions_dicts = net(example)
    limit_range = None
    if center_limit_range is not None:
        limit_range = np.array(center_limit_range)
    annos = []
    for i, preds_dict in enumerate(predictions_dicts):
        box3d_lidar = preds_dict["box3d_lidar"].detach().cpu().numpy()
        box3d_camera = None
        scores = preds_dict["scores"].detach().cpu().numpy()
        label_preds = preds_dict["label_preds"].detach().cpu().numpy()
        if "box3d_camera" in preds_dict:
            box3d_camera = preds_dict["box3d_camera"].detach().cpu().numpy()
        bbox = None
        if "bbox" in preds_dict:
            bbox = preds_dict["bbox"].detach().cpu().numpy()
        anno = kitti.get_start_result_anno()
        num_example = 0
        for j in range(box3d_lidar.shape[0]):
            if limit_range is not None:
                if (np.any(box3d_lidar[j, :3] < limit_range[:3])
                        or np.any(box3d_lidar[j, :3] > limit_range[3:])):
                    continue
            if "bbox" in preds_dict:
                assert "image_shape" in preds_dict["metadata"]["image"]
                image_shape = preds_dict["metadata"]["image"]["image_shape"]
                if bbox[j, 0] > image_shape[1] or bbox[j, 1] > image_shape[0]:
                    continue
                if bbox[j, 2] < 0 or bbox[j, 3] < 0:
                    continue
                bbox[j, 2:] = np.minimum(bbox[j, 2:], image_shape[::-1])
                bbox[j, :2] = np.maximum(bbox[j, :2], [0, 0])
                anno["bbox"].append(bbox[j])
                # convert center format to kitti format
                # box3d_lidar[j, 2] -= box3d_lidar[j, 5] / 2
                anno["alpha"].append(
                    -np.arctan2(-box3d_lidar[j, 1], box3d_lidar[j, 0])
                    + box3d_camera[j, 6])
                anno["dimensions"].append(box3d_camera[j, 3:6])
                anno["location"].append(box3d_camera[j, :3])
                anno["rotation_y"].append(box3d_camera[j, 6])
                # added for mmmot compatibility:
                # anno["image_idx"] = preds_dict["metadata"]["image"]["image_idx"]
            else:
                # The bbox height must exceed 25 px, otherwise the box is
                # filtered out during evaluation.
                anno["bbox"].append(np.array([0, 0, 50, 50]))
                # Note: evaluating on raw lidar data gives odd numbers
                # because the standard KITTI eval filters instances with
                # small bbox heights, which cannot be replicated without
                # camera data.
                anno["alpha"].append(0.0)
                anno["dimensions"].append(box3d_lidar[j, 3:6])
                anno["location"].append(box3d_lidar[j, :3])
                anno["rotation_y"].append(box3d_lidar[j, 6])
            anno["name"].append(class_names[int(label_preds[j])])
            anno["truncated"].append(0.0)
            anno["occluded"].append(0)
            anno["score"].append(scores[j])
            num_example += 1
        if num_example != 0:
            anno = {n: np.stack(v) for n, v in anno.items()}
            annos.append(anno)
        else:
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["metadata"] = preds_dict["metadata"]
    return annos
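# Hypothetical writer (not from the original codebase) showing how one anno
# dict from predict_to_kitti_label maps onto KITTI label-file lines. Note
# that KITTI label files store dimensions as (h, w, l); reorder from the
# in-memory camera-box layout if your downstream tools require it:
def anno_to_kitti_lines(anno):
    lines = []
    for j in range(anno["name"].shape[0]):
        fields = [
            anno["name"][j], anno["truncated"][j], anno["occluded"][j],
            anno["alpha"][j],
            *anno["bbox"][j],        # left, top, right, bottom
            *anno["dimensions"][j],  # see dimension-order caveat above
            *anno["location"][j],    # x, y, z in the camera frame
            anno["rotation_y"][j], anno["score"][j],
        ]
        lines.append(" ".join(str(f) for f in fields))
    return lines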
def predict_kitti_to_anno(net,
                          detection_2d_path,
                          fusion_layer,
                          example,
                          class_names,
                          center_limit_range=None,
                          lidar_input=False,
                          global_set=None):
    focal_loss_val = SigmoidFocalClassificationLoss()
    batch_image_shape = example['image_shape']
    batch_imgidx = example['image_idx']
    (all_3d_output_camera_dict, all_3d_output, top_predictions,
     fusion_input, torch_index) = net(example, detection_2d_path)
    t_start = time.time()
    fusion_cls_preds, flag = fusion_layer(fusion_input.cuda(),
                                          torch_index.cuda())
    t_end = time.time()
    t_fusion = t_end - t_start
    # Replace the lidar-only classification scores with the fused ones:
    # 200 x 176 anchor grid with 2 anchors per location (= 70400 anchors).
    fusion_cls_preds_reshape = fusion_cls_preds.reshape(1, 200, 176, 2)
    all_3d_output.update({'cls_preds': fusion_cls_preds_reshape})
    predictions_dicts = predict_v2(net, example, all_3d_output)
    test_mode = False
    if not test_mode:
        # Build focal-loss targets for the fusion layer from 3D IoU with
        # ground truth (see the worked example after this function).
        d3_gt_boxes = example["d3_gt_boxes"][0, :, :]
        if d3_gt_boxes.shape[0] == 0:
            target_for_fusion = np.zeros((1, 70400, 1))
            positives = torch.zeros(1, 70400).type(torch.float32).cuda()
            negatives = torch.zeros(1, 70400).type(torch.float32).cuda()
            negatives[:, :] = 1
        else:
            d3_gt_boxes_camera = box_torch_ops.box_lidar_to_camera(
                d3_gt_boxes, example['rect'][0, :], example['Trv2c'][0, :])
            d3_gt_boxes_camera_bev = d3_gt_boxes_camera[:, [0, 2, 3, 5, 6]]
            # Predicted BEV boxes.
            pred_3d_box = all_3d_output_camera_dict[0]["box3d_camera"]
            pred_bev_box = pred_3d_box[:, [0, 2, 3, 5, 6]]
            # iou_bev = bev_box_overlap(
            #     d3_gt_boxes_camera_bev.detach().cpu().numpy(),
            #     pred_bev_box.detach().cpu().numpy(), criterion=-1)
            iou_bev = d3_box_overlap(
                d3_gt_boxes_camera.detach().cpu().numpy(),
                pred_3d_box.squeeze().detach().cpu().numpy(),
                criterion=-1)
            iou_bev_max = np.amax(iou_bev, axis=0)
            target_for_fusion = ((iou_bev_max >= 0.7) * 1).reshape(1, -1, 1)
            positive_index = ((iou_bev_max >= 0.7) * 1).reshape(1, -1)
            positives = torch.from_numpy(positive_index).type(
                torch.float32).cuda()
            negative_index = ((iou_bev_max <= 0.5) * 1).reshape(1, -1)
            negatives = torch.from_numpy(negative_index).type(
                torch.float32).cuda()
        cls_preds = fusion_cls_preds
        one_hot_targets = torch.from_numpy(target_for_fusion).type(
            torch.float32).cuda()
        negative_cls_weights = negatives.type(torch.float32) * 1.0
        cls_weights = negative_cls_weights + 1.0 * positives.type(
            torch.float32)
        pos_normalizer = positives.sum(1, keepdim=True).type(torch.float32)
        cls_weights /= torch.clamp(pos_normalizer, min=1.0)
        cls_losses = focal_loss_val._compute_loss(
            cls_preds, one_hot_targets, cls_weights.cuda())  # [N, M]
        cls_losses_reduced = cls_losses.sum() / example['labels'].shape[0]
        cls_losses_reduced = cls_losses_reduced.detach().cpu().numpy()
    else:
        cls_losses_reduced = 1000
    annos = []
    for i, preds_dict in enumerate(predictions_dicts):
        image_shape = batch_image_shape[i]
        img_idx = preds_dict["image_idx"]
        # The original condition used `or` with `.size.numel()`, which raises
        # on a None bbox; `and` with `.numel()` is the evident intent.
        if preds_dict["bbox"] is not None and preds_dict["bbox"].numel() != 0:
            box_2d_preds = preds_dict["bbox"].detach().cpu().numpy()
            box_preds = preds_dict["box3d_camera"].detach().cpu().numpy()
            scores = preds_dict["scores"].detach().cpu().numpy()
            box_preds_lidar = preds_dict["box3d_lidar"].detach().cpu().numpy()
            label_preds = preds_dict["label_preds"].detach().cpu().numpy()
            anno = kitti.get_start_result_anno()
            num_example = 0
            for box, box_lidar, bbox, score, label in zip(
                    box_preds, box_preds_lidar, box_2d_preds, scores,
                    label_preds):
                if not lidar_input:
                    if bbox[0] > image_shape[1] or bbox[1] > image_shape[0]:
                        continue
                    if bbox[2] < 0 or bbox[3] < 0:
                        continue
                if center_limit_range is not None:
                    limit_range = np.array(center_limit_range)
                    if (np.any(box_lidar[:3] < limit_range[:3])
                            or np.any(box_lidar[:3] > limit_range[3:])):
                        continue
                bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
                bbox[:2] = np.maximum(bbox[:2], [0, 0])
                anno["name"].append(class_names[int(label)])
                anno["truncated"].append(0.0)
                anno["occluded"].append(0)
                anno["alpha"].append(-np.arctan2(-box_lidar[1], box_lidar[0])
                                     + box[6])
                anno["bbox"].append(bbox)
                anno["dimensions"].append(box[3:6])
                anno["location"].append(box[:3])
                anno["rotation_y"].append(box[6])
                if global_set is not None:
                    for _ in range(100000):
                        if score in global_set:
                            score -= 1 / 100000
                        else:
                            global_set.add(score)
                            break
                anno["score"].append(score)
                num_example += 1
            if num_example != 0:
                anno = {n: np.stack(v) for n, v in anno.items()}
                annos.append(anno)
            else:
                annos.append(kitti.empty_result_anno())
        else:
            annos.append(kitti.empty_result_anno())
        num_example = annos[-1]["name"].shape[0]
        annos[-1]["image_idx"] = np.array(
            [img_idx] * num_example, dtype=np.int64)
    return annos, cls_losses_reduced
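# Tiny worked example of the fusion target construction above, assuming a
# 1-D array of per-anchor max IoUs against ground truth (values illustrative):
iou_bev_max = np.array([0.82, 0.64, 0.31])
positives = (iou_bev_max >= 0.7).astype(np.float32)  # -> [1., 0., 0.]
negatives = (iou_bev_max <= 0.5).astype(np.float32)  # -> [0., 0., 1.]
# Anchors with IoU in (0.5, 0.7) are neither positive nor negative; their
# cls_weight is 0, so they do not contribute to the focal loss.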