Example 1
    def get_inference_input_dict_ros(self, info, points):
        assert self.anchor_cache is not None
        assert self.target_assigner is not None
        assert self.voxel_generator is not None
        assert self.config is not None
        assert self.built is True
        rect = info['calib/R0_rect']
        P2 = info['calib/P2']
        Trv2c = info['calib/Tr_velo_to_cam']
        input_cfg = self.config.eval_input_reader
        model_cfg = self.config.model.second

        input_dict = {
            'points': points,
            'rect': rect,
            'Trv2c': Trv2c,
            'P2': P2,
            'image_shape': np.array(info["img_shape"], dtype=np.int32),
            # 'image_idx': info['image_idx'],
            # 'image_path': info['img_path'],
            # 'pointcloud_num_features': num_point_features,
        }
        out_size_factor = model_cfg.rpn.layer_strides[
            0] // model_cfg.rpn.upsample_strides[0]
        example = prep_pointcloud(
            input_dict=input_dict,
            root_path=str(self.root_path),
            voxel_generator=self.voxel_generator,
            target_assigner=self.target_assigner,
            max_voxels=input_cfg.max_number_of_voxels,
            class_names=self.target_assigner.classes,
            training=False,
            create_targets=False,
            shuffle_points=input_cfg.shuffle_points,
            generate_bev=False,
            without_reflectivity=model_cfg.without_reflectivity,
            num_point_features=model_cfg.num_point_features,
            anchor_area_threshold=input_cfg.anchor_area_threshold,
            anchor_cache=self.anchor_cache,
            out_size_factor=out_size_factor,
            out_dtype=np.float32)

        # example["image_idx"] = info['image_idx']
        example["image_shape"] = input_dict["image_shape"]
        example["points"] = points
        if "anchors_mask" in example:
            example["anchors_mask"] = example["anchors_mask"].astype(np.uint8)
        #############
        # convert example to batched example
        #############
        example = merge_second_batch([example])
        return example
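
The final step above, merge_second_batch([example]), collates a list of per-frame example dicts into a single batched dict (here a batch of one). Below is a toy sketch of that collation idea, written against NumPy inputs; the real merge_second_batch presumably applies key-specific rules for voxels, coordinates, anchors, and so on, so treat this only as an illustration of the concept, not its implementation.

import numpy as np
from collections import defaultdict

def merge_batch_sketch(example_list):
    """Toy collation of per-frame example dicts into one batched dict."""
    grouped = defaultdict(list)
    for ex in example_list:
        for key, value in ex.items():
            grouped[key].append(value)
    batched = {}
    for key, values in grouped.items():
        if key == "coordinates":
            # prepend a batch-index column so voxels from different frames
            # remain distinguishable after concatenation
            padded = [np.pad(v, ((0, 0), (1, 0)), mode="constant", constant_values=i)
                      for i, v in enumerate(values)]
            batched[key] = np.concatenate(padded, axis=0)
        elif isinstance(values[0], np.ndarray):
            batched[key] = np.stack(values, axis=0)  # add a leading batch axis
        else:
            batched[key] = values  # keep non-array metadata as a plain list
    return batched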
Example 2
    def get_inference_input_dict(self, points):
        assert self.anchor_cache is not None
        assert self.target_assigner is not None
        assert self.voxel_generator is not None
        assert self.config is not None
        assert self.built is True
        input_cfg = self.config.eval_input_reader
        model_cfg = self.config.model.second

        input_dict = {
            'points': points,
        }
        out_size_factor = model_cfg.rpn.layer_strides[0] // model_cfg.rpn.upsample_strides[0]
        example = prep_pointcloud(
            input_dict=input_dict,
            root_path=str(self.root_path),
            voxel_generator=self.voxel_generator,
            target_assigner=self.target_assigner,
            max_voxels=input_cfg.max_number_of_voxels,
            class_names=list(input_cfg.class_names),
            training=False,
            create_targets=False,
            shuffle_points=input_cfg.shuffle_points,
            generate_bev=False,
            without_reflectivity=model_cfg.without_reflectivity,
            num_point_features=model_cfg.num_point_features,
            anchor_area_threshold=input_cfg.anchor_area_threshold,
            anchor_cache=self.anchor_cache,
            out_size_factor=out_size_factor,
            out_dtype=np.float32)
        # example["image_idx"] = info['image_idx']
        # example["image_shape"] = input_dict["image_shape"]
        example["points"] = points
        if "anchors_mask" in example:
            example["anchors_mask"] = example["anchors_mask"].astype(np.uint8)
        #############
        # convert example to batched example
        #############
        example = merge_second_batch([example])
        return example
Example 3
def test(config_path=args.config_path,
         model_dir=args.model_dir,
         result_path=None,
         create_folder=False,
         pickle_result=True,
         include_roadmap=False,
         device=1):
    """train a VoxelNet model specified by a config file.
    """
    if create_folder:
        if pathlib.Path(model_dir).exists():
            model_dir = torchplus.train.create_folder(model_dir)

    model_dir = pathlib.Path(model_dir)
    model_dir.mkdir(parents=True, exist_ok=True)
    eval_checkpoint_dir = model_dir / 'eval_checkpoints'
    eval_checkpoint_dir.mkdir(parents=True, exist_ok=True)
    if result_path is None:
        result_path = model_dir / 'results'
    config_file_bkp = "pipeline.config"
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    shutil.copyfile(config_path, str(model_dir / config_file_bkp))
    input_cfg = config.train_input_reader
    eval_input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    batch_size = 1
    class_names = list(input_cfg.class_names)
    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    grid_size = voxel_generator.grid_size
    ######################
    # BUILD TARGET ASSIGNER
    ######################
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    ######################
    # BUILD NET
    ######################
    center_limit_range = model_cfg.post_center_limit_range
    net = second_builder.build(model_cfg, voxel_generator, target_assigner,
                               include_roadmap)
    net.cuda().eval()

    print("num_trainable parameters:", len(list(net.parameters())))
    # for n, p in net.named_parameters():
    #     print(n, p.shape)

    #torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    torchplus.train.restore(args.model_path, net)
    #torchplus.train.restore("./ped_models_56/voxelnet-275130.tckpt",net)
    out_size_factor = model_cfg.rpn.layer_strides[
        0] / model_cfg.rpn.upsample_strides[0]
    print(out_size_factor)
    #out_size_factor *= model_cfg.middle_feature_extractor.downsample_factor
    out_size_factor = int(out_size_factor)
    feature_map_size = grid_size[:2] // out_size_factor
    feature_map_size = [*feature_map_size, 1][::-1]
    print(feature_map_size)
    ret = target_assigner.generate_anchors(feature_map_size)
    #anchors_dict = target_assigner.generate_anchors_dict(feature_map_size)
    anchors = ret["anchors"]
    anchors = anchors.reshape([-1, 7])
    matched_thresholds = ret["matched_thresholds"]
    unmatched_thresholds = ret["unmatched_thresholds"]
    anchors_bv = box_np_ops.rbbox2d_to_near_bbox(anchors[:, [0, 1, 3, 4, 6]])
    anchor_cache = {
        "anchors": anchors,
        "anchors_bv": anchors_bv,
        "matched_thresholds": matched_thresholds,
        "unmatched_thresholds": unmatched_thresholds,
        #"anchors_dict": anchors_dict,
    }

    am = ArgoverseMap()
    dt_annos = []

    root_dir = os.path.join('./../../argodataset/argoverse-tracking/',
                            args.set)
    argoverse_loader = ArgoverseTrackingLoader(root_dir)

    prog_cnt = 0
    for seq in range(len(argoverse_loader)):
        argoverse_data = argoverse_loader[seq]
        nlf = argoverse_data.num_lidar_frame
        for frame in range(nlf):
            prog_cnt += 1
            if prog_cnt % 50 == 0:
                print(prog_cnt)
            points = argoverse_data.get_lidar(frame)
            roi_pts = copy.deepcopy(points)
            city_name = argoverse_data.city_name
            city_to_egovehicle_se3 = argoverse_data.get_pose(frame)
            '''
            roi_pts = city_to_egovehicle_se3.transform_point_cloud(roi_pts)  # put into city coords
            #non roi
            roi_pts_flag = am.remove_non_roi_points(roi_pts, city_name) # remove non-driveable region
            roi_pts = roi_pts[roi_pts_flag]
            roi_pts = am.remove_ground_surface(roi_pts, city_name)  # remove ground surface
    
            # convert city to lidar co-ordinates

            roi_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(roi_pts) 
            '''
            if args.include_roi or args.dr_area or not args.include_road_points:
                roi_pts = city_to_egovehicle_se3.transform_point_cloud(
                    roi_pts)  # put into city coords

            if args.include_roi:
                roi_pts_flag = am.remove_non_roi_points(
                    roi_pts, city_name)  # remove non-driveable region
                roi_pts = roi_pts[roi_pts_flag]

            if not args.include_roi and args.dr_area:
                roi_pts_flag = am.remove_non_driveable_area_points(
                    roi_pts, city_name)  # remove non-driveable region
                roi_pts = roi_pts[roi_pts_flag]

            if not args.include_road_points:
                roi_pts = am.remove_ground_surface(
                    roi_pts, city_name)  # remove ground surface

            # convert city to lidar co-ordinates
            if args.include_roi or args.dr_area or not args.include_road_points:
                roi_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(
                    roi_pts)

            roi_pts[:, 2] = roi_pts[:, 2] - 1.73

            pts_x, pts_y, pts_z = roi_pts[:, 0], roi_pts[:, 1], roi_pts[:, 2]

            input_dict = {
                'points': roi_pts,
                'pointcloud_num_features': 3,
            }

            out_size_factor = model_cfg.rpn.layer_strides[
                0] // model_cfg.rpn.upsample_strides[0]

            example = prep_pointcloud(
                input_dict=input_dict,
                root_path=None,
                voxel_generator=voxel_generator,
                target_assigner=target_assigner,
                max_voxels=input_cfg.max_number_of_voxels,
                class_names=list(input_cfg.class_names),
                training=False,
                create_targets=False,
                shuffle_points=input_cfg.shuffle_points,
                generate_bev=False,
                without_reflectivity=model_cfg.without_reflectivity,
                num_point_features=model_cfg.num_point_features,
                anchor_area_threshold=input_cfg.anchor_area_threshold,
                anchor_cache=anchor_cache,
                out_size_factor=out_size_factor,
                out_dtype=np.float32)

            if "anchors_mask" in example:
                example["anchors_mask"] = example["anchors_mask"].astype(
                    np.uint8)
            example["image_idx"] = str(seq) + "_" + str(frame)
            example["image_shape"] = np.array([400, 400], dtype=np.int32)
            example["road_map"] = None
            example["include_roadmap"] = False
            example["points"] = roi_pts
            #torch.save(example,"./network_input_examples/" + info)
            example = merge_second_batch([example])

            example_torch = example_convert_to_torch(example,
                                                     device=args.device)
            try:
                result_annos = predict_kitti_to_anno(
                    net, example_torch, input_cfg.class_names,
                    model_cfg.post_center_limit_range, model_cfg.lidar_input)
            except Exception:
                print("prediction failed for sequence", seq, "frame", frame)
                continue
            dt_annos += result_annos

    if pickle_result:
        save_dir = os.path.dirname(args.save_path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        with open(args.save_path, 'wb') as f:
            pickle.dump(dt_annos, f)
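
For reference, the anchor-grid arithmetic used inside test() above, evaluated with assumed config values (these numbers are illustrative, not taken from any config on this page):

import numpy as np

grid_size = np.array([400, 400, 1])        # assumed voxel grid size (x, y, z)
layer_strides = [2, 2, 2]                  # assumed model_cfg.rpn.layer_strides
upsample_strides = [1, 2, 4]               # assumed model_cfg.rpn.upsample_strides

out_size_factor = layer_strides[0] // upsample_strides[0]  # 2 // 1 = 2
feature_map_size = grid_size[:2] // out_size_factor        # array([200, 200])
feature_map_size = [*feature_map_size, 1][::-1]            # [1, 200, 200] as (D, H, W)
# generate_anchors is evaluated on this grid and the result is reshaped to
# (-1, 7) boxes, i.e. (x, y, z, w, l, h, yaw) per grid cell, anchor size and rotation.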
Example 4
def main(config_path,
         lc_horizon,
         num_examples,
         model_dir,
         ckpt_path=None,
         **kwargs):
    """Don't support pickle_result anymore. if you want to generate kitti label file,
    please use kitti_anno_to_label_file and convert_detection_to_kitti_annos
    in second.data.kitti_dataset.
    """
    assert len(kwargs) == 0
    model_dir = str(Path(model_dir).resolve())
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if isinstance(config_path, str):
        config = pipeline_pb2.TrainEvalPipelineConfig()
        with open(config_path, "r") as f:
            proto_str = f.read()
            text_format.Merge(proto_str, config)
    else:
        # a config object may be passed directly; this is usually used
        # when you want to eval with several different parameters in
        # one script.
        config = config_path

    input_cfg = config.eval_input_reader
    input_cfg.cum_lc_wrapper.lc_horizon = lc_horizon
    model_cfg = config.model.second
    train_cfg = config.train_config

    net = build_network(model_cfg, measure_time=False).to(device)
    if train_cfg.enable_mixed_precision:
        net.half()
        print("half inference!")
        net.metrics_to_float()
        net.convert_norm_to_float(net)
    target_assigner = net.target_assigner
    voxel_generator = net.voxel_generator

    if ckpt_path is None:
        assert model_dir is not None
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)
    batch_size = 1
    eval_dataset = input_reader_builder.build(input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner,
                                              net=net)

    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32

    net.eval()
    t = time.time()
    detections = []
    print("Generate output labels...")
    bar = ProgressBar()
    bar.start((len(eval_dataset) + batch_size - 1) // batch_size)
    prep_example_times = []
    prep_times = []
    t2 = time.time()

    times = []
    for scene_id in trange(num_examples):
        idx = eval_dataset.scene_id_and_step_to_idx(scene_id, lc_horizon)
        torch.cuda.synchronize()
        b_ex_time = time.time()
        example = eval_dataset[idx]
        example = merge_second_batch([example])
        example = example_convert_to_torch(example, float_dtype)
        with torch.no_grad():
            detections = net(example)
        torch.cuda.synchronize()
        e_ex_time = time.time()
        del example, detections
        times.append(e_ex_time - b_ex_time)

    times = np.array(times)
    mean = times.mean()
    interval = 1.96 * times.std() / np.sqrt(
        len(times))  # 95% confidence interval

    return mean, interval
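
The summary above reports the mean per-example latency together with a normal-approximation 95% confidence interval, mean +/- 1.96*std/sqrt(n). A quick numeric check with made-up timings:

import numpy as np

times = np.array([0.095, 0.102, 0.098, 0.110, 0.105])  # made-up per-frame latencies (s)
mean = times.mean()                                     # 0.102 s
interval = 1.96 * times.std() / np.sqrt(len(times))     # half-width of the 95% CI, ~0.0046 s
print(f"{mean * 1000:.1f} ms +/- {interval * 1000:.1f} ms")  # 102.0 ms +/- 4.6 ms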
Example 5
    def get_inference_input_dict(self, info, points):
        assert self.anchor_cache is not None
        assert self.target_assigner is not None
        assert self.voxel_generator is not None
        assert self.config is not None
        assert self.built is True
        kitti.convert_to_kitti_info_version2(info)
        pc_info = info["point_cloud"]
        image_info = info["image"]
        calib = info["calib"]

        rect = calib['R0_rect']
        Trv2c = calib['Tr_velo_to_cam']
        P2 = calib['P2']

        input_cfg = self.config.eval_input_reader
        model_cfg = self.config.model.second

        input_dict = {
            'points': points,
            "calib": {
                'rect': rect,
                'Trv2c': Trv2c,
                'P2': P2,
            },
            "image": {
                'image_shape': np.array(image_info["image_shape"],
                                        dtype=np.int32),
                'image_idx': image_info['image_idx'],
                'image_path': image_info['image_path'],
            },
        }
        out_size_factor = np.prod(model_cfg.rpn.layer_strides)
        if len(model_cfg.rpn.upsample_strides) > 0:
            out_size_factor /= model_cfg.rpn.upsample_strides[-1]
        out_size_factor *= model_cfg.middle_feature_extractor.downsample_factor
        out_size_factor = int(out_size_factor)
        example = prep_pointcloud(
            input_dict=input_dict,
            root_path=str(self.root_path),
            voxel_generator=self.voxel_generator,
            target_assigner=self.target_assigner,
            max_voxels=input_cfg.max_number_of_voxels,
            class_names=self.target_assigner.classes,
            training=False,
            create_targets=False,
            shuffle_points=input_cfg.shuffle_points,
            generate_bev=False,
            without_reflectivity=model_cfg.without_reflectivity,
            num_point_features=model_cfg.num_point_features,
            anchor_area_threshold=input_cfg.anchor_area_threshold,
            anchor_cache=self.anchor_cache,
            out_size_factor=out_size_factor,
            out_dtype=np.float32)
        example["metadata"] = {}
        if "image" in info:
            example["metadata"]["image"] = input_dict["image"]

        if "anchors_mask" in example:
            example["anchors_mask"] = example["anchors_mask"].astype(np.uint8)
        #############
        # convert example to batched example
        #############
        example = merge_second_batch([example])
        return example
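
Unlike the earlier examples, the output stride here accumulates all RPN layer strides, divides by the last upsample stride, and then multiplies by the middle feature extractor's downsample factor. A quick check with assumed values (not from this page):

import numpy as np

layer_strides = [2, 2, 2]       # assumed RPN stage strides
upsample_strides = [1, 2, 4]    # assumed deconv-head strides
downsample_factor = 8           # assumed middle_feature_extractor.downsample_factor

out_size_factor = np.prod(layer_strides)      # 8
if len(upsample_strides) > 0:
    out_size_factor /= upsample_strides[-1]   # 8 / 4 = 2.0
out_size_factor *= downsample_factor          # 2.0 * 8 = 16.0
out_size_factor = int(out_size_factor)        # 16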
Example 6
    def get_inference_input_dict(self, info, points):
        # assert self.anchor_cache is not None
        # assert self.target_assigner is not None
        # assert self.voxel_generator is not None
        assert self.fv_generator is not None
        assert self.config is not None
        assert self.built is True
        rect = info['calib/R0_rect']
        P2 = info['calib/P2']
        Trv2c = info['calib/Tr_velo_to_cam']
        input_cfg = self.config.eval_input_reader
        model_cfg = self.config.model.second

        root_path = '/home/js/data/KITTI/object'
        input_dict = {
            'points': points,
            'rect': rect,
            'Trv2c': Trv2c,
            'P2': P2,
            'image_shape': np.array(info["img_shape"], dtype=np.int32),
            'image_idx': info['image_idx'],
            'image_path': root_path + '/' + info['img_path'],
            # 'pointcloud_num_features': num_point_features,
        }

        if 'annos' in info:
            annos = info['annos']
            # we need other objects to avoid collisions when sampling
            annos = kitti.remove_dontcare(annos)
            loc = annos["location"]
            dims = annos["dimensions"]
            rots = annos["rotation_y"]
            # alpha = annos["alpha"]
            gt_names = annos["name"]
            # print(gt_names, len(loc))
            gt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]],
                                      axis=1).astype(np.float32)
            # gt_boxes = np.concatenate(
            #     [loc, dims, alpha[..., np.newaxis]], axis=1).astype(np.float32)
            # gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes, rect, Trv2c)
            difficulty = annos["difficulty"]
            input_dict.update({
                'gt_boxes': gt_boxes,
                'gt_names': gt_names,
                'difficulty': difficulty,
            })
            if 'group_ids' in annos:
                input_dict['group_ids'] = annos["group_ids"]

        out_size_factor = model_cfg.rpn.layer_strides[
            0] // model_cfg.rpn.upsample_strides[0]
        print("RGB_embedding: ", self.RGB_embedding)

        example = prep_pointcloud(
            input_dict=input_dict,
            root_path=str(self.root_path),
            # voxel_generator=self.voxel_generator,
            fv_generator=self.fv_generator,
            target_assigner=self.target_assigner,
            max_voxels=input_cfg.max_number_of_voxels,
            class_names=list(input_cfg.class_names),
            training=False,
            create_targets=False,
            shuffle_points=input_cfg.shuffle_points,
            generate_bev=False,
            remove_outside_points=False,
            without_reflectivity=model_cfg.without_reflectivity,
            num_point_features=model_cfg.num_point_features,
            anchor_area_threshold=input_cfg.anchor_area_threshold,
            anchor_cache=self.anchor_cache,
            out_size_factor=out_size_factor,
            out_dtype=np.float32,
            num_classes=model_cfg.num_class,
            RGB_embedding=self.RGB_embedding)
        example["image_idx"] = info['image_idx']
        example["image_shape"] = input_dict["image_shape"]
        example["points"] = points
        if "anchors_mask" in example:
            example["anchors_mask"] = example["anchors_mask"].astype(np.uint8)
        #############
        # convert example to batched example
        #############
        example = merge_second_batch([example])
        return example
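
When annotations are available, this example concatenates per-object location (N, 3), dimensions (N, 3) and yaw (N, 1) into (N, 7) ground-truth boxes. A minimal shape check with made-up values:

import numpy as np

loc = np.zeros((2, 3), dtype=np.float32)   # made-up object centers
dims = np.ones((2, 3), dtype=np.float32)   # made-up object dimensions
rots = np.zeros((2,), dtype=np.float32)    # made-up rotation_y angles

gt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]], axis=1).astype(np.float32)
assert gt_boxes.shape == (2, 7)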
Example 7
    def slicing_forward(self, dataset_iter, deadline):
        self.measure_time_start('Pre-stage-1', False)
        self.measure_time_start('PFE')
        #self.measure_time_start('PillarGen')
        try:
            if self._repeat_example:
                example = merge_second_batch([
                    self._data_loader.dataset[self._repeat_example_idx]
                ])  # id 19
            else:
                example = next(dataset_iter)
        except StopIteration:
            print("Woaaaah, that is unexpected! Check dataset iter!")
            return None, None, None

        num_voxels = example["num_voxels"][0][0]  # batch size 1

        example = example_convert_to_torch(example, self._float_dtype)
        #self.measure_time_end('PillarGen')
        torch.backends.cudnn.benchmark = False
        io_dict = self._net.forward_pfn(example)
        torch.backends.cudnn.benchmark = self._cudnn_benchmarking
        self.measure_time_end('PFE')

        #with torch.cuda.stream(self._other_cuda_stream):
        # Calculate anchor mask
        stg0_sum = torch.sum(io_dict['stage0'], 1, keepdim=True)
        sum_mask = torch.nn.functional.max_pool2d(
            stg0_sum, 15, stride=int(self._stg0_pred_scale_rate),
            padding=7).type(torch.bool)
        example['anchors_mask'] = sum_mask.expand(
            self._box_preds_size[:-1]).contiguous()
        sum_del_mask = torch.unsqueeze(torch.logical_not(sum_mask), -1)
        sum_del_mask = sum_del_mask.expand(self._cls_preds_size).contiguous()

        #self.measure_time_start("RPN-total")
        # Returns possible batch sizes for each stage
        self.measure_time_start('RPN-stage-1')
        self._net.forward_rpn_stage(io_dict)
        self._net.forward_rpn_cls_preds(io_dict)
        self.measure_time_end('RPN-stage-1')

        # Calculate sum of class scores within each slice
        # but only use class scores positioned close to pillar locations
        # use stg0 to create the pillar mask which will be used for slicing
        # Apply sigmoid and mask values below nms threshold
        cls_scores = torch.sigmoid(io_dict["cls_preds"])
        cls_scores_del = cls_scores <= self._nms_score_threshold

        #torch.cuda.default_stream().wait_stream(self._other_cuda_stream)

        cls_scores_del = torch.logical_or(cls_scores_del, sum_del_mask)
        cls_scores.masked_scatter_(cls_scores_del, self._pred_zeroer)
        if not self._net._encode_background_as_zeros:
            cls_scores = cls_scores[..., 1:].contiguous()
        cls_scores = torch.sum(cls_scores, [0, 1, 2, 4])  # reduce to H
        csa = self.slice_with_ranges(cls_scores.cpu(), self._cls_scr_ranges)
        # looks like everything is already synchronized at this point

        if not self._merge_preds:
            anchors = example['anchors'].view(self._box_preds_size)
            aa = self.slice_preds_with_ranges(anchors, self._preds_slc_ranges)
            ama = self.slice_with_ranges(example['anchors_mask'],
                                         self._preds_slc_ranges)

        slice_io_dicts = []
        for i in range(self._num_slices):
            slice_io_dicts.append({})
            if not self._merge_preds:
                slice_io_dicts[-1]['anchors'] = aa[i]
                slice_io_dicts[-1]['anchors_mask'] = ama[i]

        # Get the cls mask of each slice, also the overlapped regions explicitly
        # stg1 class scores will be enough for everything
        cls_scr_sums = torch.empty(2 * len(slice_io_dicts) - 1,
                                   dtype=cls_scores.dtype,
                                   device='cpu')
        for i, cs in enumerate(csa):
            cls_scr_sums[i] = torch.sum(cs)

        zerocuk_tensor = cls_scr_sums.new_zeros((1, ))
        slice_io_dicts[0]['cls_scores'] = torch.cat(
            (zerocuk_tensor, cls_scr_sums[:2]))
        slice_io_dicts[-1]['cls_scores'] = torch.cat(
            (cls_scr_sums[-2:], zerocuk_tensor))
        for i, io_d in zip(range(1,
                                 len(cls_scr_sums) - 2, 2),
                           slice_io_dicts[1:-1]):
            io_d['cls_scores'] = cls_scr_sums[i:i + 3]

        # It looks already synchronized from what I saw, but synchronize
        # explicitly anyway; the cost is negligible.
        torch.cuda.synchronize()

        # Now decide the slice forwarding pattern
        # This algorithm takes 0.5 ms
        slices_to_exec = self.sched_slices(slice_io_dicts, deadline)
        stg2_slices, stg3_slices = slices_to_exec

        stg_seq = [1]
        self.measure_time_end('Pre-stage-1', False)
        self.measure_time_start('Post-stage-1', False)

        data_sliced = False
        if len(stg2_slices) == self._num_slices:
            # Since we are going to execute all slices,
            # Don't do slicing and run the whole stage
            self.measure_time_start("RPN-stage-2")
            self._net.forward_rpn_stage(io_dict)
            self.measure_time_end(f"RPN-stage-2")
        elif len(stg2_slices) > 0:
            data_sliced = True
            # Slice the tensors
            sa = self.slice_with_ranges(io_dict["stage1"],
                                        self._stg1_slc_ranges)
            ua = self.slice_with_ranges(io_dict["up1"], self._up1_slc_ranges)
            cpa = self.slice_preds_with_ranges(io_dict["cls_preds"],
                                               self._preds_slc_ranges)

            for i in range(self._num_slices):
                slice_io_dicts[i]["stages_executed"] = 1
                slice_io_dicts[i]["stage1"] = sa[i]
                slice_io_dicts[i]["up1"] = ua[i]
                slice_io_dicts[i]["backbone_out"] = ua[i]
                slice_io_dicts[i]["cls_preds"] = cpa[i]

            # We have slices to exec through stage 2
            # batch the chosen slices
            batch_io_dict = {
                "stages_executed": 1,
            }
            batch_io_dict["stage1"] = torch.cat(
                [slice_io_dicts[s]["stage1"] for s in stg2_slices])
            #batch_io_dict["up1"] = torch.cat(
            #        [slice_io_dicts[s]["up1"] for s in stg2_slices])

            self.measure_time_start("RPN-stage-2")
            self._net.forward_rpn_stage(batch_io_dict)
            self.measure_time_end(f"RPN-stage-2")

            # Scatter the results anyway
            #if len(stg3_slices) < len(stg2_slices):
            stg2_chunks = torch.chunk(batch_io_dict["stage2"],
                                      len(stg2_slices))
            up2_chunks = torch.chunk(batch_io_dict["up2"], len(stg2_slices))
            for i, s in enumerate(stg2_slices):
                slice_io_dicts[s]["stage2"] = stg2_chunks[i]
                slice_io_dicts[s]["up2"] = up2_chunks[i]
                slice_io_dicts[s]["stages_executed"] = 2

        stg_seq.extend([2] * len(stg2_slices))

        if len(stg3_slices) == self._num_slices:
            # data_sliced will always be False at this point,
            # since the number of stage-2 slices was also
            # equal to _num_slices
            self.measure_time_start("RPN-stage-3")
            self._net.forward_rpn_stage(io_dict)
            self.measure_time_end(f"RPN-stage-3")
        elif len(stg3_slices) > 0:  # that means stg2_slices was also > 0
            data_sliced = True
            if len(stg2_slices) == self._num_slices:
                # Slice the tensors if they were not sliced during stage 2
                sa = self.slice_with_ranges(io_dict["stage2"],
                                            self._stg2_slc_ranges)
                ua1 = self.slice_with_ranges(io_dict["up1"],
                                             self._up1_slc_ranges)
                ua2 = self.slice_with_ranges(io_dict["up2"],
                                             self._up2_slc_ranges)

                for i in range(self._num_slices):
                    slice_io_dicts[i]["stage2"] = sa[i]
                    slice_io_dicts[i]["up1"] = ua1[i]
                    slice_io_dicts[i]["up2"] = ua2[i]
                    slice_io_dicts[i]["stages_executed"] = 2

                batch_io_dict = {
                    "stages_executed": 2,
                }

            # We have slices to exec through stage 3
            # batch chosen slices
            batch_io_dict["stage2"] = torch.cat(
                [slice_io_dicts[s]["stage2"] for s in stg3_slices])
            #batch_io_dict["up2"] = torch.cat(
            #        [slice_io_dicts[s]["up2"] for s in stg3_slices])

            self.measure_time_start("RPN-stage-3")
            self._net.forward_rpn_stage(batch_io_dict)
            self.measure_time_end(f"RPN-stage-3")

            # Scatter the results
            up3_chunks = torch.chunk(batch_io_dict["up3"], len(stg3_slices))
            for i, s in enumerate(stg3_slices):
                slice_io_dicts[s]["up3"] = up3_chunks[i]
                slice_io_dicts[s]["stages_executed"] = 3

            stg_seq.extend([3] * len(stg3_slices))

        self.measure_time_start("RPN-finalize")
        if not data_sliced:
            # No slicing was used
            if io_dict['stages_executed'] == 1:
                self._net.forward_rpn_rem_preds(io_dict)
            else:
                self._net.forward_rpn_all_preds(io_dict)
            preds_dict = io_dict
        else:
            # We used slicing, now we need to merge the slices
            # After detection heads
            # This part can be batched too but it is okay to
            # stay like this
            # Another optimization could be using cuda streams
            for io_d in slice_io_dicts:
                if io_d["stages_executed"] == 1:
                    # stage 1 slices already have cls preds
                    io_d['backbone_out'] = io_d['backbone_out'].contiguous()
                    self._net.forward_rpn_rem_preds(io_d)
                else:
                    self._net.forward_rpn_all_preds(io_d)

            if self._merge_preds:
                # If two overlapping regions went through the same number of
                # stages, take half of the overlapped region from each
                # neighboring io dict.
                # Otherwise, select the one with more stages executed.
                preds_dict = {}
                for k, v in self._pred_dict_copy.items():
                    preds_dict[k] = v.clone().detach()

                # every slice has a big middle range and two (or one)
                # small overlap ranges
                slc_r = self._cls_scr_ranges[0]
                for k in preds_dict.keys():
                    preds_dict[k][..., :slc_r[1], :] = \
                            slice_io_dicts[0][k][..., :slc_r[1], :]

                for i in range(len(slice_io_dicts) - 1):
                    io_d1, io_d2 = slice_io_dicts[i], slice_io_dicts[i + 1]
                    se1, se2 = io_d1["stages_executed"], io_d2[
                        "stages_executed"]
                    ovl_r = self._cls_scr_ranges[i * 2 + 1]
                    ovl_len = ovl_r[1] - ovl_r[0]
                    for k in preds_dict.keys():
                        if se1 > se2:
                            preds_dict[k][..., ovl_r[0]:ovl_r[1], :] = \
                                    io_d1[k][..., -ovl_len:, :]
                        elif se1 < se2:
                            preds_dict[k][..., ovl_r[0]:ovl_r[1], :] = \
                                    io_d2[k][..., :ovl_len, :]
                        else:
                            mid = ovl_len // 2
                            preds_dict[k][..., ovl_r[0]:(ovl_r[0]+mid), :] = \
                                    io_d1[k][..., -ovl_len:(-ovl_len+mid), :]
                            preds_dict[k][..., (ovl_r[0]+mid):ovl_r[1], :] = \
                                    io_d2[k][..., mid:ovl_len, :]
                        slc_r = self._cls_scr_ranges[i * 2 + 2]
                        slc_len = slc_r[1] - slc_r[0]
                        preds_dict[k][..., slc_r[0]:slc_r[1], :] = \
                                io_d2[k][..., ovl_len:(ovl_len+slc_len) , :]

                for k, v in preds_dict.items():
                    preds_dict[k] = v.contiguous()

        self.measure_time_end("RPN-finalize")
        #self.measure_time_end("RPN-total")

        # ASSUME BATCH SIZE 1
        # Predict has high execution time variance, I wonder why
        # IDEA: Use anchor mask to predict prediction time
        # actually, I can just use number of pillars as well
        self.measure_time_start('Predict')
        torch.backends.cudnn.benchmark = False
        if self._merge_preds:
            # DEBUG
            #self.plot_amask_and_save(example['anchors_mask'], f"merged_{self._sample_idx}")
            #self.plot_cls_scores_and_save(preds_dict['cls_preds'], example['anchors_mask'],
            #        f"merged_{self._sample_idx}")
            # DEBUG END
            det = self._net.predict(example, preds_dict)
        else:
            # I can use batching for prediction
            # Exclude stage 1 slices having class score sum of 0
            selected_slices = []
            for i, s in enumerate(slice_io_dicts):
                if s['stages_executed'] > 1 or torch.sum(s['cls_scores']) > .0:
                    selected_slices.append(s)

            det = self.create_empty_det_dict(example['metadata'][0])
            if len(selected_slices) > 0:
                batch_pred_dict = {}
                for k in self._pred_dict_copy.keys():
                    batch_pred_dict[k] = torch.cat(
                        [s[k] for s in selected_slices])

                for k in ['anchors', 'anchors_mask']:
                    example[k] = torch.cat([s[k] for s in selected_slices])
                example['metadata'] = []

                slice_dets = self._net.predict(example, batch_pred_dict)

                # remove slices that have no detections
                slice_dets_final = []
                for sd in slice_dets:
                    if sd['box3d_lidar'].shape[0] > 0:
                        slice_dets_final.append(sd)

                if len(slice_dets_final) > 0:
                    # merge final slice detections
                    for k in det.keys():
                        if k != 'metadata':
                            det[k] = torch.cat(
                                [d[k] for d in slice_dets_final])

                    #print('3D bounding boxes before:')
                    #for box in det['box3d_lidar']:
                    #    print(box)

                    # Now remove duplicate predictions in the overlapped
                    # regions, if any exist; they can occur because NMS was
                    # executed twice on the overlapped regions
                    mask_indexes = []
                    centers = det['box3d_lidar'][:, :2].cpu()
                    scores = det['scores'].cpu()
                    for i in range(centers.shape[0]):
                        diffs = torch.linalg.norm(centers - centers[i], dim=1)
                        sel = True
                        for j, d in enumerate(diffs):
                            if d > 0 and d < 2. and scores[i] < scores[j]:
                                # distance below 2 meter threshold
                                sel = False
                                print(f"Discard 3d bbox at", centers[i],
                                      'in image', det['metadata']['image_idx'])
                                break
                        if sel:
                            mask_indexes.append(i)

                    for k, v in det.items():
                        if k != 'metadata':
                            det[k] = det[k][mask_indexes]

            det = [det]  # batch size 1

        torch.backends.cudnn.benchmark = self._cudnn_benchmarking
        self.measure_time_end('Predict')
        torch.cuda.synchronize()
        self.measure_time_end('Post-stage-1', False)
        return det, stg_seq, num_voxels
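
The duplicate suppression near the end of the slice path discards any detection that has a higher-scoring neighbor within 2 m, which compensates for NMS having run twice on the overlapped regions. A self-contained toy run of the same rule, with made-up centers and scores:

import torch

centers = torch.tensor([[0.0, 0.0], [0.5, 0.0], [10.0, 10.0]])  # made-up box centers (x, y)
scores = torch.tensor([0.9, 0.6, 0.8])                          # made-up confidence scores

keep = []
for i in range(centers.shape[0]):
    diffs = torch.linalg.norm(centers - centers[i], dim=1)
    # drop detection i if another detection within 2 m outscores it
    if not any(0 < d < 2.0 and scores[i] < scores[j] for j, d in enumerate(diffs)):
        keep.append(i)
# keep == [0, 2]: the 0.6-score box lying 0.5 m from a 0.9-score box is discarded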
Example 8
    def no_slicing_forward(self, dataset_iter, deadline):
        self.measure_time_start('Pre-stage-1')
        self.measure_time_start('PFE')
        #self.measure_time_start('PillarGen')
        try:
            if self._repeat_example:
                example = merge_second_batch(
                    [self._data_loader.dataset[self._repeat_example_idx]])
            else:
                example = next(dataset_iter)
        except StopIteration:
            print("Woaaaah, that is unexpected! Check dataset iter!")
            return None, None, None

        num_voxels = example["num_voxels"][0][0]  # batch size 1
        if self._method == 3:  # imprecise
            #num_stgs = self.num_stages_to_exec(deadline, num_voxels)
            print('ERROR! the imprecise method is not supported without slicing')
        else:
            num_stgs = self._method + 1

        example = example_convert_to_torch(example, self._float_dtype)
        torch.backends.cudnn.benchmark = False
        io_dict = self._net.forward_pfn(example)
        torch.backends.cudnn.benchmark = self._cudnn_benchmarking

        # Calculate anchor mask
        stg0_sum = torch.sum(io_dict['stage0'], 1, keepdim=True)
        sum_mask = torch.nn.functional.max_pool2d(
            stg0_sum, 15, stride=int(self._stg0_pred_scale_rate),
            padding=7).type(torch.bool)
        example['anchors_mask'] = sum_mask.expand(
            self._box_preds_size[:-1]).contiguous()

        self.measure_time_end('PFE')

        #self.measure_time_start('RPN-total')
        self.measure_time_start('RPN-stage-1')
        self._net.forward_rpn_stage(io_dict)
        self.measure_time_end('RPN-stage-1')
        stg_seq = [1]

        self.measure_time_end('Pre-stage-1')
        self.measure_time_start('Post-stage-1')

        if num_stgs >= 2:
            self.measure_time_start('RPN-stage-2')
            self._net.forward_rpn_stage(io_dict)
            self.measure_time_end('RPN-stage-2')
            stg_seq.append(2)

        if num_stgs == 3:
            self.measure_time_start('RPN-stage-3')
            self._net.forward_rpn_stage(io_dict)
            self.measure_time_end('RPN-stage-3')
            stg_seq.append(3)

        self.measure_time_start('RPN-finalize')
        self._net.forward_rpn_all_preds(io_dict)
        self.measure_time_end('RPN-finalize')
        #self.measure_time_end('RPN-total')

        self.measure_time_start('Predict')
        #torch.cuda.nvtx.range_push('Predict')
        torch.backends.cudnn.benchmark = False
        det = self._net.predict(example, io_dict)
        torch.backends.cudnn.benchmark = self._cudnn_benchmarking
        #torch.cuda.nvtx.range_pop()
        self.measure_time_end('Predict')
        torch.cuda.synchronize()
        self.measure_time_end('Post-stage-1')

        return det, stg_seq, num_voxels
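
Both forward paths bracket each phase with measure_time_start / measure_time_end calls. The helper itself is not shown on this page; the sketch below only conveys the accumulation pattern (the meaning of the second argument in the real helper is unknown and merely mirrored here):

import time
from collections import defaultdict

class PhaseTimer:
    """Toy per-phase timer in the spirit of the calls above; purely illustrative."""

    def __init__(self):
        self._begin = {}
        self.elapsed = defaultdict(list)

    def measure_time_start(self, name, flag=True):
        self._begin[name] = time.time()

    def measure_time_end(self, name, flag=True):
        self.elapsed[name].append(time.time() - self._begin.pop(name))

timer = PhaseTimer()
timer.measure_time_start("RPN-stage-1")
timer.measure_time_end("RPN-stage-1")
print(timer.elapsed["RPN-stage-1"])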