Example #1
def inference_by_idx():
    global BACKEND
    instance = request.json
    response = {"status": "normal"}
    if BACKEND.root_path is None:
        return error_response("root path is not set")
    image_idx = instance["image_idx"]
    # remove_outside = instance["remove_outside"]
    idx = BACKEND.image_idxes.index(image_idx)
    example = BACKEND.dataset[idx]
    # don't forget to pad batch idx in coordinates
    example["coordinates"] = np.pad(example["coordinates"], ((0, 0), (1, 0)),
                                    mode='constant',
                                    constant_values=0)
    # don't forget to add newaxis for anchors
    example["anchors"] = example["anchors"][np.newaxis, ...]
    example_torch = example_convert_to_torch(example, device=BACKEND.device)
    pred = BACKEND.net(example_torch)[0]  # calls forward function
    box3d = pred["box3d_lidar"].detach().cpu().numpy()
    locs = box3d[:, :3]
    dims = box3d[:, 3:6]
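    # box3d packs location (cols :3), dimensions (cols 3:6) and yaw (col 6); rotations go out as (0, 0, -yaw) triples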
    rots = np.concatenate(
        [np.zeros([locs.shape[0], 2], dtype=np.float32), -box3d[:, 6:7]],
        axis=1)
    response["dt_locs"] = locs.tolist()
    response["dt_dims"] = dims.tolist()
    response["dt_rots"] = rots.tolist()
    response["dt_labels"] = pred["label_preds"].detach().cpu().numpy().tolist()
    response["dt_scores"] = pred["scores"].detach().cpu().numpy().tolist()

    response = jsonify(results=[response])
    response.headers['Access-Control-Allow-Headers'] = '*'
    return response
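All of the examples on this page funnel a preprocessed example dict through example_convert_to_torch before calling the network. The helper below is only a minimal sketch of that conversion (numpy float arrays become tensors of the requested dtype, integer arrays get an integer dtype, everything else passes through); the real second.pytorch function handles specific keys explicitly, so treat this as illustrative rather than the actual implementation.

import numpy as np
import torch

def example_convert_to_torch_sketch(example, dtype=torch.float32, device=None):
    # Illustrative only: move the numpy arrays of an example dict onto a torch device.
    device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
    out = {}
    for key, value in example.items():
        if isinstance(value, np.ndarray) and np.issubdtype(value.dtype, np.floating):
            out[key] = torch.as_tensor(value, dtype=dtype, device=device)
        elif isinstance(value, np.ndarray) and np.issubdtype(value.dtype, np.integer):
            out[key] = torch.as_tensor(value, dtype=torch.int32, device=device)
        else:
            out[key] = value  # metadata, strings, None, etc. pass through unchanged
    return out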
Example #2
    def _inference(self, example):
        train_cfg = self.config.train_config
        input_cfg = self.config.eval_input_reader
        model_cfg = self.config.model.second
        example_torch = example_convert_to_torch(example)
        result_annos = predict_to_kitti_label(
            self.net, example_torch, list(self.target_assigner.classes),
            model_cfg.post_center_limit_range, model_cfg.lidar_input)
        return result_annos
Example #3
    def _inference(self, example):
        train_cfg = self.config.train_config
        input_cfg = self.config.eval_input_reader
        model_cfg = self.config.model.second

        if train_cfg.enable_mixed_precision:
            float_dtype = torch.float16
        else:
            float_dtype = torch.float32
        example_torch = example_convert_to_torch(example, float_dtype)
        predictions_dicts = self.net(example_torch)
        return predictions_dicts
Example #4
    def _inference(self, example):
        train_cfg = self.config.train_config
        input_cfg = self.config.eval_input_reader
        model_cfg = self.config.model.second
        if train_cfg.enable_mixed_precision:
            float_dtype = torch.half
        else:
            float_dtype = torch.float32
        example_torch = example_convert_to_torch(example, float_dtype)
        result_annos = predict_kitti_to_anno(
            self.net, example_torch, list(self.target_assigner.classes),
            model_cfg.post_center_limit_range, model_cfg.lidar_input)
        return result_annos
Example #5
def main():
    cfg_path = Path('/..../pointpillars/car/xyres_##.config')
    ckpt_path = Path('/..../voxelnet-######.tckpt')

    config = pipeline_pb2.TrainEvalPipelineConfig()
    print("config reading")
    with open(cfg_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("building net")
    net = build_network(config.model.second).to(device).float().eval()
    net.load_state_dict(torch.load(ckpt_path))
    print("net built")

    eval_input_cfg = config.eval_input_reader
    dataset = input_reader_builder.build(
        eval_input_cfg,
        config.model.second,
        training=False,
        voxel_generator=net.voxel_generator,
        target_assigner=net.target_assigner).dataset
    idx = 0
    example = dataset[idx]

    example["coordinates"] = np.pad(example["coordinates"], ((0, 0), (1, 0)),
                                    mode='constant',
                                    constant_values=0)
    # don't forget to add newaxis for anchors
    example["anchors"] = example["anchors"][np.newaxis, ...]
    example_torch = example_convert_to_torch(example, device=device)

    voxels = example_torch["voxels"]
    num_points = example_torch["num_points"]
    coors = example_torch["coordinates"]
    batch_anchors = example["anchors"]
    batch_size_dev = batch_anchors.shape[0]

    voxel_features = net.voxel_feature_extractor(voxels, num_points, coors)
    spatial_features = net.middle_feature_extractor(voxel_features, coors,
                                                    batch_size_dev)

    # Export the model
    print("exporting as onnx")
    torch_out = torch.onnx._export(net.rpn, (spatial_features),
                                   "rpn.onnx",
                                   export_params=True)
    print("export complete")
Example #6
def test(config_path=args.config_path,
         model_dir=args.model_dir,
         result_path=None,
         create_folder=False,
         pickle_result=True,
         include_roadmap=False,
         device=1):
    """train a VoxelNet model specified by a config file.
    """
    if create_folder:
        if pathlib.Path(model_dir).exists():
            model_dir = torchplus.train.create_folder(model_dir)

    model_dir = pathlib.Path(model_dir)
    model_dir.mkdir(parents=True, exist_ok=True)
    eval_checkpoint_dir = model_dir / 'eval_checkpoints'
    eval_checkpoint_dir.mkdir(parents=True, exist_ok=True)
    if result_path is None:
        result_path = model_dir / 'results'
    config_file_bkp = "pipeline.config"
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    shutil.copyfile(config_path, str(model_dir / config_file_bkp))
    input_cfg = config.train_input_reader
    eval_input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    batch_size = 1
    class_names = list(input_cfg.class_names)
    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    grid_size = voxel_generator.grid_size
    ######################
    # BUILD TARGET ASSIGNER
    ######################
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    ######################
    # BUILD NET
    ######################
    center_limit_range = model_cfg.post_center_limit_range
    net = second_builder.build(model_cfg, voxel_generator, target_assigner,
                               include_roadmap)
    net.cuda().eval()

    print("num_trainable parameters:", len(list(net.parameters())))
    # for n, p in net.named_parameters():
    #     print(n, p.shape)

    #torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    torchplus.train.restore(args.model_path, net)
    #torchplus.train.restore("./ped_models_56/voxelnet-275130.tckpt",net)
    out_size_factor = model_cfg.rpn.layer_strides[
        0] / model_cfg.rpn.upsample_strides[0]
    print(out_size_factor)
    #out_size_factor *= model_cfg.middle_feature_extractor.downsample_factor
    out_size_factor = int(out_size_factor)
    feature_map_size = grid_size[:2] // out_size_factor
    feature_map_size = [*feature_map_size, 1][::-1]
    print(feature_map_size)
    ret = target_assigner.generate_anchors(feature_map_size)
    #anchors_dict = target_assigner.generate_anchors_dict(feature_map_size)
    anchors = ret["anchors"]
    anchors = anchors.reshape([-1, 7])
    matched_thresholds = ret["matched_thresholds"]
    unmatched_thresholds = ret["unmatched_thresholds"]
    anchors_bv = box_np_ops.rbbox2d_to_near_bbox(anchors[:, [0, 1, 3, 4, 6]])
    anchor_cache = {
        "anchors": anchors,
        "anchors_bv": anchors_bv,
        "matched_thresholds": matched_thresholds,
        "unmatched_thresholds": unmatched_thresholds,
        #"anchors_dict": anchors_dict,
    }

    am = ArgoverseMap()
    dt_annos = []

    root_dir = os.path.join('./../../argodataset/argoverse-tracking/',
                            args.set)
    argoverse_loader = ArgoverseTrackingLoader(root_dir)

    prog_cnt = 0
    for seq in range(len(argoverse_loader)):
        argoverse_data = argoverse_loader[seq]
        nlf = argoverse_data.num_lidar_frame
        for frame in range(nlf):
            prog_cnt += 1
            if prog_cnt % 50 == 0:
                print(prog_cnt)
            points = argoverse_data.get_lidar(frame)
            roi_pts = copy.deepcopy(points)
            city_name = argoverse_data.city_name
            city_to_egovehicle_se3 = argoverse_data.get_pose(frame)
            '''
            roi_pts = city_to_egovehicle_se3.transform_point_cloud(roi_pts)  # put into city coords
            #non roi
            roi_pts_flag = am.remove_non_roi_points(roi_pts, city_name) # remove non-driveable region
            roi_pts = roi_pts[roi_pts_flag]
            roi_pts = am.remove_ground_surface(roi_pts, city_name)  # remove ground surface
    
            # convert city to lidar co-ordinates

            roi_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(roi_pts) 
            '''
            if args.include_roi or args.dr_area or not args.include_road_points:
                roi_pts = city_to_egovehicle_se3.transform_point_cloud(
                    roi_pts)  # put into city coords

            if args.include_roi:
                roi_pts_flag = am.remove_non_roi_points(
                    roi_pts, city_name)  # remove non-driveable region
                roi_pts = roi_pts[roi_pts_flag]

            if not args.include_roi and args.dr_area:
                roi_pts_flag = am.remove_non_driveable_area_points(
                    roi_pts, city_name)  # remove non-driveable region
                roi_pts = roi_pts[roi_pts_flag]

            if not args.include_road_points:
                roi_pts = am.remove_ground_surface(
                    roi_pts, city_name)  # remove ground surface

            # convert city to lidar co-ordinates
            if args.include_roi or args.dr_area or not args.include_road_points:
                roi_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(
                    roi_pts)

            roi_pts[:, 2] = roi_pts[:, 2] - 1.73

            pts_x, pts_y, pts_z = roi_pts[:, 0], roi_pts[:, 1], roi_pts[:, 2]

            input_dict = {
                'points': roi_pts,
                'pointcloud_num_features': 3,
            }

            out_size_factor = model_cfg.rpn.layer_strides[
                0] // model_cfg.rpn.upsample_strides[0]

            example = prep_pointcloud(
                input_dict=input_dict,
                root_path=None,
                voxel_generator=voxel_generator,
                target_assigner=target_assigner,
                max_voxels=input_cfg.max_number_of_voxels,
                class_names=list(input_cfg.class_names),
                training=False,
                create_targets=False,
                shuffle_points=input_cfg.shuffle_points,
                generate_bev=False,
                without_reflectivity=model_cfg.without_reflectivity,
                num_point_features=model_cfg.num_point_features,
                anchor_area_threshold=input_cfg.anchor_area_threshold,
                anchor_cache=anchor_cache,
                out_size_factor=out_size_factor,
                out_dtype=np.float32)

            if "anchors_mask" in example:
                example["anchors_mask"] = example["anchors_mask"].astype(
                    np.uint8)
            example["image_idx"] = str(seq) + "_" + str(frame)
            example["image_shape"] = np.array([400, 400], dtype=np.int32)
            example["road_map"] = None
            example["include_roadmap"] = False
            example["points"] = roi_pts
            #torch.save(example,"./network_input_examples/" + info)
            example = merge_second_batch([example])

            example_torch = example_convert_to_torch(example,
                                                     device=args.device)
            try:
                result_annos = predict_kitti_to_anno(
                    net, example_torch, input_cfg.class_names,
                    model_cfg.post_center_limit_range, model_cfg.lidar_input)
            except Exception:
                print(seq, frame)
                continue
            dt_annos += result_annos

    if pickle_result:
        sdi = args.save_path.rfind('/')
        save_dir = args.save_path[:sdi]
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)

        with open(args.save_path, 'wb') as f:
            pickle.dump(dt_annos, f)
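Example #6 wraps a single example via merge_second_batch([example]) before the torch conversion. Below is a minimal sketch of that collate step, assuming the usual second.pytorch layout (voxel arrays concatenated, a batch-index column prepended to coordinates, everything else stacked per sample); the real function handles more keys, so this is illustrative only.

from collections import defaultdict
import numpy as np

def merge_second_batch_sketch(batch_list):
    # Illustrative collate: gather values per key, then batch them.
    merged = defaultdict(list)
    for example in batch_list:
        for key, value in example.items():
            merged[key].append(value)
    out = {}
    for key, elems in merged.items():
        if key in ("voxels", "num_points"):
            out[key] = np.concatenate(elems, axis=0)
        elif key == "coordinates":
            # prepend the batch index as an extra column before concatenating
            padded = [np.pad(c, ((0, 0), (1, 0)), mode="constant", constant_values=i)
                      for i, c in enumerate(elems)]
            out[key] = np.concatenate(padded, axis=0)
        else:
            out[key] = np.stack(elems, axis=0)  # per-sample arrays and scalars
    return out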
Example #7
                + str(box.wlh[0]) + ' ' + str(box.wlh[1]) + ' '  +  \
                str(box.wlh[2]) + ' ' + str(box.orientation.yaw_pitch_roll[0]) \
                + ' ' + str(name) + ' '
        pred_str += pred
    return pred_str.strip()



token2predstr = {}
detections = []
#tokens = []
tk0 = tqdm(dataloader, total=len(dataloader))
for idx, examples in enumerate(tk0):
    try:
        example_torch = example_convert_to_torch(examples, device=device)
        detections += net(example_torch)
        #tokens += examples['metadata']
    except Exception as e:
        print(e)
        import pdb
        pdb.set_trace()

threshold = 0.2
for idx, pred in enumerate(tqdm(detections)):
    pred = thresholded_pred(pred, threshold)
    #token = tokens[idx]['token']
    token = pred['metadata']['token']
    pred_str = get_pred_str(pred, token)
    index = df[df['Id'] == token].index[0]
    df.loc[index, 'PredictionString'] = pred_str
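thresholded_pred is not shown on this page; judging from how it is called here and in Example #10, it most likely filters a single detection dict by score. The helper below is a hypothetical stand-in for that behavior, with field names taken from the prediction dicts in the examples above.

def thresholded_pred_sketch(pred, threshold):
    # Hypothetical stand-in: keep only detections whose score clears the threshold.
    keep = pred["scores"] >= threshold
    out = dict(pred)  # leave metadata and any other fields untouched
    for key in ("box3d_lidar", "scores", "label_preds"):
        out[key] = pred[key][keep]
    return out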
Example #8

def main(config_path,
         lc_horizon,
         num_examples,
         model_dir,
         ckpt_path=None,
         **kwargs):
    """Don't support pickle_result anymore. if you want to generate kitti label file,
    please use kitti_anno_to_label_file and convert_detection_to_kitti_annos
    in second.data.kitti_dataset.
    """
    assert len(kwargs) == 0
    model_dir = str(Path(model_dir).resolve())
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if isinstance(config_path, str):
        config = pipeline_pb2.TrainEvalPipelineConfig()
        with open(config_path, "r") as f:
            proto_str = f.read()
            text_format.Merge(proto_str, config)
    else:
        # a config object was provided directly. This is usually used
        # when you want to eval with several different parameters in
        # one script.
        config = config_path

    input_cfg = config.eval_input_reader
    input_cfg.cum_lc_wrapper.lc_horizon = lc_horizon
    model_cfg = config.model.second
    train_cfg = config.train_config

    net = build_network(model_cfg, measure_time=False).to(device)
    if train_cfg.enable_mixed_precision:
        net.half()
        print("half inference!")
        net.metrics_to_float()
        net.convert_norm_to_float(net)
    target_assigner = net.target_assigner
    voxel_generator = net.voxel_generator

    if ckpt_path is None:
        assert model_dir is not None
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)
    batch_size = 1
    eval_dataset = input_reader_builder.build(input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner,
                                              net=net)

    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32

    net.eval()
    t = time.time()
    detections = []
    print("Generate output labels...")
    bar = ProgressBar()
    bar.start((len(eval_dataset) + batch_size - 1) // batch_size)
    prep_example_times = []
    prep_times = []
    t2 = time.time()

    times = []
    for scene_id in trange(num_examples):
        idx = eval_dataset.scene_id_and_step_to_idx(scene_id, lc_horizon)
        torch.cuda.synchronize()
        b_ex_time = time.time()
        example = eval_dataset[idx]
        example = merge_second_batch([example])
        example = example_convert_to_torch(example, float_dtype)
        with torch.no_grad():
            detections = net(example)
        torch.cuda.synchronize()
        e_ex_time = time.time()
        del example, detections
        times.append(e_ex_time - b_ex_time)

    times = np.array(times)
    mean = times.mean()
    interval = 1.96 * times.std() / np.sqrt(
        len(times))  # 95% confidence interval

    return mean, interval
Example #9
def predict(config_path,
            model_dir,
            result_path=None,
            predict_test=False,
            ckpt_path=None,
            ref_detfile=None,
            pickle_result=True,
            bb_save_dir=None,
            pub_bb=None,
            pub_lidar=None):
    ''' Setup network and provide useful output '''

    ####################
    # SETUP PARAMETERS #
    ####################
    model_dir = pathlib.Path(model_dir)
    if predict_test:
        result_name = 'predict_test'
    else:
        result_name = 'eval_results'
    if result_path is None:
        result_path = model_dir / result_name
    else:
        result_path = pathlib.Path(result_path)
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)

    # TODO: include this program as a function call in the localization/mapping code as needed
    # TODO: use whole pointcloud data instead of reduced pointcloud
    # TODO: [Done] store data in respective pcd and bounding box (csv) files
    # TODO: [Done] create a cpp file to read and show (n number of) pcd files with respective bounding boxes
    # > [Done] Check if pcl_viewer can open pcd
    # > [Done] Check if pcl_viewer can be called from a cpp program for vizualization
    # > [Done] Check if that cpp program can also show a bounding box
    input_cfg = config.eval_input_reader  # Read the config file data into useful structures
    model_cfg = config.model.second  # Read the config file data into useful structures
    train_cfg = config.train_config  # Read the config file data into useful structures
    class_names = list(input_cfg.class_names)
    center_limit_range = model_cfg.post_center_limit_range

    #########################
    # BUILD VOXEL GENERATOR #
    #########################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)

    #####################
    # NETWORK GENERATOR #
    #####################
    # Build the NN in GPU mode
    net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net.cuda()

    # Standard conversion approach if using FloatingPoint16 instead of FloatingPoint32 type of tensor
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32

    # Restore old checkpoint if possible
    if ckpt_path is None:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)

    # Setup network for evaluation mode
    net.eval()

    #####################
    # DATASET GENERATOR #
    #####################
    # Dataset build for easy usage
    eval_dataset = input_reader_builder.build(input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner)
    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=input_cfg.batch_size,
        shuffle=False,
        num_workers=input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch)

    # Further variable setup
    result_path_step = result_path / f"step_{net.get_global_step()}"
    result_path_step.mkdir(parents=True, exist_ok=True)
    t = time.time()
    dt_annos = []
    global_set = None
    print()
    print("Generate output labels...")
    bar = ProgressBar()
    bar.start(len(eval_dataset) // input_cfg.batch_size + 1)

    #################
    # NETWORK USAGE #
    #################
    # Predict a batch of 'batch_size' samples, get info and reformat data as needed
    # temp_count = 0
    for example in iter(eval_dataloader):
        # pprint.pprint(example, width=1)
        # for key, value in example.items():
        # 	print(key)
        # 	print(np.shape(value))
        example = example_convert_to_torch(example, float_dtype)
        print(example['image_idx'])
        # pprint.pprint(example, width=1)
        # for key, value in example.items():
        # 	print(key)
        # 	print(np.shape(value))
        # # # # if pickle_result:

        # NOTE: Predict network output
        # start_time = time.time()
        predictions_dicts = net(example)

        # # Save copy of data if user requested
        # if save_pcd:
        # 	np.fromfile(str(v_path), dtype=np.float32, count=-1).reshape([-1, 4])

        # # Publish original data
        # if pub_lidar:
        # 	data=PointCloud2()
        # 	# FIXME: Extract pointclound info from 'example' (use original kitti data file if needed) > publish
        # 	pub_lidar.publish(data)

        # # Publish network output
        # if pub_bb:
        # 	data = MarkerArray()
        # 	# FIXME: Create a wireframe 3D bounding box and, if possible, a transluscent 3D cuboid as well > publish
        # 	pub_bb.publish(data)

        # # print('Network predict time: {}'.format(time.time()-start_time))
        # pprint.pprint(predictions_dicts[0])
        # for key, value in predictions_dicts[0].items():
        # 	print(key)
        # 	print(np.shape(value))

        if bb_save_dir:
            save_path = pathlib.Path(bb_save_dir)
            save_path.mkdir(
                parents=True, exist_ok=True
            )  # create directory (and its parents) if non-existent

            for pred_dict in predictions_dicts:
                if pred_dict['box3d_lidar'] is not None:
                    bb_lidar = pred_dict['box3d_lidar'].detach().cpu().numpy()
                else:
                    bb_lidar = [[
                        'temp', 'temp', 'temp', 'temp', 'temp', 'temp', 'temp'
                    ]]
                df = pd.DataFrame(bb_lidar)
                df.columns = ['x', 'y', 'z', 'w', 'l', 'h', 't']
                filename = save_path.joinpath(
                    str(pred_dict['image_idx']) + '.csv')
                filename.write_text(df.to_csv(index=False))
Example #10
def detect(scene_token, config_path, ckpt_path, info_path, root_path,
           result_path):
    ### Read Config file

    torch.set_num_threads(2)
    #config_path = "configs/nuscenes/all.pp.lowa_large_range_v2.config"
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    # config_tool.change_detection_range_v2(model_cfg, [-50, -50, 50, 50])
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    ### Build Network, Target Assigner and Voxel Generator

    #info_path = '/home/itiv/Desktop/lyft-dataset/infos_val.pkl'
    #root_path = '/home/itiv/Desktop/lyft-dataset'
    with open(info_path, 'rb') as f:
        infos = pickle.load(f)

    token2info = {}
    for info in infos['infos']:
        token2info[info['token']] = info
    #ckpt_path = "/home/itiv/Desktop/repo/scenarios_in_CarMaker/BA_Daniel/Lyft-Detector/second.pytorch/second/model/model_large_range_v2/voxelnet-33445.tckpt"
    net = build_network(config.model.second).to(device).float().eval()
    net.load_state_dict(torch.load(ckpt_path))
    eval_input_cfg = config.eval_input_reader
    eval_input_cfg.dataset.kitti_root_path = root_path
    eval_input_cfg.dataset.kitti_info_path = info_path
    dataset = input_reader_builder.build(
        eval_input_cfg,
        config.model.second,
        training=False,
        voxel_generator=net.voxel_generator,
        target_assigner=net.target_assigner)  #.dataset

    batch_size = 2
    num_workers = 2

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             pin_memory=False,
                                             collate_fn=merge_second_batch)

    target_assigner = net.target_assigner
    voxel_generator = net.voxel_generator
    classes = target_assigner.classes

    detections = []
    #tk0 = prog_bar(dataloader, total=len(dataloader))
    tk0 = (dataloader)
    for idx, examples in enumerate(tk0):
        #print(idx)
        #print(examples)
        try:
            example_torch = example_convert_to_torch(examples, device=device)
            detections += net(example_torch)
        except Exception as e:
            print(e)
            import pdb
            pdb.set_trace()

    threshold = 0.2
    first_sample_token = detections[0]['metadata']['token']
    dict_detections = {"results": {}}

    for idx, pred in enumerate((detections)):
        pred = thresholded_pred(pred, threshold)
        #token = tokens[idx]['token']
        token = pred['metadata']['token']
        dict_detections['results'].update(
            get_pred_dict(pred, token, classes, token2info))
    #pred_str = get_pred_str(pred, token)
    #predStrings.append(pred_str)
    #index = df[df['Id'] == token].index[0]
    #df.loc[index, 'PredictionString'] = pred_str


    #df.to_csv(f'final.csv', index=False)
    #print(dict_detections)
    #path_to_result = f'/home/itiv/Desktop/lyft-dataset/detections-largev2.json'
    with open(result_path + '/detections_' + scene_token + '.json', 'w') as fp:
        json.dump(dict_detections, fp)
Example #11
def LC_PROCESS(prev_sensor_data, net, dataset, policy, sparsify_config):
    """
    Processes prev_sensor_data and feeds it to the net, places light curtain using
    generated confidence scores, gets light curtain return, and adds it to a new
    sensor data.
    
    While processing the sensor_data, only the dataset._prep_main_func is used.
    No calls to _prep_data_aug and _prep_targets are made.

    sensor_data["lidar"]["points"] will be considered the main input to the model.
    prev_sensor_data["lidar"]["points"] will be used as the input cloud, and the output
    cloud will be saved into sensor_data["lidar"]["points"].
    This only handles 4-dimensional points, of the form (x, y, z, intensity).

    Args:
    *   prev_sensor_data: a dict containing the following items
            {
                "lidar": {
                    "type": "lidar",
                    "points": ...
                },
                "calib": {
                    ...
                },
                "depth": {
                    "type": "depth_map",
                    "image": ...
                },
                "init_lidar": {
                    "num_beams": ...,
                    "points": ...
                }

            }
            sensor_data["lidar"]["points"] constitutes the main input the network.
            It could be initialized by sensor_data["init_lidar"] for example.
    
    *   net: (VoxelNet)

    *   dataset: (Dataset) dataset.

    *   policy: (Policy) one of the light curtain policies in second.light_curtain.policy.

    *   sparsify_config: (config) options that are used to subsample a point cloud return.
    
    Returns:
    *   dictionary of all information generated during the LC process
        {
            "prev_sensor_data": previous sensor data,
            "next_sensor_data": the new sensor data dict,
            "lc_image": lc_image returned by pylc API,
            "lc_cloud": lc_image processed into a point cloud; this is the main return,
            "net_pred": pred of the network,
            "net_preds_dict": preds_dict of the network, 
            "confidence_map": network predictions converted to a confidence_map
        }
    *   sensor_data: the same sensor_data, but with sensor_data["lidar"]["points"] replaced by
                     the new cumulative point cloud.

    """
    net_was_training = net.training
    net.eval()

    points = prev_sensor_data["lidar"]["points"]
    calib = prev_sensor_data["calib"]

    example = dataset._prep_func_main(points, calib)
    if "image_idx" in prev_sensor_data["metadata"]:
        example["metadata"] = prev_sensor_data["metadata"]

    if "anchors_mask" in example:
        example["anchors_mask"] = example["anchors_mask"].astype(np.uint8)

    # don't forget to pad batch idx in coordinates
    example["coordinates"] = np.pad(example["coordinates"], ((0, 0), (1, 0)),
                                    mode='constant',
                                    constant_values=0)

    # don't forget to add newaxis for anchors
    example["anchors"] = example["anchors"][np.newaxis, ...]

    with torch.no_grad():
        example_torch = example_convert_to_torch(example)
        pred, preds_dict = net(example_torch, ret_preds_dict=True)

        # Creating confidence map.
        cls_preds = preds_dict['cls_preds']  # shape=(1, 2, 200, 176, 1)
        cls_preds = cls_preds[0, :, :, :, 0]  # shape=(2, 200, 176)
        cls_preds = torch.sigmoid(
            cls_preds).detach().cpu().numpy()  # shape=(2, 200, 176)

    anchors = example["anchors"][0]  # (2 * 200 * 176, 7)
    anchors_mask = example.get("anchors_mask",
                               None)  # (200 * 176,) dtype=np.uint8
    confidence_map = _get_confidence_map(anchors, anchors_mask, cls_preds,
                                         dataset.lc_device.TRANSFORMS["wTc"])

    # Light curtain point cloud.
    depth_image = prev_sensor_data["depth"]["image"]
    # Design points should be in the camera frame.
    design_pts = policy.get_design_points(confidence_map)
    lc_image = dataset.lc_device.get_return(depth_image, design_pts)
    lc_cloud = lc_image.reshape(-1, 4)  # (N, 4)
    # Remove points which are NaNs.
    non_nan_mask = np.all(np.isfinite(lc_cloud), axis=1)
    lc_cloud = lc_cloud[non_nan_mask]  # (N, 4)
    # Convert lc_cloud to velo frame.
    lc_cloud_xyz1 = np.hstack(
        (lc_cloud[:, :3], np.ones([len(lc_cloud), 1], dtype=np.float32)))
    lc_cloud_xyz1 = lc_cloud_xyz1 @ dataset.lc_device.TRANSFORMS["cTw"].T
    lc_cloud[:, :3] = lc_cloud_xyz1[:, :3]  # (N, 4)
    # Rescale LC return to [0, 1].
    lc_cloud[:, 3] /= 255.

    lc_pts_added = lc_cloud
    if sparsify_config is not None:
        lc_pts_added = sparsify_lc_return(lc_pts_added, sparsify_config)

    next_sensor_data = prev_sensor_data.copy()
    next_sensor_data["lidar"] = prev_sensor_data["lidar"].copy()
    next_sensor_data["lidar"]["points"] = np.vstack(
        (next_sensor_data["lidar"]["points"], lc_pts_added))

    # Reset training state of network.
    net.train(net_was_training)

    return {
        "prev_sensor_data": prev_sensor_data,
        "next_sensor_data": next_sensor_data,
        "lc_image": lc_image,
        "lc_cloud": lc_cloud,
        "net_pred": pred,
        "net_preds_dict": preds_dict,
        "confidence_map": confidence_map
    }
Example #12

    def process(self):
        """
        Publishes:
            {
                "detections"    : { "dt_locs", "dt_dims", "dt_rots", "dt_labels", "dt_scores" },
                "confidence_map": (np.ndarray, dtype=float32, shape=(X, Z, 2+K)) confidence map of detector.
                                    Axis 0 corresponds to increasing X (camera frame) / decreasing Y (velo frame).
                                    Axis 1 corresponds to increasing Z (camera frame) / increasing X (velo frame).
                                    Axis 2 corresponds to (x, z, c_1, ..., c_K):
                                        - x : x in camera frame.
                                        - z : z in camera frame.
                                        - c_k : kth confidence score lying in [0, 1].
            }
        """
        while True:
            # collect lidar data
            if len(self.lidar.stream) == 0:
                raise Exception("Detector: lidar stream is empty! Detector needs lidar points.")
            lidar_points = self.lidar.stream[-1].data  # (N, 3)
            lidar_points = np.hstack((lidar_points, np.ones([len(lidar_points), 1], dtype=np.float32)))  # (N, 4)

            # collect light curtain return
            if len(self.light_curtain.stream) > 0:
                lc_points = [elem.data["lc_cloud"] for elem in self.light_curtain.stream]  # list of (N, 4)
                lc_points = np.vstack(lc_points)  # (N, 4)
                lc_points = self.sparsify_lc_points(lc_points)  # (N, 4)
            else:
                lc_points = np.zeros([0, 4], dtype=np.float32)

            # combine lidar and light curtain points
            points = np.vstack([lidar_points, lc_points])  # (N, 4)

            example = self.preprocess_fn(points)
            if "anchors_mask" in example:
                example["anchors_mask"] = example["anchors_mask"].astype(np.uint8)
            
            # don't forget to pad batch idx in coordinates
            example["coordinates"] = np.pad(example["coordinates"], ((0, 0), (1, 0)),
                                            mode='constant',
                                            constant_values=0)
            
            # don't forget to add newaxis for anchors
            example["anchors"] = example["anchors"][np.newaxis, ...]

            with torch.no_grad():
                example_torch = example_convert_to_torch(example)
                pred, preds_dict = self.net(example_torch, ret_preds_dict=True)
            
            # get detections
            detections = {}
            pred = pred[0]
            box3d = pred["box3d_lidar"].detach().cpu().numpy()
            locs = box3d[:, :3]
            dims = box3d[:, 3:6]
            rots = np.concatenate([np.zeros([locs.shape[0], 2], dtype=np.float32), -box3d[:, 6:7]], axis=1)
            detections["dt_locs"] = locs.tolist()
            detections["dt_dims"] = dims.tolist()
            detections["dt_rots"] = rots.tolist()
            detections["dt_labels"] = pred["label_preds"].detach().cpu().numpy().tolist()
            detections["dt_scores"] = pred["scores"].detach().cpu().numpy().tolist()
            
            # get confidence map
            cls_preds = preds_dict['cls_preds']  # (1, 2, 200, 176, 1)
            cls_preds = cls_preds[0, :, :, :, 0]  # (2, 200, 176)
            cls_preds = torch.sigmoid(cls_preds).detach().cpu().numpy()  # (2, 200, 176)

            anchors = example["anchors"][0]  # (2 * 200 * 176, 7)
            anchors_mask = example.get("anchors_mask", None)  # (200 * 176,) dtype=np.uint8
            confidence_map = self.get_confidence_map(
                anchors, anchors_mask, 
                cls_preds, self.light_curtain.lc_device.TRANSFORMS["wTc"]
            )

            stream_data = dict(detections=detections, confidence_map=confidence_map)

            yield self.env.timeout(self.latency)  # forward pass
            self.publish(stream_data)

            # get design points
            yield self.env.process(self.dp_optimizer.service(confidence_map))
            design_pts = self.dp_optimizer.stream[-1].data  # (N, 2)

            # operate light curtain
            yield self.env.process(self.light_curtain.service(design_pts))
Example #13
    def slicing_forward(self, dataset_iter, deadline):
        self.measure_time_start('Pre-stage-1', False)
        self.measure_time_start('PFE')
        #self.measure_time_start('PillarGen')
        try:
            if self._repeat_example:
                example = merge_second_batch([
                    self._data_loader.dataset[self._repeat_example_idx]
                ])  # id 19
            else:
                example = next(dataset_iter)
        except StopIteration:
            print("Woaaaah, that is unexpected! Check dataset iter!")
            return None, None, None

        num_voxels = example["num_voxels"][0][0]  # batch size 1

        example = example_convert_to_torch(example, self._float_dtype)
        #self.measure_time_end('PillarGen')
        torch.backends.cudnn.benchmark = False
        io_dict = self._net.forward_pfn(example)
        torch.backends.cudnn.benchmark = self._cudnn_benchmarking
        self.measure_time_end('PFE')

        #with torch.cuda.stream(self._other_cuda_stream):
        # Calculate anchor mask
        stg0_sum = torch.sum(io_dict['stage0'], 1, keepdim=True)
        sum_mask = torch.nn.functional.max_pool2d(
            stg0_sum, 15, stride=int(self._stg0_pred_scale_rate),
            padding=7).type(torch.bool)
        example['anchors_mask'] = sum_mask.expand(
            self._box_preds_size[:-1]).contiguous()
        sum_del_mask = torch.unsqueeze(torch.logical_not(sum_mask), -1)
        sum_del_mask = sum_del_mask.expand(self._cls_preds_size).contiguous()

        #self.measure_time_start("RPN-total")
        # Returns possible batch sizes for each stage
        self.measure_time_start('RPN-stage-1')
        self._net.forward_rpn_stage(io_dict)
        self._net.forward_rpn_cls_preds(io_dict)
        self.measure_time_end('RPN-stage-1')

        # Calculate sum of class scores within each slice
        # but only use class scores positioned close to pillar locations
        # use stg0 to create the pillar mask which will be used for slicing
        # Apply sigmoid and mask values below nms threshold
        cls_scores = torch.sigmoid(io_dict["cls_preds"])
        cls_scores_del = cls_scores <= self._nms_score_threshold

        #torch.cuda.default_stream().wait_stream(self._other_cuda_stream)

        cls_scores_del = torch.logical_or(cls_scores_del, sum_del_mask)
        cls_scores.masked_scatter_(cls_scores_del, self._pred_zeroer)
        if not self._net._encode_background_as_zeros:
            cls_scores = cls_scores[..., 1:].contiguous()
        cls_scores = torch.sum(cls_scores, [0, 1, 2, 4])  # reduce to H
        csa = self.slice_with_ranges(cls_scores.cpu(), self._cls_scr_ranges)
        # LOOKS LIKE IT IS SYNCHED AT THIS POINT

        if not self._merge_preds:
            anchors = example['anchors'].view(self._box_preds_size)
            aa = self.slice_preds_with_ranges(anchors, self._preds_slc_ranges)
            ama = self.slice_with_ranges(example['anchors_mask'],
                                         self._preds_slc_ranges)

        slice_io_dicts = []
        for i in range(self._num_slices):
            slice_io_dicts.append({})
            if not self._merge_preds:
                slice_io_dicts[-1]['anchors'] = aa[i]
                slice_io_dicts[-1]['anchors_mask'] = ama[i]

        # Get the cls mask of each slice, also the overlapped regions explicitly
        # stg1 class scores will be enough for everything
        cls_scr_sums = torch.empty(2 * len(slice_io_dicts) - 1,
                                   dtype=cls_scores.dtype,
                                   device='cpu')
        for i, cs in enumerate(csa):
            cls_scr_sums[i] = torch.sum(cs)

        zerocuk_tensor = cls_scr_sums.new_zeros((1, ))
        slice_io_dicts[0]['cls_scores'] = torch.cat(
            (zerocuk_tensor, cls_scr_sums[:2]))
        slice_io_dicts[-1]['cls_scores'] = torch.cat(
            (cls_scr_sums[-2:], zerocuk_tensor))
        for i, io_d in zip(range(1,
                                 len(cls_scr_sums) - 2, 2),
                           slice_io_dicts[1:-1]):
            io_d['cls_scores'] = cls_scr_sums[i:i + 3]

        # I DON'T NEED TO CALL SYNC BECAUSE IT IS ALREADY SYNCED
        # FROM WHAT I SAW BUT DO IT ANYWAY, NO BIG LOSS
        torch.cuda.synchronize()

        # Now decide the slice forwarding pattern
        # This algorithm takes 0.5 ms
        slices_to_exec = self.sched_slices(slice_io_dicts, deadline)
        stg2_slices, stg3_slices = slices_to_exec

        stg_seq = [1]
        self.measure_time_end('Pre-stage-1', False)
        self.measure_time_start('Post-stage-1', False)

        data_sliced = False
        if len(stg2_slices) == self._num_slices:
            # Since we are going to execute all slices,
            # Don't do slicing and run the whole stage
            self.measure_time_start("RPN-stage-2")
            self._net.forward_rpn_stage(io_dict)
            self.measure_time_end(f"RPN-stage-2")
        elif len(stg2_slices) > 0:
            data_sliced = True
            # Slice the tensors
            sa = self.slice_with_ranges(io_dict["stage1"],
                                        self._stg1_slc_ranges)
            ua = self.slice_with_ranges(io_dict["up1"], self._up1_slc_ranges)
            cpa = self.slice_preds_with_ranges(io_dict["cls_preds"],
                                               self._preds_slc_ranges)

            for i in range(self._num_slices):
                slice_io_dicts[i]["stages_executed"] = 1
                slice_io_dicts[i]["stage1"] = sa[i]
                slice_io_dicts[i]["up1"] = ua[i]
                slice_io_dicts[i]["backbone_out"] = ua[i]
                slice_io_dicts[i]["cls_preds"] = cpa[i]

            # We have slices to exec through stage 2
            # batch the chosen slices
            batch_io_dict = {
                "stages_executed": 1,
            }
            batch_io_dict["stage1"] = torch.cat(
                [slice_io_dicts[s]["stage1"] for s in stg2_slices])
            #batch_io_dict["up1"] = torch.cat(
            #        [slice_io_dicts[s]["up1"] for s in stg2_slices])

            self.measure_time_start("RPN-stage-2")
            self._net.forward_rpn_stage(batch_io_dict)
            self.measure_time_end(f"RPN-stage-2")

            # Scatter the results anyway
            #if len(stg3_slices) < len(stg2_slices):
            stg2_chunks = torch.chunk(batch_io_dict["stage2"],
                                      len(stg2_slices))
            up2_chunks = torch.chunk(batch_io_dict["up2"], len(stg2_slices))
            for i, s in enumerate(stg2_slices):
                slice_io_dicts[s]["stage2"] = stg2_chunks[i]
                slice_io_dicts[s]["up2"] = up2_chunks[i]
                slice_io_dicts[s]["stages_executed"] = 2

        stg_seq.extend([2] * len(stg2_slices))

        if len(stg3_slices) == self._num_slices:
            # data_sliced will be always false
            # at this point since stage2 slices
            # will be also equal to _num_slices
            self.measure_time_start("RPN-stage-3")
            self._net.forward_rpn_stage(io_dict)
            self.measure_time_end(f"RPN-stage-3")
        elif len(stg3_slices) > 0:  # that means stg2_slices was also > 0
            data_sliced = True
            if len(stg2_slices) == self._num_slices:
                # Slice the tensors if they were not sliced during stage 2
                sa = self.slice_with_ranges(io_dict["stage2"],
                                            self._stg2_slc_ranges)
                ua1 = self.slice_with_ranges(io_dict["up1"],
                                             self._up1_slc_ranges)
                ua2 = self.slice_with_ranges(io_dict["up2"],
                                             self._up2_slc_ranges)

                for i in range(self._num_slices):
                    slice_io_dicts[i]["stage2"] = sa[i]
                    slice_io_dicts[i]["up1"] = ua1[i]
                    slice_io_dicts[i]["up2"] = ua2[i]
                    slice_io_dicts[i]["stages_executed"] = 2

                batch_io_dict = {
                    "stages_executed": 2,
                }

            # We have slices to exec through stage 3
            # batch chosen slices
            batch_io_dict["stage2"] = torch.cat(
                [slice_io_dicts[s]["stage2"] for s in stg3_slices])
            #batch_io_dict["up2"] = torch.cat(
            #        [slice_io_dicts[s]["up2"] for s in stg3_slices])

            self.measure_time_start("RPN-stage-3")
            self._net.forward_rpn_stage(batch_io_dict)
            self.measure_time_end(f"RPN-stage-3")

            # Scatter the results
            up3_chunks = torch.chunk(batch_io_dict["up3"], len(stg3_slices))
            for i, s in enumerate(stg3_slices):
                slice_io_dicts[s]["up3"] = up3_chunks[i]
                slice_io_dicts[s]["stages_executed"] = 3

            stg_seq.extend([3] * len(stg3_slices))

        self.measure_time_start("RPN-finalize")
        if not data_sliced:
            # No slicing was used
            if io_dict['stages_executed'] == 1:
                self._net.forward_rpn_rem_preds(io_dict)
            else:
                self._net.forward_rpn_all_preds(io_dict)
            preds_dict = io_dict
        else:
            # We used slicing, now we need to merge the slices
            # After detection heads
            # This part can be batched too but it is okay to
            # stay like this
            # Another optimization could be using cuda streams
            for io_d in slice_io_dicts:
                if io_d["stages_executed"] == 1:
                    # stage 1 slices already have cls preds
                    io_d['backbone_out'] = io_d['backbone_out'].contiguous()
                    self._net.forward_rpn_rem_preds(io_d)
                else:
                    self._net.forward_rpn_all_preds(io_d)

            if self._merge_preds:
                # if two overlapped regions went through the same number of
                # stages, get half of the overlapped region from each
                # neighbor io dict.
                # Otherwise, select the one with more stages executed
                preds_dict = {}
                for k, v in self._pred_dict_copy.items():
                    preds_dict[k] = v.clone().detach()

                # every slice has a big middle range and two (or one)
                # small overlap ranges
                slc_r = self._cls_scr_ranges[0]
                for k in preds_dict.keys():
                    preds_dict[k][..., :slc_r[1], :] = \
                            slice_io_dicts[0][k][..., :slc_r[1], :]

                for i in range(len(slice_io_dicts) - 1):
                    io_d1, io_d2 = slice_io_dicts[i], slice_io_dicts[i + 1]
                    se1, se2 = io_d1["stages_executed"], io_d2[
                        "stages_executed"]
                    ovl_r = self._cls_scr_ranges[i * 2 + 1]
                    ovl_len = ovl_r[1] - ovl_r[0]
                    for k in preds_dict.keys():
                        if se1 > se2:
                            preds_dict[k][..., ovl_r[0]:ovl_r[1], :] = \
                                    io_d1[k][..., -ovl_len:, :]
                        elif se1 < se2:
                            preds_dict[k][..., ovl_r[0]:ovl_r[1], :] = \
                                    io_d2[k][..., :ovl_len, :]
                        else:
                            mid = ovl_len // 2
                            preds_dict[k][..., ovl_r[0]:(ovl_r[0]+mid), :] = \
                                    io_d1[k][..., -ovl_len:(-ovl_len+mid), :]
                            preds_dict[k][..., (ovl_r[0]+mid):ovl_r[1], :] = \
                                    io_d2[k][..., mid:ovl_len, :]
                        slc_r = self._cls_scr_ranges[i * 2 + 2]
                        slc_len = slc_r[1] - slc_r[0]
                        preds_dict[k][..., slc_r[0]:slc_r[1], :] = \
                                io_d2[k][..., ovl_len:(ovl_len+slc_len) , :]

                for k, v in preds_dict.items():
                    preds_dict[k] = v.contiguous()

        self.measure_time_end("RPN-finalize")
        #self.measure_time_end("RPN-total")

        # ASSUME BATCH SIZE 1
        # Predict has high execution time variance, I wonder why
        # IDEA: Use anchor mask to predict prediction time
        # actually, I can just use number of pillars as well
        self.measure_time_start('Predict')
        torch.backends.cudnn.benchmark = False
        if self._merge_preds:
            # DEBUG
            #self.plot_amask_and_save(example['anchors_mask'], f"merged_{self._sample_idx}")
            #self.plot_cls_scores_and_save(preds_dict['cls_preds'], example['anchors_mask'],
            #        f"merged_{self._sample_idx}")
            # DEBUG END
            det = self._net.predict(example, preds_dict)
        else:
            # I can use batching for prediction
            # Exclude stage 1 slices having class score sum of 0
            selected_slices = []
            for i, s in enumerate(slice_io_dicts):
                if s['stages_executed'] > 1 or torch.sum(s['cls_scores']) > .0:
                    selected_slices.append(s)

            det = self.create_empty_det_dict(example['metadata'][0])
            if len(selected_slices) > 0:
                batch_pred_dict = {}
                for k in self._pred_dict_copy.keys():
                    batch_pred_dict[k] = torch.cat(
                        [s[k] for s in selected_slices])

                for k in ['anchors', 'anchors_mask']:
                    example[k] = torch.cat([s[k] for s in selected_slices])
                example['metadata'] = []

                slice_dets = self._net.predict(example, batch_pred_dict)

                # remove slices that have no detections
                slice_dets_final = []
                for sd in slice_dets:
                    if sd['box3d_lidar'].shape[0] > 0:
                        slice_dets_final.append(sd)

                if len(slice_dets_final) > 0:
                    # merge final slice detections
                    for k in det.keys():
                        if k != 'metadata':
                            det[k] = torch.cat(
                                [d[k] for d in slice_dets_final])

                    #print('3D bounding boxes before:')
                    #for box in det['box3d_lidar']:
                    #    print(box)

                    # Now we need to remove duplicated overlapped predictions
                    # if they exist. We have to do it because we executed NMS
                    # twice on overlapped regions
                    mask_indexes = []
                    centers = det['box3d_lidar'][:, :2].cpu()
                    scores = det['scores'].cpu()
                    for i in range(centers.shape[0]):
                        diffs = torch.linalg.norm(centers - centers[i], dim=1)
                        sel = True
                        for j, d in enumerate(diffs):
                            if d > 0 and d < 2. and scores[i] < scores[j]:
                                # distance below 2 meter threshold
                                sel = False
                                print(f"Discard 3d bbox at", centers[i],
                                      'in image', det['metadata']['image_idx'])
                                break
                        if sel:
                            mask_indexes.append(i)

                    for k, v in det.items():
                        if k != 'metadata':
                            det[k] = det[k][mask_indexes]

            det = [det]  # batch size 1

        torch.backends.cudnn.benchmark = self._cudnn_benchmarking
        self.measure_time_end('Predict')
        torch.cuda.synchronize()
        self.measure_time_end('Post-stage-1', False)
        return det, stg_seq, num_voxels
Example #14
    def no_slicing_forward(self, dataset_iter, deadline):
        self.measure_time_start('Pre-stage-1')
        self.measure_time_start('PFE')
        #self.measure_time_start('PillarGen')
        try:
            if self._repeat_example:
                example = merge_second_batch(
                    [self._data_loader.dataset[self._repeat_example_idx]])
            else:
                example = next(dataset_iter)
        except StopIteration:
            print("Woaaaah, that is unexpected! Check dataset iter!")
            return None, None, None

        num_voxels = example["num_voxels"][0][0]  # batch size 1
        if self._method == 3:  # imprecise
            #num_stgs = self.num_stages_to_exec(deadline, num_voxels)
            print('ERROR: the imprecise method without slicing is not supported')
        else:
            num_stgs = self._method + 1

        example = example_convert_to_torch(example, self._float_dtype)
        torch.backends.cudnn.benchmark = False
        io_dict = self._net.forward_pfn(example)
        torch.backends.cudnn.benchmark = self._cudnn_benchmarking

        # Calculate anchor mask
        stg0_sum = torch.sum(io_dict['stage0'], 1, keepdim=True)
        sum_mask = torch.nn.functional.max_pool2d(
            stg0_sum, 15, stride=int(self._stg0_pred_scale_rate),
            padding=7).type(torch.bool)
        example['anchors_mask'] = sum_mask.expand(
            self._box_preds_size[:-1]).contiguous()

        self.measure_time_end('PFE')

        #self.measure_time_start('RPN-total')
        self.measure_time_start('RPN-stage-1')
        self._net.forward_rpn_stage(io_dict)
        self.measure_time_end('RPN-stage-1')
        stg_seq = [1]

        self.measure_time_end('Pre-stage-1')
        self.measure_time_start('Post-stage-1')

        if num_stgs >= 2:
            self.measure_time_start('RPN-stage-2')
            self._net.forward_rpn_stage(io_dict)
            self.measure_time_end('RPN-stage-2')
            stg_seq.append(2)

        if num_stgs == 3:
            self.measure_time_start('RPN-stage-3')
            self._net.forward_rpn_stage(io_dict)
            self.measure_time_end('RPN-stage-3')
            stg_seq.append(3)

        self.measure_time_start('RPN-finalize')
        self._net.forward_rpn_all_preds(io_dict)
        self.measure_time_end('RPN-finalize')
        #self.measure_time_end('RPN-total')

        self.measure_time_start('Predict')
        #torch.cuda.nvtx.range_push('Predict')
        torch.backends.cudnn.benchmark = False
        det = self._net.predict(example, io_dict)
        torch.backends.cudnn.benchmark = self._cudnn_benchmarking
        #torch.cuda.nvtx.range_pop()
        self.measure_time_end('Predict')
        torch.cuda.synchronize()
        self.measure_time_end('Post-stage-1')

        return det, stg_seq, num_voxels
Example #15
    def __init__(self, net, data_loader, deadline_sec, slice_size_perc, \
        min_slice_overlap_perc, method, float_dtype, batch_size=1, measure_time=True):
        assert (slice_size_perc < 100 or method < 4)

        self._kitti = False
        self._calibration = False
        self._cur_calib_tuple = None
        self._repeat_example = False
        self._repeat_example_idx = 5
        self._max_num_samples = len(data_loader)
        self._merge_preds = True  # Run NMS on each slice if False

        self._net = net
        self._data_loader = data_loader
        self._deadline_sec = deadline_sec
        self._slice_size_perc = slice_size_perc
        self._min_slice_overlap_perc = min_slice_overlap_perc
        self._method = method
        self._float_dtype = float_dtype
        self._batch_size = batch_size
        self._measure_time = measure_time
        self._H_dim = 3
        self._W_dim = 2
        self._use_slicing = (self._method >= 4)
        self._nms_score_threshold = 0.5

        #Print point cloud range : [h_min, w_min, z_min, h_max, w_max, z_max]
        self._pc_range = net.voxel_generator.point_cloud_range
        print('Point cloud range:', self._pc_range)

        #Print pillar scatter output shape : [1, 1, W, H , 64]
        self._p_scatter_outp_shape = net.middle_feature_extractor.output_shape
        print('Point pillar scatter output:', self._p_scatter_outp_shape)

        self._eval_dict = {}
        print("VAL method", self._method)
        self._eval_dict["method"] = self._method

        self._enable_bar = True
        self._cudnn_benchmarking = True
        self._cudnn_deterministic = False
        self._eval_dict["cudnn_benchmarking"] = self._cudnn_benchmarking
        self._eval_dict["cudnn_deterministic"] = self._cudnn_deterministic

        if self._cudnn_deterministic:
            torch.backends.cudnn.deterministic = True
            torch.cuda.manual_seed(0)
            np.random.seed(0)
            torch.set_deterministic(True)

        torch.backends.cudnn.benchmark = self._cudnn_benchmarking

        # Run one warm-up forward pass to warm the caches and derive the slice ranges
        example = next(iter(self._data_loader))
        with torch.no_grad():
            example = example_convert_to_torch(example, float_dtype)
            if 'anchors_mask' in example:
                del example['anchors_mask']  # alternatively: example['anchors_mask'].type(torch.bool)
            io_dict = self._net.forward_pfn(example)  # has variable input size
            self._net.forward_rpn_stage(io_dict)
            self._net.forward_rpn_stage(io_dict)
            self._net.forward_rpn_stage(io_dict)
            self._net.forward_rpn_all_preds(io_dict)
            det = self._net.predict(example, io_dict)[0]
            self._net.calc_elapsed_times()

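        # Template of an empty detection result with the same dtypes and devices
        # as a real prediction (zero boxes, scores, and labels).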
        self._det_dict_copy = {
            "box3d_lidar": torch.zeros([0, det["box3d_lidar"].size()[1]],
                                       dtype=det["box3d_lidar"].dtype,
                                       device=det["box3d_lidar"].device),
            "scores": torch.zeros([0],
                                  dtype=det["scores"].dtype,
                                  device=det["scores"].device),
            "label_preds": torch.zeros([0],
                                       dtype=det["label_preds"].dtype,
                                       device=det["label_preds"].device),
            "metadata": None,
        }

        #Print full tensor sizes
        for k, v in io_dict.items():
            if not isinstance(v, int):
                print(k, v.size())
        self._box_preds_size = io_dict['box_preds'].size()
        self._cls_preds_size = io_dict['cls_preds'].size()
        self._net.clear_timers()

        self._pred_zeroer = torch.zeros_like(io_dict["cls_preds"])
        self._pred_dict_copy = {
            "cls_preds": None,
            "box_preds": None,
            "dir_cls_preds": None
        }
        for k in self._pred_dict_copy.keys():
            self._pred_dict_copy[k] = torch.zeros_like(io_dict[k])

        # The H_dim of preds does not change over stages
        pred_sz = io_dict["cls_preds"].size()[self._H_dim]
        stg0_sz = io_dict["stage0"].size()[self._H_dim]
        self._stg0_pred_scale_rate = stg0_sz / pred_sz
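        # e.g. with a 496-cell stage0 map and 248-cell prediction maps (typical
        # for PointPillars on KITTI) the scale rate is 2, i.e. one prediction
        # row covers two stage0 rows.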

        if self._use_slicing:
            self._preds_slc_ranges, self._preds_ovl_slc_ranges = \
                    self.get_slice_ranges_v3(io_dict["cls_preds"])

            # Slicing can happen after stage 1 or stage 2
            stg1_sz = io_dict["stage1"].size()[self._H_dim]
            up1_sz = io_dict["up1"].size()[self._H_dim]
            scale_rate = stg1_sz / pred_sz
            self._stg1_slc_ranges = [(int(r[0] * scale_rate),
                                      int(r[1] * scale_rate))
                                     for r in self._preds_slc_ranges]
            scale_rate = up1_sz / pred_sz
            self._up1_slc_ranges = [(int(r[0] * scale_rate),
                                     int(r[1] * scale_rate))
                                    for r in self._preds_slc_ranges]

            stg2_sz = io_dict["stage2"].size()[self._H_dim]
            up2_sz = io_dict["up2"].size()[self._H_dim]
            scale_rate = stg2_sz / pred_sz
            self._stg2_slc_ranges = [(int(r[0] * scale_rate),
                                      int(r[1] * scale_rate))
                                     for r in self._preds_slc_ranges]
            scale_rate = up2_sz / pred_sz
            self._up2_slc_ranges = [(int(r[0] * scale_rate),
                                     int(r[1] * scale_rate))
                                    for r in self._preds_slc_ranges]

            self._num_slices = len(self._stg1_slc_ranges)
            # split overlapped regions for class scores
            posr = self._preds_ovl_slc_ranges
            self._cls_scr_ranges = [(0, posr[0][0])]
            for i in range(len(posr) - 1):
                ro1, ro2 = posr[i], posr[i + 1]
                self._cls_scr_ranges.append(ro1)
                self._cls_scr_ranges.append((ro1[1], ro2[0]))
            self._cls_scr_ranges.append(posr[-1])
            self._cls_scr_ranges.append((posr[-1][1], pred_sz))
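            # The result alternates non-overlapping and overlapping ranges and
            # covers [0, pred_sz); e.g. with a single overlap (a, b) it becomes
            # [(0, a), (a, b), (b, pred_sz)].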

            if self._cudnn_benchmarking:
                # This will trigger all possible slice inputs
                print('Starting dry run for benchmarking')
                self._dry_run_slices = []
                self._dry_run_idx = 0
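                # Enumerate every pair of slice-index prefixes ([0..i-1], [0..j-1])
                # with j <= i; feeding each through slicing_forward exposes all
                # slice-count combinations to cuDNN's autotuner before timing starts.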
                for i in range(self._num_slices + 1):
                    for j in range(i + 1):
                        self._dry_run_slices.append(
                            (np.arange(i, dtype=np.uint).tolist(),
                             np.arange(j, dtype=np.uint).tolist()))
                di = iter(self._data_loader)
                self._dry_run = True
                for drs in self._dry_run_slices:
                    self.slicing_forward(di, time.time() + 10.0)
                self._dry_run = False
                self._net.clear_timers()
                print('Dry run finished')

            slice_size_percs_possible = None
            do_slice_size_investigation = False
            if do_slice_size_investigation:
                slice_size_percs_possible = []
                print("Num_slices\tslice_size_perc\t\toverlap_percs")
                keepit = self._slice_size_perc
                for ssp in range(stg0_sz // 10, stg0_sz // 2):
                    self._slice_size_perc = ssp / stg0_sz * 100
                    slc_ranges, ovl_ranges = self.get_slice_ranges_v3(
                        io_dict["stage0"])
                    ovl_percs = [ round((ovl_r[1]-ovl_r[0]) / stg0_sz * 100, 2) \
                            for ovl_r in ovl_ranges ]
                    stg0_slices = self.slice_with_ranges(
                        io_dict["stage0"], slc_ranges)
                    num_slcs = len(stg0_slices)
                    temp_io_dict = {
                        "stage0": stg0_slices[0],
                        "stages_executed": 0,
                    }
                    try:
                        for i in range(3):
                            self._net.forward_rpn_stage(temp_io_dict)
                            self._net.forward_rpn_cls_preds(temp_io_dict)
                            stg0_slc_sz = temp_io_dict["stage0"].size()[
                                self._H_dim]
                            pred_slc_sz = temp_io_dict["cls_preds"].size()[
                                self._H_dim]
                            if stg0_slc_sz % pred_slc_sz != 0:
                                raise ValueError("stage0 and pred slice sizes do not divide evenly")
                    except Exception:
                        pass
                    else:
                        slice_size_percs_possible.append(self._slice_size_perc)
                        print(
                            f"{num_slcs}\t\t{self._slice_size_perc}\t{ovl_percs}"
                        )
                self._slice_size_perc = keepit

            self._calib_test_cases = []
            if self._calibration:
                self._calibration_dict = {
                    "data": {},
                    "stats": {},
                    "eval": {},
                    "mAP": {},
                }
                for i in range(self._num_slices + 1):
                    for j in range(i + 1):
                        self._calib_test_cases.append((i, j))
            else:  # find the calibration file and read it
                with open(f"slice_calib_dict_s{self._slice_size_perc}.json",
                          'r') as handle:
                    self._calibration_dict = json.load(handle)

                    # Use 99th-percentile Post-stage-1 times
                    m = np.finfo(np.single).max
                    self._post_stg1_table = np.full(
                        (self._num_slices + 1, self._num_slices + 1), m)
                    stat_dict = self._calibration_dict['stats']
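                    # Each stats entry's 'Post-stage-1' list holds latency
                    # statistics; index 3 is the 99th percentile referenced above.
                    # Cases absent from the file keep the float32-max sentinel.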
                    for k, v in stat_dict.items():
                        r, c = k.replace('(', '').replace(')', '').replace(',', '').split()
                        r, c = int(r), int(c)
                        self._post_stg1_table[r, c] = v['Post-stage-1'][3]

            print("VAL min_slice_overlap_perc", self._min_slice_overlap_perc)
            self._eval_dict["min_slice_overlap_perc"] = self._min_slice_overlap_perc
            print("VAL num_slices", self._num_slices)
            self._eval_dict["num_slices"] = self._num_slices
            print("2D_LIST _stg1_slc_ranges", self._stg1_slc_ranges)
            print("2D_LIST _stg2_slc_ranges", self._stg2_slc_ranges)
            print("2D_LIST _preds_slc_ranges", self._preds_slc_ranges)
            print('2D_LIST _cls_scr_ranges', self._cls_scr_ranges)
        else:
            print("VAL min_slice_overlap_perc", 0)
            self._eval_dict["min_slice_overlap_perc"] = 0
            print("VAL num_slices 1")
            self._eval_dict["num_slices"] = 1
        print("VAL deadline_sec", self._deadline_sec)
        self._eval_dict["deadline_sec"] = self._deadline_sec
        print("VAL slice_size_perc", self._slice_size_perc)
        self._eval_dict["slice_size_perc"] = self._slice_size_perc

        # Ground-truth annotations; center locations are ordered [w, z, h]
        if self._kitti:
            self._gt_annos = [
                info["annos"]
                for info in data_loader.dataset.dataset._kitti_infos
            ]
            self._gt_img_paths = [
                info["image"]["image_path"]
                for info in data_loader.dataset.dataset._kitti_infos
            ]
        else:
            self._gt_annos = data_loader.dataset.dataset.ground_truth_annotations
            self._gt_img_paths = [
                info["cam_front_path"]
                for info in data_loader.dataset.dataset._nusc_infos
            ]

        #self._other_cuda_stream = torch.cuda.Stream()
        """