def test(config_path):
    #
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    model_cfg = config.model.second
    #
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)

    vfe_num_filters = list(model_cfg.voxel_feature_extractor.num_filters)
    grid_size = voxel_generator.grid_size
    # [1] + [10, 400, 352] + [128]
    dense_shape = [1] + grid_size[::-1].tolist() + [vfe_num_filters[-1]]
    # [1, 10, 400, 352, 128]
    print('dense_shape', dense_shape)
    middle_num_filters_d1 = list(
        model_cfg.middle_feature_extractor.num_filters_down1)
    middle_num_filters_d2 = list(
        model_cfg.middle_feature_extractor.num_filters_down2)
    middle_feature_extractor = SparseMiddleExtractor(
        output_shape=dense_shape,
        use_norm=True,
        num_input_features=vfe_num_filters[-1],
        num_filters_down1=middle_num_filters_d1,
        num_filters_down2=middle_num_filters_d2)
    middle_feature_extractor = middle_feature_extractor.cuda()
    print(count_parameters(middle_feature_extractor))  # 0.4M

    coors = [[0, 11, 12, 13], [1, 22, 23, 24], [0, 33, 34, 35]]
    # (batch_idx, z, y, x) indices; use an integer tensor, torch.Tensor would create floats
    coors = torch.tensor(coors, dtype=torch.int32)
    voxel_features = torch.randn(3, vfe_num_filters[-1]).cuda()
    batch_size = 2
    ret = middle_feature_extractor(voxel_features, coors, batch_size)
    print(ret.shape)  # [2, 128, 400, 352]
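count_parameters is used in these examples but never defined; a minimal sketch, assuming the model is a torch.nn.Module, could look like this:

import torch.nn as nn

def count_parameters(model: nn.Module) -> int:
    # total number of elements across all trainable parameter tensors
    return sum(p.numel() for p in model.parameters() if p.requires_grad)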
Example #2
def test(config_path):
    # cfg
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    model_cfg = config.model.second
    input_cfg = config.train_input_reader
    # builds
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    #
    start = time.time()
    input_cfg.database_sampler.database_sampler_name = "DataBaseSamplerV3"
    training = True
    dataset = dataset_builder.build(input_cfg, model_cfg, training,
                                    voxel_generator, target_assigner)
    dataset = DatasetWrapper(dataset)

    print(len(dataset))
    example1 = dataset[2]
    #example2 = dataset[22]
    #example3 = dataset[122]
    print(time.time() - start, 'sec')
Example #3
    def _init_net(self):
        self.config = pipeline_pb2.TrainEvalPipelineConfig()
        with open(self.config_f, "r") as f:
            proto_str = f.read()
            text_format.Merge(proto_str, self.config)

        self.input_cfg = self.config.eval_input_reader
        self.model_cfg = self.config.model.second
        self.train_cfg = self.config.train_config
        self.class_names = list(self.input_cfg.class_names)
        self.center_limit_range = self.model_cfg.post_center_limit_range

        # BUILD VOXEL GENERATOR
        voxel_generator = voxel_builder.build(self.model_cfg.voxel_generator)
        bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
        box_coder = box_coder_builder.build(self.model_cfg.box_coder)
        target_assigner_cfg = self.model_cfg.target_assigner
        self.target_assigner = target_assigner_builder.build(
            target_assigner_cfg, bv_range, box_coder)

        self.net = second_builder.build(self.model_cfg, voxel_generator,
                                        self.target_assigner)
        self.net.cuda()
        if self.train_cfg.enable_mixed_precision:
            self.net.half()
            self.net.metrics_to_float()
            self.net.convert_norm_to_float(self.net)
        torchplus.train.try_restore_latest_checkpoints(self.model_dir,
                                                       [self.net])
        print('Successfully loaded latest checkpoint from {}'.format(self.model_dir))
    def _build(self):
        config = self.config
        input_cfg = config.eval_input_reader
        model_cfg = config.model.second
        train_cfg = config.train_config
        batch_size = 1
        # voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
        # bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
        # grid_size = voxel_generator.grid_size
        # self.voxel_generator = voxel_generator
        fv_generator = voxel_builder.build(model_cfg.voxel_generator)
        bv_range = fv_generator.cartesian_coord_range[[0, 1, 3, 4]]
        fv_dim = fv_generator.fv_dim
        self.fv_generator = fv_generator

        vfe_num_filters = list(model_cfg.voxel_feature_extractor.num_filters)

        # box_coder = box_coder_builder.build(model_cfg.box_coder)
        # target_assigner_cfg = model_cfg.target_assigner
        # target_assigner = target_assigner_builder.build(
        #     target_assigner_cfg, bv_range, box_coder)
        # self.target_assigner = target_assigner
        out_size_factor = model_cfg.rpn.layer_strides[0] // model_cfg.rpn.upsample_strides[0]
        self.net = second_builder.build(model_cfg,
                                        fv_generator,
                                        RGB_embedding=self.RGB_embedding)
        self.net.cuda().eval()
        if train_cfg.enable_mixed_precision:
            self.net.half()
            self.net.metrics_to_float()
            self.net.convert_norm_to_float(self.net)
        feature_map_size = fv_dim[:2] // out_size_factor
        feature_map_size = [*feature_map_size, 1][::-1]
Example #5
def build_network(model_cfg, measure_time=False):
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    # box_coder.custom_ndim = target_assigner._anchor_generators[0].custom_ndim
    return voxel_generator, target_assigner
    def build_network(self):
        model_cfg = self.config.model.second
        voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
        bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
        box_coder = box_coder_builder.build(model_cfg.box_coder)
        target_assigner_cfg = model_cfg.target_assigner
        target_assigner = target_assigner_builder.build(target_assigner_cfg, bv_range, box_coder)
        box_coder.custom_ndim = target_assigner._anchor_generators[0].custom_ndim
        net = second_builder.build(model_cfg, voxel_generator, target_assigner, measure_time=False)
        return net
Example #7
def build_network(model_cfg, measure_time=False):
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    net = second_builder.build(
        model_cfg, voxel_generator, target_assigner, measure_time=measure_time)
    return net
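A usage sketch for this helper, following the config-loading pattern from the other examples (the config path here is hypothetical):

config = pipeline_pb2.TrainEvalPipelineConfig()
with open("configs/car.fhd.config", "r") as f:  # hypothetical path
    text_format.Merge(f.read(), config)
net = build_network(config.model.second, measure_time=True)
net.cuda().eval()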
Example #8
def build_network(model_cfg, measure_time=False):
    # generate voxel
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)

    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner = target_assigner_builder.build(model_cfg.target_assigner, bv_range, box_coder)

    # box_coder.custom_ndim = target_assigner._anchor_generators[0].custom_ndim
    # model build
    net = second_builder.build(model_cfg, voxel_generator, target_assigner, measure_time=measure_time)
    return net
Example #9
    def _build(self):
        config = self.config
        input_cfg = config.eval_input_reader
        model_cfg = config.model.second
        train_cfg = config.train_config
        batch_size = 1
        voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
        bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
        grid_size = voxel_generator.grid_size
        self.voxel_generator = voxel_generator
        vfe_num_filters = list(model_cfg.voxel_feature_extractor.num_filters)

        box_coder = box_coder_builder.build(model_cfg.box_coder)
        target_assigner_cfg = model_cfg.target_assigner
        target_assigner = target_assigner_builder.build(
            target_assigner_cfg, bv_range, box_coder)
        self.target_assigner = target_assigner
        out_size_factor = model_cfg.rpn.layer_strides[0] / model_cfg.rpn.upsample_strides[0]
        out_size_factor *= model_cfg.middle_feature_extractor.downsample_factor
        out_size_factor = int(out_size_factor)
        assert out_size_factor > 0
        self.net = second_builder.build(model_cfg, voxel_generator,
                                        target_assigner)
        self.net.cuda().eval()
        if train_cfg.enable_mixed_precision:
            self.net.half()
            self.net.metrics_to_float()
            self.net.convert_norm_to_float(self.net)
        feature_map_size = grid_size[:2] // out_size_factor
        feature_map_size = [*feature_map_size, 1][::-1]
        ret = target_assigner.generate_anchors(feature_map_size)
        anchors_dict = target_assigner.generate_anchors_dict(feature_map_size)
        #print("feature_map_size is ",feature_map_size)
        #print("generated_anchors shape is",ret['anchors'].shape)
        anchors = ret["anchors"]
        #print("",ret['anchors'][0,79,79,1,:])
        anchors = anchors.reshape([-1, 7])
        #anchors_reshape = anchors.reshape([1,200,176,14])
        #print("",anchors_reshape[0,79,79,7:])
        matched_thresholds = ret["matched_thresholds"]
        unmatched_thresholds = ret["unmatched_thresholds"]
        anchors_bv = box_np_ops.rbbox2d_to_near_bbox(anchors[:,
                                                             [0, 1, 3, 4, 6]])
        self.anchor_cache = {
            "anchors": anchors,
            "anchors_bv": anchors_bv,
            "matched_thresholds": matched_thresholds,
            "unmatched_thresholds": unmatched_thresholds,
            "anchors_dict": anchors_dict,
        }
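As a worked example of the shape bookkeeping above, using the grid from the first example and an assumed out_size_factor of 2 (layer_strides[0] // upsample_strides[0], times any middle-extractor downsampling):

import numpy as np

grid_size = np.array([352, 400, 10])                 # (x, y, z) voxel counts
out_size_factor = 2                                  # assumed
feature_map_size = grid_size[:2] // out_size_factor  # [176, 200]
feature_map_size = [*feature_map_size, 1][::-1]      # [1, 200, 176], i.e. (z, y, x) order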
def build_network(model_cfg, measure_time=False):
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3,
                                                  4]]  #[-50,50,-50,50]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner  # assigns sizes and other config for each of the 10 classes
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    box_coder.custom_ndim = target_assigner._anchor_generators[0].custom_ndim
    net = second_builder.build(model_cfg,
                               voxel_generator,
                               target_assigner,
                               measure_time=measure_time)
    return net
Example #11
def build_inference_net(config_path,
                        model_dir,
                        result_path=None,
                        predict_test=False,
                        ckpt_path=None,
                        ref_detfile=None,
                        pickle_result=True,
                        measure_time=False,
                        batch_size=1):
    model_dir = pathlib.Path(model_dir)
    if predict_test:
        result_name = 'predict_test'
    else:
        result_name = 'eval_results'
    if result_path is None:
        result_path = model_dir / result_name
    else:
        result_path = pathlib.Path(result_path)
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)

    input_cfg = config.eval_input_reader  # needed for the batch_size fallback below
    model_cfg = config.model.second
    detection_2d_path = config.train_config.detection_2d_path
    center_limit_range = model_cfg.post_center_limit_range
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    class_names = target_assigner.classes
    net = second_builder.build(model_cfg,
                               voxel_generator,
                               target_assigner,
                               measure_time=measure_time)
    net.cuda()

    if ckpt_path is None:
        print("load existing model")
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)
    batch_size = batch_size or input_cfg.batch_size
    #batch_size = 1
    net.eval()
    return net
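A usage sketch (both paths are hypothetical):

net = build_inference_net("configs/car.fhd.config",  # hypothetical config
                          "/path/to/model_dir")      # hypothetical checkpoint dir
# net is on the GPU and in eval mode, ready for example_convert_to_torch batches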
Example #12
def build_network(model_cfg, measure_time=False):
    """
    build voxel generator, box codder, target assigner
        and network, from model cfg
    """
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    box_coder.custom_ndim = target_assigner._anchor_generators[0].custom_ndim
    net = second_builder.build(model_cfg,
                               voxel_generator,
                               target_assigner,
                               measure_time=measure_time)
    return net
Example #13
def test(config_path):
    #
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    model_cfg = config.model.second
    #
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    #
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    #
    num_rpn_input_filters = 64
    rpn = RPN(use_norm=True,
              num_class=model_cfg.num_class,
              layer_nums=list(model_cfg.rpn.layer_nums),
              layer_strides=list(model_cfg.rpn.layer_strides),
              num_filters=list(model_cfg.rpn.num_filters),
              upsample_strides=list(model_cfg.rpn.upsample_strides),
              num_upsample_filters=list(model_cfg.rpn.num_upsample_filters),
              num_input_filters=num_rpn_input_filters * 2,
              num_anchor_per_loc=target_assigner.num_anchors_per_location,
              encode_background_as_zeros=model_cfg.encode_background_as_zeros,
              use_direction_classifier=model_cfg.use_direction_classifier,
              use_bev=model_cfg.use_bev,
              num_groups=model_cfg.rpn.num_groups,
              use_groupnorm=model_cfg.rpn.use_groupnorm,
              box_code_size=target_assigner.box_coder.code_size)
    print(count_parameters(rpn))  # 5M
    spatial_features = torch.randn(1, num_rpn_input_filters * 2, 400, 768)
    spatial_features = spatial_features.cuda()
    rpn = rpn.cuda()
    # spatial_features [Batch, C, H, W]
    preds_dict = rpn(spatial_features)
    # box_preds [Batch, H/2, W/2, 14] (14 = num_anchor_per_loc * box_code_size)
    box_preds = preds_dict["box_preds"]
    print(box_preds.shape)
    # cls_preds [Batch, H/2, W/2, 2] (2 = num_anchor_per_loc * num_class)
    cls_preds = preds_dict["cls_preds"]
    print(cls_preds.shape)
Example #14
def set_model(config_path, model_dir, ckpt_path=None, ref_detfile=None):
    model_dir = pathlib.Path(model_dir)
    result_name = 'predict_test'
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)

    input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    class_names = list(input_cfg.class_names)
    center_limit_range = model_cfg.post_center_limit_range
    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)

    net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net.cuda()
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)

    if ckpt_path is None:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)

    return net, input_cfg, model_cfg, train_cfg, class_names, voxel_generator, target_assigner
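Since set_model returns a seven-element tuple, callers unpack it positionally; a sketch with hypothetical paths:

(net, input_cfg, model_cfg, train_cfg,
 class_names, voxel_generator, target_assigner) = set_model(
     "configs/car.fhd.config", "/path/to/model_dir")  # hypothetical paths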
Example #15
def test(config_path=args.config_path,
         model_dir=args.model_dir,
         result_path=None,
         create_folder=False,
         pickle_result=True,
         include_roadmap=False,
         device=1):
    """train a VoxelNet model specified by a config file.
    """
    if create_folder:
        if pathlib.Path(model_dir).exists():
            model_dir = torchplus.train.create_folder(model_dir)

    model_dir = pathlib.Path(model_dir)
    model_dir.mkdir(parents=True, exist_ok=True)
    eval_checkpoint_dir = model_dir / 'eval_checkpoints'
    eval_checkpoint_dir.mkdir(parents=True, exist_ok=True)
    if result_path is None:
        result_path = model_dir / 'results'
    config_file_bkp = "pipeline.config"
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    shutil.copyfile(config_path, str(model_dir / config_file_bkp))
    input_cfg = config.train_input_reader
    eval_input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    batch_size = 1
    class_names = list(input_cfg.class_names)
    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    grid_size = voxel_generator.grid_size
    ######################
    # BUILD TARGET ASSIGNER
    ######################
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    ######################
    # BUILD NET
    ######################
    center_limit_range = model_cfg.post_center_limit_range
    net = second_builder.build(model_cfg, voxel_generator, target_assigner,
                               include_roadmap)
    net.cuda().eval()

    print("num_trainable parameters:", len(list(net.parameters())))
    # for n, p in net.named_parameters():
    #     print(n, p.shape)

    #torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    torchplus.train.restore(args.model_path, net)
    #torchplus.train.restore("./ped_models_56/voxelnet-275130.tckpt",net)
    out_size_factor = model_cfg.rpn.layer_strides[0] / model_cfg.rpn.upsample_strides[0]
    print(out_size_factor)
    #out_size_factor *= model_cfg.middle_feature_extractor.downsample_factor
    out_size_factor = int(out_size_factor)
    feature_map_size = grid_size[:2] // out_size_factor
    feature_map_size = [*feature_map_size, 1][::-1]
    print(feature_map_size)
    ret = target_assigner.generate_anchors(feature_map_size)
    #anchors_dict = target_assigner.generate_anchors_dict(feature_map_size)
    anchors = ret["anchors"]
    anchors = anchors.reshape([-1, 7])
    matched_thresholds = ret["matched_thresholds"]
    unmatched_thresholds = ret["unmatched_thresholds"]
    anchors_bv = box_np_ops.rbbox2d_to_near_bbox(anchors[:, [0, 1, 3, 4, 6]])
    anchor_cache = {
        "anchors": anchors,
        "anchors_bv": anchors_bv,
        "matched_thresholds": matched_thresholds,
        "unmatched_thresholds": unmatched_thresholds,
        #"anchors_dict": anchors_dict,
    }

    am = ArgoverseMap()
    dt_annos = []

    root_dir = os.path.join('./../../argodataset/argoverse-tracking/',
                            args.set)
    argoverse_loader = ArgoverseTrackingLoader(root_dir)

    prog_cnt = 0
    for seq in range(len(argoverse_loader)):
        argoverse_data = argoverse_loader[seq]
        nlf = argoverse_data.num_lidar_frame
        for frame in range(nlf):
            prog_cnt += 1
            if prog_cnt % 50 == 0:
                print(prog_cnt)
            points = argoverse_data.get_lidar(frame)
            roi_pts = copy.deepcopy(points)
            city_name = argoverse_data.city_name
            city_to_egovehicle_se3 = argoverse_data.get_pose(frame)
            '''
            roi_pts = city_to_egovehicle_se3.transform_point_cloud(roi_pts)  # put into city coords
            #non roi
            roi_pts_flag = am.remove_non_roi_points(roi_pts, city_name) # remove non-driveable region
            roi_pts = roi_pts[roi_pts_flag]
            roi_pts = am.remove_ground_surface(roi_pts, city_name)  # remove ground surface
    
            # convert city to lidar co-ordinates

            roi_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(roi_pts) 
            '''
            if args.include_roi or args.dr_area or not args.include_road_points:
                roi_pts = city_to_egovehicle_se3.transform_point_cloud(
                    roi_pts)  # put into city coords

            if args.include_roi:
                roi_pts_flag = am.remove_non_roi_points(
                    roi_pts, city_name)  # remove non-driveable region
                roi_pts = roi_pts[roi_pts_flag]

            if not args.include_roi and args.dr_area:
                roi_pts_flag = am.remove_non_driveable_area_points(
                    roi_pts, city_name)  # remove non-driveable region
                roi_pts = roi_pts[roi_pts_flag]

            if not args.include_road_points:
                roi_pts = am.remove_ground_surface(
                    roi_pts, city_name)  # remove ground surface

            # convert city to lidar co-ordinates
            if args.include_roi or args.dr_area or not args.include_road_points:
                roi_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(
                    roi_pts)

            roi_pts[:, 2] = roi_pts[:, 2] - 1.73

            pts_x, pts_y, pts_z = roi_pts[:, 0], roi_pts[:, 1], roi_pts[:, 2]

            input_dict = {
                'points': roi_pts,
                'pointcloud_num_features': 3,
            }

            out_size_factor = model_cfg.rpn.layer_strides[0] // model_cfg.rpn.upsample_strides[0]

            example = prep_pointcloud(
                input_dict=input_dict,
                root_path=None,
                voxel_generator=voxel_generator,
                target_assigner=target_assigner,
                max_voxels=input_cfg.max_number_of_voxels,
                class_names=list(input_cfg.class_names),
                training=False,
                create_targets=False,
                shuffle_points=input_cfg.shuffle_points,
                generate_bev=False,
                without_reflectivity=model_cfg.without_reflectivity,
                num_point_features=model_cfg.num_point_features,
                anchor_area_threshold=input_cfg.anchor_area_threshold,
                anchor_cache=anchor_cache,
                out_size_factor=out_size_factor,
                out_dtype=np.float32)

            if "anchors_mask" in example:
                example["anchors_mask"] = example["anchors_mask"].astype(
                    np.uint8)
            example["image_idx"] = str(seq) + "_" + str(frame)
            example["image_shape"] = np.array([400, 400], dtype=np.int32)
            example["road_map"] = None
            example["include_roadmap"] = False
            example["points"] = roi_pts
            #torch.save(example,"./network_input_examples/" + info)
            example = merge_second_batch([example])

            example_torch = example_convert_to_torch(example,
                                                     device=args.device)
            try:
                result_annos = predict_kitti_to_anno(
                    net, example_torch, input_cfg.class_names,
                    model_cfg.post_center_limit_range, model_cfg.lidar_input)
            except Exception:  # skip frames where prediction fails
                print(seq, frame)
                continue
            dt_annos += result_annos

    if pickle_result:
        sdi = args.save_path.rfind('/')
        save_dir = args.save_path[:sdi]
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)

        with open(args.save_path, 'wb') as f:
            pickle.dump(dt_annos, f)
def evaluate(config_path,
             model_dir,
             use_second_stage=False,
             use_endtoend=False,
             result_path=None,
             predict_test=False,
             ckpt_path=None,
             ref_detfile=None,
             pickle_result=True,
             measure_time=False,
             batch_size=None):
    model_dir = pathlib.Path(model_dir)
    if predict_test:
        result_name = 'predict_test_0095'
    else:
        result_name = 'eval_results'
    if result_path is None:
        result_path = model_dir / result_name
    else:
        result_path = pathlib.Path(result_path)
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)

    input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    
    center_limit_range = model_cfg.post_center_limit_range
    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    class_names = target_assigner.classes
    if use_second_stage:    
        net = second_2stage_builder.build(model_cfg, voxel_generator, target_assigner, measure_time=measure_time)
    elif use_endtoend:
        net = second_endtoend_builder.build(model_cfg, voxel_generator, target_assigner, measure_time=measure_time)
    else:
        net = second_builder.build(model_cfg, voxel_generator, target_assigner, measure_time=measure_time)
    net.cuda()
    #########################################
    # net = torch.nn.DataParallel(net)
    #########################################
    if ckpt_path is None:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)
    if train_cfg.enable_mixed_precision:
        net.half()
        print("half inference!")
        net.metrics_to_float()
        net.convert_norm_to_float(net)
    batch_size = batch_size or input_cfg.batch_size
    eval_dataset = input_reader_builder_tr.build(
        input_cfg,
        model_cfg,
        training=False,
        voxel_generator=voxel_generator,
        target_assigner=target_assigner)
    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=0,# input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch)

    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32

    net.eval()
    result_path_step = result_path / f"step_{net.get_global_step()}"
    result_path_step.mkdir(parents=True, exist_ok=True)
    t = time.time()
    dt_annos = []
    global_set = None
    print("Generate output labels...")
    bar = ProgressBar()
    bar.start((len(eval_dataset) + batch_size - 1) // batch_size)
    prep_example_times = []
    prep_times = []
    t2 = time.time()
    for example in iter(eval_dataloader):
        if measure_time:
            prep_times.append(time.time() - t2)
            t1 = time.time()
            torch.cuda.synchronize()
        example = example_convert_to_torch(example, float_dtype)
        if measure_time:
            torch.cuda.synchronize()
            prep_example_times.append(time.time() - t1)

        if pickle_result:
            dt_annos += predict_kitti_to_anno(
                net, example, class_names, center_limit_range,
                model_cfg.lidar_input, global_set)
        else:
            _predict_kitti_to_file(net, example, result_path_step, class_names,
                                   center_limit_range, model_cfg.lidar_input)
        # print(json.dumps(net.middle_feature_extractor.middle_conv.sparity_dict))
        bar.print_bar()
        if measure_time:
            t2 = time.time()

    examples_per_sec = len(eval_dataset) / (time.time() - t)
    print(f'generate label finished({examples_per_sec:.2f}/s). start eval:')
    if measure_time:
        print(f"avg example to torch time: {np.mean(prep_example_times) * 1000:.3f} ms")
        print(f"avg prep time: {np.mean(prep_times) * 1000:.3f} ms")
    for name, val in net.get_avg_time_dict().items():
        print(f"avg {name} time = {val * 1000:.3f} ms")
    if not predict_test:
        gt_annos = [info["annos"] for info in eval_dataset.dataset.kitti_infos]
        img_idx = [info["image_idx"] for info in eval_dataset.dataset.kitti_infos]
        if not pickle_result:
            dt_annos = kitti.get_label_annos(result_path_step)
        result = get_official_eval_result(gt_annos, dt_annos, class_names)
        # print(json.dumps(result, indent=2))
        print(result)
        result = get_coco_eval_result(gt_annos, dt_annos, class_names)
        print(result)
        if pickle_result:
            with open(result_path_step / "result.pkl", 'wb') as f:
                pickle.dump(dt_annos, f)
        # write annos to txt files
        os.makedirs(str(result_path_step) + '/txt', exist_ok=True)
        for i in range(len(dt_annos)):
            dt_annos[i]['dimensions'] = dt_annos[i]['dimensions'][:, [1, 2, 0]]
            result_lines = kitti.annos_to_kitti_label(dt_annos[i])
            image_idx = img_idx[i]
            with open(str(result_path_step) + '/txt/%06d.txt' % image_idx, 'w') as f:
                for result_line in result_lines:
                    f.write(result_line + '\n')
    else:
        os.makedirs(str(result_path_step) + '/txt', exist_ok=True)
        img_idx = [info["image_idx"] for info in eval_dataset.dataset.kitti_infos]
        for i in range(len(dt_annos)):
            dt_annos[i]['dimensions'] = dt_annos[i]['dimensions'][:, [1, 2, 0]]
            result_lines = kitti.annos_to_kitti_label(dt_annos[i])
            image_idx = img_idx[i]
            with open(str(result_path_step) + '/txt/%06d.txt' % image_idx, 'w') as f:
                for result_line in result_lines:
                    f.write(result_line + '\n')
def train(config_path,
          model_dir,
          use_fusion=True,
          use_ft=False,
          use_second_stage=True,
          use_endtoend=True,
          result_path=None,
          create_folder=False,
          display_step=50,
          summary_step=5,
          local_rank=0,
          pickle_result=True,
          patchs=None):
    """train a VoxelNet mod[el specified by a config file.
    """
    ############ tracking
    config_tr_path = '/mnt/new_iou/second.pytorch/second/mmMOT/experiments/second/spatio_test/config.yaml'
    load_tr_path = '/mnt/new_iou/second.pytorch/second/mmMOT/experiments/second/spatio_test/results'
    with open(config_tr_path) as f:
        config_tr = yaml.load(f, Loader=yaml.FullLoader)

    result_path_tr = load_tr_path
    config_tr = EasyDict(config_tr['common'])
    config_tr.save_path = os.path.dirname(config_tr_path)

    # create model
    # model_tr = build_model(config_tr)
    # model_tr.cuda()

    # optimizer_tr = build_optim(model_tr, config_tr)

    criterion_tr = build_criterion(config_tr.loss)

    last_iter = -1
    best_mota = 0
    # if load_tr_path:
    #     if False:
    #         best_mota, last_iter = load_state(
    #             load_tr_path, model_tr, optimizer=optimizer_tr)
    #     else:
    #         load_state(load_tr_path, model_tr)

    cudnn.benchmark = True

    # Data loading code
    train_transform, valid_transform = build_augmentation(config_tr.augmentation)

    # # train
    # train_dataset = build_dataset(
    #     config_tr,
    #     set_source='train',
    #     evaluate=False,
    #     train_transform=train_transform)
    # trainval_dataset = build_dataset(
    #     config_tr,
    #     set_source='train',
    #     evaluate=True,
    #     valid_transform=valid_transform)
    # val_dataset = build_dataset(
    #     config_tr,
    #     set_source='val',
    #     evaluate=True,
    #     valid_transform=valid_transform)

    # train_sampler = DistributedGivenIterationSampler(
    #     train_dataset,
    #     config_tr.lr_scheduler.max_iter,
    #     config_tr.batch_size,
    #     world_size=1,
    #     rank=0,
    #     last_iter=last_iter)

    # import pdb; pdb.set_trace()
    # train_loader = DataLoader(
    #     train_dataset,
    #     batch_size=config_tr.batch_size,
    #     shuffle=False,
    #     num_workers=config_tr.workers,
    #     pin_memory=True)

    tb_logger = SummaryWriter(config_tr.save_path + '/events')
    logger = create_logger('global_logger', config_tr.save_path + '/log.txt')
    # logger.info('args: {}'.format(pprint.pformat(args)))
    logger.info('config: {}'.format(pprint.pformat(config_tr)))

    # tracking_module = TrackingModule(model_tr, criterion_tr,
                                    #  config_tr.det_type)
    # tracking_module.model.train()
    #### tracking setup done

    if create_folder:
        if pathlib.Path(model_dir).exists():
            model_dir = torchplus.train.create_folder(model_dir)
    patchs = patchs or []
    model_dir = pathlib.Path(model_dir)
    model_dir.mkdir(parents=True, exist_ok=True)
    if result_path is None:
        result_path = model_dir / 'results'
    config_file_bkp = "pipeline.config"
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    for patch in patchs:
        patch = "config." + patch 
        exec(patch)
    shutil.copyfile(config_path, str(model_dir / config_file_bkp))
    input_cfg = config.train_input_reader
    eval_input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config

    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    ######################
    # BUILD TARGET ASSIGNER
    ######################
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    class_names = target_assigner.classes
    ######################
    # BUILD NET
    ######################
    center_limit_range = model_cfg.post_center_limit_range
    # if use_second_stage:
    #     net = second_2stage_builder.build(model_cfg, voxel_generator, target_assigner)
    if use_endtoend:
        net = second_endtoend_builder_spatio.build(model_cfg, voxel_generator, target_assigner, criterion_tr, config_tr.det_type)
    else:
        net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net.cuda()
    print("num_trainable parameters:", len(list(net.parameters())))

    for n, p in net.named_parameters():
        print(n, p.shape)
    # pth_name = './pre_weight/first_stage_gating_det/voxelnet-17013.tckpt'
    pth_name = './pre_weight/second_stage_gating_det/voxelnet-35000.tckpt'

    res_pre_weights = torch.load(pth_name)
    new_res_state_dict = OrderedDict()
    model_dict = net.state_dict()
    for k,v in res_pre_weights.items():
        if 'global_step' not in k:
            # if 'dir' not in k:
            new_res_state_dict[k] = v
    model_dict.update(new_res_state_dict)
    net.load_state_dict(model_dict)

    # for k, weight in dict(net.named_parameters()).items(): # lidar_conv, p_lidar_conv, fusion_module, w_det, w_link, appearance, point_net
    #     if 'middle_feature_extractor' in '%s'%(k) or 'rpn' in '%s'%(k) or 'second_rpn' in '%s'%(k):
    #         weight.requires_grad = False

    # BUILD OPTIMIZER
    #####################
    # we need global_step to create lr_scheduler, so restore net first.
    torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    gstep = net.get_global_step() - 1
    optimizer_cfg = train_cfg.optimizer
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)
    loss_scale = train_cfg.loss_scale_factor
    mixed_optimizer = optimizer_builder.build(optimizer_cfg, net, mixed=train_cfg.enable_mixed_precision, loss_scale=loss_scale)
    optimizer = mixed_optimizer

    # must restore optimizer AFTER using MixedPrecisionWrapper
    torchplus.train.try_restore_latest_checkpoints(model_dir,
                                                   [mixed_optimizer])
    lr_scheduler = lr_scheduler_builder.build(optimizer_cfg, optimizer, train_cfg.steps)
    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32
    ######################
    # PREPARE INPUT
    ######################
    # import pdb; pdb.set_trace()
    dataset = input_reader_builder_tr_vid_spatio.build(
        input_cfg,
        model_cfg,
        training=True,
        voxel_generator=voxel_generator,
        target_assigner=target_assigner,
        config_tr=config_tr,
        set_source='train',
        evaluate=False,
        train_transform=train_transform)
    eval_dataset = input_reader_builder_tr_vid_spatio.build(
        eval_input_cfg,
        model_cfg,
        training=False,
        voxel_generator=voxel_generator,
        target_assigner=target_assigner,
        config_tr=config_tr,
        set_source='val',
        evaluate=True,
        valid_transform=valid_transform)

    def _worker_init_fn(worker_id):
        time_seed = np.array(time.time(), dtype=np.int32)
        np.random.seed(time_seed + worker_id)
        print(f"WORKER {worker_id} seed:", np.random.get_state()[1][0])

    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=input_cfg.batch_size,
        shuffle=True,
        num_workers=input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch_tr_vid_spatio,
        worker_init_fn=_worker_init_fn)

    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=eval_input_cfg.batch_size,
        shuffle=False,
        num_workers=eval_input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch_tr_vid_spatio)
    
    data_iter = iter(dataloader)

    ######################
    # TRAINING
    ######################
    training_detail = []
    log_path = model_dir / 'log.txt'
    training_detail_path = model_dir / 'log.json'
    if training_detail_path.exists():
        with open(training_detail_path, 'r') as f:
            training_detail = json.load(f)
    logf = open(log_path, 'a')
    logf.write(proto_str)
    logf.write("\n")
    summary_dir = model_dir / 'summary'
    summary_dir.mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(str(summary_dir))

    total_step_elapsed = 0
    remain_steps = train_cfg.steps - net.get_global_step()
    t = time.time()
    ckpt_start_time = t

    total_loop = train_cfg.steps // train_cfg.steps_per_eval + 1
    clear_metrics_every_epoch = train_cfg.clear_metrics_every_epoch

    if train_cfg.steps % train_cfg.steps_per_eval == 0:
        total_loop -= 1
    mixed_optimizer.zero_grad()
    
    # optimizer_tr.zero_grad()
    logger = logging.getLogger('global_logger')
    best_mota = 0
    losses = AverageMeter(config_tr.print_freq)

    total_steps = train_cfg.steps
    total_loop = total_steps // len(dataloader)
    
    kkkk = 0
    for step in range(total_loop):
        for i, (example) in enumerate(dataloader):

            curr_step = 0 + i
            kkkk += 1
            lr_scheduler.step(net.get_global_step())

            example_torch = example_convert_to_torch(example, float_dtype)

            batch_size = example["anchors"].shape[0]

            ret_dict = net(example_torch, train_param=True)

            cls_preds = ret_dict["cls_preds"]
            loss = ret_dict["loss"].mean()
            cls_loss_reduced = ret_dict["cls_loss_reduced"].mean()
            loc_loss_reduced = ret_dict["loc_loss_reduced"].mean()
            cls_pos_loss = ret_dict["cls_pos_loss"]
            cls_neg_loss = ret_dict["cls_neg_loss"]
            loc_loss = ret_dict["loc_loss"]
            cls_loss = ret_dict["cls_loss"]
            dir_loss_reduced = ret_dict["dir_loss_reduced"]
            cared = ret_dict["cared"]
            # loss_tr = ret_dict["loss_tr"]

            if use_second_stage or use_endtoend:
                labels = ret_dict["labels"]
            else:
                labels = example_torch["labels"]
            if train_cfg.enable_mixed_precision:
                loss *= loss_scale

            try:
                loss.backward()
            except Exception:
                pass  # skip steps where backward fails
            # torch.nn.utils.clip_grad_norm_(net.parameters(), 10.0)
            # optimizer_tr.step()
            # optimizer_tr.zero_grad()
            mixed_optimizer.step()
            mixed_optimizer.zero_grad()
            net.update_global_step()
            net_metrics = net.update_metrics(cls_loss_reduced,
                                                loc_loss_reduced, cls_preds,
                                                labels, cared)

            step_time = (time.time() - t)
            t = time.time()
            metrics = {}
            num_pos = int((labels > 0)[0].float().sum().cpu().numpy())
            num_neg = int((labels == 0)[0].float().sum().cpu().numpy())
            if 'anchors_mask' not in example_torch:
                num_anchors = example_torch['anchors'].shape[1]
            else:
                num_anchors = int(example_torch['anchors_mask'][0].sum())
            global_step = net.get_global_step()
            # print(step)
            if global_step % display_step == 0:
                loc_loss_elem = [
                    float(loc_loss[:, :, i].sum().detach().cpu().numpy() /
                            batch_size) for i in range(loc_loss.shape[-1])
                ]
                metrics["type"] = "step_info"
                metrics["step"] = global_step
                metrics["steptime"] = step_time
                metrics.update(net_metrics)
                metrics["loss"] = {}
                metrics["loss"]["loc_elem"] = loc_loss_elem
                metrics["loss"]["cls_pos_rt"] = float(
                    cls_pos_loss.detach().cpu().numpy())
                metrics["loss"]["cls_neg_rt"] = float(
                    cls_neg_loss.detach().cpu().numpy())
                if model_cfg.use_direction_classifier:
                    metrics["loss"]["dir_rt"] = float(
                        dir_loss_reduced.detach().cpu().numpy())
                metrics["num_vox"] = int(example_torch["voxels"].shape[0])
                metrics["num_pos"] = int(num_pos)
                metrics["num_neg"] = int(num_neg)
                metrics["num_anchors"] = int(num_anchors)
                metrics["lr"] = float(
                    optimizer.lr)

                metrics["image_idx"] = example['image_idx'][0][7:]
                training_detail.append(metrics)
                flatted_metrics = flat_nested_json_dict(metrics)
                flatted_summarys = flat_nested_json_dict(metrics, "/")
                for k, v in flatted_summarys.items():
                    if isinstance(v, (list, tuple)):
                        v = {str(i): e for i, e in enumerate(v)}
                        if type(v) != str and ('loc_elem' not in k):
                            writer.add_scalars(k, v, global_step)
                    else:
                        if (type(v) != str) and ('loc_elem' not in k):
                            writer.add_scalar(k, v, global_step)

                metrics_str_list = []
                for k, v in flatted_metrics.items():
                    if isinstance(v, float):
                        metrics_str_list.append(f"{k}={v:.3}")
                    elif isinstance(v, (list, tuple)):
                        if v and isinstance(v[0], float):
                            v_str = ', '.join([f"{e:.3}" for e in v])
                            metrics_str_list.append(f"{k}=[{v_str}]")
                        else:
                            metrics_str_list.append(f"{k}={v}")
                    else:
                        metrics_str_list.append(f"{k}={v}")
                log_str = ', '.join(metrics_str_list)
                print(log_str, file=logf)
                print(log_str)

            ckpt_elasped_time = time.time() - ckpt_start_time
            if ckpt_elasped_time > train_cfg.save_checkpoints_secs:
                torchplus.train.save_models(model_dir, [net, optimizer], net.get_global_step())

                ckpt_start_time = time.time()

            if kkkk > 0 and (kkkk) % config_tr.val_freq == 0:
            # if True:
                torchplus.train.save_models(model_dir, [net, optimizer], net.get_global_step())
                net.eval()
                result_path_step = result_path / f"step_{net.get_global_step()}"
                result_path_step.mkdir(parents=True, exist_ok=True)
                print("#################################")
                print("#################################", file=logf)
                print("# EVAL")
                print("# EVAL", file=logf)
                print("#################################")
                print("#################################", file=logf)
                print("Generate output labels...")
                print("Generate output labels...", file=logf)
                t = time.time()
                dt_annos = []
                prog_bar = ProgressBar()
                net.clear_timer()
                prog_bar.start((len(eval_dataset) + eval_input_cfg.batch_size - 1) // eval_input_cfg.batch_size)
                for example in iter(eval_dataloader):
                    example = example_convert_to_torch(example, float_dtype)
                    if pickle_result:
                        results = predict_kitti_to_anno(
                            net, example, class_names, center_limit_range,
                            model_cfg.lidar_input)
                        dt_annos += results

                    else:
                        _predict_kitti_to_file(net, example, result_path_step,
                                            class_names, center_limit_range,
                                            model_cfg.lidar_input)

                    prog_bar.print_bar()

                examples_per_sec = len(eval_dataset) / (time.time() - t)
                print(f'generate label finished({examples_per_sec:.2f}/s). start eval:')
                print(f'generate label finished({examples_per_sec:.2f}/s). start eval:', file=logf)
                gt_annos = [
                    info["annos"] for info in eval_dataset.dataset.kitti_infos
                ]
                if not pickle_result:
                    dt_annos = kitti.get_label_annos(result_path_step)
                # result = get_official_eval_result_v2(gt_annos, dt_annos, class_names)
                # print(json.dumps(result, indent=2), file=logf)
                result = get_official_eval_result(gt_annos, dt_annos, class_names)
                print(result, file=logf)
                print(result)
                result_1 = result.split("\n")[:5]
                result_2 = result.split("\n")[10:15]
                result_3 = result.split("\n")[20:25]
                emh = ['0_easy', '1_mod', '2_hard']
                result_save = result_1
                for i in range(len(result_save)-1):
                    save_targ = result_save[i+1]
                    name_val = save_targ.split(':')[0].split(' ')[0]
                    value_val = save_targ.split(':')[1:]
                    for ev in range(3):
                        each_val = value_val[0].split(',')[ev]
                        merge_txt = 'AP_kitti/car_70/' + name_val+'/'+emh[ev]
                        try:
                            writer.add_scalar(merge_txt, float(each_val), global_step)
                        except ValueError:
                            pass  # skip values that fail to parse as a float
                if pickle_result:
                    with open(result_path_step / "result.pkl", 'wb') as f:
                        pickle.dump(dt_annos, f)
                writer.add_text('eval_result', result, global_step)

                logger.info('Evaluation on validation set:')
                # MOTA, MOTP, recall, prec, F1, fp, fn, id_switches = validate(
                #     val_dataset,
                #     net,
                #     str(0 + 1),
                #     config_tr,
                #     result_path_tr,
                #     part='val')
                # print(MOTA, MOTP, recall, prec, F1, fp, fn, id_switches)

                # curr_step = step
                # if tb_logger is not None:
                #     tb_logger.add_scalar('prec', prec, curr_step)
                #     tb_logger.add_scalar('recall', recall, curr_step)
                #     tb_logger.add_scalar('mota', MOTA, curr_step)
                #     tb_logger.add_scalar('motp', MOTP, curr_step)
                #     tb_logger.add_scalar('fp', fp, curr_step)
                #     tb_logger.add_scalar('fn', fn, curr_step)
                #     tb_logger.add_scalar('f1', F1, curr_step)
                #     tb_logger.add_scalar('id_switches', id_switches, curr_step)
                    # if lr_scheduler is not None:
                        # tb_logger.add_scalar('lr', current_lr, curr_step)

                # is_best = MOTA > best_mota
                # best_mota = max(MOTA, best_mota)
                # print(best_mota)

                # import pdb; pdb.set_trace()
                # save_checkpoint(
                #     {   'step': net.get_global_step(),
                #         'score_arch': config_tr.model.score_arch,
                #         'appear_arch': config_tr.model.appear_arch,
                #         'best_mota': best_mota,
                #         'state_dict': tracking_module.model.state_dict(),
                #         'optimizer': tracking_module.optimizer.state_dict(),
                #     }, is_best, config_tr.save_path + '/ckpt')

                # net.train()

    # save model before exit
    torchplus.train.save_models(model_dir, [net, optimizer],
                                net.get_global_step())
    logf.close()
#   remove_environment: false
#   kitti_info_path: "...kitti_infos_test.pkl"
#   kitti_root_path: "/SparseConvNet/second.pytorch/second/data/object"
# }
input_cfg = config.eval_input_reader

# model_cfg: structure and loss parameters of the net
model_cfg = config.model.second
# train_cfg: optimizer parameters and iteration steps
train_cfg = config.train_config
# class_names: ["Cyclist", "Pedestrian"]
class_names = list(input_cfg.class_names)
# post_center_limit_range: [0, -50, -2.5, 80, 50, -0.5]
center_limit_range = model_cfg.post_center_limit_range
# generate voxels and initial anchors
voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
box_coder = box_coder_builder.build(model_cfg.box_coder)
target_assigner_cfg = model_cfg.target_assigner
target_assigner = target_assigner_builder.build(target_assigner_cfg, bv_range,
                                                box_coder)

# voxel_size = voxel_generator.voxel_size
# pc_range = voxel_generator.point_cloud_range
# grid_size = voxel_generator.grid_size
# feature_map_size = grid_size[:2] // 2
# feature_map_size = [*feature_map_size, 1][::-1]
# points = np.fromfile(
#         str(point_file), dtype=np.float32,
#         count=-1).reshape([-1, 4])
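Continuing the commented sketch above, anchors for the feature map could be generated with the same calls used in the earlier examples (the stride of 2 is an assumption):

grid_size = voxel_generator.grid_size
feature_map_size = grid_size[:2] // 2            # assumed out_size_factor of 2
feature_map_size = [*feature_map_size, 1][::-1]
ret = target_assigner.generate_anchors(feature_map_size)
anchors = ret["anchors"].reshape([-1, 7])
anchors_bv = box_np_ops.rbbox2d_to_near_bbox(anchors[:, [0, 1, 3, 4, 6]])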
def test_onnx_for_trt(onnx_path, config_path, model_dir, ckpt_path=None):
    dummy_dev_pillar_x_ = np.random.random(size=(1, 1, 12000,
                                                 100)).astype(np.float32)
    dummy_dev_pillar_y_ = np.random.random(size=(1, 1, 12000,
                                                 100)).astype(np.float32)
    dummy_dev_pillar_z_ = np.random.random(size=(1, 1, 12000,
                                                 100)).astype(np.float32)
    dummy_dev_pillar_i_ = np.random.random(size=(1, 1, 12000,
                                                 100)).astype(np.float32)
    dummy_dev_num_points_per_pillar_ = np.random.random(size=(1, 1, 12000,
                                                              1)).astype(
                                                                  np.float32)
    dummy_dev_x_coors_for_sub_shaped_ = np.random.random(size=(1, 1, 12000,
                                                               100)).astype(
                                                                   np.float32)
    dummy_dev_y_coors_for_sub_shaped_ = np.random.random(size=(1, 1, 12000,
                                                               100)).astype(
                                                                   np.float32)
    dummy_dev_pillar_feature_mask_ = np.random.random(size=(1, 1, 12000,
                                                            100)).astype(
                                                                np.float32)

    model = onnx.load(onnx_path)
    engine = backend.prepare(model, device='CUDA:0', max_batch_size=1)
    print("model read success")
    print()
    output_data = engine.run(
        (dummy_dev_pillar_x_, dummy_dev_pillar_y_, dummy_dev_pillar_z_,
         dummy_dev_pillar_i_, dummy_dev_num_points_per_pillar_,
         dummy_dev_x_coors_for_sub_shaped_, dummy_dev_y_coors_for_sub_shaped_,
         dummy_dev_pillar_feature_mask_))

    # ##########compare with pytorch output #########################
    for i in range(len(output_data)):
        print(output_data[i].shape)
    print(output_data[0][0, 0, 0:100])

    model_dir = pathlib.Path(model_dir)
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)

    model_cfg = config.model.second
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    net = second_builder_for_official_onnx_and_cuda.build(
        model_cfg, voxel_generator, target_assigner)
    net.cuda()
    net.eval()

    # since the model is changed, don't restore first
    if ckpt_path is None:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)

    dummy_dev_pillar_x_ = torch.as_tensor(dummy_dev_pillar_x_, device="cuda")
    dummy_dev_pillar_y_ = torch.as_tensor(dummy_dev_pillar_y_, device="cuda")
    dummy_dev_pillar_z_ = torch.as_tensor(dummy_dev_pillar_z_, device="cuda")
    dummy_dev_pillar_i_ = torch.as_tensor(dummy_dev_pillar_i_, device="cuda")
    dummy_dev_num_points_per_pillar_ = torch.as_tensor(
        dummy_dev_num_points_per_pillar_, device="cuda")
    dummy_dev_x_coors_for_sub_shaped_ = torch.as_tensor(
        dummy_dev_x_coors_for_sub_shaped_, device="cuda")
    dummy_dev_y_coors_for_sub_shaped_ = torch.as_tensor(
        dummy_dev_y_coors_for_sub_shaped_, device="cuda")
    dummy_dev_pillar_feature_mask_ = torch.as_tensor(
        dummy_dev_pillar_feature_mask_, device="cuda")
    output_pytorch = net.voxel_feature_extractor(
        dummy_dev_pillar_x_, dummy_dev_pillar_y_, dummy_dev_pillar_z_,
        dummy_dev_pillar_i_, dummy_dev_num_points_per_pillar_,
        dummy_dev_x_coors_for_sub_shaped_, dummy_dev_y_coors_for_sub_shaped_,
        dummy_dev_pillar_feature_mask_)

    print(output_pytorch[0, 0, 0:100])
def train(config_path,
          model_dir,
          use_fusion=False,
          use_ft=False,
          use_second_stage=False,
          use_endtoend=False,
          result_path=None,
          create_folder=False,
          display_step=50,
          summary_step=5,
          local_rank=0,
          pickle_result=True,
          patchs=None):
    """train a VoxelNet model specified by a config file.
    """
    if create_folder:
        if pathlib.Path(model_dir).exists():
            model_dir = torchplus.train.create_folder(model_dir)
    patchs = patchs or []
    model_dir = pathlib.Path(model_dir)
    model_dir.mkdir(parents=True, exist_ok=True)
    if result_path is None:
        result_path = model_dir / 'results'
    config_file_bkp = "pipeline.config"
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    for patch in patchs:
        patch = "config." + patch
        exec(patch)
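    # e.g. a (hypothetical) patch string passed in `patchs`:
    #   "train_config.steps = 296960"
    # is executed as `exec("config.train_config.steps = 296960")`, mutating
    # the parsed proto in place before it is backed up below.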
    shutil.copyfile(config_path, str(model_dir / config_file_bkp))
    input_cfg = config.train_input_reader
    eval_input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config

    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    ######################
    # BUILD TARGET ASSIGNER
    ######################
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    class_names = target_assigner.classes
    ######################
    # BUILD NET
    ######################
    center_limit_range = model_cfg.post_center_limit_range
    if use_second_stage:
        net = second_2stage_builder.build(model_cfg, voxel_generator,
                                          target_assigner)
    elif use_endtoend:
        net = second_endtoend_builder.build(model_cfg, voxel_generator,
                                            target_assigner)
    else:
        net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net.cuda()
    # import pdb; pdb.set_trace()
    print("num_trainable parameters:", len(list(net.parameters())))
    # for n, p in net.named_parameters():
    #     print(n, p.shape)
    # pth_name = 'pre_weight/first_stage/fusion_split/voxelnet-35210.tckpt'
    # # pth_name = 'pre_weight/first_stage/fusion_split/voxelnet-20130.tckpt'

    # res_pre_weights = torch.load(pth_name)
    # new_res_state_dict = OrderedDict()
    # model_dict = net.state_dict()
    # for k,v in res_pre_weights.items():
    #     if 'global_step' not in k:
    #         if 'dir' not in k:
    #             new_res_state_dict[k] = v
    # model_dict.update(new_res_state_dict)
    # net.load_state_dict(model_dict)

    ######################
    if use_second_stage or use_endtoend:
        if use_fusion:
            # pth_name = 'pre_weight/8020/voxelnet-20130.tckpt'
            pth_name = 'pre_weight/first_stage/fusion_split/voxelnet-35210.tckpt'
            for i in range(30):
                print(
                    '################## load Fusion First stage weight complete #######################'
                )
        else:
            pth_name = 'pre_weight/first_stage/lidaronly/voxelnet-30950.tckpt'
            for i in range(30):
                print(
                    '################## load LiDAR Only First stage weight complete #######################'
                )

        res_pre_weights = torch.load(pth_name)
        new_res_state_dict = OrderedDict()
        model_dict = net.state_dict()
        for k, v in res_pre_weights.items():
            if 'global_step' not in k:
                if 'dir' not in k:
                    new_res_state_dict[k] = v
        model_dict.update(new_res_state_dict)
        net.load_state_dict(model_dict)

    ############ load FPN18 pre-weight #############
    if (use_fusion and not use_second_stage and not use_endtoend):
        # if True:
        #  or (use_endtoend and use_fusion):
        fpn_depth = 18
        pth_name = 'pre_weight/FPN' + str(fpn_depth) + '_retinanet_968.pth'
        res_pre_weights = torch.load(pth_name)
        new_res_state_dict = OrderedDict()
        model_dict = net.state_dict()
        for k, v in res_pre_weights['state_dict'].items():
            if ('regressionModel' not in k) and ('classificationModel'
                                                 not in k):
                name = k.replace('module', 'rpn')
                new_res_state_dict[name] = v
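        # e.g. a (hypothetical) checkpoint key 'module.conv1.weight' becomes
        # 'rpn.conv1.weight': the DataParallel 'module' prefix is swapped for
        # the net's 'rpn' submodule so the FPN weights line up by name.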
        model_dict.update(new_res_state_dict)
        net.load_state_dict(model_dict)
        for i in range(30):
            print('!!!!!!!!!!!!!!!!!! load FPN' + str(fpn_depth) +
                  ' weight complete !!!!!!!!!!!!!!!!!!')
    ################################################
    # BUILD OPTIMIZER
    #####################
    # we need global_step to create lr_scheduler, so restore net first.
    torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    gstep = net.get_global_step() - 1
    optimizer_cfg = train_cfg.optimizer
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)
    loss_scale = train_cfg.loss_scale_factor
    mixed_optimizer = optimizer_builder.build(
        optimizer_cfg,
        net,
        mixed=train_cfg.enable_mixed_precision,
        loss_scale=loss_scale)
    optimizer = mixed_optimizer
    """
    if train_cfg.enable_mixed_precision:
        mixed_optimizer = torchplus.train.MixedPrecisionWrapper(
            optimizer, loss_scale)
    else:
        mixed_optimizer = optimizer
    """
    # must restore optimizer AFTER using MixedPrecisionWrapper
    torchplus.train.try_restore_latest_checkpoints(model_dir,
                                                   [mixed_optimizer])
    lr_scheduler = lr_scheduler_builder.build(optimizer_cfg, optimizer,
                                              train_cfg.steps)
    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32
    ######################
    # PREPARE INPUT
    ######################

    dataset = input_reader_builder.build(input_cfg,
                                         model_cfg,
                                         training=True,
                                         voxel_generator=voxel_generator,
                                         target_assigner=target_assigner)
    eval_dataset = input_reader_builder.build(eval_input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner)

    def _worker_init_fn(worker_id):
        time_seed = np.array(time.time(), dtype=np.int32)
        np.random.seed(time_seed + worker_id)
        print(f"WORKER {worker_id} seed:", np.random.get_state()[1][0])

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=input_cfg.batch_size,
                                             shuffle=True,
                                             num_workers=input_cfg.num_workers,
                                             pin_memory=False,
                                             collate_fn=merge_second_batch,
                                             worker_init_fn=_worker_init_fn)
    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=eval_input_cfg.batch_size,
        shuffle=False,
        num_workers=eval_input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch)

    data_iter = iter(dataloader)

    ######################
    # TRAINING
    ######################
    training_detail = []
    log_path = model_dir / 'log.txt'
    training_detail_path = model_dir / 'log.json'
    if training_detail_path.exists():
        with open(training_detail_path, 'r') as f:
            training_detail = json.load(f)
    logf = open(log_path, 'a')
    logf.write(proto_str)
    logf.write("\n")
    summary_dir = model_dir / 'summary'
    summary_dir.mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(str(summary_dir))

    total_step_elapsed = 0
    remain_steps = train_cfg.steps - net.get_global_step()
    t = time.time()
    ckpt_start_time = t

    total_loop = train_cfg.steps // train_cfg.steps_per_eval + 1
    # total_loop = remain_steps // train_cfg.steps_per_eval + 1
    clear_metrics_every_epoch = train_cfg.clear_metrics_every_epoch

    if train_cfg.steps % train_cfg.steps_per_eval == 0:
        total_loop -= 1
    mixed_optimizer.zero_grad()
    try:
        for _ in range(total_loop):
            if total_step_elapsed + train_cfg.steps_per_eval > train_cfg.steps:
                steps = train_cfg.steps % train_cfg.steps_per_eval
            else:
                steps = train_cfg.steps_per_eval
            for step in range(steps):
                lr_scheduler.step(net.get_global_step())
                try:
                    example = next(data_iter)
                except StopIteration:
                    print("end epoch")
                    if clear_metrics_every_epoch:
                        net.clear_metrics()
                    data_iter = iter(dataloader)
                    example = next(data_iter)
                example_torch = example_convert_to_torch(example, float_dtype)

                batch_size = example["anchors"].shape[0]

                ret_dict = net(example_torch)

                # box_preds = ret_dict["box_preds"]
                cls_preds = ret_dict["cls_preds"]
                loss = ret_dict["loss"].mean()
                cls_loss_reduced = ret_dict["cls_loss_reduced"].mean()
                loc_loss_reduced = ret_dict["loc_loss_reduced"].mean()
                cls_pos_loss = ret_dict["cls_pos_loss"]
                cls_neg_loss = ret_dict["cls_neg_loss"]
                loc_loss = ret_dict["loc_loss"]
                cls_loss = ret_dict["cls_loss"]
                dir_loss_reduced = ret_dict["dir_loss_reduced"]
                cared = ret_dict["cared"]
                # idx_offset = ret_dict["idx_offset"]

                # labels = example_torch["labels"]
                if use_second_stage or use_endtoend:
                    labels = ret_dict["labels"]
                else:
                    labels = example_torch["labels"]
                if train_cfg.enable_mixed_precision:
                    loss *= loss_scale
                loss.backward()
                # import pdb; pdb.set_trace()
                torch.nn.utils.clip_grad_norm_(net.parameters(), 10.0)
                mixed_optimizer.step()
                mixed_optimizer.zero_grad()
                net.update_global_step()
                net_metrics = net.update_metrics(cls_loss_reduced,
                                                 loc_loss_reduced, cls_preds,
                                                 labels, cared)

                step_time = (time.time() - t)
                t = time.time()
                metrics = {}
                num_pos = int((labels > 0)[0].float().sum().cpu().numpy())
                num_neg = int((labels == 0)[0].float().sum().cpu().numpy())
                if 'anchors_mask' not in example_torch:
                    num_anchors = example_torch['anchors'].shape[1]
                else:
                    num_anchors = int(example_torch['anchors_mask'][0].sum())
                global_step = net.get_global_step()
                # print(step)
                if global_step % display_step == 0:
                    loc_loss_elem = [
                        float(loc_loss[:, :, i].sum().detach().cpu().numpy() /
                              batch_size) for i in range(loc_loss.shape[-1])
                    ]
                    metrics["type"] = "step_info"
                    metrics["step"] = global_step
                    metrics["steptime"] = step_time
                    metrics.update(net_metrics)
                    metrics["loss"] = {}
                    metrics["loss"]["loc_elem"] = loc_loss_elem
                    metrics["loss"]["cls_pos_rt"] = float(
                        cls_pos_loss.detach().cpu().numpy())
                    metrics["loss"]["cls_neg_rt"] = float(
                        cls_neg_loss.detach().cpu().numpy())
                    if model_cfg.use_direction_classifier:
                        metrics["loss"]["dir_rt"] = float(
                            dir_loss_reduced.detach().cpu().numpy())
                    metrics["num_vox"] = int(example_torch["voxels"].shape[0])
                    metrics["num_pos"] = int(num_pos)
                    metrics["num_neg"] = int(num_neg)
                    metrics["num_anchors"] = int(num_anchors)
                    # metrics["idx_offset_mean"] = float(idx_offset.mean().detach().cpu().numpy())
                    # metrics["idx_offset_sum"] = float(idx_offset.sum().detach().cpu().numpy())
                    # metrics["lr"] = float(
                    #     mixed_optimizer.param_groups[0]['lr'])
                    metrics["lr"] = float(optimizer.lr)

                    metrics["image_idx"] = example['image_idx'][0]
                    training_detail.append(metrics)
                    flatted_metrics = flat_nested_json_dict(metrics)
                    flatted_summarys = flat_nested_json_dict(metrics, "/")
                    for k, v in flatted_summarys.items():
                        if isinstance(v, (list, tuple)):
                            v = {str(i): e for i, e in enumerate(v)}
                            if 'loc_elem' not in k:
                                writer.add_scalars(k, v, global_step)
                        elif not isinstance(v, str) and 'loc_elem' not in k:
                            writer.add_scalar(k, v, global_step)

                    # if use_second_stage or use_endtoend:
                    #     bev_logs =  ret_dict['bev_crops_output'][:64,0,...].view(64,1,14,14)
                    #     bev_vis = torchvision.utils.make_grid(bev_logs,normalize=True,scale_each=True)
                    #     writer.add_image('bev_crop',img_tensor=bev_vis, global_step=global_step)
                    #     if ret_dict['concat_crops_output'] is not None:
                    #         concat_logs =  ret_dict['concat_crops_output'][:64,0,...].view(64,1,14,14)
                    #         concat_vis = torchvision.utils.make_grid(concat_logs,normalize=True,scale_each=True)
                    #         writer.add_image('concat_crop',img_tensor=concat_vis, global_step=global_step)

                    metrics_str_list = []
                    for k, v in flatted_metrics.items():
                        if isinstance(v, float):
                            metrics_str_list.append(f"{k}={v:.3}")
                        elif isinstance(v, (list, tuple)):
                            if v and isinstance(v[0], float):
                                v_str = ', '.join([f"{e:.3}" for e in v])
                                metrics_str_list.append(f"{k}=[{v_str}]")
                            else:
                                metrics_str_list.append(f"{k}={v}")
                        else:
                            metrics_str_list.append(f"{k}={v}")
                    log_str = ', '.join(metrics_str_list)
                    print(log_str, file=logf)
                    print(log_str)
                ckpt_elapsed_time = time.time() - ckpt_start_time
                if ckpt_elapsed_time > train_cfg.save_checkpoints_secs:
                    torchplus.train.save_models(model_dir, [net, optimizer],
                                                net.get_global_step())

                    ckpt_start_time = time.time()
            total_step_elapsed += steps

            torchplus.train.save_models(model_dir, [net, optimizer],
                                        net.get_global_step())
            net.eval()
            result_path_step = result_path / f"step_{net.get_global_step()}"
            result_path_step.mkdir(parents=True, exist_ok=True)
            print("#################################")
            print("#################################", file=logf)
            print("# EVAL")
            print("# EVAL", file=logf)
            print("#################################")
            print("#################################", file=logf)
            print("Generate output labels...")
            print("Generate output labels...", file=logf)
            t = time.time()
            dt_annos = []
            prog_bar = ProgressBar()
            net.clear_timer()
            prog_bar.start(
                (len(eval_dataset) + eval_input_cfg.batch_size - 1) //
                eval_input_cfg.batch_size)
            for example in iter(eval_dataloader):
                example = example_convert_to_torch(example, float_dtype)
                if pickle_result:
                    dt_annos += predict_kitti_to_anno(net, example,
                                                      class_names,
                                                      center_limit_range,
                                                      model_cfg.lidar_input)
                else:
                    _predict_kitti_to_file(net, example, result_path_step,
                                           class_names, center_limit_range,
                                           model_cfg.lidar_input)

                prog_bar.print_bar()

            sec_per_ex = len(eval_dataset) / (time.time() - t)

            print(f'generate label finished({sec_per_ex:.2f}/s). start eval:')
            print(f'generate label finished({sec_per_ex:.2f}/s). start eval:',
                  file=logf)
            gt_annos = [
                info["annos"] for info in eval_dataset.dataset.kitti_infos
            ]
            if not pickle_result:
                dt_annos = kitti.get_label_annos(result_path_step)
            # result = get_official_eval_result_v2(gt_annos, dt_annos, class_names)
            # print(json.dumps(result, indent=2), file=logf)
            result = get_official_eval_result(gt_annos, dt_annos, class_names)
            print(result, file=logf)
            print(result)
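            # get_official_eval_result() is assumed to emit, per class, a
            # 5-line block such as (values illustrative only):
            #   Car AP@0.70, 0.70, 0.70:
            #   bbox AP:90.80, 89.97, 88.63
            #   bev  AP:89.81, 87.18, 84.79
            #   3d   AP:86.02, 76.41, 74.43
            #   aos  AP:90.74, 89.67, 88.18
            # result_1/_2/_3 slice one such block per class; only the first
            # (Car) block is parsed into TensorBoard scalars below.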
            result_1 = result.split("\n")[:5]
            result_2 = result.split("\n")[10:15]
            result_3 = result.split("\n")[20:25]
            emh = ['0_easy', '1_mod', '2_hard']
            result_save = result_1
            for i in range(len(result_save) - 1):
                save_targ = result_save[i + 1]
                name_val = save_targ.split(':')[0].split(' ')[0]
                value_val = save_targ.split(':')[1:]
                for ev in range(3):
                    each_val = value_val[0].split(',')[ev]
                    merge_txt = 'AP_kitti/car_70/' + name_val + '/' + emh[ev]
                    writer.add_scalar(merge_txt, float(each_val), global_step)
            if pickle_result:
                with open(result_path_step / "result.pkl", 'wb') as f:
                    pickle.dump(dt_annos, f)
            writer.add_text('eval_result', result, global_step)
            net.train()
    except Exception as e:
        torchplus.train.save_models(model_dir, [net, optimizer],
                                    net.get_global_step())
        logf.close()
        raise e
    # save model before exit
    torchplus.train.save_models(model_dir, [net, optimizer],
                                net.get_global_step())
    logf.close()
Example #21
def predict(config_path,
            model_dir,
            result_path=None,
            predict_test=False,
            ckpt_path=None,
            ref_detfile=None,
            pickle_result=True,
            bb_save_dir=None,
            pub_bb=None,
            pub_lidar=None):
    ''' Setup network and provide useful output '''

    ####################
    # SETUP PARAMETERS #
    ####################
    model_dir = pathlib.Path(model_dir)
    if predict_test:
        result_name = 'predict_test'
    else:
        result_name = 'eval_results'
    if result_path is None:
        result_path = model_dir / result_name
    else:
        result_path = pathlib.Path(result_path)
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)

    # TODO: include this program as a function call in the localization/mapping code as needed
    # TODO: use whole pointcloud data instead of reduced pointcloud
    # TODO: [Done] store data in respective pcd and bounding box (csv) files
    # TODO: [Done] create a cpp file to read and show (n number of) pcd files with respective bounding boxes
    # > [Done] Check if pcl_viewer can open pcd
    # > [Done] Check if pcl_viewer can be called from a cpp program for visualization
    # > [Done] Check if that cpp program can also show a bounding box
    # Read the config file data into useful structures
    input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    class_names = list(input_cfg.class_names)
    center_limit_range = model_cfg.post_center_limit_range

    #########################
    # BUILD VOXEL GENERATOR #
    #########################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)

    #####################
    # NETWORK GENERATOR #
    #####################
    # Build the NN in GPU mode
    net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net.cuda()

    # Standard conversion when using float16 (half-precision) tensors instead of float32
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32

    # Restore old checkpoint if possible
    if ckpt_path is None:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)

    # Setup network for evaluation mode
    net.eval()

    #####################
    # DATASET GENERATOR #
    #####################
    # Dataset build for easy usage
    eval_dataset = input_reader_builder.build(input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner)
    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=input_cfg.batch_size,
        shuffle=False,
        num_workers=input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch)

    # Further variable setup
    result_path_step = result_path / f"step_{net.get_global_step()}"
    result_path_step.mkdir(parents=True, exist_ok=True)
    t = time.time()
    dt_annos = []
    global_set = None
    print()
    print("Generate output labels...")
    bar = ProgressBar()
    bar.start(len(eval_dataset) // input_cfg.batch_size + 1)

    #################
    # NETWORK USAGE #
    #################
    # Predict a batch of 'batch_size' samples, get info and reformat data as needed
    # temp_count = 0
    for example in iter(eval_dataloader):
        # pprint.pprint(example, width=1)
        # for key, value in example.items():
        # 	print(key)
        # 	print(np.shape(value))
        example = example_convert_to_torch(example, float_dtype)
        print(example['image_idx'])
        # pprint.pprint(example, width=1)
        # for key, value in example.items():
        # 	print(key)
        # 	print(np.shape(value))
        # # # # if pickle_result:

        # NOTE: Predict network output
        # start_time = time.time()
        predictions_dicts = net(example)

        # # Save copy of data if user requested
        # if save_pcd:
        # 	np.fromfile(str(v_path), dtype=np.float32, count=-1).reshape([-1, 4])

        # # Publish original data
        # if pub_lidar:
        # 	data=PointCloud2()
        # 	# FIXME: Extract pointcloud info from 'example' (use original kitti data file if needed) > publish
        # 	pub_lidar.publish(data)

        # # Publish network output
        # if pub_bb:
        # 	data = MarkerArray()
        # 	# FIXME: Create a wireframe 3D bounding box and, if possible, a translucent 3D cuboid as well > publish
        # 	pub_bb.publish(data)

        # # print('Network predict time: {}'.format(time.time()-start_time))
        # pprint.pprint(predictions_dicts[0])
        # for key, value in predictions_dicts[0].items():
        # 	print(key)
        # 	print(np.shape(value))

        if bb_save_dir:
            save_path = pathlib.Path(bb_save_dir)
            # create the directory (and its parents) if it does not exist
            save_path.mkdir(parents=True, exist_ok=True)

            for pred_dict in predictions_dicts:
                if pred_dict['box3d_lidar'] is not None:
                    bb_lidar = pred_dict['box3d_lidar'].detach().cpu().numpy()
                else:
                    bb_lidar = [[
                        'temp', 'temp', 'temp', 'temp', 'temp', 'temp', 'temp'
                    ]]
                df = pd.DataFrame(bb_lidar)
                df.columns = ['x', 'y', 'z', 'w', 'l', 'h', 't']
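                # columns: box centre (x, y, z), size (w, l, h) and yaw angle t
                # in the lidar frame, one row per predicted box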
                filename = save_path.joinpath(
                    str(pred_dict['image_idx']) + '.csv')
                filename.write_text(df.to_csv(index=False))
Example #22
    import getpass
    usr_name = getpass.getuser()
    os.chdir('/home/' + usr_name + '/source_code/py/second.pytorch/second')
    config_path = './configs/xyres_28_huituo.config'
    parse = argparse.ArgumentParser()
    parse.add_argument("--num", type=float, default=3)
    parse.add_argument("--diff", type=float, default=0.25)
    args = parse.parse_args()

    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    voxel_cfg = config.model.second.voxel_generator
    voxel_cfg.point_cloud_range[:] = [-29.84, -66.32, -9, 59.76, 59.12, 3]
    voxel_generator = voxel_builder.build(voxel_cfg)

    data_dir="/home/"+usr_name+"/dataset/roadside/1212/rslidar"
    data_reduced_dir = "/home/"+ usr_name+"/dataset/roadside/1113/training/velodyne_reduced"
    bg_file="./data/1212"
    bin_files=glob.glob(data_dir+"/*.pcd")
    bin_files.sort()
    myfilter = bg_filter(voxel_generator.config, num_path='./data/1212/num_table.txt',
                         var_path='./data/1212/var_table.txt',num_point=args.num,
                         is_statistic=False,diff=args.diff)
    # myfilter.generate_table(bg_file,voxel_generator)
    # myfilter=bg_filter_ring(voxel_cfg,z_path="./data/1113/z_table.txt",azimuth_res=0.2,
    #                         laser_angle_path="./data/1113/laser_angles.txt",diff_z=args.num)
    # myfilter.generate_table(bg_file+"/000082.pcd")
    # vis = o3d.visualization.Visualizer()
    # vis=o3d.Visualizer()
Example #23
def evaluate(config_path,
             model_dir,
             result_path=None,
             predict_test=False,
             ckpt_path=None,
             ref_detfile=None,
             pickle_result=True):
    model_dir = pathlib.Path(model_dir)
    if predict_test:
        result_name = 'predict_test'
    else:
        result_name = 'eval_results'
    if result_path is None:
        result_path = model_dir / result_name
    else:
        result_path = pathlib.Path(result_path)
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)

    input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    class_names = list(input_cfg.class_names)
    center_limit_range = model_cfg.post_center_limit_range
    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)

    net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net.cuda()
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)

    if ckpt_path is None:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)

    eval_dataset = input_reader_builder.build(input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner)
    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=input_cfg.batch_size,
        shuffle=False,
        num_workers=input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch)

    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32

    net.eval()
    result_path_step = result_path / f"step_{net.get_global_step()}"
    result_path_step.mkdir(parents=True, exist_ok=True)
    t = time.time()
    dt_annos = []
    global_set = None
    print("Generate output labels...")
    bar = ProgressBar()
    bar.start(len(eval_dataset) // input_cfg.batch_size + 1)

    for example in iter(eval_dataloader):
        example = example_convert_to_torch(example, float_dtype)
        if pickle_result:
            dt_annos += predict_kitti_to_anno(net, example, class_names,
                                              center_limit_range,
                                              model_cfg.lidar_input,
                                              global_set)
        else:
            _predict_kitti_to_file(net, example, result_path_step, class_names,
                                   center_limit_range, model_cfg.lidar_input)
        bar.print_bar()

    sec_per_example = len(eval_dataset) / (time.time() - t)
    print(f'generate label finished({sec_per_example:.2f}/s). start eval:')

    print(f"avg forward time per example: {net.avg_forward_time:.3f}")
    print(f"avg postprocess time per example: {net.avg_postprocess_time:.3f}")
    if not predict_test:
        gt_annos = [info["annos"] for info in eval_dataset.dataset.kitti_infos]
        if not pickle_result:
            dt_annos = kitti.get_label_annos(result_path_step)
        result = get_official_eval_result(gt_annos, dt_annos, class_names)
        print(result)
        result = get_coco_eval_result(gt_annos, dt_annos, class_names)
        print(result)
        if pickle_result:
            with open(result_path_step / "result.pkl", 'wb') as f:
                pickle.dump(dt_annos, f)
Example #24
def onnx_model_predict(config_path=None, model_dir=None):
    import onnxruntime
    from second.pytorch.models.pointpillars import PillarFeatureNet, PointPillarsScatter

    # check the pfe onnx model IR input parameters as follows
    # pillar_x = torch.ones([1, 1, 12000, 100], dtype=torch.float32, device="cuda:0")
    # pillar_y = torch.ones([1, 1, 12000, 100], dtype=torch.float32, device="cuda:0")
    # pillar_z = torch.ones([1, 1, 12000, 100], dtype=torch.float32, device="cuda:0")
    # pillar_i = torch.ones([1, 1, 12000, 100], dtype=torch.float32, device="cuda:0")
    # num_points_per_pillar = torch.ones([1, 12000], dtype=torch.float32, device="cuda:0")
    # x_sub_shaped = torch.ones([1, 1, 12000, 100], dtype=torch.float32, device="cuda:0")
    # y_sub_shaped = torch.ones([1, 1, 12000, 100], dtype=torch.float32, device="cuda:0")
    # mask = torch.ones([1, 1, 12000, 100], dtype=torch.float32, device="cuda:0")

    # check the rpn onnx model IR input parameters as follows
    pillar_x = torch.ones([1, 1, 9918, 100], dtype=torch.float32, device="cuda:0")
    pillar_y = torch.ones([1, 1, 9918, 100], dtype=torch.float32, device="cuda:0")
    pillar_z = torch.ones([1, 1, 9918, 100], dtype=torch.float32, device="cuda:0")
    pillar_i = torch.ones([1, 1, 9918, 100], dtype=torch.float32, device="cuda:0")
    num_points_per_pillar = torch.ones([1, 9918], dtype=torch.float32, device="cuda:0")
    x_sub_shaped = torch.ones([1, 1, 9918, 100], dtype=torch.float32, device="cuda:0")
    y_sub_shaped = torch.ones([1, 1, 9918, 100], dtype=torch.float32, device="cuda:0")
    mask = torch.ones([1, 1, 9918, 100], dtype=torch.float32, device="cuda:0")
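    # 9918 (vs. the 12000-pillar PFE shapes above) is presumably the non-empty
    # pillar count of the specific sample this RPN graph was exported with.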

    pfe_session = onnxruntime.InferenceSession("pfe.onnx")

    # Compute ONNX Runtime output prediction
    pfe_inputs = {
        pfe_session.get_inputs()[0].name: (pillar_x.data.cpu().numpy()),
        pfe_session.get_inputs()[1].name: (pillar_y.data.cpu().numpy()),
        pfe_session.get_inputs()[2].name: (pillar_z.data.cpu().numpy()),
        pfe_session.get_inputs()[3].name: (pillar_i.data.cpu().numpy()),
        pfe_session.get_inputs()[4].name:
        (num_points_per_pillar.data.cpu().numpy()),
        pfe_session.get_inputs()[5].name: (x_sub_shaped.data.cpu().numpy()),
        pfe_session.get_inputs()[6].name: (y_sub_shaped.data.cpu().numpy()),
        pfe_session.get_inputs()[7].name: (mask.data.cpu().numpy())
    }

    pfe_outs = pfe_session.run(None, pfe_inputs)
    print(
        '-------------------------- PFE ONNX Outputs ----------------------------'
    )
    print(pfe_outs)  # you could also save the outputs to a file for comparison
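    # a minimal sketch for persisting them, e.g. to diff against the PyTorch
    # output offline (file name is hypothetical):
    #   np.save('./onnx_predict_outputs/pfe_outs.npy', np.array(pfe_outs))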
    print(
        '-------------------------- PFE ONNX Ending ----------------------------'
    )
    ########################## Middle Feature Extractor #########################
    # numpy --> tensor
    pfe_outs = np.array(pfe_outs)
    voxel_features_tensor = torch.from_numpy(pfe_outs)

    voxel_features = voxel_features_tensor.squeeze()
    # voxel_features = np.array(pfe_outs).squeeze()
    voxel_features = voxel_features.permute(1, 0)

    if isinstance(config_path, str):
        config = pipeline_pb2.TrainEvalPipelineConfig()
        with open(config_path, "r") as f:
            proto_str = f.read()
            text_format.Merge(proto_str, config)
    else:
        config = config_path
    model_cfg = config.model.second
    vfe_num_filters = list(model_cfg.voxel_feature_extractor.num_filters)
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    grid_size = voxel_generator.grid_size
    output_shape = [1] + grid_size[::-1].tolist() + [vfe_num_filters[-1]]
    num_input_features = vfe_num_filters[-1]
    batch_size = 2
    mid_feature_extractor = PointPillarsScatter(output_shape,
                                                num_input_features, batch_size)
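    # PointPillarsScatter scatters each pillar's feature vector back onto the
    # dense BEV canvas at its recorded coordinates, producing the
    # [batch, C, H, W] tensor the RPN consumes.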

    device = torch.device("cuda:0")
    coors_numpy = np.loadtxt('./onnx_predict_outputs/coors.txt',
                             dtype=np.int32)
    coors = torch.from_numpy(coors_numpy)
    coors = coors.to(device)  # CPU tensor --> GPU tensor

    voxel_features = voxel_features.to(device)
    rpn_input_features = mid_feature_extractor(voxel_features, coors)

    ################################# RPN Feature Extractor ################################
    # rpn_input_features = torch.ones([1, 64, 496, 432], dtype=torch.float32, device='cuda:0')
    rpn_session = onnxruntime.InferenceSession("rpn.onnx")
    # compute RPN ONNX Runtime output prediction
    rpn_inputs = {
        rpn_session.get_inputs()[0].name:
        (rpn_input_features.data.cpu().numpy())
    }

    rpn_outs = rpn_session.run(None, rpn_inputs)
    print('---------------------- RPN ONNX Outputs ----------------------')
    print(rpn_outs)
    print('---------------------- RPN ONNX Ending ----------------------')
Example #25
def evaluate(config_path,
             model_dir,
             result_path=None,
             predict_test=False,
             ckpt_path=None,
             ref_detfile=None,
             pickle_result=True):

    model_dir = str(Path(model_dir).resolve())
    if predict_test:
        result_name = 'predict_test'
    else:
        result_name = 'eval_results'
    if result_path is None:
        model_dir = Path(model_dir)
        result_path = model_dir / result_name
    else:
        result_path = pathlib.Path(result_path)

    if isinstance(config_path, str):
        config = pipeline_pb2.TrainEvalPipelineConfig()
        with open(config_path, "r") as f:
            proto_str = f.read()
            text_format.Merge(proto_str, config)
    else:
        config = config_path

    input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    class_names = list(input_cfg.class_names)
    center_limit_range = model_cfg.post_center_limit_range
    #########################
    # Build Voxel Generator
    #########################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)

    net = second_builder.build(model_cfg, voxel_generator, target_assigner,
                               input_cfg.batch_size)
    net.cuda()
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)

    if ckpt_path is None:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)

    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32

    net.eval()
    result_path_step = result_path / f"step_{net.get_global_step()}"
    result_path_step.mkdir(parents=True, exist_ok=True)
    t = time.time()
    # dt_annos = []
    global_set = None
    print("Generate output labels...")

    example_tuple = generate_example()

    dt_annos = predict_kitti_to_anno(net, example_tuple, class_names,
                                     center_limit_range, model_cfg.lidar_input,
                                     global_set)

    print(dt_annos)
Example #26
def train(config_path,
          model_dir,
          result_path=None,
          create_folder=False,
          display_step=50,
          summary_step=5,
          pickle_result=True):
    """train a VoxelNet model specified by a config file.
    """
    if create_folder:
        if pathlib.Path(model_dir).exists():
            model_dir = torchplus.train.create_folder(model_dir)

    model_dir = pathlib.Path(model_dir)
    model_dir.mkdir(parents=True, exist_ok=True)
    eval_checkpoint_dir = model_dir / 'eval_checkpoints'
    eval_checkpoint_dir.mkdir(parents=True, exist_ok=True)
    if result_path is None:
        result_path = model_dir / 'results'
    config_file_bkp = "pipeline.config"
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    shutil.copyfile(config_path, str(model_dir / config_file_bkp))
    input_cfg = config.train_input_reader
    eval_input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config

    class_names = list(input_cfg.class_names)
    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    ######################
    # BUILD TARGET ASSIGNER
    ######################
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    ######################
    # BUILD NET
    ######################
    center_limit_range = model_cfg.post_center_limit_range
    # net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net = second_builder.build(model_cfg, voxel_generator, target_assigner,
                               input_cfg.batch_size)
    net.cuda()
    # net_train = torch.nn.DataParallel(net).cuda()
    print("num_trainable parameters:", len(list(net.parameters())))
    # for n, p in net.named_parameters():
    #     print(n, p.shape)
    ######################
    # BUILD OPTIMIZER
    ######################
    # we need global_step to create lr_scheduler, so restore net first.
    torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    gstep = net.get_global_step() - 1
    optimizer_cfg = train_cfg.optimizer
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)
    optimizer = optimizer_builder.build(optimizer_cfg, net.parameters())
    if train_cfg.enable_mixed_precision:
        loss_scale = train_cfg.loss_scale_factor
        mixed_optimizer = torchplus.train.MixedPrecisionWrapper(
            optimizer, loss_scale)
    else:
        mixed_optimizer = optimizer
    # must restore optimizer AFTER using MixedPrecisionWrapper
    torchplus.train.try_restore_latest_checkpoints(model_dir,
                                                   [mixed_optimizer])
    lr_scheduler = lr_scheduler_builder.build(optimizer_cfg, optimizer, gstep)
    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32
    ######################
    # PREPARE INPUT
    ######################

    dataset = input_reader_builder.build(
        input_cfg,
        model_cfg,
        training=True,
        voxel_generator=voxel_generator,
        target_assigner=target_assigner)
    eval_dataset = input_reader_builder.build(
        eval_input_cfg,
        model_cfg,
        training=False,
        voxel_generator=voxel_generator,
        target_assigner=target_assigner)

    def _worker_init_fn(worker_id):
        time_seed = np.array(time.time(), dtype=np.int32)
        np.random.seed(time_seed + worker_id)
        print(f"WORKER {worker_id} seed:", np.random.get_state()[1][0])

    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=input_cfg.batch_size,
        shuffle=True,
        num_workers=input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch,
        worker_init_fn=_worker_init_fn)
    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=eval_input_cfg.batch_size,
        shuffle=False,
        num_workers=eval_input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch)
    data_iter = iter(dataloader)

    ######################
    # TRAINING
    ######################
    log_path = model_dir / 'log.txt'
    logf = open(log_path, 'a')
    logf.write(proto_str)
    logf.write("\n")
    summary_dir = model_dir / 'summary'
    summary_dir.mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(str(summary_dir))

    total_step_elapsed = 0
    remain_steps = train_cfg.steps - net.get_global_step()
    t = time.time()
    ckpt_start_time = t

    total_loop = train_cfg.steps // train_cfg.steps_per_eval + 1
    # total_loop = remain_steps // train_cfg.steps_per_eval + 1
    clear_metrics_every_epoch = train_cfg.clear_metrics_every_epoch

    if train_cfg.steps % train_cfg.steps_per_eval == 0:
        total_loop -= 1
    mixed_optimizer.zero_grad()
    try:
        for _ in range(total_loop):
            if total_step_elapsed + train_cfg.steps_per_eval > train_cfg.steps:
                steps = train_cfg.steps % train_cfg.steps_per_eval
            else:
                steps = train_cfg.steps_per_eval
            for step in range(steps):
                lr_scheduler.step()
                try:
                    example = next(data_iter)
                except StopIteration:
                    print("end epoch")
                    if clear_metrics_every_epoch:
                        net.clear_metrics()
                    data_iter = iter(dataloader)
                    example = next(data_iter)
                example_torch = example_convert_to_torch(example, float_dtype)

                batch_size = example["anchors"].shape[0]

                example_tuple = list(example_torch.values())
                example_tuple[11] = torch.from_numpy(example_tuple[11])
                example_tuple[12] = torch.from_numpy(example_tuple[12])
                assert len(example_tuple) == 13, "something wrong with the training input size!"
                # training example:[0:'voxels', 1:'num_points', 2:'coordinates', 3:'rect',
                # 4:'Trv2c', 5:'P2',
                # 6:'anchors', 7:'anchors_mask', 8:'labels', 9:'reg_targets', 10:'reg_weights',
                # 11:'image_idx', 12:'image_shape']
                # ret_dict = net(example_torch)

                # training input from example
                # print("example[0] size", example_tuple[0].size())
                pillar_x = example_tuple[0][:, :, 0].unsqueeze(0).unsqueeze(0)
                pillar_y = example_tuple[0][:, :, 1].unsqueeze(0).unsqueeze(0)
                pillar_z = example_tuple[0][:, :, 2].unsqueeze(0).unsqueeze(0)
                pillar_i = example_tuple[0][:, :, 3].unsqueeze(0).unsqueeze(0)
                num_points_per_pillar = example_tuple[1].float().unsqueeze(0)

                # Find distance of x, y, and z from pillar center
                # assuming xyres_16.proto
                coors_x = example_tuple[2][:, 3].float()
                coors_y = example_tuple[2][:, 2].float()
                # self.x_offset = self.vx / 2 + pc_range[0]
                # self.y_offset = self.vy / 2 + pc_range[1]
                # this assumes xyres 20
                # x_sub = coors_x.unsqueeze(1) * 0.16 + 0.1
                # y_sub = coors_y.unsqueeze(1) * 0.16 + -39.9
                # here assumes xyres 16
                x_sub = coors_x.unsqueeze(1) * 0.16 + 0.08
                y_sub = coors_y.unsqueeze(1) * 0.16 + -39.6
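                # assuming the xyres_16 config (voxel_size 0.16 m, pc_range
                # x_min = 0.0, y_min = -39.68): x_offset = 0.16/2 + 0.0 = 0.08
                # and y_offset = 0.16/2 - 39.68 = -39.6, matching the
                # constants used above.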
                ones = torch.ones([1, 100], dtype=torch.float32, device=pillar_x.device)
                x_sub_shaped = torch.mm(x_sub, ones).unsqueeze(0).unsqueeze(0)
                y_sub_shaped = torch.mm(y_sub, ones).unsqueeze(0).unsqueeze(0)

                num_points_for_a_pillar = pillar_x.size()[3]
                mask = get_paddings_indicator(num_points_per_pillar, num_points_for_a_pillar, axis=0)
                mask = mask.permute(0, 2, 1)
                mask = mask.unsqueeze(1)
                mask = mask.type_as(pillar_x)
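                # e.g. a pillar holding 3 real points out of 100 slots yields
                # a mask row [1, 1, 1, 0, ..., 0], zeroing the padded entries
                # in every per-point feature tensor it multiplies.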

                coors   = example_tuple[2]
                anchors = example_tuple[6]
                labels  = example_tuple[8]
                reg_targets = example_tuple[9]

                input = [pillar_x, pillar_y, pillar_z, pillar_i,
                         num_points_per_pillar, x_sub_shaped, y_sub_shaped, mask, coors,
                         anchors, labels, reg_targets]

                ret_dict = net(input)
                assert len(ret_dict) == 10, "something wrong with the training output size!"
                # return 0
                # ret_dict {
                #     0:"loss": loss,
                #     1:"cls_loss": cls_loss,
                #     2:"loc_loss": loc_loss,
                #     3:"cls_pos_loss": cls_pos_loss,
                #     4:"cls_neg_loss": cls_neg_loss,
                #     5:"cls_preds": cls_preds,
                #     6:"dir_loss_reduced": dir_loss_reduced,
                #     7:"cls_loss_reduced": cls_loss_reduced,
                #     8:"loc_loss_reduced": loc_loss_reduced,
                #     9:"cared": cared,
                # }
                # cls_preds = ret_dict["cls_preds"]
                cls_preds = ret_dict[5]
                # loss = ret_dict["loss"].mean()
                loss = ret_dict[0].mean()
                # cls_loss_reduced = ret_dict["cls_loss_reduced"].mean()
                cls_loss_reduced = ret_dict[7].mean()
                # loc_loss_reduced = ret_dict["loc_loss_reduced"].mean()
                loc_loss_reduced = ret_dict[8].mean()
                # cls_pos_loss = ret_dict["cls_pos_loss"]
                cls_pos_loss = ret_dict[3]
                # cls_neg_loss = ret_dict["cls_neg_loss"]
                cls_neg_loss = ret_dict[4]
                # loc_loss = ret_dict["loc_loss"]
                loc_loss = ret_dict[2]
                # cls_loss = ret_dict["cls_loss"]
                cls_loss = ret_dict[1]
                # dir_loss_reduced = ret_dict["dir_loss_reduced"]
                dir_loss_reduced = ret_dict[6]
                # cared = ret_dict["cared"]
                cared = ret_dict[9]
                # labels = example_torch["labels"]
                labels = example_tuple[8]
                if train_cfg.enable_mixed_precision:
                    loss *= loss_scale
                loss.backward()
                torch.nn.utils.clip_grad_norm_(net.parameters(), 10.0)
                mixed_optimizer.step()
                mixed_optimizer.zero_grad()
                net.update_global_step()
                net_metrics = net.update_metrics(cls_loss_reduced,
                                                 loc_loss_reduced, cls_preds,
                                                 labels, cared)

                step_time = (time.time() - t)
                t = time.time()
                metrics = {}
                num_pos = int((labels > 0)[0].float().sum().cpu().numpy())
                num_neg = int((labels == 0)[0].float().sum().cpu().numpy())
                # if 'anchors_mask' not in example_torch:
                #     num_anchors = example_torch['anchors'].shape[1]
                # else:
                #     num_anchors = int(example_torch['anchors_mask'][0].sum())
                num_anchors = int(example_tuple[7][0].sum())
                global_step = net.get_global_step()
                if global_step % display_step == 0:
                    loc_loss_elem = [
                        float(loc_loss[:, :, i].sum().detach().cpu().numpy() /
                              batch_size) for i in range(loc_loss.shape[-1])
                    ]
                    metrics["step"] = global_step
                    metrics["steptime"] = step_time
                    metrics.update(net_metrics)
                    metrics["loss"] = {}
                    metrics["loss"]["loc_elem"] = loc_loss_elem
                    metrics["loss"]["cls_pos_rt"] = float(
                        cls_pos_loss.detach().cpu().numpy())
                    metrics["loss"]["cls_neg_rt"] = float(
                        cls_neg_loss.detach().cpu().numpy())
                    # if unlabeled_training:
                    #     metrics["loss"]["diff_rt"] = float(
                    #         diff_loc_loss_reduced.detach().cpu().numpy())
                    if model_cfg.use_direction_classifier:
                        metrics["loss"]["dir_rt"] = float(
                            dir_loss_reduced.detach().cpu().numpy())
                    # metrics["num_vox"] = int(example_torch["voxels"].shape[0])
                    metrics["num_vox"] = int(example_tuple[0].shape[0])
                    metrics["num_pos"] = int(num_pos)
                    metrics["num_neg"] = int(num_neg)
                    metrics["num_anchors"] = int(num_anchors)
                    metrics["lr"] = float(
                        mixed_optimizer.param_groups[0]['lr'])
                    # metrics["image_idx"] = example['image_idx'][0]
                    metrics["image_idx"] = example_tuple[11][0]
                    flatted_metrics = flat_nested_json_dict(metrics)
                    flatted_summarys = flat_nested_json_dict(metrics, "/")
                    for k, v in flatted_summarys.items():
                        if isinstance(v, (list, tuple)):
                            v = {str(i): e for i, e in enumerate(v)}
                            writer.add_scalars(k, v, global_step)
                        else:
                            writer.add_scalar(k, v, global_step)
                    metrics_str_list = []
                    for k, v in flatted_metrics.items():
                        if isinstance(v, float):
                            metrics_str_list.append(f"{k}={v:.3}")
                        elif isinstance(v, (list, tuple)):
                            if v and isinstance(v[0], float):
                                v_str = ', '.join([f"{e:.3}" for e in v])
                                metrics_str_list.append(f"{k}=[{v_str}]")
                            else:
                                metrics_str_list.append(f"{k}={v}")
                        else:
                            metrics_str_list.append(f"{k}={v}")
                    log_str = ', '.join(metrics_str_list)
                    print(log_str, file=logf)
                    print(log_str)
                ckpt_elapsed_time = time.time() - ckpt_start_time
                if ckpt_elapsed_time > train_cfg.save_checkpoints_secs:
                    torchplus.train.save_models(model_dir, [net, optimizer],
                                                net.get_global_step())
                    ckpt_start_time = time.time()
            total_step_elapsed += steps
            torchplus.train.save_models(model_dir, [net, optimizer],
                                        net.get_global_step())

            # Ensure that all evaluation points are saved forever
            torchplus.train.save_models(eval_checkpoint_dir, [net, optimizer],
                                        net.get_global_step(),
                                        max_to_keep=100)

            # net.eval()
            # result_path_step = result_path / f"step_{net.get_global_step()}"
            # result_path_step.mkdir(parents=True, exist_ok=True)
            # print("#################################")
            # print("#################################", file=logf)
            # print("# EVAL")
            # print("# EVAL", file=logf)
            # print("#################################")
            # print("#################################", file=logf)
            # print("Generate output labels...")
            # print("Generate output labels...", file=logf)
            # t = time.time()
            # dt_annos = []
            # prog_bar = ProgressBar()
            # prog_bar.start(len(eval_dataset) // eval_input_cfg.batch_size + 1)
            # for example in iter(eval_dataloader):
            #     example = example_convert_to_torch(example, float_dtype)
            #     # evaluation example:[0:'voxels', 1:'num_points', 2:'coordinates', 3:'rect',
            #     # 4:'Trv2c', 5:'P2',
            #     # 6:'anchors', 7:'anchors_mask', 8:'image_idx', 9:'image_shape']
            #     example_tuple = list(example.values())
            #     example_tuple[8] = torch.from_numpy(example_tuple[8])
            #     example_tuple[9] = torch.from_numpy(example_tuple[9])
            #     if pickle_result:
            #         dt_annos += predict_kitti_to_anno(
            #             net, example_tuple, class_names, center_limit_range,
            #             model_cfg.lidar_input)
            #     else:
            #         _predict_kitti_to_file(net, example, result_path_step,
            #                                class_names, center_limit_range,
            #                                model_cfg.lidar_input)
            #
            #     prog_bar.print_bar()
            #
            # sec_per_ex = len(eval_dataset) / (time.time() - t)
            # print(f"avg forward time per example: {net.avg_forward_time:.3f}")
            # print(
            #     f"avg postprocess time per example: {net.avg_postprocess_time:.3f}"
            # )
            #
            # net.clear_time_metrics()
            # print(f'generate label finished({sec_per_ex:.2f}/s). start eval:')
            # print(
            #     f'generate label finished({sec_per_ex:.2f}/s). start eval:',
            #     file=logf)
            # gt_annos = [
            #     info["annos"] for info in eval_dataset.dataset.kitti_infos
            # ]
            # if not pickle_result:
            #     dt_annos = kitti.get_label_annos(result_path_step)
            # result, mAPbbox, mAPbev, mAP3d, mAPaos = get_official_eval_result(gt_annos, dt_annos, class_names,
            #                                                                   return_data=True)
            # print(result, file=logf)
            # print(result)
            # writer.add_text('eval_result', result, global_step)
            #
            # for i, class_name in enumerate(class_names):
            #     writer.add_scalar('bev_ap:{}'.format(class_name), mAPbev[i, 1, 0], global_step)
            #     writer.add_scalar('3d_ap:{}'.format(class_name), mAP3d[i, 1, 0], global_step)
            #     writer.add_scalar('aos_ap:{}'.format(class_name), mAPaos[i, 1, 0], global_step)
            # writer.add_scalar('bev_map', np.mean(mAPbev[:, 1, 0]), global_step)
            # writer.add_scalar('3d_map', np.mean(mAP3d[:, 1, 0]), global_step)
            # writer.add_scalar('aos_map', np.mean(mAPaos[:, 1, 0]), global_step)
            #
            # result = get_coco_eval_result(gt_annos, dt_annos, class_names)
            # print(result, file=logf)
            # print(result)
            # if pickle_result:
            #     with open(result_path_step / "result.pkl", 'wb') as f:
            #         pickle.dump(dt_annos, f)
            # writer.add_text('eval_result', result, global_step)
            # net.train()
    except Exception as e:
        torchplus.train.save_models(model_dir, [net, optimizer],
                                    net.get_global_step())
        logf.close()
        raise e
    # save model before exit
    torchplus.train.save_models(model_dir, [net, optimizer],
                                net.get_global_step())
    logf.close()
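
# Note: unlike the train() examples, this deployment-oriented builder passes
# box_coder (rather than a target_assigner) as the third argument to
# second_builder.build.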
def build_network(model_cfg):
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    net = second_builder.build(
        model_cfg, voxel_generator, box_coder)
    return net
Example #28
def train(config_path,
          model_dir,
          result_path=None,
          create_folder=False,
          display_step=50,
          summary_step=5,
          pickle_result=True):
    """train a VoxelNet model specified by a config file.
    """
    if create_folder:
        if pathlib.Path(model_dir).exists():
            model_dir = torchplus.train.create_folder(model_dir)

    model_dir = pathlib.Path(model_dir)
    model_dir.mkdir(parents=True, exist_ok=True)
    eval_checkpoint_dir = model_dir / 'eval_checkpoints'
    eval_checkpoint_dir.mkdir(parents=True, exist_ok=True)
    if result_path is None:
        result_path = model_dir / 'results'
    config_file_bkp = "pipeline.config"
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    shutil.copyfile(config_path, str(model_dir / config_file_bkp))
    input_cfg = config.train_input_reader
    eval_input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config

    class_names = list(input_cfg.class_names)
    #########################
    # Build Voxel Generator
    #########################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    #########################
    # Build Target Assigner
    #########################
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    ######################
    # Build NetWork
    ######################
    center_limit_range = model_cfg.post_center_limit_range
    # net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net = second_builder.build(model_cfg, voxel_generator, target_assigner,
                               input_cfg.batch_size)
    net.cuda()
    # net_train = torch.nn.DataParallel(net).cuda()
    print("num_trainable parameters:", len(list(net.parameters())))
    # for n, p in net.named_parameters():
    #     print(n, p.shape)
    ######################
    # Build Optimizer
    ######################
    # we need global_step to create lr_scheduler, so restore net first.
    torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    gstep = net.get_global_step() - 1
    optimizer_cfg = train_cfg.optimizer
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)
    optimizer = optimizer_builder.build(optimizer_cfg, net.parameters())
    if train_cfg.enable_mixed_precision:
        loss_scale = train_cfg.loss_scale_factor
        mixed_optimizer = torchplus.train.MixedPrecisionWrapper(
            optimizer, loss_scale)
    else:
        mixed_optimizer = optimizer
    # must restore optimizer AFTER using MixedPrecisionWrapper
    torchplus.train.try_restore_latest_checkpoints(model_dir,
                                                   [mixed_optimizer])
    lr_scheduler = lr_scheduler_builder.build(optimizer_cfg, optimizer, gstep)
    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32
    ######################
    # Prepare Input
    ######################

    dataset = input_reader_builder.build(input_cfg,
                                         model_cfg,
                                         training=True,
                                         voxel_generator=voxel_generator,
                                         target_assigner=target_assigner)
    eval_dataset = input_reader_builder.build(eval_input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner)

    def _worker_init_fn(worker_id):
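        # seed each worker from wall-clock time so random augmentation differs
        # across dataloader workers and across runs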
        time_seed = np.array(time.time(), dtype=np.int32)
        np.random.seed(time_seed + worker_id)
        print(f"WORKER {worker_id} seed:", np.random.get_state()[1][0])

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=input_cfg.batch_size,
                                             shuffle=True,
                                             num_workers=input_cfg.num_workers,
                                             pin_memory=False,
                                             collate_fn=merge_second_batch,
                                             worker_init_fn=_worker_init_fn)
    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=eval_input_cfg.batch_size,
        shuffle=False,
        num_workers=eval_input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch)
    data_iter = iter(dataloader)

    ######################
    # Training
    ######################
    log_path = model_dir / 'log.txt'
    logf = open(log_path, 'a')
    logf.write(proto_str)
    logf.write("\n")
    summary_dir = model_dir / 'summary'
    summary_dir.mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(str(summary_dir))

    total_step_elapsed = 0
    remain_steps = train_cfg.steps - net.get_global_step()
    t = time.time()
    ckpt_start_time = t

    total_loop = train_cfg.steps // train_cfg.steps_per_eval + 1
    # total_loop = remain_steps // train_cfg.steps_per_eval + 1
    clear_metrics_every_epoch = train_cfg.clear_metrics_every_epoch

    if train_cfg.steps % train_cfg.steps_per_eval == 0:
        total_loop -= 1
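    # each outer-loop iteration runs one evaluation interval: steps_per_eval
    # training steps, except the final pass, which takes the remainder (hence
    # the +1/-1 adjustment of total_loop above)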
    mixed_optimizer.zero_grad()
    try:
        for _ in range(total_loop):
            if total_step_elapsed + train_cfg.steps_per_eval > train_cfg.steps:
                steps = train_cfg.steps % train_cfg.steps_per_eval
            else:
                steps = train_cfg.steps_per_eval
            for step in range(steps):
                lr_scheduler.step()
                try:
                    example = next(data_iter)
                except StopIteration:
                    print("end epoch")
                    if clear_metrics_every_epoch:
                        net.clear_metrics()
                    data_iter = iter(dataloader)
                    example = next(data_iter)
                example_torch = example_convert_to_torch(example, float_dtype)

                batch_size = example["anchors"].shape[0]

                example_tuple = list(example_torch.values())
                example_tuple[11] = torch.from_numpy(example_tuple[11])
                example_tuple[12] = torch.from_numpy(example_tuple[12])
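
                # training example_tuple layout, inferred from usage below:
                #   0 voxels/pillars, 1 num_points per pillar, 2 coordinates,
                #   6 anchors, 7 anchors_mask, 8 labels, 9 reg_targets,
                #   11 image_idx; 12 (also converted from numpy) is presumably
                #   image_shape, and 3-5 the calibration data (rect, Trv2c, P2),
                #   mirroring the eval reader layout noted in the example above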

                assert 13 == len(
                    example_tuple), "something wrong with the training input size!"

                # ret_dict = net(example_torch)

                # Training input from the example tuple:
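                # split the pillar tensor [P, N, 4] (x, y, z, intensity) into one
                # [1, 1, P, N] tensor per channel so the pillar feature network
                # can take fixed positional inputs (here N = 100 points per pillar)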
                pillar_x = example_tuple[0][:, :, 0].unsqueeze(0).unsqueeze(0)
                pillar_y = example_tuple[0][:, :, 1].unsqueeze(0).unsqueeze(0)
                pillar_z = example_tuple[0][:, :, 2].unsqueeze(0).unsqueeze(0)
                pillar_i = example_tuple[0][:, :, 3].unsqueeze(0).unsqueeze(0)
                num_points_per_pillar = example_tuple[1].float().unsqueeze(0)

                ################################################################
                # Find the distance of x and y from the pillar center
                # (offsets below assume the xyres_16.proto config)
                coors_x = example_tuple[2][:, 3].float()
                coors_y = example_tuple[2][:, 2].float()
                # self.x_offset = self.vx / 2 + pc_range[0]
                # self.y_offset = self.vy / 2 + pc_range[1]
                # these commented values assume the xyres_20 config:
                # x_sub = coors_x.unsqueeze(1) * 0.16 + 0.1
                # y_sub = coors_y.unsqueeze(1) * 0.16 + -39.9
                ################################################################

                # assumes xyres_16
                x_sub = coors_x.unsqueeze(1) * 0.16 + 0.08
                y_sub = coors_y.unsqueeze(1) * 0.16 - 39.6
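                # these constants match vx = vy = 0.16 m with a point cloud range
                # starting at (0, -39.68): x_offset = 0.16/2 + 0 = 0.08 and
                # y_offset = 0.16/2 - 39.68 = -39.6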
                ones = torch.ones([1, 100],
                                  dtype=torch.float32,
                                  device=pillar_x.device)
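                # x_sub is [P, 1]; matmul with the [1, 100] row of ones tiles each
                # pillar's center offset across all 100 point slots -> [1, 1, P, 100]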
                x_sub_shaped = torch.mm(x_sub, ones).unsqueeze(0).unsqueeze(0)
                y_sub_shaped = torch.mm(y_sub, ones).unsqueeze(0).unsqueeze(0)

                num_points_for_a_pillar = pillar_x.size()[3]
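                # get_paddings_indicator marks which of the N point slots in each
                # pillar hold real points (True) rather than zero padding (False)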
                mask = get_paddings_indicator(num_points_per_pillar,
                                              num_points_for_a_pillar,
                                              axis=0)
                mask = mask.permute(0, 2, 1)
                mask = mask.unsqueeze(1)
                mask = mask.type_as(pillar_x)

                coors = example_tuple[2]
                anchors = example_tuple[6]
                labels = example_tuple[8]
                reg_targets = example_tuple[9]

                input = [
                    pillar_x, pillar_y, pillar_z, pillar_i,
                    num_points_per_pillar, x_sub_shaped, y_sub_shaped, mask,
                    coors, anchors, labels, reg_targets
                ]
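                # this exported-model variant feeds the net a fixed positional
                # list (the dict-based train() in the next example passes
                # example_torch directly); a flat signature keeps the forward
                # pass traceable for ONNX export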

                ret_dict = net(input)

                assert 10 == len(
                    ret_dict), "something wrong with the training output size!"
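                # ret_dict positional layout: 0 loss, 1 cls_loss, 2 loc_loss,
                # 3 cls_pos_loss, 4 cls_neg_loss, 5 cls_preds, 6 dir_loss_reduced,
                # 7 cls_loss_reduced, 8 loc_loss_reduced, 9 cared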

                cls_preds = ret_dict[5]
                loss = ret_dict[0].mean()
                cls_loss_reduced = ret_dict[7].mean()
                loc_loss_reduced = ret_dict[8].mean()
                cls_pos_loss = ret_dict[3]
                cls_neg_loss = ret_dict[4]
                loc_loss = ret_dict[2]
                cls_loss = ret_dict[1]
                dir_loss_reduced = ret_dict[6]
                cared = ret_dict[9]
                labels = example_tuple[8]
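                # fp16 path: scale the loss before backward() to avoid gradient
                # underflow; MixedPrecisionWrapper is expected to un-scale the
                # gradients when step() is called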
                if train_cfg.enable_mixed_precision:
                    loss *= loss_scale
                loss.backward()
                torch.nn.utils.clip_grad_norm_(net.parameters(), 10.0)
                mixed_optimizer.step()
                mixed_optimizer.zero_grad()
                net.update_global_step()
                net_metrics = net.update_metrics(cls_loss_reduced,
                                                 loc_loss_reduced, cls_preds,
                                                 labels, cared)

                step_time = (time.time() - t)
                t = time.time()
                metrics = {}
                num_pos = int((labels > 0)[0].float().sum().cpu().numpy())
                num_neg = int((labels == 0)[0].float().sum().cpu().numpy())
                # if 'anchors_mask' not in example_torch:
                #     num_anchors = example_torch['anchors'].shape[1]
                # else:
                #     num_anchors = int(example_torch['anchors_mask'][0].sum())
                num_anchors = int(example_tuple[7][0].sum())
                global_step = net.get_global_step()
                if global_step % display_step == 0:
                    loc_loss_elem = [
                        float(loc_loss[:, :, i].sum().detach().cpu().numpy() /
                              batch_size) for i in range(loc_loss.shape[-1])
                    ]
                    metrics["step"] = global_step
                    metrics["steptime"] = step_time
                    metrics.update(net_metrics)
                    metrics["loss"] = {}
                    metrics["loss"]["loc_elem"] = loc_loss_elem
                    metrics["loss"]["cls_pos_rt"] = float(
                        cls_pos_loss.detach().cpu().numpy())
                    metrics["loss"]["cls_neg_rt"] = float(
                        cls_neg_loss.detach().cpu().numpy())
                    # if unlabeled_training:
                    #     metrics["loss"]["diff_rt"] = float(
                    #         diff_loc_loss_reduced.detach().cpu().numpy())
                    if model_cfg.use_direction_classifier:
                        metrics["loss"]["dir_rt"] = float(
                            dir_loss_reduced.detach().cpu().numpy())

                    metrics["num_vox"] = int(example_tuple[0].shape[0])
                    metrics["num_pos"] = int(num_pos)
                    metrics["num_neg"] = int(num_neg)
                    metrics["num_anchors"] = int(num_anchors)
                    metrics["lr"] = float(
                        mixed_optimizer.param_groups[0]['lr'])
                    metrics["image_idx"] = example_tuple[11][0]
                    flatted_metrics = flat_nested_json_dict(metrics)
                    flatted_summarys = flat_nested_json_dict(metrics, "/")
                    for k, v in flatted_summarys.items():
                        if isinstance(v, (list, tuple)):
                            v = {str(i): e for i, e in enumerate(v)}
                            writer.add_scalars(k, v, global_step)
                        else:
                            writer.add_scalar(k, v, global_step)
                    metrics_str_list = []
                    for k, v in flatted_metrics.items():
                        if isinstance(v, float):
                            metrics_str_list.append(f"{k}={v:.3}")
                        elif isinstance(v, (list, tuple)):
                            if v and isinstance(v[0], float):
                                v_str = ', '.join([f"{e:.3}" for e in v])
                                metrics_str_list.append(f"{k}=[{v_str}]")
                            else:
                                metrics_str_list.append(f"{k}={v}")
                        else:
                            metrics_str_list.append(f"{k}={v}")
                    log_str = ', '.join(metrics_str_list)
                    print(log_str, file=logf)
                    print(log_str)
                ckpt_elapsed_time = time.time() - ckpt_start_time
                if ckpt_elapsed_time > train_cfg.save_checkpoints_secs:
                    torchplus.train.save_models(model_dir, [net, optimizer],
                                                net.get_global_step())
                    ckpt_start_time = time.time()

            total_step_elapsed += steps
            torchplus.train.save_models(model_dir, [net, optimizer],
                                        net.get_global_step())

            # Ensure that all evaluation points are saved forever
            torchplus.train.save_models(eval_checkpoint_dir, [net, optimizer],
                                        net.get_global_step(),
                                        max_to_keep=100)

    except Exception as e:
        torchplus.train.save_models(model_dir, [net, optimizer],
                                    net.get_global_step())
        logf.close()
        raise e
    # save model before exit
    torchplus.train.save_models(model_dir, [net, optimizer],
                                net.get_global_step())
    logf.close()
Example #29
def train(config_path,
          model_dir,
          result_path=None,
          create_folder=False,
          display_step=50,
          summary_step=5,
          pickle_result=True):
    """train a VoxelNet model specified by a config file.
    """
    if create_folder:
        if pathlib.Path(model_dir).exists():
            model_dir = torchplus.train.create_folder(model_dir)

    model_dir = pathlib.Path(model_dir)
    model_dir.mkdir(parents=True, exist_ok=True)
    eval_checkpoint_dir = model_dir / 'eval_checkpoints'
    eval_checkpoint_dir.mkdir(parents=True, exist_ok=True)
    if result_path is None:
        result_path = model_dir / 'results'
    config_file_bkp = "pipeline.config"
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    shutil.copyfile(config_path, str(model_dir / config_file_bkp))
    input_cfg = config.train_input_reader
    eval_input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config

    class_names = list(input_cfg.class_names)
    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    ######################
    # BUILD TARGET ASSIGNER
    ######################
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    ######################
    # BUILD NET
    ######################
    center_limit_range = model_cfg.post_center_limit_range
    net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net.cuda()
    # net_train = torch.nn.DataParallel(net).cuda()
    print("num_trainable parameters:", len(list(net.parameters())))
    # for n, p in net.named_parameters():
    #     print(n, p.shape)
    ######################
    # BUILD OPTIMIZER
    ######################
    # we need global_step to create lr_scheduler, so restore net first.
    torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    gstep = net.get_global_step() - 1
    optimizer_cfg = train_cfg.optimizer
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)
    optimizer = optimizer_builder.build(optimizer_cfg, net.parameters())
    if train_cfg.enable_mixed_precision:
        loss_scale = train_cfg.loss_scale_factor
        mixed_optimizer = torchplus.train.MixedPrecisionWrapper(
            optimizer, loss_scale)
    else:
        mixed_optimizer = optimizer
    # must restore optimizer AFTER using MixedPrecisionWrapper
    torchplus.train.try_restore_latest_checkpoints(model_dir,
                                                   [mixed_optimizer])
    lr_scheduler = lr_scheduler_builder.build(optimizer_cfg, optimizer, gstep)
    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32
    ######################
    # PREPARE INPUT
    ######################

    dataset = input_reader_builder.build(input_cfg,
                                         model_cfg,
                                         training=True,
                                         voxel_generator=voxel_generator,
                                         target_assigner=target_assigner)
    eval_dataset = input_reader_builder.build(eval_input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner)

    def _worker_init_fn(worker_id):
        time_seed = np.array(time.time(), dtype=np.int32)
        np.random.seed(time_seed + worker_id)
        print(f"WORKER {worker_id} seed:", np.random.get_state()[1][0])

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=input_cfg.batch_size,
                                             shuffle=True,
                                             num_workers=input_cfg.num_workers,
                                             pin_memory=False,
                                             collate_fn=merge_second_batch,
                                             worker_init_fn=_worker_init_fn)
    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=eval_input_cfg.batch_size,
        shuffle=False,
        num_workers=eval_input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch)
    data_iter = iter(dataloader)

    ######################
    # TRAINING
    ######################
    log_path = model_dir / 'log.txt'
    logf = open(log_path, 'a')
    logf.write(proto_str)
    logf.write("\n")
    summary_dir = model_dir / 'summary'
    summary_dir.mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(str(summary_dir))

    total_step_elapsed = 0
    remain_steps = train_cfg.steps - net.get_global_step()
    t = time.time()
    ckpt_start_time = t

    total_loop = train_cfg.steps // train_cfg.steps_per_eval + 1
    # total_loop = remain_steps // train_cfg.steps_per_eval + 1
    clear_metrics_every_epoch = train_cfg.clear_metrics_every_epoch

    if train_cfg.steps % train_cfg.steps_per_eval == 0:
        total_loop -= 1
    mixed_optimizer.zero_grad()
    try:
        for _ in range(total_loop):
            if total_step_elapsed + train_cfg.steps_per_eval > train_cfg.steps:
                steps = train_cfg.steps % train_cfg.steps_per_eval
            else:
                steps = train_cfg.steps_per_eval
            for step in range(steps):
                lr_scheduler.step()
                try:
                    example = next(data_iter)
                except StopIteration:
                    print("end epoch")
                    if clear_metrics_every_epoch:
                        net.clear_metrics()
                    data_iter = iter(dataloader)
                    example = next(data_iter)
                example_torch = example_convert_to_torch(example, float_dtype)

                batch_size = example["anchors"].shape[0]

                ret_dict = net(example_torch)

                # box_preds = ret_dict["box_preds"]
                cls_preds = ret_dict["cls_preds"]
                loss = ret_dict["loss"].mean()
                cls_loss_reduced = ret_dict["cls_loss_reduced"].mean()
                loc_loss_reduced = ret_dict["loc_loss_reduced"].mean()
                cls_pos_loss = ret_dict["cls_pos_loss"]
                cls_neg_loss = ret_dict["cls_neg_loss"]
                loc_loss = ret_dict["loc_loss"]
                cls_loss = ret_dict["cls_loss"]
                dir_loss_reduced = ret_dict["dir_loss_reduced"]
                cared = ret_dict["cared"]
                labels = example_torch["labels"]
                if train_cfg.enable_mixed_precision:
                    loss *= loss_scale
                loss.backward()
                torch.nn.utils.clip_grad_norm_(net.parameters(), 10.0)
                mixed_optimizer.step()
                mixed_optimizer.zero_grad()
                net.update_global_step()
                net_metrics = net.update_metrics(cls_loss_reduced,
                                                 loc_loss_reduced, cls_preds,
                                                 labels, cared)

                step_time = (time.time() - t)
                t = time.time()
                metrics = {}
                num_pos = int((labels > 0)[0].float().sum().cpu().numpy())
                num_neg = int((labels == 0)[0].float().sum().cpu().numpy())
                if 'anchors_mask' not in example_torch:
                    num_anchors = example_torch['anchors'].shape[1]
                else:
                    num_anchors = int(example_torch['anchors_mask'][0].sum())
                global_step = net.get_global_step()
                if global_step % display_step == 0:
                    loc_loss_elem = [
                        float(loc_loss[:, :, i].sum().detach().cpu().numpy() /
                              batch_size) for i in range(loc_loss.shape[-1])
                    ]
                    metrics["step"] = global_step
                    metrics["steptime"] = step_time
                    metrics.update(net_metrics)
                    metrics["loss"] = {}
                    metrics["loss"]["loc_elem"] = loc_loss_elem
                    metrics["loss"]["cls_pos_rt"] = float(
                        cls_pos_loss.detach().cpu().numpy())
                    metrics["loss"]["cls_neg_rt"] = float(
                        cls_neg_loss.detach().cpu().numpy())
                    # if unlabeled_training:
                    #     metrics["loss"]["diff_rt"] = float(
                    #         diff_loc_loss_reduced.detach().cpu().numpy())
                    if model_cfg.use_direction_classifier:
                        metrics["loss"]["dir_rt"] = float(
                            dir_loss_reduced.detach().cpu().numpy())
                    metrics["num_vox"] = int(example_torch["voxels"].shape[0])
                    metrics["num_pos"] = int(num_pos)
                    metrics["num_neg"] = int(num_neg)
                    metrics["num_anchors"] = int(num_anchors)
                    metrics["lr"] = float(
                        mixed_optimizer.param_groups[0]['lr'])
                    metrics["image_idx"] = example['image_idx'][0]
                    flatted_metrics = flat_nested_json_dict(metrics)
                    flatted_summarys = flat_nested_json_dict(metrics, "/")
                    for k, v in flatted_summarys.items():
                        if isinstance(v, (list, tuple)):
                            v = {str(i): e for i, e in enumerate(v)}
                            writer.add_scalars(k, v, global_step)
                        else:
                            writer.add_scalar(k, v, global_step)
                    metrics_str_list = []
                    for k, v in flatted_metrics.items():
                        if isinstance(v, float):
                            metrics_str_list.append(f"{k}={v:.3}")
                        elif isinstance(v, (list, tuple)):
                            if v and isinstance(v[0], float):
                                v_str = ', '.join([f"{e:.3}" for e in v])
                                metrics_str_list.append(f"{k}=[{v_str}]")
                            else:
                                metrics_str_list.append(f"{k}={v}")
                        else:
                            metrics_str_list.append(f"{k}={v}")
                    log_str = ', '.join(metrics_str_list)
                    print(log_str, file=logf)
                    print(log_str)
                ckpt_elapsed_time = time.time() - ckpt_start_time
                if ckpt_elapsed_time > train_cfg.save_checkpoints_secs:
                    torchplus.train.save_models(model_dir, [net, optimizer],
                                                net.get_global_step())
                    ckpt_start_time = time.time()
            total_step_elapsed += steps
            torchplus.train.save_models(model_dir, [net, optimizer],
                                        net.get_global_step())

            # Ensure that all evaluation points are saved forever
            torchplus.train.save_models(eval_checkpoint_dir, [net, optimizer],
                                        net.get_global_step(),
                                        max_to_keep=100)

            net.eval()
            result_path_step = result_path / f"step_{net.get_global_step()}"
            result_path_step.mkdir(parents=True, exist_ok=True)
            print("#################################")
            print("#################################", file=logf)
            print("# EVAL")
            print("# EVAL", file=logf)
            print("#################################")
            print("#################################", file=logf)
            print("Generate output labels...")
            print("Generate output labels...", file=logf)
            t = time.time()
            dt_annos = []
            prog_bar = ProgressBar()
            prog_bar.start(len(eval_dataset) // eval_input_cfg.batch_size + 1)
            for example in iter(eval_dataloader):
                example = example_convert_to_torch(example, float_dtype)
                if pickle_result:
                    dt_annos += predict_kitti_to_anno(net, example,
                                                      class_names,
                                                      center_limit_range,
                                                      model_cfg.lidar_input)
                else:
                    _predict_kitti_to_file(net, example, result_path_step,
                                           class_names, center_limit_range,
                                           model_cfg.lidar_input)

                prog_bar.print_bar()

            examples_per_sec = len(eval_dataset) / (time.time() - t)
            print(f"avg forward time per example: {net.avg_forward_time:.3f}")
            print(
                f"avg postprocess time per example: {net.avg_postprocess_time:.3f}"
            )

            net.clear_time_metrics()
            print(f'label generation finished ({examples_per_sec:.2f} ex/s). start eval:')
            print(f'label generation finished ({examples_per_sec:.2f} ex/s). start eval:',
                  file=logf)
            gt_annos = [
                info["annos"] for info in eval_dataset.dataset.kitti_infos
            ]
            if not pickle_result:
                dt_annos = kitti.get_label_annos(result_path_step)
            result, mAPbbox, mAPbev, mAP3d, mAPaos = get_official_eval_result(
                gt_annos, dt_annos, class_names, return_data=True)
            print(result, file=logf)
            print(result)
            writer.add_text('eval_result', result, global_step)

            for i, class_name in enumerate(class_names):
                writer.add_scalar('bev_ap:{}'.format(class_name),
                                  mAPbev[i, 1, 0], global_step)
                writer.add_scalar('3d_ap:{}'.format(class_name),
                                  mAP3d[i, 1, 0], global_step)
                writer.add_scalar('aos_ap:{}'.format(class_name),
                                  mAPaos[i, 1, 0], global_step)
            writer.add_scalar('bev_map', np.mean(mAPbev[:, 1, 0]), global_step)
            writer.add_scalar('3d_map', np.mean(mAP3d[:, 1, 0]), global_step)
            writer.add_scalar('aos_map', np.mean(mAPaos[:, 1, 0]), global_step)

            result = get_coco_eval_result(gt_annos, dt_annos, class_names)
            print(result, file=logf)
            print(result)
            if pickle_result:
                with open(result_path_step / "result.pkl", 'wb') as f:
                    pickle.dump(dt_annos, f)
            writer.add_text('eval_result', result, global_step)
            net.train()
    except Exception as e:
        torchplus.train.save_models(model_dir, [net, optimizer],
                                    net.get_global_step())
        logf.close()
        raise e
    # save model before exit
    torchplus.train.save_models(model_dir, [net, optimizer],
                                net.get_global_step())
    logf.close()
Example #30
def onnx_model_generate(config_path,
                        model_dir,
                        result_path=None,
                        predict_test=False,
                        ckpt_path=None):
    model_dir = pathlib.Path(model_dir)
    if predict_test:
        result_name = 'predict_test'
    else:
        result_name = 'eval_results'
    if result_path is None:
        result_path = model_dir / result_name
    else:
        result_path = pathlib.Path(result_path)
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)

    input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    class_names = list(input_cfg.class_names)
    center_limit_range = model_cfg.post_center_limit_range

    ##########################
    ## Build Voxel Generator
    ##########################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)

    net = second_builder.build(model_cfg, voxel_generator, target_assigner, 1)
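    # the trailing argument is the batch size: the ONNX graph is traced with a
    # single example, matching the batch_size=1 eval dataloader below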
    net.cuda()
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)

    if ckpt_path is None:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)

    eval_dataset = input_reader_builder.build(input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner)
    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=False,
        collate_fn=merge_second_batch)

    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32

    net.eval()
    result_path_step = result_path / f"step_{net.get_global_step()}"
    result_path_step.mkdir(parents=True, exist_ok=True)

    dt_annos = []
    global_set = None
    print("Generate output labels...")
    bar = ProgressBar()
    bar.start(len(eval_dataset) // input_cfg.batch_size + 1)

    for example in iter(eval_dataloader):
        example = example_convert_to_torch(example, float_dtype)
        example_tuple = list(example.values())
        batch_image_shape = example_tuple[8]
        example_tuple[8] = torch.from_numpy(example_tuple[8])
        example_tuple[9] = torch.from_numpy(example_tuple[9])
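        # (these two entries remain numpy arrays after example_convert_to_torch,
        # so they are converted here before the traced forward pass)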

        dt_annos = export_onnx(net, example_tuple, class_names,
                               batch_image_shape, center_limit_range,
                               model_cfg.lidar_input, global_set)
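        # export uses only the first batch: return as soon as the ONNX graph
        # has been generated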
        return 0
        # bar.print_bar()  # unreachable: the function returns above