def initialize_model(args):
    global model, voxel_generator
    cfg = Config.fromfile(args.config)
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    if args.checkpoint is not None:
        load_checkpoint(model, args.checkpoint, map_location="cpu")
    if args.fp16:
        print("cast model to fp16")
        model = model.half()
    model = model.cuda()
    model.eval()

    global device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # read voxelization parameters from the config
    pc_range = cfg.voxel_generator.range
    voxel_size = cfg.voxel_generator.voxel_size
    max_points_in_voxel = cfg.voxel_generator.max_points_in_voxel
    max_voxel_num = cfg.voxel_generator.max_voxel_num[1]
    voxel_generator = VoxelGenerator(
        voxel_size=voxel_size,
        point_cloud_range=pc_range,
        max_num_points=max_points_in_voxel,
        max_voxels=max_voxel_num,
    )
    return model
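# A minimal driver sketch for initialize_model(), not part of the original
# tooling. The `args` namespace fields and file paths are illustrative, and it
# assumes VoxelGenerator.generate(points) returns a (voxels, coordinates,
# num_points) tuple as in the single-inference code elsewhere in this repo;
# other Det3D versions return a dict instead.
import argparse
import numpy as np

if __name__ == "__main__":
    args = argparse.Namespace(
        config="configs/centerpoint/example.py",      # hypothetical config path
        checkpoint="work_dirs/example/latest.pth",    # hypothetical checkpoint
        fp16=False,
    )
    net = initialize_model(args)

    # load one lidar sweep as an (N, 4) float32 array: x, y, z, intensity
    points = np.fromfile("sample.bin", dtype=np.float32).reshape(-1, 4)
    voxels, coords, num_points = voxel_generator.generate(points)
    print(voxels.shape, coords.shape, num_points.shape)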
def main():
    args = parse_args()
    if len(args.shape) == 1:
        input_shape = (3, args.shape[0], args.shape[0])
    elif len(args.shape) == 2:
        input_shape = (3,) + tuple(args.shape)
    else:
        raise ValueError("invalid input shape")

    cfg = Config.fromfile(args.config)
    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg
    ).cuda()
    model.eval()

    if hasattr(model, "forward_dummy"):
        model.forward = model.forward_dummy
    else:
        raise NotImplementedError(
            "FLOPs counter is currently not supported with {}".format(
                model.__class__.__name__
            )
        )

    flops, params = get_model_complexity_info(model, input_shape)
    split_line = "=" * 30
    print(
        "{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}".format(
            split_line, input_shape, flops, params
        )
    )
def read_config(self):
    cfg = Config.fromfile(self.config_path)
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.net = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    self.net.load_state_dict(torch.load(self.model_path)["state_dict"])
    self.net = self.net.to(self.device).eval()

    self.range = cfg.voxel_generator.range
    self.voxel_size = cfg.voxel_generator.voxel_size
    self.max_points_in_voxel = cfg.voxel_generator.max_points_in_voxel
    self.max_voxel_num = cfg.voxel_generator.max_voxel_num
    self.voxel_generator = VoxelGenerator(
        voxel_size=self.voxel_size,
        point_cloud_range=self.range,
        max_num_points=self.max_points_in_voxel,
        max_voxels=self.max_voxel_num,
    )

    # nuScenes dataset lidar-to-IMU extrinsics
    lidar2imu_t = np.array([0.985793, 0.0, 1.84019])
    lidar2imu_r = Quaternion(
        [0.706749235, -0.01530099378, 0.0173974518, -0.7070846]
    )
    ## UDI dataset
    # lidar2imu_t = np.array([1.50, 0., 1.42])
    # lidar2imu_r = Quaternion([1., 0., 0., 0.])
    self.lidar2imu = transform_matrix(lidar2imu_t, lidar2imu_r, inverse=True)
    self.imu2lidar = transform_matrix(lidar2imu_t, lidar2imu_r, inverse=False)
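# Sketch of applying the 4x4 homogeneous matrices built above to an (N, 3)
# point array. Not part of the original tooling; which matrix maps which
# direction depends on the transform_matrix() convention (assumed here to
# follow the nuscenes-devkit), so treat the example names as illustrative.
import numpy as np

def apply_transform(mat4, points_xyz):
    """Apply a 4x4 homogeneous transform to (N, 3) points."""
    homo = np.hstack([points_xyz, np.ones((len(points_xyz), 1))])  # (N, 4)
    return (mat4 @ homo.T).T[:, :3]

# e.g. points_imu = apply_transform(self.lidar2imu, points_lidar)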
def read_config(self):
    cfg = Config.fromfile(self.config_path)
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.net = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    self.net.load_state_dict(torch.load(self.model_path)["state_dict"])
    self.net = self.net.to(self.device).eval()

    self.range = cfg.voxel_generator.range
    self.voxel_size = cfg.voxel_generator.voxel_size
    self.max_points_in_voxel = cfg.voxel_generator.max_points_in_voxel
    self.max_voxel_num = cfg.voxel_generator.max_voxel_num
    self.voxel_generator = VoxelGenerator(
        voxel_size=self.voxel_size,
        point_cloud_range=self.range,
        max_num_points=self.max_points_in_voxel,
        max_voxels=self.max_voxel_num,
    )
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    checkpoint = torch.load(args.checkpoint, map_location="cpu")
    state_dict = checkpoint["state_dict"]
    # strip the "module." prefix left by (Distributed)DataParallel, if present
    if list(state_dict.keys())[0].startswith("module."):
        state_dict = {k[7:]: v for k, v in state_dict.items()}

    convert_state_dict(model, state_dict)
    save_checkpoint(model, osp.join(args.work_dir, "voxelnet_converted.pth"))
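# A generic version of the prefix handling above; a small illustrative helper,
# not part of the original tooling. DataParallel/DistributedDataParallel save
# parameters under "module.", which is why the code above slices off 7 chars.
def strip_prefix(state_dict, prefix="module."):
    if all(k.startswith(prefix) for k in state_dict):
        return {k[len(prefix):]: v for k, v in state_dict.items()}
    return state_dict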
def main():
    # config
    cfg = Config.fromfile(CONFIG_FILE)

    # model loading
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    checkpoint = load_checkpoint(model, CHECK_POINT, map_location="cpu")
    model = model.cuda()
    model.eval()

    # data loader
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        batch_size=cfg.data.samples_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False,
    )

    # infer (detections must be a dict: it is filled via .update() below)
    detections = {}
    cpu_device = torch.device("cpu")
    for i, data_batch in enumerate(data_loader):
        print("step:", i)
        with torch.no_grad():
            outputs = batch_processor(
                model, data_batch, train_mode=False, local_rank=0,
            )
        for output in outputs:
            token = output["metadata"]["token"]
            for k, v in output.items():
                if k not in ["metadata"]:
                    output[k] = v.to(cpu_device)
            detections.update({token: output})

    all_predictions = all_gather(detections)
"data": image_str, "datatype": "png", } return data # todo: for debug if __name__ == "__main__": warnings.filterwarnings('ignore') data_path = "/mnt/proj50/zhengwu/KITTI/object" info_path = "/mnt/proj50/zhengwu/KITTI/object/kitti_infos_train.pkl" from det3d.torchie import Config from torch.utils.data import Dataset, DataLoader cfg = Config.fromfile("../../../examples/second/configs/config.py") pipeline = cfg.train_pipeline kitti = KittiDataset(data_path, info_path, pipeline=pipeline) data = kitti.get_sensor_data(99, by_index=True) # for i in range(3000): # print(i) # data = kitti.get_sensor_data(i, by_index=False) import ipdb; ipdb.set_trace() trainloader = DataLoader(kitti, batch_size=4, shuffle=True) dataitr = iter(trainloader) data = next(dataitr) import ipdb; ipdb.set_trace()
def main():
    cfg = Config.fromfile(
        "configs/nusc/pp/nusc_centerpoint_pp_02voxel_two_pfn_10sweep_demo.py"
    )

    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    dataset = build_dataset(cfg.data.val)
    data_loader = DataLoader(
        dataset,
        batch_size=1,
        sampler=None,
        shuffle=False,
        num_workers=8,
        collate_fn=collate_kitti,
        pin_memory=False,
    )

    checkpoint = load_checkpoint(
        model,
        "work_dirs/centerpoint_pillar_512_demo/latest.pth",
        map_location="cpu",
    )
    model.eval()
    model = model.cuda()

    cpu_device = torch.device("cpu")

    points_list = []
    gt_annos = []
    detections = []

    for i, data_batch in enumerate(data_loader):
        info = dataset._nusc_infos[i]
        gt_annos.append(convert_box(info))

        points = data_batch["points"][:, 1:4].cpu().numpy()
        with torch.no_grad():
            outputs = batch_processor(
                model, data_batch, train_mode=False, local_rank=0,
            )
        for output in outputs:
            for k, v in output.items():
                if k not in ["metadata"]:
                    output[k] = v.to(cpu_device)
            detections.append(output)

        points_list.append(points.T)

    print("Done model inference. Please wait a minute, matplotlib is a little slow...")

    for i in range(len(points_list)):
        visual(points_list[i], gt_annos[i], detections[i], i)
        print("Rendered Image {}".format(i))

    # stitch the rendered frames into a video
    image_folder = "demo"
    video_name = "video.avi"

    images = [img for img in os.listdir(image_folder) if img.endswith(".png")]
    images.sort(key=lambda img_name: int(img_name.split(".")[0][4:]))
    frame = cv2.imread(os.path.join(image_folder, images[0]))
    height, width, layers = frame.shape

    video = cv2.VideoWriter(video_name, 0, 1, (width, height))
    for image in images:
        video.write(cv2.imread(os.path.join(image_folder, image)))

    cv2.destroyAllWindows()
    video.release()
    print("Successfully saved video in the main folder")
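# The frames-to-video step above, isolated into a reusable sketch with an
# explicit codec and frame rate rather than the bare fourcc=0 / 1 fps call.
# cv2.VideoWriter_fourcc is standard OpenCV; the function itself is
# illustrative, not part of the original tooling.
import cv2

def write_video(image_paths, out_path="video.avi", fps=10):
    """Encode a list of same-size frames (file paths) into an AVI."""
    first = cv2.imread(image_paths[0])
    height, width = first.shape[:2]
    writer = cv2.VideoWriter(
        out_path, cv2.VideoWriter_fourcc(*"MJPG"), fps, (width, height)
    )
    for p in image_paths:
        writer.write(cv2.imread(p))
    writer.release()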
def main():
    # torch.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(0)
    args = parse_args()

    cfg = Config.fromfile(args.config)
    cfg.local_rank = args.local_rank

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from

    distributed = False
    if "WORLD_SIZE" in os.environ:
        distributed = int(os.environ["WORLD_SIZE"]) > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        cfg.gpus = torch.distributed.get_world_size()

    if args.autoscale_lr:
        cfg.lr_config.lr_max = cfg.lr_config.lr_max * cfg.gpus

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info("Distributed training: {}".format(distributed))
    logger.info(f"torch.backends.cudnn.benchmark: {torch.backends.cudnn.benchmark}")

    if args.local_rank == 0:
        # copy important files to backup
        backup_dir = os.path.join(cfg.work_dir, "det3d")
        os.makedirs(backup_dir, exist_ok=True)
        os.system("cp -r ./det3d %s/" % backup_dir)
        os.system("cp -r ./tools %s/" % backup_dir)
        os.system("cp -r ./examples %s/" % backup_dir)
        logger.info(f"Backup source files to {cfg.work_dir}/det3d")

    # set random seeds
    if args.seed is not None:
        logger.info("Set random seed to {}".format(args.seed))
        set_random_seed(args.seed)

    model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        datasets.append(build_dataset(cfg.data.val))

    if cfg.checkpoint_config is not None:
        # save det3d version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            det3d_version=__version__, config=cfg.text, CLASSES=datasets[0].CLASSES
        )

    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=args.validate,
        logger=logger,
    )
def main():
    # torch.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(0)
    args = parse_args()

    cfg = Config.fromfile(args.config)

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from

    distributed = torch.cuda.device_count() > 1
    if distributed:
        if args.launcher == "pytorch":
            torch.cuda.set_device(args.local_rank)
            torch.distributed.init_process_group(backend="nccl", init_method="env://")
            cfg.local_rank = args.local_rank
        elif args.launcher == "slurm":
            proc_id = int(os.environ["SLURM_PROCID"])
            ntasks = int(os.environ["SLURM_NTASKS"])
            node_list = os.environ["SLURM_NODELIST"]
            num_gpus = torch.cuda.device_count()
            cfg.gpus = num_gpus
            torch.cuda.set_device(proc_id % num_gpus)
            addr = subprocess.getoutput(
                f"scontrol show hostname {node_list} | head -n1"
            )
            # specify master port
            port = None
            if port is not None:
                os.environ["MASTER_PORT"] = str(port)
            elif "MASTER_PORT" in os.environ:
                pass  # use MASTER_PORT in the environment variable
            else:
                # 29500 is the torch.distributed default port
                os.environ["MASTER_PORT"] = "29501"
            # use MASTER_ADDR in the environment variable if it already exists
            if "MASTER_ADDR" not in os.environ:
                os.environ["MASTER_ADDR"] = addr
            os.environ["WORLD_SIZE"] = str(ntasks)
            os.environ["LOCAL_RANK"] = str(proc_id % num_gpus)
            os.environ["RANK"] = str(proc_id)

            dist.init_process_group(backend="nccl")
            cfg.local_rank = int(os.environ["LOCAL_RANK"])

        cfg.gpus = dist.get_world_size()
    else:
        cfg.local_rank = 0
        cfg.gpus = 1

    if args.autoscale_lr:
        cfg.lr_config.lr_max = cfg.lr_config.lr_max * cfg.gpus

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info("Distributed training: {}".format(distributed))
    logger.info(f"torch.backends.cudnn.benchmark: {torch.backends.cudnn.benchmark}")

    model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        datasets.append(build_dataset(cfg.data.val))

    if cfg.checkpoint_config is not None:
        # save det3d version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            det3d_version=__version__, config=cfg.text, CLASSES=datasets[0].CLASSES
        )

    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=args.validate,
        logger=logger,
    )
def main():
    # torch.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(0)
    args = parse_args()

    cfg = Config.fromfile(args.config)
    cfg.local_rank = args.local_rank

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    distributed = False
    if "WORLD_SIZE" in os.environ:
        distributed = int(os.environ["WORLD_SIZE"]) > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        cfg.gpus = torch.distributed.get_world_size()
    else:
        cfg.gpus = args.gpus

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info("Distributed testing: {}".format(distributed))
    logger.info(f"torch.backends.cudnn.benchmark: {torch.backends.cudnn.benchmark}")

    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    dataset = build_dataset(cfg.data.val)
    data_loader = build_dataloader(
        dataset,
        batch_size=cfg.data.samples_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False,
    )

    checkpoint = load_checkpoint(model, args.checkpoint, map_location="cpu")

    # put model on gpus
    if distributed:
        model = apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(
            model.cuda(cfg.local_rank),
            device_ids=[cfg.local_rank],
            output_device=cfg.local_rank,
            # broadcast_buffers=False,
            find_unused_parameters=True,
        )
    else:
        model = model.cuda()

    model.eval()
    mode = "val"

    logger.info(f"work dir: {args.work_dir}")

    if cfg.local_rank == 0:
        prog_bar = torchie.ProgressBar(len(data_loader.dataset) // cfg.gpus)

    detections = {}
    cpu_device = torch.device("cpu")

    for i, data_batch in enumerate(data_loader):
        with torch.no_grad():
            outputs = batch_processor(
                model, data_batch, train_mode=False, local_rank=args.local_rank,
            )
        for output in outputs:
            token = output["metadata"]["token"]
            for k, v in output.items():
                if k not in ["metadata"]:
                    output[k] = v.to(cpu_device)
            detections.update({token: output})
            if args.local_rank == 0:
                prog_bar.update()

    synchronize()

    all_predictions = all_gather(detections)

    if args.local_rank != 0:
        return

    predictions = {}
    for p in all_predictions:
        predictions.update(p)

    result_dict, _ = dataset.evaluation(predictions, output_dir=args.work_dir)

    for k, v in result_dict["results"].items():
        print(f"Evaluation {k}: {v}")

    if args.txt_result:
        res_dir = os.path.join(os.getcwd(), "predictions")
        for k, dt in predictions.items():
            with open(
                os.path.join(res_dir, "%06d.txt" % int(dt["metadata"]["token"])), "w"
            ) as fout:
                lines = kitti.annos_to_kitti_label(dt)
                for line in lines:
                    fout.write(line + "\n")

        ap_result_str, ap_dict = kitti_evaluate(
            "/data/Datasets/KITTI/Kitti/object/training/label_2",
            res_dir,
            label_split_file="/data/Datasets/KITTI/Kitti/ImageSets/val.txt",
            current_class=0,
        )

        print(ap_result_str)
def main():
    # torch.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(0)
    args = parse_args()

    cfg = Config.fromfile(args.config)

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    distributed = torch.cuda.device_count() > 1
    if distributed:
        if args.launcher == "pytorch":
            torch.cuda.set_device(args.local_rank)
            torch.distributed.init_process_group(backend="nccl", init_method="env://")
            cfg.local_rank = args.local_rank
        elif args.launcher == "slurm":
            proc_id = int(os.environ["SLURM_PROCID"])
            ntasks = int(os.environ["SLURM_NTASKS"])
            node_list = os.environ["SLURM_NODELIST"]
            num_gpus = torch.cuda.device_count()
            cfg.gpus = num_gpus
            torch.cuda.set_device(proc_id % num_gpus)
            addr = subprocess.getoutput(
                f"scontrol show hostname {node_list} | head -n1"
            )
            # specify master port
            port = None
            if port is not None:
                os.environ["MASTER_PORT"] = str(port)
            elif "MASTER_PORT" in os.environ:
                pass  # use MASTER_PORT in the environment variable
            else:
                # 29500 is the torch.distributed default port
                os.environ["MASTER_PORT"] = "29501"
            # use MASTER_ADDR in the environment variable if it already exists
            if "MASTER_ADDR" not in os.environ:
                os.environ["MASTER_ADDR"] = addr
            os.environ["WORLD_SIZE"] = str(ntasks)
            os.environ["LOCAL_RANK"] = str(proc_id % num_gpus)
            os.environ["RANK"] = str(proc_id)

            dist.init_process_group(backend="nccl")
            cfg.local_rank = int(os.environ["LOCAL_RANK"])

        cfg.gpus = torch.distributed.get_world_size()
    else:
        cfg.gpus = args.gpus

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info("Distributed testing: {}".format(distributed))
    logger.info(f"torch.backends.cudnn.benchmark: {torch.backends.cudnn.benchmark}")

    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    if args.testset:
        print("Use Test Set")
        dataset = build_dataset(cfg.data.test)
    else:
        print("Use Val Set")
        dataset = build_dataset(cfg.data.val)

    data_loader = build_dataloader(
        dataset,
        batch_size=cfg.data.samples_per_gpu if not args.speed_test else 1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False,
    )

    checkpoint = load_checkpoint(model, args.checkpoint, map_location="cpu")

    # put model on gpus
    if distributed:
        # model = apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(
            model.cuda(cfg.local_rank),
            device_ids=[cfg.local_rank],
            output_device=cfg.local_rank,
            # broadcast_buffers=False,
            find_unused_parameters=True,
        )
    else:
        # model = fuse_bn_recursively(model)
        model = model.cuda()

    model.eval()
    mode = "val"

    prog_bar = None
    logger.info(f"work dir: {args.work_dir}")
    if cfg.local_rank == 0:
        prog_bar = torchie.ProgressBar(len(data_loader.dataset) // cfg.gpus)

    detections = {}
    cpu_device = torch.device("cpu")

    # time only the middle third of the dataset so warm-up and tail effects
    # do not skew the per-frame speed estimate
    start = int(len(dataset) / 3)
    end = int(len(dataset) * 2 / 3)

    time_start = 0
    time_end = 0

    for i, data_batch in enumerate(data_loader):
        if i == start:
            torch.cuda.synchronize()
            time_start = time.time()

        if i == end:
            torch.cuda.synchronize()
            time_end = time.time()

        with torch.no_grad():
            outputs = batch_processor(
                model, data_batch, train_mode=False, local_rank=args.local_rank,
            )
        for output in outputs:
            token = output["metadata"]["token"]
            for k, v in output.items():
                if k not in ["metadata"]:
                    output[k] = v.to(cpu_device)
            detections.update({token: output})
            if args.local_rank == 0 and prog_bar is not None:
                prog_bar.update()

    synchronize()

    all_predictions = all_gather(detections)

    print("\n Total time per frame: ", (time_end - time_start) / (end - start))

    if args.local_rank != 0:
        return

    predictions = {}
    for p in all_predictions:
        predictions.update(p)

    if not os.path.exists(args.work_dir):
        os.makedirs(args.work_dir)

    save_pred(predictions, args.work_dir)

    result_dict, _ = dataset.evaluation(
        copy.deepcopy(predictions), output_dir=args.work_dir, testset=args.testset
    )

    if result_dict is not None:
        for k, v in result_dict["results"].items():
            print(f"Evaluation {k}: {v}")

    if args.txt_result:
        assert False, "No longer support kitti"
def main():
    # torch.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(0)
    args = parse_args()

    cfg = Config.fromfile(args.config)
    cfg.local_rank = args.local_rank

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    distributed = False
    if "WORLD_SIZE" in os.environ:
        distributed = int(os.environ["WORLD_SIZE"]) > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        cfg.gpus = torch.distributed.get_world_size()
    else:
        cfg.gpus = args.gpus

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info("Distributed testing: {}".format(distributed))
    logger.info(f"torch.backends.cudnn.benchmark: {torch.backends.cudnn.benchmark}")

    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    if args.testset:
        print("Use Test Set")
        dataset = build_dataset(cfg.data.test)
    else:
        print("Use Val Set")
        dataset = build_dataset(cfg.data.val)

    data_loader = build_dataloader(
        dataset,
        batch_size=cfg.data.samples_per_gpu if not args.speed_test else 1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False,
    )

    checkpoint = load_checkpoint(model, args.checkpoint, map_location="cpu")

    # put model on gpus
    if distributed:
        model = apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(
            model.cuda(cfg.local_rank),
            device_ids=[cfg.local_rank],
            output_device=cfg.local_rank,
            # broadcast_buffers=False,
            find_unused_parameters=True,
        )
    else:
        # model = fuse_bn_recursively(model)
        model = model.cuda()

    model.eval()
    mode = "val"

    logger.info(f"work dir: {args.work_dir}")
    if cfg.local_rank == 0:
        prog_bar = torchie.ProgressBar(len(data_loader.dataset) // cfg.gpus)

    detections = {}
    cpu_device = torch.device("cpu")

    # time only the middle third of the dataset to estimate per-frame speed
    start = int(len(dataset) / 3)
    end = int(len(dataset) * 2 / 3)

    time_start = 0
    time_end = 0

    for i, data_batch in enumerate(data_loader):
        if i == start:
            torch.cuda.synchronize()
            time_start = time.time()

        if i == end:
            torch.cuda.synchronize()
            time_end = time.time()

        with torch.no_grad():
            outputs = batch_processor(
                model, data_batch, train_mode=False, local_rank=args.local_rank,
            )
        for output in outputs:
            token = output["metadata"]["token"]
            for k, v in output.items():
                if k not in ["metadata"]:
                    output[k] = v.to(cpu_device)
            detections.update({token: output})
            if args.local_rank == 0:
                prog_bar.update()

    synchronize()

    all_predictions = all_gather(detections)

    print("\n Total time per frame: ", (time_end - time_start) / (end - start))

    if args.local_rank != 0:
        return

    predictions = {}
    for p in all_predictions:
        predictions.update(p)

    if not os.path.exists(args.work_dir):
        os.makedirs(args.work_dir)

    save_pred(predictions, args.work_dir)
    with open(os.path.join(args.work_dir, "prediction.pkl"), "rb") as f:
        predictions = pickle.load(f)

    result_dict, _ = dataset.evaluation(
        copy.deepcopy(predictions), output_dir=args.work_dir, testset=args.testset
    )

    if result_dict is not None:
        for k, v in result_dict["results"].items():
            print(f"Evaluation {k}: {v}")

    if args.txt_result:
        assert False, "No longer support kitti"
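# The speed-test pattern used above, isolated: time only the middle third of
# the loader so GPU warm-up and tail effects do not skew the per-frame number.
# A sketch, not part of the original tooling; `step_fn` stands in for the
# batch_processor call.
import time
import torch

def time_middle_third(loader, step_fn):
    n = len(loader)
    start, end = n // 3, 2 * n // 3
    t0 = t1 = 0.0
    for i, batch in enumerate(loader):
        if i == start:
            torch.cuda.synchronize()  # flush queued kernels before reading the clock
            t0 = time.time()
        if i == end:
            torch.cuda.synchronize()
            t1 = time.time()
        step_fn(batch)
    return (t1 - t0) / (end - start)  # seconds per iteration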
import copy
import pickle
from pathlib import Path

import fire

from det3d.datasets.kitti import kitti_common as kitti_ds
from det3d.datasets.utils.create_gt_database import create_groundtruth_database
from det3d.torchie import Config

cfg = Config.fromfile(
    "../examples/second/configs/kitti_car_vfev3_spmiddlefhd_rpn1_mghead_syncbn.py"
)


def kitti_data_prep(root_path):
    # Compress info of image (path), velodyne (path), label (all info, with
    # DontCare removed) and calib (all) into a pkl file.
    # root_path: "/mnt/proj50/zhengwu/KITTI/object"
    #
    # kitti_infos_train/trainval/val/test hold one entry per sample listed in
    # ImageSets/train.txt / trainval.txt / val.txt / test.txt
    # info{
    #     "point_cloud": {
    #         "num_features": 4,
    #         "velodyne_path": "/training/velodyne/000000.bin",
    #     }
    #     "image": {
    #         "image_idx": index, (e.g. element in ImageSets/trainval.txt)
    #         "image_path": "/training/image_2/000000.png",
    #         "image_shape": [H(370, y), W(1224, x)], (e.g. in image coords)
    #     }
from det3d.torchie.trainer import load_checkpoint
from det3d.torchie.parallel import collate, collate_kitti
from torch.utils.data import DataLoader
import matplotlib.cm as cm
import subprocess
import cv2
from tools.demo_utils import visual
from collections import defaultdict
import mayavi.mlab as mlab
import cv2 as cv
from tools.visualize import showPoints, createRotation
from tools.fileLoader import *


if __name__ == "__main__":
    cfg = Config.fromfile("configs/centerpoint/myconfig.py")

    for data in getBatchFromDepth(cfg):
        points = data["points"]
        voxels = data["voxels"]

        # statistics over all points
        xs = points[:, 0]
        ys = points[:, 1]
        zs = points[:, 2]
        print("X: max {}, min {}".format(xs.max(), xs.min()))
        print("Y: max {}, min {}".format(ys.max(), ys.min()))
        print("Z: max {}, min {}".format(zs.max(), zs.min()))

        # # DO NOT display them in the for loop!
        # # display all points
def main():
    # torch.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(0)
    args = parse_args()

    cfg = Config.fromfile(args.config)
    cfg.local_rank = args.local_rank

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    distributed = False
    if "WORLD_SIZE" in os.environ:
        distributed = int(os.environ["WORLD_SIZE"]) > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        cfg.gpus = torch.distributed.get_world_size()
    else:
        cfg.gpus = args.gpus

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info("Distributed testing: {}".format(distributed))
    logger.info(f"torch.backends.cudnn.benchmark: {torch.backends.cudnn.benchmark}")

    torch.cuda.empty_cache()
    model = build_detector(cfg.nohead_model, train_cfg=None, test_cfg=cfg.test_cfg)

    if args.testset:
        print("Use Test Set")
        dataset = build_dataset(cfg.data.test)
    else:
        print("Use Val Set")
        dataset = build_dataset(cfg.data.val)

    data_loader = build_dataloader(
        dataset,
        batch_size=cfg.data.samples_per_gpu if not args.speed_test else 1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False,
    )

    checkpoint = load_checkpoint(model, args.checkpoint, map_location="cpu")

    # put model on gpu
    # model = fuse_bn_recursively(model)
    model = model.cuda()

    model.eval()
    mode = "val"

    logger.info(f"work dir: {args.work_dir}")
    if cfg.local_rank == 0:
        prog_bar = torchie.ProgressBar(len(data_loader.dataset) // cfg.gpus)

    detections = {}
    cpu_device = torch.device("cpu")

    # time only the middle third of the dataset to estimate per-frame speed
    start = int(len(dataset) / 3)
    end = int(len(dataset) * 2 / 3)

    time_start = 0
    time_end = 0

    device = torch.device(args.local_rank)

    POINTS_NUM = 2

    for i, data_batch in enumerate(data_loader):
        if i == start:
            torch.cuda.synchronize()
            time_start = time.time()

        if i == end:
            torch.cuda.synchronize()
            time_end = time.time()

        with torch.no_grad():
            sample = example_to_device(data_batch, device=device)
            # use j here: reusing i would clobber the outer loop index
            for j in range(len(sample["metadata"])):
                sample["metadata"][j]["image_prefix"] = None
            del sample["metadata"]
            del sample["points"]
            # del sample["shape"]
            sample["shape"] = torch.tensor(sample["shape"])
            # keep only a couple of voxels so the exported graph stays small
            sample["voxels"] = sample["voxels"][0:POINTS_NUM, :, :]
            sample["num_points"] = sample["num_points"][0:POINTS_NUM]
            sample["coordinates"] = sample["coordinates"][0:POINTS_NUM, :]

            outputs = model(sample, return_loss=False)
            # outputs = batch_processor(
            #     model, data_batch, train_mode=False, local_rank=args.local_rank,
            # )

            for k, t in sample.items():
                print("====", k)
                print(t.shape)

            print("============== start =============")
            register_custom_op_symbolic(
                "spconv::get_indice_pairs_3d", symbolic_get_indice_pairs_3d, 11
            )
            torch.onnx.export(
                model,    # model being run
                sample,   # model input (or a tuple for multiple inputs)
                "/workspace/data/center_point.onnx",  # where to save the model
                export_params=True,   # store the trained parameter weights inside the model file
                opset_version=11,     # the ONNX version to export the model to
                do_constant_folding=True,  # whether to execute constant folding for optimization
            )
            print("============== finish =============")
            break

        # unreachable after the break above; kept from the evaluation path
        for output in outputs:
            token = output["metadata"]["token"]
            for k, v in output.items():
                if k not in ["metadata"]:
                    output[k] = v.to(cpu_device)
            detections.update({token: output})
            if args.local_rank == 0:
                prog_bar.update()

    synchronize()

    all_predictions = all_gather(detections)

    print("\n Total time per frame: ", (time_end - time_start) / (end - start))

    if args.local_rank != 0:
        return

    predictions = {}
    for p in all_predictions:
        predictions.update(p)

    if not os.path.exists(args.work_dir):
        os.makedirs(args.work_dir)

    save_pred(predictions, args.work_dir)

    result_dict, _ = dataset.evaluation(
        copy.deepcopy(predictions), output_dir=args.work_dir, testset=args.testset
    )

    if result_dict is not None:
        for k, v in result_dict["results"].items():
            print(f"Evaluation {k}: {v}")

    if args.txt_result:
        assert False, "No longer support kitti"
def main():
    cfg = Config.fromfile(
        "configs/nusc/pp/nusc_centerpoint_pp_02voxel_two_pfn_10sweep_demo_export_onnx.py"
    )

    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    dataset = build_dataset(cfg.data.val)
    data_loader = DataLoader(
        dataset,
        batch_size=1,
        sampler=None,
        shuffle=False,
        num_workers=8,
        collate_fn=collate_kitti,
        pin_memory=False,
    )

    checkpoint = load_checkpoint(model, "./latest.pth", map_location="cpu")
    model.eval()
    model = model.cuda()

    gpu_device = torch.device("cuda")

    points_list = []
    gt_annos = []
    detections = []

    data_iter = iter(data_loader)
    data_batch = next(data_iter)

    pp_model = PointPillars(model)

    points = data_batch["points"][:, 1:4].cpu().numpy()
    with torch.no_grad():
        example = example_to_device(data_batch, gpu_device, non_blocking=False)
        example["voxels"] = torch.zeros(
            (example["voxels"].shape[0], example["voxels"].shape[1], 10),
            dtype=torch.float32,
            device=gpu_device,
        )
        example.pop("metadata")
        example.pop("points")
        example["shape"] = torch.tensor(
            example["shape"], dtype=torch.int32, device=gpu_device
        )
        model(example)

        # export the pillar feature encoder
        torch.onnx.export(
            model.reader,
            (example["voxels"], example["num_voxels"], example["coordinates"]),
            "onnx_model/pfe.onnx",
            opset_version=11,
        )

        # export the RPN on a fixed-size pseudo-image input
        rpn_input = torch.zeros(
            (1, 64, 512, 512), dtype=torch.float32, device=gpu_device
        )
        torch.onnx.export(pp_model, rpn_input, "onnx_model/rpn.onnx", opset_version=11)
    print("Done")
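# Optional sanity check for the exported RPN graph; a sketch, not part of the
# original tooling, assuming onnxruntime is installed and reusing the
# (1, 64, 512, 512) input shape from the export above.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("onnx_model/rpn.onnx", providers=["CPUExecutionProvider"])
inp = sess.get_inputs()[0]
dummy = np.zeros((1, 64, 512, 512), dtype=np.float32)
outputs = sess.run(None, {inp.name: dummy})
print([o.shape for o in outputs])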