import copy
import json
import os
import pickle
import time
from pathlib import Path

import numpy as np
import torch

# Project-specific components (VoxelGenerator, AnchorAssigner, Inference, InferData,
# PointPillars, GenericDataset, LossGenerator, Metric, merge_second_batch,
# worker_init_fn, example_convert_to_torch, changeInfo, get_official_eval_result)
# are provided by the repo's own modules.


def trt_eval():
    """Evaluate the PointPillars pipeline using the exported FP16 TensorRT engines."""
    with open('configs/ntusl_20cm.json', 'r') as f:
        config = json.load(f)

    device = torch.device("cuda:0")
    config['device'] = device

    voxel_generator = VoxelGenerator(config)
    anchor_assigner = AnchorAssigner(config)
    inference = Inference(config, anchor_assigner)
    infer_data = InferData(config, voxel_generator, anchor_assigner, torch.float32)

    # FP16 TensorRT engines for the three network stages.
    pfn_engine_path = '../deployment/pfn16.engine'
    rpn_engine_path = '../deployment/rpn16.engine'
    head_engine_path = '../deployment/head16.engine'
    net = PointPillars(config, pfn_engine_path, rpn_engine_path, head_engine_path)

    # Collect the evaluation infos.
    data_root = Path(config['data_root'])
    info_paths = config['eval_info']
    infos = []
    for info_path in info_paths:
        info_path = data_root / info_path
        with open(info_path, 'rb') as f:
            infos += pickle.load(f)
    changeInfo(infos)

    dt_annos = []
    time_elapse, pre_time_avg, net_time_avg, post_time_avg = 0.0, 0.0, 0.0, 0.0
    len_infos = len(infos)
    for idx, info in enumerate(infos):
        print('\ridx %d' % idx, end='')
        v_path = data_root / info['velodyne_path']
        points = np.fromfile(v_path, dtype=np.float32, count=-1).reshape([-1, 4])

        start_time = time.time()
        example = infer_data.get(points, toTorch=True)
        pre_time = time.time()

        with torch.no_grad():
            preds_dict = net(example)
        torch.cuda.synchronize()
        net_time = time.time()

        dt_annos += inference.infer_gpu(example, preds_dict)
        post_time = time.time()

        pre_time_avg += pre_time - start_time
        net_time_avg += net_time - pre_time
        post_time_avg += post_time - net_time
        time_elapse += post_time - start_time

    print("\naverage time : \t\t\t%.5f" % (time_elapse / len_infos))
    print("pre-processing time : \t%.5f" % (pre_time_avg / len_infos))
    print("network time : \t\t\t%.5f" % (net_time_avg / len_infos))
    # print("pfn_time time : \t\t\t%.5f" % (net.pfn_time / len_infos))
    # print("scatter time : \t\t\t\t%.5f" % (net.scatter_time / len_infos))
    # print("rpn time : \t\t\t\t\t%.5f" % (net.rpn_time / len_infos))
    # print("heads time : \t\t\t\t%.5f" % (net.heads_time / len_infos))
    print("post-processing time : \t%.5f" % (post_time_avg / len_infos))
    print("p1 time : \t\t\t\t\t%.5f" % (inference.p1 / len_infos))
    print("p2 time : \t\t\t\t\t%.5f" % (inference.p2 / len_infos))
    print("p3 time : \t\t\t\t\t%.5f" % (inference.p3 / len_infos))
    print("p4 time : \t\t\t\t\t%.5f" % (inference.p4 / len_infos))

    # Dump detections and run the official evaluation against ground truth.
    dt_path = Path(config['data_root']) / config['result_path'] / config['experiment']
    if not os.path.exists(dt_path):
        os.makedirs(dt_path)
    with open(dt_path / config['dt_info'], 'wb') as f:
        pickle.dump(dt_annos, f)

    gt_annos = [info["annos"] for info in infos]
    eval_classes = ["vehicle", "pedestrian", "cyclist"]
    for range_thresh in np.arange(80.0, 90.0, 10.0):
        APs, eval_str = get_official_eval_result(gt_annos, dt_annos, eval_classes, range_thresh)
        print(eval_str)
def infer_trt():
    """Run the PyTorch PointPillars model over the eval split and report per-stage timings."""
    with open('configs/ntusl_20cm.json', 'r') as f:
        config = json.load(f)

    device = torch.device("cuda:0")
    config['device'] = device

    voxel_generator = VoxelGenerator(config)
    anchor_assigner = AnchorAssigner(config)
    inference = Inference(config, anchor_assigner)
    infer_data = InferData(config, voxel_generator, anchor_assigner, torch.float32)

    net = PointPillars(config)
    # Checkpoint restore is currently disabled; re-enable to evaluate trained weights.
    # model_path = Path(config['model_path']) / config['experiment']
    # latest_model_path = model_path / '265000.pth'
    # checkpoint = torch.load(latest_model_path, map_location=lambda storage, loc: storage)
    # net.load_state_dict(checkpoint['model_state_dict'])
    print('model loaded')
    net.to(device)
    # net.half()
    net.eval()

    # Collect the evaluation infos.
    data_root = Path(config['data_root'])
    info_paths = config['eval_info']
    infos = []
    for info_path in info_paths:
        info_path = data_root / info_path
        with open(info_path, 'rb') as f:
            infos += pickle.load(f)
    changeInfo(infos)

    dt_annos = []
    time_elapse, pre_time_avg, net_time_avg, post_time_avg = 0.0, 0.0, 0.0, 0.0
    len_infos = len(infos)
    for idx, info in enumerate(infos):
        print('\ridx %d' % idx, end='')
        v_path = data_root / info['velodyne_path']
        points = np.fromfile(v_path, dtype=np.float32, count=-1).reshape([-1, 4])

        start_time = time.time()
        example = infer_data.get(points, toTorch=True)
        pre_time = time.time()

        with torch.no_grad():
            # Uncomment to export the model to ONNX instead of timing it.
            # inputs = (example["voxels"], example["num_points_per_voxel"], example["coordinates"], example["voxel_num"])
            # input_names = ['voxels', 'num_points_per_voxel', 'coordinates', 'voxel_num']
            # torch.onnx.export(net, inputs, "pp.onnx", verbose=True, opset_version=11, input_names=input_names)
            # return 0
            preds_dict = net(example)
        torch.cuda.synchronize()
        net_time = time.time()

        dt_annos += inference.infer_gpu(example, preds_dict)
        post_time = time.time()

        pre_time_avg += pre_time - start_time
        net_time_avg += net_time - pre_time
        post_time_avg += post_time - net_time
        time_elapse += post_time - start_time

    print("\naverage time : \t\t\t%.5f" % (time_elapse / len_infos))
    print("pre-processing time : \t%.5f" % (pre_time_avg / len_infos))
    print("network time : \t\t\t%.5f" % (net_time_avg / len_infos))
    # print("pfn_time time : \t\t\t%.5f" % (net.pfn_time / len_infos))
    # print("scatter time : \t\t\t\t%.5f" % (net.scatter_time / len_infos))
    # print("rpn time : \t\t\t\t\t%.5f" % (net.rpn_time / len_infos))
    # print("heads time : \t\t\t\t%.5f" % (net.heads_time / len_infos))
    print("post-processing time : \t%.5f" % (post_time_avg / len_infos))
    print("p1 time : \t\t\t\t\t%.5f" % (inference.p1 / len_infos))
    print("p2 time : \t\t\t\t\t%.5f" % (inference.p2 / len_infos))
    print("p3 time : \t\t\t\t\t%.5f" % (inference.p3 / len_infos))
    print("p4 time : \t\t\t\t\t%.5f" % (inference.p4 / len_infos))

    # Dump detections and run the official evaluation against ground truth.
    dt_path = Path(config['data_root']) / config['result_path'] / config['experiment']
    if not os.path.exists(dt_path):
        os.makedirs(dt_path)
    with open(dt_path / config['dt_info'], 'wb') as f:
        pickle.dump(dt_annos, f)

    gt_annos = [info["annos"] for info in infos]
    eval_classes = ["vehicle", "pedestrian", "cyclist"]
    for range_thresh in np.arange(80.0, 90.0, 10.0):
        APs, eval_str = get_official_eval_result(gt_annos, dt_annos, eval_classes, range_thresh)
        print(eval_str)
def train():
    """Train PointPillars, periodically saving checkpoints and running evaluation."""
    with open('configs/ntusl_20cm.json', 'r') as f:
        config = json.load(f)

    # cuda_id = config['device']
    device = torch.device("cuda:0")
    config['device'] = device

    voxel_generator = VoxelGenerator(config)
    anchor_assigner = AnchorAssigner(config)
    loss_generator = LossGenerator(config)
    metrics = Metric()
    inference = Inference(config, anchor_assigner)

    train_dataset = GenericDataset(config, config['train_info'], voxel_generator, anchor_assigner,
                                   training=True, augm=True)
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config['batch_size'],
        shuffle=True,
        num_workers=config['num_workers'],
        pin_memory=False,
        drop_last=True,
        collate_fn=merge_second_batch,
        worker_init_fn=worker_init_fn)

    eval_dataset = GenericDataset(config, config['eval_info'], voxel_generator, anchor_assigner,
                                  training=False)
    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=config['batch_size'],
        shuffle=False,
        num_workers=config['num_workers'],
        pin_memory=False,
        drop_last=True,
        collate_fn=merge_second_batch)
    eval_annos = [info["annos"] for info in eval_dataset.infos]

    net = PointPillars(config)
    net.to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr=config['learning_rate'])  # AdamW

    # Resume from the latest checkpoint if one exists.
    step_num = 0
    model_path = Path(config['data_root']) / config['model_path'] / config['experiment']
    latest_model_path = os.path.join(model_path, 'latest.pth')
    log_file = os.path.join(model_path, 'log.txt')
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    elif os.path.exists(latest_model_path):
        checkpoint = torch.load(latest_model_path)
        net.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        optimizer.param_groups[0]['lr'] = config['learning_rate']
        # optimizer.param_groups[0]['betas'] = (0.5, 0.999)
        step_num = checkpoint['step']
        print('model loaded')

    print("num_trainable parameters:", len(list(net.parameters())))
    print("num train data:", len(train_dataset))
    print("num eval data:", len(eval_dataset))

    net.train()
    display_step = 50
    save_step = 5000
    eval_step = 5000
    avg_loss = 0
    data_iter = iter(train_dataloader)
    avg_time = time.time()
    # scaler = torch.cuda.amp.GradScaler()

    for step in range(step_num + 1, 10000000):
        epoch = (step * config['batch_size']) // len(train_dataset) + 1
        try:
            example = next(data_iter)
        except StopIteration:
            print("end epoch")
            data_iter = iter(train_dataloader)
            example = next(data_iter)

        optimizer.zero_grad()
        example = example_convert_to_torch(example, device=device)
        # with torch.cuda.amp.autocast():
        preds_dict = net(example)
        loss_dict = loss_generator.generate(preds_dict, example)
        loss = loss_dict['loss']
        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), 10.0)
        optimizer.step()
        # scaler.scale(loss).backward()  # loss.backward()
        # scaler.step(optimizer)  # optimizer.step()
        # scaler.update()

        labels = example['labels']
        cls_preds = preds_dict['cls_preds'].view(config['batch_size'], -1, 1)
        metrics.update(labels, cls_preds)
        avg_loss += loss.detach().item()

        if step % save_step == 0:
            torch.save({'step': step,
                        'model_state_dict': net.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict()},
                       latest_model_path)
            step_model_path = os.path.join(model_path, str(step) + '.pth')
            torch.save({'step': step,
                        'model_state_dict': net.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict()},
                       step_model_path)
            print("Model saved")

        if step % display_step == 0:
            avg_loss = avg_loss / display_step
            avg_time = (time.time() - avg_time) / display_step
            print('### Epoch %d, Step %d, Loss: %f, Time: %f' % (epoch, step, avg_loss, avg_time))
            print(metrics)
            metrics.clear()
            avg_loss = 0
            avg_time = time.time()

        if step % eval_step == 0:
            net.eval()
            print("#################################")
            print("# EVAL")
            print("#################################")
            dt_annos = []
            t = time.time()
            eval_total = len(eval_dataloader)
            for count, example in enumerate(eval_dataloader, start=1):
                print('\r%d / %d' % (count, eval_total), end='')
                example = example_convert_to_torch(example, device=device)
                with torch.no_grad():  # gradients are not needed during evaluation
                    preds_dict = net(example)
                dt_annos += inference.infer_gpu(example, preds_dict)
            t = (time.time() - t) / len(eval_dataloader)
            print('\nTime for each frame: %f\n' % t)

            gt_annos = copy.deepcopy(eval_annos)
            eval_classes = ["vehicle", "pedestrian", "cyclist"]
            APs, eval_str = get_official_eval_result(gt_annos, dt_annos, eval_classes, 1000.0)
            log_str = '\nStep: %d%s' % (step, eval_str)
            print(log_str)
            with open(log_file, 'a+') as f:
                f.write(log_str)
            net.train()
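
# The routines above are not wired to any entry point in this excerpt. The
# dispatcher below is an illustrative sketch only (an assumption, not part of
# the original repo, which may launch these routines through its own scripts);
# it simply maps a command-line argument to one of the three functions.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='PointPillars training / evaluation (illustrative entry point)')
    parser.add_argument('mode', choices=['train', 'infer_trt', 'trt_eval'],
                        help='routine to run: training, PyTorch eval with timings, or TensorRT-engine eval')
    args = parser.parse_args()

    if args.mode == 'train':
        train()
    elif args.mode == 'infer_trt':
        infer_trt()
    else:
        trt_eval()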