import os
import time
from collections import deque

import cv2
import numpy as np
import torch

# Assumed project helpers (defined elsewhere in the repo): get_filtered_lidar,
# makeBEVMap, decode, post_processing, draw_predictions, _sigmoid,
# time_synchronized, plus the `cnf` (KITTI config), `configs`, and `model` globals.

def complex_yolo(pointcloud):
    # Filter the raw LiDAR points to the configured boundary and build the BEV map
    pointcloud = get_filtered_lidar(pointcloud, cnf.boundary)
    bev_maps = makeBEVMap(pointcloud, cnf.boundary)
    bev_maps = torch.from_numpy(bev_maps)
    bev_maps = torch.unsqueeze(bev_maps, 0)  # add batch dimension
    input_bev_maps = bev_maps.to(configs.device, non_blocking=True).float()

    t1 = time_synchronized()
    outputs = model(input_bev_maps)
    outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
    outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
    # detections size (batch_size, K, 10)
    detections = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'],
                        outputs['z_coor'], outputs['dim'], K=configs.K)
    detections = detections.cpu().detach().numpy().astype(np.float32)
    detections = post_processing(detections, configs.num_classes, configs.down_ratio, configs.peak_thresh)
    t2 = time_synchronized()

    detections = detections[0]  # only first batch
    # Draw predictions on the BEV image
    bev_map = (bev_maps.squeeze().permute(1, 2, 0).numpy() * 255).astype(np.uint8)
    bev_map = cv2.resize(bev_map, (cnf.BEV_WIDTH, cnf.BEV_HEIGHT))
    bev_map = draw_predictions(bev_map, detections.copy(), configs.num_classes)
    bev_map = cv2.rotate(bev_map, cv2.ROTATE_180)
    cv2.imshow("BEV", bev_map)
    cv2.waitKey(1)  # needed for the imshow window to actually refresh
    print('\tDone testing in time: {:.1f}ms, speed {:.2f}FPS'.format((t2 - t1) * 1000, 1 / (t2 - t1)))
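# A minimal usage sketch for complex_yolo(), assuming a KITTI-style .bin scan
# (N x 4 float32: x, y, z, intensity). The file path is illustrative, not from
# the original code.
if __name__ == '__main__':
    scan = np.fromfile('dataset/kitti/testing/velodyne/000000.bin',
                       dtype=np.float32).reshape(-1, 4)
    complex_yolo(scan)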
def do_detect(configs, model, bevmap, is_front):
    if not is_front:
        # flip the BEV map along both spatial axes for the rear-facing pass
        bevmap = torch.flip(bevmap, [1, 2])
    input_bev_maps = bevmap.unsqueeze(0).to(configs.device, non_blocking=True).float()
    t1 = time_synchronized()
    outputs = model(input_bev_maps)
    outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
    outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
    # detections size (batch_size, K, 10)
    detections = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'],
                        outputs['z_coor'], outputs['dim'], K=configs.K)
    detections = detections.cpu().numpy().astype(np.float32)
    detections = post_processing(detections, configs.num_classes, configs.down_ratio, configs.peak_thresh)
    t2 = time_synchronized()
    # Inference speed
    fps = 1 / (t2 - t1)
    return detections[0], bevmap, fps
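# Hedged sketch of a two-sided run with do_detect(), assuming `front_bevmap`
# and `back_bevmap` were built from the forward and rearward halves of the
# scan; those variable names are assumptions, not from the original code.
with torch.no_grad():
    front_dets, front_bev, fps_front = do_detect(configs, model, front_bevmap, is_front=True)
    back_dets, back_bev, fps_back = do_detect(configs, model, back_bevmap, is_front=False)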
def predict(self, pointcloud):
    # bird's-eye view -> heatmap outputs -> detections in KITTI format
    bev = self.preprocessing(pointcloud)
    t1 = time_synchronized()
    outputs = self.model(bev)
    detections = self.post_processing(outputs)
    t2 = time_synchronized()
    print('\tDone testing in time: {:.1f}ms, speed {:.2f}FPS'.format((t2 - t1) * 1000, 1 / (t2 - t1)))
    return detections
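# Hedged sketch of the wrapper class predict() belongs to, assuming its
# preprocessing() builds the BEV input the same way complex_yolo() does above.
# The class name `Detector` is illustrative, not from the original code.
class Detector:
    def __init__(self, model, configs):
        self.model = model
        self.configs = configs

    def preprocessing(self, pointcloud):
        pointcloud = get_filtered_lidar(pointcloud, cnf.boundary)
        bev = makeBEVMap(pointcloud, cnf.boundary)
        return torch.from_numpy(bev).unsqueeze(0).to(self.configs.device).float()

    # predict() above and a post_processing() decoding step complete the class.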
def callback(self, data):
    rospy.loginfo("detection")
    with torch.no_grad():
        # Materialize the PointCloud2 generator once; iterating it in a debug
        # loop first (as the original did) would leave it exhausted before
        # removePoints() ever saw a point.
        points = np.array(list(point_cloud2.read_points(data)))
        b = kitti_bev_utils.removePoints(points, cnf.boundary)
        # assuming makeBVFeature returns a (3, H, W) NumPy array
        imgs_bev = kitti_bev_utils.makeBVFeature(b, cnf.DISCRETIZATION, cnf.boundary)
        imgs_bev = torch.from_numpy(imgs_bev).unsqueeze(0)  # add batch dimension
        input_imgs = imgs_bev.to(device=configs.device).float()
        t1 = time_synchronized()
        outputs = self.model(input_imgs)
        t2 = time_synchronized()
        detections = post_processing_v2(outputs, conf_thresh=configs.conf_thresh,
                                        nms_thresh=configs.nms_thresh)

        img_detections = []  # Stores detections for each image index
        img_detections.extend(detections)

        img_bev = imgs_bev.squeeze() * 255
        img_bev = img_bev.permute(1, 2, 0).numpy().astype(np.uint8)
        img_bev = cv2.resize(img_bev, (configs.img_size, configs.img_size))
        for detections in img_detections:
            if detections is None:
                continue
            # Rescale boxes to the original image size
            detections = rescale_boxes(detections, configs.img_size, img_bev.shape[:2])
            for x, y, w, l, im, re, *_, cls_pred in detections:
                yaw = np.arctan2(im, re)
                # Draw rotated box
                kitti_bev_utils.drawRotatedBox(img_bev, x, y, w, l, yaw, cnf.colors[int(cls_pred)])
        img_bev = cv2.flip(cv2.flip(img_bev, 0), 1)
        cv2.imshow('test-img', img_bev)
        cv2.waitKey(1)
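# Hedged sketch of the ROS node wiring for the callback above; the node and
# topic names ('complex_yolo_node', '/velodyne_points') are assumptions.
import rospy
from sensor_msgs.msg import PointCloud2
from sensor_msgs import point_cloud2

class DetectorNode:
    def __init__(self, model):
        self.model = model
        rospy.init_node('complex_yolo_node')
        rospy.Subscriber('/velodyne_points', PointCloud2, self.callback, queue_size=1)

    callback = callback  # bind the callback defined above as a method

if __name__ == '__main__':
    node = DetectorNode(model)
    rospy.spin()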
# The checkpoint-existence check below is reconstructed from the truncated
# "configs.pretrained_path)" fragment at the start of this snippet.
assert os.path.isfile(configs.pretrained_path), "No file found at {}".format(configs.pretrained_path)
configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx))
# map_location keeps GPU-saved checkpoints loadable on CPU-only machines
model.load_state_dict(torch.load(configs.pretrained_path, map_location=configs.device))
model = model.to(device=configs.device)
out_cap = None
model.eval()

test_dataloader = create_test_dataloader(configs)
with torch.no_grad():
    for batch_idx, (img_paths, imgs_bev) in enumerate(test_dataloader):
        input_imgs = imgs_bev.to(device=configs.device).float()
        t1 = time_synchronized()
        outputs = model(input_imgs)
        t2 = time_synchronized()
        detections = post_processing(outputs, conf_thresh=configs.conf_thresh,
                                     nms_thresh=configs.nms_thresh)

        img_detections = []  # Stores detections for each image index
        img_detections.extend(detections)

        img_bev = imgs_bev.squeeze() * 255
        img_bev = img_bev.permute(1, 2, 0).numpy().astype(np.uint8)
        img_bev = cv2.resize(img_bev, (configs.img_size, configs.img_size))
        for detections in img_detections:
            if detections is None:
                continue
            # rescale and draw the rotated boxes, as in the callback above
            detections = rescale_boxes(detections, configs.img_size, img_bev.shape[:2])
            for x, y, w, l, im, re, *_, cls_pred in detections:
                yaw = np.arctan2(im, re)
                kitti_bev_utils.drawRotatedBox(img_bev, x, y, w, l, yaw, cnf.colors[int(cls_pred)])
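# time_synchronized() is called throughout these snippets. A minimal sketch of
# a CUDA-aware timer with the usual behavior, in case the project helper is
# not available; this definition is an assumption, not the original source.
def time_synchronized():
    # wait for all queued CUDA kernels so GPU timings are accurate
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()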
def evaluate_mAP(val_loader, model, configs, logger):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    progress = ProgressMeter(len(val_loader), [batch_time, data_time],
                             prefix="Evaluation phase...")
    labels = []
    sample_metrics = []  # List of tuples (TP, confs, pred)
    # switch to evaluate mode
    model.eval()
    with torch.no_grad():
        start_time = time.time()
        for batch_idx, batch_data in enumerate(tqdm(val_loader)):
            metadatas, targets = batch_data
            batch_size = len(metadatas['img_path'])
            voxelinput = metadatas['voxels']
            coorinput = metadatas['coors']
            numinput = metadatas['num_points']
            voxelinputr = torch.tensor(voxelinput, dtype=torch.float32, device=configs.device)
            coorinputr = torch.tensor(coorinput, dtype=torch.int32, device=configs.device)
            numinputr = torch.tensor(numinput, dtype=torch.int32, device=configs.device)

            t1 = time_synchronized()
            outputs = model(voxelinputr, coorinputr, numinputr)
            outputs = outputs._asdict()
            outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
            outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
            # detections size (batch_size, K, 10)
            detections = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'],
                                outputs['z_coor'], outputs['dim'], K=configs.K)
            detections = detections.cpu().numpy().astype(np.float32)
            detections = post_processingv2(detections, configs.num_classes, configs.down_ratio,
                                           configs.peak_thresh)

            # collect ground-truth class labels per sample
            for sample_i in range(len(detections)):
                num = targets['count'][sample_i]
                target = targets['batch'][sample_i][:num]
                labels += target[:, 8].tolist()

            sample_metrics += get_batch_statistics_rotated_bbox(detections, targets,
                                                                iou_threshold=configs.iou_thresh)
            t2 = time_synchronized()
            # measure elapsed time
            batch_time.update(time.time() - start_time)
            # Log message
            if logger is not None and ((batch_idx + 1) % configs.print_freq) == 0:
                logger.info(progress.get_message(batch_idx))
            start_time = time.time()

    # Concatenate sample statistics
    true_positives, pred_scores, pred_labels = [
        np.concatenate(x, 0) for x in list(zip(*sample_metrics))
    ]
    precision, recall, AP, f1, ap_class = ap_per_class(true_positives, pred_scores,
                                                       pred_labels, labels)
    return precision, recall, AP, f1, ap_class
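# Hedged usage sketch: run the evaluation and report per-class AP, assuming
# `val_loader`, `model`, and `configs` are already set up as above.
precision, recall, AP, f1, ap_class = evaluate_mAP(val_loader, model, configs, logger=None)
for idx, cls in enumerate(ap_class):
    print('Class {}: AP = {:.4f}'.format(cls, AP[idx]))
print('mAP: {:.4f}'.format(AP.mean()))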
def demo(configs):
    video_loader = TTNet_Video_Loader(configs.video_path, configs.input_size,
                                      configs.num_frames_sequence)
    result_filename = os.path.join(configs.save_demo_dir, 'results.txt')
    frame_rate = video_loader.video_fps
    if configs.save_demo_output:
        configs.frame_dir = os.path.join(configs.save_demo_dir, 'frame')
        if not os.path.isdir(configs.frame_dir):
            os.makedirs(configs.frame_dir)

    configs.device = torch.device('cuda:{}'.format(configs.gpu_idx))

    # model
    model = create_model(configs)
    model.cuda()
    assert configs.pretrained_path is not None, "Need to load the pre-trained model"
    model = load_pretrained_model(model, configs.pretrained_path, configs.gpu_idx,
                                  configs.overwrite_global_2_local)
    model.eval()

    middle_idx = int(configs.num_frames_sequence / 2)
    queue_frames = deque(maxlen=middle_idx + 1)
    frame_idx = 0
    w_original, h_original = 1920, 1080
    w_resize, h_resize = 320, 128
    w_ratio = w_original / w_resize
    h_ratio = h_original / h_resize
    with torch.no_grad():
        for count, resized_imgs in video_loader:
            # take the middle frame of the sequence (3 channels per frame)
            img = cv2.resize(resized_imgs[3 * middle_idx: 3 * (middle_idx + 1)].transpose(1, 2, 0),
                             (w_original, h_original))
            # Expand the first dim
            resized_imgs = torch.from_numpy(resized_imgs).to(configs.device,
                                                             non_blocking=True).float().unsqueeze(0)
            t1 = time_synchronized()
            pred_ball_global, pred_ball_local, pred_events, pred_seg = model.run_demo(resized_imgs)
            t2 = time_synchronized()
            prediction_global, prediction_local, prediction_seg, prediction_events = post_processing(
                pred_ball_global, pred_ball_local, pred_events, pred_seg, configs.input_size[0],
                configs.thresh_ball_pos_mask, configs.seg_thresh, configs.event_thresh)
            # Compose the coarse global position with the local refinement
            prediction_ball_final = [
                int(prediction_global[0] * w_ratio + prediction_local[0] - w_resize / 2),
                int(prediction_global[1] * h_ratio + prediction_local[1] - h_resize / 2)
            ]

            # Get info of the (middle_idx + 1)th frame
            if len(queue_frames) == middle_idx + 1:
                frame_pred_infor = queue_frames.popleft()
                seg_img = frame_pred_infor['seg'].astype(np.uint8)
                ball_pos = frame_pred_infor['ball']
                seg_img = cv2.resize(seg_img, (w_original, h_original))
                ploted_img = plot_detection(img, ball_pos, seg_img, prediction_events)
                ploted_img = cv2.cvtColor(ploted_img, cv2.COLOR_RGB2BGR)
                if configs.show_image:
                    cv2.imshow('ploted_img', ploted_img)
                    cv2.waitKey(10)
                if configs.save_demo_output:
                    cv2.imwrite(os.path.join(configs.frame_dir, '{:06d}.jpg'.format(frame_idx)),
                                ploted_img)

            frame_pred_infor = {
                'seg': prediction_seg,
                'ball': prediction_ball_final
            }
            queue_frames.append(frame_pred_infor)
            frame_idx += 1
            print('Done frame_idx {} - time {:.3f}s'.format(frame_idx, t2 - t1))

    if configs.output_format == 'video':
        output_video_path = os.path.join(configs.save_demo_dir, 'result.mp4')
        # frames are written as %06d.jpg above, so the ffmpeg pattern must match
        cmd_str = 'ffmpeg -f image2 -i {}/%06d.jpg -b 5000k -c:v mpeg4 {}'.format(
            configs.frame_dir, output_video_path)
        os.system(cmd_str)
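# The demo delays display by (middle_idx + 1) frames so each frame is shown
# with the prediction that was centered on it. A minimal, self-contained
# sketch of that deque pattern; the names here are illustrative.
delay = 4  # e.g. middle_idx + 1 for a 9-frame sequence
buffer = deque(maxlen=delay)
for pred in range(10):  # stand-in for per-frame predictions
    if len(buffer) == delay:
        print('display', buffer.popleft())  # the demo plots this delayed frame
    buffer.append(pred)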
def evaluate_mAP(val_loader, model, configs, logger):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    progress = ProgressMeter(len(val_loader), [batch_time, data_time],
                             prefix="Evaluation phase...")
    # This variant writes KITTI-format result files; the accumulated
    # labels/sample_metrics lists are unused here.
    labels = []
    sample_metrics = []  # List of tuples (TP, confs, pred)
    # switch to evaluate mode
    model.eval()
    class_id = {0: 'Car', 1: 'Pedestrian', 2: 'Cyclist'}
    with torch.no_grad():
        start_time = time.time()
        for batch_idx, batch_data in enumerate(tqdm(val_loader)):
            metadatas, targets = batch_data
            batch_size = len(metadatas['img_path'])
            voxelinput = metadatas['voxels']
            coorinput = metadatas['coors']
            numinput = metadatas['num_points']
            voxelinputr = torch.tensor(voxelinput, dtype=torch.float32, device=configs.device)
            coorinputr = torch.tensor(coorinput, dtype=torch.int32, device=configs.device)
            numinputr = torch.tensor(numinput, dtype=torch.int32, device=configs.device)

            t1 = time_synchronized()
            outputs = model(voxelinputr, coorinputr, numinputr)
            outputs = outputs._asdict()
            outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
            outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
            # detections size (batch_size, K, 10)
            img_path = metadatas['img_path'][0]
            calib = Calibration(img_path.replace(".png", ".txt").replace("image_2", "calib"))
            detections = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'],
                                outputs['z_coor'], outputs['dim'], K=configs.K)
            detections = detections.cpu().numpy().astype(np.float32)
            detections = post_processing(detections, configs.num_classes, configs.down_ratio,
                                         configs.peak_thresh)

            for i in range(configs.batch_size):
                detections[i] = convert_det_to_real_values(detections[i])
                img_path = metadatas['img_path'][i]
                datap = str.split(img_path, '/')
                filename = str.split(datap[7], '.')
                file_write_obj = open('../result/' + filename[0] + '.txt', 'w')
                lidar_path = '/' + datap[1] + '/' + datap[2] + '/' + datap[3] + '/' + \
                             datap[4] + '/' + datap[5] + '/' + 'velodyne' + '/' + filename[0] + '.bin'
                # show3dlidar(lidar_path, detections[i], calib.V2C, calib.R0, calib.P2)
                dets = detections[i]
                if len(dets) > 0:
                    dets[:, 1:] = lidar_to_camera_box(dets[:, 1:], calib.V2C, calib.R0, calib.P2)
                    for box_idx, label in enumerate(dets):
                        location, dim, ry = label[1:4], label[4:7], label[7]
                        # wrap the rotation angle into [-pi, pi]
                        if ry < -np.pi:
                            ry = 2 * np.pi + ry
                        if ry > np.pi:
                            ry = -2 * np.pi + ry
                        corners_3d = compute_box_3d(dim, location, ry)
                        corners_2d = project_to_image(corners_3d, calib.P2)
                        minxy = np.min(corners_2d, axis=0)
                        maxxy = np.max(corners_2d, axis=0)
                        bbox = np.concatenate([minxy, maxxy], axis=0)
                        # skip boxes projected left of the image plane
                        if bbox[0] < 0 or bbox[2] < 0:
                            continue
                        # skip boxes beyond the right or bottom border of the
                        # ~1242x375 KITTI image (the x-minimum is checked here)
                        if bbox[0] > 1272 or bbox[3] > 375:
                            continue
                        # KITTI label format: type, truncated, occluded, alpha,
                        # bbox (4), dimensions (3), location (3), rotation_y
                        oblist = ['Car', ' ', '0.0', ' ', '0', ' ', '-10', ' ',
                                  '%.2f' % bbox[0], ' ', '%.2f' % bbox[1], ' ',
                                  '%.2f' % bbox[2], ' ', '%.2f' % bbox[3], ' ',
                                  '%.2f' % dim[0], ' ', '%.2f' % dim[1], ' ', '%.2f' % dim[2], ' ',
                                  '%.2f' % location[0], ' ', '%.2f' % location[1], ' ',
                                  '%.2f' % location[2], ' ', '%.2f' % ry, '\n']
                        file_write_obj.writelines(oblist)
                file_write_obj.close()
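# Hedged sanity-check sketch: read back one of the result files written above
# and parse its KITTI-format fields; the file name is illustrative.
with open('../result/000001.txt') as f:
    for line in f:
        fields = line.split()
        cls_name = fields[0]
        bbox = [float(v) for v in fields[4:8]]          # xmin, ymin, xmax, ymax
        dims = [float(v) for v in fields[8:11]]         # h, w, l
        loc = [float(v) for v in fields[11:14]]         # x, y, z in camera frame
        ry = float(fields[14])                          # rotation around y
        print(cls_name, bbox, dims, loc, ry)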