def get_lidar(self, raw_date, raw_sequence_id, raw_frame_id):
    raw_frame_id_filled = str(raw_frame_id).zfill(10)
    lidar_filename = os.path.join(self.root_dir, raw_date, raw_sequence_id,
                                  'velodyne_points', 'data',
                                  raw_frame_id_filled + '.bin')
    return utils.load_velo_scan(lidar_filename)
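# A minimal usage sketch for the raw-KITTI loader above. The dataset class
# name (RawKittiDataset) and the concrete date/drive strings are illustrative
# assumptions; the folder layout is the one the method encodes:
# <root>/<date>/<drive>/velodyne_points/data/<frame>.bin
dataset = RawKittiDataset(root_dir='Data/raw')  # hypothetical class
scan = dataset.get_lidar('2011_09_26', '2011_09_26_drive_0001_sync', 0)
# scan[:, 0:3] are x, y, z in velodyne coordinates, scan[:, 3] is reflectance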
if __name__ == "__main__": print("Starting") image_path = '/home/eshan/Pictures/000009.jpeg' img = cv2.imread(image_path) file_path = '/home/eshan/Pictures/0001.txt' filecontent = np.array([f.split() for f in open(file_path, 'r')]) alllabels = np.unique(filecontent[:, 2]) labels = ['Car', 'Pedestrian', 'Cyclist'] finalset = [x for x in alllabels if x not in labels] for val in finalset: filecontent = filecontent[filecontent[:, 2] != val, :] data = (Object3d(getstringfromarray(line[2:])) for line in filecontent[filecontent[:, 0] == '0', :]) calib = Calibration('/home/eshan/Pictures/calib.txt') velo = load_velo_scan('/home/eshan/Pictures/000009.bin', np.float32, n_vec=4)[:, 0:4] img2 = np.copy(img) show_lidar_with_boxes(velo, data, calib) for obj in data: print("here") box3d_pts_2d, _ = compute_box_3d(obj, calib.P) img2 = draw_projected_box3d(img2, box3d_pts_2d, color=(0, 0, 0)) text = 'gt ID: %d, Type: %s' % (0, obj.type) if box3d_pts_2d is not None: print("also") img2 = cv2.putText( img2, text, (int(box3d_pts_2d[4, 0]), int(box3d_pts_2d[4, 1]) - 8), cv2.FONT_HERSHEY_TRIPLEX, 0.5,
def get_lidar(self, idx):
    assert idx < self.num_samples
    lidar_filename = os.path.join(self.lidar_dir, '%06d.bin' % idx)
    return utils.load_velo_scan(lidar_filename)
def f(img_n):
    idx = img_n[-10:-4]  # frame index as a zero-padded string
    vel_n = origin_data_dir + '/velodyne/' + idx + '.bin'
    cal_n = origin_data_dir + '/calib/' + idx + '.txt'
    lab_n = origin_data_dir + '/label_2/' + idx + '.txt'
    calib_data = kitti_utils.Calibration(cal_n)
    vel = kitti_utils.load_velo_scan(vel_n)
    vel = calib_data.project_velo_to_rect(vel)
    # crop to the region of interest in rect coordinates:
    # 0 < z < 70
    vel = vel[np.where(vel[:, 2] > 0)]
    vel = vel[np.where(vel[:, 2] < 70)]
    # -3 < y < 1
    vel = vel[np.where(vel[:, 1] < 1)]
    vel = vel[np.where(vel[:, 1] > -3)]
    # -40 < x < 40
    vel = vel[np.where(vel[:, 0] < 40)]
    vel = vel[np.where(vel[:, 0] > -40)]
    # NOTE: the points are NOT centered here!
    # coords = np.ascontiguousarray(vel[:, :3] - vel[:, :3].mean(0))
    coords = np.ascontiguousarray(vel)
    # get per-point color by projecting into the image
    vel_2d = calib_data.project_rect_to_image(vel)
    colors = kitti_utils.img_xy2rgb(img_n, vel_2d)
    # read labels, dropping DontCare entries before building boxes
    with open(lab_n) as f:
        str_label = f.readlines()
    str_label = [x for x in str_label if x[0:8] != 'DontCare']
    label_3d_boxes = []
    for label in str_label:
        obj = Object3d(label)
        box3d = compute_box_3d(obj, calib_data.P)
        label_3d_boxes.append(box3d[1])
    label_3d_boxes = np.asarray(label_3d_boxes).astype(np.float64)
    str_label = [x.split(' ') for x in str_label]
    nplabel = np.asarray(str_label)
    for i in range(len(nplabel)):
        nplabel[i][0] = KITTI_LABEL_NAMES.index(nplabel[i][0])
    nplabel = nplabel.astype(np.float64)
    # KITTI label format (one object per line, 15 fields):
    # col 1:      object class, one of 'Car', 'Van', 'Truck', 'Pedestrian',
    #             'Person_sitting', 'Cyclist', 'Tram', 'Misc' or 'DontCare';
    #             DontCare objects have no 3D annotation, since the LiDAR
    #             cannot reach that far even when the object is still
    #             detectable in the image
    # col 2:      truncation in [0, 1]: how much of the object leaves the
    #             image boundary (objects at the image edge may be cut off)
    # col 3:      occlusion: 0 = fully visible, 1 = partly occluded,
    #             2 = largely occluded, 3 = unknown
    # col 4:      alpha: the bird's-eye-view angle between the ray from the
    #             camera center to the object center; e.g. -1.57 means -90
    #             degrees, i.e. straight ahead. It encodes the relative
    #             direction of the object center in bird's-eye view.
    # cols 5-8:   2D bounding box in pixels as x1 y1 x2 y2
    #             (top-left corner, then bottom-right corner)
    # cols 9-11:  object height, width and length in meters
    # cols 12-14: 3D object center x y z in camera coordinates
    # col 15:     rotation_y: the body heading around the camera y-axis in
    #             radians. Unlike col 4, this is the orientation of the body
    #             itself, not the angle of the center ray against the camera
    #             z-axis (arguably col 4 is redundant, since alpha can be
    #             computed from the center position and rotation_y).
    torch.save((coords, colors, nplabel, label_3d_boxes),
               proc_data_dir + idx + '.pth')
    print(idx)
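# A minimal, self-contained sketch of how one KITTI label line maps onto the
# fields documented above, using the example values from those comments.
# Only the standard library is needed; the dict field names are illustrative.
sample_line = ('Truck 0.0 0 -1.57 599.41 156.40 629.75 189.25 '
               '2.85 2.63 12.34 0.47 1.49 69.44 -1.56')
fields = sample_line.split()
label = {
    'type': fields[0],                               # 'Truck'
    'truncated': float(fields[1]),                   # 0.0 -> not truncated
    'occluded': int(fields[2]),                      # 0 -> fully visible
    'alpha': float(fields[3]),                       # -1.57 rad in BEV
    'bbox_2d': [float(v) for v in fields[4:8]],      # x1, y1, x2, y2 pixels
    'dimensions': [float(v) for v in fields[8:11]],  # h, w, l in meters
    'location': [float(v) for v in fields[11:14]],   # x, y, z camera coords
    'rotation_y': float(fields[14]),                 # heading around y-axis
}
print(label['type'], label['location'])  # Truck [0.47, 1.49, 69.44]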
def vis(result_sha, data_root, result_root):

    def show_image_with_boxes(img, velo, objects_res, objects_res_det,
                              objects_res_raw, labeldata, object_gt, calib,
                              save_path, height_threshold=0, show_lidar=True,
                              save_image=False):
        img2 = np.copy(img)
        # tracked objects, drawn in red (BGR); compute_box_3d can return
        # None (e.g. for boxes behind the camera), so only draw when the
        # projection exists
        for obj in objects_res:
            box3d_pts_2d, _ = compute_box_3d(obj, calib.P)
            color_tmp = tuple(
                [int(tmp * 255) for tmp in colors[obj.id % max_color]])
            if box3d_pts_2d is not None:
                img2 = draw_projected_box3d(img2, box3d_pts_2d,
                                            color=(0, 0, 255))
                text = 'Tracked ID: %d, Type: %s' % (obj.id, obj.type)
                img2 = cv2.putText(
                    img2, text,
                    (int(box3d_pts_2d[4, 0]), int(box3d_pts_2d[4, 1]) - 8),
                    cv2.FONT_HERSHEY_TRIPLEX, 0.5, color=(0, 0, 255))
        # detections, drawn in green (BGR)
        for obj in objects_res_det:
            box3d_pts_2d, _ = compute_box_3d(obj, calib.P)
            color_tmp = tuple(
                [int(tmp * 255) for tmp in colors[obj.id % max_color]])
            if box3d_pts_2d is not None:
                img2 = draw_projected_box3d(img2, box3d_pts_2d,
                                            color=(0, 255, 0))
                text = 'Detection ID: %d, Type: %s' % (obj.id, obj.type)
                img2 = cv2.putText(
                    img2, text,
                    (int(box3d_pts_2d[3, 0]), int(box3d_pts_2d[3, 1]) - 8),
                    cv2.FONT_HERSHEY_TRIPLEX, 0.5, color=(0, 255, 0))
        # labeldata is a generator; tee it so it can be consumed both here
        # and by show_lidar_with_boxes() below
        import itertools
        labeldata, labeldata2 = itertools.tee(labeldata)
        # ground truth, drawn in blue (BGR)
        for obj in labeldata:
            box3d_pts_2d, _ = compute_box_3d(obj, calib.P)
            if box3d_pts_2d is not None:
                img2 = draw_projected_box3d(img2, box3d_pts_2d,
                                            color=(255, 0, 0))
                text = 'GT, Type: %s' % obj.type
                print(text)
                img2 = cv2.putText(
                    img2, text,
                    (int(box3d_pts_2d[4, 0]), int(box3d_pts_2d[4, 1]) - 8),
                    cv2.FONT_HERSHEY_TRIPLEX, 0.5, color=(255, 0, 0))
        # for obj in objects_res_raw:
        #     box3d_pts_2d, _ = compute_box_3d(obj, calib.P)
        #     color_tmp = tuple([int(tmp * 255)
        #                        for tmp in colors[obj.id % max_color]])
        #     img2 = draw_projected_box3d(img2, box3d_pts_2d, color=(255, 0, 0))
        #     text = 'Estimate ID: %d' % obj.id
        #     if box3d_pts_2d is not None:
        #         img2 = cv2.putText(img2, text, (int(box3d_pts_2d[2, 0]), int(
        #             box3d_pts_2d[2, 1]) - 8), cv2.FONT_HERSHEY_TRIPLEX,
        #             0.5, color=(255, 0, 0))
        if show_lidar:
            show_lidar_with_boxes(velo, objects=labeldata2, calib=calib,
                                  objects_pred=objects_res)
        img = Image.fromarray(img2)
        img = img.resize((width, height))
        cv2.imshow("Image", img2)
        cv2.waitKey()
        if save_image:
            print("Saving Image at", save_path)
            img.save(save_path)
        return img2

    for seq in seq_list:
        image_dir = os.path.join(data_root, 'image_02/%s' % seq)
        calib_file = os.path.join(data_root, 'calib/%s.txt' % seq)
        label_file = os.path.join(data_root, 'label_02/%s.txt' % seq)
        velo_dir = os.path.join(data_root, 'velodyne/%s' % seq)
        result_dir = [
            os.path.join(result_root, '%s/trk_withid/%s' % (result_sha[0], seq)),
            os.path.join(result_root, '%s/trk_withid/%s' % (result_sha[1], seq)),
            os.path.join(result_root, '%s/trk_withid/%s' % (result_sha[2], seq))
        ]
        save_3d_bbox_dir = os.path.join(
            result_root,
            '%s/trk_image_vis/%s' % ("Combined_Final_WithLabel", seq))
        mkdir_if_missing(save_3d_bbox_dir)

        # load the image and velodyne file lists
        images_list, num_images = load_list_from_folder(image_dir)
        velo_list, num_velo = load_list_from_folder(velo_dir)
        print('number of images to visualize is %d' % num_images)
        start_count = 0
        filecontent = np.array([f.split() for f in open(label_file, 'r')])
        # alllabels = np.unique(filecontent[:, 2])
        # labels = ['Car', 'Pedestrian', 'Cyclist']
        # finallabelset = [x for x in alllabels if x not in labels]
        # for val in finallabelset:
        #     filecontent = filecontent[filecontent[:, 2] != val, :]
        size = (width, height)
        out = cv2.VideoWriter(f'{result_root}/{seq}.avi',
                              cv2.VideoWriter_fourcc(*'DIVX'), 15, size)
        num_images = 1  # debug override: only visualize the first frame

        for count in range(start_count, num_images):
            image_tmp = images_list[count]
            velo_tmp = velo_list[count]
            if not is_path_exists(image_tmp):
                continue
            image_index = int(fileparts(image_tmp)[1])
            image_tmp = np.array(Image.open(image_tmp))
            img_height, img_width, img_channel = image_tmp.shape
            filecontentframe = filecontent[
                filecontent[:, 0] == str(image_index), :]
            print(len(filecontentframe))
            print(f"Labels for frame {image_index}",
                  np.unique(filecontentframe[:, 2]))
            labeldata = (Object3d(getstringfromarray(line[2:]))
                         for line in filecontentframe)

            # load tracking, detection and raw results from every run
            object_res = []
            object_res_det = []
            object_res_raw = []
            for dirt in result_dir:
                result_tmp = os.path.join(dirt, '%06d.txt' % image_index)
                if is_path_exists(result_tmp):
                    object_res = object_res + read_label(result_tmp)
                result_tmp_det = os.path.join(dirt,
                                              'det%06d.txt' % image_index)
                if is_path_exists(result_tmp_det):
                    object_res_det = object_res_det + read_label(result_tmp_det)
                result_tmp_raw = os.path.join(dirt,
                                              'raw%06d.txt' % image_index)
                if is_path_exists(result_tmp_raw):
                    object_res_raw = object_res_raw + read_label(result_tmp_raw)
            print('processing index: %d, %d/%d, results from %s' %
                  (image_index, count + 1, num_images, result_tmp))
            calib_tmp = Calibration(calib_file)  # load the calibration

            # keep only whitelisted classes above the score threshold
            object_res_filtered = []
            for object_tmp in object_res:
                if object_tmp.type not in type_whitelist:
                    continue
                if hasattr(object_tmp, 'score') and \
                        object_tmp.score < score_threshold:
                    continue
                object_res_filtered.append(object_tmp)
            object_res_filtered_det = []
            for object_tmp in object_res_det:
                if object_tmp.type not in type_whitelist:
                    continue
                if hasattr(object_tmp, 'score') and \
                        object_tmp.score < score_threshold:
                    continue
                object_res_filtered_det.append(object_tmp)
            object_res_filtered_raw = []
            for object_tmp in object_res_raw:
                if object_tmp.type not in type_whitelist:
                    continue
                # if hasattr(object_tmp, 'score') and \
                #         object_tmp.score < score_threshold:
                #     continue
                object_res_filtered_raw.append(object_tmp)

            num_instances = len(object_res_filtered)
            save_image_with_3dbbox_gt_path = os.path.join(
                save_3d_bbox_dir, '%06d.jpg' % image_index)
            velodyne_scan = load_velo_scan(velo_tmp, np.float32,
                                           n_vec=4)[:, 0:4]
            img = show_image_with_boxes(
                image_tmp, velodyne_scan, object_res_filtered,
                object_res_filtered_det, object_res_filtered_raw, labeldata,
                [], calib_tmp, save_path=save_image_with_3dbbox_gt_path)
            print('number of objects to plot is %d, %d, %d' %
                  (num_instances, len(object_res_filtered_det),
                   len(object_res_filtered_raw)))
            out.write(img)
        out.release()
def __getitem__(self, index):
    # start time
    get_item_start_time = time.time()

    # get point cloud
    lidar_filename = os.path.join(self.lidar_dir, '%06d.bin' % index)
    lidar_data = kitti_utils.load_velo_scan(lidar_filename)
    # time for loading point cloud
    read_point_cloud_end_time = time.time()
    read_point_cloud_time = read_point_cloud_end_time - get_item_start_time

    # voxelize point cloud
    voxel_point_cloud = torch.tensor(
        kitti_utils.voxelize(point_cloud=lidar_data),
        requires_grad=True, device=self.device).float()
    # time for voxelization
    voxelization_end_time = time.time()
    voxelization_time = voxelization_end_time - read_point_cloud_end_time
    # channels along first dimension according to the PyTorch convention
    voxel_point_cloud = voxel_point_cloud.permute([2, 0, 1])

    # get image
    if self.get_image:
        image_filename = os.path.join(self.image_dir, '%06d.png' % index)
        image = kitti_utils.get_image(image_filename)

    # get calibration and labels
    read_labels_start_time = time.time()
    calib_filename = os.path.join(self.calib_dir, '%06d.txt' % index)
    calib = kitti_utils.Calibration(calib_filename)
    label_filename = os.path.join(self.label_dir, '%06d.txt' % index)
    labels = kitti_utils.read_label(label_filename)
    read_labels_end_time = time.time()
    read_labels_time = read_labels_end_time - read_labels_start_time

    # compute network label
    if self.split == 'training':
        compute_label_start_time = time.time()
        # create empty pixel labels
        regression_label = np.zeros(
            (OUTPUT_DIM_0, OUTPUT_DIM_1, OUTPUT_DIM_REG))
        classification_label = np.zeros(
            (OUTPUT_DIM_0, OUTPUT_DIM_1, OUTPUT_DIM_CLA))
        # iterate over all 3D label objects in the list
        for label in labels:
            if label.type == 'Car':
                # compute corners of the 3D bounding box in camera coordinates
                _, bbox_corners_camera_coord = kitti_utils.compute_box_3d(
                    label, calib.P, scale=1.0)
                # get pixel labels for classification and BEV box regression
                regression_label, classification_label = compute_pixel_labels(
                    regression_label, classification_label, label,
                    bbox_corners_camera_coord)
        # stack classification and regression labels
        regression_label = torch.tensor(regression_label,
                                        device=self.device).float()
        classification_label = torch.tensor(classification_label,
                                            device=self.device).float()
        training_label = torch.cat(
            (regression_label, classification_label), dim=2)
        # get time for computing the pixel labels
        compute_label_end_time = time.time()
        compute_label_time = compute_label_end_time - compute_label_start_time

        # total time for data loading
        get_item_end_time = time.time()
        get_item_time = get_item_end_time - get_item_start_time
        if self.show_times:
            print('---------------------------')
            print('Get Item Time: {:.4f} s'.format(get_item_time))
            print('---------------------------')
            print('Read Point Cloud Time: {:.4f} s'.format(
                read_point_cloud_time))
            print('Voxelization Time: {:.4f} s'.format(voxelization_time))
            print('Read Labels Time: {:.4f} s'.format(read_labels_time))
            print('Compute Labels Time: {:.4f} s'.format(compute_label_time))
        return voxel_point_cloud, training_label
    else:
        if self.get_image:
            return image, voxel_point_cloud, labels, calib
        else:
            return voxel_point_cloud, labels, calib
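# A minimal sketch of consuming this __getitem__ through a standard PyTorch
# DataLoader. The dataset class name (KittiDataset) and its constructor
# arguments are assumptions for illustration; only __getitem__ above comes
# from the source.
from torch.utils.data import DataLoader

dataset = KittiDataset(split='training', device='cpu',
                       show_times=False)  # hypothetical constructor
loader = DataLoader(dataset, batch_size=2, shuffle=True)
for voxel_point_cloud, training_label in loader:
    # voxel_point_cloud: (B, C, H, W); training_label: (B, H, W, REG + CLA)
    break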
        x_out = torch.cat((x_reg, x_class), dim=1)
        return x_out


########
# Main #
########

if __name__ == '__main__':
    # exemplary input point cloud
    base_dir = 'Data/training/velodyne'
    index = 1
    lidar_filename = os.path.join(base_dir, '%06d.bin' % index)
    lidar_data = kitti_utils.load_velo_scan(lidar_filename)
    # create torch tensor from numpy array
    voxel_point_cloud = torch.tensor(kitti_utils.voxelize(lidar_data),
                                     requires_grad=True,
                                     device='cpu').float()
    # channels along first dimension according to the PyTorch convention
    voxel_point_cloud = voxel_point_cloud.permute([2, 0, 1])
    # add batch dimension 0 to the tensor
    voxel_point_cloud = torch.unsqueeze(voxel_point_cloud, 0)
    # forward pass through the network
    pixor = PIXOR()
    prediction = pixor(voxel_point_cloud)
    classification_prediction = prediction[:, :, -1]
    regression_prediction = prediction[:, :, :-1]
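    # Turning the raw output map into detections is not shown above; this is
    # a minimal sketch, assuming the last channel holds per-cell objectness
    # logits (as the slicing above suggests, hence the sigmoid). A full PIXOR
    # decoder would also convert the regression channels into oriented boxes
    # and apply non-maximum suppression.
    scores = torch.sigmoid(classification_prediction)  # assumed to be logits
    confidence_threshold = 0.5                         # illustrative value
    candidate_cells = (scores > confidence_threshold).nonzero()
    print('cells above threshold:', candidate_cells.shape[0])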