def __init__(self, cfg, split='training'):
    super(KittiMonoDataset, self).__init__()
    preprocessed_path = cfg.path.preprocessed_path
    obj_types = cfg.obj_types
    is_train = (split == 'training')
    imdb_file_path = os.path.join(preprocessed_path, split, 'imdb.pkl')
    self.imdb = pickle.load(open(imdb_file_path, 'rb'))  # list of kittiData
    self.output_dict = {
        "calib": False,
        "image": True,
        "label": False,
        "velodyne": False
    }
    if is_train:
        self.transform = build_augmentator(cfg.data.train_augmentation)
    else:
        self.transform = build_augmentator(cfg.data.test_augmentation)
    self.projector = BBox3dProjector()
    self.is_train = is_train
    self.obj_types = obj_types
    self.use_right_image = getattr(cfg.data, 'use_right_image', True)
    self.is_reproject = getattr(cfg.data, 'is_reproject', True)  # if reproject 2d
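
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module): constructing a
# KittiMonoDataset from a minimal config and wrapping it in a PyTorch
# DataLoader, e.g. from a training script. The nested layout
# (cfg.path.preprocessed_path, cfg.obj_types, cfg.data.*) mirrors exactly what
# __init__ above reads; the concrete augmentation lists, paths and DataLoader
# settings are placeholder assumptions, not values from the original project.
# ---------------------------------------------------------------------------
from types import SimpleNamespace
from torch.utils.data import DataLoader

def _example_mono_dataset():
    cfg = SimpleNamespace(
        path=SimpleNamespace(preprocessed_path='/path/to/preprocessed'),  # placeholder
        obj_types=['Car', 'Pedestrian', 'Cyclist'],
        data=SimpleNamespace(
            train_augmentation=[],   # assumed: whatever build_augmentator expects
            test_augmentation=[],
            use_right_image=True,
            is_reproject=True,
        ),
    )
    dataset = KittiMonoDataset(cfg, split='training')  # reads <preprocessed>/training/imdb.pkl
    # Assumes the class defines __getitem__/__len__ elsewhere, making it DataLoader-compatible.
    return DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4)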
def __init__(self, cfg, split='training'):
    super(KittiDepthMonoDataset, self).__init__()
    raw_path = cfg.path.raw_path
    depth_paths = cfg.path.depth_path if isinstance(cfg.path.depth_path, list) else [cfg.path.depth_path]
    aug_cfg = cfg.data.augmentation
    manager = Manager()  # multiprocessing manager: wraps the image list so it can be shared across workers
    self.is_eval = (split != 'training')
    self.size = aug_cfg.cropSize  # [352, 1216]

    # Index the raw KITTI recordings: one calibration pair (P2/P3) and one
    # velodyne-to-camera transform per date_time directory.
    raw_sequences = {}
    for date_time in os.listdir(raw_path):
        sequences = os.listdir(os.path.join(raw_path, date_time))
        sequences = [sequence for sequence in sequences if not sequence.endswith(".txt")]
        P2, P3 = read_P23_from_sequence(os.path.join(raw_path, date_time, "calib_cam_to_cam.txt"))
        T = read_T_from_sequence(os.path.join(raw_path, date_time, "calib_velo_to_cam.txt"))
        for sequence in sequences:
            raw_sequences[sequence] = dict(P2=P2, P3=P3, T_velo2cam=T, date_time=date_time)

    self.imdb = []
    print("Start counting images in depth prediction dataset.")
    for depth_path in depth_paths:
        for sequence in tqdm(os.listdir(depth_path)):
            sequence_gt_path = os.path.join(depth_path, sequence, 'proj_depth', 'groundtruth')
            P2 = raw_sequences[sequence]['P2']
            P3 = raw_sequences[sequence]['P3']
            T = raw_sequences[sequence]['T_velo2cam']
            left_gt_dir = os.path.join(sequence_gt_path, 'image_02')
            right_gt_dir = os.path.join(sequence_gt_path, 'image_03')
            gt_names = set(os.listdir(left_gt_dir))

            left_image_dir = os.path.join(raw_path, raw_sequences[sequence]['date_time'], sequence, 'image_02', 'data')
            right_image_dir = os.path.join(raw_path, raw_sequences[sequence]['date_time'], sequence, 'image_03', 'data')
            point_cloud_dir = os.path.join(raw_path, raw_sequences[sequence]['date_time'], sequence, 'velodyne_points', 'data')
            image_names = set(os.listdir(left_image_dir))

            intersection = gt_names.intersection(image_names)  # names present in both ground truth and raw images
            instances = [
                dict(
                    image_2_file=os.path.join(left_image_dir, name),
                    image_3_file=os.path.join(right_image_dir, name),
                    gt_2_file=os.path.join(left_gt_dir, name),
                    gt_3_file=os.path.join(right_gt_dir, name),
                    P2=P2.copy(),
                    P3=P3.copy(),
                    # T_velo2cam=T.copy(),
                    # point_cloud_file=os.path.join(point_cloud_dir, name.replace('.png', '.bin'))
                )
                for name in sorted(intersection)
            ]
            self.imdb += instances

    self.imdb = manager.list(self.imdb)
    if not self.is_eval:
        self.transform = build_augmentator(cfg.data.train_augmentation)
    else:
        self.transform = build_augmentator(cfg.data.test_augmentation)
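
# ---------------------------------------------------------------------------
# Usage sketch (illustrative): the config fields KittiDepthMonoDataset expects.
# raw_path points at the KITTI raw recordings (date_time folders containing
# calib_cam_to_cam.txt / calib_velo_to_cam.txt and the drive sequences),
# depth_path at the depth-completion ground truth; it may be a single path or a
# list of paths. The cropSize value and the augmentation lists are placeholder
# assumptions, not values taken from the original project.
# ---------------------------------------------------------------------------
from types import SimpleNamespace

def _example_depth_dataset():
    cfg = SimpleNamespace(
        path=SimpleNamespace(
            raw_path='/path/to/kitti_raw',                 # placeholder
            depth_path='/path/to/depth_annotated/train',   # or a list of such paths
        ),
        data=SimpleNamespace(
            augmentation=SimpleNamespace(cropSize=[352, 1216]),
            train_augmentation=[],   # assumed build_augmentator input
            test_augmentation=[],
        ),
    )
    dataset = KittiDepthMonoDataset(cfg, split='training')
    print(len(dataset.imdb), "left/right image pairs with depth ground truth")
    return dataset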
def __init__(self, cfg, split='validation'):
    super(KittiDepthMonoValTestDataset, self).__init__()
    base_path = cfg.path[split + "_path"]
    self.transform = build_augmentator(cfg.data.test_augmentation)

    image_dir = os.path.join(base_path, "image")
    intrinsic_dir = os.path.join(base_path, "intrinsics")

    image_list = os.listdir(image_dir)
    image_list.sort()
    K_list = os.listdir(intrinsic_dir)
    K_list.sort()

    self.imdb = [
        dict(
            image_file=os.path.join(image_dir, image_list[i]),
            K=read_K_from_depth_prediction(os.path.join(intrinsic_dir, K_list[i]))
        )
        for i in range(len(image_list))
    ]
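
# ---------------------------------------------------------------------------
# Usage sketch (illustrative): this class indexes cfg.path with
# cfg.path[split + "_path"], so cfg.path must support item access (a plain dict
# is enough here), while cfg.data is read with attribute access. The paths and
# augmentation list below are placeholder assumptions.
# ---------------------------------------------------------------------------
from types import SimpleNamespace

def _example_val_dataset():
    cfg = SimpleNamespace(
        path={'validation_path': '/path/to/val', 'test_path': '/path/to/test'},  # placeholders
        data=SimpleNamespace(test_augmentation=[]),  # assumed build_augmentator input
    )
    dataset = KittiDepthMonoValTestDataset(cfg, split='validation')
    first = dataset.imdb[0]
    print(first['image_file'], first['K'])  # K as parsed by read_K_from_depth_prediction
    return dataset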
def compute_dispairity_for_split(cfg,
                                 index_names: List[str],
                                 data_root_dir: str,
                                 output_dict: Dict,
                                 data_split: str = 'training',
                                 time_display_inter: int = 100,
                                 use_point_cloud: bool = True):
    save_dir = os.path.join(cfg.path.preprocessed_path, data_split)
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    disp_dir = os.path.join(save_dir, 'disp')
    if not os.path.isdir(disp_dir):
        os.mkdir(disp_dir)

    if not use_point_cloud:
        stereo_matcher = cv2.StereoBM_create(192, 25)

    N = len(index_names)
    frames = [None] * N
    print("start reading {} data".format(data_split))
    timer = Timer()
    preprocess = build_augmentator(cfg.data.test_augmentation)

    for i, index_name in tqdm(enumerate(index_names)):
        # read data with the dataloader api
        data_frame = KittiData(data_root_dir, index_name, output_dict)
        calib, image, right_image, label, velo = data_frame.read_data()
        original_image = image.copy()
        baseline = (calib.P2[0, 3] - calib.P3[0, 3]) / calib.P2[0, 0]
        image, image_3, P2, P3 = preprocess(original_image, right_image.copy(),
                                            p2=deepcopy(calib.P2), p3=deepcopy(calib.P3))

        if use_point_cloud:
            ## gathering disparity with point cloud back projection
            disparity_left = generate_dispariy_from_velo(velo[:, 0:3], image.shape[0], image.shape[1],
                                                         calib.Tr_velo_to_cam, calib.R0_rect, P2,
                                                         baseline=baseline)
            disparity_right = generate_dispariy_from_velo(velo[:, 0:3], image.shape[0], image.shape[1],
                                                          calib.Tr_velo_to_cam, calib.R0_rect, P3,
                                                          baseline=baseline)
        else:
            ## gathering disparity with stereoBM from opencv
            left_image = denorm(image, cfg.data.augmentation.rgb_mean, cfg.data.augmentation.rgb_std)
            right_image = denorm(image_3, cfg.data.augmentation.rgb_mean, cfg.data.augmentation.rgb_std)
            gray_image1 = cv2.cvtColor(left_image, cv2.COLOR_BGR2GRAY)
            gray_image2 = cv2.cvtColor(right_image, cv2.COLOR_BGR2GRAY)

            disparity_left = stereo_matcher.compute(gray_image1, gray_image2)
            disparity_left[disparity_left < 0] = 0
            disparity_left = disparity_left.astype(np.uint16)

            disparity_right = stereo_matcher.compute(gray_image2[:, ::-1], gray_image1[:, ::-1])
            disparity_right[disparity_right < 0] = 0
            disparity_right = disparity_right.astype(np.uint16)

        disparity_left = skimage.measure.block_reduce(disparity_left, (4, 4), np.max)
        file_name = os.path.join(disp_dir, "P2%06d.png" % i)
        cv2.imwrite(file_name, disparity_left)

        disparity_right = skimage.measure.block_reduce(disparity_right, (4, 4), np.max)
        file_name = os.path.join(disp_dir, "P3%06d.png" % i)
        cv2.imwrite(file_name, disparity_right)

    print("{} split finished precomputing disparity".format(data_split))
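
# ---------------------------------------------------------------------------
# Usage sketch (illustrative): precomputing disparity maps for the training
# split. The split-file name and directory layout follow the common KITTI
# object convention but are assumptions here; output_dict must ask KittiData
# for everything compute_dispairity_for_split unpacks from read_data()
# (calibration, left/right images, labels and the velodyne point cloud). The
# exact key names are defined by KittiData elsewhere in the repository.
# ---------------------------------------------------------------------------
def _example_precompute_disparity(cfg):
    data_root_dir = os.path.join(cfg.path.data_path, 'training')   # assumed layout
    split_file = os.path.join(cfg.path.data_path, 'train.txt')     # assumed split list
    with open(split_file) as f:
        index_names = [line.strip() for line in f if line.strip()]
    output_dict = {
        "calib": True,
        "image": True,
        "image_3": True,     # assumed key for the right image
        "label": True,
        "velodyne": True,    # required when use_point_cloud=True
    }
    compute_dispairity_for_split(cfg, index_names, data_root_dir, output_dict,
                                 data_split='training', use_point_cloud=True)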
def read_one_split(cfg, index_names, data_root_dir, output_dict,
                   data_split='training', time_display_inter=100):
    save_dir = os.path.join(cfg.path.preprocessed_path, data_split)
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    if data_split == 'training':
        disp_dir = os.path.join(save_dir, 'disp')
        if not os.path.isdir(disp_dir):
            os.mkdir(disp_dir)

    N = len(index_names)
    frames = [None] * N
    print("start reading {} data".format(data_split))
    timer = Timer()

    anchor_prior = getattr(cfg, 'anchor_prior', True)

    total_objects = [0 for _ in range(len(cfg.obj_types))]
    total_usable_objects = [0 for _ in range(len(cfg.obj_types))]
    if anchor_prior:
        anchor_manager = Anchors(cfg.path.preprocessed_path, readConfigFile=False,
                                 **cfg.detector.head.anchors_cfg)
        preprocess = build_augmentator(cfg.data.test_augmentation)

        len_scale = len(anchor_manager.scales)
        len_ratios = len(anchor_manager.ratios)
        len_level = len(anchor_manager.pyramid_levels)

        examine = np.zeros([len(cfg.obj_types), len_level * len_scale, len_ratios])
        sums = np.zeros([len(cfg.obj_types), len_level * len_scale, len_ratios, 3])
        squared = np.zeros([len(cfg.obj_types), len_level * len_scale, len_ratios, 3], dtype=np.float64)

        uniform_sum_each_type = np.zeros((len(cfg.obj_types), 6), dtype=np.float64)  # [z, sin2a, cos2a, w, h, l]
        uniform_square_each_type = np.zeros((len(cfg.obj_types), 6), dtype=np.float64)

    for i, index_name in enumerate(index_names):
        # read data with the dataloader api
        data_frame = KittiData(data_root_dir, index_name, output_dict)
        calib, image, label, velo = data_frame.read_data()

        # store the list of kittiObjet and kittiCalib
        max_occlusion = getattr(cfg.data, 'max_occlusion', 2)
        min_z = getattr(cfg.data, 'min_z', 3)
        if data_split == 'training':
            data_frame.label = [
                obj for obj in label.data
                if obj.type in cfg.obj_types and obj.occluded < max_occlusion and obj.z > min_z
            ]
            if anchor_prior:
                for j in range(len(cfg.obj_types)):
                    total_objects[j] += len([obj for obj in data_frame.label if obj.type == cfg.obj_types[j]])
                    data = np.array([
                        [obj.z, np.sin(2 * obj.alpha), np.cos(2 * obj.alpha), obj.w, obj.h, obj.l]
                        for obj in data_frame.label if obj.type == cfg.obj_types[j]
                    ])  # [N, 6]
                    if data.any():
                        uniform_sum_each_type[j, :] += np.sum(data, axis=0)
                        uniform_square_each_type[j, :] += np.sum(data ** 2, axis=0)
        else:
            data_frame.label = [obj for obj in label.data if obj.type in cfg.obj_types]
        data_frame.calib = calib

        if data_split == 'training' and anchor_prior:
            original_image = image.copy()
            baseline = (calib.P2[0, 3] - calib.P3[0, 3]) / calib.P2[0, 0]
            image, P2, label = preprocess(original_image, p2=deepcopy(calib.P2),
                                          labels=deepcopy(data_frame.label))
            _, P3 = preprocess(original_image, p2=deepcopy(calib.P3))

            ## Computing statistics for positive anchors
            if len(data_frame.label) > 0:
                anchors, _ = anchor_manager(image[np.newaxis].transpose([0, 3, 1, 2]),
                                            torch.tensor(P2).reshape([-1, 3, 4]))
                for j in range(len(cfg.obj_types)):
                    bbox2d = torch.tensor([
                        [obj.bbox_l, obj.bbox_t, obj.bbox_r, obj.bbox_b]
                        for obj in label if obj.type == cfg.obj_types[j]
                    ]).cuda()
                    if len(bbox2d) < 1:
                        continue
                    bbox3d = torch.tensor([
                        [obj.x, obj.y, obj.z, np.sin(2 * obj.alpha), np.cos(2 * obj.alpha)]
                        for obj in label if obj.type == cfg.obj_types[j]
                    ]).cuda()

                    usable_anchors = anchors[0]
                    IoUs = calc_iou(usable_anchors, bbox2d)  # [N, K]
                    IoU_max, IoU_argmax = torch.max(IoUs, dim=0)
                    IoU_max_anchor, IoU_argmax_anchor = torch.max(IoUs, dim=1)

                    num_usable_object = torch.sum(IoU_max > cfg.detector.head.loss_cfg.fg_iou_threshold).item()
                    total_usable_objects[j] += num_usable_object

                    positive_anchors_mask = IoU_max_anchor > cfg.detector.head.loss_cfg.fg_iou_threshold
                    positive_ground_truth_3d = bbox3d[IoU_argmax_anchor[positive_anchors_mask]].cpu().numpy()

                    used_anchors = usable_anchors[positive_anchors_mask].cpu().numpy()  # [x1, y1, x2, y2]

                    sizes_int, ratio_int = anchor_manager.anchors2indexes(used_anchors)
                    for k in range(len(sizes_int)):
                        examine[j, sizes_int[k], ratio_int[k]] += 1
                        sums[j, sizes_int[k], ratio_int[k]] += positive_ground_truth_3d[k, 2:5]
                        squared[j, sizes_int[k], ratio_int[k]] += positive_ground_truth_3d[k, 2:5] ** 2

        frames[i] = data_frame

        if (i + 1) % time_display_inter == 0:
            avg_time = timer.compute_avg_time(i + 1)
            eta = timer.compute_eta(i + 1, N)
            print("{} iter:{}/{}, avg-time:{}, eta:{}, total_objs:{}, usable_objs:{}".format(
                data_split, i + 1, N, avg_time, eta, total_objects, total_usable_objects), end='\r')

    save_dir = os.path.join(cfg.path.preprocessed_path, data_split)
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    if data_split == 'training' and anchor_prior:
        for j in range(len(cfg.obj_types)):
            global_mean = uniform_sum_each_type[j] / total_objects[j]
            global_var = np.sqrt(uniform_square_each_type[j] / total_objects[j] - global_mean ** 2)

            avg = sums[j] / (examine[j][:, :, np.newaxis] + 1e-8)
            EX_2 = squared[j] / (examine[j][:, :, np.newaxis] + 1e-8)
            std = np.sqrt(EX_2 - avg ** 2)

            avg[examine[j] < 10, :] = -100  # with such a negative mean z, anchors/losses will filter them out
            std[examine[j] < 10, :] = 1e10
            avg[np.isnan(std)] = -100
            std[np.isnan(std)] = 1e10
            avg[std < 1e-3] = -100
            std[std < 1e-3] = 1e10

            whl_avg = np.ones([avg.shape[0], avg.shape[1], 3]) * global_mean[3:6]
            whl_std = np.ones([avg.shape[0], avg.shape[1], 3]) * global_var[3:6]

            avg = np.concatenate([avg, whl_avg], axis=2)
            std = np.concatenate([std, whl_std], axis=2)

            npy_file = os.path.join(save_dir, 'anchor_mean_{}.npy'.format(cfg.obj_types[j]))
            np.save(npy_file, avg)
            std_file = os.path.join(save_dir, 'anchor_std_{}.npy'.format(cfg.obj_types[j]))
            np.save(std_file, std)

    pkl_file = os.path.join(save_dir, 'imdb.pkl')
    pickle.dump(frames, open(pkl_file, 'wb'))
    print("{} split finished precomputing".format(data_split))
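
# ---------------------------------------------------------------------------
# Usage sketch (illustrative): running read_one_split on the training split and
# loading back what it writes: the pickled frame list (imdb.pkl) and, when
# anchor priors are enabled, the per-class anchor mean/std statistics. The file
# names match the ones saved above; the split file and data_path layout are
# assumptions.
# ---------------------------------------------------------------------------
def _example_read_one_split(cfg):
    data_root_dir = os.path.join(cfg.path.data_path, 'training')          # assumed layout
    with open(os.path.join(cfg.path.data_path, 'train.txt')) as f:        # assumed split list
        index_names = [line.strip() for line in f if line.strip()]
    output_dict = {"calib": True, "image": True, "label": True, "velodyne": True}
    read_one_split(cfg, index_names, data_root_dir, output_dict, data_split='training')

    save_dir = os.path.join(cfg.path.preprocessed_path, 'training')
    frames = pickle.load(open(os.path.join(save_dir, 'imdb.pkl'), 'rb'))
    anchor_mean = np.load(os.path.join(save_dir, 'anchor_mean_{}.npy'.format(cfg.obj_types[0])))
    anchor_std = np.load(os.path.join(save_dir, 'anchor_std_{}.npy'.format(cfg.obj_types[0])))
    # anchor_mean / anchor_std have shape [levels * scales, ratios, 6]:
    # per-bin [z, sin(2a), cos(2a)] statistics plus the global [w, h, l] statistics.
    print(len(frames), anchor_mean.shape, anchor_std.shape)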