def generate_semantic_mask(self, split): output_dir = os.path.join(self.kitti_path, 'training/semantic_mask') with open(os.path.join(self.kitti_path, split + '.txt')) as f: frame_ids = [line.rstrip('\n') for line in f] depth_map_dir = os.path.join(self.kitti_path, 'training/depth_map_dense') if not os.path.isdir(output_dir): os.mkdir(output_dir) for fid in frame_ids: fid = int(fid) depth_map = np.load(os.path.join(depth_map_dir, '{:06}.npy'.format(fid))) pc_dense = self.to_point_cloud(depth_map, fid) objects = self.kitti_dataset.get_label_objects(fid) image = self.kitti_dataset.get_image(fid) calib = self.kitti_dataset.get_calibration(fid) # 3 by 4 matrix objects = filter(lambda obj: obj.type in type_whitelist and obj.difficulty in difficulties_whitelist, objects) pts_mask = { 'NonObject': np.zeros((len(pc_dense),), dtype=np.bool), 'Car': np.zeros((len(pc_dense),), dtype=np.bool), 'Pedestrian': np.zeros((len(pc_dense),), dtype=np.bool), 'Cyclist': np.zeros((len(pc_dense),), dtype=np.bool) } for obj in objects: _,obj_box_3d = utils.compute_box_3d(obj, calib.P) _,mask = extract_pc_in_box3d(pc_dense, obj_box_3d) pts_mask[obj.type] = np.logical_or(mask, pts_mask[obj.type]) pts_mask['NonObject'] = (pts_mask['Car'] + pts_mask['Pedestrian'] + pts_mask['Cyclist']) == 0 semantic_mask = np.ones(image.shape[0:2], dtype=np.int32) * 255 # disp_mask = np.ones(image.shape, dtype=np.int32) * 255 # color_map = { # 'NonObject': (0,0,0), # 'Car': (0,0,142), # 'Pedestrian': (220, 20, 60), # 'Cyclist': (20, 255, 60) # } for k,v in pts_mask.items(): pts_2d = calib.project_rect_to_image(pc_dense[v, :3]) for u, v in pts_2d.astype(np.int32): if u >= image.shape[1] or v >= image.shape[0]: continue semantic_mask[v, u] = g_type2onehotclass[k] # disp_mask[v, u] = color_map[k] # cv2.imshow('semantic_mask', disp_mask/255.0) # cv2.waitKey(0) fout = os.path.join(output_dir, '{:06}.png'.format(fid)) # np.save(fout, semantic_mask) cv2.imwrite(fout, semantic_mask) print('save', fid)
def expand_points(self, pc_rect, proposal, calib, seed_ind, data_idx_str): prop = copy.deepcopy(proposal) prop.l += 1 prop.w += 1 prop_corners_image_2d, prop_corners_3d = utils.compute_box_3d( prop, calib.P) _, local_ind = extract_pc_in_box3d(pc_rect, prop_corners_3d) local_points = pc_rect[local_ind] local_seg = self.pc_seg[data_idx_str][local_ind] fg_ind = local_seg == 1 keypoints = pc_rect[seed_ind] # print(np.sum(seed_ind)) # print(np.sum(local_ind)) # print(np.sum(fg_ind)) print('before: ', keypoints.shape[0]) candidates_ind = np.logical_and(np.logical_not(seed_ind), local_ind) candidates_ind = np.logical_and(candidates_ind, self.pc_seg[data_idx_str] == 1) candidates_points = pc_rect[candidates_ind] print('candidates size: ', candidates_points.shape[0]) while True: tree = KDTree(keypoints[:, :3], leaf_size=2) new_kp = [] selected = np.zeros((len(candidates_points), )) for i in range(len(candidates_points)): if tree.query_radius(np.expand_dims(candidates_points[i, :3], axis=0), r=0.3, count_only=True) >= 3: new_kp.append(candidates_points[i]) selected[i] = 1 if len(new_kp) == 0: break keypoints = np.concatenate((keypoints, new_kp), axis=0) candidates_points = candidates_points[selected == 0] print('after: ', keypoints.shape[0]) return keypoints
def visualize(dataset, frame_id, prediction, seg_mask=None, show_3d=False, output_dir=None): fig_size = (12.42, 3.75) is_video = type(dataset).__name__ == 'kitti_object_video' # pred_fig, pred_2d_axes, pred_3d_axes = \ # vis_utils.visualization(dataset.image_dir, # int(frame_id), # display=False, # fig_size=fig_size) pred_fig, pred_3d_axes = vis_utils.visualize_single_plot(dataset.image_dir, int(frame_id), is_video, flipped=False, display=False, fig_size=fig_size) calib = dataset.get_calibration(frame_id) # 3 by 4 matrix # 2d visualization # draw groundtruth # labels = dataset.get_label_objects(frame_id) # labels = filter(lambda obj: obj.type in type_whitelist and obj.difficulty in difficulties_whitelist, labels) # draw_boxes(labels, calib, pred_2d_axes) # draw prediction on second image pred_corners = draw_boxes(prediction, calib, pred_3d_axes) if output_dir: filename = os.path.join(output_dir, 'result_2d_image/%06d.png' % frame_id) plt.savefig(filename) plt.close(pred_fig) else: plt.show() raw_input() if seg_mask is not None: img = dataset.get_image(frame_id) img = cv2.resize(img, (1200, 360)) seg_img = add_mask_to_img(img, seg_mask) filename = os.path.join(output_dir, 'result_seg_image/%06d.png' % frame_id) cv2.imwrite(filename, seg_img) if show_3d: # 3d visualization pc_velo = dataset.get_lidar(frame_id) image = dataset.get_image(frame_id) img_height, img_width = image.shape[0:2] _, _, img_fov_inds = get_lidar_in_image_fov(pc_velo[:, 0:3], calib, 0, 0, img_width, img_height, True) pc_velo = pc_velo[img_fov_inds, :] boxes3d_velo = [] mask = np.zeros((len(pc_velo), )) for corners in pred_corners: pts_velo = calib.project_rect_to_velo(corners) boxes3d_velo.append(pts_velo) _, obj_mask = extract_pc_in_box3d(pc_velo, pts_velo) mask = np.logical_or(mask, obj_mask) fig = draw_lidar(pc_velo, pts_color=(1, 1, 1)) fig = draw_lidar(pc_velo[mask == 1], fig=fig, pts_color=(1, 0, 0)) fig = draw_gt_boxes3d(boxes3d_velo, fig, draw_text=False, color=(1, 1, 1)) # raw_input() if output_dir: filename = os.path.join(output_dir, 'result_3d_image/%06d.png' % frame_id) mlab.savefig(filename, figure=fig)
def load_frame_data(self, data_idx_str, random_flip=False, random_rotate=False, random_shift=False, pca_jitter=False): '''load one frame''' data_idx = int(data_idx_str) # print(data_idx_str) calib = self.kitti_dataset.get_calibration(data_idx) # 3 by 4 matrix if self.is_training: objects = self.kitti_dataset.get_label_objects(data_idx) else: objects = [] objects = filter( lambda obj: obj.type in self.types_list and obj.difficulty in self. difficulties_list, objects) gt_boxes = [] # ground truth boxes image = self.kitti_dataset.get_image(data_idx) pc_velo = self.kitti_dataset.get_lidar(data_idx) img_height, img_width = image.shape[0:2] # data augmentation if pca_jitter: image = apply_pca_jitter(image)[0] # if random_flip and np.random.random()>0.5: # 50% chance flipping if random_flip and False: pc_velo[:, 1] *= -1 # FIXME: to flip the scene, optical center also need to be adjusted # dont use with image now calib.P[0, 2] = img_width - calib.P[0, 2] image = np.flip(image, axis=1) for obj in objects: obj.t = [-obj.t[0], obj.t[1], obj.t[2]] # ensure that ry is [-pi, pi] if obj.ry >= 0: obj.ry = np.pi - obj.ry else: obj.ry = -np.pi - obj.ry # use original point cloud for rpn if not self.train_img_seg: _, pc_image_coord, img_fov_inds = get_lidar_in_image_fov( pc_velo[:, 0:3], calib, 0, 0, img_width, img_height, True) pc_velo = pc_velo[img_fov_inds, :] choice = np.random.choice(pc_velo.shape[0], self.npoints, replace=True) point_set = pc_velo[choice, :] pc_rect = np.zeros_like(point_set) pc_rect[:, 0:3] = calib.project_velo_to_rect(point_set[:, 0:3]) pc_rect[:, 3] = point_set[:, 3] else: # use dense point cloud for training image segmentation pc_dense = self.dense_points.load_dense_points(data_idx) # get_lidar_in_image_fov pts_2d = calib.project_rect_to_image(pc_dense) fov_inds = (pts_2d[:,0]<img_width) & (pts_2d[:,0]>=0) & \ (pts_2d[:,1]<img_height) & (pts_2d[:,1]>=0) # fov_inds = fov_inds & (pc_dense[:,2]>2.0) pc_dense = pc_dense[fov_inds, :] obj_mask = { 'Car': np.zeros((len(pc_dense), ), dtype=np.bool), 'Pedestrian': np.zeros((len(pc_dense), ), dtype=np.bool), 'Cyclist': np.zeros((len(pc_dense), ), dtype=np.bool) } for obj in objects: _, obj_box_3d = utils.compute_box_3d(obj, calib.P) _, mask = extract_pc_in_box3d(pc_dense, obj_box_3d) obj_mask[obj.type] = np.logical_or(mask, obj_mask[obj.type]) bg_mask = (obj_mask['Car'] + obj_mask['Pedestrian'] + obj_mask['Cyclist']) == 0 car = np.where(obj_mask['Car'])[0] # oversampling ped = np.where(obj_mask['Pedestrian'])[0] if ped.shape[0] > 0: #ped = ped[np.random.choice(ped.shape[0], car.shape[0], replace=True)] ped = ped[np.random.choice(ped.shape[0], ped.shape[0] * 5, replace=True)] cyc = np.where(obj_mask['Cyclist'])[0] if cyc.shape[0] > 0: #cyc = cyc[np.random.choice(cyc.shape[0], car.shape[0], replace=True)] cyc = cyc[np.random.choice(cyc.shape[0], cyc.shape[0] * 10, replace=True)] fg_num = car.shape[0] + ped.shape[0] + cyc.shape[0] #print('total points: ', len(pc_dense)) #print('fg_num: {0}, car: {1}, ped: {2}, cyc: {3}'.format(fg_num, car.shape[0], ped.shape[0], cyc.shape[0])) bg = np.where(bg_mask)[0] bg = bg[np.random.choice(bg.shape[0], max(self.npoints - fg_num, 0), replace=True)] choice = np.concatenate((car, ped, cyc, bg), axis=0)[:self.npoints] #choice = np.random.choice(pc_dense.shape[0], self.npoints, replace=True) pc_rect = np.zeros((self.npoints, 4)) pc_rect[:, :3] = pc_dense[choice, :] #start = time.time() seg_mask = np.zeros((pc_rect.shape[0])) # data augmentation # dont use with image now if random_rotate and False: ry = (np.random.random() - 0.5) * math.radians( 20) # -10~10 degrees pc_rect[:, 0:3] = rotate_points_along_y(pc_rect[:, 0:3], ry) for obj in objects: obj.t = rotate_points_along_y(obj.t, ry) obj.ry -= ry # ensure that ry is [-pi, pi] if obj.ry > np.pi: obj.ry -= 2 * np.pi elif obj.ry < -np.pi: obj.ry += 2 * np.pi proposal_of_point = {} # point index to proposal vector gt_box_of_point = {} # point index to corners_3d for obj in objects: _, obj_box_3d = utils.compute_box_3d(obj, calib.P) _, obj_mask = extract_pc_in_box3d(pc_rect, obj_box_3d) if np.sum(obj_mask) == 0: # label without 3d points # print('skip object without points') continue seg_mask[obj_mask] = g_type2onehotclass[obj.type] #seg_mask[obj_mask] = 1 gt_boxes.append(obj_box_3d) obj_idxs = np.where(obj_mask)[0] # data augmentation # FIXME: jitter point will make valid loss growing # Also may go out of image view if random_shift and False: # jitter object points pc_rect[obj_idxs, :3] = shift_point_cloud( pc_rect[obj_idxs, :3], 0.02) for idx in obj_idxs: # to save time if self.train_img_seg: proposal_of_point[idx] = [0, [0, 0, 0], 0, 0, 0, [0, 0, 0]] else: proposal_of_point[idx] = box_encoder.encode( obj, pc_rect[idx, :3]) gt_box_of_point[idx] = obj_box_3d # self.viz_frame(pc_rect, seg_mask, gt_boxes) # return pc_rect, seg_mask, proposal_of_point, gt_box_of_point, gt_boxes calib_matrix = np.copy(calib.P) calib_matrix[0, :] *= (1200.0 / image.shape[1]) calib_matrix[1, :] *= (360.0 / image.shape[0]) #print('construct', time.time() - start) return { 'pointcloud': pc_rect, 'image': cv2.resize(image, (1200, 360)), 'calib': calib_matrix, 'mask_label': seg_mask, 'proposal_of_point': self.get_proposal_out(proposal_of_point), 'gt_box_of_point': self.get_gt_box_of_points(gt_box_of_point), 'gt_boxes': gt_boxes, 'pc_choice': choice }
def get_one_sample(self, proposal, pc_rect, image, calib, iou, gt_box_3d, gt_object, data_idx_str, img_seg_map): '''convert to frustum sample format''' prop_corners_image_2d, prop_corners_3d = utils.compute_box_3d( proposal, calib.P) if prop_corners_image_2d is None: print('skip proposal behind camera') return False # get points within proposal box # expand proposal proposal_expand = copy.deepcopy(proposal) proposal_expand.l += 0.5 proposal_expand.w += 0.5 proposal_expand.h += 0.5 _, prop_corners_3d = utils.compute_box_3d(proposal_expand, calib.P) _, prop_inds = extract_pc_in_box3d(pc_rect, prop_corners_3d) pc_in_prop_box = pc_rect[prop_inds, :] seg_mask = np.zeros((pc_in_prop_box.shape[0])) if len(pc_in_prop_box) == 0: print('Reject proposal with no point') return False # Get frustum angle box2d_center = calib.project_rect_to_image(np.array([proposal.t]))[0] uvdepth = np.zeros((1, 3)) uvdepth[0, 0:2] = box2d_center uvdepth[0, 2] = 20 # some random depth box2d_center_rect = calib.project_image_to_rect(uvdepth) frustum_angle = -1 * np.arctan2(box2d_center_rect[0, 2], box2d_center_rect[0, 0]) if gt_object is not None: obj_type = gt_object.type _, inds = extract_pc_in_box3d(pc_in_prop_box, gt_box_3d) seg_mask[inds] = 1 # Reject object with too few point if np.sum(seg_mask) < 5: print('Reject object with too few point') return False # Get 3D BOX heading heading_angle = gt_object.ry # Get 3D BOX size box3d_size = np.array([gt_object.l, gt_object.w, gt_object.h]) else: obj_type = 'NonObject' gt_box_3d = np.zeros((8, 3)) heading_angle = 0 box3d_size = np.zeros((1, 3)) #frustum_angle = 0 rot_angle = self.get_center_view_rot_angle(frustum_angle) # Get point cloud if self.rotate_to_center: point_set = self.get_center_view_point_set(pc_in_prop_box, rot_angle) else: point_set = pc_in_prop_box # ------------------------------ LABELS ---------------------------- # classification # assert(obj_type in ['Car', 'Pedestrian', 'Cyclist', 'NonObject']) assert (obj_type in type_whitelist) cls_label = g_type2onehotclass[obj_type] # Get center point of 3D box if self.rotate_to_center: box3d_center = self.get_center_view_box3d_center( gt_box_3d, rot_angle) else: box3d_center = self.get_box3d_center(gt_box_3d) # Heading if self.rotate_to_center: heading_angle = heading_angle - rot_angle # Size size_class, size_residual = size2class(box3d_size, obj_type) angle_class, angle_residual = angle2class(heading_angle, NUM_HEADING_BIN) self.sample_id_counter += 1 return Sample(self.sample_id_counter, point_set, seg_mask, box3d_center, angle_class, angle_residual,\ size_class, size_residual, rot_angle, cls_label, proposal, heading_angle, iou, img_seg_map, calib.P)
def get_one_sample(self, proposal, pc_rect, image, calib, iou, gt_box_3d, gt_object, data_idx_str, img_seg_map): '''convert to frustum sample format''' prop_corners_image_2d, prop_corners_3d = utils.compute_box_3d( proposal, calib.P) if prop_corners_image_2d is None: print('skip proposal behind camera') return False # get points within proposal box _, prop_inds = extract_pc_in_box3d(pc_rect, prop_corners_3d) pc_in_prop_box = pc_rect[prop_inds, :] seg_mask = np.zeros((pc_in_prop_box.shape[0])) if len(pc_in_prop_box) == 0: print('Reject proposal with no point') return False # Get frustum angle image_points = calib.project_rect_to_image(pc_in_prop_box[:, :3]) expand_image_points = np.concatenate( (prop_corners_image_2d, image_points), axis=0) xmin, ymin = expand_image_points.min(0) xmax, ymax = expand_image_points.max(0) # TODO: frustum angle is important, make use of image # use gt angle for testing if gt_object is not None: xmin, ymin, xmax, ymax = gt_object.box2d box2d_center = np.array([(xmin + xmax) / 2.0, (ymin + ymax) / 2.0]) uvdepth = np.zeros((1, 3)) uvdepth[0, 0:2] = box2d_center uvdepth[0, 2] = 20 # some random depth box2d_center_rect = calib.project_image_to_rect(uvdepth) frustum_angle = -1 * np.arctan2(box2d_center_rect[0, 2], box2d_center_rect[0, 0]) if gt_object is not None: obj_type = gt_object.type # TODO: use dbscan instead of ground truth #_,gt_inds = extract_pc_in_box3d(pc_rect, gt_box_3d) #prop_inds = np.logical_or(prop_inds, gt_inds) _, inds = extract_pc_in_box3d(pc_in_prop_box, gt_box_3d) seg_mask[inds] = 1 # Reject object with too few point if np.sum(seg_mask) < 5: print('Reject object with too few point') return False # Get 3D BOX heading heading_angle = gt_object.ry # Get 3D BOX size box3d_size = np.array([gt_object.l, gt_object.w, gt_object.h]) else: obj_type = 'NonObject' gt_box_3d = np.zeros((8, 3)) heading_angle = 0 box3d_size = np.zeros((1, 3)) #frustum_angle = 0 rot_angle = self.get_center_view_rot_angle(frustum_angle) # Get point cloud if self.rotate_to_center: point_set = self.get_center_view_point_set(pc_in_prop_box, rot_angle) else: point_set = pc_in_prop_box # ------------------------------ LABELS ---------------------------- # classification # assert(obj_type in ['Car', 'Pedestrian', 'Cyclist', 'NonObject']) assert (obj_type in type_whitelist) cls_label = g_type2onehotclass[obj_type] # Get center point of 3D box if self.rotate_to_center: box3d_center = self.get_center_view_box3d_center( gt_box_3d, rot_angle) else: box3d_center = self.get_box3d_center(gt_box_3d) # Heading if self.rotate_to_center: heading_angle = heading_angle - rot_angle # Size size_class, size_residual = size2class(box3d_size, obj_type) angle_class, angle_residual = angle2class(heading_angle, NUM_HEADING_BIN) self.sample_id_counter += 1 return Sample(self.sample_id_counter, point_set, seg_mask, box3d_center, angle_class, angle_residual,\ size_class, size_residual, rot_angle, cls_label, proposal, heading_angle, iou, img_seg_map, calib.P)
def get_sample(self, pc_rect, image, img_seg_map, calib, proposal_, max_iou, max_idx, objects): thres_low = 0.3 thres_high = 0.55 if max_iou >= thres_high: label = objects[max_idx] if max_iou < thres_low: label = None if self.is_training and max_iou >= thres_low and max_iou < thres_high: return False # expand proposal boxes proposal_expand = copy.deepcopy(proposal_) proposal_expand.l += 0.5 proposal_expand.w += 0.5 _, box_3d = utils.compute_box_3d(proposal_expand, calib.P) _, mask = extract_pc_in_box3d(pc_rect, box_3d) # ignore proposal with no points if (np.sum(mask) == 0): return False points = pc_rect[mask, :] points_with_feats = np.zeros((points.shape[0], self.num_channel)) points_with_feats[:, :6] = points # xyz, intensity, seg_one_hot # pooled points canonical transformation points_with_feats[:, :3] -= proposal_.t points_with_feats[:, :3] = rotate_points_along_y( points_with_feats[:, :3], proposal_.ry) sample = {} self.sample_id_counter += 1 sample['id'] = self.sample_id_counter sample['class'] = 0 sample['pointcloud'] = points_with_feats sample['image'] = cv2.resize(image, (1200, 360)) sample['img_seg_map'] = img_seg_map sample['calib'] = np.copy(calib.P) # scale projection matrix sample['calib'][0, :] *= (1200.0 / image.shape[1]) sample['calib'][1, :] *= (360.0 / image.shape[0]) sample['proposal_box'] = np.array([ proposal_.t[0], proposal_.t[1], proposal_.t[2], proposal_.l, proposal_.h, proposal_.w, proposal_.ry ]) sample['center_cls'] = np.zeros((2, ), dtype=np.int32) sample['center_res'] = np.zeros((3, )) sample['angle_cls'] = 0 sample['angle_res'] = 0 sample['size_cls'] = 0 sample['size_res'] = np.zeros((3, )) sample['gt_box'] = np.zeros((8, 3)) sample['train_regression'] = max_iou >= thres_high if label: sample['class'] = g_type2onehotclass[label.type] # rotation canonical transformation label_norm = copy.deepcopy(label) label_norm.ry = label.ry - proposal_.ry obj_vec = box_encoder.encode(label_norm, proposal_.t) sample['center_cls'] = obj_vec[0] sample['center_res'] = obj_vec[1] sample['angle_cls'] = obj_vec[2] sample['angle_res'] = obj_vec[3] sample['size_cls'] = obj_vec[4] sample['size_res'] = obj_vec[5] _, gt_box_3d = utils.compute_box_3d(label, calib.P) sample['gt_box'] = gt_box_3d #self.viz_sample(sample) return sample
def load_frame_data(self, data_idx_str, aug): '''load one frame''' start = time.time() data_idx = int(data_idx_str) try: with open(os.path.join(self.data_dir, data_idx_str + '.pkl'), 'rb') as fin: rpn_out = pickle.load(fin) # load image segmentation output img_seg_map = np.load( os.path.join(self.data_dir, data_idx_str + '_seg.npy')) except Exception as e: print(e) return [] proposals = self.get_proposals(rpn_out) #print(data_idx_str) calib = self.kitti_dataset.get_calibration(data_idx) # 3 by 4 matrix if self.is_training: objects = self.kitti_dataset.get_label_objects(data_idx) else: # while testing, all proposals will have class 0 objects = [] image = self.kitti_dataset.get_image(data_idx) pc_velo = self.kitti_dataset.get_lidar(data_idx) img_height, img_width = image.shape[0:2] _, pc_image_coord, img_fov_inds = get_lidar_in_image_fov( pc_velo[:, 0:3], calib, 0, 0, img_width, img_height, True) pc_velo = pc_velo[img_fov_inds, :] # Same point sampling as RPN choice = rpn_out['pc_choices'] point_set = pc_velo[choice, :] # point_set = pc_velo pc_rect = np.zeros_like(point_set) pc_rect[:, 0:3] = calib.project_velo_to_rect(point_set[:, 0:3]) pc_rect[:, 3] = point_set[:, 3] # Segmentation results from RPN seg_one_hot = np.zeros((pc_rect.shape[0], 2)) bg_ind = rpn_out['segmentation'] == 0 fg_ind = rpn_out['segmentation'] == 1 seg_one_hot[bg_ind, 0] = 1 seg_one_hot[fg_ind, 1] = 1 pc_rect = np.concatenate((pc_rect, seg_one_hot), axis=-1) # 6 channels objects = filter( lambda obj: obj.type in self.types_list and obj.difficulty in self. difficulties_list, objects) gt_boxes = [] # ground truth boxes gt_boxes_xy = [] recall = np.zeros((len(objects), ), dtype=np.int32) for obj in objects: _, obj_box_3d = utils.compute_box_3d(obj, calib.P) # skip label with no point _, obj_mask = extract_pc_in_box3d(pc_rect, obj_box_3d) if np.sum(obj_mask) == 0: continue gt_boxes.append(obj_box_3d) gt_boxes_xy.append(obj_box_3d[:4, [0, 2]]) positive_samples = [] negative_samples = [] show_boxes = [] # boxes_2d = [] def process_proposal(prop): b2d, prop_box_3d = utils.compute_box_3d(prop, calib.P) prop_box_xy = prop_box_3d[:4, [0, 2]] max_idx, max_iou = find_match_label(prop_box_xy, gt_boxes_xy) sample = self.get_sample(pc_rect, image, img_seg_map, calib, prop, max_iou, max_idx, objects) # print(max_iou) if not sample: return -1 if sample['class'] == 0: show_boxes.append(prop_box_3d) negative_samples.append(sample) else: positive_samples.append(sample) show_boxes.append(prop_box_3d) recall[max_idx] = 1 return sample['class'] aug_proposals = [] AUG_X = {1: 1, 2: 2, 3: 2} for prop in proposals: cls = process_proposal(prop) if not aug or cls <= 0: continue for x in range(AUG_X[cls]): prop_ = random_shift_box3d(copy.deepcopy(prop), 0.1) aug_proposals.append(prop_) # add more proposals using label to increase training samples ''' if self.split == 'train': miss_objs = [objects[i] for i in range(len(objects)) if recall[i]==0] aug_proposals += self.fill_proposals_with_gt(miss_objs) ''' for prop in aug_proposals: process_proposal(prop) if self.is_training: random.shuffle(negative_samples) samples = positive_samples + negative_samples[:len(positive_samples )] else: samples = positive_samples + negative_samples random.shuffle(samples) # self.viz_frame(pc_rect, np.zeros((pc_rect.shape[0],)), gt_boxes, show_boxes) return samples
def stat_proposal(self): '''statistic of proposals''' total_iou_3d = 0 total_iou_2d = 0 total_angle_res = 0 total = 0 total_num = 0 total_recall = 0 total_gt = 0 for frame_id in self.frame_ids: print(frame_id) try: with open(os.path.join(self.data_dir, frame_id+'.pkl'), 'rb') as fin: rpn_out = pickle.load(fin) except Exception as e: print(e) continue data_idx = int(frame_id) pc_velo = self.kitti_dataset.get_lidar(data_idx) image = self.kitti_dataset.get_image(data_idx) calib = self.kitti_dataset.get_calibration(data_idx) # 3 by 4 matrix img_height, img_width = image.shape[0:2] _, pc_image_coord, img_fov_inds = get_lidar_in_image_fov(pc_velo[:,0:3], calib, 0, 0, img_width, img_height, True) pc_velo = pc_velo[img_fov_inds, :] pc_rect = np.zeros_like(pc_velo) pc_rect[:,0:3] = calib.project_velo_to_rect(pc_velo[:,0:3]) pc_rect[:,3] = pc_velo[:,3] objects = self.kitti_dataset.get_label_objects(data_idx) objects = filter(lambda obj: obj.type in self.types_list and obj.difficulty in self.difficulties_list, objects) proposals = self.get_proposals(rpn_out) gt_boxes = [] # ground truth boxes gt_boxes_xy = [] recall = np.zeros((len(objects),)) for obj in objects: _,obj_box_3d = utils.compute_box_3d(obj, calib.P) # skip label with no point _,obj_mask = extract_pc_in_box3d(pc_rect, obj_box_3d) if np.sum(obj_mask) == 0: continue gt_boxes.append(obj_box_3d) gt_boxes_xy.append(obj_box_3d[:4, [0,2]]) for prop in proposals: b2d,prop_box_3d = utils.compute_box_3d(prop, calib.P) prop_box_xy = prop_box_3d[:4, [0,2]] gt_idx, gt_iou = find_match_label(prop_box_xy, gt_boxes_xy) if gt_idx == -1: continue if gt_iou >= 0.5: recall[gt_idx] = 1 total_iou_2d += gt_iou ry_residual = abs(prop.ry - objects[gt_idx].ry) total_angle_res += ry_residual total += 1 total_recall += np.sum(recall) total_gt += len(objects) total_num += len(proposals) print('Average IOU 2d {0}'.format(total_iou_2d/total)) print('Average angle residual {0}'.format(total_angle_res/total)) print('Average proposal number: {0}'.format(total_num/len(self.frame_ids))) print('Recall: {0}'.format(float(total_recall)/total_gt))
def stat(kitti_path, split, data_dir, type_whitelist, difficulties_whitelist): kitti_dataset = kitti_object(kitti_path, 'training') with open(os.path.join(kitti_path, split + '.txt')) as f: frame_ids = [line.rstrip('\n') for line in f] missed_sample = 0 missed_seg = 0 missed_seg_sample = 0 missed_obj_point = 0 tp = 0 fp = 0 fn = 0 total_recall = {10: 0, 20: 0, 30: 0, 40: 0, 50: 0, 100: 0, 300: 0} label_count = 0 for frame_id in frame_ids: print(frame_id) sys.stdout.flush() try: with open(os.path.join(data_dir, frame_id + '.pkl'), 'rb') as fin: rpn_out = pickle.load(fin) except Exception as e: print(e) continue data_idx = int(frame_id) pc_choices = rpn_out['pc_choices'] seg_pred_point = rpn_out['segmentation'] # point seg seg_pred = rpn_out['segmentation_fuse'] # point+img seg fg_indices = rpn_out['fg_indices'] # after sampling seg_pred = seg_pred_point pc_velo = kitti_dataset.get_lidar(data_idx) image = kitti_dataset.get_image(data_idx) calib = kitti_dataset.get_calibration(data_idx) # 3 by 4 matrix img_height, img_width = image.shape[0:2] _, pc_image_coord, img_fov_inds = get_lidar_in_image_fov( pc_velo[:, 0:3], calib, 0, 0, img_width, img_height, True) pc_velo = pc_velo[img_fov_inds, :3] pc_velo_sampled = pc_velo[pc_choices] pc_rect = calib.project_velo_to_rect(pc_velo) pc_rect_sampled = calib.project_velo_to_rect(pc_velo_sampled) # 16384 pc_seg = pc_rect_sampled[seg_pred > 0] # all seg fg points if pc_seg.shape[0] > 0: pc_seg_sampled = pc_rect_sampled[ fg_indices] # sampled fg points 2048 else: pc_seg_sampled = pc_seg objects = kitti_dataset.get_label_objects(data_idx) objects = filter( lambda obj: obj.type in type_whitelist and obj.difficulty in difficulties_whitelist, objects) gt_boxes = [] # ground truth boxes gt_boxes_xy = [] seg_mask = np.zeros((pc_rect_sampled.shape[0], )) for obj in objects: _, obj_box_3d = utils.compute_box_3d(obj, calib.P) # skip label with no point _, obj_mask = extract_pc_in_box3d(pc_rect, obj_box_3d) if np.sum(obj_mask) == 0: continue gt_boxes.append(obj_box_3d) gt_boxes_xy.append(obj_box_3d[:4, [0, 2]]) label_count += 1 _, obj_mask = extract_pc_in_box3d(pc_rect_sampled, obj_box_3d) seg_mask[obj_mask] = 1 if np.sum(obj_mask) == 0: missed_sample += 1 _, mask = extract_pc_in_box3d(pc_seg, obj_box_3d) if np.sum(mask) == 0: missed_seg += 1 missed_obj_point += np.sum(obj_mask) _, mask = extract_pc_in_box3d(pc_seg_sampled, obj_box_3d) if np.sum(mask) == 0: missed_seg_sample += 1 tp += np.sum(np.logical_and(seg_pred == seg_mask, seg_mask != 0)) fp += np.sum(np.logical_and(seg_pred != seg_mask, seg_mask == 0)) fn += np.sum(np.logical_and(seg_pred != seg_mask, seg_mask != 0)) for max_keep in total_recall.keys(): recall = np.zeros((len(objects), )) proposals = get_proposals(rpn_out, 0.7, max_keep) for prop in proposals: if np.sum(recall) == len(objects): break b2d, prop_box_3d = utils.compute_box_3d(prop, calib.P) prop_box_xy = prop_box_3d[:4, [0, 2]] gt_idx, gt_iou = find_match_label(prop_box_xy, gt_boxes_xy) if gt_idx == -1: continue if gt_iou >= 0.5: recall[gt_idx] = 1 #if objects[gt_idx].type != 'Car' and gt_iou >= 0.5: # print(objects[gt_idx].type) total_recall[max_keep] += np.sum(recall) print('Missed sample ratio: ', float(missed_sample) / label_count) print('Missed seg ratio: ', float(missed_seg) / label_count) print('Missed object average point: ', missed_obj_point / missed_seg) print('Missed seg sample ratio: ', float(missed_seg_sample) / label_count) for max_keep in total_recall.keys(): total_recall[max_keep] = float(total_recall[max_keep]) / label_count print('Recall on proposal num: ', total_recall) print('Seg recall: ', float(tp) / (tp + fn)) print('Seg precision: ', float(tp) / (tp + fp)) sys.stdout.flush()
def load_frame_data(self, data_idx_str, random_flip=False, random_rotate=False, random_shift=False, pca_jitter=False): '''load one frame''' if self.use_aug_scene: data_idx = int(data_idx_str[2:]) else: data_idx = int(data_idx_str) # print(data_idx_str) calib = self.kitti_dataset.get_calibration(data_idx) # 3 by 4 matrix image = self.kitti_dataset.get_image(data_idx) img_height, img_width = image.shape[0:2] # data augmentation if pca_jitter: image = apply_pca_jitter(image)[0] objects = [] if not self.use_aug_scene or data_idx_str[:2] == '00': pc_velo = self.kitti_dataset.get_lidar(data_idx) _, pc_image_coord, img_fov_inds = get_lidar_in_image_fov( pc_velo[:, 0:3], calib, 0, 0, img_width, img_height, True) pc_velo = pc_velo[img_fov_inds, :] choice = np.random.choice(pc_velo.shape[0], self.npoints, replace=True) point_set = pc_velo[choice, :] pc_rect = np.zeros_like(point_set) pc_rect[:, 0:3] = calib.project_velo_to_rect(point_set[:, 0:3]) pc_rect[:, 3] = point_set[:, 3] if self.is_training: objects = self.kitti_dataset.get_label_objects(data_idx) else: pc_rect = utils.load_velo_scan( os.path.join( self.kitti_path, 'aug_scene/rectified_data/{0}.bin'.format(data_idx_str))) choice = np.random.choice(pc_rect.shape[0], self.npoints, replace=True) pc_rect = pc_rect[choice, :] if self.is_training: objects = utils.read_label( os.path.join( self.kitti_path, 'aug_scene/aug_label/{0}.txt'.format(data_idx_str))) objects = filter( lambda obj: obj.type in self.types_list and obj.difficulty in self. difficulties_list, objects) gt_boxes = [] # ground truth boxes #start = time.time() seg_mask = np.zeros((pc_rect.shape[0])) # data augmentation if random_flip and np.random.random() > 0.5: # 50% chance flipping pc_rect[:, 0] *= -1 for obj in objects: obj.t = [-obj.t[0], obj.t[1], obj.t[2]] # ensure that ry is [-pi, pi] if obj.ry >= 0: obj.ry = np.pi - obj.ry else: obj.ry = -np.pi - obj.ry if random_rotate: ry = (np.random.random() - 0.5) * math.radians( 20) # -10~10 degrees pc_rect[:, 0:3] = rotate_points_along_y(pc_rect[:, 0:3], ry) for obj in objects: obj.t = rotate_points_along_y(obj.t, ry) obj.ry -= ry # ensure that ry is [-pi, pi] if obj.ry > np.pi: obj.ry -= 2 * np.pi elif obj.ry < -np.pi: obj.ry += 2 * np.pi proposal_of_point = {} # point index to proposal vector gt_box_of_point = {} # point index to corners_3d for obj in objects: _, obj_box_3d = utils.compute_box_3d(obj, calib.P) _, obj_mask = extract_pc_in_box3d(pc_rect, obj_box_3d) if np.sum(obj_mask) == 0: # label without 3d points # print('skip object without points') continue # IMPORTANT: this must match with NUM_SEG_CLASSES #seg_mask[obj_mask] = g_type2onehotclass[obj.type] seg_mask[obj_mask] = 1 gt_boxes.append(obj_box_3d) obj_idxs = np.where(obj_mask)[0] # data augmentation # FIXME: jitter point will make valid loss growing # Also may go out of image view if random_shift and False: # jitter object points pc_rect[obj_idxs, :3] = shift_point_cloud( pc_rect[obj_idxs, :3], 0.02) for idx in obj_idxs: proposal_of_point[idx] = box_encoder.encode( obj, pc_rect[idx, :3]) gt_box_of_point[idx] = obj_box_3d # self.viz_frame(pc_rect, seg_mask, gt_boxes) # return pc_rect, seg_mask, proposal_of_point, gt_box_of_point, gt_boxes calib_matrix = np.copy(calib.P) calib_matrix[0, :] *= (1200.0 / image.shape[1]) calib_matrix[1, :] *= (360.0 / image.shape[0]) #print('construct', time.time() - start) return { 'pointcloud': pc_rect, 'image': cv2.resize(image, (1200, 360)), 'calib': calib_matrix, 'mask_label': seg_mask, 'proposal_of_point': self.get_proposal_out(proposal_of_point), 'gt_box_of_point': self.get_gt_box_of_points(gt_box_of_point), 'gt_boxes': gt_boxes, 'pc_choice': choice }