def test_3d_to_2d_point_projection(self):
    anchor_corners = np.asarray(
        [[1., 1., -1., -1., 1., 1., -1., -1.,
          4., 4., 2., 2., 4., 4., 2., 2.],
         [0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0.],
         [6., 0., 0., 6., 6., 0., 0., 6.,
          4., 2., 2., 4., 4., 2., 2., 4.]])
    stereo_calib_p2 = \
        np.asarray([[7.21537700e+02, 0.0, 6.09559300e+02, 4.48572800e+01],
                    [0.0, 7.21537700e+02, 1.72854000e+02, 2.16379100e-01],
                    [0.0, 0.0, 1.0, 2.74588400e-03]])

    # Do projection in numpy space
    points_2d = calib_utils.project_to_image(anchor_corners,
                                             stereo_calib_p2)

    # Do projection in tensor space
    tf_anchor_corners = tf.convert_to_tensor(anchor_corners,
                                             dtype=tf.float32)
    tf_stereo_calib_p2 = tf.convert_to_tensor(stereo_calib_p2,
                                              dtype=tf.float32)
    tf_points_2d = anchor_projector.project_to_image_tensor(
        tf_anchor_corners, tf_stereo_calib_p2)

    sess = tf.Session()
    with sess.as_default():
        points_2d_out = tf_points_2d.eval()

    np.testing.assert_allclose(
        points_2d, points_2d_out,
        err_msg='Incorrect tensor 3D->2D projection')
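
# Illustrative sketch (an assumption about calib_utils.project_to_image, not
# the library source): the projection checked above is plain pinhole math --
# pad the (3, N) camera-frame points to homogeneous coordinates, multiply by
# the (3, 4) p matrix, and divide by the resulting depth row.
def _project_to_image_sketch(points_3d, p):
    num_points = points_3d.shape[1]
    points_hom = np.vstack([points_3d, np.ones((1, num_points))])  # (4, N)
    pts_2d_hom = np.dot(p, points_hom)                             # (3, N)
    return pts_2d_hom[0:2] / pts_2d_hom[2]                         # (2, N)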
def project_img_to_point_cloud(points, image, calib_dir, img_idx):
    """Projects image colours to point cloud points

    Arguments:
        points (N by [x,y,z]): list of points where N is the number of points
        image (Y by X by [r,g,b]): colour values in image space
        calib_dir (str): calibration directory
        img_idx (int): index of the requested image

    Returns:
        [N by [r,g,b]]: Matrix of colour codes. Indices of colours correspond
            to the indices of the points in the 'points' argument
    """
    # Save the pixel colour corresponding to each point
    frame_calib = calib.read_calibration(calib_dir, img_idx)
    point_in_im = calib.project_to_image(points.T, p=frame_calib.p2).T

    point_in_im_rounded = np.floor(point_in_im)
    point_in_im_rounded = point_in_im_rounded.astype(np.int32)

    point_colours = []
    for point in point_in_im_rounded:
        point_colours.append(image[point[1], point[0], :])

    point_colours = np.asanyarray(point_colours)

    return point_colours
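
# Hypothetical usage of project_img_to_point_cloud (paths and index are
# placeholders): colour a camera-frame point cloud with its matching image.
# points = get_lidar_point_cloud(0, 'training/calib', 'training/velodyne',
#                                im_size=[1242, 375]).T          # (N, 3)
# image = np.asarray(Image.open('training/image_2/000000.png'))  # (Y, X, 3)
# point_colours = project_img_to_point_cloud(points, image,
#                                            'training/calib', 0)  # (N, 3)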
def project_box3d_to_image(corners_3d, p):
    """Computes the 3D bounding box projected onto image space.

    Keyword arguments:
        corners_3d -- corners of the 3D bounding box
        p -- transform matrix

    Returns:
        corners: numpy array of corner points projected onto image space.
        face_idx: numpy array of 3D bounding box face indices
    """
    # Index for each face of the 3D bounding box,
    # converted to a 4x4 matrix (one row per face)
    face_idx = np.array([0, 1, 5, 4,   # front face
                         1, 2, 6, 5,   # left face
                         2, 3, 7, 6,   # back face
                         3, 0, 4, 7]   # right face
                        ).reshape((4, 4))
    return calib.project_to_image(corners_3d, p), face_idx
def compute_orientation_3d(obj, p):
    """Computes the orientation given object and camera matrix

    Keyword arguments:
        obj -- object file to draw bounding box
        p -- transform matrix
    """
    # Compute rotation matrix around the y-axis
    rot = np.array([[+np.cos(obj.ry), 0, +np.sin(obj.ry)],
                    [0, 1, 0],
                    [-np.sin(obj.ry), 0, +np.cos(obj.ry)]])

    # Two points: the box centre and a point obj.l ahead along the heading
    orientation3d = np.array([0.0, obj.l, 0.0, 0.0, 0.0, 0.0]).reshape(3, 2)
    orientation3d = np.dot(rot, orientation3d)

    orientation3d[0, :] = orientation3d[0, :] + obj.t[0]
    orientation3d[1, :] = orientation3d[1, :] + obj.t[1]
    orientation3d[2, :] = orientation3d[2, :] + obj.t[2]

    # Only draw for boxes that are in front of the camera
    for idx in np.arange(orientation3d.shape[1]):
        if orientation3d[2, idx] < 0.1:
            return None

    return calib.project_to_image(orientation3d, p)
def project_depths_xy(point_cloud, camera_p, image_shape, max_depth=100.0):
    """Projects a point cloud into image space and saves depths per pixel.

    :param point_cloud: point cloud (3, N)
    :param camera_p: stereo calibration p matrix
    :param image_shape: image shape [h, w]
    :param max_depth: maximum depth used for the inverted depth buffer
    :return: projected depth map, and per-pixel x and y maps
    """
    # Only keep points in front of the camera
    point_cloud = point_cloud.T
    # point_cloud = point_cloud[point_cloud[:, 0] > 0]

    # Save the depth corresponding to each point
    point_in_im = calib_utils.project_to_image(point_cloud.T, p=camera_p).T
    point_in_im_rounded = np.array(np.int32(np.floor(point_in_im)))

    # Filter out points that fall outside the image bounds
    image_filter = (point_in_im_rounded[:, 0] > 0) & \
                   (point_in_im_rounded[:, 0] < image_shape[1]) & \
                   (point_in_im_rounded[:, 1] > 0) & \
                   (point_in_im_rounded[:, 1] < image_shape[0])
    point_in_im_rounded = point_in_im_rounded[image_filter]

    # Apply the same filter to the points so their indices stay aligned
    # with point_in_im_rounded
    all_points = np.array(point_cloud)[image_filter]

    # Invert depths
    all_points[:, 2] = max_depth - all_points[:, 2]

    # Only save valid pixels, keep closer points when overlapping
    projected_depths = np.zeros(image_shape)
    x_proj = np.zeros(image_shape)
    y_proj = np.zeros(image_shape)
    valid_indices = (point_in_im_rounded[:, 1], point_in_im_rounded[:, 0])
    projected_depths[valid_indices] = [
        max(projected_depths[point_in_im_rounded[idx, 1],
                             point_in_im_rounded[idx, 0]],
            all_points[idx, 2])
        for idx in range(len(point_in_im_rounded))]
    projected_depths[valid_indices] = \
        max_depth - projected_depths[valid_indices]

    x_proj[valid_indices] = [
        all_points[idx, 0] for idx in range(len(point_in_im_rounded))]
    y_proj[valid_indices] = [
        all_points[idx, 1] for idx in range(len(point_in_im_rounded))]

    return projected_depths, x_proj, y_proj
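
# Minimal standalone sketch of the "invert then take per-pixel max" trick used
# above (a correct running-max variant, assuming uv holds valid in-image pixel
# coordinates): storing (max_depth - z) turns "keep the closest point per
# pixel" into a maximum, which np.maximum.at resolves for overlapping pixels
# in a single vectorized pass.
def _closest_depth_per_pixel(uv, z, image_shape, max_depth=100.0):
    depth_map = np.zeros(image_shape)
    np.maximum.at(depth_map, (uv[:, 1], uv[:, 0]), max_depth - z)
    valid = depth_map > 0
    depth_map[valid] = max_depth - depth_map[valid]  # invert back
    return depth_map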
def get_lidar_point_cloud_jhuang(img_idx, frame_calib, velo_dir,
                                 im_size=None, min_intensity=None,
                                 prefix=''):
    """ Calculates the lidar point cloud, and optionally returns only the
    points that are projected to the image.

    :param img_idx: image index
    :param frame_calib: frame calibration info
    :param velo_dir: directory with velodyne files
    :param im_size: (optional) 2 x 1 list containing the size of the image
                    to filter the point cloud [w, h]
    :param min_intensity: (optional) minimum intensity required to keep a point
    :param prefix: (optional) filename prefix for the velodyne files

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """
    # Read lidar data
    x, y, z, i = calib_utils.read_lidar_jhuang(velo_dir=velo_dir,
                                               img_idx=img_idx,
                                               prefix=prefix)

    # Calculate the point cloud
    pts = np.vstack((x, y, z)).T
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)

    # The given image is assumed to be a 2D image
    if not im_size:
        point_cloud = pts.T
        return point_cloud

    else:
        # Only keep points in front of camera (positive z); filter the
        # intensities with the same mask so the masks below stay aligned
        front_filter = pts[:, 2] > 0
        pts = pts[front_filter]
        i = i[front_filter]
        point_cloud = pts.T

        # Project to image frame
        point_in_im = calib_utils.project_to_image(point_cloud,
                                                   p=frame_calib.p2).T

        # Filter based on the given image size
        image_filter = (point_in_im[:, 0] > 0) & \
                       (point_in_im[:, 0] < im_size[0]) & \
                       (point_in_im[:, 1] > 0) & \
                       (point_in_im[:, 1] < im_size[1])

    if not min_intensity:
        return pts[image_filter].T

    else:
        intensity_filter = i > min_intensity
        point_filter = np.logical_and(image_filter, intensity_filter)
        return pts[point_filter].T
def get_lidar_point_cloud_with_color(img_idx, img_dir, calib_dir, velo_dir,
                                     im_size=None):
    """ Calculates the lidar point cloud, and optionally returns only the
    points that are projected to the image, along with their image colours.

    :param img_idx: image index
    :param img_dir: directory with rgb images
    :param calib_dir: directory with calibration files
    :param velo_dir: directory with velodyne files
    :param im_size: (optional) 2 x 1 list containing the size of the image
                    to filter the point cloud [w, h]

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]] and a
             (3, N) matrix of the corresponding point colours
    """
    # Read calibration info
    frame_calib = calib_utils.read_calibration(calib_dir, img_idx)
    x, y, z, i = calib_utils.read_lidar(velo_dir=velo_dir, img_idx=img_idx)

    # Calculate the point cloud
    pts = np.vstack((x, y, z)).T
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)

    # The given image is assumed to be a 2D image
    if not im_size:
        point_cloud = pts.T
        return point_cloud

    else:
        # Only keep points in front of camera (positive z)
        pts = pts[pts[:, 2] > 0]
        point_cloud = pts.T

        # Project to image frame
        point_in_im = calib_utils.project_to_image(point_cloud,
                                                   p=frame_calib.p2).T

        # Filter based on the given image size
        image_filter = (point_in_im[:, 0] > 0) & \
                       (point_in_im[:, 0] < im_size[0]) & \
                       (point_in_im[:, 1] > 0) & \
                       (point_in_im[:, 1] < im_size[1])

        img_dir = img_dir + "/%06d.png" % img_idx
        img = Image.open(img_dir)
        img = np.array(img)

        point_colors = img[point_in_im[image_filter, 1].astype(np.int32),
                           point_in_im[image_filter, 0].astype(np.int32)]

        return pts[image_filter].T, point_colors.T
def get_lidar_point_cloud(img_idx, calib_dir, velo_dir,
                          im_size=None, min_intensity=None):
    """ Calculates the lidar point cloud, and optionally returns only the
    points that are projected to the image.

    :param img_idx: image index
    :param calib_dir: directory with calibration files
    :param velo_dir: directory with velodyne files
    :param im_size: (optional) 2 x 1 list containing the size of the image
                    to filter the point cloud [w, h]
    :param min_intensity: (optional) minimum intensity required to keep a point

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """
    # Read the calib file info into an object
    frame_calib = calib_utils.read_calibration(calib_dir, img_idx)
    # Read the point cloud x, y, z, and intensity values from file
    x, y, z, i = calib_utils.read_lidar(velo_dir=velo_dir, img_idx=img_idx)

    # Calculate the point cloud positions
    pts = np.vstack((x, y, z)).T
    # Project the point cloud into the camera frame
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)

    # The given image is assumed to be a 2D image
    if not im_size:
        point_cloud = pts.T
        return point_cloud

    else:
        # Only keep points in front of the camera; the points are already
        # in the camera frame, so "in front" means positive z. Filter the
        # intensities with the same mask so the masks below stay aligned
        front_filter = pts[:, 2] > 0
        pts = pts[front_filter]
        i = i[front_filter]
        point_cloud = pts.T

        # Project to the image (pixel) frame
        point_in_im = calib_utils.project_to_image(point_cloud,
                                                   p=frame_calib.p2).T

        # Boolean index keeping only the points inside the image bounds;
        # the point coordinates themselves stay in the camera frame
        image_filter = (point_in_im[:, 0] > 0) & \
                       (point_in_im[:, 0] < im_size[0]) & \
                       (point_in_im[:, 1] > 0) & \
                       (point_in_im[:, 1] < im_size[1])

    if not min_intensity:
        return pts[image_filter].T

    else:
        intensity_filter = i > min_intensity
        point_filter = np.logical_and(image_filter, intensity_filter)
        return pts[point_filter].T
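
# Typical call (directory names are placeholders): keep only the lidar points
# that project inside a 1242 x 375 KITTI image.
# point_cloud = get_lidar_point_cloud(img_idx=0,
#                                     calib_dir='training/calib',
#                                     velo_dir='training/velodyne',
#                                     im_size=[1242, 375])  # -> (3, N)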
def genRectOcc(image_input, point_cloud, sin_input_name,
               mask_2d=None, frame_calib_p2=None):
    """
    mask_2d: 2x2 array.
        mask_2d[0] -> r_len, r_corner (0~1 scale) y-axis (height)
        mask_2d[1] -> r_len, r_corner (0~1 scale) x-axis (width)
    """
    image_shape = np.shape(image_input)
    if mask_2d is None:
        mask_2d = genMask2D()

    # Generate rectangular occlusion in 2D image plane
    im_y_start = np.round(mask_2d[0, 1] * image_shape[0]).astype(np.int32)
    im_y_end = min(
        image_shape[0],
        im_y_start + np.round(mask_2d[0, 0] * image_shape[0]).astype(np.int32))
    im_x_start = np.round(mask_2d[1, 1] * image_shape[1]).astype(np.int32)
    im_x_end = min(
        image_shape[1],
        im_x_start + np.round(mask_2d[1, 0] * image_shape[1]).astype(np.int32))

    if sin_input_name == 'image':
        if len(image_shape) == 2:
            image_input[im_y_start:im_y_end, im_x_start:im_x_end] = 0
        elif len(image_shape) == 3:
            image_input[im_y_start:im_y_end, im_x_start:im_x_end, :] = 0
        else:
            raise ValueError(
                'Image shape is wrong. Must be either 2d (b/w) or 3d.')
        return image_input
    elif sin_input_name == 'lidar':
        # Occlude lidar point clouds to match the 2d mask in the image plane
        point_in_im = calib_utils.project_to_image(point_cloud,
                                                   p=frame_calib_p2).T
        # Filter out points that project into the occluded rectangle
        im_occ_filter = (point_in_im[:, 1] >= im_y_start) & \
                        (point_in_im[:, 1] < im_y_end) & \
                        (point_in_im[:, 0] > im_x_start) & \
                        (point_in_im[:, 0] < im_x_end)
        return point_cloud[:, np.invert(im_occ_filter)]
    else:
        raise ValueError('Currently supports {}'.format(
            ','.join(SIN_INPUT_NAMES)))
def genVertOcc(image_input, point_cloud, sin_input_name,
               frame_calib_p2=None, sin_level=1):
    """Occludes evenly spaced vertical stripes; the stripe width is scaled
    from sin_level (between VERT_W_MIN and VERT_W_MAX).
    """
    image_shape = np.shape(image_input)
    im_width = image_shape[1]
    width_vert = int(im_width * minmax_scale(
        sin_level, SINFields.SIN_LEVEL_MIN, SINFields.SIN_LEVEL_MAX,
        SINFields.VERT_W_MIN, SINFields.VERT_W_MAX))

    idx_x_starts = np.arange(0, im_width, 2 * width_vert)
    idx_x_ends = np.arange(width_vert, im_width, 2 * width_vert)
    m_verts = len(idx_x_ends)
    if len(idx_x_starts) > len(idx_x_ends):
        idx_x_ends = np.append(idx_x_ends, im_width)
        m_verts += 1

    idx_zeros = np.zeros(im_width, dtype=bool)
    for j in range(m_verts):
        idx_zeros[idx_x_starts[j]:idx_x_ends[j]] = True

    if sin_input_name == 'image':
        if len(image_shape) == 2:
            image_input[:, idx_zeros] = 0
        elif len(image_shape) == 3:
            image_input[:, idx_zeros, :] = 0
        else:
            raise ValueError(
                'Image shape is wrong. Must be either 2d (b/w) or 3d.')
        return image_input
    elif sin_input_name == 'lidar':
        # Occlude lidar point clouds to match the 2d mask in the image plane
        point_in_im = calib_utils.project_to_image(point_cloud,
                                                   p=frame_calib_p2).T
        # Filter out points that project into the occluded stripes
        im_occ_filter = idx_zeros[point_in_im[:, 0].astype(int)]
        return point_cloud[:, np.invert(im_occ_filter)]
    else:
        raise ValueError('Currently supports {}'.format(
            ','.join(SIN_INPUT_NAMES)))
def _project_and_show(self, sample_name, point_cloud, color, title):
    """Projects the point cloud to pixel coordinates and displays it on
    the corresponding image."""
    img_idx = int(sample_name)
    img = Image.open(self.dataset.get_rgb_image_path(sample_name))
    # np.array copies the object (by default), while np.asarray only
    # copies when necessary
    img_array = np.array(img)

    # Read the calib file info into an object
    frame_calib = calib_utils.read_calibration(
        self.dataset.calib_dir, img_idx)

    point_in_im = calib_utils.project_to_image(point_cloud,
                                               p=frame_calib.p2).T
    point_in_im = point_in_im[:, [1, 0]]
    point_in_im = point_in_im.astype(int)

    # Fancy indexing with the (row, col) pairs, equivalent to a zip
    img_array[point_in_im[:, 0],
              point_in_im[:, 1], :] = ImageColor.getrgb(color)

    img = Image.fromarray(img_array)
    img.show()
def get_depth_map_point_cloud(img_idx, calib_dir, depth_dir, im_size):
    """ Calculates the point cloud from a depth map

    :param img_idx: image index
    :param calib_dir: directory with calibration files
    :param depth_dir: directory with depth maps
    :param im_size: size of the image [w, h]

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """
    depth_map = depth_map_utils.get_depth_map(img_idx, depth_dir)

    # Calculate point cloud from depth map
    frame_calib = calib.read_calibration(calib_dir, img_idx)
    stereo_calibration_info = calib.get_stereo_calibration(frame_calib.p2,
                                                           frame_calib.p3)

    # Calculate points from depth map
    depth_map_flattened = depth_map.flatten()

    xx, yy = np.meshgrid(np.arange(1, im_size[0] + 1, 1),
                         np.arange(1, im_size[1] + 1, 1))
    xx = xx.flatten() - stereo_calibration_info.center_u
    yy = yy.flatten() - stereo_calibration_info.center_v

    temp = np.divide(depth_map_flattened, stereo_calibration_info.f)
    x = np.multiply(xx, temp)
    y = np.multiply(yy, temp)
    z = depth_map_flattened

    # Get x offset (b_cam) from calibration: cam_mat[0, 3] = (-f_x * b_cam)
    x_offset = -stereo_calibration_info.p[0, 3] / stereo_calibration_info.f

    point_cloud = np.asarray([x + x_offset, y, z])
    points = point_cloud.T

    # Filter points to image frame
    point_in_im = calib.project_to_image(points.T, p=frame_calib.p2).T
    image_filter = \
        (point_in_im[:, 0] > 0) & (point_in_im[:, 0] < im_size[0]) & \
        (point_in_im[:, 1] > 0) & (point_in_im[:, 1] < im_size[1])
    filtered_point_cloud = points[image_filter].T

    return filtered_point_cloud
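
# Worked inverse of the back-projection above (a sketch of the pinhole model
# the code assumes, with focal length f and principal point (center_u,
# center_v)): a pixel (u, v) with depth z recovers
#     x = (u - center_u) * z / f + x_offset
#     y = (v - center_v) * z / f
# e.g. with f = 721.5, center_u = 609.6, z = 10 m, a pixel at u = 800 gives
#     x = (800 - 609.6) * 10 / 721.5 + x_offset ~= 2.64 + x_offset metres.
# Assuming stereo_calibration_info.p is the p2 matrix, re-projecting the
# recovered (x, y, z) with p2 lands back on (u, v), which is why the final
# image_filter keeps essentially every pixel with valid depth.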
def occ_aug(point_cloud, calib, labels):
    """Occlusion augmentation.

    :param point_cloud: (3, N) point cloud
    :param calib: calibration matrix
    :param labels: list of mask labels
    :return: augmented point cloud with points inside the masks removed
    """
    point_in_im = calib_utils.project_to_image(point_cloud, p=calib).T
    point_cloud = point_cloud.T

    mask_labels = occ_aug_mask(labels)

    occ_filter = False
    for obj in mask_labels:
        occ_filter = occ_filter | \
            ((point_in_im[:, 0] > obj[0]) &
             (point_in_im[:, 0] < obj[2]) &
             (point_in_im[:, 1] > obj[1]) &
             (point_in_im[:, 1] < obj[3]))

    return point_cloud[np.logical_not(occ_filter)].T
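
# Minimal standalone demo of the accumulated filter above: starting from the
# scalar False, each | with a boolean array broadcasts into a per-point mask.
# pts_2d = np.array([[5., 5.], [50., 50.], [100., 5.]])
# boxes = [[0, 0, 10, 10], [40, 40, 60, 60]]   # [x1, y1, x2, y2]
# occ = False
# for x1, y1, x2, y2 in boxes:
#     occ = occ | ((pts_2d[:, 0] > x1) & (pts_2d[:, 0] < x2) &
#                  (pts_2d[:, 1] > y1) & (pts_2d[:, 1] < y2))
# occ  # -> array([ True,  True, False]); only the third point is kept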
def _get_point_cloud(self, image_shape, pointcloud, frame_calib,
                     min_intensity=None):
    im_size = [image_shape[1], image_shape[0]]
    x, y, z, i = pointcloud
    print("Shape of x, y, z, i: ", x.shape, y.shape, z.shape, i.shape)

    # Calculate the point cloud
    pts = np.vstack((x, y, z)).T
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)

    # The given image is assumed to be a 2D image
    if not im_size:
        point_cloud = pts.T
        return point_cloud

    else:
        # Only keep points in front of camera (positive z); filter the
        # intensities with the same mask so the masks below stay aligned
        front_filter = pts[:, 2] > 0
        pts = pts[front_filter]
        i = i[front_filter]
        point_cloud = pts.T

        # Project to image frame
        point_in_im = calib_utils.project_to_image(point_cloud,
                                                   p=frame_calib.p2).T

        # Filter based on the given image size
        image_filter = (point_in_im[:, 0] > 0) & \
                       (point_in_im[:, 0] < im_size[0]) & \
                       (point_in_im[:, 1] > 0) & \
                       (point_in_im[:, 1] < im_size[1])

        if not min_intensity:
            point_cloud = pts[image_filter].T
        else:
            intensity_filter = i > min_intensity
            point_filter = np.logical_and(image_filter, intensity_filter)
            point_cloud = pts[point_filter].T

    return point_cloud
def pc_to_xyz(point_cloud, calib_p, image_shape):
    """Projects a lidar point cloud to an XYZ image encoding.

    :param point_cloud: input point cloud (3, N)
    :param calib_p: stereo calibration p2 matrix
    :param image_shape: image shape [h, w]

    :return xyz_enc: point cloud encoded as an (h, w, 3) image with
        x, y, z channels
    """
    num_of_points = len(point_cloud.T)
    point_in_im = calib_utils.project_to_image(point_cloud, p=calib_p).T
    point_in_im_rounded = np.int32(np.floor(point_in_im))

    all_x, all_y, all_z = fill_xyz(point_cloud, image_shape,
                                   num_of_points, point_in_im_rounded)
    xyz_enc = np.dstack((all_x, all_y, all_z))

    return xyz_enc
def project_flipped_img_to_point_cloud(points, image_flipped,
                                       calib_dir, img_idx):
    """Projects image colours to point cloud points

    Arguments:
        points (N by [x,y,z]): list of points where N is the number of points
        image_flipped (Y by X by [r,g,b]): colour values of the horizontally
            flipped image in image space
        calib_dir (str): calibration directory
        img_idx (int): index of the requested image

    Returns:
        [N by [r,g,b]]: Matrix of colour codes. Indices of colours correspond
            to the indices of the points in the 'points' argument
    """
    # Save the pixel colour corresponding to each point
    frame_calib = calib_utils.read_calibration(calib_dir, img_idx)

    # Fix the p2 matrix for the flipped image: mirror the principal point
    # and negate the baseline term
    flipped_p2 = np.copy(frame_calib.p2)
    flipped_p2[0, 2] = image_flipped.shape[1] - flipped_p2[0, 2]
    flipped_p2[0, 3] = -flipped_p2[0, 3]

    # Use the fixed matrix
    point_in_im = calib_utils.project_to_image(points.T, p=flipped_p2).T

    point_in_im_rounded = np.floor(point_in_im)
    point_in_im_rounded = point_in_im_rounded.astype(np.int32)

    point_colours = []
    for point in point_in_im_rounded:
        point_colours.append(image_flipped[point[1], point[0], :])

    point_colours = np.asanyarray(point_colours)

    return point_colours
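
# Self-check sketch for the flipped-p2 fix above (illustrative; mirrors a
# camera-frame point as x -> -x and ignores the one-pixel offset the fix
# drops): a point projected with the flipped matrix should land at the
# mirror of its original column.
def _check_flipped_p2(p2, image_width, point_3d):
    def proj(p, pt):
        uvw = np.dot(p, np.append(pt, 1.0))
        return uvw[0] / uvw[2], uvw[1] / uvw[2]

    flipped_p2 = np.copy(p2)
    flipped_p2[0, 2] = image_width - flipped_p2[0, 2]
    flipped_p2[0, 3] = -flipped_p2[0, 3]

    mirrored = np.array([-point_3d[0], point_3d[1], point_3d[2]])
    u, v = proj(p2, point_3d)
    u_flip, v_flip = proj(flipped_p2, mirrored)
    return np.isclose(u_flip, image_width - u) and np.isclose(v_flip, v)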
def draw_boxes(prediction, sample, plot_axes):
    all_corners = []
    for pred in prediction:
        box = np.array(pred[0:7])
        cls_idx = int(pred[8])
        obj = box_3d_encoder.box_3d_to_object_label(
            box, obj_type=type_whitelist[cls_idx])
        obj.score = pred[7]
        vis_utils.draw_box_3d(plot_axes, obj,
                              sample[constants.KEY_STEREO_CALIB_P2],
                              show_orientation=False,
                              color_table=['r', 'y', 'r', 'w'],
                              line_width=2,
                              double_line=False)
        corners = compute_box_3d(obj)
        all_corners.append(corners)

        # Draw text info
        projected = calib_utils.project_to_image(
            corners.T, sample[constants.KEY_STEREO_CALIB_P2])
        x1 = np.amin(projected[0])
        y1 = np.amin(projected[1])
        x2 = np.amax(projected[0])
        y2 = np.amax(projected[1])
        text_x = (x1 + x2) / 2
        text_y = y1
        text = "{}\n{:.2f}".format(obj.type, obj.score)
        plot_axes.text(text_x, text_y - 4, text,
                       verticalalignment='bottom',
                       horizontalalignment='center',
                       color=BOX_COLOUR_SCHEME[obj.type],
                       fontsize=10,
                       fontweight='bold',
                       path_effects=[
                           patheffects.withStroke(linewidth=2,
                                                  foreground='black')])
    return all_corners
def plotSINImage(data_dir, out_dir, img_idx, sin_type, sin_level,
                 on_img, sin_input_name, mask_2d=None):
    fname = '{:06d}'.format(img_idx)
    path_img = os.path.join(data_dir, 'image_2')
    path_velo = os.path.join(data_dir, 'velodyne')
    path_calib = os.path.join(data_dir, 'calib')

    # Load image
    cv_bgr_image = cv2.imread(os.path.join(path_img, fname + '.png'))
    rgb_image = cv_bgr_image[..., ::-1]
    image_shape = rgb_image.shape[0:2]

    # Load point cloud
    frame_calib = calib_utils.read_calibration(path_calib, img_idx)
    x, y, z, _ = calib_utils.read_lidar(velo_dir=path_velo, img_idx=img_idx)

    if sin_type == 'lowres':
        starts = np.where(np.diff(np.sign(y)) > 0)[0] + 1
        true_starts = np.append(np.diff(starts) > 2, [True])
        starts = starts[true_starts]
        n_lasers = starts.shape[0] + 1
        starts = [0] + starts.tolist() + [len(x)]
        include = np.zeros(len(x), dtype=bool)
        stride_sub = get_stride_sub(sin_level)
        lasers_num = range(0, SINFields.LASERS_NUM, stride_sub)
        for laser in lasers_num:
            if laser < n_lasers:
                include[starts[laser]:starts[laser + 1]] = True

        pts_lowres = np.vstack((x[include], y[include], z[include])).T  # N x 3
        pts_lowres = calib_utils.lidar_to_cam_frame(pts_lowres, frame_calib)
        # Only keep points in front of camera (positive z)
        pts_lowres = pts_lowres[pts_lowres[:, 2] > 0]

        # Project to image frame
        point_in_im_lowres = calib_utils.project_to_image(
            pts_lowres.T, p=frame_calib.p2).T
        im_size = [image_shape[1], image_shape[0]]

        # Filter based on the given image size
        image_filter_lowres = (point_in_im_lowres[:, 0] > 0) & \
                              (point_in_im_lowres[:, 0] < im_size[0]) & \
                              (point_in_im_lowres[:, 1] > 0) & \
                              (point_in_im_lowres[:, 1] < im_size[1])
        point_cloud_lowres = pts_lowres[image_filter_lowres, :].T
    else:
        include = np.ones(len(x), dtype=bool)

    pts = np.vstack((x, y, z)).T  # N x 3
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)
    # Only keep points in front of camera (positive z)
    pts = pts[pts[:, 2] > 0]

    # Project to image frame
    point_in_im = calib_utils.project_to_image(pts.T, p=frame_calib.p2).T
    im_size = [image_shape[1], image_shape[0]]

    # Filter based on the given image size
    image_filter = (point_in_im[:, 0] > 0) & \
                   (point_in_im[:, 0] < im_size[0]) & \
                   (point_in_im[:, 1] > 0) & \
                   (point_in_im[:, 1] < im_size[1])
    point_cloud = pts[image_filter, :].T
    point_in_im = point_in_im[image_filter, :]

    image_input_sin, point_cloud_sin = genSINtoInputs(
        rgb_image, point_cloud,
        sin_type=sin_type,
        sin_level=sin_level,
        sin_input_name=sin_input_name,
        mask_2d=mask_2d,
        frame_calib_p2=frame_calib.p2)
    if sin_type == 'lowres':
        point_cloud_sin = point_cloud_lowres

    fname_out = fname + '_{}_sin_{}_{}'.format(sin_input_name, sin_type,
                                               sin_level)
    if sin_input_name == 'image':
        cv2.imwrite(os.path.join(out_dir, fname + '_image_org.png'),
                    cv_bgr_image)
        cv2.imwrite(os.path.join(out_dir, fname_out + '.png'),
                    image_input_sin[..., ::-1])
    elif sin_input_name == 'lidar':
        # Clip distance with min/max values (for visualization)
        pointsDist = point_cloud[2, :]
        # for i_pt, pdist in enumerate(pointsDist):
        #     pointsDist[i_pt] = D_MAX if pdist > D_MAX \
        #         else pdist if pdist > D_MIN \
        #         else D_MIN
        image_w_pts = points_on_img(point_in_im, pointsDist, rgb_image,
                                    on_img=on_img)

        point_in_im2 = calib_utils.project_to_image(point_cloud_sin,
                                                    p=frame_calib.p2).T
        # Clip distance with min/max values (for visualization)
        pointsDist2 = point_cloud_sin[2, :]
        # for i_pt, pdist in enumerate(pointsDist2):
        #     pointsDist2[i_pt] = D_MAX if pdist > D_MAX \
        #         else pdist if pdist > D_MIN \
        #         else D_MIN
        image_filter2 = (point_in_im2[:, 0] > 0) & \
                        (point_in_im2[:, 0] < im_size[0]) & \
                        (point_in_im2[:, 1] > 0) & \
                        (point_in_im2[:, 1] < im_size[1])
        point_in_im2 = point_in_im2[image_filter2, :]
        pointsDist2 = pointsDist2[image_filter2]
        image_w_pts2 = points_on_img(point_in_im2, pointsDist2, rgb_image,
                                     on_img=on_img)

        cv2.imwrite(os.path.join(out_dir, fname + '_lidar_org.png'),
                    image_w_pts[..., ::-1])
        if on_img:
            cv2.imwrite(os.path.join(out_dir, fname_out + '.png'),
                        image_w_pts2[..., ::-1])
        else:
            cv2.imwrite(os.path.join(out_dir, fname_out + '_black.png'),
                        image_w_pts2[..., ::-1])
    else:
        raise ValueError("Invalid sin_input_name {}".format(sin_input_name))
def inference(rpn_model_path, detect_model_path, avod_config_path):
    model_config, _, eval_config, dataset_config = \
        config_builder.get_configs_from_pipeline_file(
            avod_config_path, is_training=False)

    # Setup the model
    model_name = model_config.model_name
    # Overwrite repeated field
    model_config = config_builder.proto_to_obj(model_config)
    # Switch path drop off during evaluation
    model_config.path_drop_probabilities = [1.0, 1.0]

    dataset = get_dataset(dataset_config, 'val')

    # Run avod proposal network
    rpn_endpoints, sess1, rpn_model = get_proposal_network(model_config,
                                                           dataset,
                                                           rpn_model_path)
    end_points, sess2 = get_detection_network(detect_model_path)

    all_prediction = []
    all_id_list = None
    all_2d_boxes = []
    for idx in range(3769):
        feed_dict1 = rpn_model.create_feed_dict()
        kitti_samples = dataset.load_samples([idx])
        sample = kitti_samples[0]
        '''
        if sample[constants.KEY_SAMPLE_NAME] < '001100':
            continue
        if sample[constants.KEY_SAMPLE_NAME] > '001200':
            break
        '''
        start_time = time.time()
        rpn_predictions = sess1.run(rpn_endpoints, feed_dict=feed_dict1)
        top_anchors = rpn_predictions[RpnModel.PRED_TOP_ANCHORS]
        top_proposals = box_3d_encoder.anchors_to_box_3d(top_anchors)
        softmax_scores = rpn_predictions[
            RpnModel.PRED_TOP_OBJECTNESS_SOFTMAX]
        proposals_and_scores = np.column_stack((top_proposals,
                                                softmax_scores))
        top_img_roi = rpn_predictions[RpnModel.PRED_TOP_IMG_ROI]
        top_bev_roi = rpn_predictions[RpnModel.PRED_TOP_BEV_ROI]
        roi_num = len(top_img_roi)
        top_img_roi = np.reshape(top_img_roi, (roi_num, -1))
        top_bev_roi = np.reshape(top_bev_roi, (roi_num, -1))
        roi_features = np.column_stack((top_img_roi, top_bev_roi))
        '''
        # save proposal
        if os.path.exists(os.path.join(
                '/data/ssd/public/jlliu/Kitti/object/training/proposal',
                '%s.txt' % (sample[constants.KEY_SAMPLE_NAME]))):
            continue
        np.savetxt(os.path.join('./proposals_and_scores/',
                                '%s.txt' % sample[constants.KEY_SAMPLE_NAME]),
                   proposals_and_scores, fmt='%.3f')
        np.savetxt(os.path.join('./roi_features/',
                                '%s_roi.txt' % sample[constants.KEY_SAMPLE_NAME]),
                   roi_features, fmt='%.5f')
        print('save ' + sample[constants.KEY_SAMPLE_NAME])
        '''
        # Run frustum_pointnets_v2
        point_clouds, feature_vec, rot_angle_list, prop_cls_labels = \
            get_pointnet_input(sample, proposals_and_scores, roi_features)
        try:
            prediction = detect_batch(sess2, end_points, point_clouds,
                                      feature_vec, rot_angle_list,
                                      prop_cls_labels)
        except Exception:
            traceback.print_exc()
            continue
        elapsed_time = time.time() - start_time
        print(sample[constants.KEY_SAMPLE_NAME], elapsed_time)

        # Concatenate all predictions for kitti eval
        id_list = np.ones((len(prediction),)) * \
            int(sample[constants.KEY_SAMPLE_NAME])
        if all_id_list is None:
            all_id_list = id_list
        else:
            all_id_list = np.concatenate((all_id_list, id_list), axis=0)
        for pred in prediction:
            obj = box_3d_encoder.box_3d_to_object_label(
                np.array(pred[0:7]), obj_type=type_whitelist[int(pred[8])])
            corners = compute_box_3d(obj)
            projected = calib_utils.project_to_image(
                corners.T, sample[constants.KEY_STEREO_CALIB_P2])
            x1 = np.amin(projected[0])
            y1 = np.amin(projected[1])
            x2 = np.amax(projected[0])
            y2 = np.amax(projected[1])
            all_2d_boxes.append([x1, y1, x2, y2])
        all_prediction += prediction

        # Save result
        pickle.dump({'proposals_and_scores': proposals_and_scores,
                     'roi_features': roi_features},
                    open("rpn_out/%s" % sample[constants.KEY_SAMPLE_NAME],
                         "wb"))
        pickle.dump(prediction,
                    open('final_out/%s' % sample[constants.KEY_SAMPLE_NAME],
                         'wb'))
        visualize(dataset, sample, prediction)

    # For kitti eval
    write_detection_results('./detection_results', all_prediction,
                            all_id_list, all_2d_boxes)
def project_3d_box_to_image_space(box_3d, stereo_calib_p2, image_shape):
    """ Projects 3D boxes into image space

    Args:
        box_3d: boxes in box_3d format N x [x, y, z, l, w, h, ry]
        stereo_calib_p2: stereo camera calibration p2 matrix
        image_shape: dimensions of the image [h, w]

    Returns:
        box_corners: corners in image space - N x [x1, y1, x2, y2]
        box_corners_norm: corners as a percentage of the image size -
            N x [x1, y1, x2, y2]
    """
    if box_3d.shape[1] != 7:
        raise ValueError("Invalid shape for box_3d {}, should be "
                         "(N, 7)".format(box_3d.shape[1]))

    # Figure out box mins and maxes
    x = box_3d[:, 0]
    y = box_3d[:, 1]
    z = box_3d[:, 2]
    l = box_3d[:, 3]
    w = box_3d[:, 4]
    h = box_3d[:, 5]
    ry = box_3d[:, 6]

    # Calculate 3D BB corners, rotated by ry around the y-axis
    x_corners = np.array([
        x + l / 2.0 * np.cos(ry) + w / 2.0 * np.sin(ry),
        x - l / 2.0 * np.cos(ry) + w / 2.0 * np.sin(ry),
        x - l / 2.0 * np.cos(ry) - w / 2.0 * np.sin(ry),
        x + l / 2.0 * np.cos(ry) - w / 2.0 * np.sin(ry),
        x + l / 2.0 * np.cos(ry) + w / 2.0 * np.sin(ry),
        x - l / 2.0 * np.cos(ry) + w / 2.0 * np.sin(ry),
        x - l / 2.0 * np.cos(ry) - w / 2.0 * np.sin(ry),
        x + l / 2.0 * np.cos(ry) - w / 2.0 * np.sin(ry)
    ]).T.reshape(1, -1)
    y_corners = np.array([y, y, y, y,
                          y - h, y - h, y - h, y - h]).T.reshape(1, -1)
    z_corners = np.array([
        z - l / 2.0 * np.sin(ry) + w / 2.0 * np.cos(ry),
        z + l / 2.0 * np.sin(ry) + w / 2.0 * np.cos(ry),
        z + l / 2.0 * np.sin(ry) - w / 2.0 * np.cos(ry),
        z - l / 2.0 * np.sin(ry) - w / 2.0 * np.cos(ry),
        z - l / 2.0 * np.sin(ry) + w / 2.0 * np.cos(ry),
        z + l / 2.0 * np.sin(ry) + w / 2.0 * np.cos(ry),
        z + l / 2.0 * np.sin(ry) - w / 2.0 * np.cos(ry),
        z - l / 2.0 * np.sin(ry) - w / 2.0 * np.cos(ry)
    ]).T.reshape(1, -1)

    anchor_corners = np.vstack([x_corners, y_corners, z_corners])

    # Apply the 2D image plane transformation
    pts_2d = calib_utils.project_to_image(anchor_corners, stereo_calib_p2)

    image_shape_h = image_shape[0]
    image_shape_w = image_shape[1]

    # Get the mins and maxes of image coordinates, clamped to the image
    # bounds
    # TODO: remove out-of-boundary points
    i_axis_min_points = np.amin(pts_2d[0, :].reshape(-1, 8), axis=1)
    mask_i_min = i_axis_min_points < 0  # Remove negative values
    i_axis_min_points[mask_i_min] = 0
    mask_i_min = i_axis_min_points > image_shape_w - 2  # Clamp large values
    i_axis_min_points[mask_i_min] = image_shape_w - 3

    j_axis_min_points = np.amin(pts_2d[1, :].reshape(-1, 8), axis=1)
    mask_j_min = j_axis_min_points < 0
    j_axis_min_points[mask_j_min] = 0
    mask_j_min = j_axis_min_points > image_shape_h - 2
    j_axis_min_points[mask_j_min] = image_shape_h - 3

    i_axis_max_points = np.amax(pts_2d[0, :].reshape(-1, 8), axis=1)
    mask_i_max = i_axis_max_points > image_shape_w - 1
    i_axis_max_points[mask_i_max] = image_shape_w - 1
    mask_i_max = i_axis_max_points <= 0
    i_axis_max_points[mask_i_max] = 2

    j_axis_max_points = np.amax(pts_2d[1, :].reshape(-1, 8), axis=1)
    mask_j_max = j_axis_max_points > image_shape_h - 1
    j_axis_max_points[mask_j_max] = image_shape_h - 1
    mask_j_max = j_axis_max_points <= 0
    j_axis_max_points[mask_j_max] = 2

    box_corners = np.vstack([i_axis_min_points, j_axis_min_points,
                             i_axis_max_points, j_axis_max_points]).T

    # Normalize
    image_shape_tiled = [image_shape_w, image_shape_h,
                         image_shape_w, image_shape_h]
    box_corners_norm = box_corners / image_shape_tiled

    return np.array(box_corners, dtype=np.float32), \
        np.array(box_corners_norm, dtype=np.float32)
def project_to_image_space(anchors, stereo_calib_p2, image_shape):
    """ Projects 3D anchors into image space

    Args:
        anchors: list of anchors in anchor format N x [x, y, z,
            dim_x, dim_y, dim_z]
        stereo_calib_p2: stereo camera calibration p2 matrix
        image_shape: dimensions of the image [h, w]

    Returns:
        box_corners: corners in image space - N x [x1, y1, x2, y2]
        box_corners_norm: corners as a percentage of the image size -
            N x [x1, y1, x2, y2]
    """
    if anchors.shape[1] != 6:
        raise ValueError("Invalid shape for anchors {}, should be "
                         "(N, 6)".format(anchors.shape[1]))

    # Figure out box mins and maxes
    x = anchors[:, 0]
    y = anchors[:, 1]
    z = anchors[:, 2]
    dim_x = anchors[:, 3]
    dim_y = anchors[:, 4]
    dim_z = anchors[:, 5]

    dim_x_half = dim_x / 2.
    dim_z_half = dim_z / 2.

    # Calculate 3D BB corners
    x_corners = np.array([x + dim_x_half,
                          x + dim_x_half,
                          x - dim_x_half,
                          x - dim_x_half,
                          x + dim_x_half,
                          x + dim_x_half,
                          x - dim_x_half,
                          x - dim_x_half]).T.reshape(1, -1)
    y_corners = np.array([y, y, y, y,
                          y - dim_y, y - dim_y,
                          y - dim_y, y - dim_y]).T.reshape(1, -1)
    z_corners = np.array([z + dim_z_half,
                          z - dim_z_half,
                          z - dim_z_half,
                          z + dim_z_half,
                          z + dim_z_half,
                          z - dim_z_half,
                          z - dim_z_half,
                          z + dim_z_half]).T.reshape(1, -1)

    anchor_corners = np.vstack([x_corners, y_corners, z_corners])

    # Apply the 2D image plane transformation
    pts_2d = calib_utils.project_to_image(anchor_corners, stereo_calib_p2)

    # Get the min and maxes of image coordinates
    i_axis_min_points = np.amin(pts_2d[0, :].reshape(-1, 8), axis=1)
    j_axis_min_points = np.amin(pts_2d[1, :].reshape(-1, 8), axis=1)
    i_axis_max_points = np.amax(pts_2d[0, :].reshape(-1, 8), axis=1)
    j_axis_max_points = np.amax(pts_2d[1, :].reshape(-1, 8), axis=1)

    box_corners = np.vstack([i_axis_min_points, j_axis_min_points,
                             i_axis_max_points, j_axis_max_points]).T

    # Normalize
    image_shape_h = image_shape[0]
    image_shape_w = image_shape[1]
    image_shape_tiled = [image_shape_w, image_shape_h,
                         image_shape_w, image_shape_h]
    box_corners_norm = box_corners / image_shape_tiled

    return np.array(box_corners, dtype=np.float32), \
        np.array(box_corners_norm, dtype=np.float32)
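
# Hypothetical call (values are placeholders): two axis-aligned anchors
# projected to pixel boxes and [0, 1]-normalized boxes.
# anchors = np.array([[0., 1.5, 10., 3.9, 1.6, 1.6],
#                     [2., 1.5, 20., 3.9, 1.6, 1.6]])  # N x [x, y, z, dims]
# boxes, boxes_norm = project_to_image_space(anchors, frame_calib.p2,
#                                            image_shape=[375, 1242])
# boxes.shape       # (2, 4) -> [x1, y1, x2, y2] in pixels
# boxes_norm.shape  # (2, 4) -> the same corners as fractions of image size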
def project_to_image_space(box_3d, calib_p2,
                           truncate=False, image_size=None,
                           discard_before_truncation=True):
    """ Projects a box_3d into image space

    Args:
        box_3d: single box_3d to project
        calib_p2: stereo calibration p2 matrix
        truncate: if True, 2D projections are truncated to be inside the image
        image_size: [w, h] must be provided if truncate is True,
            used for truncation
        discard_before_truncation: If True, discard boxes that are larger
            than 80% of the image in width OR height BEFORE truncation. If
            False, discard boxes that are larger than 80% of the width AND
            height AFTER truncation.

    Returns:
        Projected box in image space [x1, y1, x2, y2]
        Returns None if box is not inside the image
    """
    format_checker.check_box_3d_format(box_3d)

    obj_label = box_3d_encoder.box_3d_to_object_label(box_3d)
    corners_3d = obj_utils.compute_box_corners_3d(obj_label)

    projected = calib_utils.project_to_image(corners_3d, calib_p2)

    x1 = np.amin(projected[0])
    y1 = np.amin(projected[1])
    x2 = np.amax(projected[0])
    y2 = np.amax(projected[1])

    img_box = np.array([x1, y1, x2, y2])

    if truncate:
        if not image_size:
            raise ValueError('Image size must be provided')

        image_w = image_size[0]
        image_h = image_size[1]

        # Discard invalid boxes (outside image space)
        if img_box[0] > image_w or \
                img_box[1] > image_h or \
                img_box[2] < 0 or \
                img_box[3] < 0:
            return None

        # Discard boxes that are larger than 80% of the image width OR height
        if discard_before_truncation:
            img_box_w = img_box[2] - img_box[0]
            img_box_h = img_box[3] - img_box[1]
            if img_box_w > (image_w * 0.8) or img_box_h > (image_h * 0.8):
                return None

        # Truncate remaining boxes into image space
        if img_box[0] < 0:
            img_box[0] = 0
        if img_box[1] < 0:
            img_box[1] = 0
        if img_box[2] > image_w:
            img_box[2] = image_w
        if img_box[3] > image_h:
            img_box[3] = image_h

        # Discard boxes that are covering the whole image after truncation
        if not discard_before_truncation:
            img_box_w = img_box[2] - img_box[0]
            img_box_h = img_box[3] - img_box[1]
            if img_box_w > (image_w * 0.8) and img_box_h > (image_h * 0.8):
                return None

    return img_box
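
# Illustrative behaviour of the truncation flags (box values are made up):
# with discard_before_truncation=True a near-camera box spanning most of the
# image is dropped before clipping; with False it is clipped first and only
# dropped if it still covers more than 80% of BOTH dimensions.
# img_box = project_to_image_space(box_3d, frame_calib.p2, truncate=True,
#                                  image_size=[1242, 375])
# if img_box is None:
#     pass  # box fell outside the image, or was too large and discarded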
def interpolate(point_cloud, calib_p2, image_shape, custom_kernel,
                max_depth, show_process=False):
    """Interpolates the lidar point cloud by projecting the points into
    image space, and then applying classical image processing steps. The
    projected image is dilated with a custom kernel, closed, and a
    bilateral blur is applied to smooth the final output depth map.

    :param point_cloud: input point cloud (N, 3)
    :param calib_p2: stereo calibration p2 matrix
    :param image_shape: image shape [h, w]
    :param custom_kernel: custom kernel for initial dilation
    :param max_depth: maximum output depth
    :param show_process: (optional) flag to return image processing steps

    :return final_depths: interpolated lidar depth map
    :return process_dict: if show_process is True, this is an OrderedDict
        with entries showing the image processing steps, None otherwise
    """
    all_points = point_cloud.T

    # Save the depth corresponding to each point
    point_in_im = calib_utils.project_to_image(all_points.T, p=calib_p2).T
    point_in_im_rounded = np.int32(np.floor(point_in_im))

    # Invert depths
    all_points[:, 2] = max_depth - all_points[:, 2]

    # Vectorized version
    all_depths = np.zeros(image_shape)
    valid_indices = (point_in_im_rounded[:, 1], point_in_im_rounded[:, 0])
    all_depths[valid_indices] = [
        max(all_depths[point_in_im_rounded[idx, 1],
                       point_in_im_rounded[idx, 0]],
            all_points[idx, 2])
        for idx in range(len(all_points))]

    # Loop version (obsolete, keeping to show logic and as a backup)
    # all_depths = np.zeros((image_shape[1], image_shape[0]))
    # for point_idx in range(len(point_in_im_rounded)):
    #     map_x = point_in_im_rounded[point_idx, 1]
    #     map_y = point_in_im_rounded[point_idx, 0]
    #
    #     point_depth = all_points[point_idx, 2]
    #
    #     # Keep the farther distance for overlapping points
    #     if all_depths[map_x, map_y] > 0.0:
    #         all_depths[map_x, map_y] = \
    #             max(all_depths[map_x, map_y], point_depth)
    #     else:
    #         all_depths[map_x, map_y] = point_depth
    #
    #     # Clip to specified maximum depth
    #     all_depths[map_x, map_y] = \
    #         np.minimum(all_depths[map_x, map_y], max_depth)

    # Fill in the depth map
    lidar_depths = np.float32(all_depths)

    # Operations
    depths_in = lidar_depths
    dilated_depths = cv2.dilate(depths_in, custom_kernel)
    closed_depths = cv2.morphologyEx(dilated_depths, cv2.MORPH_CLOSE,
                                     FULL_KERNEL_5)
    blurred_depths = cv2.bilateralFilter(closed_depths, 3, 1, 2)
    depths_out = blurred_depths

    # Save final version to final_depths variable to be used later
    final_depths = depths_out.copy()

    # Invert
    valid_pixels = np.where(final_depths > 0.5)
    valid_pixels = np.asarray(valid_pixels).T
    final_depths[valid_pixels[:, 0], valid_pixels[:, 1]] = \
        max_depth - final_depths[valid_pixels[:, 0], valid_pixels[:, 1]]

    process_dict = None
    if show_process:
        process_dict = collections.OrderedDict()
        process_dict['lidar_depths'] = lidar_depths
        process_dict['dilated_depths'] = dilated_depths
        process_dict['closed_depths'] = closed_depths
        process_dict['blurred_depths'] = blurred_depths
        process_dict['final_depths'] = final_depths

    return final_depths, process_dict
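
# Hypothetical invocation of interpolate (the diamond kernel is an assumption
# in the spirit of the custom dilation kernel; FULL_KERNEL_5 above is assumed
# to be a 5x5 ones kernel): densify a sparse projected lidar depth map.
# DIAMOND_KERNEL_5 = np.array([[0, 0, 1, 0, 0],
#                              [0, 1, 1, 1, 0],
#                              [1, 1, 1, 1, 1],
#                              [0, 1, 1, 1, 0],
#                              [0, 0, 1, 0, 0]], dtype=np.uint8)
# final_depths, _ = interpolate(point_cloud, frame_calib.p2,
#                               image_shape=[375, 1242],
#                               custom_kernel=DIAMOND_KERNEL_5,
#                               max_depth=100.0, show_process=False)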
def get_lidar_point_cloud_sub(img_idx, calib_dir, velo_dir,
                              im_size=None, min_intensity=None,
                              stride_sub=1):
    """ Calculates the lidar point cloud, keeping only every stride_sub-th
    laser ring, and optionally returns only the points that are projected
    to the image.

    :param img_idx: image index
    :param calib_dir: directory with calibration files
    :param velo_dir: directory with velodyne files
    :param im_size: (optional) 2 x 1 list containing the size of the image
                    to filter the point cloud [w, h]
    :param min_intensity: (optional) minimum intensity required to keep a point
    :param stride_sub: (optional) stride for subsampling the laser rings

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """
    # Read calibration info
    frame_calib = calib_utils.read_calibration(calib_dir, img_idx)
    x, y, z, i = calib_utils.read_lidar(velo_dir=velo_dir, img_idx=img_idx)

    # Find the start index of each laser ring (sign changes of y)
    starts = np.where(np.diff(np.sign(y)) > 0)[0] + 1
    true_starts = np.append(np.diff(starts) > 2, [True])
    starts = starts[true_starts]
    n_lasers = starts.shape[0] + 1
    starts = [0] + starts.tolist() + [len(x)]

    # Keep every stride_sub-th laser ring
    include = np.zeros(len(x), dtype=bool)
    lasers_num = range(0, SINFields.LASERS_NUM, stride_sub)
    for laser in lasers_num:
        if laser < n_lasers:
            include[starts[laser]:starts[laser + 1]] = True
    i = i[include]

    # Calculate the point cloud
    pts = np.vstack((x[include], y[include], z[include])).T
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)

    # The given image is assumed to be a 2D image
    if not im_size:
        point_cloud = pts.T
        return point_cloud

    else:
        # Only keep points in front of camera (positive z); filter the
        # intensities with the same mask so the masks below stay aligned
        front_filter = pts[:, 2] > 0
        pts = pts[front_filter]
        i = i[front_filter]
        point_cloud = pts.T

        # Project to image frame
        point_in_im = calib_utils.project_to_image(point_cloud,
                                                   p=frame_calib.p2).T

        # Filter based on the given image size
        image_filter = (point_in_im[:, 0] > 0) & \
                       (point_in_im[:, 0] < im_size[0]) & \
                       (point_in_im[:, 1] > 0) & \
                       (point_in_im[:, 1] < im_size[1])

    if not min_intensity:
        return pts[image_filter].T

    else:
        intensity_filter = i > min_intensity
        point_filter = np.logical_and(image_filter, intensity_filter)
        return pts[point_filter].T
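
# Reading of the ring-splitting heuristic above (an interpretation, not from
# the source): in a KITTI velodyne scan each laser sweeps a full revolution,
# so indices where sign(y) flips from negative to positive mark the start of
# a new laser ring; striding over those rings emulates a sensor with fewer
# beams, e.g.:
# half_res_cloud = get_lidar_point_cloud_sub(0, 'training/calib',
#                                            'training/velodyne',
#                                            im_size=[1242, 375],
#                                            stride_sub=2)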