import copy
import logging
import math
import time

import matplotlib.pyplot as plt
import numpy as np

from autolab_core import Point, PointCloud, RigidTransform
from perception import ChessboardRegistrationResult, DepthImage
from visualization import Visualizer3D as vis3d


def visualize_test():
    """Sanity-check visualization: show frame a, frame b, and the origin of
    frame b expressed in frame a."""
    I = RigidTransform()
    g_ab = RigidTransform()
    g_ab.translation = np.array([0.05, 0, 0])
    g_ab.rotation = np.array([[0, -1, 0],
                              [1, 0, 0],
                              [0, 0, 1]])
    q_a = np.array([0., 0., 0.])
    p_b = np.array([0., 0., 0.])
    p_a = apply_transform(p_b, g_ab)
    print('g_ab = \n{}'.format(g_ab.matrix))

    vis3d.pose(I, alpha=0.01, tube_radius=0.001, center_scale=0.001)
    vis3d.points(q_a, color=(1, 0, 0), scale=0.005)
    vis3d.pose(g_ab, alpha=0.01, tube_radius=0.001, center_scale=0.001)
    vis3d.points(p_a, color=(0, 1, 0), scale=0.005)
    vis3d.show()
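# `apply_transform` is assumed to be defined elsewhere in this module. A
# minimal sketch consistent with its use above (mapping a 3D point through a
# rigid transform, p_a = R_ab p_b + t_ab) could look like this:
def apply_transform(point, g):
    """Apply a rigid transform g to a 3D point: p' = R.dot(p) + t."""
    return g.rotation.dot(point) + g.translation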
def process_frame(frame_json, camera_intrinsics, models):
    """Recompute object poses, cuboids, and projections for one frame's JSON
    annotations, flipping symmetrical objects into a consistent orientation."""
    num_objects = len(frame_json['objects'])
    if num_objects == 0:
        print('no objects in frame!')
        return frame_json

    # copy the relevant fields of the json
    updated_frame_json = {'camera_data': {}, 'objects': []}
    if frame_json['camera_data']:
        updated_frame_json['camera_data']['location_worldframe'] = copy.deepcopy(
            frame_json['camera_data']['location_worldframe'])
        updated_frame_json['camera_data']['quaternion_xyzw_worldframe'] = copy.deepcopy(
            frame_json['camera_data']['quaternion_xyzw_worldframe'])
    else:
        print('Warning: no `camera_data` field!')
    for i in range(num_objects):
        updated_frame_json['objects'].append({})
        for field in ('class', 'bounding_box', 'instance_id', 'visibility'):
            updated_frame_json['objects'][i][field] = copy.deepcopy(
                frame_json['objects'][i][field])

    # get object poses and flip symmetrical objects if necessary
    object_poses = []
    for object_index in range(num_objects):
        model_name = updated_frame_json['objects'][object_index]['class']
        scene_object_json = frame_json['objects'][object_index]
        translation, rotation = object_pose_from_json(scene_object_json)
        object_pose = RigidTransform(rotation=rotation,
                                     translation=translation,
                                     from_frame='source_model',
                                     to_frame='camera')
        object_pose = object_pose.dot(models[model_name].model_transform)

        # Handle symmetrical objects: if the z axis (= x axis in mesh) points
        # towards the camera, rotate by 180 degrees around x (= z axis in mesh).
        # This always keeps the "green/yellow" short side (in nvdu_viz) pointed
        # towards the camera; the "blue/magenta" short side should never be
        # visible. This is necessary for DOPE training, since both sides look
        # the same because the KLT box is symmetrical.
        symmetry_flip_tf = RigidTransform(from_frame='target_model',
                                          to_frame='target_model_original')
        z_axis = rotation.dot(np.array([0.0, 0.0, 1.0], dtype=np.float64))
        if z_axis.dot(translation) < 0:
            symmetry_flip_tf.rotation = RigidTransform.x_axis_rotation(math.pi)
        object_pose = object_pose.dot(symmetry_flip_tf)
        object_poses.append(object_pose)

        updated_frame_json['objects'][object_index]['location'] = \
            object_pose.translation.tolist()
        # convert the wxyz quaternion to xyzw order
        updated_frame_json['objects'][object_index]['quaternion_xyzw'] = \
            np.roll(object_pose.quaternion, -1).tolist()

    # add pose_transform_permuted
    for object_index in range(num_objects):
        ptp_json = pose_transform_permuted_to_json(
            object_poses[object_index].translation,
            object_poses[object_index].rotation)
        updated_frame_json['objects'][object_index].update(ptp_json)

    # compute cuboid, cuboid_centroid, projected_cuboid,
    # projected_cuboid_centroid and bounding_box
    for object_index in range(num_objects):
        model_name = updated_frame_json['objects'][object_index]['class']
        cuboid_json = get_cuboid(object_poses[object_index],
                                 models[model_name].cuboid_dimensions,
                                 camera_intrinsics)
        updated_frame_json['objects'][object_index].update(cuboid_json)

    return updated_frame_json
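# Example driver for process_frame (a sketch, not part of the original module:
# the file names and the `load_models` helper are hypothetical; `load_models`
# is assumed to return objects exposing `.model_transform` and
# `.cuboid_dimensions`, plus whatever intrinsics format `get_cuboid` expects).
def _example_process_frame():
    import json
    with open('000000.json', 'r') as f:            # hypothetical frame file
        frame_json = json.load(f)
    with open('_camera_settings.json', 'r') as f:  # hypothetical intrinsics
        camera_intrinsics = json.load(f)
    models = load_models('models/')                # hypothetical helper
    updated_frame_json = process_frame(frame_json, camera_intrinsics, models)
    with open('000000.updated.json', 'w') as f:
        json.dump(updated_frame_json, f, indent=2)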
def register(sensor, config):
    """ Registers a camera to a chessboard.

    Parameters
    ----------
    sensor : :obj:`perception.RgbdSensor`
        the sensor to register
    config : :obj:`autolab_core.YamlConfig` or :obj:`dict`
        configuration file for registration

    Returns
    -------
    :obj:`ChessboardRegistrationResult`
        the result of registration

    Notes
    -----
    The config must have the parameters specified in the Other Parameters
    section.

    Other Parameters
    ----------------
    num_transform_avg : int
        the number of independent registrations to average together
    num_images : int
        the number of images to read for each independent registration
    corners_x : int
        the number of chessboard corners in the x-direction
    corners_y : int
        the number of chessboard corners in the y-direction
    point_order : str
        'row_major' to estimate the chessboard x-axis from the corner rows;
        any other value estimates the y-axis from the corner columns
    color_image_rescale_factor : float
        amount to rescale the color image for detection (numbers around 4-8
        are useful)
    flip_normal : bool
        whether or not to flip the fitted plane normal
    y_points_left : bool, optional
        whether the chessboard y-axis should point left (only used when
        corners_x == corners_y)
    vis : bool
        whether or not to visualize the detected corners
    debug : bool
        whether or not to display the detected chessboard frame
    scale_amt : float
        length of the axes drawn in debug mode
    """
    # read config
    num_transform_avg = config['num_transform_avg']
    num_images = config['num_images']
    sx = config['corners_x']
    sy = config['corners_y']
    point_order = config['point_order']
    color_image_rescale_factor = config['color_image_rescale_factor']
    flip_normal = config['flip_normal']
    y_points_left = False
    if 'y_points_left' in config.keys() and sx == sy:
        y_points_left = config['y_points_left']
        num_images = 1
    vis = config['vis']

    # read params from sensor
    logging.info('Registering camera %s' % (sensor.frame))
    ir_intrinsics = sensor.ir_intrinsics

    # repeat registration multiple times and average results
    R = np.zeros([3, 3])
    t = np.zeros([3, 1])
    points_3d_plane = PointCloud(np.zeros([3, sx * sy]),
                                 frame=sensor.ir_frame)

    k = 0
    while k < num_transform_avg:
        # take the pixelwise median of several depth images to reduce noise
        depth_ims = None
        for i in range(num_images):
            start = time.time()
            small_color_im, new_depth_im, _ = sensor.frames()
            end = time.time()
            logging.info('Frames Runtime: %.3f' % (end - start))
            if depth_ims is None:
                depth_ims = np.zeros([new_depth_im.height,
                                      new_depth_im.width,
                                      num_images])
            depth_ims[:, :, i] = new_depth_im.data

        med_depth_im = np.median(depth_ims, axis=2)
        depth_im = DepthImage(med_depth_im, sensor.ir_frame)

        # find the corner pixels in an upsampled version of the color image
        big_color_im = small_color_im.resize(color_image_rescale_factor)
        corner_px = big_color_im.find_chessboard(sx=sx, sy=sy)

        if corner_px is None:
            logging.error('No chessboard detected! Check camera exposure settings')
            continue

        if vis:
            plt.figure()
            plt.imshow(big_color_im.data)
            for i in range(sx):
                plt.scatter(corner_px[i, 0], corner_px[i, 1], s=25, c='b')
            plt.show()

        # convert back to the original image scale
        small_corner_px = corner_px / color_image_rescale_factor

        if vis:
            plt.figure()
            plt.imshow(small_color_im.data)
            for i in range(sx):
                plt.scatter(small_corner_px[i, 0], small_corner_px[i, 1],
                            s=25, c='b')
            plt.axis('off')
            plt.show()

        # project points into 3D
        camera_intr = sensor.ir_intrinsics
        points_3d = camera_intr.deproject(depth_im)

        # get the linear indices of the corners, rounded to the nearest pixel
        corner_px_round = np.round(small_corner_px).astype(np.uint16)
        corner_ind = depth_im.ij_to_linear(corner_px_round[:, 0],
                                           corner_px_round[:, 1])
        if corner_ind.shape[0] != sx * sy:
            logging.warning('Did not find all corners. Discarding...')
            continue

        # running average of the 3d corner points
        points_3d_plane = (k * points_3d_plane + points_3d[corner_ind]) / (k + 1)
        logging.info('Registration iteration %d of %d'
                     % (k + 1, config['num_transform_avg']))
        k += 1

    # fit a plane to the chessboard corners by least squares:
    # z = w[0]*x + w[1]*y + w[2], so the (unnormalized) normal is (w[0], w[1], -1)
    X = np.c_[points_3d_plane.x_coords,
              points_3d_plane.y_coords,
              np.ones(points_3d_plane.num_points)]
    y = points_3d_plane.z_coords
    A = X.T.dot(X)
    b = X.T.dot(y)
    w = np.linalg.inv(A).dot(b)
    n = np.array([w[0], w[1], -1])
    n = n / np.linalg.norm(n)
    if flip_normal:
        n = -n
    mean_point_plane = points_3d_plane.mean()

    # center the corner points so the chessboard axes can be estimated
    T_camera_table = RigidTransform(translation=-points_3d_plane.mean().data,
                                    from_frame=points_3d_plane.frame,
                                    to_frame='table')
    points_3d_centered = T_camera_table * points_3d_plane

    # estimate the in-plane chessboard axes from the centered corner points
    if point_order == 'row_major':
        coord_pos_x = int(math.floor(sx * sy / 2.0))
        coord_neg_x = int(math.ceil(sx * sy / 2.0))
        points_pos_x = points_3d_centered[coord_pos_x:]
        points_neg_x = points_3d_centered[:coord_neg_x]
        x_axis = np.mean(points_pos_x.data, axis=1) - \
            np.mean(points_neg_x.data, axis=1)
        x_axis = x_axis - np.vdot(x_axis, n) * n
        x_axis = x_axis / np.linalg.norm(x_axis)
        y_axis = np.cross(n, x_axis)
    else:
        coord_pos_y = int(math.floor(sx * (sy - 1) / 2.0))
        coord_neg_y = int(math.ceil(sx * (sy + 1) / 2.0))
        points_pos_y = points_3d_centered[:coord_pos_y]
        points_neg_y = points_3d_centered[coord_neg_y:]
        y_axis = np.mean(points_pos_y.data, axis=1) - \
            np.mean(points_neg_y.data, axis=1)
        y_axis = y_axis - np.vdot(y_axis, n) * n
        y_axis = y_axis / np.linalg.norm(y_axis)
        x_axis = np.cross(-n, y_axis)

    # produce translation and rotation from plane center and chessboard basis
    rotation_cb_camera = RigidTransform.rotation_from_axes(x_axis, y_axis, n)
    translation_cb_camera = mean_point_plane.data
    T_cb_camera = RigidTransform(rotation=rotation_cb_camera,
                                 translation=translation_cb_camera,
                                 from_frame='cb',
                                 to_frame=sensor.frame)

    if y_points_left and np.abs(T_cb_camera.y_axis[1]) > 0.1:
        if T_cb_camera.x_axis[0] > 0:
            T_cb_camera.rotation = T_cb_camera.rotation.dot(
                RigidTransform.z_axis_rotation(-np.pi / 2).T)
        else:
            T_cb_camera.rotation = T_cb_camera.rotation.dot(
                RigidTransform.z_axis_rotation(np.pi / 2).T)
    T_camera_cb = T_cb_camera.inverse()

    # optionally display cb corners with detected pose in 3d space
    if config['debug']:
        # display image with axes overlaid
        cb_center_im = camera_intr.project(
            Point(T_cb_camera.translation, frame=sensor.ir_frame))
        cb_x_im = camera_intr.project(
            Point(T_cb_camera.translation
                  + T_cb_camera.x_axis * config['scale_amt'],
                  frame=sensor.ir_frame))
        cb_y_im = camera_intr.project(
            Point(T_cb_camera.translation
                  + T_cb_camera.y_axis * config['scale_amt'],
                  frame=sensor.ir_frame))
        cb_z_im = camera_intr.project(
            Point(T_cb_camera.translation
                  + T_cb_camera.z_axis * config['scale_amt'],
                  frame=sensor.ir_frame))
        x_line = np.array([cb_center_im.data, cb_x_im.data])
        y_line = np.array([cb_center_im.data, cb_y_im.data])
        z_line = np.array([cb_center_im.data, cb_z_im.data])

        plt.figure()
        plt.imshow(small_color_im.data)
        plt.scatter(cb_center_im.data[0], cb_center_im.data[1])
        plt.plot(x_line[:, 0], x_line[:, 1], c='r', linewidth=3)
        plt.plot(y_line[:, 0], y_line[:, 1], c='g', linewidth=3)
        plt.plot(z_line[:, 0], z_line[:, 1], c='b', linewidth=3)
        plt.axis('off')
        plt.title('Chessboard frame in camera %s' % (sensor.frame))
        plt.show()

    return ChessboardRegistrationResult(T_camera_cb, points_3d_plane)
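# Example driver for register (a sketch: the sensor type and all config values
# here are assumptions, not prescribed by this module; any RgbdSensor from the
# `perception` package should work).
def _example_register():
    from perception import Kinect2Sensor
    sensor = Kinect2Sensor()
    sensor.start()
    config = {
        'num_transform_avg': 4,      # independent registrations to average
        'num_images': 5,             # depth frames per registration (median)
        'corners_x': 6,              # inner chessboard corners along x
        'corners_y': 9,              # inner chessboard corners along y
        'point_order': 'row_major',
        'color_image_rescale_factor': 4.0,
        'flip_normal': False,
        'vis': False,
        'debug': False,
        'scale_amt': 0.1,            # axis length (m) for the debug overlay
    }
    result = register(sensor, config)
    sensor.stop()
    return result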