def rotate(self, azimuth, axis=None):
    """Rotate the trackball about the "Up" axis by azimuth radians.

    Parameters
    ----------
    azimuth : float
        The number of radians to rotate.
    axis : (3,) float, optional
        The axis to rotate about. If None, the camera's y (up) axis is used.
    """
    target = self._target

    y_axis = self._n_T_camera_world.matrix[:3, 1].flatten()
    if axis is not None:
        y_axis = axis
    x_rot_mat = transformations.rotation_matrix(-azimuth, y_axis, target)
    x_rot_tf = RigidTransform(x_rot_mat[:3, :3],
                              x_rot_mat[:3, 3],
                              from_frame='world',
                              to_frame='world')
    self._n_T_camera_world = x_rot_tf.dot(self._n_T_camera_world)

    y_axis = self._T_camera_world.matrix[:3, 1].flatten()
    if axis is not None:
        y_axis = axis
    x_rot_mat = transformations.rotation_matrix(-azimuth, y_axis, target)
    x_rot_tf = RigidTransform(x_rot_mat[:3, :3],
                              x_rot_mat[:3, 3],
                              from_frame='world',
                              to_frame='world')
    self._T_camera_world = x_rot_tf.dot(self._T_camera_world)
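# Note (added commentary, not in the original source; assumes `transformations`
# is the Gohlke-style transformations module): rotation_matrix(angle,
# direction, point) returns a 4x4 homogeneous matrix for a rotation about the
# line through `point` along `direction`, which is why only the [:3, :3]
# rotation block and the [:3, 3] translation column are unpacked into the
# RigidTransform above.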
def scroll(self, clicks):
    """Zoom using a mouse scroll wheel motion.

    Parameters
    ----------
    clicks : int
        The number of clicks. Positive numbers indicate forward wheel
        movement.
    """
    target = self._target
    ratio = 0.90

    z_axis = self._n_T_camera_world.matrix[:3, 2].flatten()
    eye = self._n_T_camera_world.matrix[:3, 3].flatten()
    radius = np.linalg.norm(eye - target)
    translation = clicks * (1 - ratio) * radius * z_axis
    t_tf = RigidTransform(translation=translation,
                          from_frame='world',
                          to_frame='world')
    self._n_T_camera_world = t_tf.dot(self._n_T_camera_world)

    z_axis = self._T_camera_world.matrix[:3, 2].flatten()
    eye = self._T_camera_world.matrix[:3, 3].flatten()
    radius = np.linalg.norm(eye - target)
    translation = clicks * (1 - ratio) * radius * z_axis
    t_tf = RigidTransform(translation=translation,
                          from_frame='world',
                          to_frame='world')
    self._T_camera_world = t_tf.dot(self._T_camera_world)
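# Worked example (added commentary, not in the original source): with
# ratio = 0.90, a single forward click (clicks=1) translates the eye along the
# camera z axis by 0.1 * radius, i.e. 10% of the current eye-to-target
# distance, so repeated single clicks change that distance by roughly a factor
# of 0.9 each:
#
#     radius = 1.0
#     for _ in range(5):
#         radius -= (1 - 0.90) * radius
#     # radius is now ~0.59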
def main():
    # ====================================
    # parse arguments
    # ====================================
    parser = argparse.ArgumentParser(
        description='Generate segmentation images and updated json files.')
    parser.add_argument('-d',
                        '--data-dir',
                        default=os.getcwd(),
                        help='directory containing images and json files')
    parser.add_argument(
        '-o',
        '--object-settings',
        help='object_settings file where the fixed_model_transform corresponds'
        ' to the poses in the frame annotation jsons.'
        ' default: <data_dir>/_object_settings.json')
    parser.add_argument(
        '-t',
        '--target-object-settings',
        help='if given, transform all poses into the fixed_model_transform'
        ' from this file.')
    args = parser.parse_args()

    if not args.object_settings:
        args.object_settings = os.path.join(args.data_dir,
                                            '_object_settings.json')
    if not args.target_object_settings:
        args.target_object_settings = args.object_settings

    # =====================
    # parse object_settings
    # =====================
    with open(args.object_settings, 'r') as f:
        object_settings_json = json.load(f)
    with open(args.target_object_settings, 'r') as f:
        target_object_settings_json = json.load(f)
    if not len(object_settings_json['exported_objects']) == len(
            target_object_settings_json['exported_objects']):
        print "FATAL: object_settings and target_object_settings do not match!"
        sys.exit(-1)

    models = {}
    for model_json, target_model_json in zip(
            object_settings_json['exported_objects'],
            target_object_settings_json['exported_objects']):
        class_name = model_json['class']
        if not class_name == target_model_json['class']:
            print "FATAL: object_settings and target_object_settings do not match!"
            sys.exit(-1)
        segmentation_class_id = model_json['segmentation_class_id']
        cuboid_dimensions = np.array(model_json['cuboid_dimensions'])

        # calculate model_transform
        fixed_model_transform_mat = np.transpose(
            np.array(model_json['fixed_model_transform']))
        fixed_model_transform = RigidTransform(
            rotation=fixed_model_transform_mat[:3, :3],
            translation=fixed_model_transform_mat[:3, 3],
            from_frame='ycb_model',
            to_frame='source_model')

        target_fixed_model_transform_mat = np.transpose(
            np.array(target_model_json['fixed_model_transform']))
        target_fixed_model_transform = RigidTransform(
            rotation=target_fixed_model_transform_mat[:3, :3],
            translation=target_fixed_model_transform_mat[:3, 3],
            from_frame='ycb_model',
            to_frame='target_model_original')

        model_transform = fixed_model_transform.dot(
            target_fixed_model_transform.inverse())
        models[class_name] = ModelConfig(class_name, segmentation_class_id,
                                         model_transform, cuboid_dimensions)

    # ==================================================
    # parse camera_settings and set up camera intrinsics
    # ==================================================
    with open(os.path.join(args.data_dir, '_camera_settings.json'), 'r') as f:
        camera_settings_json = json.load(f)['camera_settings'][0]

    camera_intrinsics = CameraIntrinsics(
        frame='camera',
        fx=camera_settings_json['intrinsic_settings']['fx'],
        fy=camera_settings_json['intrinsic_settings']['fy'],
        cx=camera_settings_json['intrinsic_settings']['cx'],
        cy=camera_settings_json['intrinsic_settings']['cy'],
        skew=camera_settings_json['intrinsic_settings']['s'],
        height=camera_settings_json['captured_image_size']['height'],
        width=camera_settings_json['captured_image_size']['width'])

    # ====================================
    # process each frame
    # ====================================
    pattern = re.compile(r'\d{3,}\.json')
    json_files = sorted([
        os.path.join(args.data_dir, f) for f in os.listdir(args.data_dir)
        if pattern.match(f)
    ])
    for json_file in json_files:
        filename_prefix = json_file[:-len('json')]
        print '\n---------------------- {}*'.format(filename_prefix)
        with open(json_file, 'r') as f:
            frame_json = json.load(f)
        updated_frame_json = process_frame(frame_json, camera_intrinsics,
                                           models)
        with open(filename_prefix + 'flipped.json', 'w') as f:
            json.dump(updated_frame_json, f, indent=2, sort_keys=True)
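# Example invocation (illustrative; the script filename is an assumption, not
# part of the original source):
#
#     python flip_symmetrical_objects.py \
#         --data-dir /path/to/exported/frames \
#         --object-settings /path/to/_object_settings.json \
#         --target-object-settings /path/to/other/_object_settings.json
#
# For every frame annotation in --data-dir matching \d{3,}\.json this writes a
# corresponding <frame>.flipped.json containing the updated poses.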
def process_frame(frame_json, camera_intrinsics, models):
    num_objects = len(frame_json['objects'])
    if num_objects == 0:
        print "no objects in frame!"
        return frame_json

    # copy relevant fields of json
    updated_frame_json = {'camera_data': {}, 'objects': []}
    if frame_json['camera_data']:
        updated_frame_json['camera_data'][
            'location_worldframe'] = copy.deepcopy(
                frame_json['camera_data']['location_worldframe'])
        updated_frame_json['camera_data'][
            'quaternion_xyzw_worldframe'] = copy.deepcopy(
                frame_json['camera_data']['quaternion_xyzw_worldframe'])
    else:
        print 'Warning: no `camera_data` field!'

    for i in range(len(frame_json['objects'])):
        updated_frame_json['objects'].append({})
        updated_frame_json['objects'][i]['class'] = copy.deepcopy(
            frame_json['objects'][i]['class'])
        updated_frame_json['objects'][i]['bounding_box'] = copy.deepcopy(
            frame_json['objects'][i]['bounding_box'])
        updated_frame_json['objects'][i]['instance_id'] = copy.deepcopy(
            frame_json['objects'][i]['instance_id'])
        updated_frame_json['objects'][i]['visibility'] = copy.deepcopy(
            frame_json['objects'][i]['visibility'])

    # get object poses and flip symmetrical objects if necessary
    object_poses = []
    for object_index in range(num_objects):
        model_name = updated_frame_json['objects'][object_index]['class']
        scene_object_json = frame_json['objects'][object_index]
        (translation, rotation) = object_pose_from_json(scene_object_json)
        object_pose = RigidTransform(rotation=rotation,
                                     translation=translation,
                                     from_frame='source_model',
                                     to_frame='camera')
        object_pose = object_pose.dot(models[model_name].model_transform)

        # Handle symmetrical objects: if the z axis (= x axis in mesh) points
        # towards the camera, rotate by 180 degrees around x (= z axis in
        # mesh). This always keeps the "green/yellow" short side (in nvdu_viz)
        # pointed towards the camera; the "blue/magenta" short side should
        # never be visible. This is necessary for DOPE training, since both
        # sides look the same because the KLT box is symmetrical.
        symmetry_flip_tf = RigidTransform(from_frame='target_model',
                                          to_frame='target_model_original')
        z_axis = rotation.dot(np.array([0.0, 0.0, 1.0], dtype=np.float64))
        if z_axis.dot(translation) < 0:
            symmetry_flip_tf.rotation = RigidTransform.x_axis_rotation(math.pi)
        object_pose = object_pose.dot(symmetry_flip_tf)

        object_poses.append(object_pose)
        updated_frame_json['objects'][object_index][
            'location'] = object_pose.translation.tolist()
        updated_frame_json['objects'][object_index][
            'quaternion_xyzw'] = np.roll(object_pose.quaternion, -1).tolist()

    # add pose_transform_permuted
    for object_index in range(num_objects):
        ptp_json = pose_transform_permuted_to_json(
            object_poses[object_index].translation,
            object_poses[object_index].rotation)
        updated_frame_json['objects'][object_index].update(ptp_json)

    # compute cuboid, cuboid_centroid, projected_cuboid,
    # projected_cuboid_centroid, bounding_box
    for object_index in range(num_objects):
        model_name = updated_frame_json['objects'][object_index]['class']
        cuboid_json = get_cuboid(object_poses[object_index],
                                 models[model_name].cuboid_dimensions,
                                 camera_intrinsics)
        updated_frame_json['objects'][object_index].update(cuboid_json)

    return updated_frame_json
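def _quaternion_xyzw_example():
    # Illustrative sketch (not part of the original script): autolab_core's
    # RigidTransform.quaternion is ordered (w, x, y, z), while the frame
    # annotation jsons store (x, y, z, w), so np.roll(q, -1) above moves the
    # scalar part to the end.
    import numpy as np  # already imported at module level; repeated here to keep the sketch self-contained
    q_wxyz = np.array([1.0, 0.0, 0.0, 0.0])  # identity rotation, wxyz order
    q_xyzw = np.roll(q_wxyz, -1)             # -> [0.0, 0.0, 0.0, 1.0]
    return q_xyzw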
def drag(self, point):
    """Update the trackball during a drag.

    Parameters
    ----------
    point : (2,) int
        The current x and y pixel coordinates of the mouse during a drag.
        This will compute a movement for the trackball with the relative
        motion between this point and the one marked by down().
    """
    point = np.array(point, dtype=np.float32)
    dx, dy = point - self._pdown
    mindim = 0.3 * np.min(self._size)

    target = self._target
    x_axis = self._T_camera_world.matrix[:3, 0].flatten()
    y_axis = self._T_camera_world.matrix[:3, 1].flatten()
    z_axis = self._T_camera_world.matrix[:3, 2].flatten()
    eye = self._T_camera_world.matrix[:3, 3].flatten()

    # Interpret drag as a rotation
    if self._state == Trackball.STATE_ROTATE:
        x_angle = dx / mindim
        x_rot_mat = transformations.rotation_matrix(x_angle, y_axis, target)
        x_rot_tf = RigidTransform(x_rot_mat[:3, :3],
                                  x_rot_mat[:3, 3],
                                  from_frame='world',
                                  to_frame='world')

        y_angle = dy / mindim
        y_rot_mat = transformations.rotation_matrix(y_angle, x_axis, target)
        y_rot_tf = RigidTransform(y_rot_mat[:3, :3],
                                  y_rot_mat[:3, 3],
                                  from_frame='world',
                                  to_frame='world')

        self._n_T_camera_world = y_rot_tf.dot(
            x_rot_tf.dot(self._T_camera_world))

    # Interpret drag as a roll about the camera axis
    elif self._state == Trackball.STATE_ROLL:
        center = self._size / 2.0
        v_init = self._pdown - center
        v_curr = point - center
        v_init = v_init / np.linalg.norm(v_init)
        v_curr = v_curr / np.linalg.norm(v_curr)

        theta = np.arctan2(v_curr[1], v_curr[0]) - np.arctan2(
            v_init[1], v_init[0])

        rot_mat = transformations.rotation_matrix(theta, z_axis, target)
        rot_tf = RigidTransform(rot_mat[:3, :3],
                                rot_mat[:3, 3],
                                from_frame='world',
                                to_frame='world')

        self._n_T_camera_world = rot_tf.dot(self._T_camera_world)

    # Interpret drag as a camera pan in view plane
    elif self._state == Trackball.STATE_PAN:
        dx = -dx / (5.0 * mindim) * self._scale
        dy = dy / (5.0 * mindim) * self._scale

        translation = dx * x_axis + dy * y_axis
        self._n_target = self._target + translation
        t_tf = RigidTransform(translation=translation,
                              from_frame='world',
                              to_frame='world')
        self._n_T_camera_world = t_tf.dot(self._T_camera_world)

    # Interpret drag as a zoom motion
    elif self._state == Trackball.STATE_ZOOM:
        radius = np.linalg.norm(eye - target)
        ratio = 0.0
        if dy < 0:
            ratio = np.exp(abs(dy) / (0.5 * self._size[1])) - 1.0
        elif dy > 0:
            ratio = 1.0 - np.exp(-dy / (0.5 * self._size[1]))
        translation = np.sign(dy) * ratio * radius * z_axis
        t_tf = RigidTransform(translation=translation,
                              from_frame='world',
                              to_frame='world')
        self._n_T_camera_world = t_tf.dot(self._T_camera_world)
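# Worked example (added commentary, not in the original source) for the
# STATE_ZOOM ratio above: dragging down (dy > 0) uses
# 1 - exp(-dy / (0.5 * height)), which stays below 1, so a single drag moves
# the eye by less than the full eye-to-target distance and cannot overshoot
# the target; dragging up (dy < 0) uses exp(|dy| / (0.5 * height)) - 1, which
# is unbounded, so the camera can back away arbitrarily far. For example, with
# height = 480 and dy = 240 the eye covers about 63% (1 - 1/e) of the
# remaining distance.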
    translation=fixed_model_transform_mat[:3, 3],
    from_frame='ycb_model',
    to_frame='source_model')

fmt = {}
for j in target_model_json['exported_objects']:
    if j['class'] == MODEL_NAME:
        fmt = j['fixed_model_transform']
        break
target_fixed_model_transform_mat = np.transpose(np.array(fmt))
target_fixed_model_transform = RigidTransform(
    rotation=target_fixed_model_transform_mat[:3, :3],
    translation=target_fixed_model_transform_mat[:3, 3],
    from_frame='ycb_model',
    to_frame='target_model')

model_transform = fixed_model_transform.dot(
    target_fixed_model_transform.inverse())

(translation,
 rotation) = object_pose_from_json(reference_source_json['objects'][0])
reference_source_pose = RigidTransform(rotation=rotation,
                                       translation=translation,
                                       from_frame='target_model',
                                       to_frame='camera')

(translation,
 rotation) = object_pose_from_json(reference_target_json['objects'][0])
reference_target_pose = RigidTransform(rotation=rotation,
                                       translation=translation,
                                       from_frame='target_model_fixed',
                                       to_frame='camera')
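# A minimal sketch (added commentary, not in the original snippet) of the
# frame bookkeeping used with RigidTransform composition in these scripts:
# a.dot(b) composes transforms whose frames chain (b.to_frame matching
# a.from_frame) and yields a transform with from_frame=b.from_frame and
# to_frame=a.to_frame, which is why model_transform above maps 'target_model'
# into 'source_model':
#
#     a = RigidTransform(from_frame='ycb_model', to_frame='source_model')
#     b = RigidTransform(from_frame='ycb_model', to_frame='target_model')
#     m = a.dot(b.inverse())  # from_frame='target_model', to_frame='source_model'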