def __init__(self, output_dir):
    self.NUM_OBJECTS = None
    self.table_file = DATA_DIR + '/meshes/table.obj'
    self.data_dir = DATA_DIR + '/meshes/dexnet/'
    output_dir = DATA_DIR + output_dir
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    self.output_dir = output_dir
    self.config = YamlConfig(
        './cfg/tools/generate_oblique_gqcnn_dataset.yaml')
    self.phi_offsets = self._generate_phi_offsets()
    self.datasets, self.target_object_keys = self._load_datasets()
    self.tensor_dataset = TensorDataset(self.output_dir,
                                        self._set_tensor_config())
    self.tensor_datapoint = self.tensor_dataset.datapoint_template
    self.gripper = self._set_gripper()
    self._table_mesh_filename = self._set_table_mesh_filename()
    self.table_mesh = self._set_table_mesh()
    self.cur_pose_label = 0
    self.cur_obj_label = 0
    self.cur_image_label = 0
    self.grasp_upsample_distribuitions = self._get_upsample_distributions()
    self.camera_distributions = self._get_camera_distributions()
    self.obj = None
    self.T_obj_camera = None
def __init__(self, output_dir, output_dir_mod, config_dir):
    GenerateDataset.__init__(self, output_dir, config_dir)
    if not os.path.exists(output_dir_mod):
        os.mkdir(output_dir_mod)
    self.modified_tensor_dataset = TensorDataset(output_dir_mod,
                                                 self._set_tensor_config())
def __init__(self, dataset_path, frame=None, loop=True):
    self._dataset_path = dataset_path
    self._frame = frame
    self._color_frame = frame
    self._ir_frame = frame
    self._im_index = 0
    self._running = False

    from dexnet.learning import TensorDataset
    self._dataset = TensorDataset.open(self._dataset_path)
    self._num_images = self._dataset.num_datapoints
    self._image_rescale_factor = 1.0
    if 'image_rescale_factor' in self._dataset.metadata.keys():
        self._image_rescale_factor = 1.0 / self._dataset.metadata[
            'image_rescale_factor']

    datapoint = self._dataset.datapoint(
        0, [TensorDatasetVirtualSensor.CAMERA_INTR_FIELD])
    camera_intr_vec = datapoint[TensorDatasetVirtualSensor.CAMERA_INTR_FIELD]
    self._color_intr = CameraIntrinsics.from_vec(
        camera_intr_vec,
        frame=self._color_frame).resize(self._image_rescale_factor)
    self._ir_intr = CameraIntrinsics.from_vec(
        camera_intr_vec,
        frame=self._ir_frame).resize(self._image_rescale_factor)
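# The constructor above belongs to a virtual sensor that replays a recorded
# TensorDataset instead of streaming from real hardware. A minimal usage sketch,
# assuming the class is named TensorDatasetVirtualSensor (as the field constant
# suggests) and exposes the usual start()/frames() sensor interface; the dataset
# path is made up for illustration.
sensor = TensorDatasetVirtualSensor('/data/tensor_datasets/run_000', frame='camera')
sensor.start()  # assumed sensor API
color_im, depth_im, _ = sensor.frames()  # assumed sensor API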
def __init__(self, output_dir, config_dir):
    self.NUM_OBJECTS = None
    self.table_file = DATA_DIR + '/meshes/table.obj'
    self.data_dir = DATA_DIR + '/meshes/dexnet/'
    self.config = YamlConfig(config_dir)
    self.phi_offsets = self._generate_phi_offsets()
    self.datasets, self.target_object_keys = self._load_datasets()
    self.tensor_dataset = TensorDataset(output_dir,
                                        self._set_tensor_config(),
                                        initial_tensor=0,
                                        initial_datapoint=0)
    json.dump(self._set_general_config(),
              open(output_dir + '/tensors/config.json', 'w'))
    self.output_dir = output_dir
    self.image_dir = output_dir + '/images/'
    self.tensor_datapoint = self.tensor_dataset.datapoint_template
    self.gripper = self._set_gripper()
    self._table_mesh_filename = self._set_table_mesh_filename()
    self.table_mesh = self._set_table_mesh()
    self.sample_phi_uniform = True
    self.cur_pose_label = 0
    self.cur_obj_label = 0
    self.cur_image_label = 0
    self.crop_width = self.config['gqcnn']['crop_width']
    self.crop_height = self.config['gqcnn']['crop_height']
    if 'final_width' in self.config['gqcnn']:
        self.final_height = self.config['gqcnn']['final_height']
        self.final_width = self.config['gqcnn']['final_width']
    else:
        self.final_height = None
        self.final_width = None
    self.obj = None
    self.T_obj_camera = None
    try:
        self.test_obj_labels = np.loadtxt('/data/test_object_labels.txt')
    except IOError:
        self.test_obj_labels = None
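# The constructors above are fragments of larger dataset-generation classes.
# A hypothetical driver sketch, assuming the class from the snippet above is
# called GenerateDataset (as the subclass constructor in an earlier snippet
# suggests) and exposes some top-level render entry point; the method name
# render_data() and the paths are made up for illustration.
generator = GenerateDataset(
    output_dir='/data/generated_dataset',
    config_dir='./cfg/tools/generate_oblique_gqcnn_dataset.yaml')
generator.render_data()  # assumed entry point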
class Reprojection:
    def __init__(self, path, elev):
        if not os.path.exists(DATA_DIR):
            raise NameError(
                "Path %s is not specified. Is the docker file set up properly?"
                % DATA_DIR)
        table_file = DATA_DIR + '/meshes/table.obj'
        self.table_mesh = ObjFile(table_file).read()
        self.data_dir = DATA_DIR + '/meshes/dexnet/'
        self.output_dir = DATA_DIR + '/reprojections/'
        self.config = YamlConfig(
            './cfg/tools/generate_projected_gqcnn_dataset.yaml')
        self.random_positions = 1
        self.image_size = (300, 300)
        self.elev = 0
        self.show_images = False
        self.cur_obj_label = 0
        self.cur_image_label = 0
        self.cur_pose_label = 0
        if path is not None:
            self.output_dir = DATA_DIR + '/reprojections/' + path
        if elev is not None:
            print("Elevation angle is being set to %d" % elev)
            self.config['env_rv_params']['min_elev'] = elev
            self.config['env_rv_params']['max_elev'] = elev
            self.elev = elev
        tensor_config = self.config['tensors']
        tensor_config['fields']['depth_ims_tf_table']['height'] = 32
        tensor_config['fields']['depth_ims_tf_table']['width'] = 32
        tensor_config['fields']['robust_ferrari_canny'] = {}
        tensor_config['fields']['robust_ferrari_canny']['dtype'] = 'float32'
        tensor_config['fields']['ferrari_canny'] = {}
        tensor_config['fields']['ferrari_canny']['dtype'] = 'float32'
        tensor_config['fields']['force_closure'] = {}
        tensor_config['fields']['force_closure']['dtype'] = 'float32'
        self.tensor_dataset = TensorDataset(self.output_dir, tensor_config)
        self.tensor_datapoint = self.tensor_dataset.datapoint_template
        if not os.path.exists(self.output_dir + '/images'):
            os.makedirs(self.output_dir + '/images')
        if not os.path.exists(self.output_dir + '/meshes'):
            os.makedirs(self.output_dir + '/meshes')

    def _load_file_ids(self):
        dtype = [('Obj_id', (np.str_, 40)), ('Tensor', int), ('Array', int),
                 ('Depth', float)]
        with open(self.data_dir + 'files.csv', 'r') as csv_file:
            data = []
            csv_reader = csv.reader(csv_file, delimiter=',')
            for row in csv_reader:
                data.append(
                    tuple([
                        int(value) if value.isdigit() else value
                        for value in row
                    ]))
        self.file_arr = np.array(data, dtype=dtype)
        files = os.listdir(self.data_dir)
        files = [name.split('.')[0] for name in files]
        files.remove('files')
        self.all_objects = list(set(files))

    def _load_data(self, obj_id):
        self.grasp_metrics = json.load(
            open(self.data_dir + obj_id + '.json', 'r'))
        self.candidate_grasps_dict = pkl.load(
            open(self.data_dir + obj_id + '.pkl', 'rb'))
        self.object_mesh = ObjFile(self.data_dir + obj_id + '.obj').read()
        self.stable_poses = StablePoseFile(self.data_dir + obj_id +
                                           '.stp').read()
        self.tensor = self.file_arr['Tensor'][np.where(
            self.file_arr['Obj_id'] == obj_id)][0]
        self.array = self.file_arr['Array'][np.where(
            self.file_arr['Obj_id'] == obj_id)][0]
        depth = self.file_arr['Depth'][np.where(
            self.file_arr['Obj_id'] == obj_id)][0]
        self.config['env_rv_params']['min_radius'] = depth
        self.config['env_rv_params']['max_radius'] = depth

    def start_rendering(self):
        self._load_file_ids()
        for object_id in self.all_objects:
            self._load_data(object_id)
            for i, stable_pose in enumerate(self.stable_poses):
                try:
                    candidate_grasp_info = self.candidate_grasps_dict[
                        stable_pose.id]
                except KeyError:
                    continue
                if not candidate_grasp_info:
                    Warning("Candidate grasp info of object id %s empty" %
                            object_id)
                    Warning("Continue.")
                    continue
                T_obj_stp = stable_pose.T_obj_table.as_frames('obj', 'stp')
                T_obj_stp = self.object_mesh.get_T_surface_obj(T_obj_stp)
                T_table_obj = RigidTransform(from_frame='table',
                                             to_frame='obj')
                scene_objs = {
                    'table': SceneObject(self.table_mesh, T_table_obj)
                }
                urv = UniformPlanarWorksurfaceImageRandomVariable(
                    self.object_mesh,
                    [RenderMode.DEPTH_SCENE, RenderMode.SEGMASK],
                    'camera',
                    self.config['env_rv_params'],
                    scene_objs=scene_objs,
                    stable_pose=stable_pose)
                render_sample = urv.rvs(size=self.random_positions)
                # for render_sample in render_samples:
                binary_im = render_sample.renders[RenderMode.SEGMASK].image
                depth_im = render_sample.renders[
                    RenderMode.DEPTH_SCENE].image.crop(300, 300)
                orig_im = Image.fromarray(self._scale_image(depth_im.data))
                if self.show_images:
                    orig_im.show()
                orig_im.convert('RGB').save(self.output_dir + '/images/' +
                                            object_id + '_elev_' +
                                            str(self.elev) + '_original.png')
                print("Saved original")
                T_stp_camera = render_sample.camera.object_to_camera_pose
                shifted_camera_intr = render_sample.camera.camera_intr.crop(
                    300, 300, 240, 320)
                depth_points = self._reproject_to_3D(depth_im,
                                                     shifted_camera_intr)
                transformed_points, T_camera = self._transformation(
                    depth_points)
                camera_dir = np.dot(T_camera.rotation,
                                    np.array([0.0, 0.0, -1.0]))
                pcd = o3d.geometry.PointCloud()
                # print(camera_dir)
                pcd.points = o3d.utility.Vector3dVector(transformed_points.T)
                # TODO check normals!!
                # pcd.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30))
                # pcd.normals = o3d.utility.Vector3dVector(-np.asarray(pcd.normals))
                normals = np.repeat([camera_dir],
                                    len(transformed_points.T),
                                    axis=0)
                pcd.normals = o3d.utility.Vector3dVector(normals)
                # cs_points = [[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]]
                # cs_lines = [[0, 1], [0, 2], [0, 3]]
                # colors = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
                # cs = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(cs_points),
                #                           lines=o3d.utility.Vector2iVector(cs_lines))
                # cs.colors = o3d.utility.Vector3dVector(colors)
                # o3d.visualization.draw_geometries([pcd])
                depth = self._o3d_meshing(pcd)
                # projected_depth_im, new_camera_intr, table_height = self._projection(new_points, shifted_camera_intr)
                new_camera_intr = shifted_camera_intr
                new_camera_intr.cx = 150
                new_camera_intr.cy = 150
                projected_depth_im = np.asarray(depth)
                projected_depth_im[projected_depth_im == 0.0] = -1.0
                table_height = np.median(
                    projected_depth_im[projected_depth_im != -1.0].flatten())
                print("Minimum depth:", min(projected_depth_im.flatten()))
                print("Maximum depth:", max(projected_depth_im.flatten()))
                im = Image.fromarray(self._scale_image(projected_depth_im))
                projected_depth_im = DepthImage(projected_depth_im,
                                                frame='new_camera')
                cx = projected_depth_im.center[1]
                cy = projected_depth_im.center[0]

                # Grasp conversion
                T_obj_old_camera = T_stp_camera * T_obj_stp.as_frames(
                    'obj', T_stp_camera.from_frame)
                T_obj_camera = T_camera.dot(T_obj_old_camera)
                for grasp_info in candidate_grasp_info:
                    grasp = grasp_info.grasp
                    collision_free = grasp_info.collision_free
                    grasp_2d = grasp.project_camera(T_obj_camera,
                                                    new_camera_intr)
                    dx = cx - grasp_2d.center.x
                    dy = cy - grasp_2d.center.y
                    translation = np.array([dy, dx])

                    # Project 3D old_camera_cs contact points into new camera cs
                    contact_points = np.append(grasp_info.contact_point1, 1).T
                    new_cam = np.dot(T_obj_camera.matrix, contact_points)
                    c1 = new_camera_intr.project(
                        Point(new_cam[0:3], frame=new_camera_intr.frame))
                    contact_points = np.append(grasp_info.contact_point2, 1).T
                    new_cam = np.dot(T_obj_camera.matrix, contact_points)
                    c2 = new_camera_intr.project(
                        Point(new_cam[0:3], frame=new_camera_intr.frame))

                    # Check if there are occlusions at contact points
                    if projected_depth_im.data[
                            c1.x, c1.y] == -1.0 or projected_depth_im.data[
                                c2.x, c2.y] == -1.0:
                        print("Contact point at occlusion")
                        contact_occlusion = True
                    else:
                        contact_occlusion = False

                    # Mark contact points in image
                    im = im.convert('RGB')
                    if False:
                        im_draw = ImageDraw.Draw(im)
                        im_draw.line([(c1[0], c1[1] - 10), (c1[0], c1[1] + 10)],
                                     fill=(255, 0, 0, 255))
                        im_draw.line([(c1[0] - 10, c1[1]), (c1[0] + 10, c1[1])],
                                     fill=(255, 0, 0, 255))
                        im_draw.line([(c2[0], c2[1] - 10), (c2[0], c2[1] + 10)],
                                     fill=(255, 0, 0, 255))
                        im_draw.line([(c2[0] - 10, c2[1]), (c2[0] + 10, c2[1])],
                                     fill=(255, 0, 0, 255))
                    if self.show_images:
                        im.show()
                    im.save(self.output_dir + '/images/' + object_id +
                            '_elev_' + str(self.elev) + '_reprojected.png')

                    # Transform and crop image
                    depth_im_tf = projected_depth_im.transform(
                        translation, grasp_2d.angle)
                    depth_im_tf = depth_im_tf.crop(96, 96)

                    # Apply transformation to contact points
                    trans_map = np.array([[1, 0, dx], [0, 1, dy]])
                    rot_map = cv2.getRotationMatrix2D(
                        (cx, cy), np.rad2deg(grasp_2d.angle), 1)
                    trans_map_aff = np.r_[trans_map, [[0, 0, 1]]]
                    rot_map_aff = np.r_[rot_map, [[0, 0, 1]]]
                    full_map = rot_map_aff.dot(trans_map_aff)
                    # print("Full map", full_map)
                    c1_rotated = (np.dot(full_map, np.r_[c1.vector, [1]]) -
                                  np.array([150 - 48, 150 - 48, 0])) / 3
                    c2_rotated = (np.dot(full_map, np.r_[c2.vector, [1]]) -
                                  np.array([150 - 48, 150 - 48, 0])) / 3
                    grasp_line = depth_im_tf.data[48]
                    occlusions = len(np.where(np.squeeze(grasp_line) == -1)[0])

                    # Set occlusions to table height for resizing image
                    depth_im_tf.data[depth_im_tf.data == -1.0] = table_height
                    depth_image = Image.fromarray(
                        np.asarray(depth_im_tf.data)).resize(
                            (32, 32), resample=Image.BILINEAR)
                    depth_im_tf_table = np.asarray(depth_image).reshape(
                        32, 32, 1)
                    # depth_im_tf_table = depth_im_tf.resize((32, 32), interp='bilinear')

                    im = Image.fromarray(
                        self._scale_image(depth_im_tf_table.reshape(
                            32, 32))).convert('RGB')
                    draw = ImageDraw.Draw(im)
                    draw.line([(c1_rotated[0], c1_rotated[1] - 3),
                               (c1_rotated[0], c1_rotated[1] + 3)],
                              fill=(255, 0, 0, 255))
                    draw.line([(c1_rotated[0] - 3, c1_rotated[1]),
                               (c1_rotated[0] + 3, c1_rotated[1])],
                              fill=(255, 0, 0, 255))
                    draw.line([(c2_rotated[0], c2_rotated[1] - 3),
                               (c2_rotated[0], c2_rotated[1] + 3)],
                              fill=(255, 0, 0, 255))
                    draw.line([(c2_rotated[0] - 3, c2_rotated[1]),
                               (c2_rotated[0] + 3, c2_rotated[1])],
                              fill=(255, 0, 0, 255))
                    if self.show_images:
                        im.show()
                    im.save(self.output_dir + '/images/' + object_id +
                            '_elev_' + str(self.elev) + '_transformed.png')

                    hand_pose = np.r_[grasp_2d.center.y, grasp_2d.center.x,
                                      grasp_2d.depth, grasp_2d.angle,
                                      grasp_2d.center.y - new_camera_intr.cy,
                                      grasp_2d.center.x - new_camera_intr.cx,
                                      grasp_2d.width_px / 3]

                    self.tensor_datapoint[
                        'depth_ims_tf_table'] = depth_im_tf_table
                    self.tensor_datapoint['hand_poses'] = hand_pose
                    self.tensor_datapoint['obj_labels'] = self.cur_obj_label
                    self.tensor_datapoint['collision_free'] = collision_free
                    self.tensor_datapoint['pose_labels'] = self.cur_pose_label
                    self.tensor_datapoint['image_labels'] = self.cur_image_label
                    self.tensor_datapoint['files'] = [self.tensor, self.array]
                    self.tensor_datapoint['occlusions'] = occlusions
                    self.tensor_datapoint['contact_occlusion'] = contact_occlusion
                    for metric_name, metric_val in self.grasp_metrics[str(
                            grasp.id)].iteritems():
                        coll_free_metric = (1 * collision_free) * metric_val
                        self.tensor_datapoint[metric_name] = coll_free_metric
                    self.tensor_dataset.add(self.tensor_datapoint)
                    print("Saved dataset point")
                self.cur_image_label += 1
                self.cur_pose_label += 1
                gc.collect()
            self.cur_obj_label += 1
        self.tensor_dataset.flush()

    def _o3d_meshing(self, pcd):
        mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
            pcd, depth=15)
        densities = np.asarray(densities)
        # print('visualize densities')
        # density_colors = plt.get_cmap('plasma')(
        #     (densities - densities.min()) / (densities.max() - densities.min()))
        # density_colors = density_colors[:, :3]
        # density_mesh = o3d.geometry.TriangleMesh()
        # density_mesh.vertices = mesh.vertices
        # density_mesh.triangles = mesh.triangles
        # density_mesh.triangle_normals = mesh.triangle_normals
        # density_mesh.vertex_colors = o3d.utility.Vector3dVector(density_colors)
        # o3d.visualization.draw_geometries([density_mesh])
        vertices_to_remove = densities < 7.0  # np.quantile(densities, 0.01)
        mesh.remove_vertices_by_mask(vertices_to_remove)
        mesh.compute_vertex_normals()
        mesh.paint_uniform_color([0.6, 0.6, 0.6])
        o3d.io.write_triangle_mesh(
            self.output_dir + '/meshes/' + "%05d_%03d.ply" %
            (self.tensor, self.array), mesh)
        if visualise_mesh:
            o3d.visualization.draw_geometries([mesh])
        vis = o3d.visualization.Visualizer()
        vis.create_window(height=300, width=300, visible=False)
        vis.get_render_option().load_from_json("./data/renderconfig.json")
        vis.add_geometry(mesh)
        vic = vis.get_view_control()
        params = vic.convert_to_pinhole_camera_parameters()
        (fx, fy) = params.intrinsic.get_focal_length()
        (cx, cy) = params.intrinsic.get_principal_point()
        params.intrinsic.set_intrinsics(300, 300, 525, 525, cx, cy)
        params.extrinsic = np.array([[1, 0, 0, 0], [0, 1, 0, 0],
                                     [0, 0, 1, 0], [0, 0, 0, 1]])
        vic.convert_from_pinhole_camera_parameters(params)
        vis.poll_events()
        vis.update_renderer()
        depth = vis.capture_depth_float_buffer(do_render=True)
        # vis.destroy_window()
        # del vis
        return depth

    def _projection(self, transformed_points, camera_intr):
        # Use Camera intrinsics
        new_camera_intr = camera_intr
        new_camera_intr.cx = 150
        new_camera_intr.cy = 150
        K = new_camera_intr.proj_matrix
        projected_points = np.dot(K, transformed_points)
        point_depths = projected_points[2, :]
        table = np.median(point_depths)
        point_z = np.tile(point_depths, [3, 1])
        points_proj = np.divide(projected_points, point_z)
        # Rounding
        points_proj = np.round(points_proj)
        points_proj = points_proj[:2, :].astype(np.int16)
        valid_ind = np.where((points_proj[0, :] >= 0) &
                             (points_proj[0, :] < self.image_size[0]) &
                             (points_proj[1, :] >= 0) &
                             (points_proj[1, :] < self.image_size[0]))[0]
        # Fill new image with NaN's
        fill_value = -1.0
        depth_data = np.full([self.image_size[0], self.image_size[1]],
                             fill_value)
        for ind in valid_ind:
            prev_depth = depth_data[points_proj[1, ind], points_proj[0, ind]]
            if prev_depth == fill_value or prev_depth >= point_depths[ind]:
                depth_data[points_proj[1, ind],
                           points_proj[0, ind]] = point_depths[ind]
        return depth_data, new_camera_intr, table

    def _transformation(self, points):
        # Points are given in camera frame. Transform to new camera frame!
        camera_new_position = np.array([[0], [0], [0]])
        ang = np.deg2rad(self.elev)
        Rot = np.array([[1, 0, 0],
                        [0, np.cos(ang), -np.sin(ang)],
                        [0, np.sin(ang), np.cos(ang)]])
        dist = self.config['env_rv_params']['min_radius'] * np.sin(ang)
        height = self.config['env_rv_params']['min_radius'] - \
            self.config['env_rv_params']['min_radius'] * np.cos(ang)
        # Rotation around x axis, therefore translation back to object center
        # alongside y axis
        trans = np.array([0, dist, height])
        Rt = np.column_stack((Rot, trans))
        # Apply transformation to homogeneous coordinates of the points
        homogeneous_points = np.append(np.transpose(points),
                                       np.ones((1, len(points))),
                                       axis=0)
        transformed_points = np.dot(Rt, homogeneous_points)
        return transformed_points, RigidTransform(rotation=Rot,
                                                  translation=trans,
                                                  from_frame='camera',
                                                  to_frame='new_camera')

    # def _PCA(self, points, sorting=True):
    #     mean = np.mean(points, axis=0)
    #     data_adjusted = points - mean
    #
    #     matrix = np.cov(data_adjusted.T)
    #     eigenvalues, eigenvectors = np.linalg.eig(matrix)
    #
    #     if sorting:
    #         sort = eigenvalues.argsort()[::-1]
    #         eigenvalues = eigenvalues[sort]
    #         eigenvectors = eigenvectors[:, sort]
    #     return eigenvalues, eigenvectors

    def _reproject_to_3D(self, depth_im, camera_intr):
        # depth points will be given in camera frame!
        depth_points = camera_intr.deproject(depth_im).data.T
        return depth_points

    def _scale_image(self, depth):
        size = depth.shape
        flattend = depth.flatten()
        scaled = np.interp(flattend, (0.5, 0.75), (0, 255))
        integ = scaled.astype(np.uint8)
        integ.resize(size)
        return integ
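# A short usage sketch for the Reprojection class above, assuming DATA_DIR and
# the cached meshes/grasps under DATA_DIR + '/meshes/dexnet/' are already in
# place; the run name and elevation value are made up for illustration.
reprojection = Reprojection(path='elev_30', elev=30)
reprojection.start_rendering()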
def analyze_classification_performance(model_dir, config, dataset_path=None):
    # read params
    predict_batch_size = config['batch_size']
    randomize = config['randomize']
    plotting_config = config['plotting']
    figsize = plotting_config['figsize']
    font_size = plotting_config['font_size']
    legend_font_size = plotting_config['legend_font_size']
    line_width = plotting_config['line_width']
    colors = plotting_config['colors']
    dpi = plotting_config['dpi']
    style = '-'
    class_remapping = None
    if 'class_remapping' in config.keys():
        class_remapping = config['class_remapping']

    # read training config
    training_config_filename = os.path.join(model_dir, 'training_config.yaml')
    training_config = YamlConfig(training_config_filename)

    # read training params
    indices_filename = None
    if dataset_path is None:
        dataset_path = training_config['dataset']
        indices_filename = os.path.join(model_dir, 'splits.npz')
    dataset_prefix, dataset_name = os.path.split(dataset_path)
    if dataset_name == '':
        _, dataset_name = os.path.split(dataset_prefix)
    x_names = training_config['x_names']
    y_name = training_config['y_name']
    batch_size = training_config['training']['batch_size']
    iterator_config = training_config['data_iteration']
    x_name = x_names[0]

    # set analysis dir
    analysis_dir = os.path.join(model_dir, 'analysis')
    if not os.path.exists(analysis_dir):
        os.mkdir(analysis_dir)

    # setup log file
    experiment_log_filename = os.path.join(analysis_dir,
                                           '%s_analysis.log' % (dataset_name))
    if os.path.exists(experiment_log_filename):
        os.remove(experiment_log_filename)
    formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s')
    hdlr = logging.FileHandler(experiment_log_filename)
    hdlr.setFormatter(formatter)
    logging.getLogger().addHandler(hdlr)

    # setup plotting
    plt.figure(figsize=(figsize, figsize))

    # read dataset
    dataset = TensorDataset.open(dataset_path)

    # read dataset splits
    if indices_filename is None:
        splits = {dataset_name: np.arange(dataset.num_datapoints)}
    else:
        splits = np.load(indices_filename)['arr_0'].tolist()

    # load cnn
    logging.info('Loading model %s' % (model_dir))
    cnn = ClassificationCNN.open(model_dir)

    # save examples
    logging.info('Saving examples of each class')
    all_labels = np.arange(cnn.num_classes)
    label_counts = {}
    [label_counts.update({l: 0}) for l in all_labels]
    for tensor_ind in range(dataset.num_tensors):
        tensor = dataset.tensor(y_name, tensor_ind)
        for label in tensor:
            label_counts[label] += 1
    d = utils.sqrt_ceil(cnn.num_classes)
    plt.clf()
    for i, label in enumerate(all_labels):
        tensor_ind = 0
        label_found = False
        while not label_found and tensor_ind < dataset.num_tensors:
            tensor = dataset.tensor(y_name, tensor_ind)
            ind = np.where(tensor.arr == label)[0]
            if ind.shape[0] > 0:
                ind = ind[0] + dataset.datapoints_per_tensor * (tensor_ind)
                label_found = True
            tensor_ind += 1
        if not label_found:
            continue
        datapoint = dataset[ind]
        example_im = datapoint[x_name]
        plt.subplot(d, d, i + 1)
        plt.imshow(example_im[:, :, :3].astype(np.uint8))
        plt.title('Class %03d: %.3f%%' %
                  (label, float(label_counts[label]) / dataset.num_datapoints),
                  fontsize=3)
        plt.axis('off')
    plt.savefig(os.path.join(analysis_dir, '%s_classes.pdf' % (dataset_name)))

    # evaluate on dataset
    results = {}
    for split_name, indices in splits.iteritems():
        logging.info('Evaluating performance on split: %s' % (split_name))

        # predict
        if randomize:
            pred_probs, true_labels = cnn.evaluate_on_dataset(
                dataset, indices=indices, batch_size=predict_batch_size)
            pred_labels = np.argmax(pred_probs, axis=1)
        else:
            true_labels = []
            pred_labels = []
            pred_probs = []
            for datapoint in dataset:
                im = ColorImage(datapoint['color_ims'].astype(np.uint8)[:, :, :3])
                true_label = datapoint['stp_labels']
                pred_prob = cnn.predict(im)
                pred_label = np.argmax(pred_prob, axis=1)
                true_labels.append(true_label)
                pred_labels.append(pred_label)
                pred_probs.append(pred_prob.ravel())
                """
                if class_remapping is not None:
                    true_label = class_remapping[true_label]
                plt.figure()
                plt.imshow(im.raw_data)
                plt.title('T: %d, P: %d' %(true_label, pred_label))
                plt.show()
                """
            true_labels = np.array(true_labels)
            pred_labels = np.array(pred_labels)
            pred_probs = np.array(pred_probs)

        # apply optional class re-mapping
        if class_remapping is not None:
            new_true_labels = np.zeros(true_labels.shape)
            for orig_label, new_label in class_remapping.iteritems():
                new_true_labels[true_labels == orig_label] = new_label
            true_labels = new_true_labels

        # compute classification results
        result = ClassificationResult([pred_probs], [true_labels])
        results[split_name] = result

        # print stats
        logging.info('SPLIT: %s' % (split_name))
        logging.info('Acc: %.3f' % (result.accuracy))
        logging.info('AP: %.3f' % (result.ap_score))
        logging.info('AUC: %.3f' % (result.auc_score))

        # save confusion matrix
        confusion = result.confusion_matrix.data
        plt.clf()
        plt.imshow(confusion, cmap=plt.cm.gray, interpolation='none')
        plt.locator_params(nticks=cnn.num_classes)
        plt.xlabel('Predicted', fontsize=font_size)
        plt.ylabel('Actual', fontsize=font_size)
        plt.savefig(os.path.join(analysis_dir,
                                 '%s_confusion.pdf' % (split_name)),
                    dpi=dpi)

        # save analysis
        result_filename = os.path.join(analysis_dir, '%s.cres' % (split_name))
        result.save(result_filename)

    # plot
    colormap = plt.get_cmap('tab10')
    num_colors = 9
    plt.clf()
    for i, split_name in enumerate(splits.keys()):
        result = results[split_name]
        precision, recall, taus = result.precision_recall_curve()
        color = colormap(float(colors[i % num_colors]) / num_colors)
        plt.plot(recall, precision, linewidth=line_width, color=color,
                 linestyle=style, label=split_name)
    plt.xlabel('Recall', fontsize=font_size)
    plt.ylabel('Precision', fontsize=font_size)
    plt.title('Precision-Recall Curve', fontsize=font_size)
    handles, plt_labels = plt.gca().get_legend_handles_labels()
    plt.legend(handles, plt_labels, loc='best', fontsize=legend_font_size)
    plt.savefig(os.path.join(analysis_dir,
                             '%s_precision_recall.pdf' % (dataset_name)),
                dpi=dpi)

    plt.clf()
    for i, split_name in enumerate(splits.keys()):
        result = results[split_name]
        fpr, tpr, taus = result.roc_curve()
        color = colormap(float(colors[i % num_colors]) / num_colors)
        plt.plot(fpr, tpr, linewidth=line_width, color=color,
                 linestyle=style, label=split_name)
    plt.xlabel('FPR', fontsize=font_size)
    plt.ylabel('TPR', fontsize=font_size)
    plt.title('Receiver Operating Characteristic', fontsize=font_size)
    handles, plt_labels = plt.gca().get_legend_handles_labels()
    plt.legend(handles, plt_labels, loc='best', fontsize=legend_font_size)
    plt.savefig(os.path.join(analysis_dir, '%s_roc.pdf' % (dataset_name)),
                dpi=dpi)
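# Hypothetical call to the analysis routine above; the config keys mirror the
# reads at the top of analyze_classification_performance, but the YAML filename,
# model directory, and dataset path are made up for illustration.
analysis_config = YamlConfig('./cfg/tools/analyze_classification_performance.yaml')
analyze_classification_performance(model_dir='models/model_00001',
                                   config=analysis_config,
                                   dataset_path='datasets/my_tensor_dataset')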
def generate_gqcnn_dataset(dataset_path, database, target_object_keys, env_rv_params, gripper_name, config): """ Generates a GQ-CNN TensorDataset for training models with new grippers, quality metrics, objects, and cameras. Parameters ---------- dataset_path : str path to save the dataset to database : :obj:`Hdf5Database` Dex-Net database containing the 3D meshes, grasps, and grasp metrics target_object_keys : :obj:`OrderedDict` dictionary mapping dataset names to target objects (either 'all' or a list of specific object keys) env_rv_params : :obj:`OrderedDict` parameters of the camera and object random variables used in sampling (see meshpy_berkeley.UniformPlanarWorksurfaceImageRandomVariable for more info) gripper_name : str name of the gripper to use config : :obj:`autolab_core.YamlConfig` other parameters for dataset generation Notes ----- Required parameters of config are specified in Other Parameters Other Parameters ---------------- images_per_stable_pose : int number of object and camera poses to sample for each stable pose stable_pose_min_p : float minimum probability of occurrence for a stable pose to be used in data generation (used to prune bad stable poses gqcnn/crop_width : int width, in pixels, of crop region around each grasp center, before resize (changes the size of the region seen by the GQ-CNN) gqcnn/crop_height : int height, in pixels, of crop region around each grasp center, before resize (changes the size of the region seen by the GQ-CNN) gqcnn/final_width : int width, in pixels, of final transformed grasp image for input to the GQ-CNN (defaults to 32) gqcnn/final_height : int height, in pixels, of final transformed grasp image for input to the GQ-CNN (defaults to 32) table_alignment/max_approach_table_angle : float max angle between the grasp axis and the table normal when the grasp approach is maximally aligned with the table normal table_alignment/max_approach_offset : float max deviation from perpendicular approach direction to use in grasp collision checking table_alignment/num_approach_offset_samples : int number of approach samples to use in collision checking collision_checking/table_offset : float max allowable interpenetration between the gripper and table to be considered collision free collision_checking/table_mesh_filename : str path to a table mesh for collision checking (default data/meshes/table.obj) collision_checking/approach_dist : float distance, in meters, between the approach pose and final grasp pose along the grasp axis collision_checking/delta_approach : float amount, in meters, to discretize the straight-line path from the gripper approach pose to the final grasp pose tensors/datapoints_per_file : int number of datapoints to store in each unique tensor file on disk tensors/fields : :obj:`dict` dictionary mapping field names to dictionaries specifying the data type, height, width, and number of channels for each tensor debug : bool True (or 1) if the random seed should be set to enforce deterministic behavior, False (0) otherwise vis/candidate_grasps : bool True (or 1) if the collision free candidate grasps should be displayed in 3D (for debugging) vis/rendered_images : bool True (or 1) if the rendered images for each stable pose should be displayed (for debugging) vis/grasp_images : bool True (or 1) if the transformed grasp images should be displayed (for debugging) """ # read data gen params output_dir = dataset_path gripper = RobotGripper.load(gripper_name) image_samples_per_stable_pose = config['images_per_stable_pose'] stable_pose_min_p 
= config['stable_pose_min_p'] # read gqcnn params gqcnn_params = config['gqcnn'] im_crop_height = gqcnn_params['crop_height'] im_crop_width = gqcnn_params['crop_width'] im_final_height = gqcnn_params['final_height'] im_final_width = gqcnn_params['final_width'] cx_crop = float(im_crop_width) / 2 cy_crop = float(im_crop_height) / 2 # open database dataset_names = target_object_keys.keys() datasets = [database.dataset(dn) for dn in dataset_names] # set target objects for dataset in datasets: if target_object_keys[dataset.name] == 'all': target_object_keys[dataset.name] = dataset.object_keys # setup grasp params table_alignment_params = config['table_alignment'] min_grasp_approach_offset = -np.deg2rad( table_alignment_params['max_approach_offset']) max_grasp_approach_offset = np.deg2rad( table_alignment_params['max_approach_offset']) max_grasp_approach_table_angle = np.deg2rad( table_alignment_params['max_approach_table_angle']) num_grasp_approach_samples = table_alignment_params[ 'num_approach_offset_samples'] phi_offsets = [] if max_grasp_approach_offset == min_grasp_approach_offset: phi_inc = 1 elif num_grasp_approach_samples == 1: phi_inc = max_grasp_approach_offset - min_grasp_approach_offset + 1 else: phi_inc = (max_grasp_approach_offset - min_grasp_approach_offset) / ( num_grasp_approach_samples - 1) phi = min_grasp_approach_offset while phi <= max_grasp_approach_offset: phi_offsets.append(phi) phi += phi_inc # setup collision checking coll_check_params = config['collision_checking'] approach_dist = coll_check_params['approach_dist'] delta_approach = coll_check_params['delta_approach'] table_offset = coll_check_params['table_offset'] table_mesh_filename = coll_check_params['table_mesh_filename'] if not os.path.isabs(table_mesh_filename): table_mesh_filename = os.path.join( os.path.dirname(os.path.realpath(__file__)), '..', table_mesh_filename) table_mesh = ObjFile(table_mesh_filename).read() # set tensor dataset config tensor_config = config['tensors'] tensor_config['fields']['depth_ims_tf_table']['height'] = im_final_height tensor_config['fields']['depth_ims_tf_table']['width'] = im_final_width tensor_config['fields']['obj_masks']['height'] = im_final_height tensor_config['fields']['obj_masks']['width'] = im_final_width # add available metrics (assuming same are computed for all objects) metric_names = [] dataset = datasets[0] obj_keys = dataset.object_keys if len(obj_keys) == 0: raise ValueError('No valid objects in dataset %s' % (dataset.name)) obj = dataset[obj_keys[0]] grasps = dataset.grasps(obj.key, gripper=gripper.name) grasp_metrics = dataset.grasp_metrics(obj.key, grasps, gripper=gripper.name) metric_names = grasp_metrics[grasp_metrics.keys()[0]].keys() for metric_name in metric_names: tensor_config['fields'][metric_name] = {} tensor_config['fields'][metric_name]['dtype'] = 'float32' # init tensor dataset tensor_dataset = TensorDataset(output_dir, tensor_config) tensor_datapoint = tensor_dataset.datapoint_template # setup log file experiment_log_filename = os.path.join(output_dir, 'dataset_generation.log') formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s') hdlr = logging.FileHandler(experiment_log_filename) hdlr.setFormatter(formatter) logging.getLogger().addHandler(hdlr) root_logger = logging.getLogger() # copy config out_config_filename = os.path.join(output_dir, 'dataset_generation.json') ordered_dict_config = collections.OrderedDict() for key in config.keys(): ordered_dict_config[key] = config[key] with open(out_config_filename, 'w') as outfile: 
json.dump(ordered_dict_config, outfile) # 1. Precompute the set of valid grasps for each stable pose: # i) Perpendicular to the table # ii) Collision-free along the approach direction # load grasps if they already exist grasp_cache_filename = os.path.join(output_dir, CACHE_FILENAME) if os.path.exists(grasp_cache_filename): logging.info('Loading grasp candidates from file') candidate_grasps_dict = pkl.load(open(grasp_cache_filename, 'rb')) # otherwise re-compute by reading from the database and enforcing constraints else: # create grasps dict candidate_grasps_dict = {} # loop through datasets and objects for dataset in datasets: logging.info('Reading dataset %s' % (dataset.name)) for obj in dataset: if obj.key not in target_object_keys[dataset.name]: continue # init candidate grasp storage candidate_grasps_dict[obj.key] = {} # setup collision checker collision_checker = GraspCollisionChecker(gripper) collision_checker.set_graspable_object(obj) # read in the stable poses of the mesh stable_poses = dataset.stable_poses(obj.key) for i, stable_pose in enumerate(stable_poses): # render images if stable pose is valid if stable_pose.p > stable_pose_min_p: candidate_grasps_dict[obj.key][stable_pose.id] = [] # setup table in collision checker T_obj_stp = stable_pose.T_obj_table.as_frames( 'obj', 'stp') T_obj_table = obj.mesh.get_T_surface_obj( T_obj_stp, delta=table_offset).as_frames('obj', 'table') T_table_obj = T_obj_table.inverse() collision_checker.set_table(table_mesh_filename, T_table_obj) # read grasp and metrics grasps = dataset.grasps(obj.key, gripper=gripper.name) logging.info( 'Aligning %d grasps for object %s in stable %s' % (len(grasps), obj.key, stable_pose.id)) # align grasps with the table aligned_grasps = [ grasp.perpendicular_table(stable_pose) for grasp in grasps ] # check grasp validity logging.info( 'Checking collisions for %d grasps for object %s in stable %s' % (len(grasps), obj.key, stable_pose.id)) for aligned_grasp in aligned_grasps: # check angle with table plane and skip unaligned grasps _, grasp_approach_table_angle, _ = aligned_grasp.grasp_angles_from_stp_z( stable_pose) perpendicular_table = ( np.abs(grasp_approach_table_angle) < max_grasp_approach_table_angle) if not perpendicular_table: continue # check whether any valid approach directions are collision free collision_free = False for phi_offset in phi_offsets: rotated_grasp = aligned_grasp.grasp_y_axis_offset( phi_offset) collides = collision_checker.collides_along_approach( rotated_grasp, approach_dist, delta_approach) if not collides: collision_free = True break # store if aligned to table candidate_grasps_dict[obj.key][ stable_pose.id].append( GraspInfo(aligned_grasp, collision_free)) # visualize if specified if collision_free and config['vis'][ 'candidate_grasps']: logging.info('Grasp %d' % (aligned_grasp.id)) vis.figure() vis.gripper_on_object(gripper, aligned_grasp, obj, stable_pose.T_obj_world) vis.show() # save to file logging.info('Saving to file') pkl.dump(candidate_grasps_dict, open(grasp_cache_filename, 'wb')) # 2. 
Render a dataset of images and associate the gripper pose with image coordinates for each grasp in the Dex-Net database # setup variables obj_category_map = {} pose_category_map = {} cur_pose_label = 0 cur_obj_label = 0 cur_image_label = 0 # render images for each stable pose of each object in the dataset render_modes = [RenderMode.SEGMASK, RenderMode.DEPTH_SCENE] for dataset in datasets: logging.info('Generating data for dataset %s' % (dataset.name)) # iterate through all object keys object_keys = dataset.object_keys for obj_key in object_keys: obj = dataset[obj_key] if obj.key not in target_object_keys[dataset.name]: continue # read in the stable poses of the mesh stable_poses = dataset.stable_poses(obj.key) for i, stable_pose in enumerate(stable_poses): # render images if stable pose is valid if stable_pose.p > stable_pose_min_p: # log progress logging.info('Rendering images for object %s in %s' % (obj.key, stable_pose.id)) # add to category maps if obj.key not in obj_category_map.keys(): obj_category_map[obj.key] = cur_obj_label pose_category_map['%s_%s' % (obj.key, stable_pose.id)] = cur_pose_label # read in candidate grasps and metrics candidate_grasp_info = candidate_grasps_dict[obj.key][ stable_pose.id] candidate_grasps = [g.grasp for g in candidate_grasp_info] grasp_metrics = dataset.grasp_metrics(obj.key, candidate_grasps, gripper=gripper.name) # compute object pose relative to the table T_obj_stp = stable_pose.T_obj_table.as_frames('obj', 'stp') T_obj_stp = obj.mesh.get_T_surface_obj(T_obj_stp) # sample images from random variable T_table_obj = RigidTransform(from_frame='table', to_frame='obj') scene_objs = { 'table': SceneObject(table_mesh, T_table_obj) } urv = UniformPlanarWorksurfaceImageRandomVariable( obj.mesh, render_modes, 'camera', env_rv_params, stable_pose=stable_pose, scene_objs=scene_objs) render_start = time.time() render_samples = urv.rvs( size=image_samples_per_stable_pose) render_stop = time.time() logging.info('Rendering images took %.3f sec' % (render_stop - render_start)) # visualize if config['vis']['rendered_images']: d = int(np.ceil( np.sqrt(image_samples_per_stable_pose))) # binary vis2d.figure() for j, render_sample in enumerate(render_samples): vis2d.subplot(d, d, j + 1) vis2d.imshow(render_sample.renders[ RenderMode.SEGMASK].image) # depth table vis2d.figure() for j, render_sample in enumerate(render_samples): vis2d.subplot(d, d, j + 1) vis2d.imshow(render_sample.renders[ RenderMode.DEPTH_SCENE].image) vis2d.show() # tally total amount of data num_grasps = len(candidate_grasps) num_images = image_samples_per_stable_pose num_save = num_images * num_grasps logging.info('Saving %d datapoints' % (num_save)) # for each candidate grasp on the object compute the projection # of the grasp into image space for render_sample in render_samples: # read images binary_im = render_sample.renders[ RenderMode.SEGMASK].image depth_im_table = render_sample.renders[ RenderMode.DEPTH_SCENE].image # read camera params T_stp_camera = render_sample.camera.object_to_camera_pose shifted_camera_intr = render_sample.camera.camera_intr # read pixel offsets cx = depth_im_table.center[1] cy = depth_im_table.center[0] # compute intrinsics for virtual camera of the final # cropped and rescaled images camera_intr_scale = float(im_final_height) / float( im_crop_height) cropped_camera_intr = shifted_camera_intr.crop( im_crop_height, im_crop_width, cy, cx) final_camera_intr = cropped_camera_intr.resize( camera_intr_scale) # create a thumbnail for each grasp for grasp_info in 
candidate_grasp_info: # read info grasp = grasp_info.grasp collision_free = grasp_info.collision_free # get the gripper pose T_obj_camera = T_stp_camera * T_obj_stp.as_frames( 'obj', T_stp_camera.from_frame) grasp_2d = grasp.project_camera( T_obj_camera, shifted_camera_intr) # center images on the grasp, rotate to image x axis dx = cx - grasp_2d.center.x dy = cy - grasp_2d.center.y translation = np.array([dy, dx]) binary_im_tf = binary_im.transform( translation, grasp_2d.angle) depth_im_tf_table = depth_im_table.transform( translation, grasp_2d.angle) # crop to image size binary_im_tf = binary_im_tf.crop( im_crop_height, im_crop_width) depth_im_tf_table = depth_im_tf_table.crop( im_crop_height, im_crop_width) # resize to image size binary_im_tf = binary_im_tf.resize( (im_final_height, im_final_width), interp='nearest') depth_im_tf_table = depth_im_tf_table.resize( (im_final_height, im_final_width)) # visualize the transformed images if config['vis']['grasp_images']: grasp_center = Point( depth_im_tf_table.center, frame=final_camera_intr.frame) tf_grasp_2d = Grasp2D( grasp_center, 0, grasp_2d.depth, width=gripper.max_width, camera_intr=final_camera_intr) # plot 2D grasp image vis2d.figure() vis2d.subplot(2, 2, 1) vis2d.imshow(binary_im) vis2d.grasp(grasp_2d) vis2d.subplot(2, 2, 2) vis2d.imshow(depth_im_table) vis2d.grasp(grasp_2d) vis2d.subplot(2, 2, 3) vis2d.imshow(binary_im_tf) vis2d.grasp(tf_grasp_2d) vis2d.subplot(2, 2, 4) vis2d.imshow(depth_im_tf_table) vis2d.grasp(tf_grasp_2d) vis2d.title('Coll Free? %d' % (grasp_info.collision_free)) vis2d.show() # plot 3D visualization vis.figure() T_obj_world = vis.mesh_stable_pose( obj.mesh, stable_pose.T_obj_world, style='surface', dim=0.5) vis.gripper(gripper, grasp, T_obj_world, color=(0.3, 0.3, 0.3)) vis.show() # form hand pose array hand_pose = np.r_[grasp_2d.center.y, grasp_2d.center.x, grasp_2d.depth, grasp_2d.angle, grasp_2d.center.y - shifted_camera_intr.cy, grasp_2d.center.x - shifted_camera_intr.cx, grasp_2d.width_px] # store to data buffers tensor_datapoint[ 'depth_ims_tf_table'] = depth_im_tf_table.raw_data tensor_datapoint[ 'obj_masks'] = binary_im_tf.raw_data tensor_datapoint['hand_poses'] = hand_pose tensor_datapoint['collision_free'] = collision_free tensor_datapoint['obj_labels'] = cur_obj_label tensor_datapoint['pose_labels'] = cur_pose_label tensor_datapoint['image_labels'] = cur_image_label for metric_name, metric_val in grasp_metrics[ grasp.id].iteritems(): coll_free_metric = ( 1 * collision_free) * metric_val tensor_datapoint[ metric_name] = coll_free_metric tensor_dataset.add(tensor_datapoint) # update image label cur_image_label += 1 # update pose label cur_pose_label += 1 # force clean up gc.collect() # update object label cur_obj_label += 1 # force clean up gc.collect() # save last file tensor_dataset.flush() # save category mappings obj_cat_filename = os.path.join(output_dir, 'object_category_map.json') json.dump(obj_category_map, open(obj_cat_filename, 'w')) pose_cat_filename = os.path.join(output_dir, 'pose_category_map.json') json.dump(pose_category_map, open(pose_cat_filename, 'w'))
    '--config_filename',
    type=str,
    default=None,
    help='Yaml filename containing configuration parameters for the visualization')
args = parser.parse_args()
dataset_path = args.dataset_path
config_filename = args.config_filename

# handle config filename
if config_filename is None:
    config_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), '..',
        'cfg/tools/visualize_gqcnn_dataset.yaml')

# turn relative paths absolute
if not os.path.isabs(dataset_path):
    dataset_path = os.path.join(os.getcwd(), dataset_path)
if not os.path.isabs(config_filename):
    config_filename = os.path.join(os.getcwd(), config_filename)

# read config
config = YamlConfig(config_filename)

# open tensor dataset
dataset = TensorDataset.open(dataset_path)

# visualize a tensor dataset
visualize_tensor_dataset(dataset, config)
config_filename = os.path.join(
    os.path.dirname(os.path.realpath(__file__)), '..',
    'cfg/tools/generate_gqcnn_dataset.yaml')
config = YamlConfig(config_filename)

# set tensor dataset config
tensor_config = config['tensors']
# tensor_config['fields']['quality_ims_tf']['height'] = 200
# tensor_config['fields']['quality_ims_tf']['width'] = 200
tensor_config['fields']['grasp_angle_ims_tf']['height'] = 200
tensor_config['fields']['grasp_angle_ims_tf']['width'] = 200
# tensor_config['fields']['depth_images']['width'] = 200
# tensor_config['fields']['depth_images']['height'] = 200
tensor_dataset = TensorDataset(output_dir, tensor_config)
# tensor_dataset = TensorDataset.open(output_dir)
tensor_datapoint = tensor_dataset.datapoint_template

######################################################################
# open tensor dirs
if not os.path.exists(dataset_path):
    raise ValueError('Dataset %s not found!' % (dataset_path))

# create subdirectories
image_dir = os.path.join(dataset_path, 'images')
if not os.path.exists(image_dir):
    raise ValueError('Image folder %s not found!' % (image_dir))
grasp_dir = os.path.join(dataset_path, 'grasps')
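# For orientation, a minimal sketch of the 'tensors' config section that these
# scripts read and patch before constructing a TensorDataset. The exact field
# set and values are assumptions; the real configuration lives in the YAML
# files referenced above.
tensor_config = {
    'datapoints_per_file': 100,
    'fields': {
        'grasp_angle_ims_tf': {'dtype': 'float32', 'height': 200, 'width': 200, 'channels': 1},
        'hand_poses': {'dtype': 'float32', 'height': 7},
        'collision_free': {'dtype': 'uint8'},
    },
}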
######################################################################
if config_filename is None:
    config_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), '..',
        'cfg/tools/generate_gqcnn_dataset.yaml')
config = YamlConfig(config_filename)

# set tensor dataset config
tensor_config = config['tensors']
tensor_config['fields']['depth_ims_tf_table_96']['height'] = im_final_height
tensor_config['fields']['depth_ims_tf_table_96']['width'] = im_final_width
# tensor_dataset = TensorDataset(output_dir, tensor_config)
tensor_dataset = TensorDataset.open(output_dir)
tensor_datapoint = tensor_dataset.datapoint_template

######################################################################
# open tensor dirs
if not os.path.exists(dataset_path):
    raise ValueError('Dataset %s not found!' % (dataset_path))

# create subdirectories
image_dir = os.path.join(dataset_path, 'images')
if not os.path.exists(image_dir):
    raise ValueError('Image folder %s not found!' % (image_dir))
grasp_dir = os.path.join(dataset_path, 'grasps')
if not os.path.exists(grasp_dir):
def finetune_classification_cnn(config):
    """ Main function. """
    # read params
    dataset = config['dataset']
    x_names = config['x_names']
    y_name = config['y_name']
    model_dir = config['model_dir']
    debug = config['debug']

    num_classes = None
    if 'num_classes' in config.keys():
        num_classes = config['num_classes']

    batch_size = config['training']['batch_size']
    train_pct = config['training']['train_pct']
    model_save_period = config['training']['model_save_period']

    data_aug_config = config['data_augmentation']
    preproc_config = config['preprocessing']
    iterator_config = config['data_iteration']
    model_config = config['model']
    base_model_config = model_config['base']
    optimization_config = config['optimization']
    train_config = config['training']

    generator_image_shape = None
    if 'image_shape' in data_aug_config.keys():
        generator_image_shape = data_aug_config['image_shape']

    optimizer_name = optimization_config['optimizer']

    model_params = {}
    if 'params' in model_config.keys():
        model_params = model_config['params']

    base_model_params = {}
    if 'params' in base_model_config.keys():
        base_model_params = base_model_config['params']

    if debug:
        seed = 108
        random.seed(seed)
        np.random.seed(seed)

    # generate model dir
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
    model_id = utils.gen_experiment_id()
    model_dir = os.path.join(model_dir, 'model_%s' % (model_id))
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
    logging.info('Saving model to %s' % (model_dir))
    latest_model_filename = os.path.join(model_dir, 'weights_{epoch:05d}.h5')
    best_model_filename = os.path.join(model_dir, 'weights.h5')

    # save config
    training_config_filename = os.path.join(model_dir, 'training_config.yaml')
    config.save(training_config_filename)

    # open dataset
    dataset = TensorDataset.open(dataset)

    # split dataset
    indices_filename = os.path.join(model_dir, 'splits.npz')
    if os.path.exists(indices_filename):
        indices = np.load(indices_filename)['arr_0'].tolist()
        train_indices = indices['train']
        val_indices = indices['val']
    else:
        train_indices, val_indices = dataset.split(train_pct)
        indices = np.array({'train': train_indices, 'val': val_indices})
        np.savez_compressed(indices_filename, indices)
    num_train = train_indices.shape[0]
    num_val = val_indices.shape[0]
    val_steps = int(np.ceil(float(num_val) / batch_size))

    # init generator
    train_generator_filename = os.path.join(model_dir, 'train_preprocessor.pkl')
    val_generator_filename = os.path.join(model_dir, 'val_preprocessor.pkl')
    if os.path.exists(train_generator_filename):
        logging.info('Loading generators')
        train_generator = pkl.load(open(train_generator_filename, 'rb'))
        val_generator = pkl.load(open(val_generator_filename, 'rb'))
    else:
        logging.info('Fitting generator')
        train_generator = TensorDataGenerator(num_classes=num_classes,
                                              **data_aug_config)
        val_generator = TensorDataGenerator(
            featurewise_center=data_aug_config['featurewise_center'],
            featurewise_std_normalization=data_aug_config[
                'featurewise_std_normalization'],
            image_shape=generator_image_shape,
            num_classes=num_classes)
        fit_start = time.time()
        train_generator.fit(dataset, x_names, y_name,
                            indices=train_indices, **preproc_config)
        val_generator.mean = train_generator.mean
        val_generator.std = train_generator.std
        val_generator.min_output = train_generator.min_output
        val_generator.max_output = train_generator.max_output
        val_generator.num_classes = train_generator.num_classes
        fit_stop = time.time()
        logging.info('Generator fit took %.3f sec' % (fit_stop - fit_start))
        pkl.dump(train_generator, open(train_generator_filename, 'wb'))
        pkl.dump(val_generator, open(val_generator_filename, 'wb'))
    if num_classes is None:
        num_classes = int(train_generator.num_classes)

    # init iterator
    train_iterator = train_generator.flow_from_dataset(dataset,
                                                       x_names,
                                                       y_name,
                                                       indices=train_indices,
                                                       batch_size=batch_size,
                                                       **iterator_config)
    val_iterator = val_generator.flow_from_dataset(dataset,
                                                   x_names,
                                                   y_name,
                                                   indices=val_indices,
                                                   batch_size=batch_size,
                                                   **iterator_config)

    # setup model
    base_cnn = ClassificationCNN.open(base_model_config['model'],
                                      base_model_config['type'],
                                      input_name=x_names[0],
                                      **base_model_params)
    cnn = FinetunedClassificationCNN(base_cnn=base_cnn,
                                     name='dexresnet',
                                     num_classes=num_classes,
                                     output_name=y_name,
                                     im_preprocessor=val_generator,
                                     **model_params)

    # setup training
    cnn.freeze_base_cnn()
    if optimizer_name == 'sgd':
        optimizer = SGD(lr=optimization_config['lr'],
                        momentum=optimization_config['momentum'])
    elif optimizer_name == 'adam':
        optimizer = Adam(lr=optimization_config['lr'])
    else:
        raise ValueError('Optimizer %s not supported!' % (optimizer_name))
    model = cnn.model
    model.compile(optimizer=optimizer,
                  loss=optimization_config['loss'],
                  metrics=optimization_config['metrics'])

    # train
    steps_per_epoch = int(np.ceil(float(num_train) / batch_size))
    latest_model_ckpt = ModelCheckpoint(latest_model_filename,
                                        period=model_save_period)
    best_model_ckpt = ModelCheckpoint(best_model_filename,
                                      save_best_only=True,
                                      period=model_save_period)
    train_history_cb = TrainHistory(model_dir)
    callbacks = [latest_model_ckpt, best_model_ckpt, train_history_cb]
    history = model.fit_generator(
        train_iterator,
        steps_per_epoch=steps_per_epoch,
        epochs=train_config['epochs'],
        callbacks=callbacks,
        validation_data=val_iterator,
        validation_steps=val_steps,
        class_weight=train_config['class_weight'],
        use_multiprocessing=train_config['use_multiprocessing'])

    # save model
    cnn.save(model_dir)

    # save history
    history_filename = os.path.join(model_dir, 'history.pkl')
    pkl.dump(history.history, open(history_filename, 'wb'))
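# Hypothetical entry point for the fine-tuning routine above; the config keys
# follow the reads at the top of finetune_classification_cnn, but the YAML
# filename is made up for illustration.
train_config = YamlConfig('./cfg/tools/finetune_classification_cnn.yaml')
finetune_classification_cnn(train_config)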
class GenerateBalancedObliqueDataset:
    def __init__(self, output_dir):
        self.NUM_OBJECTS = None
        self.table_file = DATA_DIR + '/meshes/table.obj'
        self.data_dir = DATA_DIR + '/meshes/dexnet/'
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        self.image_dir = output_dir + '/images/'
        if not os.path.exists(self.image_dir):
            os.mkdir(self.image_dir)
        self.orig_image_dir = output_dir + '/orig_images/'
        if not os.path.exists(self.orig_image_dir):
            os.mkdir(self.orig_image_dir)
        self.config = YamlConfig(CONFIG)
        self.phi_offsets = self._generate_phi_offsets()
        self.datasets = self._load_datasets()
        self.tensor_dataset = TensorDataset(output_dir,
                                            self._set_tensor_config())
        self.tensor_datapoint = self.tensor_dataset.datapoint_template
        self.gripper = self._set_gripper()
        self._table_mesh_filename = self._set_table_mesh_filename()
        self.table_mesh = self._set_table_mesh()
        self.cur_pose_label = 0
        self.cur_obj_label = 0
        self.cur_image_label = 0
        self.cur_image_file = 0
        self.obj = None
        self.T_obj_camera = None

    def _camera_configs(self):
        return self.config['env_rv_params'].copy()

    @property
    def _camera_intr_scale(self):
        return 32.0 / 96.0

    @property
    def _render_modes(self):
        return [RenderMode.DEPTH_SCENE]

    @property
    def _max_grasp_approach_offset(self):
        return np.deg2rad(self.config['table_alignment']['max_approach_offset'])

    @property
    def _min_grasp_approach_offset(self):
        return -np.deg2rad(self.config['table_alignment']['max_approach_offset'])

    @property
    def _max_grasp_approach_table_angle(self):
        return np.deg2rad(self.config['table_alignment']['max_approach_table_angle'])

    @property
    def _num_grasp_approach_samples(self):
        return self.config['table_alignment']['num_approach_offset_samples']

    def _generate_phi_offsets(self):
        phi_offsets = []
        if self._max_grasp_approach_offset == self._min_grasp_approach_offset:
            phi_inc = 1
        elif self._num_grasp_approach_samples == 1:
            phi_inc = self._max_grasp_approach_offset - self._min_grasp_approach_offset + 1
        else:
            phi_inc = (self._max_grasp_approach_offset - self._min_grasp_approach_offset) / \
                      (self._num_grasp_approach_samples - 1)
        phi = self._min_grasp_approach_offset
        while phi <= self._max_grasp_approach_offset:
            phi_offsets.append(phi)
            phi += phi_inc
        return phi_offsets

    @property
    def _approach_dist(self):
        return self.config['collision_checking']['approach_dist']

    @property
    def _delta_approach(self):
        return self.config['collision_checking']['delta_approach']

    @property
    def _table_offset(self):
        return self.config['collision_checking']['table_offset']

    @property
    def _stable_pose_min_p(self):
        return self.config['stable_pose_min_p']

    def _set_table_mesh_filename(self):
        table_mesh_filename = self.config['collision_checking']['table_mesh_filename']
        if not os.path.isabs(table_mesh_filename):
            return os.path.join(DATA_DIR, table_mesh_filename)
        return table_mesh_filename

    def _set_table_mesh(self):
        return ObjFile(self._table_mesh_filename).read()

    def _set_gripper(self):
        return RobotGripper.load(self.config['gripper'])

    def _load_datasets(self):
        """ Load the datasets in the Hdf5 database given through the config file.

        Returns
        -------
        datasets (list): list of datasets
        """
        database = Hdf5Database(self.config['database_name'],
                                access_level=READ_ONLY_ACCESS)
        target_object_keys = self.config['target_objects']
        dataset_names = target_object_keys.keys()
        datasets = [database.dataset(dn) for dn in dataset_names]
        if self.NUM_OBJECTS is not None:
            datasets = [dataset.subset(0, self.NUM_OBJECTS) for dataset in datasets]
        return datasets

    def _set_tensor_config(self):
        """ Sets the tensor config based on the used config file.

        Returns
        -------
        tensor_config (dict): tensor config that can be used to initiate a tensor dataset.
        """
        tensor_config = self.config['tensors']
        tensor_config['fields']['robust_ferrari_canny'] = {}
        tensor_config['fields']['robust_ferrari_canny']['dtype'] = 'float32'
        return tensor_config

    def render_images(self, scene_objs, stable_pose, num_images, camera_config=None):
        """ Renders depth and binary images from self.obj at the given stable pose.
        The camera sampling occurs within urv.rvs.

        Parameters
        ----------
        scene_objs (dict): Objects occurring in the scene, mostly includes the table mesh.
        stable_pose (StablePose): Stable pose of the object
        num_images (int): Number of images to render
        camera_config (dict): Camera sampling parameters with minimum/maximum values
            for radius, polar angle, ...

        Returns
        -------
        render_samples (list): list of rendered images including sampled camera positions
        """
        if camera_config is None:
            camera_config = self.config['env_rv_params']
        urv = UniformPlanarWorksurfaceImageRandomVariable(self.obj.mesh,
                                                          self._render_modes,
                                                          'camera',
                                                          camera_config,
                                                          scene_objs=scene_objs,
                                                          stable_pose=stable_pose)
        # Render images
        render_samples = urv.rvs(size=num_images)
        return render_samples

    def align_grasps_with_camera(self, grasps):
        """ Attempts to align all grasps in grasps with the z axis of the camera
        in self.T_obj_camera.

        Parameters
        ----------
        grasps (list): List of grasps for the object mesh in the object frame.

        Returns
        -------
        aligned_grasps (list): List of aligned grasps for the object mesh in the object frame
        """
        z_axis_in_obj = np.dot(self.T_obj_camera.inverse().matrix,
                               np.array((0, 0, -1, 1)).reshape(4, 1))
        z_axis = z_axis_in_obj[0:3].squeeze() / np.linalg.norm(z_axis_in_obj[0:3].squeeze())
        aligned_grasps = [grasp.perpendicular_table(z_axis) for grasp in grasps]
        return aligned_grasps

    def get_camera_pose(self, sample):
        """ Reads the sampled camera pose from a rendered image sample.

        Parameters
        ----------
        sample (meshpy rendersample): Image sample

        Returns
        -------
        camera pose (np.array): Array including the camera radius, elevation angle,
            polar angle, roll, focal length, and table translation in x and y.
        """
        return np.r_[sample.camera.radius,
                     sample.camera.elev,
                     sample.camera.az,
                     sample.camera.roll,
                     sample.camera.focal,
                     sample.camera.tx,
                     sample.camera.ty]

    def is_grasp_aligned(self, aligned_grasp, stable_pose=None):
        """ Checks if the grasp is aligned with the camera z axis or the z axis of the
        stable pose, if given.

        Parameters
        ----------
        aligned_grasp (Grasp)
        stable_pose (StablePose): stable pose of object. Default: None

        Returns
        -------
        Aligned (bool): True if grasp is aligned with the desired z axis.
        """
        if stable_pose is not None:
            _, grasp_approach_table_angle, _ = aligned_grasp.grasp_angles_from_stp_z(stable_pose)
        else:
            _, grasp_approach_table_angle, _ = aligned_grasp.grasp_angles_from_camera_z(self.T_obj_camera)
        perpendicular_table = (np.abs(grasp_approach_table_angle) < self._max_grasp_approach_table_angle)
        if not perpendicular_table:
            return False
        return True

    def is_grasp_collision_free(self, grasp, collision_checker):
        """ Checks if any of the +- phi grasp linear approaches are collision free
        with the OpenRave collision checker.

        Parameters
        ----------
        grasp (Grasp)
        collision_checker (GraspCollisionChecker)

        Returns
        -------
        collision_free (bool): True if grasp is collision free.
        """
        collision_free = False
        for phi_offset in self.phi_offsets:
            grasp.grasp_y_axis_offset(phi_offset)
            collides = collision_checker.collides_along_approach(grasp,
                                                                 self._approach_dist,
                                                                 self._delta_approach)
            if not collides:
                collision_free = True
                break
        return collision_free

    def get_hand_pose(self, grasp_center_x, grasp_center_y, grasp_2d, grasp_3d,
                      grasp_width, grasp_width_px):
        """ Organises numpy array for hand_pose tensor.

        Parameters
        ----------
        grasp_2d (Grasp2D)
        grasp_3d (Grasp3D)

        Returns
        -------
        hand_pose (np.array): Hand_pose tensor including distance between camera and
            grasp tcp and grasp rotation quaternion in camera frame.
        """
        return np.r_[grasp_center_x,
                     grasp_center_y,
                     grasp_2d.depth,
                     grasp_3d.quaternion,  # w x y z layout
                     grasp_width,
                     grasp_width_px]

    def _crop_and_resize(self, image):
        """ Crop and resize an image to the final height and width.

        Parameters
        ----------
        image (DepthImage)

        Returns
        -------
        final_im (np.array): cropped and resized according to crop_height/width and
            final_height/width in self.config['gqcnn']
        """
        cropped_im = image.crop(self.config['gqcnn']['crop_height'],
                                self.config['gqcnn']['crop_width'])
        resized_im = Image.fromarray(np.asarray(cropped_im.data)).resize(
            (self.config['gqcnn']['final_height'],
             self.config['gqcnn']['final_width']),
            resample=Image.BILINEAR)
        final_im = np.asarray(resized_im).reshape(self.config['gqcnn']['final_height'],
                                                  self.config['gqcnn']['final_width'],
                                                  1)
        return final_im

    @staticmethod
    def scale(array):
        """ Scale image for visualisation purposes."""
        size = array.shape
        flattend = array.flatten()
        scaled = np.interp(flattend, (min(flattend), max(flattend)), (0, 255),
                           left=0, right=255)
        integ = scaled.astype(np.uint8)
        integ.resize(size)
        return integ.squeeze()

    def _show_image(self, image):
        """ Show image by saving it in /data/test_image.png"""
        scaled_image = self.scale(image)
        im = Image.fromarray(scaled_image).resize((300, 300),
                                                  resample=Image.NEAREST)
        im.save('/data/test_image.png')
        im.show()

    def prepare_images(self, depth_im_table, dx, dy):
        # Get translation image distances to grasp
        translation = np.array([dy, dx])
        # Transform, crop and resize image
        depth_im_tf_table = depth_im_table.transform(translation, 0.0)
        depth_im_tf = self._crop_and_resize(depth_im_tf_table)
        # self._show_image(depth_im_tf)
        # self._show_image(depth_unrotated_im_tf)
        return depth_im_tf

    def add_datapoint(self, hand_pose, collision_free, aligned_grasp, grasp_metrics):
        # Add data to tensor dataset
        self.tensor_datapoint['hand_poses'] = hand_pose
        self.tensor_datapoint['collision_free'] = collision_free
        # Add metrics to tensor dataset
        self.tensor_datapoint['robust_ferrari_canny'] = (1 * collision_free) * \
            grasp_metrics[aligned_grasp.id]['robust_ferrari_canny']
        self.tensor_dataset.add(self.tensor_datapoint)

    def width_px(self, camera_intr, point1, point2):
        """Returns the width in pixels."""
        # Form the jaw locations in 3D space at the given depth.
        p1 = Point(point1, frame='camera')
        p2 = Point(point2, frame='camera')
        # Project into pixel space.
        u1 = camera_intr.project(p1)
        u2 = camera_intr.project(p2)
        return np.linalg.norm(u1.data - u2.data)

    def save_samples(self, sample, grasps, T_obj_stp, collision_checker,
                     grasp_metrics, stable_pose):
        self.tensor_datapoint['image_labels'] = self.cur_image_label
        T_stp_camera = sample.camera.object_to_camera_pose
        self.T_obj_camera = T_stp_camera * T_obj_stp.as_frames('obj', T_stp_camera.from_frame)
        aligned_grasps = self.align_grasps_with_camera(grasps)
        depth_im_table = sample.renders[RenderMode.DEPTH_SCENE].image
        cx = depth_im_table.center[1]
        cy = depth_im_table.center[0]
        camera_pose = self.get_camera_pose(sample)
        self.tensor_datapoint['camera_poses'] = camera_pose
        shifted_camera_intr = sample.camera.camera_intr
        self.tensor_datapoint['camera_intrs'] = np.r_[shifted_camera_intr.fx,
                                                      shifted_camera_intr.fy,
                                                      shifted_camera_intr.cx,
                                                      shifted_camera_intr.cy]
        self.save_orig_image(depth_im_table, camera_pose, stable_pose, shifted_camera_intr)

        grid_space = 200
        grids = 3
        centre = np.empty((grids, grids, 2))
        saved = np.ones((grids, grids)) * -1
        depth_images = np.empty((grids, grids, 32, 32, 1))
        # Crop images in a grid around the centre
        for x_grid in range(grids):
            x_pos = grid_space / 4 - x_grid * grid_space / (grids + 1)
            for y_grid in range(grids):
                y_pos = grid_space / 4 - y_grid * grid_space / (grids + 1)
                depth_im = self.prepare_images(depth_im_table, x_pos, y_pos)
                self._show_image(depth_im)
                depth_images[x_grid, y_grid, :, :, :] = depth_im
                centre[x_grid, y_grid, 0] = x_pos
                centre[x_grid, y_grid, 1] = y_pos

        for cnt, aligned_grasp in enumerate(aligned_grasps):
            collision_free = self.is_grasp_collision_free(aligned_grasp, collision_checker)
            # Project grasp coordinates in image
            grasp_2d = aligned_grasp.project_camera(self.T_obj_camera, shifted_camera_intr)
            grasp_point = aligned_grasp.close_fingers(self.obj, vis=False, check_approach=False)
            if not grasp_point[0]:
                Warning("Could not find contact points")
                continue
            # Get grasp width in pixel from endpoints
            p_1 = np.dot(self.T_obj_camera.matrix, np.append(grasp_point[1][0].point, 1).T).T[:3]
            p_2 = np.dot(self.T_obj_camera.matrix, np.append(grasp_point[1][1].point, 1).T).T[:3]
            grasp_width_px = self.width_px(shifted_camera_intr, p_1, p_2)
            grasp_width = aligned_grasp.width_from_endpoints(grasp_point[1][0].point,
                                                             grasp_point[1][1].point)
            pos_x = cx - grasp_2d.center.x
            pos_y = cy - grasp_2d.center.y
            distance = np.abs((centre[:, :, 0] - pos_x)**2 + (centre[:, :, 1] - pos_y)**2)
            idx = np.where(distance == np.amin(distance))
            idx_x = idx[0][0]
            idx_y = idx[1][0]
            grasp_center_x = (300 - grasp_2d.center.x - centre[idx_x, idx_y, 0]) // 3
            grasp_center_y = (300 - grasp_2d.center.y - centre[idx_x, idx_y, 1]) // 3
            if saved[idx_x, idx_y] == -1:
                np.save(self.image_dir + "/depth_im_{:07d}.npy".format(self.cur_image_file),
                        depth_images[idx_x, idx_y, :, :, :])
                saved[idx_x, idx_y] = self.cur_image_file
                self.tensor_datapoint['image_files'] = self.cur_image_file
                self.cur_image_file += 1
            else:
                self.tensor_datapoint['image_files'] = saved[idx_x, idx_y]
            T_grasp_camera = aligned_grasp.gripper_pose(self.gripper).inverse() * \
                self.T_obj_camera.inverse()
            if VISUALISE_3D:
                z_axis = T_grasp_camera.z_axis
                theta = np.rad2deg(np.arccos(
                    (z_axis[2]) / (np.sqrt((z_axis[0] ** 2 + z_axis[1] ** 2 + z_axis[2] ** 2)))))
                vis.figure()
                T_obj_world = vis.mesh_stable_pose(self.obj.mesh.trimesh,
                                                   stable_pose.T_obj_world,
                                                   style='surface',
                                                   dim=0.5,
                                                   plot_table=True)
                T_camera_world = 
T_obj_world * self.T_obj_camera.inverse() vis.gripper(self.gripper, aligned_grasp, T_obj_world, color=(0.3, 0.3, 0.3), T_camera_world=T_camera_world) print(theta) vis.show() hand_pose = self.get_hand_pose(grasp_center_x, grasp_center_y, grasp_2d, T_grasp_camera, grasp_width, grasp_width_px) self.add_datapoint(hand_pose, collision_free, aligned_grasp, grasp_metrics) self.cur_image_label += 1 def save_orig_image(self, depth, camera_pose, stable_pose, camera_intr): depth_im = Image.fromarray(depth.data).crop((225, 225, 375, 375)) depth_im.save(self.orig_image_dir + 'depth_{:06d}.tiff'.format(self.cur_image_label)) with open(self.orig_image_dir + 'object_{:06d}.txt'.format(self.cur_image_label), 'w') as f: f.write(self.obj.key) self.T_obj_camera.save(self.orig_image_dir + 'camera_tf_{:06d}.tf'.format(self.cur_image_label)) np.savetxt(self.orig_image_dir + 'camera_pose_{:06d}.txt'.format(self.cur_image_label), camera_pose) save_stp = StablePoseFile(self.orig_image_dir + 'stable_pose_{:06d}.stp'.format(self.cur_image_label)) save_stp.write([stable_pose]) camera_intr.save(self.orig_image_dir + 'camera_intr_{:06d}.intr'.format(self.cur_image_label)) def render_data(self): logging.basicConfig(level=logging.WARNING) for dataset in self.datasets: logging.info('Generating data for dataset %s' % dataset.name) object_keys = dataset.object_keys for obj_key in object_keys: self.tensor_datapoint['object_labels'] = self.cur_obj_label if self.cur_obj_label % 10 == 0: logging.info("Object number: %d" % self.cur_obj_label) self.obj = dataset[obj_key] grasps = dataset.grasps(self.obj.key, gripper=self.gripper.name) # Load grasp metrics grasp_metrics = dataset.grasp_metrics(self.obj.key, grasps, gripper=self.gripper.name) # setup collision checker collision_checker = GraspCollisionChecker(self.gripper) collision_checker.set_graspable_object(self.obj) # read in the stable poses of the mesh stable_poses = dataset.stable_poses(self.obj.key) # Iterate through stable poses for i, stable_pose in enumerate(stable_poses): if not stable_pose.p > self._stable_pose_min_p: continue self.tensor_datapoint['pose_labels'] = self.cur_pose_label # setup table in collision checker T_obj_stp = stable_pose.T_obj_table.as_frames('obj', 'stp') T_obj_table = self.obj.mesh.get_T_surface_obj(T_obj_stp, delta=self._table_offset).as_frames('obj', 'table') T_table_obj = T_obj_table.inverse() T_obj_stp = self.obj.mesh.get_T_surface_obj(T_obj_stp) collision_checker.set_table(self._table_mesh_filename, T_table_obj) # sample images from random variable T_table_obj = RigidTransform(from_frame='table', to_frame='obj') scene_objs = {'table': SceneObject(self.table_mesh, T_table_obj)} # Set up image renderer before = time.time() samples = self.render_images(scene_objs, stable_pose, self.config['images_per_stable_pose'], camera_config=self._camera_configs()) # print("Rendering took {:05f} seconds".format(time.time()-before)) # times = [] for sample in samples: # before = time.time() self.save_samples(sample, grasps, T_obj_stp, collision_checker, grasp_metrics, stable_pose) # times.append(time.time() - before) # print("Saving one sample took avg {:05f} seconds".format(sum(times)/len(times))) self.cur_pose_label += 1 gc.collect() # next stable pose self.cur_obj_label += 1 # next object # Save dataset self.tensor_dataset.flush()
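The _generate_phi_offsets method above samples grasp-approach offsets between the minimum and maximum table-alignment offset. A minimal standalone sketch of that sampling logic, assuming a hypothetical config with a 10 degree maximum offset and 3 offset samples (these numbers are illustrative, not taken from the YAML config):

import numpy as np

# Hypothetical table-alignment settings (not the real config values).
max_offset = np.deg2rad(10.0)
min_offset = -max_offset
num_samples = 3

if max_offset == min_offset:
    phi_inc = 1
elif num_samples == 1:
    phi_inc = max_offset - min_offset + 1
else:
    phi_inc = (max_offset - min_offset) / (num_samples - 1)

phi_offsets = []
phi = min_offset
while phi <= max_offset:
    phi_offsets.append(phi)
    phi += phi_inc

print(np.rad2deg(phi_offsets))  # approximately [-10., 0., 10.]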
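The width_px helper above measures the jaw opening in pixels by projecting both contact points through the camera intrinsics. A rough standalone sketch of the same pinhole projection, using made-up intrinsics and jaw positions instead of the perception CameraIntrinsics object:

import numpy as np

def project(p, fx, fy, cx, cy):
    # Pinhole projection of a 3D point in the camera frame (metres) to pixels.
    return np.array([fx * p[0] / p[2] + cx, fy * p[1] / p[2] + cy])

# Made-up intrinsics for a 300x300 render and a 5 cm grasp seen at 65 cm depth.
fx = fy = 525.0
cx = cy = 150.0
p1 = np.array([-0.025, 0.0, 0.65])  # left jaw contact (camera frame)
p2 = np.array([0.025, 0.0, 0.65])   # right jaw contact (camera frame)

grasp_width_px = np.linalg.norm(project(p1, fx, fy, cx, cy) -
                                project(p2, fx, fy, cx, cy))
print(grasp_width_px)  # roughly 40 px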
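save_samples above crops the rendered depth image around a 3x3 grid of centres and then reuses the crop whose centre lies closest to each projected grasp, so one saved image file can serve several grasps. A sketch of that grid and nearest-cell lookup, with the grid parameters taken from the code (grid_space = 200, grids = 3) and a made-up grasp offset; np.argmin is used here in place of the np.where lookup in the original:

import numpy as np

grid_space, grids = 200, 3
centre = np.empty((grids, grids, 2))
for x_grid in range(grids):
    x_pos = grid_space / 4 - x_grid * grid_space / (grids + 1)
    for y_grid in range(grids):
        y_pos = grid_space / 4 - y_grid * grid_space / (grids + 1)
        centre[x_grid, y_grid] = (x_pos, y_pos)   # offsets of +50, 0, -50 px

# Offset of a hypothetical grasp from the image centre (pixels).
pos_x, pos_y = 37.0, -12.0
distance = (centre[:, :, 0] - pos_x) ** 2 + (centre[:, :, 1] - pos_y) ** 2
idx_x, idx_y = np.unravel_index(np.argmin(distance), distance.shape)
print(centre[idx_x, idx_y])  # nearest crop centre, here [50., 0.]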
class GenerateBalancedObliqueDataset: def __init__(self, output_dir): self.NUM_OBJECTS = None self.table_file = DATA_DIR + '/meshes/table.obj' self.data_dir = DATA_DIR + '/meshes/dexnet/' output_dir = DATA_DIR + output_dir if not os.path.exists(output_dir): os.mkdir(output_dir) self.output_dir = output_dir self.config = YamlConfig( './cfg/tools/generate_oblique_gqcnn_dataset.yaml') self.phi_offsets = self._generate_phi_offsets() self.datasets, self.target_object_keys = self._load_datasets() self.tensor_dataset = TensorDataset(self.output_dir, self._set_tensor_config()) self.tensor_datapoint = self.tensor_dataset.datapoint_template self.gripper = self._set_gripper() self._table_mesh_filename = self._set_table_mesh_filename() self.table_mesh = self._set_table_mesh() self.cur_pose_label = 0 self.cur_obj_label = 0 self.cur_image_label = 0 self.grasp_upsample_distribuitions = self._get_upsample_distributions() self.camera_distributions = self._get_camera_distributions() self.obj = None self.T_obj_camera = None def _get_camera_distributions(self): # Load relative amount of grasps per elevation angle phi relative_elevation_angles = np.load( DATA_DIR + '/meshes/relative_elevation_angles.npy') relative_elevation_angles[relative_elevation_angles > 1] = 1.0 # Load positivity rate per elevation angle phi positivity_rate = np.load(DATA_DIR + '/meshes/positivity_rate.npy') # Get additional sample ratio through resampling positive grasps per elevation angle phi additional_sample_ratio = 1 + self.config[ 'gt_positive_ratio'] - positivity_rate / 100 # Add additonal sample ratio to relative amount of grasps per elevation angle phi camera_frequencies = 1 / (relative_elevation_angles * additional_sample_ratio) camera_frequencies = np.round( camera_frequencies / sum(camera_frequencies) * 10000).astype(int) # Get camera distribution according to goal sampling ratios camera_distribution = [0] * camera_frequencies[0] + [1] * camera_frequencies[1] + \ [2] * camera_frequencies[2] + [3] * camera_frequencies[3] + \ [4] * camera_frequencies[4] + [5] * camera_frequencies[5] + \ [6] * camera_frequencies[6] + [7] * camera_frequencies[7] + \ [8] * camera_frequencies[8] + [9] * camera_frequencies[9] + \ [10] * camera_frequencies[10] + [11] * camera_frequencies[11] return camera_distribution def _get_upsample_distributions(self): # Get ratio of ground truth positive grasps in unbalanced dataset positivity_rate = np.load(DATA_DIR + '/meshes/positivity_rate.npy') # Get desired ratio of ground truth positive grasps gt_positive_ratio = self.config['gt_positive_ratio'] * 100 # Get upsample ratio for positive grasps upsample_ratio = (gt_positive_ratio - positivity_rate) / positivity_rate # Set upsample ratio of negative sampling ratios to zero upsample_ratio[upsample_ratio < 0] = 0 X = [0, 1, 2, 3, 4, 5] sample_distributions = [] # Fit poisson distribution to ratios for ratio in upsample_ratio: poisson_pd = np.round(poisson.pmf(X, ratio) * 1000).astype(int) # Generate vector with possible sample sizes to randomly choose from sample_distribution = [0] * poisson_pd[0] + [1] * poisson_pd[1] + [2] * poisson_pd[2] + \ [3] * poisson_pd[3] + [4] * poisson_pd[4] + [5] * poisson_pd[5] sample_distributions.append(sample_distribution) return sample_distributions def _camera_configs(self): return self.config['env_rv_params'].copy() @property def _camera_intr_scale(self): return 32.0 / 96.0 @property def _render_modes(self): return [RenderMode.SEGMASK, RenderMode.DEPTH_SCENE] @property def _max_grasp_approach_offset(self): return np.deg2rad( 
self.config['table_alignment']['max_approach_offset']) @property def _min_grasp_approach_offset(self): return -np.deg2rad( self.config['table_alignment']['max_approach_offset']) @property def _max_grasp_approach_table_angle(self): return np.deg2rad( self.config['table_alignment']['max_approach_table_angle']) @property def _num_grasp_approach_samples(self): return self.config['table_alignment']['num_approach_offset_samples'] def _generate_phi_offsets(self): phi_offsets = [] if self._max_grasp_approach_offset == self._min_grasp_approach_offset: phi_inc = 1 elif self._num_grasp_approach_samples == 1: phi_inc = self._max_grasp_approach_offset - self._min_grasp_approach_offset + 1 else: phi_inc = (self._max_grasp_approach_offset - self._min_grasp_approach_offset) / \ (self._num_grasp_approach_samples - 1) phi = self._min_grasp_approach_offset while phi <= self._max_grasp_approach_offset: phi_offsets.append(phi) phi += phi_inc return phi_offsets @property def _approach_dist(self): return self.config['collision_checking']['approach_dist'] @property def _delta_approach(self): return self.config['collision_checking']['delta_approach'] @property def _table_offset(self): return self.config['collision_checking']['table_offset'] @property def _stable_pose_min_p(self): return self.config['stable_pose_min_p'] def _set_table_mesh_filename(self): table_mesh_filename = self.config['collision_checking'][ 'table_mesh_filename'] if not os.path.isabs(table_mesh_filename): return os.path.join(DATA_DIR, table_mesh_filename) return table_mesh_filename def _set_table_mesh(self): return ObjFile(self._table_mesh_filename).read() def _set_gripper(self): return RobotGripper.load(self.config['gripper']) def _load_datasets(self): database = Hdf5Database(self.config['database_name'], access_level=READ_ONLY_ACCESS) target_object_keys = self.config['target_objects'] dataset_names = target_object_keys.keys() datasets = [database.dataset(dn) for dn in dataset_names] if self.NUM_OBJECTS is not None: datasets = [ dataset.subset(0, self.NUM_OBJECTS) for dataset in datasets ] return datasets, target_object_keys def _set_tensor_config(self): tensor_config = self.config['tensors'] tensor_config['fields']['depth_ims_tf_table']['height'] = 32 tensor_config['fields']['depth_ims_tf_table']['width'] = 32 tensor_config['fields']['obj_masks']['height'] = 32 tensor_config['fields']['obj_masks']['width'] = 32 tensor_config['fields']['robust_ferrari_canny'] = {} tensor_config['fields']['robust_ferrari_canny']['dtype'] = 'float32' tensor_config['fields']['ferrari_canny'] = {} tensor_config['fields']['ferrari_canny']['dtype'] = 'float32' tensor_config['fields']['force_closure'] = {} tensor_config['fields']['force_closure']['dtype'] = 'float32' return tensor_config def get_positive_grasps(self, dataset, grasps): metrics = dataset.grasp_metrics(self.obj.key, grasps, gripper=self.gripper.name) positive_grasps = [] for cnt in range(0, len(metrics)): if metrics[cnt]['robust_ferrari_canny'] >= 0.002: positive_grasps.append(grasps[cnt]) return positive_grasps def render_images(self, scene_objs, stable_pose, num_images, camera_config=None): if camera_config is None: camera_config = self.config['env_rv_params'] urv = UniformPlanarWorksurfaceImageRandomVariable( self.obj.mesh, self._render_modes, 'camera', camera_config, scene_objs=scene_objs, stable_pose=stable_pose) # Render images render_samples = urv.rvs(size=num_images) return render_samples def align_grasps(self, grasps): z_axis_in_obj = np.dot(self.T_obj_camera.inverse().matrix, np.array((0, 0, 
-1, 1)).reshape(4, 1)) z_axis = z_axis_in_obj[0:3].squeeze() / np.linalg.norm( z_axis_in_obj[0:3].squeeze()) aligned_grasps = [ grasp.perpendicular_table(z_axis) for grasp in grasps ] return aligned_grasps def get_camera_pose(self, sample): return np.r_[sample.camera.radius, sample.camera.elev, sample.camera.az, sample.camera.roll, sample.camera.focal, sample.camera.tx, sample.camera.ty] def is_grasp_aligned(self, aligned_grasp): _, grasp_approach_table_angle, _ = aligned_grasp.grasp_angles_from_camera_z( self.T_obj_camera) perpendicular_table = (np.abs(grasp_approach_table_angle) < self._max_grasp_approach_table_angle) if not perpendicular_table: return False return True def is_grasp_collision_free(self, grasp, collision_checker): collision_free = False for phi_offset in self.phi_offsets: grasp.grasp_y_axis_offset(phi_offset) collides = collision_checker.collides_along_approach( grasp, self._approach_dist, self._delta_approach) if not collides: collision_free = True break return collision_free def get_hand_pose(self, grasp_2d, shifted_camera_intr): # Configure hand pose return np.r_[grasp_2d.center.y, grasp_2d.center.x, grasp_2d.depth, grasp_2d.angle, grasp_2d.center.y - shifted_camera_intr.cy, grasp_2d.center.x - shifted_camera_intr.cx, grasp_2d.width_px / 3] def prepare_images(self, depth_im_table, binary_im, grasp_2d, cx, cy): # Get translation image distances to grasp dx = cx - grasp_2d.center.x dy = cy - grasp_2d.center.y translation = np.array([dy, dx]) # Transform, crop and resize image binary_im_tf = binary_im.transform(translation, grasp_2d.angle) depth_im_tf_table = depth_im_table.transform(translation, grasp_2d.angle) binary_im_tf = binary_im_tf.crop(96, 96) depth_im_tf_table = depth_im_tf_table.crop(96, 96) depth_image = np.asarray(depth_im_tf_table.data) dep_image = Image.fromarray(depth_image).resize( (32, 32), resample=Image.BILINEAR) depth_im_tf = np.asarray(dep_image).reshape(32, 32, 1) binary_image = np.asarray(binary_im_tf.data) bin_image = Image.fromarray(binary_image).resize( (32, 32), resample=Image.BILINEAR) binary_im_tf = np.asarray(bin_image).reshape(32, 32, 1) return depth_im_tf, binary_im_tf def add_datapoint(self, depth_im_tf, binary_im_tf, hand_pose, collision_free, camera_pose, aligned_grasp, grasp_metrics): # Add data to tensor dataset self.tensor_datapoint['depth_ims_tf_table'] = depth_im_tf self.tensor_datapoint['obj_masks'] = binary_im_tf self.tensor_datapoint['hand_poses'] = hand_pose self.tensor_datapoint['obj_labels'] = self.cur_obj_label self.tensor_datapoint['collision_free'] = collision_free self.tensor_datapoint['pose_labels'] = self.cur_pose_label self.tensor_datapoint['image_labels'] = self.cur_image_label self.tensor_datapoint['camera_poses'] = camera_pose # Add metrics to tensor dataset for metric_name, metric_val in grasp_metrics[ aligned_grasp.id].iteritems(): coll_free_metric = (1 * collision_free) * metric_val self.tensor_datapoint[metric_name] = coll_free_metric self.tensor_dataset.add(self.tensor_datapoint) def save_samples(self, sample, grasps, T_obj_stp, collision_checker, grasp_metrics, only_positive=False): T_stp_camera = sample.camera.object_to_camera_pose self.T_obj_camera = T_stp_camera * T_obj_stp.as_frames( 'obj', T_stp_camera.from_frame) aligned_grasps = self.align_grasps(grasps) binary_im = sample.renders[RenderMode.SEGMASK].image depth_im_table = sample.renders[RenderMode.DEPTH_SCENE].image camera_pose = self.get_camera_pose(sample) shifted_camera_intr = sample.camera.camera_intr cx = depth_im_table.center[1] cy = 
depth_im_table.center[0] for cnt, aligned_grasp in enumerate(aligned_grasps): if not self.is_grasp_aligned(aligned_grasp): continue collision_free = self.is_grasp_collision_free( aligned_grasp, collision_checker) if only_positive and not collision_free: continue # Project grasp coordinates in image grasp_2d = aligned_grasp.project_camera(self.T_obj_camera, shifted_camera_intr) depth_im_tf, binary_im_tf = self.prepare_images( depth_im_table, binary_im, grasp_2d, cx, cy) hand_pose = self.get_hand_pose(grasp_2d, shifted_camera_intr) self.add_datapoint(depth_im_tf, binary_im_tf, hand_pose, collision_free, camera_pose, aligned_grasp, grasp_metrics) self.cur_image_label += 1 def render_data(self): logging.basicConfig(level=logging.WARNING) for dataset in self.datasets: logging.info('Generating data for dataset %s' % dataset.name) object_keys = dataset.object_keys for obj_key in object_keys: logging.info("Object number: %d" % self.cur_obj_label) self.obj = dataset[obj_key] grasps = dataset.grasps(self.obj.key, gripper=self.gripper.name) positive_grasps = self.get_positive_grasps(dataset, grasps) # Load grasp metrics grasp_metrics = dataset.grasp_metrics( self.obj.key, grasps, gripper=self.gripper.name) # setup collision checker collision_checker = GraspCollisionChecker(self.gripper) collision_checker.set_graspable_object(self.obj) # read in the stable poses of the mesh stable_poses = dataset.stable_poses(self.obj.key) # Iterate through stable poses for i, stable_pose in enumerate(stable_poses): if not stable_pose.p > self._stable_pose_min_p: continue # setup table in collision checker T_obj_stp = stable_pose.T_obj_table.as_frames('obj', 'stp') T_obj_table = self.obj.mesh.get_T_surface_obj( T_obj_stp, delta=self._table_offset).as_frames('obj', 'table') T_table_obj = T_obj_table.inverse() T_obj_stp = self.obj.mesh.get_T_surface_obj(T_obj_stp) collision_checker.set_table(self._table_mesh_filename, T_table_obj) # sample images from random variable T_table_obj = RigidTransform(from_frame='table', to_frame='obj') scene_objs = { 'table': SceneObject(self.table_mesh, T_table_obj) } # Set up image renderer for _ in range(self.config['images_per_stable_pose']): elev_angle = np.random.choice( self.camera_distributions) camera_config = self._camera_configs() camera_config['min_elev'] = elev_angle * 5.0 camera_config['max_elev'] = (elev_angle + 1) * 5.0 sample = self.render_images( scene_objs, stable_pose, 1, camera_config=camera_config) self.save_samples(sample, grasps, T_obj_stp, collision_checker, grasp_metrics) # Get camera elevation angle for current sample elev_angle = sample.camera.elev * 180 / np.pi elev_bar = int(elev_angle // 5) # Sample number of positive images from distribution number_positive_images = np.random.choice( self.grasp_upsample_distribuitions[elev_bar]) # Render only positive images new_config = self._camera_configs() new_config['min_elev'] = elev_angle new_config['max_elev'] = elev_angle positive_render_samples = self.render_images( scene_objs, stable_pose, number_positive_images, camera_config=new_config) if type(positive_render_samples) == list: for pos_sample in positive_render_samples: self.save_samples(pos_sample, positive_grasps, T_obj_stp, collision_checker, grasp_metrics, only_positive=True) else: self.save_samples(positive_render_samples, positive_grasps, T_obj_stp, collision_checker, grasp_metrics, only_positive=True) self.cur_pose_label += 1 gc.collect() # next stable pose self.cur_obj_label += 1 # next object # Save dataset self.tensor_dataset.flush()
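_get_upsample_distributions above turns the measured positivity rate per elevation bin into a pool of "extra positive render" counts whose frequencies follow a Poisson distribution. A sketch for a single hypothetical bin, assuming a 35 % positivity rate and a target ground-truth-positive ratio of 0.5 (both values are made up; the generator reads them from positivity_rate.npy and the config):

import numpy as np
from scipy.stats import poisson

positivity_rate = 35.0            # per cent, one entry of the .npy file
gt_positive_ratio = 0.5 * 100     # config value scaled to per cent

# Extra positive samples needed per existing positive sample; clamp at zero.
upsample_ratio = max((gt_positive_ratio - positivity_rate) / positivity_rate, 0.0)

X = [0, 1, 2, 3, 4, 5]
poisson_pd = np.round(poisson.pmf(X, upsample_ratio) * 1000).astype(int)

# Pool of candidate counts; drawing uniformly from it reproduces the
# Poisson frequencies, as done per image in render_data.
sample_distribution = []
for k, freq in zip(X, poisson_pd):
    sample_distribution += [k] * freq
n_extra = np.random.choice(sample_distribution)
print(upsample_ratio, n_extra)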
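_get_camera_distributions follows the same pattern for the camera elevation angle: bins that are under-represented (or need many extra positive renders) appear more often in the sampling pool. A sketch with fabricated per-bin statistics in place of relative_elevation_angles.npy and positivity_rate.npy:

import numpy as np

# Fabricated per-bin inputs for 12 elevation bins of 5 degrees each.
relative_elevation_angles = np.linspace(0.2, 1.2, 12)
relative_elevation_angles[relative_elevation_angles > 1] = 1.0
positivity_rate = np.linspace(20.0, 50.0, 12)   # per cent
gt_positive_ratio = 0.5

additional_sample_ratio = 1 + gt_positive_ratio - positivity_rate / 100
camera_frequencies = 1 / (relative_elevation_angles * additional_sample_ratio)
camera_frequencies = np.round(
    camera_frequencies / camera_frequencies.sum() * 10000).astype(int)

camera_distribution = []
for elev_bin, freq in enumerate(camera_frequencies):
    camera_distribution += [elev_bin] * freq

# Draw a bin and map it to its 5 degree elevation band, as in render_data.
elev_bin = np.random.choice(camera_distribution)
min_elev, max_elev = elev_bin * 5.0, (elev_bin + 1) * 5.0
print(elev_bin, min_elev, max_elev)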
class Overhead_zr_xy(GenerateDataset): def __init__(self, output_dir, output_dir_mod, config_dir): GenerateDataset.__init__(self, output_dir, config_dir) if not os.path.exists(output_dir_mod): os.mkdir(output_dir_mod) self.modified_tensor_dataset = TensorDataset(output_dir_mod, self._set_tensor_config()) def _set_tensor_config(self): tensor_config = self.config['tensors'] tensor_config['fields']['depth_ims_tf_table']['height'] = self.config[ 'gqcnn']['final_height'] tensor_config['fields']['depth_ims_tf_table']['width'] = self.config[ 'gqcnn']['final_width'] tensor_config['fields']['robust_ferrari_canny'] = {} tensor_config['fields']['robust_ferrari_canny']['dtype'] = 'float32' return tensor_config def get_hand_pose(self, grasp_2d, shifted_camera_intr): # Configure hand pose return np.r_[grasp_2d.center.y, grasp_2d.center.x, grasp_2d.depth, grasp_2d.angle, grasp_2d.center.y - shifted_camera_intr.cy, grasp_2d.center.x - shifted_camera_intr.cx, grasp_2d.width_px / 3] def prepare_images(self, depth_im_table, grasp_2d, cx, cy): # Get translation image distances to grasp dx = cx - grasp_2d.center.x dy = cy - grasp_2d.center.y translation = np.array([dy, dx]) # Transform, crop and resize image depth_im_tf_table = depth_im_table.transform(translation, grasp_2d.angle) depth_im_tf = self._crop_and_resize(depth_im_tf_table) # Translate, crop and resize (no rotation) depth_unrotated_im_tf_table = depth_im_table.transform( translation, 0.0) depth_unrotated_im_tf = self._crop_and_resize( depth_unrotated_im_tf_table) return depth_im_tf, depth_unrotated_im_tf def add_datapoint(self, depth_im_tf, depth_im_no_rot, hand_pose, collision_free, camera_pose, aligned_grasp, grasp_metrics): # Add data to tensor dataset self.tensor_datapoint['depth_ims_tf_table'] = depth_im_tf self.tensor_datapoint['hand_poses'] = hand_pose self.tensor_datapoint['collision_free'] = collision_free self.tensor_datapoint['camera_poses'] = camera_pose # Add metrics to tensor dataset self.tensor_datapoint['robust_ferrari_canny'] = (1 * collision_free) * \ grasp_metrics[aligned_grasp.id]['robust_ferrari_canny'] self.tensor_dataset.add(self.tensor_datapoint) # Add data to tensor dataset self.tensor_datapoint['depth_ims_tf_table'] = depth_im_no_rot # Add metrics to tensor dataset self.modified_tensor_dataset.add(self.tensor_datapoint) def save_samples(self, sample, grasps, T_obj_stp, collision_checker, grasp_metrics): self.tensor_datapoint['image_labels'] = self.cur_image_label T_stp_camera = sample.camera.object_to_camera_pose self.T_obj_camera = T_stp_camera * T_obj_stp.as_frames( 'obj', T_stp_camera.from_frame) aligned_grasps = self.align_grasps_with_camera(grasps) depth_im_table = sample.renders[RenderMode.DEPTH_SCENE].image camera_pose = self.get_camera_pose(sample) shifted_camera_intr = sample.camera.camera_intr cx = depth_im_table.center[1] cy = depth_im_table.center[0] for cnt, aligned_grasp in enumerate(aligned_grasps): if not self.is_grasp_aligned(aligned_grasp): continue collision_free = self.is_grasp_collision_free( aligned_grasp, collision_checker) # Project grasp coordinates in image grasp_2d = aligned_grasp.project_camera(self.T_obj_camera, shifted_camera_intr) depth_im_tf, depth_im_no_rot = self.prepare_images( depth_im_table, grasp_2d, cx, cy) hand_pose = self.get_hand_pose(grasp_2d, shifted_camera_intr) self.add_datapoint(depth_im_tf, depth_im_no_rot, hand_pose, collision_free, camera_pose, aligned_grasp, grasp_metrics) self.cur_image_label += 1 def render_data(self): logging.basicConfig(level=logging.WARNING) for 
dataset in self.datasets: logging.info('Generating data for dataset %s' % dataset.name) object_keys = dataset.object_keys for obj_key in object_keys: if self.cur_obj_label % 10 == 0: logging.info("Object number: %d" % self.cur_obj_label) self.obj = dataset[obj_key] self.tensor_datapoint['obj_labels'] = self.cur_obj_label grasps = dataset.grasps(self.obj.key, gripper=self.gripper.name) # Load grasp metrics grasp_metrics = dataset.grasp_metrics( self.obj.key, grasps, gripper=self.gripper.name) # setup collision checker collision_checker = GraspCollisionChecker(self.gripper) collision_checker.set_graspable_object(self.obj) # read in the stable poses of the mesh stable_poses = dataset.stable_poses(self.obj.key) # Iterate through stable poses for i, stable_pose in enumerate(stable_poses): if not stable_pose.p > self._stable_pose_min_p: continue self.tensor_datapoint['pose_labels'] = self.cur_pose_label # setup table in collision checker T_obj_stp = stable_pose.T_obj_table.as_frames('obj', 'stp') T_obj_table = self.obj.mesh.get_T_surface_obj( T_obj_stp, delta=self._table_offset).as_frames('obj', 'table') T_table_obj = T_obj_table.inverse() T_obj_stp = self.obj.mesh.get_T_surface_obj(T_obj_stp) collision_checker.set_table(self._table_mesh_filename, T_table_obj) # sample images from random variable T_table_obj = RigidTransform(from_frame='table', to_frame='obj') scene_objs = { 'table': SceneObject(self.table_mesh, T_table_obj) } # Set up image renderer samples = self.render_images( scene_objs, stable_pose, self.config['images_per_stable_pose'], camera_config=self._camera_configs()) for sample in samples: self.save_samples(sample, grasps, T_obj_stp, collision_checker, grasp_metrics) self.cur_pose_label += 1 # next stable pose self.cur_obj_label += 1 gc.collect() # next object # Save dataset self.tensor_dataset.flush()
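get_hand_pose above packs seven values per grasp into the hand_poses tensor. A sketch of that layout with made-up pixel coordinates, depth, angle, and intrinsics (the /3 presumably compensates the 96 px to 32 px resize used elsewhere in this file):

import numpy as np

center_row, center_col = 148.0, 153.0   # grasp centre in the full image (px)
depth = 0.72                            # grasp depth from the camera (m)
angle = 0.35                            # grasp axis angle in the image plane (rad)
cx, cy = 150.0, 150.0                   # principal point of the shifted intrinsics
width_px = 42.0                         # jaw opening in pixels

hand_pose = np.r_[center_row, center_col, depth, angle,
                  center_row - cy, center_col - cx,
                  width_px / 3]
print(hand_pose.shape)  # (7,)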
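prepare_images relies on _crop_and_resize to bring the grasp-centred image down to network resolution. A rough standalone approximation of that centre-crop plus bilinear-resize step, assuming a 400x400 depth render, a 96 px crop, and a 32 px final size (the real sizes come from the gqcnn section of the config):

import numpy as np
from PIL import Image

depth = np.random.uniform(0.6, 0.8, size=(400, 400)).astype(np.float32)
crop_h = crop_w = 96
final_h = final_w = 32

# Centre crop around the image midpoint.
r0 = depth.shape[0] // 2 - crop_h // 2
c0 = depth.shape[1] // 2 - crop_w // 2
cropped = depth[r0:r0 + crop_h, c0:c0 + crop_w]

# Bilinear resize via PIL, then restore the trailing channel axis.
resized = Image.fromarray(cropped).resize((final_w, final_h),
                                          resample=Image.BILINEAR)
final_im = np.asarray(resized).reshape(final_h, final_w, 1)
print(final_im.shape)  # (32, 32, 1)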