Beispiel #1
0
class Reprojection:
    def __init__(self, path, elev):
        if not os.path.exists(DATA_DIR):
            raise NameError(
                "Path %s is not specified. Is the docker file set up properly?"
                % DATA_DIR)
        table_file = DATA_DIR + '/meshes/table.obj'
        self.table_mesh = ObjFile(table_file).read()
        self.data_dir = DATA_DIR + '/meshes/dexnet/'
        self.output_dir = DATA_DIR + '/reprojections/'
        self.config = YamlConfig(
            './cfg/tools/generate_projected_gqcnn_dataset.yaml')

        self.random_positions = 1
        self.image_size = (300, 300)
        self.elev = 0
        self.show_images = False

        self.cur_obj_label = 0
        self.cur_image_label = 0
        self.cur_pose_label = 0

        if path is not None:
            self.output_dir = DATA_DIR + '/reprojections/' + path

        if elev is not None:
            print("Elevation angle is being set to %d" % elev)
            self.config['env_rv_params']['min_elev'] = elev
            self.config['env_rv_params']['max_elev'] = elev
            self.elev = elev

        tensor_config = self.config['tensors']
        tensor_config['fields']['depth_ims_tf_table']['height'] = 32
        tensor_config['fields']['depth_ims_tf_table']['width'] = 32
        tensor_config['fields']['robust_ferrari_canny'] = {}
        tensor_config['fields']['robust_ferrari_canny']['dtype'] = 'float32'
        tensor_config['fields']['ferrari_canny'] = {}
        tensor_config['fields']['ferrari_canny']['dtype'] = 'float32'
        tensor_config['fields']['force_closure'] = {}
        tensor_config['fields']['force_closure']['dtype'] = 'float32'

        self.tensor_dataset = TensorDataset(self.output_dir, tensor_config)
        self.tensor_datapoint = self.tensor_dataset.datapoint_template
        if not os.path.exists(self.output_dir + '/images'):
            os.makedirs(self.output_dir + '/images')
        if not os.path.exists(self.output_dir + '/meshes'):
            os.makedirs(self.output_dir + '/meshes')

    def _load_file_ids(self):
        dtype = [('Obj_id', (np.str_, 40)), ('Tensor', int), ('Array', int),
                 ('Depth', float)]

        with open(self.data_dir + 'files.csv', 'r') as csv_file:
            data = []
            csv_reader = csv.reader(csv_file, delimiter=',')
            for row in csv_reader:
                data.append(
                    tuple([
                        int(value) if value.isdigit() else value
                        for value in row
                    ]))
            self.file_arr = np.array(data, dtype=dtype)
        files = os.listdir(self.data_dir)
        files = [name.split('.')[0] for name in files]
        files.remove('files')
        self.all_objects = list(set(files))

    def _load_data(self, obj_id):
        self.grasp_metrics = json.load(
            open(self.data_dir + obj_id + '.json', 'r'))
        self.candidate_grasps_dict = pkl.load(
            open(self.data_dir + obj_id + '.pkl', 'rb'))

        self.object_mesh = ObjFile(self.data_dir + obj_id + '.obj').read()
        self.stable_poses = StablePoseFile(self.data_dir + obj_id +
                                           '.stp').read()
        self.tensor = self.file_arr['Tensor'][np.where(
            self.file_arr['Obj_id'] == obj_id)][0]
        self.array = self.file_arr['Array'][np.where(
            self.file_arr['Obj_id'] == obj_id)][0]
        depth = self.file_arr['Depth'][np.where(
            self.file_arr['Obj_id'] == obj_id)][0]
        self.config['env_rv_params']['min_radius'] = depth
        self.config['env_rv_params']['max_radius'] = depth

    def start_rendering(self):
        self._load_file_ids()

        for object_id in self.all_objects:
            self._load_data(object_id)

            for i, stable_pose in enumerate(self.stable_poses):
                try:
                    candidate_grasp_info = self.candidate_grasps_dict[
                        stable_pose.id]
                except KeyError:
                    continue

                if not candidate_grasp_info:
                    Warning("Candidate grasp info of object id %s empty" %
                            object_id)
                    Warning("Continue.")
                    continue
                T_obj_stp = stable_pose.T_obj_table.as_frames('obj', 'stp')
                T_obj_stp = self.object_mesh.get_T_surface_obj(T_obj_stp)

                T_table_obj = RigidTransform(from_frame='table',
                                             to_frame='obj')
                scene_objs = {
                    'table': SceneObject(self.table_mesh, T_table_obj)
                }

                urv = UniformPlanarWorksurfaceImageRandomVariable(
                    self.object_mesh,
                    [RenderMode.DEPTH_SCENE, RenderMode.SEGMASK],
                    'camera',
                    self.config['env_rv_params'],
                    scene_objs=scene_objs,
                    stable_pose=stable_pose)
                render_sample = urv.rvs(size=self.random_positions)
                # for render_sample in render_samples:

                binary_im = render_sample.renders[RenderMode.SEGMASK].image
                depth_im = render_sample.renders[
                    RenderMode.DEPTH_SCENE].image.crop(300, 300)
                orig_im = Image.fromarray(self._scale_image(depth_im.data))
                if self.show_images:
                    orig_im.show()
                orig_im.convert('RGB').save(self.output_dir + '/images/' +
                                            object_id + '_elev_' +
                                            str(self.elev) + '_original.png')
                print("Saved original")

                T_stp_camera = render_sample.camera.object_to_camera_pose
                shifted_camera_intr = render_sample.camera.camera_intr.crop(
                    300, 300, 240, 320)
                depth_points = self._reproject_to_3D(depth_im,
                                                     shifted_camera_intr)

                transformed_points, T_camera = self._transformation(
                    depth_points)

                camera_dir = np.dot(T_camera.rotation,
                                    np.array([0.0, 0.0, -1.0]))

                pcd = o3d.geometry.PointCloud()
                # print(camera_dir)
                pcd.points = o3d.utility.Vector3dVector(transformed_points.T)
                # TODO check normals!!
                #  pcd.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30))
                #  pcd.normals = o3d.utility.Vector3dVector(-np.asarray(pcd.normals))
                normals = np.repeat([camera_dir],
                                    len(transformed_points.T),
                                    axis=0)
                pcd.normals = o3d.utility.Vector3dVector(normals)

                # cs_points = [[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]]
                # cs_lines = [[0, 1], [0, 2], [0, 3]]
                # colors = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
                # cs = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(cs_points),
                #                           lines=o3d.utility.Vector2iVector(cs_lines))
                # cs.colors = o3d.utility.Vector3dVector(colors)
                # o3d.visualization.draw_geometries([pcd])

                depth = self._o3d_meshing(pcd)

                # projected_depth_im,new_camera_intr,table_height = self._projection(new_points,shifted_camera_intr)
                new_camera_intr = shifted_camera_intr
                new_camera_intr.cx = 150
                new_camera_intr.cy = 150
                projected_depth_im = np.asarray(depth)
                projected_depth_im[projected_depth_im == 0.0] = -1.0
                table_height = np.median(
                    projected_depth_im[projected_depth_im != -1.0].flatten())
                print("Minimum depth:", min(projected_depth_im.flatten()))
                print("Maximum depth:", max(projected_depth_im.flatten()))

                im = Image.fromarray(self._scale_image(projected_depth_im))

                projected_depth_im = DepthImage(projected_depth_im,
                                                frame='new_camera')

                cx = projected_depth_im.center[1]
                cy = projected_depth_im.center[0]

                # Grasp conversion
                T_obj_old_camera = T_stp_camera * T_obj_stp.as_frames(
                    'obj', T_stp_camera.from_frame)
                T_obj_camera = T_camera.dot(T_obj_old_camera)
                for grasp_info in candidate_grasp_info:
                    grasp = grasp_info.grasp
                    collision_free = grasp_info.collision_free

                    grasp_2d = grasp.project_camera(T_obj_camera,
                                                    new_camera_intr)
                    dx = cx - grasp_2d.center.x
                    dy = cy - grasp_2d.center.y
                    translation = np.array([dy, dx])

                    # Project 3D old_camera_cs contact points into new camera cs

                    contact_points = np.append(grasp_info.contact_point1, 1).T
                    new_cam = np.dot(T_obj_camera.matrix, contact_points)
                    c1 = new_camera_intr.project(
                        Point(new_cam[0:3], frame=new_camera_intr.frame))
                    contact_points = np.append(grasp_info.contact_point2, 1).T
                    new_cam = np.dot(T_obj_camera.matrix, contact_points)
                    c2 = new_camera_intr.project(
                        Point(new_cam[0:3], frame=new_camera_intr.frame))

                    # Check if there are occlusions at contact points
                    if projected_depth_im.data[
                            c1.x, c1.y] == -1.0 or projected_depth_im.data[
                                c2.x, c2.y] == -1.0:
                        print("Contact point at occlusion")
                        contact_occlusion = True
                    else:
                        contact_occlusion = False
                    # Mark contact points in image
                    im = im.convert('RGB')
                    if False:
                        im_draw = ImageDraw.Draw(im)
                        im_draw.line([(c1[0], c1[1] - 10),
                                      (c1[0], c1[1] + 10)],
                                     fill=(255, 0, 0, 255))
                        im_draw.line([(c1[0] - 10, c1[1]),
                                      (c1[0] + 10, c1[1])],
                                     fill=(255, 0, 0, 255))
                        im_draw.line([(c2[0], c2[1] - 10),
                                      (c2[0], c2[1] + 10)],
                                     fill=(255, 0, 0, 255))
                        im_draw.line([(c2[0] - 10, c2[1]),
                                      (c2[0] + 10, c2[1])],
                                     fill=(255, 0, 0, 255))
                    if self.show_images:
                        im.show()
                    im.save(self.output_dir + '/images/' + object_id +
                            '_elev_' + str(self.elev) + '_reprojected.png')

                    # Transform and crop image

                    depth_im_tf = projected_depth_im.transform(
                        translation, grasp_2d.angle)
                    depth_im_tf = depth_im_tf.crop(96, 96)

                    # Apply transformation to contact points
                    trans_map = np.array([[1, 0, dx], [0, 1, dy]])
                    rot_map = cv2.getRotationMatrix2D(
                        (cx, cy), np.rad2deg(grasp_2d.angle), 1)
                    trans_map_aff = np.r_[trans_map, [[0, 0, 1]]]
                    rot_map_aff = np.r_[rot_map, [[0, 0, 1]]]
                    full_map = rot_map_aff.dot(trans_map_aff)
                    # print("Full map",full_map)
                    c1_rotated = (np.dot(full_map, np.r_[c1.vector, [1]]) -
                                  np.array([150 - 48, 150 - 48, 0])) / 3
                    c2_rotated = (np.dot(full_map, np.r_[c2.vector, [1]]) -
                                  np.array([150 - 48, 150 - 48, 0])) / 3

                    grasp_line = depth_im_tf.data[48]
                    occlusions = len(np.where(np.squeeze(grasp_line) == -1)[0])

                    # Set occlusions to table height for resizing image
                    depth_im_tf.data[depth_im_tf.data == -1.0] = table_height

                    depth_image = Image.fromarray(np.asarray(depth_im_tf.data))\
                        .resize((32, 32), resample=Image.BILINEAR)
                    depth_im_tf_table = np.asarray(depth_image).reshape(
                        32, 32, 1)

                    # depth_im_tf_table = depth_im_tf.resize((32, 32,), interp='bilinear')

                    im = Image.fromarray(
                        self._scale_image(depth_im_tf_table.reshape(
                            32, 32))).convert('RGB')
                    draw = ImageDraw.Draw(im)
                    draw.line([(c1_rotated[0], c1_rotated[1] - 3),
                               (c1_rotated[0], c1_rotated[1] + 3)],
                              fill=(255, 0, 0, 255))
                    draw.line([(c1_rotated[0] - 3, c1_rotated[1]),
                               (c1_rotated[0] + 3, c1_rotated[1])],
                              fill=(255, 0, 0, 255))
                    draw.line([(c2_rotated[0], c2_rotated[1] - 3),
                               (c2_rotated[0], c2_rotated[1] + 3)],
                              fill=(255, 0, 0, 255))
                    draw.line([(c2_rotated[0] - 3, c2_rotated[1]),
                               (c2_rotated[0] + 3, c2_rotated[1])],
                              fill=(255, 0, 0, 255))
                    if self.show_images:
                        im.show()
                    im.save(self.output_dir + '/images/' + object_id +
                            '_elev_' + str(self.elev) + '_transformed.png')

                    hand_pose = np.r_[grasp_2d.center.y, grasp_2d.center.x,
                                      grasp_2d.depth, grasp_2d.angle,
                                      grasp_2d.center.y - new_camera_intr.cy,
                                      grasp_2d.center.x - new_camera_intr.cx,
                                      grasp_2d.width_px / 3]

                    self.tensor_datapoint[
                        'depth_ims_tf_table'] = depth_im_tf_table
                    self.tensor_datapoint['hand_poses'] = hand_pose
                    self.tensor_datapoint['obj_labels'] = self.cur_obj_label
                    self.tensor_datapoint['collision_free'] = collision_free
                    self.tensor_datapoint['pose_labels'] = self.cur_pose_label
                    self.tensor_datapoint[
                        'image_labels'] = self.cur_image_label
                    self.tensor_datapoint['files'] = [self.tensor, self.array]
                    self.tensor_datapoint['occlusions'] = occlusions
                    self.tensor_datapoint[
                        'contact_occlusion'] = contact_occlusion

                    for metric_name, metric_val in self.grasp_metrics[str(
                            grasp.id)].iteritems():
                        coll_free_metric = (1 * collision_free) * metric_val
                        self.tensor_datapoint[metric_name] = coll_free_metric
                    self.tensor_dataset.add(self.tensor_datapoint)
                    print("Saved dataset point")
                    self.cur_image_label += 1
                self.cur_pose_label += 1
                gc.collect()
            self.cur_obj_label += 1

        self.tensor_dataset.flush()

    def _o3d_meshing(self, pcd):
        mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
            pcd, depth=15)
        densities = np.asarray(densities)

        # print('visualize densities')
        # densities = np.asarray(densities)
        # density_colors = plt.get_cmap('plasma')(
        #     (densities - densities.min()) / (densities.max() - densities.min()))
        # density_colors = density_colors[:, :3]
        # density_mesh = o3d.geometry.TriangleMesh()
        # density_mesh.vertices = mesh.vertices
        # density_mesh.triangles = mesh.triangles
        # density_mesh.triangle_normals = mesh.triangle_normals
        # density_mesh.vertex_colors = o3d.utility.Vector3dVector(density_colors)
        # o3d.visualization.draw_geometries([density_mesh])

        vertices_to_remove = densities < 7.0  # np.quantile(densities, 0.01)
        mesh.remove_vertices_by_mask(vertices_to_remove)
        mesh.compute_vertex_normals()
        mesh.paint_uniform_color([0.6, 0.6, 0.6])
        o3d.io.write_triangle_mesh(
            self.output_dir + '/meshes/' + "%05d_%03d.ply" %
            (self.tensor, self.array), mesh)

        if visualise_mesh:
            o3d.visualization.draw_geometries([mesh])
        vis = o3d.visualization.Visualizer()
        vis.create_window(height=300, width=300, visible=False)
        vis.get_render_option().load_from_json("./data/renderconfig.json")
        vis.add_geometry(mesh)
        vic = vis.get_view_control()
        params = vic.convert_to_pinhole_camera_parameters()
        (fx, fy) = params.intrinsic.get_focal_length()
        (cx, cy) = params.intrinsic.get_principal_point()
        params.intrinsic.set_intrinsics(300, 300, 525, 525, cx, cy)
        params.extrinsic = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0],
                                     [0, 0, 0, 1]])
        vic.convert_from_pinhole_camera_parameters(params)
        vis.poll_events()
        vis.update_renderer()
        depth = vis.capture_depth_float_buffer(do_render=True)
        # vis.destroy_window()
        # del vis
        return depth

    def _projection(self, transformed_points, camera_intr):
        # Use Camera intrinsics
        new_camera_intr = camera_intr
        new_camera_intr.cx = 150
        new_camera_intr.cy = 150
        K = new_camera_intr.proj_matrix

        projected_points = np.dot(K, transformed_points)

        point_depths = projected_points[2, :]
        table = np.median(point_depths)
        point_z = np.tile(point_depths, [3, 1])
        points_proj = np.divide(projected_points, point_z)

        # Rounding
        points_proj = np.round(points_proj)
        points_proj = points_proj[:2, :].astype(np.int16)

        valid_ind = np.where((points_proj[0, :] >= 0)
                             & (points_proj[0, :] < self.image_size[0])
                             & (points_proj[1, :] >= 0)
                             & (points_proj[1, :] < self.image_size[0]))[0]
        # Fill new image with NaN's
        fill_value = -1.0
        depth_data = np.full([self.image_size[0], self.image_size[1]],
                             fill_value)
        for ind in valid_ind:
            prev_depth = depth_data[points_proj[1, ind], points_proj[0, ind]]
            if prev_depth == fill_value or prev_depth >= point_depths[ind]:
                depth_data[points_proj[1, ind],
                           points_proj[0, ind]] = point_depths[ind]

        return depth_data, new_camera_intr, table

    def _transformation(self, points):
        # Points are given in camera frame. Transform to new camera frame!
        camera_new_position = np.array([[0], [0], [0]])

        ang = np.deg2rad(self.elev)
        Rot = np.array([[1, 0, 0], [0, np.cos(ang), -np.sin(ang)],
                        [0, np.sin(ang), np.cos(ang)]])
        dist = self.config['env_rv_params']['min_radius'] * np.sin(ang)
        height = self.config['env_rv_params']['min_radius'] - self.config[
            'env_rv_params']['min_radius'] * np.cos(ang)
        # Rotation around x axis, therefore translation back to object center alongside y axis
        trans = np.array([0, dist, height])
        Rt = np.column_stack((Rot, trans))
        # Apply transformation to homogeneous coordinates of the points
        homogeneous_points = np.append(np.transpose(points),
                                       np.ones((1, len(points))),
                                       axis=0)
        transformed_points = np.dot(Rt, homogeneous_points)
        return transformed_points, RigidTransform(rotation=Rot,
                                                  translation=trans,
                                                  from_frame='camera',
                                                  to_frame='new_camera')

    # def _PCA(self, points, sorting=True):
    #     mean = np.mean(points, axis=0)
    #     data_adjusted = points - mean
    #
    #     matrix = np.cov(data_adjusted.T)
    #     eigenvalues, eigenvectors = np.linalg.eig(matrix)
    #
    #     if sorting:
    #         sort = eigenvalues.argsort()[::-1]
    #         eigenvalues = eigenvalues[sort]
    #         eigenvectors = eigenvectors[:, sort]
    #     return eigenvalues, eigenvectors

    def _reproject_to_3D(self, depth_im, camera_intr):
        # depth points will be given in camera frame!
        depth_points = camera_intr.deproject(depth_im).data.T
        return depth_points

    def _scale_image(self, depth):
        size = depth.shape
        flattend = depth.flatten()
        scaled = np.interp(flattend, (0.5, 0.75), (0, 255))
        integ = scaled.astype(np.uint8)
        integ.resize(size)
        return integ
Beispiel #2
0
def generate_gqcnn_dataset(dataset_path, database, target_object_keys,
                           env_rv_params, gripper_name, config):
    """
    Generates a GQ-CNN TensorDataset for training models with new grippers, quality metrics, objects, and cameras.

    Parameters
    ----------
    dataset_path : str
        path to save the dataset to
    database : :obj:`Hdf5Database`
        Dex-Net database containing the 3D meshes, grasps, and grasp metrics
    target_object_keys : :obj:`OrderedDict`
        dictionary mapping dataset names to target objects (either 'all' or a list of specific object keys)
    env_rv_params : :obj:`OrderedDict`
        parameters of the camera and object random variables used in sampling (see meshpy_berkeley.UniformPlanarWorksurfaceImageRandomVariable for more info)
    gripper_name : str
        name of the gripper to use
    config : :obj:`autolab_core.YamlConfig`
        other parameters for dataset generation

    Notes
    -----
    Required parameters of config are specified in Other Parameters

    Other Parameters
    ----------------    
    images_per_stable_pose : int
        number of object and camera poses to sample for each stable pose
    stable_pose_min_p : float
        minimum probability of occurrence for a stable pose to be used in data generation (used to prune bad stable poses
    
    gqcnn/crop_width : int
        width, in pixels, of crop region around each grasp center, before resize (changes the size of the region seen by the GQ-CNN)
    gqcnn/crop_height : int
        height, in pixels,  of crop region around each grasp center, before resize (changes the size of the region seen by the GQ-CNN)
    gqcnn/final_width : int
        width, in pixels,  of final transformed grasp image for input to the GQ-CNN (defaults to 32)
    gqcnn/final_height : int
        height, in pixels,  of final transformed grasp image for input to the GQ-CNN (defaults to 32)

    table_alignment/max_approach_table_angle : float
        max angle between the grasp axis and the table normal when the grasp approach is maximally aligned with the table normal
    table_alignment/max_approach_offset : float
        max deviation from perpendicular approach direction to use in grasp collision checking
    table_alignment/num_approach_offset_samples : int
        number of approach samples to use in collision checking

    collision_checking/table_offset : float
        max allowable interpenetration between the gripper and table to be considered collision free
    collision_checking/table_mesh_filename : str
        path to a table mesh for collision checking (default data/meshes/table.obj)
    collision_checking/approach_dist : float
        distance, in meters, between the approach pose and final grasp pose along the grasp axis
    collision_checking/delta_approach : float
        amount, in meters, to discretize the straight-line path from the gripper approach pose to the final grasp pose

    tensors/datapoints_per_file : int
        number of datapoints to store in each unique tensor file on disk
    tensors/fields : :obj:`dict`
        dictionary mapping field names to dictionaries specifying the data type, height, width, and number of channels for each tensor

    debug : bool
        True (or 1) if the random seed should be set to enforce deterministic behavior, False (0) otherwise
    vis/candidate_grasps : bool
        True (or 1) if the collision free candidate grasps should be displayed in 3D (for debugging)
    vis/rendered_images : bool
        True (or 1) if the rendered images for each stable pose should be displayed (for debugging)
    vis/grasp_images : bool
        True (or 1) if the transformed grasp images should be displayed (for debugging)
    """
    # read data gen params
    output_dir = dataset_path
    gripper = RobotGripper.load(gripper_name)
    image_samples_per_stable_pose = config['images_per_stable_pose']
    stable_pose_min_p = config['stable_pose_min_p']

    # read gqcnn params
    gqcnn_params = config['gqcnn']
    im_crop_height = gqcnn_params['crop_height']
    im_crop_width = gqcnn_params['crop_width']
    im_final_height = gqcnn_params['final_height']
    im_final_width = gqcnn_params['final_width']
    cx_crop = float(im_crop_width) / 2
    cy_crop = float(im_crop_height) / 2

    # open database
    dataset_names = target_object_keys.keys()
    datasets = [database.dataset(dn) for dn in dataset_names]

    # set target objects
    for dataset in datasets:
        if target_object_keys[dataset.name] == 'all':
            target_object_keys[dataset.name] = dataset.object_keys

    # setup grasp params
    table_alignment_params = config['table_alignment']
    min_grasp_approach_offset = -np.deg2rad(
        table_alignment_params['max_approach_offset'])
    max_grasp_approach_offset = np.deg2rad(
        table_alignment_params['max_approach_offset'])
    max_grasp_approach_table_angle = np.deg2rad(
        table_alignment_params['max_approach_table_angle'])
    num_grasp_approach_samples = table_alignment_params[
        'num_approach_offset_samples']

    phi_offsets = []
    if max_grasp_approach_offset == min_grasp_approach_offset:
        phi_inc = 1
    elif num_grasp_approach_samples == 1:
        phi_inc = max_grasp_approach_offset - min_grasp_approach_offset + 1
    else:
        phi_inc = (max_grasp_approach_offset - min_grasp_approach_offset) / (
            num_grasp_approach_samples - 1)

    phi = min_grasp_approach_offset
    while phi <= max_grasp_approach_offset:
        phi_offsets.append(phi)
        phi += phi_inc

    # setup collision checking
    coll_check_params = config['collision_checking']
    approach_dist = coll_check_params['approach_dist']
    delta_approach = coll_check_params['delta_approach']
    table_offset = coll_check_params['table_offset']

    table_mesh_filename = coll_check_params['table_mesh_filename']
    if not os.path.isabs(table_mesh_filename):
        table_mesh_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), '..',
            table_mesh_filename)
    table_mesh = ObjFile(table_mesh_filename).read()

    # set tensor dataset config
    tensor_config = config['tensors']
    tensor_config['fields']['depth_ims_tf_table']['height'] = im_final_height
    tensor_config['fields']['depth_ims_tf_table']['width'] = im_final_width
    tensor_config['fields']['obj_masks']['height'] = im_final_height
    tensor_config['fields']['obj_masks']['width'] = im_final_width

    # add available metrics (assuming same are computed for all objects)
    metric_names = []
    dataset = datasets[0]
    obj_keys = dataset.object_keys
    if len(obj_keys) == 0:
        raise ValueError('No valid objects in dataset %s' % (dataset.name))

    obj = dataset[obj_keys[0]]
    grasps = dataset.grasps(obj.key, gripper=gripper.name)
    grasp_metrics = dataset.grasp_metrics(obj.key,
                                          grasps,
                                          gripper=gripper.name)
    metric_names = grasp_metrics[grasp_metrics.keys()[0]].keys()
    for metric_name in metric_names:
        tensor_config['fields'][metric_name] = {}
        tensor_config['fields'][metric_name]['dtype'] = 'float32'

    # init tensor dataset
    tensor_dataset = TensorDataset(output_dir, tensor_config)
    tensor_datapoint = tensor_dataset.datapoint_template

    # setup log file
    experiment_log_filename = os.path.join(output_dir,
                                           'dataset_generation.log')
    formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s')
    hdlr = logging.FileHandler(experiment_log_filename)
    hdlr.setFormatter(formatter)
    logging.getLogger().addHandler(hdlr)
    root_logger = logging.getLogger()

    # copy config
    out_config_filename = os.path.join(output_dir, 'dataset_generation.json')
    ordered_dict_config = collections.OrderedDict()
    for key in config.keys():
        ordered_dict_config[key] = config[key]
    with open(out_config_filename, 'w') as outfile:
        json.dump(ordered_dict_config, outfile)

    # 1. Precompute the set of valid grasps for each stable pose:
    #    i) Perpendicular to the table
    #   ii) Collision-free along the approach direction

    # load grasps if they already exist
    grasp_cache_filename = os.path.join(output_dir, CACHE_FILENAME)
    if os.path.exists(grasp_cache_filename):
        logging.info('Loading grasp candidates from file')
        candidate_grasps_dict = pkl.load(open(grasp_cache_filename, 'rb'))
    # otherwise re-compute by reading from the database and enforcing constraints
    else:
        # create grasps dict
        candidate_grasps_dict = {}

        # loop through datasets and objects
        for dataset in datasets:
            logging.info('Reading dataset %s' % (dataset.name))
            for obj in dataset:
                if obj.key not in target_object_keys[dataset.name]:
                    continue

                # init candidate grasp storage
                candidate_grasps_dict[obj.key] = {}

                # setup collision checker
                collision_checker = GraspCollisionChecker(gripper)
                collision_checker.set_graspable_object(obj)

                # read in the stable poses of the mesh
                stable_poses = dataset.stable_poses(obj.key)
                for i, stable_pose in enumerate(stable_poses):
                    # render images if stable pose is valid
                    if stable_pose.p > stable_pose_min_p:
                        candidate_grasps_dict[obj.key][stable_pose.id] = []

                        # setup table in collision checker
                        T_obj_stp = stable_pose.T_obj_table.as_frames(
                            'obj', 'stp')
                        T_obj_table = obj.mesh.get_T_surface_obj(
                            T_obj_stp,
                            delta=table_offset).as_frames('obj', 'table')
                        T_table_obj = T_obj_table.inverse()
                        collision_checker.set_table(table_mesh_filename,
                                                    T_table_obj)

                        # read grasp and metrics
                        grasps = dataset.grasps(obj.key, gripper=gripper.name)
                        logging.info(
                            'Aligning %d grasps for object %s in stable %s' %
                            (len(grasps), obj.key, stable_pose.id))

                        # align grasps with the table
                        aligned_grasps = [
                            grasp.perpendicular_table(stable_pose)
                            for grasp in grasps
                        ]

                        # check grasp validity
                        logging.info(
                            'Checking collisions for %d grasps for object %s in stable %s'
                            % (len(grasps), obj.key, stable_pose.id))
                        for aligned_grasp in aligned_grasps:
                            # check angle with table plane and skip unaligned grasps
                            _, grasp_approach_table_angle, _ = aligned_grasp.grasp_angles_from_stp_z(
                                stable_pose)
                            perpendicular_table = (
                                np.abs(grasp_approach_table_angle) <
                                max_grasp_approach_table_angle)
                            if not perpendicular_table:
                                continue

                            # check whether any valid approach directions are collision free
                            collision_free = False
                            for phi_offset in phi_offsets:
                                rotated_grasp = aligned_grasp.grasp_y_axis_offset(
                                    phi_offset)
                                collides = collision_checker.collides_along_approach(
                                    rotated_grasp, approach_dist,
                                    delta_approach)
                                if not collides:
                                    collision_free = True
                                    break

                            # store if aligned to table
                            candidate_grasps_dict[obj.key][
                                stable_pose.id].append(
                                    GraspInfo(aligned_grasp, collision_free))

                            # visualize if specified
                            if collision_free and config['vis'][
                                    'candidate_grasps']:
                                logging.info('Grasp %d' % (aligned_grasp.id))
                                vis.figure()
                                vis.gripper_on_object(gripper, aligned_grasp,
                                                      obj,
                                                      stable_pose.T_obj_world)
                                vis.show()

        # save to file
        logging.info('Saving to file')
        pkl.dump(candidate_grasps_dict, open(grasp_cache_filename, 'wb'))

    # 2. Render a dataset of images and associate the gripper pose with image coordinates for each grasp in the Dex-Net database

    # setup variables
    obj_category_map = {}
    pose_category_map = {}

    cur_pose_label = 0
    cur_obj_label = 0
    cur_image_label = 0

    # render images for each stable pose of each object in the dataset
    render_modes = [RenderMode.SEGMASK, RenderMode.DEPTH_SCENE]
    for dataset in datasets:
        logging.info('Generating data for dataset %s' % (dataset.name))

        # iterate through all object keys
        object_keys = dataset.object_keys
        for obj_key in object_keys:
            obj = dataset[obj_key]
            if obj.key not in target_object_keys[dataset.name]:
                continue

            # read in the stable poses of the mesh
            stable_poses = dataset.stable_poses(obj.key)
            for i, stable_pose in enumerate(stable_poses):

                # render images if stable pose is valid
                if stable_pose.p > stable_pose_min_p:
                    # log progress
                    logging.info('Rendering images for object %s in %s' %
                                 (obj.key, stable_pose.id))

                    # add to category maps
                    if obj.key not in obj_category_map.keys():
                        obj_category_map[obj.key] = cur_obj_label
                    pose_category_map['%s_%s' %
                                      (obj.key,
                                       stable_pose.id)] = cur_pose_label

                    # read in candidate grasps and metrics
                    candidate_grasp_info = candidate_grasps_dict[obj.key][
                        stable_pose.id]
                    candidate_grasps = [g.grasp for g in candidate_grasp_info]
                    grasp_metrics = dataset.grasp_metrics(obj.key,
                                                          candidate_grasps,
                                                          gripper=gripper.name)

                    # compute object pose relative to the table
                    T_obj_stp = stable_pose.T_obj_table.as_frames('obj', 'stp')
                    T_obj_stp = obj.mesh.get_T_surface_obj(T_obj_stp)

                    # sample images from random variable
                    T_table_obj = RigidTransform(from_frame='table',
                                                 to_frame='obj')
                    scene_objs = {
                        'table': SceneObject(table_mesh, T_table_obj)
                    }
                    urv = UniformPlanarWorksurfaceImageRandomVariable(
                        obj.mesh,
                        render_modes,
                        'camera',
                        env_rv_params,
                        stable_pose=stable_pose,
                        scene_objs=scene_objs)

                    render_start = time.time()
                    render_samples = urv.rvs(
                        size=image_samples_per_stable_pose)
                    render_stop = time.time()
                    logging.info('Rendering images took %.3f sec' %
                                 (render_stop - render_start))

                    # visualize
                    if config['vis']['rendered_images']:
                        d = int(np.ceil(
                            np.sqrt(image_samples_per_stable_pose)))

                        # binary
                        vis2d.figure()
                        for j, render_sample in enumerate(render_samples):
                            vis2d.subplot(d, d, j + 1)
                            vis2d.imshow(render_sample.renders[
                                RenderMode.SEGMASK].image)

                        # depth table
                        vis2d.figure()
                        for j, render_sample in enumerate(render_samples):
                            vis2d.subplot(d, d, j + 1)
                            vis2d.imshow(render_sample.renders[
                                RenderMode.DEPTH_SCENE].image)
                        vis2d.show()

                    # tally total amount of data
                    num_grasps = len(candidate_grasps)
                    num_images = image_samples_per_stable_pose
                    num_save = num_images * num_grasps
                    logging.info('Saving %d datapoints' % (num_save))

                    # for each candidate grasp on the object compute the projection
                    # of the grasp into image space
                    for render_sample in render_samples:
                        # read images
                        binary_im = render_sample.renders[
                            RenderMode.SEGMASK].image
                        depth_im_table = render_sample.renders[
                            RenderMode.DEPTH_SCENE].image
                        # read camera params
                        T_stp_camera = render_sample.camera.object_to_camera_pose
                        shifted_camera_intr = render_sample.camera.camera_intr

                        # read pixel offsets
                        cx = depth_im_table.center[1]
                        cy = depth_im_table.center[0]

                        # compute intrinsics for virtual camera of the final
                        # cropped and rescaled images
                        camera_intr_scale = float(im_final_height) / float(
                            im_crop_height)
                        cropped_camera_intr = shifted_camera_intr.crop(
                            im_crop_height, im_crop_width, cy, cx)
                        final_camera_intr = cropped_camera_intr.resize(
                            camera_intr_scale)

                        # create a thumbnail for each grasp
                        for grasp_info in candidate_grasp_info:
                            # read info
                            grasp = grasp_info.grasp
                            collision_free = grasp_info.collision_free

                            # get the gripper pose
                            T_obj_camera = T_stp_camera * T_obj_stp.as_frames(
                                'obj', T_stp_camera.from_frame)
                            grasp_2d = grasp.project_camera(
                                T_obj_camera, shifted_camera_intr)

                            # center images on the grasp, rotate to image x axis
                            dx = cx - grasp_2d.center.x
                            dy = cy - grasp_2d.center.y
                            translation = np.array([dy, dx])

                            binary_im_tf = binary_im.transform(
                                translation, grasp_2d.angle)
                            depth_im_tf_table = depth_im_table.transform(
                                translation, grasp_2d.angle)

                            # crop to image size
                            binary_im_tf = binary_im_tf.crop(
                                im_crop_height, im_crop_width)
                            depth_im_tf_table = depth_im_tf_table.crop(
                                im_crop_height, im_crop_width)

                            # resize to image size
                            binary_im_tf = binary_im_tf.resize(
                                (im_final_height, im_final_width),
                                interp='nearest')
                            depth_im_tf_table = depth_im_tf_table.resize(
                                (im_final_height, im_final_width))

                            # visualize the transformed images
                            if config['vis']['grasp_images']:
                                grasp_center = Point(
                                    depth_im_tf_table.center,
                                    frame=final_camera_intr.frame)
                                tf_grasp_2d = Grasp2D(
                                    grasp_center,
                                    0,
                                    grasp_2d.depth,
                                    width=gripper.max_width,
                                    camera_intr=final_camera_intr)

                                # plot 2D grasp image
                                vis2d.figure()
                                vis2d.subplot(2, 2, 1)
                                vis2d.imshow(binary_im)
                                vis2d.grasp(grasp_2d)
                                vis2d.subplot(2, 2, 2)
                                vis2d.imshow(depth_im_table)
                                vis2d.grasp(grasp_2d)
                                vis2d.subplot(2, 2, 3)
                                vis2d.imshow(binary_im_tf)
                                vis2d.grasp(tf_grasp_2d)
                                vis2d.subplot(2, 2, 4)
                                vis2d.imshow(depth_im_tf_table)
                                vis2d.grasp(tf_grasp_2d)
                                vis2d.title('Coll Free? %d' %
                                            (grasp_info.collision_free))
                                vis2d.show()

                                # plot 3D visualization
                                vis.figure()
                                T_obj_world = vis.mesh_stable_pose(
                                    obj.mesh,
                                    stable_pose.T_obj_world,
                                    style='surface',
                                    dim=0.5)
                                vis.gripper(gripper,
                                            grasp,
                                            T_obj_world,
                                            color=(0.3, 0.3, 0.3))
                                vis.show()

                            # form hand pose array
                            hand_pose = np.r_[grasp_2d.center.y,
                                              grasp_2d.center.x,
                                              grasp_2d.depth, grasp_2d.angle,
                                              grasp_2d.center.y -
                                              shifted_camera_intr.cy,
                                              grasp_2d.center.x -
                                              shifted_camera_intr.cx,
                                              grasp_2d.width_px]

                            # store to data buffers
                            tensor_datapoint[
                                'depth_ims_tf_table'] = depth_im_tf_table.raw_data
                            tensor_datapoint[
                                'obj_masks'] = binary_im_tf.raw_data
                            tensor_datapoint['hand_poses'] = hand_pose
                            tensor_datapoint['collision_free'] = collision_free
                            tensor_datapoint['obj_labels'] = cur_obj_label
                            tensor_datapoint['pose_labels'] = cur_pose_label
                            tensor_datapoint['image_labels'] = cur_image_label

                            for metric_name, metric_val in grasp_metrics[
                                    grasp.id].iteritems():
                                coll_free_metric = (
                                    1 * collision_free) * metric_val
                                tensor_datapoint[
                                    metric_name] = coll_free_metric
                            tensor_dataset.add(tensor_datapoint)

                        # update image label
                        cur_image_label += 1

                    # update pose label
                    cur_pose_label += 1

                    # force clean up
                    gc.collect()

            # update object label
            cur_obj_label += 1

            # force clean up
            gc.collect()

    # save last file
    tensor_dataset.flush()

    # save category mappings
    obj_cat_filename = os.path.join(output_dir, 'object_category_map.json')
    json.dump(obj_category_map, open(obj_cat_filename, 'w'))
    pose_cat_filename = os.path.join(output_dir, 'pose_category_map.json')
    json.dump(pose_category_map, open(pose_cat_filename, 'w'))
Beispiel #3
0
class GenerateBalancedObliqueDataset:
    def __init__(self, output_dir):
        self.NUM_OBJECTS = None
        self.table_file = DATA_DIR + '/meshes/table.obj'
        self.data_dir = DATA_DIR + '/meshes/dexnet/'

        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        self.image_dir = output_dir + '/images/'
        if not os.path.exists(self.image_dir):
            os.mkdir(self.image_dir)
        self.orig_image_dir = output_dir + '/orig_images/'
        if not os.path.exists(self.orig_image_dir):
            os.mkdir(self.orig_image_dir)
        self.config = YamlConfig(CONFIG)

        self.phi_offsets = self._generate_phi_offsets()
        self.datasets = self._load_datasets()
        self.tensor_dataset = TensorDataset(output_dir, self._set_tensor_config())
        self.tensor_datapoint = self.tensor_dataset.datapoint_template
        self.gripper = self._set_gripper()
        self._table_mesh_filename = self._set_table_mesh_filename()
        self.table_mesh = self._set_table_mesh()

        self.cur_pose_label = 0
        self.cur_obj_label = 0
        self.cur_image_label = 0
        self.cur_image_file = 0

        self.obj = None
        self.T_obj_camera = None

    def _camera_configs(self):
        return self.config['env_rv_params'].copy()

    @property
    def _camera_intr_scale(self):
        return 32.0 / 96.0

    @property
    def _render_modes(self):
        return [RenderMode.DEPTH_SCENE]

    @property
    def _max_grasp_approach_offset(self):
        return np.deg2rad(self.config['table_alignment']['max_approach_offset'])

    @property
    def _min_grasp_approach_offset(self):
        return -np.deg2rad(self.config['table_alignment']['max_approach_offset'])

    @property
    def _max_grasp_approach_table_angle(self):
        return np.deg2rad(self.config['table_alignment']['max_approach_table_angle'])

    @property
    def _num_grasp_approach_samples(self):
        return self.config['table_alignment']['num_approach_offset_samples']

    def _generate_phi_offsets(self):
        phi_offsets = []
        if self._max_grasp_approach_offset == self._min_grasp_approach_offset:
            phi_inc = 1
        elif self._num_grasp_approach_samples == 1:
            phi_inc = self._max_grasp_approach_offset - self._min_grasp_approach_offset + 1
        else:
            phi_inc = (self._max_grasp_approach_offset - self._min_grasp_approach_offset) / \
                      (self._num_grasp_approach_samples - 1)

        phi = self._min_grasp_approach_offset
        while phi <= self._max_grasp_approach_offset:
            phi_offsets.append(phi)
            phi += phi_inc
        return phi_offsets

    @property
    def _approach_dist(self):
        return self.config['collision_checking']['approach_dist']

    @property
    def _delta_approach(self):
        return self.config['collision_checking']['delta_approach']

    @property
    def _table_offset(self):
        return self.config['collision_checking']['table_offset']

    @property
    def _stable_pose_min_p(self):
        return self.config['stable_pose_min_p']

    def _set_table_mesh_filename(self):
        table_mesh_filename = self.config['collision_checking']['table_mesh_filename']
        if not os.path.isabs(table_mesh_filename):
            return os.path.join(DATA_DIR, table_mesh_filename)
        return table_mesh_filename

    def _set_table_mesh(self):
        return ObjFile(self._table_mesh_filename).read()

    def _set_gripper(self):
        return RobotGripper.load(self.config['gripper'])

    def _load_datasets(self):
        """ Load the datasets in the Hdf5 database given through the config file.

            Returns
            -------
            datasets (list): list of datasets
        """
        database = Hdf5Database(self.config['database_name'], access_level=READ_ONLY_ACCESS)
        target_object_keys = self.config['target_objects']
        dataset_names = target_object_keys.keys()
        datasets = [database.dataset(dn) for dn in dataset_names]
        if self.NUM_OBJECTS is not None:
            datasets = [dataset.subset(0, self.NUM_OBJECTS) for dataset in datasets]
        return datasets

    def _set_tensor_config(self):
        """ Sets the tensor config based on the used config file.

            Returns
            -------
            tensor_config (dict): tensor config that can be used to initiate a tensor dataset.
        """
        tensor_config = self.config['tensors']
        tensor_config['fields']['robust_ferrari_canny'] = {}
        tensor_config['fields']['robust_ferrari_canny']['dtype'] = 'float32'
        return tensor_config

    def render_images(self, scene_objs, stable_pose, num_images, camera_config=None):
        """ Renders depth and binary images from self.obj at the given stable pose. The camera
            sampling occurs within urv.rvs.

            Parameters
            ----------
            scene_objs (dict): Objects occuring in the scene, mostly includes the table mesh.
            stable_pose (StablePose): Stable pose of the object
            num_images (int): Numbers of images to render
            camera_config (dict): Camera sampling parameters with minimum/maximum values for radius, polar angle, ...

            Returns
            -------
            render_samples (list): list of rendered images including sampled camera positions
        """
        if camera_config is None:
            camera_config = self.config['env_rv_params']
        urv = UniformPlanarWorksurfaceImageRandomVariable(self.obj.mesh,
                                                          self._render_modes,
                                                          'camera',
                                                          camera_config,
                                                          scene_objs=scene_objs,
                                                          stable_pose=stable_pose)
        # Render images
        render_samples = urv.rvs(size=num_images)
        return render_samples

    def align_grasps_with_camera(self, grasps):
        """ Attempts to align all grasps in grasps with the z axis of the camera in self.T_obj_camera.

            Parameters
            ----------
            grasps (list): List of grasps for the object mesh in the object frame.

            Returns
            -------
            aligned_grasps (list): List of aligned grasps for the object mesh in the object frame
        """
        z_axis_in_obj = np.dot(self.T_obj_camera.inverse().matrix, np.array((0, 0, -1, 1)).reshape(4, 1))
        z_axis = z_axis_in_obj[0:3].squeeze() / np.linalg.norm(z_axis_in_obj[0:3].squeeze())
        aligned_grasps = [grasp.perpendicular_table(z_axis) for grasp in grasps]
        return aligned_grasps

    def get_camera_pose(self, sample):
        """ Attempts to align all grasps in grasps with the z axis of the camera in self.T_obj_camera.

            Parameters
            ----------
            sample (meshpy rendersample): Image sample

            Returns
            -------
            camera pose (np.array): Array including the camera radius, elevation angle, polar angle, roll, focal length,
                                    and table translation in x and y.
        """
        return np.r_[sample.camera.radius,
                     sample.camera.elev,
                     sample.camera.az,
                     sample.camera.roll,
                     sample.camera.focal,
                     sample.camera.tx,
                     sample.camera.ty]

    def is_grasp_aligned(self, aligned_grasp, stable_pose=None):
        """ Checks if the grasp is aligned with the camera z axis or the z axis of the stable pose, if given.

            Parameters
            ----------
            aligned_grasp (Grasp)
            stable_pose (StablePose): stable pose of object. Default: None

            Returns
            -------
            Aligned (bool): True if grasp is aligned with the desired z axis.
        """
        if stable_pose is not None:
            _, grasp_approach_table_angle, _ = aligned_grasp.grasp_angles_from_stp_z(stable_pose)
        else:
            _, grasp_approach_table_angle, _ = aligned_grasp.grasp_angles_from_camera_z(self.T_obj_camera)
        perpendicular_table = (np.abs(grasp_approach_table_angle) < self._max_grasp_approach_table_angle)
        if not perpendicular_table:
            return False
        return True

    def is_grasp_collision_free(self, grasp, collision_checker):
        """ Checks if any of the +- phi grasp linear approaches are collision free with OpenRave collision checker.

            Parameters
            ----------
            grasp (Grasp)
            collision_checker (GraspCollisionChecker)

            Returns
            -------
            collision_free (bool): True if grasp is collision free.
        """
        collision_free = False
        for phi_offset in self.phi_offsets:
            grasp.grasp_y_axis_offset(phi_offset)
            collides = collision_checker.collides_along_approach(grasp, self._approach_dist,
                                                                 self._delta_approach)
            if not collides:
                collision_free = True
                break
        return collision_free

    def get_hand_pose(self, grasp_center_x, grasp_center_y, grasp_2d, grasp_3d, grasp_width, grasp_width_px):
        """ Organises numpy array for hand_pose tensor.

            Parameters
            ----------
            grasp_2d (Grasp2D)
            grasp_3d (Grasp3D)

            Returns
            -------
            hand_pose (np.array): Hand_pose tensor including distance between camera and grasp tcp and grasp rotation
                                    quaternion in camera frame.
        """
        return np.r_[grasp_center_x,
                     grasp_center_y,
                     grasp_2d.depth,
                     grasp_3d.quaternion,  # w x y z layout
                     grasp_width,
                     grasp_width_px]

    def _crop_and_resize(self, image):
        """ Crop and resize an image to the final height and width.

            Parameters
            ----------
            image (DepthImage)

            Returns
            -------
            final_im (np.array): cropped and resized according to crop_height/width and final_height/width in
                                self.config['gqcnn']
        """
        cropped_im = image.crop(self.config['gqcnn']['crop_height'], self.config['gqcnn']['crop_width'])
        resized_im = Image.fromarray(np.asarray(cropped_im.data)).resize((self.config['gqcnn']['final_height'],
                                                                          self.config['gqcnn']['final_width']),
                                                                         resample=Image.BILINEAR)
        final_im = np.asarray(resized_im).reshape(self.config['gqcnn']['final_height'],
                                                  self.config['gqcnn']['final_width'],
                                                  1)
        return final_im

    @staticmethod
    def scale(array):
        """ Scale image for visualisation purposes."""
        size = array.shape
        flattend = array.flatten()
        scaled = np.interp(flattend, (min(flattend), max(flattend)), (0, 255), left=0, right=255)
        integ = scaled.astype(np.uint8)
        integ.resize(size)
        return integ.squeeze()

    def _show_image(self, image):
        """ Show image by saving it in /data/test_image.png"""
        scaled_image = self.scale(image)
        im = Image.fromarray(scaled_image).resize((300, 300), resample=Image.NEAREST)
        im.save('/data/test_image.png')
        im.show()

    def prepare_images(self, depth_im_table, dx, dy):
        # Get translation image distances to grasp
        translation = np.array([dy, dx])

        # Transform, crop and resize image
        depth_im_tf_table = depth_im_table.transform(translation, 0.0)

        depth_im_tf = self._crop_and_resize(depth_im_tf_table)

        # self._show_image(depth_im_tf)
        # self._show_image(depth_unrotated_im_tf)

        return depth_im_tf

    def add_datapoint(self, hand_pose,
                      collision_free, aligned_grasp, grasp_metrics):
        # Add data to tensor dataset
        self.tensor_datapoint['hand_poses'] = hand_pose
        self.tensor_datapoint['collision_free'] = collision_free

        # Add metrics to tensor dataset
        self.tensor_datapoint['robust_ferrari_canny'] = (1 * collision_free) * \
                                                        grasp_metrics[aligned_grasp.id]['robust_ferrari_canny']

        self.tensor_dataset.add(self.tensor_datapoint)

    def width_px(self, camera_intr, point1, point2):
        """Returns the width in pixels."""
        # Form the jaw locations in 3D space at the given depth.
        p1 = Point(point1, frame='camera')
        p2 = Point(point2, frame='camera')

        # Project into pixel space.
        u1 = camera_intr.project(p1)
        u2 = camera_intr.project(p2)
        return np.linalg.norm(u1.data - u2.data)

    def save_samples(self, sample, grasps, T_obj_stp, collision_checker, grasp_metrics, stable_pose):
        self.tensor_datapoint['image_labels'] = self.cur_image_label
        T_stp_camera = sample.camera.object_to_camera_pose
        self.T_obj_camera = T_stp_camera * T_obj_stp.as_frames('obj', T_stp_camera.from_frame)
        aligned_grasps = self.align_grasps_with_camera(grasps)

        depth_im_table = sample.renders[RenderMode.DEPTH_SCENE].image
        cx = depth_im_table.center[1]
        cy = depth_im_table.center[0]
        camera_pose = self.get_camera_pose(sample)
        self.tensor_datapoint['camera_poses'] = camera_pose
        shifted_camera_intr = sample.camera.camera_intr
        self.tensor_datapoint['camera_intrs'] = np.r_[shifted_camera_intr.fx,
                                                      shifted_camera_intr.fy,
                                                      shifted_camera_intr.cx,
                                                      shifted_camera_intr.cy]
        self.save_orig_image(depth_im_table, camera_pose, stable_pose, shifted_camera_intr)

        grid_space = 200
        grids = 3
        centre = np.empty((grids, grids, 2))
        saved = np.ones((grids, grids)) * -1

        depth_images = np.empty((grids, grids, 32, 32, 1))
        # Crop images in a grid around the centre
        for x_grid in range(grids):
            x_pos = grid_space / 4 - x_grid * grid_space / (grids + 1)
            for y_grid in range(grids):
                y_pos = grid_space / 4 - y_grid * grid_space / (grids + 1)
                depth_im = self.prepare_images(depth_im_table, x_pos, y_pos)
                self._show_image(depth_im)
                depth_images[x_grid, y_grid, :, :, :] = depth_im
                centre[x_grid, y_grid, 0] = x_pos
                centre[x_grid, y_grid, 1] = y_pos

        for cnt, aligned_grasp in enumerate(aligned_grasps):
            collision_free = self.is_grasp_collision_free(aligned_grasp, collision_checker)
            # Project grasp coordinates in image
            grasp_2d = aligned_grasp.project_camera(self.T_obj_camera, shifted_camera_intr)

            grasp_point = aligned_grasp.close_fingers(self.obj, vis=False, check_approach=False)

            if not grasp_point[0]:
                Warning("Could not find contact points")
                continue
            # Get grasp width in pixel from endpoints
            p_1 = np.dot(self.T_obj_camera.matrix, np.append(grasp_point[1][0].point, 1).T).T[:3]
            p_2 = np.dot(self.T_obj_camera.matrix, np.append(grasp_point[1][1].point, 1).T).T[:3]
            grasp_width_px = self.width_px(shifted_camera_intr, p_1, p_2)

            grasp_width = aligned_grasp.width_from_endpoints(grasp_point[1][0].point, grasp_point[1][1].point)

            pos_x = cx - grasp_2d.center.x
            pos_y = cy - grasp_2d.center.y

            distance = np.abs((centre[:, :, 0] - pos_x)**2 + (centre[:, :, 1] - pos_y)**2)

            idx = np.where(distance == np.amin(distance))
            idx_x = idx[0][0]
            idx_y = idx[1][0]

            grasp_center_x = (300 - grasp_2d.center.x - centre[idx_x, idx_y, 0])//3
            grasp_center_y = (300 - grasp_2d.center.y - centre[idx_x, idx_y, 1])//3

            if saved[idx_x, idx_y] == -1:
                np.save(self.image_dir + "/depth_im_{:07d}.npy".format(self.cur_image_file),
                        depth_images[idx_x, idx_y, :, :, :])
                saved[idx_x, idx_y] = self.cur_image_file
                self.tensor_datapoint['image_files'] = self.cur_image_file
                self.cur_image_file += 1
            else:
                self.tensor_datapoint['image_files'] = saved[idx_x, idx_y]

            T_grasp_camera = aligned_grasp.gripper_pose(self.gripper).inverse() * self.T_obj_camera.inverse()

            if VISUALISE_3D:
                z_axis = T_grasp_camera.z_axis
                theta = np.rad2deg(np.arccos((z_axis[2]) /
                                             (np.sqrt((z_axis[0] ** 2 + z_axis[1] ** 2 + z_axis[2] ** 2)))))
                vis.figure()
                T_obj_world = vis.mesh_stable_pose(self.obj.mesh.trimesh,
                                                   stable_pose.T_obj_world, style='surface', dim=0.5, plot_table=True)
                T_camera_world = T_obj_world * self.T_obj_camera.inverse()
                vis.gripper(self.gripper, aligned_grasp, T_obj_world, color=(0.3, 0.3, 0.3),
                            T_camera_world=T_camera_world)
                print(theta)
                vis.show()

            hand_pose = self.get_hand_pose(grasp_center_x, grasp_center_y, grasp_2d, T_grasp_camera,
                                           grasp_width, grasp_width_px)

            self.add_datapoint(hand_pose,
                               collision_free, aligned_grasp, grasp_metrics)
        self.cur_image_label += 1

    def save_orig_image(self, depth, camera_pose, stable_pose, camera_intr):
        depth_im = Image.fromarray(depth.data).crop((225, 225, 375, 375))
        depth_im.save(self.orig_image_dir + 'depth_{:06d}.tiff'.format(self.cur_image_label))
        with open(self.orig_image_dir + 'object_{:06d}.txt'.format(self.cur_image_label), 'w') as f:
            f.write(self.obj.key)
        self.T_obj_camera.save(self.orig_image_dir + 'camera_tf_{:06d}.tf'.format(self.cur_image_label))
        np.savetxt(self.orig_image_dir + 'camera_pose_{:06d}.txt'.format(self.cur_image_label), camera_pose)
        save_stp = StablePoseFile(self.orig_image_dir + 'stable_pose_{:06d}.stp'.format(self.cur_image_label))
        save_stp.write([stable_pose])
        camera_intr.save(self.orig_image_dir + 'camera_intr_{:06d}.intr'.format(self.cur_image_label))

    def render_data(self):
        logging.basicConfig(level=logging.WARNING)
        for dataset in self.datasets:
            logging.info('Generating data for dataset %s' % dataset.name)
            object_keys = dataset.object_keys

            for obj_key in object_keys:
                self.tensor_datapoint['object_labels'] = self.cur_obj_label
                if self.cur_obj_label % 10 == 0:
                    logging.info("Object number: %d" % self.cur_obj_label)
                self.obj = dataset[obj_key]

                grasps = dataset.grasps(self.obj.key, gripper=self.gripper.name)

                # Load grasp metrics
                grasp_metrics = dataset.grasp_metrics(self.obj.key,
                                                      grasps,
                                                      gripper=self.gripper.name)

                # setup collision checker
                collision_checker = GraspCollisionChecker(self.gripper)
                collision_checker.set_graspable_object(self.obj)

                # read in the stable poses of the mesh
                stable_poses = dataset.stable_poses(self.obj.key)

                # Iterate through stable poses
                for i, stable_pose in enumerate(stable_poses):
                    if not stable_pose.p > self._stable_pose_min_p:
                        continue
                    self.tensor_datapoint['pose_labels'] = self.cur_pose_label

                    # setup table in collision checker
                    T_obj_stp = stable_pose.T_obj_table.as_frames('obj', 'stp')
                    T_obj_table = self.obj.mesh.get_T_surface_obj(T_obj_stp,
                                                                  delta=self._table_offset).as_frames('obj', 'table')
                    T_table_obj = T_obj_table.inverse()
                    T_obj_stp = self.obj.mesh.get_T_surface_obj(T_obj_stp)

                    collision_checker.set_table(self._table_mesh_filename, T_table_obj)

                    # sample images from random variable
                    T_table_obj = RigidTransform(from_frame='table', to_frame='obj')
                    scene_objs = {'table': SceneObject(self.table_mesh, T_table_obj)}

                    # Set up image renderer
                    before = time.time()
                    samples = self.render_images(scene_objs,
                                                 stable_pose,
                                                 self.config['images_per_stable_pose'],
                                                 camera_config=self._camera_configs())
                    # print("Rendering took {:05f} seconds".format(time.time()-before))
                    # times = []
                    for sample in samples:
                        # before = time.time()
                        self.save_samples(sample, grasps, T_obj_stp, collision_checker, grasp_metrics, stable_pose)
                        # times.append(time.time() - before)
                    # print("Saving one sample took avg {:05f} seconds".format(sum(times)/len(times)))
                    self.cur_pose_label += 1
                    gc.collect()
                    # next stable pose
                self.cur_obj_label += 1
                # next object
        # Save dataset
        self.tensor_dataset.flush()
            # plt.subplot(132)
            # plt.title('QUALITY', fontsize=FONT_SIZE)
            # quality_image = quality_image[70:130,70:130,:]
            # plt.imshow(quality_image[:,:,0])

            # plt.subplot(133)
            # plt.title('ANGLE', fontsize=FONT_SIZE)
            # grasp_angle_image = grasp_angle_image[70:130,70:130,:]
            # plt.imshow(grasp_angle_image[:,:,0])

            # plt.show()
            # break
        gc.collect()
        # break

    tensor_dataset.flush()
    #########################################################################
    # np.random.shuffle(image_tensor_inds)

    # for ind in image_tensor_inds:
    #     print 'Loading image tensor', ind

    #     # load image data
    #     image_arr = np.load(image_tensors[ind])['arr_0']
    #     start_ind_arr = np.load(start_ind_tensors[ind])['arr_0']
    #     end_ind_arr = np.load(end_ind_tensors[ind])['arr_0']

    #     i = np.random.choice(DATAPOINTS_PER_FILE, size=1)[0]
    #     print 'Showing image', i

    #     # read datapoint
Beispiel #5
0
class GenerateBalancedObliqueDataset:
    def __init__(self, output_dir):
        self.NUM_OBJECTS = None
        self.table_file = DATA_DIR + '/meshes/table.obj'
        self.data_dir = DATA_DIR + '/meshes/dexnet/'

        output_dir = DATA_DIR + output_dir
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        self.output_dir = output_dir

        self.config = YamlConfig(
            './cfg/tools/generate_oblique_gqcnn_dataset.yaml')

        self.phi_offsets = self._generate_phi_offsets()
        self.datasets, self.target_object_keys = self._load_datasets()
        self.tensor_dataset = TensorDataset(self.output_dir,
                                            self._set_tensor_config())
        self.tensor_datapoint = self.tensor_dataset.datapoint_template
        self.gripper = self._set_gripper()
        self._table_mesh_filename = self._set_table_mesh_filename()
        self.table_mesh = self._set_table_mesh()

        self.cur_pose_label = 0
        self.cur_obj_label = 0
        self.cur_image_label = 0

        self.grasp_upsample_distribuitions = self._get_upsample_distributions()
        self.camera_distributions = self._get_camera_distributions()

        self.obj = None
        self.T_obj_camera = None

    def _get_camera_distributions(self):
        # Load relative amount of grasps per elevation angle phi
        relative_elevation_angles = np.load(
            DATA_DIR + '/meshes/relative_elevation_angles.npy')
        relative_elevation_angles[relative_elevation_angles > 1] = 1.0
        # Load positivity rate per elevation angle phi
        positivity_rate = np.load(DATA_DIR + '/meshes/positivity_rate.npy')
        # Get additional sample ratio through resampling positive grasps per elevation angle phi
        additional_sample_ratio = 1 + self.config[
            'gt_positive_ratio'] - positivity_rate / 100
        # Add additonal sample ratio to relative amount of grasps per elevation angle phi
        camera_frequencies = 1 / (relative_elevation_angles *
                                  additional_sample_ratio)
        camera_frequencies = np.round(
            camera_frequencies / sum(camera_frequencies) * 10000).astype(int)
        # Get camera distribution according to goal sampling ratios
        camera_distribution = [0] * camera_frequencies[0] + [1] * camera_frequencies[1] + \
                              [2] * camera_frequencies[2] + [3] * camera_frequencies[3] + \
                              [4] * camera_frequencies[4] + [5] * camera_frequencies[5] + \
                              [6] * camera_frequencies[6] + [7] * camera_frequencies[7] + \
                              [8] * camera_frequencies[8] + [9] * camera_frequencies[9] + \
                              [10] * camera_frequencies[10] + [11] * camera_frequencies[11]
        return camera_distribution

    def _get_upsample_distributions(self):
        # Get ratio of ground truth positive grasps in unbalanced dataset
        positivity_rate = np.load(DATA_DIR + '/meshes/positivity_rate.npy')
        # Get desired ratio of ground truth positive grasps
        gt_positive_ratio = self.config['gt_positive_ratio'] * 100
        # Get upsample ratio for positive grasps
        upsample_ratio = (gt_positive_ratio -
                          positivity_rate) / positivity_rate
        # Set upsample ratio of negative sampling ratios to zero
        upsample_ratio[upsample_ratio < 0] = 0
        X = [0, 1, 2, 3, 4, 5]
        sample_distributions = []
        # Fit poisson distribution to ratios
        for ratio in upsample_ratio:
            poisson_pd = np.round(poisson.pmf(X, ratio) * 1000).astype(int)
            # Generate vector with possible sample sizes to randomly choose from
            sample_distribution = [0] * poisson_pd[0] + [1] * poisson_pd[1] + [2] * poisson_pd[2] + \
                                  [3] * poisson_pd[3] + [4] * poisson_pd[4] + [5] * poisson_pd[5]
            sample_distributions.append(sample_distribution)
        return sample_distributions

    def _camera_configs(self):
        return self.config['env_rv_params'].copy()

    @property
    def _camera_intr_scale(self):
        return 32.0 / 96.0

    @property
    def _render_modes(self):
        return [RenderMode.SEGMASK, RenderMode.DEPTH_SCENE]

    @property
    def _max_grasp_approach_offset(self):
        return np.deg2rad(
            self.config['table_alignment']['max_approach_offset'])

    @property
    def _min_grasp_approach_offset(self):
        return -np.deg2rad(
            self.config['table_alignment']['max_approach_offset'])

    @property
    def _max_grasp_approach_table_angle(self):
        return np.deg2rad(
            self.config['table_alignment']['max_approach_table_angle'])

    @property
    def _num_grasp_approach_samples(self):
        return self.config['table_alignment']['num_approach_offset_samples']

    def _generate_phi_offsets(self):
        phi_offsets = []
        if self._max_grasp_approach_offset == self._min_grasp_approach_offset:
            phi_inc = 1
        elif self._num_grasp_approach_samples == 1:
            phi_inc = self._max_grasp_approach_offset - self._min_grasp_approach_offset + 1
        else:
            phi_inc = (self._max_grasp_approach_offset - self._min_grasp_approach_offset) / \
                      (self._num_grasp_approach_samples - 1)

        phi = self._min_grasp_approach_offset
        while phi <= self._max_grasp_approach_offset:
            phi_offsets.append(phi)
            phi += phi_inc
        return phi_offsets

    @property
    def _approach_dist(self):
        return self.config['collision_checking']['approach_dist']

    @property
    def _delta_approach(self):
        return self.config['collision_checking']['delta_approach']

    @property
    def _table_offset(self):
        return self.config['collision_checking']['table_offset']

    @property
    def _stable_pose_min_p(self):
        return self.config['stable_pose_min_p']

    def _set_table_mesh_filename(self):
        table_mesh_filename = self.config['collision_checking'][
            'table_mesh_filename']
        if not os.path.isabs(table_mesh_filename):
            return os.path.join(DATA_DIR, table_mesh_filename)
        return table_mesh_filename

    def _set_table_mesh(self):
        return ObjFile(self._table_mesh_filename).read()

    def _set_gripper(self):
        return RobotGripper.load(self.config['gripper'])

    def _load_datasets(self):
        database = Hdf5Database(self.config['database_name'],
                                access_level=READ_ONLY_ACCESS)
        target_object_keys = self.config['target_objects']
        dataset_names = target_object_keys.keys()
        datasets = [database.dataset(dn) for dn in dataset_names]
        if self.NUM_OBJECTS is not None:
            datasets = [
                dataset.subset(0, self.NUM_OBJECTS) for dataset in datasets
            ]
        return datasets, target_object_keys

    def _set_tensor_config(self):
        tensor_config = self.config['tensors']
        tensor_config['fields']['depth_ims_tf_table']['height'] = 32
        tensor_config['fields']['depth_ims_tf_table']['width'] = 32
        tensor_config['fields']['obj_masks']['height'] = 32
        tensor_config['fields']['obj_masks']['width'] = 32
        tensor_config['fields']['robust_ferrari_canny'] = {}
        tensor_config['fields']['robust_ferrari_canny']['dtype'] = 'float32'
        tensor_config['fields']['ferrari_canny'] = {}
        tensor_config['fields']['ferrari_canny']['dtype'] = 'float32'
        tensor_config['fields']['force_closure'] = {}
        tensor_config['fields']['force_closure']['dtype'] = 'float32'
        return tensor_config

    def get_positive_grasps(self, dataset, grasps):
        metrics = dataset.grasp_metrics(self.obj.key,
                                        grasps,
                                        gripper=self.gripper.name)
        positive_grasps = []
        for cnt in range(0, len(metrics)):
            if metrics[cnt]['robust_ferrari_canny'] >= 0.002:
                positive_grasps.append(grasps[cnt])
        return positive_grasps

    def render_images(self,
                      scene_objs,
                      stable_pose,
                      num_images,
                      camera_config=None):
        if camera_config is None:
            camera_config = self.config['env_rv_params']
        urv = UniformPlanarWorksurfaceImageRandomVariable(
            self.obj.mesh,
            self._render_modes,
            'camera',
            camera_config,
            scene_objs=scene_objs,
            stable_pose=stable_pose)
        # Render images
        render_samples = urv.rvs(size=num_images)
        return render_samples

    def align_grasps(self, grasps):
        z_axis_in_obj = np.dot(self.T_obj_camera.inverse().matrix,
                               np.array((0, 0, -1, 1)).reshape(4, 1))
        z_axis = z_axis_in_obj[0:3].squeeze() / np.linalg.norm(
            z_axis_in_obj[0:3].squeeze())
        aligned_grasps = [
            grasp.perpendicular_table(z_axis) for grasp in grasps
        ]
        return aligned_grasps

    def get_camera_pose(self, sample):
        return np.r_[sample.camera.radius, sample.camera.elev,
                     sample.camera.az, sample.camera.roll, sample.camera.focal,
                     sample.camera.tx, sample.camera.ty]

    def is_grasp_aligned(self, aligned_grasp):
        _, grasp_approach_table_angle, _ = aligned_grasp.grasp_angles_from_camera_z(
            self.T_obj_camera)
        perpendicular_table = (np.abs(grasp_approach_table_angle) <
                               self._max_grasp_approach_table_angle)
        if not perpendicular_table:
            return False
        return True

    def is_grasp_collision_free(self, grasp, collision_checker):
        collision_free = False
        for phi_offset in self.phi_offsets:
            grasp.grasp_y_axis_offset(phi_offset)
            collides = collision_checker.collides_along_approach(
                grasp, self._approach_dist, self._delta_approach)
            if not collides:
                collision_free = True
                break
        return collision_free

    def get_hand_pose(self, grasp_2d, shifted_camera_intr):
        # Configure hand pose
        return np.r_[grasp_2d.center.y, grasp_2d.center.x, grasp_2d.depth,
                     grasp_2d.angle,
                     grasp_2d.center.y - shifted_camera_intr.cy,
                     grasp_2d.center.x - shifted_camera_intr.cx,
                     grasp_2d.width_px / 3]

    def prepare_images(self, depth_im_table, binary_im, grasp_2d, cx, cy):
        # Get translation image distances to grasp
        dx = cx - grasp_2d.center.x
        dy = cy - grasp_2d.center.y
        translation = np.array([dy, dx])

        # Transform, crop and resize image
        binary_im_tf = binary_im.transform(translation, grasp_2d.angle)
        depth_im_tf_table = depth_im_table.transform(translation,
                                                     grasp_2d.angle)

        binary_im_tf = binary_im_tf.crop(96, 96)
        depth_im_tf_table = depth_im_tf_table.crop(96, 96)

        depth_image = np.asarray(depth_im_tf_table.data)
        dep_image = Image.fromarray(depth_image).resize(
            (32, 32), resample=Image.BILINEAR)
        depth_im_tf = np.asarray(dep_image).reshape(32, 32, 1)

        binary_image = np.asarray(binary_im_tf.data)
        bin_image = Image.fromarray(binary_image).resize(
            (32, 32), resample=Image.BILINEAR)
        binary_im_tf = np.asarray(bin_image).reshape(32, 32, 1)
        return depth_im_tf, binary_im_tf

    def add_datapoint(self, depth_im_tf, binary_im_tf, hand_pose,
                      collision_free, camera_pose, aligned_grasp,
                      grasp_metrics):

        # Add data to tensor dataset
        self.tensor_datapoint['depth_ims_tf_table'] = depth_im_tf
        self.tensor_datapoint['obj_masks'] = binary_im_tf
        self.tensor_datapoint['hand_poses'] = hand_pose
        self.tensor_datapoint['obj_labels'] = self.cur_obj_label
        self.tensor_datapoint['collision_free'] = collision_free
        self.tensor_datapoint['pose_labels'] = self.cur_pose_label
        self.tensor_datapoint['image_labels'] = self.cur_image_label
        self.tensor_datapoint['camera_poses'] = camera_pose

        # Add metrics to tensor dataset
        for metric_name, metric_val in grasp_metrics[
                aligned_grasp.id].iteritems():
            coll_free_metric = (1 * collision_free) * metric_val
            self.tensor_datapoint[metric_name] = coll_free_metric
        self.tensor_dataset.add(self.tensor_datapoint)

    def save_samples(self,
                     sample,
                     grasps,
                     T_obj_stp,
                     collision_checker,
                     grasp_metrics,
                     only_positive=False):
        T_stp_camera = sample.camera.object_to_camera_pose
        self.T_obj_camera = T_stp_camera * T_obj_stp.as_frames(
            'obj', T_stp_camera.from_frame)
        aligned_grasps = self.align_grasps(grasps)
        binary_im = sample.renders[RenderMode.SEGMASK].image
        depth_im_table = sample.renders[RenderMode.DEPTH_SCENE].image
        camera_pose = self.get_camera_pose(sample)
        shifted_camera_intr = sample.camera.camera_intr

        cx = depth_im_table.center[1]
        cy = depth_im_table.center[0]

        for cnt, aligned_grasp in enumerate(aligned_grasps):
            if not self.is_grasp_aligned(aligned_grasp):
                continue
            collision_free = self.is_grasp_collision_free(
                aligned_grasp, collision_checker)
            if only_positive and not collision_free:
                continue

            # Project grasp coordinates in image
            grasp_2d = aligned_grasp.project_camera(self.T_obj_camera,
                                                    shifted_camera_intr)

            depth_im_tf, binary_im_tf = self.prepare_images(
                depth_im_table, binary_im, grasp_2d, cx, cy)

            hand_pose = self.get_hand_pose(grasp_2d, shifted_camera_intr)

            self.add_datapoint(depth_im_tf, binary_im_tf, hand_pose,
                               collision_free, camera_pose, aligned_grasp,
                               grasp_metrics)
        self.cur_image_label += 1

    def render_data(self):
        logging.basicConfig(level=logging.WARNING)
        for dataset in self.datasets:
            logging.info('Generating data for dataset %s' % dataset.name)
            object_keys = dataset.object_keys

            for obj_key in object_keys:
                logging.info("Object number: %d" % self.cur_obj_label)
                self.obj = dataset[obj_key]

                grasps = dataset.grasps(self.obj.key,
                                        gripper=self.gripper.name)
                positive_grasps = self.get_positive_grasps(dataset, grasps)

                # Load grasp metrics
                grasp_metrics = dataset.grasp_metrics(
                    self.obj.key, grasps, gripper=self.gripper.name)

                # setup collision checker
                collision_checker = GraspCollisionChecker(self.gripper)
                collision_checker.set_graspable_object(self.obj)

                # read in the stable poses of the mesh
                stable_poses = dataset.stable_poses(self.obj.key)

                # Iterate through stable poses
                for i, stable_pose in enumerate(stable_poses):
                    if not stable_pose.p > self._stable_pose_min_p:
                        continue

                    # setup table in collision checker
                    T_obj_stp = stable_pose.T_obj_table.as_frames('obj', 'stp')
                    T_obj_table = self.obj.mesh.get_T_surface_obj(
                        T_obj_stp,
                        delta=self._table_offset).as_frames('obj', 'table')
                    T_table_obj = T_obj_table.inverse()
                    T_obj_stp = self.obj.mesh.get_T_surface_obj(T_obj_stp)

                    collision_checker.set_table(self._table_mesh_filename,
                                                T_table_obj)

                    # sample images from random variable
                    T_table_obj = RigidTransform(from_frame='table',
                                                 to_frame='obj')
                    scene_objs = {
                        'table': SceneObject(self.table_mesh, T_table_obj)
                    }

                    # Set up image renderer
                    for _ in range(self.config['images_per_stable_pose']):
                        elev_angle = np.random.choice(
                            self.camera_distributions)
                        camera_config = self._camera_configs()
                        camera_config['min_elev'] = elev_angle * 5.0
                        camera_config['max_elev'] = (elev_angle + 1) * 5.0
                        sample = self.render_images(
                            scene_objs,
                            stable_pose,
                            1,
                            camera_config=camera_config)
                        self.save_samples(sample, grasps, T_obj_stp,
                                          collision_checker, grasp_metrics)
                        # Get camera elevation angle for current sample
                        elev_angle = sample.camera.elev * 180 / np.pi
                        elev_bar = int(elev_angle // 5)
                        # Sample number of positive images from distribution
                        number_positive_images = np.random.choice(
                            self.grasp_upsample_distribuitions[elev_bar])
                        # Render only positive images
                        new_config = self._camera_configs()
                        new_config['min_elev'] = elev_angle
                        new_config['max_elev'] = elev_angle
                        positive_render_samples = self.render_images(
                            scene_objs,
                            stable_pose,
                            number_positive_images,
                            camera_config=new_config)
                        if type(positive_render_samples) == list:
                            for pos_sample in positive_render_samples:
                                self.save_samples(pos_sample,
                                                  positive_grasps,
                                                  T_obj_stp,
                                                  collision_checker,
                                                  grasp_metrics,
                                                  only_positive=True)
                        else:
                            self.save_samples(positive_render_samples,
                                              positive_grasps,
                                              T_obj_stp,
                                              collision_checker,
                                              grasp_metrics,
                                              only_positive=True)

                    self.cur_pose_label += 1
                    gc.collect()
                    # next stable pose
                self.cur_obj_label += 1
                # next object
        # Save dataset
        self.tensor_dataset.flush()