Example #1
import logging

import numpy as np
import ray

# NOTE: TensorDataset and YamlConfig are assumed to come from autolab_core;
# utils and rollout_bin_picking_policy_in_parallel are assumed to come from
# the surrounding bin-picking package and are not imported here.
from autolab_core import TensorDataset, YamlConfig


def run_parallel_bin_picking_benchmark(input_dataset_path,
                                       heap_ids,
                                       timesteps,
                                       output_dataset_path,
                                       config_filename):
    # Not implemented yet: the heap ids and timesteps must first be split
    # across workers (see the sketch after this function).
    raise NotImplementedError('Cannot run in parallel. Need to split up the heap ids and timesteps')

    # load config
    config = YamlConfig(config_filename)

    # init ray
    ray_config = config['ray']
    num_cpus = ray_config['num_cpus']
    ray.init(num_cpus=num_cpus,
             redirect_output=ray_config['redirect_output'])
    
    # rollouts
    num_rollouts = config['num_rollouts'] // num_cpus
    dataset_ids = [rollout_bin_picking_policy_in_parallel.remote(output_dataset_path, config_filename, num_rollouts) for _ in range(num_cpus)]
    dataset_filenames = ray.get(dataset_ids)
    if len(dataset_filenames) == 0:
        return
    
    # merge datasets    
    subproc_dataset = TensorDataset.open(dataset_filenames[0])
    tensor_config = subproc_dataset.config


    # open dataset
    dataset = TensorDataset(output_dataset_path, tensor_config)
    dataset.add_metadata('action_ids', subproc_dataset.metadata['action_ids'])

    # add datapoints
    obj_id = 0
    heap_id = 0
    obj_ids = {}
    for dataset_filename in dataset_filenames:
        logging.info('Aggregating data from %s' %(dataset_filename))
        j = 0
        subproc_dataset = TensorDataset.open(dataset_filename)
        subproc_obj_ids = subproc_dataset.metadata['obj_ids']
        for datapoint in subproc_dataset:
            if j > 0 and datapoint['timesteps'] == 0:
                heap_id += 1
                
            # modify object ids
            for i in range(datapoint['obj_ids'].shape[0]):
                subproc_obj_id = datapoint['obj_ids'][i]
                if subproc_obj_id != np.iinfo(np.uint32).max:
                    subproc_obj_key = subproc_obj_ids[str(subproc_obj_id)]
                    if subproc_obj_key not in obj_ids:
                        obj_ids[subproc_obj_key] = obj_id
                        obj_id += 1
                    datapoint['obj_ids'][i] = obj_ids[subproc_obj_key]

            # modify grasped obj id
            subproc_grasped_obj_id = datapoint['grasped_obj_ids']
            grasped_obj_key = subproc_obj_ids[str(subproc_grasped_obj_id)]
            datapoint['grasped_obj_ids'] = obj_ids[grasped_obj_key]

            # modify heap id
            datapoint['heap_ids'] = heap_id
                
            # add datapoint to dataset
            dataset.add(datapoint)
            j += 1
            
    # write to disk        
    obj_ids = utils.reverse_dictionary(obj_ids)
    dataset.add_metadata('obj_ids', obj_ids)
    dataset.flush()
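
# The NotImplementedError above notes that the heap ids and timesteps still
# need to be split across the Ray workers. The helper below is a minimal,
# hypothetical sketch of one way to partition that work; the name split_work
# and the round-robin scheme are illustrative assumptions, not the repo's
# implementation.
def split_work(heap_ids, timesteps, num_chunks):
    """Round-robin the (heap_id, timestep) pairs into num_chunks slices."""
    pairs = list(zip(heap_ids, timesteps))
    return [pairs[i::num_chunks] for i in range(num_chunks)]

# Example: split_work([0, 0, 1, 1], [0, 1, 0, 1], 2)
# -> [[(0, 0), (1, 0)], [(0, 1), (1, 1)]]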
Example #2
            if "obj_ids" in dataset.metadata.keys():
                # modify object ids
                dataset_obj_ids = dataset.metadata["obj_ids"]
                for k in range(datapoint["obj_ids"].shape[0]):
                    dataset_obj_id = datapoint["obj_ids"][k]
                    if dataset_obj_id != np.iinfo(np.uint32).max:
                        dataset_obj_key = dataset_obj_ids[str(dataset_obj_id)]
                        if dataset_obj_key not in obj_ids:
                            obj_ids[dataset_obj_key] = obj_id
                            obj_id += 1
                        datapoint["obj_ids"][k] = obj_ids[dataset_obj_key]

                # modify grasped obj id
                dataset_grasped_obj_id = datapoint["grasped_obj_ids"]
                grasped_obj_key = dataset_obj_ids[str(dataset_grasped_obj_id)]
                datapoint["grasped_obj_ids"] = obj_ids[grasped_obj_key]

            # add datapoint
            output_dataset.add(datapoint)

    # set metadata
    obj_ids = utils.reverse_dictionary(obj_ids)
    output_dataset.add_metadata("obj_ids", obj_ids)
    for field_name, field_data in dataset.metadata.items():
        if field_name not in ["obj_ids"]:
            output_dataset.add_metadata(field_name, field_data)

    # flush to disk
    output_dataset.flush()
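
# For reference, utils.reverse_dictionary above turns the
# {object key -> new integer id} map back into the {id -> key} form stored in
# the dataset metadata. Below is a minimal stand-in sketch, not the package's
# implementation; the real helper may also stringify the keys, as the
# str(...) lookups above suggest.
def reverse_dictionary(d):
    """Swap keys and values, e.g. {'mug': 3} -> {3: 'mug'}."""
    return {v: k for k, v in d.items()}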
Example #3
    def test_single_read_write(self):
        # seed
        np.random.seed(SEED)
        random.seed(SEED)

        # open dataset
        create_successful = True
        try:
            dataset = TensorDataset(TEST_TENSOR_DATASET_NAME, TENSOR_CONFIG)
        except Exception:
            create_successful = False
        self.assertTrue(create_successful)

        # check field names
        write_datapoint = dataset.datapoint_template
        for field_name in write_datapoint:
            self.assertTrue(field_name in dataset.field_names)

        # populate and add the datapoint
        write_datapoint['float_value'] = np.random.rand()
        write_datapoint['int_value'] = int(100 * np.random.rand())
        write_datapoint['str_value'] = utils.gen_experiment_id()
        write_datapoint['vector_value'] = np.random.rand(HEIGHT)
        write_datapoint['matrix_value'] = np.random.rand(HEIGHT, WIDTH)
        write_datapoint['image_value'] = np.random.rand(
            HEIGHT, WIDTH, CHANNELS)
        dataset.add(write_datapoint)

        # check num datapoints
        self.assertTrue(dataset.num_datapoints == 1)

        # add metadata
        metadata_num = np.random.rand()
        dataset.add_metadata('test', metadata_num)

        # check written arrays
        dataset.flush()
        for field_name in dataset.field_names:
            filename = os.path.join(TEST_TENSOR_DATASET_NAME, 'tensors',
                                    '%s_00000.npz' % (field_name))
            value = np.load(filename)['arr_0']
            if isinstance(value[0], str):
                self.assertTrue(value[0] == write_datapoint[field_name])
            else:
                self.assertTrue(
                    np.allclose(value[0], write_datapoint[field_name]))

        # re-open the dataset
        del dataset
        dataset = TensorDataset.open(TEST_TENSOR_DATASET_NAME)

        # read metadata
        self.assertTrue(np.allclose(dataset.metadata['test'], metadata_num))

        # read datapoint
        read_datapoint = dataset.datapoint(0)
        for field_name in dataset.field_names:
            if isinstance(read_datapoint[field_name], str):
                self.assertTrue(
                    read_datapoint[field_name] == write_datapoint[field_name])
            else:
                self.assertTrue(
                    np.allclose(read_datapoint[field_name],
                                write_datapoint[field_name]))

        # check iterator
        for read_datapoint in dataset:
            for field_name in dataset.field_names:
                if isinstance(read_datapoint[field_name], str):
                    self.assertTrue(read_datapoint[field_name] ==
                                    write_datapoint[field_name])
                else:
                    self.assertTrue(
                        np.allclose(read_datapoint[field_name],
                                    write_datapoint[field_name]))

        # read individual fields
        for field_name in dataset.field_names:
            read_datapoint = dataset.datapoint(0, field_names=[field_name])
            if isinstance(read_datapoint[field_name], str):
                self.assertTrue(
                    read_datapoint[field_name] == write_datapoint[field_name])
            else:
                self.assertTrue(
                    np.allclose(read_datapoint[field_name],
                                write_datapoint[field_name]))

        # re-open the dataset with read-write access
        del dataset
        dataset = TensorDataset.open(TEST_TENSOR_DATASET_NAME,
                                     access_mode=READ_WRITE_ACCESS)

        # delete datapoint
        dataset.delete_last()

        # check that the dataset is correct
        self.assertTrue(dataset.num_datapoints == 0)
        self.assertTrue(dataset.num_tensors == 0)
        for field_name in dataset.field_names:
            filename = os.path.join(TEST_TENSOR_DATASET_NAME, 'tensors',
                                    '%s_00000.npz' % (field_name))
            self.assertFalse(os.path.exists(filename))

        # remove dataset
        if os.path.exists(TEST_TENSOR_DATASET_NAME):
            shutil.rmtree(TEST_TENSOR_DATASET_NAME)
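
# The test above relies on module-level fixtures that are not shown in the
# snippet. The values below are hypothetical stand-ins: the constant names
# match the test, but the concrete values and the exact TENSOR_CONFIG schema
# (datapoints_per_file plus per-field dtype/height/width/channels) depend on
# the TensorDataset implementation in use and should be checked against it.
SEED = 197
HEIGHT = 3
WIDTH = 3
CHANNELS = 3
TEST_TENSOR_DATASET_NAME = 'test_dataset'
TENSOR_CONFIG = {
    'datapoints_per_file': 100,
    'fields': {
        'float_value': {'dtype': 'float32'},
        'int_value': {'dtype': 'int32'},
        'str_value': {'dtype': 'str'},
        'vector_value': {'dtype': 'float32', 'height': HEIGHT},
        'matrix_value': {'dtype': 'float32', 'height': HEIGHT, 'width': WIDTH},
        'image_value': {'dtype': 'float32', 'height': HEIGHT,
                        'width': WIDTH, 'channels': CHANNELS},
    }
}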