import logging

import numpy as np
import ray

# YamlConfig, TensorDataset, and utils are assumed to come from autolab_core
from autolab_core import TensorDataset, YamlConfig
from autolab_core import utils


def run_parallel_bin_picking_benchmark(input_dataset_path,
                                       heap_ids,
                                       timesteps,
                                       output_dataset_path,
                                       config_filename):
    raise NotImplementedError('Cannot run in parallel. Need to split up the heap ids and timesteps')

    # load config
    config = YamlConfig(config_filename)

    # init ray
    ray_config = config['ray']
    num_cpus = ray_config['num_cpus']
    ray.init(num_cpus=num_cpus,
             redirect_output=ray_config['redirect_output'])

    # rollouts
    num_rollouts = config['num_rollouts'] // num_cpus
    dataset_ids = [rollout_bin_picking_policy_in_parallel.remote(output_dataset_path,
                                                                 config_filename,
                                                                 num_rollouts)
                   for i in range(num_cpus)]
    dataset_filenames = ray.get(dataset_ids)
    if len(dataset_filenames) == 0:
        return

    # merge datasets
    subproc_dataset = TensorDataset.open(dataset_filenames[0])
    tensor_config = subproc_dataset.config

    # open output dataset
    dataset = TensorDataset(output_dataset_path, tensor_config)
    dataset.add_metadata('action_ids', subproc_dataset.metadata['action_ids'])

    # add datapoints
    obj_id = 0
    heap_id = 0
    obj_ids = {}
    for dataset_filename in dataset_filenames:
        logging.info('Aggregating data from %s' % dataset_filename)
        j = 0
        subproc_dataset = TensorDataset.open(dataset_filename)
        subproc_obj_ids = subproc_dataset.metadata['obj_ids']
        for datapoint in subproc_dataset:
            # a timestep of zero marks the start of a new heap
            if j > 0 and datapoint['timesteps'] == 0:
                heap_id += 1

            # modify object ids (uint32 max is the sentinel for an empty slot)
            for i in range(datapoint['obj_ids'].shape[0]):
                subproc_obj_id = datapoint['obj_ids'][i]
                if subproc_obj_id != np.iinfo(np.uint32).max:
                    subproc_obj_key = subproc_obj_ids[str(subproc_obj_id)]
                    if subproc_obj_key not in obj_ids.keys():
                        obj_ids[subproc_obj_key] = obj_id
                        obj_id += 1
                    datapoint['obj_ids'][i] = obj_ids[subproc_obj_key]

            # modify grasped obj id
            subproc_grasped_obj_id = datapoint['grasped_obj_ids']
            grasped_obj_key = subproc_obj_ids[str(subproc_grasped_obj_id)]
            datapoint['grasped_obj_ids'] = obj_ids[grasped_obj_key]

            # modify heap id
            datapoint['heap_ids'] = heap_id

            # add datapoint to dataset
            dataset.add(datapoint)
            j += 1

    # write to disk
    obj_ids = utils.reverse_dictionary(obj_ids)
    dataset.add_metadata('obj_ids', obj_ids)
    dataset.flush()
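# The merge above stores the accumulated object-id map via
# utils.reverse_dictionary before writing it as metadata. The real helper is
# not shown in this file; below is a minimal sketch only, assuming it simply
# swaps keys and values (the str(...) lookups above suggest the integer keys
# end up as strings once the metadata is serialized).
def _reverse_dictionary_sketch(d):
    """Hypothetical stand-in for utils.reverse_dictionary.

    Example: {'mug': 0, 'bowl': 1} -> {0: 'mug', 1: 'bowl'}.
    """
    return {v: k for k, v in d.items()}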
if "obj_ids" in dataset.metadata.keys(): # modify object ids dataset_obj_ids = dataset.metadata["obj_ids"] for k in range(datapoint["obj_ids"].shape[0]): dataset_obj_id = datapoint["obj_ids"][k] if dataset_obj_id != np.iinfo(np.uint32).max: dataset_obj_key = dataset_obj_ids[str(dataset_obj_id)] if dataset_obj_key not in obj_ids.keys(): obj_ids[dataset_obj_key] = obj_id obj_id += 1 datapoint["obj_ids"][k] = obj_ids[dataset_obj_key] # modify grasped obj id dataset_grasped_obj_id = datapoint["grasped_obj_ids"] grasped_obj_key = dataset_obj_ids[str(dataset_grasped_obj_id)] datapoint["grasped_obj_ids"] = obj_ids[grasped_obj_key] # add datapoint output_dataset.add(datapoint) # set metadata obj_ids = utils.reverse_dictionary(obj_ids) output_dataset.add_metadata("obj_ids", obj_ids) for field_name, field_data in dataset.metadata.iteritems(): if field_name not in ["obj_ids"]: output_dataset.add_metadata(field_name, field_data) # flush to disk output_dataset.flush()
def test_single_read_write(self):
    # seed
    np.random.seed(SEED)
    random.seed(SEED)

    # open dataset
    create_successful = True
    try:
        dataset = TensorDataset(TEST_TENSOR_DATASET_NAME, TENSOR_CONFIG)
    except Exception:
        create_successful = False
    self.assertTrue(create_successful)

    # check field names
    write_datapoint = dataset.datapoint_template
    for field_name in write_datapoint.keys():
        self.assertTrue(field_name in dataset.field_names)

    # add the datapoint
    write_datapoint['float_value'] = np.random.rand()
    write_datapoint['int_value'] = int(100 * np.random.rand())
    write_datapoint['str_value'] = utils.gen_experiment_id()
    write_datapoint['vector_value'] = np.random.rand(HEIGHT)
    write_datapoint['matrix_value'] = np.random.rand(HEIGHT, WIDTH)
    write_datapoint['image_value'] = np.random.rand(HEIGHT, WIDTH, CHANNELS)
    dataset.add(write_datapoint)

    # check num datapoints
    self.assertTrue(dataset.num_datapoints == 1)

    # add metadata
    metadata_num = np.random.rand()
    dataset.add_metadata('test', metadata_num)

    # check written arrays
    dataset.flush()
    for field_name in dataset.field_names:
        filename = os.path.join(TEST_TENSOR_DATASET_NAME, 'tensors',
                                '%s_00000.npz' % (field_name))
        value = np.load(filename)['arr_0']
        if isinstance(value[0], str):
            self.assertTrue(value[0] == write_datapoint[field_name])
        else:
            self.assertTrue(
                np.allclose(value[0], write_datapoint[field_name]))

    # re-open the dataset
    del dataset
    dataset = TensorDataset.open(TEST_TENSOR_DATASET_NAME)

    # read metadata
    self.assertTrue(np.allclose(dataset.metadata['test'], metadata_num))

    # read datapoint
    read_datapoint = dataset.datapoint(0)
    for field_name in dataset.field_names:
        if isinstance(read_datapoint[field_name], str):
            self.assertTrue(
                read_datapoint[field_name] == write_datapoint[field_name])
        else:
            self.assertTrue(
                np.allclose(read_datapoint[field_name],
                            write_datapoint[field_name]))

    # check iterator
    for read_datapoint in dataset:
        for field_name in dataset.field_names:
            if isinstance(read_datapoint[field_name], str):
                self.assertTrue(
                    read_datapoint[field_name] == write_datapoint[field_name])
            else:
                self.assertTrue(
                    np.allclose(read_datapoint[field_name],
                                write_datapoint[field_name]))

    # read individual fields
    for field_name in dataset.field_names:
        read_datapoint = dataset.datapoint(0, field_names=[field_name])
        if isinstance(read_datapoint[field_name], str):
            self.assertTrue(
                read_datapoint[field_name] == write_datapoint[field_name])
        else:
            self.assertTrue(
                np.allclose(read_datapoint[field_name],
                            write_datapoint[field_name]))

    # re-open the dataset in read-write mode
    del dataset
    dataset = TensorDataset.open(TEST_TENSOR_DATASET_NAME,
                                 access_mode=READ_WRITE_ACCESS)

    # delete datapoint
    dataset.delete_last()

    # check that the dataset is empty again
    self.assertTrue(dataset.num_datapoints == 0)
    self.assertTrue(dataset.num_tensors == 0)
    for field_name in dataset.field_names:
        filename = os.path.join(TEST_TENSOR_DATASET_NAME, 'tensors',
                                '%s_00000.npz' % (field_name))
        self.assertFalse(os.path.exists(filename))

    # remove dataset
    if os.path.exists(TEST_TENSOR_DATASET_NAME):
        shutil.rmtree(TEST_TENSOR_DATASET_NAME)
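# For reference: one plausible definition of the constants and TENSOR_CONFIG
# the test above assumes. This is a sketch only -- the real values live in the
# test module -- and it assumes the TensorDataset per-field spec uses 'dtype'
# plus optional 'height'/'width'/'channels' keys.
SEED = 0                                  # hypothetical value
HEIGHT = 3                                # hypothetical value
WIDTH = 3                                 # hypothetical value
CHANNELS = 3                              # hypothetical value
TEST_TENSOR_DATASET_NAME = 'test_dataset' # hypothetical value
TENSOR_CONFIG = {
    'datapoints_per_file': 10,
    'fields': {
        'float_value': {'dtype': 'float32'},
        'int_value': {'dtype': 'int16'},
        'str_value': {'dtype': 'str'},
        'vector_value': {'dtype': 'float32', 'height': HEIGHT},
        'matrix_value': {'dtype': 'float32', 'height': HEIGHT, 'width': WIDTH},
        'image_value': {'dtype': 'float32', 'height': HEIGHT, 'width': WIDTH,
                        'channels': CHANNELS},
    }
}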