def test_dataset_shuffle(self):
    """Shuffling must permute the experiences while keeping each
    observation/action pair aligned at the same index.

    Builds 3 episodes of increasing length (10, 11, 12) where both the
    observation and the action of each experience encode its global
    append order, then checks that shuffling moves entries but never
    separates an observation from its action.
    """
    base_run_length = 10
    dataset = Dataset()
    for episode in range(3):
        episode_length = base_run_length + episode
        for step in range(episode_length):
            is_terminal = step == episode_length - 1
            # both fields carry the global insertion index so we can
            # verify pairs stay together after shuffling
            dataset.append(
                Experience(observation=torch.as_tensor((len(dataset), )),
                           action=torch.as_tensor((len(dataset), )),
                           reward=torch.as_tensor((0, )),
                           done=torch.as_tensor((1, )) if is_terminal
                           else torch.as_tensor((0, ))))
    # before shuffling, the first entry is the first one appended
    self.assertEqual(dataset.observations[0].item(), 0)
    dataset.shuffle()
    # observation/action pairs must remain aligned
    self.assertEqual(dataset.observations[0], dataset.actions[0])
    # NOTE(review): probabilistic — a shuffle can leave index 0 in place
    # (~1/33 chance here), which would make this assertion flake
    self.assertNotEqual(dataset.observations[0].item(), 0)
def generate_dataset_by_length(length: int,
                               input_size: tuple = (3, 100, 100),
                               output_size: tuple = (1, ),
                               continuous: bool = True,
                               fixed_input_value: float = None,
                               fixed_output_value: float = None) -> Dataset:
    """Build a Dataset containing exactly `length` usable experiences.

    Experiences are drawn from `experience_generator`; entries whose
    termination state is still Unknown are skipped. The generator is
    restarted (outer while loop) until enough experiences are collected.

    :param length: number of experiences the returned dataset must hold
    :param input_size: shape of each generated observation
    :param output_size: shape of each generated action
    :param continuous: whether generated actions are continuous
    :param fixed_input_value: if set, every observation uses this value
    :param fixed_output_value: if set, every action uses this value
    :return: a Dataset with `length` experiences
    """
    dataset = Dataset()
    while len(dataset) < length:
        # fix: dropped unused `count` from a needless enumerate()
        for experience in experience_generator(
                input_size=input_size,
                output_size=output_size,
                continuous=continuous,
                fixed_input_value=fixed_input_value,
                fixed_output_value=fixed_output_value):
            # only keep experiences with a resolved termination state
            if experience.done != TerminationType.Unknown:
                dataset.append(experience)
            if len(dataset) >= length:
                break
    return dataset
def test_dataset_subsample(self):
    """After subsampling every Nth step, only multiples of N and terminal
    steps may remain in the dataset.

    Builds 3 episodes of increasing length whose observations encode the
    within-episode step index, subsamples with stride 3, then verifies
    every surviving experience is either on the stride or an episode end.
    """
    base_run_length = 10
    stride = 3
    dataset = Dataset()
    for episode in range(3):
        episode_length = base_run_length + episode
        for step in range(episode_length):
            is_terminal = step == episode_length - 1
            dataset.append(
                Experience(observation=torch.as_tensor((step, )),
                           action=torch.as_tensor((0, )),
                           reward=torch.as_tensor((0, )),
                           done=torch.as_tensor((1, )) if is_terminal
                           else torch.as_tensor((0, ))))
    dataset.subsample(stride)
    # each kept experience must lie on the stride or close an episode
    for observation, done in zip(dataset.observations, dataset.done):
        self.assertTrue(observation.item() % stride == 0
                        or done.item() == 1)
def test_dataset_size(self):
    """Memory accounting: size grows linearly with the number of stored
    experiences and halves when tensors use float32 instead of the
    (presumably int64) default integer dtype — TODO confirm default dtype
    against torch version in use.
    """
    def make_experience(dtype=None):
        # dtype=None is torch.as_tensor's default, so this reproduces
        # the un-annotated construction exactly
        return Experience(observation=torch.as_tensor([0] * 10, dtype=dtype),
                          action=torch.as_tensor([1] * 3, dtype=dtype),
                          reward=torch.as_tensor(0, dtype=dtype),
                          done=torch.as_tensor(2, dtype=dtype))

    # two identical experiences -> exactly double the memory footprint
    dataset = Dataset()
    dataset.append(make_experience())
    first_size = dataset.get_memory_size()
    dataset.append(make_experience())
    self.assertEqual(2 * first_size, dataset.get_memory_size())

    # float32 tensors take half the bytes of the default integer tensors
    dataset = Dataset()
    dataset.append(make_experience(dtype=torch.float32))
    second_size = dataset.get_memory_size()
    self.assertEqual(first_size, 2 * second_size)
class DataSaver:
    """Persists experiences either in an in-memory Dataset (RAM mode) or as
    files under a saving directory on disk, enforcing an optional maximum
    buffer size by evicting the oldest run.
    """

    def __init__(self, config: DataSaverConfig):
        self._config = config
        self._logger = get_logger(
            name=get_filename_without_extension(__file__),
            output_path=self._config.output_path,
            quiet=False)
        cprint(f'initiate', self._logger)
        # relative saving directories are resolved against $HOME
        if not self._config.saving_directory.startswith('/'):
            self._config.saving_directory = os.path.join(
                os.environ['HOME'], self._config.saving_directory)
        if self._config.store_on_ram_only:
            # RAM mode: Dataset enforces max_size itself
            self._dataset = Dataset(max_size=self._config.max_size)
        # used to keep track of replay buffer size on file system
        # (file mode with a size cap: count frames already on disk by
        # loading all existing runs under the saving directory's parent)
        if not self._config.store_on_ram_only \
                and os.path.isdir(os.path.dirname(self._config.saving_directory)) \
                and self._config.max_size != -1:
            data_loader = DataLoader(config=DataLoaderConfig().create(
                config_dict={
                    'data_directories': [
                        os.path.join(
                            os.path.dirname(self._config.saving_directory),
                            run) for run in sorted(
                                os.listdir(
                                    os.path.dirname(
                                        self._config.saving_directory)))
                    ],
                    'output_path': self._config.output_path,
                    'store': False  # don't store config
                }))
            data_loader.load_dataset()
            self._frame_counter = len(data_loader.get_dataset())
        else:
            self._frame_counter = 0

    def __len__(self):
        # number of stored experiences, regardless of storage backend
        if self._config.store_on_ram_only:
            return len(self._dataset)
        else:
            return self._frame_counter

    def update_saving_directory(self):
        # start a fresh run directory when runs are kept separate
        if self._config.separate_raw_data_runs:
            self._config.saving_directory = create_saving_directory(
                self._config.output_path, self._config.saving_directory_tag)

    def get_saving_directory(self):
        # 'ram' is a sentinel string for the in-memory backend
        return self._config.saving_directory if not self._config.store_on_ram_only else 'ram'

    def get_dataset(self):
        # NOTE(review): only set when store_on_ram_only is True — calling
        # this in file mode would raise AttributeError; verify callers
        return self._dataset

    def save(self, experience: Experience) -> None:
        """Store one experience via the configured backend."""
        if experience.done == TerminationType.Unknown:
            return  # don't save experiences in an unknown state
        if self._config.store_on_ram_only:
            return self._dataset.append(experience)
        else:
            os.makedirs(self._config.saving_directory, exist_ok=True)
            return self._store_in_file_system(experience=experience)

    def _store_in_file_system(self, experience: Experience) -> None:
        """Write each field of the experience to its own file/subdirectory
        and enforce the on-disk buffer size afterwards."""
        for dst, data in zip(['observation', 'action', 'reward', 'done'], [
                experience.observation, experience.action,
                experience.reward, experience.done
        ]):
            if data is not None:
                # Action objects wrap their payload in .value
                self._store_frame(data=np.asarray(
                    data.value if isinstance(data, Action) else data),
                                  dst=dst,
                                  time_stamp=experience.time_stamp)
        # arbitrary extra info fields are stored under info_<key>
        for key, value in experience.info.items():
            self._store_frame(data=np.asarray(value.value) if isinstance(
                value, Action) else value,
                              dst=f'info_{to_file_name(key)}',
                              time_stamp=experience.time_stamp)
        # mark terminated runs with an empty file named after the outcome
        if experience.done in [
                TerminationType.Success, TerminationType.Failure
        ]:
            os.system(
                f'touch {os.path.join(self._config.saving_directory, experience.done.name)}'
            )
        self._check_dataset_size_on_file_system()

    def _store_frame(self, data: Union[np.ndarray, float], dst: str,
                     time_stamp: int) -> None:
        """Store one value: 2D/3D arrays as jpg images, scalars/vectors as
        rows appended to a .data file. Other ranks are silently skipped."""
        if not isinstance(data, np.ndarray):
            data = np.asarray(data)
        try:
            if len(data.shape) in [2, 3]:
                # image-like data goes into a per-field subdirectory
                if not os.path.isdir(
                        os.path.join(self._config.saving_directory, dst)):
                    os.makedirs(os.path.join(self._config.saving_directory,
                                             dst),
                                exist_ok=True)
                store_image(
                    data=data,
                    file_name=os.path.join(self._config.saving_directory,
                                           dst,
                                           timestamp_to_filename(time_stamp))
                    + '.jpg')
            elif len(data.shape) in [0, 1]:
                store_array_to_file(data=data,
                                    file_name=os.path.join(
                                        self._config.saving_directory,
                                        dst + '.data'),
                                    time_stamp=time_stamp)
        except Exception as e:
            # best effort: log the failure instead of crashing the saver
            cprint(f'Failed to store frame: {e}',
                   self._logger,
                   msg_type=MessageType.error)

    def _check_dataset_size_on_file_system(self):
        self._frame_counter += 1
        # If number of frames exceed max_size, remove oldest run and decrease frame counter
        # (chained comparison: counter > max_size AND max_size != -1)
        if self._frame_counter > self._config.max_size != -1:
            raw_data_dir = os.path.dirname(self._config.saving_directory)
            first_run = sorted(os.listdir(raw_data_dir))[0]
            # run length is the number of lines in its done.data file
            with open(os.path.join(raw_data_dir, first_run, 'done.data'),
                      'r') as f:
                run_length = len(f.readlines())
            self._frame_counter -= run_length
            shutil.rmtree(os.path.join(raw_data_dir, first_run),
                          ignore_errors=True)
            # without separate runs, the oldest run IS the current run,
            # so this eviction wipes all data — warn loudly
            if not self._config.separate_raw_data_runs:
                cprint(
                    f"Reached max buffer size and removing all data."
                    f"Avoid this by setting data_saver_config.separate_raw_data_runs to True.",
                    msg_type=MessageType.warning,
                    logger=self._logger)

    def _get_runs(self) -> list:
        """
        parse the parent directory of the saving directory for all raw_data runs.
        Return a list of the absolute paths to these runs.
        """
        raw_data_dir = os.path.dirname(self._config.saving_directory)
        return [
            os.path.join(raw_data_dir, run)
            for run in sorted(os.listdir(raw_data_dir))
        ]

    def create_train_validation_hdf5_files(
            self,
            runs: List[str] = None,
            input_size: List[int] = None) -> None:
        """Split the runs into train/validation sets by the configured ratio
        and write one hdf5 file per split into the output path."""
        all_runs = runs if runs is not None else self._get_runs()
        number_of_training_runs = int(self._config.training_validation_split *
                                      len(all_runs))
        train_runs = all_runs[0:number_of_training_runs]
        validation_runs = all_runs[number_of_training_runs:]
        for file_name, runs in zip(['train', 'validation'],
                                   [train_runs, validation_runs]):
            config = DataLoaderConfig().create(
                config_dict={
                    'data_directories': runs,
                    'output_path': self._config.output_path,
                    'subsample': self._config.subsample_hdf5,
                    'input_size': input_size
                })
            data_loader = DataLoader(config=config)
            data_loader.load_dataset()
            create_hdf5_file_from_dataset(filename=os.path.join(
                self._config.output_path, file_name + '.hdf5'),
                                          dataset=data_loader.get_dataset())
            cprint(f'created {file_name}.hdf5', self._logger)

    def empty_raw_data_in_output_directory(self) -> None:
        """Delete every run directory under the raw data parent directory."""
        raw_data_directory = os.path.dirname(self._config.saving_directory)
        if os.path.isdir(raw_data_directory):
            for d in os.listdir(raw_data_directory):
                shutil.rmtree(os.path.join(raw_data_directory, d))

    def clear_buffer(self) -> None:
        """Reset the buffer: fresh Dataset in RAM mode, wipe runs on disk."""
        self._frame_counter = 0
        if self._config.store_on_ram_only:
            self._dataset = Dataset()
        else:
            self.empty_raw_data_in_output_directory()

    def remove(self):
        # release the logger's file handles
        [h.close() for h in self._logger.handlers]
class PhiWeightTest(unittest.TestCase):
    """Tests return / reward-to-go / GAE computations on a fixture batch of
    three episodes (lengths 10, 1, 5) with constant step and end rewards."""

    def setUp(self) -> None:
        self.output_dir = f'{os.environ["PWD"]}/test_dir/{get_filename_without_extension(__file__)}'
        os.makedirs(self.output_dir, exist_ok=True)
        self.batch = Dataset()
        self.durations = [10, 1, 5]
        self.step_reward = torch.as_tensor(1)
        self.end_reward = torch.as_tensor(10)
        # per episode: (duration - 1) intermediate steps (done=0), then one
        # terminal step (done=2) carrying the end reward
        for episode in range(3):
            for experience in range(self.durations[episode] - 1):
                self.batch.append(
                    Experience(observation=torch.as_tensor(5),
                               action=torch.as_tensor(5),
                               reward=self.step_reward,
                               done=torch.as_tensor(0)))
            self.batch.append(
                Experience(observation=torch.as_tensor(5),
                           action=torch.as_tensor(5),
                           reward=self.end_reward,
                           done=torch.as_tensor(2)))

    def test_get_returns_on_dataset(self):
        # every step of an episode shares the full (undiscounted) episode
        # return: end reward plus one step reward per intermediate step
        returns = get_returns(self.batch)
        targets = [
            self.end_reward + (duration - 1) * self.step_reward
            for duration in self.durations for _ in range(duration)
        ]
        for r_e, r_t in zip(returns, targets):
            self.assertEqual(r_e, r_t)

    def test_get_reward_to_go(self):
        # reward-to-go decreases by one step reward per elapsed step; the
        # reversed construction yields per-step remaining return in order
        returns = get_reward_to_go(self.batch)
        targets = reversed([
            self.end_reward + t * self.step_reward
            for duration in reversed(self.durations)
            for t in range(duration)
        ])
        for r_e, r_t in zip(returns, targets):
            self.assertEqual(r_e, r_t)

    def test_generalized_advantage_estimate(self):
        # with gae_lambda == 1 and no value --> same as reward-to-go
        rtg_returns = get_generalized_advantage_estimate(
            batch_rewards=self.batch.rewards,
            batch_done=self.batch.done,
            batch_values=[torch.as_tensor(0.)] * len(self.batch),
            discount=1,
            gae_lambda=1)
        for r_e, r_t in zip(rtg_returns, get_reward_to_go(self.batch)):
            self.assertEqual(r_e, r_t)
        # with gae_lambda == 0 and zero values --> one-step TD target,
        # which here is just the immediate reward of each step
        one_step_returns = get_generalized_advantage_estimate(
            batch_rewards=self.batch.rewards,
            batch_done=self.batch.done,
            batch_values=[torch.as_tensor(0.)] * len(self.batch),
            discount=1,
            gae_lambda=0)
        targets = [
            self.step_reward if d == 0 else self.end_reward
            for d in self.batch.done
        ]
        for r_e, r_t in zip(one_step_returns, targets):
            self.assertEqual(r_e, r_t)
        # intermediate discount/lambda must land between the two extremes
        gae_returns = get_generalized_advantage_estimate(
            batch_rewards=self.batch.rewards,
            batch_done=self.batch.done,
            batch_values=[torch.as_tensor(0.)] * len(self.batch),
            discount=0.99,
            gae_lambda=0.99)
        for t in range(len(self.batch)):
            self.assertGreaterEqual(gae_returns[t], one_step_returns[t])
            self.assertLessEqual(gae_returns[t], rtg_returns[t])

    def tearDown(self) -> None:
        shutil.rmtree(self.output_dir, ignore_errors=True)