def offline_gym(
    env_name: str,
    pkl_path: str,
    num_train_transitions: int,
    max_steps: Optional[int],
    seed: Optional[int] = None,
):
    """
    Collect transitions from a Gym environment and pickle them as a DataFrame.

    Steps, in order:
      1. ``seed`` is handed to ``initialize_seed``.
      2. A ``Gym`` environment is built from ``env_name``.
      3. A ``ReplayBuffer`` with capacity ``num_train_transitions`` is filled
         by ``fill_replay_buffer``. No agent is passed, so the acting policy
         is whatever ``fill_replay_buffer`` defaults to (presumably a random
         policy -- confirm against that helper).
      4. The buffer is converted with ``replay_buffer_to_pre_timeline_df`` and
         written to ``pkl_path`` with ``DataFrame.to_pickle``.

    Args:
        env_name: Name forwarded to ``Gym(env_name=...)``.
        pkl_path: Destination path of the pickled DataFrame.
        num_train_transitions: Used both as the replay buffer capacity and as
            the number of transitions requested from ``fill_replay_buffer``.
        max_steps: Accepted but not used by this function.
        seed: Value passed to ``initialize_seed``; may be ``None``.

    Raises:
        AssertionError: If the environment's action space is neither
            ``gym.spaces.Discrete`` nor ``gym.spaces.Box``.
    """
    initialize_seed(seed)
    env = Gym(env_name=env_name)

    buffer = ReplayBuffer(replay_capacity=num_train_transitions, batch_size=1)
    fill_replay_buffer(env, buffer, num_train_transitions)

    # The DataFrame conversion needs to know which kind of action was stored.
    action_space = env.action_space
    is_discrete_action = isinstance(action_space, gym.spaces.Discrete)
    if not is_discrete_action:
        # Only Discrete and Box action spaces are handled here.
        assert isinstance(action_space, gym.spaces.Box)

    df = replay_buffer_to_pre_timeline_df(is_discrete_action, buffer)
    logger.info(f"Saving dataset with {len(df)} samples to {pkl_path}")
    df.to_pickle(pkl_path)


def _offline_gym(
    env: Gym,
    agent: Agent,
    pkl_path: str,
    num_train_transitions: int,
    max_steps: Optional[int],
    seed: int = 1,
):
    """
    Collect transitions with ``agent`` acting in ``env`` and pickle them.

    ``seed`` is handed to ``initialize_seed`` first. A ``ReplayBuffer`` with
    capacity ``num_train_transitions`` is then filled by
    ``fill_replay_buffer(env, buffer, num_train_transitions, agent)``,
    converted with ``replay_buffer_to_pre_timeline_df`` and written to
    ``pkl_path`` with ``DataFrame.to_pickle``.

    Args:
        env: Environment the transitions are gathered from.
        agent: Agent forwarded to ``fill_replay_buffer``.
        pkl_path: Destination path of the pickled DataFrame.
        num_train_transitions: Used both as the replay buffer capacity and as
            the number of transitions requested from ``fill_replay_buffer``.
        max_steps: Accepted but not used by this function.
        seed: Value passed to ``initialize_seed``.

    Raises:
        AssertionError: If ``env.action_space`` is neither
            ``gym.spaces.Discrete`` nor ``gym.spaces.Box``.
    """
    initialize_seed(seed)

    buffer = ReplayBuffer(replay_capacity=num_train_transitions, batch_size=1)
    fill_replay_buffer(env, buffer, num_train_transitions, agent)

    # The DataFrame conversion needs to know which kind of action was stored.
    action_space = env.action_space
    is_discrete_action = isinstance(action_space, gym.spaces.Discrete)
    if not is_discrete_action:
        # Only Discrete and Box action spaces are handled here.
        assert isinstance(action_space, gym.spaces.Box)

    df = replay_buffer_to_pre_timeline_df(is_discrete_action, buffer)
    logger.info(f"Saving dataset with {len(df)} samples to {pkl_path}")
    df.to_pickle(pkl_path)