예제 #1
0
def offline_gym(
    env_name: str,
    pkl_path: str,
    num_train_transitions: int,
    max_steps: Optional[int],
    seed: Optional[int] = None,
):
    """
    Collect transitions from a Gym environment and pickle them to disk.

    The function seeds the run, builds a ``Gym`` wrapper for ``env_name``,
    fills a replay buffer with ``num_train_transitions`` transitions, converts
    the buffer into a pre-timeline table and writes it with ``to_pickle``.

    Args:
        env_name: Name handed to ``Gym(env_name=...)``.
        pkl_path: Destination path of the pickled table.
        num_train_transitions: Used both as the replay buffer capacity and as
            the number of transitions requested from ``fill_replay_buffer``.
        max_steps: Accepted for interface compatibility; not used by this
            function.
        seed: Value forwarded to ``initialize_seed``.

    Raises:
        AssertionError: If the environment's action space is neither
            ``gym.spaces.Discrete`` nor ``gym.spaces.Box``.
    """
    initialize_seed(seed)
    env = Gym(env_name=env_name)

    # No agent is passed here, so fill_replay_buffer falls back to whatever
    # its default acting policy is (presumably random -- confirm in helper).
    replay_buffer = ReplayBuffer(
        replay_capacity=num_train_transitions, batch_size=1
    )
    fill_replay_buffer(env, replay_buffer, num_train_transitions)

    # Only discrete and box action spaces are supported; anything else trips
    # the assertion below.
    action_space = env.action_space
    is_discrete_action = isinstance(action_space, gym.spaces.Discrete)
    if not is_discrete_action:
        assert isinstance(action_space, gym.spaces.Box)

    df = replay_buffer_to_pre_timeline_df(is_discrete_action, replay_buffer)
    logger.info(f"Saving dataset with {len(df)} samples to {pkl_path}")
    df.to_pickle(pkl_path)
예제 #2
0
def _offline_gym(
    env: Gym,
    agent: Agent,
    pkl_path: str,
    num_train_transitions: int,
    max_steps: Optional[int],
    seed: int = 1,
):
    """
    Collect transitions from ``env`` using ``agent`` and pickle them to disk.

    The function seeds the run, fills a replay buffer with
    ``num_train_transitions`` transitions gathered by ``agent``, converts the
    buffer into a pre-timeline table and writes it with ``to_pickle``.

    Args:
        env: Environment to sample from; its ``action_space`` decides whether
            the data is treated as discrete- or continuous-action.
        agent: Forwarded to ``fill_replay_buffer`` as the acting agent.
        pkl_path: Destination path of the pickled table.
        num_train_transitions: Used both as the replay buffer capacity and as
            the number of transitions requested from ``fill_replay_buffer``.
        max_steps: Accepted for interface compatibility; not used by this
            function.
        seed: Value forwarded to ``initialize_seed``.

    Raises:
        AssertionError: If the environment's action space is neither
            ``gym.spaces.Discrete`` nor ``gym.spaces.Box``.
    """
    initialize_seed(seed)

    replay_buffer = ReplayBuffer(
        replay_capacity=num_train_transitions, batch_size=1
    )
    fill_replay_buffer(env, replay_buffer, num_train_transitions, agent)

    # Only discrete and box action spaces are supported; anything else trips
    # the assertion below.
    action_space = env.action_space
    is_discrete_action = isinstance(action_space, gym.spaces.Discrete)
    if not is_discrete_action:
        assert isinstance(action_space, gym.spaces.Box)

    df = replay_buffer_to_pre_timeline_df(is_discrete_action, replay_buffer)
    logger.info(f"Saving dataset with {len(df)} samples to {pkl_path}")
    df.to_pickle(pkl_path)