Example #1
from research.estop.gym.ddpg_training import debug_run, make_default_ddpg_train_config
from research.estop.gym.ant import env_name, reward_adjustment
from research.estop.gym.gym_wrappers import build_env_spec

# Build the Ant environment spec and launch a single debug DDPG training run,
# respecting Gym's `done` signal for episode termination.
env_spec = build_env_spec(env_name, reward_adjustment)
debug_run(env_spec, make_default_ddpg_train_config(env_spec), respect_gym_done=True)
Example #2
import pickle
from pathlib import Path
from multiprocessing import cpu_count

from jax import random

from research.estop.gym.ddpg_training import make_default_ddpg_train_config
from research.estop.gym.gym_wrappers import build_env_spec
# env_name, reward_adjustment, and debug_run_estop come from the environment-specific
# half-cheetah module; that import is not shown in this snippet.

output_results_dir = Path("results/estop_ddpg_half_cheetah")

# Size of the expert-rollout support set used to derive the e-stop bounds.
num_support_set_rollouts = 128

num_random_seeds = cpu_count() // 2  # half of the available CPU cores
num_episodes = 20000
policy_evaluation_frequency = 1000  # episodes between policy evaluations
policy_video_frequency = 1000  # episodes between recorded policy videos

if __name__ == "__main__":
    rng = random.PRNGKey(0)

    # This is assuming that we used the default train config. If not, well...
    # don't do that.
    env_spec = build_env_spec(env_name, reward_adjustment)
    train_config = make_default_ddpg_train_config(env_spec)

    # Collect expert rollouts and derive the state bounds that define the e-stop region.
    expert_rollouts = debug_run_estop.run_expert_rollouts(rng)
    state_min, state_max = debug_run_estop.get_estop_bounds(expert_rollouts)

    ###

    # Create necessary directory structure.
    output_results_dir.mkdir()

    pickle.dump(
        {
            "type": "estop",
            "num_random_seeds": num_random_seeds,
            "num_episodes": num_episodes,
            "policy_evaluation_frequency": policy_evaluation_frequency,
Example #3
from research.estop.gym.ddpg_training import debug_run, make_default_ddpg_train_config
from research.estop.gym.ant import env_name, reward_adjustment
from research.estop.gym.gym_wrappers import build_env_spec

# Same setup as Example #1, but leaves respect_gym_done at its default value.
env_spec = build_env_spec(env_name, reward_adjustment)
debug_run(env_spec, make_default_ddpg_train_config(env_spec))
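For reference, the same debug-run pattern should also cover the half-cheetah environment used in Example #2. This is a minimal sketch, assuming a research.estop.gym.half_cheetah module exposes env_name and reward_adjustment the same way research.estop.gym.ant does; that module path is not confirmed by the snippets above.

from research.estop.gym.ddpg_training import debug_run, make_default_ddpg_train_config
from research.estop.gym.gym_wrappers import build_env_spec
# Assumed import path, mirroring the ant module used in Examples #1 and #3.
from research.estop.gym.half_cheetah import env_name, reward_adjustment

env_spec = build_env_spec(env_name, reward_adjustment)
debug_run(env_spec, make_default_ddpg_train_config(env_spec))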