Example #1
0
def test_parallel_data_load_from_directory_clipped_from_hydra():
    """Test loading trajectories of multiple episodes in parallel into an in-memory dataset. (Each
    data-loader process reads the files assigned to it.)"""
    # First collect trajectory data on disk via a short random-policy rollout.
    rollout_config = dict()
    rollout_config["configuration"] = "test"
    rollout_config["env"] = "gym_env"
    rollout_config["env.name"] = "CartPole-v0"
    rollout_config["policy"] = "random_policy"
    rollout_config["runner"] = "sequential"
    rollout_config["runner.n_episodes"] = 2
    rollout_config["runner.max_episode_steps"] = 20
    rollout_config["seeding.env_base_seed"] = 12345
    rollout_config["seeding.agent_base_seed"] = 12345
    run_maze_job(rollout_config, config_module="maze.conf", config_name="conf_rollout")

    # Build the dataset through Hydra-style config instantiation rather than a direct constructor
    # call, exercising the Factory/_target_ path.
    processor_config = {
        '_target_': 'maze.core.trajectory_recording.datasets.trajectory_processor.DeadEndClippingTrajectoryProcessor',
        'clip_k': 2
    }
    hydra_config = {
        '_target_': 'maze.core.trajectory_recording.datasets.in_memory_dataset.InMemoryDataset',
        'n_workers': 2,
        'conversion_env_factory': lambda: make_gym_maze_env("CartPole-v0"),
        'input_data': 'trajectory_data',
        'deserialize_in_main_thread': False,
        'trajectory_processor': processor_config
    }

    dataset = Factory(InMemoryDataset).instantiate(hydra_config)

    # With the fixed seeds above, clipping the two episodes leaves 11 and 17 steps respectively.
    assert len(dataset) == 11 + 17
Example #2
0
def run_sacfd(env: str, teacher_policy: str, sac_runner: str,
              sac_wrappers: str, sac_model: str, sac_critic: str):
    """Run soft actor critic from demonstrations for given config parameters.

    Runs a rollout with the given teacher_policy, then runs sacfD on the collected trajectory data.

    :param env: Hydra env config name to roll out and train on.
    :param teacher_policy: Hydra policy config name used to collect the demonstration rollout.
    :param sac_runner: Hydra runner config name for the sacfD training run.
    :param sac_wrappers: Hydra wrappers config name for the sacfD training run.
    :param sac_model: Hydra model config name for the sacfD training run.
    :param sac_critic: Hydra critic config name for the sacfD training run.
    """
    # Heuristics rollout to collect demonstration trajectory data.
    rollout_config = dict(configuration="test",
                          env=env,
                          policy=teacher_policy,
                          runner="sequential")
    # Dotted override keys are not valid Python identifiers, so set them separately.
    rollout_config['runner.n_episodes'] = 10
    rollout_config['runner.max_episode_steps'] = 10
    run_maze_job(rollout_config,
                 config_module="maze.conf",
                 config_name="conf_rollout")

    # SACfD training on top of the heuristic rollout trajectories.
    train_config = dict(configuration="test",
                        env=env,
                        wrappers=sac_wrappers,
                        model=sac_model,
                        algorithm="sacfd",
                        runner=sac_runner,
                        critic=sac_critic)
    run_maze_job(train_config,
                 config_module="maze.conf",
                 config_name="conf_train")
Example #3
0
def test_parallel_data_load_from_directory_clipped():
    """Test loading trajectories of multiple episodes in parallel into an in-memory dataset. (Each
    data-loader process reads the files assigned to it.)"""
    # Produce trajectory files on disk first via a short random-policy rollout.
    rollout_config = dict()
    rollout_config["configuration"] = "test"
    rollout_config["env"] = "gym_env"
    rollout_config["env.name"] = "CartPole-v0"
    rollout_config["policy"] = "random_policy"
    rollout_config["runner"] = "sequential"
    rollout_config["runner.n_episodes"] = 2
    rollout_config["runner.max_episode_steps"] = 20
    rollout_config["seeding.env_base_seed"] = 12345
    rollout_config["seeding.agent_base_seed"] = 12345
    run_maze_job(rollout_config, config_module="maze.conf", config_name="conf_rollout")

    # Load the recorded trajectories with two worker processes and dead-end clipping applied.
    processor = DeadEndClippingTrajectoryProcessor(clip_k=2)
    env_factory = lambda: make_gym_maze_env("CartPole-v0")
    dataset = InMemoryDataset(
        n_workers=2,
        conversion_env_factory=env_factory,
        input_data="trajectory_data",
        trajectory_processor=processor,
        deserialize_in_main_thread=False
    )

    # With the fixed seeds above, clipping the two episodes leaves 11 and 17 steps respectively.
    assert len(dataset) == 11 + 17
Example #4
0
def run_behavioral_cloning(env: str, teacher_policy: str, bc_runner: str,
                           bc_wrappers: str, bc_model: str):
    """Run behavioral cloning for given config parameters.

    Runs a rollout with the given teacher_policy, then runs behavioral cloning on the collected trajectory data.

    :param env: Hydra env config name to roll out and train on.
    :param teacher_policy: Hydra policy config name used to collect the teacher rollout.
    :param bc_runner: Hydra runner config name for the cloning run.
    :param bc_wrappers: Hydra wrappers config name for the cloning run.
    :param bc_model: Hydra model config name for the cloning run.
    """
    # Heuristics rollout: collect teacher trajectory data.
    rollout_config = dict(configuration="test", env=env)
    rollout_config["policy"] = teacher_policy
    rollout_config["runner"] = "sequential"
    run_maze_job(rollout_config, config_module="maze.conf", config_name="conf_rollout")

    # Behavioral cloning on top of the heuristic rollout trajectories.
    train_config = dict(configuration="test", env=env)
    train_config["wrappers"] = bc_wrappers
    train_config["model"] = bc_model
    train_config["algorithm"] = "bc"
    train_config["runner"] = bc_runner
    run_maze_job(train_config, config_module="maze.conf", config_name="conf_train")
def test_standard_configurations(config_name: str, hydra_overrides: Dict[str, str]):
    """Launch a maze job for the given config and, for training configs, check a tensorboard log appears.

    :param config_name: Hydra root config name (e.g. "conf_train" or "conf_rollout").
    :param hydra_overrides: Hydra overrides passed through to the job.
    """
    # Start the job but cap its runtime; hitting the timeout is fine since we
    # do not need the training to run to completion.
    try:
        with Timeout(seconds=15):
            run_maze_job(hydra_overrides, config_module="maze.conf", config_name=config_name)
    except TimeoutError:
        pass

    # Only training runs are expected to write tensorboard summaries.
    if config_name != "conf_train":
        return
    tf_summary_files = glob.glob("*events.out.tfevents*")
    assert len(tf_summary_files) == 1, f"expected exactly 1 tensorflow summary file {tf_summary_files}"
def _load_single_tb_log():
    """Locate the single tensorboard summary file in the current working directory and load it.

    :return: The tensorboard events as a pandas dataframe indexed by (key, epoch).
    """
    tf_summary_files = glob.glob("*events.out.tfevents*")
    assert len(
        tf_summary_files
    ) == 1, f"expected exactly 1 tensorflow summary file {tf_summary_files}"
    return tensorboard_to_pandas(tf_summary_files[0])


def perform_algorithm_seeding_test(hydra_overrides: Dict[str, str]):
    """Check that training is reproducible with fixed seeds and varies with fresh seeds.

    Runs three trainings in separate sub-directories: a base run, a run re-using the base run's
    seeds (all non-timing log values must match), and a run with newly drawn seeds (the logs must
    differ in at least one value).

    :param hydra_overrides: Hydra overrides for the training job; temporarily extended with the
        seeding overrides for the second run and restored afterwards.
    """
    # Perform base run for comparison ----------------------------------------------------------------------------------
    base_dir = os.path.abspath('.')
    os.mkdir('./base_exp')
    os.chdir('./base_exp')
    # run training; keep the resolved config to reuse its seeds below
    with Timeout(seconds=60):
        cfg = run_maze_job(hydra_overrides,
                           config_module="maze.conf",
                           config_name="conf_train")
    events_df = _load_single_tb_log()
    clear_global_state()

    # Perform comparison run with same seeds ---------------------------------------------------------------------------
    os.chdir(base_dir)
    os.mkdir('./exp_pos')
    os.chdir('./exp_pos')

    hydra_overrides['seeding.agent_base_seed'] = cfg.seeding.agent_base_seed
    hydra_overrides['seeding.env_base_seed'] = cfg.seeding.env_base_seed
    with Timeout(seconds=60):
        run_maze_job(hydra_overrides,
                     config_module="maze.conf",
                     config_name="conf_train")
    events_df_2 = _load_single_tb_log()
    clear_global_state()
    # restore the caller's overrides so the third run draws fresh seeds
    del hydra_overrides['seeding.agent_base_seed']
    del hydra_overrides['seeding.env_base_seed']

    # With identical seeds, every non-timing log value must match exactly.
    assert len(events_df) == len(events_df_2)
    for idx, (key, epoch) in enumerate(events_df.index):
        if 'time' in key:
            continue
        assert events_df_2.values[idx] == events_df.values[idx], \
            f'Value not equal for key: {key} in epoch: {epoch}'

    # Perform second comparison run with different seeds ---------------------------------------------------------------
    os.chdir(base_dir)
    os.mkdir('./exp_neg')
    os.chdir('./exp_neg')

    with Timeout(seconds=60):
        run_maze_job(hydra_overrides,
                     config_module="maze.conf",
                     config_name="conf_train")
    events_df_2 = _load_single_tb_log()

    # With different seeds at least one non-timing log value must differ.
    all_equal = True
    for idx, (key, epoch) in enumerate(events_df.index):
        if 'time' in key:
            continue
        all_equal = all_equal and events_df.values[idx] == events_df_2.values[
            idx]
    assert not all_equal, 'The resulting logs should not be all equal'
Example #7
0
def test_heuristic_rollouts(hydra_overrides: Dict[str, str]):
    """Runs rollout of a dummy policy on cartpole using the sequential and parallel runners."""
    # Delegate directly to the shared job launcher with the rollout root config.
    run_maze_job(hydra_overrides, config_module="maze.conf", config_name="conf_rollout")