def test_parallel_data_load_from_directory_clipped_from_hydra():
    """Test loading trajectories of multiple episodes in parallel into an in-memory dataset.
    (Each data-loader process reads the files assigned to it.)"""
    # Heuristics rollout: record trajectory data for the dataset to load below
    rollout_overrides = dict(
        configuration="test",
        env="gym_env",
        policy="random_policy",
        runner="sequential",
    )
    rollout_overrides["env.name"] = "CartPole-v0"
    rollout_overrides["runner.n_episodes"] = 2
    rollout_overrides["runner.max_episode_steps"] = 20
    rollout_overrides["seeding.env_base_seed"] = 12345
    rollout_overrides["seeding.agent_base_seed"] = 12345
    run_maze_job(rollout_overrides, config_module="maze.conf", config_name="conf_rollout")

    # Build the dataset via hydra-style instantiation through the Factory
    processor_config = {
        '_target_': 'maze.core.trajectory_recording.datasets.trajectory_processor.DeadEndClippingTrajectoryProcessor',
        'clip_k': 2,
    }
    dataset_config = {
        '_target_': 'maze.core.trajectory_recording.datasets.in_memory_dataset.InMemoryDataset',
        'n_workers': 2,
        'conversion_env_factory': lambda: make_gym_maze_env("CartPole-v0"),
        'input_data': 'trajectory_data',
        'deserialize_in_main_thread': False,
        'trajectory_processor': processor_config,
    }
    dataset = Factory(InMemoryDataset).instantiate(dataset_config)

    # expected step counts of the two dead-end-clipped episodes
    assert len(dataset) == 11 + 17
def run_sacfd(env: str, teacher_policy: str, sac_runner: str, sac_wrappers: str, sac_model: str, sac_critic: str):
    """Run soft actor critic from demonstrations (SacfD) for given config parameters.

    Runs a rollout with the given teacher_policy, then runs SacfD on the collected
    trajectory data.

    :param env: Env config to roll out and train on.
    :param teacher_policy: Teacher policy used to record the demonstration rollout.
    :param sac_runner: Runner config for the SacfD training run.
    :param sac_wrappers: Wrapper config for the SacfD training run.
    :param sac_model: Model config for the SacfD training run.
    :param sac_critic: Critic config for the SacfD training run.
    """
    # Heuristics rollout
    # (fixed: removed a stray duplicate docstring statement and duplicated comment
    #  that sat dead in the function body)
    rollout_config = dict(configuration="test", env=env, policy=teacher_policy, runner="sequential")
    rollout_config['runner.n_episodes'] = 10
    rollout_config['runner.max_episode_steps'] = 10
    run_maze_job(rollout_config, config_module="maze.conf", config_name="conf_rollout")

    # SacfD training on top of the heuristic rollout trajectories
    # (comment corrected: this starts the "sacfd" algorithm, not behavioral cloning)
    train_config = dict(configuration="test", env=env, wrappers=sac_wrappers, model=sac_model,
                        algorithm="sacfd", runner=sac_runner, critic=sac_critic)
    run_maze_job(train_config, config_module="maze.conf", config_name="conf_train")
def test_parallel_data_load_from_directory_clipped():
    """Test loading trajectories of multiple episodes in parallel into an in-memory dataset.
    (Each data-loader process reads the files assigned to it.)"""
    # Record trajectory data with a heuristics rollout first
    overrides = dict(
        configuration="test",
        env="gym_env",
        policy="random_policy",
        runner="sequential",
    )
    overrides["env.name"] = "CartPole-v0"
    overrides["runner.n_episodes"] = 2
    overrides["runner.max_episode_steps"] = 20
    overrides["seeding.env_base_seed"] = 12345
    overrides["seeding.agent_base_seed"] = 12345
    run_maze_job(overrides, config_module="maze.conf", config_name="conf_rollout")

    # Load the recorded data with two workers, applying dead-end clipping
    clipping_processor = DeadEndClippingTrajectoryProcessor(clip_k=2)
    dataset = InMemoryDataset(
        n_workers=2,
        conversion_env_factory=lambda: make_gym_maze_env("CartPole-v0"),
        input_data="trajectory_data",
        trajectory_processor=clipping_processor,
        deserialize_in_main_thread=False,
    )

    # expected step counts of the two dead-end-clipped episodes
    assert len(dataset) == 11 + 17
def run_behavioral_cloning(env: str, teacher_policy: str, bc_runner: str, bc_wrappers: str, bc_model: str):
    """Run behavioral cloning for given config parameters.

    Runs a rollout with the given teacher_policy, then runs behavioral cloning on the
    collected trajectory data.
    """
    # Heuristics rollout
    run_maze_job(
        dict(configuration="test", env=env, policy=teacher_policy, runner="sequential"),
        config_module="maze.conf",
        config_name="conf_rollout",
    )

    # Behavioral cloning on top of the heuristic rollout trajectories
    run_maze_job(
        dict(configuration="test", env=env, wrappers=bc_wrappers, model=bc_model,
             algorithm="bc", runner=bc_runner),
        config_module="maze.conf",
        config_name="conf_train",
    )
def test_standard_configurations(config_name: str, hydra_overrides: Dict[str, str]):
    """Launch a maze job for the given config name and overrides, then check its artifacts."""
    # run training; bounded by a timeout since we only need it to get going
    try:
        with Timeout(seconds=15):
            run_maze_job(hydra_overrides, config_module="maze.conf", config_name=config_name)
    except TimeoutError:
        # ignore timeout errors, we don't wait for the training to end
        pass

    if config_name == "conf_train":
        # training runs must leave behind exactly one tensorboard log
        summary_files = glob.glob("*events.out.tfevents*")
        assert len(summary_files) == 1, f"expected exactly 1 tensorflow summary file {summary_files}"
def perform_algorithm_seeding_test(hydra_overrides: Dict[str, str]):
    """Check that training is reproducible under fixed seeds and varies under fresh seeds.

    Runs the same training config three times in separate working directories:
    a base run, a run re-using the base run's seeds (all non-timing tensorboard
    values must match), and a run without seed overrides (values must NOT all match).

    NOTE(review): mutates the caller's ``hydra_overrides`` dict while running, but
    restores it by deleting the seed keys it added. Also chdir()s; the process ends
    in the ``exp_neg`` subdirectory — presumably acceptable in a per-test tmp dir.

    :param hydra_overrides: Hydra override key/value pairs for the training job;
        must not already contain the seeding override keys.
    """
    # Perform base run for comparison ----------------------------------------------------------------------------------
    base_dir = os.path.abspath('.')
    os.mkdir('./base_exp')
    os.chdir('./base_exp')

    # run training (cfg carries the concrete seeds the job drew)
    with Timeout(seconds=60):
        cfg = run_maze_job(hydra_overrides, config_module="maze.conf", config_name="conf_train")

    # load tensorboard log
    tf_summary_files = glob.glob("*events.out.tfevents*")
    assert len(
        tf_summary_files
    ) == 1, f"expected exactly 1 tensorflow summary file {tf_summary_files}"
    events_df = tensorboard_to_pandas(tf_summary_files[0])
    # reset global RNG/registry state so the next run starts clean
    clear_global_state()

    # Perform comparison run with same seeds ---------------------------------------------------------------------------
    os.chdir(base_dir)
    os.mkdir('./exp_pos')
    os.chdir('./exp_pos')
    # pin the seeds actually used by the base run so this run must reproduce it
    hydra_overrides['seeding.agent_base_seed'] = cfg.seeding.agent_base_seed
    hydra_overrides['seeding.env_base_seed'] = cfg.seeding.env_base_seed

    # run training
    with Timeout(seconds=60):
        run_maze_job(hydra_overrides, config_module="maze.conf", config_name="conf_train")

    # load tensorboard log
    tf_summary_files = glob.glob("*events.out.tfevents*")
    assert len(
        tf_summary_files
    ) == 1, f"expected exactly 1 tensorflow summary file {tf_summary_files}"
    events_df_2 = tensorboard_to_pandas(tf_summary_files[0])
    clear_global_state()

    # remove the seed pins again so the third run draws fresh seeds
    del hydra_overrides['seeding.agent_base_seed']
    del hydra_overrides['seeding.env_base_seed']

    # same seeds => every logged value (except wall-clock timings) must be identical
    assert len(events_df) == len(events_df_2)
    for idx, (key, epoch) in enumerate(events_df.index):
        if 'time' in key:
            continue
        assert events_df_2.values[idx] == events_df.values[idx], \
            f'Value not equal for key: {key} in epoch: {epoch}'

    # Perform second comparison run with different seeds ---------------------------------------------------------------
    os.chdir(base_dir)
    os.mkdir('./exp_neg')
    os.chdir('./exp_neg')

    # run training
    with Timeout(seconds=60):
        run_maze_job(hydra_overrides, config_module="maze.conf", config_name="conf_train")

    # load tensorboard log
    tf_summary_files = glob.glob("*events.out.tfevents*")
    assert len(
        tf_summary_files
    ) == 1, f"expected exactly 1 tensorflow summary file {tf_summary_files}"
    events_df_2 = tensorboard_to_pandas(tf_summary_files[0])

    # different seeds => at least one non-timing value must differ from the base run
    all_equal = True
    for idx, (key, epoch) in enumerate(events_df.index):
        if 'time' in key:
            continue
        all_equal = all_equal and events_df.values[idx] == events_df_2.values[
            idx]
    assert not all_equal, 'The resulting logs should not be all equal'
def test_heuristic_rollouts(hydra_overrides: Dict[str, str]): """Runs rollout of a dummy policy on cartpole using the sequential and parallel runners.""" run_maze_job(hydra_overrides, config_module="maze.conf", config_name="conf_rollout")