def test_autoresolving_proxy_attribute():
    """Verify that proxy attributes such as ``critic`` are resolved automatically
    (cf. :py:class:`maze.api.utils._ATTRIBUTE_PROXIES`).
    """

    def make_env():
        return GymMazeEnv(env=gym.make("CartPole-v0"))

    _, _, critic_composer, _, _ = _get_cartpole_setup_components()

    algorithm = A2CAlgorithmConfig(
        n_epochs=1,
        epoch_length=25,
        patience=15,
        critic_burn_in_epochs=0,
        n_rollout_steps=100,
        lr=0.0005,
        gamma=0.98,
        gae_lambda=1.0,
        policy_loss_coef=1.0,
        value_loss_coef=0.5,
        entropy_coef=0.00025,
        max_grad_norm=0.0,
        device='cpu',
        rollout_evaluator=RolloutEvaluator(
            eval_env=SequentialVectorEnv([make_env]),
            n_episodes=1,
            model_selection=None,
            deterministic=True
        )
    )
    overrides = {"runner.normalization_samples": 1, "runner.concurrency": 1}

    # An instantiated critic composer must resolve to a shared state critic.
    rc = run_context.RunContext(env=make_env,
                                silent=True,
                                algorithm=algorithm,
                                critic=critic_composer,
                                runner="dev",
                                overrides=overrides)
    rc.train(n_epochs=1)
    assert isinstance(rc._runners[RunMode.TRAINING][0].model_composer.critic,
                      TorchSharedStateCritic)

    # A template critic specified by name must resolve to a step state critic.
    rc = run_context.RunContext(env=make_env,
                                silent=True,
                                algorithm=algorithm,
                                critic="template_state",
                                runner="dev",
                                overrides=overrides)
    rc.train(n_epochs=1)
    assert isinstance(rc._runners[RunMode.TRAINING][0].model_composer.critic,
                      TorchStepStateCritic)
def test_evaluation():
    """Covers :py:meth:`RunContext.evaluate` across trainer/evaluator configurations."""
    env_factory = lambda: GymMazeEnv(env=gym.make("CartPole-v0"))

    # ES trainer: the config contains no rollout evaluator at all.
    rc = run_context.RunContext(
        env=env_factory,
        silent=True,
        configuration="test",
        overrides={"runner.normalization_samples": 1, "runner.shared_noise_table_size": 10})
    rc.train(1)
    stats = rc.evaluate(n_episodes=5)
    assert len(stats) == 1
    assert stats[0][(BaseEnvEvents.reward, "episode_count", None)] in (5, 6)

    # A2C trainer: rollout evaluator only partially specified in the config.
    rc = run_context.RunContext(
        env=env_factory,
        silent=True,
        algorithm="a2c",
        configuration="test",
        overrides={"runner.concurrency": 1})
    rc.train(1)
    stats = rc.evaluate(n_episodes=2)
    assert len(stats) == 1
    assert stats[0][(BaseEnvEvents.reward, "episode_count", None)] in (2, 3)

    # A2C trainer with a fully instantiated RolloutEvaluator passed via overrides.
    rc = run_context.RunContext(
        env=env_factory,
        silent=True,
        algorithm="a2c",
        configuration="test",
        overrides={
            "runner.concurrency": 1,
            "algorithm.rollout_evaluator": RolloutEvaluator(
                eval_env=SequentialVectorEnv([lambda: GymMazeEnv("CartPole-v0")]),
                n_episodes=1,
                model_selection=None,
                deterministic=True)
        })
    rc.train(1)
    stats = rc.evaluate(n_episodes=5)
    assert len(stats) == 1
    assert stats[0][(BaseEnvEvents.reward, "episode_count", None)] in (1, 2)
def test_inconsistency_identification_type_3() -> None:
    """ Tests identification of inconsistency due to derived config group.

    Fix: the second :py:class:`RunContext` was previously constructed without being
    assigned, so the subsequent ``rc.train(1)`` re-trained the *first* context and the
    ES dev-runner configuration was never exercised. The new context is now assigned
    to ``rc`` before training.
    """
    es_dev_runner_config = {
        'state_dict_dump_file': 'state_dict.pt',
        'spaces_config_dump_file': 'spaces_config.pkl',
        'normalization_samples': 10000,
        '_target_': 'maze.train.trainers.es.ESDevRunner',
        'n_eval_rollouts': 10,
        'shared_noise_table_size': 100000000
    }
    a2c_alg_config = A2CAlgorithmConfig(
        n_epochs=1,
        epoch_length=25,
        patience=15,
        critic_burn_in_epochs=0,
        n_rollout_steps=100,
        lr=0.0005,
        gamma=0.98,
        gae_lambda=1.0,
        policy_loss_coef=1.0,
        value_loss_coef=0.5,
        entropy_coef=0.00025,
        max_grad_norm=0.0,
        device='cpu',
        rollout_evaluator=RolloutEvaluator(
            eval_env=SequentialVectorEnv([lambda: GymMazeEnv(env="CartPole-v0")]),
            n_episodes=1,
            model_selection=None,
            deterministic=True))
    default_overrides = {
        "runner.normalization_samples": 1,
        "runner.concurrency": 1
    }

    # A2C algorithm config with the default dev runner.
    rc = run_context.RunContext(algorithm=a2c_alg_config,
                                env=lambda: GymMazeEnv(env="CartPole-v0"),
                                silent=True,
                                runner="dev",
                                overrides=default_overrides)
    rc.train(1)

    # ES dev-runner specified as a config dict (derived config group) — assign the
    # new context so training actually runs against this configuration.
    rc = run_context.RunContext(env=lambda: GymMazeEnv(env="CartPole-v0"),
                                runner=es_dev_runner_config,
                                silent=True,
                                overrides=default_overrides)
    rc.train(1)
def test_template_model_composer() -> None:
    """Checks RunContext behaviour in combination with a TemplateModelComposer."""
    base_overrides = {
        "algorithm": "es",
        "runner": "dev",
        "runner.normalization_samples": 1,
        "runner.n_eval_rollouts": 1,
        "runner.shared_noise_table_size": 10
    }

    # Plain template model trains fine.
    run_context.RunContext(silent=True, model="vector_obs",
                           overrides=base_overrides).train(1)

    # An incompatible policy type must be rejected.
    with pytest.raises(TypeError):
        run_context.RunContext(silent=True,
                               model="vector_obs",
                               overrides=base_overrides,
                               policy="random_policy").train(1)

    # A valid policy passed directly as a config dict works.
    run_context.RunContext(
        silent=True,
        model="vector_obs",
        overrides=base_overrides,
        policy={'_target_': 'maze.perception.models.policies.ProbabilisticPolicyComposer'}
    ).train(1)

    # A valid policy injected via overrides works as well.
    run_context.RunContext(overrides={
        **base_overrides,
        'model.policy._target_': 'maze.perception.models.policies.ProbabilisticPolicyComposer'
    }).train(1)

    # A bogus policy target must raise.
    with pytest.raises(ValueError):
        run_context.RunContext(silent=True,
                               model="vector_obs",
                               overrides={**base_overrides, 'model.policy._target_': 'x'}).train(1)
def test_multirun():
    """Exercises hyperparameter-sweep (multirun) support."""

    def build(lr_spec, multirun):
        """Builds a PPO RunContext with the given learning-rate specification."""
        return run_context.RunContext(env=lambda: GymMazeEnv('CartPole-v0'),
                                      silent=True,
                                      algorithm="ppo",
                                      overrides={
                                          "runner.normalization_samples": 1,
                                          "runner.concurrency": 1,
                                          "algorithm.lr": lr_spec
                                      },
                                      configuration="test",
                                      multirun=multirun)

    # Comma-separated sweep syntax without multirun=True must fail.
    with pytest.raises(BaseException):
        rc = build("0.0001,0.0005,0.001", False)
        rc.train(n_epochs=1)

    # A list of values without multirun=True must fail as well.
    with pytest.raises(TypeError):
        rc = build([0.0001, 0.0005, 0.001], False)
        rc.train(n_epochs=1)

    # With multirun=True a list of learning rates spawns one run per value.
    rc = build([0.0001, 0.0005, 0.001], True)
    rc.train(n_epochs=1)

    assert len(rc.policy) == 3
    assert len(rc.run_dir) == 3
    assert len(rc.config[RunMode.TRAINING]) == 3
    assert len(rc.env_factory) == 3
    assert len(rc.evaluate()) == 3
def test_inconsistency_identification_type_1() -> None:
    """Tests identification of inconsistency due to specified elements being incompatible
    with the run mode.
    """
    # The "parallel" runner is not a valid specification for the training run mode.
    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(runner="parallel").train(1)
def test_manual_rollout() -> None:
    """Rolls out a trained policy manually through the env control loop."""
    env_factory = lambda: GymMazeEnv('CartPole-v0')

    rc = run_context.RunContext(env=env_factory, silent=True)
    rc.train(n_epochs=1)

    # Drive the environment by hand for a couple of steps.
    env = env_factory()
    observation = env.reset()
    for _ in range(2):
        action = rc.compute_action(observation)
        observation, _, _, _ = env.step(action)
def test_multiple_runs() -> None:
    """Verifies that one RunContext can execute multiple subsequent training runs."""
    overrides = {
        "algorithm": "es",
        "runner": "dev",
        "runner.normalization_samples": 1,
        "runner.n_eval_rollouts": 1,
        "runner.shared_noise_table_size": 10
    }
    rc = run_context.RunContext(silent=True, overrides=overrides)

    # Two consecutive training invocations must both succeed.
    rc.train(n_epochs=1)
    rc.train(n_epochs=1)
def test_env_type():
    """Ensures the factory-produced environment carries the expected wrappers."""
    rc = run_context.RunContext(silent=True,
                                overrides={
                                    "runner.normalization_samples": 1,
                                    "runner.shared_noise_table_size": 10
                                })
    rc.train(1)

    env = rc.env_factory()
    assert isinstance(env, MazeEnv)
    assert isinstance(env, LogStatsWrapper)
def test_experiment():
    """Checks that experiment configurations are loaded and applied correctly."""
    rc = run_context.RunContext(env=lambda: GymMazeEnv('CartPole-v0'),
                                silent=True,
                                overrides={
                                    "runner.normalization_samples": 1,
                                    "runner.concurrency": 1
                                },
                                experiment="cartpole_ppo_wrappers")
    rc.train(1)

    # The experiment must select PPO and pin the configured learning rate.
    runner = rc._runners[RunMode.TRAINING][0]
    assert isinstance(runner._trainer, PPO)
    assert runner._cfg.algorithm.lr == 0.0001
def test_standalone_training(alg: str, runner: str) -> None:
    """ Tests whether instantiation and standalone training run with all supported algorithms.

    :param alg: Algorithm to train with.
    :param runner: Runner configuration module name.
    """
    # Pass normalization_samples as an int, consistent with every other test in this
    # module (it was previously passed as the string "1").
    overrides = {
        "env.name": "CartPole-v0",
        "runner.normalization_samples": 1
    }
    run_context.RunContext(algorithm=alg,
                           overrides=overrides,
                           silent=True,
                           runner=runner,
                           configuration="test").train(n_epochs=1)
def test_inconsistency_identification_type_2_b() -> None:
    """Tests identification of inconsistency due to codependent components."""
    _, _, _, a2c_alg_config, default_overrides = _generate_inconsistency_type_2_configs()

    # Overriding concurrency settings on a "local" runner must be accepted and train.
    rc = run_context.RunContext(algorithm=a2c_alg_config,
                                silent=True,
                                runner="local",
                                overrides={
                                    **default_overrides,
                                    "runner.concurrency": 2,
                                    "runner.eval_concurrency": 1,
                                })
    rc.train(1)
def test_overrides() -> None:
    """Checks that values set via the overrides dictionary are applied."""
    _, policy_composer, _, _, _ = _get_cartpole_setup_components()
    env_name = "CartPole-v0"

    rc = run_context.RunContext(algorithm="a2c",
                                silent=True,
                                overrides={
                                    "env.name": env_name,
                                    "model.policy": policy_composer,
                                    "runner.normalization_samples": 1,
                                    "runner.concurrency": 1
                                })
    rc.train(n_epochs=1)

    # The env override must propagate to the instantiated core env ...
    env = rc.env_factory()
    assert isinstance(env.core_env, GymCoreEnv)
    assert env.core_env.env.unwrapped.spec.id == env_name

    # ... and the policy override to the trained network.
    network = rc.policy.networks[0]
    assert isinstance(network, FlattenConcatPolicyNet)
    assert network.hidden_units == [222, 222]
def test_inconsistency_identification_type_2_a() -> None:
    """Tests identification of inconsistency due to codependent components."""
    es_dev_runner_config, a2c_dev_runner_config, invalid_a2c_dev_runner_config, a2c_alg_config, default_overrides = \
        _generate_inconsistency_type_2_configs()

    # Algorithm/runner pairings that must be rejected at construction time.
    invalid_pairings = [
        ("a2c", es_dev_runner_config),
        (a2c_alg_config, es_dev_runner_config),
        ("es", a2c_dev_runner_config),
        ("a2c", invalid_a2c_dev_runner_config),
    ]
    for algorithm, runner in invalid_pairings:
        with pytest.raises(run_context.InvalidSpecificationError):
            run_context.RunContext(algorithm=algorithm,
                                   runner=runner,
                                   silent=True,
                                   overrides=default_overrides)

    # Matching pairings must construct and train without complaint.
    for algorithm, runner in (("es", es_dev_runner_config), ("a2c", a2c_dev_runner_config)):
        rc = run_context.RunContext(algorithm=algorithm,
                                    runner=runner,
                                    silent=True,
                                    overrides=default_overrides)
        rc.train(1)
def train(n_epochs: int) -> int: """ Trains agent in pure Python. :param n_epochs: Number of epochs to train. :return: 0 if successful. """ # Environment setup # ----------------- env = cartpole_env_factory() # Algorithm setup # --------------- algorithm_config = A2CAlgorithmConfig( n_epochs=5, epoch_length=25, patience=15, critic_burn_in_epochs=0, n_rollout_steps=100, lr=0.0005, gamma=0.98, gae_lambda=1.0, policy_loss_coef=1.0, value_loss_coef=0.5, entropy_coef=0.00025, max_grad_norm=0.0, device='cpu', rollout_evaluator=RolloutEvaluator( eval_env=SequentialVectorEnv([cartpole_env_factory]), n_episodes=1, model_selection=None, deterministic=True ) ) # Custom model setup # ------------------ # Policy customization # ^^^^^^^^^^^^^^^^^^^^ # Policy network. policy_net = CartpolePolicyNet( obs_shapes={'observation': env.observation_space.spaces['observation'].shape}, action_logit_shapes={'action': (env.action_space.spaces['action'].n,)} ) policy_networks = [policy_net] # Policy distribution. distribution_mapper = DistributionMapper(action_space=env.action_space, distribution_mapper_config={}) # Policy composer. policy_composer = ProbabilisticPolicyComposer( action_spaces_dict=env.action_spaces_dict, observation_spaces_dict=env.observation_spaces_dict, # Derive distribution from environment's action space. distribution_mapper=distribution_mapper, networks=policy_networks, # We have only one agent and network, thus this is an empty list. substeps_with_separate_agent_nets=[], # We have only one step and one agent. agent_counts_dict={0: 1} ) # Critic customization # ^^^^^^^^^^^^^^^^^^^^ # Value networks. value_networks = { 0: TorchModelBlock( in_keys='observation', out_keys='value', in_shapes=env.observation_space.spaces['observation'].shape, in_num_dims=[2], out_num_dims=2, net=CartpoleValueNet({'observation': env.observation_space.spaces['observation'].shape}) ) } # Critic composer. 
critic_composer = SharedStateCriticComposer( observation_spaces_dict=env.observation_spaces_dict, agent_counts_dict={0: 1}, networks=value_networks, stack_observations=True ) # Training # ^^^^^^^^ rc = run_context.RunContext( env=cartpole_env_factory, algorithm=algorithm_config, policy=policy_composer, critic=critic_composer, runner="dev" ) rc.train(n_epochs=n_epochs) # Distributed training # ^^^^^^^^^^^^^^^^^^^^ algorithm_config.rollout_evaluator.eval_env = SubprocVectorEnv([cartpole_env_factory]) rc = run_context.RunContext( env=cartpole_env_factory, algorithm=algorithm_config, policy=policy_composer, critic=critic_composer, runner="local" ) rc.train(n_epochs=n_epochs) # Evaluation # ^^^^^^^^^^ print("-----------------") evaluator = RolloutEvaluator( eval_env=LogStatsWrapper.wrap(cartpole_env_factory(), logging_prefix="eval"), n_episodes=1, model_selection=None ) evaluator.evaluate(rc.policy) return 0
def test_inconsistency_identification_type_4_invalid() -> None:
    """ Tests identification of inconsistency due to specification of super- and subcomponents.

    Every case below specifies the policy subcomponent through more than one channel at
    once and therefore must raise :py:class:`run_context.InvalidSpecificationError`.
    """
    model_composer, policy_composer, _, _, _ = _get_cartpole_setup_components()
    model_policy_target = "maze.perception.models.policies.ProbabilisticPolicyComposer"

    # With nesting level > 1. Both parent and child in overrides.
    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(algorithm="a2c",
                               env=lambda: GymMazeEnv(env="CartPole-v0"),
                               silent=True,
                               overrides={
                                   "policy": policy_composer,
                                   "model.policy._target_": model_policy_target
                               })

    # With nesting level > 1, parent in overrides with proxy path.
    # NOTE(review): the parent is actually passed via the explicit `policy` argument
    # here (not via overrides), which makes this case identical to the "parent as
    # explicit argument" case below — confirm whether an overrides-based variant was
    # intended.
    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(
            algorithm="a2c",
            env=lambda: GymMazeEnv(env="CartPole-v0"),
            silent=True,
            policy=policy_composer,
            overrides={"model.policy._target_": model_policy_target})

    # With nesting level > 1, with proxy path, parent with full path.
    # NOTE(review): the code uses the proxy path "policy" for the parent, not the full
    # path "model.policy" the comment suggests, making this identical to the "both with
    # proxy path" case below — confirm intent.
    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(algorithm="a2c",
                               env=lambda: GymMazeEnv(env="CartPole-v0"),
                               silent=True,
                               overrides={
                                   "policy": policy_composer,
                                   "policy._target_": model_policy_target
                               })

    # With nesting level > 1, both with proxy path.
    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(algorithm="a2c",
                               env=lambda: GymMazeEnv(env="CartPole-v0"),
                               silent=True,
                               overrides={
                                   "policy": policy_composer,
                                   "policy._target_": model_policy_target
                               })

    # With nesting level > 1, parent as explicit argument.
    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(
            algorithm="a2c",
            env=lambda: GymMazeEnv(env="CartPole-v0"),
            silent=True,
            policy=policy_composer,
            overrides={"model.policy._target_": model_policy_target})

    # With override referencing explicit argument.
    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(algorithm="a2c",
                               env=lambda: GymMazeEnv(env="CartPole-v0"),
                               silent=True,
                               model=model_composer,
                               overrides={"model.policy": policy_composer})

    # With override referencing explicit argument via proxy.
    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(algorithm="a2c",
                               env=lambda: GymMazeEnv(env="CartPole-v0"),
                               silent=True,
                               model=model_composer,
                               overrides={"policy": policy_composer})

    # With explicit argument referencing other explicit argument.
    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(algorithm="a2c",
                               env=lambda: GymMazeEnv(env="CartPole-v0"),
                               silent=True,
                               model=model_composer,
                               policy=policy_composer)
def test_inconsistency_identification_type_4_valid() -> None:
    """Covers legal super-/subcomponent combinations that must NOT be flagged as
    inconsistent: a DictConfig or config-module-name parent may be combined with an
    instantiated child composer or a child override.
    """
    _, policy_composer, _, _, _ = _get_cartpole_setup_components()
    policy_target = "maze.perception.models.policies.ProbabilisticPolicyComposer"
    custom_model_cfg = {
        '_target_': 'maze.perception.models.custom_model_composer.CustomModelComposer',
        'distribution_mapper_config': [{
            'action_space': 'gym.spaces.Box',
            'distribution': 'maze.distributions.squashed_gaussian.SquashedGaussianProbabilityDistribution'
        }],
        'policy': {
            '_target_': 'maze.perception.models.policies.ProbabilisticPolicyComposer',
            'networks': [{
                '_target_': 'maze.perception.models.built_in.flatten_concat.FlattenConcatPolicyNet',
                'non_lin': 'torch.nn.Tanh',
                'hidden_units': [222, 222]
            }],
            "substeps_with_separate_agent_nets": [],
            "agent_counts_dict": {0: 1}
        },
        'critic': {
            '_target_': 'maze.perception.models.critics.StateCriticComposer',
            'networks': [{
                '_target_': 'maze.perception.models.built_in.flatten_concat.FlattenConcatStateValueNet',
                'non_lin': 'torch.nn.Tanh',
                'hidden_units': [256, 256]
            }]
        }
    }
    base_overrides = {"runner.concurrency": 1}

    def assert_trains(**kwargs) -> None:
        """Builds a RunContext, trains one epoch and checks the policy override took effect."""
        ctx = run_context.RunContext(algorithm="a2c",
                                     env=lambda: GymMazeEnv(env="CartPole-v0"),
                                     silent=True,
                                     runner="dev",
                                     configuration="test",
                                     **kwargs)
        ctx.train(1)
        assert ctx.policy.networks[0].hidden_units == [222, 222]

    # DictConfig parent together with an instantiated child composer (legal).
    assert_trains(model=custom_model_cfg, policy=policy_composer, overrides=base_overrides)

    # Parent given as config module name.
    assert_trains(model="flatten_concat", policy=policy_composer, overrides=base_overrides)

    # Config module name parent plus aliased ("policy.*") child override.
    assert_trains(policy=custom_model_cfg["policy"],
                  overrides={"policy._target_": policy_target, **base_overrides})

    # Config module name parent plus non-aliased ("model.policy.*") child override.
    # NOTE(review): unlike the cases above this one passes no runner.concurrency
    # override — confirm whether that omission is intentional.
    assert_trains(policy=custom_model_cfg["policy"],
                  overrides={"model.policy._target_": policy_target})