def test_handles_no_config_provided():
    """
    Make sure the trainer setup handles no configs provided at all.

    TrainerFactory should fall back to default behavior settings instead of
    raising when the config dict was not populated from a YAML file.
    """
    brain_name = "testbrain"
    no_default_config = RunOptions().behaviors
    # Pretend this was created without a YAML file: mark the config as
    # unspecified so the factory exercises its default-settings path.
    no_default_config.set_config_specified(False)
    trainer_factory = TrainerFactory(
        trainer_config=no_default_config,
        output_path="output_path",
        train_model=True,
        load_model=False,
        seed=42,
        param_manager=EnvironmentParameterManager(),
    )
    # Should not raise even though no per-brain config exists.
    trainer_factory.generate(brain_name)
def test_sac_trainer_update_normalization(sac_config):
    """
    Check that advancing a SAC trainer with a queued trajectory triggers
    observation-normalization updates on both the policy and the critic.
    """
    behavior_id_team0 = "test_brain?team=0"
    brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name

    mock_specs = mb.setup_test_behavior_specs(
        True, False, vector_action_space=[2], vector_obs_space=1
    )
    base_config = sac_config.behaviors
    output_path = "results_dir"
    train_model = True
    load_model = False
    seed = 42
    trainer_factory = TrainerFactory(
        trainer_config=base_config,
        output_path=output_path,
        train_model=train_model,
        load_model=load_model,
        seed=seed,
        param_manager=EnvironmentParameterManager(),
    )
    sac_trainer = trainer_factory.generate(brain_name)

    # Attach a policy for this behavior so the trainer can process trajectories.
    parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
    policy = sac_trainer.create_policy(parsed_behavior_id0, mock_specs)
    sac_trainer.add_policy(parsed_behavior_id0, policy)

    # Feed one fake trajectory through the trainer's queue.
    trajectory_queue0 = AgentManagerQueue(behavior_id_team0)
    sac_trainer.subscribe_trajectory_queue(trajectory_queue0)
    time_horizon = 15
    trajectory = make_fake_trajectory(
        length=time_horizon,
        max_step_complete=True,
        observation_specs=create_observation_specs_with_shapes([(1,)]),
        action_spec=mock_specs.action_spec,
    )
    trajectory_queue0.put(trajectory)

    # Mocking out update_normalization in both the policy and critic: we only
    # care that advance() calls each exactly once, not what they compute.
    with patch(
        "mlagents.trainers.torch.networks.ValueNetwork.update_normalization"
    ) as optimizer_update_normalization_mock, patch(
        "mlagents.trainers.policy.torch_policy.TorchPolicy.update_normalization"
    ) as policy_update_normalization_mock:
        sac_trainer.advance()
        optimizer_update_normalization_mock.assert_called_once()
        policy_update_normalization_mock.assert_called_once()
def test_initialize_ppo_trainer(BehaviorSpecMock, dummy_config):
    """
    Verify TrainerFactory constructs a PPOTrainer with the expected arguments
    by patching PPOTrainer.__init__ with an asserting mock constructor.
    """
    brain_name = "testbrain"
    training_behaviors = {"testbrain": BehaviorSpecMock()}
    output_path = "results_dir"
    train_model = True
    load_model = False
    seed = 11
    expected_reward_buff_cap = 1
    base_config = dummy_config.behaviors
    expected_config = ppo_dummy_config()
    # Capture the expected seed under a non-shadowed name: inside
    # mock_constructor the parameter `seed` shadows the outer variable, so
    # `assert seed == seed` would be a tautology and could never fail.
    expected_seed = seed

    def mock_constructor(
        self,
        brain,
        reward_buff_cap,
        trainer_settings,
        training,
        load,
        seed,
        artifact_path,
    ):
        # Stand-in for PPOTrainer.__init__: assert on every argument the
        # factory passes instead of building a real trainer.
        assert brain == brain_name
        assert trainer_settings == expected_config
        assert reward_buff_cap == expected_reward_buff_cap
        assert training == train_model
        assert load == load_model
        assert seed == expected_seed
        assert artifact_path == os.path.join(output_path, brain_name)

    with patch.object(PPOTrainer, "__init__", mock_constructor):
        trainer_factory = TrainerFactory(
            trainer_config=base_config,
            output_path=output_path,
            train_model=train_model,
            load_model=load_model,
            seed=seed,
            param_manager=EnvironmentParameterManager(),
        )
        trainers = {}
        for brain_name in training_behaviors:
            trainers[brain_name] = trainer_factory.generate(brain_name)
        assert "testbrain" in trainers
        assert isinstance(trainers["testbrain"], PPOTrainer)