def test_poca_optimizer_update_gail(gail_dummy_config, dummy_config):  # noqa: F811
    """Run POCA optimizer updates on simulated rollouts with mocked reward fields.

    Two rollouts are simulated (``BUFFER_INIT_SAMPLES`` and 3000 samples). For
    each, the environment rewards are copied into the advantage, return,
    value-estimate and baseline-estimate fields of the "extrinsic" and "gail"
    signals before ``optimizer.update`` is called. The test has no explicit
    assertions; it passes when both updates complete without raising.
    """
    dummy_config.reward_signals = gail_dummy_config
    # NOTE(review): the optimizer is built from a fresh poca_dummy_config()
    # rather than from the dummy_config mutated above -- confirm that the GAIL
    # settings are actually meant to (and do) reach the optimizer.
    config = poca_dummy_config()
    optimizer = create_test_poca_optimizer(
        config, use_rnn=False, use_discrete=False, use_visual=False
    )
    behavior_spec_source = optimizer.policy

    def _mock_reward_signal_eval(buffer):
        # Stand in for reward-signal evaluation: every derived field simply
        # receives a copy of the raw environment rewards.
        targets = [BufferKey.ADVANTAGES]
        for signal_name in ("extrinsic", "gail"):
            targets += [
                RewardSignalUtil.returns_key(signal_name),
                RewardSignalUtil.value_estimates_key(signal_name),
                RewardSignalUtil.baseline_estimates_key(signal_name),
            ]
        copy_buffer_fields(
            buffer, src_key=BufferKey.ENVIRONMENT_REWARDS, dst_keys=targets
        )

    def _run_update(buffer):
        sequence_count = buffer.num_experiences // optimizer.policy.sequence_length
        optimizer.update(buffer, num_sequences=sequence_count)

    # First pass: rollout of BUFFER_INIT_SAMPLES steps with the continuous
    # log-probs overwritten by ones shaped like the continuous actions.
    first_buffer = mb.simulate_rollout(
        BUFFER_INIT_SAMPLES, behavior_spec_source.behavior_spec
    )
    _mock_reward_signal_eval(first_buffer)
    first_buffer[BufferKey.CONTINUOUS_LOG_PROBS] = np.ones_like(
        first_buffer[BufferKey.CONTINUOUS_ACTION]
    )
    _run_update(first_buffer)

    # Second pass: a 3000-step rollout (original intent: check the case where
    # the buffer size is too big). Log-probs are left as simulated.
    large_buffer = mb.simulate_rollout(3000, optimizer.policy.behavior_spec)
    _mock_reward_signal_eval(large_buffer)
    _run_update(large_buffer)
def dummy_config():
    """Return the configuration object produced by ``poca_dummy_config()``."""
    settings = poca_dummy_config()
    return settings
from mlagents.trainers.tests.dummy_config import ( ppo_dummy_config, sac_dummy_config, poca_dummy_config, ) from mlagents.trainers.tests.check_env_trains import ( check_environment_trains, default_reward_processor, ) BRAIN_NAME = "1D" PPO_TORCH_CONFIG = ppo_dummy_config() SAC_TORCH_CONFIG = sac_dummy_config() POCA_TORCH_CONFIG = poca_dummy_config() # tests in this file won't be tested on GPU machine pytestmark = pytest.mark.check_environment_trains @pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)]) def test_simple_poca(action_sizes): env = MultiAgentEnvironment([BRAIN_NAME], action_sizes=action_sizes, num_agents=2) config = attr.evolve(POCA_TORCH_CONFIG) check_environment_trains(env, {BRAIN_NAME: config}) @pytest.mark.parametrize("num_visual", [1, 2])
def poca_config():
    """Build ``RunOptions`` with a single behavior named ``"test_brain"``.

    The behavior's settings are whatever ``poca_dummy_config()`` returns.
    """
    behaviors = {"test_brain": poca_dummy_config()}
    return RunOptions(behaviors=behaviors)