def test_reward_signal_structure():
    """
    Tests the RewardSignalSettings structure method. This one is special because it
    takes in a Dict[RewardSignalType, RewardSignalSettings].
    """
    reward_signals_dict = {
        "extrinsic": {"strength": 1.0},
        "curiosity": {"strength": 1.0},
    }
    reward_signals = RewardSignalSettings.structure(
        reward_signals_dict, Dict[RewardSignalType, RewardSignalSettings]
    )
    assert isinstance(reward_signals[RewardSignalType.EXTRINSIC], RewardSignalSettings)
    assert isinstance(reward_signals[RewardSignalType.CURIOSITY], CuriositySettings)

    # Check invalid reward signal type
    reward_signals_dict = {"puppo": {"strength": 1.0}}
    with pytest.raises(ValueError):
        RewardSignalSettings.structure(
            reward_signals_dict, Dict[RewardSignalType, RewardSignalSettings]
        )

    # Check missing GAIL demo path
    reward_signals_dict = {"gail": {"strength": 1.0}}
    with pytest.raises(TypeError):
        RewardSignalSettings.structure(
            reward_signals_dict, Dict[RewardSignalType, RewardSignalSettings]
        )

    # Check non-Dict input
    with pytest.raises(TrainerConfigError):
        RewardSignalSettings.structure(
            "notadict", Dict[RewardSignalType, RewardSignalSettings]
        )
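# Hedged sketch, not part of the original suite: the missing-demo-path case above
# raises TypeError because GAILSettings requires demo_path. Assuming GAILSettings
# is importable from mlagents.trainers.settings, a GAIL entry that supplies a
# demo path (the "demos/Expert.demo" string is a placeholder, not a real file)
# should structure into a GAILSettings instance.
def test_reward_signal_structure_gail_sketch():
    reward_signals_dict = {
        "gail": {"strength": 0.5, "demo_path": "demos/Expert.demo"}  # placeholder path
    }
    reward_signals = RewardSignalSettings.structure(
        reward_signals_dict, Dict[RewardSignalType, RewardSignalSettings]
    )
    assert isinstance(reward_signals[RewardSignalType.GAIL], GAILSettings)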
def create_test_poca_optimizer(dummy_config, use_rnn, use_discrete, use_visual):
    mock_specs = mb.setup_test_behavior_specs(
        use_discrete,
        use_visual,
        vector_action_space=DISCRETE_ACTION_SPACE
        if use_discrete
        else VECTOR_ACTION_SPACE,
        vector_obs_space=VECTOR_OBS_SPACE,
    )

    trainer_settings = attr.evolve(dummy_config)
    trainer_settings.reward_signals = {
        RewardSignalType.EXTRINSIC: RewardSignalSettings(strength=1.0, gamma=0.99)
    }

    trainer_settings.network_settings.memory = (
        NetworkSettings.MemorySettings(sequence_length=8, memory_size=10)
        if use_rnn
        else None
    )
    policy = TorchPolicy(0, mock_specs, trainer_settings, "test", False)
    optimizer = TorchPOCAOptimizer(policy, trainer_settings)
    return optimizer
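# Hedged usage sketch, not from the original module: one way the helper above
# might be driven from a parametrized test. The test name, the dummy_config
# fixture, and the check against optimizer.reward_signals are assumptions about
# the surrounding test file rather than confirmed ML-Agents API.
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
def test_poca_optimizer_construction_sketch(dummy_config, rnn, visual, discrete):
    optimizer = create_test_poca_optimizer(
        dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
    )
    # The extrinsic reward signal configured in the helper should be registered.
    assert "extrinsic" in optimizer.reward_signals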
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:
    buffer = create_agent_buffer(behavior_spec, 1000, reward)
    settings = RewardSignalSettings()
    extrinsic_rp = ExtrinsicRewardProvider(behavior_spec, settings)
    generated_rewards = extrinsic_rp.evaluate(buffer)
    assert (generated_rewards == reward).all()

    # Test group rewards. Rewards should be double the environment rewards, but
    # shouldn't count the groupmate rewards.
    buffer[BufferKey.GROUP_REWARD] = buffer[BufferKey.ENVIRONMENT_REWARDS]
    # 2 agents with identical rewards
    buffer[BufferKey.GROUPMATE_REWARDS].set(
        [np.ones(1, dtype=np.float32) * reward] * 2
        for _ in range(buffer.num_experiences)
    )
    generated_rewards = extrinsic_rp.evaluate(buffer)
    assert (generated_rewards == 2 * reward).all()

    # Test groupmate rewards. Total reward should be
    # indiv_reward + 2 * teammate_reward + group_reward
    extrinsic_rp = ExtrinsicRewardProvider(behavior_spec, settings)
    extrinsic_rp.add_groupmate_rewards = True
    generated_rewards = extrinsic_rp.evaluate(buffer)
    assert (generated_rewards == 4 * reward).all()
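# Arithmetic behind the group-reward assertions above (a sketch; no ML-Agents API
# involved): every buffer entry carries the same scalar, so with
# add_groupmate_rewards disabled the provider sums the agent's own reward and the
# group reward, and with it enabled the two groupmate rewards are added as well:
#
#     indiv + group == 2 * reward
#     indiv + 2 * groupmate + group == 4 * reward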
def extrinsic_dummy_config():
    return {RewardSignalType.EXTRINSIC: RewardSignalSettings()}
def test_factory(behavior_spec: BehaviorSpec) -> None:
    settings = RewardSignalSettings()
    extrinsic_rp = create_reward_provider(
        RewardSignalType.EXTRINSIC, behavior_spec, settings
    )
    assert extrinsic_rp.name == "Extrinsic"
def test_construction(behavior_spec: BehaviorSpec) -> None:
    settings = RewardSignalSettings()
    settings.gamma = 0.2
    extrinsic_rp = ExtrinsicRewardProvider(behavior_spec, settings)
    assert extrinsic_rp.gamma == 0.2
    assert extrinsic_rp.name == "Extrinsic"