def test_add_rewards_output(dummy_config):
    """Check that ``add_rewards_outputs`` buffers the reward found at ``agent_next_idx``.

    The reward arrays hold a sentinel ``0.0`` at index 0 (``agent_idx``) and
    the expected ``1.0`` at index 1 (``agent_next_idx``), so the final
    assertion only passes when the reward is read from ``agent_next_idx``.
    The value-estimate array has a single row, so only index 0 can address it.

    :param dummy_config: trainer configuration fixture; its ``summary_path``
        and ``model_path`` entries are overwritten before the trainer is built.
    """
    brain_params = BrainParameters("test_brain", 1, 1, [], [2], [], 0)
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0", False)

    # Index 0 must differ from index 1 in every reward array; with identical
    # entries the test could not tell which index the trainer actually read.
    rewardsout = AllRewardsOutput(
        reward_signals={
            "extrinsic": RewardSignalResult(
                scaled_reward=np.array([0.0, 1.0]),
                unscaled_reward=np.array([0.0, 1.0]),
            )
        },
        environment=np.array([0.0, 1.0]),
    )
    values = {"extrinsic": np.array([[2.0]])}
    agent_id = "123"
    idx = 0
    # make sure that we're grabbing from the next_idx for rewards. If we're not, the test will fail.
    next_idx = 1
    trainer.add_rewards_outputs(
        rewardsout,
        values=values,
        agent_id=agent_id,
        agent_idx=idx,
        agent_next_idx=next_idx,
    )

    agent_buffer = trainer.training_buffer[agent_id]
    assert agent_buffer["extrinsic_value_estimates"][0] == 2.0
    assert agent_buffer["extrinsic_rewards"][0] == 1.0
def test_add_rewards_output(dummy_config):
    """Check that ``add_rewards_outputs`` buffers the reward found at ``agent_next_idx``.

    The reward arrays hold a sentinel ``0.0`` at index 0 (``agent_idx``) and
    the expected ``1.0`` at index 1 (``agent_next_idx``), so the final
    assertion only passes when the reward is read from ``agent_next_idx``.
    The value-estimate array has a single row, so only index 0 can address it.

    :param dummy_config: trainer configuration fixture; its ``summary_path``
        and ``model_path`` entries are overwritten before the trainer is built.
    """
    brain_params = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0", False)

    # Index 0 must differ from index 1 in every reward array; with identical
    # entries the test could not tell which index the trainer actually read.
    rewardsout = AllRewardsOutput(
        reward_signals={
            "extrinsic": RewardSignalResult(
                scaled_reward=np.array([0.0, 1.0], dtype=np.float32),
                unscaled_reward=np.array([0.0, 1.0], dtype=np.float32),
            )
        },
        environment=np.array([0.0, 1.0], dtype=np.float32),
    )
    values = {"extrinsic": np.array([[2.0]], dtype=np.float32)}
    agent_id = "123"
    idx = 0
    # make sure that we're grabbing from the next_idx for rewards. If we're not, the test will fail.
    next_idx = 1
    trainer.add_rewards_outputs(
        rewardsout,
        values=values,
        agent_id=agent_id,
        agent_idx=idx,
        agent_next_idx=next_idx,
    )

    agent_buffer = trainer.processing_buffer[agent_id]
    assert agent_buffer["extrinsic_value_estimates"][0] == 2.0
    assert agent_buffer["extrinsic_rewards"][0] == 1.0