def test_add_rewards_output(dummy_config):
    # BrainParameters positional args: brain_name, vector_observation_space_size,
    # num_stacked_vector_observations, camera_resolutions, vector_action_space_size,
    # vector_action_descriptions, vector_action_space_type
    brain_params = BrainParameters("test_brain", 1, 1, [], [2], [], 0)
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    # PPOTrainer positional args: brain, reward_buff_cap, trainer_parameters,
    # training, load, seed, run_id, multi_gpu
    trainer = PPOTrainer(
        brain_params, 0, dummy_config, True, False, 0, "0", False
    )
    rewardsout = AllRewardsOutput(
        reward_signals={
            "extrinsic":
            RewardSignalResult(scaled_reward=np.array([1.0, 1.0]),
                               unscaled_reward=np.array([1.0, 1.0]))
        },
        environment=np.array([1.0, 1.0]),
    )
    values = {"extrinsic": np.array([[2.0]])}
    agent_id = "123"
    idx = 0
    # make sure that we're grabbing from the next_idx for rewards. If we're not, the test will fail.
    next_idx = 1
    trainer.add_rewards_outputs(
        rewardsout,
        values=values,
        agent_id=agent_id,
        agent_idx=idx,
        agent_next_idx=next_idx,
    )
    assert trainer.training_buffer[agent_id]["extrinsic_value_estimates"][0] == 2.0
    assert trainer.training_buffer[agent_id]["extrinsic_rewards"][0] == 1.0
Example #2
def test_add_rewards_output(dummy_config):
    brain_params = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    trainer = PPOTrainer(
        brain_params, 0, dummy_config, True, False, 0, "0", False
    )
    rewardsout = AllRewardsOutput(
        reward_signals={
            "extrinsic":
            RewardSignalResult(
                scaled_reward=np.array([1.0, 1.0], dtype=np.float32),
                unscaled_reward=np.array([1.0, 1.0], dtype=np.float32),
            )
        },
        environment=np.array([1.0, 1.0], dtype=np.float32),
    )
    values = {"extrinsic": np.array([[2.0]], dtype=np.float32)}
    agent_id = "123"
    idx = 0
    # make sure that we're grabbing from the next_idx for rewards. If we're not, the test will fail.
    next_idx = 1
    trainer.add_rewards_outputs(
        rewardsout,
        values=values,
        agent_id=agent_id,
        agent_idx=idx,
        agent_next_idx=next_idx,
    )
    assert trainer.processing_buffer[agent_id]["extrinsic_value_estimates"][0] == 2.0
    assert trainer.processing_buffer[agent_id]["extrinsic_rewards"][0] == 1.0
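Both versions of the test receive a dummy_config pytest fixture from the surrounding test module; the snippets above only override its summary_path and model_path entries. A minimal sketch of such a fixture follows. The key names mirror common ml-agents PPO hyperparameters, but the values are placeholders and the exact set of keys PPOTrainer requires depends on the ml-agents version, so treat this as an illustration rather than the project's actual fixture.

import pytest


@pytest.fixture
def dummy_config():
    # Hypothetical stand-in for the test module's fixture: a plain dict of PPO
    # trainer settings. Keys and values are illustrative placeholders, not the
    # canonical ml-agents defaults.
    return {
        "trainer": "ppo",
        "batch_size": 32,
        "buffer_size": 512,
        "beta": 5.0e-3,
        "epsilon": 0.2,
        "hidden_units": 128,
        "lambd": 0.95,
        "learning_rate": 3.0e-4,
        "max_steps": 5.0e4,
        "normalize": True,
        "num_epoch": 5,
        "num_layers": 2,
        "time_horizon": 64,
        "sequence_length": 64,
        "summary_freq": 1000,
        "use_recurrent": False,
        "memory_size": 8,
        "reward_signals": {"extrinsic": {"strength": 1.0, "gamma": 0.99}},
    }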