def test_add_rewards_output(dummy_config):
    brain_params = BrainParameters("test_brain", 1, 1, [], [2], [], 0)
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0",
                         False)
    rewardsout = AllRewardsOutput(
        reward_signals={
            "extrinsic":
            RewardSignalResult(scaled_reward=np.array([1.0, 1.0]),
                               unscaled_reward=np.array([1.0, 1.0]))
        },
        environment=np.array([1.0, 1.0]),
    )
    values = {"extrinsic": np.array([[2.0]])}
    agent_id = "123"
    idx = 0
    # Make sure rewards are read from agent_next_idx rather than agent_idx; if they are not, this test will fail.
    next_idx = 1
    trainer.add_rewards_outputs(
        rewardsout,
        values=values,
        agent_id=agent_id,
        agent_idx=idx,
        agent_next_idx=next_idx,
    )
    assert trainer.training_buffer[agent_id]["extrinsic_value_estimates"][
        0] == 2.0
    assert trainer.training_buffer[agent_id]["extrinsic_rewards"][0] == 1.0
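A note on the dummy_config argument used by these tests: it is assumed to be a pytest fixture returning a PPO trainer-parameter dictionary like the one written out inline in the standalone test_trainer_increment_step further down this page. A minimal sketch:

import pytest


@pytest.fixture
def dummy_config():
    # Trainer parameters mirroring the inline dict used further down;
    # the values here are illustrative, not canonical.
    return {
        "trainer": "ppo",
        "batch_size": 2048,
        "buffer_size": 20480,
        "learning_rate": 3.0e-4,
        "max_steps": "2e6",
        "summary_freq": 3000,
        "use_recurrent": False,
        "summary_path": "./summaries/test_trainer_summary",
        "model_path": "./models/test_trainer_models/TestModel",
        "reward_signals": {"extrinsic": {"strength": 1.0, "gamma": 0.99}},
    }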
Example #2
def load_demonstration(
        file_path: str) -> Tuple[BrainParameters, List[BrainInfo], int]:
    """
    Loads and parses a demonstration file.
    :param file_path: Location of demonstration file (.demo).
    :return: BrainParameters, a list of BrainInfos containing demonstration data, and the expected number of steps.
    """

    # The first 32 bytes of the file are dedicated to meta-data;
    # INITIAL_POS marks the offset at which demonstration data begins.
    INITIAL_POS = 33
    file_paths = []
    if os.path.isdir(file_path):
        all_files = os.listdir(file_path)
        for _file in all_files:
            if _file.endswith(".demo"):
                file_paths.append(os.path.join(file_path, _file))
        if not file_paths:
            raise ValueError(
                "There are no '.demo' files in the provided directory.")
    elif os.path.isfile(file_path):
        file_paths.append(file_path)
        file_extension = pathlib.Path(file_path).suffix
        if file_extension != ".demo":
            raise ValueError(
                "The file is not a '.demo' file. Please provide a file with the "
                "correct extension.")
    else:
        raise FileNotFoundError(
            "The demonstration file or directory {} does not exist.".format(
                file_path))

    brain_params = None
    brain_infos = []
    total_expected = 0
    for _file_path in file_paths:
        with open(_file_path, "rb") as demo_file:
            data = demo_file.read()
        next_pos, pos, obs_decoded = 0, 0, 0
        while pos < len(data):
            next_pos, pos = _DecodeVarint32(data, pos)
            if obs_decoded == 0:
                meta_data_proto = DemonstrationMetaProto()
                meta_data_proto.ParseFromString(data[pos:pos + next_pos])
                total_expected += meta_data_proto.number_steps
                pos = INITIAL_POS
            if obs_decoded == 1:
                brain_param_proto = BrainParametersProto()
                brain_param_proto.ParseFromString(data[pos:pos + next_pos])
                brain_params = BrainParameters.from_proto(brain_param_proto)
                pos += next_pos
            if obs_decoded > 1:
                agent_info = AgentInfoProto()
                agent_info.ParseFromString(data[pos:pos + next_pos])
                brain_info = BrainInfo.from_agent_proto(
                    0, [agent_info], brain_params)
                brain_infos.append(brain_info)
                if len(brain_infos) == total_expected:
                    break
                pos += next_pos
            obs_decoded += 1
    return brain_params, brain_infos, total_expected
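A hypothetical call (the demo path is illustrative):

brain_params, brain_infos, total_expected = load_demonstration("demos/3DBall.demo")
print(brain_params.brain_name, len(brain_infos), total_expected)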
def test_trainer_increment_step(dummy_config):
    trainer_params = dummy_config
    brain_params = BrainParameters("test_brain", 1, 1, [], [2], [], 0)

    trainer = PPOTrainer(brain_params, 0, trainer_params, True, False, 0, "0", False)
    policy_mock = mock.Mock()
    step_count = 10
    policy_mock.increment_step = mock.Mock(return_value=step_count)
    trainer.policy = policy_mock

    trainer.increment_step(5)
    policy_mock.increment_step.assert_called_with(5)
    assert trainer.step == 10
def load_demonstration(file_path):
    """
    Loads and parses a demonstration file.
    :param file_path: Location of demonstration file (.demo).
    :return: BrainParameters, a list of BrainInfos containing demonstration data, and the expected number of steps.
    """

    # The first 32 bytes of the file are dedicated to meta-data;
    # INITIAL_POS marks the offset at which demonstration data begins.
    INITIAL_POS = 33

    if not os.path.isfile(file_path):
        raise FileNotFoundError(
            "The demonstration file {} does not exist.".format(file_path))
    file_extension = pathlib.Path(file_path).suffix
    if file_extension != ".demo":
        raise ValueError(
            "The file is not a '.demo' file. Please provide a file with the "
            "correct extension.")

    brain_params = None
    brain_infos = []
    with open(file_path, "rb") as demo_file:
        data = demo_file.read()
    next_pos, pos, obs_decoded = 0, 0, 0
    total_expected = 0
    while pos < len(data):
        next_pos, pos = _DecodeVarint32(data, pos)
        if obs_decoded == 0:
            meta_data_proto = DemonstrationMetaProto()
            meta_data_proto.ParseFromString(data[pos:pos + next_pos])
            total_expected = meta_data_proto.number_steps
            pos = INITIAL_POS
        if obs_decoded == 1:
            brain_param_proto = BrainParametersProto()
            brain_param_proto.ParseFromString(data[pos:pos + next_pos])
            brain_params = BrainParameters.from_proto(brain_param_proto)
            pos += next_pos
        if obs_decoded > 1:
            agent_info = AgentInfoProto()
            agent_info.ParseFromString(data[pos:pos + next_pos])
            brain_info = BrainInfo.from_agent_proto([agent_info], brain_params)
            brain_infos.append(brain_info)
            if len(brain_infos) == total_expected:
                break
            pos += next_pos
        obs_decoded += 1
    return brain_params, brain_infos, total_expected
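The parsing loop in both versions relies on protobuf's varint length-prefix framing: every message in a .demo file is preceded by a varint holding its byte length, and _DecodeVarint32(data, pos) returns that length together with the offset just past the varint. A self-contained sketch of the framing, using fake byte payloads rather than real protos:

from google.protobuf.internal.decoder import _DecodeVarint32

data = b"\x03abc\x02de"  # two fake length-prefixed payloads
pos = 0
while pos < len(data):
    # next_pos is the payload length; pos now points at the payload start.
    next_pos, pos = _DecodeVarint32(data, pos)
    print(data[pos:pos + next_pos])  # b'abc', then b'de'
    pos += next_pos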
def test_trainer_increment_step():
    trainer_params = {
        "trainer": "ppo",
        "batch_size": 2048,
        "beta": 0.005,
        "buffer_size": 20480,
        "epsilon": 0.2,
        "gamma": 0.995,
        "hidden_units": 512,
        "lambd": 0.95,
        "learning_rate": 0.0003,
        "max_steps": "2e6",
        "memory_size": 256,
        "normalize": True,
        "num_epoch": 3,
        "num_layers": 3,
        "time_horizon": 1000,
        "sequence_length": 64,
        "summary_freq": 3000,
        "use_recurrent": False,
        "use_curiosity": False,
        "curiosity_strength": 0.01,
        "curiosity_enc_size": 128,
        "summary_path": "./summaries/test_trainer_summary",
        "model_path": "./models/test_trainer_models/TestModel",
        "keep_checkpoints": 5,
        "reward_signals": {
            "extrinsic": {
                "strength": 1.0,
                "gamma": 0.99
            }
        },
    }
    brain_params = BrainParameters("test_brain", 1, 1, [], [2], [], 0)

    trainer = PPOTrainer(brain_params, 0, trainer_params, True, False, 0, "0",
                         False)
    policy_mock = mock.Mock()
    step_count = 10
    policy_mock.increment_step = mock.Mock(return_value=step_count)
    trainer.policy = policy_mock

    trainer.increment_step(5)
    policy_mock.increment_step.assert_called_with(5)
    assert trainer.step == 10
def group_spec_to_brain_parameters(
        name: str, group_spec: AgentGroupSpec) -> BrainParameters:
    vec_size = np.sum([
        shape[0] for shape in group_spec.observation_shapes if len(shape) == 1
    ])
    vis_sizes = [
        shape for shape in group_spec.observation_shapes if len(shape) == 3
    ]
    cam_res = [CameraResolution(s[0], s[1], s[2]) for s in vis_sizes]
    a_size: List[int] = []
    if group_spec.is_action_discrete():
        a_size += list(group_spec.discrete_action_branches)
        vector_action_space_type = 0
    else:
        a_size += [group_spec.action_size]
        vector_action_space_type = 1
    return BrainParameters(name, int(vec_size), cam_res, a_size, [],
                           vector_action_space_type)
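A hypothetical usage with a stand-in for AgentGroupSpec, built with unittest.mock so the sketch stays self-contained; the attribute names follow exactly what the function above reads:

from unittest import mock

group_spec = mock.Mock()
group_spec.observation_shapes = [(6,), (84, 84, 3)]  # one vector obs, one visual obs
group_spec.is_action_discrete = mock.Mock(return_value=True)
group_spec.discrete_action_branches = (3, 2)

brain = group_spec_to_brain_parameters("TestBrain", group_spec)
# brain: 6 vector observations, one 84x84x3 camera, discrete branches [3, 2]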
Example #7
    def __init__(self, use_discrete):
        super().__init__()
        self.discrete = use_discrete
        self._brains: Dict[str, BrainParameters] = {}
        brain_params = BrainParameters(
            brain_name=BRAIN_NAME,
            vector_observation_space_size=OBS_SIZE,
            camera_resolutions=[],
            vector_action_space_size=[2] if use_discrete else [1],
            vector_action_descriptions=["moveDirection"],
            vector_action_space_type=0 if use_discrete else 1,
        )
        self._brains[BRAIN_NAME] = brain_params

        # state
        self.position = 0.0
        self.step_count = 0
        self.random = random.Random(str(brain_params))
        self.goal = self.random.choice([-1, 1])
Example #8
def make_brain_parameters(
    discrete_action: bool = False,
    visual_inputs: int = 0,
    brain_name: str = "RealFakeBrain",
    vec_obs_size: int = 6,
) -> BrainParameters:
    resolutions = [
        CameraResolution(width=30, height=40, num_channels=3)
        for _ in range(visual_inputs)
    ]

    return BrainParameters(
        vector_observation_space_size=vec_obs_size,
        camera_resolutions=resolutions,
        vector_action_space_size=[2],
        vector_action_descriptions=["", ""],
        vector_action_space_type=int(not discrete_action),
        brain_name=brain_name,
    )
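Illustrative usage; note that int(not discrete_action) maps discrete actions to space type 0:

brain = make_brain_parameters(discrete_action=True, visual_inputs=2)
# two 30x40 RGB cameras and a single discrete action branch of size 2
print(brain.brain_name, len(brain.camera_resolutions))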
Example #9
def test_trainer_increment_step(dummy_config):
    trainer_params = dummy_config
    brain_params = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )

    trainer = PPOTrainer(brain_params, 0, trainer_params, True, False, 0, "0", False)
    policy_mock = mock.Mock()
    step_count = 10
    policy_mock.increment_step = mock.Mock(return_value=step_count)
    trainer.policy = policy_mock

    trainer.increment_step(5)
    policy_mock.increment_step.assert_called_with(5)
    assert trainer.step == 10
Example #10
def test_add_rewards_output(dummy_config):
    brain_params = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0",
                         False)
    rewardsout = AllRewardsOutput(
        reward_signals={
            "extrinsic":
            RewardSignalResult(
                scaled_reward=np.array([1.0, 1.0], dtype=np.float32),
                unscaled_reward=np.array([1.0, 1.0], dtype=np.float32),
            )
        },
        environment=np.array([1.0, 1.0], dtype=np.float32),
    )
    values = {"extrinsic": np.array([[2.0]], dtype=np.float32)}
    agent_id = "123"
    idx = 0
    # Make sure rewards are read from agent_next_idx rather than agent_idx; if they are not, this test will fail.
    next_idx = 1
    trainer.add_rewards_outputs(
        rewardsout,
        values=values,
        agent_id=agent_id,
        agent_idx=idx,
        agent_next_idx=next_idx,
    )
    assert trainer.processing_buffer[agent_id]["extrinsic_value_estimates"][
        0] == 2.0
    assert trainer.processing_buffer[agent_id]["extrinsic_rewards"][0] == 1.0
Example #11
from typing import List
import logging
import numpy as np
from unittest import mock

from mlagents.envs.communicator_objects.agent_info_pb2 import AgentInfoProto
from mlagents.envs.communicator_objects.observation_pb2 import (
    ObservationProto,
    NONE as COMPRESSION_TYPE_NONE,
)
from mlagents.envs.brain import BrainInfo, BrainParameters

test_brain = BrainParameters(
    brain_name="test_brain",
    vector_observation_space_size=3,
    camera_resolutions=[],
    vector_action_space_size=[],
    vector_action_descriptions=[],
    vector_action_space_type=1,
)


def _make_agent_info_proto(vector_obs: List[float]) -> AgentInfoProto:
    obs = ObservationProto(
        float_data=ObservationProto.FloatData(data=vector_obs),
        shape=[len(vector_obs)],
        compression_type=COMPRESSION_TYPE_NONE,
    )
    agent_info_proto = AgentInfoProto(observations=[obs])
    return agent_info_proto
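Illustrative usage of the helper together with the test_brain defined above; BrainInfo.from_agent_proto is called the same way as in the demo loader earlier on this page:

proto = _make_agent_info_proto([1.0, 2.0, 3.0])
brain_info = BrainInfo.from_agent_proto(0, [proto], test_brain)
# brain_info should now hold a single agent with a 3-dimensional vector observation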