def test_add_rewards_output(dummy_config):
    """
    Feed a single rewards output through PPOTrainer.add_rewards_outputs and
    check that the agent's buffer receives the value estimate and the reward.
    """
    brain_params = BrainParameters("test_brain", 1, 1, [], [2], [], 0)
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0", False)

    extrinsic_result = RewardSignalResult(
        scaled_reward=np.array([1.0, 1.0]),
        unscaled_reward=np.array([1.0, 1.0]),
    )
    rewards_out = AllRewardsOutput(
        reward_signals={"extrinsic": extrinsic_result},
        environment=np.array([1.0, 1.0]),
    )
    value_estimates = {"extrinsic": np.array([[2.0]])}

    # Rewards must be read at the *next* index (1) while values are read at
    # the current index (0). If the trainer does not grab rewards from the
    # next index, this test will fail.
    trainer.add_rewards_outputs(
        rewards_out,
        values=value_estimates,
        agent_id="123",
        agent_idx=0,
        agent_next_idx=1,
    )

    agent_buffer = trainer.training_buffer["123"]
    assert agent_buffer["extrinsic_value_estimates"][0] == 2.0
    assert agent_buffer["extrinsic_rewards"][0] == 1.0
def load_demonstration(
    file_path: str
) -> Tuple[BrainParameters, List[BrainInfo], int]:
    """
    Loads and parses a demonstration file, or every demonstration file found
    directly inside a directory.

    :param file_path: Location of a demonstration file (.demo), or of a
        directory containing '.demo' files.
    :return: BrainParameter, list of BrainInfos containing demonstration data,
        and the sum of the step counts announced in the parsed files' meta-data.
    :raises ValueError: If the directory contains no '.demo' files, or the
        given file does not have the '.demo' extension.
    :raises FileNotFoundError: If file_path is neither a file nor a directory.
    """
    # First 32 bytes of file dedicated to meta-data. After the meta-data
    # record has been parsed, reading resumes at this fixed offset.
    INITIAL_POS = 33

    file_paths = []
    if os.path.isdir(file_path):
        file_paths = [
            os.path.join(file_path, _file)
            for _file in os.listdir(file_path)
            if _file.endswith(".demo")
        ]
        # Check the filtered list rather than the raw directory listing: a
        # directory that only holds non-demo files must be rejected too.
        if not file_paths:
            raise ValueError(
                "There are no '.demo' files in the provided directory.")
    elif os.path.isfile(file_path):
        file_paths.append(file_path)
        file_extension = pathlib.Path(file_path).suffix
        if file_extension != ".demo":
            raise ValueError(
                "The file is not a '.demo' file. Please provide a file with the "
                "correct extension.")
    else:
        raise FileNotFoundError(
            "The demonstration file or directory {} does not exist.".format(
                file_path))

    brain_params = None
    brain_infos = []
    total_expected = 0
    for _file_path in file_paths:
        # The context manager closes the handle even if parsing fails later.
        with open(_file_path, "rb") as demo_file:
            data = demo_file.read()

        next_pos, pos, obs_decoded = 0, 0, 0
        while pos < len(data):
            # `next_pos` receives the decoded varint, which is used below as
            # the byte length of the record starting at the updated `pos`.
            next_pos, pos = _DecodeVarint32(data, pos)
            if obs_decoded == 0:
                # Record 0: meta-data carrying the number of recorded steps.
                meta_data_proto = DemonstrationMetaProto()
                meta_data_proto.ParseFromString(data[pos:pos + next_pos])
                total_expected += meta_data_proto.number_steps
                pos = INITIAL_POS
            elif obs_decoded == 1:
                # Record 1: the brain parameters used for this recording.
                brain_param_proto = BrainParametersProto()
                brain_param_proto.ParseFromString(data[pos:pos + next_pos])
                brain_params = BrainParameters.from_proto(brain_param_proto)
                pos += next_pos
            else:
                # Every following record is one agent step.
                agent_info = AgentInfoProto()
                agent_info.ParseFromString(data[pos:pos + next_pos])
                brain_info = BrainInfo.from_agent_proto(
                    0, [agent_info], brain_params)
                brain_infos.append(brain_info)
                # Stop once all announced steps (across files so far) are read.
                if len(brain_infos) == total_expected:
                    break
                pos += next_pos
            obs_decoded += 1
    return brain_params, brain_infos, total_expected
def test_trainer_increment_step(dummy_config):
    """
    Check that PPOTrainer.increment_step forwards the step count to the policy
    and that trainer.step reflects the value the policy returns.
    """
    brain_params = BrainParameters("test_brain", 1, 1, [], [2], [], 0)
    trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0", False)

    # Swap in a mocked policy whose increment_step always reports step 10.
    expected_step = 10
    fake_policy = mock.Mock()
    fake_policy.increment_step = mock.Mock(return_value=expected_step)
    trainer.policy = fake_policy

    trainer.increment_step(5)

    fake_policy.increment_step.assert_called_with(5)
    assert trainer.step == 10
def load_demonstration(file_path):
    """
    Loads and parses a demonstration file.

    :param file_path: Location of demonstration file (.demo).
    :return: BrainParameter, list of BrainInfos containing demonstration data,
        and the number of steps announced in the file's meta-data.
    :raises FileNotFoundError: If file_path does not point to an existing file.
    :raises ValueError: If the file does not have the '.demo' extension.
    """
    # First 32 bytes of file dedicated to meta-data. After the meta-data
    # record has been parsed, reading resumes at this fixed offset.
    INITIAL_POS = 33

    if not os.path.isfile(file_path):
        raise FileNotFoundError(
            "The demonstration file {} does not exist.".format(file_path))
    file_extension = pathlib.Path(file_path).suffix
    if file_extension != ".demo":
        raise ValueError(
            "The file is not a '.demo' file. Please provide a file with the "
            "correct extension.")

    brain_params = None
    brain_infos = []
    # The context manager closes the handle even if parsing fails later.
    with open(file_path, "rb") as demo_file:
        data = demo_file.read()

    next_pos, pos, obs_decoded = 0, 0, 0
    total_expected = 0
    while pos < len(data):
        # `next_pos` receives the decoded varint, which is used below as the
        # byte length of the record starting at the updated `pos`.
        next_pos, pos = _DecodeVarint32(data, pos)
        if obs_decoded == 0:
            # Record 0: meta-data carrying the number of recorded steps.
            meta_data_proto = DemonstrationMetaProto()
            meta_data_proto.ParseFromString(data[pos:pos + next_pos])
            total_expected = meta_data_proto.number_steps
            pos = INITIAL_POS
        elif obs_decoded == 1:
            # Record 1: the brain parameters used for this recording.
            brain_param_proto = BrainParametersProto()
            brain_param_proto.ParseFromString(data[pos:pos + next_pos])
            brain_params = BrainParameters.from_proto(brain_param_proto)
            pos += next_pos
        else:
            # Every following record is one agent step.
            agent_info = AgentInfoProto()
            agent_info.ParseFromString(data[pos:pos + next_pos])
            brain_info = BrainInfo.from_agent_proto([agent_info], brain_params)
            brain_infos.append(brain_info)
            # Stop as soon as all announced steps have been read.
            if len(brain_infos) == total_expected:
                break
            pos += next_pos
        obs_decoded += 1
    return brain_params, brain_infos, total_expected
def test_trainer_increment_step():
    """
    Check that PPOTrainer.increment_step forwards the step count to the policy
    and that trainer.step reflects the value the policy returns, using an
    explicit inline trainer configuration.
    """
    reward_signals = {"extrinsic": {"strength": 1.0, "gamma": 0.99}}
    trainer_params = {
        "trainer": "ppo",
        "batch_size": 2048,
        "beta": 0.005,
        "buffer_size": 20480,
        "epsilon": 0.2,
        "gamma": 0.995,
        "hidden_units": 512,
        "lambd": 0.95,
        "learning_rate": 0.0003,
        "max_steps": "2e6",
        "memory_size": 256,
        "normalize": True,
        "num_epoch": 3,
        "num_layers": 3,
        "time_horizon": 1000,
        "sequence_length": 64,
        "summary_freq": 3000,
        "use_recurrent": False,
        "use_curiosity": False,
        "curiosity_strength": 0.01,
        "curiosity_enc_size": 128,
        "summary_path": "./summaries/test_trainer_summary",
        "model_path": "./models/test_trainer_models/TestModel",
        "keep_checkpoints": 5,
        "reward_signals": reward_signals,
    }
    brain_params = BrainParameters("test_brain", 1, 1, [], [2], [], 0)
    trainer = PPOTrainer(brain_params, 0, trainer_params, True, False, 0, "0", False)

    # Swap in a mocked policy whose increment_step always reports step 10.
    expected_step = 10
    fake_policy = mock.Mock()
    fake_policy.increment_step = mock.Mock(return_value=expected_step)
    trainer.policy = fake_policy

    trainer.increment_step(5)

    fake_policy.increment_step.assert_called_with(5)
    assert trainer.step == 10
def group_spec_to_brain_parameters(
    name: str, group_spec: AgentGroupSpec
) -> BrainParameters:
    """
    Build a BrainParameters object from an AgentGroupSpec.

    Observation shapes of length 1 are summed into the vector observation
    size; shapes of length 3 are turned into CameraResolution entries. The
    action size list and the action space type (0 when the spec reports a
    discrete action, 1 otherwise) are taken from the spec.

    :param name: Name passed through as the first BrainParameters argument.
    :param group_spec: Spec providing observation shapes and action info.
    :return: The assembled BrainParameters.
    """
    vector_dims = []
    cam_res = []
    for shape in group_spec.observation_shapes:
        if len(shape) == 1:
            vector_dims.append(shape[0])
        elif len(shape) == 3:
            cam_res.append(CameraResolution(shape[0], shape[1], shape[2]))
    vec_size = np.sum(vector_dims)

    is_discrete = group_spec.is_action_discrete()
    a_size: List[int] = (
        list(group_spec.discrete_action_branches)
        if is_discrete
        else [group_spec.action_size]
    )
    vector_action_space_type = 0 if is_discrete else 1

    return BrainParameters(
        name, int(vec_size), cam_res, a_size, [], vector_action_space_type
    )
def __init__(self, use_discrete):
    """
    Set up the environment with a single brain and its initial state.

    :param use_discrete: When truthy the brain gets an action space of size
        [2] and type 0; otherwise size [1] and type 1.
    """
    super().__init__()
    self.discrete = use_discrete
    self._brains: Dict[str, BrainParameters] = {}

    if use_discrete:
        action_space_size, action_space_type = [2], 0
    else:
        action_space_size, action_space_type = [1], 1
    brain_params = BrainParameters(
        brain_name=BRAIN_NAME,
        vector_observation_space_size=OBS_SIZE,
        camera_resolutions=[],
        vector_action_space_size=action_space_size,
        vector_action_descriptions=["moveDirection"],
        vector_action_space_type=action_space_type,
    )
    self._brains[BRAIN_NAME] = brain_params

    # state
    self.position = 0.0
    self.step_count = 0
    # The RNG is seeded with the string form of the brain parameters; the
    # goal is then drawn from it as either -1 or 1.
    self.random = random.Random(str(brain_params))
    self.goal = self.random.choice([-1, 1])
def make_brain_parameters(
    discrete_action: bool = False,
    visual_inputs: int = 0,
    brain_name: str = "RealFakeBrain",
    vec_obs_size: int = 6,
) -> BrainParameters:
    """
    Create a BrainParameters instance for tests.

    :param discrete_action: Selects action space type 0 when truthy, 1 otherwise.
    :param visual_inputs: Number of 30x40, 3-channel camera resolutions to add.
    :param brain_name: Name given to the brain.
    :param vec_obs_size: Vector observation space size.
    :return: BrainParameters with an action space size of [2] and two empty
        action descriptions.
    """
    resolutions = []
    for _ in range(visual_inputs):
        resolutions.append(CameraResolution(width=30, height=40, num_channels=3))

    action_space_type = 0 if discrete_action else 1
    return BrainParameters(
        vector_observation_space_size=vec_obs_size,
        camera_resolutions=resolutions,
        vector_action_space_size=[2],
        vector_action_descriptions=["", ""],
        vector_action_space_type=action_space_type,
        brain_name=brain_name,
    )
def test_trainer_increment_step(dummy_config):
    """
    Check that PPOTrainer.increment_step forwards the step count to the policy
    and that trainer.step reflects the value the policy returns.
    """
    brain_kwargs = dict(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )
    brain_params = BrainParameters(**brain_kwargs)
    trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0", False)

    # Swap in a mocked policy whose increment_step always reports step 10.
    expected_step = 10
    fake_policy = mock.Mock()
    fake_policy.increment_step = mock.Mock(return_value=expected_step)
    trainer.policy = fake_policy

    trainer.increment_step(5)

    fake_policy.increment_step.assert_called_with(5)
    assert trainer.step == 10
def test_add_rewards_output(dummy_config):
    """
    Feed a single rewards output through PPOTrainer.add_rewards_outputs and
    check that the agent's processing buffer receives the value estimate and
    the reward.
    """
    brain_params = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0", False)

    extrinsic_result = RewardSignalResult(
        scaled_reward=np.array([1.0, 1.0], dtype=np.float32),
        unscaled_reward=np.array([1.0, 1.0], dtype=np.float32),
    )
    rewards_out = AllRewardsOutput(
        reward_signals={"extrinsic": extrinsic_result},
        environment=np.array([1.0, 1.0], dtype=np.float32),
    )
    value_estimates = {"extrinsic": np.array([[2.0]], dtype=np.float32)}

    # Rewards must be read at the *next* index (1) while values are read at
    # the current index (0). If the trainer does not grab rewards from the
    # next index, this test will fail.
    trainer.add_rewards_outputs(
        rewards_out,
        values=value_estimates,
        agent_id="123",
        agent_idx=0,
        agent_next_idx=1,
    )

    agent_buffer = trainer.processing_buffer["123"]
    assert agent_buffer["extrinsic_value_estimates"][0] == 2.0
    assert agent_buffer["extrinsic_rewards"][0] == 1.0
from typing import List
import logging
import numpy as np

from unittest import mock
from mlagents.envs.communicator_objects.agent_info_pb2 import AgentInfoProto
from mlagents.envs.communicator_objects.observation_pb2 import (
    ObservationProto,
    NONE as COMPRESSION_TYPE_NONE,
)
from mlagents.envs.brain import BrainInfo, BrainParameters

# Shared brain definition: 3 vector observations, no cameras, action space type 1.
test_brain = BrainParameters(
    brain_name="test_brain",
    vector_observation_space_size=3,
    camera_resolutions=[],
    vector_action_space_size=[],
    vector_action_descriptions=[],
    vector_action_space_type=1,
)


def _make_agent_info_proto(vector_obs: List[float]) -> AgentInfoProto:
    """
    Wrap a list of floats in an AgentInfoProto holding one observation.

    :param vector_obs: Values stored as the observation's float data; the
        observation's shape is set to the length of this list.
    :return: AgentInfoProto with a single observation whose compression type
        is NONE.
    """
    float_data = ObservationProto.FloatData(data=vector_obs)
    observation = ObservationProto(
        float_data=float_data,
        shape=[len(vector_obs)],
        compression_type=COMPRESSION_TYPE_NONE,
    )
    return AgentInfoProto(observations=[observation])