def load_demonstration(
    file_path: str,
) -> Tuple[BehaviorSpec, List[AgentInfoActionPairProto], int]:
    """
    Read and decode every demonstration file resolved from ``file_path``.

    Each file is consumed as a sequence of length-prefixed protobuf messages:
    the first is the DemonstrationMetaProto, the second the BrainParametersProto,
    and every following one an AgentInfoActionPairProto.

    :param file_path: Location of the demonstration file (.demo); it is passed
        to ``get_demo_files`` to obtain the list of files to read.
    :return: Tuple of (behavior spec built from the brain parameters and the
        first decoded pair, list of decoded AgentInfoActionPairProto, total
        number of steps announced by the meta-data of the files read).
    :raises RuntimeError: If a file declares an api_version that is not in
        SUPPORTED_DEMONSTRATION_VERSIONS, or if no behavior spec could be built.
    """
    demo_files = get_demo_files(file_path)
    behavior_spec = None
    brain_param_proto = None
    info_action_pairs = []
    total_expected = 0
    for demo_file in demo_files:
        with open(demo_file, "rb") as handle:
            with hierarchical_timer("read_file"):
                raw = handle.read()
            msg_len, cursor, msg_index = 0, 0, 0
            while cursor < len(raw):
                # Every message is preceded by its length encoded as a varint.
                msg_len, cursor = _DecodeVarint32(raw, cursor)
                payload = raw[cursor : cursor + msg_len]
                if msg_index == 0:
                    meta_data_proto = DemonstrationMetaProto()
                    meta_data_proto.ParseFromString(payload)
                    version = meta_data_proto.api_version
                    if version not in SUPPORTED_DEMONSTRATION_VERSIONS:
                        raise RuntimeError(
                            f"Can't load Demonstration data from an unsupported version ({meta_data_proto.api_version})"
                        )
                    total_expected += meta_data_proto.number_steps
                    # The meta-data occupies a fixed-size header; jump past it.
                    cursor = INITIAL_POS
                elif msg_index == 1:
                    brain_param_proto = BrainParametersProto()
                    brain_param_proto.ParseFromString(payload)
                    cursor += msg_len
                else:
                    agent_info_action = AgentInfoActionPairProto()
                    agent_info_action.ParseFromString(payload)
                    if behavior_spec is None:
                        # The spec needs one agent info alongside the brain parameters.
                        behavior_spec = behavior_spec_from_proto(
                            brain_param_proto, agent_info_action.agent_info
                        )
                    info_action_pairs.append(agent_info_action)
                    if len(info_action_pairs) == total_expected:
                        # All announced steps are collected; ignore the rest of this file.
                        break
                    cursor += msg_len
                msg_index += 1
    if not behavior_spec:
        raise RuntimeError(
            f"No BrainParameters found in demonstration file at {file_path}."
        )
    return behavior_spec, info_action_pairs, total_expected
def record_demo(use_discrete, num_visual=0, num_vector=1):
    """
    Record demonstrations from a solved RecordEnvironment and write them to a
    '.demo' file inside a fresh temporary directory.

    ``tmpdir_factory`` is not a parameter; it must be available from the
    surrounding scope.

    :param use_discrete: Selects the discrete (truthy) or continuous (falsy)
        action configuration for both the environment and the brain parameters.
    :param num_visual: Forwarded to RecordEnvironment as ``num_visual``.
    :param num_vector: Forwarded to RecordEnvironment as ``num_vector``.
    :return: Path of the written demonstration file, as a string.
    """
    recorder = RecordEnvironment(
        [BRAIN_NAME],
        use_discrete=use_discrete,
        num_visual=num_visual,
        num_vector=num_vector,
        n_demos=100,
    )
    # True demos would require solving the env in the usual way; here solve()
    # is called directly to execute the optimal policy.
    recorder.solve()
    recorded_pairs = recorder.demonstration_protos[BRAIN_NAME]
    meta_proto = DemonstrationMetaProto()

    if use_discrete:
        action_size, space_type = [2], discrete
    else:
        action_size, space_type = [1], continuous
    brain_proto = BrainParametersProto(
        vector_action_size=action_size,
        vector_action_descriptions=[""],
        vector_action_space_type=space_type,
        brain_name=BRAIN_NAME,
        is_training=True,
    )

    label = "Discrete" if use_discrete else "Continuous"
    target_dir = tmpdir_factory.mktemp("tmp_demo")
    demo_path = str(target_dir.join(f"1DTest{label}.demo"))
    write_demo(demo_path, meta_proto, brain_proto, recorded_pairs)
    return demo_path
def record_demo(action_sizes, num_visual=0, num_vector=1):
    """
    Record demonstrations from a solved RecordEnvironment and write them to a
    '.demo' file inside a fresh temporary directory.

    ``tmpdir_factory`` is not a parameter; it must be available from the
    surrounding scope.

    :param action_sizes: Pair ``(continuous_action_size, discrete_action_size)``;
        it is forwarded to RecordEnvironment and unpacked to build the
        ActionSpecProto.
    :param num_visual: Forwarded to RecordEnvironment as ``num_visual``.
    :param num_vector: Forwarded to RecordEnvironment as ``num_vector``.
    :return: Path of the written demonstration file, as a string.
    """
    env = RecordEnvironment(
        [BRAIN_NAME],
        action_sizes=action_sizes,
        num_visual=num_visual,
        num_vector=num_vector,
        n_demos=100,
    )
    # If we want to use true demos, we can solve the env in the usual way
    # Otherwise, we can just call solve to execute the optimal policy
    env.solve()
    agent_info_protos = env.demonstration_protos[BRAIN_NAME]
    meta_data_proto = DemonstrationMetaProto()

    continuous_action_size, discrete_action_size = action_sizes
    use_discrete = discrete_action_size > 0
    action_spec_proto = ActionSpecProto(
        num_continuous_actions=continuous_action_size,
        num_discrete_actions=discrete_action_size,
        discrete_branch_sizes=[2] if use_discrete else None,
    )
    brain_param_proto = BrainParametersProto(
        brain_name=BRAIN_NAME, is_training=True, action_spec=action_spec_proto
    )

    # A non-empty tuple is always truthy, so the label has to be derived from
    # the discrete size rather than from ``action_sizes`` itself; otherwise
    # continuous-only recordings would also be named "Discrete".
    action_type = "Discrete" if use_discrete else "Continuous"
    demo_path_name = "1DTest" + action_type + ".demo"
    demo_path = str(tmpdir_factory.mktemp("tmp_demo").join(demo_path_name))
    write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
    return demo_path
def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
    """
    Build the initialization response of this mock communicator.

    The response advertises a single brain named ``self.brain_name`` with a
    vector action size of ``[2]``, discrete or continuous depending on
    ``self.is_discrete``, together with the current agent infos.

    :param inputs: Initialization request; it is not read by this implementation.
    :return: UnityOutputProto carrying both the RL initialization output and
        the RL output built from ``self._get_agent_infos()``.
    """
    space_type = discrete if self.is_discrete else continuous
    brain_parameters = BrainParametersProto(
        vector_action_size=[2],
        vector_action_descriptions=["", ""],
        vector_action_space_type=space_type,
        brain_name=self.brain_name,
        is_training=True,
    )
    initialization = UnityRLInitializationOutputProto(
        name="RealFakeAcademy",
        version=UnityEnvironment.API_VERSION,
        log_path="",
        brain_parameters=[brain_parameters],
    )
    agent_output = UnityRLOutputProto(agentInfos=self._get_agent_infos())
    return UnityOutputProto(
        rl_initialization_output=initialization, rl_output=agent_output
    )
def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
    """
    Build the initialization response of this mock communicator.

    The advertised action spec has two discrete actions with branch sizes
    ``[3, 2]`` when ``self.is_discrete`` is truthy, and two continuous actions
    otherwise.

    :param inputs: Initialization request; it is not read by this implementation.
    :return: UnityOutputProto carrying both the RL initialization output and
        the RL output built from ``self._get_agent_infos()``.
    """
    action_spec = (
        ActionSpecProto(num_discrete_actions=2, discrete_branch_sizes=[3, 2])
        if self.is_discrete
        else ActionSpecProto(num_continuous_actions=2)
    )
    brain_parameters = BrainParametersProto(
        brain_name=self.brain_name,
        is_training=True,
        action_spec=action_spec,
    )
    initialization = UnityRLInitializationOutputProto(
        name="RealFakeAcademy",
        communication_version=UnityEnvironment.API_VERSION,
        package_version="mock_package_version",
        log_path="",
        brain_parameters=[brain_parameters],
    )
    agent_output = UnityRLOutputProto(agentInfos=self._get_agent_infos())
    return UnityOutputProto(
        rl_initialization_output=initialization, rl_output=agent_output
    )
def test_agent_behavior_spec_from_proto():
    """
    Check that behavior_spec_from_proto maps brain parameters to a behavior
    spec for both action space types (0 is asserted to be discrete, 1 continuous).
    """
    agent_proto = generate_list_agent_proto(1, [(3,), (4,)])[0]

    # Action space type 0: two branches of sizes 5 and 4.
    discrete_params = BrainParametersProto(
        vector_action_size=[5, 4], vector_action_space_type=0
    )
    discrete_spec = behavior_spec_from_proto(discrete_params, agent_proto)
    assert discrete_spec.is_action_discrete()
    assert not discrete_spec.is_action_continuous()
    assert discrete_spec.observation_shapes == [(3,), (4,)]
    assert discrete_spec.discrete_action_branches == (5, 4)
    assert discrete_spec.action_size == 2

    # Action space type 1: a single action vector of size 6.
    continuous_params = BrainParametersProto(
        vector_action_size=[6], vector_action_space_type=1
    )
    continuous_spec = behavior_spec_from_proto(continuous_params, agent_proto)
    assert not continuous_spec.is_action_discrete()
    assert continuous_spec.is_action_continuous()
    assert continuous_spec.action_size == 6
def test_agent_behavior_spec_from_proto():
    """
    Check that behavior_spec_from_proto still builds the expected action spec
    and sensor specs when only the deprecated brain-parameter fields are set
    (space type 0 is asserted to be discrete, 1 continuous).
    """
    agent_proto = generate_list_agent_proto(1, [(3,), (4,)])[0]

    # Deprecated space type 0: two branches of sizes 5 and 4.
    discrete_params = BrainParametersProto(
        vector_action_size_deprecated=[5, 4],
        vector_action_space_type_deprecated=0,
    )
    discrete_spec = behavior_spec_from_proto(discrete_params, agent_proto)
    assert discrete_spec.action_spec.is_discrete()
    assert not discrete_spec.action_spec.is_continuous()
    sensor_shapes = [spec.shape for spec in discrete_spec.sensor_specs]
    assert sensor_shapes == [(3,), (4,)]
    assert discrete_spec.action_spec.discrete_branches == (5, 4)
    assert discrete_spec.action_spec.discrete_size == 2

    # Deprecated space type 1: a single action vector of size 6.
    continuous_params = BrainParametersProto(
        vector_action_size_deprecated=[6],
        vector_action_space_type_deprecated=1,
    )
    continuous_spec = behavior_spec_from_proto(continuous_params, agent_proto)
    assert not continuous_spec.action_spec.is_discrete()
    assert continuous_spec.action_spec.is_continuous()
    assert continuous_spec.action_spec.continuous_size == 6
def _find_demo_files(file_path: str) -> List[str]:
    """
    Resolve ``file_path`` to the list of '.demo' files it designates.

    :param file_path: Either a single '.demo' file or a directory containing some.
    :return: The file itself, or every entry of the directory whose name ends
        with '.demo' (joined with the directory path).
    :raises ValueError: If the directory holds no '.demo' file, or the file
        does not have the '.demo' extension.
    :raises FileNotFoundError: If ``file_path`` is neither a file nor a directory.
    """
    if os.path.isdir(file_path):
        demo_files = [
            os.path.join(file_path, name)
            for name in os.listdir(file_path)
            if name.endswith(".demo")
        ]
        # Test the filtered list, not the raw listing: a directory that only
        # holds other files must be rejected here rather than fail later with
        # an unrelated "No BrainParameters found" error.
        if not demo_files:
            raise ValueError("There are no '.demo' files in the provided directory.")
        return demo_files
    if os.path.isfile(file_path):
        if pathlib.Path(file_path).suffix != ".demo":
            raise ValueError(
                "The file is not a '.demo' file. Please provide a file with the "
                "correct extension."
            )
        return [file_path]
    raise FileNotFoundError(
        f"The demonstration file or directory {file_path} does not exist."
    )


def load_demonstration(
    file_path: str
) -> Tuple[BrainParameters, List[AgentInfoActionPairProto], int]:
    """
    Loads and parses a demonstration file, or every '.demo' file of a directory.

    Each file is consumed as a sequence of length-prefixed protobuf messages:
    the first is the DemonstrationMetaProto, the second the BrainParametersProto,
    and every following one an AgentInfoActionPairProto.

    :param file_path: Location of demonstration file (.demo) or of a directory
        containing demonstration files.
    :return: Tuple of (BrainParameters built from the brain parameters proto and
        the first decoded pair, list of decoded AgentInfoActionPairProto, total
        number of steps announced by the meta-data of the files read).
    :raises ValueError: If the directory holds no '.demo' file, or the file
        does not have the '.demo' extension.
    :raises FileNotFoundError: If ``file_path`` does not exist.
    :raises RuntimeError: If no BrainParameters could be built from the data.
    """
    # First 32 bytes of file dedicated to meta-data.
    INITIAL_POS = 33
    file_paths = _find_demo_files(file_path)

    brain_params = None
    brain_param_proto = None
    info_action_pairs = []
    total_expected = 0
    for _file_path in file_paths:
        with open(_file_path, "rb") as fp:
            with hierarchical_timer("read_file"):
                data = fp.read()
        next_pos, pos, obs_decoded = 0, 0, 0
        while pos < len(data):
            # Every message is preceded by its length encoded as a varint.
            next_pos, pos = _DecodeVarint32(data, pos)
            if obs_decoded == 0:
                meta_data_proto = DemonstrationMetaProto()
                meta_data_proto.ParseFromString(data[pos : pos + next_pos])
                total_expected += meta_data_proto.number_steps
                # Skip the fixed-size meta-data header.
                pos = INITIAL_POS
            elif obs_decoded == 1:
                brain_param_proto = BrainParametersProto()
                brain_param_proto.ParseFromString(data[pos : pos + next_pos])
                pos += next_pos
            else:
                agent_info_action = AgentInfoActionPairProto()
                agent_info_action.ParseFromString(data[pos : pos + next_pos])
                if brain_params is None:
                    # BrainParameters needs one agent info alongside the proto.
                    brain_params = BrainParameters.from_proto(
                        brain_param_proto, agent_info_action.agent_info
                    )
                info_action_pairs.append(agent_info_action)
                if len(info_action_pairs) == total_expected:
                    # All announced steps are collected; ignore the rest of this file.
                    break
                pos += next_pos
            obs_decoded += 1
    if not brain_params:
        raise RuntimeError(
            f"No BrainParameters found in demonstration file at {file_path}."
        )
    return brain_params, info_action_pairs, total_expected