예제 #1
0
def load_demonstration(
    file_path: str,
) -> Tuple[BehaviorSpec, List[AgentInfoActionPairProto], int]:
    """
    Read the demonstration file(s) designated by ``file_path`` and decode them.

    Each file is read fully into memory and then walked record by record; every
    record is preceded by a length decoded with ``_DecodeVarint32``. The first
    record is parsed as the demonstration meta-data, the second as the brain
    parameters, and all following ones as agent info/action pairs.

    :param file_path: Location of demonstration file (.demo); passed to
        ``get_demo_files`` to obtain the list of files to read.
    :return: A tuple of (behavior spec built from the brain parameters and the
        first decoded agent info, list of AgentInfoActionPairProto, total number
        of steps announced by the meta-data of the files read).
    :raises RuntimeError: If a file reports an API version that is not in
        ``SUPPORTED_DEMONSTRATION_VERSIONS``, or if no behavior spec could be
        built from the data.
    """
    behavior_spec = None
    brain_param_proto = None
    info_action_pairs = []
    total_expected = 0
    for demo_file in get_demo_files(file_path):
        with open(demo_file, "rb") as fp:
            with hierarchical_timer("read_file"):
                data = fp.read()
            offset = 0
            records_read = 0
            while offset < len(data):
                size, offset = _DecodeVarint32(data, offset)
                payload = data[offset : offset + size]
                if records_read == 0:
                    # Record 0: meta-data (API version and announced step count).
                    meta_data_proto = DemonstrationMetaProto()
                    meta_data_proto.ParseFromString(payload)
                    version_supported = (
                        meta_data_proto.api_version in SUPPORTED_DEMONSTRATION_VERSIONS
                    )
                    if not version_supported:
                        raise RuntimeError(
                            f"Can't load Demonstration data from an unsupported version ({meta_data_proto.api_version})"
                        )
                    total_expected += meta_data_proto.number_steps
                    # The next record is read from a fixed offset, not from the
                    # end of the meta-data message.
                    offset = INITIAL_POS
                elif records_read == 1:
                    # Record 1: brain parameters.
                    brain_param_proto = BrainParametersProto()
                    brain_param_proto.ParseFromString(payload)
                    offset += size
                else:
                    # Records 2+: one agent info / action pair each.
                    agent_info_action = AgentInfoActionPairProto()
                    agent_info_action.ParseFromString(payload)
                    if behavior_spec is None:
                        # Built once, from the first pair that is decoded.
                        behavior_spec = behavior_spec_from_proto(
                            brain_param_proto, agent_info_action.agent_info
                        )
                    info_action_pairs.append(agent_info_action)
                    if len(info_action_pairs) == total_expected:
                        # Every announced step has been collected for the files so far.
                        break
                    offset += size
                records_read += 1
    if not behavior_spec:
        raise RuntimeError(
            f"No BrainParameters found in demonstration file at {file_path}."
        )
    return behavior_spec, info_action_pairs, total_expected
 def record_demo(use_discrete, num_visual=0, num_vector=1):
     """
     Run a ``RecordEnvironment`` to completion and write its demonstrations to disk.

     :param use_discrete: Selects the discrete action setup when truthy,
         otherwise the continuous one.
     :param num_visual: Forwarded to ``RecordEnvironment`` as ``num_visual``.
     :param num_vector: Forwarded to ``RecordEnvironment`` as ``num_vector``.
     :return: Path (as ``str``) of the written ``.demo`` file.

     NOTE(review): ``tmpdir_factory`` is not a parameter; presumably it is
     provided by an enclosing fixture that is not visible here -- confirm.
     """
     recorder = RecordEnvironment(
         [BRAIN_NAME],
         use_discrete=use_discrete,
         num_visual=num_visual,
         num_vector=num_vector,
         n_demos=100,
     )
     # If we want to use true demos, we can solve the env in the usual way
     # Otherwise, we can just call solve to execute the optimal policy
     recorder.solve()
     recorded_protos = recorder.demonstration_protos[BRAIN_NAME]
     meta_proto = DemonstrationMetaProto()

     if use_discrete:
         action_size, space_type, action_type = [2], discrete, "Discrete"
     else:
         action_size, space_type, action_type = [1], continuous, "Continuous"
     brain_proto = BrainParametersProto(
         vector_action_size=action_size,
         vector_action_descriptions=[""],
         vector_action_space_type=space_type,
         brain_name=BRAIN_NAME,
         is_training=True,
     )

     demo_file_name = "".join(("1DTest", action_type, ".demo"))
     target_dir = tmpdir_factory.mktemp("tmp_demo")
     demo_path = str(target_dir.join(demo_file_name))
     write_demo(demo_path, meta_proto, brain_proto, recorded_protos)
     return demo_path
예제 #3
0
 def record_demo(action_sizes, num_visual=0, num_vector=1):
     """
     Run a ``RecordEnvironment`` to completion and write its demonstrations to disk.

     :param action_sizes: Pair ``(continuous_action_size, discrete_action_size)``;
         it is unpacked in exactly that order.
     :param num_visual: Forwarded to ``RecordEnvironment`` as ``num_visual``.
     :param num_vector: Forwarded to ``RecordEnvironment`` as ``num_vector``.
     :return: Path (as ``str``) of the written ``.demo`` file.

     NOTE(review): ``tmpdir_factory`` is not a parameter; presumably it is
     provided by an enclosing fixture that is not visible here -- confirm.
     """
     env = RecordEnvironment(
         [BRAIN_NAME],
         action_sizes=action_sizes,
         num_visual=num_visual,
         num_vector=num_vector,
         n_demos=100,
     )
     # If we want to use true demos, we can solve the env in the usual way
     # Otherwise, we can just call solve to execute the optimal policy
     env.solve()
     agent_info_protos = env.demonstration_protos[BRAIN_NAME]
     meta_data_proto = DemonstrationMetaProto()
     continuous_action_size, discrete_action_size = action_sizes
     has_discrete_actions = discrete_action_size > 0
     action_spec_proto = ActionSpecProto(
         num_continuous_actions=continuous_action_size,
         num_discrete_actions=discrete_action_size,
         discrete_branch_sizes=[2] if has_discrete_actions else None,
     )
     brain_param_proto = BrainParametersProto(
         brain_name=BRAIN_NAME,
         is_training=True,
         action_spec=action_spec_proto,
     )
     # ``action_sizes`` is a two-element tuple and therefore always truthy, so
     # testing it directly labelled every demo "Discrete". Decide from the
     # number of discrete actions instead.
     action_type = "Discrete" if has_discrete_actions else "Continuous"
     demo_path_name = "1DTest" + action_type + ".demo"
     demo_path = str(tmpdir_factory.mktemp("tmp_demo").join(demo_path_name))
     write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
     return demo_path
예제 #4
0
 def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
     """
     Build the initialization reply for this mock.

     The reply carries a single ``BrainParametersProto`` whose action space type
     follows ``self.is_discrete``, plus an RL output holding the agent infos
     returned by ``self._get_agent_infos()``.

     :param inputs: Incoming message; not read by this implementation.
     :return: ``UnityOutputProto`` with both the initialization output and the
         RL output populated.
     """
     if self.is_discrete:
         space_type = discrete
     else:
         space_type = continuous
     brain_parameters = BrainParametersProto(
         vector_action_size=[2],
         vector_action_descriptions=["", ""],
         vector_action_space_type=space_type,
         brain_name=self.brain_name,
         is_training=True,
     )
     initialization_output = UnityRLInitializationOutputProto(
         name="RealFakeAcademy",
         version=UnityEnvironment.API_VERSION,
         log_path="",
         brain_parameters=[brain_parameters],
     )
     rl_output = UnityRLOutputProto(agentInfos=self._get_agent_infos())
     return UnityOutputProto(
         rl_initialization_output=initialization_output,
         rl_output=rl_output,
     )
 def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
     """
     Build the initialization reply for this mock using an ``ActionSpecProto``.

     When ``self.is_discrete`` is truthy the action spec declares two discrete
     actions with branch sizes ``[3, 2]``; otherwise it declares two continuous
     actions.

     :param inputs: Incoming message; not read by this implementation.
     :return: ``UnityOutputProto`` with both the initialization output and the
         RL output (agent infos from ``self._get_agent_infos()``) populated.
     """
     spec_kwargs = (
         {"num_discrete_actions": 2, "discrete_branch_sizes": [3, 2]}
         if self.is_discrete
         else {"num_continuous_actions": 2}
     )
     action_spec = ActionSpecProto(**spec_kwargs)
     brain_parameters = BrainParametersProto(
         brain_name=self.brain_name,
         is_training=True,
         action_spec=action_spec,
     )
     initialization_output = UnityRLInitializationOutputProto(
         name="RealFakeAcademy",
         communication_version=UnityEnvironment.API_VERSION,
         package_version="mock_package_version",
         log_path="",
         brain_parameters=[brain_parameters],
     )
     rl_output = UnityRLOutputProto(agentInfos=self._get_agent_infos())
     return UnityOutputProto(
         rl_initialization_output=initialization_output,
         rl_output=rl_output,
     )
예제 #6
0
def test_agent_behavior_spec_from_proto():
    """
    Check ``behavior_spec_from_proto`` for both action space types.

    The same generated agent proto (observation shapes ``(3,)`` and ``(4,)``)
    is combined first with brain parameters of action space type 0, expected
    to yield a discrete spec, then with type 1, expected to yield a
    continuous spec.
    """
    agent_proto = generate_list_agent_proto(1, [(3, ), (4, )])[0]

    # Action space type 0 with two entries: expected to be discrete.
    discrete_params = BrainParametersProto()
    discrete_params.vector_action_size.extend([5, 4])
    discrete_params.vector_action_space_type = 0
    discrete_spec = behavior_spec_from_proto(discrete_params, agent_proto)
    assert discrete_spec.is_action_discrete()
    assert not discrete_spec.is_action_continuous()
    assert discrete_spec.observation_shapes == [(3, ), (4, )]
    assert discrete_spec.discrete_action_branches == (5, 4)
    assert discrete_spec.action_size == 2

    # Action space type 1 with a single entry: expected to be continuous.
    continuous_params = BrainParametersProto()
    continuous_params.vector_action_size.extend([6])
    continuous_params.vector_action_space_type = 1
    continuous_spec = behavior_spec_from_proto(continuous_params, agent_proto)
    assert not continuous_spec.is_action_discrete()
    assert continuous_spec.is_action_continuous()
    assert continuous_spec.action_size == 6
예제 #7
0
def test_agent_behavior_spec_from_proto():
    """
    Check ``behavior_spec_from_proto`` when only the deprecated fields are set.

    The same generated agent proto (observation shapes ``(3,)`` and ``(4,)``)
    is combined first with brain parameters whose deprecated action space type
    is 0, expected to yield a discrete action spec, then with type 1, expected
    to yield a continuous one.
    """
    agent_proto = generate_list_agent_proto(1, [(3, ), (4, )])[0]

    # Deprecated action space type 0 with two entries: expected to be discrete.
    discrete_params = BrainParametersProto()
    discrete_params.vector_action_size_deprecated.extend([5, 4])
    discrete_params.vector_action_space_type_deprecated = 0
    discrete_spec = behavior_spec_from_proto(discrete_params, agent_proto)
    assert discrete_spec.action_spec.is_discrete()
    assert not discrete_spec.action_spec.is_continuous()
    sensor_shapes = [spec.shape for spec in discrete_spec.sensor_specs]
    assert sensor_shapes == [(3, ), (4, )]
    assert discrete_spec.action_spec.discrete_branches == (5, 4)
    assert discrete_spec.action_spec.discrete_size == 2

    # Deprecated action space type 1 with a single entry: expected to be continuous.
    continuous_params = BrainParametersProto()
    continuous_params.vector_action_size_deprecated.extend([6])
    continuous_params.vector_action_space_type_deprecated = 1
    continuous_spec = behavior_spec_from_proto(continuous_params, agent_proto)
    assert not continuous_spec.action_spec.is_discrete()
    assert continuous_spec.action_spec.is_continuous()
    assert continuous_spec.action_spec.continuous_size == 6
예제 #8
0
def load_demonstration(
    file_path: str
) -> Tuple[BrainParameters, List[AgentInfoActionPairProto], int]:
    """
    Loads and parses a demonstration file, or every '.demo' file in a directory.

    Each file is read fully into memory and then walked record by record; every
    record is preceded by a length decoded with ``_DecodeVarint32``. The first
    record is parsed as the demonstration meta-data, the second as the brain
    parameters, and all following ones as agent info/action pairs.

    :param file_path: Location of a demonstration file (.demo) or of a directory
        containing demonstration files.
    :return: A tuple of (BrainParameters built from the brain parameter proto and
        the first decoded agent info, list of AgentInfoActionPairProto, total
        number of steps announced by the meta-data of the files read).
    :raises FileNotFoundError: If ``file_path`` is neither a file nor a directory.
    :raises ValueError: If ``file_path`` is a file without the '.demo' extension,
        or a directory that contains no '.demo' file.
    :raises RuntimeError: If no BrainParameters could be built from the data.
    """

    # First 32 bytes of file dedicated to meta-data; after the meta-data record
    # has been parsed, reading resumes at this fixed offset.
    INITIAL_POS = 33
    file_paths = _collect_demo_files(file_path)

    brain_params = None
    brain_param_proto = None
    info_action_pairs = []
    total_expected = 0
    for _file_path in file_paths:
        with open(_file_path, "rb") as fp:
            with hierarchical_timer("read_file"):
                data = fp.read()
            pos, obs_decoded = 0, 0
            while pos < len(data):
                # ``next_pos`` is the length of the record that starts at ``pos``.
                next_pos, pos = _DecodeVarint32(data, pos)
                if obs_decoded == 0:
                    meta_data_proto = DemonstrationMetaProto()
                    meta_data_proto.ParseFromString(data[pos : pos + next_pos])
                    total_expected += meta_data_proto.number_steps
                    pos = INITIAL_POS
                elif obs_decoded == 1:
                    brain_param_proto = BrainParametersProto()
                    brain_param_proto.ParseFromString(data[pos : pos + next_pos])
                    pos += next_pos
                else:
                    agent_info_action = AgentInfoActionPairProto()
                    agent_info_action.ParseFromString(data[pos : pos + next_pos])
                    if brain_params is None:
                        # Built once, from the first pair that is decoded.
                        brain_params = BrainParameters.from_proto(
                            brain_param_proto, agent_info_action.agent_info
                        )
                    info_action_pairs.append(agent_info_action)
                    if len(info_action_pairs) == total_expected:
                        # Every announced step has been collected for the files so far.
                        break
                    pos += next_pos
                obs_decoded += 1
    if not brain_params:
        raise RuntimeError(
            f"No BrainParameters found in demonstration file at {file_path}."
        )
    return brain_params, info_action_pairs, total_expected


def _collect_demo_files(file_path: str) -> List[str]:
    """
    Resolve ``file_path`` to the list of '.demo' files it designates.

    :param file_path: A '.demo' file, or a directory to search (non-recursively)
        for files whose name ends with '.demo'.
    :return: Paths of the demonstration files to read.
    :raises FileNotFoundError: If ``file_path`` is neither a file nor a directory.
    :raises ValueError: If ``file_path`` is a file without the '.demo' extension,
        or a directory that contains no '.demo' file.
    """
    if os.path.isdir(file_path):
        file_paths = [
            os.path.join(file_path, _file)
            for _file in os.listdir(file_path)
            if _file.endswith(".demo")
        ]
        # Check the filtered list: a directory that only holds other files has
        # no demonstrations either.
        if not file_paths:
            raise ValueError("There are no '.demo' files in the provided directory.")
        return file_paths
    if os.path.isfile(file_path):
        if pathlib.Path(file_path).suffix != ".demo":
            raise ValueError(
                "The file is not a '.demo' file. Please provide a file with the "
                "correct extension."
            )
        return [file_path]
    raise FileNotFoundError(
        "The demonstration file or directory {} does not exist.".format(file_path)
    )