Example #1
def test_process_trajectory(dummy_config):
    brain_params_team0 = BrainParameters(
        brain_name="test_brain?team=0",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )

    brain_name = BehaviorIdentifiers.from_name_behavior_id(
        brain_params_team0.brain_name).brain_name

    brain_params_team1 = BrainParameters(
        brain_name="test_brain?team=1",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0",
                             False)
    trainer = GhostTrainer(ppo_trainer, brain_name, 0, dummy_config, True, "0")

    # first policy encountered becomes policy trained by wrapped PPO
    policy = trainer.create_policy(brain_params_team0)
    trainer.add_policy(brain_params_team0.brain_name, policy)
    trajectory_queue0 = AgentManagerQueue(brain_params_team0.brain_name)
    trainer.subscribe_trajectory_queue(trajectory_queue0)

    # Ghost trainer should ignore this queue because it is off-policy
    policy = trainer.create_policy(brain_params_team1)
    trainer.add_policy(brain_params_team1.brain_name, policy)
    trajectory_queue1 = AgentManagerQueue(brain_params_team1.brain_name)
    trainer.subscribe_trajectory_queue(trajectory_queue1)

    time_horizon = 15
    trajectory = make_fake_trajectory(
        length=time_horizon,
        max_step_complete=True,
        vec_obs_size=1,
        num_vis_obs=0,
        action_space=[2],
    )
    trajectory_queue0.put(trajectory)
    trainer.advance()

    # Check that trainer put trajectory in update buffer
    assert trainer.trainer.update_buffer.num_experiences == 15

    trajectory_queue1.put(trajectory)
    trainer.advance()

    # Check that ghost trainer ignored off policy queue
    assert trainer.trainer.update_buffer.num_experiences == 15
    # Check that it emptied the queue
    assert trajectory_queue1.empty()
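
The team suffix in the behavior name is what lets the ghost trainer tell the two queues apart. A minimal sketch of that parsing, using a hypothetical parse_behavior_id helper in place of the real BehaviorIdentifiers class:

def parse_behavior_id(name_behavior_id: str):
    """Split 'brain?team=N' into (brain_name, team_id) -- hypothetical stand-in."""
    if "?team=" in name_behavior_id:
        brain_name, team = name_behavior_id.split("?team=")
        return brain_name, int(team)
    return name_behavior_id, 0  # no team suffix: default to team 0

assert parse_behavior_id("test_brain?team=0") == ("test_brain", 0)
assert parse_behavior_id("test_brain?team=1") == ("test_brain", 1)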
Example #2
def test_demo_mismatch():
    path_prefix = os.path.dirname(os.path.abspath(__file__))
    # observation mismatch
    with pytest.raises(RuntimeError):
        brain_params_obs = BrainParameters(
            brain_name="test_brain",
            vector_observation_space_size=9,
            camera_resolutions=[],
            vector_action_space_size=[2],
            vector_action_descriptions=[],
            vector_action_space_type=1,
        )
        _, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1,
                                        brain_params_obs)
    # action mismatch
    with pytest.raises(RuntimeError):
        brain_params_act = BrainParameters(
            brain_name="test_brain",
            vector_observation_space_size=8,
            camera_resolutions=[],
            vector_action_space_size=[3],
            vector_action_descriptions=[],
            vector_action_space_type=1,
        )
        _, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1,
                                        brain_params_act)
    # action type mismatch
    with pytest.raises(RuntimeError):
        brain_params_type = BrainParameters(
            brain_name="test_brain",
            vector_observation_space_size=8,
            camera_resolutions=[],
            vector_action_space_size=[2],
            vector_action_descriptions=[],
            vector_action_space_type=0,
        )
        _, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1,
                                        brain_params_type)
    # vis obs mismatch
    with pytest.raises(RuntimeError):
        brain_params_vis = BrainParameters(
            brain_name="test_brain",
            vector_observation_space_size=8,
            camera_resolutions=[[30, 40]],
            vector_action_space_size=[2],
            vector_action_descriptions=[],
            vector_action_space_type=1,
        )
        _, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1,
                                        brain_params_vis)
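
demo_to_buffer raises RuntimeError whenever the supplied BrainParameters disagree with what the .demo file recorded. A rough sketch of the kind of comparison involved (the real loader's checks may differ in detail):

def check_demo_compatibility(expected, recorded):
    # Hedged sketch, not the actual demo_to_buffer validation code.
    if expected.vector_observation_space_size != recorded.vector_observation_space_size:
        raise RuntimeError("Vector observation size mismatch between policy and demo.")
    if expected.vector_action_space_size != recorded.vector_action_space_size:
        raise RuntimeError("Action size mismatch between policy and demo.")
    if expected.vector_action_space_type != recorded.vector_action_space_type:
        raise RuntimeError("Action type mismatch between policy and demo.")
    if len(expected.camera_resolutions) != len(recorded.camera_resolutions):
        raise RuntimeError("Visual observation count mismatch between policy and demo.")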
Example #3
def test_trainer_increment_step(ppo_optimizer, dummy_config):
    trainer_params = dummy_config
    mock_optimizer = mock.Mock()
    mock_optimizer.reward_signals = {}
    ppo_optimizer.return_value = mock_optimizer

    brain_params = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )

    trainer = PPOTrainer(
        brain_params.brain_name, 0, trainer_params, True, False, 0, "0"
    )
    policy_mock = mock.Mock(spec=NNPolicy)
    policy_mock.get_current_step.return_value = 0
    step_count = 5  # was 10; hacked because this function is no longer called through the trainer
    policy_mock.increment_step = mock.Mock(return_value=step_count)
    trainer.add_policy("testbehavior", policy_mock)

    trainer._increment_step(5, "testbehavior")
    policy_mock.increment_step.assert_called_with(5)
    assert trainer.step == step_count
Example #4
def test_normalization(dummy_config):
    brain_params = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0",
                         False)
    time_horizon = 6
    trajectory = make_fake_trajectory(
        length=time_horizon,
        max_step_complete=True,
        vec_obs_size=1,
        num_vis_obs=0,
        action_space=2,
    )
    # Change half of the obs to 0
    for i in range(3):
        trajectory.steps[i].obs[0] = np.zeros(1, dtype=np.float32)
    trainer.process_trajectory(trajectory)

    # Check that the running mean and variance is correct
    steps, mean, variance = trainer.policy.sess.run([
        trainer.policy.model.normalization_steps,
        trainer.policy.model.running_mean,
        trainer.policy.model.running_variance,
    ])

    assert steps == 6
    assert mean[0] == 0.5
    # Note: variance is divided by number of steps, and initialized to 1 to avoid
    # divide by 0. The right answer is 0.25
    assert (variance[0] - 1) / steps == 0.25

    # Make another update, this time with all 1's
    time_horizon = 10
    trajectory = make_fake_trajectory(
        length=time_horizon,
        max_step_complete=True,
        vec_obs_size=1,
        num_vis_obs=0,
        action_space=2,
    )
    trainer.process_trajectory(trajectory)

    # Check that the running mean and variance is correct
    steps, mean, variance = trainer.policy.sess.run([
        trainer.policy.model.normalization_steps,
        trainer.policy.model.running_mean,
        trainer.policy.model.running_variance,
    ])

    assert steps == 16
    assert mean[0] == 0.8125
    assert (variance[0] - 1) / steps == pytest.approx(0.152, abs=0.01)
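
The expected values in the asserts follow from ordinary running moments. A back-of-the-envelope check with numpy, assuming the normalizer tracks the plain mean and variance of all observations seen so far:

import numpy as np

obs = np.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0])  # 6 steps, half zeros
assert obs.mean() == 0.5               # matches mean[0] == 0.5
assert obs.var() == 0.25               # matches (variance[0] - 1) / steps == 0.25

obs = np.concatenate([obs, np.ones(10)])  # 10 more steps of all 1's
assert obs.mean() == 0.8125               # 13 / 16, matches mean[0]
assert abs(obs.var() - 0.152) < 0.01      # matches the pytest.approx check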
Example #5
def test_normalization(dummy_config):
    brain_params = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )
    dummy_config["output_path"] = "./results/test_trainer_models/TestModel"

    time_horizon = 6
    trajectory = make_fake_trajectory(
        length=time_horizon,
        max_step_complete=True,
        vec_obs_size=1,
        num_vis_obs=0,
        action_space=[2],
    )
    # Change half of the obs to 0
    for i in range(3):
        trajectory.steps[i].obs[0] = np.zeros(1, dtype=np.float32)
    policy = NNPolicy(0, brain_params, dummy_config, False, False)

    trajectory_buffer = trajectory.to_agentbuffer()
    policy.update_normalization(trajectory_buffer["vector_obs"])

    # Check that the running mean and variance is correct
    steps, mean, variance = policy.sess.run([
        policy.normalization_steps, policy.running_mean,
        policy.running_variance
    ])

    assert steps == 6
    assert mean[0] == 0.5
    # Note: variance is divided by number of steps, and initialized to 1 to avoid
    # divide by 0. The right answer is 0.25
    assert (variance[0] - 1) / steps == 0.25

    # Make another update, this time with all 1's
    time_horizon = 10
    trajectory = make_fake_trajectory(
        length=time_horizon,
        max_step_complete=True,
        vec_obs_size=1,
        num_vis_obs=0,
        action_space=[2],
    )
    trajectory_buffer = trajectory.to_agentbuffer()
    policy.update_normalization(trajectory_buffer["vector_obs"])

    # Check that the running mean and variance is correct
    steps, mean, variance = policy.sess.run([
        policy.normalization_steps, policy.running_mean,
        policy.running_variance
    ])

    assert steps == 16
    assert mean[0] == 0.8125
    assert (variance[0] - 1) / steps == pytest.approx(0.152, abs=0.01)
Example #6
def test_handles_no_default_section(dummy_config):
    """
    Make sure the trainer setup handles a missing "default" in the config.
    """
    brain_name = "testbrain"
    no_default_config = {brain_name: dummy_config["default"]}
    brain_parameters = BrainParameters(
        brain_name=brain_name,
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )

    trainer_factory = trainer_util.TrainerFactory(
        trainer_config=no_default_config,
        summaries_dir="test_dir",
        run_id="testrun",
        model_path="model_dir",
        keep_checkpoints=1,
        train_model=True,
        load_model=False,
        seed=42,
    )
    trainer_factory.generate(brain_parameters.brain_name)
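
The factory is expected to prefer a per-brain section and only then fall back to "default". A small sketch of that lookup order (a hypothetical helper, not the factory's real internals):

def resolve_trainer_config(trainer_config: dict, brain_name: str) -> dict:
    # Hedged sketch of the assumed lookup order.
    if brain_name in trainer_config:
        return trainer_config[brain_name]  # per-brain section wins
    if "default" in trainer_config:
        return trainer_config["default"]   # otherwise fall back to "default"
    raise TrainerConfigError(
        "No config for brain {} and no 'default' section.".format(brain_name)
    )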
Example #7
def test_raise_if_no_config_for_brain(dummy_config):
    """
    Make sure the trainer setup raises a friendlier exception if both "default" and the brain name
    are missing from the config.
    """
    brain_name = "testbrain"
    bad_config = {"some_other_brain": dummy_config["default"]}
    brain_parameters = BrainParameters(
        brain_name=brain_name,
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )

    trainer_factory = trainer_util.TrainerFactory(
        trainer_config=bad_config,
        summaries_dir="test_dir",
        run_id="testrun",
        model_path="model_dir",
        keep_checkpoints=1,
        train_model=True,
        load_model=False,
        seed=42,
    )
    with pytest.raises(TrainerConfigError):
        trainer_factory.generate(brain_parameters)
Example #8
def test_process_trajectory(dummy_config):
    brain_params = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
    policy = trainer.create_policy(brain_params)
    trainer.add_policy(brain_params.brain_name, policy)
    trajectory_queue = AgentManagerQueue("testbrain")
    trainer.subscribe_trajectory_queue(trajectory_queue)
    time_horizon = 15
    trajectory = make_fake_trajectory(
        length=time_horizon,
        max_step_complete=True,
        vec_obs_size=1,
        num_vis_obs=0,
        action_space=[2],
    )
    trajectory_queue.put(trajectory)
    trainer.advance()

    # Check that trainer put trajectory in update buffer
    assert trainer.update_buffer.num_experiences == 15

    # Check that GAE worked
    assert (
        "advantages" in trainer.update_buffer
        and "discounted_returns" in trainer.update_buffer
    )

    # Check that the stats are being collected as episode isn't complete
    for reward in trainer.collected_rewards.values():
        for agent in reward.values():
            assert agent > 0

    # Add a terminal trajectory
    trajectory = make_fake_trajectory(
        length=time_horizon + 1,
        max_step_complete=False,
        vec_obs_size=1,
        num_vis_obs=0,
        action_space=[2],
    )
    trajectory_queue.put(trajectory)
    trainer.advance()

    # Check that the stats are reset as episode is finished
    for reward in trainer.collected_rewards.values():
        for agent in reward.values():
            assert agent == 0
    assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0
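
"Check that GAE worked" refers to generalized advantage estimation filling the "advantages" and "discounted_returns" fields. A compact, self-contained version of the standard GAE recurrence (mirroring the textbook formula, not the trainer's exact code):

import numpy as np

def gae(rewards, values, bootstrap_value, gamma=0.99, lam=0.95):
    """A_t = delta_t + gamma * lam * A_{t+1}, with delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)."""
    values = np.append(values, bootstrap_value)
    advantages = np.zeros(len(rewards))
    last = 0.0
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * values[t + 1] - values[t]
        last = delta + gamma * lam * last
        advantages[t] = last
    returns = advantages + values[:-1]  # discounted returns used as value targets
    return advantages, returns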
Example #9
def test_ppo_get_value_estimates(mock_communicator, mock_launcher,
                                 dummy_config):
    tf.reset_default_graph()

    brain_params = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    policy = PPOPolicy(0, brain_params, dummy_config, False, False)
    time_horizon = 15
    trajectory = make_fake_trajectory(
        length=time_horizon,
        max_step_complete=True,
        vec_obs_size=1,
        num_vis_obs=0,
        action_space=[2],
    )
    run_out = policy.get_value_estimates(trajectory.next_obs,
                                         "test_agent",
                                         done=False)
    for key, val in run_out.items():
        assert type(key) is str
        assert type(val) is float

    run_out = policy.get_value_estimates(trajectory.next_obs,
                                         "test_agent",
                                         done=True)
    for key, val in run_out.items():
        assert type(key) is str
        assert val == 0.0

    # Check if we ignore terminal states properly
    policy.reward_signals["extrinsic"].use_terminal_states = False
    run_out = policy.get_value_estimates(trajectory.next_obs,
                                         "test_agent",
                                         done=True)
    for key, val in run_out.items():
        assert type(key) is str
        assert val != 0.0

    agentbuffer = trajectory.to_agentbuffer()
    batched_values = policy.get_batched_value_estimates(agentbuffer)
    for values in batched_values.values():
        assert len(values) == 15
Example #10
def group_spec_to_brain_parameters(
    name: str, group_spec: AgentGroupSpec
) -> BrainParameters:
    vec_size = np.sum(
        [shape[0] for shape in group_spec.observation_shapes if len(shape) == 1]
    )
    vis_sizes = [shape for shape in group_spec.observation_shapes if len(shape) == 3]
    cam_res = [CameraResolution(s[0], s[1], s[2]) for s in vis_sizes]
    a_size: List[int] = []
    if group_spec.is_action_discrete():
        a_size += list(group_spec.discrete_action_branches)
        vector_action_space_type = 0
    else:
        a_size += [group_spec.action_size]
        vector_action_space_type = 1
    return BrainParameters(
        name, int(vec_size), cam_res, a_size, [], vector_action_space_type
    )
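
The conversion keys off shape rank: length-1 shapes are vector observations (summed into a single size) and length-3 shapes become camera resolutions. A minimal illustration with plain tuples:

observation_shapes = [(8,), (84, 84, 3)]  # one vector obs, one camera
vec_size = sum(s[0] for s in observation_shapes if len(s) == 1)
vis_sizes = [s for s in observation_shapes if len(s) == 3]
assert vec_size == 8 and vis_sizes == [(84, 84, 3)]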
Example #11
def make_brain_parameters(
    discrete_action: bool = False,
    visual_inputs: int = 0,
    brain_name: str = "RealFakeBrain",
    vec_obs_size: int = 6,
) -> BrainParameters:
    resolutions = [
        CameraResolution(width=30, height=40, num_channels=3)
        for _ in range(visual_inputs)
    ]

    return BrainParameters(
        vector_observation_space_size=vec_obs_size,
        camera_resolutions=resolutions,
        vector_action_space_size=[2],
        vector_action_descriptions=["", ""],
        vector_action_space_type=int(not discrete_action),
        brain_name=brain_name,
    )
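
The one subtle line is vector_action_space_type=int(not discrete_action), which matches the encoding used throughout these examples (0 for discrete, 1 for continuous):

assert int(not True) == 0   # discrete_action=True  -> space type 0 (discrete)
assert int(not False) == 1  # discrete_action=False -> space type 1 (continuous)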
Example #12
def group_spec_to_brain_parameters(
        name: str, group_spec
) -> BrainParameters:
    vec_size = np.sum(
        [shape[0] for shape in group_spec['observation_shapes'] if len(shape) == 1]
    )
    vis_sizes = [shape for shape in group_spec['observation_shapes'] if len(shape) == 3]
    cam_res = [CameraResolution(s[0], s[1], s[2]) for s in vis_sizes]
    a_size: List[int] = []
    if group_spec['action_type'] == 'DISCRETE':
        a_size += list(group_spec['action_shape'])
        vector_action_space_type = 0
    else:
        a_size += [group_spec['action_shape']]
        vector_action_space_type = 1
    return BrainParameters(
        brain_name=name, vector_observation_space_size=int(vec_size), camera_resolutions=cam_res,
        vector_action_space_size=a_size, vector_action_descriptions=[],
        vector_action_space_type=vector_action_space_type
    )
Example #13
def test_trainer_increment_step(dummy_config):
    trainer_params = dummy_config
    brain_params = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )

    trainer = PPOTrainer(brain_params, 0, trainer_params, True, False, 0, "0",
                         False)
    policy_mock = mock.Mock()
    step_count = 10
    policy_mock.increment_step = mock.Mock(return_value=step_count)
    trainer.policy = policy_mock

    trainer.increment_step(5)
    policy_mock.increment_step.assert_called_with(5)
    assert trainer.step == 10
Example #14
def test_add_rewards_output(dummy_config):
    brain_params = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0",
                         False)
    rewardsout = AllRewardsOutput(
        reward_signals={
            "extrinsic":
            RewardSignalResult(
                scaled_reward=np.array([1.0, 1.0], dtype=np.float32),
                unscaled_reward=np.array([1.0, 1.0], dtype=np.float32),
            )
        },
        environment=np.array([1.0, 1.0], dtype=np.float32),
    )
    values = {"extrinsic": np.array([[2.0]], dtype=np.float32)}
    agent_id = "123"
    idx = 0
    # make sure that we're grabbing from the next_idx for rewards. If we're not, the test will fail.
    next_idx = 1
    trainer.add_rewards_outputs(
        rewardsout,
        values=values,
        agent_id=agent_id,
        agent_idx=idx,
        agent_next_idx=next_idx,
    )
    assert trainer.processing_buffer[agent_id]["extrinsic_value_estimates"][
        0] == 2.0
    assert trainer.processing_buffer[agent_id]["extrinsic_rewards"][0] == 1.0
Example #15
def test_handles_no_config_provided(BrainParametersMock):
    """
    Make sure the trainer setup handles no configs provided at all.
    """
    brain_name = "testbrain"
    no_default_config = RunOptions().behaviors
    brain_parameters = BrainParameters(
        brain_name=brain_name,
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )

    trainer_factory = trainer_util.TrainerFactory(
        trainer_config=no_default_config,
        output_path="output_path",
        train_model=True,
        load_model=False,
        seed=42,
    )
    trainer_factory.generate(brain_parameters.brain_name)
Example #16
def test_trainer_increment_step(dummy_config):
    trainer_params = dummy_config
    brain_params = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )

    trainer = PPOTrainer(brain_params.brain_name, 0, trainer_params, True,
                         False, 0, "0", False)
    policy_mock = mock.Mock()
    step_count = 5  # was 10; hacked because this function is no longer called through the trainer
    policy_mock.increment_step = mock.Mock(return_value=step_count)
    trainer.policy = policy_mock

    trainer.increment_step(5)
    policy_mock.increment_step.assert_called_with(5)
    assert trainer.step == step_count
Example #17
def test_publish_queue(dummy_config):
    brain_params_team0 = BrainParameters(
        brain_name="test_brain?team=0",
        vector_observation_space_size=8,
        camera_resolutions=[],
        vector_action_space_size=[1],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )

    parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(
        brain_params_team0.brain_name
    )

    brain_name = parsed_behavior_id0.brain_name

    brain_params_team1 = BrainParameters(
        brain_name="test_brain?team=1",
        vector_observation_space_size=8,
        camera_resolutions=[],
        vector_action_space_size=[1],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0")
    controller = GhostController(100)
    trainer = GhostTrainer(
        ppo_trainer, brain_name, controller, 0, dummy_config, True, "0"
    )

    # First policy encountered becomes policy trained by wrapped PPO
    # This queue should remain empty after swap snapshot
    policy = trainer.create_policy(parsed_behavior_id0, brain_params_team0)
    trainer.add_policy(parsed_behavior_id0, policy)
    policy_queue0 = AgentManagerQueue(brain_params_team0.brain_name)
    trainer.publish_policy_queue(policy_queue0)

    # Ghost trainer should use this queue for ghost policy swap
    parsed_behavior_id1 = BehaviorIdentifiers.from_name_behavior_id(
        brain_params_team1.brain_name
    )
    policy = trainer.create_policy(parsed_behavior_id1, brain_params_team1)
    trainer.add_policy(parsed_behavior_id1, policy)
    policy_queue1 = AgentManagerQueue(brain_params_team1.brain_name)
    trainer.publish_policy_queue(policy_queue1)

    # check ghost trainer swap pushes to ghost queue and not trainer
    assert policy_queue0.empty() and policy_queue1.empty()
    trainer._swap_snapshots()
    assert policy_queue0.empty() and not policy_queue1.empty()
    # clear
    policy_queue1.get_nowait()

    mock_brain = mb.setup_mock_brain(
        False,
        False,
        vector_action_space=VECTOR_ACTION_SPACE,
        vector_obs_space=VECTOR_OBS_SPACE,
        discrete_action_space=DISCRETE_ACTION_SPACE,
    )

    buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, mock_brain)
    # Mock out reward signal eval
    buffer["extrinsic_rewards"] = buffer["environment_rewards"]
    buffer["extrinsic_returns"] = buffer["environment_rewards"]
    buffer["extrinsic_value_estimates"] = buffer["environment_rewards"]
    buffer["curiosity_rewards"] = buffer["environment_rewards"]
    buffer["curiosity_returns"] = buffer["environment_rewards"]
    buffer["curiosity_value_estimates"] = buffer["environment_rewards"]
    buffer["advantages"] = buffer["environment_rewards"]
    trainer.trainer.update_buffer = buffer

    # when ghost trainer advance and wrapped trainer buffers full
    # the wrapped trainer pushes updated policy to correct queue
    assert policy_queue0.empty() and policy_queue1.empty()
    trainer.advance()
    assert not policy_queue0.empty() and policy_queue1.empty()
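
The asserts above hinge on each behavior id owning its own policy queue: snapshot swaps feed the ghost (team 1) queue, while a completed trainer update feeds the learning (team 0) queue. A stripped-down sketch of that routing with plain queues (hypothetical names, not GhostTrainer internals):

from queue import Queue

policy_queues = {
    "test_brain?team=0": Queue(),  # learning team
    "test_brain?team=1": Queue(),  # ghost team
}

def push_policy(behavior_id, policy):
    policy_queues[behavior_id].put(policy)

push_policy("test_brain?team=1", "snapshot-policy")  # what a swap would do
assert policy_queues["test_brain?team=0"].empty()
assert policy_queues["test_brain?team=1"].get_nowait() == "snapshot-policy"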
Example #18
import os

import numpy as np

from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
    DemonstrationMetaProto,
)

from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.demo_loader import (
    load_demonstration,
    demo_to_buffer,
    get_demo_files,
    write_delimited,
)

BRAIN_PARAMS = BrainParameters(
    brain_name="test_brain",
    vector_observation_space_size=8,
    camera_resolutions=[],
    vector_action_space_size=[2],
    vector_action_descriptions=[],
    vector_action_space_type=1,
)


def test_load_demo():
    path_prefix = os.path.dirname(os.path.abspath(__file__))
    behavior_spec, pair_infos, total_expected = load_demonstration(
        path_prefix + "/test.demo")
    assert np.sum(behavior_spec.observation_shapes[0]) == 8
    assert len(pair_infos) == total_expected

    _, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1,
                                    BRAIN_PARAMS)
    assert len(demo_buffer["actions"]) == total_expected - 1
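
The final assert expects one action fewer than total_expected. A plausible reading (not confirmed by the snippet alone) is that N recorded steps only yield N - 1 usable transitions, since the last step has no successor:

steps = ["s0", "s1", "s2", "s3"]                # total_expected == 4
transitions = list(zip(steps[:-1], steps[1:]))  # (s0,s1), (s1,s2), (s2,s3)
assert len(transitions) == len(steps) - 1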
Example #19
def load_demonstration(
    file_path: str
) -> Tuple[BrainParameters, List[AgentInfoActionPairProto], int]:
    """
    Loads and parses a demonstration file.
    :param file_path: Location of demonstration file (.demo).
    :return: BrainParameters, the list of AgentInfoActionPairProto containing the demonstration data, and the total expected number of steps.
    """

    # First 32 bytes of file dedicated to meta-data.
    INITIAL_POS = 33
    file_paths = []
    if os.path.isdir(file_path):
        all_files = os.listdir(file_path)
        for _file in all_files:
            if _file.endswith(".demo"):
                file_paths.append(os.path.join(file_path, _file))
        if not file_paths:
            raise ValueError("There are no '.demo' files in the provided directory.")
    elif os.path.isfile(file_path):
        file_paths.append(file_path)
        file_extension = pathlib.Path(file_path).suffix
        if file_extension != ".demo":
            raise ValueError(
                "The file is not a '.demo' file. Please provide a file with the "
                "correct extension."
            )
    else:
        raise FileNotFoundError(
            "The demonstration file or directory {} does not exist.".format(file_path)
        )

    brain_params = None
    brain_param_proto = None
    info_action_pairs = []
    total_expected = 0
    for _file_path in file_paths:
        with open(_file_path, "rb") as fp:
            with hierarchical_timer("read_file"):
                data = fp.read()
            next_pos, pos, obs_decoded = 0, 0, 0
            while pos < len(data):
                next_pos, pos = _DecodeVarint32(data, pos)
                if obs_decoded == 0:
                    meta_data_proto = DemonstrationMetaProto()
                    meta_data_proto.ParseFromString(data[pos : pos + next_pos])
                    total_expected += meta_data_proto.number_steps
                    pos = INITIAL_POS
                if obs_decoded == 1:
                    brain_param_proto = BrainParametersProto()
                    brain_param_proto.ParseFromString(data[pos : pos + next_pos])
                    pos += next_pos
                if obs_decoded > 1:
                    agent_info_action = AgentInfoActionPairProto()
                    agent_info_action.ParseFromString(data[pos : pos + next_pos])
                    if brain_params is None:
                        brain_params = BrainParameters.from_proto(
                            brain_param_proto, agent_info_action.agent_info
                        )
                    info_action_pairs.append(agent_info_action)
                    if len(info_action_pairs) == total_expected:
                        break
                    pos += next_pos
                obs_decoded += 1
    if not brain_params:
        raise RuntimeError(
            f"No BrainParameters found in demonstration file at {file_path}."
        )
    return brain_params, info_action_pairs, total_expected
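
The loader walks the file as a stream of varint-length-prefixed protobuf messages, which is what _DecodeVarint32 decodes. A self-contained re-implementation of that framing, for illustration only:

def encode_varint(value: int) -> bytes:
    # 7 bits per byte, high bit set on every byte except the last.
    out = bytearray()
    while True:
        bits = value & 0x7F
        value >>= 7
        if value:
            out.append(bits | 0x80)
        else:
            out.append(bits)
            return bytes(out)

def decode_varint(data: bytes, pos: int):
    result, shift = 0, 0
    while True:
        b = data[pos]
        result |= (b & 0x7F) << shift
        pos += 1
        if not b & 0x80:
            return result, pos
        shift += 7

frame = encode_varint(300) + b"x" * 300  # length prefix, then payload
size, pos = decode_varint(frame, 0)
assert size == 300 and frame[pos : pos + size] == b"x" * 300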