Example 1
def test_action_generator():
    # Continuous
    action_len = 30
    specs = BehaviorSpec(
        observation_shapes=[(5, )],
        action_type=ActionType.CONTINUOUS,
        action_shape=action_len,
    )
    zero_action = specs.create_empty_action(4)
    assert np.array_equal(zero_action,
                          np.zeros((4, action_len), dtype=np.float32))
    random_action = specs.create_random_action(4)
    assert random_action.dtype == np.float32
    assert random_action.shape == (4, action_len)
    assert np.min(random_action) >= -1
    assert np.max(random_action) <= 1

    # Discrete
    action_shape = (10, 20, 30)
    specs = BehaviorSpec(
        observation_shapes=[(5, )],
        action_type=ActionType.DISCRETE,
        action_shape=action_shape,
    )
    zero_action = specs.create_empty_action(4)
    assert np.array_equal(zero_action,
                          np.zeros((4, len(action_shape)), dtype=np.int32))

    random_action = specs.create_random_action(4)
    assert random_action.dtype == np.int32
    assert random_action.shape == (4, len(action_shape))
    assert np.min(random_action) >= 0
    for index, branch_size in enumerate(action_shape):
        assert np.max(random_action[:, index]) < branch_size
def test_batched_step_result_from_proto():
    n_agents = 10
    shapes = [(3, ), (4, )]
    spec = BehaviorSpec(create_observation_specs_with_shapes(shapes),
                        ActionSpec.create_continuous(3))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    decision_steps, terminal_steps = steps_from_proto(ap_list, spec)
    for agent_id in range(n_agents):
        if agent_id in decision_steps:
            # we set the reward equal to the agent id in generate_list_agent_proto
            assert decision_steps[agent_id].reward == agent_id
        elif agent_id in terminal_steps:
            assert terminal_steps[agent_id].reward == agent_id
        else:
            raise Exception("Missing agent from the steps")
    # We sort the AgentId since they are split between DecisionSteps and TerminalSteps
    combined_agent_id = list(decision_steps.agent_id) + list(
        terminal_steps.agent_id)
    combined_agent_id.sort()
    assert combined_agent_id == list(range(n_agents))
    for agent_id in range(n_agents):
        assert (agent_id in terminal_steps) == (agent_id % 2 == 0)
        if agent_id in terminal_steps:
            assert terminal_steps[agent_id].interrupted == (agent_id % 4 == 0)
    assert decision_steps.obs[0].shape[1] == shapes[0][0]
    assert decision_steps.obs[1].shape[1] == shapes[1][0]
    assert terminal_steps.obs[0].shape[1] == shapes[0][0]
    assert terminal_steps.obs[1].shape[1] == shapes[1][0]
Example 3
def create_mock_group_spec(
    number_visual_observations=0,
    vector_action_space_type="continuous",
    vector_observation_space_size=3,
    vector_action_space_size=None,
):
    """
    Creates a mock BehaviorSpec object with the given parameters.
    """
    # Avoid using mutable object as default param
    if vector_action_space_type == "continuous":
        if vector_action_space_size is None:
            vector_action_space_size = 2
        else:
            vector_action_space_size = vector_action_space_size[0]
        action_spec = ActionSpec.create_continuous(vector_action_space_size)
    else:
        if vector_action_space_size is None:
            vector_action_space_size = (2, )
        else:
            vector_action_space_size = tuple(vector_action_space_size)
        action_spec = ActionSpec.create_discrete(vector_action_space_size)
    obs_shapes = [(vector_observation_space_size, )]
    for _ in range(number_visual_observations):
        obs_shapes += [(8, 8, 3)]
    obs_spec = create_observation_specs_with_shapes(obs_shapes)
    return BehaviorSpec(obs_spec, action_spec)
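
A minimal usage sketch for the helper above. It assumes the same mlagents_envs imports and helpers as the surrounding examples; the observation_specs attribute name and the example function itself are illustrative, not part of the original test suite.

def example_create_mock_group_spec_usage():
    # Hypothetical usage sketch; names other than create_mock_group_spec and
    # ActionSpec are assumptions.
    spec = create_mock_group_spec(
        number_visual_observations=1,
        vector_action_space_type="discrete",
        vector_action_space_size=[3, 2],
    )
    # ActionSpec is a NamedTuple, so value equality works here.
    assert spec.action_spec == ActionSpec.create_discrete((3, 2))
    # One vector observation plus one (8, 8, 3) visual observation.
    assert len(spec.observation_specs) == 2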
Example 4
def behavior_spec_from_proto(brain_param_proto: BrainParametersProto,
                             agent_info: AgentInfoProto) -> BehaviorSpec:
    """
    Converts brain parameter and agent info proto to BehaviorSpec object.
    :param brain_param_proto: protobuf object.
    :param agent_info: protobuf object.
    :return: BehaviorSpec object.
    """
    observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
    dim_props = [
        tuple(DimensionProperty(dim) for dim in obs.dimension_properties)
        for obs in agent_info.observations
    ]
    sensor_specs = [
        SensorSpec(obs_shape, dim_p)
        for obs_shape, dim_p in zip(observation_shape, dim_props)
    ]
    # proto from communicator < v1.3 does not set action spec, use deprecated fields instead
    if (brain_param_proto.action_spec.num_continuous_actions == 0
            and brain_param_proto.action_spec.num_discrete_actions == 0):
        if brain_param_proto.vector_action_space_type_deprecated == 1:
            action_spec = ActionSpec(
                brain_param_proto.vector_action_size_deprecated[0], ())
        else:
            action_spec = ActionSpec(
                0, tuple(brain_param_proto.vector_action_size_deprecated))
    else:
        action_spec_proto = brain_param_proto.action_spec
        action_spec = ActionSpec(
            action_spec_proto.num_continuous_actions,
            tuple(branch
                  for branch in action_spec_proto.discrete_branch_sizes),
        )
    return BehaviorSpec(sensor_specs, action_spec)
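
A short sketch of the deprecated-field fallback in behavior_spec_from_proto above. The proto import paths are assumptions about where mlagents_envs keeps its generated classes; the field names come from the function itself, and the sketch assumes behavior_spec_from_proto and ActionSpec are already in scope as in the example.

def example_behavior_spec_from_proto_fallback():
    # Sketch only: exercises the "communicator < v1.3" fallback branch above.
    # Import paths are assumed; adjust them to the installed mlagents_envs version.
    from mlagents_envs.communicator_objects.agent_info_pb2 import AgentInfoProto
    from mlagents_envs.communicator_objects.brain_parameters_pb2 import (
        BrainParametersProto,
    )

    brain_params = BrainParametersProto(
        # action_spec keeps its zero defaults, so the deprecated fields are used.
        vector_action_size_deprecated=[2, 3],
        vector_action_space_type_deprecated=0,  # anything other than 1 is treated as discrete
    )
    spec = behavior_spec_from_proto(brain_params, AgentInfoProto())
    assert spec.action_spec == ActionSpec(0, (2, 3))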
Example 5
def create_behavior_spec(num_visual, num_vector, vector_size):
    behavior_spec = BehaviorSpec(
        [(84, 84, 3)] * int(num_visual) + [(vector_size, )] * int(num_vector),
        ActionType.DISCRETE,
        (1, ),
    )
    return behavior_spec
Example 6
def create_mock_group_spec(
    number_visual_observations=0,
    vector_action_space_type="continuous",
    vector_observation_space_size=3,
    vector_action_space_size=None,
):
    """
    Creates a mock BehaviorSpec object with the given parameters.
    """
    # Avoid using mutable object as default param
    act_type = ActionType.DISCRETE
    if vector_action_space_type == "continuous":
        act_type = ActionType.CONTINUOUS
        if vector_action_space_size is None:
            vector_action_space_size = 2
        else:
            vector_action_space_size = vector_action_space_size[0]
    else:
        if vector_action_space_size is None:
            vector_action_space_size = (2, )
        else:
            vector_action_space_size = tuple(vector_action_space_size)
    obs_shapes = [(vector_observation_space_size, )]
    for _ in range(number_visual_observations):
        obs_shapes += [(8, 8, 3)]
    return BehaviorSpec(obs_shapes, act_type, vector_action_space_size)
def test_empty_terminal_steps():
    specs = BehaviorSpec(observation_shapes=[(3, 2), (5, )],
                         action_spec=ActionSpec.create_continuous(3))
    ts = TerminalSteps.empty(specs)
    assert len(ts.obs) == 2
    assert ts.obs[0].shape == (0, 3, 2)
    assert ts.obs[1].shape == (0, 5)
def test_empty_decision_steps():
    specs = BehaviorSpec(observation_shapes=[(3, 2), (5, )],
                         action_spec=ActionSpec.create_continuous(3))
    ds = DecisionSteps.empty(specs)
    assert len(ds.obs) == 2
    assert ds.obs[0].shape == (0, 3, 2)
    assert ds.obs[1].shape == (0, 5)
Example 9
def test_action_masking_continuous():
    n_agents = 10
    shapes = [(3, ), (4, )]
    behavior_spec = BehaviorSpec(shapes, ActionType.CONTINUOUS, 10)
    ap_list = generate_list_agent_proto(n_agents, shapes)
    decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
    masks = decision_steps.action_mask
    assert masks is None
Example 10
def test_take_action_returns_empty_with_no_agents():
    test_seed = 3
    policy = FakePolicy(test_seed, basic_mock_brain(), basic_params())
    # Doesn't really matter what this is
    dummy_groupspec = BehaviorSpec([(1, )], "continuous", 1)
    no_agent_step = DecisionSteps.empty(dummy_groupspec)
    result = policy.get_action(no_agent_step)
    assert result == ActionInfo.empty()
Example 11
def setup_test_behavior_specs(
    use_discrete=True, use_visual=False, vector_action_space=2, vector_obs_space=8
):
    behavior_spec = BehaviorSpec(
        [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)],
        ActionType.DISCRETE if use_discrete else ActionType.CONTINUOUS,
        tuple(vector_action_space) if use_discrete else vector_action_space,
    )
    return behavior_spec
Example 12
def test_batched_step_result_from_proto_raises_on_infinite():
    n_agents = 10
    shapes = [(3, ), (4, )]
    behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
    ap_list = generate_list_agent_proto(n_agents,
                                        shapes,
                                        infinite_rewards=True)
    with pytest.raises(RuntimeError):
        steps_from_proto(ap_list, behavior_spec)
Example 13
def test_empty_decision_steps():
    specs = BehaviorSpec(
        sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5, )]),
        action_spec=ActionSpec.create_continuous(3),
    )
    ds = DecisionSteps.empty(specs)
    assert len(ds.obs) == 2
    assert ds.obs[0].shape == (0, 3, 2)
    assert ds.obs[1].shape == (0, 5)
def test_action_masking_continuous():
    n_agents = 10
    shapes = [(3, ), (4, )]
    behavior_spec = BehaviorSpec(create_observation_specs_with_shapes(shapes),
                                 ActionSpec.create_continuous(10))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
    masks = decision_steps.action_mask
    assert masks is None
Example 15
def test_batched_step_result_from_proto_raises_on_nan():
    n_agents = 10
    shapes = [(3, ), (4, )]
    behavior_spec = BehaviorSpec(shapes, ActionType.CONTINUOUS, 3)
    ap_list = generate_list_agent_proto(n_agents,
                                        shapes,
                                        nan_observations=True)
    with pytest.raises(RuntimeError):
        steps_from_proto(ap_list, behavior_spec)
Example 16
def test_empty_terminal_steps():
    specs = BehaviorSpec(
        sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5, )]),
        action_spec=ActionSpec.create_continuous(3),
    )
    ts = TerminalSteps.empty(specs)
    assert len(ts.obs) == 2
    assert ts.obs[0].shape == (0, 3, 2)
    assert ts.obs[1].shape == (0, 5)
Example 17
def test_empty_decision_steps():
    specs = BehaviorSpec(
        observation_shapes=[(3, 2), (5, )],
        action_type=ActionType.CONTINUOUS,
        action_shape=3,
    )
    ds = DecisionSteps.empty(specs)
    assert len(ds.obs) == 2
    assert ds.obs[0].shape == (0, 3, 2)
    assert ds.obs[1].shape == (0, 5)
def test_batched_step_result_from_proto_raises_on_nan():
    n_agents = 10
    shapes = [(3, ), (4, )]
    behavior_spec = BehaviorSpec(create_observation_specs_with_shapes(shapes),
                                 ActionSpec.create_continuous(3))
    ap_list = generate_list_agent_proto(n_agents,
                                        shapes,
                                        nan_observations=True)
    with pytest.raises(RuntimeError):
        steps_from_proto(ap_list, behavior_spec)
Example 19
def test_empty_terminal_steps():
    specs = BehaviorSpec(
        observation_shapes=[(3, 2), (5, )],
        action_type=ActionType.CONTINUOUS,
        action_shape=3,
    )
    ts = TerminalSteps.empty(specs)
    assert len(ts.obs) == 2
    assert ts.obs[0].shape == (0, 3, 2)
    assert ts.obs[1].shape == (0, 5)
Example 20
def create_mock_steps(
    num_agents: int = 1,
    num_vector_observations: int = 0,
    num_vis_observations: int = 0,
    action_shape: List[int] = None,
    discrete: bool = False,
    done: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:
    """
    Creates a mock Tuple[DecisionSteps, TerminalSteps] with observations.
    Imitates constant vector/visual observations, rewards, dones, and agents.

    :int num_agents: Number of "agents" to imitate.
    :int num_vector_observations: Size of the vector observation for each agent
    :int num_vis_observations: Number of visual observations for each agent
    :List action_shape: Shape of the action space (branch sizes if discrete)
    :bool discrete: Whether or not action space is discrete
    :bool done: Whether all the agents in the batch are done
    """
    if action_shape is None:
        action_shape = [2]

    obs_list = []
    for _ in range(num_vis_observations):
        obs_list.append(np.ones((num_agents, 84, 84, 3), dtype=np.float32))
    if num_vector_observations > 1:
        obs_list.append(
            np.array(num_agents * [num_vector_observations * [1]],
                     dtype=np.float32))
    action_mask = None
    if discrete:
        action_mask = [
            np.array(num_agents * [action_size * [False]])
            for action_size in action_shape
        ]

    reward = np.array(num_agents * [1.0], dtype=np.float32)
    interrupted = np.array(num_agents * [False], dtype=bool)
    agent_id = np.arange(num_agents, dtype=np.int32)
    behavior_spec = BehaviorSpec(
        [(84, 84, 3)] * num_vis_observations +
        [(num_vector_observations, 0, 0)],
        ActionType.DISCRETE if discrete else ActionType.CONTINUOUS,
        action_shape if discrete else action_shape[0],
    )
    if done:
        return (
            DecisionSteps.empty(behavior_spec),
            TerminalSteps(obs_list, reward, interrupted, agent_id),
        )
    else:
        return (
            DecisionSteps(obs_list, reward, agent_id, action_mask),
            TerminalSteps.empty(behavior_spec),
        )
Example 21
def setup_test_behavior_specs(
    use_discrete=True, use_visual=False, vector_action_space=2, vector_obs_space=8
):
    if use_discrete:
        action_spec = ActionSpec.create_discrete(tuple(vector_action_space))
    else:
        action_spec = ActionSpec.create_continuous(vector_action_space)
    observation_shapes = [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)]
    obs_spec = create_observation_specs_with_shapes(observation_shapes)
    behavior_spec = BehaviorSpec(obs_spec, action_spec)
    return behavior_spec
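
A hypothetical usage sketch for setup_test_behavior_specs above; the observation_specs attribute name is an assumption based on the release these examples target, and the example function is not part of the original test suite.

def example_setup_test_behavior_specs_usage():
    # Hypothetical usage sketch; branch sizes and action sizes are illustrative.
    discrete_spec = setup_test_behavior_specs(use_discrete=True,
                                              vector_action_space=[2, 3])
    assert discrete_spec.action_spec == ActionSpec.create_discrete((2, 3))

    continuous_spec = setup_test_behavior_specs(use_discrete=False,
                                                vector_action_space=4)
    assert continuous_spec.action_spec == ActionSpec.create_continuous(4)
    # Only the (8,) vector observation is present when use_visual is False.
    assert len(continuous_spec.observation_specs) == 1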
Example 22
def test_specs():
    specs = BehaviorSpec(
        observation_shapes=[(3, 2), (5, )],
        action_type=ActionType.CONTINUOUS,
        action_shape=3,
    )
    assert specs.discrete_action_branches is None
    assert specs.action_size == 3
    assert specs.create_empty_action(5).shape == (5, 3)
    assert specs.create_empty_action(5).dtype == np.float32

    specs = BehaviorSpec(
        observation_shapes=[(3, 2), (5, )],
        action_type=ActionType.DISCRETE,
        action_shape=(3, ),
    )
    assert specs.discrete_action_branches == (3, )
    assert specs.action_size == 1
    assert specs.create_empty_action(5).shape == (5, 1)
    assert specs.create_empty_action(5).dtype == np.int32
Example 23
def test_mismatch_observations_raise_in_step_result_from_proto():
    n_agents = 10
    shapes = [(3, ), (4, )]
    spec = BehaviorSpec(create_observation_specs_with_shapes(shapes),
                        ActionSpec.create_continuous(3))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    # Hack an observation to be larger, we should get an exception
    ap_list[0].observations[0].shape[0] += 1
    ap_list[0].observations[0].float_data.data.append(0.42)
    with pytest.raises(UnityObservationException):
        steps_from_proto(ap_list, spec)
Example 24
def test_action_masking_discrete_1():
    n_agents = 10
    shapes = [(3, ), (4, )]
    behavior_spec = BehaviorSpec(shapes, ActionType.DISCRETE, (10, ))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
    masks = decision_steps.action_mask
    assert isinstance(masks, list)
    assert len(masks) == 1
    assert masks[0].shape == (n_agents / 2, 10)
    assert masks[0][0, 0]
Example 25
def test_action_masking_discrete_2():
    n_agents = 10
    shapes = [(3, ), (4, )]
    behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((2, 2, 6)))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
    masks = decision_steps.action_mask
    assert isinstance(masks, list)
    assert len(masks) == 3
    assert masks[0].shape == (n_agents / 2, 2)
    assert masks[1].shape == (n_agents / 2, 2)
    assert masks[2].shape == (n_agents / 2, 6)
    assert masks[0][0, 0]
Example 26
    def __init__(
            self,
            brain_names,
            step_size=STEP_SIZE,
            num_visual=0,
            num_vector=1,
            num_var_len=0,
            vis_obs_size=VIS_OBS_SIZE,
            vec_obs_size=OBS_SIZE,
            var_len_obs_size=VAR_LEN_SIZE,
            action_sizes=(1, 0),
    ):
        super().__init__()
        self.num_visual = num_visual
        self.num_vector = num_vector
        self.num_var_len = num_var_len
        self.vis_obs_size = vis_obs_size
        self.vec_obs_size = vec_obs_size
        self.var_len_obs_size = var_len_obs_size
        continuous_action_size, discrete_action_size = action_sizes
        discrete_tuple = tuple(2 for _ in range(discrete_action_size))
        action_spec = ActionSpec(continuous_action_size, discrete_tuple)
        # total_action_size is used to set the goals/positions
        self.total_action_size = continuous_action_size + discrete_action_size
        self.action_spec = action_spec
        self.behavior_spec = BehaviorSpec(self._make_observation_specs(),
                                          action_spec)
        self.names = brain_names
        self.positions: Dict[str, List[float]] = {}
        self.step_count: Dict[str, float] = {}
        self.random = random.Random(str(self.behavior_spec))
        self.goal: Dict[str, int] = {}
        self.action = {}
        self.rewards: Dict[str, float] = {}
        self.final_rewards: Dict[str, List[float]] = {}
        self.step_result: Dict[str, Tuple[DecisionSteps, TerminalSteps]] = {}
        self.agent_id: Dict[str, int] = {}
        self.step_size = step_size  # defines the difficulty of the test
        # Allow to be used as a UnityEnvironment during tests
        self.academy_capabilities = None

        for name in self.names:
            self.agent_id[name] = 0
            self.goal[name] = self.random.choice([-1, 1])
            self.rewards[name] = 0
            self.final_rewards[name] = []
            self._reset_agent(name)
            self.action[name] = None
            self.step_result[name] = None
def test_action_masking_discrete():
    n_agents = 10
    shapes = [(3, ), (4, )]
    behavior_spec = BehaviorSpec(create_observation_specs_with_shapes(shapes),
                                 ActionSpec.create_discrete((7, 3)))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
    masks = decision_steps.action_mask
    assert isinstance(masks, list)
    assert len(masks) == 2
    assert masks[0].shape == (n_agents / 2, 7)  # half agents are done
    assert masks[1].shape == (n_agents / 2, 3)  # half agents are done
    assert masks[0][0, 0]
    assert not masks[1][0, 0]
    assert masks[1][0, 1]
Example 28
def behavior_spec_from_proto(brain_param_proto: BrainParametersProto,
                             agent_info: AgentInfoProto) -> BehaviorSpec:
    """
    Converts brain parameter and agent info proto to BehaviorSpec object.
    :param brain_param_proto: protobuf object.
    :param agent_info: protobuf object.
    :return: BehaviorSpec object.
    """
    observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
    if brain_param_proto.vector_action_space_type == 1:
        action_spec = ActionSpec(brain_param_proto.vector_action_size[0], ())
    else:
        action_spec = ActionSpec(0,
                                 tuple(brain_param_proto.vector_action_size))
    return BehaviorSpec(observation_shape, action_spec)
Example 29
def create_mock_steps(
    num_agents: int,
    observation_shapes: List[Tuple],
    action_shape: Union[int, Tuple[int]] = None,
    discrete: bool = False,
    done: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:
    """
    Creates a mock Tuple[DecisionSteps, TerminalSteps] with observations.
    Imitates constant vector/visual observations, rewards, dones, and agents.

    :int num_agents: Number of "agents" to imitate.
    :List observation_shapes: A List of the observation spaces in your steps
    :int action_shape: Size of the action space (tuple of branch sizes if discrete)
    :bool discrete: Whether or not action space is discrete
    :bool done: Whether all the agents in the batch are done
    """
    if action_shape is None:
        action_shape = 2

    obs_list = []
    for _shape in observation_shapes:
        obs_list.append(np.ones((num_agents, ) + _shape, dtype=np.float32))
    action_mask = None
    if discrete and isinstance(action_shape, Iterable):
        action_mask = [
            np.array(num_agents * [action_size * [False]])
            for action_size in action_shape  # type: ignore
        ]  # type: ignore

    reward = np.array(num_agents * [1.0], dtype=np.float32)
    interrupted = np.array(num_agents * [False], dtype=bool)
    agent_id = np.arange(num_agents, dtype=np.int32)
    behavior_spec = BehaviorSpec(
        observation_shapes,
        ActionType.DISCRETE if discrete else ActionType.CONTINUOUS,
        action_shape,
    )
    if done:
        return (
            DecisionSteps.empty(behavior_spec),
            TerminalSteps(obs_list, reward, interrupted, agent_id),
        )
    else:
        return (
            DecisionSteps(obs_list, reward, agent_id, action_mask),
            TerminalSteps.empty(behavior_spec),
        )
Example 30
def create_mock_steps(
    num_agents: int,
    observation_specs: List[ObservationSpec],
    action_spec: ActionSpec,
    done: bool = False,
    grouped: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:
    """
    Creates a mock Tuple[DecisionSteps, TerminalSteps] with observations.
    Imitates constant vector/visual observations, rewards, dones, and agents.

    :int num_agents: Number of "agents" to imitate.
    :List observation_specs: A List of the observation specs in your steps
    :ActionSpec action_spec: ActionSpec for the agents
    :bool done: Whether all the agents in the batch are done
    :bool grouped: Whether the agents all belong to the same multi-agent group
    """
    obs_list = []
    for obs_spec in observation_specs:
        obs_list.append(np.ones((num_agents,) + obs_spec.shape, dtype=np.float32))
    action_mask = None
    if action_spec.is_discrete():
        action_mask = [
            np.array(num_agents * [action_size * [False]])
            for action_size in action_spec.discrete_branches  # type: ignore
        ]  # type: ignore

    reward = np.array(num_agents * [1.0], dtype=np.float32)
    interrupted = np.array(num_agents * [False], dtype=bool)
    agent_id = np.arange(num_agents, dtype=np.int32)
    _gid = 1 if grouped else 0
    group_id = np.array(num_agents * [_gid], dtype=np.int32)
    group_reward = np.array(num_agents * [0.0], dtype=np.float32)
    behavior_spec = BehaviorSpec(observation_specs, action_spec)
    if done:
        return (
            DecisionSteps.empty(behavior_spec),
            TerminalSteps(
                obs_list, reward, interrupted, agent_id, group_id, group_reward
            ),
        )
    else:
        return (
            DecisionSteps(
                obs_list, reward, agent_id, action_mask, group_id, group_reward
            ),
            TerminalSteps.empty(behavior_spec),
        )
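
A hypothetical usage sketch for the create_mock_steps variant above, reusing the create_observation_specs_with_shapes helper from the earlier examples; it is illustrative and not part of the original test suite.

def example_create_mock_steps_usage():
    obs_specs = create_observation_specs_with_shapes([(8, ), (84, 84, 3)])
    action_spec = ActionSpec.create_discrete((3, 2))
    decision_steps, terminal_steps = create_mock_steps(
        num_agents=4, observation_specs=obs_specs, action_spec=action_spec)
    # All four mocked agents land in DecisionSteps when done=False.
    assert len(decision_steps) == 4
    assert len(terminal_steps) == 0
    assert decision_steps.obs[0].shape == (4, 8)
    # One mask array per discrete branch.
    assert len(decision_steps.action_mask) == 2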