Ejemplo n.º 1
0
def test_take_action_returns_empty_with_no_agents():
    """Check that get_action on an empty DecisionSteps yields ActionInfo.empty()."""
    spec = basic_behavior_spec()
    # Seed 3, default trainer settings, and "output" are passed straight to FakePolicy.
    fake_policy = FakePolicy(3, spec, TrainerSettings(), "output")
    action_info = fake_policy.get_action(DecisionSteps.empty(spec))
    assert action_info == ActionInfo.empty()
def test_empty_decision_steps():
    """Check that DecisionSteps.empty gives one zero-length batch per observation shape."""
    continuous_actions = ActionSpec.create_continuous(3)
    behavior_spec = BehaviorSpec(
        observation_shapes=[(3, 2), (5, )],
        action_spec=continuous_actions,
    )
    empty_steps = DecisionSteps.empty(behavior_spec)
    # Two observation shapes were declared, so two arrays are expected.
    assert len(empty_steps.obs) == 2
    # Each array keeps its declared shape with a leading batch axis of 0.
    assert empty_steps.obs[0].shape == (0, 3, 2)
    assert empty_steps.obs[1].shape == (0, 5)
Ejemplo n.º 3
0
def test_take_action_returns_empty_with_no_agents():
    """Check that get_action on an empty DecisionSteps yields ActionInfo.empty()."""
    fake_policy = FakePolicy(3, basic_mock_brain(), basic_params())
    # The concrete spec values are arbitrary; they are only used to build
    # the empty DecisionSteps below.
    placeholder_spec = BehaviorSpec([(1, )], "continuous", 1)
    empty_steps = DecisionSteps.empty(placeholder_spec)
    action_info = fake_policy.get_action(empty_steps)
    assert action_info == ActionInfo.empty()
Ejemplo n.º 4
0
def test_empty_decision_steps():
    """Check that DecisionSteps.empty gives one zero-length batch per sensor spec."""
    sensor_specs = create_sensor_specs_with_shapes([(3, 2), (5, )])
    continuous_actions = ActionSpec.create_continuous(3)
    behavior_spec = BehaviorSpec(
        sensor_specs=sensor_specs,
        action_spec=continuous_actions,
    )
    empty_steps = DecisionSteps.empty(behavior_spec)
    # Two shapes were requested, so two observation arrays are expected.
    assert len(empty_steps.obs) == 2
    # Each array keeps its declared shape with a leading batch axis of 0.
    assert empty_steps.obs[0].shape == (0, 3, 2)
    assert empty_steps.obs[1].shape == (0, 5)
Ejemplo n.º 5
0
def test_empty_decision_steps():
    """Check that DecisionSteps.empty gives one zero-length batch per observation shape."""
    behavior_spec = BehaviorSpec(
        observation_shapes=[(3, 2), (5, )],
        action_type=ActionType.CONTINUOUS,
        action_shape=3,
    )
    empty_steps = DecisionSteps.empty(behavior_spec)
    # Two observation shapes were declared, so two arrays are expected.
    assert len(empty_steps.obs) == 2
    # Each array keeps its declared shape with a leading batch axis of 0.
    assert empty_steps.obs[0].shape == (0, 3, 2)
    assert empty_steps.obs[1].shape == (0, 5)
Ejemplo n.º 6
0
def create_mock_steps(
    num_agents: int = 1,
    num_vector_observations: int = 0,
    num_vis_observations: int = 0,
    action_shape: List[int] = None,
    discrete: bool = False,
    done: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:
    """
    Creates a mock Tuple[DecisionSteps, TerminalSteps] with observations.
    Imitates constant vector/visual observations, rewards, dones, and agents.

    Exactly one element of the returned pair is populated: the TerminalSteps
    when ``done`` is True, the DecisionSteps otherwise. The other element is
    built with ``.empty(behavior_spec)``.

    :int num_agents: Number of "agents" to imitate.
    :int num_vector_observations: Length of the vector observation. A vector
        observation array is only generated when this is greater than 1.
    :int num_vis_observations: Number of (84, 84, 3) visual observations.
    :List[int] action_shape: Action sizes; defaults to [2]. When not discrete,
        only the first entry is passed to the BehaviorSpec.
    :bool discrete: Whether or not action space is discrete
    :bool done: Whether all the agents in the batch are done
    """
    if action_shape is None:
        action_shape = [2]

    obs_list = []
    for _ in range(num_vis_observations):
        obs_list.append(np.ones((num_agents, 84, 84, 3), dtype=np.float32))
    # NOTE(review): the threshold is "> 1", so a single vector observation
    # produces no array even though the BehaviorSpec below still declares a
    # vector shape. Kept as-is because callers may rely on it - confirm.
    if num_vector_observations > 1:
        obs_list.append(
            np.array(num_agents * [num_vector_observations * [1]],
                     dtype=np.float32))
    action_mask = None
    if discrete:
        # One all-False mask per action branch.
        action_mask = [
            np.array(num_agents * [action_size * [False]])
            for action_size in action_shape
        ]

    reward = np.array(num_agents * [1.0], dtype=np.float32)
    # np.bool_ instead of the np.bool alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24.
    interrupted = np.array(num_agents * [False], dtype=np.bool_)
    agent_id = np.arange(num_agents, dtype=np.int32)
    behavior_spec = BehaviorSpec(
        [(84, 84, 3)] * num_vis_observations +
        [(num_vector_observations, 0, 0)],
        ActionType.DISCRETE if discrete else ActionType.CONTINUOUS,
        action_shape if discrete else action_shape[0],
    )
    if done:
        return (
            DecisionSteps.empty(behavior_spec),
            TerminalSteps(obs_list, reward, interrupted, agent_id),
        )
    else:
        return (
            DecisionSteps(obs_list, reward, agent_id, action_mask),
            TerminalSteps.empty(behavior_spec),
        )
Ejemplo n.º 7
0
def create_mock_steps(
    num_agents: int,
    observation_shapes: List[Tuple],
    action_shape: Union[int, Tuple[int]] = None,
    discrete: bool = False,
    done: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:
    """
    Creates a mock Tuple[DecisionSteps, TerminalSteps] with observations.
    Imitates constant vector/visual observations, rewards, dones, and agents.

    Exactly one element of the returned pair is populated: the TerminalSteps
    when ``done`` is True, the DecisionSteps otherwise. The other element is
    built with ``.empty(behavior_spec)``.

    :int num_agents: Number of "agents" to imitate.
    :List observation_shapes: A List of the observation spaces in your steps
    :Union[int, Tuple[int]] action_shape: Action size(s); defaults to 2.
        Passed unchanged to the BehaviorSpec.
    :bool discrete: Whether or not action space is discrete
    :bool done: Whether all the agents in the batch are done
    """
    if action_shape is None:
        action_shape = 2

    # One all-ones float32 array per observation shape, batched over agents.
    obs_list = []
    for _shape in observation_shapes:
        obs_list.append(np.ones((num_agents, ) + _shape, dtype=np.float32))
    action_mask = None
    # Masks are only built when the action shape is an iterable of branch
    # sizes; a discrete space given as a plain int gets no mask.
    if discrete and isinstance(action_shape, Iterable):
        action_mask = [
            np.array(num_agents * [action_size * [False]])
            for action_size in action_shape  # type: ignore
        ]  # type: ignore

    reward = np.array(num_agents * [1.0], dtype=np.float32)
    # np.bool_ instead of the np.bool alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24.
    interrupted = np.array(num_agents * [False], dtype=np.bool_)
    agent_id = np.arange(num_agents, dtype=np.int32)
    behavior_spec = BehaviorSpec(
        observation_shapes,
        ActionType.DISCRETE if discrete else ActionType.CONTINUOUS,
        action_shape,
    )
    if done:
        return (
            DecisionSteps.empty(behavior_spec),
            TerminalSteps(obs_list, reward, interrupted, agent_id),
        )
    else:
        return (
            DecisionSteps(obs_list, reward, agent_id, action_mask),
            TerminalSteps.empty(behavior_spec),
        )
Ejemplo n.º 8
0
 def _update_state(self, output: UnityRLOutputProto) -> None:
     """
     Refreshes the stored steps of every known behavior from one output message.

     For each name in ``self._env_specs``: if the name is present in
     ``output.agentInfos``, its agent info list is converted with
     ``steps_from_proto``; otherwise an empty (DecisionSteps, TerminalSteps)
     pair is stored. Finally ``output.side_channel`` is handed to the side
     channel manager.
     """
     agent_infos = output.agentInfos
     for name in self._env_specs:
         spec = self._env_specs[name]
         if name not in agent_infos:
             # Nothing reported for this behavior in this output.
             self._env_state[name] = (
                 DecisionSteps.empty(spec),
                 TerminalSteps.empty(spec),
             )
             continue
         reported_agents = agent_infos[name].value
         self._env_state[name] = steps_from_proto(reported_agents, spec)
     self._side_channel_manager.process_side_channel_message(output.side_channel)
Ejemplo n.º 9
0
def create_mock_steps(
    num_agents: int,
    observation_specs: List[ObservationSpec],
    action_spec: ActionSpec,
    done: bool = False,
    grouped: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:
    """
    Creates a mock Tuple[DecisionSteps, TerminalSteps] with observations.
    Imitates constant vector/visual observations, rewards, dones, and agents.

    Exactly one element of the returned pair is populated: the TerminalSteps
    when ``done`` is True, the DecisionSteps otherwise. The other element is
    built with ``.empty(behavior_spec)``.

    :int num_agents: Number of "agents" to imitate.
    :List observation_specs: A List of the observation specs in your steps
    :ActionSpec action_spec: ActionSpec for the agent
    :bool done: Whether all the agents in the batch are done
    :bool grouped: If True every agent gets group id 1, otherwise group id 0
    """
    # One all-ones float32 array per observation spec, batched over agents.
    obs_list = []
    for obs_spec in observation_specs:
        obs_list.append(np.ones((num_agents,) + obs_spec.shape, dtype=np.float32))
    action_mask = None
    if action_spec.is_discrete():
        # One all-False mask per discrete branch.
        action_mask = [
            np.array(num_agents * [action_size * [False]])
            for action_size in action_spec.discrete_branches  # type: ignore
        ]  # type: ignore

    reward = np.array(num_agents * [1.0], dtype=np.float32)
    # np.bool_ instead of the np.bool alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24.
    interrupted = np.array(num_agents * [False], dtype=np.bool_)
    agent_id = np.arange(num_agents, dtype=np.int32)
    _gid = 1 if grouped else 0
    group_id = np.array(num_agents * [_gid], dtype=np.int32)
    group_reward = np.array(num_agents * [0.0], dtype=np.float32)
    behavior_spec = BehaviorSpec(observation_specs, action_spec)
    if done:
        return (
            DecisionSteps.empty(behavior_spec),
            TerminalSteps(
                obs_list, reward, interrupted, agent_id, group_id, group_reward
            ),
        )
    else:
        return (
            DecisionSteps(
                obs_list, reward, agent_id, action_mask, group_id, group_reward
            ),
            TerminalSteps.empty(behavior_spec),
        )