Example #1
0
def plot_summaries(history_lists: Sequence[Sequence[History]],
                   labels: Sequence[str],
                   prefix: str = "."):
    """
    Plot summaries of several experiments.

    One figure is created for each of the attributes "total_rewards",
    "average_rewards" and "lengths". Every experiment is handed to
    '_plot_history_attribute' together with the axes of that figure, and
    the figure is then saved as "<attribute>.svg" under 'prefix'.

    :param history_lists: the list of experiments, i.e. list of list of histories.
    :param labels: the labels associated to each experiment.
    :param prefix: path prefix where to save the plots.
    :return: the list of created figures, one per attribute.
    """
    assert_(
        len(history_lists) == len(labels),
        "Please provide the correct number of labels.",
    )
    attributes = ["total_rewards", "average_rewards", "lengths"]
    figures = []
    for attribute in attributes:
        f = plt.figure()
        ax = f.add_subplot()
        ax.set_title(attribute)
        for label, history_list in zip(labels, history_lists):
            _plot_history_attribute(history_list, attribute, label, ax=ax)
        # Save through the figure object itself rather than through pyplot's
        # implicit "current figure", so the file always holds this figure
        # even if the plotting helper touches pyplot state.
        f.savefig(os.path.join(prefix, attribute + ".svg"))
        figures.append(f)
    return figures
Example #2
0
 def get_action(self, current_state: State) -> Any:
     """
     Get one action per sub-policy.

     Each policy of the multi-agent policy receives the state processed
     for its own index; the actions are returned as a list in policy order.
     """
     assert_(
         isinstance(self.context.policy, MultiAgentPolicy),
         "Only multi-agent policy is allowed.",
     )
     multi_policy = cast(MultiAgentPolicy, self.context.policy)
     actions = []
     for index, sub_policy in enumerate(multi_policy.policies):
         agent_state = self.process_state(current_state, index)
         actions.append(sub_policy.get_action(agent_state))
     return actions
Example #3
0
 def action_space(self, value: Optional[gym.spaces.Tuple] = None) -> None:
     """
     Set the action space.

     The i-th component of the tuple space is assigned to the i-th policy;
     when 'value' is None, the action space of every policy is set to None.
     """
     assert_(
         value is None or isinstance(value, gym.spaces.Tuple),
         "Only tuple spaces allowed.",
     )
     self._action_space = value
     if value is None:
         per_policy_spaces = [None] * len(self.policies)
     else:
         per_policy_spaces = value.spaces
     for policy, space in zip(self.policies, per_policy_spaces):
         policy.action_space = space
Example #4
0
    def process_state(self, state: Tuple, model_id: int):
        """
        Extract the component of a joint state that belongs to one agent.

        :param state: the (joint) state; it must have one entry per model.
        :param model_id: the id of the model.
        :return: the entry of the state at position 'model_id'.
        """
        has_one_entry_per_model = len(state) == len(self.models)
        assert_(
            has_one_entry_per_model,
            f"Expected {len(self.models)} observation for each agent, found {len(state)}.",
        )
        return state[model_id]
Example #5
0
def history_from_json(o: Dict) -> History:
    """
    Build a history object from its JSON (dictionary) form.

    The dictionary must contain exactly the keys "seed", "is_training"
    and "episodes"; each episode is converted with
    'agent_observations_from_json'.
    """
    only_allowed_keys = {"seed", "is_training", "episodes"}
    assert_(
        set(o) == only_allowed_keys,
        f"Only the following keys are allowed: {only_allowed_keys}.",
    )
    episodes: List[EpisodeAgentObs] = list(
        map(agent_observations_from_json, o["episodes"])
    )
    return History(
        episodes=episodes,
        is_training=o["is_training"],
        seed=o["seed"],
    )
Example #6
0
    def on_step_end(self, step, agent_observation: AgentObservation,
                    **kwargs) -> None:
        """
        Forward the step-end event to every model.

        The joint observation is split so that the i-th model receives the
        i-th state, action and next state; reward and done flag are shared.
        """
        state, action, reward, state_p, done = agent_observation
        assert_(
            isinstance(state, (list, tuple)),
            f"Expected a tuple of states, found {type(state)}: {state}",
        )

        for index, model in enumerate(self.models):
            own_state = self.process_state(state, index)
            own_observation = (
                own_state,
                action[index],
                reward,
                state_p[index],
                done,
            )
            model.on_step_end(step, own_observation)
Example #7
0
 def model(self, value: Optional[Model] = None) -> None:
     """
     Set the model.

     The i-th sub-model is assigned to the i-th policy; when 'value' is
     None, the model of every policy is set to None.

     :param value: a multi-agent model with as many sub-models as there
         are policies, or None to unset the model.
     :return: None
     """
     assert_(
         value is None or isinstance(value, AbstractMultiAgentModel),
         "Only multi-agent models allowed.",
     )
     if value is None:
         # No length check in this branch: the message of that check reads
         # 'value.models' and is built before assert_ is even called, so
         # evaluating it with a None value would raise AttributeError.
         models = [None] * len(self.policies)  # type: ignore
     else:
         value = cast(AbstractMultiAgentModel, value)
         assert_(
             len(self.policies) == len(value.models),
             f"Number of policies {len(self.policies)} is different from number of models {len(value.models)}.",
         )
         models = value.models
     self._model = value
     for p, m in zip(self.policies, models):
         p.model = m
Example #8
0
    def is_session_done(self) -> bool:
        """
        Tell whether the session has reached one of its limits.

        The session is done when the current episode has reached
        'nb_episodes' or the current step has reached 'nb_steps';
        a limit that is None is ignored, but at least one must be set.
        """
        max_episodes = self.nb_episodes
        max_steps = self.nb_steps
        assert_(
            not (max_episodes is None and max_steps is None),
            "Please specify either 'nb_episodes' or 'nb_steps'.",
        )
        episodes_exhausted = (
            max_episodes is not None and self.current_episode >= max_episodes
        )
        steps_exhausted = (
            max_steps is not None and self.current_step >= max_steps
        )
        return episodes_exhausted or steps_exhausted
Example #9
0
    def decode(self, s: int) -> Sequence[int]:
        """
        Do the decoding.

        The point is read as a mixed-radix number whose radices are the
        sizes in 'input_space.nvec', the last dimension being the least
        significant digit.

        :param s: a point in a discrete space.
        :return: the decoded version of the point into a multi-discrete space.
        """
        assert_(self.output_space.contains(s),
                f"{s} is not contained in the output space.")
        radices = self.input_space.nvec
        digits = []
        remainder = s
        # Walk the dimensions from the last one down to the second one;
        # whatever is left afterwards is the digit of the first dimension.
        for radix in radices[:0:-1]:
            remainder, digit = divmod(remainder, radix)
            digits.append(digit)
        digits.append(remainder)
        result = np.array(digits[::-1])
        assert_(
            self.input_space.contains(result),
            f"{result} is not contained in the input space.",
        )
        return result
Example #10
0
    def encode(self, s: Sequence[int]) -> int:
        """
        Do the encoding.

        The components are folded into a single mixed-radix number whose
        radices are the sizes in 'input_space.nvec', the last component
        being the least significant digit.

        :param s: a point in the multi-discrete space.
        :return: the encoded version of the point.
        """
        # Exact type check on purpose: only plain lists and tuples are
        # converted to an array.
        if type(s) in (list, tuple):
            s = np.array(s)
        assert_(self.input_space.contains(s),
                f"{s} is not contained in the input space.")
        radices = self.input_space.nvec
        result = 0
        # zip stops at the shorter sequence, so the last component is left
        # out of the loop and added afterwards without being scaled.
        for digit, radix in zip(s, radices[1:]):
            result = (result + digit) * radix
        result = result + s[-1]
        assert_(
            self.output_space.contains(result),
            f"{result} is not contained in the output space.",
        )
        return result
Example #11
0
def run_experiments(
    make_agent: Callable,
    env: gym.Env,
    policy: Policy,
    nb_runs: int = 50,
    nb_episodes: int = 500,
    nb_workers: int = 8,
    seeds: Optional[Sequence[int]] = None,
    callbacks: Sequence[LearningEventListener] = (),
    name_prefix: str = "experiment",
) -> Tuple[List[Agent], List[History]]:
    """
    Run many experiments with multiprocessing.

    A single agent is built with 'make_agent' and submitted, together with
    the environment and the policy, once per seed to a pool of worker
    processes. If waiting is interrupted from the keyboard, the results of
    the jobs that are already finished are still collected and returned.

    :param make_agent: a callable to make an agent.
    :param env: the environment to use.
    :param policy: the policy.
    :param nb_runs: the number of runs.
    :param nb_episodes: the number of episodes.
    :param nb_workers: the number of workers.
    :param seeds: a list of seeds; if None, the range [0, nb_runs-1] is used.
    :param callbacks: a list callbacks.
    :param name_prefix: the prefix to each experiment.
    :return: a pair (agents, histories), with one entry for each finished run.
    """
    if seeds is None:
        seeds = list(range(0, nb_runs))
    assert_(
        len(seeds) == nb_runs,
        f"The number of seeds {len(seeds)} is different from the number of runs {nb_runs}.",
    )
    agent = make_agent()
    # 'default' keeps the degenerate case of zero runs from failing in max().
    _current_seed_to_str = partial(_seed_to_str, max(seeds, default=0))

    agents = []
    histories = []
    # The context manager terminates the pool on exit, so no worker process
    # outlives this call, including after a keyboard interrupt.
    with multiprocessing.Pool(processes=nb_workers) as pool:
        results = [
            pool.apply_async(
                _do_job,
                args=(
                    agent,
                    env,
                    policy,
                    seed,
                    nb_episodes,
                    callbacks,
                    name_prefix + "-" + _current_seed_to_str(seed),
                ),
                error_callback=_raise_exception,
            ) for seed in seeds
        ]
        try:
            for p in results:
                p.wait()
        except KeyboardInterrupt:
            # Deliberate best effort: stop waiting and keep what is finished.
            pass

        # Collect while the pool is still alive; unfinished jobs are skipped.
        for p in results:
            if p.ready():
                trained_agent, history = p.get()
                agents.append(trained_agent)
                histories.append(history)
    return agents, histories
Example #12
0
 def context(self) -> "Context":
     """Return the context; 'assert_' is used to check that it has been set."""
     current_context = self._context
     assert_(current_context is not None, "Context not set.")
     return cast(Context, current_context)
Example #13
0
 def test(self, *args, **kwargs) -> History:
     """
     Test the agent.

     All arguments are forwarded to '_play' with 'is_training' forced to
     False, so callers must not pass that flag themselves.
     """
     flag_given = "is_training" in kwargs
     assert_(not flag_given, "Cannot specify the 'is_training' flag.")
     return self._play(*args, is_training=False, **kwargs)  # type: ignore
Example #14
0
 def model(self) -> Model:
     """Return the model; 'assert_' is used to check that it has been set."""
     current_model = self._model
     assert_(current_model is not None, "Model not set.")
     return cast(Model, current_model)
Example #15
0
 def action_space(self) -> gym.spaces.Space:
     """
     Get the action space.

     :return: the action space; 'assert_' is used to check that it has
         been set.
     """
     # Guard the attribute that is actually returned: testing 'self._model'
     # here would let an unset (None) action space slip through and would
     # reject a set action space whenever the model is missing.
     assert_(self._action_space is not None, "Action space is not set.")
     return self._action_space