Code example #1
def load_trained_agent(dirpath, env_cls, env_params=None, **kwargs):
    """Load a pre-trained agent with its weights.

    Parameters
    ----------
    dirpath: str
        The path to the directory in which the model parameters and agent config
        file are stored.
    env_cls: class or str
        The environment to train on. If a string is provided, it must be the name of a gym env.
    env_params: dict, default=None
        Key-value pairings to pass to env_cls if a class is provided.
    **kwargs: any
        Additional keyword arguments passed on to init_agent_from_config
        (e.g., force_no_log).

    Returns
    -------
    agent: spyro.agents.*
        An agent object with loaded / pre-trained weights.
    """
    config_path = os.path.join(dirpath, "agent_config.json")
    agent = init_agent_from_config(config_path, **kwargs)
    agent.load_weights(os.path.join(dirpath, "model.ckpt"),
                       env_cls=env_cls,
                       env_params=env_params)
    progress("Agent's weights loaded.")
    return agent
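
A minimal usage sketch for the function above; the directory path and environment name are hypothetical placeholders, and force_no_log is forwarded to init_agent_from_config via **kwargs:

# Sketch only: "runs/dqn_run_01" and "CartPole-v1" are placeholder values.
agent = load_trained_agent("runs/dqn_run_01", "CartPole-v1", force_no_log=True)
results = agent.evaluate("CartPole-v1", n_episodes=10)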
Code example #2
    def __init__(self,
                 env_cls,
                 result_queue,
                 task_queue=None,
                 stop_indicator=None,
                 state_processor=None,
                 max_values=None,
                 strategy='random',
                 env_params=None,
                 timeout=5,
                 verbose=False):
        super().__init__()
        self.env_cls = env_cls
        self.env_params = env_params
        self.task_queue = task_queue
        self.result_queue = result_queue
        self.state_processor = state_processor
        self.stop_indicator = stop_indicator
        self.strategy = strategy
        self.max_values = max_values
        self.timeout = timeout
        self.verbose = verbose

        if self.strategy == 'tasks':
            assert task_queue is not None, "Must provide a task_queue if strategy='tasks'"
        if self.strategy != 'tasks':
            assert stop_indicator is not None, "Must provide a stop_indicator if strategy != 'tasks'"
        if self.strategy == 'uniform':
            assert max_values is not None, "max_values must be provided when strategy='uniform'"

        progress("Worker initialized.", verbose=self.verbose)
Code example #3
def init_agent_from_config(config_path, force_no_log=False):
    """Initialize an agent based on a config file from a previous run.

    Parameters
    ----------
    config_path: str
        The path to the config JSON file.
    force_no_log: bool, default=False
        If True, sets log=False in the agent's init. Useful to prevent the new agent
        from logging in a subdirectory of the original logdir.

    Returns
    -------
    agent: spyro.agents.*
        The agent reconstructed from the config (weights are not loaded here).
    """
    # load config
    with open(config_path, 'r') as f:
        config = json.load(f)

    # determine agent class
    agent_cls = AGENT_MAP[config["name"]]

    # set logging to False if specified
    if force_no_log:
        config["log"] = False
        config.pop("logdir", None)  # drop the stored logdir if present

    # retrieve policy
    try:
        policy_config = config.pop("policy")
        has_policy = True
        policy_name = policy_config.pop("name")
        if policy_name == "EpsilonGreedyPolicy":
            del policy_config["epsilon"]
        policy = POLICY_MAP[policy_name](**policy_config)
    except KeyError:
        has_policy = False

    # retrieve memory
    try:
        memory_config = config.pop("memory")
        has_memory = True
        _ = memory_config.pop("name")
        memory = ReplayBuffer(**memory_config)
    except KeyError:
        has_memory = False

    # init agent
    if has_policy and has_memory:
        agent = agent_cls(policy, memory, **config)
    elif has_policy:
        agent = agent_cls(policy, **config)
    elif has_memory:
        agent = agent_cls(memory, **config)
    else:
        agent = agent_cls(**config)

    progress("Agent reconstructed from config.")
    return agent
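
A short, hypothetical example of reconstructing an agent from a previous run without reusing its log directory (the config path is a placeholder):

# Sketch only: the config path is a placeholder.
agent = init_agent_from_config("runs/dqn_run_01/agent_config.json", force_no_log=True)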
Code example #4
def evaluate_saved_agent(dirpath,
                         env_cls,
                         n_episodes=100000,
                         tmax=1000,
                         policy=None,
                         env_params=None,
                         save=True,
                         evaluator=None):
    """Load a trained and saved agent from disk and evaluate it on a test environment.

    Parameters
    ----------
    dirpath: str
        The path to the directory in which the model parameters and agent config
        file are stored.
    env_cls: class or str
        The environment to evaluate on. If a string is provided, it must be the name of a gym env.
    n_episodes: int, default=100000
        The number of episodes to use for evaluation.
    tmax: int, default=1000
        The maximum number of steps per episode.
    policy: spyro.policies instance, default=None
        The policy to use during evaluation if it should differ from the training policy.
    env_params: dict, default=None
        Key-value pairings to pass to env_cls if a class is provided.
    save: bool, default=True
        Whether to save the results dictionary and the test log to dirpath.
    evaluator: object, default=None
        If provided, its evaluate method is called on the test log to compute
        summary metrics.

    Returns
    -------
    results: any
        Output of agent.evaluate. Usually, this is a dictionary with 'mean_episode_reward',
        'total_episode_reward', and 'episode_length' as keys and numpy arrays as values.
    test_log: pd.DataFrame
        The simulation log of all tested episodes.
    summary: any
        Output of evaluator.evaluate(test_log) if an evaluator was provided, otherwise None.
    """
    agent = load_trained_agent(dirpath,
                               env_cls,
                               env_params=env_params,
                               force_no_log=True)
    progress("Start test run on {} episodes.".format(n_episodes))
    results = agent.evaluate(env_cls,
                             n_episodes=n_episodes,
                             tmax=tmax,
                             policy=policy,
                             env_params=env_params)

    test_log = agent.env.get_test_log()

    if save:
        progress("Saving results to {}.".format(dirpath))
        pickle.dump(results,
                    open(os.path.join(dirpath, "test_results_dict.pkl"), "wb"))
        test_log.to_csv(os.path.join(dirpath, "test_log.csv"), index=False)

    if evaluator is not None:
        progress("Extracting metrics using the evaluator")
        summary = evaluator.evaluate(test_log)

    else:
        summary = None

    progress("Evaluation completed.")
    return results, test_log, summary
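
A hedged usage sketch of the full evaluation helper; the directory and environment name are placeholders and no evaluator is attached:

# Sketch only: placeholder directory and gym environment name.
results, test_log, summary = evaluate_saved_agent("runs/dqn_run_01",
                                                  "CartPole-v1",
                                                  n_episodes=1000,
                                                  save=False)
print(results["mean_episode_reward"].mean())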
Code example #5
    def _run_tasks(self):
        """Start interacting with the environment to obtain specifically requested
        experiences (tasks) and send the results to the global queue.
        """
        progress("Start peforming tasks.", verbose=self.verbose)
        self._make_env()

        while True:
            try:
                task = self.task_queue.get(timeout=1)
                self.perform_task(task)
            except queue.Empty:
                progress(
                    "Empty task queue found at worker. Shutting down worker.",
                    verbose=self.verbose)
                break
Code example #6
    def _run_randomly(self):
        """Start interacting with the environment without manipulating the state in-between
        steps and send the result of each step to the global results queue.
        """
        progress("Start obtaining experiences.", verbose=self.verbose)
        self._make_env()

        while self.stop_indicator.value != 1:

            # start episode by resetting env
            state = self.state_processor(self.env.reset())
            done = False

            # gather experiences until episode end
            while not done:
                response, target = self.env._simulate()

                if (response is not None) and (response != np.inf):
                    try:
                        self.result_queue.put(
                            {
                                "state": state,
                                "response": response,
                                "target": target
                            },
                            block=True,
                            timeout=self.timeout)
                    except queue.Full:
                        progress(
                            "Queue has been full for {} seconds. Breaking.".
                            format(self.timeout),
                            verbose=self.verbose)
                        break

                raw_state, done = self.env._extract_state(
                    self.env._get_available_vehicles())
                state = self.state_processor(raw_state)
Code example #7
def merge_tables(*tables,
                 to_quantiles=True,
                 key="responses",
                 num_quantiles=51,
                 save_path=None):
    """Merge multiple tables into one big one.

    Parameters
    ----------
    tables: dicts
        The tables to merge, passed as separate positional arguments. They should
        all have the same set of keys / states.
    to_quantiles: bool, default=True
        Whether to calculate quantiles over the obtained values rather than keep the raw ones.
    key: str, default="responses"
        If to_quantiles=True, the key is the key in the inner dictionary that points to the array
        over which to compute quantiles. If to_quantiles=False, key is the (list of) keys for which
        arrays of different tables should be appended.
    num_quantiles: int, default=51
        The number of quantiles to compute when to_quantiles=True.
    save_path: str, default=None
        The path to save the resulting table. If None, does not save.

    Returns
    -------
    merged_table: dict
        The merged table.
    """
    assert len(tables) > 1, "Must provide more than one table"
    assert all(set(t.keys()) == set(tables[0].keys()) for t in tables[1:]), \
        "Keys are not the same for all tables"

    merged = tables[0]
    for i, state in enumerate(tables[0].keys()):
        progress("Merging results for state {} / {}.".format(
            i + 1, len(merged)),
                 same_line=True,
                 newline_end=(i + 1 == len(merged)))
        merged[state] = append_arrays_in_dicts(
            *[t[state] for t in tables],
            keys=key if isinstance(key, list) else [key])

    if to_quantiles:
        progress("Obtaining quantiles for '{}''".format(key))
        merged = get_table_quantiles(merged,
                                     num_quantiles=num_quantiles,
                                     inner_key=key)

    if save_path is not None:
        with open(save_path, "wb") as f:
            pickle.dump(merged, f)
        progress("Merged table saved at {}".format(save_path))

    return merged
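
As an illustration, merging two partial tables saved by different workers might look like the sketch below (file names are hypothetical):

import pickle

# Sketch only: placeholder file names for two partial tables.
with open("table_part_0.pkl", "rb") as f:
    t0 = pickle.load(f)
with open("table_part_1.pkl", "rb") as f:
    t1 = pickle.load(f)

merged = merge_tables(t0, t1,
                      to_quantiles=True,
                      key="responses",
                      save_path="merged_table.pkl")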
Code example #8
    def gather_random_experiences(self,
                                  env_cls,
                                  total_steps=50000000,
                                  start_step=0,
                                  env_params=None,
                                  strategy='random',
                                  timeout=3):
        """Collect random experiences from parallel workers.

        Parameters
        ----------
        env_cls: Python class
            The environment to train on.
        total_steps: int, default=50000000
            The total number of experiences to gather.
        start_step: int, default=0
            Offset added to the experience counter, e.g., when continuing from a
            previous run.
        env_params: dict, default=None
            Parameters passed to env_cls upon initialization.
        timeout: int, default=3
            The maximum time to wait for an item in the results queue if it is empty.
        """
        self.stop_indicator = mp.Value("i", 0)
        self.global_counter = start_step
        total_steps = total_steps + start_step
        self.result_queue = mp.Queue(self.max_queue_size)

        # initialize workers
        workers = [
            ExperienceGatheringProcess(env_cls,
                                       self.result_queue,
                                       stop_indicator=self.stop_indicator,
                                       env_params=env_params,
                                       state_processor=self.state_processor,
                                       max_values=self.max_values,
                                       strategy=self.strategy)
            for _ in range(self.num_workers)
        ]

        for worker in workers:
            worker.start()

        # wait for workers to start delivering
        time.sleep(5)

        try:
            while True:
                try:
                    experience = self.result_queue.get(block=True,
                                                       timeout=timeout)
                    self.process_random_experience(experience)
                    self.global_counter += 1
                    progress("Processed {} / {} experiences".format(
                        self.global_counter, total_steps),
                             same_line=True,
                             newline_end=False,
                             verbose=self.global_counter % 1000 == 0)
                except queue.Empty:
                    progress("\nQueue is empty. Breaking loop.",
                             verbose=self.verbose)
                    break

                if self.global_counter >= total_steps:
                    if self.stop_indicator.value == 0:
                        with self.stop_indicator.get_lock():
                            self.stop_indicator.value = 1
                        progress(
                            "\nSent stop signal to workers. Processing last results in queue.",
                            verbose=self.verbose)

        except KeyboardInterrupt:
            progress(
                "KeyboardInterrupt: sending stop signal and waiting for workers.",
                verbose=self.verbose)
            with self.stop_indicator.get_lock():
                self.stop_indicator.value = 1

        for worker in workers:
            if worker.is_alive():
                worker.join()

        progress("Workers stopped gracefully.", verbose=self.verbose)
Code example #9
    def perform_tasks(self,
                      env_cls,
                      reps=100,
                      env_params=None,
                      timeout=10,
                      debug_subset=None):
        """Gather experiences.

        Parameters
        ----------
        env_cls: Python class
            The environment to gather experiences from. This class was designed for
            FireCommanderV2, but similar environments might work as well.
        reps: int, default=100
            The number of repetitions/experiences to gather for each state.
        env_params: dict, default=None
            Key-value pairs passed to env_cls.
        timeout: int, default=10
            The maximum time to wait for workers to produce results. After timeout
            seconds, the main process stops getting results from the queue and
            wraps up the other processes.
        debug_subset: optional, default=None
            If provided, passed on to define_tasks to restrict the generated tasks
            to a subset, e.g., for debugging.
        """
        # define tasks and put them in a global queue
        tasks = self.define_tasks(reps=reps, debug_subset=debug_subset)
        self.global_counter = 0
        self.num_tasks = len(tasks)

        self.task_queue = mp.Queue()
        self.result_queue = mp.Queue()

        _ = list(map(self.task_queue.put, tasks))
        progress("Put {} tasks in Queue (queue length: {})".format(
            self.num_tasks, self.task_queue.qsize()),
                 verbose=self.verbose)

        # initialize workers
        workers = [
            ExperienceGatheringProcess(env_cls,
                                       self.result_queue,
                                       task_queue=self.task_queue,
                                       env_params=env_params,
                                       state_processor=self.state_processor,
                                       strategy='tasks')
            for _ in range(self.num_workers)
        ]

        for worker in workers:
            worker.start()

        try:
            while True:
                try:
                    performed_task = self.result_queue.get(block=True,
                                                           timeout=timeout)
                    self.process_performed_task(performed_task)
                    self.global_counter += 1
                    progress("performed {} / {} tasks".format(
                        self.global_counter, self.num_tasks),
                             same_line=True,
                             newline_end=False,
                             verbose=self.verbose)
                except queue.Empty:
                    progress("\nQueue is empty. Breaking loop.",
                             verbose=self.verbose)
                    break

        except KeyboardInterrupt:
            pass

        for worker in workers:
            if worker.is_alive():
                worker.join()
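
A similar sketch for the task-based variant; FireCommanderV2 is the environment mentioned in the docstring, and 'estimator' is again a placeholder for an instance of the owning class:

# Sketch only: estimator is a placeholder; FireCommanderV2 comes from the project's
# environment package (import path assumed, not shown here).
estimator.perform_tasks(FireCommanderV2, reps=100, timeout=10)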
Code example #10
File: a3c.py  Project: joepvdbogaert/fire.ai
    def evaluate(self, env_cls, n_episodes=10000, tmax=None, policy=None, env_params=None, init=False):
        """Evaluate the agent on an environemt without training."""
        if policy is not None:
            self.eval_policy = policy
        else:
            self.eval_policy = self.policy

        if tmax is None:
            self.tmax = 1000000
        else:
            self.tmax = tmax

        self.env = make_env(env_cls, env_params)
        self.action_shape, self.n_actions = get_space_shape(self.env.action_space)
        self.obs_shape, _ = get_space_shape(self.env.observation_space)
        print("Environment initialized.")

        if init:
            tf.reset_default_graph()
            self._init_graph()
            print("Graph created.")

        self.episode_counter = 0
        self.step_counter = 0
        self.done = True

        self.eval_results = {
            "total_episode_reward": np.zeros(n_episodes),
            "mean_episode_reward": np.zeros(n_episodes),
            "episode_length": np.zeros(n_episodes),
        }

        for ep in range(n_episodes):
            self.state = np.asarray(self.env.reset(), dtype=np.float64)
            self.episode_step_counter = 0
            self.episode_reward = 0

            for i in range(self.tmax):

                # compute action probabilities from the policy network
                action_probabilities = self.session.run(
                    self.action_probs,
                    feed_dict={self.state_ph: np.reshape(self.state, (1, -1))}
                )

                # select and perform action
                self.action = self.eval_policy.select_action(action_probabilities.reshape(-1))
                new_state, self.reward, self.done, _ = self.env.step(self.action)

                # bookkeeping
                self.step_counter += 1
                self.episode_reward += self.reward
                self.episode_step_counter += 1
                self.state = np.asarray(copy.copy(new_state), dtype=np.float64)

                # end of episode
                if self.done:
                    break

            self.eval_results["total_episode_reward"][ep] = self.episode_reward
            self.eval_results["mean_episode_reward"][ep] = self.episode_reward / self.episode_step_counter
            self.eval_results["episode_length"][ep] = self.episode_step_counter

            progress("Completed episode {}/{}".format(ep + 1, n_episodes),
                     same_line=(ep > 0), newline_end=(ep + 1 == n_episodes))

        return self.eval_results
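
A short usage sketch of the A3C evaluation loop above; the environment name is a placeholder and the agent is assumed to be trained already:

# Sketch only: placeholder environment name.
results = agent.evaluate("CartPole-v1", n_episodes=100, tmax=500)
print("mean episode length:", results["episode_length"].mean())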
Code example #11
    def evaluate(self,
                 env_cls,
                 n_episodes=10000,
                 tmax=None,
                 policy=None,
                 env_params=None,
                 init=False):
        """Evaluate the agent on an environemt without training.

        Parameters
        ----------
        env_cls: uninitialized Python class or str
            The environment to train on. If a class is provided, it must be uninitialized.
            Parameters can be passed to the environment using env_params. If a string
            is provided, this string is fed to `gym.make()` to create the environment.
        n_episodes: int, optional, default=10,000
            The number of episodes to run.
        tmax: int, optional, default=None
            The maximum number of steps to run in each episode. If None, set to 10,000 to
            not enforce a limit in most environments.
        policy: spyro.policies instance, default=None
            The policy to use during evaluation if it is not the same as during training.
        env_params: dict, optional, default=None
            Dictionary of parameter values to pass to `env_cls` upon initialization.
        init: boolean, default=False
            Whether to (re-)initialize the network (True) or to keep the current neural
            network parameters (False).
        """
        if policy is not None:
            self.eval_policy = policy
        else:
            self.eval_policy = self.policy

        if tmax is None:
            self.tmax = 10000
        else:
            self.tmax = tmax

        self.env = make_env(env_cls, env_params)
        self.action_shape, self.n_actions, self.obs_shape, _ = \
                obtain_env_information(env_cls, env_params)

        if init:
            tf.reset_default_graph()
            self._init_graph()

        self.episode_counter = 0
        self.step_counter = 0
        self.done = True

        self.eval_results = {
            "total_episode_reward": np.zeros(n_episodes),
            "mean_episode_reward": np.zeros(n_episodes),
            "episode_length": np.zeros(n_episodes),
        }

        for ep in range(n_episodes):
            self.state = np.asarray(self.env.reset(), dtype=np.float64)
            self.episode_step_counter = 0
            self.episode_reward = 0

            for i in range(self.tmax):

                # predict Q-values Q(s,a)
                qvalues = self.session.run(self.online_qvalues,
                                           feed_dict={
                                               self.states_ph:
                                               np.reshape(self.state, (1, -1))
                                           })

                # select and perform action
                self.action = self.eval_policy.select_action(
                    qvalues.reshape(-1))
                new_state, self.reward, self.done, _ = self.env.step(
                    self.action)

                # bookkeeping
                self.step_counter += 1
                self.episode_reward += self.reward
                self.episode_step_counter += 1
                self.state = np.asarray(copy.copy(new_state), dtype=np.float64)

                # end of episode
                if self.done:
                    break

            self.eval_results["total_episode_reward"][ep] = self.episode_reward
            self.eval_results["mean_episode_reward"][
                ep] = self.episode_reward / self.episode_step_counter
            self.eval_results["episode_length"][ep] = self.episode_step_counter

            progress("Completed episode {}/{}".format(ep + 1, n_episodes),
                     same_line=(ep > 0),
                     newline_end=(ep + 1 == n_episodes))

        return self.eval_results
Code example #12
    def evaluate(self, env_cls, n_episodes=10000, tmax=None, env_params=None):
        """Evaluate the agent on an environemt without training.

        Parameters
        ----------
        env_cls: uninitialized Python class or str
            The environment to train on. If a class is provided, it must be uninitialized.
            Parameters can be passed to the environment using env_params. If a string
            is provided, this string is fed to `gym.make()` to create the environment.
        n_episodes: int, optional, default=10,000
            The number of episodes to run.
        tmax: int, optional, default=None
            The maximum number of steps to run in each episode. If None, set to 10,000 to
            not enforce a limit in most environments.
        env_params: dict, optional, default=None
            Dictionary of parameter values to pass to `env_cls` upon initialization.
        """
        if tmax is None:
            self.tmax = 10000
        else:
            self.tmax = tmax

        self.env = make_env(env_cls, env_params)
        self.action_shape, self.n_actions, self.obs_shape, _ = \
                obtain_env_information(env_cls, env_params)

        self.episode_counter = 0
        self.step_counter = 0
        self.done = True

        self.eval_results = {
            "total_episode_reward": np.zeros(n_episodes),
            "mean_episode_reward": np.zeros(n_episodes),
            "episode_length": np.zeros(n_episodes),
        }

        seen_states = {}
        for ep in range(n_episodes):
            self.state = np.asarray(self.env.reset(), dtype=np.int16)
            self.episode_step_counter = 0
            self.episode_reward = 0

            for i in range(self.tmax):

                # get relocations from dictionary if problem was solved before
                # otherwise solve it and save the results for next time
                try:
                    relocations = seen_states[tuple(
                        extract_vehicles_from_state(self.state))]
                except KeyError:
                    relocations = self.get_relocations(self.state)
                    seen_states[tuple(extract_vehicles_from_state(
                        self.state))] = relocations

                # get origin if the current destination appears in the relocations
                to_from = {d['to']: d['from'] for d in relocations.values()}
                destination_area = extract_current_destination_area(self.state)
                origin_area = to_from.get(destination_area)

                # select and perform action
                self.action = area_to_action(origin_area)
                new_state, self.reward, self.done, _ = self.env.step(
                    self.action)

                # bookkeeping
                self.step_counter += 1
                self.episode_reward += self.reward
                self.episode_step_counter += 1
                self.state = np.asarray(copy.copy(new_state), dtype=np.int16)

                # end of episode
                if self.done:
                    break

            self.eval_results["total_episode_reward"][ep] = self.episode_reward
            self.eval_results["mean_episode_reward"][
                ep] = self.episode_reward / self.episode_step_counter
            self.eval_results["episode_length"][ep] = self.episode_step_counter

            progress("Completed episode {}/{}".format(ep + 1, n_episodes),
                     same_line=(ep > 0),
                     newline_end=(ep + 1 == n_episodes))

        return self.eval_results
Code example #13
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

from spyro.utils import progress
from spyro.value_estimation import STATION_NAMES

try:
    from fdsim.helpers import lonlat_to_xy
except ImportError:
    progress("fdsim not installed, some functions might not work.")
try:
    import geopandas as gpd
except ImportError:
    progress('geopandas not installed, some functions might not work.')


def set_sns_params(font_scale=1.2, **kwargs):
    sns.set(font_scale=font_scale, **kwargs)


def quantile_range(num_quantiles=50):
    """Generate evenly spaced values in (0, 1) that can be used as quantile-positions.

    Parameters
    ----------
    num_quantiles: int, default=50
        The number of quantile-positions to generate.
    """