Example #1
    def collect_metrics(self,
                        timeout_seconds,
                        min_history=100,
                        selected_evaluators=None):
        """Returns evaluator and optimizer stats.

        Arguments:
            timeout_seconds (int): Max wait time for an evaluator before
                dropping its results. This usually indicates a hung evaluator.
            min_history (int): Min history length to smooth results over.
            selected_evaluators (list): Override the list of remote evaluators
                to collect metrics from.

        Returns:
            res (dict): A training result dict from evaluator metrics with
                `info` replaced with stats from self.
        """
        episodes, num_dropped = collect_episodes(
            self.local_evaluator,
            selected_evaluators or self.remote_evaluators,
            timeout_seconds=timeout_seconds)
        orig_episodes = list(episodes)
        missing = min_history - len(episodes)
        if missing > 0:
            episodes.extend(self.episode_history[-missing:])
            assert len(episodes) <= min_history
        self.episode_history.extend(orig_episodes)
        self.episode_history = self.episode_history[-min_history:]
        res = summarize_episodes(episodes, orig_episodes, num_dropped)
        res.update(info=self.stats())
        return res
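For orientation, here is a minimal call-site sketch for the method above; `optimizer` stands in for an instance of the class that defines collect_metrics, and the argument value is illustrative:

# Hypothetical usage of collect_metrics(); `optimizer` is assumed to be an
# instance of the class shown above.
result = optimizer.collect_metrics(timeout_seconds=180)
print(result["episode_reward_mean"])  # smoothed over up to min_history episodes
print(result["info"])                 # optimizer stats injected via self.stats()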
Example #2
def provincial_eval(trainer, eval_workers):
    """Evaluates the performance of the domray model by playing it against
    Provincial using preset buy menus.

    Args:
        trainer (Trainer): trainer class to evaluate.
        eval_workers (WorkerSet): evaluation workers.

    Returns:
        metrics (dict): evaluation metrics dict
    """
    global eval_metrics

    for i in range(num_episodes_per_scenario):
        ray.get([w.sample.remote() for w in eval_workers.remote_workers()])
        #for worker in eval_workers.remote_workers():
        #    worker.foreach_env.remote(lambda env: env.debug())

    episodes, _ = collect_episodes(
        remote_workers=eval_workers.remote_workers(), timeout_seconds=600)

    metrics = summarize_episodes(episodes)
    eval_metrics.append(metrics)

    return metrics
Example #3
    def __call__(self, _):
        # Collect worker metrics.
        episodes, self.to_be_collected = collect_episodes(
            self.workers.local_worker(),
            self.workers.remote_workers(),
            self.to_be_collected,
            timeout_seconds=self.timeout_seconds)
        orig_episodes = list(episodes)
        missing = self.min_history - len(episodes)
        if missing > 0:
            episodes.extend(self.episode_history[-missing:])
            assert len(episodes) <= self.min_history
        self.episode_history.extend(orig_episodes)
        self.episode_history = self.episode_history[-self.min_history:]
        res = summarize_episodes(episodes, orig_episodes)

        # Add in iterator metrics.
        metrics = LocalIterator.get_metrics()
        timers = {}
        counters = {}
        info = {}
        info.update(metrics.info)
        for k, counter in metrics.counters.items():
            counters[k] = counter
        for k, timer in metrics.timers.items():
            timers["{}_time_ms".format(k)] = round(timer.mean * 1000, 3)
            if timer.has_units_processed():
                timers["{}_throughput".format(k)] = round(
                    timer.mean_throughput, 3)
        res.update({
            "num_healthy_workers": len(self.workers.remote_workers()),
            "timesteps_total": metrics.counters[STEPS_SAMPLED_COUNTER],
        })
        res["timers"] = timers
        res["info"] = info
        res["info"].update(counters)
        relevant = [
            "info", "custom_metrics", "sampler_perf", "timesteps_total",
            "policy_reward_mean", "episode_len_mean"
        ]

        d = {k: res[k] for k in relevant}
        d["evaluation"] = res.get("evaluation", {})

        if self.log_to_neptune:
            metrics_to_be_logged = ["info", "evaluation"]

            def log_metric(metrics, base_string=''):
                if isinstance(metrics, dict):
                    for k in metrics:
                        log_metric(metrics[k], base_string + '{}_'.format(k))
                else:
                    neptune.log_metric(base_string, metrics)

            for k in d:
                if k in metrics_to_be_logged:
                    log_metric(d[k], base_string='{}_'.format(k))

        return d
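A callable like this is meant to be mapped over the training iterator in RLlib's execution-plan API. The sketch below shows the assumed wiring; the names `train_op` and `collect`, and the constructor arguments, are illustrative and mirror what RLlib's standard metrics reporting does internally:

# Sketch: apply the metrics collector to every result coming off the training
# iterator. `train_op` is assumed to be a LocalIterator of training results.
collect = CollectMetrics(workers, min_history=100, timeout_seconds=180)
report_op = train_op.for_each(collect)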
Example #4
    def __call__(self, _: Any) -> Dict:
        # Collect worker metrics.
        episodes, self.to_be_collected = collect_episodes(
            self.workers.local_worker(),
            self.selected_workers or self.workers.remote_workers(),
            self.to_be_collected,
            timeout_seconds=self.timeout_seconds,
        )
        orig_episodes = list(episodes)
        missing = self.min_history - len(episodes)
        if missing > 0:
            episodes = self.episode_history[-missing:] + episodes
            assert len(episodes) <= self.min_history
        self.episode_history.extend(orig_episodes)
        self.episode_history = self.episode_history[-self.min_history :]
        res = summarize_episodes(episodes, orig_episodes, self.keep_custom_metrics)

        # Add in iterator metrics.
        metrics = _get_shared_metrics()
        custom_metrics_from_info = metrics.info.pop("custom_metrics", {})
        timers = {}
        counters = {}
        info = {}
        info.update(metrics.info)
        for k, counter in metrics.counters.items():
            counters[k] = counter
        for k, timer in metrics.timers.items():
            timers["{}_time_ms".format(k)] = round(timer.mean * 1000, 3)
            if timer.has_units_processed():
                timers["{}_throughput".format(k)] = round(timer.mean_throughput, 3)
        res.update(
            {
                "num_healthy_workers": len(self.workers.remote_workers()),
                "timesteps_total": (
                    metrics.counters[STEPS_TRAINED_COUNTER]
                    if self.by_steps_trained
                    else metrics.counters[STEPS_SAMPLED_COUNTER]
                ),
                # tune.Trainable uses timesteps_this_iter for tracking
                # total timesteps.
                "timesteps_this_iter": metrics.counters[
                    STEPS_TRAINED_THIS_ITER_COUNTER
                ],
                "agent_timesteps_total": metrics.counters.get(
                    AGENT_STEPS_SAMPLED_COUNTER, 0
                ),
            }
        )
        res["timers"] = timers
        res["info"] = info
        res["info"].update(counters)
        res["custom_metrics"] = res.get("custom_metrics", {})
        res["episode_media"] = res.get("episode_media", {})
        res["custom_metrics"].update(custom_metrics_from_info)
        return res
Example #5
    def __call__(self, _: Any) -> Dict:
        # Collect worker metrics.
        episodes, self.to_be_collected = collect_episodes(
            self.workers.local_worker(),
            self.selected_workers or self.workers.remote_workers(),
            self.to_be_collected,
            timeout_seconds=self.timeout_seconds)
        orig_episodes = list(episodes)
        missing = self.min_history - len(episodes)
        if missing > 0:
            episodes = self.episode_history[-missing:] + episodes
            assert len(episodes) <= self.min_history
        self.episode_history.extend(orig_episodes)
        self.episode_history = self.episode_history[-self.min_history:]
        res = summarize_episodes(episodes, orig_episodes)

        # Add in iterator metrics.
        metrics = _get_shared_metrics()
        custom_metrics_from_info = metrics.info.pop("custom_metrics", {})
        timers = {}
        counters = {}
        info = {}
        info.update(metrics.info)
        for k, counter in metrics.counters.items():
            counters[k] = counter
        for k, timer in metrics.timers.items():
            timers["{}_time_ms".format(k)] = round(timer.mean * 1000, 3)
            if timer.has_units_processed():
                timers["{}_throughput".format(k)] = round(
                    timer.mean_throughput, 3)

                throughput = timer.mean_throughput

                with Log.timer(log=True, logger=self.logger,
                               info="THROUGHPUT") as logging_metrics:
                    logging_metrics.append(throughput)

        res.update({
            "num_healthy_workers":
            len(self.workers.remote_workers()),
            "timesteps_total":
            metrics.counters[STEPS_SAMPLED_COUNTER],
            "agent_timesteps_total":
            metrics.counters.get(AGENT_STEPS_SAMPLED_COUNTER, 0),
        })
        res["timers"] = timers
        res["info"] = info
        res["info"].update(counters)
        res["custom_metrics"] = res.get("custom_metrics", {})
        res["episode_media"] = res.get("episode_media", {})
        res["custom_metrics"].update(custom_metrics_from_info)
        return res
Example #6
    def __call__(self, info):
        episodes, self.to_be_collected = collect_episodes(
            self.workers.local_worker(),
            self.workers.remote_workers(),
            self.to_be_collected,
            timeout_seconds=self.timeout_seconds)
        orig_episodes = list(episodes)
        missing = self.min_history - len(episodes)
        if missing > 0:
            episodes.extend(self.episode_history[-missing:])
            assert len(episodes) <= self.min_history
        self.episode_history.extend(orig_episodes)
        self.episode_history = self.episode_history[-self.min_history:]
        res = summarize_episodes(episodes, orig_episodes)
        res.update(info=info)
        return res
Example #7
    def __call__(self, _):
        # Collect worker metrics.
        episodes, self.to_be_collected = collect_episodes(
            self.workers.local_worker(),
            self.workers.remote_workers(),
            self.to_be_collected,
            timeout_seconds=self.timeout_seconds)
        orig_episodes = list(episodes)
        missing = self.min_history - len(episodes)
        if missing > 0:
            episodes.extend(self.episode_history[-missing:])
            assert len(episodes) <= self.min_history
        self.episode_history.extend(orig_episodes)
        self.episode_history = self.episode_history[-self.min_history:]
        res = summarize_episodes(episodes, orig_episodes)

        # Add in iterator metrics.
        metrics = LocalIterator.get_metrics()
        if metrics.parent_metrics:
            print("TODO: support nested metrics better")
        all_metrics = [metrics] + metrics.parent_metrics
        timers = {}
        counters = {}
        info = {}
        for metrics in all_metrics:
            info.update(metrics.info)
            for k, counter in metrics.counters.items():
                counters[k] = counter
            for k, timer in metrics.timers.items():
                timers["{}_time_ms".format(k)] = round(timer.mean * 1000, 3)
                if timer.has_units_processed():
                    timers["{}_throughput".format(k)] = round(
                        timer.mean_throughput, 3)
            res.update({
                "num_healthy_workers":
                len(self.workers.remote_workers()),
                "timesteps_total":
                metrics.counters[STEPS_SAMPLED_COUNTER],
            })
        res["timers"] = timers
        res["info"] = info
        res["info"].update(counters)
        return res
Example #8
    def collect_metrics(self,
                        timeout_seconds,
                        min_history=100,
                        selected_workers=None):
        """Returns worker and optimizer stats.

        Arguments:
            timeout_seconds (int): Max wait time for a worker before
                dropping its results. This usually indicates a hung worker.
            min_history (int): Min history length to smooth results over.
            selected_workers (list): Override the list of remote workers
                to collect metrics from.

        Returns:
            res (dict): A training result dict from worker metrics with
                `info` replaced with stats from self.
        """
        return_stats = {}

        episode_storage = {}

        for ws_id, workers in self.workers.items():
            episodes, self.to_be_collected[ws_id] = collect_episodes(
                workers.local_worker(),
                selected_workers or workers.remote_workers(),
                self.to_be_collected[ws_id],
                timeout_seconds=timeout_seconds)
            orig_episodes = list(episodes)
            missing = min_history - len(episodes)
            if missing > 0:
                episodes.extend(self.episode_history[ws_id][-missing:])
                assert len(episodes) <= min_history
            self.episode_history[ws_id].extend(orig_episodes)
            self.episode_history[ws_id] = self.episode_history[ws_id][
                -min_history:]

            episode_storage[ws_id] = episodes
            res = summarize_episodes(episodes, orig_episodes)
            return_stats[ws_id] = res
        return_stats = parse_stats(return_stats, episode_storage)
        return_stats.update(info=self.stats())
        return_stats["info"]["learner_queue"].pop("size_quantiles")
        return return_stats
Example #9
def own_evaluation(agent, num_rounds):
    results = []
    num_episodes = num_rounds * agent.config["evaluation_num_episodes"]
    if agent.config["num_workers"] == 0:
        for _ in range(num_episodes):
            agent.evaluation_workers.local_worker().sample()
    else:
        while len(results) < num_episodes:
            # Calling .sample() runs exactly one episode per worker due to how the
            # eval workers are configured.
            ray.get(
                [w.sample.remote() for w in agent.workers.remote_workers()])

            episodes, _ = collect_episodes(None,
                                           agent.workers.remote_workers(), [])

            for episode in episodes:
                for key, winner in episode.custom_metrics.copy().items():
                    results.append(winner)

    return results[:num_episodes]
Example #10
def custom_eval_function(trainer, eval_workers):
    """Example of a custom evaluation function.

    Arguments:
        trainer (Trainer): trainer class to evaluate.
        eval_workers (WorkerSet): evaluation workers.

    Returns:
        metrics (dict): evaluation metrics dict.
    """

    # We configured 2 eval workers in the training config.
    worker_1, worker_2 = eval_workers.remote_workers()

    # Set different env settings for each worker. Here we use a fixed config,
    # which also could have been computed in each worker by looking at
    # env_config.worker_index (printed in SimpleCorridor class above).
    worker_1.foreach_env.remote(lambda env: env.set_corridor_length(4))
    worker_2.foreach_env.remote(lambda env: env.set_corridor_length(7))

    for i in range(5):
        print("Custom evaluation round", i)
        # Calling .sample() runs exactly one episode per worker due to how the
        # eval workers are configured.
        ray.get([w.sample.remote() for w in eval_workers.remote_workers()])

    # Collect the accumulated episodes on the workers, and then summarize the
    # episode stats into a metrics dict.
    episodes, _ = collect_episodes(
        remote_workers=eval_workers.remote_workers(), timeout_seconds=99999)
    # You can compute metrics from the episodes manually, or use the
    # convenient `summarize_episodes()` utility:
    metrics = summarize_episodes(episodes)
    # Note that the above two statements are the equivalent of:
    # metrics = collect_metrics(eval_workers.local_worker(),
    #                           eval_workers.remote_workers())

    # You can also put custom values in the metrics dict.
    metrics["foo"] = 1
    return metrics
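To use a function like this, RLlib expects it to be registered in the trainer config; a short sketch following the standard custom-evaluation setup (the interval shown is illustrative):

# Sketch: register the custom evaluation function with an RLlib trainer config.
config = {
    "env": SimpleCorridor,                         # the env referenced above
    "custom_eval_function": custom_eval_function,
    "evaluation_num_workers": 2,                   # the two eval workers unpacked above
    "evaluation_interval": 1,                      # evaluate every training iteration
}
# The config is then passed to a Trainer subclass or tune.run() as usual.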
Example #11
    def collect_metrics(self, min_history=100):
        """Returns evaluator and optimizer stats.

        Arguments:
            min_history (int): Min history length to smooth results over.

        Returns:
            res (dict): A training result dict from evaluator metrics with
                `info` replaced with stats from self.
        """
        episodes = collect_episodes(self.local_evaluator,
                                    self.remote_evaluators)
        orig_episodes = list(episodes)
        missing = min_history - len(episodes)
        if missing > 0:
            episodes.extend(self.episode_history[-missing:])
            assert len(episodes) <= min_history
        self.episode_history.extend(orig_episodes)
        self.episode_history = self.episode_history[-min_history:]
        res = summarize_episodes(episodes)
        res.update(info=self.stats())
        return res
Example #12
    def execution_plan(workers: WorkerSet, config: AlgorithmConfigDict,
                       **kwargs) -> LocalIterator[dict]:
        assert (
            len(kwargs) == 0
        ), "MBMPO execution_plan does NOT take any additional parameters"

        # Train TD Models on the driver.
        workers.local_worker().foreach_policy(fit_dynamics)

        # Sync driver's policy with workers.
        workers.sync_weights()

        # Sync TD Models and normalization stats with workers
        sync_ensemble(workers)
        sync_stats(workers)

        # Dropping metrics from the first iteration
        _, _ = collect_episodes(workers.local_worker(),
                                workers.remote_workers(), [],
                                timeout_seconds=9999)

        # Metrics Collector.
        metric_collect = CollectMetrics(
            workers,
            min_history=0,
            timeout_seconds=config["metrics_episode_collection_timeout_s"],
        )

        num_inner_steps = config["inner_adaptation_steps"]

        def inner_adaptation_steps(itr):
            buf = []
            split = []
            metrics = {}
            for samples in itr:
                print("Collecting Samples, Inner Adaptation {}".format(
                    len(split)))
                # Processing Samples (Standardize Advantages)
                samples, split_lst = post_process_samples(samples, config)

                buf.extend(samples)
                split.append(split_lst)

                adapt_iter = len(split) - 1
                prefix = "DynaTrajInner_" + str(adapt_iter)
                metrics = post_process_metrics(prefix, workers, metrics)

                if len(split) > num_inner_steps:
                    out = SampleBatch.concat_samples(buf)
                    out["split"] = np.array(split)
                    buf = []
                    split = []

                    yield out, metrics
                    metrics = {}
                else:
                    inner_adaptation(workers, samples)

        # Iterator for Inner Adaptation Data gathering (from pre->post
        # adaptation).
        rollouts = from_actors(workers.remote_workers())
        rollouts = rollouts.batch_across_shards()
        rollouts = rollouts.transform(inner_adaptation_steps)

        # Meta update step with outer combine loop for multiple MAML
        # iterations.
        train_op = rollouts.combine(
            MetaUpdate(
                workers,
                config["num_maml_steps"],
                config["maml_optimizer_steps"],
                metric_collect,
            ))
        return train_op
Example #13
def execution_plan(workers: WorkerSet,
                   config: TrainerConfigDict) -> LocalIterator[dict]:
    """Execution plan of the PPO algorithm. Defines the distributed dataflow.

    Args:
        workers (WorkerSet): The WorkerSet for training the Polic(y/ies)
            of the Trainer.
        config (TrainerConfigDict): The trainer's configuration dict.

    Returns:
        LocalIterator[dict]: A local iterator over training result dicts
            produced by this execution plan.
    """
    # Train TD Models on the driver.
    workers.local_worker().foreach_policy(fit_dynamics)

    # Sync driver's policy with workers.
    workers.sync_weights()

    # Sync TD Models and normalization stats with workers
    sync_ensemble(workers)
    sync_stats(workers)

    # Dropping metrics from the first iteration
    _, _ = collect_episodes(workers.local_worker(),
                            workers.remote_workers(), [],
                            timeout_seconds=9999)

    # Metrics Collector.
    metric_collect = CollectMetrics(
        workers,
        min_history=0,
        timeout_seconds=config["collect_metrics_timeout"])

    num_inner_steps = config["inner_adaptation_steps"]

    def inner_adaptation_steps(itr):
        buf = []
        split = []
        metrics = {}
        for samples in itr:
            print("Collecting Samples, Inner Adaptation {}".format(len(split)))
            # Processing Samples (Standardize Advantages)
            samples, split_lst = post_process_samples(samples, config)

            buf.extend(samples)
            split.append(split_lst)

            adapt_iter = len(split) - 1
            prefix = "DynaTrajInner_" + str(adapt_iter)
            metrics = post_process_metrics(prefix, workers, metrics)

            if len(split) > num_inner_steps:
                out = SampleBatch.concat_samples(buf)
                out["split"] = np.array(split)
                buf = []
                split = []

                yield out, metrics
                metrics = {}
            else:
                inner_adaptation(workers, samples)

    # Iterator for Inner Adaptation Data gathering (from pre->post adaptation).
    rollouts = from_actors(workers.remote_workers())
    rollouts = rollouts.batch_across_shards()
    rollouts = rollouts.transform(inner_adaptation_steps)

    # Meta update step with outer combine loop for multiple MAML iterations.
    train_op = rollouts.combine(
        MetaUpdate(workers, config["num_maml_steps"],
                   config["maml_optimizer_steps"], metric_collect))
    return train_op
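An execution_plan like this is normally handed to the trainer factory; below is a minimal sketch using the older build_trainer() template API, where everything other than the execution_plan argument is a placeholder:

# Sketch: wire the execution plan into a trainer class via build_trainer().
# The trainer name, default config, and policy class are placeholders.
CustomTrainer = build_trainer(
    name="CustomMAMLTrainer",
    default_config=DEFAULT_CONFIG,
    default_policy=MyPolicyClass,
    execution_plan=execution_plan,
)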
Example #14
def execution_plan(workers, config):
    # Train TD Models
    workers.local_worker().foreach_policy(fit_dynamics)

    # Sync workers policy with workers
    workers.sync_weights()

    # Sync TD Models and normalization stats with workers
    sync_ensemble(workers)
    sync_stats(workers)

    # Dropping metrics from the first iteration
    episodes, to_be_collected = collect_episodes(
        workers.local_worker(),
        workers.remote_workers(), [],
        timeout_seconds=9999)

    # Metrics Collector
    metric_collect = CollectMetrics(
        workers,
        min_history=0,
        timeout_seconds=config["collect_metrics_timeout"])

    inner_steps = config["inner_adaptation_steps"]

    def inner_adaptation_steps(itr):
        buf = []
        split = []
        metrics = {}
        for samples in itr:
            print("Collecting Samples, Inner Adaptation {}".format(len(split)))
            # Processing Samples (Standardize Advantages)
            samples, split_lst = post_process_samples(samples, config)

            buf.extend(samples)
            split.append(split_lst)

            adapt_iter = len(split) - 1
            prefix = "DynaTrajInner_" + str(adapt_iter)
            metrics = post_process_metrics(prefix, workers, metrics)

            if len(split) > inner_steps:
                out = SampleBatch.concat_samples(buf)
                out["split"] = np.array(split)
                buf = []
                split = []

                yield out, metrics
                metrics = {}
            else:
                inner_adaptation(workers, samples)

    # Iterator for Inner Adaptation Data gathering (from pre->post adaptation)
    rollouts = from_actors(workers.remote_workers())
    rollouts = rollouts.batch_across_shards()
    rollouts = rollouts.transform(inner_adaptation_steps)

    # Metaupdate Step with outer combine loop for multiple MAML iterations
    train_op = rollouts.combine(
        MetaUpdate(workers, config["num_maml_steps"],
                   config["maml_optimizer_steps"], metric_collect))
    return train_op