Example #1
def provincial_eval(trainer, eval_workers):
    """Evaluates the performance of the domray model by playing it against
    Provincial using preset buy menus.

    Args:
        trainer (Trainer): trainer class to evaluate.
        eval_workers (WorkerSet): evaluation workers.

    Returns:
        metrics (dict): evaluation metrics dict.
    """
    # eval_metrics is a module-level list, defined elsewhere in this script.
    global eval_metrics

    # Trigger sampling on every remote eval worker for each evaluation round.
    for _ in range(num_episodes_per_scenario):
        ray.get([w.sample.remote() for w in eval_workers.remote_workers()])
        # Uncomment to dump per-env debug state from each worker:
        # for worker in eval_workers.remote_workers():
        #     worker.foreach_env.remote(lambda env: env.debug())

    # Gather the accumulated episodes from the workers and summarize them
    # into a single metrics dict.
    episodes, _ = collect_episodes(
        remote_workers=eval_workers.remote_workers(), timeout_seconds=600)

    metrics = summarize_episodes(episodes)
    eval_metrics.append(metrics)

    return metrics
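A function with this signature can be wired into RLlib as a custom evaluation hook. A minimal sketch of that wiring, assuming the custom_eval_function and evaluation_* config keys of the RLlib versions this code targets; the trainer class and env id below are hypothetical placeholders:

# Hypothetical wiring sketch: register provincial_eval as the eval hook.
# Config key names follow older RLlib releases and may differ in newer ones.
config = {
    "evaluation_interval": 1,          # evaluate on every train() call
    "evaluation_num_workers": 2,       # becomes eval_workers in the hook
    "custom_eval_function": provincial_eval,
}
# trainer = PPOTrainer(env="DominionEnv", config=config)  # hypothetical env id
# result = trainer.train()  # the hook's dict lands under result["evaluation"]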
Example #2
    def collect_metrics(self,
                        timeout_seconds,
                        min_history=100,
                        selected_evaluators=None):
        """Returns evaluator and optimizer stats.

        Arguments:
            timeout_seconds (int): Max wait time for an evaluator before
                dropping its results. This usually indicates a hung evaluator.
            min_history (int): Min history length to smooth results over.
            selected_evaluators (list): Override the list of remote evaluators
                to collect metrics from.

        Returns:
            res (dict): A training result dict from evaluator metrics with
                `info` replaced with stats from self.
        """
        episodes, num_dropped = collect_episodes(
            self.local_evaluator,
            selected_evaluators or self.remote_evaluators,
            timeout_seconds=timeout_seconds)
        orig_episodes = list(episodes)
        missing = min_history - len(episodes)
        if missing > 0:
            episodes.extend(self.episode_history[-missing:])
            assert len(episodes) <= min_history
        self.episode_history.extend(orig_episodes)
        self.episode_history = self.episode_history[-min_history:]
        res = summarize_episodes(episodes, orig_episodes, num_dropped)
        res.update(info=self.stats())
        return res
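The padding step above is what smooths results: freshly collected episodes are topped up with the tail of self.episode_history, so summaries cover up to min_history episodes once enough history has accumulated. A self-contained sketch of just that window logic, with ints standing in for episode objects:

# Minimal sketch of the min_history padding above; ints stand in for episodes.
def pad_with_history(episodes, history, min_history=100):
    padded = list(episodes)
    missing = min_history - len(padded)
    if missing > 0:
        # Borrow the most recent history entries to reach min_history.
        padded.extend(history[-missing:])
    history.extend(episodes)      # record the new episodes...
    del history[:-min_history]    # ...but keep only the trailing window
    return padded

history = list(range(200))
padded = pad_with_history([1000, 1001], history, min_history=5)
assert padded == [1000, 1001, 197, 198, 199]
assert history == [197, 198, 199, 1000, 1001]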
Example #3
    def collect_metrics(self):
        assert self.episodes
        metrics = {
            k: summarize_episodes(v, v, 0)
            for k, v in self.episodes.items()
        }
        return metrics
Example #4
    def __call__(self, _):
        # Collect worker metrics.
        episodes, self.to_be_collected = collect_episodes(
            self.workers.local_worker(),
            self.workers.remote_workers(),
            self.to_be_collected,
            timeout_seconds=self.timeout_seconds)
        orig_episodes = list(episodes)
        missing = self.min_history - len(episodes)
        if missing > 0:
            episodes.extend(self.episode_history[-missing:])
            assert len(episodes) <= self.min_history
        self.episode_history.extend(orig_episodes)
        self.episode_history = self.episode_history[-self.min_history:]
        res = summarize_episodes(episodes, orig_episodes)

        # Add in iterator metrics.
        metrics = LocalIterator.get_metrics()
        timers = {}
        counters = {}
        info = {}
        info.update(metrics.info)
        for k, counter in metrics.counters.items():
            counters[k] = counter
        for k, timer in metrics.timers.items():
            timers["{}_time_ms".format(k)] = round(timer.mean * 1000, 3)
            if timer.has_units_processed():
                timers["{}_throughput".format(k)] = round(
                    timer.mean_throughput, 3)
        res.update({
            "num_healthy_workers": len(self.workers.remote_workers()),
            "timesteps_total": metrics.counters[STEPS_SAMPLED_COUNTER],
        })
        res["timers"] = timers
        res["info"] = info
        res["info"].update(counters)
        relevant = [
            "info", "custom_metrics", "sampler_perf", "timesteps_total",
            "policy_reward_mean", "episode_len_mean"
        ]

        d = {k: res[k] for k in relevant}
        d["evaluation"] = res.get("evaluation", {})

        if self.log_to_neptune:
            metrics_to_be_logged = ["info", "evaluation"]

            def log_metric(metrics, base_string=''):
                if isinstance(metrics, dict):
                    for k in metrics:
                        log_metric(metrics[k], base_string + '{}_'.format(k))
                else:
                    neptune.log_metric(base_string, metrics)

            for k in d:
                if k in metrics_to_be_logged:
                    log_metric(d[k], base_string='{}_'.format(k))

        return d
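The log_metric helper flattens the nested metrics dict into underscore-joined names before sending each leaf value to Neptune. The same recursion runs stand-alone with print in place of neptune.log_metric; note the trailing underscore the recursion leaves on leaf names:

# Sketch of the recursive flattening above; print() stands in for Neptune.
def log_metric(metrics, base_string=''):
    if isinstance(metrics, dict):
        for k in metrics:
            log_metric(metrics[k], base_string + '{}_'.format(k))
    else:
        print(base_string, metrics)

log_metric({"learner": {"loss": 0.1, "entropy": 1.3}}, base_string='info_')
# Prints:
#   info_learner_loss_ 0.1
#   info_learner_entropy_ 1.3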
Example #5
    def __call__(self, _: Any) -> Dict:
        # Collect worker metrics.
        episodes, self.to_be_collected = collect_episodes(
            self.workers.local_worker(),
            self.selected_workers or self.workers.remote_workers(),
            self.to_be_collected,
            timeout_seconds=self.timeout_seconds,
        )
        orig_episodes = list(episodes)
        missing = self.min_history - len(episodes)
        if missing > 0:
            episodes = self.episode_history[-missing:] + episodes
            assert len(episodes) <= self.min_history
        self.episode_history.extend(orig_episodes)
        self.episode_history = self.episode_history[-self.min_history :]
        res = summarize_episodes(episodes, orig_episodes, self.keep_custom_metrics)

        # Add in iterator metrics.
        metrics = _get_shared_metrics()
        custom_metrics_from_info = metrics.info.pop("custom_metrics", {})
        timers = {}
        counters = {}
        info = {}
        info.update(metrics.info)
        for k, counter in metrics.counters.items():
            counters[k] = counter
        for k, timer in metrics.timers.items():
            timers["{}_time_ms".format(k)] = round(timer.mean * 1000, 3)
            if timer.has_units_processed():
                timers["{}_throughput".format(k)] = round(timer.mean_throughput, 3)
        res.update(
            {
                "num_healthy_workers": len(self.workers.remote_workers()),
                "timesteps_total": (
                    metrics.counters[STEPS_TRAINED_COUNTER]
                    if self.by_steps_trained
                    else metrics.counters[STEPS_SAMPLED_COUNTER]
                ),
                # tune.Trainable uses timesteps_this_iter for tracking
                # total timesteps.
                "timesteps_this_iter": metrics.counters[
                    STEPS_TRAINED_THIS_ITER_COUNTER
                ],
                "agent_timesteps_total": metrics.counters.get(
                    AGENT_STEPS_SAMPLED_COUNTER, 0
                ),
            }
        )
        res["timers"] = timers
        res["info"] = info
        res["info"].update(counters)
        res["custom_metrics"] = res.get("custom_metrics", {})
        res["episode_media"] = res.get("episode_media", {})
        res["custom_metrics"].update(custom_metrics_from_info)
        return res
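Each iterator timer is reported twice: its mean duration converted to milliseconds, and, when units were tracked, its mean throughput. A sketch of that conversion; the _Timer class is a hypothetical stand-in for the timer objects Ray's metrics utilities provide:

# Hypothetical stand-in for the iterator timer objects used above.
class _Timer:
    def __init__(self, mean, mean_throughput=None):
        self.mean = mean  # mean duration in seconds
        self.mean_throughput = mean_throughput

    def has_units_processed(self):
        return self.mean_throughput is not None

timers = {}
for k, timer in {"sample": _Timer(0.0123, 812.5), "learn": _Timer(0.2)}.items():
    timers["{}_time_ms".format(k)] = round(timer.mean * 1000, 3)
    if timer.has_units_processed():
        timers["{}_throughput".format(k)] = round(timer.mean_throughput, 3)

assert timers == {"sample_time_ms": 12.3, "sample_throughput": 812.5,
                  "learn_time_ms": 200.0}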
Example #6
    def __call__(self, _: Any) -> Dict:
        # Collect worker metrics.
        episodes, self.to_be_collected = collect_episodes(
            self.workers.local_worker(),
            self.selected_workers or self.workers.remote_workers(),
            self.to_be_collected,
            timeout_seconds=self.timeout_seconds)
        orig_episodes = list(episodes)
        missing = self.min_history - len(episodes)
        if missing > 0:
            episodes = self.episode_history[-missing:] + episodes
            assert len(episodes) <= self.min_history
        self.episode_history.extend(orig_episodes)
        self.episode_history = self.episode_history[-self.min_history:]
        res = summarize_episodes(episodes, orig_episodes)

        # Add in iterator metrics.
        metrics = _get_shared_metrics()
        custom_metrics_from_info = metrics.info.pop("custom_metrics", {})
        timers = {}
        counters = {}
        info = {}
        info.update(metrics.info)
        for k, counter in metrics.counters.items():
            counters[k] = counter
        for k, timer in metrics.timers.items():
            timers["{}_time_ms".format(k)] = round(timer.mean * 1000, 3)
            if timer.has_units_processed():
                timers["{}_throughput".format(k)] = round(
                    timer.mean_throughput, 3)

                throughput = timer.mean_throughput

                with Log.timer(log=True, logger=self.logger,
                               info="THROUGHPUT") as logging_metrics:
                    logging_metrics.append(throughput)

        res.update({
            "num_healthy_workers":
            len(self.workers.remote_workers()),
            "timesteps_total":
            metrics.counters[STEPS_SAMPLED_COUNTER],
            "agent_timesteps_total":
            metrics.counters.get(AGENT_STEPS_SAMPLED_COUNTER, 0),
        })
        res["timers"] = timers
        res["info"] = info
        res["info"].update(counters)
        res["custom_metrics"] = res.get("custom_metrics", {})
        res["episode_media"] = res.get("episode_media", {})
        res["custom_metrics"].update(custom_metrics_from_info)
        return res
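Log.timer is project-specific here, not an RLlib or standard-library API. A minimal sketch of a comparable helper, assuming only the standard library: a context manager that hands the caller a list and logs whatever was appended when the block exits:

import contextlib
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("throughput")

# Hypothetical stand-in for the project-specific Log.timer helper above.
@contextlib.contextmanager
def log_values(logger, info):
    values = []
    try:
        yield values  # the caller appends metrics to this list
    finally:
        logger.info("%s: %s", info, values)

with log_values(logger, "THROUGHPUT") as logging_metrics:
    logging_metrics.append(812.5)  # logs "THROUGHPUT: [812.5]" on exit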
Example #7
    def __call__(self, info):
        episodes, self.to_be_collected = collect_episodes(
            self.workers.local_worker(),
            self.workers.remote_workers(),
            self.to_be_collected,
            timeout_seconds=self.timeout_seconds)
        orig_episodes = list(episodes)
        missing = self.min_history - len(episodes)
        if missing > 0:
            episodes.extend(self.episode_history[-missing:])
            assert len(episodes) <= self.min_history
        self.episode_history.extend(orig_episodes)
        self.episode_history = self.episode_history[-self.min_history:]
        res = summarize_episodes(episodes, orig_episodes)
        res.update(info=info)
        return res
Example #8
    def collect_metrics(self):
        dist_episodes = ray.get([
            e.apply.remote(lambda ev: ev.episodes)
            for e in self.remote_evaluators
        ])

        aggregated_episodes = defaultdict(list)
        for episodes in dist_episodes:
            for k, v in episodes.items():
                aggregated_episodes[k].extend(v)
        aggregated_episodes = dict(aggregated_episodes)

        res = {
            k: summarize_episodes(v, v, 0)
            for k, v in aggregated_episodes.items()
        }

        return {"inner_update_metrics": res}
Example #9
    def __call__(self, _):
        # Collect worker metrics.
        episodes, self.to_be_collected = collect_episodes(
            self.workers.local_worker(),
            self.workers.remote_workers(),
            self.to_be_collected,
            timeout_seconds=self.timeout_seconds)
        orig_episodes = list(episodes)
        missing = self.min_history - len(episodes)
        if missing > 0:
            episodes.extend(self.episode_history[-missing:])
            assert len(episodes) <= self.min_history
        self.episode_history.extend(orig_episodes)
        self.episode_history = self.episode_history[-self.min_history:]
        res = summarize_episodes(episodes, orig_episodes)

        # Add in iterator metrics.
        metrics = LocalIterator.get_metrics()
        if metrics.parent_metrics:
            print("TODO: support nested metrics better")
        all_metrics = [metrics] + metrics.parent_metrics
        timers = {}
        counters = {}
        info = {}
        for metrics in all_metrics:
            info.update(metrics.info)
            for k, counter in metrics.counters.items():
                counters[k] = counter
            for k, timer in metrics.timers.items():
                timers["{}_time_ms".format(k)] = round(timer.mean * 1000, 3)
                if timer.has_units_processed():
                    timers["{}_throughput".format(k)] = round(
                        timer.mean_throughput, 3)
            res.update({
                "num_healthy_workers":
                len(self.workers.remote_workers()),
                "timesteps_total":
                metrics.counters[STEPS_SAMPLED_COUNTER],
            })
        res["timers"] = timers
        res["info"] = info
        res["info"].update(counters)
        return res
Example #10
    def collect_metrics(self,
                        timeout_seconds,
                        min_history=100,
                        selected_workers=None):
        """Returns worker and optimizer stats.

        Arguments:
            timeout_seconds (int): Max wait time for a worker before
                dropping its results. This usually indicates a hung worker.
            min_history (int): Min history length to smooth results over.
            selected_workers (list): Override the list of remote workers
                to collect metrics from.

        Returns:
            res (dict): A training result dict from worker metrics with
                `info` replaced with stats from self.
        """
        return_stats = {}

        episode_storage = {}

        for ws_id, workers in self.workers.items():
            episodes, self.to_be_collected[ws_id] = collect_episodes(
                workers.local_worker(),
                selected_workers or workers.remote_workers(),
                self.to_be_collected[ws_id],
                timeout_seconds=timeout_seconds)
            orig_episodes = list(episodes)
            missing = min_history - len(episodes)
            if missing > 0:
                episodes.extend(self.episode_history[ws_id][-missing:])
                assert len(episodes) <= min_history
            self.episode_history[ws_id].extend(orig_episodes)
            self.episode_history[ws_id] = self.episode_history[ws_id][
                                          -min_history:]

            episode_storage[ws_id] = episodes
            res = summarize_episodes(episodes, orig_episodes)
            return_stats[ws_id] = res
        return_stats = parse_stats(return_stats, episode_storage)
        return_stats.update(info=self.stats())
        return_stats["info"]["learner_queue"].pop("size_quantiles")
        return return_stats
Example #11
def custom_eval_function(trainer, eval_workers):
    """Example of a custom evaluation function.

    Arguments:
        trainer (Trainer): trainer class to evaluate.
        eval_workers (WorkerSet): evaluation workers.

    Returns:
        metrics (dict): evaluation metrics dict.
    """

    # We configured 2 eval workers in the training config.
    worker_1, worker_2 = eval_workers.remote_workers()

    # Set different env settings for each worker. Here we use a fixed config,
    # which could also have been computed in each worker by looking at
    # env_config.worker_index (printed in the SimpleCorridor class above).
    worker_1.foreach_env.remote(lambda env: env.set_corridor_length(4))
    worker_2.foreach_env.remote(lambda env: env.set_corridor_length(7))

    for i in range(5):
        print("Custom evaluation round", i)
        # Calling .sample() runs exactly one episode per worker due to how the
        # eval workers are configured.
        ray.get([w.sample.remote() for w in eval_workers.remote_workers()])

    # Collect the accumulated episodes on the workers, and then summarize the
    # episode stats into a metrics dict.
    episodes, _ = collect_episodes(
        remote_workers=eval_workers.remote_workers(), timeout_seconds=99999)
    # You can compute metrics from the episodes manually, or use the
    # convenient `summarize_episodes()` utility:
    metrics = summarize_episodes(episodes)
    # Note that the above two statements are the equivalent of:
    # metrics = collect_metrics(eval_workers.local_worker(),
    #                           eval_workers.remote_workers())

    # You can also put custom values in the metrics dict.
    metrics["foo"] = 1
    return metrics
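The comment above notes that each worker could instead derive its setting from env_config.worker_index. A hedged sketch of that alternative; per_worker_length and _Ctx are hypothetical, and the sketch assumes RLlib's convention that remote workers see 1-based worker indices:

# Hypothetical alternative to pushing lengths from the driver: each env
# derives its corridor length from the index of the worker hosting it.
def per_worker_length(env_config):
    return 4 if env_config.worker_index == 1 else 7

class _Ctx:
    """Minimal stand-in for the env_config (RLlib's EnvContext)."""
    def __init__(self, worker_index):
        self.worker_index = worker_index

assert per_worker_length(_Ctx(1)) == 4
assert per_worker_length(_Ctx(2)) == 7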
Example #12
    def collect_metrics(self, min_history=100):
        """Returns evaluator and optimizer stats.

        Arguments:
            min_history (int): Min history length to smooth results over.

        Returns:
            res (dict): A training result dict from evaluator metrics with
                `info` replaced with stats from self.
        """
        episodes = collect_episodes(self.local_evaluator,
                                    self.remote_evaluators)
        orig_episodes = list(episodes)
        missing = min_history - len(episodes)
        if missing > 0:
            episodes.extend(self.episode_history[-missing:])
            assert len(episodes) <= min_history
        self.episode_history.extend(orig_episodes)
        self.episode_history = self.episode_history[-min_history:]
        res = summarize_episodes(episodes)
        res.update(info=self.stats())
        return res