Example #1
    def tearDown(self):
        print("Tearing down....")
        try:
            self.runner._server.shutdown()
            self.runner = None
        except Exception as e:
            print(e)
        ray.shutdown()
        _register_all()
Example #2
def _start_new_cluster():
    cluster = Cluster(
        initialize_head=True,
        connect=True,
        head_node_args={
            "num_cpus": 1,
            "_internal_config": json.dumps({
                "num_heartbeats_timeout": 10
            })
        })
    # Pytest doesn't play nicely with imports
    _register_all()
    return cluster
Example #3
def start_connected_emptyhead_cluster():
    """Starts head with no resources."""

    cluster = Cluster(
        initialize_head=True,
        connect=True,
        head_node_args={
            "num_cpus": 0,
            "_internal_config": json.dumps({
                "num_heartbeats_timeout": 10
            })
        })
    # Pytest doesn't play nicely with imports
    _register_all()
    yield cluster
    # The code after the yield will run as teardown code.
    ray.shutdown()
    cluster.shutdown()
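The yield-based helper in Example #3 is typically exposed as a pytest fixture, so that everything after the `yield` runs as teardown once the test finishes. The sketch below is only an illustration of that pattern; the `@pytest.fixture` decorator, the fixture name `empty_head_cluster`, and the test function are assumptions, not part of the original snippet:

import pytest

@pytest.fixture
def empty_head_cluster():
    # Hypothetical stand-in for the Cluster(...) setup shown in Example #3.
    cluster = {"head_started": True}
    yield cluster
    # Code after the yield runs as teardown, mirroring the
    # ray.shutdown()/cluster.shutdown() calls above.

def test_uses_cluster(empty_head_cluster):
    # pytest injects the object yielded by the fixture.
    assert empty_head_cluster["head_started"]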
Example #4
    def tearDown(self):
        ray.shutdown()
        _register_all()  # re-register the evicted objects
        if "CUDA_VISIBLE_DEVICES" in os.environ:
            del os.environ["CUDA_VISIBLE_DEVICES"]
        shutil.rmtree(self.tmpdir)
Example #5
    def tearDown(self):
        ray.shutdown()
        _register_all()
Example #6
    def tearDown(self):
        shutil.rmtree(self.absolute_local_dir, ignore_errors=True)
        self.absolute_local_dir = None
        ray.shutdown()
        # Without this line, test_tune_server.testAddTrial would fail.
        _register_all()
Example #7
    def do_test(self, alg: str, config: dict, fn=None):
        fn = fn or self._do_test_fault_ignore
        try:
            fn(alg, config)
        finally:
            _register_all()  # re-register the evicted objects
Example #8
from __future__ import print_function

import random
import unittest
import numpy as np

from ray.tune.hyperband import HyperBandScheduler
from ray.tune.async_hyperband import AsyncHyperBandScheduler
from ray.tune.pbt import PopulationBasedTraining, explore
from ray.tune.median_stopping_rule import MedianStoppingRule
from ray.tune.result import TrainingResult
from ray.tune.trial import Trial, Resources
from ray.tune.trial_scheduler import TrialScheduler

from ray.rllib import _register_all
_register_all()


def result(t, rew):
    return TrainingResult(time_total_s=t,
                          episode_reward_mean=rew,
                          training_iteration=int(t))


class EarlyStoppingSuite(unittest.TestCase):
    def basicSetup(self, rule):
        t1 = Trial("PPO")  # mean is 450, max 900, t_max=10
        t2 = Trial("PPO")  # mean is 450, max 450, t_max=5
        for i in range(10):
            self.assertEqual(
                rule.on_trial_result(None, t1, result(i, i * 100)),
                TrialScheduler.CONTINUE)
Example #9
    def tearDown(self):
        ray.shutdown()
        _register_all()  # re-register the evicted objects
Example #10
def start_ray():
    ray.init()
    _register_all()
    yield
    ray.shutdown()
Example #11
    def tearDown(self):
        ray.worker.cleanup()
        _register_all()  # re-register the evicted objects
Example #12
    def setUp(self):
        _register_all()  # re-register the evicted objects
Example #13
    @classmethod  # unittest's tearDownClass hook must be a classmethod
    def tearDownClass(cls):
        ray.shutdown()
        _register_all()
Example #14
    def setUp(self):
        # Wait up to five seconds for placement groups when starting a trial
        os.environ["TUNE_PLACEMENT_GROUP_WAIT_S"] = "5"
        # Block for results even when placement groups are pending
        os.environ["TUNE_TRIAL_STARTUP_GRACE_PERIOD"] = "0"
        _register_all()  # re-register the evicted objects
Example #15
#!/usr/bin/env python

import os
import sys

if __name__ == "__main__":
    # Do not import tf for testing purposes.
    os.environ["RLLIB_TEST_NO_TF_IMPORT"] = "1"

    # Test registering (includes importing) all Trainers.
    from ray.rllib import _register_all

    # This should surface any dependency on tf, e.g. inside function
    # signatures/typehints.
    _register_all()

    from ray.rllib.agents.a3c import A2CTrainer

    assert ("tensorflow" not in sys.modules
            ), "`tensorflow` initially present, when it shouldn't!"

    # Note: No ray.init(), to test it works without Ray
    trainer = A2CTrainer(env="CartPole-v0",
                         config={
                             "framework": "torch",
                             "num_workers": 0
                         })
    trainer.train()

    assert ("tensorflow" not in sys.modules
            ), "`tensorflow` got imported during training, when it shouldn't!"
Example #16
    def setUp(self):
        self.trial_executor = RayTrialExecutor(queue_trials=False)
        ray.init()
        _register_all()  # Needed for flaky tests
Example #17
def start_ray():
    ray.init(log_to_driver=False, local_mode=True)
    _register_all()
    yield
    ray.shutdown()
Example #18
    def tearDown(self):
        ray.shutdown()
        _register_all()  # re-register the evicted objects
Example #19
    def setUp(self):
        self.trial_executor = RayTrialExecutor()
        ray.init(num_cpus=2, ignore_reinit_error=True)
        _register_all()  # Needed for flaky tests
Example #20
    def tearDown(self):
        shutil.rmtree(self.logdir)
        ray.shutdown()
        _register_all()
Example #21
    def tearDown(self):
        ray.shutdown()
        _register_all()  # re-register the evicted objects
        shutil.rmtree(self.tmpdir)
Example #22
def start_ray():
    ray.init()
    _register_all()
    yield
    ray.shutdown()
Example #23
    def execute(self):
        timesteps = 0
        best_period_value = None

        if self.pr.agent.name() == "A2C":
            trainer = A2CTrainer(config=self.rllib_config,
                                 logger_creator=rllib_logger_creator)
        elif self.pr.agent.name() == "PPO":
            trainer = PPOTrainer(config=self.rllib_config,
                                 logger_creator=rllib_logger_creator)
            # import pdb; pdb.set_trace()
        else:
            raise ValueError('There is no rllib trainer with name ' +
                             self.pr.agent.name())

        tf_writer = SummaryWriter(
            self.pr.save_logs_to) if self.pr.save_logs_to else None

        reward_metric = Metric(short_name='rews',
                               long_name='trajectory reward',
                               formatting_string='{:5.1f}',
                               higher_is_better=True)
        time_step_metric = Metric(short_name='steps',
                                  long_name='total number of steps',
                                  formatting_string='{:5.1f}',
                                  higher_is_better=True)

        metrics = [reward_metric, time_step_metric]

        if self.pr.train:
            start_time = time.time()
            policy_save_tag = 0
            while timesteps < self.pr.total_steps:

                result = trainer.train()

                timesteps = result["timesteps_total"]
                reward_metric.log(result['evaluation']['episode_reward_mean'])
                time_step_metric.log(result['evaluation']['episode_len_mean'])
                # import pdb; pdb.set_trace()
                # # Get a metric list from each environment.
                # if hasattr(trainer, "evaluation_workers"):
                #     metric_lists = sum(trainer.evaluation_workers.foreach_worker(lambda w: w.foreach_env(lambda e: e.metrics)), [])
                # else:
                #     metric_lists = sum(trainer.workers.foreach_worker(lambda w: w.foreach_env(lambda e: e.metrics)), [])

                # metrics = metric_lists[0]

                # # Aggregate metrics from all other environments.
                # for metric_list in metric_lists[1:]:
                #     for i, metric in enumerate(metric_list):
                #         metrics[i]._values.extend(metric._values)

                save_logs_to = self.pr.save_logs_to
                model_save_paths_dict = self.pr.model_save_paths_dict
                # Consider whether to save a model.
                saved = False
                if model_save_paths_dict is not None and metrics[
                        0].currently_optimal:
                    # trainer.get_policy().model.save(model_save_paths_dict)
                    policy_save_tag += 1
                    trainer.get_policy().model.save_model_in_progress(
                        model_save_paths_dict, policy_save_tag)
                    saved = True

                # Write the metrics for this reporting period.
                total_seconds = time.time() - start_time
                logger.write_and_condense_metrics(total_seconds, 'iters',
                                                  timesteps, saved, metrics,
                                                  tf_writer)

                # Clear the metrics, both those maintained by the training workers and by the evaluation ones.
                condense_fn = lambda environment: [
                    m.condense_values() for m in environment.metrics
                ]
                trainer.workers.foreach_worker(
                    lambda w: w.foreach_env(condense_fn))
                if hasattr(trainer, "evaluation_workers"):
                    trainer.evaluation_workers.foreach_worker(
                        lambda w: w.foreach_env(condense_fn))

        else:
            start_time = time.time()
            env = trainer.workers.local_worker().env
            metrics = env.metrics
            worker = trainer.workers.local_worker()
            steps = steps_since_report = 0

            while True:
                batch = worker.sample()
                current_steps = len(batch["obs"])
                steps += current_steps
                steps_since_report += current_steps

                if steps_since_report >= self.pr.reporting_interval:
                    total_seconds = time.time() - start_time

                    # Write the metrics for this reporting period.
                    logger.write_and_condense_metrics(total_seconds, 'iters',
                                                      steps, False, metrics,
                                                      tf_writer)

                    steps_since_report = 0
                    if steps >= self.pr.total_steps:
                        break

            env.close()

        # Get a summary metric for the entire stage, based on the environment's first metric.
        summary_metric = logger.summarize_stage(metrics[0])

        # Temporary workaround for https://github.com/ray-project/ray/issues/8205
        ray.shutdown()
        _register_all()

        return summary_metric