def launch(self):
        """Actual entry point into the class instance where everything happens.
        Lots of delegating to classes that are in subclass or can be over-ridden.
        """
        self.register_env_creator()

        # All worker nodes will block at this step during training
        ray_cluster_config = self.ray_init_config()
        if not self.is_master_node:
            return

        # Start the driver on master node
        ray.init(**ray_cluster_config)
        experiment_config = self.get_experiment_config()
        experiment_config = self.customize_experiment_config(experiment_config)
        print("Running experiment with config %s" % json.dumps(experiment_config, indent=2))
        run_experiments(experiment_config)
        all_workers_host_names = self.get_all_host_names()[1:]
        # If this is a distributed job, send TERMINATION_SIGNAL to all workers.
        if len(all_workers_host_names) > 0:
            self.sage_cluster_communicator.create_s3_signal(TERMINATION_SIGNAL)

        algo = experiment_config["training"]["run"]
        env_string = experiment_config["training"]["config"]["env"]
        config = experiment_config["training"]["config"]
        self.save_checkpoint_and_serving_model(algorithm=algo,
                                               env_string=env_string,
                                               config=config)
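
To make the delegation pattern above concrete, here is a minimal hypothetical subclass; the base-class name SageMakerRayLauncher, the MyEnv environment, and the config shape are assumptions for the sketch, not part of the original code:

from ray.tune.registry import register_env


class MyLauncher(SageMakerRayLauncher):  # base-class name is an assumption
    def register_env_creator(self):
        # Hypothetical environment registration.
        register_env("my_env", lambda env_config: MyEnv(env_config))

    def get_experiment_config(self):
        return {
            "training": {
                "run": "PPO",
                "config": {
                    "env": "my_env",
                    "num_workers": 2,
                },
            }
        }


if __name__ == "__main__":
    MyLauncher().launch()
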
Example #2
def run():
    run_experiments(
        {
            "foo": {
                "run": MyResettableClass,
                "max_failures": 1,
                "num_samples": 4,
                "config": {
                    "fake_reset_not_supported": True
                },
            }
        },
        reuse_actors=True,
        scheduler=FrequentPausesScheduler())
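
For context, reuse_actors=True keeps a trial's actor alive and hands it the next trial's config instead of tearing it down, which requires the trainable to implement reset_config. A minimal sketch of that hook, standing in for the test helper MyResettableClass defined elsewhere:

from ray.tune import Trainable


class ResettableTrainable(Trainable):
    def _setup(self, config):
        self.num_resets = 0

    def _train(self):
        return {"num_resets": self.num_resets}

    def reset_config(self, new_config):
        # Returning True signals that the in-place reset succeeded and
        # the actor may be reused for the next trial.
        self.config = new_config
        self.num_resets += 1
        return True
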
Example #3
def test_cluster_rllib_restore(start_connected_cluster, tmpdir):
    cluster = start_connected_cluster
    dirpath = str(tmpdir)
    script = """
import time
import ray
from ray import tune

ray.init(redis_address="{redis_address}")

kwargs = dict(
    run="PG",
    env="CartPole-v1",
    stop=dict(training_iteration=10),
    local_dir="{checkpoint_dir}",
    checkpoint_freq=1,
    max_failures=1)

tune.run_experiments(
    dict(experiment=kwargs),
    raise_on_failed_trial=False)
""".format(
        redis_address=cluster.redis_address, checkpoint_dir=dirpath)
    run_string_as_driver_nonblocking(script)
    # Wait until the right checkpoint is saved.
    # The trainable returns every 0.5 seconds, so this should not miss
    # the checkpoint.
    metadata_checkpoint_dir = os.path.join(dirpath, "experiment")
    for i in range(100):
        if TrialRunner.checkpoint_exists(metadata_checkpoint_dir):
            # Inspect the internal trialrunner
            runner = TrialRunner.restore(metadata_checkpoint_dir)
            trials = runner.get_trials()
            last_res = trials[0].last_result
            if last_res and last_res.get("training_iteration"):
                break
        time.sleep(0.3)

    if not TrialRunner.checkpoint_exists(metadata_checkpoint_dir):
        raise RuntimeError("Checkpoint file didn't appear.")

    ray.shutdown()
    cluster.shutdown()
    cluster = _start_new_cluster()
    cluster.wait_for_nodes()

    # Restore properly from checkpoint
    trials2 = tune.run_experiments(
        {
            "experiment": {
                "run": "PG",
                "checkpoint_freq": 1,
                "local_dir": dirpath
            }
        },
        resume=True)
    assert all(t.status == Trial.TERMINATED for t in trials2)
    cluster.shutdown()
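
Outside the test harness, the resume behavior exercised above reduces to a small sketch: run once with checkpointing enabled, then call run_experiments again with resume=True on the same local_dir (the path below is an assumption):

from ray import tune

spec = {
    "experiment": {
        "run": "PG",
        "env": "CartPole-v1",
        "stop": {"training_iteration": 10},
        "checkpoint_freq": 1,
        "local_dir": "/tmp/tune_resume_demo",  # assumed path
    }
}

tune.run_experiments(spec)               # first run writes checkpoints
tune.run_experiments(spec, resume=True)  # picks up from the saved state
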
Example #4
def test_ls(start_ray, capsys, tmpdir):
    """This test captures output of list_trials."""
    experiment_name = "test_ls"
    experiment_path = os.path.join(str(tmpdir), experiment_name)
    num_samples = 2
    with capsys.disabled():
        tune.run_experiments({
            experiment_name: {
                "run": "__fake",
                "stop": {
                    "training_iteration": 1
                },
                "num_samples": num_samples,
                "local_dir": str(tmpdir)
            }
        })

    commands.list_trials(experiment_path, info_keys=("status", ))
    captured = capsys.readouterr().out.strip()
    lines = captured.split("\n")
    assert sum("TERMINATED" in line for line in lines) == num_samples
Example #5
def test_lsx(start_ray, capsys, tmpdir):
    """This test captures output of list_experiments."""
    project_path = str(tmpdir)
    num_experiments = 3
    for i in range(num_experiments):
        experiment_name = "test_lsx{}".format(i)
        with capsys.disabled():
            tune.run_experiments({
                experiment_name: {
                    "run": "__fake",
                    "stop": {
                        "training_iteration": 1
                    },
                    "num_samples": 1,
                    "local_dir": project_path
                }
            })

    commands.list_experiments(project_path, info_keys=("total_trials", ))
    captured = capsys.readouterr().out.strip()
    lines = captured.split("\n")
    assert sum("1" in line for line in lines) >= 3
Example #6
def testTrialReuseEnabled(self):
    trials = run_experiments(
        {
            "foo": {
                "run": MyResettableClass,
                "num_samples": 4,
                "config": {},
            }
        },
        reuse_actors=True,
        scheduler=FrequentPausesScheduler())
    self.assertEqual([t.last_result["num_resets"] for t in trials],
                     [1, 2, 3, 4])
Example #7
def test_cluster_down_full(start_connected_cluster, tmpdir):
    """Tests that run_experiment restoring works on cluster shutdown."""
    cluster = start_connected_cluster
    dirpath = str(tmpdir)

    exp1_args = dict(
        run="__fake",
        stop=dict(training_iteration=3),
        local_dir=dirpath,
        checkpoint_freq=1)
    exp2_args = dict(run="__fake", stop=dict(training_iteration=3))
    exp3_args = dict(
        run="__fake",
        stop=dict(training_iteration=3),
        config=dict(mock_error=True))
    exp4_args = dict(
        run="__fake",
        stop=dict(training_iteration=3),
        config=dict(mock_error=True),
        checkpoint_freq=1)
    all_experiments = {
        "exp1": exp1_args,
        "exp2": exp2_args,
        "exp3": exp3_args,
        "exp4": exp4_args
    }

    tune.run_experiments(all_experiments, raise_on_failed_trial=False)

    ray.shutdown()
    cluster.shutdown()
    cluster = _start_new_cluster()

    trials = tune.run_experiments(
        all_experiments, resume=True, raise_on_failed_trial=False)
    assert len(trials) == 4
    assert all(t.status in [Trial.TERMINATED, Trial.ERROR] for t in trials)
    cluster.shutdown()
Example #8
cluster = Cluster()
for i in range(num_nodes):
    cluster.add_node(
        redis_port=6379 if i == 0 else None,
        num_redis_shards=num_redis_shards if i == 0 else None,
        num_cpus=20,
        num_gpus=0,
        resources={str(i): 2},
        object_store_memory=object_store_memory,
        redis_max_memory=redis_max_memory)
ray.init(redis_address=cluster.redis_address)

# Run the workload.

run_experiments({
    "apex": {
        "run": "APEX",
        "env": "Pong-v0",
        "config": {
            "num_workers": 8,
            "num_gpus": 0,
            "buffer_size": 10000,
            "learning_starts": 0,
            "sample_batch_size": 1,
            "train_batch_size": 1,
            "min_iter_time_s": 10,
            "timesteps_per_iteration": 10,
        },
    }
})
Example #9
    ray.init()
    MultiPendulum = make_multiagent("Pendulum-v0")
    register_env("multi_pend", lambda _: MultiPendulum(1))
    trials = run_experiments({
        "test": {
            "run": "PPO",
            "env": "multi_pend",
            "stop": {
                "timesteps_total": 500000,
                "episode_reward_mean": -200,
            },
            "config": {
                "train_batch_size": 2048,
                "vf_clip_param": 10.0,
                "num_workers": 0,
                "num_envs_per_worker": 10,
                "lambda": 0.1,
                "gamma": 0.95,
                "lr": 0.0003,
                "sgd_minibatch_size": 64,
                "num_sgd_iter": 10,
                "model": {
                    "fcnet_hiddens": [64, 64],
                },
                "batch_mode": "complete_episodes",
            },
        }
    })
    if trials[0].last_result["episode_reward_mean"] < -200:
        raise ValueError("Did not get to -200 reward", trials[0].last_result)
Example #10
def test_cluster_interrupt(start_connected_cluster, tmpdir):
    """Tests run_experiment on cluster shutdown with actual interrupt.

    This is an end-to-end test.
    """
    cluster = start_connected_cluster
    dirpath = str(tmpdir)

    # Needs to be in scope for pytest
    class _Mock(tune.Trainable):
        """Finishes on the 4th iteration."""
        def setup(self, config):
            self.state = {"hi": 0}

        def step(self):
            self.state["hi"] += 1
            time.sleep(0.5)
            return {"done": self.state["hi"] >= 4}

        def save_checkpoint(self, path):
            return self.state

        def load_checkpoint(self, state):
            self.state = state

    # Removes indent from class.
    reformatted = "\n".join(line[4:] if len(line) else line
                            for line in inspect.getsource(_Mock).split("\n"))

    script = """
import os
import time
import ray
from ray import tune

os.environ["TUNE_GLOBAL_CHECKPOINT_S"] = "0"

ray.init(address="{address}")

{fail_class_code}

tune.run(
    {fail_class},
    name="experiment",
    stop=dict(training_iteration=5),
    local_dir="{checkpoint_dir}",
    checkpoint_freq=1,
    max_failures=1,
    raise_on_failed_trial=False)
""".format(address=cluster.address,
           checkpoint_dir=dirpath,
           fail_class_code=reformatted,
           fail_class=_Mock.__name__)
    run_string_as_driver_nonblocking(script)

    # Wait until the right checkpoint is saved.
    # The trainable returns every 0.5 seconds, so this should not miss
    # the checkpoint.
    local_checkpoint_dir = os.path.join(dirpath, "experiment")
    for i in range(50):
        if TrialRunner.checkpoint_exists(local_checkpoint_dir):
            # Inspect the internal trialrunner
            runner = TrialRunner(resume="LOCAL",
                                 local_checkpoint_dir=local_checkpoint_dir)
            trials = runner.get_trials()
            last_res = trials[0].last_result
            if last_res and last_res.get("training_iteration") == 3:
                break
        time.sleep(0.2)

    if not TrialRunner.checkpoint_exists(local_checkpoint_dir):
        raise RuntimeError("Checkpoint file didn't appear.")

    ray.shutdown()
    cluster.shutdown()
    cluster = _start_new_cluster()
    Experiment.register_if_needed(_Mock)

    # Inspect the internal trialrunner
    runner = TrialRunner(resume="LOCAL",
                         local_checkpoint_dir=local_checkpoint_dir)
    trials = runner.get_trials()
    assert trials[0].last_result["training_iteration"] == 3
    assert trials[0].status == Trial.PENDING

    # Restore properly from checkpoint
    trials2 = tune.run_experiments(
        {
            "experiment": {
                "run": _Mock,
                "local_dir": dirpath,
                "checkpoint_freq": 1
            }
        },
        resume=True,
        raise_on_failed_trial=False)
    assert all(t.status == Trial.TERMINATED for t in trials2)
    assert {t.trial_id for t in trials2} == {t.trial_id for t in trials}
    ray.shutdown()
    cluster.shutdown()
 register_env("SinglePairTrading-v0",
              lambda config: SinglePairTradingEnv(config))
 ray.init()
 run_experiments({
     "clipparam_{}_{}_{}_wallet-{}-{}".format(
         FLAGS.symbol + FLAGS.to_symbol, FLAGS.limit, FLAGS.histo, WALLET_FIRST_SYMBOL, WALLET_SECOND_SYMBOL):
     {
         "run": FLAGS.algo,
         "env": "SinglePairTrading-v0",
         "stop": {
             "timesteps_total": 1e6,  #1e6 = 1M
         },
         "checkpoint_freq": 100,
         "checkpoint_at_end": True,
         "config": {
             "lr": grid_search([
                 1e-4
                 # 1e-6
             ]),
             "num_workers": 3,  # parallelism
             'observation_filter': 'MeanStdFilter',
             "vf_clip_param": 10000000.0,
             "env_config": {
                 'keys': keys,
                 'symbols': symbols,
                 'first_coin': FLAGS.symbol,
                 'second_coin': FLAGS.to_symbol
             },
         }
     }
 })
Example #12
            if args.torch:
                deprecation_warning(old="--torch", new="--framework=torch")
                exp["config"]["framework"] = "torch"
                args.framework = "torch"

        # Print out the actual config.
        print("== Test config ==")
        print(yaml.dump(experiments))

        # Try running each test 3 times and make sure it reaches the given
        # reward.
        passed = False
        for i in range(3):
            try:
                ray.init(num_cpus=5, local_mode=args.local_mode)
                trials = run_experiments(experiments, resume=False, verbose=2)
            finally:
                ray.shutdown()
                _register_all()

            for t in trials:
                if (t.last_result["episode_reward_mean"] >=
                        t.stopping_criterion["episode_reward_mean"]):
                    passed = True
                    break

            if passed:
                print("Regression test PASSED")
                break
            else:
                print("Regression test FAILED on attempt {}".format(i + 1))
Example #13
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--smoke-test', action='store_true', help='Finish quickly for testing')
    args, _ = parser.parse_known_args()

    register_trainable("my_class", TrainMNIST)
    mnist_spec = {
        'run': 'my_class',
        'stop': {
          'mean_accuracy': 0.99,
          'time_total_s': 600,
        },
        'config': {
            'learning_rate': lambda spec: 10**np.random.uniform(-5, -3),
            'activation': grid_search(['relu', 'elu', 'tanh']),
        },
        "repeat": 10,
    }

    if args.smoke_test:
        mnist_spec['stop']['training_iteration'] = 2
        mnist_spec['repeat'] = 2

    ray.init()
    hyperband = HyperBandScheduler(
        time_attr="timesteps_total", reward_attr="mean_accuracy",
        max_t=100)

    run_experiments(
        {'mnist_hyperband_test': mnist_spec}, scheduler=hyperband)
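
For readers on newer Ray releases: run_experiments plus reward_attr was later superseded by tune.run with metric/mode, and raw lambdas in config gave way to tune.sample_from. A hedged sketch of the equivalent call (exact version boundaries are not checked here):

import numpy as np
from ray import tune
from ray.tune.schedulers import HyperBandScheduler

hyperband = HyperBandScheduler(
    time_attr="timesteps_total",
    metric="mean_accuracy",  # replaces reward_attr
    mode="max",
    max_t=100)

tune.run(
    "my_class",
    name="mnist_hyperband_test",
    stop={"mean_accuracy": 0.99, "time_total_s": 600},
    config={
        "learning_rate": tune.sample_from(
            lambda spec: 10**np.random.uniform(-5, -3)),
        "activation": tune.grid_search(["relu", "elu", "tanh"]),
    },
    num_samples=10,  # the older "repeat" key
    scheduler=hyperband)
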
Example #14
                last_layer, training=input_dict["is_training"])
        output = slim.fully_connected(
            last_layer,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")
        return output, last_layer


if __name__ == "__main__":
    args = parser.parse_args()
    ray.init()

    ModelCatalog.register_custom_model("bn_model", BatchNormModel)
    run_experiments({
        "batch_norm_demo": {
            "run": args.run,
            "env": "Pendulum-v0" if args.run == "DDPG" else "CartPole-v0",
            "stop": {
                "training_iteration": args.num_iters
            },
            "config": {
                "model": {
                    "custom_model": "bn_model",
                },
                "num_workers": 0,
            },
        },
    })
Example #15
    def checkAndReturnConsistentLogs(self, results, sleep_per_iter=None):
        """Checks logging is the same between APIs.

        Ignore "DONE" for logging but checks that the
        scheduler is notified properly with the last result.
        """
        class_results = copy.deepcopy(results)
        function_results = copy.deepcopy(results)

        class_output = []
        function_output = []
        scheduler_notif = []

        class MockScheduler(FIFOScheduler):
            def on_trial_complete(self, runner, trial, result):
                scheduler_notif.append(result)

        class ClassAPILogger(Logger):
            def on_result(self, result):
                class_output.append(result)

        class FunctionAPILogger(Logger):
            def on_result(self, result):
                function_output.append(result)

        class _WrappedTrainable(Trainable):
            def _setup(self, config):
                del config
                self._result_iter = copy.deepcopy(class_results)

            def _train(self):
                if sleep_per_iter:
                    time.sleep(sleep_per_iter)
                res = self._result_iter.pop(0)  # This should not fail
                if not self._result_iter:  # Mark "Done" for last result
                    res[DONE] = True
                return res

        def _function_trainable(config, reporter):
            for result in function_results:
                if sleep_per_iter:
                    time.sleep(sleep_per_iter)
                reporter(**result)

        class_trainable_name = "class_trainable"
        register_trainable(class_trainable_name, _WrappedTrainable)

        trials = run_experiments(
            {
                "function_api": {
                    "run": _function_trainable,
                    "loggers": [FunctionAPILogger],
                },
                "class_api": {
                    "run": class_trainable_name,
                    "loggers": [ClassAPILogger],
                },
            },
            raise_on_failed_trial=False,
            scheduler=MockScheduler())

        # Ignore these fields
        NO_COMPARE_FIELDS = {
            HOSTNAME,
            NODE_IP,
            TRIAL_ID,
            EXPERIMENT_TAG,
            PID,
            TIME_THIS_ITER_S,
            TIME_TOTAL_S,
            DONE,  # This is ignored because FunctionAPI has different handling
            "timestamp",
            "time_since_restore",
            "experiment_id",
            "date",
        }

        self.assertEqual(len(class_output), len(results))
        self.assertEqual(len(function_output), len(results))

        def as_comparable_result(result):
            return {
                k: v
                for k, v in result.items() if k not in NO_COMPARE_FIELDS
            }

        function_comparable = [
            as_comparable_result(result) for result in function_output
        ]
        class_comparable = [
            as_comparable_result(result) for result in class_output
        ]

        self.assertEqual(function_comparable, class_comparable)

        self.assertEqual(sum(t.get(DONE) for t in scheduler_notif), 2)
        self.assertEqual(as_comparable_result(scheduler_notif[0]),
                         as_comparable_result(scheduler_notif[1]))

        # Make sure the last result is the same.
        self.assertEqual(as_comparable_result(trials[0].last_result),
                         as_comparable_result(trials[1].last_result))

        return function_output, trials
Example #16
def f():
    run_experiments({"foo": {"run": "PPO", "stop": {"asdf": 1}}})
Example #17
def f():
    run_experiments({"foo": {
        "run": "asdf",
    }})
Example #18
def f():
    run_experiments(
        {"foo": {
            "run": grid_search("invalid grid search"),
        }})
Example #19
def f():
    run_experiments(
        {"foo": {
            "run": "asdf",
            "bah": "this param is not allowed",
        }})
Example #20
def f():
    run_experiments({"foo": {}})
Example #21
object_store_memory = 10**9
num_nodes = 3

message = ("Make sure there is enough memory on this machine to run this "
           "workload. We divide the system memory by 2 to provide a buffer.")
assert (num_nodes * object_store_memory + num_redis_shards * redis_max_memory <
        ray._private.utils.get_system_memory() / 2), message

# Simulate a cluster on one machine.

ray.init(address="auto")

# Run the workload.

run_experiments({
    "ppo": {
        "run": "PPO",
        "env": "CartPole-v0",
        "num_samples": 10000,
        "config": {
            "framework": "torch",
            "num_workers": 7,
            "num_gpus": 0,
            "num_sgd_iter": 1,
        },
        "stop": {
            "timesteps_total": 1,
        },
    }
})
Example #22
        config = {
            "num_gpus": 0,
            "num_workers": 2,
            "optimizer": {
                "num_replay_buffer_shards": 1,
            },
            "min_iter_time_s": 3,
            "buffer_size": 1000,
            "learning_starts": 1000,
            "train_batch_size": 128,
            "sample_batch_size": 32,
            "target_network_update_freq": 500,
            "timesteps_per_iteration": 1000,
        }
        group = True
    else:
        config = {}
        group = False

    ray.init()
    run_experiments({
        "two_step": {
            "run": args.run,
            "env": "grouped_twostep" if group else TwoStepGame,
            "stop": {
                "timesteps_total": args.stop,
            },
            "config": config,
        },
    })
Example #23
run_experiments({
    "carla-dqn": {
        "run": "DQN",
        "env": "carla_env",
        "resources": {"cpu": 4, "gpu": 1},
        "config": {
            "env_config": env_config,
            "model": {
                "custom_model": "carla",
                "custom_options": {
                    "image_shape": [
                        80, 80,
                        lambda spec: spec.config.env_config.framestack * (
                            spec.config.env_config.use_depth_camera and 1 or 3
                        ),
                    ],
                },
                "conv_filters": [
                    [16, [8, 8], 4],
                    [32, [4, 4], 2],
                    [512, [10, 10], 1],
                ],
            },
            "timesteps_per_iteration": 100,
            "learning_starts": 1000,
            "schedule_max_timesteps": 100000,
            "gamma": 0.8,
            "tf_session_args": {
              "gpu_options": {"allow_growth": True},
            },
        },
    },
})
Example #24
def f():
    run_experiments({"foo": {
        "run": "f1",
    }})
Example #25
    sched = AsyncHyperBandScheduler(
        time_attr="training_iteration",
        reward_attr="neg_mean_loss",
        max_t=400,
        grace_period=20)
    tune.register_trainable("train_mnist",
                            lambda cfg, rprtr: train_mnist(args, cfg, rprtr))
    tune.run_experiments(
        {
            "exp": {
                "stop": {
                    "mean_accuracy": 0.98,
                    "training_iteration": 1 if args.smoke_test else 20
                },
                "resources_per_trial": {
                    "cpu": 3,
                    "gpu": int(not args.no_cuda)
                },
                "run": "train_mnist",
                "num_samples": 1 if args.smoke_test else 10,
                "config": {
                    "lr": tune.sample_from(
                        lambda spec: np.random.uniform(0.001, 0.1)),
                    "momentum": tune.sample_from(
                        lambda spec: np.random.uniform(0.1, 0.9)),
                }
            }
        },
        verbose=0,
        scheduler=sched)
Example #26
    def custom_stats(self):
        return {
            "policy_loss": self.policy_loss,
            "imitation_loss": self.imitation_loss,
        }


if __name__ == "__main__":
    ray.init()
    args = parser.parse_args()

    ModelCatalog.register_custom_model("custom_loss", CustomLossModel)
    run_experiments({
        "custom_loss": {
            "run": "PG",
            "env": "CartPole-v0",
            "stop": {
                "training_iteration": args.iters,
            },
            "config": {
                "num_workers": 0,
                "model": {
                    "custom_model": "custom_loss",
                    "custom_options": {
                        "input_files": args.input_files,
                    },
                },
            },
        },
    })
Example #27
        create_env, env_name = make_create_env(params=flow_params, version=0)
        env_name = env_name + '_[FLOW_RATE, FLOW_RATE_MERGE, RL_PENETRATION]:[{},{},{:.3f}]'.format(
            FLOW_RATE, FLOW_RATE_MERGE, RL_PENETRATION)
        # Register as rllib env
        register_env(env_name, create_env)

        checkpoint_path = base + '/checkpoint_{}/checkpoint-{}'.format(
            checkpoint, checkpoint)
        config_path = base + '/params.pkl'

        with open(config_path, mode='rb') as f:
            config = pickle.load(f)
        exp_tag = {
            "run": 'PPO',
            "env": env_name,
            "config": {
                **config
            },
            "checkpoint_freq": 25,
            "checkpoint_at_end": True,
            "max_failures": 999,
            "stop": {
                "training_iteration": checkpoint + 50
            },
            "restore": checkpoint_path,
            "num_samples": 1,
        }
        exp_list.append(Experiment.from_json(args.exp_tag, exp_tag))

    trials = run_experiments(experiments=exp_list)
Example #28
register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()

run_experiments({
    "carla-a3c": {
        "run": "A3C",
        "env": "carla_env",
        "resources": {
            "cpu": 4,
            "gpu": 1
        },
        "config": {
            "env_config": env_config,
            "model": {
                "custom_model":
                "carla",
                "custom_options": {
                    "image_shape": [80, 80, 6],
                },
                "conv_filters": [
                    [16, [8, 8], 4],
                    [32, [4, 4], 2],
                    [512, [10, 10], 1],
                ],
            },
            "gamma": 0.8,
            "num_workers": 1,
        },
    },
})
Example #29
    for i in range(args.num_iters):
        if i % 10 == 0:
            start = time.time()
            loss = sgd.step(fetch_stats=True)["loss"]
            metrics = sgd.foreach_model(lambda model: model.get_metrics())
            acc = [m["accuracy"] for m in metrics]
            print("Iter", i, "loss", loss, "accuracy", acc)
            print("Time per iteration", time.time() - start)
            assert len(set(acc)) == 1, ("Models out of sync", acc)
            reporter(timesteps_total=i, mean_loss=loss, mean_accuracy=acc[0])
        else:
            sgd.step()


if __name__ == "__main__":
    args = parser.parse_args()
    ray.init(redis_address=args.redis_address)

    if args.tune:
        run_experiments({
            "mnist_sgd": {
                "run": train_mnist,
                "config": {
                    "args": args,
                },
            },
        })
    else:
        train_mnist({"args": args}, lambda **kw: None)
Example #30
ray.init(redis_address=redis_address)
run_experiments({
    "carla-a3c": {
        "run": "A3C",
        "env": "carla_env",
        "resources": {
            "cpu": 5,
            "gpu": 2,
            "driver_gpu_limit": 0
        },
        "config": {
            "env_config": env_config,
            "use_gpu_for_workers": True,
            "model": {
                "custom_model":
                "carla",
                "custom_options": {
                    "image_shape": [80, 80, 6],
                },
                "conv_filters": [
                    [16, [8, 8], 4],
                    [32, [4, 4], 2],
                    [512, [10, 10], 1],
                ],
            },
            "gamma": 0.95,
            "num_workers": 2,
        },
    },
})
Example #31
                    default=2,
                    help="How many gpus do you want ? Because we own too many")

args = parser.parse_args()

set_seed(args.seed)
ray.init(object_id_seed=args.seed, num_gpus=args.n_gpu, num_cpus=args.n_cpu)

full_config = load_config(env_config_file=args.env_config,
                          model_config_file=args.model_config,
                          env_ext_file=args.env_ext,
                          model_ext_file=args.model_ext)

if args.pbt_config:
    assert args.grid_config is None, \
        "Cannot set Population based training and grid search, choose wisely"
    full_config, pbt_scheduler = prepare_pbt_config(full_config,
                                                    args.pbt_config)
else:
    pbt_scheduler = None

if args.grid_config:
    full_config = grid_search_overriding(full_config, args.grid_config)

experiment = create_expe_spec(full_config,
                              n_cpu=args.n_cpu,
                              n_gpu=args.n_gpu,
                              exp_dir=args.exp_dir)

tune.run_experiments(experiments=experiment,
                     scheduler=pbt_scheduler,
                     queue_trials=True)
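
A usage note on the final call: queue_trials=True lets Tune queue trials whose resource requests cannot currently be met instead of failing fast, which is what makes a spec like this usable on an autoscaling cluster.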
Example #32
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config


if __name__ == "__main__":
    alg_run, gym_name, config = setup_exps()
    ray.init(num_cpus=N_CPUS + 1,
             object_store_memory=2048 * 1024 * 1024)
    trials = run_experiments({
        flow_params["exp_tag"]: {
            "run": alg_run,
            "env": gym_name,
            "config": {
                **config
            },
            "checkpoint_freq": 1,
            "checkpoint_at_end": True,
            "max_failures": 999,
            "stop": {
                "training_iteration": 1,
            },
        }
    })
Example #33
    args, _ = parser.parse_known_args()

    mnist_spec = {
        'run': train,
        'num_samples': 10,
        'stop': {
            'mean_accuracy': 0.99,
            'timesteps_total': 600,
        },
        'config': {
            'activation': grid_search(['relu', 'elu', 'tanh']),
        },
    }

    if args.smoke_test:
        mnist_spec['stop']['training_iteration'] = 2
        mnist_spec['num_samples'] = 1

    ray.init()

    from ray.tune.schedulers import AsyncHyperBandScheduler
    run_experiments(
        {
            'tune_mnist_test': mnist_spec
        },
        scheduler=AsyncHyperBandScheduler(
            time_attr="timesteps_total",
            reward_attr="mean_accuracy",
            max_t=600,
        ))
Example #34
    config.update({
        'multiagent': {
            'policies': policy_graphs,
            'policy_mapping_fn': tune.function(policy_mapping_fn),
            'policies_to_train': ['av']
        }
    })

    return alg_run, env_name, config


# RUN EXPERIMENT

if __name__ == '__main__':
    alg_run, env_name, config = setup_exps(flow_params)
    ray.init(num_cpus=N_CPUS + 1)

    run_experiments({
        flow_params['exp_tag']: {
            'run': alg_run,
            'env': env_name,
            'checkpoint_freq': 20,
            'checkpoint_at_end': True,
            'stop': {
                'training_iteration': N_TRAINING_ITERATIONS
            },
            'config': config,
        },
    })
Example #35
    result = subprocess.check_output(
        "ps aux | grep '{}' | grep -v grep || true".format(UNIQUE_CMD),
        shell=True)
    return result


if __name__ == "__main__":
    register_env("subproc", lambda config: EnvWithSubprocess(config))
    ray.init()
    assert os.path.exists(UNIQUE_FILE_0)
    assert os.path.exists(UNIQUE_FILE_1)
    assert not leaked_processes()
    run_experiments({
        "demo": {
            "run": "PG",
            "env": "subproc",
            "num_samples": 1,
            "config": {
                "num_workers": 1,
            },
            "stop": {
                "training_iteration": 1
            },
        },
    })
    leaked = leaked_processes()
    assert not leaked, "LEAKED PROCESSES: {}".format(leaked)
    assert not os.path.exists(UNIQUE_FILE_0), "atexit handler not called"
    assert not os.path.exists(UNIQUE_FILE_1), "atexit handler not called"
    print("OK")
Example #36
import sys

import yaml

import ray
from ray.tune import run_experiments

if __name__ == '__main__':

    ray.init()

    for test in sys.argv[1:]:
        with open(test) as f:
            experiments = yaml.safe_load(f)

        print("== Test config ==")
        print(yaml.dump(experiments))

        for i in range(3):
            trials = run_experiments(experiments)

            num_failures = 0
            for t in trials:
                if (t.last_result["episode_reward_mean"] <
                        t.stopping_criterion["episode_reward_mean"]):
                    num_failures += 1

            if not num_failures:
                print("Regression test PASSED")
                sys.exit(0)

            print("Regression test flaked, retry", i)

        print("Regression test FAILED")
        sys.exit(1)
Example #37
    def step(self, action):
        assert action in [0, 1], action
        if action == 0 and self.cur_pos > 0:
            self.cur_pos -= 1
        elif action == 1:
            self.cur_pos += 1
        done = self.cur_pos >= self.end_pos
        return [self.cur_pos], 1 if done else 0, done, {}


if __name__ == "__main__":
    # Can also register the env creator function explicitly with:
    # register_env("corridor", lambda config: SimpleCorridor(config))
    ray.init()
    run_experiments({
        "demo": {
            "run": "PPO",
            "env": SimpleCorridor,  # or "corridor" if registered above
            "stop": {
                "timesteps_total": 10000,
            },
            "config": {
                "lr": grid_search([1e-2, 1e-4, 1e-6]),  # try different lrs
                "num_workers": 1,  # parallelism
                "env_config": {
                    "corridor_length": 5,
                },
            },
        },
    })
Example #38
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config


if __name__ == '__main__':
    alg_run, gym_name, config = setup_exps()
    ray.init(num_cpus=N_CPUS + 1, redirect_output=False)
    trials = run_experiments({
        flow_params['exp_tag']: {
            'run': alg_run,
            'env': gym_name,
            'config': {
                **config
            },
            'checkpoint_freq': 20,
            'max_failures': 999,
            'stop': {
                'training_iteration': 200,
            },
        }
    })
Example #39
        num_cpus=10,
        num_gpus=0,
        resources={str(i): 2},
        object_store_memory=object_store_memory,
        redis_max_memory=redis_max_memory)
ray.init(redis_address=cluster.redis_address)

# Run the workload.

pbt = PopulationBasedTraining(
    time_attr="training_iteration",
    reward_attr="episode_reward_mean",
    perturbation_interval=10,
    hyperparam_mutations={
        "lr": [0.1, 0.01, 0.001, 0.0001],
    })

run_experiments(
    {
        "pbt_test": {
            "run": "PG",
            "env": "CartPole-v0",
            "num_samples": 8,
            "config": {
                "lr": 0.01,
            },
        }
    },
    scheduler=pbt,
    verbose=False)
Example #40
                        action='store_true',
                        help='Finish quickly for testing')
    args, _ = parser.parse_known_args()

    register_trainable("my_class", TrainMNIST)
    mnist_spec = {
        'run': 'my_class',
        'stop': {
            'mean_accuracy': 0.99,
            'time_total_s': 600,
        },
        'config': {
            'learning_rate':
            sample_from(lambda spec: 10**np.random.uniform(-5, -3)),
            'activation':
            grid_search(['relu', 'elu', 'tanh']),
        },
        "num_samples": 10,
    }

    if args.smoke_test:
        mnist_spec['stop']['training_iteration'] = 20
        mnist_spec['num_samples'] = 2

    ray.init()
    hyperband = HyperBandScheduler(time_attr="training_iteration",
                                   reward_attr="mean_accuracy",
                                   max_t=10)

    run_experiments({'mnist_hyperband_test': mnist_spec}, scheduler=hyperband)
Example #41
    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="episode_reward_mean",
        perturbation_interval=10,
        hyperparam_mutations={
            # Allow for scaling-based perturbations, with a uniform backing
            # distribution for resampling.
            "factor_1": lambda: random.uniform(0.0, 20.0),
            # Allow perturbations within this set of categorical values.
            "factor_2": [1, 2],
        })

    # Try to find the best factor 1 and factor 2
    run_experiments(
        {
            "pbt_test": {
                "run": MyTrainableClass,
                "stop": {
                    "training_iteration": 20 if args.smoke_test else 99999
                },
                "num_samples": 10,
                "config": {
                    "factor_1": 4.0,
                    "factor_2": 1.0,
                },
            }
        },
        scheduler=pbt,
        verbose=False)
Example #42
    config['eval_prob'] = 0.05
    config['observation_filter'] = "NoFilter"

    # save the flow params for replay
    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    # Register as rllib env
    register_env(env_name, create_env)

    trials = run_experiments({
        flow_params["exp_tag"]: {
            "run": alg_run,
            "env": env_name,
            "config": {
                **config
            },
            "checkpoint_freq": 25,
            "max_failures": 999,
            "stop": {
                "training_iteration": 500
            },
            "num_samples": 1,
            "upload_dir": "s3://<BUCKET NAME>"
        },
    })
Example #43
    "scenarios": [LANE_KEEP],
})

register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()

ray.init()
run_experiments({
    "carla-a3c": {
        "run": "A3C",
        "env": "carla_env",
        "resources": {"cpu": 4, "gpu": 1},
        "config": {
            "env_config": env_config,
            "model": {
                "custom_model": "carla",
                "custom_options": {
                    "image_shape": [80, 80, 6],
                },
                "conv_filters": [
                    [16, [8, 8], 4],
                    [32, [4, 4], 2],
                    [512, [10, 10], 1],
                ],
            },
            "gamma": 0.8,
            "num_workers": 1,
        },
    },
})
Example #44
    from ray import tune
    from ray.tune.schedulers import HyperBandScheduler

    ray.init()
    sched = HyperBandScheduler(
        time_attr="training_iteration", reward_attr="neg_mean_loss")
    tune.run_experiments(
        {
            "exp": {
                "stop": {
                    "mean_accuracy": 0.95,
                    "training_iteration": 1 if args.smoke_test else 20,
                },
                "trial_resources": {
                    "cpu": 3
                },
                "run": TrainMNIST,
                "num_samples": 1 if args.smoke_test else 20,
                "checkpoint_at_end": True,
                "config": {
                    "args": args,
                    "lr": tune.sample_from(
                        lambda spec: np.random.uniform(0.001, 0.1)),
                    "momentum": tune.sample_from(
                        lambda spec: np.random.uniform(0.1, 0.9)),
                }
            }
        },
        verbose=0,
        scheduler=sched)
Example #45
# Simulate a cluster on one machine.

cluster = Cluster()
for i in range(num_nodes):
    cluster.add_node(
        redis_port=6379 if i == 0 else None,
        num_redis_shards=num_redis_shards if i == 0 else None,
        num_cpus=10,
        num_gpus=0,
        resources={str(i): 2},
        object_store_memory=object_store_memory,
        redis_max_memory=redis_max_memory)
ray.init(redis_address=cluster.redis_address)

# Run the workload.

run_experiments({
    "impala": {
        "run": "IMPALA",
        "env": "CartPole-v0",
        "config": {
            "num_workers": 8,
            "num_gpus": 0,
            "num_envs_per_worker": 5,
            "remote_worker_envs": True,
            "sample_batch_size": 50,
            "train_batch_size": 100,
        },
    },
})
Example #46
        with open(checkpoint_path) as f:
            self.timestep = json.loads(f.read())["timestep"]


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init()

    # Hyperband early stopping, configured with `episode_reward_mean` as the
    # objective and `training_iteration` as the time unit,
    # which is automatically filled by Tune.
    hyperband = HyperBandScheduler(
        time_attr="training_iteration",
        reward_attr="episode_reward_mean",
        max_t=100)

    exp = Experiment(
        name="hyperband_test",
        run=MyTrainableClass,
        num_samples=20,
        stop={"training_iteration": 1 if args.smoke_test else 99999},
        config={
            "width": sample_from(lambda spec: 10 + int(90 * random.random())),
            "height": sample_from(lambda spec: int(100 * random.random()))
        })

    run_experiments(exp, scheduler=hyperband)
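
run_experiments also accepts a list of Experiment objects, so several configurations can share one scheduler in a single call; a brief sketch reusing the setup above (the second experiment is purely hypothetical):

exp2 = Experiment(
    name="hyperband_test_small",
    run=MyTrainableClass,
    num_samples=5,
    stop={"training_iteration": 50},
    config={
        "width": sample_from(lambda spec: 10 + int(90 * random.random())),
        "height": sample_from(lambda spec: int(100 * random.random()))
    })

run_experiments([exp, exp2], scheduler=hyperband)
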
Example #47
        },
        {
            'name': 'height',
            'type': 'int',
            'bounds': {
                'min': -100,
                'max': 100
            },
        },
    ]

    config = {
        "my_exp": {
            "run": "exp",
            "num_samples": 10 if args.smoke_test else 1000,
            "config": {
                "iterations": 100,
            },
            "stop": {
                "timesteps_total": 100
            },
        }
    }
    algo = SigOptSearch(
        space,
        name="SigOpt Example Experiment",
        max_concurrent=1,
        reward_attr="neg_mean_loss")
    scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")
    run_experiments(config, search_alg=algo, scheduler=scheduler)
Example #48
File: apex.py Project: yncxcw/ray
# for i in range(num_nodes):
#     cluster.add_node(redis_port=6379 if i == 0 else None,
#                      num_redis_shards=num_redis_shards if i == 0 else None,
#                      num_cpus=20,
#                      num_gpus=0,
#                      resources={str(i): 2},
#                      object_store_memory=object_store_memory,
#                      redis_max_memory=redis_max_memory,
#                      dashboard_host="0.0.0.0")
# ray.init(address=cluster.address)
ray.init()

# Run the workload.

run_experiments({
    "apex": {
        "run": "APEX",
        "env": "Pong-v0",
        "config": {
            "num_workers": 3,
            "num_gpus": 0,
            "buffer_size": 10000,
            "learning_starts": 0,
            "rollout_fragment_length": 1,
            "train_batch_size": 1,
            "min_iter_time_s": 10,
            "timesteps_per_iteration": 10,
        },
    }
})
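
Compared with the Ape-X workload in Example #8 above, this variant uses rollout_fragment_length, the newer RLlib name for the setting that older configs call sample_batch_size.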
Example #49
#!/usr/bin/env python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys

import ray
from ray.tune import register_trainable, run_experiments


def f(config, reporter):
    reporter(timesteps_total=1)


if __name__ == "__main__":
    ray.init()
    register_trainable("my_class", f)
    run_experiments({
        "test": {
            "run": "my_class",
            "stop": {
                "training_iteration": 1
            }
        }
    })
    assert 'ray.rllib' not in sys.modules, "RLlib should not be imported"
Example #50
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--num-iters", type=int, default=2000)
    args = parser.parse_args()

    ray.init()
    trials = tune.run_experiments({
        "test": {
            "env": "CartPole-v0",
            "run": "PG",
            "stop": {
                "training_iteration": args.num_iters,
            },
            "config": {
                "callbacks": {
                    "on_episode_start": tune.function(on_episode_start),
                    "on_episode_step": tune.function(on_episode_step),
                    "on_episode_end": tune.function(on_episode_end),
                    "on_sample_end": tune.function(on_sample_end),
                    "on_train_result": tune.function(on_train_result),
                },
            },
        }
    })

    # verify custom metrics for integration tests
    custom_metrics = trials[0].last_result["custom_metrics"]
    print(custom_metrics)
    assert "pole_angle_mean" in custom_metrics
    assert "pole_angle_min" in custom_metrics
    assert "pole_angle_max" in custom_metrics
Example #51
    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json

    create_env, env_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(env_name, create_env)

    trials = run_experiments({
        flow_params["exp_tag"]: {
            "run": "PPO",
            "env": env_name,
            "config": {
                **config
            },
            "checkpoint_freq": 5,
            "max_failures": 999,
            "stop": {
                "training_iteration": 200,
            },
            "repeat": 3,
            "trial_resources": {
                "cpu": 1,
                "gpu": 0,
                "extra_cpu": PARALLEL_ROLLOUTS - 1,
            },
        },
    })
Example #52
        }
    elif args.run == "APEX_QMIX":
        config = {
            "num_gpus": 0,
            "num_workers": 2,
            "optimizer": {
                "num_replay_buffer_shards": 1,
            },
            "min_iter_time_s": 3,
            "buffer_size": 1000,
            "learning_starts": 1000,
            "train_batch_size": 128,
            "sample_batch_size": 32,
            "target_network_update_freq": 500,
            "timesteps_per_iteration": 1000,
        }
    else:
        config = {}

    ray.init()
    run_experiments({
        "two_step": {
            "run": args.run,
            "env": "grouped_twostep",
            "stop": {
                "timesteps_total": args.stop,
            },
            "config": config,
        },
    })
Example #53

register_trainable("my_class", MyTrainableClass)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init()

    # asynchronous hyperband early stopping, configured with
    # `episode_reward_mean` as the
    # objective and `timesteps_total` as the time unit.
    ahb = AsyncHyperBandScheduler(
        time_attr="timesteps_total", reward_attr="episode_reward_mean",
        grace_period=5, max_t=100)

    run_experiments({
        "asynchyperband_test": {
            "run": "my_class",
            "stop": {"training_iteration": 1 if args.smoke_test else 99999},
            "repeat": 20,
            "resources": {"cpu": 1, "gpu": 0},
            "config": {
                "width": lambda spec: 10 + int(90 * random.random()),
                "height": lambda spec: int(100 * random.random()),
            },
        }
    }, scheduler=ahb)
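
This spec predates two renames, repeat to num_samples and resources to resources_per_trial, and the raw lambdas in config were later wrapped in tune.sample_from. A hedged sketch of the same experiment in the newer spelling:

from ray.tune import run_experiments, sample_from

run_experiments({
    "asynchyperband_test": {
        "run": "my_class",
        "stop": {"training_iteration": 1 if args.smoke_test else 99999},
        "num_samples": 20,
        "resources_per_trial": {"cpu": 1, "gpu": 0},
        "config": {
            "width": sample_from(lambda spec: 10 + int(90 * random.random())),
            "height": sample_from(lambda spec: int(100 * random.random())),
        },
    }
}, scheduler=ahb)
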
Example #54
flow_json = json.dumps(flow_params,
                       cls=FlowParamsEncoder,
                       sort_keys=True,
                       indent=4)  # generating a string version of flow_params
config['env_config'][
    'flow_params'] = flow_json  # adding the flow_params to config dict
config['env_config']['run'] = alg_run

# Call the utility function make_create_env to be able to
# register the Flow env for this experiment
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register as rllib env with Gym
register_env(gym_name, create_env)

trials = run_experiments({
    flow_params["exp_tag"]: {
        "run": alg_run,
        "env": gym_name,
        "config": {
            **config
        },
        "checkpoint_freq": 5,  # number of iterations between checkpoints
        "checkpoint_at_end": True,  # generate a checkpoint at the end
        "max_failures": 999,
        "stop": {  # stopping conditions
            "training_iteration": 200,  # number of iterations to stop after
        },
    },
})
Example #55
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init()

    pbt = PopulationBasedTraining(
        time_attr="training_iteration", reward_attr="episode_reward_mean",
        perturbation_interval=10,
        hyperparam_mutations={
            # Allow for scaling-based perturbations, with a uniform backing
            # distribution for resampling.
            "factor_1": lambda: random.uniform(0.0, 20.0),
            # Allow perturbations within this set of categorical values.
            "factor_2": [1, 2],
        })

    # Try to find the best factor 1 and factor 2
    run_experiments({
        "pbt_test": {
            "run": "my_class",
            "stop": {"training_iteration": 2 if args.smoke_test else 99999},
            "repeat": 10,
            "resources": {"cpu": 1, "gpu": 0},
            "config": {
                "factor_1": 4.0,
                "factor_2": 1.0,
            },
        }
    }, scheduler=pbt, verbose=False)
Example #56
#!/usr/bin/env python

import sys

import ray
from ray.tune import register_trainable, run_experiments


def f(config, reporter):
    reporter(timesteps_total=1)


if __name__ == "__main__":
    ray.init(num_cpus=2)

    register_trainable("my_class", f)
    run_experiments({
        "test": {
            "run": "my_class",
            "stop": {
                "training_iteration": 1
            }
        }
    })
    assert "ray.rllib" not in sys.modules, "RLlib should not be imported"
    assert "mlflow" not in sys.modules, "MLflow should not be imported"
Example #57
    from ray import tune
    from ray.tune.schedulers import HyperBandScheduler

    ray.init()
    sched = HyperBandScheduler(
        time_attr="training_iteration", reward_attr="neg_mean_loss")
    tune.run_experiments(
        {
            "exp": {
                "stop": {
                    "mean_accuracy": 0.95,
                    "training_iteration": 1 if args.smoke_test else 20,
                },
                "resources_per_trial": {
                    "cpu": 3
                },
                "run": TrainMNIST,
                "num_samples": 1 if args.smoke_test else 20,
                "checkpoint_at_end": True,
                "config": {
                    "args": args,
                    "lr": tune.sample_from(
                        lambda spec: np.random.uniform(0.001, 0.1)),
                    "momentum": tune.sample_from(
                        lambda spec: np.random.uniform(0.1, 0.9)),
                }
            }
        },
        verbose=0,
        scheduler=sched)
Example #58
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--num-iters", type=int, default=2000)
    args = parser.parse_args()

    ray.init()
    trials = tune.run_experiments({
        "test": {
            "env": "CartPole-v0",
            "run": "PG",
            "stop": {
                "training_iteration": args.num_iters,
            },
            "config": {
                "callbacks": {
                    "on_episode_start": tune.function(on_episode_start),
                    "on_episode_step": tune.function(on_episode_step),
                    "on_episode_end": tune.function(on_episode_end),
                    "on_sample_end": tune.function(on_sample_end),
                    "on_train_result": tune.function(on_train_result),
                },
            },
        }
    })

    # verify custom metrics for integration tests
    custom_metrics = trials[0].last_result["custom_metrics"]
    print(custom_metrics)
    assert "mean_pole_angle" in custom_metrics
    assert type(custom_metrics["mean_pole_angle"]) is float
    assert "callback_ok" in trials[0].last_result
Example #59
        '--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data',
        help='Directory for storing input data')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)


# !!! Example of using the ray.tune Python API !!!
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--smoke-test', action='store_true', help='Finish quickly for testing')
    args, _ = parser.parse_known_args()

    register_trainable('train_mnist', train)
    mnist_spec = {
        'run': 'train_mnist',
        'stop': {
          'mean_accuracy': 0.99,
          'time_total_s': 600,
        },
        'config': {
            'activation': grid_search(['relu', 'elu', 'tanh']),
        },
    }

    if args.smoke_test:
        mnist_spec['stop']['training_iteration'] = 2

    ray.init()
    run_experiments({'tune_mnist_test': mnist_spec})
Example #60
    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    # Register as rllib env
    register_env(env_name, create_env)

    exp_tag = {
        "run": alg_run,
        "env": env_name,
        "config": {
            **config
        },
        "checkpoint_freq": 10,
        "max_failures": 999,
        "stop": {
            "training_iteration": 500
        },
        "num_samples": 1,

    }

    if upload_dir:
        exp_tag["upload_dir"] = "s3://" + upload_dir

    trials = run_experiments({
        flow_params["exp_tag"]: exp_tag
    })