def variant_equals(*keys):
    def get_from_spec(spec):
        # TODO(hartikainen): This may break in some cases. ray.tune seems to
        # add a 'config' key at the top of the spec, whereas `generate_variants`
        # does not.
        node = spec.get('config', spec)
        for key in keys:
            node = node[key]

        return node

    return sample_from(get_from_spec)
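A minimal usage sketch (hypothetical spec keys, mirroring the Q_params/policy_params pattern in the image variant specs below): the Q-function entry resolves to whatever the policy entry ends up with.
# Hypothetical variant spec: 'Q_params' mirrors whatever hidden layer sizes
# the policy entry resolves to when variants are generated.
variant_spec = {
    'policy_params': {'kwargs': {'hidden_layer_sizes': (256, 256)}},
    'Q_params': {'kwargs': {
        'hidden_layer_sizes': variant_equals(
            'policy_params', 'kwargs', 'hidden_layer_sizes'),
    }},
}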
Example #2
    def testDependentLambda(self):
        trials = self.generate_trials({
            "run": "PPO",
            "config": {
                "x": grid_search([1, 2]),
                "y": tune.sample_from(lambda spec: spec.config.x * 100),
            },
        }, "dependent_lambda")
        trials = list(trials)
        self.assertEqual(len(trials), 2)
        self.assertEqual(trials[0].config, {"x": 1, "y": 100})
        self.assertEqual(trials[1].config, {"x": 2, "y": 200})
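Outside the test harness, the same dependent-lambda pattern can be sketched directly with tune.run (a minimal sketch; the trainable and report call assume the classic function-based Tune API):
# Minimal sketch: "y" is derived from the already-sampled "x" via spec.config.
import random
from ray import tune

def trainable(config):
    tune.report(score=config["x"] + config["y"])

tune.run(
    trainable,
    num_samples=4,
    config={
        "x": tune.sample_from(lambda spec: random.randint(1, 10)),
        "y": tune.sample_from(lambda spec: spec.config.x * 100),
    },
)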
Example #3
def choice_n(categories, min_items, max_items):
    """
    Sample a subset from a list

    :param categories: A list to be sampled
    :param min_items: minimum number of items to be sampled
    :param max_items: maximum number of items to be sampled
    """
    return tune.sample_from(lambda spec: list(
        np.random.choice(categories,
                         size=np.random.randint(low=min_items, high=max_items),
                         replace=False)))
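For example, choice_n can be dropped into a config so each trial trains on a random feature subset (hypothetical column names; note the exclusive upper bound):
# Hypothetical usage: each trial samples 1-3 of these columns (max_items is
# exclusive because np.random.randint excludes its upper bound).
config = {
    "feature_cols": choice_n(["age", "height", "weight", "bmi"], 1, 4),
}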
Example #4
def config_word_emb_w2v_custom(config_test_dct):
		from ray import tune
		import numpy as np
		import random
		import utils.definition_network as dn
		
		return {
				"name": "wc_lstm_t1",
				"exp_sets": generate_model(
						dict({
								'function': 'w2vCustom',
								'embedding_type': dn.EmbeddingType.WORD2VEC_CUSTOM,
								'custom_file': 'SMHD-CBOW-A-D-ADUsers-300.bin',
								'use_embedding': dn.UseEmbedding.NON_STATIC,
								'total_registers': config_test_dct['total_registers'],
								'subdirectory': config_test_dct['subdirectory']
						})
				),
				"batch_size": tune.sample_from(lambda spec: random.choice([20, 10, 25, 40])),
				"epochs": tune.sample_from(lambda spec: np.random.randint(32, 80)),
				"threads": 2,
				"lr": tune.sample_from(lambda spec: np.random.uniform(0.001, 0.01)),
				# "momentum": tune.sample_from(lambda spec: np.random.uniform(0.1, 0.9)),
				"lstm_units": tune.sample_from(lambda spec: np.random.randint(16, 128)),
				"dropout_lstm": tune.sample_from(lambda spec: np.random.uniform(0.2, 0.4)),
				"hidden_layers": tune.sample_from(lambda spec: np.random.randint(3, 5))
		}
Example #5
def get_variant_spec_image(universe,
                           domain,
                           task,
                           policy,
                           algorithm,
                           *args,
                           **kwargs):
    variant_spec = get_variant_spec_base(
        universe, domain, task, policy, algorithm, *args, **kwargs)

    if is_image_env(domain, task, variant_spec):
        preprocessor_params = {
            'type': 'convnet_preprocessor',
            'kwargs': {
                'conv_filters': (64, ) * 3,
                'conv_kernel_sizes': (3, ) * 3,
                'conv_strides': (2, ) * 3,
                'normalization_type': 'layer',
                'downsampling_type': 'conv',
            },
        }

        variant_spec['policy_params']['kwargs']['hidden_layer_sizes'] = (M, M)
        variant_spec['policy_params']['kwargs']['observation_preprocessors_params'] = {
            'pixels': deepcopy(preprocessor_params)
        }

        # for key in ('hidden_layer_sizes', 'observation_preprocessors_params'):
        variant_spec['Q_params']['kwargs']['hidden_layer_sizes'] = (
            tune.sample_from(lambda spec: (deepcopy(
                spec.get('config', spec)['policy_params']['kwargs']['hidden_layer_sizes']
            )))
        )
        variant_spec['Q_params']['kwargs']['observation_preprocessors_params'] = (
            tune.sample_from(lambda spec: (deepcopy(
                spec.get('config', spec)['policy_params']['kwargs']['observation_preprocessors_params']
            )))
        )

    return variant_spec
Example #6
    def _fill_config_hyperparam(self, config):
        """Fill in the ``config`` dictionary with the hyperparameters.

        Each distribution in ``self.param_distributions`` must implement
        the ``rvs`` method to generate a random variable. The ``[0]``
        extracts the single value from the list returned by ``rvs``.

        Args:
            config (:obj:`dict`): dictionary to be filled in as the
                configuration for `tune.run`.

        """
        if (self.search_optimization == "bayesian"
                or isinstance(self.search_optimization, BayesOptSearch)):
            return

        if isinstance(self.param_distributions, list):
            return

        samples = 1
        all_lists = True
        for key, distribution in self.param_distributions.items():
            if isinstance(distribution, list):
                import random

                def get_sample(dist):
                    return lambda spec: dist[random.randint(0, len(dist) - 1)]

                config[key] = tune.sample_from(get_sample(distribution))
                samples *= len(distribution)
            else:
                all_lists = False

                def get_sample(dist):
                    return lambda spec: dist.rvs(1)[0]

                config[key] = tune.sample_from(get_sample(distribution))
        if all_lists:
            self.num_samples = min(self.num_samples, samples)
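Outside the class, the same conversion can be sketched in a few lines: plain lists become random index lookups and anything exposing ``rvs`` (e.g. a scipy.stats distribution) is wrapped in ``sample_from`` (a standalone approximation, not the tune-sklearn API):
# Standalone approximation of the conversion above (not the tune-sklearn API).
import random
from ray import tune
from scipy import stats

param_distributions = {
    "max_depth": [3, 5, 7],                          # list -> random choice
    "learning_rate": stats.loguniform(1e-4, 1e-1),   # exposes .rvs()
}

config = {}
for key, dist in param_distributions.items():
    if isinstance(dist, list):
        # Bind `dist` as a default argument so each lambda keeps its own list.
        config[key] = tune.sample_from(
            lambda spec, d=dist: d[random.randint(0, len(d) - 1)])
    else:
        config[key] = tune.sample_from(lambda spec, d=dist: d.rvs(1)[0])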
Example #7
    def testMemoryCheckpointFree(self):
        class MyTrainable(Trainable):
            def setup(self, config):
                # Make sure this is large enough so ray uses object store
                # instead of in-process store.
                self.large_object = random.getrandbits(int(10e6))
                self.iter = 0
                self.a = config["a"]

            def step(self):
                self.iter += 1
                return {"metric": self.iter + self.a}

            def save_checkpoint(self, checkpoint_dir):
                file_path = os.path.join(checkpoint_dir, "model.mock")

                with open(file_path, "wb") as fp:
                    pickle.dump((self.large_object, self.iter, self.a), fp)
                return file_path

            def load_checkpoint(self, path):
                with open(path, "rb") as fp:
                    self.large_object, self.iter, self.a = pickle.load(fp)

        class CustomExecutor(RayTrialExecutor):
            def save(self, *args, **kwargs):
                checkpoint = super(CustomExecutor, self).save(*args, **kwargs)
                assert object_memory_usage() <= (12 * 80e6)
                return checkpoint

        param_a = MockParam([1, -1])

        pbt = PopulationBasedTraining(
            time_attr="training_iteration",
            metric="metric",
            mode="max",
            perturbation_interval=1,
            hyperparam_mutations={"b": [-1]},
        )

        tune.run(
            MyTrainable,
            name="ray_demo",
            scheduler=pbt,
            stop={"training_iteration": 10},
            num_samples=3,
            checkpoint_freq=1,
            fail_fast=True,
            config={"a": tune.sample_from(lambda _: param_a())},
            trial_executor=CustomExecutor(queue_trials=False,
                                          reuse_actors=False),
        )
Example #8
    def testTuneSampleAPI(self):
        config = {
            "func": tune.sample_from(lambda spec: spec.config.uniform * 0.01),
            "uniform": tune.uniform(-5, -1),
            "quniform": tune.quniform(3.2, 5.4, 0.2),
            "loguniform": tune.loguniform(1e-4, 1e-2),
            "qloguniform": tune.qloguniform(1e-4, 1e-1, 5e-5),
            "choice": tune.choice([2, 3, 4]),
            "randint": tune.randint(-9, 15),
            "qrandint": tune.qrandint(-21, 12, 3),
            "randn": tune.randn(10, 2),
            "qrandn": tune.qrandn(10, 2, 0.2),
        }
        for _, (_, generated) in zip(range(1000),
                                     generate_variants({"config": config})):
            out = generated["config"]

            self.assertAlmostEqual(out["func"], out["uniform"] * 0.01)

            self.assertGreaterEqual(out["uniform"], -5)
            self.assertLess(out["uniform"], -1)

            self.assertGreaterEqual(out["quniform"], 3.2)
            self.assertLessEqual(out["quniform"], 5.4)
            self.assertAlmostEqual(out["quniform"] / 0.2,
                                   round(out["quniform"] / 0.2))

            self.assertGreaterEqual(out["loguniform"], 1e-4)
            self.assertLess(out["loguniform"], 1e-2)

            self.assertGreaterEqual(out["qloguniform"], 1e-4)
            self.assertLessEqual(out["qloguniform"], 1e-1)
            self.assertAlmostEqual(out["qloguniform"] / 5e-5,
                                   round(out["qloguniform"] / 5e-5))

            self.assertIn(out["choice"], [2, 3, 4])

            self.assertGreaterEqual(out["randint"], -9)
            self.assertLess(out["randint"], 15)

            self.assertGreaterEqual(out["qrandint"], -21)
            self.assertLessEqual(out["qrandint"], 12)
            self.assertEqual(out["qrandint"] % 3, 0)

            # Very improbable for these to fail (randn/qrandn: mean 10, sd 2)
            self.assertGreater(out["randn"], 0)
            self.assertLess(out["randn"], 20)

            self.assertGreater(out["qrandn"], 0)
            self.assertLess(out["qrandn"], 20)
            self.assertAlmostEqual(out["qrandn"] / 0.2,
                                   round(out["qrandn"] / 0.2))
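Most of the built-in samplers above can be approximated with plain ``sample_from`` lambdas when a custom distribution is needed; a rough sketch for two of them (not exact equivalents of the built-ins):
# Rough sample_from approximations of tune.uniform and tune.loguniform.
import numpy as np
from ray import tune

config = {
    "uniform_like": tune.sample_from(lambda _: np.random.uniform(-5, -1)),
    # Log-uniform: draw the exponent uniformly, then exponentiate.
    "loguniform_like": tune.sample_from(
        lambda _: 10 ** np.random.uniform(np.log10(1e-4), np.log10(1e-2))),
}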
Example #9
    def testRecursiveDep(self):
        try:
            list(
                self.generate_trials({
                    "run": "PPO",
                    "config": {
                        "foo": tune.sample_from(lambda spec: spec.config.foo),
                    },
                }, "recursive_dep"))
        except RecursiveDependencyError as e:
            assert "`foo` recursively depends on" in str(e), e
        else:
            assert False
Example #10
def _best_guess_spec(envs=None):
    spec = {
        "config": {
            "env_name:embed_path":
            tune.grid_search(_env_victim(envs)),
            "embed_index":
            tune.sample_from(lambda spec: VICTIM_INDEX[spec.config[
                "env_name:embed_path"][0]]),
            "seed":
            tune.grid_search(list(range(3))),
        },
    }
    return spec
Example #11
    def run_test_exp(self):
        ahb = AsyncHyperBandScheduler(time_attr="training_iteration",
                                      reward_attr=self.metric,
                                      grace_period=5,
                                      max_t=100)

        run(MyTrainableClass,
            name=self.test_name,
            scheduler=ahb,
            local_dir=self.test_dir,
            **{
                "stop": {
                    "training_iteration": 1
                },
                "num_samples": 10,
                "config": {
                    "width":
                    sample_from(lambda spec: 10 + int(90 * random.random())),
                    "height":
                    sample_from(lambda spec: int(100 * random.random())),
                },
            })
Example #12
def parse_option_space(args):
    sampling_space = {'trainer_path': 'runs'}
    perturb_space = {}

    for opt in args.opt_space:
        key, optstr = opt.split('=')
        space_opt = eval(optstr)

        if isinstance(space_opt, (int, float)):
            sampling_space[key] = space_opt
        elif type(space_opt) is list:
            sampling_space[key] = tune.grid_search(space_opt)
            perturb_space[key] = space_opt
        elif type(space_opt) is str:
            sampling_space[key] = tune.sample_from(
                eval('lambda spec: ' + space_opt))
            perturb_space[key] = eval('lambda: ' + space_opt)
        else:
            sampling_space[key] = tune.sample_from(space_opt)
            perturb_space[key] = space_opt

    return sampling_space, perturb_space
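A usage sketch with made-up ``--opt-space`` values: scalars are kept as-is, lists become ``tune.grid_search``, and strings are evaluated as the body of a ``lambda spec:`` sampler (this assumes ``np`` is importable in the module that evaluates the strings):
# Hypothetical CLI-style input for parse_option_space.
import argparse

args = argparse.Namespace(opt_space=[
    'gamma=0.99',                        # scalar: used directly
    'lr=[1e-4, 3e-4, 1e-3]',             # list: tune.grid_search over values
    'seed=np.random.randint(10000)',     # string: eval'd as a lambda body
])
sampling_space, perturb_space = parse_option_space(args)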
Example #13
    def testQueueFilling(self):
        os.environ["TUNE_MAX_PENDING_TRIALS_PG"] = "1"

        ray.init(num_cpus=4)

        def f1(config):
            for i in range(10):
                yield i

        tune.register_trainable("f1", f1)

        search_alg = BasicVariantGenerator()
        search_alg.add_configurations(
            {
                "foo": {
                    "run": "f1",
                    "num_samples": 100,
                    "config": {
                        "a": tune.sample_from(lambda spec: 5.0 / 7),
                        "b": tune.sample_from(lambda spec: "long" * 40),
                    },
                    "resources_per_trial": {"cpu": 2},
                }
            }
        )

        runner = TrialRunner(search_alg=search_alg)

        runner.step()
        runner.step()
        runner.step()
        self.assertEqual(len(runner._trials), 3)

        runner.step()
        self.assertEqual(len(runner._trials), 3)

        self.assertEqual(runner._trials[0].status, Trial.RUNNING)
        self.assertEqual(runner._trials[1].status, Trial.RUNNING)
        self.assertEqual(runner._trials[2].status, Trial.PENDING)
Example #14
    def testDependentGridSearchCallable(self):
        class Normal:
            def __call__(self, _config):
                return random.normalvariate(mu=0, sigma=1)

        class Single:
            def __call__(self, _config):
                return 20

        trials = self.generate_trials({
            "run": "PPO",
            "config": {
                "x": grid_search(
                    [tune.sample_from(Normal()),
                     tune.sample_from(Normal())]),
                "y": tune.sample_from(Single()),
            },
        }, "dependent_grid_search")
        trials = list(trials)
        self.assertEqual(len(trials), 2)
        self.assertEqual(trials[0].config["y"], 20)
        self.assertEqual(trials[1].config["y"], 20)
Example #15
def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2):
    config = {
        "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([2, 4, 8, 16])
    }
    scheduler = ASHAScheduler(
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)
    result = tune.run(
        tune.with_parameters(train_cifar),
        resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
        config=config,
        metric="loss",
        mode="min",
        num_samples=num_samples,
        scheduler=scheduler
    )

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_trial.last_result["accuracy"]))

    if ray.util.client.ray.is_connected():
        # If using Ray Client, we want to make sure checkpoint access
        # happens on the server. So we wrap `test_best_model` in a Ray task.
        # We have to make sure it gets executed on the same node that
        # ``tune.run`` is called on.
        from ray.util.ml_utils.node import force_on_current_node
        remote_fn = force_on_current_node(ray.remote(test_best_model))
        ray.get(remote_fn.remote(best_trial))
    else:
        test_best_model(best_trial)
Example #16
def fft_experiment_temp_annealing(fixed_order, softmax_fn, size, ntrials,
                                  nsteps, result_dir, nthreads, smoke_test):
    assert softmax_fn in ['softmax', 'sparsemax']
    config = {
        'fixed_order':
        fixed_order,
        'softmax_fn':
        softmax_fn,
        'size':
        size,
        'lr':
        sample_from(lambda spec: math.exp(
            random.uniform(math.log(1e-4), math.log(5e-1)))),
        'seed':
        sample_from(lambda spec: random.randint(0, 1 << 16)),
        'n_steps_per_epoch':
        nsteps,
    }
    if (not fixed_order) and softmax_fn == 'softmax':
        config['semantic_loss_weight'] = sample_from(lambda spec: math.exp(
            random.uniform(math.log(5e-3), math.log(5e-1))))
    experiment = RayExperiment(
        name=f'Fft_factorization_Temp_{fixed_order}_{softmax_fn}_{size}',
        run=TrainableFftTempAnnealing,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        resources_per_trial={
            'cpu': nthreads,
            'gpu': 0
        },
        stop={
            'training_iteration': 1 if smoke_test else 99999,
            'negative_loss': -1e-8
        },
        config=config,
    )
    return experiment
Example #17
def tune_q_log_uniform(
    high: int,
    low: int = 1,
    q: int = 1,
):
    def func(spec):
        # Sample uniformly in log space, exponentiate back, then clamp below
        # by `low` and quantize down to a multiple of q.
        value = numpy.exp(
            numpy.random.uniform(numpy.log(low), numpy.log(high)))
        return int(max(low, numpy.round(value))) // q * q

    return tune.sample_from(func)
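For example (hypothetical key), a hidden width sampled roughly log-uniformly and quantized to a multiple of 8:
config = {
    "hidden_units": tune_q_log_uniform(high=512, low=16, q=8),
}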
Example #18
    def run_test_exp(self):
        def training_function(config, checkpoint_dir=None):
            tune.report(episode_reward_mean=config["alpha"])

        return tune.run(
            training_function,
            name=self.test_name,
            local_dir=self.test_dir,
            stop={"training_iteration": 1},
            num_samples=self.num_samples,
            config={
                "alpha": tune.sample_from(lambda spec: 10 + int(90 * random.random())),
            },
        )
Example #19
    def testPermutationContinuation(self):
        """
        Tests continuation of runs after permutation.
        Sometimes, runs were continued from deleted checkpoints.
        This deterministic initialisation would fail when the
        fix was not applied.
        See issues #9036, #9036
        """
        scheduler = PopulationBasedTraining(
            time_attr="training_iteration",
            metric="mean_accuracy",
            mode="max",
            perturbation_interval=1,
            log_config=True,
            hyperparam_mutations={"c": lambda: 1})

        param_a = MockParam([10, 20, 30, 40])
        param_b = MockParam([1.2, 0.9, 1.1, 0.8])

        random.seed(100)
        np.random.seed(1000)
        tune.run(
            MockTrainable,
            config={
                "a": tune.sample_from(lambda _: param_a()),
                "b": tune.sample_from(lambda _: param_b()),
                "c": 1
            },
            fail_fast=True,
            num_samples=4,
            checkpoint_freq=1,
            checkpoint_at_end=True,
            keep_checkpoints_num=1,
            checkpoint_score_attr="min-training_iteration",
            scheduler=scheduler,
            name="testPermutationContinuation",
            stop={"training_iteration": 3})
Example #20
def get_variant_spec(universe, domain, task, policy):
    variant_spec = {
        'domain': domain,
        'task': task,
        'universe': universe,
        'git_sha': get_git_rev(),

        'env_params': ENV_PARAMS.get(domain, {}).get(task, {}),
        'policy_params': deep_update(
            POLICY_PARAMS_BASE[policy],
            POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})
        ),
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
            }
        },
        'algorithm_params': deep_update(
            ALGORITHM_PARAMS_BASE,
            ALGORITHM_PARAMS_PER_DOMAIN.get(domain, {})
        ),
        'replay_pool_params': {
            'type': 'SimpleReplayPool',
            'kwargs': {
                'max_size': 1e6,
            }
        },
        'sampler_params': {
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'min_pool_size': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'batch_size': 256,
            }
        },
        'run_params': {
            'seed': tune.sample_from(
                lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': True,
            'checkpoint_frequency': NUM_EPOCHS_PER_DOMAIN.get(
                domain, DEFAULT_NUM_EPOCHS) // NUM_CHECKPOINTS,
            'checkpoint_replay_pool': False,
        },
    }

    return variant_spec
Example #21
def dense_env_reward_anneal_search(train):
    """Search for the best annealing fraction in SumoHumans."""
    train = dict(train)
    _best_guess_train(train)
    train["total_timesteps"] = int(40e6)
    train["rew_shape"] = True
    train["env_name"] = "multicomp/SumoHumansAutoContact-v0"
    train["embed_path"] = 3  # median difficulty victim (1 is easy, 2 is hard)
    spec = {
        "config": {
            "rew_shape_params": {
                "anneal_frac": tune.sample_from(lambda spec: np.random.rand()),
            },
            "seed": tune.sample_from(lambda spec: np.random.randint(1000)),
        },
        "run_kwargs": {
            "num_samples": 10
        },
    }
    exp_name = "dense_env_reward_anneal_search"
    _ = locals()  # quieten flake8 unused variable warning
    del _
Example #22
    def testGetLastCheckpoint(self):
        # one more experiment with 2 iterations
        new_ea = tune.run(
            MyTrainableClass,
            name=self.test_name,
            local_dir=self.test_dir,
            stop={"training_iteration": 2},
            checkpoint_freq=1,
            config={
                "width":
                tune.sample_from(lambda spec: 10 + int(90 * random.random())),
                "height":
                tune.sample_from(lambda spec: int(100 * random.random())),
            },
        )

        # check if it's loaded correctly
        last_checkpoint = new_ea.get_last_checkpoint()._local_path
        assert self.test_path in last_checkpoint
        assert "checkpoint_000002" in last_checkpoint

        # test restoring the checkpoint and running for another iteration
        tune.run(
            MyTrainableClass,
            name=self.test_name,
            local_dir=self.test_dir,
            restore=last_checkpoint,
            stop={"training_iteration": 3},
            checkpoint_freq=1,
            config={
                "width":
                tune.sample_from(lambda spec: 10 + int(90 * random.random())),
                "height":
                tune.sample_from(lambda spec: int(100 * random.random())),
            },
        )
Example #23
def dense_env_reward_anneal_search(train):
    """Search for the best annealing fraction in SumoHumans."""
    train = dict(train)
    _best_guess_train(train)
    train['total_timesteps'] = int(40e6)
    train['rew_shape'] = True
    train['env_name'] = 'multicomp/SumoHumansAutoContact-v0'
    train['victim_path'] = 3  # median difficulty victim (1 is easy, 2 is hard)
    spec = {
        'config': {
            'rew_shape_params': {
                'anneal_frac': tune.sample_from(
                    lambda spec: np.random.rand()
                ),
            },
            'seed': tune.sample_from(
                lambda spec: np.random.randint(1000)
            ),
        },
        'num_samples': 10,
    }
    exp_name = 'dense_env_reward_anneal_search'
    _ = locals()  # quieten flake8 unused variable warning
    del _
Example #24
def main(num_samples=10, max_num_epochs=10, gpus_per_trial=3):
    config = {
        "input_dim": 3,
        "steps_ahead": [3, 6, 12],
        "seq_length": tune.sample_from(lambda _: 2**np.random.randint(8, 11)),
        "nhid": tune.sample_from(lambda _: 2**np.random.randint(3, 7)),
        "levels": tune.sample_from(lambda _: 2**np.random.randint(1, 4)),
        "kernel_size": tune.sample_from(lambda _: 2**np.random.randint(1, 5)),
        "dropout": tune.choice([0]),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([8, 16, 32, 64, 128])
    }
    scheduler = ASHAScheduler(metric='3',
                              mode="min",
                              max_t=max_num_epochs,
                              grace_period=5,
                              reduction_factor=3)
    reporter = CLIReporter(
        parameter_columns=[
            "seq_length", "nhid", "levels", "kernel_size", "dropout", "lr",
            "batch_size"
        ],
        metric_columns=["3", "6", "12", "training_iteration"])
    result = tune.run(partial(train),
                      resources_per_trial={
                          "cpu": 2,
                          "gpu": gpus_per_trial
                      },
                      config=config,
                      num_samples=num_samples,
                      scheduler=scheduler,
                      progress_reporter=reporter)
    best_trial = result.get_best_trial("3", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["3"]))
Example #25
def apply_condition(config, data):
    # TODO: tune.sample_from only supports the random search algorithm.
    # To make conditional parameters work with the other algorithms, each
    # algorithm will need its own handling (to be added).
    if args.algorithm == 'random':
        dp_pad_min, dp_pad_max = data[
            'CELL_PAD_IN_SITES_DETAIL_PLACEMENT']['minmax']
        dp_pad_step = data['CELL_PAD_IN_SITES_DETAIL_PLACEMENT']['step']
        if dp_pad_step == 1:
            config['CELL_PAD_IN_SITES_DETAIL_PLACEMENT'] = tune.sample_from(
                lambda spec: tune.randint(
                    dp_pad_min,
                    spec.config.CELL_PAD_IN_SITES_GLOBAL_PLACEMENT + 1))
        if dp_pad_step > 1:
            config['CELL_PAD_IN_SITES_DETAIL_PLACEMENT'] = tune.sample_from(
                lambda spec: tune.choice(
                    np.ndarray.tolist(
                        np.arange(
                            dp_pad_min,
                            spec.config.CELL_PAD_IN_SITES_GLOBAL_PLACEMENT + 1,
                            dp_pad_step))))
    return config
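A minimal standalone sketch of the same conditional-parameter idea, with the range of one value depending on another already-sampled value (hypothetical keys; random search only, as the TODO above notes):
# Sketch: "detail_pad" is drawn from [0, global_pad], so its range depends on
# the value sampled for "global_pad".
import numpy as np
from ray import tune

config = {
    "global_pad": tune.randint(1, 5),
    "detail_pad": tune.sample_from(
        lambda spec: np.random.randint(0, spec.config.global_pad + 1)),
}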
Example #26
def sweep_baseline(num_samples):
    # see here https://github.com/ray-project/ray/issues/7084
    ray.init(webui_host='127.0.0.1', object_store_memory=OBJECT_STORE_MINIMUM_MEMORY_BYTES)

    train_set_specs = [esaote_train, philips_train]

    base_config = {'prediction_target': 'Class', 'backend': 'resnet-18', 'n_epochs': 15,
                   'neptune_project': 'createrandom/mus-imageagg', 'batch_size': 32}

    for train_set_spec in train_set_specs:

        total_config = {**base_config, **train_set_spec}

        image_sweep_config = {"lr": sample_from(lambda x: random.uniform(0.001, 0.1)),
                              "backend_mode": sample_from(lambda x: random.choice(['finetune', 'scratch']))}

        config = {**total_config, **image_sweep_config}

        tune.run(train_image_level,
                 config=config,
                 num_samples=num_samples,
                 resources_per_trial={"gpu": 1, "cpu": 8})

    ray.shutdown()
Example #27
def cifar10_experiment(dataset, model, args, optimizer, use_hyperband, lr, lr_decay, weight_decay, ntrials, nmaxepochs, batch, resume_pth, result_dir, cuda, smoke_test):
    assert optimizer in ['Adam', 'SGD'], 'Only Adam and SGD are supported'
    if lr_decay is None:
        lr_decay = {'factor': 1.0, 'period': 1000, 'milestones': None}
    config={
        'optimizer': optimizer,
        'switch_ams': int(0.5 * nmaxepochs) if optimizer == 'Adam' else None,
        'lr': grid_search(lr['grid']) if lr['grid'] is not None else sample_from(lambda spec: math.exp(random.uniform(math.log(lr['min']), math.log(lr['max'])))),
        # 'lr_decay_factor': 0.2 if lr_decay else 1.0,
        # 'lr_decay_period': lr_decay_period if lr_decay else 10000,
        # 'decay_milestones': decay_milestones,
        'lr_decay' : lr_decay,
        'weight_decay': 5e-4 if weight_decay else 0.0,
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'device': 'cuda' if cuda else 'cpu',
        'model': {'name': model, 'args': args},
        'dataset': {'name': dataset, 'batch': batch},
     }
    smoke_str = 'smoke_' if smoke_test else '' # for easy finding and deleting unimportant logs
    args_str = '_'.join([k+':'+str(v) for k,v in args.items()])
    timestamp = datetime.datetime.now().replace(microsecond=0).isoformat()
    commit_id = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).strip().decode('utf-8')
    experiment = RayExperiment(
        name=f'{smoke_str}{dataset.lower()}_{model}_{args_str}_{optimizer}_epochs_{nmaxepochs}_{timestamp}_{commit_id}',
        run=TrainableModel,
        local_dir=result_dir,
        num_samples=ntrials if not smoke_test else 1,
        checkpoint_at_end=True,
        checkpoint_freq=1000,  # Just to enable recovery with @max_failures
        max_failures=0,
        resources_per_trial={'cpu': 4, 'gpu': 1 if cuda else 0},
        stop={"training_iteration": 1 if smoke_test else nmaxepochs},
        restore=resume_pth,
        config=config,
    )
    return experiment
Example #28
def get_variant_spec_image(universe, domain, task, policy, algorithm, *args,
                           **kwargs):
    variant_spec = get_variant_spec_base(universe, domain, task, policy,
                                         algorithm, *args, **kwargs)

    if is_image_env(universe, domain, task, variant_spec):
        preprocessor_params = {
            'class_name': 'convnet_preprocessor',
            'config': {
                'conv_filters': (64, ) * 3,
                'conv_kernel_sizes': (3, ) * 3,
                'conv_strides': (2, ) * 3,
                'normalization_type': 'layer',
                'downsampling_type': 'conv',
            },
        }

        variant_spec['policy_params']['config']['hidden_layer_sizes'] = (400,
                                                                         300)
        variant_spec['policy_params']['config']['preprocessors'] = {
            'pixels': deepcopy(preprocessor_params)
        }

        variant_spec['Q_params']['config']['hidden_layer_sizes'] = (
            tune.sample_from(lambda spec: (deepcopy(
                spec.get('config', spec)['policy_params']['config'][
                    'hidden_layer_sizes']))))
        variant_spec['Q_params']['config']['preprocessors'] = tune.sample_from(
            lambda spec: (
                deepcopy(
                    spec.get('config', spec)['policy_params']['config'][
                        'preprocessors']),
                None,  # Action preprocessor is None
            ))

    return variant_spec
Example #29
    def search_space(self, all_available_features):
        return {
            "selected_features": json.dumps(all_available_features),
            "model": "MTNet",
            "lr": 0.001,
            "batch_size": 16,
            "epochs": 1,
            "cnn_dropout": 0.2,
            "rnn_dropout": 0.2,
            "time_step": tune.choice([3, 4]),
            "cnn_height": 2,
            "long_num": tune.choice([3, 4]),
            "ar_size": tune.choice([2, 3]),
            "past_seq_len": tune.sample_from(
                lambda spec: (spec.config.long_num + 1) * spec.config.time_step),
        }
Example #30
def hyper(train):
    """A random search to find good hyperparameters in Bansal et al's environments."""
    train = dict(train)
    _sparse_reward(train)
    # Checkpoints take up a lot of disk space, only save every ~500k steps
    train['checkpoint_interval'] = 2 ** 19
    train['total_timesteps'] = int(3e6)
    spec = {
        'config': {
            'env_name': tune.grid_search(
                ['multicomp/KickAndDefend-v0', 'multicomp/SumoHumans-v0']
            ),
            'victim_path': tune.sample_from(
                lambda spec: TARGET_VICTIM[spec.config.env_name]
            ),
            'seed': tune.sample_from(
                lambda spec: np.random.randint(1000)
            ),
            # Dec 2018 experiments used 2^11 = 2048 batch size.
            # Aurick Zhou used 2^14 = 16384; Bansal et al use 409600 ~= 2^19.
            'batch_size': tune.sample_from(
                lambda spec: 2 ** np.random.randint(11, 16)
            ),
            'rl_args': {
                # PPO2 default is 0.01. run_humanoid.py uses 0.00.
                'ent_coef': tune.sample_from(
                    lambda spec: np.random.uniform(low=0.00, high=0.02)
                ),
                # nminibatches must be a factor of batch size; OK provided power of two
                # PPO2 default is 2^2 = 4; run_humanoid.py is 2^5 = 32
                'nminibatches': tune.sample_from(
                    lambda spec: 2 ** (np.random.randint(0, 7))
                ),
                # PPO2 default is 4; run_humanoid.py is 10
                'noptepochs': tune.sample_from(
                    lambda spec: np.random.randint(1, 11)
                ),
            },
            # PPO2 default is 3e-4; run_humanoid uses 1e-4;
            # Bansal et al use 1e-2 (but with huge batch size).
            # Sample log-uniform between 1e-2 and 1e-5.
            'learning_rate': tune.sample_from(
                lambda spec: 10 ** (-2 + -3 * np.random.random())
            ),
        },
        'num_samples': 100,
    }
    exp_name = 'hyper'
    _ = locals()  # quieten flake8 unused variable warning
    del _
Example #31
        grace_period=20)
    tune.register_trainable("train_mnist",
                            lambda cfg, rprtr: train_mnist(args, cfg, rprtr))
    tune.run_experiments(
        {
            "exp": {
                "stop": {
                    "mean_accuracy": 0.99,
                    "timesteps_total": 10 if args.smoke_test else 300
                },
                "run": "train_mnist",
                "num_samples": 1 if args.smoke_test else 10,
                "resources_per_trial": {
                    "cpu": args.threads,
                    "gpu": 0.5 if args.use_gpu else 0
                },
                "config": {
                    "lr": tune.sample_from(
                        lambda spec: np.random.uniform(0.001, 0.1)),
                    "momentum": tune.sample_from(
                        lambda spec: np.random.uniform(0.1, 0.9)),
                    "hidden": tune.sample_from(
                        lambda spec: np.random.randint(32, 512)),
                    "dropout1": tune.sample_from(
                        lambda spec: np.random.uniform(0.2, 0.8)),
                }
            }
        },
        verbose=0,
        scheduler=sched)
Example #32
    train_spec = {
        "run": Cifar10Model,
        "resources_per_trial": {
            "cpu": 1,
            "gpu": 1
        },
        "stop": {
            "mean_accuracy": 0.80,
            "training_iteration": 30,
        },
        "config": {
            "epochs": 1,
            "batch_size": 64,
            "lr": grid_search([10**-4, 10**-5]),
            "decay": sample_from(lambda spec: spec.config.lr / 100.0),
            "dropout": grid_search([0.25, 0.5]),
        },
        "num_samples": 4,
    }

    if args.smoke_test:
        train_spec["config"]["lr"] = 10**-4
        train_spec["config"]["dropout"] = 0.5

    ray.init()

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="mean_accuracy",
        perturbation_interval=10,
Example #33
# !!! Example of using the ray.tune Python API !!!
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--smoke-test', action='store_true', help='Finish quickly for testing')
    args, _ = parser.parse_known_args()

    register_trainable("my_class", TrainMNIST)
    mnist_spec = {
        'run': 'my_class',
        'stop': {
            'mean_accuracy': 0.99,
            'time_total_s': 600,
        },
        'config': {
            'learning_rate': sample_from(
                lambda spec: 10**np.random.uniform(-5, -3)),
            'activation': grid_search(['relu', 'elu', 'tanh']),
        },
        "num_samples": 10,
    }

    if args.smoke_test:
        mnist_spec['stop']['training_iteration'] = 20
        mnist_spec['num_samples'] = 2

    ray.init()
    hyperband = HyperBandScheduler(
        time_attr="training_iteration", reward_attr="mean_accuracy", max_t=10)

    run_experiments({'mnist_hyperband_test': mnist_spec}, scheduler=hyperband)
Example #34
    from ray import tune
    from ray.tune.schedulers import HyperBandScheduler

    ray.init()
    sched = HyperBandScheduler(
        time_attr="training_iteration", reward_attr="neg_mean_loss")
    tune.run_experiments(
        {
            "exp": {
                "stop": {
                    "mean_accuracy": 0.95,
                    "training_iteration": 1 if args.smoke_test else 20,
                },
                "resources_per_trial": {
                    "cpu": 3
                },
                "run": TrainMNIST,
                "num_samples": 1 if args.smoke_test else 20,
                "checkpoint_at_end": True,
                "config": {
                    "args": args,
                    "lr": tune.sample_from(
                        lambda spec: np.random.uniform(0.001, 0.1)),
                    "momentum": tune.sample_from(
                        lambda spec: np.random.uniform(0.1, 0.9)),
                }
            }
        },
        verbose=0,
        scheduler=sched)
Example #35
    ray.init()
    run_experiments(
        {
            "pbt_humanoid_test": {
                "run": "PPO",
                "env": "Humanoid-v1",
                "num_samples": 8,
                "config": {
                    "kl_coeff": 1.0,
                    "num_workers": 8,
                    "num_gpus": 1,
                    "model": {
                        "free_log_std": True
                    },
                    # These params are tuned from a fixed starting value.
                    "lambda": 0.95,
                    "clip_param": 0.2,
                    "lr": 1e-4,
                    # These params start off randomly drawn from a set.
                    "num_sgd_iter": sample_from(
                        lambda spec: random.choice([10, 20, 30])),
                    "sgd_minibatch_size": sample_from(
                        lambda spec: random.choice([128, 512, 2048])),
                    "train_batch_size": sample_from(
                        lambda spec: random.choice([10000, 20000, 40000]))
                },
            },
        },
        scheduler=pbt)
Example #36
        with open(checkpoint_path) as f:
            self.timestep = json.loads(f.read())["timestep"]


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init()

    # Hyperband early stopping, configured with `episode_reward_mean` as the
    # objective and `training_iteration` as the time unit,
    # which is automatically filled by Tune.
    hyperband = HyperBandScheduler(
        time_attr="training_iteration",
        reward_attr="episode_reward_mean",
        max_t=100)

    exp = Experiment(
        name="hyperband_test",
        run=MyTrainableClass,
        num_samples=20,
        stop={"training_iteration": 1 if args.smoke_test else 99999},
        config={
            "width": sample_from(lambda spec: 10 + int(90 * random.random())),
            "height": sample_from(lambda spec: int(100 * random.random()))
        })

    run_experiments(exp, scheduler=hyperband)