Example 1
                        type=str,
                        default='/tmp/tensorflow/mnist/input_data',
                        help='Directory for storing input data')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)


# !!! Example of using the ray.tune Python API !!!
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--smoke-test',
                        action='store_true',
                        help='Finish quickly for testing')
    args, _ = parser.parse_known_args()

    register_trainable('train_mnist', train)
    mnist_spec = {
        'run': 'train_mnist',
        'stop': {
            'mean_accuracy': 0.99,
            'time_total_s': 600,
        },
        'config': {
            'activation': grid_search(['relu', 'elu', 'tanh']),
            # You can pass any serializable object as well
            'foo': grid_search([np.array([1, 2]),
                                np.array([2, 3])]),
        },
    }

    if args.smoke_test:
Example 2
"""

# Note: if you use any custom models or envs, register them here first, e.g.:
#
# from ray.rllib.examples.env.parametric_actions_cartpole import \
#     ParametricActionsCartPole
# from ray.rllib.examples.model.parametric_actions_model import \
#     ParametricActionsModel
# ModelCatalog.register_custom_model("pa_model", ParametricActionsModel)
# register_env("pa_cartpole", lambda _: ParametricActionsCartPole(10))

from ray.tune import register_trainable
from a2c.tuned_a2c import TunedA2CTrainer
from exploration.tuned_a2c import TunedA2CTrainer as ExplorationA2CTrainer
from fun.fun_policy import FuNTrainer
register_trainable('TunedA2CTrainer', TunedA2CTrainer)
register_trainable('ExplorationA2CTrainer', ExplorationA2CTrainer)
register_trainable('FuNTrainer', FuNTrainer)

from ray.rllib.models import ModelCatalog
from a2c.small_model import SmallConvModel as A2CSmallModel
from a2c.small_lstm_model import SmallConvLSTMModel as A2CLSTMModel
from exploration.small_model import SmallConvModel as ExplorationA2CModel
from fun.fun_model import FuNModel
ModelCatalog.register_custom_model('A2CSmallModel', A2CSmallModel)
ModelCatalog.register_custom_model('A2CLSTMModel', A2CLSTMModel)
ModelCatalog.register_custom_model('ExplorationA2CModel', ExplorationA2CModel)
ModelCatalog.register_custom_model('FuNModel', FuNModel)
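# Once registered, these names are referenced purely as strings in the
# experiment/agent config. A hypothetical sketch (kept as a comment so it does
# not run at import time; the experiment and env names are illustrative, not
# taken from this repo):
#
# from ray import tune
# tune.run_experiments({
#     'a2c_small': {
#         'run': 'TunedA2CTrainer',                        # trainer registered above
#         'config': {
#             'env': 'pa_cartpole',                        # placeholder env name
#             'model': {'custom_model': 'A2CSmallModel'},  # model registered above
#         },
#     }
# })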


class RolloutSaver:
Example 3
    from helper import create_parser
    parser = create_parser()
    args = parser.parse_args()
    mnist.load_data()  # we do this because it's not threadsafe

    import ray
    from ray import tune
    from ray.tune.async_hyperband import AsyncHyperBandScheduler

    ray.init()
    #ray.init(redis_address="localhost:6379")
    sched = AsyncHyperBandScheduler(time_attr="timesteps_total",
                                    reward_attr="mean_accuracy",
                                    max_t=400,
                                    grace_period=20)
    tune.register_trainable("train_mnist",
                            lambda cfg, rprtr: train_mnist(args, cfg, rprtr))
    tune.run_experiments(
        {
            "exp": {
                "stop": {
                    "mean_accuracy": 0.99,
                    "timesteps_total": 300
                },
                "run": "train_mnist",
                "repeat": 100,
                "config": {
                    "lr": lambda spec: np.random.uniform(0.001, 0.1),
                    "momentum": lambda spec: np.random.uniform(0.1, 0.9),
                    "hidden": lambda spec: np.random.randint(32, 512),
                    "dropout1": lambda spec: np.random.uniform(0.2, 0.8),
                }
Example 4
def launch_experiments_ray(variant_specs,
                           args,
                           local_dir,
                           experiment_fn,
                           scheduler=None):
    import ray
    from ray import tune

    tune.register_trainable('mujoco-runner', experiment_fn)

    trial_resources = _normalize_trial_resources(args.trial_resources,
                                                 args.trial_cpus,
                                                 args.trial_gpus,
                                                 args.trial_extra_cpus,
                                                 args.trial_extra_gpus)

    if 'local' in args.mode or 'debug' in args.mode:
        resources = args.resources or {}

        if 'debug' in args.mode:
            # Require a debug resource for each trial, so that we never run
            # more than one trial at a time. This makes debugging easier, since
            # the debugger stdout behaves more reasonably with single process.
            # TODO(hartikainen): Change this from 'extra_gpu' to
            # 'debug-resource' once tune supports custom resources.
            # See: https://github.com/ray-project/ray/pull/2979.
            resources['extra_gpu'] = 1
            trial_resources['extra_gpu'] = 1

        ray.init(resources=resources, num_cpus=args.cpus, num_gpus=args.gpus)
    else:
        ray.init(redis_address=ray.services.get_node_ip_address() + ':6379')

    datetime_prefix = datetimestamp()
    experiment_id = '-'.join((datetime_prefix, args.exp_name))

    tune.run_experiments(
        {
            "{}-{}".format(experiment_id, i): {
                'run': 'mujoco-runner',
                'trial_resources': trial_resources,
                'config': variant_spec,
                'local_dir': local_dir,
                'num_samples': args.num_samples,
                'upload_dir': args.upload_dir,
                'checkpoint_freq': (
                    args.checkpoint_frequency
                    if args.checkpoint_frequency is not None
                    else variant_spec['run_params'].get('checkpoint_frequency', 0)),
                'checkpoint_at_end': (
                    args.checkpoint_at_end
                    if args.checkpoint_at_end is not None
                    else variant_spec['run_params'].get('checkpoint_at_end', True)),
                'restore': args.restore,  # Defaults to None
            }
            for i, variant_spec in enumerate(variant_specs)
        },
        scheduler=scheduler,
    )
Example 5
    current_best_params = [
        {
            "width": 1,
            "height": 2,
            "activation": 0  # Activation will be relu
        },
        {
            "width": 4,
            "height": 2,
            "activation": 1  # Activation will be tanh
        }
    ]
    algo = HyperOptSearch(space,
                          metric="episode_reward_mean",
                          mode="max",
                          random_state_seed=5,
                          points_to_evaluate=current_best_params)
    algo = ConcurrencyLimiter(algo, max_concurrent=1)
    from ray.tune import register_trainable
    register_trainable("trainable", MyTrainableClass)
    os.environ["TUNE_GLOBAL_CHECKPOINT_S"] = "0"
    run("trainable",
        search_alg=algo,
        resume=args.resume,
        verbose=0,
        num_samples=20,
        fail_fast=True,
        stop={"training_iteration": 2},
        local_dir=args.local_dir,
        name="experiment")
Example 6
                 abs(config["width"] - 3))
        time.sleep(0.02)


if __name__ == '__main__':
    import argparse
    from hyperopt import hp

    parser = argparse.ArgumentParser()
    parser.add_argument("--smoke-test",
                        action="store_true",
                        help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init(redirect_output=True)

    register_trainable("exp", easy_objective)

    space = {
        'width': hp.uniform('width', 0, 20),
        'height': hp.uniform('height', -100, 100),
        'activation': hp.choice("activation", ["relu", "tanh"])
    }

    config = {
        "my_exp": {
            "run": "exp",
            "repeat": 10 if args.smoke_test else 1000,
            "stop": {
                "training_iteration": 100
            },
        }
Example 7
    def setUp(self):
        def train(config, reporter):
            for i in range(100):
                reporter(timesteps_total=i)

        register_trainable("f1", train)
Example 8
    def _save(self, checkpoint_dir):
        path = os.path.join(checkpoint_dir, "checkpoint")
        with open(path, "w") as f:
            f.write(json.dumps(
                {"timestep": self.timestep, "value": self.current_value}))
        return path

    def _restore(self, checkpoint_path):
        with open(checkpoint_path) as f:
            data = json.loads(f.read())
            self.timestep = data["timestep"]
            self.current_value = data["value"]


register_trainable("my_class", MyTrainableClass)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init()

    pbt = PopulationBasedTraining(
        time_attr="training_iteration", reward_attr="episode_reward_mean",
        perturbation_interval=10,
        hyperparam_mutations={
            # Allow for scaling-based perturbations, with a uniform backing
            # distribution for resampling.
            "factor_1": lambda: random.uniform(0.0, 20.0),
Example 9
#!/usr/bin/env python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys

import ray
from ray.tune import register_trainable, run_experiments


def f(config, reporter):
    reporter(timesteps_total=1)


if __name__ == "__main__":
    ray.init()
    register_trainable("my_class", f)
    run_experiments(
        {"test": {
            "run": "my_class",
            "stop": {
                "training_iteration": 1
            }
        }})
    assert 'ray.rllib' not in sys.modules, "RLlib should not be imported"
Example 10
from ray.tune.schedulers import PopulationBasedTraining
from ray.tune.utils import validate_save_restore
import matplotlib.style as style
import matplotlib.pyplot as plt
import math


def my_func(config, reporter):  # add the reporter parameter
    import time, numpy as np
    i = 0
    while True:
        reporter(timesteps_total=i, mean_accuracy=i**config["alpha"])
        i += config["beta"]
        time.sleep(.01)


tune.register_trainable("my_func", my_func)
ray.init()

tune.run_experiments({
    "my_experiment": {
        "run": "my_func",
        "stop": {
            "mean_accuracy": 100
        },
        "config": {
            "alpha": tune.grid_search([0.2, 0.4, 0.6]),
            "beta": tune.grid_search([1, 2]),
        }
    }
})
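
The grid search above expands to every combination of "alpha" (3 values) and "beta" (2 values), i.e. 6 trials. As a sketch, the same experiment can also be launched through tune.run, reusing the name registered above; only the keyword arguments shown here are assumed:

# Sketch: equivalent launch via tune.run with the registered name "my_func".
analysis = tune.run(
    "my_func",
    stop={"mean_accuracy": 100},
    config={
        "alpha": tune.grid_search([0.2, 0.4, 0.6]),
        "beta": tune.grid_search([1, 2]),
    },
)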
Example 11
    def parallelPull(self, manifest={}):

        self.xp_state.versioningDirectory = os.path.expanduser(
            '~') + '/' + 'jarvis.d'

        # Runs one experiment per pull
        # Each experiment has many trials

        tmpexperiment = self.xp_state.tmpexperiment
        if os.path.exists(tmpexperiment):
            rmtree(tmpexperiment)
        os.mkdir(tmpexperiment)

        self.xp_state.visited = []

        if not util.isOrphan(self):
            self.loclist = list(
                map(lambda x: x.getLocation(), self.parent.out_artifacts))
        else:
            self.loclist = [
                self.getLocation(),
            ]
        self.scriptNames = []

        literalsAttached = set([])
        lambdas = []
        if not util.isOrphan(self):
            self.parent.__serialize__(lambdas, self.loclist, self.scriptNames)

        self.loclist = list(set(self.loclist))
        self.scriptNames = list(set(self.scriptNames))

        # Need to sort to compare
        self.loclist.sort()
        self.scriptNames.sort()

        for _, names in lambdas:
            literalsAttached |= set(names)

        original_dir = os.getcwd()

        experimentName = self.xp_state.jarvisFile.split('.')[0]

        def exportedExec(config, reporter):
            tee = tuple([])
            for litName in config['8ilk9274']:
                tee += (config[litName], )
            i = -1
            for j, v in enumerate(config['6zax7937']):
                if v == tee:
                    i = j
                    break
            assert i >= 0
            os.chdir(tmpexperiment + '/' + str(i))
            with open('.' + experimentName + '.jarvis', 'w') as fp:
                json.dump(config, fp)
            for f, names in lambdas:
                literals = list(map(lambda x: config[x], names))
                f(literals)
            reporter(timesteps_total=1)
            os.chdir(original_dir)

        config = {}
        numTrials = 1
        literals = []
        literalNames = []
        for kee in self.xp_state.literalNameToObj:
            if kee in literalsAttached:
                if self.xp_state.literalNameToObj[kee].__oneByOne__:
                    config[kee] = grid_search(
                        self.xp_state.literalNameToObj[kee].v)
                    numTrials *= len(self.xp_state.literalNameToObj[kee].v)
                    literals.append(self.xp_state.literalNameToObj[kee].v)
                else:
                    config[kee] = self.xp_state.literalNameToObj[kee].v
                    if util.isIterable(self.xp_state.literalNameToObj[kee].v):
                        if type(self.xp_state.literalNameToObj[kee].v
                                ) == tuple:
                            literals.append(
                                (self.xp_state.literalNameToObj[kee].v, ))
                        else:
                            literals.append([
                                self.xp_state.literalNameToObj[kee].v,
                            ])
                    else:
                        literals.append([
                            self.xp_state.literalNameToObj[kee].v,
                        ])
                literalNames.append(kee)

        literals = list(itertools.product(*literals))
        config['6zax7937'] = literals
        config['8ilk9274'] = literalNames

        for i in range(numTrials):
            dst = tmpexperiment + '/' + str(i)
            copytree(os.getcwd(), dst, True)

        ts = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

        register_trainable('exportedExec', exportedExec)

        self.xp_state.ray['literalNames'] = literalNames

        run_experiments({
            experimentName: {
                'run': 'exportedExec',
                'resources': {
                    'cpu': 1,
                    'gpu': 0
                },
                'config': config
            }
        })

        if not os.path.isdir(self.xp_state.versioningDirectory):
            os.mkdir(self.xp_state.versioningDirectory)

        moveBackFlag = False

        if os.path.exists(self.xp_state.versioningDirectory + '/' +
                          self.xp_state.jarvisFile.split('.')[0]):
            move(
                self.xp_state.versioningDirectory + '/' +
                self.xp_state.jarvisFile.split('.')[0] + '/.git', '/tmp/')
            rmtree(self.xp_state.versioningDirectory + '/' +
                   self.xp_state.jarvisFile.split('.')[0])
            moveBackFlag = True

        if manifest:

            os.chdir(tmpexperiment)

            dirs = [x for x in os.listdir() if util.isNumber(x)]
            table_full = []
            table_small = []

            for trial in dirs:
                os.chdir(trial)
                with open('.' + experimentName + '.jarvis', 'r') as fp:
                    config = json.load(fp)
                record_full = {}
                record_small = {}

                for literalName in literalNames:
                    record_full[literalName] = config[literalName]
                    record_small[literalName] = config[literalName]
                for artifactLabel in manifest:
                    record_full[artifactLabel] = util.loadArtifact(
                        manifest[artifactLabel].loc)
                    if total_size(record_full[artifactLabel]) >= 1000:
                        record_small[artifactLabel] = " . . . "
                    else:
                        record_small[artifactLabel] = record_full[
                            artifactLabel]
                    if util.isNumber(record_full[artifactLabel]):
                        record_full[artifactLabel] = eval(
                            record_full[artifactLabel])
                    if util.isNumber(record_small[artifactLabel]):
                        record_small[artifactLabel] = eval(
                            record_small[artifactLabel])
                record_small['__trialNum__'] = trial
                record_full['__trialNum__'] = trial

                table_full.append(record_full)
                table_small.append(record_small)
                os.chdir('../')

            df = pd.DataFrame(table_small)
            util.pickleTo(df, experimentName + '.pkl')

            os.chdir(original_dir)

        copytree(
            tmpexperiment, self.xp_state.versioningDirectory + '/' +
            self.xp_state.jarvisFile.split('.')[0])

        os.chdir(self.xp_state.versioningDirectory + '/' +
                 self.xp_state.jarvisFile.split('.')[0])
        if moveBackFlag:
            move(
                '/tmp/.git', self.xp_state.versioningDirectory + '/' +
                self.xp_state.jarvisFile.split('.')[0])
            repo = git.Repo(os.getcwd())
            repo.git.add(A=True)
            repo.index.commit('incremental commit')
        else:
            repo = git.Repo.init(os.getcwd())
            repo.git.add(A=True)
            repo.index.commit('initial commit')
        os.chdir(original_dir)

        if manifest:

            return pd.DataFrame(table_full)
Example 12
    def execute(
            self,
            config,
            dataset=None,
            training_set=None,
            validation_set=None,
            test_set=None,
            training_set_metadata=None,
            data_format=None,
            experiment_name="hyperopt",
            model_name="run",
            # model_load_path=None,
            # model_resume_path=None,
            skip_save_training_description=False,
            skip_save_training_statistics=False,
            skip_save_model=False,
            skip_save_progress=False,
            skip_save_log=False,
            skip_save_processed_input=True,
            skip_save_unprocessed_output=False,
            skip_save_predictions=False,
            skip_save_eval_stats=False,
            output_directory="results",
            gpus=None,
            gpu_memory_limit=None,
            allow_parallel_threads=True,
            backend=None,
            random_seed=default_random_seed,
            debug=False,
            **kwargs):
        if isinstance(dataset, str) and not os.path.isabs(dataset):
            dataset = os.path.abspath(dataset)

        if gpus is not None:
            raise ValueError(
                "Parameter `gpus` is not supported when using Ray Tune. "
                "Configure GPU resources with Ray and set `gpu_resources_per_trial` in your "
                "hyperopt config.")

        hyperopt_dict = dict(
            config=config,
            dataset=dataset,
            training_set=training_set,
            validation_set=validation_set,
            test_set=test_set,
            training_set_metadata=training_set_metadata,
            data_format=data_format,
            experiment_name=experiment_name,
            model_name=model_name,
            # model_load_path=model_load_path,
            # model_resume_path=model_resume_path,
            eval_split=self.split,
            skip_save_training_description=skip_save_training_description,
            skip_save_training_statistics=skip_save_training_statistics,
            skip_save_model=skip_save_model,
            skip_save_progress=skip_save_progress,
            skip_save_log=skip_save_log,
            skip_save_processed_input=skip_save_processed_input,
            skip_save_unprocessed_output=skip_save_unprocessed_output,
            skip_save_predictions=skip_save_predictions,
            skip_save_eval_stats=skip_save_eval_stats,
            output_directory=output_directory,
            gpus=gpus,
            gpu_memory_limit=gpu_memory_limit,
            allow_parallel_threads=allow_parallel_threads,
            backend=backend,
            random_seed=random_seed,
            debug=debug,
        )

        mode = "min" if self.goal != MAXIMIZE else "max"
        metric = "metric_score"
        if self.search_alg_dict is not None:
            if TYPE not in self.search_alg_dict:
                logger.warning("WARNING: Kindly set type param for search_alg "
                               "to utilize Tune's Search Algorithms.")
                search_alg = None
            else:
                search_alg_type = self.search_alg_dict.pop(TYPE)
                search_alg = tune.create_searcher(search_alg_type,
                                                  metric=metric,
                                                  mode=mode,
                                                  **self.search_alg_dict)
        else:
            search_alg = None

        sync_config = None
        if self.kubernetes_namespace:
            from ray.tune.integration.kubernetes import NamespacedKubernetesSyncer
            sync_config = tune.SyncConfig(
                sync_to_driver=NamespacedKubernetesSyncer(
                    self.kubernetes_namespace))

        resources_per_trial = {
            "cpu": self.cpu_resources_per_trial or 1,
            "gpu": self.gpu_resources_per_trial or 0,
        }

        def run_experiment_trial(config, checkpoint_dir=None):
            return self._run_experiment(config, checkpoint_dir, hyperopt_dict,
                                        self.decode_ctx)

        register_trainable(f"trainable_func_f{hash_dict(config)}",
                           run_experiment_trial)

        analysis = tune.run(
            f"trainable_func_f{hash_dict(config)}",
            config=self.search_space,
            scheduler=self.scheduler,
            search_alg=search_alg,
            num_samples=self.num_samples,
            resources_per_trial=resources_per_trial,
            queue_trials=True,
            sync_config=sync_config,
            local_dir=output_directory,
            metric=metric,
            mode=mode,
            trial_name_creator=lambda trial: f"trial_{trial.trial_id}",
            trial_dirname_creator=lambda trial: f"trial_{trial.trial_id}",
        )

        hyperopt_results = analysis.results_df.sort_values(
            "metric_score", ascending=self.goal != MAXIMIZE)

        return hyperopt_results.to_dict(orient="records")
Example 13
        "num_workers": 32,
        "buffer_size": 2000000,
        "learning_starts": 50000,
        "train_batch_size": 512,
        "rollout_fragment_length": 50,
        "target_network_update_freq": 500000,
        "timesteps_per_iteration": 1000,
        "exploration_config": {
            "type": "PerWorkerEpsilonGreedy"
        },
        "worker_side_prioritization": True,
        "min_iter_time_s": 30,
        "training_intensity": None,
        "prioritized_replay": True,
        "prioritized_replay_alpha": 0.6,
        "prioritized_replay_beta": 0.4,
        "final_prioritized_replay_beta": 0.4,
        "prioritized_replay_beta_annealing_timesteps": 20000,
        "prioritized_replay_eps": 1e-6,
    },
)

QMixTrainer = GenericOffPolicyTrainer.with_updates(
    name="QMIXApex",
    default_config=QMIX_APEX_DEFAULT_CONFIG,
    default_policy=QMixTorchPolicy,
    get_policy_class=None,
    execution_plan=apex_execution_plan)

register_trainable("QMIXApex", QMixTrainer)
Example 14
    y = np.dot(sin_x, sin_z)

    # Negate y since we want to minimize y value
    reporter(timesteps_total=1, neg_mean_loss=-y)


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init(redirect_output=True)

    register_trainable("exp", michalewicz_function)

    space = SearchSpace({
        ContinuousSpace('x1', 0, 4, 100),
        ContinuousSpace('x2', -2, 2, 100),
        ContinuousSpace('x3', 1, 5, 100),
        ContinuousSpace('x4', -3, 3, 100),
        DiscreteSpace('x5', [-1, 0, 1, 2, 3]),
    })

    config = {
        "my_exp": {
            "run": "exp",
            "stop": {
                "training_iteration": 100
            },
Example 15
        train(epoch)
        test()


if __name__ == "__main__":
    args = parser.parse_args()
    import numpy as np
    import ray
    from ray import tune
    from ray.tune.schedulers import HyperBandScheduler

    ray.init()
    sched = HyperBandScheduler(time_attr="training_iteration",
                               reward_attr="neg_mean_loss",
                               max_t=100)
    tune.register_trainable("train_dream",
                            lambda cfg, rprtr: train_dream(args, cfg, rprtr))

    tune.run_experiments(
        {
            "exp": {
                "stop": {
                    # "neg_mean_loss": 0.0,
                    "training_iteration": 100 if args.smoke_test else 100,
                },
                "resources_per_trial": {
                    "cpu": 1,
                    "gpu": 0
                },
                "run": "train_dream",
                "num_samples": 1 if args.smoke_test else 20,
                # "checkpoint_at_end": True,
Example 16
#!/usr/bin/env python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys

import ray
from ray.tune import register_trainable, run_experiments


def f(config, reporter):
    reporter(timesteps_total=1)


if __name__ == "__main__":
    ray.init()
    register_trainable("my_class", f)
    run_experiments({
        "test": {
            "run": "my_class",
            "stop": {
                "training_iteration": 1
            }
        }
    })
    assert 'ray.rllib' not in sys.modules, "RLlib should not be imported"
Example 17
    def execute(
        self,
        config,
        dataset=None,
        training_set=None,
        validation_set=None,
        test_set=None,
        training_set_metadata=None,
        data_format=None,
        experiment_name="hyperopt",
        model_name="run",
        resume=None,
        skip_save_training_description=False,
        skip_save_training_statistics=False,
        skip_save_model=False,
        skip_save_progress=False,
        skip_save_log=False,
        skip_save_processed_input=True,
        skip_save_unprocessed_output=False,
        skip_save_predictions=False,
        skip_save_eval_stats=False,
        output_directory="results",
        gpus=None,
        gpu_memory_limit=None,
        allow_parallel_threads=True,
        callbacks=None,
        backend=None,
        random_seed=default_random_seed,
        debug=False,
        hyperopt_log_verbosity=3,
        features_eligible_for_shared_params=None,
        **kwargs,
    ) -> RayTuneResults:
        if isinstance(dataset, str) and not has_remote_protocol(
                dataset) and not os.path.isabs(dataset):
            dataset = os.path.abspath(dataset)

        if isinstance(backend, str):
            backend = initialize_backend(backend)

        if gpus is not None:
            raise ValueError(
                "Parameter `gpus` is not supported when using Ray Tune. "
                "Configure GPU resources with Ray and set `gpu_resources_per_trial` in your "
                "hyperopt config.")

        if gpu_memory_limit is None and 0 < self._gpu_resources_per_trial_non_none < 1:
            # Enforce fractional GPU utilization
            gpu_memory_limit = self.gpu_resources_per_trial

        hyperopt_dict = dict(
            config=config,
            dataset=dataset,
            training_set=training_set,
            validation_set=validation_set,
            test_set=test_set,
            training_set_metadata=training_set_metadata,
            data_format=data_format,
            experiment_name=experiment_name,
            model_name=model_name,
            eval_split=self.split,
            skip_save_training_description=skip_save_training_description,
            skip_save_training_statistics=skip_save_training_statistics,
            skip_save_model=skip_save_model,
            skip_save_progress=skip_save_progress,
            skip_save_log=skip_save_log,
            skip_save_processed_input=skip_save_processed_input,
            skip_save_unprocessed_output=skip_save_unprocessed_output,
            skip_save_predictions=skip_save_predictions,
            skip_save_eval_stats=skip_save_eval_stats,
            output_directory=output_directory,
            gpus=gpus,
            gpu_memory_limit=gpu_memory_limit,
            allow_parallel_threads=allow_parallel_threads,
            callbacks=callbacks,
            backend=backend,
            random_seed=random_seed,
            debug=debug,
        )

        mode = "min" if self.goal != MAXIMIZE else "max"
        metric = "metric_score"
        # if random seed not set, use Ludwig seed
        self.search_algorithm.check_for_random_seed(random_seed)
        if self.search_algorithm.search_alg_dict is not None:
            if TYPE not in self.search_algorithm.search_alg_dict:
                candidate_search_algs = list(SEARCH_ALG_IMPORT.keys())
                logger.warning(
                    "WARNING: search_alg type parameter missing, using 'variant_generator' as default. "
                    f"These are possible values for the type parameter: {candidate_search_algs}."
                )
                search_alg = None
            else:
                search_alg_type = self.search_algorithm.search_alg_dict[TYPE]
                search_alg = tune.create_searcher(
                    search_alg_type,
                    metric=metric,
                    mode=mode,
                    **self.search_algorithm.search_alg_dict)
        else:
            search_alg = None

        if self.max_concurrent_trials:
            assert (
                self.max_concurrent_trials > 0
            ), f"`max_concurrent_trials` must be greater than 0, got {self.max_concurrent_trials}"
            if isinstance(search_alg,
                          BasicVariantGenerator) or search_alg is None:
                search_alg = BasicVariantGenerator(
                    max_concurrent=self.max_concurrent_trials)
            elif isinstance(search_alg, ConcurrencyLimiter):
                raise ValueError(
                    "You have specified `max_concurrent_trials`, but the search "
                    "algorithm is already a `ConcurrencyLimiter`. FIX THIS "
                    "by setting `max_concurrent_trials=None`.")
            else:
                search_alg = ConcurrencyLimiter(
                    search_alg, max_concurrent=self.max_concurrent_trials)

        resources_per_trial = {
            "cpu": self._cpu_resources_per_trial_non_none,
            "gpu": self._gpu_resources_per_trial_non_none,
        }

        def run_experiment_trial(config,
                                 local_hyperopt_dict,
                                 checkpoint_dir=None):
            return self._run_experiment(
                config,
                checkpoint_dir,
                local_hyperopt_dict,
                self.decode_ctx,
                features_eligible_for_shared_params,
                _is_ray_backend(backend),
            )

        tune_config = {}
        tune_callbacks = []
        for callback in callbacks or []:
            run_experiment_trial, tune_config = callback.prepare_ray_tune(
                run_experiment_trial,
                tune_config,
                tune_callbacks,
            )

        if _is_ray_backend(backend):
            # for now, we do not do distributed training on cpu (until spread scheduling is implemented for Ray Train)
            # but we do want to enable it when GPUs are specified
            resources_per_trial = PlacementGroupFactory(
                [{}] + [{"CPU": 0, "GPU": 1}] * self._gpu_resources_per_trial_non_none
                if self._gpu_resources_per_trial_non_none
                else [{}] + [{"CPU": self._cpu_resources_per_trial_non_none}]
            )

        if has_remote_protocol(output_directory):
            run_experiment_trial = tune.durable(run_experiment_trial)
            self.sync_config = tune.SyncConfig(sync_to_driver=False,
                                               upload_dir=output_directory)
            if _ray_114:
                self.sync_client = get_node_to_storage_syncer(
                    SyncConfig(upload_dir=output_directory))
            else:
                self.sync_client = get_cloud_sync_client(output_directory)
            output_directory = None
        elif self.kubernetes_namespace:
            from ray.tune.integration.kubernetes import KubernetesSyncClient, NamespacedKubernetesSyncer

            self.sync_config = tune.SyncConfig(
                sync_to_driver=NamespacedKubernetesSyncer(
                    self.kubernetes_namespace))
            self.sync_client = KubernetesSyncClient(self.kubernetes_namespace)

        run_experiment_trial_params = tune.with_parameters(
            run_experiment_trial, local_hyperopt_dict=hyperopt_dict)
        register_trainable(
            f"trainable_func_f{hash_dict(config).decode('ascii')}",
            run_experiment_trial_params)

        # Note that resume="AUTO" will attempt to resume the experiment if possible, and
        # otherwise will start a new experiment:
        # https://docs.ray.io/en/latest/tune/tutorials/tune-stopping.html
        should_resume = "AUTO" if resume is None else resume

        try:
            analysis = tune.run(
                f"trainable_func_f{hash_dict(config).decode('ascii')}",
                name=experiment_name,
                config={
                    **self.search_space,
                    **tune_config,
                },
                scheduler=self.scheduler,
                search_alg=search_alg,
                num_samples=self.num_samples,
                keep_checkpoints_num=1,
                max_failures=1,  # retry a trial failure once
                resources_per_trial=resources_per_trial,
                time_budget_s=self.time_budget_s,
                sync_config=self.sync_config,
                local_dir=output_directory,
                metric=metric,
                mode=mode,
                trial_name_creator=lambda trial: f"trial_{trial.trial_id}",
                trial_dirname_creator=lambda trial: f"trial_{trial.trial_id}",
                callbacks=tune_callbacks,
                stop=CallbackStopper(callbacks),
                verbose=hyperopt_log_verbosity,
                resume=should_resume,
                log_to_file=True,
            )
        except Exception as e:
            # Explicitly raise a RuntimeError if an error is encountered during a Ray trial.
            # NOTE: Cascading the exception with "raise _ from e" still results in hanging.
            raise RuntimeError(f"Encountered Ray Tune error: {e}")

        if "metric_score" in analysis.results_df.columns:
            ordered_trials = analysis.results_df.sort_values(
                "metric_score", ascending=self.goal != MAXIMIZE)

            # Catch nans in edge case where the trial doesn't complete
            temp_ordered_trials = []
            for kwargs in ordered_trials.to_dict(orient="records"):
                for key in ["parameters", "training_stats", "eval_stats"]:
                    if isinstance(kwargs[key], float):
                        kwargs[key] = {}
                temp_ordered_trials.append(kwargs)

            # Trials w/empty eval_stats fields & non-empty training_stats fields ran intermediate
            # tune.report call(s) but were terminated before reporting eval_stats from post-train
            # evaluation (e.g., trial stopped due to time budget or relatively poor performance.)
            # For any such trials, run model evaluation for the best model in that trial & record
            # results in ordered_trials which is returned & is persisted in hyperopt_statistics.json.
            for trial in temp_ordered_trials:
                if trial["eval_stats"] == "{}" and trial[
                        "training_stats"] != "{}":
                    # Evaluate the best model on the eval_split, which is validation_set
                    if validation_set is not None and validation_set.size > 0:
                        trial_path = trial["trial_dir"]
                        best_model_path = self._get_best_model_path(
                            trial_path, analysis)
                        if best_model_path is not None:
                            self._evaluate_best_model(
                                trial,
                                trial_path,
                                best_model_path,
                                validation_set,
                                data_format,
                                skip_save_unprocessed_output,
                                skip_save_predictions,
                                skip_save_eval_stats,
                                gpus,
                                gpu_memory_limit,
                                allow_parallel_threads,
                                backend,
                                debug,
                            )
                        else:
                            logger.warning(
                                "Skipping evaluation as no model checkpoints were available"
                            )
                    else:
                        logger.warning(
                            "Skipping evaluation as no validation set was provided"
                        )

            ordered_trials = [
                TrialResults.from_dict(load_json_values(kwargs))
                for kwargs in temp_ordered_trials
            ]
        else:
            logger.warning(
                "No trials reported results; check whether the time budget is lower than the epoch latency"
            )
            ordered_trials = []

        return RayTuneResults(ordered_trials=ordered_trials,
                              experiment_analysis=analysis)
Example 18
        values = values[:len(values) - max(upper, 0)]
        values = np.pad(
            values,
            pad_width=[
                (-min(lower, 0), -min(0, upper)),
                *[(0, 0) for k in range(values.ndim - 1)],
            ],
            mode="constant",
        )
        return values


CCPPOPolicy = PPOTFPolicy.with_updates(
    name="CCPPOPolicy",
    postprocess_fn=centralized_critic_postprocessing,
    loss_fn=loss_with_central_critic,
    before_loss_init=setup_mixins,
    grad_stats_fn=central_vf_stats,
    mixins=[
        LearningRateSchedule,
        EntropyCoeffSchedule,
        KLCoeffMixin,
        CentralizedValueMixin,
    ],
)
register_trainable(
    "CcConcatenate",
    PPOTrainer.with_updates(name="CCPPOTrainer",
                            get_policy_class=lambda c: CCPPOPolicy),
)
Example 19
    def _save(self, checkpoint_dir):
        return self.saver.save(
            self.sess, checkpoint_dir + "/save", global_step=self.iterations)

    def _restore(self, path):
        return self.saver.restore(self.sess, path)


# !!! Example of using the ray.tune Python API !!!
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--smoke-test', action='store_true', help='Finish quickly for testing')
    args, _ = parser.parse_known_args()

    register_trainable("my_class", TrainMNIST)
    mnist_spec = {
        'run': 'my_class',
        'stop': {
            'mean_accuracy': 0.99,
            'time_total_s': 600,
        },
        'config': {
            'learning_rate': sample_from(
                lambda spec: 10**np.random.uniform(-5, -3)),
            'activation': grid_search(['relu', 'elu', 'tanh']),
        },
        "num_samples": 10,
    }

    if args.smoke_test:
Example 20
            space['prob_%d_%d' % (i, j)] = hp.uniform('prob_%d_%d' % (i, j),
                                                      0.0, 1.0)
            space['level_%d_%d' % (i, j)] = hp.uniform('level_%d_%d' % (i, j),
                                                       0.0, 1.0)

    final_policy_set = []
    total_computation = 0
    reward_attr = 'top1_valid'  # top1_valid or minus_loss
    for _ in range(1):  # run multiple times.
        for cv_fold in range(cv_num):
            name = "search_%s_%s_fold%d_ratio%.1f" % (C.get()['dataset'],
                                                      C.get()['model']['type'],
                                                      cv_fold, args.cv_ratio)
            print(name)
            register_trainable(
                name,
                lambda augs, rpt: eval_tta(copy.deepcopy(copied_c), augs, rpt))
            algo = HyperOptSearch(space,
                                  max_concurrent=4 * 20,
                                  reward_attr=reward_attr)

            exp_config = {
                name: {
                    'run': name,
                    'num_samples': 4 if args.smoke_test else args.num_search,
                    'resources_per_trial': {
                        'gpu': 1
                    },
                    'stop': {
                        'training_iteration': args.num_policy
                    },
Example 21
                                  config["entropy_coeff_schedule"])
    warmup_steps = config["model"]["custom_options"].get(
        "warmup_steps", 100000)
    TransformerLearningRateSchedule.__init__(
        policy, config["model"]["custom_options"]["transformer"]["num_heads"],
        warmup_steps)


TTFPPOPolicy = PPOTFPolicy.with_updates(name="TTFPPOPolicy",
                                        before_loss_init=setup_mixins,
                                        mixins=[
                                            TransformerLearningRateSchedule,
                                            EntropyCoeffSchedule, KLCoeffMixin,
                                            ValueNetworkMixin
                                        ])

TTFPPOPolicyInfer = PPOTFPolicy.with_updates(name="TTFPPOPolicyInfer",
                                             before_loss_init=setup_mixins,
                                             mixins=[
                                                 LearningRateSchedule,
                                                 EntropyCoeffSchedule,
                                                 KLCoeffMixin,
                                                 ValueNetworkMixin
                                             ])

register_trainable(
    "TTFPPO",
    PPOTrainer.with_updates(name="TTFPPOTrainer",
                            get_policy_class=lambda c: TTFPPOPolicy),
)
Example 22
        eao = eao_vot(tracker, model, model_config)
        print("penalty_k: {0}, scale_lr: {1}, window_influence: {2}, small_sz: {3}, big_sz: {4}, ratio: {6}, eao: {5}".format(penalty_k, scale_lr, window_influence, small_sz, big_sz, eao, ratio))
        reporter(EAO=eao)

    # OTB and Ocean
    if args.dataset.startswith('OTB'):
        auc = auc_otb(tracker, model, model_config)
        print("penalty_k: {0}, scale_lr: {1}, window_influence: {2}, small_sz: {3}, big_sz: {4}, ratio: {6}, eao: {5}".format(penalty_k, scale_lr, window_influence, small_sz, big_sz, auc.item(), ratio))
        reporter(AUC=auc)


if __name__ == "__main__":
    # the resources your computer has; object_store_memory is shared memory (shm)
    #ray.init(num_gpus=args.gpu_nums, num_cpus=args.gpu_nums * 8,  object_store_memory=50000000000)
    ray.init(num_gpus=args.gpu_nums, num_cpus=args.gpu_nums * 8,  object_store_memory=500000000)
    tune.register_trainable("fitness", fitness)

    if 'Ocean' in args.arch:
        params = {
                "penalty_k": hp.quniform('penalty_k', 0.001, 0.2, 0.001),
                "scale_lr": hp.quniform('scale_lr', 0.3, 0.8, 0.001),
                "window_influence": hp.quniform('window_influence', 0.15, 0.65, 0.001),
                "small_sz": hp.choice("small_sz", [255]),
                "big_sz": hp.choice("big_sz", [287, 303, 319]),
                "ratio": hp.quniform('ratio', 0.7, 1, 0.01),
                }
    if 'VOT' not in args.dataset or not args.align:
        params['ratio'] = hp.choice("ratio", [1]) 
 
    print('tuning range: ')
    pprint(params)    
Example 23
    def fit(self):
        if self.config is None:
            raise ValueError('Have to set config file')

        tune.register_trainable(
            "tune_train_eval", lambda tuned, rprtr: tune_train_eval(
                self.dataLoader, self.model, self.criterion, self.customMetric,
                self.config, tuned, rprtr))

        if 'mlflow_tracking_URI' in self.config.keys():
            host = self.config['mlflow_tracking_URI'].split('//')[1].split(
                ':')[0]
            port = self.config['mlflow_tracking_URI'].split('//')[1].split(
                ':')[1]
            os.system('mlflow ui -h ' + host + ' -p ' + port + ' &')
            print('mlflow server start')

        experiment_config = {}
        experiment_config['exp'] = {}

        experiment_config['exp']['trial_resources'] = {}

        if self.config['multiGPU'] == 'Y':
            experiment_config['exp']['trial_resources']['gpu'] = int(
                torch.cuda.device_count())
        else:
            if torch.cuda.device_count() > 0:
                experiment_config['exp']['trial_resources']['gpu'] = 1
            else:
                experiment_config['exp']['trial_resources']['gpu'] = 0

        if 'trial_resources_cpu' in self.config.keys():
            experiment_config['exp']['trial_resources']['cpu'] = int(
                self.config['trial_resources_cpu'])
        if 'trial_resources_gpu' in self.config.keys():
            experiment_config['exp']['trial_resources']['gpu'] = int(
                self.config['trial_resources_gpu'])

        experiment_config['exp']['run'] = "tune_train_eval"
        experiment_config['exp']['stop'] = {}
        experiment_config['exp']['stop']['training_iteration'] = int(
            self.config['epoch'])
        experiment_config['exp']['local_dir'] = self.config['ray_dir']
        if 'num_samples' in self.config.keys():
            experiment_config['exp']['num_samples'] = int(
                self.config['num_samples'])

        # set hyperparameter candidates
        experiment_config['exp']['config'] = {}
        setExperimentConfigParam(self.config, 'learning_rate',
                                 experiment_config['exp']['config'])
        setExperimentConfigParam(self.config, 'momentum',
                                 experiment_config['exp']['config'])
        setExperimentConfigParam(self.config, 'lr_decay',
                                 experiment_config['exp']['config'])
        setExperimentConfigParam(self.config, 'weight_decay',
                                 experiment_config['exp']['config'])
        setExperimentConfigParam(self.config, 'amsgrad',
                                 experiment_config['exp']['config'])
        setExperimentConfigParam(self.config, 'nesterov',
                                 experiment_config['exp']['config'])
        print('tuning experiment config')
        print(experiment_config)

        tune.run_experiments(experiment_config,
                             verbose=0,
                             scheduler=self.sched)
Example 24
    def setUp(self):
        def dummy_train(config, reporter):
            reporter(timesteps_total=100, done=True)

        register_trainable("f1", dummy_train)
Example 25
def test_cluster_interrupt_searcher(start_connected_cluster, tmpdir):
    """Tests restoration of HyperOptSearch experiment on cluster shutdown
    with actual interrupt.

    Restoration should restore both state of trials
    and previous search algorithm (HyperOptSearch) state.
    This is an end-to-end test.
    """
    cluster = start_connected_cluster
    dirpath = str(tmpdir)
    local_checkpoint_dir = os.path.join(dirpath, "experiment")
    from ray.tune import register_trainable
    register_trainable("trainable", MyTrainableClass)

    def execute_script_with_args(*args):
        current_dir = os.path.dirname(__file__)
        script = os.path.join(current_dir,
                              "_test_cluster_interrupt_searcher.py")
        subprocess.Popen([sys.executable, script] + list(args))

    args = ["--ray-address", cluster.address, "--local-dir", dirpath]
    execute_script_with_args(*args)
    # Wait until the right checkpoint is saved.
    # The trainable returns every 0.5 seconds, so this should not miss
    # the checkpoint.
    trials = []
    for i in range(50):
        if TrialRunner.checkpoint_exists(local_checkpoint_dir):
            # Inspect the internal trialrunner
            runner = TrialRunner(resume="LOCAL",
                                 local_checkpoint_dir=local_checkpoint_dir)
            trials = runner.get_trials()
            if trials and len(trials) >= 10:
                break
        time.sleep(.5)
    else:
        raise ValueError(f"Didn't generate enough trials: {len(trials)}")

    if not TrialRunner.checkpoint_exists(local_checkpoint_dir):
        raise RuntimeError(
            f"Checkpoint file didn't appear in {local_checkpoint_dir}. "
            f"Current list: {os.listdir(local_checkpoint_dir)}.")

    ray.shutdown()
    cluster.shutdown()

    cluster = _start_new_cluster()
    execute_script_with_args(*(args + ["--resume"]))

    time.sleep(2)

    register_trainable("trainable", MyTrainableClass)
    reached = False
    for i in range(50):
        if TrialRunner.checkpoint_exists(local_checkpoint_dir):
            # Inspect the internal trialrunner
            runner = TrialRunner(resume="LOCAL",
                                 local_checkpoint_dir=local_checkpoint_dir)
            trials = runner.get_trials()

            if len(trials) == 0:
                continue  # nonblocking script hasn't resumed yet, wait

            reached = True
            assert len(trials) >= 10
            assert len(trials) <= 20
            if len(trials) == 20:
                break
            else:
                stop_fn = runner.trial_executor.stop_trial
                for t in trials:
                    if t.status != Trial.ERROR:
                        stop_fn(t)
        time.sleep(.5)
    assert reached is True

    ray.shutdown()
    cluster.shutdown()
Example 26
def search(conf):
    sw = StopWatch.get()

    # region conf vars
    conf_dataset     = conf['dataset']
    dataroot    = conf['dataroot']
    redis_ip    = conf['redis']
    conf_loader = conf['autoaug']['loader']
    conf_model  = conf['autoaug']['model']
    model_type  = conf_model['type']
    ds_name     = conf_dataset['name']
    aug         = conf_loader['aug']
    val_ratio   = conf_loader['val_ratio']
    epochs      = conf_loader['epochs']
    val_fold    = conf_loader['val_fold']
    cv_num      = conf_loader['cv_num']
    num_policy = conf['autoaug']['num_policy']
    num_op = conf['autoaug']['num_op']
    num_search = conf['autoaug']['num_search']
    num_result_per_cv = conf['autoaug']['num_result_per_cv']
    smoke_test = conf['smoke_test']
    resume = conf['resume']
    # endregion

    ray.init(redis_address=redis_ip,
        # allocate all GPUs on local node if cluster is not specified
        num_gpus=torch.cuda.device_count() if not redis_ip else None)

    # first train with no aug
    _train_no_aug(conf)

    # get values from config
    num_samples = 4 if smoke_test else num_search

    logger.info('----- Search Test-Time Augmentation Policies -----')
    sw.start(tag='search')

    save_paths = [_get_model_filepath(ds_name,
        model_type, 'ratio%.1f_fold%d' %
            (val_ratio, i)) for i in range(cv_num)]

    copied_c = copy.deepcopy(conf)
    ops = augment_list(False)
    space = {}
    for i in range(num_policy):
        for j in range(num_op):
            space['policy_%d_%d' % (i, j)] = hp.choice('policy_%d_%d' %
                (i, j), list(range(0, len(ops))))
            space['prob_%d_%d' % (i, j)] = hp.uniform('prob_%d_%d' %
                (i, j), 0.0, 1.0)
            space['level_%d_%d' % (i, j)] = hp.uniform('level_%d_%d' %
                (i, j), 0.0, 1.0)

    final_policy_set = []
    total_computation = 0
    reward_attr = 'top1_valid'      # top1_valid or minus_loss
    for _ in range(1):  # run multiple times.
        for val_fold in range(cv_num):
            name = "search_%s_%s_fold%d_ratio%.1f" % (ds_name,
                model_type, val_fold, val_ratio)
            #logger.info(name)
            register_trainable(name, (lambda augs,
                rpt: _eval_tta(copy.deepcopy(copied_c), augs, rpt)))
            algo = HyperOptSearch(space, max_concurrent=4*20,
                reward_attr=reward_attr)

            exp_config = {
                name: {
                    'run': name,
                    'num_samples': num_samples,
                    'resources_per_trial': {'gpu': 1},
                    'stop': {'training_iteration': num_policy},
                    'config': {
                        'dataroot': dataroot, 'save_path': save_paths[val_fold],
                        'val_ratio': val_ratio, 'val_fold': val_fold,
                        'num_op': num_op, 'num_policy': num_policy
                    },
                }
            }
            results = run_experiments(exp_config, search_alg=algo,
                scheduler=None, verbose=0, queue_trials=True,
                resume=resume, raise_on_failed_trial=False)

            results = [x for x in results if x.last_result is not None]
            results = sorted(results, key=lambda x: x.last_result[reward_attr],
                reverse=True)

            # calculate computation usage
            for result in results:
                total_computation += result.last_result['elapsed_time']

            for result in results[:num_result_per_cv]:
                final_policy = policy_decoder(result.config, num_policy, num_op)
                logger.info('loss=%.12f top1_valid=%.4f %s' %
                    (result.last_result['minus_loss'],
                        result.last_result['top1_valid'], final_policy))

                final_policy = remove_deplicates(final_policy)
                final_policy_set.extend(final_policy)

    logger.info(json.dumps(final_policy_set))
    logger.info('final_policy=%d' % len(final_policy_set))
    logger.info('processed in %.4f secs, gpu hours=%.4f' % (sw.pause('search'), total_computation / 3600.))
    logger.info('----- Train with Augmentations model=%s dataset=%s aug=%s ratio(test)=%.1f -----' \
        % (model_type, ds_name, aug, val_ratio))
    sw.start(tag='train_aug')

    num_experiments = 5
    default_path = [_get_model_filepath(ds_name, model_type, 'ratio%.1f_default%d'  \
        % (val_ratio, _)) for _ in range(num_experiments)]
    augment_path = [_get_model_filepath(ds_name, model_type, 'ratio%.1f_augment%d'  \
        % (val_ratio, _)) for _ in range(num_experiments)]
    reqs = [_train_model.remote(copy.deepcopy(copied_c), dataroot, aug, 0.0, 0, save_path=default_path[_], only_eval=True) \
        for _ in range(num_experiments)] + \
        [_train_model.remote(copy.deepcopy(copied_c), dataroot, final_policy_set, 0.0, 0, save_path=augment_path[_]) \
            for _ in range(num_experiments)]

    tqdm_epoch = tqdm(range(epochs))
    is_done = False
    for epoch in tqdm_epoch:
        while True:
            # Use a separate name so the total epoch count in `epochs` is not shadowed
            epochs_per_exp = OrderedDict()
            for exp_idx in range(num_experiments):
                try:
                    if os.path.exists(default_path[exp_idx]):
                        latest_ckpt = torch.load(default_path[exp_idx])
                        epochs_per_exp['default_exp%d' % (exp_idx + 1)] = latest_ckpt['epoch']
                except:
                    pass
                try:
                    if os.path.exists(augment_path[exp_idx]):
                        latest_ckpt = torch.load(augment_path[exp_idx])
                        epochs_per_exp['augment_exp%d' % (exp_idx + 1)] = latest_ckpt['epoch']
                except:
                    pass

            tqdm_epoch.set_postfix(epochs_per_exp)
            if len(epochs_per_exp) == num_experiments*2 and min(epochs_per_exp.values()) >= epochs:
                is_done = True
            if len(epochs_per_exp) == num_experiments*2 and min(epochs_per_exp.values()) >= epoch:
                break
            time.sleep(10)
        if is_done:
            break

    logger.info('getting results...')
    final_results = ray.get(reqs)

    for train_mode in ['default', 'augment']:
        avg = 0.
        for _ in range(num_experiments):
            r_model, r_cv, r_dict = final_results.pop(0)
            logger.info('[%s] top1_train=%.4f top1_test=%.4f' % (train_mode, r_dict['top1_train'], r_dict['top1_test']))
            avg += r_dict['top1_test']
        avg /= num_experiments
        logger.info('[%s] top1_test average=%.4f (#experiments=%d)' % (train_mode, avg, num_experiments))
    logger.info('processed in %.4f secs' % sw.pause('train_aug'))

    logger.info(sw)
Example n. 27
    def execute(
        self,
        config,
        dataset=None,
        training_set=None,
        validation_set=None,
        test_set=None,
        training_set_metadata=None,
        data_format=None,
        experiment_name="hyperopt",
        model_name="run",
        # model_load_path=None,
        # model_resume_path=None,
        skip_save_training_description=False,
        skip_save_training_statistics=False,
        skip_save_model=False,
        skip_save_progress=False,
        skip_save_log=False,
        skip_save_processed_input=True,
        skip_save_unprocessed_output=False,
        skip_save_predictions=False,
        skip_save_eval_stats=False,
        output_directory="results",
        gpus=None,
        gpu_memory_limit=None,
        allow_parallel_threads=True,
        callbacks=None,
        backend=None,
        random_seed=default_random_seed,
        debug=False,
        **kwargs,
    ) -> RayTuneResults:
        if isinstance(dataset, str) and not has_remote_protocol(
                dataset) and not os.path.isabs(dataset):
            dataset = os.path.abspath(dataset)

        if isinstance(backend, str):
            backend = initialize_backend(backend)

        if gpus is not None:
            raise ValueError(
                "Parameter `gpus` is not supported when using Ray Tune. "
                "Configure GPU resources with Ray and set `gpu_resources_per_trial` in your "
                "hyperopt config.")

        if gpu_memory_limit is None and 0 < self._gpu_resources_per_trial_non_none < 1:
            # Enforce fractional GPU utilization
            gpu_memory_limit = self.gpu_resources_per_trial

        hyperopt_dict = dict(
            config=config,
            dataset=dataset,
            training_set=training_set,
            validation_set=validation_set,
            test_set=test_set,
            training_set_metadata=training_set_metadata,
            data_format=data_format,
            experiment_name=experiment_name,
            model_name=model_name,
            # model_load_path=model_load_path,
            # model_resume_path=model_resume_path,
            eval_split=self.split,
            skip_save_training_description=skip_save_training_description,
            skip_save_training_statistics=skip_save_training_statistics,
            skip_save_model=skip_save_model,
            skip_save_progress=skip_save_progress,
            skip_save_log=skip_save_log,
            skip_save_processed_input=skip_save_processed_input,
            skip_save_unprocessed_output=skip_save_unprocessed_output,
            skip_save_predictions=skip_save_predictions,
            skip_save_eval_stats=skip_save_eval_stats,
            output_directory=output_directory,
            gpus=gpus,
            gpu_memory_limit=gpu_memory_limit,
            allow_parallel_threads=allow_parallel_threads,
            callbacks=callbacks,
            backend=backend,
            random_seed=random_seed,
            debug=debug,
        )

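        # Every trial reports `metric_score`; Tune minimizes or maximizes it
        # according to the configured optimization goal.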
        mode = "min" if self.goal != MAXIMIZE else "max"
        metric = "metric_score"
        if self.search_alg_dict is not None:
            if TYPE not in self.search_alg_dict:
                logger.warning("Missing `type` in the search_alg config; set it "
                               "to use one of Tune's search algorithms. "
                               "Falling back to default variant generation.")
                search_alg = None
            else:
                search_alg_type = self.search_alg_dict.pop(TYPE)
                search_alg = tune.create_searcher(search_alg_type,
                                                  metric=metric,
                                                  mode=mode,
                                                  **self.search_alg_dict)
        else:
            search_alg = None

        if self.max_concurrent_trials:
            assert (
                self.max_concurrent_trials > 0
            ), f"`max_concurrent_trials` must be greater than 0, got {self.max_concurrent_trials}"
            if isinstance(search_alg,
                          BasicVariantGenerator) or search_alg is None:
                search_alg = BasicVariantGenerator(
                    max_concurrent=self.max_concurrent_trials)
            elif isinstance(search_alg, ConcurrencyLimiter):
                raise ValueError(
                    "`max_concurrent_trials` was specified, but the search "
                    "algorithm is already a `ConcurrencyLimiter`. Set "
                    "`max_concurrent_trials=None` to resolve the conflict.")
            else:
                search_alg = ConcurrencyLimiter(
                    search_alg, max_concurrent=self.max_concurrent_trials)

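        # Default per-trial resources; with a Ray backend this dict is replaced
        # by a placement group further below.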
        resources_per_trial = {
            "cpu": self._cpu_resources_per_trial_non_none,
            "gpu": self._gpu_resources_per_trial_non_none,
        }

        def run_experiment_trial(config,
                                 local_hyperopt_dict,
                                 checkpoint_dir=None):
            return self._run_experiment(config, checkpoint_dir,
                                        local_hyperopt_dict, self.decode_ctx,
                                        _is_ray_backend(backend))

        tune_config = {}
        tune_callbacks = []
        for callback in callbacks or []:
            run_experiment_trial, tune_config = callback.prepare_ray_tune(
                run_experiment_trial,
                tune_config,
                tune_callbacks,
            )

        if _is_ray_backend(backend):
            # A Trial actor cannot request 0 CPUs, so give it a tiny fraction;
            # each worker bundle then gets 1 CPU (plus 1 GPU when requested).
            if self._gpu_resources_per_trial_non_none:
                resources_per_trial = PlacementGroupFactory(
                    [{"CPU": 0.001}] +
                    [{"CPU": 1, "GPU": 1}] * self._gpu_resources_per_trial_non_none)
            else:
                resources_per_trial = PlacementGroupFactory(
                    [{"CPU": 0.001}] +
                    [{"CPU": 1}] * self._cpu_resources_per_trial_non_none)

        if has_remote_protocol(output_directory):
            run_experiment_trial = tune.durable(run_experiment_trial)
            self.sync_config = tune.SyncConfig(sync_to_driver=False,
                                               upload_dir=output_directory)
            output_directory = None
        elif self.kubernetes_namespace:
            from ray.tune.integration.kubernetes import NamespacedKubernetesSyncer

            self.sync_config = tune.SyncConfig(
                sync_to_driver=NamespacedKubernetesSyncer(
                    self.kubernetes_namespace))

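        # `tune.with_parameters` ships the potentially large hyperopt dict to
        # trials via the Ray object store instead of the trial config.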
        run_experiment_trial_params = tune.with_parameters(
            run_experiment_trial, local_hyperopt_dict=hyperopt_dict)
        trainable_name = f"trainable_func_f{hash_dict(config).decode('ascii')}"
        register_trainable(trainable_name, run_experiment_trial_params)

        analysis = tune.run(
            trainable_name,
            config={
                **self.search_space,
                **tune_config,
            },
            scheduler=self.scheduler,
            search_alg=search_alg,
            num_samples=self.num_samples,
            keep_checkpoints_num=1,
            resources_per_trial=resources_per_trial,
            time_budget_s=self.time_budget_s,
            sync_config=self.sync_config,
            local_dir=output_directory,
            metric=metric,
            mode=mode,
            trial_name_creator=lambda trial: f"trial_{trial.trial_id}",
            trial_dirname_creator=lambda trial: f"trial_{trial.trial_id}",
            callbacks=tune_callbacks,
        )

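        # Rank trials by their reported metric score, best first.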
        ordered_trials = analysis.results_df.sort_values(
            "metric_score", ascending=self.goal != MAXIMIZE)

        # Catch nans in edge case where the trial doesn't complete
        temp_ordered_trials = []
        for kwargs in ordered_trials.to_dict(orient="records"):
            for key in ["parameters", "training_stats", "eval_stats"]:
                if isinstance(kwargs[key], float):
                    kwargs[key] = {}
            temp_ordered_trials.append(kwargs)

        ordered_trials = [
            TrialResults.from_dict(load_json_values(kwargs))
            for kwargs in temp_ordered_trials
        ]

        return RayTuneResults(ordered_trials=ordered_trials,
                              experiment_analysis=analysis)
Example n. 28
    def checkAndReturnConsistentLogs(self, results, sleep_per_iter=None):
        """Checks logging is the same between APIs.

        Ignore "DONE" for logging but checks that the
        scheduler is notified properly with the last result.
        """
        class_results = copy.deepcopy(results)
        function_results = copy.deepcopy(results)

        class_output = []
        function_output = []
        scheduler_notif = []

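        # Capture the results seen by each API and by the scheduler so they can
        # be compared field by field below.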
        class MockScheduler(FIFOScheduler):
            def on_trial_complete(self, runner, trial, result):
                scheduler_notif.append(result)

        class ClassAPILogger(Logger):
            def on_result(self, result):
                class_output.append(result)

        class FunctionAPILogger(Logger):
            def on_result(self, result):
                function_output.append(result)

        class _WrappedTrainable(Trainable):
            def _setup(self, config):
                del config
                self._result_iter = copy.deepcopy(class_results)

            def _train(self):
                if sleep_per_iter:
                    time.sleep(sleep_per_iter)
                res = self._result_iter.pop(0)  # This should not fail
                if not self._result_iter:  # Mark "Done" for last result
                    res[DONE] = True
                return res

        def _function_trainable(config, reporter):
            for result in function_results:
                if sleep_per_iter:
                    time.sleep(sleep_per_iter)
                reporter(**result)

        class_trainable_name = "class_trainable"
        register_trainable(class_trainable_name, _WrappedTrainable)

        trials = run_experiments(
            {
                "function_api": {
                    "run": _function_trainable,
                    "loggers": [FunctionAPILogger],
                },
                "class_api": {
                    "run": class_trainable_name,
                    "loggers": [ClassAPILogger],
                },
            },
            raise_on_failed_trial=False,
            scheduler=MockScheduler())

        # Ignore these fields
        NO_COMPARE_FIELDS = {
            HOSTNAME,
            NODE_IP,
            TRIAL_ID,
            EXPERIMENT_TAG,
            PID,
            TIME_THIS_ITER_S,
            TIME_TOTAL_S,
            DONE,  # This is ignored because FunctionAPI has different handling
            "timestamp",
            "time_since_restore",
            "experiment_id",
            "date",
        }

        self.assertEqual(len(class_output), len(results))
        self.assertEqual(len(function_output), len(results))

        def as_comparable_result(result):
            return {
                k: v
                for k, v in result.items() if k not in NO_COMPARE_FIELDS
            }

        function_comparable = [
            as_comparable_result(result) for result in function_output
        ]
        class_comparable = [
            as_comparable_result(result) for result in class_output
        ]

        self.assertEqual(function_comparable, class_comparable)

        self.assertEqual(sum(t.get(DONE) for t in scheduler_notif), 2)
        self.assertEqual(as_comparable_result(scheduler_notif[0]),
                         as_comparable_result(scheduler_notif[1]))

        # Make sure the last result is the same.
        self.assertEqual(as_comparable_result(trials[0].last_result),
                         as_comparable_result(trials[1].last_result))

        return function_output, trials
Example n. 29
        # Here we use `episode_reward_mean`, but you can also report other
        # objectives such as loss or accuracy (see tune/result.py).
        return TrainingResult(episode_reward_mean=v, timesteps_this_iter=1)

    def _save(self, checkpoint_dir):
        path = os.path.join(checkpoint_dir, "checkpoint")
        with open(path, "w") as f:
            f.write(json.dumps({"timestep": self.timestep}))
        return path

    def _restore(self, checkpoint_path):
        with open(checkpoint_path) as f:
            self.timestep = json.loads(f.read())["timestep"]


register_trainable("my_class", MyTrainableClass)

if __name__ == "__main__":
    ray.init()

    # Hyperband early stopping, configured with `episode_reward_mean` as the
    # objective and `timesteps_total` as the time unit.
    hyperband = HyperBandScheduler(time_attr="timesteps_total",
                                   reward_attr="episode_reward_mean",
                                   max_t=100)

    run_experiments(
        {
            "hyperband_test": {
                "run": "my_class",
                "repeat": 100,
Example n. 30
if __name__ == "__main__":
    datasets.MNIST('~/data', train=True, download=True)
    args = parser.parse_args()

    import numpy as np
    import ray
    from ray import tune
    from ray.tune.schedulers import AsyncHyperBandScheduler

    ray.init()
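    # Early-stop underperforming trials based on the reported `neg_mean_loss`.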
    sched = AsyncHyperBandScheduler(
        time_attr="training_iteration",
        reward_attr="neg_mean_loss",
        max_t=400,
        grace_period=20)
    tune.register_trainable("train_mnist",
                            lambda cfg, rprtr: train_mnist(args, cfg, rprtr))
    tune.run_experiments(
        {
            "exp": {
                "stop": {
                    "mean_accuracy": 0.98,
                    "training_iteration": 1 if args.smoke_test else 20
                },
                "resources_per_trial": {
                    "cpu": 3,
                    "gpu": int(not args.no_cuda)
                },
                "run": "train_mnist",
                "num_samples": 1 if args.smoke_test else 10,
                "config": {
                    "lr": tune.sample_from(
Example n. 31
#!/usr/bin/env python

import multiprocessing
import sys

import ray
from ray.tune import register_trainable, run_experiments

from rainbow_rllib_agent import RainbowRLlibAgent

register_trainable("Rainbow", RainbowRLlibAgent)

ray.init(num_gpus=1)

run_experiments({
    "rainbow-simple-pong": {
        "run": "Rainbow",
        "env": "PongNoFrameskip-v4",
        "resources": {
            "cpu": 1,
            "gpu": 1,
        },
        "config": {
            "num_workers": 0,
            "apex": False,
            "lr": .0001,
            "n_step": 3,
            "gamma": 0.99,
            "sample_batch_size": 4,
            "train_batch_size": 32,
            "force_remote_evaluators": False,
Example n. 32
    def _save(self, checkpoint_dir):
        return self.saver.save(
            self.sess, checkpoint_dir + "/save", global_step=self.iterations)

    def _restore(self, path):
        return self.saver.restore(self.sess, path)


# !!! Example of using the ray.tune Python API !!!
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--smoke-test', action='store_true', help='Finish quickly for testing')
    args, _ = parser.parse_known_args()

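    # Register the Trainable class; the spec below grid-searches the activation
    # and samples the learning rate log-uniformly, repeating 10 times.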
    register_trainable("my_class", TrainMNIST)
    mnist_spec = {
        'run': 'my_class',
        'stop': {
          'mean_accuracy': 0.99,
          'time_total_s': 600,
        },
        'config': {
            'learning_rate': lambda spec:  10 ** np.random.uniform(-5, -3),
            'activation': grid_search(['relu', 'elu', 'tanh']),
        },
        "repeat": 10,
    }

    if args.smoke_test:
        mnist_spec['stop']['training_iteration'] = 2
Example n. 33
    from ray.tune.schedulers import AsyncHyperBandScheduler, FIFOScheduler

    ray.init()
    if args.scheduler == "fifo":
        sched = FIFOScheduler()
    elif args.scheduler == "asynchyperband":
        sched = AsyncHyperBandScheduler(
            time_attr="training_iteration",
            metric="mean_loss",
            mode="min",
            max_t=400,
            grace_period=60)
    else:
        raise NotImplementedError
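    # Wrap the CIFAR-10 training function so each trial receives its sampled
    # config and a reporter.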
    tune.register_trainable(
        "TRAIN_FN",
        lambda config, reporter: train_cifar10(args, config, reporter))
    tune.run(
        "TRAIN_FN",
        name=args.expname,
        verbose=2,
        scheduler=sched,
        **{
            "stop": {
                "mean_accuracy": 0.98,
                "training_iteration": 1 if args.smoke_test else args.epochs
            },
            "resources_per_trial": {
                "cpu": int(args.num_workers),
                "gpu": int(args.num_gpus)
            },
Example n. 34

if __name__ == "__main__":
    import argparse
    import os

    assert "SIGOPT_KEY" in os.environ, \
        "SigOpt API key must be stored as environment variable at SIGOPT_KEY"

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init()

    register_trainable("exp", easy_objective)

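    # SigOpt takes the search space as a list of parameter definitions
    # (name, type, bounds).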
    space = [
        {
            'name': 'width',
            'type': 'int',
            'bounds': {
                'min': 0,
                'max': 20
            },
        },
        {
            'name': 'height',
            'type': 'int',
            'bounds': {
                'min': -100,
Example n. 35
from gym.spaces import Discrete, Box
from gym.envs.registration import EnvSpec
from gym.envs.registration import registry
from ray.rllib.env import MultiAgentEnv
from ray.tune.registry import register_env
from ray.rllib.models import ModelCatalog
from ray.rllib.evaluation.episode import _flatten_action
from ray.rllib.agents.registry import get_agent_class
from ray.rllib.models.preprocessors import get_preprocessor
from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID
from common import train_env_factory
from environment.core.utils.config import extend_config
from agents.sacq import SACQAgent

logging.basicConfig(format='%(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S', level=logging.INFO)
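# Expose the custom SACQ trainer to Tune/RLlib under the name "SACQ".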
tune.register_trainable("SACQ", SACQAgent)


EXAMPLE_USAGE = """
Example Usage via RLlib CLI:

python rollout.py --steps 1000 \
    --checkpoint=checkpoints/October2c/checkpoint_120/checkpoint-120
"""


ENVIRONMENT = "MultiRobot-v0"
RESET_ON_TARGET = True
DEFAULT_TIMESTEP = 0.1
FRAME_MULTIPLIER = 5
EVAL_TIMESTEP = DEFAULT_TIMESTEP/FRAME_MULTIPLIER
Example n. 36
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data',
        help='Directory for storing input data')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)


# !!! Example of using the ray.tune Python API !!!
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--smoke-test', action='store_true', help='Finish quickly for testing')
    args, _ = parser.parse_known_args()

    register_trainable('train_mnist', train)
    mnist_spec = {
        'run': 'train_mnist',
        'stop': {
          'mean_accuracy': 0.99,
          'time_total_s': 600,
        },
        'config': {
            'activation': grid_search(['relu', 'elu', 'tanh']),
        },
    }

    if args.smoke_test:
        mnist_spec['stop']['training_iteration'] = 2

    ray.init()
Example n. 37
#!/usr/bin/env python

import multiprocessing
import sys

import ray
from ray.tune import register_trainable, run_experiments

from dqn_agent import DQNRLlibAgent

register_trainable("DQNBaseline", DQNRLlibAgent)

ray.init()

run_experiments({
    "baseline-rllib-cartpole": {
        "run": "DQNBaseline",
        "env": "CartPole-v0",
        "resources": {
            "cpu": 1,
        },
        "config": {
            "num_workers": 0,
            "env_config": {"cartpole": True},
            "apex": False,
            "lr": .0005,
            "n_step": 1,
            "gamma": 0.99,
            "sample_batch_size": 1,
            "train_batch_size": 32,
            "force_remote_evaluators": False,