        type=str,
        default='/tmp/tensorflow/mnist/input_data',
        help='Directory for storing input data')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

# !!! Example of using the ray.tune Python API !!!
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--smoke-test', action='store_true', help='Finish quickly for testing')
    args, _ = parser.parse_known_args()

    register_trainable('train_mnist', train)
    mnist_spec = {
        'run': 'train_mnist',
        'stop': {
            'mean_accuracy': 0.99,
            'time_total_s': 600,
        },
        'config': {
            'activation': grid_search(['relu', 'elu', 'tanh']),
            # You can pass any serializable object as well
            'foo': grid_search([np.array([1, 2]), np.array([2, 3])]),
        },
    }

    if args.smoke_test:
""" # Note: if you use any custom models or envs, register them here first, e.g.: # # from ray.rllib.examples.env.parametric_actions_cartpole import \ # ParametricActionsCartPole # from ray.rllib.examples.model.parametric_actions_model import \ # ParametricActionsModel # ModelCatalog.register_custom_model("pa_model", ParametricActionsModel) # register_env("pa_cartpole", lambda _: ParametricActionsCartPole(10)) from ray.tune import register_trainable from a2c.tuned_a2c import TunedA2CTrainer from exploration.tuned_a2c import TunedA2CTrainer as ExplorationA2CTrainer from fun.fun_policy import FuNTrainer register_trainable('TunedA2CTrainer', TunedA2CTrainer) register_trainable('ExplorationA2CTrainer', TunedA2CTrainer) register_trainable('FuNTrainer', FuNTrainer) from ray.rllib.models import ModelCatalog from a2c.small_model import SmallConvModel as A2CSmallModel from a2c.small_lstm_model import SmallConvLSTMModel as A2CLSTMModel from exploration.small_model import SmallConvModel as ExplorationA2CModel from fun.fun_model import FuNModel ModelCatalog.register_custom_model('A2CSmallModel', A2CSmallModel) ModelCatalog.register_custom_model('A2CLSTMModel', A2CLSTMModel) ModelCatalog.register_custom_model('ExplorationA2CModel', ExplorationA2CModel) ModelCatalog.register_custom_model('FuNModel', FuNModel) class RolloutSaver:
from helper import create_parser

parser = create_parser()
args = parser.parse_args()

mnist.load_data()  # we do this because it's not threadsafe

import ray
from ray import tune
from ray.tune.async_hyperband import AsyncHyperBandScheduler

ray.init()
# ray.init(redis_address="localhost:6379")
sched = AsyncHyperBandScheduler(
    time_attr="timesteps_total",
    reward_attr="mean_accuracy",
    max_t=400,
    grace_period=20)
tune.register_trainable(
    "train_mnist",
    lambda cfg, rprtr: train_mnist(args, cfg, rprtr))
tune.run_experiments({
    "exp": {
        "stop": {
            "mean_accuracy": 0.99,
            "timesteps_total": 300
        },
        "run": "train_mnist",
        "repeat": 100,
        "config": {
            "lr": lambda spec: np.random.uniform(0.001, 0.1),
            "momentum": lambda spec: np.random.uniform(0.1, 0.9),
            "hidden": lambda spec: np.random.randint(32, 512),
            "dropout1": lambda spec: np.random.uniform(0.2, 0.8),
        }
def launch_experiments_ray(variant_specs, args, local_dir, experiment_fn,
                           scheduler=None):
    import ray
    from ray import tune

    tune.register_trainable('mujoco-runner', experiment_fn)

    trial_resources = _normalize_trial_resources(
        args.trial_resources, args.trial_cpus, args.trial_gpus,
        args.trial_extra_cpus, args.trial_extra_gpus)

    if 'local' in args.mode or 'debug' in args.mode:
        resources = args.resources or {}

        if 'debug' in args.mode:
            # Require a debug resource for each trial, so that we never run
            # more than one trial at a time. This makes debugging easier, since
            # the debugger stdout behaves more reasonably with single process.
            # TODO(hartikainen): Change this from 'extra_gpu' to
            # 'debug-resource' once tune supports custom resources.
            # See: https://github.com/ray-project/ray/pull/2979.
            resources['extra_gpu'] = 1
            trial_resources['extra_gpu'] = 1

        ray.init(resources=resources, num_cpus=args.cpus, num_gpus=args.gpus)
    else:
        ray.init(redis_address=ray.services.get_node_ip_address() + ':6379')

    datetime_prefix = datetimestamp()
    experiment_id = '-'.join((datetime_prefix, args.exp_name))

    tune.run_experiments(
        {
            "{}-{}".format(experiment_id, i): {
                'run': 'mujoco-runner',
                'trial_resources': trial_resources,
                'config': variant_spec,
                'local_dir': local_dir,
                'num_samples': args.num_samples,
                'upload_dir': args.upload_dir,
                'checkpoint_freq': (
                    args.checkpoint_frequency
                    if args.checkpoint_frequency is not None
                    else variant_spec['run_params'].get(
                        'checkpoint_frequency', 0)),
                'checkpoint_at_end': (
                    args.checkpoint_at_end
                    if args.checkpoint_at_end is not None
                    else variant_spec['run_params'].get(
                        'checkpoint_at_end', True)),
                'restore': args.restore,  # Defaults to None
            }
            for i, variant_spec in enumerate(variant_specs)
        },
        scheduler=scheduler,
    )
current_best_params = [
    {
        "width": 1,
        "height": 2,
        "activation": 0  # Activation will be relu
    },
    {
        "width": 4,
        "height": 2,
        "activation": 1  # Activation will be tanh
    },
]

algo = HyperOptSearch(
    space,
    metric="episode_reward_mean",
    mode="max",
    random_state_seed=5,
    points_to_evaluate=current_best_params)
algo = ConcurrencyLimiter(algo, max_concurrent=1)

from ray.tune import register_trainable
register_trainable("trainable", MyTrainableClass)

os.environ["TUNE_GLOBAL_CHECKPOINT_S"] = "0"

run("trainable",
    search_alg=algo,
    resume=args.resume,
    verbose=0,
    num_samples=20,
    fail_fast=True,
    stop={"training_iteration": 2},
    local_dir=args.local_dir,
    name="experiment")
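# A minimal sketch of the kind of hyperopt space these index-encoded
# `points_to_evaluate` entries assume (the space itself is not shown in the
# snippet, so this one is hypothetical): with hp.choice, categories are
# referenced by position, so "activation": 0 means "relu" and 1 means "tanh".
from hyperopt import hp

space = {
    "width": hp.uniform("width", 0, 20),
    "height": hp.uniform("height", -100, 100),
    # hp.choice categories are referenced by index in this list.
    "activation": hp.choice("activation", ["relu", "tanh"]),
}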
            abs(config["width"] - 3))
        time.sleep(0.02)


if __name__ == '__main__':
    import argparse
    from hyperopt import hp

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init(redirect_output=True)

    register_trainable("exp", easy_objective)

    space = {
        'width': hp.uniform('width', 0, 20),
        'height': hp.uniform('height', -100, 100),
        'activation': hp.choice("activation", ["relu", "tanh"])
    }

    config = {
        "my_exp": {
            "run": "exp",
            "repeat": 10 if args.smoke_test else 1000,
            "stop": {
                "training_iteration": 100
            },
        }
    def setUp(self):
        def train(config, reporter):
            for i in range(100):
                reporter(timesteps_total=i)

        register_trainable("f1", train)
    def _save(self, checkpoint_dir):
        path = os.path.join(checkpoint_dir, "checkpoint")
        with open(path, "w") as f:
            f.write(json.dumps(
                {"timestep": self.timestep, "value": self.current_value}))
        return path

    def _restore(self, checkpoint_path):
        with open(checkpoint_path) as f:
            data = json.loads(f.read())
        self.timestep = data["timestep"]
        self.current_value = data["value"]


register_trainable("my_class", MyTrainableClass)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init()

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="episode_reward_mean",
        perturbation_interval=10,
        hyperparam_mutations={
            # Allow for scaling-based perturbations, with a uniform backing
            # distribution for resampling.
            "factor_1": lambda: random.uniform(0.0, 20.0),
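# The snippet above is truncated before the scheduler is used. A minimal,
# self-contained sketch of how such a PBT scheduler is typically handed to
# Tune (the experiment spec, metric/mode spelling, and starting value below
# are assumptions, not from the source; metric/mode replaced reward_attr in
# later Ray releases):
import random

import ray
from ray import tune
from ray.tune.schedulers import PopulationBasedTraining

if __name__ == "__main__":
    ray.init()
    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="episode_reward_mean",
        mode="max",
        perturbation_interval=10,
        hyperparam_mutations={
            # Resample from a uniform backing distribution.
            "factor_1": lambda: random.uniform(0.0, 20.0),
        })
    tune.run(
        "my_class",  # the trainable registered above
        scheduler=pbt,
        num_samples=8,
        config={"factor_1": 1.0},  # assumed starting value
    )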
#!/usr/bin/env python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys

import ray
from ray.tune import register_trainable, run_experiments


def f(config, reporter):
    reporter(timesteps_total=1)


if __name__ == "__main__":
    ray.init()
    register_trainable("my_class", f)
    run_experiments({
        "test": {
            "run": "my_class",
            "stop": {
                "training_iteration": 1
            }
        }
    })
    assert 'ray.rllib' not in sys.modules, "RLlib should not be imported"
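# A class-based sketch of the same registration pattern (not from the source;
# it assumes a 1.x-era Ray where the Trainable hooks are setup()/step() --
# older releases used _setup()/_train()). Either form can be registered under
# a string name and launched by that name.
import ray
from ray import tune
from ray.tune import register_trainable


class F(tune.Trainable):
    def setup(self, config):
        pass

    def step(self):
        # One call is one training iteration from Tune's point of view.
        return {"timesteps_total": 1}


if __name__ == "__main__":
    ray.init()
    register_trainable("my_class_based", F)
    tune.run("my_class_based", stop={"training_iteration": 1})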
from ray.tune.schedulers import PopulationBasedTraining
from ray.tune.utils import validate_save_restore
import matplotlib.style as style
import matplotlib.pyplot as plt
import math


def my_func(config, reporter):  # add the reporter parameter
    import time, numpy as np
    i = 0
    while True:
        reporter(timesteps_total=i, mean_accuracy=i**config["alpha"])
        i += config["beta"]
        time.sleep(.01)


tune.register_trainable("my_func", my_func)

ray.init()
tune.run_experiments({
    "my_experiment": {
        "run": "my_func",
        "stop": {
            "mean_accuracy": 100
        },
        "config": {
            "alpha": tune.grid_search([0.2, 0.4, 0.6]),
            "beta": tune.grid_search([1, 2]),
        }
    }
})
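# The two grid_search axes combine as a cross product, so this experiment
# launches 3 * 2 = 6 trials; each trial keeps reporting until its
# mean_accuracy crosses the stop threshold of 100.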
    def parallelPull(self, manifest={}):
        self.xp_state.versioningDirectory = os.path.expanduser(
            '~') + '/' + 'jarvis.d'
        # Runs one experiment per pull
        # Each experiment has many trials

        tmpexperiment = self.xp_state.tmpexperiment
        if os.path.exists(tmpexperiment):
            rmtree(tmpexperiment)
        os.mkdir(tmpexperiment)

        self.xp_state.visited = []
        if not util.isOrphan(self):
            self.loclist = list(
                map(lambda x: x.getLocation(), self.parent.out_artifacts))
        else:
            self.loclist = [self.getLocation(), ]
        self.scriptNames = []
        literalsAttached = set([])
        lambdas = []

        if not util.isOrphan(self):
            self.parent.__serialize__(lambdas, self.loclist, self.scriptNames)

        self.loclist = list(set(self.loclist))
        self.scriptNames = list(set(self.scriptNames))

        # Need to sort to compare
        self.loclist.sort()
        self.scriptNames.sort()

        for _, names in lambdas:
            literalsAttached |= set(names)

        original_dir = os.getcwd()
        experimentName = self.xp_state.jarvisFile.split('.')[0]

        def exportedExec(config, reporter):
            tee = tuple([])
            for litName in config['8ilk9274']:
                tee += (config[litName], )
            i = -1
            for j, v in enumerate(config['6zax7937']):
                if v == tee:
                    i = j
                    break
            assert i >= 0
            os.chdir(tmpexperiment + '/' + str(i))
            with open('.' + experimentName + '.jarvis', 'w') as fp:
                json.dump(config, fp)
            for f, names in lambdas:
                literals = list(map(lambda x: config[x], names))
                f(literals)
            reporter(timesteps_total=1)
            os.chdir(original_dir)

        config = {}
        numTrials = 1
        literals = []
        literalNames = []
        for kee in self.xp_state.literalNameToObj:
            if kee in literalsAttached:
                if self.xp_state.literalNameToObj[kee].__oneByOne__:
                    config[kee] = grid_search(
                        self.xp_state.literalNameToObj[kee].v)
                    numTrials *= len(self.xp_state.literalNameToObj[kee].v)
                    literals.append(self.xp_state.literalNameToObj[kee].v)
                else:
                    config[kee] = self.xp_state.literalNameToObj[kee].v
                    if util.isIterable(self.xp_state.literalNameToObj[kee].v):
                        if type(self.xp_state.literalNameToObj[kee].v) == tuple:
                            literals.append(
                                (self.xp_state.literalNameToObj[kee].v, ))
                        else:
                            literals.append(
                                [self.xp_state.literalNameToObj[kee].v, ])
                    else:
                        literals.append(
                            [self.xp_state.literalNameToObj[kee].v, ])
                literalNames.append(kee)

        literals = list(itertools.product(*literals))
        config['6zax7937'] = literals
        config['8ilk9274'] = literalNames

        for i in range(numTrials):
            dst = tmpexperiment + '/' + str(i)
            copytree(os.getcwd(), dst, True)

        ts = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

        register_trainable('exportedExec', exportedExec)
        self.xp_state.ray['literalNames'] = literalNames

        run_experiments({
            experimentName: {
                'run': 'exportedExec',
                'resources': {
                    'cpu': 1,
                    'gpu': 0
                },
                'config': config
            }
        })

        if not os.path.isdir(self.xp_state.versioningDirectory):
            os.mkdir(self.xp_state.versioningDirectory)

        moveBackFlag = False
        if os.path.exists(self.xp_state.versioningDirectory + '/' +
                          self.xp_state.jarvisFile.split('.')[0]):
            move(
                self.xp_state.versioningDirectory + '/' +
                self.xp_state.jarvisFile.split('.')[0] + '/.git', '/tmp/')
            rmtree(self.xp_state.versioningDirectory + '/' +
                   self.xp_state.jarvisFile.split('.')[0])
            moveBackFlag = True

        if manifest:
            os.chdir(tmpexperiment)
            dirs = [x for x in os.listdir() if util.isNumber(x)]
            table_full = []
            table_small = []

            for trial in dirs:
                os.chdir(trial)
                with open('.' + experimentName + '.jarvis', 'r') as fp:
                    config = json.load(fp)
                record_full = {}
                record_small = {}
                for literalName in literalNames:
                    record_full[literalName] = config[literalName]
                    record_small[literalName] = config[literalName]
                for artifactLabel in manifest:
                    record_full[artifactLabel] = util.loadArtifact(
                        manifest[artifactLabel].loc)
                    if total_size(record_full[artifactLabel]) >= 1000:
                        record_small[artifactLabel] = " . . . "
                    else:
                        record_small[artifactLabel] = record_full[
                            artifactLabel]
                    if util.isNumber(record_full[artifactLabel]):
                        record_full[artifactLabel] = eval(
                            record_full[artifactLabel])
                    if util.isNumber(record_small[artifactLabel]):
                        record_small[artifactLabel] = eval(
                            record_small[artifactLabel])
                record_small['__trialNum__'] = trial
                record_full['__trialNum__'] = trial
                table_full.append(record_full)
                table_small.append(record_small)
                os.chdir('../')

            df = pd.DataFrame(table_small)
            util.pickleTo(df, experimentName + '.pkl')

        os.chdir(original_dir)
        copytree(
            tmpexperiment, self.xp_state.versioningDirectory + '/' +
            self.xp_state.jarvisFile.split('.')[0])
        os.chdir(self.xp_state.versioningDirectory + '/' +
                 self.xp_state.jarvisFile.split('.')[0])
        if moveBackFlag:
            move(
                '/tmp/.git', self.xp_state.versioningDirectory + '/' +
                self.xp_state.jarvisFile.split('.')[0])
            repo = git.Repo(os.getcwd())
            repo.git.add(A=True)
            repo.index.commit('incremental commit')
        else:
            repo = git.Repo.init(os.getcwd())
            repo.git.add(A=True)
            repo.index.commit('initial commit')
        os.chdir(original_dir)

        if manifest:
            return pd.DataFrame(table_full)
    def execute(
            self,
            config,
            dataset=None,
            training_set=None,
            validation_set=None,
            test_set=None,
            training_set_metadata=None,
            data_format=None,
            experiment_name="hyperopt",
            model_name="run",
            # model_load_path=None,
            # model_resume_path=None,
            skip_save_training_description=False,
            skip_save_training_statistics=False,
            skip_save_model=False,
            skip_save_progress=False,
            skip_save_log=False,
            skip_save_processed_input=True,
            skip_save_unprocessed_output=False,
            skip_save_predictions=False,
            skip_save_eval_stats=False,
            output_directory="results",
            gpus=None,
            gpu_memory_limit=None,
            allow_parallel_threads=True,
            backend=None,
            random_seed=default_random_seed,
            debug=False,
            **kwargs):
        if isinstance(dataset, str) and not os.path.isabs(dataset):
            dataset = os.path.abspath(dataset)

        if gpus is not None:
            raise ValueError(
                "Parameter `gpus` is not supported when using Ray Tune. "
                "Configure GPU resources with Ray and set "
                "`gpu_resources_per_trial` in your hyperopt config.")

        hyperopt_dict = dict(
            config=config,
            dataset=dataset,
            training_set=training_set,
            validation_set=validation_set,
            test_set=test_set,
            training_set_metadata=training_set_metadata,
            data_format=data_format,
            experiment_name=experiment_name,
            model_name=model_name,
            # model_load_path=model_load_path,
            # model_resume_path=model_resume_path,
            eval_split=self.split,
            skip_save_training_description=skip_save_training_description,
            skip_save_training_statistics=skip_save_training_statistics,
            skip_save_model=skip_save_model,
            skip_save_progress=skip_save_progress,
            skip_save_log=skip_save_log,
            skip_save_processed_input=skip_save_processed_input,
            skip_save_unprocessed_output=skip_save_unprocessed_output,
            skip_save_predictions=skip_save_predictions,
            skip_save_eval_stats=skip_save_eval_stats,
            output_directory=output_directory,
            gpus=gpus,
            gpu_memory_limit=gpu_memory_limit,
            allow_parallel_threads=allow_parallel_threads,
            backend=backend,
            random_seed=random_seed,
            debug=debug,
        )

        mode = "min" if self.goal != MAXIMIZE else "max"
        metric = "metric_score"
        if self.search_alg_dict is not None:
            if TYPE not in self.search_alg_dict:
                logger.warning(
                    "WARNING: Kindly set type param for search_alg "
                    "to utilize Tune's Search Algorithms.")
                search_alg = None
            else:
                search_alg_type = self.search_alg_dict.pop(TYPE)
                search_alg = tune.create_searcher(
                    search_alg_type,
                    metric=metric,
                    mode=mode,
                    **self.search_alg_dict)
        else:
            search_alg = None

        sync_config = None
        if self.kubernetes_namespace:
            from ray.tune.integration.kubernetes import (
                NamespacedKubernetesSyncer)
            sync_config = tune.SyncConfig(
                sync_to_driver=NamespacedKubernetesSyncer(
                    self.kubernetes_namespace))

        resources_per_trial = {
            "cpu": self.cpu_resources_per_trial or 1,
            "gpu": self.gpu_resources_per_trial or 0,
        }

        def run_experiment_trial(config, checkpoint_dir=None):
            return self._run_experiment(config, checkpoint_dir,
                                        hyperopt_dict, self.decode_ctx)

        register_trainable(f"trainable_func_f{hash_dict(config)}",
                           run_experiment_trial)

        analysis = tune.run(
            f"trainable_func_f{hash_dict(config)}",
            config=self.search_space,
            scheduler=self.scheduler,
            search_alg=search_alg,
            num_samples=self.num_samples,
            resources_per_trial=resources_per_trial,
            queue_trials=True,
            sync_config=sync_config,
            local_dir=output_directory,
            metric=metric,
            mode=mode,
            trial_name_creator=lambda trial: f"trial_{trial.trial_id}",
            trial_dirname_creator=lambda trial: f"trial_{trial.trial_id}",
        )

        hyperopt_results = analysis.results_df.sort_values(
            "metric_score", ascending=self.goal != MAXIMIZE)

        return hyperopt_results.to_dict(orient="records")
"num_workers": 32, "buffer_size": 2000000, "learning_starts": 50000, "train_batch_size": 512, "rollout_fragment_length": 50, "target_network_update_freq": 500000, "timesteps_per_iteration": 1000, "exploration_config": { "type": "PerWorkerEpsilonGreedy" }, "worker_side_prioritization": True, "min_iter_time_s": 30, "training_intensity": None, "prioritized_replay": True, "prioritized_replay_alpha": 0.6, "prioritized_replay_beta": 0.4, "final_prioritized_replay_beta": 0.4, "prioritized_replay_beta_annealing_timesteps": 20000, "prioritized_replay_eps": 1e-6, }, ) QMixTrainer = GenericOffPolicyTrainer.with_updates( name="QMIXApex", default_config=QMIX_APEX_DEFAULT_CONFIG, default_policy=QMixTorchPolicy, get_policy_class=None, execution_plan=apex_execution_plan) register_trainable("QMIXApex", QMixTrainer)
    y = np.dot(sin_x, sin_z)

    # Negate y since we want to minimize y value
    reporter(timesteps_total=1, neg_mean_loss=-y)


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init(redirect_output=True)

    register_trainable("exp", michalewicz_function)

    space = SearchSpace({
        ContinuousSpace('x1', 0, 4, 100),
        ContinuousSpace('x2', -2, 2, 100),
        ContinuousSpace('x3', 1, 5, 100),
        ContinuousSpace('x4', -3, 3, 100),
        DiscreteSpace('x5', [-1, 0, 1, 2, 3]),
    })

    config = {
        "my_exp": {
            "run": "exp",
            "stop": {
                "training_iteration": 100
            },
        train(epoch)
        test()


if __name__ == "__main__":
    args = parser.parse_args()

    import numpy as np
    import ray
    from ray import tune
    from ray.tune.schedulers import HyperBandScheduler

    ray.init()
    sched = HyperBandScheduler(
        time_attr="training_iteration",
        reward_attr="neg_mean_loss",
        max_t=100)
    tune.register_trainable(
        "train_dream",
        lambda cfg, rprtr: train_dream(args, cfg, rprtr))
    tune.run_experiments({
        "exp": {
            "stop": {
                # "neg_mean_loss": 0.0,
                "training_iteration": 1 if args.smoke_test else 100,
            },
            "resources_per_trial": {
                "cpu": 1,
                "gpu": 0
            },
            "run": "train_dream",
            "num_samples": 1 if args.smoke_test else 20,
            # "checkpoint_at_end": True,
    def execute(
            self,
            config,
            dataset=None,
            training_set=None,
            validation_set=None,
            test_set=None,
            training_set_metadata=None,
            data_format=None,
            experiment_name="hyperopt",
            model_name="run",
            resume=None,
            skip_save_training_description=False,
            skip_save_training_statistics=False,
            skip_save_model=False,
            skip_save_progress=False,
            skip_save_log=False,
            skip_save_processed_input=True,
            skip_save_unprocessed_output=False,
            skip_save_predictions=False,
            skip_save_eval_stats=False,
            output_directory="results",
            gpus=None,
            gpu_memory_limit=None,
            allow_parallel_threads=True,
            callbacks=None,
            backend=None,
            random_seed=default_random_seed,
            debug=False,
            hyperopt_log_verbosity=3,
            features_eligible_for_shared_params=None,
            **kwargs,
    ) -> RayTuneResults:
        if isinstance(dataset, str) and not has_remote_protocol(
                dataset) and not os.path.isabs(dataset):
            dataset = os.path.abspath(dataset)

        if isinstance(backend, str):
            backend = initialize_backend(backend)

        if gpus is not None:
            raise ValueError(
                "Parameter `gpus` is not supported when using Ray Tune. "
                "Configure GPU resources with Ray and set "
                "`gpu_resources_per_trial` in your hyperopt config.")

        if gpu_memory_limit is None and \
                0 < self._gpu_resources_per_trial_non_none < 1:
            # Enforce fractional GPU utilization
            gpu_memory_limit = self.gpu_resources_per_trial

        hyperopt_dict = dict(
            config=config,
            dataset=dataset,
            training_set=training_set,
            validation_set=validation_set,
            test_set=test_set,
            training_set_metadata=training_set_metadata,
            data_format=data_format,
            experiment_name=experiment_name,
            model_name=model_name,
            eval_split=self.split,
            skip_save_training_description=skip_save_training_description,
            skip_save_training_statistics=skip_save_training_statistics,
            skip_save_model=skip_save_model,
            skip_save_progress=skip_save_progress,
            skip_save_log=skip_save_log,
            skip_save_processed_input=skip_save_processed_input,
            skip_save_unprocessed_output=skip_save_unprocessed_output,
            skip_save_predictions=skip_save_predictions,
            skip_save_eval_stats=skip_save_eval_stats,
            output_directory=output_directory,
            gpus=gpus,
            gpu_memory_limit=gpu_memory_limit,
            allow_parallel_threads=allow_parallel_threads,
            callbacks=callbacks,
            backend=backend,
            random_seed=random_seed,
            debug=debug,
        )

        mode = "min" if self.goal != MAXIMIZE else "max"
        metric = "metric_score"

        # if random seed not set, use Ludwig seed
        self.search_algorithm.check_for_random_seed(random_seed)

        if self.search_algorithm.search_alg_dict is not None:
            if TYPE not in self.search_algorithm.search_alg_dict:
                candidate_search_algs = [
                    search_alg for search_alg in SEARCH_ALG_IMPORT.keys()
                ]
                logger.warning(
                    "WARNING: search_alg type parameter missing, using "
                    "'variant_generator' as default. These are possible "
                    f"values for the type parameter: {candidate_search_algs}.")
                search_alg = None
            else:
                search_alg_type = self.search_algorithm.search_alg_dict[TYPE]
                search_alg = tune.create_searcher(
                    search_alg_type,
                    metric=metric,
                    mode=mode,
                    **self.search_algorithm.search_alg_dict)
        else:
            search_alg = None

        if self.max_concurrent_trials:
            assert (
                self.max_concurrent_trials > 0
            ), f"`max_concurrent_trials` must be greater than 0, got {self.max_concurrent_trials}"
            if isinstance(search_alg,
                          BasicVariantGenerator) or search_alg is None:
                search_alg = BasicVariantGenerator(
                    max_concurrent=self.max_concurrent_trials)
            elif isinstance(search_alg, ConcurrencyLimiter):
                raise ValueError(
                    "You have specified `max_concurrent_trials`, but the "
                    "search algorithm is already a `ConcurrencyLimiter`. "
                    "FIX THIS by setting `max_concurrent_trials=None`.")
            else:
                search_alg = ConcurrencyLimiter(
                    search_alg, max_concurrent=self.max_concurrent_trials)

        resources_per_trial = {
            "cpu": self._cpu_resources_per_trial_non_none,
            "gpu": self._gpu_resources_per_trial_non_none,
        }

        def run_experiment_trial(config, local_hyperopt_dict,
                                 checkpoint_dir=None):
            return self._run_experiment(
                config,
                checkpoint_dir,
                local_hyperopt_dict,
                self.decode_ctx,
                features_eligible_for_shared_params,
                _is_ray_backend(backend),
            )

        tune_config = {}
        tune_callbacks = []
        for callback in callbacks or []:
            run_experiment_trial, tune_config = callback.prepare_ray_tune(
                run_experiment_trial,
                tune_config,
                tune_callbacks,
            )

        if _is_ray_backend(backend):
            # For now, we do not do distributed training on CPU (until spread
            # scheduling is implemented for Ray Train), but we do want to
            # enable it when GPUs are specified.
            resources_per_trial = PlacementGroupFactory(
                [{}] + ([{
                    "CPU": 0,
                    "GPU": 1
                }] * self._gpu_resources_per_trial_non_none)
                if self._gpu_resources_per_trial_non_none else [{}] + [{
                    "CPU": self._cpu_resources_per_trial_non_none
                }])

        if has_remote_protocol(output_directory):
            run_experiment_trial = tune.durable(run_experiment_trial)
            self.sync_config = tune.SyncConfig(
                sync_to_driver=False, upload_dir=output_directory)
            if _ray_114:
                self.sync_client = get_node_to_storage_syncer(
                    SyncConfig(upload_dir=output_directory))
            else:
                self.sync_client = get_cloud_sync_client(output_directory)
            output_directory = None
        elif self.kubernetes_namespace:
            from ray.tune.integration.kubernetes import (
                KubernetesSyncClient, NamespacedKubernetesSyncer)
            self.sync_config = tune.SyncConfig(
                sync_to_driver=NamespacedKubernetesSyncer(
                    self.kubernetes_namespace))
            self.sync_client = KubernetesSyncClient(self.kubernetes_namespace)

        run_experiment_trial_params = tune.with_parameters(
            run_experiment_trial, local_hyperopt_dict=hyperopt_dict)
        register_trainable(
            f"trainable_func_f{hash_dict(config).decode('ascii')}",
            run_experiment_trial_params)

        # Note that resume="AUTO" will attempt to resume the experiment if
        # possible, and otherwise will start a new experiment:
        # https://docs.ray.io/en/latest/tune/tutorials/tune-stopping.html
        should_resume = "AUTO" if resume is None else resume

        try:
            analysis = tune.run(
                f"trainable_func_f{hash_dict(config).decode('ascii')}",
                name=experiment_name,
                config={
                    **self.search_space,
                    **tune_config,
                },
                scheduler=self.scheduler,
                search_alg=search_alg,
                num_samples=self.num_samples,
                keep_checkpoints_num=1,
                max_failures=1,  # retry a trial failure once
                resources_per_trial=resources_per_trial,
                time_budget_s=self.time_budget_s,
                sync_config=self.sync_config,
                local_dir=output_directory,
                metric=metric,
                mode=mode,
                trial_name_creator=lambda trial: f"trial_{trial.trial_id}",
                trial_dirname_creator=lambda trial: f"trial_{trial.trial_id}",
                callbacks=tune_callbacks,
                stop=CallbackStopper(callbacks),
                verbose=hyperopt_log_verbosity,
                resume=should_resume,
                log_to_file=True,
            )
        except Exception as e:
            # Explicitly raise a RuntimeError if an error is encountered
            # during a Ray trial.
            # NOTE: Cascading the exception with "raise _ from e" still
            # results in hanging.
            raise RuntimeError(f"Encountered Ray Tune error: {e}")

        if "metric_score" in analysis.results_df.columns:
            ordered_trials = analysis.results_df.sort_values(
                "metric_score", ascending=self.goal != MAXIMIZE)

            # Catch nans in edge case where the trial doesn't complete
            temp_ordered_trials = []
            for kwargs in ordered_trials.to_dict(orient="records"):
                for key in ["parameters", "training_stats", "eval_stats"]:
                    if isinstance(kwargs[key], float):
                        kwargs[key] = {}
                temp_ordered_trials.append(kwargs)

            # Trials w/empty eval_stats fields & non-empty training_stats
            # fields ran intermediate tune.report call(s) but were terminated
            # before reporting eval_stats from post-train evaluation (e.g.,
            # trial stopped due to time budget or relatively poor
            # performance). For any such trials, run model evaluation for the
            # best model in that trial & record results in ordered_trials,
            # which is returned & persisted in hyperopt_statistics.json.
            for trial in temp_ordered_trials:
                if trial["eval_stats"] == "{}" and trial[
                        "training_stats"] != "{}":
                    # Evaluate the best model on the eval_split, which is
                    # validation_set
                    if validation_set is not None and validation_set.size > 0:
                        trial_path = trial["trial_dir"]
                        best_model_path = self._get_best_model_path(
                            trial_path, analysis)
                        if best_model_path is not None:
                            self._evaluate_best_model(
                                trial,
                                trial_path,
                                best_model_path,
                                validation_set,
                                data_format,
                                skip_save_unprocessed_output,
                                skip_save_predictions,
                                skip_save_eval_stats,
                                gpus,
                                gpu_memory_limit,
                                allow_parallel_threads,
                                backend,
                                debug,
                            )
                        else:
                            logger.warning(
                                "Skipping evaluation as no model checkpoints "
                                "were available")
                    else:
                        logger.warning(
                            "Skipping evaluation as no validation set was "
                            "provided")

            ordered_trials = [
                TrialResults.from_dict(load_json_values(kwargs))
                for kwargs in temp_ordered_trials
            ]
        else:
            logger.warning(
                "No trials reported results; check if time budget is lower "
                "than epoch latency")
            ordered_trials = []

        return RayTuneResults(
            ordered_trials=ordered_trials, experiment_analysis=analysis)
    values = values[:len(values) - max(upper, 0)]
    values = np.pad(
        values,
        pad_width=[
            (-min(lower, 0), -min(0, upper)),
            *[(0, 0) for k in range(values.ndim - 1)],
        ],
        mode="constant",
    )
    return values


CCPPOPolicy = PPOTFPolicy.with_updates(
    name="CCPPOPolicy",
    postprocess_fn=centralized_critic_postprocessing,
    loss_fn=loss_with_central_critic,
    before_loss_init=setup_mixins,
    grad_stats_fn=central_vf_stats,
    mixins=[
        LearningRateSchedule,
        EntropyCoeffSchedule,
        KLCoeffMixin,
        CentralizedValueMixin,
    ],
)

register_trainable(
    "CcConcatenate",
    PPOTrainer.with_updates(
        name="CCPPOTrainer", get_policy_class=lambda c: CCPPOPolicy),
)
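# Note that register_trainable() accepts the class object returned by
# with_updates(), so the centralized-critic PPO variant above can later be
# launched by name, e.g. tune.run("CcConcatenate", ...), like any built-in
# trainer.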
    def _save(self, checkpoint_dir):
        return self.saver.save(
            self.sess, checkpoint_dir + "/save", global_step=self.iterations)

    def _restore(self, path):
        return self.saver.restore(self.sess, path)


# !!! Example of using the ray.tune Python API !!!
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--smoke-test', action='store_true', help='Finish quickly for testing')
    args, _ = parser.parse_known_args()

    register_trainable("my_class", TrainMNIST)
    mnist_spec = {
        'run': 'my_class',
        'stop': {
            'mean_accuracy': 0.99,
            'time_total_s': 600,
        },
        'config': {
            'learning_rate': sample_from(
                lambda spec: 10**np.random.uniform(-5, -3)),
            'activation': grid_search(['relu', 'elu', 'tanh']),
        },
        "num_samples": 10,
    }

    if args.smoke_test:
            space['prob_%d_%d' % (i, j)] = hp.uniform(
                'prob_%d_%d' % (i, j), 0.0, 1.0)
            space['level_%d_%d' % (i, j)] = hp.uniform(
                'level_%d_%d' % (i, j), 0.0, 1.0)

    final_policy_set = []
    total_computation = 0
    reward_attr = 'top1_valid'  # top1_valid or minus_loss
    for _ in range(1):  # run multiple times.
        for cv_fold in range(cv_num):
            name = "search_%s_%s_fold%d_ratio%.1f" % (
                C.get()['dataset'], C.get()['model']['type'], cv_fold,
                args.cv_ratio)
            print(name)
            register_trainable(
                name,
                lambda augs, rpt: eval_tta(copy.deepcopy(copied_c), augs, rpt))
            algo = HyperOptSearch(
                space, max_concurrent=4 * 20, reward_attr=reward_attr)

            exp_config = {
                name: {
                    'run': name,
                    'num_samples': 4 if args.smoke_test else args.num_search,
                    'resources_per_trial': {
                        'gpu': 1
                    },
                    'stop': {
                        'training_iteration': args.num_policy
                    },
        config["entropy_coeff_schedule"])
    warmup_steps = config["model"]["custom_options"].get(
        "warmup_steps", 100000)
    TransformerLearningRateSchedule.__init__(
        policy,
        config["model"]["custom_options"]["transformer"]["num_heads"],
        warmup_steps)


TTFPPOPolicy = PPOTFPolicy.with_updates(
    name="TTFPPOPolicy",
    before_loss_init=setup_mixins,
    mixins=[
        TransformerLearningRateSchedule, EntropyCoeffSchedule, KLCoeffMixin,
        ValueNetworkMixin
    ])

TTFPPOPolicyInfer = PPOTFPolicy.with_updates(
    name="TTFPPOPolicyInfer",
    before_loss_init=setup_mixins,
    mixins=[
        LearningRateSchedule, EntropyCoeffSchedule, KLCoeffMixin,
        ValueNetworkMixin
    ])

register_trainable(
    "TTFPPO",
    PPOTrainer.with_updates(
        name="TTFPPOTrainer", get_policy_class=lambda c: TTFPPOPolicy),
)
        eao = eao_vot(tracker, model, model_config)
        print("penalty_k: {0}, scale_lr: {1}, window_influence: {2}, "
              "small_sz: {3}, big_sz: {4}, ratio: {6}, eao: {5}".format(
                  penalty_k, scale_lr, window_influence, small_sz, big_sz,
                  eao, ratio))
        reporter(EAO=eao)

    # OTB and Ocean
    if args.dataset.startswith('OTB'):
        auc = auc_otb(tracker, model, model_config)
        print("penalty_k: {0}, scale_lr: {1}, window_influence: {2}, "
              "small_sz: {3}, big_sz: {4}, ratio: {6}, eao: {5}".format(
                  penalty_k, scale_lr, window_influence, small_sz, big_sz,
                  auc.item(), ratio))
        reporter(AUC=auc)


if __name__ == "__main__":
    # The resources your machine has; object_store_memory is shm.
    # ray.init(num_gpus=args.gpu_nums, num_cpus=args.gpu_nums * 8,
    #          object_store_memory=50000000000)
    ray.init(
        num_gpus=args.gpu_nums,
        num_cpus=args.gpu_nums * 8,
        object_store_memory=500000000)
    tune.register_trainable("fitness", fitness)

    if 'Ocean' in args.arch:
        params = {
            "penalty_k": hp.quniform('penalty_k', 0.001, 0.2, 0.001),
            "scale_lr": hp.quniform('scale_lr', 0.3, 0.8, 0.001),
            "window_influence": hp.quniform('window_influence', 0.15, 0.65,
                                            0.001),
            "small_sz": hp.choice("small_sz", [255]),
            "big_sz": hp.choice("big_sz", [287, 303, 319]),
            "ratio": hp.quniform('ratio', 0.7, 1, 0.01),
        }
        if 'VOT' not in args.dataset or not args.align:
            params['ratio'] = hp.choice("ratio", [1])

    print('tuning range: ')
    pprint(params)
    def fit(self):
        if self.config is None:
            raise ValueError('Have to set config file')

        tune.register_trainable(
            "tune_train_eval",
            lambda tuned, rprtr: tune_train_eval(
                self.dataLoader, self.model, self.criterion,
                self.customMetric, self.config, tuned, rprtr))

        if 'mlflow_tracking_URI' in self.config.keys():
            host = self.config['mlflow_tracking_URI'].split('//')[1].split(
                ':')[0]
            port = self.config['mlflow_tracking_URI'].split('//')[1].split(
                ':')[1]
            os.system('mlflow ui -h ' + host + ' -p ' + port + ' &')
            print('mlflow server start')

        experiment_config = {}
        experiment_config['exp'] = {}
        experiment_config['exp']['trial_resources'] = {}
        if self.config['multiGPU'] == 'Y':
            experiment_config['exp']['trial_resources']['gpu'] = int(
                torch.cuda.device_count())
        else:
            # Note: device_count is a function; the original forgot the call
            # parentheses, which made the condition always truthy.
            if torch.cuda.device_count() > 0:
                experiment_config['exp']['trial_resources']['gpu'] = 1
            else:
                experiment_config['exp']['trial_resources']['gpu'] = 0
        if 'trial_resources_cpu' in self.config.keys():
            experiment_config['exp']['trial_resources']['cpu'] = int(
                self.config['trial_resources_cpu'])
        if 'trial_resources_gpu' in self.config.keys():
            experiment_config['exp']['trial_resources']['gpu'] = int(
                self.config['trial_resources_gpu'])
        experiment_config['exp']['run'] = "tune_train_eval"
        experiment_config['exp']['stop'] = {}
        experiment_config['exp']['stop']['training_iteration'] = int(
            self.config['epoch'])
        experiment_config['exp']['local_dir'] = self.config['ray_dir']
        if 'num_samples' in self.config.keys():
            experiment_config['exp']['num_samples'] = int(
                self.config['num_samples'])

        # Set hyperparameter candidates.
        experiment_config['exp']['config'] = {}
        setExperimentConfigParam(self.config, 'learning_rate',
                                 experiment_config['exp']['config'])
        setExperimentConfigParam(self.config, 'momentum',
                                 experiment_config['exp']['config'])
        setExperimentConfigParam(self.config, 'lr_decay',
                                 experiment_config['exp']['config'])
        setExperimentConfigParam(self.config, 'weight_decay',
                                 experiment_config['exp']['config'])
        setExperimentConfigParam(self.config, 'amsgrad',
                                 experiment_config['exp']['config'])
        setExperimentConfigParam(self.config, 'nesterov',
                                 experiment_config['exp']['config'])

        print('tuning experiment config')
        print(experiment_config)

        tune.run_experiments(
            experiment_config, verbose=0, scheduler=self.sched)
    def setUp(self):
        def dummy_train(config, reporter):
            reporter(timesteps_total=100, done=True)

        register_trainable("f1", dummy_train)
def test_cluster_interrupt_searcher(start_connected_cluster, tmpdir):
    """Tests restoration of HyperOptSearch experiment on cluster shutdown
    with actual interrupt.

    Restoration should restore both state of trials
    and previous search algorithm (HyperOptSearch) state.
    This is an end-to-end test.
    """
    cluster = start_connected_cluster
    dirpath = str(tmpdir)
    local_checkpoint_dir = os.path.join(dirpath, "experiment")
    from ray.tune import register_trainable
    register_trainable("trainable", MyTrainableClass)

    def execute_script_with_args(*args):
        current_dir = os.path.dirname(__file__)
        script = os.path.join(current_dir,
                              "_test_cluster_interrupt_searcher.py")
        subprocess.Popen([sys.executable, script] + list(args))

    args = ["--ray-address", cluster.address, "--local-dir", dirpath]
    execute_script_with_args(*args)
    # Wait until the right checkpoint is saved.
    # The trainable returns every 0.5 seconds, so this should not miss
    # the checkpoint.
    trials = []
    for i in range(50):
        if TrialRunner.checkpoint_exists(local_checkpoint_dir):
            # Inspect the internal trialrunner
            runner = TrialRunner(
                resume="LOCAL", local_checkpoint_dir=local_checkpoint_dir)
            trials = runner.get_trials()
            if trials and len(trials) >= 10:
                break
        time.sleep(.5)
    else:
        raise ValueError(f"Didn't generate enough trials: {len(trials)}")

    if not TrialRunner.checkpoint_exists(local_checkpoint_dir):
        raise RuntimeError(
            f"Checkpoint file didn't appear in {local_checkpoint_dir}. "
            f"Current list: {os.listdir(local_checkpoint_dir)}.")

    ray.shutdown()
    cluster.shutdown()

    cluster = _start_new_cluster()
    execute_script_with_args(*(args + ["--resume"]))
    time.sleep(2)

    register_trainable("trainable", MyTrainableClass)

    reached = False
    for i in range(50):
        if TrialRunner.checkpoint_exists(local_checkpoint_dir):
            # Inspect the internal trialrunner
            runner = TrialRunner(
                resume="LOCAL", local_checkpoint_dir=local_checkpoint_dir)
            trials = runner.get_trials()

            if len(trials) == 0:
                continue  # nonblocking script hasn't resumed yet, wait

            reached = True
            assert len(trials) >= 10
            assert len(trials) <= 20
            if len(trials) == 20:
                break
            else:
                stop_fn = runner.trial_executor.stop_trial
                [stop_fn(t) for t in trials if t.status is not Trial.ERROR]
        time.sleep(.5)
    assert reached is True

    ray.shutdown()
    cluster.shutdown()
def search(conf):
    sw = StopWatch.get()

    # region conf vars
    conf_dataset = conf['dataset']
    dataroot = conf['dataroot']
    redis_ip = conf['redis']
    conf_loader = conf['autoaug']['loader']
    conf_model = conf['autoaug']['model']
    model_type = conf_model['type']
    ds_name = conf_dataset['name']
    aug = conf_loader['aug']
    val_ratio = conf_loader['val_ratio']
    epochs = conf_loader['epochs']
    val_fold = conf_loader['val_fold']
    cv_num = conf_loader['cv_num']
    num_policy = conf['autoaug']['num_policy']
    num_op = conf['autoaug']['num_op']
    num_search = conf['autoaug']['num_search']
    num_result_per_cv = conf['autoaug']['num_result_per_cv']
    smoke_test = conf['smoke_test']
    resume = conf['resume']
    # endregion

    ray.init(
        redis_address=redis_ip,
        # allocate all GPUs on local node if cluster is not specified
        num_gpus=torch.cuda.device_count() if not redis_ip else None)

    # first train with no aug
    _train_no_aug(conf)

    # get values from config
    num_samples = 4 if smoke_test else num_search

    logger.info('----- Search Test-Time Augmentation Policies -----')
    sw.start(tag='search')

    save_paths = [
        _get_model_filepath(ds_name, model_type,
                            'ratio%.1f_fold%d' % (val_ratio, i))
        for i in range(cv_num)
    ]

    copied_c = copy.deepcopy(conf)
    ops = augment_list(False)
    space = {}
    for i in range(num_policy):
        for j in range(num_op):
            space['policy_%d_%d' % (i, j)] = hp.choice(
                'policy_%d_%d' % (i, j), list(range(0, len(ops))))
            space['prob_%d_%d' % (i, j)] = hp.uniform(
                'prob_%d_%d' % (i, j), 0.0, 1.0)
            space['level_%d_%d' % (i, j)] = hp.uniform(
                'level_%d_%d' % (i, j), 0.0, 1.0)

    final_policy_set = []
    total_computation = 0
    reward_attr = 'top1_valid'  # top1_valid or minus_loss
    for _ in range(1):  # run multiple times.
        for val_fold in range(cv_num):
            name = "search_%s_%s_fold%d_ratio%.1f" % (ds_name, model_type,
                                                      val_fold, val_ratio)
            # logger.info(name)
            register_trainable(
                name,
                (lambda augs, rpt: _eval_tta(
                    copy.deepcopy(copied_c), augs, rpt)))
            algo = HyperOptSearch(
                space, max_concurrent=4 * 20, reward_attr=reward_attr)

            exp_config = {
                name: {
                    'run': name,
                    'num_samples': num_samples,
                    'resources_per_trial': {'gpu': 1},
                    'stop': {'training_iteration': num_policy},
                    'config': {
                        'dataroot': dataroot,
                        'save_path': save_paths[val_fold],
                        'val_ratio': val_ratio,
                        'val_fold': val_fold,
                        'num_op': num_op,
                        'num_policy': num_policy
                    },
                }
            }
            results = run_experiments(
                exp_config,
                search_alg=algo,
                scheduler=None,
                verbose=0,
                queue_trials=True,
                resume=resume,
                raise_on_failed_trial=False)

            results = [x for x in results if x.last_result is not None]
            results = sorted(
                results,
                key=lambda x: x.last_result[reward_attr],
                reverse=True)

            # calculate computation usage
            for result in results:
                total_computation += result.last_result['elapsed_time']

            for result in results[:num_result_per_cv]:
                final_policy = policy_decoder(result.config, num_policy,
                                              num_op)
                logger.info('loss=%.12f top1_valid=%.4f %s' %
                            (result.last_result['minus_loss'],
                             result.last_result['top1_valid'], final_policy))

                final_policy = remove_deplicates(final_policy)
                final_policy_set.extend(final_policy)

    logger.info(json.dumps(final_policy_set))
    logger.info('final_policy=%d' % len(final_policy_set))
    logger.info('processed in %.4f secs, gpu hours=%.4f' %
                (sw.pause('search'), total_computation / 3600.))

    logger.info(
        '----- Train with Augmentations model=%s dataset=%s aug=%s '
        'ratio(test)=%.1f -----' % (model_type, ds_name, aug, val_ratio))
    sw.start(tag='train_aug')

    num_experiments = 5
    default_path = [
        _get_model_filepath(ds_name, model_type,
                            'ratio%.1f_default%d' % (val_ratio, _))
        for _ in range(num_experiments)
    ]
    augment_path = [
        _get_model_filepath(ds_name, model_type,
                            'ratio%.1f_augment%d' % (val_ratio, _))
        for _ in range(num_experiments)
    ]
    reqs = [
        _train_model.remote(
            copy.deepcopy(copied_c), dataroot, aug, 0.0, 0,
            save_path=default_path[_], only_eval=True)
        for _ in range(num_experiments)
    ] + [
        _train_model.remote(
            copy.deepcopy(copied_c), dataroot, final_policy_set, 0.0, 0,
            save_path=augment_path[_])
        for _ in range(num_experiments)
    ]

    tqdm_epoch = tqdm(range(epochs))
    is_done = False
    for epoch in tqdm_epoch:
        while True:
            # Use a distinct name for the per-experiment progress dict; the
            # original reused `epochs`, shadowing the configured epoch count
            # and making the completion check compare against the dict itself.
            epochs_per_exp = OrderedDict()
            for exp_idx in range(num_experiments):
                try:
                    if os.path.exists(default_path[exp_idx]):
                        latest_ckpt = torch.load(default_path[exp_idx])
                        epochs_per_exp['default_exp%d' % (exp_idx + 1)] = \
                            latest_ckpt['epoch']
                except:
                    pass
                try:
                    if os.path.exists(augment_path[exp_idx]):
                        latest_ckpt = torch.load(augment_path[exp_idx])
                        epochs_per_exp['augment_exp%d' % (exp_idx + 1)] = \
                            latest_ckpt['epoch']
                except:
                    pass
            tqdm_epoch.set_postfix(epochs_per_exp)
            if (len(epochs_per_exp) == num_experiments * 2
                    and min(epochs_per_exp.values()) >= epochs):
                is_done = True
            if (len(epochs_per_exp) == num_experiments * 2
                    and min(epochs_per_exp.values()) >= epoch):
                break
            time.sleep(10)
        if is_done:
            break

    logger.info('getting results...')
    final_results = ray.get(reqs)
    for train_mode in ['default', 'augment']:
        avg = 0.
        for _ in range(num_experiments):
            r_model, r_cv, r_dict = final_results.pop(0)
            logger.info('[%s] top1_train=%.4f top1_test=%.4f' %
                        (train_mode, r_dict['top1_train'],
                         r_dict['top1_test']))
            avg += r_dict['top1_test']
        avg /= num_experiments
        logger.info('[%s] top1_test average=%.4f (#experiments=%d)' %
                    (train_mode, avg, num_experiments))
    logger.info('processed in %.4f secs' % sw.pause('train_aug'))

    logger.info(sw)
    def execute(
            self,
            config,
            dataset=None,
            training_set=None,
            validation_set=None,
            test_set=None,
            training_set_metadata=None,
            data_format=None,
            experiment_name="hyperopt",
            model_name="run",
            # model_load_path=None,
            # model_resume_path=None,
            skip_save_training_description=False,
            skip_save_training_statistics=False,
            skip_save_model=False,
            skip_save_progress=False,
            skip_save_log=False,
            skip_save_processed_input=True,
            skip_save_unprocessed_output=False,
            skip_save_predictions=False,
            skip_save_eval_stats=False,
            output_directory="results",
            gpus=None,
            gpu_memory_limit=None,
            allow_parallel_threads=True,
            callbacks=None,
            backend=None,
            random_seed=default_random_seed,
            debug=False,
            **kwargs,
    ) -> RayTuneResults:
        if isinstance(dataset, str) and not has_remote_protocol(
                dataset) and not os.path.isabs(dataset):
            dataset = os.path.abspath(dataset)

        if isinstance(backend, str):
            backend = initialize_backend(backend)

        if gpus is not None:
            raise ValueError(
                "Parameter `gpus` is not supported when using Ray Tune. "
                "Configure GPU resources with Ray and set "
                "`gpu_resources_per_trial` in your hyperopt config.")

        if gpu_memory_limit is None and \
                0 < self._gpu_resources_per_trial_non_none < 1:
            # Enforce fractional GPU utilization
            gpu_memory_limit = self.gpu_resources_per_trial

        hyperopt_dict = dict(
            config=config,
            dataset=dataset,
            training_set=training_set,
            validation_set=validation_set,
            test_set=test_set,
            training_set_metadata=training_set_metadata,
            data_format=data_format,
            experiment_name=experiment_name,
            model_name=model_name,
            # model_load_path=model_load_path,
            # model_resume_path=model_resume_path,
            eval_split=self.split,
            skip_save_training_description=skip_save_training_description,
            skip_save_training_statistics=skip_save_training_statistics,
            skip_save_model=skip_save_model,
            skip_save_progress=skip_save_progress,
            skip_save_log=skip_save_log,
            skip_save_processed_input=skip_save_processed_input,
            skip_save_unprocessed_output=skip_save_unprocessed_output,
            skip_save_predictions=skip_save_predictions,
            skip_save_eval_stats=skip_save_eval_stats,
            output_directory=output_directory,
            gpus=gpus,
            gpu_memory_limit=gpu_memory_limit,
            allow_parallel_threads=allow_parallel_threads,
            callbacks=callbacks,
            backend=backend,
            random_seed=random_seed,
            debug=debug,
        )

        mode = "min" if self.goal != MAXIMIZE else "max"
        metric = "metric_score"
        if self.search_alg_dict is not None:
            if TYPE not in self.search_alg_dict:
                logger.warning(
                    "WARNING: Kindly set type param for search_alg "
                    "to utilize Tune's Search Algorithms.")
                search_alg = None
            else:
                search_alg_type = self.search_alg_dict.pop(TYPE)
                search_alg = tune.create_searcher(
                    search_alg_type,
                    metric=metric,
                    mode=mode,
                    **self.search_alg_dict)
        else:
            search_alg = None

        if self.max_concurrent_trials:
            assert (
                self.max_concurrent_trials > 0
            ), f"`max_concurrent_trials` must be greater than 0, got {self.max_concurrent_trials}"
            if isinstance(search_alg,
                          BasicVariantGenerator) or search_alg is None:
                search_alg = BasicVariantGenerator(
                    max_concurrent=self.max_concurrent_trials)
            elif isinstance(search_alg, ConcurrencyLimiter):
                raise ValueError(
                    "You have specified `max_concurrent_trials`, but the "
                    "search algorithm is already a `ConcurrencyLimiter`. "
                    "FIX THIS by setting `max_concurrent_trials=None`.")
            else:
                search_alg = ConcurrencyLimiter(
                    search_alg, max_concurrent=self.max_concurrent_trials)

        resources_per_trial = {
            "cpu": self._cpu_resources_per_trial_non_none,
            "gpu": self._gpu_resources_per_trial_non_none,
        }

        def run_experiment_trial(config, local_hyperopt_dict,
                                 checkpoint_dir=None):
            return self._run_experiment(config, checkpoint_dir,
                                        local_hyperopt_dict, self.decode_ctx,
                                        _is_ray_backend(backend))

        tune_config = {}
        tune_callbacks = []
        for callback in callbacks or []:
            run_experiment_trial, tune_config = callback.prepare_ray_tune(
                run_experiment_trial,
                tune_config,
                tune_callbacks,
            )

        if _is_ray_backend(backend):
            # we can't set Trial actor's CPUs to 0 so we just go very low
            resources_per_trial = PlacementGroupFactory(
                [{
                    "CPU": 0.001
                }] + ([{
                    "CPU": 1,
                    "GPU": 1
                }] * self._gpu_resources_per_trial_non_none)
                if self._gpu_resources_per_trial_non_none else [{
                    "CPU": 0.001
                }] + [{
                    "CPU": 1
                }] * self._cpu_resources_per_trial_non_none)

        if has_remote_protocol(output_directory):
            run_experiment_trial = tune.durable(run_experiment_trial)
            self.sync_config = tune.SyncConfig(
                sync_to_driver=False, upload_dir=output_directory)
            output_directory = None
        elif self.kubernetes_namespace:
            from ray.tune.integration.kubernetes import (
                NamespacedKubernetesSyncer)
            self.sync_config = tune.SyncConfig(
                sync_to_driver=NamespacedKubernetesSyncer(
                    self.kubernetes_namespace))

        run_experiment_trial_params = tune.with_parameters(
            run_experiment_trial, local_hyperopt_dict=hyperopt_dict)
        register_trainable(
            f"trainable_func_f{hash_dict(config).decode('ascii')}",
            run_experiment_trial_params)

        analysis = tune.run(
            f"trainable_func_f{hash_dict(config).decode('ascii')}",
            config={
                **self.search_space,
                **tune_config,
            },
            scheduler=self.scheduler,
            search_alg=search_alg,
            num_samples=self.num_samples,
            keep_checkpoints_num=1,
            resources_per_trial=resources_per_trial,
            time_budget_s=self.time_budget_s,
            sync_config=self.sync_config,
            local_dir=output_directory,
            metric=metric,
            mode=mode,
            trial_name_creator=lambda trial: f"trial_{trial.trial_id}",
            trial_dirname_creator=lambda trial: f"trial_{trial.trial_id}",
            callbacks=tune_callbacks,
        )

        ordered_trials = analysis.results_df.sort_values(
            "metric_score", ascending=self.goal != MAXIMIZE)

        # Catch nans in edge case where the trial doesn't complete
        temp_ordered_trials = []
        for kwargs in ordered_trials.to_dict(orient="records"):
            for key in ["parameters", "training_stats", "eval_stats"]:
                if isinstance(kwargs[key], float):
                    kwargs[key] = {}
            temp_ordered_trials.append(kwargs)

        ordered_trials = [
            TrialResults.from_dict(load_json_values(kwargs))
            for kwargs in temp_ordered_trials
        ]

        return RayTuneResults(
            ordered_trials=ordered_trials, experiment_analysis=analysis)
    def checkAndReturnConsistentLogs(self, results, sleep_per_iter=None):
        """Checks logging is the same between APIs.

        Ignore "DONE" for logging but checks that the
        scheduler is notified properly with the last result.
        """
        class_results = copy.deepcopy(results)
        function_results = copy.deepcopy(results)

        class_output = []
        function_output = []
        scheduler_notif = []

        class MockScheduler(FIFOScheduler):
            def on_trial_complete(self, runner, trial, result):
                scheduler_notif.append(result)

        class ClassAPILogger(Logger):
            def on_result(self, result):
                class_output.append(result)

        class FunctionAPILogger(Logger):
            def on_result(self, result):
                function_output.append(result)

        class _WrappedTrainable(Trainable):
            def _setup(self, config):
                del config
                self._result_iter = copy.deepcopy(class_results)

            def _train(self):
                if sleep_per_iter:
                    time.sleep(sleep_per_iter)
                res = self._result_iter.pop(0)  # This should not fail
                if not self._result_iter:  # Mark "Done" for last result
                    res[DONE] = True
                return res

        def _function_trainable(config, reporter):
            for result in function_results:
                if sleep_per_iter:
                    time.sleep(sleep_per_iter)
                reporter(**result)

        class_trainable_name = "class_trainable"
        register_trainable(class_trainable_name, _WrappedTrainable)

        trials = run_experiments(
            {
                "function_api": {
                    "run": _function_trainable,
                    "loggers": [FunctionAPILogger],
                },
                "class_api": {
                    "run": class_trainable_name,
                    "loggers": [ClassAPILogger],
                },
            },
            raise_on_failed_trial=False,
            scheduler=MockScheduler())

        # Ignore these fields
        NO_COMPARE_FIELDS = {
            HOSTNAME,
            NODE_IP,
            TRIAL_ID,
            EXPERIMENT_TAG,
            PID,
            TIME_THIS_ITER_S,
            TIME_TOTAL_S,
            DONE,  # This is ignored because FunctionAPI has different handling
            "timestamp",
            "time_since_restore",
            "experiment_id",
            "date",
        }

        self.assertEqual(len(class_output), len(results))
        self.assertEqual(len(function_output), len(results))

        def as_comparable_result(result):
            return {
                k: v
                for k, v in result.items() if k not in NO_COMPARE_FIELDS
            }

        function_comparable = [
            as_comparable_result(result) for result in function_output
        ]
        class_comparable = [
            as_comparable_result(result) for result in class_output
        ]

        self.assertEqual(function_comparable, class_comparable)

        self.assertEqual(sum(t.get(DONE) for t in scheduler_notif), 2)
        self.assertEqual(
            as_comparable_result(scheduler_notif[0]),
            as_comparable_result(scheduler_notif[1]))

        # Make sure the last result is the same.
        self.assertEqual(
            as_comparable_result(trials[0].last_result),
            as_comparable_result(trials[1].last_result))

        return function_output, trials
        # Here we use `episode_reward_mean`, but you can also report other
        # objectives such as loss or accuracy (see tune/result.py).
        return TrainingResult(episode_reward_mean=v, timesteps_this_iter=1)

    def _save(self, checkpoint_dir):
        path = os.path.join(checkpoint_dir, "checkpoint")
        with open(path, "w") as f:
            f.write(json.dumps({"timestep": self.timestep}))
        return path

    def _restore(self, checkpoint_path):
        with open(checkpoint_path) as f:
            self.timestep = json.loads(f.read())["timestep"]


register_trainable("my_class", MyTrainableClass)

if __name__ == "__main__":
    ray.init()

    # Hyperband early stopping, configured with `episode_reward_mean` as the
    # objective and `timesteps_total` as the time unit.
    hyperband = HyperBandScheduler(
        time_attr="timesteps_total",
        reward_attr="episode_reward_mean",
        max_t=100)

    run_experiments(
        {
            "hyperband_test": {
                "run": "my_class",
                "repeat": 100,
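# This snippet predates modern Tune: the TrainingResult tuple, reward_attr,
# and "repeat" were replaced in later Ray releases by plain result dicts,
# metric/mode arguments on schedulers, and "num_samples" respectively.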
if __name__ == "__main__":
    datasets.MNIST('~/data', train=True, download=True)
    args = parser.parse_args()

    import numpy as np
    import ray
    from ray import tune
    from ray.tune.schedulers import AsyncHyperBandScheduler

    ray.init()
    sched = AsyncHyperBandScheduler(
        time_attr="training_iteration",
        reward_attr="neg_mean_loss",
        max_t=400,
        grace_period=20)
    tune.register_trainable(
        "train_mnist",
        lambda cfg, rprtr: train_mnist(args, cfg, rprtr))
    tune.run_experiments({
        "exp": {
            "stop": {
                "mean_accuracy": 0.98,
                "training_iteration": 1 if args.smoke_test else 20
            },
            "resources_per_trial": {
                "cpu": 3,
                "gpu": int(not args.no_cuda)
            },
            "run": "train_mnist",
            "num_samples": 1 if args.smoke_test else 10,
            "config": {
                "lr": tune.sample_from(
#!/usr/bin/env python

import multiprocessing
import sys

import ray
from ray.tune import register_trainable, run_experiments

from rainbow_rllib_agent import RainbowRLlibAgent

register_trainable("Rainbow", RainbowRLlibAgent)

ray.init(num_gpus=1)

run_experiments({
    "rainbow-simple-pong": {
        "run": "Rainbow",
        "env": "PongNoFrameskip-v4",
        "resources": {
            "cpu": 1,
            "gpu": 1,
        },
        "config": {
            "num_workers": 0,
            "apex": False,
            "lr": .0001,
            "n_step": 3,
            "gamma": 0.99,
            "sample_batch_size": 4,
            "train_batch_size": 32,
            "force_remote_evaluators": False,
    def _save(self, checkpoint_dir):
        return self.saver.save(
            self.sess, checkpoint_dir + "/save", global_step=self.iterations)

    def _restore(self, path):
        return self.saver.restore(self.sess, path)


# !!! Example of using the ray.tune Python API !!!
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--smoke-test', action='store_true', help='Finish quickly for testing')
    args, _ = parser.parse_known_args()

    register_trainable("my_class", TrainMNIST)
    mnist_spec = {
        'run': 'my_class',
        'stop': {
            'mean_accuracy': 0.99,
            'time_total_s': 600,
        },
        'config': {
            'learning_rate': lambda spec: 10 ** np.random.uniform(-5, -3),
            'activation': grid_search(['relu', 'elu', 'tanh']),
        },
        "repeat": 10,
    }

    if args.smoke_test:
        mnist_spec['stop']['training_iteration'] = 2
from ray.tune.schedulers import AsyncHyperBandScheduler, FIFOScheduler

ray.init()
if args.scheduler == "fifo":
    sched = FIFOScheduler()
elif args.scheduler == "asynchyperband":
    sched = AsyncHyperBandScheduler(
        time_attr="training_iteration",
        metric="mean_loss",
        mode="min",
        max_t=400,
        grace_period=60)
else:
    raise NotImplementedError
tune.register_trainable(
    "TRAIN_FN",
    lambda config, reporter: train_cifar10(args, config, reporter))
tune.run(
    "TRAIN_FN",
    name=args.expname,
    verbose=2,
    scheduler=sched,
    **{
        "stop": {
            "mean_accuracy": 0.98,
            "training_iteration": 1 if args.smoke_test else args.epochs
        },
        "resources_per_trial": {
            "cpu": int(args.num_workers),
            "gpu": int(args.num_gpus)
        },
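# The **{...} splat above is equivalent to passing stop=, resources_per_trial=,
# etc. directly as keyword arguments to tune.run; the dict form just keeps the
# whole experiment spec in one literal.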
if __name__ == "__main__":
    import argparse
    import os

    assert "SIGOPT_KEY" in os.environ, \
        "SigOpt API key must be stored as environment variable at SIGOPT_KEY"

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init()

    register_trainable("exp", easy_objective)

    space = [
        {
            'name': 'width',
            'type': 'int',
            'bounds': {
                'min': 0,
                'max': 20
            },
        },
        {
            'name': 'height',
            'type': 'int',
            'bounds': {
                'min': -100,
from gym.spaces import Discrete, Box
from gym.envs.registration import EnvSpec
from gym.envs.registration import registry

from ray.rllib.env import MultiAgentEnv
from ray.tune.registry import register_env
from ray.rllib.models import ModelCatalog
from ray.rllib.evaluation.episode import _flatten_action
from ray.rllib.agents.registry import get_agent_class
from ray.rllib.models.preprocessors import get_preprocessor
from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID

from common import train_env_factory
from environment.core.utils.config import extend_config
from agents.sacq import SACQAgent

logging.basicConfig(
    format='%(asctime)s - %(message)s',
    datefmt='%d-%b-%y %H:%M:%S',
    level=logging.INFO)

tune.register_trainable("SACQ", SACQAgent)

EXAMPLE_USAGE = """
Example Usage via RLlib CLI:
    python rollout.py --steps 1000 \
        --checkpoint=checkpoints/October2c/checkpoint_120/checkpoint-120
"""

ENVIRONMENT = "MultiRobot-v0"
RESET_ON_TARGET = True

DEFAULT_TIMESTEP = 0.1
FRAME_MULTIPLIER = 5
EVAL_TIMESTEP = DEFAULT_TIMESTEP / FRAME_MULTIPLIER
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data_dir',
        type=str,
        default='/tmp/tensorflow/mnist/input_data',
        help='Directory for storing input data')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

# !!! Example of using the ray.tune Python API !!!
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--smoke-test', action='store_true', help='Finish quickly for testing')
    args, _ = parser.parse_known_args()

    register_trainable('train_mnist', train)
    mnist_spec = {
        'run': 'train_mnist',
        'stop': {
            'mean_accuracy': 0.99,
            'time_total_s': 600,
        },
        'config': {
            'activation': grid_search(['relu', 'elu', 'tanh']),
        },
    }

    if args.smoke_test:
        mnist_spec['stop']['training_iteration'] = 2

    ray.init()
#!/usr/bin/env python

import multiprocessing
import sys

import ray
from ray.tune import register_trainable, run_experiments

from dqn_agent import DQNRLlibAgent

register_trainable("DQNBaseline", DQNRLlibAgent)

ray.init()

run_experiments({
    "baseline-rllib-cartpole": {
        "run": "DQNBaseline",
        "env": "CartPole-v0",
        "resources": {
            "cpu": 1,
        },
        "config": {
            "num_workers": 0,
            "env_config": {"cartpole": True},
            "apex": False,
            "lr": .0005,
            "n_step": 1,
            "gamma": 0.99,
            "sample_batch_size": 1,
            "train_batch_size": 32,
            "force_remote_evaluators": False,
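# As in the Rainbow example earlier, this script uses the old "resources" key
# in the experiment spec; later Ray versions renamed it (via "trial_resources")
# to "resources_per_trial".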