def run_experiments(experiments, scheduler=None, with_server=False,
                    server_port=TuneServer.DEFAULT_PORT, verbose=True):
    """Tunes experiments.

    Args:
        experiments (Experiment | list | dict): Experiments to run.
        scheduler (TrialScheduler): Scheduler for executing
            the experiment. Choose among FIFO (default), MedianStopping,
            AsyncHyperBand, HyperBand, or HyperOpt.
        with_server (bool): Starts a background Tune server. Needed for
            using the Client API.
        server_port (int): Port number for launching TuneServer.
        verbose (bool): How much output should be printed for each trial.

    Returns:
        List of Trial objects, holding data for each executed trial.

    Raises:
        TuneError: If `experiments` is not an Experiment, a list of
            Experiments, or a dict of experiment specs, or if any trial
            finishes in a non-TERMINATED state.
    """
    if scheduler is None:
        scheduler = FIFOScheduler()

    runner = TrialRunner(
        scheduler, launch_web_server=with_server, server_port=server_port,
        verbose=verbose)

    # Normalize `experiments` into a list of Experiment objects.
    # BUGFIX: use isinstance() instead of exact `type(...) is dict/list`
    # checks so that dict/list subclasses (e.g. OrderedDict) are accepted.
    exp_list = experiments
    if isinstance(experiments, Experiment):
        exp_list = [experiments]
    elif isinstance(experiments, dict):
        exp_list = [
            Experiment.from_json(name, spec)
            for name, spec in experiments.items()
        ]

    if (isinstance(exp_list, list)
            and all(isinstance(exp, Experiment) for exp in exp_list)):
        for experiment in exp_list:
            scheduler.add_experiment(experiment, runner)
    else:
        raise TuneError("Invalid argument: {}".format(experiments))

    print(runner.debug_string(max_debug=99999))
    last_debug = 0
    while not runner.is_finished():
        runner.step()
        # Throttle the periodic status printout.
        if time.time() - last_debug > DEBUG_PRINT_INTERVAL:
            print(runner.debug_string())
            last_debug = time.time()

    print(runner.debug_string(max_debug=99999))

    for trial in runner.get_trials():
        # TODO(rliaw): What about errored?
        if trial.status != Trial.TERMINATED:
            raise TuneError("Trial did not complete", trial)

    wait_for_log_sync()
    return runner.get_trials()
def __init__(self,
             time_attr='training_iteration',
             reward_attr='episode_reward_mean',
             max_t=100,
             grace_period=10,
             reduction_factor=3,
             brackets=3):
    """Initialize async successive-halving (ASHA-style) scheduler state.

    Args:
        time_attr (str): Result attribute used as the time/budget axis.
        reward_attr (str): Result attribute used as the objective.
        max_t (int): Maximum value of `time_attr` a trial may reach.
        grace_period (int): Minimum budget before a trial may be stopped.
        reduction_factor (int): Halving rate between rungs.
        brackets (int): Number of brackets to maintain.
    """
    # Validate the configuration before touching any state.
    assert max_t > 0, "Max (time_attr) not valid!"
    assert max_t >= grace_period, "grace_period must be <= max_t!"
    assert grace_period > 0, "grace_period must be positive!"
    assert reduction_factor > 1, "Reduction Factor not valid!"
    assert brackets > 0, "brackets must be positive!"
    FIFOScheduler.__init__(self)
    self._reward_attr = reward_attr
    self._time_attr = time_attr
    self._reduction_factor = reduction_factor
    self._max_t = max_t
    # One _Bracket per value of s; tracks state for new trial add.
    self._brackets = [
        _Bracket(grace_period, max_t, reduction_factor, s)
        for s in range(brackets)
    ]
    self._trial_info = {}  # Stores Trial -> Bracket
    # NOTE(review): presumably used when assigning trials to brackets —
    # the original comment was truncated ("# for"); confirm in add_trial.
    self._counter = 0
    self._num_stopped = 0
def __init__(self,
             time_attr="time_total_s",
             reward_attr="episode_reward_mean",
             grace_period=60.0,
             min_samples_required=3,
             hard_stop=True):
    """Initialize median-stopping state on top of FIFO scheduling.

    Args:
        time_attr (str): Result attribute used as the time axis.
        reward_attr (str): Result attribute used as the objective.
        grace_period (float): Budget a trial is granted before it may be
            stopped, in units of `time_attr` (presumably seconds here).
        min_samples_required (int): Minimum sample count before stopping
            decisions are made — TODO confirm exact use in stopping logic.
        hard_stop (bool): If True, underperformers are presumably stopped
            outright rather than paused — confirm in stopping logic.
    """
    FIFOScheduler.__init__(self)
    # Configuration knobs.
    self._time_attr = time_attr
    self._reward_attr = reward_attr
    self._grace_period = grace_period
    self._min_samples_required = min_samples_required
    self._hard_stop = hard_stop
    # Runtime bookkeeping: trial sets plus per-trial result history.
    self._stopped_trials = set()
    self._completed_trials = set()
    self._results = collections.defaultdict(list)
def __init__(self,
             time_attr='time_total_s',
             reward_attr='episode_reward_mean',
             grace_period=60.0,
             min_samples_required=3,
             hard_stop=True):
    """Set up the median-stopping rule's bookkeeping.

    Args:
        time_attr (str): Result attribute treated as elapsed time.
        reward_attr (str): Result attribute treated as the reward signal.
        grace_period (float): Minimum `time_attr` before stopping applies.
        min_samples_required (int): Sample threshold for decisions
            (exact semantics live in the stopping logic — confirm there).
        hard_stop (bool): Whether underperformers are terminated
            (vs. paused, presumably) — confirm against stopping code.
    """
    FIFOScheduler.__init__(self)
    # Trials this scheduler has already acted upon.
    self._stopped_trials = set()
    self._completed_trials = set()
    # trial -> list of reported results; defaultdict avoids key checks.
    self._results = collections.defaultdict(list)
    # Remember the configuration.
    self._grace_period = grace_period
    self._min_samples_required = min_samples_required
    self._reward_attr = reward_attr
    self._time_attr = time_attr
    self._hard_stop = hard_stop
def __init__(self, scheduler=None, launch_web_server=False,
             server_port=TuneServer.DEFAULT_PORT):
    """Initializes a new TrialRunner.

    Args:
        scheduler (TrialScheduler): Defaults to FIFOScheduler.
        launch_web_server (bool): Flag for starting TuneServer.
        server_port (int): Port number for launching TuneServer.
    """
    # Fall back to FIFO scheduling when no scheduler was supplied.
    if scheduler:
        self._scheduler_alg = scheduler
    else:
        self._scheduler_alg = FIFOScheduler()
    # Trial bookkeeping and cluster resource accounting.
    self._trials = []
    self._running = {}
    self._avail_resources = Resources(cpu=0, gpu=0)
    self._committed_resources = Resources(cpu=0, gpu=0)
    self._resources_initialized = False
    # For debugging, it may be useful to halt trials after some time has
    # elapsed. TODO(ekl) consider exposing this in the API.
    self._global_time_limit = float(
        os.environ.get("TRIALRUNNER_WALLTIME_LIMIT", float('inf')))
    self._total_time = 0
    # Optional background server for the client API.
    self._server = None
    self._stop_queue = []
    if launch_web_server:
        self._server = TuneServer(self, server_port)
def __init__(self,
             time_attr='training_iteration',
             reward_attr='episode_reward_mean',
             grace_period=10.0,
             perturbation_interval=6.0,
             hyperparameter_mutations=None):
    """Initialize population-based-training-style scheduler state.

    Args:
        time_attr (str): Result attribute used as the time axis.
        reward_attr (str): Result attribute used as the objective.
        grace_period (float): Minimum time before a trial may be acted on
            — NOTE(review): semantics inferred from the name; confirm.
        perturbation_interval (float): Presumably the minimum spacing
            between perturbations of a trial — confirm against callers.
        hyperparameter_mutations (dict | None): Mutation spec, or None.
    """
    FIFOScheduler.__init__(self)
    # Configuration.
    self._time_attr = time_attr
    self._reward_attr = reward_attr
    self._grace_period = grace_period
    self._perturbation_interval = perturbation_interval
    self._hyperparameter_mutations = hyperparameter_mutations
    # Per-trial bookkeeping.
    self._completed_trials = set()
    self._results = collections.defaultdict(list)
    self._last_perturbation_time = {}
    self._checkpoint_paths = {}
def run_experiments(experiments, scheduler=None, with_server=False,
                    server_port=TuneServer.DEFAULT_PORT):
    """Run each experiment spec to completion and return the trials.

    Args:
        experiments (dict): Mapping of experiment name -> spec.
        scheduler (TrialScheduler): Defaults to FIFOScheduler.
        with_server (bool): Whether to start a background Tune server.
        server_port (int): Port for the TuneServer.

    Raises:
        TuneError: If any trial ends in a non-TERMINATED state.
    """
    # Make sure rllib agents are registered
    from ray import rllib  # noqa # pylint: disable=unused-import

    scheduler = FIFOScheduler() if scheduler is None else scheduler
    runner = TrialRunner(scheduler, launch_web_server=with_server,
                         server_port=server_port)

    # Expand every spec into trials and queue them all.
    for name, spec in experiments.items():
        for trial in generate_trials(spec, name):
            runner.add_trial(trial)
    print(runner.debug_string(max_debug=99999))

    last_debug = 0
    while not runner.is_finished():
        runner.step()
        # Periodic, rate-limited progress output.
        if time.time() - last_debug > DEBUG_PRINT_INTERVAL:
            print(runner.debug_string())
            last_debug = time.time()
    print(runner.debug_string(max_debug=99999))

    for trial in runner.get_trials():
        # TODO(rliaw): What about errored?
        if trial.status != Trial.TERMINATED:
            raise TuneError("Trial did not complete", trial)

    return runner.get_trials()
def __init__(self, scheduler=None):
    """Initializes a new TrialRunner.

    Args:
        scheduler (TrialScheduler): Defaults to FIFOScheduler.
    """
    # Use FIFO scheduling unless a scheduler was provided.
    if scheduler:
        self._scheduler_alg = scheduler
    else:
        self._scheduler_alg = FIFOScheduler()
    # Trial bookkeeping and resource accounting (two distinct Resources
    # objects on purpose — they are tracked independently).
    self._trials = []
    self._running = {}
    self._avail_resources = Resources(cpu=0, gpu=0)
    self._committed_resources = Resources(cpu=0, gpu=0)
def __init__(self,
             time_attr="time_total_s",
             reward_attr="episode_reward_mean",
             perturbation_interval=60.0,
             hyperparam_mutations=None,
             resample_probability=0.25,
             custom_explore_fn=None):
    """Initialize PBT scheduler state.

    Args:
        time_attr (str): Result attribute used as the time axis.
        reward_attr (str): Result attribute used as the objective.
        perturbation_interval (float): Presumably the spacing between
            perturbations along `time_attr` — confirm in perturbation logic.
        hyperparam_mutations (dict | None): Mutation spec; defaults to an
            empty dict.
        resample_probability (float): Probability used when resampling —
            exact semantics live in the explore logic.
        custom_explore_fn (callable | None): Optional custom explore hook.

    Raises:
        TuneError: If neither `hyperparam_mutations` nor
            `custom_explore_fn` is provided.
    """
    # BUGFIX: the former `hyperparam_mutations={}` default was a single
    # mutable dict shared across every instance constructed with the
    # default; use a None sentinel instead (backward compatible).
    if hyperparam_mutations is None:
        hyperparam_mutations = {}
    if not hyperparam_mutations and not custom_explore_fn:
        raise TuneError(
            "You must specify at least one of `hyperparam_mutations` or "
            "`custom_explore_fn` to use PBT.")
    FIFOScheduler.__init__(self)
    self._reward_attr = reward_attr
    self._time_attr = time_attr
    self._perturbation_interval = perturbation_interval
    self._hyperparam_mutations = hyperparam_mutations
    self._resample_probability = resample_probability
    self._trial_state = {}
    self._custom_explore_fn = custom_explore_fn
    # Metrics
    self._num_checkpoints = 0
    self._num_perturbations = 0
def __init__(self, time_attr='training_iteration',
             reward_attr='episode_reward_mean', max_t=81):
    """Initialize HyperBand bracket bookkeeping.

    Args:
        time_attr (str): Result attribute used as the budget axis.
        reward_attr (str): Result attribute used as the objective.
        max_t (int): Maximum budget (units of `time_attr`) per trial.
    """
    assert max_t > 0, "Max (time_attr) not valid!"
    FIFOScheduler.__init__(self)
    # Downsampling rate between successive-halving rungs (fixed at 3 here).
    self._eta = 3
    # NOTE(review): hard-coded bracket count; presumably s_max + 1 in the
    # HyperBand formulation — confirm against the paper/derivation.
    self._s_max_1 = 5
    # bracket max trials
    self._get_n0 = lambda s: int(
        np.ceil(self._s_max_1 / (s + 1) * self._eta**s))
    # bracket initial iterations
    self._get_r0 = lambda s: int((max_t * self._eta**(-s)))
    self._hyperbands = [[]]  # list of hyperband iterations
    self._trial_info = {}  # Stores Trial -> Bracket, Band Iteration
    # Tracks state for new trial add
    self._state = {"bracket": None, "band_idx": 0}
    self._num_stopped = 0
    self._reward_attr = reward_attr
    self._time_attr = time_attr
def __init__(self,
             time_attr='training_iteration',
             reward_attr='episode_reward_mean',
             max_t=100,
             grace_period=10,
             reduction_factor=3,
             brackets=3):
    """Set up asynchronous successive-halving bracket state.

    Args:
        time_attr (str): Result attribute used as the budget axis.
        reward_attr (str): Result attribute used as the objective.
        max_t (int): Maximum budget per trial; must be >= grace_period.
        grace_period (int): Minimum budget before stopping is allowed.
        reduction_factor (int): Rung downsampling rate; must exceed 1.
        brackets (int): How many brackets to create.
    """
    # Reject invalid configurations up front.
    assert max_t > 0, "Max (time_attr) not valid!"
    assert max_t >= grace_period, "grace_period must be <= max_t!"
    assert grace_period > 0, "grace_period must be positive!"
    assert reduction_factor > 1, "Reduction Factor not valid!"
    assert brackets > 0, "brackets must be positive!"
    FIFOScheduler.__init__(self)
    self._reduction_factor = reduction_factor
    self._max_t = max_t
    self._trial_info = {}  # Stores Trial -> Bracket
    # Tracks state for new trial add: one bracket per value of s.
    self._brackets = [
        _Bracket(grace_period, max_t, reduction_factor, s)
        for s in range(brackets)
    ]
    # NOTE(review): original comment was truncated ("# for"); presumably a
    # bracket-assignment counter — confirm where it is incremented.
    self._counter = 0
    self._num_stopped = 0
    self._reward_attr = reward_attr
    self._time_attr = time_attr
def __init__(self,
             time_attr='training_iteration',
             reward_attr='episode_reward_mean',
             max_t=81):
    """Initialize HyperBand bracket bookkeeping.

    Args:
        time_attr (str): Result attribute used as the budget axis.
        reward_attr (str): Result attribute used as the objective.
        max_t (int): Maximum budget (units of `time_attr`) per trial.
    """
    assert max_t > 0, "Max (time_attr) not valid!"
    FIFOScheduler.__init__(self)
    # Downsampling rate between successive-halving rungs (fixed at 3 here).
    self._eta = 3
    # NOTE(review): hard-coded bracket count; presumably s_max + 1 in the
    # HyperBand formulation — confirm against the derivation.
    self._s_max_1 = 5
    self._max_t_attr = max_t
    # bracket max trials
    self._get_n0 = lambda s: int(
        np.ceil(self._s_max_1/(s+1) * self._eta**s))
    # bracket initial iterations
    self._get_r0 = lambda s: int((max_t*self._eta**(-s)))
    self._hyperbands = [[]]  # list of hyperband iterations
    self._trial_info = {}  # Stores Trial -> Bracket, Band Iteration
    # Tracks state for new trial add
    self._state = {"bracket": None, "band_idx": 0}
    self._num_stopped = 0
    self._reward_attr = reward_attr
    self._time_attr = time_attr
def __init__(self,
             time_attr="time_total_s",
             reward_attr="episode_reward_mean",
             perturbation_interval=60.0,
             hyperparam_mutations=None,
             resample_probability=0.25,
             custom_explore_fn=None):
    """Initialize PBT scheduler state.

    Args:
        time_attr (str): Result attribute used as the time axis.
        reward_attr (str): Result attribute used as the objective.
        perturbation_interval (float): Presumably the spacing between
            perturbations along `time_attr` — confirm in perturbation logic.
        hyperparam_mutations (dict | None): Mutation spec; defaults to an
            empty dict.
        resample_probability (float): Probability used when resampling —
            exact semantics live in the explore logic.
        custom_explore_fn (callable | None): Optional custom explore hook.

    Raises:
        TuneError: If neither `hyperparam_mutations` nor
            `custom_explore_fn` is provided.
    """
    # BUGFIX: the former `hyperparam_mutations={}` default was a single
    # mutable dict shared by every instance constructed with the default;
    # use a None sentinel instead (backward compatible).
    if hyperparam_mutations is None:
        hyperparam_mutations = {}
    if not hyperparam_mutations and not custom_explore_fn:
        raise TuneError(
            "You must specify at least one of `hyperparam_mutations` or "
            "`custom_explore_fn` to use PBT.")
    FIFOScheduler.__init__(self)
    self._reward_attr = reward_attr
    self._time_attr = time_attr
    self._perturbation_interval = perturbation_interval
    self._hyperparam_mutations = hyperparam_mutations
    self._resample_probability = resample_probability
    self._trial_state = {}
    self._custom_explore_fn = custom_explore_fn
    # Metrics
    self._num_checkpoints = 0
    self._num_perturbations = 0
def __init__(self, scheduler=None):
    """Initializes a new TrialRunner.

    Args:
        scheduler (TrialScheduler): Defaults to FIFOScheduler.
    """
    if scheduler:
        self._scheduler_alg = scheduler
    else:
        self._scheduler_alg = FIFOScheduler()
    # Trial bookkeeping and cluster resource accounting.
    self._trials = []
    self._running = {}
    self._avail_resources = Resources(cpu=0, gpu=0)
    self._committed_resources = Resources(cpu=0, gpu=0)
    self._resources_initialized = False
    self._total_time = 0
    # For debugging, it may be useful to halt trials after some time has
    # elapsed. TODO(ekl) consider exposing this in the API.
    wall_limit = os.environ.get("TRIALRUNNER_WALLTIME_LIMIT", float('inf'))
    self._global_time_limit = float(wall_limit)
def __init__(self, max_iter=200, eta=3):
    """Initialize HyperBand bracket bookkeeping.

    Args:
        max_iter (int): Maximum iterations per configuration.
        eta (int): Downsampling rate between successive-halving rungs
            (default=3); must be > 1.
    """
    assert max_iter > 0, "Max Iterations not valid!"
    assert eta > 1, "Downsampling rate (eta) not valid!"
    FIFOScheduler.__init__(self)
    self._eta = eta
    # Number of brackets; computed by the project helper
    # calculate_bracket_count (defined elsewhere in this module).
    self._s_max_1 = s_max_1 = calculate_bracket_count(max_iter, eta)
    # total number of iterations per execution of Successive Halving (n,r)
    B = s_max_1 * max_iter
    # bracket trial count total
    self._get_n0 = lambda s: int(np.ceil(B / max_iter / (s + 1) * eta**s))
    # bracket initial iterations
    self._get_r0 = lambda s: int(max_iter * eta**(-s))
    self._hyperbands = [[]]  # list of hyperband iterations
    self._trial_info = {}  # Stores Trial -> Bracket, Band Iteration
    # Tracks state for new trial add
    self._state = {"bracket": None, "band_idx": 0}
    self._num_stopped = 0
def __init__(self,
             search_alg,
             scheduler=None,
             launch_web_server=False,
             server_port=TuneServer.DEFAULT_PORT,
             verbose=True,
             queue_trials=False):
    """Initializes a new TrialRunner.

    Args:
        search_alg (SearchAlgorithm): SearchAlgorithm for generating
            Trial objects.
        scheduler (TrialScheduler): Defaults to FIFOScheduler.
        launch_web_server (bool): Flag for starting TuneServer.
        server_port (int): Port number for launching TuneServer.
        verbose (bool): Flag for verbosity. If False, trial results
            will not be output.
        queue_trials (bool): Whether to queue trials when the cluster does
            not currently have enough resources to launch one. This should
            be set to True when running on an autoscaling cluster to
            enable automatic scale-up.
    """
    self._search_alg = search_alg
    if scheduler:
        self._scheduler_alg = scheduler
    else:
        self._scheduler_alg = FIFOScheduler()
    # Trial bookkeeping and cluster resource accounting.
    self._trials = []
    self._running = {}
    self._avail_resources = Resources(cpu=0, gpu=0)
    self._committed_resources = Resources(cpu=0, gpu=0)
    self._resources_initialized = False
    # For debugging, it may be useful to halt trials after some time has
    # elapsed. TODO(ekl) consider exposing this in the API.
    self._global_time_limit = float(
        os.environ.get("TRIALRUNNER_WALLTIME_LIMIT", float('inf')))
    self._total_time = 0
    # Optional background server for the client API.
    self._server = TuneServer(self, server_port) if launch_web_server \
        else None
    self._stop_queue = []
    self._verbose = verbose
    self._queue_trials = queue_trials
def run_experiments(experiments, scheduler=None, **ray_args):
    """Run the given experiment specs to completion and return the trials.

    Args:
        experiments (dict): Mapping of experiment name -> spec.
        scheduler (TrialScheduler): Defaults to FIFOScheduler.
        **ray_args: Keyword arguments forwarded to ``ray.init``.

    Raises:
        TuneError: If any trial ends in a non-TERMINATED state.
    """
    scheduler = FIFOScheduler() if scheduler is None else scheduler

    # Expand every spec into trials and queue them on the runner.
    runner = TrialRunner(scheduler)
    for name, spec in experiments.items():
        for trial in generate_trials(spec, name):
            runner.add_trial(trial)
    print(runner.debug_string())

    ray.init(**ray_args)

    # Drive the runner to completion, printing status after each step.
    while not runner.is_finished():
        runner.step()
        print(runner.debug_string())

    for trial in runner.get_trials():
        if trial.status != Trial.TERMINATED:
            raise TuneError("Trial did not complete", trial)

    return runner.get_trials()
def choose_trial_to_run(self, trial_runner):
    """Queue fresh trials if needed, then delegate to FIFO selection."""
    self._add_new_trials_if_needed(trial_runner)
    # Explicit base-class call (not super()) preserved from the original.
    chosen = FIFOScheduler.choose_trial_to_run(self, trial_runner)
    return chosen
def run_experiments(experiments=None,
                    search_alg=None,
                    scheduler=None,
                    with_server=False,
                    server_port=TuneServer.DEFAULT_PORT,
                    verbose=True,
                    queue_trials=False):
    """Tunes experiments.

    Args:
        experiments (Experiment | list | dict): Experiments to run.
        search_alg (SearchAlgorithm): Search Algorithm. Defaults to
            BasicVariantGenerator.
        scheduler (TrialScheduler): Scheduler for executing
            the experiment. Choose among FIFO (default), MedianStopping,
            AsyncHyperBand, and HyperBand.
        with_server (bool): Starts a background Tune server. Needed for
            using the Client API.
        server_port (int): Port number for launching TuneServer.
        verbose (bool): How much output should be printed for each trial.
        queue_trials (bool): Whether to queue trials when the cluster does
            not currently have enough resources to launch one. This should
            be set to True when running on an autoscaling cluster to enable
            automatic scale-up.

    Returns:
        List of Trial objects, holding data for each executed trial.

    Raises:
        TuneError: If any trial finishes in a non-TERMINATED state.
    """
    if scheduler is None:
        scheduler = FIFOScheduler()

    if search_alg is None:
        # BUGFIX: the two implicitly-concatenated string literals lacked a
        # separating space, producing "...specifiedif search_alg...".
        assert experiments is not None, (
            "Experiments need to be specified "
            "if search_alg is not provided.")
        search_alg = BasicVariantGenerator(experiments)

    runner = TrialRunner(
        search_alg,
        scheduler=scheduler,
        launch_web_server=with_server,
        server_port=server_port,
        verbose=verbose,
        queue_trials=queue_trials)

    print(runner.debug_string(max_debug=99999))
    last_debug = 0
    while not runner.is_finished():
        runner.step()
        # Throttle the periodic status printout.
        if time.time() - last_debug > DEBUG_PRINT_INTERVAL:
            print(runner.debug_string())
            last_debug = time.time()

    print(runner.debug_string(max_debug=99999))

    # Collect all incomplete trials and report them together.
    errored_trials = []
    for trial in runner.get_trials():
        if trial.status != Trial.TERMINATED:
            errored_trials += [trial]

    if errored_trials:
        raise TuneError("Trials did not complete", errored_trials)

    wait_for_log_sync()
    return runner.get_trials()
def run_experiments(experiments, scheduler=None, with_server=False,
                    server_port=TuneServer.DEFAULT_PORT, verbose=True):
    """Tunes experiments.

    Args:
        experiments (Experiment | list | dict): Experiments to run.
        scheduler (TrialScheduler): Scheduler for executing
            the experiment. Choose among FIFO (default), MedianStopping,
            AsyncHyperBand, or HyperBand.
        with_server (bool): Starts a background Tune server. Needed for
            using the Client API.
        server_port (int): Port number for launching TuneServer.
        verbose (bool): How much output should be printed for each trial.

    Returns:
        List of Trial objects, holding data for each executed trial.

    Raises:
        TuneError: If `experiments` has an unrecognized type, or if any
            trial finishes in a non-TERMINATED state.
    """
    # Make sure rllib agents are registered
    from ray import rllib  # noqa # pylint: disable=unused-import

    if scheduler is None:
        scheduler = FIFOScheduler()

    runner = TrialRunner(scheduler, launch_web_server=with_server,
                         server_port=server_port)

    # Accept a spec dict, a list of Experiments, or a single Experiment.
    if isinstance(experiments, dict):
        for name, spec in experiments.items():
            for trial in generate_trials(spec, name):
                trial.set_verbose(verbose)
                runner.add_trial(trial)
    elif (isinstance(experiments, list)
          and all(isinstance(exp, Experiment) for exp in experiments)):
        for experiment in experiments:
            for trial in experiment.trials():
                trial.set_verbose(verbose)
                runner.add_trial(trial)
    elif isinstance(experiments, Experiment):
        for trial in experiments.trials():
            trial.set_verbose(verbose)
            runner.add_trial(trial)
    else:
        # BUGFIX: an unrecognized argument was previously ignored silently,
        # causing the run to "succeed" with zero trials; fail fast instead.
        raise TuneError("Invalid argument: {}".format(experiments))

    print(runner.debug_string(max_debug=99999))
    last_debug = 0
    while not runner.is_finished():
        runner.step()
        # Throttle the periodic status printout.
        if time.time() - last_debug > DEBUG_PRINT_INTERVAL:
            print(runner.debug_string())
            last_debug = time.time()

    print(runner.debug_string(max_debug=99999))

    for trial in runner.get_trials():
        # TODO(rliaw): What about errored?
        if trial.status != Trial.TERMINATED:
            raise TuneError("Trial did not complete", trial)

    wait_for_log_sync()
    return runner.get_trials()
def run_experiments(experiments, scheduler=None, with_server=False,
                    server_port=TuneServer.DEFAULT_PORT, verbose=True,
                    queue_trials=False):
    """Tunes experiments.

    Args:
        experiments (Experiment | list | dict): Experiments to run.
        scheduler (TrialScheduler): Scheduler for executing
            the experiment. Choose among FIFO (default), MedianStopping,
            AsyncHyperBand, HyperBand, or HyperOpt.
        with_server (bool): Starts a background Tune server. Needed for
            using the Client API.
        server_port (int): Port number for launching TuneServer.
        verbose (bool): How much output should be printed for each trial.
        queue_trials (bool): Whether to queue trials when the cluster does
            not currently have enough resources to launch one. This should
            be set to True when running on an autoscaling cluster to enable
            automatic scale-up.

    Returns:
        List of Trial objects, holding data for each executed trial.

    Raises:
        TuneError: If `experiments` is not an Experiment, a list of
            Experiments, or a dict of experiment specs, or if any trial
            finishes in a non-TERMINATED state.
    """
    if scheduler is None:
        scheduler = FIFOScheduler()

    runner = TrialRunner(
        scheduler, launch_web_server=with_server, server_port=server_port,
        verbose=verbose, queue_trials=queue_trials)

    # Normalize `experiments` into a list of Experiment objects.
    # BUGFIX: use isinstance() instead of exact `type(...) is dict/list`
    # checks so that dict/list subclasses (e.g. OrderedDict) are accepted.
    exp_list = experiments
    if isinstance(experiments, Experiment):
        exp_list = [experiments]
    elif isinstance(experiments, dict):
        exp_list = [
            Experiment.from_json(name, spec)
            for name, spec in experiments.items()
        ]

    if (isinstance(exp_list, list)
            and all(isinstance(exp, Experiment) for exp in exp_list)):
        for experiment in exp_list:
            scheduler.add_experiment(experiment, runner)
    else:
        raise TuneError("Invalid argument: {}".format(experiments))

    print(runner.debug_string(max_debug=99999))
    last_debug = 0
    while not runner.is_finished():
        runner.step()
        # Throttle the periodic status printout.
        if time.time() - last_debug > DEBUG_PRINT_INTERVAL:
            print(runner.debug_string())
            last_debug = time.time()

    print(runner.debug_string(max_debug=99999))

    # Collect all incomplete trials and report them together.
    errored_trials = []
    for trial in runner.get_trials():
        if trial.status != Trial.TERMINATED:
            errored_trials += [trial]

    if errored_trials:
        raise TuneError("Trials did not complete", errored_trials)

    wait_for_log_sync()
    return runner.get_trials()