def set_algorithm(experiment_name, config):
    """Build the search object selected by the module-level ``args.algorithm``.

    Supported values of ``args.algorithm`` are ``'hyperopt'``, ``'ax'``,
    ``'nevergrad'``, ``'optuna'``, ``'pbt'`` and ``'random'``.  Every choice
    except ``'random'`` and ``'pbt'`` is wrapped in a ``ConcurrencyLimiter``
    capped at ``args.jobs``; ``'random'`` passes ``args.jobs`` directly to
    ``BasicVariantGenerator``.

    Note that ``'pbt'`` returns a ``PopulationBasedTraining`` instance rather
    than one of the ``*Search`` classes.

    Args:
        experiment_name: Name given to the Ax experiment (only used by 'ax').
        config: Parameter definition; passed as ``parameters`` to Ax and as
            ``hyperparam_mutations`` to PBT.  Unused by the other choices.

    Returns:
        The configured search object.

    Raises:
        ValueError: If ``args.algorithm`` is not one of the supported names.
    """
    selected = args.algorithm
    if selected == 'hyperopt':
        algorithm = HyperOptSearch(points_to_evaluate=best_params)
    elif selected == 'ax':
        ax_client = AxClient(enforce_sequential_optimization=False)
        ax_client.create_experiment(name=experiment_name,
                                    parameters=config,
                                    objective_name="minimum",
                                    minimize=True)
        algorithm = AxSearch(ax_client=ax_client,
                             points_to_evaluate=best_params)
    elif selected == 'nevergrad':
        algorithm = NevergradSearch(
            points_to_evaluate=best_params,
            optimizer=ng.optimizers.registry["PortfolioDiscreteOnePlusOne"])
    elif selected == 'optuna':
        algorithm = OptunaSearch(points_to_evaluate=best_params,
                                 seed=args.seed)
    elif selected == 'pbt':
        algorithm = PopulationBasedTraining(
            time_attr="training_iteration",
            perturbation_interval=args.perturbation,
            hyperparam_mutations=config,
            synch=True)
    elif selected == 'random':
        algorithm = BasicVariantGenerator(max_concurrent=args.jobs)
    else:
        # Previously an unknown name fell through every branch and surfaced
        # as an UnboundLocalError on ``algorithm`` further down.
        raise ValueError(
            "Unsupported search algorithm {!r}; expected one of 'hyperopt', "
            "'ax', 'nevergrad', 'optuna', 'pbt', 'random'.".format(selected))

    # 'random' already received its concurrency cap above and 'pbt' is not
    # wrapped; everything else is limited to ``args.jobs`` concurrent trials.
    if selected not in ('random', 'pbt'):
        algorithm = ConcurrencyLimiter(algorithm, max_concurrent=args.jobs)
    return algorithm
def testFailResumeAfterPreset(self):
    """Resume a failed run that mixes preset points with a grid search.

    The first ``tune.run`` is expected to abort with ``RuntimeError`` (a
    ``FailureInjectorCallback(15)`` is attached and ``fail_fast`` is on).
    The second call resumes with the same searcher and checks the final
    trial population.

    Expected population, assuming a preset key removes that key from the
    grid for its sample: 1 + 3 + 3 trials from the three preset points plus
    3 full 3x3 grids = 34 trials, with ``-1`` appearing 4 times per key and
    every grid value 10 times.
    """
    os.environ["TUNE_MAX_PENDING_TRIALS_PG"] = "1"

    preset_points = [
        {"test": -1, "test2": -1},
        {"test": -1},
        {"test2": -1},
    ]
    search_alg = BasicVariantGenerator(points_to_evaluate=preset_points)

    # Shared keyword arguments for both the failing and the resumed run.
    run_kwargs = {
        "num_samples": 3 + 3,  # 3 preset, 3 samples
        "fail_fast": True,
        "config": {
            "test": tune.grid_search([1, 2, 3]),
            "test2": tune.grid_search([1, 2, 3]),
        },
        "stop": {"training_iteration": 2},
        "local_dir": self.logdir,
        "verbose": 1,
    }

    # First attempt: the injected failure must surface as a RuntimeError.
    with self.assertRaises(RuntimeError):
        tune.run(
            "trainable",
            callbacks=[self.FailureInjectorCallback(15)],
            search_alg=search_alg,
            **run_kwargs,
        )

    # Second attempt: pick up where the failed run stopped.
    state_check = self.CheckStateCallback(expected_trials=15)
    analysis = tune.run(
        "trainable",
        resume=True,
        callbacks=[state_check],
        search_alg=search_alg,
        **run_kwargs,
    )

    assert len(analysis.trials) == 34

    for key in ("test", "test2"):
        occurrences = Counter(trial.config[key] for trial in analysis.trials)
        assert occurrences.pop(-1) == 4
        assert all(count == 10 for count in occurrences.values())
def testPointsToEvaluateBasicVariantAdvanced(self):
    """Check how preset points interact with grid search in BasicVariantGenerator.

    Verifies ``total_samples`` for searchers seeded with individual preset
    points and with all points at once, then runs the last experiment and
    inspects the order and content of the generated trial configs.
    """
    config = {
        "grid_1": tune.grid_search(["a", "b", "c", "d"]),
        "grid_2": tune.grid_search(["x", "y", "z"]),
        "nested": {
            "random": tune.uniform(2., 10.),
            "dependent": tune.sample_from(
                lambda spec: -1. * spec.config.nested.random)
        }
    }

    points = [
        {"grid_1": "b"},
        {"grid_2": "z"},
        {"grid_1": "a", "grid_2": "y"},
        {"nested": {"random": 8.0}},
    ]

    from ray.tune.suggest.basic_variant import BasicVariantGenerator

    # grid_1 * grid_2 gives 4 * 3 = 12 variants per complete grid search.
    # A grid variable fixed by a preset point is excluded from the grid
    # search for that sample.  With num_samples=6 and a single preset point,
    # the remaining 5 samples each cover the whole space (5 * 12 trials).
    single_point_cases = (
        # Point 1 fixes grid_1 -> first sample only searches grid_2 (3).
        (points[0], 1 * 3 + 5 * 12),
        # Point 2 fixes grid_2 -> first sample only searches grid_1 (4).
        (points[1], 1 * 4 + 5 * 12),
        # Point 3 fixes both grid variables -> first sample is one trial.
        (points[2], 1 + 5 * 12),
    )
    for preset, expected_total in single_point_cases:
        searcher = BasicVariantGenerator(points_to_evaluate=[preset])
        exp = Experiment(
            run=_mock_objective, name="test", config=config, num_samples=6)
        searcher.add_configurations(exp)
        self.assertEqual(searcher.total_samples, expected_total)

    # With all points supplied, the first three samples follow the rules
    # above.  Only 3 samples are grid searched completely.
    searcher = BasicVariantGenerator(points_to_evaluate=points)
    exp = Experiment(
        run=_mock_objective, name="test", config=config, num_samples=6)
    searcher.add_configurations(exp)
    self.assertEqual(searcher.total_samples, 1 * 3 + 1 * 4 + 1 + 3 * 12)

    # Run this and confirm results
    analysis = tune.run(exp, search_alg=searcher)
    configs = [trial.config for trial in analysis.trials]

    self.assertEqual(len(configs), searcher.total_samples)

    from_point_1 = configs[0:3]
    from_point_2 = configs[3:7]
    from_point_3 = configs[7]
    from_point_4 = configs[8]

    self.assertTrue(all(cfg["grid_1"] == "b" for cfg in from_point_1))
    self.assertTrue(all(cfg["grid_2"] == "z" for cfg in from_point_2))
    self.assertTrue(from_point_3["grid_1"] == "a"
                    and from_point_3["grid_2"] == "y")
    self.assertTrue(from_point_4["nested"]["random"] == 8.0)
    self.assertTrue(from_point_4["nested"]["dependent"] == -8.0)
"relu_dropout": 0.1, "res_dropout": 0.1, } points_to_evaluate = [] for k, v in org_config.items(): point = copy(org_config) if point[k] + 0.1 <= 1.0: point[k] += 0.1 points_to_evaluate.append(point) if point[k] - 0.1 > 0.0: point = copy(org_config) point[k] -= 0.1 points_to_evaluate.append(point) search_space = {k: tune.quniform(0, 0.5, 0.05) for k in org_config.keys()} # for p in points_to_evaluate: # print(p) tune.run( train, config=search_space, resources_per_trial={ "cpu": 16, "gpu": 1 }, search_alg=BasicVariantGenerator(points_to_evaluate=points_to_evaluate), name="tune_mosi_dropouts", )
def tune_xgboost():
    """Grid-search XGBoost parameters while re-balancing CPUs between trials.

    An ``ASHAScheduler`` is wrapped in a ``ResourceChangingScheduler`` that
    uses the nested ``example_resources_allocation_function`` to decide how
    many CPUs each trial gets.  After ``tune.run`` finishes, the function
    asserts that some trial reached 16 training iterations and that some
    trial reported ``nthread`` greater than 1.

    Returns:
        The analysis object returned by ``tune.run``.
    """
    # Constants and Tune search-space objects can live in the same dict.
    search_space = {
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": 9,
        "learning_rate": 1,
        "min_child_weight": tune.grid_search([2, 3]),
        "subsample": tune.grid_search([0.8, 0.9]),
        "colsample_bynode": tune.grid_search([0.8, 0.9]),
        "random_state": 1,
        "num_parallel_tree": 2000,
    }

    # Aggressive early stopping of bad trials: at most 16 training
    # iterations, evaluated from the first one, halving each rung.
    base_scheduler = ASHAScheduler(
        max_t=16, grace_period=1, reduction_factor=2)

    def example_resources_allocation_function(
            trial_runner: "trial_runner.TrialRunner", trial: Trial,
            result: Dict[str, Any], scheduler: "ResourceChangingScheduler"
    ) -> Union[None, PlacementGroupFactory, Resources]:
        """Naively split the cluster's CPUs evenly across live trials.

        Returns ``None`` before the first training iteration, otherwise a
        new ``PlacementGroupFactory`` requesting the computed CPU count.
        The CPU count never drops below the "CPU" entry of the base trial
        resources (``scheduler._base_trial_resources``, or 1 CPU / 0 GPU
        when that is ``None``).

        See :func:`evenly_distribute_cpus_gpus` for a more complex,
        robust approach.

        Args:
            trial_runner (TrialRunner): Trial runner for this Tune run;
                queried for total CPUs and the list of live trials.
            trial (Trial): The trial to allocate new resources to.
            result (Dict[str, Any]): The latest results of trial.
            scheduler (ResourceChangingScheduler): The scheduler calling
                the function.
        """
        # Base resources as given via ``tune.run(resources_per_trial)``.
        base_trial_resource = scheduler._base_trial_resources

        # Nothing to re-balance before the first reported iteration.
        if result["training_iteration"] < 1:
            return None

        # Fall back to a default when resources_per_trial was unspecified.
        if base_trial_resource is None:
            base_trial_resource = PlacementGroupFactory([{
                "CPU": 1,
                "GPU": 0
            }])

        # Never go below the CPU count requested in tune.run.
        cpu_floor = base_trial_resource.required_resources.get("CPU", 0)

        # Total CPUs known to the executor (not just the free ones).
        cluster_cpus = trial_runner.trial_executor._avail_resources.cpu

        # Even integer share of those CPUs per live trial.
        live_trial_count = len(trial_runner.get_live_trials())
        even_share = cluster_cpus // live_trial_count
        cpu_to_use = even_share if even_share > cpu_floor else cpu_floor

        # Hand the new CPU count back in a PlacementGroupFactory.
        return PlacementGroupFactory([{"CPU": cpu_to_use}])

    # You can either define your own resources_allocation_function, or
    # use the default one - evenly_distribute_cpus_gpus
    # from ray.tune.schedulers.resource_changing_scheduler import \
    #    evenly_distribute_cpus_gpus
    scheduler = ResourceChangingScheduler(
        base_scheduler=base_scheduler,
        resources_allocation_function=example_resources_allocation_function
        # resources_allocation_function=evenly_distribute_cpus_gpus  # default
    )

    search = BasicVariantGenerator()
    initial_resources = PlacementGroupFactory([{"CPU": 1, "GPU": 0}])

    analysis = tune.run(
        BreastCancerTrainable,
        metric="eval-logloss",
        mode="min",
        resources_per_trial=initial_resources,
        config=search_space,
        search_alg=search,
        num_samples=1,
        checkpoint_at_end=True,
        scheduler=scheduler)

    results = analysis.results_df
    assert results["training_iteration"].max() == 16
    assert results["nthread"].max() > 1

    return analysis
def hyper_parameter(task=None, model_name=None, dataset_name=None, config_file=None, space_file=None,
                    scheduler=None, search_alg=None, other_args=None, num_samples=5, max_concurrent=1,
                    cpu_per_trial=1, gpu_per_trial=1):
    """
    Use Ray tune to hyper parameter tune

    The best trial's checkpoint is reloaded at the end and saved to
    ``./libcity/cache/model_cache/{model_name}_{dataset_name}.m``.

    Args:
        task(str): task name
        model_name(str): model name
        dataset_name(str): dataset name
        config_file(str): config filename used to modify the pipeline's
            settings. the config file should be json.
        space_file(str): the file which specifies the parameter search space
        scheduler(str): the trial scheduler which will be used in ray.tune.run;
            one of 'FIFO', 'ASHA', 'MedianStoppingRule'
        search_alg(str): the search algorithm; one of 'BasicSearch',
            'BayesOptSearch', 'HyperOpt'
        other_args(dict): the rest parameter args, which will be pass to the Config
        num_samples(int): passed to ``tune.run`` as ``num_samples``
        max_concurrent(int): concurrency limit applied to 'BayesOptSearch' and
            'HyperOpt' (not applied to 'BasicSearch')
        cpu_per_trial(int): 'cpu' entry of ``resources_per_trial``
        gpu_per_trial(int): 'gpu' entry of ``resources_per_trial``

    Raises:
        SystemExit: with status 1 when ``space_file`` is None.
        ValueError: when ``search_alg`` or ``scheduler`` is not a supported name.
    """
    # load config
    experiment_config = ConfigParser(task, model_name, dataset_name, config_file=config_file,
                                     other_args=other_args)
    # logger
    logger = get_logger(experiment_config)
    logger.info(experiment_config.config)
    # check space_file
    if space_file is None:
        logger.error('the space_file should not be None when hyperparameter tune.')
        # Exit with a non-zero status so callers can detect the failure; the
        # bare ``exit`` helper is injected by ``site`` and is not meant for
        # use in programs.
        raise SystemExit(1)
    # Reject unsupported names before the (potentially expensive) dataset load.
    if search_alg not in ('BasicSearch', 'BayesOptSearch', 'HyperOpt'):
        raise ValueError('the search_alg is illegal.')
    if scheduler not in ('FIFO', 'ASHA', 'MedianStoppingRule'):
        raise ValueError('the scheduler is illegal')
    # seed
    seed = experiment_config.get('seed', 0)
    set_random_seed(seed)
    # parse space_file
    search_space = parse_search_space(space_file)
    # load dataset
    dataset = get_dataset(experiment_config)
    # get train valid test data
    train_data, valid_data, test_data = dataset.get_data()
    data_feature = dataset.get_data_feature()

    def train(config, checkpoint_dir=None, experiment_config=None,
              train_data=None, valid_data=None, data_feature=None):
        """trainable function which meets ray tune API

        Args:
            config (dict): A dict of hyperparameter.
            checkpoint_dir (str): if set, the executor loads
                ``<checkpoint_dir>/checkpoint`` before training.
            experiment_config: pipeline config; keys it already contains are
                overwritten by the matching entries of ``config``.
            train_data: training data handed to ``executor.train``.
            valid_data: validation data handed to ``executor.train``.
            data_feature: dataset features used to build model and executor.
        """
        # modify experiment_config (only keys the config already knows about)
        for key in config:
            if key in experiment_config:
                experiment_config[key] = config[key]
        experiment_config['hyper_tune'] = True
        logger = get_logger(experiment_config)
        logger.info('Begin pipeline, task={}, model_name={}, dataset_name={}'.format(
            str(task), str(model_name), str(dataset_name)))
        logger.info('running parameters: ' + str(config))
        # load model
        model = get_model(experiment_config, data_feature)
        # load executor
        executor = get_executor(experiment_config, model, data_feature)
        # checkpoint by ray tune
        if checkpoint_dir:
            checkpoint = os.path.join(checkpoint_dir, 'checkpoint')
            executor.load_model(checkpoint)
        # train
        executor.train(train_data, valid_data)

    # init search algorithm and scheduler
    if search_alg == 'BasicSearch':
        algorithm = BasicVariantGenerator()
    elif search_alg == 'BayesOptSearch':
        algorithm = BayesOptSearch(metric='loss', mode='min')
        # add concurrency limit
        algorithm = ConcurrencyLimiter(algorithm, max_concurrent=max_concurrent)
    elif search_alg == 'HyperOpt':
        algorithm = HyperOptSearch(metric='loss', mode='min')
        # add concurrency limit
        algorithm = ConcurrencyLimiter(algorithm, max_concurrent=max_concurrent)
    else:
        # Unreachable after the early check; kept as a safety net.
        raise ValueError('the search_alg is illegal.')
    if scheduler == 'FIFO':
        tune_scheduler = FIFOScheduler()
    elif scheduler == 'ASHA':
        tune_scheduler = ASHAScheduler()
    elif scheduler == 'MedianStoppingRule':
        tune_scheduler = MedianStoppingRule()
    else:
        # Unreachable after the early check; kept as a safety net.
        raise ValueError('the scheduler is illegal')
    # ray tune run
    ensure_dir('./libcity/cache/hyper_tune')
    result = tune.run(tune.with_parameters(train, experiment_config=experiment_config, train_data=train_data,
                                           valid_data=valid_data, data_feature=data_feature),
                      resources_per_trial={'cpu': cpu_per_trial, 'gpu': gpu_per_trial},
                      config=search_space,
                      metric='loss', mode='min', scheduler=tune_scheduler, search_alg=algorithm,
                      local_dir='./libcity/cache/hyper_tune', num_samples=num_samples)
    best_trial = result.get_best_trial("loss", "min", "last")
    logger.info("Best trial config: {}".format(best_trial.config))
    logger.info("Best trial final validation loss: {}".format(best_trial.last_result["loss"]))
    # save best
    # NOTE(review): assumes the best trial wrote a checkpoint; confirm that
    # the executor checkpoints when 'hyper_tune' is set.
    best_path = os.path.join(best_trial.checkpoint.value, "checkpoint")
    model_state, optimizer_state = torch.load(best_path)
    model_cache_file = './libcity/cache/model_cache/{}_{}.m'.format(
        model_name, dataset_name)
    ensure_dir('./libcity/cache/model_cache')
    torch.save((model_state, optimizer_state), model_cache_file)