def testNewResources(self):
    """Verify that ResourceChangingScheduler reallocates trial resources.

    The allocation function unconditionally requests 2 CPUs, so after the
    run the reported trial resources must carry ``head_cpus == 2.0``.
    """
    scheduler = ResourceChangingScheduler(
        resources_allocation_function=(
            # Signature: (trial_runner, trial, result, scheduler).
            lambda runner, trial, result, sched: PlacementGroupFactory(
                [{"CPU": 2}]
            )
        )
    )

    def train(config, checkpoint_dir=None):
        # Report the resources the trial currently holds so the assertion
        # below can inspect them.
        tune.report(metric=1, resources=tune.get_trial_resources())

    analysis = tune.run(
        train,
        scheduler=scheduler,
        stop={"training_iteration": 2},
        resources_per_trial=PlacementGroupFactory([{"CPU": 1}]),
        num_samples=1,
    )

    final_results = list(analysis.results.values())
    assert final_results[0]["resources"].head_cpus == 2.0
def tune_xgboost(use_class_trainable=True):
    """Run an XGBoost hyperparameter sweep with dynamic CPU reallocation.

    Args:
        use_class_trainable: When True, train with the class-based
            ``BreastCancerTrainable`` (and checkpoint at the end); otherwise
            use the function trainable ``train_breast_cancer``.

    Returns:
        The analysis object produced by ``tune.run``.
    """
    search_space = {
        # You can mix constants with search space objects.
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": 9,
        "learning_rate": 1,
        "min_child_weight": tune.grid_search([2, 3]),
        "subsample": tune.grid_search([0.8, 0.9]),
        "colsample_bynode": tune.grid_search([0.8, 0.9]),
        "random_state": 1,
        "num_parallel_tree": 2000,
    }

    # Aggressively early-stop bad trials (up to 16 training iterations).
    base_scheduler = ASHAScheduler(
        max_t=16, grace_period=1, reduction_factor=2
    )

    def example_resources_allocation_function(
        trial_runner: "trial_runner.TrialRunner",
        trial: Trial,
        result: Dict[str, Any],
        scheduler: "ResourceChangingScheduler",
    ) -> Union[None, PlacementGroupFactory, Resources]:
        """Basic example of a resource allocating function.

        Naively balances the cluster's CPUs over all live trials, returning
        a new ``PlacementGroupFactory`` with the updated requirement (or
        None). If the returned factory compares equal to the trial's current
        one, the scheduler skips the update internally (same as for None).

        See :class:`DistributeResources` for a more complex, robust approach.

        Args:
            trial_runner (TrialRunner): Trial runner for this Tune run.
                Can be used to obtain information about other trials.
            trial (Trial): The trial to allocate new resources to.
            result (Dict[str, Any]): The latest results of trial.
            scheduler (ResourceChangingScheduler): The scheduler calling
                the function.
        """
        # Base resources as defined in ``tune.run(resources_per_trial)``.
        base_trial_resource = scheduler._base_trial_resources

        # Nothing to do before the first iteration has completed.
        if result["training_iteration"] < 1:
            return None

        # Fall back to defaults when resources_per_trial was unspecified.
        if base_trial_resource is None:
            base_trial_resource = PlacementGroupFactory([{"CPU": 1, "GPU": 0}])

        # Never drop below the CPU count originally given to tune.run.
        min_cpu = base_trial_resource.required_resources.get("CPU", 0)

        # Cluster-wide CPU count (total, not just currently free).
        total_available_cpus = (
            trial_runner.trial_executor._avail_resources.cpu)

        # Split the CPUs evenly across all live trials.
        cpu_to_use = max(
            min_cpu,
            total_available_cpus // len(trial_runner.get_live_trials()))

        # Hand the trial its new CPU allotment.
        return PlacementGroupFactory([{"CPU": cpu_to_use, "GPU": 0}])

    # You can either define your own resources_allocation_function, or
    # use the default one - DistributeResources
    # from ray.tune.schedulers.resource_changing_scheduler import \
    #     DistributeResources
    scheduler = ResourceChangingScheduler(
        base_scheduler=base_scheduler,
        resources_allocation_function=example_resources_allocation_function,
        # resources_allocation_function=DistributeResources()  # default
    )

    if use_class_trainable:
        trainable = BreastCancerTrainable
    else:
        trainable = train_breast_cancer

    analysis = tune.run(
        trainable,
        metric="eval-logloss",
        mode="min",
        resources_per_trial=PlacementGroupFactory([{"CPU": 1, "GPU": 0}]),
        config=search_space,
        num_samples=1,
        scheduler=scheduler,
        checkpoint_at_end=use_class_trainable,
    )

    if use_class_trainable:
        # The class trainable scales nthread with its allocation, so a
        # successful reallocation shows up as nthread > 1.
        assert analysis.results_df["nthread"].max() > 1

    return analysis