Example #1
def set_algorithm(experiment_name, config):
    '''
    Configure and return the search algorithm selected by args.algorithm.

    Relies on the module-level args (parsed command-line options) and
    best_params (initial points to evaluate).
    '''
    if args.algorithm == 'hyperopt':
        algorithm = HyperOptSearch(points_to_evaluate=best_params)
    elif args.algorithm == 'ax':
        ax_client = AxClient(enforce_sequential_optimization=False)
        ax_client.create_experiment(name=experiment_name,
                                    parameters=config,
                                    objective_name="minimum",
                                    minimize=True)
        algorithm = AxSearch(ax_client=ax_client,
                             points_to_evaluate=best_params)
    elif args.algorithm == 'nevergrad':
        algorithm = NevergradSearch(
            points_to_evaluate=best_params,
            optimizer=ng.optimizers.registry["PortfolioDiscreteOnePlusOne"])
    elif args.algorithm == 'optuna':
        algorithm = OptunaSearch(points_to_evaluate=best_params,
                                 seed=args.seed)
    elif args.algorithm == 'pbt':
        algorithm = PopulationBasedTraining(
            time_attr="training_iteration",
            perturbation_interval=args.perturbation,
            hyperparam_mutations=config,
            synch=True)
    elif args.algorithm == 'random':
        algorithm = BasicVariantGenerator(max_concurrent=args.jobs)
    else:
        raise ValueError(f"Unsupported search algorithm: {args.algorithm}")
    if args.algorithm not in ['random', 'pbt']:
        algorithm = ConcurrencyLimiter(algorithm, max_concurrent=args.jobs)
    return algorithm
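A minimal sketch of how a helper like this might be handed to tune.run (hypothetical and not part of the original script: train_fn, args, config and best_params are assumed to be defined elsewhere, and the reported metric is assumed to be named "minimum" to match the Ax objective above):

# Hypothetical usage sketch (train_fn, args, config and best_params are
# assumed to be defined elsewhere in the script).
algorithm = set_algorithm("my_experiment", config)
analysis = tune.run(
    train_fn,
    config=config,
    metric="minimum",             # assumed to match the Ax objective_name above
    mode="min",
    num_samples=args.samples,     # assumed CLI option
    search_alg=algorithm,         # for 'pbt', pass the result as scheduler= instead
)
print(analysis.best_config)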
Example #2
    def testFailResumeAfterPreset(self):
        os.environ["TUNE_MAX_PENDING_TRIALS_PG"] = "1"

        search_alg = BasicVariantGenerator(points_to_evaluate=[
            {"test": -1, "test2": -1},
            {"test": -1},
            {"test2": -1},
        ])

        config = dict(
            num_samples=3 + 3,  # 3 preset, 3 samples
            fail_fast=True,
            config={
                "test": tune.grid_search([1, 2, 3]),
                "test2": tune.grid_search([1, 2, 3]),
            },
            stop={"training_iteration": 2},
            local_dir=self.logdir,
            verbose=1,
        )

        with self.assertRaises(RuntimeError):
            tune.run(
                "trainable",
                callbacks=[self.FailureInjectorCallback(15)],
                search_alg=search_alg,
                **config,
            )

        analysis = tune.run(
            "trainable",
            resume=True,
            callbacks=[self.CheckStateCallback(expected_trials=15)],
            search_alg=search_alg,
            **config,
        )
        # 3 preset points expand to 1 + 3 + 3 = 7 trials, plus
        # 3 samples * 3 * 3 grid variants = 27 trials -> 34 in total.
        assert len(analysis.trials) == 34
        test_counter = Counter([t.config["test"] for t in analysis.trials])
        # "test" == -1 only comes from preset points 1 and 2 (1 + 3 trials).
        assert test_counter.pop(-1) == 4
        assert all(v == 10 for v in test_counter.values())
        test2_counter = Counter([t.config["test2"] for t in analysis.trials])
        # "test2" == -1 only comes from preset points 1 and 3 (1 + 3 trials).
        assert test2_counter.pop(-1) == 4
        assert all(v == 10 for v in test2_counter.values())
Example #3
    def testPointsToEvaluateBasicVariantAdvanced(self):
        config = {
            "grid_1": tune.grid_search(["a", "b", "c", "d"]),
            "grid_2": tune.grid_search(["x", "y", "z"]),
            "nested": {
                "random":
                tune.uniform(2., 10.),
                "dependent":
                tune.sample_from(lambda spec: -1. * spec.config.nested.random)
            }
        }

        points = [
            {"grid_1": "b"},
            {"grid_2": "z"},
            {"grid_1": "a", "grid_2": "y"},
            {"nested": {"random": 8.0}},
        ]

        from ray.tune.suggest.basic_variant import BasicVariantGenerator

        # grid_1 (4 values) * grid_2 (3 values) = 12 variants per complete
        # grid search. However, if a grid variable is set by a preset point,
        # it is excluded from the grid search for that sample.

        # Point 1 overwrites grid_1, so the first sample only grid searches
        # over grid_2 (3 trials).
        # The remaining 5 samples search over the whole space (5 * 12 trials).
        searcher = BasicVariantGenerator(points_to_evaluate=[points[0]])
        exp = Experiment(run=_mock_objective,
                         name="test",
                         config=config,
                         num_samples=6)
        searcher.add_configurations(exp)
        self.assertEqual(searcher.total_samples, 1 * 3 + 5 * 12)

        # Point 2 overwrites grid_2, so the first sample only grid searches
        # over grid_1 (4 trials).
        # The remaining 5 samples search over the whole space (5 * 12 trials).
        searcher = BasicVariantGenerator(points_to_evaluate=[points[1]])
        exp = Experiment(run=_mock_objective,
                         name="test",
                         config=config,
                         num_samples=6)
        searcher.add_configurations(exp)
        self.assertEqual(searcher.total_samples, 1 * 4 + 5 * 12)

        # Point 3 overwrites both grid_1 and grid_2, so the first sample does
        # not grid search at all (1 trial).
        # The remaining 5 samples search over the whole space (5 * 12 trials).
        searcher = BasicVariantGenerator(points_to_evaluate=[points[2]])
        exp = Experiment(run=_mock_objective,
                         name="test",
                         config=config,
                         num_samples=6)
        searcher.add_configurations(exp)
        self.assertEqual(searcher.total_samples, 1 + 5 * 12)

        # When initialized with all four points, the first three samples
        # follow the logic above. Point 4 only presets the nested values,
        # so its sample and the 2 remaining free samples (3 in total) are
        # grid searched completely.
        searcher = BasicVariantGenerator(points_to_evaluate=points)
        exp = Experiment(run=_mock_objective,
                         name="test",
                         config=config,
                         num_samples=6)
        searcher.add_configurations(exp)
        self.assertEqual(searcher.total_samples, 1 * 3 + 1 * 4 + 1 + 3 * 12)

        # Run this and confirm results
        analysis = tune.run(exp, search_alg=searcher)
        configs = [trial.config for trial in analysis.trials]

        self.assertEqual(len(configs), searcher.total_samples)
        self.assertTrue(all(config["grid_1"] == "b"
                            for config in configs[0:3]))
        self.assertTrue(all(config["grid_2"] == "z"
                            for config in configs[3:7]))
        self.assertTrue(configs[7]["grid_1"] == "a"
                        and configs[7]["grid_2"] == "y")
        self.assertTrue(configs[8]["nested"]["random"] == 8.0)
        self.assertTrue(configs[8]["nested"]["dependent"] == -8.0)
Example #4
    "relu_dropout": 0.1,
    "res_dropout": 0.1,
}

# For each dropout key, seed the search with its neighbouring values:
# the original value +0.1 (if the result stays <= 1.0) and -0.1 (if the
# result stays >= 0.0).
points_to_evaluate = []
for k, v in org_config.items():
    if v + 0.1 <= 1.0:
        point = copy(org_config)
        point[k] = v + 0.1
        points_to_evaluate.append(point)
    if v - 0.1 >= 0.0:
        point = copy(org_config)
        point[k] = v - 0.1
        points_to_evaluate.append(point)

search_space = {k: tune.quniform(0, 0.5, 0.05) for k in org_config.keys()}

# for p in points_to_evaluate:
#     print(p)

tune.run(
    train,
    config=search_space,
    resources_per_trial={
        "cpu": 16,
        "gpu": 1
    },
    search_alg=BasicVariantGenerator(points_to_evaluate=points_to_evaluate),
    name="tune_mosi_dropouts",
)
Example #5
def tune_xgboost():
    search_space = {
        # You can mix constants with search space objects.
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": 9,
        "learning_rate": 1,
        "min_child_weight": tune.grid_search([2, 3]),
        "subsample": tune.grid_search([0.8, 0.9]),
        "colsample_bynode": tune.grid_search([0.8, 0.9]),
        "random_state": 1,
        "num_parallel_tree": 2000,
    }
    # This will enable aggressive early stopping of bad trials.
    base_scheduler = ASHAScheduler(
        max_t=16,  # 16 training iterations
        grace_period=1,
        reduction_factor=2)

    def example_resources_allocation_function(
        trial_runner: "trial_runner.TrialRunner", trial: Trial,
        result: Dict[str, Any], scheduler: "ResourceChangingScheduler"
    ) -> Union[None, PlacementGroupFactory, Resources]:
        """This is a basic example of a resource allocating function.

        The function naively balances available CPUs over live trials.

        This function returns a new ``PlacementGroupFactory`` with updated
        resource requirements, or None. If the returned
        ``PlacementGroupFactory`` is equal by value to the one the
        trial has currently, the scheduler will skip the update process
        internally (same with None).

        See :func:`evenly_distribute_cpus_gpus` for a more complex,
        robust approach.

        Args:
            trial_runner (TrialRunner): Trial runner for this Tune run.
                Can be used to obtain information about other trials.
            trial (Trial): The trial to allocate new resources to.
            result (Dict[str, Any]): The latest results of trial.
            scheduler (ResourceChangingScheduler): The scheduler calling
                the function.
        """

        # Get base trial resources as defined in
        # ``tune.run(resources_per_trial)``
        base_trial_resource = scheduler._base_trial_resources

        # Don't bother if this is just the first iteration
        if result["training_iteration"] < 1:
            return None

        # default values if resources_per_trial is unspecified
        if base_trial_resource is None:
            base_trial_resource = PlacementGroupFactory([{"CPU": 1, "GPU": 0}])

        # Assume that the number of CPUs cannot go below what was
        # specified in tune.run
        min_cpu = base_trial_resource.required_resources.get("CPU", 0)

        # Get the number of CPUs available in total (not just free)
        total_available_cpus = (
            trial_runner.trial_executor._avail_resources.cpu)

        # Divide the free CPUs among all live trials
        cpu_to_use = max(
            min_cpu,
            total_available_cpus // len(trial_runner.get_live_trials()))

        # Assign new CPUs to the trial in a PlacementGroupFactory
        return PlacementGroupFactory([{"CPU": cpu_to_use}])

    # You can either define your own resources_allocation_function, or
    # use the default one - evenly_distribute_cpus_gpus

    # from ray.tune.schedulers.resource_changing_scheduler import \
    #    evenly_distribute_cpus_gpus

    scheduler = ResourceChangingScheduler(
        base_scheduler=base_scheduler,
        resources_allocation_function=example_resources_allocation_function
        # resources_allocation_function=evenly_distribute_cpus_gpus  # default
    )

    search = BasicVariantGenerator()

    analysis = tune.run(BreastCancerTrainable,
                        metric="eval-logloss",
                        mode="min",
                        resources_per_trial=PlacementGroupFactory([{
                            "CPU": 1,
                            "GPU": 0
                        }]),
                        config=search_space,
                        search_alg=search,
                        num_samples=1,
                        checkpoint_at_end=True,
                        scheduler=scheduler)

    assert analysis.results_df["training_iteration"].max() == 16
    assert analysis.results_df["nthread"].max() > 1

    return analysis
Example #6
def hyper_parameter(task=None,
                    model_name=None,
                    dataset_name=None,
                    config_file=None,
                    space_file=None,
                    scheduler=None,
                    search_alg=None,
                    other_args=None,
                    num_samples=5,
                    max_concurrent=1,
                    cpu_per_trial=1,
                    gpu_per_trial=1):
    """ Use Ray tune to hyper parameter tune

    Args:
        task(str): task name
        model_name(str): model name
        dataset_name(str): dataset name
        config_file(str): config filename used to modify the pipeline's
            settings. the config file should be json.
        space_file(str): the file which specifies the parameter search space
        scheduler(str): the trial sheduler which will be used in ray.tune.run
        search_alg(str): the search algorithm
        other_args(dict): the rest parameter args, which will be pass to the Config
    """
    # load config
    experiment_config = ConfigParser(task,
                                     model_name,
                                     dataset_name,
                                     config_file=config_file,
                                     other_args=other_args)
    # logger
    logger = get_logger(experiment_config)
    logger.info(experiment_config.config)
    # check space_file
    if space_file is None:
        logger.error(
            'space_file must not be None when tuning hyperparameters.')
        exit(1)
    # seed
    seed = experiment_config.get('seed', 0)
    set_random_seed(seed)
    # parse space_file
    search_space = parse_search_space(space_file)
    # load dataset
    dataset = get_dataset(experiment_config)
    # get train valid test data
    train_data, valid_data, test_data = dataset.get_data()
    data_feature = dataset.get_data_feature()

    def train(config,
              checkpoint_dir=None,
              experiment_config=None,
              train_data=None,
              valid_data=None,
              data_feature=None):
        """trainable function which meets ray tune API

        Args:
            config (dict): A dict of hyperparameter.
        """
        # modify experiment_config
        for key in config:
            if key in experiment_config:
                experiment_config[key] = config[key]
        experiment_config['hyper_tune'] = True
        logger = get_logger(experiment_config)
        logger.info(
            'Begin pipeline, task={}, model_name={}, dataset_name={}'.format(
                str(task), str(model_name), str(dataset_name)))
        logger.info('running parameters: ' + str(config))
        # load model
        model = get_model(experiment_config, data_feature)
        # load executor
        executor = get_executor(experiment_config, model, data_feature)
        # checkpoint by ray tune
        if checkpoint_dir:
            checkpoint = os.path.join(checkpoint_dir, 'checkpoint')
            executor.load_model(checkpoint)
        # train
        executor.train(train_data, valid_data)

    # init search algorithm and scheduler
    if search_alg == 'BasicSearch':
        algorithm = BasicVariantGenerator()
    elif search_alg == 'BayesOptSearch':
        algorithm = BayesOptSearch(metric='loss', mode='min')
        # add concurrency limit
        algorithm = ConcurrencyLimiter(algorithm,
                                       max_concurrent=max_concurrent)
    elif search_alg == 'HyperOpt':
        algorithm = HyperOptSearch(metric='loss', mode='min')
        # add concurrency limit
        algorithm = ConcurrencyLimiter(algorithm,
                                       max_concurrent=max_concurrent)
    else:
        raise ValueError('search_alg {} is not supported.'.format(search_alg))
    if scheduler == 'FIFO':
        tune_scheduler = FIFOScheduler()
    elif scheduler == 'ASHA':
        tune_scheduler = ASHAScheduler()
    elif scheduler == 'MedianStoppingRule':
        tune_scheduler = MedianStoppingRule()
    else:
        raise ValueError('scheduler {} is not supported.'.format(scheduler))
    # ray tune run
    ensure_dir('./libcity/cache/hyper_tune')
    result = tune.run(tune.with_parameters(train,
                                           experiment_config=experiment_config,
                                           train_data=train_data,
                                           valid_data=valid_data,
                                           data_feature=data_feature),
                      resources_per_trial={
                          'cpu': cpu_per_trial,
                          'gpu': gpu_per_trial
                      },
                      config=search_space,
                      metric='loss',
                      mode='min',
                      scheduler=tune_scheduler,
                      search_alg=algorithm,
                      local_dir='./libcity/cache/hyper_tune',
                      num_samples=num_samples)
    best_trial = result.get_best_trial("loss", "min", "last")
    logger.info("Best trial config: {}".format(best_trial.config))
    logger.info("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    # save best
    best_path = os.path.join(best_trial.checkpoint.value, "checkpoint")
    model_state, optimizer_state = torch.load(best_path)
    model_cache_file = './libcity/cache/model_cache/{}_{}.m'.format(
        model_name, dataset_name)
    ensure_dir('./libcity/cache/model_cache')
    torch.save((model_state, optimizer_state), model_cache_file)
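A hypothetical invocation of hyper_parameter (the argument values below are illustrative and not taken from the original repository; the space file path in particular is assumed):

# Illustrative call: tune 5 samples of an assumed model/dataset pair with
# HyperOpt and ASHA, one GPU per trial.
hyper_parameter(task='traffic_state_pred',          # assumed task name
                model_name='GRU',                    # assumed model name
                dataset_name='METR_LA',              # assumed dataset name
                space_file='./sample_space_file.json',  # assumed path
                scheduler='ASHA',
                search_alg='HyperOpt',
                num_samples=5,
                max_concurrent=1,
                cpu_per_trial=2,
                gpu_per_trial=1)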