Example #1
    def _test_metrics(self, result_func, metric, mode):
        sched = HyperBandScheduler(time_attr="time_total_s",
                                   metric=metric,
                                   mode=mode)
        stats = self.default_statistics()

        for i in range(stats["max_trials"]):
            t = Trial("__fake")
            sched.on_trial_add(None, t)
        runner = _MockTrialRunner(sched)

        big_bracket = sched._hyperbands[0][-1]

        for trl in big_bracket.current_trials():
            runner._launch_trial(trl)
        current_length = len(big_bracket.current_trials())

        # Provides results from 0 to 8 in order, keeping the last one running
        for i, trl in enumerate(big_bracket.current_trials()):
            action = sched.on_trial_result(runner, trl, result_func(1, i))
            runner.process_action(trl, action)

        new_length = len(big_bracket.current_trials())
        self.assertEqual(action, TrialScheduler.CONTINUE)
        self.assertEqual(new_length, self.downscale(current_length, sched))
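A brief sketch of how this helper might be driven from the same test class, once with a maximized metric and once with a minimized one (the method names and result shapes below are illustrative, not taken from the original file):

    def testAlternateMetricsMax(self):
        # Reward-style metric: larger neg_mean_loss is better, so mode="max".
        def result_max(t, rew):
            return dict(time_total_s=t, neg_mean_loss=rew)

        self._test_metrics(result_max, "neg_mean_loss", "max")

    def testAlternateMetricsMin(self):
        # Loss-style metric: smaller mean_loss is better, so mode="min".
        def result_min(t, rew):
            return dict(time_total_s=t, mean_loss=-rew)

        self._test_metrics(result_min, "mean_loss", "min")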
Example #2
    def testConfigSameEta(self):
        sched = HyperBandScheduler()
        i = 0
        while not sched._cur_band_filled():
            t = Trial("__fake")
            sched.on_trial_add(None, t)
            i += 1
        self.assertEqual(len(sched._hyperbands[0]), 5)
        self.assertEqual(sched._hyperbands[0][0]._n, 5)
        self.assertEqual(sched._hyperbands[0][0]._r, 81)
        self.assertEqual(sched._hyperbands[0][-1]._n, 81)
        self.assertEqual(sched._hyperbands[0][-1]._r, 1)

        reduction_factor = 10
        sched = HyperBandScheduler(max_t=1000,
                                   reduction_factor=reduction_factor)
        i = 0
        while not sched._cur_band_filled():
            t = Trial("__fake")
            sched.on_trial_add(None, t)
            i += 1
        self.assertEqual(len(sched._hyperbands[0]), 4)
        self.assertEqual(sched._hyperbands[0][0]._n, 4)
        self.assertEqual(sched._hyperbands[0][0]._r, 1000)
        self.assertEqual(sched._hyperbands[0][-1]._n, 1000)
        self.assertEqual(sched._hyperbands[0][-1]._r, 1)
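The bracket sizes asserted above are consistent with the usual HyperBand seeding arithmetic; a hedged sketch of that rule (an approximation for illustration, not Ray's literal source):

import math

def bracket_seed(s, s_max_1, eta, max_t):
    """Approximate (n0, r0) for bracket s: n0 grows by eta**s, r0 shrinks by eta**s."""
    n0 = int(math.ceil(s_max_1 / (s + 1) * eta ** s))
    r0 = max_t // (eta ** s)
    return n0, r0

# eta=3, max_t=81, s_max_1=5 reproduces the asserted pairs:
# (5, 81), (8, 27), (15, 9), (34, 3), (81, 1)
print([bracket_seed(s, s_max_1=5, eta=3, max_t=81) for s in range(5)])
# reduction_factor=10, max_t=1000, s_max_1=4 likewise gives (4, 1000) ... (1000, 1)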
Example #3
    def testAlternateMetrics(self):
        """Checking that alternate metrics will pass."""

        def result2(t, rew):
            return dict(time_total_s=t, neg_mean_loss=rew)

        sched = HyperBandScheduler(
            time_attr='time_total_s', reward_attr='neg_mean_loss')
        stats = self.default_statistics()

        for i in range(stats["max_trials"]):
            t = Trial("__fake")
            sched.on_trial_add(None, t)
        runner = _MockTrialRunner(sched)

        big_bracket = sched._hyperbands[0][-1]

        for trl in big_bracket.current_trials():
            runner._launch_trial(trl)
        current_length = len(big_bracket.current_trials())

        # Provides results from 0 to 8 in order, keeping the last one running
        for i, trl in enumerate(big_bracket.current_trials()):
            action = sched.on_trial_result(runner, trl, result2(1, i))
            runner.process_action(trl, action)

        new_length = len(big_bracket.current_trials())
        self.assertEqual(action, TrialScheduler.CONTINUE)
        self.assertEqual(new_length, self.downscale(current_length, sched))
Example #5
 def testConfigSameEtaSmall(self):
     sched = HyperBandScheduler(max_t=1)
     i = 0
     while len(sched._hyperbands) < 2:
         t = Trial("__fake")
         sched.on_trial_add(None, t)
         i += 1
     self.assertEqual(len(sched._hyperbands[0]), 1)
 def testConfigSameEtaSmall(self):
     sched = HyperBandScheduler(max_t=1)
     i = 0
     while len(sched._hyperbands) < 2:
         t = Trial("__fake")
         sched.on_trial_add(None, t)
         i += 1
     self.assertEqual(len(sched._hyperbands[0]), 5)
     self.assertTrue(all(v is None for v in sched._hyperbands[0][1:]))
Example #7
 def default_statistics(self):
     """Default statistics for HyperBand."""
     sched = HyperBandScheduler()
     res = {
         str(s): {
             "n": sched._get_n0(s),
             "r": sched._get_r0(s)
         }
         for s in range(sched._s_max_1)
     }
     res["max_trials"] = sum(v["n"] for v in res.values())
     res["brack_count"] = sched._s_max_1
     res["s_max"] = sched._s_max_1 - 1
     return res
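For the default configuration, the dictionary this helper returns would look roughly as below; the (n, r) pairs match the bracket docstring in the schedulerSetup example further down (shown for illustration only):

{
    "0": {"n": 5, "r": 81},
    "1": {"n": 8, "r": 27},
    "2": {"n": 15, "r": 9},
    "3": {"n": 34, "r": 3},
    "4": {"n": 81, "r": 1},
    "max_trials": 143,   # 5 + 8 + 15 + 34 + 81
    "brack_count": 5,
    "s_max": 4,
}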
Example #9
    def schedulerSetup(self, num_trials, max_t=81):
        """Setup a scheduler and Runner with max Iter = 9.

        Bracketing is placed as follows:
        (5, 81);
        (8, 27) -> (3, 54);
        (15, 9) -> (5, 27) -> (2, 45);
        (34, 3) -> (12, 9) -> (4, 27) -> (2, 42);
        (81, 1) -> (27, 3) -> (9, 9) -> (3, 27) -> (1, 41);"""
        sched = HyperBandScheduler(max_t=max_t)
        for i in range(num_trials):
            t = Trial("__fake")
            sched.on_trial_add(None, t)
        runner = _MockTrialRunner(sched)
        return sched, runner
Example #11
    def __init__(
            self,
            redis_address: str,
            corpus: Corpus,
            base_path: Union[str, Path],
            max_epochs: int,
            evaluation_metric: EvaluationMetric,
            training_runs: int,
            optimization_value: OptimizationValue,
            use_gpu=torch.cuda.is_available(),
    ):
        self.corpus = corpus
        self.max_epochs = max_epochs
        self.base_path = base_path
        self.evaluation_metric = evaluation_metric
        self.training_runs = training_runs
        self.optimization_value = optimization_value
        self.use_gpu = use_gpu

        ray.init(redis_address=redis_address)
        self.hb_scheduler = HyperBandScheduler(time_attr="training_iteration",
                                               metric="mean_loss",
                                               mode="min")

        # Config dictionary for Tune Param Selector
        config, args = dict(), dict()
        self.config = config
        config["args"] = args
        args["cuda"] = self.use_gpu
        args["corpus"] = corpus
Example #12
def resolve_early_stopping(early_stopping, max_iters, metric_name):
    if isinstance(early_stopping, str):
        if early_stopping in TuneBaseSearchCV.defined_schedulers:
            if early_stopping == "PopulationBasedTraining":
                return PopulationBasedTraining(metric=metric_name, mode="max")
            elif early_stopping == "AsyncHyperBandScheduler":
                return AsyncHyperBandScheduler(metric=metric_name,
                                               mode="max",
                                               max_t=max_iters)
            elif early_stopping == "HyperBandScheduler":
                return HyperBandScheduler(metric=metric_name,
                                          mode="max",
                                          max_t=max_iters)
            elif early_stopping == "MedianStoppingRule":
                return MedianStoppingRule(metric=metric_name, mode="max")
            elif early_stopping == "ASHAScheduler":
                return ASHAScheduler(metric=metric_name,
                                     mode="max",
                                     max_t=max_iters)
        raise ValueError(
            "{} is not a defined scheduler. "
            "Check the list of available schedulers.".format(early_stopping))
    elif isinstance(early_stopping, TrialScheduler):
        early_stopping._metric = metric_name
        early_stopping._mode = "max"
        return early_stopping
    else:
        raise TypeError("`early_stopping` must be a str, boolean, "
                        f"or tune scheduler. Got {type(early_stopping)}.")
Example #13
def test_custom_scheduler():
    try:
        from ray.tune.schedulers import HyperBandScheduler
    except ImportError:
        print("skip the test as ray tune cannot be imported.")
        return
    my_scheduler = HyperBandScheduler(time_attr="samplesize",
                                      max_t=1000,
                                      reduction_factor=2)
    best_config = test_scheduler(scheduler=my_scheduler)
    print("Custom ASHA scheduler, test error:",
          abs(10 / 2 - best_config["z"] / 2))
Example #14
def get_raytune_schedule(raytune_cfg):
    if raytune_cfg["sched"] == "asha":
        return AsyncHyperBandScheduler(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            max_t=raytune_cfg["asha"]["max_t"],
            grace_period=raytune_cfg["asha"]["grace_period"],
            reduction_factor=raytune_cfg["asha"]["reduction_factor"],
            brackets=raytune_cfg["asha"]["brackets"],
        )
    elif raytune_cfg["sched"] == "hyperband":
        return HyperBandScheduler(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            max_t=raytune_cfg["hyperband"]["max_t"],
            reduction_factor=raytune_cfg["hyperband"]["reduction_factor"],
        )
    # requires pip install hpbandster ConfigSpace
    elif (raytune_cfg["sched"] == "bohb") or (raytune_cfg["sched"] == "BOHB"):
        return HyperBandForBOHB(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            max_t=raytune_cfg["hyperband"]["max_t"],
            reduction_factor=raytune_cfg["hyperband"]["reduction_factor"],
        )
    elif (raytune_cfg["sched"] == "pbt") or (raytune_cfg["sched"] == "PBT"):
        return PopulationBasedTraining(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            perturbation_interval=raytune_cfg["pbt"]["perturbation_interval"],
            hyperparam_mutations=raytune_cfg["pbt"]["hyperparam_mutations"],
            log_config=True,
        )
    # requires pip install GPy sklearn
    elif (raytune_cfg["sched"] == "pb2") or (raytune_cfg["sched"] == "PB2"):
        return PB2(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            perturbation_interval=raytune_cfg["pb2"]["perturbation_interval"],
            hyperparam_bounds=raytune_cfg["pb2"]["hyperparam_bounds"],
            log_config=True,
        )
    else:
        print("INFO: Not using any Ray Tune trial scheduler.")
        return None
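A hedged sketch of the configuration dictionary this factory expects for the "hyperband" branch (the key names mirror what the function reads above; the metric, mode, and numeric values are placeholders):

raytune_cfg = {
    "sched": "hyperband",
    "default_metric": "val_loss",
    "default_mode": "min",
    "hyperband": {
        "max_t": 100,
        "reduction_factor": 3,
    },
}
scheduler = get_raytune_schedule(raytune_cfg)  # -> HyperBandScheduler(...)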
Example #15
def tune_():
    hyperband = HyperBandScheduler(metric="accuracy", mode="max")

    analysis = tune.run(_inject_config,
                        config={},
                        stop={
                            "accuracy": 0.90,
                            "training_iteration": 100
                        },
                        resources_per_trial={"gpu": 1},
                        num_samples=10,
                        scheduler=hyperband)

    print("Best config: ",
          analysis.get_best_config(metric="accuracy", mode="max"))

    df = analysis.results_df
    print(df)
    def testConfigSameEta(self):
        sched = HyperBandScheduler()
        i = 0
        while not sched._cur_band_filled():
            t = Trial("__fake")
            sched.on_trial_add(None, t)
            i += 1
        self.assertEqual(len(sched._hyperbands[0]), 5)
        self.assertEqual(sched._hyperbands[0][0]._n, 5)
        self.assertEqual(sched._hyperbands[0][0]._r, 81)
        self.assertEqual(sched._hyperbands[0][-1]._n, 81)
        self.assertEqual(sched._hyperbands[0][-1]._r, 1)

        sched = HyperBandScheduler(max_t=810)
        i = 0
        while not sched._cur_band_filled():
            t = Trial("__fake")
            sched.on_trial_add(None, t)
            i += 1
        self.assertEqual(len(sched._hyperbands[0]), 5)
        self.assertEqual(sched._hyperbands[0][0]._n, 5)
        self.assertEqual(sched._hyperbands[0][0]._r, 810)
        self.assertEqual(sched._hyperbands[0][-1]._n, 81)
        self.assertEqual(sched._hyperbands[0][-1]._r, 10)
    def __init__(self,
                 estimator,
                 early_stopping=None,
                 scoring=None,
                 n_jobs=None,
                 cv=5,
                 refit=True,
                 verbose=0,
                 error_score="raise",
                 return_train_score=False,
                 max_iters=10,
                 use_gpu=False):

        self.estimator = estimator

        if early_stopping and self._can_early_stop():
            self.max_iters = max_iters
            if early_stopping is True:
                # Override the early_stopping variable so
                # that it is resolved appropriately in
                # the next block
                early_stopping = "AsyncHyperBandScheduler"
            # Resolve the early stopping object
            if isinstance(early_stopping, str):
                if early_stopping in TuneBaseSearchCV.defined_schedulers:
                    if early_stopping == "PopulationBasedTraining":
                        self.early_stopping = PopulationBasedTraining(
                            metric="average_test_score")
                    elif early_stopping == "AsyncHyperBandScheduler":
                        self.early_stopping = AsyncHyperBandScheduler(
                            metric="average_test_score")
                    elif early_stopping == "HyperBandScheduler":
                        self.early_stopping = HyperBandScheduler(
                            metric="average_test_score")
                    elif early_stopping == "MedianStoppingRule":
                        self.early_stopping = MedianStoppingRule(
                            metric="average_test_score")
                    elif early_stopping == "ASHAScheduler":
                        self.early_stopping = ASHAScheduler(
                            metric="average_test_score")
                else:
                    raise ValueError(
                        "{} is not a defined scheduler. "
                        "Check the list of available schedulers.".format(
                            early_stopping))
            elif isinstance(early_stopping, TrialScheduler):
                self.early_stopping = early_stopping
                self.early_stopping.metric = "average_test_score"
            else:
                raise TypeError("`early_stopping` must be a str, boolean, "
                                "or tune scheduler")
        elif not early_stopping:
            warnings.warn("Early stopping is not enabled. "
                          "To enable early stopping, pass in a supported "
                          "scheduler from Tune and ensure the estimator "
                          "has `partial_fit`.")

            self.max_iters = 1
            self.early_stopping = None
        else:
            raise ValueError("Early stopping is not supported because "
                             "the estimator does not have `partial_fit`")

        self.cv = cv
        self.scoring = scoring
        self.n_jobs = n_jobs
        self.refit = refit
        self.verbose = verbose
        self.error_score = error_score
        self.return_train_score = return_train_score
        self.use_gpu = use_gpu
Example #18
def hparams(algorithm, scheduler, num_samples, tensorboard, bare):
    from glob import glob

    import tensorflow.summary
    from tensorflow import random as tfrandom, int64 as tfint64
    from ray import init as init_ray, shutdown as shutdown_ray
    from ray import tune
    from wandb.ray import WandbLogger
    from wandb import sweep as wandbsweep
    from wandb.apis import CommError as wandbCommError

    # less summaries are logged if MLENCRYPT_TB is TRUE (for efficiency)
    # TODO: use tf.summary.record_if?
    environ["MLENCRYPT_TB"] = str(tensorboard).upper()
    environ["MLENCRYPT_BARE"] = str(bare).upper()
    if getenv('MLENCRYPT_TB', 'FALSE') == 'TRUE' and \
            getenv('MLENCRYPT_BARE', 'FALSE') == 'TRUE':
        raise ValueError('TensorBoard logging cannot be enabled in bare mode.')

    logdir = f'logs/hparams/{datetime.now()}'

    # "These results show that K = 3 is the optimal choice for the
    # cryptographic application of neural synchronization. K = 1 and K = 2 are
    # too insecure in regard to the geometric attack. And for K > 3 the effort
    # of A and B grows exponentially with increasing L, while the simple attack
    # is quite successful in the limit K -> infinity. Consequently, one should
    # only use Tree Parity Machines with three hidden units for the neural
    # key-exchange protocol." (Ruttor, 2006)
    # https://arxiv.org/pdf/0711.2411.pdf#page=59

    update_rules = [
        'random-same',
        # 'random-different-A-B-E', 'random-different-A-B',
        'hebbian',
        'anti_hebbian',
        'random_walk'
    ]
    K_bounds = {'min': 4, 'max': 8}
    N_bounds = {'min': 4, 'max': 8}
    L_bounds = {'min': 4, 'max': 8}

    # TODO: don't use *_bounds.values() since .values doesn't preserve order

    def get_session_num(logdir):
        current_runs = glob(join(logdir, "run-*"))
        if current_runs:
            last_run_path = current_runs[-1]
            last_run_session_num = int(last_run_path.split('-')[-1])
            return last_run_session_num + 1
        else:  # there are no runs yet, start at 0
            return 0

    def trainable(config, reporter):
        """
        Args:
            config (dict): Parameters provided from the search algorithm
                or variant generation.
        """
        if not isinstance(config['update_rule'], str):
            update_rule = update_rules[int(config['update_rule'])]
        else:
            update_rule = config['update_rule']
        K, N, L = int(config['K']), int(config['N']), int(config['L'])

        run_name = f"run-{get_session_num(logdir)}"
        run_logdir = join(logdir, run_name)
        # for each attack, the TPMs should start with the same weights
        initial_weights_tensors = get_initial_weights(K, N, L)
        training_steps_ls = {}
        eve_scores_ls = {}
        losses_ls = {}
        # for each attack, the TPMs should use the same inputs
        seed = tfrandom.uniform([],
                                minval=0,
                                maxval=tfint64.max,
                                dtype=tfint64).numpy()
        for attack in ['none', 'geometric']:
            initial_weights = {
                tpm: weights_tensor_to_variable(weights, tpm)
                for tpm, weights in initial_weights_tensors.items()
            }
            tfrandom.set_seed(seed)

            if tensorboard:
                attack_logdir = join(run_logdir, attack)
                attack_writer = tensorflow.summary.create_file_writer(
                    attack_logdir)
                with attack_writer.as_default():
                    training_steps, sync_scores, loss = run(
                        update_rule, K, N, L, attack, initial_weights)
            else:
                training_steps, sync_scores, loss = run(
                    update_rule, K, N, L, attack, initial_weights)
            training_steps_ls[attack] = training_steps
            eve_scores_ls[attack] = sync_scores
            losses_ls[attack] = loss
        avg_training_steps = tensorflow.math.reduce_mean(
            list(training_steps_ls.values()))
        avg_eve_score = tensorflow.math.reduce_mean(
            list(eve_scores_ls.values()))
        mean_loss = tensorflow.math.reduce_mean(list(losses_ls.values()))
        reporter(
            avg_training_steps=avg_training_steps.numpy(),
            avg_eve_score=avg_eve_score.numpy(),
            mean_loss=mean_loss.numpy(),
            done=True,
        )

    if algorithm == 'hyperopt':
        from hyperopt import hp as hyperopt
        from hyperopt.pyll.base import scope
        from ray.tune.suggest.hyperopt import HyperOptSearch

        space = {
            'update_rule': hyperopt.choice(
                'update_rule',
                update_rules,
            ),
            'K': scope.int(hyperopt.quniform('K', *K_bounds.values(), q=1)),
            'N': scope.int(hyperopt.quniform('N', *N_bounds.values(), q=1)),
            'L': scope.int(hyperopt.quniform('L', *L_bounds.values(), q=1)),
        }
        algo = HyperOptSearch(
            space,
            metric='mean_loss',
            mode='min',
            points_to_evaluate=[
                {
                    'update_rule': 0,
                    'K': 3,
                    'N': 16,
                    'L': 8
                },
                {
                    'update_rule': 0,
                    'K': 8,
                    'N': 16,
                    'L': 8
                },
                {
                    'update_rule': 0,
                    'K': 8,
                    'N': 16,
                    'L': 128
                },
            ],
        )
    elif algorithm == 'bayesopt':
        from ray.tune.suggest.bayesopt import BayesOptSearch

        space = {
            'update_rule': (0, len(update_rules)),
            'K': tuple(K_bounds.values()),
            'N': tuple(N_bounds.values()),
            'L': tuple(L_bounds.values()),
        }
        algo = BayesOptSearch(
            space,
            metric="mean_loss",
            mode="min",
            # TODO: what is utility_kwargs for and why is it needed?
            utility_kwargs={
                "kind": "ucb",
                "kappa": 2.5,
                "xi": 0.0
            })
    elif algorithm == 'nevergrad':
        from ray.tune.suggest.nevergrad import NevergradSearch
        from nevergrad import optimizers
        from nevergrad import p as ngp

        algo = NevergradSearch(
            optimizers.TwoPointsDE(
                ngp.Instrumentation(
                    update_rule=ngp.Choice(update_rules),
                    K=ngp.Scalar(lower=K_bounds['min'],
                                 upper=K_bounds['max']).set_integer_casting(),
                    N=ngp.Scalar(lower=N_bounds['min'],
                                 upper=N_bounds['max']).set_integer_casting(),
                    L=ngp.Scalar(lower=L_bounds['min'],
                                 upper=L_bounds['max']).set_integer_casting(),
                )),
            None,  # since the optimizer is already instrumented with kwargs
            metric="mean_loss",
            mode="min")
    elif algorithm == 'skopt':
        from skopt import Optimizer
        from ray.tune.suggest.skopt import SkOptSearch

        optimizer = Optimizer([
            update_rules,
            tuple(K_bounds.values()),
            tuple(N_bounds.values()),
            tuple(L_bounds.values())
        ])
        algo = SkOptSearch(
            optimizer,
            ["update_rule", "K", "N", "L"],
            metric="mean_loss",
            mode="min",
            points_to_evaluate=[
                ['random-same', 3, 16, 8],
                ['random-same', 8, 16, 8],
                ['random-same', 8, 16, 128],
            ],
        )
    elif algorithm == 'dragonfly':
        # TODO: doesn't work
        from ray.tune.suggest.dragonfly import DragonflySearch
        from dragonfly.exd.experiment_caller import EuclideanFunctionCaller
        from dragonfly.opt.gp_bandit import EuclideanGPBandit
        # from dragonfly.exd.experiment_caller import CPFunctionCaller
        # from dragonfly.opt.gp_bandit import CPGPBandit
        from dragonfly import load_config

        domain_config = load_config({
            "domain": [
                {
                    "name": "update_rule",
                    "type": "discrete",
                    "dim": 1,
                    "items": update_rules
                },
                {
                    "name": "K",
                    "type": "int",
                    "min": K_bounds['min'],
                    "max": K_bounds['max'],
                    # "dim": 1
                },
                {
                    "name": "N",
                    "type": "int",
                    "min": N_bounds['min'],
                    "max": N_bounds['max'],
                    # "dim": 1
                },
                {
                    "name": "L",
                    "type": "int",
                    "min": L_bounds['min'],
                    "max": L_bounds['max'],
                    # "dim": 1
                }
            ]
        })
        func_caller = EuclideanFunctionCaller(
            None, domain_config.domain.list_of_domains[0])
        optimizer = EuclideanGPBandit(func_caller, ask_tell_mode=True)
        algo = DragonflySearch(
            optimizer,
            metric="mean_loss",
            mode="min",
            points_to_evaluate=[
                ['random-same', 3, 16, 8],
                ['random-same', 8, 16, 8],
                ['random-same', 8, 16, 128],
            ],
        )
    elif algorithm == 'bohb':
        from ConfigSpace import ConfigurationSpace
        from ConfigSpace import hyperparameters as CSH
        from ray.tune.suggest.bohb import TuneBOHB

        config_space = ConfigurationSpace()
        config_space.add_hyperparameter(
            CSH.CategoricalHyperparameter("update_rule", choices=update_rules))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(name='K',
                                             lower=K_bounds['min'],
                                             upper=K_bounds['max']))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(name='N',
                                             lower=N_bounds['min'],
                                             upper=N_bounds['max']))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(name='L',
                                             lower=L_bounds['min'],
                                             upper=L_bounds['max']))
        algo = TuneBOHB(config_space, metric="mean_loss", mode="min")
    elif algorithm == 'zoopt':
        from ray.tune.suggest.zoopt import ZOOptSearch
        from zoopt import ValueType

        space = {
            "update_rule":
            (ValueType.DISCRETE, range(0, len(update_rules)), False),
            "K": (ValueType.DISCRETE,
                  range(K_bounds['min'], K_bounds['max'] + 1), True),
            "N": (ValueType.DISCRETE,
                  range(N_bounds['min'], N_bounds['max'] + 1), True),
            "L": (ValueType.DISCRETE,
                  range(L_bounds['min'], L_bounds['max'] + 1), True),
        }
        # TODO: change budget to a large value
        algo = ZOOptSearch(budget=10,
                           dim_dict=space,
                           metric="mean_loss",
                           mode="min")

    # TODO: use more appropriate arguments for schedulers:
    # https://docs.ray.io/en/master/tune/api_docs/schedulers.html
    if scheduler == 'fifo':
        sched = None  # Tune defaults to FIFO
    elif scheduler == 'pbt':
        from ray.tune.schedulers import PopulationBasedTraining
        from random import randint
        sched = PopulationBasedTraining(
            metric="mean_loss",
            mode="min",
            hyperparam_mutations={
                "update_rule": update_rules,
                "K": lambda: randint(K_bounds['min'], K_bounds['max']),
                "N": lambda: randint(N_bounds['min'], N_bounds['max']),
                "L": lambda: randint(L_bounds['min'], L_bounds['max']),
            })
    elif scheduler == 'ahb' or scheduler == 'asha':
        # https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#asha-tune-schedulers-ashascheduler
        from ray.tune.schedulers import AsyncHyperBandScheduler
        sched = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
    elif scheduler == 'hb':
        from ray.tune.schedulers import HyperBandScheduler
        sched = HyperBandScheduler(metric="mean_loss", mode="min")
    elif algorithm == 'bohb' or scheduler == 'bohb':
        from ray.tune.schedulers import HyperBandForBOHB
        sched = HyperBandForBOHB(metric="mean_loss", mode="min")
    elif scheduler == 'msr':
        from ray.tune.schedulers import MedianStoppingRule
        sched = MedianStoppingRule(metric="mean_loss", mode="min")
    init_ray(
        address=getenv("ip_head"),
        redis_password=getenv('redis_password'),
    )
    analysis = tune.run(
        trainable,
        name='mlencrypt_research',
        config={
            "monitor": True,
            "env_config": {
                "wandb": {
                    "project": "mlencrypt-research",
                    "sync_tensorboard": True,
                },
            },
        },
        # resources_per_trial={"cpu": 1, "gpu": 3},
        local_dir='./ray_results',
        export_formats=['csv'],  # TODO: add other formats?
        num_samples=num_samples,
        loggers=[
            tune.logger.JsonLogger, tune.logger.CSVLogger,
            tune.logger.TBXLogger, WandbLogger
        ],
        search_alg=algo,
        scheduler=sched,
        queue_trials=True,
    )
    try:
        wandbsweep(analysis)
    except wandbCommError:
        # see https://docs.wandb.com/sweeps/ray-tune#feature-compatibility
        pass
    best_config = analysis.get_best_config(metric='mean_loss', mode='min')
    print(f"Best config: {best_config}")
    shutdown_ray()
        "--server-address",
        type=str,
        default=None,
        required=False,
        help="The address of server to connect to if using "
        "Ray Client.",
    )
    args, _ = parser.parse_known_args()
    if args.server_address is not None:
        ray.init(f"ray://{args.server_address}")
    else:
        ray.init(num_cpus=4 if args.smoke_test else None)

    # Hyperband early stopping, configured with `episode_reward_mean` as the
    # objective and `training_iteration` as the time unit,
    # which is automatically filled by Tune.
    hyperband = HyperBandScheduler(max_t=200)

    analysis = tune.run(
        train,
        name="hyperband_test",
        num_samples=20,
        metric="episode_reward_mean",
        mode="max",
        stop={"training_iteration": 10 if args.smoke_test else 99999},
        config={"height": tune.uniform(0, 100)},
        scheduler=hyperband,
        fail_fast=True,
    )
    print("Best hyperparameters found were: ", analysis.best_config)
Example #20
    def __init__(self,
                 estimator,
                 early_stopping=None,
                 scoring=None,
                 n_jobs=None,
                 cv=5,
                 refit=True,
                 verbose=0,
                 error_score="raise",
                 return_train_score=False,
                 max_iters=10,
                 use_gpu=False):

        self.estimator = estimator

        if early_stopping is not None and self._can_early_stop():
            self.max_iters = max_iters
            if isinstance(early_stopping, str):
                if early_stopping in TuneBaseSearchCV.defined_schedulers:
                    if early_stopping == "PopulationBasedTraining":
                        self.early_stopping = PopulationBasedTraining(
                            metric="average_test_score")
                    elif early_stopping == "AsyncHyperBandScheduler":
                        self.early_stopping = AsyncHyperBandScheduler(
                            metric="average_test_score")
                    elif early_stopping == "HyperBandScheduler":
                        self.early_stopping = HyperBandScheduler(
                            metric="average_test_score")
                    elif early_stopping == "HyperBandForBOHB":
                        self.early_stopping = HyperBandForBOHB(
                            metric="average_test_score")
                    elif early_stopping == "MedianStoppingRule":
                        self.early_stopping = MedianStoppingRule(
                            metric="average_test_score")
                    elif early_stopping == "ASHAScheduler":
                        self.early_stopping = ASHAScheduler(
                            metric="average_test_score")
                else:
                    raise ValueError(
                        "{} is not a defined scheduler. "
                        "Check the list of available schedulers.".format(
                            early_stopping))
            elif isinstance(early_stopping, TrialScheduler):
                self.early_stopping = early_stopping
                self.early_stopping.metric = "average_test_score"
            else:
                raise TypeError("Scheduler must be a str or tune scheduler")
        else:
            warnings.warn("Early stopping is not enabled. "
                          "To enable early stopping, pass in a supported "
                          "scheduler from Tune and ensure the estimator "
                          "has `partial_fit`.")

            self.max_iters = 1
            self.early_stopping = None

        self.cv = cv
        self.scoring = scoring
        self.n_jobs = n_jobs
        self.refit = refit
        self.verbose = verbose
        self.error_score = error_score
        self.return_train_score = return_train_score
        self.use_gpu = use_gpu
                    f.write(json.dumps({"timestep": timestep}))

        # Here we use `episode_reward_mean`, but you can also report other
        # objectives such as loss or accuracy.
        tune.report(episode_reward_mean=v)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--smoke-test",
                        action="store_true",
                        help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init(num_cpus=4 if args.smoke_test else None)

    # Hyperband early stopping, configured with `episode_reward_mean` as the
    # objective and `training_iteration` as the time unit,
    # which is automatically filled by Tune.
    hyperband = HyperBandScheduler(time_attr="training_iteration",
                                   metric="episode_reward_mean",
                                   mode="max",
                                   max_t=200)

    tune.run(train,
             name="hyperband_test",
             num_samples=20,
             stop={"training_iteration": 10 if args.smoke_test else 99999},
             config={"height": tune.uniform(0, 100)},
             scheduler=hyperband,
             fail_fast=True)
Example #22
def main(args=None):
    config = trainer_util.default_config

    ray.init(num_cpus=args.cpus_per_trial * args.num_avail_gpus,
             num_gpus=args.num_avail_gpus)
    scheduler = HyperBandScheduler(time_attr="training_iteration",
                                   metric="precision",
                                   mode="max",
                                   max_t=args.num_epochs)
    reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=[
            "loss", "f1", "training_iteration", 'acc', 'pos_acc', 'neg_acc',
            'precision', 'recall'
        ])
    result = tune.run(partial(trainer_util.train, args=args),
                      name='hyperband_test',
                      resources_per_trial={
                          "cpu": args.cpus_per_trial,
                          "gpu": args.gpus_per_trial
                      },
                      config=config,
                      stop={"training_iteration": args.num_epochs},
                      num_samples=args.num_tune_samples,
                      scheduler=scheduler,
                      progress_reporter=reporter,
                      local_dir=args.work_dir)

    best_trial = result.get_best_trial("precision", "max", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation precision: {}".format(
        best_trial.last_result["precision"]))
    print("Best trial final validation recall: {}".format(
        best_trial.last_result["recall"]))
    print("Best trial final validation acc: {}".format(
        best_trial.last_result["acc"]))
    print("Best trial final validation pos_acc: {}".format(
        best_trial.last_result["pos_acc"]))
    print("Best trial final validation neg_acc: {}".format(
        best_trial.last_result["neg_acc"]))

    bert_model, vocab = get_pytorch_kobert_model()

    num_classes = 4 if best_trial.config["use_multi_class"] else 2
    best_trained_model = ExtractiveModel(
        bert_model,
        100,
        11,
        768,
        use_bert_sum_words=best_trial.config["use_bert_sum_words"],
        use_pos=best_trial.config["use_pos"],
        use_media=best_trial.config['use_media'],
        simple_model=best_trial.config['simple_model'],
        num_classes=num_classes,
        dim_feedforward=best_trial.config['dim_feedforward'],
        dropout=best_trial.config['dropout'])

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda"
        if args.gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    best_checkpoint_dir = best_trial.checkpoint.value
    model_state, optimizer_state = torch.load(
        os.path.join(best_checkpoint_dir, "checkpoint"))
    best_trained_model.load_state_dict(model_state)

    test_acc = trainer_util.test_accuracy(best_trained_model,
                                          best_trial.config["use_multi_class"],
                                          best_trial.config["max_token_cnt"],
                                          device, args)
    print("Best trial test set f1: {}".format(test_acc))
Example #23
				output = self.model(data)
				test_loss += F.nll_loss(output, target, reduction="sum").item()
				pred = output.argmax(dim=1, keepdim=True)
				correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
		test_loss = test_loss / len(self.test_loader.dataset)
		accuracy = correct.item() / len(self.test_loader.dataset)
		return {"mean_loss": test_loss, "mean_accuracy": accuracy}

	def _train(self):
		self._train_iteration()
		return self._test()
	def _save(self, checkpoint_dir):
		checkpoint_path=os.path.join(checkpoint_dir,"model.pth")
		torch.save(self.model.state_dict(),checkpoint_path)
		return checkpoint_path

	def _restore(self, checkpointpath):
		self.model.load_state_dict(torch.load(checkpointpath))


if __name__ == '__main__':
	args = parser.parse_args()
	ray.init(redis_address=args.redis_address)
	sched = HyperBandScheduler(time_attr="training_iteration", metric="mean_loss", mode="min")
	tune.run(TrainAPTO, scheduler=sched,
			 **{"stop": {"mean_accuracy": 0.65, "training_iteration": 1 if args.smoke_test else 5, },
				"resources_per_trial": {"cpu": 6, "gpu": int(not args.no_cuda)},
				"num_samples": 1 if args.smoke_test else 4,
				"checkpoint_at_end": True,
				"config": {"args": args, "lr": tune.uniform(0.002, 0.0002), "momentum": tune.uniform(0.8, 0.95), }},reuse_actors=False)
Example #24
                        help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    mnist_spec = {
        "stop": {
            "mean_accuracy": 0.99,
            "time_total_s": 600,
        },
        "config": {
            "learning_rate":
            sample_from(lambda spec: 10**np.random.uniform(-5, -3)),
            "activation":
            "relu",
        },
        "num_samples": 10,
    }

    if args.smoke_test:
        mnist_spec["stop"]["training_iteration"] = 20
        mnist_spec["num_samples"] = 1

    ray.init()
    hyperband = HyperBandScheduler(time_attr="training_iteration",
                                   metric="mean_accuracy",
                                   mode="max",
                                   max_t=10)

    tune.run(TrainMNIST,
             name="mnist_hyperband_test",
             scheduler=hyperband,
             **mnist_spec)
Example #25
        checkpoint = torch.load(os.path.join(checkpoint_dir, "checkpoint.pt"))
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

parser = argparse.ArgumentParser("PyTorch Hyperparameter Sweep Test")
parser.add_argument("--use-gpu", action="store_true", default=False, help="enables CUDA training")
parser.add_argument("--ray-address", type=str, help="The Redis address of the cluster.")
parser.add_argument("--smoke-test", action="store_true", help="Finish quickly for testing")
args = parser.parse_args()
ray.init(address=args.ray_address, num_cpus=6 if args.smoke_test else None)

# validate_save_restore(MyTrainableClass)
# validate_save_restore(MyTrainableClass, use_object_store=True)

hb = HyperBandScheduler(
    metric="test_loss",
    mode="min")

ahsa = ASHAScheduler(
    metric="test_loss")

analysis = tune.run(MyTrainableClass,
    name="pytorch_hparams_test",
    scheduler=ahsa,
    stop={"training_iteration": 3 if args.smoke_test else 10},
    num_samples=1 if args.smoke_test else 10,
    resources_per_trial={
        "cpu": 1,
        "gpu": int(args.use_gpu)
    },
    checkpoint_at_end=True)
    def _restore(self, path):
        self.model.load_state_dict(torch.load(os.path.join(path, "model.pth")))


if __name__ == '__main__':
    datasets.MNIST('~/data', train=True, download=True)
    args = parser.parse_args()

    import numpy as np
    import ray
    from ray import tune
    from ray.tune.schedulers import HyperBandScheduler

    ray.init()
    sched = HyperBandScheduler(time_attr="training_iteration",
                               reward_attr="neg_mean_loss")
    tune.run_experiments(
        {
            "exp": {
                "stop": {
                    "mean_accuracy": 0.95,
                    "training_iteration": 1 if args.smoke_test else 20,
                },
                "trial_resources": {
                    "cpu": 3
                },
                "run": TrainMNIST,
                "num_samples": 1 if args.smoke_test else 20,
                "checkpoint_at_end": True,
                "config": {
                    "args": args,
Example #27
    parser.add_argument("--server-address",
                        type=str,
                        default=None,
                        required=False,
                        help="The address of server to connect to if using "
                        "Ray Client.")
    args, _ = parser.parse_known_args()
    if args.server_address:
        ray.init(f"ray://{args.server_address}")
    else:
        ray.init(num_cpus=4 if args.smoke_test else None)

    # Hyperband early stopping, configured with `episode_reward_mean` as the
    # objective and `training_iteration` as the time unit,
    # which is automatically filled by Tune.
    hyperband = HyperBandScheduler(time_attr="training_iteration", max_t=200)

    analysis = tune.run(
        MyTrainableClass,
        name="hyperband_test",
        num_samples=20 if args.smoke_test else 200,
        metric="episode_reward_mean",
        mode="max",
        stop={"training_iteration": 1 if args.smoke_test else 200},
        config={
            "width": tune.randint(10, 90),
            "height": tune.randint(0, 100)
        },
        verbose=1,
        scheduler=hyperband,
        fail_fast=True)
parser.add_argument("--smoke-test",
                    default=False,
                    action="store_true",
                    help="Finish quickly for testing")
parser.add_argument(
    "--ray-address",
    default=None,
    help="Address of Ray cluster for seamless distributed execution.")
args, _ = parser.parse_known_args()
ray.init(
    address=args.ray_address) if args.ray_address is not None else ray.init()

# validate_save_restore(MyTrainableClass)
# validate_save_restore(MyTrainableClass, use_object_store=True)

hb = HyperBandScheduler(metric="episode_reward_mean", mode="max")

tune.run(MyTrainableClass,
         name="asynchyperband_test",
         scheduler=hb,
         stop={"training_iteration": 1 if args.smoke_test else 99999},
         num_samples=20,
         resources_per_trial={
             "cpu": 1,
             "gpu": 0
         },
         config={
             "width":
             tune.sample_from(lambda spec: 10 + int(90 * random.random())),
             "height":
             tune.sample_from(lambda spec: int(100 * random.random())),
Example #29
def run_ray_logistic(latents_path, tags, kf, idx, log_name):

    ray.init(num_cpus=5, num_gpus=1)
    data_train_list = []
    data_val_list = []
    for train_idx, val_idx in kf.split(idx):
        train_idx = idx[train_idx]  #Indexes from the full tensor.
        val_idx = idx[val_idx]  #Indexes from the full tensor.

        latents_train, latents_val = PCA_macau_samples(dir_path=latents_path,
                                                       idx_train=train_idx,
                                                       idx_val=val_idx)

        data_train_list += [latent_dataset(latents_train, tags[train_idx])]
        data_val_list += [latent_dataset(latents_val, tags[val_idx])]

    data_train = pin_in_object_store(data_train_list)
    data_val = pin_in_object_store(data_val_list)

    class train_class(Trainable):
        def _setup(self):
            self.device = torch.device("cuda:0")
            mod_opt = {'type': "plain_fact", 'cov': False, 'latents': 20}
            self.nfolds = self.config["nfolds"]
            #data_train=TensorFactDataset(csv_file_serie="complete_tensor_train1.csv",cov_path="complete_covariates")
            self.mod = []
            self.dataloader = []
            self.data_val = get_pinned_object(data_val)
            for fold in range(self.nfolds):
                mod_fold = MLP_class_mod(
                    get_pinned_object(data_train)[fold].get_dim())
                mod_fold.to(self.device)
                self.mod += [mod_fold]
                #self.mod=MLP_class_mod(get_pinned_object(data_train).get_dim())

                self.dataloader += [
                    DataLoader(get_pinned_object(data_train)[fold],
                               batch_size=5000,
                               shuffle=True)
                ]
                #self.dataloader_val += DataLoader(get_pinned_object(data_val),batch_size=1000,shuffle=False)
            #self.dataloader=DataLoader(data_train,batch_size=65000,shuffle=True,num_workers=2)
            self.timestep = 0
            print("SETUUUUP")

        def _train(self):
            self.timestep += 1

            print("Timestep")
            print(self.timestep)

            #Select learning rate depending on the epoch.
            if self.timestep < 40:
                l_r = 0.005
            elif self.timestep < 60:
                l_r = 0.0015
            else:
                l_r = 0.0005

            auc_mean_folds = 0
            for fold in range(self.nfolds):
                optimizer = torch.optim.Adam(self.mod[fold].parameters(),
                                             lr=l_r,
                                             weight_decay=self.config["L2"])

                criterion = nn.BCEWithLogitsLoss()
                total_loss = 0
                for idx, sampled_batch in enumerate(self.dataloader[fold]):
                    optimizer.zero_grad()
                    target = sampled_batch[1].to(self.device)
                    preds = self.mod[fold].fwd(sampled_batch[0].to(
                        self.device))
                    loss = criterion(preds, target)
                    loss.backward()
                    optimizer.step()

                with torch.no_grad():
                    loss_val = 0
                    target = self.data_val[fold].tags.to(self.device)
                    preds = self.mod[fold].fwd(self.data_val[fold].latents.to(
                        self.device))
                    loss_val += roc_auc_score(target, preds)
                    auc_mean = loss_val
                #rmse_val_loss_computed=(np.sqrt(loss_val.detach().cpu().numpy()/(i_val+1)))
                auc_mean_folds += auc_mean

            #return TrainingResult(mean_accuracy=(auc_mean_folds/self.nfolds),timesteps_this_iter=1)
            return {
                "mean_accuracy": (auc_mean_folds / self.nfolds),
                "time_steps_this_iter": 1
            }

        def _save(self, checkpoint_dir):
            print("Saving")
            path = os.path.join(checkpoint_dir, "checkpoint")
            state_dict_list = []
            for fold in range(self.nfolds):
                state_dict_list += [self.mod[fold].state_dict()]
            torch.save(state_dict_list, path)
            print("SAVIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIING")
            #raise Exception()
            #torch.cuda.empty_cache()
            np.save(path + "_timestep.npy", self.timestep)
            return path

        def _restore(self, checkpoint_path):
            print("LOAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADING")
            state_dict_list = torch.load(checkpoint_path)
            for fold in range(self.nfolds):
                self.mod[fold].load_state_dict(state_dict_list[fold])
            self.timestep = np.load(checkpoint_path + "_timestep.npy").item()

    tune.register_trainable("my_class", train_class)

    hyperband = HyperBandScheduler(time_attr="timesteps_total",
                                   reward_attr="mean_accuracy",
                                   max_t=100)

    exp = {
        'run': "my_class",
        'num_samples': 50,
        'trial_resources': {
            "gpu": 1
        },
        'stop': {
            "training_iteration": 100
        },
        'config': {
            "L2": lambda spec: 10**(8 * random.random() - 4),
            "nfolds": kf.get_n_splits()
        }
    }

    tune.run_experiments({log_name: exp}, scheduler=hyperband)
Example #30
def main():
    #sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))

    # paths
    file_root = os.path.join("/opt", "data", "gaul_severstal_data")
    project_dir = os.path.join("/opt", "project")
    log_dir = os.path.join(project_dir, "src", "logs", "fit",
                           datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    images_dir = os.path.join(file_root, "train_images")
    df_path = os.path.join(file_root, "train.csv")
    local_tune_dir = os.path.join(project_dir, "src", "tune")
    plot_dir = os.path.join(project_dir, "src", "plots")

    # dataset split
    training_split = 0.6
    validation_split = 0.2
    testing_split = 1.0 - (training_split + validation_split)

    # unet encoder backbone
    backbone = 'vgg16'

    # data options
    classes = [3]
    n_classes = len(classes)
    use_greyscale = False
    batch_size = 1

    # load in damage labels
    train_df, val_df, test_df = load_dataframe_split(df_path,
                                                     classes=classes,
                                                     val_size=validation_split,
                                                     test_size=testing_split)

    # image resizing parameters
    image_scale_down = 5
    height = int(np.floor(256 / image_scale_down / 32) * 32)
    width = int(np.floor(1600 / image_scale_down / 32) * 32)
    image_channels = 1 if use_greyscale else 3
    resize_shape = (
        height, width, image_channels
    )  # original is (256, 1600, 3), needs to be divisible by 32
    mask_shape = (height, width, n_classes)

    # image preprocessing
    preprocessing_input = sm.get_preprocessing(backbone)

    # loss
    dice_loss = sm.losses.DiceLoss()

    # metrics
    iou_score = sm.metrics.IOUScore(threshold=0.5)
    metrics = [iou_score]

    # datasets and dataloaders
    train_dataset, val_dataset, test_dataset = get_datasets(
        images_dir=images_dir,
        preprocessing_input=preprocessing_input,
        resize_shape=resize_shape,
        train_df=train_df,
        val_df=val_df,
        test_df=test_df,
        use_greyscale=use_greyscale)
    train_dataloader, val_dataloader, test_dataloader = get_dataloaders(
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        test_dataset=test_dataset,
        train_batch_size=batch_size)

    # tuner config
    config = {
        "dropout":
        tune.grid_search([0.1, 0.2, 0.3, 0.4, 0.5]),
        "learning_rate":
        tune.grid_search([1e-5, 1e-4, 1e-3, 1e-2]),
        "optimizer":
        tune.grid_search([keras.optimizers.Adam, keras.optimizers.RMSprop]),
        "encoder_freeze":
        tune.grid_search([True, False]),
    }

    # best model config
    best_config = {
        "dropout": 0.1,
        "learning_rate": 1e-4,
        "optimizer": keras.optimizers.Adam,
        "encoder_freeze": False,
    }

    use_best_config = True
    config = best_config if use_best_config else config

    # search algorithm
    # hyperopt = HyperOptSearch(metric="val_iou_score", mode="max")

    # trial scheduler
    hyperband = HyperBandScheduler(metric="val_iou_score", mode="max")

    # tune model or load best_model.h5
    tune_model = True
    model_name = "best_model.h5"

    if tune_model:
        analysis = tune.run(
            tune.with_parameters(train_unet,
                                 train_dataloader=train_dataloader,
                                 val_dataloader=val_dataloader,
                                 loss=dice_loss,
                                 metrics=metrics),
            resources_per_trial={"gpu": 1},
            config=config,
            # search_alg=hyperopt,
            scheduler=hyperband,
            local_dir=local_tune_dir)
    else:
        model = keras.models.load_model(model_name,
                                        custom_objects={
                                            "dice_loss": dice_loss,
                                            "iou_score": iou_score
                                        })

        use_test = True
        evaluate_dataset = test_dataset if use_test else val_dataset
        evaluate_dataloader = test_dataloader if use_test else val_dataloader

        make_plots = False
        if make_plots is True:
            for i in range(len(evaluate_dataset)):
                imageId = evaluate_dataset.ids[i]
                image, true_mask = evaluate_dataset[i]
                image = np.expand_dims(image, axis=0)
                pr_mask = model.predict(image)

                image = denormalize(image[0])
                pr_mask = denormalize(pr_mask[0]).astype('uint8')

                visualize(image,
                          true_mask,
                          pr_mask,
                          name=imageId,
                          save_dir=plot_dir)

        # model evaluation and baseline comparison
        evaluate_results = model.evaluate(evaluate_dataloader, batch_size=1)
        for i, val in enumerate(evaluate_results):
            print(f'{model.metrics_names[i]}: {val}')

        # baseline is a mask covering the entire left half of the image
        baseline_mask = np.zeros(mask_shape)
        baseline_mask[:, :width // 2, :] = 1
        baseline_iou_scores = []
        for i in range(len(evaluate_dataset)):
            image, true_mask = evaluate_dataset[i]
            image = denormalize(image)
            iou = iou_score(true_mask.astype('float32'), baseline_mask)
            #visualize(image, true_mask, baseline_mask.astype('uint8'))
            baseline_iou_scores.append(iou)
        average_baseline_iou = np.average(baseline_iou_scores)
        print(f'baseline iou_score: {average_baseline_iou}')
                        action="store_true",
                        help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    mnist_spec = {
        "stop": {
            "mean_accuracy": 0.99,
            "time_total_s": 600,
        },
        "config": {
            "learning_rate":
            sample_from(lambda spec: 10**np.random.uniform(-5, -3)),
            "activation":
            grid_search(["relu", "elu", "tanh"]),
        },
        "num_samples": 10,
    }

    if args.smoke_test:
        mnist_spec["stop"]["training_iteration"] = 20
        mnist_spec["num_samples"] = 2

    ray.init()
    hyperband = HyperBandScheduler(time_attr="training_iteration",
                                   reward_attr="mean_accuracy",
                                   max_t=10)
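    # Note: reward_attr is the legacy API; recent Ray Tune versions use
    # metric="mean_accuracy" together with mode="max" instead.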

    tune.run(TrainMNIST,
             name="mnist_hyperband_test",
             scheduler=hyperband,
             **mnist_spec)
            {
                "accuracy": self.accuracy,
                "state_dict": self.model.state_dict(),
            }, checkpoint_path)
        return checkpoint_path

    def _restore(self, checkpoint_path):
        # Load the checkpoint written by _save and restore the model weights.
        checkpoint = torch.load(checkpoint_path)
        self.model.load_state_dict(checkpoint["state_dict"])


if __name__ == "__main__":

    ray.init()

    sched = HyperBandScheduler(time_attr="training_iteration",
                               reward_attr="episode_reward_mean",
                               max_t=5)

    space1 = {
        "factor": (0.01, 0.999),
        "lr": (1, 5),
        "momentum": (0, 0.99),
        "weight_decay": (1, 5)
    }

    space2 = {
        'factor':
        hp.uniform('factor', 0.01, 0.999),
        'lr':
        10**-hp.uniform('lr', 1, 5),
        'momentum':
Exemple #33
0
tune.run(
    trainable,
    config=search_space,
    metric="score",
    mode="min",
    search_alg=algo,
    stop={"training_iteration": 20},
)
# __bayes_end__

# __hyperband_start__
from ray.tune.schedulers import HyperBandScheduler

# Create HyperBand scheduler and minimize the score
hyperband = HyperBandScheduler(metric="score", mode="min")

config = {"a": tune.uniform(0, 1), "b": tune.uniform(0, 1)}

tune.run(trainable, config=config, num_samples=20, scheduler=hyperband)
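# With no search_alg given, Tune samples 20 random configurations from `config`
# and HyperBand stops the weaker trials early based on "score".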
# __hyperband_end__

# __analysis_start__
analysis = tune.run(
    trainable,
    config=config,
    metric="score",
    mode="min",
    search_alg=BayesOptSearch(random_search_steps=4),
    stop={"training_iteration": 20},
)
Exemple #34
0
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser((ModelArguments, DataTrainingArguments,
                               TrainingArguments, RayArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args, ray_args = \
            parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args, ray_args = \
             parser.parse_args_into_dataclasses()

    # Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(
            training_args.output_dir
    ) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(
                training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome.")
        elif last_checkpoint is not None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    logger.setLevel(logging.INFO if is_main_process(training_args.local_rank
                                                    ) else logging.WARN)

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    # Set the verbosity to info of the Transformers logger (on main process only):
    if is_main_process(training_args.local_rank):
        transformers.utils.logging.set_verbosity_info()
        transformers.utils.logging.enable_default_handler()
        transformers.utils.logging.enable_explicit_format()
    logger.info(f"Training/evaluation parameters {training_args}")

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
    # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub).
    #
    # For CSV/JSON files, this script will use as labels the column called 'label' and as pair of sentences the
    # sentences in columns called 'sentence1' and 'sentence2' if such column exists or the first two columns not named
    # label if at least two columns are provided.
    #
    # If the CSVs/JSONs contain only one non-label column, the script does single sentence classification on this
    # single column. You can easily tweak this behavior (see below)
    #
    # In distributed training, the load_dataset function guarantee that only one local process can concurrently
    # download the dataset.
    if data_args.task_name is not None:
        # Downloading and loading a dataset from the hub.
        datasets = load_dataset("glue", data_args.task_name)
    else:
        # Loading a dataset from your local files.
        # CSV/JSON training and evaluation files are needed.
        data_files = {
            "train": data_args.train_file,
            "validation": data_args.validation_file
        }

        # Get the test dataset: you can provide your own CSV/JSON test file (see below)
        # when you use `do_predict` without specifying a GLUE benchmark task.
        if training_args.do_predict:
            if data_args.test_file is not None:
                train_extension = data_args.train_file.split(".")[-1]
                test_extension = data_args.test_file.split(".")[-1]
                assert (
                    test_extension == train_extension
                ), "`test_file` should have the same extension (csv or json) as `train_file`."
                data_files["test"] = data_args.test_file
            else:
                raise ValueError(
                    "Need either a GLUE task or a test file for `do_predict`.")

        for key in data_files.keys():
            logger.info(f"load a local file for {key}: {data_files[key]}")

        if data_args.train_file.endswith(".csv"):
            # Loading a dataset from local csv files
            datasets = load_dataset("csv", data_files=data_files)
        else:
            # Loading a dataset from local json files
            datasets = load_dataset("json", data_files=data_files)
    # See more about loading any type of standard or custom dataset at
    # https://huggingface.co/docs/datasets/loading_datasets.html.

    # Labels
    if data_args.task_name is not None:
        is_regression = data_args.task_name == "stsb"
        if not is_regression:
            label_list = datasets["train"].features["label"].names
            num_labels = len(label_list)
        else:
            num_labels = 1
    else:
        # Trying to have good defaults here, don't hesitate to tweak to your needs.
        is_regression = datasets["train"].features["label"].dtype in [
            "float32", "float64"
        ]
        if is_regression:
            num_labels = 1
        else:
            # A useful fast method:
            # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.unique
            label_list = datasets["train"].unique("label")
            label_list.sort()  # Let's sort it for determinism
            num_labels = len(label_list)

    # Load pretrained model and tokenizer
    #
    # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    config = AutoConfig.from_pretrained(
        model_args.config_name
        if model_args.config_name else model_args.model_name_or_path,
        num_labels=num_labels,
        finetuning_task=data_args.task_name,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name
        if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_fast=model_args.use_fast_tokenizer,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    # model = AutoModelForSequenceClassification.from_pretrained(
    #     model_args.model_name_or_path,
    #     from_tf=bool(".ckpt" in model_args.model_name_or_path),
    #     config=config,
    #     cache_dir=model_args.cache_dir,
    #     revision=model_args.model_revision,
    #     use_auth_token=True if model_args.use_auth_token else None,
    # )

    # Preprocessing the datasets
    if data_args.task_name is not None:
        sentence1_key, sentence2_key = task_to_keys[data_args.task_name]
    else:
        # Again, we try to have some nice defaults but don't hesitate to tweak to your use case.
        non_label_column_names = [
            name for name in datasets["train"].column_names if name != "label"
        ]
        if "sentence1" in non_label_column_names and "sentence2" in non_label_column_names:
            sentence1_key, sentence2_key = "sentence1", "sentence2"
        else:
            if len(non_label_column_names) >= 2:
                sentence1_key, sentence2_key = non_label_column_names[:2]
            else:
                sentence1_key, sentence2_key = non_label_column_names[0], None

    # Padding strategy
    if data_args.pad_to_max_length:
        padding = "max_length"
    else:
        # We will pad later, dynamically at batch creation, to the max sequence length in each batch
        padding = False

    # Some models have set the order of the labels to use, so let's make sure we do use it.
    label_to_id = None
    if (config.label2id != PretrainedConfig(num_labels=num_labels).label2id
            and data_args.task_name is not None and not is_regression):
        # Some have all caps in their config, some don't.
        label_name_to_id = {k.lower(): v for k, v in config.label2id.items()}
        if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
            label_to_id = {
                i: label_name_to_id[label_list[i]]
                for i in range(num_labels)
            }
        else:
            logger.warning(
                "Your model seems to have been trained with labels, but they don't match the dataset: "
                f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
                "\nIgnoring the model labels as a result."
            )
    elif data_args.task_name is None and not is_regression:
        label_to_id = {v: i for i, v in enumerate(label_list)}

    def preprocess_function(examples):
        # Tokenize the texts
        args = ((examples[sentence1_key], ) if sentence2_key is None else
                (examples[sentence1_key], examples[sentence2_key]))
        result = tokenizer(*args,
                           padding=padding,
                           max_length=data_args.max_seq_length,
                           truncation=True)

        # Map labels to IDs (not necessary for GLUE tasks)
        if label_to_id is not None and "label" in examples:
            result["label"] = [label_to_id[l] for l in examples["label"]]
        return result

    datasets = datasets.map(preprocess_function,
                            batched=True,
                            load_from_cache_file=not data_args.overwrite_cache)

    train_dataset = datasets["train"]
    eval_dataset = datasets["validation_matched" if data_args.task_name ==
                            "mnli" else "validation"]
    if data_args.task_name is not None or data_args.test_file is not None:
        test_dataset = datasets["test_matched" if data_args.task_name ==
                                "mnli" else "test"]

    # Log a few random samples from the training set:
    for index in random.sample(range(len(train_dataset)), 3):
        logger.info(
            f"Sample {index} of the training set: {train_dataset[index]}.")

    # Get the metric function
    if data_args.task_name is not None:
        metric = load_metric("f1")
    # TODO: When datasets metrics include regular accuracy, make an else here and remove special branch from
    # compute_metrics

    # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
    # predictions and label_ids field) and has to return a dictionary string to float.
    import time

    def make_compute_metrics(training_args):
        def compute_metrics(p: EvalPrediction):
            preds = p.predictions[0] if isinstance(p.predictions,
                                                   tuple) else p.predictions

            filename = 'preds_{0}.npy'.format(int(time.time()))
            path = os.path.join(training_args.output_dir, filename)
            np.save(path, preds)

            preds = np.squeeze(preds) if is_regression else np.argmax(preds,
                                                                      axis=1)
            if data_args.task_name is not None:
                result = metric.compute(predictions=preds,
                                        references=p.label_ids)
                if len(result) > 1:
                    result["combined_score"] = np.mean(list(
                        result.values())).item()
                return result
            elif is_regression:
                return {"mse": ((preds - p.label_ids)**2).mean().item()}
            else:
                return {
                    "accuracy":
                    (preds == p.label_ids).astype(np.float32).mean().item()
                }

        return compute_metrics

    compute_metrics = make_compute_metrics(training_args)

    # Data collator will default to DataCollatorWithPadding, so we change it if we already did the padding.
    if data_args.pad_to_max_length:
        data_collator = default_data_collator
    elif training_args.fp16:
        data_collator = DataCollatorWithPadding(tokenizer,
                                                pad_to_multiple_of=8)
    else:
        data_collator = None

    def model_init():
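        # hyperparameter_search() calls this once per Ray Tune trial, so every
        # trial starts from a freshly initialized pretrained model.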
        model = AutoModelForSequenceClassification.from_pretrained(
            model_args.model_name_or_path,
            from_tf=bool(".ckpt" in model_args.model_name_or_path),
            config=config,
            cache_dir=model_args.cache_dir,
            revision=model_args.model_revision,
            use_auth_token=True if model_args.use_auth_token else None)
        return model

    from typing import Any

    class CustomTrainer(Trainer):
        def __init__(self, *args, **kwargs):
            super(CustomTrainer, self).__init__(*args, **kwargs)

        def _hp_search_setup(self, trial: Any):
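            # The trial config can contain a "wandb" entry (presumably injected by
            # a Tune/W&B callback); it is not a model hyperparameter, so drop it
            # before the base Trainer applies the remaining trial values.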
            try:
                trial.pop('wandb', None)
            except AttributeError:
                pass
            super(CustomTrainer, self)._hp_search_setup(trial)

    # Initialize our Trainer
    trainer = CustomTrainer(
        model_init=model_init,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset if training_args.do_eval else None,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
        data_collator=data_collator,
    )

    # Hyperparameter Search

    def hp_space_fn(empty_arg):
        config = {
            "seed": tune.randint(1, 100),
        }
        return config

    time_budget_s = 60 * int(ray_args.time_budget_s)
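    # The factor of 60 above suggests ray_args.time_budget_s is given in minutes
    # and converted here to the seconds that tune expects.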

    best_run = trainer.hyperparameter_search(
        direction="maximize",
        backend="ray",
        scheduler=HyperBandScheduler(
            time_attr='time_total_s',
            metric='eval_accuracy',
            mode='max',
            max_t=600,
        ),
        hp_space=hp_space_fn,
        time_budget_s=time_budget_s,
        keep_checkpoints_num=1,
        checkpoint_score_attr='eval_accuracy')
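    # best_run is a transformers BestRun (run_id, objective, hyperparameters); its
    # hyperparameters dict is what gets written to best_run.json below.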

    output_params_file = os.path.join(training_args.output_dir,
                                      "best_run.json")

    with open(output_params_file, "w") as f:
        json.dump(best_run.hyperparameters, f, indent=4)

    return best_run