Example #1
def get_raytune_schedule(raytune_cfg):
    if raytune_cfg["sched"] == "asha":
        return AsyncHyperBandScheduler(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            max_t=raytune_cfg["asha"]["max_t"],
            grace_period=raytune_cfg["asha"]["grace_period"],
            reduction_factor=raytune_cfg["asha"]["reduction_factor"],
            brackets=raytune_cfg["asha"]["brackets"],
        )
    elif raytune_cfg["sched"] == "hyperband":
        return HyperBandScheduler(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            max_t=raytune_cfg["hyperband"]["max_t"],
            reduction_factor=raytune_cfg["hyperband"]["reduction_factor"],
        )
    # requires pip install hpbandster ConfigSpace
    elif (raytune_cfg["sched"] == "bohb") or (raytune_cfg["sched"] == "BOHB"):
        return HyperBandForBOHB(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            max_t=raytune_cfg["hyperband"]["max_t"],
            reduction_factor=raytune_cfg["hyperband"]["reduction_factor"],
        )
    elif (raytune_cfg["sched"] == "pbt") or (raytune_cfg["sched"] == "PBT"):
        return PopulationBasedTraining(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            perturbation_interval=raytune_cfg["pbt"]["perturbation_interval"],
            hyperparam_mutations=raytune_cfg["pbt"]["hyperparam_mutations"],
            log_config=True,
        )
    # requires pip install GPy sklearn
    elif (raytune_cfg["sched"] == "pb2") or (raytune_cfg["sched"] == "PB2"):
        return PB2(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            perturbation_interval=raytune_cfg["pb2"]["perturbation_interval"],
            hyperparam_bounds=raytune_cfg["pb2"]["hyperparam_bounds"],
            log_config=True,
        )
    else:
        print("INFO: Not using any Ray Tune trial scheduler.")
        return None
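For reference, a minimal sketch of a raytune_cfg that would exercise the ASHA branch of get_raytune_schedule above; the imports are the usual Ray Tune scheduler paths, and the metric name is a placeholder:

# A minimal sketch; "val_loss" is a placeholder metric name.
from ray.tune.schedulers import (AsyncHyperBandScheduler, HyperBandScheduler,
                                 HyperBandForBOHB, PopulationBasedTraining)
from ray.tune.schedulers.pb2 import PB2

raytune_cfg = {
    "sched": "asha",
    "default_metric": "val_loss",
    "default_mode": "min",
    "asha": {
        "max_t": 100,
        "grace_period": 10,
        "reduction_factor": 4,
        "brackets": 1,
    },
}
scheduler = get_raytune_schedule(raytune_cfg)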
Example #2
def create_pbt_scheduler(model):
    """
    Create a population-based training (PBT) scheduler.
    :return: A new PBT scheduler.
    """
    hyperparam_mutations = create_hparam_tune_dict(model=model, is_config=False)

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        perturbation_interval=10,
        metric="episode_reward_mean",
        mode="max",
        hyperparam_mutations=hyperparam_mutations,
    )
    return pbt
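The helper create_hparam_tune_dict is not shown in the source; as a hypothetical illustration, the mutation dict it returns might take this shape (per key, PBT accepts a list of categorical values, a callable that draws a fresh sample, or a tune search-space object):

# Hypothetical shape of the dict returned by create_hparam_tune_dict:
import random

hyperparam_mutations = {
    "lr": lambda: random.uniform(1e-5, 1e-2),  # resample from a distribution
    "gamma": [0.95, 0.99, 0.999],              # perturb within a categorical set
}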
Example #3
def tuning(args):
    activation = nn.PReLU if args.actv == 'prelu' else nn.SELU
    config = {
        "l1_units": tune.choice([480, 512, 544]),
        "l2_units": tune.choice([224, 256, 288]),
        "l3_units": tune.choice([96, 128, 160]),
        "lambda": tune.choice([1e-3, 1e-4, 1e-5]),
        "actv": tune.choice([activation])
    }
    scheduler = PopulationBasedTraining(
        time_attr='training_iteration',
        perturbation_interval=4,
        hyperparam_mutations={
            "l1_units": [464, 496, 528, 560, 576],
            "l2_units": [208, 240, 272, 304, 328],
            "l3_units": [80, 112, 144, 176, 208]
        })

    reporter = CLIReporter(parameter_columns=[
        "l1_units",
        "l2_units",
        "l3_units",
        "lambda",
    ],
                           metric_columns=["loss", "training_iteration"])

    analysis = tune.run(tune.with_parameters(train,
                                             batch_size=args.batch_size,
                                             num_epochs=args.num_epochs,
                                             num_gpus=args.num_gpus),
                        resources_per_trial={
                            "cpu": args.num_cpus,
                            "gpu": args.num_gpus
                        },
                        metric="loss",
                        mode="min",
                        config=config,
                        num_samples=args.num_trials,
                        scheduler=scheduler,
                        progress_reporter=reporter,
                        max_failures=3,
                        stop={"training_iteration": 10},
                        name="tune_cae")

    print(f"Found best hyperparameters: {analysis.best_config}")
Example #4
    def testPermutationContinuationFunc(self):
        def MockTrainingFunc(config, checkpoint_dir=None):
            iter = 0
            a = config["a"]
            b = config["b"]

            if checkpoint_dir:
                checkpoint_path = os.path.join(checkpoint_dir, "model.mock")
                with open(checkpoint_path, "rb") as fp:
                    a, b, iter = pickle.load(fp)

            while True:
                iter += 1
                with tune.checkpoint_dir(step=iter) as checkpoint_dir:
                    checkpoint_path = os.path.join(checkpoint_dir,
                                                   "model.mock")
                    with open(checkpoint_path, "wb") as fp:
                        pickle.dump((a, b, iter), fp)
                tune.report(mean_accuracy=(a - iter) * b)

        scheduler = PopulationBasedTraining(
            time_attr="training_iteration",
            metric="mean_accuracy",
            mode="max",
            perturbation_interval=1,
            log_config=True,
            hyperparam_mutations={"c": lambda: 1},
        )
        param_a = MockParam([10, 20, 30, 40])
        param_b = MockParam([1.2, 0.9, 1.1, 0.8])
        random.seed(100)
        np.random.seed(1000)
        tune.run(
            MockTrainingFunc,
            config={
                "a": tune.sample_from(lambda _: param_a()),
                "b": tune.sample_from(lambda _: param_b()),
                "c": 1,
            },
            fail_fast=True,
            num_samples=4,
            keep_checkpoints_num=1,
            checkpoint_score_attr="min-training_iteration",
            scheduler=scheduler,
            name="testPermutationContinuationFunc",
            stop={"training_iteration": 3},
        )
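MockParam is not shown in the source; a hypothetical stand-in that hands each sampled trial the next value from a fixed list could look like this:

# Hypothetical stand-in for MockParam: cycles deterministically
# through a fixed list of values, one per call.
class MockParam:
    def __init__(self, values):
        self._values = list(values)
        self._index = 0

    def __call__(self):
        value = self._values[self._index % len(self._values)]
        self._index += 1
        return value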
Example #5
def setup_tune_scheduler():
    ss, custom_explore = workload.create_sample_space()
    search_space = workload.create_search_space()

    scheduler = PopulationBasedTraining(time_attr="training_iteration",
                                        perturbation_interval=5,
                                        hyperparam_mutations=ss,
                                        custom_explore_fn=custom_explore,
                                        **workload.exp_metric())

    return dict(
        scheduler=scheduler,
        config=search_space,
        # num_samples in PBT only sets population
        num_samples=10,
        resources_per_trial=com.detect_baseline_resource(),
    )
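The returned dict is shaped so it can be splatted straight into tune.run; a usage sketch, where train_fn is a placeholder trainable:

from ray import tune

analysis = tune.run(train_fn, **setup_tune_scheduler())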
Example #6
def set_tuning_parameters(agent, config):
    scheduler = None
    if agent.lower() == "ppo":
        # Postprocess the perturbed config to ensure it's still valid
        def explore(config):
            # ensure we collect enough timesteps to do sgd
            if config["train_batch_size"] < config["sgd_minibatch_size"] * 2:
                config["train_batch_size"] = config["sgd_minibatch_size"] * 2
            # ensure we run at least one sgd iter
            if config["num_sgd_iter"] < 1:
                config["num_sgd_iter"] = 1
            return config

        # optimization related parameters
        # hype_params["kl_coeff"] = lambda: random.uniform(.1, .8)
        # hype_params["entropy_coeff"] = lambda: random.uniform(0.0, 1.0)
        # hype_params["kl_target"] = lambda: random.uniform(0.0, 0.05)
        hype_params = {
            "lambda": lambda: random.uniform(0.9, 1.0),
            "clip_param": lambda: random.uniform(0.01, 0.5),
            "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
            "num_sgd_iter": lambda: random.randint(1, 30),
            "sgd_minibatch_size": lambda: random.randint(128, 16384),
            "train_batch_size": lambda: random.randint(2000, 160000),
        }
        config["num_sgd_iter"] = tune.sample_from(
            lambda spec: random.choice([10, 20, 30])),
        config["sgd_minibatch_size"] = tune.sample_from(
            lambda spec: random.choice([128, 512, 2048])),
        config["train_batch_size"] = tune.sample_from(
            lambda spec: random.choice([10000, 20000, 40000]))
        scheduler = PopulationBasedTraining(time_attr="time_total_s",
                                            reward_attr="episode_reward_mean",
                                            perturbation_interval=120,
                                            resample_probability=0.25,
                                            hyperparam_mutations=hype_params,
                                            custom_explore_fn=explore)

    if agent.lower() == "ddpg":
        pass

    if agent.lower() == "pg":
        pass

    return config, scheduler
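The reward_attr keyword above is the legacy spelling; newer Ray releases replaced it with the metric/mode pair. A sketch of the equivalent constructor for the scheduler above:

# Equivalent constructor on newer Ray, where reward_attr
# was replaced by metric + mode.
scheduler = PopulationBasedTraining(
    time_attr="time_total_s",
    metric="episode_reward_mean",
    mode="max",
    perturbation_interval=120,
    resample_probability=0.25,
    hyperparam_mutations=hype_params,
    custom_explore_fn=explore)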
Example #7
def tune_mnist_pbt(num_samples=10, num_epochs=10, gpus_per_trial=0):
    data_dir = os.path.join(tempfile.gettempdir(), "mnist_data_")
    LightningMNISTClassifier.download_data(data_dir)

    config = {
        "layer_1_size": tune.choice([32, 64, 128]),
        "layer_2_size": tune.choice([64, 128, 256]),
        "lr": 1e-3,
        "batch_size": 64,
    }

    scheduler = PopulationBasedTraining(
        perturbation_interval=4,
        hyperparam_mutations={
            "lr": tune.loguniform(1e-4, 1e-1),
            "batch_size": [32, 64, 128]
        })

    reporter = CLIReporter(
        parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"],
        metric_columns=["loss", "mean_accuracy", "training_iteration"])

    analysis = tune.run(
        tune.with_parameters(
            train_mnist_tune_checkpoint,
            data_dir=data_dir,
            num_epochs=num_epochs,
            num_gpus=gpus_per_trial),
        resources_per_trial={
            "cpu": 1,
            "gpu": gpus_per_trial
        },
        metric="loss",
        mode="min",
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        name="tune_mnist_pbt")

    print("Best hyperparameters found were: ", analysis.best_config)

    shutil.rmtree(data_dir)
Example #8
def tune_mnist_pbt(num_samples=10, num_epochs=10, gpus_per_trial=0):
    data_dir = mkdtemp(prefix="mnist_data_")
    LightningMNISTClassifier.download_data(data_dir)

    config = {
        "layer_1_size": tune.choice([32, 64, 128]),
        "layer_2_size": tune.choice([64, 128, 256]),
        "lr": 1e-3,
        "batch_size": 64,
    }

    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="loss",
        mode="min",
        perturbation_interval=4,
        hyperparam_mutations={
            "lr": lambda: tune.loguniform(1e-4, 1e-1).func(None),
            "batch_size": [32, 64, 128]
        })

    reporter = CLIReporter(
        parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"],
        metric_columns=["loss", "mean_accuracy", "training_iteration"])

    tune.run(
        partial(
            train_mnist_tune_checkpoint,
            data_dir=data_dir,
            num_epochs=num_epochs,
            num_gpus=gpus_per_trial),
        resources_per_trial={
            "cpu": 1,
            "gpu": gpus_per_trial
        },
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        name="tune_mnist_pbt")

    shutil.rmtree(data_dir)
Example #9
def run_experiment(args):

    if args.smoke_test:
        args.layers = 2

    if args.ray_address:
        ray.init(address=args.ray_address)

    # This ASHA scheduler is immediately overridden by the PBT scheduler
    # below, so it is left commented out.
    # sched = AsyncHyperBandScheduler(time_attr="training_iteration",
    #                                 metric="mean_accuracy")
    sched = PopulationBasedTraining(time_attr='time_total_s',
                                    metric='mean_accuracy',
                                    mode='max',
                                    perturbation_interval=5.0,
                                    custom_explore_fn=lambda c: {
                                        'arch': perturb_arch(c['arch'], 4),
                                        'use_gpu': c['use_gpu']
                                    })

    analysis = tune.run(
        train_cnn,
        name="darts",
        scheduler=sched,
        stop={
            "mean_accuracy": 0.95,
            "training_iteration": 2 if args.smoke_test else 100
        },
        resources_per_trial={
            "cpu": 2,
            "gpu": 1  # int(args.cuda) * 0.5
        },
        num_samples=1 if args.smoke_test else 50,
        config={
            "args": args,
            "arch": tune.sample_from(lambda _: sample_arch(4)),
            "layers": args.
            layers  # can use a flag to make this variable per tune worker later on
        })

    print("Best config is:", analysis.get_best_config(metric="mean_accuracy"))
Example #10
def run(task, name=None):
    ray.init()
    import random
    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="episode_reward_mean",
        perturbation_interval=100,
        hyperparam_mutations={
            # Allow for scaling-based perturbations, with a uniform backing
            # distribution for resampling.
            "actor_learning_rate": lambda: random.uniform(0.01, 1.0),
            # Allow perturbations within this set of categorical values.
            "critic_learning_rate": lambda: random.uniform(0.01, 1.0),
            "discounting": [0.8, 0.9, 0.95, 1.0],
        })

    # Search for the best learning rates and discount factor.
    run_experiments(
        {
            "pbt_test3": {
                "run": LogicRLTrainable,
                "stop": {
                    "training_iteration": 8000
                },
                "num_samples": 6,
                "config": {
                    "task": task,
                    "name": name,
                    "actor_learning_rate": 0.1,
                    "critic_learning_rate": 0.1,
                    "discounting": 1.0
                },
                "trial_resources": {
                    "cpu": 2,
                },
            },
        },
        scheduler=pbt,
        verbose=False)
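run_experiments with trial_resources is the legacy API; on newer Ray the same experiment would look roughly like the sketch below, assuming the PBT scheduler is rebuilt with metric="episode_reward_mean" and mode="max" in place of reward_attr:

# Sketch of the same experiment on newer Ray APIs:
# run_experiments -> tune.run, trial_resources -> resources_per_trial.
from ray import tune

tune.run(
    LogicRLTrainable,
    name="pbt_test3",
    stop={"training_iteration": 8000},
    num_samples=6,
    config={
        "task": task,
        "name": name,
        "actor_learning_rate": 0.1,
        "critic_learning_rate": 0.1,
        "discounting": 1.0,
    },
    resources_per_trial={"cpu": 2},
    scheduler=pbt,
)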
Example #11
def set_tuning_parameters(agent, config):
    hype_params = {}
    explore = None
    if agent == "PPO":
        # optimization related parameters
        hype_params["lr"] = [
            float(1e-2), float(1e-3),
            float(1e-4), float(1e-5)
        ]
        hype_params["train_batch_size"] = [1000, 2000, 4000]
        hype_params["sgd_minibatch_size"] = [16, 32, 64, 128]
        hype_params["num_sgd_iter"] = lambda: random.randint(1, 30)
        hype_params["lambda"] = random.random()  # GAE param
        # initial coeff of KL term
        hype_params["kl_coeff"] = lambda: random.uniform(.1, .8)
        # size of clipping in PPO term
        hype_params["clip_param"] = lambda: random.uniform(.1, .8)
        hype_params["entropy_coeff"] = lambda: random.uniform(
            0.0, 1.0)  # entropy coeff
        hype_params["kl_target"] = lambda: random.uniform(
            0.0, 0.05)  # .1 might be a bit high
        explore = ppo_explore

    for k in hype_params:
        # just to give some variation at start
        if isinstance(hype_params[k], list) and not k == 'lr':
            if k == 'train_batch_size':
                config[k] = lambda spec: random.choice([1000, 2000, 4000])
            if k == 'sgd_minibatch_size':
                config[k] = lambda spec: random.choice([16, 32, 64, 128])
    scheduler = PopulationBasedTraining(
        time_attr='time_total_s',
        reward_attr='episode_reward_mean',
        # this..will be pretty sparse
        perturbation_interval=5000,
        hyperparam_mutations=hype_params,
        resample_probability=0.25,
        custom_explore_fn=explore)
    return config, scheduler
Example #12
    def basicSetup(self,
                   resample_prob=0.0,
                   explore=None,
                   perturbation_interval=10,
                   log_config=False,
                   hyperparams=None,
                   hyperparam_mutations=None,
                   step_once=True):
        hyperparam_mutations = hyperparam_mutations or {
            "float_factor": lambda: 100.0,
            "int_factor": lambda: 10,
            "id_factor": [100]
        }
        pbt = PopulationBasedTraining(
            time_attr="training_iteration",
            perturbation_interval=perturbation_interval,
            resample_probability=resample_prob,
            quantile_fraction=0.25,
            hyperparam_mutations=hyperparam_mutations,
            custom_explore_fn=explore,
            log_config=log_config)
        runner = _MockTrialRunner(pbt)
        for i in range(5):
            trial_hyperparams = hyperparams or {
                "float_factor": 2.0,
                "const_factor": 3,
                "int_factor": 10,
                "id_factor": i
            }
            trial = _MockTrial(i, trial_hyperparams)
            runner.add_trial(trial)
            trial.status = Trial.RUNNING
            if step_once:
                self.assertEqual(
                    pbt.on_trial_result(runner, trial, result(10, 50 * i)),
                    TrialScheduler.CONTINUE)
        pbt.reset_stats()
        return pbt, runner
Example #13
    def testNoConfig(self):
        scheduler = PopulationBasedTraining(
            time_attr="training_iteration",
            metric="mean_accuracy",
            mode="max",
            perturbation_interval=1,
            hyperparam_mutations={
                "a": tune.uniform(0, 0.3),
                "b": [1, 2, 3],
                "c": {
                    "c1": lambda: np.random.uniform(0.5),
                    "c2": tune.choice([2, 3, 4])
                }
            },
        )

        tune.run(
            MockTrainingFunc2,
            fail_fast=True,
            num_samples=4,
            scheduler=scheduler,
            name="testNoConfig",
            stop={"training_iteration": 3})
Example #14
    def testPermutationContinuation(self):
        """
        Tests continuation of runs after permutation.
        Sometimes, runs were continued from deleted checkpoints.
        This deterministic initialisation would fail when the
        fix was not applied.
        See issues #9036, #9036
        """
        scheduler = PopulationBasedTraining(
            time_attr="training_iteration",
            metric="mean_accuracy",
            mode="max",
            perturbation_interval=1,
            log_config=True,
            hyperparam_mutations={"c": lambda: 1})

        param_a = MockParam([10, 20, 30, 40])
        param_b = MockParam([1.2, 0.9, 1.1, 0.8])

        random.seed(100)
        np.random.seed(1000)
        tune.run(
            MockTrainable,
            config={
                "a": tune.sample_from(lambda _: param_a()),
                "b": tune.sample_from(lambda _: param_b()),
                "c": 1
            },
            fail_fast=True,
            num_samples=4,
            checkpoint_freq=1,
            checkpoint_at_end=True,
            keep_checkpoints_num=1,
            checkpoint_score_attr="min-training_iteration",
            scheduler=scheduler,
            name="testPermutationContinuation",
            stop={"training_iteration": 3})
Example #15
        training_operator_cls=CifarTrainingOperator,
        initialization_hook=initialization_hook,
        num_workers=args.num_workers,
        config={
            "test_mode": args.smoke_test,  # whether to to subset the data
            BATCH_SIZE: 128 * args.num_workers,
        },
        use_gpu=args.use_gpu,
        use_fp16=args.fp16)

    pbt_scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="val_loss",
        mode="min",
        perturbation_interval=1,
        hyperparam_mutations={
            # distribution for resampling
            "lr": lambda: np.random.uniform(0.001, 1),
            # allow perturbations within this set of categorical values
            "momentum": [0.8, 0.9, 0.99],
        })

    reporter = CLIReporter()
    reporter.add_metric_column("val_loss", "loss")
    reporter.add_metric_column("val_accuracy", "acc")

    analysis = tune.run(
        TorchTrainable,
        num_samples=4,
        config={
            "lr": tune.choice([0.001, 0.01, 0.1]),
Example #16
def main(
    scenario,
    headless,
    time_total_s,
    rollout_fragment_length,
    train_batch_size,
    seed,
    num_samples,
    num_agents,
    num_workers,
    resume_training,
    result_dir,
    checkpoint_num,
    save_model_path,
):
    assert train_batch_size > 0, "train_batch_size cannot be less than 1."
    if rollout_fragment_length > train_batch_size:
        rollout_fragment_length = train_batch_size

    pbt = PopulationBasedTraining(
        time_attr="time_total_s",
        metric="episode_reward_mean",
        mode="max",
        perturbation_interval=300,
        resample_probability=0.25,
        # Specifies the mutations of these hyperparams
        # See: `ray.rllib.agents.trainer.COMMON_CONFIG` for common hyperparams
        hyperparam_mutations={
            "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
            "rollout_fragment_length": lambda: rollout_fragment_length,
            "train_batch_size": lambda: train_batch_size,
        },
        # Specifies additional mutations after hyperparam_mutations is applied
        custom_explore_fn=explore,
    )

    # XXX: There is a bug in Ray where we can only export a trained model if
    #      the policy it's attached to is named 'default_policy'.
    #      See: https://github.com/ray-project/ray/issues/5339
    rllib_policies = {
        "default_policy": (
            None,
            rllib_agent["observation_space"],
            rllib_agent["action_space"],
            {"model": {"custom_model": TrainingModel.NAME}},
        )
    }

    smarts.core.seed(seed)
    tune_config = {
        "env": RLlibHiWayEnv,
        "log_level": "WARN",
        "num_workers": num_workers,
        "env_config": {
            "seed": tune.sample_from(lambda spec: random.randint(0, 300)),
            "scenarios": [str(Path(scenario).expanduser().resolve().absolute())],
            "headless": headless,
            "agent_specs": {
                f"AGENT-{i}": rllib_agent["agent_spec"] for i in range(num_agents)
            },
        },
        "multiagent": {"policies": rllib_policies},
        "callbacks": Callbacks,
    }

    experiment_name = "rllib_example_multi"
    result_dir = Path(result_dir).expanduser().resolve().absolute()
    if checkpoint_num:
        checkpoint = str(
            result_dir / f"checkpoint_{checkpoint_num}" / f"checkpoint-{checkpoint_num}"
        )
    else:
        checkpoint = None

    print(f"Checkpointing at {str(result_dir)}")
    analysis = tune.run(
        "PG",
        name=experiment_name,
        stop={"time_total_s": time_total_s},
        checkpoint_freq=1,
        checkpoint_at_end=True,
        local_dir=str(result_dir),
        resume=resume_training,
        restore=checkpoint,
        max_failures=3,
        num_samples=num_samples,
        export_formats=["model", "checkpoint"],
        config=tune_config,
        scheduler=pbt,
    )

    print(analysis.dataframe().head())

    best_logdir = Path(analysis.get_best_logdir("episode_reward_max", mode="max"))
    model_path = best_logdir / "model"

    copy_tree(str(model_path), save_model_path, overwrite=True)
    print(f"Wrote model to: {save_model_path}")
Example #17
        },
        "stop": {
            "mean_accuracy": 0.80,
            "training_iteration": 30,
        },
        "config": {
            "epochs": 1,
            "batch_size": 64,
            "lr": grid_search([10**-4, 10**-5]),
            "decay": lambda spec: spec.config.lr / 100.0,
            "dropout": grid_search([0.25, 0.5]),
        },
        "num_samples": 4,
    }

    if args.smoke_test:
        train_spec["config"]["lr"] = 10**-4
        train_spec["config"]["dropout"] = 0.5

    ray.init()

    pbt = PopulationBasedTraining(time_attr="training_iteration",
                                  reward_attr="mean_accuracy",
                                  perturbation_interval=10,
                                  hyperparam_mutations={
                                      "dropout": lambda: np.random.uniform(0, 1),
                                  })

    run_experiments({"pbt_cifar10": train_spec}, scheduler=pbt)
Example #18
if __name__ == "__main__":
    # Hyper-Hyper parameters
    epochs_per_generation = 25
    population_size = 10
    num_generations = 4

    hyperparam_mutations = dict()
    hyperparam_mutations["actor_lr"] = lambda: tune.loguniform(1e-5, 1e-1)
    hyperparam_mutations["critic_lr"] = lambda: tune.loguniform(1e-5, 1e-1)
    hyperparam_mutations["THRESH"] = lambda: tune.uniform(.01, .99)
    #hyperparam_mutations["copy_step"] = [10, 25, 50, 100]

    schedule = PopulationBasedTraining(
        time_attr='epoch',
        metric='avg_reward',
        mode='max',
        perturbation_interval=epochs_per_generation,
        hyperparam_mutations=hyperparam_mutations)

    # If this code throws an error saying bytes has no readonly flag, comment
    # out a line in cloudpickle_fast (see this discussion:
    # https://github.com/ray-project/ray/issues/8262)
    tune.run(
        DDPG_Trainable,
        verbose=0,
        local_dir=BASE_DIR,
        config=dict(
            total_episodes=epochs_per_generation,
            n_epochs=epochs_per_generation,
            grid_size=16,
            THRESH=tune.uniform(.01, .99),
            noise_std_dev=.2,
            #hidden_unit_0 = tune.choice([8, 16, 32, 64, 128]),
Example #19
    parser = argparse.ArgumentParser()
    parser.add_argument("--smoke-test",
                        action="store_true",
                        help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    if args.smoke_test:
        ray.init(num_cpus=2)  # force pausing to happen for test
    else:
        ray.init()

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        perturbation_interval=4,
        hyperparam_mutations={
            # distribution for resampling
            "lr": lambda: random.uniform(0.0001, 0.02),
            # allow perturbations within this set of categorical values
            "some_other_factor": [1, 2],
        })

    tune.run(
        pbt_function,
        name="pbt_test",
        scheduler=pbt,
        verbose=False,
        stop={
            "training_iteration": 30,
        },
        num_samples=8,
Example #20
    run=Params.training_alg,  # must be the same as the default config
    config=config,
    stop=Params.stop_conditions,
    local_dir=Params.ray_results_dir,
    max_failures=9999,
    checkpoint_freq=Params.checkpoint_freq,
    checkpoint_at_end=True,
    loggers=loggers,
)

# defining population scheduler
pbt_scheduler = PopulationBasedTraining(
    time_attr='training_iteration',
    metric='episode_reward_mean',
    mode='max',
    # perturb roughly 100 times over the course of training
    perturbation_interval=Params.training_iteration // 100,
    hyperparam_mutations={  # fixme: get correct params
        "lr": [1e-4],
    })

# run the experiment
trials = run(
    exp,
    reuse_actors=False,
    verbose=Params.verbose,
    raise_on_failed_trial=True,  # avoid agent not known error
    return_trials=True,
    # scheduler=pbt_scheduler,
)
Example #21
        # ensure we collect enough timesteps to do sgd
        if config["train_batch_size"] < config["sgd_minibatch_size"] * 2:
            config["train_batch_size"] = config["sgd_minibatch_size"] * 2
        # ensure we run at least one sgd iter
        if config["num_sgd_iter"] < 1:
            config["num_sgd_iter"] = 1
        return config

    pbt = PopulationBasedTraining(
        time_attr="time_total_s",
        metric="episode_reward_mean",
        mode="max",
        perturbation_interval=120,
        resample_probability=0.25,
        # Specifies the mutations of these hyperparams
        hyperparam_mutations={
            "lambda": lambda: random.uniform(0.9, 1.0),
            "clip_param": lambda: random.uniform(0.01, 0.5),
            "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
            "num_sgd_iter": lambda: random.randint(1, 30),
            "sgd_minibatch_size": lambda: random.randint(128, 16384),
            "train_batch_size": lambda: random.randint(2000, 160000),
        },
        custom_explore_fn=explore)

    ray.init()
    run(
        "PPO",
        name="pbt_humanoid_test",
        scheduler=pbt,
        num_samples=8,
        config={
Example #22
        config["num_sgd_iter"] = 1
    # ensure the train batch size exactly matches the total length of the
    # rollout fragments collected across workers and envs
    config["rollout_fragment_length"] = config["train_batch_size"] \
        / (config["num_workers"] * config["num_envs_per_worker"])
    return config


pbt = PopulationBasedTraining(
    time_attr="timesteps_total",  #time_total_s
    metric="episode_reward_mean",
    mode="max",
    perturbation_interval=400000,
    resample_probability=0.25,
    quantile_fraction=0.25,
    # Specifies the mutations of these hyperparams
    hyperparam_mutations={
        "lambda": lambda: random.uniform(0.9, 1.0),
        "clip_param": lambda: random.uniform(0.01, 0.5),
        "lr": [1.0e-3, 3.0e-4, 1.0e-4, 3.0e-5, 1.0e-5],
        "num_sgd_iter": lambda: random.randint(1, 32),
        "sgd_minibatch_size": lambda: random.randint(64, 1024),
        "train_batch_size": lambda: random.randint(256, 4096),
    },
    custom_explore_fn=explore)

tune.run(
    AGENT_ALGORITHM,
    name="_".join([GYM_ENV_NAME, "PBT", AGENT_ALGORITHM]),
    scheduler=pbt,
    num_samples=8,
    reuse_actors=False,
Example #23
def main(args):
    cfg = setup(args)

    exp_metrics = dict(metric="score", mode="max")

    if args.srch_algo == "hyperopt":
        # Create a HyperOpt search space
        search_space = {
            # "lr": hp.loguniform("lr", np.log(1e-6), np.log(1e-3)),
            # "delay_epochs": hp.randint("delay_epochs", 20, 60),
            # "wd": hp.uniform("wd", 0, 1e-3),
            # "wd_bias": hp.uniform("wd_bias", 0, 1e-3),
            "bsz": hp.choice("bsz", [64, 96, 128, 160, 224, 256]),
            "num_inst": hp.choice("num_inst", [2, 4, 8, 16, 32]),
            # "ce_scale": hp.uniform("ce_scale", 0.1, 1.0),
            # "circle_scale": hp.choice("circle_scale", [16, 32, 64, 128, 256]),
            # "circle_margin": hp.uniform("circle_margin", 0, 1) * 0.4 + 0.1,
        }

        current_best_params = [{
            "bsz": 0,  # index of hp.choice list
            "num_inst": 3,
        }]

        search_algo = HyperOptSearch(search_space,
                                     points_to_evaluate=current_best_params,
                                     **exp_metrics)

        if args.pbt:
            scheduler = PopulationBasedTraining(
                time_attr="training_iteration",
                **exp_metrics,
                perturbation_interval=2,
                hyperparam_mutations={
                    "bsz": [64, 96, 128, 160, 224, 256],
                    "num_inst": [2, 4, 8, 16, 32],
                })
        else:
            scheduler = ASHAScheduler(grace_period=2,
                                      reduction_factor=3,
                                      max_t=7,
                                      **exp_metrics)

    elif args.srch_algo == "bohb":
        search_space = CS.ConfigurationSpace()
        search_space.add_hyperparameters([
            # CS.UniformFloatHyperparameter(name="lr", lower=1e-6, upper=1e-3, log=True),
            # CS.UniformIntegerHyperparameter(name="delay_epochs", lower=20, upper=60),
            # CS.UniformFloatHyperparameter(name="ce_scale", lower=0.1, upper=1.0),
            # CS.UniformIntegerHyperparameter(name="circle_scale", lower=8, upper=128),
            # CS.UniformFloatHyperparameter(name="circle_margin", lower=0.1, upper=0.5),
            # CS.UniformFloatHyperparameter(name="wd", lower=0, upper=1e-3),
            # CS.UniformFloatHyperparameter(name="wd_bias", lower=0, upper=1e-3),
            CS.CategoricalHyperparameter(name="bsz",
                                         choices=[64, 96, 128, 160, 224, 256]),
            CS.CategoricalHyperparameter(name="num_inst",
                                         choices=[2, 4, 8, 16, 32]),
            # CS.CategoricalHyperparameter(name="autoaug_enabled", choices=[True, False]),
            # CS.CategoricalHyperparameter(name="cj_enabled", choices=[True, False]),
        ])

        search_algo = TuneBOHB(search_space, max_concurrent=4, **exp_metrics)

        scheduler = HyperBandForBOHB(
            time_attr="training_iteration",
            reduction_factor=3,
            max_t=7,
            **exp_metrics,
        )

    else:
        raise ValueError(
            "Search algorithm must be chosen from [hyperopt, bohb], but got {}"
            .format(args.srch_algo))

    reporter = CLIReporter(parameter_columns=["bsz", "num_inst"],
                           metric_columns=["r1", "map", "training_iteration"])

    analysis = tune.run(partial(train_tuner, cfg=cfg),
                        resources_per_trial={
                            "cpu": 4,
                            "gpu": 1
                        },
                        search_alg=search_algo,
                        num_samples=args.num_trials,
                        scheduler=scheduler,
                        progress_reporter=reporter,
                        local_dir=cfg.OUTPUT_DIR,
                        keep_checkpoints_num=10,
                        name=args.srch_algo)

    best_trial = analysis.get_best_trial("score", "max", "last")
    logger.info("Best trial config: {}".format(best_trial.config))
    logger.info("Best trial final validation mAP: {}, Rank-1: {}".format(
        best_trial.last_result["map"], best_trial.last_result["r1"]))

    save_dict = dict(R1=best_trial.last_result["r1"].item(),
                     mAP=best_trial.last_result["map"].item())
    save_dict.update(best_trial.config)
    path = os.path.join(cfg.OUTPUT_DIR, "best_config.yaml")
    with PathManager.open(path, "w") as f:
        f.write(CfgNode(save_dict).dump())
    logger.info("Best config saved to {}".format(os.path.abspath(path)))
Example #24
def training_team(params):
    env_config, policies = initialize(params)

    # PBT setting
    pbt_scheduler = PopulationBasedTraining(
        time_attr=params["time_attr"],
        metric="policy_reward_mean/policy_0",
        mode="max",
        perturbation_interval=params["perturbation_interval"],
        custom_explore_fn=limit_gamma_explore,
        hyperparam_mutations={
            "lr": lambda: random.uniform(0.0001, 0.1),
            # "gamma": lambda: random.uniform(0.85, 0.999)
        })

    trials = tune.run(
        PPOTrainer,
        restore=params["restore"],
        resume=params["resume"],
        name=params["name"],
        queue_trials=params["queue_trials"],
        scheduler=pbt_scheduler,
        num_samples=params["num_samples"],
        stop={
            # "training_iteration": params["training_iteration"],
            "timesteps_total": 1000000000
        },
        checkpoint_freq=params["checkpoint_freq"],
        checkpoint_at_end=True,
        verbose=1,
        config={
            "gamma": params["gamma"],
            "lr": params["lr"],
            "entropy_coeff": params["entropy_coeff"],
            "kl_coeff": params["kl_coeff"],  # disable KL
            "batch_mode": "complete_episodes"
            if params["complete_episodes"] else "truncate_episodes",
            "rollout_fragment_length": params["rollout_fragment_length"],
            "env": "PommeMultiAgent-v1",
            "env_config": env_config,
            "num_workers": params["num_workers"],
            "num_envs_per_worker": params["num_envs_per_worker"],
            "num_gpus_per_worker": params["num_gpus_per_worker"],
            "num_gpus": params["num_gpus"],
            "train_batch_size": params["train_batch_size"],
            "sgd_minibatch_size": params["sgd_minibatch_size"],
            "clip_param": params["clip_param"],
            "lambda": params["lambda"],
            "num_sgd_iter": params["num_sgd_iter"],
            "vf_share_layers": True,
            "vf_loss_coeff": params["vf_loss_coeff"],
            "vf_clip_param": params["vf_clip_param"],
            "callbacks": PommeCallbacks,
            "multiagent": {
                "policies": policies,
                "policy_mapping_fn": policy_mapping,
                "policies_to_train": ["policy_0"],
            },
            "observation_filter": "MeanStdFilter",  # should use MeanStdFilter
            "evaluation_num_episodes": params["evaluation_num_episodes"],
            "evaluation_interval": params["evaluation_interval"],
            "log_level": "WARN",
            "use_pytorch": True
        })
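limit_gamma_explore is defined elsewhere in the source; as a hypothetical illustration, a custom_explore_fn of that name might clamp perturbed values like this:

# Hypothetical sketch: keep the perturbed discount factor in a sane range.
def limit_gamma_explore(config):
    config["gamma"] = min(config.get("gamma", 0.99), 0.999)
    return config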
Example #25
            "approx":
            sample_from(
                lambda _: random.choice(sample_blocks(32, args.approx))),
            "swap_period":
            100,
            "baseline":
            args.baseline,
            "restore_path":
            restore_path,
        },
        "num_samples": 10
    }

    ray.init()

    pbt = PopulationBasedTraining(time_attr="training_iteration",
                                  reward_attr="performance",
                                  perturbation_interval=10,
                                  hyperparam_mutations={
                                      "lr": lambda: random.uniform(.0001, 1),
                                      "mom": lambda: random.uniform(.5, 1),
                                  })

    run(Cifar100Model,
        name=args.test_name,
        local_dir=args.dir,
        scheduler=pbt,
        resume=False,
        reuse_actors=True,
        **train_spec)
Example #26
        if config["train_batch_size"] < config["sgd_minibatch_size"] * 2:
            config["train_batch_size"] = config["sgd_minibatch_size"] * 2
        # ensure we run at least one sgd iter
        if config["num_sgd_iter"] < 1:
            config["num_sgd_iter"] = 1
        return config

    pbt = PopulationBasedTraining(
        time_attr="time_total_s",
        reward_attr="episode_reward_mean",
        perturbation_interval=120,
        resample_probability=0.25,
        # Specifies the mutations of these hyperparams
        hyperparam_mutations={
            # "lambda": lambda: random.uniform(0.9, 1.0),
            # "clip_param": lambda: random.uniform(0.01, 0.5),
            "lr": [1e-2, 5e-3, 1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6],
            "gamma": [0.997,0.995,0.99,0.98,0.97,0.95,0.9,0.85,0.8],
            "entropy_coeff": [0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0.03, 0.02, 0.01, 0.0],
            # "num_sgd_iter": lambda: random.randint(1, 30),
            # "sgd_minibatch_size": lambda: random.randint(128, 16384),
            # "train_batch_size": lambda: random.randint(2000, 160000),
        },
        custom_explore_fn=explore)

    ray.init()
    run(
        "PPO",
        name="pbt_halfcheetah",
        scheduler=pbt,
        **{
Example #27
def tune_transformer(num_samples=8, gpus_per_trial=0, smoke_test=False):
    data_dir_name = "./data" if not smoke_test else "./test_data"
    data_dir = os.path.abspath(os.path.join(os.getcwd(), data_dir_name))
    if not os.path.exists(data_dir):
        os.mkdir(data_dir, 0o755)

    # Change these as needed.
    model_name = "bert-base-uncased" if not smoke_test \
        else "sshleifer/tiny-distilroberta-base"
    task_name = "rte"

    task_data_dir = os.path.join(data_dir, task_name.upper())

    num_labels = glue_tasks_num_labels[task_name]

    config = AutoConfig.from_pretrained(model_name,
                                        num_labels=num_labels,
                                        finetuning_task=task_name)

    # Download and cache tokenizer, model, and features
    print("Downloading and caching Tokenizer")
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Triggers tokenizer download to cache
    print("Downloading and caching pre-trained model")
    AutoModelForSequenceClassification.from_pretrained(
        model_name,
        config=config,
    )

    def get_model():
        return AutoModelForSequenceClassification.from_pretrained(
            model_name,
            config=config,
        )

    # Download data.
    download_data(task_name, data_dir)

    data_args = GlueDataTrainingArguments(task_name=task_name,
                                          data_dir=task_data_dir)

    train_dataset = GlueDataset(data_args,
                                tokenizer=tokenizer,
                                mode="train",
                                cache_dir=task_data_dir)
    eval_dataset = GlueDataset(data_args,
                               tokenizer=tokenizer,
                               mode="dev",
                               cache_dir=task_data_dir)

    training_args = TrainingArguments(
        output_dir=".",
        learning_rate=1e-5,  # config
        do_train=True,
        do_eval=True,
        no_cuda=gpus_per_trial <= 0,
        evaluation_strategy="epoch",
        load_best_model_at_end=True,
        num_train_epochs=2,  # config
        max_steps=-1,
        per_device_train_batch_size=16,  # config
        per_device_eval_batch_size=16,  # config
        warmup_steps=0,
        weight_decay=0.1,  # config
        logging_dir="./logs",
        skip_memory_metrics=True,
        report_to="none")

    trainer = Trainer(model_init=get_model,
                      args=training_args,
                      train_dataset=train_dataset,
                      eval_dataset=eval_dataset,
                      compute_metrics=build_compute_metrics_fn(task_name))

    tune_config = {
        "per_device_train_batch_size": 32,
        "per_device_eval_batch_size": 32,
        "num_train_epochs": tune.choice([2, 3, 4, 5]),
        "max_steps": 1 if smoke_test else -1,  # Used for smoke test.
    }

    scheduler = PopulationBasedTraining(time_attr="training_iteration",
                                        metric="eval_acc",
                                        mode="max",
                                        perturbation_interval=1,
                                        hyperparam_mutations={
                                            "weight_decay":
                                            tune.uniform(0.0, 0.3),
                                            "learning_rate":
                                            tune.uniform(1e-5, 5e-5),
                                            "per_device_train_batch_size":
                                            [16, 32, 64],
                                        })

    reporter = CLIReporter(
        parameter_columns={
            "weight_decay": "w_decay",
            "learning_rate": "lr",
            "per_device_train_batch_size": "train_bs/gpu",
            "num_train_epochs": "num_epochs"
        },
        metric_columns=["eval_acc", "eval_loss", "epoch",
                        "training_iteration"])

    trainer.hyperparameter_search(
        hp_space=lambda _: tune_config,
        backend="ray",
        n_trials=num_samples,
        resources_per_trial={
            "cpu": 1,
            "gpu": gpus_per_trial
        },
        scheduler=scheduler,
        keep_checkpoints_num=1,
        checkpoint_score_attr="training_iteration",
        stop={"training_iteration": 1} if smoke_test else None,
        progress_reporter=reporter,
        local_dir="~/ray_results/",
        name="tune_transformer_pbt",
        log_to_file=True)
Example #28
            "mean_accuracy": 0.90,
            "training_iteration": 50,
        },
        "config": {
            "epochs": 1,
            "batch_size": 64*6,
            "lr": grid_search([10**-3, 10**-4]),
            "decay": sample_from(lambda spec: spec.config.lr / 10.0),
            "depth": grid_search([20,32,44,50]),
        },
        "local_dir": args.output_dir,
        "num_samples": 8,
      #  "checkpoint_freq":1,
    }

    if args.smoke_test:
        train_spec["config"]["lr"] = 10**-4
        train_spec["config"]["depth"] = 20

    ray.init(redis_address=args.redis_address, log_to_driver=False)

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="mean_accuracy",
        perturbation_interval=10,
        hyperparam_mutations={
            "decay": [10**-4, 10**-5, 10**-6],
        })

    run_experiments({"pbt_cifar10": train_spec}, scheduler=pbt)
Example #29
def searchBestHypers(num_samples=10,
                     max_num_epochs=15,
                     n_epochs_stop=2,
                     grace_period=5,
                     gpus_per_trial=0,
                     data_obj=None):
    #import os
    #os.chdir('drive/My Drive/DL project/')
    assert data_obj is not None

    experiment_id = 'no_name_yet'

    config_schedule = {
        "batch_size": tune.choice([4, 8, 16, 32]),
        "lr": tune.loguniform(1e-4, 1e-1),
        "h1": tune.sample_from(lambda: 2**np.random.randint(2, 7)),  # conv 1
        "h2": tune.sample_from(lambda: 2**np.random.randint(2, 7)),  # conv 2
        "h3": tune.sample_from(lambda: 2**np.random.randint(2, 7)),  # conv 3
        "h4":
        tune.sample_from(lambda: 2**np.random.randint(0, 4)),  # LSTM hidden
        "h5":
        tune.sample_from(lambda: 2**np.random.randint(3, 8)),  # linear output
        "h6": tune.sample_from(lambda: np.random.randint(1, 3)),
        "wd": tune.loguniform(1e-4, 1e-1),
    }

    scheduler = ASHAScheduler(metric="loss",
                              mode="min",
                              max_t=max_num_epochs,
                              grace_period=1,
                              reduction_factor=2)

    pbt = PopulationBasedTraining(time_attr="training_iteration",
                                  metric="loss",
                                  mode="min",
                                  perturbation_interval=4,
                                  hyperparam_mutations={
                                      "batch_size": [4, 8, 16, 32, 64],
                                      "lr": tune.loguniform(1e-4, 1e-1),
                                      "h1": [4, 8, 16, 32, 64],
                                      "h2": [4, 8, 16, 32, 64],
                                      "wd": tune.loguniform(1e-4, 1e-1),
                                  })

    reporter = CLIReporter(metric_columns=["loss", "training_iteration"])

    result = tune.run(partial(train_cgm,
                              data_obj=data_obj,
                              n_epochs_stop=n_epochs_stop,
                              max_epochs=max_num_epochs,
                              grace_period=grace_period),
                      resources_per_trial={
                          "cpu": 1,
                          "gpu": gpus_per_trial
                      },
                      config=config_schedule,
                      num_samples=num_samples,
                      scheduler=scheduler,  # ASHA; pass scheduler=pbt to use the PBT defined above
                      progress_reporter=reporter)

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))

    # Build best network
    best_trained_model = DilatedNet(h1=best_trial.config["h1"],
                                    h2=best_trial.config["h2"],
                                    h3=best_trial.config["h3"],
                                    h4=best_trial.config["h4"],
                                    h5=best_trial.config["h5"],
                                    h6=best_trial.config["h6"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    best_checkpoint_dir = best_trial.checkpoint.value

    print("BEST MODEL DIR: ", best_checkpoint_dir)
    model_state, optimizer_state = torch.load(
        os.path.join(best_checkpoint_dir, "checkpoint"))
    best_trained_model.load_state_dict(model_state)

    # Call load to fit the scaler; there should be a cleaner way to do this.
    trainset, valset = data_obj.load_train_and_val()

    test_rmse_val = test_rmse(best_trained_model, data_obj)
    print("Best trial test set rmse: {}".format(test_rmse_val))

    # Save the results
    experiment = {
        'name': str(experiment_id),
        'best_trial_dir': str(best_checkpoint_dir),
        'train_data': str(data_obj.train_data),
        'test_data': str(data_obj.test_data),
        'start_date_train': str(data_obj.start_date_train),
        'start_date_test': str(data_obj.start_date_test),
        'end_date_train': str(data_obj.end_date_train),
        'end_date_test': str(data_obj.end_date_test)
    }

    current_time = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')
    user = getpass.getuser()
    experiment_id = f'id_{current_time}_{user}'
    experiment_path = code_path / 'hyper_experiments'  # / model_id
    experiment_path.mkdir(exist_ok=True, parents=True)

    with open(experiment_path / (experiment_id + '.json'), 'w') as outfile:
        json.dump(experiment, outfile, indent=4)
    ''' Optionally print information on where the optimal model is saved '''
    #print("\n Experiment details are saved in:\n", experiment_path / (experiment_id + '.json'))
    #print("\n Checkpoint for best configuration is saved in:\n", best_checkpoint_dir)

    return experiment_id
Example #30
#model.evaluate(x_test, y_test)

configuration = tune.Experiment(
    "pbt_tune_cifar10",
    run=train_cifar_tune,
    resources_per_trial={"cpu": 8, "gpu": 1},
    stop={"mean_accuracy": 0.99},
    config={
        # 'block': [tune.sample_from(lambda spec: np.random.randint(2, high=25)),
        #           tune.sample_from(lambda spec: np.random.randint(2, high=25)),
        #           tune.sample_from(lambda spec: np.random.randint(2, high=25)),
        #           tune.sample_from(lambda spec: np.random.randint(2, high=25))]
        "block": [6, 6, 6, 6]
    })

pbt = PopulationBasedTraining(
    time_attr="training_iteration",
    reward_attr="mean_accuracy",
    hyperparam_mutations={
        "block": [
            tune.sample_from(lambda spec: np.random.randint(2, high=25)),
            tune.sample_from(lambda spec: np.random.randint(2, high=25)),
            tune.sample_from(lambda spec: np.random.randint(2, high=25)),
            tune.sample_from(lambda spec: np.random.randint(2, high=25)),
        ]
    })

trials = tune.run_experiments(configuration, scheduler=pbt)
print(trials)
exit(1)

# Note: the scheduler below is unreachable because of the exit(1) above.
sched = AsyncHyperBandScheduler(
    time_attr="timesteps_total",
    reward_attr="mean_accuracy",
    max_t=400,
    grace_period=20)