Example #1
from ray.tune.schedulers import (
    AsyncHyperBandScheduler,
    HyperBandScheduler,
    HyperBandForBOHB,
    PopulationBasedTraining,
)
from ray.tune.schedulers.pb2 import PB2


def get_raytune_schedule(raytune_cfg):
    if raytune_cfg["sched"] == "asha":
        return AsyncHyperBandScheduler(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            max_t=raytune_cfg["asha"]["max_t"],
            grace_period=raytune_cfg["asha"]["grace_period"],
            reduction_factor=raytune_cfg["asha"]["reduction_factor"],
            brackets=raytune_cfg["asha"]["brackets"],
        )
    elif raytune_cfg["sched"] == "hyperband":
        return HyperBandScheduler(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            max_t=raytune_cfg["hyperband"]["max_t"],
            reduction_factor=raytune_cfg["hyperband"]["reduction_factor"],
        )
    # requires pip install hpbandster ConfigSpace
    elif (raytune_cfg["sched"] == "bohb") or (raytune_cfg["sched"] == "BOHB"):
        return HyperBandForBOHB(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            max_t=raytune_cfg["hyperband"]["max_t"],
            reduction_factor=raytune_cfg["hyperband"]["reduction_factor"],
        )
    elif (raytune_cfg["sched"] == "pbt") or (raytune_cfg["sched"] == "PBT"):
        return PopulationBasedTraining(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            perturbation_interval=raytune_cfg["pbt"]["perturbation_interval"],
            hyperparam_mutations=raytune_cfg["pbt"]["hyperparam_mutations"],
            log_config=True,
        )
    # requires pip install GPy sklearn
    elif (raytune_cfg["sched"] == "pb2") or (raytune_cfg["sched"] == "PB2"):
        return PB2(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            perturbation_interval=raytune_cfg["pb2"]["perturbation_interval"],
            hyperparam_bounds=raytune_cfg["pb2"]["hyperparam_bounds"],
            log_config=True,
        )
    else:
        print("INFO: Not using any Ray Tune trial scheduler.")
        return None
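The function above only reads keys from a plain dict. For reference, a config of roughly the following shape would satisfy those accesses and select the ASHA branch (the key names are taken from the code above; the values are purely illustrative, and the other branches expect analogous "hyperband", "pbt" or "pb2" sub-dicts):

# Illustrative config for get_raytune_schedule(); values are placeholders.
raytune_cfg = {
    "sched": "asha",  # one of: asha, hyperband, bohb, pbt, pb2
    "default_metric": "val_loss",
    "default_mode": "min",
    "asha": {
        "max_t": 100,
        "grace_period": 10,
        "reduction_factor": 3,
        "brackets": 1,
    },
}
scheduler = get_raytune_schedule(raytune_cfg)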
Example #2
import argparse

import ray
from ray import tune
from ray.tune.schedulers.pb2 import PB2

# pbt_function is the trainable being tuned; it is defined or imported
# elsewhere in the original script (a rough stand-in is sketched after
# this example).
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    if args.smoke_test:
        ray.init(num_cpus=2)  # force pausing to happen for test
    else:
        ray.init()

    pbt = PB2(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        perturbation_interval=20,
        hyperparam_bounds={
            # hyperparameter bounds.
            "lr": [0.0001, 0.02],
        })

    tune.run(
        pbt_function,
        name="pbt_test",
        scheduler=pbt,
        verbose=False,
        stop={
            "training_iteration": 30,
        },
        num_samples=8,
        fail_fast=True,
    )
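The call above passes pbt_function, a trainable that the original script defines or imports separately. As a rough, hypothetical stand-in (not the actual example trainable), a function trainable that reads "lr" from its config and reports mean_accuracy could look like this; a real PBT/PB2 trainable should additionally save and load checkpoints so the scheduler can clone well-performing trials:

# Hypothetical stand-in for pbt_function, for illustration only.
from ray import tune

def pbt_function(config):
    accuracy = 0.0
    for _ in range(100):
        # pretend that a larger learning rate grows accuracy faster
        accuracy += config.get("lr", 0.01)
        tune.report(mean_accuracy=accuracy)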
Example #3
import ray
from ray.tune import run, sample_from
from ray.tune.schedulers.pb2 import PB2
import random

# Create the PB2 scheduler.
pb2_scheduler = PB2(
    time_attr="timesteps_total",
    metric="episode_reward_mean",
    mode="max",
    perturbation_interval=50000,
    quantile_fraction=0.25,  # copy bottom % with top % (weights)
    # Specifies the hyperparam search space
    hyperparam_bounds={
        "lambda": [0.9, 1.0],
        "clip_param": [0.1, 0.5],
        "lr": [1e-3, 1e-5],
        "train_batch_size": [1000, 60000]
    })

# Run PPO algorithm experiment on BipedalWalker with PB2.
analysis = run(
    "PPO",
    name="ppo_pb2_bipedal",
    scheduler=pb2_scheduler,
    verbose=1,
    num_samples=4,  # population size
    stop={"timesteps_total": 1000000},
    config={
        "env": "BipedalWalker-v2",
        "log_level": "INFO",
Example #4
def _pb2_importer(*args, **kwargs):
    # PB2 introduces a GPy dependency which can be expensive, so we import
    # lazily.
    from ray.tune.schedulers.pb2 import PB2
    return PB2(*args, **kwargs)
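Lazy importers like this are typically collected in a name-to-factory mapping, so a scheduler can be created by name without importing GPy until it is actually requested. A minimal sketch of that pattern; the registry and the create_scheduler helper below are illustrative and do not necessarily match Ray's internals:

# Illustrative registry pattern; the names here are assumptions.
SCHEDULER_FACTORIES = {
    "pb2": _pb2_importer,
    # other scheduler names would map to their classes or importers
}


def create_scheduler(name, **kwargs):
    # Look up the factory and instantiate the scheduler on demand.
    try:
        return SCHEDULER_FACTORIES[name](**kwargs)
    except KeyError:
        raise ValueError("Unknown scheduler: {}".format(name))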
Example #5
File: pbt.py Project: AJSVB/GPBT
    def load_checkpoint(self, checkpoint_dir):
        path = os.path.join(checkpoint_dir, "checkpoint")
        checkpoint = torch.load(path)
        self.obj.net.load_state_dict(checkpoint["model"])
        self.obj.optimizer.load_state_dict(checkpoint["optim"])


algo = HyperOptSearch(metric="loss", mode="min")
algo = ConcurrencyLimiter(algo, max_concurrent=4)
scheduler = PB2(
    time_attr="training_iteration",
    perturbation_interval=5,
    hyperparam_bounds={
        "lr": [1e-8, 0.23],
        "weight_decay": [1e-4, 1e-2],
        "drp": [0.05, 0.15],
        "momentum": [1e-2, 0.23],  # bounds are [min, max]
        "eps": [1e-4, 1e-2],
    })


class TestLogger(tune.logger.Logger):
    def _init(self):
        progress_file = os.path.join("", "pb2.csv")
        self._continuing = os.path.exists(progress_file)
        self._file = open(progress_file, "a")
        self._csv_out = None
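The PB2 instance above is created without metric or mode, so those have to be supplied when the experiment is launched (Example #6 below uses the same pattern). A minimal sketch of such a launch; train_fn, the metric/mode values and the stopping criterion are assumptions, not taken from the original project:

# Sketch only: "train_fn" stands in for the project's actual trainable.
analysis = tune.run(
    train_fn,
    metric="loss",
    mode="min",
    scheduler=scheduler,  # the PB2 instance defined above
    num_samples=8,
    stop={"training_iteration": 30},
)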
Example #6
        help="The address of server to connect to if using "
        "Ray Client.",
    )
    args, _ = parser.parse_known_args()
    if args.smoke_test:
        ray.init(num_cpus=2)  # force pausing to happen for test
    else:
        if args.server_address:
            ray.init(f"ray://{args.server_address}")
        else:
            ray.init()

    pbt = PB2(
        perturbation_interval=20,
        hyperparam_bounds={
            # hyperparameter bounds.
            "lr": [0.0001, 0.02],
        },
    )

    analysis = tune.run(
        pbt_function,
        name="pbt_test",
        scheduler=pbt,
        metric="mean_accuracy",
        mode="max",
        verbose=False,
        stop={
            "training_iteration": 30,
        },
        num_samples=8,
Example #7
        # Specifies the search space for these hyperparams
        hyperparam_mutations={
            "lambda": lambda: random.uniform(0.9, 1.0),
            "clip_param": lambda: random.uniform(0.1, 0.5),
            "lr": lambda: random.uniform(1e-3, 1e-5),
            "train_batch_size": lambda: random.randint(1000, 60000),
        },
        custom_explore_fn=explore)

    pb2 = PB2(
        time_attr=args.criteria,
        metric="episode_reward_mean",
        mode="max",
        perturbation_interval=args.t_ready,
        quantile_fraction=args.perturb,  # copy bottom % with top %
        # Specifies the hyperparam search space
        hyperparam_bounds={
            "lambda": [0.9, 1.0],
            "clip_param": [0.1, 0.5],
            "lr": [1e-3, 1e-5],
            "train_batch_size": [1000, 60000]
        })

    methods = {"pbt": pbt, "pb2": pb2}

    timelog = str(datetime.date(datetime.now())) + "_" + str(
        datetime.time(datetime.now()))

    args.dir = "{}_{}_{}_Size{}_{}_{}".format(args.algo,
                                              args.filename, args.method,
                                              str(args.num_samples),
Example #8
    sched_asha = ASHAScheduler(
        time_attr="training_iteration",
        max_t=100,
        grace_period=10,
        #mode='max', #find maximum, do not define here if you define in tune.run
        reduction_factor=3,
        brackets=1)

    sched_pb2 = PB2(
        time_attr="training_iteration",
        #metric="mean_accuracy", #defined in ray.tune
        #mode="max", #defined in ray.tune
        perturbation_interval=600.0,
        quantile_fraction=0.25,  # copy bottom % with top %
        # Specifies the hyperparam search space
        hyperparam_bounds={
            # "threads": 2,
            # Note: PB2 only supports continuous numeric [min, max] bounds;
            # callable or categorical values (as used with PBT's
            # hyperparam_mutations) are therefore left commented out here.
            "lr": [0.001, 0.1],
            "hidden": [16, 256],
            # "dropout": lambda: random.uniform(0.0, 0.2),
            # "activation": ["relu", "elu"],
            "layers": [1, 3],
        })

    analysis = tune.run(
        train_mnist,
        name="exp",
        scheduler=sched_pb2,
        #Checkpoint settings
        keep_checkpoints_num=3,
        checkpoint_freq=3,
Example #9
experiment_name = "HWalk_Low_Mimic_Search_3"
experiment_id = "PPO_HumanoidBulletEnv-v0-Low_4964e_00001_1_2021-05-25_10-04-17"
checkpoint_num = "349"

resume = False

pb2 = PB2(
    time_attr='training_iteration',
    metric="episode_reward_mean",
    mode="max",
    perturbation_interval=10,
    quantile_fraction=0.25,
    hyperparam_bounds={
        "lambda": [0.9, 1.0],
        "clip_param": [0.01, 0.5],
        "lr": [1e-6, 1e-3],
        # "num_sgd_iter": [3, 30],
        "train_batch_size": [8192, 40000]
        # "gamma": [0.8, 0.9997],
        # "kl_coeff": [0.3, 1],
        # "vf_loss_coeff": [0.5, 1],
        # "entropy_coeff": [0, 0.01],
    })

analysis = tune.run(
    PPOTrainer,
    name="HWalk_Low_Mimic_Search_7",
    resume=False,
    # restore="/home/aditya/ray_results/{}/{}/checkpoint_{}/checkpoint-{}".format(
    #     experiment_name, experiment_id, checkpoint_num, checkpoint_num