def tune_fn():
    mlflow.set_experiment(experiment_name=experiment_name)

    optuna_search = OptunaSearch(metric="auroc", mode="max")
    # Note: ax_search is constructed but never used; tune.run below is
    # driven by optuna_search only.
    ax_search = AxSearch(metric="auroc", mode="max")

    tune.run(
        objective,
        name="mlflow_gbdt",
        num_samples=65,
        config={
            "num_leaves": tune.randint(5, 95),
            "learning_rate": tune.loguniform(1e-4, 1.0),
            "n_estimators": tune.randint(100, 100000),
            "subsample": tune.loguniform(0.01, 1.0),
            "subsample_freq": tune.randint(1, 5),
            "objective": "binary",
            "reg_alpha": tune.loguniform(1e-4, 1.0),
            "reg_lambda": tune.loguniform(1e-4, 1.0),
            "tree_learner": "feature",
            "feature_sel": 0,
            "mlflow": {
                "experiment_name": experiment_name,
                "tracking_uri": mlflow.get_tracking_uri()
            }
        },
        search_alg=optuna_search)

def set_algorithm(experiment_name, config):
    '''
    Configure search algorithm.
    '''
    if args.algorithm == 'hyperopt':
        algorithm = HyperOptSearch(points_to_evaluate=best_params)
    elif args.algorithm == 'ax':
        ax_client = AxClient(enforce_sequential_optimization=False)
        ax_client.create_experiment(name=experiment_name,
                                    parameters=config,
                                    objective_name="minimum",
                                    minimize=True)
        algorithm = AxSearch(ax_client=ax_client,
                             points_to_evaluate=best_params)
    elif args.algorithm == 'nevergrad':
        algorithm = NevergradSearch(
            points_to_evaluate=best_params,
            optimizer=ng.optimizers.registry["PortfolioDiscreteOnePlusOne"])
    elif args.algorithm == 'optuna':
        algorithm = OptunaSearch(points_to_evaluate=best_params,
                                 seed=args.seed)
    elif args.algorithm == 'pbt':
        algorithm = PopulationBasedTraining(
            time_attr="training_iteration",
            perturbation_interval=args.perturbation,
            hyperparam_mutations=config,
            synch=True)
    elif args.algorithm == 'random':
        algorithm = BasicVariantGenerator(max_concurrent=args.jobs)
    if args.algorithm not in ['random', 'pbt']:
        algorithm = ConcurrencyLimiter(algorithm, max_concurrent=args.jobs)
    return algorithm

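For context, here is a minimal, hypothetical sketch of how the searcher returned by `set_algorithm` could be wired into `tune.run`. The toy objective, search space, and the module-level `args`/`best_params` globals it reads are assumptions for illustration only, and the sketch assumes a searcher choice other than `'pbt'` (PBT is a scheduler, not a search algorithm).

# Hypothetical usage sketch (not from the original source).
from ray import tune

def objective(config):
    # Toy quadratic; Tune minimizes the reported "minimum" value,
    # matching the objective_name used in the 'ax' branch above.
    tune.report(minimum=(config["x"] - 3) ** 2)

# For the 'ax' branch, config would instead be a list of Ax parameter dicts.
search_space = {"x": tune.uniform(-10.0, 10.0)}

algorithm = set_algorithm("demo_experiment", search_space)
analysis = tune.run(
    objective,
    metric="minimum",
    mode="min",
    config=search_space,
    search_alg=algorithm,
    num_samples=20)
print(analysis.best_config)
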
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--gpus")
    parser.add_argument("--gpus-per-trial", type=float)
    parser.add_argument("--num-epochs", type=int)
    parser.add_argument("--num-samples", type=int)
    parser.add_argument("--w2v", type=str)
    args = parser.parse_args()

    w2v_sd = torch.load(args.w2v)
    gpus_per_trial = args.gpus_per_trial

    trainable = tune.with_parameters(
        train_model,
        gpus=args.gpus,
        w2v=w2v_sd,
        num_epochs=args.num_epochs,
    )

    algo = AxSearch(max_concurrent=4)
    scheduler = AsyncHyperBandScheduler()
    analysis = tune.run(trainable,
                        resources_per_trial={
                            "cpu": 4,
                            "gpu": gpus_per_trial
                        },
                        metric="acc",
                        mode="max",
                        search_alg=algo,
                        scheduler=scheduler,
                        config=config,
                        num_samples=args.num_samples,
                        name="tune_w2v_lr")

    print(analysis.best_config)

def main(data_path, experiment_path, model_path, params_path):
    ray.init(address='auto')

    data_path = os.path.abspath(data_path)
    params_path = os.path.abspath(params_path)
    model_path = os.path.abspath(model_path)

    n_splits = 4

    cfg = pickle.load(open(params_path, "rb"))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    exp_config = {
        **locals().copy(),
        **cfg,
        'objective': 'soft-boundary',
        'net_name': 'cicflow_mlp_2',
    }

    if exp_config['seed'] != -1:
        random.seed(exp_config['seed'])
        np.random.seed(exp_config['seed'])
        torch.manual_seed(exp_config['seed'])
        torch.cuda.manual_seed(exp_config['seed'])
        torch.backends.cudnn.deterministic = True

    dates = [
        '2019-11-08', '2019-11-09', '2019-11-11', '2019-11-12', '2019-11-13',
        '2019-11-14', '2019-11-15', '2019-11-16', '2019-11-17', '2019-11-18',
        '2019-11-19'
    ]

    ax = AxClient(enforce_sequential_optimization=False)
    ax.create_experiment(
        name="SVDDCICFlowExp",
        parameters=[
            {
                "name": "dates",
                "type": "choice",
                "values": dates
            },
        ],
        objective_name="val_auc_pr",
    )
    search_alg = AxSearch(ax)

    analysis = tune.run(OneDaySVDDCICFlowExp,
                        name="DriftSVDDCICFlowExp",
                        checkpoint_at_end=True,
                        checkpoint_freq=1,
                        stop={
                            "training_iteration": 1,
                        },
                        resources_per_trial={"gpu": 1},
                        num_samples=len(dates),
                        local_dir=experiment_path,
                        search_alg=search_alg,
                        config=exp_config)

    print("Best config is:", analysis.get_best_config(metric="val_auc_pr"))

def testConvergenceAx(self):
    from ray.tune.suggest.ax import AxSearch

    np.random.seed(0)

    searcher = AxSearch()
    analysis = self._testConvergence(searcher, patience=10)

    assert math.isclose(analysis.best_config["x"], 0, abs_tol=1e-5)

def set_basic_conf(self):
    from ax.service.ax_client import AxClient

    space = AxSearch.convert_search_space({
        "width": tune.uniform(0, 20),
        "height": tune.uniform(-100, 100)
    })

    from ax.modelbridge.generation_strategy import (
        GenerationStep,
        GenerationStrategy,
    )
    from ax.modelbridge.registry import Models

    # Set the generation strategy to Sobol to ensure reproducibility.
    try:
        # ax-platform>=0.2.0
        gs = GenerationStrategy(steps=[
            GenerationStep(
                model=Models.SOBOL,
                num_trials=-1,
                model_kwargs={"seed": 4321},
            ),
        ])
    except TypeError:
        # ax-platform<0.2.0
        gs = GenerationStrategy(steps=[
            GenerationStep(
                model=Models.SOBOL,
                num_arms=-1,
                model_kwargs={"seed": 4321},
            ),
        ])

    client = AxClient(random_seed=4321, generation_strategy=gs)
    client.create_experiment(parameters=space,
                             objective_name="loss",
                             minimize=True)

    def cost(space, reporter):
        reporter(loss=(space["height"] - 14)**2 - abs(space["width"] - 3))

    search_alg = AxSearch(ax_client=client)
    return search_alg, cost

def testAx(self):
    from ray.tune.suggest.ax import AxSearch
    from ax.service.ax_client import AxClient

    converted_config = AxSearch.convert_search_space(self.config)

    client = AxClient()
    client.create_experiment(
        parameters=converted_config,
        objective_name=self.metric_name,
        minimize=False
    )
    searcher = AxSearch(ax_client=client)

    self._save(searcher)

    client = AxClient()
    client.create_experiment(
        parameters=converted_config,
        objective_name=self.metric_name,
        minimize=False
    )
    searcher = AxSearch(ax_client=client)
    self._restore(searcher)

def testAx(self):
    from ray.tune.suggest.ax import AxSearch
    from ax.service.ax_client import AxClient

    converted_config = AxSearch.convert_search_space(self.config)
    # At least one nan, inf, -inf and float
    client = AxClient(random_seed=4321)
    client.create_experiment(parameters=converted_config,
                             objective_name="_metric")
    searcher = AxSearch(ax_client=client, metric="_metric", mode="max")

    out = tune.run(_invalid_objective,
                   search_alg=searcher,
                   metric="_metric",
                   mode="max",
                   num_samples=4,
                   reuse_actors=False)

    best_trial = out.best_trial
    self.assertLessEqual(best_trial.config["report"], 2.0)

def raytune_ax_train(model_params: dict, config_params: dict):
    depth = [int(d) for d in config_params['ht_depth_range'].split(',')]
    features = [
        float(d) for d in config_params['ht_features_range'].split(',')
    ]
    estimators = [int(d) for d in config_params['ht_est_range'].split(',')]
    experiments = config_params['ht_experiments']

    ax = AxClient(enforce_sequential_optimization=False)
    ax.create_experiment(name="hpo_experiment",
                         parameters=[{
                             "name": "max_depth",
                             "type": "range",
                             "bounds": depth,
                             "parameter_type": ParameterType.INT
                         }, {
                             "name": "max_features",
                             "type": "range",
                             "bounds": features,
                             "parameter_type": ParameterType.FLOAT
                         }, {
                             "name": "n_estimators",
                             "type": "range",
                             "bounds": estimators,
                             "parameter_type": ParameterType.INT
                         }],
                         objective_name="accuracy",
                         minimize=False)

    tune.run(
        run_or_experiment=lambda parameters: ax_train_proxy(
            model_params=model_params,
            config_params=config_params,
            ax_params=parameters),
        num_samples=experiments,
        # Note that the argument here is the `AxClient`.
        search_alg=AxSearch(ax),
        # Set this level to 1 to see status updates and to 2 to also see trial results.
        verbose=1,
        # To use GPU, specify: resources_per_trial={"gpu": 1}.
        resources_per_trial={"gpu": 1}
        if ('GPU' in config_params['compute']) else {"cpu": 8})

    print("FINISHED RAY TUNE RUN", flush=True)

    best_parameters, best_values = ax.get_best_parameters()
    means, covariances = best_values

    print("Ax Optimization Results:", flush=True)
    print(best_parameters, flush=True)
    print(best_values, flush=True)

    return means['accuracy']

def testCreateSearcher(self):
    kwargs = {"metric": "metric_foo", "mode": "min"}

    searcher_ax = "ax"
    shim_searcher_ax = tune.create_searcher(searcher_ax, **kwargs)
    real_searcher_ax = AxSearch(space=[], **kwargs)
    assert type(shim_searcher_ax) is type(real_searcher_ax)

    searcher_hyperopt = "hyperopt"
    shim_searcher_hyperopt = tune.create_searcher(searcher_hyperopt, **kwargs)
    real_searcher_hyperopt = HyperOptSearch({}, **kwargs)
    assert type(shim_searcher_hyperopt) is type(real_searcher_hyperopt)

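As a companion to the shim test above, here is a minimal sketch of using `tune.create_searcher` in an actual run; the toy trainable and search space are placeholders assumed for illustration, not taken from the original test.

# Hypothetical usage of the create_searcher shim exercised above.
from ray import tune

def trainable(config):
    # Report the metric name the shim searcher was created with.
    tune.report(metric_foo=abs(config["x"]))

searcher = tune.create_searcher("ax", metric="metric_foo", mode="min")
analysis = tune.run(
    trainable,
    config={"x": tune.uniform(-5.0, 5.0)},
    search_alg=searcher,
    num_samples=8)
print(analysis.best_config)
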
def main(data_path, load_model, ratio_known_normal, ratio_known_outlier, seed,
         optimizer_name, validation, lr, n_epochs, lr_milestone, batch_size,
         weight_decay, pretrain, ae_optimizer_name, ae_lr, ae_n_epochs,
         ae_lr_milestone, ae_batch_size, ae_weight_decay, num_threads,
         n_jobs_dataloader, normal_class, known_outlier_class,
         n_known_outlier_classes):
    ray.init(address='auto')

    data_path = os.path.abspath(data_path)

    n_splits = 5
    kf = KFold(n_splits)
    r = np.array(range(_get_len(data_path)))
    kf_idx = [i for i in kf.split(r)]

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    exp_config = {
        **locals().copy(),
        'net_name': 'nsl_kdd_mlp',
    }

    if exp_config['seed'] != -1:
        random.seed(exp_config['seed'])
        np.random.seed(exp_config['seed'])
        torch.manual_seed(exp_config['seed'])
        torch.cuda.manual_seed(exp_config['seed'])
        torch.backends.cudnn.deterministic = True

    ax = AxClient(enforce_sequential_optimization=False)
    ax.create_experiment(
        name="SVDDKDDExperiment",
        parameters=[
            {
                "name": "lr",
                "type": "range",
                "bounds": [1e-6, 0.4],
                "log_scale": True
            },
            {
                "name": "nu",
                "type": "range",
                "bounds": [0.0, 0.2]
            },
            {
                "name": "objective",
                "type": "choice",
                "values": ['one-class', 'soft-boundary']
            },
            {
                "name": "pretrain",
                "type": "choice",
                "values": [True, False]
            },
        ],
        objective_name="val_auc_pr",
    )
    search_alg = AxSearch(ax)
    re_search_alg = Repeater(search_alg, repeat=n_splits)

    sched = ASHAScheduler(time_attr='training_iteration',
                          grace_period=10,
                          metric="val_auc_pr")

    analysis = tune.run(SVDDKDDExp,
                        name="SVDDKDDExp",
                        checkpoint_at_end=True,
                        checkpoint_freq=5,
                        stop={
                            "training_iteration": 100,
                        },
                        resources_per_trial={"gpu": 1},
                        num_samples=20,
                        search_alg=re_search_alg,
                        scheduler=sched,
                        config=exp_config)

    print("Best config is:", analysis.get_best_config(metric="val_auc_pr"))

ray.init()

tune_kwargs = {
    "num_samples": 10 if args.smoke_test else 50,
    "config": {
        "iterations": 100,
        "x1": tune.uniform(0.0, 1.0),
        "x2": tune.uniform(0.0, 1.0),
        "x3": tune.uniform(0.0, 1.0),
        "x4": tune.uniform(0.0, 1.0),
        "x5": tune.uniform(0.0, 1.0),
        "x6": tune.uniform(0.0, 1.0),
    },
    "stop": {
        "timesteps_total": 100
    }
}
algo = AxSearch(
    max_concurrent=4,
    metric="hartmann6",
    mode="min",
    parameter_constraints=["x1 + x2 <= 2.0"],  # Optional.
    outcome_constraints=["l2norm <= 1.25"],  # Optional.
)
scheduler = AsyncHyperBandScheduler(metric="hartmann6", mode="min")
tune.run(easy_objective,
         name="ax",
         search_alg=algo,
         scheduler=scheduler,
         **tune_kwargs)

def _test_xgboost(method='BlendSearch'):
    try:
        import ray
    except ImportError:
        return
    if method == 'BlendSearch':
        from flaml import tune
    else:
        from ray import tune
    search_space = {
        # You can mix constants with search space objects.
        "max_depth": tune.randint(1, 8) if method in [
            "BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1)
    }
    max_iter = 10
    for num_samples in [256]:
        time_budget_s = 60  # None
        for n_cpu in [8]:
            start_time = time.time()
            ray.init(num_cpus=n_cpu, num_gpus=0)
            if method == 'BlendSearch':
                analysis = tune.run(
                    train_breast_cancer,
                    init_config={
                        "max_depth": 1,
                        "min_child_weight": 3,
                    },
                    cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    },
                    metric="eval-logloss",
                    mode="min",
                    max_resource=max_iter,
                    min_resource=1,
                    report_intermediate_result=True,
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir='logs/',
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    use_ray=True)
            else:
                if 'ASHA' == method:
                    algo = None
                elif 'BOHB' == method:
                    from ray.tune.schedulers import HyperBandForBOHB
                    from ray.tune.suggest.bohb import TuneBOHB
                    algo = TuneBOHB(max_concurrent=n_cpu)
                    scheduler = HyperBandForBOHB(max_t=max_iter)
                elif 'Optuna' == method:
                    from ray.tune.suggest.optuna import OptunaSearch
                    algo = OptunaSearch()
                elif 'CFO' == method:
                    from flaml import CFO
                    algo = CFO(points_to_evaluate=[{
                        "max_depth": 1,
                        "min_child_weight": 3,
                    }],
                        cat_hp_cost={
                            "min_child_weight": [6, 3, 2],
                        })
                elif 'Dragonfly' == method:
                    from ray.tune.suggest.dragonfly import DragonflySearch
                    algo = DragonflySearch()
                elif 'SkOpt' == method:
                    from ray.tune.suggest.skopt import SkOptSearch
                    algo = SkOptSearch()
                elif 'Nevergrad' == method:
                    from ray.tune.suggest.nevergrad import NevergradSearch
                    import nevergrad as ng
                    algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
                elif 'ZOOpt' == method:
                    from ray.tune.suggest.zoopt import ZOOptSearch
                    algo = ZOOptSearch(budget=num_samples * n_cpu)
                elif 'Ax' == method:
                    from ray.tune.suggest.ax import AxSearch
                    algo = AxSearch()
                elif 'HyperOpt' == method:
                    from ray.tune.suggest.hyperopt import HyperOptSearch
                    algo = HyperOptSearch()
                scheduler = None
                if method != 'BOHB':
                    from ray.tune.schedulers import ASHAScheduler
                    scheduler = ASHAScheduler(
                        max_t=max_iter, grace_period=1)
                analysis = tune.run(
                    train_breast_cancer,
                    metric="eval-logloss",
                    mode="min",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir='logs/',
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    scheduler=scheduler,
                    search_alg=algo)
            ray.shutdown()
            # # Load the best model checkpoint
            # best_bst = xgb.Booster()
            # best_bst.load_model(os.path.join(analysis.best_checkpoint,
            #                                  "model.xgb"))
            best_trial = analysis.get_best_trial("eval-logloss", "min", "all")
            accuracy = 1. - best_trial.metric_analysis["eval-error"]["min"]
            logloss = best_trial.metric_analysis["eval-logloss"]["min"]
            logger.info(f"method={method}")
            logger.info(f"n_samples={num_samples*n_cpu}")
            logger.info(f"time={time.time()-start_time}")
            logger.info(f"Best model eval loss: {logloss:.4f}")
            logger.info(f"Best model total accuracy: {accuracy:.4f}")
            logger.info(f"Best model parameters: {best_trial.config}")

def main(dataset_name, net_name, xp_path, data_path, load_config, load_model,
         ratio_known_normal, ratio_known_outlier, device, seed,
         optimizer_name, validation, lr, n_epochs, lr_milestone, batch_size,
         weight_decay, pretrain, ae_optimizer_name, ae_lr, ae_n_epochs,
         ae_lr_milestone, ae_batch_size, ae_weight_decay, num_threads,
         n_jobs_dataloader, normal_class, known_outlier_class,
         n_known_outlier_classes):
    """
    Deep SAD, a method for deep semi-supervised anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    ######################################################
    #                    GLOBAL CONFIG                   #
    ######################################################
    sys.path.append('../')
    xp_path = os.path.abspath(xp_path)
    data_path = os.path.abspath(data_path)

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(tune.__name__)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print paths
    logger.info('Log file is %s' % log_file)
    logger.info('Data path is %s' % data_path)
    logger.info('Export path is %s' % xp_path)

    # Print experimental setup
    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Ratio of labeled normal train samples: %.2f' %
                ratio_known_normal)
    logger.info('Ratio of labeled anomalous samples: %.2f' %
                ratio_known_outlier)
    if n_known_outlier_classes == 1:
        logger.info('Known anomaly class: %d' % known_outlier_class)
    else:
        logger.info('Number of known anomaly classes: %d' %
                    n_known_outlier_classes)
    logger.info('Network: %s' % net_name)

    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        torch.cuda.manual_seed(cfg.settings['seed'])
        torch.backends.cudnn.deterministic = True
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    ######################################################
    #                      EXP CONFIG                    #
    ######################################################

    # Init ray
    ray.init(address='auto')

    ax = AxClient(enforce_sequential_optimization=False)
    # Default device to 'cpu' if cuda is not available
    ax.create_experiment(
        name="cicflow_mlp_experiment",
        parameters=[
            {
                "name": "lr",
                "type": "range",
                "bounds": [1e-6, 0.4],
                "log_scale": True
            },
            {
                "name": "pretrain",
                "type": "choice",
                "values": [False, True],
            },
        ],
        objective_name="mean_auc",
    )

    def mlp_trainable(parameterization, reporter):
        return train_evaluate(parameterization,
                              reporter,
                              validation=validation,
                              data_path=data_path,
                              n_known_outlier_classes=n_known_outlier_classes,
                              ratio_known_normal=ratio_known_normal,
                              ratio_known_outlier=ratio_known_outlier,
                              cfg=cfg,
                              n_jobs_dataloader=n_jobs_dataloader,
                              net_name=net_name,
                              pretrain=pretrain)

    tune.run(
        mlp_trainable,
        name="MLP Supervised",
        num_samples=10,
        resources_per_trial={'gpu': 1},
        # Note that the argument here is the `AxClient`.
        search_alg=AxSearch(ax),
        # Set this level to 1 to see status updates and to 2 to also see trial results.
        verbose=2,
        # To use GPU, specify: resources_per_trial={"gpu": 1}.
    )

    best_parameters, values = ax.get_best_parameters()
    return best_parameters

def ray_tune(model_wrapper, job_config: ht.config, resume: bool = False):
    """Performs automatic hyper-parameters tuning with Ray"""
    # initialize
    tuner = job_config.tune.clone().tuner
    log_dir = pathlib.Path(job_config.run.save_sub_dir) / "tmp_log"
    log_dir.mkdir(parents=True, exist_ok=True)
    # set up config
    config = get_hypers_tune(job_config)
    # set up scheduler
    sched_class = getattr(schedulers, tuner.scheduler_class)
    logger.info(f"Setting up scheduler: {tuner.scheduler_class}")
    sched_config = tuner.scheduler.get_config_dict()
    sched = sched_class(**sched_config)
    # set up algorithm
    algo_class = tuner.algo_class
    logger.info(f"Setting up search algorithm: {tuner.algo_class}")
    algo_config = tuner.algo.get_config_dict()
    algo = None
    if algo_class is None:
        algo = None
    elif algo_class == "AxSearch":
        from ray.tune.suggest.ax import AxSearch
        algo = AxSearch(**algo_config)
    elif algo_class == "HyperOptSearch":
        from ray.tune.suggest.hyperopt import HyperOptSearch
        algo = HyperOptSearch(**algo_config)
    elif algo_class == "HEBOSearch":
        from ray.tune.suggest.hebo import HEBOSearch
        algo = HEBOSearch(**algo_config)
    else:
        logger.error(f"Unsupported search algorithm: {algo_class}")
        logger.info("Using default value None for search algorithm")
    # set stopper
    if tuner.stopper_class is None:
        stop = None
    else:
        logger.info(f"Setting up stopper: {tuner.stopper_class}")
        stop_class = getattr(ray.tune.stopper, tuner.stopper_class)
        stop_config = tuner.stopper.get_config_dict()
        stop = stop_class(**stop_config)
    # set up extra run configs
    # important: convert Hepy_Config class to dict
    run_config = tuner.run.get_config_dict()
    if "raise_on_failed_trial" not in run_config:
        run_config["raise_on_failed_trial"] = False
    tune_func = getattr(hep_model, model_wrapper._tune_fun_name)
    # start tuning jobs
    if os.name == "posix":
        logger.info("Ignoring tune.tmp.tmp_dir setting on Unix OS")
        ray.init(**(tuner.init.get_config_dict()))
    else:
        ray.init(
            _temp_dir=str(job_config.tune.tmp_dir),
            **(tuner.init.get_config_dict()),
        )
    analysis = tune.run(
        tune_func,
        name="ray_tunes",
        stop=stop,
        search_alg=algo,
        scheduler=sched,
        config=config,
        local_dir=job_config.run.save_sub_dir,
        resume=resume,
        **run_config,
    )
    print("#### Best hyperparameters found were:")
    print(analysis.best_config)
    print(yaml.dump(analysis.best_config))
    return analysis

help="Finish quickly for testing") parser.add_argument("--server-address", type=str, default=None, required=False, help="The address of server to connect to if using " "Ray Client.") args, _ = parser.parse_known_args() if args.server_address: import ray ray.init(f"ray://{args.server_address}") algo = AxSearch( max_concurrent=4, parameter_constraints=["x1 + x2 <= 2.0"], # Optional. outcome_constraints=["l2norm <= 1.25"], # Optional. ) scheduler = AsyncHyperBandScheduler() analysis = tune.run( easy_objective, name="ax", metric="hartmann6", # provided in the 'easy_objective' function mode="min", search_alg=algo, scheduler=scheduler, num_samples=10 if args.smoke_test else 50, config={ "iterations": 100, "x1": tune.uniform(0.0, 1.0), "x2": tune.uniform(0.0, 1.0),
def main(args):
    utils.init_random()

    exp_configs, tune_configs = utils.get_tune_configs(args.logdir)

    hparams = {}
    parameters = []
    for param_subset, params in tune_configs.items():
        hparams[param_subset] = []
        for param, options in params.items():
            parameters.append({'name': param, **options})
            hparams[param_subset].append(param)

    exp_configs['hparams'] = hparams
    exp_configs['data_params']['subset'] = args.subset
    exp_configs['data_params']['workers'] = args.ds_workers

    max_epochs = 2 if args.smoke else args.max_epochs
    num_samples = 2 if args.smoke else args.num_samples
    exp_configs.update({'num_gpus': 1})

    # ray.init()
    ray.init(memory=2000 * 1024 * 1024,
             object_store_memory=200 * 1024 * 1024,
             driver_object_store_memory=100 * 1024 * 1024)

    scheduler = AsyncHyperBandScheduler(time_attr="training_iteration",
                                        metric="val_accuracy",
                                        mode="max",
                                        grace_period=5,
                                        max_t=max(max_epochs, 5))

    client = AxClient(enforce_sequential_optimization=True)
    client.create_experiment(parameters=parameters,
                             objective_name='val_accuracy')
    search_alg = AxSearch(client, max_concurrent=1, mode='max')
    # search_alg = ConcurrencyLimiter(search_alg, max_concurrent=2)

    reporter = CLIReporter()
    reporter.add_metric_column("val_accuracy")
    reporter.add_metric_column("train_loss")

    trainable = TorchTrainer.as_trainable(
        model_creator=utils.model_creator,
        data_creator=utils.data_creator,
        optimizer_creator=utils.optimizer_creator,
        loss_creator=utils.loss_creator,
        scheduler_creator=utils.scheduler_creator,
        scheduler_step_freq="epoch",
        use_gpu=True,
        config={BATCH_SIZE: exp_configs['batch_size']},
        num_workers=args.workers)

    analysis = tune.run(trainable,
                        num_samples=num_samples,
                        config=exp_configs,
                        trial_name_creator=utils.trial_str_creator,
                        progress_reporter=reporter,
                        scheduler=scheduler,
                        search_alg=search_alg,
                        stop={"training_iteration": max_epochs},
                        local_dir=args.logdir,
                        checkpoint_freq=10,
                        checkpoint_at_end=True,
                        keep_checkpoints_num=3,
                        resume=args.resume,
                        checkpoint_score_attr='val_accuracy',
                        max_failures=2,
                        verbose=1)

def _test_distillbert(method='BlendSearch'):
    max_num_epoch = 64
    num_samples = -1
    time_budget_s = 10800

    search_space = {
        # You can mix constants with search space objects.
        "num_train_epochs": flaml.tune.loguniform(1, max_num_epoch),
        "learning_rate": flaml.tune.loguniform(1e-6, 1e-4),
        "adam_beta1": flaml.tune.uniform(0.8, 0.99),
        "adam_beta2": flaml.tune.loguniform(98e-2, 9999e-4),
        "adam_epsilon": flaml.tune.loguniform(1e-9, 1e-7),
    }

    start_time = time.time()
    ray.init(num_cpus=4, num_gpus=4)
    if 'ASHA' == method:
        algo = None
    elif 'BOHB' == method:
        from ray.tune.schedulers import HyperBandForBOHB
        from ray.tune.suggest.bohb import TuneBOHB
        algo = TuneBOHB(max_concurrent=4)
        scheduler = HyperBandForBOHB(max_t=max_num_epoch)
    elif 'Optuna' == method:
        from ray.tune.suggest.optuna import OptunaSearch
        algo = OptunaSearch()
    elif 'CFO' == method:
        from flaml import CFO
        algo = CFO(points_to_evaluate=[{
            "num_train_epochs": 1,
        }])
    elif 'BlendSearch' == method:
        from flaml import BlendSearch
        algo = BlendSearch(points_to_evaluate=[{
            "num_train_epochs": 1,
        }])
    elif 'Dragonfly' == method:
        from ray.tune.suggest.dragonfly import DragonflySearch
        algo = DragonflySearch()
    elif 'SkOpt' == method:
        from ray.tune.suggest.skopt import SkOptSearch
        algo = SkOptSearch()
    elif 'Nevergrad' == method:
        from ray.tune.suggest.nevergrad import NevergradSearch
        import nevergrad as ng
        algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
    elif 'ZOOpt' == method:
        from ray.tune.suggest.zoopt import ZOOptSearch
        algo = ZOOptSearch(budget=num_samples)
    elif 'Ax' == method:
        from ray.tune.suggest.ax import AxSearch
        algo = AxSearch()
    elif 'HyperOpt' == method:
        from ray.tune.suggest.hyperopt import HyperOptSearch
        algo = HyperOptSearch()
    scheduler = None
    if method != 'BOHB':
        from ray.tune.schedulers import ASHAScheduler
        scheduler = ASHAScheduler(max_t=max_num_epoch, grace_period=1)
    # Note: this overrides any scheduler configured above, so no scheduler
    # is actually used in the run below.
    scheduler = None

    analysis = ray.tune.run(
        train_distilbert,
        metric=HP_METRIC,
        mode=MODE,
        # You can add "gpu": 1 to allocate GPUs
        resources_per_trial={"gpu": 1},
        config=search_space,
        local_dir='test/logs/',
        num_samples=num_samples,
        time_budget_s=time_budget_s,
        keep_checkpoints_num=1,
        checkpoint_score_attr=HP_METRIC,
        scheduler=scheduler,
        search_alg=algo)

    ray.shutdown()

    best_trial = analysis.get_best_trial(HP_METRIC, MODE, "all")
    metric = best_trial.metric_analysis[HP_METRIC][MODE]

    logger.info(f"method={method}")
    logger.info(f"n_trials={len(analysis.trials)}")
    logger.info(f"time={time.time()-start_time}")
    logger.info(f"Best model eval {HP_METRIC}: {metric:.4f}")
    logger.info(f"Best model parameters: {best_trial.config}")

"name": "x3", "type": "range", "bounds": [0.0, 1.0], }, { "name": "x4", "type": "range", "bounds": [0.0, 1.0], }, { "name": "x5", "type": "range", "bounds": [0.0, 1.0], }, { "name": "x6", "type": "range", "bounds": [0.0, 1.0], }, ] algo = AxSearch( parameters=parameters, objective_name="hartmann6", max_concurrent=4, minimize=True, # Optional, defaults to False. parameter_constraints=["x1 + x2 <= 2.0"], # Optional. outcome_constraints=["l2norm <= 1.25"], # Optional. ) scheduler = AsyncHyperBandScheduler(reward_attr="hartmann6") run(easy_objective, name="ax", search_alg=algo, **config)
def _test_xgboost(method="BlendSearch"): try: import ray except ImportError: return if method == "BlendSearch": from flaml import tune else: from ray import tune search_space = { "max_depth": tune.randint(1, 9) if method in ["BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9), "min_child_weight": tune.choice([1, 2, 3]), "subsample": tune.uniform(0.5, 1.0), "eta": tune.loguniform(1e-4, 1e-1), } max_iter = 10 for num_samples in [128]: time_budget_s = 60 for n_cpu in [2]: start_time = time.time() # ray.init(address='auto') if method == "BlendSearch": analysis = tune.run( train_breast_cancer, config=search_space, low_cost_partial_config={ "max_depth": 1, }, cat_hp_cost={ "min_child_weight": [6, 3, 2], }, metric="eval-logloss", mode="min", max_resource=max_iter, min_resource=1, scheduler="asha", # You can add "gpu": 0.1 to allocate GPUs resources_per_trial={"cpu": 1}, local_dir="logs/", num_samples=num_samples * n_cpu, time_budget_s=time_budget_s, use_ray=True, ) else: if "ASHA" == method: algo = None elif "BOHB" == method: from ray.tune.schedulers import HyperBandForBOHB from ray.tune.suggest.bohb import TuneBOHB algo = TuneBOHB(max_concurrent=n_cpu) scheduler = HyperBandForBOHB(max_t=max_iter) elif "Optuna" == method: from ray.tune.suggest.optuna import OptunaSearch algo = OptunaSearch() elif "CFO" == method: from flaml import CFO algo = CFO( low_cost_partial_config={ "max_depth": 1, }, cat_hp_cost={ "min_child_weight": [6, 3, 2], }, ) elif "CFOCat" == method: from flaml.searcher.cfo_cat import CFOCat algo = CFOCat( low_cost_partial_config={ "max_depth": 1, }, cat_hp_cost={ "min_child_weight": [6, 3, 2], }, ) elif "Dragonfly" == method: from ray.tune.suggest.dragonfly import DragonflySearch algo = DragonflySearch() elif "SkOpt" == method: from ray.tune.suggest.skopt import SkOptSearch algo = SkOptSearch() elif "Nevergrad" == method: from ray.tune.suggest.nevergrad import NevergradSearch import nevergrad as ng algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne) elif "ZOOpt" == method: from ray.tune.suggest.zoopt import ZOOptSearch algo = ZOOptSearch(budget=num_samples * n_cpu) elif "Ax" == method: from ray.tune.suggest.ax import AxSearch algo = AxSearch() elif "HyperOpt" == method: from ray.tune.suggest.hyperopt import HyperOptSearch algo = HyperOptSearch() scheduler = None if method != "BOHB": from ray.tune.schedulers import ASHAScheduler scheduler = ASHAScheduler(max_t=max_iter, grace_period=1) analysis = tune.run( train_breast_cancer, metric="eval-logloss", mode="min", # You can add "gpu": 0.1 to allocate GPUs resources_per_trial={"cpu": 1}, config=search_space, local_dir="logs/", num_samples=num_samples * n_cpu, time_budget_s=time_budget_s, scheduler=scheduler, search_alg=algo, ) # # Load the best model checkpoint # import os # best_bst = xgb.Booster() # best_bst.load_model(os.path.join(analysis.best_checkpoint, # "model.xgb")) best_trial = analysis.get_best_trial("eval-logloss", "min", "all") accuracy = 1.0 - best_trial.metric_analysis["eval-error"]["min"] logloss = best_trial.metric_analysis["eval-logloss"]["min"] logger.info(f"method={method}") logger.info(f"n_samples={num_samples*n_cpu}") logger.info(f"time={time.time()-start_time}") logger.info(f"Best model eval loss: {logloss:.4f}") logger.info(f"Best model total accuracy: {accuracy:.4f}") logger.info(f"Best model parameters: {best_trial.config}")
def main(data_path, experiment_path, load_model, ratio_known_normal,
         ratio_known_outlier, seed, optimizer_name, validation, lr, n_epochs,
         lr_milestone, batch_size, weight_decay, pretrain, ae_optimizer_name,
         ae_lr, ae_n_epochs, ae_lr_milestone, ae_batch_size, ae_weight_decay,
         num_threads, n_jobs_dataloader, normal_class, known_outlier_class,
         n_known_outlier_classes):
    ray.init(address='auto')

    data_path = os.path.abspath(data_path)

    n_splits = 4
    period = np.array([
        '2019-11-08', '2019-11-09', '2019-11-11', '2019-11-12', '2019-11-13',
        '2019-11-14', '2019-11-15'
    ])
    dates = period[:2]

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    exp_config = {**locals().copy(), 'objective': 'soft-boundary'}

    if exp_config['seed'] != -1:
        random.seed(exp_config['seed'])
        np.random.seed(exp_config['seed'])
        torch.manual_seed(exp_config['seed'])
        torch.cuda.manual_seed(exp_config['seed'])
        torch.backends.cudnn.deterministic = True

    ax = AxClient(enforce_sequential_optimization=False)
    ax.create_experiment(
        name="SVDDCICFlowExp",
        parameters=[
            {
                "name": "lr",
                "type": "range",
                "bounds": [1e-6, 0.1],
                "log_scale": True
            },
            {
                "name": "nu",
                "type": "range",
                "bounds": [0.005, 0.5]
            },
            {
                "name": "net_name",
                "type": "choice",
                "values": ['cicflow_mlp', 'cicflow_mlp_2', 'cicflow_mlp_3']
            },
            {
                "name": "weight_decay",
                "type": "range",
                "bounds": [1e-6, 0.01],
                "log_scale": True
            },
        ],
        objective_name="val_auc_pr",
    )
    search_alg = AxSearch(ax)

    analysis = tune.run(OneDaySVDDCICFlowExp,
                        name="OneDaySVDDCICFlowExp",
                        checkpoint_at_end=True,
                        checkpoint_freq=5,
                        stop={
                            "training_iteration": 50,
                        },
                        resources_per_trial={"gpu": 1},
                        num_samples=10,
                        local_dir=experiment_path,
                        search_alg=search_alg,
                        config=exp_config)

    print("Best config is:", analysis.get_best_config(metric="val_auc_pr"))

def testConvertAx(self):
    from ray.tune.suggest.ax import AxSearch
    from ax.service.ax_client import AxClient

    config = {
        "a": tune.sample.Categorical([2, 3, 4]).uniform(),
        "b": {
            "x": tune.sample.Integer(0, 5).quantized(2),
            "y": 4,
            "z": tune.sample.Float(1e-4, 1e-2).loguniform()
        }
    }
    converted_config = AxSearch.convert_search_space(config)
    ax_config = [
        {
            "name": "a",
            "type": "choice",
            "values": [2, 3, 4]
        },
        {
            "name": "b/x",
            "type": "range",
            "bounds": [0, 5],
            "value_type": "int"
        },
        {
            "name": "b/y",
            "type": "fixed",
            "value": 4
        },
        {
            "name": "b/z",
            "type": "range",
            "bounds": [1e-4, 1e-2],
            "value_type": "float",
            "log_scale": True
        },
    ]

    client1 = AxClient(random_seed=1234)
    client1.create_experiment(parameters=converted_config)
    searcher1 = AxSearch(ax_client=client1)

    client2 = AxClient(random_seed=1234)
    client2.create_experiment(parameters=ax_config)
    searcher2 = AxSearch(ax_client=client2)

    config1 = searcher1.suggest("0")
    config2 = searcher2.suggest("0")

    self.assertEqual(config1, config2)
    self.assertIn(config1["a"], [2, 3, 4])
    self.assertIn(config1["b"]["x"], list(range(5)))
    self.assertEqual(config1["b"]["y"], 4)
    self.assertLess(1e-4, config1["b"]["z"])
    self.assertLess(config1["b"]["z"], 1e-2)

    searcher = AxSearch(metric="a", mode="max")
    analysis = tune.run(_mock_objective,
                        config=config,
                        search_alg=searcher,
                        num_samples=1)
    trial = analysis.trials[0]
    assert trial.config["a"] in [2, 3, 4]

    mixed_config = {"a": tune.uniform(5, 6), "b": tune.uniform(8, 9)}
    searcher = AxSearch(space=mixed_config, metric="a", mode="max")
    config = searcher.suggest("0")
    self.assertTrue(5 <= config["a"] <= 6)
    self.assertTrue(8 <= config["b"] <= 9)

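As a small standalone sketch of the conversion asserted in the test above, the snippet below calls `AxSearch.convert_search_space` on a toy Tune search space; the parameter names and values are placeholders for illustration, not from the original test.

# Hypothetical standalone use of AxSearch.convert_search_space.
from ray import tune
from ray.tune.suggest.ax import AxSearch

tune_space = {
    "lr": tune.loguniform(1e-4, 1e-1),
    "layers": tune.choice([1, 2, 3]),
}
ax_params = AxSearch.convert_search_space(tune_space)
# ax_params is a list of Ax parameter dicts (a log-scale "range" for "lr",
# a "choice" for "layers"), ready to pass to AxClient.create_experiment.
print(ax_params)
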
def _test_roberta(method='BlendSearch'):
    max_num_epoch = 100
    num_samples = -1
    time_budget_s = 3600

    search_space = {
        # You can mix constants with search space objects.
        "num_train_epochs": flaml.tune.loguniform(1, max_num_epoch),
        "learning_rate": flaml.tune.loguniform(1e-5, 3e-5),
        "weight_decay": flaml.tune.uniform(0, 0.3),
        "per_device_train_batch_size": flaml.tune.choice([16, 32, 64, 128]),
        "seed": flaml.tune.choice([12, 22, 33, 42]),
    }

    start_time = time.time()
    ray.init(num_cpus=4, num_gpus=4)
    if 'ASHA' == method:
        algo = None
    elif 'BOHB' == method:
        from ray.tune.schedulers import HyperBandForBOHB
        from ray.tune.suggest.bohb import TuneBOHB
        algo = TuneBOHB(max_concurrent=4)
        scheduler = HyperBandForBOHB(max_t=max_num_epoch)
    elif 'Optuna' == method:
        from ray.tune.suggest.optuna import OptunaSearch
        algo = OptunaSearch()
    elif 'CFO' == method:
        from flaml import CFO
        algo = CFO(points_to_evaluate=[{
            "num_train_epochs": 1,
            "per_device_train_batch_size": 128,
        }])
    elif 'BlendSearch' == method:
        from flaml import BlendSearch
        algo = BlendSearch(points_to_evaluate=[{
            "num_train_epochs": 1,
            "per_device_train_batch_size": 128,
        }])
    elif 'Dragonfly' == method:
        from ray.tune.suggest.dragonfly import DragonflySearch
        algo = DragonflySearch()
    elif 'SkOpt' == method:
        from ray.tune.suggest.skopt import SkOptSearch
        algo = SkOptSearch()
    elif 'Nevergrad' == method:
        from ray.tune.suggest.nevergrad import NevergradSearch
        import nevergrad as ng
        algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
    elif 'ZOOpt' == method:
        from ray.tune.suggest.zoopt import ZOOptSearch
        algo = ZOOptSearch(budget=num_samples)
    elif 'Ax' == method:
        from ray.tune.suggest.ax import AxSearch
        algo = AxSearch(max_concurrent=3)
    elif 'HyperOpt' == method:
        from ray.tune.suggest.hyperopt import HyperOptSearch
        algo = HyperOptSearch()
    scheduler = None
    if method != 'BOHB':
        from ray.tune.schedulers import ASHAScheduler
        scheduler = ASHAScheduler(max_t=max_num_epoch, grace_period=1)
    # Note: this overrides any scheduler configured above, so no scheduler
    # is actually used in the run below.
    scheduler = None

    analysis = ray.tune.run(train_roberta,
                            metric=HP_METRIC,
                            mode=MODE,
                            resources_per_trial={
                                "gpu": 4,
                                "cpu": 4
                            },
                            config=search_space,
                            local_dir='logs/',
                            num_samples=num_samples,
                            time_budget_s=time_budget_s,
                            keep_checkpoints_num=1,
                            checkpoint_score_attr=HP_METRIC,
                            scheduler=scheduler,
                            search_alg=algo)

    ray.shutdown()

    best_trial = analysis.get_best_trial(HP_METRIC, MODE, "all")
    metric = best_trial.metric_analysis[HP_METRIC][MODE]

    logger.info(f"method={method}")
    logger.info(f"n_trials={len(analysis.trials)}")
    logger.info(f"time={time.time()-start_time}")
    logger.info(f"Best model eval {HP_METRIC}: {metric:.4f}")
    logger.info(f"Best model parameters: {best_trial.config}")

def main(data_path, experiment_path, load_model, ratio_known_normal,
         ratio_known_outlier, seed, optimizer_name, validation, lr, n_epochs,
         lr_milestone, batch_size, weight_decay, pretrain, ae_optimizer_name,
         ae_lr, ae_n_epochs, ae_lr_milestone, ae_batch_size, ae_weight_decay,
         num_threads, n_jobs_dataloader, normal_class, known_outlier_class,
         n_known_outlier_classes):
    ray.init(address='auto')

    data_path = os.path.abspath(data_path)

    n_splits = 4
    period = np.array([
        '2019-11-08', '2019-11-09', '2019-11-11', '2019-11-12', '2019-11-13',
        '2019-11-14', '2019-11-15'
    ])
    test_dates = period[-2:]
    train_dates = get_train_val_split(period[:-2], validation, n_splits)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    exp_config = {
        **locals().copy(),
        'net_name': 'cicflow_mlp',
    }

    if exp_config['seed'] != -1:
        random.seed(exp_config['seed'])
        np.random.seed(exp_config['seed'])
        torch.manual_seed(exp_config['seed'])
        torch.cuda.manual_seed(exp_config['seed'])
        torch.backends.cudnn.deterministic = True

    ax = AxClient(enforce_sequential_optimization=False)
    ax.create_experiment(
        name="SupervisedCICFlowExp",
        parameters=[
            {
                "name": "lr",
                "type": "range",
                "bounds": [1e-6, 0.4],
                "log_scale": True
            },
            {
                "name": "weight_decay",
                "type": "range",
                "bounds": [1e-6, 1.0],
                "log_scale": True
            },
        ],
        objective_name="val_f1",
    )
    search_alg = AxSearch(ax)
    re_search_alg = Repeater(search_alg, repeat=n_splits)

    sched = ASHAScheduler(time_attr='training_iteration',
                          grace_period=10,
                          metric="val_f1")

    analysis = tune.run(SupervisedCICFlowExp,
                        name="SupervisedCICFlowExp",
                        checkpoint_at_end=True,
                        checkpoint_freq=5,
                        stop={
                            "training_iteration": 100,
                        },
                        resources_per_trial={"gpu": 1},
                        num_samples=30,
                        local_dir=experiment_path,
                        search_alg=re_search_alg,
                        scheduler=sched,
                        config=exp_config)

    print("Best config is:", analysis.get_best_config(metric="val_f1"))

"type": "range", "bounds": [0.0, 1.0], }, { "name": "x4", "type": "range", "bounds": [0.0, 1.0], }, { "name": "x5", "type": "range", "bounds": [0.0, 1.0], }, { "name": "x6", "type": "range", "bounds": [0.0, 1.0], }, ] client = AxClient(enforce_sequential_optimization=False) client.create_experiment( parameters=parameters, objective_name="hartmann6", minimize=True, # Optional, defaults to False. parameter_constraints=["x1 + x2 <= 2.0"], # Optional. outcome_constraints=["l2norm <= 1.25"], # Optional. ) algo = AxSearch(client, max_concurrent=4) scheduler = AsyncHyperBandScheduler(reward_attr="hartmann6") run(easy_objective, name="ax", search_alg=algo, **config)
ax_client.create_experiment(name="tune_RL", parameters=parameters, objective_name='episode_reward_mean', minimize=False, overwrite_existing_experiment=True) # add scheduling of configurations, i.e. intensify solely asha_scheduler = ASHAScheduler(time_attr='training_iteration', metric='episode_reward_mean', mode='max') ray.init(num_cpus=args.ray_cpus) ray.tune.run(evaluate_objective, num_samples=RAY_TUNE_SAMPLES, search_alg=AxSearch(ax_client), scheduler=asha_scheduler, verbose=2) # get best parameters, retrain agent and log results for best agent best_parameters, values = ax_client.get_best_parameters() ray.shutdown() env = NFVDeepMonitor(base_env, args.logs) callback = MetricLoggingCallback() eval_agent = agent( **{ 'policy': policy, 'env': env, 'verbose': 1,