def hp_space(trial):
    from ray import tune

    return {
        "a": tune.randint(-4, 4),
        "b": tune.randint(-4, 4),
    }
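# Usage sketch for hp_space above: the `trial` argument and dict-returning
# signature are consistent with transformers' Trainer.hyperparameter_search
# on the Ray Tune backend (`trainer` is a hypothetical, already-constructed
# Trainer instance; this pairing is an assumption, not stated in the source).
best_run = trainer.hyperparameter_search(
    hp_space=hp_space, backend="ray", n_trials=20, direction="maximize")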
def tune_fn():
    mlflow.set_experiment(experiment_name=experiment_name)

    optuna_search = OptunaSearch(metric="auroc", mode="max")
    # Note: ax_search is constructed but unused; only optuna_search is
    # passed to tune.run below.
    ax_search = AxSearch(metric="auroc", mode="max")

    tune.run(
        objective,
        name="mlflow_gbdt",
        num_samples=65,
        config={
            "num_leaves": tune.randint(5, 95),
            "learning_rate": tune.loguniform(1e-4, 1.0),
            "n_estimators": tune.randint(100, 100000),
            "subsample": tune.loguniform(0.01, 1.0),
            "subsample_freq": tune.randint(1, 5),
            "objective": "binary",
            "reg_alpha": tune.loguniform(1e-4, 1.0),
            "reg_lambda": tune.loguniform(1e-4, 1.0),
            "tree_learner": "feature",
            "feature_sel": 0,
            "mlflow": {
                "experiment_name": experiment_name,
                "tracking_uri": mlflow.get_tracking_uri(),
            },
        },
        search_alg=optuna_search)
def search_space(self, all_available_features):
    space = {
        "n_estimators": tune.randint(self.n_estimators_range[0],
                                     self.n_estimators_range[1]),
        "max_depth": tune.randint(self.max_depth_range[0],
                                  self.max_depth_range[1]),
    }
    return space
def fixed_params(self):
    total_fixed_params = {
        "n_estimators": tune.randint(self.n_estimators_range[0],
                                     self.n_estimators_range[1]),
        "max_depth": tune.randint(self.max_depth_range[0],
                                  self.max_depth_range[1]),
    }
    return total_fixed_params
def tune_function(mlflow_tracking_uri, finish_fast=False):
    tune.run(
        easy_objective,
        name="mlflow",
        num_samples=5,
        callbacks=[
            MLflowLoggerCallback(
                tracking_uri=mlflow_tracking_uri,
                experiment_name="example",
                save_artifact=True)
        ],
        config={
            "width": tune.randint(10, 100),
            "height": tune.randint(0, 100),
            "steps": 5 if finish_fast else 100,
        })
def main(cpus_per_actor, num_actors, num_samples): # Set XGBoost config. config = { "tree_method": "approx", "objective": "binary:logistic", "eval_metric": ["logloss", "error"], "eta": tune.loguniform(1e-4, 1e-1), "subsample": tune.uniform(0.5, 1.0), "max_depth": tune.randint(1, 9) } analysis = tune.run( tune.with_parameters(train_breast_cancer, cpus_per_actor=cpus_per_actor, num_actors=num_actors), # extra_cpu is used if the trainable creates additional remote actors. # https://docs.ray.io/en/master/tune/api_docs/trainable.html#advanced-resource-allocation resources_per_trial={ "cpu": 1, "extra_cpu": cpus_per_actor * num_actors }, config=config, num_samples=num_samples, metric="eval-error", mode="min") # Load the best model checkpoint best_bst = xgb.Booster() best_bst.load_model(os.path.join(analysis.best_logdir, "simple.xgb")) accuracy = 1. - analysis.best_result["eval-error"] print(f"Best model parameters: {analysis.best_config}") print(f"Best model total accuracy: {accuracy:.4f}")
def tune_mnist(num_training_iterations):
    sched = AsyncHyperBandScheduler(
        time_attr="training_iteration", max_t=400, grace_period=20)
    analysis = tune.run(
        train_mnist,
        name="exp",
        scheduler=sched,
        metric="mean_accuracy",
        mode="max",
        stop={
            "mean_accuracy": 0.99,
            "training_iteration": num_training_iterations
        },
        num_samples=10,
        resources_per_trial={
            "cpu": 2,
            "gpu": 0
        },
        config={
            "threads": 2,
            "lr": tune.uniform(0.001, 0.1),
            "momentum": tune.uniform(0.1, 0.9),
            "hidden": tune.randint(32, 512),
        },
    )
    print("Best hyperparameters found were: ", analysis.best_config)
def randint(lower, upper):
    '''
    Uniformly sample an integer between lower and upper.
    Note that this delegates to tune.randint, for which the lower bound
    is inclusive and the upper bound is exclusive.

    :param lower: Lower bound of the sampling range (inclusive).
    :param upper: Upper bound of the sampling range (exclusive).
    '''
    return tune.randint(lower, upper)
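# Minimal sanity-check sketch for the wrapper above, assuming Ray Tune is
# installed; Domain objects expose .sample(), used here only for illustration.
space = randint(1, 10)
assert all(1 <= space.sample() < 10 for _ in range(100))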
def run_tune(
    sync_to_driver: bool,
    upload_dir: Optional[str] = None,
    durable: bool = False,
    experiment_name: str = "cloud_test",
    indicator_file: str = "/tmp/tune_cloud_indicator",
):
    num_cpus_per_trial = int(os.environ.get("TUNE_NUM_CPUS_PER_TRIAL", "2"))

    if durable:
        trainable = tune.durable(train)
    else:
        trainable = train

    tune.run(
        trainable,
        name=experiment_name,
        resume="AUTO",
        num_samples=4,
        config={
            "max_iterations": 30,
            "sleep_time": 5,
            "checkpoint_freq": 2,
            "score_multiplied": tune.randint(0, 100),
        },
        sync_config=tune.SyncConfig(
            sync_to_driver=sync_to_driver,
            upload_dir=upload_dir,
            sync_on_checkpoint=True,
            cloud_sync_period=0.5,
        ),
        keep_checkpoints_num=2,
        resources_per_trial={"cpu": num_cpus_per_trial},
        callbacks=[IndicatorCallback(indicator_file=indicator_file)],
        verbose=2)
def read_tune_ax(name, this):
    dict_ = dict(name=name)
    min_, max_ = this['minmax']
    if min_ == max_:
        dict_["type"] = "fixed"
        dict_["value"] = min_
    elif this['type'] == 'int':
        if this['step'] == 1:
            dict_["type"] = "range"
            dict_["bounds"] = [min_, max_]
            dict_["value_type"] = "int"
        else:
            # Ax "choice" parameters expect a plain list of values;
            # tune.randint takes no step argument.
            dict_["type"] = "choice"
            dict_["values"] = list(range(min_, max_, this['step']))
            dict_["value_type"] = "int"
    elif this['type'] == 'float':
        if this['step'] == 1:
            dict_["type"] = "choice"
            dict_["values"] = np.arange(min_, max_, this['step']).tolist()
            dict_["value_type"] = "float"
        else:
            dict_["type"] = "range"
            dict_["bounds"] = [min_, max_]
            dict_["value_type"] = "float"
    return dict_
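# Hypothetical usage sketch for read_tune_ax above; `this` mirrors the
# schema the parser reads ('minmax', 'type', 'step'), with illustrative values.
param = read_tune_ax("max_depth", {"minmax": (2, 10), "type": "int", "step": 2})
# -> {'name': 'max_depth', 'type': 'choice', 'values': [2, 4, 6, 8],
#     'value_type': 'int'}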
def main(cpus_per_actor, num_actors, num_samples): # Set XGBoost config. config = { "tree_method": "approx", "objective": "binary:logistic", "eval_metric": ["logloss", "error"], "eta": tune.loguniform(1e-4, 1e-1), "subsample": tune.uniform(0.5, 1.0), "max_depth": tune.randint(1, 9) } ray_params = RayParams(max_actor_restarts=1, gpus_per_actor=0, cpus_per_actor=cpus_per_actor, num_actors=num_actors) analysis = tune.run( tune.with_parameters(train_breast_cancer, ray_params=ray_params), # Use the `get_tune_resources` helper function to set the resources. resources_per_trial=ray_params.get_tune_resources(), config=config, num_samples=num_samples, metric="eval-error", mode="min") # Load the best model checkpoint. best_bst = xgboost_ray.tune.load_model( os.path.join(analysis.best_logdir, "tuned.xgb")) best_bst.save_model("best_model.xgb") accuracy = 1. - analysis.best_result["eval-error"] print(f"Best model parameters: {analysis.best_config}") print(f"Best model total accuracy: {accuracy:.4f}")
def tune_decorated(mlflow_tracking_uri, finish_fast=False):
    # Set the experiment, or create a new one if it does not exist yet.
    mlflow.set_tracking_uri(mlflow_tracking_uri)
    mlflow.set_experiment(experiment_name="mixin_example")
    tune.run(
        decorated_easy_objective,
        name="mlflow",
        num_samples=5,
        config={
            "width": tune.randint(10, 100),
            "height": tune.randint(0, 100),
            "steps": 5 if finish_fast else 100,
            "mlflow": {
                "experiment_name": "mixin_example",
                "tracking_uri": mlflow.get_tracking_uri()
            }
        })
def update_search_space(self, search_space):
    '''
    Tuners are advised to support updating search space at run-time.
    If a tuner can only set search space once before generating first
    hyper-parameters, it should explicitly document this behaviour.

    search_space: JSON object created by experiment owner
    '''
    config = {}
    # Iterate over items(); iterating the dict directly yields only keys.
    for key, value in search_space.items():
        v = value.get("_value")
        _type = value['_type']
        if _type == 'choice':
            config[key] = choice(v)
        elif _type == 'randint':
            config[key] = randint(v[0], v[1] - 1)
        elif _type == 'uniform':
            config[key] = uniform(v[0], v[1])
        elif _type == 'quniform':
            config[key] = quniform(v[0], v[1], v[2])
        elif _type == 'loguniform':
            config[key] = loguniform(v[0], v[1])
        elif _type == 'qloguniform':
            config[key] = qloguniform(v[0], v[1], v[2])
        elif _type == 'normal':
            config[key] = randn(v[1], v[2])
        elif _type == 'qnormal':
            config[key] = qrandn(v[1], v[2], v[3])
        else:
            raise ValueError(f'unsupported type in search_space {_type}')
    self._ls.set_search_properties(None, None, config)
    if self._gs is not None:
        self._gs.set_search_properties(None, None, config)
    self._init_search()
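# Example of the NNI-style search_space JSON this parser accepts (key names
# and ranges are illustrative, not from a real experiment):
example_search_space = {
    "lr": {"_type": "loguniform", "_value": [1e-4, 1e-1]},
    "batch_size": {"_type": "choice", "_value": [16, 32, 64]},
    "hidden": {"_type": "randint", "_value": [32, 512]},
}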
def tune_mnist():
    sched = ASHAScheduler(time_attr="training_iteration")
    metric = "mean_accuracy"
    analysis = tune.run(
        train_mnist,
        name="foo",
        scheduler=sched,
        metric=metric,
        mode="max",
        # stop={
        #     "mean_accuracy": 0.99,
        #     "training_iteration": num_training_iterations
        # },
        num_samples=50,
        resources_per_trial={
            "cpu": 1,
            "gpu": 0
        },
        config={
            "dropout": tune.uniform(0.05, 0.5),
            "lr": tune.uniform(0.001, 0.1),
            "momentum": tune.uniform(0.1, 0.9),
            "hidden": tune.randint(32, 512),
        })
    print("Best hyperparameters found were: ", analysis.best_config)
    print("Best value for", metric, ':', analysis.best_result[metric])
def get_past_seq_config(look_back):
    """
    Generate past sequence config based on look_back.

    :param look_back: look_back configuration
    :return: search configuration for past sequence
    """
    if isinstance(look_back, tuple) and len(look_back) == 2 and \
            isinstance(look_back[0], int) and isinstance(look_back[1], int):
        if look_back[1] < 2:
            raise ValueError("The max look back value should be at least 2")
        if look_back[0] < 2:
            print("The input min look back value is smaller than 2. "
                  "We sample from range (2, {}) instead.".format(look_back[1]))
        # Clamp the lower bound to 2 so the sampled range matches the message
        # above; tune.randint excludes the upper bound, hence the + 1.
        past_seq_config = tune.randint(max(look_back[0], 2), look_back[1] + 1)
    elif isinstance(look_back, int):
        if look_back < 2:
            raise ValueError(
                "look back value should not be smaller than 2. "
                "Current value is ", look_back)
        past_seq_config = look_back
    else:
        raise ValueError(
            "look back is {}.\n"
            "look_back should be either a tuple with 2 int values:"
            " (min_len, max_len) or a single int".format(look_back))
    return past_seq_config
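# Usage sketch for get_past_seq_config above: a (min, max) tuple yields a
# tune.randint search space, while a bare int is passed through unchanged.
assert get_past_seq_config(7) == 7
space = get_past_seq_config((2, 10))  # integers in [2, 11), i.e. [2, 10]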
def test_tune(ray_start_4_cpus):
    train_dataset = ray.data.from_pandas(train_df)
    valid_dataset = ray.data.from_pandas(test_df)
    trainer = XGBoostTrainer(
        scaling_config=scale_config,
        label_column="target",
        params={**params, **{"max_depth": 1}},
        datasets={
            TRAIN_DATASET_KEY: train_dataset,
            "valid": valid_dataset
        },
    )

    tune.run(
        trainer.as_trainable(),
        config={"params": {"max_depth": tune.randint(2, 4)}},
        num_samples=2,
    )

    # Make sure the original Trainer is not affected.
    assert trainer.params["max_depth"] == 1
def test_tune(ray_start_4_cpus):
    train_dataset = ray.data.from_pandas(train_df)
    valid_dataset = ray.data.from_pandas(test_df)
    trainer = SklearnTrainer(
        estimator=RandomForestClassifier(),
        scaling_config=scale_config,
        label_column="target",
        params={"max_depth": 4},
        cv=5,
        datasets={
            TRAIN_DATASET_KEY: train_dataset,
            "valid": valid_dataset
        },
    )

    tune.run(
        trainer.as_trainable(),
        config={"params": {"max_depth": tune.randint(2, 4)}},
        num_samples=2,
        metric="cv/test_score_mean",
        mode="max",
    )

    # Make sure the original Trainer is not affected.
    assert trainer.params["max_depth"] == 4
def tune_xgboost(train_df, test_df, target_column):
    # Set XGBoost config.
    config = {
        "tree_method": "approx",
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "eta": tune.loguniform(1e-4, 1e-1),
        "subsample": tune.uniform(0.5, 1.0),
        "max_depth": tune.randint(1, 9),
    }

    ray_params = RayParams(
        max_actor_restarts=1, gpus_per_actor=0, cpus_per_actor=4, num_actors=4)

    analysis = tune.run(
        tune.with_parameters(
            train_xgboost,
            train_df=train_df,
            test_df=test_df,
            target_column=target_column,
            ray_params=ray_params),
        # Use the `get_tune_resources` helper function to set the resources.
        resources_per_trial=ray_params.get_tune_resources(),
        config=config,
        num_samples=1,
        metric="eval-error",
        mode="min",
        verbose=1)

    accuracy = 1. - analysis.best_result["eval-error"]
    print(f"Best model parameters: {analysis.best_config}")
    print(f"Best model total accuracy: {accuracy:.4f}")

    return analysis.best_config
def tune_xgboost(use_cv: bool = False):
    search_space = {
        # You can mix constants with search space objects.
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1),
    }
    # This will enable aggressive early stopping of bad trials.
    scheduler = ASHAScheduler(
        max_t=10,  # 10 training iterations
        grace_period=1,
        reduction_factor=2)

    analysis = tune.run(
        train_breast_cancer if not use_cv else train_breast_cancer_cv,
        metric="test-logloss",
        mode="min",
        # You can add "gpu": 0.1 to allocate GPUs
        resources_per_trial={"cpu": 1},
        config=search_space,
        num_samples=10,
        scheduler=scheduler,
    )
    return analysis
def tunerTrain():
    ray.init(_memory=4000000000, num_cpus=5)
    searchSpace = {
        'lr': tune.loguniform(1e-4, 9e-1),
        # Minimum of 2: otherwise 1 // 2 = 0 activation maps.
        'finalOutput': tune.randint(2, 50),
        'stride1': tune.grid_search(np.arange(1, 4).tolist()),
        'stride2': tune.grid_search(np.arange(1, 4).tolist()),
        'batchSize': tune.grid_search([2, 4, 8, 16, 32, 64, 128, 256]),
        'finalChannel': tune.randint(1, 50),
    }
    analysis = tune.run(
        train,
        num_samples=1,
        scheduler=ASHAScheduler(metric='score', mode='max'),
        config=searchSpace)
    print(f"Best Config: {analysis.get_all_configs(metric='score', mode='max')}")
    df = analysis.results_df
    # Use the same metric the trials report ('score'), not 'mean_accuracy'.
    logdir = analysis.get_best_logdir('score', mode='max')
    print(f"dir of best: {logdir}")
def create_config(backtest_config: dict) -> dict:
    config = {}
    config['starting_balance'] = backtest_config['starting_balance']
    config['min_size'] = backtest_config['min_size']
    config['min_step'] = backtest_config['min_step']
    config['taker_fee'] = backtest_config['taker_fee']
    config['qty_pct'] = tune.uniform(backtest_config['ranges']['qty_pct'][0],
                                     backtest_config['ranges']['qty_pct'][1])
    config['leverage'] = tune.randint(backtest_config['ranges']['leverage'][0],
                                      backtest_config['ranges']['leverage'][1])
    config['ema_fast'] = tune.randint(backtest_config['ranges']['ema_fast'][0],
                                      backtest_config['ranges']['ema_fast'][1])
    config['ema_slow'] = tune.randint(backtest_config['ranges']['ema_slow'][0],
                                      backtest_config['ranges']['ema_slow'][1])
    return config
def update_search_space(self, search_space): """Required by NNI. Tuners are advised to support updating search space at run-time. If a tuner can only set search space once before generating first hyper-parameters, it should explicitly document this behaviour. Args: search_space: JSON object created by experiment owner. """ config = {} for key, value in search_space.items(): v = value.get("_value") _type = value["_type"] if _type == "choice": config[key] = choice(v) elif _type == "randint": config[key] = randint(*v) elif _type == "uniform": config[key] = uniform(*v) elif _type == "quniform": config[key] = quniform(*v) elif _type == "loguniform": config[key] = loguniform(*v) elif _type == "qloguniform": config[key] = qloguniform(*v) elif _type == "normal": config[key] = randn(*v) elif _type == "qnormal": config[key] = qrandn(*v) else: raise ValueError(f"unsupported type in search_space {_type}") # low_cost_partial_config is passed to constructor, # which is before update_search_space() is called init_config = self._ls.init_config add_cost_to_space(config, init_config, self._cat_hp_cost) self._ls = self.LocalSearch( init_config, self._ls.metric, self._mode, config, self._ls.resource_attr, self._ls.min_resource, self._ls.max_resource, self._ls.resource_multiple_factor, cost_attr=self.cost_attr, seed=self._ls.seed, ) if self._gs is not None: self._gs = GlobalSearch( space=config, metric=self._metric, mode=self._mode, sampler=self._gs._sampler, ) self._gs.space = config self._init_search()
def qrandint(lower, upper, q=1):
    '''
    Uniformly sample an integer between lower and upper (both inclusive),
    rounding the result to the nearest multiple of the granularity q.

    :param lower: Lower bound of the sampling range.
    :param upper: Upper bound of the sampling range.
    :param q: Integer granularity for the increment.
    '''
    # tune.randint takes no granularity argument; tune.qrandint does.
    return tune.qrandint(lower, upper, q)
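# Quick sketch for the wrapper above: qrandint keeps both bounds inclusive
# and snaps samples to multiples of q (assumes Ray Tune; .sample() is used
# here only for illustration).
step = qrandint(-20, 12, 4)
val = step.sample()
assert -20 <= val <= 12 and val % 4 == 0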
def run_tune(
    no_syncer: bool,
    upload_dir: Optional[str] = None,
    experiment_name: str = "cloud_test",
    indicator_file: str = "/tmp/tune_cloud_indicator",
    trainable: str = "function",
    num_cpus_per_trial: int = 2,
):
    if trainable == "function":
        train = fn_trainable
        config = {
            "max_iterations": 100,
            "sleep_time": 5,
            "checkpoint_freq": 2,
            "score_multiplied": tune.randint(0, 100),
        }
        kwargs = {"resources_per_trial": {"cpu": num_cpus_per_trial}}
    elif trainable == "rllib_str" or trainable == "rllib_trainer":
        if trainable == "rllib_str":
            train = "PPO"
        else:
            train = PPO
        config = {
            "env": "CartPole-v1",
            "num_workers": 1,
            "num_envs_per_worker": 1,
            "callbacks": RLlibCallback,
        }
        kwargs = {
            "stop": {"training_iteration": 100},
            "checkpoint_freq": 2,
            "checkpoint_at_end": True,
        }
    else:
        raise RuntimeError(f"Unknown trainable: {trainable}")

    tune.run(
        train,
        name=experiment_name,
        resume="AUTO",
        num_samples=4,
        config=config,
        sync_config=tune.SyncConfig(
            syncer="auto" if not no_syncer else None,
            upload_dir=upload_dir,
            sync_on_checkpoint=True,
            sync_period=0.5,
        ),
        keep_checkpoints_num=2,
        callbacks=[IndicatorCallback(indicator_file=indicator_file)],
        verbose=2,
        **kwargs,
    )
def testRelativeLogdir(self):
    """Moving the experiment folder into another location.

    This should still work and is an important use case, because
    training in the cloud usually requires such relocations.
    """
    local_dir_path = Path("/tmp/test_rel")
    if local_dir_path.exists():
        local_dir = tempfile.mkdtemp(prefix=str(local_dir_path) + "_")
    else:
        local_dir = str(local_dir_path)

    tune.run(
        "rel_logdir", config={"a": tune.randint(0, 10)}, local_dir=local_dir)

    # Copy the folder.
    local_dir_moved = local_dir + "_moved"
    shutil.copytree(local_dir, local_dir_moved)

    # Load the moved trials.
    analysis = tune.ExperimentAnalysis(local_dir)
    analysis_moved = tune.ExperimentAnalysis(local_dir_moved)

    configs = analysis.get_all_configs()
    configs_moved = analysis_moved.get_all_configs()
    config = configs[next(iter(configs))]
    config_moved = configs_moved[next(iter(configs_moved))]

    # Check if the trial attributes can be loaded.
    self.assertEqual(len(configs), 1)
    self.assertEqual(len(configs_moved), 1)
    # Check if the two configs are equal.
    self.assertDictEqual(config, config_moved)

    metric = "metric1"
    mode = "max"
    analysis_df = analysis.dataframe(metric, mode)
    analysis_moved_df = analysis_moved.dataframe(metric, mode)
    self.assertEqual(analysis_df.shape[0], 1)
    self.assertEqual(analysis_moved_df.shape[0], 1)

    # Drop the `logdir` column as this should be different
    # between the two trials.
    analysis_df.drop(columns="logdir", inplace=True)
    analysis_moved_df.drop(columns="logdir", inplace=True)
    # assertEqual cannot compare DataFrames directly; use DataFrame.equals.
    self.assertTrue(analysis_df.equals(analysis_moved_df))

    # Remove the files and directories.
    if shutil.rmtree.avoids_symlink_attacks:
        if local_dir_path.exists():
            shutil.rmtree(local_dir)
        shutil.rmtree(local_dir_moved)
def create_config(backtest_config: dict) -> dict:
    config = {k: backtest_config[k] for k in backtest_config
              if k not in {'session_name', 'user', 'symbol',
                           'start_date', 'end_date', 'ranges'}}
    for k in backtest_config['ranges']:
        if backtest_config['ranges'][k][0] == backtest_config['ranges'][k][1]:
            config[k] = backtest_config['ranges'][k][0]
        elif k in ['n_close_orders', 'leverage']:
            config[k] = tune.randint(backtest_config['ranges'][k][0],
                                     backtest_config['ranges'][k][1] + 1)
        else:
            config[k] = tune.uniform(backtest_config['ranges'][k][0],
                                     backtest_config['ranges'][k][1])
    return config
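# Hypothetical input sketch for create_config above (key names follow the
# branches the function handles; all values are illustrative only):
backtest_config = {
    'session_name': 'demo', 'user': 'me', 'symbol': 'BTCUSDT',
    'start_date': '2021-01-01', 'end_date': '2021-06-01',
    'starting_balance': 1000,
    'ranges': {
        'leverage': [1, 5],       # integer range -> tune.randint(1, 6)
        'qty_pct': [0.01, 0.25],  # float range   -> tune.uniform
        'ema_span': [20, 20],     # degenerate    -> fixed value 20
    },
}
config = create_config(backtest_config)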
def testTuneSampleAPI(self):
    config = {
        "func": tune.sample_from(lambda spec: spec.config.uniform * 0.01),
        "uniform": tune.uniform(-5, -1),
        "quniform": tune.quniform(3.2, 5.4, 0.2),
        "loguniform": tune.loguniform(1e-4, 1e-2),
        "qloguniform": tune.qloguniform(1e-4, 1e-1, 5e-5),
        "choice": tune.choice([2, 3, 4]),
        "randint": tune.randint(-9, 15),
        "qrandint": tune.qrandint(-21, 12, 3),
        "randn": tune.randn(10, 2),
        "qrandn": tune.qrandn(10, 2, 0.2),
    }
    for _, (_, generated) in zip(
            range(1000), generate_variants({"config": config})):
        out = generated["config"]

        self.assertAlmostEqual(out["func"], out["uniform"] * 0.01)

        self.assertGreaterEqual(out["uniform"], -5)
        self.assertLess(out["uniform"], -1)

        self.assertGreaterEqual(out["quniform"], 3.2)
        self.assertLessEqual(out["quniform"], 5.4)
        self.assertAlmostEqual(out["quniform"] / 0.2,
                               round(out["quniform"] / 0.2))

        self.assertGreaterEqual(out["loguniform"], 1e-4)
        self.assertLess(out["loguniform"], 1e-2)

        self.assertGreaterEqual(out["qloguniform"], 1e-4)
        self.assertLessEqual(out["qloguniform"], 1e-1)
        self.assertAlmostEqual(out["qloguniform"] / 5e-5,
                               round(out["qloguniform"] / 5e-5))

        self.assertIn(out["choice"], [2, 3, 4])

        self.assertGreaterEqual(out["randint"], -9)
        self.assertLess(out["randint"], 15)

        self.assertGreaterEqual(out["qrandint"], -21)
        self.assertLessEqual(out["qrandint"], 12)
        self.assertEqual(out["qrandint"] % 3, 0)

        # Very improbable for these bounds to be violated.
        self.assertGreater(out["randn"], 0)
        self.assertLess(out["randn"], 20)

        self.assertGreater(out["qrandn"], 0)
        self.assertLess(out["qrandn"], 20)
        self.assertAlmostEqual(out["qrandn"] / 0.2, round(out["qrandn"] / 0.2))
def main(loss_function="L1", num_samples=25, max_num_epochs=25,
         gpus_per_trial=1, cpus_per_trial=10):
    experiment_name = loss_function + "_shuffle_validation"
    save_dir = ('/data/results/vcpujol/transformers/single_deployment/'
                'predict_maxmax/pytorch_transformer/')

    config = {
        "lr": tune.loguniform(1e-4, 5e-1),
        "lr_step": tune.randint(1, 10),
        "gamma": tune.loguniform(0.85, 0.9999),
        "epochs": tune.choice([5, 10, 15, 20, 25]),
        "n_heads": tune.randint(2, 10),
        "dim_val": tune.choice([2, 4, 6]),  # FIXME: must be an even number.
        "dim_att": tune.randint(2, 12),
        "encoder_layers": tune.randint(1, 7),
        "decoder_layers": tune.randint(1, 7),
        "batch_size": tune.randint(1, 10),
        "input_feat_enc": tune.choice([94]),
        "input_feat_dec": tune.choice([1]),
        "seq_len": tune.choice([16, 32, 64, 96, 128, 180, 220, 256,
                                312, 350, 420, 470, 512]),
        "prediction_step": tune.choice([1]),
    }

    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=4,
        reduction_factor=2)
    reporter = CLIReporter(
        parameter_columns=["lr", "lr_step", "gamma", "epochs", "n_heads",
                           "dim_val", "dim_att", "encoder_layers",
                           "decoder_layers", "batch_size", "seq_len"],
        metric_columns=["loss", "training_iteration"])

    result = tune.run(
        partial(transformer_train, save_dir=save_dir,
                loss_function=loss_function),
        resources_per_trial={"cpu": cpus_per_trial, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        local_dir=save_dir,
        name=experiment_name)

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))

    best_trained_model = Transformer(
        best_trial.config["dim_val"], best_trial.config["dim_att"],
        best_trial.config["input_feat_enc"], best_trial.config["input_feat_dec"],
        best_trial.config["seq_len"], best_trial.config["decoder_layers"],
        best_trial.config["encoder_layers"], best_trial.config["n_heads"])

    best_checkpoint_dir = best_trial.checkpoint.value
    model_state, optimizer_state = torch.load(
        os.path.join(best_checkpoint_dir, "checkpoint"))
    best_trained_model.load_state_dict(model_state)

    test_acc = test_transformer(best_trained_model, best_trial.config,
                                save_dir, experiment_name, loss_function)
    print("Best trial test set accuracy: {}".format(test_acc))
def gfp(local_dir, cpus, gpus, num_parallel, num_samples):
    """Evaluate Conservative Objective Models on GFP-v0."""
    from design_baselines.coms_original import coms_original

    ray.init(
        num_cpus=cpus,
        num_gpus=gpus,
        include_dashboard=False,
        _temp_dir=os.path.expanduser('~/tmp'))
    tune.run(
        coms_original,
        config={
            "logging_dir": "data",
            "task": "GFP-v0",
            # tune.randint requires both bounds; [0, 1000) is assumed here
            # for the per-trial task seed.
            "task_kwargs": {'seed': tune.randint(0, 1000)},
            "is_discrete": True,
            "constraint_type": "mix",
            "normalize_ys": True,
            "normalize_xs": True,
            "continuous_noise_std": 0.2,
            "discrete_clip": 0.6,
            "val_size": 500,
            "batch_size": 128,
            "epochs": 500,
            "activations": ['leaky_relu', 'leaky_relu'],
            "hidden_size": 2048,
            "initial_max_std": 0.2,
            "initial_min_std": 0.1,
            "forward_model_lr": 0.001,
            "initial_alpha": 1.0,
            "alpha_lr": 0.05,
            "target_conservatism": 0.05,
            "negatives_fraction": 1.0,
            "lookahead_steps": 1,
            "lookahead_backprop": True,
            "evaluate_steps": [450],
            "solver_lr": 0.01,
            "solver_interval": 1,
            "solver_warmup": 50,
            "solver_steps": 1,
            "solver_beta": tune.grid_search([0.0, 0.1, 0.3, 0.7, 0.9, 1.0])
        },
        num_samples=num_samples,
        local_dir=local_dir,
        resources_per_trial={
            'cpu': cpus // num_parallel,
            'gpu': gpus / num_parallel - 0.01
        })
def get_model_search_space(model_type: ModelEnum) -> Mapping[str, Any]:
    if model_type == ModelEnum.RF:
        return dict(
            max_depth=tune.randint(1, 10),
            n_estimators=tune_q_log_uniform(high=100, q=1),
        )
    elif model_type == ModelEnum.DT:
        return dict(max_leaf_nodes=tune.sample_from(_sample_max_leaf_nodes))
    elif model_type == ModelEnum.ADB:
        return dict(
            n_estimators=tune_q_log_uniform(low=1, high=500, q=1),
            learning_rate=tune.loguniform(1.0e-04, 1.0e+01),
        )
    elif model_type == ModelEnum.GB:
        return dict(
            max_leaf_nodes=tune_q_log_uniform(low=4, high=15, q=1),
            n_estimators=tune_q_log_uniform(high=500, q=1),
            learning_rate=tune.loguniform(1.0e-04, 1.0e+01),
        )
    elif model_type == ModelEnum.NN:
        return dict(
            units=tune.randint(10, 28),
            layers=tune.randint(2, 9),
            dropout=tune_bool(),
            dropout_rate=tune.uniform(0.1, 0.5),
            batch_size=tune.choice([2 ** i for i in range(6, 10)]),
            loss=tune.choice(['mean_squared_error', 'mean_absolute_error']),
            batch_normalization=tune_bool(),
            is_normalized=tune_bool(),
            patience=4,
        )
    elif model_type == ModelEnum.KN:
        return dict(n_neighbors=1)
    elif model_type == ModelEnum.COP:
        return dict()
    else:
        raise ValueError(model_type)
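# Usage sketch for get_model_search_space above: fetch a per-model search
# space and hand it to tune.run. `train_model` is a hypothetical trainable;
# ModelEnum, tune_q_log_uniform and tune_bool are helpers assumed to be
# defined alongside this function.
rf_space = get_model_search_space(ModelEnum.RF)
analysis = tune.run(train_model, config=rf_space, num_samples=10)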