def hyperparameter_opt(name, list_functions, beyond_window, pen_min, pen_max, rounding, window_preprocessing=10):
    """Search the penalization hyperparameter with Ray Tune and save the results.

    For every sampled penalization value, the analysis is run once per data
    creation function and the first three entries of each result are averaged
    and reported to Tune as ``precision``, ``recall`` and ``average_delay``.
    Afterwards an F1 column is added to the results frame, the frame is
    pickled to ``results<name>.pkg`` and two scatter plots are saved as
    ``results<name>_recall_vs_prec.png`` and ``results<name>_f1.png``.

    Args:
        name: Suffix inserted into the output file names.
        list_functions: Data creation functions to average over.
        beyond_window: Forwarded as ``obs_amount_beyond_window``.
        pen_min: Lower bound of the penalization search range.
        pen_max: Upper bound of the penalization search range.
        rounding: Quantization step passed to ``tune.quniform``.
        window_preprocessing: Only used by the commented-out RBF analysis.

    Raises:
        ValueError: If ``list_functions`` is empty.
    """
    # Materialize once: the functions are iterated in every trial, and the
    # averages below need their count.
    data_functions = list(list_functions)
    if not data_functions:
        raise ValueError("list_functions must contain at least one data creation function")

    # change if needed
    def objective(pen, function):
        # Alternative analyses, kept for quick switching:
        # return functions.analysis_rbf(penalization = pen, iterations = 10, size_concepts=200,
        #     data_creation_function = function, obs_amount_beyond_window=beyond_window,
        #     windowsize_preprocessing = window_preprocessing)
        # return functions.analysis_linear(penalization = pen, iterations = 20, size_concepts=200,
        #     data_creation_function = function, obs_amount_beyond_window=beyond_window)
        return functions.analysis_l2(
            penalization=pen,
            iterations=10,
            size_concepts=200,
            data_creation_function=function,
            obs_amount_beyond_window=beyond_window,
        )

    def training_function(config):
        # Hyperparameters
        pen = config["pen"]
        total_prec = 0
        total_rec = 0
        total_del = 0
        for function in data_functions:
            intermediate_result = objective(pen, function)
            total_prec += intermediate_result[0]
            total_rec += intermediate_result[1]
            total_del += intermediate_result[2]
        # Average over the actual number of functions instead of assuming
        # that exactly three were passed in.
        count = len(data_functions)
        tune.report(
            precision=total_prec / count,
            recall=total_rec / count,
            average_delay=total_del / count,
        )

    analysis = tune.run(
        training_function,
        config={
            "pen": tune.quniform(pen_min, pen_max, rounding),
        },
        num_samples=100,
    )

    df2 = analysis.results_df
    # F1 = 2 * (precision * recall) / (precision + recall)
    precision = df2["precision"]
    recall = df2["recall"]
    # 0/0 yields NaN when precision and recall are both zero; treat it as 0.
    # Assigning the filled series avoids an in-place fillna on a column
    # selection, which may not propagate to the frame.
    df2["f1"] = (2 * (precision * recall) / (precision + recall)).fillna(0)

    # change name here
    df2.to_pickle("results" + name + ".pkg")

    # Plot 1: recall and precision against the penalization value.
    ax = df2.plot.scatter(x='config.pen', y='recall', label='Recall', color='Green')
    df2.plot.scatter(x='config.pen', y='precision', color='Orange', label='Precision', ax=ax)
    plt.xlabel("Penalization")
    plt.ylabel("Rate")
    plt.savefig("results" + name + "_recall_vs_prec.png", dpi=150)

    # Plot 2: F1 against the penalization value.
    ax = df2.plot.scatter(x='config.pen', y='f1', color='Green')
    plt.xlabel("Penalization")
    plt.savefig("results" + name + "_f1.png", dpi=150)
def update_search_space(self, search_space):
    ''' Tuners are advised to support updating search space at run-time.
    If a tuner can only set search space once before generating first
    hyper-parameters, it should explicitly document this behaviour.

    Each entry of ``search_space`` maps a hyperparameter name to a dict
    with a ``"_type"`` and a ``"_value"`` list. The entry is translated
    into the matching sampling domain, and the resulting config is pushed
    to the local searcher and, when present, the global searcher before
    the search is re-initialized.

    search_space: JSON object created by experiment owner

    Raises:
        ValueError: if an entry has an unsupported ``"_type"``.
    '''
    config = {}
    # Iterate over (name, spec) pairs; iterating the dict itself would
    # yield only the keys and break the unpacking.
    for key, value in search_space.items():
        v = value.get("_value")
        _type = value['_type']
        if _type == 'choice':
            config[key] = choice(v)
        elif _type == 'randint':
            # NOTE(review): the upper bound is reduced by one here;
            # confirm this matches the bound semantics of `randint`.
            config[key] = randint(v[0], v[1] - 1)
        elif _type == 'uniform':
            config[key] = uniform(v[0], v[1])
        elif _type == 'quniform':
            config[key] = quniform(v[0], v[1], v[2])
        elif _type == 'loguniform':
            config[key] = loguniform(v[0], v[1])
        elif _type == 'qloguniform':
            config[key] = qloguniform(v[0], v[1], v[2])
        elif _type == 'normal':
            # "_value" is [mu, sigma], indexed from 0 like every other type.
            config[key] = randn(v[0], v[1])
        elif _type == 'qnormal':
            # "_value" is [mu, sigma, q].
            config[key] = qrandn(v[0], v[1], v[2])
        else:
            raise ValueError(
                f'unsupported type in search_space {_type}')
    self._ls.set_search_properties(None, None, config)
    if self._gs is not None:
        self._gs.set_search_properties(None, None, config)
    self._init_search()
def make_config(num_cpus):
    """Assemble the tuning configuration dictionary.

    Args:
        num_cpus: Value stored under ``"num_workers"``.

    Returns:
        A dict mixing Ray Tune sampling domains (batch size, logit scale
        init, warmup steps), a per-prefix ``"opt_config"`` mapping and
        fixed evaluation settings.
    """
    batch_size = tune.choice([32, 64])
    # value from checkpoint is 4.6
    logit_scale_init = tune.quniform(-1.0, 5.0, q=0.1)

    # for each parameter, will find the longest matching prefix and apply that rule.
    rate_by_prefix = [
        ("logit_scale", 0.0),
        ("text_model", 0.7),
        ("text_model.embeddings", 0.7),
        ("text_model.encoder.layers.0.layer_norm", 0.5),
        ("text_model.encoder.layers.11", 0.5),
        ("text_model.encoder.layers.11.mlp", 0.3),
        ("text_model.encoder.layers.11.layer_norm2", 0.3),
        ("text_model.final_layer_norm", 0.1),
        ("text_projection", 0.1),
    ]
    opt_config = {prefix: random_rate(rate) for prefix, rate in rate_by_prefix}
    # These two prefixes carry no rate rule.
    opt_config["vision_model"] = None
    opt_config["visual_projection"] = None

    return {
        "batch_size": batch_size,
        "logit_scale_init": logit_scale_init,
        "opt_config": opt_config,
        "num_warmup_steps": tune.choice([20, 40]),
        "num_workers": num_cpus,
        "test_size": 1000,
        "val_batch_size": 500,
    }
def quniform(lower, upper, q):
    '''
    Build a quantized uniform sampling domain by delegating to
    ``tune.quniform``.

    The sampled float lies between lower and upper and is rounded to the
    nearest multiple of q; upper is included.

    :param lower: Lower bound of the sampling range.
    :param upper: Upper bound of the sampling range.
    :param q: Granularity for increment.
    '''
    domain = tune.quniform(lower, upper, q)
    return domain
def update_search_space(self, search_space):
    """Required by NNI.

    Translate an NNI-style search space into sampling domains, then
    rebuild the local searcher (and the global searcher, when one exists)
    around the new space and re-initialize the search.

    Tuners are advised to support updating search space at run-time.
    If a tuner can only set search space once before generating first
    hyper-parameters, it should explicitly document this behaviour.

    Args:
        search_space: JSON object created by experiment owner.

    Raises:
        ValueError: If an entry has an unsupported ``"_type"``.
    """
    # Domain constructors that receive the "_value" list unpacked as
    # positional arguments; "choice" is handled separately because it
    # takes the whole list as one argument.
    unpacked_samplers = {
        "randint": randint,
        "uniform": uniform,
        "quniform": quniform,
        "loguniform": loguniform,
        "qloguniform": qloguniform,
        "normal": randn,
        "qnormal": qrandn,
    }

    config = {}
    for name, spec in search_space.items():
        values = spec.get("_value")
        _type = spec["_type"]
        if _type == "choice":
            config[name] = choice(values)
            continue
        sampler = unpacked_samplers.get(_type)
        if sampler is None:
            raise ValueError(f"unsupported type in search_space {_type}")
        config[name] = sampler(*values)

    # low_cost_partial_config is passed to constructor,
    # which is before update_search_space() is called
    previous_ls = self._ls
    init_config = previous_ls.init_config
    add_cost_to_space(config, init_config, self._cat_hp_cost)
    self._ls = self.LocalSearch(
        init_config,
        previous_ls.metric,
        self._mode,
        config,
        previous_ls.resource_attr,
        previous_ls.min_resource,
        previous_ls.max_resource,
        previous_ls.resource_multiple_factor,
        cost_attr=self.cost_attr,
        seed=previous_ls.seed,
    )

    if self._gs is not None:
        # Reuse the sampler of the global searcher being replaced.
        self._gs = GlobalSearch(
            space=config,
            metric=self._metric,
            mode=self._mode,
            sampler=self._gs._sampler,
        )
        self._gs.space = config

    self._init_search()
def testTuneSampleAPI(self):
    """Check that every sampling domain yields values in its declared range.

    Quantized domains are additionally checked to land on their grid, and
    the ``sample_from`` entry is checked against the value it depends on.
    """
    search_space = {
        "func": tune.sample_from(lambda spec: spec.config.uniform * 0.01),
        "uniform": tune.uniform(-5, -1),
        "quniform": tune.quniform(3.2, 5.4, 0.2),
        "loguniform": tune.loguniform(1e-4, 1e-2),
        "qloguniform": tune.qloguniform(1e-4, 1e-1, 5e-5),
        "choice": tune.choice([2, 3, 4]),
        "randint": tune.randint(-9, 15),
        "qrandint": tune.qrandint(-21, 12, 3),
        "randn": tune.randn(10, 2),
        "qrandn": tune.qrandn(10, 2, 0.2),
    }

    def check_on_grid(sample, step):
        # A value on the grid divided by the step is (almost) a whole number.
        self.assertAlmostEqual(sample / step, round(sample / step))

    variants = generate_variants({"config": search_space})
    # Pairing with range(1000) caps the number of variants inspected.
    for _, (_, variant) in zip(range(1000), variants):
        out = variant["config"]

        self.assertAlmostEqual(out["func"], out["uniform"] * 0.01)

        self.assertGreaterEqual(out["uniform"], -5)
        self.assertLess(out["uniform"], -1)

        self.assertGreaterEqual(out["quniform"], 3.2)
        self.assertLessEqual(out["quniform"], 5.4)
        check_on_grid(out["quniform"], 0.2)

        self.assertGreaterEqual(out["loguniform"], 1e-4)
        self.assertLess(out["loguniform"], 1e-2)

        self.assertGreaterEqual(out["qloguniform"], 1e-4)
        self.assertLessEqual(out["qloguniform"], 1e-1)
        check_on_grid(out["qloguniform"], 5e-5)

        self.assertIn(out["choice"], [2, 3, 4])

        self.assertGreaterEqual(out["randint"], -9)
        self.assertLess(out["randint"], 15)

        self.assertGreaterEqual(out["qrandint"], -21)
        self.assertLessEqual(out["qrandint"], 12)
        self.assertEqual(out["qrandint"] % 3, 0)

        # Very improbable
        self.assertGreater(out["randn"], 0)
        self.assertLess(out["randn"], 20)

        self.assertGreater(out["qrandn"], 0)
        self.assertLess(out["qrandn"], 20)
        check_on_grid(out["qrandn"], 0.2)
def hp_space_fn(empty_arg):
    """Return the hyperparameter search space plus a ``"wandb"`` section.

    Args:
        empty_arg: Unused by this function.

    Returns:
        A dict with sampling domains for batch size, learning rate and
        number of epochs, and a ``"wandb"`` entry whose project and API key
        are read from the ``WANDB_PROJECT`` and ``API_KEY`` environment
        variables.
    """
    return {
        "per_device_train_batch_size": tune.choice([2, 4]),
        "learning_rate": tune.choice([5e-5, 3e-5, 2e-5]),
        "num_train_epochs": tune.quniform(0.0, 10.0, 0.5),
        "wandb": {
            # Falls back to 'wandb_project' when WANDB_PROJECT is unset.
            "project": os.environ.get('WANDB_PROJECT', 'wandb_project'),
            # None when API_KEY is unset.
            "api_key": os.environ.get('API_KEY'),
            "log_config": True,
        },
    }
"relu_dropout": 0.1, "res_dropout": 0.1, } points_to_evaluate = [] for k, v in org_config.items(): point = copy(org_config) if point[k] + 0.1 <= 1.0: point[k] += 0.1 points_to_evaluate.append(point) if point[k] - 0.1 > 0.0: point = copy(org_config) point[k] -= 0.1 points_to_evaluate.append(point) search_space = {k: tune.quniform(0, 0.5, 0.05) for k in org_config.keys()} # for p in points_to_evaluate: # print(p) tune.run( train, config=search_space, resources_per_trial={ "cpu": 16, "gpu": 1 }, search_alg=BasicVariantGenerator(points_to_evaluate=points_to_evaluate), name="tune_mosi_dropouts", )
'pool_len': tune.choice([2, 3, 4]), 'fc_dims': None, 'fc_num_layers': tune.choice([3, 4, 5]), 'fc_1_pre': tune.uniform(np.log(40), np.log(200)), 'fc_2': tune.choice([1, 2, 3]), 'fc_3': tune.choice([1, 2, 3]), 'fc_4': tune.choice([1, 2, 3]), 'fc_5': tune.choice([1, 2, 3]), 'fc_6': tune.choice([1, 2, 3]), 'fc_7': tune.choice([1, 2, 3]), 'dropFrac_pre': tune.uniform(np.log(0.02), np.log(0.1)), 'lossName': tune.choice(['mse']), 'optimizer': None, 'optName': tune.choice(['adam']), 'batch_size': None, 'batch_size_j': tune.quniform(5, 8, 1), 'initLR_pre': tune.uniform(np.log(3e-4), np.log(4e-3)), 'reduceLR_pre': tune.uniform(np.log(0.05), np.log(0.4)), 'min_deltaLR_pre': tune.uniform(np.log(3e-5), np.log(1e-4)), 'steps': None, 'batch_norm_cnn': tune.choice([False, True]), 'batch_norm_flat': tune.choice([False, True]) } #wide space config_wide1 = { 'conv_filter': None, "cf_num_layers": tune.choice([3, 4, 5, 6]), "filter_1_pre": tune.uniform(np.log(10), np.log(50)), "filter_2": tune.choice([1, 2, 3, 4]), "filter_3": tune.choice([1, 2, 3, 4]),
from .lightningreapp import LightningReapp parser = ArgumentParser() callback_tuner = TuneReportCallback( { "loss": "val_loss", # "mean_accuracy": "val_accuracy" }, on="validation_end", ) default_tune_config = { "lr": tune.loguniform(1e-4, 1e-1), # loguniform samples by magnitude "hidden_layer_size": tune.quniform(10, 50, 1) } ### TUNING HYPERPARAMETERS def train_tune(config, **tuner_kwargs): model = LightningReapp(config) max_epochs = tuner_kwargs.get('max_epochs', 10) trainer = lit.Trainer( num_folds=3, fast_dev_run=1, max_epochs=max_epochs, gpus=1 if torch.cuda.is_available() else None, progress_bar_refresh_rate=30, callbacks=[callback_tuner],
if checkpoint_dir is not None: ck = th.load(os.path.join(checkpoint_dir, "checkpoint")) model.load_state_dict(ck["state_dict"]) trainer.current_epoch = ck["epoch"] trainer.fit(model) ck = th.load(checkpoint.best_model_path) model.load_state_dict(ck["state_dict"]) trainer.test(model) config = { "attn_dropout": tune.quniform(0, 1, 0.1), "attn_dropout_a": tune.quniform(0, 1, 0.1), "attn_dropout_v": tune.quniform(0, 1, 0.1), "embed_dropout": tune.quniform(0, 1, 0.1), "out_dropout": tune.quniform(0, 1, 0.1), "relu_dropout": tune.quniform(0, 1, 0.1), "res_dropout": tune.quniform(0, 1, 0.1), # "project_dim": tune.choice([40, 50, 60, 70]), "lr": tune.loguniform(1e-6, 1e-3), "weight_decay": tune.loguniform(1e-10, 1e-2), } previous_best = { "attn_dropout": 0.3, "attn_dropout_a": 0.5, "attn_dropout_v": 0.0,
def get_tune(self):
    """Return the ``tune.quniform`` domain built from this object's
    ``lower``, ``upper`` and ``q`` attributes."""
    # Imported inside the method, as in the original.
    from ray import tune

    bounds = {"lower": self.lower, "upper": self.upper, "q": self.q}
    return tune.quniform(**bounds)
}, "batch_mode": "complete_episodes", "observation_filter": "NoFilter", "framework": "tf", } config_low_search = { "env": ENV_LOW, "callbacks": RewardLogCallback, "num_workers": 6, "num_envs_per_worker": 10, "log_level": "WARN", "num_gpus": 1, "monitor": True, "evaluation_num_episodes": 50, "gamma": tune.quniform(0.8, 0.9997, 0.0001), "lambda": tune.quniform(0.9, 1, 0.01), "clip_param": tune.quniform(0.1, 0.3, 0.1), "kl_coeff": tune.quniform(0.3, 1, 0.1), "lr": 5e-5, "vf_clip_param": 10, "num_sgd_iter": tune.choice([10, 20, 30]), "sgd_minibatch_size": 4096, "train_batch_size": tune.randint(8192, 40000), "vf_loss_coeff": tune.quniform(0.5, 1, 0.01), "entropy_coeff": tune.quniform(0, 0.01, 0.0001), "model": { "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "free_log_std": True, },
if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() parser.add_argument("--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() ray.init() tune_kwargs = { "num_samples": 10 if args.smoke_test else 1000, "config": { "steps": 10, "height": tune.quniform(-10, 10, 1e-2), "width": tune.randint(0, 10) } } # Optional: Pass the parameter space yourself # space = { # # for continuous dimensions: (continuous, search_range, precision) # "height": (ValueType.CONTINUOUS, [-10, 10], 1e-2), # # for discrete dimensions: (discrete, search_range, has_order) # "width": (ValueType.DISCRETE, [0, 10], True) # } zoopt_search = ZOOptSearch( algo="Asracos", # only support ASRacos currently budget=tune_kwargs["num_samples"],
avg_prec = avg_prec/3 avg_rec = avg_rec/3 avg_del = avg_del/3 #function = create_simdata.linear1_abrupt #intermediate_result = objective(pen, function) tune.report(precision = avg_prec, recall = avg_rec, average_delay = avg_del) #Feed the score back back to Tune. analysis = tune.run( training_function, config={ "pen": tune.quniform(0, 500, 1), #"datafunction": tune.choice(list_data_functions), }, #num_samples=16, num_samples=100) #resources_per_trial={"cpu": 2, "gpu": 0.1}) #resources_per_trial={"gpu": 0.1}) df2 = analysis.results_df #F1 = 2 * (precision * recall) / (precision + recall) df2["f1"] = 2*(df2["precision"]*df2["recall"])/(df2["precision"]+df2["recall"]) df2["f1"].fillna(0, inplace=True) #change name here
def create_next(client):
    ''' functional API for HPO

    On the first call (no stored state) a search algorithm is built from
    the client's settings and hyperparameter space. On later calls the
    algorithm and the config-to-trial-id mapping are restored from the
    stored state. Each call then reports newly finished trials to the
    algorithm, asks it for a new config, launches a trial for that config
    when one is suggested, and stores the updated state on the client.

    client: object exposing get_state, get_settings_dict,
        get_optimization_mode, get_primary_metric,
        get_hyperparameter_space_dict, get_trials, launch_trial and
        update_state

    Raises:
        ValueError: if the ``method`` setting is neither 'BlendSearch'
            nor 'CFO'.
    '''
    state = client.get_state()
    setting = client.get_settings_dict()

    # Trial is needed on every call to generate the next trial id, so the
    # import must not live inside the first-call branch.
    try:
        from ray.tune.trial import Trial
    except ImportError:
        from ..tune.trial import Trial

    if state is None:
        # first time call
        method = setting.get('method', 'BlendSearch')
        mode = client.get_optimization_mode()
        if mode == 'minimize':
            mode = 'min'
        elif mode == 'maximize':
            mode = 'max'
        metric = client.get_primary_metric()
        hp_space = client.get_hyperparameter_space_dict()
        space = {}
        # Entries with an unrecognized "type" are left out of the space.
        for key, value in hp_space.items():
            t = value["type"]
            if t == 'continuous':
                space[key] = uniform(value["min_val"], value["max_val"])
            elif t == 'discrete':
                space[key] = choice(value["values"])
            elif t == 'integral':
                space[key] = randint(value["min_val"], value["max_val"])
            elif t == 'quantized_continuous':
                space[key] = quniform(value["min_val"], value["max_val"],
                                      value["step"])
        init_config = setting.get('init_config', None)
        points_to_evaluate = [init_config] if init_config else None
        cat_hp_cost = setting.get('cat_hp_cost', None)

        if method == 'BlendSearch':
            Algo = BlendSearch
        elif method == 'CFO':
            Algo = CFO
        else:
            # Fail with a clear message instead of an unbound local below.
            raise ValueError(
                f"unsupported method {method!r}; "
                "expected 'BlendSearch' or 'CFO'")

        algo = Algo(
            mode=mode, metric=metric, space=space,
            points_to_evaluate=points_to_evaluate,
            cat_hp_cost=cat_hp_cost,
        )
        time_budget_s = setting.get('time_budget_s', None)
        if time_budget_s:
            algo._deadline = time_budget_s + time.time()
        config2trialid = {}
    else:
        algo = state['algo']
        config2trialid = state['config2trialid']

    # update finished trials
    trials_completed = []
    for trial in client.get_trials():
        if trial.end_time is not None:
            signature = algo._ls.config_signature(trial.hp_sample)
            # Only report trials the algorithm has no result for yet.
            if not algo._result[signature]:
                trials_completed.append((trial.end_time, trial))
    # Sort by end time only, so that ties never fall back to comparing
    # trial objects.
    trials_completed.sort(key=lambda item: item[0])
    for end_time, trial in trials_completed:
        # NOTE(review): this lookup requires hp_sample to be hashable -
        # confirm against the client's trial type.
        trial_id = config2trialid[trial.hp_sample]
        result = {}
        result[algo.metric] = trial.metrics[algo.metric].values[-1]
        result[algo.cost_attr] = (end_time - trial.start_time).total_seconds()
        for key, value in trial.hp_sample.items():
            result['config/' + key] = value
        algo.on_trial_complete(trial_id, result=result)

    # propose new trial
    trial_id = Trial.generate_id()
    config = algo.suggest(trial_id)
    if config:
        config2trialid[config] = trial_id
        client.launch_trial(config)
    client.update_state({'algo': algo, 'config2trialid': config2trialid})
model_name, num_categories=num_categories) tune.report(test_accuracy=test_accuracy * 100) # initialize ray ray.init() # run the training program analysis = tune.run(tuning, name="tuning", local_dir="./ray_results", num_samples=1, resources_per_trial={ "cpu": 48, "gpu": 1 }, config={ "folder": tune.grid_search([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), "sequence_BiLSTM_model.dropout_rate": tune.quniform(0.3, 0.5, 0.05) }) # print the best result print("Best config is:", analysis.get_best_config(metric="test_accuracy", mode="max")) # Get a dataframe for analyzing trial results. df = analysis.dataframe()
parser.add_argument('--tag', type=str, help='') parser.add_argument('--n_cmt', type=int, help='') parser.add_argument('--htune', type=str, default='random') args = parser.parse_args() params = vars(args) exprid = timestamp_expr_id() args.n_cmt = 1 tune.ray_trial_executor.DEFAULT_GET_TIMEOUT = 3000000 if args.htune == 'random': config = { 'lr': tune.quniform(1e-4, 1, 1e-4), 'momentum': tune.quniform(0.8, 0.99, 0.01), 'dim_lstm_hidden': tune.choice([128, 256, 512]), 'dim_fc_hidden': tune.sample_from(lambda spec: spec.config.dim_lstm_hidden // (2**(np.random.randint(0, 3)))), 'weight_decay': tune.choice([1e-2, 1e-3, 1e-4, 1e-5]) } asha_scheduler = ASHAScheduler( metric='f1', mode='max', grace_period=10,
trained_model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.Huber(delta=0.3), metrics=[BinaryAccuracy(model_type=model_type)]) elif model_type == 'binary_classification': trained_model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics=[BinaryAccuracy(model_type=model_type)]) elif model_type == 'multi_classification': trained_model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics=[BinaryAccuracy(model_type=model_type)]) result = trained_model.evaluate(ds_test, return_dict=True) test_accuracy = result['binary_accuracy'] tune.report(test_accuracy=test_accuracy * 100) # initialize ray ray.init() # run the training program analysis = tune.run(tuning, name="EfficientNetB3_fine_tuning", local_dir="./ray_results", num_samples=1, resources_per_trial={"cpu": 48, "gpu": 1}, config={"folder": tune.grid_search([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), "efficientnetb3.trainable_rate": tune.quniform(0.01, 0.15, 0.01), "Trainer.learning_rate": tune.loguniform(1e-9, 1e-5), "output_block.dropout_rate": tune.quniform(0.3, 0.5, 0.05)}) # print the best result print("Best config is:", analysis.get_best_config(metric="test_accuracy", mode="max")) # Get a dataframe for analyzing trial results. df = analysis.dataframe()
tune.run(trainable, config={"a": 2, "b": 4}) # __run_tunable_end__ # __run_tunable_samples_start__ tune.run(trainable, config={"a": 2, "b": 4}, num_samples=10) # __run_tunable_samples_end__ # __search_space_start__ space = {"a": tune.uniform(0, 1), "b": tune.uniform(0, 1)} tune.run(trainable, config=space, num_samples=10) # __search_space_end__ # __config_start__ config = { "uniform": tune.uniform(-5, -1), # Uniform float between -5 and -1 "quniform": tune.quniform(3.2, 5.4, 0.2), # Round to increments of 0.2 "loguniform": tune.loguniform(1e-4, 1e-1), # Uniform float in log space "qloguniform": tune.qloguniform(1e-4, 1e-1, 5e-5), # Round to increments of 0.00005 "randn": tune.randn(10, 2), # Normal distribution with mean 10 and sd 2 "qrandn": tune.qrandn(10, 2, 0.2), # Round to increments of 0.2 "randint": tune.randint(-9, 15), # Random integer between -9 and 15 "qrandint": tune.qrandint(-21, 12, 3), # Round to increments of 3 (includes 12) "lograndint": tune.lograndint(1, 10), # Random integer in log space "qlograndint": tune.qlograndint(1, 10, 2), # Round to increments of 2 "choice": tune.choice(["a", "b", "c"]), # Choose one of these options uniformly "func": tune.sample_from( lambda spec: spec.config.uniform * 0.01), # Depends on other value "grid": tune.grid_search([32, 64, 128]), # Search over all these values
import torch.nn.functional as F import torch.optim as optim from ray import tune from ray.tune import CLIReporter from ray.tune.schedulers import ASHAScheduler from model import ExtractiveModel default_config = { "optimizer": tune.choice(['adam', 'sgd']), # tune.grid_search(['adam', 'sgd']), "lr": tune.loguniform(1e-4, 1e-1), # tune.loguniform(1e-4, 1e-1), "weight_decay": tune.loguniform(1e-6, 1e-3), "scheduler": tune.choice(['step', 'cosine']), # tune.grid_search(['cosine', 'step']), "max_word_dropout_ratio": tune.quniform(0.1, 0.5, 0.05), # tune.choice([0.1, 0.2, 0.3]), "word_dropout_prob": tune.quniform(0.0, 1.0, 0.1), "label_smoothing": tune.choice([0.1, 0.0]), # tune.grid_search([0.1, 0.0]), "use_multi_class": False, # tune.grid_search([True, False]), "freeze_bert": tune.choice([False, True]), "use_bert_sum_words": tune.choice([True, False]), # tune.grid_search([True, False]), "use_pos": tune.choice([True, False]), # True, # tune.grid_search([True, False]), "use_media": tune.choice([True, False]), # tune.grid_search([True, False]), "simple_model": tune.choice([False, True]), # tune.grid_search([True, False]), "max_token_cnt": tune.choice([200, 300, 400, 500]), "dim_feedforward": tune.choice([512, 768, 1024]),