def test_trial_completion(self):
    ax_client = AxClient()
    ax_client.create_experiment(
        parameters=[
            {"name": "x1", "type": "range", "bounds": [-5.0, 10.0]},
            {"name": "x2", "type": "range", "bounds": [0.0, 15.0]},
        ],
        minimize=True,
    )
    params, idx = ax_client.get_next_trial()
    ax_client.complete_trial(trial_index=idx, raw_data={"objective": (0, 0.0)})
    self.assertEqual(ax_client.get_best_parameters()[0], params)
    params2, idx2 = ax_client.get_next_trial()
    ax_client.complete_trial(trial_index=idx2, raw_data=(-1, 0.0))
    self.assertEqual(ax_client.get_best_parameters()[0], params2)
    params3, idx3 = ax_client.get_next_trial()
    ax_client.complete_trial(
        trial_index=idx3, raw_data=-2, metadata={"dummy": "test"}
    )
    self.assertEqual(ax_client.get_best_parameters()[0], params3)
    self.assertEqual(
        ax_client.experiment.trials.get(2).run_metadata.get("dummy"), "test"
    )
    best_trial_values = ax_client.get_best_parameters()[1]
    self.assertEqual(best_trial_values[0], {"objective": -2.0})
    self.assertTrue(math.isnan(best_trial_values[1]["objective"]["objective"]))
def test_trial_completion(self):
    ax_client = AxClient()
    ax_client.create_experiment(
        parameters=[
            {"name": "x", "type": "range", "bounds": [-5.0, 10.0]},
            {"name": "y", "type": "range", "bounds": [0.0, 15.0]},
        ],
        minimize=True,
    )
    params, idx = ax_client.get_next_trial()
    # Can't update before completing.
    with self.assertRaisesRegex(ValueError, ".* not yet"):
        ax_client.update_trial_data(
            trial_index=idx, raw_data={"objective": (0, 0.0)}
        )
    ax_client.complete_trial(trial_index=idx, raw_data={"objective": (0, 0.0)})
    # Cannot complete a trial twice, should use `update_trial_data`.
    with self.assertRaisesRegex(ValueError, ".* already been completed"):
        ax_client.complete_trial(trial_index=idx, raw_data={"objective": (0, 0.0)})
    # Cannot update trial data with observation for a metric it already has.
    with self.assertRaisesRegex(ValueError, ".* contained an observation"):
        ax_client.update_trial_data(
            trial_index=idx, raw_data={"objective": (0, 0.0)}
        )
    # Same as above, except objective name should be getting inferred.
    with self.assertRaisesRegex(ValueError, ".* contained an observation"):
        ax_client.update_trial_data(trial_index=idx, raw_data=1.0)
    ax_client.update_trial_data(trial_index=idx, raw_data={"m1": (1, 0.0)})
    metrics_in_data = ax_client.experiment.fetch_data().df["metric_name"].values
    self.assertIn("m1", metrics_in_data)
    self.assertIn("objective", metrics_in_data)
    self.assertEqual(ax_client.get_best_parameters()[0], params)
    params2, idy = ax_client.get_next_trial()
    ax_client.complete_trial(trial_index=idy, raw_data=(-1, 0.0))
    self.assertEqual(ax_client.get_best_parameters()[0], params2)
    params3, idx3 = ax_client.get_next_trial()
    ax_client.complete_trial(
        trial_index=idx3, raw_data=-2, metadata={"dummy": "test"}
    )
    self.assertEqual(ax_client.get_best_parameters()[0], params3)
    self.assertEqual(
        ax_client.experiment.trials.get(2).run_metadata.get("dummy"), "test"
    )
    best_trial_values = ax_client.get_best_parameters()[1]
    self.assertEqual(best_trial_values[0], {"objective": -2.0})
    self.assertTrue(math.isnan(best_trial_values[1]["objective"]["objective"]))
def test_attach_trial_ttl_seconds(self):
    ax_client = AxClient()
    ax_client.create_experiment(
        parameters=[
            {"name": "x", "type": "range", "bounds": [-5.0, 10.0]},
            {"name": "y", "type": "range", "bounds": [0.0, 15.0]},
        ],
        minimize=True,
    )
    params, idx = ax_client.attach_trial(
        parameters={"x": 0.0, "y": 1.0}, ttl_seconds=1
    )
    self.assertTrue(ax_client.experiment.trials.get(idx).status.is_running)
    time.sleep(1)  # Wait for TTL to elapse.
    self.assertTrue(ax_client.experiment.trials.get(idx).status.is_failed)
    # Also make sure we can no longer complete the trial as it is failed.
    with self.assertRaisesRegex(
        ValueError, ".* has been marked FAILED, so it no longer expects data."
    ):
        ax_client.complete_trial(trial_index=idx, raw_data=5)
    params2, idx2 = ax_client.attach_trial(
        parameters={"x": 0.0, "y": 1.0}, ttl_seconds=1
    )
    ax_client.complete_trial(trial_index=idx2, raw_data=5)
    self.assertEqual(ax_client.get_best_parameters()[0], params2)
    self.assertEqual(
        ax_client.get_trial_parameters(trial_index=idx2), {"x": 0, "y": 1}
    )
def test_attach_trial_and_get_trial_parameters(self):
    ax_client = AxClient()
    ax_client.create_experiment(
        parameters=[
            {"name": "x", "type": "range", "bounds": [-5.0, 10.0]},
            {"name": "y", "type": "range", "bounds": [0.0, 15.0]},
        ],
        minimize=True,
    )
    params, idx = ax_client.attach_trial(parameters={"x": 0.0, "y": 1.0})
    ax_client.complete_trial(trial_index=idx, raw_data=5)
    self.assertEqual(ax_client.get_best_parameters()[0], params)
    self.assertEqual(
        ax_client.get_trial_parameters(trial_index=idx), {"x": 0, "y": 1}
    )
    with self.assertRaises(ValueError):
        ax_client.get_trial_parameters(trial_index=10)  # No trial #10 in experiment.
    with self.assertRaisesRegex(ValueError, ".* is of type"):
        ax_client.attach_trial({"x": 1, "y": 2})
def model_training_hyperparameter_tuning(max_epochs, total_trials, params):
    """
    Runs hyperparameter tuning as a nested MLflow run. The parameters, metrics,
    model, and summary of each trial are logged under their respective MLflow
    run IDs, and the best parameters are logged along with the baseline model.

    :param max_epochs: Maximum number of epochs used for training the model. Type: int
    :param total_trials: Number of AxClient experiment trials. Type: int
    :param params: Model parameters. Type: dict
    """
    with mlflow.start_run(run_name="Parent Run"):
        train_evaluate(params=params, max_epochs=max_epochs)

        ax_client = AxClient()
        ax_client.create_experiment(
            parameters=[
                {"name": "lr", "type": "range", "bounds": [1e-3, 0.15], "log_scale": True},
                {"name": "weight_decay", "type": "range", "bounds": [1e-4, 1e-3]},
                {"name": "momentum", "type": "range", "bounds": [0.7, 1.0]},
            ],
            objective_name="test_accuracy",
        )

        for i in range(total_trials):
            with mlflow.start_run(nested=True, run_name="Trial " + str(i)) as child_run:
                parameters, trial_index = ax_client.get_next_trial()
                test_accuracy = train_evaluate(params=parameters, max_epochs=max_epochs)
                # Completion of trial.
                ax_client.complete_trial(
                    trial_index=trial_index, raw_data=test_accuracy.item()
                )

        best_parameters, metrics = ax_client.get_best_parameters()
        for param_name, value in best_parameters.items():
            mlflow.log_param("optimum_" + param_name, value)
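A minimal invocation sketch for the MLflow tuning helper above; the baseline parameter values and the epoch/trial counts are illustrative assumptions, not taken from the original snippet.

# Hypothetical usage of model_training_hyperparameter_tuning (values are illustrative only).
baseline_params = {"lr": 0.01, "weight_decay": 5e-4, "momentum": 0.9}
model_training_hyperparameter_tuning(max_epochs=5, total_trials=10, params=baseline_params)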
def test_ttl_trial(self):
    ax_client = AxClient()
    ax_client.create_experiment(
        parameters=[
            {"name": "x", "type": "range", "bounds": [-5.0, 10.0]},
            {"name": "y", "type": "range", "bounds": [0.0, 15.0]},
        ],
        minimize=True,
    )
    # A TTL trial that expires adds no data.
    params, idx = ax_client.get_next_trial(ttl_seconds=1)
    self.assertTrue(ax_client.experiment.trials.get(idx).status.is_running)
    time.sleep(1)  # Wait for TTL to elapse.
    self.assertTrue(ax_client.experiment.trials.get(idx).status.is_failed)
    # Also make sure we can no longer complete the trial as it is failed.
    with self.assertRaisesRegex(
        ValueError, ".* has been marked FAILED, so it no longer expects data."
    ):
        ax_client.complete_trial(trial_index=idx, raw_data={"objective": (0, 0.0)})
    params2, idy = ax_client.get_next_trial(ttl_seconds=1)
    ax_client.complete_trial(trial_index=idy, raw_data=(-1, 0.0))
    self.assertEqual(ax_client.get_best_parameters()[0], params2)
def test_attach_trial_numpy(self):
    ax_client = AxClient()
    ax_client.create_experiment(
        parameters=[
            {"name": "x1", "type": "range", "bounds": [-5.0, 10.0]},
            {"name": "x2", "type": "range", "bounds": [0.0, 15.0]},
        ],
        minimize=True,
    )
    params, idx = ax_client.attach_trial(parameters={"x1": 0, "x2": 1})
    ax_client.complete_trial(trial_index=idx, raw_data=np.int32(5))
    self.assertEqual(ax_client.get_best_parameters()[0], params)
def raytune_ax_train(model_params: dict, config_params: dict):
    depth = [int(d) for d in config_params['ht_depth_range'].split(',')]
    features = [float(d) for d in config_params['ht_features_range'].split(',')]
    estimators = [int(d) for d in config_params['ht_est_range'].split(',')]
    experiments = config_params['ht_experiments']

    ax = AxClient(enforce_sequential_optimization=False)
    ax.create_experiment(
        name="hpo_experiment",
        parameters=[
            {
                "name": "max_depth",
                "type": "range",
                "bounds": depth,
                "parameter_type": ParameterType.INT,
            },
            {
                "name": "max_features",
                "type": "range",
                "bounds": features,
                "parameter_type": ParameterType.FLOAT,
            },
            {
                "name": "n_estimators",
                "type": "range",
                "bounds": estimators,
                "parameter_type": ParameterType.INT,
            },
        ],
        objective_name="accuracy",
        minimize=False,
    )

    tune.run(
        run_or_experiment=lambda parameters: ax_train_proxy(
            model_params=model_params,
            config_params=config_params,
            ax_params=parameters,
        ),
        num_samples=experiments,
        search_alg=AxSearch(ax),  # Note that the argument here is the `AxClient`.
        verbose=1,  # Set this level to 1 to see status updates and to 2 to also see trial results.
        # To use GPU, specify: resources_per_trial={"gpu": 1}.
        resources_per_trial={"gpu": 1} if ('GPU' in config_params['compute']) else {"cpu": 8},
    )
    print("FINISHED RAY TUNE RUN", flush=True)

    best_parameters, best_values = ax.get_best_parameters()
    means, covariances = best_values
    print("Ax Optimization Results:", flush=True)
    print(best_parameters, flush=True)
    print(best_values, flush=True)
    return means['accuracy']
def hyperparameter_search(n_jobs: int, params: dict, name: str = 'hyperparameter_search'):
    _, _, samples = data.pipeline(
        params['data']['n_input_steps'],
        params['data']['n_output_steps'],
        params['paths']['data'],
    )
    datasets = data.get_datasets(samples, params['data']['n_input_steps'])

    # Set up Ax.
    from ax.service.ax_client import AxClient
    ax_client = AxClient(enforce_sequential_optimization=False)

    # Define hyperparameter bounds.
    ax_client.create_experiment(
        name=name,
        parameters=[
            {"name": "num_epochs", "type": "range", "bounds": [150, 200]},
            {"name": "learning_rate", "type": "range", "bounds": [5e-4, 1e-3], "log_scale": True},
            {"name": "batch_size", "type": "range", "bounds": [64, 1024]},
            {"name": "variational_dropout_p", "type": "range", "bounds": [0.2, 0.5]},
        ],
        objective_name='loss',
        minimize=True,
    )

    for job in range(n_jobs):
        parameters, trial_index = ax_client.get_next_trial()
        ax_client.complete_trial(
            trial_index=trial_index,
            raw_data=train_evaluate(parameters, datasets=datasets)['loss'],
        )

    print(f'Best parameters found after {n_jobs} trials:')
    print(ax_client.get_best_parameters())

    ax_client.save_to_json_file()
"train_size >= batch_size" ], minimize=False, objective_name="objective", outcome_constraints=None, name="Test") # print ("axclient",ax_client.experiment.trials ) # for loop in range(n_loops): for loop in range(n_loops): print(f"Running trial {loop}/{n_loops}...") parameters, trial_index = ax_client.get_next_trial() print("trial_index", trial_index) time.sleep(2) # parameters["n_epochs"] = 5 # Local evaluation here can be replaced with deployment to external system. ax_client.complete_trial(trial_index=trial_index, raw_data=train_evaluate(parameters)) print("Best params", ax_client.get_best_parameters()) # periodic save if loop % save_loop == (save_loop - 1): optim_result = ax_client.get_best_parameters() # print("best_parameters", optim_result) # print("was I saved?", ax._save_experiment_and_generation_strategy_if_possible()) hyper = {} hyper["best_params"] = optim_result hyper["axclient"] = ax_client.to_json_snapshot() # print("optim_result", optim_result) with open(f"hyperparameters_{run_mode}.pl", "wb") as handle: pickle.dump(hyper, handle, protocol=pickle.HIGHEST_PROTOCOL)
def ml_run(self, run_id=None):
    seed_randomness(self.random_seed)

    mlflow.log_params(flatten(get_params_of_task(self)))

    total_training_time = 0

    # Should land in 'optimizer_props'.
    params_space = [
        {
            'name': 'lr',
            'type': 'range',
            'bounds': [1e-6, 0.008],
            # 'value_type': 'float',
            'log_scale': True,
        },
        {
            'name': 'beta_1',
            'type': 'range',
            'bounds': [.0, 0.9999],
            'value_type': 'float',
            # 'log_scale': True,
        },
        {
            'name': 'beta_2',
            'type': 'range',
            'bounds': [.0, 0.9999],
            'value_type': 'float',
            # 'log_scale': True,
        },
    ]

    # TODO: make the search reproducible.
    # Without it we will get new params each time.
    # For example we can use:
    # ax.storage.sqa_store.structs.DBSettings
    # DBSettings(url="sqlite://<path-to-file>")
    # to store experiments.
    ax = AxClient(
        # Can't use that feature yet; got the error:
        # NotImplementedError:
        # Saving and loading experiment in `AxClient` functionality currently under development.
        # db_settings=DBSettings(url=self.output()['ax_settings'].path)
    )

    # FIXME: temporary solution while Ax doesn't have an API to (re-)store state.
    class_name = get_class_name_as_snake(self)
    ax.create_experiment(
        name=f'{class_name}_experiment',
        parameters=params_space,
        objective_name='score',
        minimize=should_minimize(self.metric),
        # parameter_constraints=['x1 + x2 <= 2.0'],  # Optional.
        # outcome_constraints=['l2norm <= 1.25'],  # Optional.
    )

    trial_index = 0
    experiment = self._get_ax_experiment()
    if experiment:
        print('AX: restore experiment')
        print('AX: num_trials:', experiment.num_trials)
        ax._experiment = experiment
        trial_index = experiment.num_trials - 1

    model_task = get_model_task_by_name(self.model_name)

    while trial_index < self.max_runs:
        print(f'AX: Running trial {trial_index + 1}/{self.max_runs}...')

        # Get the last unfinished trial.
        parameters = get_last_unfinished_params(ax)
        if parameters is None:
            print('AX: generate new Trial')
            parameters, trial_index = ax.get_next_trial()
            # Good time to store the experiment (with the new Trial).
            with self.output()['ax_experiment'].open('w') as f:
                print('AX: store experiment: ', ax.experiment)
                pickle.dump(ax.experiment, f)

        print('AX: parameters', parameters)

        # Now it is time to evaluate the model.
        model_result = yield model_task(
            parent_run_id=run_id,
            random_seed=self.random_seed,
            # TODO: actually we should be able to pass even nested params
            # **parameters,
            optimizer_props=parameters,
        )

        # TODO: store run_id in Trial
        model_run_id = self.get_run_id_from_result(model_result)

        with model_result['metrics'].open('r') as f:
            model_metrics = yaml.load(f)

        model_score_mean = model_metrics[self.metric]['val']
        # TODO: we might know it :/
        model_score_error = 0.0

        total_training_time += model_metrics['train_time']['total']

        with model_result['params'].open('r') as f:
            model_params = yaml.load(f)

        print('AX: complete trial:', trial_index)
        ax.complete_trial(
            trial_index=trial_index,
            raw_data={'score': (model_score_mean, model_score_error)},
            metadata={
                'metrics': model_metrics,
                'params': model_params,
                'run_id': model_run_id,
            },
        )

    best_parameters, _ = ax.get_best_parameters()

    mlflow.log_metric('train_time.total', total_training_time)

    print('best params', best_parameters)
    best_trial = get_best_trial(experiment, self.metric)
    mlflow.log_metrics(flatten(best_trial.run_metadata['metrics']))
    mlflow.log_params(flatten(best_trial.run_metadata['params']))
    parameters=PARAMETERS,
    objective_name="mean_square_error",
    minimize=True,
)

for _ in range(N_TRIALS):
    print(f"[ax-service-loop] Trial {_ + 1} of {N_TRIALS}")
    parameters, trial_index = ax.get_next_trial()
    ax.complete_trial(trial_index=trial_index, raw_data=evaluate(parameters))
    print(parameters)
    print("")

print("[ax-service-loop] Training complete!")
best_parameters, metrics = ax.get_best_parameters()

print("[ax-service-loop] Sending data to db.")
print(f"[ax-service-loop] Best parameters found: {best_parameters}")

DB_URL = os.environ.get("DB_URL", "mysql://*****:*****@localhost/axdb")

from sqlalchemy import create_engine

engine = axst.sqa_store.db.create_mysql_engine_from_url(url=DB_URL)
conn = engine.connect()
axst.sqa_store.db.init_engine_and_session_factory(url=DB_URL)
table_names = engine.table_names()
axst.sqa_store.db.create_all_tables(engine)
axst.sqa_store.save(experiment=ax.experiment)
conn.close()
engine.dispose()
def main(dataset_name, net_name, xp_path, data_path, load_config, load_model,
         ratio_known_normal, ratio_known_outlier, device, seed, optimizer_name,
         validation, lr, n_epochs, lr_milestone, batch_size, weight_decay,
         pretrain, ae_optimizer_name, ae_lr, ae_n_epochs, ae_lr_milestone,
         ae_batch_size, ae_weight_decay, num_threads, n_jobs_dataloader,
         normal_class, known_outlier_class, n_known_outlier_classes):
    """
    Deep SAD, a method for deep semi-supervised anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """
    ######################################################
    #                    GLOBAL CONFIG                   #
    ######################################################
    sys.path.append('../')
    xp_path = os.path.abspath(xp_path)
    data_path = os.path.abspath(data_path)

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(tune.__name__)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print paths
    logger.info('Log file is %s' % log_file)
    logger.info('Data path is %s' % data_path)
    logger.info('Export path is %s' % xp_path)

    # Print experimental setup
    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Ratio of labeled normal train samples: %.2f' % ratio_known_normal)
    logger.info('Ratio of labeled anomalous samples: %.2f' % ratio_known_outlier)
    if n_known_outlier_classes == 1:
        logger.info('Known anomaly class: %d' % known_outlier_class)
    else:
        logger.info('Number of known anomaly classes: %d' % n_known_outlier_classes)
    logger.info('Network: %s' % net_name)

    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        torch.cuda.manual_seed(cfg.settings['seed'])
        torch.backends.cudnn.deterministic = True
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    ######################################################
    #                     EXP CONFIG                     #
    ######################################################
    # Init ray
    ray.init(address='auto')

    ax = AxClient(enforce_sequential_optimization=False)
    # Default device to 'cpu' if cuda is not available
    ax.create_experiment(
        name="cicflow_mlp_experiment",
        parameters=[
            {"name": "lr", "type": "range", "bounds": [1e-6, 0.4], "log_scale": True},
            {"name": "pretrain", "type": "choice", "values": [False, True]},
        ],
        objective_name="mean_auc",
    )

    def mlp_trainable(parameterization, reporter):
        return train_evaluate(
            parameterization,
            reporter,
            validation=validation,
            data_path=data_path,
            n_known_outlier_classes=n_known_outlier_classes,
            ratio_known_normal=ratio_known_normal,
            ratio_known_outlier=ratio_known_outlier,
            cfg=cfg,
            n_jobs_dataloader=n_jobs_dataloader,
            net_name=net_name,
            pretrain=pretrain,
        )

    tune.run(
        mlp_trainable,
        name="MLP Supervised",
        num_samples=10,
        resources_per_trial={'gpu': 1},
        search_alg=AxSearch(ax),  # Note that the argument here is the `AxClient`.
        verbose=2,  # Set this level to 1 to see status updates and to 2 to also see trial results.
        # To use GPU, specify: resources_per_trial={"gpu": 1}.
    )

    best_parameters, values = ax.get_best_parameters()
    return best_parameters
class AxSearchJob(AutoSearchJob):
    """Job for hyperparameter search using [ax](https://ax.dev/)."""

    def __init__(self, config: Config, dataset, parent_job=None):
        super().__init__(config, dataset, parent_job)
        self.num_trials = self.config.get("ax_search.num_trials")
        self.num_sobol_trials = self.config.get("ax_search.num_sobol_trials")
        self.ax_client: AxClient = None

        if self.__class__ == AxSearchJob:
            for f in Job.job_created_hooks:
                f(self)

    # Overridden such that instances of search job can be pickled to workers
    def __getstate__(self):
        state = super(AxSearchJob, self).__getstate__()
        del state["ax_client"]
        return state

    def _prepare(self):
        super()._prepare()
        if self.num_sobol_trials > 0:
            # BEGIN: from /ax/service/utils/dispatch.py
            generation_strategy = GenerationStrategy(
                name="Sobol+GPEI",
                steps=[
                    GenerationStep(
                        model=Models.SOBOL,
                        num_trials=self.num_sobol_trials,
                        min_trials_observed=ceil(self.num_sobol_trials / 2),
                        enforce_num_trials=True,
                        model_kwargs={"seed": self.config.get("ax_search.sobol_seed")},
                    ),
                    GenerationStep(model=Models.GPEI, num_trials=-1, max_parallelism=3),
                ],
            )
            # END: from /ax/service/utils/dispatch.py
            self.ax_client = AxClient(generation_strategy=generation_strategy)
            choose_generation_strategy_kwargs = dict()
        else:
            self.ax_client = AxClient()
            # Set random_seed that will be used by the Sobol search auto-created by Ax.
            # Note that here the argument is called "random_seed", not "seed".
            choose_generation_strategy_kwargs = {
                "random_seed": self.config.get("ax_search.sobol_seed")
            }
        self.ax_client.create_experiment(
            name=self.job_id,
            parameters=self.config.get("ax_search.parameters"),
            objective_name="metric_value",
            minimize=not self.config.get("valid.metric_max"),
            parameter_constraints=self.config.get("ax_search.parameter_constraints"),
            choose_generation_strategy_kwargs=choose_generation_strategy_kwargs,
        )
        self.config.log(
            "ax search initialized with {}".format(self.ax_client.generation_strategy)
        )

        # Make sure sobol models are resumed correctly
        if self.ax_client.generation_strategy._curr.model == Models.SOBOL:
            self.ax_client.generation_strategy._set_current_model(
                experiment=self.ax_client.experiment, data=None
            )

            # Regenerate and drop SOBOL arms already generated. Since we fixed the seed,
            # we will skip exactly the arms already generated in the job being resumed.
            num_generated = len(self.parameters)
            if num_generated > 0:
                num_sobol_generated = min(
                    self.ax_client.generation_strategy._curr.num_trials, num_generated
                )
                for i in range(num_sobol_generated):
                    generator_run = self.ax_client.generation_strategy.gen(
                        experiment=self.ax_client.experiment
                    )
                    # self.config.log("Skipped parameters: {}".format(generator_run.arms))
                self.config.log(
                    "Skipped {} of {} Sobol trials due to prior data.".format(
                        num_sobol_generated,
                        self.ax_client.generation_strategy._curr.num_trials,
                    )
                )

    def register_trial(self, parameters=None):
        trial_id = None
        try:
            if parameters is None:
                parameters, trial_id = self.ax_client.get_next_trial()
            else:
                _, trial_id = self.ax_client.attach_trial(parameters)
        except Exception as e:
            self.config.log(
                "Cannot generate trial parameters. Will try again after a "
                + "running trial has completed. Message was: {}".format(e)
            )
        return parameters, trial_id

    def register_trial_result(self, trial_id, parameters, trace_entry):
        if trace_entry is None:
            self.ax_client.log_trial_failure(trial_index=trial_id)
        else:
            self.ax_client.complete_trial(
                trial_index=trial_id, raw_data=trace_entry["metric_value"]
            )

    def get_best_parameters(self):
        best_parameters, values = self.ax_client.get_best_parameters()
        return best_parameters, float(values[0]["metric_value"])
    overwrite_existing_experiment=True,
)

# Add scheduling of configurations, i.e. intensify solely.
asha_scheduler = ASHAScheduler(
    time_attr='training_iteration',
    metric='episode_reward_mean',
    mode='max',
)

ray.init(num_cpus=args.ray_cpus)
ray.tune.run(
    evaluate_objective,
    num_samples=RAY_TUNE_SAMPLES,
    search_alg=AxSearch(ax_client),
    scheduler=asha_scheduler,
    verbose=2,
)

# Get best parameters, retrain agent and log results for best agent.
best_parameters, values = ax_client.get_best_parameters()
ray.shutdown()

env = NFVDeepMonitor(base_env, args.logs)
callback = MetricLoggingCallback()
eval_agent = agent(**{
    'policy': policy,
    'env': env,
    'verbose': 1,
    'tensorboard_log': args.logs,
    **best_parameters,
})

tb_log_name = eval_agent.__class__.__name__
if isinstance(
class AxInterface(BaseInterface):
    """Specific override to support the Ax backend -- supports the service style API from Ax"""

    def __init__(self, tuner_config: AxTunerConfig, tuner_namespace):
        """AxInterface init call that maps variables, creates a map to fnc calls, and constructs the
        necessary underlying objects

        Args:
            tuner_config: configuration object for the ax backend
            tuner_namespace: tuner namespace that has attr classes that maps to an underlying library types
        """
        super(AxInterface, self).__init__(tuner_config, tuner_namespace)
        self._tuner_obj = AxClient(
            generation_strategy=self._tuner_config.generation_strategy,
            enforce_sequential_optimization=self._tuner_config.enforce_sequential_optimization,
            random_seed=self._tuner_config.random_seed,
            verbose_logging=self._tuner_config.verbose_logging,
        )
        # Some variables to use later
        self._trial_index = None
        self._sample_hash = None
        # Mapping spock underlying classes to ax distributions (search space)
        self._map_type = {
            "RangeHyperParameter": {
                "int": self._ax_range,
                "float": self._ax_range,
            },
            "ChoiceHyperParameter": {
                "int": self._ax_choice,
                "float": self._ax_choice,
                "str": self._ax_choice,
                "bool": self._ax_choice,
            },
        }
        # Build the correct underlying dictionary object for Ax client create experiment
        self._param_obj = self._construct()
        # Create the AxClient experiment
        self._tuner_obj.create_experiment(
            parameters=self._param_obj,
            name=self._tuner_config.name,
            objective_name=self._tuner_config.objective_name,
            minimize=self._tuner_config.minimize,
            parameter_constraints=self._tuner_config.parameter_constraints,
            outcome_constraints=self._tuner_config.outcome_constraints,
            overwrite_existing_experiment=self._tuner_config.overwrite_existing_experiment,
            tracking_metric_names=self._tuner_config.tracking_metric_names,
            immutable_search_space_and_opt_config=self._tuner_config.immutable_search_space_and_opt_config,
            is_test=self._tuner_config.is_test,
        )

    @property
    def tuner_status(self) -> AxTunerStatus:
        return AxTunerStatus(client=self._tuner_obj, trial_index=self._trial_index)

    @property
    def best(self):
        best_obj = self._tuner_obj.get_best_parameters()
        rollup_dict, _ = self._sample_rollup(best_obj[0])
        return (
            self._gen_spockspace(rollup_dict),
            best_obj[1][0][self._tuner_obj.objective_name],
        )

    @property
    def _get_sample(self):
        return self._tuner_obj.get_next_trial()

    def sample(self):
        parameters, self._trial_index = self._get_sample
        # Roll this back out into a Spockspace so it can be merged into the fixed parameter Spockspace
        # Also need to un-dot the param names to rebuild the nested structure
        rollup_dict, sample_hash = self._sample_rollup(parameters)
        self._sample_hash = sample_hash
        return self._gen_spockspace(rollup_dict)

    def _construct(self):
        param_list = []
        # These will only be nested one level deep given the tuner syntax
        for k, v in vars(self._tuner_namespace).items():
            for ik, iv in vars(v).items():
                param_fn = self._map_type[type(iv).__name__][iv.type]
                param_list.append(param_fn(name=f"{k}.{ik}", val=iv))
        return param_list

    def _ax_range(self, name, val):
        """Assemble the dictionary for ax range parameters

        Args:
            name: parameter name
            val: current attr val

        Returns:
            dictionary that can be added to a parameter list
        """
        low, high = self._try_range_cast(val, type_string="RangeHyperParameter")
        return {
            "name": name,
            "type": "range",
            "bounds": [low, high],
            "value_type": val.type,
            "log_scale": val.log_scale,
        }

    def _ax_choice(self, name, val):
        """Assemble the dictionary for ax choice parameters

        Args:
            name: parameter name
            val: current attr val

        Returns:
            dictionary that can be added to a parameter list
        """
        val = self._try_choice_cast(val, type_string="ChoiceHyperParameter")
        return {
            "name": name,
            "type": "choice",
            "values": val.choices,
            "value_type": val.type,
        }
def finetune(optimize_consistency, evaluate_on, original_dev_dataset,
             runs_per_trial, hyperparam_opt_runs, out_file, mute,
             baseline_gold_file, hyperparams, keep_predictions,
             original_ans_length, **kwargs):
    gold_files = get_baseline_intervention_control_from_baseline(baseline_gold_file)

    golds = tuple(load_json(g) for g in gold_files)
    # Load eval gold for evaluation.
    aligneds = align(*golds, assert_same=True)

    hyper_params = [
        {
            'name': hp['name'],
            'type': hp.get("type", 'range'),
            'bounds': hp['bounds'],
            'value_type': hp.get('value_type', 'float'),
            'log_scale': hp.get('log_scale', True),
        }
        for hp in json.loads(hyperparams)
    ]

    logger.info(hyper_params)

    args = Args(**kwargs)
    args.debug_features = not mute
    tokenizer = get_tokenizer(args.model_path, args.do_lower_case)
    features = []
    for f in gold_files:
        args.eval_file = f
        features.append(load_or_convert(args, tokenizer, evaluate=True))

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device(
            "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
        )
        kwargs['n_gpu'] = 0 if args.no_cuda else torch.cuda.device_count()
    else:
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs.
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        kwargs['n_gpu'] = 1
    kwargs['device'] = device
    args.n_gpu = kwargs['n_gpu']
    args.device = kwargs['device']

    if args.seed:
        set_seed(args)

    logger.debug(args)

    if args.fp16:
        try:
            import apex
            apex.amp.register_half_function(torch, "einsum")
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )

    # Load train dataset.
    train_dataset, train_examples, train_features = load_or_convert(args, tokenizer)

    if not mute:
        debug_features_examples_dataset(train_dataset, train_examples, train_features, tokenizer)

    if original_dev_dataset:
        args.eval_file = original_dev_dataset
        original_dev_dataset = load_or_convert(args, tokenizer, evaluate=True)

    ax_client = AxClient()
    ax_client.create_experiment(
        name=f'{args.model_path}@{args.train_file}',
        parameters=hyper_params,
        objective_name=evaluate_on,
        minimize=False,
    )

    result = {
        "trials": [],
        "tried_params": defaultdict(list),
        "best_params": ...,
        'pre_eval': train_and_eval_single_step(
            args, train_dataset, *aligneds, *features, original_dev_dataset,
            *gold_files, run_nr='eval', train=False, evaluate_on=evaluate_on,
            original_ans_length=original_ans_length,
        ),
    }
    # First, eval and save what the performance is before training.
    click.echo(f"Results: {json.dumps(result['pre_eval'], indent=4)}")

    # Run hyperparameter optimisation.
    predictions_folder = keep_predictions
    for i in trange(hyperparam_opt_runs):
        parameters, trial_index = ax_client.get_next_trial()
        logger.info(f"Trying parameters: {parameters}")
        single_step_args = deepcopy(kwargs)
        single_step_args.update(parameters)
        args = Args(**single_step_args)
        args.predictions_folder = str(predictions_folder)
        trial_result = train_and_eval_single_step(
            args, train_dataset, *aligneds, *features, original_dev_dataset,
            *gold_files, run_nr=i, num_runs=runs_per_trial,
            evaluate_on=evaluate_on, original_ans_length=original_ans_length,
        )

        if optimize_consistency:
            assert evaluate_on == 'eoi'
            mean = trial_result['consistency']
        else:
            mean = trial_result['overall' if evaluate_on == 'eoi' else 'EMRelaxed']
        if runs_per_trial > 1:
            mean, var, ci = mean
        if original_dev_dataset:
            logger.info(f"Mean: ({mean} * 100 + {trial_result['original']})/2")
            mean = (mean * 100 + trial_result['original']) / 2

        trial_result["mean"] = mean

        logger.info(f"Result: {mean}")
        logger.info(f"Results: {json.dumps(trial_result, indent=4)}")
        result["trials"].append(trial_result)
        result['tried_params'][i].append(parameters)
        ax_client.complete_trial(trial_index=trial_index, raw_data=mean)

    best_params, metrics = ax_client.get_best_parameters()
    result['best_params'] = best_params
    result['best_metrics'] = metrics
    click.echo(f"What is metrics? {metrics}")
    click.echo(json.dumps(result, indent=4))
    write_json(result, out_file)
def run_ax_search(
    fixed_params: Dict,
    ax_params: List[Dict[str, Any]],
    eval_fn: Callable,
    obj_name: str,
    minimize: bool,
    id_: str,
    parse_params_fn: Optional[Callable] = None,
    ax_param_constraints: Optional[List[str]] = None,
    num_ax_steps: int = 50,
    num_concur_samples: int = 2,
    num_seeds: int = 10,
    num_proc: int = 20,
    folder_name: Optional[str] = None,
    verbose: bool = False,
) -> Tuple[Dict[str, Any], AxClient]:
    """
    Run a search for best hyperparameter values using Ax.

    Note that this requires the Ax package (https://ax.dev/) to be installed.

    Args:
        fixed_params: Fixed values of hyperparameters.
        ax_params: Ax configuration for hyperparameters that are searched over.
            See docs for ax_client.create_experiment()
        eval_fn: Evaluation function that returns a dictionary of metric values.
        obj_name: Objective name (key of the dict returned by eval_fn)
        minimize: If True, objective is minimized, if False it's maximized.
        id_: An arbitrary string identifier of the search (used as part of filename
            where results are saved)
        parse_params_fn: A function applied to the parameter dictionary to parse it.
            Can be used if the best representation for Ax doesn't match the format
            accepted by the eval_fn.
        ax_param_constraints: Constraints for the parameters that are searched over.
        num_ax_steps: The number of ax steps to take.
        num_concur_samples: Number of configurations to sample per ax step (in parallel)
        num_seeds: Number of seeds to average over
        num_proc: Number of processes to run in parallel.
        folder_name: Folder where to save best found parameters
        verbose: If True, some details are printed out

    Returns:
        A dict of best hyperparameters found by Ax
    """
    for p in ax_params:
        assert (
            p["name"] not in fixed_params
        ), f'Parameter {p["name"]} appears in both fixed and search parameters'
    if ax_param_constraints is None:
        ax_param_constraints = []
    ax_client = AxClient()
    ax_client.create_experiment(
        name=f"hparams_search_{id_}",
        parameters=ax_params,
        objective_name=obj_name,
        minimize=minimize,
        parameter_constraints=ax_param_constraints,
        choose_generation_strategy_kwargs={
            "max_parallelism_override": num_concur_samples,
            "num_initialization_trials": max(num_concur_samples, 5, len(ax_params)),
        },
    )
    best_params = None
    all_considered_params = []
    all_considered_metrics = []
    try:
        for i in range(1, num_ax_steps + 1):
            if verbose:
                print(f"ax step {i}/{num_ax_steps}")
            params_list = []
            trial_indices_list = []
            for _ in range(num_concur_samples):
                # Sample several values (to be evaluated in parallel).
                parameters, trial_index = ax_client.get_next_trial()
                params_list.append(parameters)
                trial_indices_list.append(trial_index)
            res = ax_evaluate_params(
                params_list,
                fixed_params=fixed_params,
                eval_fn=eval_fn,
                parse_params_fn=parse_params_fn,
                num_seeds=num_seeds,
                num_proc=num_proc,
            )
            all_considered_params.extend(params_list)
            all_considered_metrics.extend(res)
            for t_i, v in zip(trial_indices_list, res):
                ax_client.complete_trial(trial_index=t_i, raw_data=v)
            best_params, predicted_metrics = ax_client.get_best_parameters()
            predicted_metrics = predicted_metrics[0]  # choose expected metric values
            if verbose:
                print(best_params, predicted_metrics)
            # Save at every iteration in case the search is interrupted.
            if folder_name is not None:
                with open(
                    os.path.join(
                        os.path.expanduser(folder_name),
                        f"ax_results_{id_}.json",
                    ),
                    "w",
                ) as f:
                    json.dump(
                        {
                            "best_params": best_params,
                            "predicted_metrics": predicted_metrics,
                            "fixed_params": fixed_params,
                            "ax_params": ax_params,
                            "num_ax_steps": i,
                            "num_concur_samples": num_concur_samples,
                            "num_seeds": num_seeds,
                            "num_proc": num_proc,
                            "all_considered_params": all_considered_params,
                            "all_considered_metrics": all_considered_metrics,
                        },
                        f,
                        indent=4,
                    )
    except KeyboardInterrupt:
        # Handle keyboard interruption to enable returning intermediate results if interrupted.
        pass
    return best_params, ax_client
    opts.max_conv_size = parameters['max_conv_size']
    opts.dense_kernel_size = parameters['dense_kernel_size']
    opts.batch_size = 64  # parameters['batch_size']
    opts.learning_rate = parameters['learning_rate']
    opts.epochs = cmd_line_opts.epochs  # max to run, we also use early stopping

    # Run training.
    start_time = time.time()
    # final_loss = train.train_in_subprocess(opts)
    final_loss = train.train(opts)
    log_record.append(time.time() - start_time)
    log_record.append(final_loss)

    # Complete trial.
    if final_loss is None:
        print("ax trial", trial_index, "failed?")
        ax.log_trial_failure(trial_index=trial_index)
    else:
        ax.complete_trial(trial_index=trial_index, raw_data={'final_loss': (final_loss, 0)})

    print("CURRENT_BEST", ax.get_best_parameters())

    # Flush log.
    log_msg = "\t".join(map(str, log_record))
    print(log_msg, file=log)
    print(log_msg)
    log.flush()

    # Save ax state.
    ax.save_to_json_file()
    return {'error_rate': (1 - accuracy, 0)}


ax_client = AxClient()
ax_client.create_experiment(
    name="test",
    parameters=[
        {
            "name": "svc_c",
            "type": "range",
            "bounds": [0.001, 0.1],
            "value_type": "float",  # Optional, defaults to inference from type of "bounds".
            "log_scale": True,  # Optional, defaults to False.
        }
    ],
    objective_name="error_rate",
    minimize=True,  # Optional, defaults to False.
    # total_trials=30,  # Optional.
)

for i in range(25):
    parameters, trial_index = ax_client.get_next_trial()
    # Local evaluation here can be replaced with deployment to external system.
    ax_client.complete_trial(trial_index=trial_index, raw_data=objective(parameters))

best_parameters, (means, covariances) = ax_client.get_best_parameters()
print(best_parameters)
print(means)
print(covariances)
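The snippet above begins inside the objective function; the sketch below shows what such an objective might look like. The SVC/cross-validation details and dataset are assumptions inferred from the svc_c parameter name and the error_rate return value, not taken from the original code.

# Hypothetical objective assumed by the loop above: trains an SVC with C=svc_c and
# reports the error rate in the (mean, SEM) form Ax expects.
from sklearn.datasets import load_digits
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

def objective(parameters):
    X, y = load_digits(return_X_y=True)
    accuracy = cross_val_score(SVC(C=parameters["svc_c"]), X, y, cv=3).mean()
    return {'error_rate': (1 - accuracy, 0)}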
    opt_parameters.append({
        'name': 'zeta',
        'type': 'range',
        'bounds': [0.01, 100],
        'value_type': 'float',
        'log_scale': True,
    })

    def tune(parameters):
        for name in opt_dests:
            setattr(opt, name, parameters.get(name))
        return run_and_evaluate()

    ax = AxClient()
    ax.create_experiment(
        name='acc_optimization',
        parameters=opt_parameters,
        objective_name='Ave. Test Acc',
    )
    for _ in range(opt.opt_num_rounds):
        parameters, idx = ax.get_next_trial()
        ax.complete_trial(trial_index=idx, raw_data=tune(parameters))
    best_parameters, values = ax.get_best_parameters()
    # best_parameters, values, experiment, model = optimize(
    #     parameters=opt_parameters,
    #     evaluation_function=tune,
    #     objective_name='Ave. Test Acc',
    # )
    print(best_parameters)
    print(values)
else:
    run_and_evaluate()
def automatic_hyperparameter_search(drug_id, num_trials=50, num_splits=3,
                                    num_epochs=50, data_folder='../example_data/'):
    data_file_name = f'{data_folder}/{drug_id}.pickle'
    with open(data_file_name, 'rb') as f:
        data = pickle.load(f)
    if 'hyperparameter_search' in data.keys():
        return

    ax_client = AxClient()
    ax_client.create_experiment(
        name=f"drug_id_{drug_id}",
        parameters=[
            {
                "name": "lr",
                "value_type": 'float',
                "type": "range",
                "bounds": [1e-5, 1e0],
                "log_scale": True,
            },
            {
                "name": "l2_regularization_coefficient",
                "value_type": 'float',
                "type": "range",
                "bounds": [1e-5, 1e0],
                "log_scale": True,
            },
            {
                "name": "num_layers",
                "value_type": 'int',
                "type": "range",
                "bounds": [1, 5],
            },
            {
                "name": "num_neurons",
                "value_type": 'int',
                "type": "range",
                "bounds": [10, 100],
            },
        ],
        objective_name="score",
        minimize=False,
    )

    for i in range(num_trials):
        parameters, trial_index = ax_client.get_next_trial()
        ax_client.complete_trial(
            trial_index=trial_index,
            raw_data=evaluate(parameters, data, num_splits, num_epochs),
        )

    best_parameters, values = ax_client.get_best_parameters()
    trace = ax_client.get_optimization_trace()

    data['hyperparameter_search'] = {}
    data['hyperparameter_search']['score'] = values[0]['score']
    data['hyperparameter_search']['neural_net_config'] = best_parameters

    with open(data_file_name, 'wb') as f:
        pickle.dump(data, f)