def dont_test_smac_choice(self):
    """Run a one-evaluation SMAC search over a pipeline with operator choices.

    The planned pipeline is an optional one-hot encoder, followed by a choice
    of PCA / Nystroem / NoOp, followed by a choice of LogisticRegression /
    KNeighborsClassifier.  The method only prints the incumbent's value; it
    makes no assertions.

    NOTE(review): the ``dont_`` prefix presumably keeps the test runner from
    collecting this method -- confirm before renaming.
    """
    import numpy as np
    from smac.configspace import ConfigurationSpace
    from smac.facade.smac_facade import SMAC as orig_SMAC
    from smac.scenario.scenario import Scenario

    # Imported locally, the same way test_smac does, so this method does not
    # depend on a module-level import being present.
    from lale.search.lale_smac import get_smac_space

    tfm = PCA() | Nystroem() | NoOp()
    planned_pipeline1 = (
        (OneHotEncoder(handle_unknown='ignore', sparse=False) | NoOp())
        >> tfm
        >> (LogisticRegression() | KNeighborsClassifier())
    )

    cs: ConfigurationSpace = get_smac_space(planned_pipeline1, lale_num_grids=1)

    # Scenario object
    scenario = Scenario({
        "run_obj": "quality",  # we optimize quality (alternatively runtime)
        "runcount-limit": 1,  # maximum function evaluations
        "cs": cs,  # configuration space
        "deterministic": "true",
    })

    # Optimize, using a SMAC-object
    tae = iris_fmin_tae(planned_pipeline1, num_folds=2)
    print(
        "Optimizing! Depending on your machine, this might take a few minutes."
    )
    smac = orig_SMAC(
        scenario=scenario,
        rng=np.random.RandomState(42),
        tae_runner=tae,
    )

    incumbent = smac.optimize()

    # Re-evaluate the incumbent with the same target function to report it.
    inc_value = tae(incumbent)

    print("Optimized Value: %.2f" % (inc_value))
def test_smac(self):
    """Run a one-evaluation SMAC search over LogisticRegression's space.

    Builds the configuration space with ``get_smac_space``, runs SMAC with a
    run-count limit of 1 and a fixed random seed, then re-evaluates and prints
    the incumbent's value.  The method makes no assertions; it passes as long
    as nothing raises.
    """
    import numpy as np
    from smac.configspace import ConfigurationSpace
    from smac.facade.smac_facade import SMAC as orig_SMAC
    from smac.scenario.scenario import Scenario

    from lale.search.lale_smac import get_smac_space

    lr = LogisticRegression()

    cs: ConfigurationSpace = get_smac_space(lr)

    # Scenario object
    scenario = Scenario({
        "run_obj": "quality",  # we optimize quality (alternatively runtime)
        "runcount-limit": 1,  # maximum function evaluations
        "cs": cs,  # configuration space
        "deterministic": "true",
        "abort_on_first_run_crash": False,
    })

    # Optimize, using a SMAC-object
    tae = iris_fmin_tae(lr, num_folds=2)
    print(
        "Optimizing! Depending on your machine, this might take a few minutes."
    )
    smac = orig_SMAC(
        scenario=scenario,
        rng=np.random.RandomState(42),
        tae_runner=tae,
    )

    incumbent = smac.optimize()

    # Re-evaluate the incumbent with the same target function to report it.
    inc_value = tae(incumbent)

    print("Optimized Value: %.2f" % (inc_value))
def fit(self, X_train, y_train):
    """Run SMAC over ``self.scenario`` and fit the best configuration found.

    Each candidate is scored with ``cross_val_score_track_trials``; the value
    reported to SMAC is ``self.best_score - score``.  After optimization the
    run history is stored in ``self.trials`` and the incumbent configuration
    is turned into a trainable, fitted on the full training data and stored
    in ``self._best_estimator``.

    Args:
        X_train: training inputs, passed to cross-validation and the final fit.
        y_train: training targets, passed alongside ``X_train``.

    Returns:
        ``self``.  Ordinary errors during optimization are logged rather than
        raised and leave ``self._best_estimator`` as ``None``.
        KeyboardInterrupt / SystemExit are deliberately not swallowed.
    """
    # TODO: Replace the classifier flag value by using tags?
    self.cv = check_cv(self.cv, y=y_train, classifier=True)

    def smac_train_test(trainable, X_train, y_train):
        """Return ``(score, log loss, execution time)`` for one candidate."""
        try:
            cv_score, logloss, execution_time = cross_val_score_track_trials(
                trainable, X_train, y_train, cv=self.cv, scoring=self.scoring
            )
            logger.debug("Successful trial of SMAC")
        except Exception as e:
            # Only genuine errors trigger the fallback; an interrupt must not
            # start another training run.
            if not self.handle_cv_failure:
                logger.debug(
                    "Error {} with pipeline:{}".format(e, trainable.to_json())
                )
                raise

            # If there is any error in cross validation, use the score based
            # on a random train-test split as the evaluation criterion.
            (
                X_train_part,
                X_validation,
                y_train_part,
                y_validation,
            ) = train_test_split(X_train, y_train, test_size=0.20)
            start = time.time()
            trained = trainable.fit(X_train_part, y_train_part)
            scorer = check_scoring(trainable, scoring=self.scoring)
            cv_score = scorer(trained, X_validation, y_validation)
            execution_time = time.time() - start
            try:
                # predict_proba is inside the guard so that an estimator
                # without probability output still yields a usable trial.
                y_pred_proba = trained.predict_proba(X_validation)
                logloss = log_loss(y_true=y_validation, y_pred=y_pred_proba)
            except Exception:
                logloss = 0
                logger.debug("Warning, log loss cannot be computed")
        return cv_score, logloss, execution_time

    def f(trainable):
        """Objective passed to SMAC: ``self.best_score - score``."""
        try:
            score, _logloss, _execution_time = smac_train_test(
                trainable, X_train=X_train, y_train=y_train
            )
            loss = self.best_score - score
        except BaseException as e:
            # Logged and re-raised unchanged, so catching broadly is safe here.
            logger.warning(
                f"Exception caught in SMACCV:{type(e)}, {traceback.format_exc()}, SMAC will set a cost_for_crash to MAXINT."
            )
            raise
        return loss

    try:
        smac = orig_SMAC(
            scenario=self.scenario,
            rng=np.random.RandomState(42),
            tae_runner=lale_op_smac_tae(self.estimator, f),
        )
        incumbent = smac.optimize()
        self.trials = smac.get_runhistory()
        # Get the trainable corresponding to the best params and train it on
        # the entire training dataset.
        trainable = lale_trainable_op_from_config(self.estimator, incumbent)
        trained = trainable.fit(X_train, y_train)
        self._best_estimator = trained
    except BudgetExhaustedException:
        logger.warning(
            'Maximum alloted optimization time exceeded. Optimization exited prematurely'
        )
        # Keep any previously stored estimator, but make sure the attribute
        # exists so later reads do not fail with AttributeError.
        self._best_estimator = getattr(self, "_best_estimator", None)
    except Exception as e:
        logger.warning('Error during optimization: {}'.format(e))
        self._best_estimator = None

    return self
def fit(self, X_train, y_train):
    """Build the SMAC scenario, optimize, and fit the best configuration.

    The configuration space is derived from ``self.estimator`` via
    ``get_smac_space`` using a data schema from ``lale.helpers.fold_schema``.
    The scenario limits the search to ``self.max_evals`` evaluations and, when
    ``self.max_opt_time`` is not ``None``, to that wall-clock limit.  Each
    candidate is scored with ``cross_val_score_track_trials``; the value
    reported to SMAC is ``self.best_score - score``.  After optimization the
    run history is stored in ``self.trials`` and the incumbent configuration
    is fitted on the full training data and stored in
    ``self._best_estimator``.

    Args:
        X_train: training inputs, passed to cross-validation and the final fit.
        y_train: training targets, passed alongside ``X_train``.

    Returns:
        ``self``.  Ordinary errors during optimization are logged rather than
        raised and leave ``self._best_estimator`` as ``None``.
        KeyboardInterrupt / SystemExit are deliberately not swallowed.
    """
    is_classifier = self.estimator.is_classifier()

    # The schema is computed from self.cv as it is before check_cv below
    # replaces it, matching the original statement order.
    data_schema = lale.helpers.fold_schema(
        X_train, y_train, self.cv, is_classifier
    )
    self.search_space: ConfigurationSpace = get_smac_space(
        self.estimator,
        lale_num_grids=self.lale_num_grids,
        data_schema=data_schema,
    )

    # Scenario object
    scenario_options = {
        "run_obj": "quality",  # optimize quality (alternatively runtime)
        "runcount-limit": self.max_evals,  # maximum function evaluations
        "cs": self.search_space,  # configuration space
        "deterministic": "true",
        "abort_on_first_run_crash": False,
    }
    if self.max_opt_time is not None:
        scenario_options["wallclock_limit"] = self.max_opt_time
    self.scenario = Scenario(scenario_options)

    self.cv = check_cv(
        self.cv, y=y_train, classifier=self.estimator.is_classifier()
    )

    def smac_train_test(trainable, X_train, y_train):
        """Return ``(score, log loss, execution time)`` for one candidate."""
        try:
            cv_score, logloss, execution_time = cross_val_score_track_trials(
                trainable, X_train, y_train, cv=self.cv, scoring=self.scoring
            )
            logger.debug("Successful trial of SMAC")
        except Exception as e:
            # Only genuine errors trigger the fallback; an interrupt must not
            # start another training run.
            if not self.handle_cv_failure:
                logger.debug(
                    "Error {} with pipeline:{}".format(e, trainable.to_json())
                )
                raise

            # If there is any error in cross validation, use the score based
            # on a random train-test split as the evaluation criterion.
            (
                X_train_part,
                X_validation,
                y_train_part,
                y_validation,
            ) = train_test_split(X_train, y_train, test_size=0.20)
            start = time.time()
            trained = trainable.fit(X_train_part, y_train_part)
            scorer = check_scoring(trainable, scoring=self.scoring)
            cv_score = scorer(trained, X_validation, y_validation)
            execution_time = time.time() - start
            try:
                # predict_proba is inside the guard so that an estimator
                # without probability output still yields a usable trial.
                y_pred_proba = trained.predict_proba(X_validation)
                logloss = log_loss(y_true=y_validation, y_pred=y_pred_proba)
            except Exception:
                logloss = 0
                logger.debug("Warning, log loss cannot be computed")
        return cv_score, logloss, execution_time

    def f(trainable):
        """Objective passed to SMAC: ``self.best_score - score``."""
        try:
            score, _logloss, _execution_time = smac_train_test(
                trainable, X_train=X_train, y_train=y_train
            )
            loss = self.best_score - score
        except BaseException as e:
            # Logged and re-raised unchanged, so catching broadly is safe here.
            logger.warning(
                f"Exception caught in SMACCV:{type(e)}, {traceback.format_exc()}, SMAC will set a cost_for_crash to MAXINT."
            )
            raise
        return loss

    try:
        smac = orig_SMAC(
            scenario=self.scenario,
            rng=np.random.RandomState(42),
            tae_runner=lale_op_smac_tae(self.estimator, f),
        )
        incumbent = smac.optimize()
        self.trials = smac.get_runhistory()
        # Get the trainable corresponding to the best params and train it on
        # the entire training dataset.
        trainable = lale_trainable_op_from_config(self.estimator, incumbent)
        trained = trainable.fit(X_train, y_train)
        self._best_estimator = trained
    except BudgetExhaustedException:
        logger.warning(
            "Maximum alloted optimization time exceeded. Optimization exited prematurely"
        )
        # Keep any previously stored estimator, but make sure the attribute
        # exists so later reads do not fail with AttributeError.
        self._best_estimator = getattr(self, "_best_estimator", None)
    except Exception as e:
        logger.warning("Error during optimization: {}".format(e))
        self._best_estimator = None

    return self