def fit(self, X_train, y_train):
    # TODO: Replace the classifier flag value by using tags?
    self.cv = check_cv(self.cv, y=y_train, classifier=True)

    def smac_train_test(trainable, X_train, y_train):
        try:
            cv_score, logloss, execution_time = cross_val_score_track_trials(
                trainable, X_train, y_train, cv=self.cv, scoring=self.scoring)
            logger.debug("Successful trial of SMAC")
        except BaseException as e:
            # If there is any error in cross validation, use the score based on
            # a random train-test split as the evaluation criterion.
            if self.handle_cv_failure:
                X_train_part, X_validation, y_train_part, y_validation = train_test_split(
                    X_train, y_train, test_size=0.20)
                start = time.time()
                trained = trainable.fit(X_train_part, y_train_part)
                scorer = check_scoring(trainable, scoring=self.scoring)
                cv_score = scorer(trained, X_validation, y_validation)
                execution_time = time.time() - start
                y_pred_proba = trained.predict_proba(X_validation)
                try:
                    logloss = log_loss(y_true=y_validation, y_pred=y_pred_proba)
                except BaseException:
                    logloss = 0
                    logger.debug("Warning, log loss cannot be computed")
            else:
                logger.debug("Error {} with pipeline:{}".format(e, trainable.to_json()))
                raise e
        return cv_score, logloss, execution_time

    def f(trainable):
        return_dict = {}
        try:
            score, logloss, execution_time = smac_train_test(
                trainable, X_train=X_train, y_train=y_train)
            return_dict = {
                'loss': self.best_score - score,
                'time': execution_time,
                'log_loss': logloss
            }
        except BaseException as e:
            logger.warning(
                f"Exception caught in SMACCV:{type(e)}, {traceback.format_exc()}, SMAC will set a cost_for_crash to MAXINT."
            )
            raise e
        return return_dict['loss']

    try:
        smac = orig_SMAC(scenario=self.scenario,
                         rng=np.random.RandomState(42),
                         tae_runner=lale_op_smac_tae(self.estimator, f))
        incumbent = smac.optimize()
        self.trials = smac.get_runhistory()
        # Get the trainable corresponding to the best params and train it on
        # the entire training dataset.
        trainable = lale_trainable_op_from_config(self.estimator, incumbent)
        trained = trainable.fit(X_train, y_train)
        self._best_estimator = trained
    except BudgetExhaustedException:
        logger.warning(
            'Maximum allotted optimization time exceeded. Optimization exited prematurely'
        )
    except BaseException as e:
        logger.warning('Error during optimization: {}'.format(e))
        self._best_estimator = None
    return self
def iris_fmin_tae(op, num_folds=5):
    return lale_op_smac_tae(op, iris_f_min_for_folds(num_folds=num_folds))
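# Usage sketch (assumption, not part of the original module): how iris_fmin_tae
# can be handed to the SMAC facade as a tae_runner, mirroring the car test below.
# get_smac_space, lale_op_smac_tae, and the older smac_facade import path are the
# ones already used in this file; the pipeline and runcount-limit are illustrative.
def _example_iris_smac_run():
    import numpy as np
    from smac.facade.smac_facade import SMAC
    from smac.scenario.scenario import Scenario

    from lale.lib.sklearn import KNeighborsClassifier, LogisticRegression
    from lale.operators import make_pipeline

    planned = make_pipeline(LogisticRegression() | KNeighborsClassifier())
    scenario = Scenario({
        "run_obj": "quality",            # optimize quality rather than runtime
        "runcount-limit": 1,             # a single evaluation, just to exercise the wiring
        "cs": get_smac_space(planned),   # configuration space derived from the pipeline
        "deterministic": "true",
        "abort_on_first_run_crash": False,
    })
    smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
                tae_runner=iris_fmin_tae(planned, num_folds=2))
    return smac.optimize()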
def fit(self, X_train, y_train):
    data_schema = lale.helpers.fold_schema(
        X_train, y_train, self.cv, self.estimator.is_classifier())
    self.search_space: ConfigurationSpace = get_smac_space(
        self.estimator, lale_num_grids=self.lale_num_grids, data_schema=data_schema)

    # Scenario object
    scenario_options = {
        "run_obj": "quality",  # optimize quality (alternatively runtime)
        "runcount-limit": self.max_evals,  # maximum function evaluations
        "cs": self.search_space,  # configuration space
        "deterministic": "true",
        "abort_on_first_run_crash": False,
    }
    if self.max_opt_time is not None:
        scenario_options["wallclock_limit"] = self.max_opt_time
    self.scenario = Scenario(scenario_options)

    self.cv = check_cv(self.cv, y=y_train, classifier=self.estimator.is_classifier())

    def smac_train_test(trainable, X_train, y_train):
        try:
            cv_score, logloss, execution_time = cross_val_score_track_trials(
                trainable, X_train, y_train, cv=self.cv, scoring=self.scoring)
            logger.debug("Successful trial of SMAC")
        except BaseException as e:
            # If there is any error in cross validation, use the score based on
            # a random train-test split as the evaluation criterion.
            if self.handle_cv_failure:
                (
                    X_train_part,
                    X_validation,
                    y_train_part,
                    y_validation,
                ) = train_test_split(X_train, y_train, test_size=0.20)
                start = time.time()
                trained = trainable.fit(X_train_part, y_train_part)
                scorer = check_scoring(trainable, scoring=self.scoring)
                cv_score = scorer(trained, X_validation, y_validation)
                execution_time = time.time() - start
                y_pred_proba = trained.predict_proba(X_validation)
                try:
                    logloss = log_loss(y_true=y_validation, y_pred=y_pred_proba)
                except BaseException:
                    logloss = 0
                    logger.debug("Warning, log loss cannot be computed")
            else:
                logger.debug("Error {} with pipeline:{}".format(e, trainable.to_json()))
                raise e
        return cv_score, logloss, execution_time

    def f(trainable):
        return_dict = {}
        try:
            score, logloss, execution_time = smac_train_test(
                trainable, X_train=X_train, y_train=y_train)
            return_dict = {
                "loss": self.best_score - score,
                "time": execution_time,
                "log_loss": logloss,
            }
        except BaseException as e:
            logger.warning(
                f"Exception caught in SMACCV:{type(e)}, {traceback.format_exc()}, SMAC will set a cost_for_crash to MAXINT."
            )
            raise e
        return return_dict["loss"]

    try:
        smac = orig_SMAC(
            scenario=self.scenario,
            rng=np.random.RandomState(42),
            tae_runner=lale_op_smac_tae(self.estimator, f),
        )
        incumbent = smac.optimize()
        self.trials = smac.get_runhistory()
        # Get the trainable corresponding to the best params and train it on
        # the entire training dataset.
        trainable = lale_trainable_op_from_config(self.estimator, incumbent)
        trained = trainable.fit(X_train, y_train)
        self._best_estimator = trained
    except BudgetExhaustedException:
        logger.warning(
            "Maximum allotted optimization time exceeded. Optimization exited prematurely"
        )
    except BaseException as e:
        logger.warning("Error during optimization: {}".format(e))
        self._best_estimator = None
    return self
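# Usage sketch (assumption, not part of the original module): the fit method above
# appears to belong to Lale's SMAC optimizer wrapper, exposed as lale.lib.lale.SMAC
# in releases that bundle SMAC support. The dataset, pipeline choice, and max_evals
# below are illustrative only.
def _example_smac_optimizer_usage():
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split

    from lale.lib.lale import SMAC as LaleSMAC
    from lale.lib.sklearn import KNeighborsClassifier, LogisticRegression

    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    # A planned pipeline: the choice (|) between classifiers is resolved by SMAC.
    planned = LogisticRegression | KNeighborsClassifier

    # fit builds the ConfigurationSpace and Scenario, runs smac.optimize(), and
    # refits the incumbent configuration on the full training data (see above).
    opt = LaleSMAC(estimator=planned, max_evals=5)
    trained = opt.fit(X_train, y_train)
    return trained.predict(X_test)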
def dont_test_car_smac(self):
    import numpy as np
    import pandas as pd
    from sklearn.metrics import accuracy_score, make_scorer
    from sklearn.preprocessing import LabelEncoder
    from smac.configspace import ConfigurationSpace
    from smac.facade.smac_facade import SMAC
    from smac.scenario.scenario import Scenario

    from lale.datasets.auto_weka import fetch_car
    from lale.lib.lale import HyperoptClassifier
    from lale.lib.sklearn import LogisticRegression, KNeighborsClassifier
    from lale.lib.weka import J48
    from lale.operators import make_pipeline
    from lalegpl.lib.r import ArulesCBAClassifier

    (X_train, y_train), (X_test, y_test) = fetch_car()
    y_name = y_train.name
    le = LabelEncoder()
    y_train = le.fit_transform(y_train)
    y_test = le.transform(y_test)
    y_train = pd.Series(y_train, name=y_name)
    y_test = pd.Series(y_test, name=y_name)

    # planned_pipeline = make_pipeline(J48() | ArulesCBAClassifier() | LogisticRegression() | KNeighborsClassifier())
    planned_pipeline = make_pipeline(
        ArulesCBAClassifier() | KNeighborsClassifier() | LogisticRegression())
    cs: ConfigurationSpace = get_smac_space(planned_pipeline)
    print(cs)

    # X_train = X_train[0:20]
    # y_train = y_train[0:20]

    # Scenario object
    run_count_limit = 1
    scenario = Scenario({
        "run_obj": "quality",  # we optimize quality (alternatively runtime)
        "runcount-limit": run_count_limit,  # maximum function evaluations
        "cs": cs,  # configuration space
        "deterministic": "true",
        "abort_on_first_run_crash": False,
    })

    # Optimize, using a SMAC object
    def f_min(op):
        return test_f_min(op, X_train, y_train, num_folds=2)

    tae = lale_op_smac_tae(planned_pipeline, f_min)

    print("Optimizing! Depending on your machine, this might take a few minutes.")
    smac = SMAC(scenario=scenario, rng=np.random.RandomState(42), tae_runner=tae)
    incumbent = smac.optimize()

    trainable_pipeline = lale_trainable_op_from_config(planned_pipeline, incumbent)
    trained_pipeline = trainable_pipeline.fit(X_train, y_train)
    pred = trained_pipeline.predict(X_test)
    accuracy = accuracy_score(y_test, pred)
    print("Accuracy: %.2f" % accuracy)
    inc_value = tae(incumbent)
    print("Optimized Value: %.2f" % inc_value)
    print(f"Run count limit: {run_count_limit}")