class TaskOptimizer:
    """Hyperopt-driven hyper-parameter search for one learner/feature pair.

    Minimizes cross-validated RMSE. ``task_mode`` selects between a plain
    "single" task (raw features) and a "stacking" task (level-1 features).
    """

    def __init__(self, task_mode, learner_name, feature_name, logger,
                 max_evals=100, verbose=True, refit_once=False):
        self.task_mode = task_mode
        self.learner_name = learner_name
        self.feature_name = feature_name
        self.feature = self._get_feature()
        self.logger = logger
        self.max_evals = max_evals
        self.verbose = verbose
        self.refit_once = refit_once
        self.trial_counter = 0
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        """Load the feature set matching ``task_mode``.

        Raises:
            ValueError: for an unknown ``task_mode``. (The original fell
                through the if/elif and raised UnboundLocalError instead.)
        """
        if self.task_mode == "single":
            return Feature(self.feature_name)
        elif self.task_mode == "stacking":
            return StackingFeature(self.feature_name)
        raise ValueError("Invalid task_mode: %s" % self.task_mode)

    def _obj(self, param_dict):
        """Hyperopt objective: run one CV trial and report its RMSE."""
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        suffix = "_[Id@%s]" % str(self.trial_counter)
        if self.task_mode == "single":
            task = Task(learner, self.feature, suffix, self.logger,
                        self.verbose)
        elif self.task_mode == "stacking":
            task = StackingTask(learner, self.feature, suffix, self.logger,
                                self.verbose, self.refit_once)
        else:
            # Fail fast instead of UnboundLocalError at task.go().
            raise ValueError("Invalid task_mode: %s" % self.task_mode)
        task.go()
        return {
            "loss": task.rmse_cv_mean,
            "attachments": {"std": task.rmse_cv_std},
            "status": STATUS_OK,
        }

    def run(self):
        """Run the TPE search and log the best trial's RMSE and params."""
        start = time.time()
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_int_param(best_params)
        trial_rmses = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_rmses)  # lower RMSE is better
        best_rmse_mean = trial_rmses[best_ind]
        best_rmse_std = trials.trial_attachments(
            trials.trials[best_ind])["std"]
        self.logger.info("-"*50)
        self.logger.info("Best RMSE")
        self.logger.info(" Mean: %.6f"%best_rmse_mean)
        self.logger.info(" std: %.6f"%best_rmse_std)
        self.logger.info("Best param")
        for k, v in sorted(best_params.items()):
            self.logger.info(" %s: %s" % (k, v))
        end = time.time()
        _sec = end - start
        _min = int(_sec/60.)
        self.logger.info("Time")
        if _min > 0:
            self.logger.info(" %d mins"%_min)
        else:
            self.logger.info(" %d secs"%_sec)
        self.logger.info("-"*50)
class TaskOptimizer:
    """Hyperopt search maximizing mean Kendall's tau.

    Hyperopt minimizes, so each trial reports ``loss = 1 - mean_tau``.
    """

    def __init__(self, learner_name, feature_name, logger,
                 max_evals=100, verbose=True, plot_importance=False):
        self.learner_name = learner_name
        self.feature_name = feature_name
        self.feature = self._get_feature()
        self.logger = logger
        self.max_evals = max_evals
        self.verbose = verbose
        self.plot_importance = plot_importance
        self.trial_counter = 0
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        """Load the feature set for this optimizer (single mode only)."""
        return Feature(self.feature_name)

    def _obj(self, param_dict):
        """Hyperopt objective: run one CV trial; loss is ``1 - mean_tau``."""
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        suffix = "_[Id@%s]" % str(self.trial_counter)
        self.task = Task(learner, self.feature, suffix, self.logger,
                         self.verbose, self.plot_importance)
        self.task.go()
        return {
            "loss": 1. - self.task.mean_tau,
            "attachments": {
                "std_tau": self.task.std_tau,
            },
            "status": STATUS_OK,
        }

    def run(self):
        """Run the TPE search and log the best mean tau and its params."""
        start = time.time()
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_int_param(best_params)
        # Losses are 1 - tau; convert back to taus for reporting.
        trial_mean_taus = 1 - np.asarray(trials.losses(), dtype=float)
        # BUG FIX: tau is a score (higher is better). The original applied
        # np.argmin to the taus, which selected the WORST trial; argmax
        # picks the highest mean tau (equivalently the lowest loss).
        best_ind = np.argmax(trial_mean_taus)
        best_mean_tau = trial_mean_taus[best_ind]
        self.logger.info("-"*50)
        self.logger.info("Best Mean Kendalls Tau: %.6f" % (best_mean_tau))
        self.logger.info("Best param")
        self.task._print_param_dict(best_params)
        end = time.time()
        _sec = end - start
        _min = int(_sec/60.)
        self.logger.info("Time")
        if _min > 0:
            self.logger.info(" %d mins"%_min)
        else:
            self.logger.info(" %d secs"%_sec)
        self.logger.info("-"*50)
class TaskOptimizer:
    """Hyperopt search minimizing cross-validated multi-class logloss.

    NOTE(review): this block was a broken draft — the objective returned
    the Task object itself as both "loss" and "std", and ``def run(self):``
    had no body (a syntax error). It is repaired here to follow the
    complete mlogloss sibling implementation elsewhere in this file;
    confirm mlogloss is the intended metric for this variant.
    """

    def __init__(self, task_mode, learner_name, feature_name, logger,
                 max_evals=100, verbose=True, refit_once=False,
                 plot_importance=False):
        self.task_mode = task_mode
        self.learner_name = learner_name
        self.feature_name = feature_name
        self.feature = self._get_feature()
        self.logger = logger
        self.max_evals = max_evals
        self.verbose = verbose
        self.refit_once = refit_once
        self.plot_importance = plot_importance
        self.trial_counter = 0
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        """Load the feature set matching ``task_mode``.

        Raises:
            ValueError: for an unknown ``task_mode`` (previously this fell
                through and raised UnboundLocalError).
        """
        if self.task_mode == "single":
            return Feature(self.feature_name)
        elif self.task_mode == "stacking":
            return StackingFeature(self.feature_name)
        raise ValueError("Invalid task_mode: %s" % self.task_mode)

    def _obj(self, param_dict):
        """Hyperopt objective: run one CV trial, report mean/std mlogloss.

        BUG FIX: the original returned ``self.task`` (the Task object) as
        both the loss and the "std" attachment; hyperopt requires a
        numeric loss.
        """
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        suffix = "_[Id@%s]" % str(self.trial_counter)
        if self.task_mode == "single":
            self.task = Task(learner, self.feature, suffix, self.logger,
                             self.verbose, self.plot_importance)
        elif self.task_mode == "stacking":
            self.task = StackingTask(learner, self.feature, suffix,
                                     self.logger, self.verbose,
                                     self.refit_once)
        else:
            raise ValueError("Invalid task_mode: %s" % self.task_mode)
        self.task.go()
        return {
            "loss": self.task.mlogloss_cv_mean,
            "attachments": {
                "std": self.task.mlogloss_cv_std,
            },
            "status": STATUS_OK,
        }

    def run(self):
        """Run the TPE search and log the best trial (body was missing)."""
        start = time.time()
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_int_param(best_params)
        trial_mloglosses = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_mloglosses)  # lower logloss is better
        best_mlogloss_mean = trial_mloglosses[best_ind]
        best_mlogloss_std = trials.trial_attachments(
            trials.trials[best_ind])["std"]
        self.logger.info("-" * 50)
        self.logger.info("Best mlogloss")
        self.logger.info(" Mean: %.6f" % best_mlogloss_mean)
        self.logger.info(" std: %.6f" % best_mlogloss_std)
        self.logger.info("Best param")
        self.task._print_param_dict(best_params)
        end = time.time()
        _sec = end - start
        _min = int(_sec / 60.)
        self.logger.info("Time")
        if _min > 0:
            self.logger.info(" %d mins" % _min)
        else:
            self.logger.info(" %d secs" % _sec)
        self.logger.info("-" * 50)
class TaskOptimizer:
    """Hyperopt search minimizing cross-validated multi-class logloss.

    ``task_mode`` selects between a plain "single" task and a "stacking"
    task built on level-1 features.
    """

    def __init__(self, task_mode, learner_name, feature_name, logger,
                 max_evals=100, verbose=True, refit_once=False,
                 plot_importance=False):
        self.task_mode = task_mode
        self.learner_name = learner_name
        self.feature_name = feature_name
        self.feature = self._get_feature()
        self.logger = logger
        self.max_evals = max_evals
        self.verbose = verbose
        self.refit_once = refit_once
        self.plot_importance = plot_importance
        self.trial_counter = 0
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        """Load the feature set matching ``task_mode``.

        Raises:
            ValueError: for an unknown ``task_mode``. (The original fell
                through the if/elif and raised UnboundLocalError instead.)
        """
        if self.task_mode == "single":
            return Feature(self.feature_name)
        elif self.task_mode == "stacking":
            return StackingFeature(self.feature_name)
        raise ValueError("Invalid task_mode: %s" % self.task_mode)

    def _obj(self, param_dict):
        """Hyperopt objective: run one CV trial, report mean/std mlogloss."""
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        suffix = "_[Id@%s]" % str(self.trial_counter)
        if self.task_mode == "single":
            self.task = Task(learner, self.feature, suffix, self.logger,
                             self.verbose, self.plot_importance)
        elif self.task_mode == "stacking":
            self.task = StackingTask(learner, self.feature, suffix,
                                     self.logger, self.verbose,
                                     self.refit_once)
        else:
            # Fail fast instead of operating on a stale/unset self.task.
            raise ValueError("Invalid task_mode: %s" % self.task_mode)
        self.task.go()
        return {
            "loss": self.task.mlogloss_cv_mean,
            "attachments": {
                "std": self.task.mlogloss_cv_std,
            },
            "status": STATUS_OK,
        }

    def run(self):
        """Run the TPE search and log the best trial's mlogloss and params."""
        start = time.time()
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_int_param(best_params)
        trial_mloglosses = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_mloglosses)  # lower logloss is better
        best_mlogloss_mean = trial_mloglosses[best_ind]
        best_mlogloss_std = trials.trial_attachments(
            trials.trials[best_ind])["std"]
        self.logger.info("-" * 50)
        self.logger.info("Best mlogloss")
        self.logger.info(" Mean: %.6f" % best_mlogloss_mean)
        self.logger.info(" std: %.6f" % best_mlogloss_std)
        self.logger.info("Best param")
        self.task._print_param_dict(best_params)
        end = time.time()
        _sec = end - start
        _min = int(_sec / 60.)
        self.logger.info("Time")
        if _min > 0:
            self.logger.info(" %d mins" % _min)
        else:
            self.logger.info(" %d secs" % _sec)
        self.logger.info("-" * 50)
class TaskOptimizer:
    """Hyperopt search across all (task_mode, learner) combinations.

    Maximizes CV AUC (hyperopt minimizes, so ``loss = -auc_cv_mean``).
    "single" models must run before "stacking": stacking reads the
    level-1 prediction CSVs the single runs write to config.OUTPUT_DIR.
    Results are appended as rows to config.MODEL_COMPARE.
    """

    def __init__(self, X_train, y_train, X_test, y_test, cv=5,
                 max_evals=2, verbose=True):
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.n_iter = cv
        self.max_evals = max_evals
        self.verbose = verbose
        self.trial_counter = 0

    def _obj(self, param_dict, task_mode):
        """Hyperopt objective for the learner currently being optimized."""
        self.trial_counter += 1
        param_dict = self.param_space._convert_int_param(param_dict)
        # FIX: attribute renamed from the misspelled "leaner_name"
        # (set and read only within this class, so the rename is safe).
        if self.learner_name == 'ensemble':
            learner = EnsembleLearner(param_dict)
        else:
            learner = Learner(self.learner_name, param_dict)
        suffix = "_Id@%s" % str(self.trial_counter)
        prefix = "%s" % task_mode
        if task_mode == 'single':
            self.task = Task(learner, self.X_train, self.y_train,
                             self.X_test, self.y_test, self.n_iter,
                             prefix, suffix, self.logger, self.verbose)
        elif task_mode == "stacking":
            # Level-1 features: concatenate the per-model prediction CSVs
            # written by the earlier "single" runs.
            train_fnames = glob.iglob(
                "%s/train_single*.csv" % config.OUTPUT_DIR)
            test_fnames = glob.iglob(
                "%s/test_single*.csv" % config.OUTPUT_DIR)
            stacking_level1_train = pd.concat(
                [pd.read_csv(f) for f in train_fnames], axis=1)
            stacking_level1_test = pd.concat(
                [pd.read_csv(f) for f in test_fnames], axis=1)
            # Align test columns to the train column order.
            stacking_level1_test = stacking_level1_test[
                stacking_level1_train.columns]
            self.task = Task(learner, stacking_level1_train, self.y_train,
                             stacking_level1_test, self.y_test, self.n_iter,
                             prefix, suffix, self.logger, self.verbose)
        else:
            # Fail fast instead of using a stale/unset self.task below.
            raise ValueError("Invalid task_mode: %s" % task_mode)
        self.task.go()
        return {
            "loss": -self.task.auc_cv_mean,  # negate: maximize AUC
            "attachments": {
                "train_auc": self.task.train_auc,
                "test_auc": self.task.test_auc,
                "refit_time": self.task.refit_time,
            },
            "status": STATUS_OK,
        }

    def run(self):
        """Optimize every learner in ``learner_space`` and record results."""
        write_header = True  # write the comparison-file header only once
        self.param_space = ModelParamSpace()
        # Iterate in learner_space's own (insertion) order so "single"
        # runs before "stacking". The original also re-checked
        # ``task_mode not in learner_space`` here, which can never be True
        # while iterating the dict's own keys — dead branch removed.
        for task_mode in learner_space:
            print('start %s model task' % task_mode)
            for learner in learner_space[task_mode]:
                print('optimizing %s' % learner)
                self.learner_name = learner
                start = time.time()
                trials = Trials()
                logname = "%s_%s_%s.log" % (
                    task_mode, learner,
                    datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"))
                self.logger = logging_utils._get_logger(
                    config.LOG_DIR, logname)
                best = fmin(lambda param: self._obj(param, task_mode),
                            self.param_space._build_space(learner),
                            tpe.suggest, self.max_evals, trials)
                end = time.time()
                time_cost = time_utils.time_diff(start, end)
                self.logger.info("Hyperopt_Time")
                self.logger.info(" %s" % time_cost)
                self.logger.info("-" * 50)
                print(" Finished %d hyper train with %d-fold cv, took %s" %
                      (self.max_evals, self.n_iter, time_cost))
                best_params = space_eval(
                    self.param_space._build_space(learner), best)
                best_params = self.param_space._convert_int_param(best_params)
                trial_loss = np.asarray(trials.losses(), dtype=float)
                best_ind = np.argmin(trial_loss)
                auc_cv_mean = -trial_loss[best_ind]  # undo the negation
                test_auc = trials.trial_attachments(
                    trials.trials[best_ind])["test_auc"]
                refit_time = trials.trial_attachments(
                    trials.trials[best_ind])["refit_time"]
                # FIX: was f.writelines(<str>) plus a redundant f.close()
                # inside the with-block; write() each row and let the
                # context manager close the file.
                with open(config.MODEL_COMPARE, 'a+') as f:
                    if write_header:
                        write_header = False
                        f.write(
                            "task_mode learner auc_cv_mean test_auc refit_time best_params \n"
                        )
                    f.write("%s %s %.4f %.4f %s %s \n" %
                            (task_mode, learner, auc_cv_mean, test_auc,
                             refit_time, best_params))