def __init__(self, model_name, data_name, cv_runs, max_evals, logger):
    """Record the run configuration and build the model's hyperparameter space."""
    # Identity of the experiment: which model, on which dataset.
    self.model_name, self.data_name = model_name, data_name
    # Search budget and CV repetitions.
    self.cv_runs, self.max_evals = cv_runs, max_evals
    self.logger = logger
    # Search space is keyed by the model's name.
    self.model_param_space = ModelParamSpace(self.model_name)
class TaskOptimizer:
    """Hyperopt (TPE) search driver that maximizes the Task's filtered MRR.

    Each trial runs the Task's cross-validation and reports ``-mrr`` as the
    loss, so hyperopt's minimization maximizes MRR. The remaining ranking
    metrics (raw/filtered Hits@k) travel as trial attachments.
    """

    def __init__(self, model_name, data_name, max_evals, cv_runs, logger, eval_by_rel):
        # model_name selects both the model and its hyperparameter space.
        self.model_name = model_name
        self.data_name = data_name
        self.max_evals = max_evals
        self.cv_runs = cv_runs
        self.logger = logger
        # Forwarded verbatim to Task; semantics defined there — TODO confirm.
        self.eval_by_rel = eval_by_rel
        self.model_param_space = ModelParamSpace(self.model_name)

    def _obj(self, param_dict):
        """Hyperopt objective: one CV evaluation of a sampled point.

        Returns the hyperopt result dict: "loss" is -MRR (filtered), and the
        attachments carry the trial's other ranking metrics.
        """
        # Convert the sampled point into concrete model parameters.
        param_dict = self.model_param_space._convert_into_param(param_dict)
        self.task = Task(
            self.model_name, self.data_name, self.cv_runs, param_dict,
            self.logger, self.eval_by_rel)
        self.task.cv()
        # TF1-style cleanup: drop the default graph so the next trial starts fresh.
        tf.reset_default_graph()
        ret = {
            "loss": -self.task.mrr,  # negated: hyperopt minimizes
            "attachments": {
                "raw_mrr": self.task.raw_mrr,
                "raw_hits_at1": self.task.raw_hits_at1,
                "raw_hits_at3": self.task.raw_hits_at3,
                "raw_hits_at10": self.task.raw_hits_at10,
                "hits_at1": self.task.hits_at1,
                "hits_at3": self.task.hits_at3,
                "hits_at10": self.task.hits_at10,
            },
            "status": STATUS_OK
        }
        return ret

    def run(self):
        """Run the TPE search for max_evals trials and log the best trial's metrics."""
        trials = Trials()
        best = fmin(
            self._obj, self.model_param_space._build_space(), tpe.suggest,
            self.max_evals, trials)
        # Resolve the winning sample back into a concrete parameter dict.
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_into_param(best_params)
        trial_loss = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_loss)
        mrr = -trial_loss[best_ind]  # undo the negation applied in _obj
        # Pull the attached metrics of the winning trial.
        raw_mrr = trials.trial_attachments(trials.trials[best_ind])["raw_mrr"]
        raw_hits_at1 = trials.trial_attachments(trials.trials[best_ind])["raw_hits_at1"]
        raw_hits_at3 = trials.trial_attachments(trials.trials[best_ind])["raw_hits_at3"]
        raw_hits_at10 = trials.trial_attachments(trials.trials[best_ind])["raw_hits_at10"]
        hits_at1 = trials.trial_attachments(trials.trials[best_ind])["hits_at1"]
        hits_at3 = trials.trial_attachments(trials.trials[best_ind])["hits_at3"]
        hits_at10 = trials.trial_attachments(trials.trials[best_ind])["hits_at10"]
        self.logger.info("-" * 50)
        self.logger.info("Best CV Results:")
        self.logger.info("Raw MRR: %.6f" % raw_mrr)
        self.logger.info("Filtered MRR: %.6f" % mrr)
        self.logger.info("Raw: Hits@1 %.3f Hits@3 %.3f Hits@10 %.3f" % (
            raw_hits_at1, raw_hits_at3, raw_hits_at10))
        self.logger.info("Filtered: Hits@1 %.3f Hits@3 %.3f Hits@10 %.3f" % (
            hits_at1, hits_at3, hits_at10))
        self.logger.info("Best Param:")
        # NOTE(review): self.task is the LAST trial's task, not the best one;
        # only its _print_param_dict helper is used here.
        self.task._print_param_dict(best_params)
        self.logger.info("-" * 50)
class TaskOptimizer:
    """Hyperopt (TPE) search driver minimizing the Task's CV-mean RMSE.

    Supports two modes: "single" (plain Feature/Task) and "stacking"
    (StackingFeature/StackingTask). FIX: an unknown task_mode now raises
    ValueError immediately; previously _get_feature fell through and died
    with UnboundLocalError on the unset local.
    """

    def __init__(self, task_mode, learner_name, feature_name, logger,
                 max_evals=100, verbose=True, refit_once=False):
        # "single" or "stacking"; decides feature source and Task class.
        self.task_mode = task_mode
        self.learner_name = learner_name
        self.feature_name = feature_name
        self.feature = self._get_feature()
        self.logger = logger
        self.max_evals = max_evals
        self.verbose = verbose
        # refit_once is only consumed by StackingTask.
        self.refit_once = refit_once
        self.trial_counter = 0  # gives each trial a unique suffix
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        """Return the feature set matching task_mode.

        Raises:
            ValueError: if task_mode is neither "single" nor "stacking".
        """
        if self.task_mode == "single":
            return Feature(self.feature_name)
        if self.task_mode == "stacking":
            return StackingFeature(self.feature_name)
        # Explicit failure instead of the previous implicit UnboundLocalError.
        raise ValueError("unknown task_mode: %r" % (self.task_mode,))

    def _obj(self, param_dict):
        """Hyperopt objective: run one trial; loss is the CV-mean RMSE."""
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        suffix = "_[Id@%s]"%str(self.trial_counter)
        if self.task_mode == "single":
            task = Task(learner, self.feature, suffix, self.logger, self.verbose)
        elif self.task_mode == "stacking":
            task = StackingTask(learner, self.feature, suffix, self.logger,
                                self.verbose, self.refit_once)
        else:
            # Mirrors _get_feature; normally unreachable because __init__
            # already validated task_mode.
            raise ValueError("unknown task_mode: %r" % (self.task_mode,))
        task.go()
        # RMSE is already a loss, so it is minimized directly.
        ret = {"loss": task.rmse_cv_mean,
               "attachments": {"std": task.rmse_cv_std},
               "status": STATUS_OK}
        return ret

    def run(self):
        """Run TPE for max_evals trials; log best RMSE (mean/std), params, wall time."""
        start = time.time()
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_int_param(best_params)
        trial_rmses = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_rmses)
        best_rmse_mean = trial_rmses[best_ind]
        best_rmse_std = trials.trial_attachments(trials.trials[best_ind])["std"]
        self.logger.info("-"*50)
        self.logger.info("Best RMSE")
        self.logger.info(" Mean: %.6f"%best_rmse_mean)
        self.logger.info(" std: %.6f"%best_rmse_std)
        self.logger.info("Best param")
        for k,v in sorted(best_params.items()):
            self.logger.info(" %s: %s" % (k,v))
        end = time.time()
        _sec = end - start
        _min = int(_sec/60.)
        self.logger.info("Time")
        if _min > 0:
            self.logger.info(" %d mins"%_min)
        else:
            self.logger.info(" %d secs"%_sec)
        self.logger.info("-"*50)
class TaskOptimizer:
    """Hyperopt (TPE) search driver maximizing the Task's exact accuracy (eacc)."""

    def __init__(self, model_name, data_name, cv_runs, max_evals, logger,
                 cvonfull, savename='', portion=100):
        self.model_name = model_name
        self.data_name = data_name
        self.cv_runs = cv_runs
        self.max_evals = max_evals
        self.logger = logger
        # Passed through to Task.cv(); presumably "run CV on the full set" — TODO confirm.
        self.cvonfull = cvonfull
        self.save_name = savename
        self.model_param_space = ModelParamSpace(
            self.model_name)  # get the param dict via dict name
        # Forwarded to Task; presumably a percentage of data to use — TODO confirm.
        self.portion = portion

    def _obj(self, param_dict):
        """Hyperopt objective: one CV evaluation; loss is -eacc (negated for minimization)."""
        param_dict = self.model_param_space._convert_into_param(param_dict)
        self.task = Task(model_name=self.model_name,
                         data_name=self.data_name,
                         cv_runs=self.cv_runs,
                         params_dict=param_dict,
                         logger=self.logger,
                         save_name=self.save_name,
                         portion=self.portion)
        self.task.cv(self.cvonfull)
        # TF1-style cleanup between trials.
        tf.reset_default_graph()
        ret = {
            "loss": -self.task.eacc,  # negated: hyperopt minimizes
            # "attachments": {
            #     "pacc": self.task.pacc,
            #     # "eacc": self.task.eacc,
            # },
            "status": STATUS_OK
        }
        return ret

    def run(self):
        """Run TPE for max_evals trials; log the best exact accuracy and its parameters."""
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_into_param(best_params)
        trial_loss = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_loss)
        best_loss = -trial_loss[best_ind]  # undo the negation from _obj
        # best_pacc = trials.trial_attachments(trials.trials[best_ind])["pacc"]
        # best_eacc = trials.trial_attachments(trials.trials[best_ind])["eacc"]
        self.logger.info("-" * 50)
        self.logger.info("Best Exact Accuracy %.3f " % (best_loss, ))
        self.logger.info("Best Param:")
        # NOTE(review): self.task is the LAST trial's task, not the best one.
        self.task._print_param_dict(best_params)
        self.logger.info("-" * 50)
class TaskOptimizer:
    """Hyperopt (TPE) search driver maximizing the task's mean Kendall's tau.

    Each trial reports ``loss = 1 - mean_tau``, so minimizing the loss
    maximizes tau. FIX: run() previously selected the best trial with
    ``np.argmin`` over the taus themselves, which picks the WORST trial;
    it now uses ``np.argmax``.
    """

    def __init__(self, learner_name, feature_name, logger, max_evals=100,
                 verbose=True, plot_importance=False):
        self.learner_name = learner_name
        self.feature_name = feature_name
        self.feature = self._get_feature()
        self.logger = logger
        self.max_evals = max_evals
        self.verbose = verbose
        self.plot_importance = plot_importance
        self.trial_counter = 0  # gives each trial a unique suffix
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        """Load the feature set named at construction time."""
        return Feature(self.feature_name)

    def _obj(self, param_dict):
        """Hyperopt objective: evaluate one sampled hyperparameter point."""
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        suffix = "_[Id@%s]"%str(self.trial_counter)
        self.task = Task(learner, self.feature, suffix, self.logger,
                         self.verbose, self.plot_importance)
        self.task.go()
        ret = {
            "loss": 1. - self.task.mean_tau,  # minimize 1 - tau == maximize tau
            "attachments": {
                "std_tau": self.task.std_tau,
            },
            "status": STATUS_OK,
        }
        return ret

    def run(self):
        """Run the TPE search and log the best mean tau and its parameters."""
        start = time.time()
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_int_param(best_params)
        # Stored losses are 1 - tau; convert back to taus.
        trial_mean_taus = 1 - np.asarray(trials.losses(), dtype=float)
        # FIX: the best trial has the HIGHEST tau (lowest loss);
        # the original np.argmin here selected the worst trial.
        best_ind = np.argmax(trial_mean_taus)
        best_mean_tau = trial_mean_taus[best_ind]
        self.logger.info("-"*50)
        self.logger.info("Best Mean Kendalls Tau: %.6f" % (best_mean_tau))
        self.logger.info("Best param")
        # NOTE(review): self.task is the LAST trial's task, not the best one;
        # only its _print_param_dict helper is used here.
        self.task._print_param_dict(best_params)
        end = time.time()
        _sec = end - start
        _min = int(_sec/60.)
        self.logger.info("Time")
        if _min > 0:
            self.logger.info(" %d mins"%_min)
        else:
            self.logger.info(" %d secs"%_sec)
        self.logger.info("-"*50)
def __init__(self, learner_name, feature_name, logger, max_evals=100, verbose=True, plot_importance=False):
    """Capture optimizer settings, resolve the feature set, build the param space."""
    # Which learner/feature pair is being tuned.
    self.learner_name, self.feature_name = learner_name, feature_name
    # Feature resolution reads self.feature_name, so it must come after it.
    self.feature = self._get_feature()
    # Logging and run-control knobs.
    self.logger, self.verbose = logger, verbose
    self.max_evals, self.plot_importance = max_evals, plot_importance
    # Counts trials so each one gets a unique suffix.
    self.trial_counter = 0
    self.model_param_space = ModelParamSpace(self.learner_name)
def run(self):
    """Run one hyperopt search per (task_mode, learner) in learner_space.

    For each pair: create a fresh logger and Trials object, run TPE for
    self.max_evals trials, then append the best trial's CV/test AUC, refit
    time and parameters to the model-comparison file.

    FIX: removed the dead ``if task_mode not in learner_space`` branch
    (always false — the loop iterates learner_space's own keys) and the
    redundant ``f.close()`` inside the ``with`` block.
    """
    # Becomes 0 once the header row has been written.
    line_index = 1
    self.param_space = ModelParamSpace()
    for task_mode in learner_space:
        print('start %s model task' % task_mode)
        for learner in learner_space[task_mode]:
            print('optimizing %s' % learner)
            # NOTE: attribute spelling "leaner_name" kept — _obj reads it.
            self.leaner_name = learner
            start = time.time()
            trials = Trials()
            logname = "%s_%s_%s.log" % (
                task_mode, learner,
                datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"))
            self.logger = logging_utils._get_logger(
                config.LOG_DIR, logname)
            # task_mode is constant for the duration of this fmin call, so
            # capturing it in the lambda is safe.
            best = fmin(lambda param: self._obj(param, task_mode),
                        self.param_space._build_space(learner), tpe.suggest,
                        self.max_evals, trials)
            end = time.time()
            time_cost = time_utils.time_diff(start, end)
            self.logger.info("Hyperopt_Time")
            self.logger.info(" %s" % time_cost)
            self.logger.info("-" * 50)
            print(" Finished %d hyper train with %d-fold cv, took %s" %
                  (self.max_evals, self.n_iter, time_cost))
            best_params = space_eval(
                self.param_space._build_space(learner), best)
            best_params = self.param_space._convert_int_param(best_params)
            trial_loss = np.asarray(trials.losses(), dtype=float)
            best_ind = np.argmin(trial_loss)
            auc_cv_mean = -trial_loss[best_ind]  # _obj stores -auc as the loss
            test_auc = trials.trial_attachments(
                trials.trials[best_ind])["test_auc"]
            refit_time = trials.trial_attachments(
                trials.trials[best_ind])["refit_time"]
            # The context manager closes the file on exit.
            with open(config.MODEL_COMPARE, 'a+') as f:
                if line_index:
                    line_index = 0
                    f.writelines(
                        "task_mode learner auc_cv_mean test_auc refit_time best_params \n"
                    )
                f.writelines("%s %s %.4f %.4f %s %s \n" %
                             (task_mode, learner, auc_cv_mean, test_auc,
                              refit_time, best_params))
class TaskOptimizer:
    """Hyperopt (TPE) search driver maximizing exact accuracy (eacc), tracking partial accuracy."""

    def __init__(self, model_name, data_name, cv_runs, max_evals, logger):
        self.model_name = model_name
        self.data_name = data_name
        self.cv_runs = cv_runs
        self.max_evals = max_evals
        self.logger = logger
        self.model_param_space = ModelParamSpace(self.model_name)

    def _obj(self, param_dict):
        """Hyperopt objective: one CV evaluation; loss is -eacc, pacc is attached."""
        param_dict = self.model_param_space._convert_into_param(param_dict)
        self.task = Task(self.model_name, self.data_name, self.cv_runs,
                         param_dict, self.logger)
        self.task.cv()
        #self.task.save()
        # TF1-style cleanup between trials.
        tf.reset_default_graph()
        ret = {
            "loss": -self.task.eacc,  # negated: hyperopt minimizes
            "attachments": {
                "pacc": self.task.pacc,
                # "eacc": self.task.eacc,
            },
            "status": STATUS_OK
        }
        return ret

    def run(self):
        """Run TPE for max_evals trials; log the best exact/partial accuracy and params."""
        trials = Trials()
        best = fmin(self._obj,
                    space=self.model_param_space._build_space(),
                    algo=tpe.suggest,
                    max_evals=self.max_evals,
                    trials=trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_into_param(best_params)
        trial_loss = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_loss)
        best_loss = -trial_loss[best_ind]  # undo the negation from _obj
        best_pacc = trials.trial_attachments(trials.trials[best_ind])["pacc"]
        # best_eacc = trials.trial_attachments(trials.trials[best_ind])["eacc"]
        self.logger.info("-" * 50)
        # NOTE(review): "Parital" typo is in the runtime log string; left as-is.
        self.logger.info(
            "Best Exact Accuracy %.3f with Parital Accuracy %.3f" %
            (best_loss, best_pacc))
        self.logger.info("Best Param:")
        # NOTE(review): self.task is the LAST trial's task, not the best one.
        self.task._print_param_dict(best_params)
        self.logger.info("-" * 50)
def __init__(self, model_name, data_name, cv_runs, max_evals, logger, cvonfull, savename='', portion=100):
    """Store the experiment configuration and build the model's search space."""
    # Experiment identity and CV/search budget.
    self.model_name, self.data_name = model_name, data_name
    self.cv_runs, self.max_evals = cv_runs, max_evals
    self.logger = logger
    # Flags forwarded to the Task: full-set CV, checkpoint name, data portion.
    self.cvonfull = cvonfull
    self.save_name = savename
    # Parameter space is looked up by the model's name.
    self.model_param_space = ModelParamSpace(self.model_name)
    self.portion = portion
def __init__(self, task_mode, learner_name, data_config, logger, max_evals=100, verbose=True, refit_once=False, plot_importance=False):
    """Capture optimizer settings, resolve the feature set, build the param space."""
    # Mode ("single"/"stacking"), learner identity and data configuration.
    self.task_mode, self.learner_name = task_mode, learner_name
    self.data_config = data_config
    # Feature resolution happens via the helper, after the fields it reads are set.
    self.feature = self._get_feature()
    # Logging and run-control knobs.
    self.logger, self.verbose = logger, verbose
    self.max_evals = max_evals
    self.refit_once, self.plot_importance = refit_once, plot_importance
    # Counts trials so each one gets a unique suffix.
    self.trial_counter = 0
    self.model_param_space = ModelParamSpace(self.learner_name)
class TaskOptimizer:
    """TPE hyperparameter search ranking trials by cross-validated accuracy."""

    def __init__(self, model_name, data_name, max_evals, cv_runs, logger):
        """Record the run configuration and build the model's search space."""
        self.model_name, self.data_name = model_name, data_name
        self.max_evals, self.cv_runs = max_evals, cv_runs
        self.logger = logger
        self.model_param_space = ModelParamSpace(self.model_name)

    def _obj(self, param_dict):
        """Hyperopt objective: one cross-validated evaluation of a sampled point."""
        concrete = self.model_param_space._convert_into_param(param_dict)
        self.task = Task(self.model_name, self.data_name, self.cv_runs,
                         concrete, self.logger)
        self.task.cv()
        # Clear the TF1 default graph between trials.
        tf.reset_default_graph()
        return {
            # Hyperopt minimizes, so report negative accuracy.
            "loss": -self.task.acc,
            "attachments": {
                "loss": self.task.loss,
            },
            "status": STATUS_OK
        }

    def run(self):
        """Execute the search and log the best trial's loss, accuracy and parameters."""
        hp_trials = Trials()
        best_point = fmin(self._obj, self.model_param_space._build_space(),
                          tpe.suggest, self.max_evals, hp_trials)
        winning_params = space_eval(self.model_param_space._build_space(),
                                    best_point)
        winning_params = self.model_param_space._convert_into_param(winning_params)
        losses = np.asarray(hp_trials.losses(), dtype=float)
        winner = np.argmin(losses)
        best_acc = -losses[winner]  # undo the negation applied in _obj
        best_cv_loss = hp_trials.trial_attachments(
            hp_trials.trials[winner])["loss"]
        self.logger.info("-" * 50)
        self.logger.info("Best CV Results:")
        self.logger.info("Loss: %.3f" % best_cv_loss)
        self.logger.info("Accuracy: %.3f" % best_acc)
        self.logger.info("Best Param:")
        self.task._print_param_dict(winning_params)
        self.logger.info("-" * 50)
class TaskOptimizer:
    """Hyperopt (TPE) search driver maximizing the Task's average precision.

    FIX: run() logged ``trial_loss[best_ind]`` directly as the best AP, but
    _obj stores ``-ap`` as the loss — so the reported "Best Average
    Precision" was negative. It is now negated back.
    """

    def __init__(self, model_name, max_evals, runs, logger):
        self.model_name = model_name
        self.max_evals = max_evals
        self.runs = runs
        self.logger = logger
        self.model_param_space = ModelParamSpace(self.model_name)

    def _obj(self, param_dict):
        """Hyperopt objective: one CV evaluation; loss is -AP, loss/acc attached."""
        param_dict = self.model_param_space._convert_into_param(param_dict)
        self.task = Task(self.model_name, self.runs, param_dict, self.logger)
        self.task.cv()
        # TF1-style cleanup between trials.
        tf.reset_default_graph()
        ret = {
            "loss": -self.task.ap,  # negated: hyperopt minimizes
            "attachments": {
                "loss": self.task.loss,
                "acc": self.task.acc,
            },
            "status": STATUS_OK
        }
        return ret

    def run(self):
        """Run TPE for max_evals trials; log the best AP with its loss/accuracy."""
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_into_param(best_params)
        trial_loss = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_loss)
        # FIX: undo the negation from _obj (was: trial_loss[best_ind],
        # which logged a negative average precision).
        best_ap = -trial_loss[best_ind]
        best_loss = trials.trial_attachments(trials.trials[best_ind])["loss"]
        best_acc = trials.trial_attachments(trials.trials[best_ind])["acc"]
        self.logger.info("-" * 50)
        self.logger.info("Best Average Precision: %.3f" % best_ap)
        self.logger.info("with Loss %.3f, Accuracy %.3f" % (best_loss, best_acc))
        self.logger.info("Best Param:")
        # NOTE(review): self.task is the LAST trial's task, not the best one.
        self.task._print_param_dict(best_params)
        self.logger.info("-" * 50)
def __init__(self, task_mode, learner_name, feature_name, logger, max_evals=100, verbose=True, refit_once=False):
    """Capture optimizer settings, resolve the feature set, build the param space."""
    # Mode ("single"/"stacking") plus the learner/feature pair being tuned.
    self.task_mode = task_mode
    self.learner_name, self.feature_name = learner_name, feature_name
    # Feature resolution reads the fields above, so it comes after them.
    self.feature = self._get_feature()
    # Logging and run-control knobs.
    self.logger, self.verbose = logger, verbose
    self.max_evals, self.refit_once = max_evals, refit_once
    # Counts trials so each one gets a unique suffix.
    self.trial_counter = 0
    self.model_param_space = ModelParamSpace(self.learner_name)
class TaskOptimizer:
    """Hyperopt driver for single/stacking tasks.

    NOTE(review): this class is truncated in this chunk — run() has no body
    here. Compare with the sibling optimizers in this file for its expected
    shape.
    """

    def __init__(self, task_mode, learner_name, feature_name, logger,
                 max_evals=100, verbose=True, refit_once=False,
                 plot_importance=False):
        # "single" or "stacking"; decides feature source and Task class.
        self.task_mode = task_mode
        self.learner_name = learner_name
        self.feature_name = feature_name
        self.feature = self._get_feature()
        self.logger = logger
        self.max_evals = max_evals
        self.verbose = verbose
        # refit_once is only consumed by StackingTask.
        self.refit_once = refit_once
        self.plot_importance = plot_importance
        self.trial_counter = 0  # gives each trial a unique suffix
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        # NOTE(review): no else branch — an unknown task_mode raises
        # UnboundLocalError on `feature` below instead of a clear error.
        if self.task_mode == "single":
            feature = Feature(self.feature_name)
        elif self.task_mode == "stacking":
            feature = StackingFeature(self.feature_name)
        return feature

    def _obj(self, param_dict):
        """Hyperopt objective: build and run one Task/StackingTask trial."""
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        suffix = "_[Id@%s]" % str(self.trial_counter)
        if self.task_mode == "single":
            self.task = Task(learner, self.feature, suffix, self.logger,
                             self.verbose, self.plot_importance)
        elif self.task_mode == "stacking":
            self.task = StackingTask(learner, self.feature, suffix,
                                     self.logger, self.verbose,
                                     self.refit_once)
        self.task.go()
        ret = {
            # NOTE(review): almost certainly a bug — hyperopt expects a
            # numeric loss, but the whole Task object is put here (compare
            # the sibling class that reports task.rmse_cv_mean with
            # task.rmse_cv_std as the "std" attachment).
            "loss": self.task,
            "attachments": {
                "std": self.task
            },
            "status": STATUS_OK,
        }
        return ret

    def run(self):
        """NOTE(review): implementation missing — the source chunk is truncated here."""
def __init__(self, model_name, max_evals, runs, logger):
    """Record the run configuration and build the model's hyperparameter space."""
    # Which model is tuned, and how large the search/evaluation budget is.
    self.model_name = model_name
    self.max_evals, self.runs = max_evals, runs
    self.logger = logger
    # Search space is keyed by the model's name.
    self.model_param_space = ModelParamSpace(self.model_name)
class TaskOptimizer:
    """Hyperopt (TPE) search driver minimizing CV-mean multiclass logloss.

    Supports "single" (Feature/Task) and "stacking"
    (StackingFeature/StackingTask) modes.
    """

    def __init__(self, task_mode, learner_name, feature_name, logger,
                 max_evals=100, verbose=True, refit_once=False,
                 plot_importance=False):
        # "single" or "stacking"; decides feature source and Task class.
        self.task_mode = task_mode
        self.learner_name = learner_name
        self.feature_name = feature_name
        self.feature = self._get_feature()
        self.logger = logger
        self.max_evals = max_evals
        self.verbose = verbose
        # refit_once is only consumed by StackingTask.
        self.refit_once = refit_once
        self.plot_importance = plot_importance
        self.trial_counter = 0  # gives each trial a unique suffix
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        # NOTE(review): no else branch — an unknown task_mode raises
        # UnboundLocalError on `feature` below instead of a clear error.
        if self.task_mode == "single":
            feature = Feature(self.feature_name)
        elif self.task_mode == "stacking":
            feature = StackingFeature(self.feature_name)
        return feature

    def _obj(self, param_dict):
        """Hyperopt objective: run one trial; loss is the CV-mean mlogloss."""
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        suffix = "_[Id@%s]" % str(self.trial_counter)
        if self.task_mode == "single":
            self.task = Task(learner, self.feature, suffix, self.logger,
                             self.verbose, self.plot_importance)
        elif self.task_mode == "stacking":
            self.task = StackingTask(learner, self.feature, suffix,
                                     self.logger, self.verbose,
                                     self.refit_once)
        self.task.go()
        ret = {
            # mlogloss is already a loss, minimized directly (no negation).
            "loss": self.task.mlogloss_cv_mean,
            "attachments": {
                "std": self.task.mlogloss_cv_std,
            },
            "status": STATUS_OK,
        }
        return ret

    def run(self):
        """Run TPE for max_evals trials; log best mlogloss (mean/std), params, wall time."""
        start = time.time()
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_int_param(best_params)
        trial_mloglosses = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_mloglosses)
        best_mlogloss_mean = trial_mloglosses[best_ind]
        best_mlogloss_std = trials.trial_attachments(
            trials.trials[best_ind])["std"]
        self.logger.info("-" * 50)
        self.logger.info("Best mlogloss")
        self.logger.info(" Mean: %.6f" % best_mlogloss_mean)
        self.logger.info(" std: %.6f" % best_mlogloss_std)
        self.logger.info("Best param")
        # NOTE(review): self.task is the LAST trial's task, not the best one;
        # only its _print_param_dict helper is used here.
        self.task._print_param_dict(best_params)
        end = time.time()
        _sec = end - start
        _min = int(_sec / 60.)
        self.logger.info("Time")
        if _min > 0:
            self.logger.info(" %d mins" % _min)
        else:
            self.logger.info(" %d secs" % _sec)
        self.logger.info("-" * 50)
class TaskOptimizer:
    """Hyperopt (TPE) sweep over every learner in learner_space, maximizing CV AUC.

    Holds the train/test split; run() iterates (task_mode, learner) pairs and
    appends each winner's metrics to a comparison file.
    """

    def __init__(self, X_train, y_train, X_test, y_test, cv=5, max_evals=2,
                 verbose=True):
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        # Number of CV folds (stored under the historical name n_iter).
        self.n_iter = cv
        self.max_evals = max_evals
        self.verbose = verbose
        self.trial_counter = 0  # gives each trial a unique suffix

    def _obj(self, param_dict, task_mode):
        """Hyperopt objective: one trial for the learner currently being swept.

        Reads self.leaner_name / self.param_space / self.logger, which run()
        sets before calling fmin.
        """
        self.trial_counter += 1
        param_dict = self.param_space._convert_int_param(param_dict)
        # NOTE(review): "leaner_name" is a typo for "learner_name", but it is
        # the attribute run() actually sets — renaming must be done in both.
        if self.leaner_name == 'ensemble':
            learner = EnsembleLearner(param_dict)
        else:
            learner = Learner(self.leaner_name, param_dict)
        suffix = "_Id@%s" % str(self.trial_counter)
        prefix = "%s" % task_mode
        if task_mode == 'single':
            self.task = Task(learner, self.X_train, self.y_train, self.X_test,
                             self.y_test, self.n_iter, prefix, suffix,
                             self.logger, self.verbose)
        elif task_mode == "stacking":
            # Level-1 stacking: concatenate the per-learner single-model
            # prediction CSVs into new train/test matrices.
            train_fnames = glob.iglob("%s/train_single*.csv" % config.OUTPUT_DIR)
            test_fnames = glob.iglob("%s/test_single*.csv" % config.OUTPUT_DIR)
            stacking_level1_train = pd.concat(
                [pd.read_csv(f) for f in train_fnames], axis=1)
            stacking_level1_test = pd.concat(
                [pd.read_csv(f) for f in test_fnames], axis=1)
            # Align test columns to the train column order.
            stacking_level1_test = stacking_level1_test[
                stacking_level1_train.columns]
            self.task = Task(learner, stacking_level1_train, self.y_train,
                             stacking_level1_test, self.y_test, self.n_iter,
                             prefix, suffix, self.logger, self.verbose)
        self.task.go()
        result = {
            "loss": -self.task.auc_cv_mean,  # negated: hyperopt minimizes
            "attachments": {
                "train_auc": self.task.train_auc,
                "test_auc": self.task.test_auc,
                "refit_time": self.task.refit_time,
            },
            "status": STATUS_OK,
        }
        return result

    def run(self):
        """Run one TPE search per (task_mode, learner) and record each winner."""
        # Becomes 0 once the header row of the comparison file is written.
        line_index = 1
        self.param_space = ModelParamSpace()
        for task_mode in learner_space.keys():
            # NOTE(review): this check is dead code — task_mode comes from
            # learner_space's own keys, so it can never be missing.
            if task_mode not in learner_space:
                print('%s model missed' % task_mode)
                continue
            print('start %s model task' % task_mode)
            for learner in learner_space[task_mode]:
                print('optimizing %s' % learner)
                self.leaner_name = learner
                start = time.time()
                trials = Trials()
                logname = "%s_%s_%s.log" % (
                    task_mode, learner,
                    datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"))
                self.logger = logging_utils._get_logger(
                    config.LOG_DIR, logname)
                # task_mode is constant during this fmin call, so the lambda
                # capture is safe.
                best = fmin(lambda param: self._obj(param, task_mode),
                            self.param_space._build_space(learner),
                            tpe.suggest, self.max_evals, trials)
                end = time.time()
                time_cost = time_utils.time_diff(start, end)
                self.logger.info("Hyperopt_Time")
                self.logger.info(" %s" % time_cost)
                self.logger.info("-" * 50)
                print(" Finished %d hyper train with %d-fold cv, took %s" %
                      (self.max_evals, self.n_iter, time_cost))
                best_params = space_eval(
                    self.param_space._build_space(learner), best)
                best_params = self.param_space._convert_int_param(best_params)
                trial_loss = np.asarray(trials.losses(), dtype=float)
                best_ind = np.argmin(trial_loss)
                auc_cv_mean = -trial_loss[best_ind]  # undo negation from _obj
                test_auc = trials.trial_attachments(
                    trials.trials[best_ind])["test_auc"]
                refit_time = trials.trial_attachments(
                    trials.trials[best_ind])["refit_time"]
                with open(config.MODEL_COMPARE, 'a+') as f:
                    if line_index:
                        line_index = 0
                        f.writelines(
                            "task_mode learner auc_cv_mean test_auc refit_time best_params \n"
                        )
                    f.writelines("%s %s %.4f %.4f %s %s \n" %
                                 (task_mode, learner, auc_cv_mean, test_auc,
                                  refit_time, best_params))
                    # NOTE(review): f.close() inside the with block is redundant.
                    f.close()