Code example #1
0
File: task.py - Project: WeidongLi-KG/JointRuleKGE
 def __init__(self, model_name, data_name, cv_runs, max_evals, logger):
     """Record the run configuration and build the model's parameter space."""
     for attr, value in (("model_name", model_name),
                         ("data_name", data_name),
                         ("cv_runs", cv_runs),
                         ("max_evals", max_evals),
                         ("logger", logger)):
         setattr(self, attr, value)
     # The search space is looked up by model name.
     self.model_param_space = ModelParamSpace(self.model_name)
Code example #2
0
class TaskOptimizer:
    """Hyperopt-driven search over model hyper-parameters.

    Each trial cross-validates a ``Task`` and reports its MRR; ``run``
    logs the MRR / Hits@k metrics of the best trial found.
    """

    def __init__(self, model_name, data_name, max_evals, cv_runs, logger, eval_by_rel):
        self.model_name = model_name
        self.data_name = data_name
        self.max_evals = max_evals
        self.cv_runs = cv_runs
        self.logger = logger
        self.eval_by_rel = eval_by_rel
        self.model_param_space = ModelParamSpace(self.model_name)

    def _obj(self, param_dict):
        """Hyperopt objective: train with ``param_dict``, return -MRR as loss."""
        param_dict = self.model_param_space._convert_into_param(param_dict)
        self.task = Task(
            self.model_name, self.data_name, self.cv_runs,
            param_dict, self.logger, self.eval_by_rel)
        self.task.cv()
        # Drop the TF graph between trials to avoid memory growth.
        tf.reset_default_graph()
        return {
            # Hyperopt minimizes, so negate the (higher-is-better) MRR.
            "loss": -self.task.mrr,
            "attachments": {
                "raw_mrr": self.task.raw_mrr,
                "raw_hits_at1": self.task.raw_hits_at1,
                "raw_hits_at3": self.task.raw_hits_at3,
                "raw_hits_at10": self.task.raw_hits_at10,
                "hits_at1": self.task.hits_at1,
                "hits_at3": self.task.hits_at3,
                "hits_at10": self.task.hits_at10,
            },
            "status": STATUS_OK,
        }

    def run(self):
        """Run the search and log the metrics of the best trial."""
        trials = Trials()
        best = fmin(
            self._obj, self.model_param_space._build_space(),
            tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_into_param(best_params)
        trial_loss = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_loss)
        mrr = -trial_loss[best_ind]
        # Fetch the best trial's attachments once instead of seven times.
        attachments = trials.trial_attachments(trials.trials[best_ind])
        self.logger.info("-" * 50)
        self.logger.info("Best CV Results:")
        self.logger.info("Raw MRR: %.6f" % attachments["raw_mrr"])
        self.logger.info("Filtered MRR: %.6f" % mrr)
        self.logger.info("Raw: Hits@1 %.3f Hits@3 %.3f Hits@10 %.3f" % (
            attachments["raw_hits_at1"], attachments["raw_hits_at3"],
            attachments["raw_hits_at10"]))
        self.logger.info("Filtered: Hits@1 %.3f Hits@3 %.3f Hits@10 %.3f" % (
            attachments["hits_at1"], attachments["hits_at3"],
            attachments["hits_at10"]))
        self.logger.info("Best Param:")
        self.task._print_param_dict(best_params)
        self.logger.info("-" * 50)
Code example #3
0
File: task.py - Project: MrSnark/Kaggle_HomeDepot
class TaskOptimizer:
    """Hyperopt tuner for a single or stacking learner.

    Each trial trains a ``Task`` (or ``StackingTask``) and reports its CV
    RMSE; ``run`` logs the best trial's mean/std RMSE and parameters.
    """

    def __init__(self, task_mode, learner_name, feature_name, logger,
                 max_evals=100, verbose=True, refit_once=False):
        self.task_mode = task_mode
        self.learner_name = learner_name
        self.feature_name = feature_name
        self.feature = self._get_feature()
        self.logger = logger
        self.max_evals = max_evals
        self.verbose = verbose
        self.refit_once = refit_once
        self.trial_counter = 0
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        """Load the feature set for the configured task mode.

        Raises:
            ValueError: for an unknown ``task_mode`` (previously this fell
                through and raised a confusing UnboundLocalError).
        """
        if self.task_mode == "single":
            return Feature(self.feature_name)
        if self.task_mode == "stacking":
            return StackingFeature(self.feature_name)
        raise ValueError("invalid task_mode: %r" % self.task_mode)

    def _obj(self, param_dict):
        """Hyperopt objective: train once, return mean CV RMSE (std attached)."""
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        # Unique suffix so each trial's artifacts are distinguishable.
        suffix = "_[Id@%s]" % str(self.trial_counter)
        if self.task_mode == "single":
            task = Task(learner, self.feature, suffix, self.logger, self.verbose)
        elif self.task_mode == "stacking":
            task = StackingTask(learner, self.feature, suffix, self.logger,
                                self.verbose, self.refit_once)
        task.go()
        return {"loss": task.rmse_cv_mean,
                "attachments": {"std": task.rmse_cv_std},
                "status": STATUS_OK}

    def run(self):
        """Search for ``max_evals`` trials, then log the best RMSE and params."""
        start = time.time()
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(), tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_int_param(best_params)
        trial_rmses = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_rmses)
        best_rmse_mean = trial_rmses[best_ind]
        best_rmse_std = trials.trial_attachments(trials.trials[best_ind])["std"]
        self.logger.info("-"*50)
        self.logger.info("Best RMSE")
        self.logger.info("      Mean: %.6f"%best_rmse_mean)
        self.logger.info("      std: %.6f"%best_rmse_std)
        self.logger.info("Best param")
        for k,v in sorted(best_params.items()):
            self.logger.info("      %s: %s" % (k,v))
        end = time.time()
        _sec = end - start
        _min = int(_sec/60.)
        self.logger.info("Time")
        if _min > 0:
            self.logger.info("      %d mins"%_min)
        else:
            self.logger.info("      %d secs"%_sec)
        self.logger.info("-"*50)
Code example #4
0
class TaskOptimizer:
    """Tune a model with hyperopt, optionally cross-validating on the full set."""

    def __init__(self,
                 model_name,
                 data_name,
                 cv_runs,
                 max_evals,
                 logger,
                 cvonfull,
                 savename='',
                 portion=100):
        self.model_name = model_name
        self.data_name = data_name
        self.cv_runs = cv_runs
        self.max_evals = max_evals
        self.logger = logger
        self.cvonfull = cvonfull
        self.save_name = savename
        # Parameter search space is registered under the model's name.
        self.model_param_space = ModelParamSpace(self.model_name)
        self.portion = portion

    def _obj(self, param_dict):
        """Hyperopt objective: negated exact accuracy of one CV run."""
        params = self.model_param_space._convert_into_param(param_dict)
        self.task = Task(model_name=self.model_name,
                         data_name=self.data_name,
                         cv_runs=self.cv_runs,
                         params_dict=params,
                         logger=self.logger,
                         save_name=self.save_name,
                         portion=self.portion)
        self.task.cv(self.cvonfull)
        # Reset the TF graph so successive trials do not accumulate state.
        tf.reset_default_graph()
        return {"loss": -self.task.eacc, "status": STATUS_OK}

    def run(self):
        """Search the space, then log the best accuracy and its parameters."""
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_into_param(best_params)
        losses = np.asarray(trials.losses(), dtype=float)
        winner = np.argmin(losses)
        best_eacc = -losses[winner]
        self.logger.info("-" * 50)
        self.logger.info("Best Exact Accuracy %.3f " % (best_eacc, ))
        self.logger.info("Best Param:")
        self.task._print_param_dict(best_params)
        self.logger.info("-" * 50)
Code example #5
0
class TaskOptimizer:
    """Hyperopt tuner that maximizes mean Kendall's tau."""

    def __init__(self, learner_name, feature_name, logger,
                    max_evals=100, verbose=True, plot_importance=False):
        self.learner_name = learner_name
        self.feature_name = feature_name
        self.feature = self._get_feature()
        self.logger = logger
        self.max_evals = max_evals
        self.verbose = verbose
        self.plot_importance = plot_importance
        self.trial_counter = 0
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        """Load the feature set named at construction time."""
        return Feature(self.feature_name)

    def _obj(self, param_dict):
        """One trial: loss is 1 - mean tau, so minimizing loss maximizes tau."""
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        suffix = "_[Id@%s]"%str(self.trial_counter)
        self.task = Task(learner, self.feature, suffix, self.logger, self.verbose, self.plot_importance)
        self.task.go()
        ret = {
            "loss": 1. - self.task.mean_tau,
            "attachments": {
                "std_tau": self.task.std_tau,
            },
            "status": STATUS_OK,
        }
        return ret

    def run(self):
        """Search, then log the best (largest) mean Kendall's tau and params."""
        start = time.time()
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(), tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_int_param(best_params)
        trial_losses = np.asarray(trials.losses(), dtype=float)
        # BUG FIX: losses are 1 - tau, so the best trial has the SMALLEST
        # loss.  The old code took argmin over the taus themselves
        # (1 - losses), which selected the WORST trial.
        best_ind = np.argmin(trial_losses)
        best_mean_tau = 1. - trial_losses[best_ind]
        self.logger.info("-"*50)
        self.logger.info("Best Mean Kendalls Tau: %.6f" % (best_mean_tau))
        self.logger.info("Best param")
        self.task._print_param_dict(best_params)
        end = time.time()
        _sec = end - start
        _min = int(_sec/60.)
        self.logger.info("Time")
        if _min > 0:
            self.logger.info("      %d mins"%_min)
        else:
            self.logger.info("      %d secs"%_sec)
        self.logger.info("-"*50)
Code example #6
0
 def __init__(self, learner_name, feature_name, logger,
                 max_evals=100, verbose=True, plot_importance=False):
     """Capture run options, resolve the features, and build the search space."""
     self.__dict__.update(
         learner_name=learner_name,
         feature_name=feature_name,
         logger=logger,
         max_evals=max_evals,
         verbose=verbose,
         plot_importance=plot_importance,
         trial_counter=0,
     )
     # Feature loading reads self.feature_name, so it follows the update above.
     self.feature = self._get_feature()
     self.model_param_space = ModelParamSpace(self.learner_name)
Code example #7
0
    def run(self):
        """Optimize every learner listed in ``learner_space`` and append a
        per-learner comparison row to ``config.MODEL_COMPARE``.

        The old ``if task_mode not in learner_space`` guard was dead code
        (always false while iterating the dict's own keys) and was removed.
        """
        header_pending = True
        self.param_space = ModelParamSpace()
        for task_mode in learner_space:
            print('start %s model task' % task_mode)
            for learner in learner_space[task_mode]:
                print('optimizing %s' % learner)
                # NOTE: attribute spelled "leaner_name" elsewhere; kept
                # as-is because _obj reads it.
                self.leaner_name = learner
                start = time.time()
                trials = Trials()
                logname = "%s_%s_%s.log" % (
                    task_mode, learner,
                    datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"))
                self.logger = logging_utils._get_logger(
                    config.LOG_DIR, logname)
                best = fmin(lambda param: self._obj(param, task_mode),
                            self.param_space._build_space(learner),
                            tpe.suggest, self.max_evals, trials)

                end = time.time()
                time_cost = time_utils.time_diff(start, end)
                self.logger.info("Hyperopt_Time")
                self.logger.info("     %s" % time_cost)
                self.logger.info("-" * 50)
                print("   Finished %d hyper train with %d-fold cv, took %s" %
                      (self.max_evals, self.n_iter, time_cost))

                best_params = space_eval(
                    self.param_space._build_space(learner), best)
                best_params = self.param_space._convert_int_param(best_params)
                trial_loss = np.asarray(trials.losses(), dtype=float)
                best_ind = np.argmin(trial_loss)
                auc_cv_mean = -trial_loss[best_ind]  # stored loss is -AUC
                # Fetch the best trial's attachments once.
                attachments = trials.trial_attachments(trials.trials[best_ind])
                test_auc = attachments["test_auc"]
                refit_time = attachments["refit_time"]

                # The context manager closes the file; the old trailing
                # f.close() after the with-block was redundant.
                with open(config.MODEL_COMPARE, 'a+') as f:
                    if header_pending:
                        header_pending = False
                        f.write(
                            "task_mode   learner   auc_cv_mean   test_auc   refit_time   best_params \n"
                        )
                    f.write("%s   %s   %.4f   %.4f   %s   %s \n" %
                            (task_mode, learner, auc_cv_mean, test_auc,
                             refit_time, best_params))
Code example #8
0
File: task.py - Project: Jordy-VL/SP-calibration-NER
class TaskOptimizer:
    """Hyperopt tuner that maximizes exact accuracy over CV runs."""

    def __init__(self, model_name, data_name, cv_runs, max_evals, logger):
        self.model_name = model_name
        self.data_name = data_name
        self.cv_runs = cv_runs
        self.max_evals = max_evals
        self.logger = logger
        self.model_param_space = ModelParamSpace(self.model_name)

    def _obj(self, param_dict):
        """One trial: loss is negated exact accuracy; partial accuracy attached."""
        param_dict = self.model_param_space._convert_into_param(param_dict)
        self.task = Task(self.model_name, self.data_name, self.cv_runs,
                         param_dict, self.logger)
        self.task.cv()
        # Clear the TF graph so trials do not leak state into each other.
        tf.reset_default_graph()
        ret = {
            "loss": -self.task.eacc,
            "attachments": {
                "pacc": self.task.pacc,
            },
            "status": STATUS_OK
        }
        return ret

    def run(self):
        """Run the search and log the best exact/partial accuracy and params."""
        trials = Trials()
        best = fmin(self._obj,
                    space=self.model_param_space._build_space(),
                    algo=tpe.suggest,
                    max_evals=self.max_evals,
                    trials=trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_into_param(best_params)
        trial_loss = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_loss)
        best_loss = -trial_loss[best_ind]
        best_pacc = trials.trial_attachments(trials.trials[best_ind])["pacc"]
        self.logger.info("-" * 50)
        # Fixed typo in the log message ("Parital" -> "Partial").
        self.logger.info(
            "Best Exact Accuracy %.3f with Partial Accuracy %.3f" %
            (best_loss, best_pacc))
        self.logger.info("Best Param:")
        self.task._print_param_dict(best_params)
        self.logger.info("-" * 50)
Code example #9
0
 def __init__(self,
              model_name,
              data_name,
              cv_runs,
              max_evals,
              logger,
              cvonfull,
              savename='',
              portion=100):
     """Store the run configuration and build the parameter search space."""
     attrs = zip(
         ("model_name", "data_name", "cv_runs", "max_evals",
          "logger", "cvonfull", "save_name", "portion"),
         (model_name, data_name, cv_runs, max_evals,
          logger, cvonfull, savename, portion))
     for name, value in attrs:
         setattr(self, name, value)
     # The search space is registered under the model's name.
     self.model_param_space = ModelParamSpace(self.model_name)
Code example #10
0
 def __init__(self,
              task_mode,
              learner_name,
              data_config,
              logger,
              max_evals=100,
              verbose=True,
              refit_once=False,
              plot_importance=False):
     """Record run options, load the feature set, and build the search space."""
     self.__dict__.update(
         task_mode=task_mode,
         learner_name=learner_name,
         data_config=data_config,
         logger=logger,
         max_evals=max_evals,
         verbose=verbose,
         refit_once=refit_once,
         plot_importance=plot_importance,
         trial_counter=0,
     )
     # _get_feature reads the attributes assigned above.
     self.feature = self._get_feature()
     self.model_param_space = ModelParamSpace(self.learner_name)
Code example #11
0
File: task.py - Project: billy-inn/refe
class TaskOptimizer:
    """Hyperopt search wrapper: maximizes accuracy and logs the best trial."""

    def __init__(self, model_name, data_name, max_evals, cv_runs, logger):
        """Keep the run settings and build the model's search space."""
        self.model_name = model_name
        self.data_name = data_name
        self.max_evals = max_evals
        self.cv_runs = cv_runs
        self.logger = logger
        self.model_param_space = ModelParamSpace(self.model_name)

    def _obj(self, param_dict):
        """One trial: train with the sampled params, return -accuracy."""
        params = self.model_param_space._convert_into_param(param_dict)
        self.task = Task(self.model_name, self.data_name, self.cv_runs,
                         params, self.logger)
        self.task.cv()
        # Drop the TF graph between trials.
        tf.reset_default_graph()
        return {
            "loss": -self.task.acc,
            "attachments": {"loss": self.task.loss},
            "status": STATUS_OK
        }

    def run(self):
        """Run max_evals trials and log the winner's loss/accuracy/params."""
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, self.max_evals, trials)
        raw_best = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_into_param(raw_best)
        losses = np.asarray(trials.losses(), dtype=float)
        idx = np.argmin(losses)
        best_acc = -losses[idx]
        attached_loss = trials.trial_attachments(trials.trials[idx])["loss"]
        self.logger.info("-" * 50)
        self.logger.info("Best CV Results:")
        self.logger.info("Loss: %.3f" % attached_loss)
        self.logger.info("Accuracy: %.3f" % best_acc)
        self.logger.info("Best Param:")
        self.task._print_param_dict(best_params)
        self.logger.info("-" * 50)
Code example #12
0
class TaskOptimizer:
    """Hyperopt tuner that maximizes average precision over repeated runs."""

    def __init__(self, model_name, max_evals, runs, logger):
        self.model_name = model_name
        self.max_evals = max_evals
        self.runs = runs
        self.logger = logger
        self.model_param_space = ModelParamSpace(self.model_name)

    def _obj(self, param_dict):
        """One trial: loss is negated average precision; loss/acc attached."""
        param_dict = self.model_param_space._convert_into_param(param_dict)
        self.task = Task(self.model_name, self.runs, param_dict, self.logger)
        self.task.cv()
        # Clear the TF graph so trials do not leak state into each other.
        tf.reset_default_graph()
        ret = {
            "loss": -self.task.ap,
            "attachments": {
                "loss": self.task.loss,
                "acc": self.task.acc,
            },
            "status": STATUS_OK
        }
        return ret

    def run(self):
        """Run the search and report the best AP with its loss/accuracy."""
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_into_param(best_params)
        trial_loss = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_loss)
        # BUG FIX: the stored loss is -AP, so negate it to recover the AP;
        # the old code logged the negated (negative) value.
        best_ap = -trial_loss[best_ind]
        attachments = trials.trial_attachments(trials.trials[best_ind])
        best_loss = attachments["loss"]
        best_acc = attachments["acc"]
        self.logger.info("-" * 50)
        self.logger.info("Best Average Precision: %.3f" % best_ap)
        self.logger.info("with Loss %.3f, Accuracy %.3f" %
                         (best_loss, best_acc))
        self.logger.info("Best Param:")
        self.task._print_param_dict(best_params)
        self.logger.info("-" * 50)
Code example #13
0
File: task.py - Project: yitang/Kaggle_HomeDepot
 def __init__(self, task_mode, learner_name, feature_name, logger,
                 max_evals=100, verbose=True, refit_once=False):
     """Set up optimizer state for one learner/feature combination."""
     for name, value in (("task_mode", task_mode),
                         ("learner_name", learner_name),
                         ("feature_name", feature_name),
                         ("logger", logger),
                         ("max_evals", max_evals),
                         ("verbose", verbose),
                         ("refit_once", refit_once),
                         ("trial_counter", 0)):
         setattr(self, name, value)
     # Feature resolution depends on task_mode/feature_name set above.
     self.feature = self._get_feature()
     self.model_param_space = ModelParamSpace(self.learner_name)
Code example #14
0
File: task_o.py - Project: gsangeryee/TGS
class TaskOptimizer:
    def __init__(self, task_mode, learner_name, feature_name, logger,
                 max_evals=100, verbose=True, refit_once=False, plot_importance=False):
        """Store run options, load the features, and build the search space."""
        self.__dict__.update(
            task_mode=task_mode,
            learner_name=learner_name,
            feature_name=feature_name,
            logger=logger,
            max_evals=max_evals,
            verbose=verbose,
            refit_once=refit_once,
            plot_importance=plot_importance,
            trial_counter=0,
        )
        # Feature loading depends on task_mode/feature_name set above.
        self.feature = self._get_feature()
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        """Load the feature set for the configured task mode.

        Raises:
            ValueError: for an unknown ``task_mode`` (previously this fell
                through and raised a confusing UnboundLocalError).
        """
        if self.task_mode == "single":
            return Feature(self.feature_name)
        if self.task_mode == "stacking":
            return StackingFeature(self.feature_name)
        raise ValueError("invalid task_mode: %r" % self.task_mode)

    def _obj(self, param_dict):
        """Hyperopt objective: run one trial and return its result dict."""
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        # Unique suffix so each trial's artifacts are distinguishable.
        suffix = "_[Id@%s]" % str(self.trial_counter)
        if self.task_mode == "single":
            self.task = Task(learner, self.feature, suffix, self.logger, self.verbose, self.plot_importance)
        elif self.task_mode =="stacking":
            self.task = StackingTask(learner, self.feature, suffix, self.logger, self.verbose, self.refit_once)
        self.task.go()
        # NOTE(review): "loss" and "std" are set to the Task object itself;
        # hyperopt requires a numeric loss, so these almost certainly should
        # be metric attributes of the task (sibling implementations in this
        # file use e.g. task.rmse_cv_mean / task.rmse_cv_std) — confirm and fix.
        ret = {
            "loss": self.task,
            "attachments": {
                "std": self.task
            },
            "status": STATUS_OK,
        }
        return  ret

    def run(self):
Code example #15
0
 def __init__(self, model_name, max_evals, runs, logger):
     """Keep the run settings and build the model's hyper-parameter space."""
     self.__dict__.update(model_name=model_name,
                          max_evals=max_evals,
                          runs=runs,
                          logger=logger)
     self.model_param_space = ModelParamSpace(self.model_name)
Code example #16
0
class TaskOptimizer:
    """Hyperopt tuner that minimizes multiclass log-loss for a learner."""

    def __init__(self,
                 task_mode,
                 learner_name,
                 feature_name,
                 logger,
                 max_evals=100,
                 verbose=True,
                 refit_once=False,
                 plot_importance=False):
        self.task_mode = task_mode
        self.learner_name = learner_name
        self.feature_name = feature_name
        self.feature = self._get_feature()
        self.logger = logger
        self.max_evals = max_evals
        self.verbose = verbose
        self.refit_once = refit_once
        self.plot_importance = plot_importance
        self.trial_counter = 0
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        """Load the feature set for the configured task mode.

        Raises:
            ValueError: for an unknown ``task_mode`` (previously this fell
                through and raised an UnboundLocalError on ``feature``).
        """
        if self.task_mode == "single":
            return Feature(self.feature_name)
        if self.task_mode == "stacking":
            return StackingFeature(self.feature_name)
        raise ValueError("invalid task_mode: %r" % self.task_mode)

    def _obj(self, param_dict):
        """One trial: train and return mean CV mlogloss (std attached)."""
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        # Unique suffix so each trial's artifacts are distinguishable.
        suffix = "_[Id@%s]" % str(self.trial_counter)
        if self.task_mode == "single":
            self.task = Task(learner, self.feature, suffix, self.logger,
                             self.verbose, self.plot_importance)
        elif self.task_mode == "stacking":
            self.task = StackingTask(learner, self.feature, suffix,
                                     self.logger, self.verbose,
                                     self.refit_once)
        self.task.go()
        ret = {
            "loss": self.task.mlogloss_cv_mean,
            "attachments": {
                "std": self.task.mlogloss_cv_std,
            },
            "status": STATUS_OK,
        }
        return ret

    def run(self):
        """Search the space, then log the best mlogloss, params and elapsed time."""
        start = time.time()
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_int_param(best_params)
        trial_mloglosses = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_mloglosses)
        best_mlogloss_mean = trial_mloglosses[best_ind]
        best_mlogloss_std = trials.trial_attachments(
            trials.trials[best_ind])["std"]
        self.logger.info("-" * 50)
        self.logger.info("Best mlogloss")
        self.logger.info("      Mean: %.6f" % best_mlogloss_mean)
        self.logger.info("      std: %.6f" % best_mlogloss_std)
        self.logger.info("Best param")
        self.task._print_param_dict(best_params)
        end = time.time()
        _sec = end - start
        _min = int(_sec / 60.)
        self.logger.info("Time")
        if _min > 0:
            self.logger.info("      %d mins" % _min)
        else:
            self.logger.info("      %d secs" % _sec)
        self.logger.info("-" * 50)
Code example #17
0
class TaskOptimizer:
    """Optimize every learner listed in ``learner_space`` with hyperopt.

    For each (task_mode, learner) pair, runs a hyperopt search and appends
    a comparison row (CV AUC, test AUC, refit time, best params) to
    ``config.MODEL_COMPARE``.
    """

    def __init__(self,
                 X_train,
                 y_train,
                 X_test,
                 y_test,
                 cv=5,
                 max_evals=2,
                 verbose=True):
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.n_iter = cv
        self.max_evals = max_evals
        self.verbose = verbose
        self.trial_counter = 0

    def _obj(self, param_dict, task_mode):
        """One hyperopt trial for the current learner; loss is -mean CV AUC."""
        self.trial_counter += 1
        param_dict = self.param_space._convert_int_param(param_dict)
        # NOTE: the attribute is (mis)spelled "leaner_name" throughout this
        # class; kept as-is so run() and _obj() stay consistent.
        if self.leaner_name == 'ensemble':
            learner = EnsembleLearner(param_dict)
        else:
            learner = Learner(self.leaner_name, param_dict)
        suffix = "_Id@%s" % str(self.trial_counter)
        prefix = "%s" % task_mode
        if task_mode == 'single':
            self.task = Task(learner, self.X_train, self.y_train, self.X_test,
                             self.y_test, self.n_iter, prefix, suffix,
                             self.logger, self.verbose)
        elif task_mode == "stacking":
            # Use the level-1 predictions of the single models as features.
            train_fnames = glob.iglob("%s/train_single*.csv" %
                                      config.OUTPUT_DIR)
            test_fnames = glob.iglob("%s/test_single*.csv" % config.OUTPUT_DIR)
            stacking_level1_train = pd.concat(
                [pd.read_csv(f) for f in train_fnames], axis=1)
            stacking_level1_test = pd.concat(
                [pd.read_csv(f) for f in test_fnames], axis=1)
            # Align test columns to the train column order.
            stacking_level1_test = stacking_level1_test[
                stacking_level1_train.columns]
            self.task = Task(learner, stacking_level1_train, self.y_train,
                             stacking_level1_test, self.y_test, self.n_iter,
                             prefix, suffix, self.logger, self.verbose)
        self.task.go()
        result = {
            "loss": -self.task.auc_cv_mean,
            "attachments": {
                "train_auc": self.task.train_auc,
                "test_auc": self.task.test_auc,
                "refit_time": self.task.refit_time,
            },
            "status": STATUS_OK,
        }
        return result

    def run(self):
        """Optimize each (task_mode, learner) pair and record the results.

        The old ``if task_mode not in learner_space`` guard was dead code
        (always false while iterating the dict's own keys) and was removed.
        """
        header_pending = True
        self.param_space = ModelParamSpace()
        for task_mode in learner_space:
            print('start %s model task' % task_mode)
            for learner in learner_space[task_mode]:
                print('optimizing %s' % learner)
                self.leaner_name = learner
                start = time.time()
                trials = Trials()
                logname = "%s_%s_%s.log" % (
                    task_mode, learner,
                    datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"))
                self.logger = logging_utils._get_logger(
                    config.LOG_DIR, logname)
                best = fmin(lambda param: self._obj(param, task_mode),
                            self.param_space._build_space(learner),
                            tpe.suggest, self.max_evals, trials)

                end = time.time()
                time_cost = time_utils.time_diff(start, end)
                self.logger.info("Hyperopt_Time")
                self.logger.info("     %s" % time_cost)
                self.logger.info("-" * 50)
                print("   Finished %d hyper train with %d-fold cv, took %s" %
                      (self.max_evals, self.n_iter, time_cost))

                best_params = space_eval(
                    self.param_space._build_space(learner), best)
                best_params = self.param_space._convert_int_param(best_params)
                trial_loss = np.asarray(trials.losses(), dtype=float)
                best_ind = np.argmin(trial_loss)
                auc_cv_mean = -trial_loss[best_ind]  # stored loss is -AUC
                # Fetch the best trial's attachments once.
                attachments = trials.trial_attachments(trials.trials[best_ind])
                test_auc = attachments["test_auc"]
                refit_time = attachments["refit_time"]

                # The context manager closes the file; the old trailing
                # f.close() after the with-block was redundant.
                with open(config.MODEL_COMPARE, 'a+') as f:
                    if header_pending:
                        header_pending = False
                        f.write(
                            "task_mode   learner   auc_cv_mean   test_auc   refit_time   best_params \n"
                        )
                    f.write("%s   %s   %.4f   %.4f   %s   %s \n" %
                            (task_mode, learner, auc_cv_mean, test_auc,
                             refit_time, best_params))