# Example 1
class TaskOptimizer:
    """Hyperopt-driven hyper-parameter search for one learner/feature pair.

    Repeatedly builds a ``Task`` (or ``StackingTask``) for parameter dicts
    sampled by TPE and minimizes the cross-validated RMSE the task reports.
    """

    def __init__(self, task_mode, learner_name, feature_name, logger, 
                    max_evals=100, verbose=True, refit_once=False):
        # task_mode selects between a plain feature task ("single") and a
        # stacking task ("stacking"); anything else raises in _get_feature.
        self.task_mode = task_mode
        self.learner_name = learner_name
        self.feature_name = feature_name
        self.feature = self._get_feature()
        self.logger = logger
        self.max_evals = max_evals
        self.verbose = verbose
        self.refit_once = refit_once
        # Incremented once per hyperopt evaluation; used in run suffixes.
        self.trial_counter = 0
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        """Load the feature set matching ``task_mode``.

        Raises:
            ValueError: for an unrecognized ``task_mode`` (the original code
                fell through and crashed with ``UnboundLocalError`` instead).
        """
        if self.task_mode == "single":
            return Feature(self.feature_name)
        elif self.task_mode == "stacking":
            return StackingFeature(self.feature_name)
        raise ValueError("Unknown task_mode: %s" % self.task_mode)

    def _obj(self, param_dict):
        """Hyperopt objective: run one CV task and return its result dict."""
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        suffix = "_[Id@%s]"%str(self.trial_counter)
        if self.task_mode == "single":
            task = Task(learner, self.feature, suffix, self.logger, self.verbose)
        elif self.task_mode == "stacking":
            task = StackingTask(learner, self.feature, suffix, self.logger, self.verbose, self.refit_once)
        else:
            # Guard the previously-unchecked fallthrough (stale/undefined task).
            raise ValueError("Unknown task_mode: %s" % self.task_mode)
        task.go()
        # Minimize mean CV RMSE; the std rides along as an attachment.
        ret = {"loss": task.rmse_cv_mean, "attachments": {"std": task.rmse_cv_std}, "status": STATUS_OK}
        return ret

    def run(self):
        """Run the search, then log the best RMSE, params and elapsed time."""
        start = time.time()
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(), tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_int_param(best_params)
        trial_rmses = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_rmses)
        best_rmse_mean = trial_rmses[best_ind]
        best_rmse_std = trials.trial_attachments(trials.trials[best_ind])["std"]
        self.logger.info("-"*50)
        self.logger.info("Best RMSE")
        self.logger.info("      Mean: %.6f"%best_rmse_mean)
        self.logger.info("      std: %.6f"%best_rmse_std)
        self.logger.info("Best param")
        for k,v in sorted(best_params.items()):
            self.logger.info("      %s: %s" % (k,v))
        end = time.time()
        _sec = end - start
        _min = int(_sec/60.)
        self.logger.info("Time")
        if _min > 0:
            self.logger.info("      %d mins"%_min)
        else:
            self.logger.info("      %d secs"%_sec)
        self.logger.info("-"*50)
# Example 2
class TaskOptimizer:
    """Hyperopt search maximizing mean Kendall's tau for one learner/feature.

    The objective reports ``1 - mean_tau`` as loss so that hyperopt's
    minimization maximizes tau.
    """

    def __init__(self, learner_name, feature_name, logger,
                    max_evals=100, verbose=True, plot_importance=False):
        self.learner_name = learner_name
        self.feature_name = feature_name
        self.feature = self._get_feature()
        self.logger = logger
        self.max_evals = max_evals
        self.verbose = verbose
        self.plot_importance = plot_importance
        # Incremented once per hyperopt evaluation; used in task suffixes.
        self.trial_counter = 0
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        """Load the feature set for ``feature_name``."""
        return Feature(self.feature_name)

    def _obj(self, param_dict):
        """Hyperopt objective: loss is ``1 - mean_tau`` (hyperopt minimizes)."""
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        suffix = "_[Id@%s]"%str(self.trial_counter)
        self.task = Task(learner, self.feature, suffix, self.logger, self.verbose, self.plot_importance)
        self.task.go()
        ret = {
            "loss": 1. - self.task.mean_tau,
            "attachments": {
                "std_tau": self.task.std_tau,
            },
            "status": STATUS_OK,
        }
        return ret

    def run(self):
        """Run the search and log the best mean tau and its parameters."""
        start = time.time()
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(), tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_int_param(best_params)
        # Losses were stored as 1 - tau; convert back with the same transform.
        trial_mean_taus = 1 - np.asarray(trials.losses(), dtype=float)
        # BUG FIX: a larger tau is BETTER, so the best trial is the argmax of
        # the taus (equivalently the argmin of the losses).  The original
        # np.argmin over the taus selected the worst trial.
        best_ind = np.argmax(trial_mean_taus)
        best_mean_tau = trial_mean_taus[best_ind]
        self.logger.info("-"*50)
        self.logger.info("Best Mean Kendalls Tau: %.6f" % (best_mean_tau))
        self.logger.info("Best param")
        self.task._print_param_dict(best_params)
        end = time.time()
        _sec = end - start
        _min = int(_sec/60.)
        self.logger.info("Time")
        if _min > 0:
            self.logger.info("      %d mins"%_min)
        else:
            self.logger.info("      %d secs"%_sec)
        self.logger.info("-"*50)
# Example 3
class TaskOptimizer:
    def __init__(self, task_mode, learner_name, feature_name, logger,
                 max_evals=100, verbose=True, refit_once=False, plot_importance=False):
        """Capture the search configuration and resolve derived state."""
        # Core identifiers.
        self.task_mode = task_mode
        self.learner_name = learner_name
        self.feature_name = feature_name
        # Logging / search behaviour.
        self.logger = logger
        self.max_evals = max_evals
        self.verbose = verbose
        self.refit_once = refit_once
        self.plot_importance = plot_importance
        # Derived state: feature set, per-trial counter, hyperopt space.
        self.feature = self._get_feature()
        self.trial_counter = 0
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        """Return the Feature/StackingFeature matching ``task_mode``.

        Raises:
            ValueError: for an unrecognized ``task_mode`` (previously the
                code fell through and raised ``UnboundLocalError`` on the
                ``return`` instead).
        """
        if self.task_mode == "single":
            return Feature(self.feature_name)
        elif self.task_mode == "stacking":
            return StackingFeature(self.feature_name)
        raise ValueError("Unknown task_mode: %s" % self.task_mode)

    def _obj(self, param_dict):
        """Hyperopt objective: train/evaluate one sampled parameter dict.

        Returns a hyperopt result dict whose "loss" is the mean CV RMSE.
        """
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        suffix = "_[Id@%s]" % str(self.trial_counter)
        if self.task_mode == "single":
            self.task = Task(learner, self.feature, suffix, self.logger, self.verbose, self.plot_importance)
        elif self.task_mode == "stacking":
            self.task = StackingTask(learner, self.feature, suffix, self.logger, self.verbose, self.refit_once)
        else:
            # Guard the previously-unchecked fallthrough (stale self.task).
            raise ValueError("Unknown task_mode: %s" % self.task_mode)
        self.task.go()
        # BUG FIX: the original returned the Task object itself as both
        # "loss" and "std"; hyperopt requires a numeric loss.  Use the CV
        # metrics computed by Task.go().
        # NOTE(review): attribute names assumed from the sibling optimizer
        # that reports rmse_cv_mean/rmse_cv_std -- confirm against Task.
        ret = {
            "loss": self.task.rmse_cv_mean,
            "attachments": {
                "std": self.task.rmse_cv_std,
            },
            "status": STATUS_OK,
        }
        return ret

    def run(self):
# Example 4
class TaskOptimizer:
    """Hyperopt search minimizing multi-class log-loss for one learner/feature.

    Builds a ``Task`` (or ``StackingTask``) per sampled parameter dict and
    minimizes the cross-validated mlogloss the task reports.
    """

    def __init__(self,
                 task_mode,
                 learner_name,
                 feature_name,
                 logger,
                 max_evals=100,
                 verbose=True,
                 refit_once=False,
                 plot_importance=False):
        # task_mode selects between "single" and "stacking"; anything else
        # raises in _get_feature.
        self.task_mode = task_mode
        self.learner_name = learner_name
        self.feature_name = feature_name
        self.feature = self._get_feature()
        self.logger = logger
        self.max_evals = max_evals
        self.verbose = verbose
        self.refit_once = refit_once
        self.plot_importance = plot_importance
        # Incremented once per hyperopt evaluation; used in task suffixes.
        self.trial_counter = 0
        self.model_param_space = ModelParamSpace(self.learner_name)

    def _get_feature(self):
        """Load the feature set matching ``task_mode``.

        Raises:
            ValueError: for an unrecognized ``task_mode`` (the original code
                fell through and crashed with ``UnboundLocalError`` instead).
        """
        if self.task_mode == "single":
            return Feature(self.feature_name)
        elif self.task_mode == "stacking":
            return StackingFeature(self.feature_name)
        raise ValueError("Unknown task_mode: %s" % self.task_mode)

    def _obj(self, param_dict):
        """Hyperopt objective: run one CV task; loss is the mean mlogloss."""
        self.trial_counter += 1
        param_dict = self.model_param_space._convert_int_param(param_dict)
        learner = Learner(self.learner_name, param_dict)
        suffix = "_[Id@%s]" % str(self.trial_counter)
        if self.task_mode == "single":
            self.task = Task(learner, self.feature, suffix, self.logger,
                             self.verbose, self.plot_importance)
        elif self.task_mode == "stacking":
            self.task = StackingTask(learner, self.feature, suffix,
                                     self.logger, self.verbose,
                                     self.refit_once)
        else:
            # Guard the previously-unchecked fallthrough (stale self.task).
            raise ValueError("Unknown task_mode: %s" % self.task_mode)
        self.task.go()
        ret = {
            "loss": self.task.mlogloss_cv_mean,
            "attachments": {
                "std": self.task.mlogloss_cv_std,
            },
            "status": STATUS_OK,
        }
        return ret

    def run(self):
        """Run the search, then log the best mlogloss, params and time."""
        start = time.time()
        trials = Trials()
        best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, self.max_evals, trials)
        best_params = space_eval(self.model_param_space._build_space(), best)
        best_params = self.model_param_space._convert_int_param(best_params)
        trial_mloglosses = np.asarray(trials.losses(), dtype=float)
        best_ind = np.argmin(trial_mloglosses)
        best_mlogloss_mean = trial_mloglosses[best_ind]
        best_mlogloss_std = trials.trial_attachments(
            trials.trials[best_ind])["std"]
        self.logger.info("-" * 50)
        self.logger.info("Best mlogloss")
        self.logger.info("      Mean: %.6f" % best_mlogloss_mean)
        self.logger.info("      std: %.6f" % best_mlogloss_std)
        self.logger.info("Best param")
        self.task._print_param_dict(best_params)
        end = time.time()
        _sec = end - start
        _min = int(_sec / 60.)
        self.logger.info("Time")
        if _min > 0:
            self.logger.info("      %d mins" % _min)
        else:
            self.logger.info("      %d secs" % _sec)
        self.logger.info("-" * 50)
# Example 5
class TaskOptimizer:
    def __init__(self,
                 X_train,
                 y_train,
                 X_test,
                 y_test,
                 cv=5,
                 max_evals=2,
                 verbose=True):
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.n_iter = cv
        self.max_evals = max_evals
        self.verbose = verbose
        self.trial_counter = 0

    def _obj(self, param_dict, task_mode):
        """Run one cross-validated task for a sampled parameter dict.

        The loss is the negated mean CV AUC so that hyperopt's minimization
        maximizes AUC; train/test AUC and refit time ride along as
        attachments.
        """
        self.trial_counter += 1
        param_dict = self.param_space._convert_int_param(param_dict)
        # 'ensemble' is a special learner name backed by EnsembleLearner.
        learner = (EnsembleLearner(param_dict)
                   if self.leaner_name == 'ensemble'
                   else Learner(self.leaner_name, param_dict))
        suffix = "_Id@%s" % str(self.trial_counter)
        prefix = "%s" % task_mode
        if task_mode == 'single':
            self.task = Task(learner, self.X_train, self.y_train, self.X_test,
                             self.y_test, self.n_iter, prefix, suffix,
                             self.logger, self.verbose)
        elif task_mode == "stacking":
            # Build level-1 features from the per-learner prediction files,
            # aligning the test columns with the train columns.
            train_files = glob.iglob("%s/train_single*.csv" %
                                     config.OUTPUT_DIR)
            test_files = glob.iglob("%s/test_single*.csv" % config.OUTPUT_DIR)
            level1_train = pd.concat([pd.read_csv(f) for f in train_files],
                                     axis=1)
            level1_test = pd.concat([pd.read_csv(f) for f in test_files],
                                    axis=1)
            level1_test = level1_test[level1_train.columns]
            self.task = Task(learner, level1_train, self.y_train,
                             level1_test, self.y_test, self.n_iter,
                             prefix, suffix, self.logger, self.verbose)
        self.task.go()
        return {
            "loss": -self.task.auc_cv_mean,
            "attachments": {
                "train_auc": self.task.train_auc,
                "test_auc": self.task.test_auc,
                "refit_time": self.task.refit_time,
            },
            "status": STATUS_OK,
        }

    def run(self):
        """Optimize every learner in ``learner_space`` for each task mode.

        For each (task_mode, learner) pair: run hyperopt, log timing to a
        per-run log file, and append the best CV AUC, test AUC, refit time
        and parameters to the comparison file at ``config.MODEL_COMPARE``.
        """
        write_header = True  # emit the column header only once per run
        self.param_space = ModelParamSpace()
        # NOTE: the original also checked `task_mode not in learner_space`
        # inside this loop -- dead code, since we iterate its own keys.
        for task_mode in learner_space:
            print('start %s model task' % task_mode)
            for learner in learner_space[task_mode]:
                print('optimizing %s' % learner)
                self.leaner_name = learner
                start = time.time()
                trials = Trials()
                logname = "%s_%s_%s.log" % (
                    task_mode, learner,
                    datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"))
                self.logger = logging_utils._get_logger(
                    config.LOG_DIR, logname)
                best = fmin(lambda param: self._obj(param, task_mode),
                            self.param_space._build_space(learner),
                            tpe.suggest, self.max_evals, trials)

                end = time.time()
                time_cost = time_utils.time_diff(start, end)
                self.logger.info("Hyperopt_Time")
                self.logger.info("     %s" % time_cost)
                self.logger.info("-" * 50)
                print("   Finished %d hyper train with %d-fold cv, took %s" %
                      (self.max_evals, self.n_iter, time_cost))

                best_params = space_eval(
                    self.param_space._build_space(learner), best)
                best_params = self.param_space._convert_int_param(best_params)
                trial_loss = np.asarray(trials.losses(), dtype=float)
                # Loss is -auc_cv_mean, so the argmin is the best AUC.
                best_ind = np.argmin(trial_loss)
                auc_cv_mean = -trial_loss[best_ind]
                test_auc = trials.trial_attachments(
                    trials.trials[best_ind])["test_auc"]
                refit_time = trials.trial_attachments(
                    trials.trials[best_ind])["refit_time"]

                # FIX: the original called f.close() AFTER the with-block
                # (double close) and used writelines() on a single string
                # (which iterates it char by char); use write() instead.
                with open(config.MODEL_COMPARE, 'a+') as f:
                    if write_header:
                        write_header = False
                        f.write(
                            "task_mode   learner   auc_cv_mean   test_auc   refit_time   best_params \n"
                        )
                    f.write("%s   %s   %.4f   %.4f   %s   %s \n" %
                            (task_mode, learner, auc_cv_mean, test_auc,
                             refit_time, best_params))