Example 1
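All six variants below come from MljarTuner's hill-climbing logic and lean on the same module-level names. A minimal preamble that makes the snippets importable; the logger setup is an assumption, and HillClimbing and MljarTuner are project-internal classes the examples reference:

import copy
import logging

import numpy as np
import pandas as pd

# Assumed module-level logger; the real project may configure it differently.
logger = logging.getLogger(__name__)
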
    def get_hill_climbing_params(self, current_models):

        # second, hill climbing
        for _ in range(self._hill_climbing_steps):
            # get models ordered by loss
            # TODO: refactor this callbacks.callbacks[0]
            scores = [m.get_final_loss() for m in current_models]
            model_types = [m.get_type() for m in current_models]
            df_models = pd.DataFrame({
                "model": current_models,
                "score": scores,
                "model_type": model_types
            })
            # group by model type (sorted by score within each group) for easier debugging
            df_models = df_models.groupby("model_type").apply(
                lambda x: x.sort_values("score"))
            unique_model_types = np.unique(df_models.model_type)

            for m_type in unique_model_types:
                if m_type in [
                        "Baseline",
                        "Decision Tree",
                        "Linear",
                        "Nearest Neighbors",
                ]:
                    # don't tune Baseline, Decision Tree, Linear, or Nearest Neighbors
                    continue
                models = df_models[df_models.model_type == m_type]["model"]

                for i in range(min(self._top_models_to_improve, len(models))):
                    m = models.iloc[i]  # positional lookup; the group-by result has a MultiIndex

                    for p in HillClimbing.get(
                            m.params.get("learner"),
                            self._ml_task,
                            len(current_models) + self._seed,
                    ):

                        model_indices = [
                            int(m.get_name().split("_")[0])
                            for m in current_models
                        ]
                        model_max_index = np.max(model_indices)

                        logger.info(
                            "Hill climbing step, for model #{0}".format(
                                model_max_index + 1))
                        if p is not None:
                            all_params = copy.deepcopy(m.params)
                            all_params["learner"] = p

                            all_params["name"] = self.get_model_name(
                                all_params["learner"]["model_type"],
                                model_max_index + 1)

                            unique_params_key = MljarTuner.get_params_key(
                                all_params)
                            if unique_params_key not in self._unique_params_keys:
                                self._unique_params_keys += [unique_params_key]
                                yield all_params
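
The deduplication above hinges on MljarTuner.get_params_key, whose body is not shown. A minimal sketch of one plausible canonicalization, assuming the params dict is JSON-serializable; this illustrates the idea rather than the project's actual implementation:

    @staticmethod
    def get_params_key(params):
        import json

        # Sort keys so logically identical configurations map to the same
        # string regardless of dict insertion order.
        return json.dumps(params, sort_keys=True, default=str)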
Example 2
    def get_hill_climbing_params(self, current_models):

        # second, hill climbing
        for _ in range(self._hill_climbing_steps):
            # get models ordered by loss
            # TODO: refactor this callbacks.callbacks[0]
            models = sorted(
                [(m.callbacks.callbacks[0].final_loss, m)
                 for m in current_models],
                key=lambda x: x[0],
            )
            for i in range(min(self._top_models_to_improve, len(models))):
                m = models[i][1]
                for p in HillClimbing.get(
                        m.params.get("learner"),
                        self._ml_task,
                        len(current_models) + self._seed,
                ):
                    logger.info("Hill climbing step, for model #{0}".format(
                        len(current_models) + 1))
                    if p is not None:
                        all_params = copy.deepcopy(m.params)
                        all_params["learner"] = p
                        all_params["name"] = f"model_{len(current_models) + 1}"

                        unique_params_key = MljarTuner.get_params_key(
                            all_params)
                        if unique_params_key not in self._unique_params_keys:
                            self._unique_params_keys += [unique_params_key]
                            yield all_params
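
Because this variant yields configurations lazily, the caller can train models as they arrive. A minimal driver sketch; the tuner and train_model names are assumptions for illustration:

    # Each yielded dict is a complete, uniquely named params set.
    for all_params in tuner.get_hill_climbing_params(current_models):
        new_model = train_model(all_params)
        if new_model is not None:
            current_models += [new_model]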
Example 3
    def hill_climbing_step(self, X, y):
        for hill_climbing in range(self._hill_climbing_steps):
            # get models ordered by loss
            models = []
            for m in self._models:
                models += [(m.callbacks.callbacks[0].final_loss, m)]
            models = sorted(models, key=lambda x: x[0])
            for i in range(min(self._top_models_to_improve, len(models))):
                m = models[i][1]
                for p in HillClimbing.get(m.params.get("learner")):
                    if p is not None:
                        all_params = copy.deepcopy(m.params)
                        all_params["learner"] = p
                        new_model = self.train_model(all_params, X, y)
                        if new_model is not None:
                            self._models += [new_model]
                            self.verbose_print(
                                "Learner {} final loss {} time {}".format(
                                    new_model.get_name(),
                                    new_model.get_final_loss(),
                                    new_model.get_train_time(),
                                )
                            )
                            self.log_train_time(
                                new_model.get_name(), new_model.get_train_time()
                            )
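
HillClimbing.get is the neighborhood generator every variant shares; its body is not shown here. A minimal sketch of the idea, perturbing a single numeric hyperparameter in both directions; the max_depth name and step size are assumptions, not the project's real search space:

class HillClimbing:
    @staticmethod
    def get(learner_params):
        # Yield neighbors of the current learner configuration by nudging
        # one hyperparameter one step up and one step down.
        for step in (-1, 1):
            neighbor = copy.deepcopy(learner_params)
            neighbor["max_depth"] = max(1, neighbor.get("max_depth", 3) + step)
            yield neighbor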
Example 4
    def get_hill_climbing_params(self, current_models):
        df_models, algorithms = self.df_models_algorithms(current_models)
        generated_params = []
        counts = {model_type: 0 for model_type in algorithms}

        for i in range(df_models.shape[0]):

            model_type = df_models["model_type"].iloc[i]
            counts[model_type] += 1
            if counts[model_type] > self._top_models_to_improve:
                continue

            m = df_models["model"].iloc[i]

            for p in HillClimbing.get(
                m.params.get("learner"), self._ml_task, len(current_models) + self._seed
            ):

                model_indices = [
                    int(m.get_name().split("_")[0]) for m in current_models
                ]
                model_max_index = np.max(model_indices)

                logger.info(
                    "Hill climbing step, for model #{0}".format(model_max_index + 1)
                )
                if p is not None:
                    all_params = copy.deepcopy(m.params)
                    all_params["learner"] = p

                    all_params["name"] = self.get_model_name(
                        all_params["learner"]["model_type"],
                        model_max_index + 1 + len(generated_params),
                    )

                    if "golden_features" in all_params["preprocessing"]:
                        all_params["name"] += "_GoldenFeatures"
                    if "drop_features" in all_params["preprocessing"] and len(
                        all_params["preprocessing"]["drop_features"]
                    ):
                        all_params["name"] += "_SelectedFeatures"
                    all_params["status"] = "initialized"
                    all_params["final_loss"] = None
                    all_params["train_time"] = None
                    unique_params_key = MljarTuner.get_params_key(all_params)

                    if unique_params_key not in self._unique_params_keys:
                        generated_params += [all_params]

        return generated_params
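
df_models_algorithms factors out the ranking table that Examples 1 and 6 build inline. A sketch consistent with that inline code; the exact sort order and return contract are assumptions:

    def df_models_algorithms(self, current_models):
        # Rank all models by score so the per-type counters above keep only
        # the best few of each algorithm; mirrors the inline code in Example 6.
        scores = [m.get_final_loss() for m in current_models]
        model_types = [m.get_type() for m in current_models]
        df_models = pd.DataFrame(
            {"model": current_models, "score": scores, "model_type": model_types}
        )
        df_models = df_models.sort_values("score")
        algorithms = list(df_models["model_type"].unique())
        return df_models, algorithms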
Example 5
    def hill_climbing_step(self, X, y):
        for hill_climbing in range(self._hill_climbing_steps):
            # get models ordered by loss
            models = []
            for m in self._models:
                models += [(m.callbacks.callbacks[0].final_loss, m)]
            models = sorted(models, key=lambda x: x[0])
            for i in range(min(self._top_models_to_improve, len(models))):
                m = models[i][1]
                for p in HillClimbing.get(
                    m.params.get("learner"), len(self._models) + self._seed
                ):
                    if p is not None:
                        all_params = copy.deepcopy(m.params)
                        all_params["learner"] = p
                        new_model = self.train_model(all_params, X, y)
                        self.keep_model(new_model)
                    else:
                        self._progress_bar.update(1)
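
Unlike Example 3, this variant expects HillClimbing.get to also yield None for candidates it declines to produce, so the progress bar still advances once per candidate slot. Extending the sketch after Example 3 under the same assumptions:

class HillClimbing:
    @staticmethod
    def get(learner_params, seed=0):
        # Same idea as before, but yield None for an out-of-range neighbor
        # instead of dropping it, keeping the caller's progress count in sync.
        for step in (-1, 1):
            new_depth = learner_params.get("max_depth", 3) + step
            if new_depth < 1:
                yield None
            else:
                neighbor = copy.deepcopy(learner_params)
                neighbor["max_depth"] = new_depth
                yield neighbor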
Example 6
    def get_hill_climbing_params(self, current_models):

        # second, hill climbing
        # for _ in range(self._hill_climbing_steps):
        # just do one step
        # get models ordered by loss
        # TODO: refactor this callbacks.callbacks[0]
        scores = [m.get_final_loss() for m in current_models]
        model_types = [m.get_type() for m in current_models]
        df_models = pd.DataFrame(
            {"model": current_models, "score": scores, "model_type": model_types}
        )
        # group by model type (sorted by score within each group) for easier debugging
        df_models = df_models.groupby("model_type").apply(
            lambda x: x.sort_values("score")
        )
        unique_model_types = np.unique(df_models.model_type)

        generated_params = []
        for m_type in unique_model_types:
            if m_type in ["Baseline", "Decision Tree", "Linear", "Nearest Neighbors"]:
                # don't tune Baseline, Decision Tree, Linear, or Nearest Neighbors
                continue
            models = df_models[df_models.model_type == m_type]["model"]

            for i in range(min(self._top_models_to_improve, len(models))):
                m = models.iloc[i]  # positional lookup; the group-by result has a MultiIndex

                for p in HillClimbing.get(
                    m.params.get("learner"),
                    self._ml_task,
                    len(current_models) + self._seed,
                ):

                    model_indices = [
                        int(m.get_name().split("_")[0]) for m in current_models
                    ]
                    model_max_index = np.max(model_indices)

                    logger.info(
                        "Hill climbing step, for model #{0}".format(model_max_index + 1)
                    )
                    if p is not None:
                        all_params = copy.deepcopy(m.params)
                        all_params["learner"] = p

                        all_params["name"] = self.get_model_name(
                            all_params["learner"]["model_type"],
                            model_max_index + 1 + len(generated_params),
                        )

                        if "golden_features" in all_params["preprocessing"]:
                            all_params["name"] += "_GoldenFeatures"
                        if "drop_features" in all_params["preprocessing"] and len(
                            all_params["preprocessing"]["drop_features"]
                        ):
                            all_params["name"] += "_SelectedFeatures"
                        all_params["status"] = "initialized"
                        all_params["final_loss"] = None
                        all_params["train_time"] = None
                        unique_params_key = MljarTuner.get_params_key(all_params)
                        if unique_params_key not in self._unique_params_keys:
                            self._unique_params_keys += [unique_params_key]
                            generated_params += [all_params]
        return generated_params
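
Several variants recover a model's index with int(m.get_name().split("_")[0]), so get_model_name must put the integer first. A sketch consistent with that contract; the exact format is an assumption:

    def get_model_name(self, model_type, models_cnt):
        # The leading integer is what int(name.split("_")[0]) parses back out,
        # so it must be the first underscore-separated token.
        return f"{models_cnt}_{model_type}"

Note that model types containing spaces (e.g. "Decision Tree") still parse correctly, since only the first token needs to be an integer.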