# Shared imports for the method variants below; HillClimbing and MljarTuner
# are project-level classes from mljar-supervised, referenced but not defined
# in these snippets.
import copy
import logging

import numpy as np
import pandas as pd

logger = logging.getLogger(__name__)


def get_hill_climbing_params(self, current_models):
    # second step: hill climbing
    for _ in range(self._hill_climbing_steps):
        # get models ordered by loss
        # TODO: refactor this callbacks.callbacks[0]
        scores = [m.get_final_loss() for m in current_models]
        model_types = [m.get_type() for m in current_models]
        df_models = pd.DataFrame(
            {"model": current_models, "score": scores, "model_type": model_types}
        )
        # keep models grouped by type to make debugging easier
        df_models = df_models.groupby("model_type").apply(
            lambda x: x.sort_values("score")
        )
        unique_model_types = np.unique(df_models.model_type)

        for m_type in unique_model_types:
            if m_type in ["Baseline", "Decision Tree", "Linear", "Nearest Neighbors"]:
                # don't tune these simple algorithms
                continue
            models = df_models[df_models.model_type == m_type]["model"]
            for i in range(min(self._top_models_to_improve, len(models))):
                # positional access: the groupby above leaves a MultiIndex
                m = models.iloc[i]
                for p in HillClimbing.get(
                    m.params.get("learner"),
                    self._ml_task,
                    len(current_models) + self._seed,
                ):
                    model_indices = [
                        int(model.get_name().split("_")[0]) for model in current_models
                    ]
                    model_max_index = np.max(model_indices)
                    logger.info(
                        "Hill climbing step, for model #{0}".format(model_max_index + 1)
                    )
                    if p is not None:
                        all_params = copy.deepcopy(m.params)
                        all_params["learner"] = p
                        all_params["name"] = self.get_model_name(
                            all_params["learner"]["model_type"], model_max_index + 1
                        )
                        unique_params_key = MljarTuner.get_params_key(all_params)
                        if unique_params_key not in self._unique_params_keys:
                            self._unique_params_keys += [unique_params_key]
                            yield all_params
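# The HillClimbing.get interface is assumed throughout these variants but not
# shown here. Below is a minimal, hypothetical sketch of that contract: given
# a learner's parameter dict, yield "neighbor" dicts with one numeric
# hyperparameter nudged up and down, and yield None when there is no move to
# make. The real mljar-supervised implementation may pick parameters
# differently; only the shape of the output (param dicts or None) matters to
# the tuner code above.
import random


def hill_climbing_candidates(learner_params, seed=0, step=1.25):
    rng = random.Random(seed)
    numeric_keys = [
        k for k, v in learner_params.items()
        if isinstance(v, (int, float)) and not isinstance(v, bool)
    ]
    if not numeric_keys:
        yield None  # no tunable parameter: same "skip" signal the tuner checks for
        return
    key = rng.choice(numeric_keys)
    for factor in (step, 1.0 / step):
        neighbor = dict(learner_params)
        # preserve the original type (int stays int, float stays float)
        neighbor[key] = type(learner_params[key])(learner_params[key] * factor)
        yield neighbor


# Example: two neighbors of a learner config, each varying one hyperparameter.
for candidate in hill_climbing_candidates({"max_depth": 6, "learning_rate": 0.1}, seed=1):
    print(candidate)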
def get_hill_climbing_params(self, current_models):
    # second step: hill climbing
    for _ in range(self._hill_climbing_steps):
        # get models ordered by loss
        # TODO: refactor this callbacks.callbacks[0]
        models = sorted(
            [(m.callbacks.callbacks[0].final_loss, m) for m in current_models],
            key=lambda x: x[0],
        )
        for i in range(min(self._top_models_to_improve, len(models))):
            m = models[i][1]
            for p in HillClimbing.get(
                m.params.get("learner"),
                self._ml_task,
                len(current_models) + self._seed,
            ):
                logger.info(
                    "Hill climbing step, for model #{0}".format(len(current_models) + 1)
                )
                if p is not None:
                    all_params = copy.deepcopy(m.params)
                    all_params["learner"] = p
                    all_params["name"] = f"model_{len(current_models) + 1}"
                    unique_params_key = MljarTuner.get_params_key(all_params)
                    if unique_params_key not in self._unique_params_keys:
                        self._unique_params_keys += [unique_params_key]
                        yield all_params
def hill_climbing_step(self, X, y):
    # eager variant: train each hill-climbing candidate immediately and
    # store every model that trains successfully
    for hill_climbing in range(self._hill_climbing_steps):
        # get models ordered by loss
        models = []
        for m in self._models:
            models += [(m.callbacks.callbacks[0].final_loss, m)]
        models = sorted(models, key=lambda x: x[0])
        for i in range(min(self._top_models_to_improve, len(models))):
            m = models[i][1]
            for p in HillClimbing.get(m.params.get("learner")):
                if p is not None:
                    all_params = copy.deepcopy(m.params)
                    all_params["learner"] = p
                    new_model = self.train_model(all_params, X, y)
                    if new_model is not None:
                        self._models += [new_model]
                        self.verbose_print(
                            "Learner {} final loss {} time {}".format(
                                new_model.get_name(),
                                new_model.get_final_loss(),
                                new_model.get_train_time(),
                            )
                        )
                        self.log_train_time(
                            new_model.get_name(), new_model.get_train_time()
                        )
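# For orientation, the eager variant above is an instance of plain hill
# climbing: evaluate the current best configurations, propose neighbors, keep
# whatever improves the loss. A self-contained, generic sketch follows;
# evaluate/neighbors are illustrative names, not mljar-supervised API.
def hill_climb(initial_params, evaluate, neighbors, steps=3):
    best_params, best_loss = initial_params, evaluate(initial_params)
    for _ in range(steps):
        improved = False
        for candidate in neighbors(best_params):
            loss = evaluate(candidate)
            if loss < best_loss:
                best_params, best_loss = candidate, loss
                improved = True
        if not improved:
            break  # local optimum: no neighbor improves the loss
    return best_params, best_loss


# Toy usage: minimize (x - 3)^2 by nudging x in unit steps.
print(hill_climb(
    {"x": 0.0},
    evaluate=lambda p: (p["x"] - 3.0) ** 2,
    neighbors=lambda p: [{"x": p["x"] + 1.0}, {"x": p["x"] - 1.0}],
    steps=10,
))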
def get_hill_climbing_params(self, current_models):
    df_models, algorithms = self.df_models_algorithms(current_models)
    generated_params = []
    counts = {model_type: 0 for model_type in algorithms}

    # walk models best-first, improving at most _top_models_to_improve
    # models per algorithm type
    for i in range(df_models.shape[0]):
        model_type = df_models["model_type"].iloc[i]
        counts[model_type] += 1
        if counts[model_type] > self._top_models_to_improve:
            continue
        m = df_models["model"].iloc[i]

        for p in HillClimbing.get(
            m.params.get("learner"), self._ml_task, len(current_models) + self._seed
        ):
            model_indices = [
                int(model.get_name().split("_")[0]) for model in current_models
            ]
            model_max_index = np.max(model_indices)
            logger.info(
                "Hill climbing step, for model #{0}".format(model_max_index + 1)
            )
            if p is not None:
                all_params = copy.deepcopy(m.params)
                all_params["learner"] = p
                all_params["name"] = self.get_model_name(
                    all_params["learner"]["model_type"],
                    model_max_index + 1 + len(generated_params),
                )
                if "golden_features" in all_params["preprocessing"]:
                    all_params["name"] += "_GoldenFeatures"
                if "drop_features" in all_params["preprocessing"] and len(
                    all_params["preprocessing"]["drop_features"]
                ):
                    all_params["name"] += "_SelectedFeatures"
                all_params["status"] = "initialized"
                all_params["final_loss"] = None
                all_params["train_time"] = None
                unique_params_key = MljarTuner.get_params_key(all_params)
                if unique_params_key not in self._unique_params_keys:
                    generated_params += [all_params]
    return generated_params
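# df_models_algorithms is called above but its body is not part of this
# snippet. A hypothetical sketch of what it plausibly returns, inferred from
# how its results are used (a loss-sorted model table plus the algorithm
# types present); the project's actual helper may differ.
def df_models_algorithms(current_models):
    df_models = pd.DataFrame(
        {
            "model": current_models,
            "score": [m.get_final_loss() for m in current_models],
            "model_type": [m.get_type() for m in current_models],
        }
    ).sort_values("score")  # best (lowest-loss) models first
    algorithms = df_models["model_type"].unique().tolist()
    return df_models, algorithms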
def hill_climbing_step(self, X, y):
    # eager variant with progress reporting: skipped candidates (p is None)
    # still advance the progress bar
    for hill_climbing in range(self._hill_climbing_steps):
        # get models ordered by loss
        models = []
        for m in self._models:
            models += [(m.callbacks.callbacks[0].final_loss, m)]
        models = sorted(models, key=lambda x: x[0])
        for i in range(min(self._top_models_to_improve, len(models))):
            m = models[i][1]
            for p in HillClimbing.get(
                m.params.get("learner"), len(self._models) + self._seed
            ):
                if p is not None:
                    all_params = copy.deepcopy(m.params)
                    all_params["learner"] = p
                    new_model = self.train_model(all_params, X, y)
                    self.keep_model(new_model)
                else:
                    self._progress_bar.update(1)
def get_hill_climbing_params(self, current_models):
    # second step: hill climbing
    # for _ in range(self._hill_climbing_steps):  # just do one step
    # get models ordered by loss
    # TODO: refactor this callbacks.callbacks[0]
    scores = [m.get_final_loss() for m in current_models]
    model_types = [m.get_type() for m in current_models]
    df_models = pd.DataFrame(
        {"model": current_models, "score": scores, "model_type": model_types}
    )
    # keep models grouped by type to make debugging easier
    df_models = df_models.groupby("model_type").apply(
        lambda x: x.sort_values("score")
    )
    unique_model_types = np.unique(df_models.model_type)

    generated_params = []
    for m_type in unique_model_types:
        if m_type in ["Baseline", "Decision Tree", "Linear", "Nearest Neighbors"]:
            # don't tune these simple algorithms
            continue
        models = df_models[df_models.model_type == m_type]["model"]
        for i in range(min(self._top_models_to_improve, len(models))):
            # positional access: the groupby above leaves a MultiIndex
            m = models.iloc[i]
            for p in HillClimbing.get(
                m.params.get("learner"),
                self._ml_task,
                len(current_models) + self._seed,
            ):
                model_indices = [
                    int(model.get_name().split("_")[0]) for model in current_models
                ]
                model_max_index = np.max(model_indices)
                logger.info(
                    "Hill climbing step, for model #{0}".format(model_max_index + 1)
                )
                if p is not None:
                    all_params = copy.deepcopy(m.params)
                    all_params["learner"] = p
                    all_params["name"] = self.get_model_name(
                        all_params["learner"]["model_type"],
                        model_max_index + 1 + len(generated_params),
                    )
                    if "golden_features" in all_params["preprocessing"]:
                        all_params["name"] += "_GoldenFeatures"
                    if "drop_features" in all_params["preprocessing"] and len(
                        all_params["preprocessing"]["drop_features"]
                    ):
                        all_params["name"] += "_SelectedFeatures"
                    all_params["status"] = "initialized"
                    all_params["final_loss"] = None
                    all_params["train_time"] = None
                    unique_params_key = MljarTuner.get_params_key(all_params)
                    if unique_params_key not in self._unique_params_keys:
                        self._unique_params_keys += [unique_params_key]
                        generated_params += [all_params]
    return generated_params
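# MljarTuner.get_params_key is the deduplication hook used by every variant
# above. A hypothetical sketch of such a key function: a deterministic
# fingerprint of the params dict that ignores volatile bookkeeping fields, so
# the same configuration is never generated twice. The real implementation
# may hash differently; only determinism and hashability matter here.
import json


def get_params_key(params):
    skip = ("name", "status", "final_loss", "train_time")
    trimmed = {k: v for k, v in params.items() if k not in skip}
    return json.dumps(trimmed, sort_keys=True, default=str)


# Two configs that differ only in bookkeeping fields share one key:
a = {"name": "1_Xgboost", "learner": {"max_depth": 6}}
b = {"name": "2_Xgboost", "learner": {"max_depth": 6}}
print(get_params_key(a) == get_params_key(b))  # True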