def fit(self, X, y):
    """Transform ``X``, choose a model and tune it with ``evo_search``.

    ``X`` is wrapped in a DataFrame when necessary, run through
    ``self.sparsify.fit_transform`` and densified. If ``self.scoring`` is
    None, a scorer is selected from the result of
    ``classification_or_regression(y)`` and stored on ``self.scoring``.
    ``self.pick_model`` and ``self.get_pipeline`` are then called (they are
    expected to provide ``self.grid`` and ``self.pipeline``), and the
    search is fitted and stored on ``self.evo``.

    Args:
        X: Feature data; a DataFrame or anything ``pd.DataFrame`` accepts.
        y: Target values, forwarded unchanged to the search.

    Returns:
        ``self.evo.best_estimator_`` after fitting.
    """
    if not isinstance(X, pd.DataFrame):
        X = pd.DataFrame(X)

    transformed = self.sparsify.fit_transform(X)
    # Prefer toarray(): the `.A` shortcut is not available on every SciPy
    # sparse container/version. Fall back to `.A` for objects that only
    # offer that attribute.
    if hasattr(transformed, "toarray"):
        dense = transformed.toarray()
    else:
        dense = transformed.A
    X = pd.DataFrame(dense)

    if self.scoring is None:
        tp = classification_or_regression(y)
        self.scoring = f1_weighted_scorer if tp == 'classification' else mse_scorer
        print(self.scoring)

    self.pick_model(X, y)
    self.get_pipeline()

    unique_combinations = np.prod([len(values) for values in self.grid.values()])
    print("unique_combinations", unique_combinations)

    # Derive search settings on a copy so that defaults computed for this
    # grid are not written back into self.kwargs and silently reused by a
    # later fit() on a differently sized grid.
    search_kwargs = self.kwargs.copy()
    if 'population_size' not in search_kwargs:
        search_kwargs['population_size'] = np.clip(
            int(unique_combinations / 1000), 5, 10)
    if 'generations_number' not in search_kwargs:
        search_kwargs['generations_number'] = np.clip(
            int(unique_combinations / 20), 10, 50)

    self.evo = evo_search(self.pipeline, self.grid, scoring=self.scoring,
                          cv=self.cv, n_jobs=self.n_jobs, **search_kwargs)
    self.evo.fit(X, y)
    return self.evo.best_estimator_
def fit(self, X, y):
    """Tune every eligible candidate model and keep the best search.

    ``X`` is wrapped in a DataFrame when necessary (its column labels are
    stored in ``self._feature_name``), passed through
    ``self.featurizer.fit_transform`` and densified. ``y`` is converted to
    an ndarray, and a single-column 2-D target is flattened to 1-D.

    For each entry yielded by ``self.get_models(X, y)`` an ``evo_search``
    is fitted, unless the data complexity exceeds the entry's
    ``"max_complexity"`` or ``self.handle_multi_output`` returns None.
    All fitted searches are kept in ``self.evos`` as ``(best_score_, evo)``
    pairs; the highest-scoring one is stored in ``self.best_evo``.

    Args:
        X: Feature data; a DataFrame or anything ``pd.DataFrame`` accepts.
        y: Target values; any array-like (DataFrame, Series, ndarray,
            list, ...).

    Returns:
        ``best_estimator_`` of the best-scoring search.

    Raises:
        KeyboardInterrupt: Re-raised when the user interrupts before any
            model has finished training.
        ValueError: When no candidate model was trained at all.
    """
    evos = []
    if not isinstance(X, pd.DataFrame):
        X = pd.DataFrame(X)
    self._feature_name = np.array(list(X.columns))

    # Convert once, for every kind of input, then flatten column vectors.
    # Previously the flattening sat behind an `elif` that array-likes with
    # `__array__` (ndarray, Series) never reached, and plain sequences
    # crashed on the missing `.shape` attribute.
    y = np.asarray(y)
    if y.ndim > 1 and y.shape[1] == 1:
        y = y.ravel()

    featurized = self.featurizer.fit_transform(X)
    # Prefer toarray(): the `.A` shortcut is not available on every SciPy
    # sparse container/version. Fall back to `.A` for objects that only
    # offer that attribute.
    if hasattr(featurized, "toarray"):
        dense = featurized.toarray()
    else:
        dense = featurized.A
    X = pd.DataFrame(dense)

    self.cl_or_reg = self.cl_or_reg or classification_or_regression(y)
    cl_or_reg = self.cl_or_reg
    if self.scoring is None:
        self.scoring = f1_weighted_scorer if cl_or_reg == "classification" else mse_scorer
        print(self.scoring)

    complexity = calculate_complexity(X, y, cl_or_reg)
    for model in self.get_models(X, y):
        try:
            print("estimator:", model["name"])
            grid = model["grid"]
            if complexity > model["max_complexity"]:
                continue
            clf = self.handle_multi_output(y, model["name"], model["clf"])
            if clf is None:
                continue
            pipeline = self.get_pipeline(clf)

            unique_combinations = np.prod([len(values) for values in grid.values()])
            print("unique_combinations", unique_combinations)

            # Per-model copy: defaults derived from this grid must not leak
            # into self.kwargs or into the next model's search.
            kwargs = self.kwargs.copy()
            if "population_size" not in kwargs:
                kwargs["population_size"] = np.clip(int(unique_combinations / 100), 5, 10)
            if "generations_number" not in kwargs:
                kwargs["generations_number"] = np.clip(int(unique_combinations / 10), 10, 50)

            evo = evo_search(
                pipeline, grid, scoring=self.scoring, cv=self.cv, n_jobs=self.n_jobs, **kwargs
            )
            evo.fit(X, y)
            evos.append((evo.best_score_, evo))
        except KeyboardInterrupt:
            if not evos:
                print("Stopped by user. No models finished trained; failed to fit.")
                raise
            print("Stopped by user. {} models trained.".format(len(evos)))
            # NOTE(review): there is no `break` here, so the loop carries on
            # with the next candidate model after an interrupt. Confirm
            # whether stopping the whole search was intended.

    self.evos = evos
    if not evos:
        # Every candidate was skipped; fail with an explanation instead of
        # an opaque "list index out of range".
        raise ValueError(
            "No model could be trained: every candidate was skipped "
            "(too complex for the data or unsupported for this target)."
        )
    self.best_evo = sorted(self.evos, key=lambda pair: pair[0])[-1][1]
    print("best: {}".format(self.best_evo.best_estimator_))
    return self.best_evo.best_estimator_
def apply_toy_on(X, y, cl_or_reg=None, n=500, max_tries=3):
    """Yield hold-out scores of ``Toy`` models fitted on shuffled subsamples.

    For every train/test split produced by the splitter, a fresh
    ``Toy(cv=2, cl_or_reg=cl_or_reg)`` is fitted on the training rows and
    scored on the test rows. Each score is printed and yielded. A split
    whose scoring raises ``NotFittedError`` yields ``-10000`` instead.

    Args:
        X: Feature data; a DataFrame or anything ``pd.DataFrame`` accepts.
        y: Target values; any array-like with one entry per row of ``X``.
        cl_or_reg: ``'classification'`` or another task label; when falsy
            it is determined with ``classification_or_regression(y)``.
        n: Upper bound on the number of rows the split sizes are based on
            (capped at ``len(y)``).
        max_tries: Second argument handed to ``ShuffleSplit`` in the
            non-classification branch.

    Yields:
        One score per split.
    """
    # Index the target positionally, in step with `X.iloc[...]` below.
    # Plain `y[idx]` is label-based for pandas objects (wrong rows or a
    # KeyError with a non-default index) and unsupported for lists.
    y = np.asarray(y)
    n = min(len(y), n)
    cl_or_reg = cl_or_reg or classification_or_regression(y)

    test_size = int(0.2 * n)
    train_size = int(0.8 * n)
    if cl_or_reg == 'classification':
        # NOTE(review): this branch passes a literal 1 where the other
        # branch passes `max_tries` — confirm that is deliberate.
        cross_split = StratifiedShuffleSplit(y, 1, test_size, train_size)
    else:
        cross_split = ShuffleSplit(len(y), max_tries, test_size, train_size)

    if not isinstance(X, pd.DataFrame):
        X = pd.DataFrame(X)

    for train_index, test_index in cross_split:
        try:
            toy = Toy(cv=2, cl_or_reg=cl_or_reg)
            toy.fit(X.iloc[train_index], y[train_index])
            score = toy.score(X.iloc[test_index], y[test_index])
        except NotFittedError:
            # probably have to find the real fix here...
            score = -10000
        print(score)
        yield score
def fit(self, X, y):
    """Store the data, choose a model and tune it with ``evo_search``.

    ``X`` and ``y`` are kept on ``self.X`` / ``self.y``. If
    ``self.scoring`` is None, it is set to ``'f1_weighted'`` when
    ``classification_or_regression(self.y)`` reports ``'classification'``
    and to ``'r2'`` otherwise. ``self.pick_model`` and
    ``self.get_pipeline`` are then called (they are expected to provide
    ``self.grid`` and ``self.pipeline``), and the search is fitted and
    stored on ``self.evo``.

    Args:
        X: Feature data, forwarded unchanged to the search.
        y: Target values, forwarded unchanged to the search.

    Returns:
        ``self.evo.best_estimator_`` after fitting.
    """
    self.X = X
    self.y = y

    if self.scoring is None:
        tp = classification_or_regression(self.y)
        # MULTIOUTPUT NOT SUPPORTED FOR REGRESSION (probably DEAPs fault)
        self.scoring = 'f1_weighted' if tp == 'classification' else 'r2'

    self.pick_model()
    self.get_pipeline()

    unique_combinations = np.prod([len(values) for values in self.grid.values()])
    print("unique_combinations", unique_combinations)

    # Derive search settings on a copy so that defaults computed for this
    # grid are not written back into self.kwargs and silently reused by a
    # later fit() on a differently sized grid.
    search_kwargs = self.kwargs.copy()
    if 'population_size' not in search_kwargs:
        search_kwargs['population_size'] = np.clip(int(unique_combinations / 1000), 5, 10)
    if 'generations_number' not in search_kwargs:
        search_kwargs['generations_number'] = np.clip(int(unique_combinations / 200), 10, 50)

    self.evo = evo_search(self.pipeline, self.grid, cv=self.cv, scoring=self.scoring,
                          n_jobs=self.n_jobs, **search_kwargs)
    self.evo.fit(X, y)
    return self.evo.best_estimator_