Example 1
    def fit(self, X, y):
        if not isinstance(X, pd.DataFrame):
            X = pd.DataFrame(X)
        X = pd.DataFrame(self.sparsify.fit_transform(X).A)  # .A densifies the sparse output
        if self.scoring is None:
            # Default scorer by target type: the boolean indexes the two-item list
            # (classification -> f1_weighted_scorer, otherwise -> mse_scorer).
            tp = classification_or_regression(y)
            self.scoring = [f1_weighted_scorer,
                            mse_scorer][tp != 'classification']
        print(self.scoring)
        self.pick_model(X, y)
        self.get_pipeline()
        unique_combinations = np.prod(list(map(len, self.grid.values())))
        print("unique_combinations", unique_combinations)
        if 'population_size' not in self.kwargs:
            # Scale the GA population and generation count with the grid size,
            # but keep both inside fixed bounds.
            self.kwargs['population_size'] = np.clip(
                int(unique_combinations / 1000), 5, 10)
        if 'generations_number' not in self.kwargs:
            self.kwargs['generations_number'] = np.clip(
                int(unique_combinations / 20), 10, 50)

        self.evo = evo_search(self.pipeline,
                              self.grid,
                              scoring=self.scoring,
                              cv=self.cv,
                              n_jobs=self.n_jobs,
                              **self.kwargs)
        self.evo.fit(X, y)
        return self.evo.best_estimator_
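The default-scorer line above indexes a two-element list with a boolean, so a False comparison picks the first scorer and True picks the second. A minimal standalone sketch of the same idiom (the scorer names here are plain strings used as placeholders, not the module's scorer objects):

# False -> index 0, True -> index 1
task = 'classification'
scoring = ['f1_weighted', 'neg_mean_squared_error'][task != 'classification']
print(scoring)  # prints 'f1_weighted' because (task != 'classification') is False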
Example 2
    def fit(self, X, y):
        evos = []
        if not isinstance(X, pd.DataFrame):
            X = pd.DataFrame(X)
        self._feature_name = np.array(list(X.columns))
        # Normalize the target: DataFrames and array-likes become numpy arrays,
        # and a single-column 2-D target is flattened.
        if isinstance(y, pd.DataFrame):
            y = np.array(y)
        elif hasattr(y, "__array__"):
            y = y.__array__()
        elif len(y.shape) > 1 and y.shape[1] == 1:
            y = y.ravel()
        X = pd.DataFrame(self.featurizer.fit_transform(X).A)
        self.cl_or_reg = self.cl_or_reg or classification_or_regression(y)
        cl_or_reg = self.cl_or_reg
        if self.scoring is None:
            self.scoring = [f1_weighted_scorer, mse_scorer][cl_or_reg != "classification"]
        print(self.scoring)

        complexity = calculate_complexity(X, y, cl_or_reg)
        for model in self.get_models(X, y):
            try:
                print("estimator:", model["name"])
                grid = model["grid"]
                if complexity > model["max_complexity"]:
                    # Skip estimators that are too expensive for data of this complexity.
                    continue
                clf = self.handle_multi_output(y, model["name"], model["clf"])
                if clf is None:
                    continue
                pipeline = self.get_pipeline(clf)
                unique_combinations = np.prod(list(map(len, grid.values())))
                print("unique_combinations", unique_combinations)
                kwargs = self.kwargs.copy()
                if "population_size" not in self.kwargs:
                    kwargs["population_size"] = np.clip(int(unique_combinations / 100), 5, 10)
                if "generations_number" not in kwargs:
                    kwargs["generations_number"] = np.clip(int(unique_combinations / 10), 10, 50)

                evo = evo_search(
                    pipeline, grid, scoring=self.scoring, cv=self.cv, n_jobs=self.n_jobs, **kwargs
                )
                evo.fit(X, y)
                evos.append((evo.best_score_, evo))
            except KeyboardInterrupt:
                if not evos:
                    print("Stopped by user. No models finished trained; failed to fit.")
                    raise
                print("Stopped by user. {} models trained.".format(len(evos)))
        self.evos = evos
        # Keep the evolutionary search with the highest best_score_.
        self.best_evo = sorted(self.evos, key=lambda x: x[0])[-1][1]
        # import warnings
        # warnings.warn("best: {}".format(self.best_evo.best_estimator_))
        print("best: {}".format(self.best_evo.best_estimator_))
        return self.best_evo.best_estimator_
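The population_size / generations_number defaults above grow with the number of grid combinations but are clamped by np.clip. A small sketch of that heuristic pulled out into a helper (the function name is hypothetical; the divisors and bounds are copied from the snippet above):

import numpy as np

def default_evo_params(grid):
    # Number of unique hyperparameter combinations in the grid.
    unique_combinations = np.prod([len(v) for v in grid.values()])
    population_size = int(np.clip(int(unique_combinations / 100), 5, 10))
    generations_number = int(np.clip(int(unique_combinations / 10), 10, 50))
    return population_size, generations_number

print(default_evo_params({'a': [1, 2, 3], 'b': [0.1, 0.01, 0.001, 0.0001]}))  # -> (5, 10)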
Example 3
def apply_toy_on(X, y, cl_or_reg=None, n=500, max_tries=3):
    n = min(len(y), n)
    cl_or_reg = cl_or_reg if cl_or_reg else classification_or_regression(y)
    if cl_or_reg == 'classification':
        # Legacy (pre-0.18) scikit-learn CV API: the splitter takes y / n_iter /
        # test_size / train_size directly and is itself iterable.
        cross_split = StratifiedShuffleSplit(y, 1, int(0.2 * n), int(0.8 * n))
    else:
        cross_split = ShuffleSplit(len(y), max_tries, int(0.2 * n), int(0.8 * n))
    if not isinstance(X, pd.DataFrame):
        X = pd.DataFrame(X)
    for train_index, test_index in cross_split:
        try:
            toy = Toy(cv=2, cl_or_reg=cl_or_reg)
            toy.fit(X.iloc[train_index], y[train_index])
            score = toy.score(X.iloc[test_index], y[test_index])
        except NotFittedError:  # probably have to find the real fix here...
            score = -10000
        print(score)
        yield score
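Because apply_toy_on is a generator, consuming it drives each fit/score round. A minimal usage sketch, assuming X and y are an existing feature matrix and target vector and that numpy is imported as np:

scores = list(apply_toy_on(X, y, n=500, max_tries=3))  # one held-out score per split
print("mean toy score:", np.mean(scores))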
Example 4
    def fit(self, X, y):
        self.X = X
        self.y = y
        if self.scoring is None:
            tp = classification_or_regression(self.y)
            # MULTIOUTPUT NOT SUPPORTED FOR REGRESSION (probably DEAP's fault)
            self.scoring = ['f1_weighted', 'r2'][tp != 'classification']
        self.pick_model()
        self.get_pipeline()
        unique_combinations = np.prod(list(map(len, self.grid.values())))
        print("unique_combinations", unique_combinations)
        if 'population_size' not in self.kwargs:
            self.kwargs['population_size'] = np.clip(int(unique_combinations / 1000), 5, 10)
        if 'generations_number' not in self.kwargs:
            self.kwargs['generations_number'] = np.clip(int(unique_combinations / 200), 10, 50)

        self.evo = evo_search(self.pipeline, self.grid, cv=self.cv,
                              scoring=self.scoring, n_jobs=self.n_jobs, **self.kwargs)
        self.evo.fit(X, y)
        return self.evo.best_estimator_
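Since fit returns evo.best_estimator_, the caller gets back an ordinary fitted scikit-learn estimator. A minimal usage sketch, assuming the class is the Toy estimator seen in Example 3 and that X_train / y_train / X_test already exist:

toy = Toy(cv=5)
best = toy.fit(X_train, y_train)    # returns the tuned best_estimator_
predictions = best.predict(X_test)  # standard scikit-learn predict API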