Ejemplo n.º 1
0
def test_multi_output_classification_partial_fit_parallelism():
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    mor = MultiOutputClassifier(sgd_linear_clf, n_jobs=-1)
    mor.partial_fit(X, y, classes)
    est1 = mor.estimators_[0]
    mor.partial_fit(X, y)
    est2 = mor.estimators_[0]
    if cpu_count() > 1:
        # parallelism requires this to be the case for a sane implementation
        assert est1 is not est2
def test_multi_output_classification_partial_fit_parallelism():
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    mor = MultiOutputClassifier(sgd_linear_clf, n_jobs=-1)
    mor.partial_fit(X, y, classes)
    est1 = mor.estimators_[0]
    mor.partial_fit(X, y)
    est2 = mor.estimators_[0]
    if cpu_count() > 1:
        # parallelism requires this to be the case for a sane implementation
        assert est1 is not est2
Ejemplo n.º 3
0
    def _fit(self, X, y):
        X, y = check_X_y(X, y, "csr")
        # Initialization
        cv = check_cv(cv=self.cv,
                      y=y,
                      classifier=is_classifier(self.estimator))
        scorer = check_scoring(self.estimator, scoring=self.scoring)
        n_features = X.shape[1]

        if self.max_features is not None:
            if not isinstance(self.max_features, numbers.Integral):
                raise TypeError(
                    "'max_features' should be an integer between 1 and {} features."
                    " Got {!r} instead.".format(n_features, self.max_features))
            elif self.max_features < 1 or self.max_features > n_features:
                raise ValueError(
                    "'max_features' should be between 1 and {} features."
                    " Got {} instead.".format(n_features, self.max_features))
            max_features = self.max_features
        else:
            max_features = n_features

        if not isinstance(self.n_gen_no_change,
                          (numbers.Integral, np.integer, type(None))):
            raise ValueError(
                "'n_gen_no_change' should either be None or an integer."
                " {} was passed.".format(self.n_gen_no_change))

        estimator = clone(self.estimator)

        # Genetic Algorithm
        toolbox = base.Toolbox()

        toolbox.register("attr_bool", random.randint, 0, 1)
        toolbox.register("individual",
                         tools.initRepeat,
                         creator.Individual,
                         toolbox.attr_bool,
                         n=n_features)
        toolbox.register("population", tools.initRepeat, list,
                         toolbox.individual)
        toolbox.register("evaluate",
                         _evalFunction,
                         gaobject=self,
                         estimator=estimator,
                         X=X,
                         y=y,
                         cv=cv,
                         scorer=scorer,
                         verbose=self.verbose,
                         fit_params=self.fit_params,
                         max_features=max_features,
                         caching=self.caching)
        toolbox.register("mate",
                         tools.cxUniform,
                         indpb=self.crossover_independent_proba)
        toolbox.register("mutate",
                         tools.mutFlipBit,
                         indpb=self.mutation_independent_proba)
        toolbox.register("select",
                         tools.selTournament,
                         tournsize=self.tournament_size)

        if self.n_jobs == 0:
            raise ValueError("n_jobs == 0 has no meaning.")
        elif self.n_jobs > 1:
            pool = multiprocessing.Pool(processes=self.n_jobs)
            toolbox.register("map", pool.map)
        elif self.n_jobs < 0:
            pool = multiprocessing.Pool(
                processes=max(cpu_count() + 1 + self.n_jobs, 1))
            toolbox.register("map", pool.map)

        pop = toolbox.population(n=self.n_population)
        hof = tools.HallOfFame(1, similar=np.array_equal)
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("avg", np.mean, axis=0)
        stats.register("std", np.std, axis=0)
        stats.register("min", np.min, axis=0)
        stats.register("max", np.max, axis=0)

        if self.verbose > 0:
            print("Selecting features with genetic algorithm.")

        _, log = _eaFunction(pop,
                             toolbox,
                             cxpb=self.crossover_proba,
                             mutpb=self.mutation_proba,
                             ngen=self.n_generations,
                             ngen_no_change=self.n_gen_no_change,
                             stats=stats,
                             halloffame=hof,
                             verbose=self.verbose)
        if self.n_jobs != 1:
            pool.close()
            pool.join()

        # Set final attributes
        support_ = np.array(hof, dtype=np.bool)[0]
        self.estimator_ = clone(self.estimator)
        self.estimator_.fit(X[:, support_], y)

        self.generation_scores_ = np.array(
            [score for score, _ in log.select("max")])
        self.n_features_ = support_.sum()
        self.support_ = support_

        return self