def test_multi_output_classification_partial_fit_parallelism():
    # Check that a parallel partial_fit re-clones the underlying estimators
    # instead of mutating them in place.
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    mor = MultiOutputClassifier(sgd_linear_clf, n_jobs=-1)
    mor.partial_fit(X, y, classes)
    est1 = mor.estimators_[0]
    mor.partial_fit(X, y)
    est2 = mor.estimators_[0]
    if cpu_count() > 1:
        # parallelism requires this to be the case for a sane implementation
        assert est1 is not est2
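
# A minimal sketch of the module-level imports and fixtures the test above
# relies on (X, y, classes). These particular names and data values are
# assumptions for illustration; the real test module defines its own.
import numpy as np
from joblib import cpu_count
from sklearn.datasets import make_multilabel_classification
from sklearn.linear_model import SGDClassifier
from sklearn.multioutput import MultiOutputClassifier

X, y = make_multilabel_classification(n_samples=100, n_features=20,
                                      n_classes=3, random_state=0)
# One array of class labels per output column, as expected by partial_fit.
classes = [np.unique(y[:, i]) for i in range(y.shape[1])]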
def _fit(self, X, y):
    X, y = check_X_y(X, y, "csr")
    # Initialization
    cv = check_cv(cv=self.cv, y=y, classifier=is_classifier(self.estimator))
    scorer = check_scoring(self.estimator, scoring=self.scoring)
    n_features = X.shape[1]

    # Validate max_features: must be an integer in [1, n_features] when given.
    if self.max_features is not None:
        if not isinstance(self.max_features, numbers.Integral):
            raise TypeError(
                "'max_features' should be an integer between 1 and {} features."
                " Got {!r} instead.".format(n_features, self.max_features))
        elif self.max_features < 1 or self.max_features > n_features:
            raise ValueError(
                "'max_features' should be between 1 and {} features."
                " Got {} instead.".format(n_features, self.max_features))
        max_features = self.max_features
    else:
        max_features = n_features

    if not isinstance(self.n_gen_no_change,
                      (numbers.Integral, np.integer, type(None))):
        raise ValueError(
            "'n_gen_no_change' should either be None or an integer."
            " {} was passed.".format(self.n_gen_no_change))

    estimator = clone(self.estimator)

    # Genetic algorithm: each individual is a boolean mask over the features.
    toolbox = base.Toolbox()

    toolbox.register("attr_bool", random.randint, 0, 1)
    toolbox.register("individual", tools.initRepeat,
                     creator.Individual, toolbox.attr_bool, n=n_features)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", _evalFunction, gaobject=self,
                     estimator=estimator, X=X, y=y, cv=cv, scorer=scorer,
                     verbose=self.verbose, fit_params=self.fit_params,
                     max_features=max_features, caching=self.caching)
    toolbox.register("mate", tools.cxUniform,
                     indpb=self.crossover_independent_proba)
    toolbox.register("mutate", tools.mutFlipBit,
                     indpb=self.mutation_independent_proba)
    toolbox.register("select", tools.selTournament,
                     tournsize=self.tournament_size)

    # Optionally evaluate individuals in parallel with a multiprocessing pool.
    if self.n_jobs == 0:
        raise ValueError("n_jobs == 0 has no meaning.")
    elif self.n_jobs > 1:
        pool = multiprocessing.Pool(processes=self.n_jobs)
        toolbox.register("map", pool.map)
    elif self.n_jobs < 0:
        pool = multiprocessing.Pool(
            processes=max(cpu_count() + 1 + self.n_jobs, 1))
        toolbox.register("map", pool.map)

    pop = toolbox.population(n=self.n_population)
    hof = tools.HallOfFame(1, similar=np.array_equal)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean, axis=0)
    stats.register("std", np.std, axis=0)
    stats.register("min", np.min, axis=0)
    stats.register("max", np.max, axis=0)

    if self.verbose > 0:
        print("Selecting features with genetic algorithm.")

    _, log = _eaFunction(pop, toolbox, cxpb=self.crossover_proba,
                         mutpb=self.mutation_proba, ngen=self.n_generations,
                         ngen_no_change=self.n_gen_no_change,
                         stats=stats, halloffame=hof, verbose=self.verbose)
    if self.n_jobs != 1:
        pool.close()
        pool.join()

    # Set final attributes: refit the estimator on the best feature subset.
    support_ = np.array(hof, dtype=bool)[0]  # np.bool is removed in newer NumPy
    self.estimator_ = clone(self.estimator)
    self.estimator_.fit(X[:, support_], y)

    self.generation_scores_ = np.array(
        [score for score, _ in log.select("max")])
    self.n_features_ = support_.sum()
    self.support_ = support_

    return self
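
# A minimal usage sketch, assuming the _fit above is the fitting routine of a
# GeneticSelectionCV-style feature selector (the package and class names below
# are an assumption based on the code in this file, not confirmed by it).
from genetic_selection import GeneticSelectionCV
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression

X_demo, y_demo = load_breast_cancer(return_X_y=True)
selector = GeneticSelectionCV(
    LogisticRegression(max_iter=1000),
    cv=5,
    scoring="accuracy",
    max_features=10,      # upper bound enforced by the validation block above
    n_population=50,
    n_generations=20,
    n_jobs=1,             # >1 or <0 would spin up the multiprocessing pool
)
selector.fit(X_demo, y_demo)     # the public fit() delegates to _fit
print(selector.support_)         # boolean mask over the original features
print(selector.n_features_)      # number of selected features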