def test_select_percentile():
    '''SelectPercentile at 50 percent returns fewer columns and leaves y alone.'''
    selector = steps.SelectPercentile(percentile=50, score_func='f_classif')
    transformed, y_out, weights = selector.fit_transform(flat_X, y)

    # The target comes back unchanged and no sample weights are produced.
    assert np.all(y == y_out)
    assert weights is None

    # The transformed sample is still an ElmStore exposing a ``flat`` attribute.
    assert isinstance(transformed, ElmStore)
    assert hasattr(transformed, 'flat')

    # Strictly fewer columns must remain after the selection.
    n_cols_after = transformed.flat.values.shape[1]
    n_cols_before = flat_X.flat.values.shape[1]
    assert n_cols_after < n_cols_before
def test_supervised_feat_select_X_y(client=None):
    '''Fit and predict an ensemble whose pipeline includes supervised feature selection.

    The pipeline is Flatten -> SelectPercentile(f_classif, 50) -> SGDClassifier,
    and both fitting and prediction take their data arguments from
    X_Y_DATA_SOURCE.  ``client`` is accepted but not used in this body.
    '''
    stages = [
        steps.Flatten(),
        steps.SelectPercentile(score_func=f_classif, percentile=50),
        SGDClassifier(),
    ]
    pipe = Pipeline(stages)

    # Ensemble settings, plus the class labels handed to the fit method,
    # then the data-source arguments layered on top.
    fit_kwargs = dict(method_kwargs={'classes': [0, 1, 2]}, **ENSEMBLE_KWARGS)
    fit_kwargs.update(X_Y_DATA_SOURCE)

    fitted = pipe.fit_ensemble(**fit_kwargs)
    _train_asserts(fitted, fit_kwargs['saved_ensemble_size'])

    # One prediction is expected per member of the fitted ensemble.
    predictions = fitted.predict_many(**X_Y_DATA_SOURCE)
    assert len(predictions) == len(fitted.ensemble)
from api_example import data_source

# Read from the environment; indexing os.environ raises KeyError when the
# variable is not set.
ELM_EXAMPLE_DATA_PATH = os.environ['ELM_EXAMPLE_DATA_PATH']


def make_example_y_data(X, y=None, sample_weight=None, **kwargs):
    '''Create example y data by clustering X.

    Fits a 5-cluster MiniBatchKMeans on ``X.flat.values`` and uses its
    predictions for those same values as ``y``.  Any ``y`` passed in is
    overwritten, and ``kwargs`` are accepted but not used.

    Returns the tuple ``(X, y, sample_weight)`` with X and sample_weight
    passed through unchanged.
    '''
    fitted = MiniBatchKMeans(n_clusters=5).fit(X.flat.values)
    y = fitted.predict(X.flat.values)
    return (X, y, sample_weight)


# Steps in order: flatten, build y via make_example_y_data, keep the top 80
# percent of features scored by f_classif, then cluster with 4 clusters.
pipeline_steps = [
    steps.Flatten(),
    steps.ModifySample(make_example_y_data),
    ('top_n', steps.SelectPercentile(percentile=80, score_func=f_classif)),
    ('kmeans', MiniBatchKMeans(n_clusters=4))
]
pipeline = Pipeline(pipeline_steps, scoring=kmeans_aic)

# NOTE(review): 'kmeans__n_clusters' presumably targets n_clusters of the
# step named 'kmeans' (candidate values 5 through 9) -- confirm.
param_grid = {
    'kmeans__n_clusters': list(range(5, 10)),
    'control': {
        'select_method': 'selNSGA2',
        'crossover_method': 'cxTwoPoint',
        'mutate_method': 'mutUniformInt',
        'init_pop': 'random',
        'indpb': 0.5,
        'mutpb': 0.9,
        'cxpb': 0.3,
        'eta': 20,
        'ngen': 2,