Example #1
0
def test_select_percentile():
    """Check SelectPercentile's fit_transform output on the flat fixtures.

    Fits a selector (percentile=50, score_func='f_classif') on the
    module-level ``flat_X`` / ``y`` and verifies the returned
    ``(X, y, sample_weight)`` triple.
    """
    selector = steps.SelectPercentile(score_func='f_classif', percentile=50)
    selected, y_out, weights = selector.fit_transform(flat_X, y)

    # y passes through unchanged and no sample weights come back.
    assert np.all(y == y_out)
    assert weights is None

    # The transformed X is still an ElmStore exposing a ``flat`` attribute ...
    assert isinstance(selected, ElmStore)
    assert hasattr(selected, 'flat')

    # ... with strictly fewer columns than the input had.
    n_cols_after = selected.flat.values.shape[1]
    n_cols_before = flat_X.flat.values.shape[1]
    assert n_cols_after < n_cols_before
Example #2
0
def test_supervised_feat_select_X_y(client=None):
    '''Fit an ensemble with a supervised feature-selection step, then predict.

    The pipeline is Flatten -> SelectPercentile(f_classif, 50) ->
    SGDClassifier.  The fit keyword arguments are ENSEMBLE_KWARGS plus
    ``method_kwargs`` (classes 0, 1, 2), then updated with
    X_Y_DATA_SOURCE (presumably where the y data comes from -- confirm
    against its definition).

    ``client`` is accepted but not used in this body.
    '''
    step_list = [
        steps.Flatten(),
        steps.SelectPercentile(percentile=50, score_func=f_classif),
        SGDClassifier(),
    ]
    pipe = Pipeline(step_list)

    fit_kwargs = dict(method_kwargs={'classes': [0, 1, 2]}, **ENSEMBLE_KWARGS)
    # Data-source entries are applied last, so they override same-named keys.
    fit_kwargs.update(X_Y_DATA_SOURCE)

    fitted = pipe.fit_ensemble(**fit_kwargs)
    _train_asserts(fitted, fit_kwargs['saved_ensemble_size'])

    # One prediction is expected per ensemble member.
    predictions = fitted.predict_many(**X_Y_DATA_SOURCE)
    assert len(predictions) == len(fitted.ensemble)
Example #3
0
from api_example import data_source

# Read from the environment at import time; a KeyError is raised right here
# if ELM_EXAMPLE_DATA_PATH is not set.  Presumably the directory holding the
# example data -- confirm against where this constant is used.
ELM_EXAMPLE_DATA_PATH = os.environ['ELM_EXAMPLE_DATA_PATH']


def make_example_y_data(X, y=None, sample_weight=None, **kwargs):
    """Build example labels for ``X`` by clustering its flattened values.

    A MiniBatchKMeans with 5 clusters is fitted on ``X.flat.values`` and
    its predictions on that same array become the new ``y``.  Any ``y``
    passed in is discarded and extra keyword arguments are ignored.

    Returns:
        The tuple ``(X, y, sample_weight)``; ``X`` and ``sample_weight``
        are handed back as received.
    """
    features = X.flat.values
    model = MiniBatchKMeans(n_clusters=5).fit(features)
    labels = model.predict(features)
    return (X, labels, sample_weight)


# Unnamed leading steps: flatten the sample, then let make_example_y_data
# replace y with cluster labels so the f_classif selection below has a y.
pipeline_steps = [
    steps.Flatten(),
    steps.ModifySample(make_example_y_data),
]
# Named steps.  The 'kmeans' name is presumably what the 'kmeans__n_clusters'
# key of param_grid addresses -- confirm against the Pipeline implementation.
pipeline_steps += [
    ('top_n', steps.SelectPercentile(score_func=f_classif, percentile=80)),
    ('kmeans', MiniBatchKMeans(n_clusters=4)),
]
pipeline = Pipeline(pipeline_steps, scoring=kmeans_aic)
param_grid = {
    'kmeans__n_clusters': list(range(5, 10)),
    'control': {
        'select_method': 'selNSGA2',
        'crossover_method': 'cxTwoPoint',
        'mutate_method': 'mutUniformInt',
        'init_pop': 'random',
        'indpb': 0.5,
        'mutpb': 0.9,
        'cxpb': 0.3,
        'eta': 20,
        'ngen': 2,