コード例 #1
0
ファイル: test_ensemble.py プロジェクト: zhufengGNSS/elm
def test_kmeans_simple_X(client=None):
    '''Fit a Flatten + MiniBatchKMeans ensemble from ``X`` and predict with it.

    Checks the trained ensemble via ``_train_asserts`` and then verifies
    that ``predict_many`` yields as many results as there are members in
    ``fitted.ensemble``.  ``client`` is accepted but not used here.
    '''
    pipeline_steps = [steps.Flatten(), MiniBatchKMeans(n_clusters=6)]
    pipeline = Pipeline(pipeline_steps)

    trained = pipeline.fit_ensemble(X=X, **ENSEMBLE_KWARGS)
    _train_asserts(trained, ENSEMBLE_KWARGS['saved_ensemble_size'])

    predictions = trained.predict_many(X=X)
    n_predictions = len(predictions)
    n_members = len(trained.ensemble)
    assert n_predictions == n_members
コード例 #2
0
def test_kmeans_simple_sampler(client=None):
    '''Fit and predict a k-means ensemble driven by ``SAMPLER_DATA_SOURCE``.

    The fit keyword arguments are the sampler data source overlaid with
    ``ENSEMBLE_KWARGS``.  After training, ``predict_many`` is expected to
    return one result per (``args_list`` entry, ensemble member) pair.
    ``client`` is accepted but not used here.
    '''
    pipeline = Pipeline([steps.Flatten(), MiniBatchKMeans(n_clusters=6)])

    # Copy first so the module-level data source is not mutated.
    fit_kwargs = SAMPLER_DATA_SOURCE.copy()
    fit_kwargs.update(ENSEMBLE_KWARGS)

    trained = pipeline.fit_ensemble(**fit_kwargs)
    members = trained.ensemble
    _train_asserts(trained, ENSEMBLE_KWARGS['saved_ensemble_size'])

    predictions = trained.predict_many(**SAMPLER_DATA_SOURCE)
    n_samples = len(SAMPLER_DATA_SOURCE['args_list'])
    assert len(predictions) == n_samples * len(members)
コード例 #3
0
ファイル: test_ensemble.py プロジェクト: zhufengGNSS/elm
def test_supervised_feat_select_X_y(client=None):
    '''Fit a supervised feature-selection ensemble from ``X_Y_DATA_SOURCE``.

    The pipeline is Flatten -> SelectPercentile(f_classif, 50%) ->
    SGDClassifier.  The trained ensemble is checked with
    ``_train_asserts`` and ``predict_many`` must return one result per
    ensemble member.  ``client`` is accepted but not used here.
    '''
    feature_selector = steps.SelectPercentile(score_func=f_classif,
                                              percentile=50)
    pipeline = Pipeline([steps.Flatten(), feature_selector, SGDClassifier()])

    # NOTE(review): ``classes`` is presumably forwarded to the classifier's
    # incremental fit method via ``method_kwargs`` -- confirm against
    # ``fit_ensemble``.
    fit_method_kwargs = dict(classes=[0, 1, 2])
    fit_kwargs = dict(method_kwargs=fit_method_kwargs, **ENSEMBLE_KWARGS)
    fit_kwargs.update(X_Y_DATA_SOURCE)

    trained = pipeline.fit_ensemble(**fit_kwargs)
    _train_asserts(trained, fit_kwargs['saved_ensemble_size'])

    predictions = trained.predict_many(**X_Y_DATA_SOURCE)
    assert len(predictions) == len(trained.ensemble)
コード例 #4
0
ファイル: test_ensemble.py プロジェクト: zhufengGNSS/elm
def test_simple():
    '''Train a two-member k-means ensemble and predict with each member.

    Ten samples are drawn via ``example_sampler`` with arguments
    ``(height, width, bands) = (100, 200, 5)``.  Each of the two fitted
    models then predicts on a fresh 100 x 400 sample, and both outputs
    must be flat arrays of length ``400 * 100``.
    '''
    pipeline = Pipeline([steps.Flatten(), MiniBatchKMeans(n_clusters=5)])

    sample_args = (100, 200, 5)  # (height, width, bands)
    fit_kwargs = dict(ngen=2, init_ensemble_size=2)
    fit_kwargs.update(
        dict(sampler=example_sampler, args_list=[sample_args] * 10))

    trained = pipeline.fit_ensemble(**fit_kwargs)

    # The ensemble must hold exactly two (tag, model) pairs here.
    first_entry, second_entry = trained.ensemble
    _, first_model = first_entry
    _, second_model = second_entry

    sample = example_sampler(100, 400, 5)
    first_pred = first_model.predict(sample)
    second_pred = second_model.predict(sample)

    expected_shape = (400 * 100,)
    assert first_pred.shape == second_pred.shape == expected_shape
コード例 #5
0
def test_kmeans_model_selection(client=None):
    '''Run ensemble model selection over PCA + k-means hyperparameters.

    An initial population of 100 pipeline copies is created with randomly
    chosen ``pca__n_components`` (2..5) and ``kmeans__n_clusters`` (3..9).
    The ensemble is fitted for 20 generations with ``kmeans_aic`` scoring
    and ``kmeans_model_averaging`` selection, then checked for the
    configured saved size and for one prediction per
    (ensemble member, ``args_list`` entry) pair.
    ``client`` is accepted but not used here.
    '''
    pipe = Pipeline(
        [
            steps.Flatten(),
            ('pca', steps.Transform(IncrementalPCA())),
            ('kmeans', MiniBatchKMeans(n_clusters=5)),
        ],
        scoring=kmeans_aic,
        scoring_kwargs={'score_weights': [-1]},
    )

    def samp(*args, **kwargs):
        # Arguments are ignored: every call produces a store built with
        # the same fixed settings.
        return random_elm_store(bands=12, mn=0, mx=1, height=20, width=40)

    n_clusters_choices = list(range(3, 10))

    def init(pipe, **kwargs):
        def random_variant():
            # Draw order (components first, then clusters) is kept fixed so
            # the random stream is consumed in the same sequence.
            n_components = np.random.choice(np.arange(2, 6))
            n_clusters = np.random.choice(n_clusters_choices)
            variant = copy.deepcopy(pipe)
            variant.set_params(kmeans__n_clusters=n_clusters,
                               pca__n_components=n_components)
            return variant

        return [random_variant() for _ in range(100)]

    ensemble_kwargs = ENSEMBLE_KWARGS.copy()
    ensemble_kwargs.update(dict(
        ngen=20,
        model_scoring=kmeans_aic,
        ensemble_init_func=init,
        model_selection_kwargs=dict(drop_n=30,
                                    evolve_n=30,
                                    choices=n_clusters_choices),
        model_selection=kmeans_model_averaging,
    ))

    # Same data source as the module default, but with the local sampler.
    data_source = SAMPLER_DATA_SOURCE.copy()
    data_source['sampler'] = samp
    ensemble_kwargs.update(data_source)

    fitted = pipe.fit_ensemble(**ensemble_kwargs)
    assert len(fitted.ensemble) == ensemble_kwargs['saved_ensemble_size']

    preds = fitted.predict_many(**data_source)
    n_members = len(fitted.ensemble)
    n_samples = len(SAMPLER_DATA_SOURCE['args_list'])
    assert len(preds) == n_members * n_samples