Python Subsemble Examples

Programming Language: Python

Namespace/Package Name: mlens.ensemble

Class/Type: Subsemble

Examples at hotexamples.com: 10

Python Subsemble - 10 examples found. These are the top-rated real-world Python examples of mlens.ensemble.Subsemble, extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

Subsemble(10)

add(9)

fit(7)

add_meta(3)

predict(3)

Example #1

Show file

File: test_sequential.py Project: zzygyx9119/mlens

def test_equivalence_subsemble():
    """[SequentialEnsemble] Test ensemble equivalence with Subsemble."""
    # Both ensembles run single-threaded and receive the same learners.
    subsemble = Subsemble(n_jobs=1)
    sequential = SequentialEnsemble(n_jobs=1)

    subsemble.add(ECM, dtype=np.float64)
    sequential.add('subsemble', ECM, dtype=np.float64)

    # Fit and predict on the same data, subsemble first.
    fitted_subsemble = subsemble.fit(X, y)
    subsemble_preds = fitted_subsemble.predict(X)
    fitted_sequential = sequential.fit(X, y)
    sequential_preds = fitted_sequential.predict(X)

    # The two prediction arrays must match exactly.
    np.testing.assert_array_equal(sequential_preds, subsemble_preds)

Example #2

Show file

File: test_sequential.py Project: tongli12/mlens

def test_equivalence_subsemble():
    """[Sequential] Test ensemble equivalence with Subsemble."""
    # Default-configured ensembles that receive the same learners.
    subsemble = Subsemble()
    sequential = SequentialEnsemble()

    subsemble.add(ECM)
    sequential.add('subset', ECM)

    # Fit and predict on the same data, subsemble first.
    fitted_subsemble = subsemble.fit(X, y)
    subsemble_preds = fitted_subsemble.predict(X)
    fitted_sequential = sequential.fit(X, y)
    sequential_preds = fitted_sequential.predict(X)

    # The two prediction arrays must match exactly.
    np.testing.assert_array_equal(sequential_preds, subsemble_preds)

Example #3

Show file

File: test_subsemble.py Project: xc35/mlens

def test_subset_equiv():
    """[Subsemble] Test equivalence with SuperLearner for J=1."""
    # A subsemble with a single partition is compared to a super learner.
    subsemble = Subsemble(partitions=1)
    super_learner = SuperLearner()

    subsemble.add(ECM, dtype=np.float64)
    super_learner.add(ECM, dtype=np.float64)

    # Fit and predict on the same data, subsemble first.
    fitted_subsemble = subsemble.fit(X, y)
    subsemble_preds = fitted_subsemble.predict(X)
    fitted_super_learner = super_learner.fit(X, y)
    super_learner_preds = fitted_super_learner.predict(X)

    # The two prediction arrays must match exactly.
    np.testing.assert_array_equal(super_learner_preds, subsemble_preds)

Example #4

Show file

File: test_subsemble.py Project: zzygyx9119/mlens

def test_subset_fit():
    """[Subsemble] 'fit' and 'predict' runs correctly."""
    # Reference output: an OLS meta learner fitted on F and applied to P.
    reference_meta = OLS()
    reference_meta.fit(F, y)
    expected = reference_meta.predict(P)

    # Ensemble under test: one ECM layer plus an OLS meta learner.
    subsemble = Subsemble()
    subsemble.add(ECM, partitions=2, folds=3, dtype=np.float64)
    subsemble.add_meta(OLS(), dtype=np.float64)
    subsemble.fit(X, y)

    actual = subsemble.predict(X)
    np.testing.assert_array_equal(actual, expected)

Example #5

Show file

def build_clustered_subsemble(estimator):
    """Build a two-partition subsemble whose partitions come from ``estimator``.

    ``estimator`` is passed to the Subsemble as its ``partition_estimator``.
    The ensemble gets an SVC and a LogisticRegression as learners and an SVC
    as meta learner. It is returned unfitted.
    """
    settings = dict(
        partitions=2,
        partition_estimator=estimator,
        folds=2,
        verbose=2,
    )
    ensemble = Subsemble(**settings)

    ensemble.add([SVC(), LogisticRegression()])
    ensemble.add_meta(SVC())
    return ensemble

Example #6

Show file

File: ensemble_notest.py Project: bbadura/mlens-exploration

def add_subsemble(name, models, X_train, Y_train, X_test, Y_test):
    """Fit a Subsemble with an SVC meta estimator and report its test accuracy.

    Args:
        name: Label stored under the ``"Ensemble"`` key of the result.
        models: Learners passed to ``ensemble.add`` as the base layer.
        X_train, Y_train: Data the ensemble is fitted on.
        X_test, Y_test: Data the ensemble is scored on.

    Returns:
        A dict with the keys ``"Ensemble"``, ``"Meta_Classifier"``,
        ``"Accuracy_Score"`` and ``"Runtime"`` (seconds spent on fitting,
        predicting and scoring).
    """
    ensemble = Subsemble(scorer=accuracy_score, random_state=seed)

    ensemble.add(models)
    # Attach the final meta estimator
    ensemble.add(SVC(), meta=True)

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments the way time.time() can.
    start = time.perf_counter()
    ensemble.fit(X_train, Y_train)
    preds = ensemble.predict(X_test)
    # accuracy_score is documented as (y_true, y_pred).
    acc_score = accuracy_score(Y_test, preds)
    elapsed = time.perf_counter() - start

    return {
        "Ensemble": name,
        "Meta_Classifier": "SVC",
        "Accuracy_Score": acc_score,
        "Runtime": elapsed
    }

Example #7

Show file

File: ensemble_comp.py Project: chrinide/mlens_dev

                        'machine-learning-databases/'
                        'poker/poker-hand-testing.data')
    else:
        raise ValueError("Not valid data option.")

    X = np.loadtxt(out, delimiter=",")
    y = X[:, -1]
    X = X[:, :-1]
    return X, y


# Load the train and test splits through get_data.
xtrain, ytrain = get_data('train')
xtest, ytest = get_data('test')

# One default-configured ensemble of each type, keyed by a short name.
estimators = {
    'subsemble': Subsemble(),
    'super_learner': SuperLearner(),
    'blend_ensemble': BlendEnsemble()
}

# NOTE(review): base_learners is not referenced in the visible part of this
# script; the loop below adds freshly constructed default estimators instead.
# Confirm against the rest of the file whether that is intended.
base_learners = [
    RandomForestClassifier(n_estimators=500,
                           max_depth=10,
                           min_samples_split=50,
                           max_features=0.6),
    LogisticRegression(C=1e5),
    GradientBoostingClassifier()
]

# Give every ensemble the same three kinds of learner.
for clf in estimators.values():
    clf.add([RandomForestClassifier(), LogisticRegression(), MLPClassifier()])

Example #8

Show file

def build_subsemble():
    """Return an unfitted Subsemble with 3 partitions and 2 folds.

    An SVC and a LogisticRegression are added as its learners. No meta
    learner is attached.
    """
    ensemble = Subsemble(partitions=3, folds=2)
    learners = [SVC(), LogisticRegression()]
    ensemble.add(learners)
    return ensemble

Example #9

Show file

    def __init__(self):
        """Create the estimator; it takes no parameters and stores no state."""
        pass

    def our_custom_function(self, X, y=None):
        """Label each row 1 if its feature sum exceeds the mean row sum, else 0.

        ``y`` is accepted but not used.
        """
        row_totals = X.sum(axis=1)
        above_mean = row_totals > row_totals.mean()
        # Multiplying by 1 turns the boolean mask into numerical labels.
        return 1 * above_mean

    def get_params(self, deep=False):
        """Return an empty parameter mapping; ``deep`` is ignored."""
        return dict()


# Note that the number of partitions the estimator creates *must* match the
# ``partitions`` argument passed to the subsemble.

sub = Subsemble(partitions=2, folds=3, verbose=1)
# The layer uses a SimplePartitioner instance as partition estimator.
# ``attr`` names the method to use on it, and ``fit_estimator=False`` is
# passed alongside.
# NOTE(review): presumably fit_estimator=False skips fitting the partitioner
# before ``attr`` is called - confirm against the mlens documentation.
sub.add([SVC(), LogisticRegression()],
        partition_estimator=SimplePartitioner(),
        fit_estimator=False,
        attr="our_custom_function")

sub.fit(X, y)

##############################################################################
# A final word of caution. When implementing custom estimators from scratch, some
# care needs to be taken if you plan on copying the Subsemble. It is advised that
# the estimator inherits the :class:`sklearn.base.BaseEstimator` class to
# provide a Scikit-learn compatible interface. For further information,
# see the :ref:`API` documentation of the :class:`Subsemble`
# and :class:`mlens.base.indexer.ClusteredSubsetIndex`.
#

Example #10

Show file

# determine if we are building a classifier model: the targets must take
# exactly the two values 0 and 1. np.array_equal returns False when the number
# of distinct values differs from two, whereas an elementwise ``==`` against
# [0, 1] fails to broadcast in that case.
classifier = np.array_equal(np.unique(Y.to_numpy()), [0, 1])
outputs = Y.shape[1]

# separate the data into training and testing
# One fifth of the rows (rounded down) go to the test set: the last rows for
# time series data, otherwise a seeded random sample without replacement.
if TIME_SERIES:
    test_idx = X.index.values[-int(X.shape[0] / 5):]
else:
    np.random.seed(1)
    test_idx = np.random.choice(a=X.index.values, size=int(X.shape[0] / 5), replace=False)
# Every index label not chosen for testing is used for training.
# NOTE(review): going through a set does not guarantee the original row order
# is kept in train_idx - confirm whether downstream code relies on order.
train_idx = np.array(list(set(X.index.values) - set(test_idx)))

# set up the model
# Both branches build the same structure (three ``add`` calls followed by
# ``add_meta``), using classifiers or regressors as appropriate.
if classifier:
    model = Subsemble(partitions=2, random_state=42, n_jobs=1)
    model.add(KNeighborsClassifier())
    model.add(RandomForestClassifier())
    model.add(GaussianNB())
    model.add_meta(LogisticRegression(penalty="l1", solver="saga"))
else:
    model = Subsemble(partitions=2, random_state=42, n_jobs=1)
    model.add(KNeighborsRegressor())
    model.add(RandomForestRegressor())
    model.add(BayesianRidge())
    model.add_meta(Lasso())

# train and predict
# Empty frames created ahead of the per-target loop that follows.
train_predict = pd.DataFrame()
test_predict = pd.DataFrame()
for j in Y.columns: