Example #1
def test_gridsearch():
    from sklearn.pipeline import make_pipeline
    dcv = pytest.importorskip('dask_searchcv')

    X, y = make_classification(n_samples=100, n_features=5, chunksize=10)
    grid = {'logisticregression__lamduh': [.001, .01, .1, .5]}
    pipe = make_pipeline(DoNothingTransformer(), LogisticRegression())
    search = dcv.GridSearchCV(pipe, grid, cv=3)
    search.fit(X, y)
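
dask_searchcv has since been folded into dask-ml, so the same drop-in search is available from dask_ml.model_selection; a minimal sketch of the port, assuming that module as the replacement (only the import changes):

# Hypothetical port of Example #1 to dask-ml, which absorbed dask-searchcv.
# pipe and grid are exactly as built in Example #1 above.
from dask_ml.model_selection import GridSearchCV

search = GridSearchCV(pipe, grid, cv=3)
search.fit(X, y)
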
Example #2
def test_big(fit_intercept, is_sparse):
    with dask.config.set(scheduler='synchronous'):
        X, y = make_classification(is_sparse=is_sparse)
        lr = LogisticRegression(fit_intercept=fit_intercept)
        lr.fit(X, y)
        lr.predict(X)
        lr.predict_proba(X)
    if fit_intercept:
        assert lr.intercept_ is not None
Example #3
def test_fit(fit_intercept, is_sparse):
    X, y = make_classification(n_samples=100,
                               n_features=5,
                               chunksize=10,
                               is_sparse=is_sparse)
    lr = LogisticRegression(fit_intercept=fit_intercept)
    lr.fit(X, y)
    lr.predict(X)
    lr.predict_proba(X)
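
The chunksize argument controls how make_classification blocks the returned dask arrays; a quick sketch of inspecting that structure, assuming dask_glm.datasets.make_classification as used throughout these examples:

# With n_samples=100 and chunksize=10, X should be a dask array split into
# ten 10-row blocks; .chunks exposes the block layout.
from dask_glm.datasets import make_classification

X, y = make_classification(n_samples=100, n_features=5, chunksize=10)
print(X.chunks)  # expected: ((10, 10, 10, 10, 10, 10, 10, 10, 10, 10), (5,))
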
Example #4
def test_big(fit_intercept):
    import dask
    # dask.set_options(get=dask.get) is the removed pre-0.18 API; selecting
    # the synchronous scheduler through dask.config is the current form.
    dask.config.set(scheduler='synchronous')
    X, y = make_classification()
    lr = LogisticRegression(fit_intercept=fit_intercept)
    lr.fit(X, y)
    lr.predict(X)
    lr.predict_proba(X)
    if fit_intercept:
        assert lr.intercept_ is not None
Example #5
def test_big(fit_intercept, is_sparse, is_cupy):
    with dask.config.set(scheduler='synchronous'):
        X, y = make_classification(is_sparse=is_sparse)
        if is_cupy and not is_sparse:
            cupy = pytest.importorskip('cupy')
            X, y = to_dask_cupy_array_xy(X, y, cupy)
        lr = LogisticRegression(fit_intercept=fit_intercept)
        lr.fit(X, y)
        lr.predict(X)
        lr.predict_proba(X)
    if fit_intercept:
        assert lr.intercept_ is not None
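
to_dask_cupy_array_xy is a test helper whose body is not shown on this page; assuming it simply moves each block onto the GPU, a plausible sketch uses dask.array's map_blocks with cupy.asarray:

# Hypothetical sketch of the to_dask_cupy_array_xy helper: convert every
# block of the dask arrays from NumPy to CuPy. map_blocks and cupy.asarray
# are real APIs; the helper's actual implementation may differ.
def to_dask_cupy_array_xy(X, y, cupy):
    return X.map_blocks(cupy.asarray), y.map_blocks(cupy.asarray)
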
Example #6
def test_fit(fit_intercept, is_sparse, is_cupy):
    X, y = make_classification(n_samples=100,
                               n_features=5,
                               chunksize=10,
                               is_sparse=is_sparse)

    if is_cupy and not is_sparse:
        cupy = pytest.importorskip('cupy')
        X, y = to_dask_cupy_array_xy(X, y, cupy)

    lr = LogisticRegression(fit_intercept=fit_intercept)
    lr.fit(X, y)
    lr.predict(X)
    lr.predict_proba(X)
Example #7
import time

from dask_glm.datasets import make_classification
from dask_ml.linear_model import LogisticRegression

X, y = make_classification(n_samples=1000)

t = time.time()
lr = LogisticRegression()
lr.fit(X, y)
lr.predict(X)
lr.predict_proba(X)
# lr.score(X, y)
print('\nTime dask_ml: ' + str(time.time() - t))

# Parallelize Scikit-Learn Directly
from dask.distributed import Client
from joblib import parallel_backend  # sklearn.externals.joblib was removed in scikit-learn 0.23

client = Client('localhost:8786')  # Connect to a Dask Cluster
print(client)
with parallel_backend('dask', scatter=[X, y]):
    # Your normal scikit-learn code here
    t = time.time()
    lr = LogisticRegression()
    lr.fit(X, y)
    lr.predict(X)
    lr.predict_proba(X)
    # lr.score(X, y)
    print('\nTime dask_ml distributed: ' + str(time.time() - t))
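
Client('localhost:8786') assumes a scheduler is already listening at that address; for a self-contained run, dask.distributed can spin up a throwaway local cluster instead. A minimal sketch:

# Client() with no address starts an in-process LocalCluster automatically.
from dask.distributed import Client

client = Client()
print(client)
client.close()  # shut the local cluster down when finished
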
Example #8
def make_dask_arrs():
    return make_classification(n_samples=300, n_features=6)
Example #9
def test_in_pipeline():
    from sklearn.pipeline import make_pipeline
    X, y = make_classification(n_samples=100, n_features=5, chunksize=10)
    pipe = make_pipeline(DoNothingTransformer(), LogisticRegression())
    pipe.fit(X, y)
Example #10
# Module-level imports for this test file (matching those in Example #7).
import pytest

from dask_glm.datasets import make_classification
from dask_ml.linear_model import LogisticRegression


class DoNothingTransformer(object):
    def fit(self, X, y=None):
        return self

    def transform(self, X, y=None):
        return X

    def fit_transform(self, X, y=None):
        return X

    def get_params(self, deep=True):
        return {}
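
DoNothingTransformer implements the scikit-learn transformer protocol by hand; an equivalent sketch built on scikit-learn's base classes, which supply get_params/set_params and fit_transform for free (PassThroughTransformer is a hypothetical name, not part of the original module):

from sklearn.base import BaseEstimator, TransformerMixin

class PassThroughTransformer(BaseEstimator, TransformerMixin):
    # BaseEstimator provides get_params/set_params; TransformerMixin
    # derives fit_transform from fit and transform.
    def fit(self, X, y=None):
        return self

    def transform(self, X, y=None):
        return X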


X, y = make_classification()


def test_lr_init(solver):
    LogisticRegression(solver=solver)
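
test_lr_init takes a solver argument but its parametrization is not shown in this excerpt; presumably the module declares one over dask_glm's solver names. A sketch of what that decorator plausibly looks like (the exact list in the real module may differ):

# Hypothetical parametrization; these solver names do exist in
# dask_glm.algorithms.
@pytest.mark.parametrize('solver',
                         ['admm', 'gradient_descent', 'newton',
                          'lbfgs', 'proximal_grad'])
def test_lr_init(solver):
    LogisticRegression(solver=solver)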


@pytest.mark.parametrize('fit_intercept', [True, False])
def test_fit(fit_intercept):
    X, y = make_classification(n_samples=100, n_features=5, chunksize=10)
    lr = LogisticRegression(fit_intercept=fit_intercept)
    lr.fit(X, y)
    lr.predict(X)
    lr.predict_proba(X)