def test_from_sklearn(self):
        """Check ``from_sklearn`` naming and rejection of a non-estimator.

        Wrapping ``self.sk`` twice must give equal ``_name`` values, wrapping
        ``self.sk2`` must give a different one, and ``self.cls.from_sklearn``
        must raise ``TypeError`` when handed a plain string.
        """
        wrapped = from_sklearn(self.sk)
        wrapped_again = from_sklearn(self.sk)
        assert wrapped_again._name == wrapped._name

        wrapped_other = from_sklearn(self.sk2)
        assert wrapped_other._name != wrapped._name

        # A string is passed where an estimator is expected.
        with pytest.raises(TypeError):
            self.cls.from_sklearn("not a sklearn estimator")
Beispiel #2
0
def test_set_params():
    """Exercise ``set_params`` on a pipeline wrapped from ``pipe1``.

    Covers nested parameter updates (which must not mutate the original),
    replacing ``steps`` wholesale, the no-argument call returning the same
    object, and the two ``ValueError`` cases.
    """
    original = from_sklearn(pipe1)
    updated = original.set_params(pca__n_components=20, logistic__C=100)
    assert isinstance(updated, Pipeline)
    assert updated._name == from_sklearn(pipe2)._name

    # The updated object reports the new value, the original keeps its own,
    # both through get_params() and after compute().
    for wrapped, expected_c in ((updated, 100), (original, 1000)):
        assert wrapped.get_params()['logistic__C'] == expected_c
        assert wrapped.compute().get_params()['logistic__C'] == expected_c

    # Passing the module-level ``steps`` via set_params is accepted and
    # yields a pipeline with the same name as the original.
    updated = original.set_params(steps=steps)
    assert updated._name == original._name
    logistic_before = original.named_steps['logistic']
    logistic_after = updated.named_steps['logistic']
    assert logistic_before.get_params() == logistic_after.get_params()
    assert isinstance(logistic_after, Wrapped)

    # With no arguments the very same object comes back.
    updated = original.set_params()
    assert updated is original

    # Setting ``steps`` together with a nested parameter is rejected.
    with pytest.raises(ValueError):
        original.set_params(steps=steps, logistic__C=10)

    # Unknown parameter names are rejected as well.
    with pytest.raises(ValueError):
        original.set_params(not_a_real_param='foo')
Beispiel #3
0
def test_pipeline():
    """Check wrapping ``pipe1``: type, naming, graph caching, bad input."""
    wrapped = from_sklearn(pipe1)
    assert isinstance(wrapped, Pipeline)

    same_source = from_sklearn(pipe1)
    assert same_source._name == wrapped._name
    other_source = from_sklearn(pipe2)
    assert other_source._name != wrapped._name

    # Two accesses of ``dask`` must return the identical object.
    assert wrapped.dask is wrapped.dask

    # A string is not accepted in place of an estimator.
    with pytest.raises(TypeError):
        Pipeline.from_sklearn("not an estimator")
    def test_transform(self):
        """Check ``transform`` on bag input and on raw (non-bag) input.

        Bag inputs must produce a ``dm.Matrix`` with shape ``(None, 16)`` and
        the configured dtype; raw input must produce a ``Delayed``. All
        variants must compute to the same dense array.
        """
        est = from_sklearn(self.sk)

        # Bag built straight from the raw sequence.
        bag = db.from_sequence(self.raw_X)
        out_bag = est.transform(bag)
        assert out_bag.name == est.transform(bag).name
        assert out_bag.name != est.transform(bag.repartition(3)).name
        assert isinstance(out_bag, dm.Matrix)
        assert out_bag.shape == (None, 16)
        assert out_bag.dtype == np.dtype('f8')

        # Same data repartitioned into 2 partitions, with an integer dtype.
        bag_two_parts = bag.repartition(2)
        out_int = est.set_params(dtype='i8').transform(bag_two_parts)
        assert out_bag.name != out_int.name
        assert isinstance(out_int, dm.Matrix)
        assert out_int.shape == (None, 16)
        assert out_int.dtype == np.dtype('i8')

        dense_bag = out_bag.compute().toarray()
        dense_int = out_int.compute().toarray().astype('f8')
        assert_array_equal(dense_bag, dense_int)

        # Raw input yields a Delayed whose key is stable across calls.
        out_delayed = est.transform(self.raw_X)
        assert isinstance(out_delayed, Delayed)
        assert out_delayed._key == est.transform(self.raw_X)._key

        dense_bag = out_bag.compute().toarray()
        dense_delayed = out_delayed.compute().toarray()
        assert_array_equal(dense_bag, dense_delayed)
Beispiel #5
0
def test_Estimator__init__():
    """Check direct construction of ``Wrapped`` and its argument validation."""
    wrapped = Wrapped(LogisticRegression(C=1000))
    via_factory = from_sklearn(clf1)
    assert wrapped._name == via_factory._name

    # This explicit ``name`` is expected to be rejected.
    with pytest.raises(ValueError):
        Wrapped(clf1, name='foo')

    # A string is not accepted in place of an estimator.
    with pytest.raises(TypeError):
        Wrapped("not an estimator")
Beispiel #6
0
def test_set_params():
    """Check ``set_params`` on a wrapped ``clf1`` returns a new object.

    The result must be a ``Wrapped`` carrying ``C == 5`` while the original
    still reports ``C == 1000``.
    """
    original = from_sklearn(clf1)
    updated = original.set_params(C=5)
    assert isinstance(updated, Wrapped)

    # Verify both objects through get_params() and after compute().
    for wrapped, expected_c in ((updated, 5), (original, 1000)):
        assert wrapped.get_params()['C'] == expected_c
        assert wrapped.compute().C == expected_c
Beispiel #7
0
def test_Pipeline__init__():
    """Check direct construction of ``Pipeline`` and its step validation."""
    built = Pipeline(steps)
    via_factory = from_sklearn(pipe1)
    assert built._name == via_factory._name

    # ``MissingMethods`` as the first step is rejected.
    with pytest.raises(TypeError):
        Pipeline([MissingMethods(), LogisticRegression()])

    # ``MissingMethods`` as the last step is rejected too.
    with pytest.raises(TypeError):
        Pipeline([PCA(), MissingMethods()])
Beispiel #8
0
def test_fit():
    """Check that fitting a wrapped ``clf1`` returns a new ``Wrapped``.

    Computing the result must give a ``LogisticRegression`` with ``coef_``
    set, while ``clf1`` itself must still lack ``coef_``.
    """
    wrapped = from_sklearn(clf1)
    fitted = wrapped.fit(X_iris, y_iris)
    assert fitted is not wrapped
    assert isinstance(fitted, Wrapped)

    computed = fitted.compute()
    assert hasattr(computed, 'coef_')
    # The source estimator must not have been fitted as a side effect.
    assert not hasattr(clf1, 'coef_')
    assert isinstance(computed, LogisticRegression)
Beispiel #9
0
def test_predict():
    """Check ``predict`` on a fitted and on an unfitted wrapped ``pipe1``."""
    wrapped = from_sklearn(pipe1)
    fitted = wrapped.fit(X_digits, y_digits)

    prediction = fitted.predict(X_digits)
    assert isinstance(prediction, Delayed)
    computed = prediction.compute()
    assert isinstance(computed, np.ndarray)

    # Building the prediction on the unfitted pipeline happens outside the
    # ``raises`` block; only compute() is expected to raise.
    unfitted_prediction = wrapped.predict(X_digits)
    with pytest.raises(NotFittedError):
        unfitted_prediction.compute()
Beispiel #10
0
def test_score():
    """Check ``score`` on a fitted and on an unfitted wrapped ``pipe1``."""
    wrapped = from_sklearn(pipe1)
    fitted = wrapped.fit(X_digits, y_digits)

    score = fitted.score(X_digits, y_digits)
    assert isinstance(score, Delayed)
    computed = score.compute()
    assert isinstance(computed, float)

    # Building the score on the unfitted pipeline happens outside the
    # ``raises`` block; only compute() is expected to raise.
    unfitted_score = wrapped.score(X_digits, y_digits)
    with pytest.raises(NotFittedError):
        unfitted_score.compute()
Beispiel #11
0
def test_fit():
    """Check that fitting a wrapped ``pipe1`` returns a new ``Pipeline``.

    Computing the result must give a ``pipeline.Pipeline`` whose
    ``'logistic'`` step is a ``LogisticRegression`` and which has
    ``classes_``, while ``pipe1`` itself must still lack ``classes_``.
    """
    wrapped = from_sklearn(pipe1)
    fitted = wrapped.fit(X_digits, y_digits)
    assert fitted is not wrapped
    assert isinstance(fitted, Pipeline)

    computed = fitted.compute()
    assert isinstance(computed, pipeline.Pipeline)
    logistic_step = computed.named_steps['logistic']
    assert isinstance(logistic_step, LogisticRegression)
    assert hasattr(computed, 'classes_')
    # The source pipeline must not have been fitted as a side effect.
    assert not hasattr(pipe1, 'classes_')
 def test_set_params(self):
     """Check ``set_params`` returns a new object without mutating the old.

     The wrapper of ``self.sk`` is updated with all parameters of
     ``self.sk2``; afterwards each wrapper must report the parameters of
     its own source estimator, before and after ``compute()``.
     """
     original = from_sklearn(self.sk)
     updated = original.set_params(**self.sk2.get_params())
     assert isinstance(updated, type(original))

     # Original first, then the updated copy.
     for wrapped, source in ((original, self.sk), (updated, self.sk2)):
         expected = source.get_params()
         assert wrapped.get_params() == expected
         assert wrapped.compute().get_params() == expected
Beispiel #13
0
def test_get_params():
    """Compare ``get_params`` of a wrapped ``pipe1`` with ``pipe1`` itself.

    Deep parameters are compared with the ``'steps'``, ``'logistic'`` and
    ``'pca'`` entries removed; shallow parameters are compared step by step
    via each step's name and its estimator's own parameters.
    """
    wrapped = from_sklearn(pipe1)

    dask_params = wrapped.get_params()
    sk_params = pipe1.get_params()
    ignored = ('steps', 'logistic', 'pca')
    assert dissoc(dask_params, *ignored) == dissoc(sk_params, *ignored)

    dask_params = wrapped.get_params(deep=False)
    sk_params = pipe1.get_params(deep=False)
    for dask_step, sk_step in zip(dask_params['steps'], sk_params['steps']):
        # Step names must agree.
        assert dask_step[0] == sk_step[0]
        # The estimators behind the steps must have the same parameters.
        assert dask_step[1].get_params() == sk_step[1].get_params()
Beispiel #14
0
def test_score():
    """Check ``score`` on a wrapped ``clf1`` for in-memory and dask inputs.

    Both input kinds must give a ``Delayed`` that computes to a ``float``;
    scoring with the unfitted wrapper must raise ``NotFittedError`` on
    ``compute()``.
    """
    wrapped = from_sklearn(clf1)
    fitted = wrapped.fit(X_iris, y_iris)

    # In-memory inputs.
    score = fitted.score(X_iris, y_iris)
    assert isinstance(score, Delayed)
    computed = score.compute()
    assert isinstance(computed, float)

    # Dask array inputs.
    dask_X = da.from_array(X_iris, chunks=4)
    dask_y = da.from_array(y_iris, chunks=4)
    score = fitted.score(dask_X, dask_y)
    assert isinstance(score, Delayed)
    computed = score.compute()
    assert isinstance(computed, float)

    # Only compute() is expected to raise for the unfitted estimator.
    unfitted_score = wrapped.score(X_iris, y_iris)
    with pytest.raises(NotFittedError):
        unfitted_score.compute()
Beispiel #15
0
def test_predict():
    """Check that ``predict`` on a wrapped ``clf1`` follows the input type.

    In-memory input gives a ``Delayed``, a dask array gives a ``da.Array``
    and a ``dm`` matrix gives a ``dm.Matrix``; each must compute to an
    ``np.ndarray``. Predicting with the unfitted wrapper must raise
    ``NotFittedError`` on ``compute()``.
    """
    wrapped = from_sklearn(clf1)
    fitted = wrapped.fit(X_iris, y_iris)

    # In-memory input.
    prediction = fitted.predict(X_iris)
    assert isinstance(prediction, Delayed)
    computed = prediction.compute()
    assert isinstance(computed, np.ndarray)

    # Dask array input.
    dask_X = da.from_array(X_iris, chunks=4)
    prediction = fitted.predict(dask_X)
    assert isinstance(prediction, da.Array)
    computed = prediction.compute()
    assert isinstance(computed, np.ndarray)

    # Matrix input built from the dask array.
    matrix_X = dm.from_array(dask_X)
    prediction = fitted.predict(matrix_X)
    assert isinstance(prediction, dm.Matrix)
    computed = prediction.compute()
    assert isinstance(computed, np.ndarray)

    # Only compute() is expected to raise for the unfitted estimator.
    unfitted_prediction = wrapped.predict(X_iris)
    with pytest.raises(NotFittedError):
        unfitted_prediction.compute()
Beispiel #16
0
def test_from_sklearn():
    """Check ``_name`` of wrappers built from ``clf1`` and ``clf2``.

    The name must be equal for the same estimator, differ for a different
    one, and contain ``'logisticregression'`` (case-insensitively).
    """
    wrapped = from_sklearn(clf1)
    same_source = from_sklearn(clf1)
    assert same_source._name == wrapped._name
    other_source = from_sklearn(clf2)
    assert other_source._name != wrapped._name
    lowered_name = wrapped._name.lower()
    assert 'logisticregression' in lowered_name
 def test_tokenize(self):
     """Check ``tokenize`` is stable per wrapper and differs across them."""
     wrapped = from_sklearn(self.sk)
     wrapped_other = from_sklearn(self.sk2)
     # Tokenizing the same object twice must agree.
     assert tokenize(wrapped) == tokenize(wrapped)
     # Wrappers of different estimators must not share a token.
     assert tokenize(wrapped) != tokenize(wrapped_other)
 def test__init__(self):
     """Check that direct construction of ``self.cls`` matches ``from_sklearn``.

     An instance built with ``n_features=16`` must share its ``_name`` with
     the wrapper of ``self.sk`` and differ from one with ``n_features=20``.
     """
     built = self.cls(n_features=16)
     via_factory = from_sklearn(self.sk)
     assert built._name == via_factory._name
     built_other = self.cls(n_features=20)
     assert built._name != built_other._name
 def test_get_params(self):
     """Check the wrapper's ``get_params`` matches that of ``self.sk``."""
     wrapped = from_sklearn(self.sk)
     source = self.sk
     # Default call.
     assert wrapped.get_params() == source.get_params()
     # Shallow call.
     assert wrapped.get_params(deep=False) == source.get_params(deep=False)
 def test_clone(self):
     """Check ``clone`` keeps name and parameters but not the ``_est`` object."""
     wrapped = from_sklearn(self.sk)
     cloned = clone(wrapped)
     assert wrapped._name == cloned._name
     assert wrapped.get_params() == cloned.get_params()
     # The underlying estimator objects must be distinct.
     assert wrapped._est is not cloned._est
Beispiel #21
0
def test_setattr():
    """Check that assigning an attribute on a wrapped ``pipe1`` fails."""
    wrapped = from_sklearn(pipe1)
    # Attribute assignment must raise AttributeError.
    with pytest.raises(AttributeError):
        wrapped.C = 10
 def test_setattr(self):
     """Check that assigning ``n_features`` on the wrapper fails."""
     wrapped = from_sklearn(self.sk)
     # Attribute assignment must raise AttributeError.
     with pytest.raises(AttributeError):
         wrapped.n_features = 20
 def test_getattr(self):
     """Check attribute reads on the wrapper.

     ``n_features`` must equal the value on ``self.sk``; an unknown
     attribute must raise ``AttributeError``.
     """
     wrapped = from_sklearn(self.sk)
     expected = self.sk.n_features
     assert wrapped.n_features == expected
     # The bare attribute access is what triggers the error.
     with pytest.raises(AttributeError):
         wrapped.not_a_real_parameter
Beispiel #24
0
def test_named_steps():
    """Check both named steps of a wrapped ``pipe1`` are ``Wrapped``."""
    wrapped = from_sklearn(pipe1)
    by_name = wrapped.named_steps
    for step_name in ('pca', 'logistic'):
        assert isinstance(by_name[step_name], Wrapped)
 def test_dir(self):
     """Check that ``dir()`` of the wrapper lists ``n_features``."""
     wrapped = from_sklearn(self.sk)
     listed = dir(wrapped)
     assert 'n_features' in listed
Beispiel #26
0
def test_get_params():
    """Check the wrapper's ``get_params`` matches that of ``clf1``."""
    wrapped = from_sklearn(clf1)
    # Default call.
    assert wrapped.get_params() == clf1.get_params()
    # Shallow call.
    assert wrapped.get_params(deep=False) == clf1.get_params(deep=False)
 def test_repr(self):
     """Check the wrapper's ``repr`` equals that of ``self.sk``."""
     wrapped = from_sklearn(self.sk)
     wrapped_repr = repr(wrapped)
     assert wrapped_repr == repr(self.sk)
Beispiel #28
0
def test_from_sklearn():
    """Check ``_name`` is equal for ``clf1`` twice and differs for ``clf2``."""
    wrapped = from_sklearn(clf1)
    same_source = from_sklearn(clf1)
    assert same_source._name == wrapped._name
    other_source = from_sklearn(clf2)
    assert other_source._name != wrapped._name
 def test_fit(self):
     """Check that ``fit`` on bag inputs returns the wrapper itself."""
     d = from_sklearn(self.sk)
     b = db.from_sequence(self.raw_X)
     # The second argument to ``fit`` is a bag built with ``db.range``, using
     # ``len(self.raw_X)`` for both of its arguments.
     fit = d.fit(b, db.range(len(self.raw_X), len(self.raw_X)))
     # Here ``fit`` must hand back the same object rather than a new one.
     assert fit is d