def test_from_sklearn(self):
    """Names from ``from_sklearn`` match for ``self.sk`` and differ for ``self.sk2``.

    Also checks that ``self.cls.from_sklearn`` raises ``TypeError`` when
    handed a plain string.
    """
    wrapped = from_sklearn(self.sk)

    same_name = from_sklearn(self.sk)._name
    assert same_name == wrapped._name

    other_name = from_sklearn(self.sk2)._name
    assert other_name != wrapped._name

    with pytest.raises(TypeError):
        self.cls.from_sklearn("not a sklearn estimator")
def test_set_params():
    """Exercise ``set_params`` on a wrapped pipeline.

    Covers: nested parameter updates, the original staying unchanged,
    replacing ``steps``, the no-argument call, and the two ``ValueError``
    cases.
    """
    base = from_sklearn(pipe1)
    updated = base.set_params(pca__n_components=20, logistic__C=100)
    assert isinstance(updated, Pipeline)
    assert updated._name == from_sklearn(pipe2)._name

    # The new object carries the new value; the original keeps the old one.
    assert updated.get_params()['logistic__C'] == 100
    assert updated.compute().get_params()['logistic__C'] == 100
    assert base.get_params()['logistic__C'] == 1000
    assert base.compute().get_params()['logistic__C'] == 1000

    # Passing ``steps`` through set_params is accepted.
    updated = base.set_params(steps=steps)
    assert updated._name == base._name
    base_logistic = base.named_steps['logistic']
    updated_logistic = updated.named_steps['logistic']
    assert base_logistic.get_params() == updated_logistic.get_params()
    assert isinstance(updated_logistic, Wrapped)

    # With no arguments the very same object comes back.
    updated = base.set_params()
    assert updated is base

    # Setting ``steps`` together with a nested parameter is rejected.
    with pytest.raises(ValueError):
        base.set_params(steps=steps, logistic__C=10)

    # So is a parameter name that does not exist.
    with pytest.raises(ValueError):
        base.set_params(not_a_real_param='foo')
def test_pipeline():
    """Check type, naming and ``dask`` identity of a wrapped ``pipe1``."""
    wrapped = from_sklearn(pipe1)
    assert isinstance(wrapped, Pipeline)

    same_name = from_sklearn(pipe1)._name
    assert same_name == wrapped._name
    other_name = from_sklearn(pipe2)._name
    assert other_name != wrapped._name

    # Repeated access to ``dask`` must hand back the very same object.
    assert wrapped.dask is wrapped.dask

    with pytest.raises(TypeError):
        Pipeline.from_sklearn("not an estimator")
def test_transform(self):
    """Check ``transform`` on a bag, a repartitioned bag and ``self.raw_X``."""
    wrapped = from_sklearn(self.sk)

    # Bag built straight from the raw sequence.
    bag = db.from_sequence(self.raw_X)
    out_bag = wrapped.transform(bag)
    assert out_bag.name == wrapped.transform(bag).name
    assert out_bag.name != wrapped.transform(bag.repartition(3)).name
    assert isinstance(out_bag, dm.Matrix)
    assert out_bag.shape == (None, 16)
    assert out_bag.dtype == np.dtype('f8')

    # Same data repartitioned into 2, transformed with an integer dtype.
    bag_two = bag.repartition(2)
    out_two = wrapped.set_params(dtype='i8').transform(bag_two)
    assert out_bag.name != out_two.name
    assert isinstance(out_two, dm.Matrix)
    assert out_two.shape == (None, 16)
    assert out_two.dtype == np.dtype('i8')
    dense_bag = out_bag.compute().toarray()
    dense_two = out_two.compute().toarray().astype('f8')
    assert_array_equal(dense_bag, dense_two)

    # Handing over ``self.raw_X`` itself produces a Delayed result.
    out_raw = wrapped.transform(self.raw_X)
    assert isinstance(out_raw, Delayed)
    assert out_raw._key == wrapped.transform(self.raw_X)._key
    dense_bag = out_bag.compute().toarray()
    dense_raw = out_raw.compute().toarray()
    assert_array_equal(dense_bag, dense_raw)
def test_Estimator__init__():
    """Check direct ``Wrapped`` construction and its two error cases."""
    wrapped = Wrapped(LogisticRegression(C=1000))
    expected_name = from_sklearn(clf1)._name
    assert wrapped._name == expected_name

    # An explicit ``name='foo'`` for clf1 is expected to raise ValueError.
    with pytest.raises(ValueError):
        Wrapped(clf1, name='foo')

    # A plain string is expected to raise TypeError.
    with pytest.raises(TypeError):
        Wrapped("not an estimator")
def test_set_params():
    """``set_params`` on a wrapped ``clf1`` returns a ``Wrapped`` and leaves the source alone."""
    base = from_sklearn(clf1)
    updated = base.set_params(C=5)
    assert isinstance(updated, Wrapped)

    # New value is visible on the result, old value stays on the original.
    assert updated.get_params()['C'] == 5
    assert updated.compute().C == 5
    assert base.get_params()['C'] == 1000
    assert base.compute().C == 1000
def test_Pipeline__init__():
    """Check direct ``Pipeline`` construction and rejection of ``MissingMethods`` steps."""
    built = Pipeline(steps)
    expected_name = from_sklearn(pipe1)._name
    assert built._name == expected_name

    # ``MissingMethods`` in the first position.
    with pytest.raises(TypeError):
        Pipeline([MissingMethods(), LogisticRegression()])

    # ``MissingMethods`` in the last position.
    with pytest.raises(TypeError):
        Pipeline([PCA(), MissingMethods()])
def test_fit():
    """``fit`` on a wrapped ``clf1`` yields a new ``Wrapped``; ``clf1`` gains no ``coef_``."""
    wrapped = from_sklearn(clf1)
    fitted = wrapped.fit(X_iris, y_iris)
    assert fitted is not wrapped
    assert isinstance(fitted, Wrapped)

    computed = fitted.compute()
    assert hasattr(computed, 'coef_')
    assert not hasattr(clf1, 'coef_')
    assert isinstance(computed, LogisticRegression)
def test_predict():
    """``predict`` on a fitted wrapped pipeline is Delayed and computes to an ndarray.

    Predicting from the unfitted wrapper must raise ``NotFittedError`` at
    compute time.
    """
    wrapped = from_sklearn(pipe1)
    fitted = wrapped.fit(X_digits, y_digits)

    delayed_pred = fitted.predict(X_digits)
    assert isinstance(delayed_pred, Delayed)
    computed = delayed_pred.compute()
    assert isinstance(computed, np.ndarray)

    # Building the prediction is fine; only computing it should fail.
    unfitted_pred = wrapped.predict(X_digits)
    with pytest.raises(NotFittedError):
        unfitted_pred.compute()
def test_score():
    """``score`` on a fitted wrapped pipeline is Delayed and computes to a float.

    Scoring from the unfitted wrapper must raise ``NotFittedError`` at
    compute time.
    """
    wrapped = from_sklearn(pipe1)
    fitted = wrapped.fit(X_digits, y_digits)

    delayed_score = fitted.score(X_digits, y_digits)
    assert isinstance(delayed_score, Delayed)
    computed = delayed_score.compute()
    assert isinstance(computed, float)

    # Building the score is fine; only computing it should fail.
    unfitted_score = wrapped.score(X_digits, y_digits)
    with pytest.raises(NotFittedError):
        unfitted_score.compute()
def test_fit():
    """``fit`` on a wrapped ``pipe1`` yields a new ``Pipeline``; ``pipe1`` gains no ``classes_``."""
    wrapped = from_sklearn(pipe1)
    fitted = wrapped.fit(X_digits, y_digits)
    assert fitted is not wrapped
    assert isinstance(fitted, Pipeline)

    computed = fitted.compute()
    assert isinstance(computed, pipeline.Pipeline)
    assert isinstance(computed.named_steps['logistic'], LogisticRegression)
    assert hasattr(computed, 'classes_')
    assert not hasattr(pipe1, 'classes_')
def test_set_params(self):
    """Applying ``self.sk2``'s params gives a same-typed object and leaves the source alone."""
    base = from_sklearn(self.sk)
    updated = base.set_params(**self.sk2.get_params())
    assert isinstance(updated, type(base))

    # The original still reports the parameters of ``self.sk``.
    original_params = self.sk.get_params()
    assert base.get_params() == original_params
    assert base.compute().get_params() == original_params

    # The result reports the parameters of ``self.sk2``.
    new_params = self.sk2.get_params()
    assert updated.get_params() == new_params
    assert updated.compute().get_params() == new_params
def test_get_params():
    """Compare ``get_params`` of a wrapped ``pipe1`` against ``pipe1`` itself."""
    wrapped = from_sklearn(pipe1)

    # Deep params: compare everything except the three dropped entries.
    dropped = ('steps', 'logistic', 'pca')
    wrapped_params = wrapped.get_params()
    sklearn_params = pipe1.get_params()
    assert dissoc(wrapped_params, *dropped) == dissoc(sklearn_params, *dropped)

    # Shallow params: walk the steps pairwise.
    wrapped_params = wrapped.get_params(deep=False)
    sklearn_params = pipe1.get_params(deep=False)
    step_pairs = zip(wrapped_params['steps'], sklearn_params['steps'])
    for wrapped_step, sklearn_step in step_pairs:
        # First element of each step must agree.
        assert wrapped_step[0] == sklearn_step[0]
        # Second element of each step must report the same params.
        assert wrapped_step[1].get_params() == sklearn_step[1].get_params()
def test_score():
    """``score`` on a fitted wrapped ``clf1`` is Delayed and computes to a float.

    Checked with the iris inputs as-is and wrapped via ``da.from_array``;
    scoring from the unfitted wrapper must raise ``NotFittedError`` at
    compute time.
    """
    wrapped = from_sklearn(clf1)
    fitted = wrapped.fit(X_iris, y_iris)

    # Iris inputs passed through unchanged.
    delayed_score = fitted.score(X_iris, y_iris)
    assert isinstance(delayed_score, Delayed)
    computed = delayed_score.compute()
    assert isinstance(computed, float)

    # Same inputs wrapped with ``da.from_array``.
    chunked_X = da.from_array(X_iris, chunks=4)
    chunked_y = da.from_array(y_iris, chunks=4)
    delayed_score = fitted.score(chunked_X, chunked_y)
    assert isinstance(delayed_score, Delayed)
    computed = delayed_score.compute()
    assert isinstance(computed, float)

    unfitted_score = wrapped.score(X_iris, y_iris)
    with pytest.raises(NotFittedError):
        unfitted_score.compute()
def test_predict():
    """Check the type ``predict`` returns for each kind of input on a fitted ``clf1``.

    ``X_iris`` as-is gives ``Delayed``, a ``da.from_array`` input gives
    ``da.Array`` and a ``dm.from_array`` input gives ``dm.Matrix``; each
    computes to an ndarray. Predicting from the unfitted wrapper must raise
    ``NotFittedError`` at compute time.
    """
    wrapped = from_sklearn(clf1)
    fitted = wrapped.fit(X_iris, y_iris)

    # X_iris passed through unchanged.
    prediction = fitted.predict(X_iris)
    assert isinstance(prediction, Delayed)
    computed = prediction.compute()
    assert isinstance(computed, np.ndarray)

    # Input wrapped with ``da.from_array``.
    chunked_X = da.from_array(X_iris, chunks=4)
    prediction = fitted.predict(chunked_X)
    assert isinstance(prediction, da.Array)
    computed = prediction.compute()
    assert isinstance(computed, np.ndarray)

    # Input further wrapped with ``dm.from_array``.
    matrix_X = dm.from_array(chunked_X)
    prediction = fitted.predict(matrix_X)
    assert isinstance(prediction, dm.Matrix)
    computed = prediction.compute()
    assert isinstance(computed, np.ndarray)

    unfitted_prediction = wrapped.predict(X_iris)
    with pytest.raises(NotFittedError):
        unfitted_prediction.compute()
def test_from_sklearn():
    """Names match for ``clf1``, differ for ``clf2``, and mention the estimator class."""
    wrapped = from_sklearn(clf1)

    same_name = from_sklearn(clf1)._name
    assert same_name == wrapped._name

    other_name = from_sklearn(clf2)._name
    assert other_name != wrapped._name

    # Case-insensitive check for the class name inside the generated name.
    lowered = wrapped._name.lower()
    assert 'logisticregression' in lowered
def test_tokenize(self):
    """``tokenize`` is repeatable for one wrapper and differs between ``sk`` and ``sk2``."""
    first = from_sklearn(self.sk)
    second = from_sklearn(self.sk2)

    assert tokenize(first) == tokenize(first)
    assert tokenize(first) != tokenize(second)
def test__init__(self):
    """Direct construction with ``n_features=16`` names like ``from_sklearn(self.sk)``.

    A different ``n_features`` value must produce a different name.
    """
    built = self.cls(n_features=16)

    expected_name = from_sklearn(self.sk)._name
    assert built._name == expected_name

    other_name = self.cls(n_features=20)._name
    assert built._name != other_name
def test_get_params(self):
    """``get_params`` on the wrapper equals ``self.sk.get_params`` for deep and shallow calls."""
    wrapped = from_sklearn(self.sk)

    # Default call.
    assert wrapped.get_params() == self.sk.get_params()

    # Explicit shallow call.
    assert wrapped.get_params(deep=False) == self.sk.get_params(deep=False)
def test_clone(self):
    """``clone`` keeps name and params but holds a distinct ``_est`` object."""
    source = from_sklearn(self.sk)
    copy = clone(source)

    assert source._name == copy._name
    assert source.get_params() == copy.get_params()
    # The underlying ``_est`` must not be shared between the two.
    assert source._est is not copy._est
def test_setattr():
    """Assigning ``C`` on a wrapped ``pipe1`` raises ``AttributeError``."""
    wrapped = from_sklearn(pipe1)

    with pytest.raises(AttributeError):
        wrapped.C = 10
def test_setattr(self):
    """Assigning ``n_features`` on the wrapper raises ``AttributeError``."""
    wrapped = from_sklearn(self.sk)

    with pytest.raises(AttributeError):
        wrapped.n_features = 20
def test_getattr(self):
    """``n_features`` reads through to ``self.sk``; an unknown attribute raises."""
    wrapped = from_sklearn(self.sk)
    assert wrapped.n_features == self.sk.n_features

    # The bare attribute access is the operation under test.
    with pytest.raises(AttributeError):
        wrapped.not_a_real_parameter
def test_named_steps():
    """Both ``pca`` and ``logistic`` entries of ``named_steps`` are ``Wrapped``."""
    wrapped = from_sklearn(pipe1)
    by_name = wrapped.named_steps

    assert isinstance(by_name['pca'], Wrapped)
    assert isinstance(by_name['logistic'], Wrapped)
def test_dir(self):
    """``dir`` on the wrapper lists ``n_features``."""
    wrapped = from_sklearn(self.sk)
    listed = dir(wrapped)

    assert 'n_features' in listed
def test_get_params():
    """``get_params`` on a wrapped ``clf1`` equals ``clf1.get_params`` for deep and shallow calls."""
    wrapped = from_sklearn(clf1)

    # Default call.
    assert wrapped.get_params() == clf1.get_params()

    # Explicit shallow call.
    assert wrapped.get_params(deep=False) == clf1.get_params(deep=False)
def test_repr(self):
    """The wrapper's ``repr`` equals the ``repr`` of ``self.sk``."""
    wrapped = from_sklearn(self.sk)
    wrapped_repr = repr(wrapped)

    assert wrapped_repr == repr(self.sk)
def test_from_sklearn():
    """Names from ``from_sklearn`` match for ``clf1`` and differ for ``clf2``."""
    wrapped = from_sklearn(clf1)

    same_name = from_sklearn(clf1)._name
    assert same_name == wrapped._name

    other_name = from_sklearn(clf2)._name
    assert other_name != wrapped._name
def test_fit(self):
    """``fit`` on this wrapper returns the wrapper itself."""
    wrapped = from_sklearn(self.sk)
    bag = db.from_sequence(self.raw_X)

    # Second argument: a ``db.range`` built from the length of raw_X.
    targets = db.range(len(self.raw_X), len(self.raw_X))
    fitted = wrapped.fit(bag, targets)

    assert fitted is wrapped