def test_sklearn2code_export():
    """Export a boosted Earth classifier and compare it with the original.

    A binary classification problem is generated, a ``Booster`` wrapping an
    ``Earth`` base learner is fitted with a binomial deviance loss, and the
    ``predict``, ``predict_proba`` and ``transform`` methods are exported
    through ``sklearn2code``.  The executed module is then handed to
    ``assert_correct_exported_module`` together with the fitted model.
    """
    exported_methods = ('predict', 'predict_proba', 'transform')

    np.random.seed(0)
    features, y = make_classification(n_classes=2)
    n_columns = features.shape[1]
    X = DataFrame(features, columns=['x%d' % col for col in range(n_columns)])

    loss_function = BinomialDeviance(2)
    base_learner = Earth(max_degree=2, use_fast=True, max_terms=10)
    model = Booster(base_learner, loss_function)
    model.fit(X, y)

    # A fresh list is handed to each callee, as the original did.
    code = sklearn2code(model, list(exported_methods), numpy_flat)
    module = exec_module('test_module', code)
    assert_correct_exported_module(
        model, module, list(exported_methods), {'X': X}, X)
def test_argument_names():
    """Check that ``argument_names`` is honoured by the generated code.

    A gradient boosting regressor is fitted on the Boston housing data, its
    ``predict`` method is exported with the DataFrame's column labels passed
    as ``argument_names``, and the exported function is called with the
    columns supplied as keyword arguments.
    """
    dataset = load_boston()
    X = DataFrame(dataset['data'], columns=dataset['feature_names'])
    y = dataset['target']

    regressor = GradientBoostingRegressor(verbose=True)
    model = regressor.fit(X, y)

    code = sklearn2code(
        model, ['predict'], numpy_flat, argument_names=X.columns)
    boston_housing_module = exec_module('boston_housing_module', code)

    expected = model.predict(X)
    # Unpacking the DataFrame passes every column as a keyword argument.
    actual = boston_housing_module.predict(**X)
    assert_array_almost_equal(expected, actual)
def test_case(self):
    """Compare each exported method with the fitted model (numpy_flat).

    ``estimator``, ``methods``, ``fit_data``, ``predict_data`` and
    ``export_predict_data`` are not defined here; they are resolved from the
    surrounding scope.  A clone of ``estimator`` is fitted, and for every
    name in ``methods`` the method is exported with ``sklearn2code``, run in
    a fresh module, and compared with the model's own output to 3 decimals.

    Raises:
        AssertionError: if an exported method disagrees with the model.
    """
    model = clone(estimator)
    model.fit(**fit_data)
    for method in methods:
        pred = getattr(model, method)(**predict_data)
        code = sklearn2code(model, method, numpy_flat)
        module = exec_module('test_module', code)
        # The exported function takes the columns of X as keyword arguments.
        exported_pred = getattr(module, method)(**export_predict_data['X'])
        if isinstance(exported_pred, tuple):
            # Multi-output exports come back as a tuple of columns; assemble
            # them into a frame keyed by position before comparing.
            exported_pred = DataFrame(dict(enumerate(exported_pred)))
        # Name the method in the failure message: several are checked in one
        # test, and the bare array diff does not say which one broke.
        assert_array_almost_equal(
            pred, exported_pred, 3,
            err_msg='exported method %r disagrees with the model' % (method,))
def test_case(self):
    """Compare each exported method with the fitted model (pandas target).

    ``estimator``, ``methods``, ``fit_data``, ``predict_data`` and
    ``export_predict_data`` are not defined here; they are resolved from the
    surrounding scope.  A clone of ``estimator`` is fitted, and every name in
    ``methods`` is exported with the ``pandas`` language.  Methods whose
    export raises ``ExpressionTypeNotSupportedError`` are skipped.  The rest
    are executed and compared with the model's output to 3 decimals.

    Raises:
        AssertionError: if an exported method disagrees with the model.
    """

    def _offer_code_on_clipboard(generated_code):
        # Debugging aid only.  ``clipboard`` is an optional third-party
        # module and may be missing or unusable; that must never replace
        # the real test failure, so any problem here is ignored.  Kept in
        # its own function so the caller's bare ``raise`` still re-raises
        # the original error.
        try:
            import clipboard
            clipboard.copy(generated_code)
        except Exception:
            pass

    model = clone(estimator)
    model.fit(**fit_data)
    for method in methods:
        pred = DataFrame(getattr(model, method)(**predict_data))
        try:
            code = sklearn2code(model, method, pandas)
        except ExpressionTypeNotSupportedError:
            # This method cannot be expressed in the pandas target; skip it.
            continue
        try:
            module = exec_module('test_module', code)
            # Unlike the keyword-argument style, X is passed as one object.
            exported_pred = getattr(module, method)(export_predict_data['X'])
            assert_array_almost_equal(pred, exported_pred, 3)
        except Exception:
            _offer_code_on_clipboard(code)
            raise
def test_super_learner():
    """Fit a cross-validated SuperLearner and verify its exported predictor.

    Two things are checked:

    * the cross-validated R^2 of the super learner is at least 90% of the
      best cross-validated R^2 among its component estimators;
    * the ``predict`` function generated by ``sklearn2code`` reproduces
      ``model.predict`` on the training frame.

    Raises:
        AssertionError: if either check fails.  For the second check the
            disagreeing entries are printed before re-raising.
    """
    np.random.seed(0)
    X, y = load_boston(return_X_y=True)
    X = pandas.DataFrame(X, columns=['x%d' % i for i in range(X.shape[1])])
    model = CrossValidatingEstimator(
        SuperLearner(
            [('linear', LinearRegression()), ('earth', Earth(max_degree=2))],
            LinearRegression(),
            cv=5,
            n_jobs=1),
        cv=5)
    cv_pred = model.fit_predict(X, y)
    pred = model.predict(X)
    cv_r2 = r2_score(y, cv_pred)

    # Score every component, not just the first one.  The previous code
    # looped ``for i in range(2)`` but always scored ``first(...)``, so the
    # "best" component was simply the first component.
    # NOTE(review): assumes every value in ``cross_validating_estimators_``
    # exposes ``cv_predictions_`` like the first one does -- confirm.
    components = model.estimator_.cross_validating_estimators_.values()
    best_component_cv_r2 = max(
        r2_score(y, component.cv_predictions_) for component in components)
    assert cv_r2 >= .9 * best_component_cv_r2

    code = sklearn2code(model, ['predict'], numpy_flat)
    module = exec_module('module', code)
    # Unpacking the DataFrame passes every column as a keyword argument.
    test_pred = module.predict(**X)
    flat_pred = np.ravel(pred)
    flat_test_pred = np.ravel(test_pred)
    try:
        assert_array_almost_equal(flat_pred, flat_test_pred)
    except AssertionError:
        # Show only the entries that actually disagree, then fail as before.
        idx = np.abs(flat_pred - flat_test_pred) > .000001
        print(flat_pred[idx])
        print(flat_test_pred[idx])
        raise

    # Informational output: training R^2, cross-validated R^2, and the best
    # component's cross-validated R^2.
    print(r2_score(y, pred))
    print(cv_r2)
    print(best_component_cv_r2)
@sym_predict.register(XGBRegressor)
def sym_predict_xgb_regressor(estimator):
    """Build the symbolic ``predict`` function of a fitted ``XGBRegressor``.

    Each tree in the booster's text dump is parsed with ``Node.from_str`` and
    wrapped in its own ``Function`` over the model's input variables.  The
    returned ``Function`` calls every tree function, sums the resulting
    variables and adds the constant 0.5.

    Args:
        estimator: a fitted ``XGBRegressor`` whose booster carries feature
            names (they become the input variable names).

    Returns:
        A ``Function`` with the feature variables as inputs, one call per
        tree, and a single output expression.
    """
    booster = estimator.get_booster()
    dump = booster.get_dump()
    inputs = tuple(map(RealVariable, booster.feature_names))
    # Fresh variable names must not collide with the input names.
    Var = VariableFactory(existing=inputs)

    calls = []
    for tree_dump in dump:
        tree_function = Function(
            inputs, tuple(), (Node.from_str(tree_dump).expression(), ))
        # One call entry: (output variables, (callee, arguments)).
        calls.append(((Var(), ), (tree_function, inputs)))
    calls = tuple(calls)

    # Sum the single output variable of every tree call.
    tree_outputs = map(compose(first, first), calls)
    # TODO: Why do I have to add 0.5?
    # NOTE(review): presumably this is XGBoost's default ``base_score`` of
    # 0.5; if so it should be read from the model instead -- confirm.
    output = reduce(__add__, tree_outputs) + RealNumber(0.5)
    return Function(inputs, calls, (output, ))


if __name__ == '__main__':
    # Small manual demo: fit a tiny model, print the symbolic function, the
    # generated code and the first tree, then compare predictions by eye.
    model = XGBRegressor(n_estimators=2, max_depth=1)
    X, y = make_regression()
    X = DataFrame(X, columns=['x%d' % i for i in range(X.shape[1])])
    model.fit(X, y)
    print(sym_predict(model))
    code = sklearn2code(model, ['predict'], numpy_flat)
    print(code)
    # ``get_booster()`` matches the accessor used by the function above; the
    # old ``booster()`` method is no longer part of the sklearn wrapper.
    print(model.get_booster().get_dump()[0])
    module = exec_module('module', code)
    print(module.predict(**X.loc[:10, :]))
    print(model.predict(X.loc[:10, :]))
"""Example: export a fitted py-earth model to plain numpy code.

Fits an ``Earth`` model on the Boston housing data, generates code for its
``predict`` method with ``sklearn2code``, checks that the generated code
agrees with the model, and prints the formatted code.
"""
# ``numpy.testing`` and ``sklearn.datasets`` are the public import locations;
# the ``numpy.testing.utils`` and ``sklearn.datasets.base`` paths used before
# are deprecated/private and missing from newer releases.
from numpy.testing import assert_array_almost_equal
from pandas import DataFrame
from pyearth.earth import Earth
from sklearn.datasets import load_boston
from sklearn2code.languages import numpy_flat
from sklearn2code.sklearn2code import sklearn2code
from sklearn2code.utility import exec_module
from yapf.yapflib.yapf_api import FormatCode

# Load a data set.
boston = load_boston()
X = DataFrame(boston['data'], columns=boston['feature_names'])
y = boston['target']

# Fit a py-earth model.
model = Earth(max_degree=2).fit(X, y)

# Generate code from the py-earth model.
code = sklearn2code(model, ['predict'], numpy_flat)

# Execute the generated code in its own module.
boston_housing_module = exec_module('boston_housing_module', code)

# Confirm that the generated module produces output matching the fitted
# model's predict method.  Unpacking the DataFrame passes each column as a
# keyword argument.
assert_array_almost_equal(model.predict(X), boston_housing_module.predict(**X))

# Print the generated code (using yapf for formatting).
print(FormatCode(code, style_config='pep8')[0])