def test_sklearn_glm_unknown_link_func():
    """Assemble a TweedieRegressor configured with a nonexistent link function."""
    # NOTE(review): there is no assertion here — if the assembler is expected
    # to raise for an unknown link function, this should use pytest.raises;
    # confirm the intended contract with the assembler's implementation.
    reg = linear_model.TweedieRegressor(
        power=1,
        link="this_link_func_does_not_exist",
        max_iter=10,
    )
    reg = reg.fit([[1], [2]], [0.1, 0.2])

    assemblers.SklearnGLMModelAssembler(reg).assemble()
def test_model_tweedie_regressor(self):
    """Convert TweedieRegressor models (power 0..3) to ONNX and compare
    predictions for both float32 and float64 inputs."""
    X, y = make_regression(
        n_features=5, n_samples=100, n_targets=1,
        random_state=42, n_informative=3)
    # Shift targets to be strictly positive — presumably required for the
    # Tweedie powers exercised below (TODO confirm against sklearn docs).
    y = numpy.abs(y)
    y = y / y.max() + 1e-5

    n_features = X.shape[1]
    for power in range(4):
        with self.subTest(power=power):
            estimator = linear_model.TweedieRegressor(power=power).fit(X, y)

            # float32 path
            onx32 = convert_sklearn(
                estimator, "linear regression",
                [("input", FloatTensorType([None, n_features]))],
                target_opset=TARGET_OPSET)
            self.check_model(onx32, X.astype(numpy.float32))
            dump_data_and_model(
                X.astype(numpy.float32), estimator, onx32,
                basename="SklearnTweedieRegressor%d-Dec4" % power)

            # float64 path
            onx64 = convert_sklearn(
                estimator, "linear regression",
                [("input", DoubleTensorType([None, n_features]))],
                target_opset=TARGET_OPSET)
            dump_data_and_model(
                X.astype(numpy.float64), estimator, onx64,
                basename="SklearnTweedieRegressor64%d" % power)
def test_sklearn_glm_identity_link_func():
    """Identity link: assembled expression is intercept + coefficient * feature."""
    reg = linear_model.TweedieRegressor(power=0, link="identity", max_iter=10)
    reg = reg.fit([[1], [2]], [0.1, 0.2])

    actual = assemblers.SklearnGLMModelAssembler(reg).assemble()

    weighted_feature = ast.BinNumExpr(
        ast.FeatureRef(0),
        ast.NumVal(0.02),
        ast.BinNumOpType.MUL)
    expected = ast.BinNumExpr(
        ast.NumVal(0.12),
        weighted_feature,
        ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)
def test_sklearn_glm_log_link_func():
    """Log link: assembled expression is exp(0.0 + coefficient * feature)."""
    reg = linear_model.TweedieRegressor(
        power=1, link="log", fit_intercept=False, max_iter=10)
    reg = reg.fit([[1], [2]], [0.1, 0.2])

    actual = assemblers.SklearnGLMModelAssembler(reg).assemble()

    linear_part = ast.BinNumExpr(
        ast.NumVal(0.0),
        ast.BinNumExpr(
            ast.FeatureRef(0),
            ast.NumVal(-0.4619711397),
            ast.BinNumOpType.MUL),
        ast.BinNumOpType.ADD)
    expected = ast.ExpExpr(linear_part)

    assert utils.cmp_exprs(actual, expected)
regression(linear_model.LassoLarsIC()), regression(linear_model.LinearRegression()), regression(linear_model.OrthogonalMatchingPursuit()), regression(linear_model.OrthogonalMatchingPursuitCV()), regression( linear_model.PassiveAggressiveRegressor(random_state=RANDOM_SEED)), regression(linear_model.PoissonRegressor()), regression( linear_model.RANSACRegressor( base_estimator=tree.ExtraTreeRegressor(**TREE_PARAMS), random_state=RANDOM_SEED)), regression(linear_model.Ridge(random_state=RANDOM_SEED)), regression(linear_model.RidgeCV()), regression(linear_model.SGDRegressor(random_state=RANDOM_SEED)), regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)), regression(linear_model.TweedieRegressor(power=0.0)), regression(linear_model.TweedieRegressor(power=1.0)), regression(linear_model.TweedieRegressor(power=1.5)), regression(linear_model.TweedieRegressor(power=2.0)), regression(linear_model.TweedieRegressor(power=3.0)), # Statsmodels Linear Regression classification_binary( utils.StatsmodelsSklearnLikeWrapper( sm.GLM, dict(fit_constrained=dict(constraints=( np.eye(utils.get_binary_classification_model_trainer(). X_train.shape[-1])[0], [1]))))), classification_binary( utils.StatsmodelsSklearnLikeWrapper( sm.GLM,
def model_selector(model):
    """Return a freshly constructed scikit-learn estimator for a model name.

    Replaces the original if/elif chain with a dispatch table, and raises a
    clear error for unknown names (the original left ``regr`` unbound and
    crashed with ``UnboundLocalError``).

    Parameters
    ----------
    model : str
        Model name or alias, e.g. "linear", "lasso", "rfr", "svc".

    Returns
    -------
    estimator
        An unfitted scikit-learn estimator (regressor or classifier,
        depending on the requested name).

    Raises
    ------
    ValueError
        If ``model`` matches no known name or alias.
    """
    def _voting():
        # Heterogeneous ensemble of three base regressors.
        clf1 = linear_model.LinearRegression(n_jobs=-1)
        clf2 = ensemble.RandomForestRegressor(max_depth=8, min_samples_leaf=17,
                                              random_state=17, n_jobs=-1)
        clf3 = ensemble.GradientBoostingRegressor(random_state=17)
        return ensemble.VotingRegressor(
            estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], n_jobs=-1)

    # Canonical name -> zero-argument factory. Factories are lazy so that no
    # estimator is built unless actually requested.
    factories = {
        "linear": lambda: linear_model.LinearRegression(n_jobs=-1),
        "lasso": lambda: linear_model.Lasso(random_state=17),
        "elasticnet": lambda: linear_model.ElasticNet(random_state=17),
        "bayesian": lambda: linear_model.BayesianRidge(),
        "decision tree regressor": lambda: tree.DecisionTreeRegressor(
            max_depth=8, min_samples_leaf=17, random_state=17),
        "tweedie regressor 0": lambda: linear_model.TweedieRegressor(power=0),
        "tweedie regressor 1": lambda: linear_model.TweedieRegressor(power=1),
        "extra trees regressor": lambda: ensemble.ExtraTreesRegressor(
            max_depth=8, min_samples_leaf=17, random_state=17),
        "random forest regressor": lambda: ensemble.RandomForestRegressor(
            n_estimators=500, oob_score=True, random_state=17, n_jobs=-1),
        "adaboost extra trees": lambda: AdaBoostRegressor(
            ensemble.ExtraTreesRegressor(max_depth=8, min_samples_leaf=17,
                                         random_state=17),
            n_estimators=500, random_state=17),
        "k neighbours": lambda: neighbors.KNeighborsRegressor(n_jobs=-1),
        "gradient boosting regressor":
            lambda: ensemble.GradientBoostingRegressor(random_state=17),
        "voting": _voting,
        "logistic": lambda: linear_model.LogisticRegression(
            max_iter=250, random_state=17, n_jobs=-1),
        "gaussian": lambda: GaussianNB(),
        "decision tree classifier": lambda: tree.DecisionTreeClassifier(
            max_depth=8, min_samples_leaf=17, random_state=17),
        "extra tree classifier": lambda: ensemble.ExtraTreesClassifier(
            max_depth=8, min_samples_leaf=17, random_state=17),
        "random forest classifier": lambda: ensemble.RandomForestClassifier(
            max_depth=8, min_samples_leaf=17, random_state=17),
        "linear svc": lambda: svm.LinearSVC(random_state=17),
        "k neighbour classifier": lambda: neighbors.KNeighborsClassifier(
            n_jobs=-1, n_neighbors=2),
        "svc": lambda: svm.SVC(kernel="rbf", probability=True,
                               random_state=17),
    }

    # Alternate spellings accepted by the original if/elif chain.
    aliases = {
        "elastic": "elasticnet",
        "dtr": "decision tree regressor",
        "normal distribution": "tweedie regressor 0",
        "poisson distribution": "tweedie regressor 1",
        "etr": "extra trees regressor",
        "rfr": "random forest regressor",
        "boost et": "adaboost extra trees",
        "k neighbor": "k neighbours",
        "gbr": "gradient boosting regressor",
        "dtc": "decision tree classifier",
        "etc": "extra tree classifier",
        "rfc": "random forest classifier",
        "k neighbor classifier": "k neighbour classifier",
    }

    key = aliases.get(model, model)
    try:
        factory = factories[key]
    except KeyError:
        raise ValueError(f"unknown model name: {model!r}") from None
    return factory()