def model(xgbr_data_vd):
    """Yield an ``XGBoostRegressor`` wrapper around an in-database model.

    The model ``xgbr_model_test`` is trained directly with the SQL function
    ``xgb_regressor`` on ``public.xgbr_data``; the Python object is then
    configured by hand to point at it. After the consumer is done, the model
    is dropped.

    ``xgbr_data_vd`` is not referenced in the body; presumably it is a
    fixture that makes ``public.xgbr_data`` available -- confirm.
    """
    train_sql = (
        "SELECT xgb_regressor('xgbr_model_test', 'public.xgbr_data', "
        "'TransPortation', '*' USING PARAMETERS "
        "exclude_columns='id, transportation', min_split_loss=0.1, "
        "max_ntree=3, learning_rate=0.2, sampling_size=1, max_depth=6, "
        "nbins=40, seed=1, id_column='id')"
    )
    for statement in ("DROP MODEL IF EXISTS xgbr_model_test", train_sql):
        current_cursor().execute(statement)

    # The object is assembled manually: load_model is avoided because it is
    # reported to be buggy.
    hyper_params = {
        "max_ntree": 3,
        "min_split_loss": 0.1,
        "learning_rate": 0.2,
        "sample": 1.0,
        "max_depth": 6,
        "nbins": 40,
    }
    xgbr = XGBoostRegressor("xgbr_model_test", **hyper_params)
    xgbr.input_relation = "public.xgbr_data"
    xgbr.test_relation = xgbr.input_relation
    xgbr.X = ['"Gender"', '"owned cars"', '"cost"', '"income"']
    xgbr.y = '"TransPortation"'
    xgbr.prior_ = xgbr.get_prior()

    yield xgbr

    # Teardown: remove the in-database model.
    xgbr.drop()
def test_to_python(self, model, titanic_vd):
    """Compare the in-database XGBoost prediction with ``model.to_python()``.

    One row (``'Male', 0, 'Cheap', 'Low'``) is scored with
    ``PREDICT_XGB_REGRESSOR`` and with the callable returned by
    ``model.to_python()``; both values must agree within ``pytest.approx``.
    ``titanic_vd`` is requested but not used in the body.
    """
    query_template = (
        "SELECT PREDICT_XGB_REGRESSOR('Male', 0, 'Cheap', 'Low' "
        "USING PARAMETERS model_name = '{}', match_by_pos=True)::float"
    )
    current_cursor().execute(query_template.format(model.name))
    in_db_value = current_cursor().fetchone()[0]

    predict_locally = model.to_python()
    local_value = float(predict_locally([["Male", 0, "Cheap", "Low"]])[0])
    assert in_db_value == pytest.approx(local_value)
def model(rfr_data_vd):
    """Yield a ``RandomForestRegressor`` wrapper around an in-database model.

    The model ``rfr_model_test`` is trained directly with the SQL function
    ``rf_regressor`` on ``public.rfr_data``; the Python object is then
    configured by hand to point at it. After the consumer is done, the model
    is dropped.

    ``rfr_data_vd`` is not referenced in the body; presumably it is a
    fixture that makes ``public.rfr_data`` available -- confirm.
    """
    train_sql = (
        "SELECT rf_regressor('rfr_model_test', 'public.rfr_data', "
        "'TransPortation', '*' USING PARAMETERS "
        "exclude_columns='id, transportation', mtry=4, ntree=3, "
        "max_breadth=100, sampling_size=1, max_depth=6, min_leaf_size=1, "
        "min_info_gain=0.0, nbins=40, seed=1, id_column='id')"
    )
    for statement in ("DROP MODEL IF EXISTS rfr_model_test", train_sql):
        current_cursor().execute(statement)

    # The object is assembled manually: load_model is avoided because it is
    # reported to be buggy.
    hyper_params = {
        "n_estimators": 3,
        "max_features": 4,
        "max_leaf_nodes": 100,
        "sample": 1.0,
        "max_depth": 6,
        "min_samples_leaf": 1,
        "min_info_gain": 0.0,
        "nbins": 40,
    }
    forest = RandomForestRegressor("rfr_model_test", **hyper_params)
    forest.input_relation = "public.rfr_data"
    forest.test_relation = forest.input_relation
    forest.X = ['"Gender"', '"owned cars"', '"cost"', '"income"']
    forest.y = '"TransPortation"'

    yield forest

    # Teardown: remove the in-database model.
    forest.drop()
def model(winequality_vd):
    """Yield a ``Normalizer`` fitted on three ``public.winequality`` columns.

    Any previous ``norm_model_test`` model is dropped first; the fitted model
    is dropped again once the consumer is done. ``winequality_vd`` is not
    referenced in the body.
    """
    current_cursor().execute("DROP MODEL IF EXISTS norm_model_test")

    feature_columns = ["citric_acid", "residual_sugar", "alcohol"]
    normalizer = Normalizer("norm_model_test")
    normalizer.fit("public.winequality", feature_columns)

    yield normalizer

    # Teardown: remove the in-database model.
    normalizer.drop()
def test_to_python(self, model):
    """Compare ``PREDICT_SVM_REGRESSOR`` with the model's Python export.

    The row ``(3.0, 11.0, 93.0)`` is scored in the database and with the
    callable from ``model.to_python(return_str=False)``; both values must
    agree within ``pytest.approx``.
    """
    query_template = (
        "SELECT PREDICT_SVM_REGRESSOR(3.0, 11.0, 93. "
        "USING PARAMETERS model_name = '{}', match_by_pos=True)"
    )
    current_cursor().execute(query_template.format(model.name))
    in_db_value = current_cursor().fetchone()[0]

    predict_locally = model.to_python(return_str=False)
    local_value = predict_locally([[3.0, 11.0, 93.0]])[0]
    assert in_db_value == pytest.approx(local_value)
def test_to_python(self, model):
    """Compare ``APPLY_BISECTING_KMEANS`` with the model's Python export.

    The whole row returned by the database is compared (via
    ``pytest.approx``) with the full result of calling the exported function
    on a single four-value sample.
    """
    query_template = (
        "SELECT APPLY_BISECTING_KMEANS(5.006, 3.418, 1.464, 0.244 "
        "USING PARAMETERS model_name = '{}', match_by_pos=True)"
    )
    current_cursor().execute(query_template.format(model.name))
    in_db_row = current_cursor().fetchone()

    assign_locally = model.to_python(return_str=False)
    local_result = assign_locally([[5.006, 3.418, 1.464, 0.244]])
    assert in_db_row == pytest.approx(local_result)
def test_to_python(self, model):
    """Compare ``APPLY_SVD`` with the model's Python export.

    The whole row returned by the database is compared (via
    ``pytest.approx``) with the first element of the exported function's
    result for the sample ``(3.0, 11.0, 93.0)``.
    """
    query_template = (
        "SELECT APPLY_SVD(citric_acid, residual_sugar, alcohol "
        "USING PARAMETERS model_name = '{}', match_by_pos=True) "
        "FROM (SELECT 3.0 AS citric_acid, 11.0 AS residual_sugar, "
        "93. AS alcohol) x"
    )
    current_cursor().execute(query_template.format(model.name))
    in_db_row = current_cursor().fetchone()

    transform_locally = model.to_python(return_str=False)
    local_row = transform_locally([[3.0, 11.0, 93.0]])[0]
    assert in_db_row == pytest.approx(local_row)
def model(winequality_vd):
    """Yield a ``Ridge`` model fitted on ``public.winequality``.

    Predictors are ``citric_acid``, ``residual_sugar`` and ``alcohol``; the
    response is ``quality``. Any previous ``ridge_model_test`` model is
    dropped first, and the fitted model is dropped once the consumer is
    done. ``winequality_vd`` is not referenced in the body.
    """
    current_cursor().execute("DROP MODEL IF EXISTS ridge_model_test")

    predictors = ["citric_acid", "residual_sugar", "alcohol"]
    ridge = Ridge("ridge_model_test")
    ridge.fit("public.winequality", predictors, "quality")

    yield ridge

    # Teardown: remove the in-database model.
    ridge.drop()
def test_to_python(self, model):
    """Compare the ``APPLY_IFOREST`` anomaly score with the Python export.

    One five-value sample is scored in the database (taking the
    ``anomaly_score`` field) and with the callable from
    ``model.to_python(return_str=False)``; both must agree within
    ``pytest.approx``.
    """
    query_template = (
        "SELECT (APPLY_IFOREST('Male', 0, 'Cheap', 'Low', 1 "
        "USING PARAMETERS model_name = '{}', match_by_pos=True))"
        ".anomaly_score::float"
    )
    current_cursor().execute(query_template.format(model.name))
    in_db_score = current_cursor().fetchone()[0]

    score_locally = model.to_python(return_str=False)
    local_score = score_locally([["Male", 0, "Cheap", "Low", 1]])[0]
    assert in_db_score == pytest.approx(local_score)
def test_model_from_vDF(self, market_vd):
    """Fit an ``MCA`` model on ``market_vd.cdt()`` and check it is listed.

    After fitting, the ``models`` table must contain a row named
    ``mca_vDF``. The model is dropped at the end of the test.
    """
    current_cursor().execute("DROP MODEL IF EXISTS mca_vDF")

    mca = MCA("mca_vDF")
    mca.fit(market_vd.cdt())

    lookup_sql = "SELECT model_name FROM models WHERE model_name = 'mca_vDF'"
    current_cursor().execute(lookup_sql)
    found_row = current_cursor().fetchone()
    assert found_row[0] == "mca_vDF"

    mca.drop()
def test_get_voronoi_plot(self, iris_vd):
    """Draw a Voronoi plot for a two-feature ``KMeans`` model.

    The test passes when the current axes of the returned object expose
    exactly 21 default bbox extra artists. All figures are closed and the
    model is dropped afterwards.
    """
    current_cursor().execute("DROP MODEL IF EXISTS model_test_plot")

    kmeans = KMeans("model_test_plot")
    kmeans.fit(iris_vd, ["SepalLengthCm", "SepalWidthCm"])

    figure = kmeans.plot_voronoi(color="b")
    artists = figure.gca().get_default_bbox_extra_artists()
    assert len(artists) == 21

    plt.close("all")
    kmeans.drop()
def test_model_from_vDF(self, iris_vd):
    """Fit a randomly initialised ``KMeans`` on ``iris_vd`` and check it is listed.

    After fitting, the ``models`` table must contain a row named
    ``kmeans_vDF``. The model is dropped at the end of the test.
    """
    current_cursor().execute("DROP MODEL IF EXISTS kmeans_vDF")

    kmeans = KMeans("kmeans_vDF", init="random")
    kmeans.fit(iris_vd, ["SepalLengthCm", "SepalWidthCm"])

    lookup_sql = "SELECT model_name FROM models WHERE model_name = 'kmeans_vDF'"
    current_cursor().execute(lookup_sql)
    found_row = current_cursor().fetchone()
    assert found_row[0] == "kmeans_vDF"

    kmeans.drop()
def test_model_from_vDF(self, titanic_vd):
    """Fit a ``OneHotEncoder`` on ``titanic_vd`` and check it is listed.

    The encoder is built with ``drop_first=False`` on ``pclass`` and
    ``embarked``. After fitting, the ``models`` table must contain a row
    named ``ohe_vDF``. The model is dropped at the end of the test.
    """
    current_cursor().execute("DROP MODEL IF EXISTS ohe_vDF")

    encoder = OneHotEncoder("ohe_vDF", drop_first=False)
    encoder.fit(titanic_vd, ["pclass", "embarked"])

    lookup_sql = "SELECT model_name FROM models WHERE model_name = 'ohe_vDF'"
    current_cursor().execute(lookup_sql)
    found_row = current_cursor().fetchone()
    assert found_row[0] == "ohe_vDF"

    encoder.drop()
def test_to_sql(self, model):
    """Compare ``PREDICT_LOGISTIC_REG`` with the model's SQL export.

    A single query returns both the built-in prediction and the expression
    produced by ``model.to_sql([3.0, 11.0])``; the two columns must agree
    within ``pytest.approx``.
    """
    query_template = (
        "SELECT PREDICT_LOGISTIC_REG(3.0, 11.0 "
        "USING PARAMETERS model_name = '{}', match_by_pos=True)::float, "
        "{}::float"
    )
    exported_expression = model.to_sql([3.0, 11.0])
    current_cursor().execute(
        query_template.format(model.name, exported_expression)
    )

    builtin_value, exported_value = current_cursor().fetchone()[:2]
    assert builtin_value == pytest.approx(exported_value)
def test_get_plot(self, winequality_vd):
    """Plot a one-predictor ``XGBoostRegressor`` and count its artists.

    The test passes when the returned object exposes either 9 or 12 default
    bbox extra artists. All figures are closed and the model is dropped
    afterwards.
    """
    current_cursor().execute("DROP MODEL IF EXISTS model_test_plot")

    regressor = XGBoostRegressor("model_test_plot")
    regressor.fit(winequality_vd, ["alcohol"], "quality")

    axes = regressor.plot()
    artist_count = len(axes.get_default_bbox_extra_artists())
    assert artist_count in (9, 12)

    plt.close("all")
    regressor.drop()
def test_model_from_vDF(self, winequality_vd):
    """Fit an ``SVD`` model on ``winequality_vd`` and check it is listed.

    After fitting on ``alcohol`` and ``quality``, the ``models`` table must
    contain a row named ``SVD_vDF``. The model is dropped at the end.
    """
    current_cursor().execute("DROP MODEL IF EXISTS SVD_vDF")

    svd = SVD("SVD_vDF")
    svd.fit(winequality_vd, ["alcohol", "quality"])

    lookup_sql = "SELECT model_name FROM models WHERE model_name = 'SVD_vDF'"
    current_cursor().execute(lookup_sql)
    found_row = current_cursor().fetchone()
    assert found_row[0] == "SVD_vDF"

    svd.drop()
def test_get_plot(self, winequality_vd):
    """Plot a two-feature ``BisectingKMeans`` model and count its artists.

    The test passes when the returned object exposes exactly 16 default bbox
    extra artists. All figures are closed and the model is dropped
    afterwards.
    """
    current_cursor().execute("DROP MODEL IF EXISTS model_test_plot")

    clusterer = BisectingKMeans("model_test_plot")
    clusterer.fit(winequality_vd, ["alcohol", "quality"])

    axes = clusterer.plot()
    artist_count = len(axes.get_default_bbox_extra_artists())
    assert artist_count == 16

    plt.close("all")
    clusterer.drop()
def test_get_plot(self, winequality_vd):
    """Plot a one-predictor ``LinearSVR`` model and count its artists.

    The model is fitted on the relation name ``public.winequality`` (the
    ``winequality_vd`` argument is not used in the body). The test passes
    when the returned object exposes exactly 9 default bbox extra artists.
    All figures are closed and the model is dropped afterwards.
    """
    current_cursor().execute("DROP MODEL IF EXISTS model_test_plot")

    svr = LinearSVR("model_test_plot")
    svr.fit("public.winequality", ["alcohol"], "quality")

    axes = svr.plot()
    artist_count = len(axes.get_default_bbox_extra_artists())
    assert artist_count == 9

    plt.close("all")
    svr.drop()
def test_model_from_vDF(self, bsk_data_vd):
    """Fit a ``BisectingKMeans`` on ``bsk_data_vd`` and check it is listed.

    After fitting on ``col1``..``col4``, the ``models`` table must contain a
    row named ``bsk_vDF``. The model is dropped at the end of the test.
    """
    current_cursor().execute("DROP MODEL IF EXISTS bsk_vDF")

    clusterer = BisectingKMeans("bsk_vDF")
    clusterer.fit(bsk_data_vd, ["col1", "col2", "col3", "col4"])

    lookup_sql = "SELECT model_name FROM models WHERE model_name = 'bsk_vDF'"
    current_cursor().execute(lookup_sql)
    found_row = current_cursor().fetchone()
    assert found_row[0] == "bsk_vDF"

    clusterer.drop()
def test_model_from_vDF(self, titanic_vd):
    """Fit a ``LinearSVC`` on ``titanic_vd`` and check it is listed.

    Predictors are ``age`` and ``fare``; the response is ``survived``. After
    fitting, the ``models`` table must contain a row named
    ``lsvc_from_vDF``. The model is dropped at the end of the test.
    """
    current_cursor().execute("DROP MODEL IF EXISTS lsvc_from_vDF")

    classifier = LinearSVC("lsvc_from_vDF")
    classifier.fit(titanic_vd, ["age", "fare"], "survived")

    lookup_sql = (
        "SELECT model_name FROM models WHERE model_name = 'lsvc_from_vDF'"
    )
    current_cursor().execute(lookup_sql)
    found_row = current_cursor().fetchone()
    assert found_row[0] == "lsvc_from_vDF"

    classifier.drop()
def test_model_from_vDF(self, tr_data_vd):
    """Fit a ``DecisionTreeRegressor`` on ``tr_data_vd`` and check it is listed.

    The single predictor is ``gender`` and the response is
    ``transportation``. After fitting, the ``models`` table must contain a
    row named ``tr_from_vDF``. The model is dropped at the end of the test.
    """
    current_cursor().execute("DROP MODEL IF EXISTS tr_from_vDF")

    tree = DecisionTreeRegressor("tr_from_vDF")
    tree.fit(tr_data_vd, ["gender"], "transportation")

    lookup_sql = (
        "SELECT model_name FROM models WHERE model_name = 'tr_from_vDF'"
    )
    current_cursor().execute(lookup_sql)
    found_row = current_cursor().fetchone()
    assert found_row[0] == "tr_from_vDF"

    tree.drop()
def test_model_from_vDF(self, winequality_vd):
    """Fit an ``ElasticNet`` on ``winequality_vd`` and check it is listed.

    The single predictor is ``alcohol`` and the response is ``quality``.
    After fitting, the ``models`` table must contain a row named
    ``enet_from_vDF``. The model is dropped at the end of the test.
    """
    current_cursor().execute("DROP MODEL IF EXISTS enet_from_vDF")

    enet = ElasticNet("enet_from_vDF")
    enet.fit(winequality_vd, ["alcohol"], "quality")

    lookup_sql = (
        "SELECT model_name FROM models WHERE model_name = 'enet_from_vDF'"
    )
    current_cursor().execute(lookup_sql)
    found_row = current_cursor().fetchone()
    assert found_row[0] == "enet_from_vDF"

    enet.drop()
def test_to_sql(self, model, titanic_vd):
    """Compare ``PREDICT_RF_CLASSIFIER`` with a ``DummyTreeClassifier`` SQL export.

    A fresh ``rfc_sql_test`` model is fitted on ``titanic_vd`` (``age``,
    ``fare``, ``sex`` -> ``survived``). A single query over one literal row
    returns both the built-in prediction and the expression from
    ``to_sql()``; the two columns must agree. The ``model`` argument is not
    used in the body.
    """
    dummy_tree = DummyTreeClassifier("rfc_sql_test")
    # Remove any leftover model with the same name before fitting.
    dummy_tree.drop()
    dummy_tree.fit(titanic_vd, ["age", "fare", "sex"], "survived")

    query_template = (
        "SELECT PREDICT_RF_CLASSIFIER(* USING PARAMETERS "
        "model_name = 'rfc_sql_test', match_by_pos=True)::int, {}::int "
        "FROM (SELECT 30.0 AS age, 45.0 AS fare, 'male' AS sex) x"
    )
    exported_expression = dummy_tree.to_sql()
    current_cursor().execute(query_template.format(exported_expression))

    row = current_cursor().fetchone()
    assert row[0] == pytest.approx(row[1])

    dummy_tree.drop()
def test_model_from_vDF(self, rfr_data_vd):
    """Fit a ``RandomForestRegressor`` on ``rfr_data_vd`` and check it is listed.

    The single predictor is ``gender`` and the response is
    ``transportation``. After fitting, the ``models`` table must contain a
    row named ``rfr_from_vDF``. The model is dropped at the end of the test.
    """
    current_cursor().execute("DROP MODEL IF EXISTS rfr_from_vDF")

    forest = RandomForestRegressor("rfr_from_vDF")
    forest.fit(rfr_data_vd, ["gender"], "transportation")

    lookup_sql = (
        "SELECT model_name FROM models WHERE model_name = 'rfr_from_vDF'"
    )
    current_cursor().execute(lookup_sql)
    found_row = current_cursor().fetchone()
    assert found_row[0] == "rfr_from_vDF"

    forest.drop()
def test_to_python(self, model):
    """Compare ``APPLY_NORMALIZE`` with the Python export for three methods.

    The ``model`` argument is checked first (the original comment labels it
    the z-score case). Two further ``Normalizer`` models are then fitted on
    ``public.winequality`` with ``method="minmax"`` and
    ``method="robust_zscore"``. For each model, the first column returned by
    the database for the row ``(3.0, 11.0, 93.0)`` must match the first
    value of the exported Python function within ``pytest.approx``.

    The temporary models are dropped in a ``finally`` clause so a failing
    fit or assertion does not leave them in the database.
    """
    apply_template = (
        "SELECT APPLY_NORMALIZE(citric_acid, residual_sugar, alcohol "
        "USING PARAMETERS model_name = '{}', match_by_pos=True) "
        "FROM (SELECT 3.0 AS citric_acid, 11.0 AS residual_sugar, "
        "93. AS alcohol) x"
    )
    feature_columns = ["citric_acid", "residual_sugar", "alcohol"]

    def check_against_database(fitted):
        # Score the sample in-database, then with the exported function.
        current_cursor().execute(apply_template.format(fitted.name))
        prediction = current_cursor().fetchone()[0]
        exported = fitted.to_python(return_str=False)
        assert prediction == pytest.approx(
            exported([[3.0, 11.0, 93.0]])[0][0]
        )

    # Zscore
    check_against_database(model)

    # Minmax, then Robust Zscore. Both temporary models share the name
    # "norm_model_test2", so each one is dropped before the next is fitted.
    for method in ("minmax", "robust_zscore"):
        extra_model = Normalizer("norm_model_test2", method=method)
        extra_model.drop()
        try:
            extra_model.fit("public.winequality", feature_columns)
            check_against_database(extra_model)
        finally:
            extra_model.drop()
def test_create_verticapy_schema(self):
    """Check that ``create_verticapy_schema`` creates the expected tables.

    The ``verticapy`` schema is dropped, recreated with
    ``create_verticapy_schema()``, and the distinct table names found in
    ``columns`` for that schema must be exactly ``["attr", "models"]``.

    The final drop runs in a ``finally`` clause so the schema is removed
    even when creation or the assertion fails.
    """
    # Start from a clean state.
    drop("verticapy", method="schema")
    try:
        create_verticapy_schema()
        current_cursor().execute(
            """SELECT table_name FROM columns WHERE table_schema = 'verticapy' GROUP BY 1 ORDER BY 1;"""
        )
        result = [elem[0] for elem in current_cursor().fetchall()]
        assert result == ["attr", "models"]
    finally:
        drop("verticapy", method="schema")
def test_to_sql(self, model, titanic_vd):
    """Compare ``PREDICT_NAIVE_BAYES`` with a ``NaiveBayes`` SQL export.

    A fresh ``rfc_sql_test`` model is fitted on ``titanic_vd`` (``age``,
    ``fare``, ``sex``, ``pclass`` -> ``survived``). A single query over one
    literal row returns both the built-in prediction and the expression from
    ``to_sql()``; the two columns must agree within a relative tolerance of
    ``1e-3``. The ``model`` argument is not used in the body.
    """
    naive_bayes = NaiveBayes("rfc_sql_test")
    # Remove any leftover model with the same name before fitting.
    naive_bayes.drop()
    naive_bayes.fit(titanic_vd, ["age", "fare", "sex", "pclass"], "survived")

    query_template = (
        "SELECT PREDICT_NAIVE_BAYES(* USING PARAMETERS "
        "model_name = 'rfc_sql_test', match_by_pos=True)::int, {}::int "
        "FROM (SELECT 30.0 AS age, 45.0 AS fare, 'male' AS sex, "
        "1 AS pclass) x"
    )
    exported_expression = naive_bayes.to_sql()
    current_cursor().execute(query_template.format(exported_expression))

    row = current_cursor().fetchone()
    assert row[0] == pytest.approx(row[1], 1e-3)

    naive_bayes.drop()
def test_model_from_vDF(self, winequality_vd):
    """Fit a ``LinearRegression`` on ``winequality_vd`` and check it is listed.

    The single predictor is ``alcohol`` and the response is ``quality``.
    After fitting, the ``models`` table must contain a row named
    ``linreg_from_vDF``. The model is dropped at the end of the test.
    """
    current_cursor().execute("DROP MODEL IF EXISTS linreg_from_vDF")

    linreg = LinearRegression("linreg_from_vDF")
    linreg.fit(winequality_vd, ["alcohol"], "quality")

    lookup_sql = (
        "SELECT model_name FROM models WHERE model_name = 'linreg_from_vDF'"
    )
    current_cursor().execute(lookup_sql)
    found_row = current_cursor().fetchone()
    assert found_row[0] == "linreg_from_vDF"

    linreg.drop()
def test_model_from_vDF(self, iforest_data_vd):
    """Fit an ``IsolationForest`` on ``iforest_data_vd`` and check it is listed.

    After fitting on ``gender``, the ``models`` table must contain a row
    named ``iForest_from_vdf``. The model is dropped at the end of the test.
    """
    current_cursor().execute("DROP MODEL IF EXISTS iForest_from_vdf")

    iforest = IsolationForest("iForest_from_vdf")
    iforest.fit(iforest_data_vd, ["gender"])

    lookup_sql = (
        "SELECT model_name FROM models WHERE model_name = 'iForest_from_vdf'"
    )
    current_cursor().execute(lookup_sql)
    found_row = current_cursor().fetchone()
    assert found_row[0] == "iForest_from_vdf"

    iforest.drop()
def test_does_model_exist(self, titanic_vd):
    """Exercise ``does_model_exist`` before and after fitting a model.

    In a dedicated ``load_model_test`` schema the test checks that:

    * the model is reported as absent after ``drop()``;
    * it is reported as present after ``fit``;
    * with ``return_model_type=True`` the lower-cased result is
      ``"linear_regression"``.

    Cleanup (model drop and ``DROP SCHEMA ... CASCADE``) runs in a
    ``finally`` clause so a failing fit or assertion does not leave the
    schema behind.
    """
    current_cursor().execute("CREATE SCHEMA IF NOT EXISTS load_model_test")
    model = LinearRegression("load_model_test.model_test")
    try:
        # Make sure no leftover model exists before checking for absence.
        model.drop()
        # The `==` comparisons are kept on purpose: the exact return type
        # of does_model_exist is not visible from this test.
        assert does_model_exist("load_model_test.model_test") == False
        model.fit(titanic_vd, ["age", "fare"], "survived")
        assert does_model_exist("load_model_test.model_test") == True
        model_type = does_model_exist(
            "load_model_test.model_test", return_model_type=True
        )
        assert model_type.lower() == "linear_regression"
    finally:
        model.drop()
        current_cursor().execute("DROP SCHEMA load_model_test CASCADE")