def test_report(self, model):
    """Check ``report()`` for a regression and a classification pipeline.

    The regression report is taken from the ``model`` fixture. The
    classification report comes from a scaler + logistic-regression pipeline
    that is built here, fitted on ``public.winequality`` and dropped again
    even when one of the assertions fails.
    """
    # Metric names and expected values, in the order the report lists them.
    expected_regression = [
        ("explained_variance", 0.219816),
        ("max_error", 3.592465),
        ("median_absolute_error", 0.496031),
        ("mean_absolute_error", 0.609075),
        ("mean_squared_error", 0.594856),
        ("root_mean_squared_error", 0.7712695123858948),
        ("r2", 0.219816),
        ("r2_adj", 0.21945605202370688),
        ("aic", -3366.7617912479104),
        ("bic", -3339.65156943384),
    ]
    reg_rep = model.report()
    assert reg_rep["index"] == [metric for metric, _ in expected_regression]
    for position, (metric, expected) in enumerate(expected_regression):
        assert reg_rep["value"][position] == pytest.approx(
            expected, abs=1e-6), metric

    expected_classification = {
        "auc": 0.7642901826299067,
        "prc_auc": 0.45326090911518313,
        "accuracy": 0.8131445282438048,
        "log_loss": 0.182720882885624,
        "precision": 0.5595463137996219,
        "recall": 0.2317932654659358,
        "f1_score": 0.37307094353346476,
        "mcc": 0.2719537880298097,
        "informedness": 0.18715725014026519,
        "markedness": 0.3951696381964047,
        "csi": 0.19602649006622516,
        "cutoff": 0.5,
    }
    model_class = Pipeline([
        (
            "NormalizerWine",
            StandardScaler("logstd_model_test", cursor=model.cursor),
        ),
        (
            "LogisticRegressionWine",
            LogisticRegression("logreg_model_test", cursor=model.cursor),
        ),
    ])
    # Drop before fitting, as the original did (presumably to clear models
    # left behind by an earlier run).
    model_class.drop()
    try:
        model_class.fit("public.winequality", ["alcohol"], "good")
        cls_rep1 = model_class.report().transpose()
        for metric, expected in expected_classification.items():
            assert cls_rep1[metric][0] == pytest.approx(expected), metric
    finally:
        # Runs on failure too, so the stored models are not left behind.
        model_class.drop()
def model(winequality_vd):
    """Yield a scaler + linear-regression pipeline fitted on ``public.winequality``.

    The pipeline is dropped once before fitting and once more after the
    ``yield`` when the generator is resumed. ``winequality_vd`` is not
    referenced in the body; presumably it is requested so the dataset exists
    before fitting -- confirm against the fixture definition.
    """
    scaler = StandardScaler("std_model_test")
    regressor = LinearRegression("linreg_model_test")
    fitted_pipeline = Pipeline([
        ("NormalizerWine", scaler),
        ("LinearRegressionWine", regressor),
    ])
    fitted_pipeline.drop()
    predictors = ["citric_acid", "residual_sugar", "alcohol"]
    fitted_pipeline.fit("public.winequality", predictors, "quality")
    yield fitted_pipeline
    fitted_pipeline.drop()
def test_set_cursor(self, base):
    """Check that a pipeline still fits after ``set_cursor(base.cursor)``.

    After fitting, both step models must be listed in the ``models`` table.
    The pipeline is dropped afterwards even when the test fails.
    """
    model_test = Pipeline([
        (
            "NormalizerWine",
            StandardScaler("std_model_test_vdf", cursor=base.cursor),
        ),
        (
            "LinearRegressionWine",
            LinearRegression("linreg_model_test_vdf", cursor=base.cursor),
        ),
    ])
    model_test.drop()
    try:
        model_test.set_cursor(base.cursor)
        model_test.fit("public.winequality", ["alcohol"], "quality")
        # The query goes through the pipeline's cursor and the rows are read
        # from base.cursor, so this only works if both are the same cursor.
        model_test.cursor.execute(
            "SELECT model_name FROM models WHERE model_name IN ('std_model_test_vdf', 'linreg_model_test_vdf')"
        )
        assert len(base.cursor.fetchall()) == 2
    finally:
        # Runs on failure too, so the stored models are not left behind.
        model_test.drop()
def test_transform(self, winequality_vd, model):
    """Check the mean of ``alcohol`` after a two-scaler pipeline ``transform``.

    A StandardScaler followed by a MinMaxScaler is fitted on
    ``public.winequality`` and applied to a copy of ``winequality_vd``.
    The pipeline is dropped afterwards even when the test fails.
    """
    # NOTE(review): both steps are named "NormalizerWine" -- confirm that
    # duplicate step names are intended.
    model_class = Pipeline([
        (
            "NormalizerWine",
            StandardScaler("logstd_model_test"),
        ),
        (
            "NormalizerWine",
            MinMaxScaler("logmm_model_test"),
        ),
    ])
    model_class.drop()
    try:
        model_class.fit("public.winequality", ["alcohol"])
        # Work on a copy rather than on the object handed in by the fixture.
        winequality_copy = winequality_vd.copy()
        winequality_copy = model_class.transform(winequality_copy, X=["alcohol"])
        assert winequality_copy["alcohol"].mean() == pytest.approx(
            0.361130555239542, abs=1e-6)
    finally:
        # Runs on failure too, so the stored models are not left behind.
        model_class.drop()
def test_inverse_transform(self, winequality_vd, model):
    """Check the mean of ``alcohol`` after a two-scaler ``inverse_transform``.

    A StandardScaler followed by a MinMaxScaler is fitted on
    ``public.winequality``; ``inverse_transform`` is then applied to a copy of
    ``winequality_vd``. The pipeline is dropped afterwards even when the test
    fails.
    """
    # NOTE(review): both steps are named "NormalizerWine" -- confirm that
    # duplicate step names are intended.
    model_class = Pipeline([
        (
            "NormalizerWine",
            StandardScaler("logstd_model_test", cursor=model.cursor),
        ),
        (
            "NormalizerWine",
            MinMaxScaler("logmm_model_test", cursor=model.cursor),
        ),
    ])
    model_class.drop()
    try:
        model_class.fit("public.winequality", ["alcohol"])
        # Work on a copy rather than on the object handed in by the fixture.
        winequality_copy = winequality_vd.copy()
        winequality_copy = model_class.inverse_transform(
            winequality_copy,
            X=["alcohol"],
        )
        assert winequality_copy["alcohol"].mean() == pytest.approx(
            80.3934257349546, abs=1e-6)
    finally:
        # Runs on failure too, so the stored models are not left behind.
        model_class.drop()
def test_model_from_vDF(self, base, winequality_vd):
    """Check that a pipeline can be fitted from the ``winequality_vd`` object.

    After fitting, both step models must be listed in the ``models`` table.
    The pipeline is dropped afterwards even when the test fails.
    """
    model_test = Pipeline([
        (
            "NormalizerWine",
            StandardScaler("std_model_test_vdf", cursor=base.cursor),
        ),
        (
            "LinearRegressionWine",
            LinearRegression("linreg_model_test_vdf", cursor=base.cursor),
        ),
    ])
    model_test.drop()
    try:
        model_test.fit(
            winequality_vd,
            ["citric_acid", "residual_sugar", "alcohol"],
            "quality",
        )
        # The query goes through the pipeline's cursor and the rows are read
        # from base.cursor -- presumably the same object, since both steps
        # were built with cursor=base.cursor; confirm against Pipeline.
        model_test.cursor.execute(
            "SELECT model_name FROM models WHERE model_name IN ('std_model_test_vdf', 'linreg_model_test_vdf')"
        )
        assert len(base.cursor.fetchall()) == 2
    finally:
        # Runs on failure too, so the stored models are not left behind.
        model_test.drop()
def test_drop(self, base, winequality_vd):
    """Check that ``drop()`` removes both step models of a fitted pipeline.

    The ``models`` table must list both model names after fitting and
    return no row for them once the pipeline has been dropped.
    """
    scaler = StandardScaler("std_model_test_drop", cursor=base.cursor)
    regressor = LinearRegression("linreg_model_test_drop", cursor=base.cursor)
    pipeline = Pipeline([
        ("NormalizerWine", scaler),
        ("LinearRegressionWine", regressor),
    ])
    # The same lookup is issued before and after the drop under test.
    lookup_query = "SELECT model_name FROM models WHERE model_name IN ('linreg_model_test_drop', 'std_model_test_drop')"

    pipeline.drop()
    pipeline.fit(winequality_vd, ["alcohol"], "quality")

    pipeline.cursor.execute(lookup_query)
    rows_after_fit = pipeline.cursor.fetchall()
    assert len(rows_after_fit) == 2

    pipeline.drop()

    pipeline.cursor.execute(lookup_query)
    row_after_drop = pipeline.cursor.fetchone()
    assert row_after_drop is None