def model(winequality_vd):
    """Yield a ``Normalizer`` fitted on ``public.winequality``.

    Any existing ``norm_model_test`` model is dropped first, then a new one
    is fitted on the ``citric_acid``, ``residual_sugar`` and ``alcohol``
    columns.  The statement after the ``yield`` drops the model again once
    the consumer is done with it.

    ``winequality_vd`` is not used in the body; presumably it is requested
    so the dataset exists before fitting -- TODO confirm.
    """
    # Start from a clean state in case a previous run left the model behind.
    current_cursor().execute("DROP MODEL IF EXISTS norm_model_test")

    feature_columns = ["citric_acid", "residual_sugar", "alcohol"]
    normalizer = Normalizer("norm_model_test")
    normalizer.fit("public.winequality", feature_columns)

    yield normalizer

    # Teardown: remove the model created above.
    normalizer.drop()
def test_model_from_vDF(self, winequality_vd):
    """Fit a ``Normalizer`` from the ``winequality_vd`` object.

    After fitting on the ``alcohol`` and ``quality`` columns, the test
    checks that a row named ``norm_vDF`` is present in ``models``.  The
    model is dropped in a ``finally`` clause so a failing assertion does
    not leave it behind in the database.
    """
    current_cursor().execute("DROP MODEL IF EXISTS norm_vDF")
    model_test = Normalizer("norm_vDF")
    model_test.fit(winequality_vd, ["alcohol", "quality"])
    try:
        current_cursor().execute(
            "SELECT model_name FROM models WHERE model_name = 'norm_vDF'")
        assert current_cursor().fetchone()[0] == "norm_vDF"
    finally:
        # Clean up even when the check above fails.
        model_test.drop()
def test_model_from_vDF(self, base, winequality_vd):
    """Fit a ``Normalizer`` from the ``winequality_vd`` object.

    Uses ``base.cursor`` for both the model and the verification query.
    After fitting on the ``alcohol`` and ``quality`` columns, the test
    checks that a row named ``norm_vDF`` is present in ``models``.  The
    model is dropped in a ``finally`` clause so a failing assertion does
    not leave it behind in the database.
    """
    base.cursor.execute("DROP MODEL IF EXISTS norm_vDF")
    model_test = Normalizer("norm_vDF", cursor=base.cursor)
    model_test.fit(winequality_vd, ["alcohol", "quality"])
    try:
        base.cursor.execute(
            "SELECT model_name FROM models WHERE model_name = 'norm_vDF'")
        assert base.cursor.fetchone()[0] == "norm_vDF"
    finally:
        # Clean up even when the check above fails.
        model_test.drop()
def test_set_cursor(self, base):
    """Check that a model can still be fitted after ``set_cursor``.

    The model is created with ``base.cursor`` and ``set_cursor`` is called
    with that same cursor (see the TODO below).  After fitting on
    ``alcohol``, the test checks that ``norm_cursor_test`` is listed in
    ``models``.  The model is dropped in a ``finally`` clause so a failing
    assertion does not leave it behind.
    """
    model_test = Normalizer("norm_cursor_test", cursor=base.cursor)
    # TODO: create a new cursor
    model_test.set_cursor(base.cursor)
    # Drop before fitting so a leftover model does not get in the way.
    model_test.drop()
    model_test.fit("public.winequality", ["alcohol"])
    try:
        base.cursor.execute(
            "SELECT model_name FROM models WHERE model_name = 'norm_cursor_test'"
        )
        assert base.cursor.fetchone()[0] == "norm_cursor_test"
    finally:
        model_test.drop()
def test_to_python(self, model):
    """Compare ``to_python`` output with ``APPLY_NORMALIZE`` in the database.

    The first value returned by ``APPLY_NORMALIZE`` for one fixed row is
    compared with the first value produced by the callable from
    ``to_python(return_str=False)``.  The check runs on the ``model``
    argument first, then on temporary ``minmax`` and ``robust_zscore``
    models.  Each temporary model is dropped in a ``finally`` clause so a
    failing assertion does not leave it behind.
    """
    apply_query = (
        "SELECT APPLY_NORMALIZE(citric_acid, residual_sugar, alcohol "
        "USING PARAMETERS model_name = '{}', match_by_pos=True) "
        "FROM (SELECT 3.0 AS citric_acid, 11.0 AS residual_sugar, "
        "93. AS alcohol) x"
    )

    def check(fitted):
        # Fetch the database result before calling into the model again.
        current_cursor().execute(apply_query.format(fitted.name))
        prediction = current_cursor().fetchone()[0]
        assert prediction == pytest.approx(
            fitted.to_python(return_str=False)([[3.0, 11.0, 93.0]])[0][0])

    # Model supplied by the caller.
    check(model)

    # Both temporary models share one name, so each is dropped before the
    # next one is fitted.
    for method in ("minmax", "robust_zscore"):
        other = Normalizer("norm_model_test2", method=method)
        other.drop()
        other.fit("public.winequality",
                  ["citric_acid", "residual_sugar", "alcohol"])
        try:
            check(other)
        finally:
            other.drop()
def test_drop(self):
    """Check that ``drop`` removes the model from ``models``.

    The model is fitted, confirmed to be listed in ``models``, dropped,
    and then confirmed to be absent.  The drop sits in a ``finally``
    clause so the model is removed even if the first check fails.
    """
    lookup = (
        "SELECT model_name FROM models "
        "WHERE model_name = 'norm_model_test_drop'"
    )
    current_cursor().execute("DROP MODEL IF EXISTS norm_model_test_drop")
    model_test = Normalizer("norm_model_test_drop")
    model_test.fit("public.winequality", ["alcohol", "quality"])
    try:
        current_cursor().execute(lookup)
        assert current_cursor().fetchone()[0] == "norm_model_test_drop"
    finally:
        model_test.drop()

    # After the drop, the lookup must return no row.
    current_cursor().execute(lookup)
    assert current_cursor().fetchone() is None
def test_drop(self, base):
    """Check that ``drop`` removes the model from ``models``.

    Uses ``base.cursor`` throughout.  The model is fitted, confirmed to be
    listed in ``models``, dropped, and then confirmed to be absent.  The
    drop sits in a ``finally`` clause so the model is removed even if the
    first check fails.
    """
    lookup = (
        "SELECT model_name FROM models "
        "WHERE model_name = 'norm_model_test_drop'"
    )
    base.cursor.execute("DROP MODEL IF EXISTS norm_model_test_drop")
    model_test = Normalizer("norm_model_test_drop", cursor=base.cursor)
    model_test.fit("public.winequality", ["alcohol", "quality"])
    try:
        base.cursor.execute(lookup)
        assert base.cursor.fetchone()[0] == "norm_model_test_drop"
    finally:
        model_test.drop()

    # After the drop, the lookup must return no row.
    base.cursor.execute(lookup)
    assert base.cursor.fetchone() is None
def test_to_memmodel(self, model):
    """Compare ``to_memmodel`` output with ``APPLY_NORMALIZE``.

    For one fixed row, three results are compared:

    * the values returned by ``APPLY_NORMALIZE`` in the database,
    * the values from running the expressions given by
      ``to_memmodel().transform_sql(...)`` as SQL,
    * the values returned by ``to_memmodel().transform(...)``.

    The check runs on the ``model`` argument first, then on temporary
    ``minmax`` and ``robust_zscore`` models.  Each temporary model is
    dropped in a ``finally`` clause so a failing assertion does not leave
    it behind.
    """
    apply_query = (
        "SELECT APPLY_NORMALIZE(citric_acid, residual_sugar, alcohol "
        "USING PARAMETERS model_name = '{}', match_by_pos=True) "
        "FROM (SELECT 3.0 AS citric_acid, 11.0 AS residual_sugar, "
        "93. AS alcohol) x"
    )
    select_query = (
        "SELECT {} FROM (SELECT 3.0 AS citric_acid, 11.0 AS residual_sugar, "
        "93. AS alcohol) x"
    )

    def check(fitted):
        # Database reference values; fetched before the model is used again.
        current_cursor().execute(apply_query.format(fitted.name))
        prediction = [float(elem) for elem in current_cursor().fetchone()]

        # Same row evaluated through the generated SQL expressions.
        current_cursor().execute(
            select_query.format(", ".join(fitted.to_memmodel().transform_sql(
                ["citric_acid", "residual_sugar", "alcohol"]))))
        prediction2 = [float(elem) for elem in current_cursor().fetchone()]
        assert prediction == pytest.approx(prediction2)

        # Same row evaluated in Python.
        prediction3 = fitted.to_memmodel().transform([[3.0, 11.0, 93.0]])
        assert prediction[0] == pytest.approx(prediction3[0][0])
        assert prediction[1] == pytest.approx(prediction3[0][1])
        assert prediction[2] == pytest.approx(prediction3[0][2])

    # Model supplied by the caller.
    check(model)

    # Both temporary models share one name, so each is dropped before the
    # next one is fitted.
    for method in ("minmax", "robust_zscore"):
        other = Normalizer("norm_model_test2", method=method)
        other.drop()
        other.fit("public.winequality",
                  ["citric_acid", "residual_sugar", "alcohol"])
        try:
            check(other)
        finally:
            other.drop()
def test_to_sql(self, model):
    """Compare ``to_sql`` output with ``APPLY_NORMALIZE``.

    For one fixed row, the values returned by ``APPLY_NORMALIZE`` are
    compared with the values from running the text returned by
    ``to_sql()`` as the select list.  The check runs on the ``model``
    argument first, then on temporary ``minmax`` and ``robust_zscore``
    models created on ``model.cursor``.  Each temporary model is dropped in
    a ``finally`` clause so a failing assertion does not leave it behind.
    """
    apply_query = (
        "SELECT APPLY_NORMALIZE(citric_acid, residual_sugar, alcohol "
        "USING PARAMETERS model_name = '{}', match_by_pos=True) "
        "FROM (SELECT 3.0 AS citric_acid, 11.0 AS residual_sugar, "
        "93. AS alcohol) x"
    )
    select_query = (
        "SELECT {} FROM (SELECT 3.0 AS citric_acid, 11.0 AS residual_sugar, "
        "93. AS alcohol) x"
    )

    def check(fitted):
        # Every statement goes through the cursor attached to the model.
        fitted.cursor.execute(apply_query.format(fitted.name))
        prediction = [float(elem) for elem in fitted.cursor.fetchone()]
        fitted.cursor.execute(select_query.format(fitted.to_sql()))
        prediction2 = [float(elem) for elem in fitted.cursor.fetchone()]
        assert prediction == pytest.approx(prediction2)

    # Model supplied by the caller.
    check(model)

    # Both temporary models share one name, so each is dropped before the
    # next one is fitted.
    for method in ("minmax", "robust_zscore"):
        other = Normalizer("norm_model_test2", cursor=model.cursor,
                           method=method)
        other.drop()
        other.fit("public.winequality",
                  ["citric_acid", "residual_sugar", "alcohol"])
        try:
            check(other)
        finally:
            other.drop()
def test_get_transform(self, winequality_vd, model):
    """Check summary statistics of ``transform`` output for each method.

    * ``model`` argument: the mean of every transformed column is ~0.
    * ``minmax``: min of ``citric_acid`` and ``alcohol`` is ~0 and max of
      ``residual_sugar`` is ~1.
    * ``robust_zscore``: the median of every transformed column is ~0.

    All comparisons use an absolute tolerance of ``1e-6``.  The temporary
    models are created on ``model.cursor`` and dropped in a ``finally``
    clause, after the statistics are computed, so a failing assertion does
    not leave them behind.
    """
    columns = ("citric_acid", "residual_sugar", "alcohol")

    # Model supplied by the caller.
    winequality_trans = model.transform(winequality_vd, X=list(columns))
    for column in columns:
        assert winequality_trans[column].mean() == pytest.approx(
            0.0, abs=1e-6)

    # (method, ((column, aggregate method name, expected value), ...))
    expectations = (
        ("minmax", (("citric_acid", "min", 0.0),
                    ("residual_sugar", "max", 1.0),
                    ("alcohol", "min", 0.0))),
        ("robust_zscore", (("citric_acid", "median", 0.0),
                           ("residual_sugar", "median", 0.0),
                           ("alcohol", "median", 0.0))),
    )
    # Both temporary models share one name, so each is dropped before the
    # next one is fitted.
    for method, expected in expectations:
        other = Normalizer("norm_model_test2", cursor=model.cursor,
                           method=method)
        other.drop()
        other.fit("public.winequality", list(columns))
        try:
            winequality_trans = other.transform(winequality_vd,
                                                X=list(columns))
            for column, aggregate, value in expected:
                observed = getattr(winequality_trans[column], aggregate)()
                assert observed == pytest.approx(value, abs=1e-6)
        finally:
            other.drop()
def test_repr(self, model):
    """Check ``__repr__`` for a fitted model and for a dropped one.

    The representation of the ``model`` argument must contain the summary
    header text, and a ``Normalizer`` whose model has been dropped must be
    rendered as ``<Normalizer>``.
    """
    fitted_text = model.__repr__()
    assert "column_name | avg |std_dev" in fitted_text

    # Drop the model so this instance has nothing stored under its name.
    model_repr = Normalizer("model_repr")
    model_repr.drop()
    unfitted_text = model_repr.__repr__()
    assert unfitted_text == "<Normalizer>"