def test_set_cursor(self, base):
    """Check that a model still fits after ``set_cursor`` has been called.

    The model is trained on ``public.tr_data`` and then looked up by name in
    the ``models`` table through ``base.cursor``.
    """
    regressor = DummyTreeRegressor("tr_cursor_test", cursor=base.cursor)
    # TODO: create a new cursor instead of re-using base.cursor
    regressor.set_cursor(base.cursor)
    # Drop before fitting so the name is free (assumes drop() tolerates a
    # model that does not exist yet -- TODO confirm).
    regressor.drop()
    regressor.fit("public.tr_data", ["gender"], "transportation")

    lookup_sql = "SELECT model_name FROM models WHERE model_name = 'tr_cursor_test'"
    base.cursor.execute(lookup_sql)
    stored_name = base.cursor.fetchone()[0]
    assert stored_name == "tr_cursor_test"

    regressor.drop()
def test_contour(self, titanic_vd):
    """Fit on ``titanic_vd`` and check the artist count of the contour plot.

    The model predicts ``survived`` from ``age`` and ``fare``; the object
    returned by ``contour()`` must report exactly 34 bbox extra artists.
    """
    regressor = DummyTreeRegressor("model_contour")
    regressor.drop()

    predictors = ["age", "fare"]
    regressor.fit(titanic_vd, predictors, "survived")

    contour_plot = regressor.contour()
    artists = contour_plot.get_default_bbox_extra_artists()
    assert len(artists) == 34

    regressor.drop()
def model(base, tr_data_vd):
    """Yield a ``DummyTreeRegressor`` wrapping a model trained directly in SQL.

    The model ``tr_model_test`` is (re)created with ``rf_regressor`` on
    ``public.tr_data`` and the Python object's relations and columns are set
    by hand. After the ``yield`` the model is dropped.

    ``tr_data_vd`` is not used in the body; presumably it is requested so the
    underlying table exists before training -- verify against its definition.
    """
    base.cursor.execute("DROP MODEL IF EXISTS tr_model_test")
    train_sql = (
        "SELECT rf_regressor('tr_model_test', 'public.tr_data', 'TransPortation', '*' "
        "USING PARAMETERS exclude_columns='id, transportation', mtry=4, ntree=1, "
        "max_breadth=1e9, sampling_size=1, max_depth=100, min_leaf_size=1, "
        "min_info_gain=0.0, nbins=1000, seed=1, id_column='id')"
    )
    base.cursor.execute(train_sql)

    # I could use load_model but it is buggy
    regressor = DummyTreeRegressor("tr_model_test", cursor=base.cursor)
    relation = "public.tr_data"
    regressor.input_relation = relation
    # The same relation serves as both training and test data.
    regressor.test_relation = regressor.input_relation
    regressor.X = ['"Gender"', '"owned cars"', '"cost"', '"income"']
    regressor.y = '"TransPortation"'

    yield regressor

    # Teardown: remove the model created above.
    regressor.drop()
def test_model_from_vDF(self, base, tr_data_vd):
    """Fit a model from the ``tr_data_vd`` object and check it is registered.

    After fitting, the ``models`` table is queried through ``base.cursor``
    and must contain a row named ``tr_from_vDF``.
    """
    cursor = base.cursor
    cursor.execute("DROP MODEL IF EXISTS tr_from_vDF")

    regressor = DummyTreeRegressor("tr_from_vDF", cursor=base.cursor)
    regressor.fit(tr_data_vd, ["gender"], "transportation")

    lookup_sql = "SELECT model_name FROM models WHERE model_name = 'tr_from_vDF'"
    base.cursor.execute(lookup_sql)
    stored_name = base.cursor.fetchone()[0]
    assert stored_name == "tr_from_vDF"

    regressor.drop()
def test_to_sklearn(self, base):
    """Compare an in-database prediction with the exported model's prediction.

    A single-feature model (``"owned cars"``) is trained directly in SQL,
    exported with ``to_sklearn()``, and both sides are asked to predict for
    the feature value ``1``. The two results must agree within
    ``pytest.approx`` tolerance.
    """
    base.cursor.execute("DROP MODEL IF EXISTS tr_model_sk_test")
    base.cursor.execute(
        "SELECT rf_regressor('tr_model_sk_test', 'public.tr_data', 'TransPortation', '\"owned cars\"' USING PARAMETERS mtry=1, ntree=1, max_breadth=1e9, sampling_size=1, max_depth=100, min_leaf_size=1, min_info_gain=0.0, nbins=1000, seed=1, id_column='id')"
    )

    # I could use load_model but it is buggy
    model_sk = DummyTreeRegressor("tr_model_sk_test", cursor=base.cursor)
    try:
        model_sk.input_relation = "public.tr_data"
        model_sk.test_relation = model_sk.input_relation
        model_sk.X = ['"owned cars"']
        model_sk.y = "TransPortation"

        md = model_sk.to_sklearn()
        model_sk.cursor.execute(
            "SELECT PREDICT_RF_REGRESSOR(1 USING PARAMETERS model_name = '{}', match_by_pos=True)".format(
                model_sk.name
            )
        )
        prediction = model_sk.cursor.fetchone()[0]

        # scikit-learn estimators take X of shape (n_samples, n_features);
        # a bare 1-D list is rejected, so wrap the single sample in a row.
        # NOTE(review): assumes to_sklearn() returns a scikit-learn estimator.
        assert prediction == pytest.approx(md.predict([[1]])[0])
    finally:
        # Drop the model even when the comparison fails so it does not leak
        # into later tests.
        model_sk.drop()
def test_drop(self):
    """Check that ``drop()`` removes a fitted model from the ``models`` table.

    The model is fitted on ``public.tr_data``, found by name, dropped, and
    then the same lookup must return no row.
    """
    lookup_sql = (
        "SELECT model_name FROM models WHERE model_name = 'tr_model_test_drop'"
    )

    current_cursor().execute("DROP MODEL IF EXISTS tr_model_test_drop")
    regressor = DummyTreeRegressor("tr_model_test_drop")
    predictors = ["Gender", '"owned cars"', "cost", "income"]
    regressor.fit("public.tr_data", predictors, "TransPortation")

    # The model must be listed once it has been fitted ...
    current_cursor().execute(lookup_sql)
    assert current_cursor().fetchone()[0] == "tr_model_test_drop"

    # ... and must be gone once it has been dropped.
    regressor.drop()
    current_cursor().execute(lookup_sql)
    assert current_cursor().fetchone() is None
def test_repr(self, model):
    """Check the textual representation of a trained and an untrained model.

    The ``model`` fixture's repr must contain the start of its
    ``rf_regressor`` call, while a model that was only created and dropped
    must repr as ``<RandomForestRegressor>``.
    """
    trained_repr = repr(model)
    assert "SELECT rf_regressor('public.tr_model_test'," in trained_repr

    untrained = DummyTreeRegressor("RF_repr")
    untrained.drop()
    assert repr(untrained) == "<RandomForestRegressor>"