def test_connecion(self):
    """MySQLConnection should open and close cleanly, with or without an
    explicit port in the datasource string.

    NOTE(review): the method name has a typo ("connecion"); kept so any
    existing test filters referencing it keep working.
    """
    try:
        conn = MySQLConnection(testing.get_datasource())
        conn.close()
    except Exception as e:  # was a bare except; narrowed to Exception
        self.fail("connecting with the default datasource failed: %s" % e)

    try:
        conn_str = testing.get_datasource()
        # the port should be optional in the connection string
        conn_str = conn_str.replace(":3306", "")
        conn = MySQLConnection(conn_str)
        conn.close()
    except Exception as e:  # was a bare except; narrowed to Exception
        self.fail("connecting without an explicit port failed: %s" % e)
def test_submit_pai_predict_task():
    """Submit a PAI prediction over the iris test table using the
    previously trained e2etest_pai_dnn model."""
    datasource = testing.get_datasource()
    original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_test TO PREDICT alifin_jtest_dev.pai_dnn_predict.class USING e2etest_pai_dnn;"""
    select = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_test"""
    pred(datasource, original_sql, select, "e2etest_pai_dnn", "class", {},
         "alifin_jtest_dev.pai_dnn_predict")
def test_query(self):
    """Query behavior of MaxComputeConnection: a missing table raises,
    valid queries return iterable result sets with column metadata."""
    conn = MaxComputeConnection(testing.get_datasource())

    # was: query + assertTrue(False) inside try/except, which conflated
    # the "did not raise" failure with the message check; use assertRaises
    with self.assertRaises(Exception) as ctx:
        conn.query("select * from notexist limit 1")
    self.assertIn("Table not found", str(ctx.exception))

    rs = conn.query(
        "select * from alifin_jtest_dev.sqlflow_iris_train limit 1")
    self.assertTrue(rs.success())
    rows = [r for r in rs]
    self.assertEqual(1, len(rows))

    rs = conn.query(
        "select * from alifin_jtest_dev.sqlflow_iris_train limit 20")
    self.assertTrue(rs.success())
    col_info = rs.column_info()
    self.assertEqual([('sepal_length', 'DOUBLE'),
                      ('sepal_width', 'DOUBLE'),
                      ('petal_length', 'DOUBLE'),
                      ('petal_width', 'DOUBLE'),
                      ('class', 'BIGINT')], col_info)
    rows = [r for r in rs]
    # was assertTrue(20, len(rows)) which can never fail; assert the count
    self.assertEqual(20, len(rows))
def test_train(self):
    """Train an XGBoost classifier on iris and check that the final
    train/validation multiclass error is below 1%."""
    datasource = testing.get_datasource()
    original_sql = """SELECT * FROM iris.train TO TRAIN xgboost.gbtree WITH objective="multi:softmax", num_boost_round=20, num_class=3, validation.select="SELECT * FROM iris.test" INTO iris.xgboost_train_model_test; """
    select = "SELECT * FROM iris.train"
    val_select = "SELECT * FROM iris.test"
    model_params = {"num_class": 3, "objective": "multi:softmax"}
    train_params = {
        "num_boost_round": 20,
        "original_sql": original_sql,
        "feature_column_map": None,
        "label_column": NumericColumn(FieldDesc(name="class")),
        "model_image": "sqlflow:step"
    }
    metrics = train(datasource, "xgboost.gbtree", select, val_select,
                    model_params, "iris.xgboost_train_model_test", None,
                    train_params)
    # both error curves should converge below 1%
    self.assertLess(metrics['train']['merror'][-1], 0.01)
    self.assertLess(metrics['validate']['merror'][-1], 0.01)
def test_generator(self):
    """db_generator should yield exactly the two (features, label) pairs
    inserted into the float-feature fixture table."""
    conn = connect(testing.get_datasource())
    # set up the fixture table
    for stmt in (self.drop_statement, self.create_statement,
                 self.insert_statement):
        conn.execute(stmt)
    column_name_to_type = {
        "features": {
            "feature_name": "features",
            "delimiter": "",
            "dtype": "float32",
            "is_sparse": False,
            "shape": []
        }
    }
    label_meta = {"feature_name": "label", "shape": [], "delimiter": ""}
    gen = db_generator(conn, "SELECT * FROM test_table_float_fea",
                       label_meta)
    expected = [(((1.0, ), ), 0), (((2.0, ), ), 1)]
    count = 0
    for row, label in gen():
        features = read_features_from_row(row, ["features"], ["features"],
                                          column_name_to_type)
        if count < len(expected):
            self.assertEqual(expected[count], (features, label))
        count += 1
    self.assertEqual(2, count)
def test_query(self):
    """Query behavior of MaxComputeConnection (error-returning variant):
    failed queries report an error, valid ones iterate rows."""
    conn = MaxComputeConnection(testing.get_datasource())
    rs = conn.query("select * from notexist limit 1")
    self.assertFalse(rs.success())
    self.assertIn("Table not found", rs.error())

    rs = conn.query(
        "select * from alifin_jtest_dev.sqlflow_iris_train limit 1")
    self.assertTrue(rs.success())
    rows = [r for r in rs]
    self.assertEqual(1, len(rows))

    rs = conn.query(
        "select * from alifin_jtest_dev.sqlflow_iris_train limit 20")
    self.assertTrue(rs.success())
    col_info = rs.column_info()
    self.assertEqual([('sepal_length', 'double'),
                      ('sepal_width', 'double'),
                      ('petal_length', 'double'),
                      ('petal_width', 'double'),
                      ('class', 'bigint')], col_info)
    rows = [r for r in rs]
    # was assertTrue(20, len(rows)) which can never fail; assert the count
    self.assertEqual(20, len(rows))
def test_submit_xgb_train_task(self):
    """Submit an XGBoost training task to PAI with iris feature columns."""
    model_params = {
        "booster": "gbtree",
        "eta": 0.4,
        "num_class": 3,
        "objective": "multi:softprob"
    }
    train_params = {"num_boost_round": 10}
    # stored with the model so the columns can be rebuilt at load time
    feature_columns_code = """ xgboost_extended.feature_column.numeric_column( "sepal_length", shape=[1]), xgboost_extended.feature_column.numeric_column( "sepal_width", shape=[1]), xgboost_extended.feature_column.numeric_column( "petal_length", shape=[1]), xgboost_extended.feature_column.numeric_column( "petal_width", shape=[1]) """
    feature_columns = eval("[%s]" % feature_columns_code)
    submitter.submit_pai_train(
        testing.get_datasource(),
        "XGBoost",
        "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
        "select * from alifin_jtest_dev.sqlflow_iris_train",
        model_params,
        "e2etest_xgb_classify_model",
        None,
        train_params=train_params,
        feature_columns=feature_columns,
        feature_metas=iris_feature_metas,
        label_meta=iris_label_meta,
        feature_column_names=iris_feature_column_names,
        feature_columns_code=feature_columns_code)
def test_field_type(self):
    """Every MySQL type code in MYSQL_FIELD_TYPE_DICT must match the code
    the server reports for a column created with that type name."""
    self.assertGreater(len(MYSQL_FIELD_TYPE_DICT), 0)
    conn = connect_with_data_source(testing.get_datasource())
    table_name = "iris.test_mysql_field_type_table"
    drop_table_sql = "DROP TABLE IF EXISTS %s" % table_name
    create_table_sql = "CREATE TABLE IF NOT EXISTS " + \
        table_name + "(a %s)"
    select_sql = "SELECT * FROM %s" % table_name
    for type_code, type_name in MYSQL_FIELD_TYPE_DICT.items():
        if type_name in ["VARCHAR", "CHAR"]:
            # character types need an explicit length in DDL
            type_name += "(255)"
        conn.execute(drop_table_sql)
        conn.execute(create_table_sql % type_name)
        # use a low-level cursor so we can read the raw type code that
        # the mysql server reports in the result description
        cursor = conn.cursor()
        cursor.execute(select_sql)
        reported_code = cursor.description[0][1]
        cursor.close()
        conn.execute(drop_table_sql)
        self.assertEqual(reported_code, type_code,
                         "%s not match" % type_name)
def test_pai_train_step(self):
    """Run the TensorFlow train_step with PAI/OSS flags configured from
    the environment."""
    from runtime.step.tensorflow.train import train_step
    model_params = {"hidden_units": [10, 20], "n_classes": 3}
    original_sql = """ SELECT * FROM alifin_jtest_dev.sqlflow_test_iris_train TO TRAIN DNNClassifier WITH model.n_classes = 3, model.hidden_units = [10, 20] LABEL class INTO e2etest_pai_dnn;"""
    datasource = testing.get_datasource()
    save = "e2etest_pai_dnn"
    FLAGS = define_tf_flags()
    # OSS credentials and endpoint come from the environment
    FLAGS.sqlflow_oss_ak = os.getenv("SQLFLOW_OSS_AK")
    FLAGS.sqlflow_oss_sk = os.getenv("SQLFLOW_OSS_SK")
    FLAGS.sqlflow_oss_ep = os.getenv("SQLFLOW_OSS_MODEL_ENDPOINT")
    save_path = pai_model.get_oss_model_save_path(datasource, save, user="")
    FLAGS.sqlflow_oss_modeldir = pai_model.get_oss_model_url(save_path)
    train_step(original_sql, "", "DNNClassifier", datasource,
               "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train", "",
               "alifin_jtest_dev.sqlflow_iris_train", "", model_params, {},
               feature_column_map, label_column, save, None)
def test_submit_pai_random_forest_predict_task(self):
    """Predict with the trained PAI random forest model."""
    datasource = testing.get_datasource()
    original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_test TO PREDICT alifin_jtest_dev.pai_rf_predict.class USING e2e_test_random_forest_wuyi;"""
    predict(datasource, original_sql,
            "SELECT * FROM alifin_jtest_dev.sqlflow_iris_test",
            "e2e_test_random_forest_wuyi", "class", {},
            "alifin_jtest_dev.pai_rf_predict")
def test_submit_pai_xgb_predict_task(self):
    """Predict with the trained PAI XGBoost classifier."""
    datasource = testing.get_datasource()
    original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_test TO PREDICT alifin_jtest_dev.pai_xgb_predict.class USING e2etest_xgb_classify_model;"""
    predict(datasource, original_sql,
            "SELECT * FROM alifin_jtest_dev.sqlflow_iris_test",
            "e2etest_xgb_classify_model", "class", {},
            "alifin_jtest_dev.pai_xgb_predict")
def test_get_table_schema(self):
    """get_table_schema should report the iris train table's columns."""
    conn = MaxComputeConnection(testing.get_datasource())
    schema = conn.get_table_schema("sqlflow_iris_train")
    expected = [('sepal_length', 'DOUBLE'), ('sepal_width', 'DOUBLE'),
                ('petal_length', 'DOUBLE'), ('petal_width', 'DOUBLE'),
                ('class', 'BIGINT')]
    self.assertEqual(expected, schema)
def test_submit_pai_random_forest_explain_task(self):
    """Explain the trained PAI random forest model into a result table."""
    datasource = testing.get_datasource()
    original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_train TO EXPLAIN e2e_test_random_forest_wuyi WITH label_col=class INTO alifin_jtest_dev.e2etest_random_forest_explain_result;"""
    explain(datasource, original_sql,
            "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
            "e2e_test_random_forest_wuyi", {"label_col": "class"},
            "alifin_jtest_dev.e2etest_random_forest_explain_result")
def test_submit_pai_xgb_explain_task(self):
    """Explain the trained PAI XGBoost model into a result table.

    NOTE(review): the recorded SQL selects from sqlflow_iris_test while
    the submitted select uses sqlflow_iris_train — possibly intentional,
    verify against the test author's intent.
    """
    datasource = testing.get_datasource()
    original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_test TO EXPLAIN e2etest_xgb_classify_model WITH label_col=class INTO alifin_jtest_dev.e2etest_xgb_explain_result;"""
    explain(datasource, original_sql,
            "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
            "e2etest_xgb_classify_model", {"label_col": "class"},
            "alifin_jtest_dev.e2etest_xgb_explain_result")
def test_proto_table_writer(self):
    """ProtobufWriter should serialize a result set with a head line
    listing the column names."""
    conn = MySQLConnection(testing.get_datasource())
    result = conn.query("select * from iris.train limit 10;")
    self.assertTrue(result.success())
    writer = table_writer.ProtobufWriter(result)
    lines = writer.dump_strings()
    expected_head = "head { column_names: \"sepal_length\" column_names: \"sepal_width\" column_names: \"petal_length\" column_names: \"petal_width\" column_names: \"class\" }"  # noqa: E501
    self.assertTrue(lines[0].find(expected_head) >= 0)
def test_submit_pai_tf_evaluate_task(self):
    """Evaluate the trained PAI DNN model.

    Fixed: original_sql was copy-pasted from the explain test
    (TO EXPLAIN ... INTO ...explain_result); the recorded statement now
    matches the evaluate call that is actually submitted.
    """
    original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_test TO EVALUATE e2etest_pai_dnn WITH validation.metrics="Accuracy,Recall" INTO alifin_jtest_dev.e2etest_pai_dnn_evaluate_result;"""
    evaluate(testing.get_datasource(), original_sql,
             "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
             "e2etest_pai_dnn", {"validation.metrics": "Accuracy,Recall"},
             "alifin_jtest_dev.e2etest_pai_dnn_evaluate_result")
def test_submit_pai_xgb_evaluate_task(self):
    """Evaluate the trained PAI XGBoost model with accuracy_score."""
    datasource = testing.get_datasource()
    original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_test TO EVALUATE e2etest_xgb_classify_model WITH validation.metrics=accuracy_score INTO alifin_jtest_dev.e2etest_pai_xgb_evaluate_result;"""
    evaluate(datasource, original_sql,
             "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
             "e2etest_xgb_classify_model",
             {"validation.metrics": "accuracy_score"},
             "alifin_jtest_dev.e2etest_pai_xgb_evaluate_result")
def test_submit_pai_random_forest_train_task(self):
    """Train a PAI RandomForests model on the iris training table."""
    model_params = {
        "tree_num": 3,
    }
    train(testing.get_datasource(),
          "RandomForests",
          "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
          "",
          model_params,
          "e2e_test_random_forest",
          "",
          feature_column_names=iris_feature_column_names,
          label_meta=iris_label_meta)
def test_exec(self):
    """exec/query round trip on HiveConnection: create, insert, select,
    drop."""
    conn = HiveConnection(testing.get_datasource())
    self.assertTrue(conn.exec("create table test_exec(a int)"))
    self.assertTrue(conn.exec("insert into test_exec values(1), (2)"))
    rs = conn.query("select * from test_exec")
    self.assertTrue(rs.success())
    rows = [r for r in rs]
    # was assertTrue(2, len(rows)) which can never fail; assert the count
    self.assertEqual(2, len(rows))
    self.assertTrue(conn.exec("drop table test_exec"))
def test_submit_pai_kmeans_train_task(self):
    """Submit a PAI KMeans training task; the label column "class" is
    excluded from clustering but kept in the feature column names."""
    model_params = {
        "excluded_columns": "class",
        "idx_table_name": "alifin_jtest_dev.e2e_test_kmeans_output_idx"
    }
    submitter.submit_pai_train(
        testing.get_datasource(),
        "KMeans",
        "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
        "",
        model_params,
        "e2e_test_kmeans",
        "",
        feature_column_names=[*iris_feature_column_names, "class"])
def test_save_load_db(self):
    """A model saved to the DB can be loaded back with identical meta."""
    table = "sqlflow_models.test_model"
    meta = {"model_params": {"n_classes": 3}}
    datasource = get_datasource()
    saved = Model(EstimatorType.XGBOOST, meta)
    # save the model into the table
    with temp_file.TemporaryDirectory() as workdir:
        saved.save_to_db(datasource, table, workdir)
    # load it back and compare metadata
    with temp_file.TemporaryDirectory() as workdir:
        loaded = Model.load_from_db(datasource, table, workdir)
    self.assertEqual(loaded._meta, meta)
def test_exec(self):
    """execute/query round trip on MySQLConnection; dropping a missing
    table must raise."""
    conn = MySQLConnection(testing.get_datasource())
    self.assertTrue(conn.execute("create table test_exec(a int)"))
    self.assertTrue(conn.execute("insert into test_exec values(1), (2)"))
    rs = conn.query("select * from test_exec")
    self.assertTrue(rs.success())
    rows = [r for r in rs]
    # was assertTrue(2, len(rows)) which can never fail; assert the count
    self.assertEqual(2, len(rows))
    self.assertTrue(conn.execute("drop table test_exec"))
    with self.assertRaises(Exception):
        conn.execute("drop table not_exist")
def test_train(self):
    """End-to-end xgboost lifecycle: train on iris, then run predict,
    explain and evaluate against the saved model."""
    ds = testing.get_datasource()
    original_sql = """SELECT * FROM iris.train TO TRAIN xgboost.gbtree WITH objective="multi:softmax", num_boost_round=20, num_class=3, validation.select="SELECT * FROM iris.test" INTO iris.xgboost_train_model_test; """
    select = "SELECT * FROM iris.train"
    val_select = "SELECT * FROM iris.test"
    train_params = {
        "num_boost_round": 20,
    }
    model_params = {"num_class": 3, "objective": "multi:softmax"}
    # each phase runs in its own scratch working directory
    with temp_file.TemporaryDirectory(as_cwd=True):
        eval_result = train(ds, original_sql, select, val_select,
                            "xgboost.gbtree", "", None,
                            NumericColumn(FieldDesc(name="class")),
                            model_params, train_params, None,
                            "iris.xgboost_train_model_test", None)
        # final train/validation multiclass error should be under 1%
        self.assertLess(eval_result['train']['merror'][-1], 0.01)
        self.assertLess(eval_result['validate']['merror'][-1], 0.01)
    with temp_file.TemporaryDirectory(as_cwd=True):
        pred_original_sql = """SELECT * FROM iris.test TO PREDICT iris.xgboost_pred_result.pred_val USING iris.xgboost_train_model_test;"""
        pred(ds, pred_original_sql, "SELECT * FROM iris.test",
             "iris.xgboost_train_model_test", "pred_val", model_params,
             "iris.xgboost_pred_result")
    with temp_file.TemporaryDirectory(as_cwd=True):
        explain_original_sql = """SELECT * FROM iris.test TO EXPLAIN iris.xgboost_train_model_test INTO iris.xgboost_explain_result;"""
        explain(ds, explain_original_sql, "SELECT * FROM iris.test",
                "iris.xgboost_train_model_test", model_params,
                "iris.xgboost_explain_result")
    with temp_file.TemporaryDirectory(as_cwd=True):
        evaluate_original_sql = """SELECT * FROM iris.test TO EVALUATE iris.xgboost_train_model_test WITH label_col=class INTO iris.xgboost_evaluate_result;"""
        evaluate(ds, evaluate_original_sql, "SELECT * FROM iris.test",
                 "class", "iris.xgboost_train_model_test", model_params,
                 "iris.xgboost_evaluate_result")
def test_exec(self):
    """exec/query round trip on MaxComputeConnection: create, insert,
    select, drop."""
    conn = MaxComputeConnection(testing.get_datasource())
    self.assertTrue(
        conn.exec("create table alifin_jtest_dev.sqlflow_test_exec(a int)"))
    self.assertTrue(
        conn.exec(
            "insert into alifin_jtest_dev.sqlflow_test_exec values(1), (2)"))
    rs = conn.query("select * from alifin_jtest_dev.sqlflow_test_exec")
    self.assertTrue(rs.success())
    rows = [r for r in rs]
    # was assertTrue(2, len(rows)) which can never fail; assert the count
    self.assertEqual(2, len(rows))
    self.assertTrue(
        conn.exec("drop table alifin_jtest_dev.sqlflow_test_exec"))
def test_submit_pai_train_task(self):
    """Submit a PAI DNNClassifier training task with the full set of
    training hyperparameters and feature metadata."""
    model_params = dict()
    model_params["hidden_units"] = [10, 20]
    model_params["n_classes"] = 3
    # feature_columns_code will be used to save the training information
    # together with the saved model.
    feature_columns_code = """{"feature_columns": [ tf.feature_column.numeric_column("sepal_length", shape=[1]), tf.feature_column.numeric_column("sepal_width", shape=[1]), tf.feature_column.numeric_column("petal_length", shape=[1]), tf.feature_column.numeric_column("petal_width", shape=[1]), ]}"""
    feature_columns = eval(feature_columns_code)
    submitter.submit_pai_train(
        testing.get_datasource(),
        "DNNClassifier",
        "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
        "",
        model_params,
        "e2etest_pai_dnn",
        None,
        feature_columns=feature_columns,
        feature_column_names=iris_feature_column_names,
        feature_column_names_map=iris_feature_column_names_map,
        feature_metas=iris_feature_metas,
        label_meta=iris_label_meta,
        validation_metrics="Accuracy".split(","),
        save="model_save",
        batch_size=1,
        epoch=1,
        validation_steps=1,
        verbose=0,
        max_steps=None,
        validation_start_delay_secs=0,
        validation_throttle_secs=0,
        save_checkpoints_steps=100,
        log_every_n_iter=10,
        load_pretrained_model=False,
        is_pai=True,
        feature_columns_code=feature_columns_code,
        model_repo_image="",
        original_sql=''' SELECT * FROM alifin_jtest_dev.sqlflow_test_iris_train TO TRAIN DNNClassifier WITH model.n_classes = 3, model.hidden_units = [10, 20] LABEL class INTO e2etest_pai_dnn;''')
def test_submit_pai_random_forest_train_task(self):
    """Train a PAI RandomForests model, recording feature/label metadata
    alongside the model."""
    original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_train TO TRAIN RandomForests WITH model.tree_num=3 LABEL class INTO e2e_test_random_forest;"""
    label_desc = label_column.get_field_desc()[0]
    train_params = {
        "feature_column_names": iris_feature_column_names,
        "label_meta": json.loads(label_desc.to_json())
    }
    train(testing.get_datasource(), original_sql,
          "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train", "",
          "RandomForests", "", feature_column_map, label_column,
          {"tree_num": 3}, train_params, "e2e_test_random_forest_wuyi",
          None)
def test_submit_pai_kmeans_train_task(self):
    """Train a PAI KMeans model; the label column "class" is excluded
    from clustering."""
    original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_train TO TRAIN KMeans WITH model.excluded_columns="class", model.idx_table_name="alifin_jtest_dev.e2e_test_kmeans_output_idx" INTO e2e_test_kmeans;"""
    model_params = {
        "excluded_columns": "class",
        "idx_table_name": "alifin_jtest_dev.e2e_test_kmeans_output_idx"
    }
    train(testing.get_datasource(), original_sql,
          "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train", "",
          "KMeans", "", feature_column_map, None, model_params,
          {"feature_column_names": iris_feature_column_names},
          "e2e_test_kmeans", None)
def test_submit_pai_train_task(self):
    """Train a DNNClassifier on PAI against the iris training table."""
    model_params = {"hidden_units": [10, 20], "n_classes": 3}
    original_sql = """ SELECT * FROM alifin_jtest_dev.sqlflow_test_iris_train TO TRAIN DNNClassifier WITH model.n_classes = 3, model.hidden_units = [10, 20] LABEL class INTO e2etest_pai_dnn;"""
    train(testing.get_datasource(), original_sql,
          "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train", "",
          "DNNClassifier", "", feature_column_map, label_column,
          model_params, {}, "e2etest_pai_dnn", None)
def test_save(self):
    """Save a model and load it back; metadata must round-trip.

    Fixed: the original chdir'd into each TemporaryDirectory and never
    chdir'd back, leaving the process cwd pointing at a deleted
    directory after the test; the working directory is now restored.
    """
    table = "sqlflow_models.test_model"
    meta = {"train_params": {"n_classes": 3}}
    m = Model(EstimatorType.XGBOOST, meta)
    datasource = get_datasource()
    cwd = os.getcwd()
    try:
        # save model
        with tempfile.TemporaryDirectory() as d:
            os.chdir(d)
            m.save(datasource, table)
            os.chdir(cwd)  # leave the dir before it is cleaned up
        # load model
        with tempfile.TemporaryDirectory() as d:
            os.chdir(d)
            m = load(datasource, table)
            os.chdir(cwd)
        self.assertEqual(m._meta, meta)
    finally:
        os.chdir(cwd)
def test_submit_xgb_train_task(self):
    """Train an xgboost.gbtree classifier used by later e2e tests.

    NOTE(review): "validatioin.select" in the recorded SQL is a typo
    carried over from the original statement text; it is metadata only.
    """
    original_sql = """SELECT * FROM iris.train TO TRAIN xgboost.gbtree WITH objective="multi:softprob", num_class=3, eta=0.4, booster="gbtree" validatioin.select="select * from alifin_jtest_dev.sqlflow_iris_test" LABEL class INTO e2etest_xgb_classify_model;"""
    model_params = {
        "eta": 0.4,
        "num_class": 3,
        "objective": "multi:softprob"
    }
    train_params = {"num_boost_round": 10}
    train(testing.get_datasource(), original_sql,
          "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
          "SELECT * FROM alifin_jtest_dev.sqlflow_iris_test",
          "xgboost.gbtree", "", feature_column_map, label_column,
          model_params, train_params, "e2etest_xgb_classify_model", None)