예제 #1
0
    def test_connecion(self):
        # NOTE(review): method name has a typo ("connecion"); kept so the
        # test is still discovered under its existing name.
        """MySQLConnection should open and close successfully, both with
        and without an explicit :3306 port in the datasource string."""
        try:
            conn = MySQLConnection(testing.get_datasource())
            conn.close()
        except Exception as e:  # narrowed from bare except (E722)
            self.fail("connect with default datasource failed: %s" % e)

        try:
            # The port is optional; the driver should default to 3306.
            conn_str = testing.get_datasource()
            conn_str = conn_str.replace(":3306", "")
            conn = MySQLConnection(conn_str)
            conn.close()
        except Exception as e:
            self.fail("connect without explicit port failed: %s" % e)
예제 #2
0
def test_submit_pai_predict_task():
    """Run PAI batch prediction using a previously trained DNN model."""
    original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_test
TO PREDICT alifin_jtest_dev.pai_dnn_predict.class
USING e2etest_pai_dnn;"""
    datasource = testing.get_datasource()
    select = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_test"""
    pred(datasource, original_sql, select, "e2etest_pai_dnn", "class", {},
         "alifin_jtest_dev.pai_dnn_predict")
예제 #3
0
    def test_query(self):
        """MaxComputeConnection.query: a missing table raises an error
        naming the table; valid queries expose rows and column info."""
        conn = MaxComputeConnection(testing.get_datasource())
        with self.assertRaises(Exception) as ctx:
            conn.query("select * from notexist limit 1")
        self.assertIn("Table not found", str(ctx.exception))

        rs = conn.query(
            "select * from alifin_jtest_dev.sqlflow_iris_train limit 1")
        self.assertTrue(rs.success())
        rows = [r for r in rs]
        self.assertEqual(1, len(rows))

        rs = conn.query(
            "select * from alifin_jtest_dev.sqlflow_iris_train limit 20")
        self.assertTrue(rs.success())

        col_info = rs.column_info()
        self.assertEqual([('sepal_length', 'DOUBLE'),
                          ('sepal_width', 'DOUBLE'),
                          ('petal_length', 'DOUBLE'),
                          ('petal_width', 'DOUBLE'), ('class', 'BIGINT')],
                         col_info)

        rows = [r for r in rs]
        # BUG FIX: was assertTrue(20, len(rows)), which always passes
        # because the second argument is only the failure message.
        self.assertEqual(20, len(rows))
예제 #4
0
    def test_train(self):
        """Train an XGBoost multi-class model on iris and check that the
        final train/validation error is near zero."""
        datasource = testing.get_datasource()
        original_sql = """SELECT * FROM iris.train
        TO TRAIN xgboost.gbtree
        WITH
            objective="multi:softmax",
            num_boost_round=20,
            num_class=3,
            validation.select="SELECT * FROM iris.test"
        INTO iris.xgboost_train_model_test;
        """

        train_select = "SELECT * FROM iris.train"
        validation_select = "SELECT * FROM iris.test"
        # Extra bookkeeping stored alongside the boosting parameters.
        train_params = {
            "num_boost_round": 20,
            "original_sql": original_sql,
            "feature_column_map": None,
            "label_column": NumericColumn(FieldDesc(name="class")),
            "model_image": "sqlflow:step"
        }
        model_params = {"num_class": 3, "objective": "multi:softmax"}
        eval_result = train(datasource, "xgboost.gbtree", train_select,
                            validation_select, model_params,
                            "iris.xgboost_train_model_test", None,
                            train_params)
        self.assertLess(eval_result['train']['merror'][-1], 0.01)
        self.assertLess(eval_result['validate']['merror'][-1], 0.01)
예제 #5
0
    def test_generator(self):
        """db_generator should yield exactly the (features, label) pairs
        inserted into the freshly prepared fixture table."""
        conn = connect(testing.get_datasource())
        # Rebuild and populate the fixture table from scratch.
        conn.execute(self.drop_statement)
        conn.execute(self.create_statement)
        conn.execute(self.insert_statement)

        column_name_to_type = {
            "features": {
                "feature_name": "features",
                "delimiter": "",
                "dtype": "float32",
                "is_sparse": False,
                "shape": []
            }
        }
        label_meta = {"feature_name": "label", "shape": [], "delimiter": ""}
        gen = db_generator(conn, "SELECT * FROM test_table_float_fea",
                           label_meta)
        # Collect everything the generator yields, then compare in one go.
        produced = []
        for row, label in gen():
            features = read_features_from_row(row, ["features"], ["features"],
                                              column_name_to_type)
            produced.append((features, label))
        expected = [(((1.0, ), ), 0), (((2.0, ), ), 1)]
        self.assertEqual(expected, produced)
예제 #6
0
    def test_query(self):
        """MaxComputeConnection.query: failures are reported through the
        result set (success()/error()) instead of raising; successful
        queries expose rows and lower-cased column type names."""
        conn = MaxComputeConnection(testing.get_datasource())
        rs = conn.query("select * from notexist limit 1")
        self.assertFalse(rs.success())
        self.assertIn("Table not found", rs.error())

        rs = conn.query(
            "select * from alifin_jtest_dev.sqlflow_iris_train limit 1")
        self.assertTrue(rs.success())
        rows = [r for r in rs]
        self.assertEqual(1, len(rows))

        rs = conn.query(
            "select * from alifin_jtest_dev.sqlflow_iris_train limit 20")
        self.assertTrue(rs.success())

        col_info = rs.column_info()
        self.assertEqual([('sepal_length', 'double'),
                          ('sepal_width', 'double'),
                          ('petal_length', 'double'),
                          ('petal_width', 'double'), ('class', 'bigint')],
                         col_info)

        rows = [r for r in rs]
        # BUG FIX: was assertTrue(20, len(rows)), which always passes
        # because the second argument is only the failure message.
        self.assertEqual(20, len(rows))
예제 #7
0
 def test_submit_xgb_train_task(self):
     """Submit an XGBoost training job to PAI on the iris dataset.

     ``feature_columns_code`` is both eval'ed here to build the actual
     feature column objects and passed as source text so it can be
     saved together with the trained model.
     """
     model_params = {
         "booster": "gbtree",
         "eta": 0.4,
         "num_class": 3,
         "objective": "multi:softprob"
     }
     train_params = {"num_boost_round": 10}
     # One numeric feature column per iris measurement.
     feature_columns_code = """
         xgboost_extended.feature_column.numeric_column(
             "sepal_length", shape=[1]),
         xgboost_extended.feature_column.numeric_column(
             "sepal_width", shape=[1]),
         xgboost_extended.feature_column.numeric_column(
             "petal_length", shape=[1]),
         xgboost_extended.feature_column.numeric_column(
             "petal_width", shape=[1])
     """
     submitter.submit_pai_train(
         testing.get_datasource(),
         "XGBoost",
         "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
         "select * from alifin_jtest_dev.sqlflow_iris_train",
         model_params,
         "e2etest_xgb_classify_model",
         None,
         train_params=train_params,
         # eval() of a module-local literal (trusted input, not user data)
         feature_columns=eval("[%s]" % feature_columns_code),
         feature_metas=iris_feature_metas,
         label_meta=iris_label_meta,
         feature_column_names=iris_feature_column_names,
         feature_columns_code=feature_columns_code)
예제 #8
0
    def test_field_type(self):
        """For every known MySQL field type code, create a one-column
        table of that type and verify the type code reported by the
        server matches the MYSQL_FIELD_TYPE_DICT entry."""
        self.assertGreater(len(MYSQL_FIELD_TYPE_DICT), 0)

        conn = connect_with_data_source(testing.get_datasource())

        table_name = "iris.test_mysql_field_type_table"
        drop_table_sql = "DROP TABLE IF EXISTS %s" % table_name
        create_table_sql = "CREATE TABLE IF NOT EXISTS " + \
                           table_name + "(a %s)"
        select_sql = "SELECT * FROM %s" % table_name

        for int_type, str_type in MYSQL_FIELD_TYPE_DICT.items():
            # Character types need an explicit length to be valid DDL.
            if str_type in ["VARCHAR", "CHAR"]:
                str_type += "(255)"

            conn.execute(drop_table_sql)
            conn.execute(create_table_sql % str_type)
            # we are meant to use low layer cursor here to
            # check the type value with the real value returned by mysql
            cursor = conn.cursor()
            try:
                cursor.execute(select_sql)
                int_type_actual = cursor.description[0][1]
            finally:
                # BUG FIX: close the cursor even when execute() raises,
                # so failing iterations do not leak server-side cursors.
                cursor.close()
            conn.execute(drop_table_sql)

            self.assertEqual(int_type_actual, int_type,
                             "%s not match" % str_type)
예제 #9
0
    def test_pai_train_step(self):
        """Exercise the TensorFlow train_step used for PAI training.

        Configures OSS credentials on the TF flags from environment
        variables, derives the OSS model-save location, then runs a
        single DNNClassifier training call.
        """
        from runtime.step.tensorflow.train import train_step
        model_params = dict()
        model_params["hidden_units"] = [10, 20]
        model_params["n_classes"] = 3

        original_sql = """
SELECT * FROM alifin_jtest_dev.sqlflow_test_iris_train
TO TRAIN DNNClassifier
WITH model.n_classes = 3, model.hidden_units = [10, 20]
LABEL class
INTO e2etest_pai_dnn;"""
        datasource = testing.get_datasource()
        save = "e2etest_pai_dnn"

        # OSS credentials come from the environment; assumes the
        # SQLFLOW_OSS_* variables are set — TODO confirm in CI setup.
        FLAGS = define_tf_flags()
        FLAGS.sqlflow_oss_ak = os.getenv("SQLFLOW_OSS_AK")
        FLAGS.sqlflow_oss_sk = os.getenv("SQLFLOW_OSS_SK")
        FLAGS.sqlflow_oss_ep = os.getenv("SQLFLOW_OSS_MODEL_ENDPOINT")

        # The model dir flag must be set before train_step runs.
        oss_path_to_save = pai_model.get_oss_model_save_path(datasource,
                                                             save,
                                                             user="")
        FLAGS.sqlflow_oss_modeldir = pai_model.get_oss_model_url(
            oss_path_to_save)

        train_step(original_sql, "", "DNNClassifier", datasource,
                   "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train", "",
                   "alifin_jtest_dev.sqlflow_iris_train", "", model_params, {},
                   feature_column_map, label_column, save, None)
예제 #10
0
    def test_submit_pai_random_forest_predict_task(self):
        original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_test
TO PREDICT alifin_jtest_dev.pai_rf_predict.class
USING e2e_test_random_forest_wuyi;"""
        predict(testing.get_datasource(), original_sql,
                "SELECT * FROM alifin_jtest_dev.sqlflow_iris_test",
                "e2e_test_random_forest_wuyi", "class", {},
                "alifin_jtest_dev.pai_rf_predict")
예제 #11
0
    def test_submit_pai_xgb_predict_task(self):
        original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_test
TO PREDICT alifin_jtest_dev.pai_xgb_predict.class
USING e2etest_xgb_classify_model;"""
        predict(testing.get_datasource(), original_sql,
                "SELECT * FROM alifin_jtest_dev.sqlflow_iris_test",
                "e2etest_xgb_classify_model", "class", {},
                "alifin_jtest_dev.pai_xgb_predict")
예제 #12
0
 def test_get_table_schema(self):
     """get_table_schema should report the iris columns and types."""
     conn = MaxComputeConnection(testing.get_datasource())
     schema = conn.get_table_schema("sqlflow_iris_train")
     expected = [('sepal_length', 'DOUBLE'), ('sepal_width', 'DOUBLE'),
                 ('petal_length', 'DOUBLE'), ('petal_width', 'DOUBLE'),
                 ('class', 'BIGINT')]
     self.assertEqual(expected, schema)
예제 #13
0
    def test_submit_pai_random_forest_explain_task(self):
        original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_train
TO EXPLAIN e2e_test_random_forest_wuyi
WITH label_col=class
INTO alifin_jtest_dev.e2etest_random_forest_explain_result;"""
        explain(testing.get_datasource(), original_sql,
                "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
                "e2e_test_random_forest_wuyi", {"label_col": "class"},
                "alifin_jtest_dev.e2etest_random_forest_explain_result")
예제 #14
0
    def test_submit_pai_xgb_explain_task(self):
        """Explain a trained PAI XGBoost model into a result table."""
        original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_test
TO EXPLAIN e2etest_xgb_classify_model
WITH label_col=class
INTO alifin_jtest_dev.e2etest_xgb_explain_result;"""
        # NOTE(review): original_sql selects iris_test but the data select
        # below uses iris_train — confirm the mismatch is intended.
        explain(testing.get_datasource(), original_sql,
                "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
                "e2etest_xgb_classify_model", {"label_col": "class"},
                "alifin_jtest_dev.e2etest_xgb_explain_result")
예제 #15
0
 def test_proto_table_writer(self):
     """ProtobufWriter should emit a head message listing the iris
     column names for a small query result."""
     conn = MySQLConnection(testing.get_datasource())
     rs = conn.query("select * from iris.train limit 10;")
     self.assertTrue(rs.success())
     writer = table_writer.ProtobufWriter(rs)
     dumped = writer.dump_strings()
     expected_head = "head { column_names: \"sepal_length\" column_names: \"sepal_width\" column_names: \"petal_length\" column_names: \"petal_width\" column_names: \"class\" }"  # noqa: E501
     self.assertTrue(dumped[0].find(expected_head) >= 0)
예제 #16
0
    def test_submit_pai_tf_evaluate_task(self):
        """Evaluate a trained PAI DNN model, writing metrics to a table."""
        # NOTE(review): original_sql reads "TO EXPLAIN ... INTO
        # pai_dnn_explain_result" although this test calls evaluate()
        # into e2etest_pai_dnn_evaluate_result — looks copy-pasted from
        # the explain test; confirm the recorded SQL is intended.
        original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_test
TO EXPLAIN e2etest_pai_dnn
WITH label_col=class
INTO alifin_jtest_dev.pai_dnn_explain_result;"""
        evaluate(testing.get_datasource(), original_sql,
                 "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
                 "e2etest_pai_dnn", {"validation.metrics": "Accuracy,Recall"},
                 "alifin_jtest_dev.e2etest_pai_dnn_evaluate_result")
예제 #17
0
    def test_submit_pai_xgb_evaluate_task(self):
        original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_test
TO EVALUATE e2etest_xgb_classify_model
WITH validation.metrics=accuracy_score
INTO alifin_jtest_dev.e2etest_pai_xgb_evaluate_result;"""
        evaluate(testing.get_datasource(), original_sql,
                 "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
                 "e2etest_xgb_classify_model",
                 {"validation.metrics": "accuracy_score"},
                 "alifin_jtest_dev.e2etest_pai_xgb_evaluate_result")
예제 #18
0
 def test_submit_pai_random_forest_train_task(self):
     """Train a PAI RandomForests model on the iris table."""
     datasource = testing.get_datasource()
     model_params = {
         "tree_num": 3,
     }
     train(datasource,
           "RandomForests",
           "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
           "",
           model_params,
           "e2e_test_random_forest",
           "",
           feature_column_names=iris_feature_column_names,
           label_meta=iris_label_meta)
예제 #19
0
 def test_exec(self):
     """HiveConnection.exec should run DDL/DML and return a truthy
     result; query() should see the inserted rows."""
     conn = HiveConnection(testing.get_datasource())
     rs = conn.exec("create table test_exec(a int)")
     self.assertTrue(rs)
     rs = conn.exec("insert into test_exec values(1), (2)")
     self.assertTrue(rs)
     rs = conn.query("select * from test_exec")
     self.assertTrue(rs.success())
     rows = [r for r in rs]
     # BUG FIX: was assertTrue(2, len(rows)), which always passes
     # because the second argument is only the failure message.
     self.assertEqual(2, len(rows))
     rs = conn.exec("drop table test_exec")
     self.assertTrue(rs)
예제 #20
0
 def test_submit_pai_kmeans_train_task(self):
     """Train a PAI KMeans model; the "class" column is excluded from
     clustering and the assignments go to an index table."""
     model_params = {
         "excluded_columns": "class",
         "idx_table_name": "alifin_jtest_dev.e2e_test_kmeans_output_idx"
     }
     submitter.submit_pai_train(
         testing.get_datasource(),
         "KMeans",
         "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
         "",
         model_params,
         "e2e_test_kmeans",
         "",
         feature_column_names=[*iris_feature_column_names, "class"])
예제 #21
0
    def test_save_load_db(self):
        """A model saved to the database can be loaded back with its
        metadata intact."""
        table = "sqlflow_models.test_model"
        meta = {"model_params": {"n_classes": 3}}
        datasource = get_datasource()
        model = Model(EstimatorType.XGBOOST, meta)

        # Round trip: save into the DB table, then load it back.
        with temp_file.TemporaryDirectory() as workdir:
            model.save_to_db(datasource, table, workdir)

        with temp_file.TemporaryDirectory() as workdir:
            loaded = Model.load_from_db(datasource, table, workdir)
            self.assertEqual(loaded._meta, meta)
예제 #22
0
 def test_exec(self):
     """MySQLConnection.execute runs DDL/DML, query() sees the rows,
     and executing invalid SQL raises."""
     conn = MySQLConnection(testing.get_datasource())
     rs = conn.execute("create table test_exec(a int)")
     self.assertTrue(rs)
     rs = conn.execute("insert into test_exec values(1), (2)")
     self.assertTrue(rs)
     rs = conn.query("select * from test_exec")
     self.assertTrue(rs.success())
     rows = [r for r in rs]
     # BUG FIX: was assertTrue(2, len(rows)), which always passes
     # because the second argument is only the failure message.
     self.assertEqual(2, len(rows))
     rs = conn.execute("drop table test_exec")
     self.assertTrue(rs)
     with self.assertRaises(Exception):
         conn.execute("drop table not_exist")
예제 #23
0
    def test_train(self):
        """Full XGBoost lifecycle: train, predict, explain, evaluate.

        Each stage runs inside its own temporary working directory
        (``as_cwd=True``) so intermediate artifacts do not collide.
        """
        ds = testing.get_datasource()
        original_sql = """SELECT * FROM iris.train
        TO TRAIN xgboost.gbtree
        WITH
            objective="multi:softmax",
            num_boost_round=20,
            num_class=3,
            validation.select="SELECT * FROM iris.test"
        INTO iris.xgboost_train_model_test;
        """
        select = "SELECT * FROM iris.train"
        val_select = "SELECT * FROM iris.test"
        train_params = {
            "num_boost_round": 20,
        }
        model_params = {"num_class": 3, "objective": "multi:softmax"}
        # Train, then check the final multi-class error is near zero on
        # both the training and validation sets.
        with temp_file.TemporaryDirectory(as_cwd=True):
            eval_result = train(ds, original_sql, select, val_select,
                                "xgboost.gbtree", "", None,
                                NumericColumn(FieldDesc(name="class")),
                                model_params, train_params, None,
                                "iris.xgboost_train_model_test", None)
            self.assertLess(eval_result['train']['merror'][-1], 0.01)
            self.assertLess(eval_result['validate']['merror'][-1], 0.01)

        # Predict with the freshly trained model.
        with temp_file.TemporaryDirectory(as_cwd=True):
            pred_original_sql = """SELECT * FROM iris.test
            TO PREDICT iris.xgboost_pred_result.pred_val
            USING iris.xgboost_train_model_test;"""
            pred(ds, pred_original_sql, "SELECT * FROM iris.test",
                 "iris.xgboost_train_model_test", "pred_val", model_params,
                 "iris.xgboost_pred_result")

        # Explain the trained model's feature contributions.
        with temp_file.TemporaryDirectory(as_cwd=True):
            explain_original_sql = """SELECT * FROM iris.test
            TO EXPLAIN iris.xgboost_train_model_test
            INTO iris.xgboost_explain_result;"""
            explain(ds, explain_original_sql, "SELECT * FROM iris.test",
                    "iris.xgboost_train_model_test", model_params,
                    "iris.xgboost_explain_result")

        # Evaluate the model against the labeled test set.
        with temp_file.TemporaryDirectory(as_cwd=True):
            evaluate_original_sql = """SELECT * FROM iris.test
            TO EVALUATE iris.xgboost_train_model_test
            WITH label_col=class
            INTO iris.xgboost_evaluate_result;"""
            evaluate(ds, evaluate_original_sql, "SELECT * FROM iris.test",
                     "class", "iris.xgboost_train_model_test", model_params,
                     "iris.xgboost_evaluate_result")
예제 #24
0
 def test_exec(self):
     """MaxComputeConnection.exec runs DDL/DML and query() sees the
     inserted rows."""
     conn = MaxComputeConnection(testing.get_datasource())
     rs = conn.exec(
         "create table alifin_jtest_dev.sqlflow_test_exec(a int)")
     self.assertTrue(rs)
     rs = conn.exec(
         "insert into alifin_jtest_dev.sqlflow_test_exec values(1), (2)")
     self.assertTrue(rs)
     rs = conn.query("select * from alifin_jtest_dev.sqlflow_test_exec")
     self.assertTrue(rs.success())
     rows = [r for r in rs]
     # BUG FIX: was assertTrue(2, len(rows)), which always passes
     # because the second argument is only the failure message.
     self.assertEqual(2, len(rows))
     rs = conn.exec("drop table alifin_jtest_dev.sqlflow_test_exec")
     self.assertTrue(rs)
예제 #25
0
    def test_submit_pai_train_task(self):
        """Submit a DNNClassifier training job to PAI with the full set
        of training keyword arguments."""
        model_params = dict()
        model_params["hidden_units"] = [10, 20]
        model_params["n_classes"] = 3

        # feature_columns_code will be used to save the training information
        # together with the saved model.
        feature_columns_code = """{"feature_columns": [
            tf.feature_column.numeric_column("sepal_length", shape=[1]),
            tf.feature_column.numeric_column("sepal_width", shape=[1]),
            tf.feature_column.numeric_column("petal_length", shape=[1]),
            tf.feature_column.numeric_column("petal_width", shape=[1]),
        ]}"""
        # eval() of a module-local literal (trusted, not user input),
        # producing the dict of actual tf feature column objects.
        feature_columns = eval(feature_columns_code)

        submitter.submit_pai_train(
            testing.get_datasource(),
            "DNNClassifier",
            "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
            "",
            model_params,
            "e2etest_pai_dnn",
            None,
            feature_columns=feature_columns,
            feature_column_names=iris_feature_column_names,
            feature_column_names_map=iris_feature_column_names_map,
            feature_metas=iris_feature_metas,
            label_meta=iris_label_meta,
            validation_metrics="Accuracy".split(","),
            save="model_save",
            batch_size=1,
            epoch=1,
            validation_steps=1,
            verbose=0,
            max_steps=None,
            validation_start_delay_secs=0,
            validation_throttle_secs=0,
            save_checkpoints_steps=100,
            log_every_n_iter=10,
            load_pretrained_model=False,
            is_pai=True,
            feature_columns_code=feature_columns_code,
            model_repo_image="",
            original_sql='''
SELECT * FROM alifin_jtest_dev.sqlflow_test_iris_train
TO TRAIN DNNClassifier
WITH model.n_classes = 3, model.hidden_units = [10, 20]
LABEL class
INTO e2etest_pai_dnn;''')
예제 #26
0
    def test_submit_pai_random_forest_train_task(self):
        """Train a PAI RandomForests model on the iris table."""
        original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_train
TO TRAIN RandomForests
WITH model.tree_num=3
LABEL class
INTO e2e_test_random_forest;"""
        # NOTE(review): original_sql says INTO e2e_test_random_forest but
        # the model is saved as e2e_test_random_forest_wuyi — confirm.
        train(testing.get_datasource(), original_sql,
              "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train", "",
              "RandomForests", "", feature_column_map, label_column,
              {"tree_num": 3}, {
                  "feature_column_names":
                  iris_feature_column_names,
                  "label_meta":
                  json.loads(label_column.get_field_desc()[0].to_json())
              }, "e2e_test_random_forest_wuyi", None)
예제 #27
0
    def test_submit_pai_kmeans_train_task(self):
        """Train a PAI KMeans model; the "class" column is excluded from
        clustering and cluster indexes go to a separate table."""
        original_sql = """SELECT * FROM alifin_jtest_dev.sqlflow_iris_train
TO TRAIN KMeans
WITH model.excluded_columns="class",
     model.idx_table_name="alifin_jtest_dev.e2e_test_kmeans_output_idx"
INTO e2e_test_kmeans;"""

        # KMeans is unsupervised, so the label column argument is None.
        train(
            testing.get_datasource(), original_sql,
            "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train", "", "KMeans",
            "", feature_column_map, None, {
                "excluded_columns": "class",
                "idx_table_name": "alifin_jtest_dev.e2e_test_kmeans_output_idx"
            }, {"feature_column_names": iris_feature_column_names},
            "e2e_test_kmeans", None)
예제 #28
0
    def test_submit_pai_train_task(self):
        model_params = dict()
        model_params["hidden_units"] = [10, 20]
        model_params["n_classes"] = 3

        original_sql = """
SELECT * FROM alifin_jtest_dev.sqlflow_test_iris_train
TO TRAIN DNNClassifier
WITH model.n_classes = 3, model.hidden_units = [10, 20]
LABEL class
INTO e2etest_pai_dnn;"""

        train(testing.get_datasource(), original_sql,
              "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train", "",
              "DNNClassifier", "", feature_column_map, label_column,
              model_params, {}, "e2etest_pai_dnn", None)
예제 #29
0
    def test_save(self):
        """Model.save()/load() round-trip preserves the metadata."""
        table = "sqlflow_models.test_model"
        meta = {"train_params": {"n_classes": 3}}
        m = Model(EstimatorType.XGBOOST, meta)
        datasource = get_datasource()

        cwd = os.getcwd()
        try:
            # save mode
            with tempfile.TemporaryDirectory() as d:
                os.chdir(d)
                m.save(datasource, table)

            # load model
            with tempfile.TemporaryDirectory() as d:
                os.chdir(d)
                m = load(datasource, table)
                self.assertEqual(m._meta, meta)
        finally:
            # BUG FIX: restore the working directory; the original left
            # the process chdir'ed into an already-deleted temp dir,
            # which can break later tests and TemporaryDirectory cleanup.
            os.chdir(cwd)
예제 #30
0
    def test_submit_xgb_train_task(self):
        """Train an XGBoost classifier on PAI for the iris dataset."""
        # NOTE(review): the recorded SQL below contains a typo
        # ("validatioin.select") and is missing a comma after
        # booster="gbtree" — confirm against the actually submitted SQL
        # before changing; it is stored as-is with the model.
        original_sql = """SELECT * FROM iris.train
TO TRAIN xgboost.gbtree
WITH objective="multi:softprob", num_class=3, eta=0.4, booster="gbtree"
     validatioin.select="select * from alifin_jtest_dev.sqlflow_iris_test"
LABEL class
INTO e2etest_xgb_classify_model;"""
        model_params = {
            "eta": 0.4,
            "num_class": 3,
            "objective": "multi:softprob"
        }
        train_params = {"num_boost_round": 10}
        train(testing.get_datasource(), original_sql,
              "SELECT * FROM alifin_jtest_dev.sqlflow_iris_train",
              "SELECT * FROM alifin_jtest_dev.sqlflow_iris_test",
              "xgboost.gbtree", "", feature_column_map, label_column,
              model_params, train_params, "e2etest_xgb_classify_model", None)