Beispiel #1
0
def test_run_async_cta_query_with_lower_limit(test_client, ctas_method):
    """Async CTA query with a lowered limit: verify status, generated SQL, and CTA flags."""
    if backend() == "mysql":
        # failing
        return

    tmp_table = f"{TEST_ASYNC_LOWER_LIMIT}_{ctas_method.lower()}"
    result = run_sql(
        test_client,
        QUERY,
        cta=True,
        ctas_method=ctas_method,
        async_=True,
        tmp_table=tmp_table,
    )
    query = wait_for_success(result)
    assert QueryStatus.SUCCESS == query.status

    # sqlite's dialect emits an explicit OFFSET clause; other backends use the
    # plain SELECT * form.  (Fixed typo: was `sqllite_select_sql`.)
    sqlite_select_sql = f"SELECT *\nFROM {tmp_table}\nLIMIT {query.limit}\nOFFSET 0"
    assert query.select_sql == (sqlite_select_sql if backend() == "sqlite"
                                else get_select_star(tmp_table, query.limit))

    assert f"CREATE {ctas_method} {tmp_table} AS \n{QUERY}" == query.executed_sql
    assert QUERY == query.sql

    # presto reports the CTAS statement's own single result row; others report 0
    assert query.rows == (1 if backend() == "presto" else 0)
    assert query.limit == 10000
    assert query.select_as_cta
    assert query.select_as_cta_used

    delete_tmp_view_or_table(tmp_table, ctas_method)
Beispiel #2
0
def test_run_async_cta_query(test_client, ctas_method):
    """Run an async CTA query end to end and verify the recorded query metadata."""
    if backend() == "mysql":
        # failing
        return

    tmp_table = f"{TEST_ASYNC_CTA}_{ctas_method.lower()}"
    result = run_sql(
        test_client,
        QUERY,
        cta=True,
        async_=True,
        ctas_method=ctas_method,
        tmp_table=tmp_table,
    )

    query = wait_for_success(result)

    assert query.status == QueryStatus.SUCCESS
    assert get_select_star(tmp_table, query.limit) in query.select_sql
    assert query.executed_sql == f"CREATE {ctas_method} {tmp_table} AS \n{QUERY}"
    assert query.sql == QUERY
    # presto reports one result row for the CTAS itself; others report zero
    assert query.rows == (1 if backend() == "presto" else 0)
    assert query.select_as_cta
    assert query.select_as_cta_used

    delete_tmp_view_or_table(tmp_table, ctas_method)
Beispiel #3
0
def test_import_excel(setup_csv_upload, create_excel_files):
    """Upload an Excel file in fail, append, and replace modes and verify contents."""
    if utils.backend() == "hive":
        pytest.skip("Hive doesn't excel upload.")

    success_msg = (
        f'Excel file "{EXCEL_FILENAME}" uploaded to table "{EXCEL_UPLOAD_TABLE}"'
    )

    # initial upload with fail mode
    resp = upload_excel(EXCEL_FILENAME, EXCEL_UPLOAD_TABLE)
    assert success_msg in resp

    # upload again with fail mode; should fail
    fail_msg = f'Unable to upload Excel file "{EXCEL_FILENAME}" to table "{EXCEL_UPLOAD_TABLE}"'
    resp = upload_excel(EXCEL_FILENAME, EXCEL_UPLOAD_TABLE)
    assert fail_msg in resp

    # upload again with append mode.  (The previous `backend() != "hive"` guard
    # was dead code: hive is already skipped at the top of the test.)
    resp = upload_excel(EXCEL_FILENAME,
                        EXCEL_UPLOAD_TABLE,
                        extra={"if_exists": "append"})
    assert success_msg in resp

    # upload again with replace mode
    resp = upload_excel(EXCEL_FILENAME,
                        EXCEL_UPLOAD_TABLE,
                        extra={"if_exists": "replace"})
    assert success_msg in resp

    # verify the replaced table contains exactly the expected rows
    data = (get_upload_db().get_sqla_engine().execute(
        f"SELECT * from {EXCEL_UPLOAD_TABLE}").fetchall())
    assert data == [(0, "john", 1), (1, "paul", 2)]
Beispiel #4
0
def test_import_csv_enforced_schema(mock_event_logger):
    """CSV uploads must target an allowed schema; verify failures and successes."""
    if utils.backend() == "sqlite":
        pytest.skip("Sqlite doesn't support schema / database creation")

    full_table_name = f"admin_database.{CSV_UPLOAD_TABLE_W_SCHEMA}"

    # no schema specified, fail upload
    resp = upload_csv(
        CSV_FILENAME1, CSV_UPLOAD_TABLE_W_SCHEMA, extra={"schema": None}
    )
    no_schema_err = (
        f'Database "{CSV_UPLOAD_DATABASE}" schema "None" is not allowed for csv uploads'
    )
    assert no_schema_err in resp

    success_msg = f'CSV file "{CSV_FILENAME1}" uploaded to table "{full_table_name}"'
    resp = upload_csv(
        CSV_FILENAME1,
        CSV_UPLOAD_TABLE_W_SCHEMA,
        extra={"schema": "admin_database", "if_exists": "replace"},
    )
    assert success_msg in resp
    mock_event_logger.assert_called_with(
        action="successful_csv_upload",
        database=get_upload_db().name,
        schema="admin_database",
        table=CSV_UPLOAD_TABLE_W_SCHEMA,
    )

    engine = get_upload_db().get_sqla_engine()
    rows = engine.execute(
        f"SELECT * from {ADMIN_SCHEMA_NAME}.{CSV_UPLOAD_TABLE_W_SCHEMA}"
    ).fetchall()
    assert rows == [("john", 1), ("paul", 2)]

    # user specified schema doesn't match, fail
    resp = upload_csv(
        CSV_FILENAME1, CSV_UPLOAD_TABLE_W_SCHEMA, extra={"schema": "gold"}
    )
    forbidden_err = (
        f'Database "{CSV_UPLOAD_DATABASE}" schema "gold" is not allowed for csv uploads'
    )
    assert forbidden_err in resp

    # user specified schema matches the expected schema, append
    if utils.backend() == "hive":
        pytest.skip("Hive database doesn't support append csv uploads.")
    resp = upload_csv(
        CSV_FILENAME1,
        CSV_UPLOAD_TABLE_W_SCHEMA,
        extra={"schema": "admin_database", "if_exists": "append"},
    )
    assert success_msg in resp
Beispiel #5
0
def test_import_excel(mock_event_logger):
    """Excel upload lifecycle: fail/append/replace modes, event logging, ownership."""
    if utils.backend() == "hive":
        pytest.skip("Hive doesn't excel upload.")

    schema = utils.get_example_default_schema()
    full_table_name = f"{schema}.{EXCEL_UPLOAD_TABLE}" if schema else EXCEL_UPLOAD_TABLE
    test_db = get_upload_db()

    success_msg = f'Excel file "{EXCEL_FILENAME}" uploaded to table "{full_table_name}"'

    # initial upload with fail mode
    resp = upload_excel(EXCEL_FILENAME, EXCEL_UPLOAD_TABLE)
    assert success_msg in resp
    mock_event_logger.assert_called_with(
        action="successful_excel_upload",
        database=test_db.name,
        schema=schema,
        table=EXCEL_UPLOAD_TABLE,
    )

    # ensure user is assigned as an owner
    table = SupersetTestCase.get_table(name=EXCEL_UPLOAD_TABLE)
    assert security_manager.find_user("admin") in table.owners

    # upload again with fail mode; should fail
    fail_msg = f'Unable to upload Excel file "{EXCEL_FILENAME}" to table "{EXCEL_UPLOAD_TABLE}"'
    resp = upload_excel(EXCEL_FILENAME, EXCEL_UPLOAD_TABLE)
    assert fail_msg in resp

    # upload again with append mode.  (The previous `backend() != "hive"` guard
    # was dead code: hive is already skipped at the top of the test.)
    resp = upload_excel(EXCEL_FILENAME,
                        EXCEL_UPLOAD_TABLE,
                        extra={"if_exists": "append"})
    assert success_msg in resp

    # upload again with replace mode
    resp = upload_excel(EXCEL_FILENAME,
                        EXCEL_UPLOAD_TABLE,
                        extra={"if_exists": "replace"})
    assert success_msg in resp
    mock_event_logger.assert_called_with(
        action="successful_excel_upload",
        database=test_db.name,
        schema=schema,
        table=EXCEL_UPLOAD_TABLE,
    )

    # verify the final (replaced) table contents
    data = (test_db.get_sqla_engine().execute(
        f"SELECT * from {EXCEL_UPLOAD_TABLE}").fetchall())
    assert data == [(0, "john", 1), (1, "paul", 2)]
Beispiel #6
0
def test_import_excel(mock_event_logger):
    """Excel upload in fail/append/replace modes with event-logger verification."""
    if utils.backend() == "hive":
        pytest.skip("Hive doesn't excel upload.")

    test_db = get_upload_db()

    success_msg = (
        f'Excel file "{EXCEL_FILENAME}" uploaded to table "{EXCEL_UPLOAD_TABLE}"'
    )

    # initial upload with fail mode
    resp = upload_excel(EXCEL_FILENAME, EXCEL_UPLOAD_TABLE)
    assert success_msg in resp
    mock_event_logger.assert_called_with(
        action="successful_excel_upload",
        database=test_db.name,
        schema=None,
        table=EXCEL_UPLOAD_TABLE,
    )

    # upload again with fail mode; should fail
    fail_msg = f'Unable to upload Excel file "{EXCEL_FILENAME}" to table "{EXCEL_UPLOAD_TABLE}"'
    resp = upload_excel(EXCEL_FILENAME, EXCEL_UPLOAD_TABLE)
    assert fail_msg in resp

    # upload again with append mode.  (The previous `backend() != "hive"` guard
    # was dead code: hive is already skipped at the top of the test.)
    resp = upload_excel(
        EXCEL_FILENAME, EXCEL_UPLOAD_TABLE, extra={"if_exists": "append"}
    )
    assert success_msg in resp

    # upload again with replace mode
    resp = upload_excel(
        EXCEL_FILENAME, EXCEL_UPLOAD_TABLE, extra={"if_exists": "replace"}
    )
    assert success_msg in resp
    mock_event_logger.assert_called_with(
        action="successful_excel_upload",
        database=test_db.name,
        schema=None,
        table=EXCEL_UPLOAD_TABLE,
    )

    # verify the final (replaced) table contents
    data = (
        test_db.get_sqla_engine()
        .execute(f"SELECT * from {EXCEL_UPLOAD_TABLE}")
        .fetchall()
    )
    assert data == [(0, "john", 1), (1, "paul", 2)]
Beispiel #7
0
def test_run_sync_query_dont_exist(setup_sqllab, ctas_method):
    """CTA over a missing table: succeeds only on sqlite with the VIEW method."""
    sql_dont_exist = "SELECT name FROM table_dont_exist"
    result = run_sql(sql_dont_exist, cta=True, ctas_method=ctas_method)
    sqlite_view = backend() == "sqlite" and ctas_method == CtasMethod.VIEW
    expected = QueryStatus.SUCCESS if sqlite_view else QueryStatus.FAILED
    assert expected == result["status"], result
    def test_admin_permissions(self):
        """Admin role must satisfy the gamma, alpha, and admin permission checks."""
        if backend() == "hive":
            return

        # each checker receives a fresh permission-tuple fetch, as before
        for check in (
            self.assert_can_gamma,
            self.assert_can_alpha,
            self.assert_can_admin,
        ):
            check(get_perm_tuples("Admin"))
Beispiel #9
0
def test_run_async_query_cta_config(setup_sqllab, ctas_method):
    """Async CTA query writes into the configured CTAS schema."""
    if backend() == "sqlite":
        # sqlite doesn't support schemas
        return

    tmp_table_name = f"{TEST_ASYNC_CTA_CONFIG}_{ctas_method.lower()}"
    result = run_sql(
        QUERY,
        cta=True,
        async_=True,
        ctas_method=ctas_method,
        tmp_table=tmp_table_name,
    )

    # give the celery worker time to finish the async query
    time.sleep(CELERY_SLEEP_TIME)

    query = get_query_by_id(result["query"]["serverId"])
    assert query.status == QueryStatus.SUCCESS
    assert query.select_sql == get_select_star(tmp_table_name, schema=CTAS_SCHEMA_NAME)
    expected_ctas = (
        f"CREATE {ctas_method} {CTAS_SCHEMA_NAME}.{tmp_table_name} AS \n{QUERY}"
    )
    assert expected_ctas == query.executed_sql

    delete_tmp_view_or_table(f"{CTAS_SCHEMA_NAME}.{tmp_table_name}", ctas_method)
Beispiel #10
0
def get_select_star(table: str, limit: int, schema: Optional[str] = None):
    """Build the SELECT * statement expected for *table*.

    For presto/hive the identifiers are quoted first; a schema prefix is
    included only when a schema is provided.
    """
    if backend() in {"presto", "hive"}:
        schema = quote_f(schema)
        table = quote_f(table)
    target = f"{schema}.{table}" if schema else table
    return f"SELECT *\nFROM {target}\nLIMIT {limit}"
Beispiel #11
0
def test_run_async_query_cta_config(test_client, ctas_method):
    """Async CTA query (HTTP client variant) writes into the configured CTAS schema."""
    if backend() in {"sqlite", "mysql"}:
        # sqlite doesn't support schemas, mysql is flaky
        return

    tmp_table_name = f"{TEST_ASYNC_CTA_CONFIG}_{ctas_method.lower()}"
    result = run_sql(
        test_client,
        QUERY,
        cta=True,
        async_=True,
        ctas_method=ctas_method,
        tmp_table=tmp_table_name,
    )

    query = wait_for_success(result)

    assert query.status == QueryStatus.SUCCESS
    expected_select = get_select_star(
        tmp_table_name, limit=query.limit, schema=CTAS_SCHEMA_NAME
    )
    assert expected_select == query.select_sql
    expected_ctas = (
        f"CREATE {ctas_method} {CTAS_SCHEMA_NAME}.{tmp_table_name} AS \n{QUERY}"
    )
    assert expected_ctas == query.executed_sql

    delete_tmp_view_or_table(f"{CTAS_SCHEMA_NAME}.{tmp_table_name}", ctas_method)
    def test_create_dataset_same_name_different_schema(self):
        """A dataset may reuse a table name as long as the schema differs."""
        if backend() == "sqlite":
            # sqlite doesn't support schemas
            return

        example_db = get_example_database()
        example_db.get_sqla_engine().execute(
            f"CREATE TABLE {CTAS_SCHEMA_NAME}.birth_names AS SELECT 2 as two")

        self.login(username="******")
        table_data = {
            "database": example_db.id,
            "schema": CTAS_SCHEMA_NAME,
            "table_name": "birth_names",
        }

        rv = self.post_assert_metric("api/v1/dataset/", table_data, "post")
        assert rv.status_code == 201

        # cleanup: delete the created dataset, then drop the backing table
        dataset_id = json.loads(rv.data.decode("utf-8")).get("id")
        rv = self.client.delete(f"api/v1/dataset/{dataset_id}")
        assert rv.status_code == 200
        example_db.get_sqla_engine().execute(
            f"DROP TABLE {CTAS_SCHEMA_NAME}.birth_names")
    def test_get_dashboard_view__user_access_with_dashboard_permission(self):
        """A user granted access to a dashboard can view it and query its charts."""
        if backend() == "hive":
            return

        # arrange
        username = random_str()
        new_role = f"role_{random_str()}"
        self.create_user_with_roles(username, [new_role],
                                    should_create_roles=True)

        # renamed from `slice`, which shadowed the builtin
        slc = (db.session.query(Slice).filter_by(
            slice_name="Girl Name Cloud").one_or_none())
        dashboard_to_access = create_dashboard_to_db(published=True,
                                                     slices=[slc])
        self.login(username)
        grant_access_to_dashboard(dashboard_to_access, new_role)

        # act
        response = self.get_dashboard_view_response(dashboard_to_access)

        # assert
        self.assert200(response)

        request_payload = get_query_context("birth_names")
        rv = self.post_assert_metric(CHART_DATA_URI, request_payload, "data")
        self.assertEqual(rv.status_code, 200)

        # post: revoke the temporary grant
        revoke_access_to_dashboard(dashboard_to_access, new_role)
Beispiel #14
0
def test_run_sync_query_cta_config(test_client, ctas_method):
    """Sync CTA query uses the configured CTAS schema and is re-queryable."""
    if backend() == "sqlite":
        # sqlite doesn't support schemas
        return

    tmp_table_name = f"{TEST_SYNC_CTA}_{ctas_method.lower()}"
    result = run_sql(
        test_client, QUERY, cta=True, ctas_method=ctas_method, tmp_table=tmp_table_name
    )
    assert result["query"]["state"] == QueryStatus.SUCCESS, result
    assert (result["data"], result["columns"]) == cta_result(ctas_method)

    query = get_query_by_id(result["query"]["serverId"])
    expected_ctas = (
        f"CREATE {ctas_method} {CTAS_SCHEMA_NAME}.{tmp_table_name} AS \n{QUERY}"
    )
    assert expected_ctas == query.executed_sql
    assert query.select_sql == get_select_star(
        tmp_table_name, limit=query.limit, schema=CTAS_SCHEMA_NAME
    )

    # the generated SELECT must itself run successfully
    results = run_sql(test_client, query.select_sql)
    assert results["status"] == QueryStatus.SUCCESS, result

    delete_tmp_view_or_table(f"{CTAS_SCHEMA_NAME}.{tmp_table_name}", ctas_method)
Beispiel #15
0
def test_run_sync_query_dont_exist(test_client, ctas_method):
    """Query a nonexistent table and verify the per-backend error payload.

    sqlite + VIEW succeeds; presto returns TABLE_DOES_NOT_EXIST_ERROR with
    issue codes 1003/1005; every other backend returns a
    GENERIC_DB_ENGINE_ERROR with issue code 1002.
    """
    examples_db = get_example_database()
    engine_name = examples_db.db_engine_spec.engine_name
    sql_dont_exist = "SELECT name FROM table_dont_exist"
    result = run_sql(test_client,
                     sql_dont_exist,
                     cta=True,
                     ctas_method=ctas_method)
    # NOTE(review): sqlite appears to accept creating a VIEW over a missing
    # table, so this combination is expected to succeed — confirm against docs
    if backend() == "sqlite" and ctas_method == CtasMethod.VIEW:
        assert QueryStatus.SUCCESS == result["status"], result
    elif backend() == "presto":
        # presto recognises the missing table and reports a specific error type
        assert (result["errors"][0]["error_type"] ==
                SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR)
        assert result["errors"][0]["level"] == ErrorLevel.ERROR
        assert result["errors"][0]["extra"] == {
            "engine_name":
            "Presto",
            "issue_codes": [
                {
                    "code":
                    1003,
                    "message":
                    "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo.",
                },
                {
                    "code":
                    1005,
                    "message":
                    "Issue 1005 - The table was deleted or renamed in the database.",
                },
            ],
        }
    else:
        # all other backends fall back to the generic DB-engine error
        assert (result["errors"][0]["error_type"] ==
                SupersetErrorType.GENERIC_DB_ENGINE_ERROR)
        assert result["errors"][0]["level"] == ErrorLevel.ERROR
        assert result["errors"][0]["extra"] == {
            "issue_codes": [{
                "code":
                1002,
                "message":
                "Issue 1002 - The database returned an unexpected error.",
            }],
            "engine_name":
            engine_name,
        }
Beispiel #16
0
def test_import_csv_explore_database(setup_csv_upload, create_csv_files):
    """A CSV upload lands in the example database used for exploration."""
    if utils.backend() == "sqlite":
        pytest.skip("Sqlite doesn't support schema / database creation")

    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE_W_EXPLORE)
    success_msg = (
        f'CSV file "{CSV_FILENAME1}" uploaded to table "{CSV_UPLOAD_TABLE_W_EXPLORE}"'
    )
    assert success_msg in resp
    table = SupersetTestCase.get_table_by_name(CSV_UPLOAD_TABLE_W_EXPLORE)
    assert table.database_id == utils.get_example_database().id
Beispiel #17
0
def test_import_csv_explore_database(setup_csv_upload, create_csv_files):
    """CSV upload lands in the example database; expected name includes the default schema."""
    if utils.backend() == "sqlite":
        pytest.skip("Sqlite doesn't support schema / database creation")

    # compute the expected table reference only after the skip guard
    schema = utils.get_example_default_schema()
    full_table_name = (f"{schema}.{CSV_UPLOAD_TABLE_W_EXPLORE}"
                       if schema else CSV_UPLOAD_TABLE_W_EXPLORE)

    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE_W_EXPLORE)
    assert f'CSV file "{CSV_FILENAME1}" uploaded to table "{full_table_name}"' in resp
    table = SupersetTestCase.get_table(name=CSV_UPLOAD_TABLE_W_EXPLORE)
    assert table.database_id == utils.get_example_database().id
Beispiel #18
0
def test_run_async_cta_query_with_lower_limit(setup_sqllab, ctas_method):
    """Async CTA without an explicit limit records select_sql and a null limit."""
    if backend() == "mysql":
        # failing
        return

    tmp_table = f"{TEST_ASYNC_LOWER_LIMIT}_{ctas_method.lower()}"
    result = run_sql(
        QUERY, cta=True, ctas_method=ctas_method, async_=True, tmp_table=tmp_table
    )
    query = wait_for_success(result)

    assert query.status == QueryStatus.SUCCESS

    assert query.select_sql == get_select_star(tmp_table)
    assert query.executed_sql == f"CREATE {ctas_method} {tmp_table} AS \n{QUERY}"
    assert query.sql == QUERY
    # presto reports the CTAS result row; other backends report none
    assert query.rows == (1 if backend() == "presto" else 0)
    assert query.limit is None
    assert query.select_as_cta
    assert query.select_as_cta_used

    delete_tmp_view_or_table(tmp_table, ctas_method)
    def test_sql_json(self):
        """Exercise sql_json with a valid query and a missing-table query.

        The error payload shape differs by backend: presto returns a
        TABLE_DOES_NOT_EXIST_ERROR with issue codes 1003/1005; every other
        backend returns a GENERIC_DB_ENGINE_ERROR with issue code 1002.
        """
        examples_db = get_example_database()
        engine_name = examples_db.db_engine_spec.engine_name

        self.login("admin")

        # happy path: a simple SELECT returns at least one row
        data = self.run_sql("SELECT * FROM birth_names LIMIT 10", "1")
        self.assertLess(0, len(data["data"]))

        # error path: query a table that does not exist
        data = self.run_sql("SELECT * FROM unexistant_table", "2")
        if backend() == "presto":
            # presto recognises the missing table and reports a specific type
            assert (data["errors"][0]["error_type"] ==
                    SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR)
            assert data["errors"][0]["level"] == ErrorLevel.ERROR
            assert data["errors"][0]["extra"] == {
                "engine_name":
                "Presto",
                "issue_codes": [
                    {
                        "code":
                        1003,
                        "message":
                        "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo.",
                    },
                    {
                        "code":
                        1005,
                        "message":
                        "Issue 1005 - The table was deleted or renamed in the database.",
                    },
                ],
            }
        else:
            # all other backends fall back to the generic DB-engine error
            assert (data["errors"][0]["error_type"] ==
                    SupersetErrorType.GENERIC_DB_ENGINE_ERROR)
            assert data["errors"][0]["level"] == ErrorLevel.ERROR
            assert data["errors"][0]["extra"] == {
                "issue_codes": [{
                    "code":
                    1002,
                    "message":
                    "Issue 1002 - The database returned an unexpected error.",
                }],
                "engine_name":
                engine_name,
            }
Beispiel #20
0
def test_run_sync_query_dont_exist(setup_sqllab, ctas_method):
    """Missing source table: sqlite VIEW succeeds, anything else yields a generic DB error."""
    sql_dont_exist = "SELECT name FROM table_dont_exist"
    result = run_sql(sql_dont_exist, cta=True, ctas_method=ctas_method)
    if backend() == "sqlite" and ctas_method == CtasMethod.VIEW:
        assert QueryStatus.SUCCESS == result["status"], result
    else:
        error = result["errors"][0]
        assert error["error_type"] == SupersetErrorType.GENERIC_DB_ENGINE_ERROR
        assert error["level"] == ErrorLevel.ERROR
        assert error["extra"] == {
            "issue_codes": [
                {
                    "code": 1002,
                    "message": "Issue 1002 - The database returned an unexpected error.",
                }
            ]
        }
Beispiel #21
0
def cta_result(ctas_method: CtasMethod):
    """Expected (data, columns) pair for a CTA statement's own result set.

    Only presto returns a result row for the CTAS itself; other backends
    return empty data and columns.
    """
    if backend() != "presto":
        return [], []
    if ctas_method == CtasMethod.TABLE:
        data = [{"rows": 1}]
        columns = [{"name": "rows", "type": "BIGINT", "is_dttm": False}]
    else:
        data = [{"result": True}]
        columns = [{"name": "result", "type": "BOOLEAN", "is_dttm": False}]
    return data, columns
Beispiel #22
0
def test_run_async_cta_query_with_lower_limit(setup_sqllab, ctas_method):
    """Async CTA query without a limit: verify metadata after the celery task runs."""
    tmp_table = f"{TEST_ASYNC_LOWER_LIMIT}_{ctas_method.lower()}"
    result = run_sql(
        QUERY, cta=True, ctas_method=ctas_method, async_=True, tmp_table=tmp_table
    )
    # wait for the celery worker to process the async query
    time.sleep(CELERY_SLEEP_TIME)

    query = get_query_by_id(result["query"]["serverId"])
    assert query.status == QueryStatus.SUCCESS

    assert query.select_sql == get_select_star(tmp_table)
    assert query.executed_sql == f"CREATE {ctas_method} {tmp_table} AS \n{QUERY}"
    assert query.sql == QUERY
    # presto reports the CTAS result row; other backends report none
    assert query.rows == (1 if backend() == "presto" else 0)
    assert query.limit is None
    assert query.select_as_cta
    assert query.select_as_cta_used
Beispiel #23
0
def test_should_generate_closed_and_open_time_filter_range():
    """Time filter is closed on from_dttm (>=) and open on to_dttm (<).

    Builds a virtual table of five literal timestamps; the
    [2022-01-01, 2023-01-01) window is expected to match exactly two rows.
    """
    with app.app_context():
        # the TO_TIMESTAMP syntax expected below is postgres-specific
        if backend() != "postgresql":
            pytest.skip(
                f"{backend()} has different dialect for datetime column")

        # virtual dataset: a UNION of literal timestamps
        table = SqlaTable(
            table_name="temporal_column_table",
            sql=("SELECT '2021-12-31'::timestamp as datetime_col "
                 "UNION SELECT '2022-01-01'::timestamp "
                 "UNION SELECT '2022-03-10'::timestamp "
                 "UNION SELECT '2023-01-01'::timestamp "
                 "UNION SELECT '2023-03-10'::timestamp "),
            database=get_example_database(),
        )
        # temporal column used as the filter granularity
        TableColumn(
            column_name="datetime_col",
            type="TIMESTAMP",
            table=table,
            is_dttm=True,
        )
        SqlMetric(metric_name="count", expression="count(*)", table=table)
        result_object = table.query({
            "metrics": ["count"],
            "is_timeseries": False,
            "filter": [],
            "from_dttm": datetime(2022, 1, 1),
            "to_dttm": datetime(2023, 1, 1),
            "granularity": "datetime_col",
        })
        """ >>> result_object.query
                SELECT count(*) AS count
                FROM
                  (SELECT '2021-12-31'::timestamp as datetime_col
                   UNION SELECT '2022-01-01'::timestamp
                   UNION SELECT '2022-03-10'::timestamp
                   UNION SELECT '2023-01-01'::timestamp
                   UNION SELECT '2023-03-10'::timestamp) AS virtual_table
                WHERE datetime_col >= TO_TIMESTAMP('2022-01-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
                  AND datetime_col < TO_TIMESTAMP('2023-01-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
        """
        # closed lower bound includes 2022-01-01; open upper bound excludes
        # 2023-01-01, leaving 2022-01-01 and 2022-03-10 → count == 2
        assert result_object.df.iloc[0]["count"] == 2
Beispiel #24
0
def test_run_async_cta_query(setup_sqllab, ctas_method):
    """Async CTA query: wait for celery, then verify the recorded query."""
    table_name = f"{TEST_ASYNC_CTA}_{ctas_method.lower()}"
    result = run_sql(
        QUERY, cta=True, ctas_method=ctas_method, async_=True, tmp_table=table_name
    )

    # allow the celery worker time to execute the query
    time.sleep(CELERY_SLEEP_TIME)

    query = get_query_by_id(result["query"]["serverId"])
    assert query.status == QueryStatus.SUCCESS
    assert get_select_star(table_name) in query.select_sql

    assert query.executed_sql == f"CREATE {ctas_method} {table_name} AS \n{QUERY}"
    assert query.sql == QUERY
    # presto reports the CTAS result row; other backends report none
    assert query.rows == (1 if backend() == "presto" else 0)
    assert query.select_as_cta
    assert query.select_as_cta_used

    delete_tmp_view_or_table(table_name, ctas_method)
Beispiel #25
0
def test_run_sync_query_cta_config(setup_sqllab, ctas_method):
    """Sync CTA query with a configured schema; re-run the generated SELECT."""
    if backend() == "sqlite":
        # sqlite doesn't support schemas
        return

    tmp_table_name = f"{TEST_SYNC_CTA}_{ctas_method.lower()}"
    result = run_sql(QUERY, cta=True, ctas_method=ctas_method, tmp_table=tmp_table_name)
    assert result["query"]["state"] == QueryStatus.SUCCESS, result
    assert (result["data"], result["columns"]) == cta_result(ctas_method)

    query = get_query_by_id(result["query"]["serverId"])
    expected_ctas = (
        f"CREATE {ctas_method} {CTAS_SCHEMA_NAME}.{tmp_table_name} AS \n{QUERY}"
    )
    assert expected_ctas == query.executed_sql

    assert query.select_sql == get_select_star(tmp_table_name, schema=CTAS_SCHEMA_NAME)
    # pause before querying the freshly created object, as the original did
    time.sleep(CELERY_SLEEP_TIME)
    results = run_sql(query.select_sql)
    assert results["status"] == QueryStatus.SUCCESS, result
def test_import_csv_enforced_schema(setup_csv_upload, create_csv_files):
    """CSV upload must honor the schemas allowed for the upload database."""
    if utils.backend() == "sqlite":
        pytest.skip("Sqlite doesn't support schema / database creation")

    full_table_name = f"admin_database.{CSV_UPLOAD_TABLE_W_SCHEMA}"

    # With no schema specified the upload is rejected.
    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE_W_SCHEMA)
    expected_err = (
        f'Database "{CSV_UPLOAD_DATABASE}" schema "None" is not allowed for csv uploads'
    )
    assert expected_err in resp

    # The allowed schema works for both append and replace modes.
    success_msg = f'CSV file "{CSV_FILENAME1}" uploaded to table "{full_table_name}"'
    for mode in ("append", "replace"):
        resp = upload_csv(
            CSV_FILENAME1,
            CSV_UPLOAD_TABLE_W_SCHEMA,
            extra={"schema": "admin_database", "if_exists": mode},
        )
        assert success_msg in resp

    # A schema outside the allow-list is rejected.
    resp = upload_csv(
        CSV_FILENAME1, CSV_UPLOAD_TABLE_W_SCHEMA, extra={"schema": "gold"}
    )
    assert (
        f'Database "{CSV_UPLOAD_DATABASE}" schema "gold" is not allowed for csv uploads'
        in resp
    )
Beispiel #27
0
    def test_export_database_command(self, mock_g):
        """Export the examples database and verify the YAML payloads.

        Runs ``ExportDatabasesCommand`` as the admin user and checks that
        (a) the expected set of exported files is present, (b) the database
        YAML matches the examples DB config, and (c) the ``birth_names``
        dataset YAML matches column-for-column, with backend-specific types.
        """
        mock_g.user = security_manager.find_user("admin")

        example_db = get_example_database()
        db_uuid = example_db.uuid

        command = ExportDatabasesCommand([example_db.id])
        # command.run() yields (filename, contents) pairs.
        contents = dict(command.run())

        # TODO: this list shouldn't depend on the order in which unit tests are run
        # or on the backend; for now use a stable subset
        core_files = {
            "metadata.yaml",
            "databases/examples.yaml",
            "datasets/examples/energy_usage.yaml",
            "datasets/examples/wb_health_population.yaml",
            "datasets/examples/birth_names.yaml",
        }
        expected_extra = {
            "engine_params": {},
            "metadata_cache_timeout": {},
            "metadata_params": {},
            "schemas_allowed_for_csv_upload": [],
        }
        # Presto's example DB is configured with custom engine params.
        if backend() == "presto":
            expected_extra = {
                "engine_params": {
                    "connect_args": {
                        "poll_interval": 0.1
                    }
                }
            }

        assert core_files.issubset(set(contents.keys()))

        # Exported column types depend on the example database's backend.
        if example_db.backend == "postgresql":
            ds_type = "TIMESTAMP WITHOUT TIME ZONE"
        elif example_db.backend == "hive":
            ds_type = "TIMESTAMP"
        elif example_db.backend == "presto":
            ds_type = "VARCHAR(255)"
        else:
            ds_type = "DATETIME"
        if example_db.backend == "mysql":
            big_int_type = "BIGINT(20)"
        else:
            big_int_type = "BIGINT"
        metadata = yaml.safe_load(contents["databases/examples.yaml"])
        assert metadata == ({
            "allow_csv_upload": True,
            "allow_ctas": True,
            "allow_cvas": True,
            "allow_run_async": False,
            "cache_timeout": None,
            "database_name": "examples",
            "expose_in_sqllab": True,
            "extra": expected_extra,
            "sqlalchemy_uri": example_db.sqlalchemy_uri,
            "uuid": str(example_db.uuid),
            "version": "1.0.0",
        })

        metadata = yaml.safe_load(
            contents["datasets/examples/birth_names.yaml"])
        # uuid is generated per-run, so exclude it from the comparison.
        metadata.pop("uuid")

        # Both sides are sorted by column_name so the comparison does not
        # depend on export ordering.
        metadata["columns"].sort(key=lambda x: x["column_name"])
        expected_metadata = {
            "cache_timeout":
            None,
            "columns": [
                {
                    "column_name": "ds",
                    "description": None,
                    "expression": None,
                    "filterable": True,
                    "groupby": True,
                    "is_active": True,
                    "is_dttm": True,
                    "python_date_format": None,
                    "type": ds_type,
                    "verbose_name": None,
                },
                {
                    "column_name": "gender",
                    "description": None,
                    "expression": None,
                    "filterable": True,
                    "groupby": True,
                    "is_active": True,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": "STRING"
                    if example_db.backend == "hive" else "VARCHAR(16)",
                    "verbose_name": None,
                },
                {
                    "column_name": "name",
                    "description": None,
                    "expression": None,
                    "filterable": True,
                    "groupby": True,
                    "is_active": True,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": "STRING"
                    if example_db.backend == "hive" else "VARCHAR(255)",
                    "verbose_name": None,
                },
                {
                    "column_name": "num",
                    "description": None,
                    "expression": None,
                    "filterable": True,
                    "groupby": True,
                    "is_active": True,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": big_int_type,
                    "verbose_name": None,
                },
                {
                    "column_name": "num_california",
                    "description": None,
                    "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    "filterable": True,
                    "groupby": True,
                    "is_active": True,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": None,
                    "verbose_name": None,
                },
                {
                    "column_name": "state",
                    "description": None,
                    "expression": None,
                    "filterable": True,
                    "groupby": True,
                    "is_active": True,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": "STRING"
                    if example_db.backend == "hive" else "VARCHAR(10)",
                    "verbose_name": None,
                },
                {
                    "column_name": "num_boys",
                    "description": None,
                    "expression": None,
                    "filterable": True,
                    "groupby": True,
                    "is_active": True,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": big_int_type,
                    "verbose_name": None,
                },
                {
                    "column_name": "num_girls",
                    "description": None,
                    "expression": None,
                    "filterable": True,
                    "groupby": True,
                    "is_active": True,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": big_int_type,
                    "verbose_name": None,
                },
            ],
            "database_uuid":
            str(db_uuid),
            "default_endpoint":
            None,
            "description":
            "",
            "extra":
            None,
            "fetch_values_predicate":
            None,
            "filter_select_enabled":
            True,
            "main_dttm_col":
            "ds",
            "metrics": [
                {
                    "d3format": None,
                    "description": None,
                    "expression": "COUNT(*)",
                    "extra": None,
                    "metric_name": "count",
                    "metric_type": "count",
                    "verbose_name": "COUNT(*)",
                    "warning_text": None,
                },
                {
                    "d3format": None,
                    "description": None,
                    "expression": "SUM(num)",
                    "extra": None,
                    "metric_name": "sum__num",
                    "metric_type": None,
                    "verbose_name": None,
                    "warning_text": None,
                },
            ],
            "offset":
            0,
            "params":
            None,
            "schema":
            None,
            "sql":
            None,
            "table_name":
            "birth_names",
            "template_params":
            None,
            "version":
            "1.0.0",
        }
        expected_metadata["columns"].sort(key=lambda x: x["column_name"])
        assert metadata == expected_metadata
Beispiel #28
0
    def test_export_database_command(self, mock_g):
        """Export the examples database with a customized ``birth_names`` dataset.

        Variant of the export test where the dataset carries user edits
        (description, offsets, custom metrics/columns); verifies the export
        YAML reflects those customizations verbatim.
        """
        mock_g.user = security_manager.find_user("admin")

        example_db = get_example_database()
        command = ExportDatabasesCommand([example_db.id])
        # command.run() yields (filename, contents) pairs.
        contents = dict(command.run())

        # TODO: this list shouldn't depend on the order in which unit tests are run
        # or on the backend; for now use a stable subset
        core_files = {
            "metadata.yaml",
            "databases/examples.yaml",
            "datasets/examples/energy_usage.yaml",
            "datasets/examples/wb_health_population.yaml",
            "datasets/examples/birth_names.yaml",
        }
        expected_extra = {
            "engine_params": {},
            "metadata_cache_timeout": {},
            "metadata_params": {},
            "schemas_allowed_for_csv_upload": [],
        }
        # Presto's example DB is configured with custom engine params.
        if backend() == "presto":
            expected_extra = {
                "engine_params": {
                    "connect_args": {
                        "poll_interval": 0.1
                    }
                }
            }

        assert core_files.issubset(set(contents.keys()))

        metadata = yaml.safe_load(contents["databases/examples.yaml"])
        assert metadata == ({
            "allow_csv_upload": True,
            "allow_ctas": True,
            "allow_cvas": True,
            "allow_run_async": False,
            "cache_timeout": None,
            "database_name": "examples",
            "expose_in_sqllab": True,
            "extra": expected_extra,
            "sqlalchemy_uri": example_db.sqlalchemy_uri,
            "uuid": str(example_db.uuid),
            "version": "1.0.0",
        })

        metadata = yaml.safe_load(
            contents["datasets/examples/birth_names.yaml"])
        # uuid is generated per-run, so exclude it from the comparison.
        metadata.pop("uuid")
        assert metadata == {
            "table_name":
            "birth_names",
            "main_dttm_col":
            None,
            "description":
            "Adding a DESCRip",
            "default_endpoint":
            "",
            "offset":
            66,
            "cache_timeout":
            55,
            "schema":
            "",
            "sql":
            "",
            "params":
            None,
            "template_params":
            None,
            "filter_select_enabled":
            True,
            "fetch_values_predicate":
            None,
            "extra":
            None,
            "metrics": [
                {
                    "metric_name": "ratio",
                    "verbose_name": "Ratio Boys/Girls",
                    "metric_type": None,
                    "expression": "sum(sum_boys) / sum(sum_girls)",
                    "description": "This represents the ratio of boys/girls",
                    "d3format": ".2%",
                    "extra": None,
                    "warning_text": "no warning",
                },
                {
                    "metric_name": "sum__num",
                    "verbose_name": "Babies",
                    "metric_type": None,
                    "expression": "SUM(num)",
                    "description": "",
                    "d3format": "",
                    "extra": None,
                    "warning_text": "",
                },
                {
                    "metric_name": "count",
                    "verbose_name": "",
                    "metric_type": None,
                    "expression": "count(1)",
                    "description": None,
                    "d3format": None,
                    "extra": None,
                    "warning_text": None,
                },
            ],
            "columns": [
                {
                    "column_name": "num_california",
                    "verbose_name": None,
                    "is_dttm": False,
                    "is_active": None,
                    "type": "NUMBER",
                    "groupby": False,
                    "filterable": False,
                    "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    "description": None,
                    "python_date_format": None,
                },
                {
                    "column_name": "ds",
                    "verbose_name": "",
                    "is_dttm": True,
                    "is_active": None,
                    "type": "DATETIME",
                    "groupby": True,
                    "filterable": True,
                    "expression": "",
                    "description": None,
                    "python_date_format": None,
                },
                {
                    "column_name": "sum_girls",
                    "verbose_name": None,
                    "is_dttm": False,
                    "is_active": None,
                    "type": "BIGINT(20)",
                    "groupby": False,
                    "filterable": False,
                    "expression": "",
                    "description": None,
                    "python_date_format": None,
                },
                {
                    "column_name": "gender",
                    "verbose_name": None,
                    "is_dttm": False,
                    "is_active": None,
                    "type": "VARCHAR(16)",
                    "groupby": True,
                    "filterable": True,
                    "expression": "",
                    "description": None,
                    "python_date_format": None,
                },
                {
                    "column_name": "state",
                    "verbose_name": None,
                    "is_dttm": None,
                    "is_active": None,
                    "type": "VARCHAR(10)",
                    "groupby": True,
                    "filterable": True,
                    "expression": None,
                    "description": None,
                    "python_date_format": None,
                },
                {
                    "column_name": "sum_boys",
                    "verbose_name": None,
                    "is_dttm": None,
                    "is_active": None,
                    "type": "BIGINT(20)",
                    "groupby": True,
                    "filterable": True,
                    "expression": None,
                    "description": None,
                    "python_date_format": None,
                },
                {
                    "column_name": "num",
                    "verbose_name": None,
                    "is_dttm": None,
                    "is_active": None,
                    "type": "BIGINT(20)",
                    "groupby": True,
                    "filterable": True,
                    "expression": None,
                    "description": None,
                    "python_date_format": None,
                },
                {
                    "column_name": "name",
                    "verbose_name": None,
                    "is_dttm": None,
                    "is_active": None,
                    "type": "VARCHAR(255)",
                    "groupby": True,
                    "filterable": True,
                    "expression": None,
                    "description": None,
                    "python_date_format": None,
                },
            ],
            "version":
            "1.0.0",
            "database_uuid":
            str(example_db.uuid),
        }
def test_import_csv(setup_csv_upload, create_csv_files):
    """Exercise CSV upload in fail/append/replace modes and null handling."""
    success_msg_f1 = (
        f'CSV file "{CSV_FILENAME1}" uploaded to table "{CSV_UPLOAD_TABLE}"')

    # First upload (default fail mode) succeeds on a fresh table.
    assert success_msg_f1 in upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE)

    # A second upload in fail mode is rejected since the table now exists.
    fail_msg = (
        f'Unable to upload CSV file "{CSV_FILENAME1}" to table "{CSV_UPLOAD_TABLE}"'
    )
    assert fail_msg in upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE)

    if utils.backend() != "hive":
        # Appending the same file works on every backend but hive.
        resp = upload_csv(
            CSV_FILENAME1, CSV_UPLOAD_TABLE, extra={"if_exists": "append"}
        )
        assert success_msg_f1 in resp

    # Replace mode always succeeds.
    resp = upload_csv(
        CSV_FILENAME1, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"}
    )
    assert success_msg_f1 in resp

    # Appending a file with a different schema must fail...
    resp = upload_csv(
        CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "append"}
    )
    fail_msg_f2 = (
        f'Unable to upload CSV file "{CSV_FILENAME2}" to table "{CSV_UPLOAD_TABLE}"'
    )
    assert fail_msg_f2 in resp

    # ...but replacing the table with it is fine.
    resp = upload_csv(
        CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"}
    )
    success_msg_f2 = (
        f'CSV file "{CSV_FILENAME2}" uploaded to table "{CSV_UPLOAD_TABLE}"')
    assert success_msg_f2 in resp

    # The replacing file's new column must show up in the table metadata.
    table = SupersetTestCase.get_table_by_name(CSV_UPLOAD_TABLE)
    assert "d" in table.column_names

    # Custom null values: "" and "john" should be stored as NULL.
    upload_csv(
        CSV_FILENAME2,
        CSV_UPLOAD_TABLE,
        extra={"null_values": '["", "john"]', "if_exists": "replace"},
    )
    engine = get_upload_db().get_sqla_engine()
    data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE}").fetchall()
    assert data == [(None, 1, "x"), ("paul", 2, None)]

    # With the default null values only missing entries become NULL.
    upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"})
    data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE}").fetchall()
    assert data == [("john", 1, "x"), ("paul", 2, None)]
Beispiel #30
0
def test_import_csv(setup_csv_upload, create_csv_files):
    """Exercise CSV upload modes and null handling, with hive-specific checks."""
    success_msg_f1 = (
        f'CSV file "{CSV_FILENAME1}" uploaded to table "{CSV_UPLOAD_TABLE}"')

    # First upload (default fail mode) succeeds on a fresh table.
    assert success_msg_f1 in upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE)

    # A second upload in fail mode is rejected since the table now exists.
    fail_msg = (
        f'Unable to upload CSV file "{CSV_FILENAME1}" to table "{CSV_UPLOAD_TABLE}"'
    )
    assert fail_msg in upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE)

    if utils.backend() != "hive":
        # Appending the same file works on every backend but hive.
        resp = upload_csv(
            CSV_FILENAME1, CSV_UPLOAD_TABLE, extra={"if_exists": "append"}
        )
        assert success_msg_f1 in resp

    # Replace mode always succeeds.
    resp = upload_csv(
        CSV_FILENAME1, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"}
    )
    assert success_msg_f1 in resp

    # Appending a file with a different schema must fail...
    resp = upload_csv(
        CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "append"}
    )
    fail_msg_f2 = (
        f'Unable to upload CSV file "{CSV_FILENAME2}" to table "{CSV_UPLOAD_TABLE}"'
    )
    assert fail_msg_f2 in resp

    # ...but replacing the table with it is fine.
    resp = upload_csv(
        CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"}
    )
    success_msg_f2 = (
        f'CSV file "{CSV_FILENAME2}" uploaded to table "{CSV_UPLOAD_TABLE}"')
    assert success_msg_f2 in resp

    # The replacing file's new column must show up in the table metadata.
    table = SupersetTestCase.get_table_by_name(CSV_UPLOAD_TABLE)
    assert "d" in table.column_names

    # Custom null values: "" and "john" should be stored as NULL.
    upload_csv(
        CSV_FILENAME2,
        CSV_UPLOAD_TABLE,
        extra={"null_values": '["", "john"]', "if_exists": "replace"},
    )
    engine = get_upload_db().get_sqla_engine()
    data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE}").fetchall()
    if utils.backend() == "hive":
        # Be aware that hive only uses first value from the null values list.
        # It is hive database engine limitation.
        # TODO(bkyryliuk): preprocess csv file for hive upload to match default engine capabilities.
        assert data == [("john", 1, "x"), ("paul", 2, None)]
    else:
        assert data == [(None, 1, "x"), ("paul", 2, None)]

    # With the default null values only missing entries become NULL.
    upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"})
    data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE}").fetchall()
    if utils.backend() == "hive":
        # By default hive does not convert values to null vs other databases.
        assert data == [("john", 1, "x"), ("paul", 2, "")]
    else:
        assert data == [("john", 1, "x"), ("paul", 2, None)]