Ejemplo n.º 1
0
def test_parse_bugged_cte():
    with pytest.raises(RuntimeError):
        SqlParser.parse(
            '''
            WITH sum_trans (
                SELECT user_id, COUNT(*) as cnt, SUM(amount) as balance
                FROM transactions
                WHERE created_date > '2020-01-01'
                GROUP BY user_id
            )
            INSERT INTO potential_fraud (user_id, cnt, balance)
            SELECT user_id, cnt, balance
              FROM sum_trans
              WHERE count > 1000 OR balance > 100000;
            '''
        )
Ejemplo n.º 2
0
    def extract(self) -> StepMetadata:
        # (1) Parse sql statement to obtain input / output tables.
        sql_meta: SqlMeta = SqlParser.parse(self.operator.sql,
                                            self.default_schema)

        # (2) Default all inputs / outputs to current connection.
        # NOTE: We'll want to look into adding support for the `database`
        # property that is used to override the one defined in the connection.
        conn_id = self._conn_id()
        source = Source(type=self.source_type,
                        name=conn_id,
                        connection_url=get_connection_uri(conn_id))

        # (3) Map input / output tables to dataset objects with source set
        # as the current connection. We need to also fetch the schema for the
        # input tables to format the dataset name as:
        # {schema_name}.{table_name}
        inputs = [
            Dataset.from_table(source=source,
                               table_name=in_table_schema.table_name.name,
                               schema_name=in_table_schema.schema_name)
            for in_table_schema in self._get_table_schemas(sql_meta.in_tables)
        ]
        outputs = [
            Dataset.from_table_schema(source=source,
                                      table_schema=out_table_schema) for
            out_table_schema in self._get_table_schemas(sql_meta.out_tables)
        ]

        return StepMetadata(
            name=f"{self.operator.dag_id}.{self.operator.task_id}",
            inputs=inputs,
            outputs=outputs,
            context={'sql': self.operator.sql})
Ejemplo n.º 3
0
def test_ignores_default_schema_when_non_default_schema():
    sql_meta = SqlParser.parse(
        '''
        SELECT col0, col1, col2
          FROM transactions.table0
        ''',
        'public'
    )
    assert sql_meta.in_tables == [DbTableName('transactions.table0')]
Ejemplo n.º 4
0
def test_parse_default_schema():
    sql_meta = SqlParser.parse(
        '''
        SELECT col0, col1, col2
          FROM table0
        ''',
        'public'
    )
    assert sql_meta.in_tables == [DbTableName('public.table0')]
Ejemplo n.º 5
0
def test_parse_simple_select_with_table_schema_prefix_and_extra_whitespace():
    sql_meta = SqlParser.parse(
        '''
        SELECT *
          FROM    schema0.table0   ;
        '''
    )

    assert sql_meta.in_tables == [DbTableName('schema0.table0')]
    assert sql_meta.out_tables == []
Ejemplo n.º 6
0
def test_parse_simple_insert_into():
    sql_meta = SqlParser.parse(
        '''
        INSERT INTO table0 (col0, col1, col2)
        VALUES (val0, val1, val2);
        '''
    )

    assert sql_meta.in_tables == []
    assert sql_meta.out_tables == [DbTableName('table0')]
Ejemplo n.º 7
0
def test_parse_simple_select():
    sql_meta = SqlParser.parse(
        '''
        SELECT *
          FROM table0;
        '''
    )

    log.debug("sqlparser.parse() successful.")
    assert sql_meta.in_tables == [DbTableName('table0')]
    assert sql_meta.out_tables == []
Ejemplo n.º 8
0
def test_parse_simple_insert_into_select():
    sql_meta = SqlParser.parse(
        '''
        INSERT INTO table1 (col0, col1, col2)
        SELECT col0, col1, col2
          FROM table0;
        '''
    )

    assert sql_meta.in_tables == [DbTableName('table0')]
    assert sql_meta.out_tables == [DbTableName('table1')]
Ejemplo n.º 9
0
def test_parse_simple_select_into():
    sql_meta = SqlParser.parse(
        '''
        SELECT *
          INTO table0
          FROM table1;
        '''
    )

    assert sql_meta.in_tables == [DbTableName('table1')]
    assert sql_meta.out_tables == [DbTableName('table0')]
Ejemplo n.º 10
0
def test_parse_simple_right_outer_join():
    sql_meta = SqlParser.parse(
        '''
        SELECT col0, col1, col2
          FROM table0
          RIGHT OUTER JOIN table1
            ON t1.col0 = t2.col0;
        '''
    )

    assert sql_meta.in_tables == [DbTableName('table0'), DbTableName('table1')]
    assert sql_meta.out_tables == []
Ejemplo n.º 11
0
def test_parse_simple_inner_join():
    sql_meta = SqlParser.parse(
        '''
        SELECT col0, col1, col2
          FROM table0
         INNER JOIN table1
            ON t1.col0 = t2.col0
        '''
    )

    assert set(sql_meta.in_tables) == {DbTableName('table0'), DbTableName('table1')}
    assert sql_meta.out_tables == []
Ejemplo n.º 12
0
def test_parse_simple_left_join():
    sql_meta = SqlParser.parse(
        '''
        SELECT col0, col1, col2
          FROM table0
          LEFT JOIN table1
            ON t1.col0 = t2.col0
        '''
    )

    assert sql_meta.in_tables == [DbTableName('table0'), DbTableName('table1')]
    assert sql_meta.out_tables == []
Ejemplo n.º 13
0
def test_parser_integration():
    sql_meta = SqlParser.parse(
        """
        INSERT INTO popular_orders_day_of_week (order_day_of_week, order_placed_on,orders_placed)
            SELECT EXTRACT(ISODOW FROM order_placed_on) AS order_day_of_week,
                   order_placed_on,
                   COUNT(*) AS orders_placed
              FROM top_delivery_times
             GROUP BY order_placed_on;
        """,
        "public"
    )
    assert sql_meta.in_tables == [DbTableName('public.top_delivery_times')]
Ejemplo n.º 14
0
def test_parse_simple_cte():
    sql_meta = SqlParser.parse(
        '''
        WITH sum_trans as (
            SELECT user_id, COUNT(*) as cnt, SUM(amount) as balance
            FROM transactions
            WHERE created_date > '2020-01-01'
            GROUP BY user_id
        )
        INSERT INTO potential_fraud (user_id, cnt, balance)
        SELECT user_id, cnt, balance
          FROM sum_trans
          WHERE count > 1000 OR balance > 100000;
        '''
    )
    assert sql_meta.in_tables == [DbTableName('transactions')]
    assert sql_meta.out_tables == [DbTableName('potential_fraud')]
Ejemplo n.º 15
0
 def parse_sql_context(self):
     context = {
         'sql': self.operator.sql,
     }
     try:
         sql_meta = SqlParser.parse(self.operator.sql)
         log.debug(f"bigquery sql parsed and obtained meta: {sql_meta}")
         context['bigquery.sql.parsed.inputs'] = json.dumps(
             [in_table.name for in_table in sql_meta.in_tables])
         context['bigquery.sql.parsed.outputs'] = json.dumps(
             [out_table.name for out_table in sql_meta.out_tables])
     except Exception as e:
         log.error(f"Cannot parse sql query. {e}", exc_info=True)
         context['bigquery.extractor.sql_parser_error'] = \
             f'{e}: {traceback.format_exc()}'
     self.log.info(context)
     return context
Ejemplo n.º 16
0
 def parse_sql_context(self) -> SqlContext:
     try:
         sql_meta = SqlParser.parse(self.operator.sql, None)
         log.debug(f"bigquery sql parsed and obtained meta: {sql_meta}")
         return SqlContext(
             sql=self.operator.sql,
             inputs=json.dumps(
                 [in_table.name for in_table in sql_meta.in_tables]
             ),
             outputs=json.dumps(
                 [out_table.name for out_table in sql_meta.out_tables]
             )
         )
     except Exception as e:
         log.error(f"Cannot parse sql query. {e}",
                   exc_info=True)
         return SqlContext(
             sql=self.operator.sql,
             parser_error=f'{e}: {traceback.format_exc()}'
         )
Ejemplo n.º 17
0
def test_parse_recursive_cte():
    sql_meta = SqlParser.parse(
        '''
        WITH RECURSIVE subordinates AS
            (SELECT employee_id,
                manager_id,
                full_name
            FROM employees
            WHERE employee_id = 2
            UNION SELECT e.employee_id,
                e.manager_id,
                e.full_name
            FROM employees e
            INNER JOIN subordinates s ON s.employee_id = e.manager_id)
        INSERT INTO sub_employees (employee_id, manager_id, full_name)
        SELECT employee_id, manager_id, full_name FROM subordinates;
        '''
    )
    assert sql_meta.in_tables == [DbTableName('employees')]
    assert sql_meta.out_tables == [DbTableName('sub_employees')]
Ejemplo n.º 18
0
def test_tpcds_cte_query():
    sql_meta = SqlParser.parse("""
WITH year_total AS
    (SELECT c_customer_id customer_id,
            c_first_name customer_first_name,
            c_last_name customer_last_name,
            c_preferred_cust_flag customer_preferred_cust_flag,
            c_birth_country customer_birth_country,
            c_login customer_login,
            c_email_address customer_email_address,
            d_year dyear,
            Sum(((ss_ext_list_price - ss_ext_wholesale_cost - ss_ext_discount_amt)
                + ss_ext_sales_price) / 2) year_total,
            's' sale_type
     FROM src.customer,
          store_sales,
          date_dim
     WHERE c_customer_sk = ss_customer_sk
         AND ss_sold_date_sk = d_date_sk GROUP  BY c_customer_id,
                                                   c_first_name,
                                                   c_last_name,
                                                   c_preferred_cust_flag,
                                                   c_birth_country,
                                                   c_login,
                                                   c_email_address,
                                                   d_year)
SELECT t_s_secyear.customer_id,
       t_s_secyear.customer_first_name,
       t_s_secyear.customer_last_name,
       t_s_secyear.customer_preferred_cust_flag
FROM year_total t_s_firstyear,
     year_total t_s_secyear,
     year_total t_c_firstyear,
     year_total t_c_secyear,
     year_total t_w_firstyear,
     year_total t_w_secyear
WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id
    AND t_s_firstyear.customer_id = t_c_secyear.customer_id
    AND t_s_firstyear.customer_id = t_c_firstyear.customer_id
    AND t_s_firstyear.customer_id = t_w_firstyear.customer_id
    AND t_s_firstyear.customer_id = t_w_secyear.customer_id
    AND t_s_firstyear.sale_type = 's'
    AND t_c_firstyear.sale_type = 'c'
    AND t_w_firstyear.sale_type = 'w'
    AND t_s_secyear.sale_type = 's'
    AND t_c_secyear.sale_type = 'c'
    AND t_w_secyear.sale_type = 'w'
    AND t_s_firstyear.dyear = 2001
    AND t_s_secyear.dyear = 2001 + 1
    AND t_c_firstyear.dyear = 2001
    AND t_c_secyear.dyear = 2001 + 1
    AND t_w_firstyear.dyear = 2001
    AND t_w_secyear.dyear = 2001 + 1
    AND t_s_firstyear.year_total > 0
    AND t_c_firstyear.year_total > 0
    AND t_w_firstyear.year_total > 0
    AND CASE WHEN
            t_c_firstyear.year_total > 0 THEN t_c_secyear.year_total / t_c_firstyear.year_total
            ELSE NULL
        END > CASE WHEN
            t_s_firstyear.year_total > 0 THEN t_s_secyear.year_total / t_s_firstyear.year_total
            ELSE NULL
        END
    AND CASE WHEN
            t_c_firstyear.year_total > 0 THEN t_c_secyear.year_total / t_c_firstyear.year_total
            ELSE NULL
        END > CASE WHEN
            t_w_firstyear.year_total > 0 THEN t_w_secyear.year_total / t_w_firstyear.year_total
            ELSE NULL
        END
    ORDER  BY t_s_secyear.customer_id,
              t_s_secyear.customer_first_name,
              t_s_secyear.customer_last_name,
              t_s_secyear.customer_preferred_cust_flag
LIMIT 100;
""")
    assert set(sql_meta.in_tables) == {
        DbTableName("src.customer"),
        DbTableName("store_sales"),
        DbTableName("date_dim")
    }
    assert len(sql_meta.out_tables) == 0