def test_postgress_quoted_names(): # https://github.com/macbre/sql-metadata/issues/85 parser = Parser( 'INSERT INTO "test" ("name") VALUES (\'foo\') RETURNING "test"."id"' ) assert ["test"] == parser.tables assert ["name"] == parser.columns assert {"insert": ["name"]} == parser.columns_dict assert "INSERT INTO test (name) VALUES (X) RETURNING test.id" == parser.generalize assert parser.values == ["foo"] parser = Parser( 'SELECT "test"."id", "test"."name" FROM "test" WHERE "test"."name" = \'foo\' LIMIT 21 FOR UPDATE' ) assert ["test"] == parser.tables assert ["test.id", "test.name"] == parser.columns assert { "select": ["test.id", "test.name"], "where": ["test.name"], } == parser.columns_dict assert ( "SELECT test.id, test.name FROM test WHERE test.name = X LIMIT N FOR UPDATE" == parser.generalize ) parser = Parser('UPDATE "test" SET "name" = \'bar\' WHERE "test"."id" = 1') assert ["test"] == parser.tables assert ["name", "test.id"] == parser.columns assert {"update": ["name"], "where": ["test.id"]} == parser.columns_dict assert "UPDATE test SET name = X WHERE test.id = N" == parser.generalize
def test_multiline_queries(): query = """ SELECT COUNT(1) FROM (SELECT std.task_id as new_task_id FROM some_task_detail std WHERE std.STATUS = 1 ) a JOIN ( SELECT st.task_id FROM some_task st WHERE task_type_id = 80 ) as b ON a.new_task_id = b.task_id; """.strip() parser = Parser(query) assert parser.subqueries_names == ["a", "b"] assert parser.tables == ["some_task_detail", "some_task"] assert parser.columns_aliases_names == ["new_task_id"] assert parser.columns_aliases == { "new_task_id": "some_task_detail.task_id" } assert parser.columns == [ "some_task_detail.task_id", "some_task_detail.STATUS", "some_task.task_id", "task_type_id", "a.new_task_id", "b.task_id", ] assert parser.columns_without_subqueries == [ "some_task_detail.task_id", "some_task_detail.STATUS", "some_task.task_id", "task_type_id", ] assert parser.columns_dict == { "join": ["a.new_task_id", "b.task_id"], "select": ["some_task_detail.task_id", "some_task.task_id"], "where": ["some_task_detail.STATUS", "task_type_id"], } assert parser.subqueries == { "a": "SELECT std.task_id as new_task_id FROM some_task_detail std WHERE std.STATUS = 1", "b": "SELECT st.task_id FROM some_task st WHERE task_type_id = 80", } parser2 = Parser(parser.subqueries["a"]) assert parser2.tables == ["some_task_detail"] assert parser2.columns == [ "some_task_detail.task_id", "some_task_detail.STATUS" ]
def test_getting_comments(): parser = Parser( "INSERT /* VoteHelper::addVote xxx */ INTO `page_vote` (article_id,user_id,`time`) VALUES ('442001','27574631','20180228130846')" ) assert parser.comments == ["/* VoteHelper::addVote xxx */"] parser = Parser( "SELECT /* CategoryPaginationViewer::processSection */ " "page_namespace,page_title,page_len,page_is_redirect,cl_sortkey_prefix FROM `page` " "INNER JOIN `categorylinks` FORCE INDEX (cl_sortkey) ON ((cl_from = page_id)) " " /* We should add more conditions */ " "WHERE cl_type = 'page' AND cl_to = 'Spotify/Song' " " /* Verify with accounting */ " "ORDER BY cl_sortkey LIMIT 927600,200" ) assert parser.comments == [ "/* CategoryPaginationViewer::processSection */", "/* We should add more conditions */", "/* Verify with accounting */", ] assert parser.without_comments == ( "SELECT page_namespace,page_title,page_len,page_is_redirect,cl_sortkey_prefix " "FROM `page` " "INNER JOIN `categorylinks` FORCE INDEX (cl_sortkey) ON ((cl_from = page_id)) " "WHERE cl_type = 'page' AND cl_to = 'Spotify/Song' " "ORDER BY cl_sortkey LIMIT 927600,200" ) # no comments and new lines assert ( "SELECT test FROM `foo`.`bar`" == Parser("SELECT /* foo */ test\nFROM `foo`.`bar`").without_comments )
def test_inline_comments(): query = """ SELECT * from foo -- this comment should not be hiding rest of the query join bar on foo.a=bar.b where foo.c = 'am' """ parser = Parser(query) assert parser.tables == ["foo", "bar"] assert parser.columns == ["*", "foo.a", "bar.b", "foo.c"] assert parser.comments == [ "-- this comment should not be hiding rest of the query\n" ] query = """ SELECT * --multiple from foo -- comments left outer join bar on foo.a=bar.b --works too where foo.c = 'am' """ parser = Parser(query) assert parser.tables == ["foo", "bar"] assert parser.columns == ["*", "foo.a", "bar.b", "foo.c"] assert parser.comments == [ "--multiple\n", "-- comments\n", "--works too\n" ]
def test_cleared_cache(): parser = Parser("Select * from test") assert parser.tables == ["test"] with pytest.raises(AttributeError): parser.query = "Select * from test2" assert parser._tables == ["test"]
def test_redshift(): parser = Parser("ALTER TABLE target_table APPEND FROM source_table") assert parser.tables == [ "target_table", "source_table", ] assert parser.query_type == QueryType.ALTER assert Parser("ALTER TABLE x APPEND FROM y").tables == ["x", "y"]
def test_is_create_table_query(): with pytest.raises(ValueError): assert Parser("BEGIN").query_type assert Parser("SELECT * FROM `foo` ()").query_type == QueryType.SELECT assert Parser("CREATE TABLE `foo` ()").query_type == QueryType.CREATE assert (Parser( "CREATE table abc.foo as SELECT pqr.foo1 , ab.foo2 FROM foo pqr, bar ab" ).query_type == QueryType.CREATE)
def test_is_create_table_query(): assert Parser("BEGIN")._is_create_table_query is False assert Parser("SELECT * FROM `foo` ()")._is_create_table_query is False assert Parser("CREATE TABLE `foo` ()")._is_create_table_query is True assert ( Parser( "create table abc.foo as SELECT pqr.foo1 , ab.foo2 FROM foo pqr, bar ab" )._is_create_table_query is True )
def test_with_statements(): parser = Parser(""" WITH database1.tableFromWith AS (SELECT aa.* FROM table3 as aa left join table4 on aa.col1=table4.col2), test as (SELECT * from table3) SELECT "xxxxx" FROM database1.tableFromWith alias LEFT JOIN database2.table2 ON ("tt"."ttt"."fff" = "xx"."xxx") """) assert parser.tables == ["table3", "table4", "database2.table2"] assert parser.with_names == ["database1.tableFromWith", "test"] parser = Parser(""" WITH database1.tableFromWith AS (SELECT * FROM table3), database1.tableFromWith2 AS (SELECT * FROM table4), database1.tableFromWith3 AS (SELECT * FROM table5), database1.tableFromWith4 AS (SELECT * FROM table6) SELECT "xxxxx" FROM database1.tableFromWith alias LEFT JOIN database2.table2 ON ("tt"."ttt"."fff" = "xx"."xxx") """) assert parser.with_names == [ "database1.tableFromWith", "database1.tableFromWith2", "database1.tableFromWith3", "database1.tableFromWith4", ] assert parser.tables == [ "table3", "table4", "table5", "table6", "database2.table2" ] parser = Parser(""" WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, d FROM table2) SELECT b, d FROM cte1 JOIN cte2 WHERE cte1.a = cte2.c; """) assert parser.with_names == ["cte1", "cte2"] assert parser.tables == ["table1", "table2"]
def test_multiple_with_statements_with_with_columns(): # fix for setting columns in with # https://github.com/macbre/sql-metadata/issues/128 query = """ WITH t1 (c1, c2) AS (SELECT * FROM t2), t3 (c3, c4) AS (SELECT c5, c6 FROM t4) SELECT * FROM t1, t3; """ parser = Parser(query) assert parser.with_names == ["t1", "t3"] assert parser.with_queries == { "t1": "SELECT * FROM t2", "t3": "SELECT c5, c6 FROM t4", } assert parser.tables == ["t2", "t4"] assert parser.columns == ["*", "c5", "c6"] assert parser.columns_aliases_names == ["c1", "c2", "c3", "c4"] assert parser.columns_aliases == { "c1": "*", "c2": "*", "c3": "c5", "c4": "c6" } assert parser.query_type == QueryType.SELECT
def test_window_in_with(): query = """ WITH cte_1 AS ( SELECT column_1, column_2 FROM table_1 WINDOW window_1 AS ( PARTITION BY column_2 ) ) SELECT column_1, column_2 FROM cte_1 AS alias_1 """ parser = Parser(query) assert parser.with_names == ["cte_1"] assert parser.columns == ["column_1", "column_2"] assert parser.with_queries == { "cte_1": "SELECT column_1, column_2 FROM table_1 WINDOW window_1 AS(PARTITION BY column_2)" } assert parser.tables == ["table_1"]
def test_resolving_with_columns_with_wildcard(): query = """ WITH query1 AS (SELECT c1, c2, c4 FROM t5), query2 AS (SELECT c3, c7 FROM t6) SELECT query1.*, query2.c7 FROM query1 left join query2 on query1.c4 = query2.c3 order by query2.c7; """ parser = Parser(query) assert parser.with_names == ["query1", "query2"] assert parser.with_queries == { "query1": "SELECT c1, c2, c4 FROM t5", "query2": "SELECT c3, c7 FROM t6", } assert parser.tables == ["t5", "t6"] assert parser.columns_aliases == {} assert parser.columns_aliases_names == [] assert parser.columns == ["c1", "c2", "c4", "c3", "c7"] assert parser.columns_dict == { "join": ["c4", "c3"], "order_by": ["c7"], "select": ["c1", "c2", "c4", "c3", "c7"], } assert parser.query_type == QueryType.SELECT
def test_resolving_with_clauses_with_columns(): query = """ WITH query1 (c1, c2) AS (SELECT * FROM t2), query2 (c3, c4) AS (SELECT c5, c6 FROM t4) SELECT query1.c2, query2.c4 FROM query1 left join query2 on query1.c1 = query2.c3 order by query1.c2; """ parser = Parser(query) assert parser.with_names == ["query1", "query2"] assert parser.with_queries == { "query1": "SELECT * FROM t2", "query2": "SELECT c5, c6 FROM t4", } assert parser.tables == ["t2", "t4"] assert parser.columns_aliases == { "c1": "*", "c2": "*", "c3": "c5", "c4": "c6" } assert parser.columns_aliases_names == ["c1", "c2", "c3", "c4"] assert parser.columns_aliases_dict == { "join": ["c1", "c3"], "order_by": ["c2"], "select": ["c1", "c2", "c3", "c4"], } assert parser.columns == ["*", "c5", "c6"] assert parser.columns_dict == { "join": ["*", "c5"], "order_by": ["*"], "select": ["*", "c5", "c6"], } assert parser.query_type == QueryType.SELECT
def test_complicated_with(): query = """ WITH uisd_filter_table as ( select session_id, srch_id, srch_ci, srch_co, srch_los, srch_sort_type, impr_list from uisd where datem <= date_sub(date_add(current_date(), 92), 7 * 52) and lower(srch_sort_type) in ('expertpicks', 'recommended') and srch_ci <= date_sub(date_add(current_date(), 92), 7 * 52) and srch_co >= date_sub(date_add(current_date(), 1), 7 * 52) ) select DISTINCT session_id, srch_id, srch_ci, srch_co, srch_los, srch_sort_type, l.impr_property_id as expe_property_id, l.impr_position_across_pages from uisd_filter_table lateral view explode(impr_list) table as l """ parser = Parser(query) assert parser.query_type == QueryType.SELECT assert parser.with_names == ["uisd_filter_table"] assert parser.with_queries == { "uisd_filter_table": "select session_id, srch_id, srch_ci, srch_co, srch_los, " "srch_sort_type, impr_list from uisd where datem <= " "date_sub(date_add(current_date(), 92), 7 * 52) and " "lower(srch_sort_type) in ('expertpicks', 'recommended') " "and srch_ci <= date_sub(date_add(current_date(), 92), 7 " "* 52) and srch_co >= date_sub(date_add(current_date(), " "1), 7 * 52)" } assert parser.tables == [ "uisd", "impr_list", ] # this one is wrong too should be table assert parser.columns == [ "session_id", "srch_id", "srch_ci", "srch_co", "srch_los", "srch_sort_type", "impr_list", "datem", "l.impr_property_id", "l.impr_position_across_pages", ]
def test_creating_table_as_select_with_with_clause(): qry = """ CREATE table xyz as with sub as (select it_id from internal_table) SELECT * from table_a join table_b on (table_a.name = table_b.name) left join table_c on (table_a.age = table_c.age) left join sub on (table.it_id = sub.it_id) order by table_a.name, table_a.age """ parser = Parser(qry) assert parser.query_type == QueryType.CREATE assert parser.with_names == ["sub"] assert parser.columns == [ "it_id", "*", "table_a.name", "table_b.name", "table_a.age", "table_c.age", "table.it_id", "sub.it_id", ] assert parser.tables == [ "xyz", "internal_table", "table_a", "table_b", "table_c" ]
def test_readme_query(): parser = Parser( """ SELECT COUNT(1) FROM (SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1) a JOIN (SELECT st.task_id FROM some_task st WHERE task_type_id = 80) b ON a.task_id = b.task_id; """ ) assert parser.subqueries == { "a": "SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1", "b": "SELECT st.task_id FROM some_task st WHERE task_type_id = 80", } assert parser.subqueries_names == ["a", "b"] assert parser.columns == [ "some_task_detail.task_id", "some_task_detail.STATUS", "some_task.task_id", "task_type_id", ] assert parser.columns_dict == { "join": ["some_task_detail.task_id", "some_task.task_id"], "select": ["some_task_detail.task_id", "some_task.task_id"], "where": ["some_task_detail.STATUS", "task_type_id"], }
def test_resolving_columns_in_sub_queries_with_join_between_sub_queries(): query = """ Select sq.sub_alias, sq.other_name from ( select tab1.aa sub_alias, tab2.us as other_name from tab1 left join tab2 on tab1.id = tab2.other_id ) sq left join ( select intern1 as col1, secret col2 from aa ) sq3 on sq.sub_alias = sq3.col1 order by sq.other_name, sq3.col2 """ parser = Parser(query) assert parser.columns == [ "tab1.aa", "tab2.us", "tab1.id", "tab2.other_id", "intern1", "secret", ] assert parser.columns_aliases == { "col1": "intern1", "col2": "secret", "other_name": "tab2.us", "sub_alias": "tab1.aa", } assert parser.columns_dict == { "join": ["tab1.id", "tab2.other_id", "tab1.aa", "intern1"], "order_by": ["tab2.us", "secret"], "select": ["tab1.aa", "tab2.us", "intern1", "secret"], } assert parser.subqueries_names == ["sq", "sq3"]
def test_resolving_columns_in_sub_queries_union(): query = """ Select sq.sub_alias, sq.other_name from ( select tab1.aa sub_alias, tab2.us as other_name from tab1 left join tab2 on tab1.id = tab2.other_id ) sq union all select sq3.col1, sq3.col2 from (select tab12.col1, concat(tab23.ab, ' ', tab23.bc) as col2 from tab12 left join tab23 on tab12.id = tab23.zorro) sq3 """ parser = Parser(query) assert parser.columns_aliases == { "col2": ["tab23.ab", "tab23.bc"], "other_name": "tab2.us", "sub_alias": "tab1.aa", } assert parser.columns == [ "tab1.aa", "tab2.us", "tab1.id", "tab2.other_id", "tab12.col1", "tab23.ab", "tab23.bc", "tab12.id", "tab23.zorro", ] assert parser.columns_dict == { "join": ["tab1.id", "tab2.other_id", "tab12.id", "tab23.zorro"], "select": ["tab1.aa", "tab2.us", "tab12.col1", "tab23.ab", "tab23.bc"], } assert parser.subqueries_names == ["sq", "sq3"]
def test_convert_in_join(): parser = Parser( "SELECT la1.col1, la2.col2, CONVERT(la1.col2 USING utf8) FROM latin1_table la1 " "left join latin2_table la2 " "on CONVERT(la1.latin1_column USING utf8) = " "CONVERT(la2.latin1_column USING utf8) " "left join latin3_table la3 using (col1, col2);") assert parser.columns == [ "latin1_table.col1", "latin2_table.col2", "latin1_table.col2", "latin1_table.latin1_column", "latin2_table.latin1_column", "col1", "col2", ] assert parser.columns_dict == { "join": [ "latin1_table.latin1_column", "latin2_table.latin1_column", "col1", "col2", ], "select": ["latin1_table.col1", "latin2_table.col2", "latin1_table.col2"], } assert parser.tables == ["latin1_table", "latin2_table", "latin3_table"]
def test_nested_with_statement_in_create_table(): qry = """ CREATE table xyz as with sub as (select it_id from internal_table) SELECT * from ( with abc as (select * from other_table) select name, age, it_id from table_z join abc on (table_z.it_id = abc.it_id) ) as table_a join table_b on (table_a.name = table_b.name) left join table_c on (table_a.age = table_c.age) left join sub on (table_a.it_id = sub.it_id) order by table_a.name, table_a.age """ parser = Parser(qry) assert parser.tables == [ "xyz", "internal_table", "other_table", "table_z", "table_b", "table_c", ] assert parser.columns == [ "it_id", "*", "name", "age", "table_z.it_id", "table_b.name", "table_c.age", ] assert parser.columns_dict == { "select": ["it_id", "*", "name", "age"], "join": [ "table_z.it_id", "it_id", "name", "table_b.name", "age", "table_c.age", ], "order_by": ["name", "age"], } assert parser.with_names == ["sub", "abc"] assert parser.subqueries_names == ["table_a"] assert parser.with_queries == { "abc": "select * from other_table", "sub": "select it_id from internal_table", } assert parser.subqueries == { "table_a": "with abc as(select * from other_table) select name, age, it_id " "from table_z join abc on (table_z.it_id = abc.it_id)" } assert parser.query_type == QueryType.CREATE
def test_empty_query(): queries = ["", "/* empty query */"] for query in queries: with pytest.raises(ValueError) as ex: _ = Parser(query).query_type assert "Empty queries are not supported!" in str(ex.value)
def test_drop_table_query(): queries = [ "{0}DROP TABLE foo;{0}", ] for query in queries: for comment in ["", "/* foo */", "\n--foo\n", "\n# foo\n"]: assert "DROP TABLE" == Parser(query.format(comment)).query_type
def test_column_aliases_with_subquery(): query = """ SELECT yearweek(SignDate) as Aggregation, BusinessSource, (SELECT sum(C2Count) from (SELECT count(C2) as C2Count, BusinessSource, yearweek(Start1) Start1, yearweek(End1) End1 from ( SELECT ContractID as C2, BusinessSource, StartDate as Start1, EndDate as End1 from data_contracts_report ) sq2 group by 2, 3, 4) sq where Start1 <= yearweek(SignDate) and End1 >= yearweek(SignDate) and sq.BusinessSource = mq.BusinessSource) CountOfConsultants FROM data_contracts_report mq where SignDate >= last_day(date_add(now(), interval -13 month)) group by 1, 2 order by 1, 2; """ parser = Parser(query) assert parser.tables == ["data_contracts_report"] assert parser.subqueries_names == ["sq2", "sq"] assert parser.subqueries == { "sq": "SELECT count(C2) as C2Count, BusinessSource, yearweek(Start1) Start1, " "yearweek(End1) End1 from (SELECT ContractID as C2, BusinessSource, " "StartDate as Start1, EndDate as End1 from data_contracts_report) sq2 " "group by 2, 3, 4", "sq2": "SELECT ContractID as C2, BusinessSource, StartDate as Start1, EndDate " "as End1 from data_contracts_report", } assert parser.columns == [ "SignDate", "BusinessSource", "ContractID", "StartDate", "EndDate", "data_contracts_report.BusinessSource", ] assert parser.columns_aliases_names == [ "Aggregation", "C2Count", "Start1", "End1", "C2", "CountOfConsultants", ] assert parser.columns_aliases == { "Aggregation": "SignDate", "C2": "ContractID", "C2Count": "C2", "CountOfConsultants": "C2Count", "End1": "EndDate", "Start1": "StartDate", }
def test_create_table_as_select_in_parentheses(): qry = """ CREATE TABLE records AS (SELECT t.id, t.name, e.name as energy FROM t JOIN e ON t.e_id = e.id) """ parser = Parser(qry) assert parser.query_type == QueryType.CREATE assert parser.columns == ["t.id", "t.name", "e.name", "t.e_id", "e.id"] assert parser.tables == ["records", "t", "e"]
def test_column_aliases_with_columns_operations(): query = """ SELECT a, b + c - u as alias1, custome_func(d) alias2 from aa, bb """ parser = Parser(query) assert parser.tables == ["aa", "bb"] assert parser.columns == ["a", "b", "c", "u", "d"] assert parser.columns_aliases_names == ["alias1", "alias2"] assert parser.columns_aliases == {"alias1": ["b", "c", "u"], "alias2": "d"}
def test_column_aliases_with_multiple_functions(): query = """ SELECT a, sum(b) + sum(c) as alias1, custome_func(d) alias2 from aa, bb """ parser = Parser(query) assert parser.tables == ["aa", "bb"] assert parser.columns == ["a", "b", "c", "d"] assert parser.columns_aliases_names == ["alias1", "alias2"] assert parser.columns_aliases == {"alias1": ["b", "c"], "alias2": "d"}
def test_mutiple_functions(): parser = Parser( "SELECT count(col) + max(col2) + min(col3)" "+ count(distinct col4) + custom_func(col5) as result from dual") assert parser.columns == ["col", "col2", "col3", "col4", "col5"] assert parser.columns_aliases_names == ["result"] assert parser.columns_aliases == { "result": ["col", "col2", "col3", "col4", "col5"] }
def test_insert_overwrite(): query = """ WITH AAA AS ( SELECT * FROM db1.tb1 AS jt WHERE col_date >= CURRENT_DATE ) , BBB AS ( SELECT col1, ROW_NUMBER() OVER(PARTITION BY col2 ORDER BY col3 DESC, col4 DESC) AS row_count FROM AAA ) , CCC AS ( SELECT * FROM BBB WHERE row_count = 1 ) , DDD AS ( SELECT * FROM CCC WHERE col1 = 'HI' ) INSERT OVERWRITE TABLE db4.tb25 SELECT jt.col1, jt.col2, jt.col3 FROM DDD AS jt ; """ parser = Parser(query) assert parser.with_names == ["AAA", "BBB", "CCC", "DDD"] assert parser.columns == [ "*", "col_date", "col1", "col2", "col3", "col4", "db1.tb1.col1", "db1.tb1.col2", "db1.tb1.col3", ] assert parser.tables == ["db1.tb1", "db4.tb25"]
def test_simple_create_table_as_select(): parser = Parser(""" CREATE table abc.foo as SELECT pqr.foo1 , ab.foo2 FROM foo pqr, bar ab; """) assert parser.query_type == QueryType.CREATE assert parser.tables == ["abc.foo", "foo", "bar"] assert parser.columns == ["foo.foo1", "bar.foo2"]
def test_create_if_not_exists_with_select(): qry = """ CREATE TABLE if not exists mysuper_secret_schema.records AS (SELECT t.id, t.name, e.name as energy FROM t JOIN e ON t.e_id = e.id) """ parser = Parser(qry) assert parser.query_type == QueryType.CREATE assert parser.columns == ["t.id", "t.name", "e.name", "t.e_id", "e.id"] assert parser.tables == ["mysuper_secret_schema.records", "t", "e"]