def test_get_query_columns(): assert get_query_columns('SELECT * FROM `test_table`') == ['*'] assert get_query_columns('SELECT foo.* FROM `test_table`') == ['foo.*'] assert get_query_columns('SELECT foo FROM `test_table`') == ['foo'] assert get_query_columns('SELECT count(foo) FROM `test_table`') == ['foo'] assert get_query_columns( 'SELECT COUNT(foo), max(time_id) FROM `test_table`') == [ 'foo', 'time_id' ] assert get_query_columns( 'SELECT id, foo FROM test_table WHERE id = 3') == ['id', 'foo'] assert get_query_columns( 'SELECT id, foo FROM test_table WHERE foo_id = 3 AND bar = 5') == [ 'id', 'foo', 'foo_id', 'bar' ] assert get_query_columns( 'SELECT foo, count(*) as bar FROM `test_table` WHERE id = 3') == [ 'foo', 'id' ] assert get_query_columns('SELECT foo, test as bar FROM `test_table`') == [ 'foo', 'test' ] assert get_query_columns('SELECT /* a comment */ bar FROM test_table') == [ 'bar' ]
def test_get_query_columns(): assert get_query_columns("SELECT * FROM `test_table`") == ["*"] assert get_query_columns("SELECT foo.* FROM `test_table`") == ["foo.*"] assert get_query_columns("SELECT foo FROM `test_table`") == ["foo"] assert get_query_columns("SELECT count(foo) FROM `test_table`") == ["foo"] assert get_query_columns( "SELECT COUNT(foo), max(time_id) FROM `test_table`") == [ "foo", "time_id", ] assert get_query_columns( "SELECT id, foo FROM test_table WHERE id = 3") == [ "id", "foo", ] assert get_query_columns( "SELECT id, foo FROM test_table WHERE foo_id = 3 AND bar = 5") == [ "id", "foo", "foo_id", "bar" ] assert get_query_columns( "SELECT foo, count(*) as bar FROM `test_table` WHERE id = 3") == [ "foo", "id" ] assert get_query_columns("SELECT foo, test as bar FROM `test_table`") == [ "foo", "test", ] assert get_query_columns("SELECT /* a comment */ bar FROM test_table") == [ "bar" ]
def test_queries_with_null_conditions(): assert get_query_columns( "SELECT id FROM cm WHERE cm.status = 1 AND cm.OPERATIONDATE IS NULL AND cm.OID IN(123123);" ) == ["id", "cm.status", "cm.OPERATIONDATE", "cm.OID"] assert get_query_columns( "SELECT id FROM cm WHERE cm.status = 1 AND cm.OPERATIONDATE IS NOT NULL AND cm.OID IN(123123);" ) == ["id", "cm.status", "cm.OPERATIONDATE", "cm.OID"]
def test_queries_with_null_conditions(): assert get_query_columns( 'SELECT id FROM cm WHERE cm.status = 1 AND cm.OPERATIONDATE IS NULL AND cm.OID IN(123123);' ) == ['id', 'cm.status', 'cm.OPERATIONDATE', 'cm.OID'] assert get_query_columns( 'SELECT id FROM cm WHERE cm.status = 1 AND cm.OPERATIONDATE IS NOT NULL AND cm.OID IN(123123);' ) == ['id', 'cm.status', 'cm.OPERATIONDATE', 'cm.OID']
def test_cast_and_convert_functions(): # https://dev.mysql.com/doc/refman/8.0/en/cast-functions.html assert get_query_columns( 'SELECT count(c) as test, id FROM foo where cast(d as bigint) > e' ) == ['c', 'id', 'd', 'e'] assert get_query_columns( 'SELECT CONVERT(latin1_column USING utf8) FROM latin1_table;') == [ 'latin1_column' ]
def test_select_aliases(): assert get_query_columns('SELECT e.foo FROM bar AS e') == ['e.foo'] assert get_query_columns('SELECT e.foo FROM bar e') == ['e.foo'] assert get_query_columns('SELECT e.foo FROM (SELECT * FROM bar) AS e') == [ 'e.foo', '*' ] assert get_query_columns('SELECT e.foo FROM (SELECT * FROM bar) e') == [ 'e.foo', '*' ]
def test_cast_and_convert_functions(): # https://dev.mysql.com/doc/refman/8.0/en/cast-functions.html assert get_query_columns( "SELECT count(c) as test, id FROM foo where cast(d as bigint) > e" ) == ["c", "id", "d", "e"] assert get_query_columns( "SELECT CONVERT(latin1_column USING utf8) FROM latin1_table;") == [ "latin1_column" ]
def test_queries_with_distinct(): assert get_query_columns("SELECT DISTINCT DATA.ASSAY_ID FROM foo") == [ "DATA.ASSAY_ID" ] assert get_query_columns("SELECT UNIQUE DATA.ASSAY_ID FROM foo") == [ "DATA.ASSAY_ID" ] assert get_query_tables("SELECT DISTINCT DATA.ASSAY_ID FROM foo") == [ "foo" ]
def test_get_query_columns_order_by(): assert get_query_columns("SELECT foo FROM bar ORDER BY id") == [ 'foo', 'id' ] assert get_query_columns( "SELECT foo FROM bar WHERE id > 20 ORDER BY id") == ['foo', 'id'] assert get_query_columns("SELECT id, foo FROM bar ORDER BY id DESC") == [ 'id', 'foo' ] assert get_query_columns( "SELECT user_id,foo FROM bar ORDER BY id LIMIT 20") == [ 'user_id', 'foo', 'id' ]
def test_insert_into_select(): # https://dev.mysql.com/doc/refman/5.7/en/insert-select.html query = "INSERT INTO foo SELECT * FROM bar" assert get_query_tables(query) == ['foo', 'bar'] assert get_query_columns(query) == ['*'] query = "INSERT INTO foo SELECT id, price FROM bar" assert get_query_tables(query) == ['foo', 'bar'] assert get_query_columns(query) == ['id', 'price'] query = "INSERT INTO foo SELECT id, price FROM bar WHERE qty > 200" assert get_query_tables(query) == ['foo', 'bar'] assert get_query_columns(query) == ['id', 'price', 'qty']
def test_insert_into_select(): # https://dev.mysql.com/doc/refman/5.7/en/insert-select.html query = "INSERT INTO foo SELECT * FROM bar" assert get_query_tables(query) == ["foo", "bar"] assert get_query_columns(query) == ["*"] query = "INSERT INTO foo SELECT id, price FROM bar" assert get_query_tables(query) == ["foo", "bar"] assert get_query_columns(query) == ["id", "price"] query = "INSERT INTO foo SELECT id, price FROM bar WHERE qty > 200" assert get_query_tables(query) == ["foo", "bar"] assert get_query_columns(query) == ["id", "price", "qty"]
def test_joins(): assert ['redirect', 'page'] == \ get_query_tables("SELECT page_title FROM `redirect` INNER JOIN `page` " "ON (rd_title = 'foo' AND rd_namespace = '100' AND (page_id = rd_from))") assert ['redirect', 'page'] == \ get_query_tables("SELECT page_title FROM `redirect` INNER JOIN `page` `foo` " "ON (rd_title = 'foo' AND rd_namespace = '100' AND (foo.page_id = rd_from))") assert ['page_title', 'rd_title', 'rd_namespace', 'page_id', 'rd_from'] == \ get_query_columns("SELECT page_title FROM `redirect` INNER JOIN `page` " "ON (rd_title = 'foo' AND rd_namespace = '100' AND (page_id = rd_from))") # see #34 assert ['foos', 'bars'] == \ get_query_tables("SELECT foo FROM `foos` JOIN `bars` ON (foos.id = bars.id)") assert ['foos', 'bars'] == \ get_query_tables("SELECT foo FROM `foos` FULL JOIN `bars` ON (foos.id = bars.id)") assert ['foos', 'bars'] == \ get_query_tables("SELECT foo FROM `foos` FULL OUTER JOIN `bars` ON (foos.id = bars.id)") assert ['foos', 'bars'] == \ get_query_tables("SELECT foo FROM `foos` RIGHT OUTER JOIN `bars` ON (foos.id = bars.id)") assert ['foos', 'bars'] == \ get_query_tables("SELECT foo FROM `foos` LEFT OUTER JOIN `bars` ON (foos.id = bars.id)")
def test_case_syntax(): # https://dev.mysql.com/doc/refman/8.0/en/case.html assert get_query_columns( 'select case when p > 0 then 1 else 0 end as cs from c where g > f' ) == ['p', 'g', 'f'] assert get_query_tables( 'select case when p > 0 then 1 else 0 end as cs from c where g > f' ) == ['c']
def test_case_syntax(): # https://dev.mysql.com/doc/refman/8.0/en/case.html assert get_query_columns( "select case when p > 0 then 1 else 0 end as cs from c where g > f" ) == ["p", "g", "f"] assert get_query_tables( "select case when p > 0 then 1 else 0 end as cs from c where g > f" ) == ["c"]
def test_handle_force_index(): query = "SELECT page_title,page_namespace FROM `page` FORCE INDEX (page_random) " \ "JOIN `categorylinks` ON ((page_id=cl_from)) WHERE page_is_redirect = '0' " \ "AND (page_random >= 0.197372293871) AND cl_to = 'Muppet_Characters' " \ "ORDER BY page_random LIMIT 1" assert get_query_tables(query) == ['page', 'categorylinks'] assert get_query_columns(query) == \ ['page_title', 'page_namespace', 'page_id', 'cl_from', 'page_is_redirect', 'page_random', 'cl_to']
def test_insert_overwrite_table(): assert ["foo_report"] == get_query_tables("INSERT TABLE foo_report") assert ["foo_report" ] == get_query_tables("INSERT OVERWRITE TABLE foo_report") assert ["foo_report", "bar"] == get_query_tables( "INSERT OVERWRITE TABLE foo_report SELECT foo FROM bar") assert ["foo"] == get_query_columns( "INSERT OVERWRITE TABLE foo_report SELECT foo FROM bar")
def test_insert_overwrite_table(): assert ['foo_report'] == get_query_tables('INSERT TABLE foo_report') assert ['foo_report' ] == get_query_tables('INSERT OVERWRITE TABLE foo_report') assert ['foo_report', 'bar'] == get_query_tables( 'INSERT OVERWRITE TABLE foo_report SELECT foo FROM bar') assert ['foo'] == get_query_columns( 'INSERT OVERWRITE TABLE foo_report SELECT foo FROM bar')
def test_case_insensitive(): # case-insensitive handling # https://github.com/macbre/sql-metadata/issues/71 assert ["abc.foo", "foo", "bar"] == get_query_tables( "create table abc.foo as SELECT pqr.foo1 , ab.foo2 FROM foo pqr, bar ab" ) assert ["abc.foo", "foo", "bar"] == get_query_tables( "create table abc.foo as select pqr.foo1 , ab.foo2 FROM foo pqr, bar ab" ) assert ["pqr.foo1", "ab.foo2"] == get_query_columns( "create table abc.foo as SELECT pqr.foo1 , ab.foo2 FROM foo pqr, bar ab" ) assert ["pqr.foo1", "ab.foo2"] == get_query_columns( "create table abc.foo as select pqr.foo1 , ab.foo2 FROM foo pqr, bar ab" )
def test_get_query_columns_order_by(): assert get_query_columns("SELECT foo FROM bar ORDER BY id") == [ "foo", "id" ] assert get_query_columns( "SELECT foo FROM bar WHERE id > 20 ORDER BY id") == [ "foo", "id", ] assert get_query_columns("SELECT id, foo FROM bar ORDER BY id DESC") == [ "id", "foo", ] assert get_query_columns( "SELECT user_id,foo FROM bar ORDER BY id LIMIT 20") == [ "user_id", "foo", "id", ]
def check_not_used_columns(database, queries): """ :type database indexdigest.database.Database :type queries list[str] :rtype: list[LinterEntry] :raises Exception """ logger = logging.getLogger(__name__) # analyze only SELECT queries from the log queries = list(filter(is_select_query, queries)) used_tables = get_used_tables_from_queries(queries) used_columns = defaultdict(list) logger.info("Will check these tables: %s", used_tables) # analyze given queries and collect used columns for each table for query in queries: tables = get_query_tables(query) if tables: columns = get_query_columns(query) # print(query, table, columns) # add used columns per table # FIXME: assume we're querying just a single table for now used_columns[tables[0]] += columns else: logger.error( 'Unable to extract tables and columns used from the query: %s', query) # analyze table schemas and report not used columns for each table for table in used_tables: logger.info("Checking %s table", table) table_columns = database.get_table_columns(table) # now get the difference and report them not_used_columns = [ column for column in table_columns if column.name not in set(used_columns[table]) ] if table_columns else [] for column in not_used_columns: yield LinterEntry( linter_type='not_used_columns', table_name=table, message='"{}" column was not used by provided queries'.format( column), context={ 'column_name': column.name, 'column_type': column.type })
def test_tables_aliases_are_resolved(): """ See https://github.com/macbre/sql-metadata/issues/52 """ sql = "SELECT a.* FROM users1 AS a JOIN users2 AS b ON a.ip_address = b.ip_address" assert get_query_tables(sql) == ['users1', 'users2'] assert get_query_table_aliases(sql) == {'a': 'users1', 'b': 'users2'} assert get_query_columns(sql) == [ 'users1.*', 'users1.ip_address', 'users2.ip_address' ], 'Should resolve table aliases'
def test_joins(): assert ['redirect', 'page'] == \ get_query_tables("SELECT page_title FROM `redirect` INNER JOIN `page` " "ON (rd_title = 'foo' AND rd_namespace = '100' AND (page_id = rd_from))") assert ['redirect', 'page'] == \ get_query_tables("SELECT page_title FROM `redirect` INNER JOIN `page` `foo` " "ON (rd_title = 'foo' AND rd_namespace = '100' AND (foo.page_id = rd_from))") assert ['page_title', 'rd_title', 'rd_namespace', 'page_id', 'rd_from'] == \ get_query_columns("SELECT page_title FROM `redirect` INNER JOIN `page` " "ON (rd_title = 'foo' AND rd_namespace = '100' AND (page_id = rd_from))")
def test_get_query_columns_complex(): # @see https://github.com/macbre/sql-metadata/issues/6 assert get_query_columns("SELECT 1 as c FROM foo_pageviews WHERE time_id = '2018-01-07 00:00:00' AND period_id = '2' LIMIT 1") == ['time_id', 'period_id'] # table aliases assert get_query_columns("SELECT r.wiki_id AS id, pageviews_7day AS pageviews FROM report_wiki_recent_pageviews AS r " "INNER JOIN dimension_wikis AS d ON r.wiki_id = d.wiki_id WHERE d.is_public = '1' " "AND r.lang IN ( 'en', 'ru' ) AND r.hub_name = 'gaming' ORDER BY pageviews DESC LIMIT 300") \ == ['report_wiki_recent_pageviews.wiki_id', 'pageviews_7day', 'dimension_wikis.wiki_id', 'dimension_wikis.is_public', 'report_wiki_recent_pageviews.lang', 'report_wiki_recent_pageviews.hub_name', 'pageviews'] # self joins assert get_query_columns("SELECT count(fw1.wiki_id) as wam_results_total FROM `fact_wam_scores` `fw1` " "left join `fact_wam_scores` `fw2` ON ((fw1.wiki_id = fw2.wiki_id) AND " "(fw2.time_id = FROM_UNIXTIME(1466380800))) left join `dimension_wikis` `dw` " "ON ((fw1.wiki_id = dw.wiki_id)) WHERE (fw1.time_id = FROM_UNIXTIME(1466467200)) " "AND (dw.url like '%%' OR dw.title like '%%') AND fw1.vertical_id IN " "('0','1','2','3','4','5','6','7') AND (fw1.wiki_id NOT " "IN ('23312','70256','168929','463633','381622','1089624')) " "AND ((dw.url IS NOT NULL AND dw.title IS NOT NULL))") \ == ['fw1.wiki_id', 'fw2.wiki_id', 'fw2.time_id', 'dw.wiki_id', 'fw1.time_id', 'dw.url', 'dw.title', 'fw1.vertical_id'] assert get_query_columns("SELECT date_format(time_id,'%Y-%m-%d') AS date, pageviews AS cnt FROM rollup_wiki_pageviews WHERE period_id = '2' AND wiki_id = '1676379' AND time_id BETWEEN '2018-01-08' AND '2018-01-01'") == ['time_id', 'pageviews', 'period_id', 'wiki_id'] assert get_query_columns("INSERT /* VoteHelper::addVote xxx */ INTO `page_vote` (article_id,user_id,`time`) VALUES ('442001','27574631','20180228130846')") == ['article_id', 'user_id', 'time'] # REPLACE queries assert get_query_columns("REPLACE INTO `page_props` (pp_page,pp_propname,pp_value) VALUES ('47','infoboxes','')") == ['pp_page', 'pp_propname', 'pp_value']
def test_tables_aliases_are_resolved(): """ See https://github.com/macbre/sql-metadata/issues/52 """ sql = "SELECT a.* FROM users1 AS a JOIN users2 AS b ON a.ip_address = b.ip_address" assert get_query_tables(sql) == ["users1", "users2"] assert get_query_table_aliases(sql) == {"a": "users1", "b": "users2"} assert get_query_columns(sql) == [ "users1.*", "users1.ip_address", "users2.ip_address", ], "Should resolve table aliases"
def get_columns(sql_query: str) -> list: if sql_query is not None: tables: list = sql_metadata.get_query_tables(sql_query) columns: list = sql_metadata.get_query_columns(sql_query) if len(columns) == 1: #* Return all columns if len(columns) == 1 and columns[0] == '*': if tables[0] == 'public.store': columns = TABLE_STORE_COLUMNS else: columns = TABLE_USER_COLUMNS return columns
def test_handle_force_index(): query = ( "SELECT page_title,page_namespace FROM `page` FORCE INDEX (page_random) " "JOIN `categorylinks` ON ((page_id=cl_from)) WHERE page_is_redirect = '0' " "AND (page_random >= 0.197372293871) AND cl_to = 'Muppet_Characters' " "ORDER BY page_random LIMIT 1") assert get_query_tables(query) == ["page", "categorylinks"] assert get_query_columns(query) == [ "page_title", "page_namespace", "page_id", "cl_from", "page_is_redirect", "page_random", "cl_to", ]
def test_joins(): assert ["redirect", "page"] == get_query_tables( "SELECT page_title FROM `redirect` INNER JOIN `page` " "ON (rd_title = 'foo' AND rd_namespace = '100' AND (page_id = rd_from))" ) assert ["redirect", "page"] == get_query_tables( "SELECT page_title FROM `redirect` INNER JOIN `page` `foo` " "ON (rd_title = 'foo' AND rd_namespace = '100' AND (foo.page_id = rd_from))" ) assert [ "page_title", "rd_title", "rd_namespace", "page_id", "rd_from", ] == get_query_columns( "SELECT page_title FROM `redirect` INNER JOIN `page` " "ON (rd_title = 'foo' AND rd_namespace = '100' AND (page_id = rd_from))" ) # see #34 assert ["foos", "bars"] == get_query_tables( "SELECT foo FROM `foos` JOIN `bars` ON (foos.id = bars.id)") assert ["foos", "bars"] == get_query_tables( "SELECT foo FROM `foos` FULL JOIN `bars` ON (foos.id = bars.id)") assert ["foos", "bars"] == get_query_tables( "SELECT foo FROM `foos` FULL OUTER JOIN `bars` ON (foos.id = bars.id)") assert ["foos", "bars"] == get_query_tables( "SELECT foo FROM `foos` RIGHT OUTER JOIN `bars` ON (foos.id = bars.id)" ) assert ["foos", "bars"] == get_query_tables( "SELECT foo FROM `foos` LEFT OUTER JOIN `bars` ON (foos.id = bars.id)")
def test_get_query_columns(self): self.assertListEqual(['*'], get_query_columns('SELECT * FROM `test_table`')) self.assertListEqual(['foo'], get_query_columns('SELECT foo FROM `test_table`')) self.assertListEqual( ['id', 'foo'], get_query_columns('SELECT id, foo FROM test_table WHERE id = 3')) self.assertListEqual( ['foo', 'id'], get_query_columns( 'SELECT foo, count(*) as bar FROM `test_table` WHERE id = 3')) self.assertListEqual( ['foo', 'test'], get_query_columns('SELECT foo, test as bar FROM `test_table`')) self.assertListEqual( ['bar'], get_query_columns('SELECT /* a comment */ bar FROM test_table'))
def test_select_aliases(): assert get_query_columns("SELECT e.foo FROM bar AS e") == ["bar.foo"]
def columns_queried(self) -> List[str]: return sql_metadata.get_query_columns(self.sql)