예제 #1
0
def test_get_query_columns():
    assert get_query_columns('SELECT * FROM `test_table`') == ['*']
    assert get_query_columns('SELECT foo.* FROM `test_table`') == ['foo.*']
    assert get_query_columns('SELECT foo FROM `test_table`') == ['foo']
    assert get_query_columns('SELECT count(foo) FROM `test_table`') == ['foo']
    assert get_query_columns(
        'SELECT COUNT(foo), max(time_id) FROM `test_table`') == [
            'foo', 'time_id'
        ]
    assert get_query_columns(
        'SELECT id, foo FROM test_table WHERE id = 3') == ['id', 'foo']
    assert get_query_columns(
        'SELECT id, foo FROM test_table WHERE foo_id = 3 AND bar = 5') == [
            'id', 'foo', 'foo_id', 'bar'
        ]
    assert get_query_columns(
        'SELECT foo, count(*) as bar FROM `test_table` WHERE id = 3') == [
            'foo', 'id'
        ]
    assert get_query_columns('SELECT foo, test as bar FROM `test_table`') == [
        'foo', 'test'
    ]
    assert get_query_columns('SELECT /* a comment */ bar FROM test_table') == [
        'bar'
    ]
def test_get_query_columns():
    assert get_query_columns("SELECT * FROM `test_table`") == ["*"]
    assert get_query_columns("SELECT foo.* FROM `test_table`") == ["foo.*"]
    assert get_query_columns("SELECT foo FROM `test_table`") == ["foo"]
    assert get_query_columns("SELECT count(foo) FROM `test_table`") == ["foo"]
    assert get_query_columns(
        "SELECT COUNT(foo), max(time_id) FROM `test_table`") == [
            "foo",
            "time_id",
        ]
    assert get_query_columns(
        "SELECT id, foo FROM test_table WHERE id = 3") == [
            "id",
            "foo",
        ]
    assert get_query_columns(
        "SELECT id, foo FROM test_table WHERE foo_id = 3 AND bar = 5") == [
            "id", "foo", "foo_id", "bar"
        ]
    assert get_query_columns(
        "SELECT foo, count(*) as bar FROM `test_table` WHERE id = 3") == [
            "foo", "id"
        ]
    assert get_query_columns("SELECT foo, test as bar FROM `test_table`") == [
        "foo",
        "test",
    ]
    assert get_query_columns("SELECT /* a comment */ bar FROM test_table") == [
        "bar"
    ]
예제 #3
0
def test_queries_with_null_conditions():
    assert get_query_columns(
        "SELECT id FROM cm WHERE cm.status = 1 AND cm.OPERATIONDATE IS NULL AND cm.OID IN(123123);"
    ) == ["id", "cm.status", "cm.OPERATIONDATE", "cm.OID"]

    assert get_query_columns(
        "SELECT id FROM cm WHERE cm.status = 1 AND cm.OPERATIONDATE IS NOT NULL AND cm.OID IN(123123);"
    ) == ["id", "cm.status", "cm.OPERATIONDATE", "cm.OID"]
예제 #4
0
def test_queries_with_null_conditions():
    assert get_query_columns(
        'SELECT id FROM cm WHERE cm.status = 1 AND cm.OPERATIONDATE IS NULL AND cm.OID IN(123123);'
    ) == ['id', 'cm.status', 'cm.OPERATIONDATE', 'cm.OID']

    assert get_query_columns(
        'SELECT id FROM cm WHERE cm.status = 1 AND cm.OPERATIONDATE IS NOT NULL AND cm.OID IN(123123);'
    ) == ['id', 'cm.status', 'cm.OPERATIONDATE', 'cm.OID']
예제 #5
0
def test_cast_and_convert_functions():
    # https://dev.mysql.com/doc/refman/8.0/en/cast-functions.html
    assert get_query_columns(
        'SELECT count(c) as test, id FROM foo where cast(d as bigint) > e'
    ) == ['c', 'id', 'd', 'e']
    assert get_query_columns(
        'SELECT CONVERT(latin1_column USING utf8) FROM latin1_table;') == [
            'latin1_column'
        ]
예제 #6
0
def test_select_aliases():
    assert get_query_columns('SELECT e.foo FROM bar AS e') == ['e.foo']
    assert get_query_columns('SELECT e.foo FROM bar e') == ['e.foo']
    assert get_query_columns('SELECT e.foo FROM (SELECT * FROM bar) AS e') == [
        'e.foo', '*'
    ]
    assert get_query_columns('SELECT e.foo FROM (SELECT * FROM bar) e') == [
        'e.foo', '*'
    ]
예제 #7
0
def test_cast_and_convert_functions():
    # https://dev.mysql.com/doc/refman/8.0/en/cast-functions.html
    assert get_query_columns(
        "SELECT count(c) as test, id FROM foo where cast(d as bigint) > e"
    ) == ["c", "id", "d", "e"]
    assert get_query_columns(
        "SELECT CONVERT(latin1_column USING utf8) FROM latin1_table;") == [
            "latin1_column"
        ]
예제 #8
0
def test_queries_with_distinct():
    assert get_query_columns("SELECT DISTINCT DATA.ASSAY_ID FROM foo") == [
        "DATA.ASSAY_ID"
    ]

    assert get_query_columns("SELECT UNIQUE DATA.ASSAY_ID FROM foo") == [
        "DATA.ASSAY_ID"
    ]

    assert get_query_tables("SELECT DISTINCT DATA.ASSAY_ID FROM foo") == [
        "foo"
    ]
예제 #9
0
def test_get_query_columns_order_by():
    assert get_query_columns("SELECT foo FROM bar ORDER BY id") == [
        'foo', 'id'
    ]
    assert get_query_columns(
        "SELECT foo FROM bar WHERE id > 20 ORDER BY id") == ['foo', 'id']
    assert get_query_columns("SELECT id, foo FROM bar ORDER BY id DESC") == [
        'id', 'foo'
    ]
    assert get_query_columns(
        "SELECT user_id,foo FROM bar ORDER BY id LIMIT 20") == [
            'user_id', 'foo', 'id'
        ]
예제 #10
0
def test_insert_into_select():
    # https://dev.mysql.com/doc/refman/5.7/en/insert-select.html
    query = "INSERT INTO foo SELECT * FROM bar"
    assert get_query_tables(query) == ['foo', 'bar']
    assert get_query_columns(query) == ['*']

    query = "INSERT INTO foo SELECT id, price FROM bar"
    assert get_query_tables(query) == ['foo', 'bar']
    assert get_query_columns(query) == ['id', 'price']

    query = "INSERT INTO foo SELECT id, price FROM bar WHERE qty > 200"
    assert get_query_tables(query) == ['foo', 'bar']
    assert get_query_columns(query) == ['id', 'price', 'qty']
예제 #11
0
def test_insert_into_select():
    # https://dev.mysql.com/doc/refman/5.7/en/insert-select.html
    query = "INSERT INTO foo SELECT * FROM bar"
    assert get_query_tables(query) == ["foo", "bar"]
    assert get_query_columns(query) == ["*"]

    query = "INSERT INTO foo SELECT id, price FROM bar"
    assert get_query_tables(query) == ["foo", "bar"]
    assert get_query_columns(query) == ["id", "price"]

    query = "INSERT INTO foo SELECT id, price FROM bar WHERE qty > 200"
    assert get_query_tables(query) == ["foo", "bar"]
    assert get_query_columns(query) == ["id", "price", "qty"]
예제 #12
0
def test_joins():
    assert ['redirect', 'page'] == \
        get_query_tables("SELECT  page_title  FROM `redirect` INNER JOIN `page` "
                         "ON (rd_title = 'foo' AND rd_namespace = '100' AND (page_id = rd_from))")

    assert ['redirect', 'page'] == \
        get_query_tables("SELECT  page_title  FROM `redirect` INNER JOIN `page` `foo` "
                         "ON (rd_title = 'foo' AND rd_namespace = '100' AND (foo.page_id = rd_from))")

    assert ['page_title', 'rd_title', 'rd_namespace', 'page_id', 'rd_from'] == \
        get_query_columns("SELECT  page_title  FROM `redirect` INNER JOIN `page` "
                          "ON (rd_title = 'foo' AND rd_namespace = '100' AND (page_id = rd_from))")

    # see #34
    assert ['foos', 'bars'] == \
        get_query_tables("SELECT foo FROM `foos` JOIN `bars` ON (foos.id = bars.id)")

    assert ['foos', 'bars'] == \
        get_query_tables("SELECT foo FROM `foos` FULL JOIN `bars` ON (foos.id = bars.id)")

    assert ['foos', 'bars'] == \
        get_query_tables("SELECT foo FROM `foos` FULL OUTER JOIN `bars` ON (foos.id = bars.id)")

    assert ['foos', 'bars'] == \
        get_query_tables("SELECT foo FROM `foos` RIGHT OUTER JOIN `bars` ON (foos.id = bars.id)")

    assert ['foos', 'bars'] == \
        get_query_tables("SELECT foo FROM `foos` LEFT OUTER JOIN `bars` ON (foos.id = bars.id)")
예제 #13
0
def test_case_syntax():
    # https://dev.mysql.com/doc/refman/8.0/en/case.html
    assert get_query_columns(
        'select case when p > 0 then 1 else 0 end as cs from c where g > f'
    ) == ['p', 'g', 'f']
    assert get_query_tables(
        'select case when p > 0 then 1 else 0 end as cs from c where g > f'
    ) == ['c']
예제 #14
0
def test_case_syntax():
    # https://dev.mysql.com/doc/refman/8.0/en/case.html
    assert get_query_columns(
        "select case when p > 0 then 1 else 0 end as cs from c where g > f"
    ) == ["p", "g", "f"]
    assert get_query_tables(
        "select case when p > 0 then 1 else 0 end as cs from c where g > f"
    ) == ["c"]
예제 #15
0
def test_handle_force_index():
    query = "SELECT  page_title,page_namespace  FROM `page` FORCE INDEX (page_random) " \
            "JOIN `categorylinks` ON ((page_id=cl_from))  WHERE page_is_redirect = '0' " \
            "AND (page_random >= 0.197372293871) AND cl_to = 'Muppet_Characters'  " \
            "ORDER BY page_random LIMIT 1"

    assert get_query_tables(query) == ['page', 'categorylinks']
    assert get_query_columns(query) == \
        ['page_title', 'page_namespace', 'page_id', 'cl_from', 'page_is_redirect', 'page_random', 'cl_to']
예제 #16
0
def test_insert_overwrite_table():
    assert ["foo_report"] == get_query_tables("INSERT TABLE foo_report")
    assert ["foo_report"
            ] == get_query_tables("INSERT OVERWRITE TABLE foo_report")
    assert ["foo_report", "bar"] == get_query_tables(
        "INSERT OVERWRITE TABLE foo_report SELECT foo FROM bar")

    assert ["foo"] == get_query_columns(
        "INSERT OVERWRITE TABLE foo_report SELECT foo FROM bar")
예제 #17
0
def test_insert_overwrite_table():
    assert ['foo_report'] == get_query_tables('INSERT TABLE foo_report')
    assert ['foo_report'
            ] == get_query_tables('INSERT OVERWRITE TABLE foo_report')
    assert ['foo_report', 'bar'] == get_query_tables(
        'INSERT OVERWRITE TABLE foo_report SELECT foo FROM bar')

    assert ['foo'] == get_query_columns(
        'INSERT OVERWRITE TABLE foo_report SELECT foo FROM bar')
예제 #18
0
def test_case_insensitive():
    # case-insensitive handling
    # https://github.com/macbre/sql-metadata/issues/71
    assert ["abc.foo", "foo", "bar"] == get_query_tables(
        "create table abc.foo as SELECT pqr.foo1 , ab.foo2 FROM foo pqr, bar ab"
    )

    assert ["abc.foo", "foo", "bar"] == get_query_tables(
        "create table abc.foo as select pqr.foo1 , ab.foo2 FROM foo pqr, bar ab"
    )

    assert ["pqr.foo1", "ab.foo2"] == get_query_columns(
        "create table abc.foo as SELECT pqr.foo1 , ab.foo2 FROM foo pqr, bar ab"
    )

    assert ["pqr.foo1", "ab.foo2"] == get_query_columns(
        "create table abc.foo as select pqr.foo1 , ab.foo2 FROM foo pqr, bar ab"
    )
def test_get_query_columns_order_by():
    assert get_query_columns("SELECT foo FROM bar ORDER BY id") == [
        "foo", "id"
    ]
    assert get_query_columns(
        "SELECT foo FROM bar WHERE id > 20 ORDER BY id") == [
            "foo",
            "id",
        ]
    assert get_query_columns("SELECT id, foo FROM bar ORDER BY id DESC") == [
        "id",
        "foo",
    ]
    assert get_query_columns(
        "SELECT user_id,foo FROM bar ORDER BY id LIMIT 20") == [
            "user_id",
            "foo",
            "id",
        ]
def check_not_used_columns(database, queries):
    """
    :type database  indexdigest.database.Database
    :type queries list[str]
    :rtype: list[LinterEntry]
    :raises Exception
    """
    logger = logging.getLogger(__name__)

    # analyze only SELECT queries from the log
    queries = list(filter(is_select_query, queries))

    used_tables = get_used_tables_from_queries(queries)
    used_columns = defaultdict(list)

    logger.info("Will check these tables: %s", used_tables)

    # analyze given queries and collect used columns for each table
    for query in queries:
        tables = get_query_tables(query)
        if tables:
            columns = get_query_columns(query)

            # print(query, table, columns)

            # add used columns per table
            # FIXME: assume we're querying just a single table for now
            used_columns[tables[0]] += columns
        else:
            logger.error(
                'Unable to extract tables and columns used from the query: %s',
                query)

    # analyze table schemas and report not used columns for each table
    for table in used_tables:
        logger.info("Checking %s table", table)
        table_columns = database.get_table_columns(table)

        # now get the difference and report them
        not_used_columns = [
            column for column in table_columns
            if column.name not in set(used_columns[table])
        ] if table_columns else []

        for column in not_used_columns:
            yield LinterEntry(
                linter_type='not_used_columns',
                table_name=table,
                message='"{}" column was not used by provided queries'.format(
                    column),
                context={
                    'column_name': column.name,
                    'column_type': column.type
                })
예제 #21
0
def test_tables_aliases_are_resolved():
    """
    See https://github.com/macbre/sql-metadata/issues/52
    """
    sql = "SELECT a.* FROM users1 AS a JOIN users2 AS b ON a.ip_address = b.ip_address"

    assert get_query_tables(sql) == ['users1', 'users2']
    assert get_query_table_aliases(sql) == {'a': 'users1', 'b': 'users2'}
    assert get_query_columns(sql) == [
        'users1.*', 'users1.ip_address', 'users2.ip_address'
    ], 'Should resolve table aliases'
예제 #22
0
def test_joins():
    assert ['redirect', 'page'] == \
        get_query_tables("SELECT  page_title  FROM `redirect` INNER JOIN `page` "
                         "ON (rd_title = 'foo' AND rd_namespace = '100' AND (page_id = rd_from))")

    assert ['redirect', 'page'] == \
        get_query_tables("SELECT  page_title  FROM `redirect` INNER JOIN `page` `foo` "
                         "ON (rd_title = 'foo' AND rd_namespace = '100' AND (foo.page_id = rd_from))")

    assert ['page_title', 'rd_title', 'rd_namespace', 'page_id', 'rd_from'] == \
        get_query_columns("SELECT  page_title  FROM `redirect` INNER JOIN `page` "
                          "ON (rd_title = 'foo' AND rd_namespace = '100' AND (page_id = rd_from))")
예제 #23
0
def test_get_query_columns_complex():
    # @see https://github.com/macbre/sql-metadata/issues/6
    assert get_query_columns("SELECT 1 as c    FROM foo_pageviews      WHERE time_id = '2018-01-07 00:00:00'   AND period_id = '2' LIMIT 1") == ['time_id', 'period_id']

    # table aliases
    assert get_query_columns("SELECT r.wiki_id AS id, pageviews_7day AS pageviews FROM report_wiki_recent_pageviews AS r "
        "INNER JOIN dimension_wikis AS d ON r.wiki_id = d.wiki_id WHERE d.is_public = '1' "
        "AND r.lang IN ( 'en', 'ru' ) AND r.hub_name = 'gaming' ORDER BY pageviews DESC LIMIT 300") \
        == ['report_wiki_recent_pageviews.wiki_id', 'pageviews_7day', 'dimension_wikis.wiki_id', 'dimension_wikis.is_public', 'report_wiki_recent_pageviews.lang', 'report_wiki_recent_pageviews.hub_name', 'pageviews']

    # self joins
    assert get_query_columns("SELECT  count(fw1.wiki_id) as wam_results_total  FROM `fact_wam_scores` `fw1` "
        "left join `fact_wam_scores` `fw2` ON ((fw1.wiki_id = fw2.wiki_id) AND "
        "(fw2.time_id = FROM_UNIXTIME(1466380800))) left join `dimension_wikis` `dw` "
        "ON ((fw1.wiki_id = dw.wiki_id))  WHERE (fw1.time_id = FROM_UNIXTIME(1466467200)) "
        "AND (dw.url like '%%' OR dw.title like '%%') AND fw1.vertical_id IN "
        "('0','1','2','3','4','5','6','7')  AND (fw1.wiki_id NOT "
        "IN ('23312','70256','168929','463633','381622','1089624')) "
        "AND ((dw.url IS NOT NULL AND dw.title IS NOT NULL))") \
        == ['fw1.wiki_id', 'fw2.wiki_id', 'fw2.time_id', 'dw.wiki_id', 'fw1.time_id', 'dw.url', 'dw.title', 'fw1.vertical_id']

    assert get_query_columns("SELECT date_format(time_id,'%Y-%m-%d') AS date, pageviews AS cnt         FROM rollup_wiki_pageviews      WHERE period_id = '2'   AND wiki_id = '1676379'         AND time_id BETWEEN '2018-01-08'        AND '2018-01-01'") == ['time_id', 'pageviews', 'period_id', 'wiki_id']

    assert get_query_columns("INSERT /* VoteHelper::addVote xxx */  INTO `page_vote` (article_id,user_id,`time`) VALUES ('442001','27574631','20180228130846')") == ['article_id', 'user_id', 'time']

    # REPLACE queries
    assert get_query_columns("REPLACE INTO `page_props` (pp_page,pp_propname,pp_value) VALUES ('47','infoboxes','')") == ['pp_page', 'pp_propname', 'pp_value']
예제 #24
0
def test_tables_aliases_are_resolved():
    """
    See https://github.com/macbre/sql-metadata/issues/52
    """
    sql = "SELECT a.* FROM users1 AS a JOIN users2 AS b ON a.ip_address = b.ip_address"

    assert get_query_tables(sql) == ["users1", "users2"]
    assert get_query_table_aliases(sql) == {"a": "users1", "b": "users2"}
    assert get_query_columns(sql) == [
        "users1.*",
        "users1.ip_address",
        "users2.ip_address",
    ], "Should resolve table aliases"
def get_columns(sql_query: str) -> list:
    if sql_query is not None:
        tables: list = sql_metadata.get_query_tables(sql_query)
        columns: list = sql_metadata.get_query_columns(sql_query)

        if len(columns) == 1:
            #* Return all columns
            if len(columns) == 1 and columns[0] == '*':
                if tables[0] == 'public.store':
                    columns = TABLE_STORE_COLUMNS
                else:
                    columns = TABLE_USER_COLUMNS
        return columns
예제 #26
0
def test_handle_force_index():
    query = (
        "SELECT  page_title,page_namespace  FROM `page` FORCE INDEX (page_random) "
        "JOIN `categorylinks` ON ((page_id=cl_from))  WHERE page_is_redirect = '0' "
        "AND (page_random >= 0.197372293871) AND cl_to = 'Muppet_Characters'  "
        "ORDER BY page_random LIMIT 1")

    assert get_query_tables(query) == ["page", "categorylinks"]
    assert get_query_columns(query) == [
        "page_title",
        "page_namespace",
        "page_id",
        "cl_from",
        "page_is_redirect",
        "page_random",
        "cl_to",
    ]
예제 #27
0
def test_joins():
    assert ["redirect", "page"] == get_query_tables(
        "SELECT  page_title  FROM `redirect` INNER JOIN `page` "
        "ON (rd_title = 'foo' AND rd_namespace = '100' AND (page_id = rd_from))"
    )

    assert ["redirect", "page"] == get_query_tables(
        "SELECT  page_title  FROM `redirect` INNER JOIN `page` `foo` "
        "ON (rd_title = 'foo' AND rd_namespace = '100' AND (foo.page_id = rd_from))"
    )

    assert [
        "page_title",
        "rd_title",
        "rd_namespace",
        "page_id",
        "rd_from",
    ] == get_query_columns(
        "SELECT  page_title  FROM `redirect` INNER JOIN `page` "
        "ON (rd_title = 'foo' AND rd_namespace = '100' AND (page_id = rd_from))"
    )

    # see #34
    assert ["foos", "bars"] == get_query_tables(
        "SELECT foo FROM `foos` JOIN `bars` ON (foos.id = bars.id)")

    assert ["foos", "bars"] == get_query_tables(
        "SELECT foo FROM `foos` FULL JOIN `bars` ON (foos.id = bars.id)")

    assert ["foos", "bars"] == get_query_tables(
        "SELECT foo FROM `foos` FULL OUTER JOIN `bars` ON (foos.id = bars.id)")

    assert ["foos", "bars"] == get_query_tables(
        "SELECT foo FROM `foos` RIGHT OUTER JOIN `bars` ON (foos.id = bars.id)"
    )

    assert ["foos", "bars"] == get_query_tables(
        "SELECT foo FROM `foos` LEFT OUTER JOIN `bars` ON (foos.id = bars.id)")
예제 #28
0
    def test_get_query_columns(self):
        self.assertListEqual(['*'],
                             get_query_columns('SELECT * FROM `test_table`'))

        self.assertListEqual(['foo'],
                             get_query_columns('SELECT foo FROM `test_table`'))

        self.assertListEqual(
            ['id', 'foo'],
            get_query_columns('SELECT id, foo FROM test_table WHERE id = 3'))

        self.assertListEqual(
            ['foo', 'id'],
            get_query_columns(
                'SELECT foo, count(*) as bar FROM `test_table` WHERE id = 3'))

        self.assertListEqual(
            ['foo', 'test'],
            get_query_columns('SELECT foo, test as bar FROM `test_table`'))

        self.assertListEqual(
            ['bar'],
            get_query_columns('SELECT /* a comment */ bar FROM test_table'))
예제 #29
0
def test_select_aliases():
    assert get_query_columns("SELECT e.foo FROM bar AS e") == ["bar.foo"]
예제 #30
0
 def columns_queried(self) -> List[str]:
     return sql_metadata.get_query_columns(self.sql)