Beispiel #1
0
def analyze_where_clause_part(stmt, logger=None):  # COPIED
    """Collect the column names referenced inside a WHERE clause.

    Args:
        stmt: sqlparse token group representing the WHERE clause.
        logger: optional logger, forwarded to helper functions.

    Returns:
        list of column-name strings used by the clause.

    Raises:
        SqlParserDoesNotYetParseThis: for any keyword or token kind this
            parser does not handle yet.
    """
    stmtl = [token for token in stmt.tokens if not token.is_whitespace]
    uses = []
    for token in stmtl:
        if token.is_keyword:
            # might be worth treating LIKE separately
            # (fix: the whitelist previously contained 'OR' twice; the
            # duplicate is removed, membership semantics are unchanged)
            if token.value.upper() not in [
                    'WHERE', 'AND', 'OR', 'LIKE', 'IN'
            ]:
                raise SqlParserDoesNotYetParseThis(
                    "got WHERE statement with unknown keyword {} in {}".format(
                        token, stmtl))
        elif str(token.ttype) == 'Token.Punctuation':
            pass
        elif type(token) == sqlparse.sql.Comparison:
            # "col = value" — the left-hand side names the column used
            uses.append(token.left.value)
        elif type(token) == sqlparse.sql.Identifier:
            uses += [token.get_real_name()]
        elif type(token) == sqlparse.sql.Parenthesis:
            # nested condition group — recurse
            uses += analyze_parenthesis(token, logger)
        elif token.ttype == sqlparse.tokens.Token.String.Single:
            # this means that it is a string and therefore cannot define a column - I hope
            pass
        else:
            raise SqlParserDoesNotYetParseThis(
                "unknown WHERE condition element {} of type {} in {}".format(
                    token, type(token), stmtl))
    return uses
Beispiel #2
0
def analyze_parenthesis(stmt, logger=None):  # COPIED
    """Collect the column names referenced inside a parenthesized group.

    Args:
        stmt: sqlparse.sql.Parenthesis token group (must start and end
            with punctuation tokens, i.e. the parentheses themselves).
        logger: optional logger, forwarded on recursion.

    Returns:
        list of column-name strings used inside the parentheses.

    Raises:
        SqlParserDoesNotYetParseThis: for any token kind this parser does
            not handle yet.
    """
    assert str(stmt.tokens[0].ttype
               ) == 'Token.Punctuation', "first element has to be punctuation"
    assert str(stmt.tokens[-1].ttype
               ) == 'Token.Punctuation', "last element has to be punctuation"
    stmtl = [token for token in stmt.tokens if not token.is_whitespace]
    ret = []
    # skip the opening and closing parenthesis tokens
    for token in stmtl[1:-1]:
        if type(token) == sqlparse.sql.Parenthesis:
            # nested group — recurse
            ret = ret + analyze_parenthesis(token, logger)
        elif type(token) == sqlparse.sql.Identifier:
            ret.append(token.get_real_name())
        elif token.ttype == sqlparse.tokens.Token.String.Single:
            # plain string literal — cannot reference a column
            pass
        elif type(token) == sqlparse.sql.IdentifierList:
            for identifier in token.get_identifiers():
                if identifier.ttype == sqlparse.tokens.Token.String.Single:
                    pass
                else:
                    # fix: error message previously read
                    # "unknwon identifierliste element"
                    raise SqlParserDoesNotYetParseThis(
                        "unknown identifier list element {} of type {} in {}".
                        format(identifier, type(identifier),
                               token.get_identifiers()))
        elif type(token) == sqlparse.sql.Comparison:
            # "col = value" — the left-hand side names the column used
            ret.append(token.left.value)
        elif token.is_keyword and token.value.upper() in ['AND', 'OR']:
            pass
        else:
            raise SqlParserDoesNotYetParseThis(
                "unknown paren element {} of type {} in {}".format(
                    token, type(token), stmt.tokens))
    return ret
Beispiel #3
0
def extract_insert_table_identifier(token_list, logger=None):
    """Return the table name targeted by an INSERT's INTO part.

    Args:
        token_list: a single-element list holding either a Function
            (``table(col, ...)`` form) or an Identifier token.
        logger: unused, kept for a uniform helper signature.

    Returns:
        single-element list with the table name.

    Raises:
        SqlParserDoesNotYetParseThis: if the token is neither a function
            nor an identifier.
    """
    assert len(
        token_list) == 1, "the list must not be longer or shorter than 1"
    target = token_list[0]
    if type(target) == sqlparse.sql.Function:
        return [target.get_name()]
    if type(target) == sqlparse.sql.Identifier:
        return [target.get_real_name()]
    raise SqlParserDoesNotYetParseThis(
        "got {} but expected function or identifier as into value".format(
            token_list[0]))
Beispiel #4
0
def analyze_identifier_list(identlist,
                            keywords_are_identifiers=False,
                            logger=None):
    """
    This function is the bread and butter of the analyzer as
    this function does the actual analysis and is called (recursively)
    by all other functions that deal with (sub)parts of a given
    pseudo-parsetree. If anything breaks - it does so here

    Args:
        identlist: iterable of sqlparse tokens (or None, treated as empty).
        keywords_are_identifiers: when True, bare keywords are recorded as
            identifiers (needed inside Operations/Comparisons where column
            names can be tokenized as keywords).
        logger: optional logger, forwarded to helpers.

    Returns:
        list of identifier strings found in the token list.

    Raises:
        SqlParserDoesNotYetParseThis: for any token kind not handled yet.
    """
    if identlist is None:
        identlist = []
    ret = []
    for token in identlist:
        if type(token) == sqlparse.sql.Function:
            ret += analyze_function(token, logger)
        elif type(token) == sqlparse.sql.Identifier:
            if token.get_real_name() == 'special':
                pass
            else:
                ret.append(token.get_real_name())
        elif type(token) == sqlparse.sql.Token and token.value == "*":
            ret += ["*"]
        elif type(token) == sqlparse.sql.Parenthesis:
            ret += analyze_parenthesis(token, logger)
        elif type(token) == sqlparse.sql.Operation:
            ret += analyze_identifier_list(token.tokens,
                                           keywords_are_identifiers=True,
                                           logger=logger)
        elif type(token) == sqlparse.sql.IdentifierList:
            ret += analyze_identifier_list(token.get_identifiers(),
                                           logger=logger)
        elif token.ttype == sqlparse.tokens.Token.String.Single:
            pass
        elif token.ttype == sqlparse.tokens.Token.Operator:
            pass
        elif type(token) == sqlparse.sql.Comparison:
            # analyze both sides — either may reference a column
            ret += analyze_identifier_list([token.left],
                                           keywords_are_identifiers=True,
                                           logger=logger)
            ret += analyze_identifier_list([token.right],
                                           keywords_are_identifiers=True,
                                           logger=logger)
        elif token.is_keyword:
            if keywords_are_identifiers:
                # BUG FIX: was ``ret += token.value``, which extended the
                # result with the individual CHARACTERS of the keyword
                # string instead of appending the keyword as one element.
                ret.append(token.value)
            else:
                pass
        elif token.is_whitespace:
            pass  # whitespaces are of no interest for our purposes
        elif str(token.ttype) == "Token.Literal.Number.Integer":
            pass  # fixed values are of no interest for our purposes
        else:
            raise SqlParserDoesNotYetParseThis(
                "token {} in list {} unknown".format(token, identlist))
    return ret
Beispiel #5
0
def analyze_from_clause_part(stmtl, logger=None):
    """Return the table names referenced by a FROM clause.

    Args:
        stmtl: whitespace-free token list whose element 1 is either a
            single Identifier or an IdentifierList of tables.
        logger: unused, kept for a uniform helper signature.

    Returns:
        list of table-name strings.

    Raises:
        SqlParserDoesNotYetParseThis: if a non-identifier shows up in the
            table definition.
    """
    source = stmtl[1]
    if type(source) == sqlparse.sql.IdentifierList:
        candidates = list(source.get_identifiers())
    else:
        candidates = [source]
    tables = []
    for candidate in candidates:
        if type(candidate) != sqlparse.sql.Identifier:
            raise SqlParserDoesNotYetParseThis(
                "unknown {} in table definition {}".format(candidate, stmtl))
        tables.append(candidate.get_real_name())
    return tables
Beispiel #6
0
def analyze_set_clause_part(stmtl, logger=None):  # COPIED
    """Collect the column names assigned by an UPDATE ... SET clause.

    Args:
        stmtl: whitespace-free token list starting at the SET keyword;
            element 0 (the keyword itself) is skipped.
        logger: unused, kept for a uniform helper signature.

    Returns:
        list of column-name strings that the clause writes to.

    Raises:
        SqlParserDoesNotYetParseThis: for any token kind not handled yet.
    """
    defines = []
    for token in stmtl[1:]:
        kind = type(token)
        if kind == sqlparse.sql.IdentifierList:
            # several "col = value" comparisons separated by commas
            for comparison in token.get_identifiers():
                left = comparison.left
                if left.is_keyword:
                    # column name tokenized as a keyword — take raw value
                    defines.append(left.value)
                else:
                    defines.append(left.get_real_name())
        elif kind == sqlparse.sql.Comparison:
            if type(token.left) == sqlparse.sql.Identifier:
                defines.append(token.left.get_real_name())
            else:
                # NOTE(review): falling back to the RIGHT-hand side of the
                # comparison looks suspicious for a SET clause — confirm
                # against callers whether this is intentional.
                defines.append(token.right.get_real_name())
        else:
            raise SqlParserDoesNotYetParseThis(
                "unknown {} part in set statement {}".format(token, stmtl))
    return defines
Beispiel #7
0
def analyze_insert_statement(insert_statement, logger=None):
    """Analyze an INSERT statement.

    Args:
        insert_statement: sqlparse statement object for the INSERT.
        logger: optional logger, forwarded to helpers.

    Returns:
        (uses, defines) tuple; ``uses`` is always the empty list here,
        ``defines`` is a single whole-relation InteractedSchemaElement
        for the target table.

    Raises:
        SqlParserDoesNotYetParseThis: when neither VALUES nor SET is found.
    """
    stmtl = [
        token for token in insert_statement.tokens if not token.is_whitespace
    ]
    uses = []
    defines = []
    # The INTO part spans from the INTO keyword up to VALUES (or SET).
    pos_into_start = position_of_keyword(stmtl, u'INTO')
    pos_into_end = -1
    if position_of_keyword(stmtl, u'VALUES'):
        pos_into_end = position_of_keyword(stmtl, u'VALUES')
    elif position_of_keyword(stmtl, u'SET'):
        pos_into_end = position_of_keyword(stmtl, u'SET')
    else:
        raise SqlParserDoesNotYetParseThis(
            "How can there no values in insertion? {}".format(stmtl))
    tables, defines = analyze_into_clause_part(
        stmtl[pos_into_start:pos_into_end], logger)
    # NOTE(review): the column-level ``defines`` returned by
    # analyze_into_clause_part above is immediately discarded and replaced
    # with a single whole-relation element — presumably a deliberate
    # coarsening to "the INSERT touches the whole table", but confirm.
    defines = [InteractedSchemaElement(tables[0], "*", 'relation')]
    return uses, defines
Beispiel #8
0
def analyze_insert_statement(token_list, logger=None):
    """Analyze the token list of an INSERT (or REPLACE) statement.

    Args:
        token_list: iterable of sqlparse tokens for the whole statement.
        logger: optional logger, forwarded to helpers.

    Returns:
        ([], defines) tuple where ``defines`` marks the whole target
        table (all columns, "*") as written.

    Raises:
        SqlParserDoesNotYetParseThis: when an unexpected keyword appears
            where INTO is expected.
    """
    tokens = [tok for tok in token_list if not tok.is_whitespace]

    # Drop an optional IGNORE modifier (INSERT IGNORE INTO ...), so the
    # table reference always sits at a fixed position afterwards.
    if tokens[1].is_keyword and tokens[1].value.upper() == 'IGNORE':
        tokens = tokens[0:1] + tokens[2:]

    if not tokens[1].is_keyword:
        # "INSERT table ..." form — table reference directly at index 1
        tables = extract_insert_table_identifier(tokens[1:2], logger=logger)
    elif tokens[1].value.upper() == 'INTO':
        # "INSERT INTO table ..." form — table reference at index 2
        tables = extract_insert_table_identifier(tokens[2:3], logger=logger)
    else:
        raise SqlParserDoesNotYetParseThis(
            "there should only be INTO and not '{}' in {}".format(
                tokens[1], tokens))
    return [], merge_table_and_columns(tables, "*", logger)
Beispiel #9
0
 def __init__(self, query_string, logger=None):
     """Parse *query_string* and record the schema elements it uses/defines.

     Args:
         query_string: the raw SQL text to analyze.
         logger: optional logger, forwarded to the analyzer functions.

     Raises:
         SqlParserDoesNotParseThis: for statements sqlparse cannot type
             at all ('UNKNOWN').
         SqlParserDoesNotYetParseThis: for statement types this analyzer
             does not support yet.
     """
     self._query_string = query_string
     stmt = sqlparse.parse(query_string)[0]
     stmt_type = stmt.get_type()
     # Dispatch by statement type; everything except SELECT modifies data.
     dispatch = {
         'SELECT': analyze_select_statement,
         'INSERT': analyze_insert_statement,
         'UPDATE': analyze_update_statement,
         'DELETE': analyze_delete_statement,
     }
     if stmt_type in dispatch:
         self._uses, self._defines = dispatch[stmt_type](stmt, logger)
         if stmt_type != 'SELECT':
             self._changing = True
     elif stmt_type == 'UNKNOWN':
         raise SqlParserDoesNotParseThis(
             "not yet supported statement type {} in query {}".format(
                 stmt.get_type(), query_string))
     else:
         raise SqlParserDoesNotYetParseThis(
             "not yet supported statement type {} in query {}".format(
                 stmt.get_type(), query_string))
Beispiel #10
0
    def __init__(self, query_string, logger=None):
        """Parse *query_string* and record the schema elements it uses/defines.

        Args:
            query_string: the raw SQL text to analyze.
            logger: optional logger, forwarded to the analyzer functions.

        Raises:
            SqlParserDoesNotParseThis: for statements sqlparse cannot type
                at all ('UNKNOWN').
            SqlParserDoesNotYetParseThis: for statement types this analyzer
                does not support yet.
        """
        self._query_string = query_string
        stmt = sqlparse.parse(query_string)[0]
        stmt_type = stmt.get_type()
        if stmt_type == 'SELECT':
            self._uses, self._defines, self._aggregate_p = analyze_select_statement(
                stmt.tokens, logger)
        elif stmt_type in ('INSERT', 'REPLACE'):
            # REPLACE is analyzed like INSERT; both write a whole table
            self._uses, self._defines = analyze_insert_statement(
                stmt.tokens, logger)
            self._changing = True
        elif stmt_type == 'UPDATE':
            self._uses, self._defines = analyze_update_statement(
                stmt.tokens, logger)
            self._changing = True
        elif stmt_type == 'DELETE':
            self._uses, self._defines = analyze_delete_statement(
                stmt.tokens, logger)
            self._changing = True
        elif stmt_type == 'UNKNOWN':
            raise SqlParserDoesNotParseThis(
                "not yet supported statement type {} in query {}".format(
                    stmt.get_type(), query_string))
        else:
            raise SqlParserDoesNotYetParseThis(
                "not yet supported statement type {} in query {}".format(
                    stmt.get_type(), query_string))

        # BUG FIX: the sort key used to be ``lambda x: x.__str__``, which
        # sorts by the bound-method OBJECTS (effectively arbitrary order),
        # not by the elements' string representations. ``key=str`` calls
        # __str__ and yields the intended deterministic ordering.
        self._defines = sorted(self._defines, key=str)
        self._uses = sorted(self._uses, key=str)
Beispiel #11
0
def analyze_select_column_clause_part(stmt, logger=None):  # COPIED
    uses = []
    # print type(stmt)
    if type(stmt) == sqlparse.sql.Identifier:
        uses += parse_select_identifier(stmt)
    elif type(stmt) == sqlparse.sql.IdentifierList:
        for token in stmt.get_identifiers():
            if type(token) == sqlparse.sql.Function:
                uses += parse_function_identifier(token, logger)
            elif type(token) == sqlparse.sql.Identifier:
                uses += parse_select_identifier(token, logger)
            elif type(stmt) == sqlparse.sql.Token and stmt.value == "*":
                uses += ["*"]
            else:
                print "warn: no explicit handling of token {} of type {} in {}".format(
                    token, type(token), stmt)
    elif type(stmt) == sqlparse.sql.Token and stmt.value == "*":
        uses += ["*"]
    else:
        print "warn: no explicit handling of token {} of type {}".format(
            stmt, type(stmt))
        raise SqlParserDoesNotYetParseThis("boom moddafucker")
    return uses