Example 1
    def parse_cte(self, idx, tokens: TokenList):
        gidx, group = tokens.token_next(idx, skip_ws=True, skip_cm=True)

        # handle recursive keyword
        if group.match(T.Keyword, values=['RECURSIVE']):
            gidx, group = tokens.token_next(gidx, skip_ws=True, skip_cm=True)

        if not group.is_group:
            return [], None

        # get CTE name
        offset = 1
        cte_name = group.token_first(skip_ws=True, skip_cm=True)
        self.ctes.add(cte_name.value)

        # AS keyword
        offset, as_keyword = group.token_next(offset,
                                              skip_ws=True,
                                              skip_cm=True)
        if not as_keyword.match(T.Keyword, values=['AS']):
            raise RuntimeError(f"CTE does not have AS keyword at index {gidx}")

        offset, parens = group.token_next(offset, skip_ws=True, skip_cm=True)
        if isinstance(parens, Parenthesis) or parens.is_group:
            # parse the CTE body by recursing into the parenthesized subquery
            return cte_name.value, self.recurse(
                TokenList(parens.tokens)).in_tables
        raise RuntimeError(
            f"Token {parens} is not a Parenthesis at index {gidx}")
Example 2
 def __vectorize(self, tokenlist):
     token_list = TokenList(list(tokenlist.flatten()))
     for x in token_list:
         if x.ttype is Comparison:
             # index of the comparison operator
             idx_comp_op = token_list.token_index(x)
             # name of the attribute: the token preceding the operator
             attr = token_list.token_prev(
                 idx_comp_op, skip_ws=True, skip_cm=True)[1].value
             comp_op = x
             if comp_op.value in ('<', '<='):
                 lit_dir = 'ub'  # upper bound
             elif comp_op.value in ('>', '>='):
                 lit_dir = 'lb'  # lower bound
             else:
                 lit_dir = 'bi'  # equality constrains both bounds
             try:
                 # literal value: the token following the operator
                 lit = float(token_list.token_next(
                     idx_comp_op, skip_ws=True, skip_cm=True)[1].value)
             except ValueError:
                 # the right-hand side is not numeric, e.g. a join condition
                 print("Possible join, skipping")
                 continue
             if lit_dir == 'bi':
                 self.query_vec['_'.join([attr, 'lb'])] = lit
                 self.query_vec['_'.join([attr, 'ub'])] = lit
                 continue
             self.query_vec['_'.join([attr, lit_dir])] = lit  # lit_dir is 'lb' or 'ub'
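A self-contained sketch of the same prev/next walk around a comparison operator; the query is invented. In the flattened token stream the attribute name immediately precedes the operator and the literal immediately follows it.

    import sqlparse
    from sqlparse.sql import TokenList
    from sqlparse.tokens import Comparison

    stmt = sqlparse.parse("SELECT * FROM t WHERE a <= 10 AND b = 3")[0]
    token_list = TokenList(list(stmt.flatten()))
    for tok in token_list:
        if tok.ttype is Comparison:
            idx = token_list.token_index(tok)
            attr = token_list.token_prev(idx, skip_ws=True, skip_cm=True)[1].value
            lit = token_list.token_next(idx, skip_ws=True, skip_cm=True)[1].value
            print(attr, tok.value, lit)  # prints "a <= 10", then "b = 3"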
Example 3
    def __projections(self, token, tokenlist):
        idx = tokenlist.token_index(token)
        afs_list_idx, afs = tokenlist.token_next(idx,
                                                 skip_ws=True,
                                                 skip_cm=True)
        afs_list = TokenList(list(afs.flatten()))
        for af in afs_list:  # scan flattened projection tokens for aggregate functions
            if af.value.lower() in ['avg', 'count', 'sum', 'min', 'max']:
                af_idx = afs_list.token_index(af)
                # skip the '(' that follows the function name
                punc_idx, _ = afs_list.token_next(af_idx,
                                                  skip_ws=True,
                                                  skip_cm=True)
                attr_idx, attr = afs_list.token_next(punc_idx,
                                                     skip_ws=True,
                                                     skip_cm=True)
                if attr.ttype is not Wildcard:
                    self.afs.append('_'.join([af.value, attr.value]))
                else:
                    # e.g. COUNT(*): no attribute name to record
                    self.afs.append(af.value)
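The lookahead above can be exercised in isolation; a sketch with an invented projection list (after an aggregate's name comes '(' and then the attribute, or a wildcard):

    import sqlparse
    from sqlparse.sql import TokenList

    stmt = sqlparse.parse("SELECT avg(price), count(*) FROM sales")[0]
    sel = stmt.token_first(skip_ws=True, skip_cm=True)  # the SELECT keyword
    _, projections = stmt.token_next(stmt.token_index(sel),
                                     skip_ws=True, skip_cm=True)
    afs_list = TokenList(list(projections.flatten()))
    for af in afs_list:
        if af.value.lower() in ('avg', 'count', 'sum', 'min', 'max'):
            af_idx = afs_list.token_index(af)
            punc_idx, _ = afs_list.token_next(af_idx, skip_ws=True, skip_cm=True)  # '('
            _, attr = afs_list.token_next(punc_idx, skip_ws=True, skip_cm=True)
            print(af.value, attr.value)  # prints "avg price", then "count *"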
Example 4
 def filter_identifier_list(tkn_list: TokenList, token: Token):
     """Return True if `token` sits directly between SELECT and FROM
     (a missing neighbour on either side is tolerated)."""
     index = tkn_list.token_index(token)
     # token_prev/token_next return (None, None) when no such token exists
     prev_token: Token = tkn_list.token_prev(index)[1]
     if prev_token is not None:
         if not prev_token.match(DML, 'SELECT'):
             return False
     next_token: Token = tkn_list.token_next(index)[1]
     if next_token is not None:
         if not next_token.match(Keyword, 'FROM'):
             return False
     return True
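A quick check with an invented statement, assuming the imports the snippet implies (DML and Keyword from sqlparse.tokens; Token and TokenList from sqlparse.sql):

    import sqlparse
    from sqlparse.sql import IdentifierList

    stmt = sqlparse.parse("SELECT a, b FROM t")[0]
    for tok in stmt.tokens:
        if isinstance(tok, IdentifierList):
            print(filter_identifier_list(stmt, tok))  # True: 'a, b' is between SELECT and FROM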
Example 5
    def extract_from_column(self):
        '''
        Collect all tokens between the 'DML SELECT' and 'Keyword FROM' tokens, e.g.:

        [<DML 'SELECT' at 0x3655A08>, <Whitespace ' ' at 0x3655A68>, <IdentifierList 'me.Sap...' at 0x366E228>,
         <Newline ' ' at 0x3665948>, <Keyword 'FROM' at 0x36659A8>, <Whitespace ' ' at 0x3665A08>,
         <IdentifierList 'SODS2....' at 0x366E390>,
         <Whitespace ' ' at 0x3667228>, <IdentifierList 't,SHAR...' at 0x366E480>, <Newline ' ' at 0x3667528>]
        '''
        tokens = self.getTokens()
        tokenlist = TokenList(tokens)
        cols_idx, cols_item = [], []
        # cols_item keeps only the columns between SELECT and FROM.
        # Note: a statement containing UNION / UNION ALL has several such
        # spans, so cols_group collects one entry per SELECT clause.
        cols_group = []
        fetch_col_flag = False
        for idx, item in enumerate(tokens):
            _, before_item = tokenlist.token_prev(idx, skip_ws=True)
            _, next_item = tokenlist.token_next(idx, skip_ws=True)
            if not next_item:
                break
            # capture the first column index of a SELECT clause
            if isinstance(item, (IdentifierList, Identifier)) and \
                    (before_item.ttype == Keyword.DML or before_item.value.upper() == 'DISTINCT'):
                cols_idx.append(idx)
                fetch_col_flag = True
                cols_item = []
            if fetch_col_flag:
                cols_item.append(item)
            # capture the last column index of a SELECT clause
            if isinstance(item, (IdentifierList, Identifier)) and \
                    next_item.ttype is Keyword and next_item.value.upper() == 'FROM':
                cols_idx.append(idx)
                fetch_col_flag = False
                cols_group.append(''.join([tok.value for tok in cols_item]))

        # cols_idx holds flat (start, end) pairs, e.g. [10, 12, 24, 26];
        # expand each pair to its full range: [10, 11, 12, 24, 25, 26]
        cols_idxes = sum([list(range(cols_idx[2 * i], cols_idx[2 * i + 1] + 1))
                          for i in range(len(cols_idx) // 2)], [])

        keep_tokens = [tok for idx, tok in enumerate(tokens) if idx not in cols_idxes]
        self.tokens = keep_tokens
        self.tokens_val = [tok.value for tok in tokens]
        return cols_group
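The pair-expansion step is easy to verify in isolation with hypothetical indexes:

    cols_idx = [10, 12, 24, 26]  # two (start, end) pairs
    cols_idxes = sum([list(range(cols_idx[2 * i], cols_idx[2 * i + 1] + 1))
                      for i in range(len(cols_idx) // 2)], [])
    print(cols_idxes)  # [10, 11, 12, 24, 25, 26]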
Example 6
def _extract_limit_from_query(statement: TokenList) -> Optional[int]:
    """
    Extract limit clause from SQL statement.

    :param statement: SQL statement
    :return: Limit extracted from query, None if no limit present in statement
    """
    idx, _ = statement.token_next_by(m=(Keyword, "LIMIT"))
    if idx is not None:
        _, token = statement.token_next(idx=idx)
        if token:
            if isinstance(token, IdentifierList):
                # In case of "LIMIT <offset>, <limit>", find comma and extract
                # first succeeding non-whitespace token
                idx, _ = token.token_next_by(m=(sqlparse.tokens.Punctuation, ","))
                _, token = token.token_next(idx=idx)
            if token and token.ttype == sqlparse.tokens.Literal.Number.Integer:
                return int(token.value)
    return None
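Both LIMIT forms come out the same way; an illustrative check (invented queries):

    import sqlparse

    print(_extract_limit_from_query(sqlparse.parse("SELECT * FROM t LIMIT 10")[0]))     # 10
    # "LIMIT <offset>, <limit>": the token after the comma is the limit
    print(_extract_limit_from_query(sqlparse.parse("SELECT * FROM t LIMIT 5, 10")[0]))  # 10
    print(_extract_limit_from_query(sqlparse.parse("SELECT * FROM t")[0]))              # None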
Example 7
    def extract_from_column(self):
        '''
        Pick up all tokens between the 'DML SELECT' and 'Keyword FROM' tokens, e.g.:

        [<DML 'SELECT' at 0x3655A08>, <Whitespace ' ' at 0x3655A68>, <IdentifierList 'me.Sap...' at 0x366E228>,
         <Newline ' ' at 0x3665948>, <Keyword 'FROM' at 0x36659A8>, <Whitespace ' ' at 0x3665A08>,
         <IdentifierList 'SODS2....' at 0x366E390>,
         <Whitespace ' ' at 0x3667228>, <IdentifierList 't,SHAR...' at 0x366E480>, <Newline ' ' at 0x3667528>]
        '''
        tokens = self.getTokens()
        tokenlist = TokenList(tokens)
        cols_idx, cols_item = [], []
        cols_group = []
        fetch_col_flag = False
        for idx, item in enumerate(tokens):
            _, before_item = tokenlist.token_prev(idx, skip_ws=True)
            _, next_item = tokenlist.token_next(idx, skip_ws=True)
            if not next_item:
                break
            # capture the first column index of a SELECT clause
            if isinstance(item, (IdentifierList, Identifier)) and \
                    (before_item.ttype == Keyword.DML or before_item.value.upper() == 'DISTINCT'):
                cols_idx.append(idx)
                fetch_col_flag = True
                cols_item = []
            if fetch_col_flag:
                cols_item.append(item)
            # capture the last column index of a SELECT clause
            if isinstance(item, (IdentifierList, Identifier)) and \
                    next_item.ttype is Keyword and next_item.value.upper() == 'FROM':
                cols_idx.append(idx)
                fetch_col_flag = False
                cols_group.append(cols_item)

        # expand the flat (start, end) pairs in cols_idx into full index ranges
        cols_idxes = sum([list(range(cols_idx[2 * i], cols_idx[2 * i + 1] + 1))
                          for i in range(len(cols_idx) // 2)], [])

        left_tokens = [item for idx, item in enumerate(tokens) if idx not in cols_idxes]
Example 8
 def _next_non_empty_token(
         idx: int,
         token_list: TokenList) -> Tuple[Optional[int], Optional[Token]]:
     return token_list.token_next(idx, skip_ws=True, skip_cm=True)
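A usage sketch with an invented statement; the helper steps over both whitespace and the inline comment:

    import sqlparse

    stmt = sqlparse.parse("SELECT /* hint */ col FROM t")[0]
    idx, tok = _next_non_empty_token(0, stmt)  # a Statement is a TokenList
    print(tok.value)  # col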