def __vectorize(self, tokenlist):
    """Extract range predicates from a WHERE clause into ``self.query_vec``.

    Walks the flattened token stream; for every comparison operator it
    records the preceding identifier as the attribute name and the
    following numeric literal as a lower bound (``lb``), upper bound
    (``ub``), or both (equality, stored under both keys).  Comparisons
    whose right-hand side is not a number (e.g. join conditions like
    ``a.id = b.id``) are skipped.

    Cleanup vs. previous revision: removed leftover debug ``print`` calls
    and commented-out code; replaced ``==`` chains with membership tests.
    """
    token_list = TokenList(list(tokenlist.flatten()))
    for tok in token_list:
        if tok.ttype is not Comparison:
            continue
        idx_comp_op = token_list.token_index(tok)  # index of the comparison operator
        # Attribute name is the identifier immediately before the operator.
        attr = token_list.token_prev(idx_comp_op, skip_ws=True, skip_cm=True)[1].value
        if tok.value in ('<', '<='):
            lit_dir = 'ub'
        elif tok.value in ('>', '>='):
            lit_dir = 'lb'
        else:
            lit_dir = 'bi'  # equality constrains both bounds
        try:
            lit = float(
                token_list.token_next(idx_comp_op, skip_ws=True, skip_cm=True)[1].value)
        except ValueError:
            # Right-hand side is not numeric: likely a join predicate, skip it.
            continue
        if lit_dir == 'bi':
            self.query_vec['_'.join([attr, 'lb'])] = lit
            self.query_vec['_'.join([attr, 'ub'])] = lit
            continue
        self.query_vec['_'.join([attr, lit_dir])] = lit  # lit_dir is 'lb' or 'ub'
def filter_identifier_list(tkn_list: TokenList, token: Token):
    """Return True iff *token* is bracketed by SELECT and FROM.

    A neighbour that does not exist (token at either end of the list)
    is treated as acceptable: only a *present* neighbour that fails to
    match SELECT / FROM rejects the token.
    """
    pos = tkn_list.token_index(token)
    before = tkn_list.token_prev(pos)[1]   # None when token is first
    after = tkn_list.token_next(pos)[1]    # None when token is last
    if before is not None and not before.match(DML, 'SELECT'):
        return False
    if after is not None and not after.match(Keyword, 'FROM'):
        return False
    return True
def __projections(self, token, tokenlist):
    """Collect aggregate-function projections into ``self.afs``.

    Starting from *token* (the SELECT keyword), takes the next
    significant token as the projection list, flattens it, and for each
    aggregate keyword (avg/count/sum/min/max) steps over the opening
    parenthesis to its argument.  A concrete column argument is recorded
    as ``"<func>_<column>"``; a wildcard argument records just the
    function name.
    """
    sel_idx = tokenlist.token_index(token)
    _, projection = tokenlist.token_next(sel_idx, skip_ws=True, skip_cm=True)
    flat = TokenList(list(projection.flatten()))
    for tok in flat:
        if tok.value.lower() not in ('avg', 'count', 'sum', 'min', 'max'):
            continue
        pos = flat.token_index(tok)
        # Skip the '(' that follows the aggregate keyword, then read its argument.
        paren_pos, _ = flat.token_next(pos, skip_ws=True, skip_cm=True)
        _, arg = flat.token_next(paren_pos, skip_ws=True, skip_cm=True)
        entry = tok.value if arg.ttype is Wildcard else '_'.join([tok.value, arg.value])
        self.afs.append(entry)