def __init__(self, full_text, text_before_cursor):
    """Build the completion context for a (possibly partial) SQL statement.

    full_text          -- the entire buffer contents
    text_before_cursor -- the buffer contents up to the cursor position
    """
    self.identifier = None

    # Grab the partially-typed word at the cursor (may be empty).
    partial = last_word(text_before_cursor, include='many_punctuations')
    self.word_before_cursor = partial

    # Strip out CTEs so the rest of the analysis sees them as local tables.
    full_text, text_before_cursor, self.local_tables = isolate_query_ctes(
        full_text, text_before_cursor)

    self.text_before_cursor_including_last_word = text_before_cursor

    # If we've partially typed a word then word_before_cursor won't be an
    # empty string. In that case we want to remove the partially typed
    # string before sending it to the sqlparser. Otherwise the last token
    # will always be the partially typed string which renders the smart
    # completion useless because it will always return the list of
    # keywords as completion.
    if not partial:
        parsed = sqlparse.parse(text_before_cursor)
    elif partial[-1] == '(' or partial[0] == '\\':
        # An open-paren or backslash-command prefix is meaningful to the
        # parser, so keep the partial word in place.
        parsed = sqlparse.parse(text_before_cursor)
    else:
        text_before_cursor = text_before_cursor[:-len(partial)]
        parsed = sqlparse.parse(text_before_cursor)
        self.identifier = parse_partial_identifier(partial)

    # Narrow everything down to the single statement under the cursor.
    full_text, text_before_cursor, parsed = _split_multiple_statements(
        full_text, text_before_cursor, parsed)

    self.full_text = full_text
    self.text_before_cursor = text_before_cursor
    self.parsed = parsed

    # Fall back to '' when there is no parsed statement or no previous token.
    if parsed:
        self.last_token = parsed.token_prev(len(parsed.tokens))[1] or ''
    else:
        self.last_token = ''
def find_matches(self, text, collection, mode='fuzzy', meta=None):
    """Find completion matches for the given text.

    Given the user's input text and a collection of available
    completions, find completions matching the last word of the
    text.

    `collection` can be either a list of strings or a list of Candidate
    namedtuples.
    `mode` can be either 'fuzzy', or 'strict'
        'fuzzy': fuzzy matching, ties broken by name prevalance
        `keyword`: start only matching, ties broken by keyword prevalance

    Returns a list of Match namedtuples, each wrapping a prompt_toolkit
    Completion instance plus a sort-priority tuple, for any matches found
    in the collection of available completions.
    """
    if not collection:
        return []

    # Completions whose meta-type appears earlier in this list sort higher.
    priority_order = [
        'keyword', 'function', 'procedure', 'view', 'table', 'datatype',
        'column', 'table alias', 'join', 'name join', 'fk join',
        'table format'
    ]
    type_priority = priority_order.index(
        meta) if meta in priority_order else -1
    text = last_word(text, include='most_punctuations').lower()
    text_len = len(text)

    if text and text[0] == '"':
        # text starts with double quote; user is manually escaping a name
        # Match on everything that follows the double-quote. Note that
        # text_len is calculated before removing the quote, so the
        # Completion.position value is correct
        text = text[1:]

    if mode == 'fuzzy':
        fuzzy = True
        priority_func = self.prioritizer.name_count
    else:
        fuzzy = False
        priority_func = self.prioritizer.keyword_count

    # Construct a `_match` function for either fuzzy or non-fuzzy matching
    # The match function returns a 2-tuple used for sorting the matches,
    # or None if the item doesn't match
    # Note: higher priority values mean more important, so use negative
    # signs to flip the direction of the tuple
    if fuzzy:
        # Build a subsequence regex: each input char may be separated by
        # any (minimal) run of other characters.
        regex = '.*?'.join(map(re.escape, text))
        pat = re.compile('(%s)' % regex)

        def _match(item):
            if item.lower()[:len(text) + 1] in (text, text + ' '):
                # Exact match of first word in suggestion
                # This is to get exact alias matches to the top
                # E.g. for input `e`, 'Entries E' should be on top
                # (before e.g. `EndUsers EU`)
                return float('Infinity'), -1
            r = pat.search(self.unescape_name(item.lower()))
            if r:
                # Shorter match spans and earlier match starts score higher.
                return -len(r.group()), -r.start()
    else:
        match_end_limit = len(text)

        def _match(item):
            # Prefix-only matching: the item must contain `text` starting
            # at or before position 0..match_end_limit.
            match_point = item.lower().find(text, 0, match_end_limit)
            if match_point >= 0:
                # Use negative infinity to force keywords to sort after all
                # fuzzy matches
                return -float('Infinity'), -match_point

    matches = []
    for cand in collection:
        item, display_meta, prio, prio2, display = cand, meta, 0, 0, cand
        sort_key = _match(cand)

        if sort_key:
            if display_meta and len(display_meta) > 50:
                # Truncate meta-text to 50 characters, if necessary
                display_meta = display_meta[:47] + u'...'

            # Lexical order of items in the collection, used for
            # tiebreaking items with the same match group length and start
            # position. Since we use *higher* priority to mean "more
            # important," we use -ord(c) to prioritize "aa" > "ab" and end
            # with 1 to prioritize shorter strings (ie "user" > "users").
            # We first do a case-insensitive sort and then a
            # case-sensitive one as a tie breaker.
            # We also use the unescape_name to make sure quoted names have
            # the same priority as unquoted names.
            lexical_priority = (
                tuple(0 if c in (' _') else -ord(c)
                      for c in self.unescape_name(item.lower())) + (1, )
                + tuple(c for c in item))

            item = self.case(item)
            display = self.case(display)
            priority = (sort_key, type_priority, prio, priority_func(item),
                        prio2, lexical_priority)
            matches.append(
                Match(completion=Completion(text=item,
                                            start_position=-text_len,
                                            display_meta=display_meta,
                                            display=display),
                      priority=priority))
    return matches
def suggest_based_on_last_token(token, stmt):
    """Return a tuple of suggestion objects appropriate for the last token.

    token -- either a plain keyword string (lowercased internally) or a
             sqlparse token/token-group preceding the cursor
    stmt  -- the parsed statement context (provides text_before_cursor,
             get_tables, reduce_to_prev_keyword, etc.)

    Recurses on itself when the interesting keyword lies further back in
    the statement (e.g. inside WHERE clauses or after commas).
    """
    if isinstance(token, string_types):
        token_v = token.lower()
    elif token.value.lower() == "exec":
        # if exec, then it must be a Stored Procedure
        return (Procedure(),)
    elif isinstance(token, Comparison):
        # If 'token' is a Comparison type such as
        # 'select * FROM abc a JOIN def d ON a.id = d.'. Then calling
        # token.value on the comparison type will only return the lhs of the
        # comparison. In this case a.id. So we need to do token.tokens to get
        # both sides of the comparison and pick the last token out of that
        # list.
        token_v = token.tokens[-1].value.lower()
    elif isinstance(token, Where):
        # sqlparse groups all tokens from the where clause into a single token
        # list. This means that token.value may be something like
        # 'where foo > 5 and '. We need to look "inside" token.tokens to handle
        # suggestions in complicated where clauses correctly
        prev_keyword = stmt.reduce_to_prev_keyword()
        return suggest_based_on_last_token(prev_keyword, stmt)
    elif isinstance(token, Identifier):
        # If the previous token is an identifier, we can suggest datatypes if
        # we're in a parenthesized column/field list, e.g.:
        #       CREATE TABLE foo (Identifier <CURSOR>
        #       CREATE FUNCTION foo (Identifier <CURSOR>
        # If we're not in a parenthesized list, the most likely scenario is the
        # user is about to specify an alias, e.g.:
        #       SELECT Identifier <CURSOR>
        #       SELECT foo FROM Identifier <CURSOR>
        prev_keyword, _ = find_prev_keyword(stmt.text_before_cursor)
        if prev_keyword and prev_keyword.value == '(':
            # Suggest datatypes
            return suggest_based_on_last_token('type', stmt)
        else:
            return (Keyword(),)
    else:
        token_v = token.value.lower()

    if not token:
        return (Keyword(),)
    elif token_v.endswith('('):
        p = sqlparse.parse(stmt.text_before_cursor)[0]

        if p.tokens and isinstance(p.tokens[-1], Where):
            # Four possibilities:
            #  1 - Parenthesized clause like "WHERE foo AND ("
            #        Suggest columns/functions
            #  2 - Function call like "WHERE foo("
            #        Suggest columns/functions
            #  3 - Subquery expression like "WHERE EXISTS ("
            #        Suggest keywords, in order to do a subquery
            #  4 - Subquery OR array comparison like "WHERE foo = ANY("
            #        Suggest columns/functions AND keywords. (If we wanted to
            #        be really fancy, we could suggest only array-typed
            #        columns)

            column_suggestions = suggest_based_on_last_token('where', stmt)

            # Check for a subquery expression (cases 3 & 4)
            where = p.tokens[-1]
            prev_tok = where.token_prev(len(where.tokens) - 1)[1]

            if isinstance(prev_tok, Comparison):
                # e.g. "SELECT foo FROM bar WHERE foo = ANY("
                prev_tok = prev_tok.tokens[-1]

            prev_tok = prev_tok.value.lower()
            if prev_tok == 'exists':
                return (Keyword(),)
            else:
                return column_suggestions

        # Get the token before the parens
        prev_tok = p.token_prev(len(p.tokens) - 1)[1]

        if (prev_tok and prev_tok.value
                and prev_tok.value.lower().split(' ')[-1] == 'using'):
            # tbl1 INNER JOIN tbl2 USING (col1, col2)
            tables = stmt.get_tables('before')

            # suggest columns that are present in more than one table
            return (Column(table_refs=tables,
                           require_last_table=True,
                           local_tables=stmt.local_tables),)

        elif p.token_first().value.lower() == 'select':
            # If the lparen is preceded by a space chances are we're about to
            # do a sub-select.
            if last_word(stmt.text_before_cursor,
                         'all_punctuations').startswith('('):
                return (Keyword(),)
        prev_prev_tok = prev_tok and p.token_prev(p.token_index(prev_tok))[1]
        if prev_prev_tok and prev_prev_tok.normalized == 'INTO':
            return (
                Column(table_refs=stmt.get_tables('insert'),
                       context='insert'),
            )
        # We're probably in a function argument list
        return (Column(table_refs=extract_tables(stmt.full_text),
                       local_tables=stmt.local_tables, qualifiable=True),)
    elif token_v == 'set':
        return (Column(table_refs=stmt.get_tables(),
                       local_tables=stmt.local_tables),)
    elif token_v in ('select', 'where', 'having', 'by', 'distinct'):
        # Check for a table alias
        tables = stmt.get_tables()
        return (Column(table_refs=tables, local_tables=stmt.local_tables,
                       qualifiable=True),
                Function(),
                Keyword(token_v.upper()),)
    elif token_v == 'as':
        # Don't suggest anything for aliases
        return ()
    elif (token_v.endswith('join') and token.is_keyword) or \
         (token_v in ('copy', 'from', 'update', 'into', 'describe',
                      'truncate')):
        tables = extract_tables(stmt.text_before_cursor)
        is_join = token_v.endswith('join') and token.is_keyword

        suggest = []
        if token_v == 'from' or is_join:
            suggest.append(FromClauseItem(table_refs=tables,
                                          local_tables=stmt.local_tables))
        elif token_v == 'truncate':
            suggest.append(Table())
        else:
            suggest.extend((Table(), View()))

        if is_join and _allow_join(stmt.parsed):
            tables = stmt.get_tables('before')
            suggest.append(Join(table_refs=tables))

        return tuple(suggest)
    elif token_v == 'function':
        # stmt.get_previous_token will fail for e.g.
        # `SELECT 1 FROM functions WHERE function:`
        try:
            prev = stmt.get_previous_token(token).value.lower()
            if prev in ('drop', 'alter', 'create', 'create or replace'):
                return (Function(usage='signature'),)
        except ValueError:
            pass
        return tuple()
    elif token_v in ('table', 'view'):
        # E.g. 'ALTER TABLE <tablname>'
        rel_type = {'table': Table, 'view': View,
                    'function': Function}[token_v]
        return (rel_type(),)
    elif token_v == 'column':
        # E.g. 'ALTER TABLE foo ALTER COLUMN bar
        return (Column(table_refs=stmt.get_tables()),)
    elif token_v == 'on':
        tables = stmt.get_tables('before')
        parent = (stmt.identifier and stmt.identifier.get_parent_name()) \
            or None
        if parent:
            # "ON parent.<suggestion>"
            # parent can be a table alias
            filteredtables = tuple(t for t in tables if identifies(parent, t))
            sugs = [Column(table_refs=filteredtables,
                           local_tables=stmt.local_tables),
                    Table(), View(), Function()]
            if filteredtables and _allow_join_condition(stmt.parsed):
                sugs.append(JoinCondition(table_refs=tables,
                                          parent=filteredtables[-1]))
            return tuple(sugs)
        else:
            # ON <suggestion>
            # Use table alias if there is one, otherwise the table name
            aliases = tuple(t.ref for t in tables)
            if _allow_join_condition(stmt.parsed):
                return (Alias(aliases=aliases), JoinCondition(
                    table_refs=tables, parent=None))
            else:
                return (Alias(aliases=aliases),)
    elif token_v.endswith(',') or token_v in ('=', 'and', 'or'):
        prev_keyword = stmt.reduce_to_prev_keyword()
        if prev_keyword:
            return suggest_based_on_last_token(prev_keyword, stmt)
        else:
            return ()
    elif token_v == 'type':
        # BUGFIX: was `token_v in 'type'`, which is a *substring* test —
        # tokens like 't', 'ty', 'yp', 'pe', and even '' would wrongly
        # trigger datatype suggestions. Equality matches only the 'type'
        # keyword (also reached via the Identifier-branch recursion above).
        suggestions = [Datatype()]
        return tuple(suggestions)
    elif token_v in {'alter', 'create', 'drop'}:
        return (Keyword(token_v.upper()),)
    elif token.is_keyword:
        # token is a keyword we haven't implemented any special handling for
        # go backwards in the query until we find one we do recognize
        prev_keyword = stmt.reduce_to_prev_keyword(n_skip=1)
        if prev_keyword:
            return suggest_based_on_last_token(prev_keyword, stmt)
        else:
            return (Keyword(token_v.upper()),)
    else:
        return (Keyword(),)