def __init__(self, full_text, text_before_cursor):
    self.identifier = None
    self.word_before_cursor = word_before_cursor = last_word(
        text_before_cursor, include='many_punctuations')
    full_text, text_before_cursor, self.local_tables = isolate_query_ctes(
        full_text, text_before_cursor)
    self.text_before_cursor_including_last_word = text_before_cursor

    # If we've partially typed a word then word_before_cursor won't be an
    # empty string. In that case we want to remove the partially typed
    # string before sending it to the sqlparser. Otherwise the last token
    # will always be the partially typed string, which renders the smart
    # completion useless because it will always return the list of
    # keywords as completion.
    if self.word_before_cursor:
        if word_before_cursor[-1] == '(' or word_before_cursor[0] == '\\':
            parsed = sqlparse.parse(text_before_cursor)
        else:
            text_before_cursor = text_before_cursor[:-len(word_before_cursor)]
            parsed = sqlparse.parse(text_before_cursor)
            self.identifier = parse_partial_identifier(word_before_cursor)
    else:
        parsed = sqlparse.parse(text_before_cursor)

    full_text, text_before_cursor, parsed = _split_multiple_statements(
        full_text, text_before_cursor, parsed)

    self.full_text = full_text
    self.text_before_cursor = text_before_cursor
    self.parsed = parsed

    self.last_token = parsed and parsed.token_prev(len(parsed.tokens))[1] or ''
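
# Minimal usage sketch (illustrative only; not called by the completer). It
# assumes the enclosing class is named SqlStatement, as in pgcli, and shows
# why the half-typed word is stripped before sqlparse runs: the fragment
# itself must never become the statement's last token.
def _example_partial_word_stripping():
    stmt = SqlStatement('SELECT id, na FROM users', 'SELECT id, na')
    # The fragment 'na' is captured separately...
    assert stmt.word_before_cursor == 'na'
    # ...and removed from the text handed to the parser, so the parsed
    # statement ends at 'SELECT id, '.
    assert stmt.text_before_cursor == 'SELECT id, '
    return stmt
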
def get_column_matches(self, suggestion, word_before_cursor):
    tables = suggestion.table_refs
    do_qualify = suggestion.qualifiable and {
        'always': True,
        'never': False,
        'if_more_than_one_table': len(tables) > 1,
    }[self.qualify_columns]

    def qualify(col, tbl):
        return (tbl + '.' + self.case(col)) if do_qualify else self.case(col)

    scoped_cols = self.populate_scoped_cols(tables, suggestion.local_tables)
    colit = scoped_cols.items

    def make_cand(name, ref):
        synonyms = (name, generate_alias(self.case(name)))
        return Candidate(qualify(name, ref), 0, 'column', synonyms)

    flat_cols = []
    for t, cols in colit():
        for c in cols:
            flat_cols.append(make_cand(c.name, t.ref))

    if suggestion.require_last_table:
        # require_last_table is used for 'tbl1 JOIN tbl2 USING (...', which
        # should suggest only columns that appear in the last table and at
        # least one other
        ltbl = tables[-1].ref
        flat_cols = list(
            set(c.name for t, cs in colit() if t.ref == ltbl for c in cs) &
            set(c.name for t, cs in colit() if t.ref != ltbl for c in cs))

    lastword = last_word(word_before_cursor, include='most_punctuations')
    if lastword == '*':
        if self.asterisk_column_order == 'alphabetic':
            flat_cols.sort()
            for cols in scoped_cols.values():
                cols.sort(key=operator.attrgetter('name'))
        if (lastword != word_before_cursor and len(tables) == 1 and
                word_before_cursor[-len(lastword) - 1] == '.'):
            # User typed x.*; replicate "x." for all columns except the
            # first, which gets the original (as we only replace the "*")
            sep = ', ' + word_before_cursor[:-1]
            collist = sep.join(self.case(c.completion) for c in flat_cols)
        else:
            collist = ', '.join(
                qualify(c.name, t.ref) for t, cs in colit() for c in cs)

        return [Match(
            completion=Completion(collist, -1, display_meta='columns',
                                  display='*'),
            priority=(1, 1, 1))]

    return self.find_matches(word_before_cursor, flat_cols, meta='column')
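
# Standalone sketch of the qualification policy used above (illustrative
# only; the real lookup reads self.qualify_columns). Columns are prefixed
# with their table reference only when the configured policy asks for it.
def _example_qualify_policy():
    def do_qualify(policy, n_tables):
        return {
            'always': True,
            'never': False,
            'if_more_than_one_table': n_tables > 1,
        }[policy]

    assert do_qualify('if_more_than_one_table', 1) is False
    assert do_qualify('if_more_than_one_table', 2) is True
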
def find_matches(self, text, collection, mode='fuzzy', meta=None):
    """Find completion matches for the given text.

    Given the user's input text and a collection of available
    completions, find completions matching the last word of the text.

    `collection` can be either a list of strings or a list of Candidate
    namedtuples.
    `mode` can be either 'fuzzy' or 'strict':
        'fuzzy': fuzzy matching, ties broken by name prevalence
        'strict': start-only matching, ties broken by keyword prevalence

    Returns a list of Match namedtuples wrapping prompt_toolkit Completion
    instances for any matches found in the collection of available
    completions.
    """
    if not collection:
        return []

    prio_order = [
        'keyword', 'function', 'view', 'table', 'datatype', 'database',
        'schema', 'column', 'table alias', 'join', 'name join', 'fk join'
    ]
    type_priority = prio_order.index(meta) if meta in prio_order else -1

    text = last_word(text, include='most_punctuations').lower()
    text_len = len(text)

    if text and text[0] == '"':
        # text starts with a double quote; the user is manually escaping a
        # name. Match on everything that follows the double quote. Note that
        # text_len is calculated before removing the quote, so the
        # Completion.position value is correct.
        text = text[1:]

    if mode == 'fuzzy':
        fuzzy = True
        priority_func = self.prioritizer.name_count
    else:
        fuzzy = False
        priority_func = self.prioritizer.keyword_count

    # Construct a `_match` function for either fuzzy or non-fuzzy matching.
    # The match function returns a 2-tuple used for sorting the matches,
    # or None if the item doesn't match.
    # Note: higher priority values mean more important, so use negative
    # signs to flip the direction of the tuple.
    if fuzzy:
        regex = '.*?'.join(map(re.escape, text))
        pat = re.compile('(%s)' % regex)

        def _match(item):
            if item.lower()[:len(text) + 1] in (text, text + ' '):
                # Exact match of first word in suggestion.
                # This is to get exact alias matches to the top.
                # E.g. for input `e`, 'Entries E' should be on top
                # (before e.g. `EndUsers EU`).
                return float('Infinity'), -1
            r = pat.search(self.unescape_name(item.lower()))
            if r:
                return -len(r.group()), -r.start()
    else:
        match_end_limit = len(text)

        def _match(item):
            match_point = item.lower().find(text, 0, match_end_limit)
            if match_point >= 0:
                # Use negative infinity to force keywords to sort after
                # all fuzzy matches
                return -float('Infinity'), -match_point

    matches = []
    for cand in collection:
        if isinstance(cand, _Candidate):
            item, prio, display_meta, synonyms, prio2 = cand
            if display_meta is None:
                display_meta = meta
            syn_matches = (_match(x) for x in synonyms)
            # Nones need to be removed to avoid max() crashing in Python 3
            syn_matches = [m for m in syn_matches if m]
            sort_key = max(syn_matches) if syn_matches else None
        else:
            item, display_meta, prio, prio2 = cand, meta, 0, 0
            sort_key = _match(cand)

        if sort_key:
            if display_meta and len(display_meta) > 50:
                # Truncate meta-text to 50 characters, if necessary
                display_meta = display_meta[:47] + u'...'

            # Lexical order of items in the collection, used for
            # tiebreaking items with the same match group length and start
            # position. Since we use *higher* priority to mean "more
            # important," we use -ord(c) to prioritize "aa" > "ab" and end
            # with 1 to prioritize shorter strings (i.e. "user" > "users").
            # We first do a case-insensitive sort and then a
            # case-sensitive one as a tie breaker.
            # We also use unescape_name so that quoted names have the same
            # priority as unquoted names.
            lexical_priority = (
                tuple(0 if c in ' _' else -ord(c)
                      for c in self.unescape_name(item.lower())) +
                (1,) + tuple(c for c in item))

            item = self.case(item)
            priority = (
                sort_key, type_priority, prio, priority_func(item),
                prio2, lexical_priority
            )

            matches.append(
                Match(
                    completion=Completion(
                        item,
                        -text_len,
                        display_meta=display_meta
                    ),
                    priority=priority
                )
            )

    return matches
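
# Standalone sketch of the fuzzy scoring above (illustrative only): the
# (-length, -start) sort key means tighter matches beat looser ones and,
# for equal span length, earlier matches beat later ones.
def _example_fuzzy_scoring():
    import re

    text = 'usr'
    pat = re.compile('(%s)' % '.*?'.join(map(re.escape, text)))

    def score(item):
        r = pat.search(item.lower())
        return (-len(r.group()), -r.start()) if r else None

    # 'usr' is matched exactly (span length 3); 'users' needs the looser
    # span 'user' (length 4), so it scores lower.
    assert score('usr') > score('users')
    # Equal span length: the match starting at position 0 wins.
    assert score('users') > score('last_user')
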
def suggest_based_on_last_token(token, stmt):
    if isinstance(token, str):
        token_v = token.lower()
    elif isinstance(token, Comparison):
        # If 'token' is a Comparison type such as
        # 'select * FROM abc a JOIN def d ON a.id = d.', then calling
        # token.value on the comparison type will only return the lhs of the
        # comparison (in this case a.id). So we need to use token.tokens to
        # get both sides of the comparison and pick the last token out of
        # that list.
        token_v = token.tokens[-1].value.lower()
    elif isinstance(token, Where):
        # sqlparse groups all tokens from the where clause into a single
        # token list. This means that token.value may be something like
        # 'where foo > 5 and '. We need to look "inside" token.tokens to
        # handle suggestions in complicated where clauses correctly.
        prev_keyword = stmt.reduce_to_prev_keyword()
        return suggest_based_on_last_token(prev_keyword, stmt)
    elif isinstance(token, Identifier):
        # If the previous token is an identifier, we can suggest datatypes if
        # we're in a parenthesized column/field list, e.g.:
        #     CREATE TABLE foo (Identifier <CURSOR>
        #     CREATE FUNCTION foo (Identifier <CURSOR>
        # If we're not in a parenthesized list, the most likely scenario is
        # that the user is about to specify an alias, e.g.:
        #     SELECT Identifier <CURSOR>
        #     SELECT foo FROM Identifier <CURSOR>
        prev_keyword, _ = find_prev_keyword(stmt.text_before_cursor)
        if prev_keyword and prev_keyword.value == '(':
            # Suggest datatypes
            return suggest_based_on_last_token('type', stmt)
        else:
            return (Keyword(),)
    else:
        token_v = token.value.lower()

    if not token:
        return (Keyword(), Special())
    elif token_v.endswith('('):
        p = sqlparse.parse(stmt.text_before_cursor)[0]

        if p.tokens and isinstance(p.tokens[-1], Where):
            # Four possibilities:
            #  1 - Parenthesized clause like "WHERE foo AND ("
            #      Suggest columns/functions
            #  2 - Function call like "WHERE foo("
            #      Suggest columns/functions
            #  3 - Subquery expression like "WHERE EXISTS ("
            #      Suggest keywords, in order to do a subquery
            #  4 - Subquery OR array comparison like "WHERE foo = ANY("
            #      Suggest columns/functions AND keywords. (If we wanted to
            #      be really fancy, we could suggest only array-typed
            #      columns.)

            column_suggestions = suggest_based_on_last_token('where', stmt)

            # Check for a subquery expression (cases 3 & 4)
            where = p.tokens[-1]
            prev_tok = where.token_prev(len(where.tokens) - 1)[1]

            if isinstance(prev_tok, Comparison):
                # e.g. "SELECT foo FROM bar WHERE foo = ANY("
                prev_tok = prev_tok.tokens[-1]

            prev_tok = prev_tok.value.lower()
            if prev_tok == 'exists':
                return (Keyword(),)
            else:
                return column_suggestions

        # Get the token before the parens
        prev_tok = p.token_prev(len(p.tokens) - 1)[1]

        if (prev_tok and prev_tok.value and
                prev_tok.value.lower().split(' ')[-1] == 'using'):
            # tbl1 INNER JOIN tbl2 USING (col1, col2)
            tables = stmt.get_tables('before')

            # Suggest columns that are present in more than one table
            return (Column(table_refs=tables,
                           require_last_table=True,
                           local_tables=stmt.local_tables),)

        elif p.token_first().value.lower() == 'select':
            # If the lparen is preceded by a space, chances are we're about
            # to do a sub-select.
            if last_word(stmt.text_before_cursor,
                         'all_punctuations').startswith('('):
                return (Keyword(),)

        prev_prev_tok = prev_tok and p.token_prev(p.token_index(prev_tok))[1]
        if prev_prev_tok and prev_prev_tok.normalized == 'INTO':
            return (Column(table_refs=stmt.get_tables('insert')),)

        # We're probably in a function argument list
        return (Column(table_refs=extract_tables(stmt.full_text),
                       local_tables=stmt.local_tables,
                       qualifiable=True),)
    elif token_v == 'set':
        return (Column(table_refs=stmt.get_tables(),
                       local_tables=stmt.local_tables),)
    elif token_v in ('select', 'where', 'having', 'by', 'distinct'):
        # Check for a table alias or schema qualification
        parent = (stmt.identifier and stmt.identifier.get_parent_name()) or None
        tables = stmt.get_tables()
        if parent:
            tables = tuple(t for t in tables if identifies(parent, t))
            return (
                Column(table_refs=tables, local_tables=stmt.local_tables),
                Table(schema=parent),
                View(schema=parent),
                Function(schema=parent),
            )
        else:
            tables = tuple(
                TableReference(schema=table.schema or 'default',
                               name=table.name,
                               alias=table.alias,
                               is_function=table.is_function)
                for table in tables)
            return (
                Column(table_refs=tables, local_tables=stmt.local_tables),
                Function(schema=None),
                Keyword(),
            )
    elif token_v == 'as':
        # Don't suggest anything for aliases
        return ()
    elif (token_v.endswith('join') and token.is_keyword) or (token_v in (
            'copy', 'from', 'update', 'into', 'describe', 'truncate')):
        schema = stmt.get_identifier_schema()
        tables = extract_tables(stmt.text_before_cursor)
        is_join = token_v.endswith('join') and token.is_keyword

        # Suggest tables from either the currently-selected schema or the
        # public schema if no schema has been specified
        suggest = []

        if not schema:
            # Suggest schemas
            suggest.insert(0, Schema())

        if token_v == 'from' or is_join:
            suggest.append(FromClauseItem(schema=schema,
                                          table_refs=tables,
                                          local_tables=stmt.local_tables))
        elif token_v == 'truncate':
            suggest.append(Table(schema))
        else:
            suggest.extend((Table(schema), View(schema)))

        if is_join and _allow_join(stmt.parsed):
            tables = stmt.get_tables('before')
            suggest.append(Join(table_refs=tables, schema=schema))

        return tuple(suggest)
    elif token_v in ('table', 'view', 'function'):
        # E.g. 'DROP FUNCTION <funcname>', 'ALTER TABLE <tablename>'
        rel_type = {
            'table': Table,
            'view': View,
            'function': Function,
        }[token_v]
        schema = stmt.get_identifier_schema()
        if schema:
            return (rel_type(schema=schema),)
        else:
            return (Schema(), rel_type(schema=schema))
    elif token_v == 'column':
        # E.g. 'ALTER TABLE foo ALTER COLUMN bar <CURSOR>'
        return (Column(table_refs=stmt.get_tables()),)
    elif token_v == 'on':
        tables = stmt.get_tables('before')
        parent = (stmt.identifier and stmt.identifier.get_parent_name()) or None
        if parent:
            # "ON parent.<suggestion>"
            # parent can be either a schema name or table alias
            filteredtables = tuple(t for t in tables if identifies(parent, t))
            sugs = [Column(table_refs=filteredtables,
                           local_tables=stmt.local_tables),
                    Table(schema=parent),
                    View(schema=parent),
                    Function(schema=parent)]
            if filteredtables and _allow_join_condition(stmt.parsed):
                sugs.append(JoinCondition(table_refs=tables,
                                          parent=filteredtables[-1]))
            return tuple(sugs)
        else:
            # ON <suggestion>
            # Use table alias if there is one, otherwise the table name
            aliases = tuple(t.ref for t in tables)
            if _allow_join_condition(stmt.parsed):
                return (Alias(aliases=aliases),
                        JoinCondition(table_refs=tables, parent=None))
            else:
                return (Alias(aliases=aliases),)
    elif token_v in ('c', 'use', 'database', 'template'):
        # "\c <db>", "use <db>", "DROP DATABASE <db>",
        # "CREATE DATABASE <newdb> WITH TEMPLATE <db>"
        return (Database(),)
    elif token_v == 'schema':
        # DROP SCHEMA schema_name
        return (Schema(),)
    elif token_v.endswith(',') or token_v in ('=', 'and', 'or'):
        prev_keyword = stmt.reduce_to_prev_keyword()
        if prev_keyword:
            return suggest_based_on_last_token(prev_keyword, stmt)
        else:
            return ()
    elif token_v == 'format':
        return (Format(),)
    elif token_v in ('type', '::'):
        # ALTER TABLE foo SET DATA TYPE bar
        # SELECT foo::bar
        # Note that tables are a form of composite type in postgresql, so
        # they're suggested here as well
        schema = stmt.get_identifier_schema()
        suggestions = [Datatype(schema=schema),
                       Table(schema=schema)]
        if not schema:
            suggestions.append(Schema())
        return tuple(suggestions)
    elif token_v == 'alter':
        return (Keyword(),)
    elif token.is_keyword:
        # token is a keyword we haven't implemented any special handling
        # for, so go backwards in the query until we find one we do
        # recognize
        prev_keyword = stmt.reduce_to_prev_keyword(n_skip=1)
        if prev_keyword:
            return suggest_based_on_last_token(prev_keyword, stmt)
        else:
            return (Keyword(),)
    else:
        return (Keyword(),)
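
# Minimal usage sketch (illustrative only; assumes the pgcli-style
# SqlStatement class from this package). The completer builds a statement
# from the buffer, takes the token before the cursor, and dispatches here.
def _example_suggest_from():
    stmt = SqlStatement('SELECT * FROM ', 'SELECT * FROM ')
    suggestions = suggest_based_on_last_token(stmt.last_token, stmt)
    # With no schema typed yet, expect something like a Schema() suggestion
    # plus a FromClauseItem covering tables/views/functions.
    return suggestions
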