Example #1
    def __init__(self, full_text, text_before_cursor):
        self.identifier = None
        self.word_before_cursor = word_before_cursor = last_word(
            text_before_cursor, include='many_punctuations')
        full_text = _strip_named_query(full_text)
        text_before_cursor = _strip_named_query(text_before_cursor)

        full_text, text_before_cursor, self.local_tables = \
            isolate_query_ctes(full_text, text_before_cursor)

        self.text_before_cursor_including_last_word = text_before_cursor

        # If we've partially typed a word then word_before_cursor won't be an
        # empty string. In that case we want to remove the partially typed
        # string before sending it to the sqlparser. Otherwise the last token
        # will always be the partially typed string which renders the smart
        # completion useless because it will always return the list of
        # keywords as completion.
        if self.word_before_cursor:
            if word_before_cursor[-1] == '(' or word_before_cursor[0] == '\\':
                parsed = sqlparse.parse(text_before_cursor)
            else:
                text_before_cursor = text_before_cursor[:-len(
                    word_before_cursor)]
                parsed = sqlparse.parse(text_before_cursor)
                self.identifier = parse_partial_identifier(word_before_cursor)
        else:
            parsed = sqlparse.parse(text_before_cursor)

        full_text, text_before_cursor, parsed = \
            _split_multiple_statements(full_text, text_before_cursor, parsed)

        self.full_text = full_text
        self.text_before_cursor = text_before_cursor
        self.parsed = parsed

        self.last_token = (parsed and
                           parsed.token_prev(len(parsed.tokens))[1]) or ''
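
This constructor appears to be pgcli's SqlStatement; helpers such as last_word, isolate_query_ctes and parse_partial_identifier are assumed to come from pgcli.packages. A standalone sketch of the sqlparse behavior that the partial-word stripping works around, using only sqlparse:

import sqlparse

# With a half-typed word, the fragment itself is the last token, which
# tells the completer nothing useful.
text = 'SELECT * FROM my_ta'
parsed = sqlparse.parse(text)[0]
print(repr(parsed.tokens[-1].value))  # 'my_ta'

# Stripping the fragment first exposes the token that should drive
# completion (here the FROM keyword, so tables get suggested).
parsed = sqlparse.parse(text[:-len('my_ta')])[0]
print(repr(parsed.token_prev(len(parsed.tokens))[1].value))  # 'FROM'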

Example #2
    def get_column_matches(self, suggestion, word_before_cursor):
        tables = suggestion.table_refs
        do_qualify = suggestion.qualifiable and {
            'always': True,
            'never': False,
            'if_more_than_one_table': len(tables) > 1,
        }[self.qualify_columns]

        def qualify(col, tbl):
            return (tbl + '.' +
                    self.case(col)) if do_qualify else self.case(col)

        _logger.debug("Completion column scope: %r", tables)
        scoped_cols = self.populate_scoped_cols(tables,
                                                suggestion.local_tables)

        def make_cand(name, ref):
            synonyms = (name, generate_alias(self.case(name)))
            return Candidate(qualify(name, ref), 0, 'column', synonyms)

        def flat_cols():
            return [
                make_cand(c.name, t.ref) for t, cols in scoped_cols.items()
                for c in cols
            ]

        if suggestion.require_last_table:
            # require_last_table is used for 'tbl1 JOIN tbl2 USING (...', which
            # should suggest only columns that appear in both the last table
            # and at least one other joined table
            ltbl = tables[-1].ref
            other_tbl_cols = set(c.name for t, cs in scoped_cols.items()
                                 if t.ref != ltbl for c in cs)
            scoped_cols = {
                t: [col for col in cols if col.name in other_tbl_cols]
                for t, cols in scoped_cols.items() if t.ref == ltbl
            }
        lastword = last_word(word_before_cursor, include='most_punctuations')
        if lastword == '*':
            if suggestion.context == 'insert':

                def filter_col(col):
                    if not col.has_default:
                        return True
                    return not any(
                        p.match(col.default)
                        for p in self.insert_col_skip_patterns)

                scoped_cols = {
                    t: [col for col in cols if filter_col(col)]
                    for t, cols in scoped_cols.items()
                }
            if self.asterisk_column_order == 'alphabetic':
                for cols in scoped_cols.values():
                    cols.sort(key=operator.attrgetter('name'))
            if (lastword != word_before_cursor and len(tables) == 1
                    and word_before_cursor[-len(lastword) - 1] == '.'):
                # User typed x.*; replicate "x." for all columns except the
                # first, which gets the original (as we only replace the "*")
                sep = ', ' + word_before_cursor[:-1]
                collist = sep.join(
                    self.case(c.completion) for c in flat_cols())
            else:
                collist = ', '.join(
                    qualify(c.name, t.ref) for t, cs in scoped_cols.items()
                    for c in cs)

            return [
                Match(completion=Completion(collist,
                                            -1,
                                            display_meta='columns',
                                            display='*'),
                      priority=(1, 1, 1))
            ]

        return self.find_matches(word_before_cursor,
                                 flat_cols(),
                                 meta='column')
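
To make the "x.*" branch above concrete, here is a hypothetical trace (alias and column names invented) of how the separator re-qualifies every column except the first:

# Hypothetical inputs: alias "x" with columns id, name, email.
word_before_cursor = 'x.*'
cols = ['id', 'name', 'email']

sep = ', ' + word_before_cursor[:-1]  # ', x.' -- drops only the '*'
collist = sep.join(cols)              # 'id, x.name, x.email'

# The final Completion replaces just the '*' (start position -1), so
# the user's own 'x.' prefix survives and qualifies the first column:
print('x.' + collist)                 # 'x.id, x.name, x.email'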
Example #3
def suggest_based_on_last_token(token, stmt):
    if isinstance(token, string_types):
        token_v = token.lower()
    elif isinstance(token, Comparison):
        # If 'token' is a Comparison type such as
        # 'select * FROM abc a JOIN def d ON a.id = d.'. Then calling
        # token.value on the comparison type will only return the lhs of the
        # comparison. In this case a.id. So we need to do token.tokens to get
        # both sides of the comparison and pick the last token out of that
        # list.
        token_v = token.tokens[-1].value.lower()
    elif isinstance(token, Where):
        # sqlparse groups all tokens from the where clause into a single token
        # list. This means that token.value may be something like
        # 'where foo > 5 and '. We need to look "inside" token.tokens to handle
        # suggestions in complicated where clauses correctly
        prev_keyword = stmt.reduce_to_prev_keyword()
        return suggest_based_on_last_token(prev_keyword, stmt)
    elif isinstance(token, Identifier):
        # If the previous token is an identifier, we can suggest datatypes if
        # we're in a parenthesized column/field list, e.g.:
        #       CREATE TABLE foo (Identifier <CURSOR>
        #       CREATE FUNCTION foo (Identifier <CURSOR>
        # If we're not in a parenthesized list, the most likely scenario is the
        # user is about to specify an alias, e.g.:
        #       SELECT Identifier <CURSOR>
        #       SELECT foo FROM Identifier <CURSOR>
        prev_keyword, _ = find_prev_keyword(stmt.text_before_cursor)
        if prev_keyword and prev_keyword.value == '(':
            # Suggest datatypes
            return suggest_based_on_last_token('type', stmt)
        return (Keyword(), )
    else:
        token_v = token.value.lower()

    if not token:
        return (Keyword(), Special())
    if token_v.endswith('('):
        p = sqlparse.parse(stmt.text_before_cursor)[0]

        if p.tokens and isinstance(p.tokens[-1], Where):
            # Four possibilities:
            #  1 - Parenthesized clause like "WHERE foo AND ("
            #        Suggest columns/functions
            #  2 - Function call like "WHERE foo("
            #        Suggest columns/functions
            #  3 - Subquery expression like "WHERE EXISTS ("
            #        Suggest keywords, in order to do a subquery
            #  4 - Subquery OR array comparison like "WHERE foo = ANY("
            #        Suggest columns/functions AND keywords. (If we wanted to be
            #        really fancy, we could suggest only array-typed columns)

            column_suggestions = suggest_based_on_last_token('where', stmt)

            # Check for a subquery expression (cases 3 & 4)
            where = p.tokens[-1]
            prev_tok = where.token_prev(len(where.tokens) - 1)[1]

            if isinstance(prev_tok, Comparison):
                # e.g. "SELECT foo FROM bar WHERE foo = ANY("
                prev_tok = prev_tok.tokens[-1]

            prev_tok = prev_tok.value.lower()
            if prev_tok == 'exists':
                return (Keyword(), )
            return column_suggestions

        # Get the token before the parens
        prev_tok = p.token_prev(len(p.tokens) - 1)[1]

        if (prev_tok and prev_tok.value
                and prev_tok.value.lower().split(' ')[-1] == 'using'):
            # tbl1 INNER JOIN tbl2 USING (col1, col2)
            tables = stmt.get_tables('before')

            # suggest columns that are present in more than one table
            return (Column(table_refs=tables,
                           require_last_table=True,
                           local_tables=stmt.local_tables), )

        if p.token_first().value.lower() == 'select':
            # If the lparen is preceded by a space, chances are we're about to
            # do a sub-select.
            if last_word(stmt.text_before_cursor,
                         'all_punctuations').startswith('('):
                return (Keyword(), )
        prev_prev_tok = prev_tok and p.token_prev(p.token_index(prev_tok))[1]
        if prev_prev_tok and prev_prev_tok.normalized == 'INTO':
            return (Column(table_refs=stmt.get_tables('insert'),
                           context='insert'), )
        # We're probably in a function argument list
        return (Column(table_refs=extract_tables(stmt.full_text),
                       local_tables=stmt.local_tables,
                       qualifiable=True), )
    if token_v == 'set':
        return (Column(table_refs=stmt.get_tables(),
                       local_tables=stmt.local_tables), )
    if token_v in ('select', 'where', 'having', 'by', 'distinct'):
        # Check for a table alias or schema qualification
        parent = stmt.identifier.get_parent_name() \
            if (stmt.identifier and stmt.identifier.get_parent_name()) else []
        tables = stmt.get_tables()
        if parent:
            tables = tuple(t for t in tables if identifies(parent, t))
            return (
                Column(table_refs=tables, local_tables=stmt.local_tables),
                Table(schema=parent),
                View(schema=parent),
                Function(schema=parent),
            )
        return (
            Column(table_refs=tables,
                   local_tables=stmt.local_tables,
                   qualifiable=True),
            Function(schema=None),
            Keyword(token_v.upper()),
        )
    if token_v == 'as':
        # Don't suggest anything for aliases
        return ()
    if (token_v.endswith('join') and token.is_keyword) or \
         token_v in ('copy', 'from', 'update', 'into', 'describe', 'truncate'):

        schema = stmt.get_identifier_schema()
        tables = extract_tables(stmt.text_before_cursor)
        is_join = token_v.endswith('join') and token.is_keyword

        # Suggest tables from either the currently-selected schema or the
        # public schema if no schema has been specified
        suggest = []

        if not schema:
            # Suggest schemas
            suggest.insert(0, Schema())

        if token_v == 'from' or is_join:
            suggest.append(
                FromClauseItem(schema=schema,
                               table_refs=tables,
                               local_tables=stmt.local_tables))
        elif token_v == 'truncate':
            suggest.append(Table(schema))
        else:
            suggest.extend((Table(schema), View(schema)))

        if is_join and _allow_join(stmt.parsed):
            tables = stmt.get_tables('before')
            suggest.append(Join(table_refs=tables, schema=schema))

        return tuple(suggest)

    if token_v == 'function':
        schema = stmt.get_identifier_schema()
        # stmt.get_previous_token will fail for e.g. `SELECT 1 FROM functions
        # WHERE function:`
        try:
            prev = stmt.get_previous_token(token).value.lower()
            if prev in ('drop', 'alter', 'create', 'create or replace'):
                return (Function(schema=schema, usage='signature'), )
        except ValueError:
            pass
        return tuple()

    if token_v in ('table', 'view'):
        # E.g. 'ALTER TABLE <tablename>'
        rel_type = {
            'table': Table,
            'view': View,
            'function': Function
        }[token_v]
        schema = stmt.get_identifier_schema()
        if schema:
            return (rel_type(schema=schema), )
        return (Schema(), rel_type(schema=schema))

    if token_v == 'column':
        # E.g. 'ALTER TABLE foo ALTER COLUMN bar'
        return (Column(table_refs=stmt.get_tables()), )

    if token_v == 'on':
        tables = stmt.get_tables('before')
        parent = stmt.identifier.get_parent_name() \
            if (stmt.identifier and stmt.identifier.get_parent_name()) else None
        if parent:
            # "ON parent.<suggestion>"
            # parent can be either a schema name or table alias
            filteredtables = tuple(t for t in tables if identifies(parent, t))
            sugs = [
                Column(table_refs=filteredtables,
                       local_tables=stmt.local_tables),
                Table(schema=parent),
                View(schema=parent),
                Function(schema=parent)
            ]
            if filteredtables and _allow_join_condition(stmt.parsed):
                sugs.append(
                    JoinCondition(table_refs=tables,
                                  parent=filteredtables[-1]))
            return tuple(sugs)
        # ON <suggestion>
        # Use table alias if there is one, otherwise the table name
        aliases = tuple(t.ref for t in tables)
        if _allow_join_condition(stmt.parsed):
            return (Alias(aliases=aliases),
                    JoinCondition(table_refs=tables, parent=None))
        return (Alias(aliases=aliases), )

    if token_v in ('c', 'use', 'database', 'template'):
        # "\c <db", "use <db>", "DROP DATABASE <db>",
        # "CREATE DATABASE <newdb> WITH TEMPLATE <db>"
        return (Database(), )
    if token_v == 'schema':
        # DROP SCHEMA schema_name, SET SCHEMA schema_name
        prev_keyword = stmt.reduce_to_prev_keyword(n_skip=2)
        quoted = prev_keyword and prev_keyword.value.lower() == 'set'
        return (Schema(quoted), )
    if token_v.endswith(',') or token_v in ('=', 'and', 'or'):
        prev_keyword = stmt.reduce_to_prev_keyword()
        if prev_keyword:
            return suggest_based_on_last_token(prev_keyword, stmt)
        return ()
    if token_v in ('type', '::'):
        #   ALTER TABLE foo SET DATA TYPE bar
        #   SELECT foo::bar
        # Note that tables are a form of composite type in postgresql, so
        # they're suggested here as well
        schema = stmt.get_identifier_schema()
        suggestions = [Datatype(schema=schema), Table(schema=schema)]
        if not schema:
            suggestions.append(Schema())
        return tuple(suggestions)
    if token_v in {'alter', 'create', 'drop'}:
        return (Keyword(token_v.upper()), )
    if token.is_keyword:
        # token is a keyword we haven't implemented any special handling for
        # go backwards in the query until we find one we do recognize
        prev_keyword = stmt.reduce_to_prev_keyword(n_skip=1)
        if prev_keyword:
            return suggest_based_on_last_token(prev_keyword, stmt)
        return (Keyword(token_v.upper()), )
    return (Keyword(), )
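
In pgcli this dispatcher lives in packages/sqlcompletion.py and is normally reached through suggest_type(), which builds the statement wrapper and extracts the last token before delegating here; a hedged usage sketch, assuming that module layout:

from pgcli.packages.sqlcompletion import suggest_type

# After 'FROM ' the last token is the keyword 'from', so the from/join
# branch above fires: expect a FromClauseItem suggestion plus a Schema
# suggestion, since no schema has been specified yet.
sql = 'SELECT * FROM '
print(suggest_type(sql, sql))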

Example #4
    def find_matches(self, text, collection, mode='fuzzy', meta=None):
        """Find completion matches for the given text.

        Given the user's input text and a collection of available
        completions, find completions matching the last word of the
        text.

        `collection` can be either a list of strings or a list of Candidate
        namedtuples.
        `mode` can be either 'fuzzy' or 'strict':
            'fuzzy': fuzzy matching, ties broken by name prevalence
            'strict': start-only matching, ties broken by keyword prevalence

        Returns a list of Match namedtuples wrapping prompt_toolkit
        Completion instances for any matches found in the collection of
        available completions.

        """
        if not collection:
            return []
        prio_order = [
            'keyword', 'function', 'view', 'table', 'datatype', 'database',
            'schema', 'column', 'table alias', 'join', 'name join', 'fk join'
        ]
        type_priority = prio_order.index(meta) if meta in prio_order else -1
        text = last_word(text, include='most_punctuations').lower()
        text_len = len(text)

        if text and text[0] == '"':
            # text starts with double quote; user is manually escaping a name
            # Match on everything that follows the double-quote. Note that
            # text_len is calculated before removing the quote, so the
            # Completion.position value is correct
            text = text[1:]

        if mode == 'fuzzy':
            fuzzy = True
            priority_func = self.prioritizer.name_count
        else:
            fuzzy = False
            priority_func = self.prioritizer.keyword_count

        # Construct a `_match` function for either fuzzy or non-fuzzy matching
        # The match function returns a 2-tuple used for sorting the matches,
        # or None if the item doesn't match
        # Note: higher priority values mean more important, so use negative
        # signs to flip the direction of the tuple
        if fuzzy:
            regex = '.*?'.join(map(re.escape, text))
            pat = re.compile('(%s)' % regex)

            def _match(item):
                if item.lower()[:len(text) + 1] in (text, text + ' '):
                    # Exact match of first word in suggestion
                    # This is to get exact alias matches to the top
                    # E.g. for input `e`, 'Entries E' should be on top
                    # (before e.g. `EndUsers EU`)
                    return float('Infinity'), -1
                r = pat.search(self.unescape_name(item.lower()))
                if r:
                    return -len(r.group()), -r.start()
        else:
            match_end_limit = len(text)

            def _match(item):
                match_point = item.lower().find(text, 0, match_end_limit)
                if match_point >= 0:
                    # Use negative infinity to force keywords to sort after all
                    # fuzzy matches
                    return -float('Infinity'), -match_point

        matches = []
        for cand in collection:
            if isinstance(cand, _Candidate):
                item, prio, display_meta, synonyms, prio2, display = cand
                if display_meta is None:
                    display_meta = meta
                syn_matches = (_match(x) for x in synonyms)
                # Nones need to be removed to avoid max() crashing in Python 3
                syn_matches = [m for m in syn_matches if m]
                sort_key = max(syn_matches) if syn_matches else None
            else:
                item, display_meta, prio, prio2, display = cand, meta, 0, 0, cand
                sort_key = _match(cand)

            if sort_key:
                if display_meta and len(display_meta) > 50:
                    # Truncate meta-text to 50 characters, if necessary
                    display_meta = display_meta[:47] + u'...'

                # Lexical order of items in the collection, used for
                # tiebreaking items with the same match group length and start
                # position. Since we use *higher* priority to mean "more
                # important," we use -ord(c) to prioritize "aa" > "ab" and end
                # with 1 to prioritize shorter strings (i.e. "user" > "users").
                # We first do a case-insensitive sort and then a
                # case-sensitive one as a tie breaker.
                # We also use the unescape_name to make sure quoted names have
                # the same priority as unquoted names.
                lexical_priority = (
                    tuple(0 if c in ' _' else -ord(c)
                          for c in self.unescape_name(item.lower())) +
                    (1,) + tuple(c for c in item))

                item = self.case(item)
                display = self.case(display)
                priority = (sort_key, type_priority, prio, priority_func(item),
                            prio2, lexical_priority)

                item = decode(item)
                display_meta = decode(display_meta)
                display = decode(display)

                matches.append(
                    Match(completion=Completion(text=item,
                                                start_position=-text_len,
                                                display_meta=display_meta,
                                                display=display),
                          priority=priority))
        return matches
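
The fuzzy sort key above is self-contained enough to demonstrate in isolation. A minimal sketch of the same technique: the typed characters are joined with non-greedy wildcards, and the match group's length and start position are negated because the completer treats larger sort keys as better:

import re

def fuzzy_key(text, item):
    # Allow anything between the typed characters, then rank by
    # shortest match group and earliest start position.
    pat = re.compile('(%s)' % '.*?'.join(map(re.escape, text)))
    r = pat.search(item.lower())
    return (-len(r.group()), -r.start()) if r else None

print(fuzzy_key('usr', 'users'))      # (-4, 0): matches 'user'
print(fuzzy_key('usr', 'sql_users'))  # (-4, -4): same span, later start
print(fuzzy_key('usr', 'schemas'))    # None: no match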