Example #1
import re
from collections import defaultdict

import sqlparse
from sqlparse.tokens import Name

from voltliterals.literals import get_literals

white_space_regex = re.compile('\\s+', re.MULTILINE)


def _compile_regex(keyword):
    # Surround the keyword with word boundaries and replace interior whitespace
    # with whitespace wildcards
    pattern = '\\b' + white_space_regex.sub(r'\\s+', keyword) + '\\b'
    return re.compile(pattern, re.MULTILINE | re.IGNORECASE)


keywords = get_literals('keywords')
keyword_regexs = dict((kw, _compile_regex(kw)) for kw in keywords)
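# Illustration (not from the source): a multi-word keyword such as 'ORDER BY'
# compiles to the pattern r'\bORDER\s+BY\b' with re.IGNORECASE, so it also
# matches 'order   by' regardless of interior whitespace.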


# TODO: this feature hasn't been enabled yet
class PrevalenceCounter(object):
    """
    Allow Completer to learn user's preferred keywords from history
    """
    def __init__(self):
        self.keyword_counts = defaultdict(int)
        self.name_counts = defaultdict(int)

    def update(self, text):
        self.update_keywords(text)
        self.update_names(text)
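    # Sketch only: the excerpt stops mid-class. The method bodies below are
    # assumptions inferred from how VoltCompleter calls
    # self.prioritizer.keyword_count / name_count in Example #2; the real
    # implementation may differ.
    def update_keywords(self, text):
        # Count every known keyword occurring in the statement
        for keyword, regex in keyword_regexs.items():
            self.keyword_counts[keyword] += len(regex.findall(text))

    def update_names(self, text):
        # Count identifier tokens (table/column names) in the statement
        for parsed in sqlparse.parse(text):
            for token in parsed.flatten():
                if token.ttype in Name:
                    self.name_counts[token.value] += 1

    def keyword_count(self, keyword):
        return self.keyword_counts[keyword]

    def name_count(self, name):
        return self.name_counts[name]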
Example #2
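This class relies on imports and helpers defined elsewhere in the package. The sketch below reconstructs the minimum context it assumes: the stdlib and prompt_toolkit imports are standard, while the Match definition and the package-local helpers (suggest_type, last_word, and the suggestion types such as Table, Column, Keyword) are assumptions inferred from how they are used in the code.

# Context sketch; assumptions are marked inline.
import operator
import re
from collections import namedtuple
from itertools import chain

from prompt_toolkit.completion import Completer, Completion

from voltliterals.literals import get_literals

# Assumed: suggest_type, last_word and the suggestion namedtuples
# (FromClauseItem, JoinCondition, Join, Column, Function, Table, View,
# Alias, Keyword, Datatype, Procedure) come from the package's own
# SQL-parsing modules, which are not shown in this excerpt.

# Assumed shape, inferred from find_matches below: a Match pairs a
# prompt_toolkit Completion with the tuple used to rank it.
Match = namedtuple('Match', ['completion', 'priority'])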
class VoltCompleter(Completer):
    # keywords_tree: a dict mapping keywords to well-known following keywords,
    # e.g. 'CREATE': ['TABLE', 'USER', ...]
    keywords_tree = get_literals('keywords', type_=dict)
    keywords = tuple(set(chain(keywords_tree.keys(), *keywords_tree.values())))
    functions = get_literals('functions')
    procedures = get_literals('procedures')
    datatypes = get_literals('datatypes')
    reserved_words = set(get_literals('reserved'))

    def __init__(self, smart_completion=True):
        self.smart_completion = smart_completion
        self.prioritizer = PrevalenceCounter()
        self.keyword_casing = "upper"
        self.name_pattern = re.compile(r"^[_a-z][_a-z0-9\$]*$")
        # metadata should be updated in real time
        # note that we assume table and view names are upper-case
        self.dbmetadata = {
            'tables': {},
            'views': {},
            'procedures': [],
            'functions': [],
            'datatypes': []
        }
        # TODO: casing is not enabled yet
        # casing should be a dict {lowercasename: PreferredCasingName}
        self.casing = {}

        self.all_completions = set(self.keywords + self.functions)

    def escape_name(self, name):
        """ Quote a string."""
        if name and ((not self.name_pattern.match(name)) or
                     (name.upper() in self.reserved_words) or
                     (name.upper() in self.functions)):
            name = '"%s"' % name

        return name

    def unescape_name(self, name):
        """ Unquote a string."""
        if name and name[0] == '"' and name[-1] == '"':
            name = name[1:-1]

        return name

    def escaped_names(self, names):
        return [self.escape_name(name) for name in names]

    def reset_completions(self):
        self.dbmetadata = {
            'tables': {},
            'views': {},
            'procedures': [],
            'functions': [],
            'datatypes': []
        }
        self.all_completions = set(self.keywords + self.functions)

    def case(self, word):
        return self.casing.get(word, word)

    def find_matches(self, text, collection, mode='fuzzy', meta=None):
        """Find completion matches for the given text.

        Given the user's input text and a collection of available
        completions, find completions matching the last word of the
        text.

        `collection` can be either a list of strings or a list of Candidate
        namedtuples.
        `mode` can be either 'fuzzy' or 'strict':
            'fuzzy': fuzzy matching, ties broken by name prevalence
            'strict': prefix matching only, ties broken by keyword prevalence

        Returns a list of Match namedtuples for any matches found in the
        collection of available completions.

        """
        if not collection:
            return []
        priority_order = [
            'keyword', 'function', 'procedure', 'view', 'table', 'datatype',
            'column', 'table alias', 'join', 'name join', 'fk join',
            'table format'
        ]
        type_priority = priority_order.index(
            meta) if meta in priority_order else -1
        text = last_word(text, include='most_punctuations').lower()
        text_len = len(text)

        if text and text[0] == '"':
            # text starts with double quote; user is manually escaping a name
            # Match on everything that follows the double-quote. Note that
            # text_len is calculated before removing the quote, so the
            # Completion.position value is correct
            text = text[1:]

        if mode == 'fuzzy':
            fuzzy = True
            priority_func = self.prioritizer.name_count
        else:
            fuzzy = False
            priority_func = self.prioritizer.keyword_count

        # Construct a `_match` function for either fuzzy or non-fuzzy matching
        # The match function returns a 2-tuple used for sorting the matches,
        # or None if the item doesn't match
        # Note: higher priority values mean more important, so use negative
        # signs to flip the direction of the tuple
        if fuzzy:
            regex = '.*?'.join(map(re.escape, text))
            pat = re.compile('(%s)' % regex)

            def _match(item):
                if item.lower()[:len(text) + 1] in (text, text + ' '):
                    # Exact match of first word in suggestion
                    # This is to get exact alias matches to the top
                    # E.g. for input `e`, 'Entries E' should be on top
                    # (before e.g. `EndUsers EU`)
                    return float('Infinity'), -1
                r = pat.search(self.unescape_name(item.lower()))
                if r:
                    return -len(r.group()), -r.start()
        else:
            match_end_limit = len(text)

            def _match(item):
                match_point = item.lower().find(text, 0, match_end_limit)
                if match_point >= 0:
                    # Use negative infinity to force keywords to sort after all
                    # fuzzy matches
                    return -float('Infinity'), -match_point

        matches = []
        for cand in collection:
            item, display_meta, prio, prio2, display = cand, meta, 0, 0, cand
            sort_key = _match(cand)

            if sort_key:
                if display_meta and len(display_meta) > 50:
                    # Truncate meta-text to 50 characters, if necessary
                    display_meta = display_meta[:47] + u'...'

                # Lexical order of items in the collection, used for
                # tiebreaking items with the same match group length and start
                # position. Since we use *higher* priority to mean "more
                # important," we use -ord(c) to prioritize "aa" > "ab" and end
                # with 1 to prioritize shorter strings (i.e. "user" > "users").
                # We first do a case-insensitive sort and then a
                # case-sensitive one as a tie breaker.
                # We also use the unescape_name to make sure quoted names have
                # the same priority as unquoted names.
                lexical_priority = (
                    tuple(0 if c in (' _') else -ord(c)
                          for c in self.unescape_name(item.lower())) + (1, ) +
                    tuple(c for c in item))

                item = self.case(item)
                display = self.case(display)
                priority = (sort_key, type_priority, prio, priority_func(item),
                            prio2, lexical_priority)
                matches.append(
                    Match(completion=Completion(text=item,
                                                start_position=-text_len,
                                                display_meta=display_meta,
                                                display=display),
                          priority=priority))
        return matches

    def get_completions(self, document, complete_event, smart_completion=None):
        word_before_cursor = document.get_word_before_cursor(WORD=True)

        if smart_completion is None:
            smart_completion = self.smart_completion

        # If smart_completion is off then match any word that starts with
        # 'word_before_cursor'.
        if not smart_completion:
            matches = self.find_matches(word_before_cursor,
                                        self.all_completions,
                                        mode='strict')
            completions = [m.completion for m in matches]
            return sorted(completions, key=operator.attrgetter('text'))

        matches = []
        suggestions = suggest_type(document.text, document.text_before_cursor)

        for suggestion in suggestions:
            suggestion_type = type(suggestion)
            # Map suggestion type to method
            # e.g. 'table' -> self.get_table_matches
            matcher = self.suggestion_matchers[suggestion_type]
            matches.extend(matcher(self, suggestion, word_before_cursor))

        # Sort matches so highest priorities are first
        matches = sorted(matches,
                         key=operator.attrgetter('priority'),
                         reverse=True)

        return [m.completion for m in matches]

    def get_column_matches(self, suggestion, word_before_cursor):
        tables = suggestion.table_refs

        if not tables:
            # No table references yet: offer columns from every known table
            all_columns = set(
                c for columns in self.dbmetadata['tables'].values()
                for c in columns)
            return self.find_matches(word_before_cursor,
                                     all_columns,
                                     meta='column')

        # Only offer columns belonging to the referenced tables
        columns = [
            c for table in tables
            for c in self.dbmetadata['tables'].get(table.name.upper(), [])
        ]
        return self.find_matches(word_before_cursor, columns, meta='column')

    def get_join_matches(self, suggestion, word_before_cursor):
        return self.find_matches(word_before_cursor,
                                 self.dbmetadata['tables'].keys(),
                                 meta='join')

    # TODO: this can be improved
    def get_join_condition_matches(self, suggestion, word_before_cursor):
        return self.get_column_matches(suggestion, word_before_cursor)

    def get_function_matches(self,
                             suggestion,
                             word_before_cursor,
                             alias=False):
        return (self.find_matches(
            word_before_cursor, self.functions, mode='strict', meta='function')
                + self.find_matches(word_before_cursor,
                                    self.dbmetadata['functions'],
                                    mode='strict',
                                    meta='function'))

    def get_from_clause_item_matches(self, suggestion, word_before_cursor):
        return (self.find_matches(word_before_cursor,
                                  self.dbmetadata['tables'].keys(),
                                  meta='table') +
                self.find_matches(word_before_cursor,
                                  self.dbmetadata['views'].keys(),
                                  meta='view') +
                self.find_matches(word_before_cursor,
                                  self.functions,
                                  meta='function') +
                self.find_matches(word_before_cursor,
                                  self.dbmetadata['functions'],
                                  meta='function'))

    def get_table_matches(self, suggestion, word_before_cursor, alias=False):
        return self.find_matches(word_before_cursor,
                                 self.dbmetadata['tables'].keys(),
                                 meta='table')

    def get_view_matches(self, suggestion, word_before_cursor, alias=False):
        return self.find_matches(word_before_cursor,
                                 self.dbmetadata['views'].keys(),
                                 meta='view')

    def get_alias_matches(self, suggestion, word_before_cursor):
        aliases = suggestion.aliases
        return self.find_matches(word_before_cursor,
                                 aliases,
                                 meta='table alias')

    def get_keyword_matches(self, suggestion, word_before_cursor):
        keywords = self.keywords_tree.keys()
        # Get well known following keywords for the last token. If any, narrow
        # candidates to this list.
        next_keywords = self.keywords_tree.get(suggestion.last_token, [])
        if next_keywords:
            keywords = next_keywords

        casing = self.keyword_casing
        if casing == 'auto':
            if word_before_cursor and word_before_cursor[-1].islower():
                casing = 'lower'
            else:
                casing = 'upper'

        if casing == 'upper':
            keywords = [k.upper() for k in keywords]
        else:
            keywords = [k.lower() for k in keywords]

        return self.find_matches(word_before_cursor,
                                 keywords,
                                 mode='strict',
                                 meta='keyword')

    def get_datatype_matches(self, suggestion, word_before_cursor):
        return self.find_matches(word_before_cursor,
                                 self.datatypes,
                                 mode='strict',
                                 meta='datatype')

    def get_procedure_matches(self, suggestion, word_before_cursor):
        return (self.find_matches(word_before_cursor,
                                  self.procedures,
                                  mode='strict',
                                  meta='procedure') +
                self.find_matches(word_before_cursor,
                                  self.dbmetadata['procedures'],
                                  meta='procedure'))

    suggestion_matchers = {
        FromClauseItem: get_from_clause_item_matches,
        JoinCondition: get_join_condition_matches,
        Join: get_join_matches,
        Column: get_column_matches,
        Function: get_function_matches,
        Table: get_table_matches,
        View: get_view_matches,
        Alias: get_alias_matches,
        Keyword: get_keyword_matches,
        Datatype: get_datatype_matches,
        Procedure: get_procedure_matches
    }

    def update_tables(self, tables):
        self.dbmetadata['tables'] = tables

    def update_views(self, views):
        self.dbmetadata['views'] = views

    def update_functions(self, functions):
        self.dbmetadata['functions'] = functions

    def update_procedures(self, procedures):
        self.dbmetadata['procedures'] = procedures
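A hedged usage sketch (not from the source): one plausible way the completer could be wired into a prompt_toolkit session and kept in sync with catalog metadata. The session setup, sample metadata, and prompt string are illustrative assumptions.

# Usage sketch (illustrative only; names and sample metadata are assumptions).
from prompt_toolkit import PromptSession

completer = VoltCompleter(smart_completion=True)
# Table and view names are keyed upper-case, matching the class's assumption.
completer.update_tables({'CUSTOMERS': ['ID', 'NAME', 'CITY']})
completer.update_views({})
completer.update_functions([])
completer.update_procedures([])

session = PromptSession(completer=completer)
# sql = session.prompt('sql> ')  # completions are offered as the user types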