Python SimplePrefixTree.autocomplete Beispiele, prefix_tree.SimplePrefixTree.autocomplete Python Beispiele

Beispiel #1

0

Datei anzeigen

def test_simple_prefix_tree_autocomplete() -> None:
    """Check autocomplete results on a small simple prefix tree.

    Three words are inserted letter by letter into a 'sum'-weighted tree.

    NOTE: The expected results should not depend on insertion order, so
    shuffling the entries below is a worthwhile experiment.
    """
    tree = SimplePrefixTree('sum')
    entries = [('cat', 2.0), ('car', 3.0), ('dog', 4.0)]
    for word, weight in entries:
        # The prefix sequence of each word is simply its list of letters.
        tree.insert(word, weight, list(word))

    # Without a limit every value comes back, and the tuples *must* be
    # ordered by non-increasing weight (the sorting is expected to happen
    # inside SimplePrefixTree.autocomplete).
    everything = tree.autocomplete([])
    assert everything == [('dog', 4.0), ('car', 3.0), ('cat', 2.0)]

    # With a limit the algorithm is greedy and does not necessarily return
    # the highest-weight values: here the ['c'] subtree is expected to be
    # recursed on first, so 'car' is returned rather than 'dog'.
    limited = tree.autocomplete([], 1)
    assert limited == [('car', 3.0)]

Beispiel #2

0

Datei anzeigen

Datei: autocomplete_engines.py Projekt: owenhu99/autocomplete

class LetterAutocompleteEngine:
    """An autocomplete engine that suggests strings based on a few letters.

    The *prefix sequence* for a string is the list of characters in the string.
    This can include space characters.

    This autocomplete engine only stores and suggests strings with lowercase
    letters, numbers, and space characters; see the section on
    "Text sanitization" on the assignment handout.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a text file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Each line of the specified file counts as one input string.
        Note that the line may or may not contain spaces.
        Each string is sanitized (lowercased, then every character that is
        neither alphanumeric nor a space is dropped), and if the resulting
        string contains at least one alphanumeric character, it is inserted
        into the Autocompleter.

        Lines that do not contain at least one alphanumeric character are
        skipped.

        When each string is inserted, it is given a weight of one.
        Note that it is possible for the same string to appear on more than
        one line of the input file; this would result in that string getting
        a larger weight (because of how Autocompleter.insert works).
        """
        with open(config['file'], encoding='utf8') as f:
            # NOTE(review): the 'compressed' option also builds a
            # SimplePrefixTree here; confirm whether a compressed tree class
            # should be used instead once one is available.
            if config['autocompleter'] in ('simple', 'compressed'):
                self.autocompleter = SimplePrefixTree(config['weight_type'])
            for line in f:
                text = ''.join(char for char in line.lower()
                               if char.isalnum() or char == ' ')
                # Skip lines that sanitize to nothing but spaces (or to the
                # empty string), as required by the docstring.
                if not any(char.isalnum() for char in text):
                    continue
                self.autocompleter.insert(text, 1, list(text))

    def autocomplete(self,
                     prefix: str,
                     limit: Optional[int] = None) -> List[Tuple[str, float]]:
        """Return up to <limit> matches for the given prefix string.

        The return value is a list of tuples (string, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given prefix.

        The prefix string is split into a list of single characters before
        being passed to the Autocompleter.

        Preconditions:
            limit is None or limit > 0
            <prefix> contains only lowercase alphanumeric characters and spaces
        """
        return self.autocompleter.autocomplete(list(prefix), limit)

    def remove(self, prefix: str) -> None:
        """Remove all strings that match the given prefix string.

        The prefix string is split into a list of single characters before
        being passed to the Autocompleter.

        Precondition: <prefix> contains only lowercase alphanumeric characters
                      and spaces.
        """
        # No value is returned, matching the declared -> None.
        self.autocompleter.remove(list(prefix))

Beispiel #3

0

Datei anzeigen

Datei: test_simple_prefix_tree.py Projekt: caasig1/Autocomlete-System

def test_autocomplete(length: int) -> None:
    """Check result counts and weight ordering of SimplePrefixTree.autocomplete.

    A 'sum' tree and an 'average' tree are filled identically using three
    insertion methods, then autocomplete is queried for every inserted prefix
    (plus the empty prefix) with every limit from 1 up to the number of
    values. For each query the test checks that the limit is respected, that
    no weight exceeds the largest inserted weight, and that the weights are
    non-increasing.

    <length> controls how many values methods 1 and 2 insert.
    """
    import sys
    # Raised up front; presumably the tree operations recurse deeply for
    # large <length> values.
    sys.setrecursionlimit(5000)

    # insertion method 1 (n = length)
    # prefixes = [[0,..,n-1],[1,..,n-1],[2,...n-1],....[n-1]]
    # every prefix has 1 value
    # spt will have 'n' subtrees
    # spt must len(prefixes) subtrees

    # insertion method 2 (n = length)
    # prefixes = [[0,..,n-1],[0,..,n-2],[0,...n-3],....[1]]
    # every prefix has 1 value
    # spt must have 1 subtree

    # insertion method 3 (n = length)
    # check method_spt3()
    # balanced spt

    methods = ['1', '2', '3']

    for method in methods:
        prefixes = []
        values = []
        weights = []
        spt = SimplePrefixTree('sum')
        spt_avg = SimplePrefixTree('average')

        if method == '3':
            prefixes = spt_method_3(spt, 3, list(range(15)))
            spt_method_3(spt_avg, 3, list(range(15)))
            # NOTE(review): values aliases prefixes, so the insert(0, [])
            # below also lengthens values by one. Only len(values) is used
            # (as an upper bound), so this is lenient rather than wrong;
            # confirm whether a copy was intended.
            values = prefixes  # values is only tested on length
            weights = list(range(15))
            weights.reverse()
        else:
            for x in range(0, length):
                if method == '1':
                    start, stop = x, length
                else:
                    start, stop = 0, length - x
                prefix = list(range(start, stop))
                # The weight doubles as the value; it goes from
                # weight = length down to weight = 1.
                weight = length - x
                prefixes.append(prefix)
                values.append(weight)
                weights.append(weight)
                spt.insert(weight, weight, prefix)
                spt_avg.insert(weight, weight, prefix)

        prefixes.insert(0, [])
        for prefix in prefixes:
            for i in range(1, len(values) + 1):
                tup = spt.autocomplete(prefix, i)
                tup_av = spt_avg.autocomplete(prefix, i)
                assert len(tup) <= i
                assert len(spt.autocomplete(prefix, i**2)) <= len(values)
                assert len(tup_av) <= i
                assert len(spt_avg.autocomplete(prefix, i**2)) <= len(values)
                for x in range(len(tup)):
                    # weights[0] should have the greatest weight
                    assert tup[x][1] <= weights[0]
                    assert tup_av[x][1] <= weights[0]
                    if x != len(tup) - 1:
                        # weights should be non-increasing; each tree's
                        # results are compared against that same tree's
                        # next entry.
                        assert tup[x][1] >= tup[x + 1][1]
                        assert tup_av[x][1] >= tup_av[x + 1][1]
        prefixes.pop(0)  # popping [] out

Beispiel #4

0

Datei anzeigen

Datei: autocomplete_engines.py Projekt: owenhu99/autocomplete

class SentenceAutocompleteEngine:
    """An autocomplete engine that suggests strings based on a few words.

    A *word* is a string containing only alphanumeric characters.
    The *prefix sequence* for a string is the list of words in the string
    (separated by whitespace). The words themselves do not contain spaces.

    This autocomplete engine only stores and suggests strings with lowercase
    letters, numbers, and space characters; see the section on
    "Text sanitization" on the assignment handout.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a CSV file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Precondition:
        The given file is a *CSV file* where each line has two entries:
            - the first entry is a string
            - the second entry is a number representing the weight of that
              string

        Note that the line may or may not contain spaces.
        Each string is sanitized (lowercased, then every character that is
        neither alphanumeric nor a space is dropped), and if the resulting
        string contains at least one word, it is inserted into the
        Autocompleter.

        Lines whose sanitized string contains no word (including blank
        lines) are skipped.

        When each string is inserted, it is given the weight specified on the
        line from the CSV file.
        Note that it is possible for the same string to appear on more than
        one line of the input file; this would result in that string getting
        a larger weight.
        """
        with open(config['file'], encoding='utf8') as f:
            # NOTE(review): the 'compressed' option also builds a
            # SimplePrefixTree here; confirm whether a compressed tree class
            # should be used instead once one is available.
            if config['autocompleter'] in ('simple', 'compressed'):
                self.autocompleter = SimplePrefixTree(config['weight_type'])
            for line in f:
                entries = line.lower().split(',')
                text = ''.join(char for char in entries[0]
                               if char.isalnum() or char == ' ')
                words = text.split()
                # Skip before touching the weight column so that blank lines
                # (which have no second entry) do not raise.
                if not words:
                    continue
                self.autocompleter.insert(text, float(entries[1]), words)

    def autocomplete(self,
                     prefix: str,
                     limit: Optional[int] = None) -> List[Tuple[str, float]]:
        """Return up to <limit> matches for the given prefix string.

        The return value is a list of tuples (string, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given prefix.

        The prefix string is split on whitespace into a list of words before
        being passed to the Autocompleter.

        Preconditions:
            limit is None or limit > 0
            <prefix> contains only lowercase alphanumeric characters and spaces
        """
        return self.autocompleter.autocomplete(prefix.split(), limit)

    def remove(self, prefix: str) -> None:
        """Remove all strings that match the given prefix.

        The prefix string is split on whitespace into a list of words before
        being passed to the Autocompleter.

        Precondition: <prefix> contains only lowercase alphanumeric characters
                      and spaces.
        """
        self.autocompleter.remove(prefix.split())

Beispiel #5

0

Datei anzeigen

class SimpleAutoCompleteTest(unittest.TestCase):
    """Unit tests for SimplePrefixTree.autocomplete on a 'sum' tree."""

    def setUp(self):
        # Every test starts from a fresh, empty tree.
        self.sum_tree = SimplePrefixTree('sum')

    def _populate(self, *entries):
        """Insert each (value, weight, prefix) triple, in the given order."""
        for value, weight, prefix in entries:
            self.sum_tree.insert(value, weight, prefix)

    def test_empty_tree_no_prefix(self):
        found = self.sum_tree.autocomplete([])
        self.assertEqual(found, [])

    def test_empty_tree_extra_prefix(self):
        found = self.sum_tree.autocomplete(['c'])
        self.assertEqual(found, [])

    def test_one_leaf_no_prefix(self):
        self._populate(('Alice', 5, []))
        found = self.sum_tree.autocomplete([])
        self.assertEqual(found, [('Alice', 5.0)])

    def test_one_leaf_no_prefix_zero_limit(self):
        self._populate(('Alice', 5, []))
        found = self.sum_tree.autocomplete([], 0)
        self.assertEqual(found, [])

    def test_one_leaf_no_prefix_at_limit(self):
        self._populate(('Alice', 5, []))
        found = self.sum_tree.autocomplete([], 1)
        self.assertEqual(found, [('Alice', 5.0)])

    def test_multi_leaf_no_prefix_extra_limit(self):
        self._populate(('Alice', 5, []),
                       ('Jacky', 11, []),
                       ('Bob', 10, []))
        found = self.sum_tree.autocomplete([], 4)
        self.assertEqual(
            found, [('Jacky', 11.0), ('Bob', 10.0), ('Alice', 5.0)])

    def test_multi_leaf_no_prefix_not_enough_limit(self):
        self._populate(('Alice', 5, []),
                       ('Jacky', 11, []),
                       ('Bob', 10, []))
        found = self.sum_tree.autocomplete([], 2)
        self.assertEqual(found, [('Jacky', 11.0), ('Bob', 10.0)])

    def test_multi_internal_no_prefix(self):
        self._populate(('Alice', 5, ['a']),
                       ('Jacky', 11, ['a', 'c']),
                       ('Bob', 10, ['b']))
        found = self.sum_tree.autocomplete([], 3)
        self.assertEqual(
            found, [('Jacky', 11.0), ('Bob', 10.0), ('Alice', 5.0)])

    def test_with_multi_internal_and_prefix(self):
        self._populate(('Alice', 5, ['a']),
                       ('Jacky', 11, ['a']),
                       ('Bob', 10, ['b']))
        found = self.sum_tree.autocomplete(['a'], 3)
        self.assertEqual(found, [('Jacky', 11.0), ('Alice', 5.0)])

    def test_multi_internal_extra_prefix(self):
        self._populate(('Alice', 5, ['a']),
                       ('Jacky', 11, ['a']),
                       ('Bob', 10, ['b']))
        found = self.sum_tree.autocomplete(['a', 'b'], 3)
        self.assertEqual(found, [])

    def test_multi_internal_short_prefix(self):
        self._populate(('Alice', 5, ['a', 'b']),
                       ('Jacky', 11, ['a', 'c']),
                       ('Bob', 10, ['b']))
        found = self.sum_tree.autocomplete(['a'], 3)
        self.assertEqual(found, [('Jacky', 11.0), ('Alice', 5.0)])

    def test_multi_level_leaves(self):
        self._populate(('Alice', 5, ['a']),
                       ('Jacky', 11, ['a', 'c']),
                       ('Bob', 10, ['b']))
        found = self.sum_tree.autocomplete(['a'], 3)
        self.assertEqual(found, [('Jacky', 11.0), ('Alice', 5.0)])

    def test_multi_internal_limit_cutoff(self):
        self._populate(('Alice', 5, ['a', 'c']),
                       ('Jacky', 11, ['a', 'c']))
        found = self.sum_tree.autocomplete(['a'], 1)
        self.assertEqual(found, [('Jacky', 11.0)])

    def test_multi_internal_limit_continue(self):
        self._populate(('Alice', 5, ['a', 'c']),
                       ('Jacky', 10, ['a', 'c']),
                       ('Bob', 11, ['a', 'd']),
                       ('Kevin', 9, ['a', 'd']))
        found = self.sum_tree.autocomplete(['a'], 3)
        self.assertEqual(
            found, [('Bob', 11.0), ('Jacky', 10.0), ('Kevin', 9.0)])

Beispiel #6

0

Datei anzeigen

class MelodyAutocompleteEngine:
    """An autocomplete engine that suggests melodies based on a few intervals.

    The values stored are Melody objects, and the corresponding
    prefix sequence for a Melody is its interval sequence.

    Because the prefix is based only on interval sequence and not the
    starting pitch or duration of the notes, it is possible for different
    melodies to have the same prefix.

    # === Private Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a CSV file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Precondition:
        The given file is a *CSV file* where each line has the following format:
            - The first entry is the name of a melody (a string).
            - The remaining entries are grouped into pairs (as in Assignment 1)
              where the first number in each pair is a note pitch,
              and the second number is the corresponding duration.

            HOWEVER, there may be blank entries (stored as an empty string '');
            as soon as a blank entry is encountered, no further entries on
            that line are read.

        Each melody is inserted into the Autocompleter with a weight of 1.
        Empty CSV rows are ignored.
        """
        # Any 'autocompleter' value other than 'simple' selects the
        # compressed tree.
        if config['autocompleter'] == 'simple':
            self.autocompleter = SimplePrefixTree(config['weight_type'])
        else:
            self.autocompleter = CompressedPrefixTree(config['weight_type'])

        with open(config['file']) as f:
            for row in csv.reader(f):
                if not row:
                    continue
                notes = []
                # Entries after the name come in (pitch, duration) pairs; a
                # trailing unpaired entry is ignored by the range bounds.
                for i in range(1, len(row) - 1, 2):
                    # Stop at the first blank entry instead of letting
                    # int('') raise.
                    # NOTE(review): the melody is still inserted with the
                    # notes read so far — confirm this is the intended
                    # reading of "stop processing this line".
                    if row[i] == '' or row[i + 1] == '':
                        break
                    notes.append((int(row[i]), int(row[i + 1])))
                # Interval = pitch difference between consecutive notes.
                intervals = [later[0] - earlier[0]
                             for earlier, later in zip(notes, notes[1:])]
                self.autocompleter.insert(Melody(row[0], notes), 1, intervals)

    def autocomplete(
            self,
            prefix: List[int],
            limit: Optional[int] = None) -> List[Tuple[Melody, float]]:
        """Return up to <limit> matches for the given interval sequence.

        The return value is a list of tuples (melody, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given interval sequence.

        Precondition:
            limit is None or limit > 0
        """
        return self.autocompleter.autocomplete(prefix, limit)

    def remove(self, prefix: List[int]) -> None:
        """Remove all melodies that match the given interval sequence.
        """
        self.autocompleter.remove(prefix)

Beispiel #7

0

Datei anzeigen

Datei: a2_test.py Projekt: patawatt/autocomplete

def test_autocomplete() -> None:
    """Check that compressed and simple trees autocomplete identically.

    Both trees receive the same insertions in the same order, and are then
    compared on a fixed series of prefixes with no limit.
    """
    # (value, weight) pairs; the prefix sequence of each value is its list
    # of letters. 'doors' is deliberately inserted twice.
    entries = [
        ('car', 1),
        ('care', 2),
        ('cat', 6),
        ('danger', 1),
        ('door', 0.5),
        ('doors', 0.5),
        ('doors', 0.5),
        ('desk', 10),
    ]

    x = CompressedPrefixTree('sum')
    y = SimplePrefixTree('sum')
    for tree in (x, y):
        for word, weight in entries:
            tree.insert(word, weight, list(word))

    # Each string stands for the prefix made of its individual letters.
    queries = [
        'c', 'ca', 'car', 'care', 'cat',
        'd', 'do', 'da', 'de', 'des', 'doo', 'dan',
        'door', 'doors', 'danger',
    ]
    for query in queries:
        assert x.autocomplete(list(query)) == y.autocomplete(list(query))

Beispiel #8

0

Datei anzeigen

Datei: a2_test.py Projekt: patawatt/autocomplete

def test_insert() -> None:
    """Check autocomplete match counts and limits after a series of inserts.

    Covers a tiny hand-built 'average' tree, then trees built from a sentence
    of unique words under both weight types, with and without limits, and
    with uniform or increasing weights.
    """
    # How many values in the prefix tree match the autocomplete prefix?
    # (0? 1? 10?)
    small = SimplePrefixTree('average')
    for word, weight in [('car', 1.0), ('cat', 2.0), ('care', 3.0)]:
        small.insert(word, weight, list(word))
    for letters, count in [('', 3), ('c', 3), ('ca', 3), ('cat', 1),
                           ('car', 2)]:
        assert len(small.autocomplete(list(letters))) == count

    # this string has no repeat words
    sentence = "at this point you should try inserting values into a prefix tree and then calling autocomplete to obtain some results here are suggestions of input properties conditions help design test cases"
    words = sentence.split()

    def build(weight_type, increasing=False):
        # Insert every word letter by letter. Weights are all 1.0, or
        # 1.0, 2.0, 3.0, ... in insertion order when <increasing> is set.
        tree = SimplePrefixTree(weight_type)
        weight = 1.0
        for word in words:
            tree.insert(word, weight, list(word))
            if increasing:
                weight += 1.0
        return tree

    # Unlimited queries on uniform weights: first 'sum', then the same
    # checks with 'average'.
    for weight_type in ('sum', 'average'):
        tree = build(weight_type)
        assert len(tree.autocomplete([])) == len(words)
        assert tree.autocomplete(['t', 'o']) == [('to', 1.0)]
        assert len(tree.autocomplete(['a'])) == 5
        assert tree.autocomplete(['o']) == [('of', 1.0), ('obtain', 1.0)]
        assert tree.autocomplete(['p', 'r']) == [('properties', 1.0),
                                                 ('prefix', 1.0)]

    # What is the relationship between the number of matches and the limit
    # argument? (less than? equal to? greater than?)
    tree = build('sum')
    assert len(tree.autocomplete([], 5)) == 5
    assert tree.autocomplete(['t', 'o'], 2) == [('to', 1.0)]
    assert len(tree.autocomplete(['a'], 3)) == 3  # there are 5 a words
    assert tree.autocomplete(['o'], 0) == []  # same as len() == 0
    assert len(tree.autocomplete(['p', 'r'], 2)) == 2

    # If there are more matches than the specified limit, try different
    # combinations of input weights to check that you’re returning the right
    # matches. Checked with 'sum' first, then with 'average'.
    for weight_type in ('sum', 'average'):
        tree = build(weight_type, increasing=True)
        assert len(tree.autocomplete([], 5)) == 5
        assert len(tree.autocomplete(['t', 'o'], 2)) == 1
        assert len(tree.autocomplete(['a'], 3)) == 3  # there are 5 a words
        assert tree.autocomplete(['o'], 0) == []  # same as len() == 0
        assert len(tree.autocomplete(['p', 'r'], 2)) == 2

Beispiel #9

0

Datei anzeigen

Datei: autocomplete_engines.py Projekt: ajollymarlier/University_Code

class LetterAutocompleteEngine:
    """An autocomplete engine that suggests strings based on a few letters.

    The *prefix sequence* for a string is the list of characters in the string.
    This can include space characters.

    This autocomplete engine only stores and suggests strings with lowercase
    letters, numbers, and space characters; see the section on
    "Text sanitization" on the assignment handout.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a text file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Each line of the specified file counts as one input string.
        Note that the line may or may not contain spaces.
        Each string must be sanitized, and if the resulting string contains
        at least one alphanumeric character, it is inserted into the
        Autocompleter.

        Lines that do not contain at least one alphanumeric character are
        skipped.

        When each string is inserted, it is given a weight of one.
        Note that it is possible for the same string to appear on more than
        one line of the input file; this would result in that string getting
        a larger weight (because of how Autocompleter.insert works).

        >>> import sys
        >>> sys.setrecursionlimit(5000)
        >>> a = LetterAutocompleteEngine({'file': 'data/lotr.txt', 'autocompleter': 'simple', 'weight_type': 'sum'})
        """
        # NOTE(review): config['autocompleter'] is not consulted; a
        # SimplePrefixTree is always built, even for 'compressed'.
        self.autocompleter = SimplePrefixTree(config['weight_type'])

        with open(config['file'], encoding='utf8') as f:
            for line in f:
                # _sanitize is used as a pair: index 0 is the string to
                # store, index 1 is its sequence of prefix elements.
                sanitized_t = _sanitize(line)
                text = sanitized_t[0].lower()
                # Skip lines with no alphanumeric character, as the
                # docstring requires.
                if not any(char.isalnum() for char in text):
                    continue
                self.autocompleter.insert(
                    text, 1.0, [x.lower() for x in sanitized_t[1]])

    def autocomplete(self,
                     prefix: str,
                     limit: Optional[int] = None) -> List[Tuple[str, float]]:
        """Return up to <limit> matches for the given prefix string.

        The return value is a list of tuples (string, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given prefix.

        The prefix string is split into a list of single characters before
        being passed to the Autocompleter.

        Preconditions:
            limit is None or limit > 0
            <prefix> contains only lowercase alphanumeric characters and spaces
        """
        return self.autocompleter.autocomplete(list(prefix), limit)

    def remove(self, prefix: str) -> None:
        """Remove all strings that match the given prefix string.

        The prefix string is split into a list of single characters before
        being passed to the Autocompleter.

        Precondition: <prefix> contains only lowercase alphanumeric characters
                      and spaces.
        """
        self.autocompleter.remove(list(prefix))