コード例 #1
0
def test_simple_prefix_tree_structure() -> None:
    """Verify the layout of a small 'sum'-weighted simple prefix tree.

    Three words are inserted letter by letter; afterwards the root must report
    three values, the summed weight, and two subtrees ordered by weight.

    NOTE: The outcome must not depend on insertion order, so reordering the
    entries below is a worthwhile experiment.
    """
    entries = (('cat', 2.0), ('car', 3.0), ('dog', 4.0))
    tree = SimplePrefixTree('sum')
    for word, weight in entries:
        tree.insert(word, weight, list(word))

    # __len__ counts only the inserted values (the leaves of the tree).
    assert len(tree) == 3

    # With the 'sum' option the root aggregates every inserted weight.
    assert tree.weight == 2.0 + 3.0 + 4.0

    # Exactly two subtrees; their order is determined by weight.
    assert len(tree.subtrees) == 2
    heavier, lighter = tree.subtrees[0], tree.subtrees[1]

    assert heavier.value == ['c']
    assert heavier.weight == 5.0

    assert lighter.value == ['d']
    assert lighter.weight == 4.0
コード例 #2
0
def spt_method_3(spt: SimplePrefixTree,
                 largest_prefix: int,
                 weights: List[Any],
                 prefixes: List[Any] = None) -> List[List[Any]]:
    """Populate <spt> with a binary fan of prefixes for testing purposes.

                        []
                [0]           [1]
            [0,0] [0,1]   [1,0] [1,1]
            ...             ...

    Every prefix in <prefixes> is extended with 0 and with 1; one value is
    inserted under each extended prefix, and the extension then continues
    recursively until the prefixes reach length <largest_prefix>.

    Parameters:
        spt: the tree to insert into; nothing is inserted when its root value
            is not a list.
        largest_prefix: length of the longest prefix to generate.
        weights: pool of weights. One weight is popped from the END of this
            list per inserted value, so the caller's list is consumed and
            must hold at least as many entries as values get inserted.
        prefixes: the prefixes to extend; defaults to the single empty
            prefix. All entries are assumed to share the length of the
            first one.

    Returns the list of every prefix a value was inserted under, in insertion
    order. Returns [] when there is nothing to extend.

    Note: height of spt = len(largest prefix tree) + 2 = largest_prefix + 2
    """
    # A None sentinel replaces the former mutable default ``[[]]``.
    if prefixes is None:
        prefixes = [[]]
    if not isinstance(spt.value, list) or not prefixes:
        return []
    if len(prefixes[0]) == largest_prefix:
        return []

    accum_prefixes = []
    # The stored values don't matter; draw exactly as many as this call
    # inserts (two per prefix) so the pool can never run dry.
    values = random.sample(range(1000000), 2 * len(prefixes))
    for prefix in prefixes:
        for n in range(2):
            pref = prefix + [n]
            accum_prefixes.append(pref)
            spt.insert(values.pop(), float(weights.pop()), pref)
            accum_prefixes.extend(
                spt_method_3(spt, largest_prefix, weights, [pref]))
    return accum_prefixes
コード例 #3
0
def test_insert(length: int) -> None:
    """Check subtree count, aggregate weight and length after bulk inserts.

    A 'sum' tree and an 'average' tree are filled identically using three
    insertion patterns (n = length):

    method 1: prefixes [[0..n-1], [1..n-1], ..., [n-1]]
              -> the root must have len(prefixes) subtrees
    method 2: prefixes [[0..n-1], [0..n-2], ..., [0]]
              -> the root must have exactly one subtree
    method 3: the binary fan generated by spt_method_3
              -> the root must have exactly two subtrees
    """
    import sys
    sys.setrecursionlimit(5000)

    for method in ('1', '2', '3'):
        prefixes = []
        values = []
        weights = []
        sum_tree = SimplePrefixTree('sum')
        avg_tree = SimplePrefixTree('average')

        if method == '3':
            prefixes = spt_method_3(sum_tree, 3, list(range(15)))
            spt_method_3(avg_tree, 3, list(range(15)))
            values = prefixes  # only the number of values is checked
            weights = list(range(15))
            weights.reverse()
        else:
            for offset in range(length):
                if method == '1':
                    prefix = list(range(offset, length))
                else:
                    prefix = list(range(0, length - offset))
                # The same number serves as both the value and its weight.
                amount = length - offset
                prefixes.append(prefix)
                values.append(amount)
                weights.append(amount)
                sum_tree.insert(amount, amount, prefix)
                avg_tree.insert(amount, amount, prefix)

        if method == '1':
            expected_subtrees = len(prefixes)
        elif method == '2':
            expected_subtrees = 1
        else:  # method == '3'
            expected_subtrees = 2
        assert len(sum_tree.subtrees) == expected_subtrees

        total = sum(weights)
        assert sum_tree.weight == total
        assert avg_tree.weight == total / len(values)
        assert len(sum_tree) == len(values)
        # Subtrees must be kept in non-increasing weight order.
        assert scheck_subtrees_non_increasing_order(sum_tree)
        # The matching stree_weight_check(sum_tree, 'sum') check is disabled.
        assert stree_weight_check(avg_tree, 'average')
コード例 #4
0
def test_insert_num_nodes(length: int) -> None:
    """Check the node count after a single insert into a new prefix tree.

    One value stored under a length-n prefix [x_1, .., x_n] must produce
    n + 2 nodes: n + 1 internal nodes plus the leaf holding the value.
    """
    import sys
    sys.setrecursionlimit(5000)

    tree = SimplePrefixTree('sum')
    tree.insert('x', 1, list(range(length)))

    expected_nodes = length + 2
    assert num_nodes(tree) == expected_nodes
    assert len(tree) == 1
    assert tree.weight == 1
コード例 #5
0
def test_insert_2() -> None:
    """Exercise SimplePrefixTree.insert() on a few tiny 'sum' trees."""
    # A brand-new tree holds no values and has the empty prefix as its value.
    one_level = SimplePrefixTree('sum')
    assert len(one_level) == 0
    assert one_level.value == []

    # One value under a length-one prefix: root, ['x'] node and the leaf.
    one_level.insert('x', 1, ['x'])
    assert len(one_level) == 1
    assert num_nodes(one_level) == 3

    # Longer prefixes for a single value are covered by
    # test_insert_num_nodes().

    # One value under the empty prefix: just the root and the leaf.
    root_only = SimplePrefixTree('sum')
    root_only.insert('x', 1, [])
    assert len(root_only) == 1
    assert num_nodes(root_only) == 2
コード例 #6
0
def test_simple_prefix_tree_autocomplete() -> None:
    """Verify autocomplete results on a small 'sum' simple prefix tree.

    NOTE: The outcome must not depend on insertion order, so reordering the
    entries below is a worthwhile experiment.
    """
    tree = SimplePrefixTree('sum')
    for word, weight in (('cat', 2.0), ('car', 3.0), ('dog', 4.0)):
        tree.insert(word, weight, list(word))

    # Without a limit, every match comes back sorted by non-increasing
    # weight (the sorting is SimplePrefixTree.autocomplete's job).
    everything = tree.autocomplete([])
    assert everything == [('dog', 4.0), ('car', 3.0), ('cat', 2.0)]

    # With a limit the greedy search need not return the heaviest value:
    # the ['c'] subtree is explored first, so 'car' is found before 'dog'.
    first_only = tree.autocomplete([], 1)
    assert first_only == [('car', 3.0)]
コード例 #7
0
def test_simple_prefix_tree_remove() -> None:
    """Verify remove behaviour on a small 'sum' simple prefix tree.

    NOTE: The outcome must not depend on insertion order, so reordering the
    entries below is a worthwhile experiment.
    """
    tree = SimplePrefixTree('sum')
    for word, weight in (('cat', 2.0), ('car', 3.0), ('dog', 4.0)):
        tree.insert(word, weight, list(word))

    # Only values may sit at the leaves, so removing a prefix can also take
    # its now-empty ancestors out of the tree.
    tree.remove(['c', 'a'])

    assert len(tree) == 1
    assert tree.weight == 4.0

    # The whole ['c'] branch is gone; only ['d'] remains.
    remaining = tree.subtrees
    assert len(remaining) == 1
    assert remaining[0].value == ['d']
コード例 #8
0
ファイル: a2_test.py プロジェクト: patawatt/autocomplete
def test_insert():
    """Check SimplePrefixTree.insert via str() output and leaf positions.

    The three single-insert scenarios are run for both weight types; a final
    scenario grows an 'average' tree one value at a time.
    """
    for weight_type in ('sum', 'average'):
        # Empty prefix: an internal [] node plus one leaf. __len__ counts
        # only inserted values, so it must be 1.
        tree = SimplePrefixTree(weight_type)
        tree.insert('a', 0.1, [])
        assert str(tree) == '[] (0.1)\n  a (0.1)\n'
        assert len(tree) == 1

        # Length-one prefix [x]: internal nodes [] and [x], then the leaf.
        tree = SimplePrefixTree(weight_type)
        tree.insert('a', 0.1, ['a'])
        assert str(tree) == "[] (0.1)\n  ['a'] (0.1)\n    a (0.1)\n"
        assert len(tree) == 1

        # Length-n prefix: n + 2 nodes, so with n = 3 the leaf sits four
        # levels below the root.
        tree = SimplePrefixTree(weight_type)
        tree.insert('a', 0.1, ['a', 'b', 'c'])
        assert len(tree) == 1
        leaf = tree.subtrees[0].subtrees[0].subtrees[0].subtrees[0]
        assert leaf.value == 'a'

    # Grow an 'average' tree step by step and compare the full rendering.
    growing = SimplePrefixTree('average')

    growing.insert('car', 1.0, ['c', 'a', 'r'])
    assert str(growing) == (
        "[] (1.0)\n"
        "  ['c'] (1.0)\n"
        "    ['c', 'a'] (1.0)\n"
        "      ['c', 'a', 'r'] (1.0)\n"
        "        car (1.0)\n"
    )

    growing.insert('cat', 2.0, ['c', 'a', 't'])
    assert str(growing) == (
        "[] (1.5)\n"
        "  ['c'] (1.5)\n"
        "    ['c', 'a'] (1.5)\n"
        "      ['c', 'a', 't'] (2.0)\n"
        "        cat (2.0)\n"
        "      ['c', 'a', 'r'] (1.0)\n"
        "        car (1.0)\n"
    )

    growing.insert('care', 3.0, ['c', 'a', 'r', 'e'])
    assert str(growing) == (
        "[] (2.0)\n"
        "  ['c'] (2.0)\n"
        "    ['c', 'a'] (2.0)\n"
        "      ['c', 'a', 't'] (2.0)\n"
        "        cat (2.0)\n"
        "      ['c', 'a', 'r'] (2.0)\n"
        "        ['c', 'a', 'r', 'e'] (3.0)\n"
        "          care (3.0)\n"
        "        car (1.0)\n"
    )
コード例 #9
0
class SimpleInsertTest(unittest.TestCase):
    """Check SimplePrefixTree.insert by comparing repr_tree() renderings."""

    def setUp(self):
        """Give every test a fresh 'sum' tree and a fresh 'average' tree."""
        self.sum_tree = SimplePrefixTree('sum')
        self.avg_tree = SimplePrefixTree('average')

    def _insert_all(self, tree, entries):
        """Insert each (value, weight, prefix) triple of <entries> in order."""
        for value, weight, prefix in entries:
            tree.insert(value, weight, prefix)

    def _expect(self, tree, rendering):
        """Assert that repr_tree(<tree>) equals <rendering>."""
        self.assertEqual(repr_tree(tree), rendering)

    def test_insert_leaf(self):
        """A value under the empty prefix hangs directly off the root."""
        trees = (self.sum_tree, self.avg_tree)
        for tree in trees:
            tree.insert('Gary', 5, [])

        for tree in trees:
            self._expect(tree, "Tree([] (5.0) [Tree(Gary (5.0))])")

    def test_update_leaf_weight(self):
        """Re-inserting a value adds to its weight for both weight types."""
        trees = (self.sum_tree, self.avg_tree)
        for tree in trees:
            self._insert_all(tree, [('Gary', 5, []), ('Gary', 5, [])])

        for tree in trees:
            self._expect(tree, "Tree([] (10.0) [Tree(Gary (10.0))])")

    def test_insert_create_internal(self):
        """A length-one prefix creates one internal node above the leaf."""
        rendering = "Tree([] (5.0) [Tree(['a'] (5.0) [Tree(Gary (5.0))])])"

        for tree in (self.sum_tree, self.avg_tree):
            tree.insert('Gary', 5, ['a'])
            self._expect(tree, rendering)

    def test_insert_multi_internals(self):
        """A length-three prefix creates a chain of internal nodes."""
        trees = (self.sum_tree, self.avg_tree)
        for tree in trees:
            tree.insert('Gary', 5, ['a', 'b', 'c'])

        rendering = ("Tree([] (5.0) [Tree(['a'] (5.0) [Tree(['a', 'b'] (5.0) "
                     "[Tree(['a', 'b', 'c'] (5.0) [Tree(Gary (5.0))])])])])")

        for tree in trees:
            self._expect(tree, rendering)

    def test_insert_sum_tree_two_branches(self):
        """Sibling branches of a sum tree are ordered heaviest first."""
        self._insert_all(self.sum_tree, [('ac', 4, ['a', 'c']),
                                         ('ab', 6, ['a', 'b'])])

        rendering = ("Tree([] (10.0) [Tree(['a'] (10.0) [Tree(['a', 'b'] (6.0) "
                     "[Tree(ab (6.0))]), Tree(['a', 'c'] (4.0) [Tree(ac (4.0))])])])")

        self._expect(self.sum_tree, rendering)

    def test_insert_avg_tree_two_branches(self):
        """Sibling branches of an average tree are ordered heaviest first."""
        self._insert_all(self.avg_tree, [('ac', 4, ['a', 'c']),
                                         ('ab', 6, ['a', 'b'])])

        rendering = ("Tree([] (5.0) [Tree(['a'] (5.0) [Tree(['a', 'b'] (6.0) "
                     "[Tree(ab (6.0))]), Tree(['a', 'c'] (4.0) [Tree(ac (4.0))])])])")

        self._expect(self.avg_tree, rendering)

    def test_sum_tree_contains_leave_and_subtree(self):
        """A sum-tree node may hold both a leaf and an internal subtree."""
        self._insert_all(self.sum_tree, [('a', 4, ['a']),
                                         ('ab', 6, ['a', 'b'])])

        rendering = "Tree([] (10.0) [Tree(['a'] (10.0) [Tree(['a', 'b'] (6.0) [Tree(ab (6.0))]), Tree(a (4.0))])])"

        self._expect(self.sum_tree, rendering)

    def test_avg_tree_contains_leave_and_subtree(self):
        """An average-tree node may hold both a leaf and a subtree."""
        self._insert_all(self.avg_tree, [('a', 4, ['a']),
                                         ('ab', 6, ['a', 'b'])])

        rendering = "Tree([] (5.0) [Tree(['a'] (5.0) [Tree(['a', 'b'] (6.0) [Tree(ab (6.0))]), Tree(a (4.0))])])"

        self._expect(self.avg_tree, rendering)

    def test_sum_tree_insert_dup(self):
        """A duplicate insert into a sum tree doubles the weights on its path."""
        self._insert_all(self.sum_tree, [('ab', 6, ['a', 'b']),
                                         ('ab', 6, ['a', 'b'])])

        rendering = "Tree([] (12.0) [Tree(['a'] (12.0) [Tree(['a', 'b'] (12.0) [Tree(ab (12.0))])])])"

        self._expect(self.sum_tree, rendering)

    def test_avg_tree_insert_dup(self):
        """A duplicate insert into an average tree still counts as one value."""
        self._insert_all(self.avg_tree, [('ab', 6, ['a', 'b']),
                                         ('ab', 6, ['a', 'b'])])

        rendering = "Tree([] (12.0) [Tree(['a'] (12.0) [Tree(['a', 'b'] (12.0) [Tree(ab (12.0))])])])"

        self._expect(self.avg_tree, rendering)

    def test_sum_tree_insert_dup_change_order(self):
        """A duplicate insert can move a sum-tree branch ahead of a sibling."""
        self._insert_all(self.sum_tree, [('ab', 4, ['a', 'b']),
                                         ('ac', 6, ['a', 'c']),
                                         ('ab', 6, ['a', 'b'])])

        rendering = ("Tree([] (16.0) [Tree(['a'] (16.0) [Tree(['a', 'b'] (10.0) "
                     "[Tree(ab (10.0))]), Tree(['a', 'c'] (6.0) [Tree(ac (6.0))])])])")

        self._expect(self.sum_tree, rendering)

    def test_avg_tree_insert_dup_change_order(self):
        """A duplicate insert can move an average-tree branch ahead too."""
        self._insert_all(self.avg_tree, [('ab', 4, ['a', 'b']),
                                         ('ac', 6, ['a', 'c']),
                                         ('ab', 6, ['a', 'b'])])

        rendering = ("Tree([] (8.0) [Tree(['a'] (8.0) [Tree(['a', 'b'] (10.0) "
                     "[Tree(ab (10.0))]), Tree(['a', 'c'] (6.0) [Tree(ac (6.0))])])])")

        self._expect(self.avg_tree, rendering)
コード例 #10
0
class LetterAutocompleteEngine:
    """An autocomplete engine that suggests strings based on a few letters.

    The *prefix sequence* for a string is the list of characters in the string.
    This can include space characters.

    This autocomplete engine only stores and suggests strings with lowercase
    letters, numbers, and space characters; see the section on
    "Text sanitization" on the assignment handout.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a text file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Each line of the specified file counts as one input string.
        Note that the line may or may not contain spaces.
        Each string is sanitized (lowercased, then every character that is
        neither alphanumeric nor a space is dropped), and if the resulting
        string contains at least one alphanumeric character, it is inserted
        into the Autocompleter.

        *Lines that do not contain at least one alphanumeric character are
        skipped.*

        When each string is inserted, it is given a weight of one.
        Note that it is possible for the same string to appear on more than
        one line of the input file; this would result in that string getting
        a larger weight (because of how Autocompleter.insert works).
        """
        with open(config['file'], encoding='utf8') as f:
            # NOTE(review): the 'compressed' option currently builds a
            # SimplePrefixTree as well; switch it to the compressed
            # Autocompleter subclass once one is available here.
            if config['autocompleter'] == 'simple':
                self.autocompleter = SimplePrefixTree(config['weight_type'])
            elif config['autocompleter'] == 'compressed':
                self.autocompleter = SimplePrefixTree(config['weight_type'])
            for line in f:
                cleaned = ''.join(char for char in line.lower()
                                  if (char.isalnum() or char == ' '))
                # Blank or punctuation-only lines sanitize to nothing but
                # spaces; they must not be stored as suggestions.
                if not any(char.isalnum() for char in cleaned):
                    continue
                self.autocompleter.insert(cleaned, 1, list(cleaned))

    def autocomplete(self,
                     prefix: str,
                     limit: Optional[int] = None) -> List[Tuple[str, float]]:
        """Return up to <limit> matches for the given prefix string.

        The return value is a list of tuples (string, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given prefix.

        The prefix string is split into a list of its characters before
        being passed to the Autocompleter.

        Preconditions:
            limit is None or limit > 0
            <prefix> contains only lowercase alphanumeric characters and spaces
        """
        return self.autocompleter.autocomplete(list(prefix), limit)

    def remove(self, prefix: str) -> None:
        """Remove all strings that match the given prefix string.

        The prefix string is split into a list of its characters before
        being passed to the Autocompleter.

        Precondition: <prefix> contains only lowercase alphanumeric characters
                      and spaces.
        """
        self.autocompleter.remove(list(prefix))
コード例 #11
0
ファイル: a2_test.py プロジェクト: patawatt/autocomplete
def test_insert() -> None:
    """Check SimplePrefixTree.autocomplete result counts and contents.

    Covers how many values match a prefix, how the result size relates to
    the limit argument, and behaviour with uniform versus increasing weights
    for both the 'sum' and 'average' weight types.
    """
    # How many values in the tree match the autocomplete prefix?
    small = SimplePrefixTree('average')
    for word, weight in (('car', 1.0), ('cat', 2.0), ('care', 3.0)):
        small.insert(word, weight, list(word))
    match_counts = (
        ([], 3),
        (['c'], 3),
        (['c', 'a'], 3),
        (['c', 'a', 't'], 1),
        (['c', 'a', 'r'], 2),
    )
    for query, expected_count in match_counts:
        assert len(small.autocomplete(query)) == expected_count

    # This sentence contains no repeated words.
    sentence = ("at this point you should try inserting values into a prefix tree "
                "and then calling autocomplete to obtain some results here are "
                "suggestions of input properties conditions help design test cases")
    words = sentence.split()

    def build(weight_type, step):
        """Return a tree holding every word; weights start at 1.0 and grow
        by <step> after each insert."""
        tree = SimplePrefixTree(weight_type)
        weight = 1.0
        for word in words:
            tree.insert(word, weight, list(word))
            weight += step
        return tree

    # Uniform weights, no limit: identical expectations for both types.
    for weight_type in ('sum', 'average'):
        tree = build(weight_type, 0.0)
        assert len(tree.autocomplete([])) == len(words)
        assert tree.autocomplete(['t', 'o']) == [('to', 1.0)]
        assert len(tree.autocomplete(['a'])) == 5
        assert tree.autocomplete(['o']) == [('of', 1.0), ('obtain', 1.0)]
        assert tree.autocomplete(['p', 'r']) == [('properties', 1.0),
                                                 ('prefix', 1.0)]

    # How does the number of matches relate to the limit argument?
    tree = build('sum', 0.0)
    assert len(tree.autocomplete([], 5)) == 5
    assert tree.autocomplete(['t', 'o'], 2) == [('to', 1.0)]
    assert len(tree.autocomplete(['a'], 3)) == 3  # five words start with 'a'
    assert tree.autocomplete(['o'], 0) == []  # a limit of 0 yields nothing
    assert len(tree.autocomplete(['p', 'r'], 2)) == 2

    # More matches than the limit, with a different weight for every word.
    for weight_type in ('sum', 'average'):
        tree = build(weight_type, 1.0)
        assert len(tree.autocomplete([], 5)) == 5
        assert len(tree.autocomplete(['t', 'o'], 2)) == 1
        assert len(tree.autocomplete(['a'], 3)) == 3  # five 'a' words exist
        assert tree.autocomplete(['o'], 0) == []  # a limit of 0 yields nothing
        assert len(tree.autocomplete(['p', 'r'], 2)) == 2
コード例 #12
0
class SimpleAutoCompleteTest(unittest.TestCase):
    """Check SimplePrefixTree.remove by comparing repr_tree() renderings.

    Despite the class name, every case here exercises remove().
    """

    def setUp(self):
        """Give every test a fresh 'sum' tree and a fresh 'average' tree."""
        self.sum_tree = SimplePrefixTree('sum')
        self.avg_tree = SimplePrefixTree('average')

    def _expect(self, tree, rendering):
        """Assert that repr_tree(<tree>) equals <rendering>."""
        self.assertEqual(repr_tree(tree), rendering)

    def test_empty_tree_remove_no_prefix(self):
        """Removing the empty prefix from an empty tree leaves it empty."""
        tree = self.sum_tree
        tree.remove([])
        self._expect(tree, "")

    def test_empty_tree_remove_extra_prefix(self):
        """Removing an absent prefix from an empty tree leaves it empty."""
        tree = self.sum_tree
        tree.remove(['a'])
        self._expect(tree, "")

    def test_remove_leaf(self):
        """Removing the empty prefix drops every value under the root."""
        tree = self.sum_tree
        for name, weight in (('Bob', 6), ('Alice', 5)):
            tree.insert(name, weight, [])
        tree.remove([])
        self._expect(tree, "")

    def test_remove_update_weight(self):
        """Sum weights shrink along the path after one branch is removed."""
        tree = self.sum_tree
        tree.insert('Bob', 6, ['a', 'b'])
        tree.insert('Alice', 5, ['a', 'c'])
        tree.remove(['a', 'b'])
        self._expect(
            tree,
            "Tree([] (5.0) [Tree(['a'] (5.0) [Tree(['a', 'c'] (5.0) [Tree(Alice (5.0))])])])")

    def test_remove_update_avg(self):
        """Average weights are recomputed after one branch is removed."""
        tree = self.avg_tree
        tree.insert('Bob', 6, ['a', 'b'])
        tree.insert('Alice', 4, ['a', 'c'])
        tree.remove(['a', 'b'])
        self._expect(
            tree,
            "Tree([] (4.0) [Tree(['a'] (4.0) [Tree(['a', 'c'] (4.0) [Tree(Alice (4.0))])])])")

    def test_remove_entire_subtree(self):
        """Removing a short prefix drops everything stored beneath it."""
        tree = self.avg_tree
        tree.insert('Bob', 6, ['a', 'b'])
        tree.insert('Alice', 4, ['a', 'c'])
        tree.insert('Jacky', 4, ['b', 'c'])
        tree.remove(['a'])
        self._expect(
            tree,
            "Tree([] (4.0) [Tree(['b'] (4.0) [Tree(['b', 'c'] (4.0) [Tree(Jacky (4.0))])])])")

    def test_remove_delete_empty_subtree(self):
        """Internal nodes left without values are pruned after a removal."""
        tree = self.avg_tree
        tree.insert('Alice', 6, ['a'])
        tree.insert('Bob', 6, ['a', 'b', 'c', 'd', 'e'])
        tree.remove(['a', 'b', 'c', 'd', 'e'])
        self._expect(
            tree,
            "Tree([] (6.0) [Tree(['a'] (6.0) [Tree(Alice (6.0))])])")

    def test_remove_non_exist_prefix(self):
        """Removing a prefix that is not in the tree changes nothing."""
        tree = self.avg_tree
        tree.insert('Alice', 6, ['a'])
        tree.insert('Bob', 6, ['a', 'b'])
        tree.remove(['b'])
        self._expect(
            tree,
            "Tree([] (6.0) [Tree(['a'] (6.0) [Tree(Alice (6.0)), Tree(['a', 'b'] (6.0) [Tree(Bob (6.0))])])])")
コード例 #13
0
class SentenceAutocompleteEngine:
    """An autocomplete engine that suggests strings based on a few words.

    A *word* is a string containing only alphanumeric characters.
    The *prefix sequence* for a string is the list of words in the string
    (separated by whitespace). The words themselves do not contain spaces.

    This autocomplete engine only stores and suggests strings with lowercase
    letters, numbers, and space characters; see the section on
    "Text sanitization" on the assignment handout.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a CSV file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Precondition:
        The given file is a *CSV file* where each line has two entries:
            - the first entry is a string
            - the second entry is a number representing the weight of that
              string

        Note that the line may or may not contain spaces.
        Each string is sanitized (lowercased, then every character that is
        neither alphanumeric nor a space is dropped), and if the resulting
        string contains at least one word, it is inserted into the
        Autocompleter. Completely blank rows and strings that sanitize to
        zero words are skipped.

        When each string is inserted, it is given the weight specified on
        its line of the CSV file.
        Note that it is possible for the same string to appear on more than
        one line of the input file; this would result in that string getting
        a larger weight.

        Raises ValueError when a weight entry is not a number, and IndexError
        when a non-blank row has no second entry.
        """
        # Imported locally so this class does not depend on a module-level
        # import being present in the file.
        import csv

        # newline='' is what the csv module expects of the file object.
        with open(config['file'], encoding='utf8', newline='') as f:
            # NOTE(review): the 'compressed' option currently builds a
            # SimplePrefixTree as well; switch it to the compressed
            # Autocompleter subclass once one is available here.
            if config['autocompleter'] == 'simple':
                self.autocompleter = SimplePrefixTree(config['weight_type'])
            elif config['autocompleter'] == 'compressed':
                self.autocompleter = SimplePrefixTree(config['weight_type'])
            # csv.reader copes with quoted entries that contain commas,
            # which a plain split(',') would cut in the wrong place.
            for row in csv.reader(f):
                if not row:
                    continue  # blank line in the file
                text = ''.join(char for char in row[0].lower()
                               if (char.isalnum() or char == ' '))
                words = text.split()
                if not words:
                    continue  # nothing left to store after sanitizing
                self.autocompleter.insert(text, float(row[1]), words)

    def autocomplete(self,
                     prefix: str,
                     limit: Optional[int] = None) -> List[Tuple[str, float]]:
        """Return up to <limit> matches for the given prefix string.

        The return value is a list of tuples (string, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given prefix.

        The prefix string is split on whitespace into a list of words before
        being passed to the Autocompleter.

        Preconditions:
            limit is None or limit > 0
            <prefix> contains only lowercase alphanumeric characters and spaces
        """
        return self.autocompleter.autocomplete(prefix.split(), limit)

    def remove(self, prefix: str) -> None:
        """Remove all strings that match the given prefix.

        The prefix string is split on whitespace into a list of words before
        being passed to the Autocompleter.

        Precondition: <prefix> contains only lowercase alphanumeric characters
                      and spaces.
        """
        self.autocompleter.remove(prefix.split())
コード例 #14
0
def test_autocomplete(length: int) -> None:
    """Check the size and ordering of SimplePrefixTree.autocomplete results.

    A 'sum' tree and an 'average' tree are filled identically using three
    insertion patterns (n = length), each prefix holding one value:

    method 1: prefixes [[0..n-1], [1..n-1], ..., [n-1]]
              -> the root has one subtree per prefix
    method 2: prefixes [[0..n-1], [0..n-2], ..., [0]]
              -> the root has a single subtree
    method 3: the binary fan generated by spt_method_3

    For the empty prefix and every inserted prefix, and for every limit from
    1 to the number of values, the results must respect the limit, never
    exceed the number of stored values, never carry a weight above the
    largest inserted weight, and be in non-increasing weight order.
    """
    import sys
    sys.setrecursionlimit(5000)

    for method in ('1', '2', '3'):
        prefixes = []
        values = []
        weights = []
        spt = SimplePrefixTree('sum')
        spt_avg = SimplePrefixTree('average')

        if method == '3':
            prefixes = spt_method_3(spt, 3, list(range(15)))
            spt_method_3(spt_avg, 3, list(range(15)))
            # Only the number of values matters below. Take a copy so the
            # count cannot drift when the prefix list is extended later.
            values = list(prefixes)
            weights = list(range(15))
            weights.reverse()
        else:
            for x in range(length):
                if method == '1':
                    start = x
                    stop = length
                else:
                    start = 0
                    stop = length - x
                prefix = list(range(start, stop))
                # The same number is used as value and weight, running from
                # length down to 1, so weights[0] is the largest weight.
                amount = length - x
                prefixes.append(prefix)
                values.append(amount)
                weights.append(amount)
                spt.insert(amount, amount, prefix)
                spt_avg.insert(amount, amount, prefix)

        # Query the empty prefix first, then every inserted prefix.
        for prefix in [[]] + prefixes:
            for i in range(1, len(values) + 1):
                tup = spt.autocomplete(prefix, i)
                tup_av = spt_avg.autocomplete(prefix, i)
                assert len(tup) <= i
                assert len(spt.autocomplete(prefix, i ** 2)) <= len(values)
                assert len(tup_av) <= i
                assert len(spt_avg.autocomplete(prefix, i ** 2)) <= len(values)
                # Each result list is checked against itself; comparing the
                # average tree's entries with the sum tree's was a bug.
                for results in (tup, tup_av):
                    for pos in range(len(results)):
                        # weights[0] holds the greatest inserted weight
                        assert results[pos][1] <= weights[0]
                        if pos != len(results) - 1:
                            # weights must be non-increasing
                            assert results[pos][1] >= results[pos + 1][1]
コード例 #15
0
def _assert_removal_invariants(spt, spt_avg, target, single_leaf):
    """Remove <target> from both trees and check the resulting bookkeeping.

    <spt> is the 'sum' tree and <spt_avg> the 'average' tree; both must hold
    the same leaves and must be non-empty before the call (the previous
    average is recomputed as previous sum / previous leaf count).

    If <single_leaf> is True, the removal must delete exactly one leaf and
    exactly two nodes in total; otherwise the leaf and node counts only have
    to shrink.
    """
    # Local import so this block does not depend on the file's import header.
    from math import isclose

    prev_sum = spt.weight  # for a 'sum' tree the weight *is* the leaf total
    prev_avg = spt_avg.weight
    prev_num = len(spt)
    prev_num_nodes = num_nodes(spt)

    spt.remove(target)
    spt_avg.remove(target)

    if single_leaf:
        assert len(spt) == prev_num - 1
        assert prev_num_nodes - num_nodes(spt) == 2
    else:
        assert len(spt) < prev_num  # deleting at least 1 leaf
        assert num_nodes(spt) < prev_num_nodes

    # Every inserted weight is positive, so the total must strictly drop.
    assert spt.weight < prev_sum

    # The two trees must agree both before and after the removal.  Derived
    # floats are compared with isclose because n * (s / n) is not guaranteed
    # to round back to s exactly.
    assert isclose(prev_avg, prev_sum / prev_num)
    if len(spt) == 0:
        assert spt.weight == 0
    else:
        assert isclose(spt_avg.weight, spt.weight / len(spt))
    assert isclose(spt.weight, len(spt) * spt_avg.weight)

    assert scheck_subtrees_non_increasing_order(spt)
    assert scheck_subtrees_non_increasing_order(spt_avg)
    assert scheck_subtrees_value(spt)
    assert scheck_subtrees_value(spt_avg)
    if single_leaf:
        # NOTE(review): the original test had this 'sum' check commented out
        # for strategy '1' (the only non-single-leaf caller), so it stays
        # disabled there -- confirm whether it can be re-enabled.
        assert stree_weight_check(spt, 'sum')
    assert stree_weight_check(spt_avg, 'average')


def test_remove(length: int) -> None:
    """Test remove method in the SimplePrefixTree class.

    <length> is supplied from outside this function (presumably a fixture or
    parametrization -- not visible here).

    Three strategies are run, each on a fresh 'sum' tree and a fresh
    'average' tree filled with the same prefixes and weights:

    - '1': prefixes are range(x, length) for x in 0..length-1, so each one
      starts with a different element; every removal targets the one-element
      prefix [prefix[0]].
    - '2': prefixes are range(0, length - x); they are removed whole, in
      insertion order.
    - '3': prefixes come from spt_method_3(..., 3, list(range(15))) and are
      removed whole, in reverse order of the returned list.

    After every removal the weights, leaf counts, node counts and subtree
    ordering of both trees are checked.
    """
    for method in ('1', '2', '3'):
        spt = SimplePrefixTree('sum')
        spt_avg = SimplePrefixTree('average')

        if method == '3':
            prefixes = spt_method_3(spt, 3, list(range(15)))
            spt_method_3(spt_avg, 3, list(range(15)))
        else:
            prefixes = []
            for x in range(length):
                if method == '1':
                    prefix = list(range(x, length))
                else:
                    prefix = list(range(0, length - x))
                # Value and weight are both length - x, so weights run from
                # <length> down to 1.
                weight = length - x
                prefixes.append(prefix)
                spt.insert(weight, weight, prefix)
                spt_avg.insert(weight, weight, prefix)

        if method == '1':
            targets = [[prefix[0]] for prefix in prefixes]
        elif method == '2':
            targets = prefixes
        else:
            targets = reversed(prefixes)

        for target in targets:
            _assert_removal_invariants(spt, spt_avg, target,
                                       single_leaf=(method != '1'))
コード例 #16
0
ファイル: a2_test.py プロジェクト: patawatt/autocomplete
def test_remove() -> None:
    """Check remove() on SimplePrefixTree and CompressedPrefixTree.

    Covers: removing single values and repeating the same removal, removing
    a whole branch from 'sum' and 'average' trees, and re-inserting into a
    compressed 'average' tree after a removal (with a full structural check
    of the rebuilt tree).
    """
    c_words = [('car', 1), ('care', 2), ('cat', 6)]
    d_words = [('danger', 1), ('door', 0.5), ('doors', 0.5), ('doors', 0.5)]

    def insert_all(tree, entries):
        # Each word is inserted under the list of its own characters.
        for word, weight in entries:
            tree.insert(word, weight, list(word))

    def assert_c_branch(tree):
        # Root-level checks on the untouched ['c', 'a'] branch.
        ca_node = tree.subtrees[0]
        assert ca_node.value == ['c', 'a']
        assert ca_node.weight == 3.0
        assert ca_node.num_leaves == 3

    def assert_all_words_present(tree):
        # Structure expected when all seven insertions are in the tree.
        assert tree.weight == 11.5 / 6
        assert tree.num_leaves == 6
        assert_c_branch(tree)
        d_node = tree.subtrees[1]
        assert d_node.value == ['d']
        assert d_node.weight == 2.5 / 3
        assert d_node.num_leaves == 3
        danger_node = d_node.subtrees[0]
        assert danger_node.value == ['d', 'a', 'n', 'g', 'e', 'r']
        assert danger_node.weight == 1.0
        assert danger_node.num_leaves == 1
        door_node = d_node.subtrees[1]
        assert door_node.value == ['d', 'o', 'o', 'r']
        assert door_node.weight == 0.75
        assert door_node.num_leaves == 2
        door_leaf = door_node.subtrees[1]
        assert door_leaf.value == 'door'
        assert door_leaf.weight == 0.5
        assert door_leaf.num_leaves == 0
        doors_node = door_node.subtrees[0]
        assert doors_node.value == ['d', 'o', 'o', 'r', 's']
        assert doors_node.weight == 1.0
        assert doors_node.num_leaves == 1

    # --- single-value removals on a small 'average' tree ---
    x = SimplePrefixTree('average')
    insert_all(x, [('car', 20), ('cat', 10), ('care', 5)])
    assert x.weight == 11.666666666666666
    x.remove(list('care'))
    assert x.weight == 15.0

    # The first pass removes 'cat'; the next two repeat the removal of a
    # value that is no longer there.
    for _ in range(3):
        x.remove(list('cat'))
        assert x.weight == 20.0

    # Remove everything, then repeat on the now-empty tree.
    for _ in range(3):
        x.remove(['c', 'a'])
        assert x.weight == 0

    # --- removing the whole ['d'] branch ---
    for tree_class, weight_type, expected in (
            (SimplePrefixTree, 'sum', (6 + 2 + 1)),
            (SimplePrefixTree, 'average', (6 + 2 + 1) / 3),
            (CompressedPrefixTree, 'sum', (6 + 2 + 1))):
        x = tree_class(weight_type)
        insert_all(x, c_words + d_words)
        x.remove(['d'])
        assert x.weight == expected

    # --- compressed 'average' tree: remove, then put the d-words back ---
    x = CompressedPrefixTree('average')
    insert_all(x, c_words + d_words)
    x.remove(['d'])

    insert_all(x, d_words)
    assert_all_words_present(x)

    x.remove(list('door'))

    assert x.weight == 10 / 4
    assert x.num_leaves == 4
    assert_c_branch(x)
    d_node = x.subtrees[1]
    assert d_node.value == ['d']
    assert d_node.weight == 1.0
    assert d_node.num_leaves == 1
    danger_node = d_node.subtrees[0]
    assert danger_node.value == ['d', 'a', 'n', 'g', 'e', 'r']
    assert danger_node.weight == 1.0
    assert danger_node.num_leaves == 1

    insert_all(x, d_words[1:])
    assert_all_words_present(x)

    x.insert('desk', 10, list('desk'))
コード例 #17
0
class LetterAutocompleteEngine:
    """An autocomplete engine that suggests strings based on a few letters.

    The *prefix sequence* for a string is the list of characters in the string.
    This can include space characters.

    This autocomplete engine only stores and suggests strings with lowercase
    letters, numbers, and space characters; see the section on
    "Text sanitization" on the assignment handout.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a text file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
              A missing key or any other value selects the simple tree.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Each line of the specified file counts as one input string.
        Note that the line may or may not contain spaces.
        Each string must be sanitized, and if the resulting string contains
        at least one alphanumeric character, it is inserted into the
        Autocompleter.

        *Skip lines that do not contain at least one alphanumeric character!*

        When each string is inserted, it is given a weight of one.
        Note that it is possible for the same string to appear on more than
        one line of the input file; this would result in that string getting
        a larger weight (because of how Autocompleter.insert works).

        >>> import sys
        >>> sys.setrecursionlimit(5000)
        >>> a = LetterAutocompleteEngine({'file': 'data/lotr.txt', 'autocompleter': 'simple', 'weight_type': 'sum'})
        """
        # Honour the 'autocompleter' option; anything other than
        # 'compressed' falls back to the simple tree.
        if config.get('autocompleter') == 'compressed':
            self.autocompleter = CompressedPrefixTree(config['weight_type'])
        else:
            self.autocompleter = SimplePrefixTree(config['weight_type'])

        with open(config['file'], encoding='utf8') as f:
            for line in f:
                # _sanitize is defined elsewhere; it is indexed here as
                # (sanitized text, sequence of prefix characters).
                sanitized_t = _sanitize(line)
                text = sanitized_t[0].lower()
                if not any(ch.isalnum() for ch in text):
                    # Blank or punctuation-only line: nothing to store.
                    continue
                self.autocompleter.insert(
                    text, 1.0, [x.lower() for x in sanitized_t[1]])

    def autocomplete(self,
                     prefix: str,
                     limit: Optional[int] = None) -> List[Tuple[str, float]]:
        """Return up to <limit> matches for the given prefix string.

        The return value is a list of tuples (string, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given prefix.

        Note that the given prefix string must be transformed into a list
        of letters before being passed to the Autocompleter.

        Preconditions:
            limit is None or limit > 0
            <prefix> contains only lowercase alphanumeric characters and spaces
        """
        return self.autocompleter.autocomplete(list(prefix), limit)

    def remove(self, prefix: str) -> None:
        """Remove all strings that match the given prefix string.

        Note that the given prefix string must be transformed into a list
        of letters before being passed to the Autocompleter.

        Precondition: <prefix> contains only lowercase alphanumeric characters
                      and spaces.
        """
        self.autocompleter.remove(list(prefix))
コード例 #18
0
ファイル: a2_test.py プロジェクト: patawatt/autocomplete
def test_autocomplete() -> None:
    """Check that a compressed 'sum' tree autocompletes like a simple one.

    Both trees receive the same insertions in the same order; every listed
    prefix must then produce equal autocomplete results from the two trees.
    """
    entries = [
        ('car', 1),
        ('care', 2),
        ('cat', 6),
        ('danger', 1),
        ('door', 0.5),
        ('doors', 0.5),
        ('doors', 0.5),
        ('desk', 10),
    ]

    x = CompressedPrefixTree('sum')
    for word, weight in entries:
        # A word's prefix sequence is the list of its characters.
        x.insert(word, weight, list(word))

    y = SimplePrefixTree('sum')
    for word, weight in entries:
        y.insert(word, weight, list(word))

    queries = ['c', 'ca', 'car', 'care', 'cat',
               'd', 'do', 'da', 'de', 'des', 'doo', 'dan',
               'door', 'doors', 'danger']
    for query in queries:
        assert x.autocomplete(list(query)) == y.autocomplete(list(query))
コード例 #19
0
class MelodyAutocompleteEngine:
    """An autocomplete engine that suggests melodies based on a few intervals.

    The values stored are Melody objects, and the corresponding
    prefix sequence for a Melody is its interval sequence.

    Because the prefix is based only on interval sequence and not the
    starting pitch or duration of the notes, it is possible for different
    melodies to have the same prefix.

    # === Private Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a CSV file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Precondition:
        The given file is a *CSV file* where each line has the following format:
            - The first entry is the name of a melody (a string).
            - The remaining entries are grouped into pairs (as in Assignment 1)
              where the first number in each pair is a note pitch,
              and the second number is the corresponding duration.

            HOWEVER, there may be blank entries (stored as an empty string '');
            as soon as a blank entry is encountered, no further notes are read
            from that line.  The melody is built from the notes read so far.

        Each melody is inserted into the Autocompleter with a weight of 1.
        """
        if config['autocompleter'] == 'simple':
            self.autocompleter = SimplePrefixTree(config['weight_type'])
        else:
            self.autocompleter = CompressedPrefixTree(config['weight_type'])

        # newline='' is what the csv module asks for when opening its input.
        with open(config['file'], newline='') as f:
            for line in csv.reader(f):
                if not line:
                    continue

                # Entries after the name come in (pitch, duration) pairs; a
                # dangling final entry without a partner is ignored.
                pairs = []
                for i in range(1, len(line) - 1, 2):
                    pitch, duration = line[i], line[i + 1]
                    if not pitch or not duration:
                        # Blank entry: stop reading notes from this line
                        # instead of crashing on int('').
                        break
                    pairs.append((int(pitch), int(duration)))

                # Interval sequence: pitch difference between neighbours.
                interval = [after[0] - before[0]
                            for before, after in zip(pairs, pairs[1:])]
                self.autocompleter.insert(Melody(line[0], pairs), 1, interval)

    def autocomplete(
            self,
            prefix: List[int],
            limit: Optional[int] = None) -> List[Tuple[Melody, float]]:
        """Return up to <limit> matches for the given interval sequence.

        The return value is a list of tuples (melody, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given interval sequence.

        Precondition:
            limit is None or limit > 0
        """
        return self.autocompleter.autocomplete(prefix, limit)

    def remove(self, prefix: List[int]) -> None:
        """Remove all melodies that match the given interval sequence.
        """
        self.autocompleter.remove(prefix)
コード例 #20
0
class SimpleAutoCompleteTest(unittest.TestCase):
    """Autocomplete checks run against a SimplePrefixTree built with 'sum'."""

    def setUp(self):
        """Give every test its own fresh 'sum' tree."""
        self.sum_tree = SimplePrefixTree('sum')

    def _fill(self, *entries):
        """Insert each (value, weight, prefix) triple, in the order given."""
        for value, weight, prefix in entries:
            self.sum_tree.insert(value, weight, prefix)

    def _check(self, expected, prefix, *limit):
        """Assert that autocomplete(prefix[, limit]) equals <expected>.

        The limit is forwarded only when the caller supplies one.
        """
        self.assertEqual(self.sum_tree.autocomplete(prefix, *limit), expected)

    def test_empty_tree_no_prefix(self):
        self._check([], [])

    def test_empty_tree_extra_prefix(self):
        self._check([], ['c'])

    def test_one_leaf_no_prefix(self):
        self._fill(('Alice', 5, []))
        self._check([('Alice', 5.0)], [])

    def test_one_leaf_no_prefix_zero_limit(self):
        self._fill(('Alice', 5, []))
        self._check([], [], 0)

    def test_one_leaf_no_prefix_at_limit(self):
        self._fill(('Alice', 5, []))
        self._check([('Alice', 5.0)], [], 1)

    def test_multi_leaf_no_prefix_extra_limit(self):
        self._fill(('Alice', 5, []),
                   ('Jacky', 11, []),
                   ('Bob', 10, []))
        self._check([('Jacky', 11.0), ('Bob', 10.0), ('Alice', 5.0)], [], 4)

    def test_multi_leaf_no_prefix_not_enough_limit(self):
        self._fill(('Alice', 5, []),
                   ('Jacky', 11, []),
                   ('Bob', 10, []))
        self._check([('Jacky', 11.0), ('Bob', 10.0)], [], 2)

    def test_multi_internal_no_prefix(self):
        self._fill(('Alice', 5, ['a']),
                   ('Jacky', 11, ['a', 'c']),
                   ('Bob', 10, ['b']))
        self._check([('Jacky', 11.0), ('Bob', 10.0), ('Alice', 5.0)], [], 3)

    def test_with_multi_internal_and_prefix(self):
        self._fill(('Alice', 5, ['a']),
                   ('Jacky', 11, ['a']),
                   ('Bob', 10, ['b']))
        self._check([('Jacky', 11.0), ('Alice', 5.0)], ['a'], 3)

    def test_multi_internal_extra_prefix(self):
        self._fill(('Alice', 5, ['a']),
                   ('Jacky', 11, ['a']),
                   ('Bob', 10, ['b']))
        self._check([], ['a', 'b'], 3)

    def test_multi_internal_short_prefix(self):
        self._fill(('Alice', 5, ['a', 'b']),
                   ('Jacky', 11, ['a', 'c']),
                   ('Bob', 10, ['b']))
        self._check([('Jacky', 11.0), ('Alice', 5.0)], ['a'], 3)

    def test_multi_level_leaves(self):
        self._fill(('Alice', 5, ['a']),
                   ('Jacky', 11, ['a', 'c']),
                   ('Bob', 10, ['b']))
        self._check([('Jacky', 11.0), ('Alice', 5.0)], ['a'], 3)

    def test_multi_internal_limit_cutoff(self):
        self._fill(('Alice', 5, ['a', 'c']),
                   ('Jacky', 11, ['a', 'c']))
        self._check([('Jacky', 11.0)], ['a'], 1)

    def test_multi_internal_limit_continue(self):
        self._fill(('Alice', 5, ['a', 'c']),
                   ('Jacky', 10, ['a', 'c']),
                   ('Bob', 11, ['a', 'd']),
                   ('Kevin', 9, ['a', 'd']))
        self._check([('Bob', 11.0), ('Jacky', 10.0), ('Kevin', 9.0)],
                    ['a'], 3)