Example #1
0
def test_neighbors():
    """Exercise Trie.neighbors: self-exclusion, argument checks and lookups."""
    def neighbors(trie, key, maxhd):
        # Collect the results in a set so comparisons ignore iteration order.
        return {tuple(hit) for hit in trie.neighbors(s=key, maxhd=maxhd)}

    # A stored string must not be reported as a neighbor of itself.
    t = Trie()
    t[b"hello"] = (1, 2, 3)
    assert list(t.neighbors(b"hello", 5)) == []

    # Asking for neighbors of a string that is not stored must raise.
    for missing, maxhd in ((b"he", 1), (b"h3llo", 5)):
        with pytest.raises(Exception):
            list(t.neighbors(missing, maxhd))

    t[b"h3llo"] = None
    assert list(t.neighbors(b"hello", 1)) == [(1, "h3llo", None)]

    # Negative and zero maxhd values must raise a ValueError.
    for bad_maxhd in (-1, 0):
        with pytest.raises(ValueError):
            list(t.neighbors(b"hello", bad_maxhd))

    # Look up variants of a stored string at several distances.
    t = Trie()
    entries = [
        (b"hello world", 0),
        (b"*ello world", 1),
        (b"*ell* world", 2),
        (b"*ell* w*rld", 3),
        (b"hell* w*rl*", 3),
    ]
    for key, value in entries:
        t[key] = value

    expected = [
        (1, "*ello world", 1),
        (2, "*ell* world", 2),
        (3, "*ell* w*rld", 3),
        (3, "hell* w*rl*", 3),
    ]
    assert neighbors(t, b"hello world", 3) == set(expected)
    assert neighbors(t, b"hello world", 1) == set(expected[:1])

    expected = {
        (1, "*ello world", 1),
        (1, "*ell* w*rld", 3),
    }
    assert neighbors(t, b"*ell* world", 1) == expected

    # Regression check: an older trie implementation overwrote the current
    # hamming distance when child nodes were pushed onto a stack.
    for letters in product("ABC", "ABC", "ABC"):
        t[b("".join(letters))] = 0
    assert len(neighbors(t, b"AAA", 1)) == 6
Example #2
0
def test_has_key():
    """has_key follows the insertion and deletion of a key."""
    key = b"a"
    trie = Trie()
    assert not trie.has_key(key)

    trie[key] = 1
    assert trie.has_key(key)

    del trie[key]
    assert not trie.has_key(key)
Example #3
0
def test_suffixes():
    """suffixes(prefix) yields (remaining text, value) for keys below prefix."""
    words = [
        b"production",
        b"productivity",
        b"process",
        b"prom",
        b"proper",
        b"promiss",
        b"prophet",
        b"professional",
        b"professor",
    ]
    trie = Trie()
    # Values are simply 1..9 in insertion order.
    for value, word in enumerate(words, 1):
        trie[word] = value

    def found(prefix):
        # Order of the yielded suffixes is not checked, hence the set.
        return set(trie.suffixes(prefix))

    assert found(b"product") == {("ion", 1), ("ivity", 2)}
    assert next(trie.suffixes(b"proc")) == ("ess", 3)
    # A key equal to the prefix shows up with an empty suffix.
    assert found(b"prom") == {("", 4), ("iss", 6)}
    assert found(b"prop") == {("het", 7), ("er", 5)}
    assert found(b"pro") == {
        ("duction", 1),
        ("ductivity", 2),
        ("cess", 3),
        ("m", 4),
        ("per", 5),
        ("miss", 6),
        ("phet", 7),
        ("fessional", 8),
        ("fessor", 9),
    }
Example #4
0
def test_sizeof():
    """Check the value reported by __sizeof__ across inserts and deletes."""
    # NOTE: this test hard-codes the size in bytes of the root node and of
    # non-root nodes, so a failure does not necessarily indicate that the trie
    # is reporting the wrong size.
    root_bytes = 96  # size of root node in bytes
    node_bytes = 56  # size of trie node in bytes

    def reported(trie):
        return trie.__sizeof__()

    def expected(nodes, key_bytes, keys):
        # In every case below the last two terms match the total length of the
        # stored keys and the number of stored keys.
        return root_bytes + node_bytes * nodes + key_bytes + keys

    t = Trie()
    assert t.__sizeof__() == reported(t) == root_bytes

    t[b"a"] = 1
    assert reported(t) == expected(1, 1, 1)
    del t[b"a"]
    assert reported(t) == root_bytes

    t[b"hello"] = 1
    assert reported(t) == expected(5, 5, 1)
    t[b"world"] = 1
    assert reported(t) == expected(10, 10, 2)
    t[b"h3llo"] = 1
    assert reported(t) == expected(14, 15, 3)

    # Modifying an existing value should not change the size.
    t[b"hello"] = 2
    assert reported(t) == expected(14, 15, 3)

    # "here" should add 2 nodes.
    t[b"here"] = 3
    assert reported(t) == expected(16, 19, 4)

    # Deleting "hello" can only remove the "llo" nodes.
    del t[b"hello"]
    assert reported(t) == expected(13, 14, 3)
Example #5
0
def test_contains():
    """Membership through the `in` operator and through __contains__."""
    key = b"abc"

    # Via the operator.
    trie = Trie()
    assert key not in trie
    trie[key] = 1
    assert key in trie
    del trie[key]
    assert key not in trie
    with pytest.raises(TypeError):
        5 in trie

    # Via a direct call to the special method.
    trie = Trie()
    assert not trie.__contains__(key)
    trie[key] = 1
    assert trie.__contains__(key)
    del trie[key]
    assert not trie.__contains__(key)
    with pytest.raises(TypeError):
        trie.__contains__(5)
Example #6
0
def test_get():
    """get() returns the stored value, or the given default for a missing key."""
    fallback = 123
    trie = Trie()

    # Looking up a missing key must not insert it.
    assert trie.get(b"foo", fallback) == fallback
    assert b"foo" not in trie

    trie[b"foo"] = 1
    assert b"foo" in trie
    assert trie.get(b"foo", fallback) == 1

    # A prefix of a stored key is still a missing key.
    assert trie.get(b"fo", fallback) == fallback
    # The default should itself default to None.
    assert trie.get(b"fo") is None
Example #7
0
def test_has_node():
    """has_node reports prefixes of stored keys even when they hold no value."""
    trie = Trie()
    # The empty prefix is present even in an empty trie.
    assert trie.has_node(b"")
    assert not trie.has_node(b"a")

    trie[b"Hello"] = 0
    # "He" is a node on the path to "Hello" but not a key.
    assert trie.has_node(b"He")
    with pytest.raises(KeyError):
        trie[b"He"]

    assert trie.has_node(b"Hello")
    assert not trie.has_node(b"Hello!")
    assert trie[b"Hello"] == 0
Example #8
0
def test_reduce():
    """A pickle round trip keeps the length, the reported size and all values."""
    original = Trie()
    original[b"hello"] = 1
    original[b"world"] = [1, 2, 3]
    for number in xrange(100):
        original[b(str(number))] = number

    clone = pickle.loads(pickle.dumps(original))

    assert len(original) == len(clone)
    assert original.__sizeof__() == clone.__sizeof__()
    # Iteration yields keys that are passed through b() before indexing.
    for key in original:
        assert original[b(key)] == clone[b(key)]
Example #9
0
def test_longest_prefix():
    """longest_prefix returns the longest stored key that prefixes the query."""
    # NOTE(review): a second test_longest_prefix is defined further down; if
    # both end up in the same module the later definition shadows this one.
    trie = Trie()
    # Nothing stored yet, so there is no prefix to report.
    assert trie.longest_prefix(b"foobar") is None

    trie[b"fo"] = 1
    trie[b"foo"] = 2
    assert trie.longest_prefix(b"foobar") == ("foo", 2)

    trie[b"foobar"] = 3
    assert trie.longest_prefix(b"foobar") == ("foobar", 3)
    assert trie.longest_prefix(b"foozle") == ("foo", 2)

    # After removing "foo" the shorter key "fo" is the best match.
    del trie[b"foo"]
    assert trie.longest_prefix(b"foozle") == ("fo", 1)
Example #10
0
def test_keys_items_values():
    """keys(), values() and items() reflect the contents of the trie."""
    trie = Trie()
    trie[b"foo"] = 5
    # With a single entry the returned lists can be compared directly.
    assert trie.keys() == ["foo"]
    assert trie.values() == [5]
    assert trie.items() == [("foo", 5)]

    trie[b"foobar"] = 3
    trie[b"hello"] = 7
    # With several entries the order is not checked.
    assert set(trie.keys()) == {"foo", "foobar", "hello"}
    assert set(trie.values()) == {5, 3, 7}
    assert set(trie.items()) == {("foo", 5), ("foobar", 3), ("hello", 7)}

    # Same checks on a larger trie keyed by the decimal text of each number.
    trie = Trie()
    count = 1000
    for number in xrange(count):
        trie[b(str(number))] = number
    assert len(trie.keys()) == count

    numbers = xrange(count)
    names = [str(number) for number in numbers]
    assert set(trie.keys()) == set(names)
    assert set(trie.values()) == set(numbers)
    assert set(trie.items()) == set(zip(names, numbers))
Example #11
0
def test_len():
    """len() counts stored keys and is unaffected by overwriting a value."""
    trie = Trie()
    assert len(trie) == 0

    trie[b"Hello"] = 123
    assert len(trie) == 1
    trie[b"World"] = "!"
    assert len(trie) == 2

    # Overwriting an existing key must not change the count.
    trie[b"Hello"] = 0
    assert len(trie) == 2

    del trie[b"World"]
    assert len(trie) == 1
    del trie[b"Hello"]
    assert len(trie) == 0
Example #12
0
def test_iterkeys_itervalues_iteritems():
    """iterkeys(), itervalues() and iteritems() walk the contents of the trie."""
    trie = Trie()
    trie[b"foo"] = 1
    # With a single entry the first item of each iterator is known.
    assert next(trie.iterkeys()) == "foo"
    assert next(trie.itervalues()) == 1
    assert next(trie.iteritems()) == ("foo", 1)

    trie[b"foobar"] = 999
    trie[b"hello"] = 404
    # With several entries the order is not checked.
    assert set(trie.iterkeys()) == {"foo", "foobar", "hello"}
    assert set(trie.itervalues()) == {1, 999, 404}
    assert set(trie.iteritems()) == {
        ("foo", 1),
        ("foobar", 999),
        ("hello", 404),
    }

    # Same checks on a larger trie keyed by the decimal text of each number.
    trie = Trie()
    count = 1000
    for number in xrange(count):
        trie[b(str(number))] = number
    assert len(list(trie.iterkeys())) == count

    numbers = xrange(count)
    names = [str(number) for number in numbers]
    assert set(trie.iterkeys()) == set(names)
    assert set(trie.itervalues()) == set(numbers)
    assert set(trie.iteritems()) == set(zip(names, numbers))
Example #13
0
def test_longest_prefix():
    """longest_prefix works with both a positional and a `key=` argument."""
    trie = Trie()
    # Nothing stored yet, so there is no prefix to report.
    assert trie.longest_prefix(b"foobar") is None

    trie[b"fo"] = 1
    trie[b"foo"] = 2
    assert trie.longest_prefix(b"foobar") == ("foo", 2)

    trie[b"foobar"] = 3
    assert trie.longest_prefix(b"foobar") == ("foobar", 3)
    assert trie.longest_prefix(b"foozle") == ("foo", 2)

    # After removing "foo" the shorter key "fo" is the best match.
    del trie[b"foo"]
    assert trie.longest_prefix(b"foozle") == ("fo", 1)

    # The query may also be passed by name.
    assert trie.longest_prefix(key=b"foozle") == ("fo", 1)
    assert trie.longest_prefix(key=b"foobar") == ("foobar", 3)
Example #14
0
def test_setdefault():
    """Check what Trie.setdefault returns and what it leaves stored."""
    t = Trie()
    # Missing key: the supplied default is returned and stored.
    assert t.setdefault(b"a", 123) == 123
    assert t[b"a"] == 123
    # Existing key: the stored value is returned and left untouched, whether
    # or not a default is supplied.
    assert t.setdefault(b"a", 5) == 123
    assert t[b"a"] == 123
    assert t.setdefault(b"a") == 123 
    assert t[b"a"] == 123
    # Missing key without a default: None is returned and stored.
    assert t.setdefault(b"abc") is None
    assert t[b"abc"] is None

    # Test if setdefault correctly makes the trie own a reference to inserted
    # values. Without a proper reference the following may result in a memory
    # corruption error.
    # The statement order below matters: the list is created inline so that,
    # once `v` is deleted, the trie is the only place still referring to it.
    v = t.setdefault(b"hello", [])
    v.append(5)
    del v # important, delete current reference.
    v = t[b"hello"]
    v.append(5)
Example #15
0
def test_popitem():
    """popitem removes and returns a (key, value) pair; KeyError when empty."""
    t = Trie()
    with pytest.raises(KeyError):
        t.popitem()

    t[b"hello"] = "world"
    assert len(t) == 1
    assert b"hello" in t
    assert t.popitem() == ("hello", "world")
    assert b"hello" not in t
    assert len(t) == 0

    # Drain a larger trie and check that every entry came out exactly once.
    n = 1000
    for number in xrange(n):
        t[b(str(number))] = number
    assert b"234" in t

    drained = set()
    while t:
        drained.add(t.popitem())
    assert drained == {(str(number), number) for number in xrange(n)}
    assert b"234" not in t
Example #16
0
def test_pop():
    """pop removes a key and returns its value, or the default if missing."""
    t = Trie()
    # Missing key without a default raises; with a default, that exact object
    # is handed back.
    with pytest.raises(KeyError):
        t.pop(b"a")
    assert t.pop(b"a", None) is None
    fallback = [1, 2, 3]
    assert t.pop(b"a", fallback) is fallback

    t[b"a"] = 5
    assert b"a" in t
    assert t.pop(b"a") == 5
    assert b"a" not in t

    # Test popping several keys
    n = 1000
    for number in xrange(n):
        t[b(str(number))] = number
    assert len(t) == n

    popped = [1, 808, 58, 256, 30, 905]
    for number in popped:
        key = b(str(number))
        assert key in t
        # The default must be ignored because the key exists.
        assert t.pop(key, 123) == number
        assert key not in t
    assert len(t) == n - len(popped)
Example #17
0
def test_num_nodes():
    """num_nodes follows the node count as keys are inserted and deleted."""
    trie = Trie()
    assert trie.num_nodes() == 0

    # Each step lists the key to insert and the node count expected afterwards.
    insertions = (
        (b"foo", 3),
        (b"foobar", 6),
        (b"foozle", 9),
        (b"hello", 14),
    )
    for key, expected in insertions:
        trie[key] = 1
        assert trie.num_nodes() == expected

    # "foo" is a prefix of other keys, so deleting it leaves the count as is.
    del trie[b"foo"]
    assert b"foo" not in trie
    assert trie.num_nodes() == 14

    # Each step lists the key to delete and the node count expected afterwards.
    deletions = (
        (b"foozle", 11),
        (b"foobar", 5),
        (b"hello", 0),
    )
    for key, expected in deletions:
        del trie[key]
        assert trie.num_nodes() == expected

    # The decimal texts of 0..99 are expected to take exactly 100 nodes.
    n = 100
    for number in xrange(n):
        trie[b(str(number))] = number
    assert trie.num_nodes() == n
Example #18
0
def test_iter():
    """Iterate over a Trie and check that structural changes are detected."""
    words = [b"hello", b"foo", b"foobar", b"foozle"]
    trie = Trie()
    for word in words:
        trie[word] = 1
    assert {b(key) for key in trie} == set(words)

    # Overwriting values of existing keys while iterating is allowed.
    for key in trie:
        trie[b(key)] = 2
    assert trie.values() == [2] * len(words)

    # Continuing to iterate after nodes were added and/or removed should be an
    # error. Each iterator below keeps its own name so it stays alive until
    # the end of the test.

    # Addition after the iterator has started.
    after_insert = iter(trie)
    next(after_insert)  # This should be fine
    trie[b"new"] = 1
    with pytest.raises(RuntimeError):
        next(after_insert)

    # Deletion before the iterator has produced anything.
    after_delete = iter(trie)
    del trie[b"new"]
    with pytest.raises(RuntimeError):
        next(after_delete)

    # Detection should not rely on the number of nodes, so add and remove a
    # single node and make sure the count is back where it started.
    after_round_trip = iter(trie)
    next(after_round_trip)  # should be fine
    node_count = trie.num_nodes()
    trie[b"a"] = 1
    assert trie.num_nodes() == node_count + 1
    del trie[b"a"]
    assert trie.num_nodes() == node_count
    with pytest.raises(RuntimeError):
        next(after_round_trip)
Example #19
0
def test_pairs():
    """Exercise Trie.pairs(keylen, maxhd).

    Covers argument validation, the single-active-iterator rule, the pairs
    produced for small hand-written tries, pair counts for exhaustive binary
    and three-letter key sets, and cleanup after a partially consumed
    iterator.
    """
    # Keeps only the two strings of every 5-tuple yielded by pairs().
    get_pairs = lambda t, keylen, maxhd:[(s1, s2) \
            for hd, s1, value1, s2, value2 in t.pairs(keylen, maxhd)]
    # NOTE(review): eqp is defined outside this function; from its use below it
    # presumably compares two lists of pairs ignoring order -- confirm.

    # Test that giving a bad maxhd or keylen value raises an ValueError
    t = Trie()
    t[b"hello"] = 0
    # A single key cannot form a pair.
    assert list(t.pairs(keylen = 5, maxhd = 1)) == []
    with pytest.raises(ValueError):
        list(t.pairs(5, -1))
    with pytest.raises(ValueError):
        list(t.pairs(5, 0))
    with pytest.raises(ValueError):
        list(t.pairs(-1, 1))

    # The pairs iterator modifies nodes in the trie to keep track of its
    # progress. Running multiple such iterators would cause inconsistent
    # results, hence the iterator should throw an exception if another is
    # active.
    t = Trie()
    t[b"hello"] = 0
    t[b"h3llo"] = 1
    assert eqp([("hello", "h3llo")], get_pairs(t, 5, 1))
    # Creating i2 after i1 is what makes i1 unusable; i2 itself still works.
    i1 = t.pairs(5, 1)
    i2 = t.pairs(5, 1)
    with pytest.raises(RuntimeError):
        next(i1)
    assert next(i2) == (1, "hello", 0, "h3llo", 1)

    # One should be able to modify the trie if a dirty iterator is active.
    t = Trie()
    # `it` is never consumed; the name only keeps the iterator alive while the
    # trie is modified.
    it = t.pairs(1,1)
    t[b"abc"] = 1

    t = Trie()
    assert list(t.pairs(keylen = 4, maxhd = 5)) == []
    t[b"AAAA"] = 0
    assert list(t.pairs(4, 5)) == []
    t[b"AAAT"] = 0
    assert eqp([("AAAA", "AAAT")], get_pairs(t, 4, 1))
    t[b"ATAT"] = 0
    assert eqp([("AAAA", "AAAT"),
            ("ATAT", "AAAT")], get_pairs(t, 4, 1))
    assert eqp([("AAAA", "AAAT"),
            ("ATAT", "AAAT"),
            ("ATAT", "AAAA")], get_pairs(t, 4, 2))
    # Test inserting single different length prefix does not affect result.
    t[b"AA"] = 0
    assert eqp([("AAAA", "AAAT"),
            ("ATAT", "AAAT"),
            ("ATAT", "AAAA")], get_pairs(t, 4, 2))
    t[b"AT"] = 0
    assert eqp([("AAAA", "AAAT"),
            ("ATAT", "AAAT"),
            ("ATAT", "AAAA")], get_pairs(t, keylen = 4, maxhd = 2))
    # Only keys of the requested length take part in the pairing.
    assert eqp([("AA", "AT")], get_pairs(t, keylen = 2, maxhd = 2))
    # Double check that eqp works:
    assert not eqp([("AAAA", "AAAT"),
            ("ATAT", "AAAT"),
            ("ATAT", "AAAA"),
            ("AA", "AC")], get_pairs(t, 4, 2))

    # Test where only 1 string should not be in any of the pairs because of
    # maximum hamming distance limitation.
    t = Trie()
    t[b"AAAA"] = 1
    t[b"AAAT"] = 2
    t[b"TAAT"] = 3
    t[b"TATA"] = 4
    assert eqp([("AAAA", "AAAT"), ("TAAT", "AAAT")],
            get_pairs(t, 4, 1));

    # Test larger set of pairs
    t2 = Trie()
    for s in [b("{:08b}".format(i)) for i in range(256)]:
        t2[s] = 1 # "00000000" to "11111111"
    # Each string has 8 neighbors at HD 1. So the number of pairs, ignoring
    # order, should be ( 256 * 8 ) / 2
    # (Where `/` is true division the right-hand side is a float, but it is a
    # whole number and still compares equal to the integer length.)
    assert len(list(t2.pairs(keylen = 8, maxhd = 1))) == (256*8)/2
    # Test hd = 2:
    # There are (8 choose 2) + (8 choose 1) = 36 neighbors for every number.
    assert len(list(t2.pairs(keylen = 8, maxhd = 2))) == (256*36)/2
    # There are (8 choose 3) + (8 choose 2) + (8 choose 1) = 92 neighbors for
    # every number.
    assert len(list(t2.pairs(keylen = 8, maxhd = 3))) == (256*92)/2

    # Test that hamming distance and nodes are correct
    for hd, s1, value1, s2, value2 in t2.pairs(keylen = 8, maxhd = 3):
        # Recompute the distance position by position.
        assert hd == sum([ch1 != ch2 for ch1, ch2 in zip(s1, s2)])
        # The yielded values must be the very objects stored in the trie.
        assert value1 is t2[b(s1)]
        assert value2 is t2[b(s2)]

    # Add 4-character keys next to the 8-character ones already in t2.
    for s in [b("{:04b}".format(i)) for i in range(16)]:
        t2[s] = 1 # "0000" to "1111"
    assert len(list(t2.pairs(keylen = 4, maxhd = 1))) == (16*4)/2

    # Test pairs where nodes can have 3 children
    # NOTE: this test is important! It revealed a mistake in an older trie
    # implementation. The mistake was that it used a field (the hamming
    # distance field) that was being overwritten when children of a node were
    # being pushed onto a stack. (solution was saving the hd field before
    # pushing children).
    t = Trie()
    strings = [b("".join(p)) for p in product("ABC", "ABC", "ABC")]
    for s in strings: 
        t[s] = 0
    assert len(list(t.pairs(3, 1))) == 27 * 6 / 2
    # There are 3 ways to select two mutated positions, for each mutated
    # position there are 2 variants, so there are 27 strings with each 12
    # possible neighbors, divide by two to prevent overcounting.
    assert len(list(t.pairs(3, 2))) == (27 * 3 * 4 / 2) + (27 * 6 / 2)
    # With maxhd equal to the key length every two distinct keys form a pair.
    assert len(list(t.pairs(3, 3))) == (27 * 26) / 2

    # Test garbage collection of pairs iterator. This test can reveal a mistake
    # in properly resetting the trie when the iterator is garbage collected.
    t = Trie()
    strings = [b("".join(p)) for p in product("ABC", "ABC", "ABC")]
    for s in strings: 
        t[s] = 0
    # Only go through the pairs partially
    for i, pair in enumerate(t.pairs(3,3)):
        if i > 100:
            break
    # A fresh iterator must still produce the complete set of pairs.
    assert len(list(t.pairs(3,3))) == (27 * 26) / 2
Example #20
0
def test_getitem_setitem():
    """Exercise item assignment and lookup on a Trie."""
    def assert_missing(trie, key):
        # Looking up `key` is expected to raise KeyError.
        with pytest.raises(KeyError):
            trie[key]

    t = Trie()
    t[b"Hello"] = "world"
    assert t[b"Hello"] == "world"
    # Getting a string that was never inserted raises an exception; that
    # includes an extension, a prefix and the empty string.
    for absent in (b"Hello!", b"He", b""):
        assert_missing(t, absent)

    # Strings can be associated with arbitrary values, None included.
    t = Trie()
    t[b"Hello"] = 0
    assert t[b"Hello"] == 0
    t[b"world"] = "!"
    assert t[b"world"] == "!"
    t[b"ABC"] = (1, 2, 3, 4)
    assert t[b"ABC"] == (1, 2, 3, 4)
    t[b"AAA"] = None
    assert t[b"AAA"] is None

    # The trie stores the object itself, not a copy of it.
    shared = [1, 2, 3]
    t[b"XYZ"] = shared
    assert t[b"XYZ"] is shared
    shared[1] = 9
    assert t[b"XYZ"] == [1, 9, 3]

    # If setitem does not make the trie own a reference to the inserted value,
    # the following may result in a memory corruption error.
    t = Trie()
    t[b"a"] = []
    for _ in xrange(100):
        t[b"a"].append(1)

    # The order of insertions must not affect string associations.
    pairs = [
        (b"Hello", 0),
        (b"world", "!"),
        (b"there", (1, 2, 3)),
        (b"abcde", None),
        (b"dicti", {"a": None, "dict": (1, 2, 3)}),
    ]
    for ordering in permutations(pairs):
        t = Trie()
        for key, value in ordering:
            t[key] = value
        for key, value in pairs:
            assert t[key] is value

    # Staggered insertions: every key is a prefix of the next one.
    t = Trie()
    t[b"A"] = 0
    t[b"AB"] = (1, 2)
    t[b"ABC"] = ["..."]
    assert t[b"A"] == 0
    assert t[b"AB"] == (1, 2)
    assert t[b"ABC"] == ["..."]

    # The empty string is a valid key.
    t = Trie()
    t[b""] = 123
    assert t[b""] == 123

    # Assigning to an existing key overwrites its value.
    t[b"myval"] = 14
    assert t[b"myval"] == 14
    t[b"myval"] = [1, 2, 3, 4]
    assert t[b"myval"] == [1, 2, 3, 4]

    # Lists, tuples and None cannot be used in place of strings.
    t = Trie()
    with pytest.raises(TypeError):
        t[(1, 2, 3)] = 0
    with pytest.raises(TypeError):
        t[[1, 2]] = 0
    with pytest.raises(TypeError):
        t[None]
Example #21
0
def test_delitem():
    """Exercise key deletion and the node cleanup it triggers."""
    def assert_missing(trie, key):
        # Looking up `key` is expected to raise KeyError.
        with pytest.raises(KeyError):
            trie[key]

    t = Trie()
    t[b"hellothere"] = 0
    t[b"helloworld"] = 1
    assert t[b"hellothere"] == 0
    assert t[b"helloworld"] == 1
    assert t.has_node(b"hello")

    # Deleting one key removes its private tail but keeps the shared prefix.
    del t[b"hellothere"]
    assert_missing(t, b"hellothere")
    assert not t.has_node(b"hellothere")
    assert not t.has_node(b"hellot")
    assert t.has_node(b"hello")
    assert_missing(t, b"hello")
    assert t[b"helloworld"] == 1

    # Removing strings whose insertions were staggered.
    t = Trie()
    keys = [b"AB", b"ABCD", b"ABCDEFG", b"ABCDEFGHIJK"]
    for index, key in enumerate(keys):
        t[key] = index
    assert t[b"AB"] == 0
    assert t[b"ABCDEFGHIJK"] == 3

    del t[b"AB"]
    assert t.has_node(b"AB")
    # Make sure the string is removed
    assert_missing(t, b"AB")
    # Check the other strings were not affected
    for index, key in enumerate(keys[1:], 1):
        assert t[key] == index

    # Should not be able to remove a non-existent string
    assert t.has_node(b"ABC")
    for absent in (b"ABC", b"ABCDEFGHIJKL"):
        with pytest.raises(Exception):
            del t[absent]

    # Remove a string in between two others
    del t[b"ABCDEFG"]
    assert t.has_node(b"ABCDEFG")
    assert_missing(t, b"ABCDEFG")
    assert t[b"ABCDEFGHIJK"] == 3

    # See if nodes are actually removed when a string at a leaf is removed
    del t[b"ABCDEFGHIJK"]
    assert t.has_node(b"ABCD")
    assert t[b"ABCD"] == 1
    assert not t.has_node(b"ABCDE")
    assert_missing(t, b"ABCDEFG")
    assert_missing(t, b"ABCDEFGHIJK")

    # Remove the last string
    del t[b"ABCD"]
    assert not t.has_node(b"ABCD")
    assert not t.has_node(b"A")
    assert t.has_node(b"")
    assert_missing(t, b"")

    # Edge case: the empty string is inserted and removed again
    assert_missing(t, b"")
    t[b""] = 0
    assert t[b""] == 0
    del t[b""]
    assert t.has_node(b"")
    assert_missing(t, b"")