Example #1
0
 def test_save(self):
     """Check basic lookups, approximate matching and a save/load round trip."""
     trieobj = trie.trie()
     trieobj["foo"] = 1
     self.assertEqual(trieobj.keys(), ["foo"])
     self.assertEqual(trieobj.values(), [1])
     # get() falls back to the supplied default for a missing key.
     self.assertEqual(trieobj.get("bar", 99), 99)
     trieobj["hello"] = "55a"

     # Exact spelling: only the zero-distance hit is reported.
     for budget in (0, 1):
         self.assertEqual(
             trieobj.get_approximate("foo", budget), [("foo", 1, 0)]
         )
     # Misspelt query: no hit with budget 0, one hit with budget 1.
     self.assertEqual(trieobj.get_approximate("foa", 0), [])
     self.assertEqual(trieobj.get_approximate("foa", 1), [("foo", 1, 1)])
     # With budget 2 the same key is reported once per alignment:
     #   foo  foo-  foo-
     #   foa  f-oa  fo-a
     #   mismatch a->o
     #   insertion after f, deletion of o
     #   insertion after o, deletion of o
     self.assertEqual(
         sorted(trieobj.get_approximate("foa", 2)),
         [("foo", 1, 1), ("foo", 1, 2), ("foo", 1, 2)],
     )

     # Tally how often each distinct hit is returned for budget 4.
     tally = {}
     for hit in trieobj.get_approximate("foo", 4):
         tally[hit] = tally.get(hit, 0) + 1
     self.assertEqual(
         sorted(tally.items()),
         [(("foo", 1, 0), 1), (("hello", "55a", 4), 6)],
     )

     # Serialise into an in-memory buffer, rewind it and load it back.
     buf = StringIO()
     trie.save(buf, trieobj)
     buf.seek(0)
     reloaded = trie.load(buf)
     restored_keys = reloaded.keys()
     self.assertTrue("foo" in restored_keys)
     self.assertTrue("hello" in restored_keys)
     self.assertEqual(repr(reloaded["foo"]), "1")
     self.assertEqual(repr(reloaded["hello"]), "'55a'")
Example #2
0
    def test_large_save_load(self):
        """Round-trip a large trie through save/load and verify every entry.

        Random lowercase key/value pairs are generated in three length
        categories (100, 1000 and 10000 characters), 1000 pairs per category.
        Each pair is stored in a trie and in a reference dict. The trie is
        written to a temp file and read back, and every entry of the reloaded
        trie is compared against the reference dict.
        """
        cmp_dict = {}
        trieobj = trie.trie()
        self.assertEqual(trieobj.get("foobar"), None)
        for str_len in [100, 1000, 10000]:
            # Pairs of the current category only; merged into cmp_dict below
            # so the post-load check covers all categories, not just the last.
            batch = {}
            for _ in range(1000):
                key = ''.join([
                    random.choice(ascii_lowercase) for _char in range(str_len)
                ])
                val = ''.join([
                    random.choice(ascii_lowercase) for _char in range(str_len)
                ])
                trieobj[key] = val
                batch[key] = val
            for key in batch:
                self.assertEqual(trieobj[key], batch[key])
            cmp_dict.update(batch)

        with tempfile.TemporaryFile(mode='w+b') as f:
            trie.save(f, trieobj)
            # Rewind so load() reads from the start of what was just written.
            f.seek(0)
            trieobj = trie.load(f)
        for key in cmp_dict:
            self.assertEqual(trieobj[key], cmp_dict[key])
Example #3
0
 def test_save(self):
     """Check basic lookups, approximate matching and a save/load round trip."""
     trieobj = trie.trie()
     trieobj["foo"] = 1
     self.assertEqual(list(trieobj.keys()), ["foo"])
     self.assertEqual(list(trieobj.values()), [1])
     # get() falls back to the supplied default for a missing key.
     self.assertEqual(trieobj.get("bar", 99), 99)
     trieobj["hello"] = "55a"

     # Exact spelling: only the zero-distance hit is reported.
     for budget in (0, 1):
         self.assertEqual(
             trieobj.get_approximate("foo", budget), [("foo", 1, 0)]
         )
     # Misspelt query: no hit with budget 0, one hit with budget 1.
     self.assertEqual(trieobj.get_approximate("foa", 0), [])
     self.assertEqual(trieobj.get_approximate("foa", 1), [("foo", 1, 1)])
     # With budget 2 the same key is reported once per alignment:
     #   foo  foo-  foo-
     #   foa  f-oa  fo-a
     #   mismatch a->o
     #   insertion after f, deletion of o
     #   insertion after o, deletion of o
     self.assertEqual(
         sorted(trieobj.get_approximate("foa", 2)),
         [("foo", 1, 1), ("foo", 1, 2), ("foo", 1, 2)],
     )

     # Tally how often each distinct hit is returned for budget 4.
     tally = {}
     for hit in trieobj.get_approximate("foo", 4):
         tally[hit] = tally.get(hit, 0) + 1
     self.assertEqual(
         sorted(tally.items()),
         [(("foo", 1, 0), 1), (("hello", "55a", 4), 6)],
     )

     # Serialise into an in-memory bytes buffer, rewind it and load it back.
     buf = BytesIO()
     trie.save(buf, trieobj)
     buf.seek(0)
     reloaded = trie.load(buf)
     restored_keys = list(reloaded.keys())
     self.assertTrue("foo" in restored_keys)
     self.assertTrue("hello" in restored_keys)
     self.assertEqual(repr(reloaded["foo"]), "1")
     self.assertEqual(repr(reloaded["hello"]), "'55a'")
Example #4
0
    def test_large_save_load(self):
        """Round-trip a large trie through save/load and verify every entry.

        Random lowercase key/value pairs are generated in three length
        categories (100, 1000 and 10000 characters), 1000 pairs per category.
        Each pair is stored in a trie and in a reference dict. The trie is
        written to a temp file and read back, and every entry of the reloaded
        trie is compared against the reference dict.
        """
        cmp_dict = {}
        trieobj = trie.trie()
        self.assertEqual(trieobj.get("foobar"), None)
        for str_len in [100, 1000, 10000]:
            # Pairs of the current category only; merged into cmp_dict below
            # so the post-load check covers all categories, not just the last.
            batch = {}
            for _ in range(1000):
                key = ''.join([random.choice(ascii_lowercase) for _char in range(str_len)])
                val = ''.join([random.choice(ascii_lowercase) for _char in range(str_len)])
                trieobj[key] = val
                batch[key] = val
            for key in batch:
                self.assertEqual(trieobj[key], batch[key])
            cmp_dict.update(batch)

        with tempfile.TemporaryFile(mode='w+b') as f:
            trie.save(f, trieobj)
            # Rewind so load() reads from the start of what was just written.
            f.seek(0)
            trieobj = trie.load(f)
        for key in cmp_dict:
            self.assertEqual(trieobj[key], cmp_dict[key])
Example #5
0
def loadIndexFromDisk():
    """Load the persisted index and the document dictionary from disk.

    The index is read with ``trie.load`` from the file named by
    ``indexFileName``; the document dictionary is unpickled from the file
    named by ``docDictionary``. Both names are defined outside this function.

    Returns:
        A 3-tuple ``(index, docDict[0], docDict[1])``: the loaded index and
        the first two elements of the unpickled object.
    """
    # ``with`` closes each file even when loading raises.
    with open(indexFileName, 'rb') as fp:
        index = trie.load(fp)

    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only point docDictionary at files this application wrote itself.
    with open(docDictionary, 'rb') as fp:
        docDict = pickle.load(fp)
    return index, docDict[0], docDict[1]
Example #6
0
# mismatch a->o
# insertion after f, deletion of o
# insertion after o, deletion of o

# Count how many times each distinct hit is returned for "foo" with the
# second argument set to 4 (presumably the maximum number of mismatches).
x = trieobj2.get_approximate("foo", 4)
y = {}
for z in x:
    y[z] = y.get(z, 0) + 1
# Python 2: items() returns a list here, so it is sorted in place.
x = y.items()
x.sort()
print x                       # [(('foo', 1, 0), 1), (('hello', '55a', 4), 6)]

# Round trip: save the trie into an in-memory buffer, rewind the buffer,
# load it back and print every key with the repr of its value.
h = StringIO.StringIO()
trie.save(h, trieobj2)
h.seek(0)
trieobj3 = trie.load(h)
k = trieobj3.keys()
k.sort()
for m in k:                       # foo 1
    print m, repr(trieobj3[m])    # hello '55a'


# Found bug, doesn't handle insertions and deletions at end properly.
# The trailing comments below show the result listed for each call
# (presumably the observed output).
trieobj = trie.trie()
trieobj["hello"] = 1
print trieobj.get_approximate('he', 2)        # []
print trieobj.get_approximate('he', 3)        # [('hello', 1, 3)]
print trieobj.get_approximate('hello me!', 3) # []
print trieobj.get_approximate('hello me!', 4) # [('hello', 1, 4)]
print trieobj.get_approximate('hello me!', 5) # [('hello', 1, 4)]
Example #7
0
 def loadFromFile(self, indexFile):
     """Replace ``self.index`` with the trie stored in ``indexFile``.

     Opens ``indexFile``, loads it with ``trie.load`` into ``self.index``
     and then sets ``self.modified`` to False.

     Args:
         indexFile: path of the file to load the index from.
     """
     # TODO assert that the trie is empty at this point,
     # otherwise the current index will be lost
     # NOTE(review): the file is opened in text mode ('r'); if the saved
     # trie is binary data this may need to be 'rb' -- confirm against the
     # code that writes the file.
     # ``with`` closes the file even when loading raises.
     with open(indexFile, 'r') as fp:
         self.index = trie.load(fp)
     self.modified = False
Example #8
0
# mismatch a->o
# insertion after f, deletion of o
# insertion after o, deletion of o

# Count how many times each distinct hit is returned for "foo" with the
# second argument set to 4 (presumably the maximum number of mismatches).
x = trieobj2.get_approximate("foo", 4)
y = {}
for z in x:
    y[z] = y.get(z, 0) + 1
# Python 2: items() returns a list here, so it is sorted in place.
x = y.items()
x.sort()
print x  # [(('foo', 1, 0), 1), (('hello', '55a', 4), 6)]

# Round trip: save the trie into an in-memory buffer, rewind the buffer,
# load it back and print every key with the repr of its value.
h = StringIO.StringIO()
trie.save(h, trieobj2)
h.seek(0)
trieobj3 = trie.load(h)
k = trieobj3.keys()
k.sort()
for m in k:  # foo 1
    print m, repr(trieobj3[m])  # hello '55a'

# Found bug, doesn't handle insertions and deletions at end properly.
# The trailing comments below show the result listed for each call
# (presumably the observed output).
trieobj = trie.trie()
trieobj["hello"] = 1
print trieobj.get_approximate('he', 2)  # []
print trieobj.get_approximate('he', 3)  # [('hello', 1, 3)]
print trieobj.get_approximate('hello me!', 3)  # []
print trieobj.get_approximate('hello me!', 4)  # [('hello', 1, 4)]
print trieobj.get_approximate('hello me!', 5)  # [('hello', 1, 4)]