Beispiel #1
0
    def test_stress_wtrie(self):
        # Stress test for the writable trie: every line of the `fixture` file
        # is added whole to one trie and split on single spaces into words
        # that are added to another. Adding a value again must always return
        # the vid that was handed out the first time it was seen.
        ktrie = Trie()
        strie = Trie()
        etrie = Trie()

        keywords = {}
        search_terms = {}
        exchange_ids = {}

        def check_vid(seen, value, vid):
            # First sighting records the vid; later sightings must match it.
            known = seen.get(value)
            if known is not None:
                self.assertEqual(vid, known)
            else:
                seen[value] = vid

        with open(fixture) as f:
            for data in f:
                # Lines are not stripped, so any trailing newline stays
                # attached to the line and to its last word.
                for word in data.split(' '):
                    check_vid(keywords, word, ktrie.add(word))

                check_vid(search_terms, data, strie.add(data))

        nodes, kids, _ = etrie.serialize()
        naddr, _ = nodes.buffer_info()
        kaddr, _ = kids.buffer_info()

        # NOTE(review): exchange_ids is never populated in this method, so the
        # loop below does not execute, and etrie is serialized above before
        # anything is added to it. Confirm whether this part was meant to be
        # fed data.
        for dc, vid in exchange_ids.items():
            rvid = etrie.add(dc)
            self.assertEqual(vid, rvid)

            print("%s %s" % (dc, vid))
            value = value_for_vid(naddr, kaddr, vid)
            self.assertEqual(dc, value)

            avid = vid_for_value(naddr, kaddr, dc)
            self.assertEqual(vid, avid)
Beispiel #2
0
    def test_stress_wtrie(self):
        # Stress test for the writable trie: every line of the `fixture` file
        # is added whole to one trie and split on single spaces into words
        # that are added to another. Adding a value again must always return
        # the vid that was handed out the first time it was seen.
        ktrie = Trie()
        strie = Trie()
        etrie = Trie()

        keywords = {}
        search_terms = {}
        exchange_ids = {}

        def check_vid(seen, value, vid):
            # First sighting records the vid; later sightings must match it.
            known = seen.get(value)
            if known is not None:
                self.assertEqual(vid, known)
            else:
                seen[value] = vid

        with open(fixture) as f:
            for data in f:
                # Lines are not stripped, so any trailing newline stays
                # attached to the line and to its last word.
                for word in data.split(" "):
                    check_vid(keywords, word, ktrie.add(word))

                check_vid(search_terms, data, strie.add(data))

        nodes, kids, _ = etrie.serialize()
        naddr, _ = nodes.buffer_info()
        kaddr, _ = kids.buffer_info()

        # NOTE(review): exchange_ids is never populated in this method, so the
        # loop below does not execute, and etrie is serialized above before
        # anything is added to it. Confirm whether this part was meant to be
        # fed data.
        for dc, vid in exchange_ids.items():
            rvid = etrie.add(dc)
            self.assertEqual(vid, rvid)

            print("%s %s" % (dc, vid))
            value = value_for_vid(naddr, kaddr, vid)
            self.assertEqual(dc, value)

            avid = vid_for_value(naddr, kaddr, dc)
            self.assertEqual(vid, avid)
Beispiel #3
0
    def test_rtrie_in_mdb(self):
        # Round-trips a serialized trie through an MDB database file: the node
        # and kid buffers are stored with put_raw(), the environment is closed
        # and reopened, and rtrie.vid_for_value() is run against what
        # get_raw() hands back.
        import errno
        import os

        path = '/tmp/test_rtrie'

        t = Trie()
        self.assertEqual(t.add('hello'), 1)
        self.assertEqual(t.add('hell'), 2)
        self.assertEqual(t.add('hello'), 1)
        self.assertEqual(t.add('hellothere'), 3)
        self.assertEqual(t.add('good'), 4)
        self.assertEqual(t.add('goodbye'), 5)
        self.assertEqual(t.add('hello'), 1)
        self.assertEqual(t.add('hellsink'), 6)

        nodes, kids, _ = t.serialize()
        nodeaddr, nodelen = nodes.buffer_info()
        kidaddr, kidlen = kids.buffer_info()
        try:
            env = mdb.Env(path,
                          flags=mdb.MDB_WRITEMAP | mdb.MDB_NOSYNC
                          | mdb.MDB_NOSUBDIR)
            txn = env.begin_txn()
            db = env.open_db(txn, name='_meta_', flags=mdb.MDB_CREATE)
            db.put_raw(txn, 'nodes', nodeaddr, nodelen)
            db.put_raw(txn, 'kids', kidaddr, kidlen)

            # Read back inside the writing transaction; the results are not
            # inspected, only that the calls succeed and yield a pair.
            n, ns = db.get_raw(txn, 'nodes')
            k, ks = db.get_raw(txn, 'kids')
            txn.commit()
            env.close()

            # Reopen without MDB_WRITEMAP / MDB_CREATE and query the stored
            # buffers.
            env = mdb.Env(path,
                          flags=mdb.MDB_NOSYNC | mdb.MDB_NOSUBDIR)
            txn = env.begin_txn()
            db = env.open_db(txn, name='_meta_')

            n, ns = db.get_raw(txn, 'nodes')
            k, ks = db.get_raw(txn, 'kids')
            self.assertEqual(rtrie.vid_for_value(n, k, 'hello'), 1)
            self.assertEqual(rtrie.vid_for_value(n, k, 'hell'), 2)
            self.assertEqual(rtrie.vid_for_value(n, k, 'goodbye'), 5)
            self.assertEqual(rtrie.vid_for_value(n, k, 'hellsink'), 6)
            self.assertEqual(rtrie.vid_for_value(n, k, 'hellothere'), 3)
            self.assertEqual(rtrie.vid_for_value(n, k, 'good'), 4)
            self.assertIsNone(rtrie.vid_for_value(n, k, 'notthere'))

            txn.commit()
            env.close()
        finally:
            # Remove the database file and its '-lock' companion. A file that
            # was never created (e.g. mdb.Env() itself failed) is not an error
            # here: raising for it would replace the exception that actually
            # made the test fail, and would skip the second removal.
            for leftover in (path, path + '-lock'):
                try:
                    os.unlink(leftover)
                except OSError as exc:
                    if exc.errno != errno.ENOENT:
                        raise
Beispiel #4
0
    def test_rtrie_in_mdb(self):
        # Round-trips a serialized trie through an MDB database file: the node
        # and kid buffers are stored with put_raw(), the environment is closed
        # and reopened, and rtrie.vid_for_value() is run against what
        # get_raw() hands back.
        import errno
        import os

        path = "/tmp/test_rtrie"

        t = Trie()
        self.assertEqual(t.add("hello"), 1)
        self.assertEqual(t.add("hell"), 2)
        self.assertEqual(t.add("hello"), 1)
        self.assertEqual(t.add("hellothere"), 3)
        self.assertEqual(t.add("good"), 4)
        self.assertEqual(t.add("goodbye"), 5)
        self.assertEqual(t.add("hello"), 1)
        self.assertEqual(t.add("hellsink"), 6)

        nodes, kids, _ = t.serialize()
        nodeaddr, nodelen = nodes.buffer_info()
        kidaddr, kidlen = kids.buffer_info()
        try:
            env = mdb.Env(path, flags=mdb.MDB_WRITEMAP | mdb.MDB_NOSYNC | mdb.MDB_NOSUBDIR)
            txn = env.begin_txn()
            db = env.open_db(txn, name="_meta_", flags=mdb.MDB_CREATE)
            db.put_raw(txn, "nodes", nodeaddr, nodelen)
            db.put_raw(txn, "kids", kidaddr, kidlen)

            # Read back inside the writing transaction; the results are not
            # inspected, only that the calls succeed and yield a pair.
            n, ns = db.get_raw(txn, "nodes")
            k, ks = db.get_raw(txn, "kids")
            txn.commit()
            env.close()

            # Reopen without MDB_WRITEMAP / MDB_CREATE and query the stored
            # buffers.
            env = mdb.Env(path, flags=mdb.MDB_NOSYNC | mdb.MDB_NOSUBDIR)
            txn = env.begin_txn()
            db = env.open_db(txn, name="_meta_")

            n, ns = db.get_raw(txn, "nodes")
            k, ks = db.get_raw(txn, "kids")
            self.assertEqual(rtrie.vid_for_value(n, k, "hello"), 1)
            self.assertEqual(rtrie.vid_for_value(n, k, "hell"), 2)
            self.assertEqual(rtrie.vid_for_value(n, k, "goodbye"), 5)
            self.assertEqual(rtrie.vid_for_value(n, k, "hellsink"), 6)
            self.assertEqual(rtrie.vid_for_value(n, k, "hellothere"), 3)
            self.assertEqual(rtrie.vid_for_value(n, k, "good"), 4)
            self.assertIsNone(rtrie.vid_for_value(n, k, "notthere"))

            txn.commit()
            env.close()
        finally:
            # Remove the database file and its "-lock" companion. A file that
            # was never created (e.g. mdb.Env() itself failed) is not an error
            # here: raising for it would replace the exception that actually
            # made the test fail, and would skip the second removal.
            for leftover in (path, path + "-lock"):
                try:
                    os.unlink(leftover)
                except OSError as exc:
                    if exc.errno != errno.ENOENT:
                        raise
Beispiel #5
0
    def test_rtrie_in_memory(self):
        # Builds a trie, serializes it, and runs the rtrie lookups against the
        # addresses reported by buffer_info() of the serialized buffers.

        # UTF-8 encoded value with a non-ASCII character.
        s = u'séllsink'.encode('utf-8')

        t = Trie()
        # (value, vid expected from add); repeats must return the first vid.
        additions = (
            ('hello', 1),
            ('hell', 2),
            ('hello', 1),
            ('hellothere', 3),
            ('good', 4),
            ('goodbye', 5),
            ('hello', 1),
            ('hellsink', 6),
            (s, 7),
        )
        for added, expected_vid in additions:
            self.assertEqual(t.add(added), expected_vid)
        t.print_it()

        nodes, kids, _ = t.serialize()
        nodeaddr, nodelen = nodes.buffer_info()
        kidaddr, kidlen = kids.buffer_info()
        print("LENS %s %s" % (nodelen, kidlen))

        # Dump what each vid from 0 through 7 resolves to.
        for vid in range(8):
            resolved = rtrie.value_for_vid(nodeaddr, kidaddr, vid)
            print("Value %s %s" % (vid, resolved))

        def lookup(value):
            return rtrie.vid_for_value(nodeaddr, kidaddr, value)

        stored = (
            ('hello', 1),
            ('hell', 2),
            ('goodbye', 5),
            ('hellsink', 6),
            ('hellothere', 3),
            ('good', 4),
            (s, 7),
        )
        for value, expected_vid in stored:
            self.assertEqual(lookup(value), expected_vid)

        # An unknown value and bare prefixes of stored values must not
        # resolve to a vid.
        for absent in ('notthere', 'h', 'he', 'hel', 'hells'):
            self.assertIsNone(lookup(absent))
Beispiel #6
0
    def test_rtrie_in_memory(self):
        # Builds a trie, serializes it, and runs the rtrie lookups against the
        # addresses reported by buffer_info() of the serialized buffers.

        # UTF-8 encoded value with a non-ASCII character.
        s = u"séllsink".encode("utf-8")

        t = Trie()
        # (value, vid expected from add); repeats must return the first vid.
        additions = (
            ("hello", 1),
            ("hell", 2),
            ("hello", 1),
            ("hellothere", 3),
            ("good", 4),
            ("goodbye", 5),
            ("hello", 1),
            ("hellsink", 6),
            (s, 7),
        )
        for added, expected_vid in additions:
            self.assertEqual(t.add(added), expected_vid)
        t.print_it()

        nodes, kids, _ = t.serialize()
        nodeaddr, nodelen = nodes.buffer_info()
        kidaddr, kidlen = kids.buffer_info()
        print("LENS %s %s" % (nodelen, kidlen))

        # Dump what each vid from 0 through 7 resolves to.
        for vid in range(8):
            resolved = rtrie.value_for_vid(nodeaddr, kidaddr, vid)
            print("Value %s %s" % (vid, resolved))

        def lookup(value):
            return rtrie.vid_for_value(nodeaddr, kidaddr, value)

        stored = (
            ("hello", 1),
            ("hell", 2),
            ("goodbye", 5),
            ("hellsink", 6),
            ("hellothere", 3),
            ("good", 4),
            (s, 7),
        )
        for value, expected_vid in stored:
            self.assertEqual(lookup(value), expected_vid)

        # An unknown value and bare prefixes of stored values must not
        # resolve to a vid.
        for absent in ("notthere", "h", "he", "hel", "hells"):
            self.assertIsNone(lookup(absent))
Beispiel #7
0
    def test_wtrie(self):
        # Checks vid assignment by Trie.add(), the nodes reachable through
        # node_at_path(), and the exact byte layout produced by serialize().
        t = Trie()
        self.assertEqual(t.add('hello'), 1)
        self.assertEqual(t.add('hell'), 2)
        self.assertEqual(t.add('hello'), 1)
        self.assertEqual(t.add('hellothere'), 3)
        self.assertEqual(t.add('good'), 4)
        self.assertEqual(t.add('goodbye'), 5)
        self.assertEqual(t.add('hello'), 1)
        self.assertEqual(t.add('hellsink'), 6)
        self.assertEqual(t.add(''), 0)

        key, sz, pt = t.node_at_path()
        self.assertEqual(sz, 2)

        # Path elements are character codes: 104 == ord('h'),
        # 111 == ord('o'), 116 == ord('t').
        key, sz, pt = t.node_at_path(104)
        self.assertEqual(key, 'hell')
        self.assertEqual(pt, 0)
        self.assertEqual(sz, 2, 'actual %s' % sz)

        key2, sz, pt = t.node_at_path(104, 111)
        # Report the value under test (key2), not the key of the previous
        # lookup.
        self.assertEqual(key2, 'o', 'actual %s' % key2)
        self.assertEqual(pt, 2)
        self.assertEqual(sz, 1)

        key, sz, pt = t.node_at_path(104, 111, 116)
        self.assertEqual(key, 'there')
        self.assertEqual(pt, 1)
        self.assertEqual(sz, 0)

        n, k, _ = t.serialize()
        self.assertEqual(len(n), 7 * 4, "actual %d" % len(n))
        self.assertEqual(len(k), 100, "actual %d" % len(k))

        print('nodes %s' % (n,))
        print('kids %s' % (k,))

        # Seven unsigned ints in the node buffer.
        unpacked = struct.unpack_from("7I", n, 0)
        expected = (0x02000000, 0x01000010, 0x0200000b, 0x00000013, 0x01000004,
                    0x00000008, 0x00000016)
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))

        # Kid records, read at fixed offsets. In every record below the H
        # field equals the number of key characters that follow it, and the
        # trailing I values carry a character code in the top byte
        # (0x67 == 'g', 0x68 == 'h') -- presumably child links; TODO confirm
        # against the Trie serializer.
        unpacked = struct.unpack_from("IH2I", k, 0)
        expected = (0, 0, 0x67000004, 0x68000002)
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))

        unpacked = struct.unpack_from("IH4cI", k, 16)
        expected = (0x0000, 0x0004, 'g', 'o', 'o', 'd', 0x62000005)
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))

        unpacked = struct.unpack_from("IH3c", k, 32)
        expected = (0x0004, 0x0003, 'b', 'y', 'e')
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))

        unpacked = struct.unpack_from("IH4c2I", k, 44)
        expected = (0x0000, 0x0004, 'h', 'e', 'l', 'l', 0x6f000001, 0x73000006)
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))

        unpacked = struct.unpack_from("IHcI", k, 64)
        expected = (0x0002, 1, 'o', 0x74000003)
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))

        unpacked = struct.unpack_from("IH5c", k, 76)
        expected = (0x0001, 0x0005, 't', 'h', 'e', 'r', 'e')
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))

        unpacked = struct.unpack_from("IH4c", k, 88)
        expected = (0x0002, 0x0004, 's', 'i', 'n', 'k')
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))
Beispiel #8
0
    def test_wtrie(self):
        # Checks vid assignment by Trie.add(), the nodes reachable through
        # node_at_path(), and the exact byte layout produced by serialize().
        t = Trie()
        self.assertEqual(t.add('hello'), 1)
        self.assertEqual(t.add('hell'), 2)
        self.assertEqual(t.add('hello'), 1)
        self.assertEqual(t.add('hellothere'), 3)
        self.assertEqual(t.add('good'), 4)
        self.assertEqual(t.add('goodbye'), 5)
        self.assertEqual(t.add('hello'), 1)
        self.assertEqual(t.add('hellsink'), 6)
        self.assertEqual(t.add(''), 0)

        key, sz, pt = t.node_at_path()
        self.assertEqual(sz, 2)

        # Path elements are character codes: 104 == ord('h'),
        # 111 == ord('o'), 116 == ord('t').
        key, sz, pt = t.node_at_path(104)
        self.assertEqual(key, 'hell')
        self.assertEqual(pt, 0)
        self.assertEqual(sz, 2, 'actual %s' % sz)

        key2, sz, pt = t.node_at_path(104, 111)
        # Report the value under test (key2), not the key of the previous
        # lookup.
        self.assertEqual(key2, 'o', 'actual %s' % key2)
        self.assertEqual(pt, 2)
        self.assertEqual(sz, 1)

        key, sz, pt = t.node_at_path(104, 111, 116)
        self.assertEqual(key, 'there')
        self.assertEqual(pt, 1)
        self.assertEqual(sz, 0)

        n, k, _ = t.serialize()
        self.assertEqual(len(n), 7 * 4, "actual %d" % len(n))
        self.assertEqual(len(k), 100, "actual %d" % len(k))

        print('nodes %s' % (n,))
        print('kids %s' % (k,))

        # Seven unsigned ints in the node buffer.
        unpacked = struct.unpack_from("7I", n, 0)
        expected = (0x02000000, 0x01000010, 0x0200000b, 0x00000013, 0x01000004, 0x00000008, 0x00000016)
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))

        # Kid records, read at fixed offsets. In every record below the H
        # field equals the number of key characters that follow it, and the
        # trailing I values carry a character code in the top byte
        # (0x67 == 'g', 0x68 == 'h') -- presumably child links; TODO confirm
        # against the Trie serializer.
        unpacked = struct.unpack_from("IH2I", k, 0)
        expected = (0, 0, 0x67000004, 0x68000002)
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))

        unpacked = struct.unpack_from("IH4cI", k, 16)
        expected = (0x0000, 0x0004, 'g', 'o', 'o', 'd', 0x62000005)
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))

        unpacked = struct.unpack_from("IH3c", k, 32)
        expected = (0x0004, 0x0003, 'b', 'y', 'e')
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))

        unpacked = struct.unpack_from("IH4c2I", k, 44)
        expected = (0x0000, 0x0004, 'h', 'e', 'l', 'l', 0x6f000001, 0x73000006)
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))

        unpacked = struct.unpack_from("IHcI", k, 64)
        expected = (0x0002, 1, 'o', 0x74000003)
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))

        unpacked = struct.unpack_from("IH5c", k, 76)
        expected = (0x0001, 0x0005, 't', 'h', 'e', 'r', 'e')
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))

        unpacked = struct.unpack_from("IH4c", k, 88)
        expected = (0x0002, 0x0004, 's', 'i', 'n', 'k')
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))