def test_stress_wtrie(self):
    """Check that Trie.add hands out stable ids across a whole fixture file.

    Every line of ``fixture`` is added to one trie and each of its
    space-separated pieces to another; whenever a value is seen again the
    id returned by ``add`` must equal the id recorded the first time.

    NOTE(review): ``exchange_ids`` is never filled in this method, so the
    final loop does not execute and ``etrie`` is serialized while empty.
    """
    keyword_trie = Trie()
    term_trie = Trie()
    etrie = Trie()

    seen_keywords = {}
    seen_terms = {}
    exchange_ids = {}

    def check_stable(trie, seen, token):
        # First sighting records the id; any later sighting must match it.
        vid = trie.add(token)
        known = seen.get(token)
        if known is None:
            seen[token] = vid
        else:
            self.assertEqual(vid, known)

    with open(fixture) as handle:
        for line in handle:
            # Lines are used as read, so the last piece keeps its newline.
            for token in line.split(' '):
                check_stable(keyword_trie, seen_keywords, token)
            check_stable(term_trie, seen_terms, line)

    nodes, kids, _nodelen = etrie.serialize()
    naddr, _nlen = nodes.buffer_info()
    kaddr, _klen = kids.buffer_info()

    for dc, vid in exchange_ids.iteritems():
        self.assertEqual(vid, etrie.add(dc))

        print("%s %s" % (dc, vid))
        value = value_for_vid(naddr, kaddr, vid)
        self.assertEqual(dc, value)
        if dc != value:
            print(" dc=%s adc=%s" % (dc, value))

        self.assertEqual(vid, vid_for_value(naddr, kaddr, dc))
def test_stress_wtrie(self):
    """Check that Trie.add hands out stable ids across a whole fixture file.

    Every line of ``fixture`` is added to one trie and each of its
    space-separated pieces to another; whenever a value is seen again the
    id returned by ``add`` must equal the id recorded the first time.

    NOTE(review): ``exchange_ids`` is never filled in this method, so the
    final loop does not execute and ``etrie`` is serialized while empty.
    """
    keyword_trie = Trie()
    term_trie = Trie()
    etrie = Trie()

    seen_keywords = {}
    seen_terms = {}
    exchange_ids = {}

    def check_stable(trie, seen, token):
        # First sighting records the id; any later sighting must match it.
        vid = trie.add(token)
        known = seen.get(token)
        if known is None:
            seen[token] = vid
        else:
            self.assertEqual(vid, known)

    with open(fixture) as handle:
        for line in handle:
            # Lines are used as read, so the last piece keeps its newline.
            for token in line.split(" "):
                check_stable(keyword_trie, seen_keywords, token)
            check_stable(term_trie, seen_terms, line)

    nodes, kids, _nodelen = etrie.serialize()
    naddr, _nlen = nodes.buffer_info()
    kaddr, _klen = kids.buffer_info()

    for dc, vid in exchange_ids.iteritems():
        self.assertEqual(vid, etrie.add(dc))

        print("%s %s" % (dc, vid))
        value = value_for_vid(naddr, kaddr, vid)
        self.assertEqual(dc, value)
        if dc != value:
            print(" dc=%s adc=%s" % (dc, value))

        self.assertEqual(vid, vid_for_value(naddr, kaddr, dc))
def test_rtrie_in_mdb(self):
    """Store a serialized trie in an MDB file and query it after reopening.

    The trie is built and serialized, its two buffers are written under
    the keys 'nodes' and 'kids' of the '_meta_' database, the environment
    is closed and reopened, and ``rtrie.vid_for_value`` is run against the
    buffers returned by ``get_raw``.

    Cleanup tolerates files that were never created, so a failure while
    opening the environment is reported as itself rather than being
    replaced by an OSError raised from the ``finally`` clause.
    """
    import errno
    import os

    path = '/tmp/test_rtrie'

    t = Trie()
    # Ids follow first-insertion order; re-adding returns the original id.
    for word, vid in [('hello', 1), ('hell', 2), ('hello', 1),
                      ('hellothere', 3), ('good', 4), ('goodbye', 5),
                      ('hello', 1), ('hellsink', 6)]:
        self.assertEqual(t.add(word), vid)

    nodes, kids, _ = t.serialize()
    nodeaddr, nodelen = nodes.buffer_info()
    kidaddr, kidlen = kids.buffer_info()
    try:
        env = mdb.Env(path, flags=mdb.MDB_WRITEMAP | mdb.MDB_NOSYNC | mdb.MDB_NOSUBDIR)
        txn = env.begin_txn()
        db = env.open_db(txn, name='_meta_', flags=mdb.MDB_CREATE)
        db.put_raw(txn, 'nodes', nodeaddr, nodelen)
        db.put_raw(txn, 'kids', kidaddr, kidlen)
        # Read back inside the writing transaction; results are not checked.
        n, ns = db.get_raw(txn, 'nodes')
        k, ks = db.get_raw(txn, 'kids')
        txn.commit()
        env.close()

        # Reopen without MDB_WRITEMAP and look values up from stored data.
        env = mdb.Env(path, flags=mdb.MDB_NOSYNC | mdb.MDB_NOSUBDIR)
        txn = env.begin_txn()
        db = env.open_db(txn, name='_meta_')
        n, ns = db.get_raw(txn, 'nodes')
        k, ks = db.get_raw(txn, 'kids')
        for word, vid in [('hello', 1), ('hell', 2), ('goodbye', 5),
                          ('hellsink', 6), ('hellothere', 3), ('good', 4)]:
            self.assertEqual(rtrie.vid_for_value(n, k, word), vid)
        self.assertIsNone(rtrie.vid_for_value(n, k, 'notthere'))
        txn.commit()
        env.close()
    finally:
        # Remove the data file and its lock file independently; a missing
        # file is fine, any other OS error is still raised.
        for leftover in (path, path + '-lock'):
            try:
                os.unlink(leftover)
            except OSError as exc:
                if exc.errno != errno.ENOENT:
                    raise
def test_rtrie_in_mdb(self):
    """Store a serialized trie in an MDB file and query it after reopening.

    The trie is built and serialized, its two buffers are written under
    the keys "nodes" and "kids" of the "_meta_" database, the environment
    is closed and reopened, and ``rtrie.vid_for_value`` is run against the
    buffers returned by ``get_raw``.

    Cleanup tolerates files that were never created, so a failure while
    opening the environment is reported as itself rather than being
    replaced by an OSError raised from the ``finally`` clause.
    """
    import errno
    import os

    path = "/tmp/test_rtrie"

    t = Trie()
    # Ids follow first-insertion order; re-adding returns the original id.
    for word, vid in [("hello", 1), ("hell", 2), ("hello", 1),
                      ("hellothere", 3), ("good", 4), ("goodbye", 5),
                      ("hello", 1), ("hellsink", 6)]:
        self.assertEqual(t.add(word), vid)

    nodes, kids, _ = t.serialize()
    nodeaddr, nodelen = nodes.buffer_info()
    kidaddr, kidlen = kids.buffer_info()
    try:
        env = mdb.Env(path, flags=mdb.MDB_WRITEMAP | mdb.MDB_NOSYNC | mdb.MDB_NOSUBDIR)
        txn = env.begin_txn()
        db = env.open_db(txn, name="_meta_", flags=mdb.MDB_CREATE)
        db.put_raw(txn, "nodes", nodeaddr, nodelen)
        db.put_raw(txn, "kids", kidaddr, kidlen)
        # Read back inside the writing transaction; results are not checked.
        n, ns = db.get_raw(txn, "nodes")
        k, ks = db.get_raw(txn, "kids")
        txn.commit()
        env.close()

        # Reopen without MDB_WRITEMAP and look values up from stored data.
        env = mdb.Env(path, flags=mdb.MDB_NOSYNC | mdb.MDB_NOSUBDIR)
        txn = env.begin_txn()
        db = env.open_db(txn, name="_meta_")
        n, ns = db.get_raw(txn, "nodes")
        k, ks = db.get_raw(txn, "kids")
        for word, vid in [("hello", 1), ("hell", 2), ("goodbye", 5),
                          ("hellsink", 6), ("hellothere", 3), ("good", 4)]:
            self.assertEqual(rtrie.vid_for_value(n, k, word), vid)
        self.assertIsNone(rtrie.vid_for_value(n, k, "notthere"))
        txn.commit()
        env.close()
    finally:
        # Remove the data file and its lock file independently; a missing
        # file is fine, any other OS error is still raised.
        for leftover in (path, path + "-lock"):
            try:
                os.unlink(leftover)
            except OSError as exc:
                if exc.errno != errno.ENOENT:
                    raise
def test_rtrie_in_memory(self):
    """Query a serialized trie straight from its in-memory buffers.

    Words (including one UTF-8 encoded non-ASCII word) are added to a
    Trie, the trie is serialized, and ``rtrie.vid_for_value`` is called
    with the buffer addresses: stored words must map to their ids, while
    an unknown word and strings that are only prefixes of stored words
    must give None.
    """
    accented = u'séllsink'.encode('utf-8')

    t = Trie()
    # Order matters: ids are asserted to follow first-insertion order.
    insertions = [
        ('hello', 1), ('hell', 2), ('hello', 1), ('hellothere', 3),
        ('good', 4), ('goodbye', 5), ('hello', 1), ('hellsink', 6),
        (accented, 7),
    ]
    for word, vid in insertions:
        self.assertEqual(t.add(word), vid)
    t.print_it()

    nodes, kids, _ = t.serialize()
    nodeaddr, nodelen = nodes.buffer_info()
    kidaddr, kidlen = kids.buffer_info()
    print("LENS %s %s" % (nodelen, kidlen))

    # Dump the value for ids 0..7; printed only, nothing is asserted here.
    for vid in range(8):
        print("Value %s %s" % (vid, rtrie.value_for_vid(nodeaddr, kidaddr, vid)))

    stored = [
        ('hello', 1), ('hell', 2), ('goodbye', 5), ('hellsink', 6),
        ('hellothere', 3), ('good', 4), (accented, 7),
    ]
    for word, vid in stored:
        self.assertEqual(rtrie.vid_for_value(nodeaddr, kidaddr, word), vid)

    for absent in ('notthere', 'h', 'he', 'hel', 'hells'):
        self.assertIsNone(rtrie.vid_for_value(nodeaddr, kidaddr, absent))
def test_rtrie_in_memory(self):
    """Query a serialized trie straight from its in-memory buffers.

    Words (including one UTF-8 encoded non-ASCII word) are added to a
    Trie, the trie is serialized, and ``rtrie.vid_for_value`` is called
    with the buffer addresses: stored words must map to their ids, while
    an unknown word and strings that are only prefixes of stored words
    must give None.
    """
    accented = u"séllsink".encode("utf-8")

    t = Trie()
    # Order matters: ids are asserted to follow first-insertion order.
    insertions = [
        ("hello", 1), ("hell", 2), ("hello", 1), ("hellothere", 3),
        ("good", 4), ("goodbye", 5), ("hello", 1), ("hellsink", 6),
        (accented, 7),
    ]
    for word, vid in insertions:
        self.assertEqual(t.add(word), vid)
    t.print_it()

    nodes, kids, _ = t.serialize()
    nodeaddr, nodelen = nodes.buffer_info()
    kidaddr, kidlen = kids.buffer_info()
    print("LENS %s %s" % (nodelen, kidlen))

    # Dump the value for ids 0..7; printed only, nothing is asserted here.
    for vid in range(8):
        print("Value %s %s" % (vid, rtrie.value_for_vid(nodeaddr, kidaddr, vid)))

    stored = [
        ("hello", 1), ("hell", 2), ("goodbye", 5), ("hellsink", 6),
        ("hellothere", 3), ("good", 4), (accented, 7),
    ]
    for word, vid in stored:
        self.assertEqual(rtrie.vid_for_value(nodeaddr, kidaddr, word), vid)

    for absent in ("notthere", "h", "he", "hel", "hells"):
        self.assertIsNone(rtrie.vid_for_value(nodeaddr, kidaddr, absent))
def test_wtrie(self):
    """Exercise Trie.add, Trie.node_at_path and the serialized layout.

    Three things are checked: the ids returned by ``add`` (repeats return
    the original id and the empty string maps to 0), the
    ``(key, size, id)``-style triples returned by ``node_at_path`` for a
    few paths, and the exact contents of the two buffers produced by
    ``serialize``.

    Failure messages always report the value that was actually compared.
    """
    t = Trie()
    for word, vid in [('hello', 1), ('hell', 2), ('hello', 1),
                      ('hellothere', 3), ('good', 4), ('goodbye', 5),
                      ('hello', 1), ('hellsink', 6), ('', 0)]:
        self.assertEqual(t.add(word), vid)

    # Path elements are character codes: 104 == ord('h'),
    # 111 == ord('o'), 116 == ord('t').
    key, sz, pt = t.node_at_path()
    self.assertEqual(sz, 2)

    key, sz, pt = t.node_at_path(104)
    self.assertEqual(key, 'hell')
    self.assertEqual(pt, 0)
    self.assertEqual(sz, 2, 'actual %s' % sz)

    key, sz, pt = t.node_at_path(104, 111)
    self.assertEqual(key, 'o', 'actual %s' % key)
    self.assertEqual(pt, 2)
    self.assertEqual(sz, 1)

    key, sz, pt = t.node_at_path(104, 111, 116)
    self.assertEqual(key, 'there')
    self.assertEqual(pt, 1)
    self.assertEqual(sz, 0)

    n, k, _ = t.serialize()
    # The node buffer is read below as seven unsigned ints ("7I").
    self.assertEqual(len(n), 7 * 4, "actual %d" % len(n))
    self.assertEqual(len(k), 100, "actual %d" % len(k))

    print("nodes %s" % (n,))
    print("kids %s" % (k,))

    unpacked = struct.unpack_from("7I", n, 0)
    expected = (0x02000000, 0x01000010, 0x0200000b, 0x00000013,
                0x01000004, 0x00000008, 0x00000016)
    self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))

    # (struct format, byte offset into the kids buffer, expected fields).
    # NOTE(review): each record looks like <uint32, uint16 key length,
    # key characters, zero or more uint32 child words whose top byte is
    # an ASCII character code> -- confirm against the Trie serializer.
    kid_records = [
        ("IH2I", 0, (0, 0, 0x67000004, 0x68000002)),
        ("IH4cI", 16, (0x0000, 0x0004, 'g', 'o', 'o', 'd', 0x62000005)),
        ("IH3c", 32, (0x0004, 0x0003, 'b', 'y', 'e')),
        ("IH4c2I", 44, (0x0000, 0x0004, 'h', 'e', 'l', 'l',
                        0x6f000001, 0x73000006)),
        ("IHcI", 64, (0x0002, 1, 'o', 0x74000003)),
        ("IH5c", 76, (0x0001, 0x0005, 't', 'h', 'e', 'r', 'e')),
        ("IH4c", 88, (0x0002, 0x0004, 's', 'i', 'n', 'k')),
    ]
    for fmt, offset, expected in kid_records:
        unpacked = struct.unpack_from(fmt, k, offset)
        self.assertEqual(unpacked, expected, 'actual %s' % str(unpacked))