def test_stress_wtrie(self):
    """Stress ``Trie.add`` with the fixture file and check ids are stable.

    Every space-separated piece of every fixture line is added to one trie
    and every raw line (as yielded by file iteration) to a second trie.
    Whenever a key is seen again, ``add`` must return the id that was
    recorded the first time that key was added.
    """

    def assert_stable_vid(trie, seen, key):
        # Add ``key`` and compare against the id remembered from its first
        # insertion; remember the id when the key is new.
        vid = trie.add(key)
        known_vid = seen.get(key)
        if known_vid is not None:
            self.assertEqual(vid, known_vid)
        else:
            seen[key] = vid

    ktrie = Trie()
    strie = Trie()
    etrie = Trie()
    keywords = {}
    search_terms = {}
    exchange_ids = {}

    with open(fixture) as f:
        for data in f:
            for word in data.split(' '):
                assert_stable_vid(ktrie, keywords, word)
            assert_stable_vid(strie, search_terms, data)

    # Only the buffer addresses are needed for the lookups below.
    nodes, kids, _ = etrie.serialize()
    naddr, _ = nodes.buffer_info()
    kaddr, _ = kids.buffer_info()

    # NOTE(review): nothing in this test populates exchange_ids, and etrie
    # is serialized before anything is added to it, so this loop does not
    # execute as written -- confirm what the exchange-id check was meant
    # to cover.
    for dc, vid in exchange_ids.items():
        rvid = etrie.add(dc)
        self.assertEqual(vid, rvid)
        print("%s %s" % (dc, vid))
        value = value_for_vid(naddr, kaddr, vid)
        # The mismatch details travel with the assertion; a separate check
        # after assertEqual could never run because the assertion raises.
        self.assertEqual(dc, value, " dc=%s adc=%s" % (dc, value))
        avid = vid_for_value(naddr, kaddr, dc)
        self.assertEqual(vid, avid)
def test_stress_wtrie(self):
    """Stress ``Trie.add`` with the fixture file and check ids are stable.

    Every space-separated piece of every fixture line is added to one trie
    and every raw line (as yielded by file iteration) to a second trie.
    Whenever a key is seen again, ``add`` must return the id that was
    recorded the first time that key was added.
    """

    def assert_stable_vid(trie, seen, key):
        # Add ``key`` and compare against the id remembered from its first
        # insertion; remember the id when the key is new.
        vid = trie.add(key)
        known_vid = seen.get(key)
        if known_vid is not None:
            self.assertEqual(vid, known_vid)
        else:
            seen[key] = vid

    ktrie = Trie()
    strie = Trie()
    etrie = Trie()
    keywords = {}
    search_terms = {}
    exchange_ids = {}

    with open(fixture) as f:
        for data in f:
            for word in data.split(" "):
                assert_stable_vid(ktrie, keywords, word)
            assert_stable_vid(strie, search_terms, data)

    # Only the buffer addresses are needed for the lookups below.
    nodes, kids, _ = etrie.serialize()
    naddr, _ = nodes.buffer_info()
    kaddr, _ = kids.buffer_info()

    # NOTE(review): nothing in this test populates exchange_ids, and etrie
    # is serialized before anything is added to it, so this loop does not
    # execute as written -- confirm what the exchange-id check was meant
    # to cover.
    for dc, vid in exchange_ids.items():
        rvid = etrie.add(dc)
        self.assertEqual(vid, rvid)
        print("%s %s" % (dc, vid))
        value = value_for_vid(naddr, kaddr, vid)
        # The mismatch details travel with the assertion; a separate check
        # after assertEqual could never run because the assertion raises.
        self.assertEqual(dc, value, " dc=%s adc=%s" % (dc, value))
        avid = vid_for_value(naddr, kaddr, dc)
        self.assertEqual(vid, avid)
def test_rtrie_in_memory(self):
    """Build a small Trie, serialize it, and query the buffers via rtrie.

    First checks the ids returned by ``Trie.add`` (a repeated key must get
    the id it was given originally). Then checks that
    ``rtrie.vid_for_value`` on the serialized buffers returns those same
    ids, and returns None for strings that were never added, including
    bare prefixes of added keys.
    """
    utf8_key = unicode(u'séllsink').encode('utf-8')

    # (key, id expected from add), in insertion order.
    insertions = [
        ('hello', 1),
        ('hell', 2),
        ('hello', 1),
        ('hellothere', 3),
        ('good', 4),
        ('goodbye', 5),
        ('hello', 1),
        ('hellsink', 6),
        (utf8_key, 7),
    ]
    trie = Trie()
    for key, expected_vid in insertions:
        self.assertEqual(trie.add(key), expected_vid)
    trie.print_it()

    node_buf, kid_buf, _ = trie.serialize()
    nodeaddr, nodelen = node_buf.buffer_info()
    kidaddr, kidlen = kid_buf.buffer_info()
    print("LENS %s %s" % (nodelen, kidlen))

    # Dump the value stored for each id 0..7.
    for vid in range(8):
        found = rtrie.value_for_vid(nodeaddr, kidaddr, vid)
        print("%s %s %s" % ("Value", vid, found))

    # (key, id expected from the serialized lookup), in the checked order.
    lookups = [
        ('hello', 1),
        ('hell', 2),
        ('goodbye', 5),
        ('hellsink', 6),
        ('hellothere', 3),
        ('good', 4),
        (utf8_key, 7),
    ]
    for key, expected_vid in lookups:
        self.assertEqual(
            rtrie.vid_for_value(nodeaddr, kidaddr, key), expected_vid)

    # Keys that were never added must not resolve to an id.
    for absent in ('notthere', 'h', 'he', 'hel', 'hells'):
        self.assertIsNone(rtrie.vid_for_value(nodeaddr, kidaddr, absent))
def test_rtrie_in_memory(self):
    """Build a small Trie, serialize it, and query the buffers via rtrie.

    First checks the ids returned by ``Trie.add`` (a repeated key must get
    the id it was given originally). Then checks that
    ``rtrie.vid_for_value`` on the serialized buffers returns those same
    ids, and returns None for strings that were never added, including
    bare prefixes of added keys.
    """
    utf8_key = unicode(u"séllsink").encode("utf-8")

    # (key, id expected from add), in insertion order.
    insertions = [
        ("hello", 1),
        ("hell", 2),
        ("hello", 1),
        ("hellothere", 3),
        ("good", 4),
        ("goodbye", 5),
        ("hello", 1),
        ("hellsink", 6),
        (utf8_key, 7),
    ]
    trie = Trie()
    for key, expected_vid in insertions:
        self.assertEqual(trie.add(key), expected_vid)
    trie.print_it()

    node_buf, kid_buf, _ = trie.serialize()
    nodeaddr, nodelen = node_buf.buffer_info()
    kidaddr, kidlen = kid_buf.buffer_info()
    print("LENS %s %s" % (nodelen, kidlen))

    # Dump the value stored for each id 0..7.
    for vid in range(8):
        found = rtrie.value_for_vid(nodeaddr, kidaddr, vid)
        print("%s %s %s" % ("Value", vid, found))

    # (key, id expected from the serialized lookup), in the checked order.
    lookups = [
        ("hello", 1),
        ("hell", 2),
        ("goodbye", 5),
        ("hellsink", 6),
        ("hellothere", 3),
        ("good", 4),
        (utf8_key, 7),
    ]
    for key, expected_vid in lookups:
        self.assertEqual(
            rtrie.vid_for_value(nodeaddr, kidaddr, key), expected_vid)

    # Keys that were never added must not resolve to an id.
    for absent in ("notthere", "h", "he", "hel", "hells"):
        self.assertIsNone(rtrie.vid_for_value(nodeaddr, kidaddr, absent))
def _fetch_vid(vid, vid16_nodes, vid16_kids, vid_nodes, vid_kids):
    """Return what ``rtrie.value_for_vid`` yields for ``vid``.

    Only ``vid_nodes`` and ``vid_kids`` are passed to the lookup;
    ``vid16_nodes`` and ``vid16_kids`` are accepted but not used here.
    """
    value = rtrie.value_for_vid(vid_nodes, vid_kids, vid)
    return value