class AdaptiveIndex(object):
    """Index of (value, position) pairs that changes representation as it grows.

    The index starts in ``BITMAP_INDEX`` mode, where ``self.index`` is a dict
    mapping each value to a ``BitIndex`` of positions.  When ``add`` is called
    while that dict holds more than ``MAX_BIT_INDEX_SIZE`` distinct values,
    every stored pair is copied into a ``Bloomfilter`` and the index switches
    to ``BLOOMFILTER_INDEX`` mode.  Nothing in this class switches it back.
    """

    def __init__(self):
        # value -> BitIndex while in bitmap mode; replaced by a Bloomfilter
        # holding (value, position) tuples after conversion.
        self.index = {}
        self.index_type = BITMAP_INDEX

    def _convert_to_bloomfilter(self):
        """Copy every (value, position) pair of the bitmap dict into a Bloomfilter."""
        bitmaps = self.index
        self.index_type = BLOOMFILTER_INDEX
        self.index = Bloomfilter(BF_SIZE, BF_HASHES)
        for stored_value in bitmaps:
            for stored_pos in bitmaps[stored_value].positions():
                self.index.add([(stored_value, stored_pos)])

    def add(self, value, position):
        """Record that ``value`` occurs at ``position``.

        The size check runs before the new pair is stored, so the conversion
        happens on the first ``add`` made after the dict has exceeded
        ``MAX_BIT_INDEX_SIZE`` entries.
        """
        if (self.index_type == BITMAP_INDEX
                and len(self.index) > MAX_BIT_INDEX_SIZE):
            self._convert_to_bloomfilter()

        if self.index_type == BITMAP_INDEX:
            if value not in self.index:
                self.index[value] = BitIndex()
            self.index[value].set(position)
        elif self.index_type == BLOOMFILTER_INDEX:
            self.index.add([(value, position)])
            # Sanity check only: the pair that was just added must be found.
            assert (value, position) in self.index

    def get(self, value, pos):
        """Return whether the pair ``(value, pos)`` is recorded in the index.

        In bitmap mode an unknown ``value`` yields ``False``; otherwise the
        result of ``BitIndex.get(pos)`` is returned.  In any other mode the
        result is the membership test ``(value, pos) in self.index``.
        """
        if self.index_type == BITMAP_INDEX:
            if value in self.index:
                return self.index[value].get(pos)
            return False
        return (value, pos) in self.index

    def filter(self, fun):
        """Return an iterator over the positions of all values accepted by ``fun``.

        ``fun`` is called with each stored value; the positions of every value
        for which it returns a true result are chained into one flat iterator.
        """
        if self.index_type == BITMAP_INDEX:
            # chain.from_iterable flattens the per-value position iterables;
            # plain chain(<generator>) would hand back each iterable unflattened.
            return chain.from_iterable(
                bitmap.positions()
                for key, bitmap in self.index.items()
                if fun(key))
        # NOTE(review): no bloom filter branch is visible here, so the method
        # returns None once the index has been converted.
        return None
def add(self, value, position):
    """Store the pair (value, position) in the index.

    While the index is a bitmap dict, the position is set in the BitIndex
    kept for ``value`` (created on first use).  If the dict already holds
    more than MAX_BIT_INDEX_SIZE values when this is called, all stored
    pairs are first moved into a new Bloomfilter, and the new pair is then
    added to that filter instead.
    """
    too_many_values = (self.index_type == BITMAP_INDEX
                       and len(self.index) > MAX_BIT_INDEX_SIZE)
    if too_many_values:
        # Switch representation: replay every stored pair into the filter.
        old_bitmaps = self.index
        self.index_type = BLOOMFILTER_INDEX
        self.index = Bloomfilter(BF_SIZE, BF_HASHES)
        for known_value in old_bitmaps:
            bitmap = old_bitmaps[known_value]
            for known_pos in bitmap.positions():
                pair = (known_value, known_pos)
                self.index.add([pair])

    if self.index_type == BITMAP_INDEX:
        if value not in self.index:
            self.index[value] = BitIndex()
        self.index[value].set(position)

    if self.index_type == BLOOMFILTER_INDEX:
        new_pair = (value, position)
        self.index.add([new_pair])
        # The pair that was just added must be reported as present.
        assert (value, position) in self.index
def test_bloomfilter(self):
    """Check that a Bloomfilter finds every added key and few unseen ones.

    The filter is built from ``calculate_parameters(100000, 0.01)``
    (presumably expected capacity and error rate -- confirm against
    Bloomfilter), filled with 40000 random keys below 80000, and then probed
    with the 20000 integers from 80000 to 99999, none of which were added.
    """
    m, k = Bloomfilter.calculate_parameters(100000, 0.01)
    b = Bloomfilter(m, k)
    # randrange(80000) excludes 80000 itself, so no key can coincide with the
    # first probe below; randint(0, 80000) would include it.
    keys = [random.randrange(80000) for _ in xrange(40000)]
    b.add(keys)
    self.assertTrue(keys[0] in b)
    self.assertTrue(all(b.match(keys)))
    # Every probe is a non-member, so each truthy match is a false positive.
    false_positives = sum(
        1 for hit in b.match(xrange(80000, 100000)) if hit)
    self.assertTrue(false_positives < 100000 * 0.01)
def test_bloomfilter(self):
    """Verify Bloomfilter membership for added keys and bound its false hits.

    40000 random keys in the range 0..79999 are added; all of them must be
    matched.  The integers 80000..99999 are never added, so any match among
    them is a false positive, and the number of those must stay below
    ``100000 * 0.01``.
    """
    m, k = Bloomfilter.calculate_parameters(100000, 0.01)
    b = Bloomfilter(m, k)
    # Keys are drawn from [0, 80000) so they stay disjoint from the probe
    # range; random.randint(0, 80000) could also produce 80000, which is the
    # first probed value.
    keys = [random.randrange(0, 80000) for _ in xrange(40000)]
    b.add(keys)
    self.assertTrue(keys[0] in b)
    self.assertTrue(all(b.match(keys)))
    unseen = xrange(80000, 100000)
    false_hits = sum(1 for matched in b.match(unseen) if matched)
    self.assertTrue(false_hits < 100000 * 0.01)