Esempio n. 1
0
class AdaptiveIndex(object):
    """Index of (value, position) pairs that switches storage as it grows.

    The index starts in bitmap mode, where ``self.index`` is a dict mapping
    each distinct value to a ``BitIndex`` holding the positions at which that
    value was added.  Once the dict holds more than ``MAX_BIT_INDEX_SIZE``
    distinct values, the next ``add`` copies every stored pair into a
    ``Bloomfilter`` and the index continues in Bloom-filter mode.
    """

    def __init__(self):
        # Bitmap mode: value -> BitIndex of positions.
        self.index = {}
        self.index_type = BITMAP_INDEX

    def add(self, value, position):
        """Record that ``value`` occurs at ``position``."""
        if self.index_type == BITMAP_INDEX and len(self.index) > MAX_BIT_INDEX_SIZE:
            # Too many distinct values for per-value bitmaps: move every
            # stored (value, position) pair into a Bloomfilter instead.
            bitmaps = self.index
            self.index_type = BLOOMFILTER_INDEX
            self.index = Bloomfilter(BF_SIZE, BF_HASHES)
            for v, bitmap in bitmaps.items():
                for pos in bitmap.positions():
                    self.index.add([(v, pos)])

        if self.index_type == BITMAP_INDEX:
            if value not in self.index:
                self.index[value] = BitIndex()
            self.index[value].set(position)
        elif self.index_type == BLOOMFILTER_INDEX:
            self.index.add([(value, position)])
            # Sanity check: a pair that was just added must be reported.
            assert (value, position) in self.index

    def get(self, value, pos):
        """Return whether ``value`` is recorded at ``pos``.

        In bitmap mode an unknown value yields ``False``.  In Bloom-filter
        mode the answer is whatever the Bloomfilter membership test reports
        (presumably subject to false positives -- confirm against the
        Bloomfilter implementation).
        """
        if self.index_type == BITMAP_INDEX:
            return self.index[value].get(pos) if value in self.index else False
        else:
            return (value, pos) in self.index

    def filter(self, fun):
        """Return an iterator over the positions of all values accepted by ``fun``.

        ``fun`` is called with each indexed value; the positions of every
        value for which it returns a true result are yielded one after
        another.  Only bitmap mode can enumerate its contents; in any other
        mode ``None`` is returned.
        """
        if self.index_type == BITMAP_INDEX:
            # chain.from_iterable flattens the per-value position iterables;
            # plain chain(<generator>) would just re-yield each iterable
            # object unflattened.
            return chain.from_iterable(
                v.positions() for k, v in self.index.items() if fun(k))
        # The Bloom-filter branch has nothing to enumerate here.
        return None
Esempio n. 2
0
class AdaptiveIndex(object):
    """Index of (value, position) pairs that switches storage as it grows.

    The index starts in bitmap mode, where ``self.index`` is a dict mapping
    each distinct value to a ``BitIndex`` holding the positions at which that
    value was added.  Once the dict holds more than ``MAX_BIT_INDEX_SIZE``
    distinct values, the next ``add`` copies every stored pair into a
    ``Bloomfilter`` and the index continues in Bloom-filter mode.
    """

    def __init__(self):
        # Bitmap mode: value -> BitIndex of positions.
        self.index = {}
        self.index_type = BITMAP_INDEX

    def add(self, value, position):
        """Record that ``value`` occurs at ``position``."""
        if self.index_type == BITMAP_INDEX and len(self.index) > MAX_BIT_INDEX_SIZE:
            # Too many distinct values for per-value bitmaps: move every
            # stored (value, position) pair into a Bloomfilter instead.
            bitmaps = self.index
            self.index_type = BLOOMFILTER_INDEX
            self.index = Bloomfilter(BF_SIZE, BF_HASHES)
            for v, bitmap in bitmaps.items():
                for pos in bitmap.positions():
                    self.index.add([(v, pos)])

        if self.index_type == BITMAP_INDEX:
            if value not in self.index:
                self.index[value] = BitIndex()
            self.index[value].set(position)
        elif self.index_type == BLOOMFILTER_INDEX:
            self.index.add([(value, position)])
            # Sanity check: a pair that was just added must be reported.
            assert (value, position) in self.index

    def get(self, value, pos):
        """Return whether ``value`` is recorded at ``pos``.

        In bitmap mode an unknown value yields ``False``.  In Bloom-filter
        mode the answer is whatever the Bloomfilter membership test reports
        (presumably subject to false positives -- confirm against the
        Bloomfilter implementation).
        """
        if self.index_type == BITMAP_INDEX:
            return self.index[value].get(pos) if value in self.index else False
        else:
            return (value, pos) in self.index

    def filter(self, fun):
        """Return an iterator over the positions of all values accepted by ``fun``.

        ``fun`` is called with each indexed value; the positions of every
        value for which it returns a true result are yielded one after
        another.  Only bitmap mode can enumerate its contents; in any other
        mode ``None`` is returned.
        """
        if self.index_type == BITMAP_INDEX:
            # chain.from_iterable flattens the per-value position iterables;
            # plain chain(<generator>) would just re-yield each iterable
            # object unflattened.
            return chain.from_iterable(
                v.positions() for k, v in self.index.items() if fun(k))
        # The Bloom-filter branch has nothing to enumerate here.
        return None
Esempio n. 3
0
    def add(self, value, position):
        """Record that ``value`` occurs at ``position``.

        In bitmap mode every distinct value owns a ``BitIndex`` in which
        ``position`` is set.  When the bitmap dict already holds more than
        ``MAX_BIT_INDEX_SIZE`` values, all stored pairs are first copied into
        a new ``Bloomfilter`` and the new pair is then added to that filter.
        """
        if self.index_type == BITMAP_INDEX and len(self.index) > MAX_BIT_INDEX_SIZE:
            # Switch representation: replay every known pair into the filter.
            bitmaps = self.index
            self.index_type = BLOOMFILTER_INDEX
            self.index = Bloomfilter(BF_SIZE, BF_HASHES)
            for known_value, bitmap in bitmaps.items():
                for known_position in bitmap.positions():
                    self.index.add([(known_value, known_position)])

        mode = self.index_type
        if mode == BITMAP_INDEX:
            if value not in self.index:
                self.index[value] = BitIndex()
            self.index[value].set(position)
        elif mode == BLOOMFILTER_INDEX:
            entry = (value, position)
            self.index.add([entry])
            # A pair that was just inserted must be reported as present.
            assert entry in self.index
Esempio n. 4
0
 def test_bloomfilter(self):
     """Check Bloomfilter membership and its false-positive count.

     Every inserted key must be reported as present, both through ``in``
     and through ``match``; probing values that were never inserted must
     produce fewer than ``100000 * 0.01`` hits.
     """
     # Arguments are presumably (expected item count, target error rate)
     # -- confirm against Bloomfilter.calculate_parameters.
     m, k = Bloomfilter.calculate_parameters(100000, 0.01)
     b = Bloomfilter(m, k)
     # randint() includes both endpoints, so cap the keys at 79999: the
     # probe range below starts at 80000 and must not contain a real key.
     keys = [random.randint(0, 79999) for _ in xrange(40000)]
     b.add(keys)
     self.assertTrue(keys[0] in b)
     self.assertTrue(all(b.match(keys)))
     # Count the hits without building an intermediate list.
     false_positives = sum(
         1 for hit in b.match(xrange(80000, 100000)) if hit)
     self.assertTrue(false_positives < 100000 * 0.01)
Esempio n. 5
0
 def test_bloomfilter(self):
     """Check Bloomfilter membership and its false-positive count.

     Every inserted key must be reported as present, both through ``in``
     and through ``match``; probing values that were never inserted must
     produce fewer than ``100000 * 0.01`` hits.
     """
     # Arguments are presumably (expected item count, target error rate)
     # -- confirm against Bloomfilter.calculate_parameters.
     m, k = Bloomfilter.calculate_parameters(100000, 0.01)
     b = Bloomfilter(m, k)
     # randint() includes both endpoints, so cap the keys at 79999: the
     # probe range below starts at 80000 and must not contain a real key.
     keys = [random.randint(0, 79999) for _ in xrange(40000)]
     b.add(keys)
     self.assertTrue(keys[0] in b)
     self.assertTrue(all(b.match(keys)))
     # Count the hits without building an intermediate list.
     false_positives = sum(
         1 for hit in b.match(xrange(80000, 100000)) if hit)
     self.assertTrue(false_positives < 100000 * 0.01)
Esempio n. 6
0
    def add(self, value, position):
        """Record that ``value`` occurs at ``position``.

        In bitmap mode every distinct value owns a ``BitIndex`` in which
        ``position`` is set.  When the bitmap dict already holds more than
        ``MAX_BIT_INDEX_SIZE`` values, all stored pairs are first copied into
        a new ``Bloomfilter`` and the new pair is then added to that filter.
        """
        if self.index_type == BITMAP_INDEX and len(self.index) > MAX_BIT_INDEX_SIZE:
            # Switch representation: replay every known pair into the filter.
            bitmaps = self.index
            self.index_type = BLOOMFILTER_INDEX
            self.index = Bloomfilter(BF_SIZE, BF_HASHES)
            for known_value, bitmap in bitmaps.items():
                for known_position in bitmap.positions():
                    self.index.add([(known_value, known_position)])

        mode = self.index_type
        if mode == BITMAP_INDEX:
            if value not in self.index:
                self.index[value] = BitIndex()
            self.index[value].set(position)
        elif mode == BLOOMFILTER_INDEX:
            entry = (value, position)
            self.index.add([entry])
            # A pair that was just inserted must be reported as present.
            assert entry in self.index