def test_select2(self):
    """Check ``select`` on evenly spaced bitmaps for strides 1, 2, 4, ..., 1024."""
    # The strides are the powers of two from 2**0 up to and including 2**10.
    for stride in (1 << shift for shift in range(11)):
        rb = ImmutableRoaringBitmap(range(0, 100000, stride))
        for position in range(0, 100000 // stride):
            # Element number ``position`` of the bitmap must be the
            # corresponding multiple of the stride.
            assert rb.select(position) == position * stride
def test_orlen(self, pair):
    """Check ``union_len`` against set union and against ``len(rb | rb2)``.

    ``pair`` is iterated as ``(name, data1, data2)`` triples; ``name`` is used
    as the assertion message.
    """
    for label, left, right in pair:
        expected = len(set(left) | set(right))
        rb = ImmutableRoaringBitmap(left)
        rb2 = ImmutableRoaringBitmap(right)
        # The dedicated length method must agree with the reference sets ...
        assert expected == rb.union_len(rb2), label
        # ... and with the size of the materialized union bitmap.
        assert len(rb | rb2) == rb.union_len(rb2), label
def test_rank2(self):
    """Check ``rank`` on a bitmap built from two differently spaced ranges."""
    rb = ImmutableRoaringBitmap(range(0, 100000, 7)).union(
        range(100000, 200000, 1000))

    # Below 100000 the members are the multiples of 7.
    for k in range(100000):
        assert rb.rank(k) == 1 + k // 7

    # Expected rank contributed by the whole first range.
    below = 1 + 100000 // 7
    # From 100000 on, the members are the multiples of 1000.
    for k in range(100000, 200000):
        assert rb.rank(k) == below + 1 + (k - 100000) // 1000
def test_and(self, pair):
    """Check ``&`` of two immutable bitmaps against set intersection.

    ``pair`` is iterated as ``(name, data1, data2)`` triples; ``name`` is used
    as the assertion message.
    """
    for label, left, right in pair:
        expected = set(left) & set(right)
        rb, rb2 = ImmutableRoaringBitmap(left), ImmutableRoaringBitmap(right)
        result = rb & rb2
        assert expected == set(result), label
        # The result type must be exactly RoaringBitmap, not a subclass.
        assert type(result) is RoaringBitmap, label
def test_inittrivial(self):
    """Build an immutable bitmap from the list [0, 1, 2, 3, 4] and verify it."""
    values = list(range(5))
    rb = ImmutableRoaringBitmap(values)
    rb._checkconsistency()
    assert set(values) == rb
    # The constructor must return exactly this class.
    assert type(rb) is ImmutableRoaringBitmap
def test_initrange(self):
    """Build immutable bitmaps from ``range(23, n)`` for three sizes."""
    # The three upper bounds create a positive, a dense, and an inverted
    # block, respectively.
    for n in (400, 6000, 61241):
        span = range(23, n)
        rb = ImmutableRoaringBitmap(span)
        rb._checkconsistency()
        assert set(span) == rb, n
def test_aggregateor(self, multi):
    """Check the n-ary ``union`` of immutable bitmaps against ``set.union``.

    ``multi`` is indexed and sliced as a sequence of element collections.
    """
    head, tail = multi[0], multi[1:]
    res1 = set(head).union(*(set(a) for a in tail))
    res2 = ImmutableRoaringBitmap(head).union(
        *(ImmutableRoaringBitmap(a) for a in tail))
    res2._checkconsistency()
    assert res1 == res2
def test_andlen(self, pair):
    """Check ``intersection_len`` against ``len(rb & rb2)`` and set intersection.

    ``pair`` is iterated as ``(name, data1, data2)`` triples; ``name`` is used
    as the assertion message.
    """
    for label, left, right in pair:
        expected = len(set(left) & set(right))
        rb = ImmutableRoaringBitmap(left)
        rb2 = ImmutableRoaringBitmap(right)
        # The dedicated length method must agree with the size of the
        # materialized intersection ...
        assert len(rb & rb2) == rb.intersection_len(rb2), label
        # ... and with the reference sets.
        assert expected == rb.intersection_len(rb2), label
def test_aggregateor(self, multi):
    """Check the n-ary ``union`` of immutable bitmaps against ``set.union``.

    ``multi`` is indexed and sliced as a sequence of element collections.
    """
    ref = set(multi[0])
    res1 = ref.union(*[set(a) for a in multi[1:]])
    rb = ImmutableRoaringBitmap(multi[0])
    res2 = rb.union(*[ImmutableRoaringBitmap(a) for a in multi[1:]])
    res2._checkconsistency()
    # No assertion message: this test has no per-case ``name`` variable, and
    # referencing one would raise NameError instead of reporting the failure.
    assert res1 == res2
def test_pickle(self, single):
    """Round-trip immutable bitmaps through pickle and validate both objects.

    ``single`` is iterated as ``(name, data)`` pairs; ``name`` is used as the
    assertion message.
    """
    for name, data in single:
        rb = ImmutableRoaringBitmap(data)
        rb_pickled = pickle.dumps(rb, protocol=-1)
        # Safe here: only bytes produced by the line above are unpickled.
        rb_unpickled = pickle.loads(rb_pickled)
        rb._checkconsistency()
        # Validate the restored object as well; checking only the original
        # would leave the round-trip result untested beyond equality.
        rb_unpickled._checkconsistency()
        assert rb_unpickled == rb, name
        assert type(rb) is ImmutableRoaringBitmap, name
        # NOTE(review): assumes unpickling restores the immutable class
        # rather than a plain RoaringBitmap -- confirm against the library.
        assert type(rb_unpickled) is ImmutableRoaringBitmap, name
def test_rank(self, single):
    """Check ``rank`` of ten randomly chosen members per data set.

    ``single`` is iterated as ``(name, data)`` pairs; ``name`` is used as the
    assertion message.
    """
    for label, data in single:
        ordered = sorted(set(data))
        rb = ImmutableRoaringBitmap(data)
        for _ in range(10):
            member = random.choice(ordered)
            assert member in rb, label
            # rank is compared with the 1-based position in the sorted list.
            assert rb.rank(member) == 1 + ordered.index(member), label
def test_initrb(self):
    """Convert between RoaringBitmap and ImmutableRoaringBitmap via constructors."""
    # mutable -> immutable -> mutable
    mutable = RoaringBitmap(range(5))
    immutable = ImmutableRoaringBitmap(mutable)
    assert RoaringBitmap(immutable) == immutable

    # immutable built directly from a range -> mutable
    immutable = ImmutableRoaringBitmap(range(5))
    assert RoaringBitmap(immutable) == immutable
def test_jaccard_dist(self, pair):
    """Check ``jaccard_dist`` and the length ratio against reference sets.

    ``pair`` is iterated as ``(name, data1, data2)`` triples; ``name`` is used
    as the assertion message. Values are compared with an absolute tolerance
    of 0.001.
    """
    for name, data1, data2 in pair:
        ref, ref2 = set(data1), set(data2)
        union = ref | ref2
        if not union:
            # Both inputs are empty: the ratio below would be 0 / 0 and
            # raise ZeroDivisionError, so there is nothing to compare.
            continue
        rb = ImmutableRoaringBitmap(data1)
        rb2 = ImmutableRoaringBitmap(data2)
        similarity = len(ref & ref2) / float(len(union))
        assert abs(
            similarity
            - rb.intersection_len(rb2) / float(rb.union_len(rb2))) < 0.001, name
        # The distance is compared with one minus the reference similarity.
        assert abs((1 - similarity) - rb.jaccard_dist(rb2)) < 0.001, name
def test_select(self, single):
    """Check ``select`` and its relation to ``rank`` at ten random positions.

    ``single`` is iterated as ``(name, data)`` pairs; ``name`` is used as the
    assertion message.
    """
    for label, data in single:
        ordered = sorted(set(data))
        rb = ImmutableRoaringBitmap(data)
        listed = list(rb)
        positions = [random.randint(0, len(ordered) - 1) for _ in range(10)]
        for pos in positions:
            # Iteration order must match the sorted reference.
            assert listed[pos] == ordered[pos], label
            value = rb.select(pos)
            assert value in rb, label
            assert value == ordered[pos], label
            # rank is 1-based, select is 0-based.
            assert rb.rank(value) - 1 == pos, label
            # The rank of the successor grows by one only if the successor
            # is itself a member.
            expected = pos + 1 if value + 1 in rb else pos
            assert rb.rank(value + 1) - 1 == expected, label
def test_jaccard(self, multi):
    """Check batched ``MultiRoaringBitmap.jaccard_dist`` against pairwise calls.

    ``multi`` is iterated as a collection of element collections; indices up
    to 8 are used, so at least nine entries are required.
    """
    mrb = MultiRoaringBitmap([ImmutableRoaringBitmap(a) for a in multi])
    # array typecodes; written as bytes when PY2 is true.
    index_code = b'L' if PY2 else 'L'
    float_code = b'd' if PY2 else 'd'
    indices1 = array.array(index_code, [0, 6, 8])
    indices2 = array.array(index_code, [1, 7, 6])
    res = mrb.jaccard_dist(indices1, indices2)
    # Reference: the distance of each index pair, computed one at a time.
    ref = array.array(float_code)
    for i, j in zip(indices1, indices2):
        ref.append(mrb[i].jaccard_dist(mrb[j]))
    assert res == ref
def test_andor_len_pairwise(self, multi):
    """Check ``andor_len_pairwise`` against per-pair ``&`` and ``|`` lengths.

    ``multi`` is iterated as a collection of element collections; indices up
    to 8 are used, so at least nine entries are required.
    """
    mrb = MultiRoaringBitmap([ImmutableRoaringBitmap(a) for a in multi])
    # array typecode; written as bytes when PY2 is true.
    code = b'L' if PY2 else 'L'
    indices1 = array.array(code, [0, 6, 8])
    indices2 = array.array(code, [1, 7, 6])
    # Zero-filled output buffers, passed to the method under test.
    res1 = array.array(code, [0] * len(indices1))
    res2 = array.array(code, [0] * len(indices1))
    mrb.andor_len_pairwise(indices1, indices2, res1, res2)

    # Reference values computed one pair at a time.
    couples = [(mrb[i], mrb[j]) for i, j in zip(indices1, indices2)]
    ref1 = array.array(code, [len(a & b) for a, b in couples])
    ref2 = array.array(code, [len(a | b) for a, b in couples])
    assert res1 == ref1
    assert res2 == ref2
def test_jaccard_dist(self, pair):
    """Check ``jaccard_dist`` and the length ratio against reference sets.

    ``pair`` is iterated as ``(name, data1, data2)`` triples; ``name`` is used
    as the assertion message. Pairs where both inputs are empty are skipped.
    """
    for label, left, right in pair:
        if len(left) == len(right) == 0:
            # The ratio below would divide by zero.
            continue
        ref, ref2 = set(left), set(right)
        rb = ImmutableRoaringBitmap(left)
        rb2 = ImmutableRoaringBitmap(right)
        similarity = len(ref & ref2) / float(len(ref | ref2))
        bitmap_ratio = rb.intersection_len(rb2) / float(rb.union_len(rb2))
        assert similarity == pytest.approx(bitmap_ratio), label
        # The distance is compared with one minus the reference similarity.
        assert 1 - similarity == pytest.approx(rb.jaccard_dist(rb2)), label
def test_inititerator(self, single):
    """Build immutable bitmaps from a generator instead of a concrete container.

    ``single`` is iterated as ``(name, data)`` pairs; ``name`` is used as the
    assertion message.
    """
    for label, data in single:
        expected = set(data)
        # Pass a generator expression so the constructor sees a one-shot
        # iterator rather than the underlying collection.
        rb = ImmutableRoaringBitmap(item for item in data)
        rb._checkconsistency()
        assert expected == rb, label
def test_initunsorted(self, single):
    """Build immutable bitmaps directly from each data set and compare to a set.

    ``single`` is iterated as ``(name, data)`` pairs; ``name`` is used as the
    assertion message.
    """
    for label, data in single:
        expected = set(data)
        bitmap = ImmutableRoaringBitmap(data)
        bitmap._checkconsistency()
        assert expected == bitmap, label
def test_aggregateand(self, multi):
    """Check ``MultiRoaringBitmap.intersection`` over all members.

    ``multi`` is indexed, sliced and iterated as a sequence of element
    collections.
    """
    res1 = set(multi[0]).intersection(*(set(a) for a in multi[1:]))
    mrb = MultiRoaringBitmap([ImmutableRoaringBitmap(a) for a in multi])
    # Intersect every bitmap held by the container.
    every_index = list(range(len(mrb)))
    res2 = mrb.intersection(every_index)
    assert res1 == res2
def test_sub(self, pair):
    """Check ``-`` of two immutable bitmaps against set difference.

    ``pair`` is iterated as ``(name, data1, data2)`` triples; ``name`` is used
    as the assertion message.
    """
    for label, left, right in pair:
        expected = set(left) - set(right)
        rb = ImmutableRoaringBitmap(left)
        rb2 = ImmutableRoaringBitmap(right)
        assert expected == set(rb - rb2), label