def test_select2(self):
    """Check ``select`` on evenly spaced bitmaps for strides 1, 2, 4, ..., 1024."""
    for exponent in range(11):
        stride = 1 << exponent
        rb = RoaringBitmap(range(0, 100000, stride))
        for position in range(100000 // stride):
            # The k-th element of a bitmap with a fixed stride is k * stride.
            assert rb.select(position) == position * stride
def test_pickle(self, single):
    """Round-trip each dataset through pickle and compare with the original.

    ``single`` is iterated as ``(name, data)`` pairs; ``name`` is used as the
    assertion message so a failure identifies the dataset.
    """
    for name, data in single:
        rb = RoaringBitmap(data)
        rb_pickled = pickle.dumps(rb, protocol=-1)
        rb_unpickled = pickle.loads(rb_pickled)
        rb._checkconsistency()
        # The restored bitmap is the object under test, so validate its
        # internal structure too, not only that of the source bitmap.
        rb_unpickled._checkconsistency()
        assert rb_unpickled == rb, name
def test_initrange(self):
    """Build bitmaps from ``range`` objects and compare with reference sets."""
    # creates a positive, dense, and inverted block, respectively
    for stop in (400, 6000, 61241):
        expected = set(range(23, stop))
        bitmap = RoaringBitmap(range(23, stop))
        bitmap._checkconsistency()
        assert expected == bitmap, ('range(23, %d)' % stop)
def test_aggregateor(self, multi):
    """Union several bitmaps in one ``update`` call and compare with sets.

    ``multi`` is indexed and sliced as a sequence of integer collections:
    the first seeds the result, the rest are merged into it.
    """
    ref = set(multi[0])
    ref.update(*[set(a) for a in multi[1:]])
    rb = RoaringBitmap(multi[0])
    rb.update(*[RoaringBitmap(a) for a in multi[1:]])
    rb._checkconsistency()
    # This test has no per-dataset ``name``; using one as the assertion
    # message would raise NameError instead of AssertionError on failure.
    assert rb == ref
def test_rank2(self):
    """Check ``rank`` over a bitmap with a stride-7 part and a stride-1000 part."""
    bitmap = RoaringBitmap(range(0, 100000, 7))
    bitmap.update(range(100000, 200000, 1000))

    # First part: multiples of 7 below 100000.
    for value in range(100000):
        assert bitmap.rank(value) == 1 + value // 7

    # Second part: every element of the first part is counted, plus the
    # multiples of 1000 from 100000 up to ``value``.
    for value in range(100000, 200000):
        expected = 1 + 100000 // 7 + 1 + (value - 100000) // 1000
        assert bitmap.rank(value) == expected
def test_rank(self, single):
    """Compare ``rank`` of random members with their 1-based sorted position.

    ``single`` is iterated as ``(name, data)`` pairs; ``name`` is the
    assertion message.
    """
    for name, data in single:
        ordered = sorted(set(data))
        bitmap = RoaringBitmap(data)
        for _ in range(10):
            member = random.choice(ordered)
            assert member in bitmap, name
            assert bitmap.rank(member) == ordered.index(member) + 1, name
def test_ixor(self, pair):
    """In-place symmetric difference must agree with the built-in ``set``.

    ``pair`` is iterated as ``(data1, data2)`` tuples.
    """
    for left, right in pair:
        expected = set(left)
        expected ^= set(right)

        bitmap = RoaringBitmap(left)
        bitmap ^= RoaringBitmap(right)

        assert len(expected) == len(bitmap)
        assert expected == set(bitmap)
def test_ior(self, pair):
    """In-place union must agree with the built-in ``set``.

    ``pair`` is iterated as ``(data1, data2)`` tuples.
    """
    for left, right in pair:
        expected = set(left)
        expected |= set(right)

        bitmap = RoaringBitmap(left)
        bitmap |= RoaringBitmap(right)

        # Compare via conversion to a set and via direct comparison.
        assert set(expected) == set(bitmap)
        assert bitmap == expected
def test_none(self, multi):
    """A MultiRoaringBitmap containing an empty bitmap keeps length and order.

    An empty bitmap is inserted at index 4; intersecting indices 4 and 5 is
    then expected to yield ``None``.
    """
    bitmaps = [RoaringBitmap(values) for values in multi]
    bitmaps.insert(4, RoaringBitmap())
    collection = MultiRoaringBitmap(bitmaps)

    assert len(bitmaps) == len(collection)
    for expected, actual in zip(bitmaps, collection):
        assert expected == actual
    assert collection.intersection([4, 5]) is None
def test_issue19(self):
    """Regression test: subtracting both operands of a union leaves nothing."""
    accumulated = RoaringBitmap()
    large = RoaringBitmap(range(4095))
    small = RoaringBitmap(range(2))

    accumulated |= large
    accumulated |= small

    leftover_inplace = accumulated - large - small
    assert len(leftover_inplace) == 0

    leftover_binary = (large | small) - large - small
    assert len(leftover_binary) == 0
def test_ior(self, pair):
    """In-place union must agree with the built-in ``set``.

    ``pair`` is iterated as ``(name, data1, data2)`` tuples; ``name`` is the
    assertion message.
    """
    for name, left, right in pair:
        expected = set(left)
        expected |= set(right)

        bitmap = RoaringBitmap(left)
        bitmap |= RoaringBitmap(right)

        bitmap._checkconsistency()
        assert bitmap == expected, name
def test_aggregateor(self):
    """Union ten random lists in one ``update`` call and compare with sets."""
    samples = [[random.randint(0, 1000) for _ in range(2000)]
               for _ in range(10)]
    first, others = samples[0], samples[1:]

    expected = set(first)
    expected.update(*[set(values) for values in others])

    bitmap = RoaringBitmap(first)
    bitmap.update(*[RoaringBitmap(values) for values in others])

    assert expected == set(bitmap)
    assert bitmap == expected
def test_rank(self, single):
    """Compare ``rank`` of random members with their 1-based sorted position.

    ``single`` is iterated as plain data collections. The debugging
    ``print(len(rb))`` that used to run for every dataset has been removed;
    it produced output without checking anything.
    """
    for data in single:
        ref = sorted(set(data))
        rb = RoaringBitmap(data)
        for _ in range(10):
            x = random.choice(ref)
            assert x in rb
            assert rb.rank(x) == ref.index(x) + 1
def test_clamp(self, single):
    """``clamp(a, b)`` must equal intersecting with ``range(a, b)``.

    ``single`` is iterated as ``(name, data)`` pairs; the bounds are two
    random elements of ``data`` in sorted order.
    """
    for name, data in single:
        lower, upper = sorted(random.sample(data, 2))
        window = range(lower, upper)

        expected = set(data).intersection(window)
        intersected = RoaringBitmap(data).intersection(window)
        clamped = RoaringBitmap(data).clamp(lower, upper)

        assert lower <= clamped.min() and clamped.max() < upper, name
        assert expected == clamped, (name, lower, upper)
        assert intersected == clamped, (name, lower, upper)
def test_ixor(self, pair):
    """In-place symmetric difference must agree with the built-in ``set``.

    ``pair`` is iterated as ``(name, data1, data2)`` tuples; ``name`` is the
    assertion message.
    """
    for name, left, right in pair:
        expected = set(left)
        expected ^= set(right)

        bitmap = RoaringBitmap(left)
        bitmap ^= RoaringBitmap(right)

        bitmap._checkconsistency()
        assert len(expected) == len(bitmap), name
        assert expected == bitmap, name
def test_pop(self):
    """Pop elements until the bitmap is falsy, then verify it is empty."""
    values = [
        60748, 28806, 54664, 28597, 58922, 75684, 56364,
        67421, 52608, 55686, 10427, 48506, 64363, 14506,
        73077, 59035, 70246, 19875, 73145, 40225, 58664,
        6597, 65554, 73102, 26636, 74227, 59566, 19023,
    ]
    bitmap = RoaringBitmap(values)
    while bitmap:
        bitmap.pop()
    bitmap._checkconsistency()
    assert len(bitmap) == 0
def test_fixtures(single):
    """Sanity-check that each named dataset yields the expected bitmap shape.

    ``single`` is iterated as ``(name, data)`` pairs. Two names are handled
    specially; for every other dataset the upper-cased first letter of the
    name must appear in the bitmap's ``debuginfo()`` output.
    """
    for name, data in single:
        bitmap = RoaringBitmap(data)
        if name == 'many keys':
            assert len(bitmap._keys()) > 100
            continue
        if name == 'empty':
            assert len(bitmap) == 0
            continue
        initial = name[0].upper()
        assert initial in bitmap.debuginfo()
def test_eq(self, single):
    """Two bitmaps built from the same data compare equal, just like sets."""
    for data in single:
        set_a, set_b = set(data), set(data)
        bitmap_a, bitmap_b = RoaringBitmap(data), RoaringBitmap(data)

        assert set_a == set_b
        assert bitmap_a == bitmap_b

        # The result of ``==`` itself must match between the two types.
        sets_equal = set_a == set_b
        bitmaps_equal = bitmap_a == bitmap_b
        assert sets_equal == bitmaps_equal
def test_neq(self, pair):
    """Bitmaps built from different data compare unequal, just like sets.

    ``pair`` is iterated as ``(data1, data2)`` tuples.
    """
    for left, right in pair:
        set_a, set_b = set(left), set(right)
        bitmap_a, bitmap_b = RoaringBitmap(left), RoaringBitmap(right)

        assert set_a != set_b
        assert bitmap_a != bitmap_b

        # The result of ``!=`` itself must match between the two types.
        sets_differ = set_a != set_b
        bitmaps_differ = bitmap_a != bitmap_b
        assert sets_differ == bitmaps_differ
def test_initrb(self):
    """Mutable and immutable bitmaps can be constructed from one another."""
    # mutable -> immutable -> mutable
    mutable = RoaringBitmap(range(5))
    frozen = ImmutableRoaringBitmap(mutable)
    mutable = RoaringBitmap(frozen)
    assert mutable == frozen

    # immutable built directly from a range -> mutable
    frozen = ImmutableRoaringBitmap(range(5))
    mutable = RoaringBitmap(frozen)
    assert mutable == frozen
def test_jaccard_dist(self, pair):
    """Check ``intersection_len``/``union_len`` and ``jaccard_dist`` against sets.

    ``pair`` is iterated as ``(name, data1, data2)`` tuples. Values are
    compared with an absolute tolerance of 0.001.
    """
    for name, left, right in pair:
        ref, ref2 = set(left), set(right)
        rb, rb2 = RoaringBitmap(left), RoaringBitmap(right)

        # Jaccard similarity computed with built-in sets.
        similarity = len(ref & ref2) / float(len(ref | ref2))

        ratio = rb.intersection_len(rb2) / float(rb.union_len(rb2))
        assert abs(similarity - ratio) < 0.001, name

        assert abs((1 - similarity) - rb.jaccard_dist(rb2)) < 0.001, name
def test_disjoint(self, pair):
    """``isdisjoint`` is false for each pair and true once overlap is removed.

    ``pair`` is iterated as ``(data1, data2)`` tuples.
    """
    for left, right in pair:
        left_set, right_set = set(left), set(right)
        left_rb, right_rb = RoaringBitmap(left), RoaringBitmap(right)

        assert not left_set.isdisjoint(right_set)
        assert not left_rb.isdisjoint(right_rb)

        # Keep only the elements of the second dataset absent from the first.
        remainder = [item for item in right if item not in left_set]
        remainder_set = set(remainder)
        remainder_rb = RoaringBitmap(remainder)

        assert left_set.isdisjoint(remainder_set)
        assert left_rb.isdisjoint(remainder_rb)
def test_disjoint(self, pair):
    """``isdisjoint`` on bitmaps must give the same answer as on sets.

    ``pair`` is iterated as ``(name, data1, data2)`` tuples; ``name`` is the
    assertion message.
    """
    for name, left, right in pair:
        left_set, right_set = set(left), set(right)
        left_rb, right_rb = RoaringBitmap(left), RoaringBitmap(right)

        expected = left_set.isdisjoint(right_set)
        assert left_rb.isdisjoint(right_rb) == expected, name

        # Repeat with the overlap removed from the second dataset.
        remainder = [item for item in right if item not in left_set]
        remainder_set = set(remainder)
        remainder_rb = RoaringBitmap(remainder)

        expected_remainder = left_set.isdisjoint(remainder_set)
        assert left_rb.isdisjoint(remainder_rb) == expected_remainder, name
def test_clamp2(self):
    """Check ``clamp`` with bounds placed around sparse, widely spaced values."""
    low = RoaringBitmap([0x00010001])
    middle = RoaringBitmap([0x00030003, 0x00050005])
    high = RoaringBitmap([0x00070007])
    everything = low | middle | high

    # A range covering every element keeps the bitmap unchanged.
    assert everything.clamp(0, 0x000FFFFF) == everything

    # Raising the start drops the lowest element; the start is inclusive.
    assert everything.clamp(0x000200FF, 0x000FFFFF) == middle | high
    assert everything.clamp(0x00030003, 0x000FFFFF) == middle | high

    # Lowering the stop drops the highest element; the stop is exclusive.
    assert everything.clamp(0, 0x00060006) == low | middle
    assert everything.clamp(0, 0x00050006) == low | middle
    assert everything.clamp(0, 0x00050005) == low | RoaringBitmap([0x00030003])
def test_contains(self, single):
    """Membership tests must agree with ``set`` for present and absent values.

    ``single`` is iterated as ``(name, data)`` pairs; absent values are drawn
    from ``range(20000)``.
    """
    for name, data in single:
        expected = set(data)
        bitmap = RoaringBitmap(data)

        for present in data:
            assert present in expected, name
            assert present in bitmap, name

        missing = set(range(20000)) - set(data)
        for absent in missing:
            assert absent not in expected, name
            assert absent not in bitmap, name

        bitmap._checkconsistency()
def test_subset(self, pair):
    """``<=`` is false for each pair and true for a prefix of the second dataset.

    ``pair`` is iterated as ``(data1, data2)`` tuples.
    """
    for left, right in pair:
        left_set, right_set = set(left), set(right)
        left_rb, right_rb = RoaringBitmap(left), RoaringBitmap(right)

        assert not (left_set <= right_set)
        assert not (set(left_rb) <= right_set)
        assert not (left_rb <= right_rb)

        # The first half of the second dataset is necessarily a subset of it.
        half = len(right) // 2
        left_set = set(right[:half])
        left_rb = RoaringBitmap(right[:half])

        assert left_set <= right_set
        assert set(left_rb) <= right_set
        assert left_rb <= right_rb
def test_subset(self, pair):
    """``<=`` on bitmaps must give the same answer as on sets.

    ``pair`` is iterated as ``(name, data1, data2)`` tuples; ``name`` is the
    assertion message.
    """
    for name, left, right in pair:
        left_set, right_set = set(left), set(right)
        left_rb, right_rb = RoaringBitmap(left), RoaringBitmap(right)

        expected = left_set <= right_set
        assert (set(left_rb) <= right_set) == expected, name
        assert (left_rb <= right_rb) == expected, name

        # Repeat with the first half of the second dataset as the left operand.
        half = len(right) // 2
        left_set = set(right[:half])
        left_rb = RoaringBitmap(right[:half])

        expected = left_set <= right_set
        assert (set(left_rb) <= right_set) == expected, name
        assert (left_rb <= right_rb) == expected, name
def test_phase_2_no_temp_table_from_phase_1():
    """Run ``_phase_2`` on a fresh CloStream with one hand-built table entry.

    The table maps the itemset ``{'C', 'D'}`` to closed-itemset id 0, which
    the first assertion shows to be the empty itemset.
    """
    stream = CloStream(filter_fn=NO_FILTER_FN)
    itemset = frozenset('CD')
    table = {itemset: 0}

    assert stream.closed_df.loc[0, 'itemset'] == frozenset()

    stream._phase_2(table)

    expected_rows = [[frozenset(), 0], [frozenset('CD'), 1]]
    assert stream.closed_df.values.tolist() == expected_rows

    expected_map = {'C': RoaringBitmap([1]), 'D': RoaringBitmap([1])}
    assert stream.cid_list_map == expected_map
def test_discard(self, single):
    """Adding and then discarding every element leaves set and bitmap empty."""
    for data in single:
        expected = set()
        bitmap = RoaringBitmap()

        for value in sorted(data):
            expected.add(value)
            bitmap.add(value)

        for value in sorted(data):
            expected.discard(value)
            bitmap.discard(value)

        assert len(expected) == 0
        assert len(bitmap) == 0
        assert set(expected) == set(bitmap)
        assert bitmap == expected
def test_select(self, single):
    """Check ``select`` and its relationship to ``rank`` at random indices.

    For each dataset in ``single``, ten random indices are drawn and the
    element at each index is compared with the sorted reference list.
    ``rank`` must be the 1-based inverse of ``select``.
    """
    for data in single:
        ref = sorted(set(data))
        rb = RoaringBitmap(data)
        lrb = list(rb)
        # random.randint includes both endpoints, so the upper bound must be
        # the last valid index; ``len(ref)`` itself would raise IndexError.
        idx = [random.randint(0, len(ref) - 1) for _ in range(10)]
        for i in idx:
            assert lrb[i] == ref[i]
            assert rb.select(i) in rb
            assert rb.select(i) == ref[i]
            assert rb.rank(rb.select(i)) - 1 == i
            # The rank of the successor value grows by one only when that
            # value is itself a member of the bitmap.
            if rb.select(i) + 1 in rb:
                assert rb.rank(rb.select(i) + 1) - 1 == i + 1
            else:
                assert rb.rank(rb.select(i) + 1) - 1 == i
def test_clamp(self, multi):
    """Intersect all bitmaps of a MultiRoaringBitmap restricted to ``[a, b)``.

    The bounds are two random elements of the first dataset in sorted order.
    """
    lower, upper = sorted(sample(multi[0], 2))

    as_sets = [set(values) for values in multi]
    expected = set.intersection(*as_sets) & set(range(lower, upper))

    collection = MultiRoaringBitmap([RoaringBitmap(values) for values in multi])
    every_index = list(range(len(collection)))
    result = collection.intersection(every_index, start=lower, stop=upper)

    assert lower <= result.min() and result.max() < upper
    assert expected == result
def test_minmax(self):
    """Check ``min``, ``max`` and indexing on ranges of 61440 and 61441 values."""
    shorter = RoaringBitmap(range(0, 61440))
    assert shorter.min() == 0
    assert shorter.max() == 61439

    longer = RoaringBitmap(range(0, 61441))
    assert longer.min() == 0
    assert longer.max() == 61440
    # The last element must be reachable by index and by iteration.
    assert longer[61440] == 61440
    assert list(longer)[61440] == 61440
def add(self, transaction):
    """Record one transaction under the next transaction id.

    The transaction is de-duplicated through ``frozenset``. The current
    value of ``self.n_transactions`` is added to the entry of every item in
    ``self.item_to_tids``; an item without an entry gets a new RoaringBitmap
    holding just that id. ``self.n_transactions`` is then incremented.
    """
    tid = self.n_transactions
    index = self.item_to_tids
    for item in frozenset(transaction):
        if item not in index:
            # First occurrence of this item: start a new id bitmap.
            index[item] = RoaringBitmap([tid])
            continue
        index[item].add(tid)
    self.n_transactions += 1
def test_aggregateor(self, multi):
    """Union several bitmaps in one ``update`` call and compare with sets.

    ``multi`` is indexed and sliced as a sequence of integer collections.
    """
    first, others = multi[0], multi[1:]

    expected = set(first)
    expected.update(*[set(values) for values in others])

    bitmap = RoaringBitmap(first)
    bitmap.update(*[RoaringBitmap(values) for values in others])

    bitmap._checkconsistency()
    assert bitmap == expected
def test_aggregateand(self, multi):
    """Intersect several bitmaps in one call and compare with sets.

    ``multi`` is indexed and sliced as a sequence of integer collections:
    the first seeds the result, the rest are intersected into it.
    """
    ref = set(multi[0])
    ref.intersection_update(*[set(a) for a in multi[1:]])
    rb = RoaringBitmap(multi[0])
    rb.intersection_update(*[RoaringBitmap(a) for a in multi[1:]])
    rb._checkconsistency()
    # This test has no per-dataset ``name``; using one as the assertion
    # message would raise NameError instead of AssertionError on failure.
    assert rb == ref
def test_issue22(self):
    """Regression test: self-xor, self-difference and inversion give empty bitmaps."""
    shorter = RoaringBitmap(range(0, 61440))
    longer = RoaringBitmap(range(0, 61441))

    # Binary operators.
    assert len(shorter ^ shorter) == 0
    assert len(shorter - shorter) == 0
    assert len(longer ^ longer) == 0
    assert len(longer - longer) == 0
    assert len(~shorter) == 0
    assert len(~longer) == 0

    # In-place symmetric difference.
    longer = RoaringBitmap(range(0, 61441))
    assert len(shorter ^ shorter) == 0
    longer ^= longer
    assert len(longer) == 0

    # In-place difference.
    longer = RoaringBitmap(range(0, 61441))
    longer -= longer
    assert len(longer) == 0
def test_eq(self, multi):
    """MultiRoaringBitmap equality against lists and other instances."""
    bitmaps = [RoaringBitmap(values) for values in multi]
    full = MultiRoaringBitmap(bitmaps)
    full_again = MultiRoaringBitmap(bitmaps)
    without_first = MultiRoaringBitmap(bitmaps[1:])

    # Equal to the list it was built from and to an identical instance.
    assert full == bitmaps
    assert full == full_again

    # Unequal once the first bitmap is dropped.
    assert full != bitmaps[1:]
    assert full != without_first
def test_discard(self, single):
    """Adding and then discarding every element leaves set and bitmap empty.

    ``single`` is iterated as ``(name, data)`` pairs; ``name`` is the
    assertion message.
    """
    for name, data in single:
        expected = set()
        bitmap = RoaringBitmap()

        for value in sorted(data):
            expected.add(value)
            bitmap.add(value)

        for value in sorted(data):
            expected.discard(value)
            bitmap.discard(value)

        bitmap._checkconsistency()
        assert len(expected) == 0, name
        assert len(bitmap) == 0, name
        assert bitmap == expected, name
def _indexfile(filename):
    """Create bitmap with locations of non-empty lines.

    The file is read in binary mode. The byte offset at which each
    non-whitespace line starts is recorded, followed by the total number of
    bytes read. The result of ``freeze()`` on the bitmap is returned.
    """
    offsets = RoaringBitmap()
    position = 0
    with open(filename, 'rb') as stream:
        for raw_line in stream:
            blank = raw_line.isspace()
            if not blank:
                offsets.add(position)
            # Advance past this line whether or not it was recorded.
            position += len(raw_line)
    # Final entry: the offset just past the last line.
    offsets.add(position)
    return offsets.freeze()
def test_inititerator(self, single):
    """A bitmap built from a generator expression must match the equivalent set.

    ``single`` is iterated as ``(name, data)`` pairs; ``name`` is the
    assertion message.
    """
    for name, data in single:
        expected = set(item for item in data)
        bitmap = RoaringBitmap(item for item in data)
        bitmap._checkconsistency()
        assert expected == bitmap, name
def test_andlen(self, pair):
    """``intersection_len`` must equal the size of the set intersection.

    ``pair`` is iterated as ``(data1, data2)`` tuples.
    """
    for left, right in pair:
        left_set, right_set = set(left), set(right)
        left_rb, right_rb = RoaringBitmap(left), RoaringBitmap(right)
        expected = len(left_set & right_set)
        assert expected == left_rb.intersection_len(right_rb)
def test_orlen(self, pair):
    """``union_len`` must equal the size of the set union and of ``rb | rb2``.

    ``pair`` is iterated as ``(name, data1, data2)`` tuples; ``name`` is the
    assertion message.
    """
    for name, left, right in pair:
        left_set, right_set = set(left), set(right)
        left_rb, right_rb = RoaringBitmap(left), RoaringBitmap(right)

        assert len(left_set | right_set) == left_rb.union_len(right_rb), name
        assert len(left_rb | right_rb) == left_rb.union_len(right_rb), name
def test_add(self, single):
    """Add elements one by one and check the accepted value range.

    ``single`` is iterated as ``(name, data)`` pairs; ``name`` is the
    assertion message. After the bitmap matches the reference set, values
    just outside the 32-bit unsigned range must raise ``OverflowError`` and
    the two extreme values inside it must be accepted.
    """
    for name, data in single:
        ref = set()
        rb = RoaringBitmap()
        for n in sorted(data):
            ref.add(n)
            rb.add(n)
        assert rb == ref, name
        # One ``raises`` block per call: a block ends at the first exception,
        # so a second call placed in the same block would never run.
        with pytest.raises(OverflowError):
            rb.add(-1)
        with pytest.raises(OverflowError):
            rb.add(1 << 32)
        rb.add(0)
        rb.add((1 << 32) - 1)
        rb._checkconsistency()
def test_inittrivial(self):
    """A bitmap built from the list ``[0, 1, 2, 3, 4]`` matches the same set."""
    values = list(range(5))
    bitmap = RoaringBitmap(values)
    expected = set(values)
    bitmap._checkconsistency()
    assert expected == bitmap
def test_add(self, single):
    """Add elements one by one and check the accepted value range.

    ``single`` is iterated as plain data collections. After the bitmap
    matches the reference set, values just outside the 32-bit unsigned range
    must raise ``OverflowError`` and the two extreme values inside it must
    be accepted.
    """
    for data in single:
        ref = set()
        rb = RoaringBitmap()
        for n in sorted(data):
            ref.add(n)
            rb.add(n)
        assert set(ref) == set(rb)
        assert rb == ref
        # One ``raises`` block per call: a block ends at the first exception,
        # so a second call placed in the same block would never run.
        with pytest.raises(OverflowError):
            rb.add(-1)
        with pytest.raises(OverflowError):
            rb.add(1 << 32)
        rb.add(0)
        rb.add((1 << 32) - 1)
def test_initunsorted(self, single):
    """A bitmap built directly from each dataset must match the equivalent set.

    ``single`` is iterated as ``(name, data)`` pairs; ``name`` is the
    assertion message.
    """
    for name, data in single:
        bitmap = RoaringBitmap(data)
        expected = set(data)
        bitmap._checkconsistency()
        assert expected == bitmap, name