Example #1
0
 def test_aggregateor(self, multi):
     """Check that an n-ary in-place union agrees with the built-in ``set``.

     The first collection in ``multi`` seeds both the reference set and the
     bitmap; the remaining collections are merged in with one ``update``
     call on each side.
     """
     head, tail = multi[0], multi[1:]
     expected = set(head)
     expected.update(*map(set, tail))
     bitmap = RoaringBitmap(head)
     bitmap.update(*map(RoaringBitmap, tail))
     bitmap._checkconsistency()
     assert bitmap == expected
Example #2
0
 def test_aggregateand(self, multi):
     """Check that an n-ary in-place intersection agrees with ``set``.

     ``multi[0]`` seeds both the reference set and the bitmap; the remaining
     collections are intersected in with a single ``intersection_update``
     call on each side.
     """
     ref = set(multi[0])
     ref.intersection_update(*[set(a) for a in multi[1:]])
     rb = RoaringBitmap(multi[0])
     rb.intersection_update(*[RoaringBitmap(a) for a in multi[1:]])
     rb._checkconsistency()
     # No assertion message: the previous ``name`` was not bound anywhere in
     # this function, so a failing comparison would have surfaced as a
     # NameError instead of an AssertionError.
     assert rb == ref
Example #3
0
 def test_ixor(self, pair):
     """Check in-place symmetric difference (``^=``) against ``set``.

     Each item of ``pair`` is unpacked as two collections of values.
     """
     for left, right in pair:
         expected, expected_other = set(left), set(right)
         bitmap, bitmap_other = RoaringBitmap(left), RoaringBitmap(right)
         expected ^= expected_other
         bitmap ^= bitmap_other
         # Compare sizes first, then the actual members.
         assert len(expected) == len(bitmap)
         assert expected == set(bitmap)
Example #4
0
 def test_ior(self, pair):
     """Check in-place union (``|=``) against the built-in ``set``.

     Each item of ``pair`` is unpacked as two collections of values.
     """
     for left, right in pair:
         expected, expected_other = set(left), set(right)
         bitmap, bitmap_other = RoaringBitmap(left), RoaringBitmap(right)
         expected |= expected_other
         bitmap |= bitmap_other
         # Once via iteration of the bitmap, once via its own equality.
         assert set(expected) == set(bitmap)
         assert bitmap == expected
Example #5
0
 def test_issue19(self):
     """Union two ranges into an empty bitmap, then subtract them again.

     Both the in-place union and the ``|`` expression must leave nothing
     behind once the two operands are subtracted.
     """
     accumulated = RoaringBitmap()
     large = RoaringBitmap(range(4095))
     small = RoaringBitmap(range(2))
     for operand in (large, small):
         accumulated |= operand
     leftover_inplace = accumulated - large - small
     assert len(leftover_inplace) == 0
     leftover_binary = (large | small) - large - small
     assert len(leftover_binary) == 0
Example #6
0
 def test_ior(self, pair):
     """Check in-place union (``|=``) against the built-in ``set``.

     Each item of ``pair`` is unpacked as ``(name, data1, data2)``; ``name``
     is used only as the assertion message.
     """
     for name, left, right in pair:
         expected, expected_other = set(left), set(right)
         bitmap, bitmap_other = RoaringBitmap(left), RoaringBitmap(right)
         expected |= expected_other
         bitmap |= bitmap_other
         bitmap._checkconsistency()
         assert bitmap == expected, name
Example #7
0
 def test_none(self, multi):
     """Check ``MultiRoaringBitmap`` with an empty member bitmap.

     An empty ``RoaringBitmap`` is added with ``insert(4, ...)``; the
     container must keep the same length and members, and
     ``intersection([4, 5])`` is expected to be ``None``.
     """
     bitmaps = list(map(RoaringBitmap, multi))
     bitmaps.insert(4, RoaringBitmap())
     container = MultiRoaringBitmap(bitmaps)
     assert len(bitmaps) == len(container)
     for original, stored in zip(bitmaps, container):
         assert original == stored
     assert container.intersection([4, 5]) is None
Example #8
0
 def test_ixor(self, pair):
     """Check in-place symmetric difference (``^=``) against ``set``.

     Each item of ``pair`` is unpacked as ``(name, data1, data2)``; ``name``
     is used only as the assertion message.
     """
     for name, left, right in pair:
         expected, expected_other = set(left), set(right)
         bitmap, bitmap_other = RoaringBitmap(left), RoaringBitmap(right)
         expected ^= expected_other
         bitmap ^= bitmap_other
         bitmap._checkconsistency()
         # Compare sizes first, then the contents.
         assert len(expected) == len(bitmap), name
         assert expected == bitmap, name
Example #9
0
    def test_initrb(self):
        """Convert between mutable and immutable bitmaps and compare them.

        The immutable bitmap is built first from a ``RoaringBitmap`` and then
        directly from a ``range``; in both cases a ``RoaringBitmap`` created
        from it must compare equal to it.
        """
        for source in (RoaringBitmap(range(5)), range(5)):
            frozen = ImmutableRoaringBitmap(source)
            thawed = RoaringBitmap(frozen)
            assert thawed == frozen
Example #10
0
 def test_minmax(self):
     """Check ``min``/``max`` and indexing on ranges of 61440 and 61441."""
     for size in (61440, 61441):
         bitmap = RoaringBitmap(range(size))
         assert bitmap.min() == 0
         assert bitmap.max() == size - 1
     # ``bitmap`` is now the 61441-element one; its last index is 61440.
     assert bitmap[61440] == 61440
     assert list(bitmap)[61440] == 61440
Example #11
0
 def test_jaccard_dist(self, pair):
     """Compare ``intersection_len``/``union_len``/``jaccard_dist`` with sets.

     Each item of ``pair`` is unpacked as ``(name, data1, data2)``; ``name``
     is used only as the assertion message.  Pairs in which both collections
     are empty are skipped, because the reference ratio would divide by zero.
     """
     for name, data1, data2 in pair:
         ref, ref2 = set(data1), set(data2)
         if not ref and not ref2:
             # Empty union: the similarity ratio below would be 0 / 0.
             continue
         rb, rb2 = RoaringBitmap(data1), RoaringBitmap(data2)
         # Reference similarity computed once from the built-in sets.
         similarity = len(ref & ref2) / float(len(ref | ref2))
         assert abs(similarity -
                    rb.intersection_len(rb2) /
                    float(rb.union_len(rb2))) < 0.001, name
         assert abs((1 - similarity) - rb.jaccard_dist(rb2)) < 0.001, name
Example #12
0
 def test_clamp(self, single):
     """Check ``clamp(a, b)`` against intersecting with ``range(a, b)``.

     Each item of ``single`` is unpacked as ``(name, data)``.  The bounds
     are two values sampled from ``data`` and sorted ascending.
     """
     for name, data in single:
         lower, upper = sorted(random.sample(data, 2))
         window = range(lower, upper)
         expected = set(data).intersection(window)
         intersected = RoaringBitmap(data).intersection(window)
         clamped = RoaringBitmap(data).clamp(lower, upper)
         assert lower <= clamped.min() and clamped.max() < upper, name
         assert expected == clamped, (name, lower, upper)
         assert intersected == clamped, (name, lower, upper)
Example #13
0
 def test_neq(self, pair):
     """Check that ``!=`` on bitmaps agrees with ``!=`` on sets.

     Each item of ``pair`` is unpacked as two collections, which are
     asserted to be unequal both as sets and as bitmaps.
     """
     for left, right in pair:
         set_left, set_right = set(left), set(right)
         bitmap_left, bitmap_right = RoaringBitmap(left), RoaringBitmap(right)
         assert set_left != set_right
         assert bitmap_left != bitmap_right
         sets_differ = set_left != set_right
         bitmaps_differ = bitmap_left != bitmap_right
         assert sets_differ == bitmaps_differ
Example #14
0
 def test_eq(self, single):
     """Check that ``==`` on bitmaps agrees with ``==`` on sets.

     Every collection in ``single`` is loaded twice into each container
     type; the two copies must compare equal.
     """
     for data in single:
         set_first, set_second = set(data), set(data)
         bitmap_first, bitmap_second = RoaringBitmap(data), RoaringBitmap(data)
         assert set_first == set_second
         assert bitmap_first == bitmap_second
         sets_equal = set_first == set_second
         bitmaps_equal = bitmap_first == bitmap_second
         assert sets_equal == bitmaps_equal
Example #15
0
 def test_aggregateor(self):
     """Check an n-ary in-place union on random data against ``set``.

     Ten lists of 2000 random integers in ``[0, 1000]`` are generated; the
     first seeds each container and the rest are merged with ``update``.
     """
     samples = []
     for _ in range(10):
         samples.append([random.randint(0, 1000) for _ in range(2000)])
     head, tail = samples[0], samples[1:]
     expected = set(head)
     expected.update(*map(set, tail))
     bitmap = RoaringBitmap(head)
     bitmap.update(*map(RoaringBitmap, tail))
     assert expected == set(bitmap)
     assert bitmap == expected
Example #16
0
 def test_disjoint(self, pair):
     """Check that ``isdisjoint`` on bitmaps matches ``set.isdisjoint``.

     Each item of ``pair`` is unpacked as ``(name, data1, data2)``.  The
     check is repeated with ``data2`` filtered down to the values that do
     not occur in ``data1``.
     """
     for name, left, right in pair:
         set_left, set_right = set(left), set(right)
         bitmap_left, bitmap_right = RoaringBitmap(left), RoaringBitmap(right)
         expected = set_left.isdisjoint(set_right)
         assert bitmap_left.isdisjoint(bitmap_right) == expected, name
         # Keep only the values of the second input absent from the first.
         remainder = [value for value in right if value not in set_left]
         set_rest = set(remainder)
         bitmap_rest = RoaringBitmap(remainder)
         expected_rest = set_left.isdisjoint(set_rest)
         assert bitmap_left.isdisjoint(bitmap_rest) == expected_rest, name
Example #17
0
 def test_disjoint(self, pair):
     """Check ``isdisjoint`` on overlapping and non-overlapping inputs.

     Each item of ``pair`` is unpacked as two collections that are asserted
     to overlap.  The second collection is then filtered to the values
     absent from the first, which must be disjoint from it.
     """
     for left, right in pair:
         set_left, set_right = set(left), set(right)
         bitmap_left, bitmap_right = RoaringBitmap(left), RoaringBitmap(right)
         assert not set_left.isdisjoint(set_right)
         assert not bitmap_left.isdisjoint(bitmap_right)
         # Keep only the values of the second input absent from the first.
         remainder = [value for value in right if value not in set_left]
         set_rest = set(remainder)
         bitmap_rest = RoaringBitmap(remainder)
         assert set_left.isdisjoint(set_rest)
         assert bitmap_left.isdisjoint(bitmap_rest)
Example #18
0
 def test_clamp2(self):
     """Check ``clamp`` on a small bitmap for several (start, stop) bounds.

     The combined bitmap holds four values; each case lists the bounds
     passed to ``clamp`` and the bitmap the result must equal.
     """
     low = RoaringBitmap([0x00010001])
     middle = RoaringBitmap([0x00030003, 0x00050005])
     high = RoaringBitmap([0x00070007])
     combined = low | middle | high
     cases = (
         (0, 0x000FFFFF, combined),
         (0x000200FF, 0x000FFFFF, middle | high),
         (0x00030003, 0x000FFFFF, middle | high),
         (0, 0x00060006, low | middle),
         (0, 0x00050006, low | middle),
         (0, 0x00050005, low | RoaringBitmap([0x00030003])),
     )
     for start, stop, expected in cases:
         assert combined.clamp(start, stop) == expected
Example #19
0
 def test_subset(self, pair):
     """Check that ``<=`` on bitmaps matches ``<=`` on sets.

     Each item of ``pair`` is unpacked as ``(name, data1, data2)``.  Two
     candidates are compared against ``data2``: ``data1`` itself, and the
     first half of ``data2``.
     """
     for name, data1, data2 in pair:
         superset = set(data2)
         super_bitmap = RoaringBitmap(data2)
         half = len(data2) // 2
         for candidate in (data1, data2[:half]):
             subset = set(candidate)
             sub_bitmap = RoaringBitmap(candidate)
             expected = subset <= superset
             assert (set(sub_bitmap) <= superset) == expected, name
             assert (sub_bitmap <= super_bitmap) == expected, name
Example #20
0
 def test_jaccard_dist(self, pair):
     """Compare ``intersection_len``/``union_len``/``jaccard_dist`` with sets.

     Each item of ``pair`` is unpacked as ``(name, data1, data2)``.  Pairs
     in which both inputs have length zero are skipped.
     """
     for name, data1, data2 in pair:
         if not (len(data1) or len(data2)):
             continue
         ref, ref2 = set(data1), set(data2)
         rb, rb2 = RoaringBitmap(data1), RoaringBitmap(data2)
         # Reference similarity from the built-in sets.
         similarity = len(ref & ref2) / float(len(ref | ref2))
         observed = rb.intersection_len(rb2) / float(rb.union_len(rb2))
         assert similarity == pytest.approx(observed), name
         assert 1 - similarity == pytest.approx(rb.jaccard_dist(rb2)), name
Example #21
0
 def test_subset(self, pair):
     """Check ``<=`` on a non-subset and on a genuine subset.

     Each item of ``pair`` is unpacked as two collections; the first is
     asserted not to be a subset of the second.  The first half of the
     second collection is then asserted to be a subset of it.
     """
     for data1, data2 in pair:
         superset = set(data2)
         super_bitmap = RoaringBitmap(data2)

         outsider = set(data1)
         outsider_bitmap = RoaringBitmap(data1)
         assert not (outsider <= superset)
         assert not (set(outsider_bitmap) <= superset)
         assert not (outsider_bitmap <= super_bitmap)

         half = len(data2) // 2
         prefix = set(data2[:half])
         prefix_bitmap = RoaringBitmap(data2[:half])
         assert prefix <= superset
         assert set(prefix_bitmap) <= superset
         assert prefix_bitmap <= super_bitmap
Example #22
0
 def test_select_issue15(self):
     """Check indexing (``rb[n]``) on bitmaps, including after a discard.

     The last part builds a bitmap from two ranges with a gap between them
     and compares a few indices, including ``-1``, with the source list.
     """
     bitmap = RoaringBitmap(range(0x10000, 0x1ffff + 1))
     assert bitmap[0] == 0x10000
     bitmap.discard(0x10010)
     assert bitmap[0] == 0x10000

     bitmap = RoaringBitmap(range(0x10010, 0x1ffff + 1))
     assert bitmap[0] == 0x10010

     values = list(range(1, 0xccbb)) + list(range(0xcccc, 0xfffc))
     bitmap = RoaringBitmap(values)
     for index in (0, 0xcccc, -1):
         assert values[index] == bitmap[index], (
             index, values[index], bitmap[index])
Example #23
0
def test_phase_2_no_temp_table_from_phase_1():
    """Run ``_phase_2`` on a fresh ``CloStream`` with a one-entry temp table.

    Before the call, row 0 of ``closed_df`` must hold the empty itemset.
    Afterwards the table must also contain ``{'C', 'D'}`` with value 1, and
    ``cid_list_map`` must map both items to a bitmap containing 1.
    """
    stream = CloStream(filter_fn=NO_FILTER_FN)
    itemset = frozenset('CD')

    assert stream.closed_df.loc[0, 'itemset'] == frozenset()
    stream._phase_2({itemset: 0})

    expected_rows = [
        [frozenset(), 0],
        [frozenset('CD'), 1],
    ]
    assert stream.closed_df.values.tolist() == expected_rows

    expected_map = {'C': RoaringBitmap([1]), 'D': RoaringBitmap([1])}
    assert stream.cid_list_map == expected_map
Example #24
0
    def test_issue24(self):
        """Check ``pop`` after removing one value by difference.

        The same value is removed once with ``-`` (new bitmap) and once with
        ``difference_update`` (in place); ``pop`` must return 131068 in both
        cases.
        """
        removed = 130752
        bitmap = RoaringBitmap(range(131071))
        for expected in (131070, 131069):
            assert bitmap.pop() == expected

        reduced = bitmap - RoaringBitmap([removed])
        assert removed not in reduced
        assert reduced.pop() == 131068

        bitmap.difference_update(RoaringBitmap([removed]))
        assert removed not in bitmap
        assert bitmap.pop() == 131068
Example #25
0
 def test_initrangestep(self):
     """Build bitmaps from stepped ranges and compare them with ``set``.

     The assertion message names the exact ``range`` that was used, so a
     failure can be reproduced directly from the message.
     """
     # creates a positive, dense, and inverted block, respectively
     for n in [400, 6000, 61241]:
         for step in (2, 7, 113):
             stop = n * step
             ref = set(range(23, stop, step))
             rb = RoaringBitmap(range(23, stop, step))
             rb._checkconsistency()
             # Report the stop value actually used (n * step), not n.
             assert ref == rb, ('range(23, %d, %d)' % (stop, step))
     # A step larger than 1 << 16.
     n = 100 * (1 << 16)
     step = (1 << 16) + 7
     ref = set(range(23, n, step))
     rb = RoaringBitmap(range(23, n, step))
     rb._checkconsistency()
     assert ref == rb, ('range(23, %d, %d)' % (n, step))
Example #26
0
 def test_clamp(self, multi):
     """Check ``MultiRoaringBitmap.intersection`` with ``start``/``stop``.

     The bounds are two values sampled from ``multi[0]`` and sorted.  The
     reference is the intersection of all collections as sets, restricted
     to ``range(a, b)``.
     """
     lower, upper = sorted(sample(multi[0], 2))
     common = set.intersection(*map(set, multi))
     expected = common & set(range(lower, upper))
     container = MultiRoaringBitmap(list(map(RoaringBitmap, multi)))
     indices = list(range(len(container)))
     result = container.intersection(indices, start=lower, stop=upper)
     assert lower <= result.min() and result.max() < upper
     assert expected == result
Example #27
0
 def test_initrange(self):
     """Build bitmaps from contiguous ranges and compare them with ``set``."""
     # creates a positive, dense, and inverted block, respectively
     for stop in (400, 6000, 61241):
         values = range(23, stop)
         expected = set(values)
         bitmap = RoaringBitmap(values)
         bitmap._checkconsistency()
         assert expected == bitmap, ('range(23, %d)' % stop)
Example #28
0
 def test_rank2(self):
     """Check ``rank`` on a bitmap built from two strided ranges.

     Expected values are computed arithmetically: stride 7 below 100000,
     stride 1000 from 100000 up to 200000.
     """
     bitmap = RoaringBitmap(range(0, 100000, 7))
     bitmap.update(range(100000, 200000, 1000))
     for value in range(100000):
         assert bitmap.rank(value) == 1 + value // 7
     # Expected rank contribution of the stride-7 part for values >= 100000.
     below_split = 1 + 100000 // 7
     for value in range(100000, 200000):
         expected = below_split + 1 + (value - 100000) // 1000
         assert bitmap.rank(value) == expected
Example #29
0
 def test_select2(self):
     """Check ``select(k)`` on evenly spaced bitmaps.

     The spacing runs through the powers of two from 1 to 1024; for each,
     ``select(k)`` must return ``k * gap``.
     """
     for gap in (1 << shift for shift in range(11)):
         bitmap = RoaringBitmap(range(0, 100000, gap))
         for index in range(100000 // gap):
             assert bitmap.select(index) == index * gap
Example #30
0
 def test_pickle(self, single):
     """Round-trip each bitmap through ``pickle`` and compare the result.

     Each item of ``single`` is unpacked as ``(name, data)``; ``name`` is
     used only as the assertion message.
     """
     for name, data in single:
         original = RoaringBitmap(data)
         # protocol=-1 selects the highest pickle protocol available.
         restored = pickle.loads(pickle.dumps(original, protocol=-1))
         # NOTE(review): the consistency check runs on the original bitmap,
         # not on the unpickled copy -- confirm that this is intended.
         original._checkconsistency()
         assert restored == original, name