def test_compressors(self):
    """Run each quantile compressor over repeated segments and check summary size.

    Every compressor summarizes the same evenly spaced segment ``n_seg``
    times and each summary must contain at most ``s`` entries.  The
    summaries are folded into one accumulator per compressor, whose rank at
    0.9 is printed for manual inspection (it is not asserted).
    """
    n_seg = 100
    n_per = 1000
    s = 3
    xs = np.linspace(0, 1, n_per).astype(float)
    # Keep each label next to its compressor so the two cannot drift apart.
    cases = [
        ("coop", compress_quant.CoopCompressorFinite()),
        ("skiprand", compress_quant.SkipCompressor(biased=False)),
        ("skipbias", compress_quant.SkipCompressor(biased=True)),
        ("ranktrack", compress_quant.RankTracker([.1, .5, .9])),
        ("random", compress_quant.QRandomSampleCompressor()),
    ]
    for name, cur_cc in cases:
        cur_res_acc = QuantileResultWrapper()
        print(name)
        for _ in range(n_seg):
            new_res = cur_cc.compress(xs, s)
            # Equivalent to len(new_res) <= s.
            self.assertLess(len(new_res), s + 1)
            cur_res_acc.update(new_res)
        print(cur_res_acc.rank(.9))
    def test_simple(self):
        """Check total weight and the first key of SkipCompressor summaries."""
        points = np.sort(np.linspace(0, 1, 100))

        # The summary's values must add up to the number of input points.
        plain = compress_quant.SkipCompressor(15)
        summary = plain.compress(points)
        self.assertEqual(100, sum(summary.values()))

        # With biased=True and argument 2, the first key is expected near 0.25.
        biased = compress_quant.SkipCompressor(2, biased=True)
        summary = biased.compress(points)
        first_key = list(summary.keys())[0]
        self.assertAlmostEqual(.25, first_key, 2)
Example #3
0
    def run(self):
        """Compress every segment with each quantile compressor and collect the output.

        Returns:
            A list of dicts with keys ``"seg_idx"``, ``"method"`` and
            ``"counts"``.  Each flat compressor contributes one record per
            segment, with ``seg_idx`` set to the segment's position.  The
            dyadic compressor contributes one record per summary it returns,
            with ``seg_idx`` set to ``(position in returned list, segment
            position)`` and ``method`` set to ``"dyadic_truncation"``.
        """
        print("Running Quantile Linear Bench with size: {} on {} segs".format(
            self.size,
            len(self.segments)
        ))

        # Labels are stored beside their compressors so the pairing is explicit.
        named_compressors = [
            ("ranktrack", cq.RankTracker(x_tracked=self.x_to_track)),
            ("coop", cq.CoopCompressor(self.size)),
            ("pps", cq.SkipCompressor(self.size, biased=False)),
            ("skip", cq.SkipCompressor(self.size, biased=True)),
            ("random_sample", cq.QRandomSampleCompressor(2*self.size)),
        ]

        # NOTE(review): math.log2 raises ValueError when there are no
        # segments, and dyadic_size is a float because of true division --
        # confirm both are acceptable to DyadicQuantileCompressor.
        dyadic_height = int(math.log2(len(self.segments)))
        dyadic_size = self.size/(dyadic_height+1)
        print("Dyadic Height: {}, Size:{}".format(dyadic_height, dyadic_size))
        dyadic_compressor = compress_dyadic.DyadicQuantileCompressor(
            size=dyadic_size,
            max_height=dyadic_height
        )

        results = []

        # tqdm wraps the sized sequence (not the enumerate generator) so the
        # progress bar knows the total number of segments.
        for cur_seg_idx, cur_seg in enumerate(tqdm(self.segments)):
            # Sort once per segment; every compressor below, including the
            # dyadic one, receives the sorted copy.
            cur_seg = np.sort(cur_seg)
            for cur_compressor_name, cur_compressor in named_compressors:
                results.append({
                    "seg_idx": cur_seg_idx,
                    "method": cur_compressor_name,
                    "counts": cur_compressor.compress(cur_seg),
                })

            dyadic_summaries = dyadic_compressor.compress(cur_seg)
            for summ_height, cur_dyadic_summ in enumerate(dyadic_summaries):
                results.append({
                    "seg_idx": (summ_height, cur_seg_idx),
                    "method": "dyadic_truncation",
                    "counts": cur_dyadic_summ
                })

        return results
Example #4
0
 def test_acc_gk(self):
     """Build a "pps" sketch board and print two query results over it.

     A seeded uniform stream is split into segments, a board is generated
     from them, and the outputs of ``query_linear`` and
     ``query_linear_acc_quant`` for segments 1..52 are printed.  Nothing is
     asserted.
     """
     np.random.seed(0)
     stream = np.random.uniform(0, 1, 10000)
     n_chunks = 128
     per_sketch_size = 64
     chunks = np.array_split(stream, n_chunks)

     pps_gen = board_sketch.SeqDictCompressorGen(
         name="pps", compressor=cq.SkipCompressor())
     builder = board_gen.BoardGen(pps_gen)
     # Each segment is tagged with the cumulative item count at its end.
     end_times = np.cumsum([len(chunk) for chunk in chunks])
     tags = [{"t": t, "size": per_sketch_size} for t in end_times]
     board = builder.generate(segments=chunks, tags=tags)

     # Both queries share the same segment window and tracked points.
     window = {
         "seg_start": 1,
         "seg_end": 52,
         "x_to_track": np.linspace(0, 1, 10),
     }
     linear_results = board_query.query_linear(
         board, quantile=1, dyadic_base=0, **window)
     acc_results = board_query.query_linear_acc_quant(
         board, acc_size=50, **window)
     print(linear_results)
     print(acc_results)
    def test_converge(self):
        """Merge differently seeded unbiased summaries and check the rank at 0.5.

        Each summary must have exactly ``per_summary`` entries, and the merged
        rank of 0.5, normalised by the total item count, must be 0.5 to two
        decimal places.
        """
        total_points = 10000
        per_summary = 15
        grid = np.linspace(0, 1, total_points)

        merged = quantile.QuantileResultWrapper()
        repeats = 100
        for seed in range(repeats):
            compressor = compress_quant.SkipCompressor(
                size=per_summary, seed=seed, biased=False)
            summary = compressor.compress(grid)
            self.assertEqual(per_summary, len(summary))
            merged.update(summary)

        normalised_rank = merged.rank(.5) / (total_points * repeats)
        self.assertAlmostEqual(.5, normalised_rank, 2)
def get_sketch_gen(sketch_name: str, x_to_track: Sequence = None) -> board_sketch.SketchGen:
    """Return the sketch generator selected by ``sketch_name``.

    Args:
        sketch_name: Identifier of the sketch.  Exact names are matched
            first; names starting with ``"cooperative"``, ``"dyadic"`` or
            ``"q_dyadic"`` are additionally passed to ``get_dyadic_base`` to
            obtain a parameter for the compressor.
        x_to_track: Values handed to the ``"top_values"`` and
            ``"q_top_values"`` compressors; ignored by every other sketch.

    Returns:
        A newly constructed generator from ``board_sketch``.

    Raises:
        ValueError: If ``sketch_name`` matches no known sketch.
    """
    ## Frequency Sketches
    if sketch_name == "top_values":
        return board_sketch.ItemDictCompressorGen(
            name=sketch_name,
            compressor=cf.TopValueCompressor(x_to_track=x_to_track)
        )
    # The exact name must be tested before the prefix match below.
    if sketch_name == "cooperative":
        return board_sketch.ItemDictCompressorGen(
            name=sketch_name,
            compressor=cf.IncrementalRangeCompressor()
        )
    if sketch_name.startswith("cooperative"):
        base = get_dyadic_base(sketch_name)
        return board_sketch.ItemDictCompressorGen(
            name=sketch_name,
            compressor=cf.IncrementalRangeCompressor(max_t=base)
        )
    if sketch_name == "random_sample":
        return board_sketch.ItemDictCompressorGen(
            name=sketch_name,
            compressor=cf.UniformSamplingCompressor()
        )
    if sketch_name == "cms_min":
        return board_sketch.CMSGen()
    if sketch_name == "truncation":
        return board_sketch.ItemDictCompressorGen(
            name=sketch_name,
            compressor=cf.TruncationCompressor()
        )
    if sketch_name == "pps":
        return board_sketch.ItemDictCompressorGen(
            name=sketch_name,
            compressor=cf.PPSCompressor()
        )
    if sketch_name.startswith("dyadic"):
        base = get_dyadic_base(sketch_name)
        return board_sketch.DyadicItemDictGen(
            h_compressor=cd.DyadicFrequencyCompressor(max_height=20, base=base)
        )

    ## Quantile Sketches
    if sketch_name == "q_top_values":
        # NOTE(review): x_to_track defaults to None and is passed to
        # np.unique unchecked -- confirm callers always supply it here.
        return board_sketch.SeqDictCompressorGen(
            name=sketch_name,
            compressor=cq.RankTracker(x_tracked=np.unique(x_to_track))
        )
    if sketch_name == "q_cooperative":
        return board_sketch.SeqDictCompressorGen(
            name=sketch_name,
            compressor=cq.CoopCompressorFinite()
        )
    if sketch_name == "q_random_sample":
        return board_sketch.SeqDictCompressorGen(
            name=sketch_name,
            compressor=cq.QRandomSampleCompressor()
        )
    if sketch_name == "kll":
        return board_sketch.KLLGen()
    if sketch_name == "q_truncation":
        return board_sketch.SeqDictCompressorGen(
            name=sketch_name,
            compressor=cq.SkipCompressor(biased=True)
        )
    if sketch_name == "q_pps":
        return board_sketch.SeqDictCompressorGen(
            name=sketch_name,
            compressor=cq.SkipCompressor(biased=False)
        )
    if sketch_name.startswith("q_dyadic"):
        base = get_dyadic_base(sketch_name)
        return board_sketch.DyadicSeqDictGen(
            h_compressor=cd.DyadicQuantileCompressor(max_height=20, base=base)
        )

    # ValueError subclasses Exception, so existing `except Exception`
    # handlers keep working while callers can now catch the specific error.
    raise ValueError("Invalid Sketch: {}".format(sketch_name))