def test_compressors(self):
    """Smoke-test each quantile compressor against a per-call size budget.

    Every compressor is asked ``n_seg`` times to compress the same evenly
    spaced grid ``xs`` with budget ``s``.  Each returned summary must hold
    at most ``s`` entries, and is folded into a fresh
    ``QuantileResultWrapper``.  The compressor's name and the accumulated
    rank at 0.9 are printed for manual inspection.
    """
    n_seg = 100
    n_per = 1000
    s = 3
    xs = np.linspace(0, 1, n_per).astype(float)

    # Names and compressors are paired so they cannot drift out of sync.
    named_compressors = [
        ("coop", compress_quant.CoopCompressorFinite()),
        ("skiprand", compress_quant.SkipCompressor(biased=False)),
        ("skipbias", compress_quant.SkipCompressor(biased=True)),
        ("ranktrack", compress_quant.RankTracker([.1, .5, .9])),
        ("random", compress_quant.QRandomSampleCompressor()),
    ]
    # NOTE(review): an unused ``expected_acc = [1, 1, 0, 3, 1]`` list used to
    # live here; no accuracy assertion was ever wired up to it.  Only the
    # size bound below is actually checked.

    for name, compressor in named_compressors:
        accumulated = QuantileResultWrapper()
        print(name)
        for _ in range(n_seg):
            summary = compressor.compress(xs, s)
            # Equivalent to len(summary) <= s.
            self.assertLess(len(summary), s + 1)
            accumulated.update(summary)
        print(accumulated.rank(.9))
def test_simple(self):
    """Check basic SkipCompressor output on 100 evenly spaced points.

    A size-15 compressor must return counts that sum to the 100 input
    points.  A size-2 compressor built with ``biased=True`` must return a
    first key that is approximately 0.25 (to two decimal places).
    """
    grid = np.sort(np.linspace(0, 1, 100))

    # Total weight of the summary must equal the number of input points.
    size15_counts = compress_quant.SkipCompressor(15).compress(grid)
    self.assertEqual(100, sum(size15_counts.values()))

    biased_counts = compress_quant.SkipCompressor(2, biased=True).compress(grid)
    first_key = list(biased_counts.keys())[0]
    self.assertAlmostEqual(.25, first_key, 2)
def run(self):
    """Compress every segment with each quantile compressor and collect the results.

    Reads ``self.size``, ``self.segments`` and ``self.x_to_track``.  Each
    segment is sorted once and then passed to five ``cq`` compressors and
    to one ``DyadicQuantileCompressor``.

    Returns:
        A list of dicts with keys ``"seg_idx"``, ``"method"`` and
        ``"counts"``.  For the five plain compressors ``"seg_idx"`` is the
        segment index; for the dyadic compressor it is the tuple
        ``(summary_height, segment_index)`` and ``"method"`` is
        ``"dyadic_truncation"``.

    Raises:
        ValueError: if ``self.segments`` is empty (the dyadic height is
            derived from ``log2`` of the segment count).
    """
    n_segments = len(self.segments)
    print("Running Quantile Linear Bench with size: {} on {} segs".format(
        self.size, n_segments
    ))
    if n_segments == 0:
        # math.log2(0) would raise an opaque "math domain error" ValueError.
        raise ValueError("run() requires at least one segment")

    # Method name paired with its compressor so the two cannot drift apart.
    named_compressors = [
        ("ranktrack", cq.RankTracker(x_tracked=self.x_to_track)),
        ("coop", cq.CoopCompressor(self.size)),
        ("pps", cq.SkipCompressor(self.size, biased=False)),
        ("skip", cq.SkipCompressor(self.size, biased=True)),
        ("random_sample", cq.QRandomSampleCompressor(2*self.size)),
    ]

    dyadic_height = int(math.log2(n_segments))
    # NOTE(review): true division makes this a float; confirm that
    # DyadicQuantileCompressor accepts a non-integer size.
    dyadic_size = self.size/(dyadic_height+1)
    print("Dyadic Height: {}, Size:{}".format(dyadic_height, dyadic_size))
    dyadic_compressor = compress_dyadic.DyadicQuantileCompressor(
        size=dyadic_size,
        max_height=dyadic_height
    )

    results = []
    # enumerate() hides the length from tqdm, so pass the total explicitly.
    for seg_idx, raw_seg in tqdm(enumerate(self.segments), total=n_segments):
        # Sort once per segment instead of once per compressor.
        sorted_seg = np.sort(raw_seg)

        for method_name, compressor in named_compressors:
            results.append({
                "seg_idx": seg_idx,
                "method": method_name,
                "counts": compressor.compress(sorted_seg),
            })

        # The dyadic compressor yields one summary per height for this segment.
        dyadic_summaries = dyadic_compressor.compress(sorted_seg)
        for summ_height, dyadic_summ in enumerate(dyadic_summaries):
            results.append({
                "seg_idx": (summ_height, seg_idx),
                "method": "dyadic_truncation",
                "counts": dyadic_summ
            })
    return results
def test_acc_gk(self):
    """Build a "pps" sketch board and print two query results side by side.

    A seeded uniform stream of 10000 values is split into 128 segments and
    fed to a ``BoardGen`` backed by a ``SkipCompressor``.  The same segment
    range is then queried with ``query_linear`` and with
    ``query_linear_acc_quant`` and both results are printed.  Nothing is
    asserted; this is a smoke test only.
    """
    np.random.seed(0)
    n_segments = 128
    per_sketch_size = 64

    stream = np.random.uniform(0, 1, 10000)
    chunks = np.array_split(stream, n_segments)

    pps_gen = board_sketch.SeqDictCompressorGen(
        name="pps",
        compressor=cq.SkipCompressor(),
    )
    builder = board_gen.BoardGen(pps_gen)

    # Each segment is tagged with the cumulative element count at its end.
    end_times = np.cumsum([len(chunk) for chunk in chunks])
    tag_rows = [{"t": t, "size": per_sketch_size} for t in end_times]
    df = builder.generate(segments=chunks, tags=tag_rows)

    # Both queries share the same segment range and tracked points.
    shared_query_args = dict(
        seg_start=1,
        seg_end=52,
        x_to_track=np.linspace(0, 1, 10),
    )
    tot_results_true = board_query.query_linear(
        df,
        quantile=1,
        dyadic_base=0,
        **shared_query_args
    )
    tot_results_est = board_query.query_linear_acc_quant(
        df,
        acc_size=50,
        **shared_query_args
    )
    print(tot_results_true)
    print(tot_results_est)
def test_converge(self):
    """Check that merged SkipCompressor summaries recover the median rank.

    One hundred compressors with ``biased=False``, each with a different
    seed, summarise the same 10000-point grid.  Every summary must have
    exactly ``summ_size`` entries, and after merging all of them the rank
    at 0.5, normalised by the total point count, must be about 0.5 (to two
    decimal places).
    """
    n_total = 10000
    summ_size = 15
    n_segs = 100
    xs = np.linspace(0, 1, n_total)

    merged = quantile.QuantileResultWrapper()
    for seed in range(n_segs):
        compressor = compress_quant.SkipCompressor(
            size=summ_size, seed=seed, biased=False
        )
        summary = compressor.compress(xs)
        self.assertEqual(summ_size, len(summary))
        merged.update(summary)

    # Each of the n_segs summaries stands in for n_total points.
    normalised_rank = merged.rank(.5) / (n_total * n_segs)
    self.assertAlmostEqual(.5, normalised_rank, 2)
def get_sketch_gen(sketch_name: str, x_to_track: Sequence = None) -> "board_sketch.SketchGen":
    """Build the sketch generator registered under ``sketch_name``.

    Names are matched in a fixed order.  Exact names are tested before the
    prefix forms (``"cooperative..."``, ``"dyadic..."``, ``"q_dyadic..."``),
    whose ``base`` / ``max_t`` argument comes from
    ``get_dyadic_base(sketch_name)``.

    Args:
        sketch_name: Identifier of the sketch to build.
        x_to_track: Values handed to the ``"top_values"`` and
            ``"q_top_values"`` compressors; unused by every other sketch.

    Returns:
        The generator object for the requested sketch.

    Raises:
        ValueError: if ``sketch_name`` matches no known sketch.  ValueError
            is a subclass of Exception, so handlers written for the
            previous bare ``Exception`` keep working.
    """
    # ---- Frequency (item) sketches ----
    if sketch_name == "top_values":
        return board_sketch.ItemDictCompressorGen(
            name=sketch_name,
            compressor=cf.TopValueCompressor(x_to_track=x_to_track),
        )
    if sketch_name == "cooperative":
        return board_sketch.ItemDictCompressorGen(
            name=sketch_name,
            compressor=cf.IncrementalRangeCompressor(),
        )
    # Must come after the exact "cooperative" match above.
    if sketch_name.startswith("cooperative"):
        base = get_dyadic_base(sketch_name)
        return board_sketch.ItemDictCompressorGen(
            name=sketch_name,
            compressor=cf.IncrementalRangeCompressor(max_t=base),
        )
    if sketch_name == "random_sample":
        return board_sketch.ItemDictCompressorGen(
            name=sketch_name,
            compressor=cf.UniformSamplingCompressor(),
        )
    if sketch_name == "cms_min":
        return board_sketch.CMSGen()
    if sketch_name == "truncation":
        return board_sketch.ItemDictCompressorGen(
            name=sketch_name,
            compressor=cf.TruncationCompressor(),
        )
    if sketch_name == "pps":
        return board_sketch.ItemDictCompressorGen(
            name=sketch_name,
            compressor=cf.PPSCompressor(),
        )
    if sketch_name.startswith("dyadic"):
        base = get_dyadic_base(sketch_name)
        return board_sketch.DyadicItemDictGen(
            h_compressor=cd.DyadicFrequencyCompressor(max_height=20, base=base),
        )

    # ---- Quantile sketches ----
    if sketch_name == "q_top_values":
        # Tracked points are de-duplicated before being handed over.
        return board_sketch.SeqDictCompressorGen(
            name=sketch_name,
            compressor=cq.RankTracker(x_tracked=np.unique(x_to_track)),
        )
    if sketch_name == "q_cooperative":
        return board_sketch.SeqDictCompressorGen(
            name=sketch_name,
            compressor=cq.CoopCompressorFinite(),
        )
    if sketch_name == "q_random_sample":
        return board_sketch.SeqDictCompressorGen(
            name=sketch_name,
            compressor=cq.QRandomSampleCompressor(),
        )
    if sketch_name == "kll":
        return board_sketch.KLLGen()
    if sketch_name == "q_truncation":
        return board_sketch.SeqDictCompressorGen(
            name=sketch_name,
            compressor=cq.SkipCompressor(biased=True),
        )
    if sketch_name == "q_pps":
        return board_sketch.SeqDictCompressorGen(
            name=sketch_name,
            compressor=cq.SkipCompressor(biased=False),
        )
    if sketch_name.startswith("q_dyadic"):
        base = get_dyadic_base(sketch_name)
        return board_sketch.DyadicSeqDictGen(
            h_compressor=cd.DyadicQuantileCompressor(max_height=20, base=base),
        )

    raise ValueError("Invalid Sketch: {}".format(sketch_name))