def test_consistent_merge():
    """Test that merge() calls do not modify the argument sketch.

    s2 is merged into a populated sketch (s1) and into an empty one (s3);
    after each merge its quantiles, sum, avg and num_values must still
    match the snapshot taken before any merge happened.
    """

    def summarize(sketch):
        # Quantile at every test quantile followed by the aggregate stats.
        return [sketch.quantile(q) for q in test_quantiles] + [
            sketch.sum,
            sketch.avg,
            sketch.num_values,
        ]

    s1 = GKArray(test_eps)
    s2 = GKArray(test_eps)
    d = Normal(100)
    for v in d.data:
        s1.add(v)
    s1.merge(s2)
    # s2 is still empty
    np.testing.assert_equal(s2.num_values, 0)

    d = Normal(50)
    for v in d.data:
        s2.add(v)

    # Snapshot s2 BEFORE it is used as a merge argument. It must not be
    # recomputed after the merge, otherwise the checks below would only
    # compare s2 with itself and could never fail.
    s2_summary = summarize(s2)
    s1.merge(s2)
    d = Normal(10)
    for v in d.data:
        s1.add(v)
    # neither the merge nor the later changes to s1 affect s2
    np.testing.assert_almost_equal(summarize(s2), s2_summary)

    s3 = GKArray(test_eps)
    s3.merge(s2)
    # merging into an empty sketch does not change s2
    np.testing.assert_almost_equal(summarize(s2), s2_summary)
def test_merge_equal():
    """Merge sketches built from differently parameterized Normal data.

    For every size in test_sizes, one sketch per parameter pair is built
    (each generator is created with the same n) and merged into a single
    sketch, which is then checked against the combined data with twice the
    sketch's eps as tolerance.
    """
    normal_params = [(35, 1), (1, 3), (15, 2), (40, 0.5)]
    for n in test_sizes:
        combined = EmptyDataset(0)
        merged = GKArray(test_eps)
        for first, second in normal_params:
            source = Normal.from_params(first, second, n)
            partial = GKArray(test_eps)
            for value in source.data:
                # Feed each value to both the partial sketch and the
                # reference dataset so the two stay in sync.
                partial.add(value)
                combined.add(value)
            merged.merge(partial)
        evaluate_sketch_accuracy(merged, combined, 2*merged.eps)
def test_merge_mixed():
    """Merge sketches built from different distributions.

    Repeated 20 times: one sketch per dataset class is built, each class
    being instantiated with a random integer drawn from [0, 500), and merged
    into a single sketch. The merged sketch is checked against the combined
    data with twice the sketch's eps as tolerance.
    """
    ntests = 20
    dataset_classes = [Normal, Exponential, Laplace, Bimodal]
    for _ in range(ntests):
        combined = EmptyDataset(0)
        merged = GKArray(test_eps)
        for dataset_cls in dataset_classes:
            size = np.random.randint(0, 500)
            source = dataset_cls(size)
            partial = GKArray(test_eps)
            for value in source.data:
                # Keep the reference dataset in sync with what is sketched.
                partial.add(value)
                combined.add(value)
            merged.merge(partial)
        evaluate_sketch_accuracy(merged, combined, 2*merged.eps)
def test_merge_unequal():
    """Merge two sketches that received uneven shares of one dataset.

    Each value of a Lognormal dataset goes to s1 when a uniform random draw
    exceeds 0.7 and to s2 otherwise. s2 is then merged into s1 and the
    result is checked against the full dataset with twice s1's eps as
    tolerance. Repeated 20 times for every size in test_sizes.
    """
    ntests = 20
    for _ in range(ntests):
        for n in test_sizes:
            d = Lognormal(n)
            s1 = GKArray(test_eps)
            s2 = GKArray(test_eps)
            for value in d.data:
                # One random draw per value decides which sketch gets it.
                receiver = s1 if np.random.random() > 0.7 else s2
                receiver.add(value)
            s1.merge(s2)
            evaluate_sketch_accuracy(s1, d, 2*s1.eps)
def test_constant():
    """Check that every test quantile of a Constant dataset equals 42.

    NOTE(review): 42 is presumably the value the Constant dataset emits;
    confirm against the dataset definition.
    """
    for n in test_sizes:
        constant_data = Constant(n)
        sketch = GKArray(test_eps)
        for value in constant_data.data:
            sketch.add(value)
        for quantile in test_quantiles:
            estimate = sketch.quantile(quantile)
            np.testing.assert_equal(estimate, 42)
def test_distributions():
    """Check sketch accuracy for every dataset class at every test size.

    A fresh sketch is filled with the data of each (dataset, size) pair and
    evaluated against that data using the sketch's own eps as tolerance.
    """
    for dataset_cls in datasets:
        for size in test_sizes:
            samples = dataset_cls(size)
            sketch = GKArray(test_eps)
            for value in samples.data:
                sketch.add(value)
            evaluate_sketch_accuracy(sketch, samples, sketch.eps)