Exemple #1
0
 def test_denoiser_estimation_correct(self):
     """Check the denoised values when the denoiser is built from epsilon.

     A 4-slot UniformBloomFilter with only slot 0 set is denoised with
     epsilon = ln(3); the resulting sketch must match
     [1.5, -0.5, -0.5, -0.5] within an absolute tolerance of 0.01.
     """
     bloom_filter = UniformBloomFilter(4, random_seed=1)
     bloom_filter.sketch[0] = 1
     denoise = SurrealDenoiser(epsilon=math.log(3))
     # The denoiser is called with a list of sketches; only one is passed
     # here, so the first element of the result is the one under test.
     denoised_sketches = denoise([bloom_filter])
     np.testing.assert_allclose(
         denoised_sketches[0].sketch,
         np.array([1.5, -0.5, -0.5, -0.5]),
         atol=0.01)
 def test_denoise(self):
     """Check the denoised values when the denoiser is built from a probability.

     A 4-slot UniformBloomFilter with only slot 0 set is denoised with
     probability=0.25; the resulting sketch must match
     [1.5, -0.5, -0.5, -0.5] within an absolute tolerance of 0.01.
     """
     bloom_filter = UniformBloomFilter(4, random_seed=1)
     bloom_filter.sketch[0] = 1
     denoise = SurrealDenoiser(probability=0.25)
     # A single-sketch list goes in; the first returned sketch is checked.
     denoised_sketches = denoise([bloom_filter])
     np.testing.assert_allclose(
         denoised_sketches[0].sketch,
         np.array([1.5, -0.5, -0.5, -0.5]),
         atol=0.01,
         err_msg='Denoiser does not work.')
def main(argv):
    """Run the '3_vary_decay_rate' evaluation of exponential Bloom filters.

    One estimator config is created per decay rate listed in
    FLAGS.exponential_bloom_filter_decay_rate, and one scenario per ratio in
    FLAGS.set_size_ratio (set size = ratio * sketch size). The universe size
    is fixed at 100 times the sketch size.

    Args:
      argv: Command-line arguments; anything beyond the program name is
        rejected.

    Raises:
      app.UsageError: If more than one command-line argument is present.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    flip_probability = 1 / (1 + np.exp(FLAGS.noiser_epsilon))
    universe_size = int(100 * FLAGS.sketch_size)

    # One estimator configuration per decay rate.
    estimator_config_list = [
        SketchEstimatorConfig(
            name='exp_BF_' + str(int(decay_rate)),
            sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, decay_rate),
            estimator=FirstMomentEstimator(
                method='exp',
                denoiser=SurrealDenoiser(probability=flip_probability)),
            sketch_noiser=BlipNoiser(FLAGS.noiser_epsilon))
        for decay_rate in FLAGS.exponential_bloom_filter_decay_rate
    ]

    # One scenario per set-size ratio; the scenario name is the ratio
    # truncated to an integer.
    make_generator_factory = (
        set_generator.IndependentSetGenerator
        .get_generator_factory_with_num_and_size)
    scenario_config_list = [
        configs.ScenarioConfig(
            name=str(int(ratio)),
            set_generator_factory=make_generator_factory(
                universe_size=universe_size,
                num_sets=FLAGS.number_of_sets,
                set_size=int(ratio * FLAGS.sketch_size)))
        for ratio in FLAGS.set_size_ratio
    ]

    # The evaluation name carries the sketch size expressed in thousands.
    evaluation_name = (
        '3_vary_decay_rate_' + str(int(FLAGS.sketch_size / 1000)) + "k")
    evaluation_config = configs.EvaluationConfig(
        name=evaluation_name,
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_config_list)

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    run_evaluation()
def main(argv):
    """Run the '5_prediction' evaluation of exponential Bloom filters.

    One estimator config is created for every (sketch_size, epsilon) pair
    drawn from FLAGS.sketch_size and FLAGS.noiser_epsilon, and one scenario
    per universe size in FLAGS.universe_size.

    Args:
      argv: Command-line arguments; anything beyond the program name is
        rejected.

    Raises:
      app.UsageError: If more than one command-line argument is present.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    def build_estimator_config(size, eps):
        # The config name encodes the sketch size in thousands followed by
        # the flipping probability 1 / (1 + exp(eps)) with two decimals.
        flip_probability = 1 / (1 + np.exp(eps))
        return SketchEstimatorConfig(
            name=str(int(size / 1000)) + "k_" + \
                "{:.2f}".format(flip_probability),
            sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                size, FLAGS.exponential_bloom_filter_decay_rate),
            estimator=FirstMomentEstimator(
                method='exp',
                denoiser=SurrealDenoiser(eps)),
            sketch_noiser=BlipNoiser(eps))

    # Sketch size is the outer loop, epsilon the inner one.
    estimator_config_list = [
        build_estimator_config(sketch_size, epsilon)
        for sketch_size in FLAGS.sketch_size
        for epsilon in FLAGS.noiser_epsilon
    ]

    # One scenario per universe size, named by the size in millions.
    make_generator_factory = (
        set_generator.IndependentSetGenerator
        .get_generator_factory_with_num_and_size)
    scenario_config_list = [
        configs.ScenarioConfig(
            name="{:.1f}".format(universe_size / 1000000),
            set_generator_factory=make_generator_factory(
                universe_size=universe_size,
                num_sets=FLAGS.number_of_sets,
                set_size=FLAGS.set_size))
        for universe_size in FLAGS.universe_size
    ]

    evaluation_config = configs.EvaluationConfig(
        name='5_prediction',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_config_list)

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    run_evaluation()
Exemple #5
0
 def test_denoise_and_union(self):
     """Check that the mean estimate over noised sketch pairs is near the truth.

     For each of 100 seeds, two LogarithmicBloomFilter sketches of length
     2048 are filled with the same 1000 ids, noised, and passed together to
     a FirstMomentEstimator with a SurrealDenoiser. The mean of the 100
     estimates must be within 10% of 1000.
     """
     truth = 1000
     noiser = BlipNoiser(epsilon=math.log(3),
                         random_state=np.random.RandomState(5))
     estimator = FirstMomentEstimator(
         method='log', denoiser=SurrealDenoiser(epsilon=math.log(3)))

     def build_noised_sketch(seed, ids):
         # Fill a fresh sketch with the ids, then pass it through the
         # shared noiser.
         bloom_filter = LogarithmicBloomFilter(length=2048, random_seed=seed)
         bloom_filter.add_ids(ids)
         return noiser(bloom_filter)

     estimates = []
     for seed in range(100):
         ids = np.arange(truth)
         # Both sketches of a pair use the same seed and the same ids.
         sketch_pair = [build_noised_sketch(seed, ids) for _ in range(2)]
         estimates.append(estimator(sketch_pair))
     self.assertAlmostEqual(truth, np.mean(estimates), delta=truth * 0.1)
def main(argv):
    """Run the '2_vary_set_size' evaluation for three Bloom filter variants.

    Geometric, logarithmic and exponential Bloom filter estimators are
    configured with the same noiser epsilon, and one scenario is created per
    ratio in FLAGS.set_size_ratio (set size = ratio * sketch size).

    Args:
      argv: Command-line arguments; anything beyond the program name is
        rejected.

    Raises:
      app.UsageError: If more than one command-line argument is present.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    flip_probability = 1 / (1 + np.exp(FLAGS.noiser_epsilon))

    def build_estimator_config(name, method, sketch_factory):
        # Every variant gets its own denoiser and noiser instances.
        return SketchEstimatorConfig(
            name=name,
            sketch_factory=sketch_factory,
            estimator=FirstMomentEstimator(
                method=method,
                denoiser=SurrealDenoiser(probability=flip_probability)),
            sketch_noiser=BlipNoiser(FLAGS.noiser_epsilon))

    # The three estimators, in the order geometric, logarithmic, exponential.
    geometric_config = build_estimator_config(
        'geo_BF', 'geo',
        GeometricBloomFilter.get_sketch_factory(
            FLAGS.sketch_size, FLAGS.geometric_bloom_filter_probability))
    logarithmic_config = build_estimator_config(
        'log_BF', 'log',
        LogarithmicBloomFilter.get_sketch_factory(FLAGS.sketch_size))
    exponential_config = build_estimator_config(
        'exp_BF', 'exp',
        ExponentialBloomFilter.get_sketch_factory(
            FLAGS.sketch_size, FLAGS.exponential_bloom_filter_decay_rate))
    estimator_config_list = [
        geometric_config,
        logarithmic_config,
        exponential_config,
    ]

    # One scenario per set-size ratio, named by the ratio with one decimal.
    make_generator_factory = (
        set_generator.IndependentSetGenerator
        .get_generator_factory_with_num_and_size)
    scenario_config_list = [
        configs.ScenarioConfig(
            name="{:.1f}".format(ratio),
            set_generator_factory=make_generator_factory(
                universe_size=FLAGS.universe_size,
                num_sets=FLAGS.number_of_sets,
                set_size=int(ratio * FLAGS.sketch_size)))
        for ratio in FLAGS.set_size_ratio
    ]

    evaluation_config = configs.EvaluationConfig(
        name='2_vary_set_size',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_config_list)

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    run_evaluation()
Exemple #7
0
  def setUp(self):
    """Create the fixture values and estimator configs shared by the tests.

    Besides scalar parameters and three seeded random states, this fills
    two dictionaries keyed by config name:
    ``name_to_non_noised_estimator_config`` (seven configs) and
    ``name_to_noised_estimator_config`` (six configs; there is no noised
    HyperLogLog entry).
    """
    super(InteroperabilityTest, self).setUp()

    # Simulation sizes.
    self.number_of_trials = 2
    self.universe_size = 2000
    self.set_size_list = [5, 7, 9]
    self.large_set_size = 6
    self.small_set_size = 3
    self.sketch_size = 128
    self.number_of_sets = 3
    self.set_size = 50
    self.num_large_sets = 1
    self.num_small_sets = 3

    # Set-generator options.
    self.order = set_generator.ORDER_RANDOM
    self.user_activity_association = (
        set_generator.USER_ACTIVITY_ASSOCIATION_INDEPENDENT)
    self.shared_prop = 0.2

    # Sketch and noise parameters.
    self.num_bloom_filter_hashes = 2
    self.exponential_bloom_filter_decay_rate = 10
    self.noiser_epsilon = np.log(3)
    self.noiser_flip_probability = .25

    # Independent, fixed-seed random states.
    self.set_random_state = np.random.RandomState(42)
    self.sketch_random_state = np.random.RandomState(137)
    self.noise_random_state = np.random.RandomState(3)

    # Short aliases for the values reused in the configs below.
    size = self.sketch_size
    num_hashes = self.num_bloom_filter_hashes
    decay_rate = self.exponential_bloom_filter_decay_rate
    epsilon = self.noiser_epsilon
    flip_probability = self.noiser_flip_probability
    noise_rng = self.noise_random_state

    # ---- Configs without any sketch noiser ----
    plain_cascading_legions = SketchEstimatorConfig(
        name='cascading_legions',
        sketch_factory=CascadingLegions.get_sketch_factory(size, size),
        estimator=Estimator())

    plain_bloom_filter = SketchEstimatorConfig(
        name='bloom_filter-union_estimator',
        sketch_factory=BloomFilter.get_sketch_factory(size, num_hashes),
        estimator=UnionEstimator())

    plain_log_bloom_filter = SketchEstimatorConfig(
        name='log_bloom_filter-first_moment_log',
        sketch_factory=LogarithmicBloomFilter.get_sketch_factory(size),
        estimator=FirstMomentEstimator(method='log'))

    plain_exp_bloom_filter = SketchEstimatorConfig(
        name='exp_bloom_filter-first_moment_exp',
        sketch_factory=ExponentialBloomFilter.get_sketch_factory(
            size, decay_rate),
        estimator=FirstMomentEstimator(method='exp'))

    plain_vector_of_counts = SketchEstimatorConfig(
        name='vector_of_counts-sequential',
        sketch_factory=VectorOfCounts.get_sketch_factory(size),
        estimator=SequentialEstimator())

    plain_exact = SketchEstimatorConfig(
        name='exact_set-lossless',
        sketch_factory=ExactSet.get_sketch_factory(),
        estimator=LosslessEstimator())

    plain_hll = SketchEstimatorConfig(
        name='hyper_log_log',
        sketch_factory=HyperLogLogPlusPlus.get_sketch_factory(size),
        estimator=HllCardinality())

    non_noised_by_name = {}
    for plain_config in (
        plain_exact,
        plain_cascading_legions,
        plain_bloom_filter,
        plain_log_bloom_filter,
        plain_exp_bloom_filter,
        plain_vector_of_counts,
        plain_hll,
    ):
      non_noised_by_name[plain_config.name] = plain_config
    self.name_to_non_noised_estimator_config = non_noised_by_name

    # ---- Configs with a sketch noiser ----
    noised_cascading_legions = SketchEstimatorConfig(
        name='cascading_legions',
        sketch_factory=CascadingLegions.get_sketch_factory(size, size),
        estimator=Estimator(),
        sketch_noiser=Noiser(flip_probability))

    noised_bloom_filter = SketchEstimatorConfig(
        name='bloom_filter-union_estimator',
        sketch_factory=BloomFilter.get_sketch_factory(size, num_hashes),
        estimator=UnionEstimator(),
        sketch_noiser=BlipNoiser(epsilon, noise_rng))

    # The first-moment estimators are paired with a denoiser configured
    # from the flip probability.
    noised_log_bloom_filter = SketchEstimatorConfig(
        name='log_bloom_filter-first_moment_log',
        sketch_factory=LogarithmicBloomFilter.get_sketch_factory(size),
        estimator=FirstMomentEstimator(
            method='log',
            denoiser=SurrealDenoiser(probability=flip_probability)),
        sketch_noiser=BlipNoiser(epsilon, noise_rng))

    noised_exp_bloom_filter = SketchEstimatorConfig(
        name='exp_bloom_filter-first_moment_exp',
        sketch_factory=ExponentialBloomFilter.get_sketch_factory(
            size, decay_rate),
        estimator=FirstMomentEstimator(
            method='exp',
            denoiser=SurrealDenoiser(probability=flip_probability)),
        sketch_noiser=BlipNoiser(epsilon, noise_rng))

    noised_vector_of_counts = SketchEstimatorConfig(
        name='vector_of_counts-sequential',
        sketch_factory=VectorOfCounts.get_sketch_factory(size),
        estimator=SequentialEstimator(),
        sketch_noiser=LaplaceNoiser())

    noised_exact = SketchEstimatorConfig(
        name='exact_set-lossless',
        sketch_factory=ExactSet.get_sketch_factory(),
        estimator=LosslessEstimator(),
        sketch_noiser=AddRandomElementsNoiser(1, noise_rng))

    noised_by_name = {}
    for noised_config in (
        noised_exact,
        noised_cascading_legions,
        noised_bloom_filter,
        noised_log_bloom_filter,
        noised_exp_bloom_filter,
        noised_vector_of_counts,
    ):
      noised_by_name[noised_config.name] = noised_config
    self.name_to_noised_estimator_config = noised_by_name
    def setUp(self):
        """Create the fixture values and estimator configs shared by the tests.

        Besides scalar parameters and three seeded random states, this fills
        two dictionaries keyed by a short sketch name:
        ``name_to_non_noised_estimator_config`` (eight configs) and
        ``name_to_noised_estimator_config`` (seven configs; there is no
        noised 'hll' entry).
        """
        super(InteroperabilityTest, self).setUp()

        # Simulation sizes.
        self.number_of_trials = 2
        self.universe_size = 2000
        self.set_size = 5
        self.large_set_size = 6
        self.small_set_size = 3
        self.sketch_size = 64
        self.number_of_sets = 2
        self.num_large_sets = 1
        self.num_small_sets = 3

        # Set-generator options.
        self.order = set_generator.ORDER_RANDOM
        self.user_activity_association = (
            set_generator.USER_ACTIVITY_ASSOCIATION_INDEPENDENT)
        self.shared_prop = 0.2

        # Sketch and noise parameters.
        self.num_bloom_filter_hashes = 2
        self.exponential_bloom_filter_decay_rate = 10
        self.noiser_epsilon = np.log(3)
        self.noiser_flip_probability = .25

        # Independent, fixed-seed random states.
        self.set_random_state = np.random.RandomState(42)
        self.sketch_random_state = np.random.RandomState(137)
        self.noise_random_state = np.random.RandomState(3)

        # Short aliases for the values reused in the configs below.
        size = self.sketch_size
        num_hashes = self.num_bloom_filter_hashes
        decay_rate = self.exponential_bloom_filter_decay_rate
        epsilon = self.noiser_epsilon
        flip_probability = self.noiser_flip_probability
        noise_rng = self.noise_random_state

        def make_config(sketch_factory, estimator, sketch_noiser=None):
            # No config in this fixture uses an estimate noiser.
            return EstimatorConfig(
                sketch_factory=sketch_factory,
                estimator=estimator,
                sketch_noiser=sketch_noiser,
                estimate_noiser=None)

        # ---- Configs without any sketch noiser ----
        plain_cascading_legions = make_config(
            CascadingLegions.get_sketch_factory(size, size),
            Estimator())
        plain_bloom_filter = make_config(
            BloomFilter.get_sketch_factory(size, num_hashes),
            UnionEstimator())
        plain_log_bloom_filter = make_config(
            LogarithmicBloomFilter.get_sketch_factory(size),
            FirstMomentEstimator(method='log'))
        plain_exp_bloom_filter = make_config(
            ExponentialBloomFilter.get_sketch_factory(size, decay_rate),
            FirstMomentEstimator(method='exp'))
        plain_geo_bloom_filter = make_config(
            GeometricBloomFilter.get_sketch_factory(size),
            GeometricUnionEstimator())
        plain_vector_of_counts = make_config(
            VectorOfCounts.get_sketch_factory(size),
            SequentialEstimator())
        plain_exact = make_config(
            ExactSet.get_sketch_factory(),
            LosslessEstimator())
        plain_hll = make_config(
            HyperLogLogPlusPlus.get_sketch_factory(size),
            HllCardinality())

        self.name_to_non_noised_estimator_config = {
            'exact_set': plain_exact,
            'cascading_legions': plain_cascading_legions,
            'bloom_filter': plain_bloom_filter,
            'logarithmic_bloom_filter': plain_log_bloom_filter,
            'exponential_bloom_filter': plain_exp_bloom_filter,
            'geometric_bloom_filter': plain_geo_bloom_filter,
            'vector_of_counts': plain_vector_of_counts,
            'hll': plain_hll,
        }

        # ---- Configs registered as "noised" ----
        noised_cascading_legions = make_config(
            CascadingLegions.get_sketch_factory(size, size),
            Estimator(),
            Noiser(flip_probability))
        noised_bloom_filter = make_config(
            BloomFilter.get_sketch_factory(size, num_hashes),
            UnionEstimator(),
            BlipNoiser(epsilon, noise_rng))

        # NOTE(review): the two first-moment configs below attach a denoiser
        # but leave sketch_noiser as None, so no sketch noiser is configured
        # for them -- confirm this is intended.
        noised_log_bloom_filter = make_config(
            LogarithmicBloomFilter.get_sketch_factory(size),
            FirstMomentEstimator(
                method='log',
                denoiser=SurrealDenoiser(probability=flip_probability)),
            None)
        noised_exp_bloom_filter = make_config(
            ExponentialBloomFilter.get_sketch_factory(size, decay_rate),
            FirstMomentEstimator(
                method='exp',
                denoiser=SurrealDenoiser(probability=flip_probability)),
            None)

        noised_geo_bloom_filter = make_config(
            GeometricBloomFilter.get_sketch_factory(size),
            GeometricUnionEstimator(),
            BlipNoiser(epsilon, noise_rng))
        noised_vector_of_counts = make_config(
            VectorOfCounts.get_sketch_factory(size),
            SequentialEstimator(),
            LaplaceNoiser())
        noised_exact = make_config(
            ExactSet.get_sketch_factory(),
            LosslessEstimator(),
            AddRandomElementsNoiser(1, noise_rng))

        self.name_to_noised_estimator_config = {
            'exact_set': noised_exact,
            'cascading_legions': noised_cascading_legions,
            'bloom_filter': noised_bloom_filter,
            'logarithmic_bloom_filter': noised_log_bloom_filter,
            'exponential_bloom_filter': noised_exp_bloom_filter,
            'geometric_bloom_filter': noised_geo_bloom_filter,
            'vector_of_counts': noised_vector_of_counts,
        }
def main(argv):
    """Run the '1_vary_flip_prob' evaluation for three Bloom filter variants.

    For every epsilon in FLAGS.noiser_epsilon, geometric, logarithmic and
    exponential Bloom filter estimators are configured; their names carry
    the flipping probability 1 / (1 + exp(epsilon)) with two decimals. All
    of them are evaluated on a single 'independent' scenario.

    Args:
      argv: Command-line arguments; anything beyond the program name is
        rejected.

    Raises:
      app.UsageError: If more than one command-line argument is present.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    def build_estimator_config(prefix, method, sketch_factory, eps, flip_prob):
        # Every config gets its own denoiser and noiser built from eps.
        return SketchEstimatorConfig(
            name=prefix + "{:.2f}".format(flip_prob),
            sketch_factory=sketch_factory,
            estimator=FirstMomentEstimator(method=method,
                                           denoiser=SurrealDenoiser(eps)),
            sketch_noiser=BlipNoiser(eps))

    # NOTE: a uniform Bloom filter config ('unif_BF_', BloomFilter with
    # UnionEstimator) used to be listed here as well; it is disabled.
    estimator_config_list = []
    for epsilon in FLAGS.noiser_epsilon:
        flip_probability = 1 / (1 + np.exp(epsilon))

        geometric_config = build_estimator_config(
            'geo_BF_', 'geo',
            GeometricBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, FLAGS.geometric_bloom_filter_probability),
            epsilon, flip_probability)
        logarithmic_config = build_estimator_config(
            'log_BF_', 'log',
            LogarithmicBloomFilter.get_sketch_factory(FLAGS.sketch_size),
            epsilon, flip_probability)
        exponential_config = build_estimator_config(
            'exp_BF_', 'exp',
            ExponentialBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, FLAGS.exponential_bloom_filter_decay_rate),
            epsilon, flip_probability)

        estimator_config_list.extend([
            geometric_config,
            logarithmic_config,
            exponential_config,
        ])

    # A single scenario with independently generated sets.
    independent_scenario = configs.ScenarioConfig(
        name='independent',
        set_generator_factory=(
            set_generator.IndependentSetGenerator
            .get_generator_factory_with_num_and_size(
                universe_size=FLAGS.universe_size,
                num_sets=FLAGS.number_of_sets,
                set_size=FLAGS.set_size)))
    evaluation_config = configs.EvaluationConfig(
        name='1_vary_flip_prob',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=[independent_scenario])

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name="eval_adbf_result",
        out_dir=".",
        workers=10)
    run_evaluation()