def _get_test_evaluator(out_dir, overwrite=False):
    """Build an Evaluator from the configuration stored on `self`.

    `self` is not a parameter of this function; it is resolved from the
    enclosing scope when the function is called.

    Args:
      out_dir: Output directory handed to the Evaluator.
      overwrite: Forwarded unchanged as the Evaluator's `overwrite` argument.

    Returns:
      The newly constructed evaluator.Evaluator; it is not invoked here.
    """
    test_evaluator = evaluator.Evaluator(
        evaluation_config=self.evaluation_config,
        sketch_estimator_config_list=self.sketch_estimator_config_list,
        run_name=self.run_name,
        out_dir=out_dir,
        overwrite=overwrite)
    return test_evaluator
        def _run_evaluation_and_simulation(out_dir):
            """Run the evaluator, then the cardinality analyzer, in `out_dir`.

            Both objects are stored on `self` (as `self.evaluator` and
            `self.analyzer`) before being invoked. The analyzer reads the
            evaluation results from `out_dir` and writes its own output to
            the same directory.

            Args:
              out_dir: Directory used for evaluation output and for analysis
                input and output.
            """
            # Stage 1: evaluation.
            run_evaluator = evaluator.Evaluator(
                evaluation_config=self.evaluation_config,
                sketch_estimator_config_list=self.sketch_estimator_config_list,
                run_name=self.evaluation_run_name,
                out_dir=out_dir)
            self.evaluator = run_evaluator
            run_evaluator()

            # Stage 2: analysis of the results produced above.
            run_analyzer = analyzer.CardinalityEstimatorEvaluationAnalyzer(
                out_dir=out_dir,
                evaluation_directory=out_dir,
                evaluation_run_name=self.evaluation_run_name,
                evaluation_name=self.evaluation_config.name,
                estimable_criteria_list=[(0.05, 0.95), (1.01, 0.9)])
            self.analyzer = run_analyzer
            run_analyzer()
def main(argv):
    """Evaluate exponential Bloom filters across a list of decay rates.

    One sketch-estimator config is created per entry of
    FLAGS.exponential_bloom_filter_decay_rate and one scenario per entry of
    FLAGS.set_size_ratio. The evaluator is then run with out_dir='.' and
    10 workers.

    Args:
      argv: Command-line arguments; more than one element is rejected.

    Raises:
      app.UsageError: If `argv` has more than one element.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    flip_probability = 1 / (1 + np.exp(FLAGS.noiser_epsilon))
    # The universe is fixed at 100 times the sketch size.
    universe_size = int(100 * FLAGS.sketch_size)

    # One estimator config per decay rate.
    estimator_config_list = [
        SketchEstimatorConfig(
            name='exp_BF_' + str(int(decay_rate)),
            sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, decay_rate),
            estimator=FirstMomentEstimator(
                method='exp',
                denoiser=SurrealDenoiser(probability=flip_probability)),
            sketch_noiser=BlipNoiser(FLAGS.noiser_epsilon))
        for decay_rate in FLAGS.exponential_bloom_filter_decay_rate
    ]

    # One scenario per set-size ratio; the set size scales with sketch size.
    scenario_config_list = [
        configs.ScenarioConfig(
            name=str(int(ratio)),
            set_generator_factory=(
                set_generator.IndependentSetGenerator
                .get_generator_factory_with_num_and_size(
                    universe_size=universe_size,
                    num_sets=FLAGS.number_of_sets,
                    set_size=int(ratio * FLAGS.sketch_size))))
        for ratio in FLAGS.set_size_ratio
    ]

    evaluation_name = '3_vary_decay_rate_{}k'.format(
        int(FLAGS.sketch_size / 1000))
    evaluation_config = configs.EvaluationConfig(
        name=evaluation_name,
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_config_list)

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name='eval_adbf_result',
        out_dir='.',
        workers=10)
    run_evaluation()
def main(argv):
    """Evaluate exponential Bloom filters over sketch sizes and epsilons.

    One estimator config is created for every (sketch size, epsilon) pair
    drawn from FLAGS.sketch_size and FLAGS.noiser_epsilon, and one scenario
    per entry of FLAGS.universe_size. The evaluator is then run with
    out_dir='.' and 10 workers.

    Args:
      argv: Command-line arguments; more than one element is rejected.

    Raises:
      app.UsageError: If `argv` has more than one element.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Config names look like "<size in thousands>k_<flip probability>",
    # where the flip probability is 1 / (1 + exp(epsilon)).
    estimator_config_list = [
        SketchEstimatorConfig(
            name='{}k_{:.2f}'.format(
                int(size / 1000), 1 / (1 + np.exp(eps))),
            sketch_factory=ExponentialBloomFilter.get_sketch_factory(
                size, FLAGS.exponential_bloom_filter_decay_rate),
            estimator=FirstMomentEstimator(
                method='exp',
                denoiser=SurrealDenoiser(eps)),
            sketch_noiser=BlipNoiser(eps))
        for size in FLAGS.sketch_size
        for eps in FLAGS.noiser_epsilon
    ]

    # One scenario per universe size, named by its size in millions.
    scenario_config_list = [
        configs.ScenarioConfig(
            name='{:.1f}'.format(size / 1000000),
            set_generator_factory=(
                set_generator.IndependentSetGenerator
                .get_generator_factory_with_num_and_size(
                    universe_size=size,
                    num_sets=FLAGS.number_of_sets,
                    set_size=FLAGS.set_size)))
        for size in FLAGS.universe_size
    ]

    evaluation_config = configs.EvaluationConfig(
        name='5_prediction',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_config_list)

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name='eval_adbf_result',
        out_dir='.',
        workers=10)
    run_evaluation()
def main(argv):
    """Run the selected evaluation and then analyze its results.

    The evaluation config and the sketch-estimator configs are looked up by
    name in `evaluation_configs`. Simulation results are written to
    FLAGS.evaluation_out_dir and the analysis to FLAGS.analysis_out_dir.

    Args:
      argv: Command-line arguments; more than one element is rejected.

    Raises:
      app.UsageError: If `argv` has more than one element, or if any of the
        required flags has no value.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    required_flags = ('evaluation_out_dir', 'analysis_out_dir',
                      'evaluation_config', 'sketch_estimator_configs',
                      'evaluation_run_name', 'num_runs')
    for flag_name in required_flags:
        flags.mark_flag_as_required(flag_name)
    # mark_flag_as_required only registers a validator. When main() is
    # invoked through app.run the flags have already been parsed, so that
    # validator is not evaluated here. Check the values explicitly so a
    # missing flag is reported instead of failing later with an obscure error.
    missing_flags = [
        flag_name for flag_name in required_flags
        if getattr(FLAGS, flag_name) is None
    ]
    if missing_flags:
        raise app.UsageError(
            'Missing required flags: ' + ', '.join(missing_flags))

    logging.set_verbosity(logging.INFO)
    logging.info('====Running %s using evaluation %s for:\n%s',
                 FLAGS.evaluation_config, FLAGS.evaluation_run_name,
                 ', '.join(FLAGS.sketch_estimator_configs))

    # The registry maps a config name to a factory taking the number of runs.
    evaluation_config = evaluation_configs.NAME_TO_EVALUATION_CONFIGS[
        FLAGS.evaluation_config](FLAGS.num_runs)
    sketch_estimator_config_list = [
        evaluation_configs.NAME_TO_ESTIMATOR_CONFIGS[conf]
        for conf in FLAGS.sketch_estimator_configs
    ]

    # Run simulations.
    generate_results = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=sketch_estimator_config_list,
        run_name=FLAGS.evaluation_run_name,
        out_dir=FLAGS.evaluation_out_dir)
    generate_results()

    # Analyze results.
    logging.info('====Analyzing the results.')
    generate_summary = analyzer.CardinalityEstimatorEvaluationAnalyzer(
        out_dir=FLAGS.analysis_out_dir,
        evaluation_directory=FLAGS.evaluation_out_dir,
        evaluation_run_name=FLAGS.evaluation_run_name,
        evaluation_name=evaluation_config.name,
        error_margin=FLAGS.error_margin,
        proportion_of_runs=FLAGS.proportion_of_runs,
        plot_params={
            analyzer.BOXPLOT_SIZE_WIDTH_INCH: FLAGS.boxplot_size_width_inch,
            analyzer.BOXPLOT_SIZE_HEIGHT_INCH: FLAGS.boxplot_size_height_inch,
        })
    generate_summary()

    logging.info('====Evaluation and analysis done!')
def main(argv):
    """Compare three Bloom filter sketches across a range of set sizes.

    Geometric, logarithmic and exponential Bloom filter estimators are
    evaluated on one scenario per entry of FLAGS.set_size_ratio. The
    evaluator is run with out_dir='.' and 10 workers.

    Args:
      argv: Command-line arguments; more than one element is rejected.

    Raises:
      app.UsageError: If `argv` has more than one element.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    flip_probability = 1 / (1 + np.exp(FLAGS.noiser_epsilon))

    def build_config(name, method, sketch_factory):
        # The three configs share the denoiser and noiser settings; only
        # the name, estimation method and sketch factory differ.
        return SketchEstimatorConfig(
            name=name,
            sketch_factory=sketch_factory,
            estimator=FirstMomentEstimator(
                method=method,
                denoiser=SurrealDenoiser(probability=flip_probability)),
            sketch_noiser=BlipNoiser(FLAGS.noiser_epsilon))

    estimator_config_list = [
        build_config(
            'geo_BF', 'geo',
            GeometricBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, FLAGS.geometric_bloom_filter_probability)),
        build_config(
            'log_BF', 'log',
            LogarithmicBloomFilter.get_sketch_factory(FLAGS.sketch_size)),
        build_config(
            'exp_BF', 'exp',
            ExponentialBloomFilter.get_sketch_factory(
                FLAGS.sketch_size, FLAGS.exponential_bloom_filter_decay_rate)),
    ]

    # One scenario per ratio; the set size is the ratio times the sketch size.
    scenario_config_list = [
        configs.ScenarioConfig(
            name='{:.1f}'.format(ratio),
            set_generator_factory=(
                set_generator.IndependentSetGenerator
                .get_generator_factory_with_num_and_size(
                    universe_size=FLAGS.universe_size,
                    num_sets=FLAGS.number_of_sets,
                    set_size=int(ratio * FLAGS.sketch_size))))
        for ratio in FLAGS.set_size_ratio
    ]

    evaluation_config = configs.EvaluationConfig(
        name='2_vary_set_size',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=scenario_config_list)

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name='eval_adbf_result',
        out_dir='.',
        workers=10)
    run_evaluation()
Example #7
0
def _run(run_evaluation,
         run_analysis,
         generate_html_report,
         evaluation_out_dir,
         analysis_out_dir,
         report_out_dir,
         evaluation_config,
         sketch_estimator_configs,
         evaluation_run_name,
         num_runs,
         num_workers,
         error_margin,
         proportion_of_runs,
         boxplot_xlabel_rotate,
         boxplot_size_width_inch,
         boxplot_size_height_inch,
         analysis_type,
         max_frequency,
         barplot_size_width_inch=None,
         barplot_size_height_inch=None,
         universe_size=None):
    """Run the evaluation, analysis and HTML report stages that are enabled.

    Args:
      run_evaluation: If truthy, run the simulations.
      run_analysis: If truthy, analyze the evaluation results.
      generate_html_report: If truthy, generate a report from the analysis.
      evaluation_out_dir: Output directory of the evaluator; also the
        directory the analyzer reads evaluation results from.
      analysis_out_dir: Output directory of the analyzer; also passed to
        the report generator.
      report_out_dir: Output directory of the report generator.
      evaluation_config: Name used to look up the evaluation config via
        `evaluation_configs.get_evaluation_config`.
      sketch_estimator_configs: Iterable of estimator config names (strings),
        resolved via `evaluation_configs.get_estimator_configs`.
      evaluation_run_name: Run name shared by all three stages.
      num_runs: Passed to the evaluation config factory as `num_runs`.
      num_workers: Passed to the evaluator as `workers`.
      error_margin: Iterable of values convertible to float.
      proportion_of_runs: Iterable of values convertible to float, paired
        positionally with `error_margin` to form the estimable criteria.
      boxplot_xlabel_rotate: Plot parameter forwarded to the analyzer.
      boxplot_size_width_inch: Plot parameter forwarded to the analyzer.
      boxplot_size_height_inch: Plot parameter forwarded to the analyzer.
      analysis_type: 'frequency' selects the frequency analyzer and report
        generator; any other value selects the cardinality ones.
      max_frequency: Passed to `evaluation_configs.get_estimator_configs`.
      barplot_size_width_inch: Plot parameter forwarded to the analyzer.
      barplot_size_height_inch: Plot parameter forwarded to the analyzer.
      universe_size: If not None, converted to int and passed to the
        evaluation config factory as `universe_size`.
    """
    # Keep the requested name for logging: the resolved config object is
    # what the rest of the function works with.
    evaluation_config_name = evaluation_config

    evaluation_config_args = {'num_runs': num_runs}
    if universe_size is not None:
        evaluation_config_args['universe_size'] = int(universe_size)
    evaluation_config = evaluation_configs.get_evaluation_config(
        evaluation_config_name)(**evaluation_config_args)

    sketch_estimator_config_list = evaluation_configs.get_estimator_configs(
        sketch_estimator_configs, max_frequency)

    if run_evaluation:
        # Log the config name rather than the repr of the config object.
        logging.info('====Running %s using evaluation %s for:\n%s',
                     evaluation_config_name, evaluation_run_name,
                     ', '.join(sketch_estimator_configs))
        generate_results = evaluator.Evaluator(
            evaluation_config=evaluation_config,
            sketch_estimator_config_list=sketch_estimator_config_list,
            run_name=evaluation_run_name,
            out_dir=evaluation_out_dir,
            workers=num_workers)
        generate_results()

    error_margin = [float(x) for x in error_margin]
    proportion_of_runs = [float(x) for x in proportion_of_runs]
    # zip already yields tuples and stops at the shorter of the two lists.
    estimable_criteria_list = list(zip(error_margin, proportion_of_runs))

    if analysis_type == 'frequency':
        estimator_analyzer_func = analyzer.FrequencyEstimatorEvaluationAnalyzer
        report_generator_func = report_generator.FrequencyReportGenerator
    else:
        estimator_analyzer_func = analyzer.CardinalityEstimatorEvaluationAnalyzer
        report_generator_func = report_generator.CardinalityReportGenerator

    if run_analysis:
        logging.info('====Analyzing the results.')
        generate_summary = estimator_analyzer_func(
            out_dir=analysis_out_dir,
            evaluation_directory=evaluation_out_dir,
            evaluation_run_name=evaluation_run_name,
            evaluation_name=evaluation_config.name,
            estimable_criteria_list=estimable_criteria_list,
            plot_params={
                analyzer.XLABEL_ROTATE: boxplot_xlabel_rotate,
                analyzer.BOXPLOT_SIZE_WIDTH_INCH: boxplot_size_width_inch,
                analyzer.BOXPLOT_SIZE_HEIGHT_INCH: boxplot_size_height_inch,
                analyzer.BARPLOT_SIZE_WIDTH_INCH: barplot_size_width_inch,
                analyzer.BARPLOT_SIZE_HEIGHT_INCH: barplot_size_height_inch,
            })
        generate_summary()

    logging.info('====Evaluation and analysis done!')

    if generate_html_report:
        generate_report = report_generator_func(
            out_dir=report_out_dir,
            analysis_out_dir=analysis_out_dir,
            evaluation_run_name=evaluation_run_name,
            evaluation_name=evaluation_config.name)
        report_url = generate_report()
        logging.info('====Report generated: %s.', report_url)
 def get_test_evaluator(self, out_dir):
     """Build an Evaluator from this object's stored configuration.

     Args:
       out_dir: Output directory handed to the Evaluator.

     Returns:
       The newly constructed evaluator.Evaluator; it is not invoked here.
     """
     test_evaluator = evaluator.Evaluator(
         evaluation_config=self.evaluation_config,
         sketch_estimator_config_list=self.sketch_estimator_config_list,
         run_name=self.run_name,
         out_dir=out_dir)
     return test_evaluator
def main(argv):
    """Compare three Bloom filter sketches across noiser epsilons.

    For every entry of FLAGS.noiser_epsilon, geometric, logarithmic and
    exponential Bloom filter estimator configs are created, each named with
    the corresponding flip probability 1 / (1 + exp(epsilon)). They are
    evaluated on a single 'independent' scenario with out_dir='.' and
    10 workers.

    Args:
      argv: Command-line arguments; more than one element is rejected.

    Raises:
      app.UsageError: If `argv` has more than one element.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    def build_config(prefix, suffix, method, sketch_factory, eps):
        # All configs for one epsilon share the denoiser and noiser settings.
        return SketchEstimatorConfig(
            name=prefix + suffix,
            sketch_factory=sketch_factory,
            estimator=FirstMomentEstimator(method=method,
                                           denoiser=SurrealDenoiser(eps)),
            sketch_noiser=BlipNoiser(eps))

    estimator_config_list = []
    for eps in FLAGS.noiser_epsilon:
        # Flip probability, rendered with two decimals for the config name.
        flip_label = '{:.2f}'.format(1 / (1 + np.exp(eps)))

        # NOTE: a uniform Bloom filter config (BloomFilter with
        # UnionEstimator, name prefix 'unif_BF_') is currently disabled.
        estimator_config_list.extend([
            build_config(
                'geo_BF_', flip_label, 'geo',
                GeometricBloomFilter.get_sketch_factory(
                    FLAGS.sketch_size,
                    FLAGS.geometric_bloom_filter_probability),
                eps),
            build_config(
                'log_BF_', flip_label, 'log',
                LogarithmicBloomFilter.get_sketch_factory(FLAGS.sketch_size),
                eps),
            build_config(
                'exp_BF_', flip_label, 'exp',
                ExponentialBloomFilter.get_sketch_factory(
                    FLAGS.sketch_size,
                    FLAGS.exponential_bloom_filter_decay_rate),
                eps),
        ])

    # A single scenario with independently generated sets.
    independent_scenario = configs.ScenarioConfig(
        name='independent',
        set_generator_factory=(
            set_generator.IndependentSetGenerator
            .get_generator_factory_with_num_and_size(
                universe_size=FLAGS.universe_size,
                num_sets=FLAGS.number_of_sets,
                set_size=FLAGS.set_size)))
    evaluation_config = configs.EvaluationConfig(
        name='1_vary_flip_prob',
        num_runs=FLAGS.number_of_trials,
        scenario_config_list=[independent_scenario])

    run_evaluation = evaluator.Evaluator(
        evaluation_config=evaluation_config,
        sketch_estimator_config_list=estimator_config_list,
        run_name='eval_adbf_result',
        out_dir='.',
        workers=10)
    run_evaluation()