def modify_experiment_data_if_requested(  # pylint: disable=too-many-arguments
        experiment_df, experiment_names, benchmarks, fuzzers,
        label_by_experiment, end_time, merge_with_clobber):
    """Applies the optional modifications selected by the other parameters to
    |experiment_df| and returns the resulting dataframe. The parameters come
    from values specified by the user on the command line (or from callers of
    generate_report). Each modification is applied only when requested, in the
    order: benchmark filter, fuzzer filter, per-experiment labelling, time
    cut-off, merge with clobber. When nothing is requested, |experiment_df|
    itself is returned."""
    modified_df = experiment_df

    # Keep only the requested benchmarks. Skipped when |benchmarks| is None
    # or empty.
    if benchmarks:
        modified_df = data_utils.filter_benchmarks(modified_df, benchmarks)

    # Keep only the requested fuzzers. Skipped only when |fuzzers| is None.
    if fuzzers is not None:
        modified_df = data_utils.filter_fuzzers(modified_df, fuzzers)

    # Label every fuzzer with the experiment it came from, which makes it easy
    # to compare the same fuzzer across multiple experiments.
    if label_by_experiment:
        modified_df = data_utils.label_fuzzers_by_experiment(modified_df)

    # Cut the experiment off at |end_time|.
    if end_time is not None:
        modified_df = data_utils.filter_max_time(modified_df, end_time)

    # Merge the experiments named in |experiment_names| with clobber.
    if merge_with_clobber:
        modified_df = data_utils.clobber_experiments_data(
            modified_df, experiment_names)

    return modified_df
def test_clobber_experiments_data():
    """Tests that clobber experiments data clobbers stale snapshots from
    earlier experiments."""
    # Build one dataframe out of three experiments: experiment-0, -1 and -2.
    per_experiment_frames = [
        create_experiment_data(f'experiment-{experiment_num}')
        for experiment_num in range(3)
    ]
    merged_df = pd.concat(per_experiment_frames).reset_index()

    # Remove the (libpng, afl) rows of experiment-2 so that this pair can only
    # be taken from an earlier experiment.
    is_latest_libpng_afl = ((merged_df.experiment == 'experiment-2') &
                            (merged_df.benchmark == 'libpng') &
                            (merged_df.fuzzer == 'afl'))
    rows_to_drop = merged_df[is_latest_libpng_afl].index
    merged_df = merged_df.drop(rows_to_drop)

    experiments = list(merged_df['experiment'].drop_duplicates().values)
    clobbered_df = data_utils.clobber_experiments_data(merged_df, experiments)

    # Every pair is expected to come from experiment-2, except (libpng, afl)
    # which is expected to come from experiment-1.
    columns = ['experiment', 'benchmark', 'fuzzer']
    expected_rows = [
        ['experiment-2', 'libpng', 'libfuzzer'],
        ['experiment-2', 'libxml', 'afl'],
        ['experiment-2', 'libxml', 'libfuzzer'],
        ['experiment-1', 'libpng', 'afl'],
    ]
    expected_result = pd.DataFrame(expected_rows, columns=columns)
    expected_result = expected_result.sort_index()

    actual_values = clobbered_df[columns].drop_duplicates().values
    assert (actual_values == expected_result.values).all()
def generate_report(experiment_names,
                    report_directory,
                    report_name=None,
                    label_by_experiment=False,
                    benchmarks=None,
                    fuzzers=None,
                    report_type='default',
                    quick=False,
                    log_scale=False,
                    from_cached_data=False,
                    in_progress=False,
                    end_time=None,
                    merge_with_clobber=False):
    """Generate report helper. Loads the data for |experiment_names| (from the
    cached 'data.csv.gz' in |report_directory| when |from_cached_data| is set
    and that file exists, otherwise from a query whose result is then saved
    there), applies the requested filters and writes the rendered report to
    'index.html' in |report_directory|."""
    # The report is named after the first experiment unless a name is given.
    if not report_name:
        report_name = experiment_names[0]

    filesystem.create_directory(report_directory)

    data_path = os.path.join(report_directory, 'data.csv.gz')
    use_cached_data = from_cached_data and os.path.exists(data_path)
    if use_cached_data:
        experiment_df = pd.read_csv(data_path)
    else:
        experiment_df = queries.get_experiment_data(experiment_names)
        # Keep the raw data next to the report.
        experiment_df.to_csv(data_path)

    data_utils.validate_data(experiment_df)

    # Optional modifications, each applied only when requested.
    if benchmarks is not None:
        experiment_df = data_utils.filter_benchmarks(experiment_df, benchmarks)
    if fuzzers is not None:
        experiment_df = data_utils.filter_fuzzers(experiment_df, fuzzers)
    if label_by_experiment:
        experiment_df = data_utils.label_fuzzers_by_experiment(experiment_df)
    if end_time is not None:
        experiment_df = data_utils.filter_max_time(experiment_df, end_time)
    if merge_with_clobber:
        experiment_df = data_utils.clobber_experiments_data(
            experiment_df, experiment_names)

    # The plotter is built from the fuzzers that remain after the
    # modifications above.
    plotter = plotting.Plotter(experiment_df.fuzzer.unique(), quick, log_scale)
    experiment_ctx = experiment_results.ExperimentResults(
        experiment_df, report_directory, plotter, experiment_name=report_name)

    # |report_type| selects the HTML template used for rendering.
    detailed_report = rendering.render_report(experiment_ctx,
                                              report_type + '.html',
                                              in_progress)

    index_path = os.path.join(report_directory, 'index.html')
    filesystem.write(index_path, detailed_report)
def generate_report(experiment_names,
                    report_directory,
                    report_name=None,
                    label_by_experiment=False,
                    benchmarks=None,
                    fuzzers=None,
                    report_type='default',
                    quick=False,
                    log_scale=False,
                    from_cached_data=False,
                    in_progress=False,
                    end_time=None,
                    merge_with_clobber=False,
                    merge_with_clobber_nonprivate=False,
                    coverage_report=False):
    """Generate report helper. Loads the data for |experiment_names| (from the
    cached 'data.csv.gz' in |report_directory| when |from_cached_data| is set
    and that file exists, otherwise from a query), applies the requested
    modifications, saves the modified data when it was not read from the cache
    and writes the rendered report to 'index.html' in |report_directory|."""
    if merge_with_clobber_nonprivate:
        # Extend the experiment list with the nonprivate experiments to merge.
        experiment_names = (
            queries.add_nonprivate_experiments_for_merge_with_clobber(
                experiment_names))

    # The first experiment is the main one: it supplies the default report
    # name and the description.
    main_experiment_name = experiment_names[0]
    if not report_name:
        report_name = main_experiment_name

    filesystem.create_directory(report_directory)

    data_path = os.path.join(report_directory, 'data.csv.gz')
    if from_cached_data and os.path.exists(data_path):
        description = "from cached data"
        experiment_df = pd.read_csv(data_path)
    else:
        experiment_df = queries.get_experiment_data(experiment_names)
        description = queries.get_experiment_description(main_experiment_name)

    data_utils.validate_data(experiment_df)

    # Optional modifications, each applied only when requested.
    if benchmarks is not None:
        experiment_df = data_utils.filter_benchmarks(experiment_df, benchmarks)
    if fuzzers is not None:
        experiment_df = data_utils.filter_fuzzers(experiment_df, fuzzers)
    if label_by_experiment:
        experiment_df = data_utils.label_fuzzers_by_experiment(experiment_df)
    if end_time is not None:
        experiment_df = data_utils.filter_max_time(experiment_df, end_time)
    if merge_with_clobber or merge_with_clobber_nonprivate:
        experiment_df = data_utils.clobber_experiments_data(
            experiment_df, experiment_names)

    # Save the filtered data along with the report, unless cached data was
    # requested and the data file is present.
    if not (from_cached_data and os.path.exists(data_path)):
        experiment_df.to_csv(data_path)

    # Covered regions are only looked up for coverage reports; otherwise an
    # empty dict is passed on.
    coverage_dict = (
        coverage_data_utils.get_covered_regions_dict(experiment_df)
        if coverage_report else {})

    plotter = plotting.Plotter(experiment_df.fuzzer.unique(), quick, log_scale)
    experiment_ctx = experiment_results.ExperimentResults(
        experiment_df,
        coverage_dict,
        report_directory,
        plotter,
        experiment_name=report_name)

    # |report_type| selects the HTML template used for rendering.
    detailed_report = rendering.render_report(experiment_ctx,
                                              report_type + '.html',
                                              in_progress, coverage_report,
                                              description)

    index_path = os.path.join(report_directory, 'index.html')
    filesystem.write(index_path, detailed_report)