def test_backup_corpus(self):
    """Test backup_corpus."""
    libfuzzer_corpus = corpus_manager.FuzzTargetCorpus('libFuzzer', 'fuzzer')

    corpus_manager.backup_corpus('backup_bucket', libfuzzer_corpus, '/dir')

    self.mock.copy_file_to.assert_has_calls([
        mock.call('/2017-01-01.zip',
                  'gs://backup_bucket/corpus/libFuzzer/fuzzer/2017-01-01.zip')
    ])

    self.mock.copy_blob.assert_has_calls([
        mock.call('gs://backup_bucket/corpus/libFuzzer/fuzzer/2017-01-01.zip',
                  'gs://backup_bucket/corpus/libFuzzer/fuzzer/latest.zip'),
    ])
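
The assertions above imply what corpus_manager.backup_corpus does: archive the local corpus directory into a dated zip, upload it under gs://<backup_bucket>/corpus/<engine>/<target>/, then copy that blob over latest.zip. The following is an illustrative sketch only, not the project's implementation; backup_corpus_sketch, copy_file_to, and copy_blob are local stand-ins for the storage helpers the test mocks.

import datetime
import os
import zipfile


def copy_file_to(local_path, gcs_url):
    """Stand-in for the project's GCS upload helper (mocked as copy_file_to above)."""
    print('upload %s -> %s' % (local_path, gcs_url))


def copy_blob(src_url, dst_url):
    """Stand-in for the project's GCS blob-copy helper (mocked as copy_blob above)."""
    print('copy %s -> %s' % (src_url, dst_url))


def backup_corpus_sketch(backup_bucket, engine, target_name, directory):
    """Zip the local corpus, upload a dated archive, then refresh latest.zip."""
    date_str = str(datetime.datetime.utcnow().date())
    zip_path = os.path.join(directory, os.pardir, '%s.zip' % date_str)

    # Archive every file in the corpus directory.
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for root, _, files in os.walk(directory):
            for name in files:
                zip_file.write(os.path.join(root, name), arcname=name)

    gcs_dir = 'gs://%s/corpus/%s/%s' % (backup_bucket, engine, target_name)
    dated_url = '%s/%s.zip' % (gcs_dir, date_str)

    copy_file_to(zip_path, dated_url)                 # e.g. .../2017-01-01.zip
    copy_blob(dated_url, '%s/latest.zip' % gcs_dir)   # repoint latest.zip
    return dated_url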
Example #2
def do_corpus_pruning(context, last_execution_failed, revision):
    """Run corpus pruning."""
    # Set |FUZZ_TARGET| environment variable to help with unarchiving only the
    # fuzz target and its related files.
    environment.set_value("FUZZ_TARGET", context.fuzz_target.binary)

    if environment.is_trusted_host():
        from bot.untrusted_runner import tasks_host

        return tasks_host.do_corpus_pruning(context, last_execution_failed,
                                            revision)

    build_manager.setup_build(revision=revision)
    build_directory = environment.get_value("BUILD_DIR")
    if not build_directory:
        raise CorpusPruningException("Failed to setup build.")

    start_time = datetime.datetime.utcnow()
    runner = Runner(build_directory, context)
    pruner = CorpusPruner(runner)
    fuzzer_binary_name = os.path.basename(runner.target_path)

    # If our last execution failed, shrink to a randomized corpus of usable size
    # to prevent the corpus from growing unbounded and to avoid recurring failures
    # when trying to minimize it.
    if last_execution_failed:
        for corpus_url in [
                context.corpus.get_gcs_url(),
                context.quarantine_corpus.get_gcs_url(),
        ]:
            _limit_corpus_size(corpus_url, CORPUS_SIZE_LIMIT_FOR_FAILURES)

    # Get initial corpus to process from GCS.
    context.sync_to_disk()
    initial_corpus_size = shell.get_directory_file_count(
        context.initial_corpus_path)

    # Restore a small batch of quarantined units back to corpus.
    context.restore_quarantined_units()

    # Shrink to a minimized corpus using corpus merge.
    pruner.run(
        context.initial_corpus_path,
        context.minimized_corpus_path,
        context.bad_units_path,
    )

    # Sync minimized corpus back to GCS.
    context.sync_to_gcs()

    # Create corpus backup.
    backup_bucket = environment.get_value("BACKUP_BUCKET")
    corpus_backup_url = corpus_manager.backup_corpus(
        backup_bucket, context.corpus, context.minimized_corpus_path)

    minimized_corpus_size_units = shell.get_directory_file_count(
        context.minimized_corpus_path)
    minimized_corpus_size_bytes = shell.get_directory_size(
        context.minimized_corpus_path)

    logs.log("Corpus pruned from %d to %d units." %
             (initial_corpus_size, minimized_corpus_size_units))

    # Process bad units found during merge.
    # Mapping of crash state -> CorpusCrash
    crashes = {}
    pruner.process_bad_units(context.bad_units_path,
                             context.quarantine_corpus_path, crashes)
    context.quarantine_corpus.rsync_from_disk(context.quarantine_corpus_path)

    # Store corpus stats into CoverageInformation entity.
    project_qualified_name = context.fuzz_target.project_qualified_name()
    today = datetime.datetime.utcnow().date()
    coverage_info = data_types.CoverageInformation(
        fuzzer=project_qualified_name, date=today)

    quarantine_corpus_size = shell.get_directory_file_count(
        context.quarantine_corpus_path)
    quarantine_corpus_dir_size = shell.get_directory_size(
        context.quarantine_corpus_path)

    # Populate coverage stats.
    coverage_info.corpus_size_units = minimized_corpus_size_units
    coverage_info.corpus_size_bytes = minimized_corpus_size_bytes
    coverage_info.quarantine_size_units = quarantine_corpus_size
    coverage_info.quarantine_size_bytes = quarantine_corpus_dir_size
    coverage_info.corpus_backup_location = corpus_backup_url
    coverage_info.corpus_location = context.corpus.get_gcs_url()
    coverage_info.quarantine_location = context.quarantine_corpus.get_gcs_url()

    # Calculate remaining time to use for shared corpus merging.
    time_remaining = _get_time_remaining(start_time)
    if time_remaining <= 0:
        logs.log_warn("Not enough time for shared corpus merging.")
        return None

    cross_pollinator = CrossPollinator(runner)
    cross_pollinator.run(time_remaining)

    context.sync_to_gcs()

    # Update corpus size stats.
    minimized_corpus_size_units = shell.get_directory_file_count(
        context.minimized_corpus_path)
    minimized_corpus_size_bytes = shell.get_directory_size(
        context.minimized_corpus_path)
    coverage_info.corpus_size_units = minimized_corpus_size_units
    coverage_info.corpus_size_bytes = minimized_corpus_size_bytes

    logs.log("Finished.")

    result = CorpusPruningResult(
        coverage_info=coverage_info,
        crashes=list(crashes.values()),
        fuzzer_binary_name=fuzzer_binary_name,
        revision=environment.get_value("APP_REVISION"),
    )

    return result
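
Both variants of do_corpus_pruning budget the later cross-pollination step with _get_time_remaining(start_time), which is not shown in this snippet. A plausible sketch, assuming a hypothetical CORPUS_PRUNING_TIMEOUT budget in seconds: subtract the elapsed wall-clock time from the overall task budget.

import datetime

# Hypothetical overall task budget; the real value comes from task configuration.
CORPUS_PRUNING_TIMEOUT = 22 * 60 * 60  # seconds


def _get_time_remaining(start_time):
    """Return the number of seconds left in the pruning task's time budget."""
    time_used = (datetime.datetime.utcnow() - start_time).total_seconds()
    return int(CORPUS_PRUNING_TIMEOUT - time_used)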
Example #3
def do_corpus_pruning(context, last_execution_failed, revision):
    """Run corpus pruning."""
    # Set |FUZZ_TARGET| environment variable to help with unarchiving only the
    # fuzz target and its related files.
    environment.set_value('FUZZ_TARGET', context.fuzz_target.binary)

    if environment.is_trusted_host():
        from bot.untrusted_runner import tasks_host
        return tasks_host.do_corpus_pruning(context, last_execution_failed,
                                            revision)

    if not build_manager.setup_build(revision=revision):
        raise CorpusPruningException('Failed to setup build.')

    build_directory = environment.get_value('BUILD_DIR')
    start_time = datetime.datetime.utcnow()
    runner = Runner(build_directory, context)
    pruner = CorpusPruner(runner)
    fuzzer_binary_name = os.path.basename(runner.target_path)

    # If our last execution failed, shrink to a randomized corpus of usable size
    # to prevent the corpus from growing unbounded and to avoid recurring failures
    # when trying to minimize it.
    if last_execution_failed:
        for corpus_url in [
                context.corpus.get_gcs_url(),
                context.quarantine_corpus.get_gcs_url()
        ]:
            _limit_corpus_size(corpus_url)

    # Get initial corpus to process from GCS.
    context.sync_to_disk()
    initial_corpus_size = shell.get_directory_file_count(
        context.initial_corpus_path)

    # Restore a small batch of quarantined units back to corpus.
    context.restore_quarantined_units()

    # Shrink to a minimized corpus using corpus merge.
    pruner_stats = pruner.run(context.initial_corpus_path,
                              context.minimized_corpus_path,
                              context.bad_units_path)

    # Sync minimized corpus back to GCS.
    context.sync_to_gcs()

    # Create corpus backup.
    # Temporarily copy the past crash regressions folder into the minimized corpus
    # so that the corpus backup archive can include both.
    regressions_input_dir = os.path.join(context.initial_corpus_path,
                                         'regressions')
    regressions_output_dir = os.path.join(context.minimized_corpus_path,
                                          'regressions')
    if shell.get_directory_file_count(regressions_input_dir):
        shutil.copytree(regressions_input_dir, regressions_output_dir)
    backup_bucket = environment.get_value('BACKUP_BUCKET')
    corpus_backup_url = corpus_manager.backup_corpus(
        backup_bucket, context.corpus, context.minimized_corpus_path)
    shell.remove_directory(regressions_output_dir)

    minimized_corpus_size_units = shell.get_directory_file_count(
        context.minimized_corpus_path)
    minimized_corpus_size_bytes = shell.get_directory_size(
        context.minimized_corpus_path)

    logs.log('Corpus pruned from %d to %d units.' %
             (initial_corpus_size, minimized_corpus_size_units))

    # Process bad units found during merge.
    # Mapping of crash state -> CorpusCrash
    crashes = {}
    pruner.process_bad_units(context.bad_units_path,
                             context.quarantine_corpus_path, crashes)
    context.quarantine_corpus.rsync_from_disk(context.quarantine_corpus_path)

    # Store corpus stats into CoverageInformation entity.
    project_qualified_name = context.fuzz_target.project_qualified_name()
    today = datetime.datetime.utcnow().date()
    coverage_info = data_types.CoverageInformation(
        fuzzer=project_qualified_name, date=today)

    quarantine_corpus_size = shell.get_directory_file_count(
        context.quarantine_corpus_path)
    quarantine_corpus_dir_size = shell.get_directory_size(
        context.quarantine_corpus_path)

    # Save the minimized corpus size before cross-pollination to report in BigQuery.
    pre_pollination_corpus_size = minimized_corpus_size_units

    # Populate coverage stats.
    coverage_info.corpus_size_units = minimized_corpus_size_units
    coverage_info.corpus_size_bytes = minimized_corpus_size_bytes
    coverage_info.quarantine_size_units = quarantine_corpus_size
    coverage_info.quarantine_size_bytes = quarantine_corpus_dir_size
    coverage_info.corpus_backup_location = corpus_backup_url
    coverage_info.corpus_location = context.corpus.get_gcs_url()
    coverage_info.quarantine_location = context.quarantine_corpus.get_gcs_url()

    # Calculate remaining time to use for shared corpus merging.
    time_remaining = _get_time_remaining(start_time)
    if time_remaining <= 0:
        logs.log_warn('Not enough time for shared corpus merging.')
        return None

    cross_pollinator = CrossPollinator(runner)
    pollinator_stats = cross_pollinator.run(time_remaining)

    context.sync_to_gcs()

    # Update corpus size stats.
    minimized_corpus_size_units = shell.get_directory_file_count(
        context.minimized_corpus_path)
    minimized_corpus_size_bytes = shell.get_directory_size(
        context.minimized_corpus_path)
    coverage_info.corpus_size_units = minimized_corpus_size_units
    coverage_info.corpus_size_bytes = minimized_corpus_size_bytes

    logs.log('Finished.')

    sources = ','.join([
        fuzzer.fuzz_target.project_qualified_name()
        for fuzzer in context.cross_pollinate_fuzzers
    ])

    cross_pollination_stats = None
    if pruner_stats and pollinator_stats:
        cross_pollination_stats = CrossPollinationStats(
            project_qualified_name, context.cross_pollination_method, sources,
            context.tag, initial_corpus_size, pre_pollination_corpus_size,
            pruner_stats['edge_coverage'], pollinator_stats['edge_coverage'],
            pruner_stats['feature_coverage'],
            pollinator_stats['feature_coverage'])

    return CorpusPruningResult(coverage_info=coverage_info,
                               crashes=list(crashes.values()),
                               fuzzer_binary_name=fuzzer_binary_name,
                               revision=environment.get_value('APP_REVISION'),
                               cross_pollination_stats=cross_pollination_stats)
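
CrossPollinationStats is constructed above but not defined in this snippet. Judging purely from the positional arguments at the call site, it is plausibly a simple record; a hedged sketch as a namedtuple, with field names guessed from the values passed in:

import collections

# Field names are inferred from the call site above and may differ from the
# project's actual definition.
CrossPollinationStats = collections.namedtuple('CrossPollinationStats', [
    'project_qualified_name',
    'method',
    'sources',
    'tag',
    'initial_corpus_size',
    'corpus_size',
    'initial_edge_coverage',
    'edge_coverage',
    'initial_feature_coverage',
    'feature_coverage',
])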