Example #1
    def conduct_trial(self):
        """Conduct the benchmarking trial."""
        self.initialize_directories()
        log_file = os.path.join(self.results_dir, 'fuzzer-log.txt')

        logs.info('Starting trial.')

        max_total_time = environment.get('MAX_TOTAL_TIME')
        args = (max_total_time, log_file)
        fuzz_thread = threading.Thread(target=run_fuzzer, args=args)
        fuzz_thread.start()

        if environment.get('FUZZ_OUTSIDE_EXPERIMENT'):
            # Hack so that the fuzz_thread has some time to fail if something is
            # wrong. Without this we will sleep for a long time before checking
            # if the fuzz thread is alive.
            time.sleep(5)

        while fuzz_thread.is_alive():
            self.sleep_until_next_sync()
            self.do_sync()
            self.cycle += 1

        logs.info('Doing final sync.')
        self.do_sync(final_sync=True)
        fuzz_thread.join()
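The trial loop above hands `(max_total_time, log_file)` to a `run_fuzzer` thread target that is not shown. Below is a minimal, signature-compatible sketch, assuming only that the real function runs the fuzzer for at most |max_total_time| seconds and writes output to |log_file|:

import time

def run_fuzzer(max_total_time, log_filename):
    """Hypothetical stand-in for the real run_fuzzer, which prepares corpora
    and launches the fuzzer process. Shown only to illustrate the contract
    the trial loop relies on."""
    deadline = time.time() + int(max_total_time)
    with open(log_filename, 'a') as log_file:
        while time.time() < deadline:
            log_file.write('fuzzing...\n')
            time.sleep(1)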
Example #2
def _unpack_clusterfuzz_seed_corpus(fuzz_target_path, corpus_directory):
    """If a clusterfuzz seed corpus archive is available, unpack it into the
    corpus directory if it exists. Copied from unpack_seed_corpus in
    engine_common.py in ClusterFuzz.
    """
    seed_corpus_archive_path = get_clusterfuzz_seed_corpus_path(
        fuzz_target_path)

    if not seed_corpus_archive_path:
        return

    with zipfile.ZipFile(seed_corpus_archive_path) as zip_file:
        # Unpack seed corpus recursively into the root of the main corpus
        # directory.
        idx = 0
        for seed_corpus_file in zip_file.infolist():
            if seed_corpus_file.filename.endswith('/'):
                # Ignore directories.
                continue

            # Skip files that exceed the corpus element size limit.
            if seed_corpus_file.file_size > CORPUS_ELEMENT_BYTES_LIMIT:
                continue

            output_filename = '%016d' % idx
            output_file_path = os.path.join(corpus_directory, output_filename)
            # ZipFile.extract() treats its second argument as a directory, so
            # write the member's bytes directly to keep the corpus root flat.
            with open(output_file_path, 'wb') as output_file:
                output_file.write(zip_file.read(seed_corpus_file))
            idx += 1

    logs.info('Unarchived %d files from seed corpus %s.', idx,
              seed_corpus_archive_path)
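`get_clusterfuzz_seed_corpus_path` is not shown. A plausible sketch, assuming it follows the ClusterFuzz/OSS-Fuzz convention of a `<fuzz_target>_seed_corpus.zip` archive placed next to the fuzz target binary:

import os

def get_clusterfuzz_seed_corpus_path(fuzz_target_path):
    """Hedged sketch: return the target's seed corpus archive if one exists,
    following the <target>_seed_corpus.zip naming convention."""
    seed_corpus_path = fuzz_target_path + '_seed_corpus.zip'
    return seed_corpus_path if os.path.exists(seed_corpus_path) else None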
Example #3
def measure_loop(experiment: str, max_total_time: int):
    """Continuously measure trials for |experiment|."""
    logs.initialize(default_extras={
        'component': 'dispatcher',
        'subcomponent': 'measurer',
    })
    logs.info('Start measure_loop.')

    with multiprocessing.Pool() as pool, multiprocessing.Manager() as manager:
        set_up_coverage_binaries(pool, experiment)
        # Using multiprocessing.Queue here would fail with a complaint that
        # queues can only be shared between processes through inheritance.
        q = manager.Queue()  # pytype: disable=attribute-error
        while True:
            try:
                # Get whether all trials have ended before we measure to prevent
                # races.
                all_trials_ended = scheduler.all_trials_ended(experiment)

                if not measure_all_trials(experiment, max_total_time, pool, q):
                    # We didn't measure any trials.
                    if all_trials_ended:
                        # There are no trials producing snapshots to measure.
                        # Given that we couldn't measure any snapshots, we won't
                        # be able to measure any in the future, so stop now.
                        break
            except Exception:  # pylint: disable=broad-except
                logger.error('Error occurred during measuring.')

            time.sleep(FAIL_WAIT_SECONDS)

    logger.info('Finished measuring.')
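The comment about `multiprocessing.Queue` refers to a real restriction: a plain queue passed as a pool-worker argument raises `RuntimeError: Queue objects should only be shared between processes through inheritance`, while a manager proxy pickles cleanly. A self-contained demonstration of the pattern:

import multiprocessing

def _put_one(queue):
    queue.put(1)

if __name__ == '__main__':
    with multiprocessing.Pool() as pool, multiprocessing.Manager() as manager:
        queue = manager.Queue()  # a picklable proxy, safe to pass to workers
        pool.apply(_put_one, (queue,))
        assert queue.get() == 1
        # Passing multiprocessing.Queue() instead would raise RuntimeError.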
Example #4
def _clean_seed_corpus(seed_corpus_dir):
    """Prepares |seed_corpus_dir| for the trial. This ensures that it can be
    used by AFL which is picky about the seed corpus. Moves seed corpus files
    from sub-directories into the corpus directory root. Also, deletes any files
    that exceed the 1 MB limit. If the NO_SEEDS env var is specified, then the
    seed corpus files are deleted."""
    if not os.path.exists(seed_corpus_dir):
        return

    if environment.get('NO_SEEDS'):
        logs.info('NO_SEEDS specified, deleting seed corpus files.')
        shutil.rmtree(seed_corpus_dir)
        os.mkdir(seed_corpus_dir)
        return

    failed_to_move_files = []
    for root, _, files in os.walk(seed_corpus_dir):
        for filename in files:
            file_path = os.path.join(root, filename)

            if os.path.getsize(file_path) > CORPUS_ELEMENT_BYTES_LIMIT:
                os.remove(file_path)
                logs.warning('Removed seed file %s as it exceeds the 1 MB '
                             'limit.', file_path)
                continue

            sha1sum = utils.file_hash(file_path)
            new_file_path = os.path.join(seed_corpus_dir, sha1sum)
            try:
                shutil.move(file_path, new_file_path)
            except OSError:
                failed_to_move_files.append((file_path, new_file_path))

    if failed_to_move_files:
        logs.error('Failed to move seed corpus files: %s', failed_to_move_files)
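`utils.file_hash` is not shown; the `sha1sum` variable name suggests a SHA-1 content hash, which also deduplicates seeds, since identical files map to the same destination name. A sketch under that assumption:

import hashlib

def file_hash(file_path, chunk_size=65536):
    """Hedged sketch of utils.file_hash: SHA-1 of the file's contents."""
    digest = hashlib.sha1()
    with open(file_path, 'rb') as file_handle:
        for chunk in iter(lambda: file_handle.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()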
Example #5
def build_measurer(benchmark: str) -> bool:
    """Do a coverage build for a benchmark."""
    try:
        logger.info('Building measurer for benchmark: %s.', benchmark)
        buildlib.build_coverage(benchmark)
        docker_name = benchmark_utils.get_docker_name(benchmark)
        archive_name = 'coverage-build-%s.tar.gz' % docker_name

        coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
        benchmark_coverage_binary_dir = coverage_binaries_dir / benchmark
        os.mkdir(benchmark_coverage_binary_dir)
        cloud_bucket_archive_path = exp_path.gcs(coverage_binaries_dir /
                                                 archive_name)
        gsutil.cp(cloud_bucket_archive_path,
                  str(benchmark_coverage_binary_dir),
                  parallel=False,
                  write_to_stdout=False)

        archive_path = benchmark_coverage_binary_dir / archive_name
        with tarfile.open(archive_path, 'r:gz') as tar:
            tar.extractall(benchmark_coverage_binary_dir)
        os.remove(archive_path)
        logs.info('Done building measurer for benchmark: %s.', benchmark)
        return True
    except Exception:  # pylint: disable=broad-except
        logger.error('Failed to build measurer for %s.', benchmark)
        return False
Example #6
def main():
    """Copies files needed to integrate an OSS-Fuzz benchmark and creates the
    benchmark's benchmark.yaml file."""
    parser = argparse.ArgumentParser(description='Integrate a new benchmark.')
    parser.add_argument('-p',
                        '--project',
                        help='Project for benchmark. Example: "zlib"',
                        required=True)
    parser.add_argument(
        '-f',
        '--fuzz-target',
        help='Fuzz target for benchmark. Example: "zlib_uncompress_fuzzer"',
        required=True)
    parser.add_argument(
        '-n',
        '--benchmark-name',
        help='Benchmark name. Defaults to <project>_<fuzz_target>',
        required=False)
    parser.add_argument('-c', '--commit', help='Project commit hash.')
    parser.add_argument(
        '-d',
        '--date',
        help='Date of the commit. Example: 2019-10-19T09:07:25+01:00')

    logs.initialize()
    args = parser.parse_args()
    benchmark = integrate_benchmark(args.project, args.fuzz_target,
                                    args.benchmark_name, args.commit,
                                    args.date)
    logs.info('Successfully integrated benchmark: %s.', benchmark)
    logs.info('Please run "make test-run-afl-%s" to test integration.',
              benchmark)
    return 0
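Assuming this entry point lives in a script named benchmark_integration.py (name hypothetical), an invocation might look like this:

# Hypothetical invocation; only -p and -f are required:
#   python3 benchmark_integration.py -p zlib -f zlib_uncompress_fuzzer \
#       -c <commit-hash> -d 2019-10-19T09:07:25+01:00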
Example #7
def run_diff_experiment(dry_run):
    """Run a diff experiment. This is an experiment that runs only on
    fuzzers that have changed since the last experiment."""
    fuzzers = experiment_changes.get_fuzzers_changed_since_last()
    logs.info('Running experiment with fuzzers: %s.', ' '.join(fuzzers))
    fuzzer_configs = fuzzer_utils.get_fuzzer_configs(fuzzers=fuzzers)
    return _run_experiment(fuzzer_configs, dry_run)
Example #8
def create_instance(instance_name: str,
                    instance_type: InstanceType,
                    config: dict,
                    startup_script: str = None,
                    preemptible: bool = False,
                    **kwargs) -> bool:
    """Creates a GCE instance with name, |instance_name|, type, |instance_type|
    and with optionally provided and |startup_script|."""

    if experiment_utils.is_local_experiment():
        return run_local_instance(startup_script)

    command = [
        'gcloud',
        'compute',
        'instances',
        'create',
        instance_name,
        '--image-family=cos-stable',
        '--image-project=cos-cloud',
        '--zone=%s' % config['cloud_compute_zone'],
        '--scopes=cloud-platform',
    ]
    if instance_type == InstanceType.DISPATCHER:
        command.extend([
            '--machine-type=%s' % DISPATCHER_MACHINE_TYPE,
            '--boot-disk-size=%s' % DISPATCHER_BOOT_DISK_SIZE,
            '--boot-disk-type=%s' % DISPATCHER_BOOT_DISK_TYPE,
        ])
    else:
        machine_type = config['runner_machine_type']
        if machine_type is not None:
            command.append('--machine-type=%s' % machine_type)
        else:
            # Do this to support KLEE experiments.
            command.extend([
                '--custom-memory=%s' % config['runner_memory'],
                '--custom-cpu=%s' % config['runner_num_cpu_cores'],
            ])

        command.extend([
            '--no-address',
            '--boot-disk-size=%s' % RUNNER_BOOT_DISK_SIZE,
        ])

    if preemptible:
        command.append('--preemptible')
    if startup_script:
        command.extend(
            ['--metadata-from-file', 'startup-script=' + startup_script])

    result = new_process.execute(command, expect_zero=False, **kwargs)
    if result.retcode == 0:
        return True

    logs.info('Failed to create instance. Command: %s failed. Output: %s',
              command, result.output)
    return False
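A hypothetical call for a runner instance; `InstanceType.RUNNER` and the config keys are inferred from the branches above:

config = {
    'cloud_compute_zone': 'us-central1-a',
    'runner_machine_type': 'n1-standard-1',
}
create_instance('r-sample-trial-0001',
                InstanceType.RUNNER,
                config,
                preemptible=True)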
Example #9
def _run_experiment(experiment_name, fuzzers, dry_run=False):
    """Run an experiment named |experiment_name| on |fuzzer_configs| and shut it
    down once it terminates."""
    logs.info('Starting experiment: %s.', experiment_name)
    if dry_run:
        logs.info('Dry run. Not actually running experiment.')
        return
    run_experiment.start_experiment(experiment_name, EXPERIMENT_CONFIG_FILE,
                                    BENCHMARKS, fuzzers)
Example #10
def experiment_main():
    """Do a trial as part of an experiment."""
    logs.info('Doing trial as part of experiment.')
    try:
        runner = TrialRunner()
        runner.conduct_trial()
    except Exception:  # pylint: disable=broad-except
        logs.error('Error doing trial.')
        raise
Example #11
def replace_base_builder(benchmark_dir, commit_date):
    """Replace the parent image of the Dockerfile in |benchmark_dir|,
    base-builder (latest), with a version of base-builder that is likely to
    build the project as it was on |commit_date| without issue."""
    base_builder_repo = _load_base_builder_docker_repo()  # pylint: disable=protected-access
    if base_builder_repo:
        base_builder_digest = base_builder_repo.find_digest(commit_date)
        logs.info('Using base-builder with digest %s.', base_builder_digest)
        _replace_base_builder_digest(  # pylint: disable=protected-access
            os.path.join(benchmark_dir, 'Dockerfile'), base_builder_digest)
Example #12
def build_measurer(benchmark: str) -> bool:
    """Do a coverage build for a benchmark."""
    try:
        logger.info('Building measurer for benchmark: %s.', benchmark)
        buildlib.build_coverage(benchmark)
        logs.info('Done building measurer for benchmark: %s.', benchmark)
        return True
    except Exception:  # pylint: disable=broad-except
        logger.error('Failed to build measurer for %s.', benchmark)
        return False
Example #13
def dispatcher_main():
    """Do the experiment and report results."""
    logs.info('Starting experiment.')

    # Set this here because we get failures if we do it in measurer for some
    # reason.
    multiprocessing.set_start_method('spawn')
    db_utils.initialize()
    if experiment_utils.is_local_experiment():
        models.Base.metadata.create_all(db_utils.engine)

    experiment_config_file_path = _get_config_file_path()
    experiment = Experiment(experiment_config_file_path)

    _initialize_experiment_in_db(experiment.config)

    trials = build_images_for_trials(experiment.fuzzers, experiment.benchmarks,
                                     experiment.num_trials,
                                     experiment.preemptible,
                                     experiment.concurrent_builds)
    _initialize_trials_in_db(trials)

    create_work_subdirs(['experiment-folders', 'measurement-folders'])

    # Start measurer and scheduler in separate threads/processes.
    scheduler_loop_thread = threading.Thread(target=scheduler.schedule_loop,
                                             args=(experiment.config, ))
    scheduler_loop_thread.start()

    measurer_main_process = multiprocessing.Process(
        target=measure_manager.measure_main, args=(experiment.config, ))

    measurer_main_process.start()

    is_complete = False
    while True:
        time.sleep(LOOP_WAIT_SECONDS)
        if not scheduler_loop_thread.is_alive():
            is_complete = not measurer_main_process.is_alive()

        # Generate periodic output reports.
        reporter.output_report(experiment.config,
                               in_progress=not is_complete,
                               coverage_report=is_complete)

        if is_complete:
            # Experiment is complete, bail out.
            break

    scheduler_loop_thread.join()
    measurer_main_process.join()

    _record_experiment_time_ended(experiment.experiment_name)
    logs.info('Experiment ended.')
Example #14
    def start(self):
        """Start the experiment on the dispatcher."""
        logs.info('Started dispatcher with instance name: %s',
                  self.instance_name)
        with tempfile.NamedTemporaryFile(dir=os.getcwd(),
                                         mode='w') as startup_script:
            self.write_startup_script(startup_script)
            gcloud.create_instance(self.instance_name,
                                   gcloud.InstanceType.DISPATCHER,
                                   self.config,
                                   startup_script=startup_script.name)
Example #15
def build_fuzzer_benchmark(fuzzer: str, benchmark: str) -> bool:
    """Wrapper around buildlib.build_fuzzer_benchmark that logs and catches
    exceptions."""
    logger.info('Building benchmark: %s, fuzzer: %s.', benchmark, fuzzer)
    try:
        buildlib.build_fuzzer_benchmark(fuzzer, benchmark)
    except subprocess.CalledProcessError:
        logger.error('Failed to build benchmark: %s, fuzzer: %s.', benchmark,
                     fuzzer)
        return False
    logs.info('Done building benchmark: %s, fuzzer: %s.', benchmark, fuzzer)
    return True
Example #16
def main():
    """Creates or gets an already created service account key and saves it to
    the provided path."""
    logs.initialize()
    try:
        keyfile = sys.argv[1]
        get_or_create_key(sys.argv[2], keyfile)
        logs.info('Saved key to %s.', keyfile)
    except Exception:  # pylint: disable=broad-except
        logs.error('Failed to get or create key.')
        return 1
    return 0
Example #17
def robust_begin_gcloud_ssh(instance_name: str, zone: str):
    """Try to SSH into an instance, |instance_name| in |zone| that might not be
    ready."""
    for _ in range(10):
        result = ssh(instance_name,
                     zone=zone,
                     command='echo ping',
                     expect_zero=False)
        if result.retcode == 0:
            return
        logs.info('GCP instance isn\'t ready yet. Rerunning SSH momentarily.')
        time.sleep(5)
    raise Exception('Couldn\'t SSH to instance.')
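The `ssh` helper is not shown. A sketch, assuming it wraps `gcloud compute ssh` through the same new_process.execute used in the create_instance example above:

def ssh(instance: str, *, zone: str, command: str, expect_zero: bool = True,
        **kwargs):
    """Hedged sketch of the ssh() helper used above."""
    ssh_command = [
        'gcloud', 'compute', 'ssh', instance,
        '--zone=%s' % zone,
        '--command=%s' % command,
    ]
    return new_process.execute(ssh_command, expect_zero=expect_zero, **kwargs)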
Example #18
def dispatcher_main():
    """Do the experiment and report results."""
    logs.info('Starting experiment.')

    # Set this here because we get failures if we do it in measurer for some
    # reason.
    multiprocessing.set_start_method('spawn')
    db_utils.initialize()
    if os.getenv('LOCAL_EXPERIMENT'):
        models.Base.metadata.create_all(db_utils.engine)

    experiment_config_file_path = os.path.join(fuzzer_config_utils.get_dir(),
                                               'experiment.yaml')
    experiment = Experiment(experiment_config_file_path)
    preemptible = experiment.preemptible
    trials = build_images_for_trials(experiment.fuzzers, experiment.benchmarks,
                                     experiment.num_trials, preemptible)
    _initialize_experiment_in_db(experiment.experiment_name,
                                 experiment.git_hash, trials)

    create_work_subdirs(['experiment-folders', 'measurement-folders'])

    # Start measurer and scheduler in separate threads/processes.
    scheduler_loop_thread = threading.Thread(target=scheduler.schedule_loop,
                                             args=(experiment.config, ))
    scheduler_loop_thread.start()

    max_total_time = experiment.config['max_total_time']
    measurer_loop_process = multiprocessing.Process(
        target=measurer.measure_loop,
        args=(experiment.experiment_name, max_total_time))

    measurer_loop_process.start()

    is_complete = False
    while True:
        time.sleep(LOOP_WAIT_SECONDS)
        if not scheduler_loop_thread.is_alive():
            is_complete = not measurer_loop_process.is_alive()

        # Generate periodic output reports.
        reporter.output_report(experiment.web_bucket,
                               in_progress=not is_complete)

        if is_complete:
            # Experiment is complete, bail out.
            break

    logs.info('Dispatcher finished.')
    scheduler_loop_thread.join()
    measurer_loop_process.join()
Example #19
def dispatcher_main():
    """Do the experiment and report results."""
    logs.info('Starting experiment.')

    # Set this here because we get failures if we do it in measurer for some
    # reason.
    multiprocessing.set_start_method('spawn')
    db_utils.initialize()
    if os.getenv('LOCAL_EXPERIMENT'):
        models.Base.metadata.create_all(db_utils.engine)

    builder.build_base_images()

    experiment_config_file_path = os.path.join(fuzzer_config_utils.get_dir(),
                                               'experiment.yaml')
    experiment = Experiment(experiment_config_file_path)

    # When building, we only care about the underlying fuzzer rather than the
    # display name that we use to identify a specific configuration.
    unique_fuzzers = list({
        fuzzer_config_utils.get_underlying_fuzzer_name(f)
        for f in experiment.fuzzers
    })
    builder.build_all_fuzzer_benchmarks(unique_fuzzers, experiment.benchmarks)

    create_work_subdirs(['experiment-folders', 'measurement-folders'])

    # Start the scheduler in a thread and the measurer in a separate process.
    scheduler_loop_thread = threading.Thread(target=scheduler.schedule_loop,
                                             args=(experiment.config, ))
    scheduler_loop_thread.start()
    measurer_loop_process = multiprocessing.Process(
        target=measurer.measure_loop,
        args=(
            experiment.config['experiment'],
            experiment.config['max_total_time'],
        ))
    measurer_loop_process.start()

    while True:
        time.sleep(LOOP_WAIT_SECONDS)
        is_complete = (not scheduler_loop_thread.is_alive()
                       and not measurer_loop_process.is_alive())

        # Generate periodic output reports.
        reporter.output_report(experiment.web_bucket,
                               in_progress=not is_complete)

        if is_complete:
            # Experiment is complete, bail out.
            break
Example #20
    def process_crashes(self, cycle):
        """Process and store crashes."""
        if not os.listdir(self.crashes_dir):
            logs.info('No crashes found for cycle %d.', cycle)
            return []

        logs.info('Saving crash files for cycle %d.', cycle)
        self.save_crash_files(cycle)

        logs.info('Processing crashes for cycle %d.', cycle)
        app_binary = coverage_utils.get_coverage_binary(self.benchmark)
        crash_metadata = run_crashes.do_crashes_run(app_binary,
                                                    self.crashes_dir)
        logs.info('Crash metadata: %s', crash_metadata)
        crashes = []
        for crash_key, crash in crash_metadata.items():
            crashes.append(
                models.Crash(crash_key=crash_key,
                             crash_testcase=crash.crash_testcase,
                             crash_type=crash.crash_type,
                             crash_address=crash.crash_address,
                             crash_state=crash.crash_state,
                             crash_stacktrace=crash.crash_stacktrace))
        return crashes
Example #21
def build_fuzzer_benchmark(fuzzer: str, benchmark: str) -> bool:
    """Builds |benchmark| for |fuzzer|."""
    logger.info('Building benchmark: %s, fuzzer: %s.', benchmark, fuzzer)
    try:
        if benchmark_utils.is_oss_fuzz(benchmark):
            gcb_build_oss_fuzz_project_fuzzer(benchmark, fuzzer)
        else:
            gcb_build_benchmark_fuzzer(benchmark, fuzzer)
    except subprocess.CalledProcessError:
        logger.error('Failed to build benchmark: %s, fuzzer: %s.', benchmark,
                     fuzzer)
        return False
    logs.info('Done building benchmark: %s, fuzzer: %s.', benchmark, fuzzer)
    return True
Example #22
def retry_build_loop(build_func: Callable, inputs: List[Tuple],
                     num_concurrent_builds: int) -> List:
    """Calls |build_func| in parallel on |inputs|. Repeat on failures up to
    |NUM_BUILD_RETRIES| times. Returns the list of inputs that |build_func| was
    called successfully on."""
    successes = []
    logs.info('Concurrent builds: %d.', num_concurrent_builds)
    with mp_pool.ThreadPool(num_concurrent_builds) as pool:
        for _ in range(NUM_BUILD_RETRIES):
            logs.info('Building using (%s): %s', build_func, inputs)
            results = pool.starmap(build_func, inputs)
            curr_successes, curr_failures = split_successes_and_failures(
                inputs, results)

            logs.info('Build successes: %s', curr_successes)
            successes.extend(curr_successes)
            if not curr_failures:
                break

            logs.error('Build failures: %s', curr_failures)
            inputs = curr_failures
            sleep_interval = random.uniform(1, BUILD_FAIL_WAIT)
            logs.info('Sleeping for %.1f secs before retrying.',
                      sleep_interval)
            time.sleep(sleep_interval)

    return successes
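`split_successes_and_failures` is not shown. Since `build_func` returns a boolean per input, a sketch could pair each input with its result:

def split_successes_and_failures(inputs, results):
    """Hedged sketch: partition |inputs| by their boolean |results|."""
    successes = [inp for inp, result in zip(inputs, results) if result]
    failures = [inp for inp, result in zip(inputs, results) if not result]
    return successes, failures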
Example #23
        def handle_retry(num_try, exception=None):
            """Handle retry."""

            if (exception is None or
                    isinstance(exception, exception_type)) and num_try < tries:
                logs.info('Call to %s failed with %s. Retrying.',
                          function_with_type,
                          sys.exc_info()[1])
                sleep(get_delay(num_try, delay, backoff))
                return True

            logs.error('Call to %s failed with %s. Raising.',
                       function_with_type,
                       sys.exc_info()[1])
            return False
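`get_delay` is not shown; retry handlers of this shape typically use exponential backoff, so a plausible sketch is:

def get_delay(num_try, delay, backoff):
    """Hedged sketch: exponential backoff, delay * backoff^(num_try - 1)."""
    return delay * (backoff**(num_try - 1))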
Example #24
def run_requested_experiment(dry_run):
    """Run the oldest requested experiment that hasn't been run yet in
    experiment-requests.yaml."""
    requested_experiments = _get_requested_experiments()

    # TODO(metzman): Look into supporting benchmarks as an optional parameter so
    # that people can add fuzzers that don't support everything.

    if PAUSE_SERVICE_KEYWORD in requested_experiments:
        # Check if automated experiment service is paused.
        logs.warning('Pause service requested, not running experiment.')
        return None

    requested_experiment = None
    for experiment_config in reversed(requested_experiments):
        experiment_name = _get_experiment_name(experiment_config)
        with db_utils.session_scope() as session:
            is_new_experiment = session.query(models.Experiment).filter(
                models.Experiment.name == experiment_name).first() is None
        if is_new_experiment:
            requested_experiment = experiment_config
            break

    if requested_experiment is None:
        logs.info('No new experiment to run. Exiting.')
        return None

    experiment_name = _get_experiment_name(requested_experiment)
    if not validate_experiment_requests([requested_experiment]):
        logs.error('Requested experiment: %s in %s is not valid.',
                   requested_experiment, REQUESTED_EXPERIMENTS_PATH)
        return None
    fuzzers = requested_experiment['fuzzers']

    benchmark_type = requested_experiment.get('type')
    if benchmark_type == benchmark_utils.BenchmarkType.BUG.value:
        benchmarks = benchmark_utils.get_bug_benchmarks()
    else:
        benchmarks = (benchmark_utils.get_oss_fuzz_coverage_benchmarks() +
                      benchmark_utils.get_standard_coverage_benchmarks())

    logs.info('Running experiment: %s with fuzzers: %s.', experiment_name,
              ' '.join(fuzzers))
    description = _get_description(requested_experiment)
    oss_fuzz_corpus = _use_oss_fuzz_corpus(requested_experiment)
    return _run_experiment(experiment_name, fuzzers, benchmarks, description,
                           oss_fuzz_corpus, dry_run)
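For reference, a sketch of the shape of one entry that _get_requested_experiments() appears to yield after parsing experiment-requests.yaml. Only 'fuzzers' and 'type' appear in the code above; the other field names are inferred from the helper names and are assumptions:

requested_experiment = {
    'experiment': '2021-01-01-sample',  # assumed key; see _get_experiment_name
    'fuzzers': ['afl', 'libfuzzer'],
    'type': 'bug',                      # optional; selects bug benchmarks
    'description': 'Sample request.',   # inferred from _get_description
    'oss_fuzz_corpus': True,            # inferred from _use_oss_fuzz_corpus
}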
Example #25
    def archive_crashes(self, cycle):
        """Archive this cycle's crashes into cloud bucket."""
        if not os.listdir(self.crashes_dir):
            logs.info('No crashes found for cycle %d.', cycle)
            return

        logs.info('Archiving crashes for cycle %d.', cycle)
        crashes_archive_name = experiment_utils.get_crashes_archive_name(cycle)
        archive = os.path.join(os.path.dirname(self.crashes_dir),
                               crashes_archive_name)
        with tarfile.open(archive, 'w:gz') as tar:
            tar.add(self.crashes_dir,
                    arcname=os.path.basename(self.crashes_dir))
        gcs_path = exp_path.gcs(
            posixpath.join(self.trial_dir, 'crashes', crashes_archive_name))
        gsutil.cp(archive, gcs_path)
        os.remove(archive)
Example #26
def _unpack_clusterfuzz_seed_corpus(fuzz_target_path, corpus_directory):
    """If a clusterfuzz seed corpus archive is available, unpack it into the
    corpus directory if it exists. Copied from unpack_seed_corpus in
    engine_common.py in ClusterFuzz.
    """
    oss_fuzz_corpus = environment.get('OSS_FUZZ_CORPUS')
    if oss_fuzz_corpus:
        benchmark = environment.get('BENCHMARK')
        corpus_archive_filename = f'{benchmark}.zip'
        oss_fuzz_corpus_archive_path = posixpath.join(
            experiment_utils.get_oss_fuzz_corpora_filestore_path(),
            corpus_archive_filename)
        seed_corpus_archive_path = posixpath.join(FUZZ_TARGET_DIR,
                                                  corpus_archive_filename)
        filestore_utils.cp(oss_fuzz_corpus_archive_path,
                           seed_corpus_archive_path)
    else:
        seed_corpus_archive_path = get_clusterfuzz_seed_corpus_path(
            fuzz_target_path)

    if not seed_corpus_archive_path:
        return

    with zipfile.ZipFile(seed_corpus_archive_path) as zip_file:
        # Unpack seed corpus recursively into the root of the main corpus
        # directory.
        idx = 0
        for seed_corpus_file in zip_file.infolist():
            if seed_corpus_file.filename.endswith('/'):
                # Ignore directories.
                continue

            # Skip files that exceed the corpus element size limit.
            if seed_corpus_file.file_size > CORPUS_ELEMENT_BYTES_LIMIT:
                continue

            output_filename = '%016d' % idx
            output_file_path = os.path.join(corpus_directory, output_filename)
            # ZipFile.extract() treats its second argument as a directory, so
            # write the member's bytes directly to keep the corpus root flat.
            with open(output_file_path, 'wb') as output_file:
                output_file.write(zip_file.read(seed_corpus_file))
            idx += 1

    logs.info('Unarchived %d files from seed corpus %s.', idx,
              seed_corpus_archive_path)
Example #27
def _run_experiment(  # pylint: disable=too-many-arguments
        experiment_name,
        fuzzers,
        benchmarks,
        description,
        oss_fuzz_corpus,
        dry_run=False):
    """Run an experiment named |experiment_name| on |fuzzer_configs| and shut it
    down once it terminates."""
    logs.info('Starting experiment: %s.', experiment_name)
    if dry_run:
        logs.info('Dry run. Not actually running experiment.')
        return
    run_experiment.start_experiment(experiment_name,
                                    EXPERIMENT_CONFIG_FILE,
                                    benchmarks,
                                    fuzzers,
                                    description=description,
                                    oss_fuzz_corpus=oss_fuzz_corpus)
Example #28
    def conduct_trial(self):
        """Conduct the benchmarking trial."""
        self.initialize_directories()
        log_file = os.path.join(self.results_dir, 'fuzzer-log.txt')

        logs.info('Starting trial.')

        max_total_time = environment.get('MAX_TOTAL_TIME')
        args = (max_total_time, log_file)
        thread = threading.Thread(target=run_fuzzer, args=args)
        thread.start()

        while thread.is_alive():
            self.sleep_until_next_sync()
            self.do_sync()
            self.cycle += 1

        logs.info('Doing final sync.')
        self.do_sync(final_sync=True)
        thread.join()
Example #29
def run_requested_experiment(dry_run):
    """Run the oldest requested experiment that hasn't been run yet in
    experiment-requests.yaml."""
    requested_experiments = _get_requested_experiments()

    # TODO(metzman): Look into supporting benchmarks as an optional parameter so
    # that people can add fuzzers that don't support everything.

    if PAUSE_SERVICE_KEYWORD in requested_experiments:
        # Check if automated experiment service is paused.
        logs.warning('Pause service requested, not running experiment.')
        return None

    requested_experiment = None
    for experiment_config in reversed(requested_experiments):
        experiment_name = _get_experiment_name(experiment_config)
        is_new_experiment = db_utils.query(models.Experiment).filter(
            models.Experiment.name == experiment_name).first() is None
        if is_new_experiment:
            requested_experiment = experiment_config
            break

    if requested_experiment is None:
        logs.info('No new experiment to run. Exiting.')
        return None

    experiment_name = _get_experiment_name(requested_experiment)
    if not validate_experiment_requests([requested_experiment]):
        logs.error('Requested experiment: %s in %s is not valid.',
                   requested_experiment, REQUESTED_EXPERIMENTS_PATH)
        return None
    fuzzers = requested_experiment['fuzzers']

    logs.info('Running experiment: %s with fuzzers: %s.', experiment_name,
              ' '.join(fuzzers))
    fuzzer_configs = fuzzer_utils.get_fuzzer_configs(fuzzers=fuzzers)
    return _run_experiment(experiment_name, fuzzer_configs, dry_run)
Example #30
def schedule(experiment_config: dict, queue):
    """Schedule measurer workers. This cannot be called before
    initialize_measurers."""
    logger.info('Scheduling measurer workers.')

    # TODO(metzman): This method doesn't seem to correctly take into account
    # jobs that are running (the API provided by rq doesn't work intuitively).
    # That is OK for now since scheduling only happens while nothing is being
    # measured but this should be fixed.
    jobs = queue_utils.get_all_jobs(queue)
    counts = collections.defaultdict(int)
    for job in jobs:
        counts[job.get_status(refresh=False)] += 1

    num_instances_needed = counts['queued'] + counts['started']
    num_instances_needed = min(num_instances_needed, MAX_INSTANCES_PER_GROUP)

    logger.info('Scheduling %d workers.', num_instances_needed)
    instance_group_name = get_instance_group_name(
        experiment_config['experiment'])
    project = experiment_config['cloud_project']
    zone = experiment_config['cloud_compute_zone']
    num_instances = gce.get_instance_group_size(instance_group_name, project,
                                                zone)

    # TODO(metzman): Use autoscaling as it probably can deal with quotas more
    # easily.
    if not num_instances_needed:
        # Can't go below 1 instance per group.
        logs.info('num_instances_needed = 0, resizing to 1.')
        num_instances_needed = 1

    if num_instances_needed != num_instances:
        # TODO(metzman): Add some limits so we always have some measurers but
        # not too many.
        gce.resize_instance_group(num_instances_needed, instance_group_name,
                                  project, zone)