def conduct_trial(self):
    """Conduct the benchmarking trial."""
    self.initialize_directories()
    log_file = os.path.join(self.results_dir, 'fuzzer-log.txt')

    logs.info('Starting trial.')

    max_total_time = environment.get('MAX_TOTAL_TIME')
    args = (max_total_time, log_file)
    fuzz_thread = threading.Thread(target=run_fuzzer, args=args)
    fuzz_thread.start()

    if environment.get('FUZZ_OUTSIDE_EXPERIMENT'):
        # Hack so that the fuzz_thread has some time to fail if something is
        # wrong. Without this we will sleep for a long time before checking
        # if the fuzz thread is alive.
        time.sleep(5)

    while fuzz_thread.is_alive():
        self.sleep_until_next_sync()
        self.do_sync()
        self.cycle += 1

    logs.info('Doing final sync.')
    self.do_sync(final_sync=True)
    fuzz_thread.join()
def _unpack_clusterfuzz_seed_corpus(fuzz_target_path, corpus_directory):
    """If a ClusterFuzz seed corpus archive is available, unpack it into the
    corpus directory. Copied from unpack_seed_corpus in engine_common.py in
    ClusterFuzz.
    """
    seed_corpus_archive_path = get_clusterfuzz_seed_corpus_path(
        fuzz_target_path)
    if not seed_corpus_archive_path:
        return

    with zipfile.ZipFile(seed_corpus_archive_path) as zip_file:
        # Unpack seed corpus recursively into the root of the main corpus
        # directory.
        idx = 0
        for seed_corpus_file in zip_file.infolist():
            if seed_corpus_file.filename.endswith('/'):
                # Ignore directories.
                continue

            # Allow callers to opt-out of unpacking large files.
            if seed_corpus_file.file_size > CORPUS_ELEMENT_BYTES_LIMIT:
                continue

            output_filename = '%016d' % idx
            output_file_path = os.path.join(corpus_directory, output_filename)
            zip_file.extract(seed_corpus_file, output_file_path)
            idx += 1

    logs.info('Unarchived %d files from seed corpus %s.', idx,
              seed_corpus_archive_path)
def measure_loop(experiment: str, max_total_time: int):
    """Continuously measure trials for |experiment|."""
    logs.initialize(default_extras={
        'component': 'dispatcher',
        'subcomponent': 'measurer',
    })
    logs.info('Start measure_loop.')

    with multiprocessing.Pool() as pool, multiprocessing.Manager() as manager:
        set_up_coverage_binaries(pool, experiment)
        # Using Multiprocessing.Queue will fail with a complaint about
        # inheriting queue.
        q = manager.Queue()  # pytype: disable=attribute-error
        while True:
            try:
                # Get whether all trials have ended before we measure to
                # prevent races.
                all_trials_ended = scheduler.all_trials_ended(experiment)

                if not measure_all_trials(experiment, max_total_time, pool, q):
                    # We didn't measure any trials.
                    if all_trials_ended:
                        # There are no trials producing snapshots to measure.
                        # Given that we couldn't measure any snapshots, we
                        # won't be able to measure any in the future, so stop
                        # now.
                        break
            except Exception:  # pylint: disable=broad-except
                logger.error('Error occurred during measuring.')

            time.sleep(FAIL_WAIT_SECONDS)

    logger.info('Finished measuring.')
def _clean_seed_corpus(seed_corpus_dir):
    """Prepares |seed_corpus_dir| for the trial. This ensures that it can be
    used by AFL, which is picky about the seed corpus. Moves seed corpus files
    from sub-directories into the corpus directory root. Also deletes any
    files that exceed the 1 MB limit. If the NO_SEEDS env var is specified,
    then the seed corpus files are deleted."""
    if not os.path.exists(seed_corpus_dir):
        return

    if environment.get('NO_SEEDS'):
        logs.info('NO_SEEDS specified, deleting seed corpus files.')
        shutil.rmtree(seed_corpus_dir)
        os.mkdir(seed_corpus_dir)
        return

    failed_to_move_files = []
    for root, _, files in os.walk(seed_corpus_dir):
        for filename in files:
            file_path = os.path.join(root, filename)

            if os.path.getsize(file_path) > CORPUS_ELEMENT_BYTES_LIMIT:
                os.remove(file_path)
                logs.warning('Removed seed file %s as it exceeds the 1 MB '
                             'limit.', file_path)
                continue

            sha1sum = utils.file_hash(file_path)
            new_file_path = os.path.join(seed_corpus_dir, sha1sum)
            try:
                shutil.move(file_path, new_file_path)
            except OSError:
                failed_to_move_files.append((file_path, new_file_path))

    if failed_to_move_files:
        logs.error('Failed to move seed corpus files: %s',
                   failed_to_move_files)
def build_measurer(benchmark: str) -> bool:
    """Do a coverage build for a benchmark."""
    try:
        logger.info('Building measurer for benchmark: %s.', benchmark)
        buildlib.build_coverage(benchmark)
        docker_name = benchmark_utils.get_docker_name(benchmark)
        archive_name = 'coverage-build-%s.tar.gz' % docker_name

        coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
        benchmark_coverage_binary_dir = coverage_binaries_dir / benchmark
        os.mkdir(benchmark_coverage_binary_dir)
        cloud_bucket_archive_path = exp_path.gcs(coverage_binaries_dir /
                                                 archive_name)
        gsutil.cp(cloud_bucket_archive_path,
                  str(benchmark_coverage_binary_dir),
                  parallel=False,
                  write_to_stdout=False)

        archive_path = benchmark_coverage_binary_dir / archive_name
        tar = tarfile.open(archive_path, 'r:gz')
        tar.extractall(benchmark_coverage_binary_dir)
        os.remove(archive_path)
        logs.info('Done building measurer for benchmark: %s.', benchmark)
        return True
    except Exception:  # pylint: disable=broad-except
        logger.error('Failed to build measurer for %s.', benchmark)
        return False
def main():
    """Copies files needed to integrate an OSS-Fuzz benchmark and creates the
    benchmark's benchmark.yaml file."""
    parser = argparse.ArgumentParser(description='Integrate a new benchmark.')
    parser.add_argument('-p',
                        '--project',
                        help='Project for benchmark. Example: "zlib"',
                        required=True)
    parser.add_argument(
        '-f',
        '--fuzz-target',
        help='Fuzz target for benchmark. Example: "zlib_uncompress_fuzzer"',
        required=True)
    parser.add_argument(
        '-n',
        '--benchmark-name',
        help='Benchmark name. Defaults to <project>_<fuzz_target>',
        required=False)
    parser.add_argument('-c', '--commit', help='Project commit hash.')
    parser.add_argument(
        '-d',
        '--date',
        help='Date of the commit. Example: 2019-10-19T09:07:25+01:00')
    logs.initialize()
    args = parser.parse_args()

    benchmark = integrate_benchmark(args.project, args.fuzz_target,
                                    args.benchmark_name, args.commit,
                                    args.date)
    logs.info('Successfully integrated benchmark: %s.', benchmark)
    logs.info('Please run "make test-run-afl-%s" to test integration.',
              benchmark)
    return 0
def run_diff_experiment(dry_run):
    """Run a diff experiment. This is an experiment that runs only on fuzzers
    that have changed since the last experiment."""
    fuzzers = experiment_changes.get_fuzzers_changed_since_last()
    logs.info('Running experiment with fuzzers: %s.', ' '.join(fuzzers))
    fuzzer_configs = fuzzer_utils.get_fuzzer_configs(fuzzers=fuzzers)
    return _run_experiment(fuzzer_configs, dry_run)
def create_instance(instance_name: str,
                    instance_type: InstanceType,
                    config: dict,
                    startup_script: str = None,
                    preemptible: bool = False,
                    **kwargs) -> bool:
    """Creates a GCE instance with name |instance_name|, type |instance_type|
    and an optionally provided |startup_script|."""
    if experiment_utils.is_local_experiment():
        return run_local_instance(startup_script)

    command = [
        'gcloud',
        'compute',
        'instances',
        'create',
        instance_name,
        '--image-family=cos-stable',
        '--image-project=cos-cloud',
        '--zone=%s' % config['cloud_compute_zone'],
        '--scopes=cloud-platform',
    ]
    if instance_type == InstanceType.DISPATCHER:
        command.extend([
            '--machine-type=%s' % DISPATCHER_MACHINE_TYPE,
            '--boot-disk-size=%s' % DISPATCHER_BOOT_DISK_SIZE,
            '--boot-disk-type=%s' % DISPATCHER_BOOT_DISK_TYPE,
        ])
    else:
        machine_type = config['runner_machine_type']
        if machine_type is not None:
            command.append('--machine-type=%s' % machine_type)
        else:
            # Do this to support KLEE experiments.
            command.extend([
                '--custom-memory=%s' % config['runner_memory'],
                '--custom-cpu=%s' % config['runner_num_cpu_cores'],
            ])

        command.extend([
            '--no-address',
            '--boot-disk-size=%s' % RUNNER_BOOT_DISK_SIZE,
        ])

    if preemptible:
        command.append('--preemptible')
    if startup_script:
        command.extend(
            ['--metadata-from-file', 'startup-script=' + startup_script])

    result = new_process.execute(command, expect_zero=False, **kwargs)
    if result.retcode == 0:
        return True

    logs.info('Failed to create instance. Command: %s failed. Output: %s',
              command, result.output)
    return False
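# A minimal usage sketch (not from the source): how create_instance might be
# called for a runner. The config keys mirror the ones read above; the
# instance name, startup script path, and InstanceType.RUNNER member are
# assumptions for illustration only.
example_config = {
    'cloud_compute_zone': 'us-central1-a',  # Zone passed to gcloud.
    'runner_machine_type': 'n1-standard-1',  # Used when not None.
    'runner_memory': None,  # Only used for custom machine shapes (e.g. KLEE).
    'runner_num_cpu_cores': None,
}
created = create_instance('r-sample-trial-1',  # Hypothetical instance name.
                          InstanceType.RUNNER,
                          example_config,
                          startup_script='startup-runner.sh',
                          preemptible=True)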
def _run_experiment(experiment_name, fuzzers, dry_run=False):
    """Run an experiment named |experiment_name| on |fuzzers| and shut it down
    once it terminates."""
    logs.info('Starting experiment: %s.', experiment_name)
    if dry_run:
        logs.info('Dry run. Not actually running experiment.')
        return
    run_experiment.start_experiment(experiment_name, EXPERIMENT_CONFIG_FILE,
                                    BENCHMARKS, fuzzers)
def experiment_main():
    """Do a trial as part of an experiment."""
    logs.info('Doing trial as part of experiment.')
    try:
        runner = TrialRunner()
        runner.conduct_trial()
    except Exception as error:  # pylint: disable=broad-except
        logs.error('Error doing trial.')
        raise error
def replace_base_builder(benchmark_dir, commit_date):
    """Replace the parent image of the Dockerfile in |benchmark_dir|,
    base-builder (latest), with a version of base-builder that is likely to
    build the project as it was on |commit_date| without issue."""
    base_builder_repo = _load_base_builder_docker_repo()  # pylint: disable=protected-access
    if base_builder_repo:
        base_builder_digest = base_builder_repo.find_digest(commit_date)
        logs.info('Using base-builder with digest %s.', base_builder_digest)
        _replace_base_builder_digest(  # pylint: disable=protected-access
            os.path.join(benchmark_dir, 'Dockerfile'), base_builder_digest)
def build_measurer(benchmark: str) -> bool:
    """Do a coverage build for a benchmark."""
    try:
        logger.info('Building measurer for benchmark: %s.', benchmark)
        buildlib.build_coverage(benchmark)
        logs.info('Done building measurer for benchmark: %s.', benchmark)
        return True
    except Exception:  # pylint: disable=broad-except
        logger.error('Failed to build measurer for %s.', benchmark)
        return False
def dispatcher_main():
    """Do the experiment and report results."""
    logs.info('Starting experiment.')

    # Set this here because we get failures if we do it in measurer for some
    # reason.
    multiprocessing.set_start_method('spawn')
    db_utils.initialize()
    if experiment_utils.is_local_experiment():
        models.Base.metadata.create_all(db_utils.engine)

    experiment_config_file_path = _get_config_file_path()
    experiment = Experiment(experiment_config_file_path)

    _initialize_experiment_in_db(experiment.config)

    trials = build_images_for_trials(experiment.fuzzers, experiment.benchmarks,
                                     experiment.num_trials,
                                     experiment.preemptible,
                                     experiment.concurrent_builds)
    _initialize_trials_in_db(trials)

    create_work_subdirs(['experiment-folders', 'measurement-folders'])

    # Start measurer and scheduler in separate threads/processes.
    scheduler_loop_thread = threading.Thread(target=scheduler.schedule_loop,
                                             args=(experiment.config,))
    scheduler_loop_thread.start()

    measurer_main_process = multiprocessing.Process(
        target=measure_manager.measure_main, args=(experiment.config,))
    measurer_main_process.start()

    is_complete = False
    while True:
        time.sleep(LOOP_WAIT_SECONDS)
        if not scheduler_loop_thread.is_alive():
            is_complete = not measurer_main_process.is_alive()

        # Generate periodic output reports.
        reporter.output_report(experiment.config,
                               in_progress=not is_complete,
                               coverage_report=is_complete)

        if is_complete:
            # Experiment is complete, bail out.
            break

    scheduler_loop_thread.join()
    measurer_main_process.join()

    _record_experiment_time_ended(experiment.experiment_name)
    logs.info('Experiment ended.')
def start(self):
    """Start the experiment on the dispatcher."""
    logs.info('Started dispatcher with instance name: %s',
              self.instance_name)
    with tempfile.NamedTemporaryFile(dir=os.getcwd(),
                                     mode='w') as startup_script:
        self.write_startup_script(startup_script)
        gcloud.create_instance(self.instance_name,
                               gcloud.InstanceType.DISPATCHER,
                               self.config,
                               startup_script=startup_script.name)
def build_fuzzer_benchmark(fuzzer: str, benchmark: str) -> bool:
    """Wrapper around buildlib.build_fuzzer_benchmark that logs and catches
    exceptions."""
    logger.info('Building benchmark: %s, fuzzer: %s.', benchmark, fuzzer)
    try:
        buildlib.build_fuzzer_benchmark(fuzzer, benchmark)
    except subprocess.CalledProcessError:
        logger.error('Failed to build benchmark: %s, fuzzer: %s.', benchmark,
                     fuzzer)
        return False
    logs.info('Done building benchmark: %s, fuzzer: %s.', benchmark, fuzzer)
    return True
def main():
    """Creates or gets an already created service account key and saves it to
    the provided path."""
    logs.initialize()
    try:
        keyfile = sys.argv[1]
        get_or_create_key(sys.argv[2], keyfile)
        logs.info('Saved key to %s.', keyfile)
    except Exception:  # pylint: disable=broad-except
        logs.error('Failed to get or create key.')
        return 1
    return 0
def robust_begin_gcloud_ssh(instance_name: str, zone: str):
    """Try to SSH into an instance, |instance_name| in |zone|, that might not
    be ready."""
    for _ in range(10):
        result = ssh(instance_name,
                     zone=zone,
                     command='echo ping',
                     expect_zero=False)
        if result.retcode == 0:
            return
        logs.info('GCP instance isn\'t ready yet. Rerunning SSH momentarily.')
        time.sleep(5)
    raise Exception('Couldn\'t SSH to instance.')
def dispatcher_main():
    """Do the experiment and report results."""
    logs.info('Starting experiment.')

    # Set this here because we get failures if we do it in measurer for some
    # reason.
    multiprocessing.set_start_method('spawn')
    db_utils.initialize()
    if os.getenv('LOCAL_EXPERIMENT'):
        models.Base.metadata.create_all(db_utils.engine)

    experiment_config_file_path = os.path.join(fuzzer_config_utils.get_dir(),
                                               'experiment.yaml')
    experiment = Experiment(experiment_config_file_path)

    preemptible = experiment.preemptible
    trials = build_images_for_trials(experiment.fuzzers, experiment.benchmarks,
                                     experiment.num_trials, preemptible)
    _initialize_experiment_in_db(experiment.experiment_name,
                                 experiment.git_hash, trials)

    create_work_subdirs(['experiment-folders', 'measurement-folders'])

    # Start measurer and scheduler in separate threads/processes.
    scheduler_loop_thread = threading.Thread(target=scheduler.schedule_loop,
                                             args=(experiment.config,))
    scheduler_loop_thread.start()

    max_total_time = experiment.config['max_total_time']
    measurer_loop_process = multiprocessing.Process(
        target=measurer.measure_loop,
        args=(experiment.experiment_name, max_total_time))
    measurer_loop_process.start()

    is_complete = False
    while True:
        time.sleep(LOOP_WAIT_SECONDS)
        if not scheduler_loop_thread.is_alive():
            is_complete = not measurer_loop_process.is_alive()

        # Generate periodic output reports.
        reporter.output_report(experiment.web_bucket,
                               in_progress=not is_complete)

        if is_complete:
            # Experiment is complete, bail out.
            break

    logs.info('Dispatcher finished.')
    scheduler_loop_thread.join()
    measurer_loop_process.join()
def dispatcher_main():
    """Do the experiment and report results."""
    logs.info('Starting experiment.')

    # Set this here because we get failures if we do it in measurer for some
    # reason.
    multiprocessing.set_start_method('spawn')
    db_utils.initialize()
    if os.getenv('LOCAL_EXPERIMENT'):
        models.Base.metadata.create_all(db_utils.engine)

    builder.build_base_images()

    experiment_config_file_path = os.path.join(fuzzer_config_utils.get_dir(),
                                               'experiment.yaml')
    experiment = Experiment(experiment_config_file_path)

    # When building, we only care about the underlying fuzzer rather than the
    # display name that we use to identify a specific configuration.
    unique_fuzzers = list({
        fuzzer_config_utils.get_underlying_fuzzer_name(f)
        for f in experiment.fuzzers
    })
    builder.build_all_fuzzer_benchmarks(unique_fuzzers, experiment.benchmarks)

    create_work_subdirs(['experiment-folders', 'measurement-folders'])

    # Start measurer and scheduler in threads.
    scheduler_loop_thread = threading.Thread(target=scheduler.schedule_loop,
                                             args=(experiment.config,))
    scheduler_loop_thread.start()
    measurer_loop_thread = multiprocessing.Process(
        target=measurer.measure_loop,
        args=(
            experiment.config['experiment'],
            experiment.config['max_total_time'],
        ))
    measurer_loop_thread.start()

    while True:
        time.sleep(LOOP_WAIT_SECONDS)
        is_complete = (not scheduler_loop_thread.is_alive() and
                       not measurer_loop_thread.is_alive())

        # Generate periodic output reports.
        reporter.output_report(experiment.web_bucket,
                               in_progress=not is_complete)

        if is_complete:
            # Experiment is complete, bail out.
            break
def process_crashes(self, cycle):
    """Process and store crashes."""
    if not os.listdir(self.crashes_dir):
        logs.info('No crashes found for cycle %d.', cycle)
        return []

    logs.info('Saving crash files for cycle %d.', cycle)
    self.save_crash_files(cycle)

    logs.info('Processing crashes for cycle %d.', cycle)
    app_binary = coverage_utils.get_coverage_binary(self.benchmark)
    crash_metadata = run_crashes.do_crashes_run(app_binary, self.crashes_dir)
    logs.info('Crash metadata: %s', crash_metadata)
    crashes = []
    for crash_key in crash_metadata:
        crash = crash_metadata[crash_key]
        crashes.append(
            models.Crash(crash_key=crash_key,
                         crash_testcase=crash.crash_testcase,
                         crash_type=crash.crash_type,
                         crash_address=crash.crash_address,
                         crash_state=crash.crash_state,
                         crash_stacktrace=crash.crash_stacktrace))
    return crashes
def build_fuzzer_benchmark(fuzzer: str, benchmark: str) -> bool:
    """Builds |benchmark| for |fuzzer|."""
    logger.info('Building benchmark: %s, fuzzer: %s.', benchmark, fuzzer)
    try:
        if benchmark_utils.is_oss_fuzz(benchmark):
            gcb_build_oss_fuzz_project_fuzzer(benchmark, fuzzer)
        else:
            gcb_build_benchmark_fuzzer(benchmark, fuzzer)
    except subprocess.CalledProcessError:
        logger.error('Failed to build benchmark: %s, fuzzer: %s.', benchmark,
                     fuzzer)
        return False
    logs.info('Done building benchmark: %s, fuzzer: %s.', benchmark, fuzzer)
    return True
def retry_build_loop(build_func: Callable, inputs: List[Tuple],
                     num_concurrent_builds: int) -> List:
    """Calls |build_func| in parallel on |inputs|. Repeat on failures up to
    |NUM_BUILD_RETRIES| times. Returns the list of inputs that |build_func|
    was called successfully on."""
    successes = []
    logs.info('Concurrent builds: %d.', num_concurrent_builds)
    with mp_pool.ThreadPool(num_concurrent_builds) as pool:
        for _ in range(NUM_BUILD_RETRIES):
            logs.info('Building using (%s): %s', build_func, inputs)
            results = pool.starmap(build_func, inputs)
            curr_successes, curr_failures = split_successes_and_failures(
                inputs, results)

            logs.info('Build successes: %s', curr_successes)
            successes.extend(curr_successes)
            if not curr_failures:
                break

            logs.error('Build failures: %s', curr_failures)
            inputs = curr_failures
            sleep_interval = random.uniform(1, BUILD_FAIL_WAIT)
            logs.info('Sleeping for %d secs before retrying.', sleep_interval)
            time.sleep(sleep_interval)

    return successes
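# Hedged illustration (not part of the source): retry_build_loop takes a build
# callable plus a list of argument tuples, so a caller might drive it roughly
# like this. build_fuzzer_benchmark is the wrapper defined earlier; the
# specific fuzzer/benchmark names below are hypothetical examples.
fuzzer_benchmark_pairs = [
    ('afl', 'zlib_zlib_uncompress_fuzzer'),
    ('libfuzzer', 'freetype2-2017'),
]
succeeded_pairs = retry_build_loop(build_fuzzer_benchmark,
                                   fuzzer_benchmark_pairs,
                                   num_concurrent_builds=2)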
def handle_retry(num_try, exception=None):
    """Handle retry."""
    if (exception is None or
            isinstance(exception, exception_type)) and num_try < tries:
        logs.info('Retrying on %s failed with %s. Retrying again.',
                  function_with_type,
                  sys.exc_info()[1])
        sleep(get_delay(num_try, delay, backoff))
        return True

    logs.error('Retrying on %s failed with %s. Raise.',
               function_with_type,
               sys.exc_info()[1])
    return False
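# Hedged sketch of the surrounding driver loop (assumed, not copied from the
# source): handle_retry is a closure helper, so the wrapper that owns |tries|,
# |delay|, |backoff| and |exception_type| would typically call it after each
# failed attempt of the wrapped callable. |func|, |args| and |kwargs| here are
# hypothetical names for that wrapped call.
num_try = 0
while True:
    num_try += 1
    try:
        result = func(*args, **kwargs)  # The wrapped callable being retried.
        break
    except Exception as error:  # pylint: disable=broad-except
        # handle_retry sleeps and returns True if another attempt is allowed.
        if not handle_retry(num_try, exception=error):
            raise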
def run_requested_experiment(dry_run):
    """Run the oldest requested experiment that hasn't been run yet in
    experiment-requests.yaml."""
    requested_experiments = _get_requested_experiments()

    # TODO(metzman): Look into supporting benchmarks as an optional parameter
    # so that people can add fuzzers that don't support everything.

    if PAUSE_SERVICE_KEYWORD in requested_experiments:
        # Check if automated experiment service is paused.
        logs.warning('Pause service requested, not running experiment.')
        return None

    requested_experiment = None
    for experiment_config in reversed(requested_experiments):
        experiment_name = _get_experiment_name(experiment_config)
        with db_utils.session_scope() as session:
            is_new_experiment = session.query(models.Experiment).filter(
                models.Experiment.name == experiment_name).first() is None
        if is_new_experiment:
            requested_experiment = experiment_config
            break

    if requested_experiment is None:
        logs.info('No new experiment to run. Exiting.')
        return None

    experiment_name = _get_experiment_name(requested_experiment)
    if not validate_experiment_requests([requested_experiment]):
        logs.error('Requested experiment: %s in %s is not valid.',
                   requested_experiment, REQUESTED_EXPERIMENTS_PATH)
        return None
    fuzzers = requested_experiment['fuzzers']

    benchmark_type = requested_experiment.get('type')
    if benchmark_type == benchmark_utils.BenchmarkType.BUG.value:
        benchmarks = benchmark_utils.get_bug_benchmarks()
    else:
        benchmarks = (benchmark_utils.get_oss_fuzz_coverage_benchmarks() +
                      benchmark_utils.get_standard_coverage_benchmarks())

    logs.info('Running experiment: %s with fuzzers: %s.', experiment_name,
              ' '.join(fuzzers))
    description = _get_description(requested_experiment)
    oss_fuzz_corpus = _use_oss_fuzz_corpus(requested_experiment)
    return _run_experiment(experiment_name, fuzzers, benchmarks, description,
                           oss_fuzz_corpus, dry_run)
def archive_crashes(self, cycle):
    """Archive this cycle's crashes into cloud bucket."""
    if not os.listdir(self.crashes_dir):
        logs.info('No crashes found for cycle %d.', cycle)
        return

    logs.info('Archiving crashes for cycle %d.', cycle)
    crashes_archive_name = experiment_utils.get_crashes_archive_name(cycle)
    archive = os.path.join(os.path.dirname(self.crashes_dir),
                           crashes_archive_name)
    with tarfile.open(archive, 'w:gz') as tar:
        tar.add(self.crashes_dir,
                arcname=os.path.basename(self.crashes_dir))
    gcs_path = exp_path.gcs(
        posixpath.join(self.trial_dir, 'crashes', crashes_archive_name))
    gsutil.cp(archive, gcs_path)
    os.remove(archive)
def _unpack_clusterfuzz_seed_corpus(fuzz_target_path, corpus_directory):
    """If a ClusterFuzz seed corpus archive is available, unpack it into the
    corpus directory. Copied from unpack_seed_corpus in engine_common.py in
    ClusterFuzz.
    """
    oss_fuzz_corpus = environment.get('OSS_FUZZ_CORPUS')
    if oss_fuzz_corpus:
        benchmark = environment.get('BENCHMARK')
        corpus_archive_filename = f'{benchmark}.zip'
        oss_fuzz_corpus_archive_path = posixpath.join(
            experiment_utils.get_oss_fuzz_corpora_filestore_path(),
            corpus_archive_filename)
        seed_corpus_archive_path = posixpath.join(FUZZ_TARGET_DIR,
                                                  corpus_archive_filename)
        filestore_utils.cp(oss_fuzz_corpus_archive_path,
                           seed_corpus_archive_path)
    else:
        seed_corpus_archive_path = get_clusterfuzz_seed_corpus_path(
            fuzz_target_path)

    if not seed_corpus_archive_path:
        return

    with zipfile.ZipFile(seed_corpus_archive_path) as zip_file:
        # Unpack seed corpus recursively into the root of the main corpus
        # directory.
        idx = 0
        for seed_corpus_file in zip_file.infolist():
            if seed_corpus_file.filename.endswith('/'):
                # Ignore directories.
                continue

            # Allow callers to opt-out of unpacking large files.
            if seed_corpus_file.file_size > CORPUS_ELEMENT_BYTES_LIMIT:
                continue

            output_filename = '%016d' % idx
            output_file_path = os.path.join(corpus_directory, output_filename)
            zip_file.extract(seed_corpus_file, output_file_path)
            idx += 1

    logs.info('Unarchived %d files from seed corpus %s.', idx,
              seed_corpus_archive_path)
def _run_experiment(  # pylint: disable=too-many-arguments
        experiment_name,
        fuzzers,
        benchmarks,
        description,
        oss_fuzz_corpus,
        dry_run=False):
    """Run an experiment named |experiment_name| on |fuzzers| and shut it down
    once it terminates."""
    logs.info('Starting experiment: %s.', experiment_name)
    if dry_run:
        logs.info('Dry run. Not actually running experiment.')
        return
    run_experiment.start_experiment(experiment_name,
                                    EXPERIMENT_CONFIG_FILE,
                                    benchmarks,
                                    fuzzers,
                                    description=description,
                                    oss_fuzz_corpus=oss_fuzz_corpus)
def conduct_trial(self):
    """Conduct the benchmarking trial."""
    self.initialize_directories()
    log_file = os.path.join(self.results_dir, 'fuzzer-log.txt')

    logs.info('Starting trial.')

    max_total_time = environment.get('MAX_TOTAL_TIME')
    args = (max_total_time, log_file)
    thread = threading.Thread(target=run_fuzzer, args=args)
    thread.start()

    while thread.is_alive():
        self.sleep_until_next_sync()
        self.do_sync()
        self.cycle += 1

    logs.info('Doing final sync.')
    self.do_sync(final_sync=True)
    thread.join()
def run_requested_experiment(dry_run):
    """Run the oldest requested experiment that hasn't been run yet in
    experiment-requests.yaml."""
    requested_experiments = _get_requested_experiments()

    # TODO(metzman): Look into supporting benchmarks as an optional parameter
    # so that people can add fuzzers that don't support everything.

    if PAUSE_SERVICE_KEYWORD in requested_experiments:
        # Check if automated experiment service is paused.
        logs.warning('Pause service requested, not running experiment.')
        return None

    requested_experiment = None
    for experiment_config in reversed(requested_experiments):
        experiment_name = _get_experiment_name(experiment_config)
        is_new_experiment = db_utils.query(models.Experiment).filter(
            models.Experiment.name == experiment_name).first() is None
        if is_new_experiment:
            requested_experiment = experiment_config
            break

    if requested_experiment is None:
        logs.info('No new experiment to run. Exiting.')
        return None

    experiment_name = _get_experiment_name(requested_experiment)
    if not validate_experiment_requests([requested_experiment]):
        logs.error('Requested experiment: %s in %s is not valid.',
                   requested_experiment, REQUESTED_EXPERIMENTS_PATH)
        return None
    fuzzers = requested_experiment['fuzzers']

    logs.info('Running experiment: %s with fuzzers: %s.', experiment_name,
              ' '.join(fuzzers))
    fuzzer_configs = fuzzer_utils.get_fuzzer_configs(fuzzers=fuzzers)
    return _run_experiment(experiment_name, fuzzer_configs, dry_run)
def schedule(experiment_config: dict, queue):
    """Schedule measurer workers. This cannot be called before
    initialize_measurers."""
    logger.info('Scheduling measurer workers.')

    # TODO(metzman): This method doesn't seem to correctly take into account
    # jobs that are running (the API provided by rq doesn't work intuitively).
    # That is OK for now since scheduling only happens while nothing is being
    # measured but this should be fixed.
    jobs = queue_utils.get_all_jobs(queue)
    counts = collections.defaultdict(int)
    for job in jobs:
        counts[job.get_status(refresh=False)] += 1

    num_instances_needed = counts['queued'] + counts['started']
    num_instances_needed = min(num_instances_needed, MAX_INSTANCES_PER_GROUP)

    logger.info('Scheduling %d workers.', num_instances_needed)
    instance_group_name = get_instance_group_name(
        experiment_config['experiment'])
    project = experiment_config['cloud_project']
    zone = experiment_config['cloud_compute_zone']
    num_instances = gce.get_instance_group_size(instance_group_name, project,
                                                zone)

    # TODO(metzman): Use autoscaling as it probably can deal with quotas more
    # easily.
    if not num_instances_needed:
        # Can't go below 1 instance per group.
        logs.info('num_instances_needed = 0, resizing to 1.')
        num_instances_needed = 1

    if num_instances_needed != num_instances:
        # TODO(metzman): Add some limits so always have some measurers but not
        # too many.
        gce.resize_instance_group(num_instances_needed, instance_group_name,
                                  project, zone)