def add_nonprivate_experiments_for_merge_with_clobber(experiment_names):
    """Returns a new list containing experiment names preceded by a list of
    nonprivate experiments in the order in which they were run, such that
    these nonprivate experiments executed before. This is useful if you want
    to combine reports from |experiment_names| and all nonprivate
    experiments."""
    # Find the creation time of the oldest experiment in |experiment_names|.
    # min(..., default=None) replaces the previous manual tracking loop; if
    # no experiments match, earliest_creation_time is None and the
    # time_created comparison below matches no rows (NULL in SQL).
    creation_times = [
        result[0] for result in db_utils.query(Experiment.time_created).filter(
            Experiment.name.in_(experiment_names))
    ]
    earliest_creation_time = min(creation_times, default=None)
    # Only completed, nonprivate experiments that started before the earliest
    # of |experiment_names|, in run order.
    nonprivate_experiments = db_utils.query(Experiment.name).filter(
        ~Experiment.private, ~Experiment.name.in_(experiment_names),
        ~Experiment.time_ended.is_(None),
        Experiment.time_created <= earliest_creation_time).order_by(
            Experiment.time_created)
    nonprivate_experiment_names = [
        result[0] for result in nonprivate_experiments
    ]
    return nonprivate_experiment_names + experiment_names
def test_initialize_experiment_in_db(dispatcher_experiment):
    """Tests that _initialize_experiment_in_db adds the right things to the
    database."""
    trial_combinations = itertools.product(
        dispatcher_experiment.benchmarks,
        range(dispatcher_experiment.num_trials), dispatcher_experiment.fuzzers)
    trials = [
        models.Trial(fuzzer=fuzzer,
                     experiment=dispatcher_experiment.experiment_name,
                     benchmark=benchmark)
        for benchmark, _, fuzzer in trial_combinations
    ]
    dispatcher._initialize_experiment_in_db(dispatcher_experiment.config,
                                            trials)

    # Exactly one experiment entity should exist, named after $EXPERIMENT.
    db_experiments = db_utils.query(models.Experiment).all()
    assert len(db_experiments) == 1
    assert db_experiments[0].name == os.environ['EXPERIMENT']

    # Every (benchmark, fuzzer) pair should have num_trials trials.
    db_trials = db_utils.query(models.Trial).all()
    fuzzer_and_benchmarks = [
        (trial.benchmark, trial.fuzzer) for trial in db_trials
    ]
    expected = ([('benchmark-1', 'fuzzer-a'),
                 ('benchmark-1', 'fuzzer-b')] * 4) + [
                     ('benchmark-2', 'fuzzer-a'), ('benchmark-2', 'fuzzer-b')
                 ] * 4
    assert fuzzer_and_benchmarks == expected
def test_schedule(mocked_datetime_now, mocked_get_by_variant_name,
                  mocked_execute, pending_trials, experiment_config):
    """Tests that schedule() ends expired trials and starts new ones as
    needed."""
    mocked_execute.return_value = new_process.ProcessResult(0, '', False)
    mocked_get_by_variant_name.return_value = {'fuzzer': 'test_fuzzer'}
    experiment = experiment_config['experiment']

    # Collect the start times of trials that are already running.
    started_trials_query = db_utils.query(models.Trial).filter(
        models.Trial.experiment == experiment).filter(
            models.Trial.time_started.isnot(None))
    datetimes_first_experiments_started = [
        trial.time_started for trial in started_trials_query
    ]

    # Pretend the current time is past the experiment's expiry.
    expiry_offset = datetime.timedelta(
        seconds=(experiment_config['max_total_time'] +
                 scheduler.GRACE_TIME_SECONDS * 2))
    mocked_datetime_now.return_value = (
        max(datetimes_first_experiments_started) + expiry_offset)

    with ThreadPool() as pool:
        scheduler.schedule(experiment_config, pool)

    # Every originally-started trial should now be ended, and every pending
    # trial should now be started.
    ended_trials = db_utils.query(models.Trial).filter(
        models.Trial.time_ended.isnot(None)).all()
    originally_started_trials = db_utils.query(models.Trial).filter(
        models.Trial.time_started.in_(
            datetimes_first_experiments_started)).all()
    assert originally_started_trials == ended_trials
    assert pending_trials.filter(
        models.Trial.time_started.isnot(None)).all() == pending_trials.all()
def pending_trials(db, experiment_config):
    """Adds trials to the database and returns pending trials."""
    create_experiments(experiment_config)

    def _make_trial(experiment, time_started=None, time_ended=None):
        """Creates a database trial."""
        return models.Trial(experiment=experiment,
                            benchmark=BENCHMARK,
                            fuzzer=FUZZER,
                            time_started=time_started,
                            time_ended=time_ended)

    experiment_name = experiment_config['experiment']
    # Two pending (unstarted) trials belonging to our experiment.
    our_pending_trials = [
        _make_trial(experiment_name),
        _make_trial(experiment_name)
    ]
    # Trials that should not be returned: one from another experiment and
    # two that have already started.
    other_experiment_name = get_other_experiment_name(experiment_config)
    other_trials = [
        _make_trial(other_experiment_name),
        _make_trial(experiment_name, ARBITRARY_DATETIME),
        _make_trial(experiment_name, ARBITRARY_DATETIME)
    ]
    db_utils.add_all(other_trials + our_pending_trials)

    our_trial_ids = [trial.id for trial in our_pending_trials]
    return db_utils.query(models.Trial).filter(
        models.Trial.id.in_(our_trial_ids))
def pending_trials(db, experiment_config):
    """Adds trials to the database and returns pending trials."""
    other_experiment_name = experiment_config['experiment'] + 'other'
    db_utils.add_all([
        models.Experiment(name=experiment_config['experiment']),
        models.Experiment(name=other_experiment_name)
    ])

    def _make_trial(experiment, time_started=None, time_ended=None):
        """Creates a database trial."""
        return models.Trial(experiment=experiment,
                            benchmark=BENCHMARK,
                            fuzzer=FUZZER,
                            time_started=time_started,
                            time_ended=time_ended)

    experiment_name = experiment_config['experiment']
    # Two pending (unstarted) trials belonging to our experiment.
    our_pending_trials = [
        _make_trial(experiment_name),
        _make_trial(experiment_name)
    ]
    # Trials that must be excluded from the result: another experiment's
    # trial and two already-started trials.
    other_trials = [
        _make_trial(other_experiment_name),
        _make_trial(experiment_name, datetime.datetime.now()),
        _make_trial(experiment_name, datetime.datetime.now())
    ]
    db_utils.add_all(other_trials + our_pending_trials)

    our_trial_ids = [trial.id for trial in our_pending_trials]
    return db_utils.query(models.Trial).filter(
        models.Trial.id.in_(our_trial_ids))
def __init__(self, num_trials, experiment_config):
    self.experiment_config = experiment_config
    self.num_trials = num_trials

    # Cap on the number of nonpreemptible instances we may start when the
    # experiment specified preemptible_runners.
    nonpreemptible_bound = math.ceil(self.num_trials *
                                     self.NONPREEMPTIBLES_FRACTION)
    self.max_nonpreemptibles = min(nonpreemptible_bound,
                                   self.MAX_NONPREEMPTIBLES)
    logger.info('Max nonpreemptibles: %d.', self.max_nonpreemptibles)

    # The preemptible retry window is a time period starting when the last
    # initial trial is started. It determines how long preempted trials may
    # be retried using preemptibles, which bounds how long an experiment
    # lasts.
    self.preemptible_window = (experiment_config['max_total_time'] *
                               self.PREEMPTIBLE_WINDOW_MULTIPLIER)
    self._initial_trials = list(
        get_experiment_trials(experiment_config['experiment']))
    self._max_time_started = None

    self.preempted_trials = {}
    self.preemptible_starts_futile = False

    # Start preemption queries from the experiment's creation time so that
    # operations from before the experiment started are filtered out.
    experiment_entity = db_utils.query(models.Experiment).filter(
        models.Experiment.name == experiment_config['experiment']).one()
    self.last_preemptible_query = experiment_entity.time_created.replace(
        tzinfo=datetime.timezone.utc)
def get_experiment_description(experiment_name):
    """Get the description of the experiment named by |experiment_name|."""
    # A separate query keeps get_experiment_data's result size down.
    description_query = db_utils.query(Experiment.description).select_from(
        Experiment).filter(Experiment.name == experiment_name)
    return description_query.one()
def get_experiment_data(experiment_names):
    """Get measurements (such as coverage) on experiments from the database."""
    # Eagerly load each snapshot's trial and restrict to the requested
    # experiments.
    in_experiments_filter = models.Snapshot.trial.has(
        models.Trial.experiment.in_(experiment_names))
    snapshots_query = db_utils.query(models.Snapshot).options(
        sqlalchemy.orm.joinedload('trial')).filter(in_experiments_filter)
    return pd.read_sql_query(snapshots_query.statement, db_utils.engine)
def get_fuzzers_changed_since_last():
    """Returns a list of fuzzers that have changed since the last experiment
    stored in the database that has a commit that is in the current branch."""
    # TODO(metzman): Figure out a way of skipping experiments that were
    # stopped early.

    # Iterate over experiments newest-first since some may have hashes that
    # are not in the current branch.
    experiments = list(
        db_utils.query(models.Experiment).order_by(
            models.Experiment.time_created.desc()))
    if not experiments:
        raise Exception('No experiments found. Cannot find changed fuzzers.')

    for experiment in experiments:
        try:
            changed_files = diff_utils.get_changed_files(experiment.git_hash)
        except diff_utils.DiffError:
            logs.warning('Skipping %s. Commit is not in branch.',
                         experiment.git_hash)
            continue
        # Use the first experiment whose commit diffs cleanly.
        return change_utils.get_changed_fuzzers(changed_files)

    raise Exception('No in-branch experiments. '
                    'Cannot find changed fuzzers.')
def test_experiment(dispatcher_experiment):
    """Tests creating an Experiment object."""
    assert dispatcher_experiment.benchmarks == ['benchmark-1', 'benchmark-2']
    assert dispatcher_experiment.fuzzers == FUZZERS
    assert (
        dispatcher_experiment.web_bucket == 'gs://web-reports/test-experiment')

    # Exactly one experiment entity should exist, named after $EXPERIMENT.
    db_experiments = db_utils.query(models.Experiment).all()
    assert len(db_experiments) == 1
    assert db_experiments[0].name == os.environ['EXPERIMENT']

    # Each (benchmark, fuzzer) pair should appear once per trial.
    db_trials = db_utils.query(models.Trial).all()
    fuzzer_and_benchmarks = [
        (trial.benchmark, trial.fuzzer) for trial in db_trials
    ]
    expected = ([('benchmark-1', 'fuzzer-a'),
                 ('benchmark-1', 'fuzzer-b')] * 4) + [
                     ('benchmark-2', 'fuzzer-a'), ('benchmark-2', 'fuzzer-b')
                 ] * 4
    assert fuzzer_and_benchmarks == expected
def get_trial_instance_manager(experiment_config: dict):
    """Returns an instance of TrialInstanceManager for |experiment_config|."""
    # Create the experiment in the database if it does not exist yet.
    experiment_in_db = db_utils.query(models.Experiment).filter(
        models.Experiment.name == experiment_config['experiment']).first()
    if experiment_in_db is None:
        create_experiments(experiment_config)

    default_num_trials = 100
    return scheduler.TrialInstanceManager(default_num_trials,
                                          experiment_config)
def get_git_hash(experiment_df):
    """Return git hash for the experiment."""
    experiment_names = experiment_df.experiment.unique()
    if len(experiment_names) != 1:
        # A single hash cannot represent multiple experiments.
        return None

    experiment = utils.query(models.Experiment).filter(
        models.Experiment.name == experiment_df.experiment[0]).first()
    return experiment.git_hash
def _query_unmeasured_trials(experiment: str):
    """Returns a query of trials in |experiment| that have not been
    measured."""
    measured_trial_ids = _query_ids_of_measured_trials(experiment)
    # A trial is unmeasured if it belongs to |experiment|, has started, and
    # has no snapshot yet.
    conditions = (
        models.Trial.experiment == experiment,
        ~models.Trial.id.in_(measured_trial_ids),
        ~models.Trial.time_started.is_(None),
    )
    return db_utils.query(models.Trial).filter(*conditions)
def get_trial_ids(experiment: str, fuzzer: str, benchmark: str):
    """Gets ids of all finished trials for a pair of fuzzer and benchmark."""
    id_query = db_utils.query(models.Trial.id).filter(
        models.Trial.experiment == experiment,
        models.Trial.fuzzer == fuzzer,
        models.Trial.benchmark == benchmark,
        ~models.Trial.preempted)
    # Each row is a one-element tuple; unpack to plain ids.
    return [trial_id for trial_id, in id_query]
def get_experiment_data(experiment_names):
    """Get measurements (such as coverage) on experiments from the database."""
    trial_filter = models.Snapshot.trial.has(
        models.Trial.experiment.in_(experiment_names))
    snapshots_query = db_utils.query(models.Snapshot).options(
        sqlalchemy.orm.joinedload('trial')).filter(trial_filter)
    data = pd.read_sql_query(snapshots_query.statement, db_utils.engine)
    # id must be loaded to do the join but drop it now since trial_id
    # provides the same info.
    return data.drop(columns=['id'])
def set_up_coverage_binaries(pool, experiment):
    """Set up coverage binaries for all benchmarks in |experiment|."""
    # DISTINCT on the benchmark column yields each benchmark once.
    benchmarks = [
        trial.benchmark for trial in db_utils.query(models.Trial).distinct(
            models.Trial.benchmark).filter(
                models.Trial.experiment == experiment)
    ]
    coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
    # exist_ok=True avoids the check-then-create race of the previous
    # os.path.exists/os.makedirs pair.
    os.makedirs(coverage_binaries_dir, exist_ok=True)
    pool.map(set_up_coverage_binary, benchmarks)
def _query_ids_of_measured_trials(experiment: str):
    """Returns a query of the ids of trials in |experiment| that have
    measured snapshots."""
    # Snapshots belonging to trials of |experiment|, with trials eagerly
    # loaded.
    snapshots_query = db_utils.query(models.Snapshot).options(
        orm.joinedload('trial')).filter(
            models.Snapshot.trial.has(experiment=experiment))
    trial_ids_query = snapshots_query.with_entities(models.Snapshot.trial_id)
    return trial_ids_query.distinct()
def set_up_coverage_binaries(pool, experiment): """Set up coverage binaries for all benchmarks in |experiment|.""" # Use set comprehension to select distinct benchmarks. benchmarks = [ benchmark_tuple[0] for benchmark_tuple in db_utils.query(models.Trial.benchmark).distinct( ).filter(models.Trial.experiment == experiment) ] coverage_binaries_dir = build_utils.get_coverage_binaries_dir() filesystem.create_directory(coverage_binaries_dir) pool.map(set_up_coverage_binary, benchmarks)
def get_last_trial_time_started(experiment: str):
    """Returns the time_started of the last trial that was started in
    |experiment|. This function cannot be called if there are any unstarted
    (e.g. pending) trials. It will raise an assertion failure if there are
    any pending trials because it does not make sense to call this function
    before that time."""
    assert get_pending_trials(experiment).first() is None
    # Query directly instead of using get_experiment_trials because that
    # already orders the results by id.
    started_trials = db_utils.query(models.Trial).filter(
        models.Trial.experiment == experiment, STARTED_TRIALS_FILTER)
    last_trial = started_trials.order_by(
        models.Trial.time_started.desc()).first()
    return last_trial.time_started
def add_nonprivate_experiments_for_merge_with_clobber(experiment_names):
    """Returns a new list containing experiment names preceded by a list of
    nonprivate experiments in the order in which they were run. This is
    useful if you want to combine reports from |experiment_names| and all
    nonprivate experiments."""
    # All nonprivate experiments not already requested, oldest first.
    nonprivate_query = db_utils.query(Experiment.name).filter(
        ~Experiment.private,
        ~Experiment.name.in_(experiment_names)).order_by(
            Experiment.time_created)
    nonprivate_experiment_names = [name for name, in nonprivate_query]
    return nonprivate_experiment_names + experiment_names
def get_experiment_data(experiment_names):
    """Get measurements (such as coverage) on experiments from the database."""
    columns = (Experiment.git_hash, Trial.experiment, Trial.fuzzer,
               Trial.benchmark, Trial.time_started, Trial.time_ended,
               Snapshot.trial_id, Snapshot.time, Snapshot.edges_covered)
    # Join experiments to their trials and snapshots, restricted to the
    # requested experiments.
    snapshots_query = (db_utils.query(*columns).select_from(Experiment).join(
        Trial).join(Snapshot).filter(Experiment.name.in_(experiment_names)))
    return pd.read_sql_query(snapshots_query.statement, db_utils.engine)
def _query_measured_latest_snapshots(experiment: str):
    """Returns a generator of SnapshotWithTime tuples, one per trial in
    |experiment|, where each tuple's time is the latest (max) snapshot time
    recorded for that trial."""
    latest_time_column = func.max(models.Snapshot.time)
    # The order of these columns must correspond to the fields in
    # SnapshotWithTime.
    columns = (models.Trial.fuzzer, models.Trial.benchmark,
               models.Snapshot.trial_id, latest_time_column)
    experiment_filter = models.Snapshot.trial.has(experiment=experiment)
    # Group per trial so max(time) is computed per trial; the extra
    # benchmark/fuzzer columns are needed so they can appear in the SELECT.
    group_by_columns = (models.Snapshot.trial_id, models.Trial.benchmark,
                        models.Trial.fuzzer)
    snapshots_query = db_utils.query(*columns).join(
        models.Trial).filter(experiment_filter).group_by(*group_by_columns)
    return (SnapshotWithTime(*snapshot) for snapshot in snapshots_query)
def _initialize_experiment_in_db(experiment_config: dict):
    """Initializes |experiment| in the database by creating the experiment
    entity."""
    experiment_name = experiment_config['experiment']
    if db_utils.query(models.Experiment).filter(
            models.Experiment.name == experiment_name).first():
        raise Exception('Experiment already exists in database.')

    experiment_entity = db_utils.get_or_create(
        models.Experiment,
        name=experiment_name,
        git_hash=experiment_config['git_hash'],
        private=experiment_config.get('private', True),
        experiment_filestore=experiment_config['experiment_filestore'])
    db_utils.add_all([experiment_entity])
def _select_unrun_experiment(requested_experiments):
    """Returns the oldest requested experiment config that is not already in
    the database, or None if every request has already been run."""
    # Requests are appended to the file, so the oldest is last; iterate in
    # reverse to find it first.
    for experiment_config in reversed(requested_experiments):
        experiment_name = _get_experiment_name(experiment_config)
        already_run = db_utils.query(models.Experiment).filter(
            models.Experiment.name == experiment_name).first() is not None
        if not already_run:
            return experiment_config
    return None


def _get_benchmarks_for_type(benchmark_type):
    """Returns the benchmarks to run for |benchmark_type| (bug benchmarks
    for the BUG type, coverage benchmarks otherwise)."""
    if benchmark_type == benchmark_utils.BenchmarkType.BUG.value:
        return benchmark_utils.get_bug_benchmarks()
    return (benchmark_utils.get_oss_fuzz_coverage_benchmarks() +
            benchmark_utils.get_standard_coverage_benchmarks())


def run_requested_experiment(dry_run):
    """Run the oldest requested experiment that hasn't been run yet in
    experiment-requests.yaml. Returns None without running anything when the
    service is paused, no new request exists, or the request is invalid."""
    requested_experiments = _get_requested_experiments()

    # TODO(metzman): Look into supporting benchmarks as an optional parameter
    # so that people can add fuzzers that don't support everything.

    if PAUSE_SERVICE_KEYWORD in requested_experiments:
        # Check if automated experiment service is paused.
        logs.warning('Pause service requested, not running experiment.')
        return None

    requested_experiment = _select_unrun_experiment(requested_experiments)
    if requested_experiment is None:
        logs.info('No new experiment to run. Exiting.')
        return None

    experiment_name = _get_experiment_name(requested_experiment)
    if not validate_experiment_requests([requested_experiment]):
        logs.error('Requested experiment: %s in %s is not valid.',
                   requested_experiment, REQUESTED_EXPERIMENTS_PATH)
        return None

    fuzzers = requested_experiment['fuzzers']
    benchmarks = _get_benchmarks_for_type(requested_experiment.get('type'))

    logs.info('Running experiment: %s with fuzzers: %s.', experiment_name,
              ' '.join(fuzzers))
    description = _get_description(requested_experiment)
    oss_fuzz_corpus = _use_oss_fuzz_corpus(requested_experiment)
    return _run_experiment(experiment_name, fuzzers, benchmarks, description,
                           oss_fuzz_corpus, dry_run)
def get_experiment_data(experiment_names):
    """Get measurements (such as coverage) on experiments from the database."""
    columns = (Experiment.git_hash, Experiment.experiment_filestore,
               Trial.experiment, Trial.fuzzer, Trial.benchmark,
               Trial.time_started, Trial.time_ended, Snapshot.trial_id,
               Snapshot.time, Snapshot.edges_covered, Snapshot.fuzzer_stats,
               Crash.crash_key)
    # Outer-join crashes so snapshots without a matching crash still appear.
    crash_join_condition = and_(Snapshot.time == Crash.time,
                                Snapshot.trial_id == Crash.trial_id)
    snapshots_query = (db_utils.query(*columns).select_from(Experiment).join(
        Trial).join(Snapshot).join(
            Crash, crash_join_condition,
            isouter=True).filter(Experiment.name.in_(experiment_names)).filter(
                Trial.preempted.is_(False)))
    return pd.read_sql_query(snapshots_query.statement, db_utils.engine)
def get_experiment_trials(experiment: str):
    """Returns a query of trials in |experiment|."""
    experiment_filter = models.Trial.experiment == experiment
    return db_utils.query(models.Trial).filter(experiment_filter).order_by(
        models.Trial.id)
def _record_experiment_time_ended(experiment_name: str):
    """Record |experiment| end time in the database."""
    name_filter = models.Experiment.name == experiment_name
    experiment = db_utils.query(models.Experiment).filter(name_filter).one()
    # Naive UTC timestamp — presumably matching how other times are stored;
    # verify before switching to timezone-aware datetimes.
    experiment.time_ended = datetime.datetime.utcnow()
    db_utils.add_all([experiment])