Example No. 1
def add_nonprivate_experiments_for_merge_with_clobber(experiment_names):
    """Returns a new list containing experiment names preeceeded by a list of
    nonprivate experiments in the order in which they were run, such that
    these nonprivate experiments executed before. This is useful
    if you want to combine reports from |experiment_names| and all nonprivate
    experiments."""
    earliest_creation_time = None
    for result in db_utils.query(Experiment.time_created).filter(
            Experiment.name.in_(experiment_names)):
        experiment_creation_time = result[0]
        if not earliest_creation_time:
            earliest_creation_time = experiment_creation_time
        else:
            earliest_creation_time = min(earliest_creation_time,
                                         experiment_creation_time)

    nonprivate_experiments = db_utils.query(Experiment.name).filter(
        ~Experiment.private, ~Experiment.name.in_(experiment_names),
        ~Experiment.time_ended.is_(None),
        Experiment.time_created <= earliest_creation_time).order_by(
            Experiment.time_created)
    nonprivate_experiment_names = [
        result[0] for result in nonprivate_experiments
    ]

    return nonprivate_experiment_names + experiment_names
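A minimal usage sketch for the function above; the experiment names are hypothetical placeholders.

# Hypothetical usage: the experiment names are placeholders, not real data.
merged_names = add_nonprivate_experiments_for_merge_with_clobber(
    ['2023-01-01-my-exp', '2023-01-02-my-exp'])
# |merged_names| lists the earlier nonprivate experiments first (oldest to
# newest), followed by the two requested experiments.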
Example No. 2
def test_initialize_experiment_in_db(dispatcher_experiment):
    """Tests that _initialize_experiment_in_db adds the right things to the
    database."""
    trials_args = itertools.product(dispatcher_experiment.benchmarks,
                                    range(dispatcher_experiment.num_trials),
                                    dispatcher_experiment.fuzzers)
    trials = [
        models.Trial(fuzzer=fuzzer,
                     experiment=dispatcher_experiment.experiment_name,
                     benchmark=benchmark)
        for benchmark, _, fuzzer in trials_args
    ]
    dispatcher._initialize_experiment_in_db(dispatcher_experiment.config,
                                            trials)
    db_experiments = db_utils.query(models.Experiment).all()
    assert len(db_experiments) == 1
    db_experiment = db_experiments[0]
    assert db_experiment.name == os.environ['EXPERIMENT']
    trials = db_utils.query(models.Trial).all()
    fuzzer_and_benchmarks = [(trial.benchmark, trial.fuzzer)
                             for trial in trials]
    assert fuzzer_and_benchmarks == ([('benchmark-1', 'fuzzer-a'),
                                      ('benchmark-1', 'fuzzer-b')] *
                                     4) + [('benchmark-2', 'fuzzer-a'),
                                           ('benchmark-2', 'fuzzer-b')] * 4
Example No. 3
def test_schedule(mocked_datetime_now, mocked_get_by_variant_name,
                  mocked_execute, pending_trials, experiment_config):
    """Tests that schedule() ends expired trials and starts new ones as
    needed."""
    mocked_execute.return_value = new_process.ProcessResult(0, '', False)
    mocked_get_by_variant_name.return_value = {'fuzzer': 'test_fuzzer'}
    experiment = experiment_config['experiment']
    datetimes_first_experiments_started = [
        trial.time_started for trial in db_utils.query(models.Trial).filter(
            models.Trial.experiment == experiment).filter(
                models.Trial.time_started.isnot(None))
    ]

    mocked_datetime_now.return_value = (
        max(datetimes_first_experiments_started) +
        datetime.timedelta(seconds=(experiment_config['max_total_time'] +
                                    scheduler.GRACE_TIME_SECONDS * 2)))

    with ThreadPool() as pool:
        scheduler.schedule(experiment_config, pool)
    assert db_utils.query(models.Trial).filter(
        models.Trial.time_started.in_(
            datetimes_first_experiments_started)).all() == (db_utils.query(
                models.Trial).filter(models.Trial.time_ended.isnot(None)).all())

    assert pending_trials.filter(
        models.Trial.time_started.isnot(None)).all() == pending_trials.all()
Example No. 4
def pending_trials(db, experiment_config):
    """Adds trials to the database and returns pending trials."""
    create_experiments(experiment_config)

    def create_trial(experiment, time_started=None, time_ended=None):
        """Creates a database trial."""
        return models.Trial(experiment=experiment,
                            benchmark=BENCHMARK,
                            fuzzer=FUZZER,
                            time_started=time_started,
                            time_ended=time_ended)

    our_pending_trials = [
        create_trial(experiment_config['experiment']),
        create_trial(experiment_config['experiment'])
    ]
    other_experiment_name = get_other_experiment_name(experiment_config)
    other_trials = [
        create_trial(other_experiment_name),
        create_trial(experiment_config['experiment'], ARBITRARY_DATETIME),
        create_trial(experiment_config['experiment'], ARBITRARY_DATETIME)
    ]
    db_utils.add_all(other_trials + our_pending_trials)
    our_trial_ids = [trial.id for trial in our_pending_trials]
    return db_utils.query(models.Trial).filter(
        models.Trial.id.in_(our_trial_ids))
Example No. 5
def pending_trials(db, experiment_config):
    """Adds trials to the database and returns pending trials."""
    other_experiment_name = experiment_config['experiment'] + 'other'
    db_utils.add_all([
        models.Experiment(name=experiment_config['experiment']),
        models.Experiment(name=other_experiment_name)
    ])

    def create_trial(experiment, time_started=None, time_ended=None):
        """Creates a database trial."""
        return models.Trial(experiment=experiment,
                            benchmark=BENCHMARK,
                            fuzzer=FUZZER,
                            time_started=time_started,
                            time_ended=time_ended)

    our_pending_trials = [
        create_trial(experiment_config['experiment']),
        create_trial(experiment_config['experiment'])
    ]
    other_trials = [
        create_trial(other_experiment_name),
        create_trial(experiment_config['experiment'], datetime.datetime.now()),
        create_trial(experiment_config['experiment'], datetime.datetime.now())
    ]
    db_utils.add_all(other_trials + our_pending_trials)
    our_trial_ids = [trial.id for trial in our_pending_trials]
    return db_utils.query(models.Trial).filter(
        models.Trial.id.in_(our_trial_ids))
Example No. 6
    def __init__(self, num_trials, experiment_config):
        self.experiment_config = experiment_config
        self.num_trials = num_trials

        # Bound for the number of nonpreemptibles we can start if the experiment
        # specified preemptible_runners.
        self.max_nonpreemptibles = min(
            math.ceil(self.num_trials * self.NONPREEMPTIBLES_FRACTION),
            self.MAX_NONPREEMPTIBLES)
        logger.info('Max nonpreemptibles: %d.', self.max_nonpreemptibles)

        # Attributes for preemptible retry window. The preemptible retry window
        # is a time period that starts when the last initial trial is started.
        # It determines how long we can retry preempted trials using
        # preemptibles. This bounds the length of time an experiment lasts.
        self.preemptible_window = (experiment_config['max_total_time'] *
                                   self.PREEMPTIBLE_WINDOW_MULTIPLIER)
        self._initial_trials = list(
            get_experiment_trials(experiment_config['experiment']))
        self._max_time_started = None

        self.preempted_trials = {}
        self.preemptible_starts_futile = False

        # Filter operations happening before the experiment started.
        self.last_preemptible_query = (db_utils.query(
            models.Experiment).filter(
                models.Experiment.name == experiment_config['experiment']).one(
                ).time_created.replace(tzinfo=datetime.timezone.utc))
Example No. 7
def get_experiment_description(experiment_name):
    """Get the description of the experiment named by |experiment_name|."""
    # Do another query for the description so we don't explode the size of the
    # results from get_experiment_data.
    return db_utils.query(Experiment.description)\
            .select_from(Experiment)\
            .filter(Experiment.name == experiment_name).one()
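Because the query above ends with .one(), it returns a one-column row rather than a bare string; a minimal unpacking sketch with a placeholder experiment name.

# Hypothetical usage: .one() yields a single-column row object, so index it
# (or use .description) to get the string itself.
row = get_experiment_description('2023-01-01-my-exp')
description = row[0]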
Example No. 8
def get_experiment_data(experiment_names):
    """Get measurements (such as coverage) on experiments from the database."""
    snapshots_query = db_utils.query(models.Snapshot).options(
        sqlalchemy.orm.joinedload('trial')).filter(
            models.Snapshot.trial.has(
                models.Trial.experiment.in_(experiment_names)))
    return pd.read_sql_query(snapshots_query.statement, db_utils.engine)
Example No. 9
def get_fuzzers_changed_since_last():
    """Returns a list of fuzzers that have changed since the last experiment
    stored in the database that has a commit that is in the current branch."""
    # TODO(metzman): Figure out a way of skipping experiments that were stopped
    # early.

    # Loop over experiments since some may have hashes that are not in the
    # current branch.
    experiments = list(
        db_utils.query(models.Experiment).order_by(
            models.Experiment.time_created.desc()))
    if not experiments:
        raise Exception('No experiments found. Cannot find changed fuzzers.')

    changed_files = None
    for experiment in experiments:
        try:
            changed_files = diff_utils.get_changed_files(experiment.git_hash)
            break
        except diff_utils.DiffError:
            logs.warning('Skipping %s. Commit is not in branch.',
                         experiment.git_hash)

    if changed_files is None:
        raise Exception('No in-branch experiments. '
                        'Cannot find changed fuzzers.')
    return change_utils.get_changed_fuzzers(changed_files)
Example No. 10
def test_experiment(dispatcher_experiment):
    """Tests creating an Experiment object."""
    assert dispatcher_experiment.benchmarks == ['benchmark-1', 'benchmark-2']
    assert dispatcher_experiment.fuzzers == FUZZERS
    assert (
        dispatcher_experiment.web_bucket == 'gs://web-reports/test-experiment')
    db_experiments = db_utils.query(models.Experiment).all()
    assert len(db_experiments) == 1
    db_experiment = db_experiments[0]
    assert db_experiment.name == os.environ['EXPERIMENT']
    trials = db_utils.query(models.Trial).all()
    fuzzer_and_benchmarks = [(trial.benchmark, trial.fuzzer)
                             for trial in trials]
    assert fuzzer_and_benchmarks == ([('benchmark-1', 'fuzzer-a'),
                                      ('benchmark-1', 'fuzzer-b')] *
                                     4) + [('benchmark-2', 'fuzzer-a'),
                                           ('benchmark-2', 'fuzzer-b')] * 4
Example No. 11
def get_trial_instance_manager(experiment_config: dict):
    """Returns an instance of TrialInstanceManager for |experiment_config|."""
    if not db_utils.query(models.Experiment).filter(
            models.Experiment.name == experiment_config['experiment']).first():
        create_experiments(experiment_config)

    default_num_trials = 100
    return scheduler.TrialInstanceManager(default_num_trials, experiment_config)
Example No. 12
def get_git_hash(experiment_df):
    """Return git hash for the experiment."""
    if len(experiment_df.experiment.unique()) != 1:
        # Not possible to represent hashes for multiple experiments.
        return None

    return utils.query(models.Experiment).filter(
        models.Experiment.name == experiment_df.experiment[0]).first().git_hash
Example No. 13
def _query_unmeasured_trials(experiment: str):
    """Returns a query of trials in |experiment| that have not been measured."""
    trial_query = db_utils.query(models.Trial)
    ids_of_trials_with_snapshots = _query_ids_of_measured_trials(experiment)
    no_snapshots_filter = ~models.Trial.id.in_(ids_of_trials_with_snapshots)
    started_trials_filter = ~models.Trial.time_started.is_(None)
    experiment_trials_filter = models.Trial.experiment == experiment
    return trial_query.filter(experiment_trials_filter, no_snapshots_filter,
                              started_trials_filter)
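A short sketch of consuming the returned query; the experiment name is a placeholder, and the query stays lazy until iterated.

# Hypothetical usage: iterate the lazy query to collect ids of trials that
# still need measurement.
unmeasured_trials = _query_unmeasured_trials('example-experiment')
unmeasured_ids = [trial.id for trial in unmeasured_trials]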
Example No. 14
def get_trial_ids(experiment: str, fuzzer: str, benchmark: str):
    """Gets ids of all finished trials for a pair of fuzzer and benchmark."""
    trial_ids = [
        trial_id_tuple[0]
        for trial_id_tuple in db_utils.query(models.Trial.id).filter(
            models.Trial.experiment == experiment, models.Trial.fuzzer ==
            fuzzer, models.Trial.benchmark == benchmark,
            ~models.Trial.preempted)
    ]
    return trial_ids
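A usage sketch with placeholder experiment, fuzzer, and benchmark names.

# Hypothetical usage: count finished, non-preempted trials for one
# fuzzer/benchmark pair (all three names are placeholders).
trial_ids = get_trial_ids('example-experiment', 'example-fuzzer',
                          'example-benchmark')
print(len(trial_ids))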
Example No. 15
def get_experiment_data(experiment_names):
    """Get measurements (such as coverage) on experiments from the database."""
    snapshots_query = db_utils.query(models.Snapshot).options(
        sqlalchemy.orm.joinedload('trial')).filter(
            models.Snapshot.trial.has(
                models.Trial.experiment.in_(experiment_names)))

    # id must be loaded to do the join but get rid of it now since
    # trial_id provides the same info.
    data = pd.read_sql_query(snapshots_query.statement, db_utils.engine)
    return data.drop(columns=['id'])
Example No. 16
def set_up_coverage_binaries(pool, experiment):
    """Set up coverage binaries for all benchmarks in |experiment|."""
    benchmarks = [
        trial.benchmark for trial in db_utils.query(models.Trial).distinct(
            models.Trial.benchmark).filter(
                models.Trial.experiment == experiment)
    ]
    coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
    if not os.path.exists(coverage_binaries_dir):
        os.makedirs(coverage_binaries_dir)
    pool.map(set_up_coverage_binary, benchmarks)
Example No. 17
def _query_ids_of_measured_trials(experiment: str):
    """Returns a query of the ids of trials in |experiment| that have measured
    snapshots."""
    trials_and_snapshots_query = db_utils.query(models.Snapshot).options(
        orm.joinedload('trial'))
    experiment_trials_filter = models.Snapshot.trial.has(experiment=experiment)
    experiment_trials_and_snapshots_query = trials_and_snapshots_query.filter(
        experiment_trials_filter)
    experiment_snapshot_trial_ids_query = (
        experiment_trials_and_snapshots_query.with_entities(
            models.Snapshot.trial_id))
    return experiment_snapshot_trial_ids_query.distinct()
Example No. 18
def set_up_coverage_binaries(pool, experiment):
    """Set up coverage binaries for all benchmarks in |experiment|."""
    # Use a DISTINCT query to select each benchmark only once.
    benchmarks = [
        benchmark_tuple[0]
        for benchmark_tuple in db_utils.query(models.Trial.benchmark).distinct(
        ).filter(models.Trial.experiment == experiment)
    ]

    coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
    filesystem.create_directory(coverage_binaries_dir)
    pool.map(set_up_coverage_binary, benchmarks)
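A sketch of driving the function with a worker pool; the choice of a thread pool mirrors Example No. 3 above and is an assumption, not something this snippet requires.

# Hypothetical usage: any pool exposing map() works; a thread pool is shown,
# mirroring the scheduler test above.
from multiprocessing.pool import ThreadPool

with ThreadPool() as pool:
    set_up_coverage_binaries(pool, 'example-experiment')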
Example No. 19
def get_last_trial_time_started(experiment: str):
    """Returns the time_started of the last trial that was started in
    |experiment|. This function cannot be called if there are any unstarted
    (e.g. pending trials). It will raise an assertion failure if there are any
    pending trials because it does not make sense to call this function before
    that time."""
    assert get_pending_trials(experiment).first() is None
    # Don't use get_experiment_trials because it already orders the results by
    # id.
    last_trial = db_utils.query(models.Trial).filter(
        models.Trial.experiment == experiment, STARTED_TRIALS_FILTER).order_by(
            models.Trial.time_started.desc()).first()
    return last_trial.time_started
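A sketch that combines the function above with the max_total_time setting seen in the other examples to estimate when the last trial's budget ends; the |experiment_config| access is an assumption borrowed from those examples.

# Hypothetical usage: estimate when the last-started trial exhausts its time
# budget. |experiment_config| is assumed to be the same dict used elsewhere.
import datetime

last_started = get_last_trial_time_started('example-experiment')
budget_end = last_started + datetime.timedelta(
    seconds=experiment_config['max_total_time'])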
Example No. 20
def add_nonprivate_experiments_for_merge_with_clobber(experiment_names):
    """Returns a new list containing experiment names preeceeded by a list of
    nonprivate experiments in the order in which they were run. This is useful
    if you want to combine reports from |experiment_names| and all nonprivate
    experiments."""
    nonprivate_experiments = db_utils.query(Experiment.name).filter(
        ~Experiment.private, ~Experiment.name.in_(experiment_names)).order_by(
            Experiment.time_created)
    nonprivate_experiment_names = [
        result[0] for result in nonprivate_experiments
    ]

    return nonprivate_experiment_names + experiment_names
Example No. 21
def get_experiment_data(experiment_names):
    """Get measurements (such as coverage) on experiments from the database."""

    snapshots_query = db_utils.query(
        Experiment.git_hash,\
        Trial.experiment, Trial.fuzzer, Trial.benchmark,\
        Trial.time_started, Trial.time_ended,\
        Snapshot.trial_id, Snapshot.time, Snapshot.edges_covered)\
        .select_from(Experiment)\
        .join(Trial)\
        .join(Snapshot)\
        .filter(Experiment.name.in_(experiment_names))

    return pd.read_sql_query(snapshots_query.statement, db_utils.engine)
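Because the columns are listed explicitly in the query above, the resulting DataFrame can be summarized directly; a sketch with a placeholder experiment name.

# Hypothetical usage: keep each trial's final snapshot, then take the median
# edge coverage per benchmark/fuzzer pair.
df = get_experiment_data(['example-experiment'])
final_snapshots = df.loc[df.groupby('trial_id').time.idxmax()]
summary = final_snapshots.groupby(['benchmark',
                                   'fuzzer']).edges_covered.median()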
Example No. 22
def _query_measured_latest_snapshots(experiment: str):
    """Returns a generator of a SnapshotWithTime representing a snapshot that is
    the first snapshot for their trial. The trials are trials in
    |experiment|."""
    latest_time_column = func.max(models.Snapshot.time)
    # The order of these columns must correspond to the fields in
    # SnapshotWithTime.
    columns = (models.Trial.fuzzer, models.Trial.benchmark,
               models.Snapshot.trial_id, latest_time_column)
    experiment_filter = models.Snapshot.trial.has(experiment=experiment)
    group_by_columns = (models.Snapshot.trial_id, models.Trial.benchmark,
                        models.Trial.fuzzer)
    snapshots_query = db_utils.query(*columns).join(
        models.Trial).filter(experiment_filter).group_by(*group_by_columns)
    return (SnapshotWithTime(*snapshot) for snapshot in snapshots_query)
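A sketch of consuming the generator; the field names mirror the column order noted in the comment above and are assumed to match the SnapshotWithTime definition.

# Hypothetical usage: field names follow the column order documented above.
for snapshot in _query_measured_latest_snapshots('example-experiment'):
    print(snapshot.fuzzer, snapshot.benchmark, snapshot.trial_id,
          snapshot.time)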
Example No. 23
def _initialize_experiment_in_db(experiment_config: dict):
    """Initializes |experiment| in the database by creating the experiment
    entity."""
    experiment_exists = db_utils.query(models.Experiment).filter(
        models.Experiment.name == experiment_config['experiment']).first()
    if experiment_exists:
        raise Exception('Experiment already exists in database.')

    db_utils.add_all([
        db_utils.get_or_create(
            models.Experiment,
            name=experiment_config['experiment'],
            git_hash=experiment_config['git_hash'],
            private=experiment_config.get('private', True),
            experiment_filestore=experiment_config['experiment_filestore'])
    ])
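A sketch of the minimal config the function reads; every key below is accessed in the body above, and the values are placeholders.

# Hypothetical usage: only the keys read above are required; 'private'
# defaults to True when omitted.
config = {
    'experiment': 'example-experiment',
    'git_hash': 'deadbeef',
    'experiment_filestore': 'gs://example-bucket',
}
_initialize_experiment_in_db(config)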
Example No. 24
def run_requested_experiment(dry_run):
    """Run the oldest requested experiment that hasn't been run yet in
    experiment-requests.yaml."""
    requested_experiments = _get_requested_experiments()

    # TODO(metzman): Look into supporting benchmarks as an optional parameter so
    # that people can add fuzzers that don't support everything.

    if PAUSE_SERVICE_KEYWORD in requested_experiments:
        # Check if automated experiment service is paused.
        logs.warning('Pause service requested, not running experiment.')
        return None

    requested_experiment = None
    for experiment_config in reversed(requested_experiments):
        experiment_name = _get_experiment_name(experiment_config)
        is_new_experiment = db_utils.query(models.Experiment).filter(
            models.Experiment.name == experiment_name).first() is None
        if is_new_experiment:
            requested_experiment = experiment_config
            break

    if requested_experiment is None:
        logs.info('No new experiment to run. Exiting.')
        return None

    experiment_name = _get_experiment_name(requested_experiment)
    if not validate_experiment_requests([requested_experiment]):
        logs.error('Requested experiment: %s in %s is not valid.',
                   requested_experiment, REQUESTED_EXPERIMENTS_PATH)
        return None
    fuzzers = requested_experiment['fuzzers']

    benchmark_type = requested_experiment.get('type')
    if benchmark_type == benchmark_utils.BenchmarkType.BUG.value:
        benchmarks = benchmark_utils.get_bug_benchmarks()
    else:
        benchmarks = (benchmark_utils.get_oss_fuzz_coverage_benchmarks() +
                      benchmark_utils.get_standard_coverage_benchmarks())

    logs.info('Running experiment: %s with fuzzers: %s.', experiment_name,
              ' '.join(fuzzers))
    description = _get_description(requested_experiment)
    oss_fuzz_corpus = _use_oss_fuzz_corpus(requested_experiment)
    return _run_experiment(experiment_name, fuzzers, benchmarks, description,
                           oss_fuzz_corpus, dry_run)
Example No. 25
def get_experiment_data(experiment_names):
    """Get measurements (such as coverage) on experiments from the database."""

    snapshots_query = db_utils.query(
        Experiment.git_hash, Experiment.experiment_filestore,
        Trial.experiment, Trial.fuzzer, Trial.benchmark,
        Trial.time_started, Trial.time_ended,
        Snapshot.trial_id, Snapshot.time, Snapshot.edges_covered,
        Snapshot.fuzzer_stats, Crash.crash_key)\
        .select_from(Experiment)\
        .join(Trial)\
        .join(Snapshot)\
        .join(Crash,
              and_(Snapshot.time == Crash.time,
                   Snapshot.trial_id == Crash.trial_id), isouter=True)\
        .filter(Experiment.name.in_(experiment_names))\
        .filter(Trial.preempted.is_(False))

    return pd.read_sql_query(snapshots_query.statement, db_utils.engine)
Example No. 26
def get_experiment_trials(experiment: str):
    """Returns a query of trials in |experiment|."""
    return db_utils.query(models.Trial).filter(
        models.Trial.experiment == experiment).order_by(models.Trial.id)
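A short usage sketch; the returned object is a lazy query ordered by trial id, so it can be counted or iterated without materializing everything.

# Hypothetical usage: the query is lazy and already ordered by id.
trials = get_experiment_trials('example-experiment')
print(trials.count())
first_trial = trials.first()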
Example No. 27
def _record_experiment_time_ended(experiment_name: str):
    """Record |experiment| end time in the database."""
    experiment = db_utils.query(models.Experiment).filter(
        models.Experiment.name == experiment_name).one()
    experiment.time_ended = datetime.datetime.utcnow()
    db_utils.add_all([experiment])