def _query_and_upload_strategy_probabilities(engine):
    """Uploads queried data into datastore.

  Calls query functions and uploads query results
  to datastore to use as new probabilities. Probabilities
  are based on new_edges feature."""
    strategy_data = []
    data = _query_multi_armed_bandit_probabilities(engine)
    logs.log('Queried distribution for {}.'.format(engine.name))

    # TODO(mukundv): Update once we choose a temperature parameter for final
    # implementation.
    for row in data:
        curr_strategy = data_types.FuzzStrategyProbability()
        curr_strategy.strategy_name = str(row['strategy'])
        curr_strategy.probability = float(row['bandit_weight'])
        curr_strategy.engine = engine.name
        strategy_data.append(curr_strategy)

    query = data_types.FuzzStrategyProbability.query(
        data_types.FuzzStrategyProbability.engine == engine.name)
    ndb_utils.delete_multi(
        [entity.key for entity in ndb_utils.get_all_from_query(query)])
    ndb_utils.put_multi(strategy_data)
    logs.log('Uploaded queried distribution to ndb for {}'.format(engine.name))
    _store_probabilities_in_bigquery(engine, data)
    logs.log('Uploaded queried distribution to BigQuery for {}'.format(
        engine.name))
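
# The delete-then-put sequence above implements a "replace all rows for this
# engine" idiom. A minimal sketch of that idiom as a reusable helper, assuming
# only the ndb_utils wrappers already used in this file (the helper name is
# hypothetical):
def _replace_query_results(query, new_entities):
    """Delete every entity matched by query, then store new_entities."""
    old_keys = [entity.key for entity in ndb_utils.get_all_from_query(query)]
    ndb_utils.delete_multi(old_keys)
    ndb_utils.put_multi(new_entities)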
def execute_task(*_):
    """Execute the report uploads."""
    logs.log('Uploading pending reports.')

    # Get metadata for reports requiring upload.
    reports_metadata = ndb_utils.get_all_from_query(
        data_types.ReportMetadata.query(
            ndb_utils.is_false(data_types.ReportMetadata.is_uploaded)))
    reports_metadata = list(reports_metadata)
    if not reports_metadata:
        logs.log('No reports that need upload found.')
        return

    environment.set_value('UPLOAD_MODE', 'prod')

    # Otherwise, upload corresponding reports.
    logs.log('Uploading reports for testcases: %s' %
             str([report.testcase_id for report in reports_metadata]))

    report_metadata_to_delete = []
    for report_metadata in reports_metadata:
        # Convert metadata back into actual report.
        crash_info = crash_uploader.crash_report_info_from_metadata(
            report_metadata)
        testcase_id = report_metadata.testcase_id

        try:
            _ = data_handler.get_testcase_by_id(testcase_id)
        except errors.InvalidTestcaseError:
            logs.log_warn('Could not find testcase %s.' % testcase_id)
            report_metadata_to_delete.append(report_metadata.key)
            continue

        # Upload the report and update the corresponding testcase info.
        logs.log('Processing testcase %s for crash upload.' % testcase_id)
        crash_report_id = crash_info.upload()
        if crash_report_id is None:
            logs.log_error(
                'Crash upload for testcase %s failed, retry later.' %
                testcase_id)
            continue

        # Update the report metadata to indicate successful upload.
        report_metadata.crash_report_id = crash_report_id
        report_metadata.is_uploaded = True
        report_metadata.put()

        logs.log('Uploaded testcase %s to crash, got back report id %s.' %
                 (testcase_id, crash_report_id))
        time.sleep(1)

    # Delete report metadata entries where testcase does not exist anymore or
    # upload is not supported.
    if report_metadata_to_delete:
        ndb_utils.delete_multi(report_metadata_to_delete)

    # Deletion happens in batches in cleanup_task, so that in case of error
    # there is some buffer for looking at stored ReportMetadata in the
    # meantime.
    logs.log('Finished uploading crash reports.')
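
# ndb_utils.is_false (used above) exists because NDB query filters cannot use
# Python truthiness; a boolean property must be compared explicitly. A sketch
# of the likely wrapper, offered as an assumption rather than the verified
# implementation:
def _is_false(boolean_property):
    """Build a query filter matching entities whose property is False."""
    return boolean_property == False  # pylint: disable=singleton-comparison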
def update_mappings_for_fuzzer(fuzzer, mappings=None):
    """Clear existing mappings for a fuzzer, and replace them."""
    if mappings is None:
        mappings = fuzzer.jobs

    query = data_types.FuzzerJob.query()
    query = query.filter(data_types.FuzzerJob.fuzzer == fuzzer.name)
    entities = ndb_utils.get_all_from_query(query)
    old_mappings = {}
    for entity in entities:
        old_mappings[entity.job] = entity

    new_mappings = []
    for job_name in mappings:
        mapping = old_mappings.pop(job_name, None)
        if mapping:
            continue

        job = data_types.Job.query(data_types.Job.name == job_name).get()
        if not job:
            logs.log_error('An unknown job %s was selected for fuzzer %s.' %
                           (job_name, fuzzer.name))
            continue

        mapping = data_types.FuzzerJob()
        mapping.fuzzer = fuzzer.name
        mapping.job = job_name
        mapping.platform = job.platform
        new_mappings.append(mapping)

    ndb_utils.put_multi(new_mappings)
    ndb_utils.delete_multi([m.key for m in list(old_mappings.values())])
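
# update_mappings_for_fuzzer applies a set-difference update: desired mappings
# that already exist are kept, missing ones are created, and leftovers are
# deleted. A minimal generic sketch of that reconciliation pattern (names
# hypothetical):
def _reconcile(existing, desired, create):
    """Return (to_create, to_delete) given existing {key: entity} and desired keys."""
    remaining = dict(existing)
    to_create = [create(key) for key in desired
                 if remaining.pop(key, None) is None]
    to_delete = list(remaining.values())  # anything no longer desired
    return to_create, to_delete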
def update_platform_for_job(job_name, new_platform):
    """Update platform for all mappings for a particular job."""
    query = data_types.FuzzerJob.query()
    query = query.filter(data_types.FuzzerJob.job == job_name)
    mappings = ndb_utils.get_all_from_query(query)
    new_mappings = []
    for mapping in mappings:
        mapping.platform = new_platform
        new_mappings.append(mapping)
    ndb_utils.put_multi(new_mappings)
def get_similar_issues(issue_tracker, testcase, only_open=True):
  """Get issue objects that seem to be related to a particular test case."""
  # Get list of issues using the search query.
  keywords = get_search_keywords(testcase)

  issues = issue_tracker.find_issues(keywords=keywords, only_open=only_open)
  if issues:
    issues = list(issues)
  else:
    issues = []

  issue_ids = [issue.id for issue in issues]

  # Add issues from similar testcases sharing the same group id.
  if testcase.group_id:
    group_query = data_types.Testcase.query(
        data_types.Testcase.group_id == testcase.group_id)
    similar_testcases = ndb_utils.get_all_from_query(group_query)
    for similar_testcase in similar_testcases:
      if not similar_testcase.bug_information:
        continue

      # Exclude issues already added above from search terms.
      issue_id = int(similar_testcase.bug_information)
      if issue_id in issue_ids:
        continue

      # Get issue object using ID.
      issue = issue_tracker.get_issue(issue_id)
      if not issue:
        continue

      # If our search criteria allows open bugs only, then check issue and
      # testcase status so as to exclude closed ones.
      if (only_open and (not issue.is_open or not testcase.open)):
        continue

      issues.append(issue)
      issue_ids.append(issue_id)

  return issues
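
# Usage sketch for get_similar_issues (values hypothetical). The issue_ids
# bookkeeping above is what keeps the group scan from re-adding issues the
# keyword search already returned.
#
#   similar = get_similar_issues(issue_tracker, testcase, only_open=True)
#   for issue in similar:
#       print(issue.id)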
def get_fuzz_target_jobs(fuzz_target_name=None,
                         engine=None,
                         job=None,
                         limit=None):
    """Return a Datastore query for fuzz target to job mappings."""
    query = data_types.FuzzTargetJob.query()

    if fuzz_target_name:
        query = query.filter(
            data_types.FuzzTargetJob.fuzz_target_name == fuzz_target_name)

    if job:
        query = query.filter(data_types.FuzzTargetJob.job == job)

    if engine:
        query = query.filter(data_types.FuzzTargetJob.engine == engine)

    if limit is not None:
        return query.iter(limit=limit)

    return ndb_utils.get_all_from_query(query)
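
# Usage sketch (engine name hypothetical). With a limit the function returns
# an NDB iterator; without one it returns ndb_utils' batched generator, so
# callers can iterate either result the same way:
#
#   for target_job in get_fuzz_target_jobs(engine='libFuzzer', limit=10):
#       print(target_job.fuzz_target_name, target_job.job)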
def get_fuzz_task_payload(platform=None):
    """Select a fuzzer that can run on this platform."""
    if not platform:
        queue_override = environment.get_value('QUEUE_OVERRIDE')
        platform = queue_override if queue_override else environment.platform()

    if environment.is_local_development():
        query = data_types.FuzzerJob.query()
        query = query.filter(data_types.FuzzerJob.platform == platform)
        mappings = list(ndb_utils.get_all_from_query(query))
    else:
        query = data_types.FuzzerJobs.query()
        query = query.filter(data_types.FuzzerJobs.platform == platform)

        mappings = []
        for entity in query:
            mappings.extend(entity.fuzzer_jobs)

    if not mappings:
        return None, None

    selection = utils.random_weighted_choice(mappings,
                                             weight_attribute='actual_weight')
    return selection.fuzzer, selection.job
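
# utils.random_weighted_choice above picks one mapping with probability
# proportional to its actual_weight attribute. A minimal sketch of that
# selection under the assumed semantics (not the verified implementation):
import random

def _random_weighted_choice(items, weight_attribute='weight'):
    """Pick one item, weighted by getattr(item, weight_attribute)."""
    weights = [getattr(item, weight_attribute) for item in items]
    return random.choices(items, weights=weights, k=1)[0]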
def update_fuzzer_and_data_bundles(fuzzer_name):
    """Update the fuzzer with a given name if necessary."""
    fuzzer = data_types.Fuzzer.query(
        data_types.Fuzzer.name == fuzzer_name).get()
    if not fuzzer:
        logs.log_error('No fuzzer exists with name %s.' % fuzzer_name)
        raise errors.InvalidFuzzerError

    # Set some helper environment variables.
    fuzzer_directory = get_fuzzer_directory(fuzzer_name)
    environment.set_value('FUZZER_DIR', fuzzer_directory)
    environment.set_value('UNTRUSTED_CONTENT', fuzzer.untrusted_content)

    # If the fuzzer generates large testcases or a large number of small ones
    # that don't fit on tmpfs, then use the larger disk directory.
    if fuzzer.has_large_testcases:
        testcase_disk_directory = environment.get_value('FUZZ_INPUTS_DISK')
        environment.set_value('FUZZ_INPUTS', testcase_disk_directory)

    # Adjust the test timeout, if user has provided one.
    if fuzzer.timeout:
        environment.set_value('TEST_TIMEOUT', fuzzer.timeout)

        # Increase fuzz test timeout if the fuzzer timeout is higher than its
        # current value.
        fuzz_test_timeout = environment.get_value('FUZZ_TEST_TIMEOUT')
        if fuzz_test_timeout and fuzz_test_timeout < fuzzer.timeout:
            environment.set_value('FUZZ_TEST_TIMEOUT', fuzzer.timeout)

    # Adjust the max testcases if this fuzzer has specified a lower limit.
    max_testcases = environment.get_value('MAX_TESTCASES')
    if fuzzer.max_testcases and fuzzer.max_testcases < max_testcases:
        environment.set_value('MAX_TESTCASES', fuzzer.max_testcases)

    # Check for updates to this fuzzer.
    version_file = os.path.join(fuzzer_directory, '.%s_version' % fuzzer_name)
    if (not fuzzer.builtin
            and revisions.needs_update(version_file, fuzzer.revision)):
        logs.log('Fuzzer update was found, updating.')

        # Clear the old fuzzer directory if it exists.
        if not shell.remove_directory(fuzzer_directory, recreate=True):
            logs.log_error('Failed to clear fuzzer directory.')
            return None

        # Copy the archive to local disk and unpack it.
        archive_path = os.path.join(fuzzer_directory, fuzzer.filename)
        if not blobs.read_blob_to_disk(fuzzer.blobstore_key, archive_path):
            logs.log_error('Failed to copy fuzzer archive.')
            return None

        try:
            archive.unpack(archive_path, fuzzer_directory)
        except Exception:
            error_message = (
                'Failed to unpack fuzzer archive %s '
                '(bad archive or unsupported format).') % fuzzer.filename
            logs.log_error(error_message)
            fuzzer_logs.upload_script_log('Fatal error: ' + error_message,
                                          fuzzer_name=fuzzer_name)
            return None

        fuzzer_path = os.path.join(fuzzer_directory, fuzzer.executable_path)
        if not os.path.exists(fuzzer_path):
            error_message = (
                'Fuzzer executable %s not found. '
                'Check fuzzer configuration.') % fuzzer.executable_path
            logs.log_error(error_message)
            fuzzer_logs.upload_script_log('Fatal error: ' + error_message,
                                          fuzzer_name=fuzzer_name)
            return None

        # Make fuzzer executable.
        os.chmod(fuzzer_path, 0o750)

        # Cleanup unneeded archive.
        shell.remove_file(archive_path)

        # Save the current revision of this fuzzer in a file for later checks.
        revisions.write_revision_to_revision_file(version_file,
                                                  fuzzer.revision)
        logs.log('Updated fuzzer to revision %d.' % fuzzer.revision)

    _clear_old_data_bundles_if_needed()

    # Setup data bundles associated with this fuzzer.
    data_bundles = ndb_utils.get_all_from_query(
        data_types.DataBundle.query(
            data_types.DataBundle.name == fuzzer.data_bundle_name))
    for data_bundle in data_bundles:
        if not update_data_bundle(fuzzer, data_bundle):
            return None

    # Setup environment variable for launcher script path.
    if fuzzer.launcher_script:
        fuzzer_launcher_path = os.path.join(fuzzer_directory,
                                            fuzzer.launcher_script)
        environment.set_value('LAUNCHER_PATH', fuzzer_launcher_path)

        # For launcher script usecase, we need the entire fuzzer directory on the
        # worker.
        if environment.is_trusted_host():
            from clusterfuzz._internal.bot.untrusted_runner import file_host
            worker_fuzzer_directory = file_host.rebase_to_worker_root(
                fuzzer_directory)
            file_host.copy_directory_to_worker(fuzzer_directory,
                                               worker_fuzzer_directory,
                                               replace=True)

    return fuzzer
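
# revisions.needs_update above compares the revision recorded in the local
# version file against the fuzzer entity's revision. A plausible sketch of
# that check (an assumption; the real helper may differ):
def _needs_update(version_file_path, expected_revision):
    """Return True if the version file is missing or records another revision."""
    try:
        with open(version_file_path) as handle:
            return int(handle.read().strip()) != expected_revision
    except (OSError, ValueError):
        return True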
def _get_job_list_for_fuzzer(fuzzer):
    """Helper function to return the mappings for a fuzzer as a list."""
    query = data_types.FuzzerJob.query()
    query = query.filter(data_types.FuzzerJob.fuzzer == fuzzer.name)
    return [m.job for m in ndb_utils.get_all_from_query(query)]
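
# Note: NDB's Query.filter() returns a new Query instead of mutating in
# place, so the reassignment above is required. Dropping it silently queries
# all FuzzerJob entities:
#
#   query = data_types.FuzzerJob.query()
#   query.filter(data_types.FuzzerJob.fuzzer == fuzzer.name)  # result discarded
#   query = query.filter(data_types.FuzzerJob.fuzzer == fuzzer.name)  # correct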
def _check_and_update_similar_bug(testcase, issue_tracker):
  """Get list of similar open issues and ones that were recently closed."""
  # Get similar testcases from the same group.
  similar_testcases_from_group = []
  if testcase.group_id:
    group_query = data_types.Testcase.query(
        data_types.Testcase.group_id == testcase.group_id)
    similar_testcases_from_group = ndb_utils.get_all_from_query(
        group_query, batch_size=data_types.TESTCASE_ENTITY_QUERY_LIMIT // 2)

  # Get testcases with the same crash params. These might not be in a group
  # if they were just fixed.
  same_crash_params_query = data_types.Testcase.query(
      data_types.Testcase.crash_type == testcase.crash_type,
      data_types.Testcase.crash_state == testcase.crash_state,
      data_types.Testcase.security_flag == testcase.security_flag,
      data_types.Testcase.project_name == testcase.project_name,
      data_types.Testcase.status == 'Processed')

  similar_testcases_from_query = ndb_utils.get_all_from_query(
      same_crash_params_query,
      batch_size=data_types.TESTCASE_ENTITY_QUERY_LIMIT // 2)
  for similar_testcase in itertools.chain(similar_testcases_from_group,
                                          similar_testcases_from_query):
    # Exclude ourself from comparison.
    if similar_testcase.key.id() == testcase.key.id():
      continue

    # Exclude similar testcases without bug information.
    if not similar_testcase.bug_information:
      continue

    # Get the issue object given its ID.
    issue = issue_tracker.get_issue(similar_testcase.bug_information)
    if not issue:
      continue

    # If the reproducible issue is not verified yet, bug is still valid and
    # might be caused by non-availability of latest builds. In that case,
    # don't file a new bug yet.
    if similar_testcase.open and not similar_testcase.one_time_crasher_flag:
      return True

    # If the issue is still open, no need to file a duplicate bug.
    if issue.is_open:
      return True

    # If the issue indicates that this crash needs to be ignored, no need to
    # file another one.
    policy = issue_tracker_policy.get(issue_tracker.project)
    ignore_label = policy.label('ignore')
    if ignore_label in issue.labels:
      _add_triage_message(
          testcase,
          ('Skipping filing a bug since similar testcase ({testcase_id}) in '
           'issue ({issue_id}) is blacklisted with {ignore_label} label.'
          ).format(
              testcase_id=similar_testcase.key.id(),
              issue_id=issue.id,
              ignore_label=ignore_label))
      return True

    # If the issue is recently closed, wait certain time period to make sure
    # our fixed verification has completed.
    if (issue.closed_time and not dates.time_has_expired(
        issue.closed_time, hours=data_types.MIN_ELAPSED_TIME_SINCE_FIXED)):
      _add_triage_message(
          testcase,
          ('Delaying filing a bug since similar testcase '
           '({testcase_id}) in issue ({issue_id}) was just fixed.').format(
               testcase_id=similar_testcase.key.id(), issue_id=issue.id))
      return True

  return False
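
# dates.time_has_expired above gates duplicate filing on how long ago a
# similar issue was closed. A minimal sketch of the assumed check (the real
# helper's signature may differ):
import datetime

def _time_has_expired(timestamp, hours=0):
    """Return True if more than `hours` have passed since timestamp."""
    return (datetime.datetime.utcnow() - timestamp >
            datetime.timedelta(hours=hours))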
def redo_testcase(testcase, tasks, user_email):
    """Redo specific tasks for a testcase."""
    for task in tasks:
        if task not in VALID_REDO_TASKS:
            raise InvalidRedoTask(task)

    minimize = 'minimize' in tasks
    regression = 'regression' in tasks
    progression = 'progression' in tasks
    impact = 'impact' in tasks
    blame = 'blame' in tasks

    task_list = []
    testcase_id = testcase.key.id()

    # Metadata keys to clear based on which redo tasks were selected.
    metadata_keys_to_clear = ['potentially_flaky']

    if minimize:
        task_list.append('minimize')
        testcase.minimized_keys = ''
        testcase.set_metadata('redo_minimize', True, update_testcase=False)
        metadata_keys_to_clear += [
            'env', 'current_minimization_phase_attempts', 'minimization_phase'
        ]

        # If this testcase was archived during minimization, update the state.
        testcase.archive_state &= ~data_types.ArchiveStatus.MINIMIZED

    if regression:
        task_list.append('regression')
        testcase.regression = ''
        metadata_keys_to_clear += [
            'last_regression_min', 'last_regression_max'
        ]

    if progression:
        task_list.append('progression')
        testcase.fixed = ''
        testcase.open = True
        testcase.last_tested_crash_stacktrace = None
        testcase.triaged = False
        testcase.set_metadata('progression_pending',
                              True,
                              update_testcase=False)
        metadata_keys_to_clear += [
            'last_progression_min', 'last_progression_max',
            'last_tested_revision'
        ]

    if impact:
        task_list.append('impact')
        testcase.is_impact_set_flag = False

    if blame:
        task_list.append('blame')
        testcase.set_metadata('blame_pending', True, update_testcase=False)
        testcase.set_metadata('predator_result', None, update_testcase=False)

    for key in metadata_keys_to_clear:
        testcase.delete_metadata(key, update_testcase=False)

    testcase.comments += '[%s] %s: Redo task(s): %s\n' % (
        utils.current_date_time(), user_email, ', '.join(sorted(task_list)))
    testcase.one_time_crasher_flag = False
    testcase.put()

    # Allow new notifications to be sent for this testcase.
    notifications = ndb_utils.get_all_from_query(
        data_types.Notification.query(
            data_types.Notification.testcase_id == testcase.key.id()),
        keys_only=True)
    ndb_utils.delete_multi(notifications)

    # If we are re-doing minimization, other tasks will be done automatically
    # after minimization completes. So, don't add those tasks.
    if minimize:
        add_task('minimize', testcase_id, testcase.job_type,
                 queue_for_testcase(testcase))
    else:
        if regression:
            add_task('regression', testcase_id, testcase.job_type,
                     queue_for_testcase(testcase))

        if progression:
            add_task('progression', testcase_id, testcase.job_type,
                     queue_for_testcase(testcase))

        if impact:
            add_task('impact', testcase_id, testcase.job_type,
                     queue_for_testcase(testcase))

        if blame:
            add_task('blame', testcase_id, testcase.job_type,
                     queue_for_testcase(testcase))
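
# Usage sketch (testcase id and email hypothetical): re-run the regression
# and progression tasks for a single testcase.
#
#   testcase = data_handler.get_testcase_by_id(12345)
#   redo_testcase(testcase, ['regression', 'progression'],
#                 user_email='dev@example.com')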