Example #1
    def create(self, work_dir):
        """Configures a emulator process which can subsequently be `run`."""
        # Download emulator image.
        if not environment.get_value('ANDROID_EMULATOR_BUCKET_PATH'):
            logs.log_error('ANDROID_EMULATOR_BUCKET_PATH is not set.')
            return
        archive_src_path = environment.get_value(
            'ANDROID_EMULATOR_BUCKET_PATH')
        archive_dst_path = os.path.join(work_dir, 'emulator_bundle.zip')
        storage.copy_file_from(archive_src_path, archive_dst_path)

        # Extract emulator image.
        self.emulator_path = os.path.join(work_dir, 'emulator')
        shell.remove_directory(self.emulator_path)
        archive.unpack(archive_dst_path, self.emulator_path)
        shell.remove_file(archive_dst_path)

        # Stop any stale emulator instances.
        stop_script_path = os.path.join(self.emulator_path, 'stop')
        stop_proc = new_process.ProcessRunner(stop_script_path)
        stop_proc.run_and_wait()

        # Run emulator.
        run_script_path = os.path.join(self.emulator_path, 'run')
        self.process_runner = new_process.ProcessRunner(run_script_path)
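
The pattern here (download an archive, unpack it under the work directory, stop any stale instance, then hand back a not-yet-started runner) can be sketched with the standard library alone. A minimal sketch under that assumption; the bundle URL and the stop/run script names are hypothetical, and extractall should only be used on trusted archives:

import os
import shutil
import subprocess
import urllib.request
import zipfile

def create_runner(bundle_url, work_dir):
    """Download and unpack a tool bundle; return the command to run it later."""
    archive_path = os.path.join(work_dir, 'bundle.zip')
    urllib.request.urlretrieve(bundle_url, archive_path)  # download the image

    tool_dir = os.path.join(work_dir, 'tool')
    shutil.rmtree(tool_dir, ignore_errors=True)  # drop any stale unpack
    with zipfile.ZipFile(archive_path) as bundle:
        bundle.extractall(tool_dir)  # trusted archives only
    os.remove(archive_path)  # the archive is no longer needed

    # Stop any stale instance before handing back the run command.
    subprocess.run([os.path.join(tool_dir, 'stop')], check=False)
    return [os.path.join(tool_dir, 'run')]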
Example #2
def update_tests_if_needed():
    """Updates layout tests every day."""
    data_directory = environment.get_value('FUZZ_DATA')
    error_occurred = False
    expected_task_duration = 60 * 60  # 1 hour.
    retry_limit = environment.get_value('FAIL_RETRIES')
    temp_archive = os.path.join(data_directory, 'temp.zip')
    tests_url = environment.get_value('WEB_TESTS_URL')

    # Check if we have a valid tests url.
    if not tests_url:
        return

    # Layout test updates are usually disabled to speed up local testing.
    if environment.get_value('LOCAL_DEVELOPMENT'):
        return

    # The |UPDATE_WEB_TESTS| env variable can be used to control our update behavior.
    if not environment.get_value('UPDATE_WEB_TESTS'):
        return

    last_modified_time = persistent_cache.get_value(
        LAYOUT_TEST_LAST_UPDATE_KEY,
        constructor=datetime.datetime.utcfromtimestamp)
    if (last_modified_time is not None and not dates.time_has_expired(
            last_modified_time, days=LAYOUT_TEST_UPDATE_INTERVAL_DAYS)):
        return

    logs.log('Updating layout tests.')
    tasks.track_task_start(tasks.Task('update_tests', '', ''),
                           expected_task_duration)

    # Download and unpack the tests archive.
    for _ in range(retry_limit):
        try:
            shell.remove_directory(data_directory, recreate=True)
            storage.copy_file_from(tests_url, temp_archive)
            archive.unpack(temp_archive, data_directory, trusted=True)
            shell.remove_file(temp_archive)
            error_occurred = False
            break
        except Exception:
            logs.log_error(
                'Could not retrieve and unpack layout tests archive. Retrying.'
            )
            error_occurred = True

    if not error_occurred:
        persistent_cache.set_value(LAYOUT_TEST_LAST_UPDATE_KEY,
                                   time.time(),
                                   persist_across_reboots=True)

    tasks.track_task_end()
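
The error_occurred flag exists only so the loop can report success afterwards; an early return expresses the same retry contract more directly. A minimal sketch, with a hypothetical fetch callable standing in for the download-and-unpack body:

import logging

def run_with_retries(fetch, retry_limit):
    """Call fetch() until it succeeds, at most retry_limit times."""
    for _ in range(retry_limit):
        try:
            fetch()  # any exception counts as a failed attempt
            return True  # success: the caller records the update timestamp
        except Exception:
            logging.exception('Attempt failed. Retrying.')
    return False  # every attempt failed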
Example #3
def download_model_from_gcs(local_model_directory, fuzzer_name):
    """Pull model from GCS bucket and put them in specified model directory."""
    # ML model is stored in corpus bucket.
    gcs_corpus_bucket = environment.get_value('CORPUS_BUCKET')
    if not gcs_corpus_bucket:
        logs.log('Corpus bucket is not set. Skip generation.')
        return False

    # Get cloud storage path.
    # e.g. gs://clusterfuzz-corpus/rnn/libpng_read_fuzzer
    gcs_model_directory = 'gs://%s/%s/%s' % (
        gcs_corpus_bucket, constants.RNN_MODEL_NAME, fuzzer_name)

    logs.log('GCS model directory for fuzzer %s is %s.' %
             (fuzzer_name, gcs_model_directory))

    # RNN model consists of three files.
    meta_filename = constants.RNN_MODEL_NAME + constants.MODEL_META_SUFFIX
    data_filename = constants.RNN_MODEL_NAME + constants.MODEL_DATA_SUFFIX
    index_filename = constants.RNN_MODEL_NAME + constants.MODEL_INDEX_SUFFIX

    # Cloud file paths.
    gcs_meta_path = '%s/%s' % (gcs_model_directory, meta_filename)
    gcs_data_path = '%s/%s' % (gcs_model_directory, data_filename)
    gcs_index_path = '%s/%s' % (gcs_model_directory, index_filename)

    # Check if model exists.
    if not (storage.exists(gcs_meta_path) and storage.exists(gcs_data_path)
            and storage.exists(gcs_index_path)):
        logs.log(
            'ML RNN model for fuzzer %s does not exist. Skip generation.' %
            fuzzer_name)
        return False

    # Local file paths.
    local_meta_path = os.path.join(local_model_directory, meta_filename)
    local_data_path = os.path.join(local_model_directory, data_filename)
    local_index_path = os.path.join(local_model_directory, index_filename)

    # Download model files.
    result = (storage.copy_file_from(gcs_meta_path, local_meta_path)
              and storage.copy_file_from(gcs_data_path, local_data_path)
              and storage.copy_file_from(gcs_index_path, local_index_path))

    if not result:
        logs.log(
            'Failed to download RNN model for fuzzer %s. Skip generation.' %
            fuzzer_name)
        return False

    return True
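
The meta/data/index triplet is handled with three near-identical path, existence-check, and copy statements; a loop over the suffixes keeps them in lockstep. A sketch assuming hypothetical remote_exists and remote_copy callables in place of the storage module:

import os

def download_model_files(gcs_model_directory, local_model_directory, model_name,
                         suffixes, remote_exists, remote_copy):
    """Download every model file, or nothing if any piece is missing."""
    pairs = [('%s/%s%s' % (gcs_model_directory, model_name, suffix),
              os.path.join(local_model_directory, model_name + suffix))
             for suffix in suffixes]

    # Check all pieces up front so a missing file never leaves a partial model.
    if not all(remote_exists(remote) for remote, _ in pairs):
        return False

    # all() short-circuits, matching the original chained `and` of copies.
    return all(remote_copy(remote, local) for remote, local in pairs)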
Example #4
def get_corpus(corpus_directory, fuzzer_name):
    """Get corpus directory.

  This function will download latest corpus backup file from GCS, unzip
  the file and put them in corpus directory.

  Args:
    directory: The directory to place corpus.
    fuzzer_name: Fuzzer name, e.g. libpng_read_fuzzer, xml_parser_fuzzer, etc.

  Returns:
    True if the corpus can be acquired and False otherwise.
  """
    backup_bucket_name = environment.get_value('BACKUP_BUCKET')
    corpus_fuzzer_name = environment.get_value('CORPUS_FUZZER_NAME_OVERRIDE')

    # Get GCS backup path.
    gcs_backup_path = corpus_manager.gcs_url_for_backup_file(
        backup_bucket_name, corpus_fuzzer_name, fuzzer_name,
        corpus_manager.LATEST_BACKUP_TIMESTAMP)

    # Get local backup path.
    local_backup_name = os.path.basename(gcs_backup_path)
    local_backup_path = os.path.join(corpus_directory, local_backup_name)

    # Download latest backup.
    if not storage.copy_file_from(gcs_backup_path, local_backup_path):
        logs.log_error('Failed to download corpus backup from GCS url {}.'.format(
            gcs_backup_path))
        return False

    # Extract corpus from zip file.
    archive.unpack(local_backup_path, corpus_directory)
    shell.remove_file(local_backup_path)
    return True
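
A hypothetical caller, reusing the shell and logs helpers seen elsewhere in these examples (work_directory and the fuzzer name are placeholders):

corpus_directory = os.path.join(work_directory, 'corpus')
shell.create_directory(corpus_directory)
if not get_corpus(corpus_directory, 'libpng_read_fuzzer'):
    logs.log_error('Corpus unavailable, skipping fuzzing session.')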
Example #5
    def create(self):
        """Configures a emulator process which can subsequently be `run`."""
        # Download emulator image.
        if not environment.get_value('ANDROID_EMULATOR_BUCKET_PATH'):
            logs.log_error('ANDROID_EMULATOR_BUCKET_PATH is not set.')
            return
        temp_directory = environment.get_value('BOT_TMPDIR')
        archive_src_path = environment.get_value(
            'ANDROID_EMULATOR_BUCKET_PATH')
        archive_dst_path = os.path.join(temp_directory, 'emulator_bundle.zip')
        storage.copy_file_from(archive_src_path, archive_dst_path)

        # Extract emulator image.
        self.emulator_path = os.path.join(temp_directory, 'emulator')
        archive.unpack(archive_dst_path, self.emulator_path)
        shell.remove_file(archive_dst_path)

        # Run emulator.
        script_path = os.path.join(self.emulator_path, 'run')
        self.process_runner = new_process.ProcessRunner(script_path)
Example #6
def _download_mutator_plugin_archive(mutator_plugin_archive):
  """Downloads the |mutator_plugin_archive| from the mutator plugin storage
  bucket to the plugin archives directory. Returns the path that the archive was
  downloaded to."""
  file_path = os.path.join(_get_mutator_plugins_archives_dir(),
                           mutator_plugin_archive)
  url = '%s/%s' % (_get_mutator_plugins_bucket_url(), mutator_plugin_archive)
  if not storage.copy_file_from(url, file_path):
    logs.log_error(
        'Failed to copy plugin archive from %s to %s' % (url, file_path))
    return None

  return file_path
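
Failure is signalled with None after logging, so a hypothetical caller checks the return value instead of catching an exception (the plugin name and the target directory helper below are made up for illustration):

archive_path = _download_mutator_plugin_archive('my_plugin.zip')
if archive_path is not None:
    archive.unpack(archive_path, _get_mutator_plugins_dir())  # hypothetical helper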
Example #7
    def _cross_pollinate_other_fuzzer_corpuses(self):
        """Add other fuzzer corpuses to shared corpus path for cross-pollination."""
        corpus_backup_date = utils.utcnow().date() - datetime.timedelta(
            days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)

        for cross_pollinate_fuzzer in self.cross_pollinate_fuzzers:
            project_qualified_name = (
                cross_pollinate_fuzzer.fuzz_target.project_qualified_name())
            backup_bucket_name = cross_pollinate_fuzzer.backup_bucket_name
            corpus_engine_name = cross_pollinate_fuzzer.corpus_engine_name

            corpus_backup_url = corpus_manager.gcs_url_for_backup_file(
                backup_bucket_name,
                corpus_engine_name,
                project_qualified_name,
                corpus_backup_date,
            )
            corpus_backup_local_filename = "%s-%s" % (
                project_qualified_name,
                os.path.basename(corpus_backup_url),
            )
            corpus_backup_local_path = os.path.join(
                self.shared_corpus_path, corpus_backup_local_filename)

            if not storage.exists(corpus_backup_url, ignore_errors=True):
                # This can happen when a new fuzz target has just been checked
                # in, or when we missed capturing a backup for a particular day
                # (for OSS-Fuzz, this results in a 403 instead of a 404 since
                # that GCS path belongs to another project). So, just log a
                # warning for debugging purposes.
                logs.log_warn("Corpus backup does not exist, ignoring: %s." %
                              corpus_backup_url)
                continue

            if not storage.copy_file_from(corpus_backup_url,
                                          corpus_backup_local_path):
                continue

            corpus_backup_output_directory = os.path.join(
                self.shared_corpus_path, project_qualified_name)
            shell.create_directory(corpus_backup_output_directory)
            result = archive.unpack(corpus_backup_local_path,
                                    corpus_backup_output_directory)
            shell.remove_file(corpus_backup_local_path)

            if result:
                logs.log(
                    "Corpus backup url %s successfully unpacked into shared corpus."
                    % corpus_backup_url)
            else:
                logs.log_error("Failed to unpack corpus backup from url %s." %
                               corpus_backup_url)
Example #8
    def download_recommended_dictionary_from_gcs(self, local_dict_path):
        """Download recommended dictionary from GCS to the given location.

    Args:
      local_dict_path: Path to a dictionary file on the disk.

    Returns:
      A boolean indicating whether downloading succeeded or not.
    """
        # When the fuzz target is initially created or when it has no new
        # coverage or dictionary recommendations, then we won't have a
        # recommended dictionary in GCS.
        if not storage.exists(self.gcs_path):
            return False

        if storage.copy_file_from(self.gcs_path, local_dict_path):
            return True

        logs.log('Downloading %s failed.' % self.gcs_path)
        return False
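
The exists-then-copy guard distinguishes "nothing uploaded yet" from a genuine download failure. A standalone sketch of the same contract with hypothetical helpers; note that the two calls are not atomic, so a file deleted in between simply surfaces as a failed copy, which the final branch handles:

import logging

def download_if_present(remote_path, local_path, remote_exists, remote_copy):
    """Return True only if the remote file exists and the copy succeeds."""
    if not remote_exists(remote_path):
        return False  # not an error: the file may simply not exist yet
    if remote_copy(remote_path, local_path):
        return True
    logging.error('Downloading %s failed.', remote_path)
    return False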
Example #9
def _unpack_build(base_build_dir, build_dir, build_url, target_weights=None):
  """Unpacks a build from a build url into the build directory."""
  # Track time taken to unpack builds so that it doesn't silently regress.
  start_time = time.time()

  # Free up memory.
  utils.python_gc()

  # Remove the current build.
  logs.log('Removing build directory %s.' % build_dir)
  if not shell.remove_directory(build_dir, recreate=True):
    logs.log_error('Unable to clear build directory %s.' % build_dir)
    _handle_unrecoverable_error_on_windows()
    return False

  # Decide whether to use cache build archives or not.
  use_cache = environment.get_value('CACHE_STORE', False)

  # Download build archive locally.
  build_local_archive = os.path.join(build_dir, os.path.basename(build_url))

  # Make the disk space necessary for the archive available.
  archive_size = storage.get_download_file_size(
      build_url, build_local_archive, use_cache=True)
  if archive_size is not None and not _make_space(archive_size, base_build_dir):
    shell.clear_data_directories()
    logs.log_fatal_and_exit(
        'Failed to make space for download. '
        'Cleared all data directories to free up space, exiting.')

  logs.log('Downloading build from url %s.' % build_url)
  try:
    storage.copy_file_from(build_url, build_local_archive, use_cache=use_cache)
  except Exception:
    logs.log_error('Unable to download build url %s.' % build_url)
    return False

  unpack_everything = environment.get_value('UNPACK_ALL_FUZZ_TARGETS_AND_FILES')
  if not unpack_everything:
    # For fuzzing, pick a random fuzz target so that we only un-archive that
    # particular fuzz target and its dependencies and save disk space.
    # If we are going to unpack everything in the archive based on
    # |UNPACK_ALL_FUZZ_TARGETS_AND_FILES| in the job definition, then don't set
    # a random fuzz target before we've unpacked the build. It won't actually
    # save us anything in this case and can be really expensive for large
    # builds (such as Chrome OS). Defer setting it until after the build has
    # been unpacked.
    _set_random_fuzz_target_for_fuzzing_if_needed(
        _get_fuzz_targets_from_archive(build_local_archive), target_weights)

  # Actual list of files to unpack can be smaller if we are only unarchiving
  # a particular fuzz target.
  file_match_callback = _get_file_match_callback()
  assert not (unpack_everything and file_match_callback is not None)

  if not _make_space_for_build(build_local_archive, base_build_dir,
                               file_match_callback):
    shell.clear_data_directories()
    logs.log_fatal_and_exit(
        'Failed to make space for build. '
        'Cleared all data directories to free up space, exiting.')

  # Unpack the local build archive.
  logs.log('Unpacking build archive %s.' % build_local_archive)
  trusted = not utils.is_oss_fuzz()
  try:
    archive.unpack(
        build_local_archive,
        build_dir,
        trusted=trusted,
        file_match_callback=file_match_callback)
  except Exception:
    logs.log_error('Unable to unpack build archive %s.' % build_local_archive)
    return False

  if unpack_everything:
    # Set a random fuzz target now that the build has been unpacked, if we
    # didn't set one earlier.
    _set_random_fuzz_target_for_fuzzing_if_needed(
        _get_fuzz_targets_from_dir(build_dir), target_weights)

  # If this is partial build due to selected build files, then mark it as such
  # so that it is not re-used.
  if file_match_callback:
    partial_build_file_path = os.path.join(build_dir, PARTIAL_BUILD_FILE)
    utils.write_data_to_file('', partial_build_file_path)

  # No point in keeping the archive around.
  shell.remove_file(build_local_archive)

  end_time = time.time()
  elapsed_time = end_time - start_time
  log_func = logs.log_warn if elapsed_time > UNPACK_TIME_LIMIT else logs.log
  log_func('Build took %0.02f minutes to unpack.' % (elapsed_time / 60.))

  return True
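
The file_match_callback used above restricts unpacking to the chosen fuzz target's files. A self-contained sketch of selective extraction with the standard zipfile module (the predicate is hypothetical, and as with extractall generally, only use it on trusted archives):

import zipfile

def unpack_matching(archive_path, output_dir, match=None):
    """Extract only members for which match(name) is true, or all if match is None."""
    with zipfile.ZipFile(archive_path) as archive_file:
        members = archive_file.namelist()
        if match is not None:
            members = [name for name in members if match(name)]
        archive_file.extractall(output_dir, members=members)

# For example, unpack only one target's files:
# unpack_matching('build.zip', 'build', lambda name: 'my_fuzzer' in name)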
Example #10
def update_source_code():
  """Updates source code files with latest version from appengine."""
  process_handler.cleanup_stale_processes()
  shell.clear_temp_directory()

  root_directory = environment.get_value('ROOT_DIR')
  temp_directory = environment.get_value('BOT_TMPDIR')
  temp_archive = os.path.join(temp_directory, 'clusterfuzz-source.zip')
  try:
    storage.copy_file_from(get_source_url(), temp_archive)
  except Exception:
    logs.log_error('Could not retrieve source code archive from url.')
    return

  try:
    file_list = archive.get_file_list(temp_archive)
    zip_archive = zipfile.ZipFile(temp_archive, 'r')
  except Exception:
    logs.log_error('Bad zip file.')
    return

  src_directory = os.path.join(root_directory, 'src')
  output_directory = os.path.dirname(root_directory)
  error_occurred = False
  normalized_file_set = set()
  for filepath in file_list:
    filename = os.path.basename(filepath)

    # This file cannot be updated on the fly since it is running as a server.
    if filename == 'adb':
      continue

    absolute_filepath = os.path.join(output_directory, filepath)
    if os.path.altsep:
      absolute_filepath = absolute_filepath.replace(os.path.altsep, os.path.sep)

    if os.path.realpath(absolute_filepath) != absolute_filepath:
      continue

    normalized_file_set.add(absolute_filepath)
    try:
      file_extension = os.path.splitext(filename)[1]

      # Remove any .so files first before overwriting, as they can be loaded
      # in the memory of existing processes. Overwriting them directly causes
      # segfaults in existing processes (e.g. run.py).
      if file_extension == '.so' and os.path.exists(absolute_filepath):
        os.remove(absolute_filepath)

      # On Windows, to update DLLs (and native .pyd extensions), we rename
      # them first so that we can install the new versions.
      if (environment.platform() == 'WINDOWS' and
          file_extension in ['.dll', '.pyd'] and
          os.path.exists(absolute_filepath)):
        _rename_dll_for_update(absolute_filepath)
    except Exception:
      logs.log_error('Failed to remove or move %s before extracting new '
                     'version.' % absolute_filepath)

    try:
      extracted_path = zip_archive.extract(filepath, output_directory)
      external_attr = zip_archive.getinfo(filepath).external_attr
      mode = (external_attr >> 16) & 0o777
      mode |= 0o440
      os.chmod(extracted_path, mode)
    except Exception:
      error_occurred = True
      logs.log_error(
          'Failed to extract file %s from source archive.' % filepath)

  zip_archive.close()

  if error_occurred:
    return

  clear_pyc_files(src_directory)
  clear_old_files(src_directory, normalized_file_set)

  local_manifest_path = os.path.join(root_directory,
                                     utils.LOCAL_SOURCE_MANIFEST)
  source_version = utils.read_data_from_file(
      local_manifest_path, eval_data=False)
  logs.log('Source code updated to %s.' % source_version)
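
The special-casing of .so, .dll and .pyd files above exists because libraries mapped into running processes cannot safely be overwritten in place. A minimal sketch of the rename-before-replace idea (the helper and its arguments are hypothetical):

import os

def replace_loaded_library(new_contents, target_path):
    """Swap in a new library without truncating one that may be mapped."""
    if os.path.exists(target_path):
        # Renaming keeps the old file contents available to processes that
        # already hold it open; they keep running the old code until restart.
        os.replace(target_path, target_path + '.old')
    with open(target_path, 'wb') as library_file:
        library_file.write(new_contents)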