Example #1
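Note: this excerpt is from ClusterFuzz and depends on its internal helper
modules. The imports below are a sketch of what the snippet assumes is in
scope; the exact module paths are assumptions that vary between ClusterFuzz
versions.

# Assumed imports (sketch; module paths may differ by ClusterFuzz version).
import os

from clusterfuzz._internal.bot.fuzzers.ml.rnn import constants
from clusterfuzz._internal.google_cloud_utils import storage
from clusterfuzz._internal.metrics import logs
from clusterfuzz._internal.system import environment
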
def download_model_from_gcs(local_model_directory, fuzzer_name):
    """Pull model from GCS bucket and put them in specified model directory."""
    # ML model is stored in corpus bucket.
    gcs_corpus_bucket = environment.get_value('CORPUS_BUCKET')
    if not gcs_corpus_bucket:
        logs.log('Corpus bucket is not set. Skip generation.')
        return False

    # Get cloud storage path.
    # e.g. gs://clusterfuzz-corpus/rnn/libpng_read_fuzzer
    gcs_model_directory = 'gs://%s/%s/%s' % (
        gcs_corpus_bucket, constants.RNN_MODEL_NAME, fuzzer_name)

    logs.log('GCS model directory for fuzzer %s is %s.' %
             (fuzzer_name, gcs_model_directory))

    # The RNN model consists of three files (meta, data and index).
    meta_filename = constants.RNN_MODEL_NAME + constants.MODEL_META_SUFFIX
    data_filename = constants.RNN_MODEL_NAME + constants.MODEL_DATA_SUFFIX
    index_filename = constants.RNN_MODEL_NAME + constants.MODEL_INDEX_SUFFIX

    # Cloud file paths.
    gcs_meta_path = '%s/%s' % (gcs_model_directory, meta_filename)
    gcs_data_path = '%s/%s' % (gcs_model_directory, data_filename)
    gcs_index_path = '%s/%s' % (gcs_model_directory, index_filename)

    # Check if model exists.
    if not (storage.exists(gcs_meta_path) and storage.exists(gcs_data_path)
            and storage.exists(gcs_index_path)):
        logs.log(
            'ML RNN model for fuzzer %s does not exist. Skip generation.' %
            fuzzer_name)
        return False

    # Local file paths.
    local_meta_path = os.path.join(local_model_directory, meta_filename)
    local_data_path = os.path.join(local_model_directory, data_filename)
    local_index_path = os.path.join(local_model_directory, index_filename)

    # Download model files.
    result = (storage.copy_file_from(gcs_meta_path, local_meta_path)
              and storage.copy_file_from(gcs_data_path, local_data_path)
              and storage.copy_file_from(gcs_index_path, local_index_path))

    if not result:
        logs.log(
            'Failed to download RNN model for fuzzer %s. Skip generation.' %
            fuzzer_name)
        return False

    return True
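
A minimal usage sketch, continuing Example #1 (the directory and fuzzer name
are hypothetical, and CORPUS_BUCKET is assumed to be set in the bot
environment):

# Hypothetical usage: fetch the model before running input generation.
local_model_directory = '/tmp/rnn_model'  # hypothetical path
os.makedirs(local_model_directory, exist_ok=True)
if download_model_from_gcs(local_model_directory, 'libpng_read_fuzzer'):
    logs.log('Model downloaded to %s.' % local_model_directory)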
Example #2
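Note: as with Example #1, this excerpt relies on ClusterFuzz-internal
modules. The imports below are a sketch of what the methods assume is in
scope; treat the paths as assumptions rather than the canonical import list.

# Assumed imports (sketch; module paths may differ by ClusterFuzz version).
import datetime
import os

from clusterfuzz._internal.base import utils
from clusterfuzz._internal.datastore import data_types
from clusterfuzz._internal.fuzzing import corpus_manager
from clusterfuzz._internal.google_cloud_utils import storage
from clusterfuzz._internal.metrics import logs
from clusterfuzz._internal.system import archive
from clusterfuzz._internal.system import environment
from clusterfuzz._internal.system import shell
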
    def _cross_pollinate_other_fuzzer_corpuses(self):
        """Add other fuzzer corpuses to shared corpus path for cross-pollination."""
        corpus_backup_date = utils.utcnow().date() - datetime.timedelta(
            days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)

        for cross_pollinate_fuzzer in self.cross_pollinate_fuzzers:
            project_qualified_name = (
                cross_pollinate_fuzzer.fuzz_target.project_qualified_name())
            backup_bucket_name = cross_pollinate_fuzzer.backup_bucket_name
            corpus_engine_name = cross_pollinate_fuzzer.corpus_engine_name

            corpus_backup_url = corpus_manager.gcs_url_for_backup_file(
                backup_bucket_name,
                corpus_engine_name,
                project_qualified_name,
                corpus_backup_date,
            )
            corpus_backup_local_filename = "%s-%s" % (
                project_qualified_name,
                os.path.basename(corpus_backup_url),
            )
            corpus_backup_local_path = os.path.join(
                self.shared_corpus_path, corpus_backup_local_filename)

            if not storage.exists(corpus_backup_url, ignore_errors=True):
                # This can happen when a new fuzz target has just been checked
                # in, or when we missed capturing a backup for a particular day
                # (for OSS-Fuzz, this results in a 403 instead of a 404 since
                # that GCS path belongs to another project). Just log a warning
                # for debugging purposes.
                logs.log_warn("Corpus backup does not exist, ignoring: %s." %
                              corpus_backup_url)
                continue

            if not storage.copy_file_from(corpus_backup_url,
                                          corpus_backup_local_path):
                continue

            corpus_backup_output_directory = os.path.join(
                self.shared_corpus_path, project_qualified_name)
            shell.create_directory(corpus_backup_output_directory)
            result = archive.unpack(corpus_backup_local_path,
                                    corpus_backup_output_directory)
            shell.remove_file(corpus_backup_local_path)

            if result:
                logs.log(
                    "Corpus backup url %s successfully unpacked into shared corpus."
                    % corpus_backup_url)
            else:
                logs.log_error("Failed to unpack corpus backup from url %s." %
                               corpus_backup_url)
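
To make the lookback-date arithmetic concrete, here is a small standalone
sketch of the computation used above (the constant's value and the archive
naming in the comment are assumptions for illustration;
gcs_url_for_backup_file encapsulates the real path layout):

import datetime

CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS = 90  # assumed value, for illustration

# Back off a fixed number of days from today (UTC) so the dated backup
# archive being requested is old enough to have been published.
corpus_backup_date = (
    datetime.datetime.utcnow().date() -
    datetime.timedelta(days=CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS))
print(corpus_backup_date.isoformat())  # the backup archive name ends with this date
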
    def download_recommended_dictionary_from_gcs(self, local_dict_path):
        """Download recommended dictionary from GCS to the given location.

    Args:
      local_dict_path: Path to a dictionary file on the disk.

    Returns:
      A boolean indicating whether downloading succeeded or not.
    """
        # When the fuzz target is initially created or when it has no new
        # coverage or dictionary recommendations, then we won't have a
        # recommended dictionary in GCS.
        if not storage.exists(self.gcs_path):
            return False

        if storage.copy_file_from(self.gcs_path, local_dict_path):
            return True

        logs.log('Downloading %s failed.' % self.gcs_path)
        return False
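
A short usage sketch (the instance name and path are hypothetical; the
enclosing class is assumed to be constructed with self.gcs_path pointing at
the recommended-dictionary object in GCS):

# Hypothetical usage: place the recommended dictionary next to the target's
# own dictionary so the fuzzing engine picks it up.
local_dict_path = '/tmp/fuzz_target.dict'  # hypothetical path
if manager.download_recommended_dictionary_from_gcs(local_dict_path):
    logs.log('Using recommended dictionary at %s.' % local_dict_path)
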
    def update_recommended_dictionary(self, new_dictionary):
        """Update recommended dictionary stored in GCS with new dictionary elements.

    Args:
      new_dictionary: A set of dictionary elements to be added into dictionary.

    Returns:
      A number of new elements actually added to the dictionary stored in GCS.
    """
        if environment.is_lib():
            return 0

        # If the dictionary does not already exist, then directly update it.
        if not storage.exists(self.gcs_path):
            storage.write_data('\n'.join(new_dictionary).encode('utf-8'),
                               self.gcs_path)
            return len(new_dictionary)

        # Read the current version of the dictionary. Despite the existence
        # check above, the object may have been deleted concurrently, in which
        # case read_data returns None.
        old_dictionary_data = storage.read_data(self.gcs_path)
        if old_dictionary_data is not None:
            old_dictionary_data = old_dictionary_data.decode('utf-8')

        # Use "Compare-and-swap"-like approach to avoid race conditions and also to
        # avoid having a separate job merging multiple recommended dictionaries.
        succeeded = False
        while not succeeded:
            # If old_dictionary_data is None, there is no dictionary in GCS yet, i.e.
            # it's empty. Otherwise, we parse it and use it.
            old_dictionary = set()
            if old_dictionary_data:
                old_dictionary = set(old_dictionary_data.splitlines())

            # Merge two dictionaries.
            new_dictionary |= old_dictionary
            if new_dictionary == old_dictionary:
                # "New dictionary" elements have been already added to GCS, bail out.
                return 0

            succeeded, old_dictionary_data = self._compare_and_swap_gcs_dictionary(
                old_dictionary_data, '\n'.join(new_dictionary))

        return len(new_dictionary) - len(old_dictionary)
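
The helper _compare_and_swap_gcs_dictionary is not shown in this excerpt.
From the call site above, it must return a (succeeded, latest_content) pair:
on failure it hands back the latest GCS contents so the caller can re-merge
and retry. A minimal sketch consistent with that contract (an assumption,
not necessarily the actual ClusterFuzz implementation):

    def _compare_and_swap_gcs_dictionary(self, old_content, new_content):
        """Sketch: best-effort compare-and-swap for the GCS-stored dictionary.

        Not atomic; the read-check-write window is assumed to be acceptably
        small for this use case.
        """
        # Re-read the object. If it changed since old_content was read,
        # report failure and return the latest contents so the caller can
        # re-merge and retry.
        current_content = storage.read_data(self.gcs_path)
        if current_content is not None:
            current_content = current_content.decode('utf-8')
        if current_content != old_content:
            return False, current_content

        # No concurrent update observed: write the merged dictionary.
        storage.write_data(new_content.encode('utf-8'), self.gcs_path)
        return True, old_content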