Example #1
0
    def _read_to_bytesio(self, gcs_path):
        """Read the GCS object at |gcs_path| into an in-memory BytesIO.

        Raises:
          helpers.EarlyExitException: if the object is missing or empty.
        """
        blob = storage.read_data(gcs_path)
        if blob:
            return io.BytesIO(blob)

        raise helpers.EarlyExitException(
            'Failed to read uploaded archive.', 500)
  def _compare_and_swap_gcs_dictionary(self, old_content, new_content):
    """Compare-and-swap for the dictionary stored in GCS.

    Re-reads the object and writes |new_content| only if the stored value
    still equals |old_content|. Not truly atomic, but the race window is
    acceptably small.

    Returns:
      A (succeeded, latest_content) tuple.
    """
    latest_content = storage.read_data(self.gcs_path).decode('utf-8')
    if latest_content == old_content:
      storage.write_data(new_content.encode('utf-8'), self.gcs_path)
      return True, old_content

    return False, latest_content
Example #3
0
def get_introspector_index():
    """Return the introspector projects status dict (empty if absent)."""
    introspector_index = {}
    if storage.exists(INTROSPECTOR_INDEX_JSON_URL):
        raw = storage.read_data(INTROSPECTOR_INDEX_JSON_URL)
        introspector_index = json.loads(raw)

    logs.log('Loaded introspector status: %d' % len(introspector_index))
    return introspector_index
def _read_json(url):
    """Return the JSON object loaded from the given GCS url, or None."""
    raw = storage.read_data(url)
    try:
        return json.loads(raw)
    except Exception as e:
        # Best effort: the coverage JSON may be missing or corrupt. Warn and
        # fall through to returning None rather than failing the caller.
        logs.log_warn('Empty or malformed code coverage JSON (%s): %s.' %
                      (url, str(e)))
        return None
def _get_url_content(url):
    """Read a potentially base64-encoded resource from the given URL."""
    if not url.startswith(storage.GS_PREFIX):
        # Plain URL: fetch without authentication.
        content = utils.fetch_url(url)

        # Urls on googlesource.com return file data as base64 encoded to avoid
        # cross-site scripting attacks. If the requested url contains
        # |format=text|, then the output is base64 encoded. So, decode it first.
        if content and url.endswith('format=text'):
            content = base64.b64decode(content)
        return content

    # GCS path: fetch with authentication.
    raw = storage.read_data(url)
    if raw is None:
        return None

    return raw.decode('utf-8')
  def update_recommended_dictionary(self, new_dictionary):
    """Update recommended dictionary stored in GCS with new dictionary elements.

    Args:
      new_dictionary: A set of dictionary elements to be added into dictionary.
        The caller's set is not modified.

    Returns:
      A number of new elements actually added to the dictionary stored in GCS.
    """
    if environment.is_lib():
      return 0

    # If the dictionary does not already exist, then directly update it.
    if not storage.exists(self.gcs_path):
      storage.write_data('\n'.join(new_dictionary).encode('utf-8'),
                         self.gcs_path)
      return len(new_dictionary)

    # Read current version of the dictionary.
    old_dictionary_data = storage.read_data(self.gcs_path).decode('utf-8')

    # Use "Compare-and-swap"-like approach to avoid race conditions and also to
    # avoid having a separate job merging multiple recommended dictionaries.
    # Merge into a local copy so the caller's set argument is not mutated as a
    # hidden side effect (the original used |= on the parameter directly).
    merged_dictionary = set(new_dictionary)
    succeeded = False
    while not succeeded:
      # If old_dictionary_data is None, there is no dictionary in GCS yet, i.e.
      # it's empty. Otherwise, we parse it and use it.
      old_dictionary = set()
      if old_dictionary_data:
        old_dictionary = set(old_dictionary_data.splitlines())

      # Merge two dictionaries.
      merged_dictionary |= old_dictionary
      if merged_dictionary == old_dictionary:
        # "New dictionary" elements have been already added to GCS, bail out.
        return 0

      succeeded, old_dictionary_data = self._compare_and_swap_gcs_dictionary(
          old_dictionary_data, '\n'.join(merged_dictionary))

    return len(merged_dictionary) - len(old_dictionary)
Example #7
0
def get_projects_from_gcs(gcs_url):
    """Get projects from GCS path."""
    contents = json.loads(storage.read_data(gcs_url))
    projects = []
    for project in contents['projects']:
        projects.append((project['name'], project))
    return projects
Example #8
0
def get_remote_source_revision(source_manifest_url):
    """Get remote revision. We refactor this method out, so that we can mock
    it."""
    manifest_bytes = storage.read_data(source_manifest_url)
    return manifest_bytes.decode('utf-8').strip()
Example #9
0
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory,
                               data_directory):
    """Upload test cases from the list to a cloud storage bucket.

    Enforces a per-fuzzer daily quota (TESTCASES_PER_DAY) by counting entries
    in previously-uploaded list files for today's date, then rsyncs the new
    test cases into a unique per-batch sub-bucket.
    """
    # Since builtin fuzzers have a coverage minimized corpus, no need to upload
    # test case samples for them.
    if fuzzer_name in fuzzing.ENGINES:
        return

    bucket_name = local_config.ProjectConfig().get(
        'coverage.fuzzer-testcases.bucket')
    if not bucket_name:
        return

    # Collect relative paths of test cases that live under either of the two
    # known directories; anything outside both is silently skipped.
    files_list = []
    has_testcases_in_testcase_directory = False
    has_testcases_in_data_directory = False
    for testcase_path in testcase_list:
        if testcase_path.startswith(testcase_directory):
            files_list.append(
                os.path.relpath(testcase_path, testcase_directory))
            has_testcases_in_testcase_directory = True
        elif testcase_path.startswith(data_directory):
            files_list.append(os.path.relpath(testcase_path, data_directory))
            has_testcases_in_data_directory = True
    if not files_list:
        return

    # Uploads are grouped under gs://<bucket>/<date>/<fuzzer>/.
    formatted_date = str(utils.utcnow().date())
    gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
        bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

    # Count how many test cases were already uploaded today by summing the
    # line counts of existing LIST_FILE_BASENAME files for this prefix.
    runner = gsutil.GSUtilRunner()
    batch_directory_blobs = storage.list_blobs(gcs_base_url)
    total_testcases = 0
    for blob in batch_directory_blobs:
        if not blob.endswith(LIST_FILE_BASENAME):
            continue

        list_gcs_url = storage.get_cloud_storage_file_path(bucket_name, blob)
        data = storage.read_data(list_gcs_url)
        if not data:
            logs.log_error(
                'Read no data from test case list at {gcs_url}'.format(
                    gcs_url=list_gcs_url))
            continue

        total_testcases += len(data.splitlines())

        # If we've already uploaded enough test cases for this fuzzer today, return.
        if total_testcases >= TESTCASES_PER_DAY:
            return

    # Cap the number of files.
    testcases_limit = min(len(files_list), TESTCASES_PER_DAY - total_testcases)
    files_list = files_list[:testcases_limit]

    # Upload each batch of tests to its own unique sub-bucket.
    identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
    gcs_base_url += utils.string_hash(identifier)

    # Write the list file first; if that fails, skip syncing the test cases so
    # the quota accounting above never undercounts.
    list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
    if not storage.write_data('\n'.join(files_list).encode('utf-8'),
                              list_gcs_url):
        return

    if has_testcases_in_testcase_directory:
        # Sync everything in |testcase_directory| since it is fuzzer-generated.
        runner.rsync(testcase_directory, gcs_base_url)

    if has_testcases_in_data_directory:
        # Sync all fuzzer generated testcase in data bundle directory.
        runner.rsync(data_directory,
                     gcs_base_url,
                     exclusion_pattern=('(?!.*{fuzz_prefix})'.format(
                         fuzz_prefix=testcase_manager.FUZZ_PREFIX)))

        # Sync all possible resource dependencies as a best effort. It matches
        # |resources-| prefix that a fuzzer can use to indicate resources. Also, it
        # matches resources directory that Chromium web_tests use for dependencies.
        runner.rsync(data_directory,
                     gcs_base_url,
                     exclusion_pattern='(?!.*resource)')

    logs.log('Synced {count} test cases to {gcs_url}.'.format(
        count=len(files_list), gcs_url=gcs_base_url))
Example #10
0
def _get_config_dict(url):
    """Read configs from a json and return them as a dict"""
    raw = storage.read_data(url)
    if raw:
        return json.loads(raw)

    raise BuildOverrideError(OVERRIDE_CONFIG_NOT_READ_ERROR.format(url))