Code example #1
    def _compare_and_swap_gcs_dictionary(self, old_content, new_content):
        """Compare and swap implementation for dictionary stored in GCS. Of course,
    this function is not atomic, but window for race is acceptably small."""
        current_content = storage.read_data(self.gcs_path)
        if current_content != old_content:
            return False, current_content

        storage.write_data(new_content, self.gcs_path)
        return True, old_content
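The helper above is only safe when the caller retries on failure; code example #3 below shows the real loop. As a minimal sketch of the calling pattern (every name other than the method and storage.read_data is a hypothetical placeholder):

# Sketch only: retry the write until it lands on top of the content we last read.
# `manager` stands for an instance of the class defining the method above, and
# `merge_new_entries` is a hypothetical function producing the desired new content.
old_content = storage.read_data(manager.gcs_path)
succeeded = False
while not succeeded:
    new_content = merge_new_entries(old_content)
    succeeded, old_content = manager._compare_and_swap_gcs_dictionary(
        old_content, new_content)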
Code example #2
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory):
    """Upload test cases from the list to a cloud storage bucket."""
    bucket_name = local_config.ProjectConfig().get(
        'coverage.fuzzer-testcases.bucket')
    if not bucket_name:
        return

    # Only consider test cases in the output directory. We might upload too much
    # if we search the data directory as well, or have missing resources.
    # TODO(mbarbella): Support resources in data bundles.
    testcase_list = [
        os.path.relpath(testcase, testcase_directory)
        for testcase in testcase_list
        if testcase.startswith(testcase_directory)
    ]
    if not testcase_list:
        return

    # Bail out if this batch of test cases is too large.
    directory_size = shell.get_directory_size(testcase_directory)
    if directory_size >= MAX_TESTCASE_DIRECTORY_SIZE:
        return

    formatted_date = str(utils.utcnow().date())
    gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
        bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

    runner = gsutil.GSUtilRunner()
    batch_directory_blobs = storage.list_blobs(gcs_base_url)
    total_testcases = 0
    for blob in batch_directory_blobs:
        if not blob.endswith(LIST_FILE_BASENAME):
            continue

        list_gcs_url = 'gs://{bucket}/{blob}'.format(bucket=bucket_name,
                                                     blob=blob)
        data = storage.read_data(list_gcs_url)
        if not data:
            logs.log_error(
                'Read no data from test case list at {gcs_url}'.format(
                    gcs_url=list_gcs_url))
            continue

        total_testcases += len(data.splitlines())

        # If we've already uploaded enough test cases for this fuzzer today, return.
        if total_testcases >= TESTCASES_PER_DAY:
            return

    # Upload each batch of tests to its own unique sub-bucket.
    identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
    gcs_base_url += utils.string_hash(identifier)

    list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
    if not storage.write_data('\n'.join(testcase_list), list_gcs_url):
        return

    runner.rsync(testcase_directory, gcs_base_url)
    logs.log('Synced {count} test cases to {gcs_url}'.format(
        count=len(testcase_list), gcs_url=gcs_base_url))
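A hedged usage sketch for the function above; the fuzzer name, directory, and test case paths are hypothetical placeholders. The function returns silently if the coverage.fuzzer-testcases.bucket setting is missing, the batch directory is too large, or the daily per-fuzzer quota (TESTCASES_PER_DAY) is already used up.

# Hypothetical values for illustration only.
testcase_directory = '/bot/inputs/fuzzer-testcases-output'
testcase_list = [
    testcase_directory + '/fuzz-0001',
    testcase_directory + '/fuzz-0002',
]
upload_testcases_if_needed('my_fuzzer', testcase_list, testcase_directory)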
Code example #3
    def update_recommended_dictionary(self, new_dictionary):
        """Update recommended dictionary stored in GCS with new dictionary elements.

    Args:
      new_dictionary: A set of dictionary elements to be added into dictionary.

    Returns:
      A number of new elements actually added to the dictionary stored in GCS.
    """
        if environment.is_lib():
            return 0

        # If the dictionary does not already exist, then directly update it.
        if not storage.exists(self.gcs_path):
            storage.write_data('\n'.join(new_dictionary).encode('utf-8'),
                               self.gcs_path)
            return len(new_dictionary)

        # Read current version of the dictionary.
        old_dictionary_data = storage.read_data(self.gcs_path).decode('utf-8')

        # Use "Compare-and-swap"-like approach to avoid race conditions and also to
        # avoid having a separate job merging multiple recommended dictionaries.
        succeeded = False
        while not succeeded:
            # If old_dictionary_data is None, there is no dictionary in GCS yet, i.e.
            # it's empty. Otherwise, we parse it and use it.
            old_dictionary = set()
            if old_dictionary_data:
                old_dictionary = set(old_dictionary_data.splitlines())

            # Merge two dictionaries.
            new_dictionary |= old_dictionary
            if new_dictionary == old_dictionary:
                # "New dictionary" elements have been already added to GCS, bail out.
                return 0

            succeeded, old_dictionary_data = self._compare_and_swap_gcs_dictionary(
                old_dictionary_data, '\n'.join(new_dictionary))

        return len(new_dictionary) - len(old_dictionary)
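A hedged usage sketch; `manager` stands for an instance of the class containing this method with `gcs_path` already set, and the dictionary entries are hypothetical. The return value is the count of elements that were not already present in GCS.

# Hypothetical libFuzzer-style dictionary entries.
new_entries = {'"GET "', '"\\x89PNG"'}
added_count = manager.update_recommended_dictionary(new_entries)
# added_count is 0 when every entry was already present in the GCS dictionary.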
Code example #4
def upload_stats(stats_list, filename=None):
    """Upload the fuzzer run to the bigquery bucket. Assumes that all the stats
    given are for the same fuzzer/job run."""
    if not stats_list:
        logs.log_error("Failed to upload fuzzer stats: empty stats.")
        return

    if not isinstance(stats_list, list):
        raise AssertionError

    bucket_name = big_query.get_bucket()
    if not bucket_name:
        logs.log_error("Failed to upload fuzzer stats: missing bucket name.")
        return

    kind = stats_list[0].kind
    fuzzer = stats_list[0].fuzzer

    # Group all stats for fuzz targets.
    fuzzer_or_engine_name = get_fuzzer_or_engine_name(fuzzer)

    if not filename:
        # Generate a random filename.
        filename = "%016x" % random.randint(0, (1 << 64) - 1) + ".json"

    # Handle runs that bleed into the next day.
    def timestamp_start_of_day(s):
        return utils.utc_date_to_timestamp(
            datetime.datetime.utcfromtimestamp(s.timestamp).date())

    stats_list.sort(key=lambda s: s.timestamp)

    for timestamp, stats in itertools.groupby(stats_list,
                                              timestamp_start_of_day):
        upload_data = "\n".join(stat.to_json() for stat in stats)

        day_path = ("gs:/" + get_gcs_stats_path(
            kind, fuzzer_or_engine_name, timestamp=timestamp) + filename)

        if not storage.write_data(upload_data, day_path):
            logs.log_error("Failed to upload FuzzerRun.")
Code example #5
File: fuzzer_logs.py  Project: zzdxxd/clusterfuzz
def upload_to_logs(bucket_name,
                   contents,
                   time=None,
                   fuzzer_name=None,
                   job_type=None,
                   file_extension=None):
    """Upload file contents to log directory in GCS bucket.
  Args:
    bucket_name: Bucket logs are stored in.
    contents: String containing log to be uploaded.
    time: A datetime object used to generate filename for the log.
    fuzzer_name: Name of the fuzzer. If None, gets this from the environment.
    job_type: Job name. If None, gets this from the environment.
    file_extension: A string appended to the end of the log filename. A default
      value is used if None.

  Returns:
    The path of the uploaded file and whether the uploaded succeeded.
  """
    if not fuzzer_name:
        fuzzer_name = environment.get_value('FUZZER_NAME')

    if not job_type:
        job_type = environment.get_value('JOB_NAME')

    log_directory = get_logs_directory(bucket_name, fuzzer_name, job_type)

    if not time:
        time = datetime.datetime.utcnow()

    log_path = 'gs:/' + log_directory + '/' + get_log_relative_path(
        time, file_extension)

    if storage.write_data(contents, log_path):
        logs.log('Uploaded file to logs bucket.', log_path=log_path)
    else:
        logs.log_error('Failed to write file to logs bucket.',
                       log_path=log_path)
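A hedged usage sketch for the function above; the bucket name, log contents, job name, and file extension are hypothetical. When fuzzer_name or job_type are omitted, they are read from the FUZZER_NAME and JOB_NAME environment variables as shown in the body.

# Hypothetical values for illustration only.
upload_to_logs(
    'my-fuzzer-logs-bucket',
    'Command line: ./fuzz_target -runs=1000\nReturn code: 0\n',
    time=datetime.datetime.utcnow(),
    fuzzer_name='my_fuzzer',
    job_type='libfuzzer_asan_my_project',
    file_extension='.log')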
Code example #6
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory,
                               data_directory):
    """Upload test cases from the list to a cloud storage bucket."""
    # Since builtin fuzzers have a coverage-minimized corpus, there is no need
    # to upload test case samples for them.
    if fuzzer_name in builtin_fuzzers.BUILTIN_FUZZERS:
        return

    bucket_name = local_config.ProjectConfig().get(
        'coverage.fuzzer-testcases.bucket')
    if not bucket_name:
        return

    files_list = []
    has_testcases_in_testcase_directory = False
    has_testcases_in_data_directory = False
    for testcase_path in testcase_list:
        if testcase_path.startswith(testcase_directory):
            files_list.append(
                os.path.relpath(testcase_path, testcase_directory))
            has_testcases_in_testcase_directory = True
        elif testcase_path.startswith(data_directory):
            files_list.append(os.path.relpath(testcase_path, data_directory))
            has_testcases_in_data_directory = True
    if not files_list:
        return

    formatted_date = str(utils.utcnow().date())
    gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
        bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

    runner = gsutil.GSUtilRunner()
    batch_directory_blobs = storage.list_blobs(gcs_base_url)
    total_testcases = 0
    for blob in batch_directory_blobs:
        if not blob.endswith(LIST_FILE_BASENAME):
            continue

        list_gcs_url = 'gs://{bucket}/{blob}'.format(bucket=bucket_name,
                                                     blob=blob)
        data = storage.read_data(list_gcs_url)
        if not data:
            logs.log_error(
                'Read no data from test case list at {gcs_url}'.format(
                    gcs_url=list_gcs_url))
            continue

        total_testcases += len(data.splitlines())

        # If we've already uploaded enough test cases for this fuzzer today, return.
        if total_testcases >= TESTCASES_PER_DAY:
            return

    # Cap the number of files.
    testcases_limit = min(len(files_list), TESTCASES_PER_DAY - total_testcases)
    files_list = files_list[:testcases_limit]

    # Upload each batch of tests to its own unique sub-bucket.
    identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
    gcs_base_url += utils.string_hash(identifier)

    list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
    if not storage.write_data('\n'.join(files_list), list_gcs_url):
        return

    if has_testcases_in_testcase_directory:
        # Sync everything in |testcase_directory| since it is fuzzer-generated.
        runner.rsync(testcase_directory, gcs_base_url)

    if has_testcases_in_data_directory:
        # Sync all fuzzer-generated testcases in the data bundle directory.
        runner.rsync(data_directory,
                     gcs_base_url,
                     exclusion_pattern=('(?!.*{fuzz_prefix})'.format(
                         fuzz_prefix=testcase_manager.FUZZ_PREFIX)))

        # Sync all possible resource dependencies as a best effort. The pattern
        # matches the |resources-| prefix that a fuzzer can use to indicate
        # resources, as well as the resources directory that Chromium web_tests
        # use for dependencies.
        runner.rsync(data_directory,
                     gcs_base_url,
                     exclusion_pattern='(?!.*resource)')

    logs.log('Synced {count} test cases to {gcs_url}.'.format(
        count=len(files_list), gcs_url=gcs_base_url))
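The two exclusion_pattern arguments above are negative-lookahead regexes, presumably forwarded to gsutil rsync's -x flag: because -x excludes paths that match, a pattern that matches "anything not containing X" leaves only paths containing X to be synced. A small self-contained check of that regex reading (regex semantics only; gsutil itself is not invoked here):

import re

# Same shape as the pattern passed for the resources sync above.
exclusion_pattern = '(?!.*resource)'

def is_excluded(relative_path):
    # gsutil's -x flag excludes a path when the regex matches at its start.
    return re.match(exclusion_pattern, relative_path) is not None

assert not is_excluded('resources/dependency.js')  # kept: contains "resource"
assert is_excluded('fuzz-0001')                    # excluded: no "resource"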