Example #1
def get_build_urls_list(bucket_path, reverse=True):
  """Returns a sorted list of build urls from a bucket path."""
  if not bucket_path:
    return []

  base_url = os.path.dirname(bucket_path)
  if environment.is_running_on_app_engine():
    build_urls = list(storage.list_blobs(base_url))
  else:
    keys_directory = environment.get_value('BUILD_URLS_DIR')
    keys_filename = '%s.list' % utils.string_hash(bucket_path)
    keys_file_path = os.path.join(keys_directory, keys_filename)

    # For a single task, the locally cached keys file should be re-used.
    # Otherwise, we waste a lot of network bandwidth fetching the same set of
    # urls (especially for regression and progression testing).
    if not os.path.exists(keys_file_path):
      # Get url list by reading the GCS bucket.
      with open(keys_file_path, 'w') as f:
        for path in storage.list_blobs(base_url):
          f.write(path + '\n')

    content = utils.read_data_from_file(keys_file_path, eval_data=False)
    if not content:
      return []

    build_urls = content.splitlines()

  return _sort_build_urls_by_revision(build_urls, bucket_path, reverse)
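
A minimal, self-contained sketch of the local caching pattern above, using only the standard library. Here `list_remote` is a hypothetical stand-in for `storage.list_blobs`, and a SHA-1 hex digest stands in for `utils.string_hash`; the cache directory is assumed to already exist.

import hashlib
import os


def cached_listing(key, list_remote, cache_dir):
  """Return the listing for |key|, re-using a local cache file when present."""
  # Derive a stable cache filename from a hash of the key, as above.
  # Assumes |cache_dir| already exists.
  cache_file = os.path.join(
      cache_dir, hashlib.sha1(key.encode('utf-8')).hexdigest() + '.list')

  if not os.path.exists(cache_file):
    # Cache miss: fetch the listing once and persist it, one entry per line.
    with open(cache_file, 'w') as f:
      for path in list_remote(key):
        f.write(path + '\n')

  with open(cache_file) as f:
    return f.read().splitlines()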
Example #2
def _service_account_id(project):
  """Return service account ID for project."""
  # From
  # cloud.google.com/iam/reference/rest/v1/projects.serviceAccounts/create:
  #
  # The account id that is used to generate the service account email address
  # and a stable unique id. It is unique within a project, must be 6-30
  # characters long, and match the regular expression [a-z]([-a-z0-9]*[a-z0-9])
  # to comply with RFC1035.
  account_id = _ACCOUNT_PREFIX + project.replace('_', '-')
  if not account_id[-1].isalnum():
    # Must end in '[a-z0-9]'.
    account_id += '0'

  if len(account_id) < _MIN_LEN:
    # Must be at least |_MIN_LEN| in length.
    account_id = account_id.ljust(_MIN_LEN, '0')

  # Use a hash prefix as the service account name if the project name is too
  # long.
  if len(account_id) > _MAX_LEN:
    account_id = _ACCOUNT_PREFIX + utils.string_hash(project)[:_HASH_PREFIX_LEN]

  assert _MIN_LEN <= len(account_id) <= _MAX_LEN
  return account_id
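
The same normalization can be sketched stand-alone. The prefix and length bounds below are made-up placeholders for `_ACCOUNT_PREFIX`, `_MIN_LEN` and `_MAX_LEN` (which are not shown above), SHA-1 stands in for `utils.string_hash`, and the input is assumed to contain only letters, digits and underscores.

import hashlib
import re

ACCOUNT_PREFIX = 'bot-'  # Placeholder; the real prefix is not shown above.
MIN_LEN, MAX_LEN = 6, 30  # Placeholder bounds from the RFC1035 description.


def normalize_account_id(project):
  """Apply the replace / pad / hash-on-overflow steps from the example."""
  # Lowercase first so the result can satisfy the [a-z]... pattern.
  account_id = ACCOUNT_PREFIX + project.lower().replace('_', '-')
  if not account_id[-1].isalnum():
    account_id += '0'  # Must end in a lowercase letter or digit.
  account_id = account_id.ljust(MIN_LEN, '0')  # Enforce the minimum length.
  if len(account_id) > MAX_LEN:
    # Fall back to a hash-derived ID when the project name is too long.
    account_id = ACCOUNT_PREFIX + hashlib.sha1(
        project.encode('utf-8')).hexdigest()[:MAX_LEN - len(ACCOUNT_PREFIX)]
  assert re.fullmatch(r'[a-z]([-a-z0-9]*[a-z0-9])?', account_id)
  return account_id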
Example #3
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory):
    """Upload test cases from the list to a cloud storage bucket."""
    bucket_name = local_config.ProjectConfig().get(
        'coverage.fuzzer-testcases.bucket')
    if not bucket_name:
        return

    # Only consider test cases in the output directory. We might upload too much
    # if we search the data directory as well, or have missing resources.
    # TODO(mbarbella): Support resources in data bundles.
    testcase_list = [
        os.path.relpath(testcase, testcase_directory)
        for testcase in testcase_list
        if testcase.startswith(testcase_directory)
    ]
    if not testcase_list:
        return

    # Bail out if this batch of test cases is too large.
    directory_size = shell.get_directory_size(testcase_directory)
    if directory_size >= MAX_TESTCASE_DIRECTORY_SIZE:
        return

    formatted_date = str(utils.utcnow().date())
    gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
        bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

    runner = gsutil.GSUtilRunner()
    batch_directory_blobs = storage.list_blobs(gcs_base_url)
    total_testcases = 0
    for blob in batch_directory_blobs:
        if not blob.endswith(LIST_FILE_BASENAME):
            continue

        list_gcs_url = 'gs://{bucket}/{blob}'.format(bucket=bucket_name,
                                                     blob=blob)
        data = storage.read_data(list_gcs_url)
        if not data:
            logs.log_error(
                'Read no data from test case list at {gcs_url}'.format(
                    gcs_url=list_gcs_url))
            continue

        total_testcases += len(data.splitlines())

        # If we've already uploaded enough test cases for this fuzzer today, return.
        if total_testcases >= TESTCASES_PER_DAY:
            return

    # Upload each batch of tests to its own unique sub-bucket.
    identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
    gcs_base_url += utils.string_hash(identifier)

    list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
    if not storage.write_data('\n'.join(testcase_list), list_gcs_url):
        return

    runner.rsync(testcase_directory, gcs_base_url)
    logs.log('Synced {count} test cases to {gcs_url}'.format(
        count=len(testcase_list), gcs_url=gcs_base_url))
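
The unique sub-bucket naming above reduces to a dated prefix plus a hash of the bot name and current time. A stand-alone sketch of just that step, with SHA-1 standing in for `utils.string_hash` (the function and parameter names here are illustrative, not the real helpers):

import datetime
import hashlib


def unique_upload_prefix(bucket_name, fuzzer_name, bot_name):
    """Build a gs:// prefix of the form bucket/date/fuzzer/<hash>."""
    now = datetime.datetime.utcnow()
    # Hashing bot name + timestamp gives each upload batch its own sub-path.
    suffix = hashlib.sha1((bot_name + str(now)).encode('utf-8')).hexdigest()
    return 'gs://{bucket}/{date}/{fuzzer}/{suffix}'.format(
        bucket=bucket_name, date=now.date(), fuzzer=fuzzer_name, suffix=suffix)


# e.g. gs://my-bucket/2024-01-01/my_fuzzer/da39a3ee...
print(unique_upload_prefix('my-bucket', 'my_fuzzer', 'bot-1'))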
Example #4
def _get_build_directory(bucket_path, job_name):
  """Return the build directory based on bucket path and job name."""
  builds_directory = environment.get_value('BUILDS_DIR')

  # If we have a bucket path, we want all jobs sharing it to use the same
  # build directory.
  if bucket_path:
    path = _remove_scheme(bucket_path).lstrip('/')
    bucket_path, file_pattern = path.rsplit('/', 1)
    bucket_path = bucket_path.replace('/', '_')

    # Remove similar build types to force them into the same directory.
    file_pattern = utils.remove_sub_strings(file_pattern, BUILD_TYPE_SUBSTRINGS)

    file_pattern_hash = utils.string_hash(file_pattern)
    job_directory = '%s_%s' % (bucket_path, file_pattern_hash)
  else:
    job_directory = job_name

  return os.path.join(builds_directory, job_directory)
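
The directory naming can be reproduced with the standard library alone. The substring list below is a made-up stand-in for `BUILD_TYPE_SUBSTRINGS`, and SHA-1 stands in for `utils.string_hash`:

import hashlib

BUILD_TYPE_SUBSTRINGS = ['-debug', '-release']  # Placeholder values.


def build_directory_name(bucket_path):
  """Derive a stable directory name from a bucket path, as above."""
  # Strip the scheme (e.g. 'gs://') and any leading slashes.
  path = bucket_path.split('://', 1)[-1].lstrip('/')
  directory, file_pattern = path.rsplit('/', 1)
  # Collapse build-type variants so they land in the same directory.
  for substring in BUILD_TYPE_SUBSTRINGS:
    file_pattern = file_pattern.replace(substring, '')
  pattern_hash = hashlib.sha1(file_pattern.encode('utf-8')).hexdigest()
  return '%s_%s' % (directory.replace('/', '_'), pattern_hash)


# e.g. bucket_builds_<hash of 'build-.zip'>
print(build_directory_name('gs://bucket/builds/build-release.zip'))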
Example #5
def store_file_in_cache(file_path,
                        cached_files_per_directory_limit=True,
                        force_update=False):
  """Get file from nfs cache if available."""
  if not os.path.exists(file_path):
    logs.log_error(
        'Local file %s does not exist, nothing to store in cache.' % file_path)
    return

  if os.path.getsize(file_path) > CACHE_SIZE_LIMIT:
    logs.log('File %s is too large to store in cache, skipping.' % file_path)
    return

  nfs_root = environment.get_value('NFS_ROOT')
  if not nfs_root:
    # No NFS, nothing to store in cache.
    return

  # If the NFS server is not available due to heavy load, skip the storage
  # operation altogether, as we would fail to store the file. Append '.' so
  # the check actually traverses the mount point.
  if not os.path.exists(os.path.join(nfs_root, '.')):
    logs.log_warn('Cache %s not available.' % nfs_root)
    return

  cache_file_path = get_cache_file_path(file_path)
  cache_directory = os.path.dirname(cache_file_path)
  filename = os.path.basename(file_path)

  if not os.path.exists(cache_directory):
    if not shell.create_directory(cache_directory, create_intermediates=True):
      logs.log_error('Failed to create cache directory %s.' % cache_directory)
      return

  # Check if the file already exists in cache.
  if file_exists_in_cache(cache_file_path):
    if not force_update:
      return

    # If we are forcing an update, we need to remove the current cached file
    # and its metadata.
    remove_cache_file_and_metadata(cache_file_path)

  # Delete old cached files beyond our maximum storage limit.
  if cached_files_per_directory_limit:
    # Get a list of cached files.
    cached_files_list = []
    for cached_filename in os.listdir(cache_directory):
      if cached_filename.endswith(CACHE_METADATA_FILE_EXTENSION):
        continue
      cached_file_path = os.path.join(cache_directory, cached_filename)
      cached_files_list.append(cached_file_path)

    mtime = lambda f: os.stat(f).st_mtime
    last_used_cached_files_list = sorted(
        cached_files_list, key=mtime, reverse=True)
    for cached_file_path in (
        last_used_cached_files_list[MAX_CACHED_FILES_PER_DIRECTORY - 1:]):
      remove_cache_file_and_metadata(cached_file_path)

  # Start storing the actual file in cache now.
  logs.log('Started storing file %s into cache.' % filename)

  # Fetch lock to store this file. Try only once since if any other bot has
  # started to store it, we don't need to do it ourselves. Just bail out.
  lock_name = 'store:cache_file:%s' % utils.string_hash(cache_file_path)
  if not locks.acquire_lock(
      lock_name, max_hold_seconds=CACHE_LOCK_TIMEOUT, retries=1, by_zone=True):
    logs.log_warn(
        'Unable to fetch lock to update cache file %s, skipping.' % filename)
    return

  # Check if another bot already updated it.
  if file_exists_in_cache(cache_file_path):
    locks.release_lock(lock_name, by_zone=True)
    return

  shell.copy_file(file_path, cache_file_path)
  write_cache_file_metadata(cache_file_path, file_path)
  time.sleep(CACHE_COPY_WAIT_TIME)
  error_occurred = not file_exists_in_cache(cache_file_path)
  locks.release_lock(lock_name, by_zone=True)

  if error_occurred:
    logs.log_error('Failed to store file %s into cache.' % filename)
  else:
    logs.log('Completed storing file %s into cache.' % filename)
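
The mtime-based eviction in the middle of the example is worth isolating. This sketch keeps the most recently used files and removes the rest; plain os.remove stands in for remove_cache_file_and_metadata:

import os


def evict_oldest(directory, max_files, metadata_extension='.metadata'):
  """Keep the |max_files| - 1 most recently modified files, as above."""
  paths = [
      os.path.join(directory, name) for name in os.listdir(directory)
      if not name.endswith(metadata_extension)
  ]
  # Most recently used first; everything past the limit is removed to make
  # room for the file about to be stored.
  paths.sort(key=lambda f: os.stat(f).st_mtime, reverse=True)
  for path in paths[max_files - 1:]:
    os.remove(path)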
Example #6
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory,
                               data_directory):
    """Upload test cases from the list to a cloud storage bucket."""
    # Since builtin fuzzers have a coverage minimized corpus, no need to upload
    # test case samples for them.
    if fuzzer_name in builtin_fuzzers.BUILTIN_FUZZERS:
        return

    bucket_name = local_config.ProjectConfig().get(
        'coverage.fuzzer-testcases.bucket')
    if not bucket_name:
        return

    files_list = []
    has_testcases_in_testcase_directory = False
    has_testcases_in_data_directory = False
    for testcase_path in testcase_list:
        if testcase_path.startswith(testcase_directory):
            files_list.append(
                os.path.relpath(testcase_path, testcase_directory))
            has_testcases_in_testcase_directory = True
        elif testcase_path.startswith(data_directory):
            files_list.append(os.path.relpath(testcase_path, data_directory))
            has_testcases_in_data_directory = True
    if not files_list:
        return

    formatted_date = str(utils.utcnow().date())
    gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
        bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

    runner = gsutil.GSUtilRunner()
    batch_directory_blobs = storage.list_blobs(gcs_base_url)
    total_testcases = 0
    for blob in batch_directory_blobs:
        if not blob.endswith(LIST_FILE_BASENAME):
            continue

        list_gcs_url = 'gs://{bucket}/{blob}'.format(bucket=bucket_name,
                                                     blob=blob)
        data = storage.read_data(list_gcs_url)
        if not data:
            logs.log_error(
                'Read no data from test case list at {gcs_url}'.format(
                    gcs_url=list_gcs_url))
            continue

        total_testcases += len(data.splitlines())

        # If we've already uploaded enough test cases for this fuzzer today, return.
        if total_testcases >= TESTCASES_PER_DAY:
            return

    # Cap the number of files.
    testcases_limit = min(len(files_list), TESTCASES_PER_DAY - total_testcases)
    files_list = files_list[:testcases_limit]

    # Upload each batch of tests to its own unique sub-bucket.
    identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
    gcs_base_url += utils.string_hash(identifier)

    list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
    if not storage.write_data('\n'.join(files_list), list_gcs_url):
        return

    if has_testcases_in_testcase_directory:
        # Sync everything in |testcase_directory| since it is fuzzer-generated.
        runner.rsync(testcase_directory, gcs_base_url)

    if has_testcases_in_data_directory:
        # Sync all fuzzer-generated testcases in the data bundle directory.
        runner.rsync(data_directory,
                     gcs_base_url,
                     exclusion_pattern=('(?!.*{fuzz_prefix})'.format(
                         fuzz_prefix=testcase_manager.FUZZ_PREFIX)))

        # Sync all possible resource dependencies as a best effort. The pattern
        # matches the |resources-| prefix that a fuzzer can use to indicate
        # resources, as well as the resources directory that Chromium web_tests
        # use for dependencies.
        runner.rsync(data_directory,
                     gcs_base_url,
                     exclusion_pattern='(?!.*resource)')

    logs.log('Synced {count} test cases to {gcs_url}.'.format(
        count=len(files_list), gcs_url=gcs_base_url))
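
The exclusion_pattern arguments above rely on a regex negative lookahead. Assuming the parameter maps to gsutil rsync's -x flag, which excludes paths matching the pattern, (?!.*resource) excludes every path that does not mention 'resource', so only resource files get synced. A quick stand-alone check of that behavior:

import re

# Matches (and would therefore be excluded by rsync -x) any path that does
# NOT contain 'resource'.
exclusion = re.compile(r'(?!.*resource)')

paths = ['fuzz-000001', 'resources-images/logo.png', 'dir/resource.bin']
synced = [path for path in paths if not exclusion.match(path)]
print(synced)  # ['resources-images/logo.png', 'dir/resource.bin']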
Example #7
def run_testcase_and_return_result_in_queue(crash_queue,
                                            thread_index,
                                            file_path,
                                            gestures,
                                            env_copy,
                                            upload_output=False):
    """Run a single testcase and return crash results in the crash queue."""

    # Since this is running in its own process, initialize the log handler again.
    # This is needed for Windows where instances are not shared across child
    # processes. See:
    # https://stackoverflow.com/questions/34724643/python-logging-with-multiprocessing-root-logger-different-in-windows
    logs.configure('run_testcase', {
        'testcase_path': file_path,
    })

    try:
        # Run testcase and check whether a crash occurred or not.
        return_code, crash_time, output = run_testcase(thread_index, file_path,
                                                       gestures, env_copy)

        # Pull testcase directory to host to get any stats files.
        if environment.is_trusted_host():
            from bot.untrusted_runner import file_host
            file_host.pull_testcases_from_worker()

        # Analyze the crash.
        crash_output = _get_crash_output(output)
        crash_result = CrashResult(return_code, crash_time, crash_output)

        # To provide consistency between stats and logs, we use the timestamp
        # taken from stats when uploading logs and the testcase.
        if upload_output:
            log_time = _get_testcase_time(file_path)

        if crash_result.is_crash():
            # Initialize resource list with the testcase path.
            resource_list = [file_path]
            resource_list += get_resource_paths(crash_output)

            # Store the crash stack file in the crash stacktrace directory
            # with filename as the hash of the testcase path.
            crash_stacks_directory = environment.get_value(
                'CRASH_STACKTRACES_DIR')
            stack_file_path = os.path.join(crash_stacks_directory,
                                           utils.string_hash(file_path))
            utils.write_data_to_file(crash_output, stack_file_path)

            # Put the crash result in the crash queue.
            crash_queue.put(
                Crash(file_path=file_path,
                      crash_time=crash_time,
                      return_code=return_code,
                      resource_list=resource_list,
                      gestures=gestures,
                      stack_file_path=stack_file_path))

            # Don't upload uninteresting testcases (no crash), or testcases
            # with no log to correlate them with (not upload_output).
            if upload_output:
                upload_testcase(file_path, log_time)

        if upload_output:
            # Include full output for uploaded logs (crash output, merge output, etc).
            crash_result_full = CrashResult(return_code, crash_time, output)
            log = prepare_log_for_upload(crash_result_full.get_stacktrace(),
                                         return_code)
            upload_log(log, log_time)
    except Exception:
        logs.log_error('Exception occurred while running '
                       'run_testcase_and_return_result_in_queue.')
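
Example #7 follows a common pattern: run the work in a child process and hand results back through a multiprocessing queue (which is also why it re-initializes logging, since Windows child processes don't share handler state). A minimal, self-contained sketch of that pattern:

import multiprocessing


def _run_one(result_queue, index, value):
    """Child-process body: do some work, then push the result onto the queue."""
    result_queue.put((index, value * value))


if __name__ == '__main__':
    queue = multiprocessing.Queue()
    processes = [
        multiprocessing.Process(target=_run_one, args=(queue, i, i))
        for i in range(4)
    ]
    for process in processes:
        process.start()
    for process in processes:
        process.join()
    for _ in range(4):
        print(queue.get())  # (index, squared value) pairs.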