Example #1
    def get(self):
        """Handle a get request."""
        gcs_path = request.args.get('path', '')
        if not gcs_path:
            raise helpers.EarlyExitException('No path provided.', 400)

        # Redirect to the object URL if the path resolves to an existing
        # object; otherwise fall back to the directory listing URL.
        if storage.get(gcs_path):
            host_url = storage.OBJECT_URL
        else:
            host_url = storage.DIRECTORY_URL

        bucket_name, object_path = storage.get_bucket_name_and_path(gcs_path)
        return self.redirect(host_url + '/' + bucket_name + '/' + object_path)
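
For context, a minimal sketch of how a gs:// URL could split into the bucket name and object path used in the redirect above. The helper below is a hypothetical stand-in for storage.get_bucket_name_and_path, not its actual implementation:

def get_bucket_name_and_path(gcs_url):
    # A sketch, assuming gs:// URLs; not the real storage helper.
    # 'gs://my-bucket/some/object' -> ('my-bucket', 'some/object')
    stripped = gcs_url[len('gs://'):]
    bucket_name, _, object_path = stripped.partition('/')
    return bucket_name, object_path

print(get_bucket_name_and_path('gs://my-bucket/some/object'))
# ('my-bucket', 'some/object')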
Example #2
def _limit_corpus_size(corpus_url, size_limit):
  """Limit number of files in a corpus url."""
  files_list = list(storage.list_blobs(corpus_url))
  corpus_size = len(files_list)

  if corpus_size <= size_limit:
    # Corpus directory size is within limit, no more work to do.
    return

  logs.log(
      'Limit corpus at {corpus_url} from {corpus_size} to {size_limit}.'.format(
          corpus_url=corpus_url, corpus_size=corpus_size,
          size_limit=size_limit))
  # Delete a uniformly random subset of the excess files.
  files_to_delete = random.sample(files_list, corpus_size - size_limit)
  bucket, _ = storage.get_bucket_name_and_path(corpus_url)
  for file_to_delete in files_to_delete:
    path_to_delete = storage.get_cloud_storage_file_path(bucket, file_to_delete)
    storage.delete(path_to_delete)
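
The pruning above keeps a uniformly random subset of the corpus; random.sample makes that easy to verify in isolation. The file names and limit below are made up:

import random

files = ['input-%04d' % i for i in range(2500)]
size_limit = 2000
to_delete = random.sample(files, len(files) - size_limit)
kept = set(files) - set(to_delete)
assert len(kept) == size_limit  # exactly 500 files were picked for deletion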
Example #3
def _sort_build_urls_by_revision(build_urls, bucket_path, reverse):
  """Return a sorted list of build url by revision."""
  base_url = os.path.dirname(bucket_path)
  file_pattern = os.path.basename(bucket_path)
  filename_by_revision_dict = {}

  _, base_path = storage.get_bucket_name_and_path(base_url)
  base_path_with_separator = base_path + '/' if base_path else ''

  # |file_pattern| is expected to contain its own capture group for the
  # revision, so group(1) below is the full filename and group(2) the revision.
  match_pattern = '{base_path_with_separator}({file_pattern})'.format(
      base_path_with_separator=base_path_with_separator,
      file_pattern=file_pattern)

  for build_url in build_urls:
    match = re.match(match_pattern, build_url)
    if match:
      filename = match.group(1)
      revision = match.group(2)

      # Ensure that there are no duplicate revisions.
      if revision in filename_by_revision_dict:
        job_name = environment.get_value('JOB_NAME')
        raise errors.BadStateError(
            'Found duplicate revision %s when processing bucket. '
            'Bucket path is probably malformed for job %s.' % (revision,
                                                               job_name))

      filename_by_revision_dict[revision] = filename

  try:
    # Sort dotted revisions numerically, e.g. '4.1.10' after '4.1.9'. A list
    # comprehension is needed here: on Python 3, map() returns an iterator,
    # and comparing iterators raises TypeError, silently forcing string sort.
    sorted_revisions = sorted(
        filename_by_revision_dict,
        reverse=reverse,
        key=lambda x: [int(part) for part in x.split('.')])
  except ValueError:
    logs.log_warn(
        'Revision pattern is not an integer, falling back to string sort.')
    sorted_revisions = sorted(filename_by_revision_dict, reverse=reverse)

  sorted_build_urls = []
  for revision in sorted_revisions:
    filename = filename_by_revision_dict[revision]
    sorted_build_urls.append('%s/%s' % (base_url, filename))

  return sorted_build_urls
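
A standalone check of the numeric sort key used above, after the Python 3 fix; the revision strings are made-up examples:

revisions = ['4.1.9', '4.1.10', '4.2.0']
print(sorted(revisions, reverse=True,
             key=lambda x: [int(part) for part in x.split('.')]))
# ['4.2.0', '4.1.10', '4.1.9'] -- numeric order, unlike a plain string sort,
# which would put '4.1.10' before '4.1.9'.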
Example #4
def _limit_corpus_size(corpus_url):
    """Limit number of files and size of a corpus."""
    corpus_count = 0
    corpus_size = 0
    deleted_corpus_count = 0
    bucket, _ = storage.get_bucket_name_and_path(corpus_url)
    for corpus_file in storage.get_blobs(corpus_url):
        corpus_count += 1
        corpus_size += corpus_file['size']
        # Once either limit is crossed, every remaining blob is deleted.
        if (corpus_count > CORPUS_FILES_LIMIT_FOR_FAILURES
                or corpus_size > CORPUS_SIZE_LIMIT_FOR_FAILURES):
            path_to_delete = storage.get_cloud_storage_file_path(
                bucket, corpus_file['name'])
            storage.delete(path_to_delete)
            deleted_corpus_count += 1

    if deleted_corpus_count:
        logs.log('Removed %d files from oversized corpus: %s.' %
                 (deleted_corpus_count, corpus_url))
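
The same streaming cutoff logic can be exercised against an in-memory list; the limits and blob dictionaries below are made up, not the real CORPUS_* constants:

FILES_LIMIT = 3
SIZE_LIMIT = 100

blobs = [{'name': 'a', 'size': 40}, {'name': 'b', 'size': 40},
         {'name': 'c', 'size': 40}, {'name': 'd', 'size': 10}]
count = size = 0
deleted = []
for blob in blobs:
    count += 1
    size += blob['size']
    if count > FILES_LIMIT or size > SIZE_LIMIT:
        deleted.append(blob['name'])
print(deleted)  # ['c', 'd'] -- everything past either limit is removed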