Example #1
        def BuildPatchedWpr():
            shutil.copyfile(self._common_builder.original_wpr_task.path,
                            BuildPatchedWpr.path)
            wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path)

            # Save the original response headers.
            original_response_headers = {e.url: e.GetResponseHeadersDict() \
                for e in wpr_archive.ListUrlEntries()}
            logging.info('saving response headers for %d resources',
                         len(original_response_headers))
            if not original_response_headers:
                # TODO(gabadie): How is it possible to not even have the main resource
                # in the WPR archive? Example URL can be found in:
                # http://crbug.com/623966#c5
                raise Exception(
                    'Looks like no resources were recorded in WPR during: {}'.
                    format(self._common_builder.original_wpr_task.name))
            with open(self._original_headers_path, 'w') as file_output:
                json.dump(original_response_headers, file_output)

            # Patch WPR.
            wpr_url_entries = wpr_archive.ListUrlEntries()
            for wpr_url_entry in wpr_url_entries:
                sandwich_utils.PatchWprEntryToBeCached(wpr_url_entry)
            logging.info('number of patched entries: %d', len(wpr_url_entries))
            wpr_archive.Persist()
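
The task above also dumps the original response headers to a JSON file (a mapping
from URL to that URL's response-headers dict), so later steps can compare the
patched archive against the recording. A minimal sketch of reading that file back,
assuming a hypothetical path in place of self._original_headers_path:

import json

# Hypothetical path; in the task above it comes from self._original_headers_path.
original_headers_path = 'original-response-headers.json'
with open(original_headers_path) as json_input:
    original_response_headers = json.load(json_input)
for url, headers in original_response_headers.items():
    print('%s: %s' % (url, headers.get('cache-control')))
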
Example #2
import logging

# wpr_backend is the project's WPR archive backend module.
import wpr_backend


def PatchWpr(wpr_archive_path):
  """Patches a WPR archive to get all resources into the HTTP cache and avoid
  invalidation and revalidations.

  Args:
    wpr_archive_path: Path of the WPR archive to patch.
  """
  # Sets the resources cache max-age to 10 years.
  MAX_AGE = 10 * 365 * 24 * 60 * 60
  CACHE_CONTROL = 'public, max-age={}'.format(MAX_AGE)

  wpr_archive = wpr_backend.WprArchiveBackend(wpr_archive_path)
  for url_entry in wpr_archive.ListUrlEntries():
    response_headers = url_entry.GetResponseHeadersDict()
    if 'cache-control' in response_headers and \
        response_headers['cache-control'] == CACHE_CONTROL:
      continue
    logging.info('patching %s', url_entry.url)
    # TODO(gabadie): may need to patch Last-Modified and If-Modified-Since.
    # TODO(gabadie): may need to delete ETag.
    # TODO(gabadie): may need to patch Vary.
    # TODO(gabadie): may need to take care of x-cache.
    #
    # Override the cache-control header to set the resources max age to MAX_AGE.
    #
    # Important note: Some resources holding sensitive information might have
    # cache-control set to no-store, which allows the resource to be cached in
    # memory but not in the file system. NoState-Prefetch is going to take care
    # of this case. But here, to simulate NoState-Prefetch, we have no choice
    # but to save absolutely all cached resources on disk so that they survive
    # killing Chrome for cache save, modification and push.
    url_entry.SetResponseHeader('cache-control', CACHE_CONTROL)
  wpr_archive.Persist()
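
PatchWpr rewrites the archive in place, so a caller only needs the path of a
recorded WPR archive. A hedged usage sketch, appended to the module above (the
archive path below is illustrative, not from the original source):

if __name__ == '__main__':
  logging.basicConfig(level=logging.INFO)
  # Hypothetical path to a previously recorded WPR archive.
  PatchWpr('/tmp/recording.wpr')
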
Example #3
def BuildPatchedWpr():
    shutil.copyfile(self._common_builder.original_wpr_task.path,
                    BuildPatchedWpr.path)
    wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path)
    wpr_url_entries = wpr_archive.ListUrlEntries()
    for wpr_url_entry in wpr_url_entries:
        sandwich_utils.PatchWprEntryToBeCached(wpr_url_entry)
    logging.info('number of patched entries: %d', len(wpr_url_entries))
    wpr_archive.Persist()

def BuildPatchedWpr():
  common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path)
  shutil.copyfile(
      self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
  wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path)

  # Save the original response headers.
  original_response_headers = {e.url: e.GetResponseHeadersDict() \
      for e in wpr_archive.ListUrlEntries()}
  with open(self._original_headers_path, 'w') as file_output:
    json.dump(original_response_headers, file_output)

  # Patch WPR.
  _PatchWpr(wpr_archive)
  wpr_archive.Persist()
def PatchWpr(wpr_archive_path):
    """Patches a WPR archive to get all resources into the HTTP cache and avoid
  invalidation and revalidations.

  Args:
    wpr_archive_path: Path of the WPR archive to patch.
  """
    # Sets the resources cache max-age to 10 years.
    MAX_AGE = 10 * 365 * 24 * 60 * 60
    CACHE_CONTROL = 'public, max-age={}'.format(MAX_AGE)

    wpr_archive = wpr_backend.WprArchiveBackend(wpr_archive_path)
    for url_entry in wpr_archive.ListUrlEntries():
        response_headers = url_entry.GetResponseHeadersDict()
        if 'cache-control' in response_headers and \
            response_headers['cache-control'] == CACHE_CONTROL:
            continue
        logging.info('patching %s', url_entry.url)
        # TODO(gabadie): may need to patch Last-Modified and If-Modified-Since.
        # TODO(gabadie): may need to delete ETag.
        # TODO(gabadie): may need to take care of x-cache.
        #
        # Override the cache-control header to set the resources max age to MAX_AGE.
        #
        # Important note: Some resources holding sensitive information might
        # have cache-control set to no-store, which allows the resource to be
        # cached in memory but not in the file system. NoState-Prefetch is
        # going to take care of this case. But here, to simulate
        # NoState-Prefetch, we have no choice but to save absolutely all cached
        # resources on disk so that they survive killing Chrome for cache save,
        # modification and push.
        url_entry.SetResponseHeader('cache-control', CACHE_CONTROL)

        # TODO(gabadie): May need to extend Vary blacklist (referer?)
        #
        # All of these Vary and Pragma possibilities need to be removed from
        # response headers in order for Chrome to store a resource in HTTP cache and
        # not to invalidate it.
        #
        # Note: HttpVaryData::Init() in Chrome adds an implicit 'Vary: cookie'
        # header to any redirect.
        # TODO(gabadie): Find a way to work around this issue.
        url_entry.RemoveResponseHeaderDirectives('vary', {'*', 'cookie'})
        url_entry.RemoveResponseHeaderDirectives('pragma', {'no-cache'})

    wpr_archive.Persist()
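
The two RemoveResponseHeaderDirectives calls strip the directives that would keep
Chrome from storing and reusing the cached response: 'Vary: *' or 'Vary: cookie'
(including the implicit one Chrome adds to redirects) and 'Pragma: no-cache'. The
helper below is only an illustration of that idea on a plain headers dict; the
real method operates on a WPR url entry and its exact behavior may differ:

# Standalone sketch, for illustration only: remove the listed directives from a
# comma-separated header value and drop the header entirely once it is empty.
def remove_header_directives(headers, header_name, directives):
    value = headers.get(header_name)
    if value is None:
        return
    kept = [d.strip() for d in value.split(',')
            if d.strip().lower() not in directives]
    if kept:
        headers[header_name] = ', '.join(kept)
    else:
        del headers[header_name]


headers = {'vary': 'Accept-Encoding, Cookie', 'pragma': 'no-cache'}
remove_header_directives(headers, 'vary', {'*', 'cookie'})
remove_header_directives(headers, 'pragma', {'no-cache'})
# headers is now {'vary': 'Accept-Encoding'}; the pragma header was dropped.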