Example #1
def _BuildPatchedCache(original_cache_run_path, original_cache_archive_path,
                       cache_archive_dest_path):
    CACHE_CONTROL_VALUE = 'max-age=0,stale-while-revalidate=315360000'
    trace_path = os.path.join(original_cache_run_path, '0',
                              sandwich_runner.TRACE_FILENAME)
    trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
    patch_count = 0
    with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
        cache_path = os.path.join(tmp_path, 'cache')
        chrome_cache.UnzipDirectoryContent(original_cache_archive_path,
                                           cache_path)
        cache_backend = chrome_cache.CacheBackend(cache_path, 'simple')
        cache_keys = set(cache_backend.ListKeys())
        for request in trace.request_track.GetEvents():
            if request.url not in cache_keys:
                continue
            caching_policy = request_track.CachingPolicy(request)
            assert caching_policy.IsCacheable()
            freshness = caching_policy.GetFreshnessLifetimes()
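            # GetFreshnessLifetimes() returns the freshness lifetime at index 0
            # and the stale-while-revalidate lifetime at index 1; only patch
            # resources that have a non-zero freshness lifetime.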
            if freshness[0] == 0:
                continue
            request.SetHTTPResponseHeader('cache-control', CACHE_CONTROL_VALUE)
            raw_headers = request.GetRawResponseHeaders()
            cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
            patch_count += 1
        chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
    logging.info('Patched %d cached resources out of %d', patch_count,
                 len(cache_keys))
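
A minimal invocation sketch for the function above; the run directory and archive paths are hypothetical and only assume the layout produced by sandwich_runner:

# Hypothetical paths; the '0' run sub-directory and the cache archive are
# assumed to come from a previous sandwich_runner run.
_BuildPatchedCache(
    original_cache_run_path='out/cache-run',
    original_cache_archive_path='out/cache-run/cache.zip',
    cache_archive_dest_path='out/patched-cache.zip')
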
Example #2
def PatchCacheArchive(cache_archive_path, loading_trace_path,
                      cache_archive_dest_path):
    """Patch the cache archive.

  Note: This method update the raw response headers of cache entries' to store
    the ones such as Set-Cookie that were pruned by the
    net::HttpCacheTransaction, and remove the stream index 2 holding resource's
    compile meta data.

  Args:
    cache_archive_path: Input archive's path to patch.
    loading_trace_path: Path of the loading trace that have recorded the cache
        archive <cache_archive_path>.
    cache_archive_dest_path: Archive destination's path.
  """
    trace = LoadingTrace.FromJsonFile(loading_trace_path)
    with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
        cache_path = os.path.join(tmp_path, 'cache')
        chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
        cache_backend = chrome_cache.CacheBackend(cache_path, 'simple')
        cache_entries = set(cache_backend.ListKeys())
        logging.info('Original cache size: %d bytes', cache_backend.GetSize())
        for request in _FilterOutDataAndIncompleteRequests(
                trace.request_track.GetEvents()):
            # For requests that have an upload data stream, such as POST requests,
            # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key with
            # the upload data stream's session-unique identifier.
            #
            # It is fine not to patch these requests: when reopening Chrome, the
            # entry cannot be reused because the upload data stream's identifier
            # will be different.
            #
            # The fact that these entries remain in the cache after closing Chrome
            # properly, by closing the Chrome tab as ChromeControler.SetSlowDeath()
            # does, is a known Chrome bug (crbug.com/610725).
            #
            # TODO(gabadie): Add support in ValidateCacheArchiveContent() and in
            #   VerifyBenchmarkOutputDirectory() for POST requests to be known as
            #   impossible to use from cache.
            if request.url not in cache_entries:
                if request.method != 'POST':
                    raise RuntimeError(
                        'Unexpected method {} that is not found in cache.'.format(
                            request.method))
                continue
            # Chrome prunes Set-Cookie from response headers before storing them in
            # the disk cache. It also adds an implicit "Vary: cookie" header to all
            # redirect response headers. Sandwich manages the cache, but between
            # recording the cache and benchmarking, the cookie jar is invalidated,
            # which leads to invalidation of all cacheable redirects.
            raw_headers = request.GetRawResponseHeaders()
            cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
            # NoState-Prefetch only fetches the resources without parsing them, so
            # drop stream index 2 that holds the resource's compiled metadata.
            cache_backend.DeleteStreamForKey(request.url, 2)
        chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
        logging.info('Patched cache size: %d bytes', cache_backend.GetSize())
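
A minimal invocation sketch with hypothetical paths; the loading trace must be the one recorded together with the cache archive being patched:

# Hypothetical paths.
PatchCacheArchive(
    cache_archive_path='out/original-cache.zip',
    loading_trace_path='out/trace.json',
    cache_archive_dest_path='out/patched-cache.zip')
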
Example #3
def ValidateCacheArchiveContent(ref_urls, cache_archive_path):
    """Validates a cache archive content.

  Args:
    ref_urls: Reference list of urls.
    cache_archive_path: Cache archive's path to validate.
  """
    # TODO(gabadie): What's the best way of propagating errors happening in here?
    logging.info('Listing cached urls from %s', cache_archive_path)
    with common_util.TemporaryDirectory() as cache_directory:
        chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
        cached_urls = \
            chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()
    _PrintUrlSetComparison(set(ref_urls), set(cached_urls), 'cached resources')
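
A sketch of how the reference URL list could be derived from a loading trace before validation, assuming the sandwich_utils helpers shown in Example #4 are available:

# Hypothetical paths; ListUrlRequests/RequestOutcome are used as in Example #4.
trace = loading_trace.LoadingTrace.FromJsonFile('out/trace.json')
ref_urls = sandwich_utils.ListUrlRequests(
    trace, sandwich_utils.RequestOutcome.All)
ValidateCacheArchiveContent(ref_urls, 'out/cache.zip')
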
Example #4
def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
    """Validates a cache archive content.

  Args:
    cache_build_trace_path: Path of the generated trace at the cache build time.
    cache_archive_path: Cache archive's path to validate.

  Returns:
    {
      'effective_encoded_data_lengths':
        {URL of all requests: encoded_data_length},
      'effective_post_requests': [URLs of POST requests],
      'expected_cached_resources': [URLs of resources expected to be cached],
      'successfully_cached': [URLs of cached sub-resources]
    }
  """
    # TODO(gabadie): What's the best way of propagating errors happening in here?
    logging.info('Listing cached urls from %s', cache_archive_path)
    with common_util.TemporaryDirectory() as cache_directory:
        chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
        cache_keys = set(
            chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
    trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
    effective_requests = sandwich_utils.ListUrlRequests(
        trace, sandwich_utils.RequestOutcome.All)
    effective_post_requests = sandwich_utils.ListUrlRequests(
        trace, sandwich_utils.RequestOutcome.Post)
    effective_encoded_data_lengths = {}
    for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
            trace.request_track.GetEvents()):
        if request.from_disk_cache or request.served_from_cache:
            # At cache archive creation time, a request might be loaded several
            # times; skip responses served from the cache to avoid recording
            # request.encoded_data_length == 0.
            continue
        if request.url in effective_encoded_data_lengths:
            effective_encoded_data_lengths[request.url] = max(
                effective_encoded_data_lengths[request.url],
                request.GetResponseTransportLength())
        else:
            effective_encoded_data_lengths[request.url] = (
                request.GetResponseTransportLength())

    upload_data_stream_cache_entry_keys = set()
    upload_data_stream_requests = set()
    for cache_entry_key in cache_keys:
        match = _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match(cache_entry_key)
        if not match:
            continue
        upload_data_stream_cache_entry_keys.add(cache_entry_key)
        upload_data_stream_requests.add(match.group('url'))

    expected_cached_requests = effective_requests.difference(
        effective_post_requests)
    effective_cache_keys = cache_keys.difference(
        upload_data_stream_cache_entry_keys)

    _PrintUrlSetComparison(effective_post_requests,
                           upload_data_stream_requests, 'POST resources')
    _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys,
                           'Cached resources')

    return {
        'effective_encoded_data_lengths': effective_encoded_data_lengths,
        'effective_post_requests': list(effective_post_requests),
        'expected_cached_resources': list(expected_cached_requests),
        'successfully_cached_resources': list(effective_cache_keys)
    }
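
A sketch of consuming the returned dictionary, for example to estimate how many transport bytes are covered by the cache; the paths and the aggregation are illustrative only:

# Illustrative aggregation over the validation report; hypothetical paths.
report = _ValidateCacheArchiveContent('out/cache-build-trace.json',
                                      'out/cache.zip')
cached_bytes = sum(
    report['effective_encoded_data_lengths'].get(url, 0)
    for url in report['successfully_cached_resources'])
logging.info('~%d transport bytes covered by the cache', cached_bytes)
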
Example #5
def _BuildBenchmarkCache(original_wpr_trace_path, urls_to_enable_swr,
                         original_cache_trace_path,
                         original_cache_archive_path, cache_archive_dest_path):
    # Load the trace that was generated at the original WPR recording.
    logging.info('loading %s', original_wpr_trace_path)
    trace = loading_trace.LoadingTrace.FromJsonFile(original_wpr_trace_path)

    # Lists URLs that should not be in the cache or already have SWR headers.
    urls_should_not_be_cached = set()
    urls_already_with_swr = set()
    for request in trace.request_track.GetEvents():
        caching_policy = request_track.CachingPolicy(request)
        if not caching_policy.IsCacheable():
            urls_should_not_be_cached.add(request.url)
        elif caching_policy.GetFreshnessLifetimes()[1] > 0:
            urls_already_with_swr.add(request.url)
    # Traces are large; free this one to save memory before loading the next
    # one in this scope.
    del trace

    # Load trace that was generated at original cache creation.
    logging.info('loading %s', original_cache_trace_path)
    trace = loading_trace.LoadingTrace.FromJsonFile(original_cache_trace_path)

    # Create cache contents.
    delete_count = 0
    swr_patch_count = 0
    originally_swr_patch_count = 0
    noswr_patch_count = 0
    with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
        cache_path = os.path.join(tmp_path, 'cache')
        chrome_cache.UnzipDirectoryContent(original_cache_archive_path,
                                           cache_path)
        cache_backend = chrome_cache.CacheBackend(cache_path, 'simple')
        cache_keys = set(cache_backend.ListKeys())
        for request in trace.request_track.GetEvents():
            if request.url not in cache_keys:
                continue
            if request.url in urls_should_not_be_cached:
                cache_backend.DeleteKey(request.url)
                delete_count += 1
                continue
            if not request.HasReceivedResponse():
                continue
            if request.url in urls_to_enable_swr:
                request.SetHTTPResponseHeader(
                    'cache-control',
                    'max-age=0,stale-while-revalidate=315360000')
                request.SetHTTPResponseHeader('last-modified',
                                              'Thu, 23 Jun 2016 11:30:00 GMT')
                swr_patch_count += 1
            elif request.url in urls_already_with_swr:
                # Force SWR on resources that originally attempted to use it.
                request.SetHTTPResponseHeader(
                    'cache-control',
                    'max-age=0,stale-while-revalidate=315360000')
                # The resource originally had SWR enabled, so we deliberately
                # don't set Last-Modified: this reproduces the performance
                # impact of the case where these headers were not set properly,
                # causing an invalidation instead of a revalidation.
                originally_swr_patch_count += 1
            else:
                # Force synchronous revalidation.
                request.SetHTTPResponseHeader('cache-control', 'max-age=0')
                noswr_patch_count += 1
            raw_headers = request.GetRawResponseHeaders()
            cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
        chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
    logging.info('patched %d cached resources with forced SWR',
                 swr_patch_count)
    logging.info('patched %d cached resources with original SWR',
                 originally_swr_patch_count)
    logging.info('patched %d cached resources without SWR', noswr_patch_count)
    logging.info('deleted %d cached resources', delete_count)
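
A minimal invocation sketch for the function above; all paths and the URL set are hypothetical, and urls_to_enable_swr would typically come from the benchmark configuration:

# Hypothetical inputs.
_BuildBenchmarkCache(
    original_wpr_trace_path='out/wpr-trace.json',
    urls_to_enable_swr={'https://example.com/style.css'},
    original_cache_trace_path='out/cache-trace.json',
    original_cache_archive_path='out/original-cache.zip',
    cache_archive_dest_path='out/benchmark-cache.zip')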