def _BuildPatchedCache(original_cache_run_path, original_cache_archive_path,
                       cache_archive_dest_path):
  """Builds a patched copy of a cache archive with an overridden cache-control.

  The loading trace is read from the '0' sub-directory of
  <original_cache_run_path>. For every request of that trace whose URL is a key
  of the cache and whose first freshness lifetime is not zero, the
  'cache-control' response header is overridden and the request's raw response
  headers are written back into the matching cache entry.

  Args:
    original_cache_run_path: Directory of the run holding the loading trace.
    original_cache_archive_path: Path of the cache archive to patch.
    cache_archive_dest_path: Path where the patched archive is written.
  """
  swr_cache_control = 'max-age=0,stale-while-revalidate=315360000'
  recorded_trace = loading_trace.LoadingTrace.FromJsonFile(
      os.path.join(original_cache_run_path, '0',
                   sandwich_runner.TRACE_FILENAME))
  patched = 0
  with common_util.TemporaryDirectory(prefix='sandwich_tmp') as work_dir:
    unpacked_cache = os.path.join(work_dir, 'cache')
    chrome_cache.UnzipDirectoryContent(original_cache_archive_path,
                                       unpacked_cache)
    backend = chrome_cache.CacheBackend(unpacked_cache, 'simple')
    known_keys = set(backend.ListKeys())
    for event in recorded_trace.request_track.GetEvents():
      # Only requests that ended up in the cache can be patched.
      if event.url not in known_keys:
        continue
      policy = request_track.CachingPolicy(event)
      assert policy.IsCacheable()
      # Entries whose first freshness lifetime is zero are left untouched.
      if policy.GetFreshnessLifetimes()[0] == 0:
        continue
      event.SetHTTPResponseHeader('cache-control', swr_cache_control)
      patched_headers = event.GetRawResponseHeaders()
      backend.UpdateRawResponseHeaders(event.url, patched_headers)
      patched += 1
    chrome_cache.ZipDirectoryContent(unpacked_cache, cache_archive_dest_path)
  logging.info('Patched %d cached resources out of %d' % (
      patched, len(known_keys)))
def _PullCacheFromDevice(self):
  """Pulls the browser cache and zips it into self.cache_archive_path.

  Requires self.cache_operation to be CacheOperation.SAVE and
  self.cache_archive_path to be set. The directory returned by
  self._chrome_ctl.PullBrowserCache() is deleted once archiving is done.
  """
  assert self.cache_operation == CacheOperation.SAVE
  assert self.cache_archive_path, 'Need to specify where to save the cache'
  cache_directory_path = self._chrome_ctl.PullBrowserCache()
  try:
    chrome_cache.ZipDirectoryContent(
        cache_directory_path, self.cache_archive_path)
  finally:
    # Always delete the pulled directory, even when archiving fails, so a
    # failed run does not leave it behind on disk.
    shutil.rmtree(cache_directory_path)
def PatchCacheArchive(cache_archive_path, loading_trace_path,
                      cache_archive_dest_path):
  """Patch the cache archive.

  Note: This method updates the raw response headers of the cache entries to
    store the ones, such as Set-Cookie, that were pruned by
    net::HttpCacheTransaction, and removes the stream index 2 holding the
    resource's compile meta data.

  Args:
    cache_archive_path: Input archive's path to patch.
    loading_trace_path: Path of the loading trace that has recorded the cache
        archive <cache_archive_path>.
    cache_archive_dest_path: Archive destination's path.

  Raises:
    RuntimeError: If a request kept by _FilterOutDataAndIncompleteRequests()
        is not in the cache and its method is not POST.
  """
  trace = LoadingTrace.FromJsonFile(loading_trace_path)
  with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
    cache_path = os.path.join(tmp_path, 'cache')
    chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
    cache_backend = chrome_cache.CacheBackend(cache_path, 'simple')
    cache_entries = set(cache_backend.ListKeys())
    logging.info('Original cache size: %d bytes', cache_backend.GetSize())
    for request in _FilterOutDataAndIncompleteRequests(
        trace.request_track.GetEvents()):
      # On requests having an upload data stream such as POST requests,
      # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key with
      # the upload data stream's session unique identifier.
      #
      # It is fine to not patch these requests since when reopening Chrome,
      # there is no way the entry can be reused since the upload data stream's
      # identifier will be different.
      #
      # The fact that these entries are kept in the cache after closing Chrome
      # properly by closing the Chrome tab, as ChromeControler.SetSlowDeath()
      # does, is a known Chrome bug (crbug.com/610725).
      #
      # TODO(gabadie): Add support in ValidateCacheArchiveContent() and in
      #   VerifyBenchmarkOutputDirectory() for POST requests to be known as
      #   impossible to use from cache.
      if request.url not in cache_entries:
        if request.method != 'POST':
          # The placeholder is required for the offending method to actually
          # show up in the error message.
          raise RuntimeError(
              'Unexpected method {} that is not found in cache.'
              ''.format(request.method))
        continue
      # Chrome prunes Set-Cookie from response headers before storing them in
      # disk cache. Also, it adds implicit "Vary: cookie" header to all
      # redirect response headers. Sandwich manages the cache, but between
      # recording the cache and benchmarking the cookie jar is invalidated.
      # This leads to invalidation of all cacheable redirects.
      raw_headers = request.GetRawResponseHeaders()
      cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
      # NoState-Prefetch would only fetch the resources, but not parse them.
      cache_backend.DeleteStreamForKey(request.url, 2)
    chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
    logging.info('Patched cache size: %d bytes', cache_backend.GetSize())
def _PullCacheFromDevice(self):
  """Pulls the browser cache from the device and zips it.

  Requires self.cache_operation to be 'save' and self.cache_archive_path to be
  set. The launcher is started and Chrome is killed, each followed by a
  _TIME_TO_DEVICE_IDLE_SECONDS pause, before the cache is pulled. The directory
  returned by chrome_cache.PullBrowserCache() is deleted once archiving is
  done.
  """
  assert self.cache_operation == 'save'
  assert self.cache_archive_path, 'Need to specify where to save the cache'

  # Move Chrome to background to allow it to flush the index.
  self._device.adb.Shell('am start com.google.android.launcher')
  time.sleep(_TIME_TO_DEVICE_IDLE_SECONDS)
  self._device.KillAll(OPTIONS.chrome_package_name, quiet=True)
  time.sleep(_TIME_TO_DEVICE_IDLE_SECONDS)

  cache_directory_path = chrome_cache.PullBrowserCache(self._device)
  try:
    chrome_cache.ZipDirectoryContent(
        cache_directory_path, self.cache_archive_path)
  finally:
    # Always delete the pulled directory, even when archiving fails, so a
    # failed run does not leave it behind on disk.
    shutil.rmtree(cache_directory_path)
def testCacheArchive(self):
  """Checks that zipping then unzipping LOADING_DIR reproduces its content.

  The same archive is extracted three times: into a first destination, again
  into that destination after an extra 'garbage' file was created in it, and
  finally into the nested 'foo/bar/cache' destination.
  """
  archive_path = self.GetTempPath('cache.zip')
  chrome_cache.ZipDirectoryContent(LOADING_DIR, archive_path)

  # First extraction.
  extracted_path = self.GetTempPath('cache')
  chrome_cache.UnzipDirectoryContent(archive_path, extracted_path)
  self.CompareDirectories(LOADING_DIR, extracted_path)

  # Extraction over a destination that holds an extra file: the directories
  # are still expected to compare equal afterwards.
  garbage_path = os.path.join(extracted_path, 'garbage')
  self.CreateNewGarbageFile(garbage_path)
  chrome_cache.UnzipDirectoryContent(archive_path, extracted_path)
  self.CompareDirectories(LOADING_DIR, extracted_path)

  # Extraction into a destination nested under several directories.
  nested_path = self.GetTempPath('foo/bar/cache')
  chrome_cache.UnzipDirectoryContent(archive_path, nested_path)
  self.CompareDirectories(LOADING_DIR, nested_path)
def _BuildBenchmarkCache(original_wpr_trace_path, urls_to_enable_swr,
                         original_cache_trace_path,
                         original_cache_archive_path,
                         cache_archive_dest_path):
  """Builds the benchmark cache archive out of the original cache archive.

  The trace at <original_wpr_trace_path> is used to classify URLs as not
  cacheable or as already having a positive second freshness lifetime. The
  trace at <original_cache_trace_path> then drives the patching: entries for
  non-cacheable URLs are deleted, and the remaining entries whose request
  received a response get their 'cache-control' header rewritten before the
  raw headers are stored back in the cache.

  Args:
    original_wpr_trace_path: Path of the trace used to classify URLs.
    urls_to_enable_swr: Collection of URLs to force stale-while-revalidate on.
    original_cache_trace_path: Path of the trace whose requests are patched
        into the cache.
    original_cache_archive_path: Path of the cache archive to patch.
    cache_archive_dest_path: Path where the patched archive is written.
  """
  swr_cache_control = 'max-age=0,stale-while-revalidate=315360000'

  # First pass: classify URLs using the WPR trace.
  logging.info('loading %s', original_wpr_trace_path)
  wpr_trace = loading_trace.LoadingTrace.FromJsonFile(original_wpr_trace_path)
  uncacheable_urls = set()
  native_swr_urls = set()
  for wpr_request in wpr_trace.request_track.GetEvents():
    policy = request_track.CachingPolicy(wpr_request)
    if policy.IsCacheable():
      if policy.GetFreshnessLifetimes()[1] > 0:
        native_swr_urls.add(wpr_request.url)
    else:
      uncacheable_urls.add(wpr_request.url)

  # Traces are fat: drop this one to save memory before loading the next one.
  del wpr_trace

  # Second pass: load the trace that was generated at original cache creation.
  logging.info('loading %s', original_cache_trace_path)
  cache_trace = loading_trace.LoadingTrace.FromJsonFile(
      original_cache_trace_path)

  deleted = 0
  forced_swr = 0
  original_swr = 0
  without_swr = 0
  with common_util.TemporaryDirectory(prefix='sandwich_tmp') as work_dir:
    unpacked_cache = os.path.join(work_dir, 'cache')
    chrome_cache.UnzipDirectoryContent(original_cache_archive_path,
                                       unpacked_cache)
    backend = chrome_cache.CacheBackend(unpacked_cache, 'simple')
    known_keys = set(backend.ListKeys())
    for request in cache_trace.request_track.GetEvents():
      if request.url not in known_keys:
        continue
      if request.url in uncacheable_urls:
        backend.DeleteKey(request.url)
        deleted += 1
        continue
      if not request.HasReceivedResponse():
        continue

      if request.url in urls_to_enable_swr:
        request.SetHTTPResponseHeader('cache-control', swr_cache_control)
        request.SetHTTPResponseHeader('last-modified',
                                      'Thu, 23 Jun 2016 11:30:00 GMT')
        forced_swr += 1
      elif request.url in native_swr_urls:
        # Force SWR on resources that originally attempted to use it.
        request.SetHTTPResponseHeader('cache-control', swr_cache_control)
        # The resource originally had SWR enabled, therefore Last-Modified is
        # not set here, to reproduce exactly the performance impact in case
        # these headers were not set properly, causing an invalidation instead
        # of a revalidation.
        original_swr += 1
      else:
        # Force synchronous revalidation.
        request.SetHTTPResponseHeader('cache-control', 'max-age=0')
        without_swr += 1

      patched_headers = request.GetRawResponseHeaders()
      backend.UpdateRawResponseHeaders(request.url, patched_headers)
    chrome_cache.ZipDirectoryContent(unpacked_cache, cache_archive_dest_path)

  logging.info('patched %d cached resources with forced SWR', forced_swr)
  logging.info('patched %d cached resources with original SWR', original_swr)
  logging.info('patched %d cached resources without SWR', without_swr)
  logging.info('deleted %d cached resources', deleted)