def _ExtractTimeToFirstMeaningfulPaint(loading_trace):
  """Extracts the time to first meaningful paint from a given trace.

  Args:
    loading_trace: loading_trace_module.LoadingTrace.

  Returns:
    Time to first meaningful paint in milliseconds, or _UNAVAILABLE_CSV_VALUE
    when the trace lacks the required tracing categories, or _FAILED_CSV_VALUE
    when the metric could not be extracted from the catapult output.
  """
  needed_categories = set(sandwich_runner.TTFMP_ADDITIONAL_CATEGORIES)
  recorded_categories = loading_trace.tracing_track.Categories()
  # The metric is only computable if all TTFMP categories were traced.
  if not needed_categories.issubset(recorded_categories):
    return _UNAVAILABLE_CSV_VALUE
  logging.info(' Extracting first_meaningful_paint')
  trace_events = [event.ToJsonDict()
                  for event in loading_trace.tracing_track.GetEvents()]
  with common_util.TemporaryDirectory(prefix='sandwich_tmp_') as temp_dir:
    # Dump the events to a JSON trace so catapult's run_metric can read it.
    trace_json_path = os.path.join(temp_dir, 'chrome_trace.json')
    with open(trace_json_path, 'w') as json_file:
      json.dump({'traceEvents': trace_events, 'metadata': {}}, json_file)
    run_metric_bin_path = os.path.join(
        _SRC_DIR, 'third_party', 'catapult', 'tracing', 'bin', 'run_metric')
    raw_output = subprocess.check_output(
        [run_metric_bin_path, 'firstPaintMetric', trace_json_path])
    # run_metric's JSON output is keyed by the input trace path.
    parsed_output = json.loads(raw_output)
    for metric in parsed_output[trace_json_path]['pairs']['values']:
      if metric['name'] == 'firstMeaningfulPaint_avg':
        return metric['numeric']['value']
  logging.info(' Extracting first_meaningful_paint: failed')
  return _FAILED_CSV_VALUE
def _BuildPatchedCache(original_cache_run_path, original_cache_archive_path,
                       cache_archive_dest_path):
  """Patches cacheable entries of a cache archive to use SWR caching.

  Rewrites the cache-control header of every still-fresh cached resource to
  force stale-while-revalidate, then re-archives the cache.

  Args:
    original_cache_run_path: Path of the run output directory that recorded
        the cache (the trace is read from its '0/' sub-directory).
    original_cache_archive_path: Path of the original cache archive to patch.
    cache_archive_dest_path: Destination path of the patched cache archive.
  """
  CACHE_CONTROL_VALUE = 'max-age=0,stale-while-revalidate=315360000'
  trace_path = os.path.join(
      original_cache_run_path, '0', sandwich_runner.TRACE_FILENAME)
  trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
  patch_count = 0
  with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
    cache_path = os.path.join(tmp_path, 'cache')
    chrome_cache.UnzipDirectoryContent(original_cache_archive_path, cache_path)
    cache_backend = chrome_cache.CacheBackend(cache_path, 'simple')
    cache_keys = set(cache_backend.ListKeys())
    for request in trace.request_track.GetEvents():
      if request.url not in cache_keys:
        continue
      caching_policy = request_track.CachingPolicy(request)
      assert caching_policy.IsCacheable()
      freshness = caching_policy.GetFreshnessLifetimes()
      # Skip resources that are already stale (zero freshness lifetime).
      if freshness[0] == 0:
        continue
      request.SetHTTPResponseHeader('cache-control', CACHE_CONTROL_VALUE)
      raw_headers = request.GetRawResponseHeaders()
      cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
      patch_count += 1
    chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
  # Lazy %-args avoid formatting when the log level is disabled.
  logging.info('Patched %d cached resources out of %d', patch_count,
               len(cache_keys))
def PatchCacheArchive(cache_archive_path, loading_trace_path,
                      cache_archive_dest_path):
  """Patch the cache archive.

  Note: This method updates the raw response headers of cache entries to
    store the ones such as Set-Cookie that were pruned by the
    net::HttpCacheTransaction, and removes the stream index 2 holding
    resource's compile meta data.

  Args:
    cache_archive_path: Input archive's path to patch.
    loading_trace_path: Path of the loading trace that have recorded the cache
        archive <cache_archive_path>.
    cache_archive_dest_path: Archive destination's path.
  """
  trace = LoadingTrace.FromJsonFile(loading_trace_path)
  with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
    cache_path = os.path.join(tmp_path, 'cache')
    chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
    cache_backend = chrome_cache.CacheBackend(cache_path, 'simple')
    cache_entries = set(cache_backend.ListKeys())
    logging.info('Original cache size: %d bytes', cache_backend.GetSize())
    for request in _FilterOutDataAndIncompleteRequests(
        trace.request_track.GetEvents()):
      # On requests having an upload data stream such as POST requests,
      # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key with
      # the upload data stream's session unique identifier.
      #
      # It is fine to not patch these requests since when reopening Chrome,
      # there is no way the entry can be reused since the upload data stream's
      # identifier will be different.
      #
      # The fact that these entries are kept in the cache after closing Chrome
      # properly by closing the Chrome tab as the ChromeControler.SetSlowDeath()
      # do is a known Chrome bug (crbug.com/610725).
      #
      # TODO(gabadie): Add support in ValidateCacheArchiveContent() and in
      # VerifyBenchmarkOutputDirectory() for POST requests to be known as
      # impossible to use from cache.
      if request.url not in cache_entries:
        if request.method != 'POST':
          # Fixed: the message previously had no '{}' placeholder, so the
          # offending method never appeared in the raised error.
          raise RuntimeError(
              'Unexpected method {} that is not found in cache.'.format(
                  request.method))
        continue
      # Chrome prunes Set-Cookie from response headers before storing them in
      # disk cache. Also, it adds implicit "Vary: cookie" header to all
      # redirect response headers. Sandwich manages the cache, but between
      # recording the cache and benchmarking the cookie jar is invalidated.
      # This leads to invalidation of all cacheable redirects.
      raw_headers = request.GetRawResponseHeaders()
      cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
      # NoState-Prefetch would only fetch the resources, but not parse them.
      cache_backend.DeleteStreamForKey(request.url, 2)
    chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
    logging.info('Patched cache size: %d bytes', cache_backend.GetSize())
def ValidateCacheArchiveContent(ref_urls, cache_archive_path):
  """Validates a cache archive content.

  Args:
    ref_urls: Reference list of urls.
    cache_archive_path: Cache archive's path to validate.
  """
  # TODO(gabadie): What's the best way of propagating errors happening in here?
  # Lazy %-args avoid formatting when the log level is disabled.
  logging.info('lists cached urls from %s', cache_archive_path)
  with common_util.TemporaryDirectory() as cache_directory:
    chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
    # Parenthesized continuation instead of a backslash.
    cached_urls = (
        chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
  _PrintUrlSetComparison(set(ref_urls), set(cached_urls), 'cached resources')
def _RecordWebServerTestTrace(args):
  """Records a loading trace of a page served by the local test web server.

  Runs the sandwich runner in WPR record mode against the locally served page
  and copies the resulting trace to args.output.

  Args:
    args: Parsed command line arguments (reads source_dir, page, output, and
        the Android device selection).

  Returns:
    0 (exit code).
  """
  with common_util.TemporaryDirectory() as out_path:
    # Renamed from `sandwich_runner`, which shadowed the module of the same
    # name used elsewhere in this file.
    runner = SandwichRunner()
    runner.android_device = _GetAndroidDeviceFromArgs(args)
    # Reuse the WPR's forwarding to access the webpage from Android.
    runner.wpr_record = True
    runner.wpr_archive_path = os.path.join(out_path, 'wpr')
    runner.trace_output_directory = os.path.join(out_path, 'run')
    with WebServer.Context(
        source_dir=args.source_dir, communication_dir=out_path) as server:
      address = server.Address()
      runner.urls = ['http://%s/%s' % (address, args.page)]
      runner.Run()
    # Same attribute lookup as before the rename (on the runner instance).
    trace_path = os.path.join(
        out_path, 'run', '0', runner.TRACE_FILENAME)
    shutil.copy(trace_path, args.output)
  return 0
def LocalWprHost(wpr_archive_path, record=False, network_condition_name=None,
                 disable_script_injection=False, out_log_path=None):
  """Launches web page replay host.

  Args:
    wpr_archive_path: host sided WPR archive's path, or None to verify a
        silent (no-op) WPR host configuration.
    record: Enables or disables WPR archive recording.
    network_condition_name: Network condition name available in
        emulation.NETWORK_CONDITIONS.
    disable_script_injection: Disable JavaScript file injections that is
      fighting against resources name entropy.
    out_log_path: Path of the WPR host's log.

  Yields:
    WprAttribute (or an empty list when wpr_archive_path is None).
  """
  if wpr_archive_path is None:
    # PEP 8: compare to None with `is`, not `==`.
    _VerifySilentWprHost(record, network_condition_name)
    yield []
    return
  with common_util.TemporaryDirectory() as temp_home_dir:
    # Generate a root certification authority certificate for WPR.
    private_ca_cert_path = os.path.join(temp_home_dir, 'wpr.pem')
    ca_cert_path = os.path.join(temp_home_dir, 'wpr-cert.pem')
    certutils.write_dummy_ca_cert(*certutils.generate_dummy_ca_cert(),
                                  cert_path=private_ca_cert_path)
    assert os.path.isfile(ca_cert_path)
    certutils.install_cert_in_nssdb(temp_home_dir, ca_cert_path)
    with _WprHost(
        wpr_archive_path,
        record=record,
        network_condition_name=network_condition_name,
        disable_script_injection=disable_script_injection,
        wpr_ca_cert_path=private_ca_cert_path,
        out_log_path=out_log_path) as (http_port, https_port):
      chrome_args = _FormatWPRRelatedChromeArgumentFor(http_port, https_port,
                                                       escape=False)
      # Override HOME so Chrome picks up the NSS DB with the WPR CA cert.
      yield WprAttribute(chrome_args=chrome_args,
                         chrome_env_override={'HOME': temp_home_dir})
def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
  """Validates a cache archive content.

  Args:
    cache_build_trace_path: Path of the generated trace at the cache build
      time.
    cache_archive_path: Cache archive's path to validate.

  Returns:
    {
      'effective_encoded_data_lengths':
        {URL of all requests: encoded_data_length},
      'effective_post_requests': [URLs of POST requests],
      'expected_cached_resources': [URLs of resources expected to be cached],
      'successfully_cached_resources': [URLs of cached sub-resources]
    }
  """
  # TODO(gabadie): What's the best way of propagating errors happening in here?
  logging.info('lists cached urls from %s', cache_archive_path)
  with common_util.TemporaryDirectory() as cache_directory:
    chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
    cache_keys = set(
        chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
  trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
  effective_requests = sandwich_utils.ListUrlRequests(
      trace, sandwich_utils.RequestOutcome.All)
  effective_post_requests = sandwich_utils.ListUrlRequests(
      trace, sandwich_utils.RequestOutcome.Post)
  effective_encoded_data_lengths = {}
  for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
      trace.request_track.GetEvents()):
    if request.from_disk_cache or request.served_from_cache:
      # At cache archive creation time, a request might be loaded several
      # times, but avoid the request.encoded_data_length == 0 if loaded from
      # cache.
      continue
    # Keep the largest observed transport length per URL.
    if request.url in effective_encoded_data_lengths:
      effective_encoded_data_lengths[request.url] = max(
          effective_encoded_data_lengths[request.url],
          request.GetResponseTransportLength())
    else:
      effective_encoded_data_lengths[request.url] = (
          request.GetResponseTransportLength())

  # Separate cache entries whose key was prefixed with an upload data stream
  # identifier (POST requests) from regular entries.
  upload_data_stream_cache_entry_keys = set()
  upload_data_stream_requests = set()
  for cache_entry_key in cache_keys:
    match = _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match(cache_entry_key)
    if not match:
      continue
    upload_data_stream_cache_entry_keys.add(cache_entry_key)
    upload_data_stream_requests.add(match.group('url'))

  expected_cached_requests = effective_requests.difference(
      effective_post_requests)
  effective_cache_keys = cache_keys.difference(
      upload_data_stream_cache_entry_keys)

  _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests,
                         'POST resources')
  _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys,
                         'Cached resources')

  return {
      'effective_encoded_data_lengths': effective_encoded_data_lengths,
      'effective_post_requests': list(effective_post_requests),
      'expected_cached_resources': list(expected_cached_requests),
      'successfully_cached_resources': list(effective_cache_keys)
  }
def _BuildBenchmarkCache(
    original_wpr_trace_path, urls_to_enable_swr,
    original_cache_trace_path, original_cache_archive_path,
    cache_archive_dest_path):
  """Builds a patched cache archive for benchmarking SWR behavior.

  Args:
    original_wpr_trace_path: Path of the trace generated at WPR record time.
    urls_to_enable_swr: Set of URLs to patch with forced
        stale-while-revalidate headers.
    original_cache_trace_path: Path of the trace generated at original cache
        creation time.
    original_cache_archive_path: Path of the cache archive to patch.
    cache_archive_dest_path: Destination path of the patched cache archive.
  """
  # Load trace that was generated at WPR record time.
  logging.info('loading %s', original_wpr_trace_path)
  trace = loading_trace.LoadingTrace.FromJsonFile(original_wpr_trace_path)

  # Lists URLs that should not be in the cache or already have SWR headers.
  urls_should_not_be_cached = set()
  urls_already_with_swr = set()
  for request in trace.request_track.GetEvents():
    caching_policy = request_track.CachingPolicy(request)
    if not caching_policy.IsCacheable():
      urls_should_not_be_cached.add(request.url)
    elif caching_policy.GetFreshnessLifetimes()[1] > 0:
      urls_already_with_swr.add(request.url)
  # Traces are fat, kill this one to save up memory for the next one to load
  # in this scope.
  del trace

  # Load trace that was generated at original cache creation.
  logging.info('loading %s', original_cache_trace_path)
  trace = loading_trace.LoadingTrace.FromJsonFile(original_cache_trace_path)

  # Create cache contents.
  delete_count = 0
  swr_patch_count = 0
  originally_swr_patch_count = 0
  noswr_patch_count = 0
  with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
    cache_path = os.path.join(tmp_path, 'cache')
    chrome_cache.UnzipDirectoryContent(
        original_cache_archive_path, cache_path)
    cache_backend = chrome_cache.CacheBackend(cache_path, 'simple')
    cache_keys = set(cache_backend.ListKeys())
    for request in trace.request_track.GetEvents():
      if request.url not in cache_keys:
        continue
      if request.url in urls_should_not_be_cached:
        cache_backend.DeleteKey(request.url)
        delete_count += 1
        continue
      if not request.HasReceivedResponse():
        continue
      if request.url in urls_to_enable_swr:
        request.SetHTTPResponseHeader(
            'cache-control', 'max-age=0,stale-while-revalidate=315360000')
        request.SetHTTPResponseHeader(
            'last-modified', 'Thu, 23 Jun 2016 11:30:00 GMT')
        swr_patch_count += 1
      elif request.url in urls_already_with_swr:
        # Force to use SWR on resources that originally attempted to use it.
        request.SetHTTPResponseHeader(
            'cache-control', 'max-age=0,stale-while-revalidate=315360000')
        # The resource originally had SWR enabled, therefore we don't patch
        # Last-Modified to repro exactly the performance impact in case these
        # headers were not set properly causing an invalidation instead of a
        # revalidation.
        originally_swr_patch_count += 1
      else:
        # Force synchronous revalidation.
        request.SetHTTPResponseHeader('cache-control', 'max-age=0')
        noswr_patch_count += 1
      raw_headers = request.GetRawResponseHeaders()
      cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
    chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
  logging.info('patched %d cached resources with forced SWR', swr_patch_count)
  logging.info('patched %d cached resources with original SWR',
               originally_swr_patch_count)
  logging.info('patched %d cached resources without SWR', noswr_patch_count)
  logging.info('deleted %d cached resources', delete_count)