def VerifyTrace(self, trace):
  """Verifies a trace with the cache validation result and the benchmark
  setup.
  """
  effective_requests = sandwich_utils.ListUrlRequests(
      trace, sandwich_utils.RequestOutcome.All)
  effective_post_requests = sandwich_utils.ListUrlRequests(
      trace, sandwich_utils.RequestOutcome.Post)
  effective_cached_requests = sandwich_utils.ListUrlRequests(
      trace, sandwich_utils.RequestOutcome.ServedFromCache)
  effective_uncached_requests = sandwich_utils.ListUrlRequests(
      trace, sandwich_utils.RequestOutcome.NotServedFromCache)

  missing_requests = self._original_requests.difference(effective_requests)
  unexpected_requests = effective_requests.difference(self._original_requests)
  expected_cached_requests = \
      self._original_cached_requests.difference(missing_requests)
  expected_uncached_requests = self._original_uncached_requests.union(
      unexpected_requests).difference(missing_requests)

  # POST requests are known to be unable to use the cache.
  expected_cached_requests.difference_update(effective_post_requests)
  expected_uncached_requests.update(effective_post_requests)

  _PrintUrlSetComparison(self._original_requests, effective_requests,
                         'All resources')
  _PrintUrlSetComparison(set(), effective_post_requests, 'POST resources')
  _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests,
                         'Cached resources')
  _PrintUrlSetComparison(expected_uncached_requests,
                         effective_uncached_requests, 'Non cached resources')

  self._all_sent_url_requests.update(effective_uncached_requests)
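
# Illustrative sketch (not part of the original module): the set algebra used
# by VerifyTrace() above, applied to toy URL sets. Requests missing from the
# effective trace are dropped from the cached expectation, while unexpected
# and POST requests are expected to hit the network. All URLs are made up.
def _ExampleExpectedSets():
  original_requests = {'http://a.test/x', 'http://a.test/y', 'http://a.test/z'}
  original_cached_requests = {'http://a.test/x', 'http://a.test/y'}
  original_uncached_requests = {'http://a.test/z'}
  effective_requests = {'http://a.test/x', 'http://a.test/w'}
  effective_post_requests = {'http://a.test/w'}

  missing_requests = original_requests.difference(effective_requests)
  unexpected_requests = effective_requests.difference(original_requests)
  expected_cached_requests = original_cached_requests.difference(
      missing_requests)
  expected_uncached_requests = original_uncached_requests.union(
      unexpected_requests).difference(missing_requests)
  expected_cached_requests.difference_update(effective_post_requests)
  expected_uncached_requests.update(effective_post_requests)

  assert expected_cached_requests == {'http://a.test/x'}
  assert expected_uncached_requests == {'http://a.test/w'}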
def SetupBenchmark():
  logging.info('loading %s', self._original_cache_trace_path)
  trace = loading_trace.LoadingTrace.FromJsonFile(
      self._original_cache_trace_path)
  logging.info('generating %s', SetupBenchmark.path)
  effective_subresource_urls = sandwich_utils.ListUrlRequests(
      trace, sandwich_utils.RequestOutcome.All)
  urls_to_enable_swr = _ExtractRegexMatchingUrls(
      effective_subresource_urls, domain_regexes)
  logging.info(
      'count of urls to enable SWR: %s', len(urls_to_enable_swr))
  with open(SetupBenchmark.path, 'w') as output:
    json.dump({
        'benchmark_name': benchmark_name,
        'urls_to_enable_swr': [url for url in urls_to_enable_swr],
        'effective_subresource_urls':
            [url for url in effective_subresource_urls]
    }, output)
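
# Hypothetical sketch (not the module's actual helper): one plausible way
# _ExtractRegexMatchingUrls(), used by SetupBenchmark() above, could select the
# URLs whose host matches any of the compiled domain_regexes. The real helper
# is defined elsewhere in this module and may differ.
def _ExtractRegexMatchingUrlsSketch(urls, domain_regexes):
  try:
    from urlparse import urlparse  # Python 2.
  except ImportError:
    from urllib.parse import urlparse  # Python 3.
  matching_urls = set()
  for url in urls:
    domain = urlparse(url).netloc
    if any(regex.search(domain) for regex in domain_regexes):
      matching_urls.add(url)
  return matching_urls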
def _ProcessRunOutputDir(benchmark_setup, runner_output_dir):
  """Processes a benchmark's run output directory.

  Args:
    benchmark_setup: Benchmark setup dict as written by SetupBenchmark
      (benchmark_name, urls_to_enable_swr, effective_subresource_urls).
    runner_output_dir: Same as for SandwichRunner.output_dir.

  Returns:
    List of dictionaries, one per repeated run.
  """
  run_metrics_list = []
  for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
      runner_output_dir):
    trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
    logging.info('processing trace: %s', trace_path)
    trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
    served_from_cache_urls = sandwich_utils.ListUrlRequests(
        trace, sandwich_utils.RequestOutcome.ServedFromCache)
    matching_subresource_urls_used_from_cache = (
        served_from_cache_urls.intersection(
            set(benchmark_setup['urls_to_enable_swr'])))
    run_metrics = {
        'url': trace.url,
        'repeat_id': repeat_id,
        'benchmark_name': benchmark_setup['benchmark_name'],
        'cache_recording.subresource_count':
            len(benchmark_setup['effective_subresource_urls']),
        'cache_recording.matching_subresource_count':
            len(benchmark_setup['urls_to_enable_swr']),
        'benchmark.matching_subresource_count_used_from_cache':
            len(matching_subresource_urls_used_from_cache)
    }
    run_metrics.update(
        sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
            repeat_dir, trace))
    run_metrics_list.append(run_metrics)
  return run_metrics_list
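
# Illustrative only (not part of the original module, which has its own export
# path): one way a caller could flatten the run_metrics_list returned above
# into a CSV file with the standard library. Field names are taken from the
# first run's metrics dict; csv_path is a hypothetical output location.
def _ExampleWriteRunMetricsCsv(run_metrics_list, csv_path):
  import csv
  if not run_metrics_list:
    return
  field_names = sorted(run_metrics_list[0].keys())
  with open(csv_path, 'w') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=field_names)
    writer.writeheader()
    for run_metrics in run_metrics_list:
      writer.writerow(run_metrics)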
def _ProcessRunOutputDir(
    cache_validation_result, benchmark_setup, runner_output_dir):
  """Processes a benchmark's run output directory.

  Args:
    cache_validation_result: Same as for _RunOutputVerifier.
    benchmark_setup: Same as for _RunOutputVerifier.
    runner_output_dir: Same as for SandwichRunner.output_dir.

  Returns:
    List of dictionaries, one per repeated run.
  """
  run_metrics_list = []
  run_output_verifier = _RunOutputVerifier(
      cache_validation_result, benchmark_setup)
  cached_encoded_data_lengths = (
      cache_validation_result['effective_encoded_data_lengths'])
  for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
      runner_output_dir):
    trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)

    logging.info('loading trace: %s', trace_path)
    trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)

    logging.info('verifying trace: %s', trace_path)
    run_output_verifier.VerifyTrace(trace)

    logging.info('extracting metrics from trace: %s', trace_path)

    # Gather the response size per URL.
    response_sizes = {}
    for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
        trace.request_track.GetEvents()):
      # Ignore requests served from blink's cache.
      if request.served_from_cache:
        continue
      if request.from_disk_cache:
        if request.url in cached_encoded_data_lengths:
          response_size = cached_encoded_data_lengths[request.url]
        else:
          # Heavy pages may overflow the memory cache, so a request can be
          # served from the disk cache a couple of times per page load.
          logging.warning(
              'Looks like it could be served from the memory cache: %s',
              request.url)
          if request.url in response_sizes:
            response_size = response_sizes[request.url]
      else:
        response_size = request.GetResponseTransportLength()
      response_sizes[request.url] = response_size

    # Sum the bytes served from the cache and from the network.
    served_from_network_bytes = 0
    served_from_cache_bytes = 0
    urls_hitting_network = set()
    for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
        trace.request_track.GetEvents()):
      # Ignore requests served from blink's cache.
      if request.served_from_cache:
        continue
      urls_hitting_network.add(request.url)
      if request.from_disk_cache:
        served_from_cache_bytes += response_sizes[request.url]
      else:
        served_from_network_bytes += response_sizes[request.url]

    # Make sure the requests served from blink's cache have at least one
    # corresponding request that was not served from blink's cache.
    for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
        trace.request_track.GetEvents()):
      assert (request.url in urls_hitting_network or
              not request.served_from_cache)

    run_metrics = {
        'url': trace.url,
        'repeat_id': repeat_id,
        'subresource_discoverer': benchmark_setup['subresource_discoverer'],
        'cache_recording.subresource_count':
            len(cache_validation_result['effective_encoded_data_lengths']),
        'cache_recording.cached_subresource_count_theoretic':
            len(cache_validation_result['successfully_cached_resources']),
        'cache_recording.cached_subresource_count':
            len(cache_validation_result['expected_cached_resources']),
        'benchmark.subresource_count': len(sandwich_utils.ListUrlRequests(
            trace, sandwich_utils.RequestOutcome.All)),
        'benchmark.served_from_cache_count_theoretic':
            len(benchmark_setup['cache_whitelist']),
        'benchmark.served_from_cache_count': len(
            sandwich_utils.ListUrlRequests(
                trace, sandwich_utils.RequestOutcome.ServedFromCache)),
        'benchmark.served_from_network_bytes': served_from_network_bytes,
        'benchmark.served_from_cache_bytes': served_from_cache_bytes
    }
    run_metrics.update(
        sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
            repeat_dir, trace))
    run_metrics_list.append(run_metrics)
  run_metrics_list.sort(key=lambda e: e['repeat_id'])

  wpr_log_path = os.path.join(
      runner_output_dir, sandwich_runner.WPR_LOG_FILENAME)
  logging.info('verifying wpr log: %s', wpr_log_path)
  run_output_verifier.VerifyWprLog(wpr_log_path)
  return run_metrics_list
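
# Illustrative sketch (not part of the original module) of the byte accounting
# performed above, using stand-in request objects. Disk-cache hits are counted
# against the previously recorded response sizes; everything else is counted
# as network bytes, and blink cache hits are ignored.
def _ExampleByteAccounting():
  import collections
  FakeRequest = collections.namedtuple(
      'FakeRequest', ['url', 'served_from_cache', 'from_disk_cache', 'size'])
  requests = [
      FakeRequest('http://a.test/app.js', False, True, 1000),
      FakeRequest('http://a.test/main.css', False, False, 400),
      FakeRequest('http://a.test/app.js', True, False, 0),  # Blink cache hit.
  ]
  response_sizes = {r.url: r.size for r in requests if not r.served_from_cache}
  served_from_cache_bytes = 0
  served_from_network_bytes = 0
  for request in requests:
    if request.served_from_cache:
      continue
    if request.from_disk_cache:
      served_from_cache_bytes += response_sizes[request.url]
    else:
      served_from_network_bytes += response_sizes[request.url]
  assert served_from_cache_bytes == 1000
  assert served_from_network_bytes == 400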
def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
  """Validates a cache archive's content.

  Args:
    cache_build_trace_path: Path of the trace generated at cache build time.
    cache_archive_path: Path of the cache archive to validate.

  Returns:
    {
      'effective_encoded_data_lengths':
          {URL of all requests: encoded_data_length},
      'effective_post_requests': [URLs of POST requests],
      'expected_cached_resources': [URLs of resources expected to be cached],
      'successfully_cached_resources': [URLs of cached sub-resources]
    }
  """
  # TODO(gabadie): What's the best way of propagating errors happening in here?
  logging.info('listing cached urls from %s', cache_archive_path)
  with common_util.TemporaryDirectory() as cache_directory:
    chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
    cache_keys = set(
        chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
  trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
  effective_requests = sandwich_utils.ListUrlRequests(
      trace, sandwich_utils.RequestOutcome.All)
  effective_post_requests = sandwich_utils.ListUrlRequests(
      trace, sandwich_utils.RequestOutcome.Post)

  effective_encoded_data_lengths = {}
  for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
      trace.request_track.GetEvents()):
    if request.from_disk_cache or request.served_from_cache:
      # At cache archive creation time, a request may be loaded several times;
      # skip the cache hits to avoid recording request.encoded_data_length == 0.
      continue
    if request.url in effective_encoded_data_lengths:
      effective_encoded_data_lengths[request.url] = max(
          effective_encoded_data_lengths[request.url],
          request.GetResponseTransportLength())
    else:
      effective_encoded_data_lengths[request.url] = (
          request.GetResponseTransportLength())

  upload_data_stream_cache_entry_keys = set()
  upload_data_stream_requests = set()
  for cache_entry_key in cache_keys:
    match = _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match(cache_entry_key)
    if not match:
      continue
    upload_data_stream_cache_entry_keys.add(cache_entry_key)
    upload_data_stream_requests.add(match.group('url'))

  expected_cached_requests = effective_requests.difference(
      effective_post_requests)
  effective_cache_keys = cache_keys.difference(
      upload_data_stream_cache_entry_keys)

  _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests,
                         'POST resources')
  _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys,
                         'Cached resources')

  return {
      'effective_encoded_data_lengths': effective_encoded_data_lengths,
      'effective_post_requests': [url for url in effective_post_requests],
      'expected_cached_resources': [url for url in expected_cached_requests],
      'successfully_cached_resources': [url for url in effective_cache_keys]
  }
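
# Usage sketch (hypothetical file paths and wiring, not the module's actual
# task graph): the dict returned by _ValidateCacheArchiveContent() is JSON
# serializable, so it can be persisted next to the cache archive and later fed
# back to _ProcessRunOutputDir() as cache_validation_result.
def _ExampleValidateAndProcess(benchmark_setup, runner_output_dir):
  cache_validation_result = _ValidateCacheArchiveContent(
      'common/cache-build-trace.json', 'common/cache.zip')
  with open('common/cache-validation.json', 'w') as output:
    json.dump(cache_validation_result, output)
  with open('common/cache-validation.json') as input_file:
    cache_validation_result = json.load(input_file)
  return _ProcessRunOutputDir(
      cache_validation_result, benchmark_setup, runner_output_dir)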