Example #1
  def VerifyTrace(self, trace):
    """Verifies a trace with the cache validation result and the benchmark
    setup.
    """
    effective_requests = sandwich_utils.ListUrlRequests(
        trace, sandwich_utils.RequestOutcome.All)
    effective_post_requests = sandwich_utils.ListUrlRequests(
        trace, sandwich_utils.RequestOutcome.Post)
    effective_cached_requests = sandwich_utils.ListUrlRequests(
        trace, sandwich_utils.RequestOutcome.ServedFromCache)
    effective_uncached_requests = sandwich_utils.ListUrlRequests(
        trace, sandwich_utils.RequestOutcome.NotServedFromCache)

    missing_requests = self._original_requests.difference(effective_requests)
    unexpected_requests = effective_requests.difference(self._original_requests)
    expected_cached_requests = (
        self._original_cached_requests.difference(missing_requests))
    expected_uncached_requests = self._original_uncached_requests.union(
        unexpected_requests).difference(missing_requests)

    # POST requests can never be served from the cache.
    expected_cached_requests.difference_update(effective_post_requests)
    expected_uncached_requests.update(effective_post_requests)

    _PrintUrlSetComparison(self._original_requests, effective_requests,
                           'All resources')
    _PrintUrlSetComparison(set(), effective_post_requests, 'POST resources')
    _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests,
                           'Cached resources')
    _PrintUrlSetComparison(expected_uncached_requests,
                           effective_uncached_requests, 'Non cached resources')

    self._all_sent_url_requests.update(effective_uncached_requests)
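
The _PrintUrlSetComparison helper used above (and again in Example #5) is not part of this excerpt. A minimal sketch of it, assuming its only job is to log whether an observed URL set matches a reference set and, if not, which URLs are missing or unexpected (the signature comes from the call sites; the body is an illustration):

import logging

def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
  """Logs the difference between a reference URL set and an observed one."""
  assert type(ref_url_set) == set and type(url_set) == set
  if ref_url_set == url_set:
    logging.info('  %d %s are matching.', len(ref_url_set), url_set_name)
    return
  logging.error('  %s are not matching (expected %d, had %d)',
                url_set_name, len(ref_url_set), len(url_set))
  for url in sorted(ref_url_set.difference(url_set)):
    logging.error('    missing: %s', url)
  for url in sorted(url_set.difference(ref_url_set)):
    logging.error('    unexpected: %s', url)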
Example #2
  def SetupBenchmark():
    # Note: this is a nested task closure; `benchmark_name`, `domain_regexes`
    # and the `SetupBenchmark.path` attribute come from the enclosing scope.
    logging.info('loading %s', self._original_cache_trace_path)
    trace = loading_trace.LoadingTrace.FromJsonFile(
        self._original_cache_trace_path)
    logging.info('generating %s', SetupBenchmark.path)
    effective_subresource_urls = sandwich_utils.ListUrlRequests(
        trace, sandwich_utils.RequestOutcome.All)
    urls_to_enable_swr = _ExtractRegexMatchingUrls(
        effective_subresource_urls, domain_regexes)
    logging.info(
        'count of urls to enable SWR: %d', len(urls_to_enable_swr))
    with open(SetupBenchmark.path, 'w') as output:
      json.dump({
          'benchmark_name': benchmark_name,
          'urls_to_enable_swr': list(urls_to_enable_swr),
          'effective_subresource_urls': list(effective_subresource_urls)
      }, output)
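
_ExtractRegexMatchingUrls is also not shown in this excerpt. Judging from its call site, a plausible sketch is a filter that keeps every URL whose domain matches at least one of the compiled patterns in domain_regexes; this is a hypothetical implementation, not the actual one:

try:
  from urllib.parse import urlparse  # Python 3.
except ImportError:
  from urlparse import urlparse  # Python 2, which this code appears to target.

def _ExtractRegexMatchingUrls(urls, domain_regexes):
  """Returns the subset of |urls| whose domain matches one of the regexes."""
  matching_urls = set()
  for url in urls:
    domain = urlparse(url).netloc
    for domain_regex in domain_regexes:
      if domain_regex.search(domain):
        matching_urls.add(url)
        break
  return matching_urls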
Example #3
def _ProcessRunOutputDir(benchmark_setup, runner_output_dir):
  """Processes a benchmark's run output directory.

  Args:
    benchmark_setup: Same as for _RunOutputVerifier
    runner_output_dir: Same as for SandwichRunner.output_dir

  Returns:
    List of run metrics dictionaries, one per repeated run.
  """
  run_metrics_list = []
  for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
      runner_output_dir):
    trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
    logging.info('processing trace: %s', trace_path)
    trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
    served_from_cache_urls = sandwich_utils.ListUrlRequests(
        trace, sandwich_utils.RequestOutcome.ServedFromCache)
    matching_subresource_urls_used_from_cache = (
        served_from_cache_urls.intersection(
            set(benchmark_setup['urls_to_enable_swr'])))
    run_metrics = {
        'url': trace.url,
        'repeat_id': repeat_id,
        'benchmark_name': benchmark_setup['benchmark_name'],
        'cache_recording.subresource_count':
            len(benchmark_setup['effective_subresource_urls']),
        'cache_recording.matching_subresource_count':
            len(benchmark_setup['urls_to_enable_swr']),
        'benchmark.matching_subresource_count_used_from_cache':
            len(matching_subresource_urls_used_from_cache)
    }
    run_metrics.update(
        sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
            repeat_dir, trace))
    run_metrics_list.append(run_metrics)
  return run_metrics_list
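
For context, this function consumes the JSON file written by SetupBenchmark in Example #2. A hypothetical driver tying the two together (setup_path and _LoadSetupAndProcess are illustrative names, not part of the original code) could look like:

import json

def _LoadSetupAndProcess(setup_path, runner_output_dir):
  # Load the benchmark setup that SetupBenchmark (Example #2) wrote to disk.
  with open(setup_path) as setup_file:
    benchmark_setup = json.load(setup_file)
  # One run metrics dictionary per repeated run.
  return _ProcessRunOutputDir(benchmark_setup, runner_output_dir)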
Example #4
def _ProcessRunOutputDir(cache_validation_result, benchmark_setup,
                         runner_output_dir):
  """Processes a benchmark's run output directory.

  Args:
    cache_validation_result: Same as for _RunOutputVerifier
    benchmark_setup: Same as for _RunOutputVerifier
    runner_output_dir: Same as for SandwichRunner.output_dir

  Returns:
    List of run metrics dictionaries, one per repeated run.
  """
  run_metrics_list = []
  run_output_verifier = _RunOutputVerifier(cache_validation_result,
                                           benchmark_setup)
  cached_encoded_data_lengths = (
      cache_validation_result['effective_encoded_data_lengths'])
  for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
      runner_output_dir):
    trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)

    logging.info('loading trace: %s', trace_path)
    trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)

    logging.info('verifying trace: %s', trace_path)
    run_output_verifier.VerifyTrace(trace)

    logging.info('extracting metrics from trace: %s', trace_path)

    # Gather the response size per URL.
    response_sizes = {}
    for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
        trace.request_track.GetEvents()):
      # Ignore requests served from Blink's cache.
      if request.served_from_cache:
        continue
      if request.from_disk_cache:
        if request.url in cached_encoded_data_lengths:
          response_size = cached_encoded_data_lengths[request.url]
        elif request.url in response_sizes:
          response_size = response_sizes[request.url]
        else:
          # Some fat web pages may overflow the memory cache, so a request
          # can be served from the disk cache several times per page load.
          # With no size recorded for this URL, fall back to 0 instead of
          # reusing a value left over from an unrelated request.
          logging.warning('No known response size (may have been served from '
                          'the memory cache): %s', request.url)
          response_size = 0
      else:
        response_size = request.GetResponseTransportLength()
      response_sizes[request.url] = response_size

    # Sum the bytes served from the cache and from the network.
    served_from_network_bytes = 0
    served_from_cache_bytes = 0
    urls_hitting_network = set()
    for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
        trace.request_track.GetEvents()):
      # Ignore requests served from Blink's cache.
      if request.served_from_cache:
        continue
      urls_hitting_network.add(request.url)
      if request.from_disk_cache:
        served_from_cache_bytes += response_sizes[request.url]
      else:
        served_from_network_bytes += response_sizes[request.url]

    # Make sure that every request served from Blink's cache has at least one
    # corresponding request that was not served from Blink's cache.
    for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
        trace.request_track.GetEvents()):
      assert (request.url in urls_hitting_network
              or not request.served_from_cache)

    run_metrics = {
        'url': trace.url,
        'repeat_id': repeat_id,
        'subresource_discoverer': benchmark_setup['subresource_discoverer'],
        'cache_recording.subresource_count':
            len(cache_validation_result['effective_encoded_data_lengths']),
        'cache_recording.cached_subresource_count_theoretic':
            len(cache_validation_result['successfully_cached_resources']),
        'cache_recording.cached_subresource_count':
            len(cache_validation_result['expected_cached_resources']),
        'benchmark.subresource_count':
            len(sandwich_utils.ListUrlRequests(
                trace, sandwich_utils.RequestOutcome.All)),
        'benchmark.served_from_cache_count_theoretic':
            len(benchmark_setup['cache_whitelist']),
        'benchmark.served_from_cache_count':
            len(sandwich_utils.ListUrlRequests(
                trace, sandwich_utils.RequestOutcome.ServedFromCache)),
        'benchmark.served_from_network_bytes': served_from_network_bytes,
        'benchmark.served_from_cache_bytes': served_from_cache_bytes
    }
    run_metrics.update(
        sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
            repeat_dir, trace))
    run_metrics_list.append(run_metrics)
  run_metrics_list.sort(key=lambda e: e['repeat_id'])

  wpr_log_path = os.path.join(runner_output_dir,
                              sandwich_runner.WPR_LOG_FILENAME)
  logging.info('verifying wpr log: %s', wpr_log_path)
  run_output_verifier.VerifyWprLog(wpr_log_path)
  return run_metrics_list
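
VerifyTrace from Example #1 is a method of the _RunOutputVerifier instantiated here; its constructor is not part of this excerpt. A sketch consistent with the fields VerifyTrace reads and with the dictionary produced by Example #5 and consumed above, offered as an inference rather than the actual implementation:

class _RunOutputVerifier(object):
  """Verifies benchmark run traces against the recorded cache (sketch)."""

  def __init__(self, cache_validation_result, benchmark_setup):
    # URLs of all requests observed at cache build time.
    self._original_requests = set(
        cache_validation_result['effective_encoded_data_lengths'].keys())
    # Assumption: the requests expected to be cached are the original ones
    # that also appear in the benchmark's cache whitelist.
    self._original_cached_requests = self._original_requests.intersection(
        set(benchmark_setup['cache_whitelist']))
    self._original_uncached_requests = self._original_requests.difference(
        self._original_cached_requests)
    # Accumulates the URLs that hit the network across repeated runs, which
    # VerifyWprLog can later check against the WPR log.
    self._all_sent_url_requests = set()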
Example #5
def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
  """Validates the content of a cache archive.

  Args:
    cache_build_trace_path: Path of the trace generated at cache build time.
    cache_archive_path: Path of the cache archive to validate.

  Returns:
    {
      'effective_encoded_data_lengths':
        {URL of all requests: encoded_data_length},
      'effective_post_requests': [URLs of POST requests],
      'expected_cached_resources': [URLs of resources expected to be cached],
      'successfully_cached_resources': [URLs of cached sub-resources]
    }
  """
  # TODO(gabadie): What's the best way of propagating errors happening in here?
  logging.info('listing cached urls from %s', cache_archive_path)
  with common_util.TemporaryDirectory() as cache_directory:
    chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
    cache_keys = set(
        chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
  trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
  effective_requests = sandwich_utils.ListUrlRequests(
      trace, sandwich_utils.RequestOutcome.All)
  effective_post_requests = sandwich_utils.ListUrlRequests(
      trace, sandwich_utils.RequestOutcome.Post)
  effective_encoded_data_lengths = {}
  for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
      trace.request_track.GetEvents()):
    if request.from_disk_cache or request.served_from_cache:
      # At cache archive creation time, a resource may be fetched several
      # times; skip cache hits to avoid recording
      # request.encoded_data_length == 0 for responses loaded from the cache.
      continue
    if request.url in effective_encoded_data_lengths:
      effective_encoded_data_lengths[request.url] = max(
          effective_encoded_data_lengths[request.url],
          request.GetResponseTransportLength())
    else:
      effective_encoded_data_lengths[request.url] = (
          request.GetResponseTransportLength())

  upload_data_stream_cache_entry_keys = set()
  upload_data_stream_requests = set()
  for cache_entry_key in cache_keys:
    match = _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match(cache_entry_key)
    if not match:
      continue
    upload_data_stream_cache_entry_keys.add(cache_entry_key)
    upload_data_stream_requests.add(match.group('url'))

  expected_cached_requests = effective_requests.difference(
      effective_post_requests)
  effective_cache_keys = cache_keys.difference(
      upload_data_stream_cache_entry_keys)

  _PrintUrlSetComparison(effective_post_requests,
                         upload_data_stream_requests, 'POST resources')
  _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys,
                         'Cached resources')

  return {
      'effective_encoded_data_lengths': effective_encoded_data_lengths,
      'effective_post_requests': list(effective_post_requests),
      'expected_cached_resources': list(expected_cached_requests),
      'successfully_cached_resources': list(effective_cache_keys)
  }
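
_UPLOAD_DATA_STREAM_REQUESTS_REGEX is referenced but not defined in this excerpt. From its use above (matching cache entry keys and extracting a named 'url' group), a plausible definition assumes that entries for requests carrying an upload data stream are keyed as '<numeric stream id>/<url>'; the exact pattern is an assumption:

import re

# Hypothetical pattern: a cache key such as '1/https://example.com/post'
# would match, with the URL captured in the 'url' group.
_UPLOAD_DATA_STREAM_REQUESTS_REGEX = re.compile(r'^\d+/(?P<url>.*)$')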