def _AdRequestsReport(cls, trace, ad_rules, tracking_rules):
  """Counts the requests of a trace classified as ad and/or tracking.

  Args:
    trace: a loading trace; its request_track.GetEvents() provides the
        requests to classify.
    ad_rules: ad filtering rules. When falsy, 'ad_requests' stays None.
    tracking_rules: tracking filtering rules. When falsy,
        'tracking_requests' stays None.

  Returns:
    A dict with the keys 'request_count', 'ad_requests',
    'tracking_requests', 'ad_or_tracking_requests' and
    'ad_or_tracking_initiated_requests'. 'request_count' is always set; the
    combined counters are None when neither rule set is provided.
  """
  has_rules = bool(ad_rules) or bool(tracking_rules)
  requests = trace.request_track.GetEvents()
  result = {
      'request_count': len(requests),
      'ad_requests': 0 if ad_rules else None,
      'tracking_requests': 0 if tracking_rules else None,
      'ad_or_tracking_requests': 0 if has_rules else None,
      'ad_or_tracking_initiated_requests': 0 if has_rules else None
  }
  if not has_rules:
    # Nothing to classify, so do not pay for building the lens.
    return result
  content_classification_lens = ContentClassificationLens(
      trace, ad_rules, tracking_rules)
  # Reuse the events fetched above instead of querying the track again.
  for request in requests:
    is_ad = content_classification_lens.IsAdRequest(request)
    is_tracking = content_classification_lens.IsTrackingRequest(request)
    if ad_rules:
      result['ad_requests'] += int(is_ad)
    if tracking_rules:
      result['tracking_requests'] += int(is_tracking)
    result['ad_or_tracking_requests'] += int(is_ad or is_tracking)
  result['ad_or_tracking_initiated_requests'] = len(
      content_classification_lens.AdAndTrackingRequests())
  return result
def testAdFrame(self):
  """Checks that IsAdFrame() accepts the frame holding the copied request.

  The trace holds ten references to a copy of the fixture request moved to
  frame '123.123', followed by five references to the untouched fixture
  request.
  """
  framed_request = copy.deepcopy(self._REQUEST)
  framed_request.frame_id = '123.123'
  events = [framed_request] * 10 + [self._REQUEST] * 5
  loading_trace = test_utils.LoadingTraceFromEvents(events, self._PAGE_EVENTS)
  classifier = ContentClassificationLens(loading_trace, self._RULES, [])
  # NOTE(review): the second argument is presumably a ratio threshold;
  # confirm against IsAdFrame().
  is_ad_frame = classifier.IsAdFrame(framed_request.frame_id, .5)
  self.assertTrue(is_ad_frame)
def testGetDocumentUrlSeveralChanges(self):
  """Checks _GetDocumentUrl() when the document URL changes several times.

  Two status-200 copies of the fixture request, each with its own document
  URL, follow the fixture request; the URL of the last one is expected.
  """
  request = copy.deepcopy(self._REQUEST)
  request.status = 200
  request.document_url = 'http://www.blabla.com'
  request2 = copy.deepcopy(request)
  request2.document_url = 'http://www.blablabla.com'
  trace = test_utils.LoadingTraceFromEvents(
      [self._REQUEST, request, request2], self._PAGE_EVENTS)
  lens = ContentClassificationLens(trace, [], [])
  # assertEqual replaces the assertEquals alias, which newer unittest
  # versions no longer provide.
  self.assertEqual(request2.document_url, lens._GetDocumentUrl())
def testGetDocumentUrl(self):
  """Checks _GetDocumentUrl() with and without a leading redirect.

  The expected value is the fixture document URL both for a trace holding
  only the fixture request and for a trace where a status-302 copy with a
  different document URL comes first.
  """
  trace = test_utils.LoadingTraceFromEvents(
      [self._REQUEST], self._PAGE_EVENTS)
  lens = ContentClassificationLens(trace, [], [])
  # assertEqual replaces the assertEquals alias, which newer unittest
  # versions no longer provide.
  self.assertEqual(self._DOCUMENT_URL, lens._GetDocumentUrl())
  # Don't be fooled by redirects.
  request = copy.deepcopy(self._REQUEST)
  request.status = 302
  request.document_url = 'http://www.bla.com'
  trace = test_utils.LoadingTraceFromEvents(
      [request, self._REQUEST], self._PAGE_EVENTS)
  lens = ContentClassificationLens(trace, [], [])
  self.assertEqual(self._DOCUMENT_URL, lens._GetDocumentUrl())
def testAdAndTrackingRequests(self):
  """Checks which requests AdAndTrackingRequests() returns.

  Expected in the result: the fixture request, its copy placed in frame
  '123.123', and the 'http://www.example.com' request sharing that frame.
  The 'http://www.example.com' request in frame '123.456' must be absent.
  """
  def _DeriveRequest(request_id, frame_id, url=None):
    # Copy of the fixture request with the given identifiers; the URL is
    # only overridden when one is provided.
    derived = copy.deepcopy(self._REQUEST)
    derived.request_id = request_id
    if url is not None:
      derived.url = url
    derived.frame_id = frame_id
    return derived

  other_url = 'http://www.example.com'
  ad_request = _DeriveRequest('1234.2', '123.123')
  plain_in_plain_frame = _DeriveRequest('1234.3', '123.456', url=other_url)
  plain_in_ad_frame = _DeriveRequest(
      '1234.4', ad_request.frame_id, url=other_url)

  events = [
      self._REQUEST, ad_request, plain_in_plain_frame, plain_in_ad_frame]
  loading_trace = test_utils.LoadingTraceFromEvents(events, self._PAGE_EVENTS)
  classifier = ContentClassificationLens(loading_trace, self._RULES, [])

  expected = set([self._REQUEST, ad_request, plain_in_ad_frame])
  actual = set(classifier.AdAndTrackingRequests())
  self.assertSetEqual(expected, actual)
def __init__(self, trace, ad_rules=None, tracking_rules=None):
  """Computes and stores the metrics derived from a loading trace.

  Args:
    trace: (LoadingTrace) a loading trace.
    ad_rules: ([str]) List of ad filtering rules.
    tracking_rules: ([str]) List of tracking filtering rules.
  """
  self.trace = trace

  # Earliest start among the 'navigationStart' user timing events.
  start_events = trace.tracing_track.GetMatchingEvents(
      'blink.user_timing', 'navigationStart')
  self._navigation_start_msec = min(
      event.start_msec for event in start_events)

  dns_requests_and_cost = metrics.DnsRequestsAndCost(trace)
  self._dns_requests, self._dns_cost_msec = dns_requests_and_cost
  self._connection_stats = metrics.ConnectionMetrics(trace)

  # One report per user-perceived lens, all sharing the same activity and
  # network lenses.
  self._user_lens_reports = {}
  keyed_user_lenses = (
      ('plt', PLTLens(self.trace)),
      ('first_text', FirstTextPaintLens(self.trace)),
      ('contentful', FirstContentfulPaintLens(self.trace)),
      ('significant', FirstSignificantPaintLens(self.trace)),
  )
  activity = ActivityLens(trace)
  network_lens = NetworkActivityLens(self.trace)
  for report_key, lens in keyed_user_lenses:
    self._user_lens_reports[report_key] = PerUserLensReport(
        self.trace, lens, activity, network_lens,
        self._navigation_start_msec)

  self._transfer_size = metrics.TotalTransferSize(trace)[1]
  self._request_count = len(trace.request_track.GetEvents())

  # Missing rule lists are handed to the lens as empty lists.
  content_lens = ContentClassificationLens(
      trace, ad_rules or [], tracking_rules or [])
  has_ad_rules = bool(ad_rules)
  has_tracking_rules = bool(tracking_rules)
  self._ad_report = self._AdRequestsReport(
      trace, content_lens, has_ad_rules, has_tracking_rules)

  # The CPU cost window runs from navigation start up to the 'ms' value of
  # the 'plt' report added to that start.
  window_start_msec = self._navigation_start_msec
  window_end_msec = (
      self._navigation_start_msec
      + self._user_lens_reports['plt'].GenerateReport()['ms'])
  self._ads_cost = self._AdsAndTrackingCpuCost(
      window_start_msec, window_end_msec, content_lens, activity,
      has_tracking_rules or has_ad_rules)

  self._queue_stats = self._ComputeQueueStats(QueuingLens(trace))
def testGetDocumentUrl(self):
  """Checks _GetDocumentUrl() with and without a leading redirect.

  The expected value is the fixture document URL both for a trace holding
  only the fixture request and for a trace where a status-302 copy with a
  different document URL comes first.
  """
  trace = test_utils.LoadingTraceFromEvents([self._REQUEST],
                                            self._PAGE_EVENTS)
  lens = ContentClassificationLens(trace, [], [])
  # assertEqual replaces the assertEquals alias, which newer unittest
  # versions no longer provide.
  self.assertEqual(self._DOCUMENT_URL, lens._GetDocumentUrl())
  # Don't be fooled by redirects.
  request = copy.deepcopy(self._REQUEST)
  request.status = 302
  request.document_url = 'http://www.bla.com'
  trace = test_utils.LoadingTraceFromEvents([request, self._REQUEST],
                                            self._PAGE_EVENTS)
  lens = ContentClassificationLens(trace, [], [])
  self.assertEqual(self._DOCUMENT_URL, lens._GetDocumentUrl())
def testMainFrameIsNotAnAdFrame(self):
  """Checks that the main frame is not flagged as an ad or tracking frame.

  The trace holds only the fixture request, and the fixture rules are
  passed as ad rules.
  """
  events = [self._REQUEST]
  loading_trace = test_utils.LoadingTraceFromEvents(events, self._PAGE_EVENTS)
  classifier = ContentClassificationLens(loading_trace, self._RULES, [])
  flagged = classifier.IsAdOrTrackingFrame(self._MAIN_FRAME_ID)
  self.assertFalse(flagged)
def testTrackingRequest(self):
  """Checks classification when the rules are given as tracking rules only.

  With an empty ad rule list, the fixture request must be reported as a
  tracking request and not as an ad request.
  """
  events = [self._REQUEST]
  loading_trace = test_utils.LoadingTraceFromEvents(events, self._PAGE_EVENTS)
  classifier = ContentClassificationLens(loading_trace, [], self._RULES)
  is_ad = classifier.IsAdRequest(self._REQUEST)
  self.assertFalse(is_ad)
  is_tracking = classifier.IsTrackingRequest(self._REQUEST)
  self.assertTrue(is_tracking)