def test_visitor_country(self):
    """Compare ``analyzer.visitor_country`` with a reference per-country tally.

    The reference groups the page-request events by their ``ip`` field, maps
    each address to a country via ``geoip.country`` and adds up the
    page-request count of every group.  The tally is ordered by count,
    largest first, and both results are compared through ``str()``.
    """
    log_lines = TestAnalyzer.log_string.split("\n")
    collection = Collection([Event(line) for line in log_lines])
    page_events = analyzer.filter_page_requests(collection)

    # Reference tally: country -> number of page requests.
    views_by_country = {}
    for address, events_for_ip in page_events.group("ip").items():
        nation = geoip.country(address)
        hits = analyzer.filter_page_requests(events_for_ip).count()
        views_by_country[nation] = views_by_country.get(nation, 0) + hits

    expected = list(views_by_country.items())
    expected.sort(key=operator.itemgetter(1), reverse=True)

    actual = analyzer.visitor_country(collection)
    self.assertEqual(str(expected), str(actual))
def test_filter_page_request(self):
    """Compare ``analyzer.filter_page_requests`` with a manual filter.

    The reference result is a fresh ``Collection`` loaded from the same
    events and filtered with ``analyzer.predicate_page_request``; both
    results are compared through ``str()``.
    """
    log_lines = TestAnalyzer.log_string.split("\n")
    events = Collection([Event(line) for line in log_lines])

    # The function under test runs first, before the reference copy is made.
    actual = analyzer.filter_page_requests(events)

    expected = Collection()
    expected.load_from_collection(events)
    expected.filter(analyzer.predicate_page_request)

    self.assertEqual(str(expected), str(actual))
def test_popular_pages(self):
    """Compare ``analyzer.popular_pages`` with a reference per-URL tally.

    The reference groups the page-request events by ``http_request``, takes
    the second space-separated token of each request as the URL and adds up
    the event count of every group.  The tally is ordered by count, largest
    first, and both results are compared through ``str()``.
    """
    log_lines = TestAnalyzer.log_string.split("\n")
    collection = Collection([Event(line) for line in log_lines])
    page_events = analyzer.filter_page_requests(collection)

    # Reference tally: URL -> number of page views.
    views_by_url = {}
    for request_line, matching in page_events.group("http_request").items():
        tokens = request_line.split(" ")
        target = tokens[1]
        views_by_url[target] = views_by_url.get(target, 0) + matching.count()

    expected = list(views_by_url.items())
    expected.sort(key=operator.itemgetter(1), reverse=True)

    actual = analyzer.popular_pages(collection)
    self.assertEqual(str(expected), str(actual))
def test_referrers(self):
    """Compare ``analyzer.referrers`` with a reference per-referrer tally.

    The reference groups the page-request events by ``referrer``, skips any
    referrer containing ``www.cs.umd.edu/~bederson`` and records the event
    count of every remaining group.  The tally is ordered by count, largest
    first, and both results are compared through ``str()``.
    """
    collection = Collection([Event(log) for log in TestAnalyzer.log_string.split("\n")])
    pages = analyzer.filter_page_requests(collection)

    # canonical code
    ref_pageview = {}
    for referrer, events in pages.group("referrer").items():
        if "www.cs.umd.edu/~bederson" in referrer:
            continue  # ignore referrers within the self domain
        ref_pageview[referrer] = ref_pageview.get(referrer, 0) + events.count()

    # ``sorted(cmp=...)`` only exists on Python 2.  A key function yields the
    # same stable ordering on both Python 2 and 3 and matches the sibling tests.
    expected_output = sorted(ref_pageview.items(), reverse=True, key=operator.itemgetter(1))
    submission_output = analyzer.referrers(collection)
    self.assertEqual(str(expected_output), str(submission_output))