class PopiWSGIServer(object):
    """WSGI application that routes each request to one of its methods.

    The first path segment of the request is looked up in
    ``config['urlmap']`` to obtain the name of the handler method (see
    ``__call__``).  Handlers read the current request from
    ``self.request`` and return a response object.
    """

    def __init__(self, config, storage):
        """Keep the config and storage and build the helper objects.

        config  -- mapping; ``config['urlmap']`` is read on every request.
        storage -- hit storage, shared with the Analyzer created here.
        """
        self._conf = config
        self._storage = storage
        self._analyzer = Analyzer(self._storage)
        self._view = View()
        # Read once at start-up and used as the body of every log_hit
        # response.  None when the file does not exist.
        self._image = self._load_component('img/img.gif')

    def index(self):
        """Return the view's index page."""
        view = self._view.index()
        return Response(view)

    def cleardata(self):
        """Call ``storage.clear_hits`` with the ``days`` parameter (default 7).

        The parameter is converted with int() so that storage receives the
        same type whether or not the parameter was supplied; like
        hitmonitor, a non-numeric value raises ValueError.
        """
        days = int(self.request.GET.get('days', 7))
        self._storage.clear_hits(days)
        return Response('done')

    def deviators(self):
        """Return the analyzer's top deviators as JSON.

        Query parameters:
        qfield          -- field to query on (default 'hit_title').
        historic_length -- length of the historic window.
        recent_length   -- length of the recent window.

        Only when both lengths are given is an explicit window computed:
        the boundary is "now minus recent_length" and the start is
        "boundary minus historic_length".  Otherwise the analyzer is
        called with ``qfield`` alone.
        """
        qfield = self.request.GET.get('qfield', 'hit_title')
        historic_length = self.request.GET.get('historic_length', None)
        recent_length = self.request.GET.get('recent_length', None)
        if historic_length is not None and recent_length is not None:
            boundary = int(int(time.time()) - int(recent_length))
            start = int(boundary - int(historic_length))
            output = self._analyzer.get_top_deviators(
                qfield=qfield, start_time=start, boundary_time=boundary)
        else:
            output = self._analyzer.get_top_deviators(qfield=qfield)
        return json_response(output)

    def toppages(self):
        """Return the analyzer's top pages as JSON.

        ``timespan``, ``start_time`` and ``end_time`` are passed through
        exactly as received (strings, or None when absent).
        """
        timespan = self.request.GET.get('timespan', None)
        start_time = self.request.GET.get('start_time', None)
        end_time = self.request.GET.get('end_time', None)
        output = self._analyzer.get_top_pages(
            start_time=start_time, end_time=end_time, timespan=timespan)
        return json_response(output)

    def keywordcloud(self):
        """Return the analyzer's keyword cloud as JSON.

        ``timespan``, ``start_time``, ``end_time`` and ``minimum_count``
        are passed through as received; ``minimum_pct`` and
        ``maximum_pct`` are fixed at 80 and 500.
        """
        timespan = self.request.GET.get('timespan', None)
        start_time = self.request.GET.get('start_time', None)
        end_time = self.request.GET.get('end_time', None)
        minimum_count = self.request.GET.get('minimum_count', None)
        output = self._analyzer.get_keyword_cloud(
            start_time=start_time, end_time=end_time, timespan=timespan,
            minimum_count=minimum_count, minimum_pct=80, maximum_pct=500)
        return json_response(output)

    def hitmonitor(self):
        """Return hits from storage newer than ``last_timestamp`` as JSON.

        The ``ext``, ``sea``, ``int`` and ``dir`` parameters (each
        defaulting to 1) are converted to ints and passed to storage as
        the 'external', 'searches', 'internal' and 'direct' source flags.
        """
        last_timestamp = int(self.request.GET.get('last_timestamp', 0))
        sources = {
            'external': int(self.request.GET.get('ext', 1)),
            'searches': int(self.request.GET.get('sea', 1)),
            'internal': int(self.request.GET.get('int', 1)),
            'direct': int(self.request.GET.get('dir', 1)),
        }
        # +1 so the hit the client already has is not returned again.
        output = self._storage.get_recenthits(sources, last_timestamp + 1)
        return json_response(output)

    def dummydata(self):
        """Fill storage with a fixed set of linearly generated dummy hits."""
        dummy = Dummy(self._conf, self._storage, True)
        dummy.create_hits_linear(
            u'http://www.mysite.com/page',
            start_time=0, end_time=10000,
            start_hits_per_hour=0, end_hits_per_hour=50,
            referrer='http://google.com?q=cool%20page')
        dummy.create_hits_linear(
            u'http://www.mysite.com/page',
            start_time=5000, end_time=10000,
            start_hits_per_hour=0, end_hits_per_hour=50,
            referrer='http://www.google.com?q=cool')
        dummy.create_hits_linear(
            u'http://www.mysite.com/page2',
            start_time=0, end_time=10000,
            start_hits_per_hour=20, end_hits_per_hour=80)
        dummy.create_hits_linear(
            u'http://www.mysite.com/page3',
            start_time=0, end_time=10000,
            start_hits_per_hour=0, end_hits_per_hour=75)
        dummy.create_hits_linear(
            u'http://www.mysite.com/page4',
            start_time=0, end_time=10000,
            start_hits_per_hour=200, end_hits_per_hour=0)
        rand_start = random.random() * 60
        rand_end = random.random() * 60
        dummy.create_hits_linear(
            u'http://www.mysite.com/page2',
            start_time=0, end_time=10000,
            start_hits_per_hour=rand_start, end_hits_per_hour=rand_end,
            referrer='http://www.google.com?q=page2')
        return Response('done')

    def randomdata(self):
        """Generate between 1 and 10 batches of dummy hits with random rates."""
        # Dummy is built as (conf, storage, clear) in dummydata; the
        # previous call here passed only the storage, which put it in the
        # conf position.
        # NOTE(review): clear=False assumes random hits should be added to
        # the existing data rather than replace it -- confirm.
        dummy = Dummy(self._conf, self._storage, False)
        rand = int(random.random() * 10) + 1
        for i in range(rand):
            rand_start = random.random() * 60
            rand_end = random.random() * 60
            # Every batch uses the same page number (rand), not the loop
            # index.
            dummy.create_hits_linear(
                u'http://www.mysite.com/page' + str(rand),
                start_hits_per_hour=rand_start,
                end_hits_per_hour=rand_end,
                referrer='http://www.google.com?q=page' + str(rand))
        return Response('done')

    def log_hit(self):
        """Record a page hit and return the tracking image.

        Reads ``cur`` (page URL), ``ref`` (referrer) and ``title`` from
        the query string.  When ``cur`` is empty the request's referer
        header is used instead.  The image response is always returned;
        the hit is stored only when a URL is available and the hit is
        whitelisted and not blacklisted.
        """
        # Make sure we get plain strings, not unicode
        cur = self.request.str_GET.get('cur', None)
        ref = self.request.str_GET.get('ref', None)
        title = self.request.str_GET.get('title', None)
        if not cur:
            cur = self.request.headers.get('referer', None)
        if cur is not None:
            cur = get_unicode(unquote_plus(cur))
        if ref is not None:
            ref = get_unicode(unquote_plus(ref))
        if title is not None:
            title = get_unicode(unquote_plus(title)).strip()

        response = Response()
        response.headers['Content-Type'] = "image/gif"
        # A date in the past plus no-cache so every page view hits us.
        response.headers['Expires'] = "Sat, 26 Jul 1997 05:00:00 GMT"
        response.headers['Cache-Control'] = "no-cache, must-revalidate"
        response.body = self._image
        if not cur:
            return response

        visitor_ip = self.request.headers.get('X-Forwarded-For', None)
        if visitor_ip is None:
            visitor_ip = self.request.remote_addr
        hit = Hit(self._conf, cur, referrer=ref, title=title,
                  visitor_ip=visitor_ip)
        if hit.is_whitelisted() and not hit.is_blacklisted():
            self._storage.add_hit(hit)
        return response

    def _load_component(self, filepath):
        """Return the raw contents of a file below the components directory.

        filepath -- path relative to ``<this file>/../../components``.

        Returns None when the path resolves outside the components
        directory (absolute paths, '..' segments) or is not an existing
        regular file.
        """
        base = os.path.abspath(os.path.join(
            os.path.dirname(__file__), '..', '..', 'components'))
        path = os.path.abspath(os.path.join(base, filepath))
        # os.path.join discards `base` when filepath is absolute, so the
        # containment check has to be made on the resolved path.
        if not path.startswith(base + os.sep):
            return None
        if not os.path.isfile(path):
            return None
        # Binary mode: components include images, and text mode would
        # translate or try to decode their bytes.
        with open(path, 'rb') as f:
            return f.read()

    def get_component(self):
        """Serve the component named by the ``file`` query parameter.

        Returns 400 when the parameter is missing, absolute, or contains
        a '..' segment, 404 when the component cannot be loaded, and
        otherwise the file contents with a guessed Content-Type.
        """
        filepath = self.request.GET.get('file', None)
        if not filepath:
            return self.httperror(status=400, body="Bad Request")
        if '..' in filepath.split('/') or os.path.isabs(filepath):
            return self.httperror(status=400, body="Bad Request")
        mimetype = mimetypes.guess_type(filepath, False)
        component = self._load_component(filepath)
        if component is None:
            return self.httperror()
        response = Response()
        # guess_type returns (None, None) for unknown extensions.
        response.headers['Content-Type'] = (
            mimetype[0] or 'application/octet-stream')
        response.body = component
        return response

    def httperror(self, status=404, body="Not Found"):
        """Return a response with the given status and body."""
        response = Response()
        response.status = status
        response.body = body
        return response

    def __call__(self, environ, start_response):
        """WSGI entry point: dispatch on the first path segment.

        An empty segment maps to 'index'.  Segments missing from the
        urlmap, and urlmap entries that do not name a callable attribute
        of this object, are answered with ``httperror()``.
        """
        self.request = Request(environ)
        urlmap = self._conf['urlmap']
        segments = self.request.path_info.split('/')
        # An empty path_info splits to [''] and has no element 1.
        name = segments[1] if len(segments) > 1 else ''
        if name == '':
            name = 'index'
        method_name = urlmap.get(name, 'httperror')
        method = getattr(self, method_name, None)
        if not callable(method):
            method = self.httperror
        response = method()
        return response(environ, start_response)
class TestAnalyzer(TestBase):
    """Tests for Analyzer: top deviators, top pages and the keyword cloud."""

    def setUp(self):
        super(TestAnalyzer, self).setUp()
        self.analyzer = Analyzer(self._storage)
        self.dummy = Dummy(self._conf, self._storage, clear=True)

    def _deviators_by_url(self):
        """Query top deviators by URL over the 0 / 7500 / 10000 window."""
        return self.analyzer.get_top_deviators(
            qfield='hit_url',
            start_time=0,
            boundary_time=7500,
            end_time=10000)

    def _fill_hour_at_50_hph(self, start_time, end_time):
        """Generate hits for the test page at a constant 50 hits per hour."""
        self.dummy.create_hits_linear(
            u'http://mysite.com/page',
            start_hits_per_hour=50,
            end_hits_per_hour=50,
            start_time=start_time,
            end_time=end_time)

    def _add_search_hit(self, referrer):
        """Store a single hit on the test page with the given referrer."""
        self._storage.add_hit(
            Hit(self._conf, u'http://mysite.com/page', referrer=referrer))

    def test_deviators_stable(self):
        """Top deviators: constant hit rate"""
        self.dummy.create_hits_linear(
            u'http://mysite.com/page1',
            start_hits_per_hour=5000,
            end_hits_per_hour=5000,
            start_time=0,
            end_time=10000)
        expected = [{
            'name': u'http://mysite.com/page1',
            'pct': 0,
            'hph_recent': 5001,
            'hph_historic': 4999,
            'num_recent': 3473,
            'num_historic': 10416,
        }]
        self.assertEqual(self._deviators_by_url(), expected)

    def test_deviators_increasing(self):
        """Top deviators: rising hit rate"""
        self.dummy.create_hits_linear(
            u'http://mysite.com/page2',
            start_hits_per_hour=0,
            end_hits_per_hour=8000,
            start_time=0,
            end_time=10000)
        expected = [{
            'name': u'http://mysite.com/page2',
            'pct': 133,
            'hph_recent': 6999,
            'hph_historic': 2999,
            'num_recent': 4861,
            'num_historic': 6249,
        }]
        self.assertEqual(self._deviators_by_url(), expected)

    def test_deviators_decreasing(self):
        """Top deviators: falling hit rate"""
        self.dummy.create_hits_linear(
            u'http://mysite.com/page3',
            start_hits_per_hour=8000,
            end_hits_per_hour=0,
            start_time=0,
            end_time=10000)
        expected = [{
            'name': u'http://mysite.com/page3',
            'pct': -80,
            'hph_recent': 1000,
            'hph_historic': 5000,
            'num_recent': 695,
            'num_historic': 10417,
        }]
        self.assertEqual(self._deviators_by_url(), expected)

    def test_toppages_basic(self):
        """Top pages: integer timestamps"""
        self._fill_hour_at_50_hph(0, 3600)
        pages = self.analyzer.get_top_pages(
            qfield='hit_url', start_time=0, end_time=3600)
        self.assertEqual(pages, [
            {'name': u'http://mysite.com/page', 'count': 50, 'hph': 50.0},
        ])

    def test_toppages_strings(self):
        """Top pages: timestamps given as strings"""
        self._fill_hour_at_50_hph(0, 3600)
        pages = self.analyzer.get_top_pages(
            qfield='hit_url', start_time='0', end_time='3600')
        self.assertEqual(pages, [
            {'name': u'http://mysite.com/page', 'count': 50, 'hph': 50.0},
        ])

    def test_toppages_timespan(self):
        """Top pages: timespan relative to now"""
        window_start = time.time() - 3600
        window_end = time.time()
        self._fill_hour_at_50_hph(window_start, window_end)
        pages = self.analyzer.get_top_pages(qfield='hit_url', timespan='3600')
        self.assertEqual(pages, [
            {'name': u'http://mysite.com/page', 'count': 50, 'hph': 50.0},
        ])

    def test_keywordcloud_basic(self):
        """Keyword cloud: one hit per referrer variant"""
        cases = [
            # Regular search, single word
            (u'http://google.com?q=cool', [(u'cool', 100.0)]),
            # Regular search, phrase
            (u'http://google.com?q=cool page',
             [(u'cool', 50.0), (u'page', 50.0)]),
            # Empty search query
            (u'http://google.com?q=', []),
            # No search query
            (u'http://google.com', []),
            # With query, but no searchengine
            (u'http://mysite.com?q=test', []),
        ]
        for referrer, expected in cases:
            # Start each case from empty storage.
            self._storage.clear_hits(0)
            self._add_search_hit(referrer)
            self.assertEqual(self.analyzer.get_keyword_cloud(), expected)

    def test_keywordcloud_multi(self):
        """Keyword cloud: aggregated over several hits"""
        searches = [
            'cool page',
            'funny test',
            'cool',
            'page',
            'cool',
            'test page test',
            'cool test page',
            'page',
            'cool page',
            'very cool funny test page',
        ]
        for query in searches:
            self._add_search_hit(u'http://google.com?q=' + query)
        expected = [
            ('cool', 30.0),
            ('page', 35.0),
            ('funny', 10.0),
            ('test', 20.0),
            ('very', 5.0),
        ]
        # Compare sorted so the cloud's ordering does not matter.
        self.assertEqual(
            sorted(self.analyzer.get_keyword_cloud()), sorted(expected))

    def test_keywordcloud_specialchars(self):
        """Keyword cloud: non-ASCII search terms"""
        self._add_search_hit(u'http://google.com?q=éäüòñрусском')
        self.assertEqual(
            self.analyzer.get_keyword_cloud(), [(u'éäüòñрусском', 100.0)])