Beispiel #1
0
 def __init__(self, config, storage):
     """Keep the collaborators and preload the 'img/img.gif' component.

     ``config`` and ``storage`` are stored unchanged; the analyzer is built
     on top of the same storage object.
     """
     self._conf, self._storage = config, storage
     # The analyzer shares the storage object that was just stored above.
     self._analyzer = Analyzer(storage)
     self._view = View()
     # Loaded once at construction time and kept on the instance.
     self._image = self._load_component('img/img.gif')
Beispiel #2
0
class TestAnalyzer(TestBase):
    """Tests for ``Analyzer``: top deviators, top pages and keyword cloud."""

    def setUp(self):
        """Create the analyzer under test and a dummy-data generator."""
        super(TestAnalyzer, self).setUp()
        storage = self._storage
        self.analyzer = Analyzer(storage)
        self.dummy = Dummy(self._conf, storage, clear=True)

    def _check_deviators(self, url, start_hph, end_hph, stats):
        """Create one linear hit series and compare the deviator listing.

        Hits for ``url`` are generated over the time range 0..10000 and the
        analyzer is queried with the boundary at 7500; the single expected
        entry is ``stats`` plus the ``name`` key set to ``url``.
        """
        self.dummy.create_hits_linear(
            url,
            start_hits_per_hour=start_hph, end_hits_per_hour=end_hph,
            start_time=0, end_time=10000)
        found = self.analyzer.get_top_deviators(
            qfield='hit_url',
            start_time=0, boundary_time=7500, end_time=10000)
        self.assertEqual(found, [dict(stats, name=url)])

    def _check_top_pages(self, create_start, create_end, **query):
        """Create a constant 50 hits/hour series and compare the page listing.

        ``create_start`` / ``create_end`` bound the generated hits; ``query``
        is forwarded to ``get_top_pages`` next to ``qfield='hit_url'``.
        """
        self.dummy.create_hits_linear(
            u'http://mysite.com/page',
            start_hits_per_hour=50, end_hits_per_hour=50,
            start_time=create_start, end_time=create_end)
        found = self.analyzer.get_top_pages(qfield='hit_url', **query)
        self.assertEqual(found, [
            {'name': u'http://mysite.com/page', 'count': 50, 'hph': 50.0},
        ])

    def test_deviators_stable(self):
        """Test listing of top deviators - stable"""
        self._check_deviators(
            u'http://mysite.com/page1', 5000, 5000,
            {'pct': 0,
             'hph_recent': 5001, 'hph_historic': 4999,
             'num_recent': 3473, 'num_historic': 10416})

    def test_deviators_increasing(self):
        """Test listing of top deviators - increasing"""
        self._check_deviators(
            u'http://mysite.com/page2', 0, 8000,
            {'pct': 133,
             'hph_recent': 6999, 'hph_historic': 2999,
             'num_recent': 4861, 'num_historic': 6249})

    def test_deviators_decreasing(self):
        """Test listing of top deviators - decreasing"""
        self._check_deviators(
            u'http://mysite.com/page3', 8000, 0,
            {'pct': -80,
             'hph_recent': 1000, 'hph_historic': 5000,
             'num_recent': 695, 'num_historic': 10417})

    def test_toppages_basic(self):
        """Test listing of top pages - basic"""
        self._check_top_pages(0, 3600, start_time=0, end_time=3600)

    def test_toppages_strings(self):
        """Test listing of top pages - timestamps as string"""
        self._check_top_pages(0, 3600, start_time='0', end_time='3600')

    def test_toppages_timespan(self):
        """Test listing of top pages - using timespan"""
        # The hits cover the hour leading up to "now", matching the timespan.
        self._check_top_pages(time.time() - 3600, time.time(),
                              timespan='3600')

    def test_keywordcloud_basic(self):
        """Test generation of keyword cloud - basic"""
        cases = [
            # Regular search, single word
            {'ref': u'http://google.com?q=cool',
             'expect': [(u'cool', 100.0)]},
            # Regular search, phrase
            {'ref': u'http://google.com?q=cool page',
             'expect': [(u'cool', 50.0), (u'page', 50.0)]},
            # Empty search query
            {'ref': u'http://google.com?q=',
             'expect': []},
            # No search query
            {'ref': u'http://google.com',
             'expect': []},
            # With query, but no searchengine
            {'ref': u'http://mysite.com?q=test',
             'expect': []},
        ]

        for case in cases:
            # Each case starts from an empty hit store.
            self._storage.clear_hits(0)
            self._storage.add_hit(
                Hit(self._conf, u'http://mysite.com/page',
                    referrer=case['ref']))
            cloud = self.analyzer.get_keyword_cloud()
            self.assertEqual(cloud, case['expect'])

    def test_keywordcloud_multi(self):
        """Test generation of keyword cloud - multiple hits"""
        prefix = u'http://google.com?q='
        for query in ('cool page', 'funny test', 'cool', 'page', 'cool',
                      'test page test', 'cool test page', 'page',
                      'cool page', 'very cool funny test page'):
            self._storage.add_hit(
                Hit(self._conf, u'http://mysite.com/page',
                    referrer=prefix + query))

        expected = [
            ('cool', 30.0), ('page', 35.0), ('funny', 10.0),
            ('test', 20.0), ('very', 5.0),
        ]
        # The order of the cloud is not part of the check, only its content.
        self.assertEqual(sorted(self.analyzer.get_keyword_cloud()),
                         sorted(expected))

    def test_keywordcloud_specialchars(self):
        """Test generation of keyword cloud - special characters"""
        self._storage.add_hit(
            Hit(self._conf, u'http://mysite.com/page',
                referrer=u'http://google.com?q=éäüòñрусском'))
        cloud = self.analyzer.get_keyword_cloud()
        self.assertEqual(cloud, [(u'éäüòñрусском', 100.0)])
Beispiel #3
0
class PopiWSGIServer(object):
    """WSGI application that dispatches requests to the handler methods below.

    ``__call__`` takes the first path segment of the request, looks it up in
    ``config['urlmap']`` and invokes the method of that name. Handlers read
    the current request from ``self.request``.
    """

    def __init__(self, config, storage):
        """Keep the collaborators and preload the 'img/img.gif' component.

        :param config: configuration object; ``config['urlmap']`` is read on
            every request and the whole object is passed to ``Hit``/``Dummy``.
        :param storage: hit storage, shared with the analyzer.
        """
        self._conf = config
        self._storage = storage
        self._analyzer = Analyzer(self._storage)
        self._view = View()
        # May be None when the component file cannot be read.
        self._image = self._load_component('img/img.gif')

    def index(self):
        """Return the rendered index view."""
        view = self._view.index()
        return Response(view)

    def cleardata(self):
        """Clear stored hits; the ``days`` query parameter defaults to 7."""
        # NOTE(review): a value taken from the query string is passed on
        # as received (not converted to int) - confirm clear_hits copes.
        days = self.request.GET.get('days', 7)
        self._storage.clear_hits(days)
        return Response('done')

    def deviators(self):
        """Return the analyzer's top deviators as JSON.

        When both ``historic_length`` and ``recent_length`` are given, the
        boundary is ``now - recent_length`` and the start is
        ``boundary - historic_length``; otherwise the analyzer's own
        defaults apply.
        """
        qfield = self.request.GET.get('qfield', 'hit_title')
        historic_length = self.request.GET.get('historic_length', None)
        recent_length = self.request.GET.get('recent_length', None)
        if historic_length is not None and recent_length is not None:
            boundary = int(int(time.time()) - int(recent_length))
            start = int(boundary - int(historic_length))
            output = self._analyzer.get_top_deviators(
                qfield=qfield, start_time=start, boundary_time=boundary)
        else:
            output = self._analyzer.get_top_deviators(qfield=qfield)
        return json_response(output)

    def toppages(self):
        """Return the analyzer's top pages as JSON.

        ``timespan``, ``start_time`` and ``end_time`` are forwarded from the
        query string unchanged (``None`` when absent).
        """
        timespan = self.request.GET.get('timespan', None)
        start_time = self.request.GET.get('start_time', None)
        end_time = self.request.GET.get('end_time', None)
        output = self._analyzer.get_top_pages(
            start_time=start_time, end_time=end_time, timespan=timespan)
        return json_response(output)

    def keywordcloud(self):
        """Return the analyzer's keyword cloud as JSON.

        Time range and ``minimum_count`` come from the query string; the
        percentage bounds are fixed at 80 and 500.
        """
        timespan = self.request.GET.get('timespan', None)
        start_time = self.request.GET.get('start_time', None)
        end_time = self.request.GET.get('end_time', None)
        minimum_count = self.request.GET.get('minimum_count', None)
        output = self._analyzer.get_keyword_cloud(
            start_time=start_time, end_time=end_time, timespan=timespan,
            minimum_count=minimum_count, minimum_pct=80, maximum_pct=500)
        return json_response(output)

    def hitmonitor(self):
        """Return recent hits from storage as JSON.

        The ``ext``/``sea``/``int``/``dir`` query flags (default 1) are
        passed as the ``sources`` mapping.
        """
        last_timestamp = int(self.request.GET.get('last_timestamp', 0))
        sources = {'external': int(self.request.GET.get('ext', 1)),
                   'searches': int(self.request.GET.get('sea', 1)),
                   'internal': int(self.request.GET.get('int', 1)),
                   'direct': int(self.request.GET.get('dir', 1))}
        # presumably +1 skips the hit the client has already seen - verify
        # against get_recenthits.
        output = self._storage.get_recenthits(sources, last_timestamp + 1)
        return json_response(output)

    def dummydata(self):
        """Fill the storage with a fixed set of linear hit series plus one
        series with random start/end rates."""
        dummy = Dummy(self._conf, self._storage, True)

        dummy.create_hits_linear(u'http://www.mysite.com/page',
                                 start_time=0, end_time=10000,
                                 start_hits_per_hour=0, end_hits_per_hour=50,
                                 referrer='http://google.com?q=cool%20page')
        dummy.create_hits_linear(u'http://www.mysite.com/page',
                                 start_time=5000, end_time=10000,
                                 start_hits_per_hour=0, end_hits_per_hour=50,
                                 referrer='http://www.google.com?q=cool')
        dummy.create_hits_linear(u'http://www.mysite.com/page2',
                                 start_time=0, end_time=10000,
                                 start_hits_per_hour=20, end_hits_per_hour=80)
        dummy.create_hits_linear(u'http://www.mysite.com/page3',
                                 start_time=0, end_time=10000,
                                 start_hits_per_hour=0, end_hits_per_hour=75)
        dummy.create_hits_linear(u'http://www.mysite.com/page4',
                                 start_time=0, end_time=10000,
                                 start_hits_per_hour=200, end_hits_per_hour=0)

        rand_start = random.random() * 60
        rand_end = random.random() * 60
        dummy.create_hits_linear(u'http://www.mysite.com/page2',
                                 start_time=0, end_time=10000,
                                 start_hits_per_hour=rand_start,
                                 end_hits_per_hour=rand_end,
                                 referrer='http://www.google.com?q=page2')
        return Response('done')

    def randomdata(self):
        """Add between 1 and 10 random linear hit series for one page."""
        # Dummy takes (config, storage, clear) as in dummydata(); passing
        # only the storage would bind it to the config parameter.
        # NOTE(review): clear=False assumes this endpoint should add to the
        # existing hits rather than wipe them - confirm.
        dummy = Dummy(self._conf, self._storage, clear=False)
        rand = int(random.random() * 10) + 1
        # NOTE(review): every series targets the same page number (rand),
        # not the loop counter - kept as in the original, confirm intent.
        for _ in range(rand):
            rand_start = random.random() * 60
            rand_end = random.random() * 60
            dummy.create_hits_linear(
                u'http://www.mysite.com/page' + str(rand),
                start_hits_per_hour=rand_start, end_hits_per_hour=rand_end,
                referrer='http://www.google.com?q=page' + str(rand))
        return Response('done')

    def log_hit(self):
        """Record a hit and answer with the preloaded image.

        The page URL comes from the ``cur`` parameter, falling back to the
        ``Referer`` header. Without a URL nothing is stored. A hit is stored
        only when it is whitelisted and not blacklisted.
        """
        # Make sure we get plain strings, not unicode
        cur = self.request.str_GET.get('cur', None)
        ref = self.request.str_GET.get('ref', None)
        title = self.request.str_GET.get('title', None)

        if not cur:
            cur = self.request.headers.get('referer', None)

        if cur is not None:
            cur = get_unicode(unquote_plus(cur))
        if ref is not None:
            ref = get_unicode(unquote_plus(ref))
        if title is not None:
            title = get_unicode(unquote_plus(title)).strip()

        response = Response()
        response.headers['Content-Type'] = "image/gif"
        # Expiry date in the past plus no-cache so the image is re-requested.
        response.headers['Expires'] = "Sat, 26 Jul 1997 05:00:00 GMT"
        response.headers['Cache-Control'] = "no-cache, must-revalidate"
        # The image is None when the component file could not be read;
        # answer with an empty body instead of failing on the assignment.
        if self._image is not None:
            response.body = self._image

        if not cur:
            return response

        visitor_ip = self.request.headers.get('X-Forwarded-For', None)
        if visitor_ip is None:
            visitor_ip = self.request.remote_addr

        hit = Hit(self._conf, cur, referrer=ref, title=title,
                  visitor_ip=visitor_ip)
        if hit.is_whitelisted() and not hit.is_blacklisted():
            self._storage.add_hit(hit)
        return response

    def _load_component(self, filepath):
        """Return the raw bytes of a file below ``components``, or None.

        The path is resolved relative to ``<this file>/../../components``.
        None is returned when the file is missing or cannot be read.
        """
        path = os.path.join(os.path.dirname(__file__), '..', '..',
                            'components', filepath)
        try:
            # Binary mode: components include images, which text mode
            # would corrupt or fail to decode.
            with open(path, 'rb') as f:
                return f.read()
        except (IOError, OSError):
            return None

    @staticmethod
    def _is_safe_component_path(filepath):
        """Tell whether ``filepath`` is a relative path without '..' parts."""
        if not filepath:
            return False
        normalized = filepath.replace('\\', '/')
        # os.path.join() drops everything before an absolute component, so
        # an absolute path would escape the components directory.
        if normalized.startswith('/') or os.path.isabs(filepath):
            return False
        if os.path.splitdrive(filepath)[0]:
            return False
        return '..' not in normalized.split('/')

    def get_component(self):
        """Serve the component named by the ``file`` query parameter.

        Answers 400 for a missing, absolute or parent-traversing path and
        404 when the file cannot be loaded.
        """
        filepath = self.request.GET.get('file', None)
        if not self._is_safe_component_path(filepath):
            return self.httperror(status=400, body="Bad Request")
        mimetype = mimetypes.guess_type(filepath, False)[0]
        component = self._load_component(filepath)
        if component is None:
            return self.httperror()
        response = Response()
        # guess_type() yields None for unknown extensions.
        response.headers['Content-Type'] = (
            mimetype or 'application/octet-stream')
        response.body = component
        return response

    def httperror(self, status=404, body="Not Found"):
        """Build an error response with the given status and body."""
        response = Response()
        response.status = status
        response.body = body
        return response

    def __call__(self, environ, start_response):
        """WSGI entry point: pick the handler via ``urlmap`` and run it.

        An empty first path segment maps to ``index``; names missing from
        ``urlmap`` or naming no callable attribute fall back to
        ``httperror``.
        """
        # NOTE(review): the request is stored on the instance, so it is
        # shared by all requests handled by this object - confirm the
        # server does not call one instance from several threads.
        self.request = Request(environ)
        urlmap = self._conf['urlmap']
        segments = self.request.path_info.split('/')
        # An empty path_info splits into a single element only.
        name = segments[1] if len(segments) > 1 else ''
        if name == '':
            name = 'index'
        method_name = urlmap.get(name, 'httperror')
        method = getattr(self, method_name, None)
        if not callable(method):
            method = self.httperror
        response = method()
        return response(environ, start_response)
Beispiel #4
0
 def setUp(self):
     """Create the analyzer under test and a dummy-data generator."""
     super(TestAnalyzer, self).setUp()
     storage = self._storage
     # Analyzer and generator work on the same storage object.
     self.analyzer = Analyzer(storage)
     self.dummy = Dummy(self._conf, storage, clear=True)