Example #1
0
 def visitloop(index, varname):
     """Return the template replacement value named ``varname`` for one visit.

     ``self`` is taken from the enclosing scope (not visible in this block);
     the visit is looked up as ``self._visits[index]``.

     Parameters:
         index   -- position of the visit in ``self._visits``
         varname -- key of the replacement to return (e.g. ``'hostname'``)

     Returns the replacement value, or ``None`` when ``varname`` is not one
     of the known replacement keys.
     """
     visit = self._visits[index]
     last_page = visit.pages[-1]
     last_file = last_page.file

     loopreplacements = {
         'id'                : str(visit.number),
         'time'              : ApacheLogParser.stringdate(
                                   visit.begin_time,
                                   pattern="%m/%d/%y %H:%M:%S",
                                   offset=last_page.serveroffset),
         'countryicon'       : visit.country_icon,
         'countryname'       : visit.countryname,
         'countryextension'  : visit.countryextension,
         'hostname'          : visit.hostname,
         # Keep only the trailing 27 characters of the hostname.
         'hostname_short'    : visit.hostname[-27:],
         'number_of_pages'   : str(len(visit.pages)),
         'visitpage'         : os.path.join(
                                   self._visit_page_folder,
                                   str(visit.number).zfill(
                                       self._visit_filename_length) + ".html"),
         'os_icon'           : visit.os_icon,
         'full_os'           : visit.os + ' ' + visit.os_version,
         'os'                : visit.os,
         'browser_icon'      : visit.browser_icon,
         'full_browser'      : visit.browser + ' '
                               + visit.browser_version
                               + ' (' + visit.fullbrowser + ')',
         'browser'           : visit.browser,
         'referer_url'       : visit.referer,
         'referer_site'      : visit.referer_site,
         'referer_page'      : visit.referer_page,
         'last_page'         : last_file,
         # Everything after the last "/" that is not within the final two
         # characters, so a trailing slash does not yield an empty name.
         'last_page_short'   : last_file[last_file.rfind("/", 0, -2)+1:],
         # NOTE(review): this replace() substitutes '"' with '"' and is a
         # no-op as written; the replacement was presumably meant to be an
         # escaped form of the quote -- confirm against the upstream source.
         'search_term'       : visit.search.replace(r'"',r'"'),
         'search_term_short' : visit.search[:27],
         'is_bot'            : str(visit.is_bot),
         'anchor_name'       : 'a' + str(visit.number)
     }

     # Mark truncated values with an ellipsis on the side that was cut.
     if len(loopreplacements['last_page_short']) > 30:
         loopreplacements['last_page_short'] = \
             loopreplacements['last_page_short'][:27] + "..."
     if len(visit.search) > 27:
         loopreplacements['search_term_short'] += "..."
     if len(visit.hostname) > 27:
         loopreplacements['hostname_short'] = \
             "..." + loopreplacements['hostname_short']

     # For bots, the bot identity replaces both the OS and browser fields.
     if visit.is_bot == "Yes":
         bot_full_name = visit.botname + visit.bot_version
         loopreplacements['os_icon'] = visit.bot_icon
         loopreplacements['os'] = visit.botname
         loopreplacements['full_os'] = bot_full_name
         loopreplacements['browser_icon'] = visit.bot_icon
         loopreplacements['browser'] = visit.botname
         loopreplacements['full_browser'] = bot_full_name

     # Unknown names yield None; dict.get avoids a bare ``except`` that
     # would also hide unrelated errors.
     return loopreplacements.get(varname)
Example #2
0
 def _test_and_set_anchor(self, index):
     """Check if a visit is the first of its day, and record an anchor if so.

     The visit at ``self._visits[index]`` is compared with the visit before
     it. When it falls on a later calendar day (or is the very first visit),
     an anchor URI is stored in ``self._anchors[year][month][day]`` and the
     visit number is appended to ``self._anchor_ids`` -- unless an anchor
     for that day already exists.

     Parameters:
         index -- position of the visit in ``self._visits``

     Returns None; the method only updates ``self._anchors`` and
     ``self._anchor_ids``.
     """
     visit = self._visits[index]
     visit_time = ApacheLogParser.extract_from_date(visit.begin_time, visit.pages[0].serveroffset)

     if index > 0:
         # NOTE(review): unlike the current visit, the previous visit's time
         # is extracted without a server offset -- confirm this is intended.
         prev_time = \
             ApacheLogParser.extract_from_date(self._visits[index-1].begin_time)
         # Compare (year, day-of-year) rather than day-of-year alone, so the
         # first visit after a year change (day 1 following day 365/366) is
         # still recognised as a new day.
         is_new_day = (visit_time.tm_year, visit_time.tm_yday) > \
                      (prev_time.tm_year, prev_time.tm_yday)
     else:
         # The very first visit always starts a new day.
         is_new_day = True

     if not is_new_day:
         return

     year = visit_time.tm_year
     month = visit_time.tm_mon
     day = visit_time.tm_mday

     # The anchor lives on the output page that is about to be written,
     # hence the "+1" on the current page count.
     uri_filename = self._outfilename.replace(
         ".html", str(self._number_of_pages+1).zfill(6) + ".html")
     uri = uri_filename + "#a" + str(visit.number)

     # Create the nested year -> month levels on demand.
     days = self._anchors.setdefault(year, {}).setdefault(month, {})
     if day not in days:
         days[day] = uri
         self._anchor_ids.append(visit.number)
Example #3
0
# -*- coding: utf-8 -*-
import ApacheLogParser

if __name__ == '__main__':
    # Log format string handed to the parser together with the log path.
    log_format = u"%h %l %u %t %r %s %b \"%{Referer}i\" \"%{User-Agent}i\""
    logParser = ApacheLogParser.ApacheLogParser('test/access.log', log_format)
    # The meaning of the two arguments is defined by ApacheLogParser.run,
    # which is not visible here.
    logParser.run(3000, 'test.html')
Example #4
0
File: test.py Project: goerz/pyala
from ApacheLogParser import *
from DetailedStatComponent import DetailedStatComponent
from TemplateProcessor import TemplateProcessor
from visit import *
import re

# Log file(s) used by this test script.
testfiles = [r'/home/goerz/public_html/logs/access.log.2007-09.tempcopy.gz']

TemplateProcessor.overwrite = True

alp = ApacheLogParser()
alp.compressed = True
#alp.add_includepattern(('hostname', re.compile(r'physik.fu-berlin.de')))

# (field, regex) pairs registered as exclude patterns, in this order.
# NOTE(review): several patterns contain unescaped dots, which match any
# character -- presumably harmless here, but confirm.
for _exclude_field, _exclude_regex in (
    ('hostname', r'googlebot.com'),
    ('hostname', r'inktomisearch'),
    ('hostname', r'phx.gbl'),
    ('hostname', r'search.live.com'),
    ('hostname', r'crawl.yahoo.net'),
    ('file', r'/images/'),
    ('file', r'/fortunes/'),
    ('file', r'/pagestyle\.css'),
    ('hostname', r'pfeffer\.zedat'),
):
    alp.add_excludepattern((_exclude_field, re.compile(_exclude_regex)))

visits = VisitManager()
# Exclude visits whose 'is_bot' field matches "Yes".
visits.add_excludepattern(('is_bot', re.compile(r'Yes')))
Visit.html_template = "../templates/visit.html"

detailedstats = DetailedStatComponent()
detailedstats.set_option('outdir', "../output")
Example #5
0
File: test3.py Project: goerz/pyala
from ApacheLogParser import *
from visit import *
import re

# Log file parsed by this test script.
testfile = r'/home/goerz/public_html/logs/access.log.2006-07.gz'

alp = ApacheLogParser(testfile)
alp.compressed = True
#visits = VisitManager()
#alp.add_excludepattern(('hostname', re.compile(r'msnbot')))
#alp.add_excludepattern(('file', re.compile(r'/images/')))
#alp.add_excludepattern(('file', re.compile(r'abiz')))
#alp.add_includepattern(('hostname', re.compile(r'tlnk')))

# Print the hostname of every item the parser yields. The parenthesised
# single-argument form behaves the same under the Python 2 print statement
# and the Python 3 print function.
for x in alp:
    print(x.hostname)


#print "\n\nSummary:"
#print "Accepted Lines     : " + str(alp.acceptedlines())
#print "Rejected Lines     : " + str(alp.rejectedlines())
#print "Not parsable Lines : " + str(alp.notparsablelines())
#print "Total Lines        : " + str(alp.totallines())