def visitloop(index, varname):
    """ Return the template replacement `varname` for the visit at `index`.

        `self` is not a parameter; it is taken from the enclosing scope.
        All replacement values for the visit are built on every call, so an
        error while reading a visit attribute propagates to the caller.

        index   -- position of the visit in self._visits
        varname -- name of the template variable to look up

        Returns the replacement value, or None if `varname` is not a known
        variable name.
    """
    visit = self._visits[index]
    last_page = visit.pages[-1]
    last_file = last_page.file
    visit_number = str(visit.number)

    loopreplacements = {
        'id' : visit_number,
        'time' : ApacheLogParser.stringdate(
                     visit.begin_time,
                     pattern="%m/%d/%y %H:%M:%S",
                     offset=last_page.serveroffset),
        'countryicon' : visit.country_icon,
        'countryname' : visit.countryname,
        'countryextension' : visit.countryextension,
        'hostname' : visit.hostname,
        # keep the *end* of the hostname
        'hostname_short' : visit.hostname[-27:],
        'number_of_pages' : str(len(visit.pages)),
        'visitpage' : os.path.join(
                          self._visit_page_folder,
                          visit_number.zfill(self._visit_filename_length)
                          + ".html"),
        'os_icon' : visit.os_icon,
        'full_os' : visit.os + ' ' + visit.os_version,
        'os' : visit.os,
        'browser_icon' : visit.browser_icon,
        'full_browser' : visit.browser + ' ' + visit.browser_version
                         + ' (' + visit.fullbrowser + ')',
        'browser' : visit.browser,
        'referer_url' : visit.referer,
        'referer_site' : visit.referer_site,
        'referer_page' : visit.referer_page,
        'last_page' : last_file,
        # last path component; the end bound of -2 skips a trailing "/"
        'last_page_short' : last_file[last_file.rfind("/", 0, -2)+1:],
        # Escape double quotes for the HTML output. The previous code
        # replaced '"' with '"', which had no effect.
        'search_term' : visit.search.replace('"', '&quot;'),
        'search_term_short' : visit.search[:27],
        'is_bot' : str(visit.is_bot),
        'anchor_name' : 'a' + visit_number,
    }

    # Mark shortened values with an ellipsis on the side that was cut off.
    if len(loopreplacements['last_page_short']) > 30:
        loopreplacements['last_page_short'] = \
            loopreplacements['last_page_short'][:27] + "..."
    if len(visit.search) > 27:
        loopreplacements['search_term_short'] += "..."
    if len(visit.hostname) > 27:
        loopreplacements['hostname_short'] = \
            "..." + loopreplacements['hostname_short']

    # For bots, the OS and browser fields all show the bot's identity.
    if visit.is_bot == "Yes":
        bot_full_name = visit.botname + visit.bot_version
        loopreplacements['os_icon'] = visit.bot_icon
        loopreplacements['os'] = visit.botname
        loopreplacements['full_os'] = bot_full_name
        loopreplacements['browser_icon'] = visit.bot_icon
        loopreplacements['browser'] = visit.botname
        loopreplacements['full_browser'] = bot_full_name

    try:
        return loopreplacements[varname]
    except (KeyError, TypeError):
        # unknown (or unhashable) variable name
        return None
def _test_and_set_anchor(self, index):
    """ Check if a visit is the first of the day, and mark as anchor
        position if so

        index -- position of the visit in self._visits

        A visit starts a new day if it is the first visit in the list
        (index 0) or if its (year, day of year) is later than that of the
        visit before it. For such a visit, an anchor URI is stored in
        self._anchors[year][month][day], unless that day already has one,
        and the visit number is appended to self._anchor_ids.
    """
    visit = self._visits[index]
    visit_time = ApacheLogParser.extract_from_date(
        visit.begin_time, visit.pages[0].serveroffset)
    year = visit_time.tm_year
    month = visit_time.tm_mon
    day = visit_time.tm_mday

    if index > 0:
        prev_visit = self._visits[index-1]
        # Apply the previous visit's own server offset, as is done for the
        # current visit, so that both days are computed the same way.
        prev_time = ApacheLogParser.extract_from_date(
            prev_visit.begin_time, prev_visit.pages[0].serveroffset)
        # Compare the year as well: the day of year alone goes from
        # 365/366 back to 1 at New Year and would miss that day change.
        is_new_day = ((year, visit_time.tm_yday)
                      > (prev_time.tm_year, prev_time.tm_yday))
    else:
        # the very first visit always opens a day
        is_new_day = True

    if not is_new_day:
        return

    # there should be an anchor
    uri_filename = self._outfilename.replace(
        ".html", str(self._number_of_pages+1).zfill(6) + ".html")
    uri = uri_filename + "#a" + str(visit.number)

    days = self._anchors.setdefault(year, {}).setdefault(month, {})
    if day not in days:
        days[day] = uri
        # NOTE(review): the id is recorded only together with a newly
        # stored anchor URI -- confirm that this nesting is intended.
        self._anchor_ids.append(visit.number)
# -*- coding: utf-8 -*-
import ApacheLogParser

if __name__ == '__main__':
    # Log format string passed to the parser along with the log file.
    log_format = u"%h %l %u %t %r %s %b \"%{Referer}i\" \"%{User-Agent}i\""
    parser = ApacheLogParser.ApacheLogParser('test/access.log', log_format)
    # NOTE(review): presumably (entry limit, output file) -- confirm
    # against ApacheLogParser.run.
    parser.run(3000, 'test.html')
from ApacheLogParser import *
from DetailedStatComponent import DetailedStatComponent
from TemplateProcessor import TemplateProcessor
from visit import *
import re

testfiles = [ r'/home/goerz/public_html/logs/access.log.2007-09.tempcopy.gz' ]

TemplateProcessor.overwrite = True

# Log parser for compressed logs, with exclude patterns for some hosts and
# files.
alp = ApacheLogParser()
alp.compressed = True
#alp.add_includepattern(('hostname', re.compile(r'physik\.fu-berlin\.de')))

# (attribute, regular expression) pairs, registered in this order.
# Dots are escaped so that they match a literal "." only.
for _field, _regex in (
        ('hostname', r'googlebot\.com'),
        ('hostname', r'inktomisearch'),
        ('hostname', r'phx\.gbl'),
        ('hostname', r'search\.live\.com'),
        ('hostname', r'crawl\.yahoo\.net'),
        ('file', r'/images/'),
        ('file', r'/fortunes/'),
        ('file', r'/pagestyle\.css'),
        ('hostname', r'pfeffer\.zedat'),
    ):
    alp.add_excludepattern((_field, re.compile(_regex)))

# Visit manager that excludes visits whose is_bot value matches "Yes".
visits = VisitManager()
visits.add_excludepattern(('is_bot', re.compile(r'Yes')))
Visit.html_template = "../templates/visit.html"

detailedstats = DetailedStatComponent()
detailedstats.set_option('outdir', "../output")
from ApacheLogParser import *
from visit import *
import re

# Print the hostname of every entry the parser yields for a compressed log.
testfile = r'/home/goerz/public_html/logs/access.log.2006-07.gz'
alp = ApacheLogParser(testfile)
alp.compressed = True

# Optional filters, disabled by default:
#visits = VisitManager()
#alp.add_excludepattern(('hostname', re.compile(r'msnbot')))
#alp.add_excludepattern(('file', re.compile(r'/images/')))
#alp.add_excludepattern(('file', re.compile(r'abiz')))
#alp.add_includepattern(('hostname', re.compile(r'tlnk')))

for entry in alp:
    # The parenthesised form works as a Python 2 statement and as a
    # Python 3 function call.
    print(entry.hostname)

#print "\n\nSummary:"
#print "Accepted Lines     : " + str(alp.acceptedlines())
#print "Rejected Lines     : " + str(alp.rejectedlines())
#print "Not parsable Lines : " + str(alp.notparsablelines())
#print "Total Lines        : " + str(alp.totallines())