def consumer(index, filename, start, end, u):
    """Parse one slice of a log file and report how many entries were handled.

    Every item yielded by ``iterate_log(filename, start, end)`` is passed to
    ``proto`` on a fresh ``apparser.apparser()``. The whole run is timed with
    a ``timer.timer()`` and reported through its ``stop`` method.

    Args:
        index: Job number; used in the timer label and echoed in the result.
        filename: Forwarded unchanged to ``iterate_log``.
        start: Forwarded unchanged to ``iterate_log`` (presumably where the
            slice begins -- confirm against ``iterate_log``).
        end: Forwarded unchanged to ``iterate_log`` (presumably where the
            slice ends -- confirm against ``iterate_log``).
        u: Not used by this function; kept so existing callers keep working.

    Returns:
        A ``(index, count)`` tuple, where ``count`` is the number of items
        yielded by ``iterate_log`` for this slice.
    """
    parser = apparser.apparser()
    t = timer.timer()

    count = 0  # stays 0 when iterate_log yields nothing
    for count, line in enumerate(iterate_log(filename, start, end), 1):
        # Only the parser's accumulated state matters; the return value of
        # proto() was never consumed.
        parser.proto(line)

    t.stop("job %d" % index, count)
    return (index, count)
#! /usr/bin/python3.1 # Try to detect a spamming bot from an apache log... import sys import apparser import os.path import io import geoip p = apparser.apparser() g = geoip.geoip('geoip.sqlite') files = {} if not len(sys.argv) == 2: sys.stderr.write('Syntax: %s <apache logfile>' % sys.argv[0]) exit() if not os.path.exists(sys.argv[1]): sys.stderr.write('file %s does not exist' % sys.argv[1]) exit() fd = io.open(sys.argv[1]) i = 0 import collections posts = collections.defaultdict(dict) uris = collections.defaultdict(dict) while True: i += 1 line = fd.readline() if line == '': # EOF break line = line.rstrip('\n') if p.feed(line) == False: # sys.stderr.write('PARSE ERROR (%d): %s\n' % (i, line))