Esempio n. 1
0
def consumer(index, filename, start, end, u):
    """Parse one slice of a log file and report how many lines were seen.

    Every item yielded by ``iterate_log(filename, start, end)`` is passed to
    ``parser.proto``; the parse result itself is discarded, only the number
    of items is tracked.

    Args:
        index: Job number; used in the timer label and echoed back in the
            return value.
        filename: Forwarded unchanged to ``iterate_log``.
        start: Forwarded unchanged to ``iterate_log``.
        end: Forwarded unchanged to ``iterate_log``.
        u: Currently unused by the live code path.

    Returns:
        A ``(index, line_count)`` tuple.
    """
    log_parser = apparser.apparser()
    job_timer = timer.timer()
    # A second timer was only ever consumed by per-chunk progress reporting
    # that is disabled; it is still constructed so that the sequence of
    # timer objects created is unchanged.
    progress_timer = timer.timer()

    line_count = 0
    for line_count, entry in enumerate(iterate_log(filename, start, end), 1):
        log_parser.proto(entry)

    job_timer.stop("job %d" % index, line_count)
    return (index, line_count)
Esempio n. 2
0
#! /usr/bin/python3.1

# Try to detect a spamming bot from an apache log...
import sys
import apparser
import os.path
import io
import geoip

# Log-line parser and GeoIP helper (presumably backed by the local
# 'geoip.sqlite' database file -- confirm against the geoip module).
p = apparser.apparser()
g = geoip.geoip('geoip.sqlite')
files = {}

# Exactly one command-line argument (the log file) is required.
# sys.exit() with a string prints it to stderr followed by a newline and
# terminates with exit status 1, so callers and shell scripts can detect
# the failure; the bare exit() helper used before returned status 0 and
# is only available when the site module is loaded.
if len(sys.argv) != 2:
    sys.exit('Syntax: %s <apache logfile>' % sys.argv[0])
if not os.path.exists(sys.argv[1]):
    sys.exit('file %s does not exist' % sys.argv[1])

# The handle is consumed line by line by the read loop that follows.
fd = io.open(sys.argv[1])
i = 0  # incremented once per read attempt by the loop below
import collections
# Two-level mappings: a missing outer key yields a fresh empty dict.
posts = collections.defaultdict(dict)
uris = collections.defaultdict(dict)
while True:
    i += 1
    line = fd.readline()
    if line == '': # EOF
        break
    line = line.rstrip('\n')
    if p.feed(line) == False:
#        sys.stderr.write('PARSE ERROR (%d): %s\n' % (i, line))