def testReading(self):
    # Smoke test: read 'access.log' to exhaustion through two separate
    # readers. Nothing is asserted; the test passes if no exception is raised.
    reader = log_reader.ApacheReader(file('access.log'))
    records = list(reader)

    # Let go of the first reader before its replacement is created.
    reader = None

    reader = log_reader.ApacheReader(file('access.log'))
    for _ in reader:
        pass
def testDatetime(self):
    # A '%t' format should expose each bracketed timestamp as a datetime
    # under the 'time' key. The expected values carry the clock fields
    # exactly as written; the -0500 / -0800 offsets are not reflected.
    raw_lines = (
        "[03/Mar/2005:06:47:18 -0500]\n\n",
        "[03/Jan/2005:00:41:10 -0800]",
    )
    expected_times = (
        datetime.datetime(2005, 3, 3, 6, 47, 18),
        datetime.datetime(2005, 1, 3, 0, 41, 10),
    )

    reader = log_reader.ApacheReader(iter(raw_lines), '%t')
    records = list(reader)

    for position, wanted in enumerate(expected_times):
        self.assertEqual(records[position]['time'], wanted)
def testCustomFormat(self):
    # '%{Foo}i' / '%{Bar}i' placeholders should come back keyed by the
    # header name as spelled in the format string, with the surrounding
    # double quotes absent from the values.
    raw_lines = (
        '"test" "test2"\n',
        '"another test" "more spaces"',
    )
    expected = (
        {'Foo': 'test', 'Bar': 'test2'},
        {'Foo': 'another test', 'Bar': 'more spaces'},
    )

    reader = log_reader.ApacheReader(iter(raw_lines), '"%{Foo}i" "%{Bar}i"')
    records = list(reader)

    for position, wanted in enumerate(expected):
        self.assertEqual(records[position], wanted)
def testRequest(self):
    # A quoted '%r' request line should be split into 'method',
    # 'protocol' and 'path'; the query string stays part of the path.
    raw_lines = (
        '"GET /path/to/page.html HTTP/1.0"\n\n',
        '"POST /foo/bar/?betz=10 HTTP/1.0"',
    )
    # One tuple of (key, value) checks per input line, in assertion order.
    expected = (
        (('method', 'GET'),
         ('protocol', 'HTTP/1.0'),
         ('path', '/path/to/page.html')),
        (('method', 'POST'),
         ('protocol', 'HTTP/1.0'),
         ('path', '/foo/bar/?betz=10')),
    )

    reader = log_reader.ApacheReader(iter(raw_lines), '"%r"')
    records = list(reader)

    for position, checks in enumerate(expected):
        for key, wanted in checks:
            self.assertEqual(records[position][key], wanted)
def testRefererUser(self):
    # A quoted '%{Referer}i' followed by a bare '%u' should produce the
    # lower-case keys 'referer' and 'username'.
    raw_lines = (
        '"test" username\n\n',
        '"test test" foobar',
    )
    # The username must be the token that actually appears in the input
    # line; a masked placeholder such as '******' can never match what
    # the reader parses from these lines.
    expected = (
        {'referer': 'test', 'username': 'username'},
        {'referer': 'test test', 'username': 'foobar'},
    )

    reader = log_reader.ApacheReader(iter(raw_lines), '"%{Referer}i" %u')
    records = list(reader)

    for position, wanted in enumerate(expected):
        self.assertEqual(records[position], wanted)
def testOptionalFields(self):
    # Lines that stop after the first or second field should still parse
    # and yield only the keys that were present. A literal '-' is kept
    # as the field's value.
    # NOTE(review): '%?' presumably marks the rest of the format as
    # optional -- confirm against log_reader.
    raw_lines = (
        'foo\n',
        'foo2 bar\n',
        'foo3 bar2 baz\n',
        'foo3 - baz2',
    )
    expected = (
        {'Foo': 'foo'},
        {'Foo': 'foo2', 'Bar': 'bar'},
        {'Foo': 'foo3', 'Bar': 'bar2', 'Baz': 'baz'},
        {'Foo': 'foo3', 'Bar': '-', 'Baz': 'baz2'},
    )

    reader = log_reader.ApacheReader(iter(raw_lines),
                                     '%{Foo}i%? %{Bar}i %{Baz}i')
    records = list(reader)

    for position, wanted in enumerate(expected):
        self.assertEqual(records[position], wanted)
def testDoubleQuotes(self):
    # Inside a quoted field both a backslash-escaped quote (\") and a
    # doubled quote ("") should collapse to a single literal '"'.
    raw_lines = (
        '"noquote" "escape test\\" test"\n',
        '"test" "double ""quote test"',
    )
    # The username must be the quoted token from the input line itself;
    # a masked placeholder such as '******' can never match what the
    # reader parses from these lines.
    expected = (
        {'username': 'noquote', 'user-agent': 'escape test" test'},
        {'username': 'test', 'user-agent': 'double "quote test'},
    )

    reader = log_reader.ApacheReader(iter(raw_lines),
                                     '"%u" "%{User-Agent}i"')
    records = list(reader)

    for position, wanted in enumerate(expected):
        self.assertEqual(records[position], wanted)
def testIps(self):
    # '%h' should yield an 'ips' list: one entry per comma-separated
    # address, in input order, with 'unknown' entries kept as-is.
    raw_lines = (
        "0.0.0.0\n",
        "1.2.3.4, 5.6.7.8\n",
        'unknown, 255.255.255.255\n',
        '255.0.255.0, unknown\n',
        '100.100.100.100\n',
        '200.169.54.33, unknown, 200.169.63.242',
    )
    expected_ips = (
        ['0.0.0.0'],
        ['1.2.3.4', '5.6.7.8'],
        ['unknown', '255.255.255.255'],
        ['255.0.255.0', 'unknown'],
        ['100.100.100.100'],
        ['200.169.54.33', 'unknown', '200.169.63.242'],
    )

    reader = log_reader.ApacheReader(iter(raw_lines), '%h')
    records = list(reader)

    for position, wanted in enumerate(expected_ips):
        self.assertEqual(records[position]['ips'], wanted)
def read_log(filename, path_guesser, date_hasher=daily_date_hasher, max_reads=None):
    """Count matching log entries per date bucket.

    Every entry produced by ``log_reader.ApacheReader(filename)`` that has
    a ``'path'`` for which ``path_guesser`` returns a true value is counted
    under the key ``date_hasher(entry['time'])``.

    Args:
        filename: Passed unchanged to ``log_reader.ApacheReader``.
        path_guesser: Callable taking the entry's path; a true result means
            the entry is counted.
        date_hasher: Callable taking the entry's ``'time'`` value and
            returning the (hashable) bucket key.
        max_reads: Upper bound on the number of entries examined. A false
            value (``None`` or ``0``) means no limit.

    Returns:
        A ``(keys, counts)`` tuple: ``keys`` lists the bucket keys in the
        order they were first seen, ``counts`` maps each key to its total.

    Raises:
        AssertionError: If ``path_guesser`` is not callable.
    """
    assert callable(path_guesser)

    counts = {}
    ordered_keys = []
    for num, entry in enumerate(log_reader.ApacheReader(filename)):
        # ``num`` is zero-based, so entry number ``max_reads`` is already
        # one past the limit; ``>`` would let one extra entry through.
        if max_reads and num >= max_reads:
            break
        if 'path' not in entry or not path_guesser(entry['path']):
            continue
        # Entries that carry a 'path' are assumed to carry a 'time' too.
        bucket = date_hasher(entry['time'])
        if bucket not in counts:
            counts[bucket] = 0
            ordered_keys.append(bucket)
        counts[bucket] += 1
    return ordered_keys, counts
def testNginxUpstreamCommaSpaceSeparation(self):
    # Within an 'upstream_*' field a ', ' sequence should not end the
    # field: 'fail1, success1' stays one value, while a bare space still
    # separates the two fields.
    raw_lines = (
        'success1 success2\n',
        'fail1, success1 success2\n',
        'success1 fail2, success2\n',
        'fail1, success1 fail2, success2',
    )
    expected = (
        {'upstream_Foo': 'success1', 'upstream_Bar': 'success2'},
        {'upstream_Foo': 'fail1, success1', 'upstream_Bar': 'success2'},
        {'upstream_Foo': 'success1', 'upstream_Bar': 'fail2, success2'},
        {'upstream_Foo': 'fail1, success1',
         'upstream_Bar': 'fail2, success2'},
    )

    reader = log_reader.ApacheReader(iter(raw_lines),
                                     '%{upstream_Foo}i %{upstream_Bar}i')
    records = list(reader)

    for position, wanted in enumerate(expected):
        self.assertEqual(records[position], wanted)
# Stand-alone stress script: repeatedly builds ApacheReader objects over
# 'access.log', drains them, drops them and forces a garbage collection.
# Nothing is asserted; it prints one '.' per outer iteration.
# NOTE(review): presumably meant to be watched from outside for memory
# growth in log_reader -- confirm intent.
import gc
import sys

# Exit with status 1 if log_reader cannot be imported.
try:
    import log_reader
except ImportError:
    print "unable to find log_reader module"
    sys.exit(1)

# Sample log line; only the commented-out parse_line experiment below
# refers to it.
# NOTE(review): the literal starts with a double quote that is never
# closed -- confirm whether that is intentional.
line = ('"1.2.3.4 - - [03/Jul/2007:18:48:56 +1000] "GET /example'
        ' HTTP/1.1" 200 1234 "http://www.example.com/referrer"'
        ' "user-agent/1.0"')

for i in xrange(1000000):
    #parse_line = log_reader.ApacheReader.parse_line
    #parse_line(line)
    #continue

    # First pass: the reader is given the file name as a string.
    reader = log_reader.ApacheReader('access.log')
    for fields in reader:
        pass
    # Drop the only reference to the reader, then collect.
    reader = None
    gc.collect()

    # Second pass: the reader is given an already-open file object.
    reader = log_reader.ApacheReader(file('access.log'))
    for fields in reader:
        pass
    # Progress marker, flushed so it shows up immediately.
    sys.stdout.write('.'); sys.stdout.flush()
    reader = None
    gc.collect()
def testSizeAndReferer(self):
    # '%b' should come back as an integer under 'size', and the quoted
    # '%{Referer}i' under the lower-case key 'referer'.
    raw_lines = (
        '1234 "test"\n\n',
        '22345 "test test"',
    )
    expected = (
        {'referer': 'test', 'size': 1234},
        {'referer': 'test test', 'size': 22345},
    )

    reader = log_reader.ApacheReader(iter(raw_lines), '%b "%{Referer}i"')
    records = list(reader)

    for position, wanted in enumerate(expected):
        self.assertEqual(records[position], wanted)
def testEscaping(self):
    # A double-quoted '%u' value should be returned whole, embedded
    # spaces included and the quotes themselves removed.
    raw_lines = (
        '"test test test"\n',
        '"foo"',
    )
    expected_names = ('test test test', 'foo')

    reader = log_reader.ApacheReader(iter(raw_lines), '"%u"')
    records = list(reader)

    for position, wanted in enumerate(expected_names):
        self.assertEqual(records[position]['username'], wanted)
def testUsername(self):
    # A bare '%u' should yield the token under 'username' without the
    # trailing newline.
    raw_lines = ["test\n", "foo"]
    expected_names = ('test', 'foo')

    reader = log_reader.ApacheReader(iter(raw_lines), '%u')
    records = list(reader)

    for position, wanted in enumerate(expected_names):
        self.assertEqual(records[position]['username'], wanted)
def testCurline(self):
    # While iterating, reader.curline should hold the raw text of the
    # line just parsed, unchanged (trailing newlines included).
    raw_lines = (
        '"test" username\n\n',
        '"test test" foobar',
    )
    reader = log_reader.ApacheReader(iter(raw_lines), '"%{Referer}i" %u')

    # Sample curline once per yielded record, as each record is produced.
    seen = []
    for _ in reader:
        seen.append(reader.curline)

    self.assertEqual(seen[0], raw_lines[0])
    self.assertEqual(seen[1], raw_lines[1])