def test_parse_client_side_events(self):
    """Client-side line in '%q %l %n %t %h' format parses to the expected dict.

    The first field is a URL-encoded JSON capsule (terminated by ';');
    the expected result holds the capsule's fields together with
    'recvFrom', 'seqId', 'timestamp' and a hashed 'clientIp'.
    """
    log_parser = eventlogging.LogParser('%q %l %n %t %h')
    raw_line = (
        '?%7B%22wiki%22%3A%22testwiki%22%2C%22schema%22%3A%22Generic'
        '%22%2C%22revision%22%3A13%2C%22clientValidated%22%3Atrue%2C'
        '%22event%22%3A%7B%22articleId%22%3A1%2C%22articleTitle%22%3'
        'A%22H%C3%A9ctor%20Elizondo%22%7D%2C%22webHost%22%3A%22test.'
        'wikipedia.org%22%7D; cp3022.esams.wikimedia.org 132073 2013'
        '-01-19T23:16:38 86.149.229.149'
    )
    # The raw IP must not appear in the result; the test expects the value
    # produced by eventlogging.parse.hash_value() instead.
    hashed_ip = eventlogging.parse.hash_value('86.149.229.149')
    expected_event = {
        'schema': 'Generic',
        'revision': 13,
        'wiki': 'testwiki',
        'webHost': 'test.wikipedia.org',
        'clientValidated': True,
        'recvFrom': 'cp3022.esams.wikimedia.org',
        'seqId': 132073,
        'timestamp': 1358637398,
        'clientIp': hashed_ip,
        'event': {
            'articleId': 1,
            'articleTitle': 'Héctor Elizondo',
        },
    }
    actual_event = log_parser.parse(raw_line)
    self.assertEqual(actual_event, expected_event)
def test_parser_bot_requests(self):
    """The parsed 'userAgent' map flags bots via its 'is_bot' entry.

    Three log lines that differ only in their trailing user-agent string
    are parsed in turn; each must yield the expected 'is_bot' value.
    """
    log_parser = eventlogging.LogParser('%q %{recvFrom}s %{seqId}d %D %o %u')

    # (raw log line, expected is_bot) pairs, checked in this order.
    cases = (
        # Bot - recognised by uaparser
        (
            '?%7B%22wiki%22%3A%22testwiki%22%2C%22schema%22%3A%22Generic'
            '%22%2C%22revision%22%3A13%2C%22event%22%3A%7B%22articleId%2'
            '2%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizondo%22%7'
            'D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; cp3022.esa'
            'ms.wikimedia.org 132073 2013-01-19T23:16:38 - '
            'AppEngine-Google; (+http://code.google.com/appengine; appid'
            ': webetrex)',
            True,
        ),
        # Bot - not recognised by uaparser
        (
            '?%7B%22wiki%22%3A%22testwiki%22%2C%22schema%22%3A%22G'
            'eneric%22%2C%22revision%22%3A13%2C%22event%22%3A%7B%22artic'
            'leId%22%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizond'
            'o%22%7D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; cp30'
            '22.esams.wikimedia.org 132073 2013-01-19T23:16:38 - '
            'WikiDemo/10.2.0;',
            True,
        ),
        # Regular browser
        (
            '?%7B%22wiki%22%3A%22testwiki%22%2C%22schema%22%3A%22'
            'Generic%22%2C%22revision%22%3A13%2C%22event%22%3A%7B%22arti'
            'cleId%22%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizon'
            'do%22%7D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; cp3'
            '022.esams.wikimedia.org 132073 2013-01-19T23:16:38 - '
            'Mozilla/5.0 (X11; Linux x86_64; rv:10.0)'
            ' Gecko/20100101 Firefox/10.0',
            False,
        ),
    )

    for raw_line, expected_is_bot in cases:
        user_agent = log_parser.parse(raw_line)['userAgent']
        self.assertEqual(user_agent['is_bot'], expected_is_bot)
def test_parse_client_side_events(self):
    """Client-side line with a trailing user-agent field parses as expected.

    The format adds '%{userAgent}i' after the IP field; the expected dict
    carries the user-agent string verbatim, a 'uuid', and the client IP as
    returned by eventlogging.parse.hash_ip().
    """
    log_format = '%q %{recvFrom}s %{seqId}d %t %h %{userAgent}i'
    log_parser = eventlogging.LogParser(log_format)
    raw_line = (
        '?%7B%22wiki%22%3A%22testwiki%22%2C%22schema%22%3A%22Generic'
        '%22%2C%22revision%22%3A13%2C%22event%22%3A%7B%22articleId%2'
        '2%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizondo%22%7'
        'D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; cp3022.esa'
        'ms.wikimedia.org 132073 2013-01-19T23:16:38 86.149.229.149 '
        'Mozilla/5.0'
    )
    hashed_ip = eventlogging.parse.hash_ip('86.149.229.149')
    expected_event = {
        'uuid': '799341a01ba957c79b15dc4d2d950864',
        'schema': 'Generic',
        'revision': 13,
        'wiki': 'testwiki',
        'webHost': 'test.wikipedia.org',
        'recvFrom': 'cp3022.esams.wikimedia.org',
        'seqId': 132073,
        'timestamp': 1358637398,
        'clientIp': hashed_ip,
        'userAgent': 'Mozilla/5.0',
        'event': {
            'articleId': 1,
            'articleTitle': 'Héctor Elizondo',
        },
    }
    actual_event = log_parser.parse(raw_line)
    self.assertEqual(actual_event, expected_event)
def test_parse_capsule_user_agent(self):
    """
    Parser test: client-side events with userAgent in submitted capsule.

    If a capsule has a field that is also parsed from the raw event line,
    the capsule's field should be preferred.

    Only the keys listed in the expected dict are compared; any additional
    keys in the parser output are ignored by this test.
    """
    parser = eventlogging.LogParser('%q %{recvFrom}s %{seqId}d %D %o %u')
    raw = ('?%7B%22wiki%22%3A%22testwiki%22%2C%22schema%22%3A%22Generic'
           '%22%2C%22revision%22%3A13%2C%22event%22%3A%7B%22articleId%2'
           '2%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizondo%22%7'
           'D%2C%22webHost%22%3A%22test.wikipedia.org%22%2C'
           '%22userAgent%22%3A%22Mozilla%2F5.0%5Cu0020%28Windows%5C'
           'u0020NT%5Cu00206.1%3B%5Cu0020WOW64%29%5Cu0020AppleWebKit'
           '%2F537.36%5Cu0020%28KHTML%2C%5Cu0020like%5Cu0020Gecko%29'
           '%5Cu0020Chrome%2F61.0.3163.100%5Cu0020Safari%2F537.36%22'
           '%7D; srv0.example.org 132073 2013-01-19T23:16:38 - '
           'Mozilla/5.0 (X11; Linux x86_64; rv:10.0)'
           ' Gecko/20100101 Firefox/10.0')

    # This ua is the parsed userAgent from inside the event capsule, NOT
    # the last field in the log line (which is a Firefox/Linux string).
    ua = {
        'browser_family': 'Chrome',
        'browser_major': '61',
        'browser_minor': '0',
        'device_family': 'Other',
        'is_bot': False,
        'is_mediawiki': False,
        'os_family': 'Windows',
        'os_major': '7',
        'os_minor': None,
        'wmf_app_version': '-'
    }
    expected = {
        'uuid': '749684341abf513186c71f74c6a09502',
        'recvFrom': 'srv0.example.org',
        'wiki': 'testwiki',
        'webHost': 'test.wikipedia.org',
        'seqId': 132073,
        'dt': '2013-01-19T23:16:38Z',
        'schema': 'Generic',
        'revision': 13,
        'userAgent': ua,
        'event': {
            'articleTitle': 'Héctor Elizondo',
            'articleId': 1
        }
    }

    actual = parser.parse(raw)
    for key, expected_value in expected.items():
        # Report a missing key as a test failure instead of letting the
        # lookup below abort the test with a bare KeyError.
        self.assertIn(
            key, actual, "%s missing from parser output" % key)
        self.assertEqual(
            actual[key],
            expected_value,
            "%s parsed incorrectly. expected:\n'%s'\nactual:\n'%s'"
            % (key, expected_value, actual[key]))
def test_parse_client_side_events(self):
    """Parser test: client-side events.

    Parses a '%q %{recvFrom}s %{seqId}d %D %o %u' line and checks the
    capsule fields, the 'dt' timestamp string and the user-agent map
    expected for the trailing Firefox/Linux user-agent string.

    Only the keys listed in the expected dict are compared; any additional
    keys in the parser output are ignored by this test.
    """
    parser = eventlogging.LogParser('%q %{recvFrom}s %{seqId}d %D %o %u')
    raw = (
        '?%7B%22wiki%22%3A%22testwiki%22%2C%22schema%22%3A%22Generic'
        '%22%2C%22revision%22%3A13%2C%22event%22%3A%7B%22articleId%2'
        '2%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizondo%22%7'
        'D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; srv0.example.org '
        '132073 2013-01-19T23:16:38 - '
        'Mozilla/5.0 (X11; Linux x86_64; rv:10.0)'
        ' Gecko/20100101 Firefox/10.0')
    ua = {
        'os_minor': None,
        'os_major': None,
        'device_family': 'Other',
        'os_family': 'Linux',
        'browser_major': '10',
        'browser_minor': '0',
        'browser_family': 'Firefox',
        'wmf_app_version': '-',
        'is_bot': False,
        'is_mediawiki': False
    }
    expected = {
        'uuid': '749684341abf513186c71f74c6a09502',
        'recvFrom': 'srv0.example.org',
        'wiki': 'testwiki',
        'webHost': 'test.wikipedia.org',
        'seqId': 132073,
        'dt': '2013-01-19T23:16:38Z',
        'schema': 'Generic',
        'revision': 13,
        'userAgent': ua,
        'event': {
            'articleTitle': 'Héctor Elizondo',
            'articleId': 1
        }
    }

    actual = parser.parse(raw)
    for key, expected_value in expected.items():
        # Report a missing key as a test failure instead of letting the
        # lookup below abort the test with a bare KeyError.
        self.assertIn(
            key, actual, "%s missing from parser output" % key)
        self.assertEqual(
            actual[key],
            expected_value,
            "%s parsed incorrectly. expected:\n'%s'\nactual:\n'%s'"
            % (key, expected_value, actual[key]))
def test_parser_server_side_events(self):
    """Server-side line in '%n EventLogging %j' format parses as expected.

    The line is a sequence id, the literal word 'EventLogging' and a JSON
    object; the expected dict is that object plus 'seqId'.
    """
    log_parser = eventlogging.LogParser('%n EventLogging %j')
    raw_line = (
        '99 EventLogging {"revision":123,"timestamp":1358627115,"sche'
        'ma":"FakeSchema","clientValidated":true,"wiki":"enwiki","eve'
        'nt":{"action":"save\\u0020page"},"recvFrom":"fenari"}'
    )
    expected_event = {
        'seqId': 99,
        'schema': 'FakeSchema',
        'revision': 123,
        'wiki': 'enwiki',
        'recvFrom': 'fenari',
        'timestamp': 1358627115,
        'clientValidated': True,
        # The JSON escape \u0020 in the raw line is expected to decode to a space.
        'event': {'action': 'save page'},
    }
    actual_event = log_parser.parse(raw_line)
    self.assertEqual(actual_event, expected_event)
def test_parser_server_side_events(self):
    """Server-side line in '%{seqId}d EventLogging %j' format parses as expected.

    The expected dict is the JSON object from the line plus 'seqId' and a
    'uuid' entry.
    """
    log_parser = eventlogging.LogParser('%{seqId}d EventLogging %j')
    raw_line = (
        '99 EventLogging {"revision":123,"timestamp":1358627115,"sche'
        'ma":"FakeSchema","wiki":"enwiki","event":{"action":"save\\u0'
        '020page"},"recvFrom":"fenari"}'
    )
    expected_event = {
        'uuid': '67cc2c1afa5752ba80bbbd7c5fc41f28',
        'seqId': 99,
        'schema': 'FakeSchema',
        'revision': 123,
        'wiki': 'enwiki',
        'recvFrom': 'fenari',
        'timestamp': 1358627115,
        # The JSON escape \u0020 in the raw line is expected to decode to a space.
        'event': {'action': 'save page'},
    }
    actual_event = log_parser.parse(raw_line)
    self.assertEqual(actual_event, expected_event)
def test_repr(self):
    """repr() of a LogParser shows the class name and its format string."""
    log_parser = eventlogging.LogParser('%q %l %n %t %h')
    expected_repr = "<LogParser('%q %l %n %t %h')>"
    actual_repr = repr(log_parser)
    self.assertEqual(actual_repr, expected_repr)
def test_parse_failure(self):
    """A line that does not match the format makes parse() raise ValueError."""
    log_parser = eventlogging.LogParser('%q %l %n %t %h')
    unparseable_line = 'Fails to parse.'
    # Callable form of assertRaises: invokes log_parser.parse(unparseable_line).
    self.assertRaises(ValueError, log_parser.parse, unparseable_line)