def test_nginx_parsing_using_use_http_x_forwarded_for_option_set_to_true():
    """Check the IP recorded for each nginx_json hit with use_http_x_forwarded_for enabled."""
    log_path = 'logs/nginx_json.log'

    # Reset the globals that earlier tests may have overridden.
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None

    options = import_logs.config.options
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = True
    options.use_http_x_forwarded_for = True

    import_logs.parser.parse(log_path)
    hits = [recorded.__dict__ for recorded in Recorder.recorders]

    # Expected 'ip' of each hit, in the order the hits were recorded.
    expected_ips = (
        u'1.2.3.4',
        u'1.2.3.4',
        u'0:0:0:0:0:ffff:7b2d:4350',
        u'1.2.3.5',
        u'0:0:0:0:0:ffff:7b2d:4351',
        u'4.3.2.1',
        u'0:0:0:0:0:ffff:7b2d:4359',
    )
    for position, expected_ip in enumerate(expected_ips):
        assert hits[position]['ip'] == expected_ip

    assert len(hits) == 7
def test_w3c_custom_field_regex_option():
    """Test that --w3c-field-regex can be used to match custom W3C log fields.

    Registers regexes for the ``sc-substatus`` and ``sc-win32-status`` fields,
    lets a ``W3cExtendedFormat`` inspect ``logs/iis.log`` and checks the
    ``substatus`` and ``win32_status`` groups of the first matching line.
    """
    file_ = 'logs/iis.log'

    # have to override previous globals override for this test
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    import_logs.config.options.enable_http_redirects = True
    import_logs.config.options.enable_http_errors = True
    import_logs.config.options.replay_tracking = False
    import_logs.config.options.w3c_time_taken_in_millisecs = True
    # Raw strings keep the regex escapes (\S) out of Python's own string
    # escape processing; the resulting pattern text is unchanged.
    import_logs.config.options.w3c_field_regexes = {
        'sc-substatus': r'(?P<substatus>\S+)',
        'sc-win32-status': r'(?P<win32_status>\S+)',
    }

    # Named w3c_format so the builtin format() is not shadowed.
    w3c_format = import_logs.W3cExtendedFormat()

    # The context manager closes the log even if check_format() or match()
    # raises, which the previous explicit close() did not guarantee.
    with open(file_) as file_handle:
        w3c_format.check_format(file_handle)

        # Read line by line until the format matches one or the file ends.
        match = None
        while not match:
            line = file_handle.readline()
            if not line:
                break  # end of file reached without a matching line
            match = w3c_format.match(line)

    assert match is not None
    assert w3c_format.get('substatus') == '654'
    assert w3c_format.get('win32_status') == '456'
def test_replay_tracking_seconds_to_add_to_date():
    """Check the timestamp arguments of the first three hits with seconds_to_add_to_date=3600.

    ``_idts``, ``_viewts``, ``_refts`` and ``_ects`` are each compared with a
    reference value plus the configured 3600 seconds.
    """
    log_path = 'logs/logs_to_tests.log'

    import_logs.stats = import_logs.Statistics()
    import_logs.config = Config()
    import_logs.config.options.seconds_to_add_to_date = 3600
    import_logs.resolver = Resolver()
    import_logs.Recorder = Recorder()
    import_logs.parser = import_logs.Parser()
    import_logs.parser.parse(log_path)

    hits = [recorded.args for recorded in import_logs.Recorder.recorders]

    # Per-hit reference value for '_ects'; the other three arguments share
    # one reference value across all hits.
    reference_ects = (1360047634, 1360047534, 1360047614)
    for position, ects in enumerate(reference_ects):
        args = hits[position]
        assert args['_idts'] == 1360047661 + 3600
        assert args['_viewts'] == 1360047661 + 3600
        assert args['_refts'] == 1360047661 + 3600
        assert args['_ects'] == ects + 3600
def test_regex_group_to_custom_var_options():
    """Check that --regex-group-to-visit-cvar and --regex-group-to-page-cvar map regex groups to custom vars."""
    log_path = 'logs/iis.log'

    # Reset the globals that earlier tests may have overridden.
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None

    options = import_logs.config.options
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = True
    options.regex_groups_to_ignore = set()
    options.regex_group_to_visit_cvars_map = {
        'userid': "User Name",
        'date': "The Date",
    }
    options.regex_group_to_page_cvars_map = {
        'generation_time_milli': 'Geneartion Time',
        'referrer': 'The Referrer',
    }

    import_logs.parser.parse(log_path)
    hits = [recorded.__dict__ for recorded in Recorder.recorders]
    first = hits[0]

    # Visit-scoped custom variables.
    assert first['args']['_cvar'] == {
        1: ['The Date', '2012-04-01 00:00:13'],
        2: ['User Name', 'theuser'],
    }
    # Page-scoped custom variables.
    assert first['args']['cvar'] == {1: ['Geneartion Time', '1687']}

    assert first['userid'] == 'theuser'
    assert first['date'] == datetime.datetime(2012, 4, 1, 0, 0, 13)
    assert first['generation_time_milli'] == 1687
    assert first['referrer'] == ''
def test_ovh_parsing():
    """test parsing of ovh logs (which needs to be forced, as it's not autodetected)

    The 'ovh' format is selected explicitly and ``log_hostname`` is set to
    ``None`` while the log is parsed. ``log_hostname`` is reset to ``'foo'``
    afterwards, also when an assertion fails, so a failure here cannot leak
    the ``None`` value into later tests.
    """
    file_ = 'logs/ovh.log'

    # have to override previous globals override for this test
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = import_logs.FORMATS['ovh']
    # NOTE(review): presumably cleared so the host is read from the log line
    # (the test expects 'www.example.com') -- confirm against import_logs.
    import_logs.config.options.log_hostname = None
    try:
        import_logs.config.options.enable_http_redirects = True
        import_logs.config.options.enable_http_errors = True
        import_logs.config.options.replay_tracking = False
        import_logs.config.options.w3c_time_taken_in_millisecs = False
        import_logs.parser.parse(file_)

        hits = [hit.__dict__ for hit in Recorder.recorders]

        assert hits[0]['status'] == u'301'
        assert hits[0]['userid'] == u'theuser'
        assert hits[0]['is_error'] == False
        assert hits[0]['extension'] == u'/'
        assert hits[0]['is_download'] == False
        assert hits[0]['referrer'] == u''
        assert hits[0]['args'] == {'uid': u'theuser'}
        assert hits[0]['generation_time_milli'] == 0
        assert hits[0]['host'] == 'www.example.com'
        assert hits[0]['filename'] == 'logs/ovh.log'
        assert hits[0]['is_redirect'] == True
        # Seconds written as 7, not 07: a leading-zero integer literal is a
        # syntax error on Python 3 and has the same value on Python 2.
        assert hits[0]['date'] == datetime.datetime(2012, 2, 10, 21, 42, 7)
        assert hits[0]['lineno'] == 0
        assert hits[0]['ip'] == u'1.2.3.4'
        assert hits[0]['query_string'] == ''
        assert hits[0]['path'] == u'/'
        assert hits[0]['is_robot'] == False
        assert hits[0]['full_path'] == u'/'
        assert hits[0]['user_agent'] == u'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'

        assert len(hits) == 1
    finally:
        # Restore the hostname the other tests in this file expect.
        import_logs.config.options.log_hostname = 'foo'
def test_static_ignores():
    """Check that exactly one hit is recorded from the static-files log when enable_static is off."""
    log_path = 'logs/static_ignores.log'

    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None

    options = import_logs.config.options
    options.enable_static = False
    # ensure robots.txt would be imported if not detected as static
    options.download_extensions = 'txt,doc'
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = False

    import_logs.parser.parse(log_path)

    recorded_args = [recorder.args for recorder in import_logs.Recorder.recorders]
    assert len(recorded_args) == 1
def test_amazon_cloudfront_web_parsing():
    """Check the single hit parsed from an Amazon CloudFront web log (extended W3C format).

    NOTE(review): this file defines a second function with this exact name
    further down. The later definition rebinds the module-level name, so this
    version is presumably never collected by the test runner -- rename or
    remove one of the two.
    """
    log_path = 'logs/amazon_cloudfront_web.log'

    # Reset the globals that earlier tests may have overridden.
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None

    options = import_logs.config.options
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = False

    import_logs.parser.parse(log_path)
    hits = [recorded.__dict__ for recorded in Recorder.recorders]

    import_logs.logging.debug(hits)

    first = hits[0]
    assert first['status'] == u'200'
    assert first['userid'] == None
    assert first['is_error'] == False
    assert first['extension'] == u'html'
    assert first['is_download'] == False
    assert first['referrer'] == u'www.displaymyfiles.com'
    assert first['args'] == {}
    assert first['generation_time_milli'] == 1.0
    assert first['host'] == 'foo'
    assert first['filename'] == 'logs/amazon_cloudfront_web.log'
    assert first['is_redirect'] == False
    assert first['date'] == datetime.datetime(2014, 5, 23, 1, 13, 11)
    assert first['lineno'] == 2
    assert first['ip'] == u'192.0.2.10'
    assert first['query_string'] == ''
    assert first['path'] == u'/view/my/file.html'
    assert first['is_robot'] == False
    assert first['full_path'] == u'/view/my/file.html'
    assert first['user_agent'] == u'Mozilla/4.0%20(compatible;%20MSIE%205.0b1;%20Mac_PowerPC)'

    assert len(hits) == 1
def test_elb_parsing():
    """test parsing of elb logs

    Parses ``logs/elb.log`` with format autodetection (``config.format`` is
    ``None``) and checks every attribute of the single recorded hit.
    """
    file_ = 'logs/elb.log'

    # have to override previous globals override for this test
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    import_logs.config.options.enable_http_redirects = True
    import_logs.config.options.enable_http_errors = True
    import_logs.config.options.replay_tracking = False
    import_logs.config.options.w3c_time_taken_in_millisecs = False
    import_logs.parser.parse(file_)

    hits = [hit.__dict__ for hit in Recorder.recorders]

    assert len(hits) == 1

    assert hits[0]['status'] == u'200'
    assert hits[0]['userid'] == None
    assert hits[0]['is_error'] == False
    assert hits[0]['extension'] == u'html'
    assert hits[0]['is_download'] == False
    assert hits[0]['referrer'] == ''
    assert hits[0]['args'] == {}
    assert hits[0]['generation_time_milli'] == 1.048
    assert hits[0]['host'] == 'foo'
    assert hits[0]['filename'] == 'logs/elb.log'
    assert hits[0]['is_redirect'] == False
    # Month written as 5, not 05: a leading-zero integer literal is a syntax
    # error on Python 3 and has the same value on Python 2.
    assert hits[0]['date'] == datetime.datetime(2015, 5, 13, 23, 39, 43)
    assert hits[0]['lineno'] == 0
    assert hits[0]['ip'] == u'1.2.3.4'
    assert hits[0]['query_string'] == u''
    assert hits[0]['path'] == u'/path/index.html'
    assert hits[0]['is_robot'] == False
    assert hits[0]['full_path'] == u'/path/index.html'
    assert hits[0]['user_agent'] == u'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'
    assert hits[0]['length'] == 57
def test_ignore_groups_option_removes_groups():
    """Check that groups listed for --ignore-groups come back as defaults in the recorded hit."""
    log_path = 'logs/iis.log'

    # Reset the globals that earlier tests may have overridden.
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None

    options = import_logs.config.options
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = True
    ignored_groups = ['userid', 'generation_time_milli']
    options.regex_groups_to_ignore = set(ignored_groups)

    import_logs.parser.parse(log_path)
    hits = [recorded.__dict__ for recorded in Recorder.recorders]

    first = hits[0]
    assert first['userid'] == None
    assert first['generation_time_milli'] == 0
def test_amazon_cloudfront_web_parsing():
    """Check the single hit parsed from an Amazon CloudFront web log (extended W3C format)."""
    log_path = 'logs/amazon_cloudfront_web.log'

    # Reset the globals that earlier tests may have overridden.
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None

    options = import_logs.config.options
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = False

    import_logs.parser.parse(log_path)
    hits = [recorded.__dict__ for recorded in Recorder.recorders]

    first = hits[0]
    assert first['status'] == u'200'
    assert first['userid'] == None
    assert first['is_error'] == False
    assert first['extension'] == u'html'
    assert first['is_download'] == False
    assert first['referrer'] == u'https://example.com/'
    assert first['args'] == {'cvar': {1: ['HTTP-method', 'GET']}}
    assert first['generation_time_milli'] == 1.0
    assert first['host'] == 'foo'
    assert first['filename'] == 'logs/amazon_cloudfront_web.log'
    assert first['is_redirect'] == False
    assert first['date'] == datetime.datetime(2014, 5, 23, 1, 13, 11)
    assert first['lineno'] == 2
    assert first['ip'] == u'192.0.2.10'
    assert first['query_string'] == ''
    assert first['path'] == u'/view/my/file.html'
    assert first['is_robot'] == False
    assert first['full_path'] == u'/view/my/file.html'
    assert first['user_agent'] == u'Mozilla/5.0 (Windows; U; Windows NT 6.1; de-DE) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/10.0.649.0 Safari/534.17'

    assert len(hits) == 1
def test_shoutcast_parsing():
    """Check the first hit parsed from a SHOUTcast log (extended W3C format)."""
    log_path = 'logs/shoutcast.log'

    # Reset the globals that earlier tests may have overridden.
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None

    options = import_logs.config.options
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = False

    import_logs.parser.parse(log_path)
    hits = [recorded.__dict__ for recorded in Recorder.recorders]

    first = hits[0]
    assert first['status'] == u'200'
    assert first['userid'] == None
    assert first['is_error'] == False
    assert first['extension'] == u'/stream'
    assert first['is_download'] == False
    assert first['referrer'] == ''
    assert first['args'] == {}
    assert first['generation_time_milli'] == 1000.0
    assert first['host'] == 'foo'
    assert first['filename'] == 'logs/shoutcast.log'
    assert first['is_redirect'] == False
    assert first['date'] == datetime.datetime(2015, 12, 7, 10, 37, 5)
    assert first['lineno'] == 3
    assert first['ip'] == u'1.2.3.4'
    assert first['query_string'] == u'title=UKR%20Nights'
    assert first['path'] == u'/stream'
    assert first['is_robot'] == False
    assert first['full_path'] == u'/stream?title=UKR%20Nights'
    assert first['user_agent'] == u'NSPlayer/10.0.0.3702 WMFSDK/10.0'
    assert first['length'] == 65580
def test_netscaler_parsing():
    """Check the first hit parsed from a NetScaler log (extended W3C format)."""
    log_path = 'logs/netscaler.log'

    # Reset the globals that earlier tests may have overridden.
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None

    options = import_logs.config.options
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = False

    import_logs.parser.parse(log_path)
    hits = [recorded.__dict__ for recorded in Recorder.recorders]

    first = hits[0]
    assert first['status'] == u'302'
    assert first['userid'] == None
    assert first['is_error'] == False
    assert first['extension'] == u'jsp'
    assert first['is_download'] == False
    assert first['referrer'] == ''
    assert first['args'] == {}
    assert first['generation_time_milli'] == 1000
    assert first['host'] == 'foo'
    assert first['filename'] == 'logs/netscaler.log'
    assert first['is_redirect'] == True
    assert first['date'] == datetime.datetime(2012, 8, 16, 11, 55, 13)
    assert first['lineno'] == 4
    assert first['ip'] == u'172.20.1.0'
    assert first['query_string'] == ''
    assert first['path'] == u'/Citrix/XenApp/Wan/auth/login.jsp'
    assert first['is_robot'] == False
    assert first['full_path'] == u'/Citrix/XenApp/Wan/auth/login.jsp'
    assert first['user_agent'] == u'Mozilla/4.0+(compatible;+MSIE+7.0;+Windows+NT+5.1;+Trident/4.0;+.NET+CLR+1.1.4322;+.NET+CLR+2.0.50727;+.NET+CLR+3.0.04506.648;+.NET+CLR+3.5.21022)'
def test_custom_log_date_format_option():
    """Test that --log-date-format will change how dates are parsed in a custom log format.

    Builds a ``RegexFormat`` named 'custom' from ``log_format_regex`` and
    ``log_date_format`` and checks the date of the first recorded hit.
    """
    file_ = 'logs/custom_regex_custom_date.log'

    # have to override previous globals override for this test
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.options.w3c_field_regexes = None
    import_logs.config.options.regex_group_to_visit_cvars_map = None
    import_logs.config.options.regex_group_to_page_cvars_map = None
    # Raw strings keep the regex escapes (\S, \s, \[, \]) out of Python's own
    # string escape processing; the resulting pattern text is unchanged.
    import_logs.config.options.log_format_regex = (
        r'(?P<ip>\S+)\s+\S+\s+\S+\s+\[(?P<date>.*?)\]\s+'
        r'"\S+\s+(?P<path>.*?)\s+\S+"\s+(?P<status>\S+)\s+(?P<length>\S+)'
    )
    import_logs.config.options.log_date_format = '%B - %d, %Y:%H:%M:%S'
    import_logs.config.format = import_logs.RegexFormat(
        'custom',
        import_logs.config.options.log_format_regex,
        import_logs.config.options.log_date_format,
    )

    import_logs.parser.parse(file_)

    hits = [hit.__dict__ for hit in Recorder.recorders]

    assert hits[0]['date'] == datetime.datetime(2012, 2, 10, 16, 42, 7)
def test_amazon_cloudfront_rtmp_parsing():
    """Check both hits parsed from an Amazon CloudFront RTMP log.

    The log uses the extended W3C format with custom fields for event info,
    so the event_category / event_action / event_name attributes are checked
    alongside the usual hit attributes.
    """
    log_path = 'logs/amazon_cloudfront_rtmp.log'

    # Reset the globals that earlier tests may have overridden.
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None

    options = import_logs.config.options
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = False

    import_logs.parser.parse(log_path)
    hits = [recorded.__dict__ for recorded in Recorder.recorders]

    # First hit: the 'connect' event.
    connect = hits[0]
    assert connect['is_download'] == False
    assert connect['ip'] == u'192.0.2.147'
    assert connect['is_redirect'] == False
    assert connect['filename'] == 'logs/amazon_cloudfront_rtmp.log'
    assert connect['event_category'] == 'cloudfront_rtmp'
    assert connect['event_action'] == u'connect'
    assert connect['lineno'] == 2
    assert connect['status'] == '200'
    assert connect['is_error'] == False
    assert connect['event_name'] == None
    assert connect['args'] == {}
    assert connect['host'] == 'foo'
    assert connect['date'] == datetime.datetime(2010, 3, 12, 23, 51, 20)
    assert connect['path'] == u'/shqshne4jdp4b6.cloudfront.net/cfx/st\u200b'
    assert connect['extension'] == u'net/cfx/st\u200b'
    assert connect['referrer'] == ''
    assert connect['userid'] == None
    assert connect['user_agent'] == u'LNX 10,0,32,18'
    assert connect['generation_time_milli'] == 0
    assert connect['query_string'] == u'key=value'
    assert connect['is_robot'] == False
    assert connect['full_path'] == u'/shqshne4jdp4b6.cloudfront.net/cfx/st\u200b'

    # Second hit: the 'play' event.
    play = hits[1]
    assert play['is_download'] == False
    assert play['ip'] == u'192.0.2.222'
    assert play['is_redirect'] == False
    assert play['filename'] == 'logs/amazon_cloudfront_rtmp.log'
    assert play['event_category'] == 'cloudfront_rtmp'
    assert play['event_action'] == u'play'
    assert play['lineno'] == 3
    assert play['status'] == '200'
    assert play['is_error'] == False
    assert play['event_name'] == u'myvideo'
    assert play['args'] == {}
    assert play['host'] == 'foo'
    assert play['date'] == datetime.datetime(2010, 3, 12, 23, 51, 21)
    assert play['path'] == u'/shqshne4jdp4b6.cloudfront.net/cfx/st\u200b'
    assert play['extension'] == u'net/cfx/st\u200b'
    assert play['referrer'] == ''
    assert play['userid'] == None
    assert play['length'] == 3914
    assert play['user_agent'] == u'LNX 10,0,32,18'
    assert play['generation_time_milli'] == 0
    assert play['query_string'] == u'key=value'
    assert play['is_robot'] == False
    assert play['full_path'] == u'/shqshne4jdp4b6.cloudfront.net/cfx/st\u200b'

    assert len(hits) == 2
def test_iis_custom_format():
    """Check the first three hits parsed from an IIS log that uses custom field names."""
    log_path = 'logs/iis_custom.log'

    # Reset the globals that earlier tests may have overridden, and map the
    # custom field names used by this log onto other field names.
    import_logs.config.options.custom_w3c_fields = {
        'date-local': 'date',
        'time-local': 'time',
        'cs(Host)': 'cs-host',
        'TimeTakenMS': 'time-taken',
    }
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None

    options = import_logs.config.options
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    # w3c_time_taken_in_millisecs is deliberately not set to True here: the
    # test checks that the right values are obtained even without it.

    import_logs.parser.parse(log_path)
    hits = [recorded.__dict__ for recorded in Recorder.recorders]

    first = hits[0]
    assert first['status'] == '200'
    assert first['is_error'] == False
    assert first['extension'] == u'/products/theproduct'
    assert first['is_download'] == False
    assert first['referrer'] == u'http://example.com/Search/SearchResults.pg?informationRecipient.languageCode.c=en'
    assert first['args'] == {}
    assert first['generation_time_milli'] == 109
    assert first['host'] == 'foo'
    assert first['filename'] == 'logs/iis_custom.log'
    assert first['is_redirect'] == False
    assert first['date'] == datetime.datetime(2012, 8, 15, 17, 0)
    assert first['lineno'] == 7
    assert first['ip'] == u'70.95.0.0'
    assert first['query_string'] == ''
    assert first['path'] == u'/Products/theProduct'
    assert first['is_robot'] == False
    assert first['full_path'] == u'/Products/theProduct'
    assert first['user_agent'] == u'Mozilla/5.0 (Linux; Android 4.4.4; SM-G900V Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.59 Mobile Safari/537.36'

    second = hits[1]
    assert second['status'] == u'301'
    assert second['is_error'] == False
    assert second['extension'] == u'/topic/hw43061'
    assert second['is_download'] == False
    assert second['referrer'] == ''
    assert second['args'] == {}
    assert second['generation_time_milli'] == 0
    assert second['host'] == 'foo'
    assert second['filename'] == 'logs/iis_custom.log'
    assert second['is_redirect'] == True
    assert second['date'] == datetime.datetime(2012, 8, 15, 17, 0)
    assert second['lineno'] == 8
    assert second['ip'] == '-'
    assert second['query_string'] == ''
    assert second['path'] == u'/Topic/hw43061'
    assert second['is_robot'] == False
    assert second['full_path'] == u'/Topic/hw43061'
    assert second['user_agent'] == u'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36'

    third = hits[2]
    assert third['status'] == u'404'
    assert third['is_error'] == True
    assert third['extension'] == u'/hello/world/6,681965'
    assert third['is_download'] == False
    assert third['referrer'] == ''
    assert third['args'] == {}
    assert third['generation_time_milli'] == 359
    assert third['host'] == 'foo'
    assert third['filename'] == 'logs/iis_custom.log'
    assert third['is_redirect'] == False
    assert third['date'] == datetime.datetime(2012, 8, 15, 17, 0)
    assert third['lineno'] == 9
    assert third['ip'] == u'173.5.0.0'
    assert third['query_string'] == ''
    assert third['path'] == u'/hello/world/6,681965'
    assert third['is_robot'] == False
    assert third['full_path'] == u'/hello/world/6,681965'
    assert third['user_agent'] == u'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36'
def test_replay_tracking_arguments():
    """Check the tracking arguments recorded for the first three hits of the sample log.

    Parses ``logs/logs_to_tests.log`` with a fresh ``Config``, ``Resolver``
    and ``Recorder`` and compares each hit's ``args`` with the expected
    values. Arguments that are the same on every hit are listed once; the
    four that vary (``action_name``, ``url``, ``s``, ``r``) are listed per
    hit. A failing assertion names the hit index and the argument.
    """
    file_ = 'logs/logs_to_tests.log'

    import_logs.stats = import_logs.Statistics()
    import_logs.config = Config()
    import_logs.resolver = Resolver()
    import_logs.Recorder = Recorder()
    import_logs.parser = import_logs.Parser()
    import_logs.parser.parse(file_)

    hits = [hit.args for hit in import_logs.Recorder.recorders]

    # Arguments expected to be identical on all three hits.
    shared_args = (
        ('_idn', '0'),
        ('ag', '1'),
        ('_viewts', '1360047661'),
        ('urlref', 'http://clearcode.cc/welcome'),
        ('_ref', 'http://piwik.org/thank-you-all/'),
        ('_idts', '1360047661'),
        ('java', '1'),
        ('res', '1680x1050'),
        ('idsite', '1'),
        ('realp', '0'),
        ('wma', '1'),
        ('_idvc', '1'),
        ('cookie', '1'),
        ('rec', '1'),
        ('qt', '1'),
        ('h', '17'),
        ('m', '31'),
        ('gears', '0'),
        ('fla', '1'),
        ('pdf', '1'),
        ('_id', '1da79fc743e8bcc4'),
        ('dir', '1'),
        ('_refts', '1360047661'),
    )

    # Arguments that differ from hit to hit, listed in hit order.
    per_hit_args = (
        (
            ('action_name', 'Clearcode - Web and Mobile Development | Technology With Passion'),
            ('url', 'http://clearcode.cc/'),
            ('s', '25'),
            ('r', '983420'),
        ),
        (
            ('action_name', 'AdviserBrief - Track Your Investments and Plan Financial Future | Clearcode'),
            ('url', 'http://clearcode.cc/case/adviserbrief-track-your-investments-and-plan-financial-future/'),
            ('s', '40'),
            ('r', '109464'),
        ),
        (
            ('action_name', 'ATL Apps - American Tailgating League Mobile Android IOS Games | Clearcode'),
            ('url', 'http://clearcode.cc/case/atl-apps-mobile-android-ios-games/'),
            ('s', '46'),
            ('r', '080064'),
        ),
    )

    for index, specific_args in enumerate(per_hit_args):
        actual_args = hits[index]
        for key, expected in shared_args + specific_args:
            assert actual_args[key] == expected, (
                'hit %d, argument %r: expected %r, got %r'
                % (index, key, expected, actual_args[key]))
def test_incapsulaw3c_parsing():
    """test parsing of incapsula w3c logs (which needs to be forced, as it's not autodetected)

    The 'incapsula_w3c' format is selected explicitly, ``log_hostname`` is
    set to ``None``, and every attribute of the two recorded hits is checked.
    """
    file_ = 'logs/incapsula_w3c.log'

    # have to override previous globals override for this test
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = import_logs.FORMATS['incapsula_w3c']
    # NOTE(review): unlike test_ovh_parsing, log_hostname is not reset to
    # 'foo' at the end of this test -- confirm later tests do not depend on it.
    import_logs.config.options.log_hostname = None
    import_logs.config.options.enable_http_redirects = True
    import_logs.config.options.enable_http_errors = True
    import_logs.config.options.replay_tracking = False
    import_logs.config.options.w3c_time_taken_in_millisecs = False
    import_logs.parser.parse(file_)

    hits = [hit.__dict__ for hit in Recorder.recorders]

    assert hits[0]['status'] == u'200'
    assert hits[0]['userid'] == None
    assert hits[0]['is_error'] == False
    assert hits[0]['extension'] == 'php'
    assert hits[0]['is_download'] == False
    assert hits[0]['referrer'] == u''
    assert hits[0]['args'] == {'cvar': {1: ['HTTP-method', u'"GET"']}}
    assert hits[0]['length'] == 10117
    assert hits[0]['generation_time_milli'] == 0
    assert hits[0]['host'] == 'www.example.com'
    assert hits[0]['filename'] == 'logs/incapsula_w3c.log'
    assert hits[0]['is_redirect'] == False
    # Hour written as 7, not 07: a leading-zero integer literal is a syntax
    # error on Python 3 and has the same value on Python 2.
    assert hits[0]['date'] == datetime.datetime(2017, 6, 28, 7, 26, 35)
    assert hits[0]['lineno'] == 0
    assert hits[0]['ip'] == u'123.123.123.123'
    assert hits[0]['query_string'] == u'variable=test'
    assert hits[0]['path'] == u'/page.php'
    assert hits[0]['is_robot'] == False
    assert hits[0]['full_path'] == u'/page.php'
    assert hits[0]['user_agent'] == u'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'

    assert hits[1]['status'] == u'200'
    assert hits[1]['userid'] == None
    assert hits[1]['is_error'] == False
    assert hits[1]['extension'] == '/rss/news'
    assert hits[1]['is_download'] == False
    assert hits[1]['referrer'] == u''
    # NOTE(review): this re-checks hits[0] in the middle of the hits[1]
    # assertions; it looks like a copy-paste slip for hits[1]['args'].
    # Left unchanged because the expected args of the second hit cannot be
    # confirmed from here -- verify against the log and switch the index.
    assert hits[0]['args'] == {'cvar': {1: ['HTTP-method', u'"GET"']}}
    assert hits[1]['length'] == 0
    assert hits[1]['generation_time_milli'] == 0
    assert hits[1]['host'] == 'www.example.com'
    assert hits[1]['filename'] == 'logs/incapsula_w3c.log'
    assert hits[1]['is_redirect'] == False
    assert hits[1]['date'] == datetime.datetime(2017, 6, 26, 18, 21, 17)
    assert hits[1]['lineno'] == 1
    assert hits[1]['ip'] == u'125.125.125.125'
    assert hits[1]['query_string'] == u''
    assert hits[1]['path'] == '/rss/news'
    assert hits[1]['is_robot'] == False
    assert hits[1]['full_path'] == u'/rss/news'
    assert hits[1]['user_agent'] == u'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:45.0) Gecko/20100101 Thunderbird/45.8.0 Lightning/4.7.8'

    assert len(hits) == 2