Exemple #1
0
def test_nginx_parsing_using_use_http_x_forwarded_for_option_set_to_true():
    """Check the IP recorded for each nginx_json hit with use_http_x_forwarded_for enabled."""

    log_path = 'logs/nginx_json.log'

    # Other tests leave their own overrides in these module globals; reset them.
    options = import_logs.config.options
    options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = True
    options.use_http_x_forwarded_for = True

    import_logs.parser.parse(log_path)

    recorded = [hit.__dict__ for hit in Recorder.recorders]

    # Expected 'ip' value of each recorded hit, in recording order.
    expected_ips = (
        u'1.2.3.4',
        u'1.2.3.4',
        u'0:0:0:0:0:ffff:7b2d:4350',
        u'1.2.3.5',
        u'0:0:0:0:0:ffff:7b2d:4351',
        u'4.3.2.1',
        u'0:0:0:0:0:ffff:7b2d:4359',
    )
    for position, expected_ip in enumerate(expected_ips):
        assert recorded[position]['ip'] == expected_ip

    assert len(recorded) == 7
Exemple #2
0
def test_w3c_custom_field_regex_option():
    """Test that --w3c-field-regex can be used to match custom W3C log fields.

    Configures regexes for the 'sc-substatus' and 'sc-win32-status' fields,
    lets a W3cExtendedFormat inspect the IIS sample log, then reads lines
    until one matches and checks the values captured by the custom groups.
    """

    file_ = 'logs/iis.log'

    # have to override previous globals override for this test
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    import_logs.config.options.enable_http_redirects = True
    import_logs.config.options.enable_http_errors = True
    import_logs.config.options.replay_tracking = False
    import_logs.config.options.w3c_time_taken_in_millisecs = True
    # Raw strings keep '\S' as a regex escape instead of an (invalid) string escape.
    import_logs.config.options.w3c_field_regexes = {
        'sc-substatus': r'(?P<substatus>\S+)',
        'sc-win32-status': r'(?P<win32_status>\S+)'
    }

    # Named w3c_format so the builtin format() is not shadowed.
    w3c_format = import_logs.W3cExtendedFormat()

    match = None
    # The context manager closes the log even if check_format()/match() raises.
    with open(file_) as file_handle:
        w3c_format.check_format(file_handle)
        while not match:
            line = file_handle.readline()
            if not line:
                # End of file reached without any matching line.
                break
            match = w3c_format.match(line)

    assert match is not None
    assert w3c_format.get('substatus') == '654'
    assert w3c_format.get('win32_status') == '456'
Exemple #3
0
def test_replay_tracking_seconds_to_add_to_date():
    """Check that seconds_to_add_to_date shifts the timestamp arguments of replayed hits."""
    log_path = 'logs/logs_to_tests.log'
    shift = 3600

    import_logs.stats = import_logs.Statistics()
    import_logs.config = Config()
    import_logs.config.options.seconds_to_add_to_date = shift
    import_logs.resolver = Resolver()
    import_logs.Recorder = Recorder()
    import_logs.parser = import_logs.Parser()
    import_logs.parser.parse(log_path)

    recorded_args = [hit.args for hit in import_logs.Recorder.recorders]

    # The first three timestamps are the same for every hit; only '_ects' varies.
    shared_timestamp = 1360047661
    ects_per_hit = (1360047634, 1360047534, 1360047614)

    for index, ects in enumerate(ects_per_hit):
        args = recorded_args[index]
        for key in ('_idts', '_viewts', '_refts'):
            assert args[key] == shared_timestamp + shift
        assert args['_ects'] == ects + shift
Exemple #4
0
def test_regex_group_to_custom_var_options():
    """Check that --regex-group-to-visit-cvar and --regex-group-to-page-cvar map regex groups to custom vars."""

    log_path = 'logs/iis.log'

    # Other tests leave their own overrides in these module globals; reset them.
    options = import_logs.config.options
    options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = True
    options.regex_groups_to_ignore = set()

    # regex group name -> custom variable name
    visit_cvar_names = {'userid': "User Name", 'date': "The Date"}
    page_cvar_names = {
        'generation_time_milli': 'Geneartion Time',
        'referrer': 'The Referrer',
    }
    options.regex_group_to_visit_cvars_map = visit_cvar_names
    options.regex_group_to_page_cvars_map = page_cvar_names
    import_logs.parser.parse(log_path)

    recorded = [hit.__dict__ for hit in Recorder.recorders]
    first = recorded[0]

    # visit-scoped custom vars
    expected_visit_cvars = {
        1: ['The Date', '2012-04-01 00:00:13'],
        2: ['User Name', 'theuser'],
    }
    assert first['args']['_cvar'] == expected_visit_cvars

    # page-scoped custom vars
    expected_page_cvars = {1: ['Geneartion Time', '1687']}
    assert first['args']['cvar'] == expected_page_cvars

    # The plain hit attributes are still populated from the same groups.
    expected_attributes = (
        ('userid', 'theuser'),
        ('date', datetime.datetime(2012, 4, 1, 0, 0, 13)),
        ('generation_time_milli', 1687),
        ('referrer', ''),
    )
    for key, wanted in expected_attributes:
        assert first[key] == wanted
def test_ovh_parsing():
    """test parsing of ovh logs (which needs to be forced, as it's not autodetected)

    The 'ovh' format is selected explicitly and log_hostname is cleared for the
    duration of the test; the expected host is 'www.example.com'. log_hostname
    is put back to 'foo' afterwards, even when an assertion fails, so that a
    failure here does not leak the cleared value into other tests.
    """

    file_ = 'logs/ovh.log'

    # have to override previous globals override for this test
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = import_logs.FORMATS['ovh']
    import_logs.config.options.log_hostname = None
    try:
        import_logs.config.options.enable_http_redirects = True
        import_logs.config.options.enable_http_errors = True
        import_logs.config.options.replay_tracking = False
        import_logs.config.options.w3c_time_taken_in_millisecs = False
        import_logs.parser.parse(file_)

        hits = [hit.__dict__ for hit in Recorder.recorders]

        assert hits[0]['status'] == u'301'
        assert hits[0]['userid'] == u'theuser'
        assert hits[0]['is_error'] == False
        assert hits[0]['extension'] == u'/'
        assert hits[0]['is_download'] == False
        assert hits[0]['referrer'] == u''
        assert hits[0]['args'] == {'uid': u'theuser'}
        assert hits[0]['generation_time_milli'] == 0
        assert hits[0]['host'] == 'www.example.com'
        assert hits[0]['filename'] == 'logs/ovh.log'
        assert hits[0]['is_redirect'] == True
        # Plain decimal 7: a leading-zero literal such as 07 is a syntax error on Python 3.
        assert hits[0]['date'] == datetime.datetime(2012, 2, 10, 21, 42, 7)
        assert hits[0]['lineno'] == 0
        assert hits[0]['ip'] == u'1.2.3.4'
        assert hits[0]['query_string'] == ''
        assert hits[0]['path'] == u'/'
        assert hits[0]['is_robot'] == False
        assert hits[0]['full_path'] == u'/'
        assert hits[0][
            'user_agent'] == u'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'

        assert len(hits) == 1
    finally:
        # Restore the hostname override whether or not the assertions passed.
        import_logs.config.options.log_hostname = 'foo'
Exemple #6
0
def test_static_ignores():
    """Check that only one hit is recorded from the static-files log when enable_static is off."""
    log_path = 'logs/static_ignores.log'

    options = import_logs.config.options
    options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    options.enable_static = False
    # ensure robots.txt would be imported if not detected as static
    options.download_extensions = 'txt,doc'
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = False
    import_logs.parser.parse(log_path)

    recorded_args = [hit.args for hit in import_logs.Recorder.recorders]

    assert len(recorded_args) == 1
Exemple #7
0
def test_amazon_cloudfront_web_parsing():
    """Parse the Amazon CloudFront web log (extended W3C format) and check the recorded hit."""

    log_path = 'logs/amazon_cloudfront_web.log'

    # Other tests leave their own overrides in these module globals; reset them.
    options = import_logs.config.options
    options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = False
    import_logs.parser.parse(log_path)

    recorded = [hit.__dict__ for hit in Recorder.recorders]

    import_logs.logging.debug(recorded)

    # (attribute, expected value) pairs, checked in this order.
    expectations = (
        ('status', u'200'),
        ('userid', None),
        ('is_error', False),
        ('extension', u'html'),
        ('is_download', False),
        ('referrer', u'www.displaymyfiles.com'),
        ('args', {}),
        ('generation_time_milli', 1.0),
        ('host', 'foo'),
        ('filename', 'logs/amazon_cloudfront_web.log'),
        ('is_redirect', False),
        ('date', datetime.datetime(2014, 5, 23, 1, 13, 11)),
        ('lineno', 2),
        ('ip', u'192.0.2.10'),
        ('query_string', ''),
        ('path', u'/view/my/file.html'),
        ('is_robot', False),
        ('full_path', u'/view/my/file.html'),
        ('user_agent',
         u'Mozilla/4.0%20(compatible;%20MSIE%205.0b1;%20Mac_PowerPC)'),
    )
    first = recorded[0]
    for key, wanted in expectations:
        assert first[key] == wanted

    assert len(recorded) == 1
def test_elb_parsing():
    """test parsing of elb logs

    Parses 'logs/elb.log' with format autodetection (config.format is None)
    and checks every attribute of the single hit that gets recorded.
    """

    file_ = 'logs/elb.log'

    # have to override previous globals override for this test
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    import_logs.config.options.enable_http_redirects = True
    import_logs.config.options.enable_http_errors = True
    import_logs.config.options.replay_tracking = False
    import_logs.config.options.w3c_time_taken_in_millisecs = False
    import_logs.parser.parse(file_)

    hits = [hit.__dict__ for hit in Recorder.recorders]

    assert len(hits) == 1

    assert hits[0]['status'] == u'200'
    assert hits[0]['userid'] == None
    assert hits[0]['is_error'] == False
    assert hits[0]['extension'] == u'html'
    assert hits[0]['is_download'] == False
    assert hits[0]['referrer'] == ''
    assert hits[0]['args'] == {}
    assert hits[0]['generation_time_milli'] == 1.048
    assert hits[0]['host'] == 'foo'
    assert hits[0]['filename'] == 'logs/elb.log'
    assert hits[0]['is_redirect'] == False
    # Plain decimal 5: a leading-zero literal such as 05 is a syntax error on Python 3.
    assert hits[0]['date'] == datetime.datetime(2015, 5, 13, 23, 39, 43)
    assert hits[0]['lineno'] == 0
    assert hits[0]['ip'] == u'1.2.3.4'
    assert hits[0]['query_string'] == u''
    assert hits[0]['path'] == u'/path/index.html'
    assert hits[0]['is_robot'] == False
    assert hits[0]['full_path'] == u'/path/index.html'
    assert hits[0][
        'user_agent'] == u'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'
    assert hits[0]['length'] == 57
Exemple #9
0
def test_ignore_groups_option_removes_groups():
    """Check that groups listed via --ignore-groups do not carry log values into the hit."""

    log_path = 'logs/iis.log'

    # Other tests leave their own overrides in these module globals; reset them.
    options = import_logs.config.options
    options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = True
    ignored_groups = set(['userid', 'generation_time_milli'])
    options.regex_groups_to_ignore = ignored_groups
    import_logs.parser.parse(log_path)

    recorded = [hit.__dict__ for hit in Recorder.recorders]
    first = recorded[0]

    # Values the test expects for the ignored groups.
    assert first['userid'] == None
    assert first['generation_time_milli'] == 0
Exemple #10
0
def test_amazon_cloudfront_web_parsing():
    """Parse the Amazon CloudFront web log (extended W3C format) and check the recorded hit."""

    log_path = 'logs/amazon_cloudfront_web.log'

    # Other tests leave their own overrides in these module globals; reset them.
    options = import_logs.config.options
    options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = False
    import_logs.parser.parse(log_path)

    recorded = [hit.__dict__ for hit in Recorder.recorders]

    # (attribute, expected value) pairs, checked in this order.
    expectations = (
        ('status', u'200'),
        ('userid', None),
        ('is_error', False),
        ('extension', u'html'),
        ('is_download', False),
        ('referrer', u'https://example.com/'),
        ('args', {'cvar': {1: ['HTTP-method', 'GET']}}),
        ('generation_time_milli', 1.0),
        ('host', 'foo'),
        ('filename', 'logs/amazon_cloudfront_web.log'),
        ('is_redirect', False),
        ('date', datetime.datetime(2014, 5, 23, 1, 13, 11)),
        ('lineno', 2),
        ('ip', u'192.0.2.10'),
        ('query_string', ''),
        ('path', u'/view/my/file.html'),
        ('is_robot', False),
        ('full_path', u'/view/my/file.html'),
        ('user_agent',
         u'Mozilla/5.0 (Windows; U; Windows NT 6.1; de-DE) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/10.0.649.0 Safari/534.17'),
    )
    first = recorded[0]
    for key, wanted in expectations:
        assert first[key] == wanted

    assert len(recorded) == 1
Exemple #11
0
def test_shoutcast_parsing():
    """Parse the shoutcast log (extended W3C format) and check the first recorded hit."""

    log_path = 'logs/shoutcast.log'

    # Other tests leave their own overrides in these module globals; reset them.
    options = import_logs.config.options
    options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = False
    import_logs.parser.parse(log_path)

    recorded = [hit.__dict__ for hit in Recorder.recorders]

    # (attribute, expected value) pairs, checked in this order.
    expectations = (
        ('status', u'200'),
        ('userid', None),
        ('is_error', False),
        ('extension', u'/stream'),
        ('is_download', False),
        ('referrer', ''),
        ('args', {}),
        ('generation_time_milli', 1000.0),
        ('host', 'foo'),
        ('filename', 'logs/shoutcast.log'),
        ('is_redirect', False),
        ('date', datetime.datetime(2015, 12, 7, 10, 37, 5)),
        ('lineno', 3),
        ('ip', u'1.2.3.4'),
        ('query_string', u'title=UKR%20Nights'),
        ('path', u'/stream'),
        ('is_robot', False),
        ('full_path', u'/stream?title=UKR%20Nights'),
        ('user_agent', u'NSPlayer/10.0.0.3702 WMFSDK/10.0'),
        ('length', 65580),
    )
    first = recorded[0]
    for key, wanted in expectations:
        assert first[key] == wanted
Exemple #12
0
def test_netscaler_parsing():
    """Parse the netscaler log (extended W3C format) and check the first recorded hit."""

    log_path = 'logs/netscaler.log'

    # Other tests leave their own overrides in these module globals; reset them.
    options = import_logs.config.options
    options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = False
    import_logs.parser.parse(log_path)

    recorded = [hit.__dict__ for hit in Recorder.recorders]

    # (attribute, expected value) pairs, checked in this order.
    expectations = (
        ('status', u'302'),
        ('userid', None),
        ('is_error', False),
        ('extension', u'jsp'),
        ('is_download', False),
        ('referrer', ''),
        ('args', {}),
        ('generation_time_milli', 1000),
        ('host', 'foo'),
        ('filename', 'logs/netscaler.log'),
        ('is_redirect', True),
        ('date', datetime.datetime(2012, 8, 16, 11, 55, 13)),
        ('lineno', 4),
        ('ip', u'172.20.1.0'),
        ('query_string', ''),
        ('path', u'/Citrix/XenApp/Wan/auth/login.jsp'),
        ('is_robot', False),
        ('full_path', u'/Citrix/XenApp/Wan/auth/login.jsp'),
        ('user_agent',
         u'Mozilla/4.0+(compatible;+MSIE+7.0;+Windows+NT+5.1;+Trident/4.0;+.NET+CLR+1.1.4322;+.NET+CLR+2.0.50727;+.NET+CLR+3.0.04506.648;+.NET+CLR+3.5.21022)'),
    )
    first = recorded[0]
    for key, wanted in expectations:
        assert first[key] == wanted
Exemple #13
0
def test_custom_log_date_format_option():
    """Test that --log-date-format will change how dates are parsed in a custom log format.

    Builds a RegexFormat named 'custom' from a hand-written log regex and the
    date format '%B - %d, %Y:%H:%M:%S', parses the sample log with it and
    checks the date of the first recorded hit.
    """

    file_ = 'logs/custom_regex_custom_date.log'

    # have to override previous globals override for this test
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.options.w3c_field_regexes = None
    import_logs.config.options.regex_group_to_visit_cvars_map = None
    import_logs.config.options.regex_group_to_page_cvars_map = None
    # Raw strings keep '\S', '\s', '\[' and '\]' as regex escapes rather than
    # (invalid) string escapes; the resulting pattern text is unchanged.
    import_logs.config.options.log_format_regex = (
        r'(?P<ip>\S+)\s+\S+\s+\S+\s+\[(?P<date>.*?)\]\s+'
        r'"\S+\s+(?P<path>.*?)\s+\S+"\s+(?P<status>\S+)\s+(?P<length>\S+)')
    import_logs.config.options.log_date_format = '%B - %d, %Y:%H:%M:%S'
    import_logs.config.format = import_logs.RegexFormat(
        'custom', import_logs.config.options.log_format_regex,
        import_logs.config.options.log_date_format)

    import_logs.parser.parse(file_)

    hits = [hit.__dict__ for hit in Recorder.recorders]

    assert hits[0]['date'] == datetime.datetime(2012, 2, 10, 16, 42, 7)
Exemple #14
0
def test_amazon_cloudfront_rtmp_parsing():
    """Parse the Amazon CloudFront RTMP log (extended W3C format with event fields) and check both hits."""

    log_path = 'logs/amazon_cloudfront_rtmp.log'

    # Other tests leave their own overrides in these module globals; reset them.
    options = import_logs.config.options
    options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    options.w3c_time_taken_in_millisecs = False
    import_logs.parser.parse(log_path)

    recorded = [hit.__dict__ for hit in Recorder.recorders]

    # (attribute, expected value) pairs for the first hit, in check order.
    expected_connect = (
        ('is_download', False),
        ('ip', u'192.0.2.147'),
        ('is_redirect', False),
        ('filename', 'logs/amazon_cloudfront_rtmp.log'),
        ('event_category', 'cloudfront_rtmp'),
        ('event_action', u'connect'),
        ('lineno', 2),
        ('status', '200'),
        ('is_error', False),
        ('event_name', None),
        ('args', {}),
        ('host', 'foo'),
        ('date', datetime.datetime(2010, 3, 12, 23, 51, 20)),
        ('path', u'/shqshne4jdp4b6.cloudfront.net/cfx/st\u200b'),
        ('extension', u'net/cfx/st\u200b'),
        ('referrer', ''),
        ('userid', None),
        ('user_agent', u'LNX 10,0,32,18'),
        ('generation_time_milli', 0),
        ('query_string', u'key=value'),
        ('is_robot', False),
        ('full_path', u'/shqshne4jdp4b6.cloudfront.net/cfx/st\u200b'),
    )

    # Same for the second hit, which additionally carries a 'length'.
    expected_play = (
        ('is_download', False),
        ('ip', u'192.0.2.222'),
        ('is_redirect', False),
        ('filename', 'logs/amazon_cloudfront_rtmp.log'),
        ('event_category', 'cloudfront_rtmp'),
        ('event_action', u'play'),
        ('lineno', 3),
        ('status', '200'),
        ('is_error', False),
        ('event_name', u'myvideo'),
        ('args', {}),
        ('host', 'foo'),
        ('date', datetime.datetime(2010, 3, 12, 23, 51, 21)),
        ('path', u'/shqshne4jdp4b6.cloudfront.net/cfx/st\u200b'),
        ('extension', u'net/cfx/st\u200b'),
        ('referrer', ''),
        ('userid', None),
        ('length', 3914),
        ('user_agent', u'LNX 10,0,32,18'),
        ('generation_time_milli', 0),
        ('query_string', u'key=value'),
        ('is_robot', False),
        ('full_path', u'/shqshne4jdp4b6.cloudfront.net/cfx/st\u200b'),
    )

    for index, expectations in enumerate((expected_connect, expected_play)):
        current = recorded[index]
        for key, wanted in expectations:
            assert current[key] == wanted

    assert len(recorded) == 2
Exemple #15
0
def test_iis_custom_format():
    """Parse an IIS log whose field names are remapped through custom_w3c_fields and check three hits."""

    log_path = 'logs/iis_custom.log'

    # Other tests leave their own overrides in these module globals; reset them.
    options = import_logs.config.options
    # custom field name in the log -> field name it stands for
    options.custom_w3c_fields = {
        'date-local': 'date',
        'time-local': 'time',
        'cs(Host)': 'cs-host',
        'TimeTakenMS': 'time-taken',
    }
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    options.enable_http_redirects = True
    options.enable_http_errors = True
    options.replay_tracking = False
    # w3c_time_taken_in_millisecs is deliberately not set to True here: the
    # test checks that the right values come out even without it.
    import_logs.parser.parse(log_path)

    recorded = [hit.__dict__ for hit in Recorder.recorders]

    # Every hit in this log is expected to carry the same timestamp.
    shared_date = datetime.datetime(2012, 8, 15, 17, 0)

    # (attribute, expected value) pairs per hit, in check order.
    expected_per_hit = (
        (
            ('status', '200'),
            ('is_error', False),
            ('extension', u'/products/theproduct'),
            ('is_download', False),
            ('referrer',
             u'http://example.com/Search/SearchResults.pg?informationRecipient.languageCode.c=en'),
            ('args', {}),
            ('generation_time_milli', 109),
            ('host', 'foo'),
            ('filename', 'logs/iis_custom.log'),
            ('is_redirect', False),
            ('date', shared_date),
            ('lineno', 7),
            ('ip', u'70.95.0.0'),
            ('query_string', ''),
            ('path', u'/Products/theProduct'),
            ('is_robot', False),
            ('full_path', u'/Products/theProduct'),
            ('user_agent',
             u'Mozilla/5.0 (Linux; Android 4.4.4; SM-G900V Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.59 Mobile Safari/537.36'),
        ),
        (
            ('status', u'301'),
            ('is_error', False),
            ('extension', u'/topic/hw43061'),
            ('is_download', False),
            ('referrer', ''),
            ('args', {}),
            ('generation_time_milli', 0),
            ('host', 'foo'),
            ('filename', 'logs/iis_custom.log'),
            ('is_redirect', True),
            ('date', shared_date),
            ('lineno', 8),
            ('ip', '-'),
            ('query_string', ''),
            ('path', u'/Topic/hw43061'),
            ('is_robot', False),
            ('full_path', u'/Topic/hw43061'),
            ('user_agent',
             u'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36'),
        ),
        (
            ('status', u'404'),
            ('is_error', True),
            ('extension', u'/hello/world/6,681965'),
            ('is_download', False),
            ('referrer', ''),
            ('args', {}),
            ('generation_time_milli', 359),
            ('host', 'foo'),
            ('filename', 'logs/iis_custom.log'),
            ('is_redirect', False),
            ('date', shared_date),
            ('lineno', 9),
            ('ip', u'173.5.0.0'),
            ('query_string', ''),
            ('path', u'/hello/world/6,681965'),
            ('is_robot', False),
            ('full_path', u'/hello/world/6,681965'),
            ('user_agent',
             u'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36'),
        ),
    )

    for index, expectations in enumerate(expected_per_hit):
        current = recorded[index]
        for key, wanted in expectations:
            assert current[key] == wanted
Exemple #16
0
def test_replay_tracking_arguments():
    """Check the tracking arguments recorded for the first three hits of the sample log."""
    log_path = 'logs/logs_to_tests.log'

    import_logs.stats = import_logs.Statistics()
    import_logs.config = Config()
    import_logs.resolver = Resolver()
    import_logs.Recorder = Recorder()
    import_logs.parser = import_logs.Parser()
    import_logs.parser.parse(log_path)

    recorded_args = [hit.args for hit in import_logs.Recorder.recorders]

    # Arguments in check order. A value of None marks an argument that differs
    # between hits and is supplied by the per-hit table below.
    template = (
        ('_idn', '0'),
        ('ag', '1'),
        ('_viewts', '1360047661'),
        ('urlref', 'http://clearcode.cc/welcome'),
        ('_ref', 'http://piwik.org/thank-you-all/'),
        ('_idts', '1360047661'),
        ('java', '1'),
        ('res', '1680x1050'),
        ('idsite', '1'),
        ('realp', '0'),
        ('wma', '1'),
        ('_idvc', '1'),
        ('action_name', None),
        ('cookie', '1'),
        ('rec', '1'),
        ('qt', '1'),
        ('url', None),
        ('h', '17'),
        ('m', '31'),
        ('s', None),
        ('r', None),
        ('gears', '0'),
        ('fla', '1'),
        ('pdf', '1'),
        ('_id', '1da79fc743e8bcc4'),
        ('dir', '1'),
        ('_refts', '1360047661'),
    )

    # Hit-specific argument values, one dict per hit.
    per_hit = (
        {
            'action_name': 'Clearcode - Web and Mobile Development | Technology With Passion',
            'url': 'http://clearcode.cc/',
            's': '25',
            'r': '983420',
        },
        {
            'action_name': 'AdviserBrief - Track Your Investments and Plan Financial Future | Clearcode',
            'url': 'http://clearcode.cc/case/adviserbrief-track-your-investments-and-plan-financial-future/',
            's': '40',
            'r': '109464',
        },
        {
            'action_name': 'ATL Apps - American Tailgating League Mobile Android IOS Games | Clearcode',
            'url': 'http://clearcode.cc/case/atl-apps-mobile-android-ios-games/',
            's': '46',
            'r': '080064',
        },
    )

    for index, specific in enumerate(per_hit):
        args = recorded_args[index]
        for key, shared_value in template:
            assert args[key] == specific.get(key, shared_value)
Exemple #17
0
def test_incapsulaw3c_parsing():
    """test parsing of incapsula w3c logs (which needs to be forced, as it's not autodetected)

    The 'incapsula_w3c' format is selected explicitly and log_hostname is
    cleared; the expected host is 'www.example.com'. Both recorded hits are
    checked attribute by attribute.
    """

    file_ = 'logs/incapsula_w3c.log'

    # have to override previous globals override for this test
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = import_logs.FORMATS['incapsula_w3c']
    # NOTE(review): log_hostname is left as None when this test ends; confirm
    # whether it should be restored for the tests that run afterwards.
    import_logs.config.options.log_hostname = None
    import_logs.config.options.enable_http_redirects = True
    import_logs.config.options.enable_http_errors = True
    import_logs.config.options.replay_tracking = False
    import_logs.config.options.w3c_time_taken_in_millisecs = False
    import_logs.parser.parse(file_)

    hits = [hit.__dict__ for hit in Recorder.recorders]

    assert hits[0]['status'] == u'200'
    assert hits[0]['userid'] == None
    assert hits[0]['is_error'] == False
    assert hits[0]['extension'] == 'php'
    assert hits[0]['is_download'] == False
    assert hits[0]['referrer'] == u''
    assert hits[0]['args'] == {'cvar': {1: ['HTTP-method', u'"GET"']}}
    assert hits[0]['length'] == 10117
    assert hits[0]['generation_time_milli'] == 0
    assert hits[0]['host'] == 'www.example.com'
    assert hits[0]['filename'] == 'logs/incapsula_w3c.log'
    assert hits[0]['is_redirect'] == False
    # Plain decimal 7: a leading-zero literal such as 07 is a syntax error on Python 3.
    assert hits[0]['date'] == datetime.datetime(2017, 6, 28, 7, 26, 35)
    assert hits[0]['lineno'] == 0
    assert hits[0]['ip'] == u'123.123.123.123'
    assert hits[0]['query_string'] == u'variable=test'
    assert hits[0]['path'] == u'/page.php'
    assert hits[0]['is_robot'] == False
    assert hits[0]['full_path'] == u'/page.php'
    assert hits[0][
        'user_agent'] == u'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'

    assert hits[1]['status'] == u'200'
    assert hits[1]['userid'] == None
    assert hits[1]['is_error'] == False
    assert hits[1]['extension'] == '/rss/news'
    assert hits[1]['is_download'] == False
    assert hits[1]['referrer'] == u''
    # NOTE(review): this re-checks hits[0] inside the hits[1] section; it was
    # possibly meant to check hits[1]['args'] - confirm against the log fixture.
    assert hits[0]['args'] == {'cvar': {1: ['HTTP-method', u'"GET"']}}
    assert hits[1]['length'] == 0
    assert hits[1]['generation_time_milli'] == 0
    assert hits[1]['host'] == 'www.example.com'
    assert hits[1]['filename'] == 'logs/incapsula_w3c.log'
    assert hits[1]['is_redirect'] == False
    assert hits[1]['date'] == datetime.datetime(2017, 6, 26, 18, 21, 17)
    assert hits[1]['lineno'] == 1
    assert hits[1]['ip'] == u'125.125.125.125'
    assert hits[1]['query_string'] == u''
    assert hits[1]['path'] == '/rss/news'
    assert hits[1]['is_robot'] == False
    assert hits[1]['full_path'] == u'/rss/news'
    assert hits[1][
        'user_agent'] == u'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:45.0) Gecko/20100101 Thunderbird/45.8.0 Lightning/4.7.8'

    assert len(hits) == 2