Esempio n. 1
0
def test_source_common():
    """
    Read EXAMPLE_01 through ApacheSource (no explicit log_format) and check
    every field of the two rows it is expected to yield.
    """
    log_lines = EXAMPLE_01.splitlines(True)
    with apache.ApacheSource(log_lines) as source:
        entry = None
        for index, entry in enumerate(source):
            if index == 0:
                assert entry.remote_host == dt.hostname('64.242.88.10')
                assert entry.ident is None
                assert entry.remote_user is None
                assert entry.time == dt.DateTime(2004, 3, 8, 0, 56, 39)
                wanted_url = dt.url('/twiki/bin/view/Sandbox/WebHome?rev=1.6')
                assert entry.request == dt.Request(
                    'GET', wanted_url, 'HTTP/1.1')
                assert entry.status == 200
                assert entry.size == 8545
                continue
            if index == 1:
                assert entry.remote_host == dt.hostname('lordgun.org')
                assert entry.ident is None
                assert entry.remote_user == 'foo'
                assert entry.time == dt.DateTime(2004, 3, 8, 1, 1, 53)
                wanted_url = dt.url('/razor.html')
                assert entry.request == dt.Request(
                    'GET', wanted_url, 'HTTP/1.0')
                assert entry.status == 302
                assert entry.size == 2869
                continue
            # Only two rows are expected; any further row is a failure
            assert False
        # entry is still None (falsy) if the source yielded nothing
        assert entry
        assert index == 1
Esempio n. 2
0
def test_source_combined():
    """
    Read EXAMPLE_02 through ApacheSource with log_format=apache.COMBINED and
    check every field of the two rows it is expected to yield, including the
    Referer and User-Agent request headers.
    """
    log_lines = EXAMPLE_02.splitlines(True)
    with apache.ApacheSource(log_lines, log_format=apache.COMBINED) as source:
        entry = None
        for index, entry in enumerate(source):
            if index == 0:
                assert entry.remote_host == dt.hostname('78.86.48.95')
                assert entry.ident is None
                assert entry.remote_user is None
                assert entry.time == dt.DateTime(2011, 10, 27, 23, 0, 5)
                wanted_url = dt.url('/template/images/ITSheader.jpg')
                assert entry.request == dt.Request(
                    'GET', wanted_url, 'HTTP/1.1')
                assert entry.status == 200
                assert entry.size == 14745
                assert entry.req_Referer is None
                wanted_agent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/4.0; byond_4.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; InfoPath.2; OfficeLiveConnector.1.5; OfficeLivePatch.1.3; .NET4.0E; .NET4.0C)'
                assert entry.req_User_Agent == wanted_agent
                continue
            if index == 1:
                assert entry.remote_host == dt.hostname('217.129.225.117')
                assert entry.ident is None
                assert entry.remote_user is None
                assert entry.time == dt.DateTime(2011, 10, 27, 23, 0, 7)
                wanted_url = dt.url('/images/spacer.gif')
                assert entry.request == dt.Request(
                    'GET', wanted_url, 'HTTP/1.1')
                assert entry.status == 200
                assert entry.size == 43
                wanted_referer = dt.url('http://eprints.lse.ac.uk/33718/')
                assert entry.req_Referer == wanted_referer
                wanted_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR; rv:1.9.2.23) Gecko/20110920 Firefox/3.6.23'
                assert entry.req_User_Agent == wanted_agent
                continue
            # Only two rows are expected; any further row is a failure
            assert False
        # entry is still None (falsy) if the source yielded nothing
        assert entry
        assert index == 1
Esempio n. 3
0
def rows_null_first():
    """
    Return a list of three sample Row records.

    The first record carries None in its third and fourth positions; the
    last record carries None in its final position.
    """
    null_row = Row(
        datatypes.datetime('2002-06-24 16:40:23'),
        datatypes.address('172.224.24.114'),
        None, None, 0.01, 408, 0)
    get_row = Row(
        datatypes.datetime('2002-05-02 20:18:01'),
        datatypes.address('172.22.255.255'),
        'GET', datatypes.url('/images/picture.jpg'), 0.1, 302, 16328)
    head_row = Row(
        datatypes.datetime('2002-05-29 12:34:56'),
        datatypes.address('9.180.235.203'),
        'HEAD', datatypes.url('/images/picture.jpg'), 0.1, 202, None)
    return [null_row, get_row, head_row]
Esempio n. 4
0
def test_source_combined():
    """
    Read EXAMPLE_02 through ApacheSource with log_format=apache.COMBINED and
    check every field of the two rows it is expected to yield, including the
    Referer and User-Agent request headers.
    """
    log_lines = EXAMPLE_02.splitlines(True)
    with apache.ApacheSource(log_lines, log_format=apache.COMBINED) as source:
        record = None
        for position, record in enumerate(source):
            if position == 0:
                assert record.remote_host == dt.hostname('78.86.48.95')
                assert record.ident is None
                assert record.remote_user is None
                assert record.time == dt.DateTime(2011, 10, 27, 23, 0, 5)
                header_image = dt.url('/template/images/ITSheader.jpg')
                assert record.request == dt.Request(
                    'GET', header_image, 'HTTP/1.1')
                assert record.status == 200
                assert record.size == 14745
                assert record.req_Referer is None
                msie_agent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/4.0; byond_4.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; InfoPath.2; OfficeLiveConnector.1.5; OfficeLivePatch.1.3; .NET4.0E; .NET4.0C)'
                assert record.req_User_Agent == msie_agent
                continue
            if position == 1:
                assert record.remote_host == dt.hostname('217.129.225.117')
                assert record.ident is None
                assert record.remote_user is None
                assert record.time == dt.DateTime(2011, 10, 27, 23, 0, 7)
                spacer_image = dt.url('/images/spacer.gif')
                assert record.request == dt.Request(
                    'GET', spacer_image, 'HTTP/1.1')
                assert record.status == 200
                assert record.size == 43
                referer = dt.url('http://eprints.lse.ac.uk/33718/')
                assert record.req_Referer == referer
                firefox_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR; rv:1.9.2.23) Gecko/20110920 Firefox/3.6.23'
                assert record.req_User_Agent == firefox_agent
                continue
            # A third (or later) row means the source produced too much
            assert False
        # record remains None (falsy) when nothing was yielded
        assert record
        assert position == 1
Esempio n. 5
0
def rows():
    """
    Return a list of three sample Row records (a POST, a GET and a HEAD
    entry); the last record carries None in its final position.
    """
    post_row = Row(
        datatypes.datetime('2002-06-24 16:40:23'),
        datatypes.address('172.224.24.114'),
        'POST', datatypes.url('/Default.htm'), 0.67, 200, 7930)
    get_row = Row(
        datatypes.datetime('2002-05-02 20:18:01'),
        datatypes.address('172.22.255.255'),
        'GET', datatypes.url('/images/picture.jpg'), 0.1, 302, 16328)
    head_row = Row(
        datatypes.datetime('2002-05-29 12:34:56'),
        datatypes.address('9.180.235.203'),
        'HEAD', datatypes.url('/images/picture.jpg'), 0.1, 202, None)
    return [post_row, get_row, head_row]
Esempio n. 6
0
def rows():
    """
    Build three sample records using a Row namedtuple defined locally.

    The tuple fields are timestamp, client, method, url, time_taken, status
    and size; the last record has a size of None.
    """
    Row = namedtuple(
        'Row', 'timestamp client method url time_taken status size')
    post_row = Row(
        timestamp=datatypes.datetime('2002-06-24 16:40:23'),
        client=datatypes.address('172.224.24.114'),
        method='POST',
        url=datatypes.url('/Default.htm'),
        time_taken=0.67,
        status=200,
        size=7930,
    )
    get_row = Row(
        timestamp=datatypes.datetime('2002-05-02 20:18:01'),
        client=datatypes.address('172.22.255.255'),
        method='GET',
        url=datatypes.url('/images/picture.jpg'),
        time_taken=0.1,
        status=302,
        size=16328,
    )
    head_row = Row(
        timestamp=datatypes.datetime('2002-05-29 12:34:56'),
        client=datatypes.address('9.180.235.203'),
        method='HEAD',
        url=datatypes.url('/images/picture.jpg'),
        time_taken=0.1,
        status=202,
        size=None,
    )
    return [post_row, get_row, head_row]
Esempio n. 7
0
def test_source_date_formats():
    """
    Read EXAMPLE_04 through ApacheSource using a log_format with a custom
    strftime-style time specification, and check the fields of the two rows
    it is expected to yield.
    """
    custom_format = "%{%Y-%m-%dT%H:%M:%S%z}t %H %m %U%q %>s %O"
    log_lines = EXAMPLE_04.splitlines(True)
    with apache.ApacheSource(log_lines, log_format=custom_format) as source:
        entry = None
        for index, entry in enumerate(source):
            if index == 0:
                assert entry.time == dt.datetime('2004-03-08 00:56:39')
                assert entry.method == 'GET'
                assert entry.protocol == 'HTTP/1.0'
                wanted_stem = dt.url('/twiki/bin/view/Sandbox/WebHome')
                assert entry.url_stem == wanted_stem
                assert entry.url_query == dt.url('?rev=1.6')
                assert entry.status == 200
                assert entry.bytes_sent == 8545
                continue
            if index == 1:
                assert entry.time == dt.datetime('2004-03-07 22:01:53')
                assert entry.method == 'HEAD'
                assert entry.protocol == 'HTTP/1.1'
                assert entry.url_stem == dt.url('/razor.html')
                assert entry.url_query is None
                assert entry.status == 302
                assert entry.bytes_sent == 2869
        # entry is still None (falsy) if the source yielded nothing
        assert entry
        # The last index seen must be 1, i.e. exactly two rows were produced
        assert index == 1
Esempio n. 8
0
def test_request():
    """
    Check that dt.request parses well-formed request lines into dt.Request
    values and raises ValueError for the malformed ones listed below.
    """
    assert dt.request('OPTIONS * HTTP/1.1') == dt.Request(
        'OPTIONS', None, 'HTTP/1.1')
    assert dt.request('GET / HTTP/1.0') == dt.Request(
        'GET', dt.url('/'), 'HTTP/1.0')
    assert dt.request('POST /foo/bar/baz?query HTTP/1.0') == dt.Request(
        'POST', dt.url('/foo/bar/baz?query'), 'HTTP/1.0')
    # No assert inside pytest.raises: the call itself must raise, so its
    # return value is irrelevant
    with pytest.raises(ValueError):
        dt.request('')
    with pytest.raises(ValueError):
        dt.request('GET')
    with pytest.raises(ValueError):
        dt.request('GET  HTTP/1.0')
    with pytest.raises(ValueError):
        dt.request('GET /foo/bar')
Esempio n. 9
0
def test_request():
    """
    Check that dt.request parses well-formed request lines into dt.Request
    values and raises ValueError for the malformed ones listed below.
    """
    assert dt.request('OPTIONS * HTTP/1.1') == dt.Request(
        'OPTIONS', None, 'HTTP/1.1')
    assert dt.request('GET / HTTP/1.0') == dt.Request('GET', dt.url('/'),
                                                      'HTTP/1.0')
    assert dt.request('POST /foo/bar/baz?query HTTP/1.0') == dt.Request(
        'POST', dt.url('/foo/bar/baz?query'), 'HTTP/1.0')
    # No assert inside pytest.raises: the call itself must raise, so its
    # return value is irrelevant
    with pytest.raises(ValueError):
        dt.request('')
    with pytest.raises(ValueError):
        dt.request('GET')
    with pytest.raises(ValueError):
        dt.request('GET  HTTP/1.0')
    with pytest.raises(ValueError):
        dt.request('GET /foo/bar')
Esempio n. 10
0
def test_url_query():
    """
    Check membership tests and item lookup on the query attribute of a
    parsed URL; each looked-up value is expected to be a list of strings.
    """
    url = dt.url('http://foo/bar?baz=quux&x=1&y=')
    assert 'baz' in url.query
    assert 'x' in url.query
    assert 'y' in url.query
    assert 'z' not in url.query
    assert url.query['baz'] == ['quux']
    assert url.query['x'] == ['1']
    # A key given with an empty value maps to a list holding an empty string
    assert url.query['y'] == ['']
Esempio n. 11
0
def test_url_query():
    """
    Check membership tests and item lookup on the query attribute of a
    parsed URL; each looked-up value is expected to be a list of strings.
    """
    url = dt.url('http://foo/bar?baz=quux&x=1&y=')
    assert 'baz' in url.query
    assert 'x' in url.query
    assert 'y' in url.query
    assert 'z' not in url.query
    assert url.query['baz'] == ['quux']
    assert url.query['x'] == ['1']
    # A key given with an empty value maps to a list holding an empty string
    assert url.query['y'] == ['']
Esempio n. 12
0
def rows():
    """
    Build three sample records using a Row namedtuple defined locally.

    The tuple fields are timestamp, client, method, url, time_taken, status
    and size; the last record has a size of None.
    """
    field_names = [
        'timestamp', 'client', 'method', 'url', 'time_taken', 'status',
        'size',
    ]
    Row = namedtuple('Row', field_names)
    first = Row(
        timestamp=datatypes.datetime('2002-06-24 16:40:23'),
        client=datatypes.address('172.224.24.114'),
        method='POST',
        url=datatypes.url('/Default.htm'),
        time_taken=0.67,
        status=200,
        size=7930,
    )
    second = Row(
        timestamp=datatypes.datetime('2002-05-02 20:18:01'),
        client=datatypes.address('172.22.255.255'),
        method='GET',
        url=datatypes.url('/images/picture.jpg'),
        time_taken=0.1,
        status=302,
        size=16328,
    )
    third = Row(
        timestamp=datatypes.datetime('2002-05-29 12:34:56'),
        client=datatypes.address('9.180.235.203'),
        method='HEAD',
        url=datatypes.url('/images/picture.jpg'),
        time_taken=0.1,
        status=202,
        size=None,
    )
    return [first, second, third]
Esempio n. 13
0
def test_source_normal():
    """
    Run IISSource over INTERNET_EXAMPLE and then INTRANET_EXAMPLE, checking
    the attributes exposed by the source and the fields of every row yielded.
    """
    with iis.IISSource(INTERNET_EXAMPLE.splitlines(True)) as source:
        internet_fields = [
            'date', 'time', 'c-ip', 'cs-username', 's-ip', 's-port',
            'cs-method', 'cs-uri-stem', 'cs-uri-query', 'sc-status',
            'sc-bytes', 'cs-bytes', 'time-taken', 'cs(User-Agent)',
            'cs(Referrer)',
        ]
        entry = None
        for index, entry in enumerate(source):
            # Source-level attributes, re-checked on every iteration
            assert source.version == '1.0'
            assert source.software == 'Microsoft Internet Information Services 6.0'
            assert source.date == dt.DateTime(2002, 5, 24, 20, 18, 1)
            assert source.fields == internet_fields
            # Row-level values; every yielded row must match them
            assert entry.date == dt.Date(2002, 5, 24)
            assert entry.time == dt.Time(20, 18, 1)
            assert str(entry.c_ip) == '172.224.24.114'
            assert entry.cs_username is None
            assert str(entry.s_ip) == '206.73.118.24'
            assert entry.s_port == 80
            assert entry.cs_method == 'GET'
            assert str(entry.cs_uri_stem) == '/Default.htm'
            assert entry.cs_uri_query is None
            assert entry.sc_status == 200
            assert entry.sc_bytes == 7930
            assert entry.cs_bytes == 248
            assert entry.time_taken == 31.0
            assert entry.cs_User_Agent == 'Mozilla/4.0 (compatible; MSIE 5.01; Windows 2000 Server)'
            assert entry.cs_Referrer == dt.url('http://64.224.24.114/')
        # entry is still None (falsy) if the source yielded nothing
        assert entry
        assert index + 1 == source.count
    with iis.IISSource(INTRANET_EXAMPLE.splitlines(True)) as source:
        intranet_fields = [
            'date', 'time', 'c-ip', 'cs-username', 's-ip', 's-port',
            'cs-method', 'cs-uri-stem', 'cs-uri-query', 'sc-status',
            'cs(User-Agent)',
        ]
        entry = None
        for index, entry in enumerate(source):
            assert source.fields == intranet_fields
            assert entry.date == dt.Date(2002, 5, 2)
            assert entry.time == dt.Time(17, 42, 15)
            assert str(entry.c_ip) == '172.22.255.255'
            assert entry.cs_username is None
            assert str(entry.s_ip) == '172.30.255.255'
            assert entry.s_port == 80
            assert entry.cs_method == 'GET'
            assert str(entry.cs_uri_stem) == '/images/picture.jpg'
            assert entry.cs_uri_query is None
            assert entry.sc_status == 200
            assert entry.cs_User_Agent == 'Mozilla/4.0 (compatible;MSIE 5.5; Windows 2000 Server)'
        assert entry
        assert index + 1 == source.count
Esempio n. 14
0
def url_parse(s):
    """
    Parse a URL string taken from a log file.

    The work is delegated to :func:`dt.url`, a variant of the standard
    Python urlparse.urlparse function whose result type is extended with a
    :meth:`~lars.datatypes.Url.__str__` method that outputs the
    reconstructed URL, and with specialized hostname and path properties
    returning enhanced objects instead of simple strings.

    :param str s: The string containing the URI to parse
    :returns: A :class:`~lars.datatypes.Url` tuple representing the URL, or
        None when *s* is ``'-'`` or the empty string
    """
    # '-' and '' are both treated as "no URL present"
    if s in ('-', ''):
        return None
    return dt.url(s)
Esempio n. 15
0
def test_source_date_formats():
    """
    Read EXAMPLE_04 through ApacheSource using a log_format with a custom
    strftime-style time specification, and check the fields of the two rows
    it is expected to yield.
    """
    time_format = "%{%Y-%m-%dT%H:%M:%S%z}t %H %m %U%q %>s %O"
    sample = EXAMPLE_04.splitlines(True)
    with apache.ApacheSource(sample, log_format=time_format) as source:
        record = None
        for position, record in enumerate(source):
            if position == 0:
                assert record.time == dt.datetime('2004-03-08 00:56:39')
                assert record.method == 'GET'
                assert record.protocol == 'HTTP/1.0'
                stem = dt.url('/twiki/bin/view/Sandbox/WebHome')
                assert record.url_stem == stem
                assert record.url_query == dt.url('?rev=1.6')
                assert record.status == 200
                assert record.bytes_sent == 8545
                continue
            if position == 1:
                assert record.time == dt.datetime('2004-03-07 22:01:53')
                assert record.method == 'HEAD'
                assert record.protocol == 'HTTP/1.1'
                assert record.url_stem == dt.url('/razor.html')
                assert record.url_query is None
                assert record.status == 302
                assert record.bytes_sent == 2869
        # record remains None (falsy) when nothing was yielded
        assert record
        # The last position seen must be 1, i.e. exactly two rows
        assert position == 1
Esempio n. 16
0
def test_source_common():
    """
    Read EXAMPLE_01 through ApacheSource (no explicit log_format) and check
    every field of the two rows it is expected to yield.
    """
    sample = EXAMPLE_01.splitlines(True)
    with apache.ApacheSource(sample) as source:
        record = None
        for position, record in enumerate(source):
            if position == 0:
                assert record.remote_host == dt.hostname('64.242.88.10')
                assert record.ident is None
                assert record.remote_user is None
                assert record.time == dt.DateTime(2004, 3, 8, 0, 56, 39)
                target = dt.url('/twiki/bin/view/Sandbox/WebHome?rev=1.6')
                assert record.request == dt.Request('GET', target, 'HTTP/1.1')
                assert record.status == 200
                assert record.size == 8545
                continue
            if position == 1:
                assert record.remote_host == dt.hostname('lordgun.org')
                assert record.ident is None
                assert record.remote_user == 'foo'
                assert record.time == dt.DateTime(2004, 3, 8, 1, 1, 53)
                target = dt.url('/razor.html')
                assert record.request == dt.Request('GET', target, 'HTTP/1.0')
                assert record.status == 302
                assert record.size == 2869
                continue
            # A third (or later) row means the source produced too much
            assert False
        # record remains None (falsy) when nothing was yielded
        assert record
        assert position == 1
Esempio n. 17
0
def test_url():
    """
    Check that dt.url yields the expected dt.Url tuples for several inputs,
    then inspect the individual attributes of one parsed URL.
    """
    cases = [
        ('foo', dt.Url('', '', 'foo', '', '', '')),
        ('//foo/bar', dt.Url('', 'foo', '/bar', '', '', '')),
        ('http://foo/', dt.Url('http', 'foo', '/', '', '', '')),
        ('http://foo/bar?baz=quux',
         dt.Url('http', 'foo', '/bar', '', 'baz=quux', '')),
        ('https://foo/bar#baz',
         dt.Url('https', 'foo', '/bar', '', '', 'baz')),
    ]
    for text, wanted in cases:
        assert dt.url(text) == wanted
    parsed = dt.url('http://localhost/foo/bar#baz')
    assert parsed.scheme == 'http'
    assert parsed.netloc == 'localhost'
    assert parsed.path == dt.Path('/foo', 'bar', '')
    assert parsed.path_str == '/foo/bar'
    assert parsed.fragment == 'baz'
    # The URL has no credentials and no explicit port
    assert parsed.username is None
    assert parsed.password is None
    assert parsed.port is None
    assert parsed.hostname == dt.hostname('localhost')
    assert parsed.hostname.address == dt.address('127.0.0.1')
Esempio n. 18
0
def test_url():
    """
    Check that dt.url yields the expected dt.Url tuples for several inputs,
    then inspect the individual attributes of one parsed URL.
    """
    expectations = (
        ('foo', dt.Url('', '', 'foo', '', '', '')),
        ('//foo/bar', dt.Url('', 'foo', '/bar', '', '', '')),
        ('http://foo/', dt.Url('http', 'foo', '/', '', '', '')),
        (
            'http://foo/bar?baz=quux',
            dt.Url('http', 'foo', '/bar', '', 'baz=quux', ''),
        ),
        (
            'https://foo/bar#baz',
            dt.Url('https', 'foo', '/bar', '', '', 'baz'),
        ),
    )
    for source_text, expected in expectations:
        assert dt.url(source_text) == expected
    result = dt.url('http://localhost/foo/bar#baz')
    assert result.scheme == 'http'
    assert result.netloc == 'localhost'
    assert result.path == dt.Path('/foo', 'bar', '')
    assert result.path_str == '/foo/bar'
    assert result.fragment == 'baz'
    # The URL has no credentials and no explicit port
    assert result.username is None
    assert result.password is None
    assert result.port is None
    assert result.hostname == dt.hostname('localhost')
    assert result.hostname.address == dt.address('127.0.0.1')
Esempio n. 19
0
def test_request_parse():
    """
    Check that parsers.request_parse returns None for '-' and a
    datatypes.Request for well-formed request lines.
    """
    assert parsers.request_parse('-') is None
    options_request = datatypes.Request('OPTIONS', None, 'HTTP/1.0')
    assert parsers.request_parse('OPTIONS * HTTP/1.0') == options_request
    get_request = datatypes.Request(
        'GET', datatypes.url('/foo/bar'), 'HTTP/1.1')
    assert parsers.request_parse('GET /foo/bar HTTP/1.1') == get_request
Esempio n. 20
0
def test_source_normal():
    """
    Run IISSource over INTERNET_EXAMPLE and then INTRANET_EXAMPLE, checking
    the attributes exposed by the source and the fields of every row yielded.
    """
    with iis.IISSource(INTERNET_EXAMPLE.splitlines(True)) as source:
        wanted_fields = [
            'date', 'time', 'c-ip', 'cs-username', 's-ip', 's-port',
            'cs-method', 'cs-uri-stem', 'cs-uri-query', 'sc-status',
            'sc-bytes', 'cs-bytes', 'time-taken', 'cs(User-Agent)',
            'cs(Referrer)',
        ]
        record = None
        for position, record in enumerate(source):
            # Source-level attributes, re-checked on every iteration
            assert source.version == '1.0'
            assert source.software == 'Microsoft Internet Information Services 6.0'
            assert source.date == dt.DateTime(2002, 5, 24, 20, 18, 1)
            assert source.fields == wanted_fields
            # Row-level values; every yielded row must match them
            assert record.date == dt.Date(2002, 5, 24)
            assert record.time == dt.Time(20, 18, 1)
            assert str(record.c_ip) == '172.224.24.114'
            assert record.cs_username is None
            assert str(record.s_ip) == '206.73.118.24'
            assert record.s_port == 80
            assert record.cs_method == 'GET'
            assert str(record.cs_uri_stem) == '/Default.htm'
            assert record.cs_uri_query is None
            assert record.sc_status == 200
            assert record.sc_bytes == 7930
            assert record.cs_bytes == 248
            assert record.time_taken == 31.0
            assert record.cs_User_Agent == 'Mozilla/4.0 (compatible; MSIE 5.01; Windows 2000 Server)'
            assert record.cs_Referrer == dt.url('http://64.224.24.114/')
        # record remains None (falsy) when nothing was yielded
        assert record
        assert position + 1 == source.count
    with iis.IISSource(INTRANET_EXAMPLE.splitlines(True)) as source:
        wanted_fields = [
            'date', 'time', 'c-ip', 'cs-username', 's-ip', 's-port',
            'cs-method', 'cs-uri-stem', 'cs-uri-query', 'sc-status',
            'cs(User-Agent)',
        ]
        record = None
        for position, record in enumerate(source):
            assert source.fields == wanted_fields
            assert record.date == dt.Date(2002, 5, 2)
            assert record.time == dt.Time(17, 42, 15)
            assert str(record.c_ip) == '172.22.255.255'
            assert record.cs_username is None
            assert str(record.s_ip) == '172.30.255.255'
            assert record.s_port == 80
            assert record.cs_method == 'GET'
            assert str(record.cs_uri_stem) == '/images/picture.jpg'
            assert record.cs_uri_query is None
            assert record.sc_status == 200
            assert record.cs_User_Agent == 'Mozilla/4.0 (compatible;MSIE 5.5; Windows 2000 Server)'
        assert record
        assert position + 1 == source.count