# Assumed import paths for the os-urlpattern package under test.
from os_urlpattern.config import get_default_config
from os_urlpattern.parse_utils import pack
from os_urlpattern.pattern_maker import PatternMaker
from os_urlpattern.utils import dump_tree


def cluster_and_test(urls, pattern_string):
    # Load the URLs, cluster them, and check that every clustered tree packs back into the expected pattern string.
    pm = PatternMaker(get_default_config())
    for url in urls:
        pm.load(url)

    for url_meta, clustered in pm.make(combine=True):
        for nodes in dump_tree(clustered):
            assert pack(url_meta,
                        [n.value for n in nodes[1:]]) == pattern_string
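For exploration outside a test run, the same clustering loop can report whatever patterns the maker produces instead of asserting a known one. This is a minimal sketch assuming os-urlpattern is installed and the import paths guessed above are correct; the sample URLs are made-up placeholders.

def print_clustered_patterns(urls):
    # Same flow as cluster_and_test above, but printing instead of asserting.
    pm = PatternMaker(get_default_config())
    for url in urls:
        pm.load(url)
    for url_meta, clustered in pm.make(combine=True):
        for nodes in dump_tree(clustered):
            print(pack(url_meta, [n.value for n in nodes[1:]]))


print_clustered_patterns([
    'http://example.com/article/001.html',
    'http://example.com/article/002.html',
    'http://example.com/article/003.html',
])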
Example 2
# Assumed import path; analyze_url and pack are os-urlpattern parse helpers.
from os_urlpattern.parse_utils import analyze_url, pack


def test_unpack_pack():
    # Each URL should pack into the expected pattern string, with the query
    # delimiter escaped as [\?].
    data = [
        ('http://www.g.com/', '/'),
        ('http://www.g.com/abc', '/abc'),
        ('http://www.g.com/abc?a=1#c', '/abc[\\?]a=1#c'),
        ('http://www.g.com/abc???a=1#c', '/abc[\\?][\\?]{2}a=1#c'),
        ('http://www.g.com/abc?=1#c', '/abc[\\?]=1#c'),
        ('http://www.g.com/abc?a=1#', '/abc[\\?]a=1#'),
        ('http://www.g.com/abc?a=1&b=2#', '/abc[\\?]a=1&b=2#'),
    ]
    for url, expected in data:
        assert pack(*analyze_url(url)) == expected
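The expected strings above follow one visible convention: the first '?' (the query delimiter) becomes the escaped token [\?], and any further run of '?' characters is collapsed with a {n} quantifier. The toy sketch below reproduces that convention with plain string handling so the table can be checked by hand; the helper names are made up, and this is not os-urlpattern's actual analyze_url/pack implementation.

import re


def _after_host(url):
    # Keep everything from the first '/' after 'scheme://host' onwards.
    return url[url.index('/', url.index('://') + 3):]


def pack_sketch(url):
    # Escape the query delimiter as [\?]; compress any further run of '?'
    # into [\?]{n}, mirroring the expected column above.
    tail = _after_host(url)
    if '?' not in tail:
        return tail
    path, query = tail.split('?', 1)

    def _quantify(match):
        run = len(match.group(0))
        return '[\\?]' if run == 1 else '[\\?]{%d}' % run

    return path + '[\\?]' + re.sub(r'\?+', _quantify, query)


print(pack_sketch('http://www.g.com/abc?a=1#c'))    # /abc[\?]a=1#c
print(pack_sketch('http://www.g.com/abc???a=1#c'))  # /abc[\?][\?]{2}a=1#c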
Example 3
# Assumed import path for the parse helpers exercised below.
from os_urlpattern.parse_utils import (analyze_url,
                                       analyze_url_pattern_string, pack)


def test_parse_url_pattern():
    # Round trip: pack a URL into a pattern string, re-parse the pattern,
    # and check that the URL meta and the number of parts are preserved.
    data = [
        'http://www.g.com/',
        'http://www.g.com/abc',
        'http://www.g.com/abc?a=1#c',
        'http://www.g.com/abc???a=1#c',
        'http://www.g.com/abc?=1#c',
        'http://www.g.com/abc?a=1#',
        'http://www.g.com/abc?a=1&b=2#',
    ]
    for url in data:
        meta1, parts1 = analyze_url(url)
        pattern_string = pack(meta1, parts1)
        meta2, parts2 = analyze_url_pattern_string(pattern_string)
        assert meta1 == meta2
        assert len(parts1) == len(parts2)
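A variant of the same round trip, split out with pytest's parametrize so each URL reports its failure independently. It is a sketch under the same assumed import path as above and uses only the calls already shown in the examples.

import pytest

from os_urlpattern.parse_utils import (analyze_url,
                                       analyze_url_pattern_string, pack)


@pytest.mark.parametrize('url', [
    'http://www.g.com/',
    'http://www.g.com/abc?a=1#c',
    'http://www.g.com/abc?a=1&b=2#',
])
def test_parse_url_pattern_roundtrip(url):
    # Same property as above: packing and re-parsing keeps the URL meta
    # and the number of parts intact.
    meta1, parts1 = analyze_url(url)
    pattern_string = pack(meta1, parts1)
    meta2, parts2 = analyze_url_pattern_string(pattern_string)
    assert meta1 == meta2
    assert len(parts1) == len(parts2)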