Example #1
def test_uri_part_tokenizer():
    text = 'http://a.b/foo/bar?c=d#stuff'
    pattern = ANALYSIS_SETTINGS['tokenizer']['uri_part']['pattern']
    assert(re.split(pattern, text) == [
        'http', '', '', 'a', 'b', 'foo', 'bar', 'c', 'd', 'stuff'
    ])

    text = url_quote_plus(text)
    assert(re.split(pattern, 'http://jump.to/?u=' + text) == [
        'http', '', '', 'jump', 'to', '', 'u',
        'http', '', '', 'a', 'b', 'foo', 'bar', 'c', 'd', 'stuff'
    ])
Example #2
def test_uri_part_tokenizer():
    text = "http://a.b/foo/bar?c=d#stuff"
    pattern = ANALYSIS_SETTINGS["tokenizer"]["uri_part"]["pattern"]
    assert re.split(pattern, text) == [
        "http",
        "",
        "",
        "a",
        "b",
        "foo",
        "bar",
        "c",
        "d",
        "stuff",
    ]

    text = url_quote_plus(text)
    assert re.split(pattern, "http://jump.to/?u=" + text) == [
        "http",
        "",
        "",
        "jump",
        "to",
        "",
        "u",
        "http",
        "",
        "",
        "a",
        "b",
        "foo",
        "bar",
        "c",
        "d",
        "stuff",
    ]
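The assertions above hinge on the uri_part pattern stored in the project's ANALYSIS_SETTINGS, which is not reproduced here. As a rough, self-contained illustration only (URI_PART_PATTERN below is a stand-in, not the project's actual setting), a pattern that splits on URI delimiters and on their percent-encoded forms reproduces both token lists:

import re
from urllib.parse import quote_plus as url_quote_plus  # stand-in for the project's url_quote_plus alias

# Hypothetical stand-in for ANALYSIS_SETTINGS['tokenizer']['uri_part']['pattern'].
# It splits on raw URI delimiters and on their percent-encoded forms, which is why
# the quoted URL embedded in the query string gets broken apart as well.
URI_PART_PATTERN = r"%3A|%2F|%3F|%3D|%23|%26|[:/?=#&.]"

text = "http://a.b/foo/bar?c=d#stuff"
assert re.split(URI_PART_PATTERN, text) == [
    "http", "", "", "a", "b", "foo", "bar", "c", "d", "stuff"
]

quoted = url_quote_plus(text)  # 'http%3A%2F%2Fa.b%2Ffoo%2Fbar%3Fc%3Dd%23stuff'
assert re.split(URI_PART_PATTERN, "http://jump.to/?u=" + quoted) == [
    "http", "", "", "jump", "to", "", "u",
    "http", "", "", "a", "b", "foo", "bar", "c", "d", "stuff",
]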
Example #3
File: uri.py  Project: JJediny/h
def _normalize_queryvalue(value):
    return url_quote_plus(url_unquote_plus(value), safe=UNRESERVED_QUERY_VALUE)
Example #4
File: uri.py  Project: JJediny/h
def _normalize_queryname(name):
    return url_quote_plus(url_unquote_plus(name), safe=UNRESERVED_QUERY_NAME)
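Both helpers normalize a query component the same way: fully decode it, then re-encode it with a project-specific set of characters left unescaped, so that differently encoded but equivalent strings collapse to one canonical form. A minimal sketch of how they could be wired together, assuming plain urllib aliases and placeholder UNRESERVED_QUERY_* constants (the real values live in uri.py):

from urllib.parse import quote_plus as url_quote_plus
from urllib.parse import unquote_plus as url_unquote_plus

# Placeholder "safe" character sets; the actual constants are defined in uri.py.
UNRESERVED_QUERY_NAME = "~!$'()*+,;:@/?"
UNRESERVED_QUERY_VALUE = "~!$'()*+,;:@/?="

def _normalize_queryname(name):
    # Decode, then re-encode, so e.g. 'a%2Db' and 'a-b' normalize identically.
    return url_quote_plus(url_unquote_plus(name), safe=UNRESERVED_QUERY_NAME)

def _normalize_queryvalue(value):
    return url_quote_plus(url_unquote_plus(value), safe=UNRESERVED_QUERY_VALUE)

def normalize_query(query):
    # Hypothetical driver: normalize each name=value pair of a query string.
    parts = []
    for field in query.split("&"):
        name, _, value = field.partition("=")
        parts.append(_normalize_queryname(name) + "=" + _normalize_queryvalue(value))
    return "&".join(parts)

print(normalize_query("tag=foo%20bar&q=a%2Db"))  # prints: tag=foo+bar&q=a-b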