import re

# spiderfetch's own modules, assumed importable from the project directory
import fetch
import spider
import urlrewrite


def fetch_gracefully(url_byte, filename):
    """Simplified adaptation of spiderfetch.py:get_url()"""
    getter = fetch.Fetcher(url=url_byte, filename=filename)
    getter.write_progress = lambda *args, **kw: None  # no output or logging
    while True:
        try:
            getter.launch_w_tries()
            break
        except fetch.ChangedUrlWarning, e:
            # the fetch was redirected; rewrite the url and retry
            url = urlrewrite.rewrite_urls(getter.url, [e.new_url]).next()
            getter.url = url
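For illustration, a minimal usage sketch (the url and filename here are hypothetical, not from the original code):

    # fetch a page to a local file, retrying through any redirects
    fetch_gracefully("http://example.com/", "/tmp/page.html")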
def find_urls_in_page(web, txt_byte, url_u, url_byte):
    # extract every url found in the page, then dedupe and sort
    urls_byte = sorted(set(spider.unbox_it_to_ss(spider.findall(txt_byte))))
    filter_regex = get_regex_filter(url_u)
    candidates_byte = []
    for u_b in urlrewrite.rewrite_urls(url_byte, urls_byte):
        # keep urls that match the filter, skipping the page's own url
        if re.match(filter_regex, u_b) and url_byte != u_b:
            if u_b not in web:
                web.add_url(u_b, [])
            candidates_byte.append(u_b)
    # if no candidate links are found, fall back on visited urls
    if not candidates_byte:
        candidates_byte = web.urls()
    return candidates_byte
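Together, the two functions make up one step of a crawl: fetch a page, then harvest the candidate urls to visit next. A hedged sketch of such a step, assuming a web object exposing the add_url/urls/membership interface used above (the url and file path are illustrative):

    url_byte = "http://example.com/"
    url_u = url_byte.decode("utf-8")  # unicode form, per the _u/_byte naming
    fetch_gracefully(url_byte, "/tmp/page.html")
    txt_byte = open("/tmp/page.html").read()
    candidates_byte = find_urls_in_page(web, txt_byte, url_u, url_byte)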