Example 1
# Relies on the project's spider, urlrewrite and web modules, a get_regex_filter
# helper and the standard-library re module, all imported/defined elsewhere in
# the source file.
def find_urls_in_page(web, txt_byte, url_u, url_byte):
    # Collect every url found in the raw page text, then deduplicate and sort
    urls_byte = []
    for u_b in spider.unbox_it_to_ss(spider.findall(txt_byte)):
        urls_byte.append(u_b)
    urls_byte = sorted(set(urls_byte))

    filter_regex = get_regex_filter(url_u)

    # Keep urls that pass the filter, are not the current page and have not
    # been seen before; register each new url in the web graph
    candidates_byte = []
    for u_b in urlrewrite.rewrite_urls(url_byte, urls_byte):
        if re.match(filter_regex, u_b) and url_byte != u_b:
            if u_b not in web:
                web.add_url(u_b, [])
                candidates_byte.append(u_b)

    # if no candidate links are found, fall back on visited urls
    if not candidates_byte:
        candidates_byte = web.urls()

    return candidates_byte
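
The function touches the web object only through a membership test, add_url and urls(). As a rough sketch of that contract (an illustrative stand-in, not the project's actual web.Web class), a minimal container could look like this:

class WebStub:
    """Hypothetical stand-in for web.Web, covering only the three
    operations find_urls_in_page uses: `in`, add_url and urls."""

    def __init__(self):
        self._links = {}  # url -> list of outgoing links

    def __contains__(self, url_byte):
        return url_byte in self._links

    def add_url(self, url_byte, links):
        self._links[url_byte] = list(links)

    def urls(self):
        return list(self._links)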
Example 2
    # Excerpt: these opening lines are the tail of a function whose definition
    # is truncated in the source; it relies on the project's get_page, pick_url
    # and decoder helpers.
    txt_byte = get_page(url_byte)

    candidates_byte = find_urls_in_page(web, txt_byte, url_u, url_byte)
    encoding = decoder.detect_encoding(txt_byte)
    chosen_u = pick_url(candidates_byte, encoding=encoding)

    return chosen_u


if __name__ == '__main__':
    # Successive reassignments: only the last url is actually used as the seed
    url_byte = 'http://en.wikipedia.org/wiki/Main_Page'
    url_byte = 'http://ar.wikipedia.org/wiki/الصفحة_الرئيسية'
    url_byte = 'http://pt.wikipedia.org/wiki/Casa_da_Cascata'
    url_byte = 'http://it.wikipedia.org/wiki/Special:Random'

    web = web.Web()
    web.add_url(url_byte, [])

    url_u = decoder.decode(url_byte, 'utf-8')
    depth = -1
    while depth != 0:  # easy way to set depth as infinite
        depth -= 1
        try:
            url_u = find_next(url_u, web, handler=url_handler)
        except:
            # on any failure, log the traceback and restart from a visited url
            io.output("Recovering from exception:")
            io.output(traceback.format_exc())
            url_u = pick_url(web.urls())

        pause()  # less hammer
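
The depth counter in the main loop doubles as a page budget: a positive start value stops the crawl after that many iterations, while the -1 used above never reaches zero and so crawls indefinitely. A minimal sketch of the same countdown idiom, with the actual fetching stubbed out for illustration:

def crawl(depth=-1):
    # Hypothetical stub: each iteration of the real loop would call
    # find_next() and pause(); here we only count iterations.
    pages = 0
    while depth != 0:  # a negative start value never reaches 0 -> unbounded
        depth -= 1
        pages += 1
        if pages >= 10000:  # safety stop for the unbounded demo case
            break
    return pages

print(crawl(depth=5))  # prints 5: the countdown hits 0 after five pages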