Example #1
import re

# spider, urlrewrite, get_regex_filter and the web container are modules and
# helpers from the surrounding project.
def find_urls_in_page(web, txt_byte, url_u, url_byte):
    # Extract every url found in the raw page bytes and deduplicate them.
    urls_byte = []
    for u_b in spider.unbox_it_to_ss(spider.findall(txt_byte)):
        urls_byte.append(u_b)
    urls_byte = sorted(set(urls_byte))

    # Build a regex filter from the (unicode) url of the current page.
    filter_regex = get_regex_filter(url_u)

    candidates_byte = []
    for u_b in urlrewrite.rewrite_urls(url_byte, urls_byte):
        # Keep urls that match the filter, are not the page itself and have
        # not been seen before; register each new url in the web.
        if re.match(filter_regex, u_b) and url_byte != u_b:
            if u_b not in web:
                web.add_url(u_b, [])
                candidates_byte.append(u_b)

    # If no candidate links are found, fall back on the visited urls.
    if len(candidates_byte) == 0:
        candidates_byte = web.urls()

    return candidates_byte
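
The web argument above is only exercised through three operations: the "in" containment test, add_url() and urls(). The project's real web.Web class (constructed in the next example) is assumed to do more, but a minimal stand-in satisfying just that interface could look like the following sketch:

class Web(object):
    # Minimal, hypothetical stand-in for the project's web.Web container:
    # maps each known url to its list of outgoing links.
    def __init__(self):
        self._links = {}

    def __contains__(self, url):
        return url in self._links

    def add_url(self, url, links):
        self._links[url] = list(links)

    def urls(self):
        return list(self._links)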
Example #2
    # Tail of find_next: fetch the page, collect candidate urls and pick one.
    txt_byte = get_page(url_byte)

    candidates_byte = find_urls_in_page(web, txt_byte, url_u, url_byte)
    encoding = decoder.detect_encoding(txt_byte)
    chosen_u = pick_url(candidates_byte, encoding=encoding)

    return chosen_u


if __name__ == '__main__':
    # Seed urls; only the last assignment takes effect, the others are
    # alternative starting points.
    url_byte = 'http://en.wikipedia.org/wiki/Main_Page'
    url_byte = 'http://ar.wikipedia.org/wiki/الصفحة_الرئيسية'
    url_byte = 'http://pt.wikipedia.org/wiki/Casa_da_Cascata'
    url_byte = 'http://it.wikipedia.org/wiki/Special:Random'

    web = web.Web()
    web.add_url(url_byte, [])

    url_u = decoder.decode(url_byte, 'utf-8')
    depth = -1
    while depth != 0:  # a negative start value makes the crawl effectively infinite
        depth -= 1
        try:
            url_u = find_next(url_u, web, handler=url_handler)
        except Exception:
            # Log the traceback and recover by picking a previously visited url.
            io.output("Recovering from exception:")
            io.output(traceback.format_exc())
            url_u = pick_url(web.urls())

        pause()  # throttle between requests so we don't hammer the server
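
get_page, pick_url, pause, url_handler and the decoder/io modules are helpers defined elsewhere in the project. Judging only from how they are called here, pick_url seems to choose one url from the candidates and decode it to unicode, while pause throttles the crawl; a rough sketch under those assumptions (not the project's actual implementations):

import random
import time

def pick_url(candidates_byte, encoding='utf-8'):
    # Hypothetical: pick one candidate at random and decode it to unicode.
    chosen_byte = random.choice(candidates_byte)
    return chosen_byte.decode(encoding)

def pause(seconds=2.0):
    # Hypothetical: sleep between requests to avoid hammering the server.
    time.sleep(seconds)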