Example 1
def crawlprocess(url_objects, start, html_dir, status_dir, agent):
    """Crawl every Config.PROCESS_NUMBER-th URL starting at index `start`,
    optionally saving page content and writing a per-URL status record."""
    status_file = open(status_dir + "/status_temp_" + str(start) + ".json", "w")
    content_file = None
    if Config.DATA_FORMAT == "ONE_FILE":
        content_file = open(html_dir + "/html_" + str(start) + ".json", "a+")
        save_content = save_content_one_file(content_file)
    elif Config.DATA_FORMAT == "MULTI_FILE":
        save_content = save_content_multi_file(html_dir)

    for i in range(start, len(url_objects), Config.PROCESS_NUMBER):
        url_obj = url_objects[i]
        url = url_obj["url"]
        try:
            if Config.USE_TOR:
                res = requests.get(url, headers=Config.HEADERS[agent], proxies=TOR_PROXY, verify=False, timeout=5)
            else:
                res = requests.get(url, headers=Config.HEADERS[agent], verify=False, timeout=5)
            if Config.SAVE_HTML:
                save_content(url, res)
            save_response(url, URLUtility.encode(url), str(res.status_code), None, res.headers, agent, url_obj, status_file)
        except requests.ConnectionError:
            #In the event of a network problem (e.g. DNS failure, refused connection, etc)
            save_response(url, URLUtility.encode(url), None, "ConnectionError", None, agent, url_obj, status_file)
        except requests.HTTPError:
            #In the rare event of an invalid HTTP response
            save_response(url, URLUtility.encode(url), None, "HTTPError", None, agent, url_obj, status_file)
        except requests.Timeout:
            save_response(url, URLUtility.encode(url), None, "Timeout", None, agent, url_obj, status_file)
        except requests.TooManyRedirects:
            save_response(url, URLUtility.encode(url), None, "TooManyRedirects", None, agent, url_obj, status_file)
        except Exception:
            save_response(url, URLUtility.encode(url), None, "OtherExceptions", None, agent, url_obj, status_file)
    status_file.close()
    if content_file:
        content_file.close()
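
Both examples call a save_response helper that is not included in the listings. From the call sites it evidently appends one record per crawled URL to status_file, carrying either a status code or an error label. The following is a minimal sketch of such a helper, assuming JSON-lines output; the field names are illustrative and not part of the original code:

import json

def save_response(url, encoded_url, status_code, error, headers, agent, url_obj, status_file):
    # Hypothetical reconstruction; the real helper is not shown in these examples.
    # One JSON object per line keeps the status file easy to stream back later.
    record = {
        "url": url,
        "encoded_url": encoded_url,
        "status_code": status_code,  # None when the request raised an exception
        "error": error,              # e.g. "ConnectionError" or "Timeout"; None on success
        "headers": dict(headers) if headers else None,
        "agent": agent,
        "url_obj": url_obj,
    }
    status_file.write(json.dumps(record) + "\n")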
Example 2
def crawlprocess(url_objects, start, html_dir, status_dir, agent):
    status_file = open(status_dir + "/status_temp_" + str(start) + ".json",
                       "w")
    content_file = None
    if Config.DATA_FORMAT == "ONE_FILE":
        content_file = open(html_dir + "/html_" + str(start) + ".json", "a+")
        save_content = save_content_one_file(content_file)
    elif Config.DATA_FORMAT == "MULTI_FILE":
        save_content = save_content_multi_file(html_dir)

    for i in range(start, len(url_objects), Config.PROCESS_NUMBER):
        url_obj = url_objects[i]
        url = url_obj["url"]
        try:
            if Config.USE_TOR:
                res = requests.get(url,
                                   headers=Config.HEADERS[agent],
                                   proxies=TOR_PROXY,
                                   verify=False,
                                   timeout=5)
            else:
                res = requests.get(url,
                                   headers=Config.HEADERS[agent],
                                   verify=False,
                                   timeout=5)
            if Config.SAVE_HTML:
                save_content(url, res)
            save_response(url, URLUtility.encode(url), str(res.status_code),
                          None, res.headers, agent, url_obj, status_file)
        except requests.ConnectionError:
            #In the event of a network problem (e.g. DNS failure, refused connection, etc)
            save_response(url, URLUtility.encode(url), None, "ConnectionError",
                          None, agent, url_obj, status_file)
        except requests.HTTPError:
            #In the rare event of an invalid HTTP response
            save_response(url, URLUtility.encode(url), None, "HTTPError", None,
                          agent, url_obj, status_file)
        except requests.Timeout:
            save_response(url, URLUtility.encode(url), None, "Timeout", None,
                          agent, url_obj, status_file)
        except requests.TooManyRedirects:
            save_response(url, URLUtility.encode(url), None,
                          "TooManyRedirects", None, agent, url_obj,
                          status_file)
        except Exception:
            save_response(url, URLUtility.encode(url), None, "OtherExceptions",
                          None, agent, url_obj, status_file)
    status_file.close()
    if content_file:
        content_file.close()
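
The ONE_FILE branch relies on a save_content_one_file factory that is likewise not shown. Judging from the multi-file counterpart in Example 3, it presumably returns a save_content(url, res) closure that appends each page to the single html_<start>.json file opened in append mode above. The sketch below is an assumption about its shape; the record keys are invented for illustration:

import json

def save_content_one_file(content_file):
    # Hypothetical reconstruction of the factory used by the ONE_FILE format.
    def save_content(url, res):
        record = {
            "url": URLUtility.encode(url),  # URLUtility comes from the surrounding project
            "html": res.text,
        }
        content_file.write(json.dumps(record) + "\n")
    return save_content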
Example 3
def save_content(url, res):
    # html_dir is a free variable here: this function is meant to be defined
    # inside a factory that captures it by closure (see the sketch below).
    html_filename = html_dir + "/" + URLUtility.encode(url) + ".html"
    html_file = open(html_filename, "wb")
    # Write explicitly encoded bytes so the same code behaves on Python 2 and 3.
    html_file.write(res.text.encode('utf-8'))
    html_file.close()
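
Since html_dir is not a parameter, this save_content is evidently the inner function of the save_content_multi_file(html_dir) factory used in Examples 1 and 2. A possible wrapper is sketched below, together with a hypothetical driver showing how crawlprocess might be fanned out over Config.PROCESS_NUMBER worker processes (an assumption suggested by the stride in the crawl loop; run_crawl is not part of the original code):

import multiprocessing

def save_content_multi_file(html_dir):
    # Captures html_dir by closure and returns the per-URL writer shown above.
    def save_content(url, res):
        html_filename = html_dir + "/" + URLUtility.encode(url) + ".html"
        with open(html_filename, "wb") as html_file:
            html_file.write(res.text.encode("utf-8"))
    return save_content

def run_crawl(url_objects, html_dir, status_dir, agent):
    # Hypothetical driver: each worker starts at a different offset, and the
    # stride of Config.PROCESS_NUMBER inside crawlprocess partitions the URLs.
    workers = []
    for start in range(Config.PROCESS_NUMBER):
        p = multiprocessing.Process(
            target=crawlprocess,
            args=(url_objects, start, html_dir, status_dir, agent))
        p.start()
        workers.append(p)
    for p in workers:
        p.join()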