Esempio n. 1
0
 def wrapper(*args, **kw):
     # Fetch `url` (cache enabled), parse the markup with lxml and pass the
     # resulting tree to `fn` ahead of the caller's own arguments.
     def run_once():
         markup = get(url, cache=True, **kw)
         tree = lhtml.document_fromstring(markup)
         return fn(tree, *args, **kw)

     try:
         return run_once()
     except Exception:
         # Any failure (fetching, parsing, or inside `fn`) overwrites the
         # cache entry for `url` with None and makes exactly one more attempt;
         # a second failure propagates to the caller.
         write_cache(url, None)
         return run_once()
Esempio n. 2
0
File: web.py Progetto: goavki/phenny
 def wrapper(*args, **kw):
     # One attempt = download `url` via the cache, build an lxml document
     # from the text, and call `fn(document, *args, **kw)`.
     def attempt():
         page_text = get(url, cache=True, **kw)
         return fn(lhtml.document_fromstring(page_text), *args, **kw)

     try:
         result = attempt()
     except Exception:
         # On any error, write None into the cache for `url` and retry once.
         # Errors raised by the retry are not caught here.
         write_cache(url, None)
         result = attempt()
     return result
Esempio n. 3
0
def _declared_charset(markup):
    """Return the charset declared by a <meta> tag in *markup*, or None.

    Only <meta> elements that are direct children of <head> are inspected, in
    document order. For each element, an ``http-equiv="Content-Type"``
    declaration (``content="text/html; charset=..."``) is checked first, then
    a plain ``charset="..."`` attribute. Missing <head>, missing ``content``
    attributes and ``charset`` tokens without a value are skipped instead of
    raising.
    """
    if not markup or not markup.strip():
        # Nothing to parse; the lxml parser rejects an empty document.
        return None

    head = lhtml.document_fromstring(markup).find("head")
    if head is None:
        # The parsed tree is not guaranteed to contain a <head> element.
        return None

    for meta in head.findall("meta"):
        http_equiv = meta.get("http-equiv")
        if http_equiv is not None and http_equiv.lower() == "content-type":
            # `content` looks like "text/html; charset=utf-8"; it may be absent.
            for part in (meta.get("content") or "").split(";"):
                key, sep, value = part.strip().partition("=")
                value = value.strip()
                if sep and value and key.strip().lower() == "charset":
                    return value
        charset = meta.get("charset")
        if charset:
            return charset
    return None


def get(url,
        cache=False,
        headers=None,
        verify=True,
        timeout=REQUEST_TIMEOUT,
        **kwargs):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address to fetch. Anything not starting with ``'http'`` is
            ignored and ``None`` is returned.
        cache: When true, ``read_cache(url)`` is consulted first and the
            response that ends up being used is passed to
            ``write_cache(url, response)``, whether it came from the cache or
            from a fresh request.
        headers: Optional mapping of extra request headers. It is copied, so
            the caller's mapping is never modified. Entries in
            ``default_headers`` override entries given here.
        verify: Forwarded to ``requests.get``.
        timeout: Forwarded to ``requests.get``.
        **kwargs: Forwarded unchanged to ``requests.get``.

    Returns:
        ``response.text``, or ``None`` for a non-http URL. For ``text/html``
        responses whose Content-Type header carries no charset, the encoding
        declared in a <meta> tag of the document (if any) is applied to the
        response before the text is decoded.

    Raises:
        Whatever ``response.raise_for_status()`` raises for a freshly
        requested response with an error status, plus any error raised by
        ``requests.get`` itself.
    """
    if not url.startswith('http'):
        return None

    response = read_cache(url) if cache else None

    if not response:
        # Build a private header dict: the original code updated the shared
        # default argument (and any caller-supplied dict) in place.
        request_headers = dict(headers or {})
        request_headers.update(default_headers)
        response = requests.get(url,
                                headers=request_headers,
                                verify=verify,
                                timeout=timeout,
                                **kwargs)
        response.raise_for_status()

    if cache:
        write_cache(url, response)

    # Fix charset if necessary
    if 'Content-Type' in response.headers:
        # Compare case-insensitively so "Charset=..." counts as a charset.
        content_type = response.headers['Content-Type'].lower()
        if 'text/html' in content_type and 'charset' not in content_type:
            charset = _declared_charset(response.text)
            if charset:
                response.encoding = charset
    return response.text
Esempio n. 4
0
File: web.py Progetto: goavki/phenny
def _declared_charset(markup):
    """Return the charset declared by a <meta> tag in *markup*, or None.

    Only <meta> elements that are direct children of <head> are inspected, in
    document order. For each element, an ``http-equiv="Content-Type"``
    declaration (``content="text/html; charset=..."``) is checked first, then
    a plain ``charset="..."`` attribute. Missing <head>, missing ``content``
    attributes and ``charset`` tokens without a value are skipped instead of
    raising.
    """
    if not markup or not markup.strip():
        # Nothing to parse; the lxml parser rejects an empty document.
        return None

    head = lhtml.document_fromstring(markup).find("head")
    if head is None:
        # The parsed tree is not guaranteed to contain a <head> element.
        return None

    for meta in head.findall("meta"):
        http_equiv = meta.get("http-equiv")
        if http_equiv is not None and http_equiv.lower() == "content-type":
            # `content` looks like "text/html; charset=utf-8"; it may be absent.
            for part in (meta.get("content") or "").split(";"):
                key, sep, value = part.strip().partition("=")
                value = value.strip()
                if sep and value and key.strip().lower() == "charset":
                    return value
        charset = meta.get("charset")
        if charset:
            return charset
    return None


def get(url, cache=False, headers=None, verify=True, timeout=REQUEST_TIMEOUT, **kwargs):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address to fetch. Anything not starting with ``'http'`` is
            ignored and ``None`` is returned.
        cache: When true, ``read_cache(url)`` is consulted first and the
            response that ends up being used is passed to
            ``write_cache(url, response)``, whether it came from the cache or
            from a fresh request.
        headers: Optional mapping of extra request headers. It is copied, so
            the caller's mapping is never modified. Entries in
            ``default_headers`` override entries given here.
        verify: Forwarded to ``requests.get``.
        timeout: Forwarded to ``requests.get``.
        **kwargs: Forwarded unchanged to ``requests.get``.

    Returns:
        ``response.text``, or ``None`` for a non-http URL. For ``text/html``
        responses whose Content-Type header carries no charset, the encoding
        declared in a <meta> tag of the document (if any) is applied to the
        response before the text is decoded.

    Raises:
        Whatever ``response.raise_for_status()`` raises for a freshly
        requested response with an error status, plus any error raised by
        ``requests.get`` itself.
    """
    if not url.startswith('http'):
        return None

    response = read_cache(url) if cache else None

    if not response:
        # Build a private header dict: the original code updated the shared
        # default argument (and any caller-supplied dict) in place.
        request_headers = dict(headers or {})
        request_headers.update(default_headers)
        response = requests.get(url, headers=request_headers, verify=verify, timeout=timeout, **kwargs)
        response.raise_for_status()

    if cache:
        write_cache(url, response)

    # Fix charset if necessary
    if 'Content-Type' in response.headers:
        # Compare case-insensitively so "Charset=..." counts as a charset.
        content_type = response.headers['Content-Type'].lower()
        if 'text/html' in content_type and 'charset' not in content_type:
            charset = _declared_charset(response.text)
            if charset:
                response.encoding = charset
    return response.text