Esempio n. 1
0
def get_http_data(url,
                  header=None,
                  data=None,
                  useragent=FIREFOX_UA,
                  referer=None,
                  cookiejar=None):
    """ Get the page to parse it for streams """
    if not cookiejar:
        cookiejar = CookieJar()

    log.debug("HTTP getting %r", url)
    starttime = time.time()

    request = Request(url)
    standard_header = {'Referer': referer, 'User-Agent': useragent}
    for key, value in [head for head in standard_header.items() if head[1]]:
        request.add_header(key, value)
    if header:
        for key, value in [head for head in header.items() if head[1]]:
            request.add_header(key, value)
    if data:
        request.add_data(data)

    opener = build_opener(HTTPCookieProcessor(cookiejar))

    try:
        response = opener.open(request)
    except HTTPError as e:
        log.error("Something wrong with that url")
        log.error("Error code: %s", e.code)
        sys.exit(5)
    except URLError as e:
        log.error("Something wrong with that url")
        log.error("Error code: %s", e.reason)
        sys.exit(5)
    except ValueError as e:
        log.error("Try adding http:// before the url")
        sys.exit(5)
    if is_py3:
        data = response.read()
        try:
            data = data.decode("utf-8")
        except UnicodeDecodeError:
            pass
    else:
        try:
            data = response.read()
        except socket.error as e:
            log.error("Lost the connection to the server")
            sys.exit(5)
    response.close()

    spent_time = time.time() - starttime
    bps = 8 * len(data) / max(spent_time, 0.001)

    log.debug("HTTP got %d bytes from %r in %.2fs (= %dbps)", len(data), url,
              spent_time, bps)

    return data
Esempio n. 2
0
def get_http_data(url, header=None, data=None, useragent=FIREFOX_UA,
                  referer=None, cookiejar=None):
    """ Get the page to parse it for streams """
    if not cookiejar:
        cookiejar = CookieJar()

    log.debug("HTTP getting %r", url)
    starttime = time.time()

    request = Request(url)
    standard_header = {'Referer': referer, 'User-Agent': useragent}
    for key, value in [head for head in standard_header.items() if head[1]]:
        request.add_header(key, value)
    if header:
        for key, value in [head for head in header.items() if head[1]]:
            request.add_header(key, value)
    if data:
        request.add_data(data)

    opener = build_opener(HTTPCookieProcessor(cookiejar))

    try:
        response = opener.open(request)
    except HTTPError as e:
        log.error("Something wrong with that url")
        log.error("Error code: %s", e.code)
        sys.exit(5)
    except URLError as e:
        log.error("Something wrong with that url")
        log.error("Error code: %s", e.reason)
        sys.exit(5)
    except ValueError as e:
        log.error("Try adding http:// before the url")
        sys.exit(5)
    if is_py3:
        data = response.read()
        try:
            data = data.decode("utf-8")
        except UnicodeDecodeError:
            pass
    else:
        try:
            data = response.read()
        except socket.error as e:
            log.error("Lost the connection to the server")
            sys.exit(5)
    response.close()

    spent_time = time.time() - starttime
    bps = 8 * len(data) / max(spent_time, 0.001)

    log.debug("HTTP got %d bytes from %r in %.2fs (= %dbps)",
              len(data), url, spent_time, bps)

    return data