Ejemplo n.º 1
0
 def perform(cls):
     """Pump the shared multi handle and settle every finished transfer.

     Does nothing when ``cls._futures`` is empty. Otherwise it calls
     ``cls._multi.perform()`` until the returned status is no longer
     ``pycurl.E_CALL_MULTI_PERFORM``, then drains ``cls._multi.info_read()``:

     * each successful handle has its future popped from ``cls._futures``
       and resolved with ``curl_result(handle)`` extended by ``'id'`` and
       ``'state' = 'normal'`` (note: no ``'url'`` key on this path);
     * each failed handle is reported with ``print`` and its future is
       failed with ``CurlLoop.CurlException`` carrying the error code, the
       error message and the annotated result as ``data``.

     Handles are expected to expose ``_raw_id`` and ``_raw_url``
     attributes, which are set outside this method.
     """
     if not cls._futures:
         return

     # Keep calling perform() for as long as libcurl asks to be called again.
     status = pycurl.E_CALL_MULTI_PERFORM
     while status == pycurl.E_CALL_MULTI_PERFORM:
         status, _active = cls._multi.perform()

     # Drain the completion queue; info_read() reports how many messages
     # are still waiting after the batch it just returned.
     while True:
         remaining, finished, failed = cls._multi.info_read()

         for handle in finished:
             future = cls._futures.pop(handle)
             outcome = curl_result(handle)
             outcome['id'] = handle._raw_id
             outcome['state'] = 'normal'
             future.set_result(outcome)

         for handle, err_num, err_msg in failed:
             print('error:', err_num, err_msg,
                   handle.getinfo(pycurl.EFFECTIVE_URL))
             outcome = curl_result(handle)
             outcome['url'] = handle._raw_url
             outcome['id'] = handle._raw_id
             outcome['state'] = 'error'
             outcome['error_code'] = err_num
             outcome['error_desc'] = err_msg
             # Pop the future before building the exception, matching the
             # original evaluation order.
             future = cls._futures.pop(handle)
             future.set_exception(
                 CurlLoop.CurlException(code=err_num,
                                        desc=err_msg,
                                        data=outcome))

         if remaining == 0:
             break
Ejemplo n.º 2
0
    def perform(cls):
        """Pump the shared multi handle and settle every finished transfer.

        Does nothing when ``cls._futures`` is empty. Otherwise it calls
        ``cls._multi.perform()`` until the returned status is no longer
        ``pycurl.E_CALL_MULTI_PERFORM``, then drains
        ``cls._multi.info_read()``:

        * each successful handle has its future popped from
          ``cls._futures`` and resolved with ``curl_result(handle)``
          extended by ``'url'``, ``'id'``, ``'state' = 'normal'``,
          ``'spider' = 'pycurl'`` and ``'payload'``;
        * each failed handle is reported with ``print`` and its future is
          failed with ``CurlLoop.CurlException`` carrying the error code,
          the error message and the annotated result as ``data``.

        Handles are expected to expose ``_raw_url``, ``_raw_id`` and
        ``_raw_payload`` attributes, which are set outside this method.

        NOTE: a per-payload ``post_func`` hook (looked up via
        ``payload.get('post_func')``) used to post-process the result on
        both paths; it is currently disabled.
        """
        if not cls._futures:
            return

        # Keep calling perform() for as long as libcurl asks to be called
        # again.
        status = pycurl.E_CALL_MULTI_PERFORM
        while status == pycurl.E_CALL_MULTI_PERFORM:
            status, _active = cls._multi.perform()

        # Drain the completion queue; info_read() reports how many messages
        # are still waiting after the batch it just returned.
        while True:
            remaining, finished, failed = cls._multi.info_read()

            for handle in finished:
                future = cls._futures.pop(handle)
                outcome = curl_result(handle)
                outcome['url'] = handle._raw_url
                outcome['id'] = handle._raw_id
                outcome['state'] = 'normal'
                outcome['spider'] = 'pycurl'
                outcome['payload'] = handle._raw_payload
                future.set_result(outcome)

            for handle, err_num, err_msg in failed:
                print('error:', err_num, err_msg,
                      handle.getinfo(pycurl.EFFECTIVE_URL))
                outcome = curl_result(handle)
                outcome['url'] = handle._raw_url
                outcome['id'] = handle._raw_id
                outcome['state'] = 'error'
                outcome['spider'] = 'pycurl'
                outcome['error_code'] = err_num
                outcome['error_desc'] = err_msg
                outcome['payload'] = handle._raw_payload
                # Pop the future before building the exception, matching
                # the original evaluation order.
                future = cls._futures.pop(handle)
                future.set_exception(
                    CurlLoop.CurlException(code=err_num,
                                           desc=err_msg,
                                           data=outcome))

            if remaining == 0:
                break
Ejemplo n.º 3
0
def _seed_response(c, payload, state):
    """Build the response fields shared by every outcome of ``get_it``.

    Starts from ``curl_result(c)`` and adds ``'url'`` and ``'id'`` (taken
    from ``payload``), the given ``'state'`` and ``'spider' = 'pycurl'``.
    """
    resp = curl_result(c)
    resp['url'] = payload.get('url')
    resp['id'] = payload.get('id')
    resp['state'] = state
    resp['spider'] = 'pycurl'
    return resp


def get_it(payload):
    """Run one blocking GET with pycurl and return a response record.

    Args:
        payload: the request description, read with ``.get('url')`` and
            ``.get('id')``. If a list is passed, only its first element is
            used (an empty list raises ``IndexError``).

    Returns:
        The record produced by ``curl_result`` with extra keys. ``'state'``
        is one of:

        * ``'normal'``   - the transfer completed; ``'payload'`` is attached
          and ``pycurl_get_resp`` has filled in the response data;
        * ``'abnormal'`` - pycurl failed with code 18 or 47, but whatever
          was received is still passed through ``pycurl_get_resp``;
        * ``'error'``    - any other ``pycurl.error``;
        * ``'critical'`` - any other exception (``'error_code'`` is
          ``'-1'``).

        Error states also carry ``'error_code'`` and ``'error_desc'``.

    The curl handle is always closed before returning.
    """
    if isinstance(payload, list):
        payload = payload[0]
    c = pycurl.Curl()
    data_buf = BytesIO()
    # Header accumulator handed to setup_curl_for_get and read back by
    # pycurl_get_resp.
    headers = {'count': 0, 'content': [{}]}
    try:
        setup_curl_for_get(c, payload, data_buf, headers)
        c.perform()

        resp = _seed_response(c, payload, 'normal')
        resp['payload'] = payload

        pycurl_get_resp(data_buf, headers, payload, resp)
        return resp
    except pycurl.error as e:
        resp = _seed_response(c, payload, 'error')
        resp['error_code'] = code = e.args[0]
        resp['error_desc'] = e.args[1]
        # libcurl 18 = CURLE_PARTIAL_FILE, 47 = CURLE_TOO_MANY_REDIRECTS:
        # a body/headers may still have been received, so salvage them.
        if code in (18, 47):
            resp['state'] = 'abnormal'
            pycurl_get_resp(data_buf, headers, payload, resp)
        return resp
    except Exception as e:
        # Last-resort boundary: report the failure as data instead of
        # letting an unexpected exception escape to the caller.
        resp = _seed_response(c, payload, 'critical')
        resp['error_code'] = '-1'
        resp['error_desc'] = 'pycurl re-one exception leaked: ' + str(
            e) + ' ' + str(type(e))
        return resp
    finally:
        c.close()