def __init__(self):
    self.curl = Curl()
    self.curl.setopt(self.curl.SSL_VERIFYPEER, 0)
    self.curl.setopt(self.curl.SSL_VERIFYHOST, 0)
    self.curl.setopt(self.curl.TIMEOUT, DEFAULT_TIMEOUT)
    self.curl.setopt(self.curl.PROXY, HTTP_PROXY)
    self.curl.setopt(self.curl.FOLLOWLOCATION, True)
def save_buffer(self, buffer, key):
    if isinstance(buffer, BytesIO):
        from requests_toolbelt import MultipartEncoder
        # field values must be strings or (filename, fileobj) tuples
        encoder = MultipartEncoder({'file': (key, buffer), 'canary': 'true'})
        self.session.post(self.url, data=encoder,
                          headers={'Content-Type': encoder.content_type})
    elif isinstance(buffer, StringIO):
        from pycurl import Curl
        c = Curl()
        c.setopt(c.URL, self.url)
        c.setopt(c.TIMEOUT, 3600)
        c.setopt(c.HTTPPOST, [
            ('file', (
                c.FORM_BUFFER, key,
                c.FORM_BUFFERPTR, buffer.read(),
                c.FORM_CONTENTTYPE, 'text/plain',
            )),
        ])
        c.perform()
        c.close()
def save_file(self, source_path, key):
    """
    :param source_path: path relative to the local file system.
    :param key: key relative to the current prefix.
    """
    if self.local_server:
        # ensure a leading slash before handing the path to the local server
        if not source_path.startswith('/'):
            source_path = "/" + source_path
        return self.local_server.copy(source_path, key)
    from pycurl import Curl
    c = Curl()
    c.setopt(c.URL, self.url)
    c.setopt(c.TIMEOUT, 3600)
    c.setopt(c.HTTPPOST, [
        ('file', (
            c.FORM_FILE, source_path,
            c.FORM_FILENAME, key,
            c.FORM_CONTENTTYPE, 'text/plain',
        )),
    ])
    c.perform()
    c.close()
def __init__(self):
    self.curl = Curl()
    self.url = None
    self.headers = {}
    self.status = ''
    self.code = 0
    self.charset_re = re.compile(r'charset=(\S+)')
def getc(url):
    buf = BytesIO()
    c = Curl()
    c.setopt(c.URL, url)
    c.setopt(c.WRITEDATA, buf)
    c.perform()
    c.close()
    return buf
def prepare_connection(cls, conf, timeout=DEFAULT_HTTP_TIMEOUT):
    url = 'http://%s:%s' % (conf['rpchost'], conf['rpcport'])
    conn = Curl()
    conn.setopt(conn.CONNECTTIMEOUT, timeout)
    conn.setopt(conn.TIMEOUT, timeout)
    conn.setopt(conn.URL, url)
    conn.setopt(conn.POST, 1)
    return conn
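# A minimal usage sketch for the handle above. prepare_connection() is
# presumably a classmethod (decorator not shown); the conf values and the
# JSON-RPC payload here are assumptions for illustration only.
import json
from io import BytesIO

conf = {'rpchost': '127.0.0.1', 'rpcport': 8332}   # hypothetical values
conn = prepare_connection(None, conf)              # 'cls' is unused in the body
payload = json.dumps({'method': 'getinfo', 'params': [], 'id': 1})
buf = BytesIO()
conn.setopt(conn.POSTFIELDS, payload)   # POST is already enabled on the handle
conn.setopt(conn.WRITEDATA, buf)
conn.perform()
print(buf.getvalue().decode())
conn.close()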
def init_curl(self):
    self.curl = Curl()
    self.curl.setopt(self.curl.SSL_VERIFYPEER, 0)
    self.curl.setopt(self.curl.SSL_VERIFYHOST, 0)
    self.curl.setopt(pycurl.TIMEOUT, HttpDirectory.TIMEOUT)
    self.curl_head = self._curl_handle()
def http_perform(curl: pycurl.Curl):
    """Utility function for curl - just do our usual stuff."""
    try:
        curl.perform()
    except pycurl.error as e:
        raise CurlError from e
    status = curl.getinfo(pycurl.HTTP_CODE)
    HTTPFamily.check_status(status)
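# Hedged usage sketch for http_perform: CurlError and HTTPFamily come from
# the surrounding module (not shown); only the pycurl calls are known API.
import pycurl
from io import BytesIO

buf = BytesIO()
curl = pycurl.Curl()
curl.setopt(pycurl.URL, "https://example.com")
curl.setopt(pycurl.WRITEDATA, buf)
http_perform(curl)   # raises CurlError on transport failure
curl.close()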
def init_curl(self):
    self.curl = Curl()
    self.curl.setopt(self.curl.SSL_VERIFYPEER, 0)
    self.curl.setopt(self.curl.SSL_VERIFYHOST, 0)
    self.curl.setopt(pycurl.TIMEOUT, HttpDirectory.TIMEOUT)
    self.curl.setopt(pycurl.USERAGENT, config.HEADERS["User-Agent"])
    self.curl_head = self._curl_handle()
@staticmethod  # no self parameter; called as self._curl_handle() elsewhere
def _curl_handle():
    curl_head = Curl()
    curl_head.setopt(pycurl.SSL_VERIFYPEER, 0)
    curl_head.setopt(pycurl.SSL_VERIFYHOST, 0)
    curl_head.setopt(pycurl.NOBODY, 1)
    curl_head.setopt(pycurl.TIMEOUT, HttpDirectory.TIMEOUT)
    curl_head.setopt(pycurl.USERAGENT, config.HEADERS["User-Agent"])
    return curl_head
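# Hedged sketch: a NOBODY handle issues HEAD-style requests, so only the
# status line and headers come back. HttpDirectory is the assumed owning
# class from the snippets above; the URL is illustrative.
import pycurl
from io import BytesIO

curl_head = HttpDirectory._curl_handle()
headers = BytesIO()
curl_head.setopt(pycurl.URL, "https://example.com/file.bin")
curl_head.setopt(pycurl.HEADERFUNCTION, headers.write)
curl_head.perform()
print(curl_head.getinfo(pycurl.HTTP_CODE))
print(headers.getvalue().decode("iso-8859-1"))
curl_head.close()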
def load_url(self, url):
    buffer = BytesIO()
    c = Curl()
    c.setopt(c.URL, url)
    c.setopt(c.WRITEDATA, buffer)
    c.perform()
    c.close()
    return CSVFile(buffer)
def curl(url):
    io = BytesIO()
    c = Curl()
    c.setopt(c.URL, url)
    c.setopt(c.WRITEDATA, io)
    c.perform()
    c.close()
    res = io.getvalue()
    io.close()
    return res
class Cacher(Thread):
    def __init__(self, queue, func):
        Thread.__init__(self)
        self.queue = queue
        self.curl = Curl()
        self.curl.setopt(self.curl.FOLLOWLOCATION, True)
        self.func = func

    def run(self):
        while True:
            url = self.queue.get()
            self.func(self.curl, url)
            self.queue.task_done()
def get(name):
    base = 'https://www1.ncdc.noaa.gov/pub/data/igra/data/data-por/{}-data.txt.zip'
    buf = BytesIO()
    c = Curl()
    c.setopt(c.URL, base.format(name))
    c.setopt(c.WRITEDATA, buf)
    c.perform()
    c.close()
    z = ZipFile(buf)
    out = z.open(z.infolist()[0]).read()
    z.close()
    return out.decode()
def _finish(
    self,
    curl: pycurl.Curl,
    curl_error: Optional[int] = None,
    curl_message: Optional[str] = None,
) -> None:
    info = curl.info  # type: ignore
    curl.info = None  # type: ignore
    self._multi.remove_handle(curl)
    self._free_list.append(curl)
    buffer = info["buffer"]
    if curl_error:
        assert curl_message is not None
        error = CurlError(curl_error, curl_message)  # type: Optional[CurlError]
        assert error is not None
        code = error.code
        effective_url = None
        buffer.close()
        buffer = None
    else:
        error = None
        code = curl.getinfo(pycurl.HTTP_CODE)
        effective_url = curl.getinfo(pycurl.EFFECTIVE_URL)
        buffer.seek(0)
    # the various curl timings are documented at
    # http://curl.haxx.se/libcurl/c/curl_easy_getinfo.html
    time_info = dict(
        queue=info["curl_start_ioloop_time"] - info["queue_start_time"],
        namelookup=curl.getinfo(pycurl.NAMELOOKUP_TIME),
        connect=curl.getinfo(pycurl.CONNECT_TIME),
        appconnect=curl.getinfo(pycurl.APPCONNECT_TIME),
        pretransfer=curl.getinfo(pycurl.PRETRANSFER_TIME),
        starttransfer=curl.getinfo(pycurl.STARTTRANSFER_TIME),
        total=curl.getinfo(pycurl.TOTAL_TIME),
        redirect=curl.getinfo(pycurl.REDIRECT_TIME),
    )
    try:
        info["callback"](HTTPResponse(
            request=info["request"],
            code=code,
            headers=info["headers"],
            buffer=buffer,
            effective_url=effective_url,
            error=error,
            reason=info["headers"].get("X-Http-Reason", None),
            request_time=self.io_loop.time() - info["curl_start_ioloop_time"],
            start_time=info["curl_start_time"],
            time_info=time_info,
        ))
    except Exception:
        self.handle_callback_exception(info["callback"])
def fetchAnnotationJson(self, rawRequestURL=None):
    try:
        postData = {'sequence': self.rawSequence}
        # Using configuration here causes a circular dependency, so the URL is passed in.
        if rawRequestURL is None:
            logging.error('You must pass a rawRequestURL to fetchAnnotationJson.')
            return
        requestURL = rawRequestURL + '?' + urlencode(postData)

        resultsIoObject = BytesIO()
        curlObject = Curl()
        curlObject.setopt(curlObject.URL, requestURL)
        curlObject.setopt(curlObject.WRITEDATA, resultsIoObject)
        curlObject.perform()
        curlObject.close()

        getBody = resultsIoObject.getvalue().decode('utf8')
        logging.debug('JSON Request Body:\n' + getBody)

        # TODO: Detect error <head><title>414 Request-URI Too Large</title></head>.
        # For larger DRB alleles the webserver fails.
        # TODO: Detect the case where the result is not JSON. Maybe that detection
        # happens in parseExons, but server errors should probably be detected here.

        # Simple case: an empty string.
        if getBody is None or len(getBody) < 1:
            logging.error('The JSON results were an empty string. Is there a problem with the ACT server?:' + str(requestURL))
            showInfoBox('Problem Accessing Annotation Service',
                        'The JSON results were an empty string. Is there a problem with the ACT server?')
            return None

        # If it's an HTML error page we can respond nicely.
        if getBody.startswith('<html>'):  # was getBody[0:5] == '<html>', which could never match
            # TODO: this might not work if some other kind of HTML comes back.
            errorCode = getBody[getBody.find('<title>'):getBody.find('</title>')]
            logging.error('The annotation JSON results are html, this probably indicates an issue with the annotation webserver:\n' + str(requestURL))
            showInfoBox('Problem Accessing Annotation Service',
                        'The annotation results are HTML, not JSON, probably an issue with the ACT webserver:\n' + str(errorCode))
            return None

        return getBody

    except Exception:
        logging.error('Exception when performing CURL:\n')
        logging.error(str(exc_info()))
        logging.error('URL:' + str(requestURL))
        raise
def moodle_smoke_test(self):
    print("\nMoodle Smoke Test...")
    url = 'https://' + self.deployment['siteURL']
    curl = Curl()
    curl.setopt(pycurl.URL, url)
    curl.setopt(pycurl.SSL_VERIFYPEER, False)
    curl.setopt(pycurl.WRITEFUNCTION, lambda x: None)  # discard the body
    curl.perform()
    status = curl.getinfo(pycurl.HTTP_CODE)
    if status != 200:
        print("*** DEPLOY FAILED ***")
        print('HTTP Status Code: {}'.format(status))
        sys.exit(1)
    print('(ok: {})'.format(status))
def _curl_a_link(self, target_url, post_target, commit_date=None):
    """Fetch a URL and return its JSON content as a dict, equivalent to a
    curl GET. If the result for this request is already in today's cache,
    read it from the cache instead of hitting Elasticsearch again."""
    buffer = BytesIO()  # pycurl writes bytes, so a binary buffer is required
    c = Curl()
    c.setopt(c.URL, target_url)
    c.setopt(c.WRITEDATA, buffer)
    c.perform()
    c.close()
    return json.loads(buffer.getvalue())
@staticmethod  # no self parameter; called as self._curl_handle() elsewhere
def _curl_handle():
    curl_head = Curl()
    curl_head.setopt(pycurl.SSL_VERIFYPEER, 0)
    curl_head.setopt(pycurl.SSL_VERIFYHOST, 0)
    curl_head.setopt(pycurl.NOBODY, 1)
    curl_head.setopt(pycurl.TIMEOUT, HttpDirectory.TIMEOUT)
    return curl_head
def _get(url: str,
         headers: Mapping[str, str] = None,
         rate_limiters: List[RateLimiter] = None,
         connection: Curl = None) -> (int, bytes, dict):
    if not headers:
        request_headers = ["Accept-Encoding: gzip"]
    else:
        request_headers = ["{header}: {value}".format(header=key, value=value)
                           for key, value in headers.items()]
        if "Accept-Encoding" not in headers:
            request_headers.append("Accept-Encoding: gzip")

    response_headers = {}

    def get_response_headers(header_line: bytes) -> None:
        header_line = header_line.decode("ISO-8859-1")
        if ":" not in header_line:
            return
        name, value = header_line.split(":", 1)
        response_headers[name.strip()] = value.strip()

    buffer = BytesIO()
    curl = connection if connection is not None else Curl()
    curl.setopt(curl.URL, url)
    curl.setopt(curl.WRITEDATA, buffer)
    curl.setopt(curl.HEADERFUNCTION, get_response_headers)
    curl.setopt(curl.HTTPHEADER, request_headers)
    if certifi:
        curl.setopt(curl.CAINFO, certifi.where())

    if _print_calls:
        _url = url
        if isinstance(_url, bytes):
            _url = str(_url)[2:-1]
        if _print_api_key and ".api.riotgames.com/lol" in _url:
            _url += "?api_key={}".format(headers["X-Riot-Token"])
        print("Making call: {}".format(_url))

    if rate_limiters:
        with ExitStack() as stack:
            # Enter each rate limiter as a context manager
            for rate_limiter in rate_limiters:
                stack.enter_context(rate_limiter)
            status_code = HTTPClient._execute(curl, connection is None)
    else:
        status_code = HTTPClient._execute(curl, connection is None)

    body = buffer.getvalue()

    # Decompress if we got gzipped data
    try:
        content_encoding = response_headers["Content-Encoding"].upper()
        if "GZIP" == content_encoding:
            body = zlib.decompress(body, zlib.MAX_WBITS | 16)
    except KeyError:
        pass

    return status_code, body, response_headers
def cache_and_parse(self, url):
    """A low level shortcut that Caches and Parses a PDSC file.

    :param url: The URL of the PDSC file.
    :type url: str
    :return: A parsed representation of the PDSC file.
    :rtype: BeautifulSoup
    """
    self.cache_file(Curl(), url)
    return self.pdsc_from_cache(url)
def _get(url: str,
         headers: Mapping[str, str] = None,
         rate_limiter: RateLimiter = None,
         connection: Curl = None) -> (int, bytes, dict):
    if not headers:
        request_headers = ["Accept-Encoding: gzip"]
    else:
        request_headers = ["{header}: {value}".format(header=key, value=value)
                           for key, value in headers.items()]
        if "Accept-Encoding" not in headers:
            request_headers.append("Accept-Encoding: gzip")

    response_headers = {}

    def get_response_headers(header_line: bytes) -> None:
        header_line = header_line.decode("ISO-8859-1")
        if ":" not in header_line:
            return
        name, value = header_line.split(":", 1)
        response_headers[name.strip()] = value.strip()

    buffer = BytesIO()
    curl = connection if connection is not None else Curl()
    curl.setopt(curl.URL, url)
    curl.setopt(curl.WRITEDATA, buffer)
    curl.setopt(curl.HEADERFUNCTION, get_response_headers)
    curl.setopt(curl.HTTPHEADER, request_headers)
    if certifi:
        curl.setopt(curl.CAINFO, certifi.where())

    if _print_calls:  # TODO
        print("Making call: {}".format(url))

    if rate_limiter:
        with rate_limiter:
            status_code = HTTPClient._execute(curl, connection is None)
    else:
        status_code = HTTPClient._execute(curl, connection is None)

    body = buffer.getvalue()

    # Decompress if we got gzipped data
    try:
        content_encoding = response_headers["Content-Encoding"].upper()
        if "GZIP" == content_encoding:
            body = zlib.decompress(body, zlib.MAX_WBITS | 16)
    except KeyError:
        pass

    return status_code, body, response_headers
def post_progress(progress, slotX, slotY, exp_name, message=""):
    # Ported to Python 3: cStringIO and the print statement are gone, and
    # pycurl writes bytes, so a BytesIO buffer is used and decoded.
    from io import BytesIO
    from pycurl import Curl
    from socket import gethostname
    response = BytesIO()
    address = ('www.doc.ic.ac.uk/~zf509/' + exp_name + '/ip.php?name=' + gethostname()
               + '-' + message + '&slot=' + str(slotX) + '-' + str(slotY)
               + '&stage=' + str(progress))
    c = Curl()
    c.setopt(c.WRITEFUNCTION, response.write)
    c.setopt(c.URL, address)
    c.perform()
    c.close()
    server_res = response.getvalue().decode('utf-8')
    print("Server replied:", server_res)
    # A reply starting with "TER" means terminate.
    return not server_res.startswith("TER")
def curl_connection():
    config_params = get_chain_config_params()
    curl = Curl()
    curl.setopt(curl.URL, 'http://127.0.0.1:%s' % config_params['rpcport'])
    b64cred = base64.b64encode(
        ('%s:%s' % (config_params['rpcuser'], config_params['rpcpassword'])).encode('utf8'))
    curl.setopt(curl.HTTPHEADER, [
        "Content-Type: text/plain",
        "Authorization: Basic {}".format(b64cred.decode('utf8')),
    ])
    # curl.setopt(curl.VERBOSE, True)  # to print the entire request flow
    # curl.setopt(curl.DEBUGFUNCTION, curl_debug_log)
    return curl
def runst(msg, STATE, rounds=27):
    # Note: this Curl is a sponge/hash construction with a 243-trit state,
    # not pycurl.Curl.
    assert len(msg) == 243
    h = Curl()
    h._state = STATE[:]
    h._state[0:243] = msg[:]
    for r in range(rounds):
        h._transform(1)
    return h._state
def setopts(self, curl: pycurl.Curl):
    """Set cURL options for this form of auth."""
    curl.setopt(pycurl.HTTPAUTH, self.curl_httpauth)
    if self.username:
        curl.setopt(pycurl.USERNAME, self.username)
    if self.password:
        curl.setopt(pycurl.PASSWORD, self.password)
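# Hedged sketch of how such an auth object might be applied to a handle.
# The BasicAuth class below is hypothetical; only the pycurl calls are
# known API.
import pycurl

class BasicAuth:
    """Hypothetical holder mirroring the setopts() contract above."""
    curl_httpauth = pycurl.HTTPAUTH_BASIC

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def setopts(self, curl: pycurl.Curl):
        curl.setopt(pycurl.HTTPAUTH, self.curl_httpauth)
        if self.username:
            curl.setopt(pycurl.USERNAME, self.username)
        if self.password:
            curl.setopt(pycurl.PASSWORD, self.password)

curl = pycurl.Curl()
curl.setopt(pycurl.URL, "https://example.com/protected")
BasicAuth("alice", "secret").setopts(curl)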
def process(self):
    from pycurl import Curl
    import json
    from io import BytesIO

    b = BytesIO()
    c = Curl()
    c.setopt(c.URL, self.resource_uri)
    c.setopt(c.USERAGENT, "FooBar/1.0")
    c.setopt(c.WRITEDATA, b)
    c.perform()
    response = json.loads(b.getvalue())

    # Album metadata
    album_meta = AlbumMetadata()
    album_meta.title = response["title"]
    album_meta.year = response["year"]
    album_meta.genres = response["genres"]
    artists = []
    for artist in response["artists"]:
        artists.append(artist["name"])
    album_meta.artists = ",".join(artists)
    album_meta.tracktotal = len(response["tracklist"])

    # Iterate over the tracklist
    tracks_meta = []
    for track in response['tracklist']:
        meta = TrackMetadata()
        meta.duration = track["duration"]
        meta.position = track["position"]
        meta.title = track["title"]
        tracks_meta.append(meta)
    return (album_meta, tracks_meta)
def callAPI(self, url, params={}):
    # pull data (ported to Python 3: BytesIO instead of StringIO,
    # print() instead of the print statement, no bare except)
    buf = BytesIO()
    curl = Curl()
    curl.setopt(curl.URL, url + '?' + urlencode(params))
    curl.setopt(curl.WRITEFUNCTION, buf.write)
    try:
        curl.perform()
    except Exception:
        print('Failed on all routes curl')
    response = buf.getvalue()
    buf.close()
    return response
def callAPI(self, params={}):
    # pull data; 'error' is assumed to be pycurl.error imported at module level
    buf = BytesIO()
    curl = Curl()
    url = str(params.url)
    del params.url
    curl.setopt(curl.URL, url + '?' + urlencode(params))
    curl.setopt(curl.WRITEFUNCTION, buf.write)
    try:
        curl.perform()
    except error:
        print('Failed on curl')
    response = buf.getvalue()
    buf.close()
    return response
def http_req_perform(self, curl: pycurl.Curl, writefun: object) -> None:
    curl.setopt(pycurl.WRITEFUNCTION, writefun)
    try:
        curl.perform()
    except pycurl.error as e:
        code, errstr = e.args
        if code in (pycurl.E_URL_MALFORMAT, pycurl.E_PARTIAL_FILE):
            raise NetError(code, errstr)
        elif code in (pycurl.E_COULDNT_RESOLVE_PROXY, pycurl.E_COULDNT_RESOLVE_HOST):
            raise DNSError(code, errstr)
        elif code == pycurl.E_HTTP_RETURNED_ERROR:
            raise HTTPError(curl.getinfo(pycurl.HTTP_CODE), None, code, errstr)
        elif code in (pycurl.E_COULDNT_CONNECT, pycurl.E_OPERATION_TIMEOUTED,
                      pycurl.E_SEND_ERROR, pycurl.E_RECV_ERROR):
            raise ConnError(curl.getinfo(pycurl.OS_ERRNO), code, errstr)
        else:
            raise NetError(code, errstr)
def http_query(url, timeout=1000):
    # Ported to Python 3: pycurl delivers bytes, so both writers are BytesIO
    # and header lines are decoded before parsing.
    print(url)
    c = Curl()
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.FOLLOWLOCATION, 1)
    c.setopt(pycurl.TIMEOUT_MS, timeout)
    body_writer = BytesIO()
    head_writer = BytesIO()
    c.setopt(pycurl.WRITEFUNCTION, body_writer.write)
    c.setopt(pycurl.HEADERFUNCTION, head_writer.write)
    result = {}
    c.perform()
    head_writer.seek(0)
    first = head_writer.readline()  # status line, e.g. b'HTTP/1.1 200 OK'
    result['header'] = {}
    for line in head_writer:
        parts = line.decode('iso-8859-1').split(':', 1)
        if len(parts) == 2:
            result['header'][parts[0]] = parts[1].strip()
    result['code'] = c.getinfo(pycurl.HTTP_CODE)
    result['body'] = body_writer.getvalue()
    return result
print('')

# About 18 seconds?
LIBRARY = "'requests'"
print("Testing {0} performance with {1} cycles".format(LIBRARY, CYCLES))
mytime = timeit.timeit("r = requests.get('{0}')".format(URL),
                       setup='import requests', number=CYCLES)
print('{0}: ran {1} HTTP GET requests in {2} seconds'.format(LIBRARY, CYCLES, mytime))
print('')

### CONNECTION REUSE TESTS FOLLOW ###

LIBRARY = "pycurl (saving response body by BytesIO BUT MAKING A NEW HANDLE EVERY TIME)"
print("Testing {0} performance with {1} cycles".format(LIBRARY, CYCLES))
start = time.perf_counter()  # time.clock() was removed in Python 3.8
for i in range(CYCLES):      # was range(1, CYCLES), one short of the advertised count
    mycurl = Curl()
    mycurl.setopt(mycurl.URL, URL)
    body = BytesIO()
    mycurl.setopt(mycurl.WRITEDATA, body)
    mycurl.perform()
    output = body.getvalue()
    body.close()
    mycurl.close()
end = time.perf_counter()
print('{0}: ran {1} HTTP GET requests in {2} seconds'.format(LIBRARY, CYCLES, end - start))
print('')

LIBRARY = "pycurl (saving response body by BytesIO)"
print("Testing {0} CONNECTION REUSE performance with {1} cycles".format(LIBRARY, CYCLES))
def new_session(self) -> Iterator[Curl]:
    # Likely a fixture-style generator: yield a fresh handle, then close it.
    # (The original annotated the return as Curl, but a generator yields it.)
    session = Curl()
    yield session
    session.close()
def prepareGet(self, moreHeader=[]):
    """
    Prepares a HTTP GET request for the URL provided during initialization.

    @param moreHeader: An optional list of headers to be appended to the HTTP
        request headers. This list must contain string values of the form
        <param>: <value> as they are directly injected into the HTTP header.
    """
    logging.debug("prepare GET")
    self.__data = BytesIO()  # pycurl writes bytes; StringIO would fail under Python 3
    self.__response = 0
    self.__header = []
    c = Curl()
    c.setopt(c.CONNECTTIMEOUT, 10)
    c.setopt(c.TIMEOUT, 10)
    c.setopt(c.URL, self.__url)
    c.setopt(c.HTTPHEADER, moreHeader)
    c.setopt(c.WRITEDATA, self.__data)
    c.setopt(c.HEADERFUNCTION, self.__writeHeader)
    self.__curl = c
    return True
def _curl_setup_request(
    self,
    curl: pycurl.Curl,
    request: HTTPRequest,
    buffer: BytesIO,
    headers: httputil.HTTPHeaders,
) -> None:
    curl.setopt(pycurl.URL, native_str(request.url))

    # libcurl's magic "Expect: 100-continue" behavior causes delays
    # with servers that don't support it (which include, among others,
    # Google's OpenID endpoint). Additionally, this behavior has
    # a bug in conjunction with the curl_multi_socket_action API
    # (https://sourceforge.net/tracker/?func=detail&atid=100976&aid=3039744&group_id=976),
    # which increases the delays. It's more trouble than it's worth,
    # so just turn off the feature (yes, setting Expect: to an empty
    # value is the official way to disable this)
    if "Expect" not in request.headers:
        request.headers["Expect"] = ""

    # libcurl adds Pragma: no-cache by default; disable that too
    if "Pragma" not in request.headers:
        request.headers["Pragma"] = ""

    curl.setopt(
        pycurl.HTTPHEADER,
        [
            "%s: %s" % (native_str(k), native_str(v))
            for k, v in request.headers.get_all()
        ],
    )
    curl.setopt(
        pycurl.HEADERFUNCTION,
        functools.partial(
            self._curl_header_callback, headers, request.header_callback
        ),
    )

    if request.streaming_callback:

        def write_function(b: Union[bytes, bytearray]) -> int:
            assert request.streaming_callback is not None
            self.io_loop.add_callback(request.streaming_callback, b)
            return len(b)

    else:
        write_function = buffer.write
    curl.setopt(pycurl.WRITEFUNCTION, write_function)
    curl.setopt(pycurl.FOLLOWLOCATION, request.follow_redirects)
    curl.setopt(pycurl.MAXREDIRS, request.max_redirects)
    assert request.connect_timeout is not None
    curl.setopt(pycurl.CONNECTTIMEOUT_MS, int(1000 * request.connect_timeout))
    assert request.request_timeout is not None
    curl.setopt(pycurl.TIMEOUT_MS, int(1000 * request.request_timeout))
    if request.user_agent:
        curl.setopt(pycurl.USERAGENT, native_str(request.user_agent))
    else:
        curl.setopt(pycurl.USERAGENT, "Mozilla/5.0 (compatible; pycurl)")
    if request.network_interface:
        curl.setopt(pycurl.INTERFACE, request.network_interface)
    if request.decompress_response:
        curl.setopt(pycurl.ENCODING, "gzip,deflate")
    else:
        curl.setopt(pycurl.ENCODING, "none")
    if request.proxy_host and request.proxy_port:
        curl.setopt(pycurl.PROXY, request.proxy_host)
        curl.setopt(pycurl.PROXYPORT, request.proxy_port)
        if request.proxy_username:
            assert request.proxy_password is not None
            credentials = httputil.encode_username_password(
                request.proxy_username, request.proxy_password
            )
            curl.setopt(pycurl.PROXYUSERPWD, credentials)

        if request.proxy_auth_mode is None or request.proxy_auth_mode == "basic":
            curl.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_BASIC)
        elif request.proxy_auth_mode == "digest":
            curl.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_DIGEST)
        else:
            raise ValueError(
                "Unsupported proxy_auth_mode %s" % request.proxy_auth_mode
            )
    else:
        curl.setopt(pycurl.PROXY, "")
        curl.unsetopt(pycurl.PROXYUSERPWD)
    if request.validate_cert:
        curl.setopt(pycurl.SSL_VERIFYPEER, 1)
        curl.setopt(pycurl.SSL_VERIFYHOST, 2)
    else:
        curl.setopt(pycurl.SSL_VERIFYPEER, 0)
        curl.setopt(pycurl.SSL_VERIFYHOST, 0)
    if request.ca_certs is not None:
        curl.setopt(pycurl.CAINFO, request.ca_certs)
    else:
        # There is no way to restore pycurl.CAINFO to its default value
        # (Using unsetopt makes it reject all certificates).
        # I don't see any way to read the default value from python so it
        # can be restored later. We'll have to just leave CAINFO untouched
        # if no ca_certs file was specified, and require that if any
        # request uses a custom ca_certs file, they all must.
        pass

    if request.allow_ipv6 is False:
        # Curl behaves reasonably when DNS resolution gives an ipv6 address
        # that we can't reach, so allow ipv6 unless the user asks to disable.
        curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
    else:
        curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER)

    # Set the request method through curl's irritating interface which makes
    # up names for almost every single method
    curl_options = {
        "GET": pycurl.HTTPGET,
        "POST": pycurl.POST,
        "PUT": pycurl.UPLOAD,
        "HEAD": pycurl.NOBODY,
    }
    custom_methods = set(["DELETE", "OPTIONS", "PATCH"])
    for o in curl_options.values():
        curl.setopt(o, False)
    if request.method in curl_options:
        curl.unsetopt(pycurl.CUSTOMREQUEST)
        curl.setopt(curl_options[request.method], True)
    elif request.allow_nonstandard_methods or request.method in custom_methods:
        curl.setopt(pycurl.CUSTOMREQUEST, request.method)
    else:
        raise KeyError("unknown method " + request.method)

    body_expected = request.method in ("POST", "PATCH", "PUT")
    body_present = request.body is not None
    if not request.allow_nonstandard_methods:
        # Some HTTP methods nearly always have bodies while others
        # almost never do. Fail in this case unless the user has
        # opted out of sanity checks with allow_nonstandard_methods.
        if (body_expected and not body_present) or (
            body_present and not body_expected
        ):
            raise ValueError(
                "Body must %sbe None for method %s (unless "
                "allow_nonstandard_methods is true)"
                % ("not " if body_expected else "", request.method)
            )

    if body_expected or body_present:
        if request.method == "GET":
            # Even with `allow_nonstandard_methods` we disallow
            # GET with a body (because libcurl doesn't allow it
            # unless we use CUSTOMREQUEST). While the spec doesn't
            # forbid clients from sending a body, it arguably
            # disallows the server from doing anything with them.
            raise ValueError("Body must be None for GET request")
        request_buffer = BytesIO(utf8(request.body or ""))

        def ioctl(cmd: int) -> None:
            if cmd == curl.IOCMD_RESTARTREAD:
                request_buffer.seek(0)

        curl.setopt(pycurl.READFUNCTION, request_buffer.read)
        curl.setopt(pycurl.IOCTLFUNCTION, ioctl)
        if request.method == "POST":
            curl.setopt(pycurl.POSTFIELDSIZE, len(request.body or ""))
        else:
            curl.setopt(pycurl.UPLOAD, True)
            curl.setopt(pycurl.INFILESIZE, len(request.body or ""))

    if request.auth_username is not None:
        assert request.auth_password is not None
        if request.auth_mode is None or request.auth_mode == "basic":
            curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_BASIC)
        elif request.auth_mode == "digest":
            curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_DIGEST)
        else:
            raise ValueError("Unsupported auth_mode %s" % request.auth_mode)

        userpwd = httputil.encode_username_password(
            request.auth_username, request.auth_password
        )
        curl.setopt(pycurl.USERPWD, userpwd)
        curl_log.debug(
            "%s %s (username: %r)",
            request.method,
            request.url,
            request.auth_username,
        )
    else:
        curl.unsetopt(pycurl.USERPWD)
        curl_log.debug("%s %s", request.method, request.url)

    if request.client_cert is not None:
        curl.setopt(pycurl.SSLCERT, request.client_cert)
    if request.client_key is not None:
        curl.setopt(pycurl.SSLKEY, request.client_key)
    if request.ssl_options is not None:
        raise ValueError("ssl_options not supported in curl_httpclient")

    if threading.active_count() > 1:
        # libcurl/pycurl is not thread-safe by default. When multiple threads
        # are used, signals should be disabled. This has the side effect
        # of disabling DNS timeouts in some environments (when libcurl is
        # not linked against ares), so we don't do it when there is only one
        # thread. Applications that use many short-lived threads may need
        # to set NOSIGNAL manually in a prepare_curl_callback since
        # there may not be any other threads running at the time we call
        # threading.activeCount.
        curl.setopt(pycurl.NOSIGNAL, 1)
    if request.prepare_curl_callback is not None:
        request.prepare_curl_callback(curl)
def performSubmission(submissionFileName, POST_DATA):
    logging.info('Performing submission of ' + submissionFileName + '\n')
    logging.info('POST Data:\n' + str(POST_DATA) + '\n')

    if str(getConfigurationValue('test_submission')) == '0':
        logging.info('THIS IS A LIVE SUBMISSION AT ENA.')
        requestURL = (str(getConfigurationValue('ena_rest_address_prod'))
                      + '?auth=ENA%20' + str(getConfigurationValue('ena_username'))
                      + '%20' + str(getConfigurationValue('ena_password')))
    else:
        logging.info('THIS IS A TEST SUBMISSION AT ENA.')
        requestURL = (str(getConfigurationValue('ena_rest_address_test'))
                      + '?auth=ENA%20' + str(getConfigurationValue('ena_username'))
                      + '%20' + str(getConfigurationValue('ena_password')))

    # StringIO doesn't work with pycurl in Python 3.6; use BytesIO instead.
    curlResponseBuffer = BytesIO()
    try:
        curlObject = Curl()
        curlObject.setopt(curlObject.URL, requestURL)
        curlObject.setopt(curlObject.POST, 1)
        curlObject.setopt(curlObject.HTTPPOST, POST_DATA)
        curlObject.setopt(curlObject.USERAGENT, 'Curl')
        curlObject.setopt(curlObject.WRITEFUNCTION, curlResponseBuffer.write)
        curlObject.setopt(curlObject.HTTPHEADER, ['Accept:application/xml'])
        # Insecure. Any security experts want to make this better?
        curlObject.setopt(curlObject.SSL_VERIFYHOST, 0)
        curlObject.setopt(curlObject.SSL_VERIFYPEER, 0)
        curlObject.perform()
        curlObject.close()
    except Exception:
        logging.error('Exception when performing CURL.\n')
        logging.error('URL:' + str(requestURL))
        raise

    responseText = curlResponseBuffer.getvalue()

    # Write the response XML to a file.
    projectSubResultsFileName = submissionFileName.replace('.xml', '_results.xml')
    resultsFile = createOutputFile(projectSubResultsFileName)
    resultsFile.write(str(responseText))
    resultsFile.close()
    return responseText
def _execute(curl: Curl, close_connection: bool) -> int:
    curl.perform()
    status_code = curl.getinfo(curl.HTTP_CODE)
    if close_connection:
        curl.close()
    return status_code
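# Hedged usage sketch for _execute; calling it as a free function is an
# assumption (in the snippets above it appears as HTTPClient._execute).
from io import BytesIO
from pycurl import Curl

buf = BytesIO()
curl = Curl()
curl.setopt(curl.URL, "https://example.com")
curl.setopt(curl.WRITEDATA, buf)
status = _execute(curl, close_connection=True)
print(status, len(buf.getvalue()))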
def processVideo(self, vID, number):
    for _attempt in range(self.retryCount):
        title = ''
        download = None
        for i in count():
            try:
                self.goTo(vID)
                title = self.getElement('h1[itemprop=name]').text.strip().rstrip('.')
                self.driver.find_element_by_class_name('iconify_down_b').click()
                download = self.getElement('#download')
                break
            except NoSuchElementException as e:
                self.logger.warning(e.msg)
                if i >= self.retryCount:
                    self.logger.error("Page load failed")
                    self.errors += 1
                    break
        # Parse download links
        link = linkSize = localSize = downloadOK = downloadSkip = None
        if download:
            for preference in FILE_PREFERENCES:
                try:
                    link = download.find_element_by_partial_link_text(preference)
                    break
                except NoSuchElementException:
                    pass
        if link:  # Parse chosen download link
            userAgent = str(self.driver.execute_script('return window.navigator.userAgent'))
            cookies = self.driver.get_cookies()
            extension = link.get_attribute('download').split('.')[-1]
            description = '%s/%s' % (link.text, extension.upper())
            link = str(link.get_attribute('href'))
            if self.getFileSizes:
                try:
                    request = requests.get(
                        link, stream=True, headers={'user-agent': userAgent},
                        cookies=dict((str(cookie['name']), str(cookie['value'])) for cookie in cookies))
                    request.close()
                    linkSize = int(request.headers['content-length'])
                    self.totalFileSize += linkSize
                    description += ', %s' % readableSize(linkSize)
                except Exception as e:
                    self.logger.warning(e)
        else:
            description = extension = 'NONE'
        # Prepare file information
        prefix = ' '.join((title, '(%s)' % description))
        suffix = ' '.join((('%d/%d %d%%' % (number, len(self.vIDs), int(number * 100.0 / len(self.vIDs)))),)
                          + ((readableSize(self.totalFileSize),) if self.totalFileSize else ()))
        self.logger.info(' '.join((prefix, suffix)))
        fileName = cleanupFileName('%s.%s' % (' '.join(((title,) if title else ()) + (str(vID),)), extension.lower()))
        targetFileName = join(self.targetDirectory, fileName)
        if self.setLanguage:
            try:
                self.driver.find_element_by_id('change_settings').click()
                languages = self.driver.find_elements_by_css_selector('select[name=language] option')
                currentLanguage = ([l for l in languages if l.is_selected()] or [None])[0]
                if currentLanguage is None or currentLanguage is languages[0]:
                    ls = [l for l in languages if l.text.capitalize().startswith(self.setLanguage)]
                    if len(ls) != 1:
                        ls = [l for l in languages if l.get_attribute('value').capitalize().startswith(self.setLanguage)]
                    if len(ls) == 1:
                        self.logger.info("Language not set, setting to %s", ls[0].text)
                        ls[0].click()
                        self.driver.find_element_by_css_selector('#settings_form input[type=submit]').click()
                    else:
                        self.logger.error("Unsupported language: %s", self.setLanguage)
                        self.setLanguage = None
                else:
                    self.logger.info("Language already set to %s / %s",
                                     currentLanguage.get_attribute('value').upper(), currentLanguage.text)
            except NoSuchElementException:
                self.logger.warning("Failed to set language to %s, settings not available", self.setLanguage)
        if link:  # Downloading file
            if linkSize:
                localSize = getFileSize(targetFileName)
                if localSize == linkSize:
                    downloadOK = True
                elif localSize and localSize > linkSize:
                    self.errors += 1
                    self.logger.error("Local file is larger (%d) than remote file (%d)", localSize, linkSize)
                    downloadSkip = True
                    # remove(targetFileName)
                    # localSize = None
            if self.doDownload and not downloadOK:
                class ProgressIndicator(object):
                    QUANTUM = 10 * 1024 * 1024  # 10 megabytes
                    ACTION = r'--\\||//'  # update() often gets called in pairs, this smoothes things up
                    action = len(ACTION) - 1

                    def __init__(self, timeout):
                        self.timeout = timeout
                        self.started = False
                        self.totalRead = 0
                        self.lastData = time()
                        self.count = 0
                        self.action = len(self.ACTION) - 1
                        self.progress("Downloading: ")

                    def progress(self, s, suffix=''):
                        self.action = (self.action + 1) % len(self.ACTION)
                        print('\b%s%s' % (s, suffix + '\n' if suffix else self.ACTION[self.action]),
                              end='', flush=True)

                    def update(self, _length, totalRead, *_args):
                        if totalRead <= self.totalRead:
                            if time() > self.lastData + self.timeout:
                                raise curlError("Download seems stalled")
                        else:
                            self.totalRead = totalRead
                            self.lastData = time()
                        oldCount = self.count
                        self.count = int(totalRead // self.QUANTUM) + 1
                        self.progress(('=' if self.started else '+') * max(0, self.count - oldCount))
                        self.started = True

                    def end(self):
                        self.progress("OK")

                progressIndicator = ProgressIndicator(self.timeout)
                curl = Curl()
                curl.setopt(curl.CAINFO, certifi.where())
                curl.setopt(curl.COOKIE, '; '.join('%s=%s' % (cookie['name'], cookie['value']) for cookie in cookies))
                curl.setopt(curl.TIMEOUT, self.timeout)
                curl.setopt(curl.USERAGENT, userAgent)
                curl.setopt(curl.FOLLOWLOCATION, True)
                curl.setopt(curl.URL, link)
                curl.setopt(curl.PROGRESSFUNCTION, progressIndicator.update)
                try:
                    with open(targetFileName, 'wb') as f:
                        curl.setopt(curl.WRITEDATA, f)
                        curl.perform()
                        curl.close()
                    progressIndicator.end()
                    downloadOK = True
                except curlError as e:
                    self.errors += 1
                    self.logger.error("Download failed: %s", e)
                except KeyboardInterrupt:
                    self.errors += 1
                    self.logger.error("Download interrupted")
                if downloadOK:
                    localSize = getFileSize(targetFileName)
                    if not localSize:
                        self.errors += 1
                        downloadOK = False
                        self.logger.error("Downloaded file seems corrupt")
                    elif linkSize:
                        if localSize > linkSize:
                            self.errors += 1
                            downloadOK = False
                            self.logger.error("Downloaded file larger (%d) than remote file (%d)", localSize, linkSize)
                        elif localSize < linkSize:
                            self.errors += 1
                            downloadOK = False
                            self.logger.error("Downloaded file smaller (%d) than remote file (%d)", localSize, linkSize)
        if downloadOK:
            self.logger.info("OK")
            break
        elif downloadSkip or not self.doDownload:
            self.logger.info("Downloading SKIPPED")
            break
    else:
        self.logger.info("Download ultimately failed after %d retries", self.retryCount)
    # Creating symbolic links, if enabled
    for dirName in (dirName for (dirName, vIDs) in self.folders if vID in vIDs):
        linkFileName = join(dirName, fileName)
        try:
            if lexists(linkFileName):
                remove(linkFileName)
        except:
            pass
        try:
            (hardlink if self.useHardLinks else symlink)(join('..', fileName), linkFileName)
        except Exception as e:
            self.logger.warning("Can't create link at %s: %s", linkFileName, e)
            self.errors += 1
if "-q" in sys.argv: opts.verbose = opts.verbose - 1 print "Python", sys.version print "PycURL %s (compiled against 0x%x)" % (pycurl.version, pycurl.COMPILE_LIBCURL_VERSION_NUM) print "PycURL version info", pycurl.version_info() print " %s, compiled %s" % (pycurl.__file__, pycurl.COMPILE_DATE) # /*********************************************************************** # // test misc # ************************************************************************/ if 1: c = Curl() assert c.URL is pycurl.URL del c # /*********************************************************************** # // test handles # ************************************************************************/ # remove an invalid handle: this should fail if 1: m = CurlMulti() c = Curl() try: m.remove_handle(c) except pycurl.error:
def build_thread(gitpath, ref, buildid, cburl=None, submodules=False):
    tmpdir = os.path.join(conf('buildbot.buildpath'), buildid)
    repo = GitRepository(tmpdir)

    output, retcode = repo.clone(gitpath)
    if retcode:
        buildlog(buildid, 'Unable to clone %s. %s\n' % (gitpath, '\n'.join(output)))
        return

    output, retcode = repo.checkout(ref)
    if retcode:
        buildlog(buildid, 'Unable to checkout %s. %s\n' % (ref, '\n'.join(output)))
        return

    if submodules:
        output, retcode = repo.submodule_init()
        buildlog(buildid, output[0])
        buildlog(buildid, output[1])
        output, retcode = repo.submodule_update()
        buildlog(buildid, output[0])
        buildlog(buildid, output[1])

    resultsdir = os.path.join(tmpdir, '.build_results')
    os.makedirs(resultsdir)
    output, retcode = repo.build(conf('buildbot.signkey'), conf('buildbot.pbuilderrc'), resultsdir)
    buildlog(buildid, output[0])
    buildlog(buildid, output[1])

    os.chdir(resultsdir)
    if not os.listdir(resultsdir) or retcode != 0:
        buildlog(buildid, 'Nothing in results directory. Giving up.')
        return

    tarpath = os.path.join(tmpdir, 'package.tar.gz')
    tar = tarfile.open(tarpath, 'w:gz')
    for name in os.listdir(resultsdir):
        tar.add(name)
    tar.close()

    buildlog(buildid, 'Build complete. Results in %s\n' % tarpath)
    data = open(tarpath, 'rb').read()  # was the Python 2 file() builtin
    buildlog(buildid, 'Built %i byte tarball' % len(data))

    if cburl:
        buildlog(buildid, 'Performing callback: %s' % cburl)
        req = Curl()
        req.setopt(req.POST, 1)
        req.setopt(req.URL, str(cburl))
        req.setopt(req.HTTPPOST, [('package', (req.FORM_FILE, str(tarpath)))])
        # binary mode, since pycurl writes bytes
        req.setopt(req.WRITEDATA, open('%s/build.log' % tmpdir, 'ab'))
        req.perform()
        req.close()
def _complete_request(curl: pycurl.Curl, buffer: BytesIO, response: Response):
    curl.perform()
    response.status = curl.getinfo(curl.RESPONSE_CODE)
    response.body = buffer.getvalue().decode(_CHAR_ENCODING)
    curl.close()
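# Hedged usage sketch; the Response class and _CHAR_ENCODING below stand in
# for the (unseen) surrounding module. Only the pycurl calls are known API.
import pycurl
from io import BytesIO

_CHAR_ENCODING = "utf-8"  # assumption

class Response:
    status = 0
    body = ""

buf = BytesIO()
curl = pycurl.Curl()
curl.setopt(pycurl.URL, "https://example.com")
curl.setopt(pycurl.WRITEDATA, buf)
resp = Response()
_complete_request(curl, buf, resp)
print(resp.status, resp.body[:80])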
def _curl_setup_request(
    self,
    curl: pycurl.Curl,
    request: HTTPRequest,
    buffer: BytesIO,
    headers: httputil.HTTPHeaders,
) -> None:
    curl.setopt(pycurl.URL, native_str(request.url))

    # libcurl's magic "Expect: 100-continue" behavior causes delays
    # with servers that don't support it (which include, among others,
    # Google's OpenID endpoint). Additionally, this behavior has
    # a bug in conjunction with the curl_multi_socket_action API
    # (https://sourceforge.net/tracker/?func=detail&atid=100976&aid=3039744&group_id=976),
    # which increases the delays. It's more trouble than it's worth,
    # so just turn off the feature (yes, setting Expect: to an empty
    # value is the official way to disable this)
    if "Expect" not in request.headers:
        request.headers["Expect"] = ""

    # libcurl adds Pragma: no-cache by default; disable that too
    if "Pragma" not in request.headers:
        request.headers["Pragma"] = ""

    curl.setopt(
        pycurl.HTTPHEADER,
        [
            "%s: %s" % (native_str(k), native_str(v))
            for k, v in request.headers.get_all()
        ],
    )
    curl.setopt(
        pycurl.HEADERFUNCTION,
        functools.partial(self._curl_header_callback, headers, request.header_callback),
    )

    if request.streaming_callback:

        def write_function(b: Union[bytes, bytearray]) -> int:
            assert request.streaming_callback is not None
            self.io_loop.add_callback(request.streaming_callback, b)
            return len(b)

    else:
        write_function = buffer.write
    curl.setopt(pycurl.WRITEFUNCTION, write_function)
    curl.setopt(pycurl.FOLLOWLOCATION, request.follow_redirects)
    curl.setopt(pycurl.MAXREDIRS, request.max_redirects)
    assert request.connect_timeout is not None
    curl.setopt(pycurl.CONNECTTIMEOUT_MS, int(1000 * request.connect_timeout))
    assert request.request_timeout is not None
    curl.setopt(pycurl.TIMEOUT_MS, int(1000 * request.request_timeout))
    if request.user_agent:
        curl.setopt(pycurl.USERAGENT, native_str(request.user_agent))
    else:
        curl.setopt(pycurl.USERAGENT, "Mozilla/5.0 (compatible; pycurl)")
    if request.network_interface:
        curl.setopt(pycurl.INTERFACE, request.network_interface)
    if request.decompress_response:
        curl.setopt(pycurl.ENCODING, "gzip,deflate")
    else:
        curl.setopt(pycurl.ENCODING, None)
    if request.proxy_host and request.proxy_port:
        curl.setopt(pycurl.PROXY, request.proxy_host)
        curl.setopt(pycurl.PROXYPORT, request.proxy_port)
        if request.proxy_username:
            assert request.proxy_password is not None
            credentials = httputil.encode_username_password(
                request.proxy_username, request.proxy_password)
            curl.setopt(pycurl.PROXYUSERPWD, credentials)

        if request.proxy_auth_mode is None or request.proxy_auth_mode == "basic":
            curl.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_BASIC)
        elif request.proxy_auth_mode == "digest":
            curl.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_DIGEST)
        else:
            raise ValueError("Unsupported proxy_auth_mode %s" % request.proxy_auth_mode)
    else:
        try:
            curl.unsetopt(pycurl.PROXY)
        except TypeError:  # not supported, disable proxy
            curl.setopt(pycurl.PROXY, "")
        curl.unsetopt(pycurl.PROXYUSERPWD)
    if request.validate_cert:
        curl.setopt(pycurl.SSL_VERIFYPEER, 1)
        curl.setopt(pycurl.SSL_VERIFYHOST, 2)
    else:
        curl.setopt(pycurl.SSL_VERIFYPEER, 0)
        curl.setopt(pycurl.SSL_VERIFYHOST, 0)
    if request.ca_certs is not None:
        curl.setopt(pycurl.CAINFO, request.ca_certs)
    else:
        # There is no way to restore pycurl.CAINFO to its default value
        # (Using unsetopt makes it reject all certificates).
        # I don't see any way to read the default value from python so it
        # can be restored later. We'll have to just leave CAINFO untouched
        # if no ca_certs file was specified, and require that if any
        # request uses a custom ca_certs file, they all must.
        pass

    if request.allow_ipv6 is False:
        # Curl behaves reasonably when DNS resolution gives an ipv6 address
        # that we can't reach, so allow ipv6 unless the user asks to disable.
        curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
    else:
        curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER)

    # Set the request method through curl's irritating interface which makes
    # up names for almost every single method
    curl_options = {
        "GET": pycurl.HTTPGET,
        "POST": pycurl.POST,
        "PUT": pycurl.UPLOAD,
        "HEAD": pycurl.NOBODY,
    }
    custom_methods = set(["DELETE", "OPTIONS", "PATCH"])
    for o in curl_options.values():
        curl.setopt(o, False)
    if request.method in curl_options:
        curl.unsetopt(pycurl.CUSTOMREQUEST)
        curl.setopt(curl_options[request.method], True)
    elif request.allow_nonstandard_methods or request.method in custom_methods:
        curl.setopt(pycurl.CUSTOMREQUEST, request.method)
    else:
        raise KeyError("unknown method " + request.method)

    body_expected = request.method in ("POST", "PATCH", "PUT")
    body_present = request.body is not None
    if not request.allow_nonstandard_methods:
        # Some HTTP methods nearly always have bodies while others
        # almost never do. Fail in this case unless the user has
        # opted out of sanity checks with allow_nonstandard_methods.
        if (body_expected and not body_present) or (body_present and not body_expected):
            raise ValueError(
                "Body must %sbe None for method %s (unless "
                "allow_nonstandard_methods is true)"
                % ("not " if body_expected else "", request.method))

    if body_expected or body_present:
        if request.method == "GET":
            # Even with `allow_nonstandard_methods` we disallow
            # GET with a body (because libcurl doesn't allow it
            # unless we use CUSTOMREQUEST). While the spec doesn't
            # forbid clients from sending a body, it arguably
            # disallows the server from doing anything with them.
            raise ValueError("Body must be None for GET request")
        request_buffer = BytesIO(utf8(request.body or ""))

        def ioctl(cmd: int) -> None:
            if cmd == curl.IOCMD_RESTARTREAD:  # type: ignore
                request_buffer.seek(0)

        curl.setopt(pycurl.READFUNCTION, request_buffer.read)
        curl.setopt(pycurl.IOCTLFUNCTION, ioctl)
        if request.method == "POST":
            curl.setopt(pycurl.POSTFIELDSIZE, len(request.body or ""))
        else:
            curl.setopt(pycurl.UPLOAD, True)
            curl.setopt(pycurl.INFILESIZE, len(request.body or ""))

    if request.auth_username is not None:
        assert request.auth_password is not None
        if request.auth_mode is None or request.auth_mode == "basic":
            curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_BASIC)
        elif request.auth_mode == "digest":
            curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_DIGEST)
        else:
            raise ValueError("Unsupported auth_mode %s" % request.auth_mode)

        userpwd = httputil.encode_username_password(
            request.auth_username, request.auth_password)
        curl.setopt(pycurl.USERPWD, userpwd)
        curl_log.debug(
            "%s %s (username: %r)",
            request.method,
            request.url,
            request.auth_username,
        )
    else:
        curl.unsetopt(pycurl.USERPWD)
        curl_log.debug("%s %s", request.method, request.url)

    if request.client_cert is not None:
        curl.setopt(pycurl.SSLCERT, request.client_cert)
    if request.client_key is not None:
        curl.setopt(pycurl.SSLKEY, request.client_key)
    if request.ssl_options is not None:
        raise ValueError("ssl_options not supported in curl_httpclient")

    if threading.active_count() > 1:
        # libcurl/pycurl is not thread-safe by default. When multiple threads
        # are used, signals should be disabled. This has the side effect
        # of disabling DNS timeouts in some environments (when libcurl is
        # not linked against ares), so we don't do it when there is only one
        # thread. Applications that use many short-lived threads may need
        # to set NOSIGNAL manually in a prepare_curl_callback since
        # there may not be any other threads running at the time we call
        # threading.activeCount.
        curl.setopt(pycurl.NOSIGNAL, 1)
    if request.prepare_curl_callback is not None:
        request.prepare_curl_callback(curl)
def get(url, headers=[]):
    '''Given a URL and headers send a HTTP GET request. Return the response.'''
    b = BytesIO()  # pycurl delivers bytes, so a binary buffer is required
    c = Curl()
    c.setopt(c.URL, url)
    c.setopt(c.FOLLOWLOCATION, True)
    c.setopt(c.WRITEFUNCTION, b.write)
    c.setopt(c.COOKIEJAR, cookiePath)
    c.setopt(c.COOKIEFILE, cookiePath)
    c.setopt(c.VERBOSE, verbose)
    c.setopt(c.CONNECTTIMEOUT, connectTimeout)
    c.setopt(c.TIMEOUT, defaultTimeout)
    c.setopt(c.USERAGENT, userAgent)
    c.setopt(c.HTTPHEADER, defaultHeaders + headers)
    c.perform()
    r = b.getvalue()  # getvalue() already returns a copy of the buffer
    b.close()
    return r
def moodle_admin_login_curl(self):
    fd, path = tempfile.mkstemp()
    try:
        response = BytesIO()
        url = 'https://' + self.deployment['siteURL'] + '/login/index.php'
        curl = Curl()
        curl.setopt(pycurl.URL, url)
        curl.setopt(pycurl.SSL_VERIFYPEER, False)
        curl.setopt(pycurl.WRITEFUNCTION, response.write)
        curl.setopt(pycurl.POST, True)
        curl.setopt(pycurl.COOKIEJAR, path)
        curl.setopt(pycurl.COOKIEFILE, path)
        post = urllib.parse.urlencode({'username': '******',
                                       'password': self.deployment['moodleAdminPassword']})
        curl.setopt(pycurl.POSTFIELDS, post)
        curl.setopt(pycurl.FOLLOWLOCATION, True)
        curl.perform()
        status = curl.getinfo(pycurl.HTTP_CODE)
        if status != 200:
            print("*** FAILED: {} ***".format(status))
            sys.exit(1)
        response = response.getvalue().decode('utf-8')
    finally:
        os.remove(path)
    return response
def _set_postfields(curl_obj: pycurl.Curl, postfields: str):
    postfieldsize = len(postfields)
    logger.debug("postfieldsize: {pf_size}", pf_size=postfieldsize)
    logger.debug("postfields: {pf}", pf=postfields)
    curl_obj.setopt(curl_obj.POSTFIELDS, postfields)
    curl_obj.setopt(curl_obj.POSTFIELDSIZE_LARGE, postfieldsize)
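# Hedged sketch pairing _set_postfields with a fresh handle; the URL and
# form fields are made up for illustration, and the module-level logger
# used inside _set_postfields is assumed to exist.
from urllib.parse import urlencode
import pycurl

curl = pycurl.Curl()
curl.setopt(pycurl.URL, "https://example.com/login")
_set_postfields(curl, urlencode({"user": "alice", "pwd": "secret"}))
curl.perform()
curl.close()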
def _perform(self, url: str, curl_obj: pycurl.Curl = None, headers: dict = None,
             postfields: dict = None, skip_auth=False) -> bytes:
    if not skip_auth:
        self._wait_authenticated()

    if not curl_obj:
        curl_obj = pycurl.Curl()

    if postfields:
        postfields = urlencode(postfields)
        _set_postfields(curl_obj, postfields)

    logger.debug("url={url}, headers={headers}", url=url, headers=headers)
    if not headers:
        headers = self.BASE_HEADERS.copy()
    headers = self._headers_to_list(headers)
    logger.debug("prepared headers={h}", h=headers)

    buffer = BytesIO()

    curl_obj.setopt(pycurl.WRITEFUNCTION, buffer.write)
    curl_obj.setopt(pycurl.HEADERFUNCTION, self._header_function)
    curl_obj.setopt(pycurl.BUFFERSIZE, 102400)
    curl_obj.setopt(pycurl.URL, url)
    curl_obj.setopt(pycurl.HTTPHEADER, headers)
    curl_obj.setopt(pycurl.USERAGENT, CURL_USERAGENT)
    curl_obj.setopt(pycurl.MAXREDIRS, 50)
    curl_obj.setopt(pycurl.ACCEPT_ENCODING, "")
    curl_obj.setopt(pycurl.TCP_KEEPALIVE, 1)
    curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
    curl_obj.setopt(pycurl.ENCODING, "gzip, deflate")

    try:
        curl_obj.perform()
    except pycurl.error as e:
        logger.debug(e, exc_info=True)
        logger.warning(e)
        return b""

    status = curl_obj.getinfo(pycurl.HTTP_CODE)
    logger.debug("HTTP status: {s}", s=status)
    curl_obj.close()

    if status != HTTPStatus.OK:
        hdrs = None
        try:
            hdrs = {k: v[-1] for k, v in self._headers.items()}
        except (IndexError, KeyError):
            pass

        phrase = "error"
        try:
            phrase = http.client.responses[status]
            logger.error("HTTP status error: {s}", s=status)
        except KeyError:
            pass

        raise HTTPError(url=url, msg=phrase, code=status, hdrs=hdrs, fp=None)

    # Server changing maps will trigger sessionid change,
    # keep track of latest sessionid in response headers.
    sessionid = self._find_sessionid()
    if sessionid and self._auth_data:
        self._auth_data.sessionid = sessionid

    return buffer.getvalue()
class PageFetcher():
    """Fetches a page"""

    def __init__(self):
        self.curl = Curl()
        self.url = None
        self.headers = {}
        self.status = ''
        self.code = 0
        self.charset_re = re.compile(r'charset=(\S+)')

    def handle_headers(self, header):
        """Parses the headers from a HTTP response"""
        header = header.decode('iso-8859-1')  # headers are always in iso-8859-1
        if ':' in header:
            # split out the header's name and value
            n, v = header.split(': ', 1)
            self.headers[n] = v.rstrip('\r\n')
        elif 'HTTP' in header:
            h, code, status = header.split(' ', 2)
            self.code = int(code)
            self.status = status.rstrip('\r\n')

    def encoding(self):
        """Gets the encoding from the headers, otherwise assumes iso-8859-1"""
        if 'Content-Type' in self.headers:
            match = self.charset_re.search(self.headers['Content-Type'].lower())
            if match:
                return match.group(1)
        return 'iso-8859-1'

    def fetch(self, url, headers_only=False):
        """Gets the specified webpage"""
        # reset the gathered data
        self.headers = {}
        self.code = 0
        self.status = None
        # get the page
        buff = BytesIO()
        self.curl.setopt(self.curl.URL, url)
        if headers_only:
            self.curl.setopt(self.curl.NOBODY, 1)
        else:
            self.curl.setopt(self.curl.NOBODY, 0)
        self.curl.setopt(self.curl.WRITEDATA, buff)
        self.curl.setopt(self.curl.HEADERFUNCTION, self.handle_headers)
        self.curl.perform()
        # decode the returned data to the correct type
        body = buff.getvalue().decode(self.encoding())
        return self.code, self.headers, body
def _finish(
    self, curl: pycurl.Curl, curl_error: int = None, curl_message: str = None
) -> None:
    info = curl.info
    curl.info = None
    self._multi.remove_handle(curl)
    self._free_list.append(curl)
    buffer = info["buffer"]
    if curl_error:
        assert curl_message is not None
        error = CurlError(curl_error, curl_message)  # type: Optional[CurlError]
        assert error is not None
        code = error.code
        effective_url = None
        buffer.close()
        buffer = None
    else:
        error = None
        code = curl.getinfo(pycurl.HTTP_CODE)
        effective_url = curl.getinfo(pycurl.EFFECTIVE_URL)
        buffer.seek(0)
    # the various curl timings are documented at
    # http://curl.haxx.se/libcurl/c/curl_easy_getinfo.html
    time_info = dict(
        queue=info["curl_start_ioloop_time"] - info["queue_start_time"],
        namelookup=curl.getinfo(pycurl.NAMELOOKUP_TIME),
        connect=curl.getinfo(pycurl.CONNECT_TIME),
        appconnect=curl.getinfo(pycurl.APPCONNECT_TIME),
        pretransfer=curl.getinfo(pycurl.PRETRANSFER_TIME),
        starttransfer=curl.getinfo(pycurl.STARTTRANSFER_TIME),
        total=curl.getinfo(pycurl.TOTAL_TIME),
        redirect=curl.getinfo(pycurl.REDIRECT_TIME),
    )
    try:
        info["callback"](
            HTTPResponse(
                request=info["request"],
                code=code,
                headers=info["headers"],
                buffer=buffer,
                effective_url=effective_url,
                error=error,
                reason=info["headers"].get("X-Http-Reason", None),
                request_time=self.io_loop.time() - info["curl_start_ioloop_time"],
                start_time=info["curl_start_time"],
                time_info=time_info,
            )
        )
    except Exception:
        self.handle_callback_exception(info["callback"])