def save_file(self, source_path, key):
    """Upload a local file to the server under *key*.

    :param source_path: path relative to the local file system.
    :param key: destination key, relative to the current prefix.
    :return: result of ``local_server.copy`` when a local server is
        configured, otherwise ``None`` (upload done via HTTP POST).
    """
    if self.local_server:
        # Bug fix: the original prepended "/" when the path ALREADY
        # started with "/", yielding "//path" and leaving relative
        # paths untouched; normalize to absolute instead.
        if not source_path.startswith('/'):
            source_path = "/" + source_path
        return self.local_server.copy(source_path, key)
    from pycurl import Curl
    c = Curl()
    c.setopt(c.URL, self.url)
    c.setopt(c.TIMEOUT, 3600)
    c.setopt(c.HTTPPOST, [
        ('file', (
            c.FORM_FILE, source_path,
            c.FORM_FILENAME, key,
            c.FORM_CONTENTTYPE, 'plain/text',
        )),
    ])
    c.perform()
    c.close()
def save_buffer(self, buffer, key):
    """Upload an in-memory buffer to the server under *key*.

    :param buffer: a ``BytesIO`` (posted via requests_toolbelt) or a
        ``StringIO`` (posted via a pycurl form buffer).
    :param key: destination key, relative to the current prefix.
    """
    if isinstance(buffer, BytesIO):
        from requests_toolbelt import MultipartEncoder
        # Bug fix: the original referenced the undefined names ``buf``
        # and ``true``; MultipartEncoder field values must be
        # str/bytes/file tuples, not booleans.
        encoder = MultipartEncoder({'file': (key, buffer), 'canary': 'true'})
        self.session.post(self.url, data=encoder,
                          headers={'Content-Type': encoder.content_type})
    elif isinstance(buffer, StringIO):
        from pycurl import Curl
        c = Curl()
        c.setopt(c.URL, self.url)
        c.setopt(c.TIMEOUT, 3600)
        c.setopt(c.HTTPPOST, [
            ('file', (
                # Bug fix: ``source_path`` was undefined in this branch;
                # the uploaded form buffer's filename is the key.
                c.FORM_BUFFER, key,
                c.FORM_BUFFERPTR, buffer.read(),
                c.FORM_CONTENTTYPE, 'plain/text',
            )),
        ])
        c.perform()
        c.close()
def curl_ix(content=None):
    """Paste *content* (a list of lines) to ix.io and return the paste URL.

    Falls back to reading LOGFILE when no content is given; errors are
    reported through the module-level ERROR handler.
    """
    # Provide a filename to generate a ix.io link
    global ERROR
    from pycurl import Curl
    from io import BytesIO
    from urllib.parse import urlencode
    # Bug fix: the original used a mutable default argument (content=[]),
    # which is shared across calls; use None as the sentinel.
    if content is None:
        content = []
    curl = Curl()
    buf = BytesIO()
    curl.setopt(curl.URL, "ix.io")
    curl.setopt(curl.WRITEDATA, buf)
    if not content:
        try:
            with open(LOGFILE, 'r') as f:
                content = f.readlines()
        except FileNotFoundError:
            ERROR(f"{LOGFILE} not found.")
        except Exception as e:
            ERROR(f"Error occured:\n{str(e)}")
    curl.setopt(curl.POSTFIELDS, urlencode({"f:1": '\n'.join(content)}))
    try:
        curl.perform()
    except Exception as e:
        ERROR(f"Error occured:\n{str(e)}")
    curl.close()
    return buf.getvalue().decode().strip()
def get_login(c: pycurl.Curl, url: str) -> bytes:
    """Perform a GET request to *url* on the supplied curl handle and
    return the raw response body.

    Response headers are captured by the module-level ``header_function``
    callback; the HTTP status is logged after the transfer.
    """
    logger.info("get_login() called")
    buffer = BytesIO()
    # Browser-like request headers (the stray ')' at the end of the UA
    # string is preserved from the original).
    header = [
        "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0)",
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language: en-US,en;q=0.7,fi;q=0.3",
        "DNT: 1",
        "Connection: keep-alive",
        "Upgrade-Insecure-Requests: 1",
    ]
    c.setopt(c.WRITEFUNCTION, buffer.write)
    c.setopt(c.HEADERFUNCTION, header_function)
    c.setopt(c.BUFFERSIZE, 102400)
    c.setopt(c.URL, url)
    c.setopt(c.HTTPHEADER, header)
    # NOTE(review): the User-Agent in HTTPHEADER above likely overrides
    # this USERAGENT option on the wire -- confirm which is intended.
    c.setopt(c.USERAGENT, "curl/7.65.1")
    c.setopt(c.MAXREDIRS, 50)
    # c.setopt(c.HTTP_VERSION, pycurl.CURL_HTTP_VERSION_2TLS)
    # Empty string asks libcurl to accept all encodings it supports.
    c.setopt(c.ACCEPT_ENCODING, "")
    # c.setopt(c.HTTP09_ALLOWED, 1)
    c.setopt(c.TCP_KEEPALIVE, 1)
    c.setopt(c.FOLLOWLOCATION, True)
    c.perform()
    logger.info("get_login() HTTP response: %s", c.getinfo(c.HTTP_CODE))
    return buffer.getvalue()
def moodle_admin_login_curl(self):
    """POST the admin credentials to the Moodle login page over HTTPS.

    Cookies are kept in a temporary file that is removed afterwards.
    Exits the process on a non-200 response.

    :return: the decoded response body on success.
    """
    fd, path = tempfile.mkstemp()
    # Bug fix: mkstemp() returns an open OS-level descriptor which the
    # original leaked; close it immediately (curl reopens by path).
    os.close(fd)
    try:
        response = BytesIO()
        url = 'https://' + self.deployment['siteURL'] + '/login/index.php'
        curl = Curl()
        curl.setopt(pycurl.URL, url)
        # NOTE(review): TLS certificate verification is disabled here.
        curl.setopt(pycurl.SSL_VERIFYPEER, False)
        curl.setopt(pycurl.WRITEFUNCTION, response.write)
        curl.setopt(pycurl.POST, True)
        curl.setopt(pycurl.COOKIEJAR, path)
        curl.setopt(pycurl.COOKIEFILE, path)
        post = urllib.parse.urlencode({
            'username': '******',
            'password': self.deployment['moodleAdminPassword']
        })
        curl.setopt(pycurl.POSTFIELDS, post)
        curl.setopt(pycurl.FOLLOWLOCATION, True)
        curl.perform()
        status = curl.getinfo(pycurl.HTTP_CODE)
        if status != 200:
            print("*** FAILED: {} ***".format(status))
            sys.exit(1)
        response = response.getvalue().decode('utf-8')
    finally:
        os.remove(path)
    return response
def curl(url, file_ids, log):
    """Download the given GDC file ids as gdc_curl_download.tar.gz.

    :param url: GDC data endpoint; the ids are POSTed as JSON.
    :param file_ids: list of file ids to fetch.
    :param log: logger used for progress and diagnostics.
    """
    log.info('\tstarting curl fetch of gdc files')
    params = {'ids': file_ids}
    c = None
    with open('gdc_curl_download.tar.gz', 'wb') as f:
        try:
            c = Curl()
            c.setopt(c.URL, url)
            c.setopt(c.WRITEDATA, f)
            c.setopt(c.HTTPHEADER, ["Content-Type: application/json"])
            c.setopt(pycurl.CUSTOMREQUEST, "POST")
            c.setopt(pycurl.POSTFIELDS, json.dumps(params))
            # TODO: set up using a local certificate
            c.setopt(pycurl.SSL_VERIFYPEER, 0)
            c.setopt(pycurl.SSL_VERIFYHOST, 0)
            c.perform()
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit propagate; the error is still logged and re-raised.
            log.exception('problem with curl')
            raise
        finally:
            if c is not None:
                # On a bad status the downloaded file holds the server's
                # error text rather than a tarball; surface it in the log.
                if int(c.getinfo(pycurl.RESPONSE_CODE)) != 200:
                    f.close()
                    with open('gdc_curl_download.tar.gz') as e:
                        err = e.read()
                    log.error('\tbad status on curl call(%s):\n%s' % (c.getinfo(pycurl.RESPONSE_CODE), err))
                c.close()
def getc(url):
    """Fetch *url* and return the response body as a BytesIO buffer."""
    sink = BytesIO()
    handle = Curl()
    handle.setopt(handle.URL, url)
    handle.setopt(handle.WRITEDATA, sink)
    handle.perform()
    handle.close()
    return sink
def build_thread(gitpath, ref, buildid, cburl=None, submodules=False):
    """Clone *gitpath*, check out *ref*, run the pbuilder build, tar the
    results, and optionally POST the tarball to the callback URL *cburl*.

    Progress and failures are appended to the build log via buildlog();
    the function returns early (None) on any step failure.
    NOTE(review): uses the Python 2 ``file()`` builtin near the end.
    """
    tmpdir = os.path.join(conf('buildbot.buildpath'), buildid)
    repo = GitRepository(tmpdir)
    output, retcode = repo.clone(gitpath)
    if retcode:
        buildlog(buildid, 'Unable to clone %s. %s\n' % (gitpath, '\n'.join(output)))
        return
    output, retcode = repo.checkout(ref)
    if retcode:
        buildlog(buildid, 'Unable to checkout %s. %s\n' % (ref, '\n'.join(output)))
        return
    if submodules:
        # output is a (stdout, stderr) pair here; log both streams.
        output, retcode = repo.submodule_init()
        buildlog(buildid, output[0])
        buildlog(buildid, output[1])
        output, retcode = repo.submodule_update()
        buildlog(buildid, output[0])
        buildlog(buildid, output[1])
    resultsdir = os.path.join(tmpdir, '.build_results')
    os.makedirs(resultsdir)
    output, retcode = repo.build(conf('buildbot.signkey'), conf('buildbot.pbuilderrc'), resultsdir)
    buildlog(buildid, output[0])
    buildlog(buildid, output[1])
    #logging.debug(output[0])
    #logging.debug(output[1])
    os.chdir(resultsdir)
    if not os.listdir(resultsdir) or retcode != 0:
        buildlog(buildid, 'Nothing in results directory. Giving up.')
        return
    tarpath = os.path.join(tmpdir, 'package.tar.gz')
    tar = tarfile.open(tarpath, 'w:gz')
    # Relies on the os.chdir(resultsdir) above so entries are archived
    # with bare names.
    for name in os.listdir(resultsdir):
        tar.add(name)
    tar.close()
    buildlog(buildid, 'Build complete. Results in %s\n' % tarpath)
    data = file(tarpath, 'rb').read()
    buildlog(buildid, 'Built %i byte tarball' % len(data))
    if cburl:
        buildlog(buildid, 'Performing callback: %s' % cburl)
        req = Curl()
        req.setopt(req.POST, 1)
        req.setopt(req.URL, str(cburl))
        req.setopt(req.HTTPPOST, [('package', (req.FORM_FILE, str(tarpath)))])
        req.setopt(req.WRITEDATA, file('%s/build.log' % tmpdir, 'a+'))
        req.perform()
        req.close()
def http_perform(curl: pycurl.Curl):
    """Run curl.perform(), wrapping pycurl errors in CurlError and
    validating the resulting HTTP status via HTTPFamily."""
    try:
        curl.perform()
    except pycurl.error as exc:
        raise CurlError from exc
    code = curl.getinfo(curl.HTTP_CODE)
    HTTPFamily.check_status(code)
def load_url(self, url):
    """Download *url* and wrap the response bytes in a CSVFile."""
    payload = BytesIO()
    handle = Curl()
    handle.setopt(handle.URL, url)
    handle.setopt(handle.WRITEDATA, payload)
    handle.perform()
    handle.close()
    return CSVFile(payload)
def curl(url):
    """Fetch *url* with pycurl and return the raw response bytes."""
    sink = BytesIO()
    handle = Curl()
    handle.setopt(handle.URL, url)
    handle.setopt(handle.WRITEDATA, sink)
    handle.perform()
    handle.close()
    try:
        return sink.getvalue()
    finally:
        sink.close()
def blocking_io(num):
    """Fetch the metadata JSON for xkcd comic *num* and return it parsed."""
    # TODO: Use pycurl
    sink = BytesIO()
    handle = Curl()
    handle.setopt(handle.URL, f'https://xkcd.com/{num}/info.0.json')
    handle.setopt(handle.WRITEDATA, sink)
    # Use certifi's CA bundle for TLS verification.
    handle.setopt(handle.CAINFO, certifi.where())
    handle.perform()
    handle.close()
    sink.seek(0)
    return load(sink)
class PageFetcher():
    """Fetches a page"""
    def __init__(self):
        # A single curl handle is reused across fetch() calls.
        self.curl = Curl()
        self.url = None
        self.headers = {}   # response headers of the most recent fetch
        self.status = ''    # reason phrase of the most recent response
        self.code = 0       # numeric status of the most recent response
        self.charset_re = re.compile('charset=(\S+)')
    def handle_headers(self, header):
        """Parses the headers from a HTTP response"""
        header = header.decode('iso-8859-1') #headers always in iso-8859-1
        if ':' in header:
            #split out the headers name and value
            n, v = header.split(': ', 1)
            self.headers[n] = v.rstrip('\r\n')
        elif 'HTTP' in header:
            # Status line, e.g. "HTTP/1.1 200 OK".
            h, code, status = header.split(' ', 2)
            self.code = int(code)
            self.status = status.rstrip('\r\n')
    def encoding(self):
        """Gets the encoding from the headers, otherwise assumes iso-8859-1"""
        if 'Content-Type' in self.headers:
            match = self.charset_re.search(self.headers['Content-Type'].lower())
            if match:
                return match.group(1)
        return 'iso-8859-1'
    def fetch(self, url, headers_only=False):
        """Gets the specified webpage

        Returns a (code, headers, body) tuple; with headers_only=True a
        HEAD-style request (NOBODY) is made and body is empty.
        """
        #reset the gathered data
        self.headers = {}
        self.code = 0
        self.status = None
        links = []
        #get the page
        buff = BytesIO()
        self.curl.setopt(self.curl.URL, url)
        if headers_only:
            self.curl.setopt(self.curl.NOBODY, 1)
        else:
            self.curl.setopt(self.curl.NOBODY, 0)
        self.curl.setopt(self.curl.WRITEDATA, buff)
        self.curl.setopt(self.curl.HEADERFUNCTION, self.handle_headers)
        self.curl.perform()
        #decode the returned data to the correct type
        body = buff.getvalue().decode(self.encoding())
        return self.code, self.headers, body
class Httpy:
    """
    Easily perform GET and POST requests with web servers.
    Keeps cookies to retain web sessions.
    Includes helpful methods that go beyond GET and POST:
     * get_meta - retrieves meta info about a URL
     * unshorten - returns (some) redirected URLs
    """

    def __init__(self):
        # One shared handle; TLS verification is intentionally disabled.
        self.curl = Curl()
        self.curl.setopt(self.curl.SSL_VERIFYPEER, 0)
        self.curl.setopt(self.curl.SSL_VERIFYHOST, 0)
        self.curl.setopt(self.curl.TIMEOUT, DEFAULT_TIMEOUT)
        self.curl.setopt(self.curl.PROXY, HTTP_PROXY)
        self.curl.setopt(self.curl.FOLLOWLOCATION, True)

    def get(self, url):
        """ GET request; returns the response body decoded as text. """
        try:
            body = BytesIO()
            self.curl.setopt(self.curl.WRITEFUNCTION, body.write)
            self.curl.setopt(self.curl.URL, url)
            self.curl.perform()
            r = body.getvalue()
            body.close()
            return r.decode()
        except Exception as e:
            raise e

    def download(self, url):
        """ Downloads the file at *url* and returns its raw bytes,
        retrying up to 3 times on 'transfer closed' errors. """
        retries = 3
        while retries:
            try:
                body = BytesIO()
                self.curl.setopt(self.curl.WRITEFUNCTION, body.write)
                self.curl.setopt(self.curl.URL, url)
                self.curl.perform()
                if self.curl.getinfo(self.curl.HTTP_CODE) != 200:
                    text = body.getvalue()
                    # Bug fix: getvalue() returns bytes; the original
                    # tested a str against it, raising TypeError.
                    if b"404" not in text:
                        raise Exception(
                            "HTTP" + str(self.curl.getinfo(self.curl.HTTP_CODE)))
                r = body.getvalue()
                body.close()
                return r
            except Exception as e:
                # Bug fix: the original ``.find(...) > 0`` missed a match
                # at index 0; membership test is the correct check.
                if "transfer closed" in str(e) and retries:
                    retries -= 1
                    continue
                raise Exception(
                    str(e) + " HTTP" + str(self.curl.getinfo(self.curl.HTTP_CODE)))
def get(name):
    """Download the IGRA data archive for station *name*, unzip its first
    member in memory, and return the contents as text."""
    base = 'https://www1.ncdc.noaa.gov/pub/data/igra/data/data-por/{}-data.txt.zip'
    payload = BytesIO()
    handle = Curl()
    handle.setopt(handle.URL, base.format(name))
    handle.setopt(handle.WRITEDATA, payload)
    handle.perform()
    handle.close()
    archive = ZipFile(payload)
    first_member = archive.infolist()[0]
    raw = archive.open(first_member).read()
    archive.close()
    return raw.decode()
def post_login(c: pycurl.Curl, url: str, sessionid: str, token: str, username: str, password: str, remember=2678400) -> bytes:
    """POST the login form to *url* and return the raw response body.

    :param sessionid: cookie string sent with the request.
    :param token: form token obtained from the login page.
    :param remember: remember-me duration (2678400 = 31 days in seconds).
    """
    logger.info("post_login() called")
    buffer = BytesIO()
    header = [
        "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0)",
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language: en-US,en;q=0.7,fi;q=0.3",
        "Referer: http://81.19.210.136:1005/",
        "Content-Type: application/x-www-form-urlencoded",
        "DNT: 1",
        "Connection: keep-alive",
        f"Cookie: {sessionid}",
        "Upgrade-Insecure-Requests: 1",
    ]
    # The site expects sha1(password + username); only the hash is posted
    # and the plain password field is deliberately left empty.
    password_hash = hashlib.sha1(
        bytearray(password, "utf-8")
        + bytearray(username, "utf-8")).hexdigest()
    # %24 is an urlencoded '$' -- the posted value is "$sha1$<hash>".
    postfields = (f"token={token}&password_hash=%24sha1%24{password_hash}" +
                  f"&username={username}&password=&remember={remember}")
    postfieldsize = len(postfields)
    logger.info("postfieldsize: %s", postfieldsize)
    logger.debug("postfields: %s", postfields)
    c.setopt(c.WRITEFUNCTION, buffer.write)
    c.setopt(c.HEADERFUNCTION, header_function)
    c.setopt(c.BUFFERSIZE, 102400)
    c.setopt(c.URL, url)
    c.setopt(c.POSTFIELDS, postfields)
    c.setopt(c.POSTFIELDSIZE_LARGE, postfieldsize)
    c.setopt(c.HTTPHEADER, header)
    c.setopt(c.USERAGENT, "curl/7.65.1")
    c.setopt(c.MAXREDIRS, 50)
    # c.setopt(c.HTTP_VERSION, pycurl.CURL_HTTP_VERSION_2TLS)
    # Empty string asks libcurl to accept all encodings it supports.
    c.setopt(c.ACCEPT_ENCODING, "")
    # c.setopt(c.HTTP09_ALLOWED, True)
    c.setopt(c.TCP_KEEPALIVE, 1)
    c.setopt(c.FOLLOWLOCATION, True)
    c.perform()
    logger.info("post_login() HTTP response: %s", c.getinfo(c.HTTP_CODE))
    return buffer.getvalue()
def performSubmission(submissionFileName, POST_DATA):
    """Submit *POST_DATA* to the ENA REST endpoint (live or test, per the
    'test_submission' configuration value) and persist the XML response.

    :param submissionFileName: XML file name; the response is written next
        to it with a '_results.xml' suffix.
    :param POST_DATA: multipart form data handed to pycurl HTTPPOST.
    :return: the raw response text.
    """
    logging.info('Performing submission of ' + submissionFileName + '\n')
    logging.info('POST Data:\n' + str(POST_DATA) + '\n')
    if (str(getConfigurationValue('test_submission')) == '0'):
        logging.info ('THIS IS A LIVE SUBMISSION AT ENA.')
        requestURL = str(getConfigurationValue('ena_rest_address_prod')) + '?auth=ENA%20' + str(getConfigurationValue('ena_username')) + '%20' + str(getConfigurationValue('ena_password'))
    else:
        logging.info ('THIS IS A TEST SUBMISSION AT ENA.')
        requestURL = str(getConfigurationValue('ena_rest_address_test')) + '?auth=ENA%20' + str(getConfigurationValue('ena_username')) + '%20' + str(getConfigurationValue('ena_password'))
    # Problem: StringIO Doesn't work with pycurl in python 3.6. Must replace this with a BytesIO.
    curlResponseBuffer = BytesIO()
    try:
        curlObject = Curl()
        curlObject.setopt(curlObject.URL, requestURL)
        curlObject.setopt(curlObject.POST, 1)
        curlObject.setopt(curlObject.HTTPPOST, POST_DATA)
        curlObject.setopt(curlObject.USERAGENT, 'Curl')
        curlObject.setopt(curlObject.WRITEFUNCTION, curlResponseBuffer.write)
        # Bug fix: HTTPHEADER / SSL_VERIFYHOST / SSL_VERIFYPEER were used
        # as bare names (NameError unless star-imported); qualify them on
        # the curl object like every other option here.
        curlObject.setopt(curlObject.HTTPHEADER, ['Accept:application/xml'])
        # Insecure. Any security experts want to make this better?
        curlObject.setopt(curlObject.SSL_VERIFYHOST, 0)
        curlObject.setopt(curlObject.SSL_VERIFYPEER, 0)
        curlObject.perform()
        curlObject.close()
    except Exception:
        logging.error ('Exception when performing CURL:\n')
        logging.error('Exception when performing CURL.\n')
        logging.error('URL:' + str(requestURL))
        raise
    responseText = curlResponseBuffer.getvalue()
    # write XML to file.
    projectSubResultsFileName = submissionFileName.replace('.xml','_results.xml')
    resultsFile = createOutputFile(projectSubResultsFileName)
    resultsFile.write(str(responseText))
    resultsFile.close()
    return responseText
def fetchAnnotationJson(self, rawRequestURL=None):
    """Fetch annotation JSON for self.rawSequence from the ACT service.

    :param rawRequestURL: base service URL; the sequence is appended as an
        urlencoded query parameter.
    :return: the JSON body string, or None when an error is detected.
    """
    try:
        postData = {'sequence': self.rawSequence}
        # Using configuration here causes circular dependency. So I'll just pass it in.
        if(rawRequestURL is None):
            logging.error('You must pass a rawRequestURL to fetchAnnotationJson.')
            return
        else:
            requestURL = rawRequestURL + '?' + urlencode(postData)
        resultsIoObject = BytesIO()
        curlObject = Curl()
        curlObject.setopt(curlObject.URL, requestURL)
        curlObject.setopt(curlObject.WRITEDATA, resultsIoObject)
        curlObject.perform()
        curlObject.close()
        getBody = resultsIoObject.getvalue().decode('utf8')
        logging.debug('JSON Request Body:\n' + getBody)
        # TODO:
        # Detect error <head><title>414 Request-URI Too Large</title></head>
        # For larger DRB alleles the webserver fails.
        # Detect error if the result is not json.
        # Maybe this error detection happens in parseExons. But i maybe need to detect server errors here.
        # Simple case is an empty string.
        if(getBody is None or len(getBody)<1):
            logging.error('The JSON results were an empty string. Is there a problem with the ACT server?:' + str(requestURL))
            showInfoBox('Problem Accessing Annotation Service','The JSON results were an empty string. Is there a problem with the ACT server?')
            return None
        # If it's an html error we can respond nicely.
        # Bug fix: the original compared the 5-character slice [0:5]
        # against the 6-character string '<html>', which could never match.
        if(getBody[0:6]=='<html>'):
            # TODO: this might not work if i get some other kind of html.
            errorCode = getBody[getBody.find('<title>'):getBody.find('</title>')]
            logging.error('The annotation JSON results are html, this probably indicates an issue with the annotation webserver:\n' + str(requestURL))
            showInfoBox('Problem Accessing Annotation Service', 'The annotation results are HTML, not JSON, probably an issue with the ACT webserver:\n' + str(errorCode))
            return None
        return getBody
    except Exception:
        logging.error('Exception when performing CURL:\n')
        logging.error(str(exc_info()))
        logging.error('URL:' + str(requestURL))
        raise
def performSubmission(submissionFileName, POST_DATA, enaUserName, enaPassword):
    """Submit *POST_DATA* to the ENA REST endpoint (live or test, per the
    'test_submission' configuration value) and persist the XML response.

    :param submissionFileName: XML file name; the response is written next
        to it with a '_results.xml' suffix.
    :param POST_DATA: multipart form data handed to pycurl HTTPPOST.
    :param enaUserName: ENA account name, embedded in the auth query.
    :param enaPassword: ENA account password, embedded in the auth query.
    :return: the raw response text.
    """
    logging.info('Performing submission of ' + submissionFileName + '\n')
    logging.info('POST Data:\n' + str(POST_DATA) + '\n')
    if (str(getConfigurationValue('test_submission')) == '0'):
        logging.info ('THIS IS A LIVE SUBMISSION AT ENA.')
        requestURL = str(getConfigurationValue('ena_rest_address_prod')) + '?auth=ENA%20' + str(enaUserName) + '%20' + str(enaPassword)
    else:
        logging.info ('THIS IS A TEST SUBMISSION AT ENA.')
        requestURL = str(getConfigurationValue('ena_rest_address_test')) + '?auth=ENA%20' + str(enaUserName) + '%20' + str(enaPassword)
    # Problem: StringIO Doesn't work with pycurl in python 3.6. Must replace this with a BytesIO.
    curlResponseBuffer = BytesIO()
    try:
        curlObject = Curl()
        curlObject.setopt(curlObject.URL, requestURL)
        curlObject.setopt(curlObject.POST, 1)
        curlObject.setopt(curlObject.HTTPPOST, POST_DATA)
        curlObject.setopt(curlObject.USERAGENT, 'Curl')
        curlObject.setopt(curlObject.WRITEFUNCTION, curlResponseBuffer.write)
        # Bug fix: HTTPHEADER / SSL_VERIFYHOST / SSL_VERIFYPEER were used
        # as bare names (NameError unless star-imported); qualify them on
        # the curl object like every other option here.
        curlObject.setopt(curlObject.HTTPHEADER, ['Accept:application/xml'])
        # Insecure. Any security experts want to make this better?
        curlObject.setopt(curlObject.SSL_VERIFYHOST, 0)
        curlObject.setopt(curlObject.SSL_VERIFYPEER, 0)
        curlObject.perform()
        curlObject.close()
    except Exception:
        logging.error ('Exception when performing CURL:\n')
        logging.error('Exception when performing CURL.\n')
        logging.error('URL:' + str(requestURL))
        raise
    responseText = curlResponseBuffer.getvalue()
    # write XML to file.
    projectSubResultsFileName = submissionFileName.replace('.xml','_results.xml')
    resultsFile = createOutputFile(projectSubResultsFileName)
    resultsFile.write(str(responseText))
    resultsFile.close()
    return responseText
def moodle_smoke_test(self):
    """Request the site root over HTTPS and exit(1) unless it returns 200."""
    print("\nMoodle Smoke Test...")
    target = 'https://' + self.deployment['siteURL']
    handle = Curl()
    handle.setopt(pycurl.URL, target)
    handle.setopt(pycurl.SSL_VERIFYPEER, False)
    # Discard the body; only the status code matters here.
    handle.setopt(pycurl.WRITEFUNCTION, lambda _chunk: None)
    handle.perform()
    code = handle.getinfo(pycurl.HTTP_CODE)
    if code == 200:
        print('(ok: {})'.format(code))
        return
    print("*** DEPLOY FAILED ***")
    print('HTTP Status Code: {}'.format(code))
    sys.exit(1)
def _fetch(self, url, query, on_progress=None):
    """POST *query* to *url*; return the response body decoded as UTF-8.

    When *on_progress* is given, response headers are routed through
    self._on_header(on_progress).
    """
    logging.debug('query={query}'.format(query=query))
    from pycurl import Curl, POST, POSTFIELDS
    from io import BytesIO
    handle = Curl()
    handle.setopt(handle.URL, url)
    handle.setopt(POST, 1)
    handle.setopt(POSTFIELDS, query)
    if on_progress:
        handle.setopt(handle.HEADERFUNCTION, self._on_header(on_progress))
    sink = BytesIO()
    handle.setopt(handle.WRITEDATA, sink)
    handle.perform()
    handle.close()
    return sink.getvalue().decode('UTF-8')
def _curl_a_link(self, target_url, post_target, commit_date=None):
    '''Fetch *target_url* (curl-style GET) and return a dict parsed from
    the JSON response.

    (Translated from the original Chinese docstring: resolves an address
    and returns a dict from which the JSON content can be read; if the
    result is already in today's cache it is taken from there rather than
    re-read from elastic.)
    '''
    # Bug fix: pycurl writes bytes to its sink, which a StringIO cannot
    # accept under Python 3 -- use BytesIO (json.loads accepts bytes).
    from io import BytesIO
    buffer = BytesIO()
    c = Curl()
    c.setopt(c.URL, target_url)
    c.setopt(c.WRITEDATA, buffer)
    c.perform()
    c.close()
    load_target = json.loads(buffer.getvalue())
    return load_target
def curlGet(url: str) -> bytes:
    """GET *url* (following redirects) and return the raw response bytes."""
    payload = BytesIO()
    handle = Curl()
    handle.setopt(handle.URL, url)
    handle.setopt(handle.WRITEDATA, payload)
    handle.setopt(handle.FOLLOWLOCATION, True)
    handle.setopt(handle.HTTPHEADER, ['User-Agent: curl/7.68.0'])
    handle.perform()
    handle.close()
    return payload.getvalue()
def callAPI(self, url, params=None):
    """Call *url* with urlencoded query *params*; return the body text.

    Bug fixes relative to the original: Python 2 print statement, bare
    ``except:``, mutable default argument, and a StringIO sink (pycurl
    writes bytes under Python 3).
    """
    if params is None:
        params = {}
    # pull data
    buf = BytesIO()
    curl = Curl()
    curl.setopt(curl.URL, url + '?' + urlencode(params))
    curl.setopt(curl.WRITEFUNCTION, buf.write)
    try:
        curl.perform()
    except Exception:
        print('Failed on all routes curl')
    response = buf.getvalue().decode()
    buf.close()
    return response
def torch_upload():
    """Debug script: upload a large torch-saved array to a remote
    ml_logger server, once through the logger API and once as a raw
    pycurl multipart POST against logger.root_dir."""
    from ml_logger import logger
    import numpy as np
    logger.configure(root_dir="http://54.71.92.65:9080", prefix="geyang/ml_logger-debug/test-1",
                     register_experiment=True)
    logger.log_params(args={})
    with logger.Sync():
        import os
        import torch
        from pycurl import Curl
        from tempfile import NamedTemporaryFile
        # Clear any previous upload at this key.
        logger.remove('upload/example.pt')
        with NamedTemporaryFile(delete=True) as f:
            # 10M float64 ones -- a deliberately large (~80 MB) payload.
            torch.save(np.ones([10_000_000]), f)
            # torch.save(np.ones([1000_000]), f)
            logger.print(f.name)
            c = Curl()
            c.setopt(c.URL, logger.root_dir)
            # proxy = os.environ.get('HTTP_PROXY')
            # c.setopt(c.PROXY, proxy)
            # logger.print('proxy:', proxy)
            c.setopt(c.TIMEOUT, 100000)
            c.setopt(c.HTTPPOST, [
                ('file', (
                    c.FORM_FILE, f.name,
                    c.FORM_FILENAME, logger.prefix + '/upload/example.pt',
                    c.FORM_CONTENTTYPE, 'plain/text',
                )),
            ])
            c.perform()
            c.close()
            logger.print('done')
    # logger.remove(".")
    # a = np.ones([1, 1, 100_000_000 // 4])
    # logger.print(f"the size of the tensor is {a.size}")
    # data = dict(key="ok", large=a)
    # logger.torch_save(data, f"save/data-{logger.now('%H.%M.%S')}.pkl")
    logger.print('done')
def get(url, headers=[]):
    '''Given a URL and headers send a HTTP GET request.
    Return the response.

    Uses the module-level cookiePath / verbose / timeout / userAgent /
    defaultHeaders settings. NOTE(review): mutable default for *headers*
    (read-only here, but fragile); a StringIO sink with WRITEFUNCTION
    implies Python 2 semantics -- pycurl writes bytes under Python 3.
    '''
    b = StringIO()
    c = Curl()
    c.setopt(c.URL, url)
    c.setopt(c.FOLLOWLOCATION, True)
    c.setopt(c.WRITEFUNCTION, b.write)
    # Same path for jar and file so cookies persist across calls.
    c.setopt(c.COOKIEJAR, cookiePath)
    c.setopt(c.COOKIEFILE, cookiePath)
    c.setopt(c.VERBOSE, verbose)
    c.setopt(c.CONNECTTIMEOUT, connectTimeout)
    c.setopt(c.TIMEOUT, defaultTimeout)
    c.setopt(c.USERAGENT, userAgent)
    c.setopt(c.HTTPHEADER, defaultHeaders + headers)
    c.perform()
    r = copy(b.getvalue())
    b.close()
    return r
def get_messages(c: pycurl.Curl, url: str, sessionid: str, authcred: str, authtimeout: int) -> bytes:
    """POST an AJAX poll ("ajax=1") to *url* with the session/auth cookies
    and return the raw response body.

    :param sessionid: session cookie fragment.
    :param authcred: auth credential cookie fragment.
    :param authtimeout: auth timeout cookie fragment.
    """
    logger.info("get_messages() called")
    buffer = BytesIO()
    header = [
        "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0)",
        "Accept: */*",
        "Accept-Language: en-US,en;q=0.7,fi;q=0.3",
        "Referer: http://81.19.210.136:1005/",
        "Content-Type: application/x-www-form-urlencoded",
        "X-Requested-With: XMLHttpRequest",
        "DNT: 1",
        "Connection: keep-alive",
        f"Cookie: {sessionid}; {authcred}; {authtimeout}",
        "Upgrade-Insecure-Requests: 1",
    ]
    postfields = "ajax=1"
    postfieldsize = len(postfields)
    logger.info("postfieldsize: %s", postfieldsize)
    c.setopt(c.WRITEFUNCTION, buffer.write)
    # Response headers are collected by the module-level header_function.
    c.setopt(c.HEADERFUNCTION, header_function)
    c.setopt(c.BUFFERSIZE, 102400)
    c.setopt(c.URL, url)
    c.setopt(c.POSTFIELDS, postfields)
    c.setopt(c.POSTFIELDSIZE_LARGE, postfieldsize)
    c.setopt(c.HTTPHEADER, header)
    c.setopt(c.USERAGENT, "curl/7.65.1")
    c.setopt(c.MAXREDIRS, 50)
    # c.setopt(c.HTTP_VERSION, pycurl.CURL_HTTP_VERSION_2TLS)
    # Empty string asks libcurl to accept all encodings it supports.
    c.setopt(c.ACCEPT_ENCODING, "")
    # c.setopt(c.HTTP09_ALLOWED, True)
    c.setopt(c.TCP_KEEPALIVE, 1)
    c.setopt(c.FOLLOWLOCATION, True)
    # print_headers(HEADERS)
    c.perform()
    logger.info("get_messages() HTTP response: %s", c.getinfo(c.HTTP_CODE))
    return buffer.getvalue()
def __fetch_page(self, url):
    """Fetch *url* in two passes: first anonymously to populate the cookie
    jar, then with HTTP basic auth reusing those cookies.

    Returns the whitespace-normalized page text, or None on a non-200
    second response. NOTE(review): StringIO with WRITEFUNCTION and the
    c.ENCODING option imply Python 2 era pycurl usage; the curl handle is
    not closed on the early non-200 return path.
    """
    useragent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36'
    encoding = 'gzip, deflate, sdch'
    httpheader = [
        'Accept: text/html, application/xhtml+xml, application/xml; q=0.9, image/webp, */*; q=0.8',
        'Accept-Language: it-IT, it; q=0.8, en-US; q=0.6, en; q=0.4',
        'Host: uniparthenope.esse3.cineca.it'
    ]
    cookiefile = 'cookiefile'
    # First pass: anonymous request whose only purpose is to obtain
    # session cookies into `cookiefile`; the body is discarded.
    page = StringIO()
    c = Curl()
    c.setopt(c.FOLLOWLOCATION, True)
    c.setopt(c.WRITEFUNCTION, page.write)
    c.setopt(c.COOKIEJAR, cookiefile)
    c.setopt(c.URL, url)
    c.perform()
    c.close()
    page.close()
    # Second pass: authenticated request replaying the stored cookies.
    page = StringIO()
    c = Curl()
    c.setopt(c.USERPWD, self.__username + ':' + self.__password)
    c.setopt(c.FOLLOWLOCATION, 1)
    c.setopt(c.WRITEFUNCTION, page.write)
    c.setopt(c.COOKIEFILE, cookiefile)
    c.setopt(c.ENCODING, encoding)
    c.setopt(c.HTTPHEADER, httpheader)
    c.setopt(c.REFERER, url)
    c.setopt(c.USERAGENT, useragent)
    c.setopt(c.URL, url)
    c.perform()
    if (c.getinfo(pycurl.HTTP_CODE) != 200):
        return None
    c.close()
    page_str = page.getvalue()
    page.close()
    # Collapse all runs of whitespace into single spaces.
    p = re.compile('\\s+')
    page_str = p.sub(" ", page_str)
    return page_str
def callAPI(self,params={}):
    """Call the API URL carried in params.url, sending the remaining
    entries as the urlencoded query string; return the raw response body.

    NOTE(review): Python 2 code (print statement, bare ``except error``).
    The default ``{}`` has no ``url`` attribute -- callers must pass an
    object exposing ``.url``, supporting ``del`` and urlencode(); the
    mutable default is also shared across calls.
    """
    # pull data
    buf = StringIO()
    curl = Curl()
    url = params.url.__str__()
    # Remove url so only the real query parameters are urlencoded below.
    del params.url
    curl.setopt(curl.URL, url + '?' + urlencode(params))
    curl.setopt(curl.WRITEFUNCTION, buf.write)
    try:
        curl.perform()
    except error:
        print 'Failed on curl'
    response = buf.getvalue()
    buf.close()
    return response
def post_progress(progress, slotX, slotY, exp_name, message="") :
    """Report experiment progress to a remote PHP endpoint and return
    whether the experiment should continue.

    Returns False when the server reply starts with "TER" (terminate),
    True otherwise. NOTE(review): Python 2 code (cStringIO, print
    statements); the request URL has no scheme prefix.
    """
    from pycurl import Curl
    import cStringIO
    from socket import gethostname
    response = cStringIO.StringIO()
    address ='www.doc.ic.ac.uk/~zf509/'+exp_name+'/ip.php?name='+gethostname()+\
    '-'+message+'&slot='+str(slotX)+'-'+str(slotY)+\
    '&stage='+str(progress)
    c = Curl()
    c.setopt(c.WRITEFUNCTION, response.write)
    c.setopt(c.URL, address)
    c.perform()
    c.close()
    server_res = response.getvalue()
    print "Server replied:", server_res
    # A reply beginning with "TER" is the server's terminate signal.
    if server_res[0]=="T" and server_res[1]=="E" and server_res[2]=="R" :
        return False
    else :
        return True
def http_req_perform(self, curl: pycurl.Curl, writefun: object) -> None:
    """Perform the request on *curl*, routing the body to *writefun* and
    translating pycurl errors into the project's exception hierarchy."""
    curl.setopt(pycurl.WRITEFUNCTION, writefun)
    try:
        curl.perform()
    except pycurl.error as exc:
        code, errstr = exc.args[0], exc.args[1]
        if code in (pycurl.E_URL_MALFORMAT, pycurl.E_PARTIAL_FILE):
            raise NetError(code, errstr)
        if code in (pycurl.E_COULDNT_RESOLVE_PROXY,
                    pycurl.E_COULDNT_RESOLVE_HOST):
            raise DNSError(code, errstr)
        if code == pycurl.E_HTTP_RETURNED_ERROR:
            raise HTTPError(curl.getinfo(pycurl.HTTP_CODE), None, code, errstr)
        if code in (pycurl.E_COULDNT_CONNECT, pycurl.E_OPERATION_TIMEOUTED,
                    pycurl.E_SEND_ERROR, pycurl.E_RECV_ERROR):
            raise ConnError(curl.getinfo(pycurl.OS_ERRNO), code, errstr)
        # Anything unrecognized becomes a generic network error.
        raise NetError(code, errstr)
def http_query(url, timeout=1000):
    """GET *url* with a millisecond timeout; return a dict with keys
    'code' (HTTP status), 'header' (dict of response headers) and 'body'.

    NOTE(review): Python 2 code (print statement; StringIO used as the
    pycurl sink); the curl handle is never closed.
    """
    print url
    c = Curl()
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.FOLLOWLOCATION, 1)
    c.setopt(pycurl.TIMEOUT_MS, timeout)
    body_writer = StringIO()
    head_writer = StringIO()
    c.setopt(pycurl.WRITEFUNCTION, body_writer.write)
    c.setopt(pycurl.HEADERFUNCTION, head_writer.write)
    result = {}
    c.perform()
    head_writer.seek(0)
    # The first header line is the status line; read it to skip it.
    first = head_writer.readline()
    result['header'] = {}
    for line in head_writer:
        parts = line.split(':' , 1)
        if len(parts) == 2:
            result['header'][parts[0]] = parts[1].strip()
    result['code'] = c.getinfo(pycurl.HTTP_CODE)
    result['body'] = body_writer.getvalue()
    return result
def http_query(url, timeout=1000):
    """GET *url* with a millisecond timeout; return a dict with keys
    'code' (HTTP status), 'header' (dict of response headers) and 'body'.

    NOTE(review): near-duplicate of the other http_query in this file;
    Python 2 code (print statement; StringIO as the pycurl sink); the
    curl handle is never closed.
    """
    print url
    c = Curl()
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.FOLLOWLOCATION, 1)
    c.setopt(pycurl.TIMEOUT_MS, timeout)
    body_writer = StringIO()
    head_writer = StringIO()
    c.setopt(pycurl.WRITEFUNCTION, body_writer.write)
    c.setopt(pycurl.HEADERFUNCTION, head_writer.write)
    result = {}
    c.perform()
    head_writer.seek(0)
    # The first header line is the status line; read it to skip it.
    first = head_writer.readline()
    result['header'] = {}
    for line in head_writer:
        parts = line.split(':', 1)
        if len(parts) == 2:
            result['header'][parts[0]] = parts[1].strip()
    result['code'] = c.getinfo(pycurl.HTTP_CODE)
    result['body'] = body_writer.getvalue()
    return result
def process(self):
    """Fetch the release JSON at self.resource_uri and build metadata.

    :return: tuple of (AlbumMetadata, list of TrackMetadata).
    """
    from pycurl import Curl
    import json
    from io import BytesIO
    b = BytesIO()
    c = Curl()
    c.setopt(c.URL, self.resource_uri)
    c.setopt(c.USERAGENT, "FooBar/1.0")
    c.setopt(c.WRITEDATA, b)
    c.perform()
    # Bug fix: the curl handle was never closed.
    c.close()
    response = json.loads(b.getvalue())
    album_meta = AlbumMetadata()
    # Album metadata
    album_meta.title = response["title"]
    album_meta.year = response["year"]
    album_meta.genres = response["genres"]
    # All artist names joined into a single comma-separated string.
    album_meta.artists = ",".join(artist["name"] for artist in response["artists"])
    album_meta.tracktotal = len(response["tracklist"])
    # (Removed the unused local ``millis`` from the original.)
    # Iterate over tracklist
    tracks_meta = []
    for track in response['tracklist']:
        meta = TrackMetadata()
        meta.duration = track["duration"]
        meta.position = track["position"]
        meta.title = track["title"]
        tracks_meta.append(meta)
    return (album_meta, tracks_meta)
def moodle_admin_login_curl(self):
    """POST the admin credentials to the Moodle login page over HTTPS.

    Cookies are kept in a temporary file that is removed afterwards.
    Exits the process on a non-200 response.

    :return: the decoded response body on success.
    """
    fd, path = tempfile.mkstemp()
    # Bug fix: mkstemp() returns an open OS-level descriptor which the
    # original leaked; close it immediately (curl reopens by path).
    os.close(fd)
    try:
        response = BytesIO()
        url = 'https://' + self.deployment['siteURL'] + '/login/index.php'
        curl = Curl()
        curl.setopt(pycurl.URL, url)
        # NOTE(review): TLS certificate verification is disabled here.
        curl.setopt(pycurl.SSL_VERIFYPEER, False)
        curl.setopt(pycurl.WRITEFUNCTION, response.write)
        curl.setopt(pycurl.POST, True)
        curl.setopt(pycurl.COOKIEJAR, path)
        curl.setopt(pycurl.COOKIEFILE, path)
        post = urllib.parse.urlencode({'username': '******',
                                       'password': self.deployment['moodleAdminPassword']})
        curl.setopt(pycurl.POSTFIELDS, post)
        curl.setopt(pycurl.FOLLOWLOCATION, True)
        curl.perform()
        status = curl.getinfo(pycurl.HTTP_CODE)
        if status != 200:
            print("*** FAILED: {} ***".format(status))
            sys.exit(1)
        response = response.getvalue().decode('utf-8')
    finally:
        os.remove(path)
    return response
def _perform(self, url: str, curl_obj: pycurl.Curl = None, headers: dict = None,
             postfields: dict = None, skip_auth=False) -> bytes:
    """Perform an HTTP request and return the response body as bytes.

    Waits for authentication unless *skip_auth*; POSTs when *postfields*
    is given, otherwise GETs. Returns b"" on transport-level errors and
    raises urllib-style HTTPError on non-200 statuses. A sessionid found
    in the response headers is propagated to self._auth_data.
    """
    if not skip_auth:
        self._wait_authenticated()
    if not curl_obj:
        curl_obj = pycurl.Curl()
    if postfields:
        postfields = urlencode(postfields)
        _set_postfields(curl_obj, postfields)
    logger.debug("url={url}, headers={headers}", url=url, headers=headers)
    if not headers:
        headers = self.BASE_HEADERS.copy()
    headers = self._headers_to_list(headers)
    logger.debug("prepared headers={h}", h=headers)
    buffer = BytesIO()
    curl_obj.setopt(pycurl.WRITEFUNCTION, buffer.write)
    # Response headers accumulate in self._headers via _header_function.
    curl_obj.setopt(pycurl.HEADERFUNCTION, self._header_function)
    curl_obj.setopt(pycurl.BUFFERSIZE, 102400)
    curl_obj.setopt(pycurl.URL, url)
    curl_obj.setopt(pycurl.HTTPHEADER, headers)
    curl_obj.setopt(pycurl.USERAGENT, CURL_USERAGENT)
    curl_obj.setopt(pycurl.MAXREDIRS, 50)
    curl_obj.setopt(pycurl.ACCEPT_ENCODING, "")
    curl_obj.setopt(pycurl.TCP_KEEPALIVE, 1)
    curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
    # NOTE(review): ENCODING is the legacy alias of ACCEPT_ENCODING, so
    # this overrides the "" set above -- confirm which is intended.
    curl_obj.setopt(pycurl.ENCODING, "gzip, deflate")
    try:
        curl_obj.perform()
    except pycurl.error as e:
        logger.debug(e, exc_info=True)
        logger.warning(e)
        # Transport failure: swallow and signal with an empty body.
        return b""
    status = curl_obj.getinfo(pycurl.HTTP_CODE)
    logger.debug("HTTP status: {s}", s=status)
    curl_obj.close()
    if status != HTTPStatus.OK:
        hdrs = None
        try:
            # Keep only the last value seen for each response header.
            hdrs = {k: v[-1] for k, v in self._headers.items()}
        except (IndexError, KeyError):
            pass
        phrase = "error"
        try:
            phrase = http.client.responses[status]
            logger.error("HTTP status error: {s}", s=status)
        except KeyError:
            pass
        raise HTTPError(url=url, msg=phrase, code=status, hdrs=hdrs, fp=None)
    # Server changing maps will trigger sessionid change,
    # keep track of latest sessionid in response headers.
    sessionid = self._find_sessionid()
    if sessionid and self._auth_data:
        self._auth_data.sessionid = sessionid
    return buffer.getvalue()
def _execute(curl: Curl, close_connection: bool) -> int:
    """Perform the request on *curl* and return the HTTP status code,
    closing the handle first when *close_connection* is true."""
    curl.perform()
    code = curl.getinfo(curl.HTTP_CODE)
    if close_connection:
        curl.close()
    return code
def processVideo(self, vID, number):
    """Download one video, with retries, then create per-folder links to it.

    :param vID: video identifier used to navigate to the video page.
    :param number: 1-based position of this video in self.vIDs, used only
        for progress output.

    The whole load/parse/download sequence is retried up to self.retryCount
    times; success or an explicit skip breaks out early. Afterwards a link
    (hard or symbolic, per self.useHardLinks) to the downloaded file is
    created in every folder whose vID list contains this video.
    """
    for _attempt in range(self.retryCount):
        title = ''
        download = None
        # Load the video page and reveal the download panel; retry on missing
        # page elements up to self.retryCount times.
        for i in count():
            try:
                self.goTo(vID)
                title = self.getElement('h1[itemprop=name]').text.strip().rstrip('.')
                self.driver.find_element_by_class_name('iconify_down_b').click()
                download = self.getElement('#download')
                break
            except NoSuchElementException as e:
                self.logger.warning(e.msg)
                if i >= self.retryCount:
                    self.logger.error("Page load failed")
                    self.errors += 1
                    break
        # Parse download links
        link = linkSize = localSize = downloadOK = downloadSkip = None
        if download:
            # Take the first link matching the preferred formats, in order.
            for preference in FILE_PREFERENCES:
                try:
                    link = download.find_element_by_partial_link_text(preference)
                    break
                except NoSuchElementException:
                    pass
        if link:  # Parse chosen download link
            userAgent = str(self.driver.execute_script('return window.navigator.userAgent'))
            cookies = self.driver.get_cookies()
            extension = link.get_attribute('download').split('.')[-1]
            description = '%s/%s' % (link.text, extension.upper())
            link = str(link.get_attribute('href'))
            if self.getFileSizes:
                # Probe the remote size with a streamed GET (closed at once),
                # reusing the browser's cookies and user agent.
                try:
                    request = requests.get(link, stream = True, headers = { 'user-agent': userAgent },
                        cookies = dict((str(cookie['name']), str(cookie['value'])) for cookie in cookies))
                    request.close()
                    linkSize = int(request.headers['content-length'])
                    self.totalFileSize += linkSize
                    description += ', %s' % readableSize(linkSize)
                except Exception as e:
                    self.logger.warning(e)
        else:
            description = extension = 'NONE'
        # Prepare file information
        prefix = ' '.join((title, '(%s)' % description))
        suffix = ' '.join((('%d/%d %d%%' % (number, len(self.vIDs), int(number * 100.0 / len(self.vIDs)))),)
                          + ((readableSize(self.totalFileSize),) if self.totalFileSize else ()))
        self.logger.info(' '.join((prefix, suffix)))
        fileName = cleanupFileName('%s.%s' % (' '.join(((title,) if title else ()) + (str(vID),)), extension.lower()))
        targetFileName = join(self.targetDirectory, fileName)
        if self.setLanguage:
            # Switch the site language to self.setLanguage, but only when no
            # language (or the default first option) is currently selected.
            try:
                self.driver.find_element_by_id('change_settings').click()
                languages = self.driver.find_elements_by_css_selector('select[name=language] option')
                currentLanguage = ([l for l in languages if l.is_selected()] or [None,])[0]
                if currentLanguage is None or currentLanguage is languages[0]:
                    # Match by visible text first, then by option value.
                    ls = [l for l in languages if l.text.capitalize().startswith(self.setLanguage)]
                    if len(ls) != 1:
                        ls = [l for l in languages if l.get_attribute('value').capitalize().startswith(self.setLanguage)]
                    if len(ls) == 1:
                        self.logger.info("Language not set, setting to %s", ls[0].text)
                        ls[0].click()
                        self.driver.find_element_by_css_selector('#settings_form input[type=submit]').click()
                    else:
                        self.logger.error("Unsupported language: %s", self.setLanguage)
                        self.setLanguage = None
                else:
                    self.logger.info("Language already set to %s / %s", currentLanguage.get_attribute('value').upper(), currentLanguage.text)
            except NoSuchElementException:
                self.logger.warning("Failed to set language to %s, settings not available", self.setLanguage)
        if link:  # Downloading file
            if linkSize:
                localSize = getFileSize(targetFileName)
                if localSize == linkSize:
                    # Already fully downloaded on a previous run.
                    downloadOK = True
                elif localSize and localSize > linkSize:
                    self.errors += 1
                    self.logger.error("Local file is larger (%d) than remote file (%d)", localSize, linkSize)
                    downloadSkip = True
                    #remove(targetFileName)
                    #localSize = None
            if self.doDownload and not downloadOK:
                # Console progress reporter fed by pycurl's PROGRESSFUNCTION;
                # also raises when the transfer stalls past the timeout.
                class ProgressIndicator(object):
                    QUANTUM = 10 * 1024 * 1024  # 10 megabytes
                    ACTION = r'--\\||//'  # update() often gets called in pairs, this smoothes things up
                    action = len(ACTION) - 1
                    def __init__(self, timeout):
                        self.timeout = timeout
                        self.started = False
                        self.totalRead = 0
                        self.lastData = time()
                        self.count = 0
                        self.action = len(self.ACTION) - 1
                        self.progress("Dowloading: ")
                    def progress(self, s, suffix = ''):
                        # Emit a spinner frame, or the final suffix plus newline.
                        self.action = (self.action + 1) % len(self.ACTION)
                        print('\b%s%s' % (s, suffix + '\n' if suffix else self.ACTION[self.action]), end = '', flush = True)
                    def update(self, _length, totalRead, *_args):
                        if totalRead <= self.totalRead:
                            # No new data yet; fail if quiet past the timeout.
                            if time() > self.lastData + self.timeout:
                                raise curlError("Download seems stalled")
                        else:
                            self.totalRead = totalRead
                            self.lastData = time()
                        oldCount = self.count
                        self.count = int(totalRead // self.QUANTUM) + 1
                        # One '=' (or '+' before data starts) per QUANTUM read.
                        self.progress(('=' if self.started else '+') * max(0, self.count - oldCount))
                        self.started = True
                    def end(self):
                        self.progress("OK")
                progressIndicator = ProgressIndicator(self.timeout)
                curl = Curl()
                curl.setopt(curl.CAINFO, certifi.where())
                # Forward the browser session's cookies to curl.
                curl.setopt(curl.COOKIE, '; '.join('%s=%s' % (cookie['name'], cookie['value']) for cookie in cookies))
                curl.setopt(curl.TIMEOUT, self.timeout)
                curl.setopt(curl.USERAGENT, userAgent)
                curl.setopt(curl.FOLLOWLOCATION, True)
                curl.setopt(curl.URL, link)
                curl.setopt(curl.PROGRESSFUNCTION, progressIndicator.update)
                try:
                    with open(targetFileName, 'wb') as f:
                        curl.setopt(curl.WRITEDATA, f)
                        curl.perform()
                    curl.close()
                    progressIndicator.end()
                    downloadOK = True
                except curlError as e:
                    self.errors += 1
                    self.logger.error("Download failed: %s", e)
                except KeyboardInterrupt:
                    self.errors += 1
                    self.logger.error("Download interrupted")
                if downloadOK:
                    # Sanity-check downloaded size against the remote size.
                    localSize = getFileSize(targetFileName)
                    if not localSize:
                        self.errors += 1
                        downloadOK = False
                        self.logger.error("Downloaded file seems corrupt")
                    elif linkSize:
                        if localSize > linkSize:
                            self.errors += 1
                            downloadOK = False
                            self.logger.error("Downloaded file larger (%d) than remote file (%d)", localSize, linkSize)
                        elif localSize < linkSize:
                            self.errors += 1
                            downloadOK = False
                            self.logger.error("Downloaded file smaller (%d) than remote file (%d)", localSize, linkSize)
        if downloadOK:
            self.logger.info("OK")
            break
        elif downloadSkip or not self.doDownload:
            self.logger.info("Downloading SKIPPED")
            break
    else:
        # for-else: every retry attempt ran out without a break.
        self.logger.info("Download ultimately failed after %d retries", self.retryCount)
    # Creating symbolic links, if enabled
    for dirName in (dirName for (dirName, vIDs) in self.folders if vID in vIDs):
        linkFileName = join(dirName, fileName)
        try:
            # Replace any stale link; best-effort, failures are ignored.
            if lexists(linkFileName):
                remove(linkFileName)
        except:
            pass
        try:
            (hardlink if self.useHardLinks else symlink)(join('..', fileName), linkFileName)
        except Exception as e:
            self.logger.warning("Can't create link at %s: %s", linkFileName, e)
            self.errors += 1
# --- requests: timed with timeit (new connection behavior left to requests) ---
print ("Testing {0} performance with {1} cycles".format(LIBRARY, CYCLES))
mytime = timeit.timeit("r = requests.get('{0}')".format(URL), setup='import requests', number=CYCLES)
print('{0}: ran {1} HTTP GET requests in {2} seconds'.format(LIBRARY, CYCLES, mytime))
print('')

### CONNECTION REUSE TESTS FOLLOW ###

LIBRARY="pycurl (saving response body by cStringIO BUT MAKING A NEW HANDLE EVERY TIME) "
print ("Testing {0} performance with {1} cycles".format(LIBRARY, CYCLES))
start = time.clock()
# fix: xrange(1, CYCLES) only issued CYCLES-1 requests, so the timing did not
# match the "{1} HTTP GET requests" count printed below; iterate CYCLES times.
for i in xrange(CYCLES):
    # Fresh handle per request: no connection reuse, worst case for pycurl.
    mycurl=Curl(); mycurl.setopt(mycurl.URL, URL)
    body = StringIO(); mycurl.setopt(mycurl.WRITEDATA, body)
    mycurl.perform()
    output = body.getvalue()
    body.close()
    mycurl.close()
end = time.clock()
print('{0}: ran {1} HTTP GET requests in {2} seconds'.format(LIBRARY, CYCLES, (end-start)))
print('')

LIBRARY="pycurl (saving response body by cStringIO) "
print ("Testing {0} CONNECTION REUSE performance with {1} cycles".format(LIBRARY, CYCLES))
# Single handle reused across all requests; its timed loop follows this chunk.
mycurl=Curl(); mycurl.setopt(mycurl.URL, URL)
start = time.clock()
def _complete_request(curl: pycurl.Curl, buffer: BytesIO, response: Response):
    """Execute the prepared transfer, fill in *response*, and close the handle.

    :param curl: configured pycurl handle whose body writes go to *buffer*.
    :param buffer: accumulates the raw response body during perform().
    :param response: mutated in place with the status code and decoded body.
    """
    curl.perform()
    status = curl.getinfo(curl.RESPONSE_CODE)
    raw_body = buffer.getvalue()
    response.status = status
    response.body = raw_body.decode(_CHAR_ENCODING)
    curl.close()