def open(self, fullurl, data=None, method=None):
    """Use URLopener().open(file) instead of open(file, 'r')."""
    fullurl = unwrap(toBytes(fullurl))
    # percent-encode the url, fixing lame server errors (e.g. a space
    # within url paths)
    fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
    if self.tempcache and fullurl in self.tempcache:
        filename, headers = self.tempcache[fullurl]
        fp = open(filename, 'rb')
        return addinfourl(fp, headers, fullurl)
    urltype, url = splittype(fullurl)
    if not urltype:
        urltype = 'file'
    if urltype in self.proxies:
        proxy = self.proxies[urltype]
        urltype, proxyhost = splittype(proxy)
        host, selector = splithost(proxyhost)
        url = (host, fullurl)  # Signal special case to open_*()
    else:
        proxy = None
    name = 'open_' + urltype
    self.type = urltype
    name = name.replace('-', '_')
    if not hasattr(self, name):
        if proxy:
            return self.open_unknown_proxy(proxy, fullurl, data)
        else:
            return self.open_unknown(fullurl, data)
    try:
        return getattr(self, name)(url, data, method)
    except socket.error, msg:
        raise IOError, ('socket error', msg), sys.exc_info()[2]

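# A minimal usage sketch of the open_<scheme> dispatch above, assuming a
# plain Python 2 urllib.URLopener subclass; the 'demo' scheme and the
# DemoOpener class are hypothetical.
import urllib

class DemoOpener(urllib.URLopener):
    def open_demo(self, url):
        # open() maps the 'demo' scheme to the attribute name 'open_demo'
        # and dispatches here; url is what follows 'demo:'.
        return self.open_file('/dev/null')  # Unix path, illustration only

opener = DemoOpener()
fp = opener.open('demo://example')  # ends up in open_demo()
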
def retrieve(self, url, filename=None, reporthook=None, data=None, maxtries=5, r_range=None):
    """retrieve(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object. If it fails, it
    retries until the download is complete or maxtries == 0
    (maxtries == -1 for unlimited tries). r_range is a (start, end)
    tuple giving the byte range of the remote object to retrieve
    (ignored for local files)."""
    if maxtries < -1:
        raise ValueError, 'maxtries must be at least -1'
    url = unwrap(toBytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]
    type, url1 = splittype(url)
    if filename is None and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            hdrs = fp.info()
            fp.close()
            return url2pathname(splithost(url1)[1]), hdrs
        except IOError, msg:
            pass

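# Hedged usage sketch for the retrying retrieve() above; RetryOpener is a
# hypothetical subclass exposing it. maxtries=-1 retries until the download
# completes, and r_range=(0, 499) asks only for the first 500 bytes of the
# remote object (ignored for local files).
opener = RetryOpener()  # hypothetical class providing the method above
filename, headers = opener.retrieve('http://example.com/big.iso',
                                    maxtries=-1, r_range=(0, 499))
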
def __init__(self, url, data=None, headers={}):
    # unwrap('<URL:type://host/path>') --> 'type://host/path'
    self.__original = unwrap(url)
    self.type = None
    # self.__r_type is what's left after doing the splittype
    self.host = None
    self.port = None
    self.data = data
    self.headers = {}
    self.headers.update(headers)

def __init__(self, url, data=None, headers={}):
    # unwrap('<URL:type://host/path>') --> 'type://host/path'
    self.__original = unwrap(url)
    self.type = None
    # self.__r_type is what's left after doing the splittype
    self.host = None
    self.port = None
    self.data = data
    self.headers = {}
    for key, value in headers.items():
        self.add_header(key, value)

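# Hedged sketch of why this variant loops over add_header() instead of the
# plain dict.update() in the previous snippet: add_header() capitalizes the
# header name, so later lookups see one canonical spelling. Python 2 urllib2
# behaves the same way.
import urllib2

req = urllib2.Request('http://example.com/', headers={'user-agent': 'demo/1.0'})
print req.get_header('User-agent')  # 'demo/1.0' -- the key was normalized
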
def retrieve_resume(self, url, filename, reporthook=None, data=None):
    """retrieve_resume(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object.

    The filename argument is REQUIRED (no tempfile creation code here!)

    Additionally resumes a download if the local filename exists"""
    current_size = 0
    tfp = None
    if os.path.exists(filename):
        try:
            current_size = os.path.getsize(filename)
            tfp = open(filename, 'ab')
            # If the file exists, then only download the remainder
            self.addheader('Range', 'bytes=%s-' % (current_size))
        except:
            log('Cannot open file for resuming: %s', filename, sender=self, traceback=True)
            tfp = None
            current_size = 0
    if tfp is None:
        tfp = open(filename, 'wb')

    url = urllib.unwrap(urllib.toBytes(url))
    fp = self.open(url, data)
    headers = fp.info()

    result = filename, headers
    bs = 1024*8
    size = -1
    read = current_size
    blocknum = int(current_size/bs)
    if reporthook:
        if "content-length" in headers:
            size = int(headers["Content-Length"]) + current_size
        reporthook(blocknum, bs, size)
    while 1:
        block = fp.read(bs)
        if block == "":
            break
        read += len(block)
        tfp.write(block)
        blocknum += 1
        if reporthook:
            reporthook(blocknum, bs, size)
    fp.close()
    tfp.close()
    del fp
    del tfp

    # raise exception if actual size does not match content-length header
    if size >= 0 and read < size:
        raise urllib.ContentTooShortError("retrieval incomplete: got only %i out "
                                          "of %i bytes" % (read, size), result)

    return result

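# A minimal usage sketch for retrieve_resume() above; ResumableOpener is a
# hypothetical subclass providing it, and the server is assumed to honour
# the Range header. Re-running after an interruption appends to the file
# instead of starting over.
def progress(blocknum, bs, size):
    print 'got roughly %d of %d bytes' % (blocknum * bs, size)

opener = ResumableOpener()  # hypothetical class carrying the method above
filename, headers = opener.retrieve_resume('http://example.com/episode.mp3',
                                           'episode.mp3', reporthook=progress)
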
def begin_retrieve(self, url, filename=None, reporthook=None):
    url = unwrap(url)
    self.__clean_retrieve(url)
    type, url1 = splittype(url)
    if not filename and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            hdrs = fp.info()
            del fp
            return url2pathname(splithost(url1)[1]), hdrs
        except IOError, msg:
            pass

def retrieve(self, url, filename=None, reporthook=None, blocksize=262144):
    url = urllib.unwrap(url)
    if self.tempcache and self.tempcache.has_key(url):
        return self.tempcache[url]
    type, url1 = urllib.splittype(url)
    if not filename and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            hdrs = fp.info()
            del fp
            return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
        except IOError, msg:
            pass

def lambda_handler(event, context):
    try:
        body_dict = event['body-json']
        payload = str(urllib.unwrap(body_dict))
        payload = urlparse.parse_qs(payload)
        payload_dict = payload['payload'][0]
        json_dict = json.loads(payload_dict)
    except Exception, e:
        print event
        print e
        print 'event parsing error'
        raise Exception("Bad Request: request failed")

def retrieve(self, url, filename=None, reporthook=None, data=None):
    """retrieve(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object."""
    url = urllib.unwrap(urllib.toBytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]
    type, url1 = urllib.splittype(url)
    if filename is None and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            hdrs = fp.info()
            del fp
            return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
        except IOError, msg:
            pass

def __init__(self, url, data=None, headers={}, origin_req_host=None, unverifiable=False):
    # unwrap('<URL:type://host/path>') --> 'type://host/path'
    self.__original = unwrap(url)
    self.type = None
    # self.__r_type is what's left after doing the splittype
    self.host = None
    self.port = None
    self.data = data
    self.headers = {}
    for key, value in headers.items():
        self.add_header(key, value)
    self.unredirected_hdrs = {}
    if origin_req_host is None:
        origin_req_host = cookielib.request_host(self)
    self.origin_req_host = origin_req_host
    self.unverifiable = unverifiable

def __init__(self, url, data=None, headers={}, origin_req_host=None, unverifiable=False):
    self.__original = unwrap(url)
    self.__original, fragment = splittag(self.__original)
    self.type = None
    self.host = None
    self.port = None
    self._tunnel_host = None
    self.data = data
    self.headers = {}
    for key, value in headers.items():
        self.add_header(key, value)
    self.unredirected_hdrs = {}
    if origin_req_host is None:
        origin_req_host = request_host(self)
    self.origin_req_host = origin_req_host
    self.unverifiable = unverifiable

def __init__(self, url, data=None, headers={}, origin_req_host=None, unverifiable=False):
    # unwrap('<URL:type://host/path>') --> 'type://host/path'
    self.__original = unwrap(url)
    self.type = None
    # self.__r_type is what's left after doing the splittype
    self.host = None
    self.port = None
    self.data = data
    self.headers = {}
    for key, value in headers.items():
        self.add_header(key, value)
    self.unredirected_hdrs = {}
    if origin_req_host is None:
        origin_req_host = request_host(self)
    self.origin_req_host = origin_req_host
    self.unverifiable = unverifiable

def __init__(self, url, data=None, headers={}, origin_req_host=None, unverifiable=False):
    self.__original = unwrap(url)
    self.__original, self.__fragment = splittag(self.__original)
    self.type = None
    self.host = None
    self.port = None
    self._tunnel_host = None
    self.data = data
    self.headers = {}
    for key, value in headers.items():
        self.add_header(key, value)
    self.unredirected_hdrs = {}
    if origin_req_host is None:
        origin_req_host = request_host(self)
    self.origin_req_host = origin_req_host
    self.unverifiable = unverifiable

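# Sketch of what splittag() does in the two fragment-aware constructors
# above; in Python 2 it lives in the urllib module.
from urllib import splittag

url, fragment = splittag('http://example.com/page#section-2')
print url       # http://example.com/page
print fragment  # section-2
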
def retrieve(self, url, filename=None, reporthook=None):
    # retrieve(url) returns (filename, None) for a local object
    # or (tempfilename, headers) for a remote object.
    url = unwrap(url)
    import urlparse
    scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
    if not scheme or scheme == 'file':
        i = string.find(path, '?')
        if i > 0:
            path = path[:i]
        url = urlparse.urlunparse((scheme, netloc, path, '', '', ''))
    if self.__prefetchcache.has_key(url):
        # complete prefetch first
        #print 'completing prefetch'
        self.__fin_retrieve(url)
    if self.__prefetchtempfiles.has_key(url):
        #print 'retrieving prefetched', self.__prefetchtempfiles[url]
        return self.__prefetchtempfiles[url]
    return _OriginalFancyURLopener.retrieve(self, url, filename, reporthook)

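# Sketch of the query-stripping step above for local files, using Python 2
# urlparse directly; the file path is illustrative.
import urlparse

parts = urlparse.urlparse('file:///tmp/data.csv?cache=no')
print urlparse.urlunparse((parts[0], parts[1], parts[2], '', '', ''))
# file:/tmp/data.csv  (query removed)
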
def __init__(self, url, data=None, headers={}, origin_req_host=None,
             unverifiable=False, method=None, follow_redirects=True,
             on_redirect=None, accumulate_body=True, adjust_headers=True):
    self.adjust_headers = adjust_headers
    self._original = urllib.unwrap(url)
    util.Events.EventMixin.__init__(self)
    urllib2.Request.__init__(self, url, data, headers, origin_req_host, unverifiable)
    self.follow_redirects = follow_redirects
    self._method = method
    self.headers = HTTPHeaders(self.headers)
    self.unredirected_hdrs = HTTPHeaders(self.unredirected_hdrs)
    self.redirect_cb = on_redirect
    self.callback = None
    self.accumulate_body = accumulate_body
    self.redirected = False

def im_handler(message):
    print 'IM handler'
    try:
        body_dict = message['body-json']
        payload = str(urllib.unwrap(body_dict))
        # payload = str(urllib.unquote(payload).decode('utf8'))
        payload = urlparse.parse_qs(payload)
        payload_dict = payload['payload'][0]
        json_dict = json.loads(payload_dict)
        actions = json_dict['actions']
        menu_name = str(actions[0]['name'])
        channel = json_dict['channel']['id']
        team_id = json_dict['team']['id']
        user_id = json_dict['user']['id']
    except KeyError, e:
        raise Exception('Bad Request: %s' % e)

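# Sketch of the payload shape the handler above expects: a Slack-style
# interactive message, URL-encoded under a 'payload' key. The encoded string
# below is illustrative.
import json
import urlparse

raw = 'payload=%7B%22actions%22%3A%5B%7B%22name%22%3A%22menu%22%7D%5D%7D'
payload = urlparse.parse_qs(raw)
print json.loads(payload['payload'][0])['actions'][0]['name']  # menu
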
def test_unwrap(self):
    url = urllib.unwrap('<URL:type://host/path>')
    self.assertEqual(url, 'type://host/path')

def retrieve(self, url, filename=None, reporthook=None, data=None):
    """Retrieves data from the given url and returns a tuple of filename and headers

    Args:
        url (str): url of the data to be retrieved
        filename (str, optional): filename from the url to download
        reporthook (function, optional): function called after each block,
            e.g. for keeping an UI updated with the current state
        data (optional): POST data to send with the request
            (see urllib.URLopener)

    Returns:
        result: (filename, headers)

    See Also:
        urllib.URLopener
    """
    self._canceled = False
    url = urllib.unwrap(urllib.toBytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]
    type, url1 = urllib.splittype(url)
    if filename is None and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            hdrs = fp.info()
            fp.close()
            return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
        except IOError:
            pass
    fp = self.open(url, data)
    try:
        headers = fp.info()
        if filename:
            tfp = open(filename, 'wb')
        else:
            import tempfile
            garbage, path = urllib.splittype(url)
            garbage, path = urllib.splithost(path or "")
            path, garbage = urllib.splitquery(path or "")
            path, garbage = urllib.splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            (fd, filename) = tempfile.mkstemp(suffix)
            self.__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'wb')
        try:
            result = filename, headers
            if self.tempcache is not None:
                self.tempcache[url] = result
            bs = 1024 * 8
            size = -1
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            if reporthook:
                reporthook(blocknum, bs, size)
            while not self._canceled:
                block = fp.read(bs)
                if block == "":
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)
        finally:
            tfp.close()
    finally:
        fp.close()

    # raise exception if actual size does not match content-length header
    if size >= 0 and read < size:
        raise urllib.ContentTooShortError("retrieval incomplete: got only %i out "
                                          "of %i bytes" % (read, size), result)

    if self._canceled and os.path.exists(filename):
        os.remove(filename)

    return result

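# Usage sketch for the cancelable retrieve() above; CancelableOpener is a
# hypothetical subclass carrying the _canceled flag shown in the snippet.
def hook(blocknum, bs, size):
    if size > 0:
        print '%.1f%%' % min(100.0 * blocknum * bs / size, 100.0)

opener = CancelableOpener()  # hypothetical class providing the method above
filename, headers = opener.retrieve('http://example.com/file.zip',
                                    'file.zip', reporthook=hook)
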
"""An extensible library for opening URLs using a variety of protocols
def retrieve_resume(self, url, filename, reporthook=None, data=None):
    """Download files from an URL; return (headers, real_url)

    Resumes a download if the local filename exists and
    the server supports download resuming.
    """
    current_size = 0
    tfp = None
    if os.path.exists(filename):
        try:
            current_size = os.path.getsize(filename)
            tfp = open(filename, 'ab')
            # If the file exists, then only download the remainder
            if current_size > 0:
                self.addheader('Range', 'bytes=%s-' % (current_size))
        except:
            logger.warn('Cannot resume download: %s', filename, exc_info=True)
            tfp = None
            current_size = 0
    if tfp is None:
        tfp = open(filename, 'wb')

    # Fix a problem with bad URLs that are not encoded correctly (bug 549)
    url = url.decode('ascii', 'ignore')
    url = url.translate(self.ESCAPE_CHARS)
    url = url.encode('ascii')

    url = urllib.unwrap(urllib.toBytes(url))
    fp = self.open(url, data)
    headers = fp.info()

    if current_size > 0:
        # We told the server to resume - see if she agrees
        # See RFC2616 (206 Partial Content + Section 14.16)
        # XXX check status code here, too...
        range = ContentRange.parse(headers.get('content-range', ''))
        if range is None or range.start != current_size:
            # Ok, that did not work. Reset the download
            # TODO: seek and truncate if content-range differs from request
            tfp.close()
            tfp = open(filename, 'wb')
            current_size = 0
            logger.warn('Cannot resume: Invalid Content-Range (RFC2616).')

    result = headers, fp.geturl()
    bs = 1024*8
    size = -1
    read = current_size
    blocknum = int(current_size/bs)
    if reporthook:
        if "content-length" in headers:
            size = int(headers.getrawheader("Content-Length")) + current_size
        reporthook(blocknum, bs, size)
    while read < size or size == -1:
        if size == -1:
            block = fp.read(bs)
        else:
            block = fp.read(min(size-read, bs))
        if block == "":
            break
        read += len(block)
        tfp.write(block)
        blocknum += 1
        if reporthook:
            reporthook(blocknum, bs, size)
    fp.close()
    tfp.close()
    del fp
    del tfp

    # raise exception if actual size does not match content-length header
    if size >= 0 and read < size:
        raise urllib.ContentTooShortError("retrieval incomplete: got only %i out "
                                          "of %i bytes" % (read, size), result)

    return result

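# Sketch of the resume check above: a 206 reply must echo the requested start
# offset in Content-Range (RFC 2616, section 14.16). ContentRange.parse is
# the helper the snippet assumes; a plain string check is shown here instead
# so the sketch stays self-contained.
header = 'bytes 4096-104857/104858'
start = int(header.split(' ')[1].split('-')[0])
assert start == 4096  # a mismatch means the server ignored our Range request
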
def retrieve(self, url, filename=None, reporthook=None, data=None):
    # overridden method from urllib.URLopener
    self._cancelDownload = False
    url = urllib.unwrap(urllib.toBytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]
    type, url1 = urllib.splittype(url)
    if filename is None and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            hdrs = fp.info()
            fp.close()
            return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
        except IOError:
            pass
    fp = self.open(url, data)
    try:
        headers = fp.info()
        if filename:
            tfp = open(filename, 'wb')
        else:
            import tempfile
            garbage, path = urllib.splittype(url)
            garbage, path = urllib.splithost(path or "")
            path, garbage = urllib.splitquery(path or "")
            path, garbage = urllib.splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            (fd, filename) = tempfile.mkstemp(suffix)
            self.__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'wb')
        try:
            result = filename, headers
            if self.tempcache is not None:
                self.tempcache[url] = result
            bs = 1024 * 8
            size = -1
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            if reporthook:
                reporthook(blocknum, bs, size)
            while not self._cancelDownload:
                block = fp.read(bs)
                if block == "":
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)
        finally:
            tfp.close()
    finally:
        fp.close()

    # raise exception if actual size does not match content-length header
    if size >= 0 and read < size:
        raise urllib.ContentTooShortError(
            "retrieval incomplete: got only %i out "
            "of %i bytes" % (read, size), result)

    if self._cancelDownload and os.path.exists(filename):
        os.remove(filename)
        self.wasCanceled = True

    return result

def __init__(self, response):
    self._next = None
    link, params = parse_header(response.headers.get('link', ''))
    if params.get('rel', '').strip('"\'') == 'next':
        self._next = unwrap(link)

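# Sketch of the Link-header parsing above, assuming parse_header comes from
# the cgi module and unwrap from urllib (both Python 2); the pagination URL
# is illustrative.
from cgi import parse_header
from urllib import unwrap

link, params = parse_header('<https://api.example.com/items?page=2>; rel="next"')
print params.get('rel', '').strip('"\'')  # next
print unwrap(link)                        # https://api.example.com/items?page=2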