def retrieve(self, url, filename=None, reporthook=None, data=None,
             maxtries=5, r_range=None):
    """retrieve(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object.

    If it fails, it relaunches itself until the download is complete
    or maxtries reaches 0 (maxtries == -1 means unlimited tries).
    r_range is a (start, end) tuple giving the byte range of the
    remote object to retrieve (ignored for local files)."""
    if maxtries < -1:
        raise ValueError, 'maxtries must be at least -1'
    url = unwrap(toBytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]
    type, url1 = splittype(url)
    if filename is None and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            hdrs = fp.info()
            fp.close()
            return url2pathname(splithost(url1)[1]), hdrs
        except IOError, msg:
            pass
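# The snippet above is cut off before the range handling; a minimal
# sketch of how an r_range tuple could map onto an HTTP Range header
# (standard RFC 2616 byte-range syntax; the helper name _range_header
# is made up for illustration):
def _range_header(r_range):
    start, end = r_range
    return 'Range', 'bytes=%d-%d' % (start, end)

# e.g. _range_header((0, 499)) -> ('Range', 'bytes=0-499'),
# which asks the server for the first 500 bytes of the resource.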
def open(self, fullurl, data=None, method=None):
    """Use URLopener().open(file) instead of open(file, 'r')."""
    fullurl = unwrap(toBytes(fullurl))
    # Percent-encode the URL to work around badly behaved servers,
    # e.g. ones that reject spaces within URL paths.
    fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
    if self.tempcache and fullurl in self.tempcache:
        filename, headers = self.tempcache[fullurl]
        fp = open(filename, 'rb')
        return addinfourl(fp, headers, fullurl)
    urltype, url = splittype(fullurl)
    if not urltype:
        urltype = 'file'
    if urltype in self.proxies:
        proxy = self.proxies[urltype]
        urltype, proxyhost = splittype(proxy)
        host, selector = splithost(proxyhost)
        url = (host, fullurl)  # Signal special case to open_*()
    else:
        proxy = None
    name = 'open_' + urltype
    self.type = urltype
    name = name.replace('-', '_')
    if not hasattr(self, name):
        if proxy:
            return self.open_unknown_proxy(proxy, fullurl, data)
        else:
            return self.open_unknown(fullurl, data)
    try:
        return getattr(self, name)(url, data, method)
    except socket.error, msg:
        raise IOError, ('socket error', msg), sys.exc_info()[2]
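# A quick illustration of the open_* dispatch above: the scheme is
# split off the URL and mapped onto a method name, with '-' replaced
# by '_' so that a hyphenated scheme still yields a valid Python
# identifier (URL is illustrative):
import urllib

urltype, rest = urllib.splittype('http://example.com/path')
print urltype                                # 'http'
print 'open_' + urltype.replace('-', '_')    # 'open_http'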
def retrieve_resume(self, url, filename, reporthook=None, data=None):
    """retrieve_resume(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object.

    The filename argument is REQUIRED (no tempfile creation code here!)

    Additionally resumes a download if the local filename exists."""
    current_size = 0
    tfp = None
    if os.path.exists(filename):
        try:
            current_size = os.path.getsize(filename)
            tfp = open(filename, 'ab')
            # If the file exists, only download the remainder
            self.addheader('Range', 'bytes=%s-' % (current_size))
        except:
            log('Cannot open file for resuming: %s', filename,
                sender=self, traceback=True)
            tfp = None
            current_size = 0
    if tfp is None:
        tfp = open(filename, 'wb')

    url = urllib.unwrap(urllib.toBytes(url))
    fp = self.open(url, data)
    headers = fp.info()
    result = filename, headers
    bs = 1024 * 8
    size = -1
    read = current_size
    blocknum = int(current_size / bs)
    if reporthook:
        if "content-length" in headers:
            size = int(headers["Content-Length"]) + current_size
        reporthook(blocknum, bs, size)
    while 1:
        block = fp.read(bs)
        if block == "":
            break
        read += len(block)
        tfp.write(block)
        blocknum += 1
        if reporthook:
            reporthook(blocknum, bs, size)
    fp.close()
    tfp.close()
    del fp
    del tfp

    # Raise an exception if the actual size does not match the
    # Content-Length header
    if size >= 0 and read < size:
        raise urllib.ContentTooShortError(
            "retrieval incomplete: got only %i out "
            "of %i bytes" % (read, size), result)

    return result
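# A minimal usage sketch, assuming retrieve_resume lives on a
# urllib.FancyURLopener subclass; the class name ResumingOpener, the
# URL and the filename are made up for illustration:
def print_progress(blocknum, bs, size):
    if size > 0:
        print '%d of %d bytes' % (min(blocknum * bs, size), size)

opener = ResumingOpener()
opener.retrieve_resume('http://example.com/file.bin', 'file.bin',
                       reporthook=print_progress)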
def retrieve(self, url, filename=None, reporthook=None, data=None):
    """retrieve(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object."""
    url = urllib.unwrap(urllib.toBytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]
    type, url1 = urllib.splittype(url)
    if filename is None and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            hdrs = fp.info()
            del fp
            return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
        except IOError, msg:
            pass
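# The local-file branch above turns a file: URL back into a filesystem
# path; a quick trace of the urllib helpers it relies on (POSIX result
# shown, values illustrative):
import urllib

print urllib.splittype('file:///tmp/a%20b.txt')  # ('file', '///tmp/a%20b.txt')
print urllib.splithost('///tmp/a%20b.txt')       # ('', '/tmp/a%20b.txt')
print urllib.url2pathname('/tmp/a%20b.txt')      # '/tmp/a b.txt'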
def open(self, fullurl, data=None):
    if self.tries > self.maxtries:
        # print 'bailing after %d tries (check username and password)' % (self.tries - 1)
        self.tries = 0
        raise IOError, ('too many tries - bailing')
    fullurl = unwrap(toBytes(fullurl))
    if self.tempcache and fullurl in self.tempcache:
        filename, headers = self.tempcache[fullurl]
        fp = open(filename, 'rb')
        return addinfourl(fp, headers, fullurl)
    urltype, url = splittype(fullurl)
    if not urltype:
        urltype = 'file'
    if urltype in self.proxies:
        proxy = self.proxies[urltype]
        urltype, proxyhost = splittype(proxy)
        host, selector = splithost(proxyhost)
        url = (host, fullurl)  # Signal special case to open_*()
    else:
        proxy = None
    name = 'open_' + urltype
    self.type = urltype
    if '-' in name:
        # replace - with _
        name = '_'.join(name.split('-'))
    if not hasattr(self, name):
        if proxy:
            return self.open_unknown_proxy(proxy, fullurl, data)
        else:
            return self.open_unknown(fullurl, data)
    try:
        if data is None:
            return getattr(self, name)(url)
        else:
            return getattr(self, name)(url, data)
    except socket.error, msg:
        raise IOError, ('socket error', msg), sys.exc_info()[2]
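# A minimal sketch of the proxy special case above: with a proxy
# configured for 'http', the request is re-targeted at the proxy host
# and the full URL is passed through as the selector (hosts invented):
import urllib

proxies = {'http': 'http://proxy.example.com:8080'}
proxy = proxies['http']
urltype, proxyhost = urllib.splittype(proxy)  # ('http', '//proxy.example.com:8080')
host, selector = urllib.splithost(proxyhost)  # ('proxy.example.com:8080', '')
url = (host, 'http://example.com/page')       # tuple signals the proxy case to open_http()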
def test_toBytes(self):
    result = urllib.toBytes(u'http://www.python.org')
    self.assertEqual(result, 'http://www.python.org')
    self.assertRaises(
        UnicodeError,
        urllib.toBytes,
        test_support.u(r'http://www.python.org/medi\u00e6val'))
def retrieve_resume(self, url, filename, reporthook=None, data=None):
    """Download files from an URL; return (headers, real_url)

    Resumes a download if the local filename exists and the
    server supports download resuming.
    """
    current_size = 0
    tfp = None
    if os.path.exists(filename):
        try:
            current_size = os.path.getsize(filename)
            tfp = open(filename, 'ab')
            # If the file exists, only download the remainder
            if current_size > 0:
                self.addheader('Range', 'bytes=%s-' % (current_size))
        except:
            logger.warn('Cannot resume download: %s', filename,
                        exc_info=True)
            tfp = None
            current_size = 0

    if tfp is None:
        tfp = open(filename, 'wb')

    # Fix a problem with bad URLs that are not encoded correctly (bug 549)
    url = url.decode('ascii', 'ignore')
    url = url.translate(self.ESCAPE_CHARS)
    url = url.encode('ascii')

    url = urllib.unwrap(urllib.toBytes(url))
    fp = self.open(url, data)
    headers = fp.info()

    if current_size > 0:
        # We told the server to resume - see if it agrees
        # See RFC 2616 (206 Partial Content + Section 14.16)
        # XXX check status code here, too...
        range = ContentRange.parse(headers.get('content-range', ''))
        if range is None or range.start != current_size:
            # That did not work. Reset the download.
            # TODO: seek and truncate if content-range differs from request
            tfp.close()
            tfp = open(filename, 'wb')
            current_size = 0
            logger.warn('Cannot resume: Invalid Content-Range (RFC 2616).')

    result = headers, fp.geturl()
    bs = 1024 * 8
    size = -1
    read = current_size
    blocknum = int(current_size / bs)
    if reporthook:
        if "content-length" in headers:
            size = int(headers.getrawheader("Content-Length")) + current_size
        reporthook(blocknum, bs, size)
    while read < size or size == -1:
        if size == -1:
            block = fp.read(bs)
        else:
            block = fp.read(min(size - read, bs))
        if block == "":
            break
        read += len(block)
        tfp.write(block)
        blocknum += 1
        if reporthook:
            reporthook(blocknum, bs, size)
    fp.close()
    tfp.close()
    del fp
    del tfp

    # Raise an exception if the actual size does not match the
    # Content-Length header
    if size >= 0 and read < size:
        raise urllib.ContentTooShortError(
            "retrieval incomplete: got only %i out "
            "of %i bytes" % (read, size), result)

    return result
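# ContentRange.parse above comes from an external helper module; a
# minimal stand-in for the common "bytes start-end/total" form of the
# Content-Range header (RFC 2616, Section 14.16) might look like this
# sketch (not the module's actual implementation):
import re

class ContentRange(object):
    def __init__(self, start, stop, length):
        self.start, self.stop, self.length = start, stop, length

    @staticmethod
    def parse(value):
        m = re.match(r'bytes (\d+)-(\d+)/(\d+|\*)', value or '')
        if m is None:
            return None
        length = None if m.group(3) == '*' else int(m.group(3))
        return ContentRange(int(m.group(1)), int(m.group(2)), length)

# e.g. ContentRange.parse('bytes 500-999/1234').start == 500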
def retrieve(self, url, filename=None, reporthook=None, data=None):
    """Retrieve data from the given URL and return a tuple of
    filename and headers.

    Args:
        url (str): URL of the data to be retrieved
        filename (str, optional): local filename to download to
        reporthook (function, optional): called periodically, e.g. to
            keep a UI updated with the current state
        data (optional): data to send with the request

    Returns:
        result: (filename, headers)

    See Also:
        urllib.URLopener
    """
    self._canceled = False
    url = urllib.unwrap(urllib.toBytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]
    type, url1 = urllib.splittype(url)
    if filename is None and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            hdrs = fp.info()
            fp.close()
            return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
        except IOError:
            pass
    fp = self.open(url, data)
    try:
        headers = fp.info()
        if filename:
            tfp = open(filename, 'wb')
        else:
            import tempfile
            garbage, path = urllib.splittype(url)
            garbage, path = urllib.splithost(path or "")
            path, garbage = urllib.splitquery(path or "")
            path, garbage = urllib.splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            (fd, filename) = tempfile.mkstemp(suffix)
            self.__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'wb')
        try:
            result = filename, headers
            if self.tempcache is not None:
                self.tempcache[url] = result
            bs = 1024 * 8
            size = -1
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            if reporthook:
                reporthook(blocknum, bs, size)
            while not self._canceled:
                block = fp.read(bs)
                if block == "":
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)
        finally:
            tfp.close()
    finally:
        fp.close()

    # Raise an exception if the actual size does not match the
    # Content-Length header
    if size >= 0 and read < size:
        raise urllib.ContentTooShortError(
            "retrieval incomplete: got only %i out "
            "of %i bytes" % (read, size), result)

    if self._canceled and os.path.exists(filename):
        os.remove(filename)
    return result
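# A sketch of how the _canceled flag above could be driven: run the
# retrieve on a worker thread and flip the flag from another thread.
# The CancelableOpener class and its cancel() method are hypothetical;
# they assume the retrieve override above is part of the class:
import threading

class CancelableOpener(urllib.FancyURLopener):
    def cancel(self):
        self._canceled = True  # checked once per block by the download loop

opener = CancelableOpener()
worker = threading.Thread(target=opener.retrieve,
                          args=('http://example.com/big.bin', 'big.bin'))
worker.start()
# ... later, from the UI thread:
opener.cancel()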
    'http://10.3.254.233',
    'Referer': 'http://10.3.254.233/webAuth/index.htm',
    'User-Agent': 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.9.3.1000 Chrome/39.0.2146.0 Safari/537.36',
    'X-DevTools-Emulate-Network-Conditions-Client-Id': '7DA993FC-A05A-4FC9-A693-C1DA47EDABF0'
}
#)

data = urllib.urlencode({
    'username': sys.argv[1],
    'password': sys.argv[2],
    'pwd': sys.argv[2],
    'secret': 'true',
    'savename': None
})
#'password': en_passwd,
#'pwd': en_passwd,
#data = params.encode('ascii')
#data = params.encode('utf-8')

#f = urllib2.urlopen("http://10.3.254.233/webAuth/index.htm", data)
f = urllib2.Request("http://10.3.254.233/webAuth/index.htm", data, head)
f = urllib2.urlopen(f)
turl = f.geturl()
print(urllib2.unquote(turl))

turl = urllib.toBytes(turl)
tres_hex = turl.split('=')[1]
thex_ch = tres_hex.replace('%', '\\x')
print(thex_ch.decode('utf-8'))
print("done")
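# The replace('%', '\\x') step above only rewrites the escape text
# itself; to actually recover the characters behind percent-escapes,
# urllib.unquote decodes them to raw bytes, which can then be decoded
# as UTF-8 (the quoted value here is illustrative):
import urllib

quoted = '%e4%b8%ad%e6%96%87'
raw = urllib.unquote(quoted)  # '\xe4\xb8\xad\xe6\x96\x87'
print raw.decode('utf-8')     # prints the decoded unicode text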
def retrieve(self, url, filename=None, reporthook=None, data=None):
    # Overridden method from urllib.URLopener
    self._cancelDownload = False
    url = urllib.unwrap(urllib.toBytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]
    type, url1 = urllib.splittype(url)
    if filename is None and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            hdrs = fp.info()
            fp.close()
            return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
        except IOError:
            pass
    fp = self.open(url, data)
    try:
        headers = fp.info()
        if filename:
            tfp = open(filename, 'wb')
        else:
            import tempfile
            garbage, path = urllib.splittype(url)
            garbage, path = urllib.splithost(path or "")
            path, garbage = urllib.splitquery(path or "")
            path, garbage = urllib.splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            (fd, filename) = tempfile.mkstemp(suffix)
            self.__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'wb')
        try:
            result = filename, headers
            if self.tempcache is not None:
                self.tempcache[url] = result
            bs = 1024 * 8
            size = -1
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            if reporthook:
                reporthook(blocknum, bs, size)
            while not self._cancelDownload:
                block = fp.read(bs)
                if block == "":
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)
        finally:
            tfp.close()
    finally:
        fp.close()

    # Raise an exception if the actual size does not match the
    # Content-Length header
    if size >= 0 and read < size:
        raise urllib.ContentTooShortError(
            "retrieval incomplete: got only %i out "
            "of %i bytes" % (read, size), result)

    if self._cancelDownload and os.path.exists(filename):
        os.remove(filename)
        self.wasCanceled = True
    return result
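# The tempfile branch in the two retrieve overrides above strips the
# query and attribute parts off the URL path before picking a suffix;
# a quick trace of those urllib helpers (URL path is illustrative):
import os.path
import urllib

path = '/a/b.mp3;type=a?x=1'
path, query = urllib.splitquery(path)  # ('/a/b.mp3;type=a', 'x=1')
path, attrs = urllib.splitattr(path)   # ('/a/b.mp3', ['type=a'])
print os.path.splitext(path)[1]        # '.mp3' -> suffix for mkstemp()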