Example #1
0
def retrieve(self, url, filename=None, reporthook=None, data=None,
             maxtries=5, r_range=None):
    """retrieve(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object.

    If it fails, it relaunches itself until the download is complete or
    maxtries == 0 (maxtries == -1 for unlimited tries).
    r_range is a (start, end) tuple giving the range of the remote object
    to retrieve (ignored for local files).

    Raises ValueError when maxtries is below -1.

    NOTE(review): only the argument check, the tempcache lookup and the
    local-file shortcut are present in this body; the retry and range
    handling described above is presumably implemented elsewhere --
    confirm.
    """

    if maxtries < -1:
        # Call form of raise: valid on Python 2 and 3 alike.
        raise ValueError('maxtries must be at least equal with -1')

    url = unwrap(toBytes(url))

    # A previous retrieval of the same URL is answered from the cache.
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]

    type, url1 = splittype(url)

    # No explicit target and a local (or scheme-less) URL: hand back the
    # path of the existing file instead of copying it.
    if filename is None and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            try:
                hdrs = fp.info()
            finally:
                # Release the handle even when info() fails.
                fp.close()

            return url2pathname(splithost(url1)[1]), hdrs
        except IOError:
            # Not readable as a local file; deliberately ignored.
            pass
Example #2
0
 def open(self, fullurl, data=None, method=None):
     """Open *fullurl* through the matching ``open_<scheme>`` handler.

     The URL is unwrapped, converted to bytes and percent-encoded before
     use.  A hit in ``self.tempcache`` is answered from the cached file.
     If a proxy is registered for the scheme, the proxy's scheme selects
     the handler and the handler receives ``(proxy_host, fullurl)``.
     Schemes without a handler are passed to ``open_unknown`` or
     ``open_unknown_proxy``.  A ``socket.error`` raised by the handler is
     re-raised as ``IOError`` carrying the original traceback.
     """
     # Percent-encode the URL to work around servers that choke on e.g.
     # spaces inside the path.
     fullurl = quote(unwrap(toBytes(fullurl)), safe="%/:=&?~#+!$,;'@()*[]|")

     if self.tempcache and fullurl in self.tempcache:
         cached_path, cached_headers = self.tempcache[fullurl]
         return addinfourl(open(cached_path, 'rb'), cached_headers, fullurl)

     scheme, target = splittype(fullurl)
     scheme = scheme or 'file'

     proxy = None
     if scheme in self.proxies:
         proxy = self.proxies[scheme]
         scheme, proxy_location = splittype(proxy)
         proxy_host, _selector = splithost(proxy_location)
         # A tuple target signals the proxy special case to open_*().
         target = (proxy_host, fullurl)

     handler_name = 'open_' + scheme
     self.type = scheme
     handler_name = handler_name.replace('-', '_')

     if hasattr(self, handler_name):
         try:
             return getattr(self, handler_name)(target, data, method)
         except socket.error, msg:
             raise IOError, ('socket error', msg), sys.exc_info()[2]

     if proxy:
         return self.open_unknown_proxy(proxy, fullurl, data)
     return self.open_unknown(fullurl, data)
Example #3
0
def retrieve(self,
             url,
             filename=None,
             reporthook=None,
             data=None,
             maxtries=5,
             r_range=None):
    """retrieve(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object.

    If it fails, it relaunches itself until the download is complete or
    maxtries == 0 (maxtries == -1 for unlimited tries).
    r_range is a (start, end) tuple giving the range of the remote object
    to retrieve (ignored for local files).

    Raises ValueError when maxtries is below -1.

    NOTE(review): this body only validates maxtries, consults the
    tempcache and short-circuits local files; the retry/range logic the
    text above describes is presumably located elsewhere -- confirm.
    """

    if maxtries < -1:
        # Call form of raise: accepted by both Python 2 and Python 3.
        raise ValueError('maxtries must be at least equal with -1')

    url = unwrap(toBytes(url))

    # Serve repeated requests for the same URL from the cache.
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]

    type, url1 = splittype(url)

    # Local or scheme-less URL without an explicit target: return the
    # existing file's path rather than copying it.
    if filename is None and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            try:
                hdrs = fp.info()
            finally:
                # Close the handle even if reading the headers fails.
                fp.close()

            return url2pathname(splithost(url1)[1]), hdrs
        except IOError:
            # Could not be opened as a local file; intentionally ignored.
            pass
Example #4
0
    def retrieve_resume(self, url, filename, reporthook=None, data=None):
        """retrieve_resume(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        The filename argument is REQUIRED (no tempfile creation code here!)
        Additionally resumes a download if the local filename exists: a
        non-empty existing file is opened for appending and only the
        remainder is requested through a Range header.  If the response
        does not confirm that offset in its Content-Range header, the file
        is truncated and the download starts over.

        reporthook, when given, is called as reporthook(blocknum, bs, size)
        once before the transfer and after every block written; size is -1
        when no Content-Length header was received.

        Raises urllib.ContentTooShortError when fewer bytes arrive than the
        Content-Length header announced.
        """

        current_size = 0
        tfp = None
        if os.path.exists(filename):
            try:
                current_size = os.path.getsize(filename)
                tfp = open(filename, 'ab')
                # If the file exists, then only download the remainder
                # NOTE(review): addheader() presumably keeps this header on
                # the opener for later requests as well -- confirm.
                if current_size > 0:
                    self.addheader('Range', 'bytes=%s-' % (current_size))
            except Exception:
                log('Cannot open file for resuming: %s', filename, sender=self, traceback=True)
                if tfp is not None:
                    tfp.close()
                tfp = None
                current_size = 0

        if tfp is None:
            tfp = open(filename, 'wb')

        fp = None
        try:
            url = urllib.unwrap(urllib.toBytes(url))
            fp = self.open(url, data)
            headers = fp.info()

            if current_size > 0:
                # Only append when the server confirms it resumes at our
                # offset ("Content-Range: bytes <start>-<end>/<total>");
                # otherwise the body is the whole object and appending
                # would corrupt the file.
                expected = 'bytes %d-' % current_size
                announced = headers.get('content-range', '') or ''
                if not announced.strip().lower().startswith(expected):
                    tfp.close()
                    tfp = open(filename, 'wb')
                    current_size = 0

            result = filename, headers
            bs = 1024 * 8
            size = -1
            read = current_size
            blocknum = current_size // bs
            # Read the expected size even without a reporthook so that the
            # short-read check below is always effective.
            if "content-length" in headers:
                size = int(headers["Content-Length"]) + current_size
            if reporthook:
                reporthook(blocknum, bs, size)
            while True:
                block = fp.read(bs)
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)
        finally:
            # Release both handles on success and on any error.
            if fp is not None:
                fp.close()
            tfp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise urllib.ContentTooShortError("retrieval incomplete: got only %i out "
                                       "of %i bytes" % (read, size), result)

        return result
Example #5
0
 def retrieve(self, url, filename=None, reporthook=None, data=None):
     """retrieve(url) returns (filename, headers) for a local object
     or (tempfilename, headers) for a remote object.

     A URL already present in ``self.tempcache`` is answered from the
     cache.  When no *filename* is given and the URL is local (no scheme
     or ``file``), the path of the existing file is returned together
     with its headers.

     NOTE(review): no remote-download code is present in this body; the
     remote case mentioned above is presumably handled elsewhere --
     confirm.
     """
     url = urllib.unwrap(urllib.toBytes(url))
     if self.tempcache and url in self.tempcache:
         return self.tempcache[url]
     type, url1 = urllib.splittype(url)
     if filename is None and (not type or type == 'file'):
         try:
             fp = self.open_local_file(url1)
             try:
                 hdrs = fp.info()
             finally:
                 # Close explicitly instead of relying on `del` and the
                 # garbage collector to release the handle.
                 fp.close()
             return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
         except IOError:
             # Not readable as a local file; deliberately ignored.
             pass
Example #6
0
 def retrieve(self, url, filename=None, reporthook=None, data=None):
     """retrieve(url) returns (filename, headers) for a local object
     or (tempfilename, headers) for a remote object.

     Cached URLs (``self.tempcache``) are returned without any I/O.  For
     a local URL (no scheme or ``file``) and no explicit *filename*, the
     existing file's path and headers are returned.

     NOTE(review): this body contains no remote-download code; the
     remote case named above is presumably implemented elsewhere --
     confirm.
     """
     url = urllib.unwrap(urllib.toBytes(url))
     if self.tempcache and url in self.tempcache:
         return self.tempcache[url]
     type, url1 = urllib.splittype(url)
     if filename is None and (not type or type == 'file'):
         try:
             fp = self.open_local_file(url1)
             try:
                 hdrs = fp.info()
             finally:
                 # Explicit close; `del fp` only drops the reference and
                 # leaves the release of the handle to the interpreter.
                 fp.close()
             return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
         except IOError:
             # Could not be opened locally; intentionally ignored.
             pass
Example #7
0
    def open(self, fullurl, data=None):

        if self.tries > self.maxtries:
            # print 'bailing after %d tries (check username and password)' % (self.tries -1)
            self.tries = 0
            raise IOError, ('too many tries - bailing')

        fullurl = unwrap(toBytes(fullurl))
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl)  # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        if '-' in name:
            # replace - with _
            name = '_'.join(name.split('-'))
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error, msg:
            raise IOError, ('socket error', msg), sys.exc_info()[2]
Example #8
0
 def test_toBytes(self):
     """toBytes() passes ASCII-only unicode through and rejects non-ASCII."""
     ascii_url = u'http://www.python.org'
     self.assertEqual(urllib.toBytes(ascii_url), 'http://www.python.org')

     # A URL containing U+00E6 must raise UnicodeError.
     non_ascii_url = test_support.u(r'http://www.python.org/medi\u00e6val')
     self.assertRaises(UnicodeError, urllib.toBytes, non_ascii_url)
Example #9
0
    def retrieve_resume(self, url, filename, reporthook=None, data=None):
        """Download files from an URL; return (headers, real_url)

        Resumes a download if the local filename exists and
        the server supports download resuming.  A non-empty existing file
        is opened for appending and the remainder is requested through a
        Range header; if the Content-Range of the response does not start
        at the current file size, the file is truncated and the download
        starts from scratch.

        reporthook, when given, is called as reporthook(blocknum, bs, size)
        once before the transfer and after every block written; size is -1
        when no Content-Length header was received.

        Raises urllib.ContentTooShortError when fewer bytes arrive than the
        Content-Length header announced.
        """

        current_size = 0
        tfp = None
        if os.path.exists(filename):
            try:
                current_size = os.path.getsize(filename)
                tfp = open(filename, 'ab')
                #If the file exists, then only download the remainder
                if current_size > 0:
                    self.addheader('Range', 'bytes=%s-' % (current_size))
            except Exception:
                logger.warn('Cannot resume download: %s', filename, exc_info=True)
                if tfp is not None:
                    tfp.close()
                tfp = None
                current_size = 0

        if tfp is None:
            tfp = open(filename, 'wb')

        fp = None
        try:
            # Fix a problem with bad URLs that are not encoded correctly (bug 549)
            url = url.decode('ascii', 'ignore')
            url = url.translate(self.ESCAPE_CHARS)
            url = url.encode('ascii')

            url = urllib.unwrap(urllib.toBytes(url))
            fp = self.open(url, data)
            headers = fp.info()

            if current_size > 0:
                # We told the server to resume - see if she agrees
                # See RFC2616 (206 Partial Content + Section 14.16)
                # XXX check status code here, too...
                content_range = ContentRange.parse(headers.get('content-range', ''))
                if content_range is None or content_range.start != current_size:
                    # Ok, that did not work. Reset the download
                    # TODO: seek and truncate if content-range differs from request
                    tfp.close()
                    tfp = open(filename, 'wb')
                    current_size = 0
                    logger.warn('Cannot resume: Invalid Content-Range (RFC2616).')

            result = headers, fp.geturl()
            bs = 1024*8
            size = -1
            read = current_size
            blocknum = current_size // bs
            # Read the expected size even without a reporthook so that the
            # short-read check below is always effective.
            if "content-length" in headers:
                size = int(headers.getrawheader("Content-Length")) + current_size
            if reporthook:
                reporthook(blocknum, bs, size)
            while read < size or size == -1:
                if size == -1:
                    block = fp.read(bs)
                else:
                    # Never read past the announced end of the body.
                    block = fp.read(min(size-read, bs))
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)
        finally:
            # Release both handles on success and on any error.
            if fp is not None:
                fp.close()
            tfp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise urllib.ContentTooShortError("retrieval incomplete: got only %i out "
                                       "of %i bytes" % (read, size), result)

        return result
Example #10
0
    def retrieve_resume(self, url, filename, reporthook=None, data=None):
        """Download files from an URL; return (headers, real_url)

        Resumes a download if the local filename exists and
        the server supports download resuming.  When the existing file is
        non-empty it is opened for appending and a Range header asks for
        the remainder; a response whose Content-Range does not begin at the
        current file size causes the file to be truncated and the download
        to restart.

        reporthook, when given, is called as reporthook(blocknum, bs, size)
        before the transfer and after each block written; size is -1 when
        no Content-Length header was received.

        Raises urllib.ContentTooShortError when fewer bytes arrive than the
        Content-Length header announced.
        """

        current_size = 0
        tfp = None
        if os.path.exists(filename):
            try:
                current_size = os.path.getsize(filename)
                tfp = open(filename, 'ab')
                #If the file exists, then only download the remainder
                if current_size > 0:
                    self.addheader('Range', 'bytes=%s-' % (current_size))
            except Exception:
                logger.warn('Cannot resume download: %s',
                            filename,
                            exc_info=True)
                if tfp is not None:
                    tfp.close()
                tfp = None
                current_size = 0

        if tfp is None:
            tfp = open(filename, 'wb')

        fp = None
        try:
            # Fix a problem with bad URLs that are not encoded correctly (bug 549)
            url = url.decode('ascii', 'ignore')
            url = url.translate(self.ESCAPE_CHARS)
            url = url.encode('ascii')

            url = urllib.unwrap(urllib.toBytes(url))
            fp = self.open(url, data)
            headers = fp.info()

            if current_size > 0:
                # We told the server to resume - see if she agrees
                # See RFC2616 (206 Partial Content + Section 14.16)
                # XXX check status code here, too...
                content_range = ContentRange.parse(
                    headers.get('content-range', ''))
                if (content_range is None
                        or content_range.start != current_size):
                    # Ok, that did not work. Reset the download
                    # TODO: seek and truncate if content-range differs from request
                    tfp.close()
                    tfp = open(filename, 'wb')
                    current_size = 0
                    logger.warn(
                        'Cannot resume: Invalid Content-Range (RFC2616).')

            result = headers, fp.geturl()
            bs = 1024 * 8
            size = -1
            read = current_size
            blocknum = current_size // bs
            # Determine the expected size regardless of reporthook, so the
            # short-read check below cannot be bypassed.
            if "content-length" in headers:
                size = int(
                    headers.getrawheader("Content-Length")) + current_size
            if reporthook:
                reporthook(blocknum, bs, size)
            while read < size or size == -1:
                if size == -1:
                    block = fp.read(bs)
                else:
                    # Never read past the announced end of the body.
                    block = fp.read(min(size - read, bs))
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)
        finally:
            # Close both handles whether the transfer succeeded or failed.
            if fp is not None:
                fp.close()
            tfp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise urllib.ContentTooShortError(
                "retrieval incomplete: got only %i out "
                "of %i bytes" % (read, size), result)

        return result
Example #11
0
 def test_toBytes(self):
     """Check toBytes() on an ASCII-only and on a non-ASCII unicode URL."""
     plain = urllib.toBytes(u'http://www.python.org')
     self.assertEqual(plain, 'http://www.python.org')

     # The U+00E6 character cannot be converted and must raise UnicodeError.
     bad_url = test_support.u(r'http://www.python.org/medi\u00e6val')
     self.assertRaises(UnicodeError, urllib.toBytes, bad_url)
  def retrieve(self, url, filename=None, reporthook=None, data=None):
    """ Retrieves data from the given url and returns a tuple of filename and headers

    The download loop stops as soon as ``self._canceled`` becomes true. A
    canceled download has its output file removed and returns normally.

    Args:
      url (str): url of the data to be retrieved
      filename (str, optional): path the data is written to; a temporary
        file is created when omitted
      reporthook (function, optional): called as
        ``reporthook(blocknum, blocksize, totalsize)`` before the transfer
        and after every block, e.g. for keeping an UI updated with the
        current state; ``totalsize`` is -1 when unknown
      data (optional): passed through unchanged to ``self.open``

    Returns:
      result: (filename, headers)

    Raises:
      urllib.ContentTooShortError: a download that was not canceled
        delivered fewer bytes than its Content-Length header announced

    See Also:
        urllib.URLopener
    """
    self._canceled=False
    url = urllib.unwrap(urllib.toBytes(url))
    if self.tempcache and url in self.tempcache:
      return self.tempcache[url]
    type, url1 = urllib.splittype(url)
    if filename is None and (not type or type == 'file'):
      try:
        fp = self.open_local_file(url1)
        hdrs = fp.info()
        fp.close()
        return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
      except IOError:
        pass
    fp = self.open(url, data)
    try:
      headers = fp.info()
      if filename:
        tfp = open(filename, 'wb')
      else:
        import tempfile
        # Derive the temp file's suffix from the path part of the URL.
        garbage, path = urllib.splittype(url)
        garbage, path = urllib.splithost(path or "")
        path, garbage = urllib.splitquery(path or "")
        path, garbage = urllib.splitattr(path or "")
        suffix = os.path.splitext(path)[1]
        (fd, filename) = tempfile.mkstemp(suffix)
        # NOTE(review): `__tempfiles` is name-mangled with the enclosing
        # class name; in a subclass of urllib.URLopener this is presumably
        # not the base class's list -- confirm the attribute exists.
        self.__tempfiles.append(filename)
        tfp = os.fdopen(fd, 'wb')
      try:
        result = filename, headers
        if self.tempcache is not None:
          self.tempcache[url] = result
        bs = 1024 * 8
        size = -1
        read = 0
        blocknum = 0
        if "content-length" in headers:
          size = int(headers["Content-Length"])
        if reporthook:
          reporthook(blocknum, bs, size)
        while not self._canceled:
          block = fp.read(bs)
          if not block:
            break
          read += len(block)
          tfp.write(block)
          blocknum += 1
          if reporthook:
            reporthook(blocknum, bs, size)
      finally:
        tfp.close()
    finally:
      fp.close()

    # Handle cancellation before the size check: a canceled transfer is
    # short by design and must still get its partial file removed instead
    # of being reported as ContentTooShortError.
    if self._canceled:
      if os.path.exists(filename):
        os.remove(filename)
      return result

    # raise exception if actual size does not match content-length header
    if size >= 0 and read < size:
      raise urllib.ContentTooShortError("retrieval incomplete: got only %i out "
                                 "of %i bytes" % (read, size), result)

    return result
Example #13
0
    'http://10.3.254.233',
    'Referer':
    'http://10.3.254.233/webAuth/index.htm',
    'User-Agent':
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.9.3.1000 Chrome/39.0.2146.0 Safari/537.36',
    'X-DevTools-Emulate-Network-Conditions-Client-Id':
    '7DA993FC-A05A-4FC9-A693-C1DA47EDABF0'
}
#)
# Build the form-encoded POST body from the command-line arguments:
# argv[1] is sent as 'username', argv[2] as both 'password' and 'pwd'.
# NOTE(review): urlencode() presumably stringifies None, so 'savename'
# would be sent as the text "None" -- confirm this is intended.
data = urllib.urlencode({
    'username': sys.argv[1],
    'password': sys.argv[2],
    'pwd': sys.argv[2],
    'secret': 'true',
    'savename': None
})
#'password':en_passwd,
#'pwd':en_passwd,
#data = params.encode('ascii')
#data = params.encode('utf-8')
#f= urllib2.urlopen("http://10.3.254.233/webAuth/index.htm", data)
# POST the form with the custom headers in `head`; `f` is first the
# Request object and is then rebound to the response.
f = urllib2.Request("http://10.3.254.233/webAuth/index.htm", data, head)
f = urllib2.urlopen(f)
# Final URL of the response (after any redirects), printed unquoted.
turl = f.geturl()
print(urllib2.unquote(turl))
turl = urllib.toBytes(turl)
# Take the text between the first and second '=' of the URL.
# NOTE(review): raises IndexError when the URL contains no '='.
tres_hex = turl.split('=')[1]
# Rewrite each '%' as a literal backslash followed by 'x'.
# NOTE(review): decode('utf-8') does not interpret such escape text, so
# this presumably prints the raw "\xNN" sequences -- confirm the intent.
thex_ch = tres_hex.replace('%', '\\x')
print(thex_ch.decode('utf-8'))
print("done")
Example #14
0
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """Retrieve *url* into a file; return ``(filename, headers)``.

        Overridden method from urllib.URLopener.  The download loop stops
        as soon as ``self._cancelDownload`` becomes true; a canceled
        download has its output file removed, sets ``self.wasCanceled``
        and returns normally.

        url: address of the data to retrieve.
        filename: path the data is written to; a temporary file is created
            when omitted.
        reporthook: optional callable invoked as
            ``reporthook(blocknum, blocksize, totalsize)`` before the
            transfer and after every block; ``totalsize`` is -1 when
            unknown.
        data: passed through unchanged to ``self.open``.

        Raises urllib.ContentTooShortError when a download that was not
        canceled delivered fewer bytes than its Content-Length header
        announced.
        """
        self._cancelDownload = False
        url = urllib.unwrap(urllib.toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = urllib.splittype(url)
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
            except IOError:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                # Derive the temp file's suffix from the URL's path part.
                garbage, path = urllib.splittype(url)
                garbage, path = urllib.splithost(path or "")
                path, garbage = urllib.splitquery(path or "")
                path, garbage = urllib.splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                # NOTE(review): `__tempfiles` is name-mangled with the
                # enclosing class name; in a subclass of urllib.URLopener
                # this is presumably not the base class's list -- confirm
                # the attribute exists.
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024 * 8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while not self._cancelDownload:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # Handle cancellation before the size check: a canceled transfer
        # is short by design and must still get its partial file removed
        # and be flagged, instead of surfacing as ContentTooShortError.
        if self._cancelDownload:
            if os.path.exists(filename):
                os.remove(filename)
                self.wasCanceled = True
            return result

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise urllib.ContentTooShortError(
                "retrieval incomplete: got only %i out "
                "of %i bytes" % (read, size), result)

        return result