Example #1
 def ftp_open(self, req):
     host = req.get_host()
     if not host:
         raise IOError, ('ftp error', 'no host given')
     # XXX handle custom username & password
     host = socket.gethostbyname(host)
     host, port = splitport(host)
     if port is None:
         port = ftplib.FTP_PORT
     path, attrs = splitattr(req.get_selector())
     path = unquote(path)
     dirs = string.splitfields(path, '/')
     dirs, file = dirs[:-1], dirs[-1]
     if dirs and not dirs[0]:
         dirs = dirs[1:]
     user = passwd = ''  # XXX
     try:
         fw = self.connect_ftp(user, passwd, host, port, dirs)
         type = file and 'I' or 'D'
         for attr in attrs:
             attr, value = splitvalue(attr)
             if string.lower(attr) == 'type' and \
                value in ('a', 'A', 'i', 'I', 'd', 'D'):
                 type = string.upper(value)
         fp, retrlen = fw.retrfile(file, type)
         if retrlen is not None and retrlen >= 0:
             sf = StringIO('Content-Length: %d\n' % retrlen)
             headers = mimetools.Message(sf)
         else:
             headers = noheaders()
         return addinfourl(fp, headers, req.get_full_url())
     except ftplib.all_errors, msg:
         raise IOError, ('ftp error', msg), sys.exc_info()[2]
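For reference, a minimal sketch (assuming Python 2's legacy urllib module and a made-up FTP URL) of what the split helpers used above return; note that splitvalue(), not splitattr(), is what breaks a single 'type=i' attribute into a key/value pair:

import urllib

# splitport() separates an optional port from the host part
urllib.splitport('ftp.example.com:2121')   # ('ftp.example.com', '2121')
urllib.splitport('ftp.example.com')        # ('ftp.example.com', None)

# splitattr() peels ';key=value' attributes off the selector
urllib.splitattr('/pub/tools/archive.tar.gz;type=i')
# ('/pub/tools/archive.tar.gz', ['type=i'])

# splitvalue() then splits one attribute into key and value
urllib.splitvalue('type=i')                # ('type', 'i')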
Example #2
class FTPRangeHandler(urllib2.FTPHandler):
    """
    FTP Range support..
    """
    def ftp_open(self, req):
        host = req.get_host()
        host, port = urllib.splitport(host)
        if port is None:
            port = ftplib.FTP_PORT

        try:
            host = socket.gethostbyname(host)
        except socket.error, msg:
            raise FetchError(msg)

        path, attrs = urllib.splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = map(urllib.unquote, dirs)
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp('', '', host, port, dirs)
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = urllib.splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()

            rawr = req.headers.get('Range', None)
            if rawr:
                rest = int(rawr.split("=")[1].rstrip("-"))
            else:
                rest = 0

            fp, retrlen = fw.retrfile(file, type, rest)

            fb, lb = rest, retrlen
            if retrlen is None or retrlen == 0:
                raise RangeError
            retrlen = lb - fb
            if retrlen < 0:
                # beginning of range is larger than file
                raise RangeError

            headers = ''
            mtype = guess_type(req.get_full_url())[0]
            if mtype:
                headers += 'Content-Type: %s\n' % mtype
            if retrlen is not None and retrlen >= 0:
                headers += 'Content-Length: %d\n' % retrlen

            try:
                from cStringIO import StringIO
            except ImportError, msg:
                from StringIO import StringIO

            return urllib.addinfourl(fp, Message(StringIO(headers)),
                                     req.get_full_url())
Example #3
class FTPHandler(BaseHandler):
    def ftp_open(self, req):
        host = req.get_host()
        if not host:
            raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error, msg:
            raise URLError(msg)
        path, attrs = splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs)
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            sf = StringIO(headers)
            headers = mimetools.Message(sf)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors, msg:
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
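The username/password handling above relies on splituser() and splitpasswd(); a minimal sketch of their behaviour (Python 2 urllib, hypothetical credentials):

import urllib

user, host = urllib.splituser('anonymous:secret@ftp.example.com:2121')
# ('anonymous:secret', 'ftp.example.com:2121'); with no '@' the user part is None

user, passwd = urllib.splitpasswd(user)    # ('anonymous', 'secret')
host, port = urllib.splitport(host)        # ('ftp.example.com', '2121')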
Example #4
    def start(self, destfile=None, destfd=None):
        urllib._urlopener = OLPCURLopener()
        self._info = urllib.urlopen(self._url)
        self._outf = None
        self._fname = None
        if destfd and not destfile:
            raise ValueError('Must provide destination file too when'
                             ' specifying file descriptor')
        if destfile:
            self._suggested_fname = os.path.basename(destfile)
            self._fname = os.path.abspath(os.path.expanduser(destfile))
            if destfd:
                # Use the user-supplied destination file descriptor
                self._outf = destfd
            else:
                self._outf = os.open(self._fname, os.O_RDWR |
                                     os.O_TRUNC | os.O_CREAT, 0644)
        else:
            fname = self._get_filename_from_headers(self._info.headers)
            self._suggested_fname = fname
            garbage_, path = urllib.splittype(self._url)
            garbage_, path = urllib.splithost(path or "")
            path, garbage_ = urllib.splitquery(path or "")
            path, garbage_ = urllib.splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            (self._outf, self._fname) = tempfile.mkstemp(suffix=suffix,
                                                         dir=self._destdir)

        fcntl.fcntl(self._info.fp.fileno(), fcntl.F_SETFD, os.O_NDELAY)
        self._srcid = GObject.io_add_watch(self._info.fp.fileno(),
                                           GObject.IO_IN | GObject.IO_ERR,
                                           self._read_next_chunk)
Example #5
    def start(self, destfile=None, destfd=None):
        self._info = urllib.urlopen(self._url)
        self._outf = None
        self._fname = None
        if destfd and not destfile:
            raise ValueError('Must provide destination file too when'
                             ' specifying file descriptor')
        if destfile:
            self._suggested_fname = os.path.basename(destfile)
            self._fname = os.path.abspath(os.path.expanduser(destfile))
            if destfd:
                # Use the user-supplied destination file descriptor
                self._outf = destfd
            else:
                self._outf = os.open(self._fname, os.O_RDWR |
                                     os.O_TRUNC | os.O_CREAT, 0644)
        else:
            fname = self._get_filename_from_headers(self._info.headers)
            self._suggested_fname = fname
            garbage_, path = urllib.splittype(self._url)
            garbage_, path = urllib.splithost(path or "")
            path, garbage_ = urllib.splitquery(path or "")
            path, garbage_ = urllib.splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            (self._outf, self._fname) = tempfile.mkstemp(suffix=suffix,
                                                         dir=self._destdir)

        fcntl.fcntl(self._info.fp.fileno(), fcntl.F_SETFD, os.O_NDELAY)
        self._srcid = GLib.io_add_watch(self._info.fp.fileno(),
                                        GLib.IO_IN | GLib.IO_ERR,
                                        self._read_next_chunk)
Example #6
 def _splitsuffix(self, url):
     'Split the suffix off of a url.'
     garbage, pathpart = urllib.splittype(url)
     garbage, pathpart = urllib.splithost(pathpart or '')
     pathpart, garbage = urllib.splitquery(pathpart or '')
     pathpart, garbage = urllib.splitattr(pathpart or '')
     return os.path.splitext(pathpart)[1]
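The same split chain, traced on a hypothetical download URL (Python 2 urllib), showing how only the file suffix survives:

import os
import urllib

url = 'http://example.com/downloads/pkg-1.0.tar.gz?mirror=eu'
garbage, path = urllib.splittype(url)    # ('http', '//example.com/downloads/pkg-1.0.tar.gz?mirror=eu')
garbage, path = urllib.splithost(path)   # ('example.com', '/downloads/pkg-1.0.tar.gz?mirror=eu')
path, garbage = urllib.splitquery(path)  # ('/downloads/pkg-1.0.tar.gz', 'mirror=eu')
path, garbage = urllib.splitattr(path)   # ('/downloads/pkg-1.0.tar.gz', [])
os.path.splitext(path)[1]                # '.gz'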
Example #7
 def open_ftp(self, url):
     host, path = urllib.splithost(url)
     if not host: raise IOError, ('ftp error', 'no host given')
     host, port = urllib.splitport(host)
     user, host = urllib.splituser(host)
     # if user: user, passwd = splitpasswd(user)
     if user: passwd = getpass.getpass()
     else: passwd = None
     host = urllib.unquote(host)
     user = urllib.unquote(user or '')
     passwd = urllib.unquote(passwd or '')
     host = socket.gethostbyname(host)
     if not port:
         import ftplib
         port = ftplib.FTP_PORT
     else:
         port = int(port)
     path, attrs = urllib.splitattr(path)
     path = urllib.unquote(path)
     dirs = string.splitfields(path, '/')
     dirs, file = dirs[:-1], dirs[-1]
     if dirs and not dirs[0]: dirs = dirs[1:]
     key = (user, host, port, string.joinfields(dirs, '/'))
     # XXX thread unsafe!
     if len(self.ftpcache) > MAXFTPCACHE:
         # Prune the cache, rather arbitrarily
         for k in self.ftpcache.keys():
             if k != key:
                 v = self.ftpcache[k]
                 del self.ftpcache[k]
                 v.close()
     try:
         if not self.ftpcache.has_key(key):
             print 'Creating ftpwrapper: ',user,host,port,dirs
             self.ftpcache[key] = \
                 urllib.ftpwrapper(user, passwd, host, port, dirs)
         if not file: type = 'D'
         else: type = 'I'
         for attr in attrs:
             attr, value = urllib.splitvalue(attr)
             if string.lower(attr) == 'type' and \
                value in ('a', 'A', 'i', 'I', 'd', 'D'):
                 type = string.upper(value)
         (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
         if retrlen is not None and retrlen >= 0:
             import mimetools, StringIO
             headers = mimetools.Message(StringIO.StringIO(
                 'Content-Length: %d\n' % retrlen))
         else:
             headers = noheaders()
         return urllib.addinfourl(fp, headers, "ftp:" + url)
     except urllib.ftperrors(), msg:
         raise IOError, ('ftp error', msg), sys.exc_info()[2]
Example #8
 def open_ftp(self, url):
     host, path = urllib.splithost(url)
     if not host: raise IOError, ('ftp error', 'no host given')
     host, port = urllib.splitport(host)
     user, host = urllib.splituser(host)
     # if user: user, passwd = splitpasswd(user)
     if user: passwd = getpass.getpass()
     else: passwd = None
     host = urllib.unquote(host)
     user = urllib.unquote(user or '')
     passwd = urllib.unquote(passwd or '')
     host = socket.gethostbyname(host)
     if not port:
         import ftplib
         port = ftplib.FTP_PORT
     else:
         port = int(port)
     path, attrs = urllib.splitattr(path)
     path = urllib.unquote(path)
     dirs = string.splitfields(path, '/')
     dirs, file = dirs[:-1], dirs[-1]
     if dirs and not dirs[0]: dirs = dirs[1:]
     key = (user, host, port, string.joinfields(dirs, '/'))
     # XXX thread unsafe!
     if len(self.ftpcache) > MAXFTPCACHE:
         # Prune the cache, rather arbitrarily
         for k in self.ftpcache.keys():
             if k != key:
                 v = self.ftpcache[k]
                 del self.ftpcache[k]
                 v.close()
     try:
         if not self.ftpcache.has_key(key):
             print 'Creating ftpwrapper: ', user, host, port, dirs
             self.ftpcache[key] = \
                 urllib.ftpwrapper(user, passwd, host, port, dirs)
         if not file: type = 'D'
         else: type = 'I'
         for attr in attrs:
             attr, value = urllib.splitvalue(attr)
             if string.lower(attr) == 'type' and \
                value in ('a', 'A', 'i', 'I', 'd', 'D'):
                 type = string.upper(value)
         (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
         if retrlen is not None and retrlen >= 0:
             import mimetools, StringIO
             headers = mimetools.Message(
                 StringIO.StringIO('Content-Length: %d\n' % retrlen))
         else:
             headers = noheaders()
         return urllib.addinfourl(fp, headers, "ftp:" + url)
     except urllib.ftperrors(), msg:
         raise IOError, ('ftp error', msg), sys.exc_info()[2]
Example #9
    def ftp_open(self, req):
        import ftplib
        import mimetypes
        host = req.get_host()
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''
        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise URLError(msg)

        path, attrs = splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()

            fp, retrlen = fw.retrfile(file, type)
            headers = ''
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += 'Content-type: %s\n' % mtype
            if retrlen is not None and retrlen >= 0:
                headers += 'Content-length: %d\n' % retrlen
            sf = StringIO(headers)
            headers = mimetools.Message(sf)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors as msg:
            raise URLError, 'ftp error: %s' % msg, sys.exc_info()[2]

        return
Example #10
 def ftp_open(self, req):
     host = req.get_host()
     if not host:
         raise IOError('ftp error', 'no host given')
     # XXX handle custom username & password
     try:
         host = socket.gethostbyname(host)
     except socket.error as msg:
         raise URLError(msg)
     host, port = splitport(host)
     if port is None:
         port = ftplib.FTP_PORT
     path, attrs = splitattr(req.get_selector())
     path = unquote(path)
     dirs = path.split('/')
     dirs, file = dirs[:-1], dirs[-1]
     if dirs and not dirs[0]:
         dirs = dirs[1:]
     user = passwd = '' # XXX
     try:
         fw = self.connect_ftp(user, passwd, host, port, dirs)
         type = file and 'I' or 'D'
         for attr in attrs:
             attr, value = splitvalue(attr)
             if attr.lower() == 'type' and \
                value in ('a', 'A', 'i', 'I', 'd', 'D'):
                 type = value.upper()
         fp, retrlen = fw.retrfile(file, type)
         headers = ""
         mtype = mimetypes.guess_type(req.get_full_url())[0]
         if mtype:
             headers += "Content-Type: %s\n" % mtype
         if retrlen is not None and retrlen >= 0:
             headers += "Content-Length: %d\n" % retrlen
         sf = StringIO(headers)
         headers = mimetools.Message(sf)
         return addinfourl(fp, headers, req.get_full_url())
     except ftplib.all_errors as msg:
         raise IOError(('ftp error', msg), sys.exc_info()[2])
Example #11
def relative_uri(uri, fn):
    path, query = urllib.splitquery(uri)
    path, attrs = urllib.splitattr(path)
    if fn.startswith('/'):
        scheme, _, path = path.partition(':')
        host, path = urllib.splithost(path)
        new_uri = '{}://{}/{}'.format(scheme, host, fn[1:])
    else:
        parent = path.rpartition('/')[0]
        new_uri = '{}/{}'.format(parent, fn)
    if attrs:
        new_uri = '{};{}'.format(new_uri, ';'.join(attrs))
    return new_uri
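To make the behaviour concrete, a small usage sketch (hypothetical URLs) of relative_uri() as defined above:

# Relative reference: resolved against the parent directory, attributes kept.
relative_uri('http://example.com/docs/guide/index.html;lang=en', 'figures/fig1.png')
# -> 'http://example.com/docs/guide/figures/fig1.png;lang=en'

# Absolute reference: resolved against the host root.
relative_uri('http://example.com/docs/guide/index.html', '/style.css')
# -> 'http://example.com/style.css'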
Example #12
 def __extract_video_id_from_uri(self, uri):
     """
     GET uri like '/watch?v=AsXf9v&param=1&p=3#junk'
     RETURNS value for 'v' parameter --> 'AsXf9v'
     """
     uri = uri.replace('&', ';')
     uri = uri.replace('?', ';')
     req, params = urllib.splitattr(uri)
     for item in params:
          k, v = urllib.splitvalue(item)
         if k == 'v':
             return v
     raise ValueError("Can't find parameter 'v' from '%s'" % uri)
Example #13
 def __extract_video_id_from_uri(self, uri):
     """
     GET uri like '/watch?v=AsXf9v&param=1&p=3#junk'
     RETURNS value for 'v' parameter --> 'AsXf9v'
     """
     uri = uri.replace('&', ';')
     uri = uri.replace('?', ';')
     req, params = urllib.splitattr(uri)
     for item in params:
          k, v = urllib.splitvalue(item)
         if k == 'v':
             return v
     raise ValueError("Can't find parameter 'v' from '%s'" % uri)
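Rewriting '?' and '&' as ';' lets urllib.splitattr() do the query parsing; a short trace on the URI from the docstring above (Python 2 urllib):

import urllib

uri = '/watch?v=AsXf9v&param=1&p=3#junk'
uri = uri.replace('&', ';').replace('?', ';')   # '/watch;v=AsXf9v;param=1;p=3#junk'

req, params = urllib.splitattr(uri)
# ('/watch', ['v=AsXf9v', 'param=1', 'p=3#junk'])

urllib.splitvalue(params[0])                    # ('v', 'AsXf9v')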
Example #14
 def addSubscriber(self, channel, session):
     """
         Subscribe to a channel. 
         A subscribe request of /foo?a=b will listen to foo messages 
         if attribute a of the message has value "b".
     """
     channel, attrs = urllib.splitquery(channel)
     pattern = {}
     if attrs:
         attr, rest = urllib.splitattr(attrs)
         for attr in [attr] + rest:
             key, value = urllib.splitvalue(attr)
             pattern[key] = value
     self._subscribers.setdefault(channel, []).append((session, pattern))
Example #15
 def addSubscriber(self, channel, session):
     """
         Subscribe to a channel. 
         A subscribe request of /foo?a=b will listen to foo messages 
         if attribute a of the message has value "b".
     """
     channel, attrs = urllib.splitquery(channel)
     pattern = {}
     if attrs:
         attr, rest = urllib.splitattr(attrs)
         for attr in [attr] + rest:
             key, value = urllib.splitvalue(attr)
             pattern[key] = value
     self._subscribers.setdefault(channel, []).append((session, pattern))
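A short trace (hypothetical channel string) of how the subscription pattern above is built:

import urllib

channel, attrs = urllib.splitquery('/foo?a=b;c=d')   # ('/foo', 'a=b;c=d')
attr, rest = urllib.splitattr(attrs)                 # ('a=b', ['c=d'])

pattern = {}
for attr in [attr] + rest:
    key, value = urllib.splitvalue(attr)
    pattern[key] = value
# pattern == {'a': 'b', 'c': 'd'}; the subscriber entry for '/foo' then carries this pattern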
Example #16
    def rsync_rsh_open(self, req):
        logger.debug('Opening rsync+rsh')
        host = req.get_host()
        if not host:
            raise urllib2.URLError('rsync+ssh error: not host given')
        if ':' in host:
            raise urllib2.URLError(
                'rsync+ssh error: \':\' character not supported in host')

        path, attrs = urllib.splitattr(req.get_selector())
        if not path:
            raise urllib2.URLError('rsync+ssh error: no path given')

        source = '{}:{}'.format(host, path)
        return self.do_rsync(source, attrs=attrs)
Example #17
def parse_handle(hdl):
    """Parse off options from handle.

    E.g. 'auth.subauth/path;type=url' will return
    ('auth.subauth/path', {'type': 'url'}).

    This also interprets % quoting in the non-option part.

    """
    hdl, attrs = urllib.splitattr(hdl)
    d = {}
    if attrs:
        for attr in attrs:
            i = string.find(attr, '=')
            if i < 0:
                key, value = attr, None
            else:
                key, value = attr[:i], urllib.unquote(attr[i+1:])
            d[string.lower(key)] = value
    return urllib.unquote(hdl), d
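The pieces parse_handle() builds on, traced on the handle from the docstring (Python 2 urllib):

import urllib

hdl, attrs = urllib.splitattr('auth.subauth/path;type=url')
# ('auth.subauth/path', ['type=url'])

urllib.unquote('auth.subauth/my%20path')   # 'auth.subauth/my path' -- %-quoting is undone
# parse_handle() therefore returns ('auth.subauth/path', {'type': 'url'}) for the docstring example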
Example #18
    def gotHeaders(self, headers):
        """ The downloader will feeds headers via this function """
        debug(str(self) + ' gotHeaders')
        self.isGzipped = headers.get('content-encoding', [None])[0] == 'gzip'
        # Grab the file name of the NZB via content-disposition header
        keys = headers.keys()

        found = None
        for key in keys:
            if key.lower() == 'content-disposition':
                found = key
                break

        if found is None:
            return

        type, attrs = splitattr(headers[found][0])
        key, val = splitvalue(attrs[0].strip())
        val = val.strip().strip('"')
        if val:
            debug(str(self) + ' gotHeaders: found filename: %s' % val)
            self.nzbFilename = val
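The Content-Disposition parsing above, traced on a typical header value (hypothetical filename):

from urllib import splitattr, splitvalue

type, attrs = splitattr('attachment; filename="article.nzb"')
# ('attachment', [' filename="article.nzb"'])

key, val = splitvalue(attrs[0].strip())    # ('filename', '"article.nzb"')
val.strip().strip('"')                     # 'article.nzb'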
Example #19
def parse_url(url, default_port=None):
    '''
    Parse url in the following form:
      PROTO://[USER:[:PASSWD]@]HOST[:PORT][/PATH[;ATTR][?QUERY]]
    A tuple containing (proto, user, passwd, host, port, path, tag, attrs, query) is returned,
    where `attrs' is a tuple containing ('attr1=value1', 'attr2=value2', ...)
    '''
    proto, user, passwd, host, port, path, tag, attrs, query = (None, ) * 9

    try:
        proto, tmp_host = urllib.splittype(url)
        tmp_host, tmp_path = urllib.splithost(tmp_host)
        tmp_user, tmp_host = urllib.splituser(tmp_host)
        if tmp_user:
            user, passwd = urllib.splitpasswd(tmp_user)
        host, port = urllib.splitport(tmp_host)
        port = int(port) if port else default_port
        tmp_path, query = urllib.splitquery(tmp_path)
        tmp_path, attrs = urllib.splitattr(tmp_path)
        path, tag = urllib.splittag(tmp_path)
    except Exception, err:
        raise Exception('parse_db_url error - {0}'.format(str(err)))
Example #20
def parse_url(url, default_port=None):
    '''
    Parse url in the following form:
      PROTO://[USER:[:PASSWD]@]HOST[:PORT][/PATH[;ATTR][?QUERY]]
    A tuple containing (proto, user, passwd, host, port, path, tag, attrs, query) is returned,
    where `attrs' is a tuple containing ('attr1=value1', 'attr2=value2', ...)
    '''
    proto, user, passwd, host, port, path, tag, attrs, query = (None, ) * 9

    try:
        proto, tmp_host = urllib.splittype(url)
        tmp_host, tmp_path = urllib.splithost(tmp_host)
        tmp_user, tmp_host = urllib.splituser(tmp_host)
        if tmp_user:
            user, passwd = urllib.splitpasswd(tmp_user)
        host, port = urllib.splitport(tmp_host)
        port = int(port) if port else default_port
        tmp_path, query = urllib.splitquery(tmp_path)
        tmp_path, attrs = urllib.splitattr(tmp_path)
        path, tag = urllib.splittag(tmp_path)
    except Exception, err:
        raise Exception('parse_db_url error - {0}'.format(str(err)))
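A full trace of the split chain in parse_url() on a hypothetical database URL (Python 2 urllib):

import urllib

url = 'mysql://dbuser:secret@db.example.com:3306/mydb;charset=utf8?timeout=5'
proto, rest = urllib.splittype(url)          # ('mysql', '//dbuser:secret@db.example.com:3306/mydb;charset=utf8?timeout=5')
netloc, rest = urllib.splithost(rest)        # ('dbuser:secret@db.example.com:3306', '/mydb;charset=utf8?timeout=5')
userinfo, netloc = urllib.splituser(netloc)  # ('dbuser:secret', 'db.example.com:3306')
user, passwd = urllib.splitpasswd(userinfo)  # ('dbuser', 'secret')
host, port = urllib.splitport(netloc)        # ('db.example.com', '3306')
rest, query = urllib.splitquery(rest)        # ('/mydb;charset=utf8', 'timeout=5')
rest, attrs = urllib.splitattr(rest)         # ('/mydb', ['charset=utf8'])
path, tag = urllib.splittag(rest)            # ('/mydb', None)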
Example #21
    def gotHeaders(self, headers):
        """ The downloader will feeds headers via this function """
        debug(str(self) + ' gotHeaders')
        self.isGzipped = headers.get('content-encoding', [None])[0] == 'gzip'
        # Grab the file name of the NZB via content-disposition header
        keys = headers.keys()

        found = None
        for key in keys:
            if key.lower() == 'content-disposition':
                found = key
                break

        if found is None:
            return

        type, attrs = splitattr(headers[found][0])
        key, val = splitvalue(attrs[0].strip())
        val = val.strip().strip('"')
        if val:
            debug(str(self) + ' gotHeaders: found filename: %s' % val)
            self.nzbFilename = val
Example #22
class FTPChunkHandler(FTPHandler):
    """The code was taken from urllib2.py.

    The only difference is that offsets are supported by this class
    using the REST-command. Offsets are needed for chunked loading.
    """
    def ftp_open(self, req):
        import mimetypes
        host = req.get_host()
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error, msg:
            raise URLError(msg)
        path, attrs = splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()

            # EDIT START
            # get REST (file offset) from headers
            rest = 0
            offset = req.headers.get('Offset', None)
            if offset is not None and offset > 0:
                rest = offset
            # EDIT END

            fp, retrlen = fw.retrfile(file, type, rest)
            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            sf = StringIO(headers)
            headers = mimetools.Message(sf)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors, msg:
            raise URLError, ('ftp error: %s' % msg), sys.exc_info()[2]
Example #23
  def retrieve(self, url, filename=None, reporthook=None, data=None):
    """ Retrieves data from the given url and returns a tuple of filename and headers

    Args:
      url (str): url of the data to be retrieved
      filename (str, optional): filename from the url to download
      reporthook: (function, optional): function that should be called for e.g. keeping an UI updated with current state
      data (, optional):

    Returns:
      result: (filename, headers)

    See Also:
        urllib.URLopener
    """
    self._canceled=False
    url = urllib.unwrap(urllib.toBytes(url))
    if self.tempcache and url in self.tempcache:
      return self.tempcache[url]
    type, url1 = urllib.splittype(url)
    if filename is None and (not type or type == 'file'):
      try:
        fp = self.open_local_file(url1)
        hdrs = fp.info()
        fp.close()
        return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
      except IOError:
        pass
    fp = self.open(url, data)
    try:
      headers = fp.info()
      if filename:
        tfp = open(filename, 'wb')
      else:
        import tempfile
        garbage, path = urllib.splittype(url)
        garbage, path = urllib.splithost(path or "")
        path, garbage = urllib.splitquery(path or "")
        path, garbage = urllib.splitattr(path or "")
        suffix = os.path.splitext(path)[1]
        (fd, filename) = tempfile.mkstemp(suffix)
        self.__tempfiles.append(filename)
        tfp = os.fdopen(fd, 'wb')
      try:
        result = filename, headers
        if self.tempcache is not None:
          self.tempcache[url] = result
        bs = 1024 * 8
        size = -1
        read = 0
        blocknum = 0
        if "content-length" in headers:
          size = int(headers["Content-Length"])
        if reporthook:
          reporthook(blocknum, bs, size)
        while not self._canceled:
          block = fp.read(bs)
          if block == "":
            break
          read += len(block)
          tfp.write(block)
          blocknum += 1
          if reporthook:
            reporthook(blocknum, bs, size)
      finally:
        tfp.close()
    finally:
      fp.close()

    # raise exception if actual size does not match content-length header
    if size >= 0 and read < size:
      raise urllib.ContentTooShortError("retrieval incomplete: got only %i out "
                                 "of %i bytes" % (read, size), result)

    if self._canceled and os.path.exists(filename):
      os.remove(filename)
    return result
Example #24
def main():
    """Main routine"""
    mgr = NClientsOptionParser()
    (opts, args) = mgr.get_opt()

    url = opts.url.replace('?', ';').replace('&amp;', ';').replace('&', ';')
    logname = opts.logname
    dasquery = opts.dasquery
    idx = opts.idx
    limit = 1
    nclients = opts.nclients
    minclients = opts.minclients
    debug = opts.debug
    headers = {'Accept': opts.accept}
    urlpath, args = urllib.splitattr(url)
    repeat = opts.repeat
    arr = urlpath.split('/')
    if arr[0] == 'http:' or arr[0] == 'https:':
        host = arr[0] + '//' + arr[2]
    else:
        msg = 'Provided URL="%s" does not contain http:// part' % opts.url
        raise Exception(msg)
    method = '/' + '/'.join(arr[3:])
    params = {}
    for item in args:
        key, val = item.split('=')
        params[key] = val

    # do clean-up
    for filename in os.listdir('.'):
        if filename.find('.log') != -1 and filename.find(logname) != -1:
            os.remove(filename)

    # perform action
    array = []
    if nclients <= 10:
        array += range(1, nclients + 1)
    if 10 < nclients <= 100:
        array = chain(range(1, 10),
                      range(10, nclients + 1, 10))
    if 100 < nclients <= 1000:
        array = chain(range(1, 10),
                      range(10, 100, 10),
                      range(100, nclients + 1, 100))

    # allow to specify the starting nclients
    array = ifilter(lambda x: x >= minclients, array)

    for nclients in array:
        sys.stdout.write("Run job with %s clients" % nclients)
        for _ in range(repeat):
            runjob(nclients, host, method, params, headers, idx, limit,
                   debug, logname, dasquery)
            sys.stdout.write('.')
        print('')

    # analyze results
    file_list = []
    for filename in os.listdir('.'):
        if filename.find('.log') != -1:
            file_list.append(filename)
    xxx = []
    yyy = []
    std = []

    for ifile in natsorted(file_list):
        name, _ = ifile.split('.')
        # ignore non related .log files and .smf
        if not logname in name or not name:
            continue
        xxx.append(int(name.split(logname)[-1]))
        mean, std2 = avg_std(ifile)
        yyy.append(mean)
        std.append(std2)
    try:
        make_plot(xxx, yyy, std, opts.filename, title=dasquery)
    except Exception as e:
        print(e)
        print("xxx =", xxx)
        print("yyy =", yyy)
        print("std =", std)
Example #25
 def rsync_open(self, req):
     logger.debug('Opening rsync')
     source, attrs = urllib.splitattr(req.get_full_url())
     return self.do_rsync(source, attrs=attrs)
Example #26
 if not filename and (not type or type == 'file'):
     try:
         fp = self.open_local_file(url1)
         hdrs = fp.info()
         del fp
         return url2pathname(urllib.splithost(url1)[1]), hdrs
     except IOError, msg:
         pass
 fp = self.open(url)
 headers = fp.info()
 if not filename:
     import tempfile
     garbage, path = urllib.splittype(url)
     garbage, path = urllib.splithost(path or "")
     path, garbage = urllib.splitquery(path or "")
     path, garbage = urllib.splitattr(path or "")
     suffix = os.path.splitext(path)[1]
     filename = tempfile.mktemp(suffix)
     self.__tempfiles.append(filename)
 result = filename, headers
 if self.tempcache is not None:
     self.tempcache[url] = result
 tfp = open(filename, 'wb')
 bs = blocksize
 size = -1
 blocknum = 1
 if reporthook:
     if headers.has_key("content-length"):
         size = int(headers["Content-Length"])
     stayopen = reporthook(0, bs, size, self._userObject)
     if stayopen==0:
Example #27
    def smb_open(self, req):
        global USE_NTLM, MACHINE_NAME

        host = req.get_host()
        if not host:
            raise urllib2.URLError('SMB error: no host given')
        host, port = splitport(host)
        if port is None:
            port = 139
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''
        myname = MACHINE_NAME or self.generateClientMachineName()

        n = NetBIOS()
        names = n.queryIPForName(host)
        if names:
            server_name = names[0]
        else:
            raise urllib2.URLError(
                'SMB error: Hostname does not reply back with its machine name'
            )

        path, attrs = splitattr(req.get_selector())
        if path.startswith('/'):
            path = path[1:]
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        service, path = dirs[0], '/'.join(dirs[1:])

        try:
            conn = SMBConnection(user,
                                 passwd,
                                 myname,
                                 server_name,
                                 use_ntlm_v2=USE_NTLM)
            conn.connect(host, port)

            if req.has_data():
                data_fp = req.get_data()
                filelen = conn.storeFile(service, path, data_fp)

                headers = "Content-length: 0\n"
                fp = StringIO("")
            else:
                fp = self.createTempFile()
                file_attrs, retrlen = conn.retrieveFile(service, path, fp)
                fp.seek(0)

                headers = ""
                mtype = mimetypes.guess_type(req.get_full_url())[0]
                if mtype:
                    headers += "Content-type: %s\n" % mtype
                if retrlen is not None and retrlen >= 0:
                    headers += "Content-length: %d\n" % retrlen

            sf = StringIO(headers)
            headers = mimetools.Message(sf)

            return addinfourl(fp, headers, req.get_full_url())
        except Exception, ex:
            raise urllib2.URLError, ('smb error: %s' % ex), sys.exc_info()[2]
Example #28
class FTPRangeHandler(urllib2.FTPHandler):
    def ftp_open(self, req):
        host = req.get_host()
        if not host:
            raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error, msg:
            raise urllib2.URLError(msg)
        path, attrs = splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs)
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()

            # -- range support modifications start here
            rest = None
            range_tup = range_header_to_tuple(req.headers.get('Range', None))
            assert range_tup != ()
            if range_tup:
                (fb, lb) = range_tup
                if fb > 0:
                    rest = fb
            # -- range support modifications end here

            fp, retrlen = fw.retrfile(file, type, rest)

            # -- range support modifications start here
            if range_tup:
                (fb, lb) = range_tup
                if lb == '':
                    if retrlen is None or retrlen == 0:
                        raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
                    lb = retrlen
                    retrlen = lb - fb
                    if retrlen < 0:
                        # beginning of range is larger than file
                        raise RangeError('Requested Range Not Satisfiable')
                else:
                    retrlen = lb - fb
                    fp = RangeableFileObject(fp, (0, retrlen))
            # -- range support modifications end here

            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            sf = StringIO(headers)
            headers = mimetools.Message(sf)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors, msg:
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
Example #29
def main():
    """Main routine"""
    mgr = NClientsOptionParser()
    (opts, args) = mgr.get_opt()

    url = opts.url.replace('?', ';').replace('&amp;', ';').replace('&', ';')
    logname  = opts.logname
    dasquery = opts.dasquery
    idx      = opts.idx
    limit    = 1
    nclients = opts.nclients
    debug    = opts.debug
    headers  = {'Accept': opts.accept}
    urlpath, args = urllib.splitattr(url)
    arr      = urlpath.split('/')
    if  arr[0] == 'http:' or arr[0] == 'https:':
        host = arr[0] + '//' + arr[2]
    else:
        msg  = 'Provided URL="%s" does not contain http:// part' % opts.url
        raise Exception(msg)
    method   = '/' + '/'.join(arr[3:])
    params   = {}
    for item in args:
        key, val = item.split('=')
        params[key] = val

    # do clean-up
    for filename in os.listdir('.'):
        if  filename.find('.log') != -1 and filename.find(logname) != -1:
            os.remove(filename)

    # perform action
    array = []
    if  nclients <= 10:
        array += range(1, nclients+1)
    if  nclients <= 100 and nclients > 10:
        array  = range(1, 10)
        array += range(10, nclients+1, 10)
    if  nclients <= 1000 and nclients > 100:
        array  = range(1, 10)
        array += range(10, 100, 10)
        array += range(100, nclients+1, 100)

    for nclients in array:
        print "Run job with %s clients" % nclients
        runjob(nclients, host, method, params, headers, idx, limit, 
               debug, logname, dasquery)

    # analyze results
    file_list = []
    for filename in os.listdir('.'):
        if  filename.find('.log') != -1:
            file_list.append(filename)
    xxx = []
    yyy = []
    std = []
    for file in natsorted(file_list):
        name, _ = file.split('.')
        xxx.append(int(name.split(logname)[-1]))
        mean, std2 = avg_std(file)
        yyy.append(mean)
        std.append(std2)
    try:
        make_plot(xxx, yyy, std, opts.pdf)
    except:
        print "xxx =", xxx
        print "yyy =", yyy
        print "std =", std
Example #30
"""An extensible library for opening URLs using a variety of protocols
Example #31
    def smb_open(self, req):
        global USE_NTLM, MACHINE_NAME

        host = req.get_host()
        if not host:
            raise urllib2.URLError('SMB error: no host given')
        host, port = splitport(host)
        if port is None:
            port = 139
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''

        domain = ''
        if ';' in user:
            domain, user = user.split(';', 1)

        passwd = passwd or ''
        myname = MACHINE_NAME or self.generateClientMachineName()

        n = NetBIOS()
        names = n.queryIPForName(host)
        if names:
            server_name = names[0]
        else:
            raise urllib2.URLError('SMB error: Hostname does not reply back with its machine name')

        path, attrs = splitattr(req.get_selector())
        if path.startswith('/'):
            path = path[1:]
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        service, path = dirs[0], '/'.join(dirs[1:])

        try:
            conn = SMBConnection(user, passwd, myname, server_name, domain=domain, use_ntlm_v2 = USE_NTLM)
            conn.connect(host, port)

            if req.has_data():
                data_fp = req.get_data()
                filelen = conn.storeFile(service, path, data_fp)

                headers = "Content-length: 0\n"
                fp = StringIO("")
            else:
                fp = self.createTempFile()
                file_attrs, retrlen = conn.retrieveFile(service, path, fp)
                fp.seek(0)

                headers = ""
                mtype = mimetypes.guess_type(req.get_full_url())[0]
                if mtype:
                    headers += "Content-type: %s\n" % mtype
                if retrlen is not None and retrlen >= 0:
                    headers += "Content-length: %d\n" % retrlen

            sf = StringIO(headers)
            headers = mimetools.Message(sf)

            return addinfourl(fp, headers, req.get_full_url())
        except Exception, ex:
            raise urllib2.URLError, ('smb error: %s' % ex), sys.exc_info()[2]
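How the user/domain fields fall out of the split helpers for an SMB URL of the form smb://DOMAIN;user:passwd@server/share/path (hypothetical values, Python 2 urllib):

import urllib

user, host = urllib.splituser('WORKGROUP;alice:secret@fileserver')
# ('WORKGROUP;alice:secret', 'fileserver')

user, passwd = urllib.splitpasswd(user)    # ('WORKGROUP;alice', 'secret')
domain, user = user.split(';', 1)          # ('WORKGROUP', 'alice')

path, attrs = urllib.splitattr('/share/docs/report.txt')   # ('/share/docs/report.txt', [])
dirs = path.lstrip('/').split('/')
service, path = dirs[0], '/'.join(dirs[1:])
# service == 'share', path == 'docs/report.txt'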
Example #32
 def __init__(self, url, method, params):
     Assert(method == 'GET')
     netloc, path = splithost(url)
     if not netloc: raise IOError, ('ftp error', 'no host given')
     host, port = splitport(netloc)
     user, host = splituser(host)
     if user: user, passwd = splitpasswd(user)
     else: passwd = None
     host = socket.gethostbyname(host)
     if port:
         try:
             port = string.atoi(port)
         except string.atoi_error:
             raise IOError, ('ftp error', 'bad port')
     else:
         port = ftplib.FTP_PORT
     path, attrs = splitattr(path)
     self.url = "ftp://%s%s" % (netloc, path)
     dirs = string.splitfields(path, '/')
     dirs, file = dirs[:-1], dirs[-1]
     self.content_length = None
     if not file:
         self.content_type, self.content_encoding = None, None
         type = 'd'
     else:
         self.content_type, self.content_encoding = app.guess_type(file)
         if self.content_encoding:
             type = 'i'
         elif self.content_type and self.content_type[:5] == 'text/':
             type = 'a'
         elif file[-1] == '/':
             type = 'd'
         else:
             type = 'i'
     if dirs and not dirs[0]: dirs = dirs[1:]
     key = (user, host, port, string.joinfields(dirs, '/'))
     self.debuglevel = None
     try:
         if not ftpcache.has_key(key):
             ftpcache[key] = []
         for attr in attrs:
             [attr, value] = map(string.lower, splitvalue(attr))
             if attr == 'type' and value in ('a', 'i', 'd'):
                 type = value
             elif attr == 'debug':
                 try:
                     self.debuglevel = string.atoi(value)
                 except string.atoi_error:
                     pass
         candidates = ftpcache[key]
         for cand in candidates:
             if not cand.busy():
                 break
         else:
             cand = ftpwrapper(user, passwd,
                               host, port, dirs, self.debuglevel)
             candidates.append(cand)
         # XXX Ought to clean the cache every once in a while
         self.cand = cand
         self.sock, self.isdir = cand.retrfile(file, type)
         self.content_length = cand.content_length
     except ftplib.all_errors, msg:
         raise IOError, ('ftp error', msg)
Example #33
 def open_ftp(self, url):
     """Use FTP protocol."""
     if not isinstance(url, str):
         raise IOError(('ftp error', 'proxy support for ftp protocol currently not implemented'))
     import mimetypes
     import mimetools
     try:
         from cStringIO import StringIO
     except ImportError:
         from StringIO import StringIO
     host, path = urllib.splithost(url)
     if not host:
         raise IOError(('ftp error', 'no host given'))
     host, port = urllib.splitport(host)
     user, host = urllib.splituser(host)
     if user:
         user, passwd = urllib.splitpasswd(user)
     else:
         passwd = None
     host = urllib.unquote(host)
     user = urllib.unquote(user or '')
     passwd = urllib.unquote(passwd or '')
     host = socket.gethostbyname(host)
     if not port:
         import ftplib  # noqa
         port = ftplib.FTP_PORT
     else:
         port = int(port)
     path, attrs = urllib.splitattr(path)
     path = urllib.unquote(path)
     dirs = path.split('/')
     dirs, file = dirs[:-1], dirs[-1]
     if dirs and not dirs[0]:
         dirs = dirs[1:]
     if dirs and not dirs[0]:
         dirs[0] = '/'
     key = user, host, port, '/'.join(dirs)
     # XXX thread unsafe!
     if len(self.ftpcache) > urllib.MAXFTPCACHE:
         # Prune the cache, rather arbitrarily
         for k in self.ftpcache.keys():
             if k != key:
                 v = self.ftpcache[k]
                 del self.ftpcache[k]
                 v.close()
     try:
         if not key in self.ftpcache:
             self.ftpcache[key] = \
                 Myftpwrapper(user, passwd, host, port, dirs)
         if not file:
             type = 'D'
         else:
             type = 'I'
         for attr in attrs:
             attr, value = urllib.splitvalue(attr)
             if attr.lower() == 'type' and \
                value in ('a', 'A', 'i', 'I', 'd', 'D'):
                 type = value.upper()
         (fp, retrlen) = self.ftpcache[key].retrfile(file, type,
                                                     rest=os.environ.get("REST"))
         mtype = mimetypes.guess_type("ftp:" + url)[0]
         headers = ""
         if mtype:
             headers += "Content-Type: %s\n" % mtype
         if retrlen is not None and retrlen >= 0:
             headers += "Content-Length: %d\n" % retrlen
         headers = mimetools.Message(StringIO(headers))
         return urllib.addinfourl(fp, headers, "ftp:" + url)
     except urllib.ftperrors() as msg:
         raise IOError(('ftp error', msg), sys.exc_info()[2])
Example #34
 if not filename and (not type or type == 'file'):
     try:
         fp = self.open_local_file(url1)
         hdrs = fp.info()
         del fp
         return url2pathname(urllib.splithost(url1)[1]), hdrs
     except IOError, msg:
         pass
 fp = self.open(url)
 headers = fp.info()
 if not filename:
     import tempfile
     garbage, path = urllib.splittype(url)
     garbage, path = urllib.splithost(path or "")
     path, garbage = urllib.splitquery(path or "")
     path, garbage = urllib.splitattr(path or "")
     suffix = os.path.splitext(path)[1]
     filename = tempfile.mktemp(suffix)
     self.__tempfiles.append(filename)
 result = filename, headers
 if self.tempcache is not None:
     self.tempcache[url] = result
 tfp = open(filename, 'wb')
 bs = blocksize
 size = -1
 blocknum = 1
 if reporthook:
     if headers.has_key("content-length"):
         size = int(headers["Content-Length"])
     stayopen = reporthook(0, bs, size, self._userObject)
     if stayopen == 0:
Example #35
class FancyURLopener(_OriginalFancyURLopener):
    def __init__(self, *args):
        apply(_OriginalFancyURLopener.__init__, (self, ) + args)
        self.tempcache = {}
        self.__unlink = os.unlink  # See cleanup()
        self.__OriginalFancyURLopener = _OriginalFancyURLopener

        # prefetch support
        self.__prefetchcache = {}
        self.__prefetchtempfiles = {}

    def __del__(self):
        self.__OriginalFancyURLopener.__del__(self)
        del self.__OriginalFancyURLopener

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        void = fp.read()
        fp.close()
        raise IOError, (errcode, 'http error: ' + errmsg, headers)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        # XXX The server can force infinite recursion here!
        if headers.has_key('location'):
            newurl = headers['location']
        elif headers.has_key('uri'):
            newurl = headers['uri']
        else:
            return
        void = fp.read()
        fp.close()
        fp = self.open(newurl)
        h = fp.info()
        if not h.has_key('Content-Location') and \
           not h.has_key('Content-Base'):
            h.dict['content-location'] = newurl
            h.headers.append('Content-Location: %s\r\n' % newurl)
        return fp

    def prompt_user_passwd(self, host, realm):
        import windowinterface
        try:
            w = windowinterface.Window('passwd', grab=1)
        except AttributeError:
            return _OriginalFancyURLopener.prompt_user_passwd(
                self, host, realm)
        l = w.Label('Enter username and password for %s at %s' % (realm, host))
        t1 = w.TextInput('User:', '',
                         None, (self.usercb, ()),
                         top=l,
                         left=None,
                         right=None)
        t2 = w.TextInput('Passwd:',
                         '',
                         None, (self.passcb, ()),
                         modifyCB=self.modifycb,
                         top=t1,
                         left=None,
                         right=None)
        b = w.ButtonRow([('OK', (self.do_return, ())),
                         ('Cancel', (self.cancelcb, ()))],
                        vertical=0,
                        top=t2,
                        left=None,
                        right=None,
                        bottom=None)
        self.userw = t1
        self.passwdw = t2
        self.passwd = []
        self.user = ''
        self.password = ''
        w.show()
        try:
            windowinterface.mainloop()
        except _end_loop:
            pass
        w.hide()
        w.close()
        del self.userw, self.passwdw
        return self.user, self.password

    def modifycb(self, text):
        if text:
            if text == '\b':
                if self.passwd:
                    del self.passwd[-1]
                return ''
            self.passwd.append(text)
            return '*' * len(text)

    def usercb(self):
        self.user = self.userw.gettext()
        if self.password:
            self.do_return()
        else:
            self.passwdw.setfocus()

    def passcb(self):
        self.password = string.joinfields(self.passwd, '')
        if self.user:
            self.do_return()
        else:
            self.userw.setfocus()

    def cancelcb(self):
        self.user = self.password = None
        self.do_return()

    def do_return(self):
        raise _end_loop

    def open_local_file(self, url):
        import urlparse
        scheme, netloc, url, params, query, fragment = urlparse.urlparse(url)
        url = urlparse.urlunparse((scheme, netloc, url, '', '', ''))
        return _OriginalFancyURLopener.open_local_file(self, url)

    #
    # Prefetch section
    #
    # override retrieve for prefetch implementation
    def retrieve(self, url, filename=None, reporthook=None):
        # retrieve(url) returns (filename, None) for a local object
        # or (tempfilename, headers) for a remote object.
        url = unwrap(url)
        import urlparse
        scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
        if not scheme or scheme == 'file':
            i = string.find(path, '?')
            if i > 0:
                path = path[:i]
            url = urlparse.urlunparse((scheme, netloc, path, '', '', ''))
        if self.__prefetchcache.has_key(url):
            # complete prefetch first
            #print 'completing prefetch'
            self.__fin_retrieve(url)
        if self.__prefetchtempfiles.has_key(url):
            #print 'retrieving prefetched',self.__prefetchtempfiles[url]
            return self.__prefetchtempfiles[url]
        return _OriginalFancyURLopener.retrieve(self, url, filename,
                                                reporthook)

    # override cleanup for prefetch implementation
    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.

        # first close open streams
        for fp, tfp in self.__prefetchcache.values():
            fp.close()
            tfp.close()
        self.__prefetchcache = {}

        # unlink temp files
        for file, header in self.__prefetchtempfiles.values():
            try:
                self.__unlink(file)
            except:
                pass
        self.__prefetchtempfiles = {}

        # call original cleanup
        self.__OriginalFancyURLopener.cleanup(self)

    # open stream to url and read headers but not data yet
    # see retrieve for signature
    def begin_retrieve(self, url, filename=None, reporthook=None):
        url = unwrap(url)
        self.__clean_retrieve(url)
        type, url1 = splittype(url)
        if not filename and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                del fp
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError, msg:
                pass
        fp = self.open(url)
        headers = fp.info()
        if not filename:
            import tempfile
            garbage, path = splittype(url)
            garbage, path = splithost(path or "")
            path, garbage = splitquery(path or "")
            path, garbage = splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            filename = tempfile.mktemp(suffix)
            self.__prefetchtempfiles[url] = filename, headers
        tfp = open(filename, 'wb')
        self.__prefetchcache[url] = fp, tfp
        return filename, headers
Example #36
class MyFancyUrlopener(urllib.FancyURLopener):
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object."""
        url = urllib.unwrap(urllib.toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = urllib.splittype(url)
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                del fp
                return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
            except IOError, msg:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            code = fp.code
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                garbage, path = urllib.splittype(url)
                garbage, path = urllib.splithost(path or "")
                path, garbage = urllib.splitquery(path or "")
                path, garbage = urllib.splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers, code
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024 * 8
                size = -1
                read = 0
                blocknum = 0
                if reporthook:
                    if "content-length" in headers:
                        size = int(headers["Content-Length"])
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if block == "":
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()
        del fp
        del tfp

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise urllib.ContentTooShortError(
                "retrieval incomplete: got only %i out "
                "of %i bytes" % (read, size), result)

        return result
Example #37
    def ftp_open(self, req):
        host = req.get_host()
        if not host:
            raise IOError('ftp error', 'no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise urllib2.URLError(msg)
        path, attrs = splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs)
            if file:
                type = 'I'
            else:
                type = 'D'

            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()

            # -- range support modifications start here
            rest = None
            range_tup = range_header_to_tuple(req.headers.get('Range', None))
            assert range_tup != ()
            if range_tup:
                (fb, lb) = range_tup
                if fb > 0:
                    rest = fb
            # -- range support modifications end here

            fp, retrlen = fw.retrfile(file, type, rest)

            # -- range support modifications start here
            if range_tup:
                (fb, lb) = range_tup
                if lb == '':
                    if retrlen is None or retrlen == 0:
                        raise RangeError('Requested Range Not Satisfiable due'
                                         ' to unobtainable file length.')
                    lb = retrlen
                    retrlen = lb - fb
                    if retrlen < 0:
                        # beginning of range is larger than file
                        raise RangeError('Requested Range Not Satisfiable')
                else:
                    retrlen = lb - fb
                    fp = RangeableFileObject(fp, (0, retrlen))
            # -- range support modifications end here

            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors as msg:
            raise IOError('ftp error', msg)
Example #38
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        # overridden method from urllib.URLopener
        self._cancelDownload = False
        url = urllib.unwrap(urllib.toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = urllib.splittype(url)
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
            except IOError:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                garbage, path = urllib.splittype(url)
                garbage, path = urllib.splithost(path or "")
                path, garbage = urllib.splitquery(path or "")
                path, garbage = urllib.splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024 * 8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while not self._cancelDownload:
                    block = fp.read(bs)
                    if block == "":
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise urllib.ContentTooShortError(
                "retrieval incomplete: got only %i out "
                "of %i bytes" % (read, size), result)

        if self._cancelDownload and os.path.exists(filename):
            os.remove(filename)
            self.wasCanceled = True
        return result