Exemple #1
0
    def get(self, hash, mirror, peers = [], method="GET", modtime=None):
        """Download from a list of peers or fallback to a mirror.
        
        @type hash: L{Hash.HashObject}
        @param hash: the hash object containing the expected hash for the file
        @param mirror: the URI of the file on the mirror
        @type peers: C{list} of C{string}
        @param peers: a list of the peer info where the file can be found
            (optional, defaults to downloading from the mirror)
        @type method: C{string}
        @param method: the HTTP method to use, 'GET' or 'HEAD'
            (optional, defaults to 'GET')
        @type modtime: C{int}
        @param modtime: the modification time to use for an 'If-Modified-Since'
            header, as seconds since the epoch
            (optional, defaults to not sending that header)
        """
        if not peers or method != "GET" or modtime is not None:
            log.msg('Downloading (%s) from mirror %s' % (method, mirror))
            parsed = urlparse(mirror)
            assert parsed[0] == "http", "Only HTTP is supported, not '%s'" % parsed[0]
            site = splitHostPort(parsed[0], parsed[1])
            path = urlunparse(('', '') + parsed[2:])
            peer = self.getPeer(site, mirror = True)
            return peer.get(path, method, modtime)
#        elif len(peers) == 1:
#            site = uncompact(peers[0]['c'])
#            log.msg('Downloading from peer %r' % (site, ))
#            path = '/~/' + quote_plus(hash.expected())
#            peer = self.getPeer(site)
#            return peer.get(path)
        else:
            tmpfile = self.cache_dir.child(hash.hexexpected())
            return FileDownload(self, hash, mirror, peers, tmpfile).run()
Exemple #2
0
    def _fixupURLParts(self):
        """Fill in any missing scheme, host and port on this request.

        The scheme, when unset, is chosen from the 'secure' flag reported by
        C{self.chanRequest.getHostInfo()}. The host and port come from
        C{self.host} if it is set, otherwise from the 'host' header. When
        neither is available the interface address from C{getHostInfo()} is
        used, unless the client protocol is at least (1, 1), in which case
        the request is rejected.

        @raise http.HTTPError: with BAD_REQUEST when no host is available
            and C{self.clientproto} is (1, 1) or later
        """
        local_addr, is_secure = self.chanRequest.getHostInfo()
        if not self.scheme:
            schemes = ('http', 'https')
            self.scheme = schemes[is_secure]

        # Prefer the host from the request itself; the 'host' header is only
        # consulted when the request line did not carry an absolute URL.
        host_spec = self.host or self.headers.getHeader('host')
        if host_spec:
            self.host, self.port = http.splitHostPort(self.scheme, host_spec)
            return

        # No hostname anywhere: an error for newer protocol versions,
        # otherwise fall back to the interface the request arrived on.
        if self.clientproto >= (1, 1):
            raise http.HTTPError(responsecode.BAD_REQUEST)
        self.host = local_addr.host
        self.port = local_addr.port
Exemple #3
0
    def _fixupURLParts(self):
        """Complete the scheme, host and port attributes of the request.

        A missing scheme is derived from the 'secure' flag returned by
        C{self.chanRequest.getHostInfo()}. The host/port pair is split from
        C{self.host} when present, else from the 'host' header. If no host
        is given at all, clients speaking protocol (1, 1) or later are
        rejected and older ones get the interface host and port.

        @raise http.HTTPError: with BAD_REQUEST when no host could be found
            and C{self.clientproto} is (1, 1) or later
        """
        iface_addr, secure_flag = self.chanRequest.getHostInfo()
        if not self.scheme:
            scheme_choices = ('http', 'https')
            self.scheme = scheme_choices[secure_flag]

        # The header lookup only happens when the request line was not an
        # absolute URL (i.e. self.host is empty).
        raw_host = self.host or self.headers.getHeader('host')
        if not raw_host:
            # Nothing names the host: reject or fall back to the interface
            # address, depending on the protocol version.
            if self.clientproto >= (1, 1):
                raise http.HTTPError(responsecode.BAD_REQUEST)
            self.host = iface_addr.host
            self.port = iface_addr.port
            return

        self.host, self.port = http.splitHostPort(self.scheme, raw_host)
Exemple #4
0
 def addMirror(self):
     """Add the mirror as a download source if there are few peers.

     Nothing happens when the mirror was already considered, or when
     C{len(self.sitelist) + self.outstanding} has reached the
     MIN_DOWNLOAD_PEERS value of the DEFAULT config section. Otherwise the
     mirror is marked as considered and, only if its URI scheme is 'http',
     it is registered in C{self.peers} and appended to C{self.sitelist}.
     """
     if self.addedMirror:
         return
     in_use = len(self.sitelist) + self.outstanding
     if in_use >= config.getint('DEFAULT', 'MIN_DOWNLOAD_PEERS'):
         return

     # Flag is set before the scheme check, so a non-HTTP mirror is never
     # looked at again.
     self.addedMirror = True
     pieces = urlparse(self.mirror)
     if pieces[0] != "http":
         return

     site = splitHostPort(pieces[0], pieces[1])
     # Everything after the network location becomes the request path
     self.mirror_path = urlunparse(('', '') + pieces[2:])
     mirror_peer = self.manager.getPeer(site, mirror = True)
     self.peers[site] = {'peer': mirror_peer}
     self.sitelist.append(site)
Exemple #5
0
 def addMirror(self):
     """Fall back to the mirror when too few peers are available.

     Returns without doing anything if the mirror has already been
     considered, or if C{len(self.sitelist) + self.outstanding} is not
     below the MIN_DOWNLOAD_PEERS setting (DEFAULT config section).
     Otherwise the mirror is marked as considered; when its URI scheme is
     'http' it is also stored in C{self.peers} and C{self.sitelist}.
     """
     if self.addedMirror:
         return
     source_count = len(self.sitelist) + self.outstanding
     minimum = config.getint('DEFAULT', 'MIN_DOWNLOAD_PEERS')
     if source_count >= minimum:
         return

     # Marked as considered even when the scheme check below fails
     self.addedMirror = True
     parts = urlparse(self.mirror)
     scheme = parts[0]
     if scheme != "http":
         return

     site = splitHostPort(scheme, parts[1])
     # Drop scheme and network location, keeping only the path onwards
     self.mirror_path = urlunparse(('', '') + parts[2:])
     peer = self.manager.getPeer(site, mirror=True)
     self.peers[site] = {'peer': peer}
     self.sitelist.append(site)
Exemple #6
0
    def extractPath(self, url):
        """Split a full URI into the site, base directory and path.

        The site is the mirror's host and port joined by a colon. The base
        directory is the part of the URI path in front of the last
        '/dists/' or '/pool/' component (usually just '/debian'), and the
        path is the rest, starting at that component.

        E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2
        would return ('ftp.debian.org:80', '/debian',
        '/dists/sid/binary-i386/Packages.bz2').

        If neither component occurs, the base directory is guessed from the
        base directories already stored in C{self.apt_caches} for the site
        (an empty string if nothing matches) and the path is returned whole.

        @param url: the URI of the file's location on the mirror
        @rtype: (C{string}, C{string}, C{string})
        @return: the site, base directory and path to the file
        """
        # Extract the host and port
        pieces = urlparse(url)
        host, port = splitHostPort(pieces[0], pieces[1])
        site = ':'.join((host, str(port)))
        path = pieces[2]

        # Most base directories can be found from the well-known components
        split_at = max(path.rfind('/dists/'), path.rfind('/pool/'))
        if split_at >= 0:
            return site, path[:split_at], path[split_at:]

        # Uh oh, this is not good
        log.msg("Couldn't find a good base directory for path: %s" %
                (site + path))

        # Look for the longest leading part of the path that an existing
        # cache's base directory starts with (default: empty base directory)
        baseDir = ''
        if site in self.apt_caches:
            # NOTE(review): when path starts with '/', the first component is
            # the empty string, so the match becomes '/' and the next
            # candidate starts with '//'. It looks like the result can then
            # only be '' or '/' -- confirm whether that is intended.
            components = path.split('/')
            best_length = 0
            for base in self.apt_caches[site]:
                matched = ''
                for component in components:
                    candidate = matched + '/' + component
                    if not base.startswith(candidate):
                        break
                    matched = candidate
                if len(matched) > best_length:
                    best_length = len(matched)
                    baseDir = matched
        log.msg("Settled on baseDir: %s" % baseDir)

        return site, baseDir, path
Exemple #7
0
    def extractPath(self, url):
        """Break a full URI down into the site, base directory and path.

        Site is the host and port of the mirror, joined by a colon. Base
        directory is whatever precedes the last '/dists/' or '/pool/'
        component of the URI path (usually just '/debian'). Path is the
        remainder, beginning with that component.

        E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2
        would return ('ftp.debian.org:80', '/debian',
        '/dists/sid/binary-i386/Packages.bz2').

        When neither component is present, the base directory is derived
        from the base directories stored for the site in
        C{self.apt_caches} (empty if none match) and the path is left whole.

        @param url: the URI of the file's location on the mirror
        @rtype: (C{string}, C{string}, C{string})
        @return: the site, base directory and path to the file
        """
        # Extract the host and port
        url_parts = urlparse(url)
        host, port = splitHostPort(url_parts[0], url_parts[1])
        site = ':'.join((host, str(port)))
        path = url_parts[2]

        # The later of the two well-known components marks the split point
        cut = max(path.rfind('/dists/'), path.rfind('/pool/'))
        if cut >= 0:
            return site, path[:cut], path[cut:]

        # Uh oh, this is not good
        log.msg("Couldn't find a good base directory for path: %s" % (site + path))

        # Fall back to the longest leading part of the path that some
        # existing cache's base directory starts with (or an empty string)
        baseDir = ''
        if site in self.apt_caches:
            # NOTE(review): for a path starting with '/', the first piece is
            # empty, so the prefix grows to '/' and the following candidate
            # begins with '//'. The outcome then appears limited to '' or
            # '/' -- verify against the intended behavior.
            pieces = path.split('/')
            longest = 0
            for base in self.apt_caches[site]:
                prefix = ''
                for piece in pieces:
                    extended = prefix + '/' + piece
                    if not base.startswith(extended):
                        break
                    prefix = extended
                if len(prefix) > longest:
                    longest = len(prefix)
                    baseDir = prefix
        log.msg("Settled on baseDir: %s" % baseDir)

        return site, baseDir, path
Exemple #8
0
    def __init__(self, uri, resource):
        """
        Store the resource together with the scheme, host, port and path
        segments taken from C{uri}.

        @param uri: The URI to be used for mutating the request.  This MUST
            include scheme://hostname/path.
        @type uri: C{str}

        @param resource: The resource to serve after mutating the request.
        @type resource: L{twisted.web2.iweb.IResource}

        @raise ValueError: if C{uri} carries params, a query string or a
            fragment
        """
        self.resource = resource

        parsed = urlparse.urlparse(uri)
        self.scheme = parsed[0]
        self.host = parsed[1]
        self.path = parsed[2]
        # parsed[3:6] are the params, query string and fragment
        if parsed[3] or parsed[4] or parsed[5]:
            raise ValueError("Must not specify params, query args, or fragment to VHostURIRewrite")

        # Drop the leading '/', split into segments, discard the last
        # segment and unquote the rest.
        raw_segments = self.path[1:].split('/')[:-1]
        self.path = [urllib.unquote(segment) for segment in raw_segments]
        self.host, self.port = http.splitHostPort(self.scheme, self.host)
Exemple #9
0
    def locateChild(self, req, segments):
        """Rewrite the request from proxy-supplied headers, then delegate.

        The host is read from the 'host' header when C{self.sendsRealHost}
        is set, otherwise from 'x-forwarded-host'. The client address comes
        from 'x-forwarded-for'. 'x-app-scheme' and 'x-app-location' are
        optional and default to 'http' and '/'. The request's host, port,
        scheme, remoteAddr, prepath and path are overwritten accordingly.

        @param req: the request to mutate
        @param segments: the remaining path segments; they are appended to
            the new prepath to build the request path
        @raise http.HTTPError: with BAD_REQUEST when the host or the
            'x-forwarded-for' header is missing
        @return: C{self.resource} and the unchanged C{segments}
        """
        scheme_values = req.headers.getRawHeaders('x-app-scheme')

        host_header = 'x-forwarded-host'
        if self.sendsRealHost:
            host_header = 'host'
        host_values = req.headers.getRawHeaders(host_header)

        location_values = req.headers.getRawHeaders('x-app-location')
        forwarded_for = req.headers.getRawHeaders('x-forwarded-for')

        if not host_values:
            warnings.warn(
                ("No host was obtained either from Host or X-Forwarded-Host "
                 "headers.  If your proxy does not send either of these headers "
                 "use VHostURIRewrite. If your proxy sends the real host as the "
                 "Host header use AutoVHostURIRewrite(resrc, sendsRealHost=True)"))
        if not (host_values and forwarded_for):
            # some header unspecified => Error
            raise http.HTTPError(responsecode.BAD_REQUEST)

        # Only the first value of each header is used
        host = host_values[0]
        remote_ip = forwarded_for[0]

        app_location = '/'
        if location_values:
            app_location = location_values[0]

        scheme = 'http'
        if scheme_values:
            scheme = scheme_values[0]

        req.host, req.port = http.splitHostPort(scheme, host)
        req.scheme = scheme

        req.remoteAddr = address.IPv4Address('TCP', remote_ip, 0)

        # Strip the leading '/', split, and drop the final segment
        req.prepath = app_location[1:].split('/')[:-1]
        quoted = [urllib.quote(part, '') for part in (req.prepath + segments)]
        req.path = '/' + '/'.join(quoted)

        return self.resource, segments
Exemple #10
0
    def get(self, hash, mirror, peers=[], method="GET", modtime=None):
        """Download from a list of peers or fallback to a mirror.
        
        @type hash: L{Hash.HashObject}
        @param hash: the hash object containing the expected hash for the file
        @param mirror: the URI of the file on the mirror
        @type peers: C{list} of C{string}
        @param peers: a list of the peer info where the file can be found
            (optional, defaults to downloading from the mirror)
        @type method: C{string}
        @param method: the HTTP method to use, 'GET' or 'HEAD'
            (optional, defaults to 'GET')
        @type modtime: C{int}
        @param modtime: the modification time to use for an 'If-Modified-Since'
            header, as seconds since the epoch
            (optional, defaults to not sending that header)
        """
        if not peers or method != "GET" or modtime is not None:
            log.msg('Downloading (%s) from mirror %s' % (method, mirror))
            parsed = urlparse(mirror)
            assert parsed[
                0] == "http", "Only HTTP is supported, not '%s'" % parsed[0]
            site = splitHostPort(parsed[0], parsed[1])
            path = urlunparse(('', '') + parsed[2:])
            peer = self.getPeer(site, mirror=True)
            return peer.get(path, method, modtime)


#        elif len(peers) == 1:
#            site = uncompact(peers[0]['c'])
#            log.msg('Downloading from peer %r' % (site, ))
#            path = '/~/' + quote_plus(hash.expected())
#            peer = self.getPeer(site)
#            return peer.get(path)
        else:
            tmpfile = self.cache_dir.child(hash.hexexpected())
            return FileDownload(self, hash, mirror, peers, tmpfile).run()