Exemple #1
0
 def setup(self, config, cache_dir):
     """Create and wire together all the Khashmir sub-modules.

     Stores the configuration, creates the database object, this node,
     the routing table and the statistics logger, starts the KRPC
     listener on the configured UDP port, and finally loads the routing
     table, forces a refresh of it and schedules the first checkpoint.

     @type config: C{dictionary}
     @param config: the configuration parameters for the DHT
     @type cache_dir: C{string}
     @param cache_dir: the directory to store all files in
     """
     self.config = config
     self.port = config['PORT']

     # The database file name embeds the port number
     db_name = ''.join(['khashmir.', str(self.port), '.db'])
     self.store = DB(os.path.join(cache_dir, db_name))
     self.node = self._loadSelfNode('', self.port)
     self.table = KTable(self.node, config)
     self.token_secrets = [newID()]
     self.stats = StatsLogger(self.table, self.store)

     # Start listening
     broker = krpc.hostbroker(self, self.stats, config)
     self.udp = broker
     broker.protocol = krpc.KRPC
     self.listenport = reactor.listenUDP(self.port, broker)

     # Load the routing table and begin checkpointing
     self._loadRoutingTable()
     self.refreshTable(force=True)
     checkpoint_call = reactor.callLater(60, self.checkpoint)
     self.next_checkpoint = checkpoint_call
Exemple #2
0
def make(port):
    """Create a receiver and a KRPC host broker listening on a UDP port.

    @param port: the port number handed to C{reactor.listenUDP}
    @rtype: C{tuple}
    @return: the receiver, the host broker, and the object returned by
        C{reactor.listenUDP}, in that order
    """
    from stats import StatsLogger

    broker_config = {'KRPC_TIMEOUT': 9, 'KRPC_INITIAL_DELAY': 2, 'SPEW': False}
    receiver = Receiver()
    broker = hostbroker(receiver, StatsLogger(None, None), broker_config)
    broker.protocol = KRPC
    listener = reactor.listenUDP(port, broker)
    return receiver, broker, listener
Exemple #3
0
 def _startFactory(self):
     """Create the cache directories and all the sub-components.

     The download and peer directories are created below the configured
     cache directory if they do not exist yet, then the database, DHT,
     statistics logger, HTTP server, peer manager, mirror manager and
     cache manager are created in that order.

     The C{_dhtStarted} callback is attached to the DHT start deferred
     only after every sub-component has been assigned: a deferred that
     has already fired runs a newly added callback immediately, and the
     callback needs C{self.cache} to exist.
     """
     log.msg('Starting the main apt_p2p application')
     self.cache_dir = FilePath(config.get('DEFAULT', 'CACHE_DIR'))
     if not self.cache_dir.child(download_dir).exists():
         self.cache_dir.child(download_dir).makedirs()
     if not self.cache_dir.child(peer_dir).exists():
         self.cache_dir.child(peer_dir).makedirs()
     self.db = DB(self.cache_dir.child('apt-p2p.db'))
     self.dht = DHT(self.dhtClass, self.db)
     df = self.dht.start()
     self.stats = StatsLogger(self.db)
     self.http_server = TopLevel(self.cache_dir.child(download_dir),
                                 self.db, self)
     self.http_server.getHTTPFactory().startFactory()
     self.peers = PeerManager(self.cache_dir.child(peer_dir), self.dht,
                              self.stats)
     self.mirrors = MirrorManager(self.cache_dir)
     self.cache = CacheManager(self.cache_dir.child(download_dir), self.db,
                               self)
     # Attached last so the callback never runs before self.cache is set
     df.addCallback(self._dhtStarted)
Exemple #4
0
 def _startFactory(self):
     """Create the cache directories and all the sub-components.

     The download and peer directories are created below the configured
     cache directory if they do not exist yet, then the database, DHT,
     statistics logger, HTTP server, peer manager, mirror manager and
     cache manager are created in that order.

     The C{_dhtStarted} callback is attached to the DHT start deferred
     only after every sub-component has been assigned: a deferred that
     has already fired runs a newly added callback immediately, and the
     callback needs C{self.cache} to exist.
     """
     log.msg('Starting the main apt_p2p application')
     self.cache_dir = FilePath(config.get('DEFAULT', 'CACHE_DIR'))
     if not self.cache_dir.child(download_dir).exists():
         self.cache_dir.child(download_dir).makedirs()
     if not self.cache_dir.child(peer_dir).exists():
         self.cache_dir.child(peer_dir).makedirs()
     self.db = DB(self.cache_dir.child('apt-p2p.db'))
     self.dht = DHT(self.dhtClass, self.db)
     df = self.dht.start()
     self.stats = StatsLogger(self.db)
     self.http_server = TopLevel(self.cache_dir.child(download_dir), self.db, self)
     self.http_server.getHTTPFactory().startFactory()
     self.peers = PeerManager(self.cache_dir.child(peer_dir), self.dht, self.stats)
     self.mirrors = MirrorManager(self.cache_dir)
     self.cache = CacheManager(self.cache_dir.child(download_dir), self.db, self)
     # Attached last so the callback never runs before self.cache is set
     df.addCallback(self._dhtStarted)
Exemple #5
0
class AptP2P(protocol.Factory):
    """The main code object that does all of the work.

    Contains all of the sub-components that do all the low-level work, and
    coordinates communication between them.

    @type dhtClass: L{interfaces.IDHT}
    @ivar dhtClass: the DHT class to use
    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @type db: L{db.DB}
    @ivar db: the database to use for tracking files and hashes
    @type dht: L{DHTManager.DHT}
    @ivar dht: the manager for DHT requests
    @type stats: L{stats.StatsLogger}
    @ivar stats: the statistics logger to record sent data to
    @type http_server: L{HTTPServer.TopLevel}
    @ivar http_server: the web server that will handle all requests from apt
        and from other peers
    @type peers: L{PeerManager.PeerManager}
    @ivar peers: the manager of all downloads from mirrors and other peers
    @type mirrors: L{MirrorManager.MirrorManager}
    @ivar mirrors: the manager of downloaded information about mirrors which
        can be queried to get hashes from file names
    @type cache: L{CacheManager.CacheManager}
    @ivar cache: the manager of all downloaded files
    @type my_addr: C{string}, C{int}
    @ivar my_addr: the IP address and port of this peer
    """

    def __init__(self, dhtClass):
        """Initialize all the sub-components.

        Only the DHT class and an empty address are stored here, the
        sub-components themselves are created by L{_startFactory}.

        @type dhtClass: L{interfaces.IDHT}
        @param dhtClass: the DHT class to use
        """
        log.msg('Initializing the main apt_p2p application')
        self.dhtClass = dhtClass
        self.my_addr = None

    #{ Factory interface
    def startFactory(self):
        """Schedule L{_startFactory} through the reactor with a zero delay."""
        reactor.callLater(0, self._startFactory)

    def _startFactory(self):
        """Create the cache directories and all the sub-components.

        The download and peer directories are created below the configured
        cache directory if they do not exist yet, then the database, DHT,
        statistics logger, HTTP server, peer manager, mirror manager and
        cache manager are created in that order.

        The L{_dhtStarted} callback is attached to the DHT start deferred
        only after every sub-component has been assigned: a deferred that
        has already fired runs a newly added callback immediately, and
        L{_dhtStarted} needs C{self.cache} to exist.
        """
        log.msg('Starting the main apt_p2p application')
        self.cache_dir = FilePath(config.get('DEFAULT', 'CACHE_DIR'))
        if not self.cache_dir.child(download_dir).exists():
            self.cache_dir.child(download_dir).makedirs()
        if not self.cache_dir.child(peer_dir).exists():
            self.cache_dir.child(peer_dir).makedirs()
        self.db = DB(self.cache_dir.child('apt-p2p.db'))
        self.dht = DHT(self.dhtClass, self.db)
        df = self.dht.start()
        self.stats = StatsLogger(self.db)
        self.http_server = TopLevel(self.cache_dir.child(download_dir),
                                    self.db, self)
        self.http_server.getHTTPFactory().startFactory()
        self.peers = PeerManager(self.cache_dir.child(peer_dir), self.dht,
                                 self.stats)
        self.mirrors = MirrorManager(self.cache_dir)
        self.cache = CacheManager(self.cache_dir.child(download_dir), self.db,
                                  self)
        # Attached last so the callback never runs before self.cache is set
        df.addCallback(self._dhtStarted)

    def _dhtStarted(self, result):
        """Save the returned address and start scanning the cache."""
        self.my_addr = result
        self.cache.scanDirectories()

    def stopFactory(self):
        """Stop the HTTP factory, clean up the mirrors, save the stats and close the DB."""
        log.msg('Stoppping the main apt_p2p application')
        self.http_server.getHTTPFactory().stopFactory()
        self.mirrors.cleanup()
        self.stats.save()
        self.db.close()

    def buildProtocol(self, addr):
        """Delegate the building of the protocol to the HTTP server's factory."""
        return self.http_server.getHTTPFactory().buildProtocol(addr)

    #{ Other functions
    def getStats(self):
        """Retrieve and format the statistics for the program.

        @rtype: C{string}
        @return: the formatted HTML page containing the statistics
        """
        out = '<html><body>\n\n'
        out += self.stats.formatHTML(self.my_addr)
        out += '\n\n'
        out += self.dht.getStats()
        out += '\n</body></html>\n'
        return out

    #{ Main workflow
    def get_resp(self, req, url, orig_resp=None):
        """Lookup a hash for the file in the local mirror info.

        Starts the process of getting a response to an apt request.

        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        @type orig_resp: L{twisted.web2.http.Response}
        @param orig_resp: the response from the cache to be sent to apt
            (optional, ignored if missing)
        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred that will be called back with the response
        """
        d = defer.Deferred()

        log.msg('Trying to find hash for %s' % url)
        findDefer = self.mirrors.findHash(unquote(url))

        findDefer.addCallbacks(self.findHash_done,
                               self.findHash_error,
                               callbackArgs=(req, url, orig_resp, d),
                               errbackArgs=(req, url, orig_resp, d))
        # An exception raised by the workflow steps run from this chain must
        # still fire the returned deferred, or the request never completes
        findDefer.addErrback(self._workflow_failed, url, d)
        return d

    def _workflow_failed(self, err, url, d):
        """Report an unexpected error in a workflow step to the waiting request.

        Used as the last errback of the internal deferred chains, so that an
        exception raised in one of the workflow methods is logged and passed
        on to the request's deferred instead of leaving it unfired.

        @type err: L{twisted.python.failure.Failure}
        @param err: the error that was raised
        @param url: the URI of the actual mirror request
        @type d: L{twisted.internet.defer.Deferred}
        @param d: the deferred returned by L{get_resp} for the request
        """
        log.msg('Unexpected error while processing the request for %s' % url)
        log.err(err)
        # The deferred can only be fired once, it may already have a result
        if not d.called:
            d.errback(err)

    def findHash_error(self, failure, req, url, orig_resp, d):
        """Process the error in hash lookup by returning an empty L{HashObject}."""
        # Note: the 'failure' argument hides the module of the same name here
        log.msg('Hash lookup for %s resulted in an error: %s' %
                (url, failure.getErrorMessage()))
        self.findHash_done(HashObject(), req, url, orig_resp, d)

    def findHash_done(self, hash, req, url, orig_resp, d):
        """Use the returned hash to lookup the file in the cache.

        If the hash was not found, the workflow skips down to download from
        the mirror (L{startDownload}), or checks the freshness of an old
        response if there is one.

        @type hash: L{Hash.HashObject}
        @param hash: the hash object containing the expected hash for the file
        """
        if hash.expected() is None:
            log.msg('Hash for %s was not found' % url)
            # Send the old response or get a new one
            if orig_resp:
                self.check_freshness(req, url, orig_resp, d)
            else:
                self.startDownload([], req, hash, url, d)
        else:
            log.msg('Found hash %s for %s' % (hash.hexexpected(), url))

            # Lookup hash in cache
            locations = self.db.lookupHash(hash.expected(), filesOnly=True)
            self.getCachedFile(hash, req, url, d, locations)

    def check_freshness(self, req, url, orig_resp, d):
        """Send a HEAD to the mirror to check if the response from the cache is still valid.

        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        @type orig_resp: L{twisted.web2.http.Response}
        @param orig_resp: the response from the cache to be sent to apt
        """
        log.msg('Checking if %s is still fresh' % url)
        modtime = orig_resp.headers.getHeader('Last-Modified')
        headDefer = self.peers.get(HashObject(),
                                   url,
                                   method="HEAD",
                                   modtime=modtime)
        headDefer.addCallbacks(self.check_freshness_done,
                               self.check_freshness_error,
                               callbackArgs=(req, url, orig_resp, d),
                               errbackArgs=(req, url, d))
        # Make sure an exception in either handler still fires the request
        headDefer.addErrback(self._workflow_failed, url, d)

    def check_freshness_done(self, resp, req, url, orig_resp, d):
        """Return the fresh response, if stale start to redownload.

        @type resp: L{twisted.web2.http.Response}
        @param resp: the response from the mirror to the HEAD request
        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        @type orig_resp: L{twisted.web2.http.Response}
        @param orig_resp: the response from the cache to be sent to apt
        """
        if resp.code == 304:
            log.msg('Still fresh, returning: %s' % url)
            d.callback(orig_resp)
        else:
            log.msg('Stale, need to redownload: %s' % url)
            self.startDownload([], req, HashObject(), url, d)

    def check_freshness_error(self, err, req, url, d):
        """Mirror request failed, continue with download.

        @param err: the response from the mirror to the HEAD request
        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        """
        log.err(err)
        self.startDownload([], req, HashObject(), url, d)

    def getCachedFile(self, hash, req, url, d, locations):
        """Try to return the file from the cache, otherwise move on to a DHT lookup.

        The locations are tried one at a time, each attempt removes the
        first entry from the list.

        @type locations: C{list} of C{dictionary}
        @param locations: the files in the cache that match the hash,
            the dictionary contains a key 'path' whose value is a
            L{twisted.python.filepath.FilePath} object for the file.
        """
        if not locations:
            log.msg('Failed to return file from cache: %s' % url)
            self.lookupHash(req, hash, url, d)
            return

        # Get the first possible location from the list
        cached_path = locations.pop(0)['path']
        log.msg('Returning cached file: %s' % cached_path.path)

        # Get its response. maybeDeferred gives a single code path for an
        # immediate response, a deferred one, and an exception raised by
        # renderHTTP (which then moves on to the next location).
        resp = defer.maybeDeferred(static.File(cached_path.path).renderHTTP,
                                   req)
        resp.addBoth(self._getCachedFile, hash, req, url, d, locations)
        resp.addErrback(self._workflow_failed, url, d)

    def _getCachedFile(self, resp, hash, req, url, d, locations):
        """Check the returned response to be sure it is valid.

        A failure, or a response code outside of 200-399, causes the next
        possible location to be tried.
        """
        if isinstance(resp, failure.Failure):
            log.msg('Got error trying to get cached file')
            log.err(resp)
            # Try the next possible location
            self.getCachedFile(hash, req, url, d, locations)
            return

        log.msg('Cached response: %r' % resp)

        if 200 <= resp.code < 400:
            d.callback(resp)
        else:
            # Try the next possible location
            self.getCachedFile(hash, req, url, d, locations)

    def lookupHash(self, req, hash, url, d):
        """Lookup the hash in the DHT.

        L{startDownload} is called with the result whether the lookup
        succeeded or failed.
        """
        log.msg('Looking up hash in DHT for file: %s' % url)
        key = hash.expected()
        lookupDefer = self.dht.get(key)
        lookupDefer.addBoth(self.startDownload, req, hash, url, d)
        # Make sure an exception in startDownload still fires the request
        lookupDefer.addErrback(self._workflow_failed, url, d)

    def startDownload(self, values, req, hash, url, d):
        """Start the download of the file.

        The download will be from peers if the DHT lookup succeeded, or
        from the mirror otherwise.

        @type values: C{list} of C{dictionary}
        @param values: the returned values from the DHT containing peer
            download information (anything that is not a non-empty list,
            such as a failed lookup, results in a download without peers)
        """
        # Remove some headers Apt sets in the request
        for header in ('If-Modified-Since', 'Range', 'If-Range'):
            req.headers.removeHeader(header)

        if isinstance(values, list) and values:
            log.msg('Found peers for %s: %r' % (url, values))
            # Download from the found peers
            getDefer = self.peers.get(hash, url, values)
            getDefer.addCallback(self.check_response, hash, url)
        else:
            if isinstance(values, list):
                log.msg('Peers for %s were not found' % url)
            else:
                log.msg('DHT lookup for %s failed with error %r' %
                        (url, values))
            # NOTE: the L{final_fallback} retry errback is currently disabled
            getDefer = self.peers.get(hash, url)

        # Both kinds of download are saved to the cache and then returned
        getDefer.addCallback(self.cache.save_file, hash, url)
        getDefer.addErrback(self.cache.save_error, url)
        getDefer.addCallbacks(d.callback, d.errback)

    def check_response(self, response, hash, url):
        """Check the response from peers, and download from the mirror if it is not successful.

        @rtype: L{twisted.internet.defer.Deferred} or the original response
        @return: the response if its code is in the 200-299 range, otherwise
            the deferred for a new download that does not use any peers
        """
        if response.code < 200 or response.code >= 300:
            log.msg(
                'Download from peers failed, going to direct download: %s' %
                url)
            # NOTE: the L{final_fallback} retry errback is currently disabled
            getDefer = self.peers.get(hash, url)
            return getDefer
        return response

    def final_fallback(self, err, hash, url):
        """Final retry if the mirror still generated an error."""
        log.msg('Download from mirror failed, retrying once only: %s' % url)
        log.err(err)
        getDefer = self.peers.get(hash, url)
        return getDefer

    def new_cached_file(self,
                        file_path,
                        hash,
                        new_hash,
                        url=None,
                        forceDHT=False):
        """Add a newly cached file to the mirror info and/or the DHT.

        If the file was downloaded, set url to the path it was downloaded for.
        Doesn't add a file to the DHT unless a hash was found for it
        (but does add it anyway if forceDHT is True).

        @type file_path: L{twisted.python.filepath.FilePath}
        @param file_path: the location of the file in the local cache
        @type hash: L{Hash.HashObject}
        @param hash: the original (expected) hash object containing also the
            hash of the downloaded file
        @type new_hash: C{boolean}
        @param new_hash: whether the hash was new to this peer, and so should
            be added to the DHT
        @type url: C{string}
        @param url: the URI of the location of the file in the mirror
            (optional, defaults to not adding the file to the mirror info)
        @type forceDHT: C{boolean}
        @param forceDHT: whether to force addition of the file to the DHT
            even if the hash was not found in a mirror
            (optional, defaults to False)
        @return: the result of storing the hash in the DHT, or C{None} if
            the hash was not stored
        """
        if url:
            self.mirrors.updatedFile(url, file_path)

        # Nothing is stored until the DHT has returned this peer's address
        if self.my_addr and hash and new_hash and (hash.expected() is not None
                                                   or forceDHT):
            return self.dht.store(hash)
        return None
Exemple #6
0
class AptP2P(protocol.Factory):
    """The main code object that does all of the work.

    Contains all of the sub-components that do all the low-level work, and
    coordinates communication between them.

    @type dhtClass: L{interfaces.IDHT}
    @ivar dhtClass: the DHT class to use
    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @type db: L{db.DB}
    @ivar db: the database to use for tracking files and hashes
    @type dht: L{DHTManager.DHT}
    @ivar dht: the manager for DHT requests
    @type stats: L{stats.StatsLogger}
    @ivar stats: the statistics logger to record sent data to
    @type http_server: L{HTTPServer.TopLevel}
    @ivar http_server: the web server that will handle all requests from apt
        and from other peers
    @type peers: L{PeerManager.PeerManager}
    @ivar peers: the manager of all downloads from mirrors and other peers
    @type mirrors: L{MirrorManager.MirrorManager}
    @ivar mirrors: the manager of downloaded information about mirrors which
        can be queried to get hashes from file names
    @type cache: L{CacheManager.CacheManager}
    @ivar cache: the manager of all downloaded files
    @type my_addr: C{string}, C{int}
    @ivar my_addr: the IP address and port of this peer
    """

    def __init__(self, dhtClass):
        """Initialize all the sub-components.

        Only the DHT class and an empty address are stored here, the
        sub-components themselves are created by L{_startFactory}.

        @type dhtClass: L{interfaces.IDHT}
        @param dhtClass: the DHT class to use
        """
        log.msg('Initializing the main apt_p2p application')
        self.dhtClass = dhtClass
        self.my_addr = None

    #{ Factory interface
    def startFactory(self):
        """Schedule L{_startFactory} through the reactor with a zero delay."""
        reactor.callLater(0, self._startFactory)

    def _startFactory(self):
        """Create the cache directories and all the sub-components.

        The download and peer directories are created below the configured
        cache directory if they do not exist yet, then the database, DHT,
        statistics logger, HTTP server, peer manager, mirror manager and
        cache manager are created in that order.

        The L{_dhtStarted} callback is attached to the DHT start deferred
        only after every sub-component has been assigned: a deferred that
        has already fired runs a newly added callback immediately, and
        L{_dhtStarted} needs C{self.cache} to exist.
        """
        log.msg('Starting the main apt_p2p application')
        self.cache_dir = FilePath(config.get('DEFAULT', 'CACHE_DIR'))
        if not self.cache_dir.child(download_dir).exists():
            self.cache_dir.child(download_dir).makedirs()
        if not self.cache_dir.child(peer_dir).exists():
            self.cache_dir.child(peer_dir).makedirs()
        self.db = DB(self.cache_dir.child('apt-p2p.db'))
        self.dht = DHT(self.dhtClass, self.db)
        df = self.dht.start()
        self.stats = StatsLogger(self.db)
        self.http_server = TopLevel(self.cache_dir.child(download_dir), self.db, self)
        self.http_server.getHTTPFactory().startFactory()
        self.peers = PeerManager(self.cache_dir.child(peer_dir), self.dht, self.stats)
        self.mirrors = MirrorManager(self.cache_dir)
        self.cache = CacheManager(self.cache_dir.child(download_dir), self.db, self)
        # Attached last so the callback never runs before self.cache is set
        df.addCallback(self._dhtStarted)

    def _dhtStarted(self, result):
        """Save the returned address and start scanning the cache."""
        self.my_addr = result
        self.cache.scanDirectories()

    def stopFactory(self):
        """Stop the HTTP factory, clean up the mirrors, save the stats and close the DB."""
        log.msg('Stoppping the main apt_p2p application')
        self.http_server.getHTTPFactory().stopFactory()
        self.mirrors.cleanup()
        self.stats.save()
        self.db.close()

    def buildProtocol(self, addr):
        """Delegate the building of the protocol to the HTTP server's factory."""
        return self.http_server.getHTTPFactory().buildProtocol(addr)

    #{ Other functions
    def getStats(self):
        """Retrieve and format the statistics for the program.

        @rtype: C{string}
        @return: the formatted HTML page containing the statistics
        """
        out = '<html><body>\n\n'
        out += self.stats.formatHTML(self.my_addr)
        out += '\n\n'
        out += self.dht.getStats()
        out += '\n</body></html>\n'
        return out

    #{ Main workflow
    def get_resp(self, req, url, orig_resp = None):
        """Lookup a hash for the file in the local mirror info.

        Starts the process of getting a response to an apt request.

        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        @type orig_resp: L{twisted.web2.http.Response}
        @param orig_resp: the response from the cache to be sent to apt
            (optional, ignored if missing)
        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred that will be called back with the response
        """
        d = defer.Deferred()

        log.msg('Trying to find hash for %s' % url)
        findDefer = self.mirrors.findHash(unquote(url))

        findDefer.addCallbacks(self.findHash_done, self.findHash_error,
                               callbackArgs=(req, url, orig_resp, d),
                               errbackArgs=(req, url, orig_resp, d))
        # An exception raised by the workflow steps run from this chain must
        # still fire the returned deferred, or the request never completes
        findDefer.addErrback(self._workflow_failed, url, d)
        return d

    def _workflow_failed(self, err, url, d):
        """Report an unexpected error in a workflow step to the waiting request.

        Used as the last errback of the internal deferred chains, so that an
        exception raised in one of the workflow methods is logged and passed
        on to the request's deferred instead of leaving it unfired.

        @type err: L{twisted.python.failure.Failure}
        @param err: the error that was raised
        @param url: the URI of the actual mirror request
        @type d: L{twisted.internet.defer.Deferred}
        @param d: the deferred returned by L{get_resp} for the request
        """
        log.msg('Unexpected error while processing the request for %s' % url)
        log.err(err)
        # The deferred can only be fired once, it may already have a result
        if not d.called:
            d.errback(err)

    def findHash_error(self, failure, req, url, orig_resp, d):
        """Process the error in hash lookup by returning an empty L{HashObject}."""
        # Note: the 'failure' argument hides the module of the same name here
        log.msg('Hash lookup for %s resulted in an error: %s' %
                (url, failure.getErrorMessage()))
        self.findHash_done(HashObject(), req, url, orig_resp, d)

    def findHash_done(self, hash, req, url, orig_resp, d):
        """Use the returned hash to lookup the file in the cache.

        If the hash was not found, the workflow skips down to download from
        the mirror (L{startDownload}), or checks the freshness of an old
        response if there is one.

        @type hash: L{Hash.HashObject}
        @param hash: the hash object containing the expected hash for the file
        """
        if hash.expected() is None:
            log.msg('Hash for %s was not found' % url)
            # Send the old response or get a new one
            if orig_resp:
                self.check_freshness(req, url, orig_resp, d)
            else:
                self.startDownload([], req, hash, url, d)
        else:
            log.msg('Found hash %s for %s' % (hash.hexexpected(), url))

            # Lookup hash in cache
            locations = self.db.lookupHash(hash.expected(), filesOnly = True)
            self.getCachedFile(hash, req, url, d, locations)

    def check_freshness(self, req, url, orig_resp, d):
        """Send a HEAD to the mirror to check if the response from the cache is still valid.

        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        @type orig_resp: L{twisted.web2.http.Response}
        @param orig_resp: the response from the cache to be sent to apt
        """
        log.msg('Checking if %s is still fresh' % url)
        modtime = orig_resp.headers.getHeader('Last-Modified')
        headDefer = self.peers.get(HashObject(), url, method = "HEAD",
                                   modtime = modtime)
        headDefer.addCallbacks(self.check_freshness_done,
                               self.check_freshness_error,
                               callbackArgs = (req, url, orig_resp, d),
                               errbackArgs = (req, url, d))
        # Make sure an exception in either handler still fires the request
        headDefer.addErrback(self._workflow_failed, url, d)

    def check_freshness_done(self, resp, req, url, orig_resp, d):
        """Return the fresh response, if stale start to redownload.

        @type resp: L{twisted.web2.http.Response}
        @param resp: the response from the mirror to the HEAD request
        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        @type orig_resp: L{twisted.web2.http.Response}
        @param orig_resp: the response from the cache to be sent to apt
        """
        if resp.code == 304:
            log.msg('Still fresh, returning: %s' % url)
            d.callback(orig_resp)
        else:
            log.msg('Stale, need to redownload: %s' % url)
            self.startDownload([], req, HashObject(), url, d)

    def check_freshness_error(self, err, req, url, d):
        """Mirror request failed, continue with download.

        @param err: the response from the mirror to the HEAD request
        @type req: L{twisted.web2.http.Request}
        @param req: the initial request sent to the HTTP server by apt
        @param url: the URI of the actual mirror request
        """
        log.err(err)
        self.startDownload([], req, HashObject(), url, d)

    def getCachedFile(self, hash, req, url, d, locations):
        """Try to return the file from the cache, otherwise move on to a DHT lookup.

        The locations are tried one at a time, each attempt removes the
        first entry from the list.

        @type locations: C{list} of C{dictionary}
        @param locations: the files in the cache that match the hash,
            the dictionary contains a key 'path' whose value is a
            L{twisted.python.filepath.FilePath} object for the file.
        """
        if not locations:
            log.msg('Failed to return file from cache: %s' % url)
            self.lookupHash(req, hash, url, d)
            return

        # Get the first possible location from the list
        cached_path = locations.pop(0)['path']
        log.msg('Returning cached file: %s' % cached_path.path)

        # Get its response. maybeDeferred gives a single code path for an
        # immediate response, a deferred one, and an exception raised by
        # renderHTTP (which then moves on to the next location).
        resp = defer.maybeDeferred(static.File(cached_path.path).renderHTTP, req)
        resp.addBoth(self._getCachedFile, hash, req, url, d, locations)
        resp.addErrback(self._workflow_failed, url, d)

    def _getCachedFile(self, resp, hash, req, url, d, locations):
        """Check the returned response to be sure it is valid.

        A failure, or a response code outside of 200-399, causes the next
        possible location to be tried.
        """
        if isinstance(resp, failure.Failure):
            log.msg('Got error trying to get cached file')
            log.err(resp)
            # Try the next possible location
            self.getCachedFile(hash, req, url, d, locations)
            return

        log.msg('Cached response: %r' % resp)

        if 200 <= resp.code < 400:
            d.callback(resp)
        else:
            # Try the next possible location
            self.getCachedFile(hash, req, url, d, locations)

    def lookupHash(self, req, hash, url, d):
        """Lookup the hash in the DHT.

        L{startDownload} is called with the result whether the lookup
        succeeded or failed.
        """
        log.msg('Looking up hash in DHT for file: %s' % url)
        key = hash.expected()
        lookupDefer = self.dht.get(key)
        lookupDefer.addBoth(self.startDownload, req, hash, url, d)
        # Make sure an exception in startDownload still fires the request
        lookupDefer.addErrback(self._workflow_failed, url, d)

    def startDownload(self, values, req, hash, url, d):
        """Start the download of the file.

        The download will be from peers if the DHT lookup succeeded, or
        from the mirror otherwise.

        @type values: C{list} of C{dictionary}
        @param values: the returned values from the DHT containing peer
            download information (anything that is not a non-empty list,
            such as a failed lookup, results in a download without peers)
        """
        # Remove some headers Apt sets in the request
        for header in ('If-Modified-Since', 'Range', 'If-Range'):
            req.headers.removeHeader(header)

        if isinstance(values, list) and values:
            log.msg('Found peers for %s: %r' % (url, values))
            # Download from the found peers
            getDefer = self.peers.get(hash, url, values)
            getDefer.addCallback(self.check_response, hash, url)
        else:
            if isinstance(values, list):
                log.msg('Peers for %s were not found' % url)
            else:
                log.msg('DHT lookup for %s failed with error %r' % (url, values))
            # NOTE: the L{final_fallback} retry errback is currently disabled
            getDefer = self.peers.get(hash, url)

        # Both kinds of download are saved to the cache and then returned
        getDefer.addCallback(self.cache.save_file, hash, url)
        getDefer.addErrback(self.cache.save_error, url)
        getDefer.addCallbacks(d.callback, d.errback)

    def check_response(self, response, hash, url):
        """Check the response from peers, and download from the mirror if it is not successful.

        @rtype: L{twisted.internet.defer.Deferred} or the original response
        @return: the response if its code is in the 200-299 range, otherwise
            the deferred for a new download that does not use any peers
        """
        if response.code < 200 or response.code >= 300:
            log.msg('Download from peers failed, going to direct download: %s' % url)
            # NOTE: the L{final_fallback} retry errback is currently disabled
            getDefer = self.peers.get(hash, url)
            return getDefer
        return response

    def final_fallback(self, err, hash, url):
        """Final retry if the mirror still generated an error."""
        log.msg('Download from mirror failed, retrying once only: %s' % url)
        log.err(err)
        getDefer = self.peers.get(hash, url)
        return getDefer

    def new_cached_file(self, file_path, hash, new_hash, url = None, forceDHT = False):
        """Add a newly cached file to the mirror info and/or the DHT.

        If the file was downloaded, set url to the path it was downloaded for.
        Doesn't add a file to the DHT unless a hash was found for it
        (but does add it anyway if forceDHT is True).

        @type file_path: L{twisted.python.filepath.FilePath}
        @param file_path: the location of the file in the local cache
        @type hash: L{Hash.HashObject}
        @param hash: the original (expected) hash object containing also the
            hash of the downloaded file
        @type new_hash: C{boolean}
        @param new_hash: whether the hash was new to this peer, and so should
            be added to the DHT
        @type url: C{string}
        @param url: the URI of the location of the file in the mirror
            (optional, defaults to not adding the file to the mirror info)
        @type forceDHT: C{boolean}
        @param forceDHT: whether to force addition of the file to the DHT
            even if the hash was not found in a mirror
            (optional, defaults to False)
        @return: the result of storing the hash in the DHT, or C{None} if
            the hash was not stored
        """
        if url:
            self.mirrors.updatedFile(url, file_path)

        # Nothing is stored until the DHT has returned this peer's address
        if self.my_addr and hash and new_hash and (hash.expected() is not None or forceDHT):
            return self.dht.store(hash)
        return None
Exemple #7
0
class KhashmirBase(protocol.Factory):
    """The base Khashmir class, with base functionality and find node, but no key-value mappings.
    
    @type _Node: L{node.Node}
    @ivar _Node: the knode implementation to use for this class of DHT
    @type config: C{dictionary}
    @ivar config: the configuration parameters for the DHT
    @type pinging: C{dictionary}
    @ivar pinging: the nodes that are currently being pinged, keys are the
        node IDs, values are the Deferred or DelayedCall objects
    @type port: C{int}
    @ivar port: the port to listen on
    @type store: L{db.DB}
    @ivar store: the database to store nodes and key/value pairs in
    @type node: L{node.Node}
    @ivar node: this node
    @type table: L{ktable.KTable}
    @ivar table: the routing table
    @type token_secrets: C{list} of C{string}
    @ivar token_secrets: the current secrets to use to create tokens
    @type stats: L{stats.StatsLogger}
    @ivar stats: the statistics gatherer
    @type udp: L{krpc.hostbroker}
    @ivar udp: the factory for the KRPC protocol
    @type listenport: L{twisted.internet.interfaces.IListeningPort}
    @ivar listenport: the UDP listening port
    @type next_checkpoint: L{twisted.internet.interfaces.IDelayedCall}
    @ivar next_checkpoint: the delayed call for the next checkpoint
    """
    
    # The node class instantiated whenever this DHT creates a node object
    _Node = KNodeBase
    
    def __init__(self, config, cache_dir='/tmp'):
        """Create the empty bookkeeping state, then hand over to L{setup}.
        
        @type config: C{dictionary}
        @param config: the configuration parameters for the DHT
        @type cache_dir: C{string}
        @param cache_dir: the directory to store all files in
            (optional, defaults to the /tmp directory)
        """
        # No pings are outstanding yet, and the configuration stays unset
        # until setup() installs it
        self.pinging = {}
        self.config = None
        self.setup(config, cache_dir)
        
    def setup(self, config, cache_dir):
        """Create the Khashmir sub-modules and start the DHT running.
        
        Builds the database, this node, the routing table, the first token
        secret and the statistics logger, then starts listening on UDP,
        reloads the saved routing table, forces a refresh of it and
        schedules the first checkpoint 60 seconds from now.
        
        @type config: C{dictionary}
        @param config: the configuration parameters for the DHT
        @type cache_dir: C{string}
        @param cache_dir: the directory to store all files in
        """
        self.config = config
        port = self.port = config['PORT']
        
        # The database file name includes the port
        db_file = os.path.join(cache_dir, 'khashmir.' + str(port) + '.db')
        self.store = DB(db_file)
        self.node = self._loadSelfNode('', port)
        self.table = KTable(self.node, config)
        self.token_secrets = [newID()]
        self.stats = StatsLogger(self.table, self.store)
        
        # Bring up the KRPC factory and listen for packets
        broker = krpc.hostbroker(self, self.stats, config)
        self.udp = broker
        broker.protocol = krpc.KRPC
        self.listenport = reactor.listenUDP(port, broker)
        
        # Restore the routing table, refresh it and start checkpointing
        self._loadRoutingTable()
        self.refreshTable(force = True)
        self.next_checkpoint = reactor.callLater(60, self.checkpoint)

    def Node(self, id, host = None, port = None):
        """Build a node object tied to this DHT's routing table and connection.
        
        @see: L{node.Node.__init__}
        """
        node = self._Node(id, host, port)
        node.table = self.table
        # The connection is looked up from the address stored on the node
        node.conn = self.udp.connectionForAddr((node.host, node.port))
        return node
    
    def __del__(self):
        """Stop listening for packets, if listening was ever started.
        
        L{setup} can fail before C{listenport} is assigned (for example when
        the database cannot be opened or the port cannot be bound). The
        object is still finalized in that case, so the attribute is looked
        up defensively instead of raising an AttributeError here.
        """
        listenport = getattr(self, 'listenport', None)
        if listenport is not None:
            listenport.stopListening()
        
    def _loadSelfNode(self, host, port):
        """Create the node object for this peer, reusing a saved ID if possible.
        
        The ID stored in the database is reused only when it ends with the
        configured C{VERSION}, otherwise a new ID is generated for that
        version.
        """
        saved_id = self.store.getSelfNode()
        version = self.config['VERSION']
        if saved_id and saved_id.endswith(version):
            node_id = saved_id
        else:
            node_id = newID(version)
        return self._Node(node_id, host, port)
        
    def checkpoint(self):
        """Run the periodic maintenance and schedule the next checkpoint.
        
        Rotates the token secrets, saves this node's ID and the routing
        table, expires old values, refreshes the routing table and then
        reschedules itself.
        """
        # Newest secret goes first, at most three are kept
        secrets = self.token_secrets
        secrets.insert(0, newID())
        if len(secrets) > 3:
            secrets.pop()
            
        # Persist what is needed to restart with the same identity
        self.store.saveSelfNode(self.node.id)
        self.store.dumpRoutingTable(self.table.buckets)
        
        # DHT maintenance
        self.store.expireValues(self.config['KEY_EXPIRE'])
        self.refreshTable()
        
        # Pick the next delay at random from 90% up to (but excluding) 110%
        # of the configured interval
        interval = self.config['CHECKPOINT_INTERVAL']
        delay = randrange(int(interval * .9), int(interval * 1.1))
        self.next_checkpoint = reactor.callLater(delay, self.checkpoint)
        
    def _loadRoutingTable(self):
        """Re-insert the nodes saved in the database into the routing table.
        
        The nodes are inserted as not yet contacted. It's usually a good
        idea to call refreshTable(force = True) after loading the table.
        """
        for rec in self.store.getRoutingTable():
            # Each record holds the node ID, the host and the port
            saved = self.Node(rec[0], rec[1], int(rec[2]))
            self.table.insertNode(saved, contacted = False)
            
    #{ Local interface
    def addContact(self, host, port, callback=None, errback=None):
        """Send a join to the given address, adding the node to the table on reply.
        
        The node's ID is not known yet, so the node is created with the
        null ID.
        
        @type host: C{string}
        @param host: the IP address of the node to contact
        @type port: C{int}
        @param port: the port of the node to contact
        @type callback: C{method}
        @param callback: the method to call with the results, it must take 1
            parameter, the contact info returned by the node
            (optional, defaults to doing nothing with the results)
        @type errback: C{method}
        @param errback: the method to call if an error occurs
            (optional, defaults to calling the callback with the error)
        """
        contact = self.Node(NULL_ID, host, port)
        self.sendJoin(contact, callback=callback, errback=errback)

    def findNode(self, id, callback):
        """Start a search for the K closest nodes to an ID in the global table.
        
        @type id: C{string}
        @param id: the target ID to find the K closest nodes of
        @type callback: C{method}
        @param callback: the method to call with the results, it must take 1
            parameter, the list of K closest nodes
        """
        # Record that the bucket for this ID has been accessed
        self.table.touch(id)
        
        # The search is seeded with a copy of our own node
        seeds = [copy(self.node)]

        # The search itself is started from the reactor, not from this call
        search = FindNode(self, id, callback, self.config, self.stats)
        reactor.callLater(0, search.goWithNodes, seeds)
    
    def insertNode(self, node, contacted = True):
        """Try to add a node to the local table, pinging the oldest contact if needed.
        
        With only a host/port available, use L{addContact} instead, which
        ends up here once the remote node has replied. A node cannot go
        into the table without its node ID, so the node passed in must be a
        properly formed Node object with a valid ID.

        @type node: L{node.Node}
        @param node: the new node to try and insert
        @type contacted: C{boolean}
        @param contacted: whether the new node is known to be good, i.e.
            responded to a request (optional, defaults to True)
        """
        # Local addresses are kept out of the table unless configured otherwise
        if not self.config['LOCAL_OK'] and isLocal.match(node.host):
            log.msg('Not adding local node to table: %s/%s' % (node.host, node.port))
            return
        
        displaced = self.table.insertNode(node, contacted=contacted)

        # Evaluation order matters: lastSeen is only read once the table is
        # known to have handed back a real node that is not ourselves
        ping_displaced = (isinstance(displaced, self._Node) and
                          displaced.id != self.node.id and
                          (datetime.now() - displaced.lastSeen) >
                           timedelta(seconds=self.config['MIN_PING_INTERVAL']))

        if ping_displaced:
            # Bucket is full, check whether the old node is still available
            self.sendPing(displaced).addErrback(self._staleNodeHandler,
                                                displaced, node, contacted)
        elif not displaced and not contacted:
            def schedule_reping(newnode):
                """Book a later ping of the node unless one is already pending."""
                if newnode.id not in self.pinging:
                    self.pinging[newnode.id] = reactor.callLater(
                        self.config['MIN_PING_INTERVAL'], self.sendPing, newnode)
                return newnode

            # There's room, but the node has to be contacted first, and
            # checked again later to make sure it keeps working
            self.sendPing(node).addCallback(schedule_reping)

    def _staleNodeHandler(self, err, old, node, contacted):
        """Replace an old node whose ping failed with the new node.
        
        The error is returned unchanged so it continues down the errback
        chain.
        """
        self.table.invalidateNode(old)
        self.insertNode(node, contacted = contacted)
        return err
    
    def nodeFailed(self, node):
        """Record a failed request for a node and schedule a later ping of it.
        
        @type node: L{node.Node}
        @param node: the node that failed to respond
        """
        in_table = self.table.nodeFailed(node)
        
        # Nothing to schedule when the table reports nothing for the node, or
        # when a ping for it has already been sent or scheduled
        if not in_table or node.id in self.pinging:
            return
        
        self.pinging[node.id] = reactor.callLater(self.config['MIN_PING_INTERVAL'],
                                                  self.sendPing, node)
    
    def sendPing(self, node):
        """Ping the node to see if it's still alive.
        
        @type node: L{node.Node}
        @param node: the node to send the ping to
        @return: the Deferred for the ping, which is the already outstanding
            one when a ping of this node is still in progress
        """
        pending = self.pinging.get(node.id, None)
        if isinstance(pending, DelayedCall):
            # A ping was only scheduled for later, it is replaced by this one
            if pending.active():
                pending.cancel()
        elif isinstance(pending, Deferred):
            # A ping is already underway, share its result
            return pending

        self.stats.startedAction('ping')
        ping_df = node.ping(self.node.id)
        self.pinging[node.id] = ping_df
        ping_df.addCallbacks(self._pingHandler, self._pingError,
                             callbackArgs = (node, datetime.now()),
                             errbackArgs = (node, datetime.now()))
        return ping_df

    def _pingHandler(self, dict, node, start):
        """Handle a ping reply: log it, clear the pending entry, insert the node.
        
        @return: the node object built from the contact info in the reply
        """
        self.stats.completedAction('ping', start)
        del self.pinging[node.id]
        
        # The node is rebuilt from the ID and sender address in the reply
        node_id = dict['id']
        sender = dict['_krpc_sender']
        responder = self.Node(node_id, sender[0], sender[1])
        reactor.callLater(0, self.insertNode, responder)
        return responder

    def _pingError(self, err, node, start):
        """Handle a failed ping: log it, clear the pending entry, fail the node.
        
        @return: the error, unchanged, for the rest of the errback chain
        """
        message = "action ping failed on %s/%s: %s" % (node.host, node.port, err.getErrorMessage())
        log.msg(message)
        self.stats.completedAction('ping', start)
        
        # A final errback is added to the pending ping so that the error
        # returned below does not end up unhandled
        pending = self.pinging[node.id]
        pending.addErrback(lambda ping_err: None)
        del self.pinging[node.id]
        
        self.nodeFailed(node)
        return err
        
    def sendJoin(self, node, callback=None, errback=None):
        """Join the DHT by pinging a bootstrap node.
        
        @type node: L{node.Node}
        @param node: the node to send the join to
        @type callback: C{method}
        @param callback: the method to call with the results, it must take 1
            parameter, the contact info returned by the node
            (optional, defaults to doing nothing with the results)
        @type errback: C{method}
        @param errback: the method to call if an error occurs
            (optional, defaults to calling the callback with the error)
        """
        if errback is None:
            errback = callback
        self.stats.startedAction('join')
        df = node.join(self.node.id)
        df.addCallbacks(self._joinHandler, self._joinError,
                        callbackArgs = (node, datetime.now()),
                        errbackArgs = (node, datetime.now()))
        if callback:
            df.addCallbacks(callback, errback)
        elif errback:
            # An errback supplied on its own must still see the failure;
            # addCallbacks cannot be used here since there is no callback
            df.addErrback(errback)

    def _joinHandler(self, dict, node, start):
        """Handle a join reply: log it, insert the node, extract the response.
        
        @return: the C{ip_addr} and C{port} values from the reply, as a tuple
        """
        self.stats.completedAction('join', start)
        
        # The node is rebuilt from the ID and sender address in the reply
        node_id = dict['id']
        sender = dict['_krpc_sender']
        responder = self.Node(node_id, sender[0], sender[1])
        reactor.callLater(0, self.insertNode, responder)
        
        return (dict['ip_addr'], dict['port'])

    def _joinError(self, err, node, start):
        """Handle a failed join: log it and mark the node as failed.
        
        @return: the error, unchanged, for the rest of the errback chain
        """
        message = "action join failed on %s/%s: %s" % (node.host, node.port, err.getErrorMessage())
        log.msg(message)
        self.stats.completedAction('join', start)
        self.nodeFailed(node)
        return err
        
    def findCloseNodes(self, callback=lambda a: None):
        """Perform a findNode on the ID one away from our own.

        This will allow us to populate our table with nodes on our network
        closest to our own. This is called as soon as we start up with an
        empty table.

        @type callback: C{method}
        @param callback: the method to call with the results, it must take 1
            parameter, the list of K closest nodes
            (optional, defaults to doing nothing with the results)
        """
        own_id = self.node.id
        # Only the last character changes: it is incremented, wrapping at 256
        bumped = chr((ord(own_id[-1]) + 1) % 256)
        self.findNode(own_id[:-1] + bumped, callback)

    def refreshTable(self, force = False):
        """Look up a random ID in every bucket that is due for a refresh.
        
        A bucket is due when it was last accessed longer ago than the
        configured C{BUCKET_STALENESS} number of seconds.
        
        @param force: refresh all buckets regardless of last bucket access time
            (optional, defaults to False)
        """
        def ignore_result(nodes):
            """The lookups are run without using their results."""
            pass
    
        for bucket in self.table.buckets:
            if not force:
                idle = datetime.now() - bucket.lastAccessed
                if not (idle > timedelta(seconds=self.config['BUCKET_STALENESS'])):
                    continue
            # Choose a random ID in the bucket and try and find it
            self.findNode(newIDInRange(bucket.min, bucket.max), ignore_result)

    def shutdown(self):
        """Close the port, cancel pending later calls and close the database.
        
        Pings that were only scheduled (DelayedCall entries in C{pinging})
        are cancelled and forgotten. Entries for pings already in progress
        are left in place.
        """
        self.listenport.stopListening()
        
        # Only a checkpoint that is still waiting can be cancelled. Checking
        # first avoids hiding unrelated errors behind a catch-all handler.
        checkpoint = getattr(self, 'next_checkpoint', None)
        if checkpoint is not None and checkpoint.active():
            checkpoint.cancel()
        
        # Entries are deleted inside the loop, so loop over a copy of the keys
        for nodeid in list(self.pinging.keys()):
            pending = self.pinging[nodeid]
            if isinstance(pending, DelayedCall) and pending.active():
                pending.cancel()
                del self.pinging[nodeid]
        self.store.close()
    
    def getStats(self):
        """Return the DHT statistics as formatted by the statistics gatherer."""
        report = self.stats.formatHTML()
        return report

    #{ Remote interface
    def krpc_ping(self, id, _krpc_sender = None):
        """Answer a ping with our own node ID.
        
        @type id: C{string}
        @param id: the node ID of the sender node
        @type _krpc_sender: (C{string}, C{int})
        @param _krpc_sender: the sender node's IP address and port
        @return: a dictionary holding our node ID under C{id}
        """
        if _krpc_sender is not None:
            # The sender goes into the table as a not yet contacted node
            host, port = _krpc_sender[0], _krpc_sender[1]
            sender = self.Node(id, host, port)
            reactor.callLater(0, self.insertNode, sender, False)

        return {"id" : self.node.id}
        
    def krpc_join(self, id, _krpc_sender = None):
        """Answer a join by telling the sender the address and port we saw.
        
        @type id: C{string}
        @param id: the node ID of the sender node
        @type _krpc_sender: (C{string}, C{int})
        @param _krpc_sender: the sender node's IP address and port
        @return: a dictionary with the sender's C{ip_addr} and C{port}, and
            our node ID under C{id}
        """
        if _krpc_sender is None:
            # No sender given, answer with the loopback address and our port
            _krpc_sender = ('127.0.0.1', self.port)
        else:
            # The sender goes into the table as a not yet contacted node
            sender = self.Node(id, _krpc_sender[0], _krpc_sender[1])
            reactor.callLater(0, self.insertNode, sender, False)

        reply = {"ip_addr" : _krpc_sender[0], "port" : _krpc_sender[1], "id" : self.node.id}
        return reply
        
    def krpc_find_node(self, id, target, _krpc_sender = None):
        """Look up the closest nodes to the target in the local routing table.
        
        @type target: C{string}
        @param target: the target ID to find nodes for
        @type id: C{string}
        @param id: the node ID of the sender node
        @type _krpc_sender: (C{string}, C{int})
        @param _krpc_sender: the sender node's IP address and port
        @return: a dictionary with the contact info of the found C{nodes},
            a C{token} for the sender and our node ID under C{id}
        """
        if _krpc_sender is None:
            # No sender given, use the loopback address and our port
            _krpc_sender = ('127.0.0.1', self.port)
        else:
            # The sender goes into the table as a not yet contacted node
            sender = self.Node(id, _krpc_sender[0], _krpc_sender[1])
            reactor.callLater(0, self.insertNode, sender, False)

        closest = self.table.findNodes(target)
        contacts = [found.contactInfo() for found in closest]
        
        # The token is a hash of the newest secret and the sender's address
        token = sha(self.token_secrets[0] + _krpc_sender[0]).digest()
        return {"nodes" : contacts, "token" : token, "id" : self.node.id}