Beispiel #1
0
    def setUp(self):
        """Build the AWSpider under test from the credentials in config.yaml.

        Creates a MiniWebServer, reads ``config.yaml`` from the directory of
        this module, derives the S3 bucket and SimpleDB domain names from a
        SHA-256 digest of the credentials plus the test class name, and
        constructs the spider (``port=5000``) and an AmazonS3 client.

        Returns the result of ``self.spider.start()`` so the runner can wait
        on it (presumably a Deferred -- confirm against AWSpider).
        """
        self.mini_web_server = MiniWebServer()

        config_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "config.yaml"))

        # NOTE(review): raiseConfigException is not defined in the visible
        # part of this class; it is expected to raise -- confirm.
        if not os.path.isfile(config_path):
            self.raiseConfigException(config_path)

        # Close the file deterministically instead of leaking the handle.
        # NOTE(review): yaml.load can construct arbitrary Python objects;
        # consider yaml.safe_load if config.yaml is ever not fully trusted.
        with open(config_path, 'r') as config_file:
            config = yaml.load(config_file.read())

        # An empty or non-mapping document would otherwise surface as an
        # unrelated TypeError on the membership test below.
        if (not isinstance(config, dict)
                or "aws_access_key_id" not in config
                or "aws_secret_access_key" not in config):
            self.raiseConfigException(config_path)

        self.aws_access_key_id = config["aws_access_key_id"]
        self.aws_secret_access_key = config["aws_secret_access_key"]

        # The digest makes the resource names unique per credential pair and
        # per test class.
        self.uuid = hashlib.sha256(
            self.aws_access_key_id
            + self.aws_secret_access_key
            + self.__class__.__name__).hexdigest()

        self.aws_s3_cache_bucket = "%s_cache" % self.uuid
        self.aws_s3_storage_bucket = "%s_storage" % self.uuid
        self.aws_sdb_reservation_domain = "%s_reservation" % self.uuid
        self.aws_sdb_coordination_domain = "%s_coordination" % self.uuid

        self.spider = AWSpider(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            aws_s3_cache_bucket=self.aws_s3_cache_bucket,
            aws_s3_storage_bucket=self.aws_s3_storage_bucket,
            aws_sdb_reservation_domain=self.aws_sdb_reservation_domain,
            aws_sdb_coordination_domain=self.aws_sdb_coordination_domain,
            port=5000)

        self.s3 = AmazonS3(self.aws_access_key_id, self.aws_secret_access_key)

        return self.spider.start()
Beispiel #2
0
class RequestQueuerTestCase(unittest.TestCase):
    """Tests for RequestQueuer: page fetching, per-host limits and counters."""

    def setUp(self):
        """Create a MiniWebServer and a RequestQueuer with explicit limits.

        The queuer is built with 3 requests per host per second and 5
        simultaneous requests per host; the limit tests below rely on these
        values as the defaults for hosts that were never configured.
        """
        self.deferred = Deferred()
        self.mini_web_server = MiniWebServer()
        self.rq = RequestQueuer(
            max_requests_per_host_per_second=3,
            max_simultaneous_requests_per_host=5)

    def tearDown(self):
        """Shut the web server down; its result is returned to the runner."""
        return self.mini_web_server.shutdown()

    def testRequestQueuerOnSuccess(self):
        """Fetching a page from the local server must not fail."""
        return self.rq.getPage("http://127.0.0.1:8080/helloworld", timeout=5)

    def testRequestQueuerOnFailure(self):
        """Fetching from an unreachable address must end in a failure.

        Previously only an errback was attached, so the test also passed when
        the request unexpectedly succeeded.
        """
        d = self.rq.getPage("http://0.0.0.0:99", timeout=5)
        # addCallbacks registers both handlers at the same level of the chain,
        # so the failure raised by the success handler is not swallowed by
        # the errback.
        d.addCallbacks(self._getPageUnexpectedSuccess, self._getPageErrback)
        return d

    def testHostMaxRequestsPerSecond(self):
        """Unconfigured hosts use the default rate; per-host overrides stick."""
        self.assertEqual(
            self.rq.getHostMaxRequestsPerSecond("example.com"), 3)
        self.rq.setHostMaxRequestsPerSecond("example2.com", 7)
        self.assertEqual(
            self.rq.getHostMaxRequestsPerSecond("example2.com"), 7)

    def testHostMaxSimultaneousRequests(self):
        """Unconfigured hosts use the default cap; per-host overrides stick."""
        self.assertEqual(
            self.rq.getHostMaxSimultaneousRequests("example.com"), 5)
        self.rq.setHostMaxSimultaneousRequests("example2.com", 11)
        self.assertEqual(
            self.rq.getHostMaxSimultaneousRequests("example2.com"), 11)

    def testActive(self):
        """getActive() returns an int."""
        self.assertTrue(isinstance(self.rq.getActive(), int))

    def testPending(self):
        """getPending() returns an int."""
        self.assertTrue(isinstance(self.rq.getPending(), int))

    def testActiveRequestsByHost(self):
        """getActiveRequestsByHost() returns a dict."""
        self.assertTrue(isinstance(self.rq.getActiveRequestsByHost(), dict))

    def testPendingRequestsByHost(self):
        """getPendingRequestsByHost() returns a dict."""
        self.assertTrue(isinstance(self.rq.getPendingRequestsByHost(), dict))

    def _getPageErrback(self, error):
        """Turn the expected failure into a success result."""
        return True

    def _getPageUnexpectedSuccess(self, data):
        """Fail the test when a request that should have failed succeeded."""
        self.fail("getPage() to an unreachable address should have failed.")
Beispiel #3
0
 def setUp(self):
     """Prepare a PageGetter backed by an S3 bucket named after a digest.

     Creates a MiniWebServer, reads the AWS credentials from ``config.yaml``
     next to this module, builds the AmazonS3 client and the PageGetter, and
     attaches a DEBUG-level stream handler to LOGGER.

     Returns the result of ``self.s3.putBucket(self.uuid)`` so the runner can
     wait on it (presumably a Deferred -- confirm against AmazonS3).
     """
     self.mini_web_server = MiniWebServer()
     config_path = os.path.abspath(
         os.path.join(os.path.dirname(__file__), "config.yaml"))
     # NOTE(review): raiseConfigException is not visible here; it is
     # expected to raise -- confirm.
     if not os.path.isfile(config_path):
         self.raiseConfigException(config_path)
     # Close the file deterministically instead of leaking the handle.
     # NOTE(review): yaml.load can construct arbitrary Python objects;
     # consider yaml.safe_load if config.yaml is ever not fully trusted.
     with open(config_path, 'r') as config_file:
         config = yaml.load(config_file.read())
     # An empty or non-mapping document would otherwise surface as an
     # unrelated TypeError on the membership test.
     if (not isinstance(config, dict)
             or "aws_access_key_id" not in config
             or "aws_secret_access_key" not in config):
         self.raiseConfigException(config_path)
     self.s3 = AmazonS3(
         config["aws_access_key_id"],
         config["aws_secret_access_key"])
     # The digest of credentials plus class name doubles as the bucket name.
     self.uuid = hashlib.sha256("".join([
         config["aws_access_key_id"],
         config["aws_secret_access_key"],
         self.__class__.__name__])).hexdigest()
     self.pg = PageGetter(self.s3, self.uuid)
     self.logging_handler = logging.StreamHandler()
     self.logging_handler.setFormatter(logging.Formatter(
         "%(levelname)s: %(message)s %(pathname)s:%(lineno)d"))
     LOGGER.addHandler(self.logging_handler)
     LOGGER.setLevel(logging.DEBUG)
     return self.s3.putBucket(self.uuid)
Beispiel #4
0
class AWSpiderTestCase(unittest.TestCase):
    """Integration tests for AWSpider using credentials from config.yaml.

    setUp derives the S3 bucket and SimpleDB domain names from a SHA-256
    digest of the credentials plus the class name; tearDown clears and
    deletes those resources again.
    """

    def setUp(self):
        """Build and start the spider under test.

        Returns the result of ``self.spider.start()`` so the runner can wait
        on it (presumably a Deferred -- confirm against AWSpider).
        """
        self.mini_web_server = MiniWebServer()

        config_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "config.yaml"))

        # NOTE(review): raiseConfigException is not defined in the visible
        # part of this class; it is expected to raise -- confirm.
        if not os.path.isfile(config_path):
            self.raiseConfigException(config_path)

        # Close the file deterministically instead of leaking the handle.
        # NOTE(review): yaml.load can construct arbitrary Python objects;
        # consider yaml.safe_load if config.yaml is ever not fully trusted.
        with open(config_path, 'r') as config_file:
            config = yaml.load(config_file.read())

        # An empty or non-mapping document would otherwise surface as an
        # unrelated TypeError on the membership test below.
        if (not isinstance(config, dict)
                or "aws_access_key_id" not in config
                or "aws_secret_access_key" not in config):
            self.raiseConfigException(config_path)

        self.aws_access_key_id = config["aws_access_key_id"]
        self.aws_secret_access_key = config["aws_secret_access_key"]

        self.uuid = hashlib.sha256(
            self.aws_access_key_id
            + self.aws_secret_access_key
            + self.__class__.__name__).hexdigest()

        self.aws_s3_cache_bucket = "%s_cache" % self.uuid
        self.aws_s3_storage_bucket = "%s_storage" % self.uuid
        self.aws_sdb_reservation_domain = "%s_reservation" % self.uuid
        self.aws_sdb_coordination_domain = "%s_coordination" % self.uuid

        self.spider = AWSpider(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            aws_s3_cache_bucket=self.aws_s3_cache_bucket,
            aws_s3_storage_bucket=self.aws_s3_storage_bucket,
            aws_sdb_reservation_domain=self.aws_sdb_reservation_domain,
            aws_sdb_coordination_domain=self.aws_sdb_coordination_domain,
            port=5000)

        self.s3 = AmazonS3(self.aws_access_key_id, self.aws_secret_access_key)

        return self.spider.start()

    def tearDown(self):
        """Shut down the spider and web server, then run the cleanup chain."""
        d = DeferredList([
            self.spider.shutdown(),
            self.mini_web_server.shutdown(),
        ])
        d.addCallback(self._tearDownCallback)
        return d

    def _tearDownCallback(self, data):
        """Clear cache and storage and delete both SimpleDB domains.

        Fresh AmazonS3 / AmazonSDB clients are created here and stored on the
        instance; the bucket deletion step that follows uses ``self.s3``.
        """
        self.s3 = AmazonS3(self.aws_access_key_id, self.aws_secret_access_key)
        self.sdb = AmazonSDB(self.aws_access_key_id, self.aws_secret_access_key)

        d = DeferredList([
            self.spider.pg.clearCache(),
            self.spider.clearStorage(),
            self.sdb.deleteDomain(self.aws_sdb_reservation_domain),
            self.sdb.deleteDomain(self.aws_sdb_coordination_domain),
        ])
        # Buckets are deleted only after the clearing calls above have fired.
        d.addCallback(self._tearDownCallback2)
        return d

    def _tearDownCallback2(self, data):
        """Delete the cache and storage buckets."""
        return DeferredList([
            self.s3.deleteBucket(self.aws_s3_cache_bucket),
            self.s3.deleteBucket(self.aws_s3_storage_bucket),
        ])

    def testPageGetter(self):
        """Fetching a page through the spider must not fail."""
        return self.spider.getPage("http://127.0.0.1:8080", timeout=5)

    def testClearStorage(self):
        """clearStorage() must complete without failing."""
        return self.spider.clearStorage()

    def testGetServerData(self):
        """getServerData() returns a dict."""
        server_data = self.spider.getServerData()
        self.assertTrue(isinstance(server_data, dict))

    def testExpose(self):
        """An exposed function is reachable under /function/<name> on port 5000."""
        # ``foo`` is expected to be defined at module level outside this class.
        self.spider.expose(foo)
        return self.spider.getPage(
            "http://127.0.0.1:5000/function/foo", timeout=5)
Beispiel #5
0
class PageGetterTestCase(unittest.TestCase):
    
    def setUp(self):
        """Prepare a PageGetter backed by an S3 bucket named after a digest.

        Creates a MiniWebServer, reads the AWS credentials from
        ``config.yaml`` next to this module, builds the AmazonS3 client and
        the PageGetter, and attaches a DEBUG-level stream handler to LOGGER
        (removed again in tearDown).

        Returns the result of ``self.s3.putBucket(self.uuid)`` so the runner
        can wait on it (presumably a Deferred -- confirm against AmazonS3).
        """
        self.mini_web_server = MiniWebServer()
        config_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "config.yaml"))
        # NOTE(review): raiseConfigException is not defined in the visible
        # part of this class; it is expected to raise -- confirm.
        if not os.path.isfile(config_path):
            self.raiseConfigException(config_path)
        # Close the file deterministically instead of leaking the handle.
        # NOTE(review): yaml.load can construct arbitrary Python objects;
        # consider yaml.safe_load if config.yaml is ever not fully trusted.
        with open(config_path, 'r') as config_file:
            config = yaml.load(config_file.read())
        # An empty or non-mapping document would otherwise surface as an
        # unrelated TypeError on the membership test.
        if (not isinstance(config, dict)
                or "aws_access_key_id" not in config
                or "aws_secret_access_key" not in config):
            self.raiseConfigException(config_path)
        self.s3 = AmazonS3(
            config["aws_access_key_id"],
            config["aws_secret_access_key"])
        # The digest of credentials plus class name doubles as the bucket name.
        self.uuid = hashlib.sha256("".join([
            config["aws_access_key_id"],
            config["aws_secret_access_key"],
            self.__class__.__name__])).hexdigest()
        self.pg = PageGetter(self.s3, self.uuid)
        self.logging_handler = logging.StreamHandler()
        self.logging_handler.setFormatter(logging.Formatter(
            "%(levelname)s: %(message)s %(pathname)s:%(lineno)d"))
        LOGGER.addHandler(self.logging_handler)
        LOGGER.setLevel(logging.DEBUG)
        return self.s3.putBucket(self.uuid)
        
    def tearDown(self):
        """Detach the log handler, then stop the server and clear the cache.

        The bucket is deleted by ``_tearDownCallback`` once both the server
        shutdown and the cache clearing have fired.
        """
        LOGGER.removeHandler(self.logging_handler)
        pending = [
            self.mini_web_server.shutdown(),
            self.pg.clearCache(),
        ]
        cleanup = DeferredList(pending)
        cleanup.addCallback(self._tearDownCallback)
        return cleanup
    
    def _tearDownCallback(self, data):
        """Delete the bucket named ``self.uuid``; ``data`` is ignored."""
        return self.s3.deleteBucket(self.uuid)

    def test_01_PageGetterOnSuccess(self):
        """Fetching /helloworld with cache-write confirmation must not fail."""
        url = "http://127.0.0.1:8080/helloworld"
        return self.pg.getPage(url, confirm_cache_write=True)
    
    def test_02_PageGetterOnFailure(self):
        """Fetching from an unreachable address must end in a failure.

        Previously only an errback was attached, so the test also passed when
        the request unexpectedly succeeded.
        """
        def _unexpected_success(data):
            self.fail("getPage() to http://0.0.0.0:99 should have failed.")

        d = self.pg.getPage(
            "http://0.0.0.0:99",
            timeout=5,
            confirm_cache_write=True)
        # addCallbacks registers both handlers at the same level of the chain,
        # so the failure raised by the success handler is not swallowed by
        # the errback.
        d.addCallbacks(_unexpected_success, self._getPageErrback)
        return d
    
    def _getPageErrback(self, error):
        return True
    
    def test_04_ContentSHA1(self):
        """Fetch /helloworld, then hand the result to the content-sha1 check."""
        url = "http://127.0.0.1:8080/helloworld"
        first_fetch = self.pg.getPage(url, confirm_cache_write=True)
        first_fetch.addCallback(self._contentSHA1Callback)
        return first_fetch
    
    def _contentSHA1Callback(self, data):
        print data
        if "content-sha1" in data:
            content_sha1 = data["content-sha1"]
            d = self.pg.getPage(
                "http://127.0.0.1:8080/helloworld", 
                content_sha1=content_sha1, 
                confirm_cache_write=True)
            d.addCallback(self._contentSHA1Callback2)
            d.addErrback(self._contentSHA1Errback)
            return d
        else:
            raise Exception("Data should have Content SHA1 signature.")
    
    def _contentSHA1Callback2(self, data):
        print data
        raise Exception("Pagegetter.getPage() should have raised StaleContentException")
    
    def _contentSHA1Errback(self, error):
        try:
            error.raiseException()
        except StaleContentException, e:
            return True
        except:
Beispiel #6
0
 def setUp(self):
     """Create a Deferred, a MiniWebServer and a RequestQueuer with limits.

     The queuer is built with 3 requests per host per second and 5
     simultaneous requests per host.
     """
     self.deferred = Deferred()
     self.mini_web_server = MiniWebServer()
     queuer_limits = {
         "max_requests_per_host_per_second": 3,
         "max_simultaneous_requests_per_host": 5,
     }
     self.rq = RequestQueuer(**queuer_limits)