def setUp(self):
    """Build an AWSpider from config.yaml and start it.

    Reads ``aws_access_key_id`` and ``aws_secret_access_key`` from the
    ``config.yaml`` file that sits next to this module, derives the S3
    bucket and SimpleDB domain names from a SHA-256 of the credentials plus
    the test class name, and constructs the spider on port 5000.

    Returns:
        Whatever ``self.spider.start()`` returns (presumably a Deferred the
        test runner waits on -- confirm against AWSpider).
    """
    # NOTE(review): presumably serves the 127.0.0.1:8080 pages requested by
    # the tests -- confirm against MiniWebServer.
    self.mini_web_server = MiniWebServer()

    config_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "config.yaml"))
    if not os.path.isfile(config_path):
        self.raiseConfigException(config_path)

    # Close the handle deterministically instead of leaking it until GC.
    with open(config_path, 'r') as config_file:
        # NOTE(review): yaml.load can construct arbitrary Python objects.
        # That is acceptable only while config.yaml is a trusted local file;
        # consider yaml.safe_load.
        config = yaml.load(config_file.read())

    # An empty or non-mapping YAML document would otherwise blow up on the
    # membership test below rather than reporting a configuration problem.
    if (not isinstance(config, dict)
            or "aws_access_key_id" not in config
            or "aws_secret_access_key" not in config):
        self.raiseConfigException(config_path)

    self.uuid = hashlib.sha256(
        config["aws_access_key_id"]
        + config["aws_secret_access_key"]
        + self.__class__.__name__).hexdigest()

    self.aws_access_key_id = config["aws_access_key_id"]
    self.aws_secret_access_key = config["aws_secret_access_key"]

    # Every AWS resource name is namespaced by the uuid so that different
    # test classes (and different credentials) never share resources.
    self.aws_s3_cache_bucket = "%s_cache" % self.uuid
    self.aws_s3_storage_bucket = "%s_storage" % self.uuid
    self.aws_sdb_reservation_domain = "%s_reservation" % self.uuid
    self.aws_sdb_coordination_domain = "%s_coordination" % self.uuid

    self.spider = AWSpider(
        aws_access_key_id=self.aws_access_key_id,
        aws_secret_access_key=self.aws_secret_access_key,
        aws_s3_cache_bucket=self.aws_s3_cache_bucket,
        aws_s3_storage_bucket=self.aws_s3_storage_bucket,
        aws_sdb_reservation_domain=self.aws_sdb_reservation_domain,
        aws_sdb_coordination_domain=self.aws_sdb_coordination_domain,
        port=5000)

    self.s3 = AmazonS3(
        config["aws_access_key_id"],
        config["aws_secret_access_key"])

    return self.spider.start()
class RequestQueuerTestCase(unittest.TestCase): def setUp(self): self.deferred = Deferred() self.mini_web_server = MiniWebServer() self.rq = RequestQueuer(max_requests_per_host_per_second=3, max_simultaneous_requests_per_host=5) def tearDown(self): return self.mini_web_server.shutdown() def testRequestQueuerOnSuccess(self): d = self.rq.getPage("http://127.0.0.1:8080/helloworld", timeout=5) return d def testRequestQueuerOnFailure(self): d = self.rq.getPage("http://0.0.0.0:99", timeout=5) d.addErrback(self._getPageErrback) return d def testHostMaxRequestsPerSecond(self,): self.failUnlessEqual( self.rq.getHostMaxRequestsPerSecond("example.com"), 3) self.rq.setHostMaxRequestsPerSecond("example2.com", 7) self.failUnlessEqual( self.rq.getHostMaxRequestsPerSecond("example2.com"), 7) def testHostMaxSimultaneousRequests(self,): self.failUnlessEqual( self.rq.getHostMaxSimultaneousRequests("example.com"), 5) self.rq.setHostMaxSimultaneousRequests("example2.com", 11) self.failUnlessEqual( self.rq.getHostMaxSimultaneousRequests("example2.com"), 11) def testActive(self): self.failUnlessEqual(isinstance(self.rq.getActive(), int), True) def testPending(self): self.failUnlessEqual(isinstance(self.rq.getPending(), int), True) def testActiveRequestsByHost(self): self.failUnlessEqual(isinstance(self.rq.getActiveRequestsByHost(), dict), True) def testPendingRequestsByHost(self): self.failUnlessEqual(isinstance(self.rq.getPendingRequestsByHost(), dict), True) def _getPageErrback(self, error): return True
def setUp(self):
    """Prepare a PageGetter backed by a per-test-class S3 bucket.

    Reads ``aws_access_key_id`` and ``aws_secret_access_key`` from the
    ``config.yaml`` file that sits next to this module, builds the S3
    client and PageGetter, attaches a DEBUG stream handler to ``LOGGER``,
    and creates the bucket.

    Returns:
        Whatever ``self.s3.putBucket(self.uuid)`` returns (presumably a
        Deferred the test runner waits on -- confirm against AmazonS3).
    """
    # NOTE(review): presumably serves the 127.0.0.1:8080 pages requested by
    # the tests -- confirm against MiniWebServer.
    self.mini_web_server = MiniWebServer()

    config_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "config.yaml"))
    if not os.path.isfile(config_path):
        self.raiseConfigException(config_path)

    # Close the handle deterministically instead of leaking it until GC.
    with open(config_path, 'r') as config_file:
        # NOTE(review): yaml.load can construct arbitrary Python objects.
        # That is acceptable only while config.yaml is a trusted local file;
        # consider yaml.safe_load.
        config = yaml.load(config_file.read())

    # An empty or non-mapping YAML document would otherwise blow up on the
    # membership test below rather than reporting a configuration problem.
    if (not isinstance(config, dict)
            or "aws_access_key_id" not in config
            or "aws_secret_access_key" not in config):
        self.raiseConfigException(config_path)

    self.s3 = AmazonS3(
        config["aws_access_key_id"],
        config["aws_secret_access_key"])

    # The bucket name is a digest of credentials + class name, so each test
    # class gets its own bucket.
    self.uuid = hashlib.sha256("".join([
        config["aws_access_key_id"],
        config["aws_secret_access_key"],
        self.__class__.__name__])).hexdigest()
    self.pg = PageGetter(self.s3, self.uuid)

    # Keep a reference to the handler so it can be detached again later.
    self.logging_handler = logging.StreamHandler()
    formatter = logging.Formatter(
        "%(levelname)s: %(message)s %(pathname)s:%(lineno)d")
    self.logging_handler.setFormatter(formatter)
    LOGGER.addHandler(self.logging_handler)
    LOGGER.setLevel(logging.DEBUG)

    d = self.s3.putBucket(self.uuid)
    return d
class AWSpiderTestCase(unittest.TestCase): def setUp(self): self.mini_web_server = MiniWebServer() config_path = os.path.abspath( os.path.join( os.path.dirname(__file__), "config.yaml" ) ) if not os.path.isfile( config_path ): self.raiseConfigException( config_path ) config = yaml.load( open( config_path, 'r').read() ) if not "aws_access_key_id" in config or "aws_secret_access_key" not in config: self.raiseConfigException( config_path ) self.uuid = hashlib.sha256( config["aws_access_key_id"] + config["aws_secret_access_key"] + self.__class__.__name__ ).hexdigest() self.aws_access_key_id = config["aws_access_key_id"] self.aws_secret_access_key = config["aws_secret_access_key"] self.aws_s3_cache_bucket = "%s_cache" % self.uuid self.aws_s3_storage_bucket = "%s_storage" % self.uuid self.aws_sdb_reservation_domain = "%s_reservation" % self.uuid self.aws_sdb_coordination_domain = "%s_coordination" % self.uuid self.spider = AWSpider( aws_access_key_id = self.aws_access_key_id, aws_secret_access_key = self.aws_secret_access_key, aws_s3_cache_bucket = self.aws_s3_cache_bucket, aws_s3_storage_bucket = self.aws_s3_storage_bucket, aws_sdb_reservation_domain = self.aws_sdb_reservation_domain, aws_sdb_coordination_domain = self.aws_sdb_coordination_domain, port = 5000 ) self.s3 = AmazonS3( config["aws_access_key_id"], config["aws_secret_access_key"]) return self.spider.start() def tearDown(self): deferreds = [] deferreds.append(self.spider.shutdown()) deferreds.append(self.mini_web_server.shutdown()) d = DeferredList(deferreds) d.addCallback(self._tearDownCallback) return d def _tearDownCallback(self, data): self.s3 = AmazonS3(self.aws_access_key_id, self.aws_secret_access_key) self.sdb = AmazonSDB(self.aws_access_key_id, self.aws_secret_access_key) deferreds = [] deferreds.append(self.spider.pg.clearCache()) deferreds.append(self.spider.clearStorage()) deferreds.append(self.sdb.deleteDomain(self.aws_sdb_reservation_domain)) 
deferreds.append(self.sdb.deleteDomain(self.aws_sdb_coordination_domain)) d = DeferredList(deferreds) d.addCallback( self._tearDownCallback2 ) return d def _tearDownCallback2( self, data ): deferreds = [] deferreds.append(self.s3.deleteBucket(self.aws_s3_cache_bucket)) deferreds.append(self.s3.deleteBucket(self.aws_s3_storage_bucket)) d = DeferredList(deferreds) return d def testPageGetter(self): d = self.spider.getPage("http://127.0.0.1:8080", timeout=5) return d def testClearStorage(self): d = self.spider.clearStorage() return d def testGetServerData(self): server_data = self.spider.getServerData() self.failUnlessEqual( isinstance(server_data, dict), True ) def testExpose(self): self.spider.expose( foo ) d = self.spider.getPage("http://127.0.0.1:5000/function/foo", timeout=5) return d
class PageGetterTestCase(unittest.TestCase): def setUp(self): self.mini_web_server = MiniWebServer() config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "config.yaml")) if not os.path.isfile(config_path): self.raiseConfigException(config_path) config = yaml.load(open(config_path, 'r').read()) if not "aws_access_key_id" in config or "aws_secret_access_key" not in config: self.raiseConfigException(config_path) self.s3 = AmazonS3( config["aws_access_key_id"], config["aws_secret_access_key"]) self.uuid = hashlib.sha256("".join([ config["aws_access_key_id"], config["aws_secret_access_key"], self.__class__.__name__])).hexdigest() self.pg = PageGetter(self.s3, self.uuid) self.logging_handler = logging.StreamHandler() formatter = logging.Formatter("%(levelname)s: %(message)s %(pathname)s:%(lineno)d") self.logging_handler.setFormatter(formatter) LOGGER.addHandler(self.logging_handler) LOGGER.setLevel(logging.DEBUG) d = self.s3.putBucket(self.uuid) return d def tearDown(self): LOGGER.removeHandler(self.logging_handler) a = self.mini_web_server.shutdown() b = self.pg.clearCache() d = DeferredList([a, b]) d.addCallback(self._tearDownCallback) return d def _tearDownCallback(self, data): d = self.s3.deleteBucket(self.uuid) return d def test_01_PageGetterOnSuccess(self): d = self.pg.getPage( "http://127.0.0.1:8080/helloworld", confirm_cache_write=True) return d def test_02_PageGetterOnFailure(self): d = self.pg.getPage( "http://0.0.0.0:99", timeout=5, confirm_cache_write=True) d.addErrback(self._getPageErrback) return d def _getPageErrback(self, error): return True def test_04_ContentSHA1(self): d = self.pg.getPage( "http://127.0.0.1:8080/helloworld", confirm_cache_write=True) d.addCallback(self._contentSHA1Callback) return d def _contentSHA1Callback(self, data): print data if "content-sha1" in data: content_sha1 = data["content-sha1"] d = self.pg.getPage( "http://127.0.0.1:8080/helloworld", content_sha1=content_sha1, confirm_cache_write=True) 
d.addCallback(self._contentSHA1Callback2) d.addErrback(self._contentSHA1Errback) return d else: raise Exception("Data should have Content SHA1 signature.") def _contentSHA1Callback2(self, data): print data raise Exception("Pagegetter.getPage() should have raised StaleContentException") def _contentSHA1Errback(self, error): try: error.raiseException() except StaleContentException, e: return True except:
def setUp(self):
    """Create the fixtures shared by the request queuer tests.

    Sets ``self.deferred``, ``self.mini_web_server`` and ``self.rq``; the
    queuer is limited to 3 requests per host per second and 5 simultaneous
    requests per host.
    """
    self.deferred = Deferred()
    self.mini_web_server = MiniWebServer()
    # Build the queuer from named limits so the values are easy to spot.
    queuer_limits = {
        "max_requests_per_host_per_second": 3,
        "max_simultaneous_requests_per_host": 5,
    }
    self.rq = RequestQueuer(**queuer_limits)