class ThrottleMixin(object):
    """Shared fixture for scheduler tests.

    ``setUp`` builds a ``DistributedScheduler`` with its ``setup`` method
    patched out, and ``get_request`` returns a ``Request`` whose ``meta``
    carries a fixed set of crawl fields.
    """

    @mock.patch(
        'crawling.distributed_scheduler.DistributedScheduler'
        '.setup'
    )
    def setUp(self, a):
        """Create and open the scheduler used by the tests.

        ``a`` is the mock object injected by ``mock.patch`` for the patched
        ``setup`` method; the body does not use it.
        """
        # The positional arguments are passed straight through; their meaning
        # is defined by DistributedScheduler.__init__, which is not shown here.
        self.scheduler = DistributedScheduler(MagicMock(), False, 10, 3)
        scheduler = self.scheduler
        scheduler.open(MagicMock())
        scheduler.spider.name = 'link'
        self.req = None

    def get_request(self):
        """Return a new ``Request`` for ``http://ex.com`` with preset meta."""
        request = Request('http://ex.com')
        meta_fields = (
            # required
            ('crawlid', 'abc123'),
            ('appid', 'myapp'),
            # optional defaults
            ('spiderid', 'link'),
            ('attrs', {}),
            ('allowed_domains', ()),
            ('allow_regex', ()),
            ('deny_regex', ()),
            ('deny_extensions', None),
            ('curdepth', 0),
            ('maxdepth', 0),
            ('priority', 0),
            ('retry_times', 0),
            ('expires', 0),
            ('useragent', None),
        )
        # Assign key by key so the meta mapping is filled in the listed order.
        for key, value in meta_fields:
            request.meta[key] = value
        return request
class ThrottleMixin(object):
    """Shared fixture for scheduler tests.

    ``setUp`` builds a ``DistributedScheduler`` with ``update_ipaddress`` and
    ``setup_zookeeper`` patched out, and ``get_request`` returns a ``Request``
    whose ``meta`` carries a fixed set of crawl fields.
    """

    @mock.patch(
        'crawling.distributed_scheduler.DistributedScheduler'
        '.update_ipaddress'
    )
    @mock.patch(
        'crawling.distributed_scheduler.DistributedScheduler'
        '.setup_zookeeper'
    )
    def setUp(self, u, z):
        """Create and open the scheduler used by the tests.

        ``u`` and ``z`` are the mock objects injected by the two
        ``mock.patch`` decorators; the body does not use either of them.
        """
        # NOTE: stacked patch decorators inject bottom-up, so ``u`` receives
        # the setup_zookeeper mock and ``z`` the update_ipaddress mock.
        # The positional arguments are passed straight through; their meaning
        # is defined by DistributedScheduler.__init__, which is not shown here.
        self.scheduler = DistributedScheduler(
            MagicMock(), False, 60, 10, 3, MagicMock(), 10, 60, False, 60,
            False, False, '.*', True, 3600, None, 600, 600,
        )
        scheduler = self.scheduler
        scheduler.open(MagicMock())
        scheduler.my_ip = 'ip'
        scheduler.spider.name = 'link'
        self.req = None

    def get_request(self):
        """Return a new ``Request`` for ``http://ex.com`` with preset meta."""
        request = Request('http://ex.com')
        meta_fields = (
            ('crawlid', 'abc123'),
            ('appid', 'myapp'),
            ('url', 'http://ex.com'),
            ('spiderid', 'link'),
            ('attrs', None),
            ('allowed_domains', None),
            ('allow_regex', None),
            ('deny_regex', None),
            ('deny_extensions', None),
            ('curdepth', 0),
            ('maxdepth', 0),
            ('domain_max_pages', None),
            ('priority', 0),
            ('retry_times', 0),
            ('expires', 0),
            ('useragent', None),
            ('cookie', None),
        )
        # Assign key by key so the meta mapping is filled in the listed order.
        for key, value in meta_fields:
            request.meta[key] = value
        return request
class ThrottleMixin(object):
    """Shared fixture for scheduler tests.

    ``setUp`` builds a ``DistributedScheduler`` with ``update_ipaddress`` and
    ``setup_zookeeper`` patched out, and ``get_request`` returns a ``Request``
    whose ``meta`` carries a fixed set of crawl fields.
    """

    @mock.patch(
        'crawling.distributed_scheduler.DistributedScheduler'
        '.update_ipaddress'
    )
    @mock.patch(
        'crawling.distributed_scheduler.DistributedScheduler'
        '.setup_zookeeper'
    )
    def setUp(self, u, z):
        """Create and open the scheduler used by the tests.

        ``u`` and ``z`` are the mock objects injected by the two
        ``mock.patch`` decorators; the body does not use either of them.
        """
        # NOTE: stacked patch decorators inject bottom-up, so ``u`` receives
        # the setup_zookeeper mock and ``z`` the update_ipaddress mock.
        # The positional arguments are passed straight through; their meaning
        # is defined by DistributedScheduler.__init__, which is not shown here.
        self.scheduler = DistributedScheduler(
            MagicMock(), False, 60, 10, 3, MagicMock(), 10, 60, False, 60,
            False, False, '.*', True, 3600,
        )
        scheduler = self.scheduler
        scheduler.open(MagicMock())
        scheduler.my_ip = 'ip'
        scheduler.spider.name = 'link'
        self.req = None

    def get_request(self):
        """Return a new ``Request`` for ``http://ex.com`` with preset meta."""
        request = Request('http://ex.com')
        meta_fields = (
            ('crawlid', 'abc123'),
            ('appid', 'myapp'),
            ('url', 'http://ex.com'),
            ('spiderid', 'link'),
            ('attrs', None),
            ('allowed_domains', None),
            ('allow_regex', None),
            ('deny_regex', None),
            ('deny_extensions', None),
            ('curdepth', 0),
            ('maxdepth', 0),
            ('priority', 0),
            ('retry_times', 0),
            ('expires', 0),
            ('useragent', None),
            ('cookie', None),
        )
        # Assign key by key so the meta mapping is filled in the listed order.
        for key, value in meta_fields:
            request.meta[key] = value
        return request