def test_update_domain_queues(self):
    """Verify update_domain_queues() pushes the domain config onto queues.

    Two scenarios are exercised: a scale of 1, where the configured hits
    are expected unchanged, and a scale of 0.5, where the expected limit
    is halved while the window is left alone.
    """
    # (domain, queue key, scale, expected limit)
    scenarios = (
        ("ex1.com", 'link:ex1.com:queue', 1, 10),
        # the scale factor affects the limit only
        ("ex2.com", 'link:ex2.com:queue', 0.5, 5),
    )

    for domain, queue_key, scale, expected_limit in scenarios:
        self.scheduler.domain_config = {
            domain: {"window": 50, "hits": 10, "scale": scale},
        }
        # start from values that differ from the config so an update shows
        throttled = RedisThrottledQueue(MagicMock(), MagicMock(), 100, 100)
        self.scheduler.queue_dict = {queue_key: [throttled, 0]}

        self.scheduler.update_domain_queues()

        updated = self.scheduler.queue_dict[queue_key][0]
        self.assertEqual(updated.window, 50)
        self.assertEqual(updated.limit, expected_limit)
def create_queues(self):
    '''
    Refresh the in-memory mapping of redis queues.

    Every key matching `<spider name>:*:queue` gets a throttled queue
    instance when it is not tracked yet, or when check_config() returned
    a truthy value for this call.
    '''
    # a new config could have been loaded between scrapes
    config_changed = self.check_config()

    self.queue_keys = self.redis_conn.keys(self.spider.name + ":*:queue")

    for key in self.queue_keys:
        # optional prefixes of the throttle key, driven by the type/ip bools
        throttle_key = ""
        if self.add_type:
            throttle_key = self.spider.name + ":"
        if self.add_ip:
            throttle_key = throttle_key + self.my_ip + ":"

        # keys look like `type:tld:queue`; the tld is the second field
        the_domain = re.split(':', key)[1]
        throttle_key = throttle_key + the_domain

        # already tracked and nothing changed: leave the entry alone
        if key in self.queue_dict and not config_changed:
            continue

        self.logger.debug("Added new Throttled Queue {q}".format(q=key))
        base_queue = RedisPriorityQueue(self.redis_conn, key,
                                        encoding=ujson)

        if the_domain in self.domain_config:
            # custom window and hits for this domain
            window = self.domain_config[the_domain]['window']
            hits = self.domain_config[the_domain]['hits']
            # adjust the crawl rate based on the scale, if one exists
            if 'scale' in self.domain_config[the_domain]:
                hits = int(hits * self.fit_scale(
                    self.domain_config[the_domain]['scale']))
        else:
            # fall back to the default window and hits
            window = self.window
            hits = self.hits

        # each entry is a two-element list: [0] is the queue object,
        # [1] is the time it was created
        self.queue_dict[key] = [
            RedisThrottledQueue(self.redis_conn, base_queue, window, hits,
                                self.moderated, throttle_key,
                                throttle_key, True),
            time.time(),
        ]
class TestModeratedElasticRedisThrottledQueue(TestCase):
    """Exercise allowed() on a moderated queue created with elastic=True."""

    def setUp(self):
        """Create the queue under test on top of mocked dependencies."""
        first_mock = MagicMock()
        second_mock = MagicMock()
        self.queue = RedisThrottledQueue(first_mock, second_mock, 4, 2,
                                         True, elastic=True)

    def test_moderated(self):
        """allowed() is False before the kick in and True once it is reached."""
        queue = self.queue

        # the elastic kick in has not happened yet, so moderation blocks
        queue.is_moderated = MagicMock(return_value=True)
        queue.elastic_kick_in = 0
        self.assertFalse(queue.allowed())

        # the kick in overrides, even though the queue is still moderated
        queue.elastic_kick_in = queue.limit
        queue.check_elastic = MagicMock(return_value=True)
        queue.test_hits = MagicMock(return_value=True)
        self.assertTrue(queue.allowed())
class TestModeratedRedisThrottledQueue(TestCase):
    """Exercise allowed() on a queue created with the moderated flag set."""

    def setUp(self):
        """Create the moderated queue on top of mocked dependencies."""
        first_mock = MagicMock()
        second_mock = MagicMock()
        self.queue = RedisThrottledQueue(first_mock, second_mock, 4, 2, True)

    def test_moderated(self):
        """allowed() follows is_moderated() and fails closed on WatchError."""
        queue = self.queue

        # a moderated queue should pop ~ every x seconds; the window limit
        # itself is covered by the unmoderated test
        queue.is_moderated = MagicMock(return_value=True)
        self.assertFalse(queue.allowed())

        # no longer moderated and the hit test passes
        queue.is_moderated = MagicMock(return_value=False)
        queue.test_hits = MagicMock(return_value=True)
        self.assertTrue(queue.allowed())

        # an exception from the hit test denies the pop even though
        # moderation is fine
        queue.test_hits = MagicMock(side_effect=WatchError)
        self.assertFalse(queue.allowed())
class TestModeratedRedisThrottledQueue(TestCase):
    """Checks allowed() for a throttled queue built in moderated mode."""

    def setUp(self):
        """Build a fresh moderated queue backed by mocks for each test."""
        mocks = [MagicMock(), MagicMock()]
        self.queue = RedisThrottledQueue(mocks[0], mocks[1], 4, 2, True)

    def test_moderated(self):
        """Walk through the blocked, allowed and WatchError outcomes."""
        throttled = self.queue

        # moderated: a pop is refused (a moderated queue should pop ~ every
        # x seconds; window limits are tested in the unmoderated test)
        throttled.is_moderated = MagicMock(return_value=True)
        self.assertFalse(throttled.allowed())

        # moderation cleared and hits OK: a pop is allowed
        throttled.is_moderated = MagicMock(return_value=False)
        throttled.test_hits = MagicMock(return_value=True)
        self.assertTrue(throttled.allowed())

        # mock exception raised even with good moderation
        throttled.test_hits = MagicMock(side_effect=WatchError)
        self.assertFalse(throttled.allowed())
def create_throttle_queues(self):
    """
    Create the throttled queues.

    Looks up every redis key matching `<spider name>:<job id>:*:queue` and
    builds a throttled queue for each key that is not tracked yet, or for
    all of them when check_config() returned a truthy value.

    :return: None
    """
    config_changed = self.check_config()

    key_pattern = '{spider_type}:{job_id}:*:queue'.format(
        spider_type=self.spider.name, job_id=self.job_id)
    self.queue_keys = self.redis_conn.keys(key_pattern)

    for key in self.queue_keys:
        # optional prefixes of the throttle key
        throttle_key = ""
        if self.add_type:
            throttle_key = self.spider.name + ":"
        if self.add_ip:
            throttle_key = throttle_key + self.ip + ":"

        # the domain is the third colon-separated field of the key
        the_domain = re.split(':', key)[2]
        throttle_key += the_domain

        # already tracked and nothing changed: keep the existing entry
        if key in self.queue_dict and not config_changed:
            continue

        self.logger.debug("Added new Throttled Queue {q}".format(q=key))
        base_queue = RedisPriorityQueue(self.redis_conn, key)

        if the_domain in self.domain_config:
            # per-domain window and hits
            window = self.domain_config[the_domain]['window']
            hits = self.domain_config[the_domain]['hits']
            # scale the hit count when a scale is configured
            if 'scale' in self.domain_config[the_domain]:
                hits = int(hits * self.fit_scale(
                    self.domain_config[the_domain]['scale']))
        else:
            # default window and hits
            window = self.window
            hits = self.hits

        # entry layout: [throttled queue, creation time]
        self.queue_dict[key] = [
            RedisThrottledQueue(self.redis_conn, base_queue, window, hits,
                                self.moderated, throttle_key,
                                throttle_key, True),
            time.time(),
        ]
class TestUnmoderatedRedisThrottledQueue(TestCase):
    """Exercise allowed() on a queue created without moderation."""

    def setUp(self):
        """Create a queue whose limit is 2 hits in the window."""
        first_mock = MagicMock()
        second_mock = MagicMock()
        self.queue = RedisThrottledQueue(first_mock, second_mock, 1, 2)

    def test_unmoderated(self):
        """allowed() depends on the zcard hit count and fails on WatchError."""
        queue = self.queue

        # an unmoderated queue is really just testing the number of hits
        # in a given window: (hits reported by zcard, assertion to apply)
        cases = (
            (0, self.assertTrue),
            (1, self.assertTrue),
            (2, self.assertFalse),
        )
        for reported_hits, check in cases:
            queue.redis_conn.zcard = MagicMock(return_value=reported_hits)
            check(queue.allowed())

        # an exception denies the pop even though the hit count is fine
        queue.redis_conn.zcard = MagicMock(return_value=0,
                                           side_effect=WatchError)
        self.assertFalse(queue.allowed())
class TestUnmoderatedRedisThrottledQueue(TestCase):
    """Checks allowed() for a throttled queue without moderation."""

    def setUp(self):
        """Build a fresh queue (limit: 2 hits in the window) for each test."""
        mocks = [MagicMock(), MagicMock()]
        self.queue = RedisThrottledQueue(mocks[0], mocks[1], 1, 2)

    def test_unmoderated(self):
        """Pops are allowed below the limit and refused at it or on error."""
        throttled = self.queue

        # only the number of hits in the window matters here; 0 and 1 are
        # under the limit of 2
        for under_limit in (0, 1):
            throttled.redis_conn.zcard = MagicMock(return_value=under_limit)
            self.assertTrue(throttled.allowed())

        # the limit has been reached
        throttled.redis_conn.zcard = MagicMock(return_value=2)
        self.assertFalse(throttled.allowed())

        # mock exception raised even with good hits
        throttled.redis_conn.zcard = MagicMock(return_value=0,
                                               side_effect=WatchError)
        self.assertFalse(throttled.allowed())
def test_error_config(self):
    """error_config() should reset queues to the scheduler defaults.

    After the call the existing queue is expected to carry the scheduler's
    own window/hits and the domain config is expected to be empty.
    """
    scheduler = self.scheduler
    queue_key = 'link:ex1.com:queue'

    # a domain config that must NOT survive the error handling
    scheduler.domain_config = {"ex1.com": {"window": 50, "hits": 10}}
    scheduler.window = 7
    scheduler.hits = 5

    throttled = RedisThrottledQueue(MagicMock(), MagicMock(), 100, 100)
    scheduler.queue_dict = {queue_key: [throttled, 0]}

    scheduler.error_config('stuff')

    reset_queue = scheduler.queue_dict[queue_key][0]
    self.assertEqual(reset_queue.window, 7)
    self.assertEqual(reset_queue.limit, 5)
    self.assertEqual(scheduler.domain_config, {})
def main():
    """Run the throttled queue demo against a Redis server.

    Parses the command line, pushes twice `--num-hits` items onto a
    throttled queue and then pops them in a loop, printing each item with
    the time elapsed since reading started. Stop it with ^C; the queue is
    cleared afterwards.
    """
    import argparse
    import redis
    import time

    import sys
    from os import path
    # make the directory two levels above this file importable before the
    # scutils imports below
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))

    from scutils.redis_queue import RedisPriorityQueue
    from scutils.redis_throttled_queue import RedisThrottledQueue

    parser = argparse.ArgumentParser(
        description="Throttled Queue Test Script."
        " Start either a single or multiple processes to see the "
        " throttled queue mechanism in action.")
    parser.add_argument('-r', '--redis-host', action='store', required=True,
                        help="The Redis host ip")
    parser.add_argument('-p', '--redis-port', action='store', default='6379',
                        help="The Redis port")
    parser.add_argument('-m', '--moderate', action='store_const', const=True,
                        default=False, help="Moderate the outbound Queue")
    parser.add_argument('-w', '--window', action='store', default=60,
                        help="The window time to test")
    parser.add_argument('-n', '--num-hits', action='store', default=10,
                        help="The number of pops allowed in the given window")
    parser.add_argument('-q', '--queue', action='store', default='testqueue',
                        help="The Redis queue name")

    args = vars(parser.parse_args())

    # values given on the command line arrive as strings
    window = int(args['window'])
    num = int(args['num_hits'])
    host = args['redis_host']
    port = args['redis_port']
    mod = args['moderate']
    queue = args['queue']

    conn = redis.Redis(host=host, port=port)

    q = RedisPriorityQueue(conn, queue)
    t = RedisThrottledQueue(conn, q, window, num, mod)

    def push_items(amount):
        """Push `amount` items named item-<i>, passing i as second argument."""
        for i in range(0, amount):
            t.push('item-' + str(i), i)

    # A single pre-formatted argument makes print(...) behave the same as a
    # statement (Python 2) and as a function (Python 3).
    print("Adding {0} items for testing".format(num * 2))
    push_items(num * 2)

    def read_items():
        """Pop items forever, reporting each one with the elapsed time."""
        print("Kill when satisfied ^C")
        ti = time.time()
        while True:
            item = t.pop()
            if item:
                print("My item {0} My time: {1}".format(
                    item, time.time() - ti))

    try:
        read_items()
    except KeyboardInterrupt:
        # ^C is the expected way to stop the demo
        pass
    t.clear()
    print("Finished")
def main():
    """Demonstrate the throttled queue against a live Redis server.

    Reads its settings from the command line, fills a throttled queue with
    twice `--num-hits` items and then keeps popping, printing every item
    together with the seconds elapsed since reading began. Interrupt with
    ^C to stop; the queue is cleared before exiting.
    """
    import argparse
    import redis
    import time

    import sys
    from os import path
    # add the directory two levels above this file to the import path
    # before importing scutils
    here = path.abspath(__file__)
    sys.path.append(path.dirname(path.dirname(here)))

    from scutils.redis_queue import RedisPriorityQueue
    from scutils.redis_throttled_queue import RedisThrottledQueue

    parser = argparse.ArgumentParser(
        description="Throttled Queue Test Script."
        " Start either a single or multiple processes to see the "
        " throttled queue mechanism in action.")
    parser.add_argument('-r', '--redis-host', required=True,
                        help="The Redis host ip")
    parser.add_argument('-p', '--redis-port', default='6379',
                        help="The Redis port")
    parser.add_argument('-m', '--moderate', action='store_true',
                        help="Moderate the outbound Queue")
    parser.add_argument('-w', '--window', default=60,
                        help="The window time to test")
    parser.add_argument('-n', '--num-hits', default=10,
                        help="The number of pops allowed in the given window")
    parser.add_argument('-q', '--queue', default='testqueue',
                        help="The Redis queue name")

    options = parser.parse_args()

    # command line values arrive as strings, the defaults are ints
    window = int(options.window)
    num = int(options.num_hits)

    conn = redis.Redis(host=options.redis_host, port=options.redis_port)
    backing_queue = RedisPriorityQueue(conn, options.queue)
    throttled = RedisThrottledQueue(conn, backing_queue, window, num,
                                    options.moderate)

    def push_items(amount):
        """Push `amount` items named item-<n>, passing n as second argument."""
        for index in range(amount):
            throttled.push('item-' + str(index), index)

    print("Adding", num * 2, "items for testing")
    push_items(num * 2)

    def read_items():
        """Pop forever, printing each item and the time since the start."""
        print("Kill when satisfied ^C")
        started = time.time()
        popped = 0  # tally of items seen; not read anywhere else
        while True:
            item = throttled.pop()
            if not item:
                continue
            print("My item", item, "My time:", time.time() - started)
            popped += 1

    try:
        read_items()
    except KeyboardInterrupt:
        # ^C is the expected way to end the demo
        pass

    throttled.clear()
    print("Finished")
def setUp(self):
    """Create the queue under test with the moderated flag set."""
    first_mock = MagicMock()
    second_mock = MagicMock()
    self.queue = RedisThrottledQueue(first_mock, second_mock, 4, 2, True)
def setUp(self):
    """Create the queue under test; its limit is 2 hits in the window."""
    first_mock = MagicMock()
    second_mock = MagicMock()
    self.queue = RedisThrottledQueue(first_mock, second_mock, 1, 2)
def setUp(self):
    """Create the queue under test with moderation and elastic=True."""
    first_mock = MagicMock()
    second_mock = MagicMock()
    self.queue = RedisThrottledQueue(first_mock, second_mock, 4, 2, True,
                                     elastic=True)
def setUp(self):
    """Build a fresh queue backed by mocks, limited to 2 hits per window."""
    mocks = [MagicMock(), MagicMock()]
    self.queue = RedisThrottledQueue(mocks[0], mocks[1], 1, 2)