def setUpClass(self):
    """Create the shared ``task_queue`` fixture and seed it with three tasks.

    ``rate`` and ``burst`` are set to 100000 and ``processing_timeout`` to
    0.2. 'a3' is put with priority 2 plus a timestamp 0.1 s in the future
    (presumably its earliest execution time -- confirm against
    ``TaskQueue.put``); 'a1' and 'a2' are put with priorities 1 and 3.
    """
    queue = TaskQueue()
    self.task_queue = queue

    queue.rate = 100000
    queue.burst = 100000
    queue.processing_timeout = 0.2

    run_at = time.time() + 0.1
    queue.put('a3', 2, run_at)
    queue.put('a1', 1)
    queue.put('a2', 3)
def setUpClass(self):
    """Build the ``task_queue`` fixture used by the tests.

    The queue gets ``rate`` = ``burst`` = 100000 and a
    ``processing_timeout`` of 0.2, then receives three tasks: 'a3'
    (priority 2, with a timestamp 0.1 s from now as third argument),
    'a1' (priority 1) and 'a2' (priority 3).
    """
    fixture = TaskQueue()
    self.task_queue = fixture

    # Tuning knobs first, tasks afterwards -- same order as before.
    fixture.rate = 100000
    fixture.burst = 100000
    fixture.processing_timeout = 0.2

    delayed_until = time.time() + 0.1
    fixture.put('a3', 2, delayed_until)
    fixture.put('a1', 1)
    fixture.put('a2', 3)
def test_task_queue(self):
    """Walk a single TaskQueue through its priority/time/processing stages.

    Three tasks are queued: 'a3' (priority 3, with a timestamp 0.1 s in the
    future), 'a1' (priority 1) and 'a2' (priority 2). The assertions below
    pin the order in which ``get()`` is expected to hand them out as the
    internal checks are triggered.
    """
    task_queue = TaskQueue()
    task_queue.processing_timeout = 0.1
    task_queue.put('a3', 3, time.time() + 0.1)
    task_queue.put('a1', 1)
    task_queue.put('a2', 2)

    # priority queue: 'a2' is expected first while 'a3' is still delayed.
    self.assertEqual(task_queue.get(), 'a2')

    # time queue: once the 0.1 s delay has elapsed and the time queue is
    # checked, 'a3' is expected ahead of 'a1'.
    time.sleep(0.1)
    task_queue._check_time_queue()
    self.assertEqual(task_queue.get(), 'a3')
    self.assertEqual(task_queue.get(), 'a1')

    # processing queue: 'a2' was fetched but never marked done, so the
    # processing check is expected to make it available again.
    task_queue._check_processing()
    self.assertEqual(task_queue.get(), 'a2')
    self.assertEqual(len(task_queue), 0)

    # done: 'a2' and 'a1' are acknowledged; only 'a3' is expected to come
    # back after another timeout, after which the queue yields nothing.
    task_queue.done('a2')
    task_queue.done('a1')
    time.sleep(0.1)
    task_queue._check_processing()
    task_queue._check_time_queue()
    self.assertEqual(task_queue.get(), 'a3')
    # Identity check: an exhausted queue must return None itself.
    self.assertIsNone(task_queue.get())
class TestTaskQueue(unittest.TestCase):
    """Staged checks of TaskQueue ordering, delay and re-queue behaviour.

    All tests share the single queue built in ``setUpClass`` and mutate it,
    so they depend on running in name order (``test_1_`` .. ``test_4_``),
    which is the order unittest's default loader uses.
    """

    @classmethod
    def setUpClass(cls):
        """Create the shared queue and seed it with 'a3', 'a1' and 'a2'."""
        cls.task_queue = TaskQueue()
        cls.task_queue.rate = 100000
        cls.task_queue.burst = 100000
        cls.task_queue.processing_timeout = 0.2

        # 'a3' carries a timestamp 0.1 s in the future as third argument.
        cls.task_queue.put('a3', 2, time.time() + 0.1)
        cls.task_queue.put('a1', 1)
        cls.task_queue.put('a2', 3)

    def test_1_priority_queue(self):
        """'a2' (priority 3) is expected from the first ``get()``."""
        self.assertEqual(self.task_queue.get(), 'a2')

    def test_2_time_queue(self):
        """After 0.1 s and an update, 'a3' then 'a1' are expected."""
        time.sleep(0.1)
        self.task_queue.check_update()
        self.assertEqual(self.task_queue.get(), 'a3')
        self.assertEqual(self.task_queue.get(), 'a1')

    def test_3_processing_queue(self):
        """'a2', fetched earlier but never marked done, is expected back."""
        # Together with the sleep in test_2 this reaches the 0.2 s
        # processing_timeout set in setUpClass.
        time.sleep(0.1)
        self.task_queue.check_update()
        self.assertEqual(self.task_queue.get(), 'a2')
        self.assertEqual(len(self.task_queue), 0)

    def test_4_done(self):
        """Only the un-acknowledged 'a3' is expected back after done()."""
        self.task_queue.done('a2')
        self.task_queue.done('a1')
        time.sleep(0.1)
        self.task_queue.check_update()
        self.assertEqual(self.task_queue.get(), 'a3')
        # Identity check: an exhausted queue must return None itself.
        self.assertIsNone(self.task_queue.get())
class TestTaskQueue(unittest.TestCase):
    """Ordered scenario tests for TaskQueue.

    The four tests operate on one class-level queue created in
    ``setUpClass`` and each builds on the state left by the previous one;
    the numeric prefixes keep them in the required order under unittest's
    default (alphabetical) test ordering.
    """

    @classmethod
    def setUpClass(cls):
        """Build the shared queue: limits, timeout, then three tasks."""
        cls.task_queue = TaskQueue()
        cls.task_queue.rate = 100000
        cls.task_queue.burst = 100000
        cls.task_queue.processing_timeout = 0.2

        # Third argument for 'a3' is a timestamp 0.1 s from now.
        cls.task_queue.put('a3', 2, time.time() + 0.1)
        cls.task_queue.put('a1', 1)
        cls.task_queue.put('a2', 3)

    def test_1_priority_queue(self):
        """The first ``get()`` is expected to return 'a2' (priority 3)."""
        self.assertEqual(self.task_queue.get(), 'a2')

    def test_2_time_queue(self):
        """Once 0.1 s has passed and the queue is updated: 'a3', then 'a1'."""
        time.sleep(0.1)
        self.task_queue.check_update()
        self.assertEqual(self.task_queue.get(), 'a3')
        self.assertEqual(self.task_queue.get(), 'a1')

    def test_3_processing_queue(self):
        """'a2' was fetched but not marked done, so it is expected again."""
        # This sleep plus the one in test_2 adds up to the 0.2 s
        # processing_timeout configured in setUpClass.
        time.sleep(0.1)
        self.task_queue.check_update()
        self.assertEqual(self.task_queue.get(), 'a2')
        self.assertEqual(len(self.task_queue), 0)

    def test_4_done(self):
        """After done('a2') and done('a1') only 'a3' is expected to return."""
        self.task_queue.done('a2')
        self.task_queue.done('a1')
        time.sleep(0.1)
        self.task_queue.check_update()
        self.assertEqual(self.task_queue.get(), 'a3')
        # Use the identity assertion for the "nothing left" case.
        self.assertIsNone(self.task_queue.get())
def __init__(self):
    """Initialise the base class, then the downloader, queue and counter.

    The downloader is built from ``config.HEADERS`` and the task queue from
    ``config.REDIS_DB_URL`` / ``config.REDIS_DB_DATABASE``; ``count`` starts
    at 0.
    """
    super().__init__()

    headers = config.HEADERS
    self.downloader = Downloader(headers)

    db_url = config.REDIS_DB_URL
    db_name = config.REDIS_DB_DATABASE
    self.task_queue = TaskQueue(db_url, db_name)

    self.count = 0
class Scheduler(threading.Thread):
    """Thread that pulls tasks off the task queue, downloads and parses them.

    Parse results that are ``Task`` instances are pushed back onto the queue;
    ``Document`` instances are stored through their class's ``store_item``.
    """

    def __init__(self):
        """Create the downloader and task queue from ``config`` settings."""
        super().__init__()
        self.downloader = Downloader(config.HEADERS)
        self.task_queue = TaskQueue(config.REDIS_DB_URL,
                                    config.REDIS_DB_DATABASE)
        # Not read anywhere in this class; kept because it is a public
        # attribute that other code may rely on.
        self.count = 0

    def append_request_task(self, task: Task):
        """Push *task* onto the task queue."""
        self.task_queue.push_task(task)

    def run(self) -> None:
        """Process queued tasks until ``get_top_task()`` returns ``None``.

        A failing task is retried after a growing delay (20 s, 30 s, ...).
        The failure counter is reset after every successful task.

        Raises:
            Exception: the last processing error, re-raised once the
                consecutive-failure counter has exceeded ``MAX_RETRY``.
        """
        retry = 0
        while True:
            task = self.task_queue.get_top_task()
            if task is None:
                break
            try:
                self._process_task(task)
            except Exception:
                # ``except Exception`` (not a bare ``except``) so that
                # SystemExit / KeyboardInterrupt are never swallowed and
                # retried.
                if retry > MAX_RETRY:
                    raise
                retry += 1
                # Logged after the increment so the first retry reads "1".
                logger.warning(
                    f'Failed to process task, attempt the {retry} retry.')
                delay = retry * 10 + 10
                time.sleep(delay)
            else:
                retry = 0

    def _process_task(self, task: Task) -> None:
        """Download *task*, dispatch every parsed item, then drop the task.

        On a parse failure the downloaded content is dumped to
        ``exception.html`` in the working directory, the error is logged and
        re-raised; in that case the task is NOT dropped from the queue.

        Raises:
            Exception: if the parser yields an item that is neither a
                ``Task`` nor a ``Document``, or on any parser/store error.
        """
        # Download errors propagate directly; only parsing is dumped below.
        content = self.downloader.download_task(task)
        try:
            parser = get_parser(task.url)
            for item in parser.parse(task, content):
                if isinstance(item, Task):
                    logger.info(f'Append new task {item}')
                    self.task_queue.push_task(item)
                elif isinstance(item, Document):
                    logger.info(f'Save the parsed item {item}')
                    item.__class__.store_item(item)
                else:
                    raise Exception(
                        f'Unsupported parse result: class={item.__class__}')
        except Exception as e:
            # Explicit UTF-8: with the locale default encoding a page
            # containing non-ASCII text could fail to write and mask the
            # original parse error.
            with open('exception.html', 'w', encoding='utf-8') as ofile:
                ofile.write(content)
            logger.error('Parse failed with error:')
            logger.exception(e)
            raise

        self.task_queue.drop_top_task(task.type_)

        # Wait the configured number of seconds before returning. An
        # earlier variant added random jitter on top; it is disabled.
        delay = config.DOWNLOAD_DELAY
        logger.info(f'Delay for {delay} seconds.')
        time.sleep(delay)
def test_task_queue(self):
    """Check the hand-out order of a TaskQueue across its internal stages.

    'a3' is queued with priority 3 and a timestamp 0.1 s in the future,
    'a1' with priority 1 and 'a2' with priority 2; ``processing_timeout``
    is 0.1. Each section below triggers one internal check and asserts
    what ``get()`` is expected to return afterwards.
    """
    task_queue = TaskQueue()
    task_queue.processing_timeout = 0.1
    task_queue.put('a3', 3, time.time() + 0.1)
    task_queue.put('a1', 1)
    task_queue.put('a2', 2)

    # priority queue: with 'a3' still delayed, 'a2' is expected first.
    self.assertEqual(task_queue.get(), 'a2')

    # time queue: after the delay and a time-queue check, 'a3' is
    # expected before 'a1'.
    time.sleep(0.1)
    task_queue._check_time_queue()
    self.assertEqual(task_queue.get(), 'a3')
    self.assertEqual(task_queue.get(), 'a1')

    # processing queue: 'a2' was never marked done, so it is expected to
    # be handed out again after the processing check.
    task_queue._check_processing()
    self.assertEqual(task_queue.get(), 'a2')
    self.assertEqual(len(task_queue), 0)

    # done: after acknowledging 'a2' and 'a1', only 'a3' is expected to
    # reappear; the following get() must find nothing.
    task_queue.done('a2')
    task_queue.done('a1')
    time.sleep(0.1)
    task_queue._check_processing()
    task_queue._check_time_queue()
    self.assertEqual(task_queue.get(), 'a3')
    # Identity check: an exhausted queue must return None itself.
    self.assertIsNone(task_queue.get())