Exemple #1
0
    def setUpClass(self):
        """Create the shared ``TaskQueue`` fixture and load three tasks into it.

        ``rate`` and ``burst`` are both set to 100000 and
        ``processing_timeout`` to 0.2 before anything is enqueued.  Task
        ``'a3'`` is put with a timestamp 0.1 s in the future as its third
        argument; ``'a1'`` and ``'a2'`` are put without one.
        """
        # NOTE(review): the second argument to ``put`` is presumably a
        # priority -- confirm against ``TaskQueue.put``.
        limit = 100000
        queue = self.task_queue = TaskQueue()
        queue.rate = limit
        queue.burst = limit
        queue.processing_timeout = 0.2

        queue.put('a3', 2, time.time() + 0.1)
        queue.put('a1', 1)
        queue.put('a2', 3)
Exemple #2
0
    def setUpClass(self):
        """Prepare the ``TaskQueue`` shared by the tests and enqueue three tasks.

        The queue gets ``rate`` and ``burst`` of 100000 and a
        ``processing_timeout`` of 0.2.  ``'a3'`` is enqueued with a third
        argument of "now + 0.1 s"; ``'a1'`` and ``'a2'`` are enqueued with
        only two arguments.
        """
        # NOTE(review): ``put``'s second argument looks like a priority --
        # verify against the ``TaskQueue`` implementation.
        ceiling = 100000
        fixture = self.task_queue = TaskQueue()
        fixture.rate = ceiling
        fixture.burst = ceiling
        fixture.processing_timeout = 0.2

        fixture.put('a3', 2, time.time()+0.1)
        fixture.put('a1', 1)
        fixture.put('a2', 3)
    def test_task_queue(self):
        """Walk one ``TaskQueue`` through fetch, re-delivery and completion.

        The assertions pin the order in which ``get()`` hands tasks out after
        each of the queue's internal ``_check_*`` passes.
        """
        queue = TaskQueue()
        queue.processing_timeout = 0.1
        queue.put('a3', 3, time.time()+0.1)
        queue.put('a1', 1)
        queue.put('a2', 2)

        # Immediately after the puts, 'a2' is the first task returned
        # (presumably because it has the highest priority among the tasks
        # that are not delayed -- confirm against TaskQueue).
        self.assertEqual(queue.get(), 'a2')

        # Once the 0.1 s delay attached to 'a3' has passed and the time
        # queue has been checked, 'a3' is returned, followed by 'a1'.
        time.sleep(0.1)
        queue._check_time_queue()
        for expected in ('a3', 'a1'):
            self.assertEqual(queue.get(), expected)

        # After a processing check 'a2' is handed out a second time
        # (presumably its processing timeout has expired), and the queue
        # then reports a length of 0.
        queue._check_processing()
        self.assertEqual(queue.get(), 'a2')
        self.assertEqual(len(queue), 0)

        # 'a2' and 'a1' are marked done; 'a3' is not, so after another wait
        # and both checks it is the only task returned before get() yields
        # None.
        for finished in ('a2', 'a1'):
            queue.done(finished)
        time.sleep(0.1)
        queue._check_processing()
        queue._check_time_queue()
        for expected in ('a3', None):
            self.assertEqual(queue.get(), expected)
Exemple #4
0
class TestTaskQueue(unittest.TestCase):
    """Step-by-step checks of ``TaskQueue`` sharing one class-level queue.

    The numeric prefixes in the test names matter: unittest runs test
    methods in sorted name order, and every test relies on the queue state
    left behind by the previous one.
    """

    @classmethod
    def setUpClass(cls):
        """Build the shared queue once and enqueue three tasks."""
        cls.task_queue = TaskQueue()
        cls.task_queue.rate = 100000
        cls.task_queue.burst = 100000
        cls.task_queue.processing_timeout = 0.2

        # 'a3' carries a timestamp 0.1 s in the future as its third argument.
        # NOTE(review): the second argument is presumably a priority --
        # confirm against TaskQueue.put.
        cls.task_queue.put('a3', 2, time.time()+0.1)
        cls.task_queue.put('a1', 1)
        cls.task_queue.put('a2', 3)

    def test_1_priority_queue(self):
        """The first task handed out is 'a2'."""
        self.assertEqual(self.task_queue.get(), 'a2')

    def test_2_time_queue(self):
        """After 0.1 s and an update, 'a3' is returned, then 'a1'."""
        time.sleep(0.1)
        self.task_queue.check_update()
        self.assertEqual(self.task_queue.get(), 'a3')
        self.assertEqual(self.task_queue.get(), 'a1')

    def test_3_processing_queue(self):
        """'a2' is handed out again and the queue then has length 0."""
        # Presumably the 0.2 s processing timeout has elapsed by now (two
        # 0.1 s sleeps since 'a2' was first fetched) -- confirm against
        # TaskQueue.
        time.sleep(0.1)
        self.task_queue.check_update()
        self.assertEqual(self.task_queue.get(), 'a2')
        self.assertEqual(len(self.task_queue), 0)

    def test_4_done(self):
        """With 'a2' and 'a1' marked done, only 'a3' is returned again."""
        self.task_queue.done('a2')
        self.task_queue.done('a1')
        time.sleep(0.1)
        self.task_queue.check_update()
        self.assertEqual(self.task_queue.get(), 'a3')
        self.assertIsNone(self.task_queue.get())
Exemple #5
0
class TestTaskQueue(unittest.TestCase):
    """Ordered scenario tests for ``TaskQueue`` using one shared queue.

    unittest executes test methods in sorted name order; the ``test_N_``
    prefixes fix that order because each test depends on the state the
    previous test left in the class-level queue.
    """

    @classmethod
    def setUpClass(cls):
        """Create the class-level queue and enqueue the three test tasks."""
        cls.task_queue = TaskQueue()
        cls.task_queue.rate = 100000
        cls.task_queue.burst = 100000
        cls.task_queue.processing_timeout = 0.2

        # Only 'a3' is given a third argument: a timestamp 0.1 s from now.
        # NOTE(review): the second argument looks like a priority -- verify
        # against TaskQueue.put.
        cls.task_queue.put('a3', 2, time.time() + 0.1)
        cls.task_queue.put('a1', 1)
        cls.task_queue.put('a2', 3)

    def test_1_priority_queue(self):
        """``get()`` returns 'a2' first."""
        self.assertEqual(self.task_queue.get(), 'a2')

    def test_2_time_queue(self):
        """Once 0.1 s has passed and the queue is updated, 'a3' then 'a1'."""
        time.sleep(0.1)
        self.task_queue.check_update()
        self.assertEqual(self.task_queue.get(), 'a3')
        self.assertEqual(self.task_queue.get(), 'a1')

    def test_3_processing_queue(self):
        """'a2' is returned a second time; afterwards the length is 0."""
        # Presumably 'a2' has exceeded the 0.2 s processing timeout at this
        # point -- confirm against TaskQueue.
        time.sleep(0.1)
        self.task_queue.check_update()
        self.assertEqual(self.task_queue.get(), 'a2')
        self.assertEqual(len(self.task_queue), 0)

    def test_4_done(self):
        """After marking 'a2' and 'a1' done, 'a3' comes back, then nothing."""
        self.task_queue.done('a2')
        self.task_queue.done('a1')
        time.sleep(0.1)
        self.task_queue.check_update()
        self.assertEqual(self.task_queue.get(), 'a3')
        self.assertIsNone(self.task_queue.get())
Exemple #6
0
 def __init__(self):
     """Initialise the base class, then build the downloader and task queue.

     The downloader is constructed from ``config.HEADERS`` and the task
     queue from ``config.REDIS_DB_URL`` and ``config.REDIS_DB_DATABASE``;
     ``count`` starts at 0.
     """
     super().__init__()
     headers = config.HEADERS
     self.downloader = Downloader(headers)
     redis_url, redis_database = config.REDIS_DB_URL, config.REDIS_DB_DATABASE
     self.task_queue = TaskQueue(redis_url, redis_database)
     self.count = 0
Exemple #7
0
class Scheduler(threading.Thread):
    """Thread that pulls tasks from a ``TaskQueue`` and processes them.

    Each task is downloaded and handed to the parser selected for its URL;
    the parser's results are either pushed back as new tasks or stored as
    documents.
    """

    def __init__(self):
        """Create the downloader and the task queue from ``config``."""
        super().__init__()
        self.downloader = Downloader(config.HEADERS)
        self.task_queue = TaskQueue(config.REDIS_DB_URL,
                                    config.REDIS_DB_DATABASE)
        # Not read by any method of this class (a former debugging cap on
        # the number of queued tasks used it); kept so the attribute still
        # exists for outside code.
        self.count = 0

    def append_request_task(self, task: Task):
        """Push ``task`` onto the task queue."""
        self.task_queue.push_task(task)

    def run(self) -> None:
        """Process tasks until ``get_top_task()`` returns ``None``.

        When processing a task raises, the loop sleeps and goes round again,
        at most ``MAX_RETRY`` times in a row; a successful task resets the
        counter.  The sleep grows with each consecutive failure
        (20 s, 30 s, ...).

        Raises:
            Exception: the error from ``_process_task`` once ``MAX_RETRY``
                consecutive retries have been used up.
        """
        retry = 0
        while True:
            task = self.task_queue.get_top_task()
            if task is None:
                break

            try:
                self._process_task(task)
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt / SystemExit are not swallowed and
                # turned into a sleep-and-retry.
                if retry >= MAX_RETRY:
                    raise
                retry += 1
                logger.warning(
                    f'Failed to process task, attempt the {retry} retry.')
                # NOTE(review): the failed task is not dropped by
                # _process_task, so presumably the next get_top_task()
                # returns it again -- confirm against TaskQueue.
                time.sleep(retry * 10 + 10)
            else:
                retry = 0

    def _process_task(self, task: Task) -> None:
        """Download ``task``, parse the content and dispatch every result.

        ``Task`` results are pushed onto the queue, ``Document`` results are
        stored through their class's ``store_item``.  On success the task is
        dropped from the queue and the method sleeps for
        ``config.DOWNLOAD_DELAY`` seconds.

        Raises:
            Exception: if the parser yields something that is neither a
                ``Task`` nor a ``Document``, or if parsing / dispatching
                fails.  In both cases the downloaded content is first
                written to ``exception.html`` and the error is logged.
        """
        content = self.downloader.download_task(task)
        try:
            parser = get_parser(task.url)
            for item in parser.parse(task, content):
                if isinstance(item, Task):
                    logger.info(f'Append new task {item}')
                    self.task_queue.push_task(item)
                elif isinstance(item, Document):
                    logger.info(f'Save the parsed item {item}')
                    item.__class__.store_item(item)
                else:
                    raise Exception(
                        f'Unsupported parse result: class={item.__class__}')

        except Exception as e:
            # Keep the offending page for inspection.  An explicit encoding
            # stops the dump from failing on characters the locale's default
            # encoding cannot represent, which would mask the parse error.
            with open('exception.html', 'w', encoding='utf-8') as ofile:
                ofile.write(content)
            logger.error('Parse failed with error:')
            logger.exception(e)
            raise

        # Only reached when every parse result was handled successfully.
        self.task_queue.drop_top_task(task.type_)

        # Wait for the configured number of seconds (the random jitter that
        # used to be added on top is currently disabled).
        delay = config.DOWNLOAD_DELAY
        logger.info(f'Delay for {delay} seconds.')
        time.sleep(delay)
Exemple #8
0
    def test_task_queue(self):
        """Exercise a fresh ``TaskQueue`` through its whole task life cycle.

        Each group of assertions fixes what ``get()`` returns after the
        corresponding internal ``_check_*`` pass has been run.
        """
        tq = TaskQueue()
        tq.processing_timeout = 0.1
        tq.put('a3', 3, time.time() + 0.1)
        tq.put('a1', 1)
        tq.put('a2', 2)

        # Right after the puts the first task returned is 'a2' (presumably
        # the highest-priority task that is not delayed -- confirm against
        # TaskQueue).
        self.assertEqual(tq.get(), 'a2')

        # When the 0.1 s delay on 'a3' is over and the time queue has been
        # checked, 'a3' comes out, followed by 'a1'.
        time.sleep(0.1)
        tq._check_time_queue()
        for expected in ('a3', 'a1'):
            self.assertEqual(tq.get(), expected)

        # A processing check makes 'a2' available a second time (presumably
        # because its processing timeout expired); the queue length is then
        # 0.
        tq._check_processing()
        self.assertEqual(tq.get(), 'a2')
        self.assertEqual(len(tq), 0)

        # Mark 'a2' and 'a1' as done.  'a3' is not marked, so after waiting
        # and running both checks it is returned once more, and then get()
        # returns None.
        for finished in ('a2', 'a1'):
            tq.done(finished)
        time.sleep(0.1)
        tq._check_processing()
        tq._check_time_queue()
        for expected in ('a3', None):
            self.assertEqual(tq.get(), expected)