Example #1
def add_task(
    queue: deque, _self: Scraper, task_name: str, request: QueueRequest
) -> None:
    """Overwritten version of `add_task` which adds the task to an in-memory
    queue.
    """

    # Round-trip the request through JSON purely to replicate what happens in
    # production and to catch any serialization issues early.
    serialized = json.dumps(request.to_serializable())
    request = QueueRequest.from_serializable(json.loads(serialized))

    # Add it to the queue
    queue.append((task_name, request))
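A minimal sketch of how this override might be wired into a test, reusing the FakeScraper from the examples below; the `functools.partial` binding and the drain loop are illustrative assumptions, not part of the snippet:

from collections import deque
from functools import partial

task_queue: deque = deque()
scraper = FakeScraper("us_nd", "initial_task")

# Bind the queue and the instance up front so the remaining arguments match
# the normal `add_task(task_name, request)` signature.
scraper.add_task = partial(add_task, task_queue, scraper)

scraper.start_scrape(constants.ScrapeType.BACKGROUND)

# Drain the in-memory queue locally instead of relying on a real task queue.
while task_queue:
    task_name, request = task_queue.popleft()
    getattr(scraper, task_name)(request)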
Example #2
    def test_resume_scrape_background(self, mock_get_region, mock_sessions,
                                      mock_task_manager, mock_datetime):
        """Tests the resume_scrape flow for background scraping."""
        region = "us_nd"
        scrape_type = constants.ScrapeType.BACKGROUND
        queue_name = "us_nd_scraper"
        initial_task = "charge_it"

        mock_get_region.return_value = mock_region(region, queue_name)
        recent_session_none_scraped = ScrapeSession.new(
            key=None,
            scrape_type=constants.ScrapeType.BACKGROUND,
            phase=scrape_phase.ScrapePhase.SCRAPE)
        recent_session = ScrapeSession.new(
            key=None,
            scrape_type=constants.ScrapeType.BACKGROUND,
            phase=scrape_phase.ScrapePhase.SCRAPE,
            last_scraped="Bangalter, Thomas")
        mock_sessions.return_value = [
            recent_session_none_scraped, recent_session
        ]
        mock_task_manager.return_value.create_scrape_task.return_value = None
        mock_datetime.now.return_value = _DATETIME

        scraper = FakeScraper(region, initial_task)
        scraper.resume_scrape(scrape_type)

        mock_get_region.assert_called_with(region)
        mock_sessions.assert_called_with(ScrapeKey(region, scrape_type))

        queue_params = QueueRequest(
            scrape_type=scrape_type.value,
            scraper_start_time=_DATETIME,
            next_task=FAKE_TASK,
            # content=['Bangalter', 'Thomas'],
        )
        request_body = {
            'region': region,
            'task': initial_task,
            'params': queue_params.to_serializable()
        }

        mock_task_manager.return_value.create_scrape_task.assert_called_with(
            region_code=region,
            queue_name=queue_name,
            url=scraper.scraper_work_url,
            body=request_body)
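The decorators that supply the mock arguments are omitted from this snippet. A plausible stack is sketched below, assuming `from unittest.mock import patch`; the module paths are placeholders, not taken from the snippet. Since `patch` decorators are applied bottom-up, the parameters arrive in the reverse order of the decorators:

    @patch("recidiviz.ingest.scrape.scraper.datetime")
    @patch("recidiviz.ingest.scrape.scraper.ScraperCloudTaskManager")
    @patch("recidiviz.ingest.scrape.sessions.get_recent_sessions")
    @patch("recidiviz.utils.regions.get_region")
    def test_resume_scrape_background(self, mock_get_region, mock_sessions,
                                      mock_task_manager, mock_datetime):
        ...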
Example #3
    def add_task(self, task_name, request: QueueRequest):
        """ Add a task to the task queue.

        Args:
            task_name: (string) name of the function in the scraper class to
                       be invoked
            request: (dict) parameters to be passed to the function
        """
        self.cloud_task_manager.create_scrape_task(
            region_code=self.get_region().region_code,
            queue_name=self.get_region().get_queue_name(),
            url=self.scraper_work_url,
            body={
                'region': self.get_region().region_code,
                'task': task_name,
                'params': request.to_serializable(),
            })
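For context, a rough sketch of what the receiving side of such a task might look like; the handler and the region lookup helper are hypothetical, and only the body keys ('region', 'task', 'params') come from the snippet above:

def handle_scraper_work(body: dict) -> None:
    # Hypothetical lookup of the scraper instance for the region code.
    scraper = get_scraper_for_region(body['region'])
    # Rebuild the QueueRequest that was serialized into the task body.
    request = QueueRequest.from_serializable(body['params'])
    # Dispatch to the scraper method named by 'task'.
    getattr(scraper, body['task'])(request)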
Example #4
    def test_start_scrape_background(
        self,
        mock_pubsub: Mock,
        mock_get_region: Mock,
        mock_tracker: Mock,
        mock_task_manager: Mock,
        mock_datetime: Mock,
    ) -> None:
        docket_item = ("Dog", "Cat")
        region = "us_nd"
        scrape_type = constants.ScrapeType.BACKGROUND
        queue_name = "us_nd_scraper"
        initial_task = "use_it"

        mock_get_region.return_value = mock_region(region, queue_name)
        mock_tracker.return_value = docket_item
        mock_task_manager.return_value.create_scrape_task.return_value = None
        mock_datetime.now.return_value = _DATETIME

        scraper = FakeScraper(region, initial_task)
        scraper.start_scrape(scrape_type)

        mock_get_region.assert_called_with(region)
        mock_tracker.assert_called_with(ScrapeKey(region, scrape_type))
        mock_pubsub.assert_called_with(ScrapeKey(region, scrape_type),
                                       BATCH_PUBSUB_TYPE)

        queue_params = QueueRequest(
            scrape_type=scrape_type,
            scraper_start_time=_DATETIME,
            next_task=FAKE_TASK,
            # content=docket_item,
        )
        request_body = {
            "region": region,
            "task": initial_task,
            "params": queue_params.to_serializable(),
        }

        mock_task_manager.return_value.create_scrape_task.assert_called_with(
            region_code=region,
            queue_name=queue_name,
            url=scraper.scraper_work_url,
            body=request_body,
        )
Example #5
    def test_resume_scrape_snapshot(
        self,
        mock_get_region: Mock,
        mock_tracker: Mock,
        mock_task_manager: Mock,
        mock_datetime: Mock,
    ) -> None:
        docket_item = (41620, ["daft", "punk"])
        region = "us_nd"
        scrape_type = constants.ScrapeType.SNAPSHOT
        queue_name = "us_nd_scraper"
        initial_task = "press_it"

        mock_get_region.return_value = mock_region(region, queue_name)
        mock_tracker.return_value = docket_item
        mock_task_manager.return_value.create_scrape_task.return_value = None
        mock_datetime.now.return_value = _DATETIME

        scraper = FakeScraper(region, initial_task)
        scraper.resume_scrape(scrape_type)

        mock_get_region.assert_called_with(region)

        queue_params = QueueRequest(
            scrape_type=scrape_type,
            scraper_start_time=_DATETIME,
            next_task=FAKE_TASK,
            # content=(83240, ['dagt', 'punk']),
        )
        request_body = {
            "region": region,
            "task": initial_task,
            "params": queue_params.to_serializable(),
        }

        mock_task_manager.return_value.create_scrape_task.assert_called_with(
            region_code=region,
            queue_name=queue_name,
            url=scraper.scraper_work_url,
            body=request_body,
        )
Example #6
    def test_start_scrape_background(self, mock_pubsub, mock_get_region,
                                     mock_tracker, mock_task_manager,
                                     mock_datetime):
        docket_item = ('Dog', 'Cat')
        region = 'us_nd'
        scrape_type = constants.ScrapeType.BACKGROUND
        queue_name = 'us_nd_scraper'
        initial_task = 'use_it'

        mock_get_region.return_value = mock_region(region, queue_name)
        mock_tracker.return_value = docket_item
        mock_task_manager.return_value.create_scrape_task.return_value = None
        mock_datetime.now.return_value = _DATETIME

        scraper = FakeScraper(region, initial_task)
        scraper.start_scrape(scrape_type)

        mock_get_region.assert_called_with(region)
        mock_tracker.assert_called_with(ScrapeKey(region, scrape_type))
        mock_pubsub.assert_called_with(ScrapeKey(region, scrape_type),
                                       BATCH_PUBSUB_TYPE)

        queue_params = QueueRequest(
            scrape_type=scrape_type.value,
            scraper_start_time=_DATETIME,
            next_task=FAKE_TASK,
            # content=docket_item,
        )
        request_body = {
            'region': region,
            'task': initial_task,
            'params': queue_params.to_serializable()
        }

        mock_task_manager.return_value.create_scrape_task.assert_called_with(
            region_code=region,
            queue_name=queue_name,
            url=scraper.scraper_work_url,
            body=request_body)
Example #7
    def __init__(self, region: str, task: str, queue_request: QueueRequest):
        request_string = pprint.pformat(queue_request.to_serializable())
        msg = "Error when running '{}' for '{}' with request:\n{}".format(
            task, region, request_string)
        super().__init__(msg)
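This constructor most likely belongs to a custom exception that wraps scrape-task failures; a hedged usage sketch, assuming the enclosing class is named ScraperError:

try:
    getattr(scraper, task_name)(request)
except Exception as e:
    # Attach the region, task name, and full request so the failure can be
    # reproduced from the error message alone.
    raise ScraperError(
        scraper.get_region().region_code, task_name, request) from e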