def test_scrape_data_and_more_no_persist_second_time_persist(
            self, mock_get_more, mock_fetch, mock_populate, mock_write):
        populate_task = Task.evolve(TEST_TASK,
                                    task_type=constants.TaskType.SCRAPE_DATA)
        mock_get_more.return_value = [populate_task]
        mock_fetch.return_value = (TEST_HTML, {})
        mock_populate.return_value = ScrapedData(
            ingest_info=self.ii,
            persist=False,
        )
        start_time = datetime.datetime.now()
        t = Task.evolve(TEST_TASK,
                        task_type=constants.TaskType.SCRAPE_DATA_AND_MORE)
        req = QueueRequest(
            scrape_type=constants.ScrapeType.BACKGROUND,
            next_task=t,
            scraper_start_time=start_time,
        )

        scraper = FakeScraper("test")
        scraper.BATCH_WRITES = False
        scraper._generic_scrape(req)

        # Should send the ii since we chose not to persist.
        expected_tasks = [
            QueueRequest(
                scrape_type=constants.ScrapeType.BACKGROUND,
                next_task=populate_task,
                scraper_start_time=start_time,
                ingest_info=self.ii,
            )
        ]

        self.assertEqual(mock_get_more.call_count, 1)
        self.assertEqual(mock_populate.call_count, 1)
        self.assertEqual(mock_write.call_count, 0)
        mock_get_more.assert_called_once_with(TEST_HTML, t)
        self.assertCountEqual(expected_tasks, scraper.tasks)

        mock_populate.return_value = ScrapedData(
            ingest_info=self.ii,
            persist=True,
        )
        scraper._generic_scrape(scraper.tasks[0])
        self.assertEqual(mock_get_more.call_count, 1)
        self.assertEqual(mock_populate.call_count, 2)
        self.assertEqual(mock_write.call_count, 1)

        expected_metadata = IngestMetadata(
            scraper.region.region_code,
            scraper.region.jurisdiction_id,
            start_time,
            scraper.get_enum_overrides(),
        )
        expected_proto = convert_ingest_info_to_proto(self.ii)
        mock_write.assert_called_once_with(expected_proto, expected_metadata)
Beispiel #2
0
    def test_scrape_data_no_more_tasks_batch(
        self,
        mock_get_more: Mock,
        mock_fetch: Mock,
        mock_populate: Mock,
        mock_write: Mock,
        mock_batch_write: Mock,
    ) -> None:
        mock_fetch.return_value = (TEST_HTML, {})
        mock_populate.return_value = ScrapedData(
            ingest_info=self.ii,
            persist=True,
        )
        start_time = datetime.datetime.now()
        t = Task.evolve(TEST_TASK, task_type=constants.TaskType.SCRAPE_DATA)
        req = QueueRequest(
            scrape_type=constants.ScrapeType.BACKGROUND,
            next_task=t,
            scraper_start_time=start_time,
        )

        scraper = FakeScraper("test")
        scraper._generic_scrape(req)

        scrape_key = ScrapeKey("test", constants.ScrapeType.BACKGROUND)
        self.assertEqual(mock_get_more.call_count, 0)
        self.assertEqual(mock_populate.call_count, 1)
        self.assertEqual(mock_write.call_count, 0)
        mock_batch_write.assert_called_once_with(
            ingest_info=self.ii,
            task=t,
            scrape_key=scrape_key,
        )
        self.assertEqual(len(scraper.tasks), 0)
    def test_scrape_data_no_more_tasks(self, mock_get_more, mock_fetch,
                                       mock_populate, mock_write):
        mock_fetch.return_value = (TEST_HTML, {})
        mock_populate.return_value = ScrapedData(
            ingest_info=self.ii,
            persist=True,
        )
        start_time = datetime.datetime.now()
        t = Task.evolve(TEST_TASK, task_type=constants.TaskType.SCRAPE_DATA)
        req = QueueRequest(
            scrape_type=constants.ScrapeType.BACKGROUND,
            next_task=t,
            scraper_start_time=start_time,
        )

        scraper = FakeScraper("test")
        scraper.BATCH_WRITES = False
        scraper._generic_scrape(req)

        expected_metadata = IngestMetadata(
            scraper.region.region_code,
            scraper.region.jurisdiction_id,
            start_time,
            scraper.get_enum_overrides(),
        )
        expected_proto = convert_ingest_info_to_proto(self.ii)

        self.assertEqual(mock_get_more.call_count, 0)
        self.assertEqual(mock_populate.call_count, 1)
        self.assertEqual(mock_write.call_count, 1)
        mock_write.assert_called_once_with(expected_proto, expected_metadata)
        self.assertEqual(len(scraper.tasks), 0)
Beispiel #4
0
    def test_scrape_data_and_more_no_persist(
            self, mock_get_more, mock_fetch, mock_populate, mock_write):
        mock_get_more.return_value = [TEST_TASK]
        mock_fetch.return_value = (TEST_HTML, {})
        mock_populate.return_value = ScrapedData(
            ingest_info=self.ii,
            persist=False,
        )
        start_time = datetime.datetime.now()
        t = Task.evolve(
            TEST_TASK, task_type=constants.TaskType.SCRAPE_DATA_AND_MORE)
        req = QueueRequest(
            scrape_type=constants.ScrapeType.BACKGROUND,
            next_task=t,
            scraper_start_time=start_time
        )

        scraper = FakeScraper('test')
        scraper.BATCH_WRITES = False
        scraper._generic_scrape(req)

        # Should send the ii since we chose not to persist.
        expected_tasks = [QueueRequest(
            scrape_type=constants.ScrapeType.BACKGROUND,
            next_task=TEST_TASK,
            scraper_start_time=start_time,
            ingest_info=self.ii
        )]

        self.assertEqual(mock_get_more.call_count, 1)
        self.assertEqual(mock_populate.call_count, 1)
        self.assertEqual(mock_write.call_count, 0)
        mock_get_more.assert_called_once_with(TEST_HTML, t)
        self.assertCountEqual(expected_tasks, scraper.tasks)
Beispiel #5
0
    def test_scrape_data_and_more_yes_persist(
        self,
        mock_get_more: Mock,
        mock_fetch: Mock,
        mock_populate: Mock,
        mock_write: Mock,
    ) -> None:
        mock_get_more.return_value = [TEST_TASK]
        mock_fetch.return_value = (TEST_HTML, {})
        mock_populate.return_value = ScrapedData(
            ingest_info=self.ii,
            persist=True,
        )
        start_time = datetime.datetime.now()
        t = Task.evolve(TEST_TASK, task_type=constants.TaskType.SCRAPE_DATA_AND_MORE)
        req = QueueRequest(
            scrape_type=constants.ScrapeType.BACKGROUND,
            next_task=t,
            scraper_start_time=start_time,
        )

        scraper = FakeScraper("test")
        scraper.BATCH_WRITES = False
        scraper._generic_scrape(req)

        # Should send the ii since we chose not to persist.
        expected_tasks = [
            QueueRequest(
                scrape_type=constants.ScrapeType.BACKGROUND,
                next_task=TEST_TASK,
                scraper_start_time=start_time,
            )
        ]
        expected_metadata = IngestMetadata(
            region=scraper.region.region_code,
            jurisdiction_id=scraper.region.jurisdiction_id,
            ingest_time=start_time,
            enum_overrides=scraper.get_enum_overrides(),
            system_level=SystemLevel.COUNTY,
            database_key=SQLAlchemyDatabaseKey.for_schema(SchemaType.JAILS),
        )
        expected_proto = convert_ingest_info_to_proto(self.ii)

        self.assertEqual(mock_get_more.call_count, 1)
        self.assertEqual(mock_populate.call_count, 1)
        self.assertEqual(mock_write.call_count, 1)
        mock_write.assert_called_once_with(expected_proto, expected_metadata)
        self.assertCountEqual(expected_tasks, scraper.tasks)
 def testBoth(self):
     ScrapedData(
         ingest_info=IngestInfo(people=[Person(race=Race("ASIAN"))]),
         single_counts=[SingleCount(count=123)],
         persist=True)
 def testSingleCount(self):
     ScrapedData(single_counts=[SingleCount(count=123)], persist=True)
 def testIngestInfo(self):
     ScrapedData(
         ingest_info=IngestInfo(people=[Person(race=Race("ASIAN"))]),
         persist=True)
 def testRaiseWithNothing(self):
     with self.assertRaises(ScraperError):
         ScrapedData(ingest_info=None, persist=True)