def test_persist_duplicates_to_db(self, mock_write, _mock_region,
                                      mock_session_return):
        """Tests that duplicate ingest_info.Person objects are merged before
        write."""
        mock_session = mock_session_return.return_value = create_mock_session()
        scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        # Arrange
        ii = IngestInfo()
        ii.create_person(person_id=TEST_ID,
                         full_name=TEST_NAME) \
            .create_booking(booking_id=TEST_ID)

        ii_2 = IngestInfo()
        ii.create_person(person_id=TEST_ID2, full_name=TEST_NAME2)

        ii_1_dup = copy.deepcopy(ii)

        t1, t2, t3 = (Task(task_type=constants.TaskType.SCRAPE_DATA,
                           endpoint=TEST_ENDPOINT + str(i),
                           response_type=constants.ResponseType.TEXT)
                      for i in range(3))

        batch_persistence.write(ii, scrape_key, t1)
        batch_persistence.write(ii_2, scrape_key, t2)
        batch_persistence.write(ii_1_dup, scrape_key, t3)

        batch_persistence.persist_to_database(scrape_key.region_code,
                                              mock_session.start)

        expected_ii = IngestInfo(people=ii.people + ii_2.people)
        expected_proto = ingest_utils.convert_ingest_info_to_proto(expected_ii)
        result_proto = mock_write.call_args[0][0]
        self.assertEqual(result_proto, expected_proto)
Ejemplo n.º 2
0
    def test_persist_to_db(self, mock_write, _mock_region,
                           mock_session_return):
        mock_session = mock_session_return.return_value = create_mock_session()
        scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        ii = ingest_info.IngestInfo()
        ii.create_person(
            person_id=TEST_ID,
            full_name=TEST_NAME).create_booking(booking_id=TEST_ID)

        t = Task(
            task_type=constants.TaskType.SCRAPE_DATA,
            endpoint=TEST_ENDPOINT,
            response_type=constants.ResponseType.TEXT,
        )

        batch_persistence.write(ii, scrape_key, t)

        expected_proto = serialization.convert_ingest_info_to_proto(ii)

        batch_persistence.persist_to_database(scrape_key.region_code,
                                              mock_session.start)

        result_proto = mock_write.call_args[0][0]
        self.assertEqual(result_proto, expected_proto)

        # After we persist, there should no longer be ingest infos on Datastore
        ingest_infos = datastore_ingest_info.batch_get_ingest_infos_for_region(
            REGIONS[0], mock_session.start)
        self.assertEqual(len(ingest_infos), 0)
Ejemplo n.º 3
0
    def test_persist_to_db_different_regions(self, mock_write, _mock_region,
                                             mock_session_return):
        scrape_key1 = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)
        scrape_key2 = ScrapeKey(REGIONS[1], constants.ScrapeType.BACKGROUND)

        ii = ingest_info.IngestInfo()
        ii.create_person(
            person_id=TEST_ID,
            full_name=TEST_NAME).create_booking(booking_id=TEST_ID)

        ii2 = ingest_info.IngestInfo()
        ii2.create_person(
            person_id=TEST_ID,
            full_name=TEST_NAME2).create_booking(booking_id=TEST_ID)

        t = Task(
            task_type=constants.TaskType.SCRAPE_DATA,
            endpoint=TEST_ENDPOINT,
            response_type=constants.ResponseType.TEXT,
        )

        t2 = Task(
            task_type=constants.TaskType.SCRAPE_DATA,
            endpoint=TEST_ENDPOINT,
            response_type=constants.ResponseType.TEXT,
        )

        mock_session_1 = mock_session_return.return_value = create_mock_session(
        )

        batch_persistence.write(ii, scrape_key1, t)
        expected_proto = serialization.convert_ingest_info_to_proto(ii)
        batch_persistence.persist_to_database(scrape_key1.region_code,
                                              mock_session_1.start)

        result_proto = mock_write.call_args[0][0]
        self.assertEqual(result_proto, expected_proto)

        # We expect the region that we persisted to have no more ingest infos.
        ingest_infos_1 = datastore_ingest_info.batch_get_ingest_infos_for_region(
            REGIONS[0], mock_session_1.start)
        self.assertEqual(len(ingest_infos_1), 0)

        mock_session_2 = mock_session_return.return_value = create_mock_session(
        )

        batch_persistence.write(ii2, scrape_key2, t2)
        ingest_infos_2 = datastore_ingest_info.batch_get_ingest_infos_for_region(
            REGIONS[1], mock_session_2.start)
        self.assertEqual(len(ingest_infos_2), 1)

        expected_proto = serialization.convert_ingest_info_to_proto(ii2)
        batch_persistence.persist_to_database(scrape_key2.region_code,
                                              mock_session_2.start)

        result_proto = mock_write.call_args[0][0]
        self.assertEqual(result_proto, expected_proto)

        self.assertEqual(mock_write.call_count, 2)
Ejemplo n.º 4
0
    def test_persist_to_db_same_task_one_fail_one_pass(self, mock_write,
                                                       _mock_region,
                                                       mock_session_return):
        mock_session = mock_session_return.return_value = create_mock_session()
        scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)
        mock_write.return_value = True

        ii = ingest_info.IngestInfo()
        ii.create_person(
            person_id=TEST_ID,
            full_name=TEST_NAME).create_booking(booking_id=TEST_ID)

        t = Task(
            task_type=constants.TaskType.SCRAPE_DATA,
            endpoint=TEST_ENDPOINT,
            response_type=constants.ResponseType.TEXT,
        )

        # Because the tasks are the same, we expect that to be counted as a
        # pass.
        t2 = Task(
            task_type=constants.TaskType.SCRAPE_DATA,
            endpoint=TEST_ENDPOINT,
            response_type=constants.ResponseType.TEXT,
        )

        batch_persistence.write(ii, scrape_key, t)
        batch_persistence.write_error(TEST_ERROR, TEST_TRACE, t2, scrape_key)

        expected_proto = serialization.convert_ingest_info_to_proto(ii)

        self.assertTrue(
            batch_persistence.persist_to_database(scrape_key.region_code,
                                                  mock_session.start))

        result_proto = mock_write.call_args[0][0]
        self.assertEqual(result_proto, expected_proto)

        ingest_infos = datastore_ingest_info.batch_get_ingest_infos_for_region(
            REGIONS[0], mock_session.start)
        self.assertEqual(len(ingest_infos), 0)
Ejemplo n.º 5
0
    def test_persist_to_db_failed_no_write(self, mock_write, _mock_region,
                                           mock_session_return):
        mock_session = mock_session_return.return_value = create_mock_session()
        scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        ii = IngestInfo()
        ii.create_person(person_id=TEST_ID,
                         full_name=TEST_NAME) \
            .create_booking(booking_id=TEST_ID)

        t = Task(
            task_type=constants.TaskType.SCRAPE_DATA,
            endpoint=TEST_ENDPOINT,
            response_type=constants.ResponseType.TEXT,
        )

        # Because the tasks are different, we should fail.
        t2 = Task(
            task_type=constants.TaskType.SCRAPE_DATA,
            endpoint=TEST_ENDPOINT,
            response_type=constants.ResponseType.TEXT,
            params=TEST_PARAMS,
        )

        batch_persistence.write(ii, scrape_key, t)
        batch_persistence.write_error(TEST_ERROR, TEST_TRACE, t2, scrape_key)

        self.assertFalse(
            batch_persistence.persist_to_database(scrape_key.region_code,
                                                  mock_session.start))

        self.assertEqual(mock_write.call_count, 0)

        # We should still have both items still on Datastore because they
        # weren't persisted.
        batch_ingest_info_data_list = batch_persistence \
            ._get_batch_ingest_info_list(scrape_key.region_code,
                                         mock_session.start)
        self.assertEqual(len(batch_ingest_info_data_list), 2)