def test_persist_duplicates_to_db(self, mock_write, _mock_region,
                                  mock_session_return):
    """Tests that duplicate ingest_info.Person objects are merged before write.

    Three ingest infos are written under three distinct tasks: one with a
    first person (plus a booking), one with a second person, and a deep
    copy of the first info. After persisting, the proto passed to
    `mock_write` is expected to contain each person exactly once.

    Args:
        mock_write: mock whose first positional call argument is inspected
            as the persisted proto.
        _mock_region: unused in this test.
        mock_session_return: mock whose `return_value` is replaced with a
            freshly created mock session.
    """
    mock_session = mock_session_return.return_value = create_mock_session()
    scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

    # Arrange
    ii = IngestInfo()
    ii.create_person(person_id=TEST_ID, full_name=TEST_NAME) \
        .create_booking(booking_id=TEST_ID)

    # The second person must live on ii_2 (not ii); otherwise ii_2 stays
    # empty and the test never merges two distinct ingest infos.
    ii_2 = IngestInfo()
    ii_2.create_person(person_id=TEST_ID2, full_name=TEST_NAME2)

    # An exact copy of the first info, written under its own task below.
    ii_1_dup = copy.deepcopy(ii)

    # Three tasks that differ only in their endpoint suffix.
    t1, t2, t3 = (Task(task_type=constants.TaskType.SCRAPE_DATA,
                       endpoint=TEST_ENDPOINT + str(i),
                       response_type=constants.ResponseType.TEXT)
                  for i in range(3))

    # Act
    batch_persistence.write(ii, scrape_key, t1)
    batch_persistence.write(ii_2, scrape_key, t2)
    batch_persistence.write(ii_1_dup, scrape_key, t3)

    batch_persistence.persist_to_database(scrape_key.region_code,
                                          mock_session.start)

    # Assert
    # NOTE(review): the expected ordering assumes the stored infos are
    # merged in the order they were written -- confirm.
    expected_ii = IngestInfo(people=ii.people + ii_2.people)
    expected_proto = ingest_utils.convert_ingest_info_to_proto(expected_ii)
    result_proto = mock_write.call_args[0][0]
    self.assertEqual(result_proto, expected_proto)
def test_persist_to_db(self, mock_write, _mock_region, mock_session_return):
    """Checks that one written ingest info is persisted and then cleared.

    Writes a single ingest info for the first region, persists it, and
    verifies both the proto handed to `mock_write` and that no ingest
    infos remain for that region and session start afterwards.
    """
    session = create_mock_session()
    mock_session_return.return_value = session
    key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

    info = ingest_info.IngestInfo()
    person = info.create_person(person_id=TEST_ID, full_name=TEST_NAME)
    person.create_booking(booking_id=TEST_ID)

    task_fields = {
        'task_type': constants.TaskType.SCRAPE_DATA,
        'endpoint': TEST_ENDPOINT,
        'response_type': constants.ResponseType.TEXT,
    }
    task = Task(**task_fields)

    batch_persistence.write(info, key, task)
    wanted_proto = serialization.convert_ingest_info_to_proto(info)

    batch_persistence.persist_to_database(key.region_code, session.start)

    # The proto is the first positional argument of the latest write call.
    written_args = mock_write.call_args[0]
    self.assertEqual(written_args[0], wanted_proto)

    # Once persisted, nothing should be left on Datastore for this region.
    remaining = datastore_ingest_info.batch_get_ingest_infos_for_region(
        REGIONS[0], session.start)
    self.assertEqual(len(remaining), 0)
def test_persist_to_db_different_regions(self, mock_write, _mock_region,
                                         mock_session_return):
    """Checks that persisting is done independently for each region.

    One ingest info is written and persisted for the first region, then a
    second one for the second region under a new mock session. Each
    persist call should hand its own proto to `mock_write`, for two write
    calls in total.
    """
    first_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)
    second_key = ScrapeKey(REGIONS[1], constants.ScrapeType.BACKGROUND)

    first_info = ingest_info.IngestInfo()
    first_person = first_info.create_person(
        person_id=TEST_ID, full_name=TEST_NAME)
    first_person.create_booking(booking_id=TEST_ID)

    second_info = ingest_info.IngestInfo()
    second_person = second_info.create_person(
        person_id=TEST_ID, full_name=TEST_NAME2)
    second_person.create_booking(booking_id=TEST_ID)

    # Both tasks are built from exactly the same fields.
    task_fields = {
        'task_type': constants.TaskType.SCRAPE_DATA,
        'endpoint': TEST_ENDPOINT,
        'response_type': constants.ResponseType.TEXT,
    }
    first_task = Task(**task_fields)
    second_task = Task(**task_fields)

    # --- First region ---
    first_session = create_mock_session()
    mock_session_return.return_value = first_session

    batch_persistence.write(first_info, first_key, first_task)
    wanted_proto = serialization.convert_ingest_info_to_proto(first_info)
    batch_persistence.persist_to_database(first_key.region_code,
                                          first_session.start)

    written_args = mock_write.call_args[0]
    self.assertEqual(written_args[0], wanted_proto)

    # The region that was just persisted should have no ingest infos left.
    leftover_first = \
        datastore_ingest_info.batch_get_ingest_infos_for_region(
            REGIONS[0], first_session.start)
    self.assertEqual(len(leftover_first), 0)

    # --- Second region ---
    second_session = create_mock_session()
    mock_session_return.return_value = second_session

    batch_persistence.write(second_info, second_key, second_task)

    # Before persisting, the second region holds exactly one ingest info.
    pending_second = \
        datastore_ingest_info.batch_get_ingest_infos_for_region(
            REGIONS[1], second_session.start)
    self.assertEqual(len(pending_second), 1)

    wanted_proto = serialization.convert_ingest_info_to_proto(second_info)
    batch_persistence.persist_to_database(second_key.region_code,
                                          second_session.start)

    written_args = mock_write.call_args[0]
    self.assertEqual(written_args[0], wanted_proto)

    self.assertEqual(mock_write.call_count, 2)
def test_persist_to_db_same_task_one_fail_one_pass(self, mock_write,
                                                   _mock_region,
                                                   mock_session_return):
    """Checks that an error on an identical task does not block persisting.

    An ingest info is written for one task and an error is recorded for a
    second task built from the same fields. Persisting is still expected
    to return a truthy value, write the proto, and leave no ingest infos
    behind for the region.
    """
    session = create_mock_session()
    mock_session_return.return_value = session
    key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)
    mock_write.return_value = True

    info = ingest_info.IngestInfo()
    person = info.create_person(person_id=TEST_ID, full_name=TEST_NAME)
    person.create_booking(booking_id=TEST_ID)

    task_fields = {
        'task_type': constants.TaskType.SCRAPE_DATA,
        'endpoint': TEST_ENDPOINT,
        'response_type': constants.ResponseType.TEXT,
    }
    passing_task = Task(**task_fields)
    # The failing task is identical to the passing one, so the pair is
    # expected to count as a pass overall.
    failing_task = Task(**task_fields)

    batch_persistence.write(info, key, passing_task)
    batch_persistence.write_error(TEST_ERROR, TEST_TRACE, failing_task, key)

    wanted_proto = serialization.convert_ingest_info_to_proto(info)

    persisted = batch_persistence.persist_to_database(key.region_code,
                                                      session.start)
    self.assertTrue(persisted)

    written_args = mock_write.call_args[0]
    self.assertEqual(written_args[0], wanted_proto)

    remaining = datastore_ingest_info.batch_get_ingest_infos_for_region(
        REGIONS[0], session.start)
    self.assertEqual(len(remaining), 0)
def test_persist_to_db_failed_no_write(self, mock_write, _mock_region,
                                       mock_session_return):
    """Checks that an error on a different task prevents any write.

    An ingest info is written for one task and an error is recorded for a
    task that additionally carries params. Persisting is expected to
    return a falsy value, never call `mock_write`, and leave both stored
    entries in place.
    """
    session = create_mock_session()
    mock_session_return.return_value = session
    key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

    info = IngestInfo()
    person = info.create_person(person_id=TEST_ID, full_name=TEST_NAME)
    person.create_booking(booking_id=TEST_ID)

    shared_fields = {
        'task_type': constants.TaskType.SCRAPE_DATA,
        'endpoint': TEST_ENDPOINT,
        'response_type': constants.ResponseType.TEXT,
    }
    passing_task = Task(**shared_fields)
    # The extra params make this a different task, so persisting should
    # fail.
    failing_task = Task(params=TEST_PARAMS, **shared_fields)

    batch_persistence.write(info, key, passing_task)
    batch_persistence.write_error(TEST_ERROR, TEST_TRACE, failing_task, key)

    persisted = batch_persistence.persist_to_database(key.region_code,
                                                      session.start)
    self.assertFalse(persisted)

    self.assertEqual(mock_write.call_count, 0)

    # Nothing was persisted, so both entries should still be on Datastore.
    stored_entries = batch_persistence._get_batch_ingest_info_list(
        key.region_code, session.start)
    self.assertEqual(len(stored_entries), 2)