def test_persist_duplicates_to_db(self, mock_write, _mock_region,
                                      mock_session_return):
        """Checks that a repeated ingest_info.Person is merged before write.

        Three ingest infos (the third a deep copy of the first) are written
        under three distinct tasks. After persisting, the first positional
        argument of the last mock_write call must equal the proto built from
        the non-duplicated people.
        """
        session = create_mock_session()
        mock_session_return.return_value = session
        key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        # Arrange
        ii = IngestInfo()
        first_person = ii.create_person(person_id=TEST_ID,
                                        full_name=TEST_NAME)
        first_person.create_booking(booking_id=TEST_ID)

        # NOTE(review): ii_2 stays empty -- the second person is added to
        # `ii`, not `ii_2`. Possibly a typo for ii_2.create_person; confirm
        # before changing, since it affects the expected people order.
        ii_2 = IngestInfo()
        ii.create_person(person_id=TEST_ID2, full_name=TEST_NAME2)

        ii_1_dup = copy.deepcopy(ii)

        # One task per ingest info; only the endpoint suffix differs.
        tasks = [
            Task(task_type=constants.TaskType.SCRAPE_DATA,
                 endpoint=TEST_ENDPOINT + str(index),
                 response_type=constants.ResponseType.TEXT)
            for index in range(3)
        ]

        # Act
        for info, task in zip((ii, ii_2, ii_1_dup), tasks):
            batch_persistence.write(info, key, task)

        batch_persistence.persist_to_database(key.region_code, session.start)

        # Assert
        deduped = IngestInfo(people=ii.people + ii_2.people)
        expected_proto = ingest_utils.convert_ingest_info_to_proto(deduped)
        written_proto = mock_write.call_args[0][0]
        self.assertEqual(written_proto, expected_proto)
    def test_write_to_datastore(self, mock_session_return):
        """Writes one ingest info and expects to read back exactly one batch
        entry equal to the ingest info paired with the task's hash."""
        session = create_mock_session()
        mock_session_return.return_value = session

        key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        ii = IngestInfo()
        person = ii.create_person(full_name=TEST_NAME)
        person.create_booking(booking_id=TEST_ID)

        task = Task(task_type=constants.TaskType.SCRAPE_DATA,
                    endpoint=TEST_ENDPOINT,
                    response_type=constants.ResponseType.TEXT)

        # The expected hash is taken over the task's sorted-key JSON dump.
        serialized_task = json.dumps(task.to_serializable(), sort_keys=True)
        expected_batch = BatchIngestInfoData(
            ingest_info=ii, task_hash=hash(serialized_task))

        batch_persistence.write(ii, key, task)

        stored_batches = batch_persistence._get_batch_ingest_info_list(
            key.region_code, session.start)

        self.assertEqual(len(stored_batches), 1)
        self.assertEqual(expected_batch, stored_batches[0])
Example #3
0
    def test_skip_empty(self):
        """Extracts _SKIP_EMPTY with the skip_empty.yaml mapping and compares
        the result to one person holding two bookings."""
        mapping_path = os.path.join(os.path.dirname(__file__),
                                    'fixtures/skip_empty.yaml')
        extractor = JsonDataExtractor(mapping_path)

        first_booking = Booking(
            custody_status='in custody',
            booking_id='1',
            charges=[
                Charge(name="battery"),
                Charge(name="assault", charge_class='misdemeanor'),
            ],
        )
        second_booking = Booking(
            booking_id='2',
            charges=[Charge(name='robbery', charge_class='felony')],
        )

        expected = IngestInfo()
        expected.create_person(full_name='skip empty',
                               bookings=[first_booking, second_booking])

        result = extractor.extract_and_populate_data(_SKIP_EMPTY)
        self.assertEqual(result, expected)
    def test_persist_to_db(self, mock_write, _mock_region,
                           mock_session_return):
        """Writes one ingest info, persists the region, then checks the proto
        given to mock_write and that no ingest infos remain for the region."""
        session = create_mock_session()
        mock_session_return.return_value = session
        key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        ii = IngestInfo()
        person = ii.create_person(person_id=TEST_ID, full_name=TEST_NAME)
        person.create_booking(booking_id=TEST_ID)

        task = Task(task_type=constants.TaskType.SCRAPE_DATA,
                    endpoint=TEST_ENDPOINT,
                    response_type=constants.ResponseType.TEXT)

        batch_persistence.write(ii, key, task)

        expected_proto = ingest_utils.convert_ingest_info_to_proto(ii)

        batch_persistence.persist_to_database(key.region_code, session.start)

        # First positional argument of the most recent mock_write call.
        written_proto = mock_write.call_args[0][0]
        self.assertEqual(written_proto, expected_proto)

        # After we persist, there should no longer be ingest infos on Datastore
        remaining = datastore_ingest_info.batch_get_ingest_infos_for_region(
            REGIONS[0], session.start)
        self.assertEqual(len(remaining), 0)
    def test_persist_to_db_different_regions(self, mock_write, _mock_region,
                                             mock_session_return):
        """Persists two regions one after the other, each under its own mock
        session, and checks the proto given to mock_write after each one."""
        first_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)
        second_key = ScrapeKey(REGIONS[1], constants.ScrapeType.BACKGROUND)

        first_info = IngestInfo()
        first_person = first_info.create_person(person_id=TEST_ID,
                                                full_name=TEST_NAME)
        first_person.create_booking(booking_id=TEST_ID)

        second_info = IngestInfo()
        second_person = second_info.create_person(person_id=TEST_ID,
                                                  full_name=TEST_NAME2)
        second_person.create_booking(booking_id=TEST_ID)

        # Two separate Task objects built from identical arguments.
        first_task, second_task = (
            Task(task_type=constants.TaskType.SCRAPE_DATA,
                 endpoint=TEST_ENDPOINT,
                 response_type=constants.ResponseType.TEXT)
            for _ in range(2)
        )

        # --- first region ---
        first_session = create_mock_session()
        mock_session_return.return_value = first_session

        batch_persistence.write(first_info, first_key, first_task)
        expected_proto = ingest_utils.convert_ingest_info_to_proto(first_info)
        batch_persistence.persist_to_database(first_key.region_code,
                                              first_session.start)

        written_proto = mock_write.call_args[0][0]
        self.assertEqual(written_proto, expected_proto)

        # We expect the region that we persisted to have no more ingest infos.
        first_remaining = \
            datastore_ingest_info.batch_get_ingest_infos_for_region(
                REGIONS[0], first_session.start)
        self.assertEqual(len(first_remaining), 0)

        # --- second region ---
        second_session = create_mock_session()
        mock_session_return.return_value = second_session

        batch_persistence.write(second_info, second_key, second_task)
        second_pending = \
            datastore_ingest_info.batch_get_ingest_infos_for_region(
                REGIONS[1], second_session.start)
        self.assertEqual(len(second_pending), 1)

        expected_proto = ingest_utils.convert_ingest_info_to_proto(
            second_info)
        batch_persistence.persist_to_database(second_key.region_code,
                                              second_session.start)

        written_proto = mock_write.call_args[0][0]
        self.assertEqual(written_proto, expected_proto)

        self.assertEqual(mock_write.call_count, 2)
    def testParse(self):
        """Parses _ROSTER_JSON with UsMaMiddlesexParser and validates the
        result against a hand-built IngestInfo holding two people."""
        region = regions.get_region('us_ma_middlesex', is_direct_ingest=True)
        controller = region.get_ingestor()

        metadata = IngestMetadata(region.region_code, region.jurisdiction_id,
                                  _FAKE_START_TIME,
                                  controller.get_enum_overrides())

        ingest_info = UsMaMiddlesexParser().parse(_ROSTER_JSON)

        expected_info = IngestInfo()

        # First person: one booking with two charges (the second charge has a
        # bond) and one hold.
        first_person = expected_info.create_person(
            person_id='12345       ',
            birthdate='1111-01-01 00:00:00.000',
            gender='M',
            ethnicity='HISPANIC',
            place_of_residence='123 ST DORCHESTER MA 01234     ')
        first_booking = first_person.create_booking(
            booking_id='1.0',
            admission_date='2017-01-01 00:00:00.000',
            admission_reason='BAIL MITTIMUS',
            facility='MAIN      ')
        first_booking.create_charge(
            charge_id='1245.0',
            statute='90/24/K',
            name='OUI-LIQUOR, 2ND OFFENSE c90 ss24',
            case_number='111.0',
            court_type='Middlesex SC (81)',
            charge_notes='Other')
        charge_with_bond = first_booking.create_charge(
            charge_id='1502.0',
            offense_date='2017-01-28 00:00:00',
            statute='90/23/J',
            name='OUI while license suspended for OUI',
            case_number='222.0',
            court_type='Middlesex SC (81)',
            charge_notes='Drug or Alcohol',
            status='DISMISSED')
        charge_with_bond.create_bond(bond_id='12345.0')
        first_booking.create_hold(hold_id='00000.0',
                                  jurisdiction_name='Middlesex SC (81)')

        # Second person: one booking with an arrest and a single charge.
        second_person = expected_info.create_person(
            person_id='10472       ',
            birthdate='1111-02-02 00:00:00.000',
            gender='M',
            race='BLACK or AFRICAN AMERICAN',
            place_of_residence='456 ST MALDEN MA 98765      ')
        second_booking = second_person.create_booking(
            booking_id='333.0',
            admission_date='2018-02-02 00:00:00.000',
            admission_reason='SENTENCE MITTIMUS',
            facility='MAIN      ')
        second_booking.create_arrest(agency='Cambridge PD')
        second_booking.create_charge(
            charge_id='12341234.0',
            statute='269/10/J',
            name='FIREARM, CARRY WITHOUT LICENSE c269 ss10',
            case_number='555.0',
            charge_notes='Other',
            court_type='Cambridge DC (52)')

        self.validate_ingest(ingest_info, expected_info, metadata)
Example #7
0
    def test_readPeopleWithOpenBookings(self):
        """Stores three people and expects read_people_with_open_bookings to
        return only the one with an open booking and the queried full name."""
        admitted = datetime.datetime(2018, 6, 20)
        released = datetime.date(2018, 7, 20)

        in_custody_booking = Booking(
            custody_status=CustodyStatus.IN_CUSTODY.value,
            admission_date=admitted,
            first_seen_time=admitted,
            last_seen_time=admitted,
        )
        released_booking = Booking(
            custody_status=CustodyStatus.RELEASED.value,
            admission_date=admitted,
            release_date=released,
            first_seen_time=admitted,
            last_seen_time=admitted,
        )

        # Open booking but no full name.
        person_no_match = Person(
            person_id=1,
            region=_REGION,
            jurisdiction_id=_JURISDICTION_ID,
            bookings=[deepcopy(in_custody_booking)],
        )
        # Open booking and the full name that is queried below.
        person_match_full_name = Person(
            person_id=2,
            region=_REGION,
            jurisdiction_id=_JURISDICTION_ID,
            bookings=[deepcopy(in_custody_booking)],
            full_name=_FULL_NAME,
        )
        # Same full name, but the only booking is released.
        person_no_open_bookings = Person(
            person_id=6,
            region=_REGION,
            jurisdiction_id=_JURISDICTION_ID,
            full_name=_FULL_NAME,
            bookings=[released_booking],
        )

        with SessionFactory.using_database(self.database_key,
                                           autocommit=False) as session:
            for stored_person in (person_no_match,
                                  person_no_open_bookings,
                                  person_match_full_name):
                session.add(stored_person)
            session.commit()

            query_info = IngestInfo()
            query_info.create_person(full_name=_FULL_NAME,
                                     person_id=_EXTERNAL_ID)
            people = dao.read_people_with_open_bookings(
                session, _REGION, query_info.people)

            expected_people = [
                converter.convert_schema_object_to_entity(
                    person_match_full_name),
            ]
            self.assertCountEqual(people, expected_people)
    def test_cell_ordering(self):
        """Checks that HtmlDataExtractor reads 'th' and 'td' cells in the
        right order; only the first extracted person is compared."""
        expected_info = IngestInfo()
        for birthdate in ('A', 'B', 'C'):
            expected_info.create_person(birthdate=birthdate)

        extracted = self.extract('mixed_cells.html', 'good_table.yaml')
        self.assertEqual(expected_info.people[0], extracted.people[0])
Example #9
0
    def test_cell_ordering(self) -> None:
        """Checks that HtmlDataExtractor reads 'th' and 'td' cells in the
        right order; only the first extracted person is compared."""
        expected_info = IngestInfo()
        for birthdate in ("A", "B", "C"):
            expected_info.create_person(birthdate=birthdate)

        extracted = self.extract("mixed_cells.html", "good_table.yaml")
        first_expected = expected_info.people[0]
        self.assertEqual(first_expected, extracted.people[0])
 def test_single_page_roster(self):
     """Checks that bookings are not handled as multi-key classes: each
     person listed in columns is expected to have at most one booking."""
     roster_rows = (
         ("PERSON ONE", "1/1/1111", "NUMBER ONE"),
         ("PERSON TWO", "2/2/2222", "NUMBER TWO"),
         ("PERSON THREE", "3/3/3333", "NUMBER THREE"),
     )
     expected_info = IngestInfo()
     for full_name, birthdate, booking_id in roster_rows:
         person = expected_info.create_person(full_name=full_name,
                                              birthdate=birthdate)
         person.create_booking(booking_id=booking_id)
     extracted = self.extract("single_page_roster.html",
                              "single_page_roster.yaml")
     self.assertEqual(expected_info, extracted)
Example #11
0
    def test_jailtracker_person(self) -> None:
        """Extracts the jailtracker_person.json fixture and compares it to a
        single expected person."""
        mapping_path = os.path.join(
            os.path.dirname(__file__), "fixtures/jailtracker_person.yaml"
        )
        extractor = JsonDataExtractor(mapping_path)

        expected_result = IngestInfo()
        expected_result.create_person(
            person_id="012345",
            birthdate="12/12/0001",
            age="2018",
            race="WHITE",
        )

        person_json = fixtures.as_dict("extractor", "jailtracker_person.json")
        result = extractor.extract_and_populate_data(person_json)

        self.assertEqual(result, expected_result)
Example #12
0
    def test_jailtracker_person(self):
        """Extracts _JT_PERSON with the jailtracker_person.yaml mapping and
        compares it to a single expected person."""
        mapping_path = os.path.join(os.path.dirname(__file__),
                                    'fixtures/jailtracker_person.yaml')
        extractor = JsonDataExtractor(mapping_path)

        person_fields = {
            'person_id': '012345',
            'birthdate': '12/12/0001',
            'age': '2018',
            'race': 'WHITE',
        }
        expected_result = IngestInfo()
        expected_result.create_person(**person_fields)

        result = extractor.extract_and_populate_data(_JT_PERSON)

        self.assertEqual(result, expected_result)
Example #13
0
    def test_nested_good_table(self) -> None:
        """Extracts a well modelled nested table and compares the result to a
        person with one booking, one hold, one charge and one bond."""
        expected_info = IngestInfo()

        # Person
        expected_person = expected_info.create_person()
        expected_person.person_id = "18-00187"
        expected_person.surname = "LAST NAME"
        expected_person.birthdate = "06/03/2999"
        expected_person.age = "100000000"
        expected_person.gender = "Male"
        expected_person.race = "White/Eurp/ N.Afr/Mid Eas"

        # Booking and its hold
        expected_booking = expected_person.create_booking()
        expected_booking.booking_id = "18-00000"
        expected_booking.admission_date = "1/05/2000 09:39"
        expected_booking.create_hold(
            jurisdiction_name="District Court 13-3-01"
        )

        # Charge
        expected_charge = expected_booking.create_charge()
        expected_charge.name = "Criminal Attempt [INCHOATE]"
        expected_charge.statute = "901"
        expected_charge.case_number = "CR-000-2000"

        # Bond
        expected_bond = expected_charge.create_bond()
        expected_bond.amount = "$1.00"

        extracted = self.extract(
            "nested_good_table.html", "nested_good_table.yaml"
        )
        self.assertEqual(expected_info, extracted)
Example #14
0
    def test_text_label(self) -> None:
        """Extracts a page that holds a key/value pair in plain text."""
        expected_info = IngestInfo()

        # Person
        expected_person = expected_info.create_person()
        expected_person.gender = "M"
        expected_person.race = "W"
        expected_person.birthdate = "12/25/0"

        # Booking
        expected_booking = expected_person.create_booking()
        expected_booking.booking_id = "202200000"
        expected_booking.facility = "Southwest Detention Center"
        expected_booking.admission_date = "01/01/2001 19:44"
        expected_booking.release_date = "11/01/2014"
        expected_booking.total_bond_amount = "00000000"

        # Arrest
        expected_arrest = expected_booking.create_arrest()
        expected_arrest.agency = "Hemet PD"
        expected_arrest.arrest_date = "01/01/2001 09:01"

        # Charges, created in page order; only the second one has a bond.
        dismissed_charge = expected_booking.create_charge()
        dismissed_charge.name = "CHARGE 1"
        dismissed_charge.statute = "245(A)(1)"
        dismissed_charge.status = "DISM"
        dismissed_charge.degree = "FEL"

        sentenced_charge = expected_booking.create_charge()
        sentenced_charge.name = "CHARGE 2"
        sentenced_charge.statute = "245(A)(4)"
        sentenced_charge.status = "SENT"
        sentenced_charge.degree = "FEL"
        sentenced_charge.create_bond().amount = "$100"

        extracted = self.extract("text_label.html", "text_label.yaml")
        self.assertEqual(expected_info, extracted)
Example #15
0
def sample_ingest_info(number: str) -> IngestInfo:
    """Return an IngestInfo with one person whose person_id is `number`.

    The person's full_name is always the same placeholder string.
    """
    info = IngestInfo()

    new_person = info.create_person()
    new_person.person_id = number
    new_person.full_name = 'LAST NAME, FIRST NAME MIDDLE NAME'
    return info
    def test_text_label(self):
        """Extracts a page that holds a key/value pair in plain text."""
        expected_info = IngestInfo()

        # Person
        expected_person = expected_info.create_person()
        expected_person.gender = 'M'
        expected_person.race = 'W'
        expected_person.birthdate = '12/25/0'

        # Booking
        expected_booking = expected_person.create_booking()
        expected_booking.booking_id = '202200000'
        expected_booking.facility = 'Southwest Detention Center'
        expected_booking.admission_date = '01/01/2001 19:44'
        expected_booking.release_date = '11/01/2014'
        expected_booking.total_bond_amount = '00000000'

        # Arrest
        expected_arrest = expected_booking.create_arrest()
        expected_arrest.agency = 'Hemet PD'
        expected_arrest.arrest_date = '01/01/2001 09:01'

        # Charges, created in page order; only the second one has a bond.
        dismissed_charge = expected_booking.create_charge()
        dismissed_charge.name = 'CHARGE 1'
        dismissed_charge.statute = '245(A)(1)'
        dismissed_charge.status = 'DISM'
        dismissed_charge.degree = 'FEL'

        sentenced_charge = expected_booking.create_charge()
        sentenced_charge.name = 'CHARGE 2'
        sentenced_charge.statute = '245(A)(4)'
        sentenced_charge.status = 'SENT'
        sentenced_charge.degree = 'FEL'
        sentenced_charge.create_bond().amount = '$100'

        extracted = self.extract('text_label.html', 'text_label.yaml')
        self.assertEqual(expected_info, extracted)
    def test_nested_good_table(self):
        """Extracts a well modelled nested table and compares the result to a
        person with one booking, one hold, one charge and one bond."""
        expected_info = IngestInfo()

        # Person
        expected_person = expected_info.create_person()
        expected_person.person_id = '18-00187'
        expected_person.surname = 'LAST NAME'
        expected_person.birthdate = '06/03/2999'
        expected_person.age = '100000000'
        expected_person.gender = 'Male'
        expected_person.race = 'White/Eurp/ N.Afr/Mid Eas'

        # Booking and its hold
        expected_booking = expected_person.create_booking()
        expected_booking.booking_id = '18-00000'
        expected_booking.admission_date = '1/05/2000 09:39'
        expected_booking.create_hold(
            jurisdiction_name='District Court 13-3-01')

        # Charge
        expected_charge = expected_booking.create_charge()
        expected_charge.name = 'Criminal Attempt [INCHOATE]'
        expected_charge.statute = '901'
        expected_charge.case_number = 'CR-000-2000'

        # Bond
        expected_bond = expected_charge.create_bond()
        expected_bond.amount = '$1.00'

        extracted = self.extract('nested_good_table.html',
                                 'nested_good_table.yaml')
        self.assertEqual(expected_info, extracted)
    def test_good_table_with_link(self):
        """Extracts a well modelled table that contains a link and expects a
        single person carrying only a birthdate."""
        expected_info = IngestInfo()
        only_person = expected_info.create_person()
        only_person.birthdate = '1/15/2048'

        extracted = self.extract('good_table_links.html', 'good_table.yaml')
        self.assertEqual(expected_info, extracted)
 def test_single_page_roster(self):
     """Checks that bookings are not handled as multi-key classes: each
     person listed in columns is expected to have at most one booking."""
     roster_rows = (
         ('PERSON ONE', '1/1/1111', 'NUMBER ONE'),
         ('PERSON TWO', '2/2/2222', 'NUMBER TWO'),
         ('PERSON THREE', '3/3/3333', 'NUMBER THREE'),
     )
     expected_info = IngestInfo()
     for full_name, birthdate, booking_id in roster_rows:
         person = expected_info.create_person(full_name=full_name,
                                              birthdate=birthdate)
         person.create_booking(booking_id=booking_id)
     extracted = self.extract('single_page_roster.html',
                              'single_page_roster.yaml')
     self.assertEqual(expected_info, extracted)
Example #20
0
    def test_good_table(self) -> None:
        """Extracts a well modelled table and expects a single person
        carrying only a birthdate."""
        expected_info = IngestInfo()
        only_person = expected_info.create_person()
        only_person.birthdate = "1/15/2048"

        extracted = self.extract("good_table.html", "good_table.yaml")
        self.assertEqual(expected_info, extracted)
    def test_bond_multi_key(self):
        """Extracts bonds.html and expects one booking with three charges,
        each holding its own bond."""
        expected_info = IngestInfo()
        person = expected_info.create_person()
        booking = person.create_booking()
        for bond_id, amount in (('1', '10'), ('2', '20'), ('3', '30')):
            charge = booking.create_charge()
            charge.create_bond(bond_id=bond_id, amount=amount)

        extracted = self.extract('bonds.html', 'bonds.yaml')
        self.assertEqual(expected_info, extracted)
 def test_child_first(self):
     """Checks that multi_key mappings (columns in a table) create parent
     objects where they are needed."""
     expected_info = IngestInfo()
     person = expected_info.create_person()
     for admission_date, charge_name in (('111', 'AAA'), ('222', 'BBB')):
         booking = person.create_booking(admission_date=admission_date)
         booking.create_charge(name=charge_name)
     extracted = self.extract('child_first.html', 'child_first.yaml')
     self.assertEqual(expected_info, extracted)
Example #23
0
 def test_child_first(self) -> None:
     """Checks that multi_key mappings (columns in a table) create parent
     objects where they are needed."""
     expected_info = IngestInfo()
     person = expected_info.create_person()
     for admission_date, charge_name in (("111", "AAA"), ("222", "BBB")):
         booking = person.create_booking(admission_date=admission_date)
         booking.create_charge(name=charge_name)
     extracted = self.extract("child_first.html", "child_first.yaml")
     self.assertEqual(expected_info, extracted)
    def test_th_rows(self):
        """Extracts with a yaml file whose keys are <th> cells in rows."""
        expected_info = IngestInfo()
        only_person = expected_info.create_person()
        only_person.gender = 'M'
        only_person.race = 'WHITE'

        extracted = self.extract('th_rows.html', 'th_rows.yaml')
        self.assertEqual(expected_info, extracted)
Example #25
0
    def test_th_rows(self) -> None:
        """Extracts with a yaml file whose keys are <th> cells in rows."""
        expected_info = IngestInfo()
        only_person = expected_info.create_person()
        only_person.gender = "M"
        only_person.race = "WHITE"

        extracted = self.extract("th_rows.html", "th_rows.yaml")
        self.assertEqual(expected_info, extracted)
Example #26
0
    def test_bond_multi_key(self) -> None:
        """Extracts bonds.html and expects one booking with three charges,
        each holding its own bond."""
        expected_info = IngestInfo()
        person = expected_info.create_person()
        booking = person.create_booking()
        for bond_id, amount in (("1", "10"), ("2", "20"), ("3", "30")):
            charge = booking.create_charge()
            charge.create_bond(bond_id=bond_id, amount=amount)

        extracted = self.extract("bonds.html", "bonds.yaml")
        self.assertEqual(expected_info, extracted)
    def test_persist_to_db_same_task_one_fail_one_pass(self, mock_write,
                                                       _mock_region,
                                                       mock_session_return):
        """Records a successful write and an error for two identically built
        tasks and expects persist_to_database to still report success."""
        session = create_mock_session()
        mock_session_return.return_value = session
        key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)
        mock_write.return_value = True

        ii = IngestInfo()
        person = ii.create_person(person_id=TEST_ID, full_name=TEST_NAME)
        person.create_booking(booking_id=TEST_ID)

        # Because the tasks are the same, we expect that to be counted as a
        # pass.
        passing_task, failing_task = (
            Task(task_type=constants.TaskType.SCRAPE_DATA,
                 endpoint=TEST_ENDPOINT,
                 response_type=constants.ResponseType.TEXT)
            for _ in range(2)
        )

        batch_persistence.write(ii, key, passing_task)
        batch_persistence.write_error(TEST_ERROR, TEST_TRACE, failing_task,
                                      key)

        expected_proto = ingest_utils.convert_ingest_info_to_proto(ii)

        persisted = batch_persistence.persist_to_database(key.region_code,
                                                          session.start)
        self.assertTrue(persisted)

        written_proto = mock_write.call_args[0][0]
        self.assertEqual(written_proto, expected_proto)

        # Nothing should be left to fetch for the region once persisted.
        remaining = datastore_ingest_info.batch_get_ingest_infos_for_region(
            REGIONS[0], session.start)
        self.assertEqual(len(remaining), 0)
    def test_one_to_many(self):
        """Extracts a single 'Sentence Length' cell and expects its value in
        both min_length and max_length of one sentence."""
        mapping_path = os.path.join(
            os.path.dirname(__file__),
            "../testdata/data_extractor/yaml/one_to_many.yaml",
        )
        extractor = HtmlDataExtractor(mapping_path)

        expected_info = IngestInfo()
        person = expected_info.create_person()
        booking = person.create_booking()
        charge = booking.create_charge()
        charge.create_sentence(min_length="1 day", max_length="1 day")

        page = html.fromstring("<td>Sentence Length</td><td>1 day</td>")
        extracted = extractor.extract_and_populate_data(page)
        self.assertEqual(expected_info, extracted)
Example #29
0
    def test_skip_empty(self) -> None:
        """Extracts the skip_empty.json fixture and compares the result to
        one person holding two bookings."""
        mapping_path = os.path.join(
            os.path.dirname(__file__), "fixtures/skip_empty.yaml"
        )
        extractor = JsonDataExtractor(mapping_path)

        first_booking = Booking(
            custody_status="in custody",
            booking_id="1",
            charges=[
                Charge(name="battery"),
                Charge(name="assault", charge_class="misdemeanor"),
            ],
        )
        second_booking = Booking(
            booking_id="2",
            charges=[Charge(name="robbery", charge_class="felony")],
        )

        expected = IngestInfo()
        expected.create_person(
            full_name="skip empty",
            bookings=[first_booking, second_booking],
        )

        source_json = fixtures.as_dict("extractor", "skip_empty.json")
        result = extractor.extract_and_populate_data(source_json)
        self.assertEqual(result, expected)
    def test_persist_to_db_failed_no_write(self, mock_write, _mock_region,
                                           mock_session_return):
        """Records a write and an error for two different tasks and expects
        persist_to_database to return a falsy value without calling
        mock_write."""
        session = create_mock_session()
        mock_session_return.return_value = session
        key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        ii = IngestInfo()
        person = ii.create_person(person_id=TEST_ID, full_name=TEST_NAME)
        person.create_booking(booking_id=TEST_ID)

        passing_task = Task(task_type=constants.TaskType.SCRAPE_DATA,
                            endpoint=TEST_ENDPOINT,
                            response_type=constants.ResponseType.TEXT)

        # Because the tasks are different, we should fail.
        failing_task = Task(task_type=constants.TaskType.SCRAPE_DATA,
                            endpoint=TEST_ENDPOINT,
                            response_type=constants.ResponseType.TEXT,
                            params=TEST_PARAMS)

        batch_persistence.write(ii, key, passing_task)
        batch_persistence.write_error(TEST_ERROR, TEST_TRACE, failing_task,
                                      key)

        persisted = batch_persistence.persist_to_database(key.region_code,
                                                          session.start)
        self.assertFalse(persisted)

        self.assertEqual(mock_write.call_count, 0)

        # We should still have both items still on Datastore because they
        # weren't persisted.
        stored_batches = batch_persistence._get_batch_ingest_info_list(
            key.region_code, session.start)
        self.assertEqual(len(stored_batches), 2)