Ejemplo n.º 1
0
    def testConvert_PersonInferredBooking(self):
        """Convert a bare person and expect a single inferred booking.

        The ingested person has no fields and no bookings. The expected
        person holds one booking whose admission date and first/last seen
        times come from the ingest time and whose admission date is flagged
        as inferred.
        """
        # Arrange
        info = IngestInfo()
        info.people.add()
        ingest_metadata = IngestMetadata.new_with_defaults(
            ingest_time=_INGEST_TIME)

        # Act
        converted = self._convert_and_throw_on_errors(info, ingest_metadata)

        # Assert
        inferred_booking = Booking.new_with_defaults(
            admission_date=_INGEST_TIME.date(),
            admission_date_inferred=True,
            custody_status=CustodyStatus.PRESENT_WITHOUT_INFO,
            first_seen_time=_INGEST_TIME,
            last_seen_time=_INGEST_TIME,
        )
        expected = [Person.new_with_defaults(bookings=[inferred_booking])]

        self.assertEqual(converted, expected)
Ejemplo n.º 2
0
    def testConvert_TotalBondNoCharge_CreatesChargeWithTotalBondAmount(self):
        """Convert a booking that has a total bond amount but no charges.

        The expected booking contains one charge whose bond is a cash bond
        of 100 dollars, even though no charge was ingested.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(booking_ids=['BOOKING_ID'])
        info.bookings.add(booking_id='BOOKING_ID', total_bond_amount='$100')
        ingest_metadata = IngestMetadata.new_with_defaults(
            ingest_time=_INGEST_TIME)

        # Act
        converted = self._convert_and_throw_on_errors(info, ingest_metadata)

        # Assert
        total_bond = Bond.new_with_defaults(
            amount_dollars=100,
            bond_type=BondType.CASH,
            status=BondStatus.PRESENT_WITHOUT_INFO,
        )
        bond_only_charge = Charge.new_with_defaults(
            status=ChargeStatus.PRESENT_WITHOUT_INFO,
            bond=total_bond,
        )
        booking = Booking.new_with_defaults(
            external_id='BOOKING_ID',
            admission_date=_INGEST_TIME.date(),
            admission_date_inferred=True,
            custody_status=CustodyStatus.PRESENT_WITHOUT_INFO,
            first_seen_time=_INGEST_TIME,
            last_seen_time=_INGEST_TIME,
            charges=[bond_only_charge],
        )
        expected = [Person.new_with_defaults(bookings=[booking])]

        self.assertEqual(converted, expected)
Ejemplo n.º 3
0
    def testConvert_ExternalId_ClearPII(self):
        """Convert a person that has an external id, a name and a booking.

        The expected person keeps the external id, region and jurisdiction
        id but is built without a full name. The expected booking uses the
        ingested admission date and is not flagged as inferred.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(
            person_id='PERSON_ID',
            full_name='full_name',
            booking_ids=['BOOKING_ID'],
        )
        info.bookings.add(
            booking_id='BOOKING_ID',
            admission_date=str(_RELEASE_DATE),
        )
        ingest_metadata = IngestMetadata(
            'REGION', _JURISDICTION_ID, _INGEST_TIME)

        # Act
        converted = self._convert_and_throw_on_errors(info, ingest_metadata)

        # Assert
        booking = Booking.new_with_defaults(
            external_id='BOOKING_ID',
            admission_date=_RELEASE_DATE,
            admission_date_inferred=False,
            custody_status=CustodyStatus.PRESENT_WITHOUT_INFO,
            first_seen_time=_INGEST_TIME,
            last_seen_time=_INGEST_TIME,
        )
        person = Person.new_with_defaults(
            external_id='PERSON_ID',
            region='REGION',
            jurisdiction_id='JURISDICTION_ID',
            bookings=[booking],
        )

        self.assertEqual(converted, [person])
Ejemplo n.º 4
0
    def testConvert_FullIngestInfo_GeneratedIds(self):
        """Convert a full object graph whose ids all end in ``_GENERATE``.

        None of the expected entities carries an external id. The two
        ingested holds share a jurisdiction name and the test expects a
        single hold with that name upper-cased.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(
            person_id='PERSON_ID_GENERATE',
            booking_ids=['BOOKING_ID_GENERATE'],
        )
        info.bookings.add(
            booking_id='BOOKING_ID_GENERATE',
            arrest_id='ARREST_ID_GENERATE',
            hold_ids=['HOLD_ID_1_GENERATE', 'HOLD_ID_2_GENERATE'],
            charge_ids=['CHARGE_ID_GENERATE'],
        )
        info.holds.add(
            hold_id='HOLD_ID_1_GENERATE', jurisdiction_name='jurisdiction')
        info.holds.add(
            hold_id='HOLD_ID_2_GENERATE', jurisdiction_name='jurisdiction')
        info.arrests.add(arrest_id='ARREST_ID_GENERATE', agency='PD')
        info.charges.add(
            charge_id='CHARGE_ID_GENERATE',
            name='DUI',
            bond_id='BOND_ID_GENERATE',
            sentence_id='SENTENCE_ID_GENERATE',
        )
        info.bonds.add(bond_id='BOND_ID_GENERATE')
        info.sentences.add(
            sentence_id='SENTENCE_ID_GENERATE', is_life='True')
        ingest_metadata = IngestMetadata(
            'REGION', _JURISDICTION_ID, _INGEST_TIME)

        # Act
        converted = self._convert_and_throw_on_errors(info, ingest_metadata)

        # Assert
        hold = Hold.new_with_defaults(
            jurisdiction_name='JURISDICTION',
            status=HoldStatus.PRESENT_WITHOUT_INFO,
        )
        charge = Charge.new_with_defaults(
            name='DUI',
            status=ChargeStatus.PRESENT_WITHOUT_INFO,
            bond=Bond.new_with_defaults(
                status=BondStatus.PRESENT_WITHOUT_INFO),
            sentence=Sentence.new_with_defaults(
                status=SentenceStatus.PRESENT_WITHOUT_INFO,
                is_life=True,
            ),
        )
        booking = Booking.new_with_defaults(
            admission_date=_INGEST_TIME.date(),
            admission_date_inferred=True,
            custody_status=CustodyStatus.PRESENT_WITHOUT_INFO,
            first_seen_time=_INGEST_TIME,
            last_seen_time=_INGEST_TIME,
            arrest=Arrest.new_with_defaults(agency='PD'),
            holds=[hold],
            charges=[charge],
        )
        person = Person.new_with_defaults(
            region='REGION',
            jurisdiction_id='JURISDICTION_ID',
            bookings=[booking],
        )

        self.assertEqual(converted, [person])
Ejemplo n.º 5
0
    def test_write_preexisting_person(self):
        """Write ingest data for a person and booking already in the database.

        A person with one booking is committed first, then the same external
        ids are ingested one day later. The test expects a single person
        whose booking keeps its original first_seen_time and has
        last_seen_time moved to the later ingest time.
        """
        # Arrange
        later_scrape_time = SCRAPER_START_DATETIME + timedelta(days=1)
        ingest_metadata = IngestMetadata.new_with_defaults(
            region=REGION_1,
            jurisdiction_id=JURISDICTION_ID,
            ingest_time=later_scrape_time,
        )

        existing_booking = schema.Booking(
            booking_id=BOOKING_ID,
            external_id=EXTERNAL_BOOKING_ID,
            admission_date_inferred=True,
            custody_status=CustodyStatus.IN_CUSTODY.value,
            last_seen_time=SCRAPER_START_DATETIME,
            first_seen_time=SCRAPER_START_DATETIME,
        )
        existing_person = schema.Person(
            person_id=PERSON_ID,
            jurisdiction_id=JURISDICTION_ID,
            external_id=EXTERNAL_PERSON_ID,
            region=REGION_1,
            bookings=[existing_booking],
        )

        # Seed the database before the write under test.
        seed_session = SessionFactory.for_schema_base(JailsBase)
        seed_session.add(existing_person)
        seed_session.commit()

        info = IngestInfo()
        info.people.add(
            full_name=FULL_NAME_1,
            person_id=EXTERNAL_PERSON_ID,
            booking_ids=[EXTERNAL_BOOKING_ID],
        )
        info.bookings.add(
            booking_id=EXTERNAL_BOOKING_ID,
            custody_status='IN CUSTODY',
        )

        # Act
        persistence.write(info, ingest_metadata)

        # Assert
        booking_after_write = county_entities.Booking.new_with_defaults(
            booking_id=BOOKING_ID,
            external_id=EXTERNAL_BOOKING_ID,
            admission_date_inferred=True,
            custody_status=CustodyStatus.IN_CUSTODY,
            custody_status_raw_text=BOOKING_CUSTODY_STATUS.upper(),
            last_seen_time=later_scrape_time,
            first_seen_time=SCRAPER_START_DATETIME,
        )
        person_after_write = county_entities.Person.new_with_defaults(
            person_id=PERSON_ID,
            external_id=EXTERNAL_PERSON_ID,
            region=REGION_1,
            jurisdiction_id=JURISDICTION_ID,
            bookings=[booking_after_write],
        )
        read_session = SessionFactory.for_schema_base(JailsBase)
        persisted_people = county_dao.read_people(read_session)
        self.assertEqual([person_after_write], persisted_people)
Ejemplo n.º 6
0
    def testConvert_CannotConvertField_RaisesValueError(self):
        """Converting a state person with an unparseable birthdate must fail.

        The ingested state person has birthdate "NOT_A_DATE"; the test
        expects the conversion helper to raise ValueError.
        """
        # Arrange
        # Single assignment: the original bound the name twice in one
        # chained statement (`metadata = metadata = ...`), which was redundant.
        metadata = FakeIngestMetadata.for_state(region="us_xx")

        ingest_info = IngestInfo()
        ingest_info.state_people.add(birthdate="NOT_A_DATE")

        # Act + Assert
        with self.assertRaises(ValueError):
            self._convert_and_throw_on_errors(ingest_info, metadata)
Ejemplo n.º 7
0
    def testConvert_CannotConvertField_RaisesValueError(self):
        """Converting a person with an unparseable birthdate must fail.

        The ingested person has birthdate 'NOT_A_DATE'; the test expects the
        conversion helper to raise ValueError.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(birthdate='NOT_A_DATE')
        default_metadata = IngestMetadata.new_with_defaults()

        # Act + Assert
        with self.assertRaises(ValueError):
            self._convert_and_throw_on_errors(info, default_metadata)
Ejemplo n.º 8
0
    def testConvert_CannotConvertField_RaisesValueError(self):
        """Converting a state person with an unparseable birthdate must fail.

        Uses state-level metadata and a state person whose birthdate is
        "NOT_A_DATE"; the test expects ValueError from the conversion helper.
        """
        # Arrange
        info = IngestInfo()
        info.state_people.add(birthdate="NOT_A_DATE")
        state_metadata = IngestMetadata.new_with_defaults(
            system_level=SystemLevel.STATE,
        )

        # Act + Assert
        with self.assertRaises(ValueError):
            self._convert_and_throw_on_errors(info, state_metadata)
Ejemplo n.º 9
0
    def testConvert_MultipleOpenBookings_RaisesValueError(self):
        """A person referencing two bookings with only admission dates fails.

        Both bookings have an admission date and nothing else; the test
        expects the conversion helper to raise ValueError.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(booking_ids=["BOOKING_ID1", "BOOKING_ID2"])
        info.bookings.add(
            booking_id="BOOKING_ID1", admission_date="3/14/2020")
        info.bookings.add(
            booking_id="BOOKING_ID2", admission_date="3/16/2020")
        ingest_metadata = IngestMetadata.new_with_defaults(
            ingest_time=_INGEST_TIME)

        # Act + Assert
        with self.assertRaises(ValueError):
            self._convert_and_throw_on_errors(info, ingest_metadata)
Ejemplo n.º 10
0
    def test_twoDifferentPeople_persistsNone(self):
        """Write two people, one with gender 'X', and expect nothing stored.

        The write call is expected to return a falsy value and a follow-up
        read is expected to return no people.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(person_id='1', full_name=FULL_NAME_1)
        info.people.add(person_id='2', full_name=FULL_NAME_2, gender='X')

        # Act
        write_succeeded = persistence.write(info, DEFAULT_METADATA)
        self.assertFalse(write_succeeded)
        read_session = SessionFactory.for_schema_base(JailsBase)
        persisted_people = county_dao.read_people(read_session)

        # Assert
        assert not persisted_people
Ejemplo n.º 11
0
    def test_localRun(self):
        """Write one person while ``os.getenv`` is patched to return 'Local'.

        The test expects that nothing can be read back afterwards.
        """
        fake_getenv = Mock(return_value='Local')
        with patch('os.getenv', fake_getenv):
            # Arrange
            info = IngestInfo()
            info.people.add(full_name=FULL_NAME_1)

            # Act
            persistence.write(info, DEFAULT_METADATA)
            read_session = SessionFactory.for_schema_base(JailsBase)
            persisted_people = county_dao.read_people(read_session)

            # Assert
            assert not persisted_people
Ejemplo n.º 12
0
    def test_persistLocally(self):
        """Write one person with PERSIST_LOCALLY set in a patched environment.

        ``os.getenv`` is patched to return 'local' and ``os.environ`` gets
        PERSIST_LOCALLY='true'. The test expects exactly one person to be
        readable afterwards, carrying the formatted full name.
        """
        # Arrange
        # Both patches must be active. The original joined them with `and`,
        # which evaluates to the second patcher only (patcher objects are
        # truthy), so the os.getenv patch was never entered. A comma makes
        # the with statement enter both context managers.
        with patch('os.getenv', Mock(return_value='local')), \
                patch.dict('os.environ', {'PERSIST_LOCALLY': 'true'}):
            ingest_info = IngestInfo()
            ingest_info.people.add(full_name=FULL_NAME_1)

            # Act
            persistence.write(ingest_info, DEFAULT_METADATA)
            result = county_dao.read_people(
                SessionFactory.for_schema_base(JailsBase))

            # Assert
            assert len(result) == 1
            assert result[0].full_name == _format_full_name(FULL_NAME_1)
Ejemplo n.º 13
0
    def test_twoDifferentPeople_persistsBoth(self):
        """Write two people with ``_GENERATE`` ids and expect both stored.

        The read is expected to return FULL_NAME_2's person first and
        FULL_NAME_1's person second.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(person_id='1_GENERATE', full_name=FULL_NAME_1)
        info.people.add(person_id='2_GENERATE', full_name=FULL_NAME_2)

        # Act
        persistence.write(info, DEFAULT_METADATA)
        read_session = SessionFactory.for_schema_base(JailsBase)
        persisted_people = county_dao.read_people(read_session)

        # Assert
        assert len(persisted_people) == 2

        first_person, second_person = persisted_people[0], persisted_people[1]
        assert first_person.full_name == _format_full_name(FULL_NAME_2)
        assert second_person.full_name == _format_full_name(FULL_NAME_1)
Ejemplo n.º 14
0
    def testConvert_TotalBondWithCharge_SetsTotalBondOnCharge(self):
        """Convert a booking with a total bond amount and one charge.

        The expected charge has external id "CHARGE_ID_COUNT_1" and carries
        a cash bond of 100 dollars taken from the booking's total bond.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(booking_ids=["BOOKING_ID"])
        info.bookings.add(
            booking_id="BOOKING_ID",
            total_bond_amount="$100",
            charge_ids=["CHARGE_ID"],
        )
        info.charges.add(charge_id="CHARGE_ID")
        county_metadata = FakeIngestMetadata.for_county(
            region="REGION",
            jurisdiction_id="JURISDICTION_ID",
            ingest_time=_INGEST_TIME,
        )

        # Act
        converted = self._convert_and_throw_on_errors(info, county_metadata)

        # Assert
        total_bond = Bond.new_with_defaults(
            amount_dollars=100,
            status=BondStatus.PRESENT_WITHOUT_INFO,
            bond_type=BondType.CASH,
        )
        charge = Charge.new_with_defaults(
            external_id="CHARGE_ID_COUNT_1",
            status=ChargeStatus.PRESENT_WITHOUT_INFO,
            bond=total_bond,
        )
        booking = Booking.new_with_defaults(
            external_id="BOOKING_ID",
            admission_date=_INGEST_TIME.date(),
            admission_date_inferred=True,
            first_seen_time=_INGEST_TIME,
            last_seen_time=_INGEST_TIME,
            custody_status=CustodyStatus.PRESENT_WITHOUT_INFO,
            charges=[charge],
        )
        person = Person.new_with_defaults(
            region="REGION",
            jurisdiction_id="JURISDICTION_ID",
            bookings=[booking],
        )

        self.assertEqual(converted, [person])
Ejemplo n.º 15
0
    def test_write_noPeople(self):
        """Write an empty IngestInfo and expect no people to be readable."""
        # Arrange
        later_scrape_time = SCRAPER_START_DATETIME + timedelta(days=1)
        ingest_metadata = IngestMetadata.new_with_defaults(
            region=REGION_1,
            jurisdiction_id=JURISDICTION_ID,
            ingest_time=later_scrape_time,
        )
        empty_info = IngestInfo()

        # Act
        persistence.write(empty_info, ingest_metadata)

        # Assert
        read_session = SessionFactory.for_schema_base(JailsBase)
        persisted_people = county_dao.read_people(read_session)
        self.assertFalse(persisted_people)
Ejemplo n.º 16
0
    def testConvert_MultipleOpenBookings_RaisesValueError(self):
        """A person referencing two bookings with only admission dates fails.

        Uses county metadata; both bookings have an admission date and
        nothing else, and the test expects ValueError from the conversion.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(booking_ids=["BOOKING_ID1", "BOOKING_ID2"])
        info.bookings.add(
            booking_id="BOOKING_ID1", admission_date="3/14/2020")
        info.bookings.add(
            booking_id="BOOKING_ID2", admission_date="3/16/2020")
        county_metadata = FakeIngestMetadata.for_county(
            region="REGION",
            jurisdiction_id="JURISDICTION_ID",
            ingest_time=_INGEST_TIME,
        )

        # Act + Assert
        with self.assertRaises(ValueError):
            self._convert_and_throw_on_errors(info, county_metadata)
Ejemplo n.º 17
0
    def testConvert_TotalBondWithMultipleBonds_ThrowsException(self):
        """A total bond on a booking whose charges have two bonds must fail.

        The booking sets total_bond_amount and references two charges, each
        with its own bond; the test expects ValueError from the conversion.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(booking_ids=['BOOKING_ID'])
        info.bookings.add(
            booking_id='BOOKING_ID',
            total_bond_amount='$100',
            charge_ids=['CHARGE_ID', 'CHARGE_ID_2'],
        )
        info.charges.add(charge_id='CHARGE_ID', bond_id='BOND_ID')
        info.charges.add(charge_id='CHARGE_ID_2', bond_id='BOND_ID_2')
        info.bonds.add(bond_id='BOND_ID')
        info.bonds.add(bond_id='BOND_ID_2')
        ingest_metadata = IngestMetadata.new_with_defaults(
            ingest_time=_INGEST_TIME)

        # Act + Assert
        with self.assertRaises(ValueError):
            self._convert_and_throw_on_errors(info, ingest_metadata)
    def test_extra_id(self):
        """Validate a booking that no person references.

        The validator is expected to raise ValidationError whose ``errors``
        report booking id "1" under "ids_never_referenced".
        """
        # Arrange
        info = IngestInfo()
        info.bookings.add(booking_id="1")

        # Act
        with pytest.raises(ValidationError) as exc_info:
            ingest_info_validator.validate(info)
        reported_errors = exc_info.value.errors

        # Assert
        expected_errors = {"bookings": {"ids_never_referenced": {"1"}}}

        self.assertEqual(reported_errors, expected_errors)
    def test_non_existing_id(self):
        """Validate a person that references a booking id that is not present.

        The validator is expected to raise ValidationError whose ``errors``
        report booking id "2" under "ids_referenced_that_do_not_exist".
        """
        # Arrange
        info = IngestInfo()
        info.people.add(person_id="1", booking_ids=["2"])

        # Act
        with pytest.raises(ValidationError) as exc_info:
            ingest_info_validator.validate(info)
        reported_errors = exc_info.value.errors

        # Assert
        expected_errors = {
            "bookings": {"ids_referenced_that_do_not_exist": {"2"}},
        }

        self.assertEqual(reported_errors, expected_errors)
Ejemplo n.º 20
0
    def test_retryableError_retries(self, mock_commit, mock_close):
        """Commit failing five times with a retryable error, then succeeding.

        The mocked commit raises a DatabaseError wrapping an error with the
        SERIALIZATION_FAILURE pgcode five times and then returns normally.
        The test expects six commit calls in total and one close call.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(full_name=FULL_NAME_1)

        # Serialization Failure is retryable
        pg_error = create_autospec(psycopg2.OperationalError)
        pg_error.pgcode = SERIALIZATION_FAILURE
        db_error = sqlalchemy.exc.DatabaseError(
            statement=None, params=None, orig=pg_error)
        # 5 retries is allowed
        failed_attempts = [db_error] * 5
        mock_commit.side_effect = failed_attempts + [mock.DEFAULT]

        # Act
        persistence.write(info, DEFAULT_METADATA)

        # Assert
        expected_commit_calls = [call()] * 6
        assert mock_commit.call_args_list == expected_commit_calls
        mock_close.assert_called_once()
Ejemplo n.º 21
0
    def test_nonRetryableError_failsImmediately(self, mock_commit, mock_close):
        """Commit failing once with a non-retryable error.

        The mocked commit raises a DatabaseError wrapping an error with the
        NOT_NULL_VIOLATION pgcode. The test expects the error to propagate
        from the write, exactly one commit call and one close call.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(full_name=FULL_NAME_1)

        # Not Null Violation is not retryable
        pg_error = create_autospec(psycopg2.OperationalError)
        pg_error.pgcode = NOT_NULL_VIOLATION
        db_error = sqlalchemy.exc.DatabaseError(
            statement=None, params=None, orig=pg_error)
        mock_commit.side_effect = [db_error, mock.DEFAULT]

        # Act / Assert
        with pytest.raises(sqlalchemy.exc.DatabaseError):
            persistence.write(info, DEFAULT_METADATA)

        # Assert
        single_commit_call = [call()]
        assert mock_commit.call_args_list == single_commit_call
        mock_close.assert_called_once()
Ejemplo n.º 22
0
    def test_twoDifferentPeopleWithBooking_persistsNone(self):
        """Write two people where one booking has custody status 'NO EXIST'.

        The write call is expected to return a falsy value and a follow-up
        read is expected to return no people.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(full_name=FULL_NAME_2)
        info.people.add(
            full_name=FULL_NAME_1,
            person_id=EXTERNAL_PERSON_ID,
            booking_ids=[EXTERNAL_BOOKING_ID],
        )
        info.bookings.add(
            booking_id=EXTERNAL_BOOKING_ID, custody_status='NO EXIST')

        # Act
        write_succeeded = persistence.write(info, DEFAULT_METADATA)
        self.assertFalse(write_succeeded)
        read_session = SessionFactory.for_schema_base(JailsBase)
        persisted_people = county_dao.read_people(read_session)

        # Assert
        assert not persisted_people
    def test_duplicate_ids(self):
        """Validate an IngestInfo with repeated person, booking and arrest ids.

        The validator is expected to raise ValidationError whose ``errors``
        list the repeated ids under "duplicate_ids" for people, bookings and
        arrests, and contain nothing for charges.
        """
        # Arrange
        people = [
            Person(person_id=PERSON_1),
            Person(person_id=PERSON_1),
            Person(person_id=PERSON_1, booking_ids=[BOOKING_1, BOOKING_2]),
        ]
        bookings = [
            Booking(booking_id=BOOKING_1),
            Booking(booking_id=BOOKING_1, charge_ids=[CHARGE_1, CHARGE_2]),
            Booking(booking_id=BOOKING_2, arrest_id=ARREST_1),
            Booking(booking_id=BOOKING_2, arrest_id=ARREST_2),
        ]
        arrests = [
            Arrest(arrest_id=ARREST_1),
            Arrest(arrest_id=ARREST_1),
            Arrest(arrest_id=ARREST_2),
        ]
        charges = [Charge(charge_id=CHARGE_1), Charge(charge_id=CHARGE_2)]

        info = IngestInfo()
        info.people.extend(people)
        info.bookings.extend(bookings)
        info.arrests.extend(arrests)
        info.charges.extend(charges)

        # Act
        with pytest.raises(ValidationError) as exc_info:
            ingest_info_validator.validate(info)
        reported_errors = exc_info.value.errors

        # Assert
        expected_errors = {
            "people": {"duplicate_ids": {PERSON_1}},
            "bookings": {"duplicate_ids": {BOOKING_1, BOOKING_2}},
            "arrests": {"duplicate_ids": {ARREST_1}},
        }

        self.assertEqual(reported_errors, expected_errors)
Ejemplo n.º 24
0
    def test_readSinglePersonByName(self):
        """Write two people and read back only the one matching a full name.

        The read is filtered by the formatted FULL_NAME_1; the test expects
        exactly one person with that name and BIRTHDATE_1_DATE as birthdate.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(
            person_id='1_GENERATE',
            full_name=FULL_NAME_1,
            birthdate=BIRTHDATE_1,
        )
        info.people.add(
            person_id='2_GENERATE',
            full_name=FULL_NAME_2,
            birthdate=BIRTHDATE_2,
        )

        # Act
        persistence.write(info, DEFAULT_METADATA)
        read_session = SessionFactory.for_schema_base(JailsBase)
        matches = county_dao.read_people(
            read_session, full_name=_format_full_name(FULL_NAME_1))

        # Assert
        assert len(matches) == 1
        only_match = matches[0]
        assert only_match.full_name == _format_full_name(FULL_NAME_1)
        assert only_match.birthdate == BIRTHDATE_1_DATE
Ejemplo n.º 25
0
    def test_threeDifferentPeople_persistsTwoBelowThreshold(self):
        """Write three people where one booking has custody status 'NO EXIST'.

        The test expects two people to be readable afterwards: FULL_NAME_3's
        person first and FULL_NAME_2's person second.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(person_id='1_GENERATE', full_name=FULL_NAME_2)
        info.people.add(person_id='2_GENERATE', full_name=FULL_NAME_3)
        info.people.add(
            person_id=EXTERNAL_PERSON_ID,
            full_name=FULL_NAME_1,
            booking_ids=[EXTERNAL_BOOKING_ID],
        )
        info.bookings.add(
            booking_id=EXTERNAL_BOOKING_ID, custody_status='NO EXIST')

        # Act
        persistence.write(info, DEFAULT_METADATA)
        read_session = SessionFactory.for_schema_base(JailsBase)
        persisted_people = county_dao.read_people(read_session)

        # Assert
        assert len(persisted_people) == 2
        first_person, second_person = persisted_people[0], persisted_people[1]
        assert first_person.full_name == _format_full_name(FULL_NAME_3)
        assert second_person.full_name == _format_full_name(FULL_NAME_2)
Ejemplo n.º 26
0
    def testConvert_TotalBondWithMultipleBonds_ThrowsException(self):
        """A total bond on a booking whose charges have two bonds must fail.

        Uses county metadata. The booking sets total_bond_amount and
        references two charges, each with its own bond; the test expects
        ValueError from the conversion helper.
        """
        # Arrange
        info = IngestInfo()
        info.people.add(booking_ids=["BOOKING_ID"])
        info.bookings.add(
            booking_id="BOOKING_ID",
            total_bond_amount="$100",
            charge_ids=["CHARGE_ID", "CHARGE_ID_2"],
        )
        info.charges.add(charge_id="CHARGE_ID", bond_id="BOND_ID")
        info.charges.add(charge_id="CHARGE_ID_2", bond_id="BOND_ID_2")
        info.bonds.add(bond_id="BOND_ID")
        info.bonds.add(bond_id="BOND_ID_2")
        county_metadata = FakeIngestMetadata.for_county(
            region="REGION",
            jurisdiction_id="JURISDICTION_ID",
            ingest_time=_INGEST_TIME,
        )

        # Act + Assert
        with self.assertRaises(ValueError):
            self._convert_and_throw_on_errors(info, county_metadata)
 def test_empty_ingest_info(self):
     """Validate an empty IngestInfo; the call is expected not to raise."""
     empty_info = IngestInfo()
     ingest_info_validator.validate(empty_info)
    def test_reports_all_errors_together(self):
        """Validate an IngestInfo that has every kind of id error at once.

        Each entity type contains a duplicated id, a referenced id that is
        missing and an id nobody references. The validator is expected to
        raise one ValidationError whose ``errors`` report all of them,
        with only "duplicate_ids" reported for people.
        """
        # Arrange
        people = [
            Person(person_id=PERSON_1, booking_ids=[MISSING_BOOKING]),
            Person(person_id=PERSON_1, booking_ids=[BOOKING_1]),
            Person(person_id=PERSON_1, booking_ids=[BOOKING_1, BOOKING_2]),
            Person(person_id=EXTRA_PERSON),
        ]
        bookings = [
            Booking(booking_id=BOOKING_1),
            Booking(booking_id=BOOKING_1, arrest_id=MISSING_ARREST),
            Booking(
                booking_id=BOOKING_2,
                arrest_id=ARREST_1,
                charge_ids=[CHARGE_1, CHARGE_2, MISSING_CHARGE],
            ),
            Booking(booking_id=EXTRA_BOOKING),
        ]
        arrests = [
            Arrest(arrest_id=ARREST_1),
            Arrest(arrest_id=ARREST_1),
            Arrest(arrest_id=EXTRA_ARREST),
        ]
        charges = [
            Charge(charge_id=CHARGE_1),
            Charge(charge_id=CHARGE_1, sentence_id=SENTENCE_1, bond_id=BOND_1),
            Charge(
                charge_id=CHARGE_2,
                sentence_id=MISSING_SENTENCE,
                bond_id=MISSING_BOND,
            ),
            Charge(charge_id=EXTRA_CHARGE),
        ]
        bonds = [
            Bond(bond_id=BOND_1),
            Bond(bond_id=BOND_1),
            Bond(bond_id=EXTRA_BOND),
        ]
        sentences = [
            Sentence(sentence_id=SENTENCE_1),
            Sentence(sentence_id=SENTENCE_1),
            Sentence(sentence_id=EXTRA_SENTENCE),
        ]

        info = IngestInfo()
        info.people.extend(people)
        info.bookings.extend(bookings)
        info.arrests.extend(arrests)
        info.charges.extend(charges)
        info.bonds.extend(bonds)
        info.sentences.extend(sentences)

        # Act
        with pytest.raises(ValidationError) as exc_info:
            ingest_info_validator.validate(info)
        reported_errors = exc_info.value.errors

        # Assert
        # (entity key, duplicated id, missing referenced id, unreferenced id)
        child_entity_ids = [
            ("bookings", BOOKING_1, MISSING_BOOKING, EXTRA_BOOKING),
            ("arrests", ARREST_1, MISSING_ARREST, EXTRA_ARREST),
            ("charges", CHARGE_1, MISSING_CHARGE, EXTRA_CHARGE),
            ("sentences", SENTENCE_1, MISSING_SENTENCE, EXTRA_SENTENCE),
            ("bonds", BOND_1, MISSING_BOND, EXTRA_BOND),
        ]
        expected_errors = {"people": {"duplicate_ids": {PERSON_1}}}
        for entity_key, duplicate, missing, extra in child_entity_ids:
            expected_errors[entity_key] = {
                "duplicate_ids": {duplicate},
                "ids_referenced_that_do_not_exist": {missing},
                "ids_never_referenced": {extra},
            }

        self.assertEqual(reported_errors, expected_errors)
    def test_state_threeSentenceGroups_dontPersistAboveThreshold(self):
        """Write state ingest data and expect the stored people to be unchanged.

        Two state people are committed first; they each own a sentence group
        with external id SENTENCE_GROUP_ID_2. One state person referencing
        two sentence groups with a county code is then ingested. The test
        expects the people read back afterwards to equal the entities
        captured before the write.
        """
        # Arrange
        # Ingested data: one person referencing two sentence groups, each
        # carrying a county code.
        ingest_info = IngestInfo()
        ingest_info.state_people.add(
            state_person_id='1_GENERATE',
            state_sentence_group_ids=[SENTENCE_GROUP_ID, SENTENCE_GROUP_ID_2])
        ingest_info.state_sentence_groups.add(
            state_sentence_group_id=SENTENCE_GROUP_ID, county_code=COUNTY_CODE)
        ingest_info.state_sentence_groups.add(
            state_sentence_group_id=SENTENCE_GROUP_ID_2,
            county_code=COUNTY_CODE)

        # First pre-existing person: owns both sentence groups.
        db_person = schema.StatePerson(person_id=ID, full_name=FULL_NAME_1)
        db_sentence_group = schema.StateSentenceGroup(
            sentence_group_id=ID,
            status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
            external_id=SENTENCE_GROUP_ID,
            state_code=REGION_CODE)
        db_sentence_group_2 = schema.StateSentenceGroup(
            sentence_group_id=ID_2,
            status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
            external_id=SENTENCE_GROUP_ID_2,
            state_code=REGION_CODE)
        db_external_id = schema.StatePersonExternalId(person_external_id_id=ID,
                                                      state_code=REGION_CODE,
                                                      external_id=EXTERNAL_ID,
                                                      id_type=ID_TYPE)
        db_person.sentence_groups = [db_sentence_group, db_sentence_group_2]
        db_person.external_ids = [db_external_id]

        # Second pre-existing person: owns another sentence group that reuses
        # the external id SENTENCE_GROUP_ID_2.
        db_person_2 = schema.StatePerson(person_id=ID_2, full_name=FULL_NAME_1)
        db_sentence_group_2_dup = schema.StateSentenceGroup(
            sentence_group_id=ID_3,
            status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
            external_id=SENTENCE_GROUP_ID_2,
            state_code=REGION_CODE)
        db_external_id_2 = schema.StatePersonExternalId(
            person_external_id_id=ID_2,
            state_code=REGION_CODE,
            external_id=EXTERNAL_ID_2,
            id_type=ID_TYPE)
        db_person_2.sentence_groups = [db_sentence_group_2_dup]
        db_person_2.external_ids = [db_external_id_2]

        # No updates
        # The expected entities are captured here, before the schema objects
        # are added to a session and before the write runs.
        expected_person = self.to_entity(db_person)
        expected_person_2 = self.to_entity(db_person_2)

        session = SessionFactory.for_schema_base(StateBase)
        session.add(db_person)
        session.add(db_person_2)
        session.commit()

        # Act
        persistence.write(ingest_info, DEFAULT_METADATA)
        # Read back through a session newly obtained from the factory.
        session = SessionFactory.for_schema_base(StateBase)
        persons = dao.read_people(session)

        # Assert
        self.assertEqual([expected_person, expected_person_2],
                         converter.convert_schema_objects_to_entity(persons))
    def test_state_threeSentenceGroups_persistsTwoBelowThreshold(self):
        """Writes three ingested sentence groups; expects only two updated.

        Two people are seeded in the database. The first owns three sentence
        groups; the second owns one group that reuses the external id
        SENTENCE_GROUP_ID_3. After the write, the first two groups of the
        first person are expected to carry the ingested county code, while
        both groups sharing SENTENCE_GROUP_ID_3 are expected to be unchanged.

        NOTE(review): the "below threshold" in the test name presumably
        refers to an error threshold configured outside this method --
        confirm against the persistence module / class-level setup.
        """

        def stored_group(primary_key, external_id):
            # Database-side sentence group (status stored as the enum value).
            return schema.StateSentenceGroup(
                sentence_group_id=primary_key,
                status=StateSentenceStatus.EXTERNAL_UNKNOWN.value,
                external_id=external_id,
                state_code=REGION_CODE)

        def stored_external_id(primary_key, external_id):
            # Database-side person external id.
            return schema.StatePersonExternalId(
                person_external_id_id=primary_key,
                state_code=REGION_CODE,
                external_id=external_id,
                id_type=ID_TYPE)

        def expected_group(primary_key, external_id, owner, **overrides):
            # Entity-side sentence group; `overrides` carries fields such as
            # county_code that only some of the expected groups have.
            return StateSentenceGroup.new_with_defaults(
                sentence_group_id=primary_key,
                status=StateSentenceStatus.EXTERNAL_UNKNOWN,
                external_id=external_id,
                state_code=REGION_CODE,
                person=owner,
                **overrides)

        def expected_external_id(primary_key, external_id, owner):
            # Entity-side person external id.
            return StatePersonExternalId.new_with_defaults(
                person_external_id_id=primary_key,
                state_code=REGION_CODE,
                external_id=external_id,
                id_type=ID_TYPE,
                person=owner)

        # Arrange
        ingested_group_ids = (
            SENTENCE_GROUP_ID, SENTENCE_GROUP_ID_2, SENTENCE_GROUP_ID_3)

        ingest_info = IngestInfo()
        ingest_info.state_people.add(
            state_person_id='1_GENERATE',
            state_sentence_group_ids=list(ingested_group_ids))
        for ingested_group_id in ingested_group_ids:
            ingest_info.state_sentence_groups.add(
                state_sentence_group_id=ingested_group_id,
                county_code=COUNTY_CODE)

        # First stored person: owns all three sentence groups.
        db_person = schema.StatePerson(person_id=ID, full_name=FULL_NAME_1)
        db_person.sentence_groups = [
            stored_group(ID, SENTENCE_GROUP_ID),
            stored_group(ID_2, SENTENCE_GROUP_ID_2),
            stored_group(ID_3, SENTENCE_GROUP_ID_3),
        ]
        db_person.external_ids = [stored_external_id(ID, EXTERNAL_ID)]

        # Second stored person: owns a group reusing SENTENCE_GROUP_ID_3.
        db_person_2 = schema.StatePerson(person_id=ID_2, full_name=FULL_NAME_1)
        db_person_2.sentence_groups = [
            stored_group(ID_4, SENTENCE_GROUP_ID_3)
        ]
        db_person_2.external_ids = [stored_external_id(ID_2, EXTERNAL_ID_2)]

        # Expected entities. Each person is created first so that its
        # children can point back at it, then the child lists are attached.
        expected_person = StatePerson.new_with_defaults(person_id=ID,
                                                        full_name=FULL_NAME_1,
                                                        external_ids=[],
                                                        sentence_groups=[])
        expected_person.external_ids = [
            expected_external_id(ID, EXTERNAL_ID, expected_person)
        ]
        expected_person.sentence_groups = [
            expected_group(ID, SENTENCE_GROUP_ID, expected_person,
                           county_code=COUNTY_CODE),
            expected_group(ID_2, SENTENCE_GROUP_ID_2, expected_person,
                           county_code=COUNTY_CODE),
            # No county code because errors during match
            expected_group(ID_3, SENTENCE_GROUP_ID_3, expected_person),
        ]

        expected_person_2 = StatePerson.new_with_defaults(
            person_id=ID_2, full_name=FULL_NAME_1)
        expected_person_2.sentence_groups = [
            # No county code because unmatched
            expected_group(ID_4, SENTENCE_GROUP_ID_3, expected_person_2)
        ]
        expected_person_2.external_ids = [
            expected_external_id(ID_2, EXTERNAL_ID_2, expected_person_2)
        ]

        session = SessionFactory.for_schema_base(StateBase)
        for seeded_person in (db_person, db_person_2):
            session.add(seeded_person)
        session.commit()

        # Act
        persistence.write(ingest_info, DEFAULT_METADATA)
        session = SessionFactory.for_schema_base(StateBase)
        persons = dao.read_people(session)

        # Assert
        actual_people = converter.convert_schema_objects_to_entity(persons)
        self.assertEqual([expected_person, expected_person_2], actual_people)