def testConvert_PersonInferredBooking(self):
    """A person added with no booking is expected to convert with one inferred booking."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add()
    ingest_metadata = IngestMetadata.new_with_defaults(
        ingest_time=_INGEST_TIME)

    # Act
    converted_people = self._convert_and_throw_on_errors(
        ingest_info, ingest_metadata)

    # Assert
    inferred_booking = Booking.new_with_defaults(
        admission_date_inferred=True,
        first_seen_time=_INGEST_TIME,
        last_seen_time=_INGEST_TIME,
        admission_date=_INGEST_TIME.date(),
        custody_status=CustodyStatus.PRESENT_WITHOUT_INFO,
    )
    expected_people = [Person.new_with_defaults(bookings=[inferred_booking])]
    self.assertEqual(converted_people, expected_people)
def testConvert_TotalBondNoCharge_CreatesChargeWithTotalBondAmount(self):
    """A booking with a total bond but no charges is expected to gain one charge holding a cash bond."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(booking_ids=['BOOKING_ID'])
    ingest_info.bookings.add(booking_id='BOOKING_ID',
                             total_bond_amount='$100')
    ingest_metadata = IngestMetadata.new_with_defaults(
        ingest_time=_INGEST_TIME)

    # Act
    converted_people = self._convert_and_throw_on_errors(
        ingest_info, ingest_metadata)

    # Assert
    total_bond = Bond.new_with_defaults(
        status=BondStatus.PRESENT_WITHOUT_INFO,
        bond_type=BondType.CASH,
        amount_dollars=100,
    )
    generated_charge = Charge.new_with_defaults(
        status=ChargeStatus.PRESENT_WITHOUT_INFO,
        bond=total_bond,
    )
    expected_booking = Booking.new_with_defaults(
        admission_date=_INGEST_TIME.date(),
        admission_date_inferred=True,
        first_seen_time=_INGEST_TIME,
        last_seen_time=_INGEST_TIME,
        external_id='BOOKING_ID',
        custody_status=CustodyStatus.PRESENT_WITHOUT_INFO,
        charges=[generated_charge],
    )
    expected_people = [Person.new_with_defaults(bookings=[expected_booking])]
    self.assertEqual(converted_people, expected_people)
def testConvert_ExternalId_ClearPII(self):
    """A person ingested with a full name is expected to convert without it.

    The expected Person keeps the external id, region and jurisdiction id
    but is built without a full_name.
    """
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(
        person_id='PERSON_ID',
        full_name='full_name',
        booking_ids=['BOOKING_ID'],
    )
    ingest_info.bookings.add(
        booking_id='BOOKING_ID',
        admission_date=str(_RELEASE_DATE),
    )
    ingest_metadata = IngestMetadata('REGION', _JURISDICTION_ID, _INGEST_TIME)

    # Act
    converted_people = self._convert_and_throw_on_errors(
        ingest_info, ingest_metadata)

    # Assert
    expected_booking = Booking.new_with_defaults(
        external_id='BOOKING_ID',
        admission_date=_RELEASE_DATE,
        admission_date_inferred=False,
        custody_status=CustodyStatus.PRESENT_WITHOUT_INFO,
        first_seen_time=_INGEST_TIME,
        last_seen_time=_INGEST_TIME,
    )
    expected_person = Person.new_with_defaults(
        external_id='PERSON_ID',
        region='REGION',
        jurisdiction_id='JURISDICTION_ID',
        bookings=[expected_booking],
    )
    self.assertEqual(converted_people, [expected_person])
def testConvert_FullIngestInfo_GeneratedIds(self):
    """Ids ending in _GENERATE are expected to be absent from the converted entities.

    None of the expected entities below carries an external id, and the two
    ingested holds are expected to come back as a single Hold.
    """
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(
        person_id='PERSON_ID_GENERATE',
        booking_ids=['BOOKING_ID_GENERATE'],
    )
    ingest_info.bookings.add(
        booking_id='BOOKING_ID_GENERATE',
        arrest_id='ARREST_ID_GENERATE',
        hold_ids=['HOLD_ID_1_GENERATE', 'HOLD_ID_2_GENERATE'],
        charge_ids=['CHARGE_ID_GENERATE'],
    )
    for generated_hold_id in ('HOLD_ID_1_GENERATE', 'HOLD_ID_2_GENERATE'):
        ingest_info.holds.add(hold_id=generated_hold_id,
                              jurisdiction_name='jurisdiction')
    ingest_info.arrests.add(arrest_id='ARREST_ID_GENERATE', agency='PD')
    ingest_info.charges.add(
        charge_id='CHARGE_ID_GENERATE',
        name='DUI',
        bond_id='BOND_ID_GENERATE',
        sentence_id='SENTENCE_ID_GENERATE',
    )
    ingest_info.bonds.add(bond_id='BOND_ID_GENERATE')
    ingest_info.sentences.add(sentence_id='SENTENCE_ID_GENERATE',
                              is_life='True')
    ingest_metadata = IngestMetadata('REGION', _JURISDICTION_ID, _INGEST_TIME)

    # Act
    converted_people = self._convert_and_throw_on_errors(
        ingest_info, ingest_metadata)

    # Assert
    expected_hold = Hold.new_with_defaults(
        jurisdiction_name='JURISDICTION',
        status=HoldStatus.PRESENT_WITHOUT_INFO,
    )
    expected_charge = Charge.new_with_defaults(
        status=ChargeStatus.PRESENT_WITHOUT_INFO,
        name='DUI',
        bond=Bond.new_with_defaults(
            status=BondStatus.PRESENT_WITHOUT_INFO),
        sentence=Sentence.new_with_defaults(
            status=SentenceStatus.PRESENT_WITHOUT_INFO,
            is_life=True),
    )
    expected_booking = Booking.new_with_defaults(
        admission_date=_INGEST_TIME.date(),
        admission_date_inferred=True,
        first_seen_time=_INGEST_TIME,
        last_seen_time=_INGEST_TIME,
        custody_status=CustodyStatus.PRESENT_WITHOUT_INFO,
        arrest=Arrest.new_with_defaults(agency='PD'),
        holds=[expected_hold],
        charges=[expected_charge],
    )
    expected_person = Person.new_with_defaults(
        region='REGION',
        jurisdiction_id='JURISDICTION_ID',
        bookings=[expected_booking],
    )
    self.assertEqual(converted_people, [expected_person])
def test_write_preexisting_person(self):
    """Writing a person already in the database is expected to update the stored booking.

    The expected booking keeps its original first_seen_time while
    last_seen_time moves to the newer ingest time.
    """
    # Arrange
    # Seed the database with a person and booking first seen at scraper start.
    stored_booking = schema.Booking(
        booking_id=BOOKING_ID,
        external_id=EXTERNAL_BOOKING_ID,
        admission_date_inferred=True,
        custody_status=CustodyStatus.IN_CUSTODY.value,
        last_seen_time=SCRAPER_START_DATETIME,
        first_seen_time=SCRAPER_START_DATETIME,
    )
    stored_person = schema.Person(
        person_id=PERSON_ID,
        jurisdiction_id=JURISDICTION_ID,
        external_id=EXTERNAL_PERSON_ID,
        region=REGION_1,
        bookings=[stored_booking],
    )
    seed_session = SessionFactory.for_schema_base(JailsBase)
    seed_session.add(stored_person)
    seed_session.commit()

    # The new ingest happens one day after the seeded data was scraped.
    most_recent_scrape_time = SCRAPER_START_DATETIME + timedelta(days=1)
    ingest_metadata = IngestMetadata.new_with_defaults(
        region=REGION_1,
        jurisdiction_id=JURISDICTION_ID,
        ingest_time=most_recent_scrape_time,
    )
    ingest_info = IngestInfo()
    ingest_info.people.add(
        full_name=FULL_NAME_1,
        person_id=EXTERNAL_PERSON_ID,
        booking_ids=[EXTERNAL_BOOKING_ID],
    )
    ingest_info.bookings.add(
        booking_id=EXTERNAL_BOOKING_ID,
        custody_status='IN CUSTODY',
    )

    # Act
    persistence.write(ingest_info, ingest_metadata)

    # Assert
    expected_booking = county_entities.Booking.new_with_defaults(
        booking_id=BOOKING_ID,
        external_id=EXTERNAL_BOOKING_ID,
        admission_date_inferred=True,
        custody_status=CustodyStatus.IN_CUSTODY,
        custody_status_raw_text=BOOKING_CUSTODY_STATUS.upper(),
        last_seen_time=most_recent_scrape_time,
        first_seen_time=SCRAPER_START_DATETIME,
    )
    expected_person = county_entities.Person.new_with_defaults(
        person_id=PERSON_ID,
        external_id=EXTERNAL_PERSON_ID,
        region=REGION_1,
        jurisdiction_id=JURISDICTION_ID,
        bookings=[expected_booking],
    )
    people_in_db = county_dao.read_people(
        SessionFactory.for_schema_base(JailsBase))
    self.assertEqual([expected_person], people_in_db)
def testConvert_CannotConvertField_RaisesValueError(self):
    """A state person whose birthdate is not a date is expected to raise ValueError."""
    # Arrange
    # The original chained assignment (`metadata = metadata = ...`) was a
    # redundant duplicate; a single binding is all that is needed.
    metadata = FakeIngestMetadata.for_state(region="us_xx")

    ingest_info = IngestInfo()
    ingest_info.state_people.add(birthdate="NOT_A_DATE")

    # Act + Assert
    with self.assertRaises(ValueError):
        self._convert_and_throw_on_errors(ingest_info, metadata)
def testConvert_CannotConvertField_RaisesValueError(self):
    """A person whose birthdate is not a date is expected to raise ValueError."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(birthdate='NOT_A_DATE')
    default_metadata = IngestMetadata.new_with_defaults()

    # Act + Assert
    with self.assertRaises(ValueError):
        self._convert_and_throw_on_errors(ingest_info, default_metadata)
def testConvert_CannotConvertField_RaisesValueError(self):
    """With state-level metadata, a state person whose birthdate is not a date is expected to raise ValueError."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.state_people.add(birthdate="NOT_A_DATE")
    state_metadata = IngestMetadata.new_with_defaults(
        system_level=SystemLevel.STATE)

    # Act + Assert
    with self.assertRaises(ValueError):
        self._convert_and_throw_on_errors(ingest_info, state_metadata)
def testConvert_MultipleOpenBookings_RaisesValueError(self):
    """A person with two bookings that both have only an admission date is expected to raise ValueError."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(booking_ids=["BOOKING_ID1", "BOOKING_ID2"])
    # Neither booking gets a release date.
    open_bookings = (
        ("BOOKING_ID1", "3/14/2020"),
        ("BOOKING_ID2", "3/16/2020"),
    )
    for open_booking_id, admitted_on in open_bookings:
        ingest_info.bookings.add(booking_id=open_booking_id,
                                 admission_date=admitted_on)
    ingest_metadata = IngestMetadata.new_with_defaults(
        ingest_time=_INGEST_TIME)

    # Act + Assert
    with self.assertRaises(ValueError):
        self._convert_and_throw_on_errors(ingest_info, ingest_metadata)
def test_twoDifferentPeople_persistsNone(self):
    """When one of two people has gender 'X', the write is expected to report failure and store nobody."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(person_id='1', full_name=FULL_NAME_1)
    ingest_info.people.add(person_id='2', full_name=FULL_NAME_2, gender='X')

    # Act
    write_succeeded = persistence.write(ingest_info, DEFAULT_METADATA)
    self.assertFalse(write_succeeded)
    read_session = SessionFactory.for_schema_base(JailsBase)
    people_in_db = county_dao.read_people(read_session)

    # Assert
    assert not people_in_db
def test_localRun(self):
    """With os.getenv patched to return 'Local', a write is expected to store nothing."""
    fake_getenv = Mock(return_value='Local')
    with patch('os.getenv', fake_getenv):
        # Arrange
        ingest_info = IngestInfo()
        ingest_info.people.add(full_name=FULL_NAME_1)

        # Act
        persistence.write(ingest_info, DEFAULT_METADATA)
        read_session = SessionFactory.for_schema_base(JailsBase)
        people_in_db = county_dao.read_people(read_session)

        # Assert
        assert not people_in_db
def test_persistLocally(self):
    """With PERSIST_LOCALLY set to 'true', a write in a local environment is expected to store the person."""
    # Arrange
    # Both patches must be entered as context managers. Joining them with
    # `and` evaluates to the second patcher alone, so the os.getenv patch
    # was silently never activated; a comma enters both.
    with patch('os.getenv', Mock(return_value='local')), \
            patch.dict('os.environ', {'PERSIST_LOCALLY': 'true'}):
        ingest_info = IngestInfo()
        ingest_info.people.add(full_name=FULL_NAME_1)

        # Act
        persistence.write(ingest_info, DEFAULT_METADATA)
        result = county_dao.read_people(
            SessionFactory.for_schema_base(JailsBase))

        # Assert
        assert len(result) == 1
        assert result[0].full_name == _format_full_name(FULL_NAME_1)
def test_twoDifferentPeople_persistsBoth(self):
    """Writing two distinct people is expected to store both of them."""
    # Arrange
    ingest_info = IngestInfo()
    for generated_id, name in (('1_GENERATE', FULL_NAME_1),
                               ('2_GENERATE', FULL_NAME_2)):
        ingest_info.people.add(person_id=generated_id, full_name=name)

    # Act
    persistence.write(ingest_info, DEFAULT_METADATA)
    read_session = SessionFactory.for_schema_base(JailsBase)
    people_in_db = county_dao.read_people(read_session)

    # Assert
    assert len(people_in_db) == 2
    # The read is expected to return the second person first.
    first_person, second_person = people_in_db[0], people_in_db[1]
    assert first_person.full_name == _format_full_name(FULL_NAME_2)
    assert second_person.full_name == _format_full_name(FULL_NAME_1)
def testConvert_TotalBondWithCharge_SetsTotalBondOnCharge(self):
    """A booking's total bond is expected to be attached to its single existing charge as a cash bond."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(booking_ids=["BOOKING_ID"])
    ingest_info.bookings.add(
        booking_id="BOOKING_ID",
        total_bond_amount="$100",
        charge_ids=["CHARGE_ID"],
    )
    ingest_info.charges.add(charge_id="CHARGE_ID")
    county_metadata = FakeIngestMetadata.for_county(
        region="REGION",
        jurisdiction_id="JURISDICTION_ID",
        ingest_time=_INGEST_TIME,
    )

    # Act
    converted_people = self._convert_and_throw_on_errors(
        ingest_info, county_metadata)

    # Assert
    total_bond = Bond.new_with_defaults(
        amount_dollars=100,
        status=BondStatus.PRESENT_WITHOUT_INFO,
        bond_type=BondType.CASH,
    )
    expected_charge = Charge.new_with_defaults(
        external_id="CHARGE_ID_COUNT_1",
        status=ChargeStatus.PRESENT_WITHOUT_INFO,
        bond=total_bond,
    )
    expected_booking = Booking.new_with_defaults(
        external_id="BOOKING_ID",
        admission_date=_INGEST_TIME.date(),
        admission_date_inferred=True,
        first_seen_time=_INGEST_TIME,
        last_seen_time=_INGEST_TIME,
        custody_status=CustodyStatus.PRESENT_WITHOUT_INFO,
        charges=[expected_charge],
    )
    expected_person = Person.new_with_defaults(
        region="REGION",
        jurisdiction_id="JURISDICTION_ID",
        bookings=[expected_booking],
    )
    self.assertEqual(converted_people, [expected_person])
def test_write_noPeople(self):
    """Writing an empty IngestInfo is expected to leave no people in the database."""
    # Arrange
    empty_ingest_info = IngestInfo()
    most_recent_scrape_time = SCRAPER_START_DATETIME + timedelta(days=1)
    ingest_metadata = IngestMetadata.new_with_defaults(
        region=REGION_1,
        jurisdiction_id=JURISDICTION_ID,
        ingest_time=most_recent_scrape_time,
    )

    # Act
    persistence.write(empty_ingest_info, ingest_metadata)

    # Assert
    read_session = SessionFactory.for_schema_base(JailsBase)
    people_in_db = county_dao.read_people(read_session)
    self.assertFalse(people_in_db)
def testConvert_MultipleOpenBookings_RaisesValueError(self):
    """With county metadata, a person with two bookings that have only admission dates is expected to raise ValueError."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(booking_ids=["BOOKING_ID1", "BOOKING_ID2"])
    # Neither booking gets a release date.
    open_bookings = (
        ("BOOKING_ID1", "3/14/2020"),
        ("BOOKING_ID2", "3/16/2020"),
    )
    for open_booking_id, admitted_on in open_bookings:
        ingest_info.bookings.add(booking_id=open_booking_id,
                                 admission_date=admitted_on)
    county_metadata = FakeIngestMetadata.for_county(
        region="REGION",
        jurisdiction_id="JURISDICTION_ID",
        ingest_time=_INGEST_TIME,
    )

    # Act + Assert
    with self.assertRaises(ValueError):
        self._convert_and_throw_on_errors(ingest_info, county_metadata)
def testConvert_TotalBondWithMultipleBonds_ThrowsException(self):
    """A booking total bond combined with two charges that each have their own bond is expected to raise ValueError."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(booking_ids=['BOOKING_ID'])
    ingest_info.bookings.add(
        booking_id='BOOKING_ID',
        total_bond_amount='$100',
        charge_ids=['CHARGE_ID', 'CHARGE_ID_2'],
    )
    ingest_info.charges.add(charge_id='CHARGE_ID', bond_id='BOND_ID')
    ingest_info.charges.add(charge_id='CHARGE_ID_2', bond_id='BOND_ID_2')
    for separate_bond_id in ('BOND_ID', 'BOND_ID_2'):
        ingest_info.bonds.add(bond_id=separate_bond_id)
    ingest_metadata = IngestMetadata.new_with_defaults(
        ingest_time=_INGEST_TIME)

    # Act + Assert
    with self.assertRaises(ValueError):
        self._convert_and_throw_on_errors(ingest_info, ingest_metadata)
def test_extra_id(self):
    """A booking that no person references is expected to be reported as never referenced."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.bookings.add(booking_id="1")

    # Act
    with pytest.raises(ValidationError) as raised:
        ingest_info_validator.validate(ingest_info)
    reported_errors = raised.value.errors

    # Assert
    expected_booking_errors = {"ids_never_referenced": {"1"}}
    self.assertEqual(reported_errors, {"bookings": expected_booking_errors})
def test_non_existing_id(self):
    """A person referencing a booking id that was never added is expected to be reported."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(person_id="1", booking_ids=["2"])

    # Act
    with pytest.raises(ValidationError) as raised:
        ingest_info_validator.validate(ingest_info)
    reported_errors = raised.value.errors

    # Assert
    expected_booking_errors = {"ids_referenced_that_do_not_exist": {"2"}}
    self.assertEqual(reported_errors, {"bookings": expected_booking_errors})
def test_retryableError_retries(self, mock_commit, mock_close):
    """Five serialization failures followed by a success are expected to produce six commits and one close."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(full_name=FULL_NAME_1)

    # Serialization Failure is retryable
    driver_error = create_autospec(psycopg2.OperationalError)
    driver_error.pgcode = SERIALIZATION_FAILURE
    retryable_error = sqlalchemy.exc.DatabaseError(
        statement=None, params=None, orig=driver_error)

    # Five retries are allowed, so the sixth commit attempt succeeds.
    failed_attempts = [retryable_error for _ in range(5)]
    mock_commit.side_effect = failed_attempts + [mock.DEFAULT]

    # Act
    persistence.write(ingest_info, DEFAULT_METADATA)

    # Assert
    expected_commit_calls = [call() for _ in range(6)]
    assert mock_commit.call_args_list == expected_commit_calls
    mock_close.assert_called_once()
def test_nonRetryableError_failsImmediately(self, mock_commit, mock_close):
    """A not-null violation on commit is expected to propagate after a single commit attempt."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(full_name=FULL_NAME_1)

    # Not Null Violation is not retryable
    driver_error = create_autospec(psycopg2.OperationalError)
    driver_error.pgcode = NOT_NULL_VIOLATION
    fatal_error = sqlalchemy.exc.DatabaseError(
        statement=None, params=None, orig=driver_error)
    # A second commit would succeed, but it is expected never to happen.
    mock_commit.side_effect = [fatal_error, mock.DEFAULT]

    # Act / Assert
    with pytest.raises(sqlalchemy.exc.DatabaseError):
        persistence.write(ingest_info, DEFAULT_METADATA)

    # Assert
    single_commit = [call()]
    assert mock_commit.call_args_list == single_commit
    mock_close.assert_called_once()
def test_twoDifferentPeopleWithBooking_persistsNone(self):
    """When one of two people has a booking with custody status 'NO EXIST', the write is expected to fail and store nobody."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(full_name=FULL_NAME_2)
    ingest_info.people.add(
        full_name=FULL_NAME_1,
        person_id=EXTERNAL_PERSON_ID,
        booking_ids=[EXTERNAL_BOOKING_ID],
    )
    ingest_info.bookings.add(booking_id=EXTERNAL_BOOKING_ID,
                             custody_status='NO EXIST')

    # Act
    write_succeeded = persistence.write(ingest_info, DEFAULT_METADATA)
    self.assertFalse(write_succeeded)
    read_session = SessionFactory.for_schema_base(JailsBase)
    people_in_db = county_dao.read_people(read_session)

    # Assert
    assert not people_in_db
def test_duplicate_ids(self):
    """Ids repeated within people, bookings and arrests are expected to be reported as duplicates."""
    # Arrange
    people = [
        Person(person_id=PERSON_1),
        Person(person_id=PERSON_1),
        Person(person_id=PERSON_1, booking_ids=[BOOKING_1, BOOKING_2]),
    ]
    bookings = [
        Booking(booking_id=BOOKING_1),
        Booking(booking_id=BOOKING_1, charge_ids=[CHARGE_1, CHARGE_2]),
        Booking(booking_id=BOOKING_2, arrest_id=ARREST_1),
        Booking(booking_id=BOOKING_2, arrest_id=ARREST_2),
    ]
    arrests = [
        Arrest(arrest_id=ARREST_1),
        Arrest(arrest_id=ARREST_1),
        Arrest(arrest_id=ARREST_2),
    ]
    # The charges are unique, so no charge errors are expected below.
    charges = [Charge(charge_id=CHARGE_1), Charge(charge_id=CHARGE_2)]

    ingest_info = IngestInfo()
    ingest_info.people.extend(people)
    ingest_info.bookings.extend(bookings)
    ingest_info.arrests.extend(arrests)
    ingest_info.charges.extend(charges)

    # Act
    with pytest.raises(ValidationError) as raised:
        ingest_info_validator.validate(ingest_info)
    reported_errors = raised.value.errors

    # Assert
    expected_errors = {
        "people": {"duplicate_ids": {PERSON_1}},
        "bookings": {"duplicate_ids": {BOOKING_1, BOOKING_2}},
        "arrests": {"duplicate_ids": {ARREST_1}},
    }
    self.assertEqual(reported_errors, expected_errors)
def test_readSinglePersonByName(self):
    """Reading by full name after writing two people is expected to return only the matching person."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(
        person_id='1_GENERATE',
        full_name=FULL_NAME_1,
        birthdate=BIRTHDATE_1,
    )
    ingest_info.people.add(
        person_id='2_GENERATE',
        full_name=FULL_NAME_2,
        birthdate=BIRTHDATE_2,
    )

    # Act
    persistence.write(ingest_info, DEFAULT_METADATA)
    read_session = SessionFactory.for_schema_base(JailsBase)
    matching_people = county_dao.read_people(
        read_session, full_name=_format_full_name(FULL_NAME_1))

    # Assert
    assert len(matching_people) == 1
    only_match = matching_people[0]
    assert only_match.full_name == _format_full_name(FULL_NAME_1)
    assert only_match.birthdate == BIRTHDATE_1_DATE
def test_threeDifferentPeople_persistsTwoBelowThreshold(self):
    """Of three people, the one whose booking has custody status 'NO EXIST' is expected to be dropped while the other two are stored."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(person_id='1_GENERATE', full_name=FULL_NAME_2)
    ingest_info.people.add(person_id='2_GENERATE', full_name=FULL_NAME_3)
    ingest_info.people.add(
        person_id=EXTERNAL_PERSON_ID,
        full_name=FULL_NAME_1,
        booking_ids=[EXTERNAL_BOOKING_ID],
    )
    ingest_info.bookings.add(booking_id=EXTERNAL_BOOKING_ID,
                             custody_status='NO EXIST')

    # Act
    persistence.write(ingest_info, DEFAULT_METADATA)
    read_session = SessionFactory.for_schema_base(JailsBase)
    people_in_db = county_dao.read_people(read_session)

    # Assert
    assert len(people_in_db) == 2
    first_person, second_person = people_in_db[0], people_in_db[1]
    assert first_person.full_name == _format_full_name(FULL_NAME_3)
    assert second_person.full_name == _format_full_name(FULL_NAME_2)
def testConvert_TotalBondWithMultipleBonds_ThrowsException(self):
    """With county metadata, a booking total bond plus two charges with separate bonds is expected to raise ValueError."""
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.people.add(booking_ids=["BOOKING_ID"])
    ingest_info.bookings.add(
        booking_id="BOOKING_ID",
        total_bond_amount="$100",
        charge_ids=["CHARGE_ID", "CHARGE_ID_2"],
    )
    ingest_info.charges.add(charge_id="CHARGE_ID", bond_id="BOND_ID")
    ingest_info.charges.add(charge_id="CHARGE_ID_2", bond_id="BOND_ID_2")
    for separate_bond_id in ("BOND_ID", "BOND_ID_2"):
        ingest_info.bonds.add(bond_id=separate_bond_id)
    county_metadata = FakeIngestMetadata.for_county(
        region="REGION",
        jurisdiction_id="JURISDICTION_ID",
        ingest_time=_INGEST_TIME,
    )

    # Act + Assert
    with self.assertRaises(ValueError):
        self._convert_and_throw_on_errors(ingest_info, county_metadata)
def test_empty_ingest_info(self):
    """Validating an empty IngestInfo is expected to complete without raising."""
    empty_ingest_info = IngestInfo()
    ingest_info_validator.validate(empty_ingest_info)
def test_reports_all_errors_together(self):
    """Duplicate, missing and unreferenced ids across all entity types are expected to be reported in one ValidationError."""
    # Arrange
    people = [
        Person(person_id=PERSON_1, booking_ids=[MISSING_BOOKING]),
        Person(person_id=PERSON_1, booking_ids=[BOOKING_1]),
        Person(person_id=PERSON_1, booking_ids=[BOOKING_1, BOOKING_2]),
        Person(person_id=EXTRA_PERSON),
    ]
    bookings = [
        Booking(booking_id=BOOKING_1),
        Booking(booking_id=BOOKING_1, arrest_id=MISSING_ARREST),
        Booking(
            booking_id=BOOKING_2,
            arrest_id=ARREST_1,
            charge_ids=[CHARGE_1, CHARGE_2, MISSING_CHARGE],
        ),
        Booking(booking_id=EXTRA_BOOKING),
    ]
    arrests = [
        Arrest(arrest_id=ARREST_1),
        Arrest(arrest_id=ARREST_1),
        Arrest(arrest_id=EXTRA_ARREST),
    ]
    charges = [
        Charge(charge_id=CHARGE_1),
        Charge(charge_id=CHARGE_1, sentence_id=SENTENCE_1, bond_id=BOND_1),
        Charge(
            charge_id=CHARGE_2,
            sentence_id=MISSING_SENTENCE,
            bond_id=MISSING_BOND,
        ),
        Charge(charge_id=EXTRA_CHARGE),
    ]
    bonds = [
        Bond(bond_id=BOND_1),
        Bond(bond_id=BOND_1),
        Bond(bond_id=EXTRA_BOND),
    ]
    sentences = [
        Sentence(sentence_id=SENTENCE_1),
        Sentence(sentence_id=SENTENCE_1),
        Sentence(sentence_id=EXTRA_SENTENCE),
    ]

    ingest_info = IngestInfo()
    ingest_info.people.extend(people)
    ingest_info.bookings.extend(bookings)
    ingest_info.arrests.extend(arrests)
    ingest_info.charges.extend(charges)
    ingest_info.bonds.extend(bonds)
    ingest_info.sentences.extend(sentences)

    # Act
    with pytest.raises(ValidationError) as raised:
        ingest_info_validator.validate(ingest_info)
    reported_errors = raised.value.errors

    # Assert
    # People only report duplicates; every other entity type reports all
    # three error categories.
    expected_errors = {
        "people": {
            "duplicate_ids": {PERSON_1},
        },
        "bookings": {
            "duplicate_ids": {BOOKING_1},
            "ids_referenced_that_do_not_exist": {MISSING_BOOKING},
            "ids_never_referenced": {EXTRA_BOOKING},
        },
        "arrests": {
            "duplicate_ids": {ARREST_1},
            "ids_referenced_that_do_not_exist": {MISSING_ARREST},
            "ids_never_referenced": {EXTRA_ARREST},
        },
        "charges": {
            "duplicate_ids": {CHARGE_1},
            "ids_referenced_that_do_not_exist": {MISSING_CHARGE},
            "ids_never_referenced": {EXTRA_CHARGE},
        },
        "sentences": {
            "duplicate_ids": {SENTENCE_1},
            "ids_referenced_that_do_not_exist": {MISSING_SENTENCE},
            "ids_never_referenced": {EXTRA_SENTENCE},
        },
        "bonds": {
            "duplicate_ids": {BOND_1},
            "ids_referenced_that_do_not_exist": {MISSING_BOND},
            "ids_never_referenced": {EXTRA_BOND},
        },
    }
    self.assertEqual(reported_errors, expected_errors)
def test_state_threeSentenceGroups_dontPersistAboveThreshold(self):
    """The write is expected to leave both pre-existing state people unchanged.

    Two people are seeded, each holding a sentence group with external id
    SENTENCE_GROUP_ID_2, and the expected entities are snapshots taken
    before the write.
    """
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.state_people.add(
        state_person_id='1_GENERATE',
        state_sentence_group_ids=[SENTENCE_GROUP_ID, SENTENCE_GROUP_ID_2],
    )
    for ingested_group_id in (SENTENCE_GROUP_ID, SENTENCE_GROUP_ID_2):
        ingest_info.state_sentence_groups.add(
            state_sentence_group_id=ingested_group_id,
            county_code=COUNTY_CODE,
        )

    unknown_status = StateSentenceStatus.EXTERNAL_UNKNOWN.value

    # First stored person: owns both ingested sentence groups.
    stored_person = schema.StatePerson(person_id=ID, full_name=FULL_NAME_1)
    stored_group = schema.StateSentenceGroup(
        sentence_group_id=ID,
        status=unknown_status,
        external_id=SENTENCE_GROUP_ID,
        state_code=REGION_CODE,
    )
    stored_group_2 = schema.StateSentenceGroup(
        sentence_group_id=ID_2,
        status=unknown_status,
        external_id=SENTENCE_GROUP_ID_2,
        state_code=REGION_CODE,
    )
    stored_external_id = schema.StatePersonExternalId(
        person_external_id_id=ID,
        state_code=REGION_CODE,
        external_id=EXTERNAL_ID,
        id_type=ID_TYPE,
    )
    stored_person.sentence_groups = [stored_group, stored_group_2]
    stored_person.external_ids = [stored_external_id]

    # Second stored person: holds another group with the same external id
    # as the first person's second group.
    stored_person_2 = schema.StatePerson(person_id=ID_2,
                                         full_name=FULL_NAME_1)
    stored_group_2_dup = schema.StateSentenceGroup(
        sentence_group_id=ID_3,
        status=unknown_status,
        external_id=SENTENCE_GROUP_ID_2,
        state_code=REGION_CODE,
    )
    stored_external_id_2 = schema.StatePersonExternalId(
        person_external_id_id=ID_2,
        state_code=REGION_CODE,
        external_id=EXTERNAL_ID_2,
        id_type=ID_TYPE,
    )
    stored_person_2.sentence_groups = [stored_group_2_dup]
    stored_person_2.external_ids = [stored_external_id_2]

    # No updates
    expected_person = self.to_entity(stored_person)
    expected_person_2 = self.to_entity(stored_person_2)

    db_session = SessionFactory.for_schema_base(StateBase)
    db_session.add(stored_person)
    db_session.add(stored_person_2)
    db_session.commit()

    # Act
    persistence.write(ingest_info, DEFAULT_METADATA)
    db_session = SessionFactory.for_schema_base(StateBase)
    people_in_db = dao.read_people(db_session)

    # Assert
    expected_people = [expected_person, expected_person_2]
    self.assertEqual(
        expected_people,
        converter.convert_schema_objects_to_entity(people_in_db))
def test_state_threeSentenceGroups_persistsTwoBelowThreshold(self):
    """Only two of the three ingested sentence groups are expected to be updated.

    The first two groups are expected to gain the ingested county code. The
    third, whose external id also exists on a second stored person, is
    expected to stay without one on both people.
    """
    # Arrange
    ingest_info = IngestInfo()
    ingest_info.state_people.add(
        state_person_id='1_GENERATE',
        state_sentence_group_ids=[
            SENTENCE_GROUP_ID, SENTENCE_GROUP_ID_2, SENTENCE_GROUP_ID_3
        ],
    )
    for ingested_group_id in (SENTENCE_GROUP_ID,
                              SENTENCE_GROUP_ID_2,
                              SENTENCE_GROUP_ID_3):
        ingest_info.state_sentence_groups.add(
            state_sentence_group_id=ingested_group_id,
            county_code=COUNTY_CODE,
        )

    unknown_status_raw = StateSentenceStatus.EXTERNAL_UNKNOWN.value

    # First stored person: owns all three ingested sentence groups.
    stored_person = schema.StatePerson(person_id=ID, full_name=FULL_NAME_1)
    stored_group = schema.StateSentenceGroup(
        sentence_group_id=ID,
        status=unknown_status_raw,
        external_id=SENTENCE_GROUP_ID,
        state_code=REGION_CODE,
    )
    stored_group_2 = schema.StateSentenceGroup(
        sentence_group_id=ID_2,
        status=unknown_status_raw,
        external_id=SENTENCE_GROUP_ID_2,
        state_code=REGION_CODE,
    )
    stored_group_3 = schema.StateSentenceGroup(
        sentence_group_id=ID_3,
        status=unknown_status_raw,
        external_id=SENTENCE_GROUP_ID_3,
        state_code=REGION_CODE,
    )
    stored_external_id = schema.StatePersonExternalId(
        person_external_id_id=ID,
        state_code=REGION_CODE,
        external_id=EXTERNAL_ID,
        id_type=ID_TYPE,
    )
    stored_person.sentence_groups = [
        stored_group, stored_group_2, stored_group_3
    ]
    stored_person.external_ids = [stored_external_id]

    # Second stored person: holds another group with the same external id
    # as the first person's third group.
    stored_person_2 = schema.StatePerson(person_id=ID_2,
                                         full_name=FULL_NAME_1)
    stored_group_3_dup = schema.StateSentenceGroup(
        sentence_group_id=ID_4,
        status=unknown_status_raw,
        external_id=SENTENCE_GROUP_ID_3,
        state_code=REGION_CODE,
    )
    stored_external_id_2 = schema.StatePersonExternalId(
        person_external_id_id=ID_2,
        state_code=REGION_CODE,
        external_id=EXTERNAL_ID_2,
        id_type=ID_TYPE,
    )
    stored_person_2.sentence_groups = [stored_group_3_dup]
    stored_person_2.external_ids = [stored_external_id_2]

    # Expected entities. Children point back at their person, so each person
    # is created first and its child lists are attached afterwards.
    expected_person = StatePerson.new_with_defaults(
        person_id=ID,
        full_name=FULL_NAME_1,
        external_ids=[],
        sentence_groups=[],
    )
    expected_external_id = StatePersonExternalId.new_with_defaults(
        person_external_id_id=ID,
        state_code=REGION_CODE,
        external_id=EXTERNAL_ID,
        id_type=ID_TYPE,
        person=expected_person,
    )
    expected_group = StateSentenceGroup.new_with_defaults(
        sentence_group_id=ID,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN,
        external_id=SENTENCE_GROUP_ID,
        state_code=REGION_CODE,
        county_code=COUNTY_CODE,
        person=expected_person,
    )
    expected_group_2 = StateSentenceGroup.new_with_defaults(
        sentence_group_id=ID_2,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN,
        external_id=SENTENCE_GROUP_ID_2,
        state_code=REGION_CODE,
        county_code=COUNTY_CODE,
        person=expected_person,
    )
    # No county code because errors during match
    expected_group_3 = StateSentenceGroup.new_with_defaults(
        sentence_group_id=ID_3,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN,
        external_id=SENTENCE_GROUP_ID_3,
        state_code=REGION_CODE,
        person=expected_person,
    )
    expected_person.external_ids = [expected_external_id]
    expected_person.sentence_groups = [
        expected_group, expected_group_2, expected_group_3
    ]

    expected_person_2 = StatePerson.new_with_defaults(
        person_id=ID_2, full_name=FULL_NAME_1)
    expected_external_id_2 = StatePersonExternalId.new_with_defaults(
        person_external_id_id=ID_2,
        state_code=REGION_CODE,
        external_id=EXTERNAL_ID_2,
        id_type=ID_TYPE,
        person=expected_person_2,
    )
    # No county code because unmatched
    expected_group_3_dup = StateSentenceGroup.new_with_defaults(
        sentence_group_id=ID_4,
        status=StateSentenceStatus.EXTERNAL_UNKNOWN,
        external_id=SENTENCE_GROUP_ID_3,
        state_code=REGION_CODE,
        person=expected_person_2,
    )
    expected_person_2.sentence_groups = [expected_group_3_dup]
    expected_person_2.external_ids = [expected_external_id_2]

    db_session = SessionFactory.for_schema_base(StateBase)
    db_session.add(stored_person)
    db_session.add(stored_person_2)
    db_session.commit()

    # Act
    persistence.write(ingest_info, DEFAULT_METADATA)
    db_session = SessionFactory.for_schema_base(StateBase)
    people_in_db = dao.read_people(db_session)

    # Assert
    expected_people = [expected_person, expected_person_2]
    self.assertEqual(
        expected_people,
        converter.convert_schema_objects_to_entity(people_in_db))