def test_skip_empty(self):
    """Extract _SKIP_EMPTY with the skip_empty.yaml mapping and compare it
    to a hand-built person holding two bookings and three charges."""
    mapping_path = os.path.join(
        os.path.dirname(__file__), 'fixtures/skip_empty.yaml')
    data_extractor = JsonDataExtractor(mapping_path)

    expected = IngestInfo()
    # Booking '1' carries a custody status and two charges; only the
    # second charge has a charge class.
    first_booking = Booking(
        custody_status='in custody',
        booking_id='1',
        charges=[
            Charge(name='battery'),
            Charge(name='assault', charge_class='misdemeanor'),
        ],
    )
    # Booking '2' has no custody status and a single charge.
    second_booking = Booking(
        booking_id='2',
        charges=[Charge(name='robbery', charge_class='felony')],
    )
    expected.create_person(
        full_name='skip empty',
        bookings=[first_booking, second_booking],
    )

    actual = data_extractor.extract_and_populate_data(_SKIP_EMPTY)
    self.assertEqual(actual, expected)
def test_jailtracker_person(self):
    """Extract _JT_PERSON with the jailtracker_person.yaml mapping and
    compare it to an IngestInfo holding one expected person."""
    mapping_path = os.path.join(
        os.path.dirname(__file__), 'fixtures/jailtracker_person.yaml')
    data_extractor = JsonDataExtractor(mapping_path)

    expected_info = IngestInfo()
    expected_info.create_person(
        person_id='012345',
        birthdate='12/12/0001',
        age='2018',
        race='WHITE',
    )

    actual_info = data_extractor.extract_and_populate_data(_JT_PERSON)
    self.assertEqual(actual_info, expected_info)
def test_jailtracker_person(self) -> None:
    """Extract the jailtracker_person.json fixture with its YAML key
    mapping and compare it to an IngestInfo holding one expected person."""
    here = os.path.dirname(__file__)
    mapping_path = os.path.join(here, "fixtures/jailtracker_person.yaml")
    data_extractor = JsonDataExtractor(mapping_path)

    person_fields = {
        "person_id": "012345",
        "birthdate": "12/12/0001",
        "age": "2018",
        "race": "WHITE",
    }
    expected_info = IngestInfo()
    expected_info.create_person(**person_fields)

    # Load the fixture only after the expectation is built, as before.
    raw_person = fixtures.as_dict("extractor", "jailtracker_person.json")
    actual_info = data_extractor.extract_and_populate_data(raw_person)
    self.assertEqual(actual_info, expected_info)
def test_jailtracker_booking(self):
    """Extract _JT_BOOKING with the jailtracker_booking.yaml mapping and
    compare it to one field-less person holding two expected bookings."""
    mapping_path = os.path.join(
        os.path.dirname(__file__), 'fixtures/jailtracker_booking.yaml')
    data_extractor = JsonDataExtractor(mapping_path)

    expected_info = IngestInfo()
    person = expected_info.create_person()
    # Each booking uses the same date for admission and release.
    for booking_id, date in (('123098', '1/1/2001'), ('123099', '1/1/2002')):
        person.create_booking(
            booking_id=booking_id,
            admission_date=date,
            release_date=date,
        )

    actual_info = data_extractor.extract_and_populate_data(_JT_BOOKING)
    self.assertEqual(actual_info, expected_info)
def test_person_with_charges(self):
    """Extract _PERSON_WITH_CHARGES with the person_with_charges.yaml
    mapping and compare it to a person with two bookings and three charges."""
    mapping_path = os.path.join(
        os.path.dirname(__file__), 'fixtures/person_with_charges.yaml')
    data_extractor = JsonDataExtractor(mapping_path)

    expected_info = IngestInfo()
    person = expected_info.create_person(
        person_id='3245', full_name='AAA AAAB', race='BLACK')

    # First booking: two charges.
    first_booking = person.create_booking(
        booking_id='324567', admission_date='1/1/1111')
    for charge_id, charge_name in (
            ('345309', 'charge name 1'),
            ('894303', 'charge name 2'),
    ):
        first_booking.create_charge(charge_id=charge_id, name=charge_name)

    # Second booking: one charge.
    second_booking = person.create_booking(
        booking_id='3245', admission_date='2/2/2222')
    second_booking.create_charge(charge_id='42309', name='charge name 3')

    actual_info = data_extractor.extract_and_populate_data(
        _PERSON_WITH_CHARGES)
    self.assertEqual(actual_info, expected_info)
def test_jailtracker_booking(self) -> None:
    """Extract the jailtracker_booking.json fixture with its YAML key
    mapping and compare it to one field-less person with two bookings."""
    here = os.path.dirname(__file__)
    mapping_path = os.path.join(here, "fixtures/jailtracker_booking.yaml")
    data_extractor = JsonDataExtractor(mapping_path)

    expected_info = IngestInfo()
    person = expected_info.create_person()
    booking_rows = [
        {
            "booking_id": "123098",
            "admission_date": "1/1/2001",
            "release_date": "1/1/2001",
        },
        {
            "booking_id": "123099",
            "admission_date": "1/1/2002",
            "release_date": "1/1/2002",
        },
    ]
    for row in booking_rows:
        person.create_booking(**row)

    # Load the fixture only after the expectation is built, as before.
    raw_bookings = fixtures.as_dict("extractor", "jailtracker_booking.json")
    actual_info = data_extractor.extract_and_populate_data(raw_bookings)
    self.assertEqual(actual_info, expected_info)
def test_person_with_charges(self):
    """Extract _PERSON_WITH_CHARGES with the person_with_charges.yaml
    mapping and compare it to a person with two bookings and three charges."""
    here = os.path.dirname(__file__)
    mapping_path = os.path.join(here, "fixtures/person_with_charges.yaml")
    data_extractor = JsonDataExtractor(mapping_path)

    expected_info = IngestInfo()
    person = expected_info.create_person(
        person_id="3245",
        full_name="AAA AAAB",
        race="BLACK",
    )

    # Bookings and their charges are created in the same order as before:
    # booking, then its charges, then the next booking.
    bookings_spec = (
        (
            {"booking_id": "324567", "admission_date": "1/1/1111"},
            (("345309", "charge name 1"), ("894303", "charge name 2")),
        ),
        (
            {"booking_id": "3245", "admission_date": "2/2/2222"},
            (("42309", "charge name 3"),),
        ),
    )
    for booking_fields, charges in bookings_spec:
        booking = person.create_booking(**booking_fields)
        for charge_id, charge_name in charges:
            booking.create_charge(charge_id=charge_id, name=charge_name)

    actual_info = data_extractor.extract_and_populate_data(
        _PERSON_WITH_CHARGES
    )
    self.assertEqual(actual_info, expected_info)
def test_skip_empty(self) -> None:
    """Extract the skip_empty.json fixture with its YAML key mapping and
    compare it to a hand-built person with two bookings and three charges."""
    here = os.path.dirname(__file__)
    data_extractor = JsonDataExtractor(
        os.path.join(here, "fixtures/skip_empty.yaml")
    )

    expected = IngestInfo()
    # Booking "1": custody status set, two charges (only one has a class).
    in_custody_booking = Booking(
        custody_status="in custody",
        booking_id="1",
        charges=[
            Charge(name="battery"),
            Charge(name="assault", charge_class="misdemeanor"),
        ],
    )
    # Booking "2": no custody status, a single charge.
    other_booking = Booking(
        booking_id="2",
        charges=[Charge(name="robbery", charge_class="felony")],
    )
    expected.create_person(
        full_name="skip empty",
        bookings=[in_custody_booking, other_booking],
    )

    # Load the fixture only after the expectation is built, as before.
    raw_data = fixtures.as_dict("extractor", "skip_empty.json")
    actual = data_extractor.extract_and_populate_data(raw_data)
    self.assertEqual(actual, expected)
def test_person_with_holds(self) -> None:
    """Extract the person_with_holds.json fixture with its YAML key mapping
    and compare it to a person with two bookings and three holds."""
    here = os.path.dirname(__file__)
    mapping_path = os.path.join(here, "fixtures/person_with_holds.yaml")
    data_extractor = JsonDataExtractor(mapping_path)

    expected_info = IngestInfo()
    person = expected_info.create_person(
        person_id="3245",
        full_name="AAA AAAB",
        race="BLACK",
    )

    # First booking: two holds.
    first_booking = person.create_booking(
        booking_id="324567",
        admission_date="1/1/1111",
    )
    for hold_id, jurisdiction in (
        ("345309", "jurisdiction name 1"),
        ("894303", "jurisdiction name 2"),
    ):
        first_booking.create_hold(
            hold_id=hold_id, jurisdiction_name=jurisdiction
        )

    # Second booking: one hold.
    second_booking = person.create_booking(
        booking_id="3245",
        admission_date="2/2/2222",
    )
    second_booking.create_hold(
        hold_id="42309", jurisdiction_name="jurisdiction name 3"
    )

    # Load the fixture only after the expectation is built, as before.
    raw_person = fixtures.as_dict("extractor", "person_with_holds.json")
    actual_info = data_extractor.extract_and_populate_data(raw_person)
    self.assertEqual(actual_info, expected_info)
def parse(self, json_people: Iterable[Dict]) -> IngestInfo:
    """Uses the JsonDataExtractor to convert JSON data at the person level
    to IngestInfo objects.

    Args:
        json_people: An iterable of dicts. Each dict is read for the
            'hold', 'bond' and 'booking' keys in addition to whatever the
            YAML key mappings extract. Each entry describes one booking,
            so the same person may appear more than once.

    Returns:
        An IngestInfo with one Person per distinct person_id, carrying
        the bookings from every entry that shared that person_id.
    """
    extractor = JsonDataExtractor(self.yaml_file)
    bond_extractor = JsonDataExtractor(self.bond_yaml_file)

    # Group people by person id. Since we're iterating over bookings, not
    # people, we have to manually merge people's bookings.
    people: Dict[str, List[Person]] = defaultdict(list)

    for person_dict in json_people:
        ii = extractor.extract_and_populate_data(person_dict)
        person = scraper_utils.one('person', ii)
        person.place_of_residence = self.get_address(person_dict)

        # TODO(1802): parse ethnicity in enum overrides
        if person.race == 'HISPANIC' or person.ethnicity == 'Y':
            person.race, person.ethnicity = None, 'HISPANIC'
        else:
            person.ethnicity = None

        booking = scraper_utils.one('booking', ii)
        booking.admission_reason = self.get_admission_reason(person_dict)

        for hold in person_dict['hold']:
            jurisdiction_name = hold['holding_for_agency']
            # For 'Request to Hold' entries the 'charges' field is used
            # as the jurisdiction name instead.
            if jurisdiction_name == 'Request to Hold':
                jurisdiction_name = hold['charges']
            booking.create_hold(hold_id=hold['pkey'],
                                jurisdiction_name=jurisdiction_name)

        # Bonds are shared across all charges within a single case
        for bond_dict in person_dict['bond']:
            bond = scraper_utils.one(
                'bond', bond_extractor.extract_and_populate_data(bond_dict))
            case_pk = bond_dict['case_pk']
            # This must be a list, not a generator expression: a generator
            # object is always truthy, which made the `else` branch below
            # unreachable and silently dropped bonds with no charges.
            matching_charges = [c for c in ii.get_all_charges()
                                if c.case_number == case_pk]
            if matching_charges:
                for charge in matching_charges:
                    charge.bond = bond
            else:
                # Some bonds have no charges associated with their case
                booking.create_charge(bond=bond)

        court_type = person_dict['booking']['commiting_authority']
        for charge in ii.get_all_charges():
            charge.court_type = court_type
            charge.status = self.get_charge_status(person_dict,
                                                   charge.charge_id)

            if charge.degree:
                logging.info(
                    "Charge degree found, but we don't expect it "
                    "to be filled in: \n%s", ii)
            if charge.charge_class:
                logging.info(
                    "Charge class found, but we don't expect it "
                    "to be filled in: \n%s", ii)
            if charge.number_of_counts:
                # Keep only the leading count from text such as
                # "3 mitts" / "3 other mitts"; anything else is cleared.
                match = re.search(r"([0-9]+) (?:other )?mitts",
                                  charge.number_of_counts, re.IGNORECASE)
                charge.number_of_counts = match.group(1) if match else None

        for bond in ii.get_all_bonds(lambda b: b.bond_agent):
            # bond.speccond (stored temporarily in bond.bond_agent) might
            # have two cash values separated by a slash, indicating a
            # partial bond.
            if re.search(r'[0-9]+ */ *[0-9]+', bond.bond_agent):
                bond.bond_type = BondType.PARTIAL_CASH.value
            # The field was only a temporary carrier, so always clear it.
            bond.bond_agent = None

        people[person.person_id].append(person)

    def merge_bookings(dupes: List[Person]) -> Person:
        # Fold the bookings of every duplicate into the last-seen person.
        base = dupes.pop()
        for p in dupes:
            base.bookings.extend(p.bookings)
        return base

    merged_people = [merge_bookings(dupes) for dupes in people.values()]
    return IngestInfo(people=merged_people)