def _merge_row_into_ingest_info(self, ingest_info, row_ii):
    """Fold the single person parsed from one row into `ingest_info`.

    The merge descends one level at a time and stops at the first level
    that is new to `ingest_info`:

    * unknown person  -> the row's person is appended as-is;
    * unknown booking -> the row's booking is appended to the known person;
    * otherwise       -> the row's single charge is appended to the known
      booking.

    Args:
        ingest_info: the accumulated ingest info, mutated in place.
        row_ii: the ingest info extracted from one row; must hold exactly
            one person (enforced by `scraper_utils.one`).

    Raises:
        DirectIngestError: with PARSE_ERROR if the row's person is already
            known but does not carry exactly one booking, or if that
            booking is already known but does not carry exactly one charge.
    """

    def _parse_error(message):
        # Both failure modes share the same error type; only the text differs.
        return DirectIngestError(
            error_type=DirectIngestErrorType.PARSE_ERROR, msg=message)

    incoming_person = scraper_utils.one('person', row_ii)
    known_person = ingest_info.get_person_by_id(incoming_person.person_id)
    if not known_person:
        # First time this person is seen: take the whole subtree.
        ingest_info.people.append(incoming_person)
        return

    incoming_bookings = incoming_person.bookings
    if len(incoming_bookings) != 1:
        raise _parse_error("Exactly one booking must be on each row.")
    incoming_booking = incoming_bookings[0]

    known_booking = known_person.get_booking_by_id(
        incoming_booking.booking_id)
    if not known_booking:
        # Known person, new booking: attach the booking and its contents.
        known_person.bookings.append(incoming_booking)
        return

    incoming_charges = incoming_booking.charges
    if len(incoming_charges) != 1:
        raise _parse_error("Exactly one charge must be on each row.")

    # Known person and booking: only the charge is new information.
    known_booking.charges.append(incoming_charges[0])
def test_oneBond_passes(self):
    # A single person -> booking -> charge -> bond chain: one("bond", ...)
    # must hand back that very bond object.
    ingest_info = IngestInfo()
    charge = ingest_info.create_person().create_booking().create_charge()
    bond = charge.create_bond()

    found = scraper_utils.one("bond", ingest_info)

    self.assertIs(bond, found)
def test_oneBooking_passes(self):
    # The booking has a child (an arrest); one("booking", ...) must still
    # return the booking object itself.
    ingest_info = IngestInfo()
    person = ingest_info.create_person()
    booking = person.create_booking()
    booking.create_arrest()

    found = scraper_utils.one("booking", ingest_info)

    self.assertIs(booking, found)
def test_noSentence_raises(self):
    # The tree contains a bond but no sentence, so asking for the one
    # sentence must fail with ValueError.
    ingest_info = IngestInfo()
    charge = ingest_info.create_person().create_booking().create_charge()
    charge.create_bond()

    self.assertRaises(ValueError, scraper_utils.one, "sentence", ingest_info)
def test_twoPeople_raises(self):
    # Two people are present (only the first has a booking); asking for the
    # one booking must fail with ValueError.
    ingest_info = IngestInfo()
    first_person = ingest_info.create_person()
    first_person.create_booking()
    ingest_info.create_person()

    self.assertRaises(ValueError, scraper_utils.one, "booking", ingest_info)
def test_onePerson_passes(self):
    # Simplest case: a lone person is returned by identity.
    ingest_info = IngestInfo()
    person = ingest_info.create_person()

    found = scraper_utils.one("person", ingest_info)

    self.assertIs(person, found)
def parse(self, json_people: Iterable[Dict]) -> IngestInfo:
    """Uses the JsonDataExtractor to convert JSON data at the person level
    to IngestInfo objects.

    Each element of `json_people` is extracted into its own IngestInfo,
    which must contain exactly one person and exactly one booking. Holds,
    bonds and per-charge fields are then filled in from the raw dict, and
    finally people sharing a person_id are collapsed into one person that
    carries all of their bookings.

    Args:
        json_people: iterable of raw dicts, one per booking. Each dict is
            indexed with the keys 'hold', 'bond' and 'booking' (the latter
            with a 'commiting_authority' entry).

    Returns:
        A new IngestInfo holding one merged person per distinct person_id.

    Raises:
        KeyError: if a dict lacks one of the keys indexed above.
        Whatever `scraper_utils.one` raises when an extracted IngestInfo
        does not hold exactly one person / booking / bond.
    """
    extractor = JsonDataExtractor(self.yaml_file)
    bond_extractor = JsonDataExtractor(self.bond_yaml_file)

    # Group people by person id. Since we're iterating over bookings, not
    # people, we have to manually merge people's bookings.
    people: Dict[str, List[Person]] = defaultdict(list)

    for person_dict in json_people:
        ii = extractor.extract_and_populate_data(person_dict)

        person = scraper_utils.one('person', ii)
        person.place_of_residence = self.get_address(person_dict)
        # TODO(1802): parse ethnicity in enum overrides
        if person.race == 'HISPANIC' or person.ethnicity == 'Y':
            person.race, person.ethnicity = None, 'HISPANIC'
        else:
            person.ethnicity = None

        booking = scraper_utils.one('booking', ii)
        booking.admission_reason = self.get_admission_reason(person_dict)

        for hold in person_dict['hold']:
            jurisdiction_name = hold['holding_for_agency']
            if jurisdiction_name == 'Request to Hold':
                # For this placeholder agency value the jurisdiction is
                # read from the hold's 'charges' field instead.
                jurisdiction_name = hold['charges']
            booking.create_hold(hold_id=hold['pkey'],
                                jurisdiction_name=jurisdiction_name)

        # Bonds are shared across all charges within a single case
        for bond_dict in person_dict['bond']:
            bond = scraper_utils.one(
                'bond',
                bond_extractor.extract_and_populate_data(bond_dict))
            case_pk = bond_dict['case_pk']
            # This must be a list, not a generator expression: a generator
            # object is always truthy, which made the `else` branch below
            # unreachable and silently dropped bonds with no charges.
            matching_charges = [c for c in ii.get_all_charges()
                                if c.case_number == case_pk]
            if matching_charges:
                for charge in matching_charges:
                    charge.bond = bond
            else:
                # Some bonds have no charges associated with their case
                # NOTE(review): the charge created here has no charge_id
                # set by this code and is visited by the per-charge loop
                # below -- confirm get_charge_status tolerates that.
                booking.create_charge(bond=bond)

        court_type = person_dict['booking']['commiting_authority']
        for charge in ii.get_all_charges():
            charge.court_type = court_type
            charge.status = self.get_charge_status(person_dict,
                                                   charge.charge_id)

            if charge.degree:
                logging.info(
                    "Charge degree found, but we don't expect it "
                    "to be filled in: \n%s", ii)
            if charge.charge_class:
                logging.info(
                    "Charge class found, but we don't expect it "
                    "to be filled in: \n%s", ii)
            if charge.number_of_counts:
                # Keep only the leading count from text such as
                # "3 mitts" / "2 other mitts"; anything else becomes None.
                match = re.search(r"([0-9]+) (?:other )?mitts",
                                  charge.number_of_counts, re.IGNORECASE)
                charge.number_of_counts = match.group(1) if match else None

        for bond in ii.get_all_bonds(lambda b: b.bond_agent):
            # bond.speccond (stored temporarily in bond.bond_agent) might
            # have two cash values separated by a slash, indicating a
            # partial bond.
            if re.search(r'[0-9]+ */ *[0-9]+', bond.bond_agent):
                bond.bond_type = BondType.PARTIAL_CASH.value
            # bond_agent was only a scratch slot; always clear it.
            bond.bond_agent = None

        people[person.person_id].append(person)

    def merge_bookings(dupes):
        # The last person seen for an id becomes the base; the bookings of
        # every earlier duplicate are appended to it.
        base = dupes.pop()
        for p in dupes:
            base.bookings.extend(p.bookings)
        return base

    merged_people = [merge_bookings(dupes) for dupes in people.values()]
    return IngestInfo(people=merged_people)