def test_get_sequence_for_class_multiple_parents_enforced_bad_choice(
    self,
) -> None:
    """Expect ValueError when 'state_sentence' is enforced to be 'bogus'."""
    # Build the keyword arguments up front so the `with` body holds only
    # the call that is expected to raise.
    call_kwargs = {
        "enforced_ancestor_choices": {"state_sentence": "bogus"},
    }
    with self.assertRaises(ValueError):
        get_ancestor_class_sequence("state_supervision_period", **call_kwargs)
def test_get_sequence_for_class_single_parent(self):
    """Check the ancestor sequences returned for two classes.

    Covers "bond" and "state_supervision_sentence", each called with the
    class name as the only argument.
    """
    # (class name, expected ancestor sequence), checked in this order.
    cases = (
        ("bond", ("person", "booking", "charge")),
        (
            "state_supervision_sentence",
            ("state_person", "state_sentence_group"),
        ),
    )
    for class_name, expected_sequence in cases:
        self.assertEqual(
            expected_sequence, get_ancestor_class_sequence(class_name)
        )
def test_get_sequence_for_class_persons(self):
    """Check that "person" and "state_person" yield an empty sequence."""
    no_ancestors = ()
    for root_class in ("person", "state_person"):
        self.assertEqual(no_ancestors, get_ancestor_class_sequence(root_class))
def test_get_sequence_for_class_single_parent(self):
    """Check the ancestor sequences for 'bond' and
    'state_supervision_sentence' when no extra arguments are given."""
    bond_ancestors = get_ancestor_class_sequence('bond')
    self.assertEqual(('person', 'booking', 'charge'), bond_ancestors)

    supervision_sentence_ancestors = get_ancestor_class_sequence(
        'state_supervision_sentence')
    self.assertEqual(('state_person', 'state_sentence_group'),
                     supervision_sentence_ancestors)
def _get_parent(
        self,
        ingest_info: IngestInfo,
        class_to_set: str,
        index: int,
        is_multi_key: bool,
        ancestor_chain: Dict[str, str],
        enforced_ancestor_types: Dict[str, str],
        **create_args) -> IngestObject:
    """Find or create the parent of the object that is about to be set.

    For multi-key columns the parent may itself have to be located (or
    created) underneath its own parent first, e.g. when the object is a
    hold or charge in a multi-key column.

    Raises:
        ValueError: if an ingest object cache is in use and
            |_get_cached_parent| returns None.
    """
    ancestors = get_ancestor_class_sequence(
        class_to_set, ancestor_chain, enforced_ancestor_types)

    # Multi-key values may have to be indexed through their parent, i.e. a
    # bond at index 3 belongs to the charge at index 3.
    if is_multi_key and ancestors:
        parent_cls = ancestors[-1]
        grandparent_path = ancestors[:-1]
        if grandparent_path:
            grandparent_cls = grandparent_path[-1]
        else:
            grandparent_cls = None

        grandparent = self._find_parent_ingest_object(
            ingest_info, grandparent_path, index, ancestor_chain)
        latest_parent = _get_by_id_or_recent_if_no_cache(
            self.ingest_object_cache, grandparent, parent_cls,
            ancestor_chain)

        # When |index| already selected the grandparent, reuse the most
        # recent parent if there is one.
        parent_and_grandparent_multi_key = (
            parent_cls in self.multi_key_classes
            and grandparent_cls in self.multi_key_classes)
        if parent_and_grandparent_multi_key and latest_parent:
            return latest_parent

        not_in_plurals = class_to_set not in PLURALS

        # When |index| should select the parent, look it up by position.
        if not_in_plurals or class_to_set == 'booking':
            candidate_parents = getattr(grandparent, PLURALS[parent_cls])
            if index < len(candidate_parents):
                return candidate_parents[index]

        # No existing parent was found, so create one.
        if not_in_plurals or parent_cls in self.multi_key_classes:
            return self._create(grandparent, parent_cls)

    if not self.ingest_object_cache:
        return self._find_parent_ingest_object(
            ingest_info, ancestors, index, ancestor_chain, **create_args)

    cached_parent = self._get_cached_parent(
        ingest_info, class_to_set, ancestor_chain, ancestors)
    if cached_parent is None:
        raise ValueError(
            f"_get_cached_parent returned none: {class_to_set}")
    return cached_parent
def test_get_sequence_for_class_multiple_parents_enforced(self):
    """Check the sequence for 'state_supervision_period' when
    'state_sentence' is enforced to be 'state_incarceration_sentence'."""
    enforced = {'state_sentence': 'state_incarceration_sentence'}

    self.assertEqual(
        ('state_person',
         'state_sentence_group',
         'state_incarceration_sentence'),
        get_ancestor_class_sequence('state_supervision_period',
                                    enforced_ancestor_choices=enforced))
def test_get_sequence_for_class_multiple_parents_further_downstream(self):
    """Check the five-element sequence for
    'state_supervision_violation_response' when 'state_sentence' is
    enforced to be 'state_supervision_sentence'."""
    enforced = {'state_sentence': 'state_supervision_sentence'}
    expected_sequence = (
        'state_person',
        'state_sentence_group',
        'state_supervision_sentence',
        'state_supervision_period',
        'state_supervision_violation',
    )

    returned_sequence = get_ancestor_class_sequence(
        'state_supervision_violation_response',
        enforced_ancestor_choices=enforced)

    self.assertEqual(expected_sequence, returned_sequence)
def test_get_sequence_for_class_multiple_parents_chain(self):
    """Check the sequence for 'state_incarceration_period' when the
    ancestor chain holds ids for 'state_person' and
    'state_incarceration_sentence'."""
    chain = {
        'state_person': '12345',
        'state_incarceration_sentence': '45678',
    }

    self.assertEqual(
        ('state_person',
         'state_sentence_group',
         'state_incarceration_sentence'),
        get_ancestor_class_sequence('state_incarceration_period',
                                    ancestor_chain=chain))
def test_get_sequence_for_class_multiple_parents_enforced(self):
    """Check the "state_supervision_period" sequence under an enforced choice.

    "state_sentence" is enforced to be "state_incarceration_sentence".
    """
    call_kwargs = {
        "enforced_ancestor_choices": {
            "state_sentence": "state_incarceration_sentence",
        },
    }
    returned_sequence = get_ancestor_class_sequence(
        "state_supervision_period", **call_kwargs
    )
    self.assertEqual(
        ("state_person", "state_sentence_group", "state_incarceration_sentence"),
        returned_sequence,
    )
def test_get_sequence_for_class_multiple_parents_chain(self):
    """Check the "state_incarceration_period" sequence with an ancestor chain.

    The chain holds ids for "state_person" and
    "state_incarceration_sentence".
    """
    known_ancestor_ids = {
        "state_person": "12345",
        "state_incarceration_sentence": "45678",
    }
    expected_sequence = (
        "state_person",
        "state_sentence_group",
        "state_incarceration_sentence",
    )

    returned_sequence = get_ancestor_class_sequence(
        "state_incarceration_period", ancestor_chain=known_ancestor_ids
    )

    self.assertEqual(expected_sequence, returned_sequence)
def test_get_sequence_for_class_multiple_parents_further_downstream(self):
    """Check the "state_supervision_violation_response" sequence.

    "state_sentence" is enforced to be "state_supervision_sentence"; the
    expected sequence has five entries, ending in
    "state_supervision_violation".
    """
    call_kwargs = {
        "enforced_ancestor_choices": {
            "state_sentence": "state_supervision_sentence",
        },
    }
    self.assertEqual(
        (
            "state_person",
            "state_sentence_group",
            "state_supervision_sentence",
            "state_supervision_period",
            "state_supervision_violation",
        ),
        get_ancestor_class_sequence(
            "state_supervision_violation_response", **call_kwargs
        ),
    )
def test_get_sequence_for_class_multiple_parents_enforced_over_chain(
    self,
) -> None:
    """Check "state_supervision_period" with both a chain and an enforced choice.

    The ancestor chain holds only a "state_person" id, and "state_sentence"
    is enforced to be "state_supervision_sentence".
    """
    chain = {"state_person": "12345"}
    enforced = {"state_sentence": "state_supervision_sentence"}
    expected_sequence = (
        "state_person",
        "state_sentence_group",
        "state_supervision_sentence",
    )

    returned_sequence = get_ancestor_class_sequence(
        "state_supervision_period",
        ancestor_chain=chain,
        enforced_ancestor_choices=enforced,
    )

    self.assertEqual(expected_sequence, returned_sequence)
def one(ingest_object: str,
        ingest_info_or_scraped_data: Union[IngestInfo, ScrapedData]):
    """Convenience accessor for the single descendant of an IngestInfo.

    Walks the ancestor classes of |ingest_object| from the top of the
    hierarchy, calling |_one| at each level, and finally calls |_one| for
    |ingest_object| itself. For example, |one('arrest', ingest_info)|
    returns the single arrest of the single booking of the single person
    in |ingest_info|, and raises an error if there are zero or multiple
    people, bookings, or arrests.

    A ScrapedData argument is unwrapped to its |ingest_info| first.

    Raises:
        ValueError: if |ingest_info_or_scraped_data| is None.
    """
    if ingest_info_or_scraped_data is None:
        raise ValueError("No ScrapedData or IngestInfo was found.")

    source = ingest_info_or_scraped_data
    current = (source.ingest_info
               if isinstance(source, ScrapedData) else source)

    # Descend one hierarchy level per ancestor class.
    for ancestor_class in get_ancestor_class_sequence(ingest_object):
        current = _one(ancestor_class, current)

    return _one(ingest_object, current)
def _update_column_ancestor_chain_for_child_object(
    self,
    row: Dict[str, str],
    primary_coordinates: IngestFieldCoordinates,
    child_class_to_set: str,
    column_ancestor_chain: Dict[str, str],
) -> None:
    """
    Extends |column_ancestor_chain| in place so it covers every ancestor of
    the object represented by this column.

    On entry the chain only holds id values for ancestors of the row's
    primary object. This function first records the primary object's own
    id, then adds an id for each class that sits below the primary object
    and above |child_class_to_set| in the ancestor sequence.

    For example, for a file with primary object state_sentence_group, the
    chain would start as:
        {'state_person': 'my_person_id'}

    For a column whose child class is 'state_incarceration_period', the
    chain might end up as:
        {'state_person': 'my_person_id',
         'state_sentence_group': 'my_booking_id',
         'state_incarceration_sentence': 'DUMMY_GENERATED_ID'}
    """
    # Record the primary object's id under its class name.
    primary_id = primary_coordinates.field_value
    column_ancestor_chain[primary_coordinates.class_name] = primary_id

    full_sequence = get_ancestor_class_sequence(
        child_class_to_set, column_ancestor_chain, self.enforced_ancestor_types
    )
    primary_position = full_sequence.index(primary_coordinates.class_name)

    # Walk the classes strictly below the primary object. Each lookup is
    # handed the chain as updated so far, so this must stay a sequential
    # loop.
    for intermediate_class in full_sequence[primary_position + 1 :]:
        coordinates = self._child_primary_coordinates(
            row, intermediate_class, column_ancestor_chain
        )
        column_ancestor_chain[coordinates.class_name] = coordinates.field_value
def test_get_sequence_for_class_multiple_parents_enforced_bad_key(self):
    """Expect ValueError when the enforced choices are keyed by "nonsense"."""
    enforced = {"nonsense": "whatever"}

    with self.assertRaises(ValueError):
        get_ancestor_class_sequence(
            "state_supervision_period", enforced_ancestor_choices=enforced
        )
def test_get_sequence_for_class_multiple_parents_enforced_bad_choice(self):
    """Expect ValueError when 'state_sentence' is enforced to be 'bogus'."""
    enforced = {'state_sentence': 'bogus'}

    with self.assertRaises(ValueError):
        get_ancestor_class_sequence('state_supervision_period',
                                    enforced_ancestor_choices=enforced)