Ejemplo n.º 1
0
    def test_persist_to_db_different_regions(self, mock_write, _mock_region,
                                             mock_session_return):
        """Writes and persists one ingest info for each of two regions in
        turn, checking the proto handed to the mocked write each time."""
        first_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)
        second_key = ScrapeKey(REGIONS[1], constants.ScrapeType.BACKGROUND)

        # Both people share TEST_ID and differ only in their full name.
        first_info = IngestInfo()
        first_person = first_info.create_person(person_id=TEST_ID,
                                                full_name=TEST_NAME)
        first_person.create_booking(booking_id=TEST_ID)

        second_info = IngestInfo()
        second_person = second_info.create_person(person_id=TEST_ID,
                                                  full_name=TEST_NAME2)
        second_person.create_booking(booking_id=TEST_ID)

        # The two tasks are built from identical arguments.
        first_task = Task(
            task_type=constants.TaskType.SCRAPE_DATA,
            endpoint=TEST_ENDPOINT,
            response_type=constants.ResponseType.TEXT,
        )
        second_task = Task(
            task_type=constants.TaskType.SCRAPE_DATA,
            endpoint=TEST_ENDPOINT,
            response_type=constants.ResponseType.TEXT,
        )

        # First region: write, persist, then verify what reached the write.
        first_session = create_mock_session()
        mock_session_return.return_value = first_session

        batch_persistence.write(first_info, first_key, first_task)
        first_expected = ingest_utils.convert_ingest_info_to_proto(first_info)
        batch_persistence.persist_to_database(first_key.region_code,
                                              first_session.start)

        self.assertEqual(mock_write.call_args[0][0], first_expected)

        # The region we just persisted should have no ingest infos left.
        remaining_first = \
            datastore_ingest_info.batch_get_ingest_infos_for_region(
                REGIONS[0], first_session.start)
        self.assertEqual(len(remaining_first), 0)

        # Second region: a fresh mock session, one pending ingest info.
        second_session = create_mock_session()
        mock_session_return.return_value = second_session

        batch_persistence.write(second_info, second_key, second_task)
        pending_second = \
            datastore_ingest_info.batch_get_ingest_infos_for_region(
                REGIONS[1], second_session.start)
        self.assertEqual(len(pending_second), 1)

        second_expected = ingest_utils.convert_ingest_info_to_proto(
            second_info)
        batch_persistence.persist_to_database(second_key.region_code,
                                              second_session.start)

        self.assertEqual(mock_write.call_args[0][0], second_expected)

        # One write per persisted region.
        self.assertEqual(mock_write.call_count, 2)
Ejemplo n.º 2
0
    def _parse_and_persist_contents(self, args: IngestArgsType,
                                    contents_handle: ContentsHandleType):
        """Parses the given contents, converts the result to a proto and
        persists it.

        Args:
            args: ingest arguments, forwarded to the parse step, the job tag
                and the ingest metadata lookup.
            contents_handle: handle forwarded to the parse step.

        Raises:
            DirectIngestError: with PARSE_ERROR when the parse step yields a
                falsy result, or with PERSISTENCE_ERROR when the persistence
                write returns a falsy result.
        """
        parsed = self._parse(args, contents_handle)
        if not parsed:
            raise DirectIngestError(
                error_type=DirectIngestErrorType.PARSE_ERROR,
                msg="No IngestInfo after parse.")

        logging.info("Successfully parsed data for ingest run [%s]",
                     self._job_tag(args))

        proto = ingest_utils.convert_ingest_info_to_proto(parsed)

        logging.info(
            "Successfully converted ingest_info to proto for ingest "
            "run [%s]", self._job_tag(args))

        metadata = self._get_ingest_metadata(args)
        if not persistence.write(proto, metadata):
            raise DirectIngestError(
                error_type=DirectIngestErrorType.PERSISTENCE_ERROR,
                msg="Persist step failed")

        logging.info("Successfully persisted for ingest run [%s]",
                     self._job_tag(args))
    def test_persist_duplicates_to_db(self, mock_write, _mock_region,
                                      mock_session_return):
        """Tests that duplicate ingest_info.Person objects are merged before
        write.

        Three batches are written for the same region under three distinct
        tasks: a person with a booking, a second unrelated person, and a deep
        copy of the first batch. The proto handed to the mocked write is
        expected to contain each person exactly once.
        """
        mock_session = mock_session_return.return_value = create_mock_session()
        scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        # Arrange
        ii = IngestInfo()
        ii.create_person(person_id=TEST_ID,
                         full_name=TEST_NAME) \
            .create_booking(booking_id=TEST_ID)

        # The second person belongs to its own batch. Previously it was added
        # to `ii`, which left `ii_2` empty and put both people in `ii`.
        ii_2 = IngestInfo()
        ii_2.create_person(person_id=TEST_ID2, full_name=TEST_NAME2)

        # Exact duplicate of the first batch; it must not add a second copy
        # of the first person to the persisted proto.
        ii_1_dup = copy.deepcopy(ii)

        # Distinct endpoints give each write its own task.
        t1, t2, t3 = (Task(task_type=constants.TaskType.SCRAPE_DATA,
                           endpoint=TEST_ENDPOINT + str(i),
                           response_type=constants.ResponseType.TEXT)
                      for i in range(3))

        # Act
        batch_persistence.write(ii, scrape_key, t1)
        batch_persistence.write(ii_2, scrape_key, t2)
        batch_persistence.write(ii_1_dup, scrape_key, t3)

        batch_persistence.persist_to_database(scrape_key.region_code,
                                              mock_session.start)

        # Assert
        expected_ii = IngestInfo(people=ii.people + ii_2.people)
        expected_proto = ingest_utils.convert_ingest_info_to_proto(expected_ii)
        result_proto = mock_write.call_args[0][0]
        self.assertEqual(result_proto, expected_proto)
Ejemplo n.º 4
0
    def test_scrape_data_no_more_tasks(self, mock_get_more, mock_fetch,
                                       mock_populate, mock_write):
        """A SCRAPE_DATA task with persist=True and batch writes disabled
        results in exactly one write and no follow-up tasks."""
        mock_fetch.return_value = (TEST_HTML, {})
        mock_populate.return_value = ScrapedData(ingest_info=self.ii,
                                                 persist=True)
        started_at = datetime.datetime.now()
        scrape_task = Task.evolve(TEST_TASK,
                                  task_type=constants.TaskType.SCRAPE_DATA)
        request = QueueRequest(
            scrape_type=constants.ScrapeType.BACKGROUND,
            next_task=scrape_task,
            scraper_start_time=started_at,
        )

        scraper = FakeScraper("test")
        scraper.BATCH_WRITES = False
        scraper._generic_scrape(request)

        metadata = IngestMetadata(
            scraper.region.region_code,
            scraper.region.jurisdiction_id,
            started_at,
            scraper.get_enum_overrides(),
        )
        proto = convert_ingest_info_to_proto(self.ii)

        # Nothing asked for more tasks; data was populated and written once.
        self.assertEqual(mock_get_more.call_count, 0)
        self.assertEqual(mock_populate.call_count, 1)
        self.assertEqual(mock_write.call_count, 1)
        mock_write.assert_called_once_with(proto, metadata)
        self.assertEqual(len(scraper.tasks), 0)
    def test_persist_to_db(self, mock_write, _mock_region,
                           mock_session_return):
        """A single written ingest info is handed to the mocked write as a
        proto, and the region has no ingest infos left afterwards."""
        session = create_mock_session()
        mock_session_return.return_value = session
        key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        info = IngestInfo()
        person = info.create_person(person_id=TEST_ID, full_name=TEST_NAME)
        person.create_booking(booking_id=TEST_ID)

        task = Task(
            task_type=constants.TaskType.SCRAPE_DATA,
            endpoint=TEST_ENDPOINT,
            response_type=constants.ResponseType.TEXT,
        )

        batch_persistence.write(info, key, task)

        expected = ingest_utils.convert_ingest_info_to_proto(info)

        batch_persistence.persist_to_database(key.region_code, session.start)

        # First positional argument of the most recent write call.
        self.assertEqual(mock_write.call_args[0][0], expected)

        # Once persisted, the datastore should hold nothing for the region.
        leftover = datastore_ingest_info.batch_get_ingest_infos_for_region(
            REGIONS[0], session.start)
        self.assertEqual(len(leftover), 0)
Ejemplo n.º 6
0
    def validate_ingest(
        self,
        ingest_info: IngestInfo,
        expected_ingest_info: IngestInfo,
        metadata: IngestMetadata,
    ) -> IngestInfo:
        """Validates a computed ingest_info against an expected one.

        Args:
            ingest_info: the computed ingest info object.
            expected_ingest_info: the ingest info the computed one should
                match. When this is `None`, the computed value is only
                asserted to be equal to it and no further checks run.
            metadata: ingest metadata passed along to the converter that
                turns the proto into persistence entities.

        Returns:
            The `ingest_info` argument itself, so callers can run additional
            checks on it.
        """
        if expected_ingest_info is None:
            assert ingest_info == expected_ingest_info
            return ingest_info

        # Convert to the ingest info proto, validate that proto, then convert
        # the proto into persistence entities (which includes parsing strings
        # into types).
        proto = ingest_utils.convert_ingest_info_to_proto(ingest_info)
        validate(proto)
        conversion = ingest_info_converter.convert_to_persistence_entities(
            proto, metadata)

        # The conversion must not have recorded any error of any category.
        assert conversion.enum_parsing_errors == 0
        assert conversion.general_parsing_errors == 0
        assert conversion.protected_class_errors == 0

        entity_validator.validate(conversion.people)

        differences = diff_ingest_infos(expected_ingest_info, ingest_info)
        if not differences:
            return ingest_info

        failure_template = (
            "IngestInfo objects do not match.\n"
            "Expected:\n{}\n"
            "Actual:\n{}\n"
            "Differences:\n{}\n\n"
            "(paste the following) scraped object:"
            "\n{}"
        )
        self.fail(  # type: ignore[attr-defined]
            failure_template.format(
                expected_ingest_info,
                ingest_info,
                "\n".join(differences),
                repr(ingest_info),
            ))

        return ingest_info
Ejemplo n.º 7
0
    def test_multipleOpenBookings_raisesPersistenceError(self):
        """persistence.write returns a falsy result for a person carrying two
        bookings that each have only an admission date set."""
        info = ii.IngestInfo()
        person_with_two_bookings = info.create_person(full_name=FULL_NAME_1)
        person_with_two_bookings.create_booking(admission_date=DATE_RAW)
        person_with_two_bookings.create_booking(admission_date=DATE_RAW)

        proto = convert_ingest_info_to_proto(info)
        write_result = persistence.write(proto, DEFAULT_METADATA)

        self.assertFalse(write_result)
Ejemplo n.º 8
0
    def test_scrape_data_and_more_no_persist_second_time_persist(
            self, mock_get_more, mock_fetch, mock_populate, mock_write):
        """The first scrape (persist=False) forwards the ingest info on the
        follow-up request without writing; running that follow-up with
        persist=True writes exactly once."""
        follow_up_task = Task.evolve(
            TEST_TASK, task_type=constants.TaskType.SCRAPE_DATA)
        mock_get_more.return_value = [follow_up_task]
        mock_fetch.return_value = (TEST_HTML, {})
        mock_populate.return_value = ScrapedData(ingest_info=self.ii,
                                                 persist=False)
        started_at = datetime.datetime.now()
        initial_task = Task.evolve(
            TEST_TASK, task_type=constants.TaskType.SCRAPE_DATA_AND_MORE)
        initial_request = QueueRequest(
            scrape_type=constants.ScrapeType.BACKGROUND,
            next_task=initial_task,
            scraper_start_time=started_at,
        )

        scraper = FakeScraper("test")
        scraper.BATCH_WRITES = False
        scraper._generic_scrape(initial_request)

        # Because nothing was persisted, the ingest info travels with the
        # queued follow-up request.
        queued_requests = [
            QueueRequest(
                scrape_type=constants.ScrapeType.BACKGROUND,
                next_task=follow_up_task,
                scraper_start_time=started_at,
                ingest_info=self.ii,
            )
        ]

        self.assertEqual(mock_get_more.call_count, 1)
        self.assertEqual(mock_populate.call_count, 1)
        self.assertEqual(mock_write.call_count, 0)
        mock_get_more.assert_called_once_with(TEST_HTML, initial_task)
        self.assertCountEqual(queued_requests, scraper.tasks)

        # Second pass: same ingest info, but this time asked to persist.
        mock_populate.return_value = ScrapedData(ingest_info=self.ii,
                                                 persist=True)
        scraper._generic_scrape(scraper.tasks[0])
        self.assertEqual(mock_get_more.call_count, 1)
        self.assertEqual(mock_populate.call_count, 2)
        self.assertEqual(mock_write.call_count, 1)

        metadata = IngestMetadata(
            scraper.region.region_code,
            scraper.region.jurisdiction_id,
            started_at,
            scraper.get_enum_overrides(),
        )
        proto = convert_ingest_info_to_proto(self.ii)
        mock_write.assert_called_once_with(proto, metadata)
Ejemplo n.º 9
0
    def test_convert_ingest_info_one_charge_to_one_bond(self, mock_create):
        """Two charges that each own a '$1' bond convert to a proto with two
        separate bonds carrying generated ids, and the proto converts back
        to an equal IngestInfo."""
        mock_create.side_effect = self._create_generated_id

        # Arrange: Python ingest info.
        info = ingest_info.IngestInfo()
        py_person = info.create_person()
        py_person.person_id = 'id1'

        py_booking = py_person.create_booking()
        py_booking.booking_id = 'id1'

        first_charge = py_booking.create_charge()
        first_charge.charge_id = 'id1'
        first_bond = first_charge.create_bond()
        first_bond.amount = '$1'

        second_charge = py_booking.create_charge()
        second_charge.charge_id = 'id2'
        second_bond = second_charge.create_bond()
        second_bond.amount = '$1'

        # Arrange: expected proto.
        expected_proto = ingest_info_pb2.IngestInfo()
        pb_person = expected_proto.people.add()
        pb_person.person_id = 'id1'
        pb_person.booking_ids.append('id1')

        pb_booking = expected_proto.bookings.add()
        pb_booking.booking_id = 'id1'
        pb_booking.charge_ids.extend(['id1', 'id2'])

        pb_first_charge = expected_proto.charges.add()
        pb_first_charge.charge_id = 'id1'
        pb_first_bond = expected_proto.bonds.add()
        pb_first_bond.amount = '$1'
        pb_first_bond.bond_id = '1_GENERATE'
        pb_first_charge.bond_id = pb_first_bond.bond_id

        pb_second_charge = expected_proto.charges.add()
        pb_second_charge.charge_id = 'id2'
        pb_second_bond = expected_proto.bonds.add()
        pb_second_bond.amount = '$1'
        pb_second_bond.bond_id = '2_GENERATE'
        pb_second_charge.bond_id = pb_second_bond.bond_id

        # Act & Assert: forward conversion, then the round trip.
        proto = ingest_utils.convert_ingest_info_to_proto(info)
        assert expected_proto == proto

        round_tripped = ingest_utils.convert_proto_to_ingest_info(proto)
        assert round_tripped == info
Ejemplo n.º 10
0
    def test_convert_ingest_info_one_charge_to_one_bond(self, mock_create):
        """Two charges that each own a "$1" bond convert to a proto with two
        separate bonds carrying generated ids, and the proto converts back
        to an equal IngestInfo."""
        mock_create.side_effect = self._create_generated_id

        # Arrange: Python ingest info.
        info = ingest_info.IngestInfo()
        source_person = info.create_person()
        source_person.person_id = "id1"

        source_booking = source_person.create_booking()
        source_booking.booking_id = "id1"

        charge_one = source_booking.create_charge()
        charge_one.charge_id = "id1"
        bond_one = charge_one.create_bond()
        bond_one.amount = "$1"

        charge_two = source_booking.create_charge()
        charge_two.charge_id = "id2"
        bond_two = charge_two.create_bond()
        bond_two.amount = "$1"

        # Arrange: expected proto.
        expected_proto = ingest_info_pb2.IngestInfo()
        proto_person = expected_proto.people.add()
        proto_person.person_id = "id1"
        proto_person.booking_ids.append("id1")

        proto_booking = expected_proto.bookings.add()
        proto_booking.booking_id = "id1"
        proto_booking.charge_ids.extend(["id1", "id2"])

        proto_charge_one = expected_proto.charges.add()
        proto_charge_one.charge_id = "id1"
        proto_bond_one = expected_proto.bonds.add()
        proto_bond_one.amount = "$1"
        proto_bond_one.bond_id = "1_GENERATE"
        proto_charge_one.bond_id = proto_bond_one.bond_id

        proto_charge_two = expected_proto.charges.add()
        proto_charge_two.charge_id = "id2"
        proto_bond_two = expected_proto.bonds.add()
        proto_bond_two.amount = "$1"
        proto_bond_two.bond_id = "2_GENERATE"
        proto_charge_two.bond_id = proto_bond_two.bond_id

        # Act & Assert: forward conversion, then the round trip.
        proto = ingest_utils.convert_ingest_info_to_proto(info)
        assert expected_proto == proto

        restored = ingest_utils.convert_proto_to_ingest_info(proto)
        assert restored == info
Ejemplo n.º 11
0
    def test_convert_ingest_info_id_is_generated(self, mock_create):
        """A person and booking created without ids appear in the proto with
        the ids produced by the mocked id generator."""
        mock_create.side_effect = self._create_generated_id

        # Arrange: neither the person nor the booking is given an id.
        info = ingest_info.IngestInfo()
        unnamed_person = info.create_person()
        unnamed_person.surname = "testname"
        unnamed_person.create_booking()

        # Arrange: expected proto, linked through the generated ids.
        expected_proto = ingest_info_pb2.IngestInfo()
        pb_person = expected_proto.people.add()
        pb_person.surname = "testname"
        pb_person.person_id = "1_GENERATE"
        pb_booking = expected_proto.bookings.add()
        pb_booking.booking_id = "2_GENERATE"
        pb_person.booking_ids.append(pb_booking.booking_id)

        # Act & Assert: forward conversion, then the round trip.
        proto = ingest_utils.convert_ingest_info_to_proto(info)
        assert proto == expected_proto

        round_tripped = ingest_utils.convert_proto_to_ingest_info(proto)
        assert round_tripped == info
Ejemplo n.º 12
0
    def test_persist_to_db_same_task_one_fail_one_pass(self, mock_write,
                                                       _mock_region,
                                                       mock_session_return):
        """A successful write plus an error recorded for an identically
        built task still persists: persist_to_database returns a truthy
        result and the region ends up with no ingest infos."""
        session = create_mock_session()
        mock_session_return.return_value = session
        key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)
        mock_write.return_value = True

        info = IngestInfo()
        person = info.create_person(person_id=TEST_ID, full_name=TEST_NAME)
        person.create_booking(booking_id=TEST_ID)

        succeeding_task = Task(
            task_type=constants.TaskType.SCRAPE_DATA,
            endpoint=TEST_ENDPOINT,
            response_type=constants.ResponseType.TEXT,
        )

        # Built from the same arguments as the task above; since the tasks
        # are the same, the pair is expected to count as a pass.
        failing_task = Task(
            task_type=constants.TaskType.SCRAPE_DATA,
            endpoint=TEST_ENDPOINT,
            response_type=constants.ResponseType.TEXT,
        )

        batch_persistence.write(info, key, succeeding_task)
        batch_persistence.write_error(TEST_ERROR, TEST_TRACE, failing_task,
                                      key)

        expected = ingest_utils.convert_ingest_info_to_proto(info)

        persisted = batch_persistence.persist_to_database(key.region_code,
                                                          session.start)
        self.assertTrue(persisted)

        self.assertEqual(mock_write.call_args[0][0], expected)

        leftover = datastore_ingest_info.batch_get_ingest_infos_for_region(
            REGIONS[0], session.start)
        self.assertEqual(len(leftover), 0)
Ejemplo n.º 13
0
def _get_proto_from_batch_ingest_info_data_list(
        batch_ingest_info_data_list: List[BatchIngestInfoData]) -> \
        Tuple[ingest_info_pb2.IngestInfo, Dict[int, BatchIngestInfoData]]:
    """Builds one validated ingest info proto from a list of batched data.

    Args:
        batch_ingest_info_data_list: A list of BatchIngestInfoData.
    Returns:
        A tuple of:
          - an IngestInfo proto built from the de-duplicated people of the
            first error-free entry seen for each task hash, and
          - a dict mapping task hash to the most recent errored entry for
            every task hash that never had an error-free entry.
    """
    logging.info("Starting generation of proto")
    collected_infos: List[IngestInfo] = []
    succeeded_hashes: Set[int] = set()
    failures_by_hash: Dict[int, BatchIngestInfoData] = {}

    for datum in batch_ingest_info_data_list:
        # Tasks are tracked by their hash so that "have we seen this task
        # before" is a set/dict lookup rather than a scan over every earlier
        # entry, which would be quadratic over a potentially very large list.
        task_hash = datum.task_hash

        # Once a task has succeeded, later entries for it (errors or repeat
        # successes) are ignored.
        if task_hash in succeeded_hashes:
            continue

        if datum.error:
            # Only remembered while no success has been seen: a task may
            # have failed several times before eventually passing.
            failures_by_hash[task_hash] = datum
            continue

        succeeded_hashes.add(task_hash)
        # A success supersedes any earlier failure recorded for the task.
        failures_by_hash.pop(task_hash, None)
        if datum.ingest_info:
            collected_infos.append(datum.ingest_info)

    merged_info = _dedup_people(collected_infos)
    merged_proto = ingest_utils.convert_ingest_info_to_proto(merged_info)
    ingest_info_validator.validate(merged_proto)
    logging.info("Generated proto for [%s] people", len(merged_proto.people))
    return merged_proto, failures_by_hash
Ejemplo n.º 14
0
    def test_convert_ingest_info_id_is_not_generated(self):
        """Explicitly assigned person and booking ids are carried into the
        proto unchanged, and the proto converts back to an equal
        IngestInfo."""
        # Arrange: Python ingest info with explicit ids.
        info = ingest_info.IngestInfo()
        py_person = info.create_person()
        py_person.person_id = "id1"
        py_person.surname = "testname"
        py_booking = py_person.create_booking()
        py_booking.booking_id = "id2"
        py_booking.admission_date = "testdate"

        # Arrange: expected proto.
        expected_proto = ingest_info_pb2.IngestInfo()
        pb_person = expected_proto.people.add()
        pb_person.person_id = "id1"
        pb_person.surname = "testname"
        pb_person.booking_ids.append("id2")
        pb_booking = expected_proto.bookings.add()
        pb_booking.booking_id = "id2"
        pb_booking.admission_date = "testdate"

        # Act & Assert: forward conversion, then the round trip.
        proto = ingest_utils.convert_ingest_info_to_proto(info)
        assert expected_proto == proto

        round_tripped = ingest_utils.convert_proto_to_ingest_info(proto)
        assert round_tripped == info
Ejemplo n.º 15
0
    def _parse_and_persist_contents(self, args: IngestArgsType,
                                    contents: ContentsType):
        """Parses the given contents, converts the result to a proto and
        persists it.

        Args:
            args: ingest arguments, forwarded to the parse step and the job
                tag; its `ingest_time` goes into the ingest metadata.
            contents: contents forwarded to the parse step.

        Raises:
            DirectIngestError: with PARSE_ERROR when the parse step yields a
                falsy result, or with PERSISTENCE_ERROR when the persistence
                write returns a falsy result.
        """
        parsed = self._parse(args, contents)
        # TODO(1738): implement retry on fail.
        if not parsed:
            raise DirectIngestError(
                error_type=DirectIngestErrorType.PARSE_ERROR,
                msg="No IngestInfo after parse.")

        logging.info("Successfully parsed data for ingest run [%s]",
                     self._job_tag(args))

        proto = ingest_utils.convert_ingest_info_to_proto(parsed)

        logging.info(
            "Successfully converted ingest_info to proto for ingest "
            "run [%s]", self._job_tag(args))

        metadata = IngestMetadata(
            self.region.region_code,
            self.region.jurisdiction_id,
            args.ingest_time,
            self.get_enum_overrides(),
            self.system_level,
        )
        if not persistence.write(proto, metadata):
            raise DirectIngestError(
                error_type=DirectIngestErrorType.PERSISTENCE_ERROR,
                msg="Persist step failed")

        logging.info("Successfully persisted for ingest run [%s]",
                     self._job_tag(args))
Ejemplo n.º 16
0
    def test_convert_ingest_info_duplicate_incarceration_incidents(self):
        """Two incidents sharing one id under the same incarceration period
        convert to a single proto incident that references both outcomes."""
        # Arrange Python ingest info
        info = ingest_info.IngestInfo()
        person = info.create_state_person()
        person.state_person_id = 'person1'
        person.surname = 'testname'

        group = person.create_state_sentence_group()
        group.state_sentence_group_id = 'group1'

        sentence = group.create_state_incarceration_sentence()
        sentence.state_incarceration_sentence_id = 'is1'
        period = sentence.create_state_incarceration_period()
        period.state_incarceration_period_id = 'ip1'
        period.status = 'IN_CUSTODY'

        incident = period.create_state_incarceration_incident()
        incident.state_incarceration_incident_id = 'incident1'
        incident.incident_type = 'FISTICUFFS'
        first_outcome = incident.create_state_incarceration_incident_outcome()
        first_outcome.state_incarceration_incident_outcome_id = 'incident1-1'
        first_outcome.outcome_type = 'FINE'

        # Same incident id and type as above, but with its own outcome.
        duplicate_incident = period.create_state_incarceration_incident()
        duplicate_incident.state_incarceration_incident_id = 'incident1'
        duplicate_incident.incident_type = 'FISTICUFFS'
        second_outcome = (
            duplicate_incident.create_state_incarceration_incident_outcome())
        second_outcome.state_incarceration_incident_outcome_id = 'incident1-2'
        second_outcome.outcome_type = 'FINE'

        # Arrange Proto ingest info
        expected_proto = ingest_info_pb2.IngestInfo()
        pb_person = expected_proto.state_people.add()
        pb_person.state_person_id = 'person1'
        pb_person.surname = 'testname'
        pb_person.state_sentence_group_ids.append('group1')

        pb_group = expected_proto.state_sentence_groups.add()
        pb_group.state_sentence_group_id = 'group1'
        pb_group.state_incarceration_sentence_ids.append('is1')

        pb_sentence = expected_proto.state_incarceration_sentences.add()
        pb_sentence.state_incarceration_sentence_id = 'is1'
        pb_sentence.state_incarceration_period_ids.append('ip1')

        pb_period = expected_proto.state_incarceration_periods.add()
        pb_period.state_incarceration_period_id = 'ip1'
        pb_period.status = 'IN_CUSTODY'
        pb_period.state_incarceration_incident_ids.append('incident1')

        # Only one incident is expected; it points at both outcomes.
        pb_incident = expected_proto.state_incarceration_incidents.add()
        pb_incident.state_incarceration_incident_id = 'incident1'
        pb_incident.incident_type = 'FISTICUFFS'

        pb_incident.state_incarceration_incident_outcome_ids.append(
            'incident1-1')
        pb_first_outcome = (
            expected_proto.state_incarceration_incident_outcomes.add())
        pb_first_outcome.state_incarceration_incident_outcome_id = (
            'incident1-1')
        pb_first_outcome.outcome_type = 'FINE'

        pb_incident.state_incarceration_incident_outcome_ids.append(
            'incident1-2')
        pb_second_outcome = (
            expected_proto.state_incarceration_incident_outcomes.add())
        pb_second_outcome.state_incarceration_incident_outcome_id = (
            'incident1-2')
        pb_second_outcome.outcome_type = 'FINE'

        # Act & Assert
        proto = ingest_utils.convert_ingest_info_to_proto(info)
        assert expected_proto == proto

        # Duplicate IncarcerationIncident is gone: reshape the original info
        # to hold a single incident owning both outcomes before comparing it
        # with the round-tripped result.
        round_tripped = ingest_utils.convert_proto_to_ingest_info(proto)
        period.state_incarceration_incidents = [incident]
        incident.state_incarceration_incident_outcomes = [
            first_outcome, second_outcome
        ]

        assert round_tripped == info
Ejemplo n.º 17
0
    def test_convert_ingest_info_state_entities(self):
        # Arrange Python ingest info
        info = ingest_info.IngestInfo()
        person = info.create_state_person()
        person.state_person_id = "person1"
        person.surname = "testname"

        race = person.create_state_person_race()
        race.state_person_race_id = "race1"
        race.race = "white"
        ethnicity = person.create_state_person_ethnicity()
        ethnicity.state_person_ethnicity_id = "ethnicity1"
        ethnicity.ethnicity = "non-hispanic"
        external_id = person.create_state_person_external_id()
        external_id.state_person_external_id_id = "external_id1"
        external_id.id_type = "contrived"
        alias = person.create_state_alias()
        alias.state_alias_id = "alias1"
        alias.surname = "testerson"
        assessment = person.create_state_assessment()
        assessment.state_assessment_id = "assessment1"
        assessment.assessment_score = "42"
        supervising_officer = person.create_state_agent()
        supervising_officer.state_agent_id = "supervising_officer1"
        supervising_officer.full_name = "Officer Supervising"

        assessment_agent = assessment.create_state_agent()
        assessment_agent.state_agent_id = "agent1"
        assessment_agent.full_name = "Officer Jones"

        program_assignment = person.create_state_program_assignment()
        program_assignment.state_program_assignment_id = "assignment1"
        program_assignment.program_id = "program_id1"

        program_assignment_agent = program_assignment.create_state_agent()
        program_assignment_agent.state_agent_id = "program_agent1"
        program_assignment_agent.full_name = "Officer Program"

        group = person.create_state_sentence_group()
        group.state_sentence_group_id = "group1"

        fine = group.create_state_fine()
        fine.state_fine_id = "fine1"

        incarceration_sentence = group.create_state_incarceration_sentence()
        incarceration_sentence.state_incarceration_sentence_id = "is1"
        early_discharge1 = incarceration_sentence.create_state_early_discharge()
        early_discharge1.state_early_discharge_id = "early_discharge1"
        charge1 = incarceration_sentence.create_state_charge()
        charge1.state_charge_id = "charge1"
        charge1.classification_type = "F"
        incarceration_period = (
            incarceration_sentence.create_state_incarceration_period()
        )
        incarceration_period.state_incarceration_period_id = "ip1"
        incarceration_period.status = "IN_CUSTODY"
        incarceration_period.specialized_purpose_for_incarceration = (
            "SHOCK INCARCERATION"
        )
        incarceration_period.state_program_assignments = [program_assignment]
        incident = incarceration_period.create_state_incarceration_incident()
        incident.state_incarceration_incident_id = "incident1"
        incident.incident_type = "FISTICUFFS"
        incident_outcome = incident.create_state_incarceration_incident_outcome()
        incident_outcome.state_incarceration_incident_outcome_id = "incident1-1"
        incident_outcome.outcome_type = "FINE"

        incident_agent = incident.create_state_agent()
        incident_agent.state_agent_id = "agent2"
        incident_agent.full_name = "Officer Thompson"

        decision = incarceration_period.create_state_parole_decision()
        decision.state_parole_decision_id = "decision1"

        decision_agent = decision.create_state_agent()
        decision_agent.state_agent_id = "agent3"
        decision_agent.full_name = "Officer Barkley"

        supervision_sentence = group.create_state_supervision_sentence()
        supervision_sentence.state_supervision_sentence_id = "ss1"
        early_discharge2 = supervision_sentence.create_state_early_discharge()
        early_discharge2.state_early_discharge_id = "early_discharge2"
        charge2 = supervision_sentence.create_state_charge()
        charge2.state_charge_id = "charge2"
        charge2.classification_type = "M"
        supervision_period = supervision_sentence.create_state_supervision_period()
        supervision_period.state_supervision_period_id = "sp1"
        supervision_period.status = "TERMINATED"
        supervision_period_agent = supervision_period.create_state_agent()
        supervision_period_agent.state_agent_id = "agentPO"
        supervision_period_agent.full_name = "Officer Paroley"
        supervision_period.state_program_assignments = [program_assignment]

        supervision_case_type_entry = (
            supervision_period.create_state_supervision_case_type_entry()
        )
        supervision_case_type_entry.case_type = "case_type"
        supervision_case_type_entry.state_supervision_case_type_entry_id = (
            "case_type_entry_id"
        )

        supervision_contact = supervision_period.create_state_supervision_contact()
        supervision_contact.state_supervision_contact_id = "supervision_contact_id"
        supervision_contact.contact_type = "contact_type"
        supervision_contacted_agent = supervision_contact.create_state_agent()
        supervision_contacted_agent.state_agent_id = "agentPO"
        supervision_contacted_agent.full_name = "Officer Paroley"

        violation = supervision_period.create_state_supervision_violation()
        violation.state_supervision_violation_id = "violation1"
        violation.violated_conditions = "cond"
        violation.is_violent = "false"

        violation_type = violation.create_state_supervision_violation_type_entry()
        violation_type.state_supervision_violation_type_entry_id = "violation_type_id"
        violation_type.violation_type = "FELONY"

        violated_condition = (
            violation.create_state_supervision_violated_condition_entry()
        )
        violated_condition.state_supervision_violated_condition_entry_id = (
            "condition_id"
        )
        violated_condition.condition = "CURFEW"

        response = violation.create_state_supervision_violation_response()
        response.state_supervision_violation_response_id = "response1"
        response_decision_agent = response.create_state_agent()
        response_decision_agent.state_agent_id = "agentTERM"
        response_decision_agent.full_name = "Officer Termy"

        response_decision = (
            response.create_state_supervision_violation_response_decision_entry()
        )
        response_decision.state_supervision_violation_response_decision_entry_id = (
            "response_decision_id"
        )
        response_decision.decision = "REVOCATION"
        response_decision.revocation_type = "REINCARCERATION"

        bond = charge1.create_state_bond()
        bond.state_bond_id = "bond1"

        court_case = charge2.create_state_court_case()
        court_case.state_court_case_id = "case1"

        court_case_agent = court_case.create_state_agent()
        court_case_agent.state_agent_id = "agentJ"
        court_case_agent.full_name = "Judge Agent"

        # Arrange Proto ingest info
        expected_proto = ingest_info_pb2.IngestInfo()
        person_pb = expected_proto.state_people.add()
        person_pb.state_person_id = "person1"
        person_pb.surname = "testname"

        person_pb.state_person_race_ids.append("race1")
        race_pb = expected_proto.state_person_races.add()
        race_pb.state_person_race_id = "race1"
        race_pb.race = "white"
        person_pb.state_person_ethnicity_ids.append("ethnicity1")
        ethnicity_pb = expected_proto.state_person_ethnicities.add()
        ethnicity_pb.state_person_ethnicity_id = "ethnicity1"
        ethnicity_pb.ethnicity = "non-hispanic"
        person_pb.state_person_external_ids_ids.append("contrived:external_id1")
        external_id_pb = expected_proto.state_person_external_ids.add()
        external_id_pb.state_person_external_id_id = "contrived:external_id1"
        external_id_pb.id_type = "contrived"
        person_pb.state_alias_ids.append("alias1")
        alias_pb = expected_proto.state_aliases.add()
        alias_pb.state_alias_id = "alias1"
        alias_pb.surname = "testerson"
        person_pb.state_assessment_ids.append("assessment1")
        assessment_pb = expected_proto.state_assessments.add()
        assessment_pb.state_assessment_id = "assessment1"
        assessment_pb.assessment_score = "42"
        person_pb.supervising_officer_id = "supervising_officer1"
        supervising_officer_pb = expected_proto.state_agents.add()
        supervising_officer_pb.state_agent_id = "supervising_officer1"
        supervising_officer_pb.full_name = "Officer Supervising"

        assessment_pb.conducting_agent_id = "agent1"
        assessment_agent_pb = expected_proto.state_agents.add()
        assessment_agent_pb.state_agent_id = "agent1"
        assessment_agent_pb.full_name = "Officer Jones"

        person_pb.state_program_assignment_ids.append("assignment1")
        program_assignment_pb = expected_proto.state_program_assignments.add()
        program_assignment_pb.state_program_assignment_id = "assignment1"
        program_assignment_pb.program_id = "program_id1"
        program_assignment_pb.referring_agent_id = "program_agent1"
        program_assignment_agent_pb = expected_proto.state_agents.add()
        program_assignment_agent_pb.state_agent_id = "program_agent1"
        program_assignment_agent_pb.full_name = "Officer Program"

        person_pb.state_sentence_group_ids.append("group1")
        group_pb = expected_proto.state_sentence_groups.add()
        group_pb.state_sentence_group_id = "group1"

        group_pb.state_fine_ids.append("fine1")
        fine_pb = expected_proto.state_fines.add()
        fine_pb.state_fine_id = "fine1"

        group_pb.state_supervision_sentence_ids.append("ss1")
        supervision_sentence_pb = expected_proto.state_supervision_sentences.add()
        supervision_sentence_pb.state_supervision_sentence_id = "ss1"
        supervision_sentence_pb.state_early_discharge_ids.append("early_discharge2")
        early_discharge2_pb = expected_proto.state_early_discharges.add()
        early_discharge2_pb.state_early_discharge_id = "early_discharge2"
        supervision_sentence_pb.state_charge_ids.append("charge2")
        charge2_pb = expected_proto.state_charges.add()
        charge2_pb.state_charge_id = "charge2"
        charge2_pb.classification_type = "M"
        supervision_sentence_pb.state_supervision_period_ids.append("sp1")
        supervision_period_pb = expected_proto.state_supervision_periods.add()
        supervision_period_pb.state_supervision_period_id = "sp1"
        supervision_period_pb.status = "TERMINATED"
        supervision_period_pb.state_program_assignment_ids.append("assignment1")

        # An ordering requirement in the proto equality check at the end of this
        # test requires that this agent be added after agent1 and before agentPO
        court_case_agent_pb = expected_proto.state_agents.add()
        court_case_agent_pb.state_agent_id = "agentJ"
        court_case_agent_pb.full_name = "Judge Agent"

        supervision_period_pb.supervising_officer_id = "agentPO"
        supervision_period_agent_pb = expected_proto.state_agents.add()
        supervision_period_agent_pb.state_agent_id = "agentPO"
        supervision_period_agent_pb.full_name = "Officer Paroley"

        supervision_case_type_entry_pb = (
            expected_proto.state_supervision_case_type_entries.add()
        )
        supervision_case_type_entry_pb.state_supervision_case_type_entry_id = (
            "case_type_entry_id"
        )
        supervision_case_type_entry_pb.case_type = "case_type"
        supervision_period_pb.state_supervision_case_type_entry_ids.append(
            "case_type_entry_id"
        )

        supervision_contact_pb = expected_proto.state_supervision_contacts.add()
        supervision_contact_pb.state_supervision_contact_id = "supervision_contact_id"
        supervision_contact_pb.contact_type = "contact_type"
        supervision_contact_pb.contacted_agent_id = "agentPO"
        supervision_period_pb.state_supervision_contact_ids.append(
            "supervision_contact_id"
        )

        supervision_period_pb.state_supervision_violation_entry_ids.append("violation1")
        violation_pb = expected_proto.state_supervision_violations.add()
        violation_pb.state_supervision_violation_id = "violation1"
        violation_pb.is_violent = "false"
        violation_pb.violated_conditions = "cond"
        violation_pb.state_supervision_violation_type_entry_ids.append(
            "violation_type_id"
        )
        violation_type_pb = (
            expected_proto.state_supervision_violation_type_entries.add()
        )
        violation_type_pb.state_supervision_violation_type_entry_id = (
            "violation_type_id"
        )
        violation_type_pb.violation_type = "FELONY"

        violation_pb.state_supervision_violated_condition_entry_ids.append(
            "condition_id"
        )
        violation_type_pb = (
            expected_proto.state_supervision_violated_condition_entries.add()
        )
        violation_type_pb.state_supervision_violated_condition_entry_id = "condition_id"
        violation_type_pb.condition = "CURFEW"

        violation_pb.state_supervision_violation_response_ids.append("response1")
        response_pb = expected_proto.state_supervision_violation_responses.add()
        response_pb.state_supervision_violation_response_id = "response1"
        response_pb.decision_agent_ids.append("agentTERM")
        response_decision_agent_pb = expected_proto.state_agents.add()
        response_decision_agent_pb.state_agent_id = "agentTERM"
        response_decision_agent_pb.full_name = "Officer Termy"
        response_decision_pb = (
            expected_proto.state_supervision_violation_response_decision_entries.add()
        )
        response_decision_pb.state_supervision_violation_response_decision_entry_id = (
            "response_decision_id"
        )
        response_decision_pb.decision = "REVOCATION"
        response_decision_pb.revocation_type = "REINCARCERATION"
        response_pb.state_supervision_violation_response_decision_entry_ids.append(
            "response_decision_id"
        )

        group_pb.state_incarceration_sentence_ids.append("is1")
        incarceration_sentence_pb = expected_proto.state_incarceration_sentences.add()
        incarceration_sentence_pb.state_incarceration_sentence_id = "is1"
        incarceration_sentence_pb.state_early_discharge_ids.append("early_discharge1")
        early_discharge1_pb = expected_proto.state_early_discharges.add()
        early_discharge1_pb.state_early_discharge_id = "early_discharge1"
        incarceration_sentence_pb.state_charge_ids.append("charge1")
        charge1_pb = expected_proto.state_charges.add()
        charge1_pb.state_charge_id = "charge1"
        charge1_pb.classification_type = "F"
        incarceration_sentence_pb.state_incarceration_period_ids.append("ip1")
        incarceration_period_pb = expected_proto.state_incarceration_periods.add()
        incarceration_period_pb.state_incarceration_period_id = "ip1"
        incarceration_period_pb.status = "IN_CUSTODY"
        incarceration_period_pb.specialized_purpose_for_incarceration = (
            "SHOCK INCARCERATION"
        )
        incarceration_period_pb.state_incarceration_incident_ids.append("incident1")
        incident_pb = expected_proto.state_incarceration_incidents.add()
        incident_pb.state_incarceration_incident_id = "incident1"
        incident_pb.incident_type = "FISTICUFFS"
        incarceration_period_pb.state_program_assignment_ids.append("assignment1")

        incident_pb.responding_officer_id = "agent2"
        incident_agent_pb = expected_proto.state_agents.add()
        incident_agent_pb.state_agent_id = "agent2"
        incident_agent_pb.full_name = "Officer Thompson"

        incident_pb.state_incarceration_incident_outcome_ids.append("incident1-1")
        incident_outcome_pb = expected_proto.state_incarceration_incident_outcomes.add()
        incident_outcome_pb.state_incarceration_incident_outcome_id = "incident1-1"
        incident_outcome_pb.outcome_type = "FINE"

        incarceration_period_pb.state_parole_decision_ids.append("decision1")
        decision_pb = expected_proto.state_parole_decisions.add()
        decision_pb.state_parole_decision_id = "decision1"

        decision_pb.decision_agent_ids.append("agent3")
        decision_agent_pb = expected_proto.state_agents.add()
        decision_agent_pb.state_agent_id = "agent3"
        decision_agent_pb.full_name = "Officer Barkley"

        charge1_pb.state_bond_id = "bond1"
        bond_pb = expected_proto.state_bonds.add()
        bond_pb.state_bond_id = "bond1"

        charge2_pb.state_court_case_id = "case1"
        court_case_pb = expected_proto.state_court_cases.add()
        court_case_pb.state_court_case_id = "case1"

        court_case_pb.judge_id = "agentJ"

        expected_info = copy.deepcopy(info)
        # Act & Assert

        proto = ingest_utils.convert_ingest_info_to_proto(info)
        assert expected_proto == proto

        info_back = ingest_utils.convert_proto_to_ingest_info(proto)
        assert info_back == expected_info

        # Assert that none of the proto's collections are empty, i.e. we've
        # tested all of the object graph
        proto_classes = [field.name for field in proto.DESCRIPTOR.fields]
        for cls in proto_classes:
            if cls.startswith("state_"):
                assert proto.__getattribute__(cls)
Ejemplo n.º 18
0
    def test_convert_ingest_info_state_entities(self):
        """Round-trip a populated state-level IngestInfo through its proto form.

        The test builds a Python ``IngestInfo`` object graph and, by hand, the
        proto it is expected to convert to. It then asserts that:

        * ``ingest_utils.convert_ingest_info_to_proto`` produces exactly the
          expected proto;
        * ``ingest_utils.convert_proto_to_ingest_info`` turns that proto back
          into an object equal to a deep copy of the input taken before the
          conversion ran;
        * every top-level ``state_*`` field of the resulting proto is
          non-empty, so the whole state object graph has been exercised.

        NOTE: the order in which entries are added to the expected proto's
        repeated fields (agents, charges, early discharges, ...) is part of
        the equality check, so the statements below must not be reordered.
        """
        # Arrange Python ingest info
        info = ingest_info.IngestInfo()
        person = info.create_state_person()
        person.state_person_id = 'person1'
        person.surname = 'testname'

        race = person.create_state_person_race()
        race.state_person_race_id = 'race1'
        race.race = 'white'
        ethnicity = person.create_state_person_ethnicity()
        ethnicity.state_person_ethnicity_id = 'ethnicity1'
        ethnicity.ethnicity = 'non-hispanic'
        external_id = person.create_state_person_external_id()
        external_id.state_person_external_id_id = 'external_id1'
        external_id.id_type = 'contrived'
        alias = person.create_state_alias()
        alias.state_alias_id = 'alias1'
        alias.surname = 'testerson'
        assessment = person.create_state_assessment()
        assessment.state_assessment_id = 'assessment1'
        assessment.assessment_score = '42'
        supervising_officer = person.create_state_agent()
        supervising_officer.state_agent_id = 'supervising_officer1'
        supervising_officer.full_name = 'Officer Supervising'

        assessment_agent = assessment.create_state_agent()
        assessment_agent.state_agent_id = 'agent1'
        assessment_agent.full_name = 'Officer Jones'

        program_assignment = person.create_state_program_assignment()
        program_assignment.state_program_assignment_id = 'assignment1'
        program_assignment.program_id = 'program_id1'

        program_assignment_agent = program_assignment.create_state_agent()
        program_assignment_agent.state_agent_id = 'program_agent1'
        program_assignment_agent.full_name = 'Officer Program'

        group = person.create_state_sentence_group()
        group.state_sentence_group_id = 'group1'

        fine = group.create_state_fine()
        fine.state_fine_id = 'fine1'

        incarceration_sentence = group.create_state_incarceration_sentence()
        incarceration_sentence.state_incarceration_sentence_id = 'is1'
        early_discharge1 = (
            incarceration_sentence.create_state_early_discharge())
        early_discharge1.state_early_discharge_id = 'early_discharge1'
        charge1 = incarceration_sentence.create_state_charge()
        charge1.state_charge_id = 'charge1'
        charge1.classification_type = 'F'
        incarceration_period = (
            incarceration_sentence.create_state_incarceration_period())
        incarceration_period.state_incarceration_period_id = 'ip1'
        incarceration_period.status = 'IN_CUSTODY'
        incarceration_period.specialized_purpose_for_incarceration = (
            'SHOCK INCARCERATION')
        # The same program assignment object is attached to the person, the
        # incarceration period and (below) the supervision period.
        incarceration_period.state_program_assignments = [program_assignment]
        incident = incarceration_period.create_state_incarceration_incident()
        incident.state_incarceration_incident_id = 'incident1'
        incident.incident_type = 'FISTICUFFS'
        incident_outcome = (
            incident.create_state_incarceration_incident_outcome())
        incident_outcome.state_incarceration_incident_outcome_id = 'incident1-1'
        incident_outcome.outcome_type = 'FINE'

        incident_agent = incident.create_state_agent()
        incident_agent.state_agent_id = 'agent2'
        incident_agent.full_name = 'Officer Thompson'

        decision = incarceration_period.create_state_parole_decision()
        decision.state_parole_decision_id = 'decision1'

        decision_agent = decision.create_state_agent()
        decision_agent.state_agent_id = 'agent3'
        decision_agent.full_name = 'Officer Barkley'

        supervision_sentence = group.create_state_supervision_sentence()
        supervision_sentence.state_supervision_sentence_id = 'ss1'
        early_discharge2 = supervision_sentence.create_state_early_discharge()
        early_discharge2.state_early_discharge_id = 'early_discharge2'
        charge2 = supervision_sentence.create_state_charge()
        charge2.state_charge_id = 'charge2'
        charge2.classification_type = 'M'
        supervision_period = (
            supervision_sentence.create_state_supervision_period())
        supervision_period.state_supervision_period_id = 'sp1'
        supervision_period.status = 'TERMINATED'
        supervision_period_agent = supervision_period.create_state_agent()
        supervision_period_agent.state_agent_id = 'agentPO'
        supervision_period_agent.full_name = 'Officer Paroley'
        supervision_period.state_program_assignments = [program_assignment]

        supervision_case_type_entry = (
            supervision_period.create_state_supervision_case_type_entry())
        supervision_case_type_entry.case_type = 'case_type'
        supervision_case_type_entry.state_supervision_case_type_entry_id = (
            'case_type_entry_id')

        supervision_contact = (
            supervision_period.create_state_supervision_contact())
        supervision_contact.state_supervision_contact_id = (
            'supervision_contact_id')
        supervision_contact.contact_type = 'contact_type'
        # Deliberately the same id and name as the supervision period's agent.
        supervision_contacted_agent = supervision_contact.create_state_agent()
        supervision_contacted_agent.state_agent_id = 'agentPO'
        supervision_contacted_agent.full_name = 'Officer Paroley'

        violation = supervision_period.create_state_supervision_violation()
        violation.state_supervision_violation_id = 'violation1'
        violation.violated_conditions = 'cond'
        violation.is_violent = 'false'

        violation_type = (
            violation.create_state_supervision_violation_type_entry())
        violation_type.state_supervision_violation_type_entry_id = (
            'violation_type_id')
        violation_type.violation_type = 'FELONY'

        violated_condition = (
            violation.create_state_supervision_violated_condition_entry())
        violated_condition.state_supervision_violated_condition_entry_id = (
            'condition_id')
        violated_condition.condition = 'CURFEW'

        response = violation.create_state_supervision_violation_response()
        response.state_supervision_violation_response_id = 'response1'
        response_decision_agent = response.create_state_agent()
        response_decision_agent.state_agent_id = 'agentTERM'
        response_decision_agent.full_name = 'Officer Termy'

        response_decision = (
            response
            .create_state_supervision_violation_response_decision_entry())
        response_decision.state_supervision_violation_response_decision_entry_id = (
            'response_decision_id')
        response_decision.decision = 'REVOCATION'
        response_decision.revocation_type = 'REINCARCERATION'

        bond = charge1.create_state_bond()
        bond.state_bond_id = 'bond1'

        court_case = charge2.create_state_court_case()
        court_case.state_court_case_id = 'case1'

        court_case_agent = court_case.create_state_agent()
        court_case_agent.state_agent_id = 'agentJ'
        court_case_agent.full_name = 'Judge Agent'

        # Arrange Proto ingest info
        expected_proto = ingest_info_pb2.IngestInfo()
        person_pb = expected_proto.state_people.add()
        person_pb.state_person_id = 'person1'
        person_pb.surname = 'testname'

        person_pb.state_person_race_ids.append('race1')
        race_pb = expected_proto.state_person_races.add()
        race_pb.state_person_race_id = 'race1'
        race_pb.race = 'white'
        person_pb.state_person_ethnicity_ids.append('ethnicity1')
        ethnicity_pb = expected_proto.state_person_ethnicities.add()
        ethnicity_pb.state_person_ethnicity_id = 'ethnicity1'
        ethnicity_pb.ethnicity = 'non-hispanic'
        # The expected proto id is the Python object's id ('external_id1')
        # prefixed with its id_type ('contrived').
        person_pb.state_person_external_ids_ids.append(
            'contrived:external_id1')
        external_id_pb = expected_proto.state_person_external_ids.add()
        external_id_pb.state_person_external_id_id = 'contrived:external_id1'
        external_id_pb.id_type = 'contrived'
        person_pb.state_alias_ids.append('alias1')
        alias_pb = expected_proto.state_aliases.add()
        alias_pb.state_alias_id = 'alias1'
        alias_pb.surname = 'testerson'
        person_pb.state_assessment_ids.append('assessment1')
        assessment_pb = expected_proto.state_assessments.add()
        assessment_pb.state_assessment_id = 'assessment1'
        assessment_pb.assessment_score = '42'
        person_pb.supervising_officer_id = 'supervising_officer1'
        supervising_officer_pb = expected_proto.state_agents.add()
        supervising_officer_pb.state_agent_id = 'supervising_officer1'
        supervising_officer_pb.full_name = 'Officer Supervising'

        assessment_pb.conducting_agent_id = 'agent1'
        assessment_agent_pb = expected_proto.state_agents.add()
        assessment_agent_pb.state_agent_id = 'agent1'
        assessment_agent_pb.full_name = 'Officer Jones'

        # A single program assignment entry is expected; the person and both
        # periods refer to it by id.
        person_pb.state_program_assignment_ids.append('assignment1')
        program_assignment_pb = expected_proto.state_program_assignments.add()
        program_assignment_pb.state_program_assignment_id = 'assignment1'
        program_assignment_pb.program_id = 'program_id1'
        program_assignment_pb.referring_agent_id = 'program_agent1'
        program_assignment_agent_pb = expected_proto.state_agents.add()
        program_assignment_agent_pb.state_agent_id = 'program_agent1'
        program_assignment_agent_pb.full_name = 'Officer Program'

        person_pb.state_sentence_group_ids.append('group1')
        group_pb = expected_proto.state_sentence_groups.add()
        group_pb.state_sentence_group_id = 'group1'

        group_pb.state_fine_ids.append('fine1')
        fine_pb = expected_proto.state_fines.add()
        fine_pb.state_fine_id = 'fine1'

        group_pb.state_supervision_sentence_ids.append('ss1')
        supervision_sentence_pb = (
            expected_proto.state_supervision_sentences.add())
        supervision_sentence_pb.state_supervision_sentence_id = 'ss1'
        supervision_sentence_pb.state_early_discharge_ids.append(
            'early_discharge2')
        early_discharge2_pb = expected_proto.state_early_discharges.add()
        early_discharge2_pb.state_early_discharge_id = 'early_discharge2'
        supervision_sentence_pb.state_charge_ids.append('charge2')
        charge2_pb = expected_proto.state_charges.add()
        charge2_pb.state_charge_id = 'charge2'
        charge2_pb.classification_type = 'M'
        supervision_sentence_pb.state_supervision_period_ids.append('sp1')
        supervision_period_pb = expected_proto.state_supervision_periods.add()
        supervision_period_pb.state_supervision_period_id = 'sp1'
        supervision_period_pb.status = 'TERMINATED'
        supervision_period_pb.state_program_assignment_ids.append(
            'assignment1')

        # An ordering requirement in the proto equality check at the end of this
        # test requires that this agent be added after agent1 and before agentPO
        court_case_agent_pb = expected_proto.state_agents.add()
        court_case_agent_pb.state_agent_id = 'agentJ'
        court_case_agent_pb.full_name = 'Judge Agent'

        supervision_period_pb.supervising_officer_id = 'agentPO'
        supervision_period_agent_pb = expected_proto.state_agents.add()
        supervision_period_agent_pb.state_agent_id = 'agentPO'
        supervision_period_agent_pb.full_name = 'Officer Paroley'

        supervision_case_type_entry_pb = (
            expected_proto.state_supervision_case_type_entries.add())
        supervision_case_type_entry_pb.state_supervision_case_type_entry_id = (
            'case_type_entry_id')
        supervision_case_type_entry_pb.case_type = 'case_type'
        supervision_period_pb.state_supervision_case_type_entry_ids.append(
            'case_type_entry_id')

        supervision_contact_pb = (
            expected_proto.state_supervision_contacts.add())
        supervision_contact_pb.state_supervision_contact_id = (
            'supervision_contact_id')
        supervision_contact_pb.contact_type = 'contact_type'
        # The contacted agent shares the 'agentPO' id already added above, so
        # no second state_agents entry is expected for it.
        supervision_contact_pb.contacted_agent_id = 'agentPO'
        supervision_period_pb.state_supervision_contact_ids.append(
            'supervision_contact_id')

        supervision_period_pb.state_supervision_violation_entry_ids.append(
            'violation1')
        violation_pb = expected_proto.state_supervision_violations.add()
        violation_pb.state_supervision_violation_id = 'violation1'
        violation_pb.is_violent = 'false'
        violation_pb.violated_conditions = 'cond'
        violation_pb.state_supervision_violation_type_entry_ids.append(
            'violation_type_id')
        violation_type_pb = (
            expected_proto.state_supervision_violation_type_entries.add())
        violation_type_pb.state_supervision_violation_type_entry_id = (
            'violation_type_id')
        violation_type_pb.violation_type = 'FELONY'

        violation_pb.state_supervision_violated_condition_entry_ids.append(
            'condition_id')
        violated_condition_pb = (
            expected_proto.state_supervision_violated_condition_entries.add())
        violated_condition_pb.state_supervision_violated_condition_entry_id = (
            'condition_id')
        violated_condition_pb.condition = 'CURFEW'

        violation_pb.state_supervision_violation_response_ids.append(
            'response1')
        response_pb = (
            expected_proto.state_supervision_violation_responses.add())
        response_pb.state_supervision_violation_response_id = 'response1'
        response_pb.decision_agent_ids.append('agentTERM')
        response_decision_agent_pb = expected_proto.state_agents.add()
        response_decision_agent_pb.state_agent_id = 'agentTERM'
        response_decision_agent_pb.full_name = 'Officer Termy'
        response_decision_pb = (
            expected_proto
            .state_supervision_violation_response_decision_entries.add())
        response_decision_pb.state_supervision_violation_response_decision_entry_id = (
            'response_decision_id')
        response_decision_pb.decision = 'REVOCATION'
        response_decision_pb.revocation_type = 'REINCARCERATION'
        response_pb.state_supervision_violation_response_decision_entry_ids.append(
            'response_decision_id')

        group_pb.state_incarceration_sentence_ids.append('is1')
        incarceration_sentence_pb = (
            expected_proto.state_incarceration_sentences.add())
        incarceration_sentence_pb.state_incarceration_sentence_id = 'is1'
        incarceration_sentence_pb.state_early_discharge_ids.append(
            'early_discharge1')
        early_discharge1_pb = expected_proto.state_early_discharges.add()
        early_discharge1_pb.state_early_discharge_id = 'early_discharge1'
        incarceration_sentence_pb.state_charge_ids.append('charge1')
        charge1_pb = expected_proto.state_charges.add()
        charge1_pb.state_charge_id = 'charge1'
        charge1_pb.classification_type = 'F'
        incarceration_sentence_pb.state_incarceration_period_ids.append('ip1')
        incarceration_period_pb = (
            expected_proto.state_incarceration_periods.add())
        incarceration_period_pb.state_incarceration_period_id = 'ip1'
        incarceration_period_pb.status = 'IN_CUSTODY'
        incarceration_period_pb.specialized_purpose_for_incarceration = (
            'SHOCK INCARCERATION')
        incarceration_period_pb.state_incarceration_incident_ids.append(
            'incident1')
        incident_pb = expected_proto.state_incarceration_incidents.add()
        incident_pb.state_incarceration_incident_id = 'incident1'
        incident_pb.incident_type = 'FISTICUFFS'
        incarceration_period_pb.state_program_assignment_ids.append(
            'assignment1')

        incident_pb.responding_officer_id = 'agent2'
        incident_agent_pb = expected_proto.state_agents.add()
        incident_agent_pb.state_agent_id = 'agent2'
        incident_agent_pb.full_name = 'Officer Thompson'

        incident_pb.state_incarceration_incident_outcome_ids.append(
            'incident1-1')
        incident_outcome_pb = (
            expected_proto.state_incarceration_incident_outcomes.add())
        incident_outcome_pb.state_incarceration_incident_outcome_id = (
            'incident1-1')
        incident_outcome_pb.outcome_type = 'FINE'

        incarceration_period_pb.state_parole_decision_ids.append('decision1')
        decision_pb = expected_proto.state_parole_decisions.add()
        decision_pb.state_parole_decision_id = 'decision1'

        decision_pb.decision_agent_ids.append('agent3')
        decision_agent_pb = expected_proto.state_agents.add()
        decision_agent_pb.state_agent_id = 'agent3'
        decision_agent_pb.full_name = 'Officer Barkley'

        charge1_pb.state_bond_id = 'bond1'
        bond_pb = expected_proto.state_bonds.add()
        bond_pb.state_bond_id = 'bond1'

        charge2_pb.state_court_case_id = 'case1'
        court_case_pb = expected_proto.state_court_cases.add()
        court_case_pb.state_court_case_id = 'case1'

        court_case_pb.judge_id = 'agentJ'

        # Snapshot the input before converting, so the round-trip comparison
        # below does not depend on whether the conversion touches `info`.
        expected_info = copy.deepcopy(info)

        # Act & Assert
        proto = ingest_utils.convert_ingest_info_to_proto(info)
        assert expected_proto == proto

        info_back = ingest_utils.convert_proto_to_ingest_info(proto)
        assert info_back == expected_info

        # Assert that none of the proto's collections are empty, i.e. we've
        # tested all of the object graph
        for field in proto.DESCRIPTOR.fields:
            field_name = field.name
            if field_name.startswith('state_'):
                assert getattr(proto, field_name), (
                    'Expected proto collection [%s] to be non-empty'
                    % field_name)
Ejemplo n.º 19
0
    def _generic_scrape(self, request: QueueRequest):
        """
        General handler for all scrape tasks.  This function is a generic entry
        point into all types of scrapes.  It decides what to call based on
        the task carried by the request.

        The flow is: obtain content (either the content already attached to
        the task or a fresh fetch), optionally populate data from it,
        optionally enqueue follow-up tasks, and finally persist any scraped
        data that is marked for persistence.

        Args:
            request: QueueRequest describing this step of the scrape. Its
                `next_task` is the task to run; `ingest_info`, `scrape_type`
                and `scraper_start_time` are carried over from the previous
                step.

        Raises:
            ScraperFetchError: if fetching the content fails.
            ScraperPopulateDataError: if `populate_data` fails.
            ScraperGetMoreTasksError: if `get_more_tasks` fails.
            Exception: any other error is re-raised unchanged. When
                BATCH_WRITES is set, every error is first recorded through
                `batch_persistence.write_error`.
        """
        # Bind the task before entering the try block so that the error
        # handler at the bottom can always reference it.
        task = request.next_task
        try:
            # Here we handle a special case where we weren't really sure
            # we were going to get data when we submitted a task, but then
            # we ended up with data, so no more requests are required,
            # just the content we already have.
            # TODO(#680): remove this
            if task.content is not None:
                content = self._parse_html_content(task.content)
                cookies = None
            else:
                post_data = task.post_data

                # Let the child transform the post_data if it wants before
                # sending the requests.  This hook is in here in case the
                # child did something like compress the post_data before
                # it put it on the queue.
                # NOTE(review): the return value is ignored, so the hook
                # presumably transforms post_data in place -- confirm.
                self.transform_post_data(post_data)

                # We always fetch some content before doing anything.
                # post_data is passed through exactly as stored on the task.
                try:
                    content, cookies = self._fetch_content(
                        task.endpoint,
                        task.response_type,
                        headers=task.headers,
                        cookies=task.cookies,
                        params=task.params,
                        post_data=post_data,
                        json_data=task.json)
                except Exception as e:
                    raise ScraperFetchError(str(e)) from e

            scraped_data = None
            if self.should_scrape_data(task.task_type):
                # If we want to scrape data, we should either create an
                # ingest_info object or get the one that already exists.
                logging.info("Scraping data for [%s] and endpoint: [%s]",
                             self.region.region_code, task.endpoint)
                try:
                    scraped_data = self.populate_data(
                        content, task, request.ingest_info or IngestInfo())
                except Exception as e:
                    raise ScraperPopulateDataError(str(e)) from e

            if self.should_get_more_tasks(task.task_type):
                logging.info("Getting more tasks for [%s] and endpoint: [%s]",
                             self.region.region_code, task.endpoint)

                # Only send along ingest info if it will not be persisted now.
                ingest_info_to_send = None
                if scraped_data is not None and not scraped_data.persist:
                    ingest_info_to_send = scraped_data.ingest_info

                try:
                    # pylint: disable=assignment-from-no-return
                    next_tasks = self.get_more_tasks(content, task)
                except Exception as e:
                    raise ScraperGetMoreTasksError(str(e)) from e
                for next_task in next_tasks:
                    # Include cookies received from response, if any
                    if cookies:
                        # Merge into a fresh copy for each task.  Updating
                        # the response cookies in place would leak one
                        # task's cookies into every later task and make all
                        # of the tasks share a single mutable mapping.
                        # The task's own cookies still take precedence.
                        merged_cookies = cookies.copy()
                        merged_cookies.update(next_task.cookies)
                        next_task = Task.evolve(next_task,
                                                cookies=merged_cookies)
                    self.add_task(
                        '_generic_scrape',
                        QueueRequest(
                            scrape_type=request.scrape_type,
                            scraper_start_time=request.scraper_start_time,
                            next_task=next_task,
                            ingest_info=ingest_info_to_send,
                        ))

            if scraped_data is not None and scraped_data.persist:
                if scraped_data.ingest_info:
                    people = scraped_data.ingest_info.people
                    # Report the real cap rather than a hard-coded number so
                    # the message stays correct if the constant changes.
                    logging.info("Logging at most %d people (were %d):",
                                 constants.MAX_PEOPLE_TO_LOG, len(people))
                    for person in people[:constants.MAX_PEOPLE_TO_LOG]:
                        logging.info("[%s]", person)
                    logging.info("Last seen time of person being set as: [%s]",
                                 request.scraper_start_time)
                    metadata = IngestMetadata(self.region.region_code,
                                              self.region.jurisdiction_id,
                                              request.scraper_start_time,
                                              self.get_enum_overrides())
                    if self.BATCH_WRITES:
                        logging.info(
                            "Queuing ingest_info ([%d] people) to "
                            "batch_persistence for [%s]",
                            len(people),
                            self.region.region_code)
                        scrape_key = ScrapeKey(self.region.region_code,
                                               request.scrape_type)
                        batch_persistence.write(
                            ingest_info=scraped_data.ingest_info,
                            scrape_key=scrape_key,
                            task=task,
                        )
                    else:
                        logging.info(
                            "Writing ingest_info ([%d] people) to the database"
                            " for [%s]", len(people),
                            self.region.region_code)
                        persistence.write(
                            ingest_utils.convert_ingest_info_to_proto(
                                scraped_data.ingest_info), metadata)
                for sc in scraped_data.single_counts:
                    if not sc.date:
                        # Undated counts are stamped with the start date of
                        # the current session, when one exists.
                        # NOTE(review): the session is looked up with
                        # ScrapeType.BACKGROUND rather than
                        # request.scrape_type -- confirm this is intended.
                        scrape_key = ScrapeKey(self.region.region_code,
                                               constants.ScrapeType.BACKGROUND)
                        session = sessions.get_current_session(scrape_key)
                        if session:
                            sc = attr.evolve(sc, date=session.start.date())
                    single_count.store_single_count(
                        sc, self.region.jurisdiction_id)
        except Exception as e:
            if self.BATCH_WRITES:
                # Record the failure alongside the batch before propagating.
                scrape_key = ScrapeKey(self.region.region_code,
                                       request.scrape_type)
                batch_persistence.write_error(
                    error=str(e),
                    trace_id=get_trace_id_from_flask(),
                    task=task,
                    scrape_key=scrape_key,
                )
            # Bare raise re-raises the active exception unchanged.
            raise