Example #1
0
def generate_test_assessment(person_id) -> state_schema.StateAssessment:
    """Returns a StateAssessment of type ASI to be used for testing.

    The assessment is created with a fixed assessment_id (345) and state
    code ('us_ca') and is tied to the given person_id.
    """
    return state_schema.StateAssessment(
        assessment_id=345,
        state_code='us_ca',
        assessment_type=StateAssessmentType.ASI.value,
        person_id=person_id,
    )
Example #2
0
    def build_data_dict(fake_person_id: int, fake_supervision_period_id: int):
        """Assembles the table-name -> rows mapping for a basic pipeline run.

        All entities are created for `fake_person_id`; the single supervision
        period uses `fake_supervision_period_id` as its id. Each schema object
        is converted to row form with normalized_database_base_dict(_list).
        """
        person_rows = [
            normalized_database_base_dict(
                schema.StatePerson(
                    person_id=fake_person_id,
                    gender=Gender.MALE,
                    birthdate=date(1970, 1, 1),
                    residency_status=ResidencyStatus.PERMANENT,
                )
            )
        ]

        # NOTE(review): both race rows reuse person_race_id=111 and carry
        # different state codes -- confirm this is intentional.
        race_specs = (('CA', Race.BLACK), ('ND', Race.WHITE))
        race_rows = normalized_database_base_dict_list([
            schema.StatePersonRace(
                person_race_id=111,
                state_code=race_state_code,
                race=race,
                person_id=fake_person_id,
            )
            for race_state_code, race in race_specs
        ])

        ethnicity_rows = normalized_database_base_dict_list([
            schema.StatePersonEthnicity(
                person_ethnicity_id=111,
                state_code='CA',
                ethnicity=Ethnicity.HISPANIC,
                person_id=fake_person_id,
            )
        ])

        program_assignment_rows = [
            normalized_database_base_dict(
                schema.StateProgramAssignment(
                    state_code='CA',
                    program_assignment_id=123,
                    referral_date=date(2015, 5, 10),
                    person_id=fake_person_id,
                )
            )
        ]

        assessment_rows = [
            normalized_database_base_dict(
                schema.StateAssessment(
                    assessment_id=298374,
                    assessment_date=date(2015, 3, 19),
                    assessment_type='LSIR',
                    person_id=fake_person_id,
                )
            )
        ]

        supervision_period_rows = [
            normalized_database_base_dict(
                schema.StateSupervisionPeriod(
                    supervision_period_id=fake_supervision_period_id,
                    state_code='CA',
                    county_code='124',
                    start_date=date(2015, 3, 14),
                    termination_date=date(2016, 12, 29),
                    supervision_type=StateSupervisionType.PROBATION,
                    person_id=fake_person_id,
                )
            )
        ]

        violation_response = \
            database_test_utils.generate_test_supervision_violation_response(
                fake_person_id)
        violation_response_rows = [
            normalized_database_base_dict(violation_response)
        ]

        # Tables with no rows are still listed, with empty row lists.
        return {
            schema.StatePerson.__tablename__: person_rows,
            schema.StatePersonRace.__tablename__: race_rows,
            schema.StatePersonEthnicity.__tablename__: ethnicity_rows,
            schema.StateSupervisionViolationResponse.__tablename__:
            violation_response_rows,
            schema.StateSupervisionPeriod.__tablename__:
            supervision_period_rows,
            schema.StateProgramAssignment.__tablename__:
            program_assignment_rows,
            schema.StateAssessment.__tablename__: assessment_rows,
            schema.StatePersonExternalId.__tablename__: [],
            schema.StatePersonAlias.__tablename__: [],
            schema.StateSentenceGroup.__tablename__: [],
        }
Example #3
0
    def testProgramPipelineNoReferrals(self):
        """Runs the program pipeline when a person has no program assignments.

        The only program assignment in the data belongs to the second person,
        while the assessment, supervision period and violation response all
        belong to the first person.
        """
        person_id = 12345
        other_person_id = 9876

        person = schema.StatePerson(
            person_id=person_id,
            gender=Gender.MALE,
            birthdate=date(1970, 1, 1),
            residency_status=ResidencyStatus.PERMANENT,
        )
        other_person = schema.StatePerson(
            person_id=other_person_id,
            gender=Gender.MALE,
            birthdate=date(1974, 3, 12),
            residency_status=ResidencyStatus.PERMANENT,
        )
        person_rows = normalized_database_base_dict_list(
            [person, other_person])

        # NOTE(review): both race rows reuse person_race_id=111 and carry
        # different state codes -- confirm this is intentional.
        race_specs = (('CA', Race.BLACK), ('ND', Race.WHITE))
        race_rows = normalized_database_base_dict_list([
            schema.StatePersonRace(
                person_race_id=111,
                state_code=race_state_code,
                race=race,
                person_id=person_id,
            )
            for race_state_code, race in race_specs
        ])

        ethnicity_rows = normalized_database_base_dict_list([
            schema.StatePersonEthnicity(
                person_ethnicity_id=111,
                state_code='CA',
                ethnicity=Ethnicity.HISPANIC,
                person_id=person_id,
            )
        ])

        # The program assignment is attached to the *other* person.
        program_assignment_rows = [
            normalized_database_base_dict(
                schema.StateProgramAssignment(
                    state_code='CA',
                    program_assignment_id=123,
                    referral_date=date(2015, 5, 10),
                    person_id=other_person_id,
                )
            )
        ]

        assessment_rows = [
            normalized_database_base_dict(
                schema.StateAssessment(
                    assessment_id=298374,
                    assessment_date=date(2015, 3, 19),
                    assessment_type='LSIR',
                    person_id=person_id,
                )
            )
        ]

        # Kept as a named object: its id is passed to run_test_pipeline below.
        supervision_period = schema.StateSupervisionPeriod(
            supervision_period_id=1111,
            state_code='CA',
            county_code='124',
            start_date=date(2015, 3, 14),
            termination_date=date(2016, 12, 29),
            supervision_type=StateSupervisionType.PROBATION,
            person_id=person_id,
        )
        supervision_period_rows = [
            normalized_database_base_dict(supervision_period)
        ]

        violation_response = \
            database_test_utils.generate_test_supervision_violation_response(
                person_id)
        violation_response_rows = [
            normalized_database_base_dict(violation_response)
        ]

        data_dict = {
            schema.StatePerson.__tablename__: person_rows,
            schema.StatePersonRace.__tablename__: race_rows,
            schema.StatePersonEthnicity.__tablename__: ethnicity_rows,
            schema.StateSupervisionViolationResponse.__tablename__:
            violation_response_rows,
            schema.StateSupervisionPeriod.__tablename__:
            supervision_period_rows,
            schema.StateProgramAssignment.__tablename__:
            program_assignment_rows,
            schema.StateAssessment.__tablename__: assessment_rows,
            schema.StatePersonExternalId.__tablename__: [],
            schema.StatePersonAlias.__tablename__: [],
            schema.StateSentenceGroup.__tablename__: [],
        }

        dataset = 'recidiviz-123.state'

        # Replace the BigQuery reader with a fake source built from data_dict.
        fake_source = \
            self.fake_bq_source_factory.create_fake_bq_source_constructor(
                dataset, data_dict)
        with patch(
                'recidiviz.calculator.pipeline.utils.extractor_utils.ReadFromBigQuery',
                fake_source):
            self.run_test_pipeline(dataset,
                                   supervision_period.supervision_period_id)
Example #4
0
    def testProgramPipelineNoReferrals(self):
        """Runs the program pipeline when a person has no program assignments.

        The only program assignment in the data (status DENIED) belongs to
        the second person, while the assessment, supervision period and
        violation response all belong to the first person.
        """
        state_code = "US_XX"
        person_id = 12345
        other_person_id = 9876

        person = schema.StatePerson(
            state_code=state_code,
            person_id=person_id,
            gender=Gender.MALE,
            birthdate=date(1970, 1, 1),
            residency_status=ResidencyStatus.PERMANENT,
        )
        other_person = schema.StatePerson(
            state_code=state_code,
            person_id=other_person_id,
            gender=Gender.MALE,
            birthdate=date(1974, 3, 12),
            residency_status=ResidencyStatus.PERMANENT,
        )
        person_rows = normalized_database_base_dict_list([person, other_person])

        # NOTE(review): both race rows reuse person_race_id=111 -- confirm
        # this is intentional.
        race_rows = normalized_database_base_dict_list(
            [
                schema.StatePersonRace(
                    person_race_id=111,
                    state_code=state_code,
                    race=race,
                    person_id=person_id,
                )
                for race in (Race.BLACK, Race.WHITE)
            ]
        )

        ethnicity_rows = normalized_database_base_dict_list(
            [
                schema.StatePersonEthnicity(
                    person_ethnicity_id=111,
                    state_code=state_code,
                    ethnicity=Ethnicity.HISPANIC,
                    person_id=person_id,
                )
            ]
        )

        # The program assignment is attached to the *other* person.
        program_assignment_rows = [
            normalized_database_base_dict(
                schema.StateProgramAssignment(
                    state_code=state_code,
                    program_assignment_id=123,
                    referral_date=date(2015, 5, 10),
                    person_id=other_person_id,
                    participation_status=StateProgramAssignmentParticipationStatus.DENIED,
                )
            )
        ]

        assessment_rows = [
            normalized_database_base_dict(
                schema.StateAssessment(
                    assessment_id=298374,
                    state_code=state_code,
                    assessment_date=date(2015, 3, 19),
                    assessment_type="LSIR",
                    person_id=person_id,
                )
            )
        ]

        # Kept as a named object: its id is reused in the agent association.
        supervision_period = schema.StateSupervisionPeriod(
            supervision_period_id=1111,
            state_code=state_code,
            county_code="124",
            start_date=date(2015, 3, 14),
            termination_date=date(2016, 12, 29),
            supervision_type=StateSupervisionType.PROBATION,
            person_id=person_id,
            status=StateSupervisionPeriodStatus.PRESENT_WITHOUT_INFO,
        )
        supervision_period_rows = [normalized_database_base_dict(supervision_period)]

        violation_response = (
            database_test_utils.generate_test_supervision_violation_response(
                person_id
            )
        )
        violation_response_rows = [normalized_database_base_dict(violation_response)]

        agent_association_rows = [
            {
                "agent_id": 1010,
                "person_id": person_id,
                "state_code": state_code,
                "agent_external_id": "OFFICER0009",
                "supervision_period_id": supervision_period.supervision_period_id,
            }
        ]

        population_count_rows = [
            {
                "state_code": state_code,
                "race_or_ethnicity": "BLACK",
                "population_count": 1,
                "representation_priority": 1,
            }
        ]

        data_dict = {
            schema.StatePerson.__tablename__: person_rows,
            schema.StatePersonRace.__tablename__: race_rows,
            schema.StatePersonEthnicity.__tablename__: ethnicity_rows,
            schema.StateSupervisionViolationResponse.__tablename__: violation_response_rows,
            schema.StateSupervisionPeriod.__tablename__: supervision_period_rows,
            schema.StateProgramAssignment.__tablename__: program_assignment_rows,
            schema.StateAssessment.__tablename__: assessment_rows,
            schema.StatePersonExternalId.__tablename__: [],
            schema.StatePersonAlias.__tablename__: [],
            schema.StateSentenceGroup.__tablename__: [],
            "supervision_period_to_agent_association": agent_association_rows,
            "state_race_ethnicity_population_counts": population_count_rows,
        }

        self.run_test_pipeline("recidiviz-123.state", data_dict)
Example #5
0
    def build_data_dict(fake_person_id: int, fake_supervision_period_id: int):
        """Assembles the table-name -> rows mapping for a basic pipeline run.

        All entities are created for `fake_person_id` with state code US_XX;
        the single supervision period uses `fake_supervision_period_id` as its
        id. Schema objects are converted to row form with
        normalized_database_base_dict(_list); the two association tables are
        given as plain dicts.
        """
        state_code = "US_XX"

        person_rows = [
            normalized_database_base_dict(
                schema.StatePerson(
                    state_code=state_code,
                    person_id=fake_person_id,
                    gender=Gender.MALE,
                    birthdate=date(1970, 1, 1),
                    residency_status=ResidencyStatus.PERMANENT,
                )
            )
        ]

        # NOTE(review): both race rows reuse person_race_id=111 -- confirm
        # this is intentional.
        race_rows = normalized_database_base_dict_list(
            [
                schema.StatePersonRace(
                    person_race_id=111,
                    state_code=state_code,
                    race=race,
                    person_id=fake_person_id,
                )
                for race in (Race.BLACK, Race.WHITE)
            ]
        )

        ethnicity_rows = normalized_database_base_dict_list(
            [
                schema.StatePersonEthnicity(
                    person_ethnicity_id=111,
                    state_code=state_code,
                    ethnicity=Ethnicity.HISPANIC,
                    person_id=fake_person_id,
                )
            ]
        )

        program_assignment_rows = [
            normalized_database_base_dict(
                schema.StateProgramAssignment(
                    state_code=state_code,
                    program_assignment_id=123,
                    referral_date=date(2015, 5, 10),
                    person_id=fake_person_id,
                    participation_status=StateProgramAssignmentParticipationStatus.IN_PROGRESS,
                )
            )
        ]

        assessment_rows = [
            normalized_database_base_dict(
                schema.StateAssessment(
                    assessment_id=298374,
                    state_code=state_code,
                    assessment_date=date(2015, 3, 19),
                    assessment_type="LSIR",
                    person_id=fake_person_id,
                )
            )
        ]

        supervision_period_rows = [
            normalized_database_base_dict(
                schema.StateSupervisionPeriod(
                    supervision_period_id=fake_supervision_period_id,
                    state_code=state_code,
                    county_code="124",
                    start_date=date(2015, 3, 14),
                    termination_date=date(2016, 12, 29),
                    supervision_type=StateSupervisionType.PROBATION,
                    person_id=fake_person_id,
                    status=StateSupervisionPeriodStatus.PRESENT_WITHOUT_INFO,
                )
            )
        ]

        violation_response = (
            database_test_utils.generate_test_supervision_violation_response(
                fake_person_id
            )
        )
        violation_response_rows = [normalized_database_base_dict(violation_response)]

        agent_association_rows = [
            {
                "agent_id": 1010,
                "person_id": fake_person_id,
                "state_code": state_code,
                "agent_external_id": "OFFICER0009",
                "supervision_period_id": fake_supervision_period_id,
            }
        ]

        population_count_rows = [
            {
                "state_code": state_code,
                "race_or_ethnicity": "BLACK",
                "population_count": 1,
                "representation_priority": 1,
            }
        ]

        # Tables with no rows are still listed, with empty row lists.
        return {
            schema.StatePerson.__tablename__: person_rows,
            schema.StatePersonRace.__tablename__: race_rows,
            schema.StatePersonEthnicity.__tablename__: ethnicity_rows,
            schema.StateSupervisionViolationResponse.__tablename__: violation_response_rows,
            schema.StateSupervisionPeriod.__tablename__: supervision_period_rows,
            schema.StateProgramAssignment.__tablename__: program_assignment_rows,
            schema.StateAssessment.__tablename__: assessment_rows,
            schema.StatePersonExternalId.__tablename__: [],
            schema.StatePersonAlias.__tablename__: [],
            schema.StateSentenceGroup.__tablename__: [],
            "supervision_period_to_agent_association": agent_association_rows,
            "state_race_ethnicity_population_counts": population_count_rows,
        }
def generate_test_person(person_id, sentence_groups, incarceration_period,
                         agent, supervision_period) -> state_schema.StatePerson:
    """Builds a populated StatePerson fixture to be used for testing.

    The person is given one external id, one alias, one race, one ethnicity,
    the supplied sentence groups, two assessments (one attached to
    `incarceration_period`, one to `supervision_period`, both conducted by
    `agent`) and one program assignment referred by `agent`.
    """
    external_id = state_schema.StatePersonExternalId(
        person_external_id_id=234,
        external_id='person_external_id',
        id_type='STATE',
        state_code='us_ny',
        person_id=person_id,
    )

    alias = state_schema.StatePersonAlias(
        person_alias_id=1456,
        state_code='us_ca',
        full_name='name',
        person_id=person_id,
    )

    race = state_schema.StatePersonRace(
        person_race_id=345,
        state_code='us_ca',
        race=Race.BLACK.value,
        race_raw_text='BLK',
        person_id=person_id,
    )

    # NOTE(review): the raw text 'HISP' is paired with NOT_HISPANIC --
    # confirm this mismatch is intentional fixture data.
    ethnicity = state_schema.StatePersonEthnicity(
        person_ethnicity_id=345,
        state_code='us_ca',
        ethnicity=Ethnicity.NOT_HISPANIC.value,
        ethnicity_raw_text='HISP',
        person_id=person_id,
    )

    incarceration_assessment = state_schema.StateAssessment(
        assessment_id=456,
        person_id=person_id,
        state_code='us_ca',
        incarceration_period=incarceration_period,
        conducting_agent=agent,
    )

    supervision_assessment = state_schema.StateAssessment(
        assessment_id=4567,
        person_id=person_id,
        state_code='us_ca',
        supervision_period=supervision_period,
        conducting_agent=agent,
    )

    participation_status = \
        StateProgramAssignmentParticipationStatus.PRESENT_WITHOUT_INFO.value
    program_assignment = state_schema.StateProgramAssignment(
        program_assignment_id=567,
        participation_status=participation_status,
        state_code='us_ca',
        referring_agent=agent,
    )

    return state_schema.StatePerson(
        person_id=person_id,
        full_name='name',
        birthdate=datetime.date(1980, 1, 5),
        birthdate_inferred_from_age=False,
        external_ids=[external_id],
        aliases=[alias],
        races=[race],
        ethnicities=[ethnicity],
        sentence_groups=sentence_groups,
        assessments=[incarceration_assessment, supervision_assessment],
        program_assignments=[program_assignment],
    )
def generate_assessment(person, **kwargs) -> schema.StateAssessment:
    """Returns a StateAssessment for `person`.

    `state_code` defaults to _STATE_CODE; any keyword arguments are passed
    through to the StateAssessment constructor and override that default.
    """
    # Later entries win, so caller-supplied kwargs override the default.
    constructor_args = {"state_code": _STATE_CODE, **kwargs}
    return schema.StateAssessment(person=person, **constructor_args)
Example #8
0
    def testProgramPipeline(self):
        """Runs the program pipeline end to end against in-memory data.

        Builds the rows for one person (races, ethnicity, program assignment,
        assessment, supervision period and violation response), loads them
        through BuildRootEntity, groups them per person, classifies the
        program assignments, computes the program metrics and validates the
        result with AssertMatchers.validate_pipeline_test().
        """
        person_id = 12345

        person_rows = [
            normalized_database_base_dict(
                schema.StatePerson(
                    person_id=person_id,
                    gender=Gender.MALE,
                    birthdate=date(1970, 1, 1),
                    residency_status=ResidencyStatus.PERMANENT,
                )
            )
        ]

        # NOTE(review): both race rows reuse person_race_id=111 and carry
        # different state codes -- confirm this is intentional.
        race_specs = (('CA', Race.BLACK), ('ND', Race.WHITE))
        race_rows = normalized_database_base_dict_list([
            schema.StatePersonRace(
                person_race_id=111,
                state_code=race_state_code,
                race=race,
                person_id=person_id,
            )
            for race_state_code, race in race_specs
        ])

        ethnicity_rows = normalized_database_base_dict_list([
            schema.StatePersonEthnicity(
                person_ethnicity_id=111,
                state_code='CA',
                ethnicity=Ethnicity.HISPANIC,
                person_id=person_id,
            )
        ])

        program_assignment_rows = [
            normalized_database_base_dict(
                schema.StateProgramAssignment(
                    program_assignment_id=123,
                    referral_date=date(2015, 5, 10),
                    person_id=person_id,
                )
            )
        ]

        assessment_rows = [
            normalized_database_base_dict(
                schema.StateAssessment(
                    assessment_id=298374,
                    assessment_date=date(2015, 3, 19),
                    assessment_type='LSIR',
                    person_id=person_id,
                )
            )
        ]

        # Kept as a named object: its id is reused in the agent association.
        supervision_period = schema.StateSupervisionPeriod(
            supervision_period_id=1111,
            state_code='CA',
            county_code='124',
            start_date=date(2015, 3, 14),
            termination_date=date(2016, 12, 29),
            supervision_type=StateSupervisionType.PROBATION,
            person_id=person_id,
        )
        supervision_period_rows = [
            normalized_database_base_dict(supervision_period)
        ]

        violation_response = \
            database_test_utils.generate_test_supervision_violation_response(
                person_id)
        violation_response_rows = [
            normalized_database_base_dict(violation_response)
        ]

        data_dict = {
            schema.StatePerson.__tablename__: person_rows,
            schema.StatePersonRace.__tablename__: race_rows,
            schema.StatePersonEthnicity.__tablename__: ethnicity_rows,
            schema.StateSupervisionViolationResponse.__tablename__:
            violation_response_rows,
            schema.StateSupervisionPeriod.__tablename__:
            supervision_period_rows,
            schema.StateProgramAssignment.__tablename__:
            program_assignment_rows,
            schema.StateAssessment.__tablename__: assessment_rows
        }

        test_pipeline = TestPipeline()

        def load_root_entities(label, schema_class, entity_class,
                               build_related):
            """Applies BuildRootEntity to the test pipeline for one schema
            class, reading from data_dict with person_id as the unifying id
            field."""
            return (test_pipeline
                    | label >> extractor_utils.BuildRootEntity(
                        dataset=None,
                        data_dict=data_dict,
                        root_schema_class=schema_class,
                        root_entity_class=entity_class,
                        unifying_id_field='person_id',
                        build_related_entities=build_related))

        # Only persons and program assignments also build related entities.
        persons = load_root_entities(
            'Load Persons',
            schema.StatePerson, entities.StatePerson, True)

        program_assignments = load_root_entities(
            'Load Program Assignments',
            schema.StateProgramAssignment, entities.StateProgramAssignment,
            True)

        assessments = load_root_entities(
            'Load Assessments',
            schema.StateAssessment, entities.StateAssessment, False)

        supervision_periods = load_root_entities(
            'Load SupervisionPeriods',
            schema.StateSupervisionPeriod, entities.StateSupervisionPeriod,
            False)

        # Single-row stand-in for the supervision period -> agent table.
        agent_association_row = {
            'agent_id': 1010,
            'agent_external_id': 'OFFICER0009',
            'district_external_id': '10',
            'supervision_period_id': supervision_period.supervision_period_id
        }

        agent_associations = (
            test_pipeline
            | 'Create SupervisionPeriod to Agent table' >> beam.Create(
                [agent_association_row]))

        # Convert the association rows to KV tuples on supervision_period_id
        # so they can be passed as a dict side input below.
        agent_associations_as_kv = (
            agent_associations
            | 'Convert SupervisionPeriod to Agent table to KV tuples' >>
            beam.ParDo(pipeline.ConvertDictToKVTuple(),
                       'supervision_period_id'))

        # Group each StatePerson with their other entities
        entities_to_group = {
            'person': persons,
            'program_assignments': program_assignments,
            'assessments': assessments,
            'supervision_periods': supervision_periods
        }
        grouped_person_entities = (
            entities_to_group
            | 'Group StatePerson to StateProgramAssignments and'
            >> beam.CoGroupByKey())

        # Identify ProgramEvents from the StatePerson's
        # StateProgramAssignments
        classify_assignments = beam.ParDo(
            pipeline.ClassifyProgramAssignments(),
            AsDict(agent_associations_as_kv))
        person_program_events = grouped_person_entities | classify_assignments

        # Pipeline job details, extended with a timestamp for local jobs
        all_pipeline_options = PipelineOptions().get_all_options()
        all_pipeline_options['job_timestamp'] = \
            datetime.datetime.now().strftime('%Y-%m-%d_%H_%M_%S.%f')

        # Get program metrics
        get_metrics = pipeline.GetProgramMetrics(
            pipeline_options=all_pipeline_options,
            inclusions=ALL_INCLUSIONS_DICT,
            calculation_month_limit=-1)
        program_metrics = (person_program_events
                           | 'Get Program Metrics' >> get_metrics)

        assert_that(program_metrics, AssertMatchers.validate_pipeline_test())

        test_pipeline.run()