Beispiel #1
0
    def run_test_pipeline(
            self,
            dataset: str,
            fake_supervision_period_id: int,
            unifying_id_field_filter_set: Optional[Set[int]] = None,
            metric_types_filter: Optional[Set[str]] = None):
        """Builds the program pipeline on a TestPipeline and runs it.

        Loads persons, program assignments, assessments and supervision
        periods through extractor_utils.BuildRootEntity, co-groups them,
        classifies the program assignments using a one-row supervision
        period to agent side input, computes program metrics, and checks
        them with AssertMatchers.validate_pipeline_test().

        Args:
            dataset: Passed as `dataset` to every BuildRootEntity load.
            fake_supervision_period_id: Value stored under
                'supervision_period_id' in the single agent association row.
            unifying_id_field_filter_set: Forwarded to the program
                assignment, assessment and supervision period loads. The
                person load does not receive it.
            metric_types_filter: Metric types handed to GetProgramMetrics;
                {'ALL'} is used when this is None or empty.
        """
        test_pipeline = TestPipeline()

        # StatePersons. This is the only load that is not given the id
        # filter set.
        # NOTE(review): confirm that leaving persons unfiltered is intended.
        load_persons = extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StatePerson,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True)
        persons = (
            test_pipeline
            | 'Load Persons' >> load_persons)  # type: ignore

        # StateProgramAssignments, built together with related entities.
        load_assignments = extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StateProgramAssignment,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=True,
            unifying_id_field_filter_set=unifying_id_field_filter_set)
        program_assignments = (
            test_pipeline
            | 'Load Program Assignments' >> load_assignments)  # type: ignore

        # StateAssessments, without related entities.
        load_assessments = extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StateAssessment,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=False,
            unifying_id_field_filter_set=unifying_id_field_filter_set)
        assessments = (
            test_pipeline
            | 'Load Assessments' >> load_assessments)  # type: ignore

        # StateSupervisionPeriods, without related entities.
        load_periods = extractor_utils.BuildRootEntity(
            dataset=dataset,
            root_entity_class=entities.StateSupervisionPeriod,
            unifying_id_field=entities.StatePerson.get_class_id_name(),
            build_related_entities=False,
            unifying_id_field_filter_set=unifying_id_field_filter_set)
        supervision_periods = (
            test_pipeline
            | 'Load SupervisionPeriods' >> load_periods)  # type: ignore

        # A single hard-coded association row stands in for the
        # supervision period to agent table.
        agent_association_row = {
            'agent_id': 1010,
            'agent_external_id': 'OFFICER0009',
            'district_external_id': '10',
            'supervision_period_id': fake_supervision_period_id
        }
        create_agent_table = beam.Create([agent_association_row])
        agent_associations = (
            test_pipeline
            | 'Create SupervisionPeriod to Agent table' >> create_agent_table)

        # ConvertDictToKVTuple is given 'supervision_period_id' as its extra
        # argument; presumably that is the field used as the key.
        to_kv_tuples = beam.ParDo(pipeline.ConvertDictToKVTuple(),
                                  'supervision_period_id')
        agent_associations_as_kv = (
            agent_associations
            | 'Convert SupervisionPeriod to Agent table to KV tuples'
            >> to_kv_tuples)

        # Bring each person's collections together under named tags.
        grouping_inputs = {
            'person': persons,
            'program_assignments': program_assignments,
            'assessments': assessments,
            'supervision_periods': supervision_periods
        }
        persons_entities = (
            grouping_inputs
            | 'Group StatePerson to StateProgramAssignments and'
            >> beam.CoGroupByKey())

        # Classify the grouped program assignments; the agent associations
        # are supplied as a dict side input.
        classify_assignments = beam.ParDo(
            pipeline.ClassifyProgramAssignments(),
            AsDict(agent_associations_as_kv))
        person_program_events = persons_entities | classify_assignments

        # Pipeline job options, extended with a timestamp for local jobs.
        all_pipeline_options = PipelineOptions().get_all_options()
        all_pipeline_options['job_timestamp'] = \
            datetime.datetime.now().strftime('%Y-%m-%d_%H_%M_%S.%f')

        # Fall back to every metric type when no filter was supplied.
        metric_types = metric_types_filter or {'ALL'}

        # Compute the program metrics from the classified events.
        get_program_metrics = pipeline.GetProgramMetrics(
            pipeline_options=all_pipeline_options,
            metric_types=metric_types,
            calculation_end_month=None,
            calculation_month_count=-1)
        program_metrics = (
            person_program_events
            | 'Get Program Metrics' >> get_program_metrics)  # type: ignore

        assert_that(program_metrics, AssertMatchers.validate_pipeline_test())

        test_pipeline.run()
Beispiel #2
0
    def testProgramPipeline(self):
        """Runs the program pipeline over one hand-built person.

        Builds schema fixtures for a single person (races, ethnicity, a
        program assignment, an assessment, a supervision period and a
        supervision violation response), exposes them to
        extractor_utils.BuildRootEntity through an in-memory data_dict keyed
        by table name, and checks the resulting program metrics with
        AssertMatchers.validate_pipeline_test().
        """
        person_id = 12345

        person = schema.StatePerson(
            person_id=person_id,
            gender=Gender.MALE,
            birthdate=date(1970, 1, 1),
            residency_status=ResidencyStatus.PERMANENT)
        person_rows = [normalized_database_base_dict(person)]

        # NOTE(review): both race fixtures use person_race_id=111 -- confirm
        # that the shared id is intentional.
        first_race = schema.StatePersonRace(
            person_race_id=111,
            state_code='CA',
            race=Race.BLACK,
            person_id=person_id)
        second_race = schema.StatePersonRace(
            person_race_id=111,
            state_code='ND',
            race=Race.WHITE,
            person_id=person_id)
        race_rows = normalized_database_base_dict_list(
            [first_race, second_race])

        hispanic_ethnicity = schema.StatePersonEthnicity(
            person_ethnicity_id=111,
            state_code='CA',
            ethnicity=Ethnicity.HISPANIC,
            person_id=person_id)
        ethnicity_rows = normalized_database_base_dict_list(
            [hispanic_ethnicity])

        assignment = schema.StateProgramAssignment(
            program_assignment_id=123,
            referral_date=date(2015, 5, 10),
            person_id=person_id)

        lsir_assessment = schema.StateAssessment(
            assessment_id=298374,
            assessment_date=date(2015, 3, 19),
            assessment_type='LSIR',
            person_id=person_id)

        period = schema.StateSupervisionPeriod(
            supervision_period_id=1111,
            state_code='CA',
            county_code='124',
            start_date=date(2015, 3, 14),
            termination_date=date(2016, 12, 29),
            supervision_type=StateSupervisionType.PROBATION,
            person_id=person_id)

        assignment_rows = [normalized_database_base_dict(assignment)]
        assessment_rows = [normalized_database_base_dict(lsir_assessment)]
        period_rows = [normalized_database_base_dict(period)]

        violation_response = \
            database_test_utils.generate_test_supervision_violation_response(
                person_id)
        violation_response_rows = [
            normalized_database_base_dict(violation_response)
        ]

        # In-memory stand-in for the database: table name -> list of rows.
        data_dict = {
            schema.StatePerson.__tablename__: person_rows,
            schema.StatePersonRace.__tablename__: race_rows,
            schema.StatePersonEthnicity.__tablename__: ethnicity_rows,
            schema.StateSupervisionViolationResponse.__tablename__:
                violation_response_rows,
            schema.StateSupervisionPeriod.__tablename__: period_rows,
            schema.StateProgramAssignment.__tablename__: assignment_rows,
            schema.StateAssessment.__tablename__: assessment_rows
        }

        test_pipeline = TestPipeline()

        def load_root_entity(step_label, schema_class, entity_class,
                             with_related):
            """Applies one BuildRootEntity load that reads from data_dict."""
            return (
                test_pipeline
                | step_label >> extractor_utils.BuildRootEntity(
                    dataset=None,
                    data_dict=data_dict,
                    root_schema_class=schema_class,
                    root_entity_class=entity_class,
                    unifying_id_field='person_id',
                    build_related_entities=with_related))

        # Persons and program assignments are built with related entities;
        # assessments and supervision periods are not.
        persons = load_root_entity(
            'Load Persons', schema.StatePerson, entities.StatePerson, True)
        program_assignments = load_root_entity(
            'Load Program Assignments', schema.StateProgramAssignment,
            entities.StateProgramAssignment, True)
        assessments = load_root_entity(
            'Load Assessments', schema.StateAssessment,
            entities.StateAssessment, False)
        supervision_periods = load_root_entity(
            'Load SupervisionPeriods', schema.StateSupervisionPeriod,
            entities.StateSupervisionPeriod, False)

        # A single hard-coded association row, tied to the fixture
        # supervision period's id.
        agent_association_row = {
            'agent_id': 1010,
            'agent_external_id': 'OFFICER0009',
            'district_external_id': '10',
            'supervision_period_id': period.supervision_period_id
        }
        create_agent_table = beam.Create([agent_association_row])
        agent_associations = (
            test_pipeline
            | 'Create SupervisionPeriod to Agent table' >> create_agent_table)

        # ConvertDictToKVTuple is given 'supervision_period_id' as its extra
        # argument; presumably that is the field used as the key.
        to_kv_tuples = beam.ParDo(pipeline.ConvertDictToKVTuple(),
                                  'supervision_period_id')
        agent_associations_as_kv = (
            agent_associations
            | 'Convert SupervisionPeriod to Agent table to KV tuples'
            >> to_kv_tuples)

        # Bring the person's collections together under named tags.
        grouping_inputs = {
            'person': persons,
            'program_assignments': program_assignments,
            'assessments': assessments,
            'supervision_periods': supervision_periods
        }
        persons_entities = (
            grouping_inputs
            | 'Group StatePerson to StateProgramAssignments and'
            >> beam.CoGroupByKey())

        # Classify the grouped program assignments; the agent associations
        # are supplied as a dict side input.
        classify_assignments = beam.ParDo(
            pipeline.ClassifyProgramAssignments(),
            AsDict(agent_associations_as_kv))
        person_program_events = persons_entities | classify_assignments

        # Pipeline job options, extended with a timestamp for local jobs.
        all_pipeline_options = PipelineOptions().get_all_options()
        all_pipeline_options['job_timestamp'] = \
            datetime.datetime.now().strftime('%Y-%m-%d_%H_%M_%S.%f')

        # Compute the program metrics from the classified events.
        get_program_metrics = pipeline.GetProgramMetrics(
            pipeline_options=all_pipeline_options,
            inclusions=ALL_INCLUSIONS_DICT,
            calculation_month_limit=-1)
        program_metrics = (
            person_program_events
            | 'Get Program Metrics' >> get_program_metrics)

        assert_that(program_metrics, AssertMatchers.validate_pipeline_test())

        test_pipeline.run()