Ejemplo n.º 1
0
    def test_json_serializable_metric_key_InvalidList(self):
        """A list stored under an unexpected key must be rejected with a ValueError."""
        metric_key = {'invalid_list_key': ['list', 'values']}

        with pytest.raises(ValueError) as e:
            json_serializable_metric_key(metric_key)

        # The message check must live outside the `with` block: any statement
        # placed after the raising call inside the block is never executed.
        # The raised exception itself is exposed as `e.value`.
        self.assertEqual(
            str(e.value),
            "Unexpected list in metric_key for key: invalid_list_key")
Ejemplo n.º 2
0
def convert_event_based_to_person_based_metrics(
        metrics: List[Tuple[Dict[str, Any], Any]]) -> \
        List[Tuple[Dict[str, Any], Any]]:
    """
    Collapses event-based metrics into person-based metrics by de-duplicating
    identical (metric key, value) pairs.

    Every incoming metric dictionary is stamped with the PERSON methodology
    (note: the dictionaries passed in are modified in place), serialized to a
    canonical JSON string so it can be stored in a set, and then decoded back
    into a dictionary once duplicates have been dropped. As a result, a person
    contributes to a given metric at most once.

    Args:
        metrics: (metric key dictionary, value) pairs produced per event.

    Returns:
        The de-duplicated (metric key dictionary, value) pairs. The returned
        dictionaries are the result of a JSON round trip, and their order
        follows set iteration order.
    """
    unique_entries = set()

    for metric_key, metric_value in metrics:
        metric_key['methodology'] = MetricMethodologyType.PERSON
        # Dictionaries are not hashable, so use a sorted-key JSON string as
        # the set member instead.
        hashable_key = json.dumps(json_serializable_metric_key(metric_key),
                                  sort_keys=True)
        unique_entries.add((hashable_key, metric_value))

    # Decode each surviving JSON key back into a dictionary
    return [(json.loads(hashable_key), metric_value)
            for hashable_key, metric_value in unique_entries]
Ejemplo n.º 3
0
    def process(self, element, *args, **kwargs):
        """Converts a SupervisionMetric into a dictionary and tags it by metric class.

        The beam.io.WriteToBigQuery transform requires elements to be dictionaries whose values are in the formats
        required by the BigQuery I/O connector. For a list of required formats, see the "Data types" section of:
            https://beam.apache.org/documentation/io/built-in/google-bigquery/

        Args:
            element: A SupervisionMetric

        Yields:
            A dictionary representation of the SupervisionMetric in the format Dict[str, Any], wrapped in a
                TaggedOutput named after the metric class. Nothing is yielded for an element that matches none of
                the classes below.
        """
        element_dict = json_serializable_metric_key(element.__dict__)

        # Checked in order, first match wins. SupervisionRevocationAnalysisMetric is listed ahead of
        # SupervisionRevocationMetric so that an analysis metric is never tagged as a plain revocation.
        output_tag_by_class = (
            (SupervisionPopulationMetric, 'populations'),
            (SupervisionRevocationAnalysisMetric, 'revocation_analyses'),
            (SupervisionRevocationViolationTypeAnalysisMetric,
             'revocation_violation_type_analyses'),
            (SupervisionRevocationMetric, 'revocations'),
            (SupervisionSuccessMetric, 'successes'),
            (TerminatedSupervisionAssessmentScoreChangeMetric,
             'assessment_changes'),
        )

        for metric_class, output_tag in output_tag_by_class:
            if isinstance(element, metric_class):
                yield beam.pvalue.TaggedOutput(output_tag, element_dict)
                return
Ejemplo n.º 4
0
    def testProduceProgramMetric_EmptyMetric(self):
        """An empty metric key is expected to produce no output from ProduceProgramMetrics."""
        empty_key_json = json.dumps(json_serializable_metric_key({}),
                                    sort_keys=True)
        metric_value = 102

        # The ParDo receives every pipeline option plus a job timestamp
        options = PipelineOptions().get_all_options()
        options['job_timestamp'] = datetime.datetime.now().strftime(
            '%Y-%m-%d_%H_%M_%S.%f')

        test_pipeline = TestPipeline()
        produce_metrics = beam.ParDo(pipeline.ProduceProgramMetrics(),
                                     **options)

        output = (
            test_pipeline
            | beam.Create([(empty_key_json, metric_value)])
            | 'Produce Program Metric' >> produce_metrics)

        assert_that(output, equal_to([]))

        test_pipeline.run()
Ejemplo n.º 5
0
    def testProduceProgramMetric(self):
        """A referral metric key should be turned into a program referral metric carrying the given value."""
        referral_key = dict(
            gender=Gender.MALE,
            methodology=MetricMethodologyType.PERSON,
            year=1999,
            month=3,
            metric_type=ProgramMetricType.REFERRAL.value,
            state_code='CA',
        )
        referral_key_json = json.dumps(
            json_serializable_metric_key(referral_key), sort_keys=True)
        metric_value = 10

        # The ParDo receives every pipeline option plus a job timestamp
        options = PipelineOptions().get_all_options()
        options['job_timestamp'] = datetime.datetime.now().strftime(
            '%Y-%m-%d_%H_%M_%S.%f')

        test_pipeline = TestPipeline()
        produce_metrics = beam.ParDo(pipeline.ProduceProgramMetrics(),
                                     **options)

        output = (
            test_pipeline
            | beam.Create([(referral_key_json, metric_value)])
            | 'Produce Program Metric' >> produce_metrics)

        assert_that(
            output,
            AssertMatchers.validate_program_referral_metric(metric_value))

        test_pipeline.run()
Ejemplo n.º 6
0
    def test_json_serializable_metric_key_ViolationTypeFrequencyCounter(self):
        """The nested violation_type_frequency_counter list is expected to be flattened into one string."""
        violation_counter = [
            ["TECHNICAL"],
            ["ASC", "EMP", "TECHNICAL"],
        ]
        metric_key = dict(
            gender=Gender.MALE,
            year=1999,
            month=3,
            state_code="CA",
            violation_type_frequency_counter=violation_counter,
        )

        # Each inner list is rendered in brackets, and the rendered groups
        # appear in the order shown here rather than in input order.
        expected_output = dict(
            gender="MALE",
            year=1999,
            month=3,
            state_code="CA",
            violation_type_frequency_counter="[ASC, EMP, TECHNICAL],[TECHNICAL]",
        )

        self.assertEqual(expected_output,
                         json_serializable_metric_key(metric_key))
Ejemplo n.º 7
0
    def process(self, element, calculation_month_limit, inclusions):
        """Emits every supervision metric combination for one person, tagged by metric type.

        The StatePerson and their SupervisionTimeBuckets are handed to the calculator, which maps them to all
        supervision metric combinations. Each combination's key is serialized to a JSON string before being emitted.

        Args:
            element: Tuple containing a StatePerson and their SupervisionTimeBuckets
            calculation_month_limit: The number of months to limit the monthly calculation output to.
            inclusions: This should be a dictionary with values for the following keys:
                    - age_bucket
                    - gender
                    - race
                    - ethnicity
        Yields:
            A TaggedOutput wrapping (JSON metric key, value) for each combination whose metric type is recognized.
            Combinations with any other metric type are not emitted.
        """
        person, supervision_time_buckets = element

        metric_combinations = calculator.map_supervision_combinations(
            person, supervision_time_buckets, inclusions,
            calculation_month_limit)

        for metric_key, value in metric_combinations:
            metric_type = metric_key.get('metric_type')

            # A dictionary is not hashable, so the key travels as a sorted-key JSON string
            json_key = json.dumps(json_serializable_metric_key(metric_key),
                                  sort_keys=True)

            # Pick the output tag for this metric type
            if metric_type == MetricType.POPULATION.value:
                output_tag = 'populations'
            elif metric_type == MetricType.REVOCATION.value:
                output_tag = 'revocations'
            elif metric_type == MetricType.SUCCESS.value:
                output_tag = 'successes'
            elif metric_type == MetricType.ASSESSMENT_CHANGE.value:
                output_tag = 'assessment_changes'
            elif metric_type == MetricType.REVOCATION_ANALYSIS.value:
                output_tag = 'revocation_analyses'
            elif metric_type == MetricType.REVOCATION_VIOLATION_TYPE_ANALYSIS.value:
                output_tag = 'revocation_violation_type_analyses'
            else:
                continue

            yield beam.pvalue.TaggedOutput(output_tag, (json_key, value))
Ejemplo n.º 8
0
    def process(self, element, *args, **kwargs):
        """Converts a ProgramMetric into a dictionary and tags it for writing to BigQuery.

        The beam.io.WriteToBigQuery transform requires elements to be dictionaries whose values are in the formats
        required by the BigQuery I/O connector. For a list of required formats, see the "Data types" section of:
            https://beam.apache.org/documentation/io/built-in/google-bigquery/

        Args:
            element: A ProgramMetric

        Yields:
            A dictionary representation of the ProgramMetric in the format Dict[str, Any], tagged 'referrals' when
                the element is a ProgramReferralMetric. Nothing is yielded for any other element.
        """
        element_dict = json_serializable_metric_key(element.__dict__)

        if not isinstance(element, ProgramReferralMetric):
            return

        yield beam.pvalue.TaggedOutput('referrals', element_dict)
Ejemplo n.º 9
0
    def test_json_serializable_metric_key_OneRace(self):
        """A single-element race list is expected to serialize to that race's name."""
        metric_key = dict(
            gender=Gender.MALE,
            race=[Race.BLACK],
            methodology=MetricMethodologyType.PERSON,
            year=1999,
            month=3,
            state_code='CA',
        )

        expected_output = dict(
            gender='MALE',
            race='BLACK',
            methodology='PERSON',
            year=1999,
            month=3,
            state_code='CA',
        )

        self.assertEqual(expected_output,
                         json_serializable_metric_key(metric_key))
Ejemplo n.º 10
0
    def test_json_serializable_metric_key(self):
        """Enum values are expected to become strings while plain ints and strings pass through unchanged."""
        metric_key = dict(
            gender=Gender.MALE,
            year=1999,
            month=3,
            state_code="CA",
        )

        expected_output = dict(
            gender="MALE",
            year=1999,
            month=3,
            state_code="CA",
        )

        self.assertEqual(expected_output,
                         json_serializable_metric_key(metric_key))
Ejemplo n.º 11
0
    def process(self, element, *args, **kwargs):
        """Emits every recidivism metric combination for one person, tagged by metric type.

        The StatePerson and their ReleaseEvents are handed to the calculator,
        which maps them to all recidivism metric combinations. Each
        combination's key is serialized to a JSON string before being emitted.

        Args:
            element: Tuple containing a StatePerson and their ReleaseEvents
            **kwargs: This should be a dictionary with values for the
                following keys:
                    - age_bucket
                    - gender
                    - stay_length_bucket
                    - release_facility
                    - race
                    - ethnicity
        Yields:
            A TaggedOutput wrapping (JSON metric key, value) for each
            combination whose metric type is RATE, COUNT or LIBERTY.
            Combinations with any other metric type are not emitted.
        """
        person, release_events = element

        metric_combinations = calculator.map_recidivism_combinations(
            person, release_events, kwargs)

        for metric_key, value in metric_combinations:
            metric_type = metric_key.get('metric_type')

            # A dictionary is not hashable, so the key travels as a
            # sorted-key JSON string
            json_key = json.dumps(json_serializable_metric_key(metric_key),
                                  sort_keys=True)

            # Pick the output tag for this metric type
            if metric_type == MetricType.RATE:
                output_tag = 'rates'
            elif metric_type == MetricType.COUNT:
                output_tag = 'counts'
            elif metric_type == MetricType.LIBERTY:
                output_tag = 'liberties'
            else:
                continue

            yield beam.pvalue.TaggedOutput(output_tag, (json_key, value))
Ejemplo n.º 12
0
    def test_json_serializable_metric_key_RaceEthnicityNone(self):
        """Race and ethnicity lists holding only None are expected to be left out of the output."""
        # This should never happen due to the way this dictionary is constructed.
        metric_key = dict(
            gender=Gender.MALE,
            race=[None],
            ethnicity=[None],
            methodology=MetricMethodologyType.PERSON,
            year=1999,
            month=3,
            state_code='CA',
        )

        # No 'race' or 'ethnicity' entries are expected in the result
        expected_output = dict(
            gender='MALE',
            methodology='PERSON',
            year=1999,
            month=3,
            state_code='CA',
        )

        self.assertEqual(expected_output,
                         json_serializable_metric_key(metric_key))
Ejemplo n.º 13
0
    def test_json_serializable_metric_key_RaceEthnicity(self):
        """Race and ethnicity lists are expected to serialize to comma-joined value names."""
        metric_key = dict(
            gender=Gender.MALE,
            race=[Race.BLACK],
            ethnicity=[Ethnicity.HISPANIC, Ethnicity.EXTERNAL_UNKNOWN],
            methodology=MetricMethodologyType.PERSON,
            year=1999,
            month=3,
            state_code='CA',
        )

        expected_output = dict(
            gender='MALE',
            race='BLACK',
            ethnicity='HISPANIC,EXTERNAL_UNKNOWN',
            methodology='PERSON',
            year=1999,
            month=3,
            state_code='CA',
        )

        self.assertEqual(expected_output,
                         json_serializable_metric_key(metric_key))
Ejemplo n.º 14
0
    def process(self, element, *args, **kwargs):
        """Converts a RecidivizMetric into a dictionary tagged with its metric type.

        The beam.io.WriteToBigQuery transform requires elements to be in dictionary form, where the values are in
        formats as required by BigQuery I/O connector.

        For a list of required formats, see the "Data types" section of:
            https://beam.apache.org/documentation/io/built-in/google-bigquery/

        Args:
            element: A RecidivizMetric

        Yields:
            A dictionary representation of the RecidivizMetric in the format Dict[str, Any] so that it can be written to
                BigQuery using beam.io.WriteToBigQuery. The output is tagged with element.metric_type.value.

        Raises:
            ValueError: If the element is not a RecidivizMetric.
        """
        # Validate the type before touching element.__dict__, so that an unexpected element always fails with this
        # error rather than with whatever the serialization step happens to raise first.
        if not isinstance(element, RecidivizMetric):
            raise ValueError("Attempting to convert an object that is not a RecidivizMetric into a writable dict"
                             " for BigQuery.")

        element_dict = json_serializable_metric_key(element.__dict__)

        yield beam.pvalue.TaggedOutput(element.metric_type.value, element_dict)
Ejemplo n.º 15
0
    def process(self, element, calculation_month_limit, inclusions):
        """Emits every incarceration metric combination for one person, tagged by metric type.

        The StatePerson and their IncarcerationEvents are handed to the calculator, which maps them to all
        incarceration metric combinations. Each combination's key is serialized to a JSON string before being
        emitted.

        Args:
            element: Tuple containing a StatePerson and their IncarcerationEvents
            calculation_month_limit: The number of months to limit the monthly calculation output to.
            inclusions: This should be a dictionary with values for the following keys:
                    - age_bucket
                    - gender
                    - race
                    - ethnicity
        Yields:
            A TaggedOutput wrapping (JSON metric key, value) for each combination whose metric type is an admission,
            population or release. Combinations with any other metric type are not emitted.
        """
        person, incarceration_events = element

        metric_combinations = calculator.map_incarceration_combinations(
            person, incarceration_events, inclusions, calculation_month_limit)

        for metric_key, value in metric_combinations:
            metric_type = metric_key.get('metric_type')

            # A dictionary is not hashable, so the key travels as a sorted-key JSON string
            json_key = json.dumps(json_serializable_metric_key(metric_key),
                                  sort_keys=True)

            # Pick the output tag for this metric type
            if metric_type == MetricType.ADMISSION.value:
                output_tag = 'admissions'
            elif metric_type == MetricType.POPULATION.value:
                output_tag = 'populations'
            elif metric_type == MetricType.RELEASE.value:
                output_tag = 'releases'
            else:
                continue

            yield beam.pvalue.TaggedOutput(output_tag, (json_key, value))
Ejemplo n.º 16
0
    def process(self, element, calculation_month_limit, inclusions):
        """Emits every program metric combination for one person, tagged by metric type.

        The StatePerson and their ProgramEvents are handed to the calculator, which maps them to all program metric
        combinations. Each combination's key is serialized to a JSON string before being emitted.

        Args:
            element: Tuple containing a StatePerson and their ProgramEvents
            calculation_month_limit: The number of months to limit the monthly calculation output to.
            inclusions: This should be a dictionary with values for the
                following keys:
                    - age_bucket
                    - gender
                    - race
                    - ethnicity
        Yields:
            A TaggedOutput named 'referrals' wrapping (JSON metric key, value) for each referral combination.
            Combinations with any other metric type are not emitted.
        """
        person, program_events = element

        metric_combinations = calculator.map_program_combinations(
            person=person,
            program_events=program_events,
            inclusions=inclusions,
            calculation_month_limit=calculation_month_limit)

        for metric_key, value in metric_combinations:
            metric_type = metric_key.get('metric_type')

            # A dictionary is not hashable, so the key travels as a sorted-key JSON string
            json_key = json.dumps(json_serializable_metric_key(metric_key),
                                  sort_keys=True)

            if metric_type == MetricType.REFERRAL.value:
                yield beam.pvalue.TaggedOutput('referrals', (json_key, value))