def test_json_serializable_metric_key_InvalidList(self):
    """Tests that a list value under an unsupported key raises ValueError."""
    metric_key = {'invalid_list_key': ['list', 'values']}

    with pytest.raises(ValueError) as e:
        json_serializable_metric_key(metric_key)

    # Bug fix: compare the exception's message, not the ExceptionInfo
    # object itself — `e` is never equal to a plain string, so the
    # original assertion could not pass.
    self.assertEqual(
        str(e.value),
        "Unexpected list in metric_key for key: invalid_list_key")
def convert_event_based_to_person_based_metrics(
        metrics: List[Tuple[Dict[str, Any], Any]]) -> \
        List[Tuple[Dict[str, Any], Any]]:
    """Converts event-based metrics into person-based metrics.

    Marks every metric key with PERSON methodology and drops duplicate
    (metric key, value) pairs, so a single person contributes at most one
    +1 per distinct metric for person-based counts.

    Args:
        metrics: List of (metric key dictionary, value) tuples.

    Returns:
        The deduplicated list of (metric key dictionary, value) tuples.
    """
    deduplicated_metrics = set()

    for metric_key, metric_value in metrics:
        metric_key['methodology'] = MetricMethodologyType.PERSON

        # Dictionaries aren't hashable, so serialize the key to a JSON
        # string (with sorted keys for stability) before adding to the set.
        serialized_key = json.dumps(
            json_serializable_metric_key(metric_key), sort_keys=True)
        deduplicated_metrics.add((serialized_key, metric_value))

    # Deserialize each JSON key back into a dictionary for the output.
    return [(json.loads(serialized_key), metric_value)
            for serialized_key, metric_value in deduplicated_metrics]
def process(self, element, *args, **kwargs):
    """Converts a SupervisionMetric into a dictionary tagged by metric type.

    The beam.io.WriteToBigQuery transform requires elements to be in
    dictionary form, where the values are in formats as required by the
    BigQuery I/O connector. For a list of required formats, see the
    "Data types" section of:
    https://beam.apache.org/documentation/io/built-in/google-bigquery/

    Args:
        element: A SupervisionMetric

    Yields:
        A dictionary representation of the SupervisionMetric in the format
            Dict[str, Any] so that it can be written to BigQuery using
            beam.io.WriteToBigQuery.
    """
    metric_dict = json_serializable_metric_key(element.__dict__)

    # Resolve the output tag by checking the more specific metric
    # classes first; order matters because of subclass relationships.
    if isinstance(element, SupervisionPopulationMetric):
        output_tag = 'populations'
    elif isinstance(element, SupervisionRevocationAnalysisMetric):
        output_tag = 'revocation_analyses'
    elif isinstance(element,
                    SupervisionRevocationViolationTypeAnalysisMetric):
        output_tag = 'revocation_violation_type_analyses'
    elif isinstance(element, SupervisionRevocationMetric):
        # SupervisionRevocationAnalysisMetric instances were already
        # matched by an earlier branch, so no exclusion check is needed.
        output_tag = 'revocations'
    elif isinstance(element, SupervisionSuccessMetric):
        output_tag = 'successes'
    elif isinstance(element,
                    TerminatedSupervisionAssessmentScoreChangeMetric):
        output_tag = 'assessment_changes'
    else:
        output_tag = None

    if output_tag is not None:
        yield beam.pvalue.TaggedOutput(output_tag, metric_dict)
def testProduceProgramMetric_EmptyMetric(self):
    """Tests that an empty metric key produces no program metrics."""
    empty_metric_dict = {}
    metric_key = json.dumps(
        json_serializable_metric_key(empty_metric_dict), sort_keys=True)
    value = 102

    test_pipeline = TestPipeline()

    all_pipeline_options = PipelineOptions().get_all_options()
    all_pipeline_options['job_timestamp'] = \
        datetime.datetime.now().strftime('%Y-%m-%d_%H_%M_%S.%f')

    output = (test_pipeline
              | beam.Create([(metric_key, value)])
              | 'Produce Program Metric' >>
              beam.ParDo(pipeline.ProduceProgramMetrics(),
                         **all_pipeline_options))

    # No output should be produced for an empty metric key.
    assert_that(output, equal_to([]))

    test_pipeline.run()
def testProduceProgramMetric(self):
    """Tests that a populated referral metric key produces a valid
    program referral metric."""
    metric_dict = {
        'gender': Gender.MALE,
        'methodology': MetricMethodologyType.PERSON,
        'year': 1999,
        'month': 3,
        'metric_type': ProgramMetricType.REFERRAL.value,
        'state_code': 'CA',
    }
    metric_key = json.dumps(
        json_serializable_metric_key(metric_dict), sort_keys=True)
    value = 10

    test_pipeline = TestPipeline()

    all_pipeline_options = PipelineOptions().get_all_options()
    all_pipeline_options['job_timestamp'] = \
        datetime.datetime.now().strftime('%Y-%m-%d_%H_%M_%S.%f')

    output = (test_pipeline
              | beam.Create([(metric_key, value)])
              | 'Produce Program Metric' >>
              beam.ParDo(pipeline.ProduceProgramMetrics(),
                         **all_pipeline_options))

    assert_that(output,
                AssertMatchers.validate_program_referral_metric(value))

    test_pipeline.run()
def test_json_serializable_metric_key_ViolationTypeFrequencyCounter(self):
    """Tests that a violation_type_frequency_counter list of lists is
    flattened into a single serializable string."""
    metric_key = {
        'gender': Gender.MALE,
        'year': 1999,
        'month': 3,
        'state_code': 'CA',
        'violation_type_frequency_counter': [
            ['TECHNICAL'],
            ['ASC', 'EMP', 'TECHNICAL'],
        ],
    }

    expected_output = dict(
        gender='MALE',
        year=1999,
        month=3,
        state_code='CA',
        violation_type_frequency_counter='[ASC, EMP, TECHNICAL],[TECHNICAL]',
    )

    self.assertEqual(expected_output,
                     json_serializable_metric_key(metric_key))
def process(self, element, calculation_month_limit, inclusions):
    """Produces various supervision metric combinations.

    Sends the calculator the StatePerson entity and their corresponding
    SupervisionTimeBuckets for mapping all supervision combinations.

    Args:
        element: Tuple containing a StatePerson and their
            SupervisionTimeBuckets
        calculation_month_limit: The number of months to limit the monthly
            calculation output to.
        inclusions: This should be a dictionary with values for the
            following keys:
                - age_bucket
                - gender
                - race
                - ethnicity
    Yields:
        Each supervision metric combination, tagged by metric type.
    """
    person, supervision_time_buckets = element

    # Calculate supervision metric combinations for this person and
    # their supervision time buckets
    metric_combinations = calculator.map_supervision_combinations(
        person, supervision_time_buckets, inclusions,
        calculation_month_limit)

    # Maps each metric type to the tagged output it is routed to.
    tag_for_type = {
        MetricType.POPULATION.value: 'populations',
        MetricType.REVOCATION.value: 'revocations',
        MetricType.SUCCESS.value: 'successes',
        MetricType.ASSESSMENT_CHANGE.value: 'assessment_changes',
        MetricType.REVOCATION_ANALYSIS.value: 'revocation_analyses',
        MetricType.REVOCATION_VIOLATION_TYPE_ANALYSIS.value:
            'revocation_violation_type_analyses',
    }

    for metric_key, value in metric_combinations:
        output_tag = tag_for_type.get(metric_key.get('metric_type'))

        # Serialize the metric key to a JSON string so it is hashable
        json_key = json.dumps(json_serializable_metric_key(metric_key),
                              sort_keys=True)

        if output_tag is not None:
            yield beam.pvalue.TaggedOutput(output_tag, (json_key, value))
def process(self, element, *args, **kwargs):
    """Converts a ProgramMetric into a dictionary tagged by metric type.

    The beam.io.WriteToBigQuery transform requires elements to be in
    dictionary form, where the values are in formats as required by the
    BigQuery I/O connector. For a list of required formats, see the
    "Data types" section of:
    https://beam.apache.org/documentation/io/built-in/google-bigquery/

    Args:
        element: A ProgramMetric

    Yields:
        A dictionary representation of the ProgramMetric in the format
            Dict[str, Any] so that it can be written to BigQuery using
            beam.io.WriteToBigQuery.
    """
    metric_dict = json_serializable_metric_key(element.__dict__)

    # Only referral metrics are currently routed to an output.
    if isinstance(element, ProgramReferralMetric):
        yield beam.pvalue.TaggedOutput('referrals', metric_dict)
def test_json_serializable_metric_key_OneRace(self):
    """Tests that a single-element race list is serialized as a plain
    string value."""
    metric_key = {
        'gender': Gender.MALE,
        'race': [Race.BLACK],
        'methodology': MetricMethodologyType.PERSON,
        'year': 1999,
        'month': 3,
        'state_code': 'CA',
    }

    expected_output = dict(
        gender='MALE',
        race='BLACK',
        methodology='PERSON',
        year=1999,
        month=3,
        state_code='CA',
    )

    self.assertEqual(expected_output,
                     json_serializable_metric_key(metric_key))
def test_json_serializable_metric_key(self):
    """Tests that enum values are converted to their string names while
    plain values pass through unchanged."""
    metric_key = {
        'gender': Gender.MALE,
        'year': 1999,
        'month': 3,
        'state_code': 'CA',
    }

    expected_output = dict(
        gender='MALE',
        year=1999,
        month=3,
        state_code='CA',
    )

    self.assertEqual(expected_output,
                     json_serializable_metric_key(metric_key))
def process(self, element, *args, **kwargs):
    """Produces various recidivism metric combinations.

    Sends the calculator the StatePerson entity and their corresponding
    ReleaseEvents for mapping all recidivism combinations.

    Args:
        element: Tuple containing a StatePerson and their ReleaseEvents
        **kwargs: This should be a dictionary with values for the
            following keys:
                - age_bucket
                - gender
                - stay_length_bucket
                - release_facility
                - race
                - ethnicity
    Yields:
        Each recidivism metric combination, tagged by metric type.
    """
    person, release_events = element

    # Calculate recidivism metric combinations for this person and events
    metric_combinations = calculator.map_recidivism_combinations(
        person, release_events, kwargs)

    # Maps each metric type to the tagged output it is routed to. Note
    # that recidivism metric keys carry the MetricType member itself,
    # not its .value.
    tag_for_type = {
        MetricType.RATE: 'rates',
        MetricType.COUNT: 'counts',
        MetricType.LIBERTY: 'liberties',
    }

    for metric_key, value in metric_combinations:
        output_tag = tag_for_type.get(metric_key.get('metric_type'))

        # Serialize the metric key to a JSON string so it is hashable
        json_key = json.dumps(json_serializable_metric_key(metric_key),
                              sort_keys=True)

        if output_tag is not None:
            yield beam.pvalue.TaggedOutput(output_tag, (json_key, value))
def test_json_serializable_metric_key_RaceEthnicityNone(self):
    """Tests that race/ethnicity lists containing only None are dropped
    from the serialized output.

    This should never happen due to the way this dictionary is
    constructed, but the behavior is pinned here regardless.
    """
    metric_key = {
        'gender': Gender.MALE,
        'race': [None],
        'ethnicity': [None],
        'methodology': MetricMethodologyType.PERSON,
        'year': 1999,
        'month': 3,
        'state_code': 'CA',
    }

    # Neither 'race' nor 'ethnicity' should survive serialization.
    expected_output = dict(
        gender='MALE',
        methodology='PERSON',
        year=1999,
        month=3,
        state_code='CA',
    )

    self.assertEqual(expected_output,
                     json_serializable_metric_key(metric_key))
def test_json_serializable_metric_key_RaceEthnicity(self):
    """Tests that multi-element ethnicity lists are joined into a single
    comma-separated string."""
    metric_key = {
        'gender': Gender.MALE,
        'race': [Race.BLACK],
        'ethnicity': [Ethnicity.HISPANIC, Ethnicity.EXTERNAL_UNKNOWN],
        'methodology': MetricMethodologyType.PERSON,
        'year': 1999,
        'month': 3,
        'state_code': 'CA',
    }

    expected_output = dict(
        gender='MALE',
        race='BLACK',
        ethnicity='HISPANIC,EXTERNAL_UNKNOWN',
        methodology='PERSON',
        year=1999,
        month=3,
        state_code='CA',
    )

    self.assertEqual(expected_output,
                     json_serializable_metric_key(metric_key))
def process(self, element, *args, **kwargs):
    """Converts a RecidivizMetric into a dictionary tagged by metric type.

    The beam.io.WriteToBigQuery transform requires elements to be in
    dictionary form, where the values are in formats as required by the
    BigQuery I/O connector. For a list of required formats, see the
    "Data types" section of:
    https://beam.apache.org/documentation/io/built-in/google-bigquery/

    Args:
        element: A RecidivizMetric

    Yields:
        A dictionary representation of the RecidivizMetric in the format
            Dict[str, Any] so that it can be written to BigQuery using
            beam.io.WriteToBigQuery.

    Raises:
        ValueError: If the element is not a RecidivizMetric.
    """
    element_dict = json_serializable_metric_key(element.__dict__)

    if isinstance(element, RecidivizMetric):
        yield beam.pvalue.TaggedOutput(element.metric_type.value,
                                       element_dict)
    else:
        # Bug fix: the implicit string concatenation was missing a
        # separating space, yielding "...writable dictfor BigQuery."
        raise ValueError(
            "Attempting to convert an object that is not a RecidivizMetric "
            "into a writable dict for BigQuery.")
def process(self, element, calculation_month_limit, inclusions):
    """Produces various incarceration metric combinations.

    Sends the calculator the StatePerson entity and their corresponding
    IncarcerationEvents for mapping all incarceration combinations.

    Args:
        element: Tuple containing a StatePerson and their
            IncarcerationEvents
        calculation_month_limit: The number of months to limit the monthly
            calculation output to.
        inclusions: This should be a dictionary with values for the
            following keys:
                - age_bucket
                - gender
                - race
                - ethnicity
    Yields:
        Each incarceration metric combination, tagged by metric type.
    """
    person, incarceration_events = element

    # Calculate incarceration metric combinations for this person and
    # events
    metric_combinations = calculator.map_incarceration_combinations(
        person, incarceration_events, inclusions, calculation_month_limit)

    # Maps each metric type to the tagged output it is routed to.
    tag_for_type = {
        MetricType.ADMISSION.value: 'admissions',
        MetricType.POPULATION.value: 'populations',
        MetricType.RELEASE.value: 'releases',
    }

    for metric_key, value in metric_combinations:
        output_tag = tag_for_type.get(metric_key.get('metric_type'))

        # Serialize the metric key to a JSON string so it is hashable
        json_key = json.dumps(json_serializable_metric_key(metric_key),
                              sort_keys=True)

        if output_tag is not None:
            yield beam.pvalue.TaggedOutput(output_tag, (json_key, value))
def process(self, element, calculation_month_limit, inclusions):
    """Produces various program metric combinations.

    Sends the calculator the StatePerson entity and their corresponding
    ProgramEvents for mapping all program combinations.

    Args:
        element: Tuple containing a StatePerson and their ProgramEvents
        calculation_month_limit: The number of months to limit the monthly
            calculation output to.
        inclusions: This should be a dictionary with values for the
            following keys:
                - age_bucket
                - gender
                - race
                - ethnicity
    Yields:
        Each program metric combination, tagged by metric type.
    """
    person, program_events = element

    # Calculate program metric combinations for this person and their
    # program events
    metric_combinations = calculator.map_program_combinations(
        person=person,
        program_events=program_events,
        inclusions=inclusions,
        calculation_month_limit=calculation_month_limit)

    for metric_key, value in metric_combinations:
        is_referral = \
            metric_key.get('metric_type') == MetricType.REFERRAL.value

        # Serialize the metric key to a JSON string so it is hashable
        json_key = json.dumps(json_serializable_metric_key(metric_key),
                              sort_keys=True)

        if is_referral:
            yield beam.pvalue.TaggedOutput('referrals', (json_key, value))