def get_calculation_month_upper_bound_date(
        calculation_end_month: Optional[str],
) -> datetime.date:
    """Resolves the last day of the month that bounds a calculation.

    Args:
        calculation_end_month: The bounding month as a 'YYYY-MM' string. When
            this is falsy (None or an empty string), the month reported by
            year_and_month_for_today() is used instead.

    Returns:
        The value of last_day_of_month() for the selected month.

    Raises:
        ValueError: If calculation_end_month is set but cannot be parsed with
            the '%Y-%m' format. The parsing error is chained as the cause.
    """
    if calculation_end_month:
        try:
            parsed_month = datetime.datetime.strptime(
                calculation_end_month, "%Y-%m")
        except ValueError as e:
            raise ValueError(
                f"Invalid value for calculation_end_month: {calculation_end_month}"
            ) from e

        return last_day_of_month(parsed_month.date())

    # No explicit month was provided, so fall back to the current month.
    year, month = year_and_month_for_today()
    return last_day_of_month(datetime.date(year, month, 1))
def relevant_metric_periods(event_date: datetime.date,
                            end_year: int,
                            end_month: int) -> List[int]:
    """Finds the metric period lengths (in months) that contain event_date.

    Every metric period ends on the last day of the month given by end_year
    and end_month. A period of N months begins right after the last day of
    the month that is N months before the ending month. For example, with
    end_year=2009 and end_month=10 the periods cover events since the start
    of:
        - 10-2009 (metric_period = 1)
        - 08-2009 (metric_period = 3)
        - 05-2009 (metric_period = 6)
        - 11-2008 (metric_period = 12)
        - 11-2006 (metric_period = 36)

    An event in 11-2008 therefore falls within the 12-month and 36-month
    periods, but not within the shorter ones.

    The lengths in METRIC_PERIOD_MONTHS are checked in the order they are
    listed, and the scan stops at the first length whose period does not
    contain the event.
    NOTE(review): this presumably relies on METRIC_PERIOD_MONTHS being
    ordered from longest to shortest - confirm against its definition.

    Args:
        event_date: The date of the event being bucketed.
        end_year: The year in which the metric periods end.
        end_month: The month in which the metric periods end.

    Returns:
        The matching metric period lengths, in the order they appear in
        METRIC_PERIOD_MONTHS.
    """
    first_of_end_month = datetime.date(end_year, end_month, 1)
    period_end_date = last_day_of_month(first_of_end_month)

    matching_periods = []

    for months_back in METRIC_PERIOD_MONTHS:
        boundary_month = first_of_end_month - \
            dateutil.relativedelta.relativedelta(months=months_back)

        # The period starts the day after this date, so the bound is exclusive
        exclusive_lower_bound = last_day_of_month(boundary_month)

        if not (exclusive_lower_bound < event_date <= period_end_date):
            break

        matching_periods.append(months_back)

    return matching_periods
def get_month_supervision_type_default(
        any_date_in_month: datetime.date,
        supervision_sentences: List[StateSupervisionSentence],
        incarceration_sentences: List[StateIncarcerationSentence],
        supervision_period: StateSupervisionPeriod
) -> StateSupervisionPeriodSupervisionType:
    """Determines the supervision type a supervision period represents during
    the month containing |any_date_in_month|.

    The supervision type can change over time even when the period itself
    does not. To find the type for a given month, both sentence lists are
    narrowed to the sentences that are validly attached to the supervision
    period and that overlap with the month, and the supervision type is then
    derived from the remaining sentences.

    Args:
        any_date_in_month: (date) Any day in the month to consider
        supervision_sentences: (List[StateSupervisionSentence]) The
            supervision sentences to consider.
        incarceration_sentences: (List[StateIncarcerationSentence]) The
            incarceration sentences to consider.
        supervision_period: (StateSupervisionPeriod) The supervision period
            we want to associate a supervision type with

    Returns:
        The result of get_supervision_type_from_sentences() for the filtered
        incarceration and supervision sentences.

    Raises:
        ValueError: If the supervision period has no supervision_period_id,
            or if it is a placeholder.
    """
    if not supervision_period.supervision_period_id:
        raise ValueError('All objects should have database ids.')

    if is_placeholder(supervision_period):
        raise ValueError('Do not expect placeholder periods!')

    month_start = first_day_of_month(any_date_in_month)
    month_end = last_day_of_month(any_date_in_month)

    # Incarceration sentences attached to the period that overlap the month
    attached_incarceration_sentences = _get_valid_attached_sentences(
        incarceration_sentences, supervision_period)
    overlapping_incarceration_sentences = \
        _get_sentences_overlapping_with_dates(
            month_start, month_end, attached_incarceration_sentences)

    # Supervision sentences attached to the period that overlap the month
    attached_supervision_sentences = _get_valid_attached_sentences(
        supervision_sentences, supervision_period)
    overlapping_supervision_sentences = \
        _get_sentences_overlapping_with_dates(
            month_start, month_end, attached_supervision_sentences)

    return get_supervision_type_from_sentences(
        overlapping_incarceration_sentences,
        overlapping_supervision_sentences)
def us_mo_get_month_supervision_type(
        any_date_in_month: datetime.date,
        supervision_sentences: List[StateSupervisionSentence],
        incarceration_sentences: List[StateIncarcerationSentence],
        supervision_period: StateSupervisionPeriod
) -> StateSupervisionPeriodSupervisionType:
    """Calculates the supervision period supervision type that should be
    attributed to a US_MO supervision period for the month containing
    |any_date_in_month|.

    The search window for the supervision type is:
        - lower bound (inclusive): the first day of the month or the
          period's start date, whichever is later. A missing start date is
          treated as datetime.date.min.
        - upper bound (exclusive): the first day of the following month or
          the period's termination date, whichever is earlier. A missing
          termination date leaves the first day of the following month as
          the bound.

    Args:
        any_date_in_month: Any day in the month to consider.
        supervision_sentences: The supervision sentences to consider.
        incarceration_sentences: The incarceration sentences to consider.
        supervision_period: The supervision period to find a type for.

    Returns:
        The most recent supervision type found before the upper bound, or
        StateSupervisionPeriodSupervisionType.INTERNAL_UNKNOWN if the lookup
        returns a falsy value.
    """
    month_start = first_day_of_month(any_date_in_month)
    next_month_start = \
        last_day_of_month(any_date_in_month) + datetime.timedelta(days=1)

    termination_date = supervision_period.termination_date
    upper_bound_exclusive_date = (
        next_month_start if termination_date is None
        else min(next_month_start, termination_date)
    )

    period_start = supervision_period.start_date or datetime.date.min
    lower_bound_inclusive_date = max(month_start, period_start)

    found_type = \
        us_mo_get_most_recent_supervision_period_supervision_type_before_upper_bound_day(
            upper_bound_exclusive_date=upper_bound_exclusive_date,
            lower_bound_inclusive_date=lower_bound_inclusive_date,
            supervision_sentences=supervision_sentences,
            incarceration_sentences=incarceration_sentences)

    return found_type or StateSupervisionPeriodSupervisionType.INTERNAL_UNKNOWN
def map_recidivism_combinations(
        person: StatePerson,
        release_events: Dict[int, List[ReleaseEvent]],
        metric_inclusions: Dict[ReincarcerationRecidivismMetricType, bool],
        person_metadata: PersonMetadata
) -> List[Tuple[Dict[str, Any], Any]]:
    """Transforms ReleaseEvents and a StatePerson into metric combinations.

    Takes in a StatePerson and all of their ReleaseEvents and returns a list
    of "recidivism combinations". These are key-value pairs where the key
    represents a specific metric and the value is the recidivism value for
    that metric.

    For every ReleaseEvent:
        - If REINCARCERATION_RATE is enabled in metric_inclusions, rate-based
          metrics are produced for the event.
        - If REINCARCERATION_COUNT is enabled in metric_inclusions and the
          event is a RecidivismReleaseEvent, count-based metrics are
          produced for the event. The metric periods for these end on the
          last day of the current month.

    Args:
        person: the StatePerson
        release_events: A dictionary mapping release cohorts to a list of
            ReleaseEvents for the given StatePerson.
        metric_inclusions: A dictionary where the keys are each
            ReincarcerationRecidivismMetricType, and the values are boolean
            flags for whether or not to include that metric type in the
            calculations
        person_metadata: Contains information about the StatePerson that is
            necessary for the metrics.

    Returns:
        A list of key-value tuples representing specific metric combinations
        and the recidivism value corresponding to that metric.
    """
    include_rate = metric_inclusions.get(
        ReincarcerationRecidivismMetricType.REINCARCERATION_RATE)
    include_count = metric_inclusions.get(
        ReincarcerationRecidivismMetricType.REINCARCERATION_COUNT)

    all_reincarcerations = reincarcerations(release_events)
    metric_period_end_date = last_day_of_month(date.today())

    metrics = []

    for cohort_events in release_events.values():
        for event in cohort_events:
            if include_rate:
                rate_characteristics = characteristics_dict(
                    person, event, ReincarcerationRecidivismRateMetric,
                    person_metadata)

                metrics.extend(
                    map_recidivism_rate_combinations(
                        rate_characteristics, event, release_events,
                        all_reincarcerations))

            # Count metrics only apply to releases that led to recidivism
            if include_count and isinstance(event, RecidivismReleaseEvent):
                count_characteristics = characteristics_dict(
                    person, event, ReincarcerationRecidivismCountMetric,
                    person_metadata)

                metrics.extend(
                    map_recidivism_count_combinations(
                        count_characteristics, event, all_reincarcerations,
                        metric_period_end_date))

    return metrics
def map_recidivism_count_combinations(
        characteristic_combo: Dict[str, Any],
        event: ReleaseEvent,
        all_reincarcerations: Dict[date, RecidivismReleaseEvent],
        metric_period_end_date: date
) -> List[Tuple[Dict[str, Any], Any]]:
    """Maps the given event and characteristic combination to metrics that
    track count-based recidivism.

    Metrics are only produced when the event is a RecidivismReleaseEvent. In
    that case, metrics are produced for:
        - the month in which the person was reincarcerated, with a
          metric_period_months value of 1
        - each metric period length, ending in the month of
          metric_period_end_date, that relevant_metric_periods() returns for
          the reincarceration date

    Note: when the event is a RecidivismReleaseEvent, the 'metric_type' key
    is set directly on the |characteristic_combo| dictionary that was passed
    in, so the caller's dictionary is modified.

    Args:
        characteristic_combo: A dictionary describing the person and event
        event: the recidivism event from which the combination was derived
        all_reincarcerations: dictionary where the keys are all dates of
            reincarceration for the person's ReleaseEvents, and the values
            are the corresponding ReleaseEvents
        metric_period_end_date: The day the metric periods end

    Returns:
        A list of key-value tuples representing specific metric combinations
        and the recidivism value corresponding to that metric. The list is
        empty if the event is not a RecidivismReleaseEvent.
    """
    if not isinstance(event, RecidivismReleaseEvent):
        return []

    reincarceration_date = event.reincarceration_date

    period_lengths = relevant_metric_periods(
        reincarceration_date,
        metric_period_end_date.year,
        metric_period_end_date.month)

    characteristic_combo['metric_type'] = \
        ReincarcerationRecidivismMetricType.REINCARCERATION_COUNT

    # Bucket for the month of the reincarceration
    event_month_combo = characteristic_combo.copy()
    event_month_combo.update({
        'year': reincarceration_date.year,
        'month': reincarceration_date.month,
        'metric_period_months': 1,
    })

    metrics = []
    metrics.extend(
        combination_count_metrics(
            event_month_combo, event, all_reincarcerations,
            last_day_of_month(reincarceration_date)))

    # Bucket for each of the relevant metric period month lengths
    for period_length in period_lengths:
        period_combo = characteristic_combo.copy()
        period_combo.update({
            'year': metric_period_end_date.year,
            'month': metric_period_end_date.month,
            'metric_period_months': period_length,
        })

        metrics.extend(
            combination_count_metrics(
                period_combo, event, all_reincarcerations,
                metric_period_end_date))

    return metrics
def combination_incarceration_metrics(
        combo: Dict[str, Any],
        incarceration_event: IncarcerationEvent,
        all_incarceration_events: List[IncarcerationEvent],
        is_daily_metric: bool
) -> List[Tuple[Dict[str, Any], int]]:
    """Returns all unique incarceration metrics for the given event and
    combination.

    An event-based count is always included for the event. A person-based
    count is included as well if the event should be counted among this
    person's events of the same type in the same window. The window is the
    day of the event for a daily metric, and the month of the event
    otherwise.

    Both combinations are built for the year and month of the event, with a
    metric_period_months value of 0 for daily metrics and 1 otherwise.

    Args:
        combo: A characteristic combination to convert into metrics
        incarceration_event: The IncarcerationEvent from which the
            combination was derived
        all_incarceration_events: All of this person's IncarcerationEvents
        is_daily_metric: If True, limits person-based counts to the date of
            the event. If False, limits person-based counts to the month of
            the event.

    Returns:
        A list of key-value tuples representing specific metric combination
        dictionaries and the number 1 representing a positive contribution
        to that count metric.
    """
    event_date = incarceration_event.event_date
    year, month = event_date.year, event_date.month

    if is_daily_metric:
        metric_period_months = 0
        day_to_match = event_date.day
    else:
        metric_period_months = 1
        day_to_match = None

    # The event-based combo always contributes to the count
    event_based_combo = augmented_combo_for_calculations(
        combo, incarceration_event.state_code,
        year, month,
        MetricMethodologyType.EVENT,
        metric_period_months=metric_period_months)

    metrics = [(event_based_combo, 1)]

    # The person-based combo only contributes if this event is the one that
    # should be counted for the person in the window
    person_based_combo = augmented_combo_for_calculations(
        combo, incarceration_event.state_code,
        year, month,
        MetricMethodologyType.PERSON,
        metric_period_months=metric_period_months)

    # Events of the same type that happened in the same window
    same_window_events = matching_events_for_person_based_count(
        year=year,
        month=month,
        day=day_to_match,
        event_type=type(incarceration_event),
        all_incarceration_events=all_incarceration_events)

    if same_window_events and include_event_in_count(
            incarceration_event,
            last_day_of_month(event_date),
            same_window_events):
        metrics.append((person_based_combo, 1))

    return metrics
if text is None: return SnapshotType.DAY return cls(text) DAY = 'DAY' FIRST_DAY_OF_MONTH = 'FIRST_DAY_OF_MONTH' LAST_DAY_OF_MONTH = 'LAST_DAY_OF_MONTH' SNAPSHOT_CONVERTERS: Dict[SnapshotType, DateRangeConverterType] = { SnapshotType.DAY: DateRange.for_day, SnapshotType.FIRST_DAY_OF_MONTH: lambda date: DateRange.for_day(first_day_of_month(date)), SnapshotType.LAST_DAY_OF_MONTH: lambda date: DateRange.for_day(last_day_of_month(date)), } class Metric: @property @abstractmethod def filters(self) -> List[Dimension]: """Any dimensions where the data only represents a subset of values for that dimension. For instance, a table for the population metric may only cover data for the prison population, not those on parole or probation. In that case filters would contain PopulationType.PRISON. """ @property @abstractmethod