NULL AS male_white, NULL AS male_other, NULL AS male_unknown_race, NULL AS female_asian, NULL AS female_black, NULL AS female_native_american, NULL AS female_latino, NULL AS female_white, NULL AS female_other, NULL AS female_unknown_race, NULL AS unknown_gender_asian, NULL AS unknown_gender_black, NULL AS unknown_gender_native_american, NULL AS unknown_gender_latino, NULL AS unknown_gender_white, NULL AS unknown_gender_other, NULL AS unknown_gender_unknown_race FROM `{project_id}.{views_dataset}.{combined_state_aggregates}` """.format(project_id=PROJECT_ID, views_dataset=VIEWS_DATASET, combined_state_aggregates=state_aggregate_collapsed_to_fips.STATE_AGGREGATES_COLLAPSED_TO_FIPS.view_id, description=_DESCRIPTION) STATE_AGGREGATE_STITCH_SUBSET_VIEW = BigQueryView( view_id='state_aggregate_stitch_subset', view_query=_QUERY ) if __name__ == '__main__': print(STATE_AGGREGATE_STITCH_SUBSET_VIEW.view_query)
SUM(IF(gender = 'MALE' AND race = 'OTHER', person_count, null)) AS male_other, SUM(IF(gender = 'MALE' AND race = 'EXTERNAL_UNKNOWN', person_count, null)) AS male_unknown_race, SUM(IF(gender = 'FEMALE' AND race = 'ASIAN', person_count, null)) AS female_asian, SUM(IF(gender = 'FEMALE' AND race = 'BLACK', person_count, null)) AS female_black, SUM(IF(gender = 'FEMALE' AND race = 'AMERICAN_INDIAN_ALASKAN_NATIVE', person_count, null)) AS female_native_american, SUM(IF(gender = 'FEMALE' AND race = 'HISPANIC', person_count, null)) AS female_latino, SUM(IF(gender = 'FEMALE' AND race = 'WHITE', person_count, null)) AS female_white, SUM(IF(gender = 'FEMALE' AND race = 'OTHER', person_count, null)) AS female_other, SUM(IF(gender = 'FEMALE' AND race = 'EXTERNAL_UNKNOWN', person_count, null)) AS female_unknown_race, SUM(IF(gender = 'EXTERNAL_UNKNOWN' AND race = 'ASIAN', person_count, null)) AS unknown_gender_asian, SUM(IF(gender = 'EXTERNAL_UNKNOWN' AND race = 'BLACK', person_count, null)) AS unknown_gender_black, SUM(IF(gender = 'EXTERNAL_UNKNOWN' AND race = 'AMERICAN_INDIAN_ALASKAN_NATIVE', person_count, null)) AS unknown_gender_native_american, SUM(IF(gender = 'EXTERNAL_UNKNOWN' AND race = 'HISPANIC', person_count, null)) AS unknown_gender_latino, SUM(IF(gender = 'EXTERNAL_UNKNOWN' AND race = 'WHITE', person_count, null)) AS unknown_gender_white, SUM(IF(gender = 'EXTERNAL_UNKNOWN' AND race = 'OTHER', person_count, null)) AS unknown_gender_other, SUM(IF(gender = 'EXTERNAL_UNKNOWN' AND race = 'EXTERNAL_UNKNOWN', person_count, null)) AS unknown_gender_unknown_race FROM `{project_id}.{views_dataset}.{population_admissions_releases_race_gender}` RaceGender GROUP BY fips, RaceGender.day """.format( project_id=PROJECT_ID, views_dataset=VIEWS_DATASET, population_admissions_releases_race_gender=population_admissions_releases. POPULATION_ADMISSIONS_RELEASES_RACE_GENDER_VIEW.view_id, description=_DESCRIPTION) SCRAPER_AGGREGATED_STITCH_SUBSET_VIEW = BigQueryView( view_id='scraper_aggregated_stitch_subset', view_query=_QUERY) if __name__ == '__main__': print(SCRAPER_AGGREGATED_STITCH_SUBSET_VIEW.view_query)
from recidiviz.persistence.database import schema_utils from recidiviz.utils import metadata def _to_bq_table(query_str: str) -> str: """Rename schema table_names with supported BQ syntax.""" project_id = metadata.project_id() base_dataset = export_config.COUNTY_BASE_TABLES_BQ_DATASET for table in schema_utils.get_aggregate_table_classes(): bq_table_name = '`{project_id}.{base_dataset}.{table_name}`'.format( project_id=project_id, base_dataset=base_dataset, table_name=table.name) query_str = query_str.replace(table.name, bq_table_name) return query_str _QUERIES = [m.to_query() for m in state_aggregate_mappings.MAPPINGS] _UNIONED_STATEMENT = sqlalchemy.union_all(*_QUERIES) _BQ_UNIONED_STATEMENT = _to_bq_table(str(_UNIONED_STATEMENT.compile())) # This view is the concatenation of all "state-reported aggregate reports" after # mapping each column to a shared column_name. The next logical derivation from # this view is to combine it with a view of our "scraped website data" that maps # to the same shared column_names. Both of these views should be aggregated to # the same level (eg. jurisdiction level via jurisdiction_id). COMBINED_STATE_AGGREGATE_VIEW = BigQueryView( view_id='combined_state_aggregates', view_query=_BQ_UNIONED_STATEMENT)
PROJECT_ID: str = metadata.project_id() _DESCRIPTION = """ Combine {itp}, {state}, {scraper}, and {single_count} into one unified view, limited to total jail population only. When overlapping data exists, we select {state} data first. We then select any {itp} data that exists before {state} data. We then select any {scraper} data that exists after {state} data and finally the {single_count} data after that. Note: we use valid_from to check cutoffs, instead of checking valid_from and valid_to (eg: {itp}.valid_to < {state}.valid_from). This is because all data points are plotted using valid_from. """.format( state=INCARCERATION_TRENDS_STITCH_SUBSET_VIEW.view_id, scraper=SCRAPER_AGGREGATED_STITCH_SUBSET_VIEW.view_id, single_count=SINGLE_COUNT_STITCH_SUBSET_VIEW.view_id, itp=STATE_AGGREGATE_STITCH_SUBSET_VIEW.view_id) with open(os.path.splitext(__file__)[0] + '.sql') as fp: _QUERY = fp.read().format( project_id=PROJECT_ID, views_dataset=VIEWS_DATASET, combined_stitch=combined_stitch.COMBINED_STITCH_VIEW.view_id, description=_DESCRIPTION) COMBINED_STITCH_DROP_OVERLAPPING_TOTAL_JAIL_POP_VIEW = BigQueryView( view_id='combined_stitch_drop_overlapping_total_jail_pop', view_query=_QUERY ) if __name__ == '__main__': print(COMBINED_STITCH_DROP_OVERLAPPING_TOTAL_JAIL_POP_VIEW.view_query)
ON Data.fips = StateCutoffs.fips FULL JOIN ScrapedCutoffs ON Data.fips = ScrapedCutoffs.fips WHERE # We only have itp data (StateCutoffs.fips IS NULL AND ScrapedCutoffs.fips IS NULL) OR # We have itp and scraped (StateCutoffs.fips IS NULL AND ( Data.data_source = 'scraped' OR (Data.data_source = 'incarceration_trends' AND Data.valid_from < ScrapedCutoffs.min_valid_from) )) OR # We have itp, state_aggregate and scraped Data.data_source = 'state_aggregates' OR (Data.data_source = 'incarceration_trends' AND Data.valid_from < StateCutoffs.min_valid_from) OR (Data.data_source = 'scraped' AND StateCutoffs.min_valid_from < Data.valid_from) """.format(project_id=PROJECT_ID, views_dataset=VIEWS_DATASET, combined_stitch=combined_stitch.COMBINED_STITCH_VIEW.view_id, description=_DESCRIPTION) COMBINED_STITCH_DROP_OVERLAPPING_VIEW = BigQueryView( view_id='combined_stitch_drop_overlapping', view_query=_QUERY) if __name__ == '__main__': print(COMBINED_STITCH_DROP_OVERLAPPING_VIEW.view_query)
NULL AS male_other, NULL AS male_unknown_race, NULL AS female_asian, NULL AS female_black, NULL AS female_native_american, NULL AS female_latino, NULL AS female_white, NULL AS female_other, NULL AS female_unknown_race, NULL AS unknown_gender_asian, NULL AS unknown_gender_black, NULL AS unknown_gender_native_american, NULL AS unknown_gender_latino, NULL AS unknown_gender_white, NULL AS unknown_gender_other, NULL AS unknown_gender_unknown_race FROM `{project_id}.{vera_dataset}.{incarceration_trends}` """.format(project_id=PROJECT_ID, vera_dataset=VERA_DATASET, incarceration_trends=INCARCERATION_TRENDS_TABLE, description=_DESCRIPTION) INCARCERATION_TRENDS_STITCH_SUBSET_VIEW = BigQueryView( view_id='incarceration_trends_stitch_subset', view_query=_QUERY) if __name__ == '__main__': print(INCARCERATION_TRENDS_STITCH_SUBSET_VIEW.view_query)
SUM(misdemeanor_sentenced) AS misdemeanor_sentenced, SUM(misdemeanor_male) AS misdemeanor_male, SUM(misdemeanor_female) AS misdemeanor_female, SUM(misdemeanor_pretrial_male) AS misdemeanor_pretrial_male, SUM(misdemeanor_pretrial_female) AS misdemeanor_pretrial_female, SUM(misdemeanor_sentenced_male) AS misdemeanor_sentenced_male, SUM(misdemeanor_sentenced_female) AS misdemeanor_sentenced_female, SUM(parole_violators_male) AS parole_violators_male, SUM(parole_violators_female) AS parole_violators_female, SUM(held_for_doc_male) AS held_for_doc_male, SUM(held_for_doc_female) AS held_for_doc_female, SUM(held_for_federal_male) AS held_for_federal_male, SUM(held_for_federal_female) AS held_for_federal_female, SUM(total_held_for_other_male) AS total_held_for_other_male, SUM(total_held_for_other_female) AS total_held_for_other_female FROM `{project_id}.{views_dataset}.{combined_state_aggregates}` GROUP BY fips, report_date, aggregation_window """.format(project_id=PROJECT_ID, views_dataset=VIEWS_DATASET, combined_state_aggregates=COMBINED_STATE_AGGREGATE_VIEW.view_id, description=_DESCRIPTION) STATE_AGGREGATES_COLLAPSED_TO_FIPS: BigQueryView = BigQueryView( view_id='state_aggregates_collapsed_to_fips', view_query=_QUERY ) if __name__ == '__main__': print(STATE_AGGREGATES_COLLAPSED_TO_FIPS.view_query)
# along with this program. If not, see <https://www.gnu.org/licenses/>. # ============================================================================= """Single count data used for stitch""" import os from recidiviz.calculator.query.bqview import BigQueryView from recidiviz.calculator.query.export_config import \ COUNTY_BASE_TABLES_BQ_DATASET from recidiviz.utils import metadata PROJECT_ID: str = metadata.project_id() VIEWS_DATASET: str = COUNTY_BASE_TABLES_BQ_DATASET SINGLE_COUNT_AGGREGATE_VIEW_ID: str = 'single_count_aggregate' _DESCRIPTION = """ Copy single count data to a format for stitching. """ with open(os.path.splitext(__file__)[0] + '.sql') as fp: _QUERY = fp.read().format( project_id=PROJECT_ID, views_dataset=VIEWS_DATASET, single_count_aggregate=SINGLE_COUNT_AGGREGATE_VIEW_ID, description=_DESCRIPTION) SINGLE_COUNT_STITCH_SUBSET_VIEW = BigQueryView( view_id='single_count_stitch_subset', view_query=_QUERY) if __name__ == '__main__': print(SINGLE_COUNT_STITCH_SUBSET_VIEW.view_query)
incarceration_trends_aggregate= INCARCERATION_TRENDS_STITCH_SUBSET_VIEW.view_id) _QUERY = """ /*{description}*/ SELECT * FROM `{project_id}.{views_dataset}.{interpolated_state_aggregate}` UNION ALL SELECT * FROM `{project_id}.{views_dataset}.{single_count_aggregate}` UNION ALL SELECT * FROM `{project_id}.{views_dataset}.{scraper_data_aggregated}` UNION ALL SELECT * FROM `{project_id}.{views_dataset}.{incarceration_trends_aggregate}` """.format(project_id=PROJECT_ID, views_dataset=VIEWS_DATASET, interpolated_state_aggregate= STATE_AGGREGATE_STITCH_SUBSET_VIEW.view_id, single_count_aggregate=SINGLE_COUNT_STITCH_SUBSET_VIEW.view_id, scraper_data_aggregated= SCRAPER_AGGREGATED_STITCH_SUBSET_VIEW.view_id, incarceration_trends_aggregate= INCARCERATION_TRENDS_STITCH_SUBSET_VIEW.view_id, description=_DESCRIPTION) COMBINED_STITCH_VIEW = BigQueryView( view_id='combined_stitch', view_query=_QUERY ) if __name__ == '__main__': print(COMBINED_STITCH_VIEW.view_query)