# NOTE(review): this chunk begins mid-expression — the opening of the
# pd.DataFrame({...}) literal for `temp_monthly_total_population_data` (and
# the enclosing loop that defines `year` and `reference_year`) is above this
# view, so the first comprehension below is a continuation.
for month in range(12)
] + [
    # Presumably rows 3-5 of each 6-row yearly group in
    # `total_population_data` hold further sub-population counts (column 4),
    # each repeated for the 12 months of the year — TODO confirm row layout.
    total_population_data.iloc[(year - reference_year) * 6 + 3, 4]
    for month in range(12)
] + [
    total_population_data.iloc[(year - reference_year) * 6 + 4, 4]
    for month in range(12)
] + [
    total_population_data.iloc[(year - reference_year) * 6 + 5, 4]
    for month in range(12)
],
})
# Accumulate this year's monthly rows onto the running population table.
monthly_total_population_data = pd.concat(
    [monthly_total_population_data, temp_monthly_total_population_data])

# STORE DATA
# Rename the "tis_percentage" disaggregation column to "crime_type", the
# axis name the upload expects, on all three tables.
monthly_outflows_data = monthly_outflows_data.rename(
    {"tis_percentage": "crime_type"}, axis=1)
transitions_data = transitions_data.rename({"tis_percentage": "crime_type"},
                                           axis=1)
monthly_total_population_data = monthly_total_population_data.rename(
    {"tis_percentage": "crime_type"}, axis=1)
upload_spark_model_inputs(
    "recidiviz-staging",
    "IL_prison_TIS",
    monthly_outflows_data,
    transitions_data,
    monthly_total_population_data,
)
# OUTFLOWS TABLE (continued): expand each yearly outflows row into 12 monthly
# rows. Monthly step = 12 * yearly step - month offset, and the yearly count
# is split evenly across the 12 months.
monthly_outflow_frames = []
for yearly_step in outflows_data.time_step.unique():
    rows_for_year = outflows_data[outflows_data.time_step == yearly_step]
    for month_offset in range(12):
        monthly_rows = rows_for_year.copy()
        monthly_rows.time_step = 12 * monthly_rows.time_step - month_offset
        monthly_rows.total_population /= 12
        monthly_outflow_frames.append(monthly_rows)
final_outflows = pd.concat([final_outflows, *monthly_outflow_frames])
outflows_data = final_outflows

# TOTAL POPULATION TABLE: same yearly -> monthly expansion, but the yearly
# value is carried into each month unchanged (no division, unlike outflows).
monthly_pop_frames = []
for yearly_step in total_population_data.time_step.unique():
    rows_for_year = total_population_data[
        total_population_data.time_step == yearly_step]
    for month_offset in range(12):
        monthly_rows = rows_for_year.copy()
        monthly_rows.time_step = 12 * monthly_rows.time_step - month_offset
        monthly_pop_frames.append(monthly_rows)
final_pops = pd.concat([pd.DataFrame(), *monthly_pop_frames])
total_population_data = final_pops

# STORE DATA
# TODO(#99999): fill in `state` and `primary_compartment`
simulation_tag = "OH_SB3_prong2"
upload_spark_model_inputs(
    "recidiviz-staging",
    simulation_tag,
    outflows_data,
    transitions_data,
    total_population_data,
)
# NOTE(review): this chunk begins mid-expression — it continues the
# "time_step" entry of the pd.DataFrame({...}) literal for
# `temp_monthly_outflows_data`; the opening (and the loop defining `year` and
# `reference_year`) is above this view.
        (year - reference_year) * 12, (year - reference_year + 1) * 12
    )
] * 2,
"compartment": ["pretrial"] * 24,
"outflow_to": ["prison"] * 24,
# 12 monthly rows per race group.
"race": ["white"] * 12 + ["non-white"] * 12,
# Yearly admissions (column 4) split evenly across the 12 months; the yearly
# table appears to alternate white / non-white rows within each year — TODO
# confirm row layout of `yearly_outflows_data`.
"total_population": [
    yearly_outflows_data.iloc[(year - reference_year) * 2, 4] / 12
    for month in range(12)
] + [
    yearly_outflows_data.iloc[(year - reference_year) * 2 + 1, 4] / 12
    for month in range(12)
],
}
)
# Accumulate this year's monthly rows onto the running outflows table.
outflows_data = pd.concat([outflows_data, temp_monthly_outflows_data])

# TOTAL POPULATION TABLE
# none

# STORE DATA
upload_spark_model_inputs(
    "recidiviz-staging",
    "IL_prison_three_strikes",
    outflows_data,
    transitions_data,
    pd.DataFrame(),  # no total-population table for this model
)
# NOTE(review): the lone `)` below closes a call whose opening is above this
# view.
)
# Expand yearly totals into monthly rows for 2013-2020: each year keeps its
# value for all 12 months, split into non-violent / violent groups.
for year in range(2013, 2021):
    temp_monthly_total_population_data = pd.DataFrame({
        # 12 consecutive monthly steps for this year, repeated once per
        # crime-type group (24 rows total).
        "time_step": [
            i for i in range((year - reference_year) * 12,
                             (year - reference_year + 1) * 12)
        ] * 2,
        "compartment": ["prison"] * 24,
        "crime_type": ["non-violent"] * 12 + ["violent"] * 12,
        # The yearly table appears to alternate non-violent / violent rows
        # within each year, with the count in column 3 — TODO confirm.
        "total_population": [
            yearly_total_population_data.iloc[(year - reference_year) * 2, 3]
            for month in range(12)
        ] + [
            yearly_total_population_data.iloc[(year - reference_year) * 2 + 1, 3]
            for month in range(12)
        ],
    })
    total_population_data = pd.concat(
        [total_population_data, temp_monthly_total_population_data], sort=False)

# STORE DATA
upload_spark_model_inputs(
    "recidiviz-staging",
    "LA_HB_364",
    outflows_data,
    transitions_data,
    total_population_data,
)
# NOTE(review): the `})` below closes a literal whose opening is above this
# view.
})
# Expand yearly outflows into monthly rows, splitting each yearly count
# evenly across the 12 months.  NOTE(review): time_step here is
# 12 * year + month, whereas sibling scripts use 12 * year - month — confirm
# the intended sign convention for month offsets.
final_outflows = pd.DataFrame()
for year in outflows_data.time_step.unique():
    year_outflows = outflows_data[outflows_data.time_step == year]
    for month in range(12):
        month_outflows = year_outflows.copy()
        month_outflows.time_step = 12 * month_outflows.time_step + month
        month_outflows.total_population /= 12
        final_outflows = pd.concat([final_outflows, month_outflows])
outflows_data = final_outflows

# Hand-built transitions table (40 rows):
#  - 2 prison->release rows whose durations (2400.80 and 2457.60, presumably
#    average sentence lengths in days — TODO confirm) are converted from days
#    to months via /365*12;
#  - 36 release->prison rows spreading a 23% return rate evenly over months
#    1-36 (0.23 / 36 each);
#  - the remaining 77% moving release->release_full at month 36, plus a
#    release_full self-terminal row.
transitions_data = pd.DataFrame({
    "compartment": ["prison"] * 2 + ["release"] * 36 +
    ["release", "release_full"],
    "outflow_to": ["release"] * 2 + ["prison"] * 36 +
    ["release_full", "release_full"],
    "compartment_duration": [2400.80 / 365 * 12, 2457.60 / 365 * 12] +
    list(range(1, 37)) + [36, 36],
    "total_population": [1.0] * 2 + [0.23 / 36] * 36 + [0.77, 1],
    "crime_type": ["NA"] * 40,
})
upload_spark_model_inputs(
    "recidiviz-staging",
    "OK_resentencing",
    outflows_data,
    transitions_data,
    pd.DataFrame(),  # no total-population table for this model
)
(outflows_data.age == "50_and_under"), "total_population", ] = total_population_data.loc[ total_population_data.age == "50_and_under", "total_population"].iloc[0] outflows_data.loc[(outflows_data.time_step == 0) & (outflows_data.age == "51_and_up"), "total_population", ] = total_population_data.loc[ total_population_data.age == "51_and_up", "total_population"].iloc[0] outflows_data = outflows_data.append( pd.DataFrame({ "total_population": [0] * 20, "age": ["50_and_under"] * 10 + ["51_and_up"] * 10, "time_step": list(range(1, 11)) * 2, })).ffill() # STORE DATA fake_total_population_data = pd.DataFrame({ "compartment": ["prison"] * 2, "time_step": [-1] * 2, "total_population": [0] * 2, "age": ["50_and_under", "51_and_up"], }) upload_spark_model_inputs( "recidiviz-staging", "MS_habitual_offenders_A", outflows_data, transitions_data, fake_total_population_data, )
# OUTFLOWS TABLE
outflows_data = jail_prison_admissions.drop("offense_group", axis=1)

# This is left over from the last policy we modeled; you'll want to filter
# differently based on what you're modeling.
affected_crimes = [
    "ASL1342",
    "NAR3038",
    "NAR3087",
    "DWI5406",
    "DWI5449",
    "DWI5450",
    "LIC6834",
    "LIC6860",
    "WPN5296",
    "WPN5297",
]
# Restrict both tables to the offense codes affected by the policy.
transitions_data = transitions_data[transitions_data.crime.isin(
    affected_crimes)]
outflows_data = outflows_data[outflows_data.crime.isin(affected_crimes)]

# Don't want sentences listed as hundreds of years to skew our model, so we
# cap sentence length at 50 years.
# FIX(idiom): compare against the scalar 50 directly instead of building a
# throwaway constant Series of 50s — identical behavior, no extra allocation.
transitions_data.loc[
    transitions_data.compartment_duration > 50, "compartment_duration"
] = 50

# STORE DATA
upload_spark_model_inputs("recidiviz-staging", "VA_prison", outflows_data,
                          transitions_data, pd.DataFrame())
# Fold the recidivism rows into the main transitions table.
transitions = pd.concat([transitions, recidivism_transitions])
# Shrink outflows so we don't overcount recidivism.
outflows.total_population *= (1 - 0.43)
if SAVE_TO_CSV:
    transitions.to_csv(
        '/Users/jpouls/recidiviz/nyrecidiviz/mm_preprocessing/transitionfull/transitionfull'
        + str(int(time.time())) + '.csv')

########### TOTAL POPULATION
# Count people currently in custody, grouped by offense.
in_custody = pop_valid.copy()[['custodyStatus', 'crime']]
in_custody = in_custody[in_custody.custodyStatus == 'IN CUSTODY']
counts = in_custody.groupby('crime').count()
counts.reset_index(inplace=True)
counts['compartment'] = 'prison'
# groupby().count() left the per-crime row counts in the custodyStatus column.
counts['total_population'] = counts.custodyStatus
# Population as of Feb 2021 == 254 months since 2000.
counts['time_step'] = 254
counts['crime_type'] = counts.crime
pop_out = counts[['compartment', 'total_population', 'time_step', 'crime_type']]
if SAVE_TO_CSV:
    pop_out.to_csv(
        '/Users/jpouls/recidiviz/nyrecidiviz/mm_preprocessing/total_population/total_population'
        + str(int(time.time())) + '.csv')

############ SPARK MODEL UPLOAD
upload_spark_model_inputs('recidiviz-staging', 'NY_CTC', outflows, transitions,
                          pop_out)
def _expand_to_monthly(yearly_df, split_population):
    # Turn each yearly row into 12 monthly rows (time_step -> 12*y - m).
    # When `split_population` is True, divide the yearly count evenly across
    # the 12 months (events/flows); otherwise carry it unchanged (stocks).
    pieces = [pd.DataFrame()]
    for yearly_step in yearly_df.time_step.unique():
        rows_for_year = yearly_df[yearly_df.time_step == yearly_step]
        for month_offset in range(12):
            monthly_rows = rows_for_year.copy()
            monthly_rows.time_step = 12 * monthly_rows.time_step - month_offset
            if split_population:
                monthly_rows.total_population /= 12
            pieces.append(monthly_rows)
    return pd.concat(pieces)


# OUTFLOWS TABLE (continued): convert the yearly table to monthly steps.
final_outflows = _expand_to_monthly(outflows_data, split_population=True)
outflows_data = final_outflows

# TOTAL POPULATION TABLE
# TODO(#99999): populate total_population_data from raw data
final_pops = _expand_to_monthly(total_population_data, split_population=False)
total_population_data = final_pops

# STORE DATA
# TODO(#99999): fill in `state` and `primary_compartment`
upload_spark_model_inputs(
    "recidiviz-staging",
    "MS_SB_2123",
    outflows_data,
    transitions_data,
    total_population_data,
)
# NOTE(review): this chunk begins mid-expression — inside the list argument
# of a pd.concat( call whose opening (presumably
# `outflows_data = pd.concat(`) is above this view.
[
    outflows_data,
    pd.read_csv(
        "recidiviz/calculator/modeling/population_projection/state/TX/TX_data/Outflows Data-Table 1.csv"
    ),
]
)
# Rename the placeholder disaggregation column to the expected axis name.
outflows_data = outflows_data.rename({"placeholder_axis": "crime_type"}, axis=1)

# TOTAL POPULATION TABLE
# Append the raw CSV rows to the running population table.
total_population_data = pd.concat(
    [
        total_population_data,
        pd.read_csv(
            "recidiviz/calculator/modeling/population_projection/state/TX/TX_data/Total Population Data-Table 1.csv"
        ),
    ]
)
total_population_data = total_population_data.rename(
    {"placeholder_axis": "crime_type"}, axis=1
)

# STORE DATA
upload_spark_model_inputs(
    "recidiviz-staging",
    "TX_PO_incentives",
    outflows_data,
    transitions_data,
    total_population_data,
)