# NOTE(review): this chunk begins mid-expression — the opening of the
# pd.DataFrame({...}) literal for `temp_monthly_total_population_data` (and
# the enclosing loop that defines `year` and `reference_year`) is above this
# view, so the first comprehension below is a continuation.
for month in range(12)
] + [
    # Presumably rows 3-5 of each 6-row yearly group in
    # `total_population_data` hold further sub-population counts (column 4),
    # each repeated for the 12 months of the year — TODO confirm row layout.
    total_population_data.iloc[(year - reference_year) * 6 + 3, 4]
    for month in range(12)
] + [
    total_population_data.iloc[(year - reference_year) * 6 + 4, 4]
    for month in range(12)
] + [
    total_population_data.iloc[(year - reference_year) * 6 + 5, 4]
    for month in range(12)
],
})
# Accumulate this year's monthly rows onto the running population table.
monthly_total_population_data = pd.concat(
    [monthly_total_population_data, temp_monthly_total_population_data])

# STORE DATA
# Rename the "tis_percentage" disaggregation column to "crime_type", the
# axis name the upload expects, on all three tables.
monthly_outflows_data = monthly_outflows_data.rename(
    {"tis_percentage": "crime_type"}, axis=1)
transitions_data = transitions_data.rename({"tis_percentage": "crime_type"},
                                           axis=1)
monthly_total_population_data = monthly_total_population_data.rename(
    {"tis_percentage": "crime_type"}, axis=1)
upload_spark_model_inputs(
    "recidiviz-staging",
    "IL_prison_TIS",
    monthly_outflows_data,
    transitions_data,
    monthly_total_population_data,
)
# OUTFLOWS TABLE (continued): expand each yearly outflows row into 12 monthly
# rows. Monthly step = 12 * yearly step - month offset, and the yearly count
# is split evenly across the 12 months.
monthly_outflow_frames = []
for yearly_step in outflows_data.time_step.unique():
    rows_for_year = outflows_data[outflows_data.time_step == yearly_step]
    for month_offset in range(12):
        monthly_rows = rows_for_year.copy()
        monthly_rows.time_step = 12 * monthly_rows.time_step - month_offset
        monthly_rows.total_population /= 12
        monthly_outflow_frames.append(monthly_rows)
final_outflows = pd.concat([final_outflows, *monthly_outflow_frames])
outflows_data = final_outflows

# TOTAL POPULATION TABLE: same yearly -> monthly expansion, but the yearly
# value is carried into each month unchanged (no division, unlike outflows).
monthly_pop_frames = []
for yearly_step in total_population_data.time_step.unique():
    rows_for_year = total_population_data[
        total_population_data.time_step == yearly_step]
    for month_offset in range(12):
        monthly_rows = rows_for_year.copy()
        monthly_rows.time_step = 12 * monthly_rows.time_step - month_offset
        monthly_pop_frames.append(monthly_rows)
final_pops = pd.concat([pd.DataFrame(), *monthly_pop_frames])
total_population_data = final_pops

# STORE DATA
# TODO(#99999): fill in `state` and `primary_compartment`
simulation_tag = "OH_SB3_prong2"
upload_spark_model_inputs(
    "recidiviz-staging",
    simulation_tag,
    outflows_data,
    transitions_data,
    total_population_data,
)
# NOTE(review): this chunk begins mid-expression — it continues the
# "time_step" entry of the pd.DataFrame({...}) literal for
# `temp_monthly_outflows_data`; the opening (and the loop defining `year` and
# `reference_year`) is above this view.
        (year - reference_year) * 12, (year - reference_year + 1) * 12
    )
] * 2,
"compartment": ["pretrial"] * 24,
"outflow_to": ["prison"] * 24,
# 12 monthly rows per race group.
"race": ["white"] * 12 + ["non-white"] * 12,
# Yearly admissions (column 4) split evenly across the 12 months; the yearly
# table appears to alternate white / non-white rows within each year — TODO
# confirm row layout of `yearly_outflows_data`.
"total_population": [
    yearly_outflows_data.iloc[(year - reference_year) * 2, 4] / 12
    for month in range(12)
] + [
    yearly_outflows_data.iloc[(year - reference_year) * 2 + 1, 4] / 12
    for month in range(12)
],
}
)
# Accumulate this year's monthly rows onto the running outflows table.
outflows_data = pd.concat([outflows_data, temp_monthly_outflows_data])

# TOTAL POPULATION TABLE
# none

# STORE DATA
upload_spark_model_inputs(
    "recidiviz-staging",
    "IL_prison_three_strikes",
    outflows_data,
    transitions_data,
    pd.DataFrame(),  # no total-population table for this model
)
# NOTE(review): the lone `)` below closes a call whose opening is above this
# view.
)
# Expand yearly totals into monthly rows for 2013-2020: each year keeps its
# value for all 12 months, split into non-violent / violent groups.
for year in range(2013, 2021):
    temp_monthly_total_population_data = pd.DataFrame({
        # 12 consecutive monthly steps for this year, repeated once per
        # crime-type group (24 rows total).
        "time_step": [
            i for i in range((year - reference_year) * 12,
                             (year - reference_year + 1) * 12)
        ] * 2,
        "compartment": ["prison"] * 24,
        "crime_type": ["non-violent"] * 12 + ["violent"] * 12,
        # The yearly table appears to alternate non-violent / violent rows
        # within each year, with the count in column 3 — TODO confirm.
        "total_population": [
            yearly_total_population_data.iloc[(year - reference_year) * 2, 3]
            for month in range(12)
        ] + [
            yearly_total_population_data.iloc[(year - reference_year) * 2 + 1, 3]
            for month in range(12)
        ],
    })
    total_population_data = pd.concat(
        [total_population_data, temp_monthly_total_population_data], sort=False)

# STORE DATA
upload_spark_model_inputs(
    "recidiviz-staging",
    "LA_HB_364",
    outflows_data,
    transitions_data,
    total_population_data,
)
# NOTE(review): the `})` below closes a literal whose opening is above this
# view.
})
# Expand yearly outflows into monthly rows, splitting each yearly count
# evenly across the 12 months.  NOTE(review): time_step here is
# 12 * year + month, whereas sibling scripts use 12 * year - month — confirm
# the intended sign convention for month offsets.
final_outflows = pd.DataFrame()
for year in outflows_data.time_step.unique():
    year_outflows = outflows_data[outflows_data.time_step == year]
    for month in range(12):
        month_outflows = year_outflows.copy()
        month_outflows.time_step = 12 * month_outflows.time_step + month
        month_outflows.total_population /= 12
        final_outflows = pd.concat([final_outflows, month_outflows])
outflows_data = final_outflows

# Hand-built transitions table (40 rows):
#  - 2 prison->release rows whose durations (2400.80 and 2457.60, presumably
#    average sentence lengths in days — TODO confirm) are converted from days
#    to months via /365*12;
#  - 36 release->prison rows spreading a 23% return rate evenly over months
#    1-36 (0.23 / 36 each);
#  - the remaining 77% moving release->release_full at month 36, plus a
#    release_full self-terminal row.
transitions_data = pd.DataFrame({
    "compartment": ["prison"] * 2 + ["release"] * 36 +
    ["release", "release_full"],
    "outflow_to": ["release"] * 2 + ["prison"] * 36 +
    ["release_full", "release_full"],
    "compartment_duration": [2400.80 / 365 * 12, 2457.60 / 365 * 12] +
    list(range(1, 37)) + [36, 36],
    "total_population": [1.0] * 2 + [0.23 / 36] * 36 + [0.77, 1],
    "crime_type": ["NA"] * 40,
})
upload_spark_model_inputs(
    "recidiviz-staging",
    "OK_resentencing",
    outflows_data,
    transitions_data,
    pd.DataFrame(),  # no total-population table for this model
)
(outflows_data.age == "50_and_under"), "total_population", ] = total_population_data.loc[ total_population_data.age == "50_and_under", "total_population"].iloc[0] outflows_data.loc[(outflows_data.time_step == 0) & (outflows_data.age == "51_and_up"), "total_population", ] = total_population_data.loc[ total_population_data.age == "51_and_up", "total_population"].iloc[0] outflows_data = outflows_data.append( pd.DataFrame({ "total_population": [0] * 20, "age": ["50_and_under"] * 10 + ["51_and_up"] * 10, "time_step": list(range(1, 11)) * 2, })).ffill() # STORE DATA fake_total_population_data = pd.DataFrame({ "compartment": ["prison"] * 2, "time_step": [-1] * 2, "total_population": [0] * 2, "age": ["50_and_under", "51_and_up"], }) upload_spark_model_inputs( "recidiviz-staging", "MS_habitual_offenders_A", outflows_data, transitions_data, fake_total_population_data, )
# OUTFLOWS TABLE
outflows_data = jail_prison_admissions.drop("offense_group", axis=1)

# This is left over from the last policy we modeled; you'll want to filter
# differently based on what you're modeling.
affected_crimes = [
    "ASL1342",
    "NAR3038",
    "NAR3087",
    "DWI5406",
    "DWI5449",
    "DWI5450",
    "LIC6834",
    "LIC6860",
    "WPN5296",
    "WPN5297",
]
# Restrict both tables to the offense codes affected by the policy.
transitions_data = transitions_data[transitions_data.crime.isin(
    affected_crimes)]
outflows_data = outflows_data[outflows_data.crime.isin(affected_crimes)]

# Don't want sentences listed as hundreds of years to skew our model, so we
# cap sentence length at 50 years.
# FIX(idiom): compare against the scalar 50 directly instead of building a
# throwaway constant Series of 50s — identical behavior, no extra allocation.
transitions_data.loc[
    transitions_data.compartment_duration > 50, "compartment_duration"
] = 50

# STORE DATA
upload_spark_model_inputs("recidiviz-staging", "VA_prison", outflows_data,
                          transitions_data, pd.DataFrame())
# Fold the recidivism rows into the main transitions table.
transitions = pd.concat([transitions, recidivism_transitions])
# Shrink outflows so we don't overcount recidivism.
outflows.total_population *= (1 - 0.43)
if SAVE_TO_CSV:
    transitions.to_csv(
        '/Users/jpouls/recidiviz/nyrecidiviz/mm_preprocessing/transitionfull/transitionfull'
        + str(int(time.time())) + '.csv')

########### TOTAL POPULATION
# Count people currently in custody, grouped by offense.
in_custody = pop_valid.copy()[['custodyStatus', 'crime']]
in_custody = in_custody[in_custody.custodyStatus == 'IN CUSTODY']
counts = in_custody.groupby('crime').count()
counts.reset_index(inplace=True)
counts['compartment'] = 'prison'
# groupby().count() left the per-crime row counts in the custodyStatus column.
counts['total_population'] = counts.custodyStatus
# Population as of Feb 2021 == 254 months since 2000.
counts['time_step'] = 254
counts['crime_type'] = counts.crime
pop_out = counts[['compartment', 'total_population', 'time_step', 'crime_type']]
if SAVE_TO_CSV:
    pop_out.to_csv(
        '/Users/jpouls/recidiviz/nyrecidiviz/mm_preprocessing/total_population/total_population'
        + str(int(time.time())) + '.csv')

############ SPARK MODEL UPLOAD
upload_spark_model_inputs('recidiviz-staging', 'NY_CTC', outflows, transitions,
                          pop_out)
def _expand_to_monthly(yearly_df, split_population):
    # Turn each yearly row into 12 monthly rows (time_step -> 12*y - m).
    # When `split_population` is True, divide the yearly count evenly across
    # the 12 months (events/flows); otherwise carry it unchanged (stocks).
    pieces = [pd.DataFrame()]
    for yearly_step in yearly_df.time_step.unique():
        rows_for_year = yearly_df[yearly_df.time_step == yearly_step]
        for month_offset in range(12):
            monthly_rows = rows_for_year.copy()
            monthly_rows.time_step = 12 * monthly_rows.time_step - month_offset
            if split_population:
                monthly_rows.total_population /= 12
            pieces.append(monthly_rows)
    return pd.concat(pieces)


# OUTFLOWS TABLE (continued): convert the yearly table to monthly steps.
final_outflows = _expand_to_monthly(outflows_data, split_population=True)
outflows_data = final_outflows

# TOTAL POPULATION TABLE
# TODO(#99999): populate total_population_data from raw data
final_pops = _expand_to_monthly(total_population_data, split_population=False)
total_population_data = final_pops

# STORE DATA
# TODO(#99999): fill in `state` and `primary_compartment`
upload_spark_model_inputs(
    "recidiviz-staging",
    "MS_SB_2123",
    outflows_data,
    transitions_data,
    total_population_data,
)
# NOTE(review): this chunk begins mid-expression — inside the list argument
# of a pd.concat( call whose opening (presumably
# `outflows_data = pd.concat(`) is above this view.
[
    outflows_data,
    pd.read_csv(
        "recidiviz/calculator/modeling/population_projection/state/TX/TX_data/Outflows Data-Table 1.csv"
    ),
]
)
# Rename the placeholder disaggregation column to the expected axis name.
outflows_data = outflows_data.rename({"placeholder_axis": "crime_type"}, axis=1)

# TOTAL POPULATION TABLE
# Append the raw CSV rows to the running population table.
total_population_data = pd.concat(
    [
        total_population_data,
        pd.read_csv(
            "recidiviz/calculator/modeling/population_projection/state/TX/TX_data/Total Population Data-Table 1.csv"
        ),
    ]
)
total_population_data = total_population_data.rename(
    {"placeholder_axis": "crime_type"}, axis=1
)

# STORE DATA
upload_spark_model_inputs(
    "recidiviz-staging",
    "TX_PO_incentives",
    outflows_data,
    transitions_data,
    total_population_data,
)