Example #1
    def test_upload_spark_model_inputs_with_missing_yaml_inputs(self) -> None:
        with self.assertRaises(ValueError) as e:
            upload_spark_model_inputs(
                "recidiviz-staging",
                "test",
                self.outflows_data,
                self.transitions_data,
                self.total_population_data,
                get_inputs_path("super_simulation_missing_inputs.yaml"),
            )
        self.assertTrue(str(e.exception).startswith("Missing yaml inputs"))
Example #2
    # `mock_store` is injected by a mock.patch decorator not shown in this excerpt.
    def test_upload_spark_model_inputs_with_valid_inputs(
        self, mock_store: Any
    ) -> None:
        upload_spark_model_inputs(
            "recidiviz-staging",
            "test",
            self.outflows_data,
            self.transitions_data,
            self.total_population_data,
            get_inputs_path("super_simulation_data_ingest.yaml"),
        )
        assert mock_store.call_count == 3
Example #3
    def test_upload_spark_model_inputs_with_null_values(self) -> None:
        with self.assertRaises(ValueError) as e:
            upload_spark_model_inputs(
                "recidiviz-staging",
                "test",
                self.outflows_data,
                self.transitions_data_with_null_values,
                self.total_population_data,
                get_inputs_path("super_simulation_data_ingest.yaml"),
            )
        self.assertEqual(
            str(e.exception),
            "Table 'transitions_data' must not contain null values")
Example #4
    def test_upload_spark_model_inputs_with_missing_column(self) -> None:
        with self.assertRaises(ValueError) as e:
            upload_spark_model_inputs(
                "recidiviz-staging",
                "test",
                self.outflows_data,
                self.transitions_data,
                self.total_population_data_missing_column,
                get_inputs_path("super_simulation_data_ingest.yaml"),
            )
        self.assertEqual(
            str(e.exception),
            "Table 'total_population_data' missing required columns {'time_step'}",
        )
Example #5
    def test_upload_spark_model_inputs_with_extra_column(self) -> None:
        with self.assertRaises(ValueError) as e:
            upload_spark_model_inputs(
                "recidiviz-staging",
                "test",
                self.outflows_data,
                self.transitions_data,
                self.total_population_data_extra_column,
                get_inputs_path("super_simulation_data_ingest.yaml"),
            )
        self.assertEqual(
            str(e.exception),
            "Table 'total_population_data' contains unexpected columns {'random_extra_column'}",
        )
Example #6
    def test_upload_spark_model_inputs_with_column_wrong_type(self) -> None:
        with self.assertRaises(ValueError) as e:
            upload_spark_model_inputs(
                "recidiviz-staging",
                "test",
                self.outflows_data,
                self.transitions_data,
                self.total_population_data_wrong_type,
                get_inputs_path("super_simulation_data_ingest.yaml"),
            )
        self.assertEqual(
            str(e.exception),
            "Table 'total_population_data' has wrong type for column 'total_population'. Type 'int64' should be 'float64'",
        )
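The dtype check above implies the usual caller-side fix: cast whole-number count columns to float before uploading, since pandas infers int64 for them. A minimal sketch (the column name comes from the test above; example #21 below does the same for compartment_duration):

# pandas infers int64 for whole-number counts; cast to float64 so the
# uploader's dtype validation passes.
total_population_data = total_population_data.astype({"total_population": "float64"})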
Example #7
    def test_upload_spark_model_inputs_with_invalid_project_id(self) -> None:
        with self.assertRaises(ValueError) as e:
            upload_spark_model_inputs(
                "bad_project_id",
                "test",
                self.outflows_data,
                self.transitions_data,
                self.total_population_data,
                get_inputs_path("super_simulation_data_ingest.yaml"),
            )
        self.assertEqual(
            str(e.exception),
            "bad_project_id is not a supported gcloud BigQuery project",
        )
Example #8
    def test_upload_spark_model_inputs_with_wrong_disaggregation_axis_in_yaml(
        self,
    ) -> None:
        with self.assertRaises(ValueError) as e:
            upload_spark_model_inputs(
                "recidiviz-staging",
                "test",
                self.outflows_data_wrong_disaggregation_axis,
                self.transitions_data_wrong_disaggregation_axis,
                self.total_population_data_wrong_disaggregation_axis,
                get_inputs_path("super_simulation_data_ingest.yaml"),
            )
        self.assertEqual(
            str(e.exception),
            "All disagregation axes must be included in the input dataframe columns\n"
            "Expected: ['crime_type'], Actual: Index(['compartment', 'outflow_to', 'time_step', 'age', 'total_population'], dtype='object')",
        )
Example #9
    def test_upload_spark_model_inputs_with_missing_disaggregation_axis(
        self,
    ) -> None:
        with self.assertRaises(ValueError) as e:
            upload_spark_model_inputs(
                "recidiviz-staging",
                "test",
                self.outflows_data_no_disaggregation_axis,
                self.transitions_data,
                self.total_population_data,
                get_inputs_path("super_simulation_data_ingest.yaml"),
            )
        self.assertEqual(
            str(e.exception),
            "Tables ['outflows_data'] must have dissaggregation axis of 'crime', 'crime_type', 'age', or 'race'",
        )
Example #10
final_outflows = pd.DataFrame()
for year in outflows_data.time_step.unique():
    year_outflows = outflows_data[outflows_data.time_step == year]
    for month in range(12):
        month_outflows = year_outflows.copy()
        month_outflows.time_step = 12 * month_outflows.time_step - month
        month_outflows.total_population /= 12
        final_outflows = pd.concat([final_outflows, month_outflows])
outflows_data = final_outflows

transitions_data.compartment_duration *= 12

final_pops = pd.DataFrame()
for year in total_population_data.time_step.unique():
    year_pops = total_population_data[total_population_data.time_step == year]
    for month in range(12):
        month_pops = year_pops.copy()
        month_pops.time_step = 12 * month_pops.time_step - month
        final_pops = pd.concat([final_pops, month_pops])
total_population_data = final_pops

upload_spark_model_inputs(
    "recidiviz-staging",
    "AZ_HB_2376",
    outflows_data,
    transitions_data,
    total_population_data,
    "recidiviz/calculator/modeling/population_projection/state/AZ/ax_state_prison_HB_2376_model_inputs.yaml",
)
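This yearly-to-monthly expansion recurs throughout these scripts (see also examples #13, #20, #27, and #28). Below is a vectorized sketch of the same transformation; yearly_to_monthly is a hypothetical helper, not part of the codebase, and it assumes a unique index plus time_step and total_population columns:

import numpy as np
import pandas as pd

def yearly_to_monthly(df: pd.DataFrame, split_population: bool = True) -> pd.DataFrame:
    # Repeat each yearly row 12 times, then offset time_step per month.
    monthly = df.loc[df.index.repeat(12)].reset_index(drop=True)
    month_offsets = np.tile(np.arange(12), len(df))
    # Mirrors the loop above (12 * year - month); note that some scripts in
    # this file use 12 * year + month instead -- pick one convention and keep it.
    monthly["time_step"] = 12 * monthly["time_step"] - month_offsets
    if split_population:
        # Flows are split evenly across months; population snapshots are not.
        monthly["total_population"] = monthly["total_population"] / 12
    return monthly

# outflows_data = yearly_to_monthly(outflows_data)
# total_population_data = yearly_to_monthly(total_population_data, split_population=False)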
Example #11
transitions_data = pd.concat(
    [prison_to_parole_transitions, parole_to_prison_transitions]
)

# TOTAL POPULATION TABLE (prison)
total_population_data = pd.read_csv(
    "recidiviz/calculator/modeling/population_projection/state/SC/total_population.csv"
)
# ignore parole

final_pop_data = pd.DataFrame()
for year in total_population_data.time_step.unique():
    year_pops = total_population_data[total_population_data.time_step == year]
    for month in range(12):
        month_pops = year_pops.copy()
        month_pops.time_step = 12 * month_pops.time_step + month
        final_pop_data = pd.concat([final_pop_data, month_pops])
total_population_data = final_pop_data


# STORE DATA

upload_spark_model_inputs(
    "recidiviz-staging",
    "SC_prison",
    outflows_data,
    transitions_data,
    total_population_data,
    "recidiviz/calculator/modeling/population_projection/state/SC/SC_prison_model_inputs.yaml",
)
Example #12
        "recidiviz/calculator/modeling/population_projection/state/AZ/financial_incentives/transitions_AZ_data.csv"
    ),
])

# OUTFLOWS TABLE
outflows_data = pd.concat([
    outflows_data,
    pd.read_csv(
        "recidiviz/calculator/modeling/population_projection/state/AZ/financial_incentives/outflows_data AZ.csv"
    ),
])

# TOTAL POPULATION TABLE
total_population_data = pd.concat([
    total_population_data,
    pd.read_csv(
        "recidiviz/calculator/modeling/population_projection/state/AZ/financial_incentives/total_population_data AZ.csv"
    ),
])

# STORE DATA
simulation_tag = "AZ_financialincentives"
upload_spark_model_inputs(
    "recidiviz-staging",
    simulation_tag,
    outflows_data,
    transitions_data,
    total_population_data,
    "recidiviz/calculator/modeling/population_projection/state/AZ/financial_incentives/AZ_supervision_model_inputs.yaml",
)
Example #13
for year in range(2013, 2021):
    temp_monthly_total_population_data = pd.DataFrame({
        "time_step": [
            i for i in range((year - reference_year) *
                             12, (year - reference_year + 1) * 12)
        ] * 2,
        "compartment": ["prison"] * 24,
        "crime_type": ["non-violent"] * 12 + ["violent"] * 12,
        "total_population": [
            yearly_total_population_data.iloc[(year - reference_year) * 2, 3]
            for month in range(12)
        ] + [
            yearly_total_population_data.iloc[(year - reference_year) * 2 + 1,
                                              3] for month in range(12)
        ],
    })
    total_population_data = pd.concat(
        [total_population_data, temp_monthly_total_population_data],
        sort=False)

# STORE DATA
upload_spark_model_inputs(
    "recidiviz-staging",
    "LA_HB_364",
    outflows_data,
    transitions_data,
    total_population_data,
    "recidiviz/calculator/modeling/population_projection/state/LA/LA_prison_habitual_model_inputs.yaml",
)
Example #14
pop = pop_valid.copy()
pop = pop[["custodyStatus", "crime"]]
pop_in_custody = pop[pop.custodyStatus == "IN CUSTODY"]

total_pop = pop_in_custody.groupby("crime").count()
total_pop.reset_index(inplace=True)

total_pop["compartment"] = "prison"
total_pop["total_population"] = total_pop.custodyStatus
# population as of Feb 2021 == 254 months since 2000
total_pop["time_step"] = 254
total_pop["crime_type"] = total_pop.crime
pop_out = total_pop[[
    "compartment", "total_population", "time_step", "crime_type"
]]

if SAVE_TO_CSV:
    pop_out.to_csv(
        "/Users/jpouls/recidiviz/nyrecidiviz/mm_preprocessing/total_population/total_population"
        + str(int(time.time())) + ".csv")

############ SPARK MODEL UPLOAD
upload_spark_model_inputs(
    "recidiviz-staging",
    "NY_mms",
    outflows,
    transitions,
    pop_out,
    "recidiviz/calculator/modeling/population_projection/state/NY/mms/ny_state_prison_model_inputs",
)
Example #15
    "offense_code",
    "time_step",
    "sentence_type",
    "compartment",
]].groupby(
    [
        "offense_group", "offense_code", "compartment", "sentence_type",
        "time_step"
    ],
    as_index=False,
).count())
jail_prison_admissions = jail_prison_admissions.rename(
    {
        "off1_vcc": "total_population",
        "offense_code": "crime",
        "sentence_type": "outflow_to",
    },
    axis=1,
).drop("offense_group", axis=1)
outflows_data = jail_prison_admissions

# STORE DATA
upload_spark_model_inputs(
    "recidiviz-staging",
    "VA_parole",
    outflows_data,
    transitions_data,
    pd.DataFrame(),
    "recidiviz/calculator/modeling/population_projection/state/VA/VA_parole_model_inputs.yaml",
)
Example #16
# OUTFLOWS TABLE
outflows_data = pd.concat([
    outflows_data,
    pd.read_csv(
        "recidiviz/calculator/modeling/population_projection/state/TX/TX_data/Outflows Data-Table 1.csv"
    ),
])
outflows_data = outflows_data.rename({"placeholder_axis": "crime_type"},
                                     axis=1)

# TOTAL POPULATION TABLE
total_population_data = pd.concat([
    total_population_data,
    pd.read_csv(
        "recidiviz/calculator/modeling/population_projection/state/TX/TX_data/Total Population Data-Table 1.csv"
    ),
])
total_population_data = total_population_data.rename(
    {"placeholder_axis": "crime_type"}, axis=1)

# STORE DATA
upload_spark_model_inputs(
    "recidiviz-staging",
    "TX_PO_incentives",
    outflows_data,
    transitions_data,
    total_population_data,
    "recidiviz/calculator/modeling/population_projection/state/TX/TX_prison_revocations_model_inputs.yaml",
)
Example #17
# OUTFLOWS TABLE
outflows_data = pd.concat(
    [
        outflows_data,
        pd.read_csv(
            "recidiviz/calculator/modeling/population_projection/state/AZ/felony_reclassification/outflows_data_felonyAZ.csv"
        ),
    ]
)

# TOTAL POPULATION TABLE
total_population_data = pd.concat(
    [
        total_population_data,
        pd.read_csv(
            "recidiviz/calculator/modeling/population_projection/state/AZ/felony_reclassification/total_population_data_felonyAZ.csv"
        ),
    ]
)

# STORE DATA
simulation_tag = "AZ_reclassification"
upload_spark_model_inputs(
    "recidiviz-staging",
    simulation_tag,
    outflows_data,
    transitions_data,
    total_population_data,
    "recidiviz/calculator/modeling/population_projection/state/AZ/felony_reclassification/AZ_reclassification_model_inputs.yaml",
)
Example #18
# TODO(#99999): add one column to transitions_data & outflows_data per disaggregation
#  axis. If none exist, add place-holder axis.
transitions_data = pd.DataFrame(columns=[
    "compartment", "outflow_to", "total_population", "compartment_duration"
])
outflows_data = pd.DataFrame(
    columns=["compartment", "outflow_to", "total_population", "time_step"])
total_population_data = pd.DataFrame(
    columns=["compartment", "total_population", "time_step"])

# TRANSITIONS TABLE
# TODO(#99999): populate transitions_data from raw data

# OUTFLOWS TABLE
# TODO(#99999): populate outflows_data from raw data

# TOTAL POPULATION TABLE
# TODO(#99999): populate total_population_data from raw data

# STORE DATA
# TODO(#99999): fill in `simulation_tag`
simulation_tag = "TKTK"
upload_spark_model_inputs(
    "recidiviz-staging",
    simulation_tag,
    outflows_data,
    transitions_data,
    total_population_data,
    "path_to_your_yaml",
)
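For orientation, a minimal filled-in version of the skeleton above might look like the following. The values are toy numbers, and crime_type is used as the placeholder disaggregation axis per the TODO; note the count columns are floats, matching the dtype check exercised in example #6:

transitions_data = pd.DataFrame({
    "compartment": ["prison", "release"],
    "outflow_to": ["release", "prison"],
    "total_population": [1.0, 0.3],
    "compartment_duration": [24.0, 12.0],
    "crime_type": ["NA", "NA"],  # placeholder disaggregation axis
})
outflows_data = pd.DataFrame({
    "compartment": ["pretrial"],
    "outflow_to": ["prison"],
    "total_population": [100.0],
    "time_step": [0],
    "crime_type": ["NA"],
})
total_population_data = pd.DataFrame({
    "compartment": ["prison"],
    "total_population": [5000.0],
    "time_step": [0],
    "crime_type": ["NA"],
})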
Example #19
    temp_monthly_outflows_data = pd.DataFrame({
        "time_step": [
            i for i in range((year - reference_year) *
                             12, (year - reference_year + 1) * 12)
        ] * 2,
        "compartment": ["pretrial"] * 24,
        "outflow_to": ["prison"] * 24,
        "race": ["white"] * 12 + ["non-white"] * 12,
        "total_population": [
            yearly_outflows_data.iloc[(year - reference_year) * 2, 4] / 12
            for month in range(12)
        ] + [
            yearly_outflows_data.iloc[(year - reference_year) * 2 + 1, 4] / 12
            for month in range(12)
        ],
    })
    outflows_data = pd.concat([outflows_data, temp_monthly_outflows_data])

# TOTAL POPULATION TABLE
# none

# STORE DATA
upload_spark_model_inputs(
    "recidiviz-staging",
    "IL_prison_three_strikes",
    outflows_data,
    transitions_data,
    pd.DataFrame(),
    "recidiviz/calculator/modeling/population_projection/state/IL/IL_prison_three_strikes_model_inputs.yaml",
)
Example #20
final_outflows = pd.DataFrame()
for year in outflows_data.time_step.unique():
    year_outflows = outflows_data[outflows_data.time_step == year]
    for month in range(12):
        month_outflows = year_outflows.copy()
        month_outflows.time_step = 12 * month_outflows.time_step - month
        month_outflows.total_population /= 12
        final_outflows = pd.concat([final_outflows, month_outflows])
outflows_data = final_outflows

# TOTAL POPULATION TABLE
# TODO(#99999): populate total_population_data from raw data
final_pops = pd.DataFrame()
for year in total_population_data.time_step.unique():
    year_pops = total_population_data[total_population_data.time_step == year]
    for month in range(12):
        month_pops = year_pops.copy()
        month_pops.time_step = 12 * month_pops.time_step - month
        final_pops = pd.concat([final_pops, month_pops])
total_population_data = final_pops

# STORE DATA
# TODO(#99999): fill in `state` and `primary_compartment`
upload_spark_model_inputs(
    "recidiviz-staging",
    "MS_SB_2123",
    outflows_data,
    transitions_data,
    total_population_data,
    "recidiviz/calculator/modeling/population_projection/state/MS/SB_2123_Parole_Eligibility/MS_prison_model_inputs.yaml",
)
Example #21
    )
)

# switch tech revs to leapfrog to full_release
transitions_data.loc[
    transitions_data.compartment == "prison_tech_rev", "outflow_to"
] = "full_release"

# scale down outflows to avoid double counting
outflows_data.total_population *= (
    1
    - transitions_data.loc[
        (transitions_data.compartment == "release")
        & (transitions_data.outflow_to == "prison_new_crime"),
        "total_population",
    ].iloc[0]
)


column_names = ["compartment", "total_population", "time_step", "age"]
total_population_data = pd.DataFrame(columns=column_names)
transitions_data = transitions_data.astype({"compartment_duration": "float64"})
upload_spark_model_inputs(
    "recidiviz-staging",
    "wv_prison_p1",
    outflows_data,
    transitions_data,
    total_population_data,
    "./recidiviz/calculator/modeling/population_projection/state/WV/HB_2257/WV_hb2257_model_inputs.yaml",
)
Example #22
            for month in range(12)
        ] + [
            yearly_outflows_data.iloc[(year - reference_year) * 10 + 5, 4] / 12
            for month in range(12)
        ] + [
            yearly_outflows_data.iloc[(year - reference_year) * 10 + 6, 4] / 12
            for month in range(12)
        ] + [
            yearly_outflows_data.iloc[(year - reference_year) * 10 + 7, 4] / 12
            for month in range(12)
        ] + [
            yearly_outflows_data.iloc[(year - reference_year) * 10 + 8, 4] / 12
            for month in range(12)
        ] + [
            yearly_outflows_data.iloc[(year - reference_year) * 10 + 9, 4] / 12
            for month in range(12)
        ],
    })
    outflows_data = pd.concat([outflows_data, temp_monthly_outflows_data])

# STORE DATA
simulation_tag = "OK_prison"
upload_spark_model_inputs(
    "recidiviz-staging",
    simulation_tag,
    outflows_data,
    transitions_data,
    total_population_data,
    "recidiviz/calculator/modeling/population_projection/state/OK/OK_habitual_sentencing/OK_prison_model_inputs.yaml",
)
Example #23
])
transitions_data.loc[transitions_data.compartment == "prison_technical",
                     "outflow_to"] = "release"
transitions_data.loc[transitions_data.compartment == "probation ",
                     "compartment"] = "probation"

# OUTFLOWS TABLE
outflows_data = pd.concat([
    outflows_data,
    pd.read_csv(
        "recidiviz/calculator/modeling/population_projection/state/OK/OK_data/Outflows Data-Table 1.csv"
    ),
])

outflows_data = outflows_data.rename({"placeholder_axis": "crime_type"},
                                     axis=1)

transitions_data = transitions_data.rename({"placeholder_axis": "crime_type"},
                                           axis=1)

# STORE DATA
# NB IF YOU RUN THIS FILE: There were two yaml files in the folder - please make sure the one passed in below is the correct one
upload_spark_model_inputs(
    "recidiviz-staging",
    "OK_probation",
    outflows_data,
    transitions_data,
    pd.DataFrame(),
    "recidiviz/calculator/modeling/population_projection/state/OK/OK_earned_credits/OK_probation_model_inputs_average_cost.yaml",
)
Example #24
                  "total_population", ] = total_population_data.loc[
                      total_population_data.age == "50_and_under",
                      "total_population"].iloc[0]
outflows_data.loc[(outflows_data.time_step == 0) &
                  (outflows_data.age == "51_and_up"),
                  "total_population", ] = total_population_data.loc[
                      total_population_data.age == "51_and_up",
                      "total_population"].iloc[0]
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
outflows_data = pd.concat([
    outflows_data,
    pd.DataFrame({
        "total_population": [0] * 20,
        "age": ["50_and_under"] * 10 + ["51_and_up"] * 10,
        "time_step": list(range(1, 11)) * 2,
    }),
]).ffill()

# STORE DATA
fake_total_population_data = pd.DataFrame({
    "compartment": ["prison"] * 2,
    "time_step": [-1] * 2,
    "total_population": [0] * 2,
    "age": ["50_and_under", "51_and_up"],
})
upload_spark_model_inputs(
    "recidiviz-staging",
    "MS_habitual_offenders_A",
    outflows_data,
    transitions_data,
    fake_total_population_data,
    "recidiviz/calculator/modeling/population_projection/state/MS/habitual_sentencing/MS_prison_habitual_A_model_inputs.yaml",
)
Example #25
                yearly_total_population_data.iloc[(year - reference_year) * 3 + 1, 3]
                for month in range(12)
            ]
            + [
                yearly_total_population_data.iloc[(year - reference_year) * 3 + 2, 3]
                for month in range(12)
            ],
        }
    )
    monthly_total_population_data = pd.concat(
        [monthly_total_population_data, temp_monthly_total_population_data]
    )

# STORE DATA
monthly_outflows_data = monthly_outflows_data.rename(
    {"tis_percentage": "crime_type"}, axis=1
)
transitions_data = transitions_data.rename({"tis_percentage": "crime_type"}, axis=1)
monthly_total_population_data = monthly_total_population_data.rename(
    {"tis_percentage": "crime_type"}, axis=1
)

upload_spark_model_inputs(
    "recidiviz-staging",
    "IL_prison_RAE",
    monthly_outflows_data,
    transitions_data,
    monthly_total_population_data,
    "recidiviz/calculator/modeling/population_projection/state/IL/IL_prison_RAE_model_inputs.yaml",
)
Example #26
    ),
])
total_population_data = total_population_data[
    (total_population_data.time_step < 0)
    | (total_population_data.compartment != "probation")]
total_population_data = pd.concat([total_population_data, PROBATION_POP_DATA])

# drop duplicate probation data
total_population_data = total_population_data[
    (total_population_data.compartment != "probation")
    | (total_population_data.crime_type != "newcrime")]
# move disaggregation axis to compartments
prison_populations = total_population_data.loc[
    total_population_data.compartment == "prison"]
total_population_data.loc[total_population_data.compartment == "prison",
                          "compartment"] = (prison_populations.compartment +
                                            "_" +
                                            prison_populations.crime_type)
total_population_data.crime_type = "NA"

# STORE DATA
simulation_tag = "VA_HB2038"
upload_spark_model_inputs(
    "recidiviz-staging",
    simulation_tag,
    outflows_data,
    transitions_data,
    total_population_data,
    "recidiviz/calculator/modeling/population_projection/state/VA/HB_2038/VAHB2038_model_inputs.yaml",
)
Example #27
final_outflows = pd.DataFrame()
for year in outflows_data.time_step.unique():
    year_outflows = outflows_data[outflows_data.time_step == year]
    for month in range(12):
        month_outflows = year_outflows.copy()
        month_outflows.time_step = 12 * month_outflows.time_step - month
        month_outflows.total_population /= 12
        final_outflows = pd.concat([final_outflows, month_outflows])
outflows_data = final_outflows

# TOTAL POPULATION TABLE

final_pops = pd.DataFrame()
for year in total_population_data.time_step.unique():
    year_pops = total_population_data[total_population_data.time_step == year]
    for month in range(12):
        month_pops = year_pops.copy()
        month_pops.time_step = 12 * month_pops.time_step - month
        final_pops = pd.concat([final_pops, month_pops])
total_population_data = final_pops

# STORE DATA
# TODO(#99999): fill in `state` and `primary_compartment`
simulation_tag = "OH_SB3_prong2"
upload_spark_model_inputs(
    "recidiviz-staging",
    simulation_tag,
    outflows_data,
    transitions_data,
    total_population_data,
    "recidiviz/calculator/modeling/population_projection/state/OH/Prong2/OH_prison_p2_model_inputs.yaml",
)
Example #28
final_outflows = pd.DataFrame()
for year in outflows_data.time_step.unique():
    year_outflows = outflows_data[outflows_data.time_step == year]
    for month in range(12):
        month_outflows = year_outflows.copy()
        month_outflows.time_step = 12 * month_outflows.time_step + month
        month_outflows.total_population /= 12
        final_outflows = pd.concat([final_outflows, month_outflows])
outflows_data = final_outflows

transitions_data = pd.DataFrame({
    "compartment":
    ["prison"] * 3 + ["release"] * 36 + ["release", "release_full"],
    "outflow_to":
    ["release"] * 3 + ["prison"] * 36 + ["release_full", "release_full"],
    "compartment_duration":
    [888.2 / 365 * 12, 962.9 / 365 * 12, 1089.9 / 365 * 12] +
    list(range(1, 37)) + [36, 36],
    "total_population": [1.0] * 3 + [0.23 / 36] * 36 + [0.77, 1],
    "crime_type": ["NA"] * 41,
})

upload_spark_model_inputs(
    "recidiviz-staging",
    "OK_resentencing",
    outflows_data,
    transitions_data,
    pd.DataFrame(),
    "recidiviz/calculator/modeling/population_projection/state/OK/OK_resentencing/OK_prison_model_inputs.yaml",
)
Example #29
            released,
            1,
            1,
        ],
    }
)

# TOTAL POPULATION TABLE (parole to new offense revocation)
total_population_data = pd.read_csv(
    "recidiviz/calculator/modeling/population_projection/state/CA/parole_total_population.csv"
)

# STORE DATA
# state = 'CA'
# primary_compartment = 'parole'
# pd.concat([transitions_data, outflows_data, total_population_data], sort=False).to_csv(
#     f'recidiviz/calculator/modeling/population_projection/state/{state}/preprocessed_data_{state}_{primary_compartment}.csv')
print("OUTFLOWS = ", outflows_data)
print("TRANSITIONS = ", transitions_data)
print("TOTAL POP = ", total_population_data)


upload_spark_model_inputs(
    "recidiviz-staging",
    "CA_parole",
    outflows_data,
    transitions_data,
    total_population_data,
    "recidiviz/calculator/modeling/population_projection/state/CA/PO_incentives/CA_parole_model_inputs.yaml",
)
Example #30
two_parole_transitions.loc[two_parole_transitions.outflow_to ==
                           "prison_parole_rev_new",
                           "outflow_to"] = "prison_parole_rev_new_two"
two_parole_transitions.compartment = "parole_two"
three_parole_transitions.loc[three_parole_transitions.outflow_to ==
                             "prison_parole_rev_new",
                             "outflow_to"] = "prison_parole_rev_new_three"
three_parole_transitions.compartment = "parole_three"

two_rev_transitions.outflow_to = "parole_three"
two_rev_transitions.compartment = "prison_parole_rev_new_two"
three_rev_transitions.outflow_to = "parole_three"
three_rev_transitions.compartment = "prison_parole_rev_new_three"

transitions_data = pd.concat([
    transitions_data,
    two_rev_transitions,
    two_parole_transitions,
    three_rev_transitions,
    three_parole_transitions,
])

upload_spark_model_inputs(
    "recidiviz-staging",
    "va_prison_p1",
    outflows_data,
    transitions_data,
    total_population_data,
    "recidiviz/calculator/modeling/population_projection/state/NY/mms/ny_state_prison_model_inputs",
)