def main(): """ """ index = ["RIIPL_ID"] features = CachePopulation(population, index).set_index(index) sql = """ SELECT pop.riipl_id, SUM(citations) AS citations, SUM(fines) AS citation_fines FROM {population} pop LEFT JOIN {lookback} lb ON pop.riipl_id = lb.riipl_id LEFT JOIN {citations} c ON pop.riipl_id = c.riipl_id AND c.yrmo = lb.yrmo GROUP BY pop.riipl_id """.format(**globals()) with Connection() as cxn: features = features.join( pd.read_sql(sql, cxn._connection).set_index(index)) features = features.fillna(0) labels = { "CITATIONS": "Number of police citations", "CITATION_FINES": "Total fines from police citations" } SaveFeatures(features, out, manifest, population, labels)
def main(): """ Home addresses from MEGA table """ index = ["RIIPL_ID"] features = CachePopulation(population, index).set_index(index) sql = """ SELECT DISTINCT pop.riipl_id, address_trct FROM {population} pop LEFT JOIN {lookback} lb ON pop.riipl_id = lb.riipl_id INNER JOIN {mega} m ON pop.riipl_id = m.riipl_id AND lb.yrmo = m.month WHERE m.address_trct IS NOT NULL """.format(**globals()) with cx_Oracle.connect("/") as cxn: values = pd.read_sql(sql, cxn).set_index(index) labels = {} for trct in values.ADDRESS_TRCT.unique(): name = "TRCT_{}".format(trct) feature = values[values["ADDRESS_TRCT"] == trct] feature[name] = 1 features = features.join(feature[name]) labels[name] = "lived in Census tract {} during lookback period".format(trct) features = features.fillna(0) SaveFeatures(features, out, manifest, population, labels, bool_features=list(labels))
def main(): """ """ index = ["RIIPL_ID"] features = CachePopulation(population, index).set_index(index) sql = """ SELECT pop.riipl_id, SUM(cc.car_crash) AS car_crashes, SUM(cc.injured) AS car_crash_injuries FROM {population} pop LEFT JOIN {lookback} lb ON pop.riipl_id = lb.riipl_id INNER JOIN {car_crashes} cc ON pop.riipl_id = cc.riipl_id AND cc.yrmo = lb.yrmo GROUP BY pop.riipl_id """.format(**globals()) with Connection() as cxn: features = features.join( pd.read_sql(sql, cxn._connection).set_index(index)) features = features.fillna(0) labels = { "CAR_CRASHES": "count of car crashes involved in during lookback period", "CAR_CRASH_INJURIES": "count of car crash injuries during lookback period", } SaveFeatures(features, out, manifest, population, labels)
def main(): index = ["RIIPL_ID"] features = CachePopulation(population, index).set_index(index) # Claims payments sql = """ SELECT pop.riipl_id, SUM(mc.pay_amt) AS medicaid_payments, SUM(CASE WHEN mc.src_id IN (4, 8, 16, 17, 26, 27) THEN mc.pay_amt ELSE 0 END) AS medicaid_prof_payments, SUM(CASE WHEN mc.ed_flag = 1 THEN mc.pay_amt ELSE 0 END) AS medicaid_ed_payments FROM {population} pop LEFT JOIN {lookback} lb ON pop.riipl_id = lb.riipl_id LEFT JOIN {dim_date} dd ON lb.yrmo = dd.yrmo INNER JOIN {medicaid_claims} mc ON pop.riipl_id = mc.riipl_id AND dd.date_dt = mc.claim_dt GROUP BY pop.riipl_id """.format(**globals()) with cx_Oracle.connect("/") as cxn: features = features.join(pd.read_sql(sql, cxn).set_index(index)) # Pharmacy payments sql = """ SELECT pop.riipl_id, SUM(mp.pay_amt) AS medicaid_rx_payments FROM {population} pop LEFT JOIN {lookback} lb ON pop.riipl_id = lb.riipl_id LEFT JOIN {dim_date} dd ON lb.yrmo = dd.yrmo INNER JOIN {medicaid_pharmacy} mp ON pop.riipl_id = mp.riipl_id AND dd.date_dt = mp.dispensed_dt GROUP BY pop.riipl_id """.format(**globals()) with cx_Oracle.connect("/") as cxn: features = features.join(pd.read_sql(sql, cxn).set_index(index)) # Payments are comprehensive, so a missing value means $0 features = features.fillna(0) labels = { "MEDICAID_PAYMENTS" : "Total Medicaid payments", "MEDICAID_PROF_PAYMENTS" : "Total Medicaid payments to professionals", "MEDICAID_ED_PAYMENTS" : "Total Medicaid payments to Emergency Departments", "MEDICAID_RX_PAYMENTS" : "Total Medicaid pharmacy payments" } SaveFeatures(features, out, manifest, population, labels)
def main(): icd9 = pd.concat([ pd.read_csv(icd9_file1, usecols=[0, 1], names=["DIAG_CDE", "DESC"]), pd.read_csv(icd9_file2) ]).drop_duplicates("DIAG_CDE").set_index("DIAG_CDE") index = ["RIIPL_ID"] features = CachePopulation(population, index).set_index(index) sql = """ SELECT pop.riipl_id, mdc.diag_cde, COUNT(*) AS n FROM {population} pop LEFT JOIN {lookback} lb ON pop.riipl_id = lb.riipl_id LEFT JOIN {dim_date} dd ON lb.yrmo = dd.yrmo INNER JOIN {medicaid_diag_cde} mdc ON pop.riipl_id = mdc.riipl_id AND dd.date_dt = mdc.claim_dt INNER JOIN {diag_corr} dc ON mdc.diag_cde = dc.diag_cde AND ABS(dc.corr) > 0 GROUP BY pop.riipl_id, mdc.diag_cde """.format(**globals()) with Connection() as cxn: values = pd.read_sql(sql, cxn._connection) cdes = values.DIAG_CDE.unique() labels = {} for cde in cdes: feature = values.loc[values.DIAG_CDE == cde, "N"] feature.name = "DIAG_{}".format(cde) features = features.join(feature) try: desc = icd9.loc[cde, "DESC"] except KeyError: desc = "unknown description" label = "number of diagnoses for '{}' during lookback period".format( desc) labels[feature.name] = label features = features.fillna(0) SaveFeatures(features, out, manifest, population, labels)
def main(): procs = pd.read_csv(procsfile, sep="|", index_col="PROC_CDE") index = ["RIIPL_ID"] features = CachePopulation(population, index).set_index(index) sql = """ SELECT pop.riipl_id, mpc.proc_cde, COUNT(*) AS n FROM {population} pop LEFT JOIN {lookback} lb ON pop.riipl_id = lb.riipl_id LEFT JOIN {dim_date} dd ON lb.yrmo = dd.yrmo INNER JOIN {medicaid_proc_cde} mpc ON pop.riipl_id = mpc.riipl_id AND dd.date_dt = mpc.claim_dt INNER JOIN {proc_corr} pc ON mpc.proc_cde = pc.proc_cde AND ABS(pc.corr) > 0 GROUP BY pop.riipl_id, mpc.proc_cde """.format(**globals()) with Connection() as cxn: values = pd.read_sql(sql, cxn._connection) cdes = values.PROC_CDE.unique() labels = {} for cde in cdes: feature = values.loc[values.PROC_CDE == cde, "N"] feature.name = "PROC_{}".format(cde) features = features.join(feature) try: desc = procs.loc[cde, "DESCRIPTION"] except KeyError: desc = "unknown procedure" label = "number of procedures for '{}' during lookback period".format( desc) labels[feature.name] = label features = features.fillna(0) SaveFeatures(features, out, manifest, population, labels)
def main(): """ Opioid prescriptions and adverse outcomes among DHS household """ index = ["RIIPL_ID"] features = CachePopulation(population, index).set_index(index) sql = """ SELECT pop.riipl_id, SUM(ABS(no.opioid)) AS dhs_hh_opioid_rx, SUM(no.recovery) AS dhs_hh_recovery_rx, COUNT(o.outcome_any_dt) AS dhs_hh_outcomes FROM {population} pop LEFT JOIN {lookback} lb ON pop.riipl_id = lb.riipl_id LEFT JOIN {dim_date} dd ON lb.yrmo = dd.yrmo LEFT JOIN {household} hh ON pop.riipl_id = hh.riipl_id LEFT JOIN {medicaid_pharmacy} mp ON hh.riipl_id_hh = mp.riipl_id AND dd.date_dt = mp.dispensed_dt LEFT JOIN {ndc_opioids} no ON mp.ndc9_code = no.ndc LEFT JOIN {outcomes} o ON hh.riipl_id_hh = o.riipl_id AND dd.date_dt = o.outcome_any_dt GROUP BY pop.riipl_id """.format(**globals()) with Connection() as cxn: features = features.join( pd.read_sql(sql, cxn._connection).set_index(index)) features = features.fillna(0) labels = { "DHS_HH_OPIOID_RX": "Number of opioid prescriptions in household", "DHS_HH_RECOVERY_RX": "Number of recovery prescriptions in household", "DHS_HH_OUTCOMES": "Number of adverse outcomes in household" } SaveFeatures(features, out, manifest, population, labels)
def main(): index = ["RIIPL_ID"] features = CachePopulation(population, index).set_index(index) sql = """ SELECT pop.riipl_id, MAX(CASE WHEN rd.gender = 'M' THEN 1 ELSE 0 END) AS sex_m, MAX(CASE WHEN rd.primary_lang_cd = '01' THEN 1 ELSE 0 END) AS lang_spanish, MAX(CASE WHEN rd.primary_lang_cd = '31' THEN 1 ELSE 0 END) AS lang_portu, MAX(CASE WHEN rd.primary_lang_cd <> '00' AND rd.primary_lang_cd <> '01' AND rd.primary_lang_cd <> '31' THEN 1 ELSE 0 END) AS lang_other FROM {population} pop LEFT JOIN {lookback} lb ON pop.riipl_id = lb.riipl_id LEFT JOIN {medicaid_enrollment} me ON pop.riipl_id = me.riipl_id AND lb.yrmo = me.yrmo LEFT JOIN {recip_demo} rd ON me.re_unique_id = rd.recipient_id GROUP BY pop.riipl_id """.format(**globals()) with cx_Oracle.connect("/") as cxn: features = features.join(pd.read_sql(sql, cxn).set_index(index)) features = features.fillna(0) labels = { "SEX_M": "Sex is male", "LANG_SPANISH": "Primary language is Spanish", "LANG_PORTU": "Primary language is Portuguese", "LANG_OTHER": "Primary language is not English, Spanish, or Portuguese", } SaveFeatures(features, out, manifest, population, labels, bool_features=list(labels))
def main(): """ Social program payments from MEGA table """ index = ["RIIPL_ID"] features = CachePopulation(population, index).set_index(index) sql = """ SELECT pop.riipl_id, SUM(m.snap_payments) AS snap_payments, SUM(m.tanf_payments) AS tanf_payments, SUM(m.tdi_payments) AS tdi_payments, SUM(m.ui_payments) AS ui_payments, SUM(m.ssi_supplement) AS ssi_supplement, SUM(m.gpa_payments) AS gpa_payments, SUM(m.ccap_payments) AS ccap_payments FROM {population} pop LEFT JOIN {lookback} lb ON pop.riipl_id = lb.riipl_id INNER JOIN {mega} m ON pop.riipl_id = m.riipl_id AND lb.yrmo = m.month GROUP BY pop.riipl_id """.format(**globals()) with cx_Oracle.connect("/") as cxn: features = features.join(pd.read_sql(sql, cxn).set_index(index)) # Payments are comprehensive, so a missing value means $0 features = features.fillna(0) labels = { "SNAP_PAYMENTS" : "Total Food Stamps payments", "TANF_PAYMENTS" : "Total TANF payments", "TDI_PAYMENTS" : "Total TDI payments", "UI_PAYMENTS" : "Total Unemployment Insurance payments", "SSI_SUPPLEMENT" : "Total SSI supplement", "GPA_PAYMENTS" : "Total GPA payments", "CCAP_PAYMENTS" : "Total CCAP payments" } SaveFeatures(features, out, manifest, population, labels)
def main(): """ Size of DHS household and indicator for births in the DHS household from MEGA table """ index = ["RIIPL_ID"] features = CachePopulation(population, index).set_index(index) sql = """ SELECT pop.riipl_id, COUNT(DISTINCT m2.riipl_id) AS dhs_hh_size, MAX(CASE WHEN m1.age > 1 AND m2.age < 1 THEN 1 ELSE 0 END) AS dhs_hh_birth FROM {population} pop LEFT JOIN {lookback} lb ON pop.riipl_id = lb.riipl_id INNER JOIN {mega} m1 ON pop.riipl_id = m1.riipl_id AND lb.yrmo = m1.month INNER JOIN {mega} m2 ON m1.dhs_hh_id = m2.dhs_hh_id AND m1.month = m2.month GROUP BY pop.riipl_id """.format(**globals()) with Connection() as cxn: features = features.join(pd.read_sql(sql, cxn._connection).set_index(index)) features = features.fillna(0) labels = { "DHS_HH_SIZE" : "Size of household", "DHS_HH_BIRTH" : "New birth in household" } SaveFeatures(features, out, manifest, population, labels, bool_features=["DHS_HH_BIRTH"])
def main(): index = ["RIIPL_ID"] features = CachePopulation(population, index).set_index(index) sql = """ SELECT pop.riipl_id, COUNT(DISTINCT me.re_unique_id) AS medicaid_n_ids, MAX(CASE WHEN UPPER(dim.description) LIKE '%PREG%' THEN 1 ELSE 0 END) AS medicaid_pregnant, MAX(CASE WHEN UPPER(dim.description) LIKE '%PARTUM%' THEN 1 ELSE 0 END) AS medicaid_postpartum, MAX(CASE WHEN UPPER(dim.description) LIKE '%BLIND%' THEN 1 ELSE 0 END) AS medicaid_blind, MAX(CASE WHEN UPPER(dim.description) LIKE '%DISABLED%' THEN 1 ELSE 0 END) AS medicaid_disabled, MAX(CASE WHEN UPPER(dim.description) LIKE '%ALIEN%' THEN 1 ELSE 0 END) AS medicaid_alien, MAX(CASE WHEN UPPER(dim.description) LIKE '%CHILD%' THEN 1 ELSE 0 END) AS medicaid_child, MAX(CASE WHEN dim.needy_ind = 'Medically' THEN 1 ELSE 0 END) AS medicaid_med_needy, MAX(NVL(me.managed_care, 0)) AS medicaid_managed_care, MAX(NVL(me.premium_payment, 0)) AS medicaid_prem_payment, MAX(CASE WHEN rxn.payer_cd = 'B' THEN 1 ELSE 0 END) AS medicaid_payer_bhddh, MAX(CASE WHEN rxn.payer_cd = 'C' THEN 1 ELSE 0 END) AS medicaid_payer_cmap, MAX(CASE WHEN rxn.payer_cd = 'D' THEN 1 ELSE 0 END) AS medicaid_payer_doc, MAX(CASE WHEN rxn.payer_cd = 'H' THEN 1 ELSE 0 END) AS medicaid_payer_aids, MAX(CASE WHEN rxn.payer_cd = 'O' THEN 1 ELSE 0 END) AS medicaid_payer_ors, MAX(CASE WHEN rxn.payer_cd = 'R' THEN 1 ELSE 0 END) AS medicaid_payer_ripae FROM {population} pop LEFT JOIN {lookback} lb ON pop.riipl_id = lb.riipl_id LEFT JOIN {medicaid_enrollment} me ON pop.riipl_id = me.riipl_id AND lb.yrmo = me.yrmo LEFT JOIN {dim_aid_ctg_cde} dim ON me.aid_category = dim.aid_ctg_cde LEFT JOIN {recip_x_ssn} rxn ON me.re_unique_id = rxn.recipient_id GROUP BY pop.riipl_id """.format(**globals()) with cx_Oracle.connect("/") as cxn: features = features.join(pd.read_sql(sql, cxn).set_index(index)) features = features.fillna(0) labels = { "MEDICAID_N_IDS": "Number of unique Medicaid IDs", "MEDICAID_PREGNANT": "Medicaid eligibility category contains the word 'pregnant'", "MEDICAID_POSTPARTUM": "Medicaid eligibility category contains the word 'postpartum'", "MEDICAID_BLIND": "Medicaid eligibility category contains the word 'blind'", "MEDICAID_DISABLED": "Medicaid eligibility category contains the word 'disabled'", "MEDICAID_ALIEN": "Medicaid eligibility category contains the word 'alien'", "MEDICAID_CHILD": "Medicaid eligibility category contains the word 'child'", "MEDICAID_MED_NEEDY": "Eligible for Medicaid as medically needy", "MEDICAID_MANAGED_CARE": "Enrolled in Medicaid managed care", "MEDICAID_PREM_PAYMENT": "Enrolled in Medicaid premium payment plan", "MEDICAID_PAYER_BHDDH": "Medicaid payer code is BHDDH", "MEDICAID_PAYER_CMAP": "Enrolled in RI Community Medication Assistance Program", "MEDICAID_PAYER_DOC": "Medicaid payer code is Department of Corrections", "MEDICAID_PAYER_AIDS": "Medicaid payer code is Ryan White AIDS Drug Assistance Program", "MEDICAID_PAYER_ORS": "Medicaid payer code is ORS", "MEDICAID_PAYER_RIPAE": "Enrolled in RI Pharmaceutical Assistance to the Elderly" } SaveFeatures(features, out, manifest, population, labels, bool_features=[f for f in labels if f != "MEDICAID_N_IDS"])