def get_score_lines(df, conn, calc_date):
    """Score each metric column of *df* using the reference table in PHP96.

    Looks up the metrics active on *calc_date* in kcrsn.cd_stratum_metric,
    melts the matching value columns and their companion ``*_missing_data``
    flag columns to long format, stages the result in a temp table, and runs
    the scoring query.

    :param df: client-level DataFrame containing the id columns plus one
        value column and one ``*_missing_data`` column per metric
    :param conn: PHP96 connection (must support ``#`` temp tables)
    :param calc_date: date bound into the reference-table lookup
    :return: DataFrame of score lines returned by phs_stratum_score.sql
    """
    #Get scoring columns using reference table in PHP96
    cols = list(df.columns)
    qry = '''
    SELECT DISTINCT metric
    FROM kcrsn.cd_stratum_metric
    WHERE ? BETWEEN start_date AND end_date
    '''
    metric_df = pd.read_sql_query(qry, conn, params=[calc_date])
    #Set comprehension instead of set([...]); sorted() so the melt output
    #ordering is deterministic (raw set iteration order is not)
    metric_values = sorted(
        {x.strip(' ') for x in metric_df[metric_df.columns[0]]} & set(cols))
    metric_flags = ['{}_missing_data'.format(i) for i in metric_values]

    #Subset data, convert to long format
    id_cols = ['kcid', 'auth_no', 'program', 'age_group', 'calc_date']
    ##Get input values
    vals = pd.melt(df, id_vars=id_cols, value_vars=metric_values,
                   var_name='metric')
    ##Get missing flags; strip the suffix so flags join to values on 'metric'
    flags = pd.melt(df, id_vars=id_cols, value_vars=metric_flags,
                    var_name='metric', value_name='missing_data')
    flags["metric"] = flags["metric"].str.replace("_missing_data", "")

    #Copy data into PHP96, run query, output result
    conn.execute("DROP TABLE IF EXISTS #phs_score")
    sql_in = vals.merge(flags, how='outer')
    sql_in.to_sql('#phs_score', con=conn, index=False)
    sql = qryhelper.get_query('phs_stratum_score.sql')  #Read qry
    sql_out = pd.read_sql(sql, conn)
    return sql_out
def get_chronic_conditions(df, php96_conn, phclaims_conn, calc_date):
    """Add chronic-condition flags (asthma, diabetes, COPD, CVD) to *df*.

    Pulls CCW flags from PHClaims and CAA flags from PHP96, takes the max of
    each CAA/CCW pair, and sums the four conditions into a CHRON total.

    :param df: client DataFrame with 'auth_no' and 'p1_id' columns
    :param php96_conn: PHP96 connection (must support ``#`` temp tables)
    :param phclaims_conn: PHClaims connection
    :param calc_date: reference date bound into both queries
    :return: *df* plus 'asthma', 'diabetes', 'copd', 'cvd', 'CHRON', and
        'CHRON_missing_data'; the paired caa_*/ccw_* source columns dropped
    """
    #PHClaims query
    ph_sql = qryhelper.get_query('phs_chron_cond.sql')  #Read qry
    ph_sp = sqlparams.SQLParams('named', 'qmark')  #Parameterize qry
    ph_sql2, ph_params = ph_sp.format(ph_sql, {'calc_date': calc_date})
    ph_dat = pd.read_sql(ph_sql2, phclaims_conn, params=ph_params)  #Get result

    #Subset data for PHP96 query
    df_in = df[['auth_no', 'p1_id']]
    php96_conn.execute("DROP TABLE IF EXISTS #phs_chron_cond_temp")
    df_in.to_sql('#phs_chron_cond_temp', con=php96_conn, index=False)

    #PHP96 query
    sql = qryhelper.get_query('phs_chron_cond_caa.sql')  #Read qry
    sp = sqlparams.SQLParams('named', 'qmark')  #Parameterize qry
    sql2, params = sp.format(sql, {'calc_date': calc_date})
    #BUG FIX: params were formatted above but never bound to the query;
    #a qmark placeholder with no params fails (or binds nothing)
    php96_dat = pd.read_sql(sql2, php96_conn, params=params)

    #Combine php96 and phclaims data into main DF. Replace NA's
    for_merge = php96_dat.merge(ph_dat, how='outer')
    na_fill = {
        'caa_asthma': 0, 'caa_diabetes': 0, 'caa_copd': 0, 'caa_cvd': 0,
        'ccw_diabetes': 0, 'ccw_asthma': 0, 'ccw_cvd': 0, 'ccw_copd': 0,
        'CHRON_missing_data': 'Y'
    }
    out = df.merge(for_merge, how='left').fillna(na_fill)

    #We need to max 2 columns for each condition. Define the columns.
    asthma_cols = ['caa_asthma', 'ccw_asthma']
    diabetes_cols = ['caa_diabetes', 'ccw_diabetes']
    copd_cols = ['caa_copd', 'ccw_copd']
    cvd_cols = ['caa_cvd', 'ccw_cvd']
    drop_cols = asthma_cols + diabetes_cols + copd_cols + cvd_cols

    #Take max across each CAA/CCW pair so either source sets the flag
    out['asthma'] = out[asthma_cols].max(axis=1)
    out['diabetes'] = out[diabetes_cols].max(axis=1)
    out['copd'] = out[copd_cols].max(axis=1)
    out['cvd'] = out[cvd_cols].max(axis=1)

    #Sum total chronic conditions
    sum_cols = ['asthma', 'diabetes', 'copd', 'cvd']
    out['CHRON'] = out[sum_cols].sum(axis=1)
    out_fin = out.drop(drop_cols, axis=1)
    return out_fin
def get_asam(df, conn, calc_date):
    """Attach the ASAM flag to *df* via a temp-table-staged query.

    Stages the distinct (kcid, agency_id) pairs, runs phs_asam.sql
    parameterized on *calc_date*, and zero-fills clients with no ASAM row.
    """
    #Stage the distinct client/agency keys in a temp table
    key_pairs = df[['kcid', 'agency_id']].drop_duplicates()
    conn.execute("DROP TABLE IF EXISTS #phs_asam")
    key_pairs.to_sql('#phs_asam', conn, index=False)
    #Read and parameterize the query, then run it
    raw_sql = qryhelper.get_query('phs_asam.sql')
    binder = sqlparams.SQLParams('named', 'qmark')
    final_sql, bind_vals = binder.format(raw_sql, {'calc_date': calc_date})
    asam_df = pd.read_sql(final_sql, conn, params=bind_vals)
    #Merge back onto the input; missing ASAM means 0
    merged = df.merge(asam_df, how='left')
    return merged.fillna({'ASAM': 0})
def get_cj(df, conn, calc_date):
    """Add criminal-justice involvement columns (NUMCJ, LNGCJ) to *df*.

    Runs phs_cj.sql parameterized on *calc_date*, left-joins the result to
    the input, zero-fills clients with no CJ rows, and marks both fields as
    never missing.
    """
    raw_sql = qryhelper.get_query('phs_cj.sql')
    binder = sqlparams.SQLParams('named', 'qmark')
    final_sql, bind_vals = binder.format(raw_sql, {'calc_date': calc_date})
    cj_df = pd.read_sql(final_sql, conn, params=bind_vals)
    #Merge to the input df; absent CJ records count as 0
    result = df.merge(cj_df, how='left').fillna({'NUMCJ': 0, 'LNGCJ': 0})
    result['NUMCJ_missing_data'] = 'N'
    result['LNGCJ_missing_data'] = 'N'
    return result
def get_idu(df, conn, calc_date):
    """Add the IDU (injection drug use) flag to *df*.

    Stages the kcid list in a temp table, runs phs_idu.sql parameterized on
    *calc_date*, and zero-fills clients with no IDU row.

    :param df: client DataFrame with a 'kcid' column
    :param conn: PHP96 connection (must support ``#`` temp tables)
    :param calc_date: reference date bound into the query
    :return: *df* plus 'IDU' and 'IDU_missing_data' (always 'N')
    """
    df_in = df[['kcid']]
    conn.execute("DROP TABLE IF EXISTS #phs_idu")
    df_in.to_sql('#phs_idu', conn, index=False)
    sql = qryhelper.get_query('phs_idu.sql')
    sp = sqlparams.SQLParams('named', 'qmark')
    sql2, params = sp.format(sql, {'calc_date': calc_date})
    sql_out = pd.read_sql(sql2, conn, params=params)
    na_fill = {'IDU': 0}
    #FIX: use na_fill instead of repeating the literal (was defined unused)
    out = df.merge(sql_out, how='left').fillna(na_fill)
    out['IDU_missing_data'] = 'N'  #IDU is never considered missing
    return out
def get_high_util(df, conn, calc_date):
    """Add high-utilization counts and the HUTIL flag to *df* via HHSAW.

    Stages the kcid list in jdowns.phs_hutil_temp, runs phs_high_util.sql
    parameterized on *calc_date*, and zero-fills clients with no rows.
    """
    conn.execute("DROP TABLE IF EXISTS jdowns.phs_hutil_temp")
    #Stage the client IDs in HHSAW
    id_frame = df[['kcid']]
    id_frame.to_sql('phs_hutil_temp', con=conn, schema='jdowns', index=False)
    #Read, parameterize, and run the HHSAW query
    raw_sql = qryhelper.get_query('phs_high_util.sql')
    binder = sqlparams.SQLParams('named', 'qmark')
    final_sql, bind_vals = binder.format(raw_sql, {'calc_date': calc_date})
    util_df = pd.read_sql(final_sql, conn, params=bind_vals)
    #Join back onto the input; zero-fill utilization counts for no-row clients
    defaults = {'nSRS': 0, 'nDTX': 0, 'nITA': 0, 'nED': 0, 'nIP': 0,
                'HUTIL': 0}
    result = df.merge(util_df, how='left').fillna(defaults)
    result['HUTIL_missing_data'] = 'N'
    return result
def get_total_score(df_client, df_score, conn):
    """Sum score lines to a total per AU and assign a stratification level.

    Groups *df_score* by the AU identifiers, sums the score and takes the
    max of the missing-data flag, stages the totals in a global temp table,
    and runs phs_strat_level.sql to map totals to levels. Clients under
    age 6 are always forced to the high ('H') level.

    :param df_client: client DataFrame (must include an 'age' column)
    :param df_score: long-format score lines from get_score_lines
    :param conn: PHP96 connection (must support ``##`` global temp tables)
    :return: *df_client* plus the 'strat_level' column
    """
    #Identify the ID columns for total score summing
    id_cols = ['auth_no', 'program', 'age_group', 'calc_date']
    agg_spec = {'score': 'sum', 'missing_data': 'max'}
    #Get total score as sum of scorelines
    df_score_grp = df_score.groupby(id_cols).agg(agg_spec).reset_index()
    #Load data to SQL. FIX: index=False, consistent with the other loaders,
    #so the meaningless RangeIndex is not written as a spurious column
    conn.execute("DROP TABLE IF EXISTS ##phs_strat_level")
    df_score_grp.to_sql('##phs_strat_level', conn, index=False)
    #Read in and execute query
    sql = qryhelper.get_query('phs_strat_level.sql')
    sql_out = pd.read_sql(sql, conn)
    #Merge to client DF, force clients under 6 into high
    out = df_client.merge(sql_out, how='left')
    out.loc[out.age < 6, 'strat_level'] = 'H'
    return out
def get_foster(df, conn, calc_date):
    """Add the foster-care flag (FOSTR) to *df* via a PHP96 temp table."""
    join_keys = ['kcid', 'auth_no']
    #Stage the client keys in a temp table
    staged = df[join_keys]
    conn.execute("DROP TABLE IF EXISTS #phs_foster_temp")
    staged.to_sql('#phs_foster_temp', con=conn, index=False)
    #Read and parameterize the query
    raw_sql = qryhelper.get_query('phs_foster.sql')
    binder = sqlparams.SQLParams('named', 'qmark')
    final_sql, bind_vals = binder.format(raw_sql, {'calc_date': calc_date})
    #Run the query and merge FOSTR back onto the input
    fetched = pd.read_sql(final_sql, conn, params=bind_vals)
    fetched = fetched[join_keys + ['FOSTR']]
    result = df.merge(fetched, how='left')
    result['FOSTR'] = result['FOSTR'].fillna(0)
    #Foster status is never considered missing
    result['FOSTR_missing_data'] = 'N'
    return result
def get_calocus(df, conn, calc_date):
    """Add CALOCUS level (CALOC) and its missing-data flag to *df*.

    Stages the client keys in a temp table, runs phs_calocus.sql
    parameterized on *calc_date*, merges the result back, and zero-fills
    clients with no CALOCUS record.

    :param df: client DataFrame with kcid/agency_id/auth_no/program/age_group
    :param conn: PHP96 connection (must support ``#`` temp tables)
    :param calc_date: reference date bound into the query
    :return: *df* plus 'CALOC' and 'CALOC_missing_data'
    """
    #Subset DF, make temp tab in SSMS
    key_vars = ['kcid', 'agency_id', 'auth_no', 'program', 'age_group']
    key_vars2 = key_vars + ['CALOC', 'CALOC_missing_data']
    df_in = df[key_vars]  #Subset input df
    #FIX: drop any stale temp table before loading, matching the other
    #loaders; previously to_sql could collide with a leftover table
    conn.execute("DROP TABLE IF EXISTS #phs_calocus_temp")
    df_in.to_sql('#phs_calocus_temp', con=conn, index=False)  #make temp table
    #Read in and parameterize query
    sql = qryhelper.get_query('phs_calocus.sql')  #Read qry
    sp = sqlparams.SQLParams('named', 'qmark')  #Parameterize qry
    sql2, params = sp.format(sql, {'calc_date': calc_date})
    #Get result, join to main DF
    result = pd.read_sql(sql2, conn, params=params)  #Get query result
    result2 = result[key_vars2]  #Subset
    out = df.merge(result2, how='left')  #Merge to input df
    out['CALOC'] = out['CALOC'].fillna(0)  #Replace NA's with 0
    conn.execute("DROP TABLE IF EXISTS #phs_calocus_temp")  #Clean up
    return out
def get_clients(conn, start_date, end_date):
    """Return the stratification population active between the two dates."""
    query = qryhelper.get_query('phs_strat_population.sql')
    return pd.read_sql_query(query, conn, params=[start_date, end_date])
########################################## # Create production tables, load production data ########################################## #Grab columns that go into AU_STRATUM au_stratum = output.get_au_stratum(all_data) #get data output.update_au_stratum(au_stratum, php96) #load to prod #Get stratum IDs for other table products. au_stratum_ids = output.get_au_stratum_ids(pop_start, php96) #Grab columns that go into AU_STRATUM_SCORE au_stratum_score = output.get_au_stratum_score(score_lines, au_stratum_ids) #get data output.update_au_stratum_score(au_stratum_score, php96) #load to prod #Grab columns that go into AU_STRATUM_SDA au_stratum_sda = output.get_au_stratum_sda(all_data) #get data output.update_au_stratum_sda(au_stratum_sda, php96) #Run the script to link SDA records to AU_STRATUM sda_link_qry = qryhelper.get_query('phs_sda_link.sql') php96.execute(sda_link_qry) #Now, update AU_STRATUM_SHADOW with data from AU_STRATUM au_stratum_shadow_qry = qryhelper.get_query('update_au_stratum_shadow.sql') php96.execute(au_stratum_shadow_qry) #Finally, for Q3 2020, we need to set all AU_STRATUM pay multipliers to 1.00 update_shadow_payments_qry = qryhelper.get_query('update_shadow_payments.sql') php96.execute(update_shadow_payments_qry)
def update_au_stratum_sda(df, conn):
    """Load *df* into a temp table and run the AU_STRATUM_SDA update script.

    :param df: SDA rows to load (columns must match what
        update_au_stratum_sda.sql reads from the temp table)
    :param conn: PHP96 connection (must support ``#`` temp tables)
    """
    conn.execute('DROP TABLE IF EXISTS #au_stratum_sda_temp')
    #FIX: index=False, consistent with every other temp-table load, so the
    #DataFrame index is not written as a spurious column
    df.to_sql('#au_stratum_sda_temp', conn, index=False)
    qry = qryhelper.get_query('update_au_stratum_sda.sql')
    conn.execute(qry)
def get_au_stratum_ids(start_date, conn):
    """Fetch AU_STRATUM IDs for records from *start_date* onward.

    :param start_date: value bound into the query's single placeholder
    :param conn: PHP96 connection
    :return: DataFrame returned by phs_get_au_stratum_ids.sql
    """
    qry = qryhelper.get_query('phs_get_au_stratum_ids.sql')
    #FIX: params must be an ordered sequence; {start_date} built a set,
    #which is unordered and not an accepted params container for DB drivers
    out = pd.read_sql(qry, conn, params=[start_date])
    return out