def admission_claims_to_sql(update=True):
    """Load the cleaned admission-claims dataset into the SQL database.

    Reads the cleaned dataset, parses dates to the SQL YYYY-MM-DD format,
    and creates or updates the database table using the indicated primary
    and foreign keys.

    Args:
        update(bool): indicates if the database table is being updated or not

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    admission_claims = pd.read_csv(
        f"{processed_data}\\admit_claims.csv", low_memory=False
    )
    admission_claims = create_sql_dates(admission_claims)

    primary_key = ["claim_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)
    if update is True:
        update_sql_table(admission_claims, "admission_claims", conn, primary_key)
        # update_sql_table leaves a staging "temp" table behind; clean it up
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS temp;")
        print("admission_claims updated...")
    else:
        c = conn.cursor()
        # was an f-string with no placeholders; plain string is correct
        c.execute("DROP TABLE IF EXISTS admission_claims")
        create_table(
            admission_claims,
            "admission_claims",
            conn,
            primary_key,
            foreign_key,
            ref_table,
            ref_col,
        )
        print("admission_claims created...")
    conn.commit()
    conn.close()

    open(
        f"{update_logs_folder}\\admission_claims_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def center_days_to_sql(update=True):
    """Load the cleaned center-days dataset into the SQL database.

    Reads the cleaned dataset, parses dates to the SQL YYYY-MM-DD format,
    and creates or updates the database table using the indicated primary
    and foreign keys.

    Args:
        update(bool): indicates if the database table is being updated or not

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    df = pd.read_csv(f"{processed_data}\\center_days.csv")
    df = create_sql_dates(df, ["as_of"])
    df.rename(columns={"center_days": "days"}, inplace=True)

    pk = ["member_id", "days"]
    fk = ["member_id"]
    parent_tables = ["ppts"]
    parent_cols = ["member_id"]

    conn = sqlite3.connect(database_path)
    if update is True:
        update_sql_table(df, "center_days", conn, pk)
        conn.cursor().execute("DROP TABLE IF EXISTS temp;")
        print("center_days updated...")
    else:
        create_table(df, "center_days", conn, pk, fk, parent_tables, parent_cols)
        print("center_days created...")
    conn.commit()
    conn.close()

    stamp = str(pd.to_datetime("today").date())
    open(f"{update_logs_folder}\\center_days_{stamp}.txt", "a").close()
def auths_to_sql(update=True):
    """Load the cleaned authorizations dataset into the SQL database.

    Reads the cleaned dataset, parses dates to the SQL YYYY-MM-DD format,
    and (re)creates the database table using the indicated primary and
    foreign keys. Whether updating or creating, the table is dropped and
    rebuilt entirely.

    Args:
        update(bool): indicates if the database table is being updated or not

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    auths = pd.read_csv(f"{processed_data}\\auths.csv")
    auths = create_sql_dates(auths)

    primary_key = ["member_id", "authorization_number"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)
    # the update and create branches were byte-identical apart from the
    # printed message; collapse the duplicated drop/create sequence
    c = conn.cursor()
    c.execute("DROP TABLE IF EXISTS authorizations")
    create_table(
        auths, "authorizations", conn, primary_key, foreign_key, ref_table, ref_col
    )
    if update is True:
        print("authorizations updated...")
    else:
        print("authorizations created...")
    conn.commit()
    conn.close()

    open(
        f"{update_logs_folder}\\authorizations_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def er_only_to_sql(update=True):
    """Load the cleaned ER-only visits dataset into the SQL database.

    Reads the cleaned dataset, parses dates to the SQL YYYY-MM-DD format,
    and (re)creates the database table using the indicated primary and
    foreign keys. Whether updating or creating, the table is dropped and
    rebuilt entirely.

    NOTE(review): the previous docstring said updates drop any visit with
    an admission date more than 3 months from today, but no such filter
    exists in the code — confirm intended behavior.

    Args:
        update(bool): indicates if the database table is being updated or not

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    er_only = pd.read_csv(f"{processed_data}\\er_only.csv")
    er_only = create_sql_dates(er_only)

    primary_key = ["visit_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)
    # both branches were byte-identical apart from the printed message;
    # collapse the duplicated drop/create sequence
    c = conn.cursor()
    c.execute("DROP TABLE IF EXISTS er_only")
    create_table(
        er_only, "er_only", conn, primary_key, foreign_key, ref_table, ref_col
    )
    if update is True:
        print("er_only updated...")
    else:
        print("er_only created...")
    conn.commit()
    conn.close()

    open(
        f"{update_logs_folder}\\er_only_{str(pd.to_datetime('today').date())}.txt", "a"
    ).close()
def pnuemo_to_sql(update=True):
    """Load the cleaned pneumo dataset into the SQL database.

    Reads the cleaned dataset, parses dates to the SQL YYYY-MM-DD format,
    and creates or updates the database table using the indicated primary
    and foreign keys.

    Args:
        update(bool): indicates if the database table is being updated or not

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    df = pd.read_csv(f"{processed_data}\\pneumo.csv")
    df = create_sql_dates(df)

    pk = ["member_id", "date_administered"]
    fk = ["member_id"]
    parent_tables = ["ppts"]
    parent_cols = ["member_id"]

    conn = sqlite3.connect(database_path)
    if update is True:
        update_sql_table(df, "pneumo", conn, pk)
        conn.cursor().execute("DROP TABLE IF EXISTS temp;")
        print("pneumo updated...")
    else:
        create_table(df, "pneumo", conn, pk, fk, parent_tables, parent_cols)
        print("pneumo created...")
    conn.commit()
    conn.close()

    stamp = str(pd.to_datetime("today").date())
    open(f"{update_logs_folder}\\pnuemo_{stamp}.txt", "a").close()
def centers_to_sql(update=True):
    """Load the cleaned centers dataset into the SQL database.

    Reads the cleaned dataset, parses dates to the SQL YYYY-MM-DD format,
    and creates or updates the database table using the indicated primary
    and foreign keys.

    Args:
        update(bool): indicates if the database table is being updated or not

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    centers = pd.read_csv(f"{processed_data}\\centers.csv")
    centers = create_sql_dates(centers)

    primary_key = ["member_id", "center", "start_date"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)
    if update:
        update_sql_table(centers, "centers", conn, primary_key)
        # consistency fix: every sibling updater drops the staging "temp"
        # table left behind by update_sql_table; this one previously did not
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS temp;")
        print("centers updated...")
    else:
        create_table(
            centers, "centers", conn, primary_key, foreign_key, ref_table, ref_col
        )
        print("centers created...")
    conn.commit()
    conn.close()

    open(
        f"{update_logs_folder}\\centers_{str(pd.to_datetime('today').date())}.txt", "a"
    ).close()
def grievances_to_sql(update=True):
    """Load the cleaned grievances dataset into the SQL database.

    Reads the cleaned dataset with its four date columns parsed, converts
    those columns to plain dates (the SQL YYYY-MM-DD shape), and creates
    or updates the database table using the indicated primary and foreign
    keys.

    Args:
        update(bool): indicates if the database table is being updated or not

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    date_cols = [
        "date_grievance_received",
        "date_of_resolution",
        "date_of_oral_notification",
        "date_of_written_notification",
    ]
    grievances = pd.read_csv(
        f"{processed_data}\\grievances.csv", parse_dates=date_cols
    )
    for date_col in date_cols:
        grievances[date_col] = grievances[date_col].dt.date

    pk = ["griev_id"]
    fk = ["member_id"]
    parent_tables = ["ppts"]
    parent_cols = ["member_id"]

    conn = sqlite3.connect(database_path)
    if update is True:
        update_sql_table(grievances, "grievances", conn, pk)
        conn.cursor().execute("DROP TABLE IF EXISTS temp;")
        print("grievances updated...")
    else:
        create_table(grievances, "grievances", conn, pk, fk, parent_tables, parent_cols)
        print("grievances created...")
    conn.commit()
    conn.close()

    stamp = str(pd.to_datetime("today").date())
    open(f"{update_logs_folder}\\grievances_{stamp}.txt", "a").close()
def create_incidents_agg_tables(
    params=("2017-07-01", end_date),
    incident_table="falls",
    db_path=agg_db_path,
    freq="MS",
    update=True,
):
    """Create an aggregate table of incident values.

    Loops through indicator/column names and matching functions, creates a
    dataframe with a month column and a value column, and merges each value
    column into a master dataframe keyed on month.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        incident_table(str): incident to use to create the aggregate table
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" — group values monthly or quarterly
        update(bool): if the table is being updated or created

    Returns:
        DataFrame: the created dataframe

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    i = Incidents()
    # Luigi may pass update as the string "True"/"False"; normalize once
    update = str(update).lower() == "true"
    if update:
        if freq == "MS":
            params = (i.last_month()[0], i.month_to_date()[1])
        else:
            params = (i.last_quarter()[0], i.month_to_date()[1])

    incidents_func = {
        "total": i.total_incidents,
        "unique_ppts": i.ppts_w_incident,
        "num_ppts_with_multiple": i.num_of_incident_repeaters,
        "num_by_ppts_with_multiple": i.incidents_by_repeaters,
        "percent_without": i.percent_without_incident_in_period,
        "percent_by_repeaters": i.percent_by_repeaters,
        "ppts_with_above_avg": i.ppts_above_avg,
        "percent_of_ppts_with_above_avg": i.percent_of_ppts_over_avg,
    }
    # was "additonal_funcs" (typo)
    additional_funcs = {
        "falls": {
            "major_harm_percent": i.major_harm_percent,
            "total_adjusted": i.adjusted_incident_count,
            "adjusted_per100MM": i.adjusted_per_100MM,
        },
        "infections": {
            "sepsis_per_100MM": i.sepsis_per_100,
            "uti_per_100MM": i.uti_per_100,
        },
        "med_errors": {
            "major_harm_percent": i.major_harm_percent,
            "high_risk": i.high_risk_med_error_count,
        },
        "wounds": {
            "avg_healing_time": i.avg_wound_healing_time,
            "percent_unstageable": i.unstageable_wound_percent,
            "pressure_ulcer_per_100": i.pressure_ulcer_per_100,
        },
        "burns": {
            "third_degree_rate": i.third_degree_burn_rate,
            "rn_assessment_percent": i.rn_assessment_following_burn_percent,
        },
    }
    all_funcs = {**incidents_func, **additional_funcs[incident_table]}

    df = i.loop_plot_df(
        i.incident_per_100MM, params, freq=freq, additional_func_args=[incident_table]
    ).rename(columns={"Month": "month", "Value": "per_100MM"})

    # these indicator functions do not take the incident name as an argument
    no_arg_cols = {
        "sepsis_per_100MM",
        "uti_per_100MM",
        "high_risk",
        "avg_healing_time",
        "percent_unstageable",
        "pressure_ulcer_per_100",
        "third_degree_rate",
        "rn_assessment_percent",
    }
    for col_title, func in all_funcs.items():
        if col_title in no_arg_cols:
            dff = i.loop_plot_df(func, params, freq=freq)
        else:
            dff = i.loop_plot_df(
                func, params, freq=freq, additional_func_args=[incident_table]
            )
        dff = dff.rename(columns={"Month": "month", "Value": col_title})
        df = df.merge(dff, on="month", how="left")

    df.to_csv(f"{processed_data}\\{incident_table}_agg.csv", index=False)

    table_name = f"{incident_table}_q" if freq == "QS" else incident_table
    conn = sqlite3.connect(db_path)
    if update:
        stu.update_sql_table(df, table_name, conn, ["month"], agg_table=True)
    else:
        stu.create_table(df, table_name, conn, ["month"], agg_table=True)
    conn.commit()
    conn.close()

    open(
        f"{update_logs_folder}\\{incident_table}_agg{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
    return df
def create_center_agg_table(
    params=("2005-12-01", end_date), db_path=agg_db_path, freq="MS", update=True
):
    """Create an aggregate table of center-related enrollment values.

    Loops through indicator/column names and matching functions, creates a
    dataframe with a month column and a value column, and merges each value
    column into a master dataframe keyed on month.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" — group values monthly or quarterly
        update(bool): if the table is being updated or created

    Returns:
        DataFrame: the created dataframe

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    ce = CenterEnrollment()
    # Luigi may pass update as the string "True"/"False"; normalize once
    update = str(update).lower() == "true"
    if update:
        if freq == "MS":
            params = (ce.last_month()[0], ce.month_to_date()[1])
        else:
            params = (ce.last_quarter()[0], ce.month_to_date()[1])

    enrollment_funcs = {
        "_disenrolled": ce.disenrolled,
        "_voluntary_disenrolled": ce.voluntary_disenrolled,
        "_enrolled": ce.enrolled,
        "_deaths": ce.deaths,
    }
    center_shorthand_dict = {
        "Providence": "pvd",
        "Woonsocket": "woon",
        "Westerly": "wes",
    }

    # census columns for every center — replaces three copy-pasted merge blocks
    enrollment_agg = None
    for center, abr in center_shorthand_dict.items():
        dff = ce.loop_plot_df(
            ce.census_on_end_date, params, freq=freq, additional_func_args=[center]
        ).rename(columns={"Month": "month", "Value": f"{abr}_census"})
        if enrollment_agg is None:
            enrollment_agg = dff
        else:
            enrollment_agg = enrollment_agg.merge(dff, on="month", how="left")

    for col_title, func in enrollment_funcs.items():
        for center, center_abr in center_shorthand_dict.items():
            dff = ce.loop_plot_df(
                func, params, freq=freq, additional_func_args=[center]
            ).rename(columns={"Month": "month", "Value": center_abr + col_title})
            enrollment_agg = enrollment_agg.merge(dff, on="month", how="left")

    dc_attendance = create_dc_attnd_table(params, freq)
    enrollment_agg = enrollment_agg.merge(dc_attendance, on="month", how="left")

    table_name = "center_enrollment_q" if freq == "QS" else "center_enrollment"
    conn = sqlite3.connect(db_path)
    if update:
        stu.update_sql_table(enrollment_agg, table_name, conn, ["month"], agg_table=True)
    else:
        stu.create_table(enrollment_agg, table_name, conn, ["month"], agg_table=True)
    conn.commit()
    conn.close()

    open(
        f"{update_logs_folder}\\center_agg{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
    return enrollment_agg
def create_team_incidents_agg_table(
    params=("2017-07-01", end_date), db_path=agg_db_path, freq="MS", update=True
):
    """Create an aggregate table of team incident values.

    Loops through indicator/column names and matching functions, creates a
    dataframe with a month column and a value column, and merges each value
    column into a master dataframe keyed on month.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" — group values monthly or quarterly
        update(bool): if the table is being updated or created

    Returns:
        DataFrame: the created dataframe

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    t = Team()
    if str(update).lower() == "true":
        update = True
        start = t.last_month()[0] if freq == "MS" else t.last_quarter()[0]
        params = (start, t.month_to_date()[1])
    else:
        update = False

    incident_types = ["burns", "falls", "infections", "med_errors", "wounds"]
    suffix_funcs = {
        "": t.total_incidents_by_team,
        "_per_100MM": t.incidents_per_member_by_team,
        "_unique_ppts": t.ppts_w_incident_by_team,
    }

    # start from the month spine only; every indicator column is merged on
    incidents_team = t.loop_plot_team_df(t.ppts_on_team, params, freq=freq)[["month"]]
    for suffix, func in suffix_funcs.items():
        for incident in incident_types:
            cols = t.loop_plot_team_df(
                func,
                params,
                freq=freq,
                additional_func_args=[incident],
                col_suffix=f"_{incident}{suffix}",
            )
            incidents_team = incidents_team.merge(cols, on="month", how="left")

    incidents_team.to_csv(f"{processed_data}\\incidents_team.csv", index=False)

    table_name = "team_incidents_q" if freq == "QS" else "team_incidents"
    conn = sqlite3.connect(db_path)
    if update:
        stu.update_sql_table(incidents_team, table_name, conn, ["month"], agg_table=True)
    else:
        stu.create_table(incidents_team, table_name, conn, ["month"], agg_table=True)
    conn.commit()
    conn.close()

    stamp = str(pd.to_datetime("today").date())
    open(f"{update_logs_folder}\\team_incidents_agg{stamp}.txt", "a").close()
    return incidents_team
def create_team_info_agg_table(
    params=("2017-07-01", end_date), db_path=agg_db_path, freq="MS", update=True
):
    """Create an aggregate table of team information values.

    Loops through indicator/column names and matching functions, creates a
    dataframe with a month column and a value column, and merges each value
    column into a master dataframe keyed on month.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" — group values monthly or quarterly
        update(bool): if the table is being updated or created

    Returns:
        DataFrame: the created dataframe

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    t = Team()
    if str(update).lower() == "true":
        update = True
        start = t.last_month()[0] if freq == "MS" else t.last_quarter()[0]
        params = (start, t.month_to_date()[1])
    else:
        update = False

    indicator_funcs = {
        "avg_age": t.avg_age_by_team,
        "percent_primary_non_english": t.percent_primary_non_english_by_team,
        "avg_years_enrolled": t.avg_years_enrolled_by_team,
        "ppts": t.ppts_on_team,
        "mortality": t.mortality_by_team,
    }

    # start from the month spine only; every indicator column is merged on
    team_info_df = t.loop_plot_team_df(t.ppts_on_team, params, freq=freq)[["month"]]
    for suffix, func in indicator_funcs.items():
        cols = t.loop_plot_team_df(func, params, freq=freq, col_suffix=f"_{suffix}")
        team_info_df = team_info_df.merge(cols, on="month", how="left")

    team_info_df.to_csv(f"{processed_data}\\team_info_df.csv", index=False)

    table_name = "team_info_q" if freq == "QS" else "team_info"
    conn = sqlite3.connect(db_path)
    if update:
        stu.update_sql_table(team_info_df, table_name, conn, ["month"], agg_table=True)
    else:
        stu.create_table(team_info_df, table_name, conn, ["month"], agg_table=True)
    conn.commit()
    conn.close()

    stamp = str(pd.to_datetime("today").date())
    open(f"{update_logs_folder}\\team_info_agg{stamp}.txt", "a").close()
    return team_info_df
def create_team_utl_agg_table(
    params=("2017-07-01", end_date), db_path=agg_db_path, freq="MS", update=True
):
    """Create an aggregate table of team utilization values.

    Loops through indicator/column names and matching functions, creates a
    dataframe with a month column and a value column, and merges each value
    column into a master dataframe keyed on month.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" — group values monthly or quarterly
        update(bool): if the table is being updated or created

    Returns:
        DataFrame: the created dataframe

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    t = Team()
    # Luigi may pass update as the string "True"/"False"; normalize once
    update = str(update).lower() == "true"
    if update:
        if freq == "MS":
            params = (t.last_month()[0], t.month_to_date()[1])
        else:
            params = (t.last_quarter()[0], t.month_to_date()[1])

    utilization_types = ["acute", "psych", "skilled", "respite", "custodial"]
    utilization_need_args = {
        "_admissions": t.admissions_by_team,
        "_days": t.days_by_team,
        "_discharges": t.discharges_by_team,
        "_alos": t.alos_for_discharges_by_team,
    }
    # was named `utilization`, which the original code later shadowed with a
    # loop variable of the same name; renamed to keep the two distinct
    utilization_funcs = {
        "readmits": t.readmits_by_team,
        "custodial_ppts": t.ppts_in_custodial_by_team,
        "percent_of_discharges_with_mortality_in_30": t.percent_of_discharges_with_mortality_in_30_by_team,
        "mortality_within_30_days_of_discharge": t.mortality_within_30days_of_discharge_rate_by_team,
        "no_hosp_admission_since_enrollment": t.no_hosp_admission_since_enrollment_by_team,
        "er_only_visits": t.er_only_visits_by_team,
    }

    utl_team = t.loop_plot_team_df(t.ppts_on_team, params, freq=freq)
    utl_team.drop(
        [col for col in utl_team.columns if col != "month"], axis=1, inplace=True
    )

    for col_title, func in utilization_funcs.items():
        dff = t.loop_plot_team_df(func, params, freq=freq, col_suffix=f"_{col_title}")
        utl_team = utl_team.merge(dff, on="month", how="left")

    for col_title, func in utilization_need_args.items():
        for utl_type in utilization_types:
            dff = t.loop_plot_team_df(
                func,
                params,
                freq=freq,
                additional_func_args=[utl_type],
                col_suffix=f"_{utl_type}{col_title}",
            )
            utl_team = utl_team.merge(dff, on="month", how="left")

    utl_team.to_csv(f"{processed_data}\\utl_team.csv", index=False)

    table_name = "team_utl_q" if freq == "QS" else "team_utl"
    conn = sqlite3.connect(db_path)
    if update:
        stu.update_sql_table(utl_team, table_name, conn, ["month"], agg_table=True)
    else:
        stu.create_table(utl_team, table_name, conn, ["month"], agg_table=True)
    conn.commit()
    conn.close()

    open(
        f"{update_logs_folder}\\team_utilization_agg{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
    return utl_team
def create_quality_agg_table(
    params=("2005-12-01", end_date), db_path=agg_db_path, freq="MS", update=True
):
    """Create an aggregate table of quality values.

    Loops through indicator/column names and matching functions, creates a
    dataframe with a month column and a value column, and merges each value
    column into a master dataframe keyed on month.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" — group values monthly or quarterly
        update(bool): if the table is being updated or created

    Returns:
        DataFrame: the created dataframe

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    q = Quality()
    if str(update).lower() == "true":
        update = True
        start = q.last_month()[0] if freq == "MS" else q.last_quarter()[0]
        params = (start, q.month_to_date()[1])
    else:
        update = False

    quality_func = {
        "mortality_within_30_days_of_discharge": q.mortality_within_30days_of_discharge_rate,
        "percent_of_discharges_with_mortality_in_30": q.percent_of_discharges_with_mortality_in_30,
        "no_hosp_admission_since_enrollment": q.no_hosp_admission_since_enrollment,
        "no_hosp_admission_last_year": q.no_hosp_admission_last_year,
        "pneumo_rate": q.pneumo_rate,
        "influ_rate": q.influ_rate,
        "avg_days_until_nf_admission": q.avg_days_until_nf_admission,
    }

    def _frame(func, col_name):
        # one indicator as a (month, col_name) dataframe
        return q.loop_plot_df(func, params, freq=freq).rename(
            columns={"Month": "month", "Value": col_name}
        )

    quality_agg = _frame(q.mortality_rate, "mortality_rate")
    for col_title, func in quality_func.items():
        quality_agg = quality_agg.merge(
            _frame(func, col_title), on="month", how="left"
        )

    quality_agg.to_csv(f"{processed_data}\\quality_agg.csv", index=False)

    table_name = "quality_q" if freq == "QS" else "quality"
    conn = sqlite3.connect(db_path)
    if update:
        stu.update_sql_table(quality_agg, table_name, conn, ["month"], agg_table=True)
    else:
        stu.create_table(quality_agg, table_name, conn, ["month"], agg_table=True)
    conn.commit()
    conn.close()

    stamp = str(pd.to_datetime("today").date())
    open(f"{update_logs_folder}\\quality_agg{stamp}.txt", "a").close()
    return quality_agg
def create_utilization_table(
    params=("2017-07-01", end_date), db_path=agg_db_path, freq="MS", update=True
):
    """Create an aggregate table of utilization values.

    Loops through indicator/column names and matching functions, creates a
    dataframe with a month column and a value column, and merges each value
    column into a master dataframe keyed on month.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" — group values monthly or quarterly
        update(bool): if the table is being updated or created

    Returns:
        DataFrame: the created dataframe

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    u = Utilization()
    if str(update).lower() == "true":
        update = True
        start = u.last_month()[0] if freq == "MS" else u.last_quarter()[0]
        params = (start, u.month_to_date()[1])
    else:
        update = False

    def _col(func, col_name, extra_args=None):
        # one indicator as a (month, col_name) dataframe
        if extra_args is None:
            frame = u.loop_plot_df(func, params, freq=freq)
        else:
            frame = u.loop_plot_df(
                func, params, freq=freq, additional_func_args=extra_args
            )
        return frame.rename(columns={"Month": "month", "Value": col_name})

    utilization_types = ["acute", "psych", "skilled", "respite", "custodial"]
    utilization_func = {
        "_admissions": u.admissions_count,
        "_admissions_per_100MM": u.admissions_per_100MM,
        "_discharges": u.discharges_count,
        "_alos": u.alos,
        "_los_per_100MM": u.los_per_100mm,
        "_days": u.utilization_days,
        "_days_per_100MM": u.days_per_100MM,
        "_unique_admissions": u.unique_admissions_count,
        "_weekend_admissions": u.weekend_admissions_count,
        "_weekend_percent": u.weekend_admission_percent,
    }
    er_visit_func = {
        "er_visits_per_100MM": u.admissions_per_100MM,
        "er_visits": u.admissions_count,
    }
    nf_only_funcs = {
        "_per_100MM": u.ppts_in_utl_per_100MM,
        "_percent": u.ppts_in_utl_percent,
    }

    utl_agg = _col(u.er_to_inp_rate, "er_to_inp_rate")

    for suffix, func in utilization_func.items():
        for utl in utilization_types:
            utl_agg = utl_agg.merge(
                _col(func, utl + suffix, [utl]), on="month", how="left"
            )

    for utl in ["acute", "psych", "er_only"]:
        utl_agg = utl_agg.merge(
            _col(u.readmits_30day_rate, utl + "_30_day_readmit_rate", [utl]),
            on="month",
            how="left",
        )

    for col_name, func in er_visit_func.items():
        utl_agg = utl_agg.merge(
            _col(func, col_name, ["er_only"]), on="month", how="left"
        )

    for suffix, func in nf_only_funcs.items():
        for nf_type in ["skilled", "respite", "custodial", "alfs"]:
            utl_agg = utl_agg.merge(
                _col(func, nf_type + suffix, [nf_type]), on="month", how="left"
            )

    utl_agg = utl_agg.merge(
        _col(u.percent_nf_discharged_to_higher_loc, "nf_higher_loc_discharge_percent"),
        on="month",
        how="left",
    )

    utl_agg.to_csv(f"{processed_data}\\utilization_agg.csv", index=False)

    table_name = "utilization_q" if freq == "QS" else "utilization"
    conn = sqlite3.connect(db_path)
    if update:
        stu.update_sql_table(utl_agg, table_name, conn, ["month"], agg_table=True)
    else:
        stu.create_table(utl_agg, table_name, conn, ["month"], agg_table=True)
    conn.commit()
    conn.close()

    stamp = str(pd.to_datetime("today").date())
    open(f"{update_logs_folder}\\utilization_agg{stamp}.txt", "a").close()
    return utl_agg
def claims_detail_to_sql(update=True):
    """Load the cleaned claims-detail dataset into the SQL database.

    Reads the cleaned dataset, parses dates to the SQL YYYY-MM-DD format,
    and (re)creates the database table using the indicated primary and
    foreign keys. Whether updating or creating, the table is dropped and
    rebuilt entirely.

    Args:
        update(bool): indicates if the database table is being updated or not

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    claims_detail = pd.read_csv(
        f"{processed_data}\\claims_detail.csv", low_memory=False
    )
    claims_detail = create_sql_dates(claims_detail, ["first_dos", "last_dos"])

    primary_key = ["claim_line_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)
    # the update and create branches were byte-identical apart from the
    # printed message; collapse the duplicated drop/create sequence
    c = conn.cursor()
    c.execute("DROP TABLE IF EXISTS claims_detail")
    create_table(
        claims_detail,
        "claims_detail",
        conn,
        primary_key,
        foreign_key,
        ref_table,
        ref_col,
    )
    if update is True:
        print("claims_detail updated...")
    else:
        print("claims_detail created...")
    conn.commit()
    conn.close()

    open(
        f"{update_logs_folder}\\claims_detail_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def inpatient_to_sql(update=True):
    """Load the cleaned inpatient dataset into the SQL database.

    Reads the cleaned dataset, parses dates to the SQL YYYY-MM-DD format,
    and (re)creates the inpatient table using the indicated primary and
    foreign keys. Whether updating or creating, the table is dropped and
    rebuilt entirely. On the initial create, six filtered views are also
    created (acute, psych, nursing_home, skilled, respite, custodial);
    SQLite views persist across later table rebuilds.

    NOTE(review): the previous docstring claimed updates drop visits with
    an admission date more than 3 months from today; no such filter exists
    here — confirm intended behavior.

    Args:
        update(bool): indicates if the database table is being updated or not

    Output:
        creates an empty text file in the log folder so the Luigi pipeline
        can be told the process is complete.
    """
    acute = pd.read_csv(f"{processed_data}\\inpatient.csv")
    acute = create_sql_dates(acute)

    primary_key = ["visit_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)
    # drop/create was duplicated in both branches; do it once
    c = conn.cursor()
    c.execute("DROP TABLE IF EXISTS inpatient")
    create_table(
        acute, "inpatient", conn, primary_key, foreign_key, ref_table, ref_col
    )
    if update is True:
        print("inpatient updated...")
    else:
        print("inpatient created...")
        c.execute(
            """CREATE VIEW acute AS
            SELECT * FROM inpatient
            WHERE admission_type = 'Acute Hospital';
            """
        )
        c.execute(
            """CREATE VIEW psych AS
            SELECT * FROM inpatient
            WHERE admission_type = 'Psych Unit / Facility';
            """
        )
        c.execute(
            """CREATE VIEW nursing_home AS
            SELECT * FROM inpatient
            WHERE (admission_type = 'Nursing Home'
                OR admission_type = 'Rehab Unit / Facility'
                OR admission_type = 'End of Life')
            """
        )
        c.execute(
            """CREATE VIEW skilled AS
            SELECT * FROM inpatient
            WHERE (admission_type = 'Nursing Home'
                OR admission_type = 'Rehab Unit / Facility')
                AND admit_reason = 'skilled';
            """
        )
        c.execute(
            """CREATE VIEW respite AS
            SELECT * FROM inpatient
            WHERE (admission_type = 'Nursing Home'
                OR admission_type = 'Rehab Unit / Facility')
                AND admit_reason = 'respite';
            """
        )
        c.execute(
            """CREATE VIEW custodial AS
            SELECT * FROM inpatient
            WHERE (admission_type = 'Nursing Home'
                OR admission_type = 'Rehab Unit / Facility'
                OR admission_type = 'End of Life')
                AND admit_reason = 'custodial';
            """
        )
    conn.commit()
    conn.close()

    open(
        f"{update_logs_folder}\\inpatient_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def create_demographic_agg_table(params=("2005-12-01", end_date), db_path=agg_db_path, freq="MS", update=True):
    """Create or update the aggregate table of demographic values.

    Builds one column per demographic indicator by looping over the
    indicator/function mapping, joining each month/value frame onto a
    master frame keyed on month.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" indicates if values should be grouped
            monthly or quarterly
        update(bool): if the table being updated or created

    Returns:
        DataFrame: the created dataframe

    Output:
        creates empty text file in log folder so the Luigi pipeline can be
        told the process is complete.
    """
    d = Demographics()

    # Luigi may hand the flag over as a string, so normalize before use.
    update = str(update).lower() == "true"
    if update:
        start = d.last_month()[0] if freq == "MS" else d.last_quarter()[0]
        params = (start, d.month_to_date()[1])

    demographic_func = {
        "dual_enrolled": d.dual_count,
        "percent_dual_enrolled": d.percent_dual,
        "medicare_only": d.medicare_only_count,
        "percent_medicare_only": d.percent_medicare_only,
        "medicaid_only": d.medicaid_only_count,
        "percent_medicaid_only": d.percent_medicaid_only,
        "private_pay": d.private_pay_count,
        "percent_private_pay": d.percent_private_pay,
        "percent_primary_non_english": d.percent_primary_non_english,
        "percent_non_white": d.percent_non_white,
        "living_in_community": d.living_in_community,
        "percent_living_in_community": d.living_in_community_percent,
        "percent_below_65": d.percent_age_below_65,
        "percent_female": d.percent_female,
        "bh_dx_percent": d.behavorial_dx_percent,
        "six_chronic_conditions": d.over_six_chronic_conditions_percent,
        "percent_attending_dc": d.percent_attending_dc,
    }

    def _indicator_frame(func, title):
        # One month/value frame per indicator, with columns normalized
        # for the merge below.
        return d.loop_plot_df(func, params, freq=freq).rename(
            columns={"Month": "month", "Value": title}
        )

    demo_agg = _indicator_frame(d.avg_age, "avg_age")
    for col_title, func in demographic_func.items():
        demo_agg = demo_agg.merge(
            _indicator_frame(func, col_title), on="month", how="left"
        )

    demo_agg.to_csv(f"{processed_data}\\demographics_agg.csv", index=False)

    table_name = "demographics_q" if freq == "QS" else "demographics"

    conn = sqlite3.connect(db_path)
    writer = stu.update_sql_table if update else stu.create_table
    writer(demo_agg, table_name, conn, ["month"], agg_table=True)
    conn.commit()
    conn.close()

    # Empty marker file tells the Luigi pipeline this step is complete.
    open(
        f"{update_logs_folder}\\demographic_agg{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()

    return demo_agg
def payments_to_sql(update=True): """ Reads a cleaned dataset Parse the dates to match SQL format of YYYY-MM-DD Creates or Updates the database table using the indicated primary keys and foreign keys If being updated only payments with a date greater than or equal to the most recent date in the database table are added Args: update(bool): indicates if database table is being updated or not Output: creates empty text fill in log folder so the Lugi pipeline can be told the process is complete. """ payments = pd.read_csv(f"{processed_data}\\payments.csv", low_memory=False) if update: h = Helpers(database_path) first_id_val = h.single_value_query( "SELECT MAX(id_col) FROM payments") + 1 max_date = h.single_value_query("SELECT MAX(date_paid) FROM payments") payments = payments[payments["date_paid"] >= max_date].copy() payments.reset_index(inplace=True, drop=True) payments["id_col"] = list( range(first_id_val, first_id_val + payments.shape[0])) else: payments.reset_index(inplace=True) payments.rename(columns={"index": "id_col"}, inplace=True) payments = create_sql_dates(payments) primary_key = ["id_col"] foreign_key = ["member_id"] ref_table = ["ppts"] ref_col = ["member_id"] conn = sqlite3.connect(database_path) if update is True: update_sql_table(payments, "payments", conn, primary_key) c = conn.cursor() c.execute("DROP TABLE IF EXISTS temp;") print("payments updated...") else: create_table(payments, "payments", conn, primary_key, foreign_key, ref_table, ref_col) print("payments created...") conn.commit() conn.close() open( f"{update_logs_folder}\\payments_{str(pd.to_datetime('today').date())}.txt", "a").close()
def create_enrollment_agg_table(params=("2005-12-01", end_date), db_path=agg_db_path, freq="MS", update=True):
    """Create or update the aggregate table of enrollment values.

    Builds one column per enrollment indicator by looping over the
    indicator/function mapping, joining each month/value frame onto a
    master frame keyed on month, then derives growth and churn rates from
    the census column.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" indicates if values should be grouped
            monthly or quarterly
        update(bool): if the table being updated or created

    Returns:
        DataFrame: the created dataframe

    Output:
        creates empty text file in log folder so the Luigi pipeline can be
        told the process is complete.
    """
    e = Enrollment()

    # Luigi may hand the flag over as a string, so normalize before use.
    update = str(update).lower() == "true"
    if update:
        start = e.last_month()[0] if freq == "MS" else e.last_quarter()[0]
        params = (start, e.month_to_date()[1])

    enrollment_funcs = {
        "disenrolled": e.disenrolled,
        "voluntary_disenrolled": e.voluntary_disenrolled_percent,
        "enrolled": e.enrolled,
        "deaths": e.deaths,
        "net_enrollment": e.net_enrollment,
        "avg_years_enrolled": e.avg_years_enrolled,
        "inquiries": e.inquiries,
        "avg_days_to_enrollment": e.avg_days_to_enrollment,
        "conversion_rate_180_days": e.conversion_rate_180_days,
    }

    def _indicator_frame(func, title):
        # One month/value frame per indicator, with columns normalized
        # for the merge below.
        return e.loop_plot_df(func, params, freq=freq).rename(
            columns={"Month": "month", "Value": title}
        )

    enrollment_agg = _indicator_frame(e.census_on_end_date, "census")
    for col_title, func in enrollment_funcs.items():
        enrollment_agg = enrollment_agg.merge(
            _indicator_frame(func, col_title), on="month", how="left"
        )

    # Previous month's census for each row, with 0 standing in before the
    # first month on record.
    prior_census = [0]
    prior_census.extend(enrollment_agg["census"][:-1])
    growth = ((enrollment_agg["census"] - prior_census) / prior_census) * 100
    if not update:
        # The first row has no prior month; pin its growth at 100%.
        growth[0] = 100
    enrollment_agg["growth_rate"] = growth
    enrollment_agg["churn_rate"] = (
        enrollment_agg["disenrolled"] / enrollment_agg["census"]
    ) * 100

    enrollment_agg.to_csv(f"{processed_data}\\enrollment_agg.csv", index=False)

    table_name = "enrollment_q" if freq == "QS" else "enrollment"

    conn = sqlite3.connect(db_path)
    writer = stu.update_sql_table if update else stu.create_table
    writer(enrollment_agg, table_name, conn, ["month"], agg_table=True)
    conn.commit()
    conn.close()

    # Empty marker file tells the Luigi pipeline this step is complete.
    open(
        f"{update_logs_folder}\\enrollment_agg{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()

    return enrollment_agg
def monthly_census_to_sql(update=True):
    """Creates or updates the monthly_census aggregate table.

    The table holds each center's census (plus a total) as of the first of
    every month and backs the 100 member month queries.

    If updating: finds the most recent first-of-month date and, when no row
    exists for it yet, derives that month's census from the previous row
    plus enrollments minus disenrollments since then, and appends it.
    If creating: rebuilds the full history from the enrollment table.

    Args:
        update(bool): is the database being updated or not

    Output:
        creates empty text file in log folder so the Luigi pipeline can be
        told the process is complete.
    """
    conn = sqlite3.connect(database_path)
    primary_key = ["month"]
    if update is True:
        today = pd.to_datetime("today")
        # strftime zero-pads the month (e.g. 2020-03-01); replaces a
        # hand-rolled leading-zero lambda with identical output.
        last_first_of_month = today.strftime("%Y-%m-01")
        c = conn.cursor()
        month_exists = c.execute(
            "SELECT * FROM monthly_census WHERE month = ?", [last_first_of_month]
        ).fetchone()

        if month_exists is None:
            enrollment = pd.read_csv(f"{processed_data}\\enrollment_for_census.csv")
            enrollment = create_sql_dates(enrollment)

            most_recent_month = pd.read_sql(
                "SELECT * FROM monthly_census ORDER BY month DESC LIMIT 1", conn
            )
            # Ppts who left between the last recorded month and now.
            disenrollments_mask = (
                enrollment["disenrollment_date"] >= most_recent_month["month"][0]
            ) & (enrollment["disenrollment_date"] < last_first_of_month)
            # NOTE(review): only counts enrollments dated exactly on the
            # first of the month -- assumes enrollments always start then.
            enrollments_mask = enrollment["enrollment_date"] == last_first_of_month

            center_census = {}
            for center in enrollment.center.unique():
                center_census[center.lower()] = {}

            for center in enrollment.center.unique():
                center_mask = enrollment["center"] == center
                # Previous census plus/minus this center's movement.
                center_census[center.lower()][last_first_of_month] = most_recent_month[
                    center.lower()
                ][0] + (
                    enrollment[enrollments_mask & center_mask].shape[0]
                    - enrollment[disenrollments_mask & center_mask].shape[0]
                )

            monthly_census = pd.DataFrame.from_dict(center_census)
            monthly_census["total"] = monthly_census.sum(axis=1)
            monthly_census.reset_index(inplace=True)
            monthly_census.rename(columns={"index": "month"}, inplace=True)
            monthly_census = create_sql_dates(monthly_census, ["month"])

            monthly_census.to_sql(
                "monthly_census", conn, if_exists="append", index=False
            )
            print("monthly_census updated...")
    else:
        enrollment = pd.read_sql(
            "SELECT * FROM enrollment JOIN centers on enrollment.member_id=centers.member_id",
            conn,
        )
        enrollment["enrollment_date"] = pd.to_datetime(enrollment["enrollment_date"])
        enrollment["disenrollment_date"] = pd.to_datetime(
            enrollment["disenrollment_date"]
        )

        center_census = {}
        for center in enrollment.center.unique():
            center_census[center.lower()] = {}

        # Walk every first-of-month from the earliest enrollment to today
        # and count the members active at each center on that date.
        for month_start in pd.date_range(
            enrollment.enrollment_date.min(), pd.to_datetime("today"), freq="MS"
        ):
            enrollment_mask = enrollment["enrollment_date"] <= month_start
            # Still enrolled: disenrolled after this month, or never.
            disenrollment_mask = (enrollment["disenrollment_date"] > month_start) | (
                enrollment["disenrollment_date"].isnull()
            )
            for center in enrollment.center.unique():
                center_mask = enrollment["center"] == center
                center_census[center.lower()][month_start] = enrollment[
                    center_mask & enrollment_mask & disenrollment_mask
                ].shape[0]

        monthly_census = pd.DataFrame.from_dict(center_census)
        monthly_census["total"] = monthly_census.sum(axis=1)
        monthly_census.reset_index(inplace=True)
        monthly_census.rename(columns={"index": "month"}, inplace=True)
        monthly_census = create_sql_dates(monthly_census, ["month"])

        create_table(monthly_census, "monthly_census", conn, primary_key)
        print("monthly_census created...")

    conn.commit()
    conn.close()

    # Empty marker file tells the Luigi pipeline this step is complete.
    open(
        f"{update_logs_folder}\\monthly_census_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()