# Example 1
def admission_claims_to_sql(update=True):
    """
    Read the cleaned admission claims dataset, parse the dates to the SQL
    format of YYYY-MM-DD, and create or update the database table using
    the indicated primary and foreign keys.

    Args:
        update(bool): indicates if the database table is being updated
            (True) or created from scratch (False)

    Output:
        creates an empty text file in the log folder so the Luigi
            pipeline can be told the process is complete.
    """
    admission_claims = pd.read_csv(
        f"{processed_data}\\admit_claims.csv", low_memory=False
    )
    admission_claims = create_sql_dates(admission_claims)

    primary_key = ["claim_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    if update:
        update_sql_table(admission_claims, "admission_claims", conn, primary_key)

        # update_sql_table appears to stage rows in a "temp" table; drop it
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS temp;")

        print("admission_claims updated...")

    else:
        # full rebuild: drop any existing table before recreating it
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS admission_claims")
        create_table(
            admission_claims,
            "admission_claims",
            conn,
            primary_key,
            foreign_key,
            ref_table,
            ref_col,
        )

        print("admission_claims created...")

    conn.commit()
    conn.close()

    # touch a dated log file so the Luigi pipeline sees this step as done
    open(
        f"{update_logs_folder}\\admission_claims_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
# Example 2
def center_days_to_sql(update=True):
    """
    Read the cleaned center-days dataset, parse the dates to the SQL
    format of YYYY-MM-DD, and create or update the database table using
    the indicated primary and foreign keys.

    Args:
        update(bool): indicates if the database table is being updated
            (True) or created from scratch (False)

    Output:
        creates an empty text file in the log folder so the Luigi
            pipeline can be told the process is complete.
    """
    center_days = pd.read_csv(f"{processed_data}\\center_days.csv")
    center_days = create_sql_dates(center_days, ["as_of"])
    # "days" is the column name the SQL table expects
    center_days.rename(columns={"center_days": "days"}, inplace=True)

    primary_key = ["member_id", "days"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    if update:
        update_sql_table(center_days, "center_days", conn, primary_key)

        # update_sql_table appears to stage rows in a "temp" table; drop it
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS temp;")

        print("center_days updated...")

    else:
        create_table(
            center_days,
            "center_days",
            conn,
            primary_key,
            foreign_key,
            ref_table,
            ref_col,
        )

        print("center_days created...")

    conn.commit()
    conn.close()

    # touch a dated log file so the Luigi pipeline sees this step as done
    open(
        f"{update_logs_folder}\\center_days_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def auths_to_sql(update=True):
    """
    Read the cleaned authorizations dataset, parse the dates to the SQL
    format of YYYY-MM-DD, and create the database table using the
    indicated primary and foreign keys.

    Whether updating or creating, the table is dropped and rebuilt
    entirely; `update` only affects the status message printed.

    Args:
        update(bool): indicates if the database table is being updated
            (True) or created from scratch (False)

    Output:
        creates an empty text file in the log folder so the Luigi
            pipeline can be told the process is complete.
    """
    auths = pd.read_csv(f"{processed_data}\\auths.csv")
    auths = create_sql_dates(auths)

    primary_key = ["member_id", "authorization_number"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    # Both code paths were identical except for the message printed, so the
    # drop-and-recreate is shared here.
    c = conn.cursor()
    c.execute("DROP TABLE IF EXISTS authorizations")
    create_table(
        auths, "authorizations", conn, primary_key, foreign_key, ref_table, ref_col
    )

    if update:
        print("authorizations updated...")
    else:
        print("authorizations created...")

    conn.commit()
    conn.close()

    # touch a dated log file so the Luigi pipeline sees this step as done
    open(
        f"{update_logs_folder}\\authorizations_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def er_only_to_sql(update=True):
    """
    Read the cleaned ER-only visits dataset, parse the dates to the SQL
    format of YYYY-MM-DD, and create the database table using the
    indicated primary and foreign keys.

    Whether updating or creating, the table is dropped and rebuilt
    entirely; `update` only affects the status message printed.
    (The previous docstring described a 3-month admission-date filter on
    update, but no such filtering was implemented.)

    Args:
        update(bool): indicates if the database table is being updated
            (True) or created from scratch (False)

    Output:
        creates an empty text file in the log folder so the Luigi
            pipeline can be told the process is complete.
    """
    er_only = pd.read_csv(f"{processed_data}\\er_only.csv")
    er_only = create_sql_dates(er_only)

    primary_key = ["visit_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    # Both code paths were identical except for the message printed, so the
    # drop-and-recreate is shared here.
    c = conn.cursor()
    c.execute("DROP TABLE IF EXISTS er_only")
    create_table(
        er_only, "er_only", conn, primary_key, foreign_key, ref_table, ref_col
    )

    if update:
        print("er_only updated...")
    else:
        print("er_only created...")

    conn.commit()
    conn.close()

    # touch a dated log file so the Luigi pipeline sees this step as done
    open(
        f"{update_logs_folder}\\er_only_{str(pd.to_datetime('today').date())}.txt",
        "a").close()
def pnuemo_to_sql(update=True):
    """
    Read the cleaned pneumococcal vaccination dataset, parse the dates to
    the SQL format of YYYY-MM-DD, and create or update the database table
    using the indicated primary and foreign keys.

    NOTE: the function name (and log-file prefix) keep the historical
    "pnuemo" misspelling for backward compatibility with existing callers
    and pipeline artifacts.

    Args:
        update(bool): indicates if the database table is being updated
            (True) or created from scratch (False)

    Output:
        creates an empty text file in the log folder so the Luigi
            pipeline can be told the process is complete.
    """
    pneumo = pd.read_csv(f"{processed_data}\\pneumo.csv")
    pneumo = create_sql_dates(pneumo)

    primary_key = ["member_id", "date_administered"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    if update:
        update_sql_table(pneumo, "pneumo", conn, primary_key)

        # update_sql_table appears to stage rows in a "temp" table; drop it
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS temp;")

        print("pneumo updated...")

    else:
        create_table(
            pneumo, "pneumo", conn, primary_key, foreign_key, ref_table, ref_col
        )

        print("pneumo created...")

    conn.commit()
    conn.close()

    # touch a dated log file so the Luigi pipeline sees this step as done
    open(
        f"{update_logs_folder}\\pnuemo_{str(pd.to_datetime('today').date())}.txt", "a"
    ).close()
def centers_to_sql(update=True):
    """
    Read the cleaned centers dataset, parse the dates to the SQL format
    of YYYY-MM-DD, and create or update the database table using the
    indicated primary and foreign keys.

    Args:
        update(bool): indicates if the database table is being updated
            (True) or created from scratch (False)

    Output:
        creates an empty text file in the log folder so the Luigi
            pipeline can be told the process is complete.
    """
    centers = pd.read_csv(f"{processed_data}\\centers.csv")
    centers = create_sql_dates(centers)

    primary_key = ["member_id", "center", "start_date"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    if update:
        update_sql_table(centers, "centers", conn, primary_key)

        # Clean up the "temp" staging table, matching every other
        # *_to_sql update path (this cleanup was previously missing here).
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS temp;")

        print("centers updated...")

    else:
        create_table(
            centers, "centers", conn, primary_key, foreign_key, ref_table, ref_col
        )

        print("centers created...")

    conn.commit()
    conn.close()

    # touch a dated log file so the Luigi pipeline sees this step as done
    open(
        f"{update_logs_folder}\\centers_{str(pd.to_datetime('today').date())}.txt", "a"
    ).close()
def grievances_to_sql(update=True):
    """
    Read the cleaned grievances dataset, parse its date columns to the
    SQL format of YYYY-MM-DD, and create or update the database table
    using the indicated primary and foreign keys.

    Args:
        update(bool): indicates if the database table is being updated
            (True) or created from scratch (False)

    Output:
        creates an empty text file in the log folder so the Luigi
            pipeline can be told the process is complete.
    """
    date_cols = [
        "date_grievance_received",
        "date_of_resolution",
        "date_of_oral_notification",
        "date_of_written_notification",
    ]

    grievances = pd.read_csv(f"{processed_data}\\grievances.csv", parse_dates=date_cols)

    # keep only the date portion so values serialize as YYYY-MM-DD
    for col in date_cols:
        grievances[col] = grievances[col].dt.date

    primary_key = ["griev_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    if update:
        update_sql_table(grievances, "grievances", conn, primary_key)

        # update_sql_table appears to stage rows in a "temp" table; drop it
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS temp;")

        print("grievances updated...")

    else:
        create_table(
            grievances, "grievances", conn, primary_key, foreign_key, ref_table, ref_col
        )

        print("grievances created...")

    conn.commit()
    conn.close()

    # touch a dated log file so the Luigi pipeline sees this step as done
    open(
        f"{update_logs_folder}\\grievances_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
# Example 8
def create_incidents_agg_tables(
    params=("2017-07-01", end_date),
    incident_table="falls",
    db_path=agg_db_path,
    freq="MS",
    update=True,
):
    """
    Create an aggregate table of incident values.

    Loops through indicator/column name and matching function, creates a
    dataframe with a month column and value column, and adds that value
    column to a master dataframe with a month column.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        incident_table(str): incident to use to create aggregate table
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" indicates if values should be grouped
            monthly or quarterly
        update(bool): if the table is being updated or created; may also
            arrive as the string "True"/"False" (e.g. from a Luigi
            parameter), hence the string coercion below

    Returns:
        DataFrame: the created dataframe

    Output:
        creates an empty text file in the log folder so the Luigi
            pipeline can be told the process is complete.
    """
    i = Incidents()

    # Coerce string "True"/"False" (from the pipeline) to a real bool,
    # and narrow params to the most recent period when updating.
    update = str(update).lower() == "true"
    if update:
        if freq == "MS":
            params = (i.last_month()[0], i.month_to_date()[1])
        else:
            params = (i.last_quarter()[0], i.month_to_date()[1])

    # indicators computed for every incident type
    incidents_func = {
        "total": i.total_incidents,
        "unique_ppts": i.ppts_w_incident,
        "num_ppts_with_multiple": i.num_of_incident_repeaters,
        "num_by_ppts_with_multiple": i.incidents_by_repeaters,
        "percent_without": i.percent_without_incident_in_period,
        "percent_by_repeaters": i.percent_by_repeaters,
        "ppts_with_above_avg": i.ppts_above_avg,
        "percent_of_ppts_with_above_avg": i.percent_of_ppts_over_avg,
    }

    # indicators specific to a single incident type
    additional_funcs = {
        "falls": {
            "major_harm_percent": i.major_harm_percent,
            "total_adjusted": i.adjusted_incident_count,
            "adjusted_per100MM": i.adjusted_per_100MM,
        },
        "infections": {
            "sepsis_per_100MM": i.sepsis_per_100,
            "uti_per_100MM": i.uti_per_100,
        },
        "med_errors": {
            "major_harm_percent": i.major_harm_percent,
            "high_risk": i.high_risk_med_error_count,
        },
        "wounds": {
            "avg_healing_time": i.avg_wound_healing_time,
            "percent_unstageable": i.unstageable_wound_percent,
            "pressure_ulcer_per_100": i.pressure_ulcer_per_100,
        },
        "burns": {
            "third_degree_rate": i.third_degree_burn_rate,
            "rn_assessment_percent": i.rn_assessment_following_burn_percent,
        },
    }

    all_funcs = {**incidents_func, **additional_funcs[incident_table]}

    # master dataframe: per-100-member-month rate for this incident type
    df = i.loop_plot_df(
        i.incident_per_100MM,
        params,
        freq=freq,
        additional_func_args=[incident_table],
    ).rename(columns={"Month": "month", "Value": "per_100MM"})

    # these indicators already know their incident type and take no
    # additional_func_args
    no_arg_cols = {
        "sepsis_per_100MM",
        "uti_per_100MM",
        "high_risk",
        "avg_healing_time",
        "percent_unstageable",
        "pressure_ulcer_per_100",
        "third_degree_rate",
        "rn_assessment_percent",
    }

    for col_title, func in all_funcs.items():
        if col_title in no_arg_cols:
            dff = i.loop_plot_df(func, params, freq=freq).rename(
                columns={"Month": "month", "Value": col_title}
            )
        else:
            dff = i.loop_plot_df(
                func,
                params,
                freq=freq,
                additional_func_args=[incident_table],
            ).rename(columns={"Month": "month", "Value": col_title})

        df = df.merge(dff, on="month", how="left")

    df.to_csv(f"{processed_data}\\{incident_table}_agg.csv", index=False)

    # quarterly tables get a "_q" suffix
    if freq == "QS":
        table_name = f"{incident_table}_q"
    else:
        table_name = incident_table

    conn = sqlite3.connect(db_path)

    if update:
        stu.update_sql_table(df, table_name, conn, ["month"], agg_table=True)
    else:
        stu.create_table(df, table_name, conn, ["month"], agg_table=True)
    conn.commit()
    conn.close()

    # touch a dated log file so the Luigi pipeline sees this step as done
    open(
        f"{update_logs_folder}\\{incident_table}_agg{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()

    return df
# Example 9
def create_center_agg_table(params=("2005-12-01", end_date),
                            db_path=agg_db_path,
                            freq="MS",
                            update=True):
    """
    Create an aggregate table of center related enrollment values.

    Loops through indicator/column name and matching function, creates a
    dataframe with a month column and value column, and adds that value
    column to a master dataframe with a month column.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" indicates if values should be grouped
            monthly or quarterly
        update(bool): if the table is being updated or created; may also
            arrive as the string "True"/"False" (e.g. from a Luigi
            parameter), hence the string coercion below

    Returns:
        DataFrame: the created dataframe

    Output:
        creates an empty text file in the log folder so the Luigi
            pipeline can be told the process is complete.
    """
    ce = CenterEnrollment()

    # Coerce string "True"/"False" (from the pipeline) to a real bool,
    # and narrow params to the most recent period when updating.
    update = str(update).lower() == "true"
    if update:
        if freq == "MS":
            params = (ce.last_month()[0], ce.month_to_date()[1])
        else:
            params = (ce.last_quarter()[0], ce.month_to_date()[1])

    enrollment_funcs = {
        "_disenrolled": ce.disenrolled,
        "_voluntary_disenrolled": ce.voluntary_disenrolled,
        "_enrolled": ce.enrolled,
        "_deaths": ce.deaths,
    }

    center_shorthand_dict = {
        "Providence": "pvd",
        "Woonsocket": "woon",
        "Westerly": "wes",
    }

    # Build census columns for each center; the first becomes the master
    # dataframe and the rest are merged in on "month". (This replaces
    # three copy-pasted census stanzas.)
    enrollment_agg = None
    for center, center_abr in center_shorthand_dict.items():
        dff = ce.loop_plot_df(
            ce.census_on_end_date,
            params,
            freq=freq,
            additional_func_args=[center],
        ).rename(columns={"Month": "month", "Value": f"{center_abr}_census"})

        if enrollment_agg is None:
            enrollment_agg = dff
        else:
            enrollment_agg = enrollment_agg.merge(dff, on="month", how="left")

    # one column per (center, enrollment indicator) pair
    for col_title, func in enrollment_funcs.items():
        for center, center_abr in center_shorthand_dict.items():
            dff = ce.loop_plot_df(
                func,
                params,
                freq=freq,
                additional_func_args=[center],
            ).rename(columns={"Month": "month", "Value": center_abr + col_title})
            enrollment_agg = enrollment_agg.merge(dff, on="month", how="left")

    dc_attendance = create_dc_attnd_table(params, freq)
    enrollment_agg = enrollment_agg.merge(dc_attendance, on="month", how="left")

    # quarterly tables get a "_q" suffix
    if freq == "QS":
        table_name = "center_enrollment_q"
    else:
        table_name = "center_enrollment"

    conn = sqlite3.connect(db_path)

    if update:
        stu.update_sql_table(enrollment_agg, table_name, conn, ["month"],
                             agg_table=True)
    else:
        stu.create_table(enrollment_agg, table_name, conn, ["month"],
                         agg_table=True)

    conn.commit()
    conn.close()

    # touch a dated log file so the Luigi pipeline sees this step as done
    open(
        f"{update_logs_folder}\\center_agg{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()

    return enrollment_agg
# Example 10
def create_team_incidents_agg_table(params=("2017-07-01", end_date),
                                    db_path=agg_db_path,
                                    freq="MS",
                                    update=True):
    """
    Create an aggregate table of team incident values.

    Loops through indicator/column name and matching function, creates a
    dataframe with a month column and value column, and adds that value
    column to a master dataframe with a month column.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" indicates if values should be grouped
            monthly or quarterly
        update(bool): if the table is being updated or created; may also
            arrive as the string "True"/"False" (e.g. from a Luigi
            parameter), hence the string coercion below

    Returns:
        DataFrame: the created dataframe

    Output:
        creates an empty text file in the log folder so the Luigi
            pipeline can be told the process is complete.
    """
    t = Team()

    # Coerce string "True"/"False" (from the pipeline) to a real bool,
    # and narrow params to the most recent period when updating.
    update = str(update).lower() == "true"
    if update:
        if freq == "MS":
            params = (t.last_month()[0], t.month_to_date()[1])
        else:
            params = (t.last_quarter()[0], t.month_to_date()[1])

    incident_types = ["burns", "falls", "infections", "med_errors", "wounds"]

    incidents = {
        "": t.total_incidents_by_team,
        "_per_100MM": t.incidents_per_member_by_team,
        "_unique_ppts": t.ppts_w_incident_by_team,
    }

    # seed the master dataframe, then keep only the "month" column
    incidents_team = t.loop_plot_team_df(t.ppts_on_team, params, freq=freq)
    incidents_team.drop(
        [col for col in incidents_team.columns if col != "month"],
        axis=1,
        inplace=True)

    # one set of per-team columns per (indicator, incident type) pair
    for col_title, func in incidents.items():
        for incident in incident_types:
            dff = t.loop_plot_team_df(
                func,
                params,
                freq=freq,
                additional_func_args=[incident],
                col_suffix=f"_{incident}{col_title}",
            )
            incidents_team = incidents_team.merge(dff, on="month", how="left")

    incidents_team.to_csv(f"{processed_data}\\incidents_team.csv", index=False)

    # quarterly tables get a "_q" suffix
    if freq == "QS":
        table_name = "team_incidents_q"
    else:
        table_name = "team_incidents"

    conn = sqlite3.connect(db_path)
    if update:
        stu.update_sql_table(incidents_team, table_name, conn, ["month"],
                             agg_table=True)
    else:
        stu.create_table(incidents_team, table_name, conn, ["month"],
                         agg_table=True)
    conn.commit()
    conn.close()

    # touch a dated log file so the Luigi pipeline sees this step as done
    open(
        f"{update_logs_folder}\\team_incidents_agg{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()

    return incidents_team
# Example 11
def create_team_info_agg_table(params=("2017-07-01", end_date),
                               db_path=agg_db_path,
                               freq="MS",
                               update=True):
    """
    Create an aggregate table of team information related values.

    Loops through indicator/column name and matching function, creates a
    dataframe with a month column and value column, and adds that value
    column to a master dataframe with a month column.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" indicates if values should be grouped
            monthly or quarterly
        update(bool): if the table is being updated or created; may also
            arrive as the string "True"/"False" (e.g. from a Luigi
            parameter), hence the string coercion below

    Returns:
        DataFrame: the created dataframe

    Output:
        creates an empty text file in the log folder so the Luigi
            pipeline can be told the process is complete.
    """
    t = Team()

    # Coerce string "True"/"False" (from the pipeline) to a real bool,
    # and narrow params to the most recent period when updating.
    update = str(update).lower() == "true"
    if update:
        if freq == "MS":
            params = (t.last_month()[0], t.month_to_date()[1])
        else:
            params = (t.last_quarter()[0], t.month_to_date()[1])

    team_info = {
        "avg_age": t.avg_age_by_team,
        "percent_primary_non_english": t.percent_primary_non_english_by_team,
        "avg_years_enrolled": t.avg_years_enrolled_by_team,
        "ppts": t.ppts_on_team,
        "mortality": t.mortality_by_team,
    }

    # seed the master dataframe, then keep only the "month" column
    team_info_df = t.loop_plot_team_df(t.ppts_on_team, params, freq=freq)
    team_info_df.drop([col for col in team_info_df.columns if col != "month"],
                      axis=1,
                      inplace=True)

    for col_title, func in team_info.items():
        dff = t.loop_plot_team_df(func,
                                  params,
                                  freq=freq,
                                  col_suffix=f"_{col_title}")
        team_info_df = team_info_df.merge(dff, on="month", how="left")

    team_info_df.to_csv(f"{processed_data}\\team_info_df.csv", index=False)

    # quarterly tables get a "_q" suffix
    if freq == "QS":
        table_name = "team_info_q"
    else:
        table_name = "team_info"

    conn = sqlite3.connect(db_path)

    if update:
        stu.update_sql_table(team_info_df, table_name, conn, ["month"],
                             agg_table=True)
    else:
        stu.create_table(team_info_df, table_name, conn, ["month"],
                         agg_table=True)
    conn.commit()
    conn.close()

    # touch a dated log file so the Luigi pipeline sees this step as done
    open(
        f"{update_logs_folder}\\team_info_agg{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()

    return team_info_df
# Example 12
def create_team_utl_agg_table(params=("2017-07-01", end_date),
                              db_path=agg_db_path,
                              freq="MS",
                              update=True):
    """
    Create an aggregate table of team utilization values.

    Loops through indicator/column name and matching function, creates a
    dataframe with a month column and value column, and adds that value
    column to a master dataframe with a month column.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" indicates if values should be grouped
            monthly or quarterly
        update(bool): if the table is being updated or created; may also
            arrive as the string "True"/"False" (e.g. from a Luigi
            parameter), hence the string coercion below

    Returns:
        DataFrame: the created dataframe

    Output:
        creates an empty text file in the log folder so the Luigi
            pipeline can be told the process is complete.
    """
    t = Team()

    # Coerce string "True"/"False" (from the pipeline) to a real bool,
    # and narrow params to the most recent period when updating.
    update = str(update).lower() == "true"
    if update:
        if freq == "MS":
            params = (t.last_month()[0], t.month_to_date()[1])
        else:
            params = (t.last_quarter()[0], t.month_to_date()[1])

    utilization_types = ["acute", "psych", "skilled", "respite", "custodial"]

    # indicators that take a utilization type as an extra argument
    utilization_need_args = {
        "_admissions": t.admissions_by_team,
        "_days": t.days_by_team,
        "_discharges": t.discharges_by_team,
        "_alos": t.alos_for_discharges_by_team,
    }

    # indicators that take no extra argument
    utilization = {
        "readmits": t.readmits_by_team,
        "custodial_ppts": t.ppts_in_custodial_by_team,
        "percent_of_discharges_with_mortality_in_30":
        t.percent_of_discharges_with_mortality_in_30_by_team,
        "mortality_within_30_days_of_discharge":
        t.mortality_within_30days_of_discharge_rate_by_team,
        "no_hosp_admission_since_enrollment":
        t.no_hosp_admission_since_enrollment_by_team,
        "er_only_visits": t.er_only_visits_by_team,
    }

    # seed the master dataframe, then keep only the "month" column
    utl_team = t.loop_plot_team_df(t.ppts_on_team, params, freq=freq)
    utl_team.drop([col for col in utl_team.columns if col != "month"],
                  axis=1,
                  inplace=True)

    for col_title, func in utilization.items():
        dff = t.loop_plot_team_df(func,
                                  params,
                                  freq=freq,
                                  col_suffix=f"_{col_title}")
        utl_team = utl_team.merge(dff, on="month", how="left")

    for col_title, func in utilization_need_args.items():
        # loop variable renamed from "utilization", which shadowed the
        # indicator dict above
        for utl_type in utilization_types:
            dff = t.loop_plot_team_df(
                func,
                params,
                freq=freq,
                additional_func_args=[utl_type],
                col_suffix=f"_{utl_type}{col_title}",
            )
            utl_team = utl_team.merge(dff, on="month", how="left")

    utl_team.to_csv(f"{processed_data}\\utl_team.csv", index=False)

    # quarterly tables get a "_q" suffix
    if freq == "QS":
        table_name = "team_utl_q"
    else:
        table_name = "team_utl"

    conn = sqlite3.connect(db_path)

    if update:
        stu.update_sql_table(utl_team, table_name, conn, ["month"],
                             agg_table=True)
    else:
        stu.create_table(utl_team, table_name, conn, ["month"], agg_table=True)
    conn.commit()
    conn.close()

    # touch a dated log file so the Luigi pipeline sees this step as done
    open(
        f"{update_logs_folder}\\team_utilization_agg{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()

    return utl_team
# Example 13
def create_quality_agg_table(params=("2005-12-01", end_date),
                             db_path=agg_db_path,
                             freq="MS",
                             update=True):
    """
    Create an aggregate table of quality values.

    Loops through indicator/column name and matching function, creates a
    dataframe with a month column and value column, and adds that value
    column to a master dataframe with a month column.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" indicates if values should be grouped
            monthly or quarterly
        update(bool): if the table is being updated or created; may also
            arrive as the string "True"/"False" (e.g. from a Luigi
            parameter), hence the string coercion below

    Returns:
        DataFrame: the created dataframe

    Output:
        creates an empty text file in the log folder so the Luigi
            pipeline can be told the process is complete.
    """
    q = Quality()

    # Coerce string "True"/"False" (from the pipeline) to a real bool,
    # and narrow params to the most recent period when updating.
    update = str(update).lower() == "true"
    if update:
        if freq == "MS":
            params = (q.last_month()[0], q.month_to_date()[1])
        else:
            params = (q.last_quarter()[0], q.month_to_date()[1])

    quality_func = {
        "mortality_within_30_days_of_discharge":
        q.mortality_within_30days_of_discharge_rate,
        "percent_of_discharges_with_mortality_in_30":
        q.percent_of_discharges_with_mortality_in_30,
        "no_hosp_admission_since_enrollment":
        q.no_hosp_admission_since_enrollment,
        "no_hosp_admission_last_year": q.no_hosp_admission_last_year,
        "pneumo_rate": q.pneumo_rate,
        "influ_rate": q.influ_rate,
        "avg_days_until_nf_admission": q.avg_days_until_nf_admission,
    }

    # master dataframe seeded with the mortality rate indicator
    quality_agg = q.loop_plot_df(q.mortality_rate, params, freq=freq).rename(
        columns={"Month": "month", "Value": "mortality_rate"}
    )

    for col_title, func in quality_func.items():
        dff = q.loop_plot_df(func, params, freq=freq).rename(
            columns={"Month": "month", "Value": col_title}
        )
        quality_agg = quality_agg.merge(dff, on="month", how="left")

    quality_agg.to_csv(f"{processed_data}\\quality_agg.csv", index=False)

    # quarterly tables get a "_q" suffix
    if freq == "QS":
        table_name = "quality_q"
    else:
        table_name = "quality"

    conn = sqlite3.connect(db_path)

    if update:
        stu.update_sql_table(quality_agg, table_name, conn, ["month"],
                             agg_table=True)
    else:
        stu.create_table(quality_agg, table_name, conn, ["month"],
                         agg_table=True)

    conn.commit()
    conn.close()

    # touch a dated log file so the Luigi pipeline sees this step as done
    open(
        f"{update_logs_folder}\\quality_agg{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()

    return quality_agg
# Example 14
def create_utilization_table(params=("2017-07-01", end_date),
                             db_path=agg_db_path,
                             freq="MS",
                             update=True):
    """
    Create an aggregate table of utilization values.

    Loops through indicator/column-name pairs and their matching functions;
    each function yields a dataframe with a month column and a value column,
    and that value column is merged onto a master dataframe keyed on month.
    Column order in the output follows the merge order below.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" indicates if values should be grouped monthly
            or quarterly
        update(bool): if the table is being updated or created; may also
            arrive as the string "True"/"False" (see normalization below)

    Returns:
        DataFrame: the created dataframe

    Output:
        creates empty text file in log folder so the Luigi pipeline
            can be told the process is complete.
    """
    u = Utilization()

    # ``update`` may arrive as a string (e.g. from a pipeline parameter),
    # so it is compared in text form and normalized to a bool.  When
    # updating, ``params`` is shrunk to just the most recent month/quarter
    # so only that window is recomputed.
    if str(update).lower() == "true":
        if freq == "MS":
            params = (u.last_month()[0], u.month_to_date()[1])
            update = True
        else:
            params = (u.last_quarter()[0], u.month_to_date()[1])
            update = True
    else:
        update = False

    # Inpatient categories the per-category indicators are computed for.
    utilization_types = ["acute", "psych", "skilled", "respite", "custodial"]

    # Column-suffix -> indicator function; suffix is appended to each
    # utilization type to build the output column name.
    utilization_func = {
        "_admissions": u.admissions_count,
        "_admissions_per_100MM": u.admissions_per_100MM,
        "_discharges": u.discharges_count,
        "_alos": u.alos,
        "_los_per_100MM": u.los_per_100mm,
        "_days": u.utilization_days,
        "_days_per_100MM": u.days_per_100MM,
        "_unique_admissions": u.unique_admissions_count,
        "_weekend_admissions": u.weekend_admissions_count,
        "_weekend_percent": u.weekend_admission_percent,
    }

    # ER indicators reuse the admissions functions with the "er_only" type.
    er_visit_func = {
        "er_visits_per_100MM": u.admissions_per_100MM,
        "er_visits": u.admissions_count,
    }

    # Nursing-facility-only indicators; suffix appended to each NF type.
    nf_only_funcs = {
        "_per_100MM": u.ppts_in_utl_per_100MM,
        "_percent": u.ppts_in_utl_percent,
    }

    # Master frame: start from the ER-to-inpatient rate, then merge every
    # other indicator onto it by month.
    utl_agg = u.loop_plot_df(u.er_to_inp_rate, params,
                             freq=freq).rename(columns={
                                 "Month": "month",
                                 "Value": "er_to_inp_rate"
                             })

    # Per-type utilization indicators (acute_admissions, psych_alos, ...).
    for col_title, func in utilization_func.items():
        for utilization in utilization_types:
            dff = u.loop_plot_df(func,
                                 params,
                                 freq=freq,
                                 additional_func_args=[
                                     utilization
                                 ]).rename(columns={
                                     "Month": "month",
                                     "Value": utilization + col_title
                                 })
            utl_agg = utl_agg.merge(dff, on="month", how="left")

    # 30-day readmission rates for the three readmit-relevant categories.
    for utilization in ["acute", "psych", "er_only"]:
        dff = u.loop_plot_df(
            u.readmits_30day_rate,
            params,
            freq=freq,
            additional_func_args=[
                utilization
            ]).rename(columns={
                "Month": "month",
                "Value": utilization + "_30_day_readmit_rate"
            })
        utl_agg = utl_agg.merge(dff, on="month", how="left")

    # ER-only visit counts/rates.
    for col_title, func in er_visit_func.items():
        dff = u.loop_plot_df(func,
                             params,
                             freq=freq,
                             additional_func_args=["er_only"
                                                   ]).rename(columns={
                                                       "Month": "month",
                                                       "Value": col_title
                                                   })
        utl_agg = utl_agg.merge(dff, on="month", how="left")

    # Participants-in-facility indicators, including "alfs" which is not
    # part of ``utilization_types`` above.
    for col_title, func in nf_only_funcs.items():
        for nf_type in ["skilled", "respite", "custodial", "alfs"]:
            dff = u.loop_plot_df(func,
                                 params,
                                 freq=freq,
                                 additional_func_args=[
                                     nf_type
                                 ]).rename(columns={
                                     "Month": "month",
                                     "Value": nf_type + col_title
                                 })
            utl_agg = utl_agg.merge(dff, on="month", how="left")

    # Final single indicator: NF discharges to a higher level of care.
    dff = u.loop_plot_df(
        u.percent_nf_discharged_to_higher_loc, params,
        freq=freq).rename(columns={
            "Month": "month",
            "Value": "nf_higher_loc_discharge_percent"
        })
    utl_agg = utl_agg.merge(dff, on="month", how="left")

    # Snapshot to CSV regardless of update/create mode.
    utl_agg.to_csv(f"{processed_data}\\utilization_agg.csv", index=False)

    # Quarterly data goes to a separate "_q" table.
    if freq == "QS":
        table_name = "utilization_q"
    else:
        table_name = "utilization"
    conn = sqlite3.connect(db_path)

    if update:
        stu.update_sql_table(utl_agg,
                             table_name,
                             conn, ["month"],
                             agg_table=True)

    else:
        stu.create_table(utl_agg, table_name, conn, ["month"], agg_table=True)
    conn.commit()
    conn.close()

    # Touch an empty, dated marker file so the Luigi pipeline can see that
    # this step completed.
    open(
        f"{update_logs_folder}\\utilization_agg{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()

    return utl_agg
def claims_detail_to_sql(update=True):
    """
    Reads the cleaned claims-detail dataset, parses its date columns to the
    SQL format of YYYY-MM-DD, and rebuilds the claims_detail table.

    The table is ALWAYS dropped and recreated from the CSV: an "update" of
    this table is defined as a full replace, so the update and create paths
    only differ in the status message printed.

    Args:
        update(bool): indicates if database table is being updated or not
            (affects only the printed message)

    Output:
        creates empty text file in log folder so the Luigi pipeline
            can be told the process is complete.
    """
    claims_detail = pd.read_csv(f"{processed_data}\\claims_detail.csv",
                                low_memory=False)

    claims_detail = create_sql_dates(claims_detail, ["first_dos", "last_dos"])

    primary_key = ["claim_line_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    # The original update/create branches were byte-for-byte identical
    # (drop + create) apart from the print, so the common work is hoisted.
    c = conn.cursor()
    c.execute("DROP TABLE IF EXISTS claims_detail")

    create_table(
        claims_detail,
        "claims_detail",
        conn,
        primary_key,
        foreign_key,
        ref_table,
        ref_col,
    )

    if update is True:
        print("claims_detail updated...")
    else:
        print("claims_detail created...")

    conn.commit()
    conn.close()

    # Touch an empty, dated marker file for the Luigi pipeline.
    open(
        f"{update_logs_folder}\\claims_detail_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def inpatient_to_sql(update=True):
    """
    Reads the cleaned inpatient dataset, parses its date columns to the SQL
    format of YYYY-MM-DD, and rebuilds the inpatient table.

    The table is ALWAYS dropped and recreated from the CSV.  On first-time
    creation, 6 filtered views are also created: acute, psych, nursing_home,
    custodial, respite, and skilled.  On update the views are left alone
    (SQLite views survive a DROP TABLE of their underlying table).

    Args:
        update(bool): indicates if database table is being updated or not

    Output:
        creates empty text file in log folder so the Luigi pipeline
            can be told the process is complete.
    """
    acute = pd.read_csv(f"{processed_data}\\inpatient.csv")
    acute = create_sql_dates(acute)

    primary_key = ["visit_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)
    c = conn.cursor()

    # Both paths fully rebuild the table from the CSV; the drop/create was
    # duplicated across the original branches and is hoisted here.
    c.execute("DROP TABLE IF EXISTS inpatient")
    create_table(
        acute, "inpatient", conn, primary_key, foreign_key, ref_table, ref_col
    )

    if update is True:
        print("inpatient updated...")

    else:
        print("inpatient created...")

        # Category views over the inpatient table, created only on the
        # initial build (they persist across later table rebuilds).
        view_statements = (
            """CREATE VIEW acute AS
            SELECT * FROM inpatient
            WHERE admission_type = 'Acute Hospital';
            """,
            """CREATE VIEW psych AS
            SELECT * FROM inpatient
            WHERE admission_type = 'Psych Unit / Facility';
            """,
            """CREATE VIEW nursing_home AS
            SELECT * FROM inpatient
            WHERE (admission_type = 'Nursing Home'
            OR admission_type = 'Rehab Unit / Facility'
            OR admission_type = 'End of Life')
            """,
            """CREATE VIEW skilled AS
            SELECT * FROM inpatient
            WHERE (admission_type = 'Nursing Home'
            OR admission_type = 'Rehab Unit / Facility')
            AND admit_reason = 'skilled';
            """,
            """CREATE VIEW respite AS
            SELECT * FROM inpatient
            WHERE (admission_type = 'Nursing Home'
            OR admission_type = 'Rehab Unit / Facility')
            AND admit_reason = 'respite';
            """,
            """CREATE VIEW custodial AS
            SELECT * FROM inpatient
            WHERE (admission_type = 'Nursing Home'
            OR admission_type = 'Rehab Unit / Facility'
            OR admission_type = 'End of Life')
            AND admit_reason = 'custodial';
            """,
        )
        for view_sql in view_statements:
            c.execute(view_sql)

    conn.commit()
    conn.close()

    # Touch an empty, dated marker file for the Luigi pipeline.
    open(
        f"{update_logs_folder}\\inpatient_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
# --- Esempio n. 17 (scraper artifact: example separator and vote count) ---
def create_demographic_agg_table(params=("2005-12-01", end_date),
                                 db_path=agg_db_path,
                                 freq="MS",
                                 update=True):
    """
    Build (or refresh) the aggregate table of demographic indicators.

    Each indicator function produces a dataframe with a month column and a
    value column; every value column is merged onto a master frame keyed on
    month, in the order the indicators are declared below.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" indicates if values should be grouped monthly
            or quarterly
        update(bool): if the table is being updated or created

    Returns:
        DataFrame: the created dataframe

    Output:
        creates empty text file in log folder so the Luigi pipeline
            can be told the process is complete.
    """
    demo = Demographics()

    # ``update`` may arrive as the string "True"/"False"; normalize it and,
    # when updating, restrict ``params`` to the most recent period only.
    if str(update).lower() == "true":
        update = True
        period = demo.last_month() if freq == "MS" else demo.last_quarter()
        params = (period[0], demo.month_to_date()[1])
    else:
        update = False

    # Output column name -> indicator function.
    indicator_funcs = {
        "dual_enrolled": demo.dual_count,
        "percent_dual_enrolled": demo.percent_dual,
        "medicare_only": demo.medicare_only_count,
        "percent_medicare_only": demo.percent_medicare_only,
        "medicaid_only": demo.medicaid_only_count,
        "percent_medicaid_only": demo.percent_medicaid_only,
        "private_pay": demo.private_pay_count,
        "percent_private_pay": demo.percent_private_pay,
        "percent_primary_non_english": demo.percent_primary_non_english,
        "percent_non_white": demo.percent_non_white,
        "living_in_community": demo.living_in_community,
        "percent_living_in_community": demo.living_in_community_percent,
        "percent_below_65": demo.percent_age_below_65,
        "percent_female": demo.percent_female,
        "bh_dx_percent": demo.behavorial_dx_percent,
        "six_chronic_conditions": demo.over_six_chronic_conditions_percent,
        "percent_attending_dc": demo.percent_attending_dc,
    }

    # Seed the master frame with average age, then join the rest.
    demo_agg = demo.loop_plot_df(demo.avg_age, params, freq=freq).rename(
        columns={"Month": "month", "Value": "avg_age"})

    for column_name, indicator in indicator_funcs.items():
        monthly = demo.loop_plot_df(indicator, params, freq=freq).rename(
            columns={"Month": "month", "Value": column_name})
        demo_agg = demo_agg.merge(monthly, on="month", how="left")

    # Snapshot to CSV regardless of mode.
    demo_agg.to_csv(f"{processed_data}\\demographics_agg.csv", index=False)

    # Quarterly data goes to a separate "_q" table.
    table_name = "demographics_q" if freq == "QS" else "demographics"

    conn = sqlite3.connect(db_path)

    writer = stu.update_sql_table if update else stu.create_table
    writer(demo_agg, table_name, conn, ["month"], agg_table=True)

    conn.commit()
    conn.close()

    # Touch an empty, dated marker file for the Luigi pipeline.
    open(
        f"{update_logs_folder}\\demographic_agg{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()

    return demo_agg
def payments_to_sql(update=True):
    """
    Reads a cleaned dataset
    Parse the dates to match SQL format of YYYY-MM-DD
    Creates or Updates the database table using the
    indicated primary keys and foreign keys

    If being updated only payments with a date greater than or equal to
        the most recent date in the database table are added

    Args:
        update(bool): indicates if database table is being updated or not

    Output:
        creates empty text file in log folder so the Luigi pipeline
            can be told the process is complete.
    """
    payments = pd.read_csv(f"{processed_data}\\payments.csv", low_memory=False)
    if update:
        h = Helpers(database_path)
        # NOTE(review): if the payments table is empty, MAX(id_col) is NULL
        # and this addition raises TypeError — confirm the update path is
        # never run before the table is first populated.
        first_id_val = h.single_value_query(
            "SELECT MAX(id_col) FROM payments") + 1
        max_date = h.single_value_query("SELECT MAX(date_paid) FROM payments")

        # Keep only rows on or after the newest stored payment date.
        # NOTE(review): ">=" means rows dated exactly ``max_date`` that are
        # already in the table are re-selected with brand-new id_col values;
        # presumably update_sql_table de-duplicates downstream — verify.
        payments = payments[payments["date_paid"] >= max_date].copy()
        payments.reset_index(inplace=True, drop=True)

        # Continue the surrogate-key sequence from the current maximum.
        payments["id_col"] = list(
            range(first_id_val, first_id_val + payments.shape[0]))

    else:
        # Initial build: the row index becomes the surrogate key column.
        payments.reset_index(inplace=True)
        payments.rename(columns={"index": "id_col"}, inplace=True)

    payments = create_sql_dates(payments)

    primary_key = ["id_col"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    if update is True:

        update_sql_table(payments, "payments", conn, primary_key)

        # update_sql_table stages rows through a temp table; clean it up.
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS temp;")

        print("payments updated...")

    else:
        create_table(payments, "payments", conn, primary_key, foreign_key,
                     ref_table, ref_col)

        print("payments created...")

    conn.commit()
    conn.close()

    # Touch an empty, dated marker file for the Luigi pipeline.
    open(
        f"{update_logs_folder}\\payments_{str(pd.to_datetime('today').date())}.txt",
        "a").close()
# --- Esempio n. 19 (scraper artifact: example separator and vote count) ---
def create_enrollment_agg_table(params=("2005-12-01", end_date),
                                db_path=agg_db_path,
                                freq="MS",
                                update=True):
    """
    Create an aggregate table of enrollment values.

    Loops through indicator/column-name pairs and matching functions,
    creates a dataframe with a month column and value column, and merges
    each value column onto a master dataframe keyed on month.  Growth and
    churn rates are then derived from the census column.

    Args:
        params(tuple): start date and end date in format 'YYYY-MM-DD'
        db_path(str): path to the aggregate database
        freq(str): "MS" or "QS" indicates if values should be grouped monthly
            or quarterly
        update(bool): if the table is being updated or created

    Returns:
        DataFrame: the created dataframe

    Output:
        creates empty text file in log folder so the Luigi pipeline
            can be told the process is complete.
    """
    e = Enrollment()

    # ``update`` may arrive as a string; normalize it and, when updating,
    # restrict ``params`` to the most recent month/quarter.
    if str(update).lower() == "true":
        if freq == "MS":
            params = (e.last_month()[0], e.month_to_date()[1])
        else:
            params = (e.last_quarter()[0], e.month_to_date()[1])
        update = True
    else:
        update = False

    # Output column name -> indicator function.
    enrollment_funcs = {
        "disenrolled": e.disenrolled,
        "voluntary_disenrolled": e.voluntary_disenrolled_percent,
        "enrolled": e.enrolled,
        "deaths": e.deaths,
        "net_enrollment": e.net_enrollment,
        "avg_years_enrolled": e.avg_years_enrolled,
        "inquiries": e.inquiries,
        "avg_days_to_enrollment": e.avg_days_to_enrollment,
        "conversion_rate_180_days": e.conversion_rate_180_days,
    }

    # Seed the master frame with the period-end census.
    enrollment_agg = e.loop_plot_df(e.census_on_end_date, params,
                                    freq=freq).rename(columns={
                                        "Month": "month",
                                        "Value": "census"
                                    })

    for col_title, func in enrollment_funcs.items():
        dff = e.loop_plot_df(func, params, freq=freq).rename(columns={
            "Month": "month",
            "Value": col_title
        })
        enrollment_agg = enrollment_agg.merge(dff, on="month", how="left")

    # Period-over-period growth rate: compare each census to the prior row.
    # The original seeded the prior-row list with 0, which divided by zero
    # and stored ``inf`` for the first row on the update path; shift()
    # leaves that unknowable first value as NaN (-> SQL NULL) instead.
    prev_census = enrollment_agg["census"].shift(1)
    growth = ((enrollment_agg["census"] - prev_census) / prev_census) * 100
    if not update:
        # On a full build the first period has no predecessor; by the
        # table's convention it is recorded as 100 (as before).
        growth.iloc[0] = 100

    enrollment_agg["growth_rate"] = growth

    enrollment_agg["churn_rate"] = (enrollment_agg["disenrolled"] /
                                    enrollment_agg["census"]) * 100

    # Snapshot to CSV regardless of mode.
    enrollment_agg.to_csv(f"{processed_data}\\enrollment_agg.csv", index=False)

    # Quarterly data goes to a separate "_q" table.
    if freq == "QS":
        table_name = "enrollment_q"
    else:
        table_name = "enrollment"

    conn = sqlite3.connect(db_path)

    if update:

        stu.update_sql_table(enrollment_agg,
                             table_name,
                             conn, ["month"],
                             agg_table=True)

    else:
        stu.create_table(enrollment_agg,
                         table_name,
                         conn, ["month"],
                         agg_table=True)

    conn.commit()
    conn.close()

    # Touch an empty, dated marker file for the Luigi pipeline.
    open(
        f"{update_logs_folder}\\enrollment_agg{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()

    return enrollment_agg
# --- Esempio n. 20 (scraper artifact: example separator and vote count) ---
def monthly_census_to_sql(update=True):
    """
    Creates or updates the monthly first-of-month census table.
    This is an aggregate table used in 100-member-month queries.

    If updating: computes the first of the current month and checks whether
    a census row already exists for it.  If none exists, a new row is built
    from the previous row's census plus enrollments on that date minus
    disenrollments since the previous row, then appended.

    Args:
        update(bool): is the database being updated or not

    Output:
        creates empty text file in log folder so the Luigi pipeline
            can be told the process is complete.
    """
    conn = sqlite3.connect(database_path)
    primary_key = ["month"]

    if update is True:
        today = pd.to_datetime("today")
        # Zero-padded first of the current month; replaces the original
        # add_leading_0 lambda hack with the standard :02d format spec.
        last_first_of_month = f"{today.year}-{today.month:02d}-01"
        c = conn.cursor()

        month_exists = c.execute(
            "SELECT * FROM monthly_census WHERE month = ?", [last_first_of_month]
        ).fetchone()

        if month_exists is None:

            enrollment = pd.read_csv(f"{processed_data}\\enrollment_for_census.csv")
            enrollment = create_sql_dates(enrollment)

            # Most recent stored census row; the delta is computed from it.
            most_recent_month = pd.read_sql(
                "SELECT * FROM monthly_census ORDER BY month DESC LIMIT 1", conn
            )
            # Departures since the last stored month (exclusive of today).
            disenrollments_mask = (
                enrollment["disenrollment_date"] >= most_recent_month["month"][0]
            ) & (enrollment["disenrollment_date"] < last_first_of_month)

            # NOTE(review): only enrollments dated exactly on the new first
            # of month are counted; enrollments falling between the last
            # stored month and now would be missed if a month were skipped
            # — confirm this job always runs monthly.
            enrollments_mask = enrollment["enrollment_date"] == last_first_of_month

            # center name (lowercased) -> {month: census count}
            center_census = {}
            for center in enrollment.center.unique():
                center_census[center.lower()] = {}

            for center in enrollment.center.unique():
                center_mask = enrollment["center"] == center

                # New census = previous census + enrollments - disenrollments.
                center_census[center.lower()][last_first_of_month] = most_recent_month[
                    center.lower()
                ][0] + (
                    enrollment[enrollments_mask & center_mask].shape[0]
                    - enrollment[disenrollments_mask & center_mask].shape[0]
                )

            monthly_census = pd.DataFrame.from_dict(center_census)
            monthly_census["total"] = monthly_census.sum(axis=1)
            monthly_census.reset_index(inplace=True)
            monthly_census.rename(columns={"index": "month"}, inplace=True)
            monthly_census = create_sql_dates(monthly_census, ["month"])
            monthly_census.to_sql(
                "monthly_census", conn, if_exists="append", index=False
            )

            print("monthly_census updated...")

    else:

        # Full rebuild: derive every month's census directly from the
        # enrollment + centers tables.
        enrollment = pd.read_sql(
            "SELECT * FROM enrollment JOIN centers on enrollment.member_id=centers.member_id",
            conn,
        )
        enrollment["enrollment_date"] = pd.to_datetime(enrollment["enrollment_date"])
        enrollment["disenrollment_date"] = pd.to_datetime(
            enrollment["disenrollment_date"]
        )

        center_census = {}
        for center in enrollment.center.unique():
            center_census[center.lower()] = {}

        # A member counts toward a month if enrolled on or before its first
        # day and not disenrolled by then (null = still enrolled).
        for month_start in pd.date_range(
            enrollment.enrollment_date.min(), pd.to_datetime("today"), freq="MS"
        ):
            enrollment_mask = enrollment["enrollment_date"] <= month_start
            disenrollment_mask = (enrollment["disenrollment_date"] > month_start) | (
                enrollment["disenrollment_date"].isnull()
            )
            for center in enrollment.center.unique():
                center_mask = enrollment["center"] == center
                center_census[center.lower()][month_start] = enrollment[
                    center_mask & enrollment_mask & disenrollment_mask
                ].shape[0]

        monthly_census = pd.DataFrame.from_dict(center_census)
        monthly_census["total"] = monthly_census.sum(axis=1)
        monthly_census.reset_index(inplace=True)
        monthly_census.rename(columns={"index": "month"}, inplace=True)
        monthly_census = create_sql_dates(monthly_census, ["month"])

        create_table(monthly_census, "monthly_census", conn, primary_key)

        print("monthly_census created...")

    conn.commit()
    conn.close()

    # Touch an empty, dated marker file for the Luigi pipeline.
    open(
        f"{update_logs_folder}\\monthly_census_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()