def surgery_usage_regression_df(surgery_df,
                                usage_df,
                                item_ids=None,
                                case_cart_df=None,
                                filters=None,
                                common_events=True):
    """
    Build a per-surgery table of procedure indicators and item usage.

    Steps performed:
      1. ``filters`` are applied to ``surgery_df`` and ``usage_df`` (and to
         ``case_cart_df`` when one is given); surgeries are de-duplicated on
         ``event_id``.
      2. When ``common_events`` is true, only surgeries whose ``event_id``
         also appears in ``usage_df`` are kept.
      3. Usage rows are de-duplicated on ``(event_id, item_id)`` (last row
         wins), restricted to ``item_ids`` and pivoted so each item becomes a
         column holding ``used_qty``; this is left-joined onto the surgeries
         that have usage and missing values are filled with 0.
      4. The ``procedures`` collection of every surgery is expanded into one
         1/0 indicator column per procedure.
      5. The indicator table is left-joined with the usage table on
         ``event_id``.

    Args:
        surgery_df: surgeries with ``event_id`` and ``procedures`` columns.
        usage_df: usage rows with ``event_id``, ``item_id`` and ``used_qty``.
        item_ids: item ids to expose as usage columns (defaults to none).
        case_cart_df: optional frame; it is filtered but not otherwise used
            by this function.
        filters: filter specs forwarded to ``Analytics.process_filters``.
        common_events: restrict to events present in both inputs and fill
            missing item columns with 0.

    Returns:
        DataFrame with one row per event, procedure indicator columns and
        the joined surgery/usage columns.
    """
    item_ids = [] if item_ids is None else item_ids
    filters = [] if filters is None else filters

    surgery_df = Analytics.process_filters(surgery_df, filters)
    surgery_df = surgery_df.drop_duplicates(subset=["event_id"])
    usage_df = Analytics.process_filters(usage_df, filters)
    # A DataFrame has no unambiguous truth value (``if df:`` raises
    # ValueError), so the optional argument is tested against None.
    if case_cart_df is not None:
        case_cart_df = Analytics.process_filters(case_cart_df, filters)

    if common_events:
        surgery_df = surgery_df[surgery_df["event_id"].isin(
            set(usage_df["event_id"]))]

    usage_df = usage_df.drop_duplicates(["event_id", "item_id"], keep="last")
    # One column per requested item, indexed by event.
    item_usage = usage_df[usage_df["item_id"].isin(item_ids)] \
        .pivot(index="event_id", columns="item_id", values="used_qty")
    usage_df = surgery_df[surgery_df["event_id"].isin(set(usage_df["event_id"]))] \
        .join(item_usage,
              on="event_id",
              how="left",
              rsuffix="item") \
        .fillna(0)

    # Long table (procedure, event_id): each surgery contributes a Series
    # whose index is its procedures and whose values are its event id.
    procedure_df = pd.concat([
        pd.Series(row['event_id'], row['procedures'])
        for _, row in surgery_df.iterrows()
    ], ).reset_index().rename(columns={
        "index": "procedure",
        0: "event_id"
    })
    procedure_df["flag"] = 1
    regression_df = procedure_df \
        .pivot(index="event_id", columns="procedure", values="flag") \
        .fillna(0) \
        .reset_index() \
        .join(usage_df.set_index("event_id"),
              on="event_id",
              how="left",
              rsuffix="usage")
    if common_events:
        regression_df[item_ids] = regression_df[item_ids].fillna(0)
    return regression_df
 def dist(g_df):
     """
     Return the number of distinct events per calendar day as a list.

     NOTE(review): this definition reads ``start``, ``end``, ``day_filters``,
     ``filters`` and ``day_group_by`` without defining them, and it sits at a
     one-space indent. It looks like a detached copy of the helper nested in
     ``surgeries_per_day_distribution`` -- confirm whether it should exist at
     this level at all.
     """
     # The print calls below look like leftover debugging output.
     print(0)
     print(g_df.iloc[0])
     # Distinct event ids per start date.
     g_df = g_df.groupby("start_date").agg({"event_id": "nunique"})
     print(1)
     # One row per day between start and end, so days without events appear
     # with a count of 0 after the left join + fillna.
     data_df = pd.DataFrame()
     data_df["dt"] = pd.date_range(start=start, end=end, freq='D')
     data_df["date"] = data_df["dt"].apply(lambda x: x.date())
     data_df = Analytics.process_day_df_columns(data_df)
     data_df = data_df.join(g_df, on="date", how="left").fillna(0)
     if day_filters:
         print(2)
         # NOTE(review): guarded by day_filters but applies ``filters`` --
         # presumably day_filters was intended; confirm.
         data_df = Analytics.process_filters(data_df, filters)
     if day_group_by:
         print(3)
         # The groupby result is discarded, so this statement has no effect
         # on the returned list.
         data_df.groupby("day_groupby")
     return data_df["event_id"].to_list()
def surgeries_per_day_distribution(df,
                                   group_by=None,
                                   filters=None,
                                   day_group_by=None,
                                   day_filters=None):
    """
    Distribution of the number of distinct surgeries per calendar day.

    ``filters`` are applied to ``df`` first. A calendar with one row per day
    between the earliest and latest ``start_date`` of the filtered data is
    built, enriched by ``Analytics.process_day_df_columns`` and left-joined
    with the per-day count of distinct ``event_id`` values; days without
    surgeries count as 0.

    Args:
        df: surgeries with ``start_date`` and ``event_id`` columns.
        group_by: optional column(s) of ``df``; when given, one list of daily
            counts is produced per group.
        filters: filter specs applied to ``df``.
        day_group_by: optional calendar column(s) used to split the daily
            counts; only honoured when ``group_by`` is not given.
        day_filters: filter specs applied to the calendar rows; only
            honoured when ``group_by`` is given.

    Returns:
        * with ``group_by``: the result of ``groupby(group_by).apply`` where
          each group maps to its list of daily counts;
        * with ``day_group_by`` only: DataFrame with the grouping column(s)
          and a ``data`` column holding the list of daily counts;
        * otherwise: a plain list of daily counts.
    """
    filters = [] if filters is None else filters
    day_filters = [] if day_filters is None else day_filters

    df = Analytics.process_filters(df, filters)
    start, end = min(df["start_date"]), max(df["start_date"])

    def daily_counts(events_df):
        # Calendar over the whole observed range joined with distinct event
        # counts per day; missing days become 0.
        per_day = events_df.groupby("start_date").agg({"event_id": "nunique"})
        data_df = pd.DataFrame()
        data_df["dt"] = pd.date_range(start=start, end=end, freq='D')
        data_df["date"] = data_df["dt"].apply(lambda x: x.date())
        data_df = Analytics.process_day_df_columns(data_df)
        return data_df.join(per_day, on="date", how="left").fillna(0)

    if group_by:

        def dist(g_df):
            data_df = daily_counts(g_df)
            if day_filters:
                # The day-level filters are the ones that apply to calendar
                # rows; the surgery-level ``filters`` were already applied.
                data_df = Analytics.process_filters(data_df, day_filters)
            return data_df["event_id"].to_list()

        return df.groupby(group_by).apply(dist)

    data_df = daily_counts(df)
    if day_group_by:
        return data_df.groupby(day_group_by).agg({"event_id": lambda x: list(x)})\
            .reset_index()\
            .rename(columns={"event_id": "data"})
    return data_df["event_id"].to_list()
def surgery_count_metric(df, group_by=None, filters=[]):
    """
    Count distinct surgeries for every value of the ``group_by`` column.

    Example: grouping by case service, a row ``cardiac / 5`` means five
    distinct ``event_id`` values belong to the cardiac department.

    The result has two columns: ``dimension`` (the group value) and
    ``metrix`` (the distinct-event count; the column name is spelled this
    way in the output).
    """
    filtered = Analytics.process_filters(df, filters)
    events_by_group = filtered[[group_by, "event_id"]].groupby([group_by])
    distinct_counts = events_by_group.agg({'event_id': 'nunique'}).reset_index()
    column_names = {'event_id': 'metrix', group_by: 'dimension'}
    return distinct_counts.rename(columns=column_names)
def procedure_count_distribution(df, group_by=None, filters=[]):
    """
    Number of procedures per surgery, optionally collected per group.

    After applying ``filters`` a ``procedure_count`` column is written onto
    the filtered frame (the length of each ``procedures`` entry).

    Returns a list of counts when ``group_by`` is falsy; otherwise a
    DataFrame with the grouping column(s) and a ``data`` column holding the
    list of counts of each group.
    """
    df = Analytics.process_filters(df, filters)
    df["procedure_count"] = df["procedures"].apply(len)

    if not group_by:
        return df["procedure_count"].to_list()

    def collect(counts):
        return list(counts)

    per_group = df.groupby(group_by).agg({"procedure_count": collect})
    per_group = per_group.reset_index()
    return per_group.rename(columns={"procedure_count": "data"})
def item_usage_per_day_distribution(df, group_by=None, filters=None):
    """
    Total ``used_qty`` per calendar day, over every day in the data's range.

    ``filters`` are applied first. The day of each row is taken from
    ``start_dt``; a calendar covering the earliest to the latest day is
    left-joined with the summed ``used_qty`` per day, and days without usage
    count as 0.

    Args:
        df: usage rows with ``start_dt`` and ``used_qty`` columns.
        group_by: optional column(s); when given, one list is produced per
            group through ``groupby(...).apply``.
        filters: filter specs forwarded to ``Analytics.process_filters``.

    Returns:
        A list of daily totals when ``group_by`` is falsy, otherwise the
        per-group result of ``groupby(group_by).apply``.
    """
    filters = [] if filters is None else filters

    df = Analytics.process_filters(df, filters)
    # assign() returns a new frame, so the caller's DataFrame is not given
    # an extra column as a side effect.
    df = df.assign(start_date=df["start_dt"].apply(lambda x: x.date()))
    start, end = min(df["start_date"]), max(df["start_date"])

    date_df = pd.DataFrame()
    date_df["start_date"] = pd.date_range(start=start, end=end, freq='D')
    date_df["start_date"] = date_df["start_date"].apply(lambda x: x.date())

    def dist(g_df):
        g_df = g_df.groupby("start_date").agg({"used_qty": "sum"})
        data_df = date_df.join(g_df, on="start_date", how="left").fillna(0)
        return data_df["used_qty"].to_list()

    if group_by:
        return df.groupby(group_by).apply(dist)
    # Without grouping, DataFrame.apply would hand dist one column at a time
    # (a Series without a "start_date" key), so call it on the frame itself.
    return dist(df)
def surgery_hours_metric(df, group_by=None, filters=None):
    """
    Total surgery time, in hours, for every value of the ``group_by`` column.

    ``df`` should be the surgery frame. Rows without a ``surgery_duration``
    are ignored; the remaining durations are summed per group.
    Example: ``cardiac / 10.0`` means ten hours of cardiac surgery.

    Args:
        df: surgeries with a timedelta-like ``surgery_duration`` column.
        group_by: column to group on.
        filters: filter specs forwarded to ``Analytics.process_filters``.

    Returns:
        DataFrame with columns ``dimension`` (group value) and ``metric``
        (summed duration in hours).
    """
    filters = [] if filters is None else filters

    df = Analytics.process_filters(df, filters)
    df = df[df["surgery_duration"].notna()]
    df = df[[group_by, "surgery_duration"]] \
        .groupby([group_by]) \
        .agg({'surgery_duration': 'sum'}) \
        .reset_index() \
        .rename(columns={'surgery_duration': 'metric', group_by: 'dimension'})
    # Convert the summed timedelta to hours. Dividing the seconds part by 60
    # would add minutes to whole-day hours; 3600 seconds make one hour.
    df["metric"] = df["metric"].apply(lambda x: x.total_seconds() / 3600)
    return df
def item_usage_per_week_distribution(usage_df, group_by=None, filters=None):
    """
    Total ``used_qty`` per week, over the weekly dates in the data's range.

    ``filters`` are applied first. Every row is labelled with the ISO
    ``"<year>-<week>"`` of its ``start_dt``. The weekly dates produced by
    ``pd.date_range(start, end, freq='W')`` are labelled the same way and
    left-joined with the summed ``used_qty`` per label; weeks without usage
    count as 0.

    Args:
        usage_df: usage rows with ``start_dt`` and ``used_qty`` columns.
        group_by: optional column(s); when given, one list is produced per
            group through ``groupby(...).apply``.
        filters: filter specs forwarded to ``Analytics.process_filters``.

    Returns:
        A list of weekly totals when ``group_by`` is falsy, otherwise the
        per-group result of ``groupby(group_by).apply``.
    """
    filters = [] if filters is None else filters

    def week_label(ts):
        # Missing timestamps have no ISO calendar; give them a label that
        # never matches a real week.
        if pd.isnull(ts):
            return "nan-nan"
        # The ISO year belongs to the ISO week, so early-January days in
        # week 52/53 and late-December days in week 1 get the right year.
        iso_year, iso_week = ts.isocalendar()[:2]
        return "{0}-{1}".format(iso_year, iso_week)

    usage_df = Analytics.process_filters(usage_df, filters)
    # assign() returns a new frame, so the caller's DataFrame is not given
    # an extra column as a side effect.
    usage_df = usage_df.assign(week=usage_df["start_dt"].apply(week_label))
    start, end = min(usage_df["start_dt"]), max(usage_df["start_dt"])

    date_df = pd.DataFrame()
    date_df["start_date"] = pd.date_range(start=start, end=end, freq='W')
    date_df["week"] = date_df["start_date"].apply(week_label)
    date_df = date_df[["week"]]

    def dist(g_df):
        g_df = g_df.groupby("week").agg({"used_qty": "sum"})
        data_df = date_df.join(g_df, on="week", how="left").fillna(0)
        return data_df["used_qty"].to_list()

    if group_by:
        return usage_df.groupby(group_by).apply(dist)
    # Without grouping, DataFrame.apply would hand dist one column at a time
    # (a Series without a "week" key), so call it on the frame itself.
    return dist(usage_df)
def run(case_service="Cardiac Surgery", item_id="1686"):
    """
    Plot rolling usage of one item and prepare per-procedure-set histograms.

    For the given case service, surgeries after 2016-01-01 that have usage
    records are joined with the quantity used of ``item_id`` (0 when the
    item was not used). Surgeries are grouped by their exact procedure set;
    sets seen more than 25 times are kept and their mean, sample variance
    and variance/mean ratio are computed.

    Side effect: saves ``<item_id>_rolling_usage.png`` with the 100- and
    50-surgery rolling means of the used quantity.

    One plotly histogram trace per retained procedure set is built together
    with the largest bin end; in the code shown here neither is returned.
    """
    analytics = ScmAnalytics.ScmAnalytics(lhs_config)
    case_service_filter = [{
        "dim": "case_service",
        "op": "eq",
        "val": case_service
    }]

    # Dated usage rows of the requested case service.
    dated_usage = analytics.usage_df
    dated_usage = dated_usage[dated_usage["start_date"].notna()]
    dated_usage = Analytics.process_filters(dated_usage,
                                            filters=case_service_filter)
    usage_events = set(dated_usage["event_id"])
    item_usage_df = dated_usage[dated_usage["item_id"] == item_id]

    # Dated surgeries after the cutoff that also have usage records.
    cutoff = datetime.date(2016, 1, 1)
    surgery_df = pre_process_columns(analytics.surgery_df)
    surgery_df = surgery_df[surgery_df["start_date"].notna()]
    surgery_df = surgery_df[surgery_df["start_date"] > cutoff]
    surgery_df = Analytics.process_filters(surgery_df,
                                           filters=case_service_filter)
    surgery_df = surgery_df[surgery_df["event_id"].isin(usage_events)]

    item_qty = item_usage_df.set_index("event_id")[["used_qty"]]
    surgery_df = surgery_df.join(item_qty, on="event_id", how="left").fillna(0)
    # frozenset makes the procedure set hashable so it can be a group key.
    surgery_df["procedures"] = surgery_df["procedures"].apply(frozenset)

    usage_dist = surgery_df.groupby(["procedures"]) \
        .agg({"used_qty": lambda qty: list(qty)}) \
        .reset_index()
    usage_dist["occurrences"] = usage_dist["used_qty"].apply(len)
    usage_dist = usage_dist[usage_dist["occurrences"] > 25]
    usage_dist["mean"] = usage_dist["used_qty"].apply(
        lambda qty: np.mean(qty))
    usage_dist["variance"] = usage_dist["used_qty"].apply(
        lambda qty: np.var(qty, ddof=1))
    usage_dist["var/mean"] = usage_dist["variance"] / usage_dist["mean"]

    is_common_case = surgery_df["procedures"].isin(usage_dist["procedures"])
    df = surgery_df[is_common_case][["start_date", "used_qty"]]
    for window in (100, 50):
        rolling_df = df[["used_qty"]].rolling(window).mean()
        plt.plot(list(rolling_df["used_qty"]))
    plt.savefig("{}_rolling_usage.png".format(item_id), format="png")

    traces = []
    x_max = 0
    for case, data in zip(usage_dist["procedures"], usage_dist["used_qty"]):
        end = max(data) + 1
        histogram = go.Histogram(x=data,
                                 name=", ".join(case),
                                 xbins=dict(start=0, end=end, size=1),
                                 histnorm='probability',
                                 opacity=0.75)
        traces.append(histogram)
        if end > x_max:
            x_max = int(end)
def _procedure_indicator_df(surgeries):
    """Return one row per ``event_id`` with a 1/0 column for every procedure."""
    # Each surgery contributes a Series indexed by its procedures whose
    # values are its event id; stacking them gives a long (procedure, event)
    # table that is then pivoted wide.
    long_df = pd.concat(
        [pd.Series(row['event_id'], row['procedures']) for _, row in surgeries.iterrows()]) \
        .reset_index() \
        .rename(columns={"index": "procedure",
                         0: "event_id"}
                )
    long_df["flag"] = 1
    return long_df \
        .pivot(index="event_id", columns="procedure", values="flag") \
        .fillna(0) \
        .reset_index()


def _sample_daily_info(per_surgery_rv, surgeries_per_day, granularity):
    """Sum per-surgery draws for each day and round to ``granularity``."""
    samples = [sum(per_surgery_rv.rand(x)) for x in surgeries_per_day]
    return [round(sample / granularity) * granularity for sample in samples]


def _empirical_discrete_rv(samples):
    """Build a ``pacal.DiscreteDistr`` from the relative frequency of each sample value."""
    info_df = pd.DataFrame({"info": samples, "count": [1] * len(samples)}) \
        .groupby(["info"]) \
        .agg({"count": "count"}) \
        .reset_index()
    info_df["p"] = info_df["count"] / sum(info_df["count"])
    return pacal.DiscreteDistr(info_df["info"], info_df["p"])


def _truncate_upper_tail(rv, eps, fallback_max=999):
    """Cut ``rv`` at the first atom whose remaining upper mass is below ``eps``."""
    max_v = fallback_max
    for d in rv.get_piecewise_pdf().getDiracs():
        if 1 - rv.cdf(d.a) < eps:
            max_v = d.a
            break
    diracs = (pacal.CondLtDistr(rv, max_v)) \
        .get_piecewise_pdf().getDiracs()
    # Rebuild the distribution from the atoms that kept positive mass.
    diracs = [d for d in diracs if d.f > 0]
    return pacal.DiscreteDistr([d.a for d in diracs],
                               [d.f for d in diracs])


def boostrap_info_process(item_id="38242"):
    """
    Build and pickle the daily "information" distributions for one item.

    Using Cardiac Surgery data after 2016-01-01 and the regression
    coefficients read from ``regression_results/<item_id>``, the function:

      1. fits binomial parameters (n, p) to the number of elective surgeries
         per weekday and a mean to the number of urgent surgeries per day;
      2. generates 1000 synthetic surgeries (procedure count drawn from the
         empirical count distribution, procedures drawn by frequency) and
         computes ``exp(features . coefficients)`` as expected usage for both
         synthetic and empirical surgeries;
      3. simulates 100000 days of elective (binomial count) and emergency
         (Poisson count) demand by summing draws from the empirical
         per-surgery expected-usage distribution;
      4. truncates the upper tail of both simulated distributions and
         pickles them.

    Side effects: writes ``<item_id>_Per_Surgery_Info_Rv.html`` and
    ``<item_id>_Weekday_Elective_Info_Rv.html`` through ``plot`` and dumps
    ``<item_id>.pickle`` into the elective and emergency output directories.
    Draws from numpy's global random state.

    Returns:
        Tuple ``(emergency_trace, weekday_elective_trace)`` of plotly
        histogram traces.
    """
    case_service = "Cardiac Surgery"
    info_granularity = 1
    eps_trunk = 1e-3
    days = 100000

    elective_outdir = "scm_implementation/ns_info_state_rvs/elective"
    emergency_outdir = "scm_implementation/ns_info_state_rvs/emergency"

    analytics = ScmAnalytics.ScmAnalytics(lhs_config)

    elective_filter = [{
        "dim": "urgent_elective",
        "op": "eq",
        "val": "Elective"
    }]
    emergency_filter = [{
        "dim": "urgent_elective",
        "op": "eq",
        "val": "Urgent"
    }]
    case_service_filter = [{
        "dim": "case_service",
        "op": "eq",
        "val": case_service
    }]

    # Elective surgeries per weekday -> binomial parameters n and p.
    surgery_df = pre_process_columns(analytics.surgery_df)
    surgery_df = surgery_df[surgery_df["start_date"].notna()]
    surgery_df = surgery_df[
        surgery_df["start_date"] > datetime.date(2016, 1, 1)]
    surgery_df = Analytics.process_filters(surgery_df,
                                           filters=elective_filter +
                                           case_service_filter)
    dist_df = surgeries_per_day_distribution(surgery_df,
                                             day_group_by="is_weekday",
                                             filters=[])
    data = dist_df.set_index("is_weekday").loc[True]["data"]
    n = int(max(data))
    p = np.mean(data) / n

    # Urgent surgeries per day -> Poisson mean.
    surgery_df = pre_process_columns(analytics.surgery_df)
    surgery_df = surgery_df[surgery_df["start_date"].notna()]
    surgery_df = surgery_df[
        surgery_df["start_date"] > datetime.date(2016, 1, 1)]
    surgery_df = Analytics.process_filters(surgery_df,
                                           filters=emergency_filter +
                                           case_service_filter)
    dist_df = surgeries_per_day_distribution(surgery_df, filters=[])
    emergency_surgeries_mean = np.mean(dist_df)

    # Distribution of the number of procedures per surgery.
    surgery_df = Analytics.process_filters(analytics.surgery_df,
                                           filters=case_service_filter)
    surgery_df["procedure_count"] = surgery_df["procedures"].apply(
        lambda x: len(x))
    procedure_count_df = surgery_df.groupby("procedure_count").agg({
        "event_id":
        "count"
    }).reset_index()
    procedure_count_df = procedure_count_df[
        procedure_count_df["procedure_count"] != 6]
    # The probability of a procedure count is its share of events. Dividing
    # the count values by their own sum would weight a count by its size
    # instead of by how often it occurs.
    procedure_count_df["p"] = procedure_count_df["event_id"] / sum(
        procedure_count_df["event_id"])
    procedure_count_rv = pacal.DiscreteDistr(
        procedure_count_df["procedure_count"], procedure_count_df["p"])

    # Procedure weights: relative frequency of each procedure among
    # surgeries that have usage records.
    usage_events = set(analytics.usage_df["event_id"])
    surgery_df = analytics.surgery_df[analytics.surgery_df["event_id"].isin(
        usage_events)]
    surgery_df = Analytics.process_filters(surgery_df,
                                           filters=case_service_filter)
    surgery_df["procedures"] = surgery_df["procedures"].apply(
        lambda x: set(e.replace(" ", "_") for e in x))
    procedures = surgery_df["procedures"].apply(lambda x: list(x)).to_list()
    procedures = pd \
        .DataFrame({"procedure": [val for sublist in procedures for val in sublist],
                    "count": [1 for sublist in procedures for val in sublist]}) \
        .groupby("procedure") \
        .agg({"count": "count"}) \
        .reset_index()

    procedures["p"] = procedures["count"] / sum(procedures["count"])

    def procedure_pick_rv(size):
        # Distinct procedures for one surgery, drawn by frequency.
        return np.random.choice(procedures["procedure"],
                                p=procedures["p"],
                                replace=False,
                                size=size)

    # Synthetic surgeries.
    synthetic_surgeries = pd.DataFrame({"event_id": list(range(1000))})
    synthetic_surgeries["procedure_count"] = procedure_count_rv.rand(1000)
    # int(): the sampled count is used as an array size.
    synthetic_surgeries["procedures"] = synthetic_surgeries[
        "procedure_count"].apply(lambda x: procedure_pick_rv(int(x)))
    synthetic_surgeries_df = _procedure_indicator_df(synthetic_surgeries)

    # Regression features: plain names are procedures, dotted names are
    # interactions.
    feature_df = pd.read_csv(os.path.join("regression_results", item_id))
    features = feature_df["feature"]
    featured_procedures = list(
        filter(lambda x: "." not in x, feature_df["feature"]))
    if "other" in featured_procedures:
        featured_procedures.remove("other")
    for fp in featured_procedures:
        if fp not in synthetic_surgeries_df:
            print(procedures.set_index("procedure").loc[fp])
            synthetic_surgeries_df[fp] = 0

    interactions = list(filter(lambda x: "." in x, feature_df["feature"]))
    interactions = list(Interaction(i.split(".")) for i in interactions)
    data, _ = SURegressionModel.extract_features_data(synthetic_surgeries_df,
                                                      featured_procedures, [],
                                                      interactions,
                                                      other=True)

    for f in feature_df["feature"]:
        if f not in data:
            print(f)
            data[f] = 0
    synthetic_surgeries_df["feature_vector"] = data[features].values.tolist()
    coeff = np.array(feature_df["estimate"])
    synthetic_surgeries_df["expected_usage"] = synthetic_surgeries_df["feature_vector"] \
        .apply(lambda x: np.exp(np.dot(x, coeff)))

    # Information rv for empirical surgeries.
    surgery_df = surgery_df.drop_duplicates("event_id", keep="last")
    empirical_surgeries_df = _procedure_indicator_df(surgery_df)
    data, _ = SURegressionModel.extract_features_data(empirical_surgeries_df,
                                                      featured_procedures, [],
                                                      interactions,
                                                      other=True)
    empirical_surgeries_df["feature_vector"] = data[features].values.tolist()
    empirical_surgeries_df["expected_usage"] = empirical_surgeries_df["feature_vector"] \
        .apply(lambda x: np.exp(np.dot(x, coeff)))

    # Plotly histogram of the per-surgery info rv: empirical surgeries
    # against synthetic ones using the regression results.
    s = 0
    e = int(
        max(max(empirical_surgeries_df["expected_usage"]),
            max(synthetic_surgeries_df["expected_usage"])) + 1)
    empirical_trace = go.Histogram(
        x=empirical_surgeries_df["expected_usage"],
        name='Empirical Surgery Info RV (mean={:0.2f})'.format(
            np.mean(empirical_surgeries_df["expected_usage"])),
        xbins=dict(start=s, end=e, size=info_granularity),
        histnorm='probability density',
        opacity=0.75)
    synthetic_trace = go.Histogram(
        x=synthetic_surgeries_df["expected_usage"],
        name='Synthetic Surgery Info RV (mean={:0.2f})'.format(
            np.mean(synthetic_surgeries_df["expected_usage"])),
        xbins=dict(start=s, end=e, size=info_granularity),
        histnorm='probability density',
        opacity=0.75)
    layout = go.Layout(title="Per Surgery Info R.V Item: {0}".format(item_id),
                       xaxis={'title': 'Info [Expected Usage]'},
                       yaxis={'title': 'Probability Density'})
    figure = go.Figure(data=[empirical_trace, synthetic_trace], layout=layout)
    plot(figure, filename="{0}_Per_Surgery_Info_Rv.html".format(item_id))

    # Empirical per-surgery expected-usage distribution used for sampling.
    empirical_rv_df = empirical_surgeries_df.groupby(["expected_usage"]) \
        .agg({"event_id": "count"}) \
        .rename(columns={"event_id": "count"}) \
        .reset_index()
    empirical_rv_df["p"] = empirical_rv_df["count"] / sum(
        empirical_rv_df["count"])
    emp_surgery_rv = pacal.DiscreteDistr(empirical_rv_df["expected_usage"],
                                         empirical_rv_df["p"])

    # Plotly histogram for the per-weekday elective surgery rv. The elective
    # samples are drawn before the emergency ones so the order of draws from
    # the random state stays fixed.
    elective_samples = _sample_daily_info(emp_surgery_rv,
                                          np.random.binomial(n, p, days),
                                          info_granularity)
    weekday_elective_trace = go.Histogram(
        x=elective_samples,
        name='{} Elective Info RV (mean={:0.2f})'.format(
            item_id, np.mean(elective_samples)),
        xbins=dict(start=0, end=max(elective_samples), size=info_granularity),
        histnorm='probability',
        opacity=0.75)

    # Plotly histogram for the per-day emergency surgery rv.
    emergency_samples = _sample_daily_info(
        emp_surgery_rv, np.random.poisson(emergency_surgeries_mean, days),
        info_granularity)
    emergency_trace = go.Histogram(
        x=emergency_samples,
        name='{} Emergency Info RV (mean={:0.2f})'.format(
            item_id, np.mean(emergency_samples)),
        xbins=dict(start=0, end=max(emergency_samples), size=info_granularity),
        histnorm='probability',
        opacity=0.75)
    layout = go.Layout(
        title="Weekday Elective Info R.V Item: {0}".format(item_id),
        xaxis={'title': 'Info State (Poisson Usage)]'},
        yaxis={'title': 'Probability'})
    figure = go.Figure(data=[weekday_elective_trace, emergency_trace],
                       layout=layout)
    plot(figure, filename="{0}_Weekday_Elective_Info_Rv.html".format(item_id))

    # Turn the simulated samples into discrete distributions and drop the
    # negligible upper tail of each.
    elective_info_rv = _truncate_upper_tail(
        _empirical_discrete_rv(elective_samples), eps_trunk)
    emergency_info_rv = _truncate_upper_tail(
        _empirical_discrete_rv(emergency_samples), eps_trunk)

    with open(os.path.join(elective_outdir, "{0}.pickle".format(item_id)),
              "wb") as f:
        pickle.dump(elective_info_rv, f)

    with open(os.path.join(emergency_outdir, "{0}.pickle".format(item_id)),
              "wb") as f:
        pickle.dump(emergency_info_rv, f)

    return emergency_trace, weekday_elective_trace
# Example #11
# 0
from os import path
import plotly.plotly as py
import plotly.graph_objs as go
import numpy as np
import datetime

# Count distinct elective surgeries per weekday date after 2016-01-01,
# separately for every case service found in the surgery data.
case_service = "All"
analytics = ScmAnalytics.ScmAnalytics(lhs_config)

for case_service in set(analytics.surgery_df["case_service"]):

    case = {"dim": "case_service", "op": "==", "val": case_service}
    elec = {"dim": "urgent_elective", "op": "==", "val": "Elective"}

    surgery_df = Analytics.process_filters(analytics.surgery_df,
                                           filters=[case, elec])
    # Nothing to count for this case service.
    if not len(surgery_df):
        continue

    # Monday..Friday have weekday() 0..4.
    surgery_df["is_weekday"] = surgery_df["start_date"].apply(
        lambda day: day.weekday() < 5)
    surgery_df = surgery_df[surgery_df["is_weekday"]]
    surgery_df = surgery_df.drop_duplicates("event_id", keep="last")
    after_cutoff = surgery_df["start_date"] > datetime.date(2016, 1, 1)
    surgery_df = surgery_df[after_cutoff]

    surgeries_per_day = surgery_df.groupby("start_date") \
        .agg({"event_id": "nunique"})
    surgeries_per_day = surgeries_per_day.rename(
        columns={"event_id": "n_surgery_day"})
# Example #12
# 0
    "21920", "38197", "82099"
]
# Long (procedure, event_id) table for Cardiac Surgery events that have
# usage records; spaces in procedure names are replaced by underscores.
case_service = "Cardiac Surgery"
analytics = ScmAnalytics.ScmAnalytics(lhs_config)

case_service_filter = [{"dim": "case_service", "op": "eq", "val": case_service}]

usage_events = set(analytics.usage_df["event_id"])
surgery_df = analytics.surgery_df
surgery_df = surgery_df[surgery_df["event_id"].isin(usage_events)]
surgery_df = Analytics.process_filters(surgery_df, filters=case_service_filter)
surgery_df["procedures"] = surgery_df["procedures"].apply(
    lambda names: set(name.replace(" ", "_") for name in names))
surgery_df = surgery_df.drop_duplicates("event_id", keep="last")
extracted_surgery_df = surgery_df[["event_id"]]

# Each surgery contributes a Series indexed by its procedures whose values
# are its event id; concatenating them yields the long table.
empirical_procedure_df = pd.concat([
    pd.Series(row['event_id'], row['procedures'])
    for _, row in surgery_df.iterrows()
])
empirical_procedure_df = empirical_procedure_df.reset_index()
empirical_procedure_df = empirical_procedure_df.rename(
    columns={"index": "procedure", 0: "event_id"})
empirical_procedure_df["flag"] = 1
empirical_surgeries_df = empirical_procedure_df \
    .pivot(index="event_id", columns="procedure", values="flag") \
    .fillna(0) \
# Example #13
# 0
def lead_time_distribution(df, groupby=None, filters=[]):
    """
    Return the whole-day part of ``order_leadtime`` for the filtered rows.

    ``groupby`` is accepted but not used by this function.
    """
    filtered = Analytics.process_filters(df, filters)
    lead_times = filtered["order_leadtime"]
    return lead_times.dt.days
# Example #14
# 0
def run(case_service="Cardiac Surgery", item_id="1686", procedure_set=None):
    """
    Save a histogram of the quantity of one item used in one kind of surgery.

    For the given case service, surgeries after 2016-01-01 that have usage
    records are joined with the quantity used of ``item_id`` (0 when the item
    was not used). Only surgeries whose procedures equal ``procedure_set``
    are plotted.

    Args:
        case_service: case service to analyse.
        item_id: id of the item whose usage is plotted.
        procedure_set: set of procedure names identifying the kind of
            surgery; required.

    Raises:
        ValueError: if ``procedure_set`` is missing or no surgery matches it.

    Side effects:
        Updates the global matplotlib font size and writes
        ``Usage_Dist_item_<item_id>_<procedures>.svg`` and ``.eps``. The
        procedure names are sorted in the file name so it does not depend on
        set iteration order.
    """
    import matplotlib

    if procedure_set is None:
        raise ValueError("procedure_set is required")

    analytics = ScmAnalytics.ScmAnalytics(lhs_config)
    case_service_filter = [{
        "dim": "case_service",
        "op": "eq",
        "val": case_service
    }]

    usage_df = analytics.usage_df
    usage_df = usage_df[usage_df["start_date"].notna()]
    usage_df = Analytics.process_filters(usage_df, filters=case_service_filter)
    usage_events = set(usage_df["event_id"])
    item_usage_df = usage_df[usage_df["item_id"] == item_id]

    surgery_df = pre_process_columns(analytics.surgery_df)
    surgery_df = surgery_df[surgery_df["start_date"].notna()]
    surgery_df = surgery_df[
        surgery_df["start_date"] > datetime.date(2016, 1, 1)]
    surgery_df = Analytics.process_filters(surgery_df,
                                           filters=case_service_filter)
    surgery_df = surgery_df[surgery_df["event_id"].isin(usage_events)]

    surgery_df = surgery_df.join(
        item_usage_df.set_index("event_id")[["used_qty"]],
        on="event_id",
        how="left").fillna(0)
    surgery_df["procedures"] = surgery_df["procedures"].apply(
        lambda x: frozenset(x))
    surgery_df = surgery_df[surgery_df["procedures"] == procedure_set]
    if len(surgery_df) == 0:
        raise ValueError(
            "no surgeries match the requested procedure_set; nothing to plot")

    data = surgery_df["used_qty"]
    x_max = int(max(data)) + 1

    fn = "__".join(sorted(procedure_set))
    fn = "Usage_Dist_item_" + item_id + "_" + fn.replace(" ", "_")

    matplotlib.rcParams.update({'font.size': 12})
    plt.figure(figsize=(4, 3.5))
    plt.tight_layout()
    plt.gcf().subplots_adjust(bottom=0.15, left=0.15)

    # Unit-width bins, so the density of a bin equals its probability.
    n, bins, patches = plt.hist(data,
                                range(x_max + 1),
                                density=True,
                                facecolor='#08306b',
                                rwidth=0.95)

    spacing = np.round((max(n) + 0.1) / 4, decimals=1)
    # np.arange needs a positive step; for flat histograms the rounded
    # spacing can be 0.0.
    spacing = max(spacing, 0.1)
    plt.yticks(np.arange(0, max(n) + 0.1, spacing))
    plt.ylabel("Probability")
    plt.xlabel("Used Quantity")
    plt.xticks(range(x_max + 1))
    plt.savefig(fn + ".svg", format='svg')
    plt.savefig(fn + ".eps", format='eps')