Code example #1
# Imports needed by this example. Project-internal helpers
# (ScmAnalytics, Analytics, SURegressionModel, Interaction,
# pre_process_columns, surgeries_per_day_distribution, lhs_config)
# are assumed to come from the surrounding package and are not shown here.
import os
import pickle
import datetime

import numpy as np
import pandas as pd
import pacal
import plotly.graph_objs as go
from plotly.offline import plot


# Excerpt: usage() method of a binomial usage model class
# (the enclosing class definition is not part of this example).
    def usage(self, o):
        return pacal.BinomialDistr(int(o * self.n), p=self.p)


def bootstrap_info_process(item_id="38242"):
    case_service = "Cardiac Surgery"
    # item_id = "38242"
    info_granularity = 1
    eps_trunk = 1e-3

    elective_outdir = "scm_implementation/ns_info_state_rvs/elective"
    emergency_outdir = "scm_implementation/ns_info_state_rvs/emergency"

    analytics = ScmAnalytics.ScmAnalytics(lhs_config)

    filters = [{
        "dim": "case_service",
        "op": "eq",
        "val": case_service
    }, {
        "dim": "urgent_elective",
        "op": "eq",
        "val": "Elective"
    }]
    elective_filter = [{
        "dim": "urgent_elective",
        "op": "eq",
        "val": "Elective"
    }]
    emergency_filter = [{
        "dim": "urgent_elective",
        "op": "eq",
        "val": "Urgent"
    }]
    case_service_filter = [{
        "dim": "case_service",
        "op": "eq",
        "val": case_service
    }]

    surgery_df = pre_process_columns(analytics.surgery_df)
    surgery_df = surgery_df[surgery_df["start_date"].notna()]
    surgery_df = surgery_df[
        surgery_df["start_date"] > datetime.date(2016, 1, 1)]
    surgery_df = Analytics.process_filters(surgery_df,
                                           filters=elective_filter +
                                           case_service_filter)
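    # Elective surgeries per weekday: fit a Binomial(n, p) to the empirical
    # surgeries-per-day counts (n = observed maximum, p chosen to match the mean).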
    dist_df = surgeries_per_day_distribution(surgery_df,
                                             day_group_by="is_weekday",
                                             filters=[])
    data = dist_df.set_index("is_weekday").loc[True]["data"]
    bins = range(1 + int(max(data)))
    binom_x = [x + 0.5 for x in bins]
    n = int(max(data))
    p = np.mean(data) / n

    surgery_df = pre_process_columns(analytics.surgery_df)
    surgery_df = surgery_df[surgery_df["start_date"].notna()]
    surgery_df = surgery_df[
        surgery_df["start_date"] > datetime.date(2016, 1, 1)]
    surgery_df = Analytics.process_filters(surgery_df,
                                           filters=emergency_filter +
                                           case_service_filter)
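    # Emergency (urgent) surgeries per day: keep only the empirical mean,
    # which is used below as the rate of a Poisson arrival process.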
    dist_df = surgeries_per_day_distribution(surgery_df, filters=[])
    emergency_surgeries_mean = np.mean(dist_df)

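    # Empirical distribution of the number of procedures per surgery
    # (surgeries with exactly 6 procedures are excluded).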
    surgery_df = Analytics.process_filters(analytics.surgery_df,
                                           filters=case_service_filter)
    surgery_df["procedure_count"] = surgery_df["procedures"].apply(
        lambda x: len(x))
    procedure_count_df = surgery_df.groupby("procedure_count").agg({
        "event_id":
        "count"
    }).reset_index()
    procedure_count_df = procedure_count_df[
        procedure_count_df["procedure_count"] != 6]
    # Probability of each procedure count = fraction of surgeries with that count.
    procedure_count_df["p"] = procedure_count_df["event_id"] / sum(
        procedure_count_df["event_id"])
    procedure_count_rv = pacal.DiscreteDistr(
        procedure_count_df["procedure_count"], procedure_count_df["p"])
    """
    Procedure weights
    """
    usage_events = set(analytics.usage_df["event_id"])
    surgery_df = analytics.surgery_df[analytics.surgery_df["event_id"].isin(
        usage_events)]
    surgery_df = Analytics.process_filters(surgery_df,
                                           filters=case_service_filter)
    surgery_df["procedures"] = surgery_df["procedures"].apply(
        lambda x: set(e.replace(" ", "_") for e in x))
    procedures = surgery_df["procedures"].apply(lambda x: list(x)).to_list()
    procedures = pd \
        .DataFrame({"procedure": [val for sublist in procedures for val in sublist],
                    "count": [1 for sublist in procedures for val in sublist]}) \
        .groupby("procedure") \
        .agg({"count": "count"}) \
        .reset_index()

    procedures["p"] = procedures["count"] / sum(procedures["count"])

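    # Draw `size` distinct procedures, weighted by their empirical frequency.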
    def procedure_pick_rv(size):
        return np.random.choice(procedures["procedure"],
                                p=procedures["p"],
                                replace=False,
                                size=size)

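    # Build 1,000 synthetic surgeries: draw a procedure count, then that many
    # procedures for each surgery, and pivot to a one-hot procedure matrix.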
    synthetic_surgeries = pd.DataFrame({"event_id": list(range(1000))})
    synthetic_surgeries["procedure_count"] = procedure_count_rv.rand(1000)
    synthetic_surgeries["procedures"] = synthetic_surgeries[
        "procedure_count"].apply(lambda x: procedure_pick_rv(x))

    synthetic_procedure_df = pd.concat(
        [pd.Series(row['event_id'], row['procedures']) for _, row in synthetic_surgeries.iterrows()]) \
        .reset_index() \
        .rename(columns={"index": "procedure",
                         0: "event_id"}
                )
    synthetic_procedure_df["flag"] = 1
    synthetic_surgeries_df = synthetic_procedure_df \
        .pivot(index="event_id", columns="procedure", values="flag") \
        .fillna(0) \
        .reset_index()

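    # Load the fitted regression coefficients for this item and score each
    # synthetic surgery: expected usage = exp(feature_vector . coefficients).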
    feature_df = pd.read_csv(os.path.join("regression_results", item_id))
    features = feature_df["feature"]
    featured_procedures = list(
        filter(lambda x: "." not in x, feature_df["feature"]))
    if "other" in featured_procedures:
        featured_procedures.remove("other")
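    # Any featured procedure missing from the synthetic surgeries gets an
    # all-zero indicator column so the feature matrix stays aligned.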
    for fp in featured_procedures:
        if fp not in synthetic_surgeries_df:
            print(procedures.set_index("procedure").loc[fp])
            synthetic_surgeries_df[fp] = 0

    all_procedures = set.union(*surgery_df["procedures"])

    interactions = list(filter(lambda x: "." in x, feature_df["feature"]))
    interactions = list(Interaction(i.split(".")) for i in interactions)
    data, _ = SURegressionModel.extract_features_data(synthetic_surgeries_df,
                                                      featured_procedures, [],
                                                      interactions,
                                                      other=True)

    for f in feature_df["feature"]:
        if f not in data:
            print(f)
            data[f] = 0
    synthetic_surgeries_df["feature_vector"] = data[features].values.tolist()
    coeff = np.array(feature_df["estimate"])
    synthetic_surgeries_df["expected_usage"] = synthetic_surgeries_df["feature_vector"] \
        .apply(lambda x: np.exp(np.dot(x, coeff)))
    """
    Information rv for empirical surgeries
    """
    surgery_df = surgery_df.drop_duplicates("event_id", keep="last")
    empirical_procedure_df = pd.concat(
        [pd.Series(row['event_id'], row['procedures']) for _, row in surgery_df.iterrows()]) \
        .reset_index() \
        .rename(columns={"index": "procedure",
                         0: "event_id"}
                )
    empirical_procedure_df["flag"] = 1
    empirical_surgeries_df = empirical_procedure_df \
        .pivot(index="event_id", columns="procedure", values="flag") \
        .fillna(0) \
        .reset_index()
    data, _ = SURegressionModel.extract_features_data(empirical_surgeries_df,
                                                      featured_procedures, [],
                                                      interactions,
                                                      other=True)
    empirical_surgeries_df["feature_vector"] = data[features].values.tolist()
    empirical_surgeries_df["expected_usage"] = empirical_surgeries_df["feature_vector"] \
        .apply(lambda x: np.exp(np.dot(x, coeff)))
    """
    Plotly histogram of the per-surgery info RV: empirical surgeries vs.
    synthetic surgeries generated from the regression results
    """
    s = 0
    e = int(
        max(max(empirical_surgeries_df["expected_usage"]),
            max(synthetic_surgeries_df["expected_usage"])) + 1)
    empirical_trace = go.Histogram(
        x=empirical_surgeries_df["expected_usage"],
        name='Empirical Surgery Info RV (mean={:0.2f})'.format(
            np.mean(empirical_surgeries_df["expected_usage"])),
        xbins=dict(start=s, end=e, size=info_granularity),
        histnorm='probability density',
        opacity=0.75)
    synthetic_trace = go.Histogram(
        x=synthetic_surgeries_df["expected_usage"],
        name='Synthetic Surgery Info RV (mean={:0.2f})'.format(
            np.mean(synthetic_surgeries_df["expected_usage"])),
        xbins=dict(start=s, end=e, size=info_granularity),
        histnorm='probability density',
        opacity=0.75)
    layout = go.Layout(title="Per Surgery Info R.V Item: {0}".format(item_id),
                       xaxis={'title': 'Info [Expected Usage]'},
                       yaxis={'title': 'Probability Density'})
    figure = go.Figure(data=[empirical_trace, synthetic_trace], layout=layout)
    plot(figure, filename="{0}_Per_Surgery_Info_Rv.html".format(item_id))
    """
    Plotly histogram for per weekday elective surgery RV
    """
    empirical_rv_df = empirical_surgeries_df.groupby(["expected_usage"]) \
        .agg({"event_id": "count"}) \
        .rename(columns={"event_id": "count"}) \
        .reset_index()
    empirical_rv_df["p"] = empirical_rv_df["count"] / sum(
        empirical_rv_df["count"])
    emp_surgery_rv = pacal.DiscreteDistr(empirical_rv_df["expected_usage"],
                                         empirical_rv_df["p"])
    surgery_demand_rv = pacal.BinomialDistr(n, p)
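    # Monte Carlo over simulated weekdays: draw the number of elective
    # surgeries ~ Binomial(n, p), then sum that many per-surgery info draws.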
    days = 100000
    elective_samples = [
        sum(emp_surgery_rv.rand(x)) for x in np.random.binomial(n, p, days)
    ]
    elective_samples = [
        round(sample / info_granularity) * info_granularity
        for sample in elective_samples
    ]
    weekday_elective_trace = go.Histogram(
        x=elective_samples,
        name='{} Elective Info RV (mean={:0.2f})'.format(
            item_id, np.mean(elective_samples)),
        xbins=dict(start=0, end=max(elective_samples), size=info_granularity),
        histnorm='probability',
        opacity=0.75)
    """
    Plotly histogram for per day emergency surgery RV
    """
    emergency_samples = [
        sum(emp_surgery_rv.rand(x))
        for x in np.random.poisson(emergency_surgeries_mean, days)
    ]
    emergency_samples = [
        round(sample / info_granularity) * info_granularity
        for sample in emergency_samples
    ]
    emergency_trace = go.Histogram(
        x=emergency_samples,
        name='{} Emergency Info RV (mean={:0.2f})'.format(
            item_id, np.mean(emergency_samples)),
        xbins=dict(start=0, end=max(emergency_samples), size=info_granularity),
        histnorm='probability',
        opacity=0.75)
    layout = go.Layout(
        title="Weekday Elective Info R.V Item: {0}".format(item_id),
        xaxis={'title': 'Info State (Poisson Usage)'},
        yaxis={'title': 'Probability'})
    figure = go.Figure(data=[weekday_elective_trace, emergency_trace],
                       layout=layout)
    plot(figure, filename="{0}_Weekday_Elective_Info_Rv.html".format(item_id))

    elective_info_df = pd.DataFrame({"info": elective_samples, "count": [1] * len(elective_samples)}) \
        .groupby(["info"]) \
        .agg({"count": "count"}) \
        .reset_index()
    elective_info_df["p"] = elective_info_df["count"] / sum(
        elective_info_df["count"])
    elective_info_rv = pacal.DiscreteDistr(elective_info_df["info"],
                                           elective_info_df["p"])

    emergency_info_df = pd.DataFrame({"info": emergency_samples, "count": [1] * len(emergency_samples)}) \
        .groupby(["info"]) \
        .agg({"count": "count"}) \
        .reset_index()
    emergency_info_df["p"] = emergency_info_df["count"] / sum(
        emergency_info_df["count"])
    emergency_info_rv = pacal.DiscreteDistr(emergency_info_df["info"],
                                            emergency_info_df["p"])

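    # Truncate the upper tail of each info RV once the survival probability
    # falls below eps_trunk, then rebuild the discrete RV from the remaining
    # point masses (Diracs).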
    max_v = 999
    for d in elective_info_rv.get_piecewise_pdf().getDiracs():
        if 1 - elective_info_rv.cdf(d.a) < eps_trunk:
            max_v = d.a
            break
    diracs = (pacal.CondLtDistr(elective_info_rv, max_v)) \
        .get_piecewise_pdf().getDiracs()
    diracs = list(filter(lambda d: d.f > 0, diracs))
    elective_info_rv = pacal.DiscreteDistr([d.a for d in diracs],
                                           [d.f for d in diracs])

    max_v = 999
    for d in emergency_info_rv.get_piecewise_pdf().getDiracs():
        if 1 - emergency_info_rv.cdf(d.a) < eps_trunk:
            max_v = d.a
            break
    diracs = (pacal.CondLtDistr(emergency_info_rv, max_v)) \
        .get_piecewise_pdf().getDiracs()
    diracs = list(filter(lambda d: d.f > 0, diracs))
    emergency_info_rv = pacal.DiscreteDistr([d.a for d in diracs],
                                            [d.f for d in diracs])

    with open(os.path.join(elective_outdir, "{0}.pickle".format(item_id)),
              "wb") as f:
        pickle.dump(elective_info_rv, f)

    with open(os.path.join(emergency_outdir, "{0}.pickle".format(item_id)),
              "wb") as f:
        pickle.dump(emergency_info_rv, f)

    return emergency_trace, weekday_elective_trace
# Imports for the configuration sweep; ModelConfig, BinomUsageModel and
# run_configs come from the project's optimization module (same import
# path as in Code example #4 below).
import pacal
from scm_optimization.model import ModelConfig, BinomUsageModel, run_configs

configs = []
i = 0

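# Full factorial sweep over the info RV parameters (demand_n, q), the binomial
# usage model parameters (usage_n, p), the horizon and the backlogging cost b
# (4 * 4 * 3 * 3 * 5 * 8 = 5,760 configurations).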
for demand_n in [1, 2, 3, 4]:
    for usage_n in [1, 2, 3, 4]:
        for p in [0.25, 0.5, 0.75]:
            for q in [0.25, 0.5, 0.75]:
                for horizon in [0, 1, 2, 3, 4]:
                    for b in [0.001, 0.05, 0.1, 0.5, 1, 2, 4, 100]:
                        configs.append(
                            ModelConfig(
                                gamma=1,
                                lead_time=0,
                                info_state_rvs=None,
                                holding_cost=1,
                                backlogging_cost=b,
                                setup_cost=0,
                                unit_price=0,
                                usage_model=BinomUsageModel(n=usage_n, p=p),
                                increments=1,
                                horizon=horizon,
                                info_rv=pacal.BinomialDistr(demand_n, q),
                                label="numerical_experiments_binomial_model",
                                label_index=i))
                        i += 1

if __name__ == "__main__":
    xs = list(range(0, 20))
    ts = list(range(0, 21))
    run_configs(configs, ts, xs, pools=8)
Code example #4
from scm_optimization.model import ModelConfig, run_configs, PoissonUsageModel, BinomUsageModel, DeterministUsageModel
import pacal
from decimal import *

configs = []
i = 0

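# Demand-scale experiment: Poisson usage with rate 1 and a Binomial(d, 0.5)
# information RV, sweeping horizon, backlogging cost b and demand scale d.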
for horizon in [0, 1, 2, 3, 4]:
    for b in [10, 1000]:
        for d in [1, 5, 10, 20]:
            configs.append(
                ModelConfig(gamma=1,
                            lead_time=0,
                            info_state_rvs=None,
                            holding_cost=1,
                            backlogging_cost=b,
                            setup_cost=0,
                            unit_price=0,
                            usage_model=PoissonUsageModel(1),
                            horizon=horizon,
                            info_rv=pacal.BinomialDistr(d, 0.5),
                            label="demand_scale_experiment",
                            label_index=i))
            i += 1
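# Keep only two of the generated configurations: label_index 35 and 39
# (horizon=4, d=20, with b=10 and b=1000 respectively).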
configs = [configs[35], configs[39]]
if __name__ == "__main__":
    xs = list(range(0, 1))
    ts = list(range(0, 21))
    run_configs(configs, ts, xs, pools=8)
Code example #5
from multiprocessing import Pool
from datetime import date, datetime

# Additional imports needed by this example
# (module path taken from Code example #4):
import pacal
from scm_optimization.model import ModelConfig, run_configs, PoissonUsageModel, BinomUsageModel

configs = []
i = 0
binomial_usage_models = [
    BinomUsageModel(n=2, p=0.5),
    BinomUsageModel(n=3, p=0.3333),
    BinomUsageModel(n=4, p=0.25),
    BinomUsageModel(n=5, p=0.2),
    BinomUsageModel(n=10, p=0.1),
]
b = 1000
poisson_usage_model = PoissonUsageModel(1, trunk=1e-10)

booking_model = pacal.BinomialDistr(10, 0.5)

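# Poisson-usage configurations over horizons 0-3, all using the
# Binomial(10, 0.5) booking distribution as the information RV.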
for horizon in [0, 1, 2, 3]:
    configs.append(ModelConfig(
        gamma=1,
        lead_time=0,
        info_state_rvs=None,
        holding_cost=1,
        backlogging_cost=b,
        setup_cost=0,
        unit_price=0,
        usage_model=poisson_usage_model,
        horizon=horizon,
        info_rv=booking_model,
        label="poisson_usage_policy",
        label_index=i)