Esempio n. 1
0
 def test_weibull_aft_plot_partial_effects_on_outcome(self, block):
     """Plot partial effects of ``age`` for a Weibull AFT model fitted on ``load_rossi()``."""
     figure_title = "test_weibull_aft_plot_partial_effects_on_outcome"
     rossi = load_rossi()

     fitter = WeibullAFTFitter()
     fitter.fit(rossi, duration_col="week", event_col="arrest")
     fitter.plot_partial_effects_on_outcome("age", values=[10, 50, 80])

     # Layout is tightened before the title is set, then the figure is shown.
     pyplot = self.plt
     pyplot.tight_layout()
     pyplot.title(figure_title)
     pyplot.show(block=block)
Esempio n. 2
0
 def test_weibull_aft_plot_partial_effects_on_outcome_with_multiple_columns(self, block):
     """Plot partial effects when ``age`` and ``prio`` are varied together."""
     figure_title = "test_weibull_aft_plot_partial_effects_on_outcome_with_multiple_columns"
     covariates = ["age", "prio"]
     # One [age, prio] pair per plotted curve.
     value_pairs = [[10, 0], [50, 10], [80, 50]]

     rossi = load_rossi()
     fitter = WeibullAFTFitter()
     fitter.fit(rossi, duration_col="week", event_col="arrest")
     fitter.plot_partial_effects_on_outcome(covariates, value_pairs)

     pyplot = self.plt
     pyplot.tight_layout()
     pyplot.title(figure_title)
     pyplot.show(block=block)
Esempio n. 3
0
 def test_weibull_aft_plotting(self, block):
     """Fit a Weibull AFT model on ``load_regression_dataset()`` and call its ``plot()``."""
     figure_title = "test_weibull_aft_plotting"
     regression_data = load_regression_dataset()

     fitter = WeibullAFTFitter()
     fitter.fit(regression_data, duration_col="T", event_col="E")
     fitter.plot()

     pyplot = self.plt
     pyplot.tight_layout()
     pyplot.title(figure_title)
     pyplot.show(block=block)
Esempio n. 4
0
 def test_weibull_aft_plotting_with_subset_of_columns(self, block):
     """Call ``plot()`` restricted to the ``var1`` and ``var2`` columns."""
     figure_title = "test_weibull_aft_plotting_with_subset_of_columns"
     shown_columns = ["var1", "var2"]
     regression_data = load_regression_dataset()

     fitter = WeibullAFTFitter()
     fitter.fit(regression_data, duration_col="T", event_col="E")
     fitter.plot(columns=shown_columns)

     pyplot = self.plt
     pyplot.tight_layout()
     pyplot.title(figure_title)
     pyplot.show(block=block)
Esempio n. 5
0
 def test_weibull_aft_plot_covariate_groups(self, block):
     """Call ``plot_covariate_groups`` on ``age`` for a Weibull AFT model fitted on ``load_rossi()``."""
     figure_title = "test_weibull_aft_plot_covariate_groups"
     age_values = [10, 50, 80]
     rossi = load_rossi()

     fitter = WeibullAFTFitter()
     fitter.fit(rossi, duration_col="week", event_col="arrest")
     fitter.plot_covariate_groups("age", age_values)

     pyplot = self.plt
     pyplot.tight_layout()
     pyplot.title(figure_title)
     pyplot.show(block=block)
Esempio n. 6
0
 def test_aft_plot_partial_effects_on_outcome_with_categorical(self, block):
     """Plot partial effects for a randomly generated categorical column used in a formula."""
     figure_title = "test_aft_plot_partial_effects_on_outcome_with_categorical"
     levels = ["a", "b", "c"]

     rossi = load_rossi()
     # Attach a random category label to every row.
     rossi["cat"] = np.random.choice(["a", "b", "c"], size=rossi.shape[0])

     fitter = WeibullAFTFitter()
     fitter.fit(rossi, "week", "arrest", formula="cat + age + fin")
     fitter.plot_partial_effects_on_outcome("cat", values=levels)

     # No tight_layout() call here; only the title is set before showing.
     self.plt.title(figure_title)
     self.plt.show(block=block)
Esempio n. 7
0
def fit_aft_model(data, formula_, yvar_="mainline_vol", event_var="failure"):
    """Fit a ``WeibullAFTFitter`` on ``data`` and return the fitter.

    Args:
        data: Data passed as the first argument to ``WeibullAFTFitter.fit``.
        formula_: Formula string forwarded as ``formula``.
        yvar_: Name of the duration column (default ``"mainline_vol"``).
        event_var: Name of the event column (default ``"failure"``).

    Returns:
        The ``WeibullAFTFitter`` instance that ``fit`` was called on.
    """
    fit_options = {
        "duration_col": yvar_,
        "event_col": event_var,
        "formula": formula_,
    }
    model = WeibullAFTFitter()
    model.fit(data, **fit_options)
    return model
def _train_aft(x, t, e, folds, l2):
  """Fit one penalized Weibull AFT model per fold, each on the rows outside that fold.

  Args:
    x, t, e: Indexable inputs handed to ``convert_to_data_frame`` after
      selecting the rows whose fold label differs from the current fold.
    folds: Fold label for each row; compared element-wise with ``!=``.
    l2: Value passed as ``penalizer`` to ``WeibullAFTFitter``.

  Returns:
    Dict mapping each distinct fold label to a deep copy of the fitted model.
  """
  models_by_fold = {}

  for held_out in set(folds):
    # Rows that do NOT belong to the held-out fold form the training set.
    keep = folds != held_out
    frame = convert_to_data_frame(x[keep], t[keep], e[keep])

    fitter = WeibullAFTFitter(penalizer=l2)
    fitted = fitter.fit(frame, duration_col='T', event_col='E')
    models_by_fold[held_out] = copy.deepcopy(fitted)

  return models_by_fold
    def __init__(self):
        """Initialise the base class, then set this wrapper's estimator and display metadata."""

        super(Weibull, self).__init__()

        self.name = 'Weibull'

        # Original author's note: an error occurred otherwise.
        # NOTE(review): it is not visible here what the alternative was — confirm.
        self.model = WeibullAFTFitter()
        self.direction = 1
        self.prob_FLAG = True

        # Text and image metadata; how these are consumed is not visible in this block.
        self.explained = "*Parameteric model - Weibull"
        self.image_name = "Weibull.png"
        self.image_size = (500, 500)
Esempio n. 10
0
def home_vary_survival(df, DURATION, EVENT, option):
    """Render survival curves in Streamlit, either a baseline or varied by one covariate.

    Args:
        df: DataFrame containing the duration, event and covariate columns.
        DURATION: Name of the duration column.
        EVENT: Name of the event column.
        option: ``"Baseline"`` to plot a Kaplan-Meier survival function, or the
            name of a column in ``df`` whose values are varied in a Weibull AFT
            partial-effects plot. The column must hold integer values because
            the plotted values are built with ``range()``.
    """
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 7))

    if option == "Baseline":
        # Imported locally: the module-level lifelines import seen alongside
        # this function only brings in WeibullAFTFitter and CoxPHFitter.
        from lifelines import KaplanMeierFitter

        kmf = KaplanMeierFitter().fit(df[DURATION], df[EVENT])
        kmf.survival_function_.plot(ax=ax)
    else:
        # Up to four evenly spaced values covering the column's observed range.
        min_value = min(df[option].values)
        max_value = max(df[option].values) + 1
        interval = math.ceil((max_value - min_value) / 4)
        value_range = range(min_value, max_value, interval)

        times = np.arange(0, 120)
        wft = WeibullAFTFitter().fit(df,
                                     DURATION,
                                     EVENT,
                                     ancillary=True,
                                     timeline=times)
        wft.plot_partial_effects_on_outcome(option,
                                            value_range,
                                            cmap='coolwarm',
                                            ax=ax)

    # Render the figure created above rather than the pyplot module.
    st.pyplot(fig)
Esempio n. 11
0
    def fit(
        self,
        X,
        y,
        num_boost_round=1000,
        validation_data=None,
        early_stopping_rounds=None,
        verbose_eval=0,
        persist_train=False,
        index_id=None,
        time_bins=None,
    ):
        """
        Fit XGBoost model to predict a value that is interpreted as a risk metric.
        Fit Weibull Regression model using risk metric as only independent variable.

        Args:
            X ([pd.DataFrame, np.array]): Features to be used while fitting XGBoost model

            y (structured array(numpy.bool_, numpy.number)): Binary event indicator as first field,
                and time of event or time of censoring as second field.

            num_boost_round (Int): Number of boosting iterations.

            validation_data (Tuple): Validation data as a single ``(X, y)`` pair
                (it is unpacked directly into ``X_val, y_val``), used if the user
                desires early stopping

            early_stopping_rounds (Int): Activates early stopping.
                Validation metric needs to improve at least once
                in every **early_stopping_rounds** round(s) to continue training.
                See xgboost.train documentation.

            verbose_eval ([Bool, Int]): Level of verbosity. See xgboost.train documentation.

            persist_train (Bool): Whether or not to persist training data to use explainability
                through prototypes

            index_id (pd.Index): User defined index if intended to use explainability
                through prototypes. When ``persist_train`` is set and this is None,
                a copy of ``X.index`` is used.

            time_bins (np.array): Specified time windows to use when making survival predictions.
                When None, they are computed with ``get_time_bins(T_train, E_train)``.

        Returns:
            XGBSEStackedWeibull: Trained XGBSEStackedWeibull instance
        """

        E_train, T_train = convert_y(y)
        if time_bins is None:
            time_bins = get_time_bins(T_train, E_train)
        self.time_bins = time_bins

        # converting data to xgb format
        dtrain = convert_data_to_xgb_format(X, y, self.xgb_params["objective"])

        # converting validation data to xgb format
        # (evals stays empty when no validation data is given)
        evals = ()
        if validation_data:
            X_val, y_val = validation_data
            dvalid = convert_data_to_xgb_format(X_val, y_val,
                                                self.xgb_params["objective"])
            evals = [(dvalid, "validation")]

        # training XGB
        self.bst = xgb.train(
            self.xgb_params,
            dtrain,
            num_boost_round=num_boost_round,
            early_stopping_rounds=early_stopping_rounds,
            evals=evals,
            verbose_eval=verbose_eval,
        )

        # predicting risk from XGBoost
        train_risk = self.bst.predict(dtrain)

        # raising every duration below the smallest positive one (zeros included)
        # up to that value, so Weibull can be fitted
        min_positive_value = T_train[T_train > 0].min()
        T_train = np.clip(T_train, min_positive_value, None)

        # creating df to use lifelines API
        weibull_train_df = pd.DataFrame({
            "risk": train_risk,
            "duration": T_train,
            "event": E_train
        })

        # fitting weibull aft
        self.weibull_aft = WeibullAFTFitter(**self.weibull_params)
        self.weibull_aft.fit(weibull_train_df,
                             "duration",
                             "event",
                             ancillary=True)

        if persist_train:
            self.persist_train = True
            if index_id is None:
                index_id = X.index.copy()

            # leaf index of each training row, indexed in a BallTree
            # with the hamming metric
            index_leaves = self.bst.predict(dtrain, pred_leaf=True)
            self.tree = BallTree(index_leaves, metric="hamming")

        # stored whether or not persist_train is set (may be None)
        self.index_id = index_id

        return self
import numpy as np
from scipy.stats import weibull_min
import pandas as pd
from lifelines import WeibullAFTFitter, CoxPHFitter

# This is an implementation of https://uwspace.uwaterloo.ca/bitstream/handle/10012/10265/Cook_Richard-10265.pdf

# Simulation settings: sample size, the success probability used for Z, and
# the coefficients multiplying X and Z in the log-time equation below.
N = 50000
p = 0.5
bX = np.log(0.5)
bZ = np.log(4)

# Two binary covariates, plus X_, a normally distributed column centred at
# 20000 that does not enter the equation for Y.
Z = np.random.binomial(1, p, size=N)
X = np.random.binomial(1, 0.5, size=N)
X_ = 20000 + 10 * np.random.randn(N)

# Error term drawn from weibull_min with shape parameter 1.
W = weibull_min.rvs(1, scale=1, loc=0, size=N)

# Log-duration is linear in X and Z plus log(W); exponentiate to get durations.
Y = bX * X + bZ * Z + np.log(W)
T = np.exp(Y)

#######################################

# Z generated T but is left out of the fitted frame; x_ is included instead.
# NOTE(review): presumably this is the omitted-covariate setup studied in the
# referenced paper — confirm.
df = pd.DataFrame({"T": T, "x": X, "x_": X_})

# No event column is passed to either fit.
wf = WeibullAFTFitter().fit(df, "T")
wf.print_summary(4)

cph = CoxPHFitter().fit(df, "T", show_progress=True, step_size=1.0)
cph.print_summary(4)
Esempio n. 13
0
import streamlit as st
import pandas as pd
from pandas import CategoricalDtype
from lifelines.datasets import load_rossi
from lifelines import WeibullAFTFitter, CoxPHFitter
from utils import plotter, read_config
from joblib import dump, load
import json
import matplotlib.pyplot as plt
import numpy as np
import math

# SETUP
#st.set_page_config(layout="wide")
# Load the dataset once at import time and fit both models on it, with 'week'
# as the duration column and 'arrest' as the event column.
df = load_rossi()
model = WeibullAFTFitter().fit(df, 'week', 'arrest')
cph = CoxPHFitter()
cph.fit(df, 'week', 'arrest')

# Column-name constants; same values as the literals used in the fits above.
DURATION = 'week'
EVENT = 'arrest'


def home_vary_survival(df, DURATION, EVENT, option):
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 7))
    #plt.ylim(0, 1.01)
    #plt.xlim(0, 60)
    times = np.arange(0, 120)
    if option == "Baseline":
        kmf = KaplanMeierFitter().fit(df[DURATION], df[EVENT])
        kmf.survival_function_.plot(ax=ax)
    for i in range(n):
        u = np.random.random()
        x = X[i]
        sol = root_scalar(lambda t: S(t, x) - u, x0=1, x1=3)
        assert sol.converged
        T_actual[i] = sol.root

    MAX_TIME = 5
    T_observed = np.minimum(MAX_TIME, T_actual)
    E = T_actual < MAX_TIME
    return pd.DataFrame({"E": E, "T": T_observed, "X": X})


# Generate the data set and fit a Weibull AFT model on it, printing the summary.
df = generate_data()
WeibullAFTFitter().fit(df, "T", "E").print_summary()

# Per-parameter formulas: beta_ uses X without an intercept ("X - 1"); each
# gamma parameter is intercept-only ("1").
regressors = {
    "beta_": "X - 1",
    "gamma0_": "1",
    "gamma1_": "1",
    "gamma2_": "1",
    "gamma3_": "1"
}

# NOTE(review): the meaning of the argument 4 is not visible here; it matches
# the number of gamma parameters listed above — confirm against CRCSplineFitter.
cf = CRCSplineFitter(4).fit(df, "T", "E", regressors=regressors)
# beta_   X should be around 0.5

cf.print_summary()
# Plot the predicted hazard for the columns labelled 0 to 3.
cf.predict_hazard(df)[[0, 1, 2, 3]].plot()
        [
            prebreakdown_merge_len_acc_1500_model_df.drop(
                columns="geometry_type"), temp
        ],
        axis=1,
    )

    fig = px.histogram(
        prebreakdown_merge_len_acc_1500_model_df_one_hot,
        x="mainline_vol",
        color="failure",
    )
    prebreakdown_merge_len_acc_1500_model_df_one_hot_no_censor = prebreakdown_merge_len_acc_1500_model_df_one_hot.query(
        "failure==1")

    aft = WeibullAFTFitter()

    aft.fit(
        prebreakdown_merge_len_acc_1500_model_df_one_hot,
        duration_col="mainline_vol",
        event_col="failure",
        formula=
        "ramp_metering+length_of_acceleration_lane+ffs_cap_df+number_of_mainline_lane_downstream+simple_merge",
    )

    aft.print_summary()
    aft.plot()
    aft.median_survival_time_
    aft.mean_survival_time_

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 4))
Esempio n. 16
0
# -*- coding: utf-8 -*-
# weibull aft

if __name__ == "__main__":
    # Time a single WeibullAFTFitter fit on the load_rossi() data, then print
    # the model summary.
    import time

    import pandas as pd
    from lifelines import WeibullAFTFitter
    from lifelines.datasets import load_rossi

    # Number of stacked copies of the dataset; 1 leaves it unchanged in size.
    copies = 1
    rossi = pd.concat([load_rossi()] * copies)

    fitter = WeibullAFTFitter()
    began = time.time()
    fitter.fit(rossi, duration_col="week", event_col="arrest")
    elapsed = time.time() - began
    print("--- %s seconds ---" % elapsed)

    fitter.print_summary()
Esempio n. 17
0
# Sidebar inputs. Each slider's value is bound to the column name it feeds
# below; the on-screen labels differ from those names for `fin` and `paro`.
fin = st.sidebar.slider(
    'Discount',
    0, 1
)
age = st.sidebar.slider(
    'Age',
    17, 75
)

mar = st.sidebar.slider(
    'Marital Status',
    0, 1
)

paro = st.sidebar.slider(
    'Referral',
    0, 1
)





# NOTE(review): `rossi`, `week` and `DURATION` are not defined in this part of
# the file — presumably set earlier; confirm.
wf = WeibullAFTFitter().fit(rossi, "week", "arrest")
# Single-row input frame: values are listed in the same order as the column
# names assigned on the next line; arrest, race, wexp and prio are hard-coded.
predict_input = pd.DataFrame([week, 0, fin, age, 1, 1, mar, paro, 1]).T
predict_input.columns = ['week', 'arrest', 'fin', 'age', 'race', 'wexp', 'mar', 'paro', 'prio']
# Median prediction conditioned on the duration already recorded in the input row.
prediction_output = wf.predict_median(predict_input, conditional_after=predict_input[DURATION])

st.sidebar.write("## Weeks until churn:", round(prediction_output[0]))