def test_weibull_aft_plot_partial_effects_on_outcome(self, block):
    """Smoke-test the partial-effects plot for a single covariate.

    A Weibull AFT model is fitted on the Rossi data ("week" as duration,
    "arrest" as event) and curves are drawn for ``age`` at 10, 50 and 80.
    ``block`` is forwarded to ``plt.show``.
    """
    rossi = load_rossi()
    model = WeibullAFTFitter()
    model.fit(rossi, "week", "arrest")

    model.plot_partial_effects_on_outcome("age", [10, 50, 80])

    pyplot = self.plt
    pyplot.tight_layout()
    pyplot.title("test_weibull_aft_plot_partial_effects_on_outcome")
    pyplot.show(block=block)
def test_weibull_aft_plot_partial_effects_on_outcome_with_multiple_columns(self, block):
    """Smoke-test the partial-effects plot when two covariates vary together.

    Each row of ``scenarios`` is one (age, prio) combination to draw.
    ``block`` is forwarded to ``plt.show``.
    """
    rossi = load_rossi()
    model = WeibullAFTFitter()
    model.fit(rossi, "week", "arrest")

    covariates = ["age", "prio"]
    scenarios = [[10, 0], [50, 10], [80, 50]]
    model.plot_partial_effects_on_outcome(covariates, scenarios)

    pyplot = self.plt
    pyplot.tight_layout()
    pyplot.title("test_weibull_aft_plot_partial_effects_on_outcome_with_multiple_columns")
    pyplot.show(block=block)
def test_weibull_aft_plotting(self, block):
    """Smoke-test ``WeibullAFTFitter.plot`` on the regression dataset.

    The model is fitted with "T" as duration and "E" as event column, then
    plotted with default arguments. ``block`` is forwarded to ``plt.show``.
    """
    regression_df = load_regression_dataset()
    model = WeibullAFTFitter()
    model.fit(regression_df, "T", "E")

    model.plot()

    pyplot = self.plt
    pyplot.tight_layout()
    pyplot.title("test_weibull_aft_plotting")
    pyplot.show(block=block)
def test_weibull_aft_plotting_with_subset_of_columns(self, block):
    """Smoke-test ``WeibullAFTFitter.plot`` restricted to selected columns.

    Only "var1" and "var2" are requested via the ``columns`` argument.
    ``block`` is forwarded to ``plt.show``.
    """
    regression_df = load_regression_dataset()
    model = WeibullAFTFitter()
    model.fit(regression_df, "T", "E")

    selected = ["var1", "var2"]
    model.plot(columns=selected)

    pyplot = self.plt
    pyplot.tight_layout()
    pyplot.title("test_weibull_aft_plotting_with_subset_of_columns")
    pyplot.show(block=block)
def test_weibull_aft_plot_covariate_groups(self, block):
    """Smoke-test ``plot_covariate_groups`` on a Weibull AFT model.

    Fits on the Rossi data and draws curves for ``age`` at 10, 50 and 80.
    ``block`` is forwarded to ``plt.show``.
    """
    rossi = load_rossi()
    model = WeibullAFTFitter()
    model.fit(rossi, "week", "arrest")

    model.plot_covariate_groups("age", [10, 50, 80])

    pyplot = self.plt
    pyplot.tight_layout()
    pyplot.title("test_weibull_aft_plot_covariate_groups")
    pyplot.show(block=block)
def test_aft_plot_partial_effects_on_outcome_with_categorical(self, block):
    """Smoke-test the partial-effects plot for a string-valued covariate.

    A random "cat" column with levels a/b/c is appended to the Rossi data,
    the model is fitted through a formula that includes it, and one curve
    per level is drawn. ``block`` is forwarded to ``plt.show``.
    """
    levels = ["a", "b", "c"]

    rossi = load_rossi()
    rossi["cat"] = np.random.choice(["a", "b", "c"], size=rossi.shape[0])

    model = WeibullAFTFitter()
    model.fit(rossi, "week", "arrest", formula="cat + age + fin")
    model.plot_partial_effects_on_outcome("cat", values=levels)

    self.plt.title("test_aft_plot_partial_effects_on_outcome_with_categorical")
    self.plt.show(block=block)
def fit_aft_model(data, formula_, yvar_="mainline_vol", event_var="failure"):
    """Fit a Weibull AFT model on ``data`` and return the fitter.

    Args:
        data: Frame passed as the first argument to ``WeibullAFTFitter.fit``.
        formula_: Formula string forwarded as ``formula``.
        yvar_: Name of the duration column (forwarded as ``duration_col``).
        event_var: Name of the event column (forwarded as ``event_col``).

    Returns:
        The ``WeibullAFTFitter`` instance after ``fit`` has been called on it.
    """
    fit_kwargs = {
        "duration_col": yvar_,
        "event_col": event_var,
        "formula": formula_,
    }
    model = WeibullAFTFitter()
    model.fit(data, **fit_kwargs)
    return model
def _train_aft(x, t, e, folds, l2):
    """Fit one penalized Weibull AFT model per fold, leaving that fold out.

    Args:
        x, t, e: Arrays indexed with a boolean mask derived from ``folds``;
            they are handed to ``convert_to_data_frame`` to build the
            training frame (which must expose 'T' and 'E' columns).
        folds: Per-row fold labels; compared element-wise against each label.
        l2: Value passed as ``penalizer`` to ``WeibullAFTFitter``.

    Returns:
        dict mapping each distinct fold label to a deep copy of the model
        fitted on all rows whose label differs from it.
    """
    models_by_fold = {}
    for held_out in set(folds):
        # Rows that do NOT belong to the held-out fold form the training set.
        keep = folds != held_out
        train_df = convert_to_data_frame(x[keep], t[keep], e[keep])

        fitter = WeibullAFTFitter(penalizer=l2)
        fitted = fitter.fit(train_df, duration_col='T', event_col='E')
        models_by_fold[held_out] = copy.deepcopy(fitted)
    return models_by_fold
def __init__(self):
    """Initialise the Weibull model wrapper with its fixed metadata."""
    # Explicit two-argument form is kept on purpose; the zero-argument form
    # was left commented out by the original author.
    super(Weibull, self).__init__()

    # Display metadata.
    self.name = 'Weibull'
    self.explained = "*Parameteric model - Weibull"
    self.image_name = "Weibull.png"
    self.image_size = (500, 500)

    # Underlying estimator, created eagerly (original note: "otherwise
    # error occurred"), followed by the flags stored alongside it.
    self.model = WeibullAFTFitter()
    self.direction = 1
    self.prob_FLAG = True
def home_vary_survival(df, DURATION, EVENT, option):
    """Render a survival plot in Streamlit for the chosen option.

    Args:
        df: Data frame holding the duration, event and covariate columns.
        DURATION: Name of the duration column.
        EVENT: Name of the event-indicator column.
        option: Either the literal "Baseline" (plot the Kaplan-Meier
            survival function) or the name of a covariate column whose
            values are varied in a Weibull AFT partial-effects plot.
            The covariate branch builds its grid with ``range``, so the
            column must hold integer values.
    """
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 7))

    if option == "Baseline":
        # Imported locally so this branch works even when the module-level
        # lifelines import does not list KaplanMeierFitter.
        from lifelines import KaplanMeierFitter

        kmf = KaplanMeierFitter().fit(df[DURATION], df[EVENT])
        kmf.survival_function_.plot(ax=ax)
    else:
        # Timeline is only needed for the parametric fit.
        times = np.arange(0, 120)

        # Cover [min, max] of the covariate with roughly four steps; the +1
        # makes the observed maximum reachable by the half-open range.
        min_value = min(df[option].values)
        max_value = max(df[option].values) + 1
        interval = math.ceil((max_value - min_value) / 4)
        value_range = range(min_value, max_value, interval)

        wft = WeibullAFTFitter().fit(df, DURATION, EVENT, ancillary=True, timeline=times)
        wft.plot_partial_effects_on_outcome(option, value_range, cmap='coolwarm', ax=ax)

    # Pass the figure itself rather than the pyplot module so rendering does
    # not depend on matplotlib's global "current figure" state.
    st.pyplot(fig)
def fit(
    self,
    X,
    y,
    num_boost_round=1000,
    validation_data=None,
    early_stopping_rounds=None,
    verbose_eval=0,
    persist_train=False,
    index_id=None,
    time_bins=None,
):
    """
    Train the two-stage model: an XGBoost booster that outputs a risk score,
    followed by a Weibull AFT regression that uses that score as its only
    covariate.

    Args:
        X ([pd.DataFrame, np.array]): Features used to train the booster.

        y (structured array(numpy.bool_, numpy.number)): Event indicator as
            first field, event/censoring time as second field.

        num_boost_round (Int): Number of boosting iterations.

        validation_data (Tuple): Optional ``(X_val, y_val)`` pair; when
            given it is converted and passed to ``xgb.train`` as the
            "validation" eval set (needed for early stopping).

        early_stopping_rounds (Int): Forwarded to ``xgb.train``.

        verbose_eval ([Bool, Int]): Forwarded to ``xgb.train``.

        persist_train (Bool): When true, a BallTree over the training leaf
            indices is stored for prototype-based explainability.

        index_id (pd.Index): Index to associate with the persisted training
            rows; defaults to a copy of ``X.index`` when ``persist_train``
            is set.

        time_bins (np.array): Time windows for survival predictions;
            derived from the training data when omitted.

    Returns:
        XGBSEStackedWeibull: This instance, after training.
    """
    E_train, T_train = convert_y(y)

    self.time_bins = time_bins if time_bins is not None else get_time_bins(T_train, E_train)

    # Both training and validation data are converted for the same objective.
    objective = self.xgb_params["objective"]
    dtrain = convert_data_to_xgb_format(X, y, objective)

    evals = ()
    if validation_data:
        X_val, y_val = validation_data
        dvalid = convert_data_to_xgb_format(X_val, y_val, objective)
        evals = [(dvalid, "validation")]

    # Stage 1: gradient-boosted risk model.
    self.bst = xgb.train(
        self.xgb_params,
        dtrain,
        num_boost_round=num_boost_round,
        early_stopping_rounds=early_stopping_rounds,
        evals=evals,
        verbose_eval=verbose_eval,
    )
    train_risk = self.bst.predict(dtrain)

    # Durations below the smallest positive duration (i.e. zeros) are raised
    # to that value so the Weibull model can be fitted.
    smallest_positive = T_train[T_train > 0].min()
    T_train = np.clip(T_train, smallest_positive, None)

    # Stage 2: Weibull AFT on the risk score, via the lifelines frame API.
    weibull_train_df = pd.DataFrame(
        {"risk": train_risk, "duration": T_train, "event": E_train}
    )
    self.weibull_aft = WeibullAFTFitter(**self.weibull_params)
    self.weibull_aft.fit(weibull_train_df, "duration", "event", ancillary=True)

    if persist_train:
        self.persist_train = True
        if index_id is None:
            index_id = X.index.copy()

        # Leaf assignments per tree, compared with Hamming distance.
        leaves = self.bst.predict(dtrain, pred_leaf=True)
        self.tree = BallTree(leaves, metric="hamming")

    self.index_id = index_id
    return self
import numpy as np
from scipy.stats import weibull_min
import pandas as pd
from lifelines import WeibullAFTFitter, CoxPHFitter

# This is an implementation of https://uwspace.uwaterloo.ca/bitstream/handle/10012/10265/Cook_Richard-10265.pdf

# Simulation settings: sample size, success probability used for Z, and the
# coefficients that multiply X and Z in the log-time equation below.
N = 50000
p = 0.5
bX = np.log(0.5)
bZ = np.log(4)

# Two binary covariates. Z enters the outcome but is NOT placed in the
# fitted data frame further down.
Z = np.random.binomial(1, p, size=N)
X = np.random.binomial(1, 0.5, size=N)
# Continuous column centred at 20000 (sd 10); it does not enter Y.
X_ = 20000 + 10 * np.random.randn(N)

# Weibull noise with shape 1 and scale 1; log-time is linear in X and Z.
W = weibull_min.rvs(1, scale=1, loc=0, size=N)
Y = bX * X + bZ * Z + np.log(W)
T = np.exp(Y)

#######################################

# Only T, x and x_ are given to the models; no event column is passed.
df = pd.DataFrame({"T": T, "x": X, "x_": X_})
wf = WeibullAFTFitter().fit(df, "T")
wf.print_summary(4)
cph = CoxPHFitter().fit(df, "T", show_progress=True, step_size=1.0)
cph.print_summary(4)
import streamlit as st
import pandas as pd
from pandas import CategoricalDtype
from lifelines.datasets import load_rossi
# KaplanMeierFitter is required by home_vary_survival's "Baseline" branch.
from lifelines import WeibullAFTFitter, CoxPHFitter, KaplanMeierFitter
from utils import plotter, read_config
from joblib import dump, load
import json
import matplotlib.pyplot as plt
import numpy as np
import math

# SETUP
#st.set_page_config(layout="wide")
# Both models are fitted once at import time on the Rossi dataset.
df = load_rossi()
model = WeibullAFTFitter().fit(df, 'week', 'arrest')
cph = CoxPHFitter()
cph.fit(df, 'week', 'arrest')
DURATION = 'week'
EVENT = 'arrest'


def home_vary_survival(df, DURATION, EVENT, option):
    # Creates a 10x7 figure; for the "Baseline" option the Kaplan-Meier
    # survival function of df[DURATION] / df[EVENT] is drawn on its axes.
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 7))
    #plt.ylim(0, 1.01)
    #plt.xlim(0, 60)
    times = np.arange(0, 120)
    if option == "Baseline":
        kmf = KaplanMeierFitter().fit(df[DURATION], df[EVENT])
        kmf.survival_function_.plot(ax=ax)
for i in range(n): u = np.random.random() x = X[i] sol = root_scalar(lambda t: S(t, x) - u, x0=1, x1=3) assert sol.converged T_actual[i] = sol.root MAX_TIME = 5 T_observed = np.minimum(MAX_TIME, T_actual) E = T_actual < MAX_TIME return pd.DataFrame({"E": E, "T": T_observed, "X": X}) df = generate_data() WeibullAFTFitter().fit(df, "T", "E").print_summary() regressors = { "beta_": "X - 1", "gamma0_": "1", "gamma1_": "1", "gamma2_": "1", "gamma3_": "1" } cf = CRCSplineFitter(4).fit(df, "T", "E", regressors=regressors) # beta_ X should be around 0.5 cf.print_summary() cf.predict_hazard(df)[[0, 1, 2, 3]].plot()
[ prebreakdown_merge_len_acc_1500_model_df.drop( columns="geometry_type"), temp ], axis=1, ) fig = px.histogram( prebreakdown_merge_len_acc_1500_model_df_one_hot, x="mainline_vol", color="failure", ) prebreakdown_merge_len_acc_1500_model_df_one_hot_no_censor = prebreakdown_merge_len_acc_1500_model_df_one_hot.query( "failure==1") aft = WeibullAFTFitter() aft.fit( prebreakdown_merge_len_acc_1500_model_df_one_hot, duration_col="mainline_vol", event_col="failure", formula= "ramp_metering+length_of_acceleration_lane+ffs_cap_df+number_of_mainline_lane_downstream+simple_merge", ) aft.print_summary() aft.plot() aft.median_survival_time_ aft.mean_survival_time_ fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 4))
# -*- coding: utf-8 -*-
# weibull aft
# Times a single WeibullAFTFitter fit on the Rossi dataset and prints the
# model summary.
if __name__ == "__main__":
    import time

    import pandas as pd

    from lifelines import WeibullAFTFitter
    from lifelines.datasets import load_rossi

    # The "* 1" multiplier stacks that many copies of the dataset; raise it
    # to time the fit on a larger frame.
    rossi = pd.concat([load_rossi()] * 1)

    fitter = WeibullAFTFitter()

    started = time.time()
    fitter.fit(rossi, duration_col="week", event_col="arrest")
    elapsed = time.time() - started

    print("--- %s seconds ---" % elapsed)
    fitter.print_summary()
# Sidebar inputs. The labels use churn wording, while the variables map onto
# the Rossi columns fin / age / mar / paro used in the frame below.
fin = st.sidebar.slider('Discount', 0, 1)
age = st.sidebar.slider('Age', 17, 75)
mar = st.sidebar.slider('Marital Status', 0, 1)
paro = st.sidebar.slider('Referral', 0, 1)

wf = WeibullAFTFitter().fit(rossi, "week", "arrest")

# Single-row frame in the training column order; arrest is fixed at 0 and
# race, wexp and prio at 1.
_input_columns = ['week', 'arrest', 'fin', 'age', 'race', 'wexp', 'mar', 'paro', 'prio']
_input_values = [week, 0, fin, age, 1, 1, mar, paro, 1]
predict_input = pd.DataFrame(_input_values).T
predict_input.columns = _input_columns

# Median is computed conditional on the time already given in DURATION.
_already_elapsed = predict_input[DURATION]
prediction_output = wf.predict_median(predict_input, conditional_after=_already_elapsed)

st.sidebar.write("## Weeks until churn:", round(prediction_output[0]))