def test_coxph_plot_covariate_groups_with_single_strata(self, block):
    """Visual check of covariate-group curves with a single strata column.

    Fits a ``CoxPHFitter`` on the frame returned by ``load_rossi()`` using
    ``strata="paro"`` (a single column name rather than a list), then plots
    the curves for ``age`` set to 10, 50 and 80.

    Args:
        block: Forwarded to ``self.plt.show(block=...)``.
    """
    df = load_rossi()
    cp = CoxPHFitter()
    cp.fit(df, "week", "arrest", strata="paro")
    cp.plot_covariate_groups("age", [10, 50, 80])
    # The title is the only label on the figure during manual inspection, so
    # it has to name this test and not a sibling one.
    self.plt.title("test_coxph_plot_covariate_groups_with_single_strata")
    self.plt.show(block=block)
def test_coxph_plot_covariate_groups_with_multiple_variables(self, block):
    """Visual check of covariate-group curves when two covariates vary together.

    Each inner list of ``scenarios`` supplies one value per entry of
    ``covariates`` (``age`` first, ``prio`` second).

    Args:
        block: Forwarded to ``self.plt.show(block=...)``.
    """
    rossi = load_rossi()

    fitter = CoxPHFitter()
    fitter.fit(rossi, "week", "arrest")

    covariates = ["age", "prio"]
    scenarios = [[10, 0], [50, 10], [80, 90]]
    fitter.plot_covariate_groups(covariates, scenarios)

    self.plt.title("test_coxph_plot_covariate_groups_with_multiple_variables")
    self.plt.show(block=block)
def test_coxph_plot_covariate_groups(self, block):
    """Visual check of covariate-group curves for a single covariate.

    Fits an unstratified ``CoxPHFitter`` on the frame returned by
    ``load_rossi()`` and plots the curves for ``age`` set to 10, 50 and 80.

    Args:
        block: Forwarded to ``self.plt.show(block=...)``.
    """
    rossi = load_rossi()

    fitter = CoxPHFitter()
    fitter.fit(rossi, "week", "arrest")

    ages = [10, 50, 80]
    fitter.plot_covariate_groups("age", ages)

    self.plt.title("test_coxph_plot_covariate_groups")
    self.plt.show(block=block)
def test_coxph_plot_covariate_groups_with_nonnumeric_strata(self, block):
    """Visual check of covariate-group curves with a string-valued strata column.

    Adds a ``"strata"`` column whose values are drawn at random from
    ``"A"``/``"B"``, fits a ``CoxPHFitter`` stratified on it, and plots the
    curves for ``age`` set to 10, 50 and 80.

    Args:
        block: Forwarded to ``self.plt.show(block=...)``.
    """
    df = load_rossi()
    # One random label per row; the draw is unseeded, so the strata split
    # differs from run to run.
    df["strata"] = np.random.choice(["A", "B"], size=df.shape[0])
    cp = CoxPHFitter()
    cp.fit(df, "week", "arrest", strata="strata")
    cp.plot_covariate_groups("age", [10, 50, 80])
    # The title previously carried the name of the single-strata test
    # (copy-paste slip); it must identify this test on the figure.
    self.plt.title("test_coxph_plot_covariate_groups_with_nonnumeric_strata")
    self.plt.show(block=block)
def test_coxph_plot_covariate_groups_with_multiple_variables_and_strata(self, block):
    """Visual check of multi-covariate group curves on a stratified model.

    Adds a ``"strata"`` column of random ``"A"``/``"B"`` labels, fits a
    ``CoxPHFitter`` stratified on it, and plots the curves for three joint
    (``age``, ``prio``) settings.

    Args:
        block: Forwarded to ``self.plt.show(block=...)``.
    """
    rossi = load_rossi()
    # One unseeded random label per row.
    rossi["strata"] = np.random.choice(["A", "B"], size=rossi.shape[0])

    fitter = CoxPHFitter()
    fitter.fit(rossi, "week", "arrest", strata="strata")

    covariates = ["age", "prio"]
    scenarios = [[10, 0], [50, 10], [80, 90]]
    fitter.plot_covariate_groups(covariates, scenarios)

    self.plt.title("test_coxph_plot_covariate_groups_with_multiple_variables_and_strata")
    self.plt.show(block=block)
def survival_analyze(dataframe, lifetime_col, dead_col, strata_cols, covariate_col=None):
    """Fit a stratified Cox proportional-hazards model and plot the result.

    Based on the lifelines example notebooks:
    https://github.com/CamDavidsonPilon/lifelines/tree/master/examples

    Args:
        dataframe: Data handed to ``CoxPHFitter.fit``.
        lifetime_col: Second positional argument of ``fit`` (the duration
            column).
        dead_col: Third positional argument of ``fit`` (the event column).
        strata_cols: Passed to ``fit`` as ``strata``.
        covariate_col: Optional. When truthy, ``plot_covariate_groups`` is
            also called for this covariate with the values 0 and 1.

    Returns:
        None.
    """
    from lifelines import CoxPHFitter

    cph = CoxPHFitter().fit(dataframe, lifetime_col, dead_col, strata=strata_cols)

    # NOTE(review): `ax` is never bound inside this function, so this line
    # only works if an indexable `ax` exists at module scope; otherwise it
    # raises NameError. Confirm where the axes are meant to come from.
    cph.plot(ax=ax[1])

    if covariate_col:
        cph.plot_covariate_groups(covariate_col, values=[0, 1])
# Run every split through to_one_hot with the same `to_encode` argument.
one_hot_train = to_one_hot(df_train, to_encode)
one_hot_val = to_one_hot(df_val, to_encode)
one_hot_test = to_one_hot(df_test, to_encode)

# Report the columns of the encoded validation split.
print(one_hot_val.columns.tolist())
print(f"There are {len(one_hot_val.columns)} columns")

# Quick look at the encoded training split.
print(one_hot_train.shape)
one_hot_train.head()

# Fit the Cox model on the training split, then summarise and plot it.
cph = CoxPHFitter()
cph.fit(
    one_hot_train,
    duration_col="time",
    event_col="status",
    step_size=0.1,
)
cph.print_summary()
cph.plot_covariate_groups("edema_1.0", values=[0, 1])


def hazard_ratio(case_1, case_2, cox_params):
    """Return exp(cox_params . (case_1 - case_2)).

    Args:
        case_1: Covariates of the first case.
        case_2: Covariates of the second case.
        cox_params: Coefficients dotted with the covariate difference.

    Returns:
        The exponential of the dot product, i.e. the hazard ratio of
        ``case_1`` relative to ``case_2``.
    """
    covariate_gap = case_1 - case_2
    return np.exp(np.dot(cox_params, covariate_gap))


# Pick two training rows and strip the duration/event columns so that only
# the remaining columns are left.
i = 1
case_1 = one_hot_train.iloc[i, :].drop(["time", "status"])

j = 5
case_2 = one_hot_train.iloc[j, :].drop(["time", "status"])
# </summary> # <p> # <ul> # <ul> # <li>You should see that the treatment (trt) was beneficial because it has a negative impact on the hazard (the coefficient is negative, and exp(coef) is less than 1).</li> # <li>The associated hazard ratio is ~0.8, because this is the exp(coef) of treatment.</li> # </ul> # </p> # We can compare the predicted survival curves for treatment variables. Run the next cell to plot survival curves using the `plot_covariate_groups()` function. # - The y-axis is th survival rate # - The x-axis is time # In[15]: cph.plot_covariate_groups('trt', values=[0, 1]) # Notice how the group without treatment has a lower survival rate at all times (the x-axis is time) compared to the treatment group. # <a name='6'></a> # ## 6. Hazard Ratio # # Recall from the lecture videos that the Hazard Ratio between two patients was the likelihood of one patient (e.g smoker) being more at risk than the other (e.g non-smoker). # $$ # \frac{\lambda_{smoker}(t)}{\lambda_{nonsmoker}(t)} = e^{\theta (X_{smoker} - X_{nonsmoker})^T} # $$ # # Where # # $$ # \lambda_{smoker}(t) = \lambda_0(t)e^{\theta X_{smoker}^T}
cph.print_summary()

# Things to notice in the summary output:
# 1. The number of observations is listed as n=5634 at the top, next to the
#    number of events (churned customers).
# 2. The model coefficients tell us how each feature changes risk: a positive
#    coefficient makes a customer more likely to churn, a negative one makes
#    churn less likely.
# 3. Significance codes are reported for each feature.
# 4. The concordance is reported. This model scores .929 out of 1, which is a
#    very good Cox model; concordance can be used to compare models, much like
#    accuracy in logistic regression.

# Plot the fitted model to get a better picture.
cph.plot()
cph.plot_covariate_groups('TotalCharges', values=[0, 4000], cmap='coolwarm')

# In the survival-curve plot, customers with total charges closer to 0 are at
# higher risk of churning than those with charges closer to 4000.

# Churn prediction: only customers who have not churned yet can be retained.
# In survival-analysis terms these are the censored subjects.
censored_subjects = data.loc[data['Churn_Yes'] == 0]

# Unconditioned survival curves: they still assign churn probability to times
# before each customer's current tenure.
unconditioned_sf = cph.predict_survival_function(censored_subjects)

# Condition on survival so far: divide each customer's curve (column `c`,
# labelled by the customer's index in `data`) by its value at that customer's
# current tenure, and cap the result at 1.
# `Series.clip_upper` was removed in pandas 1.0; `clip(upper=1)` is the
# equivalent call and also works on older pandas versions.
conditioned_sf = unconditioned_sf.apply(
    lambda c: (c / c.loc[data.loc[c.name, 'tenure']]).clip(upper=1)
)
# Take a peek at one of the transformed data sets for new features. print(one_hot_train.shape) one_hot_train.head() # Run the following cell to fit your Cox Proportional Hazards model using the `lifelines` package. cph = CoxPHFitter() cph.fit(one_hot_train, duration_col = 'time', event_col = 'status', step_size=0.1) # Use `cph.print_summary()` to view the coefficients associated with each covariate as well as confidence intervals. cph.print_summary() # Run the next cell to plot survival curves using the `plot_covariate_groups()` function. cph.plot_covariate_groups('trt', values=[0, 1]); # Can compare the predicted survival curves for treatment variables. # Write a function to compute the hazard ratio between two individuals given the cox model's coefficients def hazard_ratio(case_1, case_2, cox_params): ''' Return the hazard ratio of case_1 : case_2 using the coefficients of the cox model. Args: case_1 (np.array): (1 x d) array of covariates case_2 (np.array): (1 x d) array of covariates model (np.array): (1 x d) array of cox model coefficients Returns: hazard_ratio (float): hazard ratio of case_1 : case_2 '''
# Durations and event indicator for the survival curves: an observation
# counts as an event when 'Vaginal Delivery' equals 'Yes'.
T = data['Adjusted Time to Delivery']
C = (data['Vaginal Delivery'] == 'Yes')

# Fit and draw one curve per dosing group on shared axes. `ix` selects the
# extended-interval group and `~ix` the routine-interval group (per the
# labels); `kmf` and `ix` are defined earlier in the file.
kmf.fit(T[~ix], C[~ix], label='Routine Interval Dosing', alpha=.95)
ax = kmf.plot(show_censors=True, color='b')
kmf.fit(T[ix], C[ix], label='Extended Interval Dosing', alpha=.95)
kmf.plot(ax=ax, show_censors=True, color='r')

ax.set_xlim(0, 60)
ax.set_ylim(0, 1)
plt.xlabel('Time to Delivery (hrs)')
plt.ylabel('Frac. Not Delivered')
plt.title(
    'Delay Adjusted Time to Vaginal Delivery as a Function of Dosing Interval')
plt.savefig('output/adjusted_KM.png')

# Cox model of time to delivery against the cumulative dosing delay.
# NOTE(review): `C` above is built by comparing 'Vaginal Delivery' to 'Yes',
# while the raw column is used as `event_col` here, and the duration is the
# unadjusted 'Time to Delivery' -- confirm both are intended.
cph = CoxPHFitter()
cph.fit(data[['Time to Delivery', 'Vaginal Delivery', 'CumulativeDelay']],
        duration_col='Time to Delivery', event_col='Vaginal Delivery',
        show_progress=True)
cph.plot_covariate_groups('CumulativeDelay', [0, 2, 4, 8, 16])

# `ax` still refers to the axes of the first figure, which has already been
# saved. Set the limits through pyplot so they land on the same current axes
# that the labels, title and savefig below operate on.
plt.xlim(0, 60)
plt.ylim(0, 1)
plt.xlabel('Time to Delivery (hrs)')
plt.ylabel('Frac. Not Delivered')
plt.title('Predicted Delivery Curves as a Function of Cumulative Dosing Delay')
plt.savefig('output/predicted_KM.png')