def test_coxph_plotting_with_subset_of_columns(self, block):
    """Smoke-test ``CoxPHFitter.plot`` restricted to a subset of columns.

    Fits a Cox model on the regression dataset (duration ``"T"``, event
    ``"E"``), plots only ``var1`` and ``var2``, titles the figure after this
    test and shows it. ``block`` is forwarded unchanged to ``show``.
    """
    dataset = load_regression_dataset()

    fitter = CoxPHFitter()
    fitter.fit(dataset, duration_col="T", event_col="E")

    selected_columns = ["var1", "var2"]
    fitter.plot(columns=selected_columns)

    self.plt.title("test_coxph_plotting_with_subset_of_columns")
    self.plt.show(block=block)
def test_coxph_plotting(self, block):
    """Smoke-test the default ``CoxPHFitter.plot`` output.

    Fits a Cox model on the regression dataset (duration ``"T"``, event
    ``"E"``), draws the default plot, titles the figure after this test and
    shows it. ``block`` is forwarded unchanged to ``show``.
    """
    dataset = load_regression_dataset()

    fitter = CoxPHFitter()
    fitter.fit(dataset, duration_col="T", event_col="E")
    fitter.plot()

    self.plt.title("test_coxph_plotting")
    self.plt.show(block=block)
def test_coxph_plotting_with_hazards_ratios(self, block):
    """Smoke-test ``CoxPHFitter.plot`` with ``hazard_ratios=True``.

    Fits a Cox model on the regression dataset (duration ``"T"``, event
    ``"E"``), plots it with ``hazard_ratios=True`` and shows the figure.
    ``block`` is forwarded unchanged to ``show``.
    """
    df = load_regression_dataset()
    cp = CoxPHFitter()
    cp.fit(df, "T", "E")
    cp.plot(hazard_ratios=True)
    # Title the figure after this test so it is distinguishable from the
    # figure produced by the plain ``test_coxph_plotting`` test.
    self.plt.title("test_coxph_plotting_with_hazards_ratios")
    self.plt.show(block=block)
def fit_cox(
    train_df: Union[pd.DataFrame, str],
    covariates: List[str],
    test_df: Union[pd.DataFrame, str] = None,
    strata: List[str] = None,
    plot: bool = False,
    process_dir: str = None,
):
    """Fit a Cox proportional-hazards model and optionally score/save it.

    Args:
        train_df: Training data, or a path to a CSV file that is read with
            ``pd.read_csv``. Must contain ``"duration"`` and ``"event"``
            columns plus every covariate and strata column.
        covariates: Names of the covariate columns to fit on.
        test_df: Optional held-out data (frame or CSV path). When given, it
            is scored with the ``"log_likelihood"`` and
            ``"concordance_index"`` scoring methods.
        strata: Optional column name(s) passed to ``CoxPHFitter.fit`` as
            ``strata``. These columns are selected from the frames
            automatically; they do not need to be repeated in
            ``covariates``.
        plot: When true (and ``process_dir`` is set), save the model plot
            to ``hazard_plot.pdf`` in ``process_dir``.
        process_dir: Optional output directory. When set, ``summary.csv``
            and ``results.json`` are written there.

    Returns:
        A ``(results, cphf)`` tuple: the dict of fit/score metrics and the
        fitted ``CoxPHFitter``.
    """
    if isinstance(train_df, str):
        train_df = pd.read_csv(train_df)
    if isinstance(test_df, str):
        test_df = pd.read_csv(test_df)

    included_cols = ["duration", "event"] + list(covariates)
    if strata is not None:
        # The strata columns have to be present in the frame handed to
        # ``fit``/``score``; add any that are not already selected.
        strata_cols = [strata] if isinstance(strata, str) else list(strata)
        included_cols += [col for col in strata_cols if col not in included_cols]

    print(train_df.columns)
    cphf = CoxPHFitter()
    cphf.fit(
        train_df[included_cols],
        duration_col="duration",
        event_col="event",
        strata=strata,
    )

    results = {
        "log_likelihood": cphf.log_likelihood_,
        "concordance_index": cphf.concordance_index_,
        "log_likelihood_ratio_test_pvalue": cphf.log_likelihood_ratio_test().p_value,
    }

    if test_df is not None:
        test_subset = test_df[included_cols]
        results["test_log_likelihood"] = cphf.score(
            test_subset, scoring_method="log_likelihood")
        results["test_concordance_index"] = cphf.score(
            test_subset, scoring_method="concordance_index")

    if plot and process_dir is not None:
        fig = plt.figure(figsize=(5, 10))
        try:
            cphf.plot()
            fig.savefig(os.path.join(process_dir, "hazard_plot.pdf"))
        finally:
            # Release the figure so repeated calls do not accumulate
            # open matplotlib figures.
            plt.close(fig)

    if process_dir is not None:
        cphf.summary.to_csv(os.path.join(process_dir, "summary.csv"))
        save_dict_to_json(os.path.join(process_dir, "results.json"), results)

    return results, cphf
def survival_analyze(dataframe, lifetime_col, dead_col, strata_cols, covariate_col=None):
    """Fit a stratified Cox model on ``dataframe`` and plot it.

    Based on notebook here.
    https://github.com/CamDavidsonPilon/lifelines/tree/master/examples

    Args:
        dataframe: Data passed to ``CoxPHFitter.fit``.
        lifetime_col: Name of the duration column.
        dead_col: Name of the event column.
        strata_cols: Passed to ``fit`` as ``strata``.
        covariate_col: Optional covariate; when truthy, it is additionally
            plotted via ``plot_covariate_groups`` with values ``[0, 1]``.
    """
    from matplotlib import pyplot as plt
    from lifelines import CoxPHFitter

    cph = CoxPHFitter().fit(dataframe, lifetime_col, dead_col, strata=strata_cols)

    # Draw the model plot on axes owned by this function; no ``ax`` is
    # defined in this scope, so relying on one raises NameError.
    _, coef_ax = plt.subplots()
    cph.plot(ax=coef_ax)

    if covariate_col:
        cph.plot_covariate_groups(covariate_col, values=[0, 1])
def CoxAnalysis(pd_data, pd_surval, tp):
    """Run univariate or multivariable Cox regression and save the results.

    Writes the coefficient summary to ``CoxRegress.txt`` (tab-separated) and
    the plot of the last fitted model to ``CoxRegress.pdf``.

    Args:
        pd_data: DataFrame of candidate covariates, one per column.
        pd_surval: DataFrame holding the ``'OS'`` duration column and the
            ``'status'`` event column, aligned with ``pd_data`` by index.
        tp: ``'univariate'`` fits one model per covariate column and stacks
            the summaries; ``'multivariable'`` fits a single model on all
            covariates after dropping rows with missing values.

    Raises:
        ValueError: If ``tp`` is not one of the two supported modes, or if
            ``pd_data`` has no columns in univariate mode.
    """
    # Local import: this function needs ``pd.concat`` and must not depend on
    # what the surrounding module happens to import.
    import pandas as pd

    if tp not in ('univariate', 'multivariable'):
        raise ValueError(
            "tp must be 'univariate' or 'multivariable', got %r" % (tp,))

    cph = CoxPHFitter(penalizer=0.1)
    if tp == 'univariate':
        if pd_data.shape[1] == 0:
            raise ValueError("pd_data has no covariate columns to analyse")
        summaries = []
        for _, covariate in pd_data.items():
            # Column-wise concat aligns on the index; ``DataFrame.append``
            # (removed in pandas 2.0) is not needed.
            df = pd.concat([pd_surval, covariate], axis=1)
            cph.fit(df, 'OS', event_col='status')
            summaries.append(cph.summary)
        pd_out = pd.concat(summaries)
    else:
        df = pd.concat([pd_data, pd_surval], axis=1)
        df = df.dropna(axis=0, how='any')
        cph.fit(df, 'OS', event_col='status', step_size=0.1)
        pd_out = cph.summary

    pd_out.to_csv('CoxRegress.txt', sep='\t', header=True, index=True)

    plt.style.use('my-paper')
    fig, axe = plt.subplots(figsize=(25, 8))
    # NOTE: in univariate mode ``cph`` holds only the last fitted covariate,
    # so the plot shows that single model.
    cph.plot(ax=axe)
    axe.set_ylim(-0.2, 3.2)
    axe.set_xlim(-2.5, 2.1)
    plt.savefig('CoxRegress.pdf')
import json

import matplotlib.pyplot as plt
import pandas as pd
from lifelines import CoxPHFitter

# Load the raw records and keep only the columns used by the model.
with open("test_data_Cox.json", "r", encoding='UTF-8') as f:
    temp = json.load(f)

regression_dataset = pd.DataFrame(
    temp,
    columns=['T', 'E', 'base', 'hotestVideo', 'length'],
)

# Quick look at the data and at the distribution of the event column.
print(regression_dataset.head())
print(regression_dataset['E'].value_counts())

# Fit the Cox model with 'T' as the duration and 'E' as the event column.
cph = CoxPHFitter()
cph.fit(regression_dataset, duration_col='T', event_col='E')
cph.print_summary()

# Draw the model plot and write it to disk.
cph.plot()
plt.savefig('Cox3.png')
cph.plot() ''') if st.checkbox("Fit Cox model"): with st.spinner('Construction in progress...'): cph = CoxPHFitter() col_list.extend(['Churn', 'Tenure']) cph.fit(df[col_list], duration_col='Tenure', event_col='Churn') plt.style.use('seaborn-ticks') fig = cph.plot() plt.tick_params(axis='both', which='major', labelsize=20) plt.xlabel("Log Hazard Ratio", fontsize=22) plt.tight_layout() st.pyplot(height=400) plt.clf() st.markdown(''' The **hazard ratio** (HR) of each variable provides a measure of how it influences the probability of the event of interest, in this case customer churn, happening at a given point in time.
print( np.mean( k_fold_cross_validation(fitter, times, duration_col='time', event_col='success'))) print("End cross-validation of " + name) from lifelines import CoxPHFitter cph = CoxPHFitter() cph.fit(times, duration_col='time', event_col='success', show_progress=True) cph.print_summary() save('coxph', cph.plot()) #cph.check_assumptions(times, show_plots=True) crossValidate('cox', cph) fitters = {'cox': cph} from lifelines import WeibullAFTFitter, LogNormalAFTFitter, LogLogisticAFTFitter for (name, fitter) in [("weibull", WeibullAFTFitter), ("lognormal", LogNormalAFTFitter), ("loglogistic", LogLogisticAFTFitter)]: print("BEGIN " + name) aft = fitter() aft.fit(times, duration_col='time', event_col='success') aft.print_summary(3)
delimiter="\t") regression_dataset['Class'] = regression_dataset['Class'].map({ 'YES': 1, 'NO': 0 }) regression_dataset = regression_dataset.join( pd.get_dummies(regression_dataset['clusters'])) regression_dataset = regression_dataset.drop(['id', 'clusters'], axis=1) print(regression_dataset.head(100)) print(regression_dataset.dtypes) kmf = KaplanMeierFitter() kmf.fit(regression_dataset['Time'], event_observed=regression_dataset['Class']) kmf.survival_function_ survival_function = kmf.plot_survival_function() # or just kmf.plot() survival_function.get_figure().savefig("KaplanMeierFitter.png") # Using Cox Proportional Hazards model cph = CoxPHFitter(penalizer=0.1) cph.fit(regression_dataset, 'Time', event_col='Class') cph.print_summary() # predict=cph.predict_survival_function(regression_dataset).plot() # predict.get_figure().savefig("predicted.png") ax = cph.plot() ax.get_figure().savefig("CoxPHFitter.png")
y_te_eicu, X_te_mimic, y_te_mimic) nzeros = neq_zero(best_cph.params_) coefs = nzeros.index[nzeros.values].to_list() coefs_val = best_cph.params_[nzeros.values].tolist() summary = { "penalizer": p, "C_train": ctr, "C_eicu:": ceicu, "C_mimic": cmimic, "nfeatures": len(coefs), "features": coefs, "coefs": coefs_val, "df": pd.DataFrame({ "features": coefs, "coefs": coefs_val }) } cph_results[best_ps[i]] = summary best_cphs.append(best_cph) plt.rcParams['figure.figsize'] = [5, 10] fig = best_cph.plot() figures.append(fig) plt.close() print(summary) with open("{}/{}_summary.pkl".format(model_path, tag), "wb") as fout: pickle.dump(cph_results, fout) print(cph_results)