# Fit a stratified Cox proportional-hazards model on the spreadsheet data
# and report the fitted coefficients plus cumulative-hazard predictions.
from lifelines import CoxPHFitter
import numpy as np
import pandas as pd
import lifelines

path = './totalData.xlsx'
data = pd.read_excel(path)

# Every column after the first five is passed as a stratification variable.
strata_cols = list(data.columns)
del strata_cols[0:5]

cph = CoxPHFitter()
cph.fit(data, 'totaltime', 'failure', strata=strata_cols)
cph.print_summary()
cph.predict_cumulative_hazard(data)
# Align phenotype rows / genotype columns to the matched sample indices.
pheno = pheno.iloc[ib]
pheno = pheno.reset_index(drop=True)
geno_ia = geno[:, ia]

# ---- Null Cox model ---------------------------------------------------
logger.info('Generating Null models\n')
cph = CoxPHFitter()
cph.fit(pheno[[T_name, event_name] + covname], T_name, event_col=event_name)
# Martingale residuals of the null model (deviance variant kept for reference):
# res_surv = cph.compute_residuals(pheno[[T_name, event_name] + covname], 'deviance').sort_index()['deviance']
res_surv = cph.compute_residuals(
    pheno[[T_name, event_name] + covname], 'martingale'
).sort_index()['martingale']

# This is the most memory intensive part. Might need to change if we are
# dealing with biobank scale data.
if args.apr_flag == 'N':
    logger.info('Calculating Null covariance matrix\n')
    mat = cph.predict_cumulative_hazard(pheno)
    # FIX: was axis=-0, which is identical to axis=0 but misleading; the
    # difference is taken along the time axis (rows) of the hazard matrix.
    P = np.diff(mat, axis=0)
    for isubj in range(P.shape[1]):
        # Zero out hazard increments after the subject's own observed time.
        idx = np.abs(mat.index - pheno[T_name][isubj]).argmin()
        P[idx:, isubj] = 0
    V = da.diag(np.array(pheno[event_name] - res_surv)) - da.dot(P.transpose(), P)
    X = np.array(pheno[covname])
    # Covariate-projected covariance: C = V - V X (X' V X)^{-1} X' V
    C = V - da.matmul(
        da.matmul(da.matmul(V, X),
                  da.linalg.inv(da.matmul(da.matmul(X.transpose(), V), X))),
        da.matmul(X.transpose(), V))
else:
    logger.info('Using first order approximations for testing statistics\n')

# Auto-chunk the variant index to reduce the query time.
# FIX: xrange is Python 2 only and raises NameError on Python 3; use range.
chunk_array = [bim.i.values[i:i + chunk_size]
               for i in range(0, len(bim.i.values), chunk_size)]
nchunk = len(chunk_array)
chunk_ind = 1
# Evaluation time grid: unique observed times, with 0 prepended if absent
# so the survival curve starts at the origin.
sorted_y_test = np.sort(np.unique(y_test[:, 0]))
if sorted_y_test[0] != 0:
    mesh_points = np.concatenate(([0.], sorted_y_test))
else:
    mesh_points = sorted_y_test
surv = \
    surv_model.predict_survival_function(X_test_standardized, mesh_points)
surv = surv.values.T  # transpose to (n_samples, n_times) — assumed; confirm model output shape
# ---------------------------------------------------------------------
# compute c-index
#
if cindex_method == 'cum_haz':
    # Risk score = cumulative hazard summed over the time grid; higher
    # hazard means earlier expected failure, hence the negation below.
    cum_haz = \
        surv_model.predict_cumulative_hazard(X_test_standardized, sorted_y_test)
    cum_haz = cum_haz.values.T
    cum_hazard_scores = cum_haz.sum(axis=1)
    test_cindex = concordance_index(y_test[:, 0], -cum_hazard_scores, y_test[:, 1])
elif cindex_method == 'cum_haz_from_surv':
    # Derive cumulative hazard from the survival curve via H(t) = -log S(t);
    # clamp S(t) to machine epsilon to keep the logarithm finite.
    surv_thresholded = np.maximum(surv, np.finfo(float).eps)
    cum_haz = -np.log(surv_thresholded)
    cum_hazard_scores = cum_haz.sum(axis=1)
    test_cindex = concordance_index(y_test[:, 0], -cum_hazard_scores, y_test[:, 1])
elif cindex_method == 'median':
    # NOTE(review): branch continues past the visible chunk; continuation
    # backslash preserved intentionally.
    predicted_medians = \