import pandas as pd
from lifelines import NelsonAalenFitter


def calcSurvHazardCat(
    df: pd.DataFrame,
    *,
    hazardcol: str = "hazard",
) -> pd.DataFrame:
    """
    Calculate the cumulative hazard survived by each individual patient,
    as an alternative to raw (and often censored) survival time.

    Parameters
    ----------
    df
        A data frame with two compulsory columns: time and event.
    hazardcol
        Column name for the survived hazard.

    Returns
    -------
    The input dataframe, with an extra column of hazards.
    """
    # Fit a Nelson-Aalen estimator of cumulative hazard on the survival data.
    T = df["time"]
    E = df["event"]
    naf = NelsonAalenFitter()
    naf.fit(T, E)
    # Evaluate the fitted cumulative hazard at each patient's own time.
    df[hazardcol] = naf.predict(T).tolist()
    return df
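# A minimal usage sketch for calcSurvHazardCat. The toy times/events below are
# invented for illustration; any DataFrame with "time" and "event" columns works.
if __name__ == "__main__":
    toy = pd.DataFrame({
        "time": [5, 8, 12, 12, 20, 33],
        "event": [1, 0, 1, 1, 0, 1],
    })
    out = calcSurvHazardCat(toy)
    print(out)  # original columns plus the new "hazard" column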
import matplotlib.pyplot as plt
from lifelines import NelsonAalenFitter

naf = NelsonAalenFitter()

# Fit our data into the object (assumes `data` has "time" and "dead" columns):
naf.fit(data["time"], event_observed=data["dead"])

# Print the cumulative hazard:
print(naf.cumulative_hazard_)

# Plot the cumulative hazard graph:
naf.plot_cumulative_hazard()
plt.title("Cumulative Probability for Event of Interest")
plt.xlabel("Number of days")
plt.ylabel("Cumulative Probability of person's death")
plt.show()

# We can predict the value at a certain point:
print("Time = 500 days: ", naf.predict(500))
print("Time = 1022 days: ", naf.predict(1022))

# Cumulative hazard with confidence interval:
print(naf.confidence_interval_)

# Plot the cumulative hazard with its confidence interval:
confidence_interval = naf.confidence_interval_
plt.plot(confidence_interval["NA_estimate_lower_0.95"], label="Lower")
plt.plot(confidence_interval["NA_estimate_upper_0.95"], label="Upper")
plt.title("Cumulative Hazard With Confidence Interval")
plt.xlabel("Number of days")
plt.ylabel("Cumulative hazard")
plt.legend()
plt.show()

# Plot the cumulative hazard and cumulative density (see the sketch below):
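# One possible continuation (a sketch, not from the original source):
# lifelines' NelsonAalenFitter has no cumulative-density plot, so a
# KaplanMeierFitter fitted on the same `data` is assumed here for the
# density curve.
from lifelines import KaplanMeierFitter

kmf = KaplanMeierFitter()
kmf.fit(data["time"], event_observed=data["dead"])

fig, axes = plt.subplots(1, 2, figsize=(10, 4))
naf.plot_cumulative_hazard(ax=axes[0])
axes[0].set_title("Cumulative hazard")
kmf.plot_cumulative_density(ax=axes[1])
axes[1].set_title("Cumulative density")
plt.show()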
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from lifelines import NelsonAalenFitter

# read_config, DeepSurv, NegativeLogLikelihood, SurvivalDataset, c_index,
# device, and models_dir are assumed to be defined elsewhere in the
# DeepSurv.pytorch project.


def test(ini_file):
    '''
    Performs testing according to a .ini file.

    :param ini_file: (String) the path of the .ini file
    :return best_c_index: the best c-index
    '''
    # Read configuration from the .ini file.
    config = read_config(ini_file)
    # Build network and criterion based on the configuration.
    model = DeepSurv(config['network']).to(device)
    criterion = NegativeLogLikelihood(config['network'], device).to(device)
    # cph = CoxPHFitter()
    # Construct data loaders based on the configuration.
    train_dataset = SurvivalDataset(config['train']['h5_file'], is_train=True, device=device)
    test_dataset = SurvivalDataset(config['train']['h5_file'], is_train=False, device=device)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=len(train_dataset))
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=len(test_dataset))
    test_df = pd.read_csv(
        r'H:\project\DeepSurv\DeepSurv.pytorch-master\ours_test.csv',
        index_col=['PatientID'])
    train_df = pd.read_csv(
        r'H:\project\DeepSurv\DeepSurv.pytorch-master\ours_train.csv',
        index_col=['PatientID'])

    best_c_index = 0
    # Fit a Nelson-Aalen estimator on the test set to obtain the baseline
    # cumulative hazard.
    # kmf = KaplanMeierFitter()
    naf = NelsonAalenFitter()
    # wf = WeibullFitter()
    naf.fit(test_df['Time_d'], event_observed=test_df['Event'])
    timeline = np.arange(0, 25000)
    base_risk = naf.predict(timeline)
    # Difference the cumulative hazard to get per-step hazard increments.
    i = timeline[-1]
    while i > 0:
        base_risk[i] = base_risk[i] - base_risk[i - 1]
        i -= 1
    np.savetxt('temp.txt', base_risk, '%.17f')
    # base_risk.to_csv('test_base_risk.csv', header=True)

    # Load the trained weights and evaluate on the test set.
    model.load_state_dict(
        torch.load(os.path.join(models_dir, ini_file.split('\\')[-1] + '.pth'))['model'])
    model.eval()
    for X, y, e in test_loader:
        with torch.no_grad():
            risk_pred = model(X)
            valid_loss = criterion(risk_pred, y, e, model)
            print(valid_loss)
            valid_c = c_index(-risk_pred, y, e)
            best_c_index = valid_c
            R = risk_pred.detach().cpu().numpy()[:, 0]
            for test_index in range(len(R)):
                # test_index = 120  # people
                _r = R[test_index]
                _y = y.detach().cpu().numpy()[test_index, 0]
                _e = e.detach().cpu().numpy()[test_index, 0]
                # Individual cumulative hazard: the baseline cumulative hazard
                # at the observed time, scaled by the exponentiated risk score
                # (the Cox proportional-hazards assumption).
                t0 = naf.predict(_y)
                risk = t0 * np.exp(_r)
                # print(np.exp(_r))
                print(risk, int(_e))
                # pre_y = 0.
                # m = np.min(np.where(p > 0.5))
                # print(int(_y), m, _e, p[int(_y)] >= 0.5)
                # if (p[int(_y)] >= 0.5) == bool(_e):
                #     ture += 1
                # print(ture/len(R))
                # if _e == pre_y:
                #     ture += 1
                # plt.plot(p)
                # plt.show()
    naf.plot()
    plt.show()
    return best_c_index
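# A small self-contained sketch (not part of the original script) of the
# Cox-style risk adjustment used above: under proportional hazards, an
# individual's cumulative hazard is the baseline cumulative hazard H0(t)
# scaled by exp(risk score), so the survival probability follows as
# S(t) = exp(-H0(t) * exp(r)). The data, risk score, and evaluation time
# below are synthetic, chosen only to make the snippet runnable.
import numpy as np
from lifelines import NelsonAalenFitter

rng = np.random.default_rng(0)
times = rng.exponential(scale=365, size=200)   # synthetic follow-up times (days)
events = rng.integers(0, 2, size=200)          # synthetic event indicator (0/1)

naf = NelsonAalenFitter()
naf.fit(times, event_observed=events)

r = 0.3                                        # hypothetical model risk score
t = 500                                        # evaluation time in days
H0 = float(naf.predict(t))                     # baseline cumulative hazard at t
H_individual = H0 * np.exp(r)                  # scaled cumulative hazard
S = np.exp(-H_individual)                      # survival probability at t
print(H_individual, S)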