Example #1
import pandas as pd
from lifelines import NelsonAalenFitter


def calcSurvHazardCat(df: pd.DataFrame, *, hazardcol: str = "hazard") -> pd.DataFrame:
    """
    Calculate the cumulative hazard survived by each individual patient, as an
    alternative to raw (and often censored) survival time.

    Parameters
    ----------
    df
        A data frame with two required columns: time and event.
    hazardcol
        Column name for the survived hazard.

    Returns
    -------
    The input dataframe, with an extra column of hazards.
    """

    # Fit a Nelson-Aalen estimator of the cumulative hazard to the survival data
    T = df["time"]
    E = df["event"]
    naf = NelsonAalenFitter()
    naf.fit(T, event_observed=E)
    # Look up each patient's cumulative hazard at their own observed time
    df[hazardcol] = naf.predict(T).tolist()
    return df
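
A minimal usage sketch for the function above, on a hypothetical toy data frame (the column values are made up for illustration):

import pandas as pd

# Hypothetical data: durations in days; event=1 means the event was observed,
# event=0 means the observation was censored
toy = pd.DataFrame({
    "time": [5, 12, 30, 45, 60],
    "event": [1, 0, 1, 1, 0],
})

result = calcSurvHazardCat(toy, hazardcol="hazard")
print(result)  # original columns plus the new cumulative-hazard column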
Example #2
import matplotlib.pyplot as plt
from lifelines import NelsonAalenFitter

# `data` is assumed to be a DataFrame with "time" and "dead" columns
naf = NelsonAalenFitter()

# Fit our data to the estimator:
naf.fit(data["time"], event_observed=data["dead"])

# Inspect the cumulative hazard estimate:
naf.cumulative_hazard_

# Plot the cumulative hazard graph:
naf.plot_cumulative_hazard()
plt.title("Cumulative Hazard for Event of Interest")
plt.xlabel("Number of days")
plt.ylabel("Cumulative hazard of death")

# We can predict the cumulative hazard at a given time:
print("Time = 500 days: ", naf.predict(500))
print("Time = 1022 days: ", naf.predict(1022))

# Cumulative hazard with confidence interval:
naf.confidence_interval_

# Plot the cumulative hazard with its confidence interval:
confidence_interval = naf.confidence_interval_
plt.plot(confidence_interval["NA_estimate_lower_0.95"], label="Lower")
plt.plot(confidence_interval["NA_estimate_upper_0.95"], label="Upper")
plt.title("Cumulative Hazard With Confidence Interval")
plt.xlabel("Number of days")
plt.ylabel("Cumulative hazard")
plt.legend()

# Plot the cumulative hazard and cumulative density:
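
The original snippet ends at the comment above. A plausible completion is sketched below; note that NelsonAalenFitter exposes no cumulative-density plot, so the sketch assumes a KaplanMeierFitter fitted to the same (hypothetical) `data` frame for that curve:

from lifelines import KaplanMeierFitter

naf.plot_cumulative_hazard(label="Nelson-Aalen cumulative hazard")

kmf = KaplanMeierFitter()
kmf.fit(data["time"], event_observed=data["dead"])
kmf.plot_cumulative_density(label="Kaplan-Meier cumulative density")

plt.title("Cumulative Hazard and Cumulative Density")
plt.xlabel("Number of days")
plt.legend()
plt.show()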
Example #3
import os

import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
from lifelines import NelsonAalenFitter

# Repo-local helpers assumed importable from DeepSurv.pytorch: read_config,
# DeepSurv, NegativeLogLikelihood, SurvivalDataset, c_index, plus the
# module-level `device` and `models_dir`.


def test(ini_file):
    ''' Performs testing according to .ini file

    :param ini_file: (String) the path of .ini file
    :return best_c_index: the best c-index
    '''
    # reads configuration from .ini file
    config = read_config(ini_file)
    # builds the network and loss criterion based on configuration
    model = DeepSurv(config['network']).to(device)
    criterion = NegativeLogLikelihood(config['network'], device).to(device)

    # cph = CoxPHFitter()
    # constructs data loaders based on configuration
    train_dataset = SurvivalDataset(config['train']['h5_file'],
                                    is_train=True,
                                    device=device)
    test_dataset = SurvivalDataset(config['train']['h5_file'],
                                   is_train=False,
                                   device=device)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=len(train_dataset))
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=len(test_dataset))
    test_df = pd.read_csv(
        r'H:\project\DeepSurv\DeepSurv.pytorch-master\ours_test.csv',
        index_col=['PatientID'])
    train_df = pd.read_csv(
        r'H:\project\DeepSurv\DeepSurv.pytorch-master\ours_train.csv',
        index_col=['PatientID'])

    # test step
    best_c_index = 0
    # kmf = KaplanMeierFitter()
    naf = NelsonAalenFitter()
    # wf = WeibullFitter()
    naf.fit(test_df['Time_d'], event_observed=test_df['Event'])
    # Baseline cumulative hazard on a dense timeline, then difference it in
    # place (from the end backwards) to recover per-interval hazard increments
    timeline = np.arange(0, 25000)
    base_risk = naf.predict(timeline)
    i = timeline[-1]
    while i > 0:
        base_risk[i] = base_risk[i] - base_risk[i - 1]
        i -= 1
    np.savetxt('temp.txt', base_risk, '%.17f')
    # base_risk.to_csv('test_base_risk.csv', header=True)

    model.load_state_dict(
        torch.load(os.path.join(models_dir,
                                ini_file.split('\\')[-1] + '.pth'))['model'])
    model.eval()

    for X, y, e in test_loader:
        with torch.no_grad():
            risk_pred = model(X)
            valid_loss = criterion(risk_pred, y, e, model)
            print(valid_loss)
            valid_c = c_index(-risk_pred, y, e)
            best_c_index = valid_c

            R = risk_pred.detach().cpu().numpy()[:, 0]
            for test_index in range(len(R)):
                # test_index = 120    # people
                _r = R[test_index]
                _y = y.detach().cpu().numpy()[test_index, 0]
                _e = e.detach().cpu().numpy()[test_index, 0]
                # Baseline cumulative hazard at this patient's observed time;
                # scaled below by exp(risk score), proportional-hazards style
                t0 = naf.predict(_y)

                risk = t0 * np.exp(_r)
                # print(np.exp(_r))
                print(risk, int(_e))

                # pre_y = 0.
                # m = np.min(np.where(p > 0.5))
                # print(int(_y), m, _e, p[int(_y)] >= 0.5)
                # if (p[int(_y)] >= 0.5) == bool(_e):
                #     true += 1
            # print(true/len(R))
            # if _e == pre_y:
            #     true += 1
            # plt.plot(p)
            # plt.show()
    naf.plot()
    plt.show()
    return best_c_index
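
As a side note, the backward while loop above that turns the cumulative hazard into per-interval increments can be written more compactly with numpy. A minimal equivalent sketch, assuming `base_risk` is the pandas Series returned by `naf.predict(timeline)`:

increments = base_risk.to_numpy().copy()
# Keep the first cumulative value; every later entry becomes a first
# difference, exactly what the backward in-place loop computes
increments[1:] = np.diff(increments)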