def output_stats(model, surv, X_train, df_train, X_val, df_val):
    """ Score predicted survival curves against the validation set
    # Arguments
        model: neural network model trained with final parameters
            (not used inside this function).
        surv: predicted survival curves, handed to `determine_surv_prob`
            and to `EvalSurv`.
        X_train : input variables of the training set (not used here)
        df_train: training dataset; columns 'yy' and 'status' are read
        X_val : input variables of the validation set (not used here)
        df_val: validation dataset; columns 'yy' and 'status' are read
    # Returns
        One-row DataFrame with columns 'unoc5', 'unoc10' (Uno C-index at
        5 and 10 years) and 'ibs' (Integrated Brier Score).
    """
    val_time = df_val['yy']
    val_status = df_val['status']

    # Structured (event, time) arrays for the IPCW concordance index.
    y_train = skSurv.from_arrays(event=df_train['status'],
                                 time=df_train['yy'])
    y_val = skSurv.from_arrays(event=val_status, time=val_time)

    # Negated survival probability serves as the risk score.
    uno_c = []
    for horizon in (5, 10):
        risk = np.array(-determine_surv_prob(surv, horizon))
        uno_c.append(
            concordance_index_ipcw(y_train, y_val, risk, horizon)[0])

    # The Brier score is integrated between the 10th and 90th percentile
    # of the validation times, on 100 equally spaced points.
    lower = np.percentile(val_time, 10)
    upper = np.percentile(val_time, 90)
    evaluator = EvalSurv(surv,
                         np.array(val_time),
                         np.array(val_status),
                         censor_surv='km')
    ibs = evaluator.integrated_brier_score(np.linspace(lower, upper, 100))

    return pd.DataFrame([[uno_c[0], uno_c[1], ibs]],
                        columns=['unoc5', 'unoc10', 'ibs'])
Exemple #2
0
def compute_CI_scores(model,
                      quantiles,
                      DATA_te,
                      DATA_tr,
                      pretrain_state,
                      risk=0):
    '''Compute IPCW concordance indices (CI) based on predicted CDF values

    Inputs:
    model: trained model
    quantiles: time horizons; the first four entries are used, each one as
        the `tau` truncation time of the matching concordance index
    DATA_te: test data, unpacked as a 3-tuple whose last two entries are
        the times and the event indicators
    DATA_tr: train data, same layout as DATA_te
    pretrain_state: show we use pretrain model or not (forwarded to
        `predict_cdf`)
    risk: unused, kept so existing callers keep working

    Returns:
    Tuple of four concordance indices, one per entry of the first four
    quantiles. All four are 0 when any of the first four CDF predictions
    is empty.
    '''
    cdf_preds = [
        cdf.numpy()
        for cdf in predict_cdf(model, DATA_te, quantiles, pretrain_state)
    ]

    _, t_valid, e_valid = DATA_te
    _, t_train, e_train = DATA_tr

    def as_structured(events, times):
        # Times are stored as float64: an integer field would silently
        # truncate fractional times before they reach the metric.
        y = np.empty(len(events), dtype=[('e', bool), ('t', 'float64')])
        y['e'] = events.astype('bool')
        y['t'] = times.astype('float64')
        return y

    et_train = as_structured(e_train, t_train)
    et_valid = as_structured(e_valid, t_valid)

    n_scores = 4
    if any(cdf_preds[k].shape[0] == 0 for k in range(n_scores)):
        return 0, 0, 0, 0

    # A larger CDF value means an earlier predicted event, so the sign is
    # flipped exactly as in the original four calls.
    return tuple(
        concordance_index_ipcw(et_train,
                               et_valid,
                               -cdf_preds[k],
                               tau=quantiles[k])[0]
        for k in range(n_scores))
Exemple #3
0
def test_uno_c_not_1d(whas500_pred, dim):
    """A 2D risk estimate must be rejected with a ValueError."""
    event, time, risk = whas500_pred
    survival = Surv.from_arrays(event, time)

    # Repeat the risk column `dim` times to obtain a 2D estimate.
    risk_column = risk[:, numpy.newaxis]
    risk_2d = numpy.tile(risk_column, (1, dim))

    expected_message = "Expected 1D array, got 2D array instead:"
    with pytest.raises(ValueError, match=expected_message):
        concordance_index_ipcw(survival, survival, risk_2d)
Exemple #4
0
def evaluate_performance(T_train, c_train, T_test, c_test, prediction, time_horizon, 
                         num_causes=2, cause_names=("Cause 1", "Cause 2")):
    """Evaluate cause-specific predictions and print one summary line per cause.

    Causes are numbered from 1; for cause k the event indicator is
    ``c == k`` and ``1 - prediction[k]`` is passed as the estimate to
    `concordance_index_ipcw` and `cumulative_dynamic_auc`.

    # Arguments
        T_train, c_train: training times and cause labels (array-likes of
            equal length).
        T_test, c_test: test times and cause labels.
        prediction: indexable by cause number (1..num_causes).
        time_horizon: single time at which the dynamic AUC is evaluated.
        num_causes: number of causes to evaluate.
        cause_names: display names, indexed by cause number minus one.
    # Returns
        (Harell_c_index, UNO_c_index, dynamic_auc): three lists with one
        entry per cause. A dynamic AUC that could not be computed is
        recorded as "-". Earlier versions computed these lists but
        returned nothing.
    """
    record_dtype = [('Status', '?'), ('Survival_in_days', '<f8')]

    def to_structured(times, causes, cause):
        # Vectorised replacement of the former per-row .loc lookups.
        y = np.empty(len(times), dtype=record_dtype)
        y['Status'] = np.asarray(causes) == cause
        y['Survival_in_days'] = np.asarray(times)
        return y

    Harell_c_index = []
    UNO_c_index = []
    dynamic_auc = []

    for cause_idx in range(num_causes):
        cause = cause_idx + 1
        y_train = to_structured(T_train, c_train, cause)
        y_test = to_structured(T_test, c_test, cause)
        estimate = 1 - prediction[cause]

        Harell_c_index.append(
            concordance_index(T_test, prediction[cause],
                              event_observed=(c_test == cause) * 1))

        # Truncate Uno's C at the largest training time.
        tau = y_train['Survival_in_days'].max()
        UNO_c_index.append(
            concordance_index_ipcw(y_train, y_test, estimate, tau=tau)[0])

        try:
            dynamic_auc_val = cumulative_dynamic_auc(
                y_train, y_test, estimate, times=[time_horizon])[0][0]
        except ValueError:
            print('*warning: exception while calculating dynamic_auc, dynamic_auc is not calculated*')
            dynamic_auc_val = "-"
            auc_text = "-"
        else:
            auc_text = '{:0.4f}'.format(dynamic_auc_val)
        dynamic_auc.append(dynamic_auc_val)

        print("--- Cause: {} -> [C-index: {:0.4f} ] [Dynamic AUC-ROC: {} ]".format(
            cause_names[cause_idx],
            UNO_c_index[-1],
            auc_text))

    return Harell_c_index, UNO_c_index, dynamic_auc
def output_simulations(surv, df_train, x_test, df_test, name):
    """ Evaluate predicted survival curves at the median test time
    # Arguments
        surv: predicted survival curves, handed to `determine_surv_prob`.
        df_train: training dataset; columns 'status' and 'yy' are read
        x_test: 20 simulated input variables (not used here)
        df_test: test dataset; columns 'status' and 'yy' are read
        name: name of the model (not used here)
    # Returns
        results_test: one-row DataFrame with the median test time
        ('t_med'), the AUC ('auc_med') and Uno C-index ('unoc') at that
        time, and 'cens_rate'.
    """
    test_status = df_test['status']

    y_train = skSurv.from_arrays(event=df_train['status'],
                                 time=df_train['yy'])
    y_test = skSurv.from_arrays(event=test_status, time=df_test['yy'])

    # 100 minus the percentage obtained by summing the status column.
    censoring_pct = 100. - test_status.sum() * 100. / test_status.shape[0]

    # Second entry of the (0th, 50th) percentiles, i.e. the median time.
    median_time = np.percentile(y_test['time'], np.linspace(0, 50, 2))[1]

    # Negated survival probability at the median time is the risk score.
    auc_result = cumulative_dynamic_auc(
        y_train, y_test, -determine_surv_prob(surv, median_time),
        median_time)
    uno_result = concordance_index_ipcw(
        y_train, y_test, -determine_surv_prob(surv, median_time),
        median_time)

    results_test = pd.DataFrame({
        't_med': median_time,
        'auc_med': [float(auc_result[0])],
        'unoc': [float(uno_result[0])],
        'cens_rate': [censoring_pct]
    })
    return results_test
Exemple #6
0
    def ipcw(self, F_train, F_test, T_train, T_test, survival_prob_valid):
        """Return one minus the IPCW concordance index as a '%.5g' string.

        F_* are cast to bool and T_* are stored as 32-bit integers in the
        structured arrays passed to `concordance_index_ipcw`;
        `survival_prob_valid` is passed through as the estimate.
        """
        train_dtype = [('F_train', '?'), ('T_train', 'i4')]
        test_dtype = [('F_test', '?'), ('T_test', 'i4')]

        y_train = np.zeros(len(F_train), dtype=train_dtype)
        y_train['F_train'] = F_train.astype('bool')
        y_train['T_train'] = T_train

        y_test = np.zeros(len(F_test), dtype=test_dtype)
        y_test['F_test'] = F_test.astype('bool')
        y_test['T_test'] = T_test

        uno_c = concordance_index_ipcw(y_train, y_test, survival_prob_valid)[0]
        return '%.5g' % (1 - uno_c)
def test_uno_c_all_censored():
    """Uno's C must equal Harrell's C when every training sample is an event."""
    train_times = (2, 4, 6, 8, 10, 11, 15, 19)
    y_train = Surv.from_arrays(event=(True,) * len(train_times),
                               time=train_times)

    test_times = (1, 3, 5, 7, 12, 13, 20)
    test_events = (True, False, False, True, True, False, False)
    y_test = Surv.from_arrays(event=test_events, time=test_times)

    scores = (5, 8, 13, 11, 9, 7, 4)

    uno_result = concordance_index_ipcw(y_train, y_test, scores)
    harrell_result = concordance_index_censored(y_test['event'],
                                                y_test['time'], scores)
    assert uno_result == harrell_result
Exemple #8
0
def calc_metrics(tr_t_, tr_y_, te_t_, te_y_, preds, eval_time):
    """Return (Uno C-index, weighted Brier score) at `eval_time`.

    The first column of each *_y_ frame is used as the status and the
    first column of each *_t_ frame as the time. `eval_time` is
    truncated to an integer before use.
    """
    record_dtype = [('status', 'bool'), ('time', '<f8')]

    def to_structured(status_df, time_df):
        pairs = list(zip(status_df.iloc[:, 0], time_df.iloc[:, 0]))
        return np.array(pairs, dtype=record_dtype)

    y_train = to_structured(tr_y_, tr_t_)
    y_test = to_structured(te_y_, te_t_)
    horizon = int(eval_time)

    # Only the first of the five returned values is the index itself.
    uno_c, _conc, _disc, _tied_risk, _tied_time = concordance_index_ipcw(
        y_train, y_test, preds, horizon)
    brier = weighted_brier_score(np.asarray(tr_t_), np.asarray(tr_y_),
                                 preds, np.asarray(te_t_),
                                 np.asarray(te_y_), horizon)
    return uno_c, brier
def simulation(n_samples, hazard_ratio, n_repeats=100):
    """Compare the error of Harrell's and Uno's C under increasing censoring.

    For each censoring level, `n_repeats` data sets are generated with a
    fixed-seed random state. Recorded per repeat: the observed censoring
    percentage and the difference between the actual C and each estimate.

    Returns a pair of DataFrames (mean, sample standard deviation) with
    one row per censoring level and one column per measure.
    """
    measures = ("censoring", "Harrel's C", "Uno's C")
    data_mean = {name: [] for name in measures}
    data_std = {name: [] for name in measures}

    rnd = np.random.RandomState(seed=987)
    # one pass per amount of censoring
    for cens in (.1, .25, .4, .5, .6, .7):
        runs = {name: [] for name in measures}

        # repeatedly perform the simulation
        for _ in range(n_repeats):
            X_test, y_test, y_train, actual_c = generate_survival_data(
                n_samples,
                hazard_ratio,
                baseline_hazard=0.1,
                percentage_cens=cens,
                rnd=rnd)

            harrell_c = concordance_index_censored(y_test["event"],
                                                   y_test["time"],
                                                   X_test)[0]
            uno_c = concordance_index_ipcw(y_train, y_test, X_test)[0]

            censored_pct = (100. - y_test["event"].sum() * 100. /
                            y_test.shape[0])
            runs["censoring"].append(censored_pct)
            runs["Harrel's C"].append(actual_c - harrell_c)
            runs["Uno's C"].append(actual_c - uno_c)

        # aggregate this censoring level
        for name, values in runs.items():
            data_mean[name].append(np.mean(values))
            data_std[name].append(np.std(values, ddof=1))

    return (pd.DataFrame.from_dict(data_mean),
            pd.DataFrame.from_dict(data_std))
Exemple #10
0
def train_model():
    """Cross-validate a Deep Survival Machines model and print its metrics.

    Reads './new_survival_data.csv' (column 0: time, column 1: event) and
    './new_features.csv', assigns rows to two folds in alternation, and
    for each fold fits a Weibull model on the other fold. Prints the
    fold-averaged IPCW concordance index and Brier score at the 25%, 50%
    and 75% quantiles of the observed event times. Returns nothing.
    """
    from dsm import DeepSurvivalMachines
    import numpy as np
    from sksurv.metrics import concordance_index_ipcw, brier_score

    survival_data = np.loadtxt('./new_survival_data.csv', delimiter=',')
    features = np.loadtxt('./new_features.csv', delimiter=',')

    x = features
    t = survival_data[:, 0]
    e = survival_data[:, 1]

    times = np.quantile(t[e == 1], [0.25, 0.5, 0.75]).tolist()

    cv_folds = 2
    # Alternate fold labels 0, 1, 0, 1, ... for any number of rows. The
    # previous list-based version ran out of labels beyond 20000 rows.
    folds = np.arange(len(x)) % cv_folds

    def to_structured(events, durations):
        # Float times: an integer field would truncate fractional
        # durations that the model was fitted on.
        y = np.empty(len(events), dtype=[('e', bool), ('t', float)])
        y['e'] = events
        y['t'] = durations
        return y

    cis = []
    brs = []
    for fold in range(cv_folds):

        print("On Fold:", fold)

        in_test = folds == fold
        x_train, t_train, e_train = x[~in_test], t[~in_test], e[~in_test]
        x_test, t_test, e_test = x[in_test], t[in_test], e[in_test]
        print(x_train.shape)

        model = DeepSurvivalMachines(distribution='Weibull', layers=[100])
        model.fit(x_train, t_train, e_train, iters=10, learning_rate=1e-3, batch_size=10)

        et_train = to_structured(e_train, t_train)
        et_test = to_structured(e_test, t_test)

        out_risk = model.predict_risk(x_test, times)
        out_survival = model.predict_survival(x_test, times)

        cis.append([
            concordance_index_ipcw(et_train, et_test, out_risk[:, i], horizon)[0]
            for i, horizon in enumerate(times)
        ])

        brs.append(brier_score(et_train, et_test, out_survival, times)[1])

    print("Concordance Index:", np.mean(cis, axis=0))
    print("Brier Score:", np.mean(brs, axis=0))
def test_uno_c_failure(uno_c_failure_data):
    """Each failure case must raise a ValueError matching its message pattern."""
    train, test, scores, message_pattern = uno_c_failure_data

    with pytest.raises(ValueError, match=message_pattern):
        concordance_index_ipcw(train, test, scores)
def assert_uno_c_almost_equal(y_train, y_test, estimate, expected, tau=None):
    """Check an IPCW concordance result against `expected`.

    The entries after the first are compared exactly; the first entry
    (the index itself) is compared approximately.
    """
    actual = concordance_index_ipcw(y_train, y_test, estimate, tau=tau)
    actual_index, actual_rest = actual[0], actual[1:]
    assert_array_equal(actual_rest, expected[1:])
    assert_almost_equal(actual_index, expected[0])
def main(args):
    """
    Runs evaluation for the data set
        1. Loads model from tar.gz
        2. Reads in train and test features
        3. Runs an accuracy report
        4. Computes the IPCW concordance index and the Brier score
        5. Exports SHAP values and a feature importance plot

    Args:
        args: parsed command-line namespace. Attributes read here (the
        defaults quoted below are those documented for the CLI; they are
        not set in this function):
            model_name (str): Name of the trained model, default xgboost
            test_features (str): preprocessed test features for
             evaluation, default test_features.csv
            train_features (str): preprocessed train features for SHAP,
             default train_features.csv
            report_name (str): Name of the evaluation output,
             default evaluation.json
            shap_name (str): Name of the SHAP feature importance
             output file, default shap.csv
            threshold (float): Threshold to cut probabilities at,
             default 0.5
            tau (int): time range for the c-index will be from 0 to tau,
             default 100
    """

    model_path = os.path.join("/opt/ml/processing/model", "model.tar.gz")

    logger.info(f"Extracting model from path: {model_path}")

    # NOTE(security): extractall() and pickle.load() both trust the archive
    # contents. Only run this against model artifacts from a trusted job.
    with tarfile.open(model_path) as tar:
        tar.extractall(path=".")
    logger.info("Loading model")
    with open(args.model_name, "rb") as f:
        model = pickle.load(f)

    logger.info("Loading train and test data")

    test_features_data = os.path.join("/opt/ml/processing/test",
                                      args.test_features)
    train_features_data = os.path.join("/opt/ml/processing/train",
                                       args.train_features)

    X_test = pd.read_csv(test_features_data, header=0)
    X_train = pd.read_csv(train_features_data, header=0)

    # The first column holds the label: its sign encodes the event
    # (positive = event) and its magnitude the duration.
    y_test = X_test.iloc[:, 0]
    y_train = X_train.iloc[:, 0]

    # Reverse transform to event and duration columns
    y_test_df = pd.DataFrame(
        np.vstack((np.where(y_test > 0, 1, 0), np.abs(y_test))).T,
        columns=["event", "duration"],
    )

    y_train_df = pd.DataFrame(
        np.vstack((np.where(y_train > 0, 1, 0), np.abs(y_train))).T,
        columns=["event", "duration"],
    )

    # Drop the label column from each frame using that frame's own first
    # column. Looking the name up on X_test after its label was already
    # removed would drop a feature from X_train instead.
    X_test.drop(X_test.columns[0], axis=1, inplace=True)
    X_train.drop(X_train.columns[0], axis=1, inplace=True)

    logger.info("Running inference")

    # NOTE(review): one more leading column is skipped here although the
    # label is already dropped; confirm this matches the training layout.
    predictions = model.predict(xgboost.DMatrix(X_test.values[:, 1:]),
                                output_margin=False)

    logger.info("Creating evaluation report")

    # NOTE: technical evaluation is really not as a classifier
    # TO DO: Normalize to 0 to 1 scale
    predicted_event = predictions > args.threshold
    report_dict = classification_report(y_test_df["event"],
                                        predicted_event,
                                        output_dict=True)
    report_dict["accuracy"] = accuracy_score(y_test_df["event"],
                                             predicted_event)

    _, y_train_tuple = get_x_y(y_train_df, ["event", "duration"],
                               pos_label=True)
    _, y_test_tuple = get_x_y(y_test_df, ["event", "duration"], pos_label=True)

    concordance_index = concordance_index_ipcw(
        y_train_tuple,
        y_test_tuple,
        predictions,
        tau=args.tau,  # default within 100 days
    )

    report_dict["concordance_index"] = {
        "cindex": float(concordance_index[0]),
        "concordant": int(concordance_index[1]),
        "discordant": int(concordance_index[2]),
        "tied_risk": int(concordance_index[3]),
        "tied_time": int(concordance_index[4]),
    }

    times, score = brier_score(y_train_tuple, y_test_tuple, predictions,
                               y_test_df["duration"].max() - 1)

    report_dict["brier_score"] = {
        "times": times.astype(np.int32).tolist(),
        "score": score.astype(np.float32).tolist(),
    }

    logger.info(f"Classification report:\n{report_dict}")

    evaluation_output_path = os.path.join("/opt/ml/processing/evaluation",
                                          args.report_name)
    logger.info(f"Saving classification report to {evaluation_output_path}")

    logger.debug(report_dict)

    with open(evaluation_output_path, "w") as f:
        f.write(json.dumps(report_dict))

    # SHAP
    latest_job_debugger_artifacts_path = "/opt/ml/processing/debug/debug-output"
    trial = create_trial(latest_job_debugger_artifacts_path)

    shap_values = trial.tensor("full_shap/f0").value(trial.last_complete_step)

    pd.DataFrame(shap_values).to_csv(
        os.path.join("/opt/ml/processing/evaluation", args.shap_name))

    # Drops the first row and the last column (presumably the SHAP base
    # value column; confirm against the debugger output).
    shap_no_base = shap_values[1:, :-1]
    feature_names = X_train.columns
    os.makedirs("/opt/ml/processing/plot/", exist_ok=True)
    # A format string is required: passing the shapes as bare positional
    # arguments makes the logging module fail to format the record.
    logger.info("SHAP shapes: values=%s, without base=%s, X_train=%s",
                shap_values.shape, shap_no_base.shape, X_train.shape)
    shap.summary_plot(shap_no_base,
                      features=X_train,
                      feature_names=feature_names,
                      show=False)
    plt.savefig("/opt/ml/processing/plot/feature_importance.png",
                bbox_inches="tight")
# Show the evaluation time points that the time-dependent AUC below is computed at.
print(times)


def plot_cumulative_dynamic_auc(risk_score, label, color=None):
    """Plot the time-dependent AUC of `risk_score` on the current axes.

    Reads the module-level `y_train`, `y_test` and `times`. The AUC at
    each time is drawn as a marked line and the mean AUC as a dashed
    horizontal line in the same colour.
    """
    auc_values, auc_mean = cumulative_dynamic_auc(y_train, y_test,
                                                  risk_score, times)

    plt.plot(times, auc_values, marker="o", color=color, label=label)
    plt.axhline(auc_mean, color=color, linestyle="--")
    plt.xlabel("days from enrollment")
    plt.ylabel("time-dependent AUC")
    plt.legend()


# Use each column listed in `num_columns` directly as a risk score: plot its
# time-dependent AUC in its own colour and compute Uno's C truncated at the
# last evaluation time.
for i, col in enumerate(num_columns):
    plot_cumulative_dynamic_auc(x_test[:, i], col, color="C{}".format(i))
    # NOTE(review): `ret` is overwritten on every iteration and is not read
    # anywhere in the visible code.
    ret = concordance_index_ipcw(y_train, y_test, x_test[:, i], tau=times[-1])

from sksurv.datasets import load_veterans_lung_cancer

# Load the veterans lung cancer data set as (features, survival outcome).
va_x, va_y = load_veterans_lung_cancer()

# One-hot encode the features and fit a Cox proportional hazards model.
cph = make_pipeline(OneHotEncoder(), CoxPHSurvivalAnalysis())
cph.fit(va_x, va_y)

# Evaluation times 7, 14, ..., 182 (presumably days; confirm the time unit).
va_times = np.arange(7, 183, 7)
# estimate performance on training data, thus use `va_y` twice.
va_auc, va_mean_auc = cumulative_dynamic_auc(va_y, va_y, cph.predict(va_x),
                                             va_times)

# AUC at each evaluation time, with the mean AUC as a dashed horizontal line.
plt.plot(va_times, va_auc, marker="o")
plt.axhline(va_mean_auc, linestyle="--")
def output_bootstrap(model, n_iterations, df_train, data_train, y_train,
                     df_test, name):
    """ Compute the output of the model on the bootstrapped test set
    # Arguments
        model: neural network model trained with final parameters.
        n_iterations: number of bootstrap iterations (at least 1)
        df_train: training dataset (not used inside this function)
        data_train: structured array with survival time and censoring
            status for training samples
        y_train: frame whose 's' column holds the pseudo-observation
            times (only read for models other than Cox-CC, CoxTime and
            DeepHit)
        df_test: test dataset; 'surv_test' and 'cen_test' are read as time
            and event, all other columns as model inputs
        name: name of the model
    # Returns
        results_final: one row per metric ('auc5', 'auc10', 'unoc5',
        'unoc10': AUC and Uno C-index at 5 and 10 years) with the columns
        'mean', 'ci95_lo', 'ci95_hi', 'std' and 'count' taken over the
        bootstrap iterations. The interval bounds are the 2.5th and
        97.5th percentile of the sorted scores.
    """
    if name in ("CoxTime", "Cox-CC"):
        model.compute_baseline_hazards()

    metric_names = ['auc5', 'auc10', 'unoc5', 'unoc10']
    stat_names = ['mean', 'ci95_lo', 'ci95_hi', 'std', 'count']

    def first_value(values):
        # The AUC comes back as a one-element array; take its entry
        # explicitly instead of calling float() on the array.
        return float(np.ravel(values)[0])

    rows = []
    for i in range(n_iterations):
        print(i)
        test_boot = resample(df_test, n_samples=len(df_test), replace=True)
        x_test_boot = test_boot.drop(['surv_test', 'cen_test'], axis=1)
        duration_test_b = test_boot['surv_test'].values
        event_test_b = test_boot['cen_test'].values
        data_test_b = skSurv.from_arrays(event=event_test_b,
                                         time=duration_test_b)
        if name in ("Cox-CC", "CoxTime", "DeepHit"):
            surv = model.predict_surv_df(np.array(x_test_boot,
                                                  dtype='float32'))
        else:
            # Stack one copy of the inputs per distinct time in 's' and
            # attach that time as an extra column.
            n_picktime = int(y_train['s'].nunique())
            x_test_boot_all = pd.concat([x_test_boot] * n_picktime)
            time_test = pd.DataFrame(
                np.repeat(np.unique(y_train[['s']]), len(x_test_boot)))
            x_test_boot_all.reset_index(inplace=True, drop=True)
            x_test_boot_all = pd.concat([x_test_boot_all, time_test], axis=1)
            surv = make_predictions_pseudobs(model, y_train, x_test_boot_all,
                                             x_test_boot, name)

        # Survival probabilities at 5 and 10; negated they act as risk.
        prob_5_10 = pd.concat(
            [determine_surv_prob(surv, horizon) for horizon in (5, 10)],
            axis=1)
        risk5 = -prob_5_10.iloc[:, 0]
        risk10 = -prob_5_10.iloc[:, 1]

        rows.append({
            'auc5': first_value(
                cumulative_dynamic_auc(data_train, data_test_b, risk5, 5)[0]),
            'auc10': first_value(
                cumulative_dynamic_auc(data_train, data_test_b, risk10,
                                       10)[0]),
            'unoc5': float(
                concordance_index_ipcw(data_train, data_test_b, risk5, 5)[0]),
            'unoc10': float(
                concordance_index_ipcw(data_train, data_test_b, risk10,
                                       10)[0]),
        })

    # Built in one go: DataFrame.append no longer exists in pandas >= 2.0.
    results_all = pd.DataFrame(rows, columns=metric_names)

    summary = []
    for column in metric_names:
        stats = results_all[column].agg(['mean', 'count', 'std'])
        sorted_scores = np.sort(np.array(results_all[column]), axis=None)
        n_scores = len(sorted_scores)
        # 2.5% / 97.5% give a 95% interval; the former 5% / 95% cut-offs
        # produced a 90% interval under the 'ci95' label.
        summary.append({
            'mean': stats['mean'],
            'ci95_lo': sorted_scores[int(0.025 * n_scores)],
            'ci95_hi': sorted_scores[int(0.975 * n_scores)],
            'std': stats['std'],
            'count': stats['count'],
        })

    results_final = pd.DataFrame(summary,
                                 index=metric_names,
                                 columns=stat_names)
    return results_final
def test_uno_c_no_comparable(no_comparable_pairs):
    """Data without comparable pairs must raise NoComparablePairException."""
    survival, risk_scores = no_comparable_pairs

    with pytest.raises(NoComparablePairException):
        concordance_index_ipcw(survival, survival, risk_scores)
Exemple #17
0
 def __call__(self, E_y_true, y_pred):
     """Return the IPCW concordance index of `y_pred` against `E_y_true`.

     The predictions are validated, converted to risk scores, and scored
     against the structured training outcome stored on the instance.
     """
     self.check_y_pred_dimensions(E_y_true, y_pred)
     risk_scores = self._survival_to_risk(y_pred)
     y_test = to_structured_array(E_y_true)
     return concordance_index_ipcw(self.struct_E_y_train, y_test, risk_scores)[0]
Exemple #18
0
    # Keep the raw outputs on `dd` before the names are rebound to CUDA tensors.
    dd.output = pt_output
    dd.event = pt_event
    dd.time = pt_time
    pt_output = torch.tensor(pt_output).cuda()
    pt_event = torch.tensor(pt_event).cuda()
    pt_time = torch.tensor(pt_time).cuda()
    # NOTE(review): seven values are unpacked here; confirm that the
    # concordance_index_censored in scope really returns seven.
    cindex, concordant, discordant, tied_risk, tied_time, _, _ = concordance_index_censored(
        pt_event, pt_time, pt_output, tied_tol=1e-8)
    print("Harrell's C-index = " + str(cindex))
    print("Concordant = " + str(concordant))
    print("Discordant = " + str(discordant))
    print("Tied risk = " + str(tied_risk))
    print("Tied time = " + str(tied_time))

    # Pool the train and validation events/times; the pooled sample is passed
    # as the first (training) argument of concordance_index_ipcw below.
    dev_event = np.concatenate(
        (datasets['train'].df.event.values, datasets['val'].df.event.values))
    dev_time = np.concatenate(
        (datasets['train'].df.time.values, datasets['val'].df.time.values))
    _dev_event = [bool(i) for i in dev_event]
    dev_data = np.array([(i, j) for i, j in zip(_dev_event, dev_time)],
                        dtype=[('event', '?'), ('time', '<f8')])
    # Same structured (event, time) layout for the evaluated samples, built
    # from the tensors after moving them back to the CPU.
    _pt_event = [bool(i) for i in pt_event.cpu()]
    pt_data = np.array([(i, j) for i, j in zip(_pt_event, pt_time.cpu())],
                       dtype=[('event', '?'), ('time', '<f8')])
    cindex2, concordant2, discordant2, tied_risk2, tied_time2 = concordance_index_ipcw(
        dev_data, pt_data, pt_output.cpu(), tau=None, tied_tol=1e-08)
    print("Uno's C-index = " + str(cindex2))
    print("Concordant = " + str(concordant2))
    print("Discordant = " + str(discordant2))
    print("Tied risk = " + str(tied_risk2))
    print("Tied time = " + str(tied_time2))