def fit_and_prepare(x_train, y_train, test_df):

    # 3.1. Prepare Y-----
    y_train["specific_death"] = y_train["specific_death"].astype(bool)

    # Transform it into a structured array
    y_train = y_train.to_records(index=False)

    # 3.2. Prepare X-----
    # obtain the x variables that are categorical
    categorical_feature_mask = x_train.dtypes == object

    # Filter categorical columns using mask and turn it into a list
    categorical_cols = x_train.columns[categorical_feature_mask].tolist()

    # Ensure categorical columns are category type
    for col in categorical_cols:
        x_train[col] = x_train[col].astype('category')
        test_df[col] = test_df[col].astype('category')

    # 3.3. Fit model-----
    # initiate
    encoder = OneHotEncoder()
    estimator = CoxPHSurvivalAnalysis()

    # fit model
    estimator.fit(encoder.fit_transform(x_train), y_train)

    # transform the test variables to match the train
    x_test = encoder.transform(test_df)

    return (estimator, x_test, x_train, y_train)
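
# Possible usage sketch (hypothetical frames x_tr, y_tr, test_df with the expected
# columns): fit the Cox model, then score held-out samples by predicted risk.
# estimator, x_test, x_train, y_train = fit_and_prepare(x_tr, y_tr, test_df)
# risk_scores = estimator.predict(x_test)   # higher value = higher predicted risk
# print(estimator.score(x_train, y_train))  # Harrell's concordance index
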
def fit_and_score_features(X, y):
    n_features = X.shape[1]
    scores = np.empty(n_features)
    m = CoxPHSurvivalAnalysis()
    for j in range(n_features):
        Xj = X[:, j:j+1]
        m.fit(Xj, y)
        scores[j] = m.score(Xj, y)
    return scores
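
# Usage sketch (assumes data_x_numeric is a numeric DataFrame and y a structured
# (event, time) array, as in the later examples): rank features by univariate c-index.
# scores = fit_and_score_features(data_x_numeric.values, y)
# print(pd.Series(scores, index=data_x_numeric.columns).sort_values(ascending=False))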
Example 3
def test_brier_coxph():
    X, y = load_gbsg2()
    X.loc[:, "tgrade"] = X.loc[:, "tgrade"].map(len).astype(int)

    Xt = OneHotEncoder().fit_transform(X)

    est = CoxPHSurvivalAnalysis(ties="efron").fit(Xt, y)
    survs = est.predict_survival_function(Xt)

    preds = [fn(1825) for fn in survs]

    _, score = brier_score(y, y, preds, 1825)

    assert round(abs(score[0] - 0.208817407492645), 5) == 0
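
# A related sketch (assumes numpy imported as np and sksurv.metrics.integrated_brier_score):
# average the Brier score over a grid of times instead of the single 1825-day horizon.
# from sksurv.metrics import integrated_brier_score
# times = np.arange(365, 1826, 365)
# preds = np.asarray([[fn(t) for t in times] for fn in survs])
# ibs = integrated_brier_score(y, y, preds, times)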
Example 4

    def __init__(self):
        genes_filter = GenesFilter()
        scaler = StandardScaler()
        regressor = CoxPHSurvivalAnalysis()
        self.regr = Pipeline([('genes_filter', genes_filter),
                              ('scaler', scaler), ('regressor', regressor)])
Example 5
    def mpss_ph_sksurv(self):
        """

        Performs proportional hazards regression using sksurv package.

        :return: Feature importance
        """
        # Reformat for sksurv package
        x_train = pd.DataFrame(self.x_train)
        y_structured = [
            (ll, s) for ll, s in zip(self.y_train.astype(bool), self.scores)
        ]
        y_structured = np.array(y_structured,
                                dtype=[('class', 'bool'),
                                       ('score', 'single')])
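
        # Equivalent sketch: sksurv.util.Surv.from_arrays builds the same structured
        # array in one call (field names differ, but sksurv only uses field order).
        # from sksurv.util import Surv
        # y_structured = Surv.from_arrays(self.y_train.astype(bool), self.scores)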

        # Remove any feature columns that are all 0 values, otherwise cannot run regression
        x_train_nonzero = x_train.loc[:, (x_train != 0).any(axis=0)]

        # Run proportional hazards regression
        estimator = CoxPHSurvivalAnalysis(alpha=0.1, verbose=1)
        estimator.fit(x_train_nonzero, y_structured)
        prediction = estimator.predict(x_train_nonzero)

        # Estimate p-values for each feature
        f, pvals = f_regression(x_train_nonzero, [x[1] for x in y_structured])
        approximate_se = pd.DataFrame(pd.Series(
            pvals, index=x_train_nonzero.columns).sort_values(ascending=False),
                                      columns=['p']).reset_index()

        # Calculate concordance indicating the goodness of fit
        concordance = concordance_index_censored(self.y_train.astype(bool),
                                                 self.scores, prediction)
        print('concordance', concordance[0])

        # Dataframe with coefficients, absolute value of coefficients, and p-values
        importance = pd.DataFrame(estimator.coef_, columns=['coef'])
        importance['coef_abs'] = [math.fabs(c) for c in importance['coef']]
        importance['feature'] = importance.index.values
        importance = importance.merge(approximate_se,
                                      left_on='feature',
                                      right_on='index').drop('index', axis=1)

        # Sort feature importance
        importance = importance.sort_values(
            'coef_abs', ascending=False).reset_index(drop=True)
        return importance
Example 6
def cox(name):

    filename = filename_dict[name]
    raw_data = pd.read_csv(os.path.join(DATA_DIR, filename))
    formatted_x, formatted_y = sksurv_data_formatting(raw_data)

    x_train, x_test, y_train, y_test = train_test_split(
        formatted_x, formatted_y, test_size=0.25, random_state=RANDOM_STATE)

    estimator = CoxPHSurvivalAnalysis()
    estimator.fit(x_train, y_train)

    prediction = estimator.predict(x_test)
    result = concordance_index_censored(y_test["Status"],
                                        y_test["Survival_in_days"], prediction)

    return result[0]
Example 7
def train_coxph(data_df, r_splits):
  c_index_at = []
  c_index_30 = []

  time_auc_30 = []
  time_auc_60 = []
  time_auc_365 = []

  for i in range(len(r_splits)):
    print("\nIteration %s"%(i))
    #DATA PREP
    df_train, df_val, df_test, df_test_30 = prepare_datasets(data_df, r_splits[i][2], r_splits[i][1], r_splits[i][0])

    (data_x, data_y), (val_x, val_y), (test_x, test_y), (test_30_x, test_30_y) = df2array(data_df, df_train, df_val, df_test, df_test_30)

    estimator = CoxPHSurvivalAnalysis(alpha=1e-04)
    estimator.fit(data_x, data_y)

    c_index_at.append(estimator.score(test_x, test_y))
    c_index_30.append(estimator.score(test_30_x, test_30_y))

    # append the time-dependent AUC at each horizon to the matching list
    auc_lists = {30: time_auc_30, 60: time_auc_60, 365: time_auc_365}
    for time_x in [30, 60, 365]:
      t_auc, t_mean_auc = cumulative_dynamic_auc(data_y, test_y, estimator.predict(test_x), time_x)
      auc_lists[time_x].append(t_auc[0])

    print("C-index_30:", c_index_30[i])
    print("C-index_AT:", c_index_at[i])

    print("time_auc_30", time_auc_30[i])
    print("time_auc_60", time_auc_60[i])
    print("time_auc_365", time_auc_365[i]) 

  return c_index_at, c_index_30, time_auc_30, time_auc_60, time_auc_365
Example 8
    def test_predict_proba(self):
        meta = Stacking(_PredictDummy(),
                        [('coxph', CoxPHSurvivalAnalysis()),
                         ('svm', FastSurvivalSVM(random_state=0))],
                        probabilities=False)

        self.assertRaisesRegex(AttributeError,
                               "'_PredictDummy' object has no attribute 'predict_proba'",
                               getattr, meta, "predict_proba")
Example 9

def test_predict_proba():
    meta = Stacking(_PredictDummy(),
                    [('coxph', CoxPHSurvivalAnalysis()),
                     ('svm', FastSurvivalSVM(random_state=0))],
                    probabilities=False)

    with pytest.raises(AttributeError,
                       match="'_PredictDummy' object has no attribute 'predict_proba'"):
        getattr(meta, "predict_proba")
Example 10
    def test_fit(self):
        meta = Stacking(MeanEstimator(),
                        [('coxph', CoxPHSurvivalAnalysis()),
                         ('svm', FastSurvivalSVM(random_state=0))],
                        probabilities=False)
        self.assertEqual(2, len(meta))
        meta.fit(self.x.values, self.y)

        p = meta._predict_estimators(self.x.values)
        self.assertTupleEqual((self.x.shape[0], 2), p.shape)
Example 11
    def test_set_params(self):
        meta = Stacking(_PredictDummy(),
                        [('coxph', CoxPHSurvivalAnalysis()),
                         ('svm', FastSurvivalSVM(random_state=0))],
                        probabilities=False)

        meta.set_params(coxph__alpha=1.0, svm__alpha=0.4132)

        self.assertEqual(1.0, meta.get_params()["coxph__alpha"])
        self.assertEqual(0.4132, meta.get_params()["svm__alpha"])
Example 12

def test_score(make_whas500):
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)

    meta = Stacking(MeanEstimator(),
                    [('coxph', CoxPHSurvivalAnalysis()),
                     ('svm', FastSurvivalSVM(random_state=0))],
                    probabilities=False)

    meta.fit(whas500.x, whas500.y)
    c_index = meta.score(whas500.x, whas500.y)

    assert round(abs(c_index - 0.7848807), 5) == 0
Example 13

def test_fit(make_whas500):
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)

    meta = Stacking(MeanEstimator(),
                    [('coxph', CoxPHSurvivalAnalysis()),
                     ('svm', FastSurvivalSVM(random_state=0))],
                    probabilities=False)
    assert 2 == len(meta)
    meta.fit(whas500.x, whas500.y)

    p = meta._predict_estimators(whas500.x)
    assert (whas500.x.shape[0], 2) == p.shape
Example 14
    def test_predict(self):
        meta = Stacking(MeanEstimator(),
                        [('coxph', CoxPHSurvivalAnalysis()),
                         ('svm', FastSurvivalSVM(random_state=0))],
                        probabilities=False)

        meta.fit(self.x.values, self.y)

        # result is different if randomForestSRC has not been compiled with OpenMP support
        p = meta.predict(self.x.values)
        actual_cindex = concordance_index_censored(self.y['fstat'], self.y['lenfol'], p)

        expected_cindex = numpy.array([0.7848807, 58983, 16166, 0, 119])
        assert_array_almost_equal(expected_cindex, actual_cindex)
Example 15

def test_predict(make_whas500):
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)

    meta = Stacking(MeanEstimator(),
                    [('coxph', CoxPHSurvivalAnalysis()),
                     ('svm', FastSurvivalSVM(random_state=0))],
                    probabilities=False)

    meta.fit(whas500.x, whas500.y)

    # result is different if randomForestSRC has not been compiled with OpenMP support
    p = meta.predict(whas500.x)
    assert_cindex_almost_equal(whas500.y['fstat'], whas500.y['lenfol'], p,
                               (0.7848807, 58983, 16166, 0, 14))
Example 16

    def __init__(self):

        super(CoxPH, self).__init__()
        # super().__init__()

        self.name = 'CoxPH'

        self.model = CoxPHSurvivalAnalysis(
            alpha=0.01)  # otherwise an error occurred
        self.direction = 1
        self.prob_FLAG = True

        self.explained = "*Cox proportional model"
        self.image_name = "Cox.png"
        self.image_size = (500, 500)
Example 17

    def __init__(self, alpha=10.0):

        super(CoxPHRidge, self).__init__()
        # super().__init__()

        self.alpha = alpha
        self.name = 'CoxPHRidge'

        self.model = CoxPHSurvivalAnalysis(
            alpha=self.alpha)  # ridge regression penalty
        self.direction = 1
        self.prob_FLAG = True

        self.explained = "*Cox proportional model with ridge regression"
        self.image_name = "CoxPHRidge.png"
        self.image_size = (500, 500)
Example 18
def traditional_surv_analysis(datas, opts):

    # tidy data as ndarray
    train_X, train_Y = datas["train"].xs.numpy(), datas["train"].ys.numpy()
    test_X, test_Y = datas["test"].xs.numpy(), datas["test"].ys.numpy()
    if "val" in datas.keys():
        train_X = np.concatenate([train_X, datas["val"].xs])
        train_Y = np.concatenate([train_Y, datas["val"].ys])
    # construct structured array
    train_Y = Surv.from_arrays(train_Y[:, 1].astype("bool"), train_Y[:, 0])
    test_Y = Surv.from_arrays(test_Y[:, 1].astype("bool"), test_Y[:, 0])

    # construct estimators
    estimators = {
        "CoxPH": CoxPHSurvivalAnalysis(),
        "CGBSA": CGBSA(n_estimators=500, random_state=opts.random_seed),
        "GBSA": GBSA(n_estimators=500, random_state=opts.random_seed),
        "FKSVM": FKSVM(random_state=opts.random_seed),
        "FSVM": FSVM(random_state=opts.random_seed)
    }

    # training
    for name, estimator in estimators.items():
        print("%s training." % name)
        estimator.fit(train_X, train_Y)

    # evaluation
    train_scores = {}
    test_scores = {}
    for name, estimator in estimators.items():
        print("%s evaluation." % name)
        train_scores[name] = estimator.score(train_X, train_Y)
        test_scores[name] = estimator.score(test_X, test_Y)

    # return
    return train_scores, test_scores
Example 19
print("Linear.coef_: {}".format(Linear.coef_))
print("Linear.intercept_ : {}".format(Linear.intercept_))
print("훈련 세트의 정확도 : {:.2f}".format(Linear.score(X_train, y_train)))
print("테스트 세트의 정확도 : {:.2f}".format(Linear.score(X_test, y_test)))

# %%
from sklearn.linear_model import Lasso
Lasso = Lasso(alpha=5e-12, normalize=True).fit(X_train, y_train)
print("Lasso.coef_: {}".format(Lasso.coef_))
print("Lasso.intercept_ : {}".format(Lasso.intercept_))
print("Training set accuracy: {:.2f}".format(Lasso.score(X_train, y_train)))
print("Test set accuracy: {:.2f}".format(Lasso.score(X_test, y_test)))

# %%

Cox = CoxPHSurvivalAnalysis().fit(
    df_LR[['w_SS']], y_train)  # note: fit(X, y); y must be a structured (event, time) array
print("Cox.coef_: {}".format(Cox.coef_))
# CoxPHSurvivalAnalysis has no intercept_ attribute; the baseline hazard plays that role.
print("Training set score (c-index): {:.2f}".format(Cox.score(X_train, y_train)))
print("Test set score (c-index): {:.2f}".format(Cox.score(X_test, y_test)))

# %%
X = df_LR[[
    "정규화_인구", "정규화_교통량_07", "정규화_교통량_15", "정규화_혼잡빈도강도합", "정규화_혼잡시간강도합",
    "정규화_자동차등록", "정규화_전기자동차등록"
]]

# %%
X = X.astype(float)
# Note: sksurv expects y as a structured (event, time) array, not a plain int array.
Cox = CoxPHSurvivalAnalysis().fit(X, np.array(list(map(int, y_train))))
# %%
Example 20
#data_x_1['date_weekday'] = data_x_1['date'].dt.weekday
#data_x_1['date_hour'] = data_x_1['date'].dt.hour
#data_x_1['date_minute'] = data_x_1['date'].dt.minute
#data_x_1['date_second'] = data_x_1['date'].dt.second
#data_x_1.drop(columns=['date','created', 'install_diff','device_brand','install_seconds','user_agent'], inplace=True)
#data_x_1_numeric = pd.get_dummies(data_x_1, dummy_na=True, prefix_sep='=')

#%%
data_y_1 = np.fromiter(zip(
    data_full_1.head(100)["status_censored"],
    data_full_1.head(100)["in_seconds"]),
                       dtype=[('status_censored', bool),
                              ('in_seconds', np.float64)])

#%%
estimator = CoxPHSurvivalAnalysis(alpha=0.1)
estimator.fit(data_x_1_numeric.head(100), data_y_1)
estimator.score(data_x_1_numeric.head(100), data_y_1)


#%%
def fit_and_score_features(X, y, alpha=0.1):
    n_features = X.shape[1]
    scores = np.empty(n_features)
    m = CoxPHSurvivalAnalysis(alpha=alpha)
    for j in range(n_features):
        Xj = X[:, j:j + 1]
        m.fit(Xj, y)
        scores[j] = m.score(Xj, y)
    return scores
Example 21
# %%
# search space for the XGBRegressor below; keys must match its parameter names
params = {
    'gamma': (1e-6, 1e+6, 'log-uniform'),
    'learning_rate': (1e-6, 1e+1, 'log-uniform'),
    'max_depth': (1, 8),  # integer-valued parameter
    'min_child_weight': (10, 500, 'log-uniform'),
    'n_estimators': (1, 8),  # integer-valued parameter
    'reg_alpha': (1, 8, 'log-uniform'),
    'reg_lambda': (1, 8, 'log-uniform'),
    'subsample': (1, 8, 'log-uniform'),
}
#%%
# Since sksurv outputs log hazard ratios (here relative to 0 on the predictors),
# we must use 'output_margin=True' for comparability.
estimator = CoxPHSurvivalAnalysis().fit(data_x, data_y)
gbm = xgb.XGBRegressor(objective='survival:cox',
                       booster='gblinear',
                       base_score=1,
                       n_estimators=1000)

search = BayesSearchCV(gbm, params, n_iter=3, cv=3)
search.fit(data_x, data_y_xgb)

#%%
prediction_sksurv = estimator.predict(data_x)
# take the raw margin (log hazard ratio) from XGBoost so both outputs are on the same scale
predictions_xgb = search.best_estimator_.predict(data_x, output_margin=True)
d = pd.DataFrame({'xgb': predictions_xgb,
                  'sksurv': prediction_sksurv})
d.head()
Example 22

def plot_cumulative_dynamic_auc(risk_score, label, color=None):
    auc, mean_auc = cumulative_dynamic_auc(y_train, y_test, risk_score, times)

    plt.plot(times, auc, marker="o", color=color, label=label)
    plt.xlabel("days from enrollment")
    plt.ylabel("time-dependent AUC")
    plt.axhline(mean_auc, color=color, linestyle="--")
    plt.legend()


for i, col in enumerate(num_columns):
    plot_cumulative_dynamic_auc(x_test[:, i], col, color="C{}".format(i))
    ret = concordance_index_ipcw(y_train, y_test, x_test[:, i], tau=times[-1])

from sksurv.datasets import load_veterans_lung_cancer

va_x, va_y = load_veterans_lung_cancer()

cph = make_pipeline(OneHotEncoder(), CoxPHSurvivalAnalysis())
cph.fit(va_x, va_y)

va_times = np.arange(7, 183, 7)
# estimate performance on training data, thus use `va_y` twice.
va_auc, va_mean_auc = cumulative_dynamic_auc(va_y, va_y, cph.predict(va_x),
                                             va_times)

plt.plot(va_times, va_auc, marker="o")
plt.axhline(va_mean_auc, linestyle="--")
plt.xlabel("days from enrollment")
plt.ylabel("time-dependent AUC")
plt.grid(True)
Example 23
trainData = np.concatenate([trainData, testData], 0)
trainData = trainData[:, lowerbound:upperbound]

data_y = trainData[:, :2]
data_x = trainData[:, 2:]
x, y = data_x.shape
data_x += 0.001 * np.random.random((x, y))
gf_day = list(trainData[:, 0])
gf_1year_label = list(trainData[:, 1])
gf_1year_label = list(map(lambda x: x == 1, gf_1year_label))
dt = np.dtype('bool,float')
data_y = [(gf_1year_label[i], gf_day[i]) for i in range(len(gf_1year_label))]
data_y = np.array(data_y, dtype=dt)

t1 = time()
estimator = CoxPHSurvivalAnalysis()
estimator.fit(data_x[:train_num], data_y[:train_num])
print('fitting estimate cost {} seconds'.format(int(time() - t1)))
print(estimator.score(data_x[train_num:], data_y[train_num:]))
data_x, data_y = load_veterans_lung_cancer()

#pd.DataFrame.from_records(data_y[[11, 5, 32, 13, 23]], index=range(1, 6))


time, survival_prob = kaplan_meier_estimator(data_y["Status"], data_y["Survival_in_days"])
plt.step(time, survival_prob, where="post")
plt.ylabel("est. probability of survival $\hat{S}(t)$")
plt.xlabel("time $t$")

print(data_x["Treatment"].value_counts())
Example 24
T = T * 7  # weeks --> days

# DataFrame --> structured array
dataframe = {'censor': C, 'time': T}  # series --> DataFrame
df_ss_tc = pd.DataFrame(dataframe)
s = df_ss_tc.dtypes
y_ss = np.array([tuple(x) for x in df_ss_tc.values],
                dtype=list(zip(s.index, s)))
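
#%%
# Equivalent shortcut (a sketch; assumes the 'censor' column holds booleans):
# sksurv.util.Surv.from_dataframe builds the same structured array directly.
from sksurv.util import Surv

y_ss = Surv.from_dataframe('censor', 'time', df_ss_tc)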

#%%
# Conventional CPH

#%%
X_ss = df.copy(deep=True)

cph = CoxPHSurvivalAnalysis()
c_index['cph'] = cross_val_score(cph,
                                 X_ss,
                                 y_ss,
                                 cv=k_fold,
                                 scoring=c_index_scorer,
                                 verbose=1)

# %%
# Random Survival Forests (RSFs)

#%%
X_rsf = df.copy(deep=True)

rsf = RandomSurvivalForest(random_state=SEED, n_jobs=-1, verbose=True)
rsf.criterion = 'log_rank'
Example 25

    E = df['LapseIndicator'] == 1

    df2['E'] = E
    df2['T'] = T

    X, y = get_x_y(df2, ['E', 'T'], pos_label=True)

    for c in X.columns.values:
        if c != 'AGE AT DOC':
            X[c] = X[c].astype('category')

    data_x_numeric = OneHotEncoder().fit_transform(X)
    #%%

    estimator = CoxPHSurvivalAnalysis(verbose=True, n_iter=10000)
    estimator.fit(data_x_numeric, y)
    #%%

    print()
    print(pd.Series(estimator.coef_, index=data_x_numeric.columns))
    print()

    print(estimator.score(data_x_numeric, y))
    print()

    scores = fit_and_score_features(data_x_numeric.values, y)
    print(
        pd.Series(scores,
                  index=data_x_numeric.columns).sort_values(ascending=False))
    #%%
Example 26
#
# The first model I will try is a CoxPH model, which expresses the hazard ratio as a function of the features and the associated parameters I want to evaluate.
#
# ### CoxPH model with feature selection
#
# Since I need to test the model, I will subdivide the dataset into 3 folds and perform cross-validation: the model is fitted on 2 of the 3 folds, tested on the third, and scored (with the concordance index used for CoxPH models). This process is repeated 3 times, and comparing the scores gives an insight into which model is best.
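#
# A minimal sketch of this scheme (hypothetical names: `features` is the numeric
# feature matrix, `target` the structured (event, time) array). A sksurv estimator's
# `score` method is already the concordance index, so plain cross-validation works:
#
#     from sklearn.model_selection import cross_val_score
#     scores = cross_val_score(CoxPHSurvivalAnalysis(alpha=0.1), features, target, cv=3)
#     print(scores)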

# In[536]:

from sksurv.linear_model import CoxPHSurvivalAnalysis
from sklearn import model_selection

output_train = output_train_ini[['Event', 'SurvivalTime']].copy()
output_train['Event'] = output_train['Event'].astype('bool')

estimator = CoxPHSurvivalAnalysis(alpha=0.1)


def create_apply_model(est, feature, target, nb_cv):
    # add a little noise to the data to ensure the columns are independent and do not raise a LinAlgError
    n = np.size(feature, 0)
    p = np.size(feature, 1)
    cv_results = model_selection.cross_validate(est,
                                                feature.values +
                                                0.00001 * np.random.rand(n, p),
                                                target.to_records(index=False),
                                                cv=nb_cv,
                                                return_estimator=True)

    best_estimator = cv_results['estimator'][np.argmax(cv_results['test_score'])]
Example 27

## Correct follow-up days less than 0 to 0
for idx, item in enumerate(data_y['time_to_event']):
    if item < 0:
        data_y['time_to_event'][idx] = 0
data_y

df.groupby('status').count()

### Part 2: Cox-PH model with Ridge Penalty

# tune the alpha parameter via grid search according to the c-index

from sksurv.linear_model import CoxPHSurvivalAnalysis
from sklearn.model_selection import GridSearchCV

coxph = CoxPHSurvivalAnalysis()
grid_values = {'alpha': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]}

grid_c = GridSearchCV(coxph, param_grid=grid_values, scoring=None)
grid_c.fit(data_x, data_y)

print('Grid best parameter (max c-index): ', grid_c.best_params_)
print('Grid best score (c-index): ', grid_c.best_score_)

# Apply the Cox-PH model with 3-fold, 10-repeat CV, using the optimal alpha selected by the grid search:

from sklearn.model_selection import RepeatedKFold

rkf = RepeatedKFold(n_splits=3, n_repeats=10,
                    random_state=0)  # 3-fold 10-repeated CV
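
# The snippet ends after defining the splitter; a sketch of the intended final step
# (reusing data_x, data_y and the alpha selected by the grid search above):
from sklearn.model_selection import cross_val_score

coxph_best = CoxPHSurvivalAnalysis(alpha=grid_c.best_params_['alpha'])
cv_scores = cross_val_score(coxph_best, data_x, data_y, cv=rkf)
print('Mean c-index over the 30 fits: {:.3f}'.format(cv_scores.mean()))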
Example 28
    # -------------------  Compute C-Index for Individual features --------------------
    results = {}

    logger.info("Start C-Index computation for each feature.")
    for anno in feat.keys():
        n_feat = len(feat[anno])
        for k_feat, f_name in enumerate(feat[anno].keys()):
            rater_dict = {'rank': [], 'icc': [], 'c-index': []}
            for rater in range(num_raters[anno]):
                x = feat[anno][f_name][:,
                                       rater][:,
                                              None]  # (n_samples, m_features)
                y = survial_data[['event', 'time']]

                # Model
                predictor = CoxPHSurvivalAnalysis(alpha=0, n_iter=int(1e9))

                # Evaluation
                c_indexes = stratified_cross_val(predictor, x, y,
                                                 n_splits=5).tolist()
                rater_dict['c-index'].append(c_indexes)

                # Option 1: Stability ranking based on this dataset
                rater_dict['rank'].append(sorted_rad_stab[f_name])
                rater_dict['icc'].append(rad_metric_val[f_name])

                # Option 2: Stability ranking based on all datasets
                # rater_dict['rank'].append(sorted_rad_stab_mul[f_name])
                # rater_dict['icc'].append(rad_metric_val_mul[f_name])

                logger.info(
Example 29
                                                           data_y["Survival_in_days"][mask])
    plt.step(time_cell, survival_prob_cell, where="post",
             label="%s (n = %d)" % (value, mask.sum()))

plt.ylabel("est. probability of survival $\hat{S}(t)$")
plt.xlabel("time $t$")
plt.legend(loc="best")

from sksurv.preprocessing import OneHotEncoder

data_x_numeric = OneHotEncoder().fit_transform(data_x)
data_x_numeric.head()

from sksurv.linear_model import CoxPHSurvivalAnalysis

estimator = CoxPHSurvivalAnalysis()
estimator.fit(data_x_numeric, data_y)

pd.Series(estimator.coef_, index=data_x_numeric.columns)

x_new = pd.DataFrame.from_dict({
    1: [65, 0, 0, 1, 60, 1, 0, 1],
    2: [65, 0, 0, 1, 60, 1, 0, 0],
    3: [65, 0, 1, 0, 60, 1, 0, 0],
    4: [65, 0, 1, 0, 60, 1, 0, 1]},
     columns=data_x_numeric.columns, orient='index')
x_new

import numpy as np

pred_surv = estimator.predict_survival_function(x_new)
Example 30
    data[['tum_area_from_vol', 'dens_ADCT2w']], data[['init_tum_area']],
    data[['TTum330_alphaG1120']]
]

y = np.zeros(76,
             dtype={
                 'names': ('bio_rec_6', 'bio_rec_6_delay'),
                 'formats': ('bool', 'int')
             })
y['bio_rec_6'] = data[['bio_rec_6']].to_numpy().ravel()
y['bio_rec_6_delay'] = data[['bio_rec_6_delay']].to_numpy().ravel()

nTest = 38
auc = np.zeros((len(tx), 3))
meanAuc = np.zeros((N, len(tx)))
cph = CoxPHSurvivalAnalysis()
sss = StratifiedShuffleSplit(n_splits=N, test_size=nTest)

for i, x in enumerate(tx):
    x = x.to_numpy()
    j = 0
    for train, test in sss.split(x, y['bio_rec_6']):
        xtrain, ytrain = x[train], y[train]
        xtest, ytest = x[test], y[test]
        cph.fit(xtrain, ytrain)
        ypred = cph.predict(xtrain)
        time = np.linspace(
            np.min(ytrain['bio_rec_6_delay']) + 1,
            np.max(ytrain['bio_rec_6_delay']) - 1, 3)
        auc[i, :], meanAuc[j,
                           i] = cumulative_dynamic_auc(ytrain, ytrain, ypred,