Esempio n. 1
0
 def compare_to_actual(self, is_at_risk=False):
     """Run the ``compare_to_actual`` helper on this object's test split.

     Inside this body the name ``compare_to_actual`` resolves to the
     module-level function (assumed to be imported elsewhere in the file --
     not visible here), not to this method.

     Args:
         is_at_risk: forwarded unchanged to the helper.

     Returns:
         The helper's return value for the metrics ``'rmse'``, ``'mean'``
         and ``'median'``. Previously the value was computed and dropped,
         so callers always received ``None``.
     """
     return compare_to_actual(
         self.model_forest,
         self.get_X_test(),
         self.get_T_test(),
         self.get_E_test(),
         is_at_risk=is_at_risk,
         figure_size=(16, 6),
         metrics=['rmse', 'mean', 'median'],
     )
Esempio n. 2
0
# Score `csf` on the `test` frame. The calls below pass test[features],
# test['PFS'] and test['disease_progress'] in that order (presumably
# features, time and event indicator -- confirm against the helpers).
c_index = concordance_index(csf, test[features], test['PFS'],
                            test['disease_progress'])
print('C-index: {:.2f}'.format(c_index))

ibs = integrated_brier_score(csf,
                             test[features],
                             test['PFS'],
                             test['disease_progress'],
                             t_max=84,
                             figure_size=(20, 6.5))
print('IBS: {:.2f}'.format(ibs))

results = compare_to_actual(csf,
                            test[features],
                            test['PFS'],
                            test['disease_progress'],
                            is_at_risk=False,
                            figure_size=(16, 6),
                            metrics=['rmse', 'mean', 'median'])

# One grid, 0..84 in steps of 2, used both as the argument to compute_scores
# and as the column labels of the C-index table.
eval_times = list(np.arange(0, 86, 2))

csf_c_index, csf_brier_score = compute_scores(csf, test, eval_times, features)

series_c_index = pd.Series(csf_c_index, index=eval_times)
# DataFrame.append was removed in pandas 2.0. Building the frame from a list
# holding the single Series yields the same one-row table (row label 0,
# columns taken from the Series index) on old and new pandas alike.
csf_c_index_table_nonutrition = pd.DataFrame([series_c_index])

# Column-only frame labelled by csf_brier_score[0]; no rows are added here.
csf_brier_score_table_nonutrition = pd.DataFrame(columns=csf_brier_score[0])
# NOTE(review): the statement below looks like two snippets spliced together.
# It opens as pd.Series(csf_brier_score[1], ...), but the remaining arguments
# (X_test, T_test, E_test, t_max, figure_size) mirror the
# integrated_brier_score calls elsewhere in this file, and pd.Series accepts
# neither t_max nor figure_size, so the call cannot succeed as written.
# TODO: restore the two original statements from the source notebook.
series_brier_score = pd.Series(csf_brier_score[1],
                             X_test,
                             T_test,
                             E_test,
                             t_max=12,
                             figure_size=(15, 5))
# `ibs` is not assigned by the statement above; this prints the value it
# already holds.
print('IBS: {:.2f}'.format(ibs))

# Now that we have built a model that appears to perform well, let's compare the time series of the actual and predicted numbers of customers who stop doing business with the SaaS company at each time t.

# In[ ]:

from pysurvival.utils.display import compare_to_actual
# Compare actual and predicted values on the test split with the three
# requested metrics; the helper's return value is kept in `results`.
results = compare_to_actual(
    csf,
    X_test,
    T_test,
    E_test,
    is_at_risk=False,
    figure_size=(16, 6),
    metrics=['rmse', 'mean', 'median'],
)

# Now that we know that we can provide reliable predictions for an entire cohort, let's compute the probability of remaining a customer for all times t.
#
# First, we can construct the risk groups from the distribution of the risk scores. The helper function create_risk_groups, found in pysurvival.utils.display, does this for us:

# In[ ]:

from pysurvival.utils.display import create_risk_groups

risk_groups = create_risk_groups(model=csf,
                                 X=X_test,
                                 use_log=False,
Esempio n. 4
0
def cv_train_and_report_model(X,
                              T,
                              E,
                              show=True,
                              num_tree=10,
                              max_depth=1,
                              min_node=2,
                              kf=None,
                              prep_model=None,
                              t_max=100):
    """Cross-validate a survival model and collect per-fold scores.

    For every ``(train, test)`` index pair yielded by ``kf.split(X, E)`` a
    model is built from the training rows with ``prep_model`` and scored on
    the held-out rows with ``concordance_index`` and an integrated Brier
    score.

    Args:
        X: row-indexable through ``.iloc``; passed first to the model and
            metric helpers (presumably the feature table -- confirm).
        T: row-indexable through ``.iloc``; passed second (presumably the
            observed times -- confirm).
        E: row-indexable through ``.iloc``; passed third and also used as
            the stratification target in ``kf.split(X, E)`` (presumably the
            event indicator -- confirm).
        show: when true, print the fold number, C-index and IBS and call
            ``compare_to_actual`` and ``integrated_brier_score``; when
            false, obtain the IBS from ``ibs_no_figure`` and print nothing.
        num_tree: ``num_trees`` of the default
            ``RandomSurvivalForestModel``; unused when ``prep_model`` is
            supplied.
        max_depth: ``max_depth`` passed to the default model's ``fit``;
            unused when ``prep_model`` is supplied.
        min_node: ``min_node_size`` passed to the default model's ``fit``;
            unused when ``prep_model`` is supplied.
        kf: splitter exposing ``split(X, E)`` and, when ``show`` is true,
            an ``n_splits`` attribute. Defaults to
            ``StratifiedKFold(n_splits=10, shuffle=True)``.
        prep_model: callable ``(X_train, T_train, E_train) -> model``
            returning a fitted model. Defaults to a random survival forest
            built from the hyper-parameters above.
        t_max: forwarded as ``t_max`` to whichever Brier-score helper is
            used. Defaults to 100, the value that used to be hard-coded.

    Returns:
        ``(cis, ibss)``: two lists holding one C-index and one integrated
        Brier score per fold, in fold order.
    """

    def _default_prep_model(X_fit, T_fit, E_fit):
        # Closes over num_tree / max_depth / min_node of the enclosing call.
        forest = RandomSurvivalForestModel(num_trees=num_tree)
        forest.fit(X_fit,
                   T_fit,
                   E_fit,
                   max_features='sqrt',
                   max_depth=max_depth,
                   min_node_size=min_node,
                   num_threads=-1,
                   sample_size_pct=0.63,
                   importance_mode='normalized_permutation',
                   seed=None,
                   save_memory=False)
        return forest

    if prep_model is None:
        prep_model = _default_prep_model
    if kf is None:
        kf = StratifiedKFold(n_splits=10, shuffle=True)

    cis = []
    ibss = []
    for fold, (train_index, test_index) in enumerate(kf.split(X, E), start=1):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        T_train, T_test = T.iloc[train_index], T.iloc[test_index]
        E_train, E_test = E.iloc[train_index], E.iloc[test_index]

        model = prep_model(X_train, T_train, E_train)
        c_index = concordance_index(model, X_test, T_test, E_test)

        if show:
            print('\n {} of kfold {}'.format(fold, kf.n_splits))
            print('C-index: {:.2f}'.format(c_index))
            # Called for its output only; its return value is not used.
            compare_to_actual(model,
                              X_test,
                              T_test,
                              E_test,
                              is_at_risk=True,
                              figure_size=(16, 6),
                              metrics=['rmse', 'mean', 'median'])
            ibs = integrated_brier_score(model,
                                         X_test,
                                         T_test,
                                         E_test,
                                         t_max=t_max,
                                         figure_size=(15, 5))
            print('IBS: {:.2f}'.format(ibs))
        else:
            ibs = ibs_no_figure(model, X_test, T_test, E_test, t_max=t_max)

        cis.append(c_index)
        ibss.append(ibs)
    return cis, ibss