Beispiel #1
0
    def test_from_array_int_time(surv_arrays):
        """Surv.from_arrays given integer times must equal the expected record array.

        The expected array declares ``time`` as float while both the values
        written into it and the values handed to ``Surv.from_arrays`` are
        integer casts of the transformed fixture times.
        """
        event, time = surv_arrays
        # Augmented assignments, as in the fixture-consuming original: shift by
        # one, then square the values.
        time += 1
        time *= time

        bool_event = event.astype(bool)
        int_time = time.astype(int)

        record_dtype = [('event', bool), ('time', float)]
        expected = numpy.empty(shape=100, dtype=record_dtype)
        expected['time'] = int_time
        expected['event'] = bool_event

        y = Surv.from_arrays(bool_event, int_time)
        assert_array_equal(y, expected)
Beispiel #2
0
    def test_from_array_with_one_name_2(surv_arrays):
        """Override only ``name_time``; the event field is expected to stay 'event'."""
        event, time = surv_arrays
        bool_event = event.astype(bool)

        record_dtype = [('event', bool), ('survival_time', float)]
        expected = numpy.empty(shape=100, dtype=record_dtype)
        expected['survival_time'] = time
        expected['event'] = bool_event

        y = Surv.from_arrays(bool_event, time, name_time='survival_time')
        assert_array_equal(y, expected)
    def test_simple(simple_data_km):
        """Check the Kaplan-Meier function and the estimator class against reference data."""
        time, event, true_x, true_y = simple_data_km

        # Functional interface.
        km_time, km_prob = kaplan_meier_estimator(event, time)
        assert_array_equal(km_time, true_x)
        assert_array_almost_equal(km_prob, true_y)

        # Estimator interface fitted on the same data as a structured array.
        est = SurvivalFunctionEstimator().fit(Surv.from_arrays(event, time))

        # The first entry of the fitted attributes is skipped before comparing
        # (presumably an extra leading point added by the estimator -- confirm).
        assert_array_equal(est.unique_time_[1:], true_x)
        assert_array_almost_equal(est.prob_[1:], true_y)

        # Predicting at the reference times must reproduce the reference curve.
        assert_array_almost_equal(est.predict_proba(true_x), true_y)
Beispiel #4
0
    def test_from_array_with_names(self):
        """Both field names are overridden via ``name_event`` and ``name_time``."""
        event, time = self.arrays
        bool_event = event.astype(bool)

        record_dtype = [('death', bool), ('survival_time', float)]
        expected = numpy.empty(shape=100, dtype=record_dtype)
        expected['survival_time'] = time
        expected['death'] = bool_event

        y = Surv.from_arrays(bool_event, time,
                             name_event='death',
                             name_time='survival_time')
        assert_array_equal(y, expected)
Beispiel #5
0
    def test_dropout_rate(self):
        """fit() must raise ValueError when dropout_rate is -0.1 or 1.2."""
        x = numpy.arange(100).reshape(5, 20)
        y = Surv.from_arrays([False, False, True, True, False],
                             [12, 14, 6, 9, 1])

        model = self.ESTIMATOR(dropout_rate=-0.1)
        with self.assertRaisesRegex(
                ValueError,
                r"dropout_rate must be within \[0; 1\[, but was -0.1"):
            model.fit(x, y)

        model.set_params(dropout_rate=1.2)
        with self.assertRaisesRegex(
                ValueError,
                r"dropout_rate must be within \[0; 1\[, but was 1.2"):
            model.fit(x, y)
Beispiel #6
0
def traditional_surv_analysis(datas, opts):
    """Fit several classical survival estimators and score them on train and test data.

    Arguments:
        datas: mapping with "train" and "test" entries (and optionally "val"),
            each exposing ``xs`` and ``ys``. The validation split, when present,
            is appended to the training data.
        opts: object providing ``random_seed`` for the estimators that take one.

    Returns:
        ``(train_scores, test_scores)``: two dicts keyed by estimator name
        holding the value returned by each estimator's ``score`` method.
    """
    # Convert the splits to ndarrays.
    train_split, test_split = datas["train"], datas["test"]
    train_X = train_split.xs.numpy()
    train_Y = train_split.ys.numpy()
    test_X = test_split.xs.numpy()
    test_Y = test_split.ys.numpy()

    if "val" in datas.keys():
        val_split = datas["val"]
        # NOTE(review): unlike train/test, the validation data is passed on
        # without .numpy(); np.concatenate performs the conversion.
        train_X = np.concatenate([train_X, val_split.xs])
        train_Y = np.concatenate([train_Y, val_split.ys])

    # Build structured arrays: column 1 is used as the event flag, column 0 as the time.
    train_Y = Surv.from_arrays(train_Y[:, 1].astype("bool"), train_Y[:, 0])
    test_Y = Surv.from_arrays(test_Y[:, 1].astype("bool"), test_Y[:, 0])

    seed = opts.random_seed
    estimators = {
        "CoxPH": CoxPHSurvivalAnalysis(),
        "CGBSA": CGBSA(n_estimators=500, random_state=seed),
        "GBSA": GBSA(n_estimators=500, random_state=seed),
        "FKSVM": FKSVM(random_state=seed),
        "FSVM": FSVM(random_state=seed)
    }

    # Train every estimator before any of them is evaluated.
    for label, fitted in estimators.items():
        print("%s training." % label)
        fitted.fit(train_X, train_Y)

    train_scores, test_scores = {}, {}
    for label, fitted in estimators.items():
        print("%s evaluation." % label)
        train_scores[label] = fitted.score(train_X, train_Y)
        test_scores[label] = fitted.score(test_X, test_Y)

    return train_scores, test_scores
Beispiel #7
0
    def test_n_estimators(self):
        """fit() must raise ValueError when n_estimators is 0 or -1."""
        x = numpy.arange(100).reshape(5, 20)
        y = Surv.from_arrays([False, False, True, True, False],
                             [12, 14, 6, 9, 1])

        model = self.ESTIMATOR(n_estimators=0)
        with self.assertRaisesRegex(
                ValueError, "n_estimators must be greater than 0 but was 0"):
            model.fit(x, y)

        model.set_params(n_estimators=-1)
        with self.assertRaisesRegex(
                ValueError, "n_estimators must be greater than 0 but was -1"):
            model.fit(x, y)
Beispiel #8
0
    def test_subsample(self):
        """fit() must raise ValueError when subsample is 0 or 1.2."""
        x = numpy.arange(100).reshape(5, 20)
        y = Surv.from_arrays([False, False, True, True, False],
                             [12, 14, 6, 9, 1])

        model = self.ESTIMATOR(subsample=0)
        with self.assertRaisesRegex(
                ValueError, "subsample must be in ]0; 1] but was 0"):
            model.fit(x, y)

        model.set_params(subsample=1.2)
        with self.assertRaisesRegex(
                ValueError, "subsample must be in ]0; 1] but was 1.2"):
            model.fit(x, y)
def toy_data():
    """Return a six-sample, two-feature toy data set with reproducible survival times.

    Times are drawn from an exponential distribution (scale 8) using a
    ``RandomState`` seeded with 0 and sorted ascending; the event field of the
    structured array is named 'status'.
    """
    x = numpy.array([[1., 1.],
                     [10.2, 15.],
                     [20., 5.],
                     [40, 30],
                     [45, 21],
                     [50, 36]])
    n_samples = x.shape[0]

    rng = numpy.random.RandomState(0)
    sorted_times = numpy.sort(rng.exponential(scale=8, size=n_samples))

    status = [True, True, False, True, False, False]
    y = Surv.from_arrays(status, sorted_times, name_event='status')
    return x, y
    def test_regression_not_supported(self):
        """With rank_ratio=0, the 'simple' and 'PRSVM' optimizers must be rejected by fit()."""
        x = numpy.zeros((100, 10))
        y = Surv.from_arrays(numpy.ones(100, dtype=bool),
                             numpy.arange(100, dtype=float))

        ssvm = FastSurvivalSVM(rank_ratio=0, optimizer='simple')
        with self.assertRaisesRegex(
                ValueError,
                "optimizer 'simple' does not implement regression objective"):
            ssvm.fit(x, y)

        ssvm.set_params(optimizer='PRSVM')
        with self.assertRaisesRegex(
                ValueError,
                "optimizer 'PRSVM' does not implement regression objective"):
            ssvm.fit(x, y)
Beispiel #11
0
    def test_simple(self):
        """Fit a lasso (l1_ratio=1.0) Coxnet model on five samples and compare the path.

        The leading alphas are compared with hard-coded reference values and
        the matching coefficient columns with the table in SIMPLE_COEF_FILE.
        """
        features = pandas.DataFrame({
            "F1": [1, 1, 1, 0, 0],
            "F2": [23, 43, 54, 75, 67],
            "F3": [120, 98, 78, 91, 79],
            "F4": [0.123, 0.541, 0.784, 0.846, 0.331]
        })
        y = Surv.from_arrays([True, False, False, True, False],
                             [7., 8., 11., 11., 23.],
                             name_event="D",
                             name_time="Y")

        coxnet = CoxnetSurvivalAnalysis(l1_ratio=1.0)
        coxnet.fit(features.values, y)

        expected_alphas = numpy.array([
            7.02666666666667, 6.40243696630484, 5.83366211207401,
            5.31541564828386, 4.84320877198972, 4.41295145312887,
            4.02091700863675, 3.66370982370111, 3.3382359405709,
            3.04167626017436, 2.77146212443153, 2.52525306776672,
            2.30091654511542, 2.09650946083909, 1.91026133856035,
            1.74055898614351, 1.5859325229961, 1.44504264866632,
            1.31666904246323, 1.19969979362274, 1.09312177046848,
            0.996011845149902, 0.907528897950459, 0.826906531910992,
            0.753446434665921, 0.686512329995589, 0.625524466706047,
            0.569954597101554, 0.519321401555745, 0.473186319551291,
            0.431149751078499, 0.392847595491192, 0.357948097841098,
            0.326148975375191, 0.297174799307102, 0.270774609184727,
            0.24671973919085, 0.22480183754923, 0.204831061881182,
            0.186634434881721, 0.170054346072885, 0.154947186657187,
            0.141182105646904, 0.128639876495421, 0.117211864413924,
            0.106799085428826, 0.0973113490299429, 0.0886664769834391,
            0.0807895915432809, 0.0736124668960205, 0.0670729382214382
        ])
        # Only as many path entries as there are reference alphas are compared.
        n_reference = len(expected_alphas)

        # FIXME (marker carried over; the outstanding issue is not stated here)
        assert_array_almost_equal(expected_alphas,
                                  coxnet.alphas_[:n_reference])

        expected_coef = pandas.read_csv(SIMPLE_COEF_FILE,
                                        header=None,
                                        skiprows=1)
        coef = pandas.DataFrame(coxnet.coef_[:, :n_reference], dtype=float)

        assert_columns_almost_equal(coef, expected_coef)
Beispiel #12
0
    def test_sample_weight(self):
        """fit() must reject sample weights whose length differs from the 5 samples."""
        x = numpy.arange(100).reshape(5, 20)
        y = Surv.from_arrays([False, False, True, True, False],
                             [12, 14, 6, 9, 1])

        model = self.ESTIMATOR()
        too_few = [2, 3, 4]
        with self.assertRaisesRegex(
                ValueError,
                r"Found input variables with inconsistent numbers of samples: \[5, 3\]"):
            model.fit(x, y, too_few)

        # NOTE(review): an invalid dropout_rate is set here, yet the expected
        # error is still the sample-count mismatch -- confirm this is intended.
        model.set_params(dropout_rate=1.2)
        too_many = [2, 4, 5, 6, 7, 1, 2, 7]
        with self.assertRaisesRegex(
                ValueError,
                r"Found input variables with inconsistent numbers of samples: \[5, 8\]"):
            model.fit(x, y, too_many)
    def test_rank_ratio_out_of_bounds(self):
        """fit() must raise ValueError for rank_ratio values -1, 1.2, NaN and infinity."""
        x = numpy.zeros((100, 10))
        y = Surv.from_arrays(numpy.ones(100, dtype=bool),
                             numpy.arange(100, dtype=float))
        message = r"rank_ratio must be in \[0; 1\]"

        # The first invalid value goes through the constructor ...
        ssvm = FastSurvivalSVM(rank_ratio=-1)
        with self.assertRaisesRegex(ValueError, message):
            ssvm.fit(x, y)

        # ... the remaining ones are applied to the same instance via set_params.
        for invalid in (1.2, numpy.nan, numpy.inf):
            ssvm.set_params(rank_ratio=invalid)
            with self.assertRaisesRegex(ValueError, message):
                ssvm.fit(x, y)
def uno_c_data(request, whas500_pred):
    """Yield the data for one named scenario selected by ``request.param``.

    Yields a tuple ``(y_train, y_test, estimate, expected, tau)``. Scenarios
    that define a single structured array use it for both ``y_train`` and
    ``y_test``. ``tau`` stays ``None`` unless the scenario sets it, and every
    element is ``None`` when the parameter matches no scenario.
    """
    case = request.param

    y = y_train = y_test = None
    estimate = expected = tau = None

    if case in ('no_ties', 'tied_risk_1', 'tied_risk_2'):
        # These three scenarios share the survival data and differ only in the
        # estimates and the expected values.
        y = Surv.from_arrays(
            event=numpy.array((0, 1, 1, 0, 1, 0, 1, 0, 0, 1), dtype=bool),
            time=(1, 5, 6, 10, 11, 34, 45, 46, 50, 56))
        estimate, expected = {
            'no_ties': ((5, 8, 11, 19, 34, 12, 3, 9, 12, 20),
                        (0.347890360332615, 8, 15, 0, 0)),
            'tied_risk_1': ((5, 8, 11, 11, 34, 12, 3, 9, 12, 20),
                            (0.365629810028969, 8, 14, 1, 0)),
            'tied_risk_2': ((5, 8, 11, 11, 34, 12, 11, 9, 12, 20),
                            (0.387865723332956, 7, 14, 2, 0)),
        }[case]
    elif case in ('truncated_1', 'last_time_censored'):
        # Both scenarios use the same training data with separate test data.
        y_train = Surv.from_arrays(
            time=(2, 4, 6, 8, 10, 11, 15, 19),
            event=(False, True, False, True, False, False, False, False))
        if case == 'truncated_1':
            y_test = Surv.from_arrays(
                time=(1, 3, 5, 8, 12, 13),
                event=(True, False, False, True, True, True))
            estimate = (5, 8, 13, 11, 9, 4)
            expected = (0.7543736528146774, 4, 4, 0, 0)
            tau = 19
        else:
            y_test = Surv.from_arrays(
                time=(1, 3, 5, 7, 12, 13, 20),
                event=(True, False, False, True, True, False, False))
            estimate = (5, 8, 13, 11, 9, 7, 4)
            expected = (0.8126567565914234, 6, 5, 0, 0)
    elif case == 'truncated_2':
        y = Surv.from_arrays(
            time=(1, 5, 6, 10, 11, 34, 45, 46, 50, 56),
            event=numpy.array((0, 1, 1, 0, 1, 0, 1, 1, 1, 1), dtype=bool))
        estimate = (5, 8, 11, 19, 34, 12, 3, 9, 12, 18)
        expected = (0.347890361949191, 8, 18, 0, 0)
        tau = 45.25
    elif case == 'tied_event':
        y = Surv.from_arrays(
            event=[False, True, False, True, True, False, True, False, False, True],
            time=[1, 5, 6, 11, 11, 34, 45, 45, 50, 55])
        estimate = (5, 8, 11, 19, 34, 12, 3, 9, 12, 18)
        expected = (0.4036321031048623, 11, 10, 0, 1)
    elif case == 'tied_event_and_time':
        y = Surv.from_arrays(
            event=[True, False, False, False, True, False, True, True, False, False, False, True, True],
            time=[34, 11, 11, 5, 1, 89, 13, 45, 7, 13, 9, 13, 90])
        estimate = (1, 19, 13, 13, 15, 14, 19, 23, 11, 10, 11, 1, 18)
        expected = (0.46795357052737824, 14, 12, 1, 2)
    elif case == 'whas500':
        event, time, estimate = whas500_pred
        y = Surv.from_arrays(event, time)
        expected = (0.7929275009049014, 57849, 17300, 0, 14)

    # Fall back to the single array for whichever side was not set explicitly.
    if y_train is None:
        y_train = y
    if y_test is None:
        y_test = y

    yield y_train, y_test, estimate, expected, tau
Beispiel #15
0
    def test_from_array_event_value_wrong_5(surv_arrays):
        """An event array holding 0..n-1 must be rejected as non-binary."""
        fixture_event, time = surv_arrays
        n_samples = fixture_event.shape[0]
        non_binary_event = numpy.arange(n_samples)

        with pytest.raises(ValueError, match="event indicator must be binary"):
            Surv.from_arrays(non_binary_event, time)
Beispiel #16
0
    def test_from_array_event_value_wrong_4(surv_arrays):
        """Setting one event entry to 3 must make Surv.from_arrays raise ValueError."""
        event, time = surv_arrays
        # Overwrite a single entry of the fixture's event array.
        event[1] = 3

        expected_error = pytest.raises(ValueError,
                                       match="event indicator must be binary")
        with expected_error:
            Surv.from_arrays(event, time)
Beispiel #17
0
def sample_gb_class(request):
    """Return ``(request.param, x, y)`` with a fixed 5x20 feature matrix and survival data."""
    events = [False, False, True, True, False]
    times = [12, 14, 6, 9, 1]

    x = numpy.arange(100).reshape(5, 20)
    y = Surv.from_arrays(events, times)
    return request.param, x, y
Beispiel #18
0
def fake_data():
    """Return 100 samples of 11 random normal features with all-event survival data.

    Every sample is marked as an event and the times run from 1.0 to 100.0.
    The features come from numpy's global random state, which is not seeded here.
    """
    n_samples, n_features = 100, 11
    x = numpy.random.randn(n_samples, n_features)

    all_events = numpy.ones(100, dtype=bool)
    times = numpy.arange(1, 101, dtype=float)
    return x, Surv.from_arrays(all_events, times)
Beispiel #19
0
def output_bootstrap(model, n_iterations, df_train, data_train, y_train,
                     df_test, name):
    """Compute summary statistics of the model's metrics over bootstrapped test sets.

    Each iteration resamples ``df_test`` with replacement, predicts survival
    curves for the resample and computes the cumulative/dynamic AUC and the
    IPCW concordance index at 5 and 10 years.

    # Arguments
        model: neural network model trained with final parameters.
        n_iterations: number of bootstrap iterations.
        df_train: training dataset (not used by this function).
        data_train: structured survival data of the training samples, passed
            to the metric functions.
        y_train: frame with a column 's' whose unique values are used as the
            prediction times for the pseudo-observation models.
        df_test: test dataset with the columns 'surv_test' (time) and
            'cen_test' (event indicator) next to the features.
        name: name of the model; "CoxTime", "Cox-CC" and "DeepHit" are
            predicted through ``model.predict_surv_df``, everything else
            through ``make_predictions_pseudobs``.
    # Returns
        DataFrame indexed by 'auc5', 'auc10', 'unoc5' and 'unoc10' with the
        columns 'mean', 'ci95_lo', 'ci95_hi', 'std' and 'count'.
    """
    metric_names = ['auc5', 'auc10', 'unoc5', 'unoc10']
    summary_names = ['mean', 'ci95_lo', 'ci95_hi', 'std', 'count']

    if name in ("CoxTime", "Cox-CC"):
        # Called before any prediction is made for these two models.
        model.compute_baseline_hazards()

    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0.
    metric_rows = []
    for i in range(n_iterations):
        print(i)
        test_boot = resample(df_test, n_samples=len(df_test), replace=True)
        x_test_boot = test_boot.drop(['surv_test', 'cen_test'], axis=1)
        duration_test_b = test_boot['surv_test'].values
        event_test_b = test_boot['cen_test'].values
        data_test_b = skSurv.from_arrays(event=event_test_b,
                                         time=duration_test_b)

        if name in ("Cox-CC", "CoxTime", "DeepHit"):
            surv = model.predict_surv_df(np.array(x_test_boot,
                                                  dtype='float32'))
        else:
            # Repeat the resampled features once per unique training time and
            # attach that time as an additional column.
            n_picktime = int(y_train['s'].nunique())
            x_test_boot_all = pd.concat([x_test_boot] * n_picktime)
            time_test = pd.DataFrame(
                np.repeat(np.unique(y_train[['s']]), len(x_test_boot)))
            x_test_boot_all.reset_index(inplace=True, drop=True)
            x_test_boot_all = pd.concat([x_test_boot_all, time_test], axis=1)
            surv = make_predictions_pseudobs(model, y_train, x_test_boot_all,
                                             x_test_boot, name)

        # Columns: probability at the smallest resampled time, at 5 and at 10.
        prob_5_10 = pd.concat(
            [determine_surv_prob(surv, t)
             for t in (duration_test_b.min(), 5, 10)],
            axis=1)
        # Negated survival probabilities are passed as the estimates.
        risk_5 = -prob_5_10.iloc[:, 1]
        risk_10 = -prob_5_10.iloc[:, 2]

        metric_rows.append({
            'auc5': float(cumulative_dynamic_auc(
                data_train, data_test_b, risk_5, 5)[0]),
            'auc10': float(cumulative_dynamic_auc(
                data_train, data_test_b, risk_10, 10)[0]),
            'unoc5': float(concordance_index_ipcw(
                data_train, data_test_b, risk_5, 5)[0]),
            'unoc10': float(concordance_index_ipcw(
                data_train, data_test_b, risk_10, 10)[0]),
        })

    results_all = pd.DataFrame(metric_rows, columns=metric_names)

    summary_rows = []
    for column in results_all:
        # Label-based access; positional lookups on a labelled Series are deprecated.
        stats = results_all[column].agg(['mean', 'count', 'std'])
        sorted_scores = np.sort(np.array(results_all[column]), axis=None)
        # NOTE(review): the 5% and 95% cut-offs span a 90% interval although
        # the columns are named ci95_* -- confirm which interval is intended.
        ci95_lo = sorted_scores[int(0.05 * len(sorted_scores))]
        ci95_hi = sorted_scores[int(0.95 * len(sorted_scores))]
        summary_rows.append({
            'mean': stats['mean'],
            'ci95_lo': ci95_lo,
            'ci95_hi': ci95_hi,
            'std': stats['std'],
            'count': stats['count'],
        })

    results_final = pd.DataFrame(summary_rows,
                                 columns=summary_names,
                                 index=results_all.columns.tolist())
    return results_final