コード例 #1
0
    def test_from_array_event_value_wrong_5(surv_arrays):
        """Surv.from_arrays must raise ValueError for an event array of 0..n-1."""
        original_event, time = surv_arrays
        # replace the indicator by the integers 0, 1, ..., n_samples - 1
        non_binary_event = numpy.arange(original_event.shape[0])

        expected_msg = "event indicator must be binary"
        with pytest.raises(ValueError, match=expected_msg):
            Surv.from_arrays(non_binary_event, time)
コード例 #2
0
    def test_from_array_event_value_wrong_3(surv_arrays):
        """Surv.from_arrays must raise ValueError once every 0 in event is replaced by 3."""
        event, time = surv_arrays
        # overwrite all zero entries in place
        zero_entries = event == 0
        event[zero_entries] = 3

        expected_msg = "non-boolean event indicator must contain 0 and 1 only"
        with pytest.raises(ValueError, match=expected_msg):
            Surv.from_arrays(event, time)
コード例 #3
0
    def test_from_array_event_value_wrong_4(surv_arrays):
        """Surv.from_arrays must raise ValueError when one event entry is set to 3."""
        event, time = surv_arrays
        # overwrite the second entry in place with the value 3
        event[1] = 3

        expected_msg = "event indicator must be binary"
        with pytest.raises(ValueError, match=expected_msg):
            Surv.from_arrays(event, time)
コード例 #4
0
def output_simulations(surv, df_train, x_test, df_test, name):
    """ Compute AUC and Uno C-index at the median observed test time
    # Arguments
        surv: predictions forwarded unchanged to `determine_surv_prob`
            (exact format is not visible here - TODO confirm against caller)
        df_train: training dataset; its 'status' and 'yy' columns are read
        x_test: simulated input variables (not used by this function)
        df_test: test dataset; its 'status' and 'yy' columns are read
        name: name of the model (not used by this function)
    # Returns
        results_test: one-row DataFrame with the columns 't_med', 'auc_med',
            'unoc' and 'cens_rate' (percentage of rows with status == 0,
            assuming 'status' is a 0/1 indicator - TODO confirm)
    """

    data_train = skSurv.from_arrays(event=df_train['status'],
                                    time=df_train['yy'])
    data_test = skSurv.from_arrays(event=df_test['status'], time=df_test['yy'])

    status_test = df_test['status']
    cens_test = 100. - status_test.sum() * 100. / status_test.shape[0]

    # 50th percentile of the observed test times
    t_med = np.percentile(data_test['time'], 50)

    # negated survival probability at t_med serves as the risk score for both
    # metrics; compute it once instead of once per metric
    risk = -determine_surv_prob(surv, t_med)

    # the first return value holds one AUC per evaluated time point; flatten
    # and index it so that no size-1 array is handed to float()
    auc_values = cumulative_dynamic_auc(data_train, data_test, risk, t_med)[0]
    auc_med = float(np.ravel(auc_values)[0])

    unoc = float(
        concordance_index_ipcw(data_train, data_test, risk, t_med)[0])

    results_test = pd.DataFrame({
        't_med': [t_med],
        'auc_med': [auc_med],
        'unoc': [unoc],
        'cens_rate': [cens_test]
    })
    return results_test
コード例 #5
0
ファイル: output_results.py プロジェクト: eroblin/NN_Pseudobs
def output_sim_data(model, surv, X_train, df_train, X_test, df_test):
    """ Compute the C-index at the median test time and the integrated Brier score
    # Arguments
        model: neural network model (not used by this function)
        surv: predictions passed to `determine_surv_prob` and to `EvalSurv`
        X_train : input variables of the training set (not used by this function)
        df_train: training dataset; its 'status' and 'yy' columns are read
        X_test : input variables of the test set (not used by this function)
        df_test: test dataset; its 'status' and 'yy' columns are read
    # Returns
        one-row DataFrame with the columns 'c_median' and 'ibs'
    """
    test_times = df_test['yy']
    test_status = df_test['status']

    # 100 equally spaced time points between the 10th and 90th percentile
    grid_start = np.percentile(test_times, 10)
    grid_stop = np.percentile(test_times, 90)
    time_grid = np.linspace(grid_start, grid_stop, 100)
    median_time = np.percentile(test_times, 50)

    data_train = skSurv.from_arrays(event=df_train['status'],
                                    time=df_train['yy'])
    data_test = skSurv.from_arrays(event=test_status, time=test_times)

    # negated survival probability at the median time is used as risk score
    risk_at_median = np.array(-determine_surv_prob(surv, median_time))
    ipcw_result = concordance_index_ipcw(data_train, data_test,
                                         risk_at_median, median_time)
    c_med = ipcw_result[0]

    evaluator = EvalSurv(surv,
                         np.array(test_times),
                         np.array(test_status),
                         censor_surv='km')
    ibs = evaluator.integrated_brier_score(time_grid)

    res = pd.DataFrame([c_med, ibs]).transpose()
    res.columns = ['c_median', 'ibs']
    return res
コード例 #6
0
    def test_from_array_names_match(surv_arrays):
        """Surv.from_arrays must raise ValueError when both field names are identical."""
        event, time = surv_arrays

        shared_name = 'time_and_event'
        expected_msg = "name_time must be different from name_event"
        with pytest.raises(ValueError, match=expected_msg):
            Surv.from_arrays(event, time,
                             name_event=shared_name, name_time=shared_name)
コード例 #7
0
def uno_c_failure_data(request):
    """Yield (y_train, y_test, estimate, match) for the scenario in ``request.param``.

    ``match`` is the message text associated with the scenario. An unknown
    scenario name triggers ``assert False``.
    """
    scenario = request.param

    msg_time = ("time must be smaller than largest "
                "observed time point:")
    msg_zero = ("censoring survival function is zero "
                "at one or more time points")

    # building blocks shared by several scenarios
    train_time_19 = (2, 4, 6, 8, 10, 11, 15, 19)
    train_time_18 = (2, 4, 6, 8, 10, 11, 15, 18)
    train_event_last_true = (False, True, False, True, False, False, False, True)
    train_event_last_false = (False, True, False, True, False, False, False, False)
    test_event_7 = (True, False, False, True, True, False, True)
    estimate_7 = (5, 8, 13, 11, 9, 7, 4)

    # scenario -> (train time, train event, test time, test event, estimate, message)
    scenarios = {
        'last_time_uncensored_1': (
            train_time_19, train_event_last_true,
            (1, 3, 5, 7, 12, 13, 20), test_event_7,
            estimate_7, msg_time),
        'last_time_uncensored_2': (
            train_time_19, train_event_last_true,
            (1, 23, 5, 27, 12), (True, False, True, True, False),
            (5, 13, 11, 9, 4), msg_time),
        'zero_prob_1': (
            train_time_19, train_event_last_false,
            (1, 3, 5, 7, 12, 13, 19), test_event_7,
            estimate_7, msg_zero),
        'zero_prob_2': (
            train_time_18, train_event_last_false,
            (1, 3, 5, 7, 12, 13, 19), test_event_7,
            estimate_7, msg_zero),
        'zero_prob_3': (
            train_time_18, train_event_last_false,
            (1, 3, 5, 19, 12, 13, 7), test_event_7,
            estimate_7, msg_zero),
    }

    if scenario not in scenarios:
        assert False

    (train_time, train_event,
     test_time, test_event,
     estimate, match) = scenarios[scenario]

    y_train = Surv.from_arrays(time=train_time, event=train_event)
    y_test = Surv.from_arrays(time=test_time, event=test_event)

    yield y_train, y_test, estimate, match
コード例 #8
0
    def test_from_array_shape_mismatch(surv_arrays):
        """Surv.from_arrays must raise ValueError when event and time differ in length."""
        event, time = surv_arrays
        expected_msg = "Found input variables with inconsistent numbers of samples"

        # first drop one sample from event, then one sample from time
        mismatched_pairs = (
            (event[1:], time),
            (event, time[1:]),
        )
        for bad_event, bad_time in mismatched_pairs:
            with pytest.raises(ValueError, match=expected_msg):
                Surv.from_arrays(bad_event, bad_time)
コード例 #9
0
    def test_from_dataframe_wrong_class(surv_data_frame):
        """Surv.from_dataframe must raise TypeError when given a dict or an ndarray."""
        frame = surv_data_frame

        # (object that is not a DataFrame, pattern the error message must match)
        cases = (
            (frame.to_dict(),
             r"exepected pandas.DataFrame, but got <class 'dict'>"),
            (frame.values,
             r"exepected pandas.DataFrame, but got <class 'numpy.ndarray'>"),
        )
        for wrong_input, pattern in cases:
            with pytest.raises(TypeError, match=pattern):
                Surv.from_dataframe('event', 'time', wrong_input)
コード例 #10
0
def test_uno_c_all_censored():
    """concordance_index_ipcw and concordance_index_censored must return equal results.

    Every training sample has its event flag set to True.
    """
    train_time = (2, 4, 6, 8, 10, 11, 15, 19)
    train_event = (True, True, True, True, True, True, True, True)
    test_time = (1, 3, 5, 7, 12, 13, 20)
    test_event = (True, False, False, True, True, False, False)
    estimate = (5, 8, 13, 11, 9, 7, 4)

    y_train = Surv.from_arrays(time=train_time, event=train_event)
    y_test = Surv.from_arrays(time=test_time, event=test_event)

    ret_uno = concordance_index_ipcw(y_train, y_test, estimate)
    ret_harrell = concordance_index_censored(
        y_test['event'], y_test['time'], estimate)
    assert ret_uno == ret_harrell
コード例 #11
0
def uno_auc_data_20():
    """Return (y_train, y_test, estimate): 20 training samples, 15 test samples and 15 estimates."""
    train_time = [
        77.6, 57.6, 66.6, 67.0, 31.5, 5.5, 67.4, 43.7, 31.7, 71.9,
        81.1, 56.2, 88.1, 2.9, 62.0, 17.2, 88.0, 26.4, 93.5, 79.9,
    ]
    train_event = [1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1]

    test_time = [
        10.88, 19.78, 40.92, 98.7, 70.19, 10.15, 28.95, 29.57,
        17.9, 63.78, 36.22, 83.14, 13.69, 99.51, 3.19,
    ]
    test_event = [1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1]

    estimate = [
        -1.019, -0.016, 0.132, 0.269, -0.777, -1.077, 0.894, -1.227,
        -0.417, 0.072, -1.275, -0.91, -0.825, -0.292, -0.045,
    ]

    y_train = Surv.from_arrays(time=train_time, event=train_event)
    y_test = Surv.from_arrays(time=test_time, event=test_event)
    return y_train, y_test, estimate
コード例 #12
0
def generate_survival_data(n_samples, hazard_ratio, baseline_hazard,
                           percentage_cens, rnd):
    """Generate censored survival data with roughly ``percentage_cens`` censoring.

    Event times come from ``generate_marker``. Censoring times are drawn
    uniformly up to a limit that is chosen by a bounded scalar search so
    that the censored fraction is as close as possible to
    ``percentage_cens``.

    Returns
    -------
    X_test, y_test
        Rows of ``X`` and ``y`` whose observed time is below the largest
        observed event time.
    y
        Structured array for all samples.
    actual_c
        Third return value of ``generate_marker``, passed through unchanged.
    """
    X, time_event, actual_c = generate_marker(n_samples, hazard_ratio,
                                              baseline_hazard, rnd)

    def observe(upper_limit):
        # the generator is re-seeded with 0 on every call, so all calls start
        # from an identically seeded state
        cens_rng = np.random.RandomState(0)
        censor_times = cens_rng.uniform(high=upper_limit, size=n_samples)
        observed_event = time_event < censor_times
        observed_time = np.where(observed_event, time_event, censor_times)
        return observed_event, observed_time

    def squared_censoring_error(upper_limit):
        observed_event, _ = observe(upper_limit)
        censored_fraction = 1.0 - observed_event.sum() / observed_event.shape[0]
        return (censored_fraction - percentage_cens)**2

    # find the upper censoring limit that yields the requested censoring amount
    search_bounds = (0, time_event.max())
    res = opt.minimize_scalar(squared_censoring_error,
                              method="bounded",
                              bounds=search_bounds)

    # observed times for the selected limit
    event, time = observe(res.x)

    # tau is the largest observed event time; only samples strictly below it
    # go into the test portion
    tau = time[event].max()
    y = Surv.from_arrays(event=event, time=time)
    below_tau = time < tau

    return X[below_tau], y[below_tau], y, actual_c
コード例 #13
0
def uno_auc_data_15():
    """Return (y, estimate) for 15 samples."""
    time = [
        10.88, 19.78, 40.92, 98.7, 70.19, 10.15, 28.95, 29.57,
        17.9, 63.78, 36.22, 83.14, 13.69, 99.51, 3.19,
    ]
    event = [1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1]
    estimate = [
        -1.019, -0.016, 0.132, 0.269, -0.777, -1.077, 0.894, -1.227,
        -0.417, 0.072, -1.275, -0.91, -0.825, -0.292, -0.045,
    ]

    y = Surv.from_arrays(time=time, event=event)
    return y, estimate
コード例 #14
0
def uno_auc_whas500_data(request, whas500_pred):
    """Yield (y_train, y_test, estimate, times, expected, iauc).

    The first 300 samples of ``whas500_pred`` form the training part and the
    remaining samples the test part. ``request.param`` selects the time
    points; an unknown value triggers ``assert False``.
    """
    variant = request.param
    n_train = 300

    event, time, estimate = whas500_pred
    y_train = Surv.from_arrays(event=event[:n_train], time=time[:n_train])
    y_test = Surv.from_arrays(event=event[n_train:], time=time[n_train:])
    estimate = estimate[n_train:]

    times_by_variant = {
        'whas500_unordered_time': (1000, 600, 1400, 200, 400, 1200, 800, 1000, 200),
        'whas500': (200, 400, 600, 800, 1000, 1200, 1400),
    }
    if variant not in times_by_variant:
        assert False
    times = times_by_variant[variant]

    iauc = 0.8045058
    expected = numpy.array([
        0.7720669, 0.7765915, 0.7962623, 0.8759295,
        0.8759295, 0.8759513, 0.9147647,
    ])
    yield y_train, y_test, estimate, times, expected, iauc
コード例 #15
0
    def test_simple():
        """Fit CoxnetSurvivalAnalysis on five samples and compare alphas and coefficients.

        Alphas are compared with a hard-coded list and coefficients with the
        contents of SIMPLE_COEF_FILE.
        """
        event = [True, False, False, True, False]
        time = [7., 8., 11., 11., 23.]
        y = Surv.from_arrays(event, time, name_event="D", name_time="Y")

        features = {
            "F1": [1, 1, 1, 0, 0],
            "F2": [23, 43, 54, 75, 67],
            "F3": [120, 98, 78, 91, 79],
            "F4": [0.123, 0.541, 0.784, 0.846, 0.331],
        }
        x = pandas.DataFrame(features)

        coxnet = CoxnetSurvivalAnalysis(l1_ratio=1.0)
        coxnet.fit(x.values, y)

        expected_alphas = numpy.array([
            7.02666666666667, 6.40243696630484, 5.83366211207401,
            5.31541564828386, 4.84320877198972, 4.41295145312887,
            4.02091700863675, 3.66370982370111, 3.3382359405709,
            3.04167626017436, 2.77146212443153, 2.52525306776672,
            2.30091654511542, 2.09650946083909, 1.91026133856035,
            1.74055898614351, 1.5859325229961, 1.44504264866632,
            1.31666904246323, 1.19969979362274, 1.09312177046848,
            0.996011845149902, 0.907528897950459, 0.826906531910992,
            0.753446434665921, 0.686512329995589, 0.625524466706047,
            0.569954597101554, 0.519321401555745, 0.473186319551291,
            0.431149751078499, 0.392847595491192, 0.357948097841098,
            0.326148975375191, 0.297174799307102, 0.270774609184727,
            0.24671973919085, 0.22480183754923, 0.204831061881182,
            0.186634434881721, 0.170054346072885, 0.154947186657187,
            0.141182105646904, 0.128639876495421, 0.117211864413924,
            0.106799085428826, 0.0973113490299429, 0.0886664769834391,
            0.0807895915432809, 0.0736124668960205, 0.0670729382214382,
        ])
        n_expected = len(expected_alphas)

        # FIXME (marker kept from the original; its reason is not stated):
        # only the first n_expected fitted alphas are compared
        assert_array_almost_equal(expected_alphas, coxnet.alphas_[:n_expected])

        fitted_coef = coxnet.coef_[:, :n_expected]
        coef = pandas.DataFrame(fitted_coef, dtype=float)
        expected_coef = pandas.read_csv(SIMPLE_COEF_FILE, header=None, skiprows=1)

        assert_columns_almost_equal(coef, expected_coef)
コード例 #16
0
def uno_auc_time_dependent_without_censoring_data(request):
    """Return (y, times, -estimate.T, expected_auc, expected_iauc).

    All ten samples have event=True. ``request.param`` selects one of two
    estimate matrices with one row per entry of ``times``; an unknown value
    triggers ``assert False``. The expected AUC per time point is computed
    with ``roc_auc_score`` on the labels ``y["time"] > t``.
    """
    from sklearn.metrics import roc_auc_score

    variant = request.param

    y = Surv.from_arrays(
        time=[7, 9, 11, 12, 13, 15, 28, 39, 41, 76],
        event=[True] * 10)
    times = [10, 14, 40]

    estimates_by_variant = {
        'time_dependent_without_censoring': numpy.array([
            [1, 6, 18, 56, 32, 3, 99, 7, 67, 541],
            [6, 9, 11, 5, 3, 12, 56, 56.1, 81, 77],
            [13, 11, 12, 76, 55, 134, 70, 78, 75, 99],
        ]),
        'time_dependent_with_ties_without_censoring': numpy.array([
            [1, 6, 7, 56, 32, 3, 99, 7, 79, 17],
            [3, 6, 11, 5, 17, 12, 17, 56.1, 81, 77],
            [13, 11, 12, 17, 17, 134, 70, 78, 13, 99],
        ]),
    }
    if variant not in estimates_by_variant:
        assert False
    estimate = estimates_by_variant[variant]

    auc_per_time = []
    for time_point, row in zip(times, estimate):
        auc_per_time.append(roc_auc_score(y["time"] > time_point, row))
    expected_auc = numpy.array(auc_per_time)

    # weights used to combine the per-time AUC values into the integrated AUC
    km_delta = numpy.array([1 - 0.8, 0.8 - 0.5, 0.5 - 0.2])
    expected_iauc = numpy.sum(km_delta * expected_auc) / 0.8

    return y, times, -estimate.T, expected_auc, expected_iauc
コード例 #17
0
    def test_unknown_optimizer(self):
        """Fitting with optimizer='random stuff' must raise ValueError."""
        n_samples = 100
        x = numpy.zeros((n_samples, 10))
        event = numpy.ones(n_samples, dtype=bool)
        time = numpy.arange(1, 101, dtype=float)
        y = Surv.from_arrays(event, time)

        ssvm = FastSurvivalSVM(rank_ratio=0, optimizer='random stuff')
        with self.assertRaisesRegex(ValueError, "unknown optimizer: random stuff"):
            ssvm.fit(x, y)
コード例 #18
0
    def test_all_censored(self):
        """Fitting must raise ValueError when every event flag is False."""
        x = numpy.arange(80).reshape(10, 8)
        event = numpy.zeros(10, dtype=bool)
        time = [0, 1, 2, 1, 1, 0, 1, 2, 3, 1]
        y = Surv.from_arrays(event, time)

        rsvm = FastSurvivalSVM()
        with self.assertRaisesRegex(ValueError, "all samples are censored"):
            rsvm.fit(x, y)
コード例 #19
0
    def test_alpha_negative(self):
        """Fitting with alpha=-1 must raise ValueError."""
        n_samples = 100
        x = numpy.zeros((n_samples, 10))
        event = numpy.ones(n_samples, dtype=bool)
        time = numpy.arange(n_samples, dtype=float)
        y = Surv.from_arrays(event, time)

        ssvm = FastSurvivalSVM(alpha=-1)
        with self.assertRaisesRegex(ValueError, "alpha must be positive"):
            ssvm.fit(x, y)
コード例 #20
0
ファイル: conftest.py プロジェクト: xuyxu/scikit-survival
def rossi():
    """Load data/rossi.csv located next to this file and return it as a DataSet.

    The 'arrest' and 'week' columns form ``y``; all remaining columns form ``x``.
    """
    csv_path = Path(__file__).parent / 'data' / 'rossi.csv'
    frame = pandas.read_csv(csv_path)

    target_columns = ["arrest", "week"]
    target = Surv.from_dataframe("arrest", "week", frame)
    features = frame.drop(target_columns, axis=1)
    return DataSet(x=features, y=target)
コード例 #21
0
    def test_ranking_with_fit_intercept():
        """Fitting with rank_ratio=1.0 and fit_intercept=True must raise ValueError."""
        n_samples = 100
        x = numpy.zeros((n_samples, 10))
        event = numpy.ones(n_samples, dtype=bool)
        time = numpy.arange(1, 101, dtype=float)
        y = Surv.from_arrays(event, time)

        ssvm = FastSurvivalSVM(rank_ratio=1.0, fit_intercept=True)
        expected_msg = "fit_intercept=True is only meaningful if rank_ratio < 1.0"
        with pytest.raises(ValueError, match=expected_msg):
            ssvm.fit(x, y)
コード例 #22
0
    def test_negative_time():
        """Fitting must raise ValueError when an observed time is negative."""
        x = numpy.arange(80).reshape(10, 8)
        event = [0, 1, 0, 1, 1, 0, 1, 0, 0, 1]
        # the third sample carries the negative time -2
        time = [1, 1, -2, 1, 1, 6, 1, 2, 3, 1]
        y = Surv.from_arrays(event, time)

        rsvm = FastSurvivalSVM(rank_ratio=0.5)
        expected_msg = "observed time contains values smaller or equal to zero"
        with pytest.raises(ValueError, match=expected_msg):
            rsvm.fit(x, y)
コード例 #23
0
def toy_data():
    """Return (x, y): a 6x2 feature array and a survival array whose event field is named 'status'."""
    x = numpy.array([
        [1., 1.],
        [10.2, 15.],
        [20., 5.],
        [40, 30],
        [45, 21],
        [50, 36],
    ])

    event = [True, True, False, True, False, False]
    # time of the i-th sample (i = 1..6) is i + 2**i
    time = numpy.arange(1, 7) + 2**numpy.arange(1, 7)

    y = Surv.from_arrays(event, time, name_event='status')
    return x, y
コード例 #24
0
    def test_all_censored():
        """Fitting must raise ValueError when every event flag is False."""
        x = numpy.arange(80).reshape(10, 8)
        event = numpy.zeros(10, dtype=bool)
        time = [0, 1, 2, 1, 1, 0, 1, 2, 3, 1]
        y = Surv.from_arrays(event, time)

        rsvm = FastSurvivalSVM()
        expected_msg = "all samples are censored"
        with pytest.raises(ValueError, match=expected_msg):
            rsvm.fit(x, y)
コード例 #25
0
    def test_from_array_with_one_name_1(surv_arrays):
        """With name_event='death' the result must have the fields 'death' and 'time'."""
        event, time = surv_arrays
        event_flags = event.astype(bool)

        record_dtype = [('death', bool), ('time', float)]
        expected = numpy.empty(100, dtype=record_dtype)
        expected['death'] = event_flags
        expected['time'] = time

        actual = Surv.from_arrays(event_flags, time, name_event='death')
        assert_array_equal(actual, expected)
コード例 #26
0
    def test_from_array_with_one_name_2(surv_arrays):
        """With name_time='survival_time' the result must have the fields 'event' and 'survival_time'."""
        event, time = surv_arrays
        event_flags = event.astype(bool)

        record_dtype = [('event', bool), ('survival_time', float)]
        expected = numpy.empty(100, dtype=record_dtype)
        expected['event'] = event_flags
        expected['survival_time'] = time

        actual = Surv.from_arrays(event_flags, time, name_time='survival_time')
        assert_array_equal(actual, expected)
コード例 #27
0
    def test_from_array_float(surv_arrays):
        """An event indicator cast to float must yield the same result as its boolean cast."""
        event, time = surv_arrays

        record_dtype = [('event', bool), ('time', float)]
        expected = numpy.empty(100, dtype=record_dtype)
        expected['event'] = event.astype(bool)
        expected['time'] = time

        float_event = event.astype(float)
        actual = Surv.from_arrays(float_event, time)
        assert_array_equal(actual, expected)
コード例 #28
0
    def test_from_dataframe_int(surv_data_frame):
        """Surv.from_dataframe must return the 'event' column as bool and the 'time' column as float."""
        frame = surv_data_frame

        record_dtype = [('event', bool), ('time', float)]
        expected = numpy.empty(100, dtype=record_dtype)
        expected['event'] = frame['event'].astype(bool)
        expected['time'] = frame['time']

        actual = Surv.from_dataframe('event', 'time', frame)
        assert_array_equal(actual, expected)
コード例 #29
0
def test_uno_c_not_1d(whas500_pred, dim):
    """concordance_index_ipcw must raise ValueError for a 2D risk estimate."""
    event, time, risk = whas500_pred
    y = Surv.from_arrays(event, time)

    # turn the risk vector into a column and repeat it `dim` times side by side
    risk_column = risk[:, numpy.newaxis]
    risk_2d = numpy.tile(risk_column, (1, dim))

    expected_msg = "Expected 1D array, got 2D array instead:"
    with pytest.raises(ValueError, match=expected_msg):
        concordance_index_ipcw(y, y, risk_2d)
コード例 #30
0
    def test_from_dataframe_no_such_column(surv_data_frame):
        """Surv.from_dataframe must raise KeyError when a requested column is missing.

        Both positions are checked: an unknown event column and an unknown
        time column.
        """
        data = surv_data_frame
        # the event column is converted to bool in place before the calls
        data['event'] = data['event'].astype(bool)

        expected_msg = r'the label \[unknown\] is not in the \[columns\]'

        with pytest.raises(KeyError, match=expected_msg):
            Surv.from_dataframe('unknown', 'time', data)

        with pytest.raises(KeyError, match=expected_msg):
            Surv.from_dataframe('event', 'unknown', data)