Example #1
0
    def test_survival_squared_hinge_loss(self):
        """Check that NaiveSurvivalSVM and FastSurvivalSVM fit the same model.

        Both estimators are trained on ``self.x``/``self.y`` with identical
        solver settings. Their coefficients must agree to 3 decimals and
        their predictions must yield the same concordance statistics.
        """
        # Settings shared by both estimators.
        solver_args = dict(tol=1e-8, max_iter=1000, random_state=0)

        naive_model = NaiveSurvivalSVM(loss='squared_hinge', dual=False,
                                       **solver_args)
        naive_model.fit(self.x, self.y)

        fast_model = FastSurvivalSVM(optimizer='avltree', **solver_args)
        fast_model.fit(self.x, self.y)

        assert_array_almost_equal(naive_model.coef_.ravel(),
                                  fast_model.coef_, 3)

        naive_pred = naive_model.predict(self.x)
        fast_pred = fast_model.predict(self.x)
        self.assertEqual(len(naive_pred), len(fast_pred))

        naive_stats = concordance_index_censored(
            self.y['fstat'], self.y['lenfol'], naive_pred)
        fast_stats = concordance_index_censored(
            self.y['fstat'], self.y['lenfol'], fast_pred)

        # First entry is the c-index itself, the rest are compared exactly.
        self.assertAlmostEqual(naive_stats[0], fast_stats[0])
        self.assertTupleEqual(naive_stats[1:], fast_stats[1:])
Example #2
0
    def test_compare_rbf(self):
        """Compare the RBF kernel SVM with KernelPCA followed by a linear SVM.

        A FastSurvivalSVM trained on KernelPCA(rbf)-transformed features and
        a FastKernelSurvivalSVM trained on the raw features with an RBF
        kernel must produce the same concordance statistics.
        """
        features, outcome, _, _ = load_arff_file(
            WHAS500_FILE, ['fstat', 'lenfol'], '1')

        kernel_pca = KernelPCA(kernel="rbf")
        projected = kernel_pca.fit_transform(features)

        linear_model = FastSurvivalSVM(
            optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)
        linear_model.fit(projected, outcome)

        kernel_model = FastKernelSurvivalSVM(
            optimizer='rbtree', kernel="rbf",
            tol=1e-8, max_iter=1000, random_state=0)
        kernel_model.fit(features, outcome)

        linear_pred = linear_model.predict(kernel_pca.transform(features))
        kernel_pred = kernel_model.predict(features)
        self.assertEqual(len(linear_pred), len(kernel_pred))

        linear_stats = concordance_index_censored(
            outcome['fstat'], outcome['lenfol'], linear_pred)
        kernel_stats = concordance_index_censored(
            outcome['fstat'], outcome['lenfol'], kernel_pred)

        # First entry is the c-index itself, the rest are compared exactly.
        self.assertAlmostEqual(linear_stats[0], kernel_stats[0])
        self.assertTupleEqual(linear_stats[1:], kernel_stats[1:])
    def test_compare_clinical_kernel(self):
        """Compare the clinical kernel SVM with KernelPCA plus a linear SVM.

        The clinical kernel is fitted on the raw (non-standardized,
        non-numeric) data. Both models then use ``trans.pairwise_kernel`` on
        the standardized, categorical-encoded features and must produce the
        same concordance statistics.
        """
        raw_features, outcome, _, _ = load_arff_file(
            WHAS500_FILE, ['fstat', 'lenfol'], '1',
            standardize_numeric=False, to_numeric=False)

        clinical = ClinicalKernelTransform()
        clinical.fit(raw_features)

        features = encode_categorical(standardize(raw_features))

        kernel_pca = KernelPCA(kernel=clinical.pairwise_kernel)
        projected = kernel_pca.fit_transform(features)

        linear_model = FastSurvivalSVM(
            optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)
        linear_model.fit(projected, outcome)

        kernel_model = FastKernelSurvivalSVM(
            optimizer='rbtree', kernel=clinical.pairwise_kernel,
            tol=1e-8, max_iter=1000, random_state=0)
        kernel_model.fit(features, outcome)

        linear_pred = linear_model.predict(kernel_pca.transform(features))
        kernel_pred = kernel_model.predict(features)
        self.assertEqual(len(linear_pred), len(kernel_pred))

        linear_stats = concordance_index_censored(
            outcome['fstat'], outcome['lenfol'], linear_pred)
        kernel_stats = concordance_index_censored(
            outcome['fstat'], outcome['lenfol'], kernel_pred)

        # First entry is the c-index itself, the rest are compared exactly.
        self.assertAlmostEqual(linear_stats[0], kernel_stats[0])
        self.assertTupleEqual(linear_stats[1:], kernel_stats[1:])
def train_test_model(data):
    """Fit and evaluate one parameter setting on one train/test split.

    Uses ``estimator``, ``x`` and ``y`` from the enclosing scope.

    Parameters
    ----------
    data : tuple
        ``(train_index, test_index, params, fold)``.

    Returns
    -------
    dict
        Copy of ``params`` extended by the keys ``'c-index'``, ``'error'``,
        ``'n_events'`` and ``'fold'``. The three metrics are NaN if fitting
        or evaluation raised an exception.
    """
    train_index, test_index, params, fold = data

    # Training
    model = clone(estimator)
    model.set_params(**params)

    result = params.copy()
    try:
        model.fit(x[train_index, :], y[train_index])

        # Testing
        prediction = model.predict(x[test_index, :])
        test_y = y[test_index]
        stats = concordance_index_censored(
            test_y['event'], test_y['time'], prediction)
        result['c-index'] = stats[0]

        # for c-index, the sign of the predictions is flipped, flip it again
        # for regression; only samples with an event enter the error
        event_mask = test_y['event']
        predicted_log_time = -prediction[event_mask]
        observed_log_time = test_y['time'][event_mask]

        # convert from log-scale back to original scale and compute RMSE
        mse = mean_squared_error(numpy.exp(observed_log_time),
                                 numpy.exp(predicted_log_time))
        result['error'] = numpy.sqrt(mse)
        result['n_events'] = numpy.sum(event_mask)
    except Exception as e:
        # log errors to IPython profile's log files
        Application.instance().log.exception(e)
        for metric in ('c-index', 'error', 'n_events'):
            result[metric] = float('nan')

    result['fold'] = fold
    return result
Example #5
0
def _score_cindex(est, X_test, y_test, **kwargs):
    """Return the concordance index of ``est`` on the given test data.

    ``y_test`` must expose exactly two field names via ``dtype.names``; the
    first is used as the event indicator and the second as the observed
    time. Additional keyword arguments are accepted but ignored.
    """
    prediction = est.predict(X_test)
    event_field, time_field = y_test.dtype.names

    # Only the first entry of the result (the c-index itself) is returned.
    return concordance_index_censored(
        y_test[event_field], y_test[time_field], prediction)[0]
Example #6
0
def train_test_model(data):
    """Train a clone of ``estimator`` on one fold and score it on the rest.

    Uses ``estimator``, ``x`` and ``y`` from the enclosing scope.

    Parameters
    ----------
    data : tuple
        ``(train_index, test_index, params, fold)``.

    Returns
    -------
    dict
        Copy of ``params`` extended by the keys ``'c-index'``, ``'error'``,
        ``'n_events'`` and ``'fold'``. The three metrics are NaN if fitting
        or evaluation raised an exception.
    """
    train_index, test_index, params, fold = data

    # Training
    fitted = clone(estimator)
    fitted.set_params(**params)

    scores = params.copy()
    try:
        fitted.fit(x[train_index, :], y[train_index])

        # Testing
        pred = fitted.predict(x[test_index, :])
        test_y = y[test_index]
        scores['c-index'] = concordance_index_censored(
            test_y['event'], test_y['time'], pred)[0]

        # for c-index, the sign of the predictions is flipped, flip it again
        # for regression; only samples with an event enter the error
        has_event = test_y['event']
        pred_log_time = -pred[has_event]
        true_log_time = test_y['time'][has_event]

        # convert from log-scale back to original scale and compute RMSE
        squared_error = mean_squared_error(numpy.exp(true_log_time),
                                           numpy.exp(pred_log_time))
        scores['error'] = numpy.sqrt(squared_error)
        scores['n_events'] = numpy.sum(has_event)
    except Exception as e:
        # log errors to IPython profile's log files
        Application.instance().log.exception(e)
        for key in ('c-index', 'error', 'n_events'):
            scores[key] = float('nan')

    scores['fold'] = fold
    return scores
    def test_survival_squared_hinge_loss(self):
        """Naive and fast survival SVM must agree for the squared hinge loss.

        Coefficients are compared to 3 decimals; the concordance index is
        compared approximately and the remaining statistics exactly.
        """
        baseline = NaiveSurvivalSVM(
            loss='squared_hinge',
            dual=False,
            tol=1e-8,
            max_iter=1000,
            random_state=0,
        )
        baseline.fit(self.x, self.y)

        candidate = FastSurvivalSVM(
            optimizer='avltree',
            tol=1e-8,
            max_iter=1000,
            random_state=0,
        )
        candidate.fit(self.x, self.y)

        assert_array_almost_equal(baseline.coef_.ravel(), candidate.coef_, 3)

        baseline_pred = baseline.predict(self.x)
        candidate_pred = candidate.predict(self.x)
        self.assertEqual(len(baseline_pred), len(candidate_pred))

        baseline_stats = concordance_index_censored(
            self.y['fstat'], self.y['lenfol'], baseline_pred)
        candidate_stats = concordance_index_censored(
            self.y['fstat'], self.y['lenfol'], candidate_pred)

        self.assertAlmostEqual(baseline_stats[0], candidate_stats[0])
        self.assertTupleEqual(baseline_stats[1:], candidate_stats[1:])
Example #8
0
    def test_breast_cancer_cvxpy(self):
        """Fit MinlipSurvivalAnalysis with the cvxpy solver and check results.

        Verifies the shape of ``coef_`` (one coefficient per sample) and the
        concordance statistics of the predictions on the training data.
        """
        expected_stats = numpy.array(
            [0.59576770470121443, 79280, 53792, 0, 32])

        model = MinlipSurvivalAnalysis(solver="cvxpy", alpha=1, pairs="next")
        model.fit(self.x.values, self.y)

        self.assertTupleEqual((1, self.x.shape[0]), model.coef_.shape)

        prediction = model.predict(self.x.values)
        actual_stats = concordance_index_censored(
            self.y['cens'], self.y['time'], prediction)

        assert_array_almost_equal(expected_stats, actual_stats)
    def test_concordance_index_no_censoring_all_correct(self):
        """All events observed and estimates perfectly ordered gives c = 1.

        The estimate decreases while time increases, so all 28 pairs of the
        8 samples are expected to be concordant.
        """
        time = [1, 5, 6, 11, 34, 45, 46, 50]
        n_samples = len(time)
        event = numpy.repeat(True, n_samples)
        estimate = numpy.arange(n_samples)[::-1]

        stats = concordance_index_censored(event, time, estimate)
        cindex, concordant, discordant, tied_risk, tied_time = stats

        self.assertEqual(28, concordant)
        self.assertEqual(0, discordant)
        self.assertEqual(0, tied_risk)
        self.assertEqual(0, tied_time)
        self.assertEqual(1.0, cindex)
    def test_concordance_index_no_censoring_all_correct(self):
        """Without censoring, a perfectly ordered estimate must score 1.0.

        Time is increasing and the estimate is decreasing, so every one of
        the 28 pairs is expected to be concordant.
        """
        observed_time = [1, 5, 6, 11, 34, 45, 46, 50]
        all_events = numpy.repeat(True, len(observed_time))
        risk_score = numpy.arange(len(observed_time))[::-1]

        (cindex, n_concordant, n_discordant,
         n_tied_risk, n_tied_time) = concordance_index_censored(
            all_events, observed_time, risk_score)

        self.assertEqual(28, n_concordant)
        self.assertEqual(0, n_discordant)
        self.assertEqual(0, n_tied_risk)
        self.assertEqual(0, n_tied_time)
        self.assertEqual(1.0, cindex)
Example #11
0
    def test_breast_cancer_rbf_cvxopt(self):
        """Fit an RBF-kernel MinlipSurvivalAnalysis with the cvxopt solver.

        Verifies the shape of ``coef_`` (one coefficient per sample) and the
        concordance statistics of the predictions on the training data.
        """
        expected_stats = numpy.array(
            [0.63261242034387399, 84182, 48888, 2, 32])

        model = MinlipSurvivalAnalysis(
            solver="cvxopt", alpha=1, kernel="rbf", pairs="next")
        model.fit(self.x.values, self.y)

        self.assertTupleEqual((1, self.x.shape[0]), model.coef_.shape)

        prediction = model.predict(self.x.values)
        actual_stats = concordance_index_censored(
            self.y['cens'], self.y['time'], prediction)

        assert_array_almost_equal(expected_stats, actual_stats)
Example #12
0
    def test_toy_minlip_predict_1_cvxpy(self):
        """MinlipSurvivalAnalysis (cvxpy) must rank the toy data perfectly.

        Predictions on the training data are expected to give a concordance
        index of 1.0 with 11 concordant pairs and no discordant or tied ones.
        """
        model = MinlipSurvivalAnalysis(solver="cvxpy", alpha=1, pairs="next")
        model.fit(self.x, self.y)

        prediction = model.predict(self.x)
        stats = concordance_index_censored(
            self.y['status'], self.y['time'], prediction)

        self.assertEqual(1.0, stats[0])
        self.assertEqual(11, stats[1])
        # No discordant pairs, no ties in risk, no ties in time.
        self.assertEqual(0, stats[2])
        self.assertEqual(0, stats[3])
        self.assertEqual(0, stats[4])
    def test_concordance_index_with_tied_event_and_time(self):
        """Check concordance statistics on data with tied times and estimates.

        The data contains both censored and uncensored samples; the expected
        pair counts are fixed reference values.
        """
        event = [
            True, False, False, False, True, False,
            True, True, False, False, False, True,
        ]
        time = [34, 11, 11, 5, 1, 89, 13, 45, 7, 13, 9, 13]
        estimate = [1, 19, 13, 13, 15, 14, 19, 23, 11, 10, 11, 1]

        stats = concordance_index_censored(event, time, estimate)
        cindex, concordant, discordant, tied_risk, tied_time = stats

        self.assertEqual(12, concordant)
        self.assertEqual(9, discordant)
        self.assertEqual(1, tied_risk)
        self.assertEqual(1, tied_time)
        self.assertAlmostEqual(0.5681818, cindex, 6)
    def test_concordance_index_with_tied_event(self):
        """Check concordance statistics when two events share the same time.

        All three inputs are passed in reversed order, so the samples arrive
        sorted by descending time.
        """
        event = [False, True, False, True, True, False, True, False, False]
        time = [1, 5, 6, 11, 11, 34, 45, 45, 50]
        estimate = [5, 8, 11, 19, 34, 12, 3, 9, 12]

        reversed_event = event[::-1]
        reversed_time = time[::-1]
        reversed_estimate = estimate[::-1]
        stats = concordance_index_censored(
            reversed_event, reversed_time, reversed_estimate)
        cindex, concordant, discordant, tied_risk, tied_time = stats

        self.assertEqual(9, concordant)
        self.assertEqual(8, discordant)
        self.assertEqual(0, tied_risk)
        self.assertEqual(1, tied_time)
        self.assertAlmostEqual(0.5294118, cindex, 6)
    def test_concordance_index_with_tied_time2(self):
        """Check concordance statistics when tied times are all censored.

        The samples sharing a time value (11 and 45) include at most one
        event each, and no tie in time is expected to be counted.
        """
        event = [False, True, True, False, False, False, True, False, False]
        time = [1, 5, 6, 11, 11, 34, 45, 45, 50]
        estimate = [5, 8, 11, 19, 34, 12, 3, 9, 12]

        stats = concordance_index_censored(event, time, estimate)
        cindex, concordant, discordant, tied_risk, tied_time = stats

        self.assertEqual(3, concordant)
        self.assertEqual(12, discordant)
        self.assertEqual(0, tied_risk)
        self.assertEqual(0, tied_time)
        self.assertAlmostEqual(0.2, cindex, 6)
    def test_concordance_index_no_censoring_all_wrong(self):
        """All events observed and estimates exactly reversed gives c = 0.

        The estimate increases together with time, so all 28 pairs of the
        8 samples are expected to be discordant.
        """
        time = [1, 5, 6, 11, 34, 45, 46, 50]
        n_samples = len(time)
        event = numpy.repeat(True, n_samples)
        # order is exactly reversed
        estimate = numpy.arange(n_samples)

        stats = concordance_index_censored(event, time, estimate)
        cindex, concordant, discordant, tied_risk, tied_time = stats

        self.assertEqual(0, concordant)
        self.assertEqual(28, discordant)
        self.assertEqual(0, tied_risk)
        self.assertEqual(0, tied_time)
        self.assertEqual(0.0, cindex)
    def test_concordance_index(self):
        """Check concordance statistics against reference values from a file.

        The comma-separated file ``WHAS500_DATA_FILE`` is read with the event
        indicator in column 0 (1 means event), time in column 1 and the risk
        score in column 2.
        """
        table = numpy.loadtxt(WHAS500_DATA_FILE, delimiter=",")
        event_column, time_column, risk_column = 0, 1, 2
        event = table[:, event_column] == 1
        time = table[:, time_column]
        risk = table[:, risk_column]

        stats = concordance_index_censored(event, time, risk)
        cindex, concordant, discordant, tied_risk, tied_time = stats

        self.assertEqual(57849, concordant)
        self.assertEqual(17300, discordant)
        self.assertEqual(0, tied_risk)
        self.assertEqual(119, tied_time)
        self.assertAlmostEqual(0.7697907, cindex, 6)
    def test_concordance_index_no_censoring_all_wrong(self):
        """Without censoring, an exactly reversed estimate must score 0.0.

        Time and estimate both increase, so every one of the 28 pairs is
        expected to be discordant.
        """
        observed_time = [1, 5, 6, 11, 34, 45, 46, 50]
        all_events = numpy.repeat(True, len(observed_time))
        # order is exactly reversed
        risk_score = numpy.arange(len(observed_time))

        (cindex, n_concordant, n_discordant,
         n_tied_risk, n_tied_time) = concordance_index_censored(
            all_events, observed_time, risk_score)

        self.assertEqual(0, n_concordant)
        self.assertEqual(28, n_discordant)
        self.assertEqual(0, n_tied_risk)
        self.assertEqual(0, n_tied_time)
        self.assertEqual(0.0, cindex)
    def test_concordance_index(self):
        """Compare concordance statistics on file data with reference values.

        ``WHAS500_DATA_FILE`` is parsed as comma-separated values: column 0
        is the event indicator (1 means event), column 1 the time and
        column 2 the risk score.
        """
        records = numpy.loadtxt(WHAS500_DATA_FILE, delimiter=",")
        had_event = records[:, 0] == 1
        observed_time = records[:, 1]
        risk_score = records[:, 2]

        (cindex, n_concordant, n_discordant,
         n_tied_risk, n_tied_time) = concordance_index_censored(
            had_event, observed_time, risk_score)

        self.assertEqual(57849, n_concordant)
        self.assertEqual(17300, n_discordant)
        self.assertEqual(0, n_tied_risk)
        self.assertEqual(119, n_tied_time)
        self.assertAlmostEqual(0.7697907, cindex, 6)
    def test_concordance_index_with_tied_event(self):
        """Concordance statistics with two events tied at the same time.

        The inputs are handed over in reversed order, i.e. sorted by
        descending time.
        """
        had_event = [False, True, False, True, True, False, True, False, False]
        observed_time = [1, 5, 6, 11, 11, 34, 45, 45, 50]
        risk_score = [5, 8, 11, 19, 34, 12, 3, 9, 12]

        (cindex, n_concordant, n_discordant,
         n_tied_risk, n_tied_time) = concordance_index_censored(
            had_event[::-1], observed_time[::-1], risk_score[::-1])

        self.assertEqual(9, n_concordant)
        self.assertEqual(8, n_discordant)
        self.assertEqual(0, n_tied_risk)
        self.assertEqual(1, n_tied_time)
        self.assertAlmostEqual(0.5294118, cindex, 6)
    def test_concordance_index_with_tied_time2(self):
        """Concordance statistics when samples with equal time lack two events.

        Times 11 and 45 occur twice, but never with two events, and no tie in
        time is expected to be counted.
        """
        had_event = [False, True, True, False, False, False, True, False, False]
        observed_time = [1, 5, 6, 11, 11, 34, 45, 45, 50]
        risk_score = [5, 8, 11, 19, 34, 12, 3, 9, 12]

        (cindex, n_concordant, n_discordant,
         n_tied_risk, n_tied_time) = concordance_index_censored(
            had_event, observed_time, risk_score)

        self.assertEqual(3, n_concordant)
        self.assertEqual(12, n_discordant)
        self.assertEqual(0, n_tied_risk)
        self.assertEqual(0, n_tied_time)
        self.assertAlmostEqual(0.2, cindex, 6)
Example #22
0
    def test_breast_cancer_rbf_cvxpy(self):
        """Fit an RBF-kernel MinlipSurvivalAnalysis with the cvxpy solver.

        Verifies the shape of ``coef_`` (one coefficient per sample) and the
        concordance statistics of the predictions on the training data; the
        concordance index itself is only checked to 3 decimals.
        """
        model = MinlipSurvivalAnalysis(
            solver="cvxpy", alpha=1, kernel="rbf", pairs="next")
        model.fit(self.x.values, self.y)

        self.assertTupleEqual((1, self.x.shape[0]), model.coef_.shape)

        prediction = model.predict(self.x.values)
        stats = concordance_index_censored(
            self.y['cens'], self.y['time'], prediction)

        self.assertAlmostEqual(0.6286334, stats[0], 3)
        self.assertEqual(83653, stats[1])
        self.assertEqual(49418, stats[2])
        self.assertEqual(1, stats[3])
        self.assertEqual(32, stats[4])
Example #23
0
    def test_toy_hinge_nearest_fit(self):
        """HingeLossSurvivalSVM with nearest pairs must rank toy data perfectly.

        Verifies the shape of ``coef_`` (one coefficient per sample) and
        expects a concordance index of 1.0 with 11 concordant pairs and no
        discordant or tied ones.
        """
        model = HingeLossSurvivalSVM(alpha=1, pairs="nearest")
        model.fit(self.x, self.y)

        self.assertTupleEqual((1, self.x.shape[0]), model.coef_.shape)

        prediction = model.predict(self.x)
        stats = concordance_index_censored(
            self.y['status'], self.y['time'], prediction)

        self.assertEqual(1.0, stats[0])
        self.assertEqual(11, stats[1])
        # No discordant pairs, no ties in risk, no ties in time.
        self.assertEqual(0, stats[2])
        self.assertEqual(0, stats[3])
        self.assertEqual(0, stats[4])
    def test_compare_rbf(self):
        """RBF kernel SVM must match a linear SVM on KernelPCA features.

        Trains FastSurvivalSVM on KernelPCA(rbf)-transformed data and
        FastKernelSurvivalSVM with an RBF kernel on the raw data, then
        compares the concordance statistics of their predictions.
        """
        data, target, _, _ = load_arff_file(
            WHAS500_FILE, ['fstat', 'lenfol'], '1')

        pca = KernelPCA(kernel="rbf")
        data_pca = pca.fit_transform(data)

        svm_on_pca = FastSurvivalSVM(
            optimizer='rbtree',
            tol=1e-8,
            max_iter=1000,
            random_state=0,
        )
        svm_on_pca.fit(data_pca, target)

        kernel_svm = FastKernelSurvivalSVM(
            optimizer='rbtree',
            kernel="rbf",
            tol=1e-8,
            max_iter=1000,
            random_state=0,
        )
        kernel_svm.fit(data, target)

        pred_on_pca = svm_on_pca.predict(pca.transform(data))
        pred_kernel = kernel_svm.predict(data)
        self.assertEqual(len(pred_on_pca), len(pred_kernel))

        stats_on_pca = concordance_index_censored(
            target['fstat'], target['lenfol'], pred_on_pca)
        stats_kernel = concordance_index_censored(
            target['fstat'], target['lenfol'], pred_kernel)

        self.assertAlmostEqual(stats_on_pca[0], stats_kernel[0])
        self.assertTupleEqual(stats_on_pca[1:], stats_kernel[1:])
Example #25
0
    def test_compare_clinical_kernel(self):
        """Clinical kernel SVM must match a linear SVM on KernelPCA features.

        ClinicalKernelTransform is fitted on the data loaded without
        standardization or numeric conversion. Both models then use its
        ``pairwise_kernel`` on the standardized, categorical-encoded data.
        """
        data_raw, target, _, _ = load_arff_file(
            WHAS500_FILE, ['fstat', 'lenfol'], '1',
            standardize_numeric=False, to_numeric=False)

        kernel_transform = ClinicalKernelTransform()
        kernel_transform.fit(data_raw)

        data = encode_categorical(standardize(data_raw))

        pca = KernelPCA(kernel=kernel_transform.pairwise_kernel)
        data_pca = pca.fit_transform(data)

        svm_on_pca = FastSurvivalSVM(
            optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)
        svm_on_pca.fit(data_pca, target)

        kernel_svm = FastKernelSurvivalSVM(
            optimizer='rbtree', kernel=kernel_transform.pairwise_kernel,
            tol=1e-8, max_iter=1000, random_state=0)
        kernel_svm.fit(data, target)

        pred_on_pca = svm_on_pca.predict(pca.transform(data))
        pred_kernel = kernel_svm.predict(data)
        self.assertEqual(len(pred_on_pca), len(pred_kernel))

        stats_on_pca = concordance_index_censored(
            target['fstat'], target['lenfol'], pred_on_pca)
        stats_kernel = concordance_index_censored(
            target['fstat'], target['lenfol'], pred_kernel)

        self.assertAlmostEqual(stats_on_pca[0], stats_kernel[0])
        self.assertTupleEqual(stats_on_pca[1:], stats_kernel[1:])
    def test_concordance_index_with_tied_event_and_time(self):
        """Concordance statistics on data with tied times and tied estimates.

        Censored and uncensored samples are mixed; the expected pair counts
        are fixed reference values.
        """
        had_event = [True, False, False, False, True, False,
                     True, True, False, False, False, True]
        observed_time = [34, 11, 11, 5, 1, 89, 13, 45, 7, 13, 9, 13]
        risk_score = [1, 19, 13, 13, 15, 14, 19, 23, 11, 10, 11, 1]

        (cindex, n_concordant, n_discordant,
         n_tied_risk, n_tied_time) = concordance_index_censored(
            had_event, observed_time, risk_score)

        self.assertEqual(12, n_concordant)
        self.assertEqual(9, n_discordant)
        self.assertEqual(1, n_tied_risk)
        self.assertEqual(1, n_tied_time)
        self.assertAlmostEqual(0.5681818, cindex, 6)
Example #27
0
    def test_kernel_precomputed(self):
        """Fit MinlipSurvivalAnalysis on a precomputed RBF kernel matrix.

        The full kernel matrix of ``self.x`` is computed once. Samples from
        index 50 onwards are used for training and the first 50 for testing;
        ``_safe_split`` extracts the matching sub-matrices. The concordance
        statistics on the test part are compared to reference values.
        """
        from sklearn.metrics.pairwise import pairwise_kernels
        try:
            # sklearn.cross_validation was removed in scikit-learn 0.20;
            # _safe_split is available from sklearn.utils.metaestimators.
            from sklearn.utils.metaestimators import _safe_split
        except ImportError:
            # Fall back to the legacy location on old scikit-learn releases.
            from sklearn.cross_validation import _safe_split

        m = MinlipSurvivalAnalysis(kernel="precomputed", solver="cvxpy")
        K = pairwise_kernels(self.x, metric="rbf")

        train_idx = numpy.arange(50, self.x.shape[0])
        test_idx = numpy.arange(50)
        X_fit, y_fit = _safe_split(m, K, self.y, train_idx)
        # Passing train_idx as well selects test rows against train columns.
        X_test, y_test = _safe_split(m, K, self.y, test_idx, train_idx)

        m.fit(X_fit, y_fit)

        p = m.predict(X_test)
        v = concordance_index_censored(y_test['cens'], y_test['time'], p)

        expected = numpy.array([0.508748, 378, 365, 0, 0])

        assert_array_almost_equal(expected, v)
def score_concordance_index(estimator, X, y, **predict_params):
    """Return the concordance index of ``estimator`` on ``X`` and ``y``.

    ``predict_params`` are forwarded to ``estimator.predict``. ``y`` is
    indexed with the keys ``'event'`` and ``'time'``.
    """
    prediction = estimator.predict(X, **predict_params)
    # Only the first entry of the result (the c-index itself) is returned.
    return concordance_index_censored(y['event'], y['time'], prediction)[0]