Beispiel #1
0
    def test_compare_rbf(self):
        """Compare a linear SVM on RBF KernelPCA features with the RBF kernel SVM.

        Both models are scored with ``concordance_index_censored`` on the data
        they were fitted on, and the two results are expected to agree.
        """
        features, target, _, _ = load_arff_file(WHAS500_FILE, ['fstat', 'lenfol'], '1')
        svm_params = dict(optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)

        # Explicit route: project the data with kernel PCA, then fit the linear model.
        kernel_pca = KernelPCA(kernel="rbf")
        projected = kernel_pca.fit_transform(features)
        linear_svm = FastSurvivalSVM(**svm_params)
        linear_svm.fit(projected, target)

        # Implicit route: the kernel model receives the untransformed features.
        kernel_svm = FastKernelSurvivalSVM(kernel="rbf", **svm_params)
        kernel_svm.fit(features, target)

        linear_pred = linear_svm.predict(kernel_pca.transform(features))
        kernel_pred = kernel_svm.predict(features)
        self.assertEqual(len(linear_pred), len(kernel_pred))

        event, time = target['fstat'], target['lenfol']
        linear_result = concordance_index_censored(event, time, linear_pred)
        kernel_result = concordance_index_censored(event, time, kernel_pred)

        # The first entry is compared approximately, the remaining ones exactly.
        self.assertAlmostEqual(linear_result[0], kernel_result[0])
        self.assertTupleEqual(linear_result[1:], kernel_result[1:])
    def test_compare_clinical_kernel(self):
        """Compare a linear SVM on clinical-kernel KernelPCA features with the kernel SVM.

        The kernel comes from a ``ClinicalKernelTransform`` fitted on the
        unprocessed data. Both models are scored with
        ``concordance_index_censored`` on the data they were fitted on.
        """
        raw_features, target, _, _ = load_arff_file(
            WHAS500_FILE, ['fstat', 'lenfol'], '1',
            standardize_numeric=False, to_numeric=False)

        clinical = ClinicalKernelTransform()
        clinical.fit(raw_features)

        # The models themselves work on the standardized, encoded data.
        features = encode_categorical(standardize(raw_features))
        svm_params = dict(optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)

        kernel_pca = KernelPCA(kernel=clinical.pairwise_kernel)
        projected = kernel_pca.fit_transform(features)

        linear_svm = FastSurvivalSVM(**svm_params)
        linear_svm.fit(projected, target)

        kernel_svm = FastKernelSurvivalSVM(kernel=clinical.pairwise_kernel, **svm_params)
        kernel_svm.fit(features, target)

        linear_pred = linear_svm.predict(kernel_pca.transform(features))
        kernel_pred = kernel_svm.predict(features)
        self.assertEqual(len(linear_pred), len(kernel_pred))

        event, time = target['fstat'], target['lenfol']
        linear_result = concordance_index_censored(event, time, linear_pred)
        kernel_result = concordance_index_censored(event, time, kernel_pred)

        # The first entry is compared approximately, the remaining ones exactly.
        self.assertAlmostEqual(linear_result[0], kernel_result[0])
        self.assertTupleEqual(linear_result[1:], kernel_result[1:])
Beispiel #3
0
    def test_survival_squared_hinge_loss(self):
        """Check that NaiveSurvivalSVM (squared hinge, primal) and FastSurvivalSVM agree.

        Coefficients are compared to 3 decimals; predictions are compared via
        ``concordance_index_censored``.
        """
        shared = dict(tol=1e-8, max_iter=1000, random_state=0)

        naive_svm = NaiveSurvivalSVM(loss='squared_hinge', dual=False, **shared)
        naive_svm.fit(self.x, self.y)

        fast_svm = FastSurvivalSVM(optimizer='avltree', **shared)
        fast_svm.fit(self.x, self.y)

        assert_array_almost_equal(naive_svm.coef_.ravel(), fast_svm.coef_, 3)

        naive_pred = naive_svm.predict(self.x)
        fast_pred = fast_svm.predict(self.x)
        self.assertEqual(len(naive_pred), len(fast_pred))

        naive_result = concordance_index_censored(
            self.y['fstat'], self.y['lenfol'], naive_pred)
        fast_result = concordance_index_censored(
            self.y['fstat'], self.y['lenfol'], fast_pred)

        # The first entry is compared approximately, the remaining ones exactly.
        self.assertAlmostEqual(naive_result[0], fast_result[0])
        self.assertTupleEqual(naive_result[1:], fast_result[1:])
Beispiel #4
0
    def test_fit_and_predict_hybrid(self):
        """Fit with ``rank_ratio=0.5`` and an intercept, then check stored reference values.

        Skipped for the 'simple' and 'PRSVM' optimizers.
        """
        optimizer = self.OPTIMIZER
        if optimizer in {'simple', 'PRSVM'}:
            raise unittest.SkipTest("regression not implemented for " + optimizer)

        model = FastSurvivalSVM(optimizer=optimizer, rank_ratio=0.5, max_iter=50,
                                fit_intercept=True, random_state=0)
        model.fit(self.x.values, self.y)

        self.assertAlmostEqual(6.1409367385513729, model.intercept_)
        reference_coef = numpy.array([
            -0.0209254120718, -0.265768317208,
            -0.154254689136, 0.0800600947891,
            -0.290121131022, -0.0288851785213,
            0.0998004550073, 0.0454100937492,
            -0.125863947621, 0.0343588337797,
            -0.000710219364914, 0.0546969104996,
            -0.5375338235, -0.0137995110308,
        ])
        assert_array_almost_equal(reference_coef, model.coef_)

        # Root mean squared error of the predictions against the 'lenfol' field.
        predicted = model.predict(self.x.values)
        rmse = numpy.sqrt(mean_squared_error(self.y['lenfol'], predicted))
        self.assertAlmostEqual(780.52617631863893, rmse)
Beispiel #5
0
def get_estimator(method):
    """Return an unfitted survival SVM configured for the named method.

    Recognised names are 'l2_ranking_regression', 'l2_ranking',
    'l2_regression' (all ``FastSurvivalSVM`` with the 'rbtree' optimizer)
    and 'l1' (``NaiveSurvivalSVM`` with hinge loss, dual formulation).

    Raises
    ------
    ValueError
        If ``method`` is none of the names above.
    """
    # Settings every variant has in common.
    shared = {'random_state': 0, 'max_iter': 1000, 'tol': 1e-6}

    if method == 'l2_ranking_regression':
        # rank_ratio is left at the estimator's default here.
        return FastSurvivalSVM(optimizer='rbtree', fit_intercept=True, **shared)
    if method == 'l2_ranking':
        return FastSurvivalSVM(optimizer='rbtree', rank_ratio=1.0,
                               fit_intercept=False, **shared)
    if method == 'l2_regression':
        return FastSurvivalSVM(optimizer='rbtree', rank_ratio=0.0,
                               fit_intercept=True, **shared)
    if method == 'l1':
        return NaiveSurvivalSVM(loss='hinge', dual=True, **shared)

    raise ValueError('unknown method: %s' % method)
    def test_fit_timeit(self):
        """Fit with ``timeit=3`` on 100 sampled rows and check that timings are recorded.

        The rows are drawn without replacement from a ``RandomState`` seeded
        with 0, so the subset is the same on every run.
        """
        rnd = numpy.random.RandomState(0)
        idx = rnd.choice(numpy.arange(self.x.shape[0]), replace=False, size=100)

        ssvm = FastSurvivalSVM(optimizer=self.OPTIMIZER, timeit=3, random_state=0)
        ssvm.fit(self.x.values[idx, :], self.y[idx])

        # assertIn shows the container in the failure message, unlike
        # assertTrue(... in ...), which only reports "False is not true".
        self.assertIn('timings', ssvm.optimizer_result_)
Beispiel #7
0
    def test_fit_timeit(self):
        """Fit with ``timeit=3`` on 100 sampled rows and check that timings are recorded.

        The rows are drawn without replacement from a ``RandomState`` seeded
        with 0, so the subset is the same on every run.
        """
        rnd = numpy.random.RandomState(0)
        idx = rnd.choice(numpy.arange(self.x.shape[0]),
                         replace=False,
                         size=100)

        ssvm = FastSurvivalSVM(optimizer=self.OPTIMIZER,
                               timeit=3,
                               random_state=0)
        ssvm.fit(self.x.values[idx, :], self.y[idx])

        # assertIn shows the container in the failure message, unlike
        # assertTrue(... in ...), which only reports "False is not true".
        self.assertIn('timings', ssvm.optimizer_result_)
    def test_fit_and_predict_ranking(self):
        """Fit with default ``rank_ratio`` and check coefficients and score.

        Also verifies that no ``intercept_`` attribute is set and that there
        is one coefficient per column of ``self.x``.
        """
        ssvm = FastSurvivalSVM(optimizer=self.OPTIMIZER, random_state=0)
        ssvm.fit(self.x.values, self.y)

        self.assertFalse(hasattr(ssvm, "intercept_"))
        expected_coef = numpy.array([-0.02066177, -0.26449933, -0.15205399, 0.0794547, -0.28840498, -0.02864288,
                                     0.09901995, 0.04505302, -0.12512215, 0.03341365, -0.00110442, 0.05446756,
                                     -0.53009875, -0.01394175])
        assert_array_almost_equal(expected_coef, ssvm.coef_)

        # assertEquals is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(self.x.shape[1], ssvm.coef_.shape[0])

        c = ssvm.score(self.x.values, self.y)

        # Compared to 6 decimal places.
        self.assertAlmostEqual(0.7860650174985695, c, 6)
    def test_regression_not_supported(self):
        """``fit`` with ``rank_ratio=0`` must raise for the 'simple' and 'PRSVM' optimizers."""
        n_samples = 100
        features = numpy.zeros((n_samples, 10))
        target = numpy.empty(dtype=[('event', bool), ('time', float)], shape=n_samples)
        target['event'] = numpy.ones(n_samples, dtype=bool)
        target['time'] = numpy.arange(n_samples, dtype=float)

        model = FastSurvivalSVM(rank_ratio=0, optimizer='simple')
        with self.assertRaisesRegex(
                ValueError,
                "optimizer 'simple' does not implement regression objective"):
            model.fit(features, target)

        # Same estimator instance, only the optimizer is switched.
        model.set_params(optimizer='PRSVM')
        with self.assertRaisesRegex(
                ValueError,
                "optimizer 'PRSVM' does not implement regression objective"):
            model.fit(features, target)
Beispiel #10
0
    def test_survial_constraints_no_ties(self):
        """Check sample ordering and the constraint matrix when all times are distinct.

        ``time`` is already sorted and has no ties, so the expected order is
        simply ``0..n-1``. Each row of the expected matrix holds a single -1
        and a single 1.
        """
        y = numpy.array([True, True, False, True, False, False, False, False])
        time = numpy.array([20, 46, 56, 63, 77, 90, 100, 104])

        # numpy.int was only an alias of the builtin int and was removed in
        # NumPy 1.24; the builtin gives the same dtype.
        expected_order = numpy.arange(len(time)).astype(int)

        expected = numpy.array([
            [-1, 1, 0, 0, 0, 0, 0, 0],
            [-1, 0, 1, 0, 0, 0, 0, 0],
            [-1, 0, 0, 1, 0, 0, 0, 0],
            [-1, 0, 0, 0, 1, 0, 0, 0],
            [-1, 0, 0, 0, 0, 1, 0, 0],
            [-1, 0, 0, 0, 0, 0, 1, 0],
            [-1, 0, 0, 0, 0, 0, 0, 1],
            [0, -1, 1, 0, 0, 0, 0, 0],
            [0, -1, 0, 1, 0, 0, 0, 0],
            [0, -1, 0, 0, 1, 0, 0, 0],
            [0, -1, 0, 0, 0, 1, 0, 0],
            [0, -1, 0, 0, 0, 0, 1, 0],
            [0, -1, 0, 0, 0, 0, 0, 1],
            [0, 0, 0, -1, 1, 0, 0, 0],
            [0, 0, 0, -1, 0, 1, 0, 0],
            [0, 0, 0, -1, 0, 0, 1, 0],
            [0, 0, 0, -1, 0, 0, 0, 1],
        ], dtype=numpy.int8)

        # No random state is passed for resolving ties.
        samples_order = FastSurvivalSVM._argsort_and_resolve_ties(time, None)
        assert_array_equal(expected_order, samples_order)

        A = survival_constraints_simple(
            numpy.asarray(y[samples_order], dtype=numpy.uint8))
        assert_array_equal(expected, A.todense())
    def test_survial_constraints_no_ties(self):
        """Check sample ordering and the constraint matrix when all times are distinct.

        ``time`` is already sorted and has no ties, so the expected order is
        simply ``0..n-1``. Each row of the expected matrix holds a single -1
        and a single 1.
        """
        y = numpy.array([True, True, False, True, False, False, False, False])
        time = numpy.array([20, 46, 56, 63, 77, 90, 100, 104])

        # numpy.int was only an alias of the builtin int and was removed in
        # NumPy 1.24; the builtin gives the same dtype.
        expected_order = numpy.arange(len(time)).astype(int)

        expected = numpy.array([
            [-1, 1, 0, 0, 0, 0, 0, 0],
            [-1, 0, 1, 0, 0, 0, 0, 0],
            [-1, 0, 0, 1, 0, 0, 0, 0],
            [-1, 0, 0, 0, 1, 0, 0, 0],
            [-1, 0, 0, 0, 0, 1, 0, 0],
            [-1, 0, 0, 0, 0, 0, 1, 0],
            [-1, 0, 0, 0, 0, 0, 0, 1],
            [0, -1, 1, 0, 0, 0, 0, 0],
            [0, -1, 0, 1, 0, 0, 0, 0],
            [0, -1, 0, 0, 1, 0, 0, 0],
            [0, -1, 0, 0, 0, 1, 0, 0],
            [0, -1, 0, 0, 0, 0, 1, 0],
            [0, -1, 0, 0, 0, 0, 0, 1],
            [0, 0, 0, -1, 1, 0, 0, 0],
            [0, 0, 0, -1, 0, 1, 0, 0],
            [0, 0, 0, -1, 0, 0, 1, 0],
            [0, 0, 0, -1, 0, 0, 0, 1],
        ], dtype=numpy.int8)

        # No random state is passed for resolving ties.
        samples_order = FastSurvivalSVM._argsort_and_resolve_ties(time, None)
        assert_array_equal(expected_order, samples_order)

        A = survival_constraints_simple(numpy.asarray(y[samples_order], dtype=numpy.uint8))
        assert_array_equal(expected, A.todense())
Beispiel #12
0
    def test_fit_and_predict_ranking(self):
        """Fit with default ``rank_ratio`` and check coefficients and score.

        Also verifies that no ``intercept_`` attribute is set and that there
        is one coefficient per column of ``self.x``.
        """
        ssvm = FastSurvivalSVM(optimizer=self.OPTIMIZER, random_state=0)
        ssvm.fit(self.x.values, self.y)

        self.assertFalse(hasattr(ssvm, "intercept_"))
        expected_coef = numpy.array([
            -0.02066177, -0.26449933, -0.15205399, 0.0794547, -0.28840498,
            -0.02864288, 0.09901995, 0.04505302, -0.12512215, 0.03341365,
            -0.00110442, 0.05446756, -0.53009875, -0.01394175
        ])
        assert_array_almost_equal(expected_coef, ssvm.coef_)

        # assertEquals is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(self.x.shape[1], ssvm.coef_.shape[0])

        c = ssvm.score(self.x.values, self.y)

        # Compared to 6 decimal places.
        self.assertAlmostEqual(0.7860650174985695, c, 6)
Beispiel #13
0
    def test_regression_not_supported(self):
        """``fit`` with ``rank_ratio=0`` must raise for the 'simple' and 'PRSVM' optimizers."""
        n_samples = 100
        features = numpy.zeros((n_samples, 10))
        target = numpy.empty(dtype=[('event', bool), ('time', float)], shape=n_samples)
        target['event'] = numpy.ones(n_samples, dtype=bool)
        target['time'] = numpy.arange(n_samples, dtype=float)

        model = FastSurvivalSVM(rank_ratio=0, optimizer='simple')
        with self.assertRaisesRegex(
                ValueError,
                "optimizer 'simple' does not implement regression objective"):
            model.fit(features, target)

        # Same estimator instance, only the optimizer is switched.
        model.set_params(optimizer='PRSVM')
        with self.assertRaisesRegex(
                ValueError,
                "optimizer 'PRSVM' does not implement regression objective"):
            model.fit(features, target)
    def test_fit_and_predict_hybrid_no_intercept(self):
        """Fit with ``rank_ratio=0.5`` and no intercept, then check stored reference values.

        Skipped for the 'simple' and 'PRSVM' optimizers.
        """
        optimizer = self.OPTIMIZER
        if optimizer in {'simple', 'PRSVM'}:
            raise unittest.SkipTest("regression not implemented for " + optimizer)

        model = FastSurvivalSVM(optimizer=optimizer, rank_ratio=0.5, max_iter=50,
                                fit_intercept=False, random_state=0)
        model.fit(self.x.values, self.y)

        # Without fit_intercept the fitted model must not expose intercept_.
        self.assertFalse(hasattr(model, "intercept_"))
        reference_coef = numpy.array([
            0.00669121, -0.2754864,
            -0.14124808, 0.0748376,
            -0.2812598, 0.07543884,
            0.09845683, 0.08398258,
            -0.12182314, 0.02637739,
            0.03060149, 0.11870598,
            -0.52688224, -0.01762842,
        ])
        assert_array_almost_equal(reference_coef, model.coef_)

        # Root mean squared error of the predictions against the 'lenfol' field.
        predicted = model.predict(self.x.values)
        rmse = numpy.sqrt(mean_squared_error(self.y['lenfol'], predicted))
        self.assertAlmostEqual(1128.4460587629746, rmse)
    def test_fit_and_predict_regression_no_intercept(self):
        """Fit with ``rank_ratio=0.0`` and no intercept, then check stored reference values.

        Skipped for the 'simple' and 'PRSVM' optimizers.
        """
        optimizer = self.OPTIMIZER
        if optimizer in {'simple', 'PRSVM'}:
            raise unittest.SkipTest("regression not implemented for " + optimizer)

        model = FastSurvivalSVM(optimizer=optimizer, rank_ratio=0.0, max_iter=50,
                                fit_intercept=False, random_state=0)
        model.fit(self.x.values, self.y)

        # Without fit_intercept the fitted model must not expose intercept_.
        self.assertFalse(hasattr(model, "intercept_"))
        reference_coef = numpy.array([
            1.39989875, -1.16903161,
            -0.40195857, -0.05848903,
            -0.08421557, 4.11924729,
            0.25135451, 1.89067276,
            -0.25751401, -0.10213143,
            1.56333622, 3.10136873,
            -2.23644848, -0.11620715,
        ])
        assert_array_almost_equal(reference_coef, model.coef_)

        # Root mean squared error of the predictions against the 'lenfol' field.
        predicted = model.predict(self.x.values)
        rmse = numpy.sqrt(mean_squared_error(self.y['lenfol'], predicted))
        self.assertAlmostEqual(15838.510668936022, rmse)
Beispiel #16
0
    def test_unknown_optimizer(self):
        """``fit`` must raise ``ValueError`` for an optimizer name it does not know."""
        n_samples = 100
        features = numpy.zeros((n_samples, 10))
        target = numpy.empty(dtype=[('event', bool), ('time', float)], shape=n_samples)
        target['event'] = numpy.ones(n_samples, dtype=bool)
        # Times run from 1 to 100, so none of them is zero.
        target['time'] = numpy.arange(1, 101, dtype=float)

        model = FastSurvivalSVM(rank_ratio=0, optimizer='random stuff')
        with self.assertRaisesRegex(ValueError, "unknown optimizer: random stuff"):
            model.fit(features, target)
Beispiel #17
0
    def test_only_one_label(self):
        """``fit`` must reject a plain integer array passed as ``y``."""
        features = numpy.zeros((100, 10))
        labels = numpy.ones(100, dtype=int)
        expected_message = (
            'y must be a structured array with the first field'
            ' being a binary class event indicator and the second field'
            ' the time of the event/censoring')

        model = FastSurvivalSVM()
        with self.assertRaisesRegex(ValueError, expected_message):
            model.fit(features, labels)
Beispiel #18
0
    def test_all_censored(self):
        """``fit`` must raise when every event indicator is False."""
        n_samples = 10
        features = numpy.arange(80).reshape(n_samples, 8)
        target = numpy.empty(dtype=[('event', bool), ('time', float)], shape=n_samples)
        target['event'] = numpy.zeros(n_samples, dtype=bool)
        target['time'] = numpy.array([0, 1, 2, 1, 1, 0, 1, 2, 3, 1])

        model = FastSurvivalSVM()
        with self.assertRaisesRegex(ValueError, "all samples are censored"):
            model.fit(features, target)
Beispiel #19
0
    def test_alpha_negative(self):
        """``fit`` must raise ``ValueError`` when ``alpha`` is -1."""
        n_samples = 100
        features = numpy.zeros((n_samples, 10))
        target = numpy.empty(dtype=[('event', bool), ('time', float)], shape=n_samples)
        target['event'] = numpy.ones(n_samples, dtype=bool)
        target['time'] = numpy.arange(n_samples, dtype=float)

        model = FastSurvivalSVM(alpha=-1)
        with self.assertRaisesRegex(ValueError, "alpha must be positive"):
            model.fit(features, target)
Beispiel #20
0
    def test_y_one_field(self):
        """``fit`` must reject a structured ``y`` that has only an 'event' field."""
        features = numpy.arange(80).reshape(10, 8)
        target = numpy.ones(dtype=[('event', bool)], shape=10)
        expected_message = (
            'y must be a structured array with the first field'
            ' being a binary class event indicator and the second field'
            ' the time of the event/censoring')

        model = FastSurvivalSVM()
        with self.assertRaisesRegex(ValueError, expected_message):
            model.fit(features, target)
Beispiel #21
0
    def test_survival_constraints_with_ties(self):
        """Check sample ordering and the constraint matrix when times contain ties.

        The order is computed with a ``RandomState`` seeded with 0 and compared
        with a fixed expected permutation. Each row of the expected matrix
        holds a single -1 and a single 1.
        """
        event = numpy.array([True, True, False, False, True, False,
                             True, True, False, False, False, True])
        time = numpy.array([20, 33, 33, 40, 50, 66, 66, 66, 89, 110, 110, 111])

        order = FastSurvivalSVM._argsort_and_resolve_ties(
            time, numpy.random.RandomState(0))
        numpy.testing.assert_array_equal(
            numpy.array([0, 2, 1, 3, 4, 7, 5, 6, 8, 9, 10, 11]), order)

        reference = numpy.array([
            # rows with -1 in column 0
            [-1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [-1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [-1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
            [-1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
            [-1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
            [-1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [-1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
            [-1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [-1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
            [-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            [-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            # rows with -1 in column 2
            [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, -1, 0, 0, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, -1, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, -1, 0, 0, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, -1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, -1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
            [0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            [0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            # rows with -1 in column 4
            [0, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, -1, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, -1, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, -1, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, -1, 0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 1],
            # rows with -1 in column 5
            [0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, -1, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, -1, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, -1, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 1],
            # rows with -1 in column 7
            [0, 0, 0, 0, 0, 0, 0, -1, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, -1, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 1],
        ], dtype=numpy.int8)

        # The event indicators are reordered before the constraints are built.
        sorted_event = numpy.asarray(event[order], dtype=numpy.uint8)
        constraints = survival_constraints_simple(sorted_event)
        assert_array_equal(reference, constraints.todense())
    def test_survival_squared_hinge_loss(self):
        """Check that NaiveSurvivalSVM (squared hinge, primal) and FastSurvivalSVM agree.

        Coefficients are compared to 3 decimals; predictions are compared via
        ``concordance_index_censored``.
        """
        shared = dict(tol=1e-8, max_iter=1000, random_state=0)

        naive_svm = NaiveSurvivalSVM(loss='squared_hinge', dual=False, **shared)
        naive_svm.fit(self.x, self.y)

        fast_svm = FastSurvivalSVM(optimizer='avltree', **shared)
        fast_svm.fit(self.x, self.y)

        assert_array_almost_equal(naive_svm.coef_.ravel(), fast_svm.coef_, 3)

        naive_pred = naive_svm.predict(self.x)
        fast_pred = fast_svm.predict(self.x)
        self.assertEqual(len(naive_pred), len(fast_pred))

        naive_result = concordance_index_censored(
            self.y['fstat'], self.y['lenfol'], naive_pred)
        fast_result = concordance_index_censored(
            self.y['fstat'], self.y['lenfol'], fast_pred)

        # The first entry is compared approximately, the remaining ones exactly.
        self.assertAlmostEqual(naive_result[0], fast_result[0])
        self.assertTupleEqual(naive_result[1:], fast_result[1:])
    def test_fit_and_predict_hybrid(self):
        """Fit with ``rank_ratio=0.5`` and an intercept, then check stored reference values.

        Skipped for the 'simple' and 'PRSVM' optimizers.
        """
        optimizer = self.OPTIMIZER
        if optimizer in {'simple', 'PRSVM'}:
            raise unittest.SkipTest("regression not implemented for " + optimizer)

        model = FastSurvivalSVM(optimizer=optimizer, rank_ratio=0.5, max_iter=50,
                                fit_intercept=True, random_state=0)
        model.fit(self.x.values, self.y)

        self.assertAlmostEqual(6.1409367385513729, model.intercept_)
        reference_coef = numpy.array([
            -0.0209254120718, -0.265768317208,
            -0.154254689136, 0.0800600947891,
            -0.290121131022, -0.0288851785213,
            0.0998004550073, 0.0454100937492,
            -0.125863947621, 0.0343588337797,
            -0.000710219364914, 0.0546969104996,
            -0.5375338235, -0.0137995110308,
        ])
        assert_array_almost_equal(reference_coef, model.coef_)

        # Root mean squared error of the predictions against the 'lenfol' field.
        predicted = model.predict(self.x.values)
        rmse = numpy.sqrt(mean_squared_error(self.y['lenfol'], predicted))
        self.assertAlmostEqual(780.52617631863893, rmse)
    def test_fit_and_predict_regression(self):
        """Fit with ``rank_ratio=0.0`` and an intercept, then check stored reference values.

        Skipped for the 'simple' and 'PRSVM' optimizers.
        """
        optimizer = self.OPTIMIZER
        if optimizer in {'simple', 'PRSVM'}:
            raise unittest.SkipTest("regression not implemented for " + optimizer)

        model = FastSurvivalSVM(optimizer=optimizer, rank_ratio=0.0, max_iter=50,
                                fit_intercept=True, random_state=0)
        model.fit(self.x.values, self.y)

        self.assertAlmostEqual(6.4160179606675278, model.intercept_)
        reference_coef = numpy.array([
            -0.0730891368237, -0.536630355029,
            -0.497411603275, 0.269039958377,
            -0.730559850692, -0.0148443526234,
            0.285916578892, 0.165960302339,
            -0.301749910087, 0.334855938531,
            0.0886214732161, 0.0554890272028,
            -2.12680470014, 0.0421466831393,
        ])
        assert_array_almost_equal(reference_coef, model.coef_)

        # Root mean squared error of the predictions against the 'lenfol' field.
        predicted = model.predict(self.x.values)
        rmse = numpy.sqrt(mean_squared_error(self.y['lenfol'], predicted))
        self.assertAlmostEqual(1206.6556186869332, rmse)
Beispiel #25
0
    def test_time_not_numeric(self):
        """``fit`` must raise when the 'time' field has a boolean dtype."""
        n_samples = 10
        features = numpy.arange(80).reshape(n_samples, 8)
        target = numpy.empty(dtype=[('event', bool), ('time', bool)], shape=n_samples)
        target['event'] = numpy.array([0, 1, 0, 1, 1, 0, 1, 0, 0, 1], dtype=bool)
        target['time'] = numpy.ones(n_samples, dtype=bool)

        model = FastSurvivalSVM()
        with self.assertRaisesRegex(ValueError, "time must be numeric, but found bool"):
            model.fit(features, target)
Beispiel #26
0
    def test_event_not_binary(self):
        """``fit`` must raise when the 'event' field holds integers instead of booleans."""
        n_samples = 10
        features = numpy.arange(80).reshape(n_samples, 8)
        target = numpy.empty(dtype=[('event', int), ('time', float)], shape=n_samples)
        target['event'] = numpy.array([0, 1, 2, 1, 1, 0, 1, 2, 3, 1], dtype=int)
        target['time'] = numpy.arange(n_samples)

        model = FastSurvivalSVM()
        with self.assertRaisesRegex(
                ValueError,
                "elements of event indicator must be boolean, but found int"):
            model.fit(features, target)
Beispiel #27
0
    def test_negative_time(self):
        """``fit`` with ``rank_ratio=0.5`` must raise when a time value is negative."""
        n_samples = 10
        features = numpy.arange(80).reshape(n_samples, 8)
        target = numpy.empty(dtype=[('event', bool), ('time', float)], shape=n_samples)
        target['event'] = numpy.array([0, 1, 0, 1, 1, 0, 1, 0, 0, 1], dtype=bool)
        # The third entry (-2) is the offending value.
        target['time'] = numpy.array([1, 1, -2, 1, 1, 6, 1, 2, 3, 1])

        model = FastSurvivalSVM(rank_ratio=0.5)
        with self.assertRaisesRegex(
                ValueError,
                "observed time contains values smaller or equal to zero"):
            model.fit(features, target)
Beispiel #28
0
    def test_ranking_with_fit_intercept(self):
        """``fit`` must raise when ``fit_intercept=True`` is combined with ``rank_ratio=1.0``."""
        n_samples = 100
        features = numpy.zeros((n_samples, 10))
        target = numpy.empty(dtype=[('event', bool), ('time', float)], shape=n_samples)
        target['event'] = numpy.ones(n_samples, dtype=bool)
        target['time'] = numpy.arange(1, 101, dtype=float)

        model = FastSurvivalSVM(rank_ratio=1.0, fit_intercept=True)
        with self.assertRaisesRegex(
                ValueError,
                "fit_intercept=True is only meaningful if rank_ratio < 1.0"):
            model.fit(features, target)
Beispiel #29
0
    def test_fit_and_predict_hybrid_no_intercept(self):
        """Fit with ``rank_ratio=0.5`` and no intercept, then check stored reference values.

        Skipped for the 'simple' and 'PRSVM' optimizers.
        """
        optimizer = self.OPTIMIZER
        if optimizer in {'simple', 'PRSVM'}:
            raise unittest.SkipTest("regression not implemented for " + optimizer)

        model = FastSurvivalSVM(optimizer=optimizer, rank_ratio=0.5, max_iter=50,
                                fit_intercept=False, random_state=0)
        model.fit(self.x.values, self.y)

        # Without fit_intercept the fitted model must not expose intercept_.
        self.assertFalse(hasattr(model, "intercept_"))
        reference_coef = numpy.array([
            0.00669121, -0.2754864,
            -0.14124808, 0.0748376,
            -0.2812598, 0.07543884,
            0.09845683, 0.08398258,
            -0.12182314, 0.02637739,
            0.03060149, 0.11870598,
            -0.52688224, -0.01762842,
        ])
        assert_array_almost_equal(reference_coef, model.coef_)

        # Root mean squared error of the predictions against the 'lenfol' field.
        predicted = model.predict(self.x.values)
        rmse = numpy.sqrt(mean_squared_error(self.y['lenfol'], predicted))
        self.assertAlmostEqual(1128.4460587629746, rmse)
Beispiel #30
0
    def test_fit_and_predict_regression_no_intercept(self):
        """Fit with ``rank_ratio=0.0`` and no intercept, then check stored reference values.

        Skipped for the 'simple' and 'PRSVM' optimizers.
        """
        optimizer = self.OPTIMIZER
        if optimizer in {'simple', 'PRSVM'}:
            raise unittest.SkipTest("regression not implemented for " + optimizer)

        model = FastSurvivalSVM(optimizer=optimizer, rank_ratio=0.0, max_iter=50,
                                fit_intercept=False, random_state=0)
        model.fit(self.x.values, self.y)

        # Without fit_intercept the fitted model must not expose intercept_.
        self.assertFalse(hasattr(model, "intercept_"))
        reference_coef = numpy.array([
            1.39989875, -1.16903161,
            -0.40195857, -0.05848903,
            -0.08421557, 4.11924729,
            0.25135451, 1.89067276,
            -0.25751401, -0.10213143,
            1.56333622, 3.10136873,
            -2.23644848, -0.11620715,
        ])
        assert_array_almost_equal(reference_coef, model.coef_)

        # Root mean squared error of the predictions against the 'lenfol' field.
        predicted = model.predict(self.x.values)
        rmse = numpy.sqrt(mean_squared_error(self.y['lenfol'], predicted))
        self.assertAlmostEqual(15838.510668936022, rmse)
    def test_survival_constraints_with_ties(self):
        """Check sample ordering and the constraint matrix when times contain ties.

        The order is computed with a ``RandomState`` seeded with 0 and compared
        with a fixed expected permutation. Each row of the expected matrix
        holds a single -1 and a single 1.
        """
        event = numpy.array([True, True, False, False, True, False,
                             True, True, False, False, False, True])
        time = numpy.array([20, 33, 33, 40, 50, 66, 66, 66, 89, 110, 110, 111])

        order = FastSurvivalSVM._argsort_and_resolve_ties(
            time, numpy.random.RandomState(0))
        numpy.testing.assert_array_equal(
            numpy.array([0, 2, 1, 3, 4, 7, 5, 6, 8, 9, 10, 11]), order)

        reference = numpy.array([
            # rows with -1 in column 0
            [-1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [-1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [-1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
            [-1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
            [-1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
            [-1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [-1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
            [-1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [-1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
            [-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            [-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            # rows with -1 in column 2
            [0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, -1, 0, 0, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, -1, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, -1, 0, 0, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, -1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, -1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
            [0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            [0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            # rows with -1 in column 4
            [0, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, -1, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, -1, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, -1, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, -1, 0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 1],
            # rows with -1 in column 5
            [0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, -1, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, -1, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, -1, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 1],
            # rows with -1 in column 7
            [0, 0, 0, 0, 0, 0, 0, -1, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, -1, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 1],
        ], dtype=numpy.int8)

        # The event indicators are reordered before the constraints are built.
        sorted_event = numpy.asarray(event[order], dtype=numpy.uint8)
        constraints = survival_constraints_simple(sorted_event)
        assert_array_equal(reference, constraints.todense())
    def test_compare_rbf(self):
        """Compare a linear SVM on RBF KernelPCA features with the RBF kernel SVM.

        Both models are scored with ``concordance_index_censored`` on the data
        they were fitted on, and the two results are expected to agree.
        """
        features, target, _, _ = load_arff_file(WHAS500_FILE, ['fstat', 'lenfol'], '1')
        svm_params = dict(optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)

        # Explicit route: project the data with kernel PCA, then fit the linear model.
        kernel_pca = KernelPCA(kernel="rbf")
        projected = kernel_pca.fit_transform(features)
        linear_svm = FastSurvivalSVM(**svm_params)
        linear_svm.fit(projected, target)

        # Implicit route: the kernel model receives the untransformed features.
        kernel_svm = FastKernelSurvivalSVM(kernel="rbf", **svm_params)
        kernel_svm.fit(features, target)

        linear_pred = linear_svm.predict(kernel_pca.transform(features))
        kernel_pred = kernel_svm.predict(features)
        self.assertEqual(len(linear_pred), len(kernel_pred))

        event, time = target['fstat'], target['lenfol']
        linear_result = concordance_index_censored(event, time, linear_pred)
        kernel_result = concordance_index_censored(event, time, kernel_pred)

        # The first entry is compared approximately, the remaining ones exactly.
        self.assertAlmostEqual(linear_result[0], kernel_result[0])
        self.assertTupleEqual(linear_result[1:], kernel_result[1:])
Beispiel #33
0
    def test_compare_clinical_kernel(self):
        """Compare a linear SVM on clinical-kernel KernelPCA features with the kernel SVM.

        The kernel comes from a ``ClinicalKernelTransform`` fitted on the
        unprocessed data. Both models are scored with
        ``concordance_index_censored`` on the data they were fitted on.
        """
        raw_features, target, _, _ = load_arff_file(
            WHAS500_FILE, ['fstat', 'lenfol'], '1',
            standardize_numeric=False, to_numeric=False)

        clinical = ClinicalKernelTransform()
        clinical.fit(raw_features)

        # The models themselves work on the standardized, encoded data.
        features = encode_categorical(standardize(raw_features))
        svm_params = dict(optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)

        kernel_pca = KernelPCA(kernel=clinical.pairwise_kernel)
        projected = kernel_pca.fit_transform(features)

        linear_svm = FastSurvivalSVM(**svm_params)
        linear_svm.fit(projected, target)

        kernel_svm = FastKernelSurvivalSVM(kernel=clinical.pairwise_kernel, **svm_params)
        kernel_svm.fit(features, target)

        linear_pred = linear_svm.predict(kernel_pca.transform(features))
        kernel_pred = kernel_svm.predict(features)
        self.assertEqual(len(linear_pred), len(kernel_pred))

        event, time = target['fstat'], target['lenfol']
        linear_result = concordance_index_censored(event, time, linear_pred)
        kernel_result = concordance_index_censored(event, time, kernel_pred)

        # The first entry is compared approximately, the remaining ones exactly.
        self.assertAlmostEqual(linear_result[0], kernel_result[0])
        self.assertTupleEqual(linear_result[1:], kernel_result[1:])
Beispiel #34
0
    def test_fit_and_predict_regression(self):
        """Fit with ``rank_ratio=0.0`` and an intercept, then check stored reference values.

        Skipped for the 'simple' and 'PRSVM' optimizers.
        """
        optimizer = self.OPTIMIZER
        if optimizer in {'simple', 'PRSVM'}:
            raise unittest.SkipTest("regression not implemented for " + optimizer)

        model = FastSurvivalSVM(optimizer=optimizer, rank_ratio=0.0, max_iter=50,
                                fit_intercept=True, random_state=0)
        model.fit(self.x.values, self.y)

        self.assertAlmostEqual(6.4160179606675278, model.intercept_)
        reference_coef = numpy.array([
            -0.0730891368237, -0.536630355029,
            -0.497411603275, 0.269039958377,
            -0.730559850692, -0.0148443526234,
            0.285916578892, 0.165960302339,
            -0.301749910087, 0.334855938531,
            0.0886214732161, 0.0554890272028,
            -2.12680470014, 0.0421466831393,
        ])
        assert_array_almost_equal(reference_coef, model.coef_)

        # Root mean squared error of the predictions against the 'lenfol' field.
        predicted = model.predict(self.x.values)
        rmse = numpy.sqrt(mean_squared_error(self.y['lenfol'], predicted))
        self.assertAlmostEqual(1206.6556186869332, rmse)
Beispiel #35
0
    def test_rank_ratio_out_of_bounds(self):
        """``fit`` must raise ``ValueError`` for ``rank_ratio`` values of -1, 1.2, NaN and inf."""
        x = numpy.zeros((100, 10))
        y = numpy.empty(dtype=[('event', bool), ('time', float)], shape=100)
        y['event'] = numpy.ones(100, dtype=bool)
        y['time'] = numpy.arange(100, dtype=float)

        # Raw string: "\[" in a normal literal is an invalid escape sequence
        # (SyntaxWarning since Python 3.12). The regex itself is unchanged.
        pattern = r"rank_ratio must be in \[0; 1\]"

        ssvm = FastSurvivalSVM(rank_ratio=-1)
        self.assertRaisesRegex(ValueError, pattern, ssvm.fit, x, y)

        # Re-use the same estimator for the remaining invalid values.
        for invalid_ratio in (1.2, numpy.nan, numpy.inf):
            ssvm.set_params(rank_ratio=invalid_ratio)
            self.assertRaisesRegex(ValueError, pattern, ssvm.fit, x, y)
    def test_rank_ratio_out_of_bounds(self):
        """``fit`` must raise ``ValueError`` for ``rank_ratio`` values of -1, 1.2, NaN and inf."""
        x = numpy.zeros((100, 10))
        y = numpy.empty(dtype=[('event', bool), ('time', float)], shape=100)
        y['event'] = numpy.ones(100, dtype=bool)
        y['time'] = numpy.arange(100, dtype=float)

        # Raw string: "\[" in a normal literal is an invalid escape sequence
        # (SyntaxWarning since Python 3.12). The regex itself is unchanged.
        pattern = r"rank_ratio must be in \[0; 1\]"

        ssvm = FastSurvivalSVM(rank_ratio=-1)
        self.assertRaisesRegex(ValueError, pattern, ssvm.fit, x, y)

        # Re-use the same estimator for the remaining invalid values.
        for invalid_ratio in (1.2, numpy.nan, numpy.inf):
            ssvm.set_params(rank_ratio=invalid_ratio)
            self.assertRaisesRegex(ValueError, pattern, ssvm.fit, x, y)
Beispiel #37
0
 def test_default_optimizer(self):
     """A default-constructed model must report 'avltree' as its optimizer after ``fit``."""
     fitted = FastSurvivalSVM().fit(self.x.values, self.y)
     self.assertEqual('avltree', fitted.optimizer)