コード例 #1
0
def refit_and_predict(cut_points_estimates, X_train, X_test, Y_train,
                      delta_train, Y_test, delta_test):
    """Refit an unpenalized Cox model on features binarized at given cut points.

    The binarizer uses ``cut_points_estimates`` as bin boundaries and is
    fitted on the concatenation of the train and test features, then
    applied to each part separately. A ``CoxRegression`` without penalty is
    trained on the binarized training features.

    Returns
    -------
    tuple
        ``(c_index, marker, lp_train)`` where ``marker`` and ``lp_train``
        are the products of the binarized test / train features with the
        fitted coefficients, and ``c_index`` is the concordance index of
        ``marker`` on the test set, replaced by ``1 - c_index`` whenever
        that is larger.
    """
    encoder = FeaturesBinarizer(method='given',
                                bins_boundaries=cut_points_estimates,
                                remove_first=True)
    encoder.fit(pd.concat([X_train, X_test]))
    train_bin = encoder.transform(X_train)
    test_bin = encoder.transform(X_test)

    cox_settings = dict(penalty='none', tol=1e-5, solver='agd',
                        verbose=False, max_iter=100, step=0.3,
                        warm_start=True)
    cox = CoxRegression(**cox_settings)
    # The step is fixed above, so the solver's line search is switched off.
    cox._solver_obj.linesearch = False
    cox.fit(train_bin, Y_train, delta_train)

    beta = cox.coeffs
    marker = test_bin.dot(beta)
    lp_train = train_bin.dot(beta)

    raw_c_index = concordance_index(Y_test, marker, delta_test)
    # Report the index of whichever orientation of the marker ranks better.
    return max(raw_c_index, 1 - raw_c_index), marker, lp_train
コード例 #2
0
def get_times2(n_simu, n_samples, n_features, n_cut_points):
    """Time one binacox run (binarization + model fit) on simulated data.

    Data are drawn from ``SimuCoxRegWithCutPoints`` seeded with ``n_simu``;
    ``cov_corr`` and ``sparsity`` are read from the enclosing module scope.

    Returns
    -------
    The difference between two ``time()`` readings taken just before the
    binarization and just after the fit.
    """
    print("  n_simu=%s" % n_simu)
    simulator = SimuCoxRegWithCutPoints(n_samples=n_samples,
                                        n_features=n_features,
                                        seed=n_simu, verbose=False,
                                        n_cut_points=n_cut_points,
                                        shape=2, scale=.1,
                                        cov_corr=cov_corr,
                                        sparsity=sparsity)
    # Only the first three of the six simulated outputs are needed here.
    X, Y, delta, _, _, _ = simulator.simulate()

    # Binacox method
    started = time()
    encoder = FeaturesBinarizer(n_cuts=50)
    X_bin = encoder.fit_transform(X)
    block_starts = encoder.blocks_start
    block_lengths = encoder.blocks_length
    cox = CoxRegression(penalty='binarsity', tol=1e-5,
                        solver='agd', verbose=False,
                        max_iter=100, step=0.3,
                        blocks_start=block_starts,
                        blocks_length=block_lengths,
                        C=25, warm_start=True)
    cox._solver_obj.linesearch = False
    cox.fit(X_bin, Y, delta)
    finished = time()

    return finished - started
コード例 #3
0
 def test_CoxRegression_solver_step(self):
     """...Test that CoxRegression forwards ``step`` to its solver object,
     both at construction time and when the attribute is reassigned
     """
     for solver in self.solvers:
         initial = self.float_1
         learner = CoxRegression(solver=solver, step=initial)
         self.assertEqual(learner.step, initial)
         self.assertEqual(learner._solver_obj.step, initial)

         # Reassigning on the learner must be mirrored on the solver.
         updated = self.float_2
         learner.step = updated
         self.assertEqual(learner.step, updated)
         self.assertEqual(learner._solver_obj.step, updated)
コード例 #4
0
def get_times1(n_simu, n_samples, n_features, n_cut_points):
    """Time the binacox fit against two runs of ``get_p_values_j``.

    Data are drawn from ``SimuCoxRegWithCutPoints`` seeded with ``n_simu``;
    ``cov_corr`` and ``sparsity`` are read from the enclosing module scope.

    Three timings are taken, each as a difference of ``time()`` readings:

    * ``time_bina``: binarization of ``X`` plus the binarsity-penalized
      ``CoxRegression`` fit;
    * ``time_ac_all``: ``get_p_values_j`` on feature 0 with every value of
      ``X`` lying between its ``epsilon`` and ``100 - epsilon`` percentiles
      as candidate;
    * ``time_ac_grid``: the same call restricted to the binarizer
      boundaries of feature ``'0'`` lying in that percentile range.

    Returns
    -------
    tuple
        ``(n_samples, time_bina, time_ac_all, time_ac_grid)``
    """
    print("  n_simu=%s" % n_simu)
    seed = n_simu
    simu = SimuCoxRegWithCutPoints(n_samples=n_samples, n_features=n_features,
                                   seed=seed, verbose=False,
                                   n_cut_points=n_cut_points,
                                   shape=2, scale=.1, cov_corr=cov_corr,
                                   sparsity=sparsity)
    # Only the first three of the six simulated outputs are needed here.
    X, Y, delta, _, _, _ = simu.simulate()

    # Binacox method
    # Start the clock before the binarization, as get_times2 does. Without
    # this assignment ``tic`` was undefined when computing ``time_bina``.
    tic = time()
    n_cuts = 50
    binarizer = FeaturesBinarizer(n_cuts=n_cuts)
    X_bin = binarizer.fit_transform(X)
    blocks_start = binarizer.blocks_start
    blocks_length = binarizer.blocks_length
    boundaries = binarizer.boundaries['0']

    solver = 'agd'
    learner = CoxRegression(penalty='binarsity', tol=1e-5,
                            solver=solver, verbose=False,
                            max_iter=100, step=0.3,
                            blocks_start=blocks_start,
                            blocks_length=blocks_length,
                            C=25, warm_start=True)
    learner._solver_obj.linesearch = False
    learner.fit(X_bin, Y, delta)
    tac = time()
    time_bina = tac - tic

    # Auto Cutoff Method
    X = np.array(X)
    epsilon = 10
    p1 = np.percentile(X, epsilon)
    p2 = np.percentile(X, 100 - epsilon)
    values_to_test = X[np.where((X <= p2) & (X >= p1))]
    tic = time()
    get_p_values_j(X, 0, Y, delta, values_to_test, epsilon)
    tac = time()
    time_ac_all = tac - tic

    # Same method on the binarizer grid; here the percentile computation
    # is part of the timed section, as in the original measurement.
    tic = time()
    p1 = np.percentile(X, epsilon)
    p2 = np.percentile(X, 100 - epsilon)
    values_to_test = boundaries[
        np.where((boundaries <= p2) & (boundaries >= p1))]
    get_p_values_j(X, 0, Y, delta, values_to_test, epsilon)
    tac = time()
    time_ac_grid = tac - tic

    return n_samples, time_bina, time_ac_all, time_ac_grid
コード例 #5
0
    def test_CoxRegression_penalty_elastic_net_ratio(self):
        """...Test CoxRegression handling of the elastic_net_ratio parameter:
        forwarded to the prox for 'elasticnet', RuntimeWarning otherwise
        """
        ratio_1 = 0.6
        ratio_2 = 0.3

        for penalty in self.penalties:
            if penalty != 'elasticnet':
                # The binarsity penalty is always built with a block layout.
                if penalty == 'binarsity':
                    extra = {'blocks_start': [0], 'blocks_length': [1]}
                else:
                    extra = {}

                msg = '^Penalty "%s" has no elastic_net_ratio attribute$' % \
                      penalty

                # Warning expected when the ratio is given to the constructor
                with self.assertWarnsRegex(RuntimeWarning, msg):
                    CoxRegression(penalty=penalty, elastic_net_ratio=0.8,
                                  **extra)

                # ... and when it is assigned on an existing learner
                learner = CoxRegression(penalty=penalty, **extra)
                with self.assertWarnsRegex(RuntimeWarning, msg):
                    learner.elastic_net_ratio = ratio_1
                continue

            learner = CoxRegression(penalty=penalty,
                                    C=self.float_1,
                                    elastic_net_ratio=ratio_1)
            prox = learner._prox_obj
            self.assertEqual(learner.C, self.float_1)
            self.assertEqual(learner.elastic_net_ratio, ratio_1)
            self.assertEqual(prox.strength, 1. / self.float_1)
            self.assertEqual(prox.ratio, ratio_1)

            # Changing the ratio must leave C untouched
            learner.elastic_net_ratio = ratio_2
            self.assertEqual(learner.C, self.float_1)
            self.assertEqual(learner.elastic_net_ratio, ratio_2)
            self.assertEqual(learner._prox_obj.ratio, ratio_2)
コード例 #6
0
    def test_CoxRegression_settings(self):
        """...Test that CoxRegression builds the solver / prox object matching
        the requested ``solver`` / ``penalty`` and rejects unknown names
        """
        # solver
        expected_solver = {'gd': GD, 'agd': AGD}
        for solver in self.solvers:
            learner = CoxRegression(solver=solver)
            self.assertIsInstance(learner._solver_obj,
                                  expected_solver[solver])

        with self.assertRaisesRegex(
                ValueError,
                '^``solver`` must be one of agd, gd, got wrong_name$'):
            CoxRegression(solver='wrong_name')

        # prox
        expected_prox = {
            'none': ProxZero,
            'l1': ProxL1,
            'l2': ProxL2Sq,
            'elasticnet': ProxElasticNet,
            'tv': ProxTV,
            'binarsity': ProxBinarsity
        }

        for penalty in self.penalties:
            # The binarsity penalty is always built with a block layout.
            build_kwargs = {'penalty': penalty}
            if penalty == 'binarsity':
                build_kwargs['blocks_start'] = [0]
                build_kwargs['blocks_length'] = [1]
            learner = CoxRegression(**build_kwargs)

            self.assertIsInstance(learner._prox_obj, expected_prox[penalty])

        msg = '^``penalty`` must be one of binarsity, elasticnet, l1, l2, none, ' \
              'tv, got wrong_name$'
        with self.assertRaisesRegex(ValueError, msg):
            CoxRegression(penalty='wrong_name')
コード例 #7
0
    def test_CoxRegression_score(self):
        """...Test CoxRegression score: value on the training data, value on
        other data, and the errors raised for an unfitted model or a missing
        argument
        """
        features, times, censoring = Test.get_train_data()
        learner = CoxRegression()
        learner.fit(features, times, censoring)
        # Without arguments, score() is evaluated on the data used to fit
        self.assertAlmostEqual(learner.score(), 3.856303803547875)

        # Score of the same fitted model on data generated with another seed
        features, times, censoring = Test.get_train_data(seed=123)
        self.assertAlmostEqual(learner.score(features, times, censoring),
                               5.556509086276002)

        msg = '^You must fit the model first$'
        learner = CoxRegression()
        with self.assertRaisesRegex(RuntimeError, msg):
            learner.score()

        msg = '^Passed ``features`` is None$'
        learner = CoxRegression().fit(features, times, censoring)
        with self.assertRaisesRegex(ValueError, msg):
            learner.score(None, times, censoring)

        msg = '^Passed ``times`` is None$'
        learner = CoxRegression().fit(features, times, censoring)
        with self.assertRaisesRegex(ValueError, msg):
            # Only ``times`` is missing: ``features`` goes in the first
            # slot (the original passed ``times`` there by mistake).
            learner.score(features, None, censoring)

        msg = '^Passed ``censoring`` is None$'
        learner = CoxRegression().fit(features, times, censoring)
        with self.assertRaisesRegex(ValueError, msg):
            learner.score(features, times, None)
コード例 #8
0
    def test_CoxRegression_fit(self):
        """...Test CoxRegression fit with different solvers and penalties

        For every penalty / solver pair, a learner is fitted on the data
        returned by ``Test.get_train_data()`` and its coefficients are
        compared, to one decimal, with the reference vector stored for that
        penalty.
        """
        raw_features, times, censoring = Test.get_train_data()

        # Reference coefficients, one vector per penalty (shared by all
        # solvers). The 'binarsity' vector is longer because that penalty is
        # fitted on binarized features.
        coeffs_pen = {
            'none':
            np.array([
                -0.03068462, 0.03940001, 0.16758354, -0.24838003, 0.16940664,
                0.9650363, -0.14818724, -0.0802245, -1.52869811, 0.0414509
            ]),
            'l2':
            np.array([
                -0.02403681, 0.03455527, 0.13470436, -0.21654892, 0.16637723,
                0.83125941, -0.08555382, -0.12914753, -1.35294435, 0.02312935
            ]),
            'l1':
            np.array([
                0., 1.48439371e-02, 1.03806171e-01, -1.57313537e-01,
                1.40448847e-01, 8.05306416e-01, -5.41296030e-02,
                -1.07753576e-01, -1.37612207e+00, 6.43289248e-05
            ]),
            'elasticnet':
            np.array([
                0., 0.01011823, 0.10530518, -0.16885214, 0.14373715,
                0.82547312, -0.06122141, -0.09479487, -1.39454662, 0.00312597
            ]),
            'tv':
            np.array([
                0.03017556, 0.03714465, 0.0385349, -0.10169967, 0.15783755,
                0.64860815, -0.00617636, -0.22235137, -1.07938977, -0.07181225
            ]),
            'binarsity':
            np.array([
                0.03794176, -0.04473702, 0.00339763, 0.00339763, -0.16493989,
                0.05497996, 0.05497996, 0.05497996, -0.08457476, -0.08457476,
                0.0294825, 0.13966702, 0.10251257, 0.02550264, -0.07207419,
                -0.05594102, -0.10018038, -0.10018038, 0.10018038, 0.10018038,
                -0.47859686, -0.06685181, -0.00850803, 0.55395669, 0.00556327,
                -0.00185442, -0.00185442, -0.00185442, 0.26010429, 0.09752455,
                -0.17881442, -0.17881442, 0.932516, 0.32095387, -0.49766315,
                -0.75580671, 0.0593833, -0.01433773, 0.01077109, -0.05581666
            ])
        }

        for penalty in self.penalties:

            if penalty == 'binarsity':
                # binarize features
                n_cuts = 3
                binarizer = FeaturesBinarizer(n_cuts=n_cuts)
                features = binarizer.fit_transform(raw_features)
            else:
                # every other penalty is fitted on the raw features
                features = raw_features

            for solver in self.solvers:

                # NOTE(review): tol=0 with max_iter=10 presumably prevents
                # early stopping so every run does the same number of
                # iterations -- confirm against the solver implementation.
                solver_kwargs = {
                    'penalty': penalty,
                    'tol': 0,
                    'solver': solver,
                    'verbose': False,
                    'max_iter': 10
                }

                # C is only passed when a penalty is actually applied
                if penalty != 'none':
                    solver_kwargs['C'] = 50

                # binarsity additionally needs the block layout produced by
                # the binarizer fitted above
                if penalty == 'binarsity':
                    solver_kwargs['blocks_start'] = binarizer.blocks_start
                    solver_kwargs['blocks_length'] = binarizer.blocks_length

                learner = CoxRegression(**solver_kwargs)
                learner.fit(features, times, censoring)

                # loose comparison: agreement to one decimal only
                np.testing.assert_array_almost_equal(coeffs_pen[penalty],
                                                     learner.coeffs,
                                                     decimal=1)
コード例 #9
0
    def test_CoxRegression_solver_basic_settings(self):
        """...Test that the basic solver parameters of CoxRegression are
        forwarded to the solver object, at construction and on reassignment
        """
        for solver in self.solvers:
            # (parameter, value given to the constructor, value set later)
            scenarios = (
                ('tol', self.float_1, self.float_2),
                ('max_iter', self.int_1, self.int_2),
                ('verbose', True, False),
                ('verbose', False, True),
                ('print_every', self.int_1, self.int_2),
                ('record_every', self.int_1, self.int_2),
            )

            for name, initial, updated in scenarios:
                learner = CoxRegression(solver=solver, **{name: initial})
                self.assertEqual(getattr(learner, name), initial)
                self.assertEqual(getattr(learner._solver_obj, name), initial)

                # Reassigning on the learner must be mirrored on the solver.
                setattr(learner, name, updated)
                self.assertEqual(getattr(learner, name), updated)
                self.assertEqual(getattr(learner._solver_obj, name), updated)
コード例 #10
0
    def test_CoxRegression_penalty_C(self):
        """...Test CoxRegression handling of parameter C: mapped to a prox
        strength of 1 / C, rejected when negative, and warned about when the
        penalty is 'none'
        """
        for penalty in self.penalties:
            if penalty == 'none':
                msg = '^You cannot set C for penalty "%s"$' % penalty
                with self.assertWarnsRegex(RuntimeWarning, msg):
                    CoxRegression(penalty=penalty, C=self.float_1)

                learner = CoxRegression(penalty=penalty)
                with self.assertWarnsRegex(RuntimeWarning, msg):
                    learner.C = self.float_1
            else:
                # The binarsity penalty is always built with a block layout.
                if penalty == 'binarsity':
                    extra = {'blocks_start': [0], 'blocks_length': [1]}
                else:
                    extra = {}

                learner = CoxRegression(penalty=penalty, C=self.float_1,
                                        **extra)
                self.assertEqual(learner.C, self.float_1)
                self.assertEqual(learner._prox_obj.strength, 1. / self.float_1)

                learner.C = self.float_2
                self.assertEqual(learner.C, self.float_2)
                self.assertEqual(learner._prox_obj.strength, 1. / self.float_2)

                msg = '^``C`` must be positive, got -1$'
                with self.assertRaisesRegex(ValueError, msg):
                    CoxRegression(penalty=penalty, C=-1, **extra)

            # A negative C is rejected on assignment whatever the penalty
            msg = '^``C`` must be positive, got -2$'
            with self.assertRaisesRegex(ValueError, msg):
                learner.C = -2
コード例 #11
0
    def test_CoxRegression_warm_start(self):
        """...Test CoxRegression warm start: fitting twice must lower the
        score with warm start and leave it unchanged without
        """
        features, times, censoring = Test.get_train_data()

        # All solvers are exercised with warm start first, then without.
        for warm_start in (True, False):
            for solver in self.solvers:
                learner = CoxRegression(solver=solver,
                                        max_iter=2,
                                        warm_start=warm_start,
                                        tol=0,
                                        penalty='none')

                learner.fit(features, times, censoring)
                first_score = learner.score()
                learner.fit(features, times, censoring)
                second_score = learner.score()

                if warm_start:
                    # The second fit continues from the first one, so the
                    # score should have decreased (no penalization here)
                    self.assertLess(second_score, first_score)
                else:
                    # Both fits start from scratch: identical scores
                    self.assertAlmostEqual(second_score, first_score)
コード例 #12
0
def fit_and_score(features, features_bin, times, censoring, blocks_start,
                  blocks_length, boundaries, features_names, idx_train,
                  idx_test, validation_data, C):
    """Estimate cut points with a binarsity-penalized Cox model and score a
    refit on the resulting binarization.

    Steps
    -----
    1. Fit ``CoxRegression(penalty='binarsity')`` with strength ``C`` on the
       ``idx_train`` rows of ``features_bin``.
    2. For every feature block, locate the positions where the output of
       ``get_groups`` changes and read the matching values from
       ``boundaries`` as cut-point estimates, padded with ``-inf`` / ``+inf``.
       A block with all-zero coefficients, or with no change, gets the
       single bin ``[-inf, inf]``.
    3. Binarize ``features`` at the estimated cut points, refit a model with
       ``C=1e10`` on the training rows and score it on the ``idx_test`` rows.

    Parameters
    ----------
    features_names : list or None
        Keys into ``boundaries``, one per block. ``None`` means
        ``'0', '1', ...`` (one per column of ``features``).
    validation_data : tuple or None
        ``(X, times, censoring)`` scored with the refit model when given.

    Returns
    -------
    tuple
        ``(score, score_validation)``; ``score_validation`` is ``None`` when
        ``validation_data`` is ``None``.
    """
    if features_names is None:
        features_names = [str(j) for j in range(features.shape[1])]
    X_train = features_bin[idx_train]
    Y_train, Y_test = times[idx_train], times[idx_test]
    delta_train, delta_test = censoring[idx_train], censoring[idx_test]

    learner = CoxRegression(penalty='binarsity',
                            tol=1e-5,
                            verbose=False,
                            max_iter=100,
                            step=0.3,
                            blocks_start=blocks_start,
                            blocks_length=blocks_length,
                            warm_start=True)
    learner._solver_obj.linesearch = False
    learner.C = C
    learner.fit(X_train, Y_train, delta_train)
    coeffs = learner.coeffs

    cut_points_estimates = {}
    for j, start in enumerate(blocks_start):
        coeffs_j = coeffs[start:start + blocks_length[j]]
        # Default: no cut point detected for this feature.
        cut_points_estimate_j = np.array([-np.inf, np.inf])
        if np.any(coeffs_j):
            groups_j = get_groups(coeffs_j)
            # Indices at which the group label differs from the previous one
            jump_j = np.where(groups_j[1:] - groups_j[:-1] != 0)[0] + 1
            if jump_j.size != 0:
                cut_points_estimate_j = boundaries[features_names[j]][jump_j]
                # Make sure the boundaries cover the whole real line.
                if cut_points_estimate_j[0] != -np.inf:
                    cut_points_estimate_j = np.insert(cut_points_estimate_j, 0,
                                                      -np.inf)
                if cut_points_estimate_j[-1] != np.inf:
                    cut_points_estimate_j = np.append(cut_points_estimate_j,
                                                      np.inf)
        cut_points_estimates[features_names[j]] = cut_points_estimate_j

    binarizer = FeaturesBinarizer(method='given',
                                  bins_boundaries=cut_points_estimates)
    binarized_features = binarizer.fit_transform(features)
    blocks_start = binarizer.blocks_start
    blocks_length = binarizer.blocks_length
    X_bin_train = binarized_features[idx_train]
    X_bin_test = binarized_features[idx_test]
    # C=1e10 makes the prox strength (1 / C) negligible for the refit.
    learner_ = CoxRegression(penalty='binarsity',
                             tol=1e-5,
                             verbose=False,
                             max_iter=100,
                             step=0.3,
                             blocks_start=blocks_start,
                             blocks_length=blocks_length,
                             warm_start=True,
                             C=1e10)
    learner_._solver_obj.linesearch = False
    learner_.fit(X_bin_train, Y_train, delta_train)
    score = learner_.score(X_bin_test, Y_test, delta_test)

    if validation_data is None:
        return score, None

    X_validation, Y_validation, delta_validation = (validation_data[0],
                                                    validation_data[1],
                                                    validation_data[2])
    # Reuse the binarizer fitted on ``features``: refitting it on the
    # validation set (as fit_transform did) could yield an encoding that no
    # longer lines up with the columns the model was trained on.
    X_bin_validation = binarizer.transform(X_validation)
    score_validation = learner_.score(X_bin_validation, Y_validation,
                                      delta_validation)

    return score, score_validation
コード例 #13
0
        rs = ShuffleSplit(n_splits=1, test_size=test_size)
        for train_index, test_index in rs.split(X):
            X_test = X.iloc[test_index, :]
            Y_test = Y[test_index]
            delta_test = delta[test_index]

            X_train = X.iloc[train_index, :]
            Y_train = Y[train_index]
            delta_train = delta[train_index]

        # 2) screening cox, top-P features
        n_features = X_train.shape[1]
        screening_cox = pd.Series(index=X_train.columns)
        learner = CoxRegression(tol=1e-5,
                                solver='agd',
                                verbose=False,
                                penalty='none',
                                max_iter=100)

        for j in range(n_features):
            stdout.write("\rscreening: %d/%s" % (j + 1, n_features))
            stdout.flush()

            feat_name = X_train.columns[j]
            X_j = X_train[[feat_name]]
            learner.fit(X_j, Y_train, delta_train)
            coeffs = learner.coeffs
            marker = X_j.dot(coeffs)
            c_index = concordance_index(Y_train, marker, delta_train)
            c_index = max(c_index, 1 - c_index)
            screening_cox[feat_name] = c_index
コード例 #14
0
def fit_Cox(features, T, E):
    """Fit a verbose ``CoxRegression`` on ``(features, T, E)``.

    The fitted model is discarded; the function returns ``None``.
    """
    model = CoxRegression(verbose=True)
    model.fit(features, T, E)
コード例 #15
0
            # binarize data
            n_cuts = 50
            binarizer = FeaturesBinarizer(n_cuts=n_cuts)
            X_bin = binarizer.fit_transform(X)
            blocks_start = binarizer.blocks_start
            blocks_length = binarizer.blocks_length
            boundaries = binarizer.boundaries

            tic = time()

            solver = 'agd'
            learner = CoxRegression(penalty='binarsity',
                                    tol=1e-5,
                                    solver=solver,
                                    verbose=False,
                                    max_iter=100,
                                    step=0.3,
                                    blocks_start=blocks_start,
                                    blocks_length=blocks_length,
                                    warm_start=True)
            learner._solver_obj.linesearch = False

            # cross-validation
            n_folds = 10
            grid_size = 30
            grid_C = np.logspace(0, 3, grid_size)
            scores_cv = pd.DataFrame(columns=['ll_test', 'test_std'])
            for i, C in enumerate(grid_C):
                stdout.write("\rbinacox n_samples: %s/%s, "
                             "n_simu: %s/%s, "
                             "CV: %d%%" %