Ejemplo n.º 1
0
def refit_and_predict(cut_points_estimates, X_train, X_test, Y_train,
                      delta_train, Y_test, delta_test):
    """Refit an unpenalized Cox regression on features binarized at the
    given cut points and score it on the test set.

    Parameters
    ----------
    cut_points_estimates
        Passed as ``bins_boundaries`` to ``FeaturesBinarizer`` with
        ``method='given'``.
    X_train, X_test
        Feature tables; they are concatenated with ``pd.concat`` to fit
        the binarizer and then transformed separately.
    Y_train, delta_train
        Second and third arguments of ``CoxRegression.fit``
        (presumably survival times and censoring indicators -- TODO confirm).
    Y_test, delta_test
        First and third arguments of ``concordance_index``, evaluated
        against the test marker.

    Returns
    -------
    c_index
        The larger of the concordance index and ``1 - concordance index``.
    marker
        Binarized test features multiplied by the fitted coefficients.
    lp_train
        Binarized train features multiplied by the fitted coefficients.
    """
    # Binarize with the supplied boundaries; the binarizer is fitted on
    # train and test stacked together, then applied to each part.
    encoder = FeaturesBinarizer(method='given',
                                bins_boundaries=cut_points_estimates,
                                remove_first=True)
    encoder.fit(pd.concat([X_train, X_test]))
    train_bin = encoder.transform(X_train)
    test_bin = encoder.transform(X_test)

    # Cox model without penalty, solved with 'agd'.
    cox_settings = dict(penalty='none',
                        tol=1e-5,
                        solver='agd',
                        verbose=False,
                        max_iter=100,
                        step=0.3,
                        warm_start=True)
    cox = CoxRegression(**cox_settings)
    # Line search is switched off directly on the underlying solver object.
    cox._solver_obj.linesearch = False
    cox.fit(train_bin, Y_train, delta_train)

    # Linear predictors on both splits.
    beta = cox.coeffs
    marker = test_bin.dot(beta)
    lp_train = train_bin.dot(beta)

    # Keep whichever of c and 1 - c is larger.
    c_index = concordance_index(Y_test, marker, delta_test)
    mirrored = 1 - c_index
    if mirrored > c_index:
        c_index = mirrored

    return c_index, marker, lp_train
Ejemplo n.º 2
0
    def test_binarizer_fit(self):
        """...Test that FeaturesBinarizer with quantile cuts gives the
        expected one-hot encoding through fit + transform (pandas dataframe
        and numpy array inputs) and through fit_transform
        """
        # Reference encoding built from the expected interval indices
        reference = OneHotEncoder(sparse=True).fit_transform(
            self.default_expected_intervals)

        binarizer = FeaturesBinarizer(method='quantile',
                                      n_cuts=3,
                                      detect_column_type="auto",
                                      remove_first=False)

        def check(binarized):
            # Output must be exactly a csr_matrix and match the reference
            self.assertEqual(binarized.__class__, csr.csr_matrix)
            np.testing.assert_array_equal(reference.toarray(),
                                          binarized.toarray())

        # fit then transform, first on the dataframe, then on the array
        for data in (self.df_features, self.features):
            binarizer.fit(data)
            check(binarizer.transform(data))

        # fit_transform in a single call
        check(binarizer.fit_transform(self.features))
Ejemplo n.º 3
0
    def test_binarizer_remove_first(self):
        """...Test that FeaturesBinarizer with remove_first=True yields the
        full one-hot encoding minus columns 0, 4, 8 and 10
        """
        # Full one-hot encoding of the expected interval indices
        full_one_hot = OneHotEncoder(sparse=True).fit_transform(
            self.default_expected_intervals)

        binarizer = FeaturesBinarizer(method='quantile',
                                      n_cuts=3,
                                      detect_column_type="auto",
                                      remove_first=True)
        binarizer.fit(self.features)
        result = binarizer.transform(self.features)
        self.assertEqual(result.__class__, csr.csr_matrix)

        # Columns expected to be absent when remove_first=True
        # (presumably the first bin of each feature -- TODO confirm)
        dropped_columns = [0, 4, 8, 10]
        expected = np.delete(full_one_hot.toarray(), dropped_columns, axis=1)

        np.testing.assert_array_equal(expected, result.toarray())
Ejemplo n.º 4
0
        ibs_cox = integrated_brier_score(predictions['values'],
                                         predictions['times'], Y_test,
                                         delta_test, Y_train, delta_train)

        # Binacox
        print("Train Binacox screening_cox_topP...")

        X_train_ = X_train[screening_cox_topP]
        X_test_ = X_test[screening_cox_topP]

        # binarize feature
        n_cuts = 50
        binarizer = FeaturesBinarizer(n_cuts=n_cuts)

        binarizer.fit(pd.concat([X_train_, X_test_]))
        X_bin_train = binarizer.transform(X_train_)
        blocks_start = binarizer.blocks_start
        blocks_length = binarizer.blocks_length
        boundaries = binarizer.boundaries

        # fit binacox
        learner = CoxRegression(penalty='binarsity',
                                tol=1e-5,
                                solver='agd',
                                verbose=False,
                                max_iter=100,
                                step=0.3,
                                blocks_start=blocks_start,
                                blocks_length=blocks_length,
                                C=C_chosen[cancer],
                                warm_start=True)