Beispiel #1
0
    def test_gaussian_sklearn(self):
        n = 100
        p = 20
        k = 3
        family = "gaussian"
        rho = 0.5
        sigma = 1
        M = 1
        np.random.seed(2)
        # data = gen_data(family=family, n=n, p=p, k=k, rho=rho, M=M)
        data = gen_data(n, p, family=family, k=k, rho=rho)
        # data3 = gen_data_splicing(
        #     family=family, n=n, p=p, k=k, rho=rho, M=M, sparse_ratio=0.1)
        s_max = 20
        support_size = np.linspace(0, s_max, s_max + 1)
        alpha = [0., 0.1, 0.2, 0.3, 0.4]

        model = abessLm()
        cv = KFold(n_splits=5, shuffle=True, random_state=0)
        gcv = GridSearchCV(model,
                           param_grid={
                               "support_size": support_size,
                               "alpha": alpha
                           },
                           cv=cv,
                           n_jobs=5).fit(data.x, data.y)

        assert gcv.best_params_["support_size"] == k
        assert gcv.best_params_["alpha"] == 0.
Beispiel #2
0
    def test_poisson(self):
        # to do
        n = 100
        p = 20
        k = 3
        family = "poisson"
        rho = 0.5
        sigma = 1
        M = 1
        np.random.seed(3)
        data = gen_data(n, p, family=family, k=k, rho=rho, sigma=sigma)
        data2 = gen_data_splicing(family=family, n=n, p=p, k=k, rho=rho, M=M)
        support_size = range(0, 20)

        model = abessPoisson(path_type="seq",
                             support_size=support_size,
                             ic_type='ebic',
                             is_screening=True,
                             screening_size=20,
                             K_max=10,
                             epsilon=10,
                             powell_path=2,
                             s_min=1,
                             s_max=p,
                             lambda_min=0.01,
                             lambda_max=100,
                             is_cv=True,
                             K=5,
                             exchange_num=2,
                             tau=0.1 * np.log(n * p) / n,
                             primary_model_fit_max_iter=10,
                             primary_model_fit_epsilon=1e-6,
                             early_stop=False,
                             approximate_Newton=True,
                             ic_coef=1.,
                             thread=5,
                             sparse_matrix=True)
        group = np.linspace(1, p, p)
        model.fit(data.x, data.y, group=group)

        model2 = abessPoisson(path_type="seq",
                              support_size=support_size,
                              ic_type='ebic',
                              is_screening=True,
                              screening_size=20,
                              K_max=10,
                              epsilon=10,
                              powell_path=2,
                              s_min=1,
                              s_max=p,
                              lambda_min=0.01,
                              lambda_max=100,
                              is_cv=True,
                              K=5,
                              exchange_num=2,
                              tau=0.1 * np.log(n * p) / n,
                              primary_model_fit_max_iter=80,
                              primary_model_fit_epsilon=1e-6,
                              early_stop=False,
                              approximate_Newton=False,
                              ic_coef=1.,
                              thread=5)
        group = np.linspace(1, p, p)
        model2.fit(data.x, data.y, group=group)
        model2.predict(data.x)

        nonzero_true = np.nonzero(data.coef_)[0]
        nonzero_fit = np.nonzero(model2.coef_)[0]
        print(nonzero_true)
        print(nonzero_fit)
        assert (nonzero_true == nonzero_fit).all()

        if sys.version_info[1] >= 6:
            new_x = data.x[:, nonzero_fit]
            reg = PoissonRegressor(alpha=0, tol=1e-6, max_iter=200)
            reg.fit(new_x, data.y)
            print(model2.coef_[nonzero_fit])
            print(reg.coef_)
            assert model2.coef_[nonzero_fit] == approx(reg.coef_,
                                                       rel=1e-2,
                                                       abs=1e-2)
Beispiel #3
0
    def test_gaussian(self):
        n = 100
        p = 20
        k = 3
        family = "gaussian"
        rho = 0.5
        sigma = 1
        M = 1
        # np.random.seed(2)
        data = gen_data_splicing(family=family, n=n, p=p, k=k, rho=rho, M=M)
        data2 = gen_data(n, p, family=family, k=k, rho=rho, sigma=sigma)
        data3 = gen_data_splicing(family=family,
                                  n=n,
                                  p=p,
                                  k=k,
                                  rho=rho,
                                  M=M,
                                  sparse_ratio=0.1)
        s_max = 20

        model = abessLm(path_type="seq",
                        support_size=range(0, s_max),
                        ic_type='ebic',
                        is_screening=True,
                        screening_size=20,
                        K_max=10,
                        epsilon=10,
                        powell_path=2,
                        s_min=1,
                        s_max=p,
                        lambda_min=0.01,
                        lambda_max=100,
                        is_cv=True,
                        K=5,
                        exchange_num=2,
                        tau=0.1 * np.log(n * p) / n,
                        primary_model_fit_max_iter=10,
                        primary_model_fit_epsilon=1e-6,
                        early_stop=False,
                        approximate_Newton=True,
                        ic_coef=1.,
                        thread=5,
                        covariance_update=True)
        model.fit(data.x, data.y)
        model.predict(data.x)

        model2 = abessLm(path_type="seq",
                         support_size=range(0, s_max),
                         ic_type='ebic',
                         is_screening=True,
                         screening_size=20,
                         K_max=10,
                         epsilon=10,
                         powell_path=2,
                         s_min=1,
                         s_max=p,
                         lambda_min=0.01,
                         lambda_max=100,
                         is_cv=True,
                         K=5,
                         exchange_num=2,
                         tau=0.1 * np.log(n * p) / n,
                         primary_model_fit_max_iter=10,
                         primary_model_fit_epsilon=1e-6,
                         early_stop=False,
                         approximate_Newton=True,
                         ic_coef=1.,
                         thread=1,
                         covariance_update=True)
        model2.fit(data.x, data.y)

        model3 = abessLm(path_type="seq",
                         support_size=range(0, s_max),
                         ic_type='ebic',
                         is_screening=True,
                         screening_size=20,
                         K_max=10,
                         epsilon=10,
                         powell_path=2,
                         s_min=1,
                         s_max=p,
                         lambda_min=0.01,
                         lambda_max=100,
                         is_cv=True,
                         K=5,
                         exchange_num=2,
                         tau=0.1 * np.log(n * p) / n,
                         primary_model_fit_max_iter=10,
                         primary_model_fit_epsilon=1e-6,
                         early_stop=False,
                         approximate_Newton=True,
                         ic_coef=1.,
                         thread=0,
                         covariance_update=False,
                         sparse_matrix=True)
        model3.fit(data.x, data.y)

        model4 = abessLm(path_type="seq",
                         support_size=range(0, s_max),
                         ic_type='ebic',
                         is_screening=True,
                         screening_size=20,
                         K_max=10,
                         epsilon=10,
                         powell_path=2,
                         s_min=1,
                         s_max=p,
                         lambda_min=0.01,
                         lambda_max=100,
                         is_cv=False,
                         K=5,
                         exchange_num=2,
                         tau=0.1 * np.log(n * p) / n,
                         primary_model_fit_max_iter=10,
                         primary_model_fit_epsilon=1e-6,
                         early_stop=False,
                         approximate_Newton=True,
                         ic_coef=1.,
                         thread=0,
                         covariance_update=True)
        model4.fit(data.x, data.y)

        nonzero_true = np.nonzero(data.coef_)[0]
        nonzero_fit = np.nonzero(model.coef_)[0]
        print(nonzero_true)
        print(nonzero_fit)
        new_x = data.x[:, nonzero_fit]
        reg = LinearRegression()
        reg.fit(new_x, data.y.reshape(-1))
        assert model.coef_[nonzero_fit] == approx(reg.coef_,
                                                  rel=1e-5,
                                                  abs=1e-5)
        assert (nonzero_true == nonzero_fit).all()
Beispiel #4
0
    def test_cox(self):
        n = 100
        p = 20
        k = 3
        family = "cox"
        rho = 0.5
        sigma = 1

        # np.random.seed(3)
        np.random.seed(3)
        data = gen_data(n, p, family=family, k=k, rho=rho, sigma=sigma)
        support_size = range(0, 20)

        model = abessCox(path_type="seq",
                         support_size=support_size,
                         ic_type='ebic',
                         is_screening=True,
                         screening_size=20,
                         K_max=10,
                         epsilon=10,
                         powell_path=2,
                         s_min=1,
                         s_max=p,
                         lambda_min=0.01,
                         lambda_max=100,
                         is_cv=True,
                         K=5,
                         exchange_num=2,
                         tau=0.1 * np.log(n * p) / n,
                         primary_model_fit_max_iter=30,
                         primary_model_fit_epsilon=1e-6,
                         early_stop=False,
                         approximate_Newton=True,
                         ic_coef=1.,
                         thread=5)
        group = np.linspace(1, p, p)
        model.fit(data.x, data.y, group=group)
        model.predict(data.x)

        model2 = abessCox(path_type="seq",
                          support_size=support_size,
                          ic_type='ebic',
                          is_screening=True,
                          screening_size=20,
                          K_max=10,
                          epsilon=10,
                          powell_path=2,
                          s_min=1,
                          s_max=p,
                          lambda_min=0.01,
                          lambda_max=100,
                          is_cv=True,
                          K=5,
                          exchange_num=2,
                          tau=0.1 * np.log(n * p) / n,
                          primary_model_fit_max_iter=60,
                          primary_model_fit_epsilon=1e-6,
                          early_stop=False,
                          approximate_Newton=False,
                          ic_coef=1.,
                          thread=5,
                          sparse_matrix=True)
        group = np.linspace(1, p, p)
        model2.fit(data.x, data.y, group=group)

        nonzero_true = np.nonzero(data.coef_)[0]
        nonzero_fit = np.nonzero(model2.coef_)[0]
        print(nonzero_true)
        print(nonzero_fit)
        assert (nonzero_true == nonzero_fit).all()

        if sys.version_info[1] >= 6:
            new_x = data.x[:, nonzero_fit]
            survival = pd.DataFrame()
            for i in range(new_x.shape[1]):
                survival["Var" + str(i)] = new_x[:, i]
            survival["T"] = data.y[:, 0]
            survival["E"] = data.y[:, 1]
            cph = CoxPHFitter(penalizer=0, l1_ratio=0)
            cph.fit(survival, 'T', event_col='E')
            print(model2.coef_[nonzero_fit])
            print(cph.params_.values)

            assert model2.coef_[nonzero_fit] == approx(cph.params_.values,
                                                       rel=5e-1,
                                                       abs=5e-1)
Beispiel #5
0
    def test_binomial(self):
        n = 100
        p = 20
        k = 3
        family = "binomial"
        rho = 0.5
        sigma = 1
        np.random.seed(1)
        data = gen_data(n, p, family=family, k=k, rho=rho, sigma=sigma)
        support_size = range(0, 20)
        print("logistic abess")

        model = abessLogistic(path_type="seq",
                              support_size=support_size,
                              ic_type='ebic',
                              is_screening=False,
                              screening_size=30,
                              K_max=10,
                              epsilon=10,
                              powell_path=2,
                              s_min=1,
                              s_max=p,
                              lambda_min=0.01,
                              lambda_max=100,
                              is_cv=True,
                              K=5,
                              exchange_num=2,
                              tau=0.1 * np.log(n * p) / n,
                              primary_model_fit_max_iter=10,
                              primary_model_fit_epsilon=1e-6,
                              early_stop=False,
                              approximate_Newton=False,
                              ic_coef=1.,
                              thread=5)
        group = np.linspace(1, p, p)
        model.fit(data.x, data.y, group=group)

        model2 = abessLogistic(path_type="seq",
                               support_size=support_size,
                               ic_type='ebic',
                               is_screening=True,
                               screening_size=20,
                               K_max=10,
                               epsilon=10,
                               powell_path=2,
                               s_min=1,
                               s_max=p,
                               lambda_min=0.01,
                               lambda_max=100,
                               is_cv=True,
                               K=5,
                               exchange_num=2,
                               tau=0.1 * np.log(n * p) / n,
                               primary_model_fit_max_iter=80,
                               primary_model_fit_epsilon=1e-6,
                               early_stop=False,
                               approximate_Newton=False,
                               ic_coef=1.,
                               thread=5,
                               sparse_matrix=True)
        group = np.linspace(1, p, p)
        model2.fit(data.x, data.y, group=group)
        model2.predict(data.x)

        nonzero_true = np.nonzero(data.coef_)[0]
        nonzero_fit = np.nonzero(model2.coef_)[0]
        print(nonzero_true)
        print(nonzero_fit)
        assert (nonzero_true == nonzero_fit).all()

        if sys.version_info[1] >= 6:
            new_x = data.x[:, nonzero_fit]
            reg = LogisticRegression(penalty="none")
            reg.fit(new_x, data.y)
            print(model2.coef_[nonzero_fit])
            print(reg.coef_)
            assert model2.coef_[nonzero_fit] == approx(reg.coef_[0],
                                                       rel=1e-2,
                                                       abs=1e-2)