Code example #1
    # Requires, elsewhere in the test module:
    #   from numpy.testing import assert_allclose
    #   from statsmodels.duration.hazard_regression import PHReg
    # plus survival_enet_r_results, which holds the R (glmnet)
    # reference coefficients.
    def test_fit_regularized(self):

        # Data set sizes
        for n, p in (50, 2), (100, 5):

            # Penalty weights
            for js, s in enumerate([0, 0.1]):

                coef_name = "coef_%d_%d_%d" % (n, p, js)
                coef = getattr(survival_enet_r_results, coef_name)

                fname = "survival_data_%d_%d.csv" % (n, p)
                time, status, entry, exog = self.load_file(fname)

                exog -= exog.mean(0)
                exog /= exog.std(0, ddof=1)

                mod = PHReg(time, exog, status=status, ties='breslow')
                rslt = mod.fit_regularized(alpha=s)

                # The agreement is not very high; the issue may be on
                # the R (glmnet) side, which appears to use some
                # approximations that we do not.
                assert_allclose(rslt.params, coef, rtol=0.3)

                # Smoke test for summary
                smry = rslt.summary()
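For context outside the test class, here is a minimal standalone sketch of
the same API on simulated data. The data-generating choices below (the rng
seed, n, p, the hazard model, the censoring rate) are illustrative
assumptions of this sketch, not part of the test fixtures; only PHReg and
fit_regularized come from the example above.

    import numpy as np
    from statsmodels.duration.hazard_regression import PHReg

    rng = np.random.default_rng(0)
    n, p = 100, 5
    exog = rng.standard_normal((n, p))
    # Event times whose hazard rises with the first covariate.
    time = rng.exponential(scale=np.exp(-exog[:, 0]))
    # Crude censoring indicator: roughly 80% observed events.
    status = (rng.uniform(size=n) < 0.8).astype(int)

    # Standardize, as the test does, so the penalty treats columns equally.
    exog = (exog - exog.mean(0)) / exog.std(0, ddof=1)

    mod = PHReg(time, exog, status=status, ties='breslow')
    rslt = mod.fit_regularized(alpha=0.1)  # alpha is the penalty weight
    print(rslt.params)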
Code example #2
File: test_phreg.py  Project: bashtage/statsmodels
    # Requires, elsewhere in the test module:
    #   import numpy as np
    #   from numpy.testing import assert_allclose, assert_equal
    #   from statsmodels.duration.hazard_regression import PHReg
    # plus survival_enet_r_results, which holds the R (glmnet)
    # reference coefficients.
    def test_fit_regularized(self):

        # Data set sizes
        for n, p in (50, 2), (100, 5):

            # Penalty weights
            for js, s in enumerate([0, 0.1]):

                coef_name = "coef_%d_%d_%d" % (n, p, js)
                params = getattr(survival_enet_r_results, coef_name)

                fname = "survival_data_%d_%d.csv" % (n, p)
                time, status, entry, exog = self.load_file(fname)

                exog -= exog.mean(0)
                exog /= exog.std(0, ddof=1)

                model = PHReg(time, exog, status=status, ties='breslow')
                sm_result = model.fit_regularized(alpha=s)

                # The agreement is not very high; the issue may be on
                # the R side.  See below for further checks.
                assert_allclose(sm_result.params, params, rtol=0.3)

                # The penalized log-likelihood that we are maximizing.
                def plf(params):
                    llf = model.loglike(params) / len(time)
                    # L1_wt = 1 selects a pure L1 (lasso) penalty; the
                    # ridge term below then vanishes.
                    L1_wt = 1
                    llf = llf - s * ((1 - L1_wt) * np.sum(params**2) / 2 +
                                     L1_wt * np.sum(np.abs(params)))
                    return llf

                # Confirm that we are doing better than glmnet: our fit
                # must attain a strictly higher value of the shared
                # objective than the R coefficients do.
                llf_r = plf(params)
                llf_sm = plf(sm_result.params)
                assert_equal(np.sign(llf_sm - llf_r), 1)
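Written out, the objective that plf evaluates (and that fit_regularized is
maximizing) is the penalized average partial log-likelihood; with ℓ(β) the
Breslow partial log-likelihood, n the sample size, s the penalty weight,
and w the L1 mixing weight (L1_wt in the code):

    \frac{1}{n}\,\ell(\beta) \;-\; s\left[\frac{1-w}{2}\sum_j \beta_j^2
        + w \sum_j \lvert\beta_j\rvert\right]

With w = 1, as in the test, the ridge term drops out and the penalty is a
pure lasso, s * sum(|beta_j|). The final assertion then checks that the
statsmodels estimate scores strictly higher on this common objective than
the glmnet coefficients do.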