def hessian(self, params):
    """
    Generic Zero Inflated model Hessian matrix of the loglikelihood

    Parameters
    ----------
    params : array-like
        The parameters of the model

    Returns
    -------
    hess : ndarray, (k_exog + k_inflate, k_exog + k_inflate)
        The Hessian, second derivative of loglikelihood function,
        evaluated at `params`
    """
    hess_arr_main = self._hessian_main(params)
    hess_arr_infl = self._hessian_inflate(params)

    if hess_arr_main is None or hess_arr_infl is None:
        # fall back to a finite-difference Hessian of the full loglikelihood
        return approx_hess(params, self.loglike)

    dim = self.k_exog + self.k_inflate
    hess_arr = np.zeros((dim, dim))

    # assemble the blocks from the inflation and main models ...
    hess_arr[:self.k_inflate, :] = hess_arr_infl
    hess_arr[self.k_inflate:, self.k_inflate:] = hess_arr_main

    # ... and fill the upper triangle from the lower triangle (symmetry)
    tri_idx = np.triu_indices(self.k_exog + self.k_inflate, k=1)
    hess_arr[tri_idx] = hess_arr.T[tri_idx]

    return hess_arr
def hessian(self, params): """ Conditional logit Hessian matrix of the log-likelihood Parameters ----------- params : array-like The parameters of the model Returns ------- hess : ndarray, (K, K) The Hessian Notes ----- It is the second derivative with respect to the flattened parameters of the loglikelihood function of the conditional logit model evaluated at `params`. .. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta_{j}\\partial\\beta_{l}}=-\\sum_{i=1}^{n}\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\left[\\boldsymbol{1}\\left(j=l\\right)-\\frac{\\exp\\left(\\beta_{l}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right]x_{i}x_{l}^{\\prime} where :math:`\\boldsymbol{1}\\left(j=l\\right)` equals 1 if `j` = `l` and 0 otherwise. """ # TODO: analytical derivatives from statsmodels.tools.numdiff import approx_hess # need options for hess (epsilon) return approx_hess(params, self.loglike)
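# Hedged sketch (not from statsmodels): approx_hess approximates the Hessian of
# a scalar function by finite differences, which is what the snippets above fall
# back on when no analytic Hessian is available.  The quadratic "log-likelihood"
# and the matrix A below are made up so that the exact Hessian (-A) is known.
import numpy as np
from statsmodels.tools.numdiff import approx_hess

A = np.array([[2.0, 0.5],
              [0.5, 1.0]])

def quad_loglike(params):
    # concave quadratic with exact Hessian -A
    return -0.5 * params @ A @ params

params = np.array([0.3, -0.2])
numerical_hess = approx_hess(params, quad_loglike)
print(np.allclose(numerical_hess, -A, atol=1e-4))  # expected: True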
def compute_param_cov(self, params, backcast=None, robust=True):
    """
    Computes parameter covariances using numerical derivatives.

    Parameters
    ----------
    params : 1-d array
        Model parameters
    backcast : float, optional
        Value to use for pre-sample observations in the variance recursion;
        estimated from the residuals if not provided
    robust : bool, optional
        Flag indicating whether to use robust standard errors (True) or
        classic MLE (False)

    Returns
    -------
    ndarray
        Estimated covariance matrix of the parameters
    """
    resids = self.resids(self.starting_values())
    var_bounds = self.volatility.variance_bounds(resids)
    nobs = resids.shape[0]
    if backcast is None and self._backcast is None:
        backcast = self.volatility.backcast(resids)
        self._backcast = backcast
    elif backcast is None:
        backcast = self._backcast

    kwargs = {"sigma2": np.zeros_like(resids),
              "backcast": backcast,
              "var_bounds": var_bounds,
              "individual": False}

    # average Hessian of the (negative) log-likelihood
    hess = approx_hess(params, self._loglikelihood, kwargs=kwargs)
    hess /= nobs
    inv_hess = np.linalg.inv(hess)
    if robust:
        # sandwich estimator: inv(H) * outer product of scores * inv(H)
        kwargs["individual"] = True
        scores = approx_fprime(params, self._loglikelihood, kwargs=kwargs)
        score_cov = np.cov(scores.T)
        return inv_hess.dot(score_cov).dot(inv_hess) / nobs
    else:
        return inv_hess / nobs
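# Hedged sketch of the robust "sandwich" covariance pattern used above, on a
# made-up Gaussian location/scale model; names such as negloglike and theta are
# illustrative, not part of the arch package.
import numpy as np
from statsmodels.tools.numdiff import approx_hess, approx_fprime

rng = np.random.default_rng(0)
y = rng.normal(loc=1.5, scale=2.0, size=500)
nobs = y.shape[0]

def negloglike(params, individual=False):
    # negative Gaussian log-likelihood; per-observation if individual=True
    mu, log_sigma = params
    sigma2 = np.exp(2.0 * log_sigma)
    nll = 0.5 * (np.log(2.0 * np.pi * sigma2) + (y - mu) ** 2 / sigma2)
    return nll if individual else nll.sum()

theta = np.array([y.mean(), np.log(y.std())])         # (approximate) MLE
hess = approx_hess(theta, negloglike) / nobs           # average Hessian
inv_hess = np.linalg.inv(hess)
scores = approx_fprime(theta, negloglike, kwargs={"individual": True})
score_cov = np.cov(scores.T)                           # covariance of per-obs scores
robust_cov = inv_hess @ score_cov @ inv_hess / nobs    # sandwich estimator
classic_cov = inv_hess / nobs                          # classic MLE covariance
print(np.sqrt(np.diag(robust_cov)), np.sqrt(np.diag(classic_cov)))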
def bse(self):  # allow user to specify?
    """Standard errors of the estimated parameters."""
    if self.model.method == "cmle":  # uses a different scale/sigma definition
        resid = self.resid
        ssr = np.dot(resid, resid)
        ols_scale = ssr / (self.nobs - self.k_ar - self.k_trend)
        return np.sqrt(np.diag(self.cov_params(scale=ols_scale)))
    else:
        # invert a numerical Hessian of the loglikelihood
        hess = approx_hess(self.params, self.model.loglike)
        return np.sqrt(np.diag(-np.linalg.inv(hess)))
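# Hedged sketch of the MLE branch above: standard errors as the square root of
# the diagonal of -inv(Hessian of the log-likelihood).  Toy model: Gaussian mean
# with known unit variance, where the exact standard error is 1/sqrt(n).
import numpy as np
from statsmodels.tools.numdiff import approx_hess

rng = np.random.default_rng(1)
y = rng.normal(loc=0.7, size=400)

def loglike(params):
    mu = params[0]
    return -0.5 * np.sum((y - mu) ** 2)   # additive constants dropped

hess = approx_hess(np.array([y.mean()]), loglike)
bse = np.sqrt(np.diag(-np.linalg.inv(hess)))
print(bse, 1.0 / np.sqrt(y.shape[0]))     # both close to 0.05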
def hessian_numdiff(self, params, pen_weight=None, **kwds):
    """Hessian of the penalized loglikelihood based on finite differences
    """
    if pen_weight is None:
        pen_weight = self.pen_weight

    loglike = lambda p: self.loglike(p, pen_weight=pen_weight, **kwds)

    from statsmodels.tools.numdiff import approx_hess
    return approx_hess(params, loglike)
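# Hedged stand-alone analogue of the closure pattern above: build a penalized
# log-likelihood and differentiate it numerically.  The ridge penalty,
# pen_weight value and stand-in log-likelihood are illustrative; the penalty's
# exact Hessian contribution is -2 * pen_weight * I, so the total is known.
import numpy as np
from statsmodels.tools.numdiff import approx_hess

pen_weight = 3.0

def loglike(params):
    return -0.5 * np.sum(params ** 2)          # stand-in log-likelihood

def penalized_loglike(params):
    return loglike(params) - pen_weight * np.sum(params ** 2)

params = np.array([0.4, -1.0, 0.2])
hess = approx_hess(params, penalized_loglike)
expected = -(1.0 + 2.0 * pen_weight) * np.eye(3)
print(np.allclose(hess, expected, atol=1e-4))  # expected: True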
def test_derivatives(self):
    pen = self.pen
    x = self.params

    ps = np.array([pen.deriv(np.atleast_1d(xi)) for xi in x])
    psn = np.array([approx_fprime(np.atleast_1d(xi), pen.func) for xi in x])
    assert_allclose(ps, psn, rtol=1e-7, atol=1e-8)

    ph = np.array([pen.deriv2(np.atleast_1d(xi)) for xi in x])
    phn = np.array([approx_hess(np.atleast_1d(xi), pen.func) for xi in x])
    if ph.ndim == 2:
        # SmoothedSCAD returns only the diagonal of the Hessian if the
        # penalty is independent across parameters
        # TODO: should we allow this also in L2?
        ph = np.array([np.diag(phi) for phi in ph])
    assert_allclose(ph, phn, rtol=1e-7, atol=1e-8)
def test_compute_covariance_without_hess_inverse(self): fitmethod = "powell" opt = scipy.optimize.minimize(self.lpost, self.t0, method=fitmethod, args=self.neg, tol=1.e-10) optres = OptimizationResultsSubclassDummy(self.lpost, opt, neg=True) optres._compute_covariance(self.lpost, opt) if comp_hessian: phess = approx_hess(opt.x, self.lpost) hess_inv = np.linalg.inv(phess) assert np.all(optres.cov == hess_inv) assert np.all(optres.err == np.sqrt(np.diag(np.abs(hess_inv)))) else: assert optres.cov is None assert optres.err is None
def _compute_covariance(self, lpost, res): if hasattr(res, "hess_inv"): if not isinstance(res.hess_inv, np.ndarray): self.cov = np.asarray(res.hess_inv.todense()) else: self.cov = res.hess_inv self.err = np.sqrt(np.diag(self.cov)) else: if comp_hessian: # calculate Hessian approximating with finite differences logging.info("Approximating Hessian with finite differences ...") phess = approx_hess(self.p_opt, lpost) self.cov = np.linalg.inv(phess) self.err = np.sqrt(np.diag(np.abs(self.cov))) else: self.cov = None self.err = None
def test_compare_numdiff(self, use_sqrt, reml, profile_fe): n_grp = 200 grpsize = 5 k_fe = 3 k_re = 2 np.random.seed(3558) exog_fe = np.random.normal(size=(n_grp * grpsize, k_fe)) exog_re = np.random.normal(size=(n_grp * grpsize, k_re)) exog_re[:, 0] = 1 exog_vc = np.random.normal(size=(n_grp * grpsize, 3)) slopes = np.random.normal(size=(n_grp, k_re)) slopes[:, -1] *= 2 slopes = np.kron(slopes, np.ones((grpsize, 1))) slopes_vc = np.random.normal(size=(n_grp, 3)) slopes_vc = np.kron(slopes_vc, np.ones((grpsize, 1))) slopes_vc[:, -1] *= 2 re_values = (slopes * exog_re).sum(1) vc_values = (slopes_vc * exog_vc).sum(1) err = np.random.normal(size=n_grp * grpsize) endog = exog_fe.sum(1) + re_values + vc_values + err groups = np.kron(range(n_grp), np.ones(grpsize)) vc = {"a": {}, "b": {}} for i in range(n_grp): ix = np.flatnonzero(groups == i) vc["a"][i] = exog_vc[ix, 0:2] vc["b"][i] = exog_vc[ix, 2:3] model = MixedLM( endog, exog_fe, groups, exog_re, exog_vc=vc, use_sqrt=use_sqrt) rslt = model.fit(reml=reml) loglike = loglike_function( model, profile_fe=profile_fe, has_fe=not profile_fe) try: # Test the score at several points. for kr in range(5): fe_params = np.random.normal(size=k_fe) cov_re = np.random.normal(size=(k_re, k_re)) cov_re = np.dot(cov_re.T, cov_re) vcomp = np.random.normal(size=2)**2 params = MixedLMParams.from_components( fe_params, cov_re=cov_re, vcomp=vcomp) params_vec = params.get_packed( has_fe=not profile_fe, use_sqrt=use_sqrt) # Check scores gr = -model.score(params, profile_fe=profile_fe) ngr = nd.approx_fprime(params_vec, loglike) assert_allclose(gr, ngr, rtol=1e-3) # Check Hessian matrices at the MLE (we don't have # the profile Hessian matrix and we don't care # about the Hessian for the square root # transformed parameter). if (profile_fe is False) and (use_sqrt is False): hess = -model.hessian(rslt.params_object) params_vec = rslt.params_object.get_packed( use_sqrt=False, has_fe=True) loglike_h = loglike_function( model, profile_fe=False, has_fe=True) nhess = nd.approx_hess(params_vec, loglike_h) assert_allclose(hess, nhess, rtol=1e-3) except AssertionError: # See GH#5628; because this test fails unpredictably but only on # OSX, we only xfail it there. if PLATFORM_OSX: pytest.xfail("fails on OSX due to unresolved " "numerical differences") else: raise
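# Hedged sketch of the comparison pattern in the MixedLM test above: an analytic
# score should match a finite-difference gradient of the log-likelihood.  The
# Poisson regression toy model, design matrix and coefficients are illustrative.
import numpy as np
from numpy.testing import assert_allclose
import statsmodels.tools.numdiff as nd

rng = np.random.default_rng(2)
X = rng.normal(size=(50, 3))
y = rng.poisson(lam=np.exp(X @ np.array([0.2, -0.1, 0.3])))

def loglike(beta):
    eta = X @ beta
    return np.sum(y * eta - np.exp(eta))     # Poisson loglike, factorial term dropped

def score(beta):
    eta = X @ beta
    return X.T @ (y - np.exp(eta))           # analytic gradient

beta = np.array([0.1, 0.0, 0.2])
assert_allclose(score(beta), nd.approx_fprime(beta, loglike), rtol=1e-4, atol=1e-4)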
def fit_from_data(self, data, disp=True): """ MLE estimator for the GARCH """ # Automatic re-scaling: 1. the solver has problems with positive derivative in linesearch. # 2. the log has overflows using small values n = np.floor(np.log10(np.abs(data.mean()))) R = data / 10**n # initial guesses a0 = 0.05 b0 = 0.9 g0 = 1 - a0 - b0 w0 = g0 * np.var(R) # bounds and constraint bounds = ((0, None), (0, 1), (0, 1)) def sum_small_1(x): return 1 - x[1] - x[2] cons = ({"fun": sum_small_1, "type": "ineq"}) def log_likely(x): var = R[0]**2 # initial variance N = len(R) log_lik = 0 for i in range(1, N): var = x[0] + x[1] * R[i - 1]**2 + x[2] * var # variance update log_lik += -np.log(var) - (R[i]**2 / var) return (-1) * log_lik result = minimize(log_likely, x0=[w0, a0, b0], method='SLSQP', bounds=bounds, constraints=cons, tol=1e-8, options={"maxiter": 150}) print(result.message) self.omega = result.x[0] * 10**(2 * n) self.alpha, self.beta = result.x[1:] self.gamma = 1 - self.alpha - self.beta self.VL = self.omega / self.gamma if disp == True: hess = approx_hess(result.x, log_likely) # hessian by finite differences se = np.sqrt(np.diag(np.linalg.inv(hess))) # standard error cv = ss.norm.ppf(1.0 - 0.05 / 2.0) # alpha=0.05 p_val = ss.norm.sf(np.abs(result.x / se)) # survival function df = pd.DataFrame(index=["omega", "alpha", "beta"]) df["Params"] = result.x df["SE"] = se df["P-val"] = p_val df["95% CI lower"] = result.x - cv * se df["95% CI upper"] = result.x + cv * se df.loc["omega", ["Params", "SE", "95% CI lower", "95% CI upper"]] *= 10**( 2 * n) print(df)
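# Hedged sketch of the inference step above: once a numerical Hessian of the
# negative log-likelihood is available, standard errors, Wald p-values and
# normal-approximation confidence intervals all follow from its inverse.  The
# objective neg_loglike and its minimiser x_hat are made up for illustration.
import numpy as np
import scipy.stats as ss
from statsmodels.tools.numdiff import approx_hess

def neg_loglike(params):
    # concave objective with known curvature diag([50, 200])
    return 25.0 * params[0] ** 2 + 100.0 * params[1] ** 2

x_hat = np.array([0.0, 0.0])                     # minimiser of neg_loglike
hess = approx_hess(x_hat, neg_loglike)
se = np.sqrt(np.diag(np.linalg.inv(hess)))       # standard errors
z = x_hat / se
p_val = 2.0 * ss.norm.sf(np.abs(z))              # two-sided Wald p-values
cv = ss.norm.ppf(1.0 - 0.05 / 2.0)               # approx. 1.96 for a 95% interval
ci = np.column_stack([x_hat - cv * se, x_hat + cv * se])
print(se, p_val, ci)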
score = lambda params: -self.score(params)
  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\model.py", line 480, in score
    return approx_fprime1(params, self.nloglike)
  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\sandbox\regression\numdiff.py", line 81, in approx_fprime1
    nobs = np.size(f0) #len(f0)
TypeError: object of type 'numpy.float64' has no len()
'''

res_bfgs = mod_norm2.fit(start_params=start_params, method="bfgs", fprime=None,
                         maxiter=500, retall=0)

from statsmodels.tools.numdiff import approx_fprime, approx_hess
hb = -approx_hess(res_norm3.params, mod_norm2.loglike, epsilon=-1e-4)
hf = -approx_hess(res_norm3.params, mod_norm2.loglike, epsilon=1e-4)
hh = (hf + hb) / 2.
print(np.linalg.eigh(hh))

grad = -approx_fprime(res_norm3.params, mod_norm2.loglike, epsilon=-1e-4)
print(grad)
gradb = -approx_fprime(res_norm3.params, mod_norm2.loglike, epsilon=-1e-4)
gradf = -approx_fprime(res_norm3.params, mod_norm2.loglike, epsilon=1e-4)
print((gradb + gradf) / 2.)

print(res_norm3.model.score(res_norm3.params))
print(res_norm3.model.score(start_params))
mod_norm2.loglike(start_params / 2.)
print(np.linalg.inv(-1 * mod_norm2.hessian(res_norm3.params)))
print(np.sqrt(np.diag(res_bfgs.cov_params())))
modp.fixed_params = None modp.fixed_paramsmask = None print('\nResults with TLinearModel') print('-------------------------') resp = modp.fit(start_params = modp.start_params, disp=1, method='nm', maxfun=10000, maxiter=5000)#'newton') #resp = modp.fit(start_params = modp.start_params, disp=1, method='newton') print('using Nelder-Mead') print(resp.params) print(resp.bse) resp2 = modp.fit(start_params = resp.params, method='Newton') print('using Newton') print(resp2.params) print(resp2.bse) from statsmodels.tools.numdiff import approx_hess hb=-approx_hess(modp.start_params, modp.loglike, epsilon=-1e-4) tmp = modp.loglike(modp.start_params) print(tmp.shape) print('eigenvalues of numerical Hessian') print(np.linalg.eigh(np.linalg.inv(hb))[0]) #store_params is only available in original test script ##pp=np.array(store_params) ##print pp.min(0) ##print pp.max(0)
print('llf', res2.llf, res_norm3.llf) bse = np.sqrt(np.diag(np.linalg.inv(res_norm3.model.hessian( res_norm3.params)))) res_norm3.model.score(res_norm3.params) #fprime in fit option cannot be overwritten, set to None, when score is defined # exception is fixed, but I don't think score was supposed to be called res_bfgs = mod_norm2.fit(start_params=start_params, method="bfgs", fprime=None, maxiter=500, retall=0) hb = -approx_hess(res_norm3.params, mod_norm2.loglike, epsilon=-1e-4) hf = -approx_hess(res_norm3.params, mod_norm2.loglike, epsilon=1e-4) hh = (hf + hb) / 2. print(np.linalg.eigh(hh)) grad = -approx_fprime(res_norm3.params, mod_norm2.loglike, epsilon=-1e-4) print(grad) gradb = -approx_fprime(res_norm3.params, mod_norm2.loglike, epsilon=-1e-4) gradf = -approx_fprime(res_norm3.params, mod_norm2.loglike, epsilon=1e-4) print((gradb + gradf) / 2.) print(res_norm3.model.score(res_norm3.params)) print(res_norm3.model.score(start_params)) mod_norm2.loglike(start_params / 2.) print(np.linalg.inv(-1 * mod_norm2.hessian(res_norm3.params))) print(np.sqrt(np.diag(res_bfgs.cov_params())))
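# Hedged sketch of the hb/hf pattern above: evaluate approx_hess with a positive
# and a negative step and average the two estimates of the observed information.
# The two-parameter loglike and its maximiser x_opt below are made up.
import numpy as np
from statsmodels.tools.numdiff import approx_hess

def loglike(params):
    a, b = params
    return -(a - 1.0) ** 2 - 2.0 * (b + 0.5) ** 2   # maximum at (1, -0.5)

x_opt = np.array([1.0, -0.5])
hb = -approx_hess(x_opt, loglike, epsilon=-1e-4)
hf = -approx_hess(x_opt, loglike, epsilon=1e-4)
hh = (hf + hb) / 2.0                 # averaged observed information
print(np.linalg.eigh(hh)[0])         # both eigenvalues positive (about 2 and 4)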
data_exog = sm.add_constant(rvs, prepend=False) xbeta = 0.9 + 0.1*rvs.sum(1) data_endog = xbeta + 0.1*np.random.standard_t(5, size=nobs) #print data_endog modp = MyT(data_endog, data_exog) modp.start_value = np.ones(data_exog.shape[1]+2) modp.start_value[-2] = 10 modp.start_params = modp.start_value resp = modp.fit(start_params = modp.start_value) print(resp.params) print(resp.bse) from statsmodels.tools.numdiff import approx_hess hb=-approx_hess(modp.start_value, modp.loglike, epsilon=-1e-4) tmp = modp.loglike(modp.start_value) print(tmp.shape) ''' >>> tmp = modp.loglike(modp.start_value) 8 >>> tmp.shape (100,) >>> tmp.sum(0) -24220.877108016182 >>> tmp = modp.nloglikeobs(modp.start_value) 8 >>> tmp.shape (100, 100)
xk = np.array([1,2,3]) xk = np.array([1.,1.,1.]) #xk = np.zeros(3) beta = xk y = np.dot(x, beta) + 0.1*np.random.randn(nobs) xkols = np.dot(np.linalg.pinv(x),y) print(approx_fprime((1,2,3),fun,epsilon,x)) gradtrue = x.sum(0) print(x.sum(0)) gradcs = approx_fprime_cs((1,2,3), fun, (x,), h=1.0e-20) print(gradcs, maxabs(gradcs, gradtrue)) print(approx_hess_cs((1,2,3), fun, (x,), h=1.0e-20)) #this is correctly zero print(approx_hess_cs((1,2,3), fun2, (y,x), h=1.0e-20)-2*np.dot(x.T, x)) print(numdiff.approx_hess(xk,fun2,1e-3, (y,x))[0] - 2*np.dot(x.T, x)) gt = (-x*2*(y-np.dot(x, [1,2,3]))[:,None]) g = approx_fprime_cs((1,2,3), fun1, (y,x), h=1.0e-20)#.T #this shouldn't be transposed gd = numdiff.approx_fprime((1,2,3),fun1,epsilon,(y,x)) print(maxabs(g, gt)) print(maxabs(gd, gt)) import statsmodels.api as sm data = sm.datasets.spector.load(as_pandas=False) data.exog = sm.add_constant(data.exog, prepend=False) #mod = sm.Probit(data.endog, data.exog) mod = sm.Logit(data.endog, data.exog) #res = mod.fit(method="newton")
def hessian(self, params):
    """
    Returns the numerical Hessian of the loglikelihood
    (an analytical Hessian is not implemented).
    """
    loglike = self.loglike
    return approx_hess(params, loglike)
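# Hedged sketch of the delegation pattern above: a model class whose hessian
# method simply differentiates its own loglike numerically.  The MeanModel
# class is illustrative, not from any library.
import numpy as np
from statsmodels.tools.numdiff import approx_hess

class MeanModel:
    def __init__(self, endog):
        self.endog = np.asarray(endog)

    def loglike(self, params):
        mu = params[0]
        return -0.5 * np.sum((self.endog - mu) ** 2)

    def hessian(self, params):
        # numerical Hessian of the log-likelihood, as in the snippets above
        return approx_hess(params, self.loglike)

m = MeanModel([1.0, 2.0, 0.5, 1.5])
print(m.hessian(np.array([1.25])))   # approximately [[-4.]] (minus nobs)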
data_exog = sm.add_constant(rvs, prepend=False) xbeta = 0.9 + 0.1 * rvs.sum(1) data_endog = xbeta + 0.1 * np.random.standard_t(5, size=nobs) #print data_endog modp = MyT(data_endog, data_exog) modp.start_value = np.ones(data_exog.shape[1] + 2) modp.start_value[-2] = 10 modp.start_params = modp.start_value resp = modp.fit(start_params=modp.start_value) print(resp.params) print(resp.bse) from statsmodels.tools.numdiff import approx_fprime, approx_hess hb = -approx_hess(modp.start_value, modp.loglike, epsilon=-1e-4) tmp = modp.loglike(modp.start_value) print(tmp.shape) ''' >>> tmp = modp.loglike(modp.start_value) 8 >>> tmp.shape (100,) >>> tmp.sum(0) -24220.877108016182 >>> tmp = modp.nloglikeobs(modp.start_value) 8 >>> tmp.shape (100, 100) >>> np.dot(modp.exog, beta).shape Traceback (most recent call last):
def hessian(self, AB_mask): """ Returns numerical hessian. """ loglike = self.loglike return approx_hess(AB_mask, loglike)
def test_compare_numdiff(self, use_sqrt, reml, profile_fe): n_grp = 200 grpsize = 5 k_fe = 3 k_re = 2 np.random.seed(3558) exog_fe = np.random.normal(size=(n_grp * grpsize, k_fe)) exog_re = np.random.normal(size=(n_grp * grpsize, k_re)) exog_re[:, 0] = 1 exog_vc = np.random.normal(size=(n_grp * grpsize, 3)) slopes = np.random.normal(size=(n_grp, k_re)) slopes[:, -1] *= 2 slopes = np.kron(slopes, np.ones((grpsize, 1))) slopes_vc = np.random.normal(size=(n_grp, 3)) slopes_vc = np.kron(slopes_vc, np.ones((grpsize, 1))) slopes_vc[:, -1] *= 2 re_values = (slopes * exog_re).sum(1) vc_values = (slopes_vc * exog_vc).sum(1) err = np.random.normal(size=n_grp * grpsize) endog = exog_fe.sum(1) + re_values + vc_values + err groups = np.kron(range(n_grp), np.ones(grpsize)) vc = {"a": {}, "b": {}} for i in range(n_grp): ix = np.flatnonzero(groups == i) vc["a"][i] = exog_vc[ix, 0:2] vc["b"][i] = exog_vc[ix, 2:3] model = MixedLM(endog, exog_fe, groups, exog_re, exog_vc=vc, use_sqrt=use_sqrt) rslt = model.fit(reml=reml) loglike = loglike_function(model, profile_fe=profile_fe, has_fe=not profile_fe) try: # Test the score at several points. for kr in range(5): fe_params = np.random.normal(size=k_fe) cov_re = np.random.normal(size=(k_re, k_re)) cov_re = np.dot(cov_re.T, cov_re) vcomp = np.random.normal(size=2)**2 params = MixedLMParams.from_components(fe_params, cov_re=cov_re, vcomp=vcomp) params_vec = params.get_packed(has_fe=not profile_fe, use_sqrt=use_sqrt) # Check scores gr = -model.score(params, profile_fe=profile_fe) ngr = nd.approx_fprime(params_vec, loglike) assert_allclose(gr, ngr, rtol=1e-3) # Check Hessian matrices at the MLE (we don't have # the profile Hessian matrix and we don't care # about the Hessian for the square root # transformed parameter). if (profile_fe is False) and (use_sqrt is False): hess = -model.hessian(rslt.params_object) params_vec = rslt.params_object.get_packed(use_sqrt=False, has_fe=True) loglike_h = loglike_function(model, profile_fe=False, has_fe=True) nhess = nd.approx_hess(params_vec, loglike_h) assert_allclose(hess, nhess, rtol=1e-3) except AssertionError: # See GH#5628; because this test fails unpredictably but only on # OSX, we only xfail it there. if PLATFORM_OSX: pytest.xfail("fails on OSX due to unresolved " "numerical differences") else: raise
def test_compare_numdiff(self): n_grp = 200 grpsize = 5 k_fe = 3 k_re = 2 for use_sqrt in False, True: for reml in False, True: for profile_fe in False, True: np.random.seed(3558) exog_fe = np.random.normal(size=(n_grp * grpsize, k_fe)) exog_re = np.random.normal(size=(n_grp * grpsize, k_re)) exog_re[:, 0] = 1 exog_vc = np.random.normal(size=(n_grp * grpsize, 3)) slopes = np.random.normal(size=(n_grp, k_re)) slopes[:, -1] *= 2 slopes = np.kron(slopes, np.ones((grpsize, 1))) slopes_vc = np.random.normal(size=(n_grp, 3)) slopes_vc = np.kron(slopes_vc, np.ones((grpsize, 1))) slopes_vc[:, -1] *= 2 re_values = (slopes * exog_re).sum(1) vc_values = (slopes_vc * exog_vc).sum(1) err = np.random.normal(size=n_grp * grpsize) endog = exog_fe.sum(1) + re_values + vc_values + err groups = np.kron(range(n_grp), np.ones(grpsize)) vc = {"a": {}, "b": {}} for i in range(n_grp): ix = np.flatnonzero(groups == i) vc["a"][i] = exog_vc[ix, 0:2] vc["b"][i] = exog_vc[ix, 2:3] model = MixedLM(endog, exog_fe, groups, exog_re, exog_vc=vc, use_sqrt=use_sqrt) rslt = model.fit(reml=reml) loglike = loglike_function( model, profile_fe=profile_fe, has_fe=not profile_fe) # Test the score at several points. for kr in range(5): fe_params = np.random.normal(size=k_fe) cov_re = np.random.normal(size=(k_re, k_re)) cov_re = np.dot(cov_re.T, cov_re) vcomp = np.random.normal(size=2) ** 2 params = MixedLMParams.from_components( fe_params, cov_re=cov_re, vcomp=vcomp) params_vec = params.get_packed( has_fe=not profile_fe, use_sqrt=use_sqrt) # Check scores gr = -model.score(params, profile_fe=profile_fe) ngr = nd.approx_fprime(params_vec, loglike) assert_allclose(gr, ngr, rtol=1e-3) # Check Hessian matrices at the MLE (we don't have # the profile Hessian matrix and we don't care # about the Hessian for the square root # transformed parameter). if (profile_fe is False) and (use_sqrt is False): hess = -model.hessian(rslt.params_object) params_vec = rslt.params_object.get_packed( use_sqrt=False, has_fe=True) loglike_h = loglike_function( model, profile_fe=False, has_fe=True) nhess = nd.approx_hess(params_vec, loglike_h) assert_allclose(hess, nhess, rtol=1e-3)
modp.fixed_paramsmask = None

resp = modp.fit(start_params=modp.start_params, disp=1, method='nm')  # 'newton'
#resp = modp.fit(start_params=modp.start_params, disp=1, method='newton')
print('\nestimation results t-dist')
print(resp.params)
print(resp.bse)
resp2 = modp.fit(start_params=resp.params, method='Newton')
print('using Newton')
print(resp2.params)
print(resp2.bse)

from statsmodels.tools.numdiff import approx_fprime, approx_hess
hb = -approx_hess(modp.start_params, modp.loglike, epsilon=-1e-4)
tmp = modp.loglike(modp.start_params)
print(tmp.shape)
#np.linalg.eigh(np.linalg.inv(hb))[0]

pp = np.array(store_params)
print(pp.min(0))
print(pp.max(0))


##################### Example: Pareto
# estimating scale doesn't work yet, a bug somewhere?
# fit_ks works well, but no bse or other result statistics yet

#import for kstest based estimation
#should be replaced
import statsmodels.sandbox.distributions.sppatch
def test_compare_numdiff(self): n_grp = 200 grpsize = 5 k_fe = 3 k_re = 2 for use_sqrt in False, True: for reml in False, True: for profile_fe in False, True: np.random.seed(3558) exog_fe = np.random.normal(size=(n_grp * grpsize, k_fe)) exog_re = np.random.normal(size=(n_grp * grpsize, k_re)) exog_re[:, 0] = 1 exog_vc = np.random.normal(size=(n_grp * grpsize, 3)) slopes = np.random.normal(size=(n_grp, k_re)) slopes[:, -1] *= 2 slopes = np.kron(slopes, np.ones((grpsize, 1))) slopes_vc = np.random.normal(size=(n_grp, 3)) slopes_vc = np.kron(slopes_vc, np.ones((grpsize, 1))) slopes_vc[:, -1] *= 2 re_values = (slopes * exog_re).sum(1) vc_values = (slopes_vc * exog_vc).sum(1) err = np.random.normal(size=n_grp * grpsize) endog = exog_fe.sum(1) + re_values + vc_values + err groups = np.kron(range(n_grp), np.ones(grpsize)) vc = {"a": {}, "b": {}} for i in range(n_grp): ix = np.flatnonzero(groups == i) vc["a"][i] = exog_vc[ix, 0:2] vc["b"][i] = exog_vc[ix, 2:3] model = MixedLM(endog, exog_fe, groups, exog_re, exog_vc=vc, use_sqrt=use_sqrt) rslt = model.fit(reml=reml) loglike = loglike_function(model, profile_fe=profile_fe, has_fe=not profile_fe) # Test the score at several points. for kr in range(5): fe_params = np.random.normal(size=k_fe) cov_re = np.random.normal(size=(k_re, k_re)) cov_re = np.dot(cov_re.T, cov_re) vcomp = np.random.normal(size=2) ** 2 params = MixedLMParams.from_components(fe_params, cov_re=cov_re, vcomp=vcomp) params_vec = params.get_packed(has_fe=not profile_fe, use_sqrt=use_sqrt) # Check scores gr = -model.score(params, profile_fe=profile_fe) ngr = nd.approx_fprime(params_vec, loglike) assert_allclose(gr, ngr, rtol=1e-3) # Check Hessian matrices at the MLE (we don't have # the profile Hessian matrix and we don't care # about the Hessian for the square root # transformed parameter). if (profile_fe is False) and (use_sqrt is False): hess = -model.hessian(rslt.params_object) params_vec = rslt.params_object.get_packed(use_sqrt=False, has_fe=True) loglike_h = loglike_function(model, profile_fe=False, has_fe=True) nhess = nd.approx_hess(params_vec, loglike_h) assert_allclose(hess, nhess, rtol=1e-3)
def _fitting(self, optfunc, ain, optfuncprime=None, neg=True, obs=True): lenpower = float(len(self.y)) if neg == True: if scipy.__version__ < "0.10.0": args = [neg] else: args = (neg,) else: args = () ### different commands for different fitting methods, ### at least until scipy 0.11 is out funcval = 100.0 while funcval == 100 or funcval == 200 or funcval == 0.0 or funcval == np.inf or funcval == -np.inf: ## constrained minimization with truncated newton or constrained bfgs ## Newton conjugate gradient, which doesn't work if self.fitmethod == scipy.optimize.fmin_ncg: aopt = self.fitmethod(optfunc, ain, optfuncprime, disp=0, args=args) ### BFGS algorithm elif self.fitmethod == scipy.optimize.fmin_bfgs: aopt = self.fitmethod(optfunc, ain, disp=0, full_output=True, args=args) warnflag = aopt[6] if warnflag == 1: print("*** ACHTUNG! Maximum number of iterations exceeded! ***") # elif warnflag == 2: # print("Gradient and/or function calls not changing!") ## all other methods: Simplex, Powell, Gradient else: aopt = self.fitmethod(optfunc, ain, disp=0, full_output=True, args=args) funcval = aopt[1] ain = np.array(ain) * ((np.random.rand(len(ain)) - 0.5) * 4.0) ### make a dictionary with best-fit parameters: ## popt: best fit parameters (list) ## result: value of ML function at minimum ## model: the model used fitparams = {"popt": aopt[0], "result": aopt[1]} ### calculate model power spectrum from optimal parameters # fitparams['mfit'] = func(self.x, *fitparams['popt']) ### degrees of freedom fitparams["dof"] = lenpower - float(len(fitparams["popt"])) ### Akaike Information Criterion fitparams["aic"] = fitparams["result"] + 2.0 * len(ain) ### Bayesian Information Criterion fitparams["bic"] = fitparams["result"] + len(ain) * len(self.x) ### compute deviance try: fitparams["deviance"] = 2.0 * optfunc.loglikelihood(fitparams["popt"]) except AttributeError: fitparams["deviance"] = 2.0 * optfunc(fitparams["popt"]) fitparams["sexp"] = 2.0 * len(self.x) * len(fitparams["popt"]) fitparams["ssd"] = np.sqrt(2.0 * fitparams["sexp"]) fitparams["smooth3"] = scipy.signal.wiener(self.y, 3) fitparams["smooth5"] = scipy.signal.wiener(self.y, 5) fitparams["smooth11"] = scipy.signal.wiener(self.y, 11) ### if this is an observation (not fake data), compute the covariance matrix if obs == True: ### for BFGS, get covariance from algorithm output if self.fitmethod == scipy.optimize.fmin_bfgs: print("Approximating covariance from BFGS: ") covar = aopt[3] stderr = np.sqrt(np.diag(covar)) else: ### calculate Hessian approximating with finite differences print("Approximating Hessian with finite differences ...") if comp_hessian: phess = approx_hess(aopt[0], optfunc, neg=args) ### covariance is the inverse of the Hessian print("Hessian (empirical): " + str(phess)) covar = np.linalg.inv(phess) stderr = np.sqrt(np.diag(covar)) else: print("Cannot compute hessian! Use BFGS or install statsmodels!") covar = None stderr = None print("Covariance (empirical): " + str(covar)) fitparams["cov"] = covar ### errors of parameters are on the diagonal of the covariance ### matrix; take square root to get standard deviation fitparams["err"] = stderr ### Print results to screen print("The best-fit model parameters plus errors are:") for i, (x, y) in enumerate(zip(fitparams["popt"], stderr)): print("Parameter " + str(i) + ": " + str(x) + " +/- " + str(y)) print("The Akaike Information Criterion of the power law model is: " + str(fitparams["aic"]) + ".") return fitparams
def _fitting(self, optfunc, ain, optfuncprime=None, neg=True, obs=True): lenpower = float(len(self.y)) if neg == True: if scipy.__version__ < "0.10.0": args = [neg] else: args = (neg, ) else: args = () ### different commands for different fitting methods, ### at least until scipy 0.11 is out funcval = 100.0 while funcval == 100 or funcval == 200 or funcval == 0.0 or funcval == np.inf or funcval == -np.inf: ## constrained minimization with truncated newton or constrained bfgs ## Newton conjugate gradient, which doesn't work if self.fitmethod == scipy.optimize.fmin_ncg: aopt = self.fitmethod(optfunc, ain, optfuncprime, disp=0, args=args) ### BFGS algorithm elif self.fitmethod == scipy.optimize.fmin_bfgs: aopt = self.fitmethod(optfunc, ain, disp=0, full_output=True, args=args) warnflag = aopt[6] if warnflag == 1: print( "*** ACHTUNG! Maximum number of iterations exceeded! ***" ) #elif warnflag == 2: #print("Gradient and/or function calls not changing!") ## all other methods: Simplex, Powell, Gradient else: aopt = self.fitmethod(optfunc, ain, disp=0, full_output=True, args=args) funcval = aopt[1] ain = np.array(ain) * ((np.random.rand(len(ain)) - 0.5) * 4.0) ### make a dictionary with best-fit parameters: ## popt: best fit parameters (list) ## result: value of ML function at minimum ## model: the model used fitparams = {'popt': aopt[0], 'result': aopt[1]} ### calculate model power spectrum from optimal parameters #fitparams['mfit'] = func(self.x, *fitparams['popt']) ### degrees of freedom fitparams['dof'] = lenpower - float(len(fitparams['popt'])) ### Akaike Information Criterion fitparams['aic'] = fitparams['result'] + 2.0 * len(ain) ### Bayesian Information Criterion fitparams['bic'] = fitparams['result'] + len(ain) * len(self.x) ### compute deviance try: fitparams['deviance'] = 2.0 * optfunc.loglikelihood( fitparams['popt']) except AttributeError: fitparams['deviance'] = 2.0 * optfunc(fitparams['popt']) fitparams['sexp'] = 2.0 * len(self.x) * len(fitparams['popt']) fitparams['ssd'] = np.sqrt(2.0 * fitparams['sexp']) fitparams['smooth3'] = scipy.signal.wiener(self.y, 3) fitparams['smooth5'] = scipy.signal.wiener(self.y, 5) fitparams['smooth11'] = scipy.signal.wiener(self.y, 11) ### if this is an observation (not fake data), compute the covariance matrix if obs == True: ### for BFGS, get covariance from algorithm output if self.fitmethod == scipy.optimize.fmin_bfgs: print("Approximating covariance from BFGS: ") covar = aopt[3] stderr = np.sqrt(np.diag(covar)) else: ### calculate Hessian approximating with finite differences print("Approximating Hessian with finite differences ...") if comp_hessian: phess = approx_hess(aopt[0], optfunc, neg=args) ### covariance is the inverse of the Hessian print("Hessian (empirical): " + str(phess)) covar = np.linalg.inv(phess) stderr = np.sqrt(np.diag(covar)) else: print( "Cannot compute hessian! Use BFGS or install statsmodels!" ) covar = None stderr = None print("Covariance (empirical): " + str(covar)) fitparams['cov'] = covar ### errors of parameters are on the diagonal of the covariance ### matrix; take square root to get standard deviation fitparams['err'] = stderr ### Print results to screen print("The best-fit model parameters plus errors are:") for i, (x, y) in enumerate(zip(fitparams['popt'], stderr)): print("Parameter " + str(i) + ": " + str(x) + " +/- " + str(y)) print( "The Akaike Information Criterion of the power law model is: " + str(fitparams['aic']) + ".") return fitparams
beta = xk y = np.dot(x, beta) + 0.1 * np.random.randn(nobs) xkols = np.dot(np.linalg.pinv(x), y) print(approx_fprime((1, 2, 3), fun, epsilon, x)) gradtrue = x.sum(0) print(x.sum(0)) gradcs = approx_fprime_cs((1, 2, 3), fun, (x, ), h=1.0e-20) print(gradcs, maxabs(gradcs, gradtrue)) print(approx_hess_cs((1, 2, 3), fun, (x, ), h=1.0e-20)) #this is correctly zero print( approx_hess_cs((1, 2, 3), fun2, (y, x), h=1.0e-20) - 2 * np.dot(x.T, x)) print(numdiff.approx_hess(xk, fun2, 1e-3, (y, x))[0] - 2 * np.dot(x.T, x)) gt = (-x * 2 * (y - np.dot(x, [1, 2, 3]))[:, None]) g = approx_fprime_cs((1, 2, 3), fun1, (y, x), h=1.0e-20) #.T #this shouldn't be transposed gd = numdiff.approx_fprime((1, 2, 3), fun1, epsilon, (y, x)) print(maxabs(g, gt)) print(maxabs(gd, gt)) import statsmodels.api as sm data = sm.datasets.spector.load(as_pandas=False) data.exog = sm.add_constant(data.exog, prepend=False) #mod = sm.Probit(data.endog, data.exog) mod = sm.Logit(data.endog, data.exog) #res = mod.fit(method="newton")
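# Hedged sketch of the complex-step derivatives exercised above: for analytic
# functions, approx_fprime_cs gives gradients accurate to roughly machine
# precision, which makes it a useful reference when checking finite-difference
# results.  The function fun and the point xk below are illustrative.
import numpy as np
from statsmodels.tools.numdiff import approx_fprime, approx_fprime_cs

def fun(params):
    return np.sum(np.exp(params))          # gradient is exp(params)

xk = np.array([0.1, -0.3, 0.7])
g_cs = approx_fprime_cs(xk, fun)           # complex-step gradient
g_fd = approx_fprime(xk, fun)              # forward finite differences
g_exact = np.exp(xk)
print(np.max(np.abs(g_cs - g_exact)), np.max(np.abs(g_fd - g_exact)))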
def test_compare_numdiff(self): import statsmodels.tools.numdiff as nd n_grp = 200 grpsize = 5 k_fe = 3 k_re = 2 for jl in 0,1: for reml in False,True: for cov_pen_wt in 0,10: cov_pen = penalties.PSD(cov_pen_wt) np.random.seed(3558) exog_fe = np.random.normal(size=(n_grp*grpsize, k_fe)) exog_re = np.random.normal(size=(n_grp*grpsize, k_re)) exog_re[:, 0] = 1 slopes = np.random.normal(size=(n_grp, k_re)) slopes = np.kron(slopes, np.ones((grpsize,1))) re_values = (slopes * exog_re).sum(1) err = np.random.normal(size=n_grp*grpsize) endog = exog_fe.sum(1) + re_values + err groups = np.kron(range(n_grp), np.ones(grpsize)) if jl == 0: md = MixedLM(endog, exog_fe, groups, exog_re) score = lambda x: -md.score_sqrt(x) hessian = lambda x : -md.hessian_sqrt(x) else: md = MixedLM(endog, exog_fe, groups, exog_re, use_sqrt=False) score = lambda x: -md.score_full(x) hessian = lambda x: -md.hessian_full(x) md.reml = reml md.cov_pen = cov_pen loglike = lambda x: -md.loglike(x) rslt = md.fit() # Test the score at several points. for kr in range(5): fe_params = np.random.normal(size=k_fe) cov_re = np.random.normal(size=(k_re, k_re)) cov_re = np.dot(cov_re.T, cov_re) params = MixedLMParams.from_components(fe_params, cov_re) if jl == 0: params_vec = params.get_packed() else: params_vec = params.get_packed(use_sqrt=False) # Check scores gr = score(params) ngr = nd.approx_fprime(params_vec, loglike) assert_allclose(gr, ngr, rtol=1e-2) # Hessian matrices don't agree well away from # the MLE. #if cov_pen_wt == 0: # hess = hessian(params) # nhess = nd.approx_hess(params_vec, loglike) # assert_allclose(hess, nhess, rtol=1e-2) # Check Hessian matrices at the MLE. if cov_pen_wt == 0: hess = hessian(rslt.params_object) params_vec = rslt.params_object.get_packed() nhess = nd.approx_hess(params_vec, loglike) assert_allclose(hess, nhess, rtol=1e-2)