def test_hess(self):
    #NOTE: I had to overwrite this to lessen the tolerance
    for test_params in self.params:
        he = self.mod.hessian(test_params)
        hefd = numdiff.approx_fprime_cs(test_params, self.mod.score)
        assert_almost_equal(he, hefd, decimal=DEC8)

        #NOTE: notice the accuracy below and the epsilon changes
        # this doesn't work well for score -> hessian with non-cs step
        # it's a little better around the optimum
        assert_almost_equal(he, hefd, decimal=7)
        hefd = numdiff.approx_fprime(test_params, self.mod.score,
                                     centered=True)
        assert_almost_equal(he, hefd, decimal=4)
        hefd = numdiff.approx_fprime(test_params, self.mod.score, 1e-9,
                                     centered=False)
        assert_almost_equal(he, hefd, decimal=2)

        hescs = numdiff.approx_fprime_cs(test_params, self.mod.score)
        assert_almost_equal(he, hescs, decimal=DEC8)

        hecs = numdiff.approx_hess_cs(test_params, self.mod.loglike)
        assert_almost_equal(he, hecs, decimal=5)

        #NOTE: these just don't work well
        #hecs = numdiff.approx_hess1(test_params, self.mod.loglike, 1e-3)
        #assert_almost_equal(he, hecs, decimal=1)
        #hecs = numdiff.approx_hess2(test_params, self.mod.loglike, 1e-4)
        #assert_almost_equal(he, hecs, decimal=0)
        hecs = numdiff.approx_hess3(test_params, self.mod.loglike, 1e-4)
        assert_almost_equal(he, hecs, decimal=0)
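# Aside (illustrative, not part of the test class above): a minimal sketch of
# why the complex-step derivative (approx_fprime_cs) is so much more accurate
# than forward or centered finite differences, on a hypothetical smooth
# function with a known gradient.
import numpy as np
from statsmodels.tools.numdiff import approx_fprime, approx_fprime_cs

def f(x):
    return np.exp(x).sum()  # gradient is exp(x)

x0 = np.array([0.5, -0.2, 1.3])
exact = np.exp(x0)
g_fwd = approx_fprime(x0, f)                 # forward differences
g_cen = approx_fprime(x0, f, centered=True)  # centered differences
g_cs = approx_fprime_cs(x0, f)               # complex step
print(np.abs(g_fwd - exact).max())  # roughly 1e-8
print(np.abs(g_cen - exact).max())  # roughly 1e-10
print(np.abs(g_cs - exact).max())   # near machine precision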
def test_logit_1d():
    y = np.r_[0, 1, 0, 1, 0, 1, 0, 1, 1, 1]
    g = np.r_[0, 0, 0, 1, 1, 1, 2, 2, 2, 2]

    x = np.r_[0, 1, 0, 0, 1, 1, 0, 0, 1, 0]
    x = x[:, None]

    model = ConditionalLogit(y, x, groups=g)

    # Check the gradient for the denominator of the partial likelihood
    for x in -1, 0, 1, 2:
        params = np.r_[x, ]
        _, grad = model._denom_grad(0, params)
        ngrad = approx_fprime(params, lambda x: model._denom(0, x))
        assert_allclose(grad, ngrad)

    # Check the gradient for the loglikelihood
    for x in -1, 0, 1, 2:
        grad = approx_fprime(np.r_[x, ], model.loglike)
        score = model.score(np.r_[x, ])
        assert_allclose(grad, score, rtol=1e-4)

    result = model.fit()

    # From Stata
    assert_allclose(result.params, np.r_[0.9272407], rtol=1e-5)
    assert_allclose(result.bse, np.r_[1.295155], rtol=1e-5)
def test_logit_2d():
    y = np.r_[0, 1, 0, 1, 0, 1, 0, 1, 1, 1]
    g = np.r_[0, 0, 0, 1, 1, 1, 2, 2, 2, 2]

    x1 = np.r_[0, 1, 0, 0, 1, 1, 0, 0, 1, 0]
    x2 = np.r_[0, 0, 1, 0, 0, 1, 0, 1, 1, 1]
    x = np.empty((10, 2))
    x[:, 0] = x1
    x[:, 1] = x2

    model = ConditionalLogit(y, x, groups=g)

    # Check the gradient for the denominator of the partial likelihood
    for x in -1, 0, 1, 2:
        params = np.r_[x, -1.5 * x]
        _, grad = model._denom_grad(0, params)
        ngrad = approx_fprime(params, lambda x: model._denom(0, x))
        assert_allclose(grad, ngrad, rtol=1e-5)

    # Check the gradient for the loglikelihood
    for x in -1, 0, 1, 2:
        params = np.r_[-0.5 * x, 0.5 * x]
        grad = approx_fprime(params, model.loglike)
        score = model.score(params)
        assert_allclose(grad, score, rtol=1e-4)

    result = model.fit()

    # From Stata
    assert_allclose(result.params, np.r_[1.011074, 1.236758], rtol=1e-3)
    assert_allclose(result.bse, np.r_[1.420784, 1.361738], rtol=1e-5)

    result.summary()
def test_calc_project_jacobian(self):
    proj = TestProject.proj
    project_param_vector = np.zeros((3,))
    low_deg_idx = proj.project_param_idx['Group_1'][('Low',)]
    high_deg_idx = proj.project_param_idx['Group_1'][('High',)]
    synt_idx = proj.project_param_idx['k_synt']['Global']

    project_param_vector[high_deg_idx] = 0.01
    project_param_vector[low_deg_idx] = 0.001
    project_param_vector[synt_idx] = 0.01
    log_project_param_vector = np.log(project_param_vector)

    sens_jacobian = proj.calc_project_jacobian(log_project_param_vector)

    def get_scaled_sims(x):
        proj.residuals(x)
        sims = proj.get_simulations(scaled=True)
        return sims.values[:, 0]

    num_global_jac = approx_fprime(log_project_param_vector,
                                   get_scaled_sims, centered=True)
    assert np.allclose(num_global_jac, sens_jacobian, atol=0.000001)

    project_param_vector[high_deg_idx] = 0.02
    project_param_vector[low_deg_idx] = 0.003
    project_param_vector[synt_idx] = 0.05
    log_project_param_vector = np.log(project_param_vector)

    sens_rss_grad = proj.calc_rss_gradient(log_project_param_vector)
    num_rss_jac = approx_fprime(log_project_param_vector,
                                proj.calc_sum_square_residuals,
                                centered=True)
    assert np.allclose(sens_rss_grad, num_rss_jac, atol=0.000001)
def test_dtypes():
    def f(x):
        return 2 * x

    desired = np.array([[2, 0],
                        [0, 2]])
    assert_allclose(approx_fprime(np.array([1, 2]), f), desired)
    assert_allclose(approx_fprime(np.array([1., 2.]), f), desired)
    assert_allclose(approx_fprime(np.array([1.+0j, 2.+0j]), f), desired)
def gradient_momcond(self, params, epsilon=1e-4, method='centered'):
    momcond = self.momcond_mean

    if method == 'centered':
        gradmoms = (approx_fprime(params, momcond, epsilon=epsilon) +
                    approx_fprime(params, momcond, epsilon=-epsilon)) / 2
    else:
        gradmoms = approx_fprime(params, momcond, epsilon=epsilon)

    return gradmoms
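# Aside (illustrative, not part of the class above): averaging a forward step
# (+epsilon) and a backward step (-epsilon), as gradient_momcond does, is a
# centered difference. approx_fprime also offers centered=True directly
# (internally it halves the supplied epsilon, so the step size differs
# slightly). For the hypothetical quadratic moment function below both
# variants are exact up to rounding.
import numpy as np
from statsmodels.tools.numdiff import approx_fprime

def momcond(params):
    return np.array([params[0]**2, params[0] * params[1]])

p = np.array([0.7, -1.2])
eps = 1e-4
avg = (approx_fprime(p, momcond, epsilon=eps) +
       approx_fprime(p, momcond, epsilon=-eps)) / 2
cen = approx_fprime(p, momcond, epsilon=eps, centered=True)
print(np.allclose(avg, cen))  # True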
def test_grad_fun1_fd(self):
    for test_params in self.params:
        #gtrue = self.x.sum(0)
        gtrue = self.gradtrue(test_params)
        fun = self.fun()

        epsilon = 1e-6
        gfd = numdiff.approx_fprime(test_params, fun, epsilon=epsilon,
                                    args=self.args)
        gfd += numdiff.approx_fprime(test_params, fun, epsilon=-epsilon,
                                     args=self.args)
        gfd /= 2.
        assert_almost_equal(gtrue, gfd, decimal=DEC6)
def score(self, params):
    """
    Gradient of log-likelihood evaluated at params
    """
    from statsmodels.tools.numdiff import approx_fprime
    return approx_fprime(params, self.loglike, epsilon=1e-4,
                         centered=True).ravel()
def compute_param_cov(self, params, backcast=None, robust=True):
    """
    Computes parameter covariances using numerical derivatives.

    Parameters
    ----------
    params : 1-d array
        Model parameters
    backcast : float, optional
        Value to use for backcasting. If not provided, the stored or
        default backcast of the volatility process is used.
    robust : bool, optional
        Flag indicating whether to use robust standard errors (True) or
        classic MLE (False)
    """
    resids = self.resids(self.starting_values())
    var_bounds = self.volatility.variance_bounds(resids)
    nobs = resids.shape[0]
    if backcast is None and self._backcast is None:
        backcast = self.volatility.backcast(resids)
        self._backcast = backcast
    elif backcast is None:
        backcast = self._backcast

    kwargs = {"sigma2": np.zeros_like(resids),
              "backcast": backcast,
              "var_bounds": var_bounds,
              "individual": False}

    hess = approx_hess(params, self._loglikelihood, kwargs=kwargs)
    hess /= nobs
    inv_hess = np.linalg.inv(hess)
    if robust:
        kwargs["individual"] = True
        scores = approx_fprime(params, self._loglikelihood,
                               kwargs=kwargs)
        score_cov = np.cov(scores.T)
        return inv_hess.dot(score_cov).dot(inv_hess) / nobs
    else:
        return inv_hess / nobs
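# Aside (illustrative): the robust branch above is the familiar sandwich
# estimator H^{-1} S H^{-1} / n, with H the average Hessian of the
# log-likelihood and S the covariance of the per-observation scores. A
# stripped-down sketch, assuming loglike_total returns the summed
# log-likelihood and loglike_obs the per-observation contributions (both
# names are hypothetical):
import numpy as np
from statsmodels.tools.numdiff import approx_fprime, approx_hess

def sandwich_cov(params, loglike_total, loglike_obs, nobs):
    hess = approx_hess(params, loglike_total) / nobs
    inv_hess = np.linalg.inv(hess)
    scores = approx_fprime(params, loglike_obs)  # (nobs, k) Jacobian
    score_cov = np.cov(scores.T)
    return inv_hess @ score_cov @ inv_hess / nobs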
def test_poisson_2d():
    y = np.r_[3, 1, 4, 8, 2, 5, 4, 7, 2, 6]
    g = np.r_[0, 0, 0, 1, 1, 1, 2, 2, 2, 2]

    x1 = np.r_[0, 1, 0, 0, 1, 1, 0, 0, 1, 0]
    x2 = np.r_[2, 1, 0, 0, 1, 2, 3, 2, 0, 1]
    x = np.empty((10, 2))
    x[:, 0] = x1
    x[:, 1] = x2

    model = ConditionalPoisson(y, x, groups=g)

    # Check the gradient for the loglikelihood
    for x in -1, 0, 1, 2:
        params = np.r_[-0.5 * x, 0.5 * x]
        grad = approx_fprime(params, model.loglike)
        score = model.score(params)
        assert_allclose(grad, score, rtol=1e-4)

    result = model.fit()

    # From Stata
    assert_allclose(result.params, np.r_[-.9478957, -.0134279], rtol=1e-3)
    assert_allclose(result.bse, np.r_[.3874942, .1686712], rtol=1e-5)

    result.summary()
def deriv2(self, p):
    """Second derivative of the link function g''(p)

    implemented through numerical differentiation
    """
    from statsmodels.tools.numdiff import approx_fprime
    # Note: special function for norm.ppf does not support complex
    return np.diag(approx_fprime(p, self.deriv, centered=True))
def deriv(self, mu):
    """
    Derivative of the variance function v'(mu)
    """
    from statsmodels.tools.numdiff import approx_fprime_cs, approx_fprime
    #return approx_fprime_cs(mu, self)  # TODO: fix, breaks in `fabs`
    # TODO: diag is a workaround for a problem with numdiff for 1d
    return np.diag(approx_fprime(mu, self))
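# Aside (illustrative): the np.diag workaround in deriv and deriv2 above
# exists because approx_fprime treats its input as a single parameter vector
# and returns a full Jacobian. For an elementwise function that Jacobian is
# diagonal, so np.diag(...) recovers the pointwise derivatives. A quick
# check with np.sqrt:
import numpy as np
from statsmodels.tools.numdiff import approx_fprime

mu = np.array([0.5, 1.0, 2.0])
jac = approx_fprime(mu, np.sqrt, centered=True)  # (3, 3), off-diagonal ~0
d = np.diag(jac)
print(np.allclose(d, 0.5 / np.sqrt(mu)))  # True: d/dmu sqrt(mu)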
def fit_map(self, method="BFGS", minim_opts=None, scale_fe=False):
    """
    Construct the Laplace approximation to the posterior distribution.

    Parameters
    ----------
    method : string
        Optimization method for finding the posterior mode.
    minim_opts : dict-like
        Options passed to scipy.minimize.
    scale_fe : bool
        If True, the columns of the fixed effects design matrix are
        centered and scaled to unit variance before fitting the model.
        The results are back-transformed so that they are presented on
        the original scale.

    Returns
    -------
    BayesMixedGLMResults instance.
    """

    if scale_fe:
        mn = self.exog.mean(0)
        sc = self.exog.std(0)
        self._exog_save = self.exog
        self.exog = self.exog.copy()
        ixs = np.flatnonzero(sc > 1e-8)
        self.exog[:, ixs] -= mn[ixs]
        self.exog[:, ixs] /= sc[ixs]

    def fun(params):
        return -self.logposterior(params)

    def grad(params):
        return -self.logposterior_grad(params)

    start = self._get_start()

    r = minimize(fun, start, method=method, jac=grad,
                 options=minim_opts)
    if not r.success:
        msg = ("Laplace fitting did not converge, |gradient|=%.6f" %
               np.sqrt(np.sum(r.jac**2)))
        warnings.warn(msg)

    from statsmodels.tools.numdiff import approx_fprime
    hess = approx_fprime(r.x, grad)
    cov = np.linalg.inv(hess)

    params = r.x

    if scale_fe:
        self.exog = self._exog_save
        del self._exog_save
        params[ixs] /= sc[ixs]
        # use np.ix_ so the scaling is applied in place; chained fancy
        # indexing (cov[ixs, :][:, ixs] /= ...) writes into a temporary
        cov[np.ix_(ixs, ixs)] /= np.outer(sc[ixs], sc[ixs])

    return BayesMixedGLMResults(self, params, cov, optim_retvals=r)
def test_score(self):
    # this tests the score at parameters different from the optimum.
    import statsmodels.tools.numdiff as nd
    score_by_numdiff = nd.approx_fprime(self.res1.params * 2,
                                        self.mod1.loglike, centered=True)
    np.testing.assert_allclose(self.mod1.score(self.res1.params * 2),
                               score_by_numdiff, rtol=RTOL_4, atol=ATOL_1)
def test_grad_fun1_fdc(self):
    for test_params in self.params:
        #gtrue = self.x.sum(0)
        gtrue = self.gradtrue(test_params)
        fun = self.fun()

        # default epsilon 1e-6 is not precise enough, use 1e-8 instead
        gfd = numdiff.approx_fprime(test_params, fun, epsilon=1e-8,
                                    args=self.args, centered=True)
        assert_almost_equal(gtrue, gfd, decimal=DEC5)
def test_score(self):
    for test_params in self.params:
        sc = self.mod.score(test_params)
        scfd = numdiff.approx_fprime(test_params.ravel(),
                                     self.mod.loglike)
        assert_almost_equal(sc, scfd, decimal=1)

        sccs = numdiff.approx_fprime_cs(test_params.ravel(),
                                        self.mod.loglike)
        assert_almost_equal(sc, sccs, decimal=11)
def test_statsmodels_numdiff_std(avg=AVG, std=STD, f=F):
    c = clock()
    j = numdiff.approx_fprime(avg, f, centered=True)
    LOGGER.debug('\t1> %g [s]', clock() - c)
    std = np.abs(j.diagonal() * std)  # np.sqrt(j*std*std*j)
    LOGGER.debug('\t2> %g [s]', clock() - c)
    avg = f(avg)
    LOGGER.debug('\t3> %g [s]', clock() - c)
    dt = np.dtype([('avg', float), ('std', float)])
    LOGGER.debug('\t4> %g [s]', clock() - c)
    # list() needed on Python 3, where zip returns an iterator
    return np.array(list(zip(avg, std)), dtype=dt)
def test_statsmodels_numdiff_std(avg=AVG, std=STD, f=F):
    c = clock()
    j = numdiff.approx_fprime(avg, f, centered=True)
    LOGGER.debug('\t1> %g [s]', clock() - c)
    # range/list used for Python 3 compatibility (was xrange/zip)
    std = [np.linalg.norm(j[n] * std) for n in range(NARGS)]
    LOGGER.debug('\t2> %g [s]', clock() - c)
    avg = f(avg)
    LOGGER.debug('\t3> %g [s]', clock() - c)
    dt = np.dtype([('avg', float), ('std', float)])
    LOGGER.debug('\t4> %g [s]', clock() - c)
    return np.array(list(zip(avg, std)), dtype=dt)
def test_deriv():
    # Check link function derivatives using numeric differentiation.

    np.random.seed(24235)

    for link in Links:
        for k in range(10):
            p = np.random.uniform(0, 1)
            d = link.deriv(p)
            da = nd.approx_fprime(np.r_[p], link)
            assert_allclose(d, da, rtol=1e-6, atol=1e-6,
                            err_msg=str(link))
def test_conditional_mlogit_grad():

    df = gen_mlogit(90)
    model = ConditionalMlogit.from_formula(
        "y ~ 0 + x1 + x2", time="time", groups="g", data=df)

    # Compare the gradients to numeric gradients
    for _ in range(5):
        za = np.random.normal(size=4)
        grad = model.score(za)
        ngrad = approx_fprime(za, model.loglike)
        assert_allclose(grad, ngrad, rtol=1e-5, atol=1e-3)
def test_statsmodels_numdiff_cov(avg=AVG, cov=COV, f=F):
    c = clock()
    j = numdiff.approx_fprime(avg, f, centered=True)
    LOGGER.debug('\t1> %g [s]', clock() - c)
    cov = np.dot(np.dot(j, cov), j.T)
    LOGGER.debug('\t2> %g [s]', clock() - c)
    avg = f(avg)
    LOGGER.debug('\t3> %g [s]', clock() - c)
    cov = np.reshape(cov.diagonal(), avg.shape)
    LOGGER.debug('\t4> %g [s]', clock() - c)
    dt = np.dtype([('avg', float), ('cov', float)])
    LOGGER.debug('\t5> %g [s]', clock() - c)
    # list() needed on Python 3, where zip returns an iterator
    return np.array(list(zip(avg, cov)), dtype=dt)
def score(self, params):
    """
    Return the gradient of the loglike at params

    Parameters
    ----------
    params : list

    Notes
    -----
    Return numerical gradient
    """
    loglike = self.loglike
    return approx_fprime(params, loglike, epsilon=1e-8)
def score(self, AB_mask):
    """
    Return the gradient of the loglike at AB_mask.

    Parameters
    ----------
    AB_mask : unknown values of A and B matrix concatenated

    Notes
    -----
    Return numerical gradient
    """
    loglike = self.loglike
    return approx_fprime(AB_mask, loglike, epsilon=1e-8)
def score(self, params):
    """
    Return the gradient of the loglikelihood at params.

    Parameters
    ----------
    params : array-like
        The parameter values at which to evaluate the score function.

    Notes
    -----
    Returns numerical gradient.
    """
    loglike = self.loglike
    return approx_fprime(params, loglike, epsilon=1e-8)
def test_derivatives(self):
    pen = self.pen
    x = self.params

    ps = np.array([pen.deriv(np.atleast_1d(xi)) for xi in x])
    psn = np.array([approx_fprime(np.atleast_1d(xi), pen.func)
                    for xi in x])
    assert_allclose(ps, psn, rtol=1e-7, atol=1e-8)

    ph = np.array([pen.deriv2(np.atleast_1d(xi)) for xi in x])
    phn = np.array([approx_hess(np.atleast_1d(xi), pen.func)
                    for xi in x])
    if ph.ndim == 2:
        # SmoothedSCAD returns only the diagonal of the hessian if
        # independent
        # TODO: should we allow this also in L2?
        ph = np.array([np.diag(phi) for phi in ph])
    assert_allclose(ph, phn, rtol=1e-7, atol=1e-8)
def test_hess(self):
    for test_params in self.params:
        he = self.mod.hessian(test_params)
        hefd = numdiff.approx_fprime_cs(test_params, self.mod.score)
        assert_almost_equal(he, hefd, decimal=DEC8)

        #NOTE: notice the accuracy below
        assert_almost_equal(he, hefd, decimal=7)
        hefd = numdiff.approx_fprime(test_params, self.mod.score,
                                     centered=True)
        assert_allclose(he, hefd, rtol=5e-10)
        hefd = numdiff.approx_fprime(test_params, self.mod.score,
                                     centered=False)
        assert_almost_equal(he, hefd, decimal=4)

        hescs = numdiff.approx_fprime_cs(test_params.ravel(),
                                         self.mod.score)
        assert_allclose(he, hescs, rtol=1e-13)

        hecs = numdiff.approx_hess_cs(test_params.ravel(),
                                      self.mod.loglike)
        assert_allclose(he, hecs, rtol=1e-9)

        #NOTE: Look at the lack of precision - default epsilon not always
        #best
        grad = self.mod.score(test_params)
        hecs, gradcs = numdiff.approx_hess1(test_params, self.mod.loglike,
                                            1e-6, return_grad=True)
        assert_almost_equal(he, hecs, decimal=1)
        assert_almost_equal(grad, gradcs, decimal=1)
        hecs, gradcs = numdiff.approx_hess2(test_params, self.mod.loglike,
                                            1e-4, return_grad=True)
        assert_almost_equal(he, hecs, decimal=3)
        assert_almost_equal(grad, gradcs, decimal=1)
        hecs = numdiff.approx_hess3(test_params, self.mod.loglike, 1e-5)
        assert_almost_equal(he, hecs, decimal=4)
def score_numdiff(self, params, pen_weight=None, method='fd', **kwds):
    """score based on finite difference derivative
    """
    if pen_weight is None:
        pen_weight = self.pen_weight

    loglike = lambda p: self.loglike(p, pen_weight=pen_weight, **kwds)

    if method == 'cs':
        return approx_fprime_cs(params, loglike)
    elif method == 'fd':
        return approx_fprime(params, loglike, centered=True)
    else:
        raise ValueError('method not recognized, should be "fd" or "cs"')
def test_hess(self):
    for test_params in self.params:
        he = self.mod.hessian(test_params)
        hefd = numdiff.approx_fprime_cs(test_params, self.mod.score)
        assert_almost_equal(he, hefd, decimal=DEC8)

        #NOTE: notice the accuracy below
        assert_almost_equal(he, hefd, decimal=7)
        hefd = numdiff.approx_fprime(test_params, self.mod.score,
                                     centered=True)
        assert_allclose(he, hefd, rtol=1e-9)
        hefd = numdiff.approx_fprime(test_params, self.mod.score,
                                     centered=False)
        assert_almost_equal(he, hefd, decimal=4)

        hescs = numdiff.approx_fprime_cs(test_params.ravel(),
                                         self.mod.score)
        assert_allclose(he, hescs, rtol=1e-13)

        hecs = numdiff.approx_hess_cs(test_params.ravel(),
                                      self.mod.loglike)
        assert_allclose(he, hecs, rtol=1e-9)

        #NOTE: Look at the lack of precision - default epsilon not always
        #best
        grad = self.mod.score(test_params)
        hecs, gradcs = numdiff.approx_hess1(test_params, self.mod.loglike,
                                            1e-6, return_grad=True)
        assert_almost_equal(he, hecs, decimal=1)
        assert_almost_equal(grad, gradcs, decimal=1)
        hecs, gradcs = numdiff.approx_hess2(test_params, self.mod.loglike,
                                            1e-4, return_grad=True)
        assert_almost_equal(he, hecs, decimal=3)
        assert_almost_equal(grad, gradcs, decimal=1)
        hecs = numdiff.approx_hess3(test_params, self.mod.loglike, 1e-5)
        assert_almost_equal(he, hecs, decimal=4)
def score_obs(self, params):
    """
    Generic Zero Inflated model score (gradient) vector of the
    log-likelihood

    Parameters
    ----------
    params : array_like
        The parameters of the model

    Returns
    -------
    score : ndarray, 1-D
        The score vector of the model, i.e. the first derivative of the
        loglikelihood function, evaluated at `params`
    """
    params_infl = params[:self.k_inflate]
    params_main = params[self.k_inflate:]

    y = self.endog
    w = self.model_infl.predict(params_infl)
    w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
    score_main = self.model_main.score_obs(params_main)
    llf_main = self.model_main.loglikeobs(params_main)
    llf = self.loglikeobs(params)
    zero_idx = np.nonzero(y == 0)[0]
    nonzero_idx = np.nonzero(y)[0]

    mu = self.model_main.predict(params_main)

    # TODO: need to allow for complex to use CS numerical derivatives
    dldp = np.zeros((self.exog.shape[0], self.k_exog), dtype=np.float64)
    dldw = np.zeros_like(self.exog_infl, dtype=np.float64)

    dldp[zero_idx, :] = (score_main[zero_idx].T *
                         (1 - (w[zero_idx]) / np.exp(llf[zero_idx]))).T
    dldp[nonzero_idx, :] = score_main[nonzero_idx]

    if self.inflation == 'logit':
        dldw[zero_idx, :] = (self.exog_infl[zero_idx].T * w[zero_idx] *
                             (1 - w[zero_idx]) *
                             (1 - np.exp(llf_main[zero_idx])) /
                             np.exp(llf[zero_idx])).T
        dldw[nonzero_idx, :] = -(self.exog_infl[nonzero_idx].T *
                                 w[nonzero_idx]).T
    elif self.inflation == 'probit':
        return approx_fprime(params, self.loglikeobs)

    return np.hstack((dldw, dldp))
def se_params(final, data, gamma):
    # for t-test
    moms = moments(final, data, gamma)  # (T*K)
    Omega = weight(moms)  # (K*K)
    obs = moms.shape[0]
    omegahat = Omega
    se_d = approx_fprime(final, moments_mean, epsilon=0.0001,
                         args=(data, gamma,))
    cov = np.linalg.inv(np.dot(se_d.T, np.dot(omegahat, se_d)))
    return np.sqrt(np.diag(cov / obs))  # 1*3 vector
def score(self, params):
    """
    Compute the gradient of the log-likelihood at params.

    Parameters
    ----------
    params : array_like
        The parameter values at which to evaluate the score function.

    Returns
    -------
    ndarray
        The gradient computed using numerical methods.
    """
    loglike = self.loglike
    return approx_fprime(params, loglike, epsilon=1e-8)
def test_deriv():
    # Check link function derivatives using numeric differentiation.

    np.random.seed(24235)

    for link in Links:
        for k in range(10):
            p = np.random.uniform(0, 1)
            d = link.deriv(p)
            da = nd.approx_fprime(np.r_[p], link)
            assert_allclose(d, da, rtol=1e-6, atol=1e-6,
                            err_msg=str(link))

            if not isinstance(link, (type(inverse_power),
                                     type(inverse_squared))):
                # check monotonically increasing
                assert_array_less(-d, 0)
def score_obs(self, params):
    """
    Generic Zero Inflated model score (gradient) vector of the
    log-likelihood

    Parameters
    ----------
    params : array-like
        The parameters of the model

    Returns
    -------
    score : ndarray, 1-D
        The score vector of the model, i.e. the first derivative of the
        loglikelihood function, evaluated at `params`
    """
    params_infl = params[:self.k_inflate]
    params_main = params[self.k_inflate:]

    y = self.endog
    w = self.model_infl.predict(params_infl)
    w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
    score_main = self.model_main.score_obs(params_main)
    llf_main = self.model_main.loglikeobs(params_main)
    llf = self.loglikeobs(params)
    zero_idx = np.nonzero(y == 0)[0]
    nonzero_idx = np.nonzero(y)[0]

    mu = self.model_main.predict(params_main)

    dldp = np.zeros((self.exog.shape[0], self.k_exog), dtype=np.float64)
    dldw = np.zeros_like(self.exog_infl, dtype=np.float64)

    dldp[zero_idx, :] = (score_main[zero_idx].T *
                         (1 - (w[zero_idx]) / np.exp(llf[zero_idx]))).T
    dldp[nonzero_idx, :] = score_main[nonzero_idx]

    if self.inflation == 'logit':
        dldw[zero_idx, :] = (self.exog_infl[zero_idx].T * w[zero_idx] *
                             (1 - w[zero_idx]) *
                             (1 - np.exp(llf_main[zero_idx])) /
                             np.exp(llf[zero_idx])).T
        dldw[nonzero_idx, :] = -(self.exog_infl[nonzero_idx].T *
                                 w[nonzero_idx]).T
    elif self.inflation == 'probit':
        return approx_fprime(params, self.loglikeobs)

    return np.hstack((dldw, dldp))
def test_deriv2():
    # Check link function second derivatives using numeric
    # differentiation.

    np.random.seed(24235)

    for link in Links:
        # TODO: Resolve errors with the numeric derivatives
        if type(link) == type(probit):
            continue
        for k in range(10):
            p = np.random.uniform(0, 1)
            p = np.clip(p, 0.01, 0.99)
            if type(link) == type(cauchy):
                p = np.clip(p, 0.03, 0.97)
            d = link.deriv2(p)
            da = nd.approx_fprime(np.r_[p], link.deriv)
            assert_allclose(d, da, rtol=1e-6, atol=1e-6,
                            err_msg=str(link))
def test_score_numdiff():
    y, x_mean, x_sc, x_sm, x_no, time, groups = setup1(1000, model1)
    preg = ProcessMLE(y, x_mean, x_sc, x_sm, x_no, time, groups)

    def loglike(x):
        return preg.loglike(x)

    q = x_mean.shape[1] + x_sc.shape[1] + x_sm.shape[1] + x_no.shape[1]

    np.random.seed(342)

    for _ in range(5):
        par0 = preg._get_start()
        par = par0 + 0.1 * np.random.normal(size=q)
        score = preg.score(par)
        score_nd = nd.approx_fprime(par, loglike, epsilon=1e-7)
        assert_allclose(score, score_nd, atol=1e-3, rtol=1e-4)
def Sts_interference(func, result, names: list) -> pd.DataFrame:
    """Using a numerical estimate of the Jacobian, apply the BHHH
    estimator to compute the covariance matrix (positive semi-definite
    by construction), from which standard errors, t-statistics and
    p-values can all be calculated."""
    params = result.x
    jac = approx_fprime(params, func)
    btt = jac.T.dot(jac)
    inv_btt = np.linalg.inv(btt)
    stderr = np.sqrt(np.diag(inv_btt))
    params_df = pd.Series(params, index=names, name='Coef')
    stderr_df = pd.Series(stderr, index=names, name="std_err")
    tvalues_df = pd.Series(params / stderr, index=names, name='t')
    pvalues_df = pd.Series(stats.norm.sf(np.abs(params / stderr)) * 2,
                           index=names, name="P>|t|")
    return pd.concat([params_df, stderr_df, tvalues_df, pvalues_df],
                     axis=1)
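# Aside (illustrative): the BHHH step above approximates the information
# matrix by the outer product of per-observation scores, Cov(theta) ~
# (J'J)^{-1}, where row i of J is the gradient of observation i's
# log-likelihood. A self-contained sketch on simulated normal data (all
# names below are hypothetical):
import numpy as np
from scipy import stats
from scipy.optimize import minimize
from statsmodels.tools.numdiff import approx_fprime

rng = np.random.default_rng(0)
data = rng.normal(1.5, 1.0, size=500)

def loglike_obs(params):
    loc, log_scale = params
    return stats.norm.logpdf(data, loc=loc, scale=np.exp(log_scale))

res = minimize(lambda p: -loglike_obs(p).sum(), x0=np.zeros(2))
jac = approx_fprime(res.x, loglike_obs)  # (nobs, 2) per-observation scores
cov = np.linalg.inv(jac.T @ jac)         # BHHH covariance
print(res.x, np.sqrt(np.diag(cov)))      # estimates and standard errors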
def _hessian_finite_difference(self, params, approx_centered=False,
                               **kwargs):
    params = np.array(params, ndmin=1)

    warnings.warn('Calculation of the Hessian using finite differences'
                  ' is usually subject to substantial approximation'
                  ' errors.', PrecisionWarning)

    if not approx_centered:
        epsilon = _get_epsilon(params, 3, None, len(params))
    else:
        epsilon = _get_epsilon(params, 4, None, len(params)) / 2
    hessian = approx_fprime(params, self._score_finite_difference,
                            epsilon=epsilon, kwargs=kwargs,
                            centered=approx_centered)

    # TODO: changed this to nobs_effective, has to be changed when merging
    # with statespace mlemodel
    return hessian / (self.nobs_effective)
def test_poisson_1d():
    y = np.r_[3, 1, 1, 4, 5, 2, 0, 1, 6, 2]
    g = np.r_[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]

    x = np.r_[0, 1, 0, 0, 1, 1, 0, 0, 1, 0]
    x = x[:, None]

    model = ConditionalPoisson(y, x, groups=g)

    # Check the gradient for the loglikelihood
    for x in -1, 0, 1, 2:
        grad = approx_fprime(np.r_[x, ], model.loglike)
        score = model.score(np.r_[x, ])
        assert_allclose(grad, score, rtol=1e-4)

    result = model.fit()

    # From Stata
    assert_allclose(result.params, np.r_[0.6466272], rtol=1e-4)
    assert_allclose(result.bse, np.r_[0.4170918], rtol=1e-5)
def initialize(self):
    """
    Initialize (possibly re-initialize) a Model instance.

    For instance, the design matrix of a linear model may change and
    some things must be recomputed.
    """
    if not self.score:  # score is not optional right now
        self.score = lambda x: approx_fprime(x, self.loglike)
        if not self.hessian:
            pass
    else:  # can use approx_hess_p if we have a gradient
        if not self.hessian:
            pass

    if self.exog is not None:
        er = np.linalg.matrix_rank(self.exog)
        self.df_model = float(er - 1)
        self.df_resid = float(self.exog.shape[0] - er)
    else:
        self.df_model = np.nan
        self.df_resid = np.nan
    super(GenericLikelihoodModel_TobitTruncreg, self).initialize()
def compute_param_cov(self, params, backcast=None, robust=True):
    """
    Computes parameter covariances using numerical derivatives.

    Parameters
    ----------
    params : 1-d array
        Model parameters
    backcast : float, optional
        Value to use for backcasting. If not provided, the stored or
        default backcast of the volatility process is used.
    robust : bool, optional
        Flag indicating whether to use robust standard errors (True) or
        classic MLE (False)
    """
    resids = self.resids(self.starting_values())
    var_bounds = self.volatility.variance_bounds(resids)
    nobs = resids.shape[0]
    if backcast is None and self._backcast is None:
        backcast = self.volatility.backcast(resids)
        self._backcast = backcast
    elif backcast is None:
        backcast = self._backcast

    kwargs = {
        'sigma2': np.zeros_like(resids),
        'backcast': backcast,
        'var_bounds': var_bounds,
        'individual': False
    }

    hess = approx_hess(params, self._loglikelihood, kwargs=kwargs)
    hess /= nobs
    inv_hess = np.linalg.inv(hess)
    if robust:
        kwargs['individual'] = True
        scores = approx_fprime(params, self._loglikelihood,
                               kwargs=kwargs)
        score_cov = np.cov(scores.T)
        return inv_hess.dot(score_cov).dot(inv_hess) / nobs
    else:
        return inv_hess / nobs
def fit_map(self, method="BFGS", minim_opts=None):
    """
    Construct the Laplace approximation to the posterior distribution.

    Parameters
    ----------
    method : string
        Optimization method for finding the posterior mode.
    minim_opts : dict-like
        Options passed to scipy.minimize.

    Returns
    -------
    BayesMixedGLMResults instance.
    """

    def fun(params):
        return -self.logposterior(params)

    def grad(params):
        return -self.logposterior_grad(params)

    start = self._get_start()

    r = minimize(fun, start, method=method, jac=grad,
                 options=minim_opts)
    if not r.success:
        msg = ("Laplace fitting did not converge, |gradient|=%.6f" %
               np.sqrt(np.sum(r.jac**2)))
        warnings.warn(msg)

    from statsmodels.tools.numdiff import approx_fprime
    hess = approx_fprime(r.x, grad)
    hess_inv = np.linalg.inv(hess)

    return BayesMixedGLMResults(self, r.x, hess_inv, optim_retvals=r)
def hessian(self, params):
    from statsmodels.tools.numdiff import approx_fprime
    hess = approx_fprime(params, self.score)
    hess = np.atleast_2d(hess)
    return hess
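# Aside (illustrative): differentiating a score function with approx_fprime
# yields the Hessian, as in hessian() above. For the quadratic
# f(x) = x @ A @ x the score is (A + A') @ x, which is linear, so forward
# differences recover the Hessian A + A' up to rounding.
import numpy as np
from statsmodels.tools.numdiff import approx_fprime

A = np.array([[2.0, 1.0],
              [0.0, 3.0]])

def score(x):
    return (A + A.T) @ x  # analytic gradient of f(x) = x @ A @ x

hess = np.atleast_2d(approx_fprime(np.array([0.3, -0.8]), score))
print(np.allclose(hess, A + A.T))  # True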
def grad(self, params=None, **kwds):
    if params is None:
        params = self.params
    kwds.setdefault('epsilon', 1e-4)
    from statsmodels.tools.numdiff import approx_fprime
    return approx_fprime(params, self.fun, **kwds)
def margeff_cov_params(model, params, exog, cov_params, at, derivative,
                       dummy_ind, count_ind, method, J):
    """
    Computes the variance-covariance of marginal effects by the delta
    method.

    Parameters
    ----------
    model : model instance
        The model that returned the fitted results. Its pdf method is
        used for computing the Jacobian of discrete variables in
        dummy_ind and count_ind
    params : array-like
        estimated model parameters
    exog : array-like
        exogenous variables at which to calculate the derivative
    cov_params : array-like
        The variance-covariance of the parameters
    at : str
        Options are:

        - 'overall', The average of the marginal effects at each
          observation.
        - 'mean', The marginal effects at the mean of each regressor.
        - 'median', The marginal effects at the median of each regressor.
        - 'zero', The marginal effects at zero for each regressor.
        - 'all', The marginal effects at each observation.

        Only 'overall' has any effect here.
    derivative : function or array-like
        If a function, it returns the marginal effects of the model with
        respect to the exogenous variables evaluated at exog. Expected
        to be called derivative(params, exog). This will be numerically
        differentiated. Otherwise, it can be the Jacobian of the
        marginal effects with respect to the parameters.
    dummy_ind : array-like
        Indices of the columns of exog that contain dummy variables
    count_ind : array-like
        Indices of the columns of exog that contain count variables

    Notes
    -----
    For continuous regressors, the variance-covariance is given by

    Asy. Var[MargEff] = [d margeff / d params] V [d margeff / d params]'

    where V is the parameter variance-covariance.

    The outer Jacobians are computed via numerical differentiation if
    derivative is a function.
    """
    if callable(derivative):
        from statsmodels.tools.numdiff import approx_fprime_cs
        params = params.ravel('F')  # for Multinomial
        try:
            jacobian_mat = approx_fprime_cs(params, derivative,
                                            args=(exog, method))
        except TypeError:  # norm.cdf doesn't take complex values
            from statsmodels.tools.numdiff import approx_fprime
            jacobian_mat = approx_fprime(params, derivative,
                                         args=(exog, method))
        if at == 'overall':
            jacobian_mat = np.mean(jacobian_mat, axis=1)
        else:
            jacobian_mat = jacobian_mat.squeeze()  # exog was 2d row vector
        if dummy_ind is not None:
            jacobian_mat = _margeff_cov_params_dummy(model, jacobian_mat,
                                                     params, exog,
                                                     dummy_ind, method, J)
        if count_ind is not None:
            jacobian_mat = _margeff_cov_params_count(model, jacobian_mat,
                                                     params, exog,
                                                     count_ind, method, J)
    else:
        jacobian_mat = derivative

    #NOTE: this won't go through for at == 'all'
    return np.dot(np.dot(jacobian_mat, cov_params), jacobian_mat.T)
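# Aside (illustrative): the delta method used above in miniature. For a
# transformation g(theta) with parameter covariance V, the asymptotic
# covariance of g(theta_hat) is J V J' with J = dg/dtheta, here obtained
# numerically. theta, V and g below are made-up examples.
import numpy as np
from statsmodels.tools.numdiff import approx_fprime

theta = np.array([1.2, 0.4])
V = np.array([[0.04, 0.01],
              [0.01, 0.09]])  # Cov(theta)

def g(t):
    return np.array([t[0] / t[1], t[0] * t[1]])  # ratio and product

J = approx_fprime(theta, g, centered=True)  # (2, 2) Jacobian
cov_g = J @ V @ J.T
print(np.sqrt(np.diag(cov_g)))  # delta-method standard errors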
def test_compare_numdiff(self):

    n_grp = 200
    grpsize = 5
    k_fe = 3
    k_re = 2

    for use_sqrt in False, True:
        for reml in False, True:
            for profile_fe in False, True:

                np.random.seed(3558)
                exog_fe = np.random.normal(size=(n_grp * grpsize, k_fe))
                exog_re = np.random.normal(size=(n_grp * grpsize, k_re))
                exog_re[:, 0] = 1
                exog_vc = np.random.normal(size=(n_grp * grpsize, 3))
                slopes = np.random.normal(size=(n_grp, k_re))
                slopes[:, -1] *= 2
                slopes = np.kron(slopes, np.ones((grpsize, 1)))
                slopes_vc = np.random.normal(size=(n_grp, 3))
                slopes_vc = np.kron(slopes_vc, np.ones((grpsize, 1)))
                slopes_vc[:, -1] *= 2
                re_values = (slopes * exog_re).sum(1)
                vc_values = (slopes_vc * exog_vc).sum(1)
                err = np.random.normal(size=n_grp * grpsize)
                endog = exog_fe.sum(1) + re_values + vc_values + err
                groups = np.kron(range(n_grp), np.ones(grpsize))

                vc = {"a": {}, "b": {}}
                for i in range(n_grp):
                    ix = np.flatnonzero(groups == i)
                    vc["a"][i] = exog_vc[ix, 0:2]
                    vc["b"][i] = exog_vc[ix, 2:3]

                model = MixedLM(endog, exog_fe, groups, exog_re,
                                exog_vc=vc, use_sqrt=use_sqrt)
                rslt = model.fit(reml=reml)

                loglike = loglike_function(model, profile_fe=profile_fe,
                                           has_fe=not profile_fe)

                # Test the score at several points.
                for kr in range(5):
                    fe_params = np.random.normal(size=k_fe)
                    cov_re = np.random.normal(size=(k_re, k_re))
                    cov_re = np.dot(cov_re.T, cov_re)
                    vcomp = np.random.normal(size=2)**2
                    params = MixedLMParams.from_components(
                        fe_params, cov_re=cov_re, vcomp=vcomp)
                    params_vec = params.get_packed(
                        has_fe=not profile_fe, use_sqrt=use_sqrt)

                    # Check scores
                    gr = -model.score(params, profile_fe=profile_fe)
                    ngr = nd.approx_fprime(params_vec, loglike)
                    assert_allclose(gr, ngr, rtol=1e-3)

                # Check Hessian matrices at the MLE (we don't have
                # the profile Hessian matrix and we don't care
                # about the Hessian for the square root
                # transformed parameter).
                if (profile_fe is False) and (use_sqrt is False):
                    hess = -model.hessian(rslt.params_object)
                    params_vec = rslt.params_object.get_packed(
                        use_sqrt=False, has_fe=True)
                    loglike_h = loglike_function(
                        model, profile_fe=False, has_fe=True)
                    nhess = nd.approx_hess(params_vec, loglike_h)
                    assert_allclose(hess, nhess, rtol=1e-3)
def hessian(self, params):
    hess = approx_fprime(params, self.score)
    return hess
def test_compare_numdiff(self):

    import statsmodels.tools.numdiff as nd

    n_grp = 200
    grpsize = 5
    k_fe = 3
    k_re = 2

    for jl in 0, 1:
        for reml in False, True:
            for cov_pen_wt in 0, 10:

                cov_pen = penalties.PSD(cov_pen_wt)

                np.random.seed(3558)
                exog_fe = np.random.normal(size=(n_grp * grpsize, k_fe))
                exog_re = np.random.normal(size=(n_grp * grpsize, k_re))
                exog_re[:, 0] = 1
                slopes = np.random.normal(size=(n_grp, k_re))
                slopes = np.kron(slopes, np.ones((grpsize, 1)))
                re_values = (slopes * exog_re).sum(1)
                err = np.random.normal(size=n_grp * grpsize)
                endog = exog_fe.sum(1) + re_values + err
                groups = np.kron(range(n_grp), np.ones(grpsize))

                if jl == 0:
                    md = MixedLM(endog, exog_fe, groups, exog_re)
                    score = lambda x: -md.score_sqrt(x)
                    hessian = lambda x: -md.hessian_sqrt(x)
                else:
                    md = MixedLM(endog, exog_fe, groups, exog_re,
                                 use_sqrt=False)
                    score = lambda x: -md.score_full(x)
                    hessian = lambda x: -md.hessian_full(x)
                md.reml = reml
                md.cov_pen = cov_pen
                loglike = lambda x: -md.loglike(x)
                rslt = md.fit()

                # Test the score at several points.
                for kr in range(5):
                    fe_params = np.random.normal(size=k_fe)
                    cov_re = np.random.normal(size=(k_re, k_re))
                    cov_re = np.dot(cov_re.T, cov_re)
                    params = MixedLMParams.from_components(
                        fe_params, cov_re)
                    if jl == 0:
                        params_vec = params.get_packed()
                    else:
                        params_vec = params.get_packed(use_sqrt=False)

                    # Check scores
                    gr = score(params)
                    ngr = nd.approx_fprime(params_vec, loglike)
                    assert_allclose(gr, ngr, rtol=1e-2)

                # Hessian matrices don't agree well away from
                # the MLE.
                #if cov_pen_wt == 0:
                #    hess = hessian(params)
                #    nhess = nd.approx_hess(params_vec, loglike)
                #    assert_allclose(hess, nhess, rtol=1e-2)

                # Check Hessian matrices at the MLE.
                if cov_pen_wt == 0:
                    hess = hessian(rslt.params_object)
                    params_vec = rslt.params_object.get_packed()
                    nhess = nd.approx_hess(params_vec, loglike)
                    assert_allclose(hess, nhess, rtol=1e-2)
def _score_finite_difference(self, params, approx_centered=False,
                             **kwargs):
    kwargs['transformed'] = True
    return approx_fprime(params, self.loglike, kwargs=kwargs,
                         centered=approx_centered)
def test_compare_numdiff(self, use_sqrt, reml, profile_fe):

    n_grp = 200
    grpsize = 5
    k_fe = 3
    k_re = 2

    np.random.seed(3558)
    exog_fe = np.random.normal(size=(n_grp * grpsize, k_fe))
    exog_re = np.random.normal(size=(n_grp * grpsize, k_re))
    exog_re[:, 0] = 1
    exog_vc = np.random.normal(size=(n_grp * grpsize, 3))
    slopes = np.random.normal(size=(n_grp, k_re))
    slopes[:, -1] *= 2
    slopes = np.kron(slopes, np.ones((grpsize, 1)))
    slopes_vc = np.random.normal(size=(n_grp, 3))
    slopes_vc = np.kron(slopes_vc, np.ones((grpsize, 1)))
    slopes_vc[:, -1] *= 2
    re_values = (slopes * exog_re).sum(1)
    vc_values = (slopes_vc * exog_vc).sum(1)
    err = np.random.normal(size=n_grp * grpsize)
    endog = exog_fe.sum(1) + re_values + vc_values + err
    groups = np.kron(range(n_grp), np.ones(grpsize))

    vc = {"a": {}, "b": {}}
    for i in range(n_grp):
        ix = np.flatnonzero(groups == i)
        vc["a"][i] = exog_vc[ix, 0:2]
        vc["b"][i] = exog_vc[ix, 2:3]

    model = MixedLM(endog, exog_fe, groups, exog_re, exog_vc=vc,
                    use_sqrt=use_sqrt)
    rslt = model.fit(reml=reml)

    loglike = loglike_function(model, profile_fe=profile_fe,
                               has_fe=not profile_fe)

    try:
        # Test the score at several points.
        for kr in range(5):
            fe_params = np.random.normal(size=k_fe)
            cov_re = np.random.normal(size=(k_re, k_re))
            cov_re = np.dot(cov_re.T, cov_re)
            vcomp = np.random.normal(size=2)**2
            params = MixedLMParams.from_components(
                fe_params, cov_re=cov_re, vcomp=vcomp)
            params_vec = params.get_packed(
                has_fe=not profile_fe, use_sqrt=use_sqrt)

            # Check scores
            gr = -model.score(params, profile_fe=profile_fe)
            ngr = nd.approx_fprime(params_vec, loglike)
            assert_allclose(gr, ngr, rtol=1e-3)

        # Check Hessian matrices at the MLE (we do not have
        # the profile Hessian matrix and we do not care
        # about the Hessian for the square root
        # transformed parameter).
        if (profile_fe is False) and (use_sqrt is False):
            hess = -model.hessian(rslt.params_object)
            params_vec = rslt.params_object.get_packed(
                use_sqrt=False, has_fe=True)
            loglike_h = loglike_function(model, profile_fe=False,
                                         has_fe=True)
            nhess = nd.approx_hess(params_vec, loglike_h)
            assert_allclose(hess, nhess, rtol=1e-3)
    except AssertionError:
        # See GH#5628; because this test fails unpredictably but only on
        # OSX, we only xfail it there.
        if PLATFORM_OSX:
            pytest.xfail("fails on OSX due to unresolved "
                         "numerical differences")
        else:
            raise
tatsmodels\sandbox\regression\numdiff.py", line 81, in approx_fprime1
    nobs = np.size(f0)  #len(f0)
TypeError: object of type 'numpy.float64' has no len()
'''

res_bfgs = mod_norm2.fit(start_params=start_params, method="bfgs",
                         fprime=None, maxiter=500, retall=0)

from statsmodels.tools.numdiff import approx_fprime, approx_hess
hb = -approx_hess(res_norm3.params, mod_norm2.loglike, epsilon=-1e-4)
hf = -approx_hess(res_norm3.params, mod_norm2.loglike, epsilon=1e-4)
hh = (hf + hb) / 2.
print(np.linalg.eigh(hh))

grad = -approx_fprime(res_norm3.params, mod_norm2.loglike, epsilon=-1e-4)
print(grad)
gradb = -approx_fprime(res_norm3.params, mod_norm2.loglike, epsilon=-1e-4)
gradf = -approx_fprime(res_norm3.params, mod_norm2.loglike, epsilon=1e-4)
print((gradb + gradf) / 2.)

print(res_norm3.model.score(res_norm3.params))
print(res_norm3.model.score(start_params))
mod_norm2.loglike(start_params / 2.)
print(np.linalg.inv(-1 * mod_norm2.hessian(res_norm3.params)))
print(np.sqrt(np.diag(res_bfgs.cov_params())))
print(res_norm3.bse)

print("MLE - OLS parameter estimates")
print(res_norm3.params[:-1] - res2.params)
print("bse diff in percent")
if __name__ == '__main__':  # FIXME: turn into tests or move/remove

    epsilon = 1e-6
    nobs = 200
    x = np.arange(nobs * 3).reshape(nobs, -1)
    x = np.random.randn(nobs, 3)

    xk = np.array([1, 2, 3])
    xk = np.array([1., 1., 1.])
    #xk = np.zeros(3)
    beta = xk
    y = np.dot(x, beta) + 0.1 * np.random.randn(nobs)
    xkols = np.dot(np.linalg.pinv(x), y)

    print(approx_fprime((1, 2, 3), fun, epsilon, x))
    gradtrue = x.sum(0)
    print(x.sum(0))
    gradcs = approx_fprime_cs((1, 2, 3), fun, (x,), h=1.0e-20)
    print(gradcs, maxabs(gradcs, gradtrue))
    print(approx_hess_cs((1, 2, 3), fun, (x,), h=1.0e-20))  # correctly zero

    print(approx_hess_cs((1, 2, 3), fun2, (y, x), h=1.0e-20) -
          2 * np.dot(x.T, x))
    print(numdiff.approx_hess(xk, fun2, 1e-3, (y, x))[0] -
          2 * np.dot(x.T, x))

    gt = (-x * 2 * (y - np.dot(x, [1, 2, 3]))[:, None])
    g = approx_fprime_cs((1, 2, 3), fun1, (y, x), h=1.0e-20)  # .T
    # this should not be transposed