from numpy import array, transpose
from scipy.stats import chi2


def chisquared(classFeatureMatrix):
    """
    Chi-squared statistic for a feature and a set of classes.

    Classes are indexed by rows (1st index) and feature values are indexed
    by columns. Entries are counts of the feature value given the class.
    """
    # counts of each class: sum the rows
    classcounts = classFeatureMatrix.sum(axis=1)
    # counts of each feature value: sum the columns
    valuecounts = classFeatureMatrix.sum(axis=0)
    # total count of observations
    total = classcounts.sum()
    # expected counts under the independence assumption
    expected = array([valuecounts] * len(classcounts), dtype='float')
    expected = transpose(transpose(expected) * classcounts / total)
    # chi-squared statistic: sum of (observed - expected)^2 / expected
    chi = (((classFeatureMatrix - expected) ** 2) / expected).sum()
    # degrees of freedom: (rows - 1) * (columns - 1)
    df = (expected.shape[0] - 1) * (expected.shape[1] - 1)
    # the p-value: 1 minus the chi-squared cdf at the observed statistic
    return 1 - chi2(df=df).cdf(chi)
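# Minimal usage sketch (not from the original source): a hypothetical 2x3
# contingency table of counts, rows = classes, columns = feature values.
import numpy as np

counts = np.array([[10, 20, 30],
                   [25, 20, 15]])
p_value = chisquared(counts)   # a small p-value suggests the feature depends on the class
print(p_value)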
def log_pdf_at_quantile(self, alphas):
    """
    Computes the log-pdf at a given 1d-vector of quantiles.
    """
    # chi-squared cutoffs corresponding to the given quantiles
    chi2_instance = chi2(self.dimension)
    cutoffs = chi2_instance.isf(1 - alphas)
    # log-determinant term of the Gaussian via the Cholesky factor self.L
    log_determinant_part = -sum(log(diag(self.L)))
    quadratic_part = -0.5 * cutoffs
    const_part = -0.5 * len(self.L) * log(2 * pi)
    return const_part + log_determinant_part + quadratic_part
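# Standalone sanity-check sketch (an illustration, not part of the original class):
# for a zero-mean Gaussian with covariance Sigma = L L^T, the log-pdf at any point
# whose squared Mahalanobis distance equals chi2(dim).isf(1 - alpha) is exactly
# const_part + log_determinant_part + quadratic_part as computed above.
import numpy as np
from scipy.stats import chi2, multivariate_normal

dim = 3
rng = np.random.default_rng(0)
A = rng.normal(size=(dim, dim))
Sigma = A @ A.T
L = np.linalg.cholesky(Sigma)

alpha = 0.9
d2 = chi2(dim).isf(1 - alpha)                 # squared Mahalanobis distance
log_pdf = (-0.5 * dim * np.log(2 * np.pi)     # const_part
           - np.sum(np.log(np.diag(L)))       # log_determinant_part
           - 0.5 * d2)                        # quadratic_part

# a point with exactly that Mahalanobis distance: x = L u with ||u||^2 = d2
u = np.zeros(dim)
u[0] = np.sqrt(d2)
x = L @ u
print(np.isclose(log_pdf, multivariate_normal(np.zeros(dim), Sigma).logpdf(x)))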
def emp_quantiles(self, X, quantiles=arange(0.1, 1, 0.1)):
    # need inverse chi2 cdf with self.dimension degrees of freedom
    chi2_instance = chi2(self.dimension)
    cutoffs = chi2_instance.isf(1 - quantiles)

    # whitening
    D, U = eig(self.L.dot(self.L.T))
    D = D ** (-0.5)
    W = (diag(D).dot(U.T).dot((X - self.mu).T)).T
    norms_squared = array([norm(w) ** 2 for w in W])

    results = zeros([len(quantiles)])
    for jj in range(0, len(quantiles)):
        results[jj] = mean(norms_squared < cutoffs[jj])
    return results
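# Illustration sketch (assumed, not from the original source): for samples drawn
# from N(mu, Sigma), the fraction of points whose squared Mahalanobis distance
# falls below each chi2 cutoff should be close to the requested quantile, which
# is the quantity emp_quantiles estimates.
import numpy as np
from scipy.stats import chi2

dim, n = 3, 100000
rng = np.random.default_rng(1)
A = rng.normal(size=(dim, dim))
Sigma = A @ A.T
mu = np.zeros(dim)
X = rng.multivariate_normal(mu, Sigma, size=n)

quantiles = np.arange(0.1, 1, 0.1)
cutoffs = chi2(dim).isf(1 - quantiles)
diffs = X - mu
d2 = np.einsum('ij,ij->i', diffs, np.linalg.solve(Sigma, diffs.T).T)
print(np.round([np.mean(d2 < c) for c in cutoffs], 2))   # ~ [0.1, 0.2, ..., 0.9]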
def gaussian_pvalue(X, mu, cov, ndof=None):
    r"""Calculates p-values for the assumption of `X` originating from a
    multivariate Gaussian pdf with mean `mu` and covariance `cov`.

    It exploits the fact that the Mahalanobis distance of :math:`\vec x`,

    .. math::

        d^2 = (\vec x - \vec \mu)^\top \Sigma^{-1} (\vec x - \vec \mu),

    is :math:`\chi^2`-distributed with :math:`n_\mathrm{dof} = \dim(\vec x)`
    degrees of freedom; the p-value is then :math:`\mathrm{cdf}_{\chi^2}(d^2)`.

    Parameters
    ----------
    X : numpy array, shape=(n_samples, n_dim) or (n_dim,)
        Sample to calculate the Mahalanobis distance for.
    mu : numpy array, shape=(n_dim,)
        Mean of the Gaussian distribution.
    cov : numpy array, shape=(n_dim, n_dim)
        Covariance matrix of the Gaussian distribution.
    ndof : float
        Number of degrees of freedom for the chi2 distribution.
        If `None`, `n_dim` will be used.

    Returns
    -------
    pvals : numpy array, shape=(n_samples,)
        The p-values.
    """
    if X.ndim == 1:
        X = X.reshape(1, -1)
    # `mahalanobis` is an external helper assumed to return squared Mahalanobis
    # distances, one per sample
    dsquared = mahalanobis(X, mu, cov)
    if ndof is None:
        ndof = X.shape[1]
    return chi2(ndof).cdf(dsquared)
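# Usage sketch (an assumption, since the `mahalanobis` helper is not shown here):
# the same p-value can be obtained by computing d^2 directly and applying the
# chi2 cdf, which is all gaussian_pvalue does.
import numpy as np
from scipy.stats import chi2

mu = np.array([0.0, 0.0])
cov = np.array([[2.0, 0.3],
                [0.3, 1.0]])
x = np.array([1.0, -1.0])

diff = x - mu
d2 = diff @ np.linalg.solve(cov, diff)   # squared Mahalanobis distance
pval = chi2(len(mu)).cdf(d2)             # the quantity gaussian_pvalue returns
print(pval)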
# `dgp`, `truth`, `beta`, `sigma_u`, `Omegahat`, `gN`, `gj`, `np`, and `iid`
# (a distributions module such as scipy.stats) are assumed to be defined elsewhere.
N = 1000
data = dgp(N, *truth)
y, X = data

Winv = Omegahat(beta, sigma_u, data)


def J(b, s, W, data):
    m = gN(b, s, data)        # Sample moments @ b, s
    N = data[0].shape[0]
    return N * m.T @ W @ m    # Scale by sample size


# Limiting distribution under the null
limiting_J = iid.chi2(1 * 2 - 2)

import scipy.optimize as optimize


def two_step_gmm(data):
    # First step uses identity weighting matrix
    W1 = np.eye(gj(1, 1, data).shape[1])
    x0 = [1, 1]

    def J2(params):
        b, s = params
        return J(b, s, W1, data)

    result = optimize.minimize(J2, x0)
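# Sketch of a possible second step (an assumption, not part of the original
# snippet): re-minimize J with the efficient weighting matrix, assuming
# Omegahat(b, s, data) returns the covariance of the sample moment conditions
# (consistent with the `Winv` naming above), evaluated at the first-step estimates.
def two_step_gmm_second_step(data, first_step_result):
    b1, s1 = first_step_result.x
    W2 = np.linalg.inv(Omegahat(b1, s1, data))   # efficient weighting matrix

    def J2(params):
        b, s = params
        return J(b, s, W2, data)

    return optimize.minimize(J2, [b1, s1])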