def NZAD(self, vmin=0.005, vmax=1.995, delv=0.05):
    """
    Compute the Anderson-Darling statistic and p-value comparing the
    values in self.truth against the distribution in self.stackpz.

    Since the Anderson-Darling test requires a properly normalized
    distribution over the [vmin, vmax] range, self.stackpz is re-evaluated
    on the grid np.arange(vmin, vmax + delv, delv) and a new qp.PDF object
    is built from that gridded evaluation.

    Parameters:
    -----------
    vmin, vmax: floats
        values of self.truth outside the open interval (vmin, vmax)
        are discarded
    delv: float
        grid spacing used to build the new qp object

    Returns:
    --------
    Anderson-Darling statistic and pvalue
    """
    # Follows Rongpu's use of the skgof functions: skgof expects an object
    # exposing a .cdf method that accepts a vector of values, hence the
    # QPPDFCDF wrapper.
    # Parenthesized single-argument form prints identically on Python 2 and 3.
    print("using %f and %f for vmin and vmax\n" % (vmin, vmax))
    szs = self.truth
    mask = (szs > vmin) & (szs < vmax)
    # NOTE(review): when (vmax - vmin) is not a multiple of delv the grid
    # extends past vmax (the defaults give a last point of 2.005), while the
    # data are cut at vmax -- confirm this is intended.
    vgrid = np.arange(vmin, vmax + delv, delv)
    # Positional flags are presumably norm=True, vb=False -- TODO confirm
    # against qp.PDF.evaluate.
    veval = self.stackpz.evaluate(vgrid, 'gridded', True, False)
    vobj = qp.PDF(gridded=(veval[0], veval[1]))
    tmpnzfunc = QPPDFCDF(vobj, self.dx)
    nzAD = skgof.ad_test(szs[mask], tmpnzfunc)
    return nzAD.statistic, nzAD.pvalue
def AD(self, using, dx=0.0001, vmin=0.005, vmax=0.995):
    """
    Compute the Anderson-Darling statistic and p-value for the PIT values
    by comparing with a uniform distribution on [vmin, vmax].

    Since the statistic diverges at 0 and 1, PIT values too close to 0 or 1
    are discarded, and the reference uniform distribution is restricted to
    the same [vmin, vmax] range so that it stays properly normalized.

    Parameters:
    -----------
    using: string
        which parameterization to evaluate
    dx: float
        step size for integral
    vmin, vmax: floats
        PIT values outside this range are discarded

    Returns:
    --------
    AD statistic and pvalue

    Notes:
    ------
    If self.pitarray is already set it is reused as-is; `using` and `dx`
    only take effect when the PIT values have to be computed here.
    """
    if self.pitarray is not None:
        pits = np.array(self.pitarray)
    else:
        pits = np.array(self.PIT(using=using, dx=dx))
        # cache so later calls do not recompute the PIT values
        self.pitarray = pits
    mask = (pits > vmin) & (pits < vmax)
    # Parenthesized form prints identically on Python 2 and 3.
    print("now with proper uniform range")
    delv = vmax - vmin
    reference = stats.uniform(loc=vmin, scale=delv)
    ad_result = skgof.ad_test(pits[mask], reference)
    return ad_result.statistic, ad_result.pvalue
def computeAD(data, mu, sd, seed):
    """
    Run two Anderson-Darling checks of `data` against a normal distribution.

    The numpy global random state is seeded with `seed` first. skgof's
    ad_test compares `data` with a normal distribution of mean `mu` and
    standard deviation `sd`; scipy's anderson is called with dist 'norm'
    (it is not given `mu` or `sd`) and only its critical values are kept.

    Returns:
    --------
    list: [skgof statistic, skgof pvalue, scipy critical values as a list]
    """
    np.random.seed(seed)

    from skgof import ad_test
    from scipy.stats import norm, anderson

    reference = norm(loc=mu, scale=sd)
    gof = ad_test(data, reference)
    normality = anderson(data, 'norm')

    critical_values = normality.critical_values.tolist()
    return [gof.statistic, gof.pvalue, critical_values]
def _p(test_i, null_i, M_i, d_i):
    """
    Estimate a p-value for one test statistic from its null distribution.

    The ECDF-based estimate is always computed. When `M_i` is below the
    enclosing-scope limit `m`, a generalized Pareto distribution (GPD) is
    fitted to the `n_i` largest null values (starting from the
    enclosing-scope `n` and shrinking by `decrease_n_by`) until the
    Anderson-Darling test no longer rejects the fit; the tail estimate from
    that fit is then returned instead of the ECDF estimate.

    Relies on names from the enclosing scope: n, N, m, ecdf_pseudocount,
    decrease_n_by, genpareto, ad_test, warn and np.

    Parameters:
    -----------
    test_i:
        observed statistic
    null_i:
        null statistics for this observation
    M_i:
        compared against `m` to decide whether to attempt the GPD fit
        (presumably the count of null values at least as extreme as
        test_i -- TODO confirm against caller)
    d_i:
        object with .sum(), used as the numerator count of the ECDF
        estimate (presumably an indicator array -- TODO confirm)

    Returns:
    --------
    tuple: (p-value estimate, whether the GPD estimate was used,
            goodness-of-fit p-value of the GPD fit or nan,
            raw ECDF estimate or nan)
    """
    gpd_fit = None
    gpd_fit_p_value = None
    n_i = n
    # TODO: only the largest values are needed, so a partial sort
    #  (np.partition) would do; this needs performance and unit tests first.
    null_i = sorted(null_i)
    t = None

    if all(np.isnan(null_i)):
        return np.nan, False, np.nan, np.nan

    # compute ecdf based, biased estimate of p-value
    raw_ecdf_estimate = (ecdf_pseudocount + d_i.sum()) / (N + 1)

    if M_i < m:
        # fit GPD, reducing n_i until the fit is acceptable
        while n_i > 0:
            # Threshold halfway between the smallest of the n_i largest
            # values and the next value below it, so that exactly n_i values
            # exceed it. Negative indices already count from 1 at the end,
            # so no extra -1 is needed (the previous -n_i-1 / -n_i-2 left
            # n_i + 1 values above the threshold).
            t = (null_i[-n_i] + null_i[-n_i - 1]) / 2
            # ndarray so the arithmetic below does not depend on the
            # element type of the sorted list
            tail = np.asarray(null_i[-n_i:])
            exceedances = tail - t
            assert all(tail >= t)
            assert len(exceedances) == n_i

            gpd_fit = genpareto(*genpareto.fit(exceedances))
            gpd_fit_p_value = ad_test(exceedances, gpd_fit).pvalue

            # Assuming ad_test returns a conventional p-value (as
            # skgof.ad_test does): a small value REJECTS the hypothesis that
            # the exceedances follow the fitted GPD, so stop only once the
            # fit is not rejected. (Previously the loop stopped on
            # rejection, i.e. it kept bad fits and discarded good ones.)
            if gpd_fit_p_value > 0.05:
                break
            n_i -= decrease_n_by

    if gpd_fit is not None and gpd_fit_p_value > 0.05:
        tail_estimate = n_i / N * (1 - gpd_fit.cdf(test_i - t))
        return tail_estimate, True, gpd_fit_p_value, raw_ecdf_estimate

    if gpd_fit is not None:
        # TODO: get index and highlight which observation could not be fitted!
        warn('A good GPD fit could not be reached, using ECDF estimate instead')
    return raw_ecdf_estimate, False, np.nan, raw_ecdf_estimate
def NZAD(self, vmin=0.005, vmax=1.995):
    """
    Compute the Anderson-Darling statistic and p-value comparing the
    values in self.truth against the distribution in self.stackpz.

    Parameters:
    -----------
    vmin, vmax: floats
        values of self.truth outside the open interval (vmin, vmax)
        are discarded

    Returns:
    --------
    Anderson-Darling statistic and pvalue
    """
    # Follows Rongpu's use of the skgof functions: skgof expects an object
    # exposing a .cdf method that accepts a vector of values, hence the
    # QPPDFCDF wrapper around self.stackpz.
    # Parenthesized single-argument form prints identically on Python 2 and 3.
    print("using %f and %f for vmin and vmax\n" % (vmin, vmax))
    szs = self.truth
    mask = (szs > vmin) & (szs < vmax)
    tmpnzfunc = QPPDFCDF(self.stackpz, self.dx)
    nzAD = skgof.ad_test(szs[mask], tmpnzfunc)
    return nzAD.statistic, nzAD.pvalue