def setGammaDistributionScore(self):
    """Record whether the model target data is consistent with a gamma fit.

    A gamma distribution is fitted to ``self.genData.modelTargetData`` with
    ``stats.gamma.fit``; the frozen fitted distribution is then passed to
    ``ks_test`` together with the same data.

    The outcome is stored in ``self.DistributionScoreGamma`` as
    ``{'Gamma': (flag, p_value)}`` where ``flag`` is ``False`` when
    ``p_value < 0.05`` and ``True`` otherwise.
    """
    sample = self.genData.modelTargetData
    shape, loc, scale = stats.gamma.fit(sample)
    fitted_gamma = stats.gamma(shape, loc, scale)

    # Only the p-value is used; the KS statistic itself is discarded.
    _, p_value = ks_test(sample, fitted_gamma)

    # Written as a negation so that anything not strictly below the 0.05
    # threshold is flagged True.
    not_rejected = not (p_value < 0.05)
    self.DistributionScoreGamma = {'Gamma': (not_rejected, p_value)}
def setExpDistributionScore(self):
    """Record whether the model target data is consistent with an exponential fit.

    An exponential distribution is fitted to
    ``self.genData.modelTargetData`` with ``stats.expon.fit``; the frozen
    fitted distribution is then passed to ``ks_test`` together with the
    same data.

    The outcome is stored in ``self.DistributionScoreExp`` as
    ``{'Exp': (flag, p_value)}`` where ``flag`` is ``False`` when
    ``p_value < 0.05`` and ``True`` otherwise.
    """
    sample = self.genData.modelTargetData
    loc, scale = stats.expon.fit(sample)
    fitted_expon = stats.expon(loc, scale)

    # Only the p-value is used; the KS statistic itself is discarded.
    _, p_value = ks_test(sample, fitted_expon)

    # Written as a negation so that anything not strictly below the 0.05
    # threshold is flagged True.
    not_rejected = not (p_value < 0.05)
    self.DistributionScoreExp = {'Exp': (not_rejected, p_value)}
def setNormalDistributionScore(self):
    """Record whether the model target data is consistent with a normal fit.

    A normal distribution is fitted to ``self.genData.modelTargetData``
    with ``stats.norm.fit``; the frozen fitted distribution is then passed
    to ``ks_test`` together with the same data.

    The outcome is stored in ``self.DistributionScoreNormal`` as
    ``{'Normal': (flag, p_value)}`` where ``flag`` is ``False`` when
    ``p_value < 0.05`` and ``True`` otherwise.
    """
    sample = self.genData.modelTargetData
    loc, scale = stats.norm.fit(sample)
    fitted_norm = stats.norm(loc, scale)

    # Only the p-value is used; the KS statistic itself is discarded.
    # Disabled alternative kept from the original:
    # W, p_value = stats.shapiro(self.genData.modelTargetData)
    _, p_value = ks_test(sample, fitted_norm)

    # Written as a negation so that anything not strictly below the 0.05
    # threshold is flagged True.
    not_rejected = not (p_value < 0.05)
    self.DistributionScoreNormal = {'Normal': (not_rejected, p_value)}
def NZKS(self):
    """
    Compute the Kolmogorov-Smirnov statistic and p-value comparing
    self.truth against the distribution built from self.stackpz.

    Parameters:
    -----------
    None besides self; the method reads self.stackpz and self.truth.

    Returns:
    --------
    KS statistic and pvalue
    """
    # skgof.ks_test expects its second argument to be an object exposing a
    # .cdf method that accepts a vector of values, so self.stackpz is
    # wrapped in QPPDFCDF before being handed over.
    ks_outcome = skgof.ks_test(self.truth, QPPDFCDF(self.stackpz))
    return ks_outcome.statistic, ks_outcome.pvalue
def KS(self, using, dx=0.0001):
    """
    Compute the Kolmogorov-Smirnov statistic and p-value for the PIT
    values by comparing them with scipy's default uniform distribution
    (``stats.uniform()``, i.e. uniform on [0, 1]).

    Parameters:
    -----------
    using: string
        which parameterization to evaluate; forwarded to self.PIT
    dx: float
        step size for integral; forwarded to self.PIT

    Returns:
    --------
    KS statistic and pvalue
    """
    if self.pitarray is None:
        # No cached PIT values yet: compute them once and remember them.
        pits = np.array(self.PIT(using=using, dx=dx))
        self.pitarray = pits
    else:
        # Reuse the cached values (np.array makes a fresh array of them).
        pits = np.array(self.pitarray)

    uniform_reference = stats.uniform()
    outcome = skgof.ks_test(pits, uniform_reference)
    return outcome.statistic, outcome.pvalue
def main():
    """
    script to calculate confidence intervals on KS test from bootstraps

    inputs:
        "TESTPITVALS.out": text file readable by np.loadtxt; the second
        column (index 1) is taken as the vector of Probability Integral
        Transform (PIT) values.

    output:
        "STANDALONE_KS_BOOTSTRAP_CONF_INTERVAL.out": the mean bootstrap KS
        value and sigma, where sigma is half the distance between the
        15.865th and 84.135th percentiles of the bootstrap KS values.

        "testks.jpg": a histogram of the KS bootstrap values along with a
        KDE and a Gaussian curve (mean/sigma from the bootstraps) to check
        that sigma is sensible. The figure is also shown interactively.
    """
    infile = "TESTPITVALS.out"
    outfile = "STANDALONE_KS_BOOTSTRAP_CONF_INTERVAL.out"

    data = np.loadtxt(infile)
    pits = data[:, 1]
    print("read in PITS")

    nboots = 1000
    # set bootstrap sample size as half of the full sample
    bootSampleSize = int(len(pits) * 0.5)

    # The reference distribution is the same for every bootstrap, so build
    # it once outside the loop.
    uniform_reference = sps.uniform()
    bootksvals = []
    for k in range(nboots):
        bootpits = resample(pits, n_samples=bootSampleSize, replace=True)
        print("Bootstrap #%d Gold sample numbe: %d\n" % (k, len(bootpits)))
        ks_result = skgof.ks_test(bootpits, uniform_reference)
        ksval = ks_result.statistic
        # Was a Python 2 print statement (SyntaxError on Python 3); the
        # parenthesised form prints the same text on both versions.
        print("KS Val: %.6f" % (ksval))
        bootksvals.append(ksval)

    meanks = np.mean(bootksvals)
    confhigh = np.percentile(bootksvals, 84.135)
    conflow = np.percentile(bootksvals, 15.865)
    sigma = 0.5 * (confhigh - conflow)
    print("mean ks: %.6f sigma: %.6f\n" % (meanks, sigma))

    # Open the output file only once there is something to write, and let
    # the context manager close it even if the write fails.
    with open(outfile, "w") as outfp:
        outfp.write("mean KS value: %.6f\nsigma: %.6f\n" % (meanks, sigma))

    binedges = np.linspace(meanks - 4. * sigma, meanks + 4. * sigma, 75)
    binwidth = binedges[2] - binedges[1]
    plt.figure(figsize=(10, 10))
    plt.hist(bootksvals, bins=binedges, label="histogram")

    xarr = np.arange(meanks - 4. * sigma, meanks + 4. * sigma, binwidth)

    # Scale the unit-area curves by nboots * binwidth so they are
    # comparable to the histogram counts.
    y = sps.norm(loc=meanks, scale=sigma)
    yarr = float(nboots) * binwidth * y.pdf(xarr)
    tmplabel = "Bootstrap Gaussian\nmean:%.6f sigma=%.6f " % (meanks, sigma)
    plt.plot(xarr, yarr, c='r', lw=3, linestyle='--', label=tmplabel)

    kdex = sps.gaussian_kde(bootksvals)
    ykde = float(nboots) * binwidth * kdex(xarr)
    plt.plot(xarr, ykde, c='g', lw=2, linestyle='-', label="Gaussian KDE fit")

    plt.plot([meanks, meanks], [0, 1.3 * np.amax(yarr)], lw=4, c='k',
             label="mean KS")
    plt.xlabel("SkyNet KS Bootstraps", fontsize=18)
    plt.ylabel("Number", fontsize=18)
    plt.legend(loc="upper left")
    # savefig's keyword for the file type is `format`; the original `fmt`
    # is not a savefig parameter.
    plt.savefig("testks.jpg", format="jpg")
    plt.show()
    print("finished")
def computeKS(data, mu, sd, seed):
    """Run a KS test of ``data`` against a normal distribution.

    Parameters
    ----------
    data : sample passed as the first argument to ``skgof.ks_test``
    mu : location (mean) of the reference normal distribution
    sd : scale (standard deviation) of the reference normal distribution
    seed : value passed to ``np.random.seed`` before anything else runs

    Returns
    -------
    list
        ``[statistic, pvalue]`` taken from the ``ks_test`` result.
    """
    # Seed NumPy's global RNG first, before the imports below.
    np.random.seed(seed)

    from skgof import ks_test
    from scipy.stats import norm

    reference = norm(loc=mu, scale=sd)
    outcome = ks_test(data, reference)
    return [outcome.statistic, outcome.pvalue]