예제 #1
0
    def setGammaDistributionScore(self):
        """
        Fit a gamma distribution to the model target data and score the fit.

        The fitted distribution is compared with the same data through
        ``ks_test`` (presumably a Kolmogorov-Smirnov goodness-of-fit test --
        confirm against the file's imports). The outcome is stored on
        ``self.DistributionScoreGamma`` as ``{'Gamma': (accepted, p_value)}``,
        where ``accepted`` is False only when ``p_value < 0.05``.
        Nothing is returned.

        NOTE(review): the distribution parameters are estimated from the very
        sample being tested, which generally biases such p-values upward.
        """
        sample = self.genData.modelTargetData
        # gamma.fit yields (shape, loc, scale); freeze a distribution with
        # exactly those parameters to act as the reference.
        fitted = stats.gamma(*stats.gamma.fit(sample))
        _, p_value = ks_test(sample, fitted)

        # Written as a negation so that any p-value that is not strictly
        # below the threshold counts as accepted.
        accepted = not (p_value < 0.05)
        self.DistributionScoreGamma = {'Gamma': (accepted, p_value)}
예제 #2
0
    def setExpDistributionScore(self):
        """
        Fit an exponential distribution to the model target data and score it.

        The fitted distribution is compared with the same data through
        ``ks_test`` (presumably a Kolmogorov-Smirnov goodness-of-fit test --
        confirm against the file's imports). The outcome is stored on
        ``self.DistributionScoreExp`` as ``{'Exp': (accepted, p_value)}``,
        where ``accepted`` is False only when ``p_value < 0.05``.
        Nothing is returned.

        NOTE(review): the distribution parameters are estimated from the very
        sample being tested, which generally biases such p-values upward.
        """
        sample = self.genData.modelTargetData
        # expon.fit yields (loc, scale); freeze a distribution with exactly
        # those parameters to act as the reference.
        fitted = stats.expon(*stats.expon.fit(sample))
        _, p_value = ks_test(sample, fitted)

        # Written as a negation so that any p-value that is not strictly
        # below the threshold counts as accepted.
        accepted = not (p_value < 0.05)
        self.DistributionScoreExp = {'Exp': (accepted, p_value)}
예제 #3
0
    def setNormalDistributionScore(self):
        """
        Fit a normal distribution to the model target data and score the fit.

        The fitted distribution is compared with the same data through
        ``ks_test`` (presumably a Kolmogorov-Smirnov goodness-of-fit test --
        confirm against the file's imports). The outcome is stored on
        ``self.DistributionScoreNormal`` as
        ``{'Normal': (accepted, p_value)}``, where ``accepted`` is False only
        when ``p_value < 0.05``. Nothing is returned.

        NOTE(review): the distribution parameters are estimated from the very
        sample being tested, which generally biases such p-values upward.
        """
        sample = self.genData.modelTargetData
        # norm.fit yields (loc, scale); freeze a distribution with exactly
        # those parameters to act as the reference.
        fitted = stats.norm(*stats.norm.fit(sample))
        _, p_value = ks_test(sample, fitted)

        # A stats.shapiro(sample) based p-value was left disabled in the
        # original code; only the ks_test result is used.

        # Written as a negation so that any p-value that is not strictly
        # below the threshold counts as accepted.
        accepted = not (p_value < 0.05)
        self.DistributionScoreNormal = {'Normal': (accepted, p_value)}
    def NZKS(self):
        """
        Compute the Kolmogorov-Smirnov statistic and p-value of ``self.truth``
        against the distribution built from ``self.stackpz``.

        Takes no arguments beyond ``self``.

        Returns:
        --------
        (statistic, pvalue) as reported by ``skgof.ks_test``

        """
        # skgof's tests expect a reference object exposing a vectorised .cdf
        # method, so self.stackpz is wrapped in QPPDFCDF before the call.
        stacked_reference = QPPDFCDF(self.stackpz)
        outcome = skgof.ks_test(self.truth, stacked_reference)
        return outcome.statistic, outcome.pvalue
    def KS(self, using, dx=0.0001):
        """
        Compute the Kolmogorov-Smirnov statistic and p-value of the PIT
        values against ``stats.uniform()`` (the standard uniform
        distribution).

        PIT values already cached on ``self.pitarray`` are reused; otherwise
        they are obtained from ``self.PIT`` and cached on ``self.pitarray``.

        Parameters:
        -----------
        using: string
            which parameterization to evaluate; forwarded to ``self.PIT`` and
            therefore ignored when cached PIT values exist
        dx: float
            step size for integral; forwarded to ``self.PIT`` and therefore
            ignored when cached PIT values exist
        Returns:
        --------
        KS statistic and pvalue

        """
        cached = self.pitarray
        if cached is None:
            # Nothing cached yet: compute the PITs once and remember them.
            pits = np.array(self.PIT(using=using, dx=dx))
            self.pitarray = pits
        else:
            pits = np.array(cached)

        outcome = skgof.ks_test(pits, stats.uniform())
        return outcome.statistic, outcome.pvalue
def main():
    """
    Script to estimate the bootstrap uncertainty of the KS statistic of a set
    of Probability Integral Transform (PIT) values.

    inputs:
      TESTPITVALS.out: text table readable by np.loadtxt; the PIT values are
      taken from its second column
    output:
      STANDALONE_KS_BOOTSTRAP_CONF_INTERVAL.out: mean of the bootstrap KS
      values and sigma, where sigma is half the width of the
      15.865-84.135 percentile interval of those values
      testks.jpg: histogram of the bootstrap KS values with a Gaussian of
      that mean/sigma and a Gaussian KDE overlaid, to check that sigma is
      sensible (the figure is also shown interactively)
    """

    infile = "TESTPITVALS.out"
    outfile = "STANDALONE_KS_BOOTSTRAP_CONF_INTERVAL.out"

    data = np.loadtxt(infile)
    pits = data[:, 1]
    print("read in PITS")

    nboots = 1000
    # set bootstrap sample size as half of the full sample
    bootSampleSize = int(len(pits) * 0.5)
    # The reference distribution is the same for every bootstrap draw.
    reference = sps.uniform()

    bootksvals = []
    for k in range(nboots):
        bootpits = resample(pits, n_samples=bootSampleSize, replace=True)
        print("Bootstrap #%d Gold sample numbe: %d\n" % (k, len(bootpits)))
        ksval = skgof.ks_test(bootpits, reference).statistic
        # print() call form so the module also parses under Python 3.
        print("KS Val: %.6f" % ksval)
        bootksvals.append(ksval)

    meanks = np.mean(bootksvals)
    confhigh = np.percentile(bootksvals, 84.135)
    conflow = np.percentile(bootksvals, 15.865)
    sigma = 0.5 * (confhigh - conflow)
    print("mean ks: %.6f   sigma: %.6f\n" % (meanks, sigma))

    # Open the result file only once there is something to write, and let
    # the context manager close it exactly once.
    with open(outfile, "w") as outfp:
        outfp.write("mean KS value: %.6f\nsigma: %.6f\n" % (meanks, sigma))

    binedges = np.linspace(meanks - 4. * sigma, meanks + 4. * sigma, 75)
    binwidth = binedges[2] - binedges[1]

    plt.figure(figsize=(10, 10))
    plt.hist(bootksvals, bins=binedges, label="histogram")
    xarr = np.arange(meanks - 4. * sigma, meanks + 4. * sigma, binwidth)
    # Scale the densities by (number of samples * bin width) so the curves
    # are directly comparable with the histogram counts.
    gaussian = sps.norm(loc=meanks, scale=sigma)
    yarr = float(nboots) * binwidth * gaussian.pdf(xarr)
    tmplabel = "Bootstrap Gaussian\nmean:%.6f sigma=%.6f " % (meanks, sigma)
    plt.plot(xarr, yarr, c='r', lw=3, linestyle='--', label=tmplabel)
    kdex = sps.gaussian_kde(bootksvals)
    ykde = float(nboots) * binwidth * kdex(xarr)
    plt.plot(xarr, ykde, c='g', lw=2, linestyle='-', label="Gaussian KDE fit")
    plt.plot([meanks, meanks], [0, 1.3 * np.amax(yarr)],
             lw=4,
             c='k',
             label="mean KS")
    plt.xlabel("SkyNet KS Bootstraps", fontsize=18)
    plt.ylabel("Number", fontsize=18)
    plt.legend(loc="upper left")
    # savefig's keyword for the output type is `format`; `fmt` is not a
    # recognised argument.
    plt.savefig("testks.jpg", format="jpg")
    plt.show()

    print("finished")
예제 #7
0
def computeKS(data, mu, sd, seed):
    """
    Run a KS test of ``data`` against a normal distribution.

    Parameters:
    -----------
    data:
        sample handed to ``skgof.ks_test``
    mu: float
        mean (``loc``) of the reference normal distribution
    sd: float
        standard deviation (``scale``) of the reference normal distribution
    seed:
        value passed to ``np.random.seed``; this reseeds NumPy's global
        random state as a side effect
    Returns:
    --------
    list ``[statistic, pvalue]``
    """
    # NOTE(review): nothing visible in this function draws random numbers;
    # the reseed is kept because callers may rely on the global side effect.
    np.random.seed(seed)

    from skgof import ks_test
    from scipy.stats import norm

    reference = norm(loc=mu, scale=sd)
    outcome = ks_test(data, reference)
    return [outcome.statistic, outcome.pvalue]