Python kstest Examples, scipy.stats.stats.kstest Python Examples

Example #1

0

Show file

File: static_infer.py Project: prshnt/Daily-pracitce

def infer_ks_test_goodness(l1):
    """Run a one-sample KS test of ``l1`` against a normal distribution.

    The location and scale handed to the normal distribution are computed
    from the sample itself (``average(l1)`` and ``std(l1)``).  Prints
    ``'reject'`` when the p-value is below 0.01 and ``'accept'`` otherwise,
    then prints the raw test result.  Returns nothing.
    """
    loc, scale = average(l1), std(l1)
    outcome = kstest(l1, 'norm', [loc, scale])
    # Index 1 of the result is the p-value.
    p_value = outcome[1]
    print('reject' if p_value < 0.01 else 'accept')
    print(outcome)

Example #2

0

Show file

File: static_infer.py Project: 570468837/Daily-pracitce

def infer_ks_test_goodness(l1):
    """Run a one-sample KS test of ``l1`` against a normal distribution.

    The location and scale handed to the normal distribution are computed
    from the sample itself (``average(l1)`` and ``std(l1)``).  Prints
    ``'reject'`` when the p-value is below 0.01 and ``'accept'`` otherwise,
    then prints the raw test result.  Returns nothing.
    """
    loc, scale = average(l1), std(l1)
    outcome = kstest(l1, 'norm', [loc, scale])
    # Index 1 of the result is the p-value.
    p_value = outcome[1]
    print('reject' if p_value < 0.01 else 'accept')
    print(outcome)

Example #3

0

Show file

File: distribution.py Project: tannishk/data-profiling

def logistic(dataset, name):
    """KS-test column ``name`` of ``dataset`` against the standard logistic distribution.

    Only columns whose dtype is ``int64`` or ``float64`` are tested; for any
    other dtype the function returns ``None``.  Missing values are dropped
    before testing.  A verdict is printed using a 0.055 p-value cut-off.

    Returns:
        ``(z, p)`` -- the KS statistic and p-value -- or ``None`` for a
        non-numeric column.
    """
    column = dataset[name]
    if column.dtype == 'int64' or column.dtype == 'float64':
        # dropna() returns a new object; its result must be used, otherwise
        # the NaNs are still present in the data handed to kstest.
        x = np.array(column.dropna())
        z, p = stats.kstest(x, 'logistic')
        # The tested distribution is logistic, so the verdict names it
        # (the previous messages said "Exponential").
        if p < 0.055:
            print('It is Not a Logistic distribution')
        else:
            print('It is a Logistic distribution')
        return z, p
    return None

Example #4

0

Show file

File: distribution.py Project: tannishk/data-profiling

def typedis(dataset, name, dis):
    """KS-test column ``name`` of ``dataset`` against the distribution ``dis``.

    ``dis`` is passed straight to ``stats.kstest`` as the reference
    distribution; see
    http://docs.scipy.org/doc/scipy-0.14.0/reference/stats.html#module-scipy.stats
    for the available names.  Only ``int64``/``float64`` columns are tested;
    any other dtype returns ``None``.  Missing values are dropped before
    testing.  A verdict is printed using a 0.055 p-value cut-off.

    Returns:
        ``(z, p)`` -- the KS statistic and p-value -- or ``None`` for a
        non-numeric column.
    """
    column = dataset[name]
    if column.dtype == 'int64' or column.dtype == 'float64':
        # dropna() returns a new object; its result must be used, otherwise
        # the NaNs are still present in the data handed to kstest.
        x = np.array(column.dropna())
        z, p = stats.kstest(x, dis)
        if p < 0.055:
            print('It is Not a %s distribution' % (dis,))
        else:
            print('It is a %s distribution' % (dis,))
        return z, p
    return None

Example #5

0

Show file

File: distribution.py Project: tannishk/data-profiling

def norm(dataset, name):
    """KS-test column ``name`` of ``dataset`` against the standard normal distribution.

    Only ``int64``/``float64`` columns are tested; any other dtype returns
    ``None``.  Missing values are dropped before testing.  A verdict is
    printed using a 0.055 p-value cut-off.

    Returns:
        ``(z, p)`` -- the KS statistic and p-value -- or ``None`` for a
        non-numeric column.
    """
    column = dataset[name]
    if column.dtype == 'int64' or column.dtype == 'float64':
        # dropna() returns a new object; its result must be used, otherwise
        # the NaNs are still present in the data handed to kstest.
        x = np.array(column.dropna())
        z, p = stats.kstest(x, 'norm')
        if p < 0.055:
            print('It is Not a normal distribution')
        else:
            print('It is a normal distribution')
        return z, p
    return None

Example #6

0

Show file

File: distribution.py Project: tannishk/data-profiling

def welisberg(dataset, name, args=None):
    """KS-test column ``name`` of ``dataset`` against scipy's ``dweibull`` distribution.

    Note that ``dweibull`` is scipy's *double* Weibull distribution.  It has
    a mandatory shape parameter, so calling ``kstest(x, 'dweibull')`` with
    no distribution arguments raises ``TypeError``.  When ``args`` is not
    supplied, the parameters are therefore estimated from the data with
    ``stats.dweibull.fit``.

    Only ``int64``/``float64`` columns are tested; any other dtype returns
    ``None``.  Missing values are dropped before testing.  A verdict is
    printed using a 0.055 p-value cut-off.

    Args:
        dataset: object indexable by ``name`` whose column has ``dtype`` and
            ``dropna()`` (e.g. a pandas DataFrame).
        name: column to test.
        args: optional distribution parameters ``(c[, loc[, scale]])``
            forwarded to ``kstest``; fitted from the data when ``None``.

    Returns:
        ``(z, p)`` -- the KS statistic and p-value -- or ``None`` for a
        non-numeric column.
    """
    column = dataset[name]
    if column.dtype == 'int64' or column.dtype == 'float64':
        # dropna() returns a new object; its result must be used, otherwise
        # the NaNs are still present in the data handed to kstest.
        x = np.array(column.dropna())
        if args is None:
            # NOTE: testing against parameters fitted on the same sample
            # makes the p-value optimistic; pass ``args`` for a fixed
            # reference distribution.
            args = stats.dweibull.fit(x)
        z, p = stats.kstest(x, 'dweibull', args=tuple(args))
        if p < 0.055:
            print('It is Not a Weibull distribution')
        else:
            print('It is a weibull distribution')
        return z, p
    return None

Example #7

0

Show file

    def test_linear_studentt_parent_dist(self, graph):
        """
        Kolmogorov-Smirnov check on data generated with the "student-t" setting.

        Generates 100000 samples from ``graph``, takes the first column and
        runs a KS test against a t distribution with 3 degrees of freedom;
        the test passes when the resulting p-value is below 0.01.
        """
        np.random.seed(10)

        samples = generate_continuous_data(
            graph,
            distribution="student-t",
            noise_scale=1,
            n_samples=100000,
            seed=10,
        )

        first_column = samples[:, 0]
        # Index 1 of the KS result is the p-value.
        p_val = stats.kstest(first_column, "t", args=[3])[1]
        assert p_val < 0.01

Example #8

0

Show file

File: timeseries.py Project: tannishk/data-profiling

def freq(df, col, max1):
    """Search for a decomposition frequency whose residuals look normal.

    For every ``i`` in ``range(1, max1)`` the column ``df[col]`` is passed to
    ``seasonal_decompose`` with ``freq=i``; the non-NaN residuals are printed
    and KS-tested against the standard normal distribution (0.055 p-value
    cut-off).  Frequencies for which a ``ValueError`` is raised are skipped.
    Finally ``decompose(df, col, i)`` is called.

    Returns:
        The last frequency whose residuals passed the test, or ``None`` if
        none did.
    """
    count = None
    for i in range(1, max1):
        try:
            decomposed = seasonal_decompose(df[col].values, freq=i)
            resid = np.asarray(decomposed.resid)
            # Index with the boolean mask itself.  Wrapping the mask in a
            # list (``resid[[mask]]``) is rejected by current NumPy with an
            # IndexError, which the handler below would not catch.
            resid = resid[~np.isnan(resid)]
            print(resid)
            z, p = stats.kstest(resid, 'norm')
            if p < 0.055:
                print('It is not the required freq')
            else:
                print('it is the required freq')
                count = i
        except ValueError:
            # Best effort: a frequency that cannot be decomposed or tested
            # is simply skipped.
            pass
    # NOTE(review): this uses the last frequency tried (``i``), not the
    # accepted one (``count``), and raises NameError when max1 <= 1 --
    # confirm the intended argument against decompose()'s contract.
    decompose(df, col, i)
    return count

Example #9

0

Show file

File: statsNum.py Project: PierreGe/android-defect-study

def main():
    """Print correlation, normality and OLS statistics for ``finaldata.json``.

    The JSON file maps each application name to a triple
    ``(cgscore, issuescore, classSize)`` of mappings.  For every application
    with more than three keys shared by ``issuescore`` and ``cgscore`` the
    function prints Spearman/Kendall correlations, KS normality tests and an
    OLS fit; otherwise it prints a ``FAILURE`` line.  Afterwards the same
    correlations, two one-way ANOVAs and three OLS fits are printed for the
    data pooled over all applications.

    If the file cannot be read or parsed, ``"Run analysis"`` is printed and
    the interpreter exits.
    """
    finaldatafile = "finaldata.json"
    try:
        with open(finaldatafile) as data_file:
            finalData = json.load(data_file)
    except (IOError, ValueError):
        # Missing/unreadable file or invalid JSON.  Catching only these keeps
        # unrelated errors (and Ctrl-C) from being hidden behind the hint.
        print("Run analysis")
        raise SystemExit

    def report(label, value):
        """Print one indented ``label + value`` result line."""
        print(" " * 8 + label + str(value))

    # ---- per-application statistics ------------------------------------
    for appliName in finalData:
        cgscore, issuescore, classSize = finalData[appliName]

        # Keys of issuescore that also have a call-graph score / class size.
        cgKeys = [key for key in issuescore if key in cgscore]
        sizeKeys = [key for key in issuescore if key in classSize]
        modelKeys = [key for key in sizeKeys if key in cgscore]
        j = len(cgKeys)

        if j <= 3:
            print("FAILURE : " + appliName)
            continue

        issueCallgraphValueForStats = [issuescore[key] for key in cgKeys]
        callGraphValueForStats = [cgscore[key] for key in cgKeys]
        issueSizeValueForStats = [issuescore[key] for key in sizeKeys]
        classSizeValueForStats = [classSize[key] for key in sizeKeys]
        issueForModel = [issuescore[key] for key in modelKeys]
        callGraphForModel = [cgscore[key] for key in modelKeys]
        classSizeForModel = [classSize[key] for key in modelKeys]

        spearmanRho, spearmanP = spearmanr(
            issueCallgraphValueForStats, callGraphValueForStats)
        kendallTau, kendallP = kendalltau(
            issueCallgraphValueForStats, callGraphValueForStats)
        spearmanRho2, spearmanP2 = spearmanr(
            issueSizeValueForStats, classSizeValueForStats)
        kendallTau2, kendallP2 = kendalltau(
            issueSizeValueForStats, classSizeValueForStats)

        # The issue-score KS test is reported in both sections; it has the
        # same input each time, so it is computed once.
        ksIssueD, ksIssueP = kstest(list(issuescore.values()), "norm")
        ksCgD, ksCgP = kstest(list(cgscore.values()), "norm")
        ksSizeD, ksSizeP = kstest(list(classSize.values()), "norm")

        print(appliName)
        print("--- API Call <> Issue")
        report("Spearman rho correlation coefficient = ", spearmanRho)
        report("Spearman p-value = ", spearmanP)
        report("Kendall Tau = ", kendallTau)
        report("Kendall p-value = ", kendallP)
        report("KS Test D = ", ksIssueD)
        report("KS p-value = ", ksIssueP)
        report("KS Test D = ", ksCgD)
        report("KS p-value = ", ksCgP)
        report("dataset size =", j)
        print("--- Class Size <> Issue")
        report("Spearman rho correlation coefficient = ", spearmanRho2)
        report("Spearman p-value = ", spearmanP2)
        report("Kendall Tau = ", kendallTau2)
        report("Kendall p-value = ", kendallP2)
        report("KS Test D = ", ksIssueD)
        report("KS p-value = ", ksIssueP)
        report("KS Test D = ", ksSizeD)
        report("KS p-value = ", ksSizeP)

        # OLS of issues on (API calls, class size); no intercept is added
        # for the per-application model.
        y = issueForModel
        X = [list(row) for row in
             np.array([callGraphForModel, classSizeForModel]).transpose()]
        results = sm.OLS(y, X).fit()
        print(results.summary(yname="issues", xname=("APIcalls", "ClassSize")))

    print("|" * 80)
    print("-" * 80)
    print("-" * 80)
    print("|" * 80)

    # ---- statistics pooled over all applications -----------------------
    issueForGlobalModel = []
    callGraphForGlobalModel = []
    classSizeForGlobalModel = []
    issueGlobalCallgraphValueForStats = []
    callGlobalGraphValueForStats = []
    NOissueGlobalCallgraphValueForStats = []
    issueGlobalSizeValueForStats = []
    classGlobalSizeValueForStats = []
    anova1issue = []
    anova2issue = []

    for appliName in finalData:
        cgscore, issuescore, classSize = finalData[appliName]

        for key in issuescore:
            inCallGraph = key in cgscore
            inClassSize = key in classSize

            if inCallGraph:
                issueGlobalCallgraphValueForStats.append(issuescore[key])
                callGlobalGraphValueForStats.append(cgscore[key])
            else:
                NOissueGlobalCallgraphValueForStats.append(issuescore[key])

            if inClassSize:
                issueGlobalSizeValueForStats.append(issuescore[key])
                classGlobalSizeValueForStats.append(classSize[key])
                if inCallGraph:
                    issueForGlobalModel.append(issuescore[key])
                    callGraphForGlobalModel.append(cgscore[key])
                    # The class-size regressor must hold the class size;
                    # appending the issue score here regressed the issue
                    # scores on themselves.
                    classSizeForGlobalModel.append(classSize[key])

        # Call-graph scores split by whether the key has an issue score.
        for key in cgscore:
            if key in issuescore:
                anova1issue.append(cgscore[key])
            else:
                anova2issue.append(cgscore[key])

    spearmanGlobalRho, spearmanGlobalP = spearmanr(
        issueGlobalCallgraphValueForStats, callGlobalGraphValueForStats)
    kendallGlobalTau, kendallGlobalP = kendalltau(
        issueGlobalCallgraphValueForStats, callGlobalGraphValueForStats)
    spearmanGlobalRho2, spearmanGlobalP2 = spearmanr(
        issueGlobalSizeValueForStats, classGlobalSizeValueForStats)
    kendallGlobalTau2, kendallGlobalP2 = kendalltau(
        issueGlobalSizeValueForStats, classGlobalSizeValueForStats)

    fvalueanova1, pvalueanova1 = f_oneway(
        issueGlobalCallgraphValueForStats, NOissueGlobalCallgraphValueForStats)
    fvalueanova2, pvalueanova2 = f_oneway(anova1issue, anova2issue)

    print(len(NOissueGlobalCallgraphValueForStats))
    print("--- Correlation : API Call <> Issue")
    report("Spearman rho correlation coefficient = ", spearmanGlobalRho)
    report("Spearman p-value = ", spearmanGlobalP)
    report("Kendall Tau = ", kendallGlobalTau)
    report("Kendall p-value = ", kendallGlobalP)
    report("ANOVA F-value = ", fvalueanova1)
    report("ANOVA p-value = ", pvalueanova1)
    report("ANOVA F-value = ", fvalueanova2)
    report("ANOVA p-value = ", pvalueanova2)
    print("--- Correlation : Class Size <> Issue")
    report("Spearman rho correlation coefficient = ", spearmanGlobalRho2)
    report("Spearman p-value = ", spearmanGlobalP2)
    report("Kendall Tau = ", kendallGlobalTau2)
    report("Kendall p-value = ", kendallGlobalP2)

    print("_" * 80)
    print("_" * 80)
    print("-- GLOBAL OLS --")
    y = issueForGlobalModel
    X = [list(row) for row in
         np.array([callGraphForGlobalModel, classSizeForGlobalModel]).transpose()]
    X = sm.add_constant(X, prepend=False)
    results = sm.OLS(y, X).fit()
    print(results.summary(yname="issues", xname=("APIcalls", "ClassSize", "const")))

    print("API CALLS only")
    X = sm.add_constant(callGraphForGlobalModel, prepend=False)
    results = sm.OLS(y, X).fit()
    print(results.summary(yname="issues", xname=["APIcalls", "const"]))

    print("Size only")
    X = sm.add_constant(classSizeForGlobalModel, prepend=False)
    results = sm.OLS(y, X).fit()
    print(results.summary(yname="issues", xname=["ClassSize", "const"]))

Example #10

0

Show file

def run_densityEstimation(
        functionName,
        method,
        kfold=20,
        numDims=2,
        numSamples=1000,
        candidates="join",
        bandwidthOptimizationType=BandwidthOptimizationType_SILVERMANSRULE,
        out=True,
        plot=False,
        tikz=False):
    """Run ``kfold`` density-estimation rounds and record validation statistics.

    The data set is loaded with ``load_data_set`` and split once into a
    learning part and a validation part.  In every round the learning part
    is split again into train/test samples and a density is estimated with
    the estimator selected by ``method`` (a name containing ``"sgde"``,
    ``"kde"`` or ``"nataf"``).  For each round the cross entropy on the
    validation samples is stored, and the validation samples are mapped
    through ``dist.cdf`` (with and without ``shuffle``) and KS-tested per
    dimension against ``Uniform(0, 1)``.

    Args:
        functionName: data-set name, forwarded to the loaders/estimators and
            used in output file names.
        method: estimator selector; ``"sgde_zero"`` selects ``"zero"``
            interpolation, anything else ``"boundaries"``.
        kfold: number of rounds.
        numDims: number of dimensions that are KS-tested.
        numSamples: number of samples loaded and drawn via ``dist.rvs``.
        candidates: forwarded to ``estimateSGDEDensity``.
        bandwidthOptimizationType: forwarded to ``estimateKDEDensity``.
        out: when true, results are written below ``data/<method>/``.
        plot: when true, cumulative histograms are plotted and shown.
        tikz: unused in this function.

    Raises:
        AttributeError: if ``method`` matches none of the known estimators.
    """
    interpolation = "zero" if method == "sgde_zero" else "boundaries"

    samples, bounds, natafType = load_data_set(functionName, numSamples,
                                               numDims)

    # do kfold cross validation
    crossEntropyValidation = np.zeros((kfold, 2))
    learnSamples, validationSamples = splitset(samples, splitPercentage=0.7)

    def uniform_kstests(uniformSamples, titleLabel, numPoints):
        """KS-test every column against Uniform(0, 1); plot it if requested."""
        results = [None] * numDims
        for idim in range(numDims):
            samples1d = uniformSamples[:, idim]
            res_test = kstest(samples1d, Uniform(0, 1).cdf)
            results[idim] = res_test.statistic, res_test.pvalue
            if plot:
                plt.figure()
                # NOTE(review): ``normed`` was removed in matplotlib 3.1
                # (replaced by ``density``) -- confirm the targeted version.
                plt.hist(samples1d, cumulative=True, normed=True)
                xs = np.linspace(0, 1, numPoints)
                plt.plot(xs, [Uniform(0, 1).cdf(xi) for xi in xs])
                plt.title("%s: %i, %s" % (titleLabel, idim, results[idim]))
        return results

    # (stats key, shuffle flag, console label, plot-title label, plot points)
    variants = (
        ("shuffled", True, "shuffled    ", "shuffled", 10),
        ("not_shuffled", False, "not shuffled", "not shuffled", 1000),
    )

    stats = {}
    for i in range(kfold):
        print("=" * 100)
        print("run (%s)= %i/%i" % (method, i + 1, kfold))
        print("=" * 100)
        print("valid: %i x %i (mean=%g, var=%g)" %
              (validationSamples.shape[0], validationSamples.shape[1],
               np.mean(validationSamples), np.var(validationSamples)))

        np.random.seed(i * 123456 + i % 2)
        trainSamples, testSamples = splitset(learnSamples,
                                             splitPercentage=1. - 1. / kfold)

        if "sgde" in method:
            dist, stats[i] = estimateSGDEDensity(functionName,
                                                 trainSamples,
                                                 testSamples,
                                                 bounds=bounds,
                                                 iteration=i,
                                                 plot=plot,
                                                 label=method,
                                                 out=out,
                                                 candidates=candidates,
                                                 interpolation=interpolation)
        elif "kde" in method:
            dist, stats[i] = estimateKDEDensity(
                functionName,
                trainSamples,
                testSamples,
                iteration=i,
                plot=plot,
                label=method,
                out=out,
                bandwidthOptimizationTypeStr=bandwidthOptimizationType)
        elif "nataf" in method:
            # estimate nataf density
            dist, stats[i] = estimateNatafDensity(functionName,
                                                  natafType,
                                                  testSamples,
                                                  iteration=i,
                                                  bounds=bounds,
                                                  plot=plot,
                                                  label=method,
                                                  out=out)
        else:
            raise AttributeError("unknown config '%s'" % method)

        # evaluate the distribution according to the validation set (once;
        # the value is stored in both places)
        crossEntropy = dist.crossEntropy(validationSamples)
        crossEntropyValidation[i, 0] = i
        crossEntropyValidation[i, 1] = crossEntropy
        stats[i]["crossEntropyValidation"] = crossEntropy
        stats[i]["validationSamples"] = validationSamples
        stats[i]["samples"] = {"shuffled": {}, "not_shuffled": {}}

        for key, shuffle, consoleLabel, titleLabel, numPoints in variants:
            record = stats[i]["samples"][key]
            record["rvs"] = dist.rvs(numSamples, shuffle=shuffle)
            record["uniform_validation"] = dist.cdf(validationSamples,
                                                    shuffle=shuffle)
            kstests = uniform_kstests(record["uniform_validation"],
                                      titleLabel, numPoints)
            if shuffle:
                print("-" * 80)
            print(consoleLabel, kstests, np.min(kstests), np.max(kstests))
            if plot:
                plt.show()
            record["kstests"] = kstests

        print("CV valid = %g" % crossEntropyValidation[i, 1])

        # write results to file
        if out:
            out_crossEntropy = os.path.join(
                "data", method, "%s.%s.validation.cross_entropies.csv" %
                (method, functionName))
            # Rows 0..i are filled at this point; ``[:i]`` left out the round
            # that just finished (and never wrote the last one).
            np.savetxt(out_crossEntropy, crossEntropyValidation[:i + 1, :])

            # save stats to pickle; pickle needs a binary file, and the
            # ``with`` block closes it even if dumping fails
            out_stats = os.path.join(
                "data", method,
                "%s.%s.best.stats.pkl" % (method, functionName))
            with open(out_stats, "wb") as fd:
                pkl.dump(stats, fd)

Example #11

0

Show file

File: practiceset-statistics1.py Project: alyssabrady6/BME6311

# Generate sample data: 100 rolls of a 12-sided die (upper bound is exclusive).
rolls = randint(1, 13, 100)
print(rolls)
print(mean(rolls))

# Demonstration of the central limit theorem.
# Seed the random number generator so the sample means are reproducible.
seed(1)

# Calculate the mean of 100 dice rolls, 1000 times.
means = [mean(randint(1, 13, 100)) for _ in range(1000)]
# Plot the distribution of sample means.  The histogram is not normalised,
# so the x axis holds the sample means and the y axis holds counts.
pyplot.hist(means)
plt.xlabel("Mean of 100 rolls")
plt.ylabel("Frequency")
plt.title("Histogram plot for 100 rolls 12-sided die")
pyplot.show()

# KS test of the z-scored sample means against the standard normal.
# The result is printed: as a bare expression it is silently discarded when
# this file runs as a script.
ks_stat, ks_p = stats.kstest(stats.zscore(means), "norm")
print('KS statistic={}, p={}'.format(ks_stat, ks_p))

# Shapiro-Wilk normality test
from scipy.stats import shapiro
stat, p = shapiro(means)
print('Statistics={}, p={}'.format(stat, p))
alpha = 0.05
if p > alpha:
    print('Sample looks Normal so we do not reject H0')
else:
    print('Sample does not look Normal so we reject H0')

Example #12

0

Show file

import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize
from astropy.modeling import models, fitting
from scipy.stats import norm, stats

plt.clf()
f, (ax1, ax2, ax3, ax4) = plt.subplots(4, sharex=True, sharey=True)

## load data
fluxes = np.loadtxt("fluxes.dat")

# - normality tests on the raw fluxes: D'Agostino-Pearson and a KS test
#   against the standard normal distribution
ptest = scipy.stats.mstats.normaltest(fluxes)
kstest = stats.kstest(fluxes, 'norm')

print(ptest)
print(kstest)

## Following  http://stackoverflow.com/questions/7805552/fitting-a-histogram-with-python

bins = np.linspace(0, 1, 40)
# ``normed`` was removed from hist() in matplotlib 3.1; ``density=True`` is
# the replacement and normalises the histogram the same way.
n, bins, patches = ax1.hist(fluxes,
                            bins,
                            density=True,
                            facecolor='green',
                            alpha=0.75)
(mu, sigma) = norm.fit(fluxes)
# ``mlab.normpdf`` was removed in matplotlib 3.1; scipy's norm.pdf (already
# imported) evaluates the same Gaussian density.
y = norm.pdf(bins, mu, sigma)
l = ax1.plot(bins, y, 'r--', linewidth=2, label='mlab.normpdf')

Example #13

0

Show file

    # Silence all warnings for the remainder of this block.
    warnings.simplefilter('ignore')

    # For every candidate distribution name: fit it to the travel-time
    # column, plot the fitted pdf and record its KS test result.
    for dist_name in dist_names:
        dist = getattr(scipy.stats, dist_name)
        param = dist.fit(df['mean_travel_time'])
        # The last two fitted values are passed as loc and scale; anything
        # before them is forwarded as shape parameters.
        pdf_fitted = dist.pdf(x, *param[:-2], loc=param[-2],
                              scale=param[-1]) * size
        # NOTE(review): only the y values are given, so the curve is drawn
        # against the array index rather than against ``x`` -- confirm that
        # this is intended.
        plt.plot(pdf_fitted, label=dist_name)
        plt.xlabel("Trip Duration (Minutes)")
        plt.ylabel("Frequency")
        plt.xlim(0, 50)
        plt.ylim(0, 8000)

        params[dist_name] = param
        # Kolmogorov-Smirnov test of the data against the fitted distribution
        D, p = stats.kstest(df['mean_travel_time'], dist_name, args=param)
        print("p value for " + dist_name + " = " + str(p))
        print("D value for " + dist_name + " = " + str(D) + "\n")
        dist_results.append((dist_name, p))
        dist_resultsD.append((dist_name, D))

    plt.legend(loc='upper right')
    plt.show()

    # Best fit = the distribution with the largest p-value ...
    best_dist, best_p = (max(dist_results, key=lambda item: item[1]))

    # ... unless even that p-value is below 0.001, in which case the
    # distribution with the smallest D statistic is chosen instead.
    if best_p < 0.001:
        best_dist, best_D = (min(dist_resultsD, key=lambda item: item[1]))

    print("Best fitting distribution: " + str(best_dist))

Example #14

0

Show file

def main():
    """Print correlation, normality and OLS statistics for ``finaldata.json``.

    The JSON file maps each application name to a triple
    ``(cgscore, issuescore, classSize)`` of mappings.  For every application
    with more than three keys shared by ``issuescore`` and ``cgscore`` the
    function prints Spearman/Kendall correlations, KS normality tests and an
    OLS fit; otherwise it prints a ``FAILURE`` line.  Afterwards the same
    correlations, two one-way ANOVAs and three OLS fits are printed for the
    data pooled over all applications.

    If the file cannot be read or parsed, ``"Run analysis"`` is printed and
    the interpreter exits.
    """
    finaldatafile = "finaldata.json"
    try:
        with open(finaldatafile) as data_file:
            finalData = json.load(data_file)
    except (IOError, ValueError):
        # Missing/unreadable file or invalid JSON.  Catching only these keeps
        # unrelated errors (and Ctrl-C) from being hidden behind the hint.
        print("Run analysis")
        raise SystemExit

    def report(label, value):
        """Print one indented ``label + value`` result line."""
        print(" " * 8 + label + str(value))

    # ---- per-application statistics ------------------------------------
    for appliName in finalData:
        cgscore, issuescore, classSize = finalData[appliName]

        # Keys of issuescore that also have a call-graph score / class size.
        cgKeys = [key for key in issuescore if key in cgscore]
        sizeKeys = [key for key in issuescore if key in classSize]
        modelKeys = [key for key in sizeKeys if key in cgscore]
        j = len(cgKeys)

        if j <= 3:
            print("FAILURE : " + appliName)
            continue

        issueCallgraphValueForStats = [issuescore[key] for key in cgKeys]
        callGraphValueForStats = [cgscore[key] for key in cgKeys]
        issueSizeValueForStats = [issuescore[key] for key in sizeKeys]
        classSizeValueForStats = [classSize[key] for key in sizeKeys]
        issueForModel = [issuescore[key] for key in modelKeys]
        callGraphForModel = [cgscore[key] for key in modelKeys]
        classSizeForModel = [classSize[key] for key in modelKeys]

        spearmanRho, spearmanP = spearmanr(
            issueCallgraphValueForStats, callGraphValueForStats)
        kendallTau, kendallP = kendalltau(
            issueCallgraphValueForStats, callGraphValueForStats)
        spearmanRho2, spearmanP2 = spearmanr(
            issueSizeValueForStats, classSizeValueForStats)
        kendallTau2, kendallP2 = kendalltau(
            issueSizeValueForStats, classSizeValueForStats)

        # The issue-score KS test is reported in both sections; it has the
        # same input each time, so it is computed once.
        ksIssueD, ksIssueP = kstest(list(issuescore.values()), "norm")
        ksCgD, ksCgP = kstest(list(cgscore.values()), "norm")
        ksSizeD, ksSizeP = kstest(list(classSize.values()), "norm")

        print(appliName)
        print("--- API Call <> Issue")
        report("Spearman rho correlation coefficient = ", spearmanRho)
        report("Spearman p-value = ", spearmanP)
        report("Kendall Tau = ", kendallTau)
        report("Kendall p-value = ", kendallP)
        report("KS Test D = ", ksIssueD)
        report("KS p-value = ", ksIssueP)
        report("KS Test D = ", ksCgD)
        report("KS p-value = ", ksCgP)
        report("dataset size =", j)
        print("--- Class Size <> Issue")
        report("Spearman rho correlation coefficient = ", spearmanRho2)
        report("Spearman p-value = ", spearmanP2)
        report("Kendall Tau = ", kendallTau2)
        report("Kendall p-value = ", kendallP2)
        report("KS Test D = ", ksIssueD)
        report("KS p-value = ", ksIssueP)
        report("KS Test D = ", ksSizeD)
        report("KS p-value = ", ksSizeP)

        # OLS of issues on (API calls, class size); no intercept is added
        # for the per-application model.
        y = issueForModel
        X = [list(row) for row in
             np.array([callGraphForModel, classSizeForModel]).transpose()]
        results = sm.OLS(y, X).fit()
        print(results.summary(yname="issues", xname=("APIcalls", "ClassSize")))

    print("|" * 80)
    print("-" * 80)
    print("-" * 80)
    print("|" * 80)

    # ---- statistics pooled over all applications -----------------------
    issueForGlobalModel = []
    callGraphForGlobalModel = []
    classSizeForGlobalModel = []
    issueGlobalCallgraphValueForStats = []
    callGlobalGraphValueForStats = []
    NOissueGlobalCallgraphValueForStats = []
    issueGlobalSizeValueForStats = []
    classGlobalSizeValueForStats = []
    anova1issue = []
    anova2issue = []

    for appliName in finalData:
        cgscore, issuescore, classSize = finalData[appliName]

        for key in issuescore:
            inCallGraph = key in cgscore
            inClassSize = key in classSize

            if inCallGraph:
                issueGlobalCallgraphValueForStats.append(issuescore[key])
                callGlobalGraphValueForStats.append(cgscore[key])
            else:
                NOissueGlobalCallgraphValueForStats.append(issuescore[key])

            if inClassSize:
                issueGlobalSizeValueForStats.append(issuescore[key])
                classGlobalSizeValueForStats.append(classSize[key])
                if inCallGraph:
                    issueForGlobalModel.append(issuescore[key])
                    callGraphForGlobalModel.append(cgscore[key])
                    # The class-size regressor must hold the class size;
                    # appending the issue score here regressed the issue
                    # scores on themselves.
                    classSizeForGlobalModel.append(classSize[key])

        # Call-graph scores split by whether the key has an issue score.
        for key in cgscore:
            if key in issuescore:
                anova1issue.append(cgscore[key])
            else:
                anova2issue.append(cgscore[key])

    spearmanGlobalRho, spearmanGlobalP = spearmanr(
        issueGlobalCallgraphValueForStats, callGlobalGraphValueForStats)
    kendallGlobalTau, kendallGlobalP = kendalltau(
        issueGlobalCallgraphValueForStats, callGlobalGraphValueForStats)
    spearmanGlobalRho2, spearmanGlobalP2 = spearmanr(
        issueGlobalSizeValueForStats, classGlobalSizeValueForStats)
    kendallGlobalTau2, kendallGlobalP2 = kendalltau(
        issueGlobalSizeValueForStats, classGlobalSizeValueForStats)

    fvalueanova1, pvalueanova1 = f_oneway(
        issueGlobalCallgraphValueForStats, NOissueGlobalCallgraphValueForStats)
    fvalueanova2, pvalueanova2 = f_oneway(anova1issue, anova2issue)

    print(len(NOissueGlobalCallgraphValueForStats))
    print("--- Correlation : API Call <> Issue")
    report("Spearman rho correlation coefficient = ", spearmanGlobalRho)
    report("Spearman p-value = ", spearmanGlobalP)
    report("Kendall Tau = ", kendallGlobalTau)
    report("Kendall p-value = ", kendallGlobalP)
    report("ANOVA F-value = ", fvalueanova1)
    report("ANOVA p-value = ", pvalueanova1)
    report("ANOVA F-value = ", fvalueanova2)
    report("ANOVA p-value = ", pvalueanova2)
    print("--- Correlation : Class Size <> Issue")
    report("Spearman rho correlation coefficient = ", spearmanGlobalRho2)
    report("Spearman p-value = ", spearmanGlobalP2)
    report("Kendall Tau = ", kendallGlobalTau2)
    report("Kendall p-value = ", kendallGlobalP2)

    print("_" * 80)
    print("_" * 80)
    print("-- GLOBAL OLS --")
    y = issueForGlobalModel
    X = [list(row) for row in
         np.array([callGraphForGlobalModel, classSizeForGlobalModel]).transpose()]
    X = sm.add_constant(X, prepend=False)
    results = sm.OLS(y, X).fit()
    print(results.summary(yname="issues", xname=("APIcalls", "ClassSize", "const")))

    print("API CALLS only")
    X = sm.add_constant(callGraphForGlobalModel, prepend=False)
    results = sm.OLS(y, X).fit()
    print(results.summary(yname="issues", xname=["APIcalls", "const"]))

    print("Size only")
    X = sm.add_constant(classSizeForGlobalModel, prepend=False)
    results = sm.OLS(y, X).fit()
    print(results.summary(yname="issues", xname=["ClassSize", "const"]))

Example #15

0

Show file

File: uniform_continuous.py Project: AndreaCensi/stochastic_testing

 def pvalue(self):
     """Return the KS-test p-value of the samples against a uniform law.

     ``self.samples`` is rescaled from ``self.bounds`` (``(a, b)``) onto
     the unit interval and tested against scipy's ``'uniform'``
     distribution.
     """
     lower, upper = self.bounds
     width = upper - lower
     rescaled = (self.samples - lower) / width
     # Index 1 of the KS result is the p-value; the statistic is not needed.
     return kstest(rescaled, 'uniform')[1]

Example #16

0

Show file

# Draw 100 values from a normal distribution (mu and sigma are defined
# outside this excerpt), print their mean and show their histogram.
x = np.random.normal(mu, sigma, 100)
print(mean(x))
pyplot.hist(x)
pyplot.show()
# Means of 1000 independent samples of 100 normal draws each.
# NOTE(review): the name ``norm`` would shadow scipy.stats.norm if that is
# imported in this file -- confirm.
norm=[np.mean(np.random.normal(mu, sigma, 100)) for _i in range(1000)]
print(norm)

# Histogram of the sample means.
# NOTE(review): the histogram is not normalised, so the x axis holds the
# sample means and the y axis holds counts; the axis labels below do not
# describe that -- confirm the intended labels.
pyplot.hist(norm)
plt.xlabel("Frequency")
plt.ylabel("Probability Density")
plt.title ("Histogram plot for 100 samples from Random Normal Distribution")
pyplot.show()

# KS test of the z-scored sample means against the standard normal.
# NOTE(review): the result is neither stored nor printed, so nothing is
# shown when this runs as a plain script.
stats.kstest(stats.zscore(norm), "norm")


# Poisson: 100 draws with lambda=3, their mean and their histogram.
seed (2)
t=np.random.poisson(lam=3,size=(100))
print(mean(t))
pyplot.hist(t)
plt.xlabel("Frequency")
plt.ylabel("Probability Density")
plt.title ("Histogram plot for Random Poisson Distribution of 100 samples (lambda=3)")
pyplot.show()

# Means of 1000 independent samples of 100 Poisson(3) draws each.
s = [np.mean(np.random.poisson(3,100)) for _i in range(1000)]
pyplot.hist(s)
plt.xlabel("Frequency")

Example #17

0

Show file

import numpy as np
from numpy import var, std
from scipy.stats import stats
from statistic.check.util import s_2, sigma_2

__author__ = 'zzt'

if __name__ == '__main__':
    sample = [420, 500, 920, 1380, 1510, 1650, 1760, 2100, 2300, 2350]
    # scipy's ``expon`` has no shape parameters, so the positional
    # distribution arguments are (loc, scale).  Passing only ``1500.0`` set
    # loc=1500, i.e. an exponential shifted to start at 1500.  To test
    # against an exponential with mean 1500 the value must be the scale.
    print(stats.kstest(sample, 'expon', (0, 1500.0)))
    x = np.linspace(-15, 15, 9)
    # Four variance estimates of the same sample, printed for comparison.
    print(std(x)**2)
    print(var(x))
    print(s_2(x))
    print(sigma_2(x))
    # For ``norm`` the arguments are (loc, scale): mean 0, standard
    # deviation 9.
    print(stats.kstest(x, 'norm', [0, 9]))

Example #18

0

Show file

def def_kstest(rvs1, cdf1, args1=(), alternative1='two-sided'):
    """Forward the arguments to ``kstest`` by keyword and return its result."""
    return kstest(
        rvs=rvs1,
        cdf=cdf1,
        args=args1,
        alternative=alternative1,
    )

Example #19

0

Show file

# Fitted parameters per distribution name.
params = {}

with warnings.catch_warnings():
    # Warnings are suppressed only inside this block.
    warnings.simplefilter('ignore')

    for dist_name in dist_names:
        dist = getattr(scipy.stats, dist_name)
        param = dist.fit(df['count'])

        # The last two fitted values are loc and scale; anything before them
        # is forwarded as shape parameters.
        pdf_fitted = dist.pdf(
            x,
            *param[:-2],
            loc=param[-2],
            scale=param[-1]
        ) * size
        plt.plot(pdf_fitted, label=dist_name)
        plt.xlim(0, 50)
        plt.ylim(0, 500)

        params[dist_name] = param

        # Kolmogorov-Smirnov test of the data against the fitted distribution
        D, p = stats.kstest(df['count'], dist_name, args=param)
        print("p value for %s = %s" % (dist_name, p))
        dist_results.append((dist_name, p))

    plt.legend(loc='upper right')
    plt.show()

    # The best fit is the (name, p-value) pair with the largest p-value.
    best_dist, best_p = max(dist_results, key=lambda pair: pair[1])

    print("Best fitting distribution: %s" % (best_dist,))
    print("Best p value: %s" % (best_p,))
    print("Parameters for the best fit: %s" % (params[best_dist],))