Example 1
import sys

import numpy as np
from scipy.special import kolmogorov


def kvtest(cdf1, cdf2):
    # Compare two CDF arrays evaluated on the same grid of points.
    n1 = len(cdf1)
    n2 = len(cdf2)
    if n1 != n2:
        print("Wrong", file=sys.stderr)
        sys.exit(1)

    # Supremum distance between the CDFs, scaled as in the K-S statistic.
    dn = np.max(np.abs(cdf1 - cdf2))
    dn = dn * np.sqrt(n1)

    q = kolmogorov(dn)

    return q
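A possible usage sketch (the grid and the two distributions are assumptions, not part of the original example; note that the function scales the gap by the square root of the number of grid points):

from scipy.stats import norm

x = np.linspace(-3, 3, 100)
cdf1 = norm.cdf(x)               # reference N(0, 1) CDF
cdf2 = norm.cdf(x, scale=1.1)    # slightly wider alternative
print(kvtest(cdf1, cdf2))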
Example 2
import math

import numpy as np
from scipy.special import kolmogorov


def kolmogorov_smirnov(bins1, bins2, variances1=None, variances2=None):
    # K-S comparison of two binned distributions (histograms); optional
    # per-bin variances are used to compute effective numbers of entries.
    assert bins1.shape == bins2.shape
    if variances1 is not None:
        assert bins1.shape == variances1.shape
    if variances2 is not None:
        assert bins2.shape == variances2.shape

    sum1 = np.sum(bins1)
    sum2 = np.sum(bins2)

    # Normalize each histogram to unit area.
    bins1_norm = bins1 / sum1
    bins2_norm = bins2 / sum2

    bins1_cdf = np.cumsum(bins1_norm)
    bins2_cdf = np.cumsum(bins2_norm)

    # Effective numbers of entries: sum^2 / (sum of variances).
    esum1 = None
    esum2 = None
    if variances1 is not None:
        esum1 = sum1 * sum1 / np.sum(variances1)
    if variances2 is not None:
        esum2 = sum2 * sum2 / np.sum(variances2)

    # Maximum distance between the two CDFs.
    dfmax = np.max(np.abs(bins1_cdf - bins2_cdf))

    if esum1 is not None and esum2 is not None:
        z = dfmax * math.sqrt(esum1 * esum2 / (esum1 + esum2))
    elif esum1 is not None:
        z = dfmax * math.sqrt(esum1)
    elif esum2 is not None:
        z = dfmax * math.sqrt(esum2)
    else:
        z = dfmax

    p = kolmogorov(z)
    return p
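A hypothetical usage sketch (the data and binning are assumptions): for unweighted histograms the per-bin variance equals the bin count, so the effective entry counts reduce to the totals N1 and N2 and z becomes the usual two-sample statistic dfmax * sqrt(N1*N2/(N1+N2)).

rng = np.random.default_rng(1)
bins1, edges = np.histogram(rng.normal(size=1000), bins=20, range=(-4, 4))
bins2, _ = np.histogram(rng.laplace(size=1000), bins=edges)

# Poisson bins: the variance of each count is the count itself.
p = kolmogorov_smirnov(bins1.astype(float), bins2.astype(float),
                       variances1=bins1.astype(float),
                       variances2=bins2.astype(float))
print(p)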
Example 3
def _k_ki(_p):
    return kolmogorov(kolmogi(_p))
Example 4
def _ki_k(_x):
    return kolmogi(kolmogorov(_x))
Example 5
def test_nan(self):
    assert_(np.isnan(kolmogorov(np.nan)))
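Examples 3 and 4 check that kolmogorov and kolmogi are mutual inverses; a quick numerical round trip (a standalone sketch using SciPy directly):

from scipy.special import kolmogorov, kolmogi

x = kolmogi(0.05)     # ~1.3581, the 5% Kolmogorov critical value
print(kolmogorov(x))  # ~0.05: the round trip recovers the input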
    Also, special.erfc(x) is the complementary error function, where x = sigma/sqrt(2);
    i.e. the p-value-to-sigma table at https://en.wikipedia.org/wiki/Normal_distribution#Standard_deviation_and_tolerance_intervals
    is actually a table of erf(x), erfc(x), and 1./erfc(x) for sigma values from 1 to 6,

    and special.erfcinv(alpha)*np.sqrt(2.) will return the significance level in sigma.

'''
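As a quick check of that conversion (a standalone snippet, not part of the original):

import numpy as np
from scipy import special

# A two-sided p-value of 0.05 corresponds to the familiar ~1.96 sigma.
print(special.erfcinv(0.05) * np.sqrt(2.))   # ~1.9600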

# With equal sample sizes n, sqrt((n1 + n2)/(n1 * n2)) reduces to sqrt(2/n).
smooth_term   = np.sqrt((2.0*num_smooth)/(num_smooth*num_smooth))
featured_term = np.sqrt((2.0*num_featured)/(num_featured*num_featured))

# Scaled K-S statistics: dividing by sqrt(2/n) is the same as multiplying
# by sqrt(n1*n2/(n1 + n2)).
cval_smooth    = dist_smooth/smooth_term
cval_smooth_nb = dist_smooth_nb/smooth_term
cval_featured    = dist_featured/featured_term
cval_featured_nb = dist_featured_nb/featured_term

# Kolmogorov p-values for each scaled statistic.
p_smooth    = special.kolmogorov(cval_smooth)
p_smooth_nb = special.kolmogorov(cval_smooth_nb)
p_featured    = special.kolmogorov(cval_featured)
p_featured_nb = special.kolmogorov(cval_featured_nb)

# Convert p-values to Gaussian significance, as described in the docstring.
sigma_smooth    = special.erfcinv(p_smooth)*np.sqrt(2.)
sigma_smooth_nb = special.erfcinv(p_smooth_nb)*np.sqrt(2.)
sigma_featured    = special.erfcinv(p_featured)*np.sqrt(2.)
sigma_featured_nb = special.erfcinv(p_featured_nb)*np.sqrt(2.)


# Critical K-S distances at roughly the 5% and 1% levels
# (c = 1.36 and c = 1.63 are the standard Kolmogorov coefficients).
c_2sig = 1.36
dcrit_smooth_2sig   = c_2sig*smooth_term
dcrit_featured_2sig = c_2sig*featured_term

c_3sig = 1.63
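These coefficients can be recovered from SciPy's inverse Kolmogorov survival function (a standalone check; the "2 sig"/"3 sig" names above are loose labels for roughly the 5% and 1% levels):

from scipy.special import kolmogi

print(kolmogi(0.05))   # ~1.3581, compare c_2sig = 1.36
print(kolmogi(0.01))   # ~1.6276, compare c_3sig = 1.63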
Example 10
graph(sample)

exp_df = [exp_fr(l, val) for val in sorted(sample)]
exp_df = np.array(exp_df)
df = [cdf(val) for val in sorted(sample)]
df = np.array(df)
exp_df_eps = [exp_fr(l, val + eps) for val in sorted(sample)]
exp_df_eps = np.array(exp_df_eps)
df_eps = [cdf(val + eps) for val in sorted(sample)]
df_eps = np.array(df_eps)

# Take the larger of the two maximum gaps, evaluated at the sample points
# and just past them (val + eps), to bracket the jumps of the step function.
D_n = max(abs(exp_df - df))
D_n_1 = max(abs(exp_df_eps - df_eps))
D_n = max(D_n, D_n_1)
statistic = D_n * math.sqrt(len(sample))
k_quantil = kolm_quantil(alf)

p_value = kolmogorov(statistic)

print(f"D_N = {D_n}")
print(f"Критическая область имеет вид: Statistic > {k_quantil}")
print(f"Критическая константа = {k_quantil}")
print(f"Статистика = {statistic}")

if statistic > k_quantil:
    print("Гипотеза H0 отклоняется")
else:
    print("Гипотеза H0 принимается")

print(f"P-value = {p_value}")
# Show the probability of a gap at least as big as 0, 0.5 and 1.0.

import numpy as np
from scipy.special import kolmogorov
from scipy.stats import kstwobign
kolmogorov([0, 0.5, 1.0])
# array([ 1.        ,  0.96394524,  0.26999967])

# Compare a sample of size 1000 drawn from a Laplace(0, 1) distribution against
# the target distribution, a Normal(0, 1) distribution.

from scipy.stats import norm, laplace
n = 1000
np.random.seed(seed=233423)
lap01 = laplace(0, 1)
x = np.sort(lap01.rvs(n))
np.mean(x), np.std(x)
# (-0.083073685397609842, 1.3676426568399822)

# Construct the Empirical CDF and the K-S statistic Dn.

target = norm(0,1)  # Normal mean 0, stddev 1
cdfs = target.cdf(x)
ecdfs = np.arange(n+1, dtype=float)/n
gaps = np.column_stack([cdfs - ecdfs[:n], ecdfs[1:] - cdfs])
Dn = np.max(gaps)
Kn = np.sqrt(n) * Dn
print('Dn=%f, sqrt(n)*Dn=%f' % (Dn, Kn))
# Dn=0.058286, sqrt(n)*Dn=1.843153
print('\n'.join(['For a sample of size n drawn from a N(0, 1) distribution:',
  ' the approximate Kolmogorov probability that sqrt(n)*Dn>=%f is %f' % (Kn, kolmogorov(Kn)),
  ' the approximate Kolmogorov probability that sqrt(n)*Dn<=%f is %f' % (Kn, kstwobign.cdf(Kn))]))
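Since kolmogorov is the survival function of kstwobign, the two probabilities just printed are complementary:

print(np.isclose(kolmogorov(Kn) + kstwobign.cdf(Kn), 1.0))   # True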
# Imports assumed by this excerpt:
import matplotlib.pyplot as plt
import numpy as np
import scipy.special as spc
from scipy.integrate import quad
from scipy.optimize import minimize_scalar

for i in range(0, 4):
    plt.plot(
        x,
        TrucGaus(x, mu[i], sigma[i]),
        label=r"$G_{{T,{}}}:\mu={:.1f}, \sigma={:.1f}, e={:.1f}, \sqrt{{v}}={:.1f}$, Below 65pts={:.1f}%".format(
            i, mu[i], sigma[i], expval(mu[i], sigma[i]),
            varval(mu[i], sigma[i]), ratio(mu[i], sigma[i])))

plt.xlabel("Score")
plt.ylabel("Probability density")
plt.xlim(0, 100)
plt.ylim(ymin=0)
plt.legend()
plt.tight_layout()
plt.savefig('all_trunc_gauss.png')

for i in range(0, 4):
    for j in range(i + 1, 4):

        def dist(x):
            firstcdf = quad(TrucGaus, 0, x, args=(mu[i], sigma[i]))[0]
            secondcdf = quad(TrucGaus, 0, x, args=(mu[j], sigma[j]))[0]
            return -(firstcdf - secondcdf)**2

        maxx = minimize_scalar(dist, method='bounded', bounds=(0, 100)).x
        distmax = np.sqrt(-dist(maxx))
        print(i, j, maxx, (spc.kolmogorov(distmax) / distmax)**2)
Example 13
def Kolmogolov(self, data):
    temp = special.kolmogorov(data)
    return temp
Example 14
from scipy.special import kolmogorov
import numpy

n = 5
sample = [-1.2, 0.2, -0.6, 0.8, -1.0]

# Standard normal CDF evaluated at the sorted sample.
phi_sample = numpy.array([0.115, 0.159, 0.274, 0.580, 0.788])

# One-sided K-S distances and the two-sided statistic (d ~= 0.326 here).
DPlus = ((numpy.arange(1.0, n + 1) / n) - phi_sample).max()
DMinus = (phi_sample - (numpy.arange(0.0, n) / n)).max()

d = max([DPlus, DMinus])

scipy_value = kolmogorov(numpy.sqrt(n) * d)


def summer(x):
    # Kolmogorov CDF via the series
    # K(x) = 1 - 2 * sum_{k>=1} (-1)**(k-1) * exp(-2 * k**2 * x**2),
    # truncated at k = 1000.
    arr = numpy.arange(1, 1001)
    constant_quantity = -2 * x * x
    power_array = constant_quantity * arr * arr
    alternate_array = numpy.array([1, -1] * 500)
    powered_array = numpy.exp(power_array)

    return 1 - (2 * numpy.sum(alternate_array * powered_array))


# kolmogorov() is the survival function, i.e. 1 minus the CDF above.
manually_calculated_value = 1 - summer(numpy.sqrt(n) * d)
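The hand-rolled series should agree with SciPy to numerical precision; a quick check, assuming the code above has run:

print(scipy_value, manually_calculated_value)
print(numpy.isclose(scipy_value, manually_calculated_value))   # True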
def neutral_covariance_test(ts,
                            ntests=None,
                            regress='f',
                            formula='DF~f+I(f**2)',
                            varformula=None,
                            standard=True,
                            method='logitnorm',
                            verbose=False,
                            ncores=1,
                            seed=0):

    if method not in ['Kolmogorov', 'logitnorm', 'uncorrected']:
        raise ValueError(
            'Unknown method. Must be either "Kolmogorov", "logitnorm", or "uncorrected".'
        )

    S = ts.shape[1]  # number of species
    m = ts.shape[0]  # number of timepoints

    # check if timeseries are normalized
    sum_ts = np.sum(ts, axis=1)
    if max(sum_ts) > 1.01 or min(sum_ts) < 0.99:
        raise ValueError('Timeseries is not normalized.')

    upperbound = 0.2889705

    if ntests is None:
        ntests = min(S, int((upperbound * S)**3))
    elif ntests > (upperbound * S)**3:
        print(
            'Warning: ntests input is large relative to number of species, leading to high false-positive rates for P<0.05'
        )

    pvalues = cv_test(ntests,
                      ts,
                      regress,
                      formula,
                      varformula,
                      ncores=ncores,
                      seed=seed)

    if verbose:
        print("pvals", pvalues)

    ntests = len(pvalues)
    D = scipy.stats.kstest(pvalues, 'uniform').statistic

    if verbose:
        print("D", D)

    if method == 'Kolmogorov':
        Q = predict(ntests, S)
        if verbose:
            print("Q", Q)
        nstar_est = ntests / (1.0 + np.exp(-Q))
        if verbose:
            print("nstar_est", nstar_est)
        # Complementary cumulative Kolmogorov distribution.
        P = kolmogorov(D * np.sqrt(nstar_est))

    return P
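The core of the 'Kolmogorov' branch is a uniformity test on the collected p-values; a minimal standalone sketch of just that step (the simulated p-values and the omission of the nstar correction are both assumptions):

import numpy as np
import scipy.stats
from scipy.special import kolmogorov

rng = np.random.default_rng(0)
pvalues = rng.uniform(size=200)              # stand-in for cv_test output
D = scipy.stats.kstest(pvalues, 'uniform').statistic
P = kolmogorov(D * np.sqrt(len(pvalues)))    # asymptotic K-S p-value
print(P)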