import sys
import numpy as np
from scipy.special import kolmogorov


def kvtest(cdf1, cdf2):
    n1 = len(cdf1)
    n2 = len(cdf2)
    if n1 != n2:
        print("Wrong", file=sys.stderr)
        sys.exit(1)
    # K-S statistic: largest absolute gap between the two CDFs, scaled by
    # sqrt(n) before evaluating the asymptotic survival function.
    dn = np.max(np.abs(cdf1 - cdf2))
    dn = dn * np.sqrt(n1)
    q = kolmogorov(dn)
    return q
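# A minimal usage sketch for kvtest above -- the two samples and the shared
# evaluation grid are invented for illustration, not from the original.
import numpy as np

rng = np.random.default_rng(0)
a = np.sort(rng.normal(0.0, 1.0, 100))
b = np.sort(rng.normal(0.2, 1.0, 100))
grid = np.sort(np.concatenate([a, b]))
# Empirical CDFs of both samples evaluated on the common grid.
cdf1 = np.searchsorted(a, grid, side='right') / len(a)
cdf2 = np.searchsorted(b, grid, side='right') / len(b)
print(kvtest(cdf1, cdf2))  # survival probability for the observed sqrt(n)*Dn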
import math
import numpy as np
from scipy.special import kolmogorov


def kolmogorov_smirnov(bins1, bins2, variances1=None, variances2=None):
    assert bins1.shape == bins2.shape
    if variances1 is not None:
        assert bins1.shape == variances1.shape
    if variances2 is not None:
        assert bins2.shape == variances2.shape
    sum1 = np.sum(bins1)
    sum2 = np.sum(bins2)
    # Normalize both histograms and build their cumulative distributions.
    bins1_norm = bins1 / sum1
    bins2_norm = bins2 / sum2
    bins1_cdf = np.cumsum(bins1_norm)
    bins2_cdf = np.cumsum(bins2_norm)
    # Effective number of entries, (sum of weights)^2 / (sum of variances);
    # for unweighted counts this reduces to the number of entries.
    esum1 = None
    esum2 = None
    if variances1 is not None:
        esum1 = sum1 * sum1 / np.sum(variances1)
    if variances2 is not None:
        esum2 = sum2 * sum2 / np.sum(variances2)
    dfmax = np.max(np.abs(bins1_cdf - bins2_cdf))
    if esum1 and esum2:
        z = dfmax * math.sqrt(esum1 * esum2 / (esum1 + esum2))
    elif esum1:
        z = dfmax * math.sqrt(esum1)
    elif esum2:
        z = dfmax * math.sqrt(esum2)
    else:
        z = dfmax
    p = kolmogorov(z)
    return p
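# A hedged example of calling kolmogorov_smirnov above on two hypothetical
# histograms with identical binning; assuming raw Poisson counts, the per-bin
# variance equals the bin content itself.
import numpy as np

bins1 = np.array([12., 25., 40., 31., 18., 7.])
bins2 = np.array([10., 28., 37., 35., 15., 9.])
p = kolmogorov_smirnov(bins1, bins2,
                       variances1=bins1.copy(),  # Poisson: var = count
                       variances2=bins2.copy())
print(p)  # probability that the two histograms share a parent distribution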
from scipy.special import kolmogorov, kolmogi


def _k_ki(_p):
    # Round trip p -> x -> p through the inverse survival function.
    return kolmogorov(kolmogi(_p))


def _ki_k(_x):
    # Round trip x -> p -> x.
    return kolmogi(kolmogorov(_x))
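# A quick round-trip sanity check for _k_ki and _ki_k above; the grid of test
# points and the np.isclose tolerance are arbitrary choices, not from the
# original test suite.
import numpy as np

for p in [0.01, 0.1, 0.5, 0.9, 0.99]:
    assert np.isclose(_k_ki(p), p)  # kolmogorov(kolmogi(p)) == p
for x in [0.5, 1.0, 1.5, 2.0]:
    assert np.isclose(_ki_k(x), x)  # kolmogi(kolmogorov(x)) == x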
def test_nan(self):
    assert_(np.isnan(kolmogorov(np.nan)))
Also, special.erfc(x) is the complementary error function, where
x = sigma/sqrt(2); i.e. the table of p-value-to-sigma values at
https://en.wikipedia.org/wiki/Normal_distribution#Standard_deviation_and_tolerance_intervals
is actually a table of erf(x), erfc(x), and 1./erfc(x) for sigma values from
1 to 6, and special.erfcinv(alpha)*np.sqrt(2.) returns the significance
level in sigma.
'''
smooth_term = np.sqrt((2.0 * num_smooth) / (num_smooth * num_smooth))
featured_term = np.sqrt((2.0 * num_featured) / (num_featured * num_featured))
cval_smooth = dist_smooth / smooth_term
cval_smooth_nb = dist_smooth_nb / smooth_term
cval_featured = dist_featured / featured_term
cval_featured_nb = dist_featured_nb / featured_term
p_smooth = special.kolmogorov(cval_smooth)
p_smooth_nb = special.kolmogorov(cval_smooth_nb)
p_featured = special.kolmogorov(cval_featured)
p_featured_nb = special.kolmogorov(cval_featured_nb)
sigma_smooth = special.erfcinv(p_smooth) * np.sqrt(2.)
sigma_smooth_nb = special.erfcinv(p_smooth_nb) * np.sqrt(2.)
sigma_featured = special.erfcinv(p_featured) * np.sqrt(2.)
sigma_featured_nb = special.erfcinv(p_featured_nb) * np.sqrt(2.)
# Critical-value coefficients for the script's 2-sigma and 3-sigma thresholds.
c_2sig = 1.36
dcrit_smooth_2sig = c_2sig * smooth_term
dcrit_featured_2sig = c_2sig * featured_term
c_3sig = 1.63
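# A small worked check of the p-value-to-sigma conversion described in the
# docstring above; the 2-sigma input is an arbitrary choice for illustration.
import numpy as np
from scipy import special

p_2sigma = special.erfc(2.0 / np.sqrt(2.0))      # ~0.0455, two-sided p at 2 sigma
sigma = special.erfcinv(p_2sigma) * np.sqrt(2.)  # recovers ~2.0
print(p_2sigma, sigma)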
graph(sample)
exp_df = np.array([exp_fr(l, val) for val in sorted(sample)])
df = np.array([cdf(val) for val in sorted(sample)])
exp_df_eps = np.array([exp_fr(l, val + eps) for val in sorted(sample)])
df_eps = np.array([cdf(val + eps) for val in sorted(sample)])
# Take the largest gap on both sides of each jump of the empirical CDF.
D_n = max(abs(exp_df - df))
D_n_1 = max(abs(exp_df_eps - df_eps))
D_n = max(D_n, D_n_1)
statistic = D_n * math.sqrt(len(sample))
k_quantil = kolm_quantil(alf)
p_value = kolmogorov(statistic)
print(f"D_N = {D_n}")
print(f"The critical region is: Statistic > {k_quantil}")
print(f"Critical constant = {k_quantil}")
print(f"Statistic = {statistic}")
if statistic > k_quantil:
    print("Hypothesis H0 is rejected")
else:
    print("Hypothesis H0 is accepted")
print(f"P-value = {p_value}")
# Show the probability of a gap at least as big as 0, 0.5 and 1.0.
import numpy as np
from scipy.special import kolmogorov
from scipy.stats import kstwobign

kolmogorov([0, 0.5, 1.0])
# array([ 1.        ,  0.96394524,  0.26999967])

# Compare a sample of size 1000 drawn from a Laplace(0, 1) distribution against
# the target distribution, a Normal(0, 1) distribution.
from scipy.stats import norm, laplace

n = 1000
np.random.seed(seed=233423)
lap01 = laplace(0, 1)
x = np.sort(lap01.rvs(n))
np.mean(x), np.std(x)
# (-0.083073685397609842, 1.3676426568399822)

# Construct the Empirical CDF and the K-S statistic Dn.
target = norm(0, 1)  # Normal mean 0, stddev 1
cdfs = target.cdf(x)
ecdfs = np.arange(n + 1, dtype=float) / n
gaps = np.column_stack([cdfs - ecdfs[:n], ecdfs[1:] - cdfs])
Dn = np.max(gaps)
Kn = np.sqrt(n) * Dn
print('Dn=%f, sqrt(n)*Dn=%f' % (Dn, Kn))
# Dn=0.058286, sqrt(n)*Dn=1.843153
print(chr(10).join(['For a sample of size n drawn from a N(0, 1) distribution:',
    '  the approximate Kolmogorov probability that sqrt(n)*Dn>=%f is %f' % (Kn, kolmogorov(Kn)),
    '  the approximate Kolmogorov probability that sqrt(n)*Dn<=%f is %f' % (Kn, kstwobign.cdf(Kn))]))
for i in range(0, 4):
    plt.plot(x, TrucGaus(x, mu[i], sigma[i]),
             label=r"$G_{{T,{}}}:\mu={:.1f}, \sigma={:.1f}, e={:.1f}, "
                   r"\sqrt{{v}}={:.1f}$, Below 65pts={:.1f}%".format(
                 i, mu[i], sigma[i], expval(mu[i], sigma[i]),
                 varval(mu[i], sigma[i]), ratio(mu[i], sigma[i])))
plt.xlabel("Score")
plt.ylabel("Probability density")
plt.xlim(0, 100)
plt.ylim(bottom=0)
plt.legend()
plt.tight_layout()
plt.savefig('all_trunc_gauss.png')

for i in range(0, 4):
    for j in range(i + 1, 4):
        def dist(x):
            # Squared difference of the two truncated-Gaussian CDFs at x,
            # negated so that minimize_scalar finds the largest gap.
            firstcdf = quad(TrucGaus, 0, x, args=(mu[i], sigma[i]))[0]
            secondcdf = quad(TrucGaus, 0, x, args=(mu[j], sigma[j]))[0]
            return -(firstcdf - secondcdf) ** 2

        maxx = minimize_scalar(dist, method='bounded', bounds=[0, 100]).x
        distmax = np.sqrt(-dist(maxx))
        print(i, j, maxx, (spc.kolmogorov(distmax) / distmax) ** 2)
def Kolmogolov(self, data):
    # Thin wrapper around SciPy's Kolmogorov survival function.
    return special.kolmogorov(data)
import numpy
from scipy.special import kolmogorov

n = 5
sample = [-1.2, 0.2, -0.6, 0.8, -1.0]
# Standard normal CDF evaluated at the sorted sample.
phi_sample = numpy.array([0.115, 0.159, 0.274, 0.580, 0.788])
DPlus = ((numpy.arange(1.0, n + 1) / n) - phi_sample).max()
DMinus = (phi_sample - (numpy.arange(0.0, n) / n)).max()
d = max([DPlus, DMinus])
scipy_value = kolmogorov(numpy.sqrt(n) * d)


def summer(x):
    # Kolmogorov CDF via the first 1000 terms of the alternating series
    # 1 - 2 * sum_{k>=1} (-1)^(k-1) * exp(-2 * k^2 * x^2).
    arr = numpy.arange(1, 1001)
    constant_quantity = -2 * x * x
    power_array = constant_quantity * arr * arr
    alternate_array = numpy.array([1, -1] * 500)
    powered_array = numpy.exp(power_array)
    return 1 - (2 * sum(alternate_array * powered_array))


manually_calculated_value = 1 - summer(numpy.sqrt(n) * d)
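# As a sanity check on the truncated series above: for this x the alternating
# series converges within a handful of terms, so the manual value should match
# scipy to near machine precision (the tolerance here is a loose guess).
assert numpy.isclose(manually_calculated_value, scipy_value, atol=1e-12)
print(manually_calculated_value, scipy_value)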
import numpy as np
import scipy.stats
from scipy.special import kolmogorov


def neutral_covariance_test(ts, ntests=None, regress='f', formula='DF~f+I(f**2)',
                            varformula=None, standard=True, method='logitnorm',
                            verbose=False, ncores=1, seed=0):
    if method not in ['Kolmogorov', 'logitnorm', 'uncorrected']:
        print('Unknown input method. Must be either "Kolmogorov", '
              '"logitnorm", or "uncorrected".')
        return
    S = ts.shape[1]  # number of species
    m = ts.shape[0]  # number of timepoints
    # Check that the timeseries rows are normalized (relative abundances).
    sum_ts = np.sum(ts, axis=1)
    if max(sum_ts) > 1.01 or min(sum_ts) < 0.99:
        raise ValueError('Timeseries is not normalized.')
    upperbound = 0.2889705
    if ntests is None:
        ntests = min(S, int((upperbound * S) ** 3))
    elif ntests > (upperbound * S) ** 3:
        print('Warning: ntests input is large relative to number of species, '
              'leading to high false-positive rates for P<0.05')
    # cv_test and predict are defined elsewhere in the module.
    pvalues = cv_test(ntests, ts, regress, formula, varformula,
                      ncores=ncores, seed=seed)
    if verbose:
        print("pvals", pvalues)
    ntests = len(pvalues)
    # Under the null, the per-test p-values are uniform; test that with K-S.
    D = scipy.stats.kstest(pvalues, 'uniform').statistic
    if verbose:
        print("D", D)
    if method == 'Kolmogorov':
        Q = predict(ntests, S)
        if verbose:
            print("Q", Q)
        # Effective number of independent tests.
        nstar_est = ntests / (1.0 + np.exp(-Q))
        if verbose:
            print("nstar_est", nstar_est)
        # Complementary cumulative Kolmogorov distribution.
        P = kolmogorov(D * np.sqrt(nstar_est))
        return P