def kuiper_in_range(data1, data2, lowerbound, upperbound):
    """Run a Kuiper test on the part of ``data1`` selected through ``data2``.

    An element ``data1[i]`` is kept when ``data2[i]`` lies strictly between
    ``lowerbound`` and ``upperbound``. The kept values are handed to
    ``astrostats.kuiper`` together with ``stats.uniform.cdf`` and
    ``args=(0,90)``; the number of kept samples and the value returned by the
    test are printed.

    Parameters
    ----------
    data1 : sequence
        Values to be tested.
    data2 : sequence
        Selection values, indexed in step with ``data1``; it must be at least
        as long as ``data1``.
    lowerbound, upperbound :
        Exclusive limits applied to ``data2``.

    Returns
    -------
    None
        The results are only printed.
    """
    # NOTE(review): ``astrostats`` and ``stats`` come from module-level imports
    # that are not visible here (presumably astropy.stats and scipy.stats).
    selected = [
        data1[idx]
        for idx in range(len(data1))
        if lowerbound < data2[idx] < upperbound
    ]
    test_stat_range = astrostats.kuiper(
        selected, stats.uniform.cdf, args=(0, 90)
    )
    print("samples:", len(selected))
    print("test stat for range", lowerbound, upperbound, test_stat_range)
def ex1d():
    """Compare two Kuiper-test implementations on successive sample slices.

    Uses the module-level names ``n``, ``samp``, ``r1c``, ``f`` and ``kuiper``.
    For each index below ``n`` a slice of ``samp`` is tested with ``f.kp`` and
    with ``kuiper``, both against ``f.CDGaussian``.

    Returns
    -------
    tuple of four numpy arrays of length ``n``
        ``(ds, ps, dsci, psci)``: statistic and p-value from ``f.kp``, then
        statistic and p-value from ``kuiper``.
    """
    # One result slot per tested slice.
    own_stat = np.zeros(n)
    own_pval = np.zeros(n)
    ref_stat = np.zeros(n)
    ref_pval = np.zeros(n)

    for idx in range(n):
        begin = r1c[idx]
        # NOTE(review): the stop index is r1c[idx + 1] + r1c[idx]; confirm
        # whether r1c holds offsets or sizes, since this mixes both readings.
        end = r1c[idx + 1] + begin
        draws = samp[begin:end:1]

        own_stat[idx], own_pval[idx] = f.kp(f.CDGaussian, draws)
        ref_stat[idx], ref_pval[idx] = kuiper(draws, f.CDGaussian)

    return own_stat, own_pval, ref_stat, ref_pval
# Decorate, save and close the current figure (the Kolmogorov-Smirnov plot;
# its data were presumably drawn before this point).
plt.xscale('log')
plt.title('Kolmogorov-Smirnov test')
plt.xlabel('Number of Gaussian deviates')
plt.ylabel('p-value')
plt.ylim(0, 1)
plt.legend()
plt.savefig('plots/ex1_KS_test.pdf')
plt.close()

# Kuiper's test: p-value of the first N deviates for every N in Ns, once with
# the local implementation and once with astropy.
from astropy.stats import kuiper

p_Kuiper = np.empty_like(Ns, dtype=np.float64)
p_Kuiper_astro = np.empty_like(Ns, dtype=np.float64)
for i, N in enumerate(Ns):
    # Both routines return a pair; only the second entry (the p-value) is kept.
    _, p_Kuiper[i] = NUR_random.Kuipers_test(
        data[:N], NUR_random.normal_cdf
    )
    _, p_Kuiper_astro[i] = kuiper(data[:N], norm.cdf)

plt.scatter(Ns, p_Kuiper, label='My results')
plt.scatter(Ns, p_Kuiper_astro, label='Astropy results')
# Horizontal reference line at p = 0.05.
plt.axhline(0.05, color='gray', alpha=0.5)
plt.xscale('log')
plt.title("Kuiper's test")
plt.xlabel('Number of Gaussian deviates')
plt.ylabel('p-value')
plt.ylim(0, 1)
plt.legend()
plt.savefig('plots/ex1_Kuiper_test.pdf')
plt.close()

# Externally supplied numbers used by the code that follows.
randoms_to_test = np.loadtxt('DataFiles/randomnumbers.txt')
# Fragment. The first four statements are the tail of a range-restricted Kuiper
# helper whose header is not visible on this line: append data1[i] to
# data_in_range, run astrostats.kuiper on it against stats.uniform.cdf with
# args=(0,90), then print the sample count and the result.
# The script part that follows:
#   * prints the overall stats.kstest result for angle_differences against
#     'uniform' with args=(0,90), then calls ks_in_range for i = 0..4 with the
#     bounds i*0.2 and (i+1)*0.2 applied to velocity_diff_significances;
#   * repeats the same with astrostats.kuiper and kuiper_in_range;
#   * starts a 10-bin black histogram of angle_differences and labels its x axis.
# NOTE(review): args=(0,90) presumably means loc=0, scale=90 (uniform on
# [0, 90]) if `stats` is scipy.stats -- confirm against the imports.
data_in_range.append(data1[i]) test_stat_range = astrostats.kuiper(data_in_range, stats.uniform.cdf, args=(0,90)) print("samples:", len(data_in_range)) print("test stat for range", lowerbound, upperbound, test_stat_range) print("################ K-S TEST ################") test_stat_overall = stats.kstest(angle_differences, 'uniform', args=(0,90)) print("test_stat:", test_stat_overall) for i in range(0,5): ks_in_range(angle_differences, velocity_diff_significances, i*0.2, (i+1)*0.2) print("############# KUIPER TEST ################") test_stat_overall = astrostats.kuiper(angle_differences, stats.uniform.cdf, args=(0,90)) print("test_stat:", test_stat_overall) for i in range(0,5): kuiper_in_range(angle_differences, velocity_diff_significances, i*0.2, (i+1)*0.2) ############### Histogram showing angle differences ################## n_bins = 10 fig, axs = plt.subplots() # We can set the number of bins with the `bins` kwarg axs.hist(angle_differences, bins=n_bins, color="black") plt.xlabel("Difference between cluster and BCG angle")
# Save and close the current figure.
plt.savefig('plots/1e.png', bbox_inches='tight')
plt.close()
print('Generated plots/1e.png')

#---1.d---
# Kuiper test: self-written routine versus astropy's `kuiper`.

# One statistic slot and one probability slot per entry of n, per method.
kuip_P = np.zeros(len(n))
kuip_P_ast = np.zeros(len(n))
kuip_d = np.zeros(len(n))
kuip_d_ast = np.zeros(len(n))

# Draw n[-1] Box-Muller deviates once; every test below reuses a leading slice
# of the first returned component. The two rand_num calls stay in this order.
rand_bm = box_muller(
    rng.rand_num(n[-1]),
    rng.rand_num(n[-1]),
    mu,
    sig,
)
for i in range(len(n)):
    rand = rand_bm[0][:n[i]]
    kuip_d[i], kuip_P[i] = KS_Kuip_test(rand, gauss, mu, sig, Kuip=True)
    # NOTE(review): gauss_cdf is called without mu/sig here -- confirm that it
    # describes the same distribution the deviates were drawn from.
    kuip_d_ast[i], kuip_P_ast[i] = kuiper(rand, gauss_cdf)

# Plot both probability curves against n on a logarithmic x axis.
plt.plot(n, kuip_P_ast, label='Astropy')
plt.plot(n, kuip_P, label='Self written')
plt.title('Astropy Kuiper-Test and self-written Kuiper-Test')
plt.xlabel('N')
plt.ylabel('$P(z)$')
plt.xscale('log')
# Legend anchored at (1, 1) in axes coordinates.
lgd = plt.legend(loc=2, bbox_to_anchor=(1, 1))
plt.savefig('plots/1f.png', bbox_inches='tight')
plt.close()
print('Generated plots/1f.png')

#---1.e---
# Testing on given random numbers
filename = 'randomnumbers.txt'
# Fragment. Splits whole_range into a first and a second half, takes the largest
# absolute difference between z1_histed_cumulated and
# theoretical_distribution_cumulated over each half (Dminus, Dplus) and sums
# them into V. siglevel(V, N_numbers_val) is appended to tests_myKuiper and
# element [1] of stats.kuiper(z1, CDF_theory) to tests_theirKuiper; the two
# lists are then drawn against N_numbers with a logarithmic x axis and saved.
# NOTE(review): the usual Kuiper statistic is max(F_emp - F) + max(F - F_emp)
# over the whole range; here both terms are absolute deviations taken over the
# two halves of whole_range -- confirm this is intended.
# NOTE(review): the statements up to the second append presumably sit inside a
# loop over sample sizes whose header is not visible on this line.
# NOTE(review): `stats` must provide `kuiper`, so it is presumably
# astropy.stats rather than scipy.stats -- confirm against the imports.
first_range = whole_range[:int(len(whole_range) / 2.)] second_range = whole_range[int(len(whole_range) / 2.):] Dminus = max([ abs(z1_histed_cumulated[i] - theoretical_distribution_cumulated[i]) for i in first_range ]) Dplus = max([ abs(z1_histed_cumulated[i] - theoretical_distribution_cumulated[i]) for i in second_range ]) V = Dminus + Dplus #same sort of thing as 1C tests_myKuiper.append(siglevel(V, N_numbers_val)) tests_theirKuiper.append(stats.kuiper(z1, CDF_theory)[1]) #plotting plt.figure() plt.semilogx(N_numbers, tests_myKuiper, alpha=0.5, label='my Kuiper test') plt.semilogx(N_numbers, tests_theirKuiper, alpha=0.5, label='their Kuiper test') plt.xlabel('$N_{samples}$', fontsize=16) plt.ylabel('Kuiper test result ($P$)', fontsize=16) plt.legend(loc='best') plt.savefig('homework2problem1partdfigure1.pdf') plt.close()
# Fragment. The first two statements (storing a pair in integrals[count] and
# incrementing count) belong to a loop whose header is not visible here.
# Each stored pair is (k, 0.5 + Romberg_integration(exp(-x**2), 0, k/sqrt(2),
# order=8) / sqrt(pi)).
# NOTE(review): if Romberg_integration returns the definite integral, the
# second entry is the standard normal CDF at k -- confirm.
# The main loop runs over the 41 values of np.logspace(1, 5, 41). For each j it
# takes the first int(j) entries of BM_2, calls CDF_order_2(BM, integrals) to
# get (test, test_2), and appends the results of KStest, scip.kstest,
# Kuipers_test and two ast_stat.kuiper calls to their lists. The lists are then
# converted to arrays and column 0 of both KS arrays is plotted against x_range.
# NOTE(review): `cdf=lambda y: test_2` ignores its argument and returns the
# precomputed test_2; that is only valid if test_2 is ordered the same way as
# the data astropy evaluates the CDF on -- confirm.
# NOTE(review): in scipy.stats.kstest the N keyword is only used when the first
# argument is a string or callable; confirm that `scip` is scipy.stats and that
# N=int(j) has the intended effect.
integrals[count] = k, 0.5 + Romberg_integration( lambda x: np.exp(-x**2), 0, k / np.sqrt(2), order=8) / np.sqrt( np.pi) count += 1 ''' Loop over the numbers and do the Kuipers and the KS-test at the same time, we loose some efficiency here as we calculate the same CDF multiple times, but we do not have enough time to fix this inefficiency. ''' for j in np.logspace(1, 5, 41): BM = BM_2[:int(j)] test, test_2 = CDF_order_2(BM, integrals) KS_numbers.append(KStest(test, test_2)) KS_test_scip.append(scip.kstest(test, 'norm', N=int(j))) Kuipers.append(Kuipers_test(test, test_2)) Kuipers_astr.append( ast_stat.kuiper(BM, cdf=lambda y: 0.5 * (1 + scipy.special.erf(y / 2**(0.5))))) Kuipers_2.append((ast_stat.kuiper(BM, cdf=lambda y: test_2))) Kuipers = np.asarray(Kuipers) Kuipers_astr = np.asarray(Kuipers_astr) Kuipers_2 = np.asarray(Kuipers_2) KS_numbers = np.array(KS_numbers) KS_test_scip = np.array(KS_test_scip) x_range = np.logspace(1, 5, 41) plt.plot(x_range, KS_numbers[:, 0], label='own test') plt.plot(x_range, KS_test_scip[:, 0], label='scipy test') plt.xscale('log') plt.title('KS-test') plt.xlabel('#random numbers') plt.ylabel('statistic')
# Fragment of a loop body indexed by i; the loop header is not visible and the
# line is cut off before the body ends.
# For the current sample (curpoints, with its precomputed CDF values curCDF) it
# stores, at index i, the (D, pval) pair from four one-sample tests:
#   KStest      -> all_D, all_p
#   kstest      -> all_D_sp, all_p_sp
#   KuiperTest  -> all_D_kuiper, all_p_kuiper
#   kuiper      -> all_D_kuiper_sp, all_p_kuiper_sp
# Then, for every column j of data, it copies the first numpoints rows, passes
# the copy to quicksort, and stores the two-sample results from KStest_2sample
# and KuiperTest_2sample at [i, j].
# NOTE(review): CDFprecalc ignores its argument and returns curCDF unchanged;
# this is only valid if curpoints is already sorted and the library routines
# evaluate the CDF on the data in that same order -- confirm.
# NOTE(review): quicksort's return value is unused, so it presumably sorts
# curdata in place -- confirm.
# Tell the KS test that we already sorted the points D, pval = KStest(curpoints, curCDF, sorted=True) all_D[i] = D all_p[i] = pval # define a lambda function so scipy works without new calculations CDFprecalc = lambda x: curCDF D, pval = kstest(curpoints, CDFprecalc) all_D_sp[i] = D all_p_sp[i] = pval # Perform the Kuiper test D, pval = KuiperTest(curpoints, curCDF, sorted=True) all_D_kuiper[i] = D all_p_kuiper[i] = pval # Astropy D, pval = kuiper(curpoints, CDFprecalc) all_D_kuiper_sp[i] = D all_p_kuiper_sp[i] = pval # Compare our random numbers the 10 sets of random numbers too for j in range(data.shape[1]): curdata = np.copy(data[:numpoints, j]) # Sort the data, since we have already sorted our own points quicksort(curdata) # Calculate two sample tests D, pval = KStest_2sample(curpoints, curdata, sorted=True) all_D_2s_KS[i, j] = D all_p_2s_KS[i, j] = pval D, pval = KuiperTest_2sample(curpoints, curdata, sorted=True) all_D_2s_kuiper[i, j] = D