# ---------------------------------------------------------------------------
# Bootstrap the three RLD stopping rules.
# ---------------------------------------------------------------------------
# Number of bootstrap samples
boot_samples = 5000

# Accumulators for the three stopping criteria (200 redshift bins each);
# each bootstrap replicate's estimate is summed in.
z_dist_boot = np.zeros(200)
z_dist_ideal_boot = np.zeros(200)
z_dist_min_boot = np.zeros(200)

Nsample = boot_samples
# Partition the bootstrap replicates across MPI ranks.
L0, L1 = df.get_limits(Nsample, size, rank)

# Actually run the methods and save their outputs
# (range, not the Python-2-only xrange, behaves identically here on 2 and 3)
for obs_num in range(L0, L1):
    # Resample the observed data with replacement.
    boot_phi = np.random.choice(sampled_data, size=num_obs, replace=True)
    # Run RLD far past convergence to recover the two stopping iterations.
    z_dist_max, ideal_chisq_stop, min_chisq = lucy_replicator(50000, boot_phi)
    # Re-run, halting at each criterion's chosen iteration.
    z_dist_ideal_chisq = lucy_replicator(ideal_chisq_stop, boot_phi)[0]
    z_dist_min_chisq = lucy_replicator(min_chisq, boot_phi)[0]
    # Accumulate across bootstrap replicates.
    z_dist_boot = z_dist_boot + z_dist_max
    z_dist_ideal_boot = z_dist_ideal_boot + z_dist_ideal_chisq
    z_dist_min_boot = z_dist_min_boot + z_dist_min_chisq
def lucy_replicator(iters, phi, chisq_threshold=233):
    """Run Richardson-Lucy deconvolution on the sample `phi` for `iters` steps.

    Parameters
    ----------
    iters : int
        Number of RLD iterations to run after the initial step.
    phi : array-like
        Observed sample; histogrammed against the module-level `bins`.
    chisq_threshold : float, optional
        Chi-squared value used by the Lucy (1974) stopping rule
        (default 233, the value hard-coded in the original analysis).

    Returns
    -------
    z_dist : ndarray
        Final RLD estimate, normalized as a density over 0.025-wide bins.
    ideal_chisq_stop : int
        Last iteration at which chisq(phi_tilde, c) still exceeds the
        threshold (Lucy 1974 rule); 0 if the threshold is never exceeded.
    min_chisq : int
        Iteration index minimizing chisq between estimate and truth
        ("oracle" stopping rule — requires knowing norm_vals).
    """
    # Goodness-of-fit traces: index 0 is the first RLD step, 1..iters the loop.
    chisq_psi_zdist = np.zeros(iters + 1)   # estimate vs true distribution
    chisq_c_phitilde = np.zeros(iters + 1)  # reconvolved estimate vs data

    # Use the same initial guess as in Lucy (1974).
    initial_guess = (np.sqrt(2) / np.pi) / (1 + zm ** 4)

    # Turn the observed data into a density histogram over the shared bins.
    phi_tilde = np.histogram(phi, bins=bins, density=True)[0]

    def _rld_step(current):
        """One RLD update plus its reconvolution, both renormalized as densities."""
        updated = df.integral_calc(current, lucy_conditional_dists, phi_tilde)
        updated = updated / sum(updated) / .025
        # phi^r in Lucy's notation, here called c.
        reconv = np.dot(updated, lucy_conditional_dists)
        reconv = reconv / sum(reconv) / .025
        return updated, reconv

    # Expected/observed counts that are invariant across iterations are
    # computed once (the original recomputed obs_phi every pass).
    expected_psi = np.round(num_obs * norm_vals * .025)
    obs_phi = np.round(num_obs * phi_tilde * .025)

    # First RLD iteration and its chi-squared tests.
    z_dist, c = _rld_step(initial_guess)
    chisq_psi_zdist[0] = chisquare(np.round(num_obs * z_dist * .025),
                                   expected_psi)[0]
    chisq_c_phitilde[0] = chisquare(obs_phi,
                                    np.round(num_obs * c * .025))[0]

    for i in range(iters):
        z_dist, c = _rld_step(z_dist)
        # BUGFIX: the original dropped np.round() inside this loop, so the
        # chi-squared inputs were integer counts at iteration 0 but raw
        # (fractional) values afterwards; round consistently everywhere.
        chisq_psi_zdist[i + 1] = chisquare(np.round(num_obs * z_dist * .025),
                                           expected_psi)[0]
        chisq_c_phitilde[i + 1] = chisquare(obs_phi,
                                            np.round(num_obs * c * .025))[0]

    # Lucy (1974) stopping rule: the last iteration whose data-space
    # chi-squared still exceeds the threshold. Guarded: np.max on an empty
    # index array would raise in the original code.
    above = np.where(chisq_c_phitilde > chisq_threshold)[0]
    ideal_chisq_stop = int(np.max(above)) if above.size else 0

    # Best iteration in terms of "distance" between estimate and truth.
    min_chisq = np.argmin(chisq_psi_zdist)

    return z_dist, ideal_chisq_stop, min_chisq
# Load the finished SOMz results (a dict stored in a 0-d object array,
# hence the .item() unwrap).
A = load('SOMz_finished_cut.npy')
finished_SOMz = A.item()

######################
# Redshift binning: 200 bins on [0, 2]; zm holds the bin centers.
num_bins = 200
bins = linspace(0, 2, 201)
zm = .5 * (bins[:-1] + bins[1:])
# Nudge the upper edge so values exactly at 2.0 land in the final bin.
# (The original assigned this twice; once is sufficient.)
bins[num_bins] = 2.001

# Partition the test set across MPI ranks.
Nsample = len(Mean_test)
L0, L1 = df.get_limits(Nsample, size, rank)

# Set the initial guess and number of iterations.
# SECURITY NOTE: eval() executes arbitrary code from the parameter file —
# only run with trusted configuration (consider ast.literal_eval or a
# lookup table of named guesses instead).
initial_guess = eval(params.initial_guess)
iters = params.single_iters

####################
# Per-method accumulators for point-estimate and PDF distance results.
base_point_dist = []
base_pdf_dist = []
matias_point_dist = []
matias_pdf_dist = []
rossi_point_local_dist = []
rossi_pdf_local_dist = []
rossi_point_global_dist = []
rossi_pdf_global_dist = []