def test_remove():
    """Unpickle the saved analysis, remove models 8 and 10, retrain, and check the resulting sizes."""
    expected_size = 14
    models_to_drop = [8,10]

    analysis = LikelihoodAnalysis.load("analysis.p")
    analysis.remove_model(models_to_drop)

    #Retrain on the first three parameters after the removal
    analysis.train(use_parameters=range(3))

    #Parameter set and training set must both have 14 entries along their first axis
    assert analysis.parameter_set.shape[0] == expected_size
    assert analysis.training_set.shape[0] == expected_size
def test_find():
    """Check that searching for the parameters stored at position 7 returns exactly that position."""
    target_index = 7

    analysis = LikelihoodAnalysis.load("analysis.p")
    wanted_parameters = analysis.parameter_set[target_index]

    #The search must yield a single match, located at the index we started from
    matches = analysis.find(wanted_parameters)
    assert len(matches)==1
    assert matches[0] == target_index
def test_find():
    """Look up row 7 of the pickled parameter set and verify that find() reports index 7 only."""
    row = 7
    loaded = LikelihoodAnalysis.load("analysis.p")

    #Search for the parameters that are stored in the chosen row
    found = loaded.find(loaded.parameter_set[row])

    assert len(found) == 1
    assert found[0] == row
def test_remove():
    """Drop models 8 and 10 from the pickled analysis, retrain it, and verify that 14 entries remain."""
    remaining = 14
    loaded = LikelihoodAnalysis.load("analysis.p")

    #Remove the two models, then retrain using the first three parameters
    loaded.remove_model([8, 10])
    loaded.train(use_parameters=range(3))

    #First axis of both arrays is expected to have length 14
    assert loaded.parameter_set.shape[0] == remaining
    assert loaded.training_set.shape[0] == remaining
def emulatorAccuracy(cmd_args,descriptors_in_plot=single[:-1]):
    """
    Plot the emulator accuracy for a set of descriptors and save the figure.

    For each descriptor, the saved fiducial feature ("measured"), the feature
    obtained from the interpolator ("predicted") and the covariance matrix are
    read from the "troubleshoot" directory under root_dir; the absolute
    difference between measured and predicted, in units of the square root of
    the covariance diagonal, is drawn as a solid line. The same quantity for
    the emulator prediction at the parameter point [0.8,-1.0,0.5] is drawn as
    a dashed line of the same color.

    :param cmd_args: parsed command line options; only cmd_args.type is read,
        and it is used as the extension of the output file name
    :param descriptors_in_plot: iterable of descriptor names, used to build the
        file names and as keys of the module level descriptors dictionary

    The figure is written to "emulator_accuracy.<type>"; nothing is returned.
    """

    #Smoothing scale
    smoothing_scale = 1.0

    #Ready to plot
    ax = host_subplot(111, axes_class=AA.Axes)

    for n,descr in enumerate(descriptors_in_plot):

        predicted = np.load(os.path.join(root_dir,"troubleshoot","fiducial_from_interpolator_{0}--{1:.1f}.npy".format(descr,smoothing_scale)))
        measured = np.load(os.path.join(root_dir,"troubleshoot","fiducial_{0}--{1:.1f}.npy".format(descr,smoothing_scale)))
        covariance = np.load(os.path.join(root_dir,"troubleshoot","covariance_{0}--{1:.1f}.npy".format(descr,smoothing_scale)))

        #Both curves are normalized by the same quantity, compute it only once
        sigma = np.sqrt(covariance.diagonal())
        ax.plot(np.abs(measured-predicted)/sigma,color=brew_colors[n],label=descriptors[descr])

        #Plot also the predicted descriptors in another cosmology
        emulator = LikelihoodAnalysis.load(os.path.join(root_dir,"emulators","emulator_{0}--{1:.1f}.p".format(descr,smoothing_scale)))
        predictedOther = emulator.predict(np.array([0.8,-1.0,0.5]))
        ax.plot(np.abs(measured-predictedOther)/sigma,color=brew_colors[n],linestyle="--")

    #Rename the ticks: the labels are evenly spaced between -0.04 and 0.12.
    #np.linspace copes with a single tick (no division by zero) and hits the end point exactly
    tk = ax.get_xticks()
    new_tk = np.linspace(-0.04,0.12,len(tk))

    ax.set_xticklabels(["{0:.2f}".format(t) for t in new_tk])
    ax.set_xlabel(r"$\kappa$",fontsize=22)
    ax.set_ylabel(r"$(E-M)/\sqrt{C_{MM}}$",fontsize=22)

    #Set a top axis too, labelled from 300 to 5000
    axT = ax.twin()
    tk = axT.get_xticks()

    #An exact end point matters here: the labels are truncated to multiples of 1000,
    #so a value that falls just short of 5000 because of round off would be shown as 4000
    new_tk = np.linspace(300.0,5000.0,len(tk))

    #First label is kept as it is, the others are truncated to the thousands
    axT.set_xticklabels(["{0}".format(int(new_tk[0]))] + ["{0}".format(int(t/1000)*1000) for t in new_tk[1:]])
    axT.set_yticks([])
    axT.set_xlabel(r"$l$",fontsize=22)

    ax.set_yscale("log")
    ax.set_ylim(1.0e-3,20.0)
    ax.legend(loc="lower left")

    #Save the figure
    plt.tight_layout()
    plt.savefig("emulator_accuracy.{0}".format(cmd_args.type))
def pca(cmd_args):
    """
    Plot the principal component eigenvalues of each emulator and save the figure.

    One curve per entry of the module level list single is drawn: the
    eigenvalues on the left panel and their normalized cumulative sum on the
    right panel. A dashed black vertical line marks the abscissa 3 on both
    panels.

    :param cmd_args: parsed command line options; only cmd_args.type is read,
        and it is used as the extension of the output file name

    The figure is written to "pca_components.<type>"; nothing is returned.
    """

    #Smoothing scales in arcmin
    smoothing_scale=1.0

    #Create figure: left panel for the eigenvalues, right panel for the cumulative sum
    fig,panels = plt.subplots(1,2,figsize=(16,8))
    left_panel = panels[0]
    right_panel = panels[1]

    #Cycle over descriptors to plot PCA eigenvalues
    for index,descriptor_name in enumerate(single):

        #Unpickle the emulator
        emulator_path = os.path.join(root_dir,"emulators","emulator_{0}--{1:.1f}.p".format(descriptor_name,smoothing_scale))
        emulator = LikelihoodAnalysis.load(emulator_path)

        #Compute PCA
        components = emulator.principalComponents()

        #Plot the eigenvalues on the left and the cumulative sum on the right
        curve_options = dict(label=descriptors[descriptor_name],color=brew_colors[index])
        left_panel.plot(components.eigenvalues,**curve_options)
        right_panel.plot(components.eigenvalues.cumsum()/components.eigenvalues.sum(),**curve_options)

    #Draw a line at 3 components
    abscissa = 3*np.ones(100)
    left_panel.plot(abscissa,np.linspace(1.0e-10,1.0e2,100),color="black",linestyle="--")
    right_panel.plot(abscissa,np.linspace(0.9,1.01,100),color="black",linestyle="--")
    right_panel.set_ylim(0.98,1.001)
    right_panel.set_xscale("log")

    #Legend
    left_panel.legend()

    #Scale
    left_panel.set_yscale("log")

    #Labels
    left_panel.set_xlabel(r"$i$",fontsize=18)
    right_panel.set_xlabel(r"$n$",fontsize=18)
    left_panel.set_ylabel(r"$S^2_i$",fontsize=18)
    right_panel.set_ylabel(r"$\Sigma_{i=0}^n S^2_i/S^2_{tot}$",fontsize=18)

    #Save figure
    fig.tight_layout()
    fig.savefig("pca_components.{0}".format(cmd_args.type))
def main():
    """
    Compute the best fit parameters for a set of mock realizations.

    Steps, in order:

    1. parse the command line; if no options file is given, print the help and exit
    2. try to start an MPI pool; non master processes wait for work and then exit
    3. load the fiducial feature ensemble and measure its covariance matrix
    4. load the features that play the role of the observations
    5. unpickle the emulator
    6. for each realization between "first_realization" and "last_realization"
       (section "mocks" of the options) compute the chi2 on a three dimensional
       parameter grid, convert it to a likelihood cube, locate its maximum and
       record the best fit parameters together with the chi2 at the best fit
       and at the parameter point [0.26,-1.0,0.800]
    7. if -o was given, do the same for the actual observation and store the
       result in the last row of the output arrays
    8. if -ss was given, save the output arrays in the "troubleshoot"
       directory under the analysis save path

    Nothing is returned.
    """

    #################################################
    ############Option parsing#######################
    #################################################

    #Parse command line options
    parser = argparse.ArgumentParser()
    parser.add_argument("-f","--file",dest="options_file",action="store",type=str,help="analysis options file")
    parser.add_argument("-v","--verbose",dest="verbose",action="store_true",default=False,help="turn on verbosity")
    parser.add_argument("-vv","--verbose_plus",dest="verbose_plus",action="store_true",default=False,help="turn on additional verbosity")
    parser.add_argument("-m","--mask_scale",dest="mask_scale",action="store_true",default=False,help="scale peaks and power spectrum to unmasked area")
    parser.add_argument("-c","--cut_convergence",dest="cut_convergence",action="store",default=None,help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
    parser.add_argument("-g","--group_subfields",dest="group_subfields",action="store_true",default=False,help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
    parser.add_argument("-s","--save_features",dest="save_features",action="store_true",default=False,help="save features profiles")
    parser.add_argument("-ss","--save",dest="save",action="store_true",default=False,help="save the best fits and corresponding chi2")
    parser.add_argument("-p","--prefix",dest="prefix",action="store",default="",help="prefix of the emulator to pickle")
    parser.add_argument("-l","--likelihood",dest="likelihood",action="store_true",default=False,help="save the likelihood cubes for the mocks")
    parser.add_argument("-o","--observation",dest="observation",action="store_true",default=False,help="append the actual observation results to the mock results for direct comparison")
    parser.add_argument("-d","--differentiate",dest="differentiate",action="store_true",default=False,help="differentiate the first minkowski functional to get the PDF")

    cmd_args = parser.parse_args()

    if cmd_args.options_file is None:
        parser.print_help()
        sys.exit(0)

    #Set verbosity level
    if cmd_args.verbose_plus:
        logging.basicConfig(level=DEBUG_PLUS)
    elif cmd_args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    #Initialize MPI Pool; if it cannot be created fall back to serial execution.
    #Only Exception is caught, so that Ctrl-C and sys.exit() are not swallowed
    try:
        pool = MPIPool()
    except Exception:
        pool = None

    #Non master processes only serve the pool, then quit
    if (pool is not None) and (not pool.is_master()):
        pool.wait()
        sys.exit(0)

    if pool is not None:
        logging.info("Started MPI Pool.")

    #################################################################################################################
    #################Info gathering: covariance matrix, observation and emulator#####################################
    #################################################################################################################

    #start
    start = time.time()
    last_timestamp = start

    #Instantiate a FeatureLoader object that will take care of the memory loading
    feature_loader = FeatureLoader(cmd_args)
    options = feature_loader.options

    ###########################################################################################################################################

    #Use this model for the covariance matrix (from the new set of 50 N body simulations)
    covariance_model = CFHTcov.getModels(root_path=options.get("simulations","root_path"))
    logging.info("Measuring covariance matrix from model {0}".format(covariance_model))

    #Load in the covariance matrix (the mean is computed too, but it is not used below)
    fiducial_feature_ensemble = feature_loader.load_features(covariance_model)
    fiducial_features = fiducial_feature_ensemble.mean()
    features_covariance = fiducial_feature_ensemble.covariance()

    #timestamp
    now = time.time()
    logging.info("covariance loaded in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    ################################################################################################################################################

    #Treat the 50N-body simulation set as data
    observation = CFHTcov.getModels(root_path=options.get("observations","root_path"))
    logging.info("Measuring the observations from {0}".format(observation))

    #And load the observations
    observed_feature = feature_loader.load_features(observation)

    #timestamp
    now = time.time()
    logging.info("observation loaded in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    ################################################################################################################################################

    #Create a LikelihoodAnalysis instance by unpickling one of the emulators
    emulators_dir = os.path.join(options.get("analysis","save_path"),"emulators")
    emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,output_string(feature_loader.feature_string)))
    logging.info("Unpickling emulator from {0}...".format(emulator_file))
    analysis = LikelihoodAnalysis.load(emulator_file)

    #timestamp
    now = time.time()
    logging.info("emulator unpickled in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    ####################################################################################################################
    ######################################Compute the chi2 cube#########################################################
    ####################################################################################################################

    logging.info("Initializing chi2 meshgrid...")

    #Set the points in parameter space on which to compute the chi2 (read from options)
    Om = np.ogrid[options.getfloat("Omega_m","min"):options.getfloat("Omega_m","max"):options.getint("Omega_m","num_points")*1j]
    w = np.ogrid[options.getfloat("w","min"):options.getfloat("w","max"):options.getint("w","num_points")*1j]
    si8 = np.ogrid[options.getfloat("sigma8","min"):options.getfloat("sigma8","max"):options.getint("sigma8","num_points")*1j]

    #One row per grid point, one column per parameter
    num_points = len(Om) * len(w) * len(si8)
    points = np.array(np.meshgrid(Om,w,si8,indexing="ij")).reshape(3,num_points).transpose()

    #Now compute the chi2 at each of these points
    if pool:
        split_chunks = pool.size
        logging.info("Computing chi squared for {0} parameter combinations using {1} cores...".format(points.shape[0],pool.size))
    else:
        split_chunks = None
        logging.info("Computing chi squared for {0} parameter combinations using 1 core...".format(points.shape[0]))

    #Allocate array for best fit: one row per realization, plus one for the actual observation if requested
    first_realization = options.getint("mocks","first_realization")
    last_realization = options.getint("mocks","last_realization")

    num_rows = last_realization-first_realization+1
    if cmd_args.observation:
        num_rows += 1

    best_fit_all = np.zeros((num_rows,analysis.parameter_set.shape[1]))
    chi2_all = np.zeros(num_rows)
    chi2_from_expected_all = np.zeros(num_rows)

    #Parameter point at which the "expected" chi2 is evaluated
    expected_parameters = np.array([0.26,-1.0,0.800])

    #Cycle through the realizations and obtain a best fit for each one of them
    for nreal in range(first_realization-1,last_realization):

        chi_squared = analysis.chi2(points,observed_feature=observed_feature[nreal],features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

        now = time.time()
        logging.info("realization {0}, chi2 calculations completed in {1:.1f}s".format(nreal+1,now-last_timestamp))
        last_timestamp = now

        #After chi2, compute the likelihood
        likelihood_cube = analysis.likelihood(chi_squared.reshape(Om.shape + w.shape + si8.shape))

        #Maybe save the likelihood cube?
        if cmd_args.likelihood:
            likelihood_filename = os.path.join(options.get("analysis","save_path"),"troubleshoot","likelihood{0}_{1}.npy".format(nreal+1,output_string(feature_loader.feature_string)))
            logging.info("Saving likelihood cube to {0}...".format(likelihood_filename))
            np.save(likelihood_filename,likelihood_cube)

        #Maybe save the feature profiles?
        if cmd_args.save_features:
            features_filename = os.path.join(options.get("analysis","save_path"),"troubleshoot","features{0}_{1}.npy".format(nreal+1,output_string(feature_loader.feature_string)))
            logging.info("Saving features for realization {0} to {1}...".format(nreal+1,features_filename))
            np.save(features_filename,observed_feature[nreal])

        #Find the maximum of the likelihood using ContourPlot functionality
        contour = ContourPlot()
        contour.getLikelihood(likelihood_cube)
        contour.getUnitsFromOptions(options)
        parameters_maximum = contour.getMaximum()

        #Order the parameter names by their axis; sorted() works on python 2 lists and python 3 key views alike
        parameter_keys = sorted(parameters_maximum.keys(),key=contour.parameter_axes.get)

        #Display the new best fit before exiting
        best_fit_parameters = np.array([ parameters_maximum[par_key] for par_key in parameter_keys ])
        best_fit_chi2 = analysis.chi2(best_fit_parameters,features_covariance=features_covariance,observed_feature=observed_feature[nreal])[0]
        chi2_from_expected = analysis.chi2(expected_parameters,features_covariance=features_covariance,observed_feature=observed_feature[nreal])[0]

        logging.info("Best fit for realization {4} is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2_best={1:.3f}({2} dof), chi2_expected={3:.3f}({2} dof)".format(best_fit_parameters,best_fit_chi2,analysis.training_set.shape[1],chi2_from_expected,nreal+1))

        #Update global array with best fit parameters and corresponding chi2 (row 0 is the first realization)
        row = nreal-first_realization+1
        best_fit_all[row,:] = best_fit_parameters
        chi2_all[row] = best_fit_chi2
        chi2_from_expected_all[row] = chi2_from_expected

    #######################################################################################################################################################################

    #If option was selected, append the observation results to the mock ones, for comparison
    if cmd_args.observation:

        observed_feature = feature_loader.load_features(CFHTLens(root_path=options.get("observations","root_path")))[0]

        chi_squared = analysis.chi2(points,observed_feature=observed_feature,features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

        now = time.time()
        logging.info("actual observation, chi2 calculations completed in {0:.1f}s".format(now-last_timestamp))
        last_timestamp = now

        #After chi2, compute the likelihood
        likelihood_cube = analysis.likelihood(chi_squared.reshape(Om.shape + w.shape + si8.shape))

        #Maybe save the likelihood cube?
        if cmd_args.likelihood:
            likelihood_filename = os.path.join(options.get("analysis","save_path"),"troubleshoot","likelihood_obs_{0}.npy".format(output_string(feature_loader.feature_string)))
            logging.info("Saving likelihood cube to {0}...".format(likelihood_filename))
            np.save(likelihood_filename,likelihood_cube)

        #Maybe save the feature profiles?
        if cmd_args.save_features:
            features_filename = os.path.join(options.get("analysis","save_path"),"troubleshoot","features_obs_{0}.npy".format(output_string(feature_loader.feature_string)))
            logging.info("Saving observed features to {0}...".format(features_filename))
            np.save(features_filename,observed_feature)

        #Find the maximum of the likelihood using ContourPlot functionality
        contour = ContourPlot()
        contour.getLikelihood(likelihood_cube)
        contour.getUnitsFromOptions(options)
        parameters_maximum = contour.getMaximum()

        #Same python 2/3 safe ordering used for the mocks
        parameter_keys = sorted(parameters_maximum.keys(),key=contour.parameter_axes.get)

        #Display the new best fit before exiting
        best_fit_parameters = np.array([ parameters_maximum[par_key] for par_key in parameter_keys ])
        best_fit_chi2 = analysis.chi2(best_fit_parameters,features_covariance=features_covariance,observed_feature=observed_feature)[0]
        chi2_from_expected = analysis.chi2(expected_parameters,features_covariance=features_covariance,observed_feature=observed_feature)[0]

        logging.info("Best fit for observation is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2_best={1:.3f}({2} dof), chi2_expected={3:.3f}({2} dof)".format(best_fit_parameters,best_fit_chi2,analysis.training_set.shape[1],chi2_from_expected))

        #Update global array with best fit parameters and corresponding chi2 (the observation takes the last row)
        best_fit_all[-1,:] = best_fit_parameters
        chi2_all[-1] = best_fit_chi2
        chi2_from_expected_all[-1] = chi2_from_expected

    #######################################################################################################################################################################

    #Close MPI Pool
    if pool is not None:
        pool.close()
        logging.info("Closed MPI Pool.")

    if cmd_args.save:

        #Save the best fit parameters for all realizations
        best_fit_filename = os.path.join(options.get("analysis","save_path"),"troubleshoot","best_fit_all_{0}.npy".format(output_string(feature_loader.feature_string)))
        logging.info("Saving best fit to {0}...".format(best_fit_filename))
        np.save(best_fit_filename,best_fit_all)

        #Save the best fit chi2 for all realizations
        chi2_filename = os.path.join(options.get("analysis","save_path"),"troubleshoot","chi2_all_{0}.npy".format(output_string(feature_loader.feature_string)))
        logging.info("Saving best fit chi2 to {0}...".format(chi2_filename))
        np.save(chi2_filename,chi2_all)

        #Save also the chi2 for the expected best fit
        chi2_filename = os.path.join(options.get("analysis","save_path"),"troubleshoot","chi2_all_expected_{0}.npy".format(output_string(feature_loader.feature_string)))
        logging.info("Saving expected chi2 to {0}...".format(chi2_filename))
        np.save(chi2_filename,chi2_from_expected_all)

    end = time.time()

    logging.info("DONE!!")
    logging.info("Completed in {0:.1f}s".format(end-start))
def main():
    """
    Load the fiducial feature ensemble and the emulator selected on the command line.

    The command line is parsed first (if no options file is given the help is
    printed and the process exits), then an MPI pool is started if possible;
    non master processes wait for work and then exit. The master loads the
    features of the covariance model and unpickles the emulator whose file
    name is built from the -p prefix and from the feature string.

    :returns: tuple (fiducial_feature_ensemble, analysis), i.e. the object
        returned by FeatureLoader.load_features for the covariance model and
        the unpickled LikelihoodAnalysis instance
    """

    #################################################
    ############Option parsing#######################
    #################################################

    #Parse command line options
    parser = argparse.ArgumentParser()
    parser.add_argument("-f","--file",dest="options_file",action="store",type=str,help="analysis options file")
    parser.add_argument("-v","--verbose",dest="verbose",action="store_true",default=False,help="turn on verbosity")
    parser.add_argument("-vv","--verbose_plus",dest="verbose_plus",action="store_true",default=False,help="turn on additional verbosity")
    parser.add_argument("-m","--mask_scale",dest="mask_scale",action="store_true",default=False,help="scale peaks and power spectrum to unmasked area")
    parser.add_argument("-c","--cut_convergence",dest="cut_convergence",action="store",default=None,help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
    parser.add_argument("-g","--group_subfields",dest="group_subfields",action="store_true",default=False,help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
    parser.add_argument("-s","--save_points",dest="save_points",action="store",default=None,help="save points in parameter space to external npy file")
    parser.add_argument("-ss","--save_debug",dest="save_debug",action="store_true",default=False,help="save a bunch of debugging info for the analysis")
    parser.add_argument("-p","--prefix",dest="prefix",action="store",default="",help="prefix of the emulator to pickle")
    parser.add_argument("-r","--realizations",dest="realizations",type=int,default=None,help="use only the first N realizations to estimate the covariance matrix")
    parser.add_argument("-d","--differentiate",dest="differentiate",action="store_true",default=False,help="differentiate the first minkowski functional to get the PDF")

    cmd_args = parser.parse_args()

    if cmd_args.options_file is None:
        parser.print_help()
        sys.exit(0)

    #Set verbosity level
    if cmd_args.verbose_plus:
        logging.basicConfig(level=DEBUG_PLUS)
    elif cmd_args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    #Initialize MPI Pool; if it cannot be created fall back to serial execution.
    #Only Exception is caught, so that Ctrl-C and sys.exit() are not swallowed
    try:
        pool = MPIPool()
    except Exception:
        pool = None

    #Non master processes only serve the pool, then quit
    if (pool is not None) and (not pool.is_master()):
        pool.wait()
        sys.exit(0)

    #NOTE(review): the pool is neither closed nor returned by this function; confirm
    #that the waiting processes are released somewhere else
    if pool is not None:
        logging.info("Started MPI Pool.")

    #################################################################################################################
    #################Info gathering: covariance matrix, observation and emulator#####################################
    #################################################################################################################

    #Instantiate a FeatureLoader object that will take care of the memory loading
    feature_loader = FeatureLoader(cmd_args)

    ###########################################################################################################################################

    #Use this model for the covariance matrix (from the new set of 50 N body simulations)
    covariance_model = CFHTcov.getModels(root_path=feature_loader.options.get("simulations","root_path"))
    logging.info("Measuring covariance matrix from model {0}".format(covariance_model))

    #Load in the covariance matrix
    fiducial_feature_ensemble = feature_loader.load_features(covariance_model)

    #Create a LikelihoodAnalysis instance by unpickling one of the emulators
    emulators_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"emulators")
    emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,output_string(feature_loader.feature_string)))
    logging.info("Unpickling emulator from {0}...".format(emulator_file))
    analysis = LikelihoodAnalysis.load(emulator_file)

    #Return
    return fiducial_feature_ensemble,analysis
def main(n_components_collection,cmd_args,pool):
    """
    Compute chi2 and likelihood cubes after projecting the features on their principal components.

    For each feature loader built from cmd_args the emulator is unpickled,
    projected on its first principal components and retrained; the fiducial
    ensemble (and, unless mock observations are requested, the observed
    ensemble) goes through the same projection. The per-feature results are
    then combined with reduce(mul,...), the chi2 is computed on a three
    dimensional parameter grid, and both the chi2 and the likelihood cubes are
    saved as npy files in the "likelihoods_<parametrization>" directory under
    the analysis save path.

    :param n_components_collection: number of principal components to keep;
        either a list with one entry per feature loader, or a single number
        that is used for all of them
    :param cmd_args: parsed command line options; the attributes prefix,
        realizations, observations_mock, realization_pick and cross are read
        here (cmd_args is also handed over to FeatureLoaderCross.fromArgs)
    :param pool: worker pool forwarded to the chi2 computation; if it evaluates
        to False the computation runs on one core

    Nothing is returned.
    """

    #################################################################################################################
    #################Info gathering: covariance matrix, observation and emulator#####################################
    #################################################################################################################

    #start
    last_timestamp = time.time()

    #Instantiate a FeatureLoader object that will take care of the data loading
    feature_loader_collection = FeatureLoaderCross.fromArgs(cmd_args)

    fiducial_feature_ensemble_collection = list()
    observed_feature_ensemble_collection = list()
    analysis_collection = list()
    formatted_output_string_collection = list()

    #Use the same number of components for all or not?
    components_per_feature = isinstance(n_components_collection,list)

    #Sanity check
    if components_per_feature:
        assert len(n_components_collection)==len(feature_loader_collection)

    #Cycle over feature types
    for nc,feature_loader in enumerate(feature_loader_collection):

        if components_per_feature:
            n_components = n_components_collection[nc]
        else:
            n_components = n_components_collection

        #Format the output string
        formatted_output_string_collection.append(output_string(feature_loader.feature_string)+"_ncomp{0}".format(n_components))

        #Create a LikelihoodAnalysis instance by unpickling one of the emulators
        emulators_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"emulators")
        emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,output_string(feature_loader.feature_string)))
        logging.info("Unpickling emulator from {0}...".format(emulator_file))
        analysis = LikelihoodAnalysis.load(emulator_file)

        #timestamp
        now = time.time()
        logging.info("emulator unpickled in {0:.1f}s".format(now-last_timestamp))
        last_timestamp = now

        ######################Compute PCA components here#####################################
        pca = analysis.principalComponents()

        now = time.time()
        logging.info("Principal components computed in {0:.1f}s".format(now-last_timestamp))
        last_timestamp = now

        ####################Transform feature space by projecting on PCA eigenvectors############################
        analysis = analysis.transform(pca_transform,pca=pca,n_components=n_components)

        now = time.time()
        logging.info("Projection on first {1} principal components completed in {0:.1f}s".format(now-last_timestamp,analysis.training_set.shape[1]))
        last_timestamp = now

        ####################Retrain emulator######################################################################
        analysis.train()

        now = time.time()
        logging.info("Emulator re-training completed in {0:.1f}s".format(now-last_timestamp))
        last_timestamp = now

        #Append to the collection
        analysis_collection.append(analysis)

        ###########################################################################################################################################
        ###########################################################################################################################################

        #Use this model for the covariance matrix (from the new set of 50 N body simulations)
        covariance_model = CFHTcov.getModels(root_path=feature_loader.options.get("simulations","root_path"))
        logging.info("Measuring covariance matrix from model {0}".format(covariance_model))

        #Load in the covariance matrix
        fiducial_feature_ensemble = feature_loader.load_features(covariance_model)

        #If options is enabled, use only the first N realizations to estimate the covariance matrix
        #(the range that is actually selected is read from the "mocks" section of the options)
        if cmd_args.realizations:

            first_realization = feature_loader.options.getint("mocks","first_realization")
            last_realization = feature_loader.options.getint("mocks","last_realization")

            logging.info("Using only the realizations {0}-{1} to build the fiducial ensemble".format(first_realization,last_realization))
            fiducial_feature_ensemble = fiducial_feature_ensemble.subset(range(first_realization-1,last_realization))
            assert fiducial_feature_ensemble.num_realizations==last_realization-first_realization+1

        ###############Insert PCA transform here##############################
        fiducial_feature_ensemble = fiducial_feature_ensemble.transform(pca_transform,pca=pca,n_components=n_components)

        now = time.time()
        logging.info("Projection on first {1} principal components for covariance ensemble completed in {0:.1f}s".format(now-last_timestamp,analysis.training_set.shape[1]))
        last_timestamp = now

        #Append to the collection
        fiducial_feature_ensemble_collection.append(fiducial_feature_ensemble)

        #timestamp
        now = time.time()
        logging.info("covariance computed in {0:.1f}s".format(now-last_timestamp))
        last_timestamp = now

        ################################################################################################################################################

        #Get also the observation instance (nothing to load if the fiducial ensemble is used as mock observation)
        if not cmd_args.observations_mock:

            observation = CFHTLens(root_path=feature_loader.options.get("observations","root_path"))
            logging.info("Measuring the observations from {0}".format(observation))

            #And load the observations
            observed_feature_ensemble = feature_loader.load_features(observation)

            ###############Insert PCA transform here##############################
            observed_feature_ensemble = observed_feature_ensemble.transform(pca_transform,pca=pca,n_components=n_components)

            now = time.time()
            logging.info("Projection on first {1} principal components for observation completed in {0:.1f}s".format(now-last_timestamp,analysis.training_set.shape[1]))
            last_timestamp = now

            observed_feature_ensemble_collection.append(observed_feature_ensemble)

        #timestamp
        now = time.time()
        logging.info("observation loaded in {0:.1f}s".format(now-last_timestamp))
        last_timestamp = now

    ################################################################################################################################################
    ################################Reduce the collections##########################################################################################
    ################################################################################################################################################

    analysis = reduce(mul,analysis_collection)
    fiducial_feature_ensemble = reduce(mul,fiducial_feature_ensemble_collection)

    #Total number of features expected after combining all the feature types
    if components_per_feature:
        expected_num_features = sum(n_components_collection)
    else:
        expected_num_features = n_components_collection*len(feature_loader_collection)

    #Sanity check
    assert analysis.training_set.shape[1]==expected_num_features
    assert fiducial_feature_ensemble.data.shape[1]==expected_num_features

    #Covariance matrix
    features_covariance = fiducial_feature_ensemble.covariance()

    if cmd_args.observations_mock:

        logging.info("Using fiducial ensemble as mock observations")

        if cmd_args.realization_pick is not None:
            logging.info("Using realization {0} as data".format(cmd_args.realization_pick))
            observed_feature = fiducial_feature_ensemble[cmd_args.realization_pick]
        else:
            observed_feature=fiducial_feature_ensemble.mean()

    else:

        #And load the observations
        observed_feature_ensemble = reduce(mul,observed_feature_ensemble_collection)
        observed_feature = observed_feature_ensemble.mean()

    #Sanity check
    assert observed_feature.shape[0]==expected_num_features

    ################################################################################################################################################
    ################################################################################################################################################
    #############Everything is projected on the PCA components now, ready for chi2 computations#####################################################
    ################################################################################################################################################
    ################################################################################################################################################

    logging.info("Initializing chi2 meshgrid...")

    #Read parameters to use from options (feature_loader is the last one of the collection at this point)
    use_parameters = feature_loader.options.get("parameters","use_parameters").replace(" ","").split(",")
    assert len(use_parameters)==3

    #Reparametrization hash key
    use_parameters_hash = "-".join(use_parameters)

    ########################################################################################
    #Might need to reparametrize the emulator here, use a dictionary for reparametrizations#
    ########################################################################################

    assert use_parameters_hash in reparametrization,"No reparametrization scheme specified for {0} parametrization".format(use_parameters_hash)

    if reparametrization[use_parameters_hash] is not None:

        #Reparametrize
        logging.info("Reparametrizing emulator according to {0} parametrization".format(use_parameters_hash))
        analysis.reparametrize(reparametrization[use_parameters_hash])

        #Retrain for safety
        analysis.train()

    #Log current parametrization to user
    logging.info("Using parametrization {0}".format(use_parameters_hash))

    #Set the points in parameter space on which to compute the chi2 (read extremes from options)
    par = list()
    for parameter_name in use_parameters:
        assert feature_loader.options.has_section(parameter_name),"No extremes specified for parameter {0}".format(parameter_name)
        par.append(np.ogrid[feature_loader.options.getfloat(parameter_name,"min"):feature_loader.options.getfloat(parameter_name,"max"):feature_loader.options.getint(parameter_name,"num_points")*1j])

    #One row per grid point, one column per parameter
    num_points = len(par[0]) * len(par[1]) * len(par[2])
    points = np.array(np.meshgrid(par[0],par[1],par[2],indexing="ij")).reshape(3,num_points).transpose()

    #Now compute the chi2 at each of these points
    if pool:
        split_chunks = pool.size
        logging.info("Computing chi squared for {0} parameter combinations using {1} cores...".format(points.shape[0],pool.size))
    else:
        split_chunks = None
        logging.info("Computing chi squared for {0} parameter combinations using 1 core...".format(points.shape[0]))

    chi_squared = analysis.chi2(points,observed_feature=observed_feature,features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

    now = time.time()
    logging.info("chi2 calculations completed in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    #save output
    likelihoods_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"likelihoods_{0}".format(use_parameters_hash))
    if not os.path.isdir(likelihoods_dir):
        os.mkdir(likelihoods_dir)

    #Output filename formatting
    output_prefix=""
    if cmd_args.observations_mock:
        output_prefix+="mock"
    if cmd_args.cross:
        output_prefix+="_cross"
    if cmd_args.realization_pick is not None:
        output_prefix+="real{0}".format(cmd_args.realization_pick)
    if cmd_args.realizations:
        output_prefix+="{0}-{1}".format(first_realization,last_realization)

    output_prefix += cmd_args.prefix

    formatted_output_string = "-".join(formatted_output_string_collection)

    chi2_file = os.path.join(likelihoods_dir,"chi2{0}_{1}.npy".format(output_prefix,formatted_output_string))
    likelihood_file = os.path.join(likelihoods_dir,"likelihood{0}_{1}.npy".format(output_prefix,formatted_output_string))

    #The flat chi2 array is given the shape of the parameter grid before saving
    chi2_cube = chi_squared.reshape(par[0].shape + par[1].shape + par[2].shape)

    logging.info("Saving chi2 to {0}".format(chi2_file))
    np.save(chi2_file,chi2_cube)

    logging.info("Saving full likelihood to {0}".format(likelihood_file))
    likelihood_cube = analysis.likelihood(chi2_cube)
    np.save(likelihood_file,likelihood_cube)
def test_interpolation():
    """Train a power spectrum emulator on 16 models and test it on the 17th.

    The first 17 models returned by CFHTemu1.getModels() are shuffled with a
    fixed seed; the last one is held out and the others populate a
    LikelihoodAnalysis instance. The emulator is trained, pickled, unpickled,
    used to predict the held out power spectrum (single point and two points
    at once) and finally used to compute chi2 values with and without chunk
    splitting. Several diagnostic plots and a text file are written to the
    current directory.

    Returns the tuple (chi2 without chunking, chi2 with split_chunks=4).
    """

    data_root = "Data/all"
    analysis = LikelihoodAnalysis()

    def mean_power_spectrum(directory_name):
        # Mean of the power spectrum ensemble stored under data_root/directory_name
        ensemble = Ensemble.fromfilelist([os.path.join(data_root, directory_name, "subfield1", "sigma05", "power_spectrum.npy")])
        ensemble.load(from_old=True)
        return ensemble.mean()

    # Read in model names
    models = CFHTemu1.getModels()[:17]
    assert len(models) == 17

    # Shuffle the models (fixed seed), then hold the last one out for testing
    np.random.seed(1)
    np.random.shuffle(models)
    training_models, testing_model = models[:-1], models[-1]

    # Read multipoles
    ell = np.load(os.path.join(data_root, "ell.npy"))

    # Populate the analysis instance with the mean power spectrum of each training model
    for model in training_models:
        spectrum = mean_power_spectrum(model._cosmo_id_string)
        analysis.add_model(parameters=model.squeeze(with_ns=True), feature=spectrum)

    # Add the multipoles to the analysis
    analysis.add_feature_label(ell)
    multipoles = analysis.feature_label

    def scaled(power):
        # l(l+1)P_l/2pi, the quantity shown on the y axis of the plots
        return multipoles * (multipoles + 1) * power / (2 * np.pi)

    # Held out model and observed power spectrum
    testing_Pl = mean_power_spectrum(testing_model._cosmo_id_string)
    observed_Pl = mean_power_spectrum("observations")

    # Output the analysis stats
    np.savetxt("16_parameter_points.txt", analysis.parameter_set)

    for index, _ in enumerate(training_models):
        plt.plot(multipoles, scaled(analysis.training_set[index]))

    plt.plot(multipoles, scaled(observed_Pl), linestyle="--", label="Observation")
    plt.xlabel(r"$l$")
    plt.ylabel(r"$l(l+1)P_l/2\pi$")
    plt.yscale("log")
    plt.legend(loc="upper left")
    plt.savefig("16_power_spectra.png")
    plt.clf()

    # Train the interpolators
    analysis.train(use_parameters=range(3))
    for attribute in ("_interpolator", "_num_bins"):
        assert hasattr(analysis, attribute)

    # Emulator portability test with pickle/unpickle
    analysis.save("analysis.p")
    emulator = LikelihoodAnalysis.load("analysis.p")

    # Predict the power spectrum at the remaining point
    predicted_Pl = emulator.predict(testing_model.squeeze())

    # Plot it against the measured one
    fig, (top, bottom) = plt.subplots(2, 1, figsize=(16, 8))
    top.plot(multipoles, scaled(testing_Pl), label="measured")
    top.plot(multipoles, scaled(predicted_Pl), label="interpolated")

    # Fractional difference
    bottom.plot(multipoles, (predicted_Pl - testing_Pl) / testing_Pl)

    bottom.set_xlabel(r"$l$")
    top.set_ylabel(r"$l(l+1)P_l/2\pi$")
    bottom.set_ylabel(r"$P_l^I-P_l^M/P_l^M$")
    top.set_yscale("log")
    top.legend(loc="upper left")

    plt.savefig("power_interpolator_test.png")
    plt.clf()

    # Give it a shot with two points in parameter space to test vectorization
    two_parameter_points = np.array((training_models[0].squeeze(), testing_model.squeeze()))
    two_predicted_Pl = emulator.predict(two_parameter_points)

    fig, (top, bottom) = plt.subplots(2, 1, figsize=(16, 8))

    # Predicted
    top.plot(multipoles, scaled(two_predicted_Pl[0]), color="red", linestyle="--")
    top.plot(multipoles, scaled(two_predicted_Pl[1]), color="green", linestyle="--")

    # Measured
    top.plot(multipoles, scaled(emulator.training_set[0]), color="red", linestyle="-")
    top.plot(multipoles, scaled(testing_Pl), color="green", linestyle="-")

    # Fractional difference
    bottom.plot(multipoles, (two_predicted_Pl[0] - emulator.training_set[0]) / emulator.training_set[0], color="red")
    bottom.plot(multipoles, (two_predicted_Pl[1] - testing_Pl) / testing_Pl, color="green")

    bottom.set_xlabel(r"$l$")
    top.set_ylabel(r"$l(l+1)P_l/2\pi$")
    bottom.set_ylabel(r"$P_l^I-P_l^M/P_l^M$")
    top.set_yscale("log")

    plt.savefig("power_interpolator_test_2.png")
    plt.clf()

    # Generate a fudge power spectrum covariance matrix
    covariance = np.diag(testing_Pl**2 / (0.5 + multipoles))

    # Generate a fudge observation by wiggling the testing power spectrum by up to 10%
    wiggle = np.random.uniform(low=-testing_Pl * 0.1, high=testing_Pl * 0.1)
    observation = testing_Pl + wiggle

    # Choose a bunch of points in parameter space (last parameter column dropped)
    points = emulator.parameter_set[:, :-1]

    # Compute the chi2, with and without splitting the points in chunks
    chi2_values_1 = emulator.chi2(points, observation, covariance)
    chi2_values_2 = emulator.chi2(points, observation, covariance, split_chunks=4)
    assert chi2_values_1.shape == chi2_values_2.shape

    # Compute the individual contributions at the first point and plot them
    chi2_contributions = emulator.chi2Contributions(points[0], observation, covariance)

    plt.imshow(chi2_contributions, interpolation="nearest")
    plt.colorbar()
    plt.xlabel(r"$j$")
    plt.ylabel(r"$i$")
    plt.savefig("chi2_contributions.png")
    plt.clf()

    return chi2_values_1, chi2_values_2
def test_interpolation():
    """Emulator round trip test on the power spectrum of 17 simulated models.

    Sixteen (shuffled, fixed seed) models are used to train a
    LikelihoodAnalysis emulator, which is saved to "analysis.p", reloaded and
    asked to predict the power spectrum of the held out model. The prediction
    is plotted against the measurement, the vectorized prediction is checked
    on two parameter points, and chi2 values are computed on the training
    parameter points against a randomly perturbed version of the held out
    spectrum.

    Returns (chi2_values_1, chi2_values_2): the chi2 computed in one go and
    the chi2 computed with split_chunks=4.
    """

    root_path = "Data/all"
    spectrum_tail = ("subfield1", "sigma05", "power_spectrum.npy")
    analysis = LikelihoodAnalysis()

    def load_mean(folder):
        # Load the ensemble saved for this folder and return its mean
        path = os.path.join(root_path, folder, *spectrum_tail)
        ens = Ensemble.fromfilelist([path])
        ens.load(from_old=True)
        return ens.mean()

    # Read in model names
    models = CFHTemu1.getModels()[:17]
    assert len(models) == 17

    # Shuffle the models
    np.random.seed(1)
    np.random.shuffle(models)

    # Divide into training and testing
    training_models = models[:-1]
    testing_model = models[-1]

    # Read multipoles
    ell = np.load(os.path.join(root_path, "ell.npy"))

    # Load in the means of the power spectra of the training models
    for model in training_models:
        mean_spectrum = load_mean(model._cosmo_id_string)
        analysis.add_model(parameters=model.squeeze(with_ns=True), feature=mean_spectrum)

    # Add the multipoles to the analysis
    analysis.add_feature_label(ell)
    l = analysis.feature_label

    # Held out model, then the observed power spectrum
    testing_Pl = load_mean(testing_model._cosmo_id_string)
    observed_Pl = load_mean("observations")

    # Output the analysis stats
    np.savetxt("16_parameter_points.txt", analysis.parameter_set)

    n = 0
    while n < len(training_models):
        plt.plot(l, l * (l + 1) * analysis.training_set[n] / (2 * np.pi))
        n += 1

    plt.plot(l, l * (l + 1) * observed_Pl / (2 * np.pi), linestyle="--", label="Observation")
    plt.xlabel(r"$l$")
    plt.ylabel(r"$l(l+1)P_l/2\pi$")
    plt.yscale("log")
    plt.legend(loc="upper left")
    plt.savefig("16_power_spectra.png")
    plt.clf()

    # Train the interpolators
    analysis.train(use_parameters=range(3))
    assert hasattr(analysis, "_interpolator")
    assert hasattr(analysis, "_num_bins")

    # Emulator portability test with pickle/unpickle
    analysis.save("analysis.p")
    emulator = LikelihoodAnalysis.load("analysis.p")

    # Predict the power spectrum at the remaining point
    predicted_Pl = emulator.predict(testing_model.squeeze())

    # Plot it against the measured one: measured first, interpolated second
    fig, ax = plt.subplots(2, 1, figsize=(16, 8))
    for spectrum, legend_label in ((testing_Pl, "measured"), (predicted_Pl, "interpolated")):
        ax[0].plot(l, l * (l + 1) * spectrum / (2 * np.pi), label=legend_label)

    # Fractional difference
    ax[1].plot(l, (predicted_Pl - testing_Pl) / testing_Pl)

    ax[1].set_xlabel(r"$l$")
    ax[0].set_ylabel(r"$l(l+1)P_l/2\pi$")
    ax[1].set_ylabel(r"$P_l^I-P_l^M/P_l^M$")
    ax[0].set_yscale("log")
    ax[0].legend(loc="upper left")

    plt.savefig("power_interpolator_test.png")
    plt.clf()

    # Give it a shot with two points in parameter space to test vectorization
    two_parameter_points = np.array((training_models[0].squeeze(), testing_model.squeeze()))
    two_predicted_Pl = emulator.predict(two_parameter_points)

    # (prediction, reference, color) for each of the two points
    comparisons = (
        (two_predicted_Pl[0], emulator.training_set[0], "red"),
        (two_predicted_Pl[1], testing_Pl, "green"),
    )

    fig, ax = plt.subplots(2, 1, figsize=(16, 8))

    # Predicted
    for prediction, _, color in comparisons:
        ax[0].plot(l, l * (l + 1) * prediction / (2 * np.pi), color=color, linestyle="--")

    # Measured
    for _, reference, color in comparisons:
        ax[0].plot(l, l * (l + 1) * reference / (2 * np.pi), color=color, linestyle="-")

    # Fractional difference
    for prediction, reference, color in comparisons:
        ax[1].plot(l, (prediction - reference) / reference, color=color)

    ax[1].set_xlabel(r"$l$")
    ax[0].set_ylabel(r"$l(l+1)P_l/2\pi$")
    ax[1].set_ylabel(r"$P_l^I-P_l^M/P_l^M$")
    ax[0].set_yscale("log")

    plt.savefig("power_interpolator_test_2.png")
    plt.clf()

    # Generate a fudge power spectrum covariance matrix
    covariance = np.diag(testing_Pl**2 / (0.5 + l))

    # Generate a fudge observation by wiggling the testing power spectrum
    observation = testing_Pl + np.random.uniform(low=-testing_Pl * 0.1, high=testing_Pl * 0.1)

    # Choose a bunch of points in parameter space
    points = emulator.parameter_set[:, :-1]

    # Compute the chi2 in one go and in 4 chunks; the shapes must agree
    chi2_arguments = (points, observation, covariance)
    chi2_values_1 = emulator.chi2(*chi2_arguments)
    chi2_values_2 = emulator.chi2(*chi2_arguments, split_chunks=4)
    assert chi2_values_1.shape == chi2_values_2.shape

    # Compute the individual contributions
    chi2_contributions = emulator.chi2Contributions(points[0], observation, covariance)

    # Plot
    plt.imshow(chi2_contributions, interpolation="nearest")
    plt.colorbar()
    plt.xlabel(r"$j$")
    plt.ylabel(r"$i$")
    plt.savefig("chi2_contributions.png")
    plt.clf()

    return chi2_values_1, chi2_values_2
def main():
    """Compute the chi2 and likelihood cubes of the observations on a 3D parameter grid.

    Steps performed:

    1. parse the command line options and set the logging level;
    2. try to start an MPI pool (worker processes wait for tasks and exit);
    3. measure the feature covariance matrix from the covariance simulations,
       optionally using only the first N realizations (--realizations);
    4. load the observed feature and unpickle the emulator;
    5. reparametrize the emulator if the options file requires it;
    6. evaluate the chi2 on a regular grid whose extremes and number of points
       are read, for each parameter, from the options file;
    7. save the chi2 and likelihood cubes as npy files and log the best fit;
    8. with --save_debug, save additional troubleshooting arrays.

    The function takes no arguments (everything is read from sys.argv and from
    the options file) and returns nothing.
    """

    #################################################
    ############Option parsing#######################
    #################################################

    # Parse command line options
    parser = argparse.ArgumentParser()
    parser.add_argument("-f","--file",dest="options_file",action="store",type=str,help="analysis options file")
    parser.add_argument("-v","--verbose",dest="verbose",action="store_true",default=False,help="turn on verbosity")
    parser.add_argument("-vv","--verbose_plus",dest="verbose_plus",action="store_true",default=False,help="turn on additional verbosity")
    parser.add_argument("-m","--mask_scale",dest="mask_scale",action="store_true",default=False,help="scale peaks and power spectrum to unmasked area")
    parser.add_argument("-c","--cut_convergence",dest="cut_convergence",action="store",default=None,help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
    parser.add_argument("-g","--group_subfields",dest="group_subfields",action="store_true",default=False,help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
    parser.add_argument("-s","--save_points",dest="save_points",action="store",default=None,help="save points in parameter space to external npy file")
    parser.add_argument("-ss","--save_debug",dest="save_debug",action="store_true",default=False,help="save a bunch of debugging info for the analysis")
    parser.add_argument("-p","--prefix",dest="prefix",action="store",default="",help="prefix of the emulator to pickle")
    parser.add_argument("-r","--realizations",dest="realizations",type=int,default=None,help="use only the first N realizations to estimate the covariance matrix")
    parser.add_argument("-d","--differentiate",dest="differentiate",action="store_true",default=False,help="differentiate the first minkowski functional to get the PDF")
    parser.add_argument("-ms","--mean_subtract",dest="mean_subtract",action="store_true",default=False,help="lod in the observations with the subtracted means")

    cmd_args = parser.parse_args()

    # Nothing can be done without an options file
    if cmd_args.options_file is None:
        parser.print_help()
        sys.exit(0)

    # Set verbosity level
    if cmd_args.verbose_plus:
        logging.basicConfig(level=DEBUG_PLUS)
    elif cmd_args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    # Initialize MPI Pool; fall back to serial execution if it cannot be created.
    # Only Exception subclasses are caught, so that KeyboardInterrupt and SystemExit
    # are not silently swallowed as they were by a bare except.
    try:
        pool = MPIPool()
    except Exception:
        logging.debug("MPI Pool not available, running on a single core")
        pool = None

    # Worker processes only serve tasks handed out by the master, then exit
    if (pool is not None) and (not pool.is_master()):
        pool.wait()
        sys.exit(0)

    if pool is not None:
        logging.info("Started MPI Pool.")

    #################################################################################################################
    #################Info gathering: covariance matrix, observation and emulator#####################################
    #################################################################################################################

    # start
    start = time.time()
    last_timestamp = start

    # Instantiate a FeatureLoader object that will take care of the memory loading
    feature_loader = FeatureLoader(cmd_args)
    options = feature_loader.options

    # Use this model for the covariance matrix (from the new set of 50 N body simulations)
    covariance_model = CFHTcov.getModels(root_path=options.get("simulations","root_path"))
    logging.info("Measuring covariance matrix from model {0}".format(covariance_model))

    # Load in the covariance matrix
    fiducial_feature_ensemble = feature_loader.load_features(covariance_model)

    # If options is enabled, use only the first N realizations to estimate the covariance matrix
    if cmd_args.realizations is not None:
        logging.info("Using only the first {0} realizations to estimate the covariance matrix".format(cmd_args.realizations))
        fiducial_feature_ensemble = fiducial_feature_ensemble.subset(range(cmd_args.realizations))
        assert fiducial_feature_ensemble.num_realizations==cmd_args.realizations

    fiducial_features = fiducial_feature_ensemble.mean()
    features_covariance = fiducial_feature_ensemble.covariance()

    # timestamp
    now = time.time()
    logging.info("covariance loaded in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    # Get also the observation instance
    observation = CFHTLens(root_path=options.get("observations","root_path"))
    logging.info("Measuring the observations from {0}".format(observation))

    # And load the observations
    observed_feature = feature_loader.load_features(observation).mean()

    # timestamp
    now = time.time()
    logging.info("observation loaded in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    # Label identifying the feature set, shared by all the file names below
    feature_label = output_string(feature_loader.feature_string)

    # Create a LikelihoodAnalysis instance by unpickling one of the emulators
    # NOTE(review): this unpickles a file from the analysis save path; only trusted emulator files should be loaded
    emulators_dir = os.path.join(options.get("analysis","save_path"),"emulators")
    emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,feature_label))
    logging.info("Unpickling emulator from {0}...".format(emulator_file))
    analysis = LikelihoodAnalysis.load(emulator_file)

    # timestamp
    now = time.time()
    logging.info("emulator unpickled in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    ####################################################################################################################
    ######################################Compute the chi2 cube#########################################################
    ####################################################################################################################

    logging.info("Initializing chi2 meshgrid...")

    # Read parameters to use from options
    use_parameters = options.get("parameters","use_parameters").replace(" ","").split(",")
    assert len(use_parameters)==3

    # Reparametrization hash key
    use_parameters_hash = "-".join(use_parameters)

    # Might need to reparametrize the emulator here, use a dictionary for reparametrizations
    assert use_parameters_hash in reparametrization.keys(),"No reparametrization scheme specified for {0} parametrization".format(use_parameters_hash)

    if reparametrization[use_parameters_hash] is not None:

        # Reparametrize
        logging.info("Reparametrizing emulator according to {0} parametrization".format(use_parameters_hash))
        analysis.reparametrize(reparametrization[use_parameters_hash])

        # Retrain for safety
        analysis.train()

    # Log current parametrization to user
    logging.info("Using parametrization {0}".format(use_parameters_hash))

    # Set the points in parameter space on which to compute the chi2 (read extremes from options)
    par = list()
    for p in range(3):
        assert options.has_section(use_parameters[p]),"No extremes specified for parameter {0}".format(use_parameters[p])
        # A complex step makes np.ogrid return num_points values, both extremes included
        par.append(np.ogrid[options.getfloat(use_parameters[p],"min"):options.getfloat(use_parameters[p],"max"):options.getint(use_parameters[p],"num_points")*1j])

    num_points = len(par[0]) * len(par[1]) * len(par[2])
    # One row per grid point, one column per parameter
    points = np.array(np.meshgrid(par[0],par[1],par[2],indexing="ij")).reshape(3,num_points).transpose()

    # Now compute the chi2 at each of these points
    if pool:
        split_chunks = pool.size
        logging.info("Computing chi squared for {0} parameter combinations using {1} cores...".format(points.shape[0],pool.size))
    else:
        split_chunks = None
        logging.info("Computing chi squared for {0} parameter combinations using 1 core...".format(points.shape[0]))

    chi_squared = analysis.chi2(points,observed_feature=observed_feature,features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

    now = time.time()
    logging.info("chi2 calculations completed in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    # Close pool
    if pool is not None:
        pool.close()
        logging.info("Closed MPI Pool.")

    # save output
    likelihoods_dir = os.path.join(options.get("analysis","save_path"),"likelihoods_{0}".format(use_parameters_hash))

    prefix = cmd_args.prefix
    if cmd_args.mean_subtract:
        prefix += "_meansub"

    if not os.path.isdir(likelihoods_dir):
        os.mkdir(likelihoods_dir)

    if cmd_args.realizations is None:
        chi2_file = os.path.join(likelihoods_dir,"chi2{0}_{1}.npy".format(prefix,feature_label))
        likelihood_file = os.path.join(likelihoods_dir,"likelihood{0}_{1}.npy".format(prefix,feature_label))
    else:
        chi2_file = os.path.join(likelihoods_dir,"chi2{0}{1}real_{2}.npy".format(prefix,cmd_args.realizations,feature_label))
        likelihood_file = os.path.join(likelihoods_dir,"likelihood{0}{1}real_{2}.npy".format(prefix,cmd_args.realizations,feature_label))

    # The flat chi2 array is reshaped into a cube with one axis per parameter
    cube_shape = par[0].shape + par[1].shape + par[2].shape

    logging.info("Saving chi2 to {0}".format(chi2_file))
    np.save(chi2_file,chi_squared.reshape(cube_shape))

    logging.info("Saving full likelihood to {0}".format(likelihood_file))
    likelihood_cube = analysis.likelihood(chi_squared.reshape(cube_shape))
    np.save(likelihood_file,likelihood_cube)

    # Find the maximum of the likelihood using ContourPlot functionality
    contour = ContourPlot()
    contour.getLikelihood(likelihood_cube,parameter_axes={use_parameters[0]:0,use_parameters[1]:1,use_parameters[2]:2},parameter_labels={use_parameters[0]:"0",use_parameters[1]:"1",use_parameters[2]:"2"})
    contour.getUnitsFromOptions(options)
    parameters_maximum = contour.getMaximum()

    # Order the parameter names by their axis in the cube; sorted() is used because
    # on Python 3 keys() returns a view that has no sort() method
    parameter_keys = sorted(parameters_maximum.keys(),key=contour.parameter_axes.get)

    # Display the new best fit before exiting
    best_fit_parameters = np.array([ parameters_maximum[par_key] for par_key in parameter_keys ])
    best_fit_chi2 = analysis.chi2(np.array(best_fit_parameters),features_covariance=features_covariance,observed_feature=observed_feature)
    logging.info("Best fit is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2={1[0]:.3f}({2} dof)".format(best_fit_parameters,best_fit_chi2,analysis.training_set.shape[1]))

    # Additionally save some debugging info to plot, etc...
    if cmd_args.save_debug:

        troubleshoot_dir = os.path.join(options.get("analysis","save_path"),"troubleshoot_{0}".format(use_parameters_hash))
        if not os.path.isdir(troubleshoot_dir):
            os.mkdir(troubleshoot_dir)

        logging.info("Saving troubleshoot info to {0}...".format(troubleshoot_dir))

        np.save(os.path.join(troubleshoot_dir,"observation_{0}.npy".format(feature_label)),observed_feature)
        np.save(os.path.join(troubleshoot_dir,"covariance_{0}.npy".format(feature_label)),features_covariance)
        np.save(os.path.join(troubleshoot_dir,"fiducial_{0}.npy".format(feature_label)),fiducial_features)
        np.save(os.path.join(troubleshoot_dir,"best_fit_features_{0}.npy".format(feature_label)),analysis.predict(best_fit_parameters))
        # Emulator prediction at a hard coded parameter point
        np.save(os.path.join(troubleshoot_dir,"fiducial_from_interpolator_{0}.npy".format(feature_label)),analysis.predict(np.array([0.26,-1.0,0.800])))
        np.save(os.path.join(troubleshoot_dir,"chi2_contributions_{0}.npy".format(feature_label)),analysis.chi2Contributions(best_fit_parameters,observed_feature=observed_feature,features_covariance=features_covariance))

    end = time.time()

    logging.info("DONE!!")
    logging.info("Completed in {0:.1f}s".format(end-start))
def main():
    """Remove one model from the emulator, treat it as data and compute its likelihood cube.

    Steps performed:

    1. parse the command line options and set the logging level;
    2. try to start an MPI pool (worker processes wait for tasks and exit);
    3. measure the feature covariance matrix from the simulated model selected
       in the options file and unpickle the emulator;
    4. pick one realization of the model selected with --remove (the random
       seed is set with --random) and use it as the observed feature;
    5. log the chi2 of that feature before and after removing the model from
       the emulator (the emulator is retrained after the removal);
    6. evaluate the chi2 on a regular (Omega_m, w, sigma8) grid read from the
       options file, save the chi2 and likelihood cubes in the current
       directory and log the new best fit.

    The function takes no arguments (everything is read from sys.argv and from
    the options file) and returns nothing.
    """

    #################################################
    ############Option parsing#######################
    #################################################

    # Parse command line options
    parser = argparse.ArgumentParser()
    parser.add_argument("-f","--file",dest="options_file",action="store",type=str,help="analysis options file")
    parser.add_argument("-v","--verbose",dest="verbose",action="store_true",default=False,help="turn on verbosity")
    parser.add_argument("-vv","--verbose_plus",dest="verbose_plus",action="store_true",default=False,help="turn on additional verbosity")
    parser.add_argument("-m","--mask_scale",dest="mask_scale",action="store_true",default=False,help="scale peaks and power spectrum to unmasked area")
    parser.add_argument("-c","--cut_convergence",dest="cut_convergence",action="store",default=None,help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
    parser.add_argument("-g","--group_subfields",dest="group_subfields",action="store_true",default=False,help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
    parser.add_argument("-s","--save_points",dest="save_points",action="store",default=None,help="save points in parameter space to external npy file")
    parser.add_argument("-ss","--save_debug",dest="save_debug",action="store_true",default=False,help="save a bunch of debugging info for the analysis")
    parser.add_argument("-p","--prefix",dest="prefix",action="store",default="",help="prefix of the emulator to pickle")
    parser.add_argument("-r","--remove",dest="remove",action="store",type=int,default=24,help="model to remove from the analysis")
    parser.add_argument("-R","--random",dest="random",action="store",type=int,default=0,help="random seed initialization for realization picking")

    cmd_args = parser.parse_args()

    # Nothing can be done without an options file
    if cmd_args.options_file is None:
        parser.print_help()
        sys.exit(0)

    # Set verbosity level
    if cmd_args.verbose_plus:
        logging.basicConfig(level=DEBUG_PLUS)
    elif cmd_args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    # Initialize MPI Pool; fall back to serial execution if it cannot be created.
    # Only Exception subclasses are caught, so that KeyboardInterrupt and SystemExit
    # are not silently swallowed as they were by a bare except.
    try:
        pool = MPIPool()
    except Exception:
        logging.debug("MPI Pool not available, running on a single core")
        pool = None

    # Worker processes only serve tasks handed out by the master, then exit
    if (pool is not None) and (not pool.is_master()):
        pool.wait()
        sys.exit(0)

    if pool is not None:
        logging.info("Started MPI Pool.")

    #################################################################################################################
    #################Info gathering: covariance matrix, observation and emulator#####################################
    #################################################################################################################

    # start
    start = time.time()
    last_timestamp = start

    # Instantiate a FeatureLoader object that will take care of the memory loading
    feature_loader = FeatureLoader(cmd_args)
    options = feature_loader.options

    # Get the names of all the simulated models available for the CFHT analysis, including smoothing scales and subfields
    all_simulated_models = CFHTemu1.getModels(root_path=options.get("simulations","root_path"))

    # Use this model for the covariance matrix
    covariance_model = all_simulated_models[options.getint("analysis","covariance_model")]
    logging.info("Measuring covariance matrix from model {0}".format(covariance_model))

    # Load in the covariance matrix
    features_covariance = feature_loader.load_features(covariance_model).covariance()

    # timestamp
    now = time.time()
    logging.info("covariance loaded in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    # Label identifying the feature set, shared by all the file names below
    feature_label = output_string(feature_loader.feature_string)

    # Create a LikelihoodAnalysis instance by unpickling one of the emulators
    # NOTE(review): this unpickles a file from the analysis save path; only trusted emulator files should be loaded
    emulators_dir = os.path.join(options.get("analysis","save_path"),"emulators")
    emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,feature_label))
    logging.info("Unpickling emulator from {0}...".format(emulator_file))
    analysis = LikelihoodAnalysis.load(emulator_file)

    # timestamp
    now = time.time()
    logging.info("emulator unpickled in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    # Initialize random seed and pick the realization that plays the role of the data
    np.random.seed(cmd_args.random)
    realization = np.random.randint(0,1000)

    # Treat the removed model as data
    model_to_remove = all_simulated_models[cmd_args.remove]
    parameters_to_remove = model_to_remove.squeeze()

    logging.info("Treating model {0}, realization {1} as data, loading features...".format(model_to_remove,realization+1))
    # Use the realization that was just logged: a second np.random.randint draw here
    # would select a realization different from the one reported to the user
    observed_feature = feature_loader.load_features(model_to_remove)[realization]

    # Compute the chi2 for this observed feature without removing it from the emulator (must be close to 0)
    chi2_before = analysis.chi2(parameters_to_remove,features_covariance=features_covariance,observed_feature=observed_feature)
    logging.info("Chi2 before removal: {0[0]:.3f} ({1} dof)".format(chi2_before,analysis.training_set.shape[1]))

    # Remove the model from the emulator
    remove_index = analysis.find(parameters_to_remove)[0]
    logging.info("Removing model {0} with parameters {1} from emulator...".format(remove_index,analysis.parameter_set[remove_index]))
    analysis.remove_model(remove_index)

    # Retrain without the removed model
    analysis.train()

    # Compute the chi2 for this observed feature after removing it from the emulator (likely it's not 0 anymore)
    chi2_after = analysis.chi2(parameters_to_remove,features_covariance=features_covariance,observed_feature=observed_feature)
    logging.info("Chi2 after removal: {0[0]:.3f} ({1} dof)".format(chi2_after,analysis.training_set.shape[1]))

    ####################################################################################################################
    ######################################Compute the chi2 cube#########################################################
    ####################################################################################################################

    logging.info("Initializing chi2 meshgrid...")

    # Set the points in parameter space on which to compute the chi2 (read from options);
    # a complex step makes np.ogrid return num_points values, both extremes included
    Om = np.ogrid[options.getfloat("Omega_m","min"):options.getfloat("Omega_m","max"):options.getint("Omega_m","num_points")*1j]
    w = np.ogrid[options.getfloat("w","min"):options.getfloat("w","max"):options.getint("w","num_points")*1j]
    si8 = np.ogrid[options.getfloat("sigma8","min"):options.getfloat("sigma8","max"):options.getint("sigma8","num_points")*1j]

    num_points = len(Om) * len(w) * len(si8)

    # One row per grid point, one column per parameter
    points = np.array(np.meshgrid(Om,w,si8,indexing="ij")).reshape(3,num_points).transpose()

    if cmd_args.save_points is not None:
        # str.rstrip(".npy") strips any trailing '.', 'n', 'p', 'y' characters (not the suffix),
        # which mangles names such as "happy.npy"; append the extension only when it is missing
        points_file = cmd_args.save_points
        if not points_file.endswith(".npy"):
            points_file += ".npy"
        logging.info("Saving points to {0}".format(points_file))
        np.save(points_file,points)

    # Now compute the chi2 at each of these points
    if pool:
        split_chunks = pool.size
        logging.info("Computing chi squared for {0} parameter combinations using {1} cores...".format(points.shape[0],pool.size))
    else:
        split_chunks = None
        logging.info("Computing chi squared for {0} parameter combinations using 1 core...".format(points.shape[0]))

    chi_squared = analysis.chi2(points,observed_feature=observed_feature,features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

    # Close MPI Pool
    if pool is not None:
        pool.close()
        logging.info("Closed MPI Pool.")

    now = time.time()
    logging.info("chi2 calculations completed in {0:.1f}s".format(now-last_timestamp))
    last_timestamp = now

    # Save output (in the current working directory)
    likelihood_file = "likelihood_remove{0}_{1}.npy".format(cmd_args.remove,feature_label)
    chi2_file = "chi2_remove{0}_{1}.npy".format(cmd_args.remove,feature_label)

    # The flat chi2 array is reshaped into a cube with one axis per parameter
    cube_shape = Om.shape + w.shape + si8.shape

    logging.info("Saving chi2 to {0}".format(chi2_file))
    np.save(chi2_file,chi_squared.reshape(cube_shape))

    logging.info("Saving full likelihood to {0}".format(likelihood_file))
    likelihood_cube = analysis.likelihood(chi_squared.reshape(cube_shape))
    np.save(likelihood_file,likelihood_cube)

    # Find the maximum of the likelihood using ContourPlot functionality
    contour = ContourPlot()
    contour.getLikelihood(likelihood_cube)
    contour.getUnitsFromOptions(options)
    parameters_maximum = contour.getMaximum()

    # Order the parameter names by their axis in the cube; sorted() is used because
    # on Python 3 keys() returns a view that has no sort() method
    parameter_keys = sorted(parameters_maximum.keys(),key=contour.parameter_axes.get)

    # Display the new best fit before exiting
    best_fit_parameters = [ parameters_maximum[par_key] for par_key in parameter_keys ]
    best_fit_chi2 = analysis.chi2(np.array(best_fit_parameters),features_covariance=features_covariance,observed_feature=observed_feature)
    logging.info("New best fit is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2={1[0]:.3f}".format(best_fit_parameters,best_fit_chi2))

    # End
    end = time.time()

    logging.info("DONE!!")
    logging.info("Completed in {0:.1f}s".format(end-start))