def test_remove():

	emulator = LikelihoodAnalysis.load("analysis.p")
	emulator.remove_model([8,10])
	emulator.train(use_parameters=range(3))

	assert emulator.parameter_set.shape[0] == 14
	assert emulator.training_set.shape[0] == 14 
def test_find():

	emulator = LikelihoodAnalysis.load("analysis.p")
	parameters_to_find = emulator.parameter_set[7]

	n = emulator.find(parameters_to_find)
	assert len(n)==1
	assert n[0] == 7
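Both tests above assume a pickled emulator "analysis.p" on disk; test_interpolation (Example #11 below) produces it with analysis.save("analysis.p"). A minimal sketch of building such a fixture with the same LikelihoodAnalysis API, where training_data is a hypothetical iterable of (parameters, feature) pairs:

analysis = LikelihoodAnalysis()
for parameters, feature in training_data:  #hypothetical (parameters, feature) pairs
	analysis.add_model(parameters=parameters, feature=feature)
analysis.train(use_parameters=range(3))
analysis.save("analysis.p")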
Example #5
def emulatorAccuracy(cmd_args,descriptors_in_plot=single[:-1]):

	#Smoothing scale
	smoothing_scale = 1.0

	#Ready to plot
	ax = host_subplot(111, axes_class=AA.Axes)
	for n,descr in enumerate(descriptors_in_plot):

		predicted = np.load(os.path.join(root_dir,"troubleshoot","fiducial_from_interpolator_{0}--{1:.1f}.npy".format(descr,smoothing_scale)))
		measured = np.load(os.path.join(root_dir,"troubleshoot","fiducial_{0}--{1:.1f}.npy".format(descr,smoothing_scale)))
		covariance = np.load(os.path.join(root_dir,"troubleshoot","covariance_{0}--{1:.1f}.npy".format(descr,smoothing_scale)))

		ax.plot(np.abs(measured-predicted)/np.sqrt(covariance.diagonal()),color=brew_colors[n],label=descriptors[descr])

		#Plot also the predicted descriptors in another cosmology
		emulator = LikelihoodAnalysis.load(os.path.join(root_dir,"emulators","emulator_{0}--{1:.1f}.p".format(descr,smoothing_scale)))
		predictedOther = emulator.predict(np.array([0.8,-1.0,0.5]))

		ax.plot(np.abs(measured-predictedOther)/np.sqrt(covariance.diagonal()),color=brew_colors[n],linestyle="--")

	#Rename the ticks
	tk = ax.get_xticks()
	new_tk = np.zeros(len(tk))
	for n in range(len(tk)):
		new_tk[n] = -0.04 + ((0.12+0.04)/(len(tk)-1))*n
	ax.set_xticklabels(["{0:.2f}".format(t) for t in new_tk])
	ax.set_xlabel(r"$\kappa$",fontsize=22)
	ax.set_ylabel(r"$(E-M)/\sqrt{C_{MM}}$",fontsize=22)

	#Set a top axis too
	axT = ax.twin()
	tk = axT.get_xticks()
	new_tk = np.zeros(len(tk))
	for n in range(len(tk)):
		new_tk[n] = 300.0 + ((5000.0-300.0)/(len(tk)-1))*n
	axT.set_xticklabels(["{0}".format(int(new_tk[0]))] + ["{0}".format(int(t/1000)*1000) for t in new_tk[1:]])
	axT.set_yticks([])
	axT.set_xlabel(r"$l$",fontsize=22)

	
	ax.set_yscale("log")
	ax.set_ylim(1.0e-3,20.0)
	ax.legend(loc="lower left")

	#Save the figure
	plt.tight_layout()
	plt.savefig("emulator_accuracy.{0}".format(cmd_args.type))
Example #6
def pca(cmd_args):

	#Smoothing scales in arcmin
	smoothing_scale=1.0

	#Create figure
	fig,ax = plt.subplots(1,2,figsize=(16,8))

	#Cycle over descriptors to plot PCA eigenvalues
	for n,descr in enumerate(single):

		#Unpickle the emulator
		an = LikelihoodAnalysis.load(os.path.join(root_dir,"emulators","emulator_{0}--{1:.1f}.p".format(descr,smoothing_scale)))

		#Compute PCA
		pca = an.principalComponents()

		#Plot the eigenvalues on the left and the cumulative sum on the right
		ax[0].plot(pca.eigenvalues,label=descriptors[descr],color=brew_colors[n])
		ax[1].plot(pca.eigenvalues.cumsum()/pca.eigenvalues.sum(),label=descriptors[descr],color=brew_colors[n])


	#Draw a line at 3 components
	ax[0].plot(3*np.ones(100),np.linspace(1.0e-10,1.0e2,100),color="black",linestyle="--")
	ax[1].plot(3*np.ones(100),np.linspace(0.9,1.01,100),color="black",linestyle="--")
	ax[1].set_ylim(0.98,1.001)
	ax[1].set_xscale("log")

	#Legend
	ax[0].legend()

	#Scale
	ax[0].set_yscale("log")

	#Labels
	ax[0].set_xlabel(r"$i$",fontsize=18)
	ax[1].set_xlabel(r"$n$",fontsize=18)
	ax[0].set_ylabel(r"$S^2_i$",fontsize=18)
	ax[1].set_ylabel(r"$\Sigma_{i=0}^n S^2_i/S^2_{tot}$",fontsize=18)

	#Save figure
	fig.tight_layout()
	fig.savefig("pca_components.{0}".format(cmd_args.type))
def main():

	#################################################
	############Option parsing#######################
	#################################################

	#Parse command line options
	parser = argparse.ArgumentParser()
	parser.add_argument("-f","--file",dest="options_file",action="store",type=str,help="analysis options file")
	parser.add_argument("-v","--verbose",dest="verbose",action="store_true",default=False,help="turn on verbosity")
	parser.add_argument("-vv","--verbose_plus",dest="verbose_plus",action="store_true",default=False,help="turn on additional verbosity")
	parser.add_argument("-m","--mask_scale",dest="mask_scale",action="store_true",default=False,help="scale peaks and power spectrum to unmasked area")
	parser.add_argument("-c","--cut_convergence",dest="cut_convergence",action="store",default=None,help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
	parser.add_argument("-g","--group_subfields",dest="group_subfields",action="store_true",default=False,help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
	parser.add_argument("-s","--save_features",dest="save_features",action="store_true",default=False,help="save features profiles")
	parser.add_argument("-ss","--save",dest="save",action="store_true",default=False,help="save the best fits and corresponding chi2")
	parser.add_argument("-p","--prefix",dest="prefix",action="store",default="",help="prefix of the emulator to pickle")
	parser.add_argument("-l","--likelihood",dest="likelihood",action="store_true",default=False,help="save the likelihood cubes for the mocks")
	parser.add_argument("-o","--observation",dest="observation",action="store_true",default=False,help="append the actual observation results to the mock results for direct comparison")
	parser.add_argument("-d","--differentiate",dest="differentiate",action="store_true",default=False,help="differentiate the first minkowski functional to get the PDF")

	cmd_args = parser.parse_args()

	if cmd_args.options_file is None:
		parser.print_help()
		sys.exit(0)

	#Set verbosity level
	if cmd_args.verbose_plus:
		logging.basicConfig(level=DEBUG_PLUS)
	elif cmd_args.verbose:
		logging.basicConfig(level=logging.DEBUG)
	else:
		logging.basicConfig(level=logging.INFO)

	#Initialize MPI Pool (fall back to serial execution if MPI is unavailable)
	try:
		pool = MPIPool()
	except Exception:
		pool = None

	if (pool is not None) and (not pool.is_master()):
		pool.wait()
		sys.exit(0)

	if pool is not None:
		logging.info("Started MPI Pool.")

	#################################################################################################################
	#################Info gathering: covariance matrix, observation and emulator#####################################
	#################################################################################################################

	#start
	start = time.time()
	last_timestamp = start

	#Instantiate a FeatureLoader object that will take care of the memory loading
	feature_loader = FeatureLoader(cmd_args)

	###########################################################################################################################################

	#Use this model for the covariance matrix (from the new set of 50 N-body simulations)
	covariance_model = CFHTcov.getModels(root_path=feature_loader.options.get("simulations","root_path"))
	logging.info("Measuring covariance matrix from model {0}".format(covariance_model))
	
	#Load in the covariance matrix
	fiducial_feature_ensemble = feature_loader.load_features(covariance_model)
	fiducial_features = fiducial_feature_ensemble.mean()
	features_covariance = fiducial_feature_ensemble.covariance()

	#timestamp
	now = time.time()
	logging.info("covariance loaded in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	################################################################################################################################################

	#Treat the set of 50 N-body simulations as data
	observation = CFHTcov.getModels(root_path=feature_loader.options.get("observations","root_path"))
	logging.info("Measuring the observations from {0}".format(observation))
	
	#And load the observations
	observed_feature = feature_loader.load_features(observation)

	#timestamp
	now = time.time()
	logging.info("observation loaded in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	################################################################################################################################################

	#Create a LikelihoodAnalysis instance by unpickling one of the emulators
	emulators_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"emulators")
	emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,output_string(feature_loader.feature_string)))
	logging.info("Unpickling emulator from {0}...".format(emulator_file))
	analysis = LikelihoodAnalysis.load(emulator_file)

	#timestamp
	now = time.time()
	logging.info("emulator unpickled in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	####################################################################################################################
	######################################Compute the chi2 cube#########################################################
	####################################################################################################################

	logging.info("Initializing chi2 meshgrid...")

	#Set the points in parameter space on which to compute the chi2 (read from options)
	Om = np.ogrid[feature_loader.options.getfloat("Omega_m","min"):feature_loader.options.getfloat("Omega_m","max"):feature_loader.options.getint("Omega_m","num_points")*1j]
	w = np.ogrid[feature_loader.options.getfloat("w","min"):feature_loader.options.getfloat("w","max"):feature_loader.options.getint("w","num_points")*1j]
	si8 = np.ogrid[feature_loader.options.getfloat("sigma8","min"):feature_loader.options.getfloat("sigma8","max"):feature_loader.options.getint("sigma8","num_points")*1j]

	num_points = len(Om) * len(w) * len(si8) 

	points = np.array(np.meshgrid(Om,w,si8,indexing="ij")).reshape(3,num_points).transpose()

	#Now compute the chi2 at each of these points
	if pool:
		split_chunks = pool.size
		logging.info("Computing chi squared for {0} parameter combinations using {1} cores...".format(points.shape[0],pool.size))
	else:
		split_chunks = None
		logging.info("Computing chi squared for {0} parameter combinations using 1 core...".format(points.shape[0]))

	#Allocate array for best fit
	first_realization = feature_loader.options.getint("mocks","first_realization")
	last_realization = feature_loader.options.getint("mocks","last_realization")

	if cmd_args.observation:
		best_fit_all = np.zeros((last_realization-first_realization+1 + 1,analysis.parameter_set.shape[1]))
		chi2_all = np.zeros(last_realization-first_realization+1 + 1)
		chi2_from_expected_all = np.zeros(last_realization-first_realization+1 + 1)
	else:
		best_fit_all = np.zeros((last_realization-first_realization+1,analysis.parameter_set.shape[1]))
		chi2_all = np.zeros(last_realization-first_realization+1)
		chi2_from_expected_all = np.zeros(last_realization-first_realization+1)

	#Cycle through the realizations and obtain a best fit for each one of them
	
	for nreal in range(first_realization-1,last_realization):
	
		chi_squared = analysis.chi2(points,observed_feature=observed_feature[nreal],features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

		now = time.time()
		logging.info("realization {0}, chi2 calculations completed in {1:.1f}s".format(nreal+1,now-last_timestamp))
		last_timestamp = now

		#After chi2, compute the likelihood
		likelihood_cube = analysis.likelihood(chi_squared.reshape(Om.shape + w.shape + si8.shape))

		#Maybe save the likelihood cube?
		if cmd_args.likelihood:
			likelihood_filename = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot","likelihood{0}_{1}.npy".format(nreal+1,output_string(feature_loader.feature_string)))
			logging.info("Saving likelihood cube to {0}...".format(likelihood_filename))
			np.save(likelihood_filename,likelihood_cube)

		#Maybe save the feature profiles?
		if cmd_args.save_features:
			features_filename = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot","features{0}_{1}.npy".format(nreal+1,output_string(feature_loader.feature_string)))
			logging.info("Saving features for realization {0} to {1}...".format(nreal+1,features_filename))
			np.save(features_filename,observed_feature[nreal])

		#Find the maximum of the likelihood using ContourPlot functionality
		contour = ContourPlot()
		contour.getLikelihood(likelihood_cube)
		contour.getUnitsFromOptions(feature_loader.options)
		parameters_maximum = contour.getMaximum()
		parameter_keys = sorted(parameters_maximum.keys(),key=contour.parameter_axes.get)

		#Display the new best fit before exiting
		best_fit_parameters = np.array([ parameters_maximum[par_key] for par_key in parameter_keys ])
		best_fit_chi2 = analysis.chi2(best_fit_parameters,features_covariance=features_covariance,observed_feature=observed_feature[nreal])[0]
		chi2_from_expected = analysis.chi2(np.array([0.26,-1.0,0.800]),features_covariance=features_covariance,observed_feature=observed_feature[nreal])[0]

		logging.info("Best fit for realization {4} is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2_best={1:.3f}({2} dof), chi2_expected={3:.3f}({2} dof)".format(best_fit_parameters,best_fit_chi2,analysis.training_set.shape[1],chi2_from_expected,nreal+1))

		#Update global array with best fit parameters and corresponding chi2
		best_fit_all[nreal-first_realization+1,:] = best_fit_parameters.copy()
		chi2_all[nreal-first_realization+1] = best_fit_chi2 
		chi2_from_expected_all[nreal-first_realization+1] = chi2_from_expected

	#######################################################################################################################################################################

	#If option was selected, append the observation results to the mock ones, for comparison
	if cmd_args.observation:

		observed_feature = feature_loader.load_features(CFHTLens(root_path=feature_loader.options.get("observations","root_path")))[0]

		chi_squared = analysis.chi2(points,observed_feature=observed_feature,features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

		now = time.time()
		logging.info("actual observation, chi2 calculations completed in {0:.1f}s".format(now-last_timestamp))
		last_timestamp = now

		#After chi2, compute the likelihood
		likelihood_cube = analysis.likelihood(chi_squared.reshape(Om.shape + w.shape + si8.shape))

		#Maybe save the likelihood cube?
		if cmd_args.likelihood:
			likelihood_filename = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot","likelihood_obs_{0}.npy".format(output_string(feature_loader.feature_string)))
			logging.info("Saving likelihood cube to {0}...".format(likelihood_filename))
			np.save(likelihood_filename,likelihood_cube)

		#Maybe save the feature profiles?
		if cmd_args.save_features:
			features_filename = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot","features_obs_{0}.npy".format(output_string(feature_loader.feature_string)))
			logging.info("Saving observed features to {0}...".format(features_filename))
			np.save(features_filename,observed_feature)

		#Find the maximum of the likelihood using ContourPlot functionality
		contour = ContourPlot()
		contour.getLikelihood(likelihood_cube)
		contour.getUnitsFromOptions(feature_loader.options)
		parameters_maximum = contour.getMaximum()
		parameter_keys = sorted(parameters_maximum.keys(),key=contour.parameter_axes.get)

		#Display the new best fit before exiting
		best_fit_parameters = np.array([ parameters_maximum[par_key] for par_key in parameter_keys ])
		best_fit_chi2 = analysis.chi2(best_fit_parameters,features_covariance=features_covariance,observed_feature=observed_feature)[0]
		chi2_from_expected = analysis.chi2(np.array([0.26,-1.0,0.800]),features_covariance=features_covariance,observed_feature=observed_feature)[0]
		
		logging.info("Best fit for observation is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2_best={1:.3f}({2} dof), chi2_expected={3:.3f}({2} dof)".format(best_fit_parameters,best_fit_chi2,analysis.training_set.shape[1],chi2_from_expected))

		#Update global array with best fit parameters and corresponding chi2
		best_fit_all[-1,:] = best_fit_parameters.copy()
		chi2_all[-1] = best_fit_chi2
		chi2_from_expected_all[-1] = chi2_from_expected

	#######################################################################################################################################################################
	
	#Close MPI Pool
	if pool is not None:
		pool.close()
		logging.info("Closed MPI Pool.")

	if cmd_args.save:

		#Save the best fit parameters for all realizations
		best_fit_filename = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot","best_fit_all_{0}.npy".format(output_string(feature_loader.feature_string)))
		logging.info("Saving best fit to {0}...".format(best_fit_filename))
		np.save(best_fit_filename,best_fit_all)

		#Save the best fit chi2 for all realizations
		chi2_filename = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot","chi2_all_{0}.npy".format(output_string(feature_loader.feature_string)))
		logging.info("Saving best fit chi2 to {0}...".format(chi2_filename))
		np.save(chi2_filename,chi2_all)

		#Save also the chi2 for the expected best fit
		chi2_filename = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot","chi2_all_expected_{0}.npy".format(output_string(feature_loader.feature_string)))
		logging.info("Saving expected chi2 to {0}...".format(chi2_filename))
		np.save(chi2_filename,chi2_from_expected_all)

	end = time.time()

	logging.info("DONE!!")
	logging.info("Completed in {0:.1f}s".format(end-start))
Example #8
def main():

	#################################################
	############Option parsing#######################
	#################################################

	#Parse command line options
	parser = argparse.ArgumentParser()
	parser.add_argument("-f","--file",dest="options_file",action="store",type=str,help="analysis options file")
	parser.add_argument("-v","--verbose",dest="verbose",action="store_true",default=False,help="turn on verbosity")
	parser.add_argument("-vv","--verbose_plus",dest="verbose_plus",action="store_true",default=False,help="turn on additional verbosity")
	parser.add_argument("-m","--mask_scale",dest="mask_scale",action="store_true",default=False,help="scale peaks and power spectrum to unmasked area")
	parser.add_argument("-c","--cut_convergence",dest="cut_convergence",action="store",default=None,help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
	parser.add_argument("-g","--group_subfields",dest="group_subfields",action="store_true",default=False,help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
	parser.add_argument("-s","--save_points",dest="save_points",action="store",default=None,help="save points in parameter space to external npy file")
	parser.add_argument("-ss","--save_debug",dest="save_debug",action="store_true",default=False,help="save a bunch of debugging info for the analysis")
	parser.add_argument("-p","--prefix",dest="prefix",action="store",default="",help="prefix of the emulator to pickle")
	parser.add_argument("-r","--realizations",dest="realizations",type=int,default=None,help="use only the first N realizations to estimate the covariance matrix")
	parser.add_argument("-d","--differentiate",dest="differentiate",action="store_true",default=False,help="differentiate the first minkowski functional to get the PDF")

	cmd_args = parser.parse_args()

	if cmd_args.options_file is None:
		parser.print_help()
		sys.exit(0)

	#Set verbosity level
	if cmd_args.verbose_plus:
		logging.basicConfig(level=DEBUG_PLUS)
	elif cmd_args.verbose:
		logging.basicConfig(level=logging.DEBUG)
	else:
		logging.basicConfig(level=logging.INFO)

	#Initialize MPI Pool (fall back to serial execution if MPI is unavailable)
	try:
		pool = MPIPool()
	except Exception:
		pool = None

	if (pool is not None) and (not pool.is_master()):
		pool.wait()
		sys.exit(0)

	if pool is not None:
		logging.info("Started MPI Pool.")

	#################################################################################################################
	#################Info gathering: covariance matrix, observation and emulator#####################################
	#################################################################################################################

	#start
	start = time.time()
	last_timestamp = start

	#Instantiate a FeatureLoader object that will take care of the memory loading
	feature_loader = FeatureLoader(cmd_args)

	###########################################################################################################################################

	#Use this model for the covariance matrix (from the new set of 50 N-body simulations)
	covariance_model = CFHTcov.getModels(root_path=feature_loader.options.get("simulations","root_path"))
	logging.info("Measuring covariance matrix from model {0}".format(covariance_model))
	
	#Load in the covariance matrix
	fiducial_feature_ensemble = feature_loader.load_features(covariance_model)

	#Create a LikelihoodAnalysis instance by unpickling one of the emulators
	emulators_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"emulators")
	emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,output_string(feature_loader.feature_string)))
	logging.info("Unpickling emulator from {0}...".format(emulator_file))
	analysis = LikelihoodAnalysis.load(emulator_file)

	#Return
	return fiducial_feature_ensemble,analysis
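Unlike the other main() variants on this page, this one returns early with the pieces needed for interactive work. A sketch of what a caller might do with them, using only methods demonstrated elsewhere on this page (the fiducial parameter point is illustrative):

import numpy as np

fiducial_feature_ensemble, analysis = main()
features_covariance = fiducial_feature_ensemble.covariance()
chi2 = analysis.chi2(np.array([0.26, -1.0, 0.8]),
                     observed_feature=fiducial_feature_ensemble.mean(),
                     features_covariance=features_covariance)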
def main(n_components_collection,cmd_args,pool):

	#################################################################################################################
	#################Info gathering: covariance matrix, observation and emulator#####################################
	#################################################################################################################

	#start
	start = time.time()
	last_timestamp = start

	#Instantiate a FeatureLoader object that will take care of the data loading
	feature_loader_collection = FeatureLoaderCross.fromArgs(cmd_args)
	fiducial_feature_ensemble_collection = list()
	observed_feature_ensemble_collection = list()
	analysis_collection = list()
	formatted_output_string_collection = list()

	#Sanity check
	if type(n_components_collection)==list:
		assert len(n_components_collection)==len(feature_loader_collection)

	#Cycle over feature types
	for nc,feature_loader in enumerate(feature_loader_collection):

		#Use the same number of components for all or not?
		if type(n_components_collection)==list:
			n_components = n_components_collection[nc]
		else:
			n_components = n_components_collection

		#Format the output string
		formatted_output_string_collection.append(output_string(feature_loader.feature_string)+"_ncomp{0}".format(n_components))

		#Create a LikelihoodAnalysis instance by unpickling one of the emulators
		emulators_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"emulators")
		emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,output_string(feature_loader.feature_string)))
		logging.info("Unpickling emulator from {0}...".format(emulator_file))
		analysis = LikelihoodAnalysis.load(emulator_file)

		#timestamp
		now = time.time()
		logging.info("emulator unpickled in {0:.1f}s".format(now-last_timestamp))
		last_timestamp = now

		######################Compute PCA components here#####################################
		pca = analysis.principalComponents()

		now = time.time()
		logging.info("Principal components computed in {0:.1f}s".format(now-last_timestamp))
		last_timestamp = now

		####################Transform feature space by projecting on PCA eigenvectors############################
		analysis = analysis.transform(pca_transform,pca=pca,n_components=n_components)

		now = time.time()
		logging.info("Projection on first {1} principal components completed in {0:.1f}s".format(now-last_timestamp,analysis.training_set.shape[1]))
		last_timestamp = now

		####################Retrain emulator######################################################################
		analysis.train()

		now = time.time()
		logging.info("Emulator re-training completed in {0:.1f}s".format(now-last_timestamp))
		last_timestamp = now

		#Append to the collection
		analysis_collection.append(analysis)

		###########################################################################################################################################
		###########################################################################################################################################

		#Use this model for the covariance matrix (from the new set of 50 N-body simulations)
		covariance_model = CFHTcov.getModels(root_path=feature_loader.options.get("simulations","root_path"))
		logging.info("Measuring covariance matrix from model {0}".format(covariance_model))
	
		#Load in the covariance matrix
		fiducial_feature_ensemble = feature_loader.load_features(covariance_model)

		#If the option is enabled, use only the realizations listed in the options to estimate the covariance matrix
		if cmd_args.realizations:

			first_realization = feature_loader.options.getint("mocks","first_realization")
			last_realization = feature_loader.options.getint("mocks","last_realization")

			logging.info("Using only the realizations {0}-{1} to build the fiducial ensemble".format(first_realization,last_realization))
			fiducial_feature_ensemble = fiducial_feature_ensemble.subset(range(first_realization-1,last_realization))
			assert fiducial_feature_ensemble.num_realizations==last_realization-first_realization+1


		###############Insert PCA transform here##############################
		fiducial_feature_ensemble = fiducial_feature_ensemble.transform(pca_transform,pca=pca,n_components=n_components)

		now = time.time()
		logging.info("Projection on first {1} principal components for covariance ensemble completed in {0:.1f}s".format(now-last_timestamp,analysis.training_set.shape[1]))
		last_timestamp = now

		#Append to the collection
		fiducial_feature_ensemble_collection.append(fiducial_feature_ensemble)

		#timestamp
		now = time.time()
		logging.info("covariance computed in {0:.1f}s".format(now-last_timestamp))
		last_timestamp = now

		################################################################################################################################################

		#Get also the observation instance

		if cmd_args.observations_mock:

			pass

		else:
			
			observation = CFHTLens(root_path=feature_loader.options.get("observations","root_path"))
			logging.info("Measuring the observations from {0}".format(observation))

			#And load the observations
			observed_feature_ensemble = feature_loader.load_features(observation)

			###############Insert PCA transform here##############################
			observed_feature_ensemble = observed_feature_ensemble.transform(pca_transform,pca=pca,n_components=n_components)

			now = time.time()
			logging.info("Projection on first {1} principal components for observation completed in {0:.1f}s".format(now-last_timestamp,analysis.training_set.shape[1]))
			last_timestamp = now

			observed_feature_ensemble_collection.append(observed_feature_ensemble)

		#timestamp
		now = time.time()
		logging.info("observation loaded in {0:.1f}s".format(now-last_timestamp))
		last_timestamp = now


	################################################################################################################################################
	################################Reduce the collections##########################################################################################
	################################################################################################################################################

	analysis = reduce(mul,analysis_collection)
	fiducial_feature_ensemble = reduce(mul,fiducial_feature_ensemble_collection)

	#Sanity check
	if type(n_components_collection)==list:
		assert analysis.training_set.shape[1]==reduce(add,n_components_collection)
		assert fiducial_feature_ensemble.data.shape[1]==reduce(add,n_components_collection)
	else:
		assert analysis.training_set.shape[1]==n_components*len(feature_loader_collection)
		assert fiducial_feature_ensemble.data.shape[1]==n_components*len(feature_loader_collection)

	#Covariance matrix
	features_covariance = fiducial_feature_ensemble.covariance()

	if cmd_args.observations_mock:

		logging.info("Using fiducial ensemble as mock observations")
		
		if cmd_args.realization_pick is not None:
			logging.info("Using realization {0} as data".format(cmd_args.realization_pick))
			observed_feature = fiducial_feature_ensemble[cmd_args.realization_pick]
		else:
			observed_feature=fiducial_feature_ensemble.mean()

	else:

		#And load the observations
		observed_feature_ensemble = reduce(mul,observed_feature_ensemble_collection)
		observed_feature = observed_feature_ensemble.mean()

	#Sanity check
	if type(n_components_collection)==list:
		assert observed_feature.shape[0]==reduce(add,n_components_collection)
	else:
		assert observed_feature.shape[0]==n_components*len(feature_loader_collection)

	################################################################################################################################################
	################################################################################################################################################
	#############Everything is projected on the PCA components now, ready for chi2 computations#####################################################
	################################################################################################################################################
	################################################################################################################################################

	logging.info("Initializing chi2 meshgrid...")

	#Read parameters to use from options
	use_parameters = feature_loader.options.get("parameters","use_parameters").replace(" ","").split(",")
	assert len(use_parameters)==3
	
	#Reparametrization hash key
	use_parameters_hash = "-".join(use_parameters)

	########################################################################################
	#Might need to reparametrize the emulator here, use a dictionary for reparametrizations#
	########################################################################################

	assert use_parameters_hash in reparametrization.keys(),"No reparametrization scheme specified for {0} parametrization".format(use_parameters_hash)
	
	if reparametrization[use_parameters_hash] is not None:
		
		#Reparametrize
		logging.info("Reparametrizing emulator according to {0} parametrization".format(use_parameters_hash))
		analysis.reparametrize(reparametrization[use_parameters_hash])

		#Retrain for safety
		analysis.train()

	#Log current parametrization to user
	logging.info("Using parametrization {0}".format(use_parameters_hash))

	#Set the points in parameter space on which to compute the chi2 (read extremes from options)
	par = list()
	for p in range(3):
		assert feature_loader.options.has_section(use_parameters[p]),"No extremes specified for parameter {0}".format(use_parameters[p])
		par.append(np.ogrid[feature_loader.options.getfloat(use_parameters[p],"min"):feature_loader.options.getfloat(use_parameters[p],"max"):feature_loader.options.getint(use_parameters[p],"num_points")*1j])

	num_points = len(par[0]) * len(par[1]) * len(par[2]) 

	points = np.array(np.meshgrid(par[0],par[1],par[2],indexing="ij")).reshape(3,num_points).transpose()
	
	#Now compute the chi2 at each of these points
	if pool:
		split_chunks = pool.size
		logging.info("Computing chi squared for {0} parameter combinations using {1} cores...".format(points.shape[0],pool.size))
	else:
		split_chunks = None
		logging.info("Computing chi squared for {0} parameter combinations using 1 core...".format(points.shape[0]))
	
	chi_squared = analysis.chi2(points,observed_feature=observed_feature,features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

	now = time.time()
	logging.info("chi2 calculations completed in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	#save output
	likelihoods_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"likelihoods_{0}".format(use_parameters_hash))
	if not os.path.isdir(likelihoods_dir):
		os.mkdir(likelihoods_dir)

	#Output filename formatting
	output_prefix=""
	
	if cmd_args.observations_mock:
		output_prefix+="mock"

	if cmd_args.cross:
		output_prefix+="_cross"

	if cmd_args.realization_pick is not None:
		output_prefix+="real{0}".format(cmd_args.realization_pick)
	
	if cmd_args.realizations:
		output_prefix+="{0}-{1}".format(first_realization,last_realization)

	output_prefix += cmd_args.prefix 

	formatted_output_string = "-".join(formatted_output_string_collection)
	
	chi2_file = os.path.join(likelihoods_dir,"chi2{0}_{1}.npy".format(output_prefix,formatted_output_string))
	likelihood_file = os.path.join(likelihoods_dir,"likelihood{0}_{1}.npy".format(output_prefix,formatted_output_string))

	logging.info("Saving chi2 to {0}".format(chi2_file))
	np.save(chi2_file,chi_squared.reshape(par[0].shape + par[1].shape + par[2].shape))

	logging.info("Saving full likelihood to {0}".format(likelihood_file))
	likelihood_cube = analysis.likelihood(chi_squared.reshape(par[0].shape + par[1].shape + par[2].shape))
	np.save(likelihood_file,likelihood_cube)
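The example above uses reduce, mul and add without showing their imports; reduce is a builtin only in Python 2, and mul/add live in the operator module, so under Python 3 the source file presumably carries the equivalent of:

from functools import reduce
from operator import mul, add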
Example #10
def main():

	#################################################
	############Option parsing#######################
	#################################################

	#Parse command line options
	parser = argparse.ArgumentParser()
	parser.add_argument("-f","--file",dest="options_file",action="store",type=str,help="analysis options file")
	parser.add_argument("-v","--verbose",dest="verbose",action="store_true",default=False,help="turn on verbosity")
	parser.add_argument("-vv","--verbose_plus",dest="verbose_plus",action="store_true",default=False,help="turn on additional verbosity")
	parser.add_argument("-m","--mask_scale",dest="mask_scale",action="store_true",default=False,help="scale peaks and power spectrum to unmasked area")
	parser.add_argument("-c","--cut_convergence",dest="cut_convergence",action="store",default=None,help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
	parser.add_argument("-g","--group_subfields",dest="group_subfields",action="store_true",default=False,help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
	parser.add_argument("-s","--save_points",dest="save_points",action="store",default=None,help="save points in parameter space to external npy file")
	parser.add_argument("-p","--prefix",dest="prefix",action="store",default="",help="give a prefix to the name of the pickled emulator")
	parser.add_argument("-d","--differentiate",dest="differentiate",action="store_true",default=False,help="differentiate the first minkowski functional to get the PDF")

	cmd_args = parser.parse_args()

	if cmd_args.options_file is None:
		parser.print_help()
		sys.exit(0)

	#Set verbosity level
	if cmd_args.verbose_plus:
		logging.basicConfig(level=DEBUG_PLUS)
	elif cmd_args.verbose:
		logging.basicConfig(level=logging.DEBUG)
	else:
		logging.basicConfig(level=logging.INFO)

	#Instantiate a FeatureLoader object that will take care of the memory loading
	feature_loader = FeatureLoader(cmd_args)

	#Get the names of all the simulated models available for the CFHT analysis, including smoothing scales and subfields
	all_simulated_models = CFHTemu1.getModels(root_path=feature_loader.options.get("simulations","root_path"))

	#Select subset of training models
	training_models = all_simulated_models

	#Create a LikelihoodAnalysis instance and load the training models into it
	analysis = LikelihoodAnalysis()

	###########################################################
	###############Feature loading#############################
	###########################################################

	#Start loading the data
	logging.info("Loading features...")
	
	for feature_type in feature_loader.features_to_measure.keys():
		logging.info("{0}, smoothing scales: {1} arcmin".format(feature_type,",".join([ str(s) for s in feature_loader.features_to_measure[feature_type] ])))
	
	#Start
	start = time.time()

	#Load the simulated features
	for n,model in enumerate(training_models):

		logging.debug("Model {0}".format(n))
		logging.debug(model)

		ensemble_all_subfields = feature_loader.load_features(model)

		#Add the feature to the LikelihoodAnalysis
		analysis.add_model(parameters=model.squeeze(),feature=ensemble_all_subfields.mean())

	#Log timestamp
	now = time.time()
	logging.info("Simulated features loaded in {0:.1f}s".format(now-start))
	last_timestamp = now

	########################################################################################################
	#####################Feature loading complete, can build the emulator now###############################
	########################################################################################################

	#Train the interpolators using the simulated features
	logging.info("Training interpolators...")
	analysis.train()

	#Log timestamp
	now = time.time()
	logging.info("Emulator trained in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	#Pickle the emulator and save it to a .p file

	emulators_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"emulators")
	if not os.path.isdir(emulators_dir):
		os.mkdir(emulators_dir)
	
	emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,output_string(feature_loader.feature_string)))
	logging.info("Pickling emulator and saving it to {0}".format(emulator_file))
	analysis.save(emulator_file)

	#Log timestamp and finish
	end = time.time()

	logging.info("DONE!!")
	logging.info("Completed in {0:.1f}s".format(end-start))
Example #11
def test_interpolation():

	root_path = "Data/all"
	analysis = LikelihoodAnalysis()

	#Read in model names
	models = CFHTemu1.getModels()[:17]
	assert len(models) == 17

	#Shuffle the models
	np.random.seed(1)
	np.random.shuffle(models)

	#Divide into training and testing
	training_models = models[:-1]
	testing_model = models[-1]

	#Read multipoles
	ell = np.load(os.path.join(root_path,"ell.npy"))

	#Load in the means of the power spectra of the 17 models, and populate the analysis instance
	for model in training_models:

		ens = Ensemble.fromfilelist([os.path.join(root_path,model._cosmo_id_string,"subfield1","sigma05","power_spectrum.npy")])
		ens.load(from_old=True)

		analysis.add_model(parameters=model.squeeze(with_ns=True),feature=ens.mean())

	#Add the multipoles to the analysis
	analysis.add_feature_label(ell)
	l = analysis.feature_label

	ens = Ensemble.fromfilelist([os.path.join(root_path,testing_model._cosmo_id_string,"subfield1","sigma05","power_spectrum.npy")])
	ens.load(from_old=True)
	testing_Pl = ens.mean()

	#Load in also the observed power spectrum
	ens = Ensemble.fromfilelist([os.path.join(root_path,"observations","subfield1","sigma05","power_spectrum.npy")])
	ens.load(from_old=True)
	observed_Pl = ens.mean() 

	#Output the analysis stats
	np.savetxt("16_parameter_points.txt",analysis.parameter_set)

	for n in range(len(training_models)):

		plt.plot(l,l*(l+1)*analysis.training_set[n]/(2*np.pi))

	plt.plot(l,l*(l+1)*observed_Pl/(2*np.pi),linestyle="--",label="Observation")	

	plt.xlabel(r"$l$")
	plt.ylabel(r"$l(l+1)P_l/2\pi$")
	plt.yscale("log")

	plt.legend(loc="upper left")

	plt.savefig("16_power_spectra.png")
	plt.clf()

	#Train the interpolators
	analysis.train(use_parameters=range(3))
	assert hasattr(analysis,"_interpolator")
	assert hasattr(analysis,"_num_bins")

	#Emulator portability test with pickle/unpickle
	analysis.save("analysis.p")
	emulator = LikelihoodAnalysis.load("analysis.p")

	#Predict the power spectrum at the remaining point
	predicted_Pl = emulator.predict(testing_model.squeeze())

	#Plot it against the measured one
	fig,ax = plt.subplots(2,1,figsize=(16,8))

	#Measured
	ax[0].plot(l,l*(l+1)*testing_Pl/(2*np.pi),label="measured")

	#Predicted
	ax[0].plot(l,l*(l+1)*predicted_Pl/(2*np.pi),label="interpolated")
	
	#Fractional difference
	ax[1].plot(l,(predicted_Pl - testing_Pl)/testing_Pl)

	ax[1].set_xlabel(r"$l$")
	ax[0].set_ylabel(r"$l(l+1)P_l/2\pi$")
	ax[1].set_ylabel(r"$P_l^I-P_l^M/P_l^M$")
	
	ax[0].set_yscale("log")
	ax[0].legend(loc="upper left")

	plt.savefig("power_interpolator_test.png")
	plt.clf()

	#Give it a shot with two points in parameter space to test vectorization
	two_parameter_points = np.array((training_models[0].squeeze(),testing_model.squeeze()))
	two_predicted_Pl = emulator.predict(two_parameter_points)

	fig,ax = plt.subplots(2,1,figsize=(16,8))

	#Predicted
	ax[0].plot(l,l*(l+1)*two_predicted_Pl[0]/(2*np.pi),color="red",linestyle="--")
	ax[0].plot(l,l*(l+1)*two_predicted_Pl[1]/(2*np.pi),color="green",linestyle="--")

	#Measured
	ax[0].plot(l,l*(l+1)*emulator.training_set[0]/(2*np.pi),color="red",linestyle="-")
	ax[0].plot(l,l*(l+1)*testing_Pl/(2*np.pi),color="green",linestyle="-")

	#Fractional difference
	ax[1].plot(l,(two_predicted_Pl[0] - emulator.training_set[0])/emulator.training_set[0],color="red")
	ax[1].plot(l,(two_predicted_Pl[1] - testing_Pl)/testing_Pl,color="green")

	ax[1].set_xlabel(r"$l$")
	ax[0].set_ylabel(r"$l(l+1)P_l/2\pi$")
	ax[1].set_ylabel(r"$P_l^I-P_l^M/P_l^M$")
	
	ax[0].set_yscale("log")

	plt.savefig("power_interpolator_test_2.png")
	plt.clf()

	#Generate a fudge power spectrum covariance matrix
	covariance = np.diag(testing_Pl**2/(0.5 + l))

	#Generate a fudge observation by wiggling the testing power spectrum
	observation = testing_Pl + np.random.uniform(low=-testing_Pl*0.1,high=testing_Pl*0.1)

	#Choose a bunch of points in parameter space
	points = emulator.parameter_set[:,:-1]

	#Compute the chi2
	chi2_values_1 = emulator.chi2(points,observation,covariance)
	chi2_values_2 = emulator.chi2(points,observation,covariance,split_chunks=4)

	assert chi2_values_1.shape == chi2_values_2.shape

	#Compute the individual contributions
	chi2_contributions = emulator.chi2Contributions(points[0],observation,covariance)

	#Plot
	plt.imshow(chi2_contributions,interpolation="nearest")
	plt.colorbar()
	plt.xlabel(r"$j$")
	plt.ylabel(r"$i$")
	plt.savefig("chi2_contributions.png")
	plt.clf()

	return chi2_values_1,chi2_values_2
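The shape assertion above only checks that the chunked and unchunked chi2 evaluations agree in dimensions; a cheap, stronger consistency check would be numerical agreement:

assert np.allclose(chi2_values_1, chi2_values_2)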
Example #13
def main():

	#################################################
	############Option parsing#######################
	#################################################

	#Parse command line options
	parser = argparse.ArgumentParser()
	parser.add_argument("-f","--file",dest="options_file",action="store",type=str,help="analysis options file")
	parser.add_argument("-v","--verbose",dest="verbose",action="store_true",default=False,help="turn on verbosity")
	parser.add_argument("-vv","--verbose_plus",dest="verbose_plus",action="store_true",default=False,help="turn on additional verbosity")
	parser.add_argument("-m","--mask_scale",dest="mask_scale",action="store_true",default=False,help="scale peaks and power spectrum to unmasked area")
	parser.add_argument("-c","--cut_convergence",dest="cut_convergence",action="store",default=None,help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
	parser.add_argument("-g","--group_subfields",dest="group_subfields",action="store_true",default=False,help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
	parser.add_argument("-s","--save_points",dest="save_points",action="store",default=None,help="save points in parameter space to external npy file")
	parser.add_argument("-ss","--save_debug",dest="save_debug",action="store_true",default=False,help="save a bunch of debugging info for the analysis")
	parser.add_argument("-p","--prefix",dest="prefix",action="store",default="",help="prefix of the emulator to pickle")
	parser.add_argument("-r","--realizations",dest="realizations",type=int,default=None,help="use only the first N realizations to estimate the covariance matrix")
	parser.add_argument("-d","--differentiate",dest="differentiate",action="store_true",default=False,help="differentiate the first minkowski functional to get the PDF")
	parser.add_argument("-ms","--mean_subtract",dest="mean_subtract",action="store_true",default=False,help="lod in the observations with the subtracted means")

	cmd_args = parser.parse_args()

	if cmd_args.options_file is None:
		parser.print_help()
		sys.exit(0)

	#Set verbosity level
	if cmd_args.verbose_plus:
		logging.basicConfig(level=DEBUG_PLUS)
	elif cmd_args.verbose:
		logging.basicConfig(level=logging.DEBUG)
	else:
		logging.basicConfig(level=logging.INFO)

	#Initialize MPI Pool (fall back to serial execution if MPI is unavailable)
	try:
		pool = MPIPool()
	except Exception:
		pool = None

	if (pool is not None) and (not pool.is_master()):
		pool.wait()
		sys.exit(0)

	if pool is not None:
		logging.info("Started MPI Pool.")

	#################################################################################################################
	#################Info gathering: covariance matrix, observation and emulator#####################################
	#################################################################################################################

	#start
	start = time.time()
	last_timestamp = start

	#Instantiate a FeatureLoader object that will take care of the memory loading
	feature_loader = FeatureLoader(cmd_args)

	###########################################################################################################################################

	#Use this model for the covariance matrix (from the new set of 50 N-body simulations)
	covariance_model = CFHTcov.getModels(root_path=feature_loader.options.get("simulations","root_path"))
	logging.info("Measuring covariance matrix from model {0}".format(covariance_model))
	
	#Load in the covariance matrix
	fiducial_feature_ensemble = feature_loader.load_features(covariance_model)

	#If the option is enabled, use only the first N realizations to estimate the covariance matrix
	if cmd_args.realizations is not None:

		logging.info("Using only the first {0} realizations to estimate the covariance matrix".format(cmd_args.realizations))
		fiducial_feature_ensemble = fiducial_feature_ensemble.subset(range(cmd_args.realizations))
		assert fiducial_feature_ensemble.num_realizations==cmd_args.realizations

	fiducial_features = fiducial_feature_ensemble.mean()
	features_covariance = fiducial_feature_ensemble.covariance()

	#timestamp
	now = time.time()
	logging.info("covariance loaded in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	################################################################################################################################################

	#Get also the observation instance
	observation = CFHTLens(root_path=feature_loader.options.get("observations","root_path"))
	logging.info("Measuring the observations from {0}".format(observation))
	#And load the observations
	observed_feature = feature_loader.load_features(observation).mean()

	#timestamp
	now = time.time()
	logging.info("observation loaded in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	################################################################################################################################################

	#Create a LikelihoodAnalysis instance by unpickling one of the emulators
	emulators_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"emulators")
	emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,output_string(feature_loader.feature_string)))
	logging.info("Unpickling emulator from {0}...".format(emulator_file))
	analysis = LikelihoodAnalysis.load(emulator_file)

	#timestamp
	now = time.time()
	logging.info("emulator unpickled in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	####################################################################################################################
	######################################Compute the chi2 cube#########################################################
	####################################################################################################################

	logging.info("Initializing chi2 meshgrid...")

	#Read parameters to use from options
	use_parameters = feature_loader.options.get("parameters","use_parameters").replace(" ","").split(",")
	assert len(use_parameters)==3
	
	#Reparametrization hash key
	use_parameters_hash = "-".join(use_parameters)

	########################################################################################
	#Might need to reparametrize the emulator here, use a dictionary for reparametrizations#
	########################################################################################

	assert use_parameters_hash in reparametrization.keys(),"No reparametrization scheme specified for {0} parametrization".format(use_parameters_hash)
	
	if reparametrization[use_parameters_hash] is not None:
		
		#Reparametrize
		logging.info("Reparametrizing emulator according to {0} parametrization".format(use_parameters_hash))
		analysis.reparametrize(reparametrization[use_parameters_hash])

		#Retrain for safety
		analysis.train()

	#Log current parametrization to user
	logging.info("Using parametrization {0}".format(use_parameters_hash))

	#Set the points in parameter space on which to compute the chi2 (read extremes from options)
	par = list()
	for p in range(3):
		assert feature_loader.options.has_section(use_parameters[p]),"No extremes specified for parameter {0}".format(use_parameters[p])
		par.append(np.ogrid[feature_loader.options.getfloat(use_parameters[p],"min"):feature_loader.options.getfloat(use_parameters[p],"max"):feature_loader.options.getint(use_parameters[p],"num_points")*1j])

	num_points = len(par[0]) * len(par[1]) * len(par[2]) 

	points = np.array(np.meshgrid(par[0],par[1],par[2],indexing="ij")).reshape(3,num_points).transpose()
	
	#Now compute the chi2 at each of these points
	if pool:
		split_chunks = pool.size
		logging.info("Computing chi squared for {0} parameter combinations using {1} cores...".format(points.shape[0],pool.size))
	else:
		split_chunks = None
		logging.info("Computing chi squared for {0} parameter combinations using 1 core...".format(points.shape[0]))
	
	chi_squared = analysis.chi2(points,observed_feature=observed_feature,features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

	now = time.time()
	logging.info("chi2 calculations completed in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	#Close pool
	if pool is not None:
		pool.close()
		logging.info("Closed MPI Pool.")

	#Save output
	likelihoods_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"likelihoods_{0}".format(use_parameters_hash))
	prefix = cmd_args.prefix
	if cmd_args.mean_subtract:
		prefix += "_meansub"

	if not os.path.isdir(likelihoods_dir):
		os.mkdir(likelihoods_dir)
	
	if cmd_args.realizations is None:
		chi2_file = os.path.join(likelihoods_dir,"chi2{0}_{1}.npy".format(prefix,output_string(feature_loader.feature_string)))
		likelihood_file = os.path.join(likelihoods_dir,"likelihood{0}_{1}.npy".format(prefix,output_string(feature_loader.feature_string)))
	else:
		chi2_file = os.path.join(likelihoods_dir,"chi2{0}{1}real_{2}.npy".format(prefix,cmd_args.realizations,output_string(feature_loader.feature_string)))
		likelihood_file = os.path.join(likelihoods_dir,"likelihood{0}{1}real_{2}.npy".format(prefix,cmd_args.realizations,output_string(feature_loader.feature_string)))

	logging.info("Saving chi2 to {0}".format(chi2_file))
	np.save(chi2_file,chi_squared.reshape(par[0].shape + par[1].shape + par[2].shape))

	logging.info("Saving full likelihood to {0}".format(likelihood_file))
	likelihood_cube = analysis.likelihood(chi_squared.reshape(par[0].shape + par[1].shape + par[2].shape))
	np.save(likelihood_file,likelihood_cube)

	#Find the maximum of the likelihood using ContourPlot functionality
	contour = ContourPlot()
	contour.getLikelihood(likelihood_cube,parameter_axes={use_parameters[0]:0,use_parameters[1]:1,use_parameters[2]:2},parameter_labels={use_parameters[0]:"0",use_parameters[1]:"1",use_parameters[2]:"2"})
	contour.getUnitsFromOptions(feature_loader.options)
	parameters_maximum = contour.getMaximum()
	#dict.keys() is not sortable in place under Python 3: sort into a list explicitly
	parameter_keys = sorted(parameters_maximum.keys(),key=contour.parameter_axes.get)

	#Display the new best fit before exiting
	best_fit_parameters = np.array([ parameters_maximum[par_key] for par_key in parameter_keys ])
	logging.info("Best fit is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2={1[0]:.3f}({2} dof)".format(best_fit_parameters,analysis.chi2(np.array(best_fit_parameters),features_covariance=features_covariance,observed_feature=observed_feature),analysis.training_set.shape[1]))

	#Additionally save some debugging info to plot, etc...
	if cmd_args.save_debug:

		troubleshoot_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"troubleshoot_{0}".format(use_parameters_hash))
		if not os.path.isdir(troubleshoot_dir):
			os.mkdir(troubleshoot_dir)

		logging.info("Saving troubleshoot info to {0}...".format(troubleshoot_dir))

		np.save(os.path.join(troubleshoot_dir,"observation_{0}.npy".format(output_string(feature_loader.feature_string))),observed_feature)
		np.save(os.path.join(troubleshoot_dir,"covariance_{0}.npy".format(output_string(feature_loader.feature_string))),features_covariance)
		np.save(os.path.join(troubleshoot_dir,"fiducial_{0}.npy".format(output_string(feature_loader.feature_string))),fiducial_features)
		np.save(os.path.join(troubleshoot_dir,"best_fit_features_{0}.npy".format(output_string(feature_loader.feature_string))),analysis.predict(best_fit_parameters))
		np.save(os.path.join(troubleshoot_dir,"fiducial_from_interpolator_{0}.npy".format(output_string(feature_loader.feature_string))),analysis.predict(np.array([0.26,-1.0,0.800])))
		np.save(os.path.join(troubleshoot_dir,"chi2_contributions_{0}.npy".format(output_string(feature_loader.feature_string))),analysis.chi2Contributions(best_fit_parameters,observed_feature=observed_feature,features_covariance=features_covariance))

	end = time.time()

	logging.info("DONE!!")
	logging.info("Completed in {0:.1f}s".format(end-start))
def main():

	#################################################
	############Option parsing#######################
	#################################################

	#Parse command line options
	parser = argparse.ArgumentParser()
	parser.add_argument("-f","--file",dest="options_file",action="store",type=str,help="analysis options file")
	parser.add_argument("-v","--verbose",dest="verbose",action="store_true",default=False,help="turn on verbosity")
	parser.add_argument("-vv","--verbose_plus",dest="verbose_plus",action="store_true",default=False,help="turn on additional verbosity")
	parser.add_argument("-m","--mask_scale",dest="mask_scale",action="store_true",default=False,help="scale peaks and power spectrum to unmasked area")
	parser.add_argument("-c","--cut_convergence",dest="cut_convergence",action="store",default=None,help="select convergence values in (min,max) to compute the likelihood. Safe for single descriptor only!!")
	parser.add_argument("-g","--group_subfields",dest="group_subfields",action="store_true",default=False,help="group feature realizations by taking the mean over subfields, this makes a big difference in the covariance matrix")
	parser.add_argument("-s","--save_points",dest="save_points",action="store",default=None,help="save points in parameter space to external npy file")
	parser.add_argument("-ss","--save_debug",dest="save_debug",action="store_true",default=False,help="save a bunch of debugging info for the analysis")
	parser.add_argument("-p","--prefix",dest="prefix",action="store",default="",help="prefix of the emulator to pickle")
	parser.add_argument("-r","--remove",dest="remove",action="store",type=int,default=24,help="model to remove from the analysis")
	parser.add_argument("-R","--random",dest="random",action="store",type=int,default=0,help="random seed initialization for realization picking")

	cmd_args = parser.parse_args()

	if cmd_args.options_file is None:
		parser.print_help()
		sys.exit(0)

	#Set verbosity level
	if cmd_args.verbose_plus:
		logging.basicConfig(level=DEBUG_PLUS)
	elif cmd_args.verbose:
		logging.basicConfig(level=logging.DEBUG)
	else:
		logging.basicConfig(level=logging.INFO)

	#Initialize MPI Pool (fall back to serial execution if MPI is unavailable or there is only one process)
	try:
		pool = MPIPool()
	except Exception:
		pool = None

	if (pool is not None) and (not pool.is_master()):
		pool.wait()
		sys.exit(0)

	if pool is not None:
		logging.info("Started MPI Pool.")

	#################################################################################################################
	#################Info gathering: covariance matrix, observation and emulator#####################################
	#################################################################################################################

	#start
	start = time.time()
	last_timestamp = start

	#Instantiate a FeatureLoader object that will take care of the memory loading
	feature_loader = FeatureLoader(cmd_args)

	###########################################################################################################################################

	#Get the names of all the simulated models available for the CFHT analysis, including smoothing scales and subfields
	all_simulated_models = CFHTemu1.getModels(root_path=feature_loader.options.get("simulations","root_path"))

	#Use this model for the covariance matrix
	covariance_model = all_simulated_models[feature_loader.options.getint("analysis","covariance_model")]
	logging.info("Measuring covariance matrix from model {0}".format(covariance_model))
	#Load in the covariance matrix
	features_covariance = feature_loader.load_features(covariance_model).covariance()

	#timestamp
	now = time.time()
	logging.info("covariance loaded in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	################################################################################################################################################

	#Create a LikelihoodAnalysis instance by unpickling one of the emulators
	emulators_dir = os.path.join(feature_loader.options.get("analysis","save_path"),"emulators")
	emulator_file = os.path.join(emulators_dir,"emulator{0}_{1}.p".format(cmd_args.prefix,output_string(feature_loader.feature_string)))
	logging.info("Unpickling emulator from {0}...".format(emulator_file))
	analysis = LikelihoodAnalysis.load(emulator_file)

	#timestamp
	now = time.time()
	logging.info("emulator unpickled in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	##################################################################################################################################################

	#Initialize random seed
	np.random.seed(cmd_args.random)
	realization = np.random.randint(0,1000)

	#Treat the removed model as data
	model_to_remove = all_simulated_models[cmd_args.remove]
	parameters_to_remove = model_to_remove.squeeze()
	logging.info("Treating model {0}, realization {1} as data, loading features...".format(model_to_remove,realization+1))
	#Reuse the realization index drawn above, so that the logged realization matches the one actually used
	observed_feature = feature_loader.load_features(model_to_remove)[realization]

	#Compute the chi2 for this observed feature without removing it from the emulator (should be small, since the model is still part of the training set)
	logging.info("Chi2 before removal: {0[0]:.3f} ({1} dof)".format(analysis.chi2(parameters_to_remove,features_covariance=features_covariance,observed_feature=observed_feature),analysis.training_set.shape[1]))

	#Remove the model from the emulator
	remove_index = analysis.find(parameters_to_remove)[0]
	logging.info("Removing model {0} with parameters {1} from emulator...".format(remove_index,analysis.parameter_set[remove_index]))
	analysis.remove_model(remove_index)

	#Retrain without the removed model
	analysis.train()

	#Compute the chi2 for this observed feature after removing it from the emulator (likely it's not 0 anymore)
	logging.info("Chi2 after removal: {0[0]:.3f} ({1} dof)".format(analysis.chi2(parameters_to_remove,features_covariance=features_covariance,observed_feature=observed_feature),analysis.training_set.shape[1]))

	####################################################################################################################
	######################################Compute the chi2 cube#########################################################
	####################################################################################################################

	logging.info("Initializing chi2 meshgrid...")

	#Set the points in parameter space on which to compute the chi2 (read from options)
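	#(imaginary steps again give linspace-like sampling: num_points values between min and max, endpoints included)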
	Om = np.ogrid[feature_loader.options.getfloat("Omega_m","min"):feature_loader.options.getfloat("Omega_m","max"):feature_loader.options.getint("Omega_m","num_points")*1j]
	w = np.ogrid[feature_loader.options.getfloat("w","min"):feature_loader.options.getfloat("w","max"):feature_loader.options.getint("w","num_points")*1j]
	si8 = np.ogrid[feature_loader.options.getfloat("sigma8","min"):feature_loader.options.getfloat("sigma8","max"):feature_loader.options.getint("sigma8","num_points")*1j]

	num_points = len(Om) * len(w) * len(si8) 

	points = np.array(np.meshgrid(Om,w,si8,indexing="ij")).reshape(3,num_points).transpose()
	if cmd_args.save_points is not None:
		#str.rstrip strips a character set, not a suffix: remove a trailing ".npy" explicitly
		save_points_root = cmd_args.save_points[:-4] if cmd_args.save_points.endswith(".npy") else cmd_args.save_points
		logging.info("Saving points to {0}.npy".format(save_points_root))
		np.save(save_points_root+".npy",points)

	#Now compute the chi2 at each of these points
	if pool:
		split_chunks = pool.size
		logging.info("Computing chi squared for {0} parameter combinations using {1} cores...".format(points.shape[0],pool.size))
	else:
		split_chunks = None
		logging.info("Computing chi squared for {0} parameter combinations using 1 core...".format(points.shape[0]))
	
	chi_squared = analysis.chi2(points,observed_feature=observed_feature,features_covariance=features_covariance,pool=pool,split_chunks=split_chunks)

	#Close MPI Pool
	if pool is not None:
		pool.close()
		logging.info("Closed MPI Pool.")

	now = time.time()
	logging.info("chi2 calculations completed in {0:.1f}s".format(now-last_timestamp))
	last_timestamp = now

	#Save output

	likelihood_file = "likelihood_remove{0}_{1}.npy".format(cmd_args.remove,output_string(feature_loader.feature_string))
	chi2_file = "chi2_remove{0}_{1}.npy".format(cmd_args.remove,output_string(feature_loader.feature_string))

	logging.info("Saving chi2 to {0}".format(chi2_file))
	np.save(chi2_file,chi_squared.reshape(Om.shape + w.shape + si8.shape))

	logging.info("Saving full likelihood to {0}".format(likelihood_file))
	likelihood_cube = analysis.likelihood(chi_squared.reshape(Om.shape + w.shape + si8.shape))
	np.save(likelihood_file,likelihood_cube)

	#Find the maximum of the likelihood using ContourPlot functionality
	contour = ContourPlot()
	contour.getLikelihood(likelihood_cube)
	contour.getUnitsFromOptions(feature_loader.options)
	parameters_maximum = contour.getMaximum()
	#dict.keys() is not sortable in place under Python 3: sort into a list explicitly
	parameter_keys = sorted(parameters_maximum.keys(),key=contour.parameter_axes.get)

	#Display the new best fit before exiting
	best_fit_parameters = [ parameters_maximum[par_key] for par_key in parameter_keys ]
	logging.info("New best fit is [ {0[0]:.2f} {0[1]:.2f} {0[2]:.2f} ], chi2={1[0]:.3f}".format(best_fit_parameters,analysis.chi2(np.array(best_fit_parameters),features_covariance=features_covariance,observed_feature=observed_feature)))

	#End
	end = time.time()
	logging.info("DONE!!")
	logging.info("Completed in {0:.1f}s".format(end-start))