Beispiel #1
0
def Bayes_wrapper():
    """Compute the Bayes score for every beta-function parameter listed in
    parameter.py (a.list_of_beta_params).

    For each beta value, Chempy/parameter.py is rewritten in place, the
    cached Chempy.parameter / Chempy.score_function modules are dropped and
    re-imported so the new value takes effect, and Bayes_score() is
    evaluated. Not parallelised here (parallelisation is done in MCMC
    already, but not in the integration).

    Output scores are saved and labelled in the Scores/ folder as an .npz
    file; returns (beta_params, score, score_err).
    """
    import time
    import fileinput
    import sys
    from .parameter import ModelParameters
    print('UPDATE NEURAL NETWORK')
    # NOTE(review): os, np and Bayes_score are not imported in this function —
    # presumably module-level names; TODO confirm against the full file.
    directory = 'Scores/'
    if not os.path.exists(directory):
        os.makedirs(directory)

    a = ModelParameters()
    beta_params = a.list_of_beta_params
    score = []
    score_err = []
    init_time = time.time()

    for i in range(len(beta_params)):  # Iterate over beta parameters
        print("Calculating value %d of %d after %.3f seconds" %
              (i + 1, len(beta_params), time.time() - init_time))
        # Rewrite the beta_param line of the parameter file in place
        # (fileinput with inplace=True redirects print() into the file)
        for line in fileinput.input("Chempy/parameter.py", inplace=True):
            if "\tbeta_param" in line:
                print("\tbeta_param = %.5f" % beta_params[i])
            else:
                print(line, end='')
        fileinput.close()
        # Drop the cached modules so the edited parameter file is re-read
        del sys.modules['Chempy.parameter']
        del sys.modules['Chempy.score_function']
        from .parameter import ModelParameters
        from .score_function import preload_params_mcmc
        preload = preload_params_mcmc(
        )  # Must be reloaded since this depends on beta parameter
        a = ModelParameters()

        # Calculate Bayes score
        integral, integral_err = Bayes_score()
        score.append(integral)
        score_err.append(integral_err)

    # Save output as npz array, labelled by the yield tables in use
    np.savez("Scores/Bayes_score - " + str(a.yield_table_name_sn2) + ", " +
             str(a.yield_table_name_agb) + ", " + str(a.yield_table_name_1a) +
             ".npz",
             beta_param=beta_params,
             score=score,
             score_err=score_err)

    return beta_params, score, score_err
Beispiel #2
0
def CV_bash(beta_index):
    """Compute the CV score for ONE beta value, a.list_of_beta_params[beta_index].

    Intended to be driven from a shell script (hence the string-friendly
    beta_index argument). Rewrites Chempy/parameter.py in place with the
    chosen beta, forces a re-import of the affected modules, evaluates
    CV_score(), and saves (beta, score, rescaled score) to
    Scores/CV<beta_index>.npz. Not parallelised here (parallelisation is
    done in MCMC already and for element predictions).

    Returns (beta_params, score, rescaled_score).
    """
    import time
    import fileinput
    import sys
    from .parameter import ModelParameters
    from .score_function import CV_score
    beta_index = int(beta_index)  # tolerate a string argument from bash
    print(beta_index)

    a = ModelParameters()
    beta_params = a.list_of_beta_params[beta_index]
    CV_score_list = []  # NOTE(review): unused here — leftover from the looped variant
    init_time = time.time()

    #print("Calculating value %d of %d after %.3f seconds" %(i+1,len(beta_params),time.time()-init_time))
    # Rewrite the beta_param line of the parameter file in place
    for line in fileinput.input("Chempy/parameter.py", inplace=True):
        if "\tbeta_param" in line:
            print("\tbeta_param = %.5f" % beta_params)
        else:
            print(line, end='')
    fileinput.close()
    # Drop cached modules so the edited parameter file is re-read
    del sys.modules['Chempy.parameter']
    del sys.modules['Chempy.score_function']
    from Chempy.parameter import ModelParameters
    from Chempy.score_function import preload_params_mcmc
    a = ModelParameters()
    preload = preload_params_mcmc()

    # Calculate CV score
    score = CV_score()

    # Geometric-mean rescaling over the neural parameter names
    # (np assumed imported at module level)
    rescaled_score = np.power(score, 1. / len(a.initial_neural_names))
    #CV_score_list.append(score)
    np.savez('Scores/CV' + str(beta_index) + '.npz',
             beta_param=beta_params,
             score=score,
             rescaled_score=rescaled_score)

    # Save output as npz array
    #np.savez("Scores/CV_score - "+str(a.yield_table_name_sn2)+", "+str(a.yield_table_name_agb)+", "+str(a.yield_table_name_1a)+".npz",
    #			beta_param=beta_params,
    #			score=CV_score_list)

    return beta_params, score, rescaled_score
Beispiel #3
0
def CV_stitch():
    """Combine the per-beta CV result files into one labelled npz archive.

    Loads every 'Scores/CV<i>.npz' produced by CV_bash (one per entry in
    a.list_of_beta_params), stacks the beta values, scores and rescaled
    scores, writes them to a single file named after the yield tables in
    use, and returns (beta, score).
    """
    from .score_function import preload_params_mcmc
    preload = preload_params_mcmc()
    from .parameter import ModelParameters
    a = ModelParameters()

    beta, score, rescaled_score = [], [], []
    for idx in range(len(a.list_of_beta_params)):
        with np.load('Scores/CV' + str(idx) + '.npz') as chunk:
            beta.append(chunk['beta_param'])
            score.append(chunk['score'])
            rescaled_score.append(chunk['rescaled_score'])

    out_name = ("Scores/CV_score - " + str(a.yield_table_name_sn2) + ", " +
                str(a.yield_table_name_agb) + ", " +
                str(a.yield_table_name_1a) + ".npz")
    np.savez(out_name,
             beta_param=beta,
             score=score,
             rescaled_score=rescaled_score)

    return beta, score
Beispiel #4
0
def scoring_wrapper():
    """Deprecated driver (NO LONGER USED).

    Runs the Bayes-score and cross-validation scoring steps for the current
    yield set (score_function.py code), timing each stage. The neural
    network must be trained beforehand with training_data / create_network,
    and a.UseNeural must be True with the correct dataset selected.
    Main outputs are labelled .npz files in the Scores/ folder.
    """
    from Chempy.neural import training_data, create_network
    import time
    from Chempy.parameter import ModelParameters
    from Chempy.score_function import CV_wrapper, Bayes_wrapper

    t0 = time.time()
    a = ModelParameters()

    print('Step 1 (at time %.2f s): Calculate Bayes score' %
          (time.time() - t0))
    Bayes_wrapper()

    print('Step 2 (at time %.2f s): Calculate cross-validation score' %
          (time.time() - t0))
    CV_wrapper()

    print('Process complete in time %.2f s' % (time.time() - t0))
    return None
def CV_element_predictions(size=10):
	"""Compute NORMALISED element predictions from the CV scoring.

	Runs overall_CV() `size` times (default 10) to probe the scatter in the
	predicted element means, sigmas and likelihood factors, then stores all
	realisations plus the element names in a single npz file under
	OverallScores/.
	"""
	from .overall_scores import overall_CV
	import numpy as np
	from .parameter import ModelParameters
	a = ModelParameters()

	scores, el_means, el_sigmas, el_likelihoods = [], [], [], []
	for run in range(size):
		print('Computing score %d of %s' % (run + 1, size))
		scores.append(np.log10(overall_CV()))
		# overall_CV() refreshes this file on every run
		with np.load('OverallScores/CV_element_likelihoods.npz') as el_dat:
			el_means.append(el_dat['element_mean'])
			el_sigmas.append(el_dat['element_sigma'])
			el_likelihoods.append(el_dat['likelihood_factors'])

	with np.load('OverallScores/CV_element_likelihoods.npz') as el_dat:
		el_names = el_dat['elements']

	# Save output as npz file - each entry is one identically-simulated realisation
	np.savez('OverallScores/CV_normalised_element_predictions_' + str(a.yield_table_name_sn2) + '.npz',
				mean=el_means, sigma=el_sigmas, elements=el_names,
				likelihood=el_likelihoods, normalised_scores=scores)

	return None
Beispiel #6
0
def CV_wrapper():
    """Deprecated (NO LONGER USED).

    Compute the UNNORMALISED CV score for every beta-function parameter in
    parameter.py (a.list_of_beta_params). For each beta the parameter file
    is rewritten in place and the affected modules are re-imported before
    CV_score() is evaluated. Not parallelised here (parallelisation is done
    in MCMC already and for element predictions). Results are saved to a
    labelled .npz in Scores/; returns (beta_params, CV_score_list).
    """
    import time
    import fileinput
    import sys
    from .parameter import ModelParameters
    from .score_function import CV_score

    a = ModelParameters()
    beta_params = a.list_of_beta_params
    CV_score_list = []
    init_time = time.time()

    for i in range(len(beta_params)):  # Iterate over beta parameters
        print("Calculating value %d of %d after %.3f seconds" %
              (i + 1, len(beta_params), time.time() - init_time))
        # Rewrite the beta_param line of the parameter file in place
        for line in fileinput.input("Chempy/parameter.py", inplace=True):
            if "\tbeta_param" in line:
                print("\tbeta_param = %.5f" % beta_params[i])
            else:
                print(line, end='')
        fileinput.close()
        # Drop cached modules so the edited parameter file is re-read
        del sys.modules['Chempy.parameter']
        del sys.modules['Chempy.score_function']
        from Chempy.score_function import preload_params_mcmc
        from Chempy.parameter import ModelParameters
        a = ModelParameters()

        # Calculate CV score
        score = CV_score()
        CV_score_list.append(score)

    # Save output as npz array (np assumed imported at module level)
    np.savez("Scores/CV_score - " + str(a.yield_table_name_sn2) + ", " +
             str(a.yield_table_name_agb) + ", " + str(a.yield_table_name_1a) +
             ".npz",
             beta_param=beta_params,
             score=CV_score_list)

    return beta_params, CV_score_list
Beispiel #7
0
def runner(index):
    """Compute the Chempy predictions for one parameter set.

    Returns (abundances, raw parameters, normalised parameters) for row
    `index` of the module-level param_grid / norm_grid arrays.
    """
    model = ModelParameters()
    theta = param_grid[index]
    theta_norm = norm_grid[index]
    abundances, element_list = run_Chempy_sample(theta, model)
    del model
    return abundances, theta, theta_norm
def runner(index):
    """Run one Chempy timestep for parameter set `index` of all_params.

    Returns (abundances, params); if the model fails (float sentinel is
    returned by single_timestep_chempy) the abundances are a zero vector of
    length len(els).
    """
    model = ModelParameters()
    theta = all_params[index]
    result = single_timestep_chempy((theta, model))
    if type(result) == float:
        # NOTE(review): the model object is only released on the +inf branch
        if result == np.inf:
            del model
        return np.zeros(len(els)), theta
    abundances = result[0]
    del model
    return abundances, theta
Beispiel #9
0
class preload_params_mcmc():
    """Precompute and cache quantities reused many times during the MCMC run.

    Everything in this body executes ONCE, at class-definition time, and the
    results live as class attributes; definitions can be called from this
    file. Requires 'Neural/neural_model.npz' and the stellar wildcard file
    under Chempy/input/stars/ to exist on disk.
    """
    import numpy as np
    from Chempy.parameter import ModelParameters
    from scipy.stats import beta  # only needed by the commented-out beta-prior weighting below
    a = ModelParameters()

    # Traced elements, extended with two synthetic diagnostics
    elements_to_trace = list(a.elements_to_trace)
    elements_to_trace.append('Zcorona')
    elements_to_trace.append('SNratio')

    # Neural network coeffs (weights/biases of the two-layer network)
    coeffs = np.load('Neural/neural_model.npz')
    w_array_0 = coeffs['w_array_0']
    w_array_1 = coeffs['w_array_1']
    b_array_0 = coeffs['b_array_0']
    b_array_1 = coeffs['b_array_1']
    coeffs.close()

    # Grid of model-error values spanning the flat prior range
    # (flat_model_error_prior = [lo, hi, n_points])
    model_errors = np.linspace(a.flat_model_error_prior[0],
                               a.flat_model_error_prior[1],
                               a.flat_model_error_prior[2])
    #error_weight = beta.pdf(model_errors, a = a.beta_error_distribution[1], b = a.beta_error_distribution[2])
    #error_weight/= sum(error_weight)

    # Observed abundances for the chosen star ("wildcard" record array)
    wildcard = np.load('Chempy/input/stars/' + a.stellar_identifier + '.npy')

    # Keep only traced elements present in the observation; index [0] is the
    # abundance and [1] its error — assumed layout of the wildcard file
    elements = []
    star_abundance_list = []
    star_error_list = []
    for i, item in enumerate(a.elements_to_trace):
        if item in list(wildcard.dtype.names):
            elements.append(item)
            star_abundance_list.append(float(wildcard[item][0]))
            star_error_list.append(float(wildcard[item][1]))
    star_abundance_list = np.hstack(star_abundance_list)
    star_error_list = np.hstack(star_error_list)
    elements = np.hstack(elements)

    # Per model-error grid point: sqrt(model_error^2 + star_error^2)
    # combined per-element error terms, cached for the likelihood evaluation
    err = []
    for i, item in enumerate(model_errors):
        error_temp = np.ones(len(elements)) * item
        err.append(
            np.sqrt(
                np.multiply(error_temp[:, None], error_temp[:, None]).T +
                np.multiply(star_error_list, star_error_list)).T)
def overall_Bayes():
	"""Calculate the overall Bayes score, marginalising over all parameters
	including beta.

	NB: in parameter.py, the beta_parameter prior must be set; 5 +/- 2.5 in
	linear space is used here. The (score, error) pair is saved under
	OverallScores/ (named after the yield tables) and returned.
	"""
	import numpy as np
	import os
	from .parameter import ModelParameters
	from .score_function import Bayes_score

	out_dir = 'OverallScores/'
	if not os.path.exists(out_dir):
		os.makedirs(out_dir)

	a = ModelParameters()
	integral, integral_err = Bayes_score()

	out_file = ("OverallScores/Bayes_score - " + str(a.yield_table_name_sn2) +
				", " + str(a.yield_table_name_agb) + ", " +
				str(a.yield_table_name_1a) + ".npz")
	np.savez(out_file, score=integral, score_err=integral_err)

	return integral, integral_err
def CV_errors():
	"""Compute the overall CV score with uncertainties.

	overall_CV() is run 10 times; the median and 16th/84th percentiles of
	log10(score) give the central value and asymmetric errors, which are
	printed and saved under OverallScores/. Returns
	(median, median - lower, upper - median).
	"""
	from .overall_scores import overall_CV
	import numpy as np
	from .parameter import ModelParameters
	a = ModelParameters()

	scores = [np.log10(overall_CV()) for _ in range(10)]

	median = np.median(scores)
	lower = np.percentile(scores, 15.865)
	upper = np.percentile(scores, 100 - 15.865)

	print("Average LOO-CV score over 10 iterations is %.2f + %.2f - %.2f" % (median, median - lower, upper - median))
	np.savez("OverallScores/ErrorCV - " + str(a.yield_table_name_sn2) + ", " + str(a.yield_table_name_agb) + ", " + str(a.yield_table_name_1a) + ".npz",
				median=median, lower=lower, upper=upper)
	return median, median - lower, upper - median
Beispiel #12
0
#
#
# in fact, mass loss and unprocessed mass is the same.
# caveat: SNIA yields are not metallicity dependent and unprocessed mass loss is 0, thus we only store the yields once at the lowest metallicity.
# caveat: direct BH collapse returns only intial element fractions and happens before SNII so we combine the mass loss with SNII mass loss.

import numpy as np
import xdrlib
import numpy as np
import multiprocessing as mp
import os

####### SETTING THE CHEMPY PARAMETER #######################

from Chempy.parameter import ModelParameters
a = ModelParameters()

# Load solar abundances (table chosen by method name: Asplund 2009)
from Chempy.solar_abundance import solar_abundances
basic_solar = solar_abundances()
getattr(basic_solar, 'Asplund09')()

# Load the yields. The getattr(...)() pattern selects the yield table by
# its loader-method name on each feedback object.
from Chempy.yields import SN2_feedback, AGB_feedback, SN1a_feedback
basic_sn2 = SN2_feedback()
getattr(basic_sn2, 'chieffi04_net')()
basic_1a = SN1a_feedback()
getattr(basic_1a, "Seitenzahl")()
basic_agb = AGB_feedback()
getattr(basic_agb, "Karakas16_net")()
Beispiel #13
0
# Script: evaluate Chempy over a pre-sampled APOGEE training parameter grid.
from Chempy.parameter import ModelParameters
a=ModelParameters()
import multiprocessing as mp
import numpy as np
import tqdm
import time
from Chempy.cem_function import run_Chempy_sample


# First load parameter dataset:
training_params=np.load("APOGEE Training Data.npz")
param_grid=training_params['param_grid']
norm_grid = training_params['norm_grid']
N = len(param_grid) # number of samples
    

def runner(index):
    """Compute the Chempy predictions (abundances) plus the raw and
    normalised parameter vectors for row `index` of the training grid."""
    b=ModelParameters()
    params=param_grid[index]
    norm_params=norm_grid[index]
    abun,els=run_Chempy_sample(params,b);
    del b;
    return abun,params,norm_params

if __name__=='__main__':
    init_time=time.time()
    
    # Compute elements by running code once:
    _,els=run_Chempy_sample(param_grid[0],a)
    
    # NOTE(review): the script appears truncated here — the multiprocessing
    # fan-out over `runner` presumably follows. TODO confirm.
    
def overall_CV():
	"""Leave-one-element-out cross-validation score for the current yield set,
	running MCMC over all parameters including beta.

	For each traced element in turn: rewrite Chempy/parameter.py to drop that
	element, force a re-import of the affected modules, run the MCMC
	(single_star_optimization), restore the full element list, then predict
	the held-out element from the posterior samples and score it against the
	observation. The product of the per-element likelihood factors, rescaled
	to a per-element geometric mean, is returned.

	NB: must set beta_param / log10_beta priors in the parameter file.
	Side effects: rewrites Chempy/parameter.py (restored each iteration) and
	writes results under OverallScores/ and Scores/.
	"""
	import numpy as np
	from Chempy.parameter import ModelParameters
	import importlib
	import fileinput
	import sys
	import os
	import multiprocessing as mp
	import tqdm
	from Chempy.wrapper import single_star_optimization
	from Chempy.plot_mcmc import restructure_chain
	from Chempy.cem_function import posterior_function_mcmc_quick
	from scipy.stats import norm
	from .score_function import preload_params_mcmc
	import matplotlib.pyplot as plt
	#p = mp.Pool()
	
	directory = 'OverallScores/'
	if not os.path.exists(directory):
		os.makedirs(directory)
	
	## Code to rewrite parameter file for each element in turn, so as to run MCMC for 21/22 elements only
	# This is definitely not a good implementation (involves rewriting entire parameter file),
	# But other steps are far slower
	
	# Initialise arrays
	element_mean = []
	element_sigma = []
	overall_score = 1.
	factors = []
	
	# Starting elements (copied from original parameter file)
	b = ModelParameters()
	starting_el = b.elements_to_trace
	orig = "\telements_to_trace = "+str(starting_el) # Original element string
	#print(starting_el)

	# Calculate required Chempy elements (those present in the observation)
	preload = preload_params_mcmc()
	elements_init = np.copy(preload.elements)
	np.save('Scores/CV_elements.npy',elements_init)
	#print(elements_init) 
   
	# Create new parameter-file lines, each with one element removed.
	# The last element has no trailing ", " so it needs a different pattern.
	newstr = []
	for i,el in enumerate(elements_init):
		if el !=starting_el[-1]:
			newstr.append(orig.replace("'"+str(el)+"', ",""))
		else:
			newstr.append(orig.replace("'"+str(el)+"'",""))
	for index in range(len(elements_init)): # Iterate over removed element
		# Swap in the reduced element list
		for line in fileinput.input("Chempy/parameter.py", inplace=True):
			if "\telements_to_trace" in line:
				print(newstr[index])
				#print(line,end='') # TO TEST
			else:
				print(line,end='')
		fileinput.close()
		# Drop cached modules so the edited parameter file is re-read
		del sys.modules['Chempy.parameter']
		del sys.modules['Chempy.score_function']
		from Chempy.parameter import ModelParameters
		from .score_function import preload_params_mcmc 
		a = ModelParameters()
		preload = preload_params_mcmc()
		##############
		
		# Run MCMC with 27/28 elements. 
		print('Running MCMC iteration %d of %d' %(index+1,len(elements_init)))
		#print(a.elements_to_trace)
		single_star_optimization()
		
		# Create the posterior PDF and load it 
		restructure_chain('mcmc/')
		positions = np.load('mcmc/posteriorPDF.npy') # Posterior parameter PDF
		#print("In CV_score, element list is",a.elements_to_trace)
		
		##############
		
		# Restore the full element list in the parameter file
		for line in fileinput.input("Chempy/parameter.py", inplace=True):
			if "\telements_to_trace" in line:
				print(orig)
			else:
				print(line,end='')
		fileinput.close()
		del sys.modules['Chempy.parameter']
		del sys.modules['Chempy.score_function']
		from Chempy.parameter import ModelParameters
		from .score_function import preload_params_mcmc 
		a = ModelParameters()
		preload = preload_params_mcmc()	
		##############
		
		# This uses all 28 elements again for predictions
				
		# Multiprocess and calculate elemental predictions for each parameter set

		from .score_function import element_predictor
		p = mp.Pool()		
		indices = np.ones(len(positions))*index
		abundance = list(tqdm.tqdm(p.imap_unordered(element_predictor,zip(positions,indices)),total=len(positions)))
		p.close()
		p.join()	
		
		# Fit a Gaussian to the predicted abundance distribution
		abundance = np.array(abundance)
		mean,sigma = norm.fit(abundance)
		print(mean)
		print(sigma)
		
		element_mean.append(mean)
		element_sigma.append(sigma)
		#a.plot_hist=True
		if a.plot_hist == True:
			plt.clf()
			# NOTE(review): the 'normed' kwarg was removed in matplotlib 3.x
			# ('density=True' is the replacement) — presumably this targets an
			# older matplotlib; TODO confirm pinned version.
			plt.hist(abundance, bins=40, normed=True, alpha=0.6, color='g')
			#abundance = np.array(abundance) # Unmask array
			# Plot the PDF.
			xmin, xmax = plt.xlim()
			x = np.linspace(xmin, xmax, 100)
			p = norm.pdf(x, mean, sigma)
			plt.plot(x, p, c='k', linewidth=2)
			title = 'Plot of element %d abundance' %(index)
			plt.title(title)
			plt.xlabel('[X/Fe] abundance')
			plt.ylabel('Relative frequency')
		
		# Likelihood of the held-out element: Gaussian with model and
		# observational errors added in quadrature
		total_err = np.sqrt((preload.star_error_list[index])**2 + sigma**2)
		likelihood_factor = norm.pdf(mean,loc=preload.star_abundance_list[index],scale=total_err)
		overall_score *= likelihood_factor
		factors.append(likelihood_factor)
		print("Likelihood contribution from %dth element is %.8f" %(index+1,likelihood_factor))
		print(overall_score)
		sys.stdout.flush()
		#print(starting_el)
	np.savez('OverallScores/CV_element_likelihoods.npz',
				elements=elements_init,
				likelihood_factors=factors,
				element_mean = element_mean,
				element_sigma = element_sigma)	
	
	# Geometric-mean rescaling over the number of traced elements
	rescaled_score = np.power(overall_score,1./len(starting_el))
			
	np.save("OverallScores/CV_score_rescaled - "+str(a.yield_table_name_sn2)+\
	","+str(a.yield_table_name_agb)+", "+str(a.yield_table_name_1a)+".npy",rescaled_score)
				
	return rescaled_score
Beispiel #15
0
def single_star_optimization():
    '''Quickly optimize the parameters of a single zone.

	INPUT: 

	   a = will be loaded from parameter.py (prepare all variables there)

	OUTPUT:

	   log_list = a list of intermediate results (so far only for debugging)

	The original multi-star Gibbs-style loop (global + local minimization)
	is retained below as commented-out code; currently only the initial
	per-star minimization followed by an MCMC run is performed.
	'''
    import time
    #import multiprocessing as mp
    from .optimization import minimizer_initial_quick
    from .cem_function import global_optimization_error_returned
    from .parameter import ModelParameters
    from .score_function import preload_params_mcmc

    # For testing
    import warnings
    warnings.filterwarnings("ignore")

    a = ModelParameters()
    preload = preload_params_mcmc()

    print(a.stellar_identifier_list)
    start_time = time.time()

    log_list = []
    # I: Minimization for each star seperately
    # 1: for each star make initial conditions (each star needs other model parameters)
    parameter_list = []
    for item in a.stellar_identifier_list:
        parameter_list.append(item)
    # 2: call posterior_function_for_minimization with scipy.optimize.minimize in multiprocess for each star and recover the found parameters
    #p = mp.Pool(len(parameter_list))
    #t = p.map(minimizer_initial_quick, parameter_list)
    #p.close()
    #p.join()
    #result = np.vstack(t)

    result = minimizer_initial_quick(parameter_list)

    # np assumed imported at module level
    log_list.append(np.copy(result))
    log_list.append('initial minimization')
    initial = time.time()
    print('first minimization for each star separately took: %2.f seconds' %
          (initial - start_time))

    # IV: repeat II and III until posterior does not change much
    #result[:,:len(a.SSP_parameters)] = np.mean(result[:,:len(a.SSP_parameters)], axis = 0)
    #posteriors = []
    #counter = 0
    #while True:
    #	counter += 1
    #	if len(posteriors) > 1:
    #		if np.abs(posteriors[-1] - posteriors[-2]) < a.gibbs_sampler_tolerance:
    #			break
    #		if len(posteriors) > a.gibbs_sampler_maxiter:
    #			break

    #	initial = time.time()
    # II: Global parameter minimization:
    # 1: only SSP parameters free. Use mean SSP parameter values and individual (but fixed ISM parameter values)
    #	changing_parameter = result[0,:len(a.SSP_parameters)]
    # 2: Call each star in multiprocess but only return the predictions
    # 3: Calculate the likelihood for each star and optimize the common model error (is all done within minimizer global, which is calling 'global optimization')
    #	x = minimizer_global(changing_parameter,  a.tol_minimization, a.maxiter_minimization, a.verbose, result)

    # 4: return global SSP parameters and common model error
    #	posterior, error_list, elements = global_optimization_error_returned(x, result)
    #	posteriors.append(posterior)
    #	print(posteriors)

    #	global_iteration1 = time.time()
    #	print('step %d global minimization took: %2.f seconds' %(counter, global_iteration1 - initial))

    # III: Local parameter minimization:
    # 1: Use fixed global parameters and fixed common errors make initial conditions
    #	result[:,:len(a.SSP_parameters)] = x

    #	log_list.append((np.copy(x),posterior))
    #	log_list.append('step %d global minimization' %(counter))

    #	p0_list = []
    #	parameter_list = []
    #	x_list = []
    #	error_list_mp = []
    #	element_list_mp = []

    #	for i,item in enumerate(a.stellar_identifier_list):
    #		parameter_list.append(item)
    #		p0_list.append(result[i,len(a.SSP_parameters):])
    #		x_list.append(x)
    #		error_list_mp.append(error_list)
    #		element_list_mp.append(elements)

    #	args = zip(p0_list,parameter_list,x_list,error_list_mp,element_list_mp)

    # 2: Minimize each star ISM parameters in multiprocess
    #	p = mp.Pool(len(parameter_list))
    #	t = p.map(minimizer_local, args)
    #	p.close()
    #	p.join()
    #	local_parameters = np.vstack(t)
    #	result[:,len(a.SSP_parameters):] = local_parameters

    #	log_list.append(np.copy(result))
    #	log_list.append('step %d local minimization' %(counter))
    #	local_iteration1 = time.time()
    #	print('step %d local minimization took: %2.f seconds' %(counter, local_iteration1 - global_iteration1))

    #log_list.append(posteriors)
    #print(log_list)

    # V: MCMC run
    ## reshape the result to have global parameters in the front and the local parameters following
    #changing_parameter = list(result[0,:len(a.SSP_parameters)])

    # NOTE(review): np.unique's second positional argument is return_index
    # (a bool) — passing preload.wildcard.dtype.names here does NOT compute
    # a union/intersection of the two element lists; it merely truthy-enables
    # return_index. Probably np.intersect1d or np.union1d was intended.
    # TODO confirm before changing behavior.
    elements = np.unique(a.elements_to_trace, preload.wildcard.dtype.names)
    changing_parameter = list(result)

    #for i in range(result.shape[0]):
    #	changing_parameter.append(list(result[i,len(a.SSP_parameters):]))

    changing_parameter = np.hstack(changing_parameter)

    ## jitter the parameters to initialise the chain (add a validation later, i.e. testing that the particular parameters yield a result)

    # mcmc_quick assumed to be defined at module level in the original file
    mcmc_quick(changing_parameter, elements, preload)

    # 1: Free all parameters and optimize common error (SSP should be the same for all stars)
    # 2: Plug everything into emcee and sample the posterior
    return log_list
Beispiel #16
0
def CV_median():
    """Compute the median and 16th/84th percentiles of the posterior
    parameters for each leave-one-element-out cross-validation run.

    For each traced element: rewrite Chempy/parameter.py to drop that
    element, re-import the affected modules, run the MCMC, record the
    per-parameter median and percentiles of the posterior, then restore the
    full element list. The beta coefficient currently set in the parameter
    file is used (hence the reminder print below). Results are saved to
    Scores/CV_medians<beta>.npz.
    """
    from Chempy.parameter import ModelParameters
    import importlib
    import fileinput
    import sys
    import multiprocessing as mp
    import tqdm
    from Chempy.wrapper import single_star_optimization
    from Chempy.plot_mcmc import restructure_chain
    from Chempy.cem_function import posterior_function_mcmc_quick
    from Chempy.parameter import ModelParameters
    from scipy.stats import norm
    from .score_function import preload_params_mcmc
    import matplotlib.pyplot as plt

    print('SET BETA PARAMETER IN PARAMETER.PY FILE')

    ## Code to rewrite parameter file for each element in turn, so as to run MCMC for 21/22 elements only
    # This is definitely not a good implementation (involves rewriting entire parameter file),
    # But other steps are far slower

    # Initialise arrays
    element_mean = []
    element_sigma = []
    overall_score = 1.
    factors = []
    posterior_med = []
    posterior_up = []
    posterior_low = []

    # Starting elements (copied from original parameter file)
    b = ModelParameters()
    starting_el = b.elements_to_trace
    orig = "\telements_to_trace = " + str(
        starting_el)  # Original element string
    #print(starting_el)

    # Calculate required Chempy elements (np assumed imported at module level)
    preload = preload_params_mcmc()
    elements_init = np.copy(preload.elements)
    #print(elements_init)

    # Create new parameter-file lines, each with one element removed
    # (the last element carries no trailing ", ")
    newstr = []
    for i, el in enumerate(elements_init):
        if el != starting_el[-1]:
            newstr.append(orig.replace("'" + str(el) + "', ", ""))
        else:
            newstr.append(orig.replace("'" + str(el) + "'", ""))
    for index in range(len(elements_init)):  # Iterate over removed element
        # Swap in the reduced element list
        for line in fileinput.input("Chempy/parameter.py", inplace=True):
            if "\telements_to_trace" in line:
                print(newstr[index])
                #print(line,end='') # TO TEST
            else:
                print(line, end='')
        fileinput.close()
        # Drop cached modules so the edited parameter file is re-read
        del sys.modules['Chempy.parameter']
        from Chempy.parameter import ModelParameters
        a = ModelParameters()
        del sys.modules['Chempy.score_function']
        from .score_function import preload_params_mcmc
        preload = preload_params_mcmc()
        ##############

        # Run MCMC with 27/28 elements.
        print('Running MCMC iteration %d of %d' %
              (index + 1, len(elements_init)))
        #print(a.elements_to_trace)
        single_star_optimization()

        # Create the posterior PDF and load it
        restructure_chain('mcmc/')
        positions = np.load('mcmc/posteriorPDF.npy')  # Posterior parameter PDF
        # Per-parameter median and 16th/84th percentiles of the posterior
        tmp_med = np.zeros(len(a.p0))
        tmp_low = np.zeros(len(a.p0))
        tmp_up = np.zeros(len(a.p0))
        for j in range(len(a.p0)):
            tmp_med[j] = np.median(positions[:, j])
            tmp_up[j] = np.percentile(positions[:, j], 100 - 15.865)
            tmp_low[j] = np.percentile(positions[:, j], 15.865)
        posterior_med.append(tmp_med)
        posterior_up.append(tmp_up)
        posterior_low.append(tmp_low)
        #print(a.elements_to_trace)

        ##############
        # RESET parameter file

        for line in fileinput.input("Chempy/parameter.py", inplace=True):
            if "\telements_to_trace" in line:
                print(orig)
            else:
                print(line, end='')
        fileinput.close()
        del sys.modules['Chempy.parameter']
        from Chempy.parameter import ModelParameters
        del sys.modules['Chempy.score_function']
        from .score_function import preload_params_mcmc
        a = ModelParameters()
        preload = preload_params_mcmc()

        ##############

        sys.stdout.flush()

    np.savez('Scores/CV_medians' + str(a.beta_param) + '.npz',
             elements=elements_init,
             median=posterior_med,
             upper=posterior_up,
             lower=posterior_low)
    return None
Beispiel #17
0
def mcmc_quick(changing_parameter, elements, preload):
    '''Run the MCMC for one zone (cut-down, single-star fast path).

	A subdirectory mcmc/ will be created in the current directory and
	intermediate chains will be stored there. The MCMC samples the volume of
	best posterior for the likelihood functions declared in parameter.py.

	INPUT:

	   changing_parameter = the parameter vector for initialization (will usually be found from minimization before). The initial chain will be created by jittering slightly the initial parameter guess

	   elements = the corresponding element symbols

	   preload = cached quantities from preload_params_mcmc

	OUTPUT:

	   The function will create a folder and store the chain as well as the predicted element values

	The MCMC stops when the convergence criterion is met: the mean posterior
	of all walkers changes by less than a.mcmc_tolerance over the last 100
	and 200 stored steps.

	NOTE(review): this uses the emcee 2.x API (threads=, storechain=,
	rstate0=/lnprob0=/blobs0=, lnprobability with shape (nwalkers, nsteps));
	it will not run unmodified on emcee >= 3. TODO confirm pinned version.
	'''
    import time
    import os
    import multiprocessing as mp
    from .cem_function import posterior_function_mcmc_quick
    from .score_function import preload_params_mcmc
    from .parameter import ModelParameters
    import emcee

    a = ModelParameters()
    start1 = time.time()
    directory = 'mcmc/'
    if os.path.exists(directory):
        if a.verbose:
            print('%s already existed. Content might be overwritten' %
                  (directory))
    else:
        os.makedirs(directory)

    # NOTE(review): thread count is only halved for exactly 4 CPUs — the
    # motivation for this special case is not visible here.
    nthreads = mp.cpu_count()
    if nthreads == 4:
        nthreads = 2
    ndim = len(changing_parameter)
    # At least 2 walkers per dimension (emcee requirement)
    a.nwalkers = max(a.nwalkers, int(ndim * 2))
    # np assumed imported at module level
    chain = np.empty(shape=(a.nwalkers, ndim))

    # Initialise each walker by jittering the input parameters until a
    # finite posterior is obtained
    for i in range(a.nwalkers):
        result = -np.inf
        while result == -np.inf:
            jitter = np.random.normal(loc=0, scale=0.001, size=ndim)
            result = posterior_function_mcmc_quick(changing_parameter + jitter,
                                                   elements, preload)
        chain[i] = changing_parameter + jitter

    pool = mp.Pool()
    sampler = emcee.EnsembleSampler(a.nwalkers,
                                    ndim,
                                    posterior_function_mcmc_quick,
                                    threads=nthreads,
                                    args=[elements, preload],
                                    pool=pool)
    # Burn-in of a.mburn steps
    pos, prob, state, blobs = sampler.run_mcmc(chain, a.mburn)

    # Main loop: up to a.m batches of a.save_state_every steps each
    mean_prob = mean_prob_beginning = np.zeros((a.m))
    posterior_list = []
    posterior_std_list = []
    for i in range(a.m):
        print('step ', i + 1, 'of ', a.m)
        pos, prob, state, blobs = sampler.run_mcmc(pos,
                                                   a.save_state_every,
                                                   rstate0=state,
                                                   lnprob0=prob,
                                                   blobs0=blobs,
                                                   storechain=True)
        #	np.save('%s/flatchain' %(directory),sampler.chain)
        #	np.save('%s/flatlnprobability' %(directory),sampler.lnprobability)
        #	np.save('%s/flatblobs' %(directory),sampler.blobs)
        #	posterior = np.load('%s/flatlnprobability.npy' %(directory))
        posterior = sampler.lnprobability
        posterior_list.append(np.mean(posterior, axis=0)[-1])
        posterior_std_list.append(np.std(posterior, axis=0)[-1])
        #	np.save('%s/flatmeanposterior' %(directory), posterior_list)
        #	np.save('%s/flatstdposterior' %(directory), posterior_std_list)
        print(np.mean(posterior, axis=0)[0], np.mean(posterior, axis=0)[-1])

        #if i>202:
        #print('posterior -1, -100, -200',np.mean(posterior, axis = 0)[-1], np.mean(posterior, axis = 0)[-100], np.mean(posterior, axis = 0)[-200])
        #print('posterior 0, 100, 200',np.mean(posterior, axis = 0)[0], np.mean(posterior, axis = 0)[100], np.mean(posterior, axis = 0)[200])
        #print("Mean acceptance fraction:", sampler.acceptance_fraction)
        elapsed1 = (time.time() - start1)
        print('calculation so far took', elapsed1, ' seconds')
        # Convergence: mean posterior stable over the last 100 and 200 steps
        if i > a.min_mcmc_iterations and np.abs(
                np.mean(posterior, axis=0)[-1] - np.mean(posterior, axis=0)
            [-100]) < a.mcmc_tolerance and np.abs(
                np.mean(posterior, axis=0)[-1] -
                np.mean(posterior, axis=0)[-200]) < a.mcmc_tolerance:
            break
    # Persist the final chain, posterior and blobs
    np.save('%s/flatchain' % (directory), sampler.chain)
    np.save('%s/flatlnprobability' % (directory), sampler.lnprobability)
    np.save('%s/flatblobs' % (directory), sampler.blobs)
    posterior = sampler.lnprobability
    #posterior = np.load('%s/flatlnprobability.npy' %(directory))
    posterior_list.append(np.mean(posterior, axis=0)[-1])
    posterior_std_list.append(np.std(posterior, axis=0)[-1])
    np.save('%s/flatmeanposterior' % (directory), posterior_list)
    np.save('%s/flatstdposterior' % (directory), posterior_std_list)
    pool.close()
    # send_email assumed to be defined at module level in the original file
    if a.send_email:
        send_email(nthreads, i,
                   np.mean(posterior, axis=0)[0],
                   np.mean(posterior, axis=0)[-1], a, elapsed1)
Beispiel #18
0
def Bayes_score():
    """
    Calculate the Bayes factor score for a specific yield set and choice of
    error parameter, as defined in the parameter file.

    First MCMC is run to find the centre of the parameter space, then a
    Monte-Carlo importance-sampled integration of the posterior is performed.
    This needs a trained neural network in the Neural/ folder.

    Returns
    -------
    (integral, integral_err) : tuple
        Bayes score and its predicted (1 sigma) error.
    """
    from .parameter import ModelParameters
    from .cem_function import posterior_function_mcmc_quick
    from .score_function import preload_params_mcmc
    from .plot_mcmc import restructure_chain
    from .wrapper import single_star_optimization
    from scipy.stats import multivariate_normal as scinorm
    from numpy.random import multivariate_normal as numnorm
    from skmonaco import mcimport
    import time

    # Load model parameters
    a = ModelParameters()
    preload = preload_params_mcmc()
    init_time = time.time()

    # Compute posterior + load median values - this automatically uses the neural network!!
    print('After %.3f seconds, finding posterior parameter values' %
          (time.time() - init_time))
    single_star_optimization()
    restructure_chain('mcmc/')
    positions = np.load('mcmc/posteriorPDF.npy')
    # Median of each marginal posterior is the centre of the importance distribution
    init_param = [np.percentile(positions[:, j], 50) for j in range(len(a.p0))]
    print(
        'After %.3f seconds, initial parameters are:' %
        (time.time() - init_time), init_param)

    # Function to compute posterior (needs a trained neural network)
    def posterior(theta):
        model_params = ModelParameters()
        post, _ = posterior_function_mcmc_quick(theta, model_params, preload)
        return np.exp(post)

    # Read prior sigma from parameter file
    # NOTE(review): `sigma` is currently unused below; kept because reading the
    # priors also validates that every optimized parameter has a prior entry.
    sigma = np.array([a.priors.get(param_name)[1]
                      for param_name in a.to_optimize])

    # Compute covariance matrix of the MCMC samples (parameters as columns).
    # FIX: the original recomputed np.cov for every (i, j) pair — O(n^2) calls;
    # a single np.cov with rowvar=False yields the identical matrix.
    print('After %.3f seconds, computing covariance matrix' %
          (time.time() - init_time))
    cov_matrix = np.cov(positions, rowvar=False)

    def gauss_factor(theta):
        # Returns gaussian fit to data
        return scinorm.pdf(theta, mean=np.array(init_param), cov=cov_matrix)

    def posterior_mod(theta):
        # Returns flattened posterior (importance-sampling integrand)
        return posterior(theta) / gauss_factor(theta)

    def dist(size):
        # Distribution function for mcmc sampling
        return numnorm(np.array(init_param), cov_matrix, size=size)

    # FIX: the original condition `'beta_param' or 'log10_beta' in a.to_optimize`
    # was always True (a non-empty string is truthy), so the else branch that
    # saves the Scores/ output could never run. Test membership explicitly.
    optimizing_beta = ('beta_param' in a.to_optimize
                       or 'log10_beta' in a.to_optimize)

    if optimizing_beta:  # don't save output here
        print('After %.3f seconds, starting parameter-space integration' %
              (time.time() - init_time))
    else:
        print(
            'After %.3f seconds, starting parameter-space integration for beta = %.3f'
            % (time.time() - init_time, a.beta_param))

    integral, integral_err = mcimport(posterior_mod,
                                      a.int_samples,
                                      dist,
                                      nprocs=4)  # Quad-core processing

    if not optimizing_beta:
        print('After %.3f seconds, integration is complete' %
              (time.time() - init_time))
        np.save('Scores/integral_' + str(a.beta_param) + '.npy', integral)
        np.save('Scores/integral_err_' + str(a.beta_param) + '.npy',
                integral_err)

    return integral, integral_err
Beispiel #19
0
 def posterior(theta):
     """Return the posterior probability (not log) for parameter vector *theta*.

     Evaluates the quick MCMC posterior (neural-network backed) for a fresh
     set of model parameters and exponentiates the returned log-posterior.
     """
     model_params = ModelParameters()
     log_post, _ = posterior_function_mcmc_quick(theta, model_params, preload)
     return np.exp(log_post)