def CV_stitch():
    """
    Load all per-beta CV predictions and combine them into one file.

    One ``Scores/CV<i>.npz`` archive is read for every entry of
    ``ModelParameters().list_of_beta_params``.  The ``beta_param``, ``score``
    and ``rescaled_score`` entries of each archive are collected and written
    to a single ``Scores/CV_score - <sn2>, <agb>, <1a>.npz`` file, named after
    the yield tables set in the parameter file.

    Returns:
        tuple: ``(beta, score)`` lists with one entry per beta value.  The
        rescaled scores are written to disk but are not returned.
    """
    from .score_function import preload_params_mcmc
    # The instance is not used below; the call is kept from the original code.
    # NOTE(review): confirm whether constructing it is needed for side effects.
    preload_params_mcmc()
    from .parameter import ModelParameters
    a = ModelParameters()

    beta = []
    score = []
    rescaled_score = []
    for i in range(len(a.list_of_beta_params)):
        # The context manager closes the archive even if a key is missing.
        with np.load('Scores/CV' + str(i) + '.npz') as temp:
            beta.append(temp['beta_param'])
            score.append(temp['score'])
            rescaled_score.append(temp['rescaled_score'])

    np.savez("Scores/CV_score - " + str(a.yield_table_name_sn2) + ", " +
             str(a.yield_table_name_agb) + ", " +
             str(a.yield_table_name_1a) + ".npz",
             beta_param=beta,
             score=score,
             rescaled_score=rescaled_score)
    return beta, score
def element_predictor(args):
    """
    Return the predicted abundance of one element for one parameter set.

    Args:
        args: A ``(params, index)`` pair.  ``params`` is passed to
            ``posterior_function_mcmc_quick``; ``index`` is converted with
            ``int()`` and selects the entry of the returned abundances.

    Returns:
        The ``index``-th entry of the second value returned by
        ``posterior_function_mcmc_quick``.
    """
    parameter_set, raw_index = args
    element_index = int(raw_index)
    loaded = preload_params_mcmc()
    from .cem_function import posterior_function_mcmc_quick
    # Only the abundances (second return value) are needed here.
    _unused, abundances = posterior_function_mcmc_quick(
        parameter_set, loaded.elements, loaded)
    return abundances[element_index]
def Bayes_wrapper():
    """
    Calculate the Bayes score as a function of the beta-function parameter.

    For every value in ``ModelParameters().list_of_beta_params`` the
    ``beta_param`` line of ``Chempy/parameter.py`` is rewritten, the parameter
    and score modules are re-imported so the new value is picked up, and
    ``Bayes_score()`` is evaluated.  The loop is not parallelized
    (parallelization is done in MCMC already, but not in the integration).

    The collected scores are saved in the ``Scores/`` folder as
    ``Bayes_score - <sn2>, <agb>, <1a>.npz``.  The parameter file is left
    holding the last beta value that was written.

    Returns:
        tuple: ``(beta_params, score, score_err)`` where ``score`` and
        ``score_err`` hold one entry per beta value.
    """
    import time
    import fileinput
    import sys
    from .parameter import ModelParameters

    print('UPDATE NEURAL NETWORK')

    directory = 'Scores/'
    # exist_ok avoids the check-then-create race of the exists()/makedirs pair.
    os.makedirs(directory, exist_ok=True)

    a = ModelParameters()
    beta_params = a.list_of_beta_params
    score = []
    score_err = []
    init_time = time.time()

    for i, beta_value in enumerate(beta_params):  # Iterate over beta parameters
        print("Calculating value %d of %d after %.3f seconds" %
              (i + 1, len(beta_params), time.time() - init_time))

        # Modify beta value.  With inplace=True, print() writes into the file;
        # the context manager restores stdout even if the rewrite fails.
        with fileinput.input("Chempy/parameter.py",
                             inplace=True) as parameter_file:
            for line in parameter_file:
                if "\tbeta_param" in line:
                    print("\tbeta_param = %.5f" % beta_value)
                else:
                    print(line, end='')

        # Reimport model parameters for new beta.  `del` is deliberate: a
        # missing key raises instead of silently reusing a stale module.
        del sys.modules['Chempy.parameter']
        del sys.modules['Chempy.score_function']
        from .parameter import ModelParameters
        from .score_function import preload_params_mcmc
        # Must be reloaded since this depends on beta parameter
        preload = preload_params_mcmc()
        a = ModelParameters()

        # Calculate Bayes score
        integral, integral_err = Bayes_score()
        score.append(integral)
        score_err.append(integral_err)

    # Save output as npz array
    np.savez("Scores/Bayes_score - " + str(a.yield_table_name_sn2) + ", " +
             str(a.yield_table_name_agb) + ", " +
             str(a.yield_table_name_1a) + ".npz",
             beta_param=beta_params,
             score=score,
             score_err=score_err)
    return beta_params, score, score_err
def CV_bash(beta_index):
    """
    Calculate and save the CV score for one specific beta value.

    The beta value at position ``beta_index`` of
    ``ModelParameters().list_of_beta_params`` is written into the
    ``beta_param`` line of ``Chempy/parameter.py``, the parameter and score
    modules are re-imported, and ``CV_score()`` is run.  The function itself
    is not parallelized (parallelization is done in MCMC already and for
    element predictions).

    The result is saved as ``Scores/CV<beta_index>.npz`` with the keys
    ``beta_param``, ``score`` and ``rescaled_score``.

    Args:
        beta_index: Position of the beta value in ``list_of_beta_params``;
            converted with ``int()``.

    Returns:
        tuple: ``(beta_params, score, rescaled_score)`` where ``beta_params``
        is the single selected beta value and ``rescaled_score`` is ``score``
        raised to ``1 / len(a.initial_neural_names)``.
    """
    import fileinput
    import os
    import sys
    from .parameter import ModelParameters
    from .score_function import CV_score

    beta_index = int(beta_index)
    print(beta_index)
    a = ModelParameters()
    beta_params = a.list_of_beta_params[beta_index]

    # Make sure the output folder exists before anything is saved into it.
    os.makedirs('Scores/', exist_ok=True)

    # Modify beta value.  With inplace=True, print() writes into the file;
    # the context manager restores stdout even if the rewrite fails.
    with fileinput.input("Chempy/parameter.py",
                         inplace=True) as parameter_file:
        for line in parameter_file:
            if "\tbeta_param" in line:
                print("\tbeta_param = %.5f" % beta_params)
            else:
                print(line, end='')

    # Reimport model parameters for new beta.  `del` is deliberate: a missing
    # key raises instead of silently reusing a stale module.
    del sys.modules['Chempy.parameter']
    del sys.modules['Chempy.score_function']
    from Chempy.parameter import ModelParameters
    from Chempy.score_function import preload_params_mcmc
    a = ModelParameters()
    preload = preload_params_mcmc()

    # Calculate CV score
    score = CV_score()
    rescaled_score = np.power(score, 1. / len(a.initial_neural_names))

    np.savez('Scores/CV' + str(beta_index) + '.npz',
             beta_param=beta_params,
             score=score,
             rescaled_score=rescaled_score)
    return beta_params, score, rescaled_score
def CV_median():
    """
    Compute posterior medians and 1-sigma percentiles for each left-out element.

    For every element in ``preload_params_mcmc().elements`` the
    ``elements_to_trace`` line of ``Chempy/parameter.py`` is rewritten with
    that element removed, the parameter and score modules are re-imported,
    MCMC is run (``single_star_optimization``) and the posterior chain is
    loaded from ``mcmc/posteriorPDF.npy``.  For each of the ``len(a.p0)``
    parameters the median and the 15.865 / 84.135 percentiles of the chain are
    recorded.  The value of the beta coefficient in the parameter file is used.

    The original ``elements_to_trace`` line is always written back after each
    iteration, also when the MCMC step raises.

    Results are saved to ``Scores/CV_medians<beta_param>.npz`` with the keys
    ``elements``, ``median``, ``upper`` and ``lower``.

    Returns:
        None
    """
    import fileinput
    import os
    import sys
    from Chempy.parameter import ModelParameters
    from Chempy.wrapper import single_star_optimization
    from Chempy.plot_mcmc import restructure_chain
    from .score_function import preload_params_mcmc

    print('SET BETA PARAMETER IN PARAMETER.PY FILE')

    def _set_elements_line(new_line):
        # Rewrite the parameter file, swapping in `new_line` for the
        # elements_to_trace assignment.  With inplace=True, print() writes
        # into the file; the context manager restores stdout on any error.
        with fileinput.input("Chempy/parameter.py",
                             inplace=True) as parameter_file:
            for line in parameter_file:
                if "\telements_to_trace" in line:
                    print(new_line)
                else:
                    print(line, end='')

    # Code to rewrite parameter file for each element in turn, so as to run
    # MCMC without that element.  This is definitely not a good implementation
    # (involves rewriting entire parameter file), but other steps are far
    # slower.
    posterior_med = []
    posterior_up = []
    posterior_low = []

    # Starting elements (copied from original parameter file)
    b = ModelParameters()
    starting_el = b.elements_to_trace
    orig = "\telements_to_trace = " + str(starting_el)  # Original element string

    # Calculate required Chempy elements
    preload = preload_params_mcmc()
    elements_init = np.copy(preload.elements)

    # One replacement line per element, each with that element removed.  The
    # last element has no trailing ", " in the list repr, so only its quoted
    # name is stripped.
    newstr = []
    for el in elements_init:
        if el != starting_el[-1]:
            newstr.append(orig.replace("'" + str(el) + "', ", ""))
        else:
            newstr.append(orig.replace("'" + str(el) + "'", ""))

    # Keeps `a` defined for the final save even if there are no elements.
    a = b

    for index in range(len(elements_init)):  # Iterate over removed element
        _set_elements_line(newstr[index])
        try:
            # `del` is deliberate: a missing key raises instead of silently
            # reusing a stale module.
            del sys.modules['Chempy.parameter']
            from Chempy.parameter import ModelParameters
            a = ModelParameters()
            del sys.modules['Chempy.score_function']
            from .score_function import preload_params_mcmc
            preload = preload_params_mcmc()

            # Run MCMC with one element left out.
            print('Running MCMC iteration %d of %d' %
                  (index + 1, len(elements_init)))
            single_star_optimization()

            # Create the posterior PDF and load it
            restructure_chain('mcmc/')
            positions = np.load('mcmc/posteriorPDF.npy')  # Posterior parameter PDF

            n_params = len(a.p0)
            posterior_med.append(np.array(
                [np.median(positions[:, j]) for j in range(n_params)],
                dtype=float))
            posterior_up.append(np.array(
                [np.percentile(positions[:, j], 100 - 15.865)
                 for j in range(n_params)], dtype=float))
            posterior_low.append(np.array(
                [np.percentile(positions[:, j], 15.865)
                 for j in range(n_params)], dtype=float))
        finally:
            # RESET parameter file, also when the MCMC step failed, and drop
            # the modules that were imported from the modified file.
            _set_elements_line(orig)
            sys.modules.pop('Chempy.parameter', None)
            sys.modules.pop('Chempy.score_function', None)

        from Chempy.parameter import ModelParameters
        from .score_function import preload_params_mcmc
        a = ModelParameters()
        preload = preload_params_mcmc()

        sys.stdout.flush()

    os.makedirs('Scores/', exist_ok=True)
    np.savez('Scores/CV_medians' + str(a.beta_param) + '.npz',
             elements=elements_init,
             median=posterior_med,
             upper=posterior_up,
             lower=posterior_low)
    return None
def CV_score():
    """
    Compute the UNNORMALISED leave-one-out cross-validation score.

    For every element in ``preload_params_mcmc().elements``:

    1. the ``elements_to_trace`` line of ``Chempy/parameter.py`` is rewritten
       with that element removed and the parameter/score modules are
       re-imported;
    2. MCMC is run (``single_star_optimization``) and the posterior chain is
       loaded from ``mcmc/posteriorPDF.npy``;
    3. the original line is written back (also when step 2 raises) and the
       modules are re-imported again, so predictions use all elements;
    4. ``element_predictor`` is mapped over the chain in a process pool and a
       normal distribution is fitted to the predicted abundances;
    5. the likelihood factor is the normal pdf of the fitted mean around
       ``preload.star_abundance_list[index]`` with the observational error and
       the fitted sigma added in quadrature.

    The element list is saved to ``Scores/CV_elements.npy`` and the
    per-element results to ``Scores/CV_beta_elements<beta_param>.npz``.

    Returns:
        float: Product of the likelihood factors of all elements.
    """
    import fileinput
    import os
    import sys
    import multiprocessing as mp
    import tqdm
    from Chempy.parameter import ModelParameters
    from Chempy.wrapper import single_star_optimization
    from Chempy.plot_mcmc import restructure_chain
    from scipy.stats import norm
    from .score_function import preload_params_mcmc
    import matplotlib.pyplot as plt

    def _set_elements_line(new_line):
        # Rewrite the parameter file, swapping in `new_line` for the
        # elements_to_trace assignment.  With inplace=True, print() writes
        # into the file; the context manager restores stdout on any error.
        with fileinput.input("Chempy/parameter.py",
                             inplace=True) as parameter_file:
            for line in parameter_file:
                if "\telements_to_trace" in line:
                    print(new_line)
                else:
                    print(line, end='')

    os.makedirs('Scores/', exist_ok=True)

    # Code to rewrite parameter file for each element in turn, so as to run
    # MCMC without that element.  This is definitely not a good implementation
    # (involves rewriting entire parameter file), but other steps are far
    # slower.
    element_mean = []
    element_sigma = []
    overall_score = 1.
    factors = []

    # Starting elements (copied from original parameter file)
    b = ModelParameters()
    starting_el = b.elements_to_trace
    orig = "\telements_to_trace = " + str(starting_el)  # Original element string

    # Calculate required Chempy elements
    preload = preload_params_mcmc()
    elements_init = np.copy(preload.elements)
    np.save('Scores/CV_elements.npy', elements_init)

    # One replacement line per element, each with that element removed.  The
    # last element has no trailing ", " in the list repr, so only its quoted
    # name is stripped.
    newstr = []
    for el in elements_init:
        if el != starting_el[-1]:
            newstr.append(orig.replace("'" + str(el) + "', ", ""))
        else:
            newstr.append(orig.replace("'" + str(el) + "'", ""))

    # Keeps `a` defined for the final save even if there are no elements.
    a = b

    for index in range(len(elements_init)):  # Iterate over removed element
        _set_elements_line(newstr[index])
        try:
            # `del` is deliberate: a missing key raises instead of silently
            # reusing a stale module.
            del sys.modules['Chempy.parameter']
            del sys.modules['Chempy.score_function']
            from Chempy.parameter import ModelParameters
            from .score_function import preload_params_mcmc
            a = ModelParameters()
            preload = preload_params_mcmc()

            # Run MCMC with one element left out.
            print('Running MCMC iteration %d of %d' %
                  (index + 1, len(elements_init)))
            single_star_optimization()

            # Create the posterior PDF and load it
            restructure_chain('mcmc/')
            positions = np.load('mcmc/posteriorPDF.npy')  # Posterior parameter PDF
        finally:
            # Put the original element list back, also when MCMC failed, and
            # drop the modules that were imported from the modified file.
            _set_elements_line(orig)
            sys.modules.pop('Chempy.parameter', None)
            sys.modules.pop('Chempy.score_function', None)

        from Chempy.parameter import ModelParameters
        from .score_function import preload_params_mcmc
        a = ModelParameters()
        preload = preload_params_mcmc()

        # This uses all elements again for predictions.
        # Multiprocess and calculate elemental predictions for each parameter
        # set.  The predictor is imported from the freshly re-imported module.
        # NOTE(review): presumably needed so the pool can pickle it - confirm.
        from .score_function import element_predictor
        indices = np.ones(len(positions)) * index
        with mp.Pool() as pool:
            # Leaving the block on an error terminates the workers.
            abundance = list(
                tqdm.tqdm(pool.imap_unordered(element_predictor,
                                              zip(positions, indices)),
                          total=len(positions)))
            pool.close()
            pool.join()

        abundance = np.array(abundance)
        mean, sigma = norm.fit(abundance)
        print(mean)
        print(sigma)

        element_mean.append(mean)
        element_sigma.append(sigma)

        if a.plot_hist:
            plt.clf()
            # `normed` was removed from matplotlib; `density` replaces it.
            plt.hist(abundance, bins=40, density=True, alpha=0.6, color='g')

            # Plot the PDF.
            xmin, xmax = plt.xlim()
            x = np.linspace(xmin, xmax, 100)
            fitted_pdf = norm.pdf(x, mean, sigma)
            plt.plot(x, fitted_pdf, c='k', linewidth=2)
            title = 'Plot of element %d abundance' % (index)
            plt.title(title)
            plt.xlabel('[X/Fe] abundance')
            plt.ylabel('Relative frequency')

        # Observational error and prediction scatter added in quadrature.
        total_err = np.sqrt((preload.star_error_list[index])**2 + sigma**2)
        likelihood_factor = norm.pdf(mean,
                                     loc=preload.star_abundance_list[index],
                                     scale=total_err)
        overall_score *= likelihood_factor
        factors.append(likelihood_factor)
        print(
            "Likelihood contribution from %dth element is %.8f with beta param %.4f"
            % (index + 1, likelihood_factor, a.beta_param))
        print(overall_score)
        sys.stdout.flush()

    np.savez('Scores/CV_beta_elements' + str(a.beta_param) + '.npz',
             elements=elements_init,
             likelihood_factors=factors,
             element_mean=element_mean,
             element_sigma=element_sigma)
    return overall_score
def Bayes_score():
    """
    Calculate the Bayes factor score for the current parameter file.

    The score is computed for the yield set and choice of error parameter
    defined in the parameter file.  First MCMC is run
    (``single_star_optimization``) and the chain in ``mcmc/posteriorPDF.npy``
    gives the centre (per-parameter median) and covariance of the parameter
    space.  The posterior is then integrated with ``skmonaco.mcimport``,
    sampling from a multivariate normal with that mean and covariance and
    dividing the posterior by the matching Gaussian density.  This needs a
    trained neural network in the Neural/ folder.

    The results are saved as ``Scores/integral_<beta_param>.npy`` and
    ``Scores/integral_err_<beta_param>.npy``.

    Returns:
        tuple: ``(integral, integral_err)``, the Bayes score and its predicted
        (1 sigma) error as returned by ``mcimport``.
    """
    from .parameter import ModelParameters
    from .cem_function import posterior_function_mcmc_quick
    from .score_function import preload_params_mcmc
    from .plot_mcmc import restructure_chain
    from .wrapper import single_star_optimization
    from scipy.stats import multivariate_normal as scinorm
    from numpy.random import multivariate_normal as numnorm
    from skmonaco import mcimport
    import time

    # Load model parameters
    a = ModelParameters()
    preload = preload_params_mcmc()
    init_time = time.time()

    # Compute posterior + load median values - this automatically uses the
    # neural network!!
    print('After %.3f seconds, finding posterior parameter values' %
          (time.time() - init_time))
    single_star_optimization()
    restructure_chain('mcmc/')
    positions = np.load('mcmc/posteriorPDF.npy')

    n_params = len(a.p0)
    init_param = [np.percentile(positions[:, j], 50) for j in range(n_params)]
    print(
        'After %.3f seconds, initial parameters are:' %
        (time.time() - init_time), init_param)

    def posterior(theta):
        # Posterior probability at theta (needs a trained neural network).
        # A fresh ModelParameters is built per call, as in the original code.
        model = ModelParameters()
        post, _ = posterior_function_mcmc_quick(theta, model, preload)
        return np.exp(post)

    # Compute covariance matrix of the chain in one call; atleast_2d keeps a
    # (1, 1) matrix when there is only a single parameter.
    print('After %.3f seconds, computing covariance matrix' %
          (time.time() - init_time))
    cov_matrix = np.atleast_2d(np.cov(positions[:, :n_params], rowvar=False))
    mean_vector = np.array(init_param)

    def gauss_factor(theta):
        # Returns gaussian fit to data
        return scinorm.pdf(theta, mean=mean_vector, cov=cov_matrix)

    def posterior_mod(theta):
        # Returns flattened posterior
        return posterior(theta) / gauss_factor(theta)

    def dist(size):
        # Distribution function for mcmc sampling
        return numnorm(mean_vector, cov_matrix, size=size)

    # The original test `'beta_param' or 'log10_beta' in a.to_optimize` was
    # always true because the non-empty string is truthy on its own.
    beta_is_optimized = ('beta_param' in a.to_optimize
                         or 'log10_beta' in a.to_optimize)
    if beta_is_optimized:
        print('After %.3f seconds, starting parameter-space integration' %
              (time.time() - init_time))
    else:
        print(
            'After %.3f seconds, starting parameter-space integration for beta = %.3f'
            % (time.time() - init_time, a.beta_param))

    # The integration is identical in both cases.  Quad-core processing.
    integral, integral_err = mcimport(posterior_mod,
                                      a.int_samples,
                                      dist,
                                      nprocs=4)

    print('After %.3f seconds, integration is complete' %
          (time.time() - init_time))
    # NOTE(review): an original comment ("don't save output here") hints that
    # saving may have been meant only for the fixed-beta case - confirm.
    np.save('Scores/integral_' + str(a.beta_param) + '.npy', integral)
    np.save('Scores/integral_err_' + str(a.beta_param) + '.npy', integral_err)

    return integral, integral_err