Beispiel #1
0
def CV_stitch():
    """
    Load all per-beta cross-validation results and combine them into one file.

    For every index ``i`` of ``ModelParameters().list_of_beta_params`` the
    archive ``Scores/CV<i>.npz`` is read and its ``beta_param``, ``score`` and
    ``rescaled_score`` entries are collected. The collected lists are then
    written to ``Scores/CV_score - <sn2>, <agb>, <1a>.npz``, where the three
    names are the yield-table names stored in the model parameters.

    Returns:
        tuple: ``(beta, score)`` -- two lists with one entry per beta value.
        The rescaled scores are written to disk but not returned.
    """
    from .score_function import preload_params_mcmc
    # NOTE(review): the preload object is never read in this function; the
    # call is kept in case constructing it has side effects -- confirm and
    # drop it if it does not.
    preload_params_mcmc()
    from .parameter import ModelParameters
    a = ModelParameters()
    beta = []
    score = []
    rescaled_score = []
    for i in range(len(a.list_of_beta_params)):
        # The context manager closes the archive even if one of the expected
        # keys is missing and the lookup raises.
        with np.load('Scores/CV' + str(i) + '.npz') as temp:
            beta.append(temp['beta_param'])
            score.append(temp['score'])
            rescaled_score.append(temp['rescaled_score'])
    np.savez("Scores/CV_score - " + str(a.yield_table_name_sn2) + ", " +
             str(a.yield_table_name_agb) + ", " + str(a.yield_table_name_1a) +
             ".npz",
             beta_param=beta,
             score=score,
             rescaled_score=rescaled_score)

    return beta, score
Beispiel #2
0
def element_predictor(args):
    """
    Return the predicted abundance of one element for one parameter set.

    Args:
        args: a ``(params, index)`` pair packed into a single argument (this
            is how CV_score feeds the function to ``Pool.imap_unordered``).
            ``index`` is converted with ``int()`` and selects the entry of the
            abundance list that is returned.

    Returns:
        The ``index``-th entry of the second value returned by
        ``posterior_function_mcmc_quick``.
    """
    parameter_set, raw_index = args
    element_position = int(raw_index)
    loaded = preload_params_mcmc()
    from .cem_function import posterior_function_mcmc_quick
    # Only the abundance list (second return value) is needed here.
    _unused, abundances = posterior_function_mcmc_quick(
        parameter_set, loaded.elements, loaded)
    return abundances[element_position]
Beispiel #3
0
def Bayes_wrapper():
    """
    Calculate the Bayes score for every beta value in ``list_of_beta_params``.

    For each beta value the ``beta_param`` line of ``Chempy/parameter.py`` is
    rewritten in place, the ``Chempy.parameter`` and ``Chempy.score_function``
    modules are dropped from ``sys.modules`` and re-imported so the new value
    is picked up, and ``Bayes_score()`` is evaluated.
    It is not currently parallelized (parallelization is done in MCMC already,
    but not in the integration).

    Output scores are saved in the ``Scores/`` folder as
    ``Bayes_score - <sn2>, <agb>, <1a>.npz``.

    Note:
        The parameter file is left holding the last beta value that was
        processed; it is not restored afterwards.

    Returns:
        tuple: ``(beta_params, score, score_err)`` where ``score`` and
        ``score_err`` are lists with one entry per beta value.
    """
    import time
    import fileinput
    import sys
    from .parameter import ModelParameters
    print('UPDATE NEURAL NETWORK')
    directory = 'Scores/'
    # exist_ok avoids the race between a separate exists() check and creation.
    os.makedirs(directory, exist_ok=True)

    a = ModelParameters()
    beta_params = a.list_of_beta_params
    score = []
    score_err = []
    init_time = time.time()

    for i, beta_value in enumerate(beta_params):  # Iterate over beta parameters
        print("Calculating value %d of %d after %.3f seconds" %
              (i + 1, len(beta_params), time.time() - init_time))
        # Modify beta value. With inplace=True, print() writes into the file;
        # the context manager guarantees stdout is restored even if the
        # rewrite raises part-way through.
        with fileinput.input("Chempy/parameter.py",
                             inplace=True) as parameter_file:
            for line in parameter_file:
                if "\tbeta_param" in line:
                    print("\tbeta_param = %.5f" % beta_value)
                else:
                    print(line, end='')
        # Reimport model parameters for new beta
        del sys.modules['Chempy.parameter']
        del sys.modules['Chempy.score_function']
        from .parameter import ModelParameters
        from .score_function import preload_params_mcmc
        # NOTE(review): the preload object is not used below (Bayes_score
        # builds its own); the call is kept in case it has side effects.
        preload_params_mcmc()
        a = ModelParameters()

        # Calculate Bayes score
        integral, integral_err = Bayes_score()
        score.append(integral)
        score_err.append(integral_err)

    # Save output as npz array
    np.savez("Scores/Bayes_score - " + str(a.yield_table_name_sn2) + ", " +
             str(a.yield_table_name_agb) + ", " + str(a.yield_table_name_1a) +
             ".npz",
             beta_param=beta_params,
             score=score,
             score_err=score_err)

    return beta_params, score, score_err
Beispiel #4
0
def CV_bash(beta_index):
    """
    Calculate the cross-validation score for one specific beta value.

    The beta value at position ``beta_index`` of ``list_of_beta_params`` is
    written into the ``beta_param`` line of ``Chempy/parameter.py``, the
    ``Chempy.parameter`` and ``Chempy.score_function`` modules are re-imported
    so the new value is picked up, and ``CV_score()`` is evaluated.
    It is not currently parallelized (parallelization is done in MCMC already
    and for element predictions).

    The result is saved to ``Scores/CV<beta_index>.npz``.

    Args:
        beta_index: position in ``list_of_beta_params``; converted with
            ``int()``, so a numeric string (e.g. from a shell script) works.

    Returns:
        tuple: ``(beta_param, score, rescaled_score)`` where
        ``rescaled_score`` is ``score ** (1 / len(a.initial_neural_names))``.
    """
    import fileinput
    import sys
    from .parameter import ModelParameters
    from .score_function import CV_score
    beta_index = int(beta_index)
    print(beta_index)

    a = ModelParameters()
    beta_params = a.list_of_beta_params[beta_index]

    # Modify beta value. With inplace=True, print() writes into the file; the
    # context manager guarantees stdout is restored even if the rewrite raises.
    with fileinput.input("Chempy/parameter.py",
                         inplace=True) as parameter_file:
        for line in parameter_file:
            if "\tbeta_param" in line:
                print("\tbeta_param = %.5f" % beta_params)
            else:
                print(line, end='')
    # Reimport model parameters for new beta
    del sys.modules['Chempy.parameter']
    del sys.modules['Chempy.score_function']
    from Chempy.parameter import ModelParameters
    from Chempy.score_function import preload_params_mcmc
    a = ModelParameters()
    # NOTE(review): the preload object is not used below; the call is kept in
    # case constructing it has side effects.
    preload_params_mcmc()

    # Calculate cross-validation score
    score = CV_score()

    rescaled_score = np.power(score, 1. / len(a.initial_neural_names))
    np.savez('Scores/CV' + str(beta_index) + '.npz',
             beta_param=beta_params,
             score=score,
             rescaled_score=rescaled_score)

    return beta_params, score, rescaled_score
Beispiel #5
0
def CV_median():
    """
    Compute posterior medians and 15.865 / 84.135 percentiles per held-out element.

    For each element in ``preload.elements`` the ``elements_to_trace`` line of
    ``Chempy/parameter.py`` is rewritten with that element removed, the MCMC
    is run (``single_star_optimization``), and the resulting chain in
    ``mcmc/posteriorPDF.npy`` is summarised by the median and the 15.865th /
    84.135th percentiles of each of the first ``len(a.p0)`` columns. The
    parameter file is then restored to its original element list.
    The value of the beta coefficient in the parameter file is used.

    The summaries are saved to ``Scores/CV_medians<beta_param>.npz`` with the
    keys ``elements``, ``median``, ``upper`` and ``lower``.

    Returns:
        None
    """
    import fileinput
    import sys
    from Chempy.parameter import ModelParameters
    from Chempy.wrapper import single_star_optimization
    from Chempy.plot_mcmc import restructure_chain
    # NOTE(review): unused in this function; kept so the set of project
    # modules imported before parameter.py is rewritten stays unchanged.
    from Chempy.cem_function import posterior_function_mcmc_quick  # noqa: F401
    from .score_function import preload_params_mcmc

    print('SET BETA PARAMETER IN PARAMETER.PY FILE')

    def _write_elements_line(new_line):
        # Replace the elements_to_trace line of the parameter file in place.
        # With inplace=True, print() writes into the file; the context manager
        # guarantees stdout is restored even if the rewrite raises.
        with fileinput.input("Chempy/parameter.py",
                             inplace=True) as parameter_file:
            for line in parameter_file:
                if "\telements_to_trace" in line:
                    print(new_line)
                else:
                    print(line, end='')

    ## Code to rewrite parameter file for each element in turn, so as to run MCMC with one element left out
    # This is definitely not a good implementation (involves rewriting entire parameter file),
    # But other steps are far slower

    # Initialise arrays
    posterior_med = []
    posterior_up = []
    posterior_low = []

    # Starting elements (copied from original parameter file)
    b = ModelParameters()
    starting_el = b.elements_to_trace
    orig = "\telements_to_trace = " + str(
        starting_el)  # Original element string

    # Calculate required Chempy elements
    preload = preload_params_mcmc()
    elements_init = np.copy(preload.elements)

    # Create new parameter names: one replacement line per removed element.
    # The last element has no trailing ", " in the list repr, so only the
    # quoted name itself is stripped in that case.
    newstr = []
    for el in elements_init:
        if el != starting_el[-1]:
            newstr.append(orig.replace("'" + str(el) + "', ", ""))
        else:
            newstr.append(orig.replace("'" + str(el) + "'", ""))

    # Fallback so the output file name below is defined even when there are no
    # elements to iterate over.
    a = b

    for index in range(len(elements_init)):  # Iterate over removed element
        _write_elements_line(newstr[index])
        try:
            del sys.modules['Chempy.parameter']
            from Chempy.parameter import ModelParameters
            a = ModelParameters()
            del sys.modules['Chempy.score_function']
            from .score_function import preload_params_mcmc
            preload = preload_params_mcmc()

            # Run MCMC with one element removed.
            print('Running MCMC iteration %d of %d' %
                  (index + 1, len(elements_init)))
            single_star_optimization()

            # Create the posterior PDF and load it
            restructure_chain('mcmc/')
            positions = np.load(
                'mcmc/posteriorPDF.npy')  # Posterior parameter PDF
            n_params = len(a.p0)
            posterior_med.append(
                np.array([np.median(positions[:, j])
                          for j in range(n_params)]))
            posterior_up.append(
                np.array([np.percentile(positions[:, j], 100 - 15.865)
                          for j in range(n_params)]))
            posterior_low.append(
                np.array([np.percentile(positions[:, j], 15.865)
                          for j in range(n_params)]))
        finally:
            # RESET parameter file, even if the MCMC step failed, so that a
            # crash never leaves parameter.py with an element missing.
            _write_elements_line(orig)

        # Reload the modules so they see the restored element list.
        del sys.modules['Chempy.parameter']
        from Chempy.parameter import ModelParameters
        del sys.modules['Chempy.score_function']
        from .score_function import preload_params_mcmc
        a = ModelParameters()
        preload = preload_params_mcmc()

        sys.stdout.flush()

    np.savez('Scores/CV_medians' + str(a.beta_param) + '.npz',
             elements=elements_init,
             median=posterior_med,
             upper=posterior_up,
             lower=posterior_low)
    return None
Beispiel #6
0
def CV_score():
    """
    Compute the UNNORMALISED cross-validation score over all elements.

    For each element in ``preload.elements``:

    1. ``Chempy/parameter.py`` is rewritten with that element removed from
       ``elements_to_trace`` and the MCMC is run on the remaining elements.
    2. The parameter file is restored to the full element list.
    3. The abundance of the held-out element is predicted for every sample of
       the posterior chain (``element_predictor`` in a multiprocessing pool)
       and a normal distribution is fitted to the predictions.
    4. The likelihood factor is the normal PDF of the fitted mean, centred on
       the observed abundance with the observational error and the fitted
       sigma added in quadrature.

    The element list is saved to ``Scores/CV_elements.npy`` and the
    per-element factors, means and sigmas to
    ``Scores/CV_beta_elements<beta_param>.npz``.

    Returns:
        float: the product of the likelihood factors of all elements.
    """
    from Chempy.parameter import ModelParameters
    import fileinput
    import sys
    import multiprocessing as mp
    import tqdm
    from Chempy.wrapper import single_star_optimization
    from Chempy.plot_mcmc import restructure_chain
    # NOTE(review): unused in this function; kept so the set of project
    # modules imported before parameter.py is rewritten stays unchanged.
    from Chempy.cem_function import posterior_function_mcmc_quick  # noqa: F401
    from scipy.stats import norm
    from .score_function import preload_params_mcmc
    import matplotlib.pyplot as plt

    def _write_elements_line(new_line):
        # Replace the elements_to_trace line of the parameter file in place.
        # With inplace=True, print() writes into the file; the context manager
        # guarantees stdout is restored even if the rewrite raises.
        with fileinput.input("Chempy/parameter.py",
                             inplace=True) as parameter_file:
            for line in parameter_file:
                if "\telements_to_trace" in line:
                    print(new_line)
                else:
                    print(line, end='')

    ## Code to rewrite parameter file for each element in turn, so as to run MCMC with one element left out
    # This is definitely not a good implementation (involves rewriting entire parameter file),
    # But other steps are far slower

    # Initialise arrays
    element_mean = []
    element_sigma = []
    overall_score = 1.
    factors = []

    # Starting elements (copied from original parameter file)
    b = ModelParameters()
    starting_el = b.elements_to_trace
    orig = "\telements_to_trace = " + str(
        starting_el)  # Original element string

    # Calculate required Chempy elements
    preload = preload_params_mcmc()
    elements_init = np.copy(preload.elements)
    np.save('Scores/CV_elements.npy', elements_init)

    # Create new parameter names: one replacement line per removed element.
    # The last element has no trailing ", " in the list repr, so only the
    # quoted name itself is stripped in that case.
    newstr = []
    for el in elements_init:
        if el != starting_el[-1]:
            newstr.append(orig.replace("'" + str(el) + "', ", ""))
        else:
            newstr.append(orig.replace("'" + str(el) + "'", ""))

    # Fallback so the output file name below is defined even when there are no
    # elements to iterate over.
    a = b

    for index in range(len(elements_init)):  # Iterate over removed element
        _write_elements_line(newstr[index])
        try:
            del sys.modules['Chempy.parameter']
            del sys.modules['Chempy.score_function']
            from Chempy.parameter import ModelParameters
            from .score_function import preload_params_mcmc
            a = ModelParameters()
            preload = preload_params_mcmc()

            # Run MCMC with one element removed.
            print('Running MCMC iteration %d of %d' %
                  (index + 1, len(elements_init)))
            single_star_optimization()

            # Create the posterior PDF and load it
            restructure_chain('mcmc/')
            positions = np.load(
                'mcmc/posteriorPDF.npy')  # Posterior parameter PDF
        finally:
            # Restore the parameter file even if the MCMC step failed, so that
            # a crash never leaves parameter.py with an element missing.
            _write_elements_line(orig)

        # Reload the modules so they see the restored element list.
        del sys.modules['Chempy.parameter']
        del sys.modules['Chempy.score_function']
        from Chempy.parameter import ModelParameters
        from .score_function import preload_params_mcmc
        a = ModelParameters()
        preload = preload_params_mcmc()

        # This uses the full element list again for predictions

        # Multiprocess and calculate elemental predictions for each parameter set
        from .score_function import element_predictor
        pool = mp.Pool()
        # Every task gets the same element index; element_predictor converts
        # it back to int. Result order is irrelevant because only the
        # distribution of the predictions is used.
        indices = np.ones(len(positions)) * index
        abundance = list(
            tqdm.tqdm(pool.imap_unordered(element_predictor,
                                          zip(positions, indices)),
                      total=len(positions)))
        pool.close()
        pool.join()

        abundance = np.array(abundance)
        mean, sigma = norm.fit(abundance)
        print(mean)
        print(sigma)

        element_mean.append(mean)
        element_sigma.append(sigma)
        if a.plot_hist:
            # NOTE(review): the figure is drawn but never saved or shown here.
            plt.clf()
            # `density` replaces the `normed` keyword, which newer Matplotlib
            # releases no longer accept.
            plt.hist(abundance, bins=40, density=True, alpha=0.6, color='g')
            # Plot the PDF.
            xmin, xmax = plt.xlim()
            x = np.linspace(xmin, xmax, 100)
            fitted_pdf = norm.pdf(x, mean, sigma)
            plt.plot(x, fitted_pdf, c='k', linewidth=2)
            title = 'Plot of element %d abundance' % (index)
            plt.title(title)
            plt.xlabel('[X/Fe] abundance')
            plt.ylabel('Relative frequency')

        # Observational error and prediction scatter added in quadrature.
        total_err = np.sqrt((preload.star_error_list[index])**2 + sigma**2)
        likelihood_factor = norm.pdf(mean,
                                     loc=preload.star_abundance_list[index],
                                     scale=total_err)
        overall_score *= likelihood_factor
        factors.append(likelihood_factor)
        print(
            "Likelihood contribution from %dth element is %.8f with beta param %.4f"
            % (index + 1, likelihood_factor, a.beta_param))
        print(overall_score)
        sys.stdout.flush()
    np.savez('Scores/CV_beta_elements' + str(a.beta_param) + '.npz',
             elements=elements_init,
             likelihood_factors=factors,
             element_mean=element_mean,
             element_sigma=element_sigma)
    return overall_score
Beispiel #7
0
def Bayes_score():
    """
    Calculate the Bayes factor score for the yield set and error parameter
    defined in the parameter file.

    First the MCMC is run (``single_star_optimization``) to locate the centre
    of the parameter space; the posterior chain in ``mcmc/posteriorPDF.npy``
    provides the median of each parameter and the covariance matrix. The
    posterior is then integrated by importance sampling (``mcimport``) using a
    multivariate normal with that median and covariance as the sampling
    distribution, so the integrand is the posterior divided by that Gaussian.
    This needs a trained neural network in the Neural/ folder.

    When neither ``beta_param`` nor ``log10_beta`` is listed in
    ``a.to_optimize`` (i.e. beta is fixed), the result is additionally saved
    to ``Scores/integral_<beta>.npy`` and ``Scores/integral_err_<beta>.npy``.

    Returns:
        tuple: ``(integral, integral_err)`` as returned by ``mcimport`` --
        the Bayes score and its predicted (1 sigma) error.
    """
    from .parameter import ModelParameters
    from .cem_function import posterior_function_mcmc_quick
    from .score_function import preload_params_mcmc
    from .plot_mcmc import restructure_chain
    from .wrapper import single_star_optimization
    from scipy.stats import multivariate_normal as scinorm
    from numpy.random import multivariate_normal as numnorm
    from skmonaco import mcimport
    import time

    # Load model parameters
    a = ModelParameters()
    preload = preload_params_mcmc()
    init_time = time.time()

    # Compute posterior + load median values - this automatically uses the neural network!!
    print('After %.3f seconds, finding posterior parameter values' %
          (time.time() - init_time))
    single_star_optimization()
    restructure_chain('mcmc/')
    positions = np.load('mcmc/posteriorPDF.npy')
    n_params = len(a.p0)
    init_param = [np.percentile(positions[:, j], 50) for j in range(n_params)]
    print(
        'After %.3f seconds, initial parameters are:' %
        (time.time() - init_time), init_param)
    centre = np.array(init_param)

    # Function to compute posterior (needs a trained neural network)
    def posterior(theta):
        model = ModelParameters()
        post, _ = posterior_function_mcmc_quick(theta, model, preload)
        return np.exp(post)

    # Compute covariance matrix of the first n_params chain columns in a
    # single call; atleast_2d keeps a matrix shape for a single parameter.
    print('After %.3f seconds, computing covariance matrix' %
          (time.time() - init_time))
    cov_matrix = np.atleast_2d(np.cov(positions[:, :n_params], rowvar=False))

    def gauss_factor(theta):
        # Returns gaussian fit to data
        return scinorm.pdf(theta, mean=centre, cov=cov_matrix)

    def posterior_mod(theta):
        # Returns flattened posterior
        return posterior(theta) / gauss_factor(theta)

    def dist(size):
        # Distribution function for mcmc sampling
        return numnorm(centre, cov_matrix, size=size)

    # Each name must be tested for membership separately: a bare non-empty
    # string in an `or` chain is always truthy, which would make the
    # fixed-beta branch below unreachable.
    beta_is_free = ('beta_param' in a.to_optimize
                    or 'log10_beta' in a.to_optimize)

    if beta_is_free:  # don't save output here
        print('After %.3f seconds, starting parameter-space integration' %
              (time.time() - init_time))
    else:
        print(
            'After %.3f seconds, starting parameter-space integration for beta = %.3f'
            % (time.time() - init_time, a.beta_param))

    integral, integral_err = mcimport(posterior_mod,
                                      a.int_samples,
                                      dist,
                                      nprocs=4)  # Quad-core processing

    if not beta_is_free:
        print('After %.3f seconds, integration is complete' %
              (time.time() - init_time))
        np.save('Scores/integral_' + str(a.beta_param) + '.npy', integral)
        np.save('Scores/integral_err_' + str(a.beta_param) + '.npy',
                integral_err)

    return integral, integral_err