Example #1
    def __init__(self, f_image, abc_name, sound_speed, density, dt, dx):
        img = Image.open(f_image)
        self.width, self.height = img.size

        if abc_name == "Mur1":
            self.abc_field = ABC.Mur1(
                0, self.width, 0, self.height, sound_speed["air"], dt, dx)
        elif abc_name == "Mur2":
            u_list = np.zeros([self.width-2, self.height-2])
            self.abc_field = ABC.Mur2(
                0, self.width, 0, self.height, sound_speed["air"], dt, dx, density, u_list)

        self.ref_points = []
        field_arr = np.array(img, dtype=np.int32).T
        # (self.wall_area,
        # self.ref_points_w,
        # self.ref_points_h) = self.__read_field(field_arr)
        (self.wall_area,
         self.ref_points_w_p,
         self.ref_points_w_m,
         self.ref_points_h_p,
         self.ref_points_h_m) = self.__read_field(field_arr)
        self.velocity_arr, self.density_arr = self.__make_velocity_density_field(
            self.wall_area, sound_speed, density)

        # R is (presumably) the reflectance of obstacle walls; with R = 1 the
        # walls are perfectly reflecting and coef_obs reduces to 1.
        R = 1
        self.coef_obs = ((1+R)*(sound_speed["air"]*dt/dx) - (1-R)) / \
            ((1 + R) * (sound_speed["air"] * dt / dx) + (1 - R))
Example #2
def minimum_entropy(summary_stats: ["function"],
                    n_obs: int,
                    y_obs: [[float]],
                    fitting_model: Models.Model,
                    priors: ["stats.Distribution"],
                    min_subset_size=1,
                    max_subset_size=None,
                    n_samples=1000,
                    n_accept=100,
                    k=4,
                    printing=False) -> ([int], [[float]]):
    """

    RETURNS
    [int] - indexes of best summary stats
    [[float]] - list of all accepted theta when "best summary stats"
    """

    lowest = ([], maxsize, [])

    # all permutations of summary stats
    n_stats = len(summary_stats)
    max_subset_size = max_subset_size if (max_subset_size) else n_stats
    perms = []
    for n in range(max(min_subset_size, 1),
                   min(n_stats + 1, max_subset_size + 1)):
        perms += [x for x in combinations([i for i in range(n_stats)], n)]

    sampling_details = {
        "sampling_method": "best",
        "num_runs": n_samples,
        "sample_size": n_accept
    }

    for (j, perm) in enumerate(perms):
        if (printing): print("Permutation = ", perm, sep="")
        else: print("({}/{})".format(j, len(perms)), end="\r")
        ss = [summary_stats[i] for i in perm]
        _, accepted_theta = ABC.abc_rejection(n_obs,
                                              y_obs,
                                              fitting_model,
                                              priors,
                                              sampling_details,
                                              summary_stats=ss,
                                              show_plots=False,
                                              printing=printing)

        estimate_ent = __k_nn_estimate_entropy(len(priors),
                                               accepted_theta,
                                               k=k)
        if (printing):
            print("Estimate_ent of ",
                  perm,
                  "= {:,.2f}\n".format(estimate_ent),
                  sep="")
        if (estimate_ent < lowest[1]):
            lowest = (perm, estimate_ent, accepted_theta)

    # return lowest[1]
    return lowest[0], lowest[2]
Example #3
def __rsse(obs, target) -> float:
    # Root of the residual sum of squared L2 distances to `target`,
    # averaged over the number of observations

    error = sum([ABC.l2_norm(o, target)**2 for o in obs])
    error = np.sqrt(error)
    error /= len(obs)

    return error
Example #4
def main(field_image, txt_data):
    print("field setting....")
    if gpu_flag:
        field_data = field.Field_GPU(field_image, abc_name,
                                     sound_speed_list, density_list, dt, dx)
    else:
        field_data = field.Field(field_image, abc_name,
                                 sound_speed_list, density_list, dt, dx)
    width = field_data.width
    height = field_data.height

    if abc_name == "Mur1":
        import ABC
        abc_field = ABC.Mur1(0, width, 0, height,
                             sound_speed_list["air"], dt, dx)
    print("done")
    print("pulse information reading....")
    pulse_info_list = read_pulse_info(txt_data)
    print("done")

    if gpu_flag:
        print("calc with GPU start....")
        cp.cuda.set_allocator(cp.cuda.MemoryPool().malloc)
        P1 = cp.zeros((width, height), dtype=cp.float32)
        P2 = cp.zeros((width, height), dtype=cp.float32)
        if debug_flag:
            fig = plt.figure()
        image_list = Calc(field_data, P1, P2, pulse_info_list)
    else:
        print("calc without GPU start....")
        P1 = np.zeros((width, height), dtype=np.float32)
        P2 = np.zeros((width, height), dtype=np.float32)
        if debug_flag:
            fig = plt.figure()
        image_list = Calc(field_data, P1, P2, pulse_info_list)
    if debug_flag:
        print("make animation....")
        ani = animation.ArtistAnimation(
            fig, image_list[0], interval=100, blit=True)
        ani.save('ani.gif', writer="imagemagick")
Example #5
def __k_nn_estimate_entropy(n_params: int,
                            parameter_samples: [(float)],
                            k=4) -> float:
    """
    DESCRIPTION
    Kth Nearest Neighbour estimate of entropy for a posterior distribution.

    PARAMETERS
    n_params (int) - Number of parameters being fitted.
    parameter_samples ([(float)]) - Set of accepted sampled parameters.

    OPTIONAL PARAMETERS
    k (int) - Which nearest neighbour to consider (default=4)

    RETURNS
    float - estimated entropy
    """
    n = len(parameter_samples)  # number accepted samples
    if (k > n):
        raise ValueError("k cannot be greater than the number of samples")

    gamma = special.gamma(1 + n_params / 2)
    digamma = special.digamma(k)

    h_hat = np.log(np.pi**(n_params / 2) / gamma)
    h_hat -= digamma
    h_hat += np.log(n)

    constant = n_params / n
    for i in range(n):
        sample_i = parameter_samples[i]
        distances = []
        for j in range(n):  # find kth nearest neighbour
            if (j == i): continue
            sample_j = parameter_samples[j]
            distances.append(ABC.l2_norm(sample_i, sample_j))
        distances.sort()
        h_hat += constant * np.log(distances[k - 1])  # k-th nearest neighbour

    return h_hat
Example #6
def main(argv):
    abcConf = Config.Config(argv)
    abcList = list()
    expT = datetime.datetime.now().strftime("%Y-%m-%d%H%M%S")
    for run in range(abcConf.RUN_TIME):

        abc = ABC.ABC(abcConf)
        abc.setExperimentID(run, expT)
        start_time = time.time() * 1000
        abc.initial()
        abc.memorize_best_source()
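        # Standard artificial-bee-colony cycle: employed bees explore,
        # probabilities are updated, onlooker bees exploit, the best source is
        # memorised and scout bees replace exhausted sources, until the
        # stopping condition is met.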
        while not abc.stopping_condition():
            abc.send_employed_bees()
            abc.calculate_probabilities()
            abc.send_onlooker_bees()
            abc.memorize_best_source()
            abc.send_scout_bees()
            abc.increase_cycle()

        abc.globalTime = time.time() * 1000 - start_time
        abcList.append(abc)
    Reporter(abcList)
Example #7
# -*- coding: utf-8 -*-
"""
Created on Thu Feb  7 18:01:49 2019

@author: GabrielAsus
"""
import ABC as abc
import math
lower = [-6, -6]


def function1(x):
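    # Bohachevsky function no. 1; its global minimum is f(0, 0) = 0.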

    return x[0]**2 + 2 * x[1]**2 - 0.3 * math.cos(
        3 * math.pi * x[0]) - 0.4 * math.cos(4 * math.pi * x[1]) + 0.7
    #return ((x[0]**2+x[1]-11)**2)+(x[0]+x[1]**2-7)**2


algoritmo = abc.ArtifitialBC(dimention=2,
                             lower=[-100, -100],
                             upper=[100, 100],
                             function=function1,
                             populationSize=30,
                             maxIter=200)
solution, fitness = algoritmo.run()
print(solution)
print(fitness)
Example #8
def run_everything(cluster, num_sigma, red_clump, run_number, location, elem):
    """Return the covariance matrix statistics and KS distances for every element in APOGEE in the desired cluster,
    for every simulation run.  Function also saves all final summary statistics and values of sigma to file.

    Parameters
    ----------
    cluster : str
        Name of the desired cluster (e.g. 'NGC 2682')
    num_sigma : int
        Number of simulations to run 
    red_clump : str
        If the red clump stars in rcsample are to be removed, set to True.  If all stars are to be used,
        set to False.
    run_number : int
        Number of the run by which to label files.
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    elem : str
        Element being analyzed.

    Returns
    -------
    D_cov_all : tuple
        All covariance matrix summary statistics for all simulations
    ks_all : tuple
        All KS distances for all simulations
    """

    #Create cluster directory, if doesn't exist already
    cluster_dir = oc.make_directory(cluster)
    #Get APOGEE and spectral data
    apogee_cluster_data, spectra, spectra_errs, T, bitmask = oc.get_spectra(
        cluster, red_clump, location)
    num_elem = 15
    num_stars = len(spectra)

    #Create synthetic spectra for each value of sigma and fit
    sigma_vals = np.random.uniform(0, 0.1, int(num_sigma))
    fake_res = []
    fake_err = []
    y_ax_psm = []
    psm_cdists = []
    fake_nanless_res = []
    final_real_spectra = []
    final_real_spectra_err = []
    for i in range(len(sigma_vals)):
        fake_dat = ABC.psm_data(num_elem, num_stars, apogee_cluster_data,
                                sigma_vals[i], T, cluster, spectra,
                                spectra_errs, run_number, location, elem)
        fake_res.append(fake_dat[0])
        fake_err.append(fake_dat[1])
        y_ax_psm.append(fake_dat[2])
        psm_cdists.append(fake_dat[3])
        fake_nanless_res.append(fake_dat[4])
        final_real_spectra.append(fake_dat[5])
        final_real_spectra_err.append(fake_dat[6])

    #Fit the data
    real_res = []
    real_err = []
    real_nanless_res = []
    real_nanless_err = []
    real_weights = []
    for i in range(len(sigma_vals)):
        real_dat = oc.fit_func(elem,
                               cluster,
                               final_real_spectra[i],
                               final_real_spectra_err[i],
                               T,
                               dat_type='data',
                               run_number=run_number,
                               location=location,
                               sigma_val=None)
        real_res.append(real_dat[0])
        real_err.append(real_dat[1])
        real_nanless_res.append(real_dat[7])
        real_nanless_err.append(real_dat[8])
        real_weights.append(real_dat[11])

    #Get the cumulative distributions for the data
    y_ax_real = []
    real_cdists = []
    for i in range(len(sigma_vals)):
        real_cdist_dat = pp.cum_dist(real_nanless_res[i], real_nanless_err[i])
        y_ax_real.append(real_cdist_dat[0])
        real_cdists.append(real_cdist_dat[1])

    #Calculate summary statistics
    D_cov_all = []
    ks_all = []
    for i in range(len(sigma_vals)):
        D_cov_all.append(
            ABC.d_cov(cluster, real_weights[i], real_res[i], real_err[i],
                      fake_res[i], fake_err[i], num_stars, sigma_vals[i], elem,
                      location, run_number))
        ks_all.append(
            ABC.KS(cluster, y_ax_real[i], real_cdists[i], y_ax_psm[i],
                   psm_cdists[i], sigma_vals[i], elem, location, run_number))
    D_cov_all = np.array(D_cov_all)
    ks_all = np.array(ks_all)

    #Write to file
    timestr = time.strftime("%Y%m%d_%H%M%S")  #Date and time by which to identify file
    name_string = str(cluster).replace(' ', '')  #Remove spaces from name of cluster
    pid = str(os.getpid())
    if location == 'personal':
        path = (f'/Users/chloecheng/Personal/run_files_{name_string}_{elem}/'
                f'{name_string}/{name_string}_{elem}_{timestr}_{pid}_{run_number}.hdf5')
    elif location == 'server':
        path = (f'/geir_data/scr/ccheng/AST425/Personal/run_files_{name_string}_{elem}/'
                f'{name_string}/{name_string}_{elem}_{timestr}_{pid}_{run_number}.hdf5')  #Server path
    file = h5py.File(path, 'w')
    file['D_cov'] = D_cov_all
    file['KS'] = ks_all
    file['sigma'] = sigma_vals
    file.close()

    return D_cov_all, ks_all, sigma_vals
Example #9
def run_everything(cluster, num_sigma, red_clump, run_number, location,
                   elem):  ###Function to run the entire algorithm
    """Return the covariance matrix statistics and KS distances for every element in APOGEE in the desired cluster,
    for every simulation run.  Function also saves all final summary statistics and values of sigma to file.

    Parameters
    ----------
    cluster : str
        Name of the desired cluster (e.g. 'NGC 2682')
    num_sigma : int
        Number of simulations to run 
    red_clump : str
        If the red clump stars in rcsample are to be removed, set to True.  If all stars are to be used,
        set to False.
    run_number : int
        Number of the run by which to label files.
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    elem : str
        Element being analyzed.

    Returns
    -------
    D_cov_all : tuple
        All covariance matrix summary statistics for all simulations
    ks_all : tuple
        All KS distances for all simulations
    """

    #Create cluster directory, if doesn't exist already
    cluster_dir = oc.make_directory(cluster)  ###Make a directory named after the cluster
    #Get APOGEE and spectral data
    apogee_cluster_data, spectra, spectra_errs, T, bitmask = oc.get_spectra(
        cluster, red_clump, location)  ###Get the allStar data and spectra
    num_elem = 15  ###Number of elements in APOGEE
    num_stars = len(spectra)  ###Number of stars in the cluster

    #Create synthetic spectra for each value of sigma and fit
    sigma_vals = np.random.uniform(0, 0.1, int(num_sigma))  ###Create an array of sigma values between 0 and 0.1 dex that are randomly drawn
    ###from a uniform distribution, the size of the number of simulations that you want to run at once
    fake_res = []  ###Empty list for the fake residuals
    fake_err = []  ###Empty list for the fake errors
    y_ax_psm = []  ###Empty list for the y-axis for the fake cumulative distributions
    psm_cdists = []  ###Empty list for the fake cumulative distributions
    fake_nanless_res = []  ###Empty list for the fake residuals with NaNs removed
    final_real_spectra = []  ###Empty list for the observed spectra that are masked in the same way as the fake spectra
    final_real_spectra_err = []  ###Empty list for the observed spectral errors that are masked in the same way as the fake spectra
    for i in range(len(sigma_vals)):  ###Iterate through the number of simulations you want to run
        ###Run the psm_data function from ABC.py to get the fake fits, etc.
        fake_dat = ABC.psm_data(num_elem, num_stars, apogee_cluster_data,
                                sigma_vals[i], T, cluster, spectra,
                                spectra_errs, run_number, location, elem)
        fake_res.append(fake_dat[0])  ###Get the fake residuals
        fake_err.append(fake_dat[1])  ###Get the fake errors
        y_ax_psm.append(fake_dat[2])  ###Get the y-axis for the fake cumulative distributions
        psm_cdists.append(fake_dat[3])  ###Get the fake cumulative distributions
        fake_nanless_res.append(fake_dat[4])  ###Get the fake residuals with no NaNs
        final_real_spectra.append(fake_dat[5])  ###Get the observed spectra that are masked in the same way as the fake spectra
        final_real_spectra_err.append(fake_dat[6])  ###Get the observed spectral errors that are masked in the same way as the fake spectra

    #Fit the data
    real_res = []  ###Empty list for the real residuals
    real_err = []  ###Empty list for the real errors
    real_nanless_res = []  ###Empty list for the real residuals with no NaNs
    real_nanless_err = []  ###Empty list for the real errors with no NaNs
    real_weights = []  ###Empty list for the weights of the windows for the element
    for i in range(len(sigma_vals)):  ###Iterate through the number of simulations
        ###Run the fit_func function from occam_clusters_input.py to get fits for real data, using the observed spectra and errors masked in the same way as the simulations
        real_dat = oc.fit_func(elem,
                               cluster,
                               final_real_spectra[i],
                               final_real_spectra_err[i],
                               T,
                               dat_type='data',
                               run_number=run_number,
                               location=location,
                               sigma_val=None)
        real_res.append(real_dat[0])  ###Get the real residuals
        real_err.append(real_dat[1])  ###Get the real errors
        real_nanless_res.append(real_dat[7])  ###Get the real residuals with no NaNs
        real_nanless_err.append(real_dat[8])  ###Get the real errors with no NaNs
        real_weights.append(real_dat[11])  ###Get the weights of the windows for the element

    #Get the cumulative distributions for the data
    y_ax_real = []  ###Empty list for y-axis for real cumulative distributions
    real_cdists = []  ###Empty list for real cumulative distributions
    for i in range(len(sigma_vals)):  ###Iterate through the number of simulations
        real_cdist_dat = pp.cum_dist(real_nanless_res[i], real_nanless_err[i])  ###Compute the cumulative distributions using the cum_dist function from occam_clusters_post_process.py
        y_ax_real.append(real_cdist_dat[0])  ###Get the y-axes for the real cumulative distributions
        real_cdists.append(real_cdist_dat[1])  ###Get the real cumulative distributions

    #Calculate summary statistics
    D_cov_all = []  ###Empty list for the delta covariance statistics
    ks_all = []  ###Empty list for the KS distance statistics
    for i in range(len(sigma_vals)):  ###Iterate through the simulations
        ###Compute the delta covariance statistics
        D_cov_all.append(
            ABC.d_cov(cluster, real_weights[i], real_res[i], real_err[i],
                      fake_res[i], fake_err[i], num_stars, sigma_vals[i], elem,
                      location, run_number))
        ###Compute the KS distance statistics
        ks_all.append(
            ABC.KS(cluster, y_ax_real[i], real_cdists[i], y_ax_psm[i],
                   psm_cdists[i], sigma_vals[i], elem, location, run_number))
    D_cov_all = np.array(D_cov_all)  ###Make into array
    ks_all = np.array(ks_all)  ###Make into array

    #Write to file
    timestr = time.strftime("%Y%m%d_%H%M%S")  #Date and time by which to identify file
    name_string = str(cluster).replace(' ', '')  #Remove spaces from name of cluster
    pid = str(os.getpid())  ###PID for file labelling
    if location == 'personal':  ###If running on Mac
        path = (f'/Users/chloecheng/Personal/run_files/{name_string}/'
                f'{name_string}_{elem}_{timestr}_{pid}_{run_number}.hdf5')
    elif location == 'server':  ###If running on server
        path = (f'/geir_data/scr/ccheng/AST425/Personal/run_files/{name_string}/'
                f'{name_string}_{elem}_{timestr}_{pid}_{run_number}.hdf5')  #Server path
    file = h5py.File(path, 'w')  ###Write file
    file['D_cov'] = D_cov_all
    file['KS'] = ks_all
    file['sigma'] = sigma_vals
    file.close()

    return D_cov_all, ks_all, sigma_vals
Example #10
def abc_semi_auto(n_obs: int,
                  y_obs: [[float]],
                  fitting_model: Models.Model,
                  priors: ["stats.Distribution"],
                  distance_measure=ABC.l2_norm,
                  n_pilot_samples=10000,
                  n_pilot_acc=1000,
                  n_params_sample_size=100,
                  summary_stats=None,
                  printing=True) -> (["function"], [[float]]):
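    # This appears to follow the semi-automatic ABC approach of Fearnhead &
    # Prangle: a pilot rejection run narrows the priors, fresh
    # parameter/output pairs are simulated from them, and a linear regression
    # of parameters on simulated outputs defines the new summary statistics.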

    group_dim = lambda ys, i: [y[i] for y in ys]
    # `i=i` binds each dimension index at definition time; a bare
    # `lambda ys: ...` would make every default stat use the last value of i.
    summary_stats = summary_stats if (summary_stats) else ([
        (lambda ys, i=i: group_dim(ys, i)) for i in range(len(y_obs[0]))
    ])

    sampling_details = {
        "sampling_method": "best",
        "num_runs": n_pilot_samples,
        "sample_size": n_pilot_acc,
        "distance_measure": distance_measure,
        "params_sample_size": n_params_sample_size
    }

    #perform pilot run
    _, pilot_params = ABC.abc_rejection(n_obs=n_obs,
                                        y_obs=y_obs,
                                        fitting_model=fitting_model,
                                        priors=priors,
                                        sampling_details=sampling_details,
                                        summary_stats=summary_stats,
                                        show_plots=False,
                                        printing=printing)

    # calculate distribution of accepted params
    new_priors = []
    for i in range(fitting_model.n_params):
        pilot_params_dim = [x[i] for x in pilot_params]
        dist = stats.gaussian_kde(pilot_params_dim)
        new_priors.append(dist)
    if (printing): print("Calculated posteriors from pilot.")

    # Sample new parameters and simulate model
    m = sampling_details["params_sample_size"] if (
        "params_sample_size" in sampling_details) else 1000

    samples = []
    for i in range(m):
        if (printing): print("{}/{}".format(i, m), end="\r")
        theta_t = [list(p.resample(1))[0][0] for p in new_priors]

        # observe theorised model
        fitting_model.update_params(theta_t)
        y_t = fitting_model.observe()
        s_t = [s(y_t) for s in summary_stats]

        samples.append((theta_t, s_t))
    if (printing): print("Generated {} parameter sets.".format(m))

    # create summary stats
    # NOTE - other methods can be used
    new_summary_stats = []
    X = [list(np.ravel(np.matrix(x[1])))
         for x in samples]  # flatten output data
    X = np.array(X)
    coefs = []

    for i in range(fitting_model.n_params):
        y = np.array([x[0][i] for x in samples])

        reg = LinearRegression().fit(X, y)
        coefs.append(list(reg.coef_))

    new_summary_stats = [
        lambda xs: list(np.dot(coefs, np.ravel(np.matrix(xs))))
    ]
    s_t = [s(samples[0][1]) for s in new_summary_stats]
    if (printing): print("Generated summary statistics")

    return new_summary_stats, coefs
Example #11
def two_step_minimum_entropy(summary_stats: ["function"],
                             n_obs: int,
                             y_obs: [[float]],
                             fitting_model: Models.Model,
                             priors: ["stats.Distribution"],
                             min_subset_size=1,
                             max_subset_size=None,
                             n_samples=1000,
                             n_accept=100,
                             n_keep=10,
                             k=4,
                             printing=False) -> ([int], [[float]]):
    """
    OPTIONAL PARAMETERS
    n_keep (int) - number of (best) accepted samples to keep from the set of stats which minimise entropy (`best_stats`) and use for evaluating second stage (default=10)
    """
    n_stats = len(summary_stats)
    max_subset_size = max_subset_size if (max_subset_size) else n_stats

    # find summary stats which minimise entropy
    me_stats_id, accepted_theta = minimum_entropy(
        summary_stats,
        n_obs,
        y_obs,
        fitting_model,
        priors,
        min_subset_size=min_subset_size,
        max_subset_size=max_subset_size,
        n_samples=n_samples,
        n_accept=n_accept,
        k=k,
        printing=printing)
    me_stats = [summary_stats[i] for i in me_stats_id]
    s_obs = [s(y_obs) for s in me_stats]
    if (printing): print("ME stats found -", me_stats_id, "\n")

    # identify the `n_keep` best set of parameters
    theta_scores = []
    for (i, theta) in enumerate(accepted_theta):

        fitting_model.update_params(theta)
        y_t = fitting_model.observe()
        s_t = [s(y_t) for s in me_stats]

        weight = ABC.l1_norm([
            ABC.l2_norm(s_t_i, s_obs_i)
            for (s_t_i, s_obs_i) in zip(s_t, s_obs)
        ])
        theta_scores.append((weight, i))

    theta_scores.sort(key=lambda x: x[0])
    me_theta = [accepted_theta[x[1]] for x in theta_scores[:n_keep]]
    if (printing): print("ME theta found.\n")

    # all permutations of summary stats
    n_stats = len(summary_stats)
    perms = []
    for n in range(min_subset_size, max_subset_size + 1):
        perms += [x for x in combinations([i for i in range(n_stats)], n)]

    lowest = ([], maxsize, [])

    # compare subsets of summary stats against the kept best parameter sets
    sampling_details = {
        "sampling_method": "best",
        "num_runs": n_samples,
        "sample_size": n_accept,
        "distance_measure": ABC.log_l2_norm
    }

    for (i, perm) in enumerate(perms):
        if (printing): print("Permutation = ", perm, sep="")
        else: print("{}/{}           ".format(i, len(perms)), end="\r")
        ss = [summary_stats[i] for i in perm]
        _, accepted_theta = ABC.abc_rejection(n_obs,
                                              y_obs,
                                              fitting_model,
                                              priors,
                                              sampling_details,
                                              summary_stats=ss,
                                              show_plots=False,
                                              printing=printing)

        rsses = [__rsse(accepted_theta, theta) for theta in me_theta]
        mrsse = np.mean(rsses)
        if (printing):
            print("MRSSE of ", perm, "= {:,.2f}\n".format(mrsse), sep="")
        if (mrsse < lowest[1]): lowest = (perm, mrsse, accepted_theta)

    return lowest[0], lowest[2]
Example #12
def joyce_marjoram(summary_stats: ["function"],
                   n_obs: int,
                   y_obs: [[float]],
                   fitting_model: Models.Model,
                   priors: ["stats.Distribution"],
                   param_bounds: [(float, float)],
                   distance_measure=ABC.l2_norm,
                   KERNEL=ABC.uniform_kernel,
                   BANDWIDTH=1,
                   n_samples=10000,
                   n_bins=10,
                   printing=True) -> [int]:
    """
    DESCRIPTION
    Use the algorithm of Joyce & Marjoram (2008) to find an approximately sufficient set of summary statistics (from the set `summary_stats`)

    PARAMETERS
    summary_stats ([function]) - functions which summarise `y_obs` and the observations of `fitting_model` in some way. These are what will be evaluated
    n_obs (int) - Number of observations available.
    y_obs ([[float]]) - Observations from true model.
    fitting_model (Model) - Model the algorithm will aim to fit to observations.
    priors (["stats.Distribution"]) - Priors for the value of parameters of `fitting_model`.
    param_bounds ([(float,float)]) - The bounds of the priors used to generate parameter sets.
    KERNEL (func) - one of the kernels defined above. Determines which parameters are accepted or not.
    BANDWIDTH (float) - scale parameter for `KERNEL`
    n_samples (int) - number of samples to make
    n_bins (int) - Number of bins to discretise each dimension of posterior into (default=10)

    RETURNS
    [int] - indexes of selected summary stats in `summary_stats`
    """

    if (type(y_obs) != list):
        raise TypeError("`y_obs` must be a list (not {})".format(type(y_obs)))
    if (len(y_obs) != n_obs):
        raise ValueError(
            "Wrong number of observations supplied (len(y_obs)!=n_obs) ({}!={})"
            .format(len(y_obs), n_obs))
    if (len(priors) != fitting_model.n_params):
        raise ValueError(
            "Wrong number of priors given (exp fitting_model.n_params={})".
            format(fitting_model.n_params))

    group_dim = lambda ys, i: [y[i] for y in ys]
    summary_stats = summary_stats if (summary_stats) else ([
        (lambda ys, i=i: group_dim(ys, i)) for i in range(len(y_obs[0]))
    ])
    s_obs = [s(y_obs) for s in summary_stats]

    # generate samples
    SAMPLES = []  # (theta,s_vals)
    for i in range(n_samples):
        if (printing): print("{:,}/{:,}".format(i + 1, n_samples), end="\r")

        # sample parameters
        theta_t = [pi_i.rvs(1)[0] for pi_i in priors]

        # observe theorised model
        fitting_model.update_params(theta_t)
        y_t = fitting_model.observe()
        s_t = [s(y_t) for s in summary_stats]

        SAMPLES.append((theta_t, s_t))

    if (printing):
        print()
        for i in range(len(summary_stats)):
            print("var_{}={:,.3f}".format(i,
                                          np.var([x[1][i] for x in SAMPLES])))

    # consider adding each summary stat in turn
    ACCEPTED_SUMMARY_STATS_ID = []  # index of accepted summary stats

    id_to_try = randint(0, len(summary_stats) - 1)
    ACCEPTED_SUMMARY_STATS_ID = [id_to_try]
    tried = []

    while True:
        if (printing):
            print("Currently accepted - ", ACCEPTED_SUMMARY_STATS_ID)

        # samples using current accepted summary stats
        samples_curr = [(theta, [s[j] for j in ACCEPTED_SUMMARY_STATS_ID])
                        for (theta, s) in SAMPLES]
        s_obs_curr = [s_obs[j] for j in ACCEPTED_SUMMARY_STATS_ID]
        accepted_params_curr = []
        for (theta_t, s_t) in samples_curr:
            norm_vals = [
                distance_measure(s_t_i, s_obs_i)
                for (s_t_i, s_obs_i) in zip(s_t, s_obs_curr)
            ]
            # NOTE - ABC.l1_norm() can be replaced by any other norm
            if KERNEL(ABC.l1_norm(norm_vals), BANDWIDTH):
                accepted_params_curr.append(theta_t)

        # choose next ss to try
        available_ss = [
            x for x in range(len(summary_stats))
            if (x not in ACCEPTED_SUMMARY_STATS_ID) and (x not in tried)
        ]
        if (len(available_ss) == 0): return ACCEPTED_SUMMARY_STATS_ID

        id_to_try = available_ss[randint(0, len(available_ss) - 1)]
        tried += [id_to_try]
        if (printing):
            print("Trying to add {} to [{}]".format(
                id_to_try,
                ",".join([str(x) for x in ACCEPTED_SUMMARY_STATS_ID])))

        # samples using current accepted summary stats and id_to_try
        samples_prop = [
            (theta, [s[j] for j in ACCEPTED_SUMMARY_STATS_ID + [id_to_try]])
            for (theta, s) in SAMPLES
        ]
        s_obs_prop = [
            s_obs[j] for j in ACCEPTED_SUMMARY_STATS_ID + [id_to_try]
        ]
        accepted_params_prop = []
        for (theta_t, s_t) in samples_prop:
            norm_vals = [
                distance_measure(s_t_i, s_obs_i)
                for (s_t_i, s_obs_i) in zip(s_t, s_obs_prop)
            ]
            # NOTE - ABC.l1_norm() can be replaced by any other norm
            if KERNEL(ABC.l1_norm(norm_vals), BANDWIDTH):
                accepted_params_prop.append(theta_t)

        if (printing): print("N_(k-1)={:,}".format(len(accepted_params_curr)))
        if (printing): print("N_k    ={:,}".format(len(accepted_params_prop)))
        if (__compare_summary_stats(accepted_params_curr,
                                    accepted_params_prop,
                                    param_bounds,
                                    n_params=len(priors),
                                    n_bins=10)):
            # add id_to_try
            ACCEPTED_SUMMARY_STATS_ID += [id_to_try]
            if (printing):
                print("Accepting {}.\nCurrently accepted - ".format(id_to_try),
                      ACCEPTED_SUMMARY_STATS_ID)

            # consider removing previous summaries
            if (printing): print("\nConsider removing previous summaries")
            for i in range(len(ACCEPTED_SUMMARY_STATS_ID) - 2, -1, -1):
                ids_minus = [
                    x for (j, x) in enumerate(ACCEPTED_SUMMARY_STATS_ID)
                    if j != i
                ]
                if (printing):
                    print("Comparing [{}] to [{}]".format(
                        ",".join([str(x) for x in ACCEPTED_SUMMARY_STATS_ID]),
                        ",".join([str(x) for x in ids_minus])))

                # samples using reduced set
                samples_minus = [(theta, [s[j] for j in ids_minus])
                                 for (theta, s) in SAMPLES]
                s_obs_minus = [s_obs[j] for j in ids_minus]
                accepted_params_minus = []
                for (theta_t, s_t) in samples_minus:
                    norm_vals = [
                        distance_measure(s_t_i, s_obs_i)
                        for (s_t_i, s_obs_i) in zip(s_t, s_obs_minus)
                    ]
                    # NOTE - ABC.l1_norm() can be replaced by any other norm
                    if KERNEL(ABC.l1_norm(norm_vals), BANDWIDTH):
                        accepted_params_minus.append(theta_t)

                if (__compare_summary_stats(accepted_params_prop,
                                            accepted_params_minus,
                                            param_bounds,
                                            n_params=len(priors),
                                            n_bins=10)):
                    if (printing):
                        print("Removing - ", ACCEPTED_SUMMARY_STATS_ID[i])
                    ACCEPTED_SUMMARY_STATS_ID = ids_minus

            if (printing): print("Reduced to - ", ACCEPTED_SUMMARY_STATS_ID)

        if (printing): print()

    return ACCEPTED_SUMMARY_STATS_ID
Example #13
import psyneulink as pnl
import ABC

ABC = pnl.Composition(name='ABC')

A_0 = pnl.TransferMechanism(name='A_0',
                            function=pnl.Linear(intercept=2, slope=5))
A_input_0 = pnl.TransferMechanism(name='A_input_0',
                                  function=pnl.Linear(default_variable=0))
B_0 = pnl.TransferMechanism(name='B_0', function=pnl.Logistic)
C_0 = pnl.TransferMechanism(name='C_0', function=pnl.Exponential)

ABC.add_node(A_0)
ABC.add_node(A_input_0)
ABC.add_node(B_0)
ABC.add_node(C_0)
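
# Wiring: A_input_0 feeds A_0, which fans out to both B_0 and C_0.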

ABC.add_projection(projection=pnl.MappingProjection(name='Edge A_0 to B_0'),
                   sender=A_0,
                   receiver=B_0)
ABC.add_projection(
    projection=pnl.MappingProjection(name='Edge A_input_0 to A_0'),
    sender=A_input_0,
    receiver=A_0)
ABC.add_projection(projection=pnl.MappingProjection(name='Edge A_0 to C_0'),
                   sender=A_0,
                   receiver=C_0)
ABC.run(inputs={A_input_0: 0}, log=True, num_trials=50)

print('Finished running model')