Example #1
0
def minimum_entropy(summary_stats: ["function"],
                    n_obs: int,
                    y_obs: [[float]],
                    fitting_model: Models.Model,
                    priors: ["stats.Distribution"],
                    min_subset_size=1,
                    max_subset_size=None,
                    n_samples=1000,
                    n_accept=100,
                    k=4,
                    printing=False) -> ([int], [[float]]):
    """
    Select the subset of summary statistics which minimises the k-NN
    estimate of the entropy of the ABC-rejection posterior.

    PARAMETERS
    summary_stats ([function]) - candidate summary statistics
    n_obs (int) - number of observations to fit to
    y_obs ([[float]]) - observed data being fitted
    fitting_model (Models.Model) - model being fitted
    priors ([stats.Distribution]) - prior for each model parameter

    OPTIONAL PARAMETERS
    min_subset_size (int) - smallest subset of stats to try (default=1)
    max_subset_size (int) - largest subset of stats to try
        (default=None -> all stats)
    n_samples (int) - number of ABC-rejection runs per subset (default=1000)
    n_accept (int) - number of samples accepted per subset (default=100)
    k (int) - neighbour index used by the entropy estimator (default=4)
    printing (bool) - verbose output (default=False)

    RETURNS
    [int] - indexes of best summary stats
    [[float]] - list of all accepted theta when "best summary stats"
    """
    # (best subset, its entropy estimate, its accepted parameters);
    # seeded with maxsize so any real estimate replaces it
    lowest = ([], maxsize, [])

    # all subsets of the candidate summary stats within the size bounds.
    # `is None` (not truthiness) so an explicit argument of 0 is not
    # silently replaced by n_stats
    n_stats = len(summary_stats)
    if max_subset_size is None:
        max_subset_size = n_stats
    perms = []
    for n in range(max(min_subset_size, 1),
                   min(n_stats + 1, max_subset_size + 1)):
        perms += list(combinations(range(n_stats), n))

    sampling_details = {
        "sampling_method": "best",
        "num_runs": n_samples,
        "sample_size": n_accept
    }

    for (j, perm) in enumerate(perms):
        if (printing): print("Permutation = ", perm, sep="")
        # 1-based progress so the counter reaches len(perms) on the last run
        else: print("({}/{})".format(j + 1, len(perms)), end="\r")
        ss = [summary_stats[i] for i in perm]
        _, accepted_theta = ABC.abc_rejection(n_obs,
                                              y_obs,
                                              fitting_model,
                                              priors,
                                              sampling_details,
                                              summary_stats=ss,
                                              show_plots=False,
                                              printing=printing)

        # entropy of the accepted-parameter cloud for this subset
        estimate_ent = __k_nn_estimate_entropy(len(priors),
                                               accepted_theta,
                                               k=k)
        if (printing):
            print("Estimate_ent of ",
                  perm,
                  "= {:,.2f}\n".format(estimate_ent),
                  sep="")
        if (estimate_ent < lowest[1]):
            lowest = (perm, estimate_ent, accepted_theta)

    return lowest[0], lowest[2]
Example #2
0
def abc_semi_auto(n_obs: int,
                  y_obs: [[float]],
                  fitting_model: Models.Model,
                  priors: ["stats.Distribution"],
                  distance_measure=ABC.l2_norm,
                  n_pilot_samples=10000,
                  n_pilot_acc=1000,
                  n_params_sample_size=100,
                  summary_stats=None,
                  printing=True) -> (["function"], [[float]]):
    """
    Construct summary statistics semi-automatically: run a pilot
    ABC-rejection, fit a KDE posterior per parameter, simulate the model
    under parameters drawn from those posteriors, then regress parameters
    on the simulated statistics and use the regression coefficients as a
    single new summary statistic.

    PARAMETERS
    n_obs (int) - number of observations to fit to
    y_obs ([[float]]) - observed data being fitted
    fitting_model (Models.Model) - model being fitted
    priors ([stats.Distribution]) - prior for each model parameter

    OPTIONAL PARAMETERS
    distance_measure (function) - distance used by the pilot run
        (default=ABC.l2_norm)
    n_pilot_samples (int) - pilot run sample budget (default=10000)
    n_pilot_acc (int) - pilot run acceptance count (default=1000)
    n_params_sample_size (int) - parameter sets simulated for the
        regression stage (default=100)
    summary_stats ([function]) - stats used by the pilot run
        (default=None -> one stat per output dimension)
    printing (bool) - verbose output (default=True)

    RETURNS
    ["function"] - learned summary statistic(s)
    [[float]] - regression coefficients, one row per model parameter
    """
    group_dim = lambda ys, i: [y[i] for y in ys]
    # Default stats select each output dimension of the observations.
    # BUGFIX: bind `i` as a default argument; without `i=i` every lambda
    # captures the same loop variable (late binding) and all of them would
    # return the final dimension only.
    if summary_stats is None:
        summary_stats = [(lambda ys, i=i: group_dim(ys, i))
                         for i in range(len(y_obs[0]))]

    sampling_details = {
        "sampling_method": "best",
        "num_runs": n_pilot_samples,
        "sample_size": n_pilot_acc,
        "distance_measure": distance_measure,
        "params_sample_size": n_params_sample_size
    }

    # perform pilot run
    _, pilot_params = ABC.abc_rejection(n_obs=n_obs,
                                        y_obs=y_obs,
                                        fitting_model=fitting_model,
                                        priors=priors,
                                        sampling_details=sampling_details,
                                        summary_stats=summary_stats,
                                        show_plots=False,
                                        printing=printing)

    # fit a KDE to the accepted values of each parameter; these act as the
    # priors for the simulation stage below
    new_priors = []
    for i in range(fitting_model.n_params):
        pilot_params_dim = [x[i] for x in pilot_params]
        new_priors.append(stats.gaussian_kde(pilot_params_dim))
    if (printing): print("Calculated posteriors from pilot.")

    # Sample new parameters and simulate model
    m = sampling_details.get("params_sample_size", 1000)

    samples = []
    for i in range(m):
        if (printing): print("{}/{}".format(i, m), end="\r")
        theta_t = [list(p.resample(1))[0][0] for p in new_priors]

        # observe theorised model
        fitting_model.update_params(theta_t)
        y_t = fitting_model.observe()
        s_t = [s(y_t) for s in summary_stats]

        samples.append((theta_t, s_t))
    if (printing): print("Generated {} parameter sets.".format(m))

    # create summary stats by linearly regressing each parameter on the
    # flattened simulated statistics (NOTE - other methods can be used).
    # np.asarray replaces the deprecated np.matrix; the ravel result is
    # identical for these inputs.
    X = np.array([list(np.ravel(np.asarray(x[1]))) for x in samples])
    coefs = []

    for i in range(fitting_model.n_params):
        y = np.array([x[0][i] for x in samples])

        reg = LinearRegression().fit(X, y)
        coefs.append(list(reg.coef_))

    # one stat mapping raw output -> estimated parameter vector
    new_summary_stats = [
        lambda xs: list(np.dot(coefs, np.ravel(np.asarray(xs))))
    ]
    if (printing): print("Generated summary statistics")

    return new_summary_stats, coefs
Example #3
0
def two_step_minimum_entropy(summary_stats: ["function"],
                             n_obs: int,
                             y_obs: [[float]],
                             fitting_model: Models.Model,
                             priors: ["stats.Distribution"],
                             min_subset_size=1,
                             max_subset_size=None,
                             n_samples=1000,
                             n_accept=100,
                             n_keep=10,
                             k=4,
                             printing=False) -> ([int], [[float]]):
    """
    Two-stage selection of summary statistics: stage 1 finds the subset
    which minimises posterior entropy; stage 2 re-scores every subset by
    how closely its accepted parameters match the best parameters from
    stage 1 (mean RSSE) and returns the winner.

    PARAMETERS
    summary_stats ([function]) - candidate summary statistics
    n_obs (int) - number of observations to fit to
    y_obs ([[float]]) - observed data being fitted
    fitting_model (Models.Model) - model being fitted
    priors ([stats.Distribution]) - prior for each model parameter

    OPTIONAL PARAMETERS
    min_subset_size (int) - smallest subset of stats to try (default=1)
    max_subset_size (int) - largest subset of stats to try
        (default=None -> all stats)
    n_samples (int) - number of ABC-rejection runs per subset (default=1000)
    n_accept (int) - number of samples accepted per subset (default=100)
    n_keep (int) - number of (best) accepted samples to keep from the set of stats which minimise entropy (`best_stats`) and use for evaluating second stage (default=10)
    k (int) - neighbour index used by the entropy estimator (default=4)
    printing (bool) - verbose output (default=False)

    RETURNS
    [int] - indexes of best summary stats
    [[float]] - list of all accepted theta when using best summary stats
    """
    # `is None` so an explicit argument of 0 is not replaced by n_stats
    n_stats = len(summary_stats)
    if max_subset_size is None:
        max_subset_size = n_stats

    # stage 1 - find summary stats which minimise entropy
    me_stats_id, accepted_theta = minimum_entropy(
        summary_stats,
        n_obs,
        y_obs,
        fitting_model,
        priors,
        min_subset_size=min_subset_size,
        max_subset_size=max_subset_size,
        n_samples=n_samples,
        n_accept=n_accept,
        k=k,
        printing=printing)
    me_stats = [summary_stats[i] for i in me_stats_id]
    s_obs = [s(y_obs) for s in me_stats]
    if (printing): print("ME stats found -", me_stats_id, "\n")

    # identify the `n_keep` accepted parameter sets whose simulated stats
    # are closest to the observed stats under the minimum-entropy subset
    theta_scores = []
    for (i, theta) in enumerate(accepted_theta):

        fitting_model.update_params(theta)
        y_t = fitting_model.observe()
        s_t = [s(y_t) for s in me_stats]

        # score = l1 norm of the per-statistic l2 distances to the observed
        weight = ABC.l1_norm([
            ABC.l2_norm(s_t_i, s_obs_i)
            for (s_t_i, s_obs_i) in zip(s_t, s_obs)
        ])
        theta_scores.append((weight, i))

    theta_scores.sort(key=lambda x: x[0])
    me_theta = [accepted_theta[x[1]] for x in theta_scores[:n_keep]]
    if (printing): print("ME theta found.\n")

    # all subsets of summary stats, clamped to the same bounds used by
    # `minimum_entropy` so both stages search identical subsets
    perms = []
    for n in range(max(min_subset_size, 1),
                   min(n_stats + 1, max_subset_size + 1)):
        perms += list(combinations(range(n_stats), n))

    lowest = ([], maxsize, [])

    # stage 2 - compare subsets of summary stats by the mean RSSE between
    # their accepted parameters and the best stage-1 parameters
    sampling_details = {
        "sampling_method": "best",
        "num_runs": n_samples,
        "sample_size": n_accept,
        "distance_measure": ABC.log_l2_norm
    }

    for (j, perm) in enumerate(perms):
        if (printing): print("Permutation = ", perm, sep="")
        # 1-based progress so the counter reaches len(perms) on the last run
        else: print("{}/{}           ".format(j + 1, len(perms)), end="\r")
        ss = [summary_stats[i] for i in perm]
        _, accepted_theta = ABC.abc_rejection(n_obs,
                                              y_obs,
                                              fitting_model,
                                              priors,
                                              sampling_details,
                                              summary_stats=ss,
                                              show_plots=False,
                                              printing=printing)

        rsses = [__rsse(accepted_theta, theta) for theta in me_theta]
        mrsse = np.mean(rsses)
        if (printing):
            print("MRSSE of ", perm, "= {:,.2f}\n".format(mrsse), sep="")
        if (mrsse < lowest[1]): lowest = (perm, mrsse, accepted_theta)

    return lowest[0], lowest[2]