예제 #1
0
def __rsse(obs, target) -> float:
    """
    DESCRIPTION
    Score how far a set of observations lies from a single target point.
    Computed as the square root of the summed squared `ABC.l2_norm`
    distances between each observation and `target`, divided by the number
    of observations.

    PARAMETERS
    obs - Collection of observations; each is compared against `target`.
    target - Reference point passed as second argument to `ABC.l2_norm`.

    RETURNS
    float - sqrt(sum_i l2_norm(obs_i, target)^2) / len(obs)
    """
    squared_total = 0
    for observation in obs:
        squared_total += ABC.l2_norm(observation, target)**2

    return np.sqrt(squared_total) / len(obs)
예제 #2
0
def __k_nn_estimate_entropy(n_params: int,
                            parameter_samples: [(float)],
                            k=4) -> float:
    """
    DESCRIPTION
    Kth Nearest Neighbour estimate of entropy for a posterior distribution.

    The estimate computed is
        log(pi^(p/2) / Gamma(1 + p/2)) - digamma(k) + log(n)
            + (p / n) * sum_i log(R_ik)
    where p=`n_params`, n=number of samples and R_ik is the `ABC.l2_norm`
    distance from sample i to its kth nearest other sample.

    PARAMETERS
    n_params (int) - Number of parameters being fitted.
    parameter_samples ([(float)]) - Set of accepted sampled parameters.

    OPTIONAL PARAMETERS
    k (int) - Which nearest neighbour to consider (default=4)

    RETURNS
    float - estimated entropy

    RAISES
    ValueError - if `k` is less than 1, or if `k` is not smaller than the
                 number of samples (each sample only has n-1 neighbours).
    """
    n = len(parameter_samples)  # number accepted samples
    if (k < 1):
        raise ValueError("k must be at least 1")
    if (k > n):
        raise ValueError("k cannot be greater than the number of samples")
    if (k == n):
        # a sample is never its own neighbour, so only n-1 distances exist
        raise ValueError("k must be smaller than the number of samples")

    gamma = special.gamma(1 + n_params / 2)
    digamma = special.digamma(k)

    # terms of the estimate which do not depend on the individual samples
    h_hat = np.log(np.pi**(n_params / 2) / gamma)
    h_hat -= digamma
    h_hat += np.log(n)

    constant = n_params / n
    for (i, sample_i) in enumerate(parameter_samples):
        # distances from sample i to every other sample, nearest first
        distances = sorted(
            ABC.l2_norm(sample_i, sample_j)
            for (j, sample_j) in enumerate(parameter_samples)
            if j != i)
        # kth nearest neighbour is at (0-based) position k-1.
        # NOTE: a distance of exactly 0 makes np.log return -inf.
        h_hat += constant * np.log(distances[k - 1])

    return h_hat
예제 #3
0
def two_step_minimum_entropy(summary_stats: ["function"],
                             n_obs: int,
                             y_obs: [[float]],
                             fitting_model: Models.Model,
                             priors: ["stats.Distribution"],
                             min_subset_size=1,
                             max_subset_size=None,
                             n_samples=1000,
                             n_accept=100,
                             n_keep=10,
                             k=4,
                             printing=False) -> ([int], [[float]]):
    """
    DESCRIPTION
    Two-stage selection of a subset of summary statistics.
    Stage one calls `minimum_entropy` to obtain a subset of statistics and
    its accepted parameters, then keeps the `n_keep` accepted parameters whose
    simulated statistics are closest to the observed ones.
    Stage two runs `ABC.abc_rejection` for every candidate subset and returns
    the subset whose accepted parameters have the lowest mean `__rsse` to the
    parameters kept in stage one.

    PARAMETERS
    summary_stats ([function]) - Candidate summary statistics; each is called on `y_obs` and on observations of `fitting_model`.
    n_obs (int) - Forwarded to `minimum_entropy` and `ABC.abc_rejection`.
    y_obs ([[float]]) - Observed data; forwarded to the same calls and summarised by the selected statistics.
    fitting_model (Models.Model) - Model being fitted. Its parameters are overwritten via `update_params` during stage one.
    priors ([stats.Distribution]) - Forwarded to `minimum_entropy` and `ABC.abc_rejection`.

    OPTIONAL PARAMETERS
    min_subset_size (int) - Smallest subset size to consider (default=1)
    max_subset_size (int) - Largest subset size to consider. Any falsy value means "number of summary stats" (default=None)
    n_samples (int) - Forwarded to `minimum_entropy` and used as "num_runs" for `ABC.abc_rejection` (default=1000)
    n_accept (int) - Forwarded to `minimum_entropy` and used as "sample_size" for `ABC.abc_rejection` (default=100)
    n_keep (int) - number of (best) accepted samples to keep from the set of stats which minimise entropy and use for evaluating second stage (default=10)
    k (int) - Forwarded to `minimum_entropy` (default=4)
    printing (bool) - Print details of each step. When False a progress counter is still written to stdout (default=False)

    RETURNS
    (indices of the best subset of `summary_stats`, parameters accepted when using that subset).
    Both are empty lists if no subset scores below `maxsize`.
    """
    n_stats = len(summary_stats)
    if not max_subset_size:
        max_subset_size = n_stats

    # ---- stage one: statistics which minimise entropy ----
    me_stats_id, accepted_theta = minimum_entropy(
        summary_stats,
        n_obs,
        y_obs,
        fitting_model,
        priors,
        min_subset_size=min_subset_size,
        max_subset_size=max_subset_size,
        n_samples=n_samples,
        n_accept=n_accept,
        k=k,
        printing=printing)
    me_stats = [summary_stats[stat_id] for stat_id in me_stats_id]
    s_obs = [stat(y_obs) for stat in me_stats]
    if printing:
        print("ME stats found -", me_stats_id, "\n")

    # score every accepted parameter set by how closely the model, run with
    # those parameters, reproduces the observed statistics
    scored = []
    for position, theta in enumerate(accepted_theta):
        fitting_model.update_params(theta)
        y_sim = fitting_model.observe()
        s_sim = [stat(y_sim) for stat in me_stats]

        per_stat_distance = [
            ABC.l2_norm(simulated, observed)
            for simulated, observed in zip(s_sim, s_obs)
        ]
        scored.append((ABC.l1_norm(per_stat_distance), position))

    # lowest score first (stable, so ties keep their original order)
    ranked = sorted(scored, key=lambda pair: pair[0])
    me_theta = [accepted_theta[position] for _, position in ranked[:n_keep]]
    if printing:
        print("ME theta found.\n")

    # ---- stage two: evaluate every candidate subset ----
    # every combination of statistic indices of each allowed size
    perms = []
    for subset_size in range(min_subset_size, max_subset_size + 1):
        perms.extend(combinations(range(n_stats), subset_size))

    sampling_details = {
        "sampling_method": "best",
        "num_runs": n_samples,
        "sample_size": n_accept,
        "distance_measure": ABC.log_l2_norm
    }

    best_perm = []
    best_score = maxsize
    best_theta = []
    n_perms = len(perms)

    for counter, perm in enumerate(perms):
        if printing:
            print("Permutation = ", perm, sep="")
        else:
            print("{}/{}           ".format(counter, n_perms), end="\r")

        subset_stats = [summary_stats[stat_id] for stat_id in perm]
        _, subset_theta = ABC.abc_rejection(n_obs,
                                            y_obs,
                                            fitting_model,
                                            priors,
                                            sampling_details,
                                            summary_stats=subset_stats,
                                            show_plots=False,
                                            printing=printing)

        # mean error of this subset's accepted parameters against each of
        # the parameter sets kept from stage one
        mrsse = np.mean([__rsse(subset_theta, theta) for theta in me_theta])
        if printing:
            print("MRSSE of ", perm, "= {:,.2f}\n".format(mrsse), sep="")

        if mrsse < best_score:
            best_perm = perm
            best_score = mrsse
            best_theta = subset_theta

    return best_perm, best_theta