def __rsse(obs, target) -> float:
    """Residual Sum of Squares Error: root of the summed squared L2
    distances between each observation in `obs` and `target`, divided
    by the number of observations."""
    squared_residuals = (ABC.l2_norm(sample, target) ** 2 for sample in obs)
    return np.sqrt(sum(squared_residuals)) / len(obs)
def __k_nn_estimate_entropy(n_params: int, parameter_samples: [(float)], k=4) -> float:
    """
    DESCRIPTION
    Kth Nearest Neighbour estimate of entropy for a posterior distribution.

    PARAMETERS
    n_params (int) - Number of parameters being fitted.
    parameter_samples ([(float)]) - Set of accepted sampled parameters.

    OPTIONAL PARAMETERS
    k (int) - Which nearest neighbour to consider (default=4)

    RETURNS
    float - estimated entropy
    """
    n = len(parameter_samples)  # number accepted samples
    # BUG FIX: each sample has only n-1 neighbours (self excluded below), so
    # k must be strictly less than n; the old check (k > n) let k == n
    # through and would have raised IndexError on distances[k-1].
    if (k >= n):
        raise ValueError("k must be less than the number of samples")

    # constant terms of the estimator (log-volume of the unit n_params-ball,
    # digamma correction, and log sample count)
    gamma = special.gamma(1 + n_params / 2)
    digamma = special.digamma(k)
    h_hat = np.log(np.pi**(n_params / 2) / gamma)
    h_hat -= digamma
    h_hat += np.log(n)

    constant = n_params / n
    for i in range(n):
        sample_i = parameter_samples[i]
        # distances from sample_i to every OTHER sample (self excluded)
        distances = [ABC.l2_norm(sample_i, parameter_samples[j])
                     for j in range(n) if j != i]
        distances.sort()
        # BUG FIX: was hard-coded distances[3] (only correct for k=4);
        # use the kth nearest neighbour to match the digamma(k) term above.
        h_hat += constant * np.log(distances[k - 1])

    return h_hat
def two_step_minimum_entropy(summary_stats: ["function"], n_obs: int, y_obs: [[float]], fitting_model: Models.Model, priors: ["stats.Distribution"], min_subset_size=1, max_subset_size=None, n_samples=1000, n_accept=100, n_keep=10, k=4, printing=False) -> ([int], [[float]]):
    """
    DESCRIPTION
    Two-stage summary-statistic selection. Stage one finds the subset of
    `summary_stats` that minimises the kNN-estimated posterior entropy
    (via `minimum_entropy`). Stage two re-runs ABC-rejection for every
    candidate subset and keeps the one whose accepted samples have the
    lowest mean RSSE against the `n_keep` best stage-one parameter sets.

    PARAMETERS
    summary_stats ([function]) - Candidate summary statistics; each maps
        observations to a summary value.
    n_obs (int) - Number of observations to generate per simulation.
    y_obs ([[float]]) - The observed data being fitted to.
    fitting_model (Models.Model) - Model whose parameters are being fitted.
    priors (["stats.Distribution"]) - Prior distribution for each parameter.

    OPTIONAL PARAMETERS
    min_subset_size (int) - smallest subset of stats to consider (default=1)
    max_subset_size (int) - largest subset of stats to consider
        (default=None, meaning all stats)
    n_samples (int) - number of samples drawn per ABC run (default=1000)
    n_accept (int) - number of samples accepted per ABC run (default=100)
    n_keep (int) - number of (best) accepted samples to keep from the set of stats which minimise entropy (`best_stats`) and use for evaluating second stage (default=10)
    k (int) - which nearest neighbour to use in the entropy estimate (default=4)
    printing (bool) - whether to print progress (default=False)

    RETURNS
    ([int], [[float]]) - indices (into `summary_stats`) of the best subset,
        and the parameter samples accepted when using that subset.
    """
    n_stats = len(summary_stats)
    # None/0 means "no upper bound": consider subsets up to all stats
    max_subset_size = max_subset_size if (max_subset_size) else n_stats

    # find summary stats which minimise entropy
    me_stats_id, accepted_theta = minimum_entropy(summary_stats, n_obs, y_obs, fitting_model, priors, min_subset_size=min_subset_size, max_subset_size=max_subset_size, n_samples=n_samples, n_accept=n_accept, k=k, printing=printing)
    me_stats = [summary_stats[i] for i in me_stats_id]
    # observed summaries under the minimum-entropy stat subset
    s_obs = [s(y_obs) for s in me_stats]
    if (printing): print("ME stats found -", me_stats_id, "\n")

    # identify the `n_keep` best set of parameters:
    # score each accepted theta by how close its simulated summaries are
    # to the observed summaries (L1 norm over per-stat L2 distances)
    theta_scores = []
    for (i, theta) in enumerate(accepted_theta):
        fitting_model.update_params(theta)
        y_t = fitting_model.observe()
        s_t = [s(y_t) for s in me_stats]
        weight = ABC.l1_norm([ABC.l2_norm(s_t_i, s_obs_i) for (s_t_i, s_obs_i) in zip(s_t, s_obs)])
        theta_scores.append((weight, i))
    # lower score = closer fit; keep the n_keep best thetas as references
    theta_scores.sort(key=lambda x: x[0])
    me_theta = [accepted_theta[x[1]] for x in theta_scores[:n_keep]]
    if (printing): print("ME theta found.\n")

    # all permutations of summary stats
    # (index combinations of every allowed subset size)
    n_stats = len(summary_stats)
    perms = []
    for n in range(min_subset_size, max_subset_size + 1):
        perms += [x for x in combinations([i for i in range(n_stats)], n)]

    # running best: (stat subset, its mean RSSE, its accepted thetas);
    # maxsize acts as +infinity so the first subset always wins initially
    lowest = ([], maxsize, [])

    # compare subsets of summary stats to the stage-one reference thetas
    sampling_details = {"sampling_method": "best", "num_runs": n_samples, "sample_size": n_accept, "distance_measure": ABC.log_l2_norm}

    for (i, perm) in enumerate(perms):
        if (printing): print("Permutation = ", perm, sep="")
        else: print("{}/{} ".format(i, len(perms)), end="\r")
        ss = [summary_stats[i] for i in perm]

        # run ABC-rejection using only this subset of summary stats
        _, accepted_theta = ABC.abc_rejection(n_obs, y_obs, fitting_model, priors, sampling_details, summary_stats=ss, show_plots=False, printing=printing)

        # mean RSSE of this subset's accepted thetas against each
        # reference theta from stage one (lower = better subset)
        rsses = [__rsse(accepted_theta, theta) for theta in me_theta]
        mrsse = np.mean(rsses)
        if (printing): print("MRSSE of ", perm, "= {:,.2f}\n".format(mrsse), sep="")
        if (mrsse < lowest[1]): lowest = (perm, mrsse, accepted_theta)

    return lowest[0], lowest[2]