def softmax(dist, return_numpy=True):
    """
    Computes the softmax function on a set of values

    NOTE(review): `softmax` is defined a second time later in this module; at
    import time the later definition replaces this one.

    Parameters
    ----------
    - 'dist' [numpy array or distribution object]:
        Values to transform. When `utils.is_distribution(dist)` is true the raw
        values are read from `dist.values`. When the distribution additionally
        has `IS_AOA` set, the softmax is applied to every entry of
        `dist.values` separately.
    - 'return_numpy' [bool]:
        True/False flag to determine whether the result is returned as a numpy
        array or passed through `utils.to_categorical`

    Returns
    ----------
    - 'output' [numpy array, or the result of `utils.to_categorical`]:
        Exponentiated values normalised along axis 0. In the array-of-arrays
        case, a 1D object array holding one normalised array per entry.
    """
    if utils.is_distribution(dist):
        if dist.IS_AOA:
            # Treat each entry on its own and return straight away: the
            # flat-array arithmetic below cannot operate on the container.
            output = np.empty(len(dist.values), dtype=object)
            for i, values in enumerate(dist.values):
                output[i] = softmax(values, return_numpy=True)
            if return_numpy:
                return output
            return utils.to_categorical(output)
        dist = np.copy(dist.values)

    # subtract the maximum along axis 0 before exponentiating so that the
    # largest exponent is exp(0) and large inputs do not overflow
    output = dist - dist.max(axis=0)
    output = np.exp(output)
    output = output / np.sum(output, axis=0)

    if return_numpy:
        return output
    return utils.to_categorical(output)
def softmax(dist, return_numpy=True):
    """
    Computes the softmax function on a set of values

    NOTE(review): an earlier definition of `softmax` exists in this module;
    being defined later, this is the one that is bound to the name.

    Parameters
    ----------
    - 'dist' [anything accepted by `utils.to_numpy`]:
        Values to transform. If the converted input satisfies
        `utils.is_arr_of_arr`, the softmax is applied to each entry separately.
    - 'return_numpy' [bool]:
        True/False flag to determine whether the result is returned as a numpy
        array or passed through `utils.to_categorical`

    Returns
    ----------
    - 'output' [numpy array, or the result of `utils.to_categorical`]:
        Exponentiated values normalised along axis 0. In the array-of-arrays
        case, a 1D object array holding one normalised array per entry.
    """
    dist = utils.to_numpy(dist)

    if utils.is_arr_of_arr(dist):
        # Normalise every sub-array independently; an object container has no
        # meaningful `max`/`exp`, so the flat path below must be skipped.
        output = np.empty(len(dist), dtype=object)
        for i, sub_dist in enumerate(dist):
            output[i] = softmax(sub_dist, return_numpy=True)
    else:
        # subtract the maximum along axis 0 before exponentiating so that the
        # largest exponent is exp(0) and large inputs do not overflow
        output = dist - dist.max(axis=0)
        output = np.exp(output)
        output = output / np.sum(output, axis=0)

    if return_numpy:
        return output
    return utils.to_categorical(output)
def update_posterior_states(A, obs, prior=None, method=FPI, return_numpy=True):
    """
    Update marginal posterior over hidden states using variational inference
    Can optionally set message passing algorithm used for inference

    Parameters
    ----------
    - 'A' [numpy nd.array (matrix or tensor or array-of-arrays) or Categorical]:
        Observation likelihood of the generative model, mapping from hidden states to observations
        Used to invert generative model to obtain marginal likelihood over hidden states,
        given the observation
    - 'obs' [numpy 1D array, array of arrays (with 1D numpy array entries), int or tuple]:
        The observation (generated by the environment). If single modality, this can be a 1D array
        (one-hot vector representation) or an int (observation index)
        If multi-modality, this can be an array of arrays (whose entries are 1D one-hot vectors)
        or a tuple (of observation indices)
    - 'prior' [numpy 1D array, array of arrays (with 1D numpy array entries), Categorical, or None]:
        Prior beliefs about hidden states, to be integrated with the marginal likelihood to obtain
        a posterior distribution. If None, it is handed on unchanged to the inference routine
        (documented as defaulting to a flat categorical distribution there). (optional)
    - 'return_numpy' [bool]:
        True/False flag to determine whether the posterior is returned as a numpy array or
        a Categorical
    - 'method' [str]:
        Algorithm used to perform the variational inference. Compared by value against the
        module-level constants.
        Options:
            'FPI' - Fixed point iteration
                - http://www.cs.cmu.edu/~guestrin/Class/10708/recitations/r9/VI-view.pdf, slides 13- 18
                - http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.137.221&rep=rep1&type=pdf, slides 24 - 38
            'VMP' - Variational message passing (not implemented)
            'MMP' - Marginal message passing (not implemented)
            'BP' - Belief propagation (not implemented)
            'EP' - Expectation propagation (not implemented)
            'CV' - Cluster variation method (not implemented)

    Returns
    ----------
    - 'qs' [numpy 1D array, array of arrays (with 1D numpy array entries), or Categorical]:
        Marginal posterior beliefs over hidden states

    Raises
    ----------
    - NotImplementedError: if `method` names one of the algorithms listed as not implemented
    - ValueError: if `method` matches none of the known algorithm constants
    """
    # safe convert to numpy
    A = utils.to_numpy(A)

    # collect model dimensions
    if utils.is_arr_of_arr(A):
        # dimensions of the first modality are taken as representative
        n_factors = A[0].ndim - 1
        n_states = list(A[0].shape[1:])
        n_modalities = len(A)
        n_observations = [A[m].shape[0] for m in range(n_modalities)]
    else:
        n_factors = A.ndim - 1
        n_states = list(A.shape[1:])
        n_modalities = 1
        n_observations = [A.shape[0]]

    obs = process_observations(obs, n_modalities, n_observations)
    if prior is not None:
        prior = process_priors(prior, n_factors)

    # Compare by value, not identity: a caller-supplied string that is equal to
    # a constant is not guaranteed to be the very same object.
    if method == FPI:
        qs = run_fpi(A, obs, n_observations, n_states, prior)
    else:
        for unimplemented in (VMP, MMP, BP, EP, CV):
            if method == unimplemented:
                raise NotImplementedError(f"{unimplemented} is not implemented")
        raise ValueError(f"{method} is not implemented")

    if return_numpy:
        return qs
    return utils.to_categorical(qs)
def get_expected_obs(qs_pi, A, return_numpy=False):
    """
    Project a posterior predictive density over hidden states through the
    observation likelihood A to obtain the expected observations.

    Parameters
    ----------
    qs_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array),
           Categorical (either single-factor or AoA), or list]:
        Posterior predictive density over hidden states. A list is read as one
        entry per timepoint of an expected trajectory. NOTE: when a list is
        given, its entries are overwritten in place with their
        `utils.to_numpy(..., flatten=True)` conversions.
    A [numpy nd-array, array-of-arrays (where each entry is a numpy nd-array),
       or Categorical (either single-factor or AoA)]:
        Observation likelihood mapping from hidden states to observations, with
        different modalities (if there are multiple) stored in different arrays
    return_numpy [Boolean]:
        True/False flag to determine whether output of function is a numpy
        array or a Categorical

    Returns
    -------
    qo_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array),
           Categorical (either single-factor or AoA), or list]:
        Expected observations. For a single timestep the bare entry is
        returned; otherwise a list with one entry per timestep.
    """
    A = utils.to_numpy(A)

    # bring the beliefs into a uniform shape: a list with one flattened numpy
    # entry per timestep
    if isinstance(qs_pi, list):
        n_steps = len(qs_pi)
        for step, beliefs in enumerate(qs_pi):
            qs_pi[step] = utils.to_numpy(beliefs, flatten=True)
    else:
        qs_pi = [utils.to_numpy(qs_pi, flatten=True)]
        n_steps = 1

    if utils.is_arr_of_arr(A):
        # multiple modalities: one object array per timestep, one slot per modality
        num_modalities = len(A)
        qo_pi = []
        for beliefs in qs_pi:
            expected = np.empty(num_modalities, dtype=object)
            for modality in range(num_modalities):
                expected[modality] = spm_dot(A[modality], beliefs)
            qo_pi.append(expected)
    else:
        # single modality: one projected array per timestep
        qo_pi = [spm_dot(A, beliefs) for beliefs in qs_pi]

    single_step = n_steps == 1
    if return_numpy:
        return qo_pi[0] if single_step else qo_pi
    if single_step:
        return utils.to_categorical(qo_pi[0])
    return [utils.to_categorical(expected) for expected in qo_pi]
def update_posterior_policies(
    qs,
    A,
    B,
    C,
    policies,
    use_utility=True,
    use_states_info_gain=True,
    use_param_info_gain=False,
    pA=None,
    pB=None,
    gamma=16.0,
    return_numpy=True,
):
    """
    Updates the posterior beliefs about policies based on expected free energy prior

    @TODO: Needs to be amended for use with multi-step policies (where possible_policies is a
    list of np.arrays (n_step x n_factor), not just a list of tuples as it is now)

    Parameters
    ----------
    - `qs` [1D numpy array, array-of-arrays, or Categorical (either single- or multi-factor)]:
        Current marginal beliefs about hidden state factors
    - `A` [numpy ndarray, array-of-arrays (in case of multiple modalities), or Categorical
            (both single and multi-modality)]:
        Observation likelihood model (beliefs about the likelihood mapping entertained by the agent)
    - `B` [numpy ndarray, array-of-arrays (in case of multiple hidden state factors), or Categorical
            (both single and multi-factor)]:
        Transition likelihood model (beliefs about the likelihood mapping entertained by the agent)
    - `C` [numpy 1D-array, array-of-arrays (in case of multiple modalities), or Categorical
            (both single and multi-modality)]:
        Prior beliefs about outcomes (prior preferences)
    - `policies` [list]:
        A list of all the possible policies. Each entry is handed to `get_expected_states`,
        which reads `policy.shape[0]` and `policy.shape[1]`, so every policy has to expose a
        two-dimensional `shape`. The values are indices, where a given index corresponds to
        an action on a particular hidden state factor.
    - `use_utility` [bool]:
        Whether to calculate utility term, i.e how much expected observation confer with prior
        expectations
    - `use_states_info_gain` [bool]:
        Whether to calculate state information gain
    - `use_param_info_gain` [bool]:
        Whether to calculate parameter information gain @NOTE requires pA or pB to be specified;
        if both are None this flag has no effect
    - `pA` [numpy ndarray, array-of-arrays (in case of multiple modalities), or Dirichlet
            (both single and multi-modality)]:
        Prior dirichlet parameters for A. Defaults to none, in which case info gain w.r.t.
        Dirichlet parameters over A is skipped.
    - `pB` [numpy ndarray, array-of-arrays (in case of multiple hidden state factors), or
            Dirichlet (both single and multi-factor)]:
        Prior dirichlet parameters for B. Defaults to none, in which case info gain w.r.t.
        Dirichlet parameters over B is skipped.
    - `gamma` [float, defaults to 16.0]:
        Precision over policies, used as the inverse temperature parameter of a softmax
        transformation of the expected free energies of each policy
    - `return_numpy` [Boolean]:
        True/False flag to determine whether `q_pi` is returned as a numpy array or a Categorical

    Returns
    --------
    - `q_pi` [1D numpy array or Categorical]:
        Posterior beliefs about policies, defined here as a softmax function of the
        expected free energies of policies
    - `efe` [1D numpy array]:
        The expected free energies of policies (always a numpy array, regardless of
        `return_numpy`)
    """
    n_policies = len(policies)
    efe = np.zeros(n_policies)

    for idx, policy in enumerate(policies):
        # predicted states and observations under this policy
        qs_pi = get_expected_states(qs, B, policy)
        qo_pi = get_expected_obs(qs_pi, A)

        # accumulate whichever terms of the expected free energy are enabled
        if use_utility:
            efe[idx] += calc_expected_utility(qo_pi, C)

        if use_states_info_gain:
            efe[idx] += calc_states_info_gain(A, qs_pi)

        if use_param_info_gain:
            if pA is not None:
                efe[idx] += calc_pA_info_gain(pA, qo_pi, qs_pi)
            if pB is not None:
                efe[idx] += calc_pB_info_gain(pB, qs_pi, qs, policy)

    q_pi = softmax(efe * gamma)

    # renormalise after the softmax, in whichever representation was requested
    if return_numpy:
        q_pi = q_pi / q_pi.sum(axis=0)
    else:
        q_pi = utils.to_categorical(q_pi)
        q_pi.normalize()

    return q_pi, efe
def get_expected_states(qs, B, policy, return_numpy=False):
    """
    Roll the current beliefs about hidden states forward through the transition
    model B, applying the actions prescribed by a policy one timestep at a time.

    Parameters
    ----------
    - `qs` [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array),
            or Categorical (either single-factor or AoA)]:
        Current posterior beliefs about hidden states
    - `B` [numpy nd-array, array-of-arrays (where each entry is a numpy nd-array),
            or Categorical (either single-factor of AoA)]:
        Transition likelihood mapping from states at t to states at t + 1. The
        action is selected by indexing the third axis (`B[:, :, action]`).
    - `policy` [np.array]:
        Array of size (policy_len x n_factors) where each value corresponds to a
        control state
    - `return_numpy` [Boolean]:
        True/False flag to determine whether output of function is a numpy
        array or a Categorical

    Returns
    -------
    - `qs_pi` [list with one entry per timestep, or the bare entry when there is
               only one timestep]:
        Expected states under the given policy - also known as the 'posterior
        predictive density'. With multiple hidden state factors each entry is
        an object array holding one array per factor.
    """
    n_steps, n_factors = policy.shape[0], policy.shape[1]

    qs = utils.to_numpy(qs, flatten=True)
    B = utils.to_numpy(B)

    if utils.is_arr_of_arr(B):
        # one empty per-factor container for every timestep
        qs_pi = [np.empty(n_factors, dtype=object) for _ in range(n_steps)]

        # t = 0: propagate the current posterior through the first action
        for factor, action in enumerate(policy[0, :]):
            qs_pi[0][factor] = spm_dot(B[factor][:, :, action], qs[factor])

        # t > 0: propagate the previous step's prediction
        for t in range(1, n_steps):
            for factor, action in enumerate(policy[t, :]):
                qs_pi[t][factor] = spm_dot(
                    B[factor][:, :, action], qs_pi[t - 1][factor]
                )
    else:
        # single factor: only the first column of the policy is read
        qs_pi = [spm_dot(B[:, :, policy[0, 0]], qs)]
        for t in range(1, n_steps):
            qs_pi.append(spm_dot(B[:, :, policy[t, 0]], qs_pi[t - 1]))

    if len(qs_pi) == 1:
        first = qs_pi[0]
        return first if return_numpy else utils.to_categorical(first)
    if return_numpy:
        return qs_pi
    return [utils.to_categorical(step_beliefs) for step_beliefs in qs_pi]
def update_transition_dirichlet(pB, B, actions, qs, qs_prev, lr=1.0, return_numpy=True, factors="all"):
    """
    Update Dirichlet parameters that parameterize the transition model of the
    generative model (describing the probabilistic mapping between hidden
    states over time).

    Parameters
    -----------
    - pB [numpy nd.array, array-of-arrays (with np.ndarray entries), or Dirichlet
            (either single-modality or AoA)]:
        The prior Dirichlet parameters of the generative model, parameterizing
        the agent's beliefs about the transition likelihood.
    - B [numpy nd.array, object-like array of arrays, or Categorical
            (either single-modality or AoA)]:
        The transition likelihood of the generative model. Only entries that
        are greater than zero let the corresponding Dirichlet counts change.
    - actions [tuple]:
        A tuple containing the action(s) performed at a given timestep.
    - qs [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array),
            or Categorical (either single-factor or AoA)]:
        Current marginal posterior beliefs about hidden state factors
    - qs_prev [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array),
            or Categorical (either single-factor or AoA)]:
        Past marginal posterior beliefs about hidden state factors
    - lr [float, optional]:
        Learning rate.
    - return_numpy [bool, optional]:
        Logical flag to determine whether output is a numpy array or a Dirichlet
    - factors [list, optional]:
        Indices of the hidden state factors to include in learning. Defaults to
        'all', meaning that transition likelihood matrices for all hidden state
        factors are updated as a function of transitions in the different
        control factors (i.e. actions)

    Returns
    -----------
    - pB_updated:
        Copy of `pB` (wrapped by `utils.to_dirichlet` when `return_numpy` is
        False) with the scaled update added for each selected factor.
    """
    pB = utils.to_numpy(pB)
    n_factors = len(pB) if utils.is_arr_of_arr(pB) else 1

    pB_updated = pB.copy() if return_numpy else utils.to_dirichlet(pB.copy())

    # the cross product below is a method of the distribution type
    if not utils.is_distribution(qs):
        qs = utils.to_categorical(qs)

    update_all = factors == "all"

    if update_all and n_factors == 1:
        # single factor: operate on the bare arrays, first action only
        db = qs.cross(qs_prev, return_numpy=True)
        db = db * (B[:, :, actions[0]] > 0).astype("float")
        return pB_updated + (lr * db)

    # multi-factor "all" walks every factor; otherwise only the requested ones
    selected = range(n_factors) if update_all else factors
    for factor in selected:
        db = qs[factor].cross(qs_prev[factor], return_numpy=True)
        db = db * (B[factor][:, :, actions[factor]] > 0).astype("float")
        pB_updated[factor] = pB_updated[factor] + (lr * db)

    return pB_updated
def update_likelihood_dirichlet(pA, A, obs, qs, lr=1.0, return_numpy=True, modalities="all"):
    """
    Update Dirichlet parameters of the likelihood distribution

    Parameters
    -----------
    - pA [numpy nd.array, array-of-arrays (with np.ndarray entries), or Dirichlet
            (either single-modality or AoA)]:
        The prior Dirichlet parameters of the generative model, parameterizing
        the agent's beliefs about the observation likelihood.
    - A [numpy nd.array, object-like array of arrays, or Categorical
            (either single-modality or AoA)]:
        The observation likelihood of the generative model. Only entries that
        are greater than zero let the corresponding Dirichlet counts change.
    - obs [numpy 1D array, array-of-arrays (with 1D numpy array entries), int or tuple]:
        A discrete observation used in the update equation. An int is expanded
        to a one-hot vector of length `A.shape[0]`; a tuple is expanded to one
        one-hot vector per modality.
    - qs [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array),
            or Categorical (either single-factor or AoA)]:
        Current marginal posterior beliefs about hidden state factors
    - lr [float, optional]:
        Learning rate.
    - return_numpy [bool, optional]:
        Logical flag to determine whether output is a numpy array or a Dirichlet
    - modalities [list, optional]:
        Indices (in terms of range(n_modalities)) of the observation modalities
        to include in learning. Defaults to 'all', meaning that observation
        likelihood matrices for all modalities are updated as a function of
        observations in the different modalities.

    Returns
    -----------
    - pA_updated:
        Copy of `pA` (wrapped by `utils.to_dirichlet` when `return_numpy` is
        False) with the scaled update added for each selected modality.
    """
    pA = utils.to_numpy(pA)

    if utils.is_arr_of_arr(pA):
        n_modalities = len(pA)
        n_observations = [pA[m].shape[0] for m in range(n_modalities)]
    else:
        n_modalities = 1
        n_observations = [pA.shape[0]]

    if return_numpy:
        pA_updated = pA.copy()
    else:
        pA_updated = utils.to_dirichlet(pA.copy())

    if isinstance(obs, (int, np.integer)):
        # observation index -> one-hot vector
        obs = np.eye(A.shape[0])[obs]
    elif isinstance(obs, tuple):
        # observation indices -> one one-hot vector per modality.
        # Fill a preallocated object array instead of calling
        # np.array(list, dtype=object): when every modality has the same number
        # of outcomes the latter builds a single 2-D object matrix rather than
        # a 1-D array whose entries are separate float vectors.
        one_hots = np.empty(n_modalities, dtype=object)
        for g in range(n_modalities):
            one_hots[g] = np.eye(n_observations[g])[obs[g]]
        obs = one_hots

    # convert to Categorical to make the cross product easier
    obs = utils.to_categorical(obs)

    if modalities == "all":
        if n_modalities == 1:
            da = obs.cross(qs, return_numpy=True)
            da = da * (A > 0).astype("float")
            pA_updated = pA_updated + (lr * da)
        elif n_modalities > 1:
            for g in range(n_modalities):
                da = obs[g].cross(qs, return_numpy=True)
                da = da * (A[g] > 0).astype("float")
                pA_updated[g] = pA_updated[g] + (lr * da)
    else:
        for g_idx in modalities:
            da = obs[g_idx].cross(qs, return_numpy=True)
            da = da * (A[g_idx] > 0).astype("float")
            pA_updated[g_idx] = pA_updated[g_idx] + (lr * da)

    return pA_updated