def softmax(dist, return_numpy=True):
    """
    Computes the softmax function on a set of values

    Parameters
    ----------
    - `dist` [numpy array or distribution object]:
        Values to transform. Objects for which `utils.is_distribution` is true are
        unwrapped through their `.values` attribute. If such an object is flagged
        `IS_AOA`, every entry of `.values` is transformed on its own.
    - `return_numpy` [bool]:
        True/False flag to determine whether the result is returned as a numpy
        array or passed through `utils.to_categorical`

    Returns
    -------
    - [numpy array, object array of numpy arrays, or the result of `utils.to_categorical`]:
        Softmax of `dist`, normalised along axis 0
    """
    # NOTE(review): another `softmax` is defined directly after this one in the
    # file and will shadow it at import time - confirm which one is meant to stay.
    if utils.is_distribution(dist):
        if dist.IS_AOA:
            entries = dist.values
            # An object array is filled entry by entry; assigning by index into an
            # empty list (as was done previously) raises IndexError.
            output = np.empty(len(entries), dtype=object)
            for i, entry in enumerate(entries):
                output[i] = softmax(entry, return_numpy=True)
            # Return here: the dense computation below cannot be applied to a
            # collection of separately-shaped arrays.
            if return_numpy:
                return output
            return utils.to_categorical(output)
        dist = np.copy(dist.values)

    # Subtracting the maximum does not change the result but keeps np.exp bounded
    output = dist - dist.max(axis=0)
    output = np.exp(output)
    output = output / np.sum(output, axis=0)

    if return_numpy:
        return output
    return utils.to_categorical(output)
def softmax(dist, return_numpy=True):
    """
    Computes the softmax function on a set of values

    Parameters
    ----------
    - `dist` [anything accepted by `utils.to_numpy`]:
        Values to transform. If the converted input is an array-of-arrays
        (according to `utils.is_arr_of_arr`), each entry is transformed on its own.
    - `return_numpy` [bool]:
        True/False flag to determine whether the result is returned as a numpy
        array or passed through `utils.to_categorical`

    Returns
    -------
    - [numpy array, object array of numpy arrays, or the result of `utils.to_categorical`]:
        Softmax of `dist`, normalised along axis 0
    """
    dist = utils.to_numpy(dist)

    if utils.is_arr_of_arr(dist):
        # `dist` is already a numpy array here, so iterate it directly (it has no
        # `.values`), and keep the per-entry results instead of discarding them.
        output = np.empty(len(dist), dtype=object)
        for i, entry in enumerate(dist):
            output[i] = softmax(entry, return_numpy=True)
    else:
        # Subtracting the maximum does not change the result but keeps np.exp bounded
        output = dist - dist.max(axis=0)
        output = np.exp(output)
        output = output / np.sum(output, axis=0)

    if return_numpy:
        return output
    return utils.to_categorical(output)
def update_posterior_states(A, obs, prior=None, return_numpy=True, method=FPI, **kwargs):
    """
    Update marginal posterior over hidden states using variational inference
    Can optionally set message passing algorithm used for inference

    Parameters
    ----------
    - 'A' [numpy nd.array (matrix or tensor or array-of-arrays) or Categorical]:
        Observation likelihood of the generative model, mapping from hidden states to observations
        Used to invert generative model to obtain marginal likelihood over hidden states,
        given the observation
    - 'obs' [numpy 1D array, array of arrays (with 1D numpy array entries), int or tuple]:
        The observation (generated by the environment). If single modality, this can be a 1D array
        (one-hot vector representation) or an int (observation index)
        If multi-modality, this can be an array of arrays (whose entries are 1D one-hot vectors)
        or a tuple (of observation indices)
    - 'prior' [numpy 1D array, array of arrays (with 1D numpy array entries), Categorical, or None]:
        Prior beliefs about hidden states, to be integrated with the marginal likelihood to obtain
        a posterior distribution. If None, it is forwarded unchanged to the inference routine
        (which is expected to fall back to a flat categorical distribution). (optional)
    - 'return_numpy' [bool]:
        True/False flag to determine whether the posterior is returned as a numpy array or a Categorical
    - 'method' [str]:
        Algorithm used to perform the variational inference, compared by equality
        against the module-level method constants.
        Options:
            'FPI' - Fixed point iteration
                - http://www.cs.cmu.edu/~guestrin/Class/10708/recitations/r9/VI-view.pdf, slides 13- 18
                - http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.137.221&rep=rep1&type=pdf, slides 24 - 38
            'VMP' - Variational message passing (not implemented)
            'MMP' - Marginal message passing (not implemented)
            'BP'  - Belief propagation (not implemented)
            'EP'  - Expectation propagation (not implemented)
            'CV'  - Cluster variation method (not implemented)
    - **kwargs:
        Keyword arguments forwarded to the selected variational inference algorithm

    Returns
    ----------
    - 'qs' [numpy 1D array, array of arrays (with 1D numpy array entries), or Categorical]:
        Marginal posterior beliefs over hidden states

    Raises
    ----------
    - NotImplementedError: `method` names a known but unimplemented algorithm
    - ValueError: `method` is not one of the known algorithms
    """
    # safe convert to numpy
    A = utils.to_numpy(A)

    # collect model dimensions
    if utils.is_arr_of_arr(A):
        # factor count and state dimensions are read off the first modality
        n_factors = A[0].ndim - 1
        n_states = list(A[0].shape[1:])
        n_modalities = len(A)
        n_observations = [A[m].shape[0] for m in range(n_modalities)]
    else:
        n_factors = A.ndim - 1
        n_states = list(A.shape[1:])
        n_modalities = 1
        n_observations = [A.shape[0]]

    obs = process_observations(obs, n_modalities, n_observations)
    if prior is not None:
        prior = process_priors(prior, n_factors)

    # Compare by value: `is` only succeeds when the caller passes the very same
    # object as the module constant, which is not guaranteed for equal strings.
    if method == FPI:
        qs = run_fpi(A, obs, n_observations, n_states, prior, **kwargs)
    elif method in (VMP, MMP, BP, EP, CV):
        raise NotImplementedError(f"{method} is not implemented")
    else:
        raise ValueError(f"{method} is not implemented")

    if return_numpy:
        return qs
    return utils.to_categorical(qs)
def get_expected_obs(qs_pi, A, return_numpy=False):
    """
    Project a posterior predictive density over hidden states through the
    observation likelihood `A` to obtain the expected observations.

    Parameters
    ----------
    qs_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array),
           Categorical (either single-factor or AoA), or list]:
        Posterior predictive density over hidden states. If a list, each entry is the
        posterior predictive for one timepoint of an expected trajectory.
        NOTE: when a list is passed, its entries are replaced in place by their
        numpy conversions.
    A [numpy nd-array, array-of-arrays (where each entry is a numpy nd-array),
       or Categorical (either single-factor or AoA)]:
        Observation likelihood mapping from hidden states to observations, with different
        modalities (if there are multiple) stored in different arrays
    return_numpy [Boolean]:
        True/False flag to determine whether output of function is a numpy array or a Categorical

    Returns
    -------
    qo_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array),
           Categorical (either single-factor or AoA), or list]:
        Expected observations. For a single timestep the bare entry is returned;
        otherwise a list with one entry per timestep.
    """
    A = utils.to_numpy(A)

    # normalise the input to a list of flattened numpy beliefs, one per timestep
    if isinstance(qs_pi, list):
        n_steps = len(qs_pi)
        for t, beliefs in enumerate(qs_pi):
            qs_pi[t] = utils.to_numpy(beliefs, flatten=True)
    else:
        n_steps = 1
        qs_pi = [utils.to_numpy(qs_pi, flatten=True)]

    if utils.is_arr_of_arr(A):
        # one expected-observation vector per modality, per timestep
        num_modalities = len(A)
        qo_pi = []
        for t in range(n_steps):
            expected_t = np.empty(num_modalities, dtype=object)
            for modality in range(num_modalities):
                expected_t[modality] = spm_dot(A[modality], qs_pi[t])
            qo_pi.append(expected_t)
    else:
        qo_pi = [spm_dot(A, qs_pi[t]) for t in range(n_steps)]

    # a single timestep is returned unwrapped
    if n_steps == 1:
        only_step = qo_pi[0]
        return only_step if return_numpy else utils.to_categorical(only_step)

    if return_numpy:
        return qo_pi
    return [utils.to_categorical(expected) for expected in qo_pi]
def update_posterior_policies(
    qs,
    A,
    B,
    C,
    policies,
    use_utility=True,
    use_states_info_gain=True,
    use_param_info_gain=False,
    pA=None,
    pB=None,
    gamma=16.0,
    return_numpy=True,
):
    """
    Updates the posterior beliefs about policies based on expected free energy prior

    @TODO: Needs to be amended for use with multi-step policies (where possible_policies is a
    list of np.arrays (n_step x n_factor), not just a list of tuples as it is now)

    Parameters
    ----------
    - `qs` [1D numpy array, array-of-arrays, or Categorical (either single- or multi-factor)]:
        Current marginal beliefs about hidden state factors
    - `A` [numpy ndarray, array-of-arrays (in case of multiple modalities), or Categorical
            (both single and multi-modality)]:
        Observation likelihood model (beliefs about the likelihood mapping entertained by the agent)
    - `B` [numpy ndarray, array-of-arrays (in case of multiple hidden state factors), or Categorical
            (both single and multi-factor)]:
        Transition likelihood model (beliefs about the likelihood mapping entertained by the agent)
    - `C` [numpy 1D-array, array-of-arrays (in case of multiple modalities), or Categorical
            (both single and multi-modality)]:
        Prior beliefs about outcomes (prior preferences)
    - `policies` [list]:
        A list of all the possible policies; each one is passed as-is to `get_expected_states`.
        NOTE(review): earlier documentation described each policy as a tuple of action
        indices, but `get_expected_states` reads `policy.shape` - confirm the expected type.
    - `use_utility` [bool]:
        Whether to calculate utility term, i.e how much expected observation confer with prior expectations
    - `use_states_info_gain` [bool]:
        Whether to calculate state information gain
    - `use_param_info_gain` [bool]:
        Whether to calculate parameter information gain @NOTE requires pA or pB to be specified
    - `pA` [numpy ndarray, array-of-arrays (in case of multiple modalities), or Dirichlet
            (both single and multi-modality)]:
        Prior dirichlet parameters for A. Defaults to none, in which case info gain w.r.t. Dirichlet
        parameters over A is skipped.
    - `pB` [numpy ndarray, array-of-arrays (in case of multiple hidden state factors), or
            Dirichlet (both single and multi-factor)]:
        Prior dirichlet parameters for B. Defaults to none, in which case info gain w.r.t. Dirichlet
        parameters over B is skipped.
    - `gamma` [float, defaults to 16.0]:
        Precision over policies, used as the inverse temperature parameter of a softmax transformation
        of the expected free energies of each policy
    - `return_numpy` [Boolean]:
        True/False flag to determine whether `qp` is returned as a numpy array or a Categorical

    Returns
    --------
    - `qp` [1D numpy array or Categorical]:
        Posterior beliefs about policies, defined here as a softmax function of the
        expected free energies of policies
    - `efe` [1D numpy array]:
        The expected free energies of policies (always a numpy array,
        regardless of `return_numpy`)
    """
    n_policies = len(policies)
    efe = np.zeros(n_policies)

    for idx, policy in enumerate(policies):
        qs_pi = get_expected_states(qs, B, policy)
        qo_pi = get_expected_obs(qs_pi, A)

        if use_utility:
            efe[idx] += calc_expected_utility(qo_pi, C)

        if use_states_info_gain:
            efe[idx] += calc_states_info_gain(A, qs_pi)

        if use_param_info_gain:
            # each parameter term is only added when its Dirichlet prior is supplied
            if pA is not None:
                efe[idx] += calc_pA_info_gain(pA, qo_pi, qs_pi)
            if pB is not None:
                efe[idx] += calc_pB_info_gain(pB, qs_pi, qs, policy)

    q_pi = softmax(efe * gamma)

    if return_numpy:
        q_pi = q_pi / q_pi.sum(axis=0)  # type: ignore
    else:
        q_pi = utils.to_categorical(q_pi)
        q_pi.normalize()

    return q_pi, efe
def get_expected_states(qs, B, policy, return_numpy=False):
    """
    Roll the current posterior `qs` forward through the transition model `B`
    along the actions prescribed by `policy`.

    Parameters
    ----------
    - `qs` [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), or Categorical
            (either single-factor or AoA)]:
        Current posterior beliefs about hidden states
    - `B` [numpy nd-array, array-of-arrays (where each entry is a numpy nd-array), or Categorical
            (either single-factor of AoA)]:
        Transition likelihood mapping from states at t to states at t + 1, with different actions
        (per factor) stored along the last dimension
    - `policy` [np.array]:
        np.array of size (policy_len x n_factors) where each value corresponds to a control state.
        When `B` is not an array-of-arrays only column 0 is read.
    - `return_numpy` [Boolean]:
        True/False flag to determine whether output of function is a numpy array or a Categorical

    Returns
    -------
    - `qs_pi` [list of np.arrays with len n_steps, where in case of multiple hidden state factors
            each np.array in the list is an array-of-arrays with one entry per factor,
            otherwise a list of 1D numpy arrays]:
        Expected states under the given policy - also known as the 'posterior predictive density'.
        When there is exactly one step the bare entry is returned instead of a list.
    """
    n_steps = policy.shape[0]
    n_factors = policy.shape[1]

    qs = utils.to_numpy(qs, flatten=True)
    B = utils.to_numpy(B)

    if utils.is_arr_of_arr(B):
        # one object array per timestep, holding one belief vector per factor
        qs_pi = [np.empty(n_factors, dtype=object) for _ in range(n_steps)]

        # first step is propagated from the current posterior (t = 0)
        for factor, action in enumerate(policy[0, :]):
            qs_pi[0][factor] = spm_dot(B[factor][:, :, action], qs[factor])

        # later steps are propagated from the preceding expected states
        for t in range(1, n_steps):
            previous = qs_pi[t - 1]
            for factor, action in enumerate(policy[t, :]):
                qs_pi[t][factor] = spm_dot(B[factor][:, :, action], previous[factor])
    else:
        # single factor: the action index lives in the first policy column
        qs_pi = [spm_dot(B[:, :, policy[0, 0]], qs)]
        for t in range(1, n_steps):
            qs_pi.append(spm_dot(B[:, :, policy[t, 0]], qs_pi[-1]))

    # a single timestep is returned unwrapped
    if len(qs_pi) == 1:
        only_step = qs_pi[0]
        return only_step if return_numpy else utils.to_categorical(only_step)

    if return_numpy:
        return qs_pi
    return [utils.to_categorical(step) for step in qs_pi]
def update_transition_dirichlet(pB, B, actions, qs, qs_prev, lr=1.0, factors="all", return_numpy=True):
    """
    Update Dirichlet parameters that parameterize the transition model of the generative model
    (describing the probabilistic mapping between hidden states over time).

    Parameters
    -----------
    - pB [numpy nd.array, array-of-arrays (with np.ndarray entries), or Dirichlet
            (either single-modality or AoA)]:
        The prior Dirichlet parameters of the generative model, parameterizing the agent's
        beliefs about the transition likelihood. A deep copy is updated; `pB` itself is not modified.
    - B [numpy nd.array, object-like array of arrays, or Categorical (either single-modality or AoA)]:
        The transition likelihood of the generative model. Only entries where `B > 0`
        receive an update.
    - actions [numpy 1D array]:
        A 1D numpy array of shape (num_control_factors,) containing the action(s) performed at
        a given timestep.
    - qs [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), or Categorical
            (either single-factor or AoA)]:
        Current marginal posterior beliefs about hidden state factors
    - qs_prev [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), or Categorical
            (either single-factor or AoA)]:
        Past marginal posterior beliefs about hidden state factors
    - lr [float, optional]:
        Learning rate.
    - factors [list, optional]:
        Indices (in terms of range(Nf)) of the hidden state factors to include in learning.
        Defaults to 'all', meaning that transition likelihood matrices for all hidden state factors
        are updated as a function of transitions in the different control factors (i.e. actions)
    - return_numpy [bool, optional]:
        Logical flag to determine whether output is a numpy array or a Dirichlet

    Returns
    -----------
    - pB_updated [same layout as pB, as numpy or Dirichlet]:
        The updated Dirichlet parameters
    """
    pB = utils.to_numpy(pB)
    B = utils.to_numpy(B)

    n_factors = len(pB) if utils.is_arr_of_arr(pB) else 1

    pB_updated = copy.deepcopy(pB)
    if not return_numpy:
        pB_updated = utils.to_dirichlet(pB_updated)

    if not utils.is_distribution(qs):
        qs = utils.to_categorical(qs)

    # Check the type first: comparing a numpy index array against "all" is an
    # element-wise operation and does not yield a single usable boolean.
    update_all = isinstance(factors, str) and factors == "all"

    if update_all and n_factors == 1:
        # single factor: pB is one array indexed directly by the action
        action = actions[0]
        dfdb = qs.cross(qs_prev, return_numpy=True)
        dfdb = dfdb * (B[:, :, action] > 0).astype("float")
        pB_updated[:, :, action] = pB_updated[:, :, action] + (lr * dfdb)
        return pB_updated

    factor_ids = range(n_factors) if update_all else factors
    for factor in factor_ids:
        action = actions[factor]
        dfdb = qs[factor].cross(qs_prev[factor], return_numpy=True)
        # mask out transitions the model assigns zero probability
        dfdb = dfdb * (B[factor][:, :, action] > 0).astype("float")
        pB_updated[factor][:, :, action] = pB_updated[factor][:, :, action] + (lr * dfdb)

    return pB_updated
def update_likelihood_dirichlet(pA, A, obs, qs, lr=1.0, modalities="all", return_numpy=True):
    """
    Update Dirichlet parameters of the likelihood distribution

    Parameters
    -----------
    - pA [numpy nd.array, array-of-arrays (with np.ndarray entries), or Dirichlet
            (either single-modality or AoA)]:
        The prior Dirichlet parameters of the generative model, parameterizing the agent's
        beliefs about the observation likelihood. A deep copy is updated; `pA` itself is not modified.
    - A [numpy nd.array, object-like array of arrays, or Categorical (either single-modality or AoA)]:
        The observation likelihood of the generative model. Only entries where `A > 0`
        receive an update.
    - obs [numpy 1D array, array-of-arrays (with 1D numpy array entries), int or tuple]:
        A discrete observation (possible multi-modality) used in the update equation.
        An int or a tuple of ints is expanded to one-hot vector(s).
    - qs [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), or Categorical
            (either single-factor or AoA)]:
        Current marginal posterior beliefs about hidden state factors
    - lr [float, optional]:
        Learning rate.
    - modalities [list, optional]:
        Indices (in terms of range(n_modalities)) of the observation modalities to include
        in learning. Defaults to 'all', meaning that observation likelihood matrices for all
        modalities are updated using their respective observations.
    - return_numpy [bool, optional]:
        Logical flag to determine whether output is a numpy array or a Dirichlet

    Returns
    -----------
    - pA_updated [same layout as pA, as numpy or Dirichlet]:
        The updated Dirichlet parameters
    """
    pA = utils.to_numpy(pA)
    A = utils.to_numpy(A)

    if utils.is_arr_of_arr(pA):
        n_modalities = len(pA)
        n_observations = [pA[modality].shape[0] for modality in range(n_modalities)]
    else:
        n_modalities = 1
        n_observations = [pA.shape[0]]

    pA_updated = copy.deepcopy(pA)
    if not return_numpy:
        pA_updated = utils.to_dirichlet(pA_updated)

    if isinstance(obs, (int, np.integer)):
        # observation index -> one-hot vector
        obs = np.eye(A.shape[0])[obs]
    elif isinstance(obs, tuple):
        # observation indices -> one one-hot vector per modality
        # NOTE(review): when every modality has the same number of outcomes,
        # np.array(..., dtype=object) builds a 2-D object array rather than a 1-D
        # array of vectors - confirm that utils.to_categorical copes with that.
        obs = np.array(
            [
                np.eye(n_observations[modality])[obs[modality]]
                for modality in range(n_modalities)
            ],
            dtype=object,
        )

    # convert to Categorical to make the cross product easier
    obs = utils.to_categorical(obs)

    # Check the type first: comparing a numpy index array against "all" is an
    # element-wise operation and does not yield a single usable boolean.
    update_all = isinstance(modalities, str) and modalities == "all"

    if update_all and n_modalities == 1:
        # single modality: pA is one array updated as a whole
        dfda = obs.cross(qs, return_numpy=True)
        dfda = dfda * (A > 0).astype("float")
        return pA_updated + (lr * dfda)

    modality_ids = range(n_modalities) if update_all else modalities
    for modality in modality_ids:
        dfda = obs[modality].cross(qs, return_numpy=True)
        # mask out outcomes the model assigns zero probability
        dfda = dfda * (A[modality] > 0).astype("float")
        pA_updated[modality] = pA_updated[modality] + (lr * dfda)

    return pA_updated
def update_posterior_policies_mmp(
    qs_seq_pi,
    A,
    B,
    C,
    policies,
    use_utility=True,
    use_states_info_gain=True,
    use_param_info_gain=False,
    prior=None,
    pA=None,
    pB=None,
    F=None,
    E=None,
    gamma=16.0,
    return_numpy=True,
):
    """
    Compute the posterior over policies from per-policy sequences of state beliefs.

    `qs_seq_pi`: numpy object array that stores posterior marginals beliefs over hidden states
        for each policy. The structure is nested as policies --> timesteps --> hidden state factors.
        So qs_seq_pi[p_idx][t][f] is the belief about factor `f` at time `t`, under policy `p_idx`.
        The horizon is taken from the length of `qs_seq_pi[0]`.
    `A`: numpy object array that stores likelihood mappings for each modality.
    `B`: numpy object array that stores transition matrices (possibly action-conditioned)
        for each hidden state factor
    `C`: prior preferences over outcomes, passed to `calc_expected_utility`
    `policies`: numpy object array that stores each (potentially-multifactorial) policy in
        `policies[p_idx]`. Shape of `policies[p_idx]` is `(num_timesteps, num_factors)`
    `use_utility`: Boolean that determines whether expected utility should be incorporated
        into computation of EFE (default: `True`)
    `use_states_info_gain`: Boolean that determines whether state epistemic value (info gain
        about hidden states) should be incorporated into computation of EFE (default: `True`)
    `use_param_info_gain`: Boolean that determines whether parameter epistemic value (info gain
        about generative model parameters) should be incorporated into computation of EFE
        (default: `False`)
    `prior`: numpy object array that stores priors over hidden states - this matters when
        computing the first value of the parameter info gain for the Dirichlet parameters over B.
        If it is `None`, the B info gain at the first timestep is skipped.
    `pA`: numpy object array that stores Dirichlet priors over likelihood mappings (one per modality)
    `pB`: numpy object array that stores Dirichlet priors over transition mappings
        (one per hidden state factor)
    `F` : 1D numpy array that stores variational free energy of each policy (default: zeros)
    `E` : 1D numpy array that stores the prior over policies (e.g. 'habits'). It is added
        directly to the softmax argument, i.e. it is treated as a log-space term (default: zeros)
    `gamma`: Float that encodes the precision over policies
    `return_numpy`: Boolean that determines whether `q_pi` should be a numpy array or an
        instance of the Categorical class (default: `True`)

    Returns `(q_pi, efe)`: the posterior over policies and the 1D numpy array of
    expected free energies.
    """
    A = utils.to_numpy(A)
    B = utils.to_numpy(B)
    num_obs, _, _, _ = utils.get_model_dimensions(A, B)
    horizon = len(qs_seq_pi[0])
    num_policies = len(qs_seq_pi)

    # Initialise expected observations. Every policy gets its own container;
    # previously all policies pointed at one shared object, so writing the
    # observations of one policy overwrote those of every other policy.
    qo_seq_pi = utils.obj_array(num_policies)
    for p_idx in range(num_policies):
        qo_seq = utils.obj_array(horizon)
        for t in range(horizon):
            qo_seq[t] = utils.obj_array_zeros(num_obs)
        qo_seq_pi[p_idx] = qo_seq

    efe = np.zeros(num_policies)

    if F is None:
        F = np.zeros(num_policies)
    if E is None:
        E = np.zeros(num_policies)

    for p_idx, policy in enumerate(policies):
        qs_seq_pi_i = qs_seq_pi[p_idx]

        for t in range(horizon):
            qo_pi_t = get_expected_obs(qs_seq_pi_i[t], A)
            qo_seq_pi[p_idx][t] = qo_pi_t

            if use_utility:
                efe[p_idx] += calc_expected_utility(qo_pi_t, C)

            if use_states_info_gain:
                efe[p_idx] += calc_states_info_gain(A, qs_seq_pi_i[t])

            if use_param_info_gain:
                if pA is not None:
                    efe[p_idx] += calc_pA_info_gain(pA, qo_pi_t, qs_seq_pi_i[t])
                if pB is not None:
                    if t > 0:
                        efe[p_idx] += calc_pB_info_gain(
                            pB, qs_seq_pi_i[t], qs_seq_pi_i[t - 1], policy)
                    elif prior is not None:
                        # the first step has no preceding belief, so use the prior
                        efe[p_idx] += calc_pB_info_gain(
                            pB, qs_seq_pi_i[t], prior, policy)

    q_pi = softmax(efe * gamma - F + E)

    if return_numpy:
        q_pi = q_pi / q_pi.sum(axis=0)
    else:
        q_pi = utils.to_categorical(q_pi)
        q_pi.normalize()

    return q_pi, efe