def test_mmp_v2(self):
    """ Testing `run_mmp_v2` against the posterior stored in `mmp_b.mat` """
    array_path = os.path.join(os.getcwd(), DATA_PATH + "mmp_b.mat")
    mat_contents = loadmat(file_name=array_path)

    A = mat_contents["A"][0]
    B = mat_contents["B"][0]
    prev_obs = mat_contents["obs_idx"].astype("int64")
    policy = mat_contents["policy"].astype("int64") - 1
    curr_t = mat_contents["t"][0, 0].astype("int64") - 1
    t_horizon = mat_contents["t_horizon"][0, 0].astype("int64")
    prev_actions = mat_contents["previous_actions"].astype("int64") - 1
    result_spm = mat_contents["qs"][0]
    _ = mat_contents["likelihoods"][0]

    num_obs, num_states, _, num_factors = get_model_dimensions(A, B)
    prev_obs = convert_observation_array(
        prev_obs[:, max(0, curr_t - t_horizon):(curr_t + 1)], num_obs
    )

    ll_seq = get_joint_likelihood_seq(A, prev_obs, num_states)
    qs_seq = run_mmp_v2(A, B, ll_seq, policy, prev_actions, num_iter=5, grad_descent=True)

    result_pymdp = qs_seq[-1]
    for f in range(num_factors):
        self.assertTrue(np.isclose(result_spm[f].squeeze(), result_pymdp[f]).all())
def test_mmp_d(self):
    """ Testing our SPM-ified version of `run_MMP` with 2 hidden state factors
    & 2 outcome modalities, at the final timestep of the generative process
    (boundary condition test).

    @NOTE: The `mmp_d.mat` test has issues with the prediction errors, but the future
    messages are totally fine (even at the last timestep of variational iteration).
    """
    array_path = os.path.join(os.getcwd(), DATA_PATH + "mmp_d.mat")
    mat_contents = loadmat(file_name=array_path)

    A = mat_contents["A"][0]
    B = mat_contents["B"][0]
    prev_obs = mat_contents["obs_idx"].astype("int64")
    policy = mat_contents["policy"].astype("int64") - 1
    curr_t = mat_contents["t"][0, 0].astype("int64") - 1
    t_horizon = mat_contents["t_horizon"][0, 0].astype("int64")
    prev_actions = mat_contents["previous_actions"].astype("int64") - 1
    result_spm = mat_contents["qs"][0]
    likelihoods = mat_contents["likelihoods"][0]

    num_obs, num_states, _, num_factors = get_model_dimensions(A, B)
    prev_obs = convert_observation_array(
        prev_obs[:, max(0, curr_t - t_horizon):(curr_t + 1)], num_obs
    )
    prev_actions = prev_actions[(max(0, curr_t - t_horizon) - 1):, :]

    prior = np.empty(num_factors, dtype=object)
    for f in range(num_factors):
        uniform = np.ones(num_states[f]) / num_states[f]
        prior[f] = B[f][:, :, prev_actions[0, f]].dot(uniform)

    lh_seq = get_joint_likelihood_seq(A, prev_obs, num_states)
    qs_seq, _ = run_mmp(
        lh_seq, B, policy, prev_actions[1:], prior=prior,
        num_iter=5, grad_descent=True, last_timestep=True
    )

    result_pymdp = qs_seq[-1]
    for f in range(num_factors):
        self.assertTrue(np.isclose(result_spm[f].squeeze(), result_pymdp[f]).all())
def test_mmp_a(self):
    """ Testing our SPM-ified version of `run_MMP` with 1 hidden state factor
    & 1 outcome modality, at a random fixed timestep during the generative process
    """
    array_path = os.path.join(os.getcwd(), DATA_PATH + "mmp_a.mat")
    mat_contents = loadmat(file_name=array_path)

    A = mat_contents["A"][0]
    B = mat_contents["B"][0]
    prev_obs = mat_contents["obs_idx"].astype("int64")
    policy = mat_contents["policy"].astype("int64") - 1
    curr_t = mat_contents["t"][0, 0].astype("int64") - 1
    t_horizon = mat_contents["t_horizon"][0, 0].astype("int64")
    prev_actions = mat_contents["previous_actions"].astype("int64") - 1
    result_spm = mat_contents["qs"][0]
    likelihoods = mat_contents["likelihoods"][0]

    num_obs, num_states, _, num_factors = get_model_dimensions(A, B)
    prev_obs = convert_observation_array(
        prev_obs[:, max(0, curr_t - t_horizon):(curr_t + 1)], num_obs
    )
    prev_actions = prev_actions[(max(0, curr_t - t_horizon) - 1):, :]

    prior = np.empty(num_factors, dtype=object)
    for f in range(num_factors):
        uniform = np.ones(num_states[f]) / num_states[f]
        prior[f] = B[f][:, :, prev_actions[0, f]].dot(uniform)

    ll_seq = get_joint_likelihood_seq(A, prev_obs, num_states)
    qs_seq = run_mmp_v2(
        A, B, ll_seq, policy, prev_actions[1:], prior=prior, num_iter=5, grad_descent=True
    )

    result_pymdp = qs_seq[-1]
    for f in range(num_factors):
        self.assertTrue(np.isclose(result_spm[f].squeeze(), result_pymdp[f]).all())
def test_mmp_c(self):
    """ Testing our SPM-ified version of `run_MMP` with 2 hidden state factors
    & 2 outcome modalities, at the very first timestep of the generative process
    (boundary condition test), so there are no previous actions.
    """
    array_path = os.path.join(os.getcwd(), DATA_PATH + "mmp_c.mat")
    mat_contents = loadmat(file_name=array_path)

    A = mat_contents["A"][0]
    B = mat_contents["B"][0]
    prev_obs = mat_contents["obs_idx"].astype("int64")
    policy = mat_contents["policy"].astype("int64") - 1
    curr_t = mat_contents["t"][0, 0].astype("int64") - 1
    t_horizon = mat_contents["t_horizon"][0, 0].astype("int64")
    # prev_actions = mat_contents["previous_actions"].astype("int64") - 1
    result_spm = mat_contents["qs"][0]
    likelihoods = mat_contents["likelihoods"][0]

    num_obs, num_states, _, num_factors = get_model_dimensions(A, B)
    prev_obs = convert_observation_array(
        prev_obs[:, max(0, curr_t - t_horizon):(curr_t + 1)], num_obs
    )
    # prev_actions = prev_actions[(max(0, curr_t - t_horizon)) :, :]

    lh_seq = get_joint_likelihood_seq(A, prev_obs, num_states)
    qs_seq, _ = run_mmp(
        lh_seq, B, policy, prev_actions=None, prior=None, num_iter=5, grad_descent=True
    )

    result_pymdp = qs_seq[-1]
    for f in range(num_factors):
        self.assertTrue(np.isclose(result_spm[f].squeeze(), result_pymdp[f]).all())
def test_mmp_fixedpoints(self):
    """ Testing that the variational free energy decreases across successive updates
    when `run_mmp` is run with fixed-point updates (`grad_descent=False`) and
    `save_vfe_seq=True`.
    """
    array_path = os.path.join(os.getcwd(), DATA_PATH + "mmp_a.mat")
    mat_contents = loadmat(file_name=array_path)

    A = mat_contents["A"][0]
    B = mat_contents["B"][0]
    prev_obs = mat_contents["obs_idx"].astype("int64")
    policy = mat_contents["policy"].astype("int64") - 1
    curr_t = mat_contents["t"][0, 0].astype("int64") - 1
    t_horizon = mat_contents["t_horizon"][0, 0].astype("int64")
    prev_actions = mat_contents["previous_actions"].astype("int64") - 1
    result_spm = mat_contents["qs"][0]
    likelihoods = mat_contents["likelihoods"][0]

    num_obs, num_states, _, num_factors = get_model_dimensions(A, B)
    prev_obs = convert_observation_array(
        prev_obs[:, max(0, curr_t - t_horizon):(curr_t + 1)], num_obs
    )
    prev_actions = prev_actions[(max(0, curr_t - t_horizon) - 1):, :]

    prior = np.empty(num_factors, dtype=object)
    for f in range(num_factors):
        uniform = np.ones(num_states[f]) / num_states[f]
        prior[f] = B[f][:, :, prev_actions[0, f]].dot(uniform)

    lh_seq = get_joint_likelihood_seq(A, prev_obs, num_states)
    qs_seq, F = run_mmp(
        lh_seq, B, policy, prev_actions[1:], prior=prior,
        num_iter=5, grad_descent=False, save_vfe_seq=True
    )

    self.assertTrue((np.diff(np.array(F)) < 0).all())
def run_mmp(
    lh_seq, B, policy, prev_actions=None, prior=None, num_iter=10,
    grad_descent=False, tau=0.25, last_timestep=False, save_vfe_seq=False
):
    """
    Marginal message passing scheme for updating posterior beliefs about multi-factor
    hidden states over time, conditioned on a particular policy.

    Parameters:
    --------------
    `lh_seq` [numpy object array]:
        Likelihoods of hidden state factors given a sequence of observations over time.
        This is logged beforehand.
    `B` [numpy object array]:
        Transition likelihood of the generative model, mapping from hidden states at T
        to hidden states at T+1. One B matrix per hidden state factor (e.g. `B[f]`
        corresponds to the f-th factor's B matrix). This is used in inference to compute
        the 'forward' and 'backward' messages conveyed between beliefs about
        temporally-adjacent timepoints.
    `policy` [2-D numpy.ndarray]:
        Matrix of shape (policy_len, num_control_factors) that indicates the index of
        each action (control state index) at timestep t and control factor f, in element
        `policy[t, f]`, for a given policy.
    `prev_actions` [None or 2-D numpy.ndarray]:
        If provided, should be a matrix of previous actions of shape
        (infer_len, num_control_factors) that indicates the index of each action
        (control state index) taken in the past (up until the current timestep).
    `prior` [None or numpy object array]:
        If provided, this is a numpy object array with one sub-array per hidden state
        factor, that stores the prior beliefs about initial states (at t = 0, relative
        to `infer_len`).
    `num_iter` [int]:
        Number of variational iterations.
    `grad_descent` [bool]:
        Flag for whether to use gradient descent (predictive-coding style updates).
    `tau` [float]:
        Decay constant used in the `grad_descent` version.
    `last_timestep` [bool]:
        Flag for whether we are at the last timestep of belief updating.
    `save_vfe_seq` [bool]:
        Flag for whether to save the sequence of variational free energies over time
        (for this policy). If `False`, then the VFE is integrated across time/iterations.
    Returns:
    --------------
    `qs_seq` [list]:
        The sequence of beliefs about the different hidden state factors over time,
        one multi-factor posterior belief per timestep in `infer_len`.
    `F` [float or list, depending on the setting of `save_vfe_seq`]
    """

    # window
    past_len = len(lh_seq)
    future_len = policy.shape[0]

    if last_timestep:
        infer_len = past_len + future_len - 1
    else:
        infer_len = past_len + future_len

    future_cutoff = past_len + future_len - 2

    # dimensions
    _, num_states, _, num_factors = get_model_dimensions(A=None, B=B)
    B = to_arr_of_arr(B)

    # beliefs
    qs_seq = obj_array(infer_len)
    for t in range(infer_len):
        qs_seq[t] = obj_array_uniform(num_states)

    # last message
    qs_T = obj_array_zeros(num_states)

    # prior
    if prior is None:
        prior = obj_array_uniform(num_states)

    # transposed transition
    trans_B = obj_array(num_factors)
    for f in range(num_factors):
        trans_B[f] = spm_norm(np.swapaxes(B[f], 0, 1))

    # full policy
    if prev_actions is None:
        prev_actions = np.zeros((past_len, policy.shape[1]))
    policy = np.vstack((prev_actions, policy))

    # initialise variational free energy of policy (accumulated over time)
    if save_vfe_seq:
        F = []
        F.append(0.0)
    else:
        F = 0.0

    for itr in range(num_iter):
        for t in range(infer_len):
            for f in range(num_factors):
                # likelihood
                if t < past_len:
                    lnA = spm_log(spm_dot(lh_seq[t], qs_seq[t], [f]))
                else:
                    lnA = np.zeros(num_states[f])

                # past message
                if t == 0:
                    lnB_past = spm_log(prior[f])
                else:
                    past_msg = B[f][:, :, int(policy[t - 1, f])].dot(qs_seq[t - 1][f])
                    lnB_past = spm_log(past_msg)

                # future message
                if t >= future_cutoff:
                    lnB_future = qs_T[f]
                else:
                    future_msg = trans_B[f][:, :, int(policy[t, f])].dot(qs_seq[t + 1][f])
                    lnB_future = spm_log(future_msg)

                # inference
                if grad_descent:
                    lnqs = spm_log(qs_seq[t][f])
                    coeff = 1 if (t >= future_cutoff) else 2
                    err = (coeff * lnA + lnB_past + lnB_future) - coeff * lnqs
                    err -= err.mean()
                    lnqs = lnqs + tau * err
                    qs_seq[t][f] = softmax(lnqs)
                    if (t == 0) or (t == (infer_len - 1)):
                        F += 0.5 * lnqs.dot(0.5 * err)
                    else:
                        # @NOTE: not sure why Karl does this in SPM_MDP_VB_X, we should look into this
                        F += lnqs.dot(0.5 * (err - (num_factors - 1) * lnA / num_factors))
                else:
                    qs_seq[t][f] = softmax(lnA + lnB_past + lnB_future)

            if not grad_descent:
                if save_vfe_seq:
                    if t < past_len:
                        F.append(
                            F[-1]
                            + calc_free_energy(
                                qs_seq[t], prior, num_factors, likelihood=spm_log(lh_seq[t])
                            )[0]
                        )
                    else:
                        F.append(F[-1] + calc_free_energy(qs_seq[t], prior, num_factors)[0])
                else:
                    if t < past_len:
                        F += calc_free_energy(
                            qs_seq[t], prior, num_factors, likelihood=spm_log(lh_seq[t])
                        )
                    else:
                        F += calc_free_energy(qs_seq[t], prior, num_factors)

    return qs_seq, F
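
# A hypothetical toy usage sketch of `run_mmp` (not taken from the .mat fixtures used in the
# tests): one hidden state factor with 3 states, a single 'stay' action, two past observations
# and a two-step policy. The joint likelihood sequence is built by hand here rather than via
# `get_joint_likelihood_seq`, so all of the toy arrays below are illustrative assumptions only.
def _run_mmp_toy_example():
    num_states = [3]
    A_toy = np.array([[0.8, 0.1, 0.1],
                      [0.2, 0.9, 0.9]])            # P(o | s): 2 outcomes x 3 hidden states
    B_toy = obj_array(1)
    B_toy[0] = np.eye(num_states[0])[:, :, None]   # single control state: states persist

    # joint likelihood over states for two past observations (outcome 0 at t=0, outcome 1 at t=1)
    lh_seq = obj_array(2)
    lh_seq[0] = A_toy[0, :]
    lh_seq[1] = A_toy[1, :]

    policy = np.zeros((2, 1), dtype=int)           # two future steps of action 0
    qs_seq, F = run_mmp(lh_seq, B_toy, policy, num_iter=5, grad_descent=True)
    return qs_seq, F                               # per-timestep posteriors and integrated VFE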
def spm_MDP_G(A, x):
    """
    Calculates the Bayesian surprise in the same way as spm_MDP_G.m does in the
    original MATLAB code.

    Parameters
    ----------
    A (numpy ndarray or array-object):
        Array assigning likelihoods of observations/outcomes under the various
        hidden state configurations.
    x (numpy ndarray or array-object):
        Categorical distribution presenting probabilities of hidden states (this can
        also be interpreted as the predictive density over hidden states/causes if
        you're calculating the expected Bayesian surprise).

    Returns
    -------
    G (float):
        The (expected) Bayesian surprise under the density specified by x -- namely,
        this scores how much an expected observation would update beliefs about hidden
        states x, were it to be observed.
    """
    _, _, Ng, _ = utils.get_model_dimensions(A=A)

    # Probability distribution over the hidden causes: i.e., Q(x)
    qx = spm_cross(x)
    G = 0
    qo = 0
    idx = np.array(np.where(qx > np.exp(-16))).T

    if utils.is_arr_of_arr(A):
        # Accumulate expectation of entropy: i.e., E[lnP(o|x)]
        for i in idx:
            # Probability over outcomes for this combination of causes
            po = np.ones(1)
            for g in range(Ng):
                index_vector = [slice(0, A[g].shape[0])] + list(i)
                po = spm_cross(po, A[g][tuple(index_vector)])
            po = po.ravel()
            qo += qx[tuple(i)] * po
            G += qx[tuple(i)] * po.dot(np.log(po + np.exp(-16)))
    else:
        for i in idx:
            po = np.ones(1)
            index_vector = [slice(0, A.shape[0])] + list(i)
            po = spm_cross(po, A[tuple(index_vector)])
            po = po.ravel()
            qo += qx[tuple(i)] * po
            G += qx[tuple(i)] * po.dot(np.log(po + np.exp(-16)))

    # Subtract negative entropy of expectations: i.e., E[lnQ(o)]
    G = G - qo.dot(spm_log(qo))  # type: ignore

    return G
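
# A small numpy-only illustration (toy numbers, assumptions only) of the quantity computed by
# `spm_MDP_G` for a single-modality likelihood: G = E_Q(s)[ E_P(o|s)[ln P(o|s)] ] - E_Q(o)[ln Q(o)],
# i.e. the expected information gain about hidden states from the predicted observations.
def _bayesian_surprise_toy_example():
    A_toy = np.array([[0.9, 0.2],
                      [0.1, 0.8]])          # P(o | s): 2 outcomes x 2 hidden states
    x_toy = np.array([0.5, 0.5])            # Q(s): belief / predictive density over states

    qo = A_toy.dot(x_toy)                   # predicted outcome distribution Q(o)
    G = sum(
        x_toy[s] * A_toy[:, s].dot(np.log(A_toy[:, s] + np.exp(-16)))
        for s in range(A_toy.shape[1])
    )                                       # accumulate E_Q(s)[ sum_o P(o|s) ln P(o|s) ]
    G -= qo.dot(np.log(qo + np.exp(-16)))   # subtract E_Q(o)[ln Q(o)]
    return G                                # > 0 whenever observations are informative about states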
def update_posterior_policies_mmp(
    qs_seq_pi,
    A,
    B,
    C,
    policies,
    use_utility=True,
    use_states_info_gain=True,
    use_param_info_gain=False,
    prior=None,
    pA=None,
    pB=None,
    F=None,
    E=None,
    gamma=16.0,
    return_numpy=True,
):
    """
    `qs_seq_pi`: numpy object array that stores posterior marginal beliefs over hidden states
        for each policy. The structure is nested as policies --> timesteps --> hidden state
        factors, so `qs_seq_pi[p_idx][t][f]` is the belief about factor `f` at time `t`,
        under policy `p_idx`.
    `A`: numpy object array that stores likelihood mappings for each modality.
    `B`: numpy object array that stores transition matrices (possibly action-conditioned)
        for each hidden state factor.
    `policies`: numpy object array that stores each (potentially multi-factorial) policy in
        `policies[p_idx]`. Shape of `policies[p_idx]` is `(num_timesteps, num_factors)`.
    `use_utility`: Boolean that determines whether expected utility should be incorporated
        into the computation of EFE (default: `True`).
    `use_states_info_gain`: Boolean that determines whether state epistemic value (info gain
        about hidden states) should be incorporated into the computation of EFE (default: `True`).
    `use_param_info_gain`: Boolean that determines whether parameter epistemic value (info gain
        about generative model parameters) should be incorporated into the computation of EFE
        (default: `False`).
    `prior`: numpy object array that stores priors over hidden states - this matters when
        computing the first value of the parameter info gain for the Dirichlet parameters over B.
    `pA`: numpy object array that stores Dirichlet priors over likelihood mappings (one per modality).
    `pB`: numpy object array that stores Dirichlet priors over transition mappings (one per
        hidden state factor).
    `F`: 1D numpy array that stores the variational free energy of each policy.
    `E`: 1D numpy array that stores the prior probability of each policy (e.g. 'habits').
    `gamma`: Float that encodes the precision over policies.
    `return_numpy`: Boolean that determines whether output should be a numpy array or an
        instance of the Categorical class (default: `True`).
    """
    A = utils.to_numpy(A)
    B = utils.to_numpy(B)
    num_obs, num_states, num_modalities, num_factors = utils.get_model_dimensions(A, B)

    horizon = len(qs_seq_pi[0])
    num_policies = len(qs_seq_pi)

    # initialise `qo_seq` as an object array, used to populate `qo_seq_pi`
    qo_seq = utils.obj_array(horizon)
    for t in range(horizon):
        qo_seq[t] = utils.obj_array_zeros(num_obs)

    # initialise expected observations, one independent copy per policy (avoids aliasing
    # the same underlying `qo_seq` array across policies)
    qo_seq_pi = utils.obj_array(num_policies)
    for p_idx in range(num_policies):
        qo_seq_pi[p_idx] = copy.deepcopy(qo_seq)

    efe = np.zeros(num_policies)

    if F is None:
        F = np.zeros(num_policies)
    if E is None:
        E = np.zeros(num_policies)

    for p_idx, policy in enumerate(policies):
        qs_seq_pi_i = qs_seq_pi[p_idx]
        for t in range(horizon):
            qo_pi_t = get_expected_obs(qs_seq_pi_i[t], A)
            qo_seq_pi[p_idx][t] = qo_pi_t

            if use_utility:
                efe[p_idx] += calc_expected_utility(qo_seq_pi[p_idx][t], C)

            if use_states_info_gain:
                efe[p_idx] += calc_states_info_gain(A, qs_seq_pi_i[t])

            if use_param_info_gain:
                if pA is not None:
                    efe[p_idx] += calc_pA_info_gain(pA, qo_seq_pi[p_idx][t], qs_seq_pi_i[t])
                if pB is not None:
                    if t > 0:
                        efe[p_idx] += calc_pB_info_gain(pB, qs_seq_pi_i[t], qs_seq_pi_i[t - 1], policy)
                    elif prior is not None:
                        efe[p_idx] += calc_pB_info_gain(pB, qs_seq_pi_i[t], prior, policy)

    q_pi = softmax(efe * gamma - F + E)

    if return_numpy:
        q_pi = q_pi / q_pi.sum(axis=0)
    else:
        q_pi = utils.to_categorical(q_pi)
        q_pi.normalize()

    return q_pi, efe
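
# A minimal numpy sketch (made-up numbers, not library output) of the final step above: the
# posterior over policies is a softmax of the gamma-scaled EFE scores, corrected by each
# policy's variational free energy F and prior E.
def _policy_posterior_toy_example():
    efe_toy = np.array([2.0, 3.5, 1.0])   # accumulated utility + info gain per policy
    F_toy = np.array([0.6, 0.4, 0.9])     # variational free energy of each policy
    E_toy = np.zeros(3)                   # flat prior over policies ('habits')
    gamma_toy = 16.0
    q_pi = softmax(efe_toy * gamma_toy - F_toy + E_toy)
    return q_pi                           # favours policies with high EFE score and low F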
def update_posterior_states_v2(
    A,
    B,
    prev_obs,
    policies,
    prev_actions=None,
    prior=None,
    return_numpy=True,
    policy_sep_prior=True,
    **kwargs,
):
    """
    Update posterior over hidden states using marginal message passing
    """
    # safe convert to numpy
    A = utils.to_numpy(A)

    num_obs, num_states, num_modalities, num_factors = utils.get_model_dimensions(A, B)

    A = utils.to_arr_of_arr(A)
    B = utils.to_arr_of_arr(B)

    prev_obs = utils.process_observation_seq(prev_obs, num_modalities, num_obs)
    if prior is not None:
        if policy_sep_prior:
            for p_idx, policy in enumerate(policies):
                prior[p_idx] = utils.process_prior(prior[p_idx], num_factors)
        else:
            prior = utils.process_prior(prior, num_factors)

    lh_seq = get_joint_likelihood_seq(A, prev_obs, num_states)

    if prev_actions is not None:
        prev_actions = np.stack(prev_actions, 0)

    qs_seq_pi = utils.obj_array(len(policies))
    F = np.zeros(len(policies))  # variational free energy of policies

    if policy_sep_prior:
        for p_idx, policy in enumerate(policies):
            # get the belief sequence and free energy for each policy, using a policy-specific prior
            qs_seq_pi[p_idx], F[p_idx] = run_mmp(
                lh_seq,
                B,
                policy,
                prev_actions=prev_actions,
                prior=prior[p_idx],
                **kwargs
            )
    else:
        for p_idx, policy in enumerate(policies):
            # get the belief sequence and free energy for each policy, using a shared prior
            qs_seq_pi[p_idx], F[p_idx] = run_mmp(
                lh_seq,
                B,
                policy,
                prev_actions=prev_actions,
                prior=prior,
                **kwargs
            )

    return qs_seq_pi, F
def run_mmp_v2(
    A, B, ll_seq, policy, prev_actions=None, prior=None, num_iter=10, grad_descent=False, tau=0.25
):
    """
    Marginal message passing scheme for updating posterior beliefs over multi-factor hidden
    states under a given policy (v2 variant of `run_mmp` that takes `A` and the joint
    likelihood sequence `ll_seq` directly, and returns only the belief sequence).
    """
    # window
    past_len = len(ll_seq)
    future_len = policy.shape[0]
    infer_len = past_len + future_len
    future_cutoff = past_len + future_len - 2

    # dimensions
    _, num_states, _, num_factors = get_model_dimensions(A, B)
    A = to_arr_of_arr(A)
    B = to_arr_of_arr(B)

    # beliefs
    qs_seq = [np.empty(num_factors, dtype=object) for _ in range(infer_len)]
    for t in range(infer_len):
        for f in range(num_factors):
            qs_seq[t][f] = np.ones(num_states[f]) / num_states[f]

    # last message
    qs_T = np.empty(num_factors, dtype=object)
    for f in range(num_factors):
        qs_T[f] = np.zeros(num_states[f])

    # prior
    if prior is None:
        prior = np.empty(num_factors, dtype=object)
        for f in range(num_factors):
            prior[f] = np.ones(num_states[f]) / num_states[f]

    # transposed transition
    trans_B = np.empty(num_factors, dtype=object)
    for f in range(num_factors):
        trans_B[f] = np.zeros_like(B[f])
        for u in range(B[f].shape[2]):
            trans_B[f][:, :, u] = spm_norm(B[f][:, :, u].T)

    # full policy
    if prev_actions is None:
        prev_actions = np.zeros((past_len, policy.shape[1]))
    policy = np.vstack((prev_actions, policy))

    for _ in range(num_iter):
        for t in range(infer_len):
            for f in range(num_factors):
                # likelihood
                if t < past_len:
                    lnA = np.log(spm_dot(ll_seq[t], qs_seq[t], [f]) + 1e-16)
                else:
                    lnA = np.zeros(num_states[f])

                # past message
                if t == 0:
                    lnB_past = np.log(prior[f] + 1e-16)
                else:
                    past_msg = B[f][:, :, int(policy[t - 1, f])].dot(qs_seq[t - 1][f])
                    lnB_past = np.log(past_msg + 1e-16)

                # future message
                if t >= future_cutoff:
                    lnB_future = qs_T[f]
                else:
                    future_msg = trans_B[f][:, :, int(policy[t, f])].dot(qs_seq[t + 1][f])
                    lnB_future = np.log(future_msg + 1e-16)

                # inference
                if grad_descent:
                    lnqs = np.log(qs_seq[t][f] + 1e-16)
                    coeff = 1 if (t >= future_cutoff) else 2
                    err = (coeff * lnA + lnB_past + lnB_future) - coeff * lnqs
                    err -= err.mean()
                    lnqs = lnqs + tau * err
                    qs_seq[t][f] = softmax(lnqs)
                else:
                    qs_seq[t][f] = softmax(lnA + lnB_past + lnB_future)

    return qs_seq
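
# A self-contained numpy sketch of the per-factor update inside the `grad_descent` branch of
# `run_mmp_v2`: beliefs move along the mean-corrected prediction error between the log-messages
# and the current log-beliefs, then get renormalised with a softmax. Toy values only.
def _grad_descent_update_toy_example(tau=0.25):
    qs = np.array([0.25, 0.25, 0.5])                 # current belief over one hidden state factor
    lnA = np.log(np.array([0.7, 0.2, 0.1]))          # log-likelihood message
    lnB_past = np.log(np.array([0.4, 0.4, 0.2]))     # forward (past) message
    lnB_future = np.log(np.array([0.3, 0.3, 0.4]))   # backward (future) message

    lnqs = np.log(qs + 1e-16)
    coeff = 2                                        # 2 before the future cutoff, 1 at/after it
    err = (coeff * lnA + lnB_past + lnB_future) - coeff * lnqs
    err -= err.mean()                                # mean-correct the error (softmax is shift-invariant)
    return softmax(lnqs + tau * err)                 # updated posterior for this factor and timestep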