    def test_fpi_inference(self):
        num_obs = [2, 4]
        num_states = [2, 2]
        num_control = [2, 2]

        A = utils.random_A_matrix(num_obs, num_states)
        B = utils.random_B_matrix(num_states, num_control)

        C = utils.obj_array_zeros(num_obs)
        C[1][0] = 1.0
        C[1][1] = -2.0

        agent = Agent(A=A, B=B, C=C, control_fac_idx=[1])

        o, s = [0, 2], [0, 0]
        qx = agent.infer_states(o)
        agent.infer_policies()
        action = agent.sample_action()

        self.assertEqual(len(action), len(num_control))
    def test_mmp_active_inference(self):
        """
        Tests to make sure the whole active inference loop works (with various past and
        future inference/policy horizons).

        @TODO: Need to check this against a MATLAB output, where the sequence of all
        observations / actions / generative model parameters are used (with deterministic
        action sampling and pre-determined generative process outputs - i.e. no effects
        of action)
        """
        num_obs = [3, 2]
        num_states = [4, 3]
        num_control = [1, 3]

        A = utils.random_A_matrix(num_obs, num_states)
        B = utils.random_B_matrix(num_states, num_control)

        C = utils.obj_array_zeros(num_obs)
        C[1][0] = 1.0
        C[1][1] = -2.0

        agent = Agent(
            A=A,
            B=B,
            C=C,
            control_fac_idx=[1],
            inference_algo="MMP",
            policy_len=2,
            inference_horizon=3,
        )

        T = 10
        for t in range(T):
            # just randomly generate observations at each timestep, no generative process
            o = [np.random.randint(num_ob) for num_ob in num_obs]
            qx = agent.infer_states(o)
            agent.infer_policies()
            action = agent.sample_action()

        print(agent.prev_actions)
        print(agent.prev_obs)
    def test_mmp_inference(self):
        num_obs = [2, 4]
        num_states = [2, 2]
        num_control = [2, 2]

        A = utils.random_A_matrix(num_obs, num_states)
        B = utils.random_B_matrix(num_states, num_control)

        C = utils.obj_array_zeros(num_obs)
        C[1][0] = 1.0
        C[1][1] = -2.0

        agent = Agent(
            A=A,
            B=B,
            C=C,
            control_fac_idx=[1],
            inference_algo="MMP",
            policy_len=5,
            inference_horizon=1,
        )

        o = [0, 2]
        qx = agent.infer_states(o)

        print(qx[0].shape)
        print(qx[1].shape)
    def test_update_posterior_states_v2(self):
        """
        Testing our SPM-ified version of `run_MMP` with multiple hidden state factors
        and outcome modalities, at a random fixed timestep during the generative process
        """

        past_len = 3
        future_len = 4
        num_policies = 5

        num_states = [6, 7, 8]
        num_controls = [9, 10, 11]
        num_obs = [12, 13, 14]
        num_modalities = len(num_obs)

        A = random_A_matrix(num_obs, num_states)
        B = random_B_matrix(num_states, num_controls)

        prev_obs = [rand_onehot_obs(num_obs) for _ in range(past_len)]
        prev_actions = np.array([rand_controls(num_controls) for _ in range(past_len)])
        policies = [
            np.array([rand_controls(num_controls) for _ in range(future_len)])
            for _ in range(num_policies)
        ]
        prior = rand_dist_states(num_states)

        qs_seq_pi, VFE_policies = update_posterior_states_v2(
            A, B, prev_obs, policies, prev_actions, prior=prior, policy_sep_prior=False
        )

        # keep only the beliefs about future timesteps, for each policy
        qs_seq_pi_future = utils.obj_array(num_policies)
        for p_idx in range(num_policies):
            qs_seq_pi_future[p_idx] = qs_seq_pi[p_idx][(1 + past_len):]

        # create C matrix
        # horizon = len(qs_seq_pi_future[0])
        # C = utils.obj_array(horizon)
        # for t in range(horizon):
        #     C[t] = utils.obj_array(num_modalities)
        #     for g in range(num_modalities):
        #         C[t][g] = np.ones(num_obs[g])
        C = utils.obj_array_uniform(num_obs)

        q_pi, efe = update_posterior_policies_mmp(
            qs_seq_pi_future,
            A,
            B,
            C,
            policies,
            use_utility=True,
            use_states_info_gain=True,
            use_param_info_gain=False,
            prior=None,
            pA=None,
            pB=None,
            F=VFE_policies,
            E=None,
            gamma=16.0,
            return_numpy=True,
        )

        qs_pi_curr_t = utils.obj_array(num_policies)
        for p_idx in range(num_policies):
            qs_pi_curr_t[p_idx] = qs_seq_pi[p_idx][past_len]

        # Bayesian model average of hidden states across policies
        qs_bma = average_states_over_policies(qs_pi_curr_t, q_pi)
    return obs


def rand_controls(num_controls):
    if type(num_controls) is int:
        num_controls = [num_controls]
    controls = np.zeros(len(num_controls))
    for i in range(len(num_controls)):
        controls[i] = np.random.randint(num_controls[i])
    return controls


if __name__ == "__main__":

    past_len = 4
    future_len = 4

    num_states = [8, 12, 13]
    num_controls = [12, 3, 16]
    num_obs = [12, 14, 6]

    A = random_A_matrix(num_obs, num_states)
    B = random_B_matrix(num_states, num_controls)

    prev_obs = [rand_onehot_obs(num_obs) for _ in range(past_len)]
    prev_actions = np.array([rand_controls(num_controls) for _ in range(past_len)])
    policy = np.array([rand_controls(num_controls) for _ in range(future_len)])

    ll_seq = get_joint_likelihood_seq(A, prev_obs, num_states)
    qs_seq = run_mmp_v2(A, B, ll_seq, policy, grad_descent=True)

    for t, qs in enumerate(qs_seq):
        print(f"Step {t} shape {[el.shape for el in qs]}")
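
# --- Illustration only, not part of the test suite ---
# A minimal sketch of the Bayesian model average computed by
# `average_states_over_policies` in the tests above: beliefs over each hidden state
# factor are averaged across policies, weighted by the policy posterior `q_pi`.
# The helper name `bma_states_sketch` is hypothetical; it assumes `qs_pi` is a
# list with one entry per policy, each entry holding one belief array per factor,
# and `q_pi` is a 1D array of policy probabilities.
def bma_states_sketch(qs_pi, q_pi):
    num_factors = len(qs_pi[0])
    # start from zero beliefs with the same per-factor dimensions
    qs_bma = [np.zeros_like(qs_pi[0][f]) for f in range(num_factors)]
    for p_idx, qs_p in enumerate(qs_pi):
        for f in range(num_factors):
            qs_bma[f] = qs_bma[f] + q_pi[p_idx] * qs_p[f]
    return qs_bma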