def test_pB_info_gain(self):
    """
    Test the pB_info_gain function. Demonstrates operation by manipulating the
    shape of the Dirichlet priors over likelihood parameters (pB), which affects
    information gain for different states
    """
    n_states = [2]
    n_control = [2]

    qs = Categorical(values=np.eye(n_states[0])[0])

    B = Categorical(values=construct_generic_B(n_states, n_control))

    pB_matrix = construct_pB(n_states, n_control)

    # create a prior over Dirichlets such that there is a skew in the parameters
    # of the likelihood mapping from hidden states to hidden states under the
    # second action, such that hidden state 0 is considered more likely than the
    # other, given that action. Therefore taking that action yields an expected
    # state that affords high information gain about that part of the
    # likelihood distribution.
    pB_matrix[0, :, 1] = 2.0
    pB = Dirichlet(values=pB_matrix)

    # single timestep
    n_step = 1
    policies = core.construct_policies(n_states, n_control, policy_len=n_step)

    pB_info_gains = np.zeros(len(policies))
    for idx, policy in enumerate(policies):
        qs_pi = core.get_expected_states(qs, B, policy)
        pB_info_gains[idx] += core.calc_pB_info_gain(pB, qs_pi, qs, policy)

    self.assertGreater(pB_info_gains[1], pB_info_gains[0])
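# A worked illustration of what the skew above does, assuming (for illustration
# only) that construct_pB returns unit counts before the skew is applied. Under
# the second action (index 1), each column of that B-slice then has counts
# [2, 1], so the prior expectation of the transition is [2/3, 1/3]: state 0 is
# deemed more likely under that action, but with a total of only 3 counts the
# mapping is still weakly determined. This is the asymmetry the test relies on
# to separate the two policies' parameter information gains.
import numpy as np

pB_slice_action_1 = np.ones((2, 2))
pB_slice_action_1[0, :] = 2.0
expected_transition = pB_slice_action_1 / pB_slice_action_1.sum(axis=0, keepdims=True)
# expected_transition[:, s] == [2/3, 1/3] for either previous state s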
def test_construct_policies_single_factor(self):
    """
    Test policy constructor function for single-factor control states
    """
    n_states = [3]
    n_control = [3]
    control_fac_idx = [0]

    # One-step policies
    policy_len = 1
    policies = core.construct_policies(n_states, n_control, policy_len, control_fac_idx)
    self.assertEqual(len(policies), n_control[0])
    for policy in policies:
        self.assertEqual(policy.shape[0], policy_len)  # type: ignore

    # Multi-step policies
    policy_len = 3
    policies = core.construct_policies(n_states, n_control, policy_len, control_fac_idx)
    for policy in policies:
        self.assertEqual(policy.shape[0], policy_len)  # type: ignore

    # Now leave out the optional arguments of `construct_policies`,
    # i.e. `n_control` and `control_fac_idx`
    n_states = [3]

    # One-step policies
    policy_len = 1
    policies, n_control = core.construct_policies(n_states, None, policy_len, None)
    self.assertEqual(len(policies), n_control[0])
    self.assertEqual(n_states[0], n_control[0])
    for policy in policies:
        self.assertEqual(policy.shape[0], policy_len)  # type: ignore

    # Multi-step policies
    policy_len = 3
    policies, n_control = core.construct_policies(n_states, None, policy_len, None)
    self.assertEqual(n_states[0], n_control[0])
    for policy in policies:
        self.assertEqual(policy.shape[0], policy_len)  # type: ignore
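# A minimal sketch (an illustration, not necessarily how core.construct_policies
# is implemented) of what the constructor enumerates: every policy is one
# sequence of control states, i.e. an element of the Cartesian product of the
# per-factor control spaces taken `policy_len` times, stored as an array of
# shape (policy_len, n_factors).
import itertools
import numpy as np

def enumerate_policies_sketch(n_control, policy_len):
    per_step_ranges = [range(n) for n in n_control] * policy_len
    return [
        np.array(combo).reshape(policy_len, len(n_control))
        for combo in itertools.product(*per_step_ranges)
    ]

# e.g. n_control=[3], policy_len=1 gives 3 policies of shape (1, 1);
# policy_len=3 gives 3**3 = 27 policies of shape (3, 1).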
def test_state_info_gain(self):
    """
    Test the states_info_gain function. Demonstrates operation by manipulating
    uncertainty in the likelihood matrices (A or B) in ways that alternately
    change the resolvability of uncertainty (via an imprecise expected state
    paired with a precise observation mapping, or via a fully ambiguous
    observation mapping).
    """
    n_states = [2]
    n_control = [2]

    qs = Categorical(values=np.eye(n_states[0])[0])

    # add some uncertainty into the consequences of the second policy, which
    # leads to increased epistemic value of observations when pursuing that
    # policy -- in the case of a precise observation likelihood model
    B_matrix = construct_generic_B(n_states, n_control)
    B_matrix[:, :, 1] = core.softmax(B_matrix[:, :, 1])
    B = Categorical(values=B_matrix)

    # single timestep
    n_step = 1
    policies = core.construct_policies(n_states, n_control, policy_len=n_step)

    # single observation modality
    num_obs = [2]

    # create noiseless identity A matrix
    A = Categorical(values=np.eye(num_obs[0]))

    state_info_gains = np.zeros(len(policies))
    for idx, policy in enumerate(policies):
        qs_pi = core.get_expected_states(qs, B, policy)
        state_info_gains[idx] += core.calc_states_info_gain(A, qs_pi)

    self.assertGreater(state_info_gains[1], state_info_gains[0])

    # we can 'undo' the epistemic bonus of the second policy by making the A matrix
    # totally ambiguous; observations then cannot resolve uncertainty about hidden
    # states, so uncertainty in the posterior beliefs no longer matters
    A = Categorical(values=np.ones((num_obs[0], num_obs[0])))
    A.normalize()

    state_info_gains = np.zeros(len(policies))
    for idx, policy in enumerate(policies):
        qs_pi = core.get_expected_states(qs, B, policy)
        state_info_gains[idx] += core.calc_states_info_gain(A, qs_pi)

    self.assertEqual(state_info_gains[0], state_info_gains[1])
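# A minimal numpy sketch of the quantity this test exercises, assuming the
# standard "Bayesian surprise" form of epistemic value (core.calc_states_info_gain
# may differ in details such as epsilon handling):
# info gain = H[predicted observations] - E_{expected states}[H[A(:, s)]].
import numpy as np

def entropy(p, eps=1e-16):
    return -np.sum(p * np.log(p + eps))

def states_info_gain_sketch(A, qs_pi):
    qo_pi = A @ qs_pi                                 # predictive distribution over observations
    predicted_uncertainty = entropy(qo_pi)
    expected_ambiguity = sum(qs_pi[s] * entropy(A[:, s]) for s in range(A.shape[1]))
    return predicted_uncertainty - expected_ambiguity

# With an identity A, a more uncertain expected state yields a larger gain;
# with a flat, fully ambiguous A the two terms cancel and the gain is zero --
# mirroring the two assertions in the test above.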
def test_pA_info_gain(self):
    """
    Test the pA_info_gain function. Demonstrates operation by manipulating the
    shape of the Dirichlet priors over likelihood parameters (pA), which affects
    information gain for different expected observations
    """
    n_states = [2]
    n_control = [2]

    qs = Categorical(values=np.eye(n_states[0])[0])

    B = Categorical(values=construct_generic_B(n_states, n_control))

    # single timestep
    n_step = 1
    policies = core.construct_policies(n_states, n_control, policy_len=n_step)

    # single observation modality
    num_obs = [2]

    # create noiseless identity A matrix
    A = Categorical(values=np.eye(num_obs[0]))

    # create a prior over Dirichlets such that there is a skew in the parameters
    # of the likelihood mapping from the second hidden state (index 1) to
    # observations, such that one observation is considered more likely than the
    # other, conditioned on that state. Therefore sampling that observation
    # would afford high information gain about the parameters for that part of
    # the likelihood distribution.
    pA_matrix = construct_pA(num_obs, n_states)
    pA_matrix[0, 1] = 2.0
    pA = Dirichlet(values=pA_matrix)

    pA_info_gains = np.zeros(len(policies))
    for idx, policy in enumerate(policies):
        qs_pi = core.get_expected_states(qs, B, policy)
        qo_pi = core.get_expected_obs(qs_pi, A)
        pA_info_gains[idx] += core.calc_pA_info_gain(pA, qo_pi, qs_pi)

    self.assertGreater(pA_info_gains[1], pA_info_gains[0])
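# A hedged sketch of the Dirichlet "novelty" term with which information gain
# about likelihood parameters is commonly approximated in SPM-style active
# inference: W = 0.5 * (1/alpha - 1/alpha_0), taken in expectation under the
# predicted observations and states. This illustrates the idea; the exact
# expression inside core.calc_pA_info_gain may differ.
import numpy as np

def pA_novelty_sketch(pA, qo_pi, qs_pi):
    alpha0 = pA.sum(axis=0, keepdims=True)     # total Dirichlet counts per hidden state
    W = 0.5 * (1.0 / pA - 1.0 / alpha0)        # larger where individual counts are small
    return qo_pi @ (W @ qs_pi)

# Skewing pA[0, 1] changes the counts in the column for hidden state 1, so
# policies whose expected states and observations load on that column are
# scored differently -- which is what the assertion above checks.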
scenes[1][1, 1] = 2
scenes[1][1, 0] = 3

env = VisualForagingEnv(scenes=scenes, n_features=3)
A = env.get_likelihood_dist()
B = env.get_transition_dist()

# if you want parameter information gain and/or learning
# pA = Dirichlet(values=A.values * 1e20)
# pB = Dirichlet(values=B.values * 1e20)

C = np.empty(env.n_modalities, dtype=object)
for g, No in enumerate(env.n_observations):
    C[g] = np.zeros(No)

policies, n_control = core.construct_policies(env.n_states, None, 1, [0])

obs = env.reset()

msg = """ === Starting experiment === \n True scene: {} Initial observation {} """
print(msg.format(env.true_scene, obs))

prior = env.get_uniform_posterior()

for t in range(T):
    qs = core.update_posterior_states(A, obs, prior, return_numpy=False)

    msg = """[{}] Inference [location {} / scene {}] Observation [location {} / feature {}] """
    print(msg.format(t, np.argmax(qs[0].values), np.argmax(qs[1].values), obs[0], obs[1]))
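# A minimal single-factor sketch of the kind of Bayesian update that
# core.update_posterior_states performs each trial: combine the likelihood of
# the current observation with the prior and renormalise. The demo's
# two-factor, two-modality case additionally marginalises a joint likelihood
# array over the other factor, which is omitted here for clarity.
import numpy as np

def update_posterior_single_factor(A, obs_idx, prior, eps=1e-16):
    log_likelihood = np.log(A[obs_idx, :] + eps)   # ln P(o = obs_idx | s)
    log_prior = np.log(prior + eps)
    q = np.exp(log_likelihood + log_prior)
    return q / q.sum()                             # posterior Q(s | o)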
def test_expected_utility(self):
    """
    Test the expected utility function for a simple single-factor generative
    model with imbalances in the preferences for different outcomes. Tests both
    single-timestep and multiple-timestep policy horizons.
    """
    n_states = [2]
    n_control = [2]

    qs = Categorical(values=construct_init_qs(n_states))
    B = Categorical(values=construct_generic_B(n_states, n_control))

    # Single timestep
    n_step = 1
    policies = core.construct_policies(n_states, n_control, policy_len=n_step)

    # Single observation modality
    num_obs = [2]

    # Create noiseless identity A matrix
    A = Categorical(values=np.eye(num_obs[0]))

    # Create imbalance in preferences for observations
    C = Categorical(values=np.eye(num_obs[0])[1])

    utilities = np.zeros(len(policies))
    for idx, policy in enumerate(policies):
        qs_pi = core.get_expected_states(qs, B, policy)
        qo_pi = core.get_expected_obs(qs_pi, A)
        utilities[idx] += core.calc_expected_utility(qo_pi, C)

    self.assertGreater(utilities[1], utilities[0])

    n_states = [3]
    n_control = [3]

    qs = Categorical(values=construct_init_qs(n_states))
    B = Categorical(values=construct_generic_B(n_states, n_control))

    # 3-step policies:
    # one goes to state 0 two times in a row and then to state 2 at the end;
    # the other goes to state 1 three times in a row
    policies = [
        np.array([0, 0, 2]).reshape(-1, 1),
        np.array([1, 1, 1]).reshape(-1, 1)
    ]

    # single observation modality
    num_obs = [3]

    # create noiseless identity A matrix
    A = Categorical(values=np.eye(num_obs[0]))

    # create imbalance in preferences for observations. This is designed to
    # illustrate the time-integrated nature of the expected free energy:
    # even though the first observation (index 0) is the most preferred, the
    # policy that frequents this observation the most is actually not optimal,
    # because that policy ends up visiting a less preferred state at the end.
    C = Categorical(values=np.array([1.2, 1, 0.5]))

    utilities = np.zeros(len(policies))
    for idx, policy in enumerate(policies):
        qs_pi = core.get_expected_states(qs, B, policy)
        qo_pi = core.get_expected_obs(qs_pi, A)
        utilities[idx] += core.calc_expected_utility(qo_pi, C)

    self.assertGreater(utilities[1], utilities[0])
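# A minimal sketch of time-integrated expected utility, assuming preferences C
# are treated as the log of a (softmax-)normalised distribution over outcomes;
# core.calc_expected_utility may normalise C differently.
import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

def expected_utility_sketch(qo_pi_per_step, C):
    log_C = np.log(softmax(C))
    return sum(qo_pi @ log_C for qo_pi in qo_pi_per_step)

# Because per-timestep utilities are summed, the [1, 1, 1] policy above beats
# the [0, 0, 2] policy: consistently producing a moderately preferred outcome
# outweighs producing the most preferred outcome twice and then a dispreferred
# one -- which is exactly what the final assertion checks.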
def test_construct_policies_multifactor(self):
    """
    Test policy constructor function for multi-factor control states
    """
    n_states = [3, 4]
    n_control = [3, 1]
    control_fac_idx = [0]

    # One-step policies
    policy_len = 1
    policies = core.construct_policies(n_states, n_control, policy_len, control_fac_idx)
    self.assertEqual(len(policies), n_control[0])
    for policy in policies:
        self.assertEqual(policy.shape[0], policy_len)  # type: ignore

    # Multi-step policies
    policy_len = 3
    policies = core.construct_policies(n_states, n_control, policy_len, control_fac_idx)
    for policy in policies:
        self.assertEqual(policy.shape[0], policy_len)  # type: ignore

    # Now leave out `n_control` and let it be inferred from `control_fac_idx`

    # One-step policies
    policy_len = 1
    policies, n_control = core.construct_policies(n_states, None, policy_len, control_fac_idx)
    self.assertEqual(len(policies), n_control[0])
    self.assertEqual(n_control[1], 1)
    for policy in policies:
        self.assertEqual(policy.shape[0], policy_len)  # type: ignore

    # Multi-step policies
    policy_len = 3
    policies, n_control = core.construct_policies(n_states, None, policy_len, control_fac_idx)
    self.assertEqual(n_states[0], n_control[0])
    self.assertEqual(n_control[1], 1)
    for policy in policies:
        self.assertEqual(policy.shape[0], policy_len)  # type: ignore

    # Make the second hidden state factor the controllable one
    control_fac_idx = [1]

    # One-step policies
    policy_len = 1
    policies, n_control = core.construct_policies(n_states, None, policy_len, control_fac_idx)
    self.assertEqual(len(policies), n_control[1])
    self.assertEqual(n_control[0], 1)
    for policy in policies:
        self.assertEqual(policy.shape[0], policy_len)  # type: ignore

    # Multi-step policies
    policy_len = 3
    policies, n_control = core.construct_policies(n_states, None, policy_len, control_fac_idx)
    self.assertEqual(n_control[0], 1)
    for policy in policies:
        self.assertEqual(policy.shape[0], policy_len)  # type: ignore
def test_multistep_multi_factor_posterior_policies(self):
    """
    Test for computing the posterior over policies (and the associated expected
    free energies) in the case of a posterior over hidden states with multiple
    hidden state factors. This version uses a policy horizon of 3 steps.
    """
    n_states = [3, 4]
    n_control = [3, 4]

    qs = Categorical(values=construct_init_qs(n_states))
    B = Categorical(values=construct_generic_B(n_states, n_control))
    pB = Dirichlet(values=construct_pB(n_states, n_control))

    # 3-step policy horizon
    n_step = 3
    policies = core.construct_policies(n_states, n_control, policy_len=n_step)

    # Single observation modality
    num_obs = [4]

    A = Categorical(values=construct_generic_A(num_obs, n_states))
    pA = Dirichlet(values=construct_pA(num_obs, n_states))
    C = Categorical(values=construct_generic_C(num_obs))

    q_pi, efe = core.update_posterior_policies(
        qs,
        A,
        B,
        C,
        policies,
        use_utility=True,
        use_states_info_gain=True,
        use_param_info_gain=True,
        pA=pA,
        pB=pB,
        gamma=16.0,
        return_numpy=True,
    )

    self.assertEqual(len(q_pi), len(policies))  # type: ignore
    self.assertEqual(len(efe), len(policies))

    # Multiple observation modalities
    num_obs = [3, 2]

    A = Categorical(values=construct_generic_A(num_obs, n_states))
    pA = Dirichlet(values=construct_pA(num_obs, n_states))
    C = Categorical(values=construct_generic_C(num_obs))

    q_pi, efe = core.update_posterior_policies(
        qs,
        A,
        B,
        C,
        policies,
        use_utility=True,
        use_states_info_gain=True,
        use_param_info_gain=True,
        pA=pA,
        pB=pB,
        gamma=16.0,
        return_numpy=True,
    )

    self.assertEqual(len(q_pi), len(policies))  # type: ignore
    self.assertEqual(len(efe), len(policies))
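# A sketch of how the expected free energies returned above are typically
# converted into a posterior over policies: a softmax scaled by the precision
# parameter gamma. The sign convention is an assumption -- here `efe` is taken
# to be a quantity where larger is better (utility plus information gains),
# matching how those terms are accumulated in the tests above.
import numpy as np

def posterior_over_policies_sketch(efe, gamma=16.0):
    x = gamma * np.asarray(efe)
    q_pi = np.exp(x - x.max())      # subtract max for numerical stability
    return q_pi / q_pi.sum()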
transition_matrix = env.get_transition_dist()
B = Categorical(values=transition_matrix)
B.remove_zeros()
# plot_empirical_prior(B)

reward_location = 3
C = Categorical(dims=[env.n_states])
C[reward_location] = 1.0
# plot_beliefs(C, title="Prior preference (C)")

qs = Categorical(dims=[env.n_states])

policy_len = 2
n_control = [env.n_control]
policies = core.construct_policies([n_states], n_control=n_control, policy_len=policy_len)
n_policies = len(policies)
print(f"Total number of policies {n_policies}")
print(policies[0].shape)


def evaluate_policy(policy, qs, A, B, C):
    G = 0
    qs = qs.copy()

    # loop over policy
    policy_len = policy.shape[0]
    for t in range(policy_len):
        # get action
        u = int(policy[t, :])
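# evaluate_policy is truncated above; what follows is a hedged, plain-numpy
# sketch (single factor, single modality) of how such a rollout is typically
# completed: propagate beliefs through the B slice selected by each action,
# predict observations through A, and accumulate utility plus state
# information gain into G. It is an illustration, not the demo's actual code.
import numpy as np

def evaluate_policy_sketch(policy, qs, A, B, C, eps=1e-16):
    G = 0.0
    qs = qs.copy()
    log_C = np.log(C / C.sum() + eps)
    for t in range(policy.shape[0]):
        u = int(policy[t, 0])                    # action for this step
        qs = B[:, :, u] @ qs                     # expected states under the action
        qo = A @ qs                              # expected observations
        utility = qo @ log_C
        ambiguity = qs @ (-np.sum(A * np.log(A + eps), axis=0))
        info_gain = -np.sum(qo * np.log(qo + eps)) - ambiguity
        G += utility + info_gain                 # larger G = better, in this sketch
    return G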