def test_dot_function_a(self):
    """Compare `Categorical.dot` with stored reference results.

    Case a: vectors and matrices with discrete states / outcomes. Inputs and
    expected outputs are read from ``tests/data/dot_a.mat``.
    """
    fixture = loadmat(file_name=os.path.join(os.getcwd(), "tests/data/dot_a.mat"))

    observation = fixture["o"]
    hidden_states = np.array(fixture["s"], dtype=object)
    expected_obs = fixture["result1"]
    expected_states = fixture["result2"]
    expected_omit = fixture["result3"]
    likelihood = Categorical(values=fixture["A"])

    # dot product with the observation
    actual_obs = likelihood.dot(observation, return_numpy=True)
    self.assertTrue(np.isclose(expected_obs, actual_obs).all())

    # dot product with the states; cast to float64 and append an axis before
    # comparing with the reference
    actual_states = likelihood.dot(hidden_states, return_numpy=True)
    actual_states = actual_states.astype("float64")[:, np.newaxis]
    self.assertTrue(np.isclose(expected_states, actual_states).all())

    # same, but leaving dimension 0 out of the contraction
    actual_omit = likelihood.dot(hidden_states, dims_to_omit=[0], return_numpy=True)
    self.assertTrue(np.isclose(expected_omit, actual_omit).all())
def test_cross_function_d(self):
    """Test case d: outer product of a vector with a sequence of vectors.

    Two call styles are exercised:
    - the vector is a Categorical and the sequence is a numpy ndarray
      (dtype = object)
    - the vector is a Categorical and the sequence is itself a Categorical
      (where self.IS_AOA = True)
    """
    fixture = loadmat(file_name=os.path.join(os.getcwd(), "tests/data/cross_d.mat"))
    expected = fixture["result1"]
    random_vec = Categorical(values=fixture["random_vec"])

    states = fixture["s"]
    for idx, entry in enumerate(states):
        states[idx] = entry.squeeze()

    # variant 1: Categorical x numpy ndarray (dtype = object)
    from_ndarray = random_vec.cross(states, return_numpy=True)
    self.assertTrue(np.isclose(expected, from_ndarray).all())

    # variant 2: Categorical x Categorical (where self.IS_AOA = True)
    states = Categorical(values=states[0])
    from_categorical = random_vec.cross(states, return_numpy=True)
    self.assertTrue(np.isclose(expected, from_categorical).all())
def test_dot_function_e(self):
    """Compare `Categorical.dot` with stored reference results.

    Case e: CONTINUOUS states and outcomes, with an additional final (fourth)
    hidden state factor. Fixtures come from ``tests/data/dot_e.mat``.
    """
    fixture = loadmat(file_name=os.path.join(os.getcwd(), "tests/data/dot_e.mat"))

    observation = fixture["o"]
    raw_states = fixture["s"]

    # unpack the nested loaded structure into a flat array-of-arrays
    factor_states = np.empty(raw_states.shape[1], dtype=object)
    for idx, entry in enumerate(raw_states[0]):
        factor_states[idx] = entry[0]

    expected_obs = fixture["result1"]
    expected_states = fixture["result2"]
    expected_omit = fixture["result3"]
    likelihood = Categorical(values=fixture["A"])

    actual_obs = likelihood.dot(observation, return_numpy=True)
    self.assertTrue(np.isclose(expected_obs, actual_obs).all())

    # cast to float64 and append an axis before comparing with the reference
    actual_states = likelihood.dot(factor_states, return_numpy=True)
    actual_states = actual_states.astype("float64")[:, np.newaxis]
    self.assertTrue(np.isclose(expected_states, actual_states).all())

    actual_omit = likelihood.dot(factor_states, dims_to_omit=[0], return_numpy=True)
    self.assertTrue(np.isclose(expected_omit, actual_omit).all())
def test_normalize_multi_factor(self):
    """A multi-factor Categorical reports itself normalized after `normalize()`."""
    factor_values = [np.random.rand(5), np.random.rand(4, 3)]

    # Build the array-of-arrays explicitly. Calling np.array() on entries of
    # different shapes relies on implicit ragged-array creation, which raises
    # ValueError on NumPy >= 1.24.
    values = np.empty(len(factor_values), dtype=object)
    for idx, factor in enumerate(factor_values):
        values[idx] = factor

    c = Categorical(values=values)
    c.normalize()
    self.assertTrue(c.is_normalized())
def test_update_pA_multiFactor_somemodalities(self):
    """Update of the Dirichlet prior over the sensory likelihood (pA).

    Scenario: only SOME observation modalities are updated and the generative
    model has multiple hidden state factors.
    """
    n_states = [2, 6]
    qs = Categorical(values=construct_init_qs(n_states))
    learning_rate = 1.0

    # multiple observation modalities, of which only two are updated
    num_obs = [3, 4, 5]
    modalities_to_update = [0, 2]

    A = Categorical(values=construct_generic_A(num_obs, n_states))
    pA = Dirichlet(values=construct_pA(num_obs, n_states))
    observation = A.dot(qs, return_numpy=False).sample()

    pA_updated = core.update_likelihood_dirichlet(
        pA,
        A,
        observation,
        qs,
        lr=learning_rate,
        modalities=modalities_to_update,
        return_numpy=True,
    )

    for modality, no in enumerate(num_obs):
        # modalities that are not updated must keep their prior untouched
        expected = pA[modality]
        if modality in modalities_to_update:
            one_hot_obs = np.eye(no)[observation[modality]]
            expected = expected + learning_rate * core.spm_cross(one_hot_obs, qs.values)
        self.assertTrue(np.all(pA_updated[modality] == expected.values))
def test_normalize_multi_factor(self):
    """The mean of a multi-factor Dirichlet is a normalized Categorical."""
    factor_values = [np.random.rand(5), np.random.rand(4, 3)]

    # Build the array-of-arrays explicitly. Calling np.array() on entries of
    # different shapes relies on implicit ragged-array creation, which raises
    # ValueError on NumPy >= 1.24.
    values = np.empty(len(factor_values), dtype=object)
    for idx, factor in enumerate(factor_values):
        values[idx] = factor

    d = Dirichlet(values=values)
    normed = Categorical(values=d.mean(return_numpy=True))
    self.assertTrue(normed.is_normalized())
def test_copy(self):
    """`copy()` returns an equal Categorical whose values can diverge."""
    original = Categorical(values=np.random.rand(3, 2))
    duplicate = original.copy()

    # right after copying, both hold the same numbers
    self.assertTrue(np.array_equal(duplicate.values, original.values))

    # rebinding the copy's values must leave the original unchanged
    duplicate.values = duplicate.values * 2
    self.assertFalse(np.array_equal(duplicate.values, original.values))
def test_cross_function_b(self):
    """Test case b: outer product of two vectors.

    Call styles exercised:
    - both vectors stored in a single Categorical (two entries, where
      self.AoA == True)
    - first vector is a Categorical (self.AoA = False), second is a plain
      numpy ndarray (non-object array)
    - both vectors are separate Categoricals
    """
    fixture = loadmat(file_name=os.path.join(os.getcwd(), "tests/data/cross_b.mat"))
    expected_joint = fixture["result1"]
    expected_pair = fixture["result2"]

    # way 1: both vectors live in one AoA Categorical
    both_factors = Categorical(values=fixture["s"][0])
    joint = both_factors.cross(return_numpy=True)
    self.assertTrue(np.isclose(expected_joint, joint).all())

    # way 2 (type 1): Categorical crossed with a straight numpy array
    first_factor = Categorical(values=fixture["s"][0][0])
    second_factor = fixture["s"][0][1]
    with_ndarray = first_factor.cross(second_factor, return_numpy=True)
    self.assertTrue(np.isclose(expected_pair, with_ndarray).all())

    # way 2 (type 2): Categorical crossed with another Categorical
    first_factor = Categorical(values=fixture["s"][0][0])
    second_factor = Categorical(values=fixture["s"][0][1])
    with_categorical = first_factor.cross(second_factor, return_numpy=True)
    self.assertTrue(np.isclose(expected_pair, with_categorical).all())
def sample_action(p_i, possible_policies, Nu, sampling_type="marginal_action"):
    """
    Samples action from posterior over policies, using one of two methods.
    @TODO: Needs to be amended for use with multi-step policies (where possible_policies is
    a list of np.arrays (nStep x nFactor), not just a list of tuples as it is now)

    Parameters
    ----------
    p_i [1D numpy.ndarray or Categorical]:
        Variational posterior over policies.
    possible_policies [list of tuples]:
        List of tuples that indicate the possible policies under consideration. Each tuple
        stores the actions taken upon the separate hidden state factors.
        Same length as p_i.
    Nu [list of integers]:
        List of the dimensionalities of the different (controllable) hidden states
    sampling_type [string, 'marginal_action' or 'posterior_sample']:
        Indicates whether the sampled action for a given hidden state factor is given by
        the evidence for that action, marginalized across different policies
        ('marginal_action') or simply the action entailed by the policy sampled from the
        posterior.

    Returns
    ----------
    selected_policy [tuple]:
        For 'posterior_sample', the entry of possible_policies that was drawn; for
        'marginal_action', the result of sampling the per-factor action marginals.

    Raises
    ----------
    ValueError:
        If sampling_type is not one of the two supported strings.
    """
    # Reject unknown modes up front; previously they fell through both branches
    # and failed with an UnboundLocalError at the return statement.
    if sampling_type not in ("marginal_action", "posterior_sample"):
        raise ValueError(
            "sampling_type must be 'marginal_action' or 'posterior_sample', "
            "got {!r}".format(sampling_type)
        )

    numControls = len(Nu)

    if sampling_type == "marginal_action":
        if isinstance(p_i, Categorical):
            p_i = p_i.values.squeeze()

        action_marginals = np.empty(numControls, dtype=object)
        for nu_i in range(numControls):
            action_marginals[nu_i] = np.zeros(Nu[nu_i])

        # Weight each action according to the posterior probability it gets across policies
        for pol_i, policy in enumerate(possible_policies):
            for nu_i, a_i in enumerate(policy):
                action_marginals[nu_i][a_i] += p_i[pol_i]

        action_marginals = Categorical(values=action_marginals)
        action_marginals.normalize()
        return action_marginals.sample()

    # sampling_type == "posterior_sample"
    if isinstance(p_i, Categorical):
        policy_index = p_i.sample()
    else:
        # draw a one-hot sample and recover the index of the selected policy
        sample_onehot = np.random.multinomial(1, p_i.squeeze())
        policy_index = np.where(sample_onehot == 1)[0][0]
    return possible_policies[policy_index]
def reset(self, state=None):
    """Reset the environment state and return the resulting observation.

    When `state` is given it is wrapped in a Categorical and used as-is.
    Otherwise the location factor is set to index 0 and a scene index is drawn
    with `np.random.randint` (also stored on `self._true_scene`).
    """
    if state is not None:
        self._state = Categorical(values=state)
        return self._get_observation()

    # one-hot location: always start at index 0
    loc_state = np.zeros(self.n_locations)
    loc_state[0] = 1.0

    # one-hot scene: index drawn at random
    scene_state = np.zeros(self.n_scenes)
    self._true_scene = np.random.randint(self.n_scenes)
    scene_state[self._true_scene] = 1.0

    factors = np.empty(self.n_factors, dtype=object)
    factors[LOCATION_ID] = loc_state
    factors[SCENE_ID] = scene_state
    self._state = Categorical(values=factors)
    return self._get_observation()
def test_state_info_gain(self):
    """Test the states_info_gain function.

    Works by manipulating uncertainty in the likelihood matrices (A or B) so
    that the resolvability of uncertainty changes: either an imprecise
    expected state with a precise mapping, or a fully ambiguous mapping.
    """
    n_states = [2]
    n_control = [2]

    qs = Categorical(values=np.eye(n_states[0])[0])

    # add some uncertainty into the consequences of the second policy, which
    # leads to increased epistemic value of observations, in case of pursuing
    # that policy -- in the case of a precise observation likelihood model
    B_matrix = construct_generic_B(n_states, n_control)
    B_matrix[:, :, 1] = core.softmax(B_matrix[:, :, 1])
    B = Categorical(values=B_matrix)

    # single timestep
    n_step = 1
    policies = core.construct_policies(n_states, n_control, policy_len=n_step)

    # single observation modality
    num_obs = [2]

    def _gains_under(likelihood):
        # state information gain of every policy under the given A
        gains = np.zeros(len(policies))
        for idx, policy in enumerate(policies):
            qs_pi = core.get_expected_states(qs, B, policy)
            gains[idx] += core.calc_states_info_gain(likelihood, qs_pi)
        return gains

    # noiseless identity A matrix: the uncertain second policy is more informative
    A = Categorical(values=np.eye(num_obs[0]))
    state_info_gains = _gains_under(A)
    self.assertGreater(state_info_gains[1], state_info_gains[0])

    # we can 'undo' the epistemic bonus of the second policy by making the A matrix
    # totally ambiguous, thus observations cannot resolve uncertainty about hidden states
    # - in this case, uncertainty in the posterior beliefs doesn't matter
    A = Categorical(values=np.ones((num_obs[0], num_obs[0])))
    A.normalize()
    state_info_gains = _gains_under(A)
    self.assertEqual(state_info_gains[0], state_info_gains[1])
def reset(self, state=None):
    """Reset the environment state and return the resulting observation.

    When `state` is given it is wrapped in a Categorical and used as-is.
    Otherwise the location factor is set to index 0 and a reward condition is
    drawn with `np.random.randint` (also stored on `self._reward_condition`).
    """
    if state is not None:
        self._state = Categorical(values=state)
        return self._get_observation()

    # one-hot location: always start at index 0
    loc_state = np.zeros(self.n_locations)
    loc_state[0] = 1.0

    # one-hot reward condition: index drawn at random
    condition_state = np.zeros(self.n_reward_conditions)
    self._reward_condition = np.random.randint(self.n_reward_conditions)
    condition_state[self._reward_condition] = 1.0

    factors = np.empty(self.n_factors, dtype=object)
    factors[LOCATION_FACTOR_ID] = loc_state
    factors[TRIAL_FACTOR_ID] = condition_state
    self._state = Categorical(values=factors)
    return self._get_observation()
def test_pA_info_gain(self):
    """Test the pA_info_gain function.

    Works by manipulating the shape of the Dirichlet priors over likelihood
    parameters (pA), which affects information gain for different expected
    observations.
    """
    n_states = [2]
    n_control = [2]

    qs = Categorical(values=np.eye(n_states[0])[0])
    B = Categorical(values=construct_generic_B(n_states, n_control))

    # single timestep
    n_step = 1
    policies = core.construct_policies(n_states, n_control, policy_len=n_step)

    # single observation modality
    num_obs = [2]

    # create noiseless identity A matrix
    A = Categorical(values=np.eye(num_obs[0]))

    # Skew the Dirichlet prior for the mapping from the second hidden state
    # (index 1) to observations, so that one observation is considered more
    # likely than the other conditioned on that state. Sampling that
    # observation would therefore afford high info gain about the parameters
    # of that part of the likelihood distribution.
    pA_matrix = construct_pA(num_obs, n_states)
    pA_matrix[0, 1] = 2.0
    pA = Dirichlet(values=pA_matrix)

    pA_info_gains = np.zeros(len(policies))
    for policy_idx, policy in enumerate(policies):
        expected_states = core.get_expected_states(qs, B, policy)
        expected_obs = core.get_expected_obs(expected_states, A)
        pA_info_gains[policy_idx] += core.calc_pA_info_gain(pA, expected_obs, expected_states)

    self.assertGreater(pA_info_gains[1], pA_info_gains[0])
def test_multi_factor_init_values_expand(self):
    """1-D factors passed as an array-of-arrays are expanded to column shape."""
    factor_values = [np.random.rand(5), np.random.rand(4)]

    # Build the array-of-arrays explicitly. Calling np.array() on entries of
    # different lengths relies on implicit ragged-array creation, which raises
    # ValueError on NumPy >= 1.24.
    values = np.empty(len(factor_values), dtype=object)
    for idx, factor in enumerate(factor_values):
        values[idx] = factor

    c = Categorical(values=values)
    self.assertEqual(c.shape, (2, ))
    self.assertEqual(c[0].shape, (5, 1))
    self.assertEqual(c[1].shape, (4, 1))
def step(self, actions):
    """Advance the environment by one step and return the new observation.

    For each factor, the transition slice selected by `actions[f]` is dotted
    with the current state of that factor; a new state is then sampled from
    the resulting distributions and stored via `self._construct_state`.
    """
    n_factors = self.n_factors
    next_state_probs = np.empty(n_factors, dtype=object)
    for factor in range(n_factors):
        transition = self._transition_dist[factor][:, :, actions[factor]]
        predicted = transition.dot(self._state[factor], return_numpy=True)
        next_state_probs[factor] = predicted.flatten()

    sampled_state = Categorical(values=next_state_probs).sample()
    self._state = self._construct_state(sampled_state)
    return self._get_observation()
def test_dot_function_c_cat(self):
    """Compare `Categorical.dot` with stored reference results.

    Case c: vectors and matrices, discrete states / outcomes, with a third
    hidden state factor. Here the arguments are themselves instances of
    Categorical. Fixtures come from ``tests/data/dot_c.mat``.
    """
    fixture = loadmat(file_name=os.path.join(os.getcwd(), "tests/data/dot_c.mat"))

    observation = Categorical(values=fixture["o"])
    raw_states = fixture["s"]

    # unpack the nested loaded structure into a flat array-of-arrays, then wrap it
    factor_states = np.empty(raw_states.shape[1], dtype=object)
    for idx, entry in enumerate(raw_states[0]):
        factor_states[idx] = entry[0]
    factor_states = Categorical(values=factor_states)

    expected_obs = fixture["result1"]
    expected_states = fixture["result2"]
    expected_omit = fixture["result3"]
    likelihood = Categorical(values=fixture["A"])

    actual_obs = likelihood.dot(observation, return_numpy=True)
    self.assertTrue(np.isclose(expected_obs, actual_obs).all())

    # cast to float64 and append an axis before comparing with the reference
    actual_states = likelihood.dot(factor_states, return_numpy=True)
    actual_states = actual_states.astype("float64")[:, np.newaxis]
    self.assertTrue(np.isclose(expected_states, actual_states).all())

    actual_omit = likelihood.dot(factor_states, dims_to_omit=[0], return_numpy=True)
    self.assertTrue(np.isclose(expected_omit, actual_omit).all())
def reset(self, init_qs=None):
    """Reset the stored beliefs `self.qs` and return them.

    With no argument the beliefs come from `self._construct_D_prior()`. A
    Categorical is stored as given; anything else is wrapped in a Categorical.
    """
    if init_qs is None:
        self.qs = self._construct_D_prior()
    elif isinstance(init_qs, Categorical):
        self.qs = init_qs
    else:
        self.qs = Categorical(values=init_qs)
    return self.qs
def _construct_likelihood_dist(self):
    """Build the observation likelihood, one array per modality.

    Each modality's array has shape ``[n_observations[modality]] + n_states``
    and is filled per (location, reward condition) pair.

    Returns
    -------
    Categorical wrapping the array-of-arrays of likelihoods.
    """
    A = np.empty(self.n_modalities, dtype=object)
    for modality in range(self.n_modalities):
        A[modality] = np.zeros([self.n_observations[modality]] + self.n_states)

    # Uniform probability over cue outcomes. Indexed with CUE_MODALITY_ID rather
    # than a literal position so it always matches the first axis of
    # A[CUE_MODALITY_ID], which is sized from n_observations[CUE_MODALITY_ID].
    ambiguous_cue = 1.0 / self.n_observations[CUE_MODALITY_ID]

    for loc in range(self.n_states[LOCATION_FACTOR_ID]):
        for reward_condition in range(self.n_states[TRIAL_FACTOR_ID]):

            if loc == 0:
                # the case when the agent is in the centre location:
                # reward observation is always 'no reward', i.e. the outcome with index 0
                A[REWARD_MODALITY_ID][0, loc, reward_condition] = 1.0

                # the cue is totally ambiguous with respect to the reward condition
                A[CUE_MODALITY_ID][:, loc, reward_condition] = ambiguous_cue

            elif loc == 3:
                # the case when loc == 3, or the cue location ('bottom arm'):
                # reward observation is always 'no reward', i.e. the outcome with index 0
                A[REWARD_MODALITY_ID][0, loc, reward_condition] = 1.0

                # the cue indicates the reward condition unambiguously /
                # signals where the reward is located
                A[CUE_MODALITY_ID][reward_condition, loc, reward_condition] = 1.0

            else:
                # the case when the agent is in one of the (potentially-) rewarding arms
                if loc == (reward_condition + 1):
                    # location is consistent with reward condition:
                    # highest probability on the reward outcome, lower on loss
                    high_prob_idx = REWARD_IDX
                    low_prob_idx = LOSS_IDX
                else:
                    # highest probability on the loss outcome, lower on reward
                    high_prob_idx = LOSS_IDX
                    low_prob_idx = REWARD_IDX

                A[REWARD_MODALITY_ID][high_prob_idx, loc, reward_condition] = self.reward_probs[0]
                A[REWARD_MODALITY_ID][low_prob_idx, loc, reward_condition] = self.reward_probs[1]

                # cue is ambiguous when in the reward location
                A[CUE_MODALITY_ID][:, loc, reward_condition] = ambiguous_cue

            # the agent always observes its location, regardless of the reward condition
            A[LOCATION_MODALITY_ID][loc, loc, reward_condition] = 1.0

    return Categorical(values=A)
def sample_action(q_pi, policies, n_control, sampling_type="marginal_action"):
    """
    Samples action from posterior over policies, using one of two methods.

    Parameters
    ----------
    q_pi [1D numpy.ndarray or Categorical]:
        Posterior beliefs about (possibly multi-step) policies.
    policies [list of numpy ndarrays]:
        List of arrays that indicate the policies under consideration. Each element
        within the list is a matrix that stores the indices of the actions upon the
        separate hidden state factors, at each timestep (nStep x nControlFactor)
    n_control [list of integers]:
        List of the dimensionalities of the different (controllable) hidden state factors
    sampling_type [string, 'marginal_action' or 'posterior_sample']:
        Indicates whether the sampled action for a given hidden state factor is given by
        the evidence for that action, marginalized across different policies
        ('marginal_action') or simply the action entailed by a sample from the posterior
        over policies

    Returns
    ----------
    selected_policy:
        For 'marginal_action', a numpy array built from the sample of the per-factor
        action marginals; for 'posterior_sample', the entry of `policies` that was drawn.

    Raises
    ----------
    ValueError:
        If sampling_type is not one of the two supported strings.
    """
    # Reject unknown modes up front; previously they fell through both branches
    # and failed with an UnboundLocalError at the return statement.
    if sampling_type not in ("marginal_action", "posterior_sample"):
        raise ValueError(
            "sampling_type must be 'marginal_action' or 'posterior_sample', "
            "got {!r}".format(sampling_type)
        )

    n_factors = len(n_control)

    if sampling_type == "marginal_action":
        if utils.is_distribution(q_pi):
            q_pi = utils.to_numpy(q_pi)

        action_marginals = np.empty(n_factors, dtype=object)
        for c_idx in range(n_factors):
            action_marginals[c_idx] = np.zeros(n_control[c_idx])

        # weight each action according to its integrated posterior probability
        # over policies and timesteps
        for pol_idx, policy in enumerate(policies):
            for t in range(policy.shape[0]):
                for factor_i, action_i in enumerate(policy[t, :]):
                    action_marginals[factor_i][action_i] += q_pi[pol_idx]

        action_marginals = Categorical(values=action_marginals)
        action_marginals.normalize()
        return np.array(action_marginals.sample())

    # sampling_type == "posterior_sample": wrap raw arrays so both input kinds
    # are sampled the same way
    if not utils.is_distribution(q_pi):
        q_pi = Categorical(values=q_pi)
    policy_index = q_pi.sample()
    return policies[policy_index]
def get_expected_obs(Qs_pi, A, return_numpy=False):
    """
    Given a posterior predictive density Qs_pi and an observation likelihood model A,
    get the expected observations given the predictive posterior.

    @TODO: Needs to be amended for use with multi-step policies (where possible_policies is
    a list of np.arrays (nStep x nFactor), not just a list of tuples as it is now)

    Parameters
    ----------
    Qs_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), or
        Categorical (either single-factor or AoA)]:
        Posterior predictive density over hidden states. It is not modified.
    A [numpy nd-array, array-of-arrays (where each entry is a numpy nd-array), or
        Categorical (either single-factor or AoA)]:
        Observation likelihood mapping from hidden states to observations, with different
        modalities (if there are multiple) stored in different arrays
    return_numpy [Boolean]:
        True/False flag to determine whether output of function is a numpy array or a
        Categorical

    Returns
    -------
    Qo_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), or
        Categorical (either single-factor or AoA)]:
        Expected observations under the given policy
    """
    if isinstance(A, Categorical):
        if not return_numpy:
            return A.dot(Qs_pi)

        Qo_pi = A.dot(Qs_pi, return_numpy=True)
        if Qo_pi.dtype == "object":
            # flatten each modality separately
            Qo_pi_flattened = np.empty(len(Qo_pi), dtype=object)
            for g in range(len(Qo_pi)):
                Qo_pi_flattened[g] = Qo_pi[g].flatten()
            return Qo_pi_flattened
        return Qo_pi.flatten()

    if A.dtype == "object":
        Ng = len(A)
        Qo_pi = np.empty(Ng, dtype=object)

        if isinstance(Qs_pi, Categorical):
            # Flatten on a copy: writing the flattened factors back into
            # Qs_pi.values would silently change the caller's Categorical.
            Qs_pi = Qs_pi.values.copy()
            for f in range(len(Qs_pi)):
                Qs_pi[f] = Qs_pi[f].flatten()

        for g in range(Ng):
            Qo_pi[g] = spm_dot(A[g], Qs_pi)
    else:
        if isinstance(Qs_pi, Categorical):
            Qs_pi = Qs_pi.values
        Qo_pi = spm_dot(A, Qs_pi)

    if not return_numpy:
        return Categorical(values=Qo_pi)
    return Qo_pi
def _construct_transition_dist(self):
    """Build the per-factor transition arrays and wrap them in a Categorical.

    The location factor starts from an identity matrix that is tiled along a
    third axis (once per location) and then has its axes reordered with
    ``transpose(1, 2, 0)``. The scene factor is an identity matrix with a
    single trailing axis.
    """
    n_loc = self.n_locations
    n_scenes = self.n_scenes

    location_identity = np.eye(n_loc).reshape(n_loc, n_loc, 1)
    location_transitions = np.tile(location_identity, (1, 1, n_loc)).transpose(1, 2, 0)

    transitions = np.empty(self.n_factors, dtype=object)
    transitions[LOCATION_ID] = location_transitions
    transitions[SCENE_ID] = np.eye(n_scenes).reshape(n_scenes, n_scenes, 1)
    return Categorical(values=transitions)
def test_pB_info_gain(self):
    """Test the pB_info_gain function.

    Works by manipulating the shape of the Dirichlet priors over transition
    likelihood parameters (pB), which affects information gain for different
    states.
    """
    n_states = [2]
    n_control = [2]

    qs = Categorical(values=np.eye(n_states[0])[0])
    B = Categorical(values=construct_generic_B(n_states, n_control))

    # Skew the Dirichlet prior for the state-to-state mapping under the second
    # action, so that hidden state 0 is considered more likely than the other
    # given that action. Taking that action would therefore yield an expected
    # state that affords high information gain about that part of the
    # likelihood distribution.
    pB_matrix = construct_pB(n_states, n_control)
    pB_matrix[0, :, 1] = 2.0
    pB = Dirichlet(values=pB_matrix)

    # single timestep
    n_step = 1
    policies = core.construct_policies(n_states, n_control, policy_len=n_step)

    pB_info_gains = np.zeros(len(policies))
    for policy_idx, policy in enumerate(policies):
        expected_states = core.get_expected_states(qs, B, policy)
        pB_info_gains[policy_idx] += core.calc_pB_info_gain(pB, expected_states, qs, policy)

    self.assertGreater(pB_info_gains[1], pB_info_gains[0])
def _construct_B_distribution(self):
    """Build a default transition distribution B from identity matrices.

    Every factor starts as an identity matrix with one trailing axis. For
    factors listed in `self.control_fac_idx` that array is tiled along the
    last axis `n_controls[factor]` times and its axes are reordered with
    ``transpose(1, 2, 0)``. The result is wrapped in a Categorical and
    normalized.
    """
    if self.n_factors != 1:
        # several factors: array-of-arrays with one entry per factor
        B = np.empty(self.n_factors, dtype=object)
        for factor, ns in enumerate(self.n_states):
            per_factor = np.eye(ns)[:, :, np.newaxis]
            if factor in self.control_fac_idx:
                tiled = np.tile(per_factor, (1, 1, self.n_controls[factor]))
                per_factor = tiled.transpose(1, 2, 0)
            B[factor] = per_factor
    else:
        # single factor: one plain array
        B = np.eye(*self.n_states)[:, :, np.newaxis]
        if 0 in self.control_fac_idx:
            B = np.tile(B, (1, 1, self.n_controls[0])).transpose(1, 2, 0)

    B = Categorical(values=B)
    B.normalize()
    return B
def test_cross_function_c(self):
    """Test case c: outer product of a vector with a matrix.

    Call styles exercised:
    - the vector is a Categorical and the matrix is a numpy ndarray
      (non-object array)
    - the vector is a Categorical and the matrix is also a Categorical
    """
    fixture = loadmat(file_name=os.path.join(os.getcwd(), "tests/data/cross_c.mat"))
    expected = fixture["result1"]
    random_vec = Categorical(values=fixture["random_vec"])

    # variant 1: Categorical x numpy ndarray
    matrix_arg = fixture["random_matrix"]
    with_ndarray = random_vec.cross(matrix_arg, return_numpy=True)
    self.assertTrue(np.isclose(expected, with_ndarray).all())

    # variant 2: Categorical x Categorical
    matrix_arg = Categorical(values=fixture["random_matrix"])
    with_categorical = random_vec.cross(matrix_arg, return_numpy=True)
    self.assertTrue(np.isclose(expected, with_categorical).all())
def test_update_pB_multiFactor_withActions_someFactors(self):
    """Update of the Dirichlet prior over the transition likelihood (pB).

    Scenario: there are multiple hidden state factors, there are actions, and
    only some factors are updated.
    """
    n_states = [3, 4, 5]
    n_control = [3, 4, 5]
    qs_prev = Categorical(values=construct_init_qs(n_states))
    qs = Categorical(values=construct_init_qs(n_states))
    learning_rate = 1.0
    factors_to_update = [0, 1]

    B = Categorical(values=construct_generic_B(n_states, n_control))
    B.normalize()
    pB = Dirichlet(values=construct_pB(n_states, n_control))

    action = np.array([np.random.randint(nc) for nc in n_control])

    pB_updated = core.update_transition_dirichlet(
        pB,
        B,
        action,
        qs,
        qs_prev,
        lr=learning_rate,
        factors=factors_to_update,
        return_numpy=True,
    )

    # The expected value is rebuilt from scratch for every factor, so no
    # whole-pB copy is needed before the loop.
    for factor, _ in enumerate(n_control):
        validation_pB = pB[factor].copy()

        if factor in factors_to_update:
            # counts are only added where the chosen action's B slice is non-zero
            validation_pB[:, :, action[factor]] += (
                learning_rate
                * core.spm_cross(qs[factor].values, qs_prev[factor].values)
                * (B[factor][:, :, action[factor]].values > 0)
            )

        self.assertTrue(np.all(pB_updated[factor] == validation_pB.values))
def test_update_pB_multiFactor_noActions_allFactors(self):
    """Update of the Dirichlet prior over the transition likelihood (pB).

    Scenario: there are multiple hidden state factors, there are no actions
    (a single control state per factor), and all factors are updated.
    """
    n_states = [3, 4]
    n_control = [1, 1]
    qs_prev = Categorical(values=construct_init_qs(n_states))
    qs = Categorical(values=construct_init_qs(n_states))
    learning_rate = 1.0

    # Build the per-factor arrays-of-arrays explicitly. The factors have
    # different shapes, and np.array() on such a list relies on implicit
    # ragged-array creation, which raises ValueError on NumPy >= 1.24.
    B_values = np.empty(len(n_states), dtype=object)
    for factor, ns in enumerate(n_states):
        B_values[factor] = np.random.rand(ns, ns, n_control[factor])
    B = Categorical(values=B_values)
    B.normalize()

    pB_values = np.empty(len(n_states), dtype=object)
    for factor in range(len(n_states)):
        pB_values[factor] = np.ones_like(B[factor].values)
    pB = Dirichlet(values=pB_values)

    action = np.array([np.random.randint(nc) for nc in n_control])

    pB_updated = core.update_transition_dirichlet(
        pB,
        B,
        action,
        qs,
        qs_prev,
        lr=learning_rate,
        factors="all",
        return_numpy=True,
    )

    # The expected value is rebuilt from scratch for every factor, so no
    # whole-pB copy is needed before the loop.
    for factor, _ in enumerate(n_control):
        validation_pB = pB[factor].copy()
        # counts are only added where the chosen action's B slice is non-zero
        validation_pB[:, :, action[factor]] += (
            learning_rate
            * core.spm_cross(qs[factor].values, qs_prev[factor].values)
            * (B[factor][:, :, action[factor]].values > 0)
        )
        self.assertTrue(np.all(pB_updated[factor] == validation_pB.values))
def test_is_normalized(self):
    """`is_normalized()` is True only when every column sums to one."""
    # both columns sum to 1
    normalized = Categorical(values=np.array([[0.7, 0.5], [0.3, 0.5]]))
    self.assertTrue(normalized.is_normalized())

    # columns sum to 0.5 and 1.3
    unnormalized = Categorical(values=np.array([[0.2, 0.8], [0.3, 0.5]]))
    self.assertFalse(unnormalized.is_normalized())
def test_contains_zeros(self):
    """`contains_zeros()` reports whether any entry equals zero."""
    with_zero = Categorical(values=np.array([[1.0, 0.0], [1.0, 1.0]]))
    self.assertTrue(with_zero.contains_zeros())

    without_zero = Categorical(values=np.array([[1.0, 1.0], [1.0, 1.0]]))
    self.assertFalse(without_zero.contains_zeros())
def test_update_pB_singleFactor_withActions(self):
    """Update of the Dirichlet prior over the transition likelihood (pB).

    Scenario: the one and only hidden state factor is updated, and there are
    actions.
    """
    n_states = [3]
    n_control = [3]
    qs_prev = Categorical(values=construct_init_qs(n_states))
    qs = Categorical(values=construct_init_qs(n_states))
    learning_rate = 1.0

    B = Categorical(values=construct_generic_B(n_states, n_control))
    pB = Dirichlet(values=np.ones_like(B.values))

    action = np.array([np.random.randint(nc) for nc in n_control])

    pB_updated = core.update_transition_dirichlet(
        pB,
        B,
        action,
        qs,
        qs_prev,
        lr=learning_rate,
        factors="all",
        return_numpy=True,
    )

    # expected result: prior plus the outer product of current and previous
    # beliefs, masked to the non-zero entries of the chosen action's B slice
    chosen = action[0]
    increment = (
        learning_rate
        * core.spm_cross(qs.values, qs_prev.values)
        * (B[:, :, chosen].values > 0)
    )
    validation_pB = pB.copy()
    validation_pB[:, :, chosen] += increment

    self.assertTrue(np.all(pB_updated == validation_pB.values))
def _construct_D_prior(self):
    """Build a normalized prior over hidden states from all-ones vectors.

    Returns
    -------
    Categorical built from a vector of ones (single factor) or from an
    array-of-arrays holding one vector of ones per entry of `self.n_states`,
    normalized before being returned.
    """
    if self.n_factors == 1:
        D = Categorical(values=np.ones(*self.n_states))
    else:
        # Fill an object array explicitly. np.array() on the list of vectors
        # would collapse into a single 2-D float matrix whenever all factors
        # have the same size, and raises ValueError on NumPy >= 1.24 when
        # their sizes differ.
        prior = np.empty(len(self.n_states), dtype=object)
        for factor, Ns in enumerate(self.n_states):
            prior[factor] = np.ones(Ns)
        D = Categorical(values=prior)
    D.normalize()
    return D