Example #1
def calc_expected_utility(qo_pi, C):
    """
    Given expected observations under a policy Qo_pi and a prior over observations C
    compute the expected utility of the policy.

    Parameters
    ----------
    qo_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), Categorical (either single-factor or AoA), or list]:
        Expected observations under the given policy (predictive posterior over outcomes). If a list, each entry contains the expected
        observations at a given timestep over the time horizon of policy evaluation.
    C [numpy nd-array, array-of-arrays (where each entry is a numpy nd-array)]:
        Prior beliefs over outcomes, expressed in terms of relative log probabilities
    Returns
    -------
    expected_util [scalar]:
        Utility (reward) expected under the policy in question
    """

    if isinstance(qo_pi, list):
        n_steps = len(qo_pi)
        for t in range(n_steps):
            qo_pi[t] = utils.to_numpy(qo_pi[t], flatten=True)
    else:
        n_steps = 1
        qo_pi = [utils.to_numpy(qo_pi, flatten=True)]

    C = utils.to_numpy(C, flatten=True)

    # initialise expected utility
    expected_util = 0

    # in case of multiple observation modalities, loop over time points and modalities
    if utils.is_arr_of_arr(C):

        num_modalities = len(C)

        for t in range(n_steps):
            for modality in range(num_modalities):
                lnC = np.log(softmax(C[modality][:, np.newaxis]) + 1e-16)
                expected_util += qo_pi[t][modality].dot(lnC)

    # else, just loop over time (since there's only one modality)
    else:

        lnC = np.log(softmax(C[:, np.newaxis]) + 1e-16)

        for t in range(n_steps):
            expected_util += qo_pi[t].dot(lnC)

    return expected_util
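
A minimal usage sketch for the single-modality case, assuming the module-level softmax and utils imports that calc_expected_utility relies on are available; the numbers below are illustrative only:

import numpy as np

# toy predictive posterior over three outcomes under some policy (illustrative values)
qo_pi_example = np.array([0.7, 0.2, 0.1])

# prior preferences over outcomes, expressed as relative log probabilities
C_example = np.array([3.0, 0.0, -3.0])

# the utility is qo_pi . ln(softmax(C)), so it is highest when the policy
# makes the preferred outcomes likely
expected_util = calc_expected_utility(qo_pi_example, C_example)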
Example #2
    def test_state_info_gain(self):
        """
        Test the states_info_gain function. Demonstrates that it works
        by manipulating uncertainty in the likelihood matrices (A or B)
        in ways that alternately change the resolvability of uncertainty
        (via an imprecise expected state paired with a precise mapping, or a fully
        ambiguous mapping that prevents observations from resolving state uncertainty).
        """

        n_states = [2]
        n_control = [2]

        qs = Categorical(values=np.eye(n_states[0])[0])

        # add some uncertainty into the consequences of the second policy, which
        # leads to increased epistemic value of observations, in case of pursuing
        # that policy -- in the case of a precise observation likelihood model
        B_matrix = construct_generic_B(n_states, n_control)
        B_matrix[:, :, 1] = core.softmax(B_matrix[:, :, 1])
        B = Categorical(values=B_matrix)

        # single timestep
        n_step = 1
        policies = core.construct_policies(n_states,
                                           n_control,
                                           policy_len=n_step)

        # single observation modality
        num_obs = [2]

        # create noiseless identity A matrix
        A = Categorical(values=np.eye(num_obs[0]))

        state_info_gains = np.zeros(len(policies))

        for idx, policy in enumerate(policies):

            qs_pi = core.get_expected_states(qs, B, policy)

            state_info_gains[idx] += core.calc_states_info_gain(A, qs_pi)

        self.assertGreater(state_info_gains[1], state_info_gains[0])

        # we can 'undo' the epistemic bonus of the second policy by making the A matrix
        # totally ambiguous, thus observations cannot resolve uncertainty about hidden states
        # - in this case, uncertainty in the posterior beliefs doesn't matter

        A = Categorical(values=np.ones((num_obs[0], num_obs[0])))
        A.normalize()

        state_info_gains = np.zeros(len(policies))

        for idx, policy in enumerate(policies):

            qs_pi = core.get_expected_states(qs, B, policy)

            state_info_gains[idx] += core.calc_states_info_gain(A, qs_pi)

        self.assertEqual(state_info_gains[0], state_info_gains[1])
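
The test relies on a construct_generic_B helper defined elsewhere in the test suite. A hypothetical sketch of such a helper, assuming B is an (n_states x n_states x n_control) array with one column-stochastic transition matrix per control state:

import numpy as np

def construct_generic_B(n_states, n_control):
    # hypothetical helper: one deterministic transition matrix per control state,
    # obtained by rolling the identity by the action index
    B = np.zeros((n_states[0], n_states[0], n_control[0]))
    for action in range(n_control[0]):
        B[:, :, action] = np.roll(np.eye(n_states[0]), action, axis=0)
    return B

The exact construction in the real test suite may differ; the point is only that B[:, :, 1] starts out precise, so that softmaxing it in the test introduces the uncertainty being probed.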
Example #3
def calc_expected_utility(Qo_pi, C):
    """
    Given expected observations under a policy Qo_pi and a prior over observations C
    compute the expected utility of the policy.

    @TODO: Needs to be amended for use with multi-step policies (where possible_policies is a list of np.arrays (nStep x nFactor), not just a list of tuples as it is now)
    Parameters
    ----------
    Qo_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), or Categorical (either single-factor or AoA)]:
        Posterior predictive density over outcomes
    C [numpy nd-array, array-of-arrays (where each entry is a numpy nd-array)]:
        Prior beliefs over outcomes, expressed in terms of relative log probabilities
    Returns
    -------
    expected_util [scalar]:
        Utility (reward) expected under the policy in question
    """

    if isinstance(Qo_pi, Categorical):
        Qo_pi = Qo_pi.values

    if Qo_pi.dtype == "object":
        for g in range(len(Qo_pi)):
            Qo_pi[g] = Qo_pi[g].flatten()

    if C.dtype == "object":

        expected_util = 0

        Ng = len(C)
        for g in range(Ng):
            lnC = np.log(softmax(C[g][:, np.newaxis]) + 1e-16)
            expected_util += Qo_pi[g].flatten().dot(lnC)

    else:

        lnC = np.log(softmax(C[:, np.newaxis]) + 1e-16)
        expected_util = Qo_pi.flatten().dot(lnC)

    return expected_util
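
A usage sketch for the multi-modality branch of this older variant, where Qo_pi and C are numpy object arrays with one entry per observation modality (values are illustrative; assumes numpy and the module-level softmax are imported):

import numpy as np

# two observation modalities, with three and two outcomes respectively
Qo_pi_example = np.empty(2, dtype=object)
Qo_pi_example[0] = np.array([0.8, 0.1, 0.1])
Qo_pi_example[1] = np.array([0.5, 0.5])

# prior preferences per modality, as relative log probabilities
C_example = np.empty(2, dtype=object)
C_example[0] = np.array([2.0, 0.0, -2.0])
C_example[1] = np.array([0.0, 0.0])

# utilities are summed across modalities
expected_util = calc_expected_utility(Qo_pi_example, C_example)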
Example #4
def update_posterior_policies(
    qs,
    A,
    B,
    C,
    policies,
    use_utility=True,
    use_states_info_gain=True,
    use_param_info_gain=False,
    pA=None,
    pB=None,
    gamma=16.0,
    return_numpy=True,
):
    """ Updates the posterior beliefs about policies based on expected free energy prior

        @TODO: Needs to be amended for use with multi-step policies (where possible_policies is a list of np.arrays (n_step x n_factor), not just a list of tuples as it is now)

        Parameters
        ----------
        - `qs` [1D numpy array, array-of-arrays, or Categorical (either single- or multi-factor)]:
            Current marginal beliefs about hidden state factors
        - `A` [numpy ndarray, array-of-arrays (in case of multiple modalities), or Categorical (both single and multi-modality)]:
            Observation likelihood model (beliefs about the likelihood mapping entertained by the agent)
        - `B` [numpy ndarray, array-of-arrays (in case of multiple hidden state factors), or Categorical (both single and multi-factor)]:
            Transition likelihood model (beliefs about the likelihood mapping entertained by the agent)
        - `C` [numpy 1D-array, array-of-arrays (in case of multiple modalities), or Categorical (both single and multi-modality)]:
            Prior beliefs about outcomes (prior preferences)
        - `policies` [list of tuples]:
            A list of all the possible policies, each expressed as a tuple of indices, where a given index corresponds to an action on a particular hidden state factor
            e.g. policies[1][2] yields the index of the action under policy 1 that affects hidden state factor 2
        - `use_utility` [bool]:
            Whether to calculate the utility term, i.e. how much the expected observations conform to prior preferences
        - `use_states_info_gain` [bool]:
            Whether to calculate state information gain
        - `use_param_info_gain` [bool]:
            Whether to calculate parameter information gain @NOTE requires pA or pB to be specified 
        - `pA` [numpy ndarray, array-of-arrays (in case of multiple modalities), or Dirichlet (both single and multi-modality)]:
            Prior Dirichlet parameters for A. Defaults to None, in which case info gain w.r.t. Dirichlet parameters over A is skipped.
        - `pB` [numpy ndarray, array-of-arrays (in case of multiple hidden state factors), or Dirichlet (both single and multi-factor)]:
            Prior Dirichlet parameters for B. Defaults to None, in which case info gain w.r.t. Dirichlet parameters over B is skipped.
        - `gamma` [float, defaults to 16.0]:
            Precision over policies, used as the inverse temperature parameter of a softmax transformation of the expected free energies of each policy
        - `return_numpy` [Boolean]:
            True/False flag to determine whether output of function is a numpy array or a Categorical
        
        Returns
        --------
        - `qp` [1D numpy array or Categorical]:
            Posterior beliefs about policies, defined here as a softmax function of the expected free energies of policies
        - `efe` - [1D numpy array or Categorical]:
            The expected free energies of policies

    """

    n_policies = len(policies)

    efe = np.zeros(n_policies)
    q_pi = np.zeros((n_policies, 1))

    for idx, policy in enumerate(policies):
        qs_pi = get_expected_states(qs, B, policy)
        qo_pi = get_expected_obs(qs_pi, A)

        if use_utility:
            efe[idx] += calc_expected_utility(qo_pi, C)

        if use_states_info_gain:
            efe[idx] += calc_states_info_gain(A, qs_pi)

        if use_param_info_gain:
            if pA is not None:
                efe[idx] += calc_pA_info_gain(pA, qo_pi, qs_pi)
            if pB is not None:
                efe[idx] += calc_pB_info_gain(pB, qs_pi, qs, policy)

    q_pi = softmax(efe * gamma)

    if return_numpy:
        q_pi = q_pi / q_pi.sum(axis=0)
    else:
        q_pi = utils.to_categorical(q_pi)
        q_pi.normalize()

    return q_pi, efe
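
A minimal calling sketch for a single hidden state factor and a single observation modality. It assumes the surrounding module's helpers (get_expected_states, get_expected_obs, calc_states_info_gain, softmax) are in scope and accept plain numpy arrays, so treat it as illustrative rather than a verified call:

import numpy as np

# single factor with two states, single modality with two outcomes (illustrative)
qs = np.array([1.0, 0.0])                            # current beliefs over hidden states
A = np.eye(2)                                        # noiseless observation likelihood
B = np.stack([np.eye(2), np.eye(2)[::-1]], axis=2)   # one transition matrix per action
C = np.array([2.0, -2.0])                            # prior preferences (relative log probs)

# one-step policies: each policy is a tuple with one action index per hidden state factor
policies = [(0,), (1,)]

q_pi, efe = update_posterior_policies(qs, A, B, C, policies, use_param_info_gain=False)

q_pi is then a softmax of the gamma-scaled expected free energies, so the policy whose predicted observations better match C and better resolve state uncertainty receives more probability mass.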
"""

# reset environment
first_state = env.reset(init_state=s[0])

# get an observation, given the state
first_obs = gp_likelihood[:, first_state]

# turn observation into an index
first_obs = np.where(first_obs)[0][0]

print("Initial Location {}".format(first_state))
print("Initial Observation {}".format(first_obs))

# infer initial state, given first observation
qs = core.softmax(A[first_obs, :].log() + D.log(), return_numpy=False)

# loop over time
for t in range(T):

    qs_past = qs.copy()

    s_t = env.set_state(s[t + 1])

    # evoke observation, given the state
    obs = gp_likelihood[:, s_t]

    # turn observation into an index
    obs = np.where(obs)[0][0]

    # get transition likelihood