Esempio n. 1
0
    def construct_values(self, values):
        """Initialize a Categorical distribution with `values` argument
        
        Parameters
        ----------
        - `values` [np.ndarray]
            The parameters of the distribution
        """

        if not isinstance(values, np.ndarray):
            raise ValueError("`values` must be a `np.ndarray`")

        if utils.is_arr_of_arr(values):
            self.IS_AOA = True

        if self.IS_AOA:
            self.values = np.empty(len(values), dtype="object")
            for i, array in enumerate(values):
                if array.ndim == 1:
                    # repo generally uses column vectors
                    values[i] = np.expand_dims(values[i], axis=1)
                self.values[i] = values[i].astype("float64")
        else:
            if values.ndim == 1:
                values = np.expand_dims(values, axis=1)
            self.values = values.astype("float64")
Esempio n. 2
0
def spm_cross(X, x=None, *args):
    """ Multi-dimensional outer product
    
    Parameters
    ----------
    - `x` [np.ndarray] || [Categorical] (optional)
        The values to perfrom the outer-product with. If empty, then the
        outer-product is taken between X and itself. If x is not empty, then outer product is 
        taken between X and the various dimensions of x.
    - `args` [np.ndarray] || [Categorical] (optional)
        Remaining arrays to perform outer-product with. These extra arrays are recursively multiplied 
        with the 'initial' outer product (that between X and x).
    
    Returns
    -------
    - `y` [np.ndarray] || [Categorical]
          The result of the outer-product
    """

    if len(args) == 0 and x is None:
        if utils.is_arr_of_arr(X):
            Y = spm_cross(*list(X))
        elif np.issubdtype(X.dtype, np.number):
            Y = X
        return Y

    if utils.is_arr_of_arr(X):
        X = spm_cross(*list(X))

    if x is not None and utils.is_arr_of_arr(x):
        x = spm_cross(*list(x))

    reshape_dims = tuple(list(X.shape) + list(np.ones(x.ndim, dtype=int)))
    A = X.reshape(reshape_dims)

    reshape_dims = tuple(list(np.ones(X.ndim, dtype=int)) + list(x.shape))
    B = x.reshape(reshape_dims)
    Y = np.squeeze(A * B)

    for x in args:
        Y = spm_cross(Y, x)
    return Y
Esempio n. 3
0
def calc_expected_utility(qo_pi, C):
    """
    Given expected observations under a policy Qo_pi and a prior over observations C
    compute the expected utility of the policy.

    Parameters
    ----------
    qo_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), Categorical (either single-factor or AoA), or list]:
        Expected observations under the given policy (predictive posterior over outcomes). If a list, a list of the expected observations
        over the time horizon of policy evaluation, where each entry is the expected observations at a given timestep. 
    C [numpy nd-array, array-of-arrays (where each entry is a numpy nd-array):
        Prior beliefs over outcomes, expressed in terms of relative log probabilities
    Returns
    -------
    expected_util [scalar]:
        Utility (reward) expected under the policy in question
    """

    if isinstance(qo_pi, list):
        n_steps = len(qo_pi)
        for t in range(n_steps):
            qo_pi[t] = utils.to_numpy(qo_pi[t], flatten=True)
    else:
        n_steps = 1
        qo_pi = [utils.to_numpy(qo_pi, flatten=True)]

    C = utils.to_numpy(C, flatten=True)

    # initialise expected utility
    expected_util = 0

    # in case of multiple observation modalities, loop over time points and modalities
    if utils.is_arr_of_arr(C):

        num_modalities = len(C)

        for t in range(n_steps):
            for modality in range(num_modalities):
                lnC = np.log(softmax(C[modality][:, np.newaxis]) + 1e-16)
                expected_util += qo_pi[t][modality].dot(lnC)

    # else, just loop over time (since there's only one modality)
    else:

        lnC = np.log(softmax(C[:, np.newaxis]) + 1e-16)

        for t in range(n_steps):
            lnC = np.log(softmax(C[:, np.newaxis] + 1e-16))
            expected_util += qo_pi[t].dot(lnC)

    return expected_util
Esempio n. 4
0
def softmax(dist, return_numpy=True):
    """ 
    Computes the softmax function on a set of values
    """

    dist = utils.to_numpy(dist)

    output = []
    if utils.is_arr_of_arr(dist):
        for i in range(len(dist.values)):
            output.append(softmax(dist[i]), return_numpy=True)

    output = dist - dist.max(axis=0)
    output = np.exp(output)
    output = output / np.sum(output, axis=0)
    if return_numpy:
        return output
    else:
        return utils.to_categorical(output)
Esempio n. 5
0
def process_priors(prior, n_factors):
    """
    Helper function for formatting observations  
    
    @TODO
    """
    if utils.is_distribution(prior):
        prior_arr = np.empty(n_factors, dtype=object)
        if n_factors == 1:
            prior_arr[0] = prior.values.squeeze()
        else:
            for factor in range(n_factors):
                prior_arr[factor] = prior[factor].values.squeeze()
        prior = prior_arr

    elif not utils.is_arr_of_arr(prior):
        prior = utils.to_arr_of_arr(prior)

    return prior
Esempio n. 6
0
def spm_dot(X, x, dims_to_omit=None, obs_mode=False):
    """ Dot product of a multidimensional array with `x`
        The dimensions in `dims_to_omit` will not be summed across during the dot product

    @TODO: we should look for an alternative to obs_mode
    
    Parameters
    ----------
    - `x` [1D numpy.ndarray] - either vector or array of arrays
        The alternative array to perform the dot product with
    - `dims_to_omit` [list :: int] (optional)
        Which dimensions to omit
    
    Returns 
    -------
    - `Y` [1D numpy.ndarray] - the result of the dot product
    """

    # construct dims to perform dot product on
    if utils.is_arr_of_arr(x):
        dims = (np.arange(0, len(x)) + X.ndim - len(x)).astype(int)
    else:
        if obs_mode is True:
            """
            @NOTE Case when you're getting the likelihood of an observation under the generative model.
                  Equivalent to something like self.values[np.where(x),:]
                  when `x` is a discrete 'one-hot' observation vector
            """
            dims = np.array([0], dtype=int)
        else:
            """
            @NOTE Case when `x` leading dimension matches the lagging dimension of `values`
                  E.g. a more 'classical' dot product of a likelihood with hidden states
            """
            dims = np.array([1], dtype=int)

        x = utils.to_arr_of_arr(x)

    # delete ignored dims
    if dims_to_omit is not None:
        if not isinstance(dims_to_omit, list):
            raise ValueError("`dims_to_omit` must be a `list` of `int`")
        dims = np.delete(dims, dims_to_omit)
        if len(x) == 1:
            x = np.empty([0], dtype=object)
        else:
            x = np.delete(x, dims_to_omit)

    # compute dot product
    for d in range(len(x)):
        s = np.ones(np.ndim(X), dtype=int)
        s[dims[d]] = np.shape(x[d])[0]
        X = X * x[d].reshape(tuple(s))
        X = np.sum(X, axis=dims[d], keepdims=True)
    Y = np.squeeze(X)

    # check to see if `Y` is a scalar
    if np.prod(Y.shape) <= 1.0:
        Y = Y.item()
        Y = np.array([Y]).astype("float64")

    return Y
Esempio n. 7
0
def spm_dot(X, y, dims_to_omit=None, obs_mode=False):
    """ Dot product of a multidimensional array `X` with `y`
    The dimensions in `dims_to_omit` will not be summed across during  dot product
   
    @TODO: we need documentation describing `obs_mode`
        Ideally, we could find a way to avoid it altogether 

    Parameters
    ----------
    `y` [1D numpy.ndarray] 
        Either vector or array of arrays
    `dims_to_omit` [list :: int] (optional) 
        Which dimensions to omit
    """

    X = utils.to_numpy(X)
    y = utils.to_numpy(y)

    # if `X` is array of array, we need to construct the dims to sum
    if utils.is_arr_of_arr(X):
        dims = (np.arange(0, len(y)) + X.ndim - len(y)).astype(int)
    else:
        """ 
        Deal with particular use case - see above @TODO 
        """
        if obs_mode is True:
            """
            Case when you're getting the likelihood of an observation under model.
            Equivalent to something like self.values[np.where(x),:]
            where `y` is a discrete 'one-hot' observation vector
            """
            dims = np.array([0], dtype=int)
        else:
            """
            Case when `y` leading dimension matches the lagging dimension of `values`
            E.g. a more 'classical' dot product of a likelihood with hidden states
            """
            dims = np.array([1], dtype=int)

        # convert `y` to array of array
        y = utils.to_arr_of_arr(y)

    # omit dims not needed for dot product
    if dims_to_omit is not None:
        if not isinstance(dims_to_omit, list):
            raise ValueError("`dims_to_omit` must be a `list` of `int`")

        # delete dims
        dims = np.delete(dims, dims_to_omit)
        if len(y) == 1:
            y = np.empty([0], dtype=object)
        else:
            y = np.delete(y, dims_to_omit)

    print(dims)
    # perform dot product
    for d in range(len(y)):
        s = np.ones(np.ndim(X), dtype=int)
        s[dims[d]] = np.shape(y[d])[0]
        X = X * y[d].reshape(tuple(s))
        X = np.sum(X, axis=dims[d], keepdims=True)
    X = np.squeeze(X)

    # perform check to see if `x` is a scalar
    if np.prod(X.shape) <= 1.0:
        X = X.item()
        X = np.array([X]).astype("float64")

    return X
Esempio n. 8
0
def update_posterior_states(A, obs, prior=None, method=FPI, return_numpy=True):
    """ 
    Update marginal posterior over hidden states using variational inference
        Can optionally set message passing algorithm used for inference
    
    Parameters
    ----------
    - 'A' [numpy nd.array (matrix or tensor or array-of-arrays) or Categorical]:
        Observation likelihood of the generative model, mapping from hidden states to observations
        Used to invert generative model to obtain marginal likelihood over hidden states, given the observation
    - 'obs' [numpy 1D array, array of arrays (with 1D numpy array entries), int or tuple]:
        The observation (generated by the environment). If single modality, this can be a 1D array 
        (one-hot vector representation) or an int (observation index)
        If multi-modality, this can be an array of arrays (whose entries are 1D one-hot vectors) or a tuple (of observation indices)
    - 'prior' [numpy 1D array, array of arrays (with 1D numpy array entries), Categorical, or None]:
        Prior beliefs about hidden states, to be integrated with the marginal likelihood to obtain a posterior distribution. 
        If None, prior is set to be equal to a flat categorical distribution (at the level of the individual inference functions).
        (optional)
    - 'return_numpy' [bool]:
        True/False flag to determine whether the posterior is returned as a numpy array or a Categorical
    - 'method' [str]:
        Algorithm used to perform the variational inference. 
        Options: 'FPI' - Fixed point iteration 
                    - http://www.cs.cmu.edu/~guestrin/Class/10708/recitations/r9/VI-view.pdf, slides 13- 18
                    - http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.137.221&rep=rep1&type=pdf, slides 24 - 38
                 'VMP  - Variational message passing (not implemented)
                 'MMP' - Marginal message passing (not implemented)
                 'BP'  - Belief propagation (not implemented)
                 'EP'  - Expectation propagation (not implemented)
                 'CV'  - CLuster variation method (not implemented)
    **kwargs: List of keyword/parameter arguments corresponding to parameter values for the respective variational inference algorithm

    Returns
    ----------
    - 'qs' [numpy 1D array, array of arrays (with 1D numpy array entries), or Categorical]:
        Marginal posterior beliefs over hidden states 
    """

    # safe convert to numpy
    A = utils.to_numpy(A)

    # collect model dimensions
    if utils.is_arr_of_arr(A):
        n_factors = A[0].ndim - 1
        n_states = list(A[0].shape[1:])
        n_modalities = len(A)
        n_observations = []
        for m in range(n_modalities):
            n_observations.append(A[m].shape[0])
    else:
        n_factors = A.ndim - 1
        n_states = list(A.shape[1:])
        n_modalities = 1
        n_observations = [A.shape[0]]

    obs = process_observations(obs, n_modalities, n_observations)
    if prior is not None:
        prior = process_priors(prior, n_factors)

    if method is FPI:
        qs = run_fpi(A, obs, n_observations, n_states, prior)
    elif method is VMP:
        raise NotImplementedError(f"{VMP} is not implemented")
    elif method is MMP:
        raise NotImplementedError(f"{MMP} is not implemented")
    elif method is BP:
        raise NotImplementedError(f"{BP} is not implemented")
    elif method is EP:
        raise NotImplementedError(f"{EP} is not implemented")
    elif method is CV:
        raise NotImplementedError(f"{CV} is not implemented")
    else:
        raise ValueError(f"{method} is not implemented")

    if return_numpy:
        return qs
    else:
        return utils.to_categorical(qs)
Esempio n. 9
0
def calc_pB_info_gain(pB, qs_pi, qs_prev, policy):
    """
    Compute expected Dirichlet information gain about parameters pB under a given policy
    Parameters
    ----------
    pB [numpy nd-array, array-of-arrays (where each entry is a numpy nd-array), or Dirichlet (either single-factor of AoA)]:
        Prior dirichlet parameters parameterizing beliefs about the likelihood describing transitions bewteen hidden states,
        with different factors (if there are multiple) stored in different arrays.
    qs_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), Categorical (either single-factor or AoA), or list]:
        Posterior predictive density over hidden states. If a list, each entry of the list is the posterior predictive for a given timepoint of an expected trajectory
    qs_prev [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), or Categorical (either single-factor or AoA)]:
        Posterior over hidden states (before getting observations)
    policy [numpy 2D ndarray, of size n_steps x n_control_factors]:
        Policy to consider. Each row of the matrix encodes the action index along a different control factor for a given timestep.  
    Returns
    -------
    infogain_pB [scalar]:
        Surprise (about dirichlet parameters) expected under the policy in question
    """

    if isinstance(qs_pi, list):
        n_steps = len(qs_pi)
        for t in range(n_steps):
            qs_pi[t] = utils.to_numpy(qs_pi[t], flatten=True)
    else:
        n_steps = 1
        qs_pi = [utils.to_numpy(qs_pi, flatten=True)]

    if isinstance(qs_prev, Categorical):
        qs_prev = utils.to_numpy(qs_prev, flatten=True)

    if isinstance(pB, Dirichlet):
        if pB.IS_AOA:
            num_factors = pB.n_arrays
        else:
            num_factors = 1
        wB = pB.expectation_of_log()
    else:
        if utils.is_arr_of_arr(pB):
            num_factors = len(pB)
            wB = np.empty(num_factors, dtype=object)
            for factor in range(num_factors):
                wB[factor] = spm_wnorm(pB[factor])
        else:
            num_factors = 1
            wB = spm_wnorm(pB)

    pB = utils.to_numpy(pB)

    pB_infogain = 0

    if num_factors == 1:

        for t in range(n_steps):

            if t == 0:
                previous_qs = qs_prev
            else:
                previous_qs = qs_pi[t - 1]

            a_i = policy[t, 0]

            wB_t = wB[:, :, a_i] * (pB[:, :, a_i] > 0).astype("float")
            pB_infogain = -qs_pi[t].dot(wB_t.dot(qs_prev))
    else:

        for t in range(n_steps):

            # the 'past posterior' used for the information gain about pB here is the posterior over expected states at the timestep previous to the one under consideration
            if (
                    t == 0
            ):  # if we're on the first timestep, we just use the latest posterior in the entire action-perception cycle as the previous posterior
                previous_qs = qs_prev
            else:  # otherwise, we use the expected states for the timestep previous to the timestep under consideration
                previous_qs = qs_pi[t - 1]

            policy_t = policy[
                t, :]  # get the list of action-indices for the current timestep

            for factor, a_i in enumerate(policy_t):
                wB_factor_t = wB[factor][:, :, a_i] * (pB[factor][:, :, a_i] >
                                                       0).astype("float")
                pB_infogain -= qs_pi[t][factor].dot(
                    wB_factor_t.dot(previous_qs[factor]))

    return pB_infogain
Esempio n. 10
0
def calc_pA_info_gain(pA, qo_pi, qs_pi):
    """
    Compute expected Dirichlet information gain about parameters pA under a policy
    Parameters
    ----------
    pA [numpy nd-array, array-of-arrays (where each entry is a numpy nd-array), or Dirichlet (either single-factor of AoA)]:
        Prior dirichlet parameters parameterizing beliefs about the likelihood mapping from hidden states to observations, 
        with different modalities (if there are multiple) stored in different arrays.
    qo_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), Categorical (either single-factor or AoA), or list]:
        Expected observations. If a list, each entry of the list is the posterior predictive for a given timepoint of an expected trajectory
    qs_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), Categorical (either single-factor or AoA), or list]:
        Posterior predictive density over hidden states. If a list, each entry of the list is the posterior predictive for a given timepoint of an expected trajectory
    Returns
    -------
    infogain_pA [scalar]:
        Surprise (about dirichlet parameters) expected under the policy in question
    """

    if isinstance(qo_pi, list):
        n_steps = len(qo_pi)
        for t in range(n_steps):
            qo_pi[t] = utils.to_numpy(qo_pi[t], flatten=True)
    else:
        n_steps = 1
        qo_pi = [utils.to_numpy(qo_pi, flatten=True)]

    if isinstance(qs_pi, list):
        for t in range(n_steps):
            qs_pi[t] = utils.to_numpy(qs_pi[t], flatten=True)
    else:
        n_steps = 1
        qs_pi = [utils.to_numpy(qs_pi, flatten=True)]

    if isinstance(pA, Dirichlet):
        if pA.IS_AOA:
            num_modalities = pA.n_arrays
        else:
            num_modalities = 1
        wA = pA.expectation_of_log()
    else:
        if utils.is_arr_of_arr(pA):
            num_modalities = len(pA)
            wA = np.empty(num_modalities, dtype=object)
            for modality in range(num_modalities):
                wA[modality] = spm_wnorm(pA[modality])
        else:
            num_modalities = 1
            wA = spm_wnorm(pA)

    pA = utils.to_numpy(pA)

    pA_infogain = 0

    if num_modalities == 1:
        wA = wA * (pA > 0).astype("float")
        for t in range(n_steps):
            pA_infogain = -qo_pi[t].dot(spm_dot(wA, qs_pi[t])[:, np.newaxis])
    else:
        for modality in range(num_modalities):
            wA_modality = wA[modality] * (pA[modality] > 0).astype("float")
            for t in range(n_steps):
                pA_infogain -= qo_pi[t][modality].dot(
                    spm_dot(wA_modality, qs_pi[t])[:, np.newaxis])

    return pA_infogain
Esempio n. 11
0
def get_expected_obs(qs_pi, A, return_numpy=False):
    """
    Given a posterior predictive density Qs_pi and an observation likelihood model A,
    get the expected observations given the predictive posterior.

    Parameters
    ----------
    qs_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), Categorical (either single-factor or AoA), or list]:
        Posterior predictive density over hidden states. If a list, each entry of the list is the posterior predictive for a given timepoint of an expected trajectory
    A [numpy nd-array, array-of-arrays (where each entry is a numpy nd-array), or Categorical (either single-factor of AoA)]:
        Observation likelihood mapping from hidden states to observations, with different modalities (if there are multiple) stored in different arrays
    return_numpy [Boolean]:
        True/False flag to determine whether output of function is a numpy array or a Categorical
    Returns
    -------
    qo_pi [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), Categorical (either single-factor or AoA), or list]:
        Expected observations under the given policy. If a list, a list of the expected observations over the time horizon of policy evaluation, where
        each entry is the expected observations at a given timestep. 
    """

    # initialise expected observations
    qo_pi = []
    A = utils.to_numpy(A)

    if isinstance(qs_pi, list):
        n_steps = len(qs_pi)
        for t in range(n_steps):
            qs_pi[t] = utils.to_numpy(qs_pi[t], flatten=True)
    else:
        n_steps = 1
        qs_pi = [utils.to_numpy(qs_pi, flatten=True)]

    if utils.is_arr_of_arr(A):

        num_modalities = len(A)

        for t in range(n_steps):
            qo_pi_t = np.empty(num_modalities, dtype=object)
            qo_pi.append(qo_pi_t)

        # get expected observations over time
        for t in range(n_steps):
            for modality in range(num_modalities):
                qo_pi[t][modality] = spm_dot(A[modality], qs_pi[t])

    else:

        # get expected observations over time
        for t in range(n_steps):
            qo_pi.append(spm_dot(A, qs_pi[t]))

    if return_numpy:
        if n_steps == 1:
            return qo_pi[0]
        else:
            return qo_pi
    else:
        if n_steps == 1:
            return utils.to_categorical(qo_pi[0])
        else:
            for t in range(n_steps):
                qo_pi[t] = utils.to_categorical(qo_pi[t])
            return qo_pi
Esempio n. 12
0
def get_expected_states(qs, B, policy, return_numpy=False):
    """
    Given a posterior density qs, a transition likelihood model B, and a policy, 
    get the state distribution expected under that policy's pursuit

    Parameters
    ----------
    - `qs` [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), or Categorical (either single-factor or AoA)]:
        Current posterior beliefs about hidden states
    - `B` [numpy nd-array, array-of-arrays (where each entry is a numpy nd-array), or Categorical (either single-factor of AoA)]:
        Transition likelihood mapping from states at t to states at t + 1, with different actions (per factor) stored along the lagging dimension
   - `policy` [np.arrays]:
        np.array of size (policy_len x n_factors) where each value corrresponds to a control state
    - `return_numpy` [Boolean]:
        True/False flag to determine whether output of function is a numpy array or a Categorical
    Returns
    -------
    - `qs_pi` [ list of np.arrays with len n_steps, where in case of multiple hidden state factors, each np.array in the list is a 1 x n_factors array-of-arrays, otherwise a list of 1D numpy arrays]:
        Expected states under the given policy - also known as the 'posterior predictive density'

    """

    n_steps = policy.shape[0]
    n_factors = policy.shape[1]

    qs = utils.to_numpy(qs, flatten=True)
    B = utils.to_numpy(B)

    # initialise beliefs over expected states
    qs_pi = []

    if utils.is_arr_of_arr(B):

        for t in range(n_steps):
            qs_pi_t = np.empty(n_factors, dtype=object)
            qs_pi.append(qs_pi_t)

        # initialise expected states after first action using current posterior (t = 0)
        for control_factor, control in enumerate(policy[0, :]):
            qs_pi[0][control_factor] = spm_dot(
                B[control_factor][:, :, control], qs[control_factor])

        # get expected states over time
        if n_steps > 1:
            for t in range(1, n_steps):
                for control_factor, control in enumerate(policy[t, :]):
                    qs_pi[t][control_factor] = spm_dot(
                        B[control_factor][:, :, control],
                        qs_pi[t - 1][control_factor])

    else:

        # initialise expected states after first action using current posterior (t = 0)
        qs_pi.append(spm_dot(B[:, :, policy[0, 0]], qs))

        # then loop over future timepoints
        if n_steps > 1:
            for t in range(1, n_steps):
                qs_pi.append(spm_dot(B[:, :, policy[t, 0]], qs_pi[t - 1]))

    if return_numpy:
        if len(qs_pi) == 1:
            return qs_pi[0]
        else:
            return qs_pi
    else:
        if len(qs_pi) == 1:
            return utils.to_categorical(qs_pi[0])
        else:
            for t in range(n_steps):
                qs_pi[t] = utils.to_categorical(qs_pi[t])
            return qs_pi
Esempio n. 13
0
def run_mmp(
    A,
    B,
    obs_t,
    policy,
    curr_t,
    t_horizon,
    T,
    qs_bma=None,
    prior=None,
    num_iter=10,
    dF=1.0,
    dF_tol=0.001,
    previous_actions=None,
    use_gradient_descent=False,
    tau=0.25,
):
    """
    Optimise marginal posterior beliefs about hidden states using marginal message-passing scheme (MMP) developed
    by Thomas Parr and colleagues, see https://github.com/tejparr/nmpassing
   
    Parameters
    ----------
    - 'A' [numpy nd.array (matrix or tensor or array-of-arrays)]:
        Observation likelihood of the generative model, mapping from hidden states to observations. 
        Used in inference to get the likelihood of an observation, under different hidden state configurations.
    - 'B' [numpy.ndarray (tensor or array-of-arrays)]:
        Transition likelihood of the generative model, mapping from hidden states at t to hidden states at t+1.
        Used in inference to get expected future (or past) hidden states, given past (or future) hidden states (or expectations thereof).
    - 'obs_t' [list of length t_horizon of numpy 1D array or array of arrays (with 1D numpy array entries)]:
        Sequence of observations sampled from beginning of time horizon the current timestep t. The first observation (the start of the time horizon) 
        is either the first timestep of the generative process or the first timestep of the policy horizon (whichever is closer to 'curr_t' in time).
        The observations over time are stored as a list of numpy arrays, where in case of multi-modalities each numpy array is an array-of-arrays, with
        one 1D numpy.ndarray for each modality. In the case of a single modality, each observation is a single 1D numpy.ndarray.
    - 'policy' [2D np.ndarray]:
        Array of actions constituting a single policy. Policy is a shape (n_steps, n_control_factors) numpy.ndarray, the values of which
        indicate actions along a given control factor (column index) at a given timestep (row index).
    - 'curr_t' [int]:
        Current timestep (relative to the 'absolute' time of the generative process).
    - 't_horizon'[int]:
        Temporal horizon of inference for states and policies.
    - 'T' [int]:
        Temporal horizon of the generative process (absolute time)
    - `qs_bma` [numpy 1D array, array of arrays (with 1D numpy array entries) or None]:
    - 'prior' [numpy 1D array, array of arrays (with 1D numpy array entries) or None]:
        Prior beliefs of the agent at the beginning of the time horizon, to be integrated with the marginal likelihood to obtain posterior at the first timestep.
        If absent, prior is set to be a uniform distribution over hidden states (identical to the initialisation of the posterior.
    -'num_iter' [int]:
        Number of variational iterations to run. (optional)
    -'dF' [float]:
        Starting free energy gradient (dF/dt) before updating in the course of gradient descent.  (optional)
    -'dF_tol' [float]:
        Threshold value of the gradient of the variational free energy (dF/dt), to be checked at each iteration. If 
        dF <= dF_tol, the iterations are halted pre-emptively and the final marginal posterior belief(s) is(are) returned.  (optional)
    -'previous_actions' [numpy.ndarray with shape (num_steps, n_control_factors) or None]:
        Array of previous actions, which can be used to constrain the 'past' messages in inference to only consider states of affairs that were possible
        under actions that are known to have been taken. The first dimension of previous-arrays (previous_actions.shape[0]) encodes how far back in time
        the agent is considering. The first timestep of this either corresponds to either the first timestep of the generative process or the f
        first timestep of the policy horizon (whichever is sooner in time).  (optional)
    -'use_gradient_descent' [bool]:
        Flag to indicate whether to use gradient descent to optimise posterior beliefs.
    -'tau' [float]:
        Learning rate for gradient descent (only used if use_gradient_descent is True)
 
  
    Returns
    ----------
    -'qs' [list of length T of numpy 1D arrays or array of arrays (with 1D numpy array entries):
        Marginal posterior beliefs over hidden states (single- or multi-factor) achieved via marginal message pasing
    -'qss' [list of lists of length T of numpy 1D arrays or array of arrays (with 1D numpy array entries):
        Marginal posterior beliefs about hidden states (single- or multi-factor) held at each timepoint, *about* each timepoint of the observation
        sequence
    -'F' [2D np.ndarray]:
        Variational free energy of beliefs about hidden states, indexed by time point and variational iteration
    -'F_pol' [float]:
        Total free energy of the policy under consideration.
    """

    # get model dimensions
    time_window_idxs = np.array([
        i
        for i in range(max(0, curr_t - t_horizon), min(T, curr_t + t_horizon))
    ])
    window_len = len(time_window_idxs)
    print("window_len ", window_len)
    if utils.is_arr_of_arr(obs_t[0]):
        n_observations = [obs_array_i.shape[0] for obs_array_i in obs_t[0]]
    else:
        n_observations = [obs_t[0].shape[0]]

    if utils.is_arr_of_arr(B):
        n_states = [sub_B.shape[0] for sub_B in B]
    else:
        n_states = [B[0].shape[0]]
        B = utils.to_arr_of_arr(B)

    n_modalities = len(n_observations)
    n_factors = len(n_states)
    """
    =========== Step 1 ===========
        Loop over the observation modalities and use assumption of independence among observation modalities
        to multiply each modality-specific likelihood onto a single joint likelihood over hidden states [shape = n_states]
    """

    # compute time-window, taking into account boundary conditions
    if curr_t == 0:
        obs_range = [0]
    else:
        obs_range = range(max(0, curr_t - t_horizon), curr_t)

    # likelihood of observations under configurations of hidden causes (over time)
    likelihood = np.empty(len(obs_range), dtype=object)
    for t in range(len(obs_range)):
        likelihood_t = np.ones(tuple(n_states))

        if n_modalities == 1:
            likelihood_t *= spm_dot(A, obs_t[obs_range[t]], obs_mode=True)
        else:
            for modality in range(n_modalities):
                likelihood_t *= spm_dot(A[modality],
                                        obs_t[obs_range[t]][modality],
                                        obs_mode=True)
        likelihood[t] = np.log(likelihood_t + 1e-16)
    print("ll", likelihood[0].shape)
    """
    =========== Step 2 ===========
        Create a flat posterior (and prior if necessary)
        If prior is not provided, initialise prior to be identical to posterior
        (namely, a flat categorical distribution).
    """

    qs = [np.empty(n_factors, dtype=object) for i in range(window_len + 1)]
    for t in range(window_len + 1):
        for f in range(n_factors):
            qs[t][f] = np.ones(n_states[f]) / n_states[f]

    if prior is None:
        prior = np.empty(n_factors, dtype=object)
        for f in range(n_factors):
            prior[f] = np.ones(n_states[f]) / n_states[f]

    # set final future message as all ones at the time horizon (no information from beyond the horizon)
    last_message = np.empty(n_factors, dtype=object)
    for f in range(n_factors):
        last_message[f] = np.ones(n_states[f])
    # qs[-1] = last_message
    """
    =========== Step 3 ===========
        Loop over time indices of time window, which includes time before the policy horizon 
        as well as including the policy horizon
        n_steps, n_factors [0 1 2 0;
                            1 2 0 1]
    """

    if previous_actions is None:
        previous_actions = np.zeros((1, policy.shape[1]))

    full_policy = np.vstack((previous_actions, policy))

    qss = [[] for i in range(num_iter)]
    F = np.zeros((len(qs), num_iter))
    F_pol = 0.0

    for n in range(num_iter):
        for t in range(0, len(qs) - 1):
            lnBpast_tensor = np.empty(n_factors, dtype=object)
            for f in range(n_factors):
                if t <= len(obs_t):
                    lnA = spm_dot(likelihood[t], qs[t], [f])
                else:
                    lnA = np.zeros(n_states[f])
                # the 'forwards message' in VB_X
                if t == 0:
                    lnBpast = np.log(prior[f] + 1e-16)
                else:
                    lnBpast = 0.5 * np.log(B[f][:, :, full_policy[
                        t - 1, f]].dot(qs[t - 1][f]) + 1e-16)

                if t == len(qs) - 1:
                    lnBfuture = last_message[f]
                else:  # the 'backwards message' in VB_X
                    lnBfuture = 0.5 * np.log(
                        spm_norm(B[f][:, :, int(full_policy[t + 1, f])].T).dot(
                            qs[t + 1][f]) + 1e-16)
                lnBpast_tensor[f] = 2 * lnBpast
                if use_gradient_descent:
                    # gradients
                    lns = np.log(qs[t][f] + 1e-16)  # current estimate
                    e = (lnA + lnBpast + lnBfuture) - lns  # prediction error
                    lns += tau * e  # increment the current (log) belief with the prediction error
                    # e -= e.mean() # Karl does this

                    qs_t_f = softmax(lns)

                    F_pol += 0.5 * qs[t][f].dot(e)

                    qs[t][f] = qs_t_f
                else:
                    # free energy minimum for the factor in question
                    qs[t][f] = softmax(lnA + lnBpast + lnBfuture)

            F[t, n] = calc_free_energy(qs[t], lnBpast_tensor, n_factors,
                                       likelihood[t])
            F_pol += F[t, n]
        qss[n].append(qs)

    return qs, qss, F, F_pol
Esempio n. 14
0
def update_transition_dirichlet(pB,
                                B,
                                actions,
                                qs,
                                qs_prev,
                                lr=1.0,
                                return_numpy=True,
                                factors="all"):
    """
    Update Dirichlet parameters that parameterize the transition model of the generative model 
    (describing the probabilistic mapping between hidden states over time).

    Parameters
    -----------
   -  pB [numpy nd.array, array-of-arrays (with np.ndarray entries), or Dirichlet (either single-modality or AoA)]:
        The prior Dirichlet parameters of the generative model, parameterizing the agent's beliefs about the transition likelihood. 
    - B [numpy nd.array, object-like array of arrays, or Categorical (either single-modality or AoA)]:
        The transition likelihood of the generative model. 
    - actions [tuple]:
        A tuple containing the action(s) performed at a given timestep.
    - Qs_curr [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), or Categorical (either single-factor or AoA)]:
        Current marginal posterior beliefs about hidden state factors
    - Qs_prev [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), or Categorical (either single-factor or AoA)]:
        Past marginal posterior beliefs about hidden state factors
    - eta [float, optional]:
        Learning rate.
    - return_numpy [bool, optional]:
        Logical flag to determine whether output is a numpy array or a Dirichlet
    - which_factors [list, optional]:
        Indices (in terms of range(Nf)) of the hidden state factors to include in learning.
        Defaults to 'all', meaning that transition likelihood matrices for all hidden state factors
        are updated as a function of transitions in the different control factors (i.e. actions)
    """

    pB = utils.to_numpy(pB)

    if utils.is_arr_of_arr(pB):
        n_factors = len(pB)
    else:
        n_factors = 1

    if return_numpy:
        pB_updated = pB.copy()
    else:
        pB_updated = utils.to_dirichlet(pB.copy())

    if not utils.is_distribution(qs):
        qs = utils.to_categorical(qs)

    if factors == "all":
        if n_factors == 1:
            db = qs.cross(qs_prev, return_numpy=True)
            db = db * (B[:, :, actions[0]] > 0).astype("float")
            pB_updated = pB_updated + (lr * db)

        elif n_factors > 1:
            for f in range(n_factors):
                db = qs[f].cross(qs_prev[f], return_numpy=True)
                db = db * (B[f][:, :, actions[f]] > 0).astype("float")
                pB_updated[f] = pB_updated[f] + (lr * db)
    else:
        for f_idx in factors:
            db = qs[f_idx].cross(qs_prev[f_idx], return_numpy=True)
            db = db * (B[f_idx][:, :, actions[f_idx]] > 0).astype("float")
            pB_updated[f_idx] = pB_updated[f_idx] + (lr * db)

    return pB_updated
Esempio n. 15
0
def update_likelihood_dirichlet(pA,
                                A,
                                obs,
                                qs,
                                lr=1.0,
                                return_numpy=True,
                                modalities="all"):
    """ Update Dirichlet parameters of the likelihood distribution 

    Parameters
    -----------
    - pA [numpy nd.array, array-of-arrays (with np.ndarray entries), or Dirichlet (either single-modality or AoA)]:
        The prior Dirichlet parameters of the generative model, parameterizing the agent's beliefs about the observation likelihood. 
    - A [numpy nd.array, object-like array of arrays, or Categorical (either single-modality or AoA)]:
        The observation likelihood of the generative model. 
    - obs [numpy 1D array, array-of-arrays (with 1D numpy array entries), int or tuple]:
        A discrete observation used in the update equation
    - Qx [numpy 1D array, array-of-arrays (where each entry is a numpy 1D array), or Categorical (either single-factor or AoA)]:
            Current marginal posterior beliefs about hidden state factors
    - lr [float, optional]:
            Learning rate.
    - return_numpy [bool, optional]:
        Logical flag to determine whether output is a numpy array or a Dirichlet
    - modalities [list, optional]:
        Indices (in terms of range(n_modalities)) of the observation modalities to include in learning.
        Defaults to 'all, meaning that observation likelihood matrices for all modalities
        are updated as a function of observations in the different modalities.
    """

    pA = utils.to_numpy(pA)

    if utils.is_arr_of_arr(pA):
        n_modalities = len(pA)
        n_observations = [pA[m].shape[0] for m in range(n_modalities)]
    else:
        n_modalities = 1
        n_observations = [pA.shape[0]]

    if return_numpy:
        pA_updated = pA.copy()
    else:
        pA_updated = utils.to_dirichlet(pA.copy())

    # observation index
    if isinstance(obs, (int, np.integer)):
        obs = np.eye(A.shape[0])[obs]

    # observation indices
    elif isinstance(obs, tuple):
        obs = np.array(
            [np.eye(n_observations[g])[obs[g]] for g in range(n_modalities)],
            dtype=object)

    # convert to Categorical to make the cross product easier
    obs = utils.to_categorical(obs)

    if modalities == "all":
        if n_modalities == 1:
            da = obs.cross(qs, return_numpy=True)
            da = da * (A > 0).astype("float")
            pA_updated = pA_updated + (lr * da)

        elif n_modalities > 1:
            for g in range(n_modalities):
                da = obs[g].cross(qs, return_numpy=True)
                da = da * (A[g] > 0).astype("float")
                pA_updated[g] = pA_updated[g] + (lr * da)
    else:
        for g_idx in modalities:
            da = obs[g_idx].cross(qs, return_numpy=True)
            da = da * (A[g_idx] > 0).astype("float")
            pA_updated[g_idx] = pA_updated[g_idx] + (lr * da)

    return pA_updated