def test_sample_single(self):

    # values are already normalized
    values = np.array([1.0, 0.0])
    c = Categorical(values=values)
    self.assertEqual(0, c.sample())

    # values are not normalized
    values = np.array([0, 10.0])
    c = Categorical(values=values)
    self.assertEqual(1, c.sample())
Example No. 2
def sample_action(q_pi, policies, n_control, sampling_type="marginal_action"):
    """
    Samples action from posterior over policies, using one of two methods. 
    Parameters
    ----------
    q_pi [1D numpy.ndarray or Categorical]:
        Posterior beliefs about (possibly multi-step) policies.
    policies [list of numpy ndarrays]:
        List of arrays that indicate the policies under consideration. Each element of the list is a matrix that stores the
        indices of the actions taken upon the separate hidden state factors, at each timestep (nStep x nControlFactor)
    n_control [list of integers]:
        List of the dimensionalities of the different (controllable) hidden state factors
    sampling_type [string, 'marginal_action' or 'posterior_sample']:
        Indicates whether the sampled action for a given hidden state factor is given by the evidence for that action, marginalized across different policies ('marginal_action')
        or simply the action entailed by a sample from the posterior over policies ('posterior_sample').
    Returns
    ----------
    selected_policy [1D numpy ndarray]:
        Numpy array containing the indices of the actions along each control factor.
    """

    n_factors = len(n_control)

    if sampling_type == "marginal_action":

        if utils.is_distribution(q_pi):
            q_pi = utils.to_numpy(q_pi)

        action_marginals = np.empty(n_factors, dtype=object)
        for c_idx in range(n_factors):
            action_marginals[c_idx] = np.zeros(n_control[c_idx])

        # weight each action according to its integrated posterior probability over policies and timesteps
        for pol_idx, policy in enumerate(policies):
            for t in range(policy.shape[0]):
                for factor_i, action_i in enumerate(policy[t, :]):
                    action_marginals[factor_i][action_i] += q_pi[pol_idx]

        action_marginals = Categorical(values=action_marginals)
        action_marginals.normalize()
        selected_policy = np.array(action_marginals.sample())

    elif sampling_type == "posterior_sample":
        if utils.is_distribution(q_pi):
            policy_index = q_pi.sample()
            selected_policy = policies[policy_index]
        else:
            q_pi = Categorical(values=q_pi)
            policy_index = q_pi.sample()
            selected_policy = policies[policy_index]

    return selected_policy
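
# A minimal usage sketch for the multi-step sample_action above -- assuming
# sample_action, Categorical, and utils are importable from the surrounding
# library; the shapes and values below are purely illustrative.
import numpy as np

n_control = [2, 3]  # two controllable factors, with 2 and 3 possible actions

# two single-step policies, each of shape (n_steps x n_control_factors)
policies = [np.array([[0, 1]]), np.array([[1, 2]])]

# posterior over policies (sums to 1)
q_pi = np.array([0.75, 0.25])

# marginalize the posterior over policies into per-factor action marginals,
# then sample one action index per control factor
action = sample_action(q_pi, policies, n_control, sampling_type="marginal_action")
print(action)  # e.g. [0 1]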
Example No. 3

def test_sample_AoA(self):

    # values are already normalized
    values_1 = np.array([1.0, 0.0])
    values_2 = np.array([0.0, 1.0, 0.0])
    values = np.array([values_1, values_2], dtype=object)
    c = Categorical(values=values)
    self.assertTrue(np.isclose(np.array([0, 1]), c.sample()).all())

    # values are not normalized
    values_1 = np.array([10.0, 0.0])
    values_2 = np.array([0.0, 10.0, 0.0])
    values = np.array([values_1, values_2], dtype=object)
    c = Categorical(values=values)
    self.assertTrue(np.isclose(np.array([0, 1]), c.sample()).all())
Example No. 4
def sample_action(p_i, possible_policies, Nu, sampling_type="marginal_action"):
    """
    Samples action from posterior over policies, using one of two methods. 
    @TODO: Needs to be amended for use with multi-step policies (where possible_policies is a list of np.arrays (nStep x nFactor), not just a list of tuples as it is now)
    Parameters
    ----------
    p_i [1D numpy.ndarray or Categorical]:
        Variational posterior over policies.
    possible_policies [list of tuples]:
        List of tuples that indicate the possible policies under consideration. Each tuple stores the actions taken upon the separate hidden state factors. 
        Same length as p_i.
    Nu [list of integers]:
        List of the dimensionalities of the different (controllable) hidden states
    sampling_type [string, 'marginal_action' or 'posterior_sample']:
        Indicates whether the sampled action for a given hidden state factor is given by the evidence for that action, marginalized across different policies ('marginal_action')
        or simply the action entailed by the policy sampled from the posterior ('posterior_sample').
    Returns
    ----------
    selected_policy [tuple]:
        Tuple containing the actions selected by the agent (one for each controllable hidden state factor).
    """

    numControls = len(Nu)

    if sampling_type == "marginal_action":

        if isinstance(p_i, Categorical):
            p_i = p_i.values.squeeze()

        action_marginals = np.empty(numControls, dtype=object)
        for nu_i in range(numControls):
            action_marginals[nu_i] = np.zeros(Nu[nu_i])

        # Weight each action according to the posterior probability it gets across policies
        for pol_i, policy in enumerate(possible_policies):
            for nu_i, a_i in enumerate(policy):
                action_marginals[nu_i][a_i] += p_i[pol_i]

        action_marginals = Categorical(values=action_marginals)
        action_marginals.normalize()
        selected_policy = action_marginals.sample()

    elif sampling_type == "posterior_sample":
        if isinstance(p_i, Categorical):
            policy_index = p_i.sample()
            selected_policy = possible_policies[policy_index]
        else:
            sample_onehot = np.random.multinomial(1, p_i.squeeze())
            policy_index = np.where(sample_onehot == 1)[0][0]
            selected_policy = possible_policies[policy_index]

    return selected_policy
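
# A quick sketch of the 'posterior_sample' path of the single-step
# sample_action above -- assuming that function is in scope; the policies and
# the posterior below are made up for illustration.
import numpy as np

possible_policies = [(0, 0), (0, 1), (1, 1)]  # single-step policies as tuples
Nu = [2, 2]  # two actions available for each controllable factor

# variational posterior over the three policies
p_i = np.array([0.1, 0.6, 0.3])

# draws a one-hot multinomial sample from p_i and returns the tuple of
# actions entailed by the sampled policy
action = sample_action(p_i, possible_policies, Nu, sampling_type="posterior_sample")
print(action)  # e.g. (0, 1)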