Example #1
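The excerpts below are test code and omit the file-level imports. A plausible preamble is sketched here; the core `pymdp` imports are standard, but the module paths for the MMP helpers and the `rand_*` utilities are assumptions and differ between pymdp versions.

import unittest

import numpy as np

from pymdp import utils
from pymdp.agent import Agent
from pymdp.utils import random_A_matrix, random_B_matrix
# the remaining helpers used below (rand_onehot_obs, rand_controls, rand_dist_states,
# update_posterior_states_v2, update_posterior_policies_mmp, average_states_over_policies,
# get_joint_likelihood_seq, run_mmp_v2) are assumed to live in the accompanying
# inference / control / test-helper modules of the same codebase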
    def test_fpi_inference(self):
        num_obs = [2, 4]
        num_states = [2, 2]
        num_control = [2, 2]
        A = utils.random_A_matrix(num_obs, num_states)
        B = utils.random_B_matrix(num_states, num_control)

        C = utils.obj_array_zeros(num_obs)  # flat (zero) preferences for both modalities
        C[1][0] = 1.0   # prefer outcome 0 of modality 1
        C[1][1] = -2.0  # avoid outcome 1 of modality 1

        agent = Agent(A=A, B=B, C=C, control_fac_idx=[1])  # only state factor 1 is controllable
        o, s = [0, 2], [0, 0]  # observation index per modality (s is unused here)
        qx = agent.infer_states(o)
        agent.infer_policies()
        action = agent.sample_action()

        self.assertEqual(len(action), len(num_control))
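
        # the posterior returned by fixed-point (VANILLA) inference holds one categorical
        # belief per hidden state factor; a hedged follow-up check, assuming the standard
        # obj_array layout and consistent with the factor sizes declared above
        for f, qx_f in enumerate(qx):
            self.assertEqual(qx_f.shape[0], num_states[f])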
Example #2
    def test_mmp_active_inference(self):
        """
        Tests to make sure whole active inference loop works (with various past and future
        inference/policy horizons).

        @TODO: Need to check this against a MATLAB output, where
        the sequence of all observations / actions / generative model
        parameters are used (with deterministic action sampling and
        pre-determined generative process outputs - i.e. no effects of action)
        """

        num_obs = [3, 2]
        num_states = [4, 3]
        num_control = [1, 3]
        A = utils.random_A_matrix(num_obs, num_states)
        B = utils.random_B_matrix(num_states, num_control)

        C = utils.obj_array_zeros(num_obs)
        C[1][0] = 1.0
        C[1][1] = -2.0

        agent = Agent(A=A,
                      B=B,
                      C=C,
                      control_fac_idx=[1],
                      inference_algo="MMP",
                      policy_len=2,
                      inference_horizon=3)

        T = 10

        for t in range(T):

            # randomly generate observations at each timestep (no generative process)
            o = [np.random.randint(num_ob) for num_ob in num_obs]
            qx = agent.infer_states(o)
            agent.infer_policies()
            action = agent.sample_action()

        print(agent.prev_actions)
        print(agent.prev_obs)
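
        # hedged extra check: the most recent sampled action should still have one entry
        # per control factor after the loop
        self.assertEqual(len(action), len(num_control))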
Example #3
    def test_mmp_inference(self):
        num_obs = [2, 4]
        num_states = [2, 2]
        num_control = [2, 2]
        A = utils.random_A_matrix(num_obs, num_states)
        B = utils.random_B_matrix(num_states, num_control)

        C = utils.obj_array_zeros(num_obs)
        C[1][0] = 1.0
        C[1][1] = -2.0

        agent = Agent(A=A,
                      B=B,
                      C=C,
                      control_fac_idx=[1],
                      inference_algo="MMP",
                      policy_len=5,
                      inference_horizon=1)
        o = [0, 2]
        qx = agent.infer_states(o)

        print(qx[0].shape)
        print(qx[1].shape)
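
        # under MMP the posterior is assumed to be policy-conditioned: the leading index of
        # qx runs over policies, and each entry is a sequence of per-timestep beliefs
        # spanning the inference horizon plus the policy length
        for p_idx, qs_pi in enumerate(qx[:2]):
            print(f"policy {p_idx}: {len(qs_pi)} timesteps")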
Example #4
    def test_update_posterior_states_v2(self):
        """
        Testing our SPM-ified version of `run_MMP` with
            1 hidden state factor & 1 outcome modality, at a random fixed
            timestep during the generative process
        """

        past_len = 3
        future_len = 4
        num_policies = 5
        num_states = [6, 7, 8]
        num_controls = [9, 10, 11]
        num_obs = [12, 13, 14]
        num_modalities = len(num_obs)

        A = random_A_matrix(num_obs, num_states)
        B = random_B_matrix(num_states, num_controls)
        prev_obs = [rand_onehot_obs(num_obs) for _ in range(past_len)]
        prev_actions = np.array([rand_controls(num_controls) for _ in range(past_len)])
        policies = [
            np.array([rand_controls(num_controls) for _ in range(future_len)])
            for _ in range(num_policies)
        ]
        prior = rand_dist_states(num_states)

        qs_seq_pi, VFE_policies = update_posterior_states_v2(
            A, B, prev_obs, policies, prev_actions, prior=prior, policy_sep_prior=False
        )

        qs_seq_pi_future = utils.obj_array(num_policies)
        for p_idx in range(num_policies):
            qs_seq_pi_future[p_idx] = qs_seq_pi[p_idx][(1 + past_len) :]
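        # note: each qs_seq_pi[p] spans the past beliefs (indices 0 .. past_len - 1), the
        # current timestep (index past_len), and the future rollout; the slice above keeps
        # only the future segment, which is what the EFE computation below consumes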
        
        # C could also be built per-timestep (an obj_array over the future horizon with one
        # preference vector per modality at each step); a single uniform C over modalities
        # is sufficient here
        C = utils.obj_array_uniform(num_obs)
        
        q_pi, efe = update_posterior_policies_mmp(
            qs_seq_pi_future,
            A,
            B,
            C,
            policies,
            use_utility=True,
            use_states_info_gain=True,
            use_param_info_gain=False,
            prior=None,
            pA=None,
            pB=None,
            F=VFE_policies,
            E=None,
            gamma=16.0,
            return_numpy=True,
        )

        qs_pi_curr_t = utils.obj_array(num_policies)
        for p_idx in range(num_policies):
            qs_pi_curr_t[p_idx] = qs_seq_pi[p_idx][past_len]
        
        qs_bma = average_states_over_policies(qs_pi_curr_t, q_pi) # Bayesian model average of hidden states across policies
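
        # hedged sketch: the same Bayesian model average done by hand for hidden state
        # factor 0 (assumes q_pi comes back as a flat array / column of policy probabilities);
        # the result should match qs_bma[0] up to numerical error
        qs_bma_factor0 = np.zeros(num_states[0])
        for p_idx in range(num_policies):
            qs_bma_factor0 += q_pi[p_idx] * qs_pi_curr_t[p_idx][0]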
Example #5
    return obs


def rand_controls(num_controls):
    """Sample one random control (action) index per control factor."""
    if type(num_controls) is int:
        num_controls = [num_controls]
    controls = np.zeros(len(num_controls))
    for i in range(len(num_controls)):
        controls[i] = np.random.randint(num_controls[i])
    return controls


if __name__ == "__main__":
    past_len = 4
    future_len = 4
    num_states = [8, 12, 13]
    num_controls = [12, 3, 16]
    num_obs = [12, 14, 6]

    A = random_A_matrix(num_obs, num_states)
    B = random_B_matrix(num_states, num_controls)
    prev_obs = [rand_onehot_obs(num_obs) for _ in range(past_len)]
    prev_actions = np.array(
        [rand_controls(num_controls) for _ in range(past_len)])
    policy = np.array([rand_controls(num_controls) for _ in range(future_len)])

    ll_seq = get_joint_likelihood_seq(A, prev_obs, num_states)
    qs_seq = run_mmp_v2(A, B, ll_seq, policy, grad_descent=True)
    for t, qs in enumerate(qs_seq):
        print(f"Step {t} shape {[el.shape for el in qs]}")