Example #1
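# Fragment of a custom task definition: this __init__ sits inside a
# `class MyBanditTask(...)` body whose base class is not shown in the source;
# the imports it relies on (numpy as np, generate_behavioural_data,
# RWSoftmaxAgent) appear in Example #4 below.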
    def __init__(self):
        super().__init__(nactions=4,
                         noutcomes=3,
                         nstates=4,
                         min_actions_per_context=None,
                         alpha=0.1,
                         alpha_start=1.,
                         shift_flip='shift',
                         reward_lb=-1,
                         reward_ub=1,
                         reward_drift='on',
                         drift_mu=np.zeros(3),
                         drift_sd=1.)


data = generate_behavioural_data(MyBanditTask, RWSoftmaxAgent, 20, 200)  # 20 subjects, 200 trials each


def log_prob(w, D):
    agent = RWSoftmaxAgent(task=MyBanditTask(),
                           learning_rate=w[0],
                           inverse_softmax_temp=w[1])
    L = 0
    for t in range(D.shape[0]):
        x = D[t, :7]
        u = D[t, 7:11]
        r = D[t, 11]
        x_ = D[t, 12:]
        L += u @ agent.log_prob(x)
        agent.learning(x, u, r, x_, None)
    return L
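
The example above defines log_prob but never calls it. As a minimal, hedged usage sketch (not part of the original example, and assuming the BehaviouralData object returned by generate_behavioural_data exposes a .tensor array of shape (nsubjects, ntrials, ncolumns)):

# Evaluate the log-likelihood of subject 0 at a candidate parameter vector
# (learning rate, inverse softmax temperature).
w0 = [0.1, 2.0]
print(log_prob(w0, data.tensor[0]))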
Example #2
def test_cooccurrence_matrix():
    data1 = generate_behavioural_data(TwoArmedBandit, RWSoftmaxAgent, 10, 20)
    data1.make_behavioural_ngrams(2)
    data1.make_cooccurrence_matrix(2)
Example #3
def test_bdf():
    data1 = generate_behavioural_data(TwoArmedBandit, RWSoftmaxAgent, 10, 20)
    data2 = generate_behavioural_data(TwoArmedBandit, RWSoftmaxAgent, 10, 20)
    data = merge_behavioural_data([data1, data2])
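
As a hedged follow-up that could be appended inside test_bdf (not part of the original test; the .tensor attribute and its subject-first layout are assumptions), the merge can be sanity-checked:

    # Two 10-subject, 20-trial datasets should merge into 20 subjects of 20 trials.
    assert data.tensor.shape[0] == 20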
Example #4
import numpy as np
import matplotlib.pyplot as plt
from fitr import generate_behavioural_data
from fitr.environments import TwoArmedBandit
from fitr.agents import RWSoftmaxAgent
from fitr.inference import mlepar
from fitr.utils import sigmoid
from fitr.utils import stable_exp
from fitr.criticism.plotting import actual_estimate
import fitr.gradients as grad

N = 50  # number of subjects
T = 200  # number of trials

# Generate synthetic data
data = generate_behavioural_data(TwoArmedBandit, RWSoftmaxAgent, N, T)


# Create log-likelihood function
def log_prob(w, D):
    lr = sigmoid(w[0], a_min=-6, a_max=6)        # map to (0, 1): learning rate
    ist = stable_exp(w[1], a_min=-10, a_max=10)  # map to (0, inf): inverse softmax temperature
    agent = RWSoftmaxAgent(TwoArmedBandit(), lr, ist)
    L = 0
    for t in range(D.shape[0]):
        x = D[t, :3]    # one-hot encoded state
        u = D[t, 3:5]   # one-hot encoded action
        r = D[t, 5]     # reward
        x_ = D[t, 6:]   # one-hot encoded next state
        L += u @ agent.log_prob(x)   # accumulate choice log-likelihood (as in Example #1)
        agent.learning(x, u, r, x_, None)
    return L
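
The snippet ends before the fitting and plotting steps. As a rough, hedged sketch of how the remaining imports could be used (not the original example's code; the mlepar call signature, the res.xmin attribute, and the column layout of data.params are assumptions):

# Fit every subject by parallel maximum-likelihood estimation over the 2 free parameters.
res = mlepar(log_prob, data.tensor, 2, maxstarts=5)

# Map the unconstrained estimates back to their native ranges,
# assuming the result object exposes per-subject optima as res.xmin.
lr_hat = sigmoid(res.xmin[:, 0], a_min=-6, a_max=6)
ist_hat = stable_exp(res.xmin[:, 1], a_min=-10, a_max=10)

# Compare estimated against simulated parameters (data.params column order is assumed).
actual_estimate(data.params[:, 1], lr_hat); plt.show()
actual_estimate(data.params[:, 2], ist_hat); plt.show()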