Ejemplo n.º 1
0
def loglik(w, D):
    """Negated log-probability, gradient and Hessian for a SARSA softmax agent.

    Args:
        w: Unconstrained parameter vector with three entries. They are mapped
            through sigmoid, stable_exp and sigmoid and then used as the
            learning rate, inverse softmax temperature and trace decay.
        D: 2-D data array, one row per step, whose columns are laid out as
            ``[X1 (nx) | U1 (nu) | R (1) | X2 (nx) | U2 (nu) | done (1)]``.

    Returns:
        A tuple ``(-L, -J @ g, -J.T @ H @ J)`` where ``L`` is the agent's
        accumulated ``logprob_``, ``g``/``H`` are the selected entries of the
        agent's ``grad_``/``hess_`` and ``J`` is ``reparam_jac`` evaluated at
        the transformed parameters.
    """
    # Column boundaries of each field inside a row of D.
    reward_col = nx + nu
    next_state_lo = reward_col + 1
    next_action_lo = next_state_lo + nx
    done_col = next_action_lo + nu

    X1 = D[:, :nx]
    U1 = D[:, nx:reward_col]
    R = D[:, reward_col].flatten()
    X2 = D[:, next_state_lo:next_action_lo]
    U2 = D[:, next_action_lo:done_col]
    done = D[:, done_col:done_col + 1].flatten()

    # Map the unconstrained vector onto each parameter's domain.
    theta = fu.transform(w, [fu.sigmoid, fu.stable_exp, fu.sigmoid]).flatten()
    J = reparam_jac(theta)

    agent = SARSASoftmaxAgent(
        task=task(),
        learning_rate=theta[0],
        inverse_softmax_temp=theta[1],
        discount_factor=1.,
        trace_decay=theta[2],
    )

    for step in range(X1.shape[0]):
        if not np.equal(done[step], 0):
            # Flagged as done: no real next action, so use an all-zero one.
            next_action = np.zeros(nu)
        else:
            agent.reset_trace()
            next_action = U2[step]
        agent.log_prob(X1[step], U1[step])
        agent.learning(X1[step], U1[step], R[step], X2[step], next_action)

    L = agent.logprob_

    # Keep the derivative entries of the three fitted parameters; index 2 is
    # presumably the fixed discount factor -- TODO confirm against the agent.
    g = agent.grad_[[0, 1, 3]]
    keep = [0, 1, -1]
    H = agent.hess_[:, keep][keep, :]

    return -L, -J @ g, -J.T @ H @ J
Ejemplo n.º 2
0
def loglik(w, D):
    """Negated log-probability, gradient and Hessian for a sticky RW agent.

    Args:
        w: Unconstrained parameter vector with three entries. They are mapped
            through sigmoid, clipped_exp and the identity and then used as
            the learning rate, inverse softmax temperature and perseveration.
        D: 2-D data array, one row per trial, whose leading columns are laid
            out as ``[X1 (nx) | U1 (nu) | R (1) | X2 (nx)]``.

    Returns:
        A tuple ``(-L, -J @ grad, -J.T @ hess @ J)`` where ``L`` is the
        agent's accumulated ``logprob_``, ``grad``/``hess`` are the agent's
        ``grad_``/``hess_`` and ``J`` is ``reparam_jac`` evaluated at the
        transformed parameters.
    """
    X1 = D[:, :nx]
    U1 = D[:, nx:nx + nu]
    R = D[:, nx + nu]
    X2 = D[:, nx + nu + 1:nx + nu + 1 + nx]

    # Map the unconstrained vector onto each parameter's domain.
    w = fu.transform(w, [fu.sigmoid, clipped_exp, fu.I]).flatten()
    J = reparam_jac(w)

    q = RWStickySoftmaxAgent(task=task(),
                             learning_rate=w[0],
                             inverse_softmax_temp=w[1],
                             perseveration=w[2])

    ntrials = X1.shape[0]
    for t in range(ntrials):
        q.log_prob(X1[t], U1[t])
        # No next action is supplied to the update for this agent.
        q.learning(X1[t], U1[t], R[t], X2[t], None)

    L = q.logprob_
    # The original return line was corrupted into "[email protected]_" by an
    # e-mail scrubber; restored to the chain-rule form used for the gradient
    # and Hessian of the reparameterised objective.
    return -L, -J @ q.grad_, -J.T @ q.hess_ @ J
Ejemplo n.º 3
0
def test_transform():
    """Check that transform applies one function per input entry.

    Each input is chosen so that its function yields an exactly
    representable value: sigmoid(0)=0.5, tanh(0)=0, relu(-10)=0,
    stable_exp(0)=1 and I(55)=55.
    """
    raw = np.array([0, 0, -10, 0, 55])
    funcs = [sigmoid, tanh, relu, stable_exp, I]
    expected = np.array([0.5, 0, 0, 1, 55])

    actual = transform(raw, funcs).flatten()

    matches = np.equal(actual, expected)
    assert np.all(matches)
                  maxstarts=4,
                  maxstarts_without_improvement=2,
                  init_sd=1,
                  njobs=-1,
                  jac=True,
                  hess=True,
                  method='trust-exact')

# Fit the sticky Rescorla-Wagner softmax model (3 free parameters).
res_rwssm = mlepar(
    # Objective and data.
    f=rwstickysoftmax_loglik,
    data=data_rwsticky.tensor,
    nparams=3,
    # Random-restart schedule.
    minstarts=2,
    maxstarts=4,
    maxstarts_without_improvement=2,
    init_sd=1,
    # Optimiser: the objective supplies its own gradient and Hessian.
    jac=True,
    hess=True,
    method='trust-exact',
    njobs=-1,
)

# Bare expression kept from the original (notebook-style display of the raw,
# untransformed estimates).
res.xmin

# Rows of res.xmin that contain no NaN; the same mask selects the matching
# rows of the reference parameters below.
nan_free_rows = np.logical_not(np.any(np.isnan(res.xmin), axis=1))

xhat = res.xmin[nan_free_rows, :]
# Map each row of estimates through sigmoid / stable_exp. np.stack needs a
# real sequence: passing a generator is deprecated since NumPy 1.16 and
# raises TypeError on current releases.
xhat = np.stack([
    fu.transform(xhat[i], [fu.sigmoid, fu.stable_exp]).flatten()
    for i in range(xhat.shape[0])
])
# Column 0 of data.params is dropped (presumably an identifier column --
# TODO confirm).
xtrue = data.params[nan_free_rows, 1:]

# Compare reference and estimated values for the first parameter.
f = actual_estimate(xtrue[:, 0], xhat[:, 0])
# Same for the second parameter, restricted to rows whose estimate is < 20.
f = actual_estimate(xtrue[xhat[:, 1] < 20, 1], xhat[xhat[:, 1] < 20, 1])