Exemple #1
0
def JN(domain: Domain, policy: Policy.Policy, N):
    """Return the discounted reward collected by running `policy` for N steps.

    Starting from ``domain.state``, each step asks the policy for an action,
    reads ``domain.reward(state, action)`` and then advances the domain with
    ``domain.moves(action)``. The rewards are combined as
    ``r_0 + gamma * (r_1 + gamma * (r_2 + ...))`` with ``gamma`` taken from
    ``domain.gamma``.

    The domain is mutated: on return it has been advanced once per simulated
    step. NOTE(review): if ``domain.moves`` is random this is the return of a
    single sampled trajectory rather than an expectation -- confirm against
    the Domain implementation.

    Args:
        domain: object exposing ``state``, ``gamma``, ``reward(state, action)``
            and ``moves(action)``.
        policy: object exposing ``action(state)``.
        N: number of steps to simulate. Zero or a negative value simulates
            nothing.

    Returns:
        ``0`` when no step is simulated, otherwise the discounted sum of the
        rewards of the N steps.
    """
    # Roll the trajectory forward iteratively (no recursion-depth limit),
    # remembering each step's reward and the discount read right after it.
    trajectory = []
    for _ in range(N):
        # Ask the policy exactly once per step so the rewarded action is the
        # one actually played, even when the policy is stochastic or stateful.
        chosen = policy.action(domain.state)
        step_reward = domain.reward(domain.state, chosen)
        domain.moves(chosen)
        trajectory.append((step_reward, domain.gamma))

    # Fold from the last step back to the first: this performs the same
    # arithmetic, in the same order, as the recursive definition.
    value = 0
    for step_reward, discount in reversed(trajectory):
        value = step_reward + discount * value
    return value
Exemple #2
0
def MatrixJN(domain: Domain, policy: Policy.Policy, N):
    """Return the successive value matrices of `policy` on the grid of `domain`.

    The first matrix is all zeros. Every following matrix is obtained from the
    previous one by one backup per cell ``[i, j]``:

        reward([i, j], a)
        + gamma * (1 - beta) * previous[j', i']
        + gamma * beta * previous[0][0]

    where ``a = policy.action([i, j])`` and ``(i', j')`` is ``(i + a[0],
    j + a[1])`` clamped to the grid. Matrices are indexed ``[j][i]`` and have
    shape ``(domain.m, domain.n)``.

    NOTE(review): the ``beta`` term presumably models a random jump to cell
    (0, 0) -- confirm against the Domain implementation.
    NOTE(review): the loop runs over ``range(1, N)``, so the list holds
    ``max(N, 1)`` matrices and its last entry is the result of ``N - 1``
    backups. Kept as is for callers; confirm whether ``N`` backups were
    intended.

    Args:
        domain: object exposing ``n``, ``m``, ``gamma``, ``beta`` and
            ``reward(state, action)``.
        policy: object exposing ``action(state)`` that returns a pair of
            offsets ``(di, dj)``.
        N: horizon controlling how many matrices are produced.

    Returns:
        List of 2-D float arrays, starting with the zero matrix.
    """
    width, height = domain.n, domain.m
    gamma, beta = domain.gamma, domain.beta

    L = [np.zeros((height, width))]
    for _ in range(1, N):
        previous = L[-1]
        current = np.zeros((height, width))
        for i in range(width):
            for j in range(height):
                # Query the policy once per cell so the reward and the
                # transition are evaluated for the same action.
                move = policy.action([i, j])
                # Moves that would leave the grid are clamped to the border.
                next_j = min(max(j + move[1], 0), height - 1)
                next_i = min(max(i + move[0], 0), width - 1)

                cell = domain.reward([i, j], move)
                cell += gamma * (1 - beta) * previous[next_j][next_i]
                cell += gamma * beta * previous[0][0]
                current[j][i] = cell
        L.append(current)
    return L