コード例 #1
0
ファイル: part_a.py プロジェクト: ysx001/AA203-Homework
def simulate():
    # dynfun = dynamics(stochastic=False)
    dynfun = dynamics(stochastic=True)  # uncomment for stochastic dynamics

    costfun = cost()

    T = 100  # episode length
    N = 100  # number of episodes
    gamma = 0.95  # discount factor
    A = dynfun.A
    B = dynfun.B
    Q = costfun.Q
    R = costfun.R

    L, P = Riccati(A, B, Q, R)

    total_costs = []

    for n in range(N):
        costs = []

        x = dynfun.reset()
        for t in range(T):

            # policy
            u = (-L @ x)

            # get reward
            c = costfun.evaluate(x, u)
            costs.append((gamma**t) * c)

            # dynamics step
            x = dynfun.step(u)

        total_costs.append(sum(costs))

    return np.mean(total_costs)
コード例 #2
0
from model import dynamics, cost
import numpy as np

dynfun = dynamics(stochastic=False)
# dynfun = dynamics(stochastic=True) # uncomment for stochastic dynamics

costfun = cost()


T = 100 # episode length
N = 100 # number of episodes
gamma = 0.95 # discount factor

# Riccati recursion
def Riccati(A,B,Q,R):

    # TODO implement infinite horizon riccati recursion
    #matlab code that works
    # P_current = Q;
    # P_new = P_current;
    # L_current = zeros(size(Q,1), size(R,2)); %general form, in ex. creates 4x1 zeros matrix
    # L_new = L_current;
    # firstIt = true; 
    # while (norm(L_new - L_current, 2) >= 1e-4) || (firstIt == true)
    #     if firstIt == true
    #         firstIt = false;
    #     end
    #     L_current = L_new;
    #     P_current = P_new;
    #     L_new = -inv(R + B'*P_current*B)*(B'*P_current*A);
    #     P_new = Q + L_new'*R*L_new + (A + B*L_new)'*P_current*(A + B*L_new);
コード例 #3
0
#%%
from model import dynamics, cost
import numpy as np
from copy import deepcopy
import matplotlib.pyplot as plt
from tqdm import tqdm

stochastic_dynamics = False # set to True for stochastic dynamics
dynfun = dynamics(stochastic=stochastic_dynamics)
costfun = cost()

T = 100 # episode length
N = 100 # number of episodes
gamma = 0.95 # discount factor

total_costs = []
total_loss = []

# Riccati recursion
def Riccati(A,B,Q,R):

    # implement infinite horizon riccati recursion
    P = np.zeros(np.shape(A))
    P_k1 = np.ones(np.shape(A))
    while np.amax(np.absolute(P_k1-P)) > 1e-1:
        # print(np.amax(np.absolute(P_k1-P)))
        P_k1 = deepcopy(P)
        L = -np.linalg.inv(R + B.T.dot(P_k1).dot(B)).dot(B.T).dot(P_k1).dot(A)
        P = Q + A.T.dot(P_k1).dot(A + B.dot(L))
        P *= gamma