def followon_vector(P, G, di):
    """Compute the followon trace.

    Evaluates ``(I - G P^T)^{-1} di`` where ``I`` is the identity matrix
    with ``len(P)`` rows.

    P  : The transition matrix; must satisfy ``is_stochastic``.
    G  : Matrix that must satisfy ``is_diagonal``.
    di : Right-hand operand multiplied by the inverse (presumably the
         interest-weighted state distribution -- confirm against callers).

    Raises ``AssertionError`` when either check fails.
    """
    assert is_stochastic(P)
    assert is_diagonal(G)

    n_states = len(P)
    # Transposed transition matrix scaled on the left by G.
    backward_step = np.dot(G, P.T)
    resolvent = np.linalg.inv(np.eye(n_states) - backward_step)
    return np.dot(resolvent, di)
def followon_vector(P, G, di):
    """Compute the followon trace.

    Returns the product of ``(I - G P^T)^{-1}`` with ``di``; ``I`` is the
    identity matrix sized by ``len(P)``.

    P  : The transition matrix; asserted to pass ``is_stochastic``.
    G  : Matrix asserted to pass ``is_diagonal``.
    di : Vector (or matrix) the inverse is applied to.

    Raises ``AssertionError`` if ``P`` or ``G`` fails its check.
    """
    assert is_stochastic(P)
    assert is_diagonal(G)

    identity = np.eye(len(P))
    system = identity - np.dot(G, P.T)
    system_inverse = np.linalg.inv(system)
    return np.dot(system_inverse, di)
def warp(P, G, L):
    r"""
    The matrix which warps the distribution due to gamma and lambda.

        warp = (I - P_{\pi} \Gamma \Lambda)^{-1}

    NB: "warp matrix" is non-standard terminology.

    P : The transition matrix (under a policy)
    G : Diagonal matrix, diag([gamma(s_1), ...])
    L : Diagonal matrix, diag([lambda(s_1), ...])

    Returns the inverse of ``I - P G L`` as computed by ``np.linalg.inv``.
    Raises ``AssertionError`` if ``is_stochastic(P)`` does not hold.
    """
    assert is_stochastic(P)
    # The identity must be built locally and sized to P; the product
    # P @ G @ L is what the formula above subtracts from it.
    identity = np.eye(len(P))
    discounted = np.dot(P, np.dot(G, L))
    return np.linalg.inv(identity - discounted)
def warp(P, G, L):
    r"""
    The matrix which warps the distribution due to gamma and lambda.

        warp = (I - P_{\pi} \Gamma \Lambda)^{-1}

    NB: "warp matrix" is non-standard terminology.

    P : The transition matrix (under a policy)
    G : Diagonal matrix, diag([gamma(s_1), ...])
    L : Diagonal matrix, diag([lambda(s_1), ...])

    Returns the inverse of ``I - P G L`` as computed by ``np.linalg.inv``.
    Raises ``AssertionError`` if ``is_stochastic(P)`` does not hold.
    """
    assert is_stochastic(P)
    # Build the identity here (sized to P) rather than relying on an
    # outer name, then subtract P @ G @ L as the documented formula states.
    identity = np.eye(len(P))
    return np.linalg.inv(identity - np.dot(np.dot(P, G), L))
def bellman(P, G, r):
    """Compute the solution to the Bellman equation.

    Evaluates ``(I - G P)^{-1} r`` where ``I`` is the identity matrix with
    ``len(P)`` rows.

    P : The transition matrix; must satisfy ``is_stochastic``.
    G : Matrix that must satisfy ``is_diagonal``.
    r : Right-hand operand multiplied by the inverse (presumably the
        expected reward per state -- confirm against callers).

    Raises ``AssertionError`` when either check fails.

    NOTE(review): the product used is ``G P`` (G applied on the left);
    confirm this ordering is the intended convention.
    """
    assert is_stochastic(P)
    assert is_diagonal(G)

    n_states = len(P)
    discounted_step = np.dot(G, P)
    resolvent = np.linalg.inv(np.eye(n_states) - discounted_step)
    return np.dot(resolvent, r)
def bellman(P, G, r):
    """Compute the solution to the Bellman equation.

    Returns the product of ``(I - G P)^{-1}`` with ``r``; ``I`` is the
    identity matrix sized by ``len(P)``.

    P : The transition matrix; asserted to pass ``is_stochastic``.
    G : Matrix asserted to pass ``is_diagonal``.
    r : Vector (or matrix) the inverse is applied to.

    Raises ``AssertionError`` if ``P`` or ``G`` fails its check.

    NOTE(review): the product used is ``G P`` (G applied on the left);
    confirm this ordering is the intended convention.
    """
    assert is_stochastic(P)
    assert is_diagonal(G)

    identity = np.eye(len(P))
    system = identity - np.dot(G, P)
    system_inverse = np.linalg.inv(system)
    return np.dot(system_inverse, r)