def forward(self, obs: Tensor, act: Tensor) -> Tensor:
    """Return the negated quadratic cost of an observation-action pair.

    The observation is split into state and time by ``unpack_obs``; state
    and action are concatenated along the ``"R"`` dimension into ``tau``.
    The cost is ``tau' C tau / 2 + c' tau`` with ``C, c`` looked up for the
    current time through ``self._index_parameters``.

    Args:
        obs: observation tensor, refined with ``nt.vector`` before use
        act: action tensor, refined with ``nt.vector`` before use

    Returns:
        The negated cost, replaced by zeros wherever the time component
        equals ``self.horizon``.
    """
    obs = nt.vector(obs)
    act = nt.vector(act)
    state, time = unpack_obs(obs)

    joint = torch.cat([state, act], dim="R")
    tau = nt.vector_to_matrix(joint)
    timestep = nt.vector_to_scalar(time)

    quad, lin = self._index_parameters(timestep)
    lin_col = nt.vector_to_matrix(lin)

    quad_term = nt.transpose(tau) @ quad @ tau / 2
    lin_term = nt.transpose(lin_col) @ tau
    reward = nt.matrix_to_scalar((quad_term + lin_term).neg())

    # Entries whose time equals the horizon get a reward of exactly zero.
    at_horizon = timestep.eq(self.horizon)
    return nt.where(at_horizon, torch.zeros_like(reward), reward)
def forward(self, obs: Tensor) -> Tensor:
    """Return the negated quadratic function of the state in ``obs``.

    The quadratic, linear and constant coefficients are selected for the
    observation's time component by ``index_quadratic_parameters`` (with
    ``max_idx=self.horizon``), and the value computed is
    ``-(s' Q s / 2 + l' s + const)`` for state ``s``.

    Args:
        obs: observation tensor that ``unpack_obs`` splits into state and time

    Returns:
        The negated quadratic value as produced by ``nt.matrix_to_scalar``.
    """
    state, time = unpack_obs(obs)
    timestep = nt.vector_to_scalar(time)
    quad, linear, const = index_quadratic_parameters(
        self.quad, self.linear, self.const, timestep, max_idx=self.horizon
    )

    state_col = nt.vector_to_matrix(state)
    quad_term = nt.transpose(state_col) @ quad @ state_col / 2
    lin_term = nt.transpose(nt.vector_to_matrix(linear)) @ state_col
    const_term = nt.scalar_to_matrix(const)

    total = nt.matrix_to_scalar(quad_term + lin_term + const_term)
    return total.neg()
def forward(self, rho: lqr.GaussInit, vval: lqr.Quadratic):
    """Expected value of a quadratic under the initial state distribution.

    For a quadratic ``x' V x / 2 + v' x + c`` and a distribution with the
    given mean and covariance, this evaluates
    ``tr(cov V) / 2 + mean' V mean / 2 + v' mean + c``.

    See https://en.wikipedia.org/wiki/Quadratic_form_(statistics)#Expectation.

    Args:
        rho: pair ``(mean, cov)`` of the initial state distribution
        vval: triple ``(V, v, c)`` of quadratic, linear and constant terms

    Returns:
        The expected value as produced by ``nt.matrix_to_scalar``.
    """
    # pylint:disable=no-self-use
    quad, linear, const = vval
    linear_col = nt.vector_to_matrix(linear)
    const_mat = nt.scalar_to_matrix(const)

    mean, cov = rho
    mean_col = nt.vector_to_matrix(mean)

    # Contribution of the covariance: tr(cov V) / 2.
    trace_term = nt.scalar_to_matrix(nt.trace(cov @ quad)) / 2
    # Quadratic evaluated at the mean.
    quad_term = nt.transpose(mean_col) @ quad @ mean_col / 2
    lin_term = nt.transpose(linear_col) @ mean_col

    expected = trace_term + quad_term + lin_term + const_mat
    return nt.matrix_to_scalar(expected)
def test_cholesky(spdm: Tensor):
    """Check that ``nt.cholesky`` returns a valid factor of ``spdm``.

    The factor must keep the input's shape, names and dtype, have a
    non-negative diagonal, and reproduce ``spdm`` when multiplied by its
    own transpose.
    """
    factor = nt.cholesky(spdm)

    # Metadata must be carried over unchanged from the input.
    for attr in ("shape", "names", "dtype"):
        assert getattr(factor, attr) == getattr(spdm, attr)

    diag = nt.diagonal(factor)
    assert diag.ge(0).all()

    rebuilt = factor @ nt.transpose(factor)
    assert nt.allclose(rebuilt, spdm)
def forward(self, obs: Tensor, action: Tensor) -> Tensor:
    """Return the negated quadratic function of state and action.

    State (from ``unpack_obs``) and action are concatenated along the
    ``"R"`` dimension into ``vec``; the value computed is
    ``-(vec' Q vec / 2 + l' vec + const)`` with coefficients selected for
    the observation's time component.

    Args:
        obs: observation tensor that ``unpack_obs`` splits into state and time
        action: action tensor concatenated after the state

    Returns:
        The negated value, replaced by zeros wherever the time component
        equals ``self.horizon``.
    """
    state, time = unpack_obs(obs)
    timestep = nt.vector_to_scalar(time)

    # NOTE(review): max_idx is horizon - 1, presumably so that the terminal
    # timestep still maps to a valid parameter index; its value is masked
    # out below anyway -- confirm against index_quadratic_parameters.
    # noinspection PyTypeChecker
    quad, linear, const = index_quadratic_parameters(
        self.quad, self.linear, self.const, timestep, max_idx=self.horizon - 1
    )

    joint = torch.cat([state, action], dim="R")
    vec = nt.vector_to_matrix(joint)

    quad_term = nt.transpose(vec) @ quad @ vec / 2
    lin_term = nt.transpose(nt.vector_to_matrix(linear)) @ vec
    const_term = nt.scalar_to_matrix(const)

    negated = nt.matrix_to_scalar(quad_term + lin_term + const_term).neg()
    at_horizon = timestep.eq(self.horizon)
    return nt.where(at_horizon, torch.zeros_like(negated), negated)
def check_dynamics_covariance(
    W: Tensor,
    n_state: int,
    horizon: int,
    stationary: int,
    sample_covariance: bool,
):
    """Assert structural properties of a dynamics covariance tensor.

    Checks, in order: horizon length, row and column sizes, symmetry,
    strictly positive eigenvalues, that ``W`` is close to the identity
    exactly when ``sample_covariance`` is false, and -- only when
    ``horizon != 1`` and ``sample_covariance`` is truthy -- that
    ``stationary`` matches whether ``W`` is close to its slice at index 0
    of dimension ``"H"``.

    Args:
        W: covariance tensor under test
        n_state: expected row and column size
        horizon: expected horizon length
        stationary: expected stationarity flag
        sample_covariance: whether the covariance is expected to differ
            from the identity

    Raises:
        AssertionError: if any of the checks fails
    """
    # pylint:disable=invalid-name
    assert_horizon_len(W, horizon)
    assert_row_size(W, n_state)
    assert_col_size(W, n_state)

    # Symmetry.
    assert nt.allclose(W, nt.transpose(W))

    # All eigenvalues strictly positive; eigh runs on the unnamed tensor.
    eigenvalues = torch.linalg.eigh(nt.unnamed(W))[0]
    assert (eigenvalues > 0).all()

    is_identity = nt.allclose(W, nt.matrix(torch.eye(n_state)))
    assert sample_covariance != is_identity

    if horizon != 1 and sample_covariance:
        # noinspection PyTypeChecker
        assert stationary == nt.allclose(W, W.select("H", 0))
def stabilizing_gain(
    dynamics: lqr.LinSDynamics,
    abs_low: float = 0.0,
    abs_high: float = 1.0,
    rng: RNG = None,
) -> Tensor:
    """Compute a gain matrix that stabilizes a linear dynamical system.

    The stationary factors of ``dynamics`` are converted to NumPy and handed
    to ``place_dynamics_poles``; the negated ``gain_matrix`` of its result is
    copied into a tensor laid out like the transposed input matrix.

    Args:
        dynamics: linear dynamics to stabilize
        abs_low: forwarded to ``place_dynamics_poles``
        abs_high: forwarded to ``place_dynamics_poles``
        rng: forwarded to ``place_dynamics_poles``

    Returns:
        The gain tensor, created with ``torch.empty_like`` from the
        transposed input matrix.
    """
    state_mat, input_mat = stationary_dynamics_factors(dynamics)
    placement = place_dynamics_poles(
        state_mat.numpy(),
        input_mat.numpy(),
        abs_low=abs_low,
        abs_high=abs_high,
        rng=rng,
    )

    # NOTE(review): the sign flip presumably converts from a u = -Kx
    # convention used by the pole placement routine -- confirm.
    negated_gain = torch.as_tensor(-placement.gain_matrix)

    # Copy into a buffer created from B' rather than returning the
    # converted array directly.
    gain = torch.empty_like(nt.transpose(input_mat))
    gain.copy_(negated_gain)
    return gain
def stabilizing_policy(dynamics: lqr.LinSDynamics, rng: RNG = None) -> lqr.Linear:
    """Build linear policy parameters (gain, bias) that stabilize an LQG.

    The gain from ``stabilizing_gain`` is expanded to the shape of the
    transposed input matrix of ``dynamics`` and passed through
    ``nt.horizon``; the bias is a zero tensor.

    Warning:
        This is only defined for stationary systems.

    Args:
        dynamics: linear dynamics to stabilize
        rng: forwarded to ``stabilizing_gain``

    Returns:
        The pair ``(K, k)`` of gain and bias.

    Raises:
        AssertionError: if the dynamics are non-stationary
    """
    assert isstationary(dynamics)

    base_gain = stabilizing_gain(dynamics, rng=rng)
    _, input_mat = dynamics_factors(dynamics)
    target = nt.transpose(input_mat)
    gain = nt.horizon(base_gain.expand_as(target))

    # The bias must be a column vector the size of control vectors, i.e. the
    # size of a column of the gain; column 0 serves as the shape template.
    # noinspection PyTypeChecker
    column = gain.select("C", 0)
    bias = torch.zeros_like(column)
    return gain, bias
def standard_form(self) -> lqr.GaussInit:
    """Return the distribution as an ``lqr.GaussInit`` of location and covariance.

    The covariance is rebuilt as ``L @ L'`` from the factor returned by
    ``self.scale_tril()``.
    """
    mean = nt.vector(self.loc)
    factor = self.scale_tril()
    return lqr.GaussInit(mean, factor @ nt.transpose(factor))
def forward(self) -> Tensor:
    """Build the matrix ``L @ L'`` from the stored parameters.

    ``L`` is produced by ``assemble_cholesky`` from ``self.ltril``,
    ``self.pre_diag`` and ``self.beta``. The product is symmetric by
    construction; it is positive definite provided ``L`` has a nonzero
    diagonal (presumably guaranteed by ``assemble_cholesky`` -- confirm).
    """
    factor = assemble_cholesky(
        nt.matrix(self.ltril),
        nt.vector(self.pre_diag),
        beta=self.beta,
    )
    return factor @ nt.transpose(factor)
def as_linsdynamics(self) -> lqr.LinSDynamics:
    """Return the transition as an ``lqr.LinSDynamics`` tuple.

    The first two factors from ``self._transition_factors()`` are passed
    through unchanged; the third is a factor ``L`` from which the
    covariance ``L @ L'`` is rebuilt.
    """
    dyn_mat, dyn_bias, factor = self._transition_factors()
    covariance = factor @ nt.transpose(factor)
    return lqr.LinSDynamics(dyn_mat, dyn_bias, covariance)