def forward(self, obs: Tensor, act: Tensor) -> Tensor:
    """Return the negated quadratic cost of an observation-action pair.

    The observation is split into state and time by ``unpack_obs``. The
    state and action are concatenated along the named dimension ``"R"``
    and plugged into ``tau' C tau / 2 + c' tau``, where ``C`` and ``c``
    come from ``self._index_parameters`` evaluated at the time value.

    Args:
        obs: Observation tensor; refined with ``nt.vector`` before use.
        act: Action tensor; refined with ``nt.vector`` before use.

    Returns:
        The negated cost, replaced by zeros wherever the time equals
        ``self.horizon``.
    """
    # pylint:disable=invalid-name
    obs = nt.vector(obs)
    act = nt.vector(act)

    state, time = unpack_obs(obs)
    state_action = nt.vector_to_matrix(torch.cat([state, act], dim="R"))
    timestep = nt.vector_to_scalar(time)

    quad_coeff, lin_coeff = self._index_parameters(timestep)
    lin_coeff = nt.vector_to_matrix(lin_coeff)

    quad_term = nt.transpose(state_action) @ quad_coeff @ state_action / 2
    lin_term = nt.transpose(lin_coeff) @ state_action
    reward = nt.matrix_to_scalar((quad_term + lin_term).neg())

    # No reward is given once the time index has reached the horizon.
    at_horizon = timestep.eq(self.horizon)
    return nt.where(at_horizon, torch.zeros_like(reward), reward)
def forward(self, obs: Tensor) -> Tensor:
    """Return the negated quadratic form of the state in an observation.

    The observation is split into state and time by ``unpack_obs``. The
    coefficients for that time are selected from ``self.quad``,
    ``self.linear`` and ``self.const`` via ``index_quadratic_parameters``
    (with ``max_idx=self.horizon``), and the result is
    ``-(s' Q s / 2 + l' s + k)``.

    Args:
        obs: Observation tensor accepted by ``unpack_obs``.

    Returns:
        The negated value of the quadratic form.
    """
    state, time = unpack_obs(obs)
    timestep = nt.vector_to_scalar(time)
    quad_coeff, lin_coeff, const_coeff = index_quadratic_parameters(
        self.quad, self.linear, self.const, timestep, max_idx=self.horizon
    )

    column = nt.vector_to_matrix(state)
    quad_term = nt.transpose(column) @ quad_coeff @ column / 2
    lin_term = nt.transpose(nt.vector_to_matrix(lin_coeff)) @ column
    const_term = nt.scalar_to_matrix(const_coeff)

    total = quad_term + lin_term + const_term
    return nt.matrix_to_scalar(total).neg()
def forward(self, rho: lqr.GaussInit, vval: lqr.Quadratic):
    """Evaluate the expectation of a quadratic under a Gaussian.

    Computes ``tr(cov V) / 2 + mean' V mean / 2 + v' mean + c`` from the
    ``(mean, cov)`` pair in ``rho`` and the ``(V, v, c)`` triple in
    ``vval``.

    Reference:
    https://en.wikipedia.org/wiki/Quadratic_form_(statistics)#Expectation

    Args:
        rho: Pair unpacked as ``(mean, cov)``.
        vval: Triple unpacked as ``(V, v, c)``.

    Returns:
        The expected value, converted with ``nt.matrix_to_scalar``.
    """
    # pylint:disable=invalid-name,no-self-use
    quad_coeff, lin_coeff, const_coeff = vval
    mean, cov = rho

    lin_coeff = nt.vector_to_matrix(lin_coeff)
    const_coeff = nt.scalar_to_matrix(const_coeff)
    mean = nt.vector_to_matrix(mean)

    trace_term = nt.scalar_to_matrix(nt.trace(cov @ quad_coeff)) / 2
    quad_term = nt.transpose(mean) @ quad_coeff @ mean / 2
    lin_term = nt.transpose(lin_coeff) @ mean

    expected = trace_term + quad_term + lin_term + const_coeff
    return nt.matrix_to_scalar(expected)
def forward(self, obs: Tensor, action: Tensor) -> Tensor:
    """Return the negated quadratic form of an observation-action pair.

    The observation is split into state and time by ``unpack_obs``. The
    coefficients for that time are selected from ``self.quad``,
    ``self.linear`` and ``self.const`` via ``index_quadratic_parameters``
    (with ``max_idx=self.horizon - 1``). The state and action are
    concatenated along the named dimension ``"R"`` and the result is
    ``-(x' Q x / 2 + l' x + k)``.

    Args:
        obs: Observation tensor accepted by ``unpack_obs``.
        action: Action tensor concatenated after the state.

    Returns:
        The negated value, replaced by zeros wherever the time equals
        ``self.horizon``.
    """
    state, time = unpack_obs(obs)
    timestep = nt.vector_to_scalar(time)
    # noinspection PyTypeChecker
    quad_coeff, lin_coeff, const_coeff = index_quadratic_parameters(
        self.quad, self.linear, self.const, timestep, max_idx=self.horizon - 1
    )

    state_action = nt.vector_to_matrix(torch.cat([state, action], dim="R"))
    quad_term = nt.transpose(state_action) @ quad_coeff @ state_action / 2
    lin_term = nt.transpose(nt.vector_to_matrix(lin_coeff)) @ state_action
    const_term = nt.scalar_to_matrix(const_coeff)

    value = nt.matrix_to_scalar(quad_term + lin_term + const_term).neg()

    # The value is defined as zero once the time index equals the horizon.
    at_horizon = timestep.eq(self.horizon)
    return nt.where(at_horizon, torch.zeros_like(value), value)
def refine_quadratic_output(quadratic: Quadratic):
    """Refine the three components of a quadratic and return them as a tuple.

    The first component goes through ``nt.matrix``, the second through
    ``nt.matrix_to_vector`` and the third through ``nt.matrix_to_scalar``.
    All three are then passed together to ``nt.horizon``.

    Args:
        quadratic: Iterable unpacked into exactly three components.

    Returns:
        A 3-tuple holding the components produced by ``nt.horizon``.
    """
    # pylint:disable=invalid-name
    quad_part, lin_part, const_part = quadratic
    refined = nt.horizon(
        nt.matrix(quad_part),
        nt.matrix_to_vector(lin_part),
        nt.matrix_to_scalar(const_part),
    )
    quad_part, lin_part, const_part = refined
    return quad_part, lin_part, const_part