def policy(n_state: int, n_ctrl: int, horizon: int) -> lqr.Linear:
    """Create freshly initialized time-varying linear policy parameters.

    Args:
        n_state: size of the last dimension of the gain tensor
        n_ctrl: size of the control dimension of both tensors
        horizon: size of the leading (timestep) dimension of both tensors

    Returns:
        A pair built from a ``(horizon, n_ctrl, n_state)`` tensor with Xavier
        uniform initialization and a ``(horizon, n_ctrl)`` tensor filled with
        zeros, each passed through the ``nt`` matrix/vector and horizon
        refinements.
    """
    # The in-place initializers return their argument, so allocation and
    # initialization can be chained.
    gain = nn.init.xavier_uniform_(torch.Tensor(horizon, n_ctrl, n_state))
    bias = nn.init.constant_(torch.Tensor(horizon, n_ctrl), 0)
    refined_gain, refined_bias = nt.horizon(nt.matrix(gain), nt.vector(bias))
    return refined_gain, refined_bias
def _gains_at(
    self, index: Union[IntTensor, LongTensor, None] = None
) -> tuple[Tensor, Tensor]:
    """Return the refined ``K`` and ``k``, optionally selected by timestep.

    Args:
        index: position(s) along the "H" dimension at which to select the
            parameters. Values larger than ``self.horizon - 1`` are clamped
            to it. If None, nothing is selected and the full tensors are
            returned.

    Returns:
        The refined ``K`` and ``k`` tensors, indexed along "H" when ``index``
        is given.
    """
    gain, bias = nt.horizon(nt.matrix(self.K), nt.vector(self.k))
    if index is None:
        return gain, bias

    # Assumes index is a named scalar tensor
    last_step = self.horizon - 1
    timestep = torch.clamp(index, max=last_step)
    # noinspection PyTypeChecker
    gain = nt.index_by(gain, dim="H", index=timestep)
    # noinspection PyTypeChecker
    bias = nt.index_by(bias, dim="H", index=timestep)
    return gain, bias
def _transition_factors(
    self, index: Optional[IntTensor] = None
) -> tuple[Tensor, Tensor, Tensor]:
    """Return the refined ``F``, ``f`` and scale-tril factors.

    Args:
        index: position(s) along the "H" dimension at which to select the
            factors. If None, the factors are returned without selection.

    Returns:
        A tuple ``(F, f, L)`` where ``L`` comes from ``self.scale_tril()``,
        indexed along "H" when ``index`` is given.
    """
    F, f, L = nt.horizon(nt.matrix(self.F), nt.vector(self.f), self.scale_tril())
    if index is not None:
        if self.stationary:
            # Stationary case: position 0 is read whatever the timestep is
            idx = torch.zeros_like(index)
        else:
            # Timesteps after termination use last parameters
            idx = torch.clamp(index, max=self.horizon - 1).int()
        F, f, L = (nt.index_by(x, dim="H", index=idx) for x in (F, f, L))
    return F, f, L
def standard_form(self) -> Quadratic:
    """Return parameters in standard quadratic form.

    Returns:
        The refined matrix, vector, and scalar parameters (from ``self.quad``,
        ``self.linear`` and ``self.const``). The ``grad`` of each returned
        tensor is a clone of the matching parameter's gradient, or None when
        that parameter has no gradient.
    """
    quad, linear, const = self.quad, self.linear, self.const
    sources = (quad, linear, const)
    refined = nt.horizon(nt.matrix(quad), nt.vector(linear), nt.scalar(const))

    for tensor, source in zip(refined, sources):
        grad = source.grad
        # Clone so the returned tensor does not share gradient storage with
        # the parameter it was derived from.
        tensor.grad = grad.clone() if grad is not None else None

    # noinspection PyTypeChecker
    return refined
def index_quadratic_parameters(
    quad: nn.Parameter,
    linear: nn.Parameter,
    const: nn.Parameter,
    index: IntTensor,
    max_idx: int,
) -> tuple[Tensor, Tensor, Tensor]:
    """Select quadratic parameters along the "H" dimension.

    Args:
        quad: parameter refined as a matrix
        linear: parameter refined as a vector
        const: parameter refined as a scalar
        index: position(s) along "H" at which to select the parameters
        max_idx: upper bound applied to ``index`` before selection

    Returns:
        The three refined parameters indexed at the clamped ``index``.
    """
    quad_h, linear_h, const_h = nt.horizon(
        nt.matrix(quad), nt.vector(linear), nt.scalar(const)
    )
    timestep = torch.clamp(index, max=max_idx)
    return (
        nt.index_by(quad_h, dim="H", index=timestep),
        nt.index_by(linear_h, dim="H", index=timestep),
        nt.index_by(const_h, dim="H", index=timestep),
    )
def refine_lqr(dynamics: LinDynamics, cost: QuadCost) -> Tuple[LinDynamics, QuadCost]:
    """Attach dimension names to the parameters of an LQR problem.

    Args:
        dynamics: pair with the transition matrix and vector
        cost: pair with the quadratic cost matrix and vector

    Returns:
        A tuple with the named dynamics and the named cost parameters
    """
    trans_mat, trans_vec = dynamics
    cost_mat, cost_vec = cost

    # Matrices first, then vectors, then the horizon refinement on all four
    trans_mat, cost_mat = nt.matrix(trans_mat, cost_mat)
    trans_vec, cost_vec = nt.vector(trans_vec, cost_vec)
    refined = nt.horizon(trans_mat, trans_vec, cost_mat, cost_vec)
    trans_mat, trans_vec, cost_mat, cost_vec = refined

    named_dynamics = LinDynamics(trans_mat, trans_vec)
    named_cost = QuadCost(cost_mat, cost_vec)
    return named_dynamics, named_cost
def stabilizing_policy(dynamics: lqr.LinSDynamics, rng: RNG = None) -> lqr.Linear:
    """Compute linear policy parameters that stabilize an LQG.

    Warning:
        This is only defined for stationary systems

    Args:
        dynamics: the system dynamics, which must be stationary
        rng: forwarded to ``stabilizing_gain``

    Returns:
        A pair ``(K, k)`` where ``K`` is the stabilizing gain expanded to the
        shape of the transposed second dynamics factor and ``k`` is all zeros.

    Raises:
        AssertionError: if the dynamics are non-stationary
    """
    # pylint:disable=invalid-name
    assert isstationary(dynamics)

    gain = stabilizing_gain(dynamics, rng=rng)
    _, B = dynamics_factors(dynamics)
    target = nt.transpose(B)
    K = nt.horizon(gain.expand_as(target))

    # k has to be a column vector with the size of a control vector, which
    # matches the size of a column of K
    # noinspection PyTypeChecker
    first_column = K.select("C", 0)
    k = torch.zeros_like(first_column)
    return K, k
def linear(self, n_state: int, n_ctrl: int, horizon: int) -> lqr.Linear:
    """Sample linear parameters from a standard normal distribution.

    Args:
        n_state: size of the last dimension of the sampled gain
        n_ctrl: size of the control dimension of both tensors
        horizon: size of the leading (timestep) dimension of both tensors

    Returns:
        A pair built from a ``(horizon, n_ctrl, n_state)`` and a
        ``(horizon, n_ctrl)`` random tensor, after the ``nt`` matrix/vector
        and horizon refinements.
    """
    # The gain is sampled before the bias
    gain = torch.randn(horizon, n_ctrl, n_state)
    bias = torch.randn(horizon, n_ctrl)
    refined_gain, refined_bias = nt.horizon(nt.matrix(gain), nt.vector(bias))
    return refined_gain, refined_bias
def policy(n_state: int, n_ctrl: int, horizon: int) -> Linear:
    """Sample linear policy parameters uniformly from ``[0, 1)``.

    Args:
        n_state: size of the last dimension of the sampled gain
        n_ctrl: size of the control dimension of both tensors
        horizon: size of the leading (timestep) dimension of both tensors

    Returns:
        A pair built from a ``(horizon, n_ctrl, n_state)`` and a
        ``(horizon, n_ctrl)`` random tensor, after the ``nt`` matrix/vector
        and horizon refinements.
    """
    # The gain is sampled before the bias
    gain = torch.rand(horizon, n_ctrl, n_state)
    bias = torch.rand(horizon, n_ctrl)
    refined_gain, refined_bias = nt.horizon(nt.matrix(gain), nt.vector(bias))
    return refined_gain, refined_bias
def _refined_parameters(self) -> tuple[Tensor, Tensor]:
    """Return ``self.C`` and ``self.c`` after the ``nt`` refinements.

    Returns:
        ``self.C`` refined as a matrix and ``self.c`` refined as a vector,
        both additionally passed through ``nt.horizon``.
    """
    matrix = nt.matrix(self.C)
    vector = nt.vector(self.c)
    matrix, vector = nt.horizon(matrix, vector)
    return matrix, vector
def refine_quadratic_output(quadratic: Quadratic):
    """Refine the three terms of a quadratic produced as output.

    Args:
        quadratic: triple of tensors; the first is refined as a matrix, the
            second with ``nt.matrix_to_vector`` and the third with
            ``nt.matrix_to_scalar``

    Returns:
        A tuple with the three refined tensors, after ``nt.horizon``
    """
    quad_term, linear_term, const_term = quadratic
    quad_term = nt.matrix(quad_term)
    linear_term = nt.matrix_to_vector(linear_term)
    const_term = nt.matrix_to_scalar(const_term)
    quad_term, linear_term, const_term = nt.horizon(
        quad_term, linear_term, const_term
    )
    return quad_term, linear_term, const_term
def refine_linear_output(linear: Linear):
    """Refine the two terms of a linear function produced as output.

    Args:
        linear: pair of tensors; the first is refined as a matrix and the
            second with ``nt.matrix_to_vector``

    Returns:
        A tuple with the two refined tensors, after ``nt.horizon``
    """
    gain, bias = linear
    gain = nt.matrix(gain)
    bias = nt.matrix_to_vector(bias)
    gain, bias = nt.horizon(gain, bias)
    return gain, bias
def refine_cost_input(cost: QuadCost):
    """Refine quadratic cost parameters received as input.

    Args:
        cost: pair of tensors; the first is refined as a matrix and the
            second with ``nt.vector_to_matrix``

    Returns:
        A ``QuadCost`` holding the two refined tensors, after ``nt.horizon``
    """
    cost_mat, cost_vec = cost
    cost_mat = nt.matrix(cost_mat)
    cost_vec = nt.vector_to_matrix(cost_vec)
    cost_mat, cost_vec = nt.horizon(cost_mat, cost_vec)
    return QuadCost(cost_mat, cost_vec)
def refine_sdynamics_input(dynamics: LinSDynamics):
    """Refine the parameters of linear stochastic dynamics received as input.

    Args:
        dynamics: triple of tensors; the first two are delegated to
            ``refine_dynamics_input`` and the third is refined as a matrix

    Returns:
        A ``LinSDynamics`` holding the three refined tensors
    """
    trans_mat, trans_vec, third = dynamics
    trans_mat, trans_vec = refine_dynamics_input((trans_mat, trans_vec))
    third = nt.matrix(third)
    third = nt.horizon(third)
    return LinSDynamics(trans_mat, trans_vec, third)
def refine_dynamics_input(dynamics: LinDynamics):
    """Refine linear dynamics parameters received as input.

    Args:
        dynamics: pair of tensors; the first is refined as a matrix and the
            second with ``nt.vector_to_matrix``

    Returns:
        A ``LinDynamics`` holding the two refined tensors, after ``nt.horizon``
    """
    trans_mat, trans_vec = dynamics
    trans_mat = nt.matrix(trans_mat)
    trans_vec = nt.vector_to_matrix(trans_vec)
    trans_mat, trans_vec = nt.horizon(trans_mat, trans_vec)
    return LinDynamics(trans_mat, trans_vec)
def refine_linear_input(linear: Linear):
    """Refine the two terms of a linear function received as input.

    Args:
        linear: pair of tensors; the first is refined as a matrix and the
            second with ``nt.vector_to_matrix``

    Returns:
        A tuple with the two refined tensors, after ``nt.horizon``
    """
    gain, bias = linear
    gain = nt.matrix(gain)
    bias = nt.vector_to_matrix(bias)
    gain, bias = nt.horizon(gain, bias)
    return gain, bias
def gains(self) -> lqr.Linear:
    """Return current parameters as linear parameters.

    Returns:
        ``self.K`` and ``self.k`` after the ``nt`` matrix/vector and horizon
        refinements. The ``grad`` of each returned tensor is set to the
        gradient object of the corresponding parameter (not a copy).
    """
    gain, bias = nt.horizon(nt.matrix(self.K), nt.vector(self.k))
    # Read both gradients before assigning either
    gain_grad, bias_grad = self.K.grad, self.k.grad
    gain.grad = gain_grad
    bias.grad = bias_grad
    return gain, bias
def refine_cost_ouput(cost: QuadCost) -> QuadCost:
    """Refine quadratic cost parameters produced as output.

    Args:
        cost: pair of tensors; the first is refined as a matrix and the
            second with ``nt.matrix_to_vector``

    Returns:
        A ``QuadCost`` holding the two refined tensors, after ``nt.horizon``
    """
    cost_mat, cost_vec = cost
    cost_mat = nt.matrix(cost_mat)
    cost_vec = nt.matrix_to_vector(cost_vec)
    cost_mat, cost_vec = nt.horizon(cost_mat, cost_vec)
    return QuadCost(cost_mat, cost_vec)