コード例 #1
0
    def forward(self, obs: Tensor, act: Tensor) -> Tensor:
        """Return the reward of taking `act` at `obs`.

        The state (unpacked from `obs`) and the action are concatenated
        along the "R" dimension into ``tau``. With ``quad, linear`` the
        parameters returned by ``self._index_parameters`` for the
        observation's time, the reward is the negation of
        ``tau' quad tau / 2 + linear' tau``.

        Wherever the time equals ``self.horizon`` the reward is replaced
        by zero.
        """
        obs = nt.vector(obs)
        act = nt.vector(act)

        state, time = unpack_obs(obs)
        time = nt.vector_to_scalar(time)
        # Matrix form of the stacked state-action vector
        tau = nt.vector_to_matrix(torch.cat([state, act], dim="R"))

        quad, linear = self._index_parameters(time)
        linear = nt.vector_to_matrix(linear)

        quad_cost = nt.transpose(tau) @ quad @ tau / 2
        linear_cost = nt.transpose(linear) @ tau
        reward = nt.matrix_to_scalar((quad_cost + linear_cost).neg())

        # Mask out entries that sit exactly at the horizon
        at_horizon = time.eq(self.horizon)
        return nt.where(at_horizon, torch.zeros_like(reward), reward)
コード例 #2
0
 def forward(self, obs: Tensor) -> Tensor:
     """Return the negated quadratic cost of the state contained in `obs`.

     The parameters ``quad, linear, const`` are picked by
     ``index_quadratic_parameters`` from ``self.quad``, ``self.linear`` and
     ``self.const`` using the observation's time, with ``max_idx`` set to
     ``self.horizon``. The result is the negation of
     ``state' quad state / 2 + linear' state + const``.
     """
     state, time = unpack_obs(obs)
     time = nt.vector_to_scalar(time)
     quad, linear, const = index_quadratic_parameters(
         self.quad,
         self.linear,
         self.const,
         time,
         max_idx=self.horizon,
     )

     state = nt.vector_to_matrix(state)
     quad_term = nt.transpose(state) @ quad @ state / 2
     linear_term = nt.transpose(nt.vector_to_matrix(linear)) @ state
     const_term = nt.scalar_to_matrix(const)

     cost = nt.matrix_to_scalar(quad_term + linear_term + const_term)
     return cost.neg()
コード例 #3
0
    def forward(self, rho: lqr.GaussInit, vval: lqr.Quadratic):
        """Evaluate a quadratic in expectation, given a mean and a covariance.

        `vval` unpacks as ``(V, v, c)`` and `rho` as ``(mean, cov)``. The
        returned scalar is
        ``trace(cov V) / 2 + mean' V mean / 2 + v' mean + c``.

        See https://en.wikipedia.org/wiki/Quadratic_form_(statistics)#Expectation.
        """
        # pylint:disable=invalid-name,no-self-use
        V, v, c = vval
        mean, cov = rho

        # Lift everything to matrices so the terms can be added together
        v = nt.vector_to_matrix(v)
        c = nt.scalar_to_matrix(c)
        mean = nt.vector_to_matrix(mean)

        trace_term = nt.scalar_to_matrix(nt.trace(cov @ V)) / 2
        quad_term = nt.transpose(mean) @ V @ mean / 2
        linear_term = nt.transpose(v) @ mean

        return nt.matrix_to_scalar(trace_term + quad_term + linear_term + c)
コード例 #4
0
def test_cholesky(spdm: Tensor):
    """Check the factor returned by `nt.cholesky` against its input.

    The factor must share the shape, names and dtype of `spdm`, have a
    non-negative diagonal, and give back `spdm` (up to `nt.allclose`)
    when multiplied by its own transpose.
    """
    factor = nt.cholesky(spdm)

    # Tensor metadata has to survive the factorization
    for attr in ("shape", "names", "dtype"):
        assert getattr(factor, attr) == getattr(spdm, attr)

    diag = nt.diagonal(factor)
    assert (diag >= 0).all()

    rebuilt = factor @ nt.transpose(factor)
    assert nt.allclose(rebuilt, spdm)
コード例 #5
0
 def forward(self, obs: Tensor, action: Tensor) -> Tensor:
     """Return the negated quadratic cost of a state-action pair.

     The state (unpacked from `obs`) and `action` are concatenated along
     the "R" dimension into ``vec``. Parameters ``quad, linear, const`` are
     picked by ``index_quadratic_parameters`` using the observation's time,
     with ``max_idx`` set to ``self.horizon - 1``. The value is the negation
     of ``vec' quad vec / 2 + linear' vec + const``.

     Wherever the time equals ``self.horizon`` the value is replaced by
     zero.
     """
     state, time = unpack_obs(obs)
     time = nt.vector_to_scalar(time)
     # noinspection PyTypeChecker
     quad, linear, const = index_quadratic_parameters(
         self.quad,
         self.linear,
         self.const,
         time,
         max_idx=self.horizon - 1,
     )

     vec = nt.vector_to_matrix(torch.cat([state, action], dim="R"))
     quad_term = nt.transpose(vec) @ quad @ vec / 2
     linear_term = nt.transpose(nt.vector_to_matrix(linear)) @ vec
     const_term = nt.scalar_to_matrix(const)

     value = nt.matrix_to_scalar(quad_term + linear_term + const_term).neg()

     # Mask out entries that sit exactly at the horizon
     at_horizon = time.eq(self.horizon)
     return nt.where(at_horizon, torch.zeros_like(value), value)
コード例 #6
0
def check_dynamics_covariance(W: Tensor, n_state: int, horizon: int,
                              stationary: int, sample_covariance: bool):
    """Assert that `W` looks like the covariance requested by the flags.

    Checks, in order:

    * the horizon length, row size and column size of `W` via the
      ``assert_*`` helpers;
    * `W` equals its transpose (``nt.allclose``) and all eigenvalues
      returned by ``torch.linalg.eigh`` are strictly positive;
    * `W` is close to the identity exactly when `sample_covariance` is
      false;
    * when ``horizon != 1`` and `sample_covariance` is true, `W` is close
      to its first slice along "H" exactly when `stationary` is truthy.

    NOTE(review): `stationary` is annotated ``int`` but is compared to the
    boolean result of ``nt.allclose`` -- presumably a bool flag; confirm.
    """
    # pylint:disable=invalid-name
    assert_horizon_len(W, horizon)
    assert_row_size(W, n_state)
    assert_col_size(W, n_state)

    # Symmetry, then strictly positive spectrum
    assert nt.allclose(W, nt.transpose(W))
    eigenvalues = torch.linalg.eigh(nt.unnamed(W))[0]
    assert eigenvalues.gt(0).all()

    identity = nt.matrix(torch.eye(n_state))
    assert sample_covariance != nt.allclose(W, identity)

    # The stationarity comparison only applies to sampled covariances
    # spanning more than one timestep
    if horizon != 1 and sample_covariance:
        # noinspection PyTypeChecker
        assert stationary == nt.allclose(W, W.select("H", 0))
コード例 #7
0
def stabilizing_gain(
    dynamics: lqr.LinSDynamics,
    abs_low: float = 0.0,
    abs_high: float = 1.0,
    rng: RNG = None,
) -> Tensor:
    """Compute a gain matrix that stabilizes a linear dynamical system.

    The stationary factors ``A, B`` of `dynamics` are handed, as NumPy
    arrays, to ``place_dynamics_poles`` together with `abs_low`,
    `abs_high` and `rng`. The negated ``gain_matrix`` of that result is
    returned as a tensor.

    Args:
        dynamics: the system to stabilize
        abs_low: forwarded to ``place_dynamics_poles``
        abs_high: forwarded to ``place_dynamics_poles``
        rng: forwarded to ``place_dynamics_poles``

    Returns:
        A tensor allocated like the transpose of ``B`` and filled with the
        negated gain matrix.
    """
    # pylint:disable=invalid-name
    A, B = stationary_dynamics_factors(dynamics)
    placement = place_dynamics_poles(
        A.numpy(), B.numpy(), abs_low=abs_low, abs_high=abs_high, rng=rng
    )

    # Allocate from B's transpose, then copy the values in, so the output
    # takes its tensor properties from B rather than from the NumPy array
    gain = torch.empty_like(nt.transpose(B))
    return gain.copy_(torch.as_tensor(-placement.gain_matrix))
コード例 #8
0
def stabilizing_policy(dynamics: lqr.LinSDynamics,
                       rng: RNG = None) -> lqr.Linear:
    """Build the parameters ``(K, k)`` of a stabilizing linear policy.

    ``K`` is the output of ``stabilizing_gain`` expanded to the shape of the
    transposed ``B`` factor of `dynamics`; ``k`` is all zeros.

    Warning:
        This is only defined for stationary systems

    Raises:
        AssertionError: if the dynamics are non-stationary
    """
    # pylint:disable=invalid-name
    assert isstationary(dynamics)
    gain = stabilizing_gain(dynamics, rng=rng)

    _, B = dynamics_factors(dynamics)
    target = nt.transpose(B)
    K = nt.horizon(gain.expand_as(target))

    # The bias is shaped like a single column of K (selected along "C"),
    # i.e. it has the size of a control vector
    # noinspection PyTypeChecker
    first_column = K.select("C", 0)
    k = torch.zeros_like(first_column)
    return K, k
コード例 #9
0
 def standard_form(self) -> lqr.GaussInit:
     """Return an `lqr.GaussInit` built from `self.loc` and `self.scale_tril()`.

     The second field is the product of the factor returned by
     ``self.scale_tril()`` with its own transpose.
     """
     mean = nt.vector(self.loc)
     factor = self.scale_tril()
     return lqr.GaussInit(mean, factor @ nt.transpose(factor))
コード例 #10
0
 def forward(self) -> Tensor:
     """Assemble the matrix ``L @ L'`` from the stored parameters.

     ``L`` is produced by ``assemble_cholesky`` from ``self.ltril``,
     ``self.pre_diag`` and ``self.beta``. The product with its own transpose
     is symmetric; positive definiteness presumably relies on how
     ``assemble_cholesky`` builds the diagonal.
     """
     factor = assemble_cholesky(
         nt.matrix(self.ltril), nt.vector(self.pre_diag), beta=self.beta
     )
     return factor @ nt.transpose(factor)
コード例 #11
0
 def as_linsdynamics(self) -> lqr.LinSDynamics:
     """Return an `lqr.LinSDynamics` built from `self._transition_factors()`.

     The first two factors are passed through unchanged; the third is
     multiplied by its own transpose to form the last field.
     """
     kernel, bias, factor = self._transition_factors()
     covariance = factor @ nt.transpose(factor)
     return lqr.LinSDynamics(kernel, bias, covariance)