コード例 #1
0
    def forward(self, obs: Tensor, act: Tensor) -> Tensor:
        """Return the negated quadratic cost of an observation-action pair.

        The observation is split into state and time by ``unpack_obs``. The
        state is concatenated with the action along the ``"R"`` dimension and
        scored against the parameters that ``self._index_parameters`` returns
        for that time. Wherever the time equals ``self.horizon`` the result is
        zero instead of the negated cost.
        """
        obs = nt.vector(obs)
        act = nt.vector(act)

        state, time = unpack_obs(obs)
        time = nt.vector_to_scalar(time)
        state_action = torch.cat([state, act], dim="R")
        tau = nt.vector_to_matrix(state_action)

        quad_coef, lin_coef = self._index_parameters(time)
        lin_coef = nt.vector_to_matrix(lin_coef)

        quad_term = nt.transpose(tau) @ quad_coef @ tau / 2
        lin_term = nt.transpose(lin_coef) @ tau
        reward = nt.matrix_to_scalar((quad_term + lin_term).neg())

        # Mask out entries whose time has reached the horizon.
        at_horizon = time.eq(self.horizon)
        return nt.where(at_horizon, torch.zeros_like(reward), reward)
コード例 #2
0
 def forward(self, obs: Tensor) -> Tensor:
     """Return the negated quadratic form of the state found in ``obs``.

     The quadratic, linear and constant coefficients are looked up for the
     observation's time through ``index_quadratic_parameters``, with
     ``self.horizon`` passed as ``max_idx``.
     """
     state, time = unpack_obs(obs)
     time = nt.vector_to_scalar(time)

     params = index_quadratic_parameters(
         self.quad, self.linear, self.const, time, max_idx=self.horizon)
     quad, linear, const = params

     state = nt.vector_to_matrix(state)
     quad_term = nt.transpose(state) @ quad @ state / 2
     lin_term = nt.transpose(nt.vector_to_matrix(linear)) @ state
     const_term = nt.scalar_to_matrix(const)

     cost = nt.matrix_to_scalar(quad_term + lin_term + const_term)
     return cost.neg()
コード例 #3
0
    def forward(self, rho: lqr.GaussInit, vval: lqr.Quadratic):
        """Evaluate the expectation of a quadratic form under ``rho``.

        ``rho`` unpacks into a mean and a covariance matrix; ``vval`` unpacks
        into the quadratic, linear and constant coefficients. The result is
        ``trace(cov @ V) / 2 + mean' V mean / 2 + v' mean + c``.

        https://en.wikipedia.org/wiki/Quadratic_form_(statistics)#Expectation.
        """
        # pylint:disable=invalid-name,no-self-use
        quad, linear, const = vval
        mean, cov = rho

        mean_col = nt.vector_to_matrix(mean)
        linear_col = nt.vector_to_matrix(linear)
        const_mat = nt.scalar_to_matrix(const)

        trace_term = nt.scalar_to_matrix(nt.trace(cov @ quad)) / 2
        quad_term = nt.transpose(mean_col) @ quad @ mean_col / 2
        lin_term = nt.transpose(linear_col) @ mean_col

        value = trace_term + quad_term + lin_term + const_mat
        return nt.matrix_to_scalar(value)
コード例 #4
0
 def forward(self, obs: Tensor, action: Tensor) -> Tensor:
     """Return the negated quadratic form of a state-action pair.

     Coefficients are looked up for the observation's time through
     ``index_quadratic_parameters``, with ``self.horizon - 1`` passed as
     ``max_idx``. Wherever the time equals ``self.horizon`` the result is
     zero instead of the negated cost.
     """
     state, time = unpack_obs(obs)
     time = nt.vector_to_scalar(time)

     # noinspection PyTypeChecker
     params = index_quadratic_parameters(
         self.quad, self.linear, self.const, time, max_idx=self.horizon - 1)
     quad, linear, const = params

     state_action = torch.cat([state, action], dim="R")
     vec = nt.vector_to_matrix(state_action)

     quad_term = nt.transpose(vec) @ quad @ vec / 2
     lin_term = nt.transpose(nt.vector_to_matrix(linear)) @ vec
     const_term = nt.scalar_to_matrix(const)

     val = nt.matrix_to_scalar(quad_term + lin_term + const_term).neg()

     # Mask out entries whose time has reached the horizon.
     at_horizon = time.eq(self.horizon)
     return nt.where(at_horizon, torch.zeros_like(val), val)
コード例 #5
0
def refine_quadratic_output(quadratic: Quadratic):
    """Refine the three components of ``quadratic`` and return them as a tuple.

    The first component goes through ``nt.matrix``, the second through
    ``nt.matrix_to_vector`` and the third through ``nt.matrix_to_scalar``;
    all three are then passed together to ``nt.horizon``.
    """
    quad, linear, const = quadratic
    refined = nt.horizon(
        nt.matrix(quad),
        nt.matrix_to_vector(linear),
        nt.matrix_to_scalar(const),
    )
    quad, linear, const = refined
    return quad, linear, const