    def input(self, state: StateType, t: int) -> np.ndarray:
        if not self._solved:
            raise RuntimeError('Need to call DiscretePolicy.solve() before asking for inputs.')
        if self._policy_type == 'trv':
            return self._K[:, :, t] @ state + self._h[:, t]
        else:
            return self._K[:, :, t] @ dists.GaussianDist(self._C[:, :, t] @ state + self._a[:, t],
                                                         self._Sigma_eta[:, :, t]).sample() + self._h[:, t]
Example No. 2
    def marginal(self, prior: dists.GaussianDist) -> dists.GaussianDist:
        """
        Computes the distribution of Y resulting from a prior over X.

        :param prior: The assumed Gaussian prior on X.
        :return: The marginal Gaussian distribution of Y.
        """
        return dists.GaussianDist(
            self._A @ prior.mean() + self._b,
            self._A @ prior.cov() @ self._A.transpose() + self._cov)
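As a sanity check on the closed form marginal() implements (mean A μ + b, covariance A Σ Aᵀ + Σ_v), here is a hedged NumPy-only sketch with illustrative numbers; nothing below comes from the library itself:

import numpy as np

# Hypothetical channel y = A x + b + v with v ~ N(0, cov_v); all values illustrative.
A = np.array([[1.0, 0.5], [0.0, 2.0]])
b = np.array([0.1, -0.2])
cov_v = 0.3 * np.eye(2)

prior_mean = np.array([1.0, 1.0])
prior_cov = np.diag([4.0, 0.25])

# Closed form used by marginal(): mean = A mu + b, cov = A Sigma A^T + cov_v.
marg_mean = A @ prior_mean + b
marg_cov = A @ prior_cov @ A.T + cov_v

# Monte Carlo sanity check of the closed form.
rng = np.random.default_rng(0)
xs = rng.multivariate_normal(prior_mean, prior_cov, size=100000)
ys = xs @ A.T + b + rng.multivariate_normal(np.zeros(2), cov_v, size=100000)
assert np.allclose(ys.mean(axis=0), marg_mean, atol=0.05)
assert np.allclose(np.cov(ys.T), marg_cov, atol=0.1)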
Example No. 3
    def conditional(self, chan_input: np.ndarray) -> dists.GaussianDist:
        """
        Computes the distribution of Y assuming X = x.

        :param chan_input: The n-vector x.
        :return: The Gaussian distribution of Y given X = x, over m variables.
        """
        mean = self._A @ chan_input.flatten() + self._b
        cov = self._cov

        return dists.GaussianDist(mean, cov)
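By contrast with marginal(), conditioning on X = x removes all uncertainty about the input, so only the channel noise remains. A tiny illustrative sketch (same hypothetical A, b, cov_v convention as above, restated here so the snippet stands alone):

import numpy as np

A = np.array([[1.0, 0.5], [0.0, 2.0]])   # illustrative channel matrix
b = np.array([0.1, -0.2])
cov_v = 0.3 * np.eye(2)

x = np.array([1.0, 2.0])
cond_mean = A @ x + b   # Y | X = x is Gaussian with this mean...
cond_cov = cov_v        # ...and covariance equal to the channel noise alone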
Example No. 4
    def test_sample(self):
        np.random.seed(0)
        n = 100000
        dist1 = dists.GaussianDist(np.ones(1), 2 * np.eye(1))
        dist2 = dists.GaussianDist(np.zeros((3, 1)), np.eye(3))

        samples = dist1.sample(n)
        mean = samples.sum() / n
        var = (1 / n) * ((samples - mean) ** 2).sum()

        self.assertAlmostEqual(mean, 1, 2)
        self.assertAlmostEqual(var, 2, 1)

        samples = dist2.sample(n)
        mean = samples.sum(axis=1) / n

        deviation = samples - (mean.reshape((3, 1)) @ np.ones((1, n)))
        cov = (1 / (n - 1)) * (deviation @ deviation.transpose())

        self.assertTrue(np.allclose(mean, np.zeros((3, 1)), atol=0.01))
        self.assertTrue(np.allclose(cov, np.eye(3), atol=0.01))
Example No. 5
    def test_posterior(self):
        np.random.seed(0)

        # This example is taken from the end of: http://web.stanford.edu/class/ee363/lectures/estim.pdf
        channel = channels.LGChannel(
            np.array([[np.cos(np.pi / 6), np.sin(np.pi / 6)]]), np.ones((1)),
            np.eye(1))
        input = dists.GaussianDist(np.array([1, 1]),
                                   np.array([[2**2, 0], [0, 0.5**2]]))

        sample_input = input.sample()
        conditional = channel.conditional(sample_input)
        sample_output = conditional.sample()

        posterior = channel.posterior(input, sample_output)

        self.assertTrue(
            np.allclose(posterior.cov(),
                        np.array([[1.046, -0.107], [-0.107, 0.246]]),
                        atol=0.01))

        # One more time with a different channel to test the mean

        channel = channels.LGChannel(
            np.array([[np.cos(i * np.pi / 12),
                       np.sin(i * np.pi / 12)] for i in range(24)]),
            np.ones((24)), np.eye(24))
        input = dists.GaussianDist(np.array([1, 1]),
                                   np.array([[2**2, 0], [0, 0.5**2]]))

        sample_input = input.sample()
        conditional = channel.conditional(sample_input)
        sample_output = conditional.sample()

        posterior = channel.posterior(input, sample_output)

        self.assertTrue(
            np.allclose(posterior.mean(), sample_input.flatten(), atol=0.6))
Example No. 6
    def joint(self, chan_input: dists.GaussianDist) -> dists.GaussianDist:
        """
        Computes the joint Gaussian distribution for (X, Y).

        :param chan_input: The Gaussian distribution of X.
        :return: The Gaussian distribution of (X, Y) with n + m variables.
        """
        mean = self._A @ chan_input.mean() + self._b
        cov = self._A @ chan_input.cov() @ self._A.transpose() + self._cov

        return dists.GaussianDist(
            np.block([chan_input.mean(), mean]),
            np.block(
                [[chan_input.cov(),
                  chan_input.cov() @ self._A.transpose()],
                 [self._A @ chan_input.cov(), cov]]))
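The lower-left block above must be the transpose of the upper-right one, i.e. Cov(Y, X) = A Σ_x. A quick NumPy check with illustrative numbers (none of these values come from the library):

import numpy as np

A = np.array([[1.0, 0.5], [0.0, 2.0]])   # illustrative channel matrix
Sigma_x = np.diag([4.0, 0.25])           # illustrative input covariance
cov_v = 0.3 * np.eye(2)

upper_right = Sigma_x @ A.T   # Cov(X, Y)
lower_left = A @ Sigma_x      # Cov(Y, X)
assert np.allclose(lower_left, upper_right.T)

# The full joint covariance is then symmetric, as required.
joint_cov = np.block([[Sigma_x, upper_right],
                      [lower_left, A @ Sigma_x @ A.T + cov_v]])
assert np.allclose(joint_cov, joint_cov.T)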
Example No. 7
    def test_ilqr(self):
        g = np.zeros((4, 4))
        g[0, -1] = 3.2
        Qf = np.zeros((4, 4))
        Qf[0, 0] = 1

        s = Slip(init_dist=dists.GaussianDist(
            np.array([0, 0.3927, -3.2733, -6.7881]), 1e-3 * np.eye(4)),
                 horizon=3,
                 proc_cov=1e-4 * np.diag(np.array([1, 0.1, 0.5, 0.5])),
                 meas_cov=1e-4 * np.eye(4),
                 Q=np.zeros((4, 4, 3)),
                 g=g,
                 R=10 * np.ones((1, 1, 3)),
                 w=np.zeros((1, 3)),
                 Qf=Qf)

        policy = ILQRPolicy(s)
        policy.solve(iters=5, verbose=True)

        self.assertAlmostEqual(policy._state_traj[0, -1], g[0, -1], places=3)
Example No. 8
    def posterior(self, prior: dists.GaussianDist,
                  output: np.ndarray) -> dists.GaussianDist:
        """
        Computes the posterior distribution over X given Y = y.

        Reference: https://web.stanford.edu/class/ee363/lectures/estim.pdf

        :param prior: The Gaussian prior distribution over the n-dimensional X.
        :param output: An m-vector representing the observed value of y.
        :return: The Gaussian posterior distribution over X given Y = y.
        """

        B = prior.cov() @ self._A.transpose() @ np.linalg.inv(
            self._A @ prior.cov() @ self._A.transpose() + self._cov)

        output_mean = self._A @ prior.mean() + self._b

        return dists.GaussianDist(
            prior.mean() + B @ (output.flatten() - output_mean),
            np.linalg.inv(
                self._A.transpose() @ np.linalg.inv(self._cov) @ self._A +
                np.linalg.inv(prior.cov())))
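posterior() mixes the gain form for the mean with the information form for the covariance; the two covariance expressions agree by the Woodbury identity, which a short NumPy sketch can confirm (the matrices below are illustrative, echoing the Stanford example cited above):

import numpy as np

A = np.array([[np.cos(np.pi / 6), np.sin(np.pi / 6)]])   # 1x2 observation map
Sigma_x = np.diag([4.0, 0.25])                           # prior covariance
Sigma_v = np.eye(1)                                      # measurement noise

# Gain form (Kalman update): Sigma_post = Sigma_x - B A Sigma_x.
B = Sigma_x @ A.T @ np.linalg.inv(A @ Sigma_x @ A.T + Sigma_v)
cov_gain = Sigma_x - B @ A @ Sigma_x

# Information form, as used in posterior() above.
cov_info = np.linalg.inv(A.T @ np.linalg.inv(Sigma_v) @ A + np.linalg.inv(Sigma_x))

assert np.allclose(cov_gain, cov_info)

Example No. 9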
    def solve(self, tradeoff: float, iters: int = 10, initial_inputs: Union[None, np.ndarray] = None,
              init_K: Union[None, np.ndarray] = None, init_h: Union[None, np.ndarray] = None,
              init_verbose: bool = False, relinearize_every: int = 5):
        A = np.zeros((self._problem.n_states, self._problem.n_states, self._problem.horizon))
        B = np.zeros((self._problem.n_states, self._problem.n_inputs, self._problem.horizon))


        C = np.dstack([np.eye(self._trv_size, self._problem.n_states)] * self._problem.horizon)
        a = np.zeros((self._trv_size, self._problem.horizon))

        Sigma_eta = np.zeros((self._trv_size, self._trv_size, self._problem.horizon))

        for t in range(self._problem.horizon):
            Sigma_eta[:, :, t] = 0.01 * np.random.rand(self._trv_size, self._trv_size)
            # Use M @ M.T (not element-wise *) so each slice is positive semidefinite.
            Sigma_eta[:, :, t] = Sigma_eta[:, :, t] @ Sigma_eta[:, :, t].transpose()

        if init_K is None:
            K = np.zeros((self._problem.n_inputs, self._trv_size, self._problem.horizon))
        else:
            K = init_K.copy()

        if init_h is None:
            h = np.zeros((self._problem.n_inputs, self._problem.horizon))
        else:
            h = init_h.copy()

        Q = self._problem._Q
        R = self._problem._R

        P = np.zeros((self._problem.n_states, self._problem.n_states, self._problem.horizon + 1))
        b = np.zeros((self._problem.n_states, self._problem.horizon + 1))

        # One entry per time step plus the terminal state (indexed up to horizon).
        delta_states = [dists.GaussianDist(np.zeros(self._problem.n_states),
                                           np.zeros((self._problem.n_states, self._problem.n_states)))
                        for i in range(self._problem.horizon + 1)]

        delta_inputs = [dists.GaussianDist(np.zeros(self._problem.n_inputs),
                                           np.zeros((self._problem.n_inputs, self._problem.n_inputs)))
                        for i in range(self._problem.horizon)]

        nominal_states = np.zeros((self._problem.n_states, self._problem.horizon + 1))
        nominal_states[:, 0] = self._problem.init_dist.mean()

        if initial_inputs is None:
            nominal_inputs = np.zeros((self._problem.n_inputs, self._problem.horizon))
        else:
            nominal_inputs = initial_inputs.copy()

        for t in range(self._problem.horizon):
            nominal_states[:, t + 1] = self._problem.dynamics(nominal_states[:, t], nominal_inputs[:, t], t).mean()

        relinearize = False
        obj_val = np.inf
        obj_hist = np.zeros(iters)
        mi_total = 0
        expected_cost_total = 0
        best_expected_cost = np.inf
        best_mi = np.inf

        for iter in range(iters):
            # Forward dynamics
            expected_cost_total = 0
            mi_total = 0

            for t in range(self._problem.horizon):
                delta_inputs[t] = dists.GaussianDist(K[:, :, t] @ (C[:, :, t] @ delta_states[t].mean()
                                                                   + a[:, t]) + h[:, t],
                                                     K[:, :, t] @ (C[:, :, t] @ delta_states[t].cov()
                                                                   @ C[:, :, t].transpose() + Sigma_eta[:, :, t])
                                                     @ K[:, :, t].transpose())

                A[:, :, t], B[:, :, t] = self._problem.linearize_dynamics(nominal_states[:, t], nominal_inputs[:, t], t)

                # Propagate the state perturbation through the linearized closed loop.
                closed_loop = A[:, :, t] + B[:, :, t] @ K[:, :, t] @ C[:, :, t]
                delta_states[t + 1] = dists.GaussianDist(
                    self._problem.dynamics(nominal_states[:, t] + delta_states[t].mean(),
                                           nominal_inputs[:, t] + delta_inputs[t].mean(), t).mean()
                    - nominal_states[:, t + 1],
                    closed_loop @ delta_states[t].cov() @ closed_loop.transpose()
                    + (B[:, :, t] @ K[:, :, t]) @ Sigma_eta[:, :, t] @ (B[:, :, t] @ K[:, :, t]).transpose()
                    + self._problem._proc_cov)

                expected_cost_total += self._problem.cost(nominal_states[:, t] + delta_states[t].mean(),
                                                          nominal_inputs[:, t] + delta_inputs[t].mean(), t)

                # TODO: Test mutual info computation.
                mi_total += channels.LGChannel(C[:, :, t], a[:, t], Sigma_eta[:, :, t]).mutual_info(delta_states[t])

            obj_hist[iter] = expected_cost_total + (1 / tradeoff) * mi_total

            if obj_hist[iter] < obj_val:
                obj_val = obj_hist[iter]
                self._C = C.copy()
                self._a = a.copy()
                self._K = K.copy()
                self._Sigma_eta = Sigma_eta.copy()
                self._h = h.copy()
                self._nominal_inputs = nominal_inputs
                self._nominal_states = nominal_states

                self._A = A
                self._B = B

                for t in range(self._problem.horizon + 1):
                    self._delta_states[t] = delta_states[t]


                relinearize = True

            if iter % relinearize_every == 0 and relinearize:
                relinearize = False

                for t in range(self._problem.horizon):
                    nominal_inputs[:, t] = nominal_inputs[:, t] + delta_inputs[t].mean()
                    nominal_states[:, t + 1] = self._problem.dynamics(nominal_states[:, t],
                                                                      nominal_inputs[:, t], t).mean()

                continue

            delta_g = self._problem._g - nominal_states
            delta_w = self._problem._w - nominal_inputs

            P[:, :, -1] = self._problem._Qf
            b[:, -1] = -self._problem._Qf @ delta_g[:, -1]

            # Here be dragons...
            for t in range(self._problem.horizon - 1, -1, -1):
                # TRV Given State Map:
                Sigma_eta[:, :, t] = np.linalg.inv(
                    tradeoff * K[:, :, t].transpose() @ (B[:, :, t].transpose() @ P[:, :, t + 1] @ B[:, :, t] + R[:, :, t]) @ K[:, :, t]
                    + np.linalg.inv(C[:, :, t] @ delta_states[t].cov() @ C[:, :, t].transpose() + Sigma_eta[:, :, t]))

                F = np.linalg.inv(C[:, :, t] @ delta_states[t].cov() @ C[:, :, t].transpose() + Sigma_eta[:, :, t])

                C[:, :, t] = -tradeoff * Sigma_eta[:, :, t] @ K[:, :, t].transpose() @ B[:, :, t].transpose() @ P[:, :, t + 1] @ A[:, :, t]

                a[:, t] = -Sigma_eta[:, :, t] @ (
                    tradeoff * K[:, :, t].transpose() @ B[:, :, t].transpose() @ (b[:, t + 1] + P[:, :, t + 1] @ B[:, :, t] @ h[:, t])
                    + tradeoff * K[:, :, t].transpose() @ R[:, :, t] @ (h[:, t] - delta_w[:, t])
                    - F @ (C[:, :, t] @ delta_states[t].mean() + a[:, t]))

                # Input Given TRV Map:
                # First some shorthand
                x_bar = delta_states[t].mean()
                Sigma_x = delta_states[t].cov()
                x_tilde_bar = C[:, :, t] @ x_bar + a[:, t]
                Sigma_x_tilde = C[:, :, t] @ Sigma_x @ C[:, :, t].transpose() + Sigma_eta[:, :, t]

                cpK = cp.Variable((self._problem.n_inputs, self._trv_size))
                cph = cp.Variable(self._problem.n_inputs)

                # Shorthand for the current-stage matrices.
                At, Bt, Ct = A[:, :, t], B[:, :, t], C[:, :, t]
                Pn, Rt = P[:, :, t + 1], R[:, :, t]

                u_bar = cpK @ x_tilde_bar + cph
                objective = (0.5 * (u_bar - delta_w[:, t]).T @ Rt @ (u_bar - delta_w[:, t])
                             + 0.5 * cp.trace(cpK.T @ Rt @ cpK @ Sigma_x_tilde)
                             + 0.5 * x_bar @ At.transpose() @ Pn @ At @ x_bar
                             + 0.5 * x_bar.transpose() @ (At.transpose() @ Pn @ Bt @ cpK @ Ct
                                                          + Ct.transpose() @ cpK.T @ Bt.transpose() @ Pn @ At) @ x_bar
                             + x_bar.transpose() @ At.transpose() @ Pn @ Bt @ cpK @ a[:, t]
                             + x_bar.transpose() @ At.transpose() @ Pn @ Bt @ cph
                             + 0.5 * x_tilde_bar.transpose() @ cpK.T @ Bt.transpose() @ Pn @ Bt @ cpK @ x_tilde_bar
                             + x_tilde_bar.transpose() @ cpK.T @ Bt.transpose() @ Pn @ Bt @ cph
                             + 0.5 * cph.T @ Bt.transpose() @ Pn @ Bt @ cph
                             + b[:, t + 1].transpose() @ (At @ x_bar + Bt @ cpK @ x_tilde_bar + Bt @ cph)
                             + 0.5 * cp.trace(Sigma_x @ At.transpose() @ Pn @ At
                                              + Sigma_x @ (At.transpose() @ Pn @ Bt @ cpK @ Ct
                                                           + Ct.transpose() @ cpK.T @ Bt.transpose() @ Pn @ At))
                             + cp.trace(Sigma_x_tilde @ cpK.T @ Bt.transpose() @ Pn @ Bt @ cpK))
                prob = cp.Problem(cp.Minimize(objective), [])
                prob.solve(solver=cp.MOSEK)

                K[:, :, t] = cpK.value.copy()
                h[:, t] = cph.value.copy()

                # Value Function:
                G = C[:, :, t].transpose() @ F @ C[:, :, t]

                P[:, :, t] = (Q[:, :, t] + (1 / tradeoff) * G
                              + C[:, :, t].transpose() @ K[:, :, t].transpose() @ R[:, :, t] @ K[:, :, t] @ C[:, :, t]
                              + (A[:, :, t] + B[:, :, t] @ K[:, :, t] @ C[:, :, t]).transpose() @ P[:, :, t + 1]
                              @ (A[:, :, t] + B[:, :, t] @ K[:, :, t] @ C[:, :, t]))

                b[:, t] = ((A[:, :, t] + B[:, :, t] @ K[:, :, t] @ C[:, :, t]).transpose() @ P[:, :, t + 1] @ B[:, :, t] @ K[:, :, t] @ a[:, t]
                           - Q[:, :, t] @ delta_g[:, t]
                           - (1 / tradeoff) * G @ delta_states[t].mean()
                           + C[:, :, t].transpose() @ K[:, :, t].transpose() @ R[:, :, t] @ K[:, :, t] @ a[:, t]
                           + (A[:, :, t] + B[:, :, t] @ K[:, :, t] @ C[:, :, t]).transpose() @ b[:, t + 1]
                           + C[:, :, t].transpose() @ K[:, :, t].transpose() @ R[:, :, t] @ h[:, t]
                           - C[:, :, t].transpose() @ K[:, :, t].transpose() @ R[:, :, t] @ delta_w[:, t]
                           + A[:, :, t].transpose() @ P[:, :, t + 1] @ B[:, :, t] @ h[:, t]
                           + C[:, :, t].transpose() @ K[:, :, t].transpose() @ B[:, :, t].transpose() @ P[:, :, t + 1] @ B[:, :, t] @ h[:, t])
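For orientation, if the TRV terms drop out (C = I, Sigma_eta → 0) and the mutual-information weight 1/tradeoff vanishes, the P update above reduces to the classic discrete-time LQR Riccati recursion. A minimal self-contained sketch of that special case, with illustrative dynamics and costs:

import numpy as np

# Illustrative time-invariant dynamics and costs (not from the problem above).
A = np.array([[1.0, 0.1], [0.0, 1.0]])
B = np.array([[0.0], [0.1]])
Q = np.eye(2)
R = np.eye(1)
horizon = 50

P = Q.copy()   # terminal value, analogous to P[:, :, -1] = Qf
for t in range(horizon - 1, -1, -1):
    # Optimal feedback gain, then the closed-loop value-function update,
    # mirroring the Q + K^T R K + (A + B K)^T P (A + B K) structure above.
    K = -np.linalg.inv(R + B.T @ P @ B) @ B.T @ P @ A
    P = Q + K.T @ R @ K + (A + B @ K).T @ P @ (A + B @ K)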
Example No. 10
    def sensor(self, state: StateType, t: int) -> dists.Distribution:
        return dists.GaussianDist(state, self._meas_cov)
Example No. 11
    def dynamics(self, state: StateType, input: InputType,
                 t: int) -> dists.Distribution:
        return dists.GaussianDist(slip_return_map(state, input, self),
                                  self._proc_cov)
Example No. 12
    def sensor(self, state: np.ndarray, t: int) -> dists.GaussianDist:
        return dists.GaussianDist(self._C[:, :, t] @ state, self._meas_cov)
Example No. 13
    def dynamics(self, state: np.ndarray, input: np.ndarray,
                 t: int) -> dists.GaussianDist:
        return dists.GaussianDist(
            self._A[:, :, t] @ state + self._B[:, :, t] @ input,
            self._proc_cov)
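These dynamics and sensor methods together define an ordinary linear-Gaussian state-space model. A NumPy-only rollout sketch under that reading (the sampling below stands in for GaussianDist.sample(); the matrices and the constant input are illustrative, not from the repo):

import numpy as np

rng = np.random.default_rng(0)
A = np.array([[1.0, 0.1], [0.0, 1.0]])   # illustrative dynamics matrix
B = np.array([[0.0], [0.1]])             # illustrative input matrix
C = np.eye(2)                            # illustrative observation matrix
proc_cov = 1e-4 * np.eye(2)
meas_cov = 1e-2 * np.eye(2)

state = np.zeros(2)
for t in range(10):
    u = np.array([1.0])  # placeholder input; a policy would supply this
    # x_{t+1} ~ N(A x_t + B u_t, proc_cov), y_{t+1} ~ N(C x_{t+1}, meas_cov)
    state = rng.multivariate_normal(A @ state + B @ u, proc_cov)
    measurement = rng.multivariate_normal(C @ state, meas_cov)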