예제 #1
0
def test_lqr_slew_rate():
    """Check how the slew-rate penalty changes the MPC solution.

    Three unconstrained solves share one random quadratic cost and one
    affine dynamics model:

    * no slew-rate penalty,
    * a tiny penalty (1e-6), which must reproduce the unpenalised states and
      controls to within ``atol=1e-3``,
    * a unit penalty, which must give strictly larger objectives and a
      smaller norm of consecutive control differences.
    """
    n_batch = 2
    n_state, n_ctrl = 3, 4
    n_sc = n_state + n_ctrl
    T = 5
    alpha = 0.2

    torch.manual_seed(1)
    # C^T C keeps every per-step cost matrix positive semi-definite.
    C = torch.randn(T, n_batch, n_sc, n_sc)
    C = C.transpose(2, 3).matmul(C)
    c = torch.randn(T, n_batch, n_sc)
    x_init = torch.randn(n_batch, n_state)
    R = torch.eye(n_state) + alpha*torch.randn(n_state, n_state)
    S = torch.randn(n_state, n_ctrl)
    f = torch.randn(n_state)
    C, c, x_init, R, S, f = map(Variable, (C, c, x_init, R, S, f))

    dynamics = AffineDynamics(R, S, f)

    def solve(**extra):
        # All three solves differ only in the optional slew-rate penalty.
        return mpc.MPC(
            n_state, n_ctrl, T,
            u_lower=None, u_upper=None, u_init=None,
            lqr_iter=10,
            backprop=False,
            verbose=1,
            exit_unconverged=False,
            eps=1e-4,
            **extra
        )(x_init, QuadCost(C, c), dynamics)

    x, u, objs = solve()

    # The solution should be the same when the slew rate approaches 0.
    x_slew_eps, u_slew_eps, _ = solve(slew_rate_penalty=1e-6)

    npt.assert_allclose(x.data.numpy(), x_slew_eps.data.numpy(), atol=1e-3)
    npt.assert_allclose(u.data.numpy(), u_slew_eps.data.numpy(), atol=1e-3)

    _, u_slew, objs_slew = solve(slew_rate_penalty=1.)

    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all((objs < objs_slew).numpy())

    # Penalising control changes must reduce the total step-to-step variation.
    d = torch.norm(u[:-1] - u[1:]).item()
    d_slew = torch.norm(u_slew[:-1] - u_slew[1:]).item()
    assert d_slew < d
예제 #2
0
def test_lqr_linear_unbounded():
    """Compare the MPC solver with the ``lqr_cp`` reference on an
    effectively unconstrained linear problem.

    The reference trajectory is computed for batch element 0 only. The MPC
    solver is then run twice -- once with very loose box bounds (+/-1e4) and
    once with no bounds at all -- and batch element 0 of each result must
    match the reference to ``rtol=1e-3``.
    """
    npr.seed(1)

    n_batch = 2
    n_state, n_ctrl = 3, 4
    n_sc = n_state + n_ctrl
    T = 5

    # Random PSD cost (C^T C) with a random linear term.
    C = npr.randn(T, n_batch, n_sc, n_sc)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = npr.randn(T, n_batch, n_sc)

    # One (R, S, f) triple tiled over every time step and batch element.
    alpha = 0.2
    R_single = np.eye(n_state)+alpha*np.random.randn(n_state, n_state)
    R = np.tile(R_single, (T, n_batch, 1, 1))
    S_single = np.random.randn(n_state, n_ctrl)
    S = np.tile(S_single, (T, n_batch, 1, 1))
    F = np.concatenate((R, S), axis=3)
    f = np.tile(npr.randn(n_state), (T, n_batch, 1))
    x_init = npr.randn(n_batch, n_state)

    # Bounds wide enough that they should never become active.
    u_lower = np.full((T, n_batch, n_ctrl), -1e4)
    u_upper = np.full((T, n_batch, n_ctrl), 1e4)

    tau_cp, _objs_cp = lqr_cp(
        C[:,0], c[:,0], F[:,0], f[:,0], x_init[0], T, n_state, n_ctrl,
        None, None
    )
    tau_cp = tau_cp.T

    def to_var(arr):
        return Variable(torch.Tensor(arr).double())

    C, c, R, S, F, f, x_init, u_lower, u_upper = map(
        to_var, (C, c, R, S, F, f, x_init, u_lower, u_upper))

    dynamics = AffineDynamics(R[0,0], S[0,0], f[0,0])

    def assert_matches_reference(solver):
        x_lqr, u_lqr, _objs_lqr = solver(x_init, QuadCost(C, c), dynamics)
        tau_lqr = util.get_data_maybe(torch.cat((x_lqr, u_lqr), 2))
        npt.assert_allclose(tau_cp, tau_lqr[:,0].numpy(), rtol=1e-3)

    # Loose box constraints.
    assert_matches_reference(mpc.MPC(
        n_state, n_ctrl, T, u_lower, u_upper, None,
        lqr_iter=10,
        backprop=False,
        verbose=1,
        exit_unconverged=True,
    ))

    # No box constraints at all.
    assert_matches_reference(mpc.MPC(
        n_state, n_ctrl, T, None, None, None,
        lqr_iter=10,
        backprop=False,
        exit_unconverged=False,
    ))
예제 #3
0
def test_lqr_backward_cost_affine_dynamics_module_constrained():
    """Check that d(u)/d(F) agrees between ``LinDx`` and ``AffineDynamics``.

    The same box-constrained problem is solved twice: once with the dynamics
    given as ``LinDx(F)`` and once with an ``AffineDynamics`` module built
    from slices of that same ``F``. The Jacobian of the flattened controls
    with respect to ``F`` must agree to ``atol=1e-4``.
    """
    npr.seed(0)
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 1, 2, 2, 2
    n_sc = n_state + n_ctrl

    C = 10.*npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10.*npr.randn(T, n_batch, n_sc).astype(np.float64)
    x_init = npr.randn(n_batch, n_state).astype(np.float64)

    beta = 2.0
    bound = beta*np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_lower, u_upper = -bound, bound

    def leaf(arr):
        return Variable(torch.Tensor(arr).double(), requires_grad=True)

    _C, _c, _x_init, _u_lower, _u_upper = map(
        leaf, (C, c, x_init, u_lower, u_upper))

    # A single random transition matrix repeated over the T-1 transitions.
    F = Variable(
        torch.randn(1, 1, n_state, n_sc).repeat(T-1, 1, 1, 1).double(),
        requires_grad=True)
    dynamics = AffineDynamics(F[0,0,:,:n_state], F[0,0,:,n_state:])

    def control_jacobian_wrt_F(dx):
        """Solve with dynamics ``dx`` and return d(u_flat)/d(F) as an array."""
        _x_lqr, u_lqr, _objs_lqr = mpc.MPC(
            n_state, n_ctrl, T, _u_lower, _u_upper, None,
            lqr_iter=20,
            verbose=1,
        )(_x_init, QuadCost(_C, _c), dx)
        u_flat = u_lqr.view(-1)
        rows = [
            grad(u_flat[k], [F], create_graph=True)[0].view(-1)
            for k in range(len(u_flat))
        ]
        return torch.stack(rows).data.numpy()

    du_dF = control_jacobian_wrt_F(LinDx(F))
    du_dF_ = control_jacobian_wrt_F(dynamics)

    npt.assert_allclose(du_dF, du_dF_, atol=1e-4)
예제 #4
0
    def forward_numpy(C, c, x_init, u_lower, u_upper, fc0b):
        """Solve the MPC problem from array inputs and return the flat controls.

        ``fc0b`` is written into the bias of the first layer of the enclosing
        scope's ``dynamics`` before solving. The result is the solver's
        control output flattened to one dimension and converted to NumPy.
        """
        def as_double_leaf(arr):
            if arr is None:
                return None
            return Variable(torch.Tensor(arr).double(), requires_grad=True)

        _C = as_double_leaf(C)
        _c = as_double_leaf(c)
        _x_init = as_double_leaf(x_init)
        _u_lower = as_double_leaf(u_lower)
        _u_upper = as_double_leaf(u_upper)
        fc0b = as_double_leaf(fc0b)

        # Overwrite the bias in place so the module keeps its parameter object.
        dynamics.fcs[0].bias.data[:] = fc0b.data

        solver = mpc.MPC(
            n_state,
            n_ctrl,
            T,
            _u_lower,
            _u_upper,
            None,
            lqr_iter=40,
            verbose=-1,
            exit_unconverged=True,
            backprop=False,
            max_linesearch_iter=1,
            slew_rate_penalty=1.0,
        )
        _x_lqr, u_lqr, _objs_lqr = solver(_x_init, QuadCost(_C, _c), dynamics)
        return util.get_data_maybe(u_lqr.view(-1)).numpy()
예제 #5
0
def test_lqr_linear_bounded_delta():
    """Check that one LQR iteration with ``delta_u`` keeps controls small.

    A box-constrained linear problem is solved with ``lqr_iter=1`` and
    ``delta_u=0.1``; the largest absolute control returned must not exceed
    ``delta_u``. The ``lqr_cp`` reference is still evaluated, but its result
    is not compared against anything in this test.
    """
    npr.seed(1)

    n_batch = 2
    n_state, n_ctrl, T = 3, 4, 5
    n_sc = n_state + n_ctrl

    C = npr.randn(T, n_batch, n_sc, n_sc)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = npr.randn(T, n_batch, n_sc)

    alpha = 0.2
    R_single = np.eye(n_state) + alpha * np.random.randn(n_state, n_state)
    R = np.tile(R_single, (T, n_batch, 1, 1))
    S_single = np.random.randn(n_state, n_ctrl)
    S = 0.01 * np.tile(S_single, (T, n_batch, 1, 1))
    F = np.concatenate((R, S), axis=3)
    f = np.tile(npr.randn(n_state), (T, n_batch, 1))
    x_init = npr.randn(n_batch, n_state)
    u_lower = -npr.random((T, n_batch, n_ctrl))
    u_upper = npr.random((T, n_batch, n_ctrl))

    # Reference solve for batch element 0 (result unused below).
    _tau_cp, _objs_cp = lqr_cp(
        C[:, 0], c[:, 0], F[:, 0], f[:, 0], x_init[0],
        T, n_state, n_ctrl,
        u_lower[:, 0], u_upper[:, 0],
    )

    def to_var(arr):
        return Variable(torch.Tensor(arr).double())

    C, c, R, S, F, f, x_init, u_lower, u_upper = map(
        to_var, (C, c, R, S, F, f, x_init, u_lower, u_upper))
    dynamics = AffineDynamics(R[0, 0], S[0, 0], f[0, 0])

    delta_u = 0.1
    solver = mpc.MPC(
        n_state,
        n_ctrl,
        T,
        u_lower,
        u_upper,
        lqr_iter=1,
        verbose=1,
        delta_u=delta_u,
        backprop=False,
        exit_unconverged=False,
    )
    _x_lqr, u_lqr, _objs_lqr = solver(x_init, QuadCost(C, c), dynamics)

    largest_control = torch.abs(util.get_data_maybe(u_lqr)).max()
    assert largest_control <= delta_u
예제 #6
0
    def get_loss(x_init, _A, _B):
        """Return the trajectory loss between expert and candidate dynamics.

        Both rollouts use the expert cost (``expert['Q']``, ``expert['p']``)
        and differ only in the dynamics: the expert's ``A``/``B`` versus the
        supplied ``_A``/``_B``. The loss is the mean squared control error
        plus the mean squared state error.
        """
        def rollout(A, B):
            # Tile [A | B] over the horizon and the batch.
            F = torch.cat((A, B), dim=1) \
                .unsqueeze(0).unsqueeze(0).repeat(args.T, n_batch, 1, 1)
            controller = mpc.MPC(
                n_state,
                n_ctrl,
                args.T,
                u_lower=u_lower,
                u_upper=u_upper,
                u_init=u_init,
                lqr_iter=2,
                verbose=-1,
                exit_unconverged=False,
                detach_unconverged=False,
                n_batch=n_batch,
            )
            xs, us, _objs = controller(
                x_init, QuadCost(expert['Q'], expert['p']), LinDx(F))
            return xs, us

        x_true, u_true = rollout(expert['A'], expert['B'])
        x_pred, u_pred = rollout(_A, _B)

        control_err = torch.mean((u_true - u_pred)**2)
        state_err = torch.mean((x_true - x_pred)**2)
        return control_err + state_err
예제 #7
0
    def forward_numpy(C, c, x_init, u_lower, u_upper, F):
        """Solve the linear-dynamics MPC problem from array inputs.

        Each non-``None`` argument is converted to a double-precision
        ``Variable``. The solver's control output is returned flattened to
        one dimension as a NumPy array.
        """
        def as_double(arr):
            if arr is None:
                return None
            return Variable(torch.Tensor(arr).double())

        _C, _c, _x_init, _u_lower, _u_upper, F = (
            as_double(arr) for arr in (C, c, x_init, u_lower, u_upper, F))

        solver = mpc.MPC(
            n_state, n_ctrl, T, _u_lower, _u_upper, None,
            lqr_iter=40,
            verbose=1,
            exit_unconverged=True,
            backprop=False,
            max_linesearch_iter=2,
        )
        _x_lqr, u_lqr, _objs_lqr = solver(_x_init, QuadCost(_C, _c), LinDx(F))
        return util.get_data_maybe(u_lqr.view(-1)).numpy()
예제 #8
0
    def forward(self, x_init, C, c, d):
        """Run one MPC solve and return the next state and first control.

        Returns ``(x_pred[1, 0, :], u_pred[0, 0, :])``: the predicted state
        at step 1 and the control at step 0, both for batch element 0.
        """
        # Disturbance term Bd_hat @ d, transposed and given a singleton axis
        # (the original author's notes give its shape as T-1 x 1 x n_state).
        # NOTE(review): `ft` is computed but never handed to LinDx, which
        # receives None as its second argument -- confirm this is intended.
        ft = torch.mm(self.Bd_hat, d).transpose(0, 1)
        ft = ft.unsqueeze(1)

        controller = mpc.MPC(
            n_state=n_state,
            n_ctrl=n_ctrl,
            T=T,
            u_lower=self.u_lower,
            u_upper=self.u_upper,
            lqr_iter=20,
            verbose=0,
            exit_unconverged=False,
        )
        x_pred, u_pred, _ = controller(
            x_init.double(),
            QuadCost(C.double(), c.double()),
            LinDx(self.F_hat.repeat(T - 1, 1, 1, 1), None),
        )

        next_state = x_pred[1, 0, :]
        first_control = u_pred[0, 0, :]
        return next_state, first_control
예제 #9
0
    def mpc(self,
            dx,
            xinit,
            q,
            p,
            u_init=None,
            eps_override=None,
            lqr_iter_override=None):
        """Solve an MPC problem with a diagonal quadratic cost.

        ``q`` becomes the diagonal of the cost matrix and ``p`` the linear
        term; both are tiled over ``self.mpc_T`` steps and the batch size
        taken from ``xinit.shape[0]``. A truthy ``eps_override`` or
        ``lqr_iter_override`` replaces ``self.true_dx.mpc_eps`` or
        ``self.lqr_iter`` respectively. Returns ``(x_mpc, u_mpc)``.
        """
        true_dx = self.true_dx
        horizon = self.mpc_T
        n_batch = xinit.shape[0]

        Q = torch.diag(q).unsqueeze(0).unsqueeze(0).repeat(
            horizon, n_batch, 1, 1)
        p = p.unsqueeze(0).repeat(horizon, n_batch, 1)

        # Falsy overrides (None, 0) fall back to the configured values.
        eps = eps_override or true_dx.mpc_eps
        lqr_iter = lqr_iter_override or self.lqr_iter

        # slew_rate_penalty / prev_ctrl were disabled by the original author.
        controller = mpc.MPC(
            true_dx.n_state,
            true_dx.n_ctrl,
            horizon,
            u_lower=true_dx.lower,
            u_upper=true_dx.upper,
            u_init=u_init,
            lqr_iter=lqr_iter,
            verbose=0,
            exit_unconverged=False,
            detach_unconverged=True,
            linesearch_decay=true_dx.linesearch_decay,
            max_linesearch_iter=true_dx.max_linesearch_iter,
            grad_method=self.grad_method,
            eps=eps,
        )
        x_mpc, u_mpc, _objs_mpc = controller(xinit, QuadCost(Q, p), dx)
        return x_mpc, u_mpc
예제 #10
0
    def forward(self, x_init, ft, C, c, current=True, n_iters=20):
        """Solve the box-constrained MPC problem with learned linear dynamics.

        Args:
            x_init: initial state; converted to double before solving.
            ft: affine dynamics term passed to ``LinDx``. It must be 3-D, and
                its second axis is used as the batch size.
            C, c: quadratic and linear cost terms; converted to double.
            current: when truthy use ``self.F_hat``, otherwise
                ``self.F_hat_old``.
            n_iters: number of LQR iterations.

        Returns:
            ``(x_lqr, u_lqr)`` as produced by the solver.
        """
        # Unpacking three values keeps the implicit "ft is 3-D" check.
        _, n_batch, _ = ft.shape
        F_hat = self.F_hat if current else self.F_hat_old

        controller = mpc.MPC(
            n_state=self.n_state,
            n_ctrl=self.n_ctrl,
            T=self.T,
            u_lower=self.u_lower.repeat(self.T, n_batch, 1),
            u_upper=self.u_upper.repeat(self.T, n_batch, 1),
            lqr_iter=n_iters,
            backprop=True,
            verbose=0,
            exit_unconverged=False,
        )
        x_lqr, u_lqr, _objs_lqr = controller(
            x_init.double(),
            QuadCost(C.double(), c.double()),
            LinDx(F_hat.repeat(self.T - 1, n_batch, 1, 1), ft.double()),
        )
        return x_lqr, u_lqr
예제 #11
0
def construct_MPC(A, B, ref, dt):
    """Build an MPC solver, quadratic cost and linear dynamics for a
    discretised linear system.

    The problem uses batch size 1, a single control input, a horizon of 5
    steps and ``args.hidden_dim`` states (``args`` is read from module
    scope). Controls are bounded to [-2, 2].

    Args:
        A, B: system matrices; the transition matrix is built as
            ``[I + dt*A, dt*B]`` with NumPy.
        ref: reference used for the linear cost term. Only ``px[0]`` enters
            the cost, so it presumably carries a leading batch axis --
            TODO confirm against callers.
        dt: discretisation step.

    Returns:
        ``(mpc_model, cost, dynamic)``: the unsolved ``mpc.MPC`` module, the
        ``QuadCost`` and the ``LinDx`` dynamics.
    """
    n_batch, n_state, n_ctrl, T = 1, args.hidden_dim, 1, 5

    # Diagonal cost: unit weight on every state, 0.1 on the control.
    goal_weights = torch.ones(args.hidden_dim)
    ctrl_penalty = 0.1 * torch.ones(n_ctrl)
    q = torch.cat((goal_weights, ctrl_penalty))
    px = -torch.sqrt(goal_weights) * ref
    p = torch.cat((px[0], torch.zeros(n_ctrl)))
    Q = torch.diag(q).unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1)
    p = p.unsqueeze(0).repeat(T, n_batch, 1)

    # Euler-discretised dynamics tiled over the horizon and the batch.
    F = torch.FloatTensor(
        np.concatenate([np.eye(args.hidden_dim) + dt*A, dt * B], axis=1))
    F = F.unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1)

    u_lower = -torch.ones(T, n_batch, n_ctrl) * 2
    u_upper = torch.ones(T, n_batch, n_ctrl) * 2

    cost = QuadCost(Q, p)
    dynamic = LinDx(F)

    mpc_model = mpc.MPC(
        n_state=n_state,
        n_ctrl=n_ctrl,
        n_batch=n_batch,
        backprop=False,
        T=T,
        u_lower=u_lower,
        u_upper=u_upper,
        lqr_iter=10,
        verbose=0,
        exit_unconverged=False,
        eps=1e-2,
    )
    return mpc_model, cost, dynamic
예제 #12
0
        ctrl_penalty = 0.01 * torch.ones(n_ctrl)
        q = torch.cat((goal_weights, ctrl_penalty))
        px = -torch.sqrt(goal_weights) * ref
        p = torch.cat((px, torch.zeros((n_batch, n_ctrl))), dim = 1)
        Q = torch.diag(q).unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1)
        p = p.unsqueeze(0).repeat(T, 1, 1)

        
        F = torch.FloatTensor(np.concatenate([np.eye(args.hidden_dim) + env.env.dt*A, env.env.dt * B], axis = 1))
        F = F.unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1)
        f = torch.zeros([5, 1, 3])

        u_lower = -torch.ones(T, n_batch, n_ctrl) *2
        u_upper = torch.ones(T, n_batch, n_ctrl) * 2

        cost = QuadCost(Q, p)
        dynamic = LinDx(F)

        u_init = None
        for k in range(5):
            state = env.reset()
            state = model.transform_state(state)
            for i in range(100):
                env.render()
                state = torch.FloatTensor(state.copy().reshape((1, -1)))
                y = model.encoder(state).detach()
                
                act = -np.dot(K, (y-ref).T)
                if i % 5 == 0:
                    mpc_model = mpc.MPC(
                        n_state = n_state,
예제 #13
0
def test_memory():
    """Check that building, running and deleting an MPC solver leaves the
    process resident set size unchanged.

    RSS is sampled after a garbage collection, before and after the solve;
    the difference is printed and must be exactly zero.
    """
    torch.manual_seed(0)

    n_batch, n_state, n_ctrl, T = 2, 3, 4, 5
    n_sc = n_state + n_ctrl

    # Random PSD quadratic cost and linear dynamics.
    C = torch.randn(T * n_batch, n_sc, n_sc)
    C = torch.bmm(C, C.transpose(1, 2)).view(T, n_batch, n_sc, n_sc)
    c = torch.randn(T, n_batch, n_sc)

    alpha = 0.2
    R_single = torch.eye(n_state) + alpha * torch.randn(n_state, n_state)
    R = R_single.repeat(T, n_batch, 1, 1)
    S = torch.randn(T, n_batch, n_state, n_ctrl)
    F = torch.cat((R, S), dim=3)

    # Initial state and control box bounds.
    x_init = torch.randn(n_batch, n_state)
    u_lower = -torch.rand(T, n_batch, n_ctrl)
    u_upper = torch.rand(T, n_batch, n_ctrl)

    process = psutil.Process(os.getpid())

    # (An equivalent check for a bare LQRStep was disabled by the original
    # author and is omitted here.)

    gc.collect()
    start_mem = process.memory_info().rss

    _mpc = mpc.MPC(
        n_state=n_state,
        n_ctrl=n_ctrl,
        T=T,
        u_lower=u_lower,
        u_upper=u_upper,
        lqr_iter=20,
        verbose=1,
        backprop=False,
        exit_unconverged=False,
    )
    # The cost, dynamics and result are deliberately left unbound so they
    # can be reclaimed before the second measurement.
    _mpc(x_init, QuadCost(C, c), LinDx(F))
    del _mpc

    gc.collect()
    mem_used = process.memory_info().rss - start_mem
    print(mem_used)
    assert mem_used == 0
예제 #14
0
def test_lqr_backward_cost_nn_dynamics_module_constrained_slew():
    """Compare analytic MPC gradients with finite differences for neural
    network dynamics, box constraints and a slew-rate penalty.

    Finite-difference Jacobians of the flattened controls with respect to
    the linear cost term ``c`` and the first-layer bias of the dynamics
    network are computed with ``nd.Jacobian``. They must match the Jacobians
    obtained by back-propagating through the solver to ``atol=1e-3``.
    """
    npr.seed(0)
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 1, 2, 2, 2
    hidden_sizes = [10, 10]
    n_sc = n_state + n_ctrl

    C = 10. * npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10. * npr.randn(T, n_batch, n_sc).astype(np.float64)

    x_init = npr.randn(n_batch, n_state).astype(np.float64)
    beta = 1.
    bound = beta * np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_lower, u_upper = -bound, bound

    dynamics = NNDynamics(n_state, n_ctrl, hidden_sizes,
                          activation='sigmoid').double()
    # Snapshot of the first-layer bias; the forward helper overwrites it.
    fc0b = dynamics.fcs[0].bias.view(-1).data.numpy().copy()

    def leaf(arr):
        if arr is None:
            return None
        return Variable(torch.Tensor(arr).double(), requires_grad=True)

    def forward_numpy(C, c, x_init, u_lower, u_upper, fc0b):
        """Solve without backprop and return the flat controls as NumPy."""
        _C, _c, _x_init, _u_lower, _u_upper, fc0b = (
            leaf(arr) for arr in (C, c, x_init, u_lower, u_upper, fc0b))

        dynamics.fcs[0].bias.data[:] = fc0b.data
        solver = mpc.MPC(
            n_state,
            n_ctrl,
            T,
            _u_lower,
            _u_upper,
            None,
            lqr_iter=40,
            verbose=-1,
            exit_unconverged=True,
            backprop=False,
            max_linesearch_iter=1,
            slew_rate_penalty=1.0,
        )
        _x_lqr, u_lqr, _objs_lqr = solver(_x_init, QuadCost(_C, _c), dynamics)
        return util.get_data_maybe(u_lqr.view(-1)).numpy()

    def f_c(c_flat):
        return forward_numpy(
            C, c_flat.reshape(T, n_batch, n_sc), x_init, u_lower, u_upper,
            fc0b)

    def f_fc0b(bias_flat):
        return forward_numpy(C, c, x_init, u_lower, u_upper, bias_flat)

    u = forward_numpy(C, c, x_init, u_lower, u_upper, fc0b)

    # Make sure the solution is strictly partially on the boundary.
    lower_flat = u_lower.reshape(-1)
    upper_flat = u_upper.reshape(-1)
    assert np.any(u == lower_flat) or np.any(u == upper_flat)
    assert np.any((u != lower_flat) & (u != upper_flat))

    du_dc_fd = nd.Jacobian(f_c)(c.reshape(-1))
    du_dfc0b_fd = nd.Jacobian(f_fc0b)(fc0b.reshape(-1))

    # Restore the original bias before the differentiable solve.
    dynamics.fcs[0].bias.data = torch.DoubleTensor(fc0b).clone()

    _C, _c, _x_init, _u_lower, _u_upper = (
        leaf(arr) for arr in (C, c, x_init, u_lower, u_upper))

    _x_lqr, u_lqr, _objs_lqr = mpc.MPC(
        n_state,
        n_ctrl,
        T,
        _u_lower,
        _u_upper,
        None,
        lqr_iter=20,
        verbose=-1,
        max_linesearch_iter=1,
        grad_method=GradMethods.ANALYTIC,
        slew_rate_penalty=1.0,
    )(_x_init, QuadCost(_C, _c), dynamics)
    u_lqr_flat = u_lqr.view(-1)

    # One Jacobian row per control entry. du_dC is built as in the original
    # but is not compared against anything.
    fc0_bias = dynamics.fcs[0].bias
    rows_C, rows_c, rows_bias = [], [], []
    for k in range(len(u_lqr_flat)):
        u_k = u_lqr_flat[k]
        rows_C.append(
            grad(u_k, [_C], retain_graph=True)[0].contiguous().view(-1))
        rows_c.append(
            grad(u_k, [_c], retain_graph=True)[0].contiguous().view(-1))
        rows_bias.append(
            grad(u_k, [fc0_bias], retain_graph=True)[0].view(-1))
    du_dC = torch.stack(rows_C).data.numpy()
    du_dc = torch.stack(rows_c).data.numpy()
    du_dfc0b = torch.stack(rows_bias).data.numpy()

    npt.assert_allclose(du_dc_fd, du_dc, atol=1e-3)
    npt.assert_allclose(du_dfc0b_fd, du_dfc0b, atol=1e-3)
예제 #15
0
            dx.n_state,
            dx.n_ctrl,
            mpc_T,
            u_init=u_init,
            u_lower=dx.lower,
            u_upper=dx.upper,
            lqr_iter=500,
            verbose=0,
            exit_unconverged=False,
            detach_unconverged=False,
            linesearch_decay=dx.linesearch_decay,
            max_linesearch_iter=dx.max_linesearch_iter,
            grad_method=GradMethods.AUTO_DIFF,
            eps=dx.mpc_eps,
            n_batch=1,
        ).cuda()(x, QuadCost(Q, p), dx)
        # ).cuda()(x, AcrobotStateCost(goal_state.unsqueeze(0), coef_c=1e-2), dx)
        x1, y1, x2, y2 = dx.model.visualize_point(
            state.detach().cpu().numpy()[0])
        print(nominal_objs)
        plt.plot([0] + [x1] + [x2], [0] + [y1] + [y2], color='gray')
        if (x2 - xg)**2 + (y2 - yg)**2 < 0.2:
            break

        next_action = nominal_actions[0]
        u_init = torch.cat(
            (nominal_actions[1:], torch.zeros(1, n_batch, dx.n_ctrl)), dim=0)
        u_init[-2] = u_init[-3]
        with torch.no_grad():
            state = dx(state, next_action)
        # print(state)
예제 #16
0
def test_lqr_backward_cost_linear_dynamics_constrained():
    """Compare analytic MPC gradients with finite differences for linear
    dynamics under box constraints.

    Finite-difference Jacobians of the flattened controls with respect to
    the linear cost term ``c``, the dynamics ``F`` and the initial state are
    computed with ``nd.Jacobian``. They must match the Jacobians obtained by
    back-propagating through the solver to ``atol=1e-4``.
    """
    npr.seed(0)
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 1, 2, 2, 3
    n_sc = n_state + n_ctrl

    C = 10. * npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10. * npr.randn(T, n_batch, n_sc).astype(np.float64)

    x_init = npr.randn(n_batch, n_state).astype(np.float64)
    beta = 0.5
    bound = beta * np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_lower, u_upper = -bound, bound

    F = npr.randn(T - 1, n_batch, n_state, n_sc)

    def forward_numpy(C, c, x_init, u_lower, u_upper, F):
        """Solve without backprop and return the flat controls as NumPy."""
        def as_double(arr):
            if arr is None:
                return None
            return Variable(torch.Tensor(arr).double())

        _C, _c, _x_init, _u_lower, _u_upper, F = (
            as_double(arr) for arr in (C, c, x_init, u_lower, u_upper, F))

        solver = mpc.MPC(
            n_state,
            n_ctrl,
            T,
            _u_lower,
            _u_upper,
            None,
            lqr_iter=40,
            verbose=1,
            exit_unconverged=True,
            backprop=False,
            max_linesearch_iter=2,
        )
        _x_lqr, u_lqr, _objs_lqr = solver(_x_init, QuadCost(_C, _c), LinDx(F))
        return util.get_data_maybe(u_lqr.view(-1)).numpy()

    def f_c(c_flat):
        return forward_numpy(
            C, c_flat.reshape(T, n_batch, n_sc), x_init, u_lower, u_upper, F)

    def f_F(F_flat):
        return forward_numpy(
            C, c, x_init, u_lower, u_upper,
            F_flat.reshape(T - 1, n_batch, n_state, n_sc))

    def f_x_init(x0_flat):
        return forward_numpy(
            C, c, x0_flat.reshape(1, -1), u_lower, u_upper, F)

    u = forward_numpy(C, c, x_init, u_lower, u_upper, F)

    # Make sure the solution is strictly partially on the boundary.
    lower_flat = u_lower.reshape(-1)
    upper_flat = u_upper.reshape(-1)
    assert np.any(u == lower_flat) or np.any(u == upper_flat)
    assert np.any((u != lower_flat) & (u != upper_flat))

    du_dc_fd = nd.Jacobian(f_c)(c.reshape(-1))
    du_dF_fd = nd.Jacobian(f_F)(F.reshape(-1))
    du_dxinit_fd = nd.Jacobian(f_x_init)(x_init[0])

    def leaf(arr):
        if arr is None:
            return None
        return Variable(torch.Tensor(arr).double(), requires_grad=True)

    _C, _c, _x_init, _u_lower, _u_upper, _F = (
        leaf(arr) for arr in (C, c, x_init, u_lower, u_upper, F))

    _x_lqr, u_lqr, _objs_lqr = mpc.MPC(
        n_state,
        n_ctrl,
        T,
        _u_lower,
        _u_upper,
        None,
        lqr_iter=20,
        verbose=1,
    )(_x_init, QuadCost(_C, _c), LinDx(_F))
    u_lqr_flat = u_lqr.view(-1)

    # One Jacobian row per control entry. du_dC is built as in the original
    # but is not compared against anything.
    rows_C, rows_c, rows_F, rows_x0 = [], [], [], []
    for k in range(len(u_lqr_flat)):
        u_k = u_lqr_flat[k]
        rows_C.append(grad(u_k, [_C], retain_graph=True)[0].view(-1))
        rows_c.append(grad(u_k, [_c], retain_graph=True)[0].view(-1))
        rows_F.append(grad(u_k, [_F], retain_graph=True)[0].view(-1))
        rows_x0.append(grad(u_k, [_x_init], retain_graph=True)[0].view(-1))
    du_dC = torch.stack(rows_C).data.numpy()
    du_dc = torch.stack(rows_c).data.numpy()
    du_dF = torch.stack(rows_F).data.numpy()
    du_dx_init = torch.stack(rows_x0).data.numpy()

    npt.assert_allclose(du_dc_fd, du_dc, atol=1e-4)
    npt.assert_allclose(du_dF, du_dF_fd, atol=1e-4)
    npt.assert_allclose(du_dx_init, du_dxinit_fd, atol=1e-4)
예제 #17
0
            dx.n_state,  # Number of states
            dx.n_ctrl,  # Number of control inputs
            mpc_T,  # MPC prediction horizon in number of timesteps
            u_init=u_init,  # Initial guess for inputs
            u_lower=dx.lower,  # Lower limit on inputs
            u_upper=dx.upper,  # Upper limit on inputs
            lqr_iter=100,  # Number of iterations per LQR solution step
            verbose=0,  # Verbosity, 0 is just warnings. 1 will give more info
            exit_unconverged=False,
            detach_unconverged=False,
            backprop=True,
            linesearch_decay=dx.linesearch_decay,
            max_linesearch_iter=dx.max_linesearch_iter,
            grad_method=GradMethods.AUTO_DIFF,  # FINITE_DIFF,
            eps=1e-3,
        )(x, QuadCost(Q, p), dx)

        # Save the first of the nominal actions determined by the MPC solution to use as
        # the real next control input
        next_action = nominal_actions[0]

        # Update the initial control input to include the current input as the first in
        # the sequence, then zero the rest
        # TODO: 02/09/19 - JEV - Would be better to use the previous solution as the
        # initial guess here? The mpc.MPC function also has a prev_ctrl argument to explore.
        u_init = torch.cat(
            (nominal_actions[1:], torch.zeros(1, n_batch,
                                              dx.n_ctrl).to(device)),
            dim=0)
        u_init[-2] = u_init[-3]
예제 #18
0
def mpc(s1,
        a,
        T,
        ball_vel=BALL_VEL,
        lambda_a=1e-3,
        centering=False,
        paddle_vel=None,
        vert_cost=False,
        verbose=1):
    """Plan ``T`` control steps for the Breakout simulator with one MPC
    iteration and return them as a NumPy array.

    The dynamics are the differentiable ``simulate_breakout`` step wrapped
    in an ``nn.Module``. The cost is quadratic: ``lambda_a`` on every action
    and, depending on the ball's vertical velocity and ``centering``, a
    coupling between state entries ``0`` and ``ns - 3``.

    NOTE(review): ``paddle_vel`` defaults to ``None`` but is indexed inside
    the dynamics, so callers presumably always pass it -- confirm.
    """
    ns = len(s1)
    na = len(a[0])

    class BreakoutDynamics(nn.Module):
        """One simulator step: state ``x`` and control ``u`` -> next state."""

        def forward(self, x, u):
            if x.ndimension() == 1:
                x = x.unsqueeze(0)
            # Only the first batch row is simulated.
            world, ball, paddle, blocks, paddle_idx = make_world(
                *from_vector(x[0]), ball_vel)
            return simulate_breakout(u[0],
                                     world,
                                     ball,
                                     paddle,
                                     blocks,
                                     paddle_idx,
                                     paddle_vel_x=paddle_vel[0],
                                     paddle_vel_y=paddle_vel[1])

    dynamics = BreakoutDynamics()
    if CUDA:
        dynamics = dynamics.cuda()
    u_lower, u_upper = -ACTION_VAL, ACTION_VAL
    x_init = s1.clone()
    u_init = get_tensor(a).clone()
    if CUDA:
        u_init = u_init.cuda()

    ball_vel_y = ball_vel[1]
    ball_pos_y = s1[-2]
    paddle_pos_y = s1[1]

    # Diagonal cost: zero on the state entries, lambda_a on the actions,
    # repeated for every time step.
    diag_weights = torch.cat([torch.zeros(ns), torch.ones(na) * lambda_a])
    Q = diag_weights.type_as(s1).diag().unsqueeze(0).repeat(T, 1, 1)
    if vert_cost:
        Q[:, ns - 2, ns - 2] = 10

    def couple_entries(steps):
        # Writes the 2x2 pattern [[1, -1], [-1, 1]] over state entries
        # (ns - 3, 0) for the selected time steps, in the original order.
        Q[steps, ns - 3, ns - 3] = 1
        Q[steps, ns - 3, 0] = -1
        Q[steps, 0, 0] = 1
        Q[steps, 0, ns - 3] = -1

    # Simple X tracking
    if ball_vel_y > 0:
        frames_to_paddle = (paddle_pos_y - ball_pos_y) / ball_vel_y / DT
        for t in range(T):
            if t <= int(frames_to_paddle) + 1:
                couple_entries(t)
    elif centering and ball_pos_y > 1000:
        couple_entries(slice(None))

    # Zero linear term; both cost terms get a singleton batch axis.
    p = torch.zeros(ns + na).type_as(Q).unsqueeze(0).repeat(T, 1)
    Q = Q.unsqueeze(1)
    p = p.unsqueeze(1)

    x_init = x_init.unsqueeze(0)
    solver = MPC(
        ns,
        na,
        T=T,
        u_init=u_init,
        u_lower=u_lower,
        u_upper=u_upper,
        verbose=verbose,
        delta_u=10 * ACTION_VAL,
        lqr_iter=1,
        grad_method=GradMethods.AUTO_DIFF,
        n_batch=1,
        max_linesearch_iter=1,
        exit_unconverged=False,
        backprop=False,
    )
    if CUDA:
        solver = solver.cuda()

    _x, u, _objs = solver(x_init, QuadCost(Q, p), dynamics)
    u = u.squeeze(1)
    if CUDA:
        u = u.cpu()
    return u.data.numpy()