Example #1
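These snippets are drawn from tests and projects built on the mpc.pytorch differentiable MPC solver and are shown without their import headers. A plausible shared preamble, assuming the mpc.pytorch package layout (AffineDynamics, if not defined locally in a test, would come from mpc.dynamics), is:

import gc
import os

import numpy as np
import numpy.random as npr
import numpy.testing as npt
import numdifftools as nd
import psutil
import torch
from torch.autograd import Variable, grad

from mpc import mpc, util
from mpc.mpc import QuadCost, LinDx
from mpc.dynamics import AffineDynamics  # assumed location of AffineDynamics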
def test_lqr_backward_cost_affine_dynamics_module_constrained():
    npr.seed(0)
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 1, 2, 2, 2
    hidden_sizes = [10]
    n_sc = n_state + n_ctrl

    C = 10. * npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10. * npr.randn(T, n_batch, n_sc).astype(np.float64)

    x_init = npr.randn(n_batch, n_state).astype(np.float64)
    # beta = 0.5
    beta = 2.0
    u_lower = -beta*np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_upper = beta*np.ones((T, n_batch, n_ctrl)).astype(np.float64)

    _C, _c, _x_init, _u_lower, _u_upper = [
        Variable(torch.Tensor(x).double(), requires_grad=True)
        if x is not None else None
        for x in [C, c, x_init, u_lower, u_upper]
    ]
    F = Variable(
        torch.randn(1, 1, n_state, n_sc).repeat(T-1, 1, 1, 1).double(),
        requires_grad=True)
    # The same dynamics expressed as a module instead of LinDx.
    dynamics = AffineDynamics(F[0, 0, :, :n_state], F[0, 0, :, n_state:])

    u_init = None
    x_lqr, u_lqr, objs_lqr = mpc.MPC(
        n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
        lqr_iter=20,
        verbose=1,
    )(_x_init, QuadCost(_C, _c), LinDx(F))
    u_lqr_flat = u_lqr.view(-1)

    du_dF = []
    for i in range(len(u_lqr_flat)):
        dF = grad(u_lqr_flat[i], [F], create_graph=True)[0].view(-1)
        du_dF.append(dF)
    du_dF = torch.stack(du_dF).data.numpy()

    u_init = None
    x_lqr, u_lqr, objs_lqr = mpc.MPC(
        n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
        lqr_iter=20,
        verbose=1,
    )(_x_init, QuadCost(_C, _c), dynamics)
    u_lqr_flat = u_lqr.view(-1)

    du_dF_ = []
    for i in range(len(u_lqr_flat)):
        dF = grad(u_lqr_flat[i], [F], create_graph=True)[0].view(-1)
        du_dF_.append(dF)
    du_dF_ = torch.stack(du_dF_).data.numpy()

    npt.assert_allclose(du_dF, du_dF_, atol=1e-4)
Example #2
    def get_loss(x_init, _A, _B):
        lqr_iter = 2

        F = torch.cat((expert['A'], expert['B']), dim=1) \
            .unsqueeze(0).unsqueeze(0).repeat(args.T, n_batch, 1, 1)
        x_true, u_true, objs_true = mpc.MPC(
            n_state,
            n_ctrl,
            args.T,
            u_lower=u_lower,
            u_upper=u_upper,
            u_init=u_init,
            lqr_iter=lqr_iter,
            verbose=-1,
            exit_unconverged=False,
            detach_unconverged=False,
            n_batch=n_batch,
        )(x_init, QuadCost(expert['Q'], expert['p']), LinDx(F))

        F = torch.cat((_A, _B), dim=1) \
            .unsqueeze(0).unsqueeze(0).repeat(args.T, n_batch, 1, 1)
        x_pred, u_pred, objs_pred = mpc.MPC(
            n_state,
            n_ctrl,
            args.T,
            u_lower=u_lower,
            u_upper=u_upper,
            u_init=u_init,
            lqr_iter=lqr_iter,
            verbose=-1,
            exit_unconverged=False,
            detach_unconverged=False,
            n_batch=n_batch,
        )(x_init, QuadCost(expert['Q'], expert['p']), LinDx(F))

        traj_loss = torch.mean((u_true - u_pred)**2) + \
                    torch.mean((x_true - x_pred)**2)
        return traj_loss
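Since mpc.MPC is differentiable, traj_loss backpropagates into _A and _B, so the dynamics can be fit to the expert by gradient descent. A minimal sketch of such a loop; the initialization of _A and _B here is illustrative, not from the original:

_A = torch.eye(n_state, requires_grad=True)              # hypothetical learnable A
_B = torch.randn(n_state, n_ctrl).mul_(0.1).requires_grad_()  # hypothetical learnable B
opt = torch.optim.Adam([_A, _B], lr=1e-2)
for it in range(100):
    opt.zero_grad()
    loss = get_loss(x_init, _A, _B)
    loss.backward()
    opt.step()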
Example #3
    def forward_numpy(C, c, x_init, u_lower, u_upper, F):
        _C, _c, _x_init, _u_lower, _u_upper, F = [
            Variable(torch.Tensor(x).double()) if x is not None else None
            for x in [C, c, x_init, u_lower, u_upper, F]
        ]

        u_init = None
        x_lqr, u_lqr, objs_lqr = mpc.MPC(
            n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
            lqr_iter=40,
            verbose=1,
            exit_unconverged=True,
            backprop=False,
            max_linesearch_iter=2,
        )(_x_init, QuadCost(_C, _c), LinDx(F))
        return util.get_data_maybe(u_lqr.view(-1)).numpy()
Example #4
    def forward(self, x_init, C, c, d):
        ft = torch.mm(self.Bd_hat, d).transpose(0, 1)  # T-1 x n_state
        ft = ft.unsqueeze(1)  # T-1 x 1 x n_state

        x_pred, u_pred, _ = mpc.MPC(
            n_state=n_state,
            n_ctrl=n_ctrl,
            T=T,
            u_lower=self.u_lower,
            u_upper=self.u_upper,
            lqr_iter=20,
            verbose=0,
            exit_unconverged=False,
        )(x_init.double(), QuadCost(C.double(), c.double()),
          # Pass the disturbance term ft as the affine offset f of the dynamics.
          LinDx(self.F_hat.repeat(T - 1, 1, 1, 1), ft.double()))

        return x_pred[1, 0, :], u_pred[0, 0, :]
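forward returns the first predicted next state and the first planned control, i.e. a single receding-horizon step. A hypothetical closed-loop rollout, assuming a controller instance ctrl with this forward, a plant step function step_fn, and cost terms C, c and disturbance d (all names illustrative):

x = x0
for t in range(100):
    x_next_pred, u = ctrl.forward(x, C, c, d)
    x = step_fn(x, u)  # apply only the first control, then replan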
Example #5
    def forward(self, x_init, ft, C, c, current=True, n_iters=20):
        T, n_batch, n_dist = ft.shape
        # Select either the current or the previous (old) model parameters.
        if current:
            F_hat = self.F_hat
            Bd_hat = self.Bd_hat
        else:
            F_hat = self.F_hat_old
            Bd_hat = self.Bd_hat_old

        x_lqr, u_lqr, objs_lqr = mpc.MPC(n_state=self.n_state,
                                         n_ctrl=self.n_ctrl,
                                         T=self.T,
                                         u_lower=self.u_lower.repeat(self.T, n_batch, 1),
                                         u_upper=self.u_upper.repeat(self.T, n_batch, 1),
                                         lqr_iter=n_iters,
                                         backprop=True,
                                         verbose=0,
                                         exit_unconverged=False,
                                         )(x_init.double(), QuadCost(C.double(), c.double()),
                                           LinDx(F_hat.repeat(self.T-1, n_batch, 1, 1), ft.double()))
        return x_lqr, u_lqr
Example #6
def construct_MPC(A, B, ref, dt):
    n_batch, n_state, n_ctrl, T = 1, args.hidden_dim, 1, 5
    n_sc = n_state + n_ctrl

    goal_weights = torch.ones(args.hidden_dim)

    ctrl_penalty = 0.1 * torch.ones(n_ctrl)
    q = torch.cat((goal_weights, ctrl_penalty))
    px = -torch.sqrt(goal_weights) * ref
    p = torch.cat((px[0], torch.zeros(n_ctrl)))
    Q = torch.diag(q).unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1)
    p = p.unsqueeze(0).repeat(T, n_batch, 1)

    F = torch.FloatTensor(np.concatenate([np.eye(args.hidden_dim) + dt * A, dt * B], axis=1))
    F = F.unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1)
    f = torch.zeros([5, 1, 3])  # affine term; unused since the dynamics below are LinDx(F)

    u_lower = -torch.ones(T, n_batch, n_ctrl) * 2
    u_upper = torch.ones(T, n_batch, n_ctrl) * 2

    cost = QuadCost(Q, p)
    dynamic = LinDx(F)

    mpc_model = mpc.MPC(
        n_state=n_state,
        n_ctrl=n_ctrl,
        n_batch=n_batch,
        backprop=False,
        T=T,
        u_lower=u_lower,
        u_upper=u_upper,
        lqr_iter=10,
        verbose=0,
        exit_unconverged=False,
        eps=1e-2,
    )
    return mpc_model, cost, dynamic
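construct_MPC only builds the solver; the returned triple is meant to be called as mpc_model(x_init, cost, dynamic). A usage sketch, assuming an initial hidden state x0 of shape (n_batch, args.hidden_dim):

mpc_model, cost, dynamic = construct_MPC(A, B, ref, dt)
x0 = torch.randn(1, args.hidden_dim)
x_pred, u_pred, objs = mpc_model(x0, cost, dynamic)
act = u_pred[0, 0]  # first control of the planned sequence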
Example #7
        q = torch.cat((goal_weights, ctrl_penalty))
        px = -torch.sqrt(goal_weights) * ref
        p = torch.cat((px, torch.zeros((n_batch, n_ctrl))), dim=1)
        Q = torch.diag(q).unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1)
        p = p.unsqueeze(0).repeat(T, 1, 1)

        F = torch.FloatTensor(np.concatenate([np.eye(args.hidden_dim) + env.env.dt * A, env.env.dt * B], axis=1))
        F = F.unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1)
        f = torch.zeros([5, 1, 3])

        u_lower = -torch.ones(T, n_batch, n_ctrl) * 2
        u_upper = torch.ones(T, n_batch, n_ctrl) * 2

        cost = QuadCost(Q, p)
        dynamic = LinDx(F)

        u_init = None
        for k in range(5):
            state = env.reset()
            state = model.transform_state(state)
            for i in range(100):
                env.render()
                state = torch.FloatTensor(state.copy().reshape((1, -1)))
                y = model.encoder(state).detach()
                
                act = -np.dot(K, (y-ref).T)
                if i % 5 == 0:
                    mpc_model = mpc.MPC(
                        n_state=n_state,
                        n_ctrl=n_ctrl,
Example #8
def test_memory():
    torch.manual_seed(0)

    n_batch, n_state, n_ctrl, T = 2, 3, 4, 5
    n_sc = n_state + n_ctrl

    # Randomly initialize a PSD quadratic cost and linear dynamics.
    C = torch.randn(T * n_batch, n_sc, n_sc)
    C = torch.bmm(C, C.transpose(1, 2)).view(T, n_batch, n_sc, n_sc)
    c = torch.randn(T, n_batch, n_sc)

    alpha = 0.2
    R = (torch.eye(n_state) + alpha * torch.randn(n_state, n_state)).repeat(
        T, n_batch, 1, 1)
    S = torch.randn(T, n_batch, n_state, n_ctrl)
    F = torch.cat((R, S), dim=3)

    # The initial state.
    x_init = torch.randn(n_batch, n_state)

    # The upper and lower control bounds.
    u_lower = -torch.rand(T, n_batch, n_ctrl)
    u_upper = torch.rand(T, n_batch, n_ctrl)

    process = psutil.Process(os.getpid())

    # gc.collect()
    # start_mem = process.memory_info().rss

    # _lqr = LQRStep(
    #     n_state=n_state,
    #     n_ctrl=n_ctrl,
    #     T=T,
    #     u_lower=u_lower,
    #     u_upper=u_upper,
    #     u_zero_I=u_zero_I,
    #     true_cost=cost,
    #     true_dynamics=dynamics,
    #     delta_u=delta_u,
    #     delta_space=True,
    #     # current_x=x,
    #     # current_u=u,
    # )
    # e = Variable(torch.Tensor())
    # x, u = _lqr(x_init, C, c, F, f if f is not None else e)

    # gc.collect()
    # mem_used = process.memory_info().rss - start_mem
    # print(mem_used)
    # assert mem_used == 0

    gc.collect()
    start_mem = process.memory_info().rss

    _mpc = mpc.MPC(
        n_state=n_state,
        n_ctrl=n_ctrl,
        T=T,
        u_lower=u_lower,
        u_upper=u_upper,
        lqr_iter=20,
        verbose=1,
        backprop=False,
        exit_unconverged=False,
    )
    _mpc(x_init, QuadCost(C, c), LinDx(F))
    del _mpc

    gc.collect()
    mem_used = process.memory_info().rss - start_mem
    print(mem_used)
    assert mem_used == 0
Example #9
def test_lqr_backward_cost_linear_dynamics_constrained():
    npr.seed(0)
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 1, 2, 2, 3
    hidden_sizes = [10, 10]
    n_sc = n_state + n_ctrl

    C = 10. * npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10. * npr.randn(T, n_batch, n_sc).astype(np.float64)

    x_init = npr.randn(n_batch, n_state).astype(np.float64)
    beta = 0.5
    u_lower = -beta * np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_upper = beta * np.ones((T, n_batch, n_ctrl)).astype(np.float64)

    F = npr.randn(T - 1, n_batch, n_state, n_sc)

    def forward_numpy(C, c, x_init, u_lower, u_upper, F):
        _C, _c, _x_init, _u_lower, _u_upper, F = [
            Variable(torch.Tensor(x).double()) if x is not None else None
            for x in [C, c, x_init, u_lower, u_upper, F]
        ]

        u_init = None
        x_lqr, u_lqr, objs_lqr = mpc.MPC(
            n_state,
            n_ctrl,
            T,
            _u_lower,
            _u_upper,
            u_init,
            lqr_iter=40,
            verbose=1,
            exit_unconverged=True,
            backprop=False,
            max_linesearch_iter=2,
        )(_x_init, QuadCost(_C, _c), LinDx(F))
        return util.get_data_maybe(u_lqr.view(-1)).numpy()

    def f_c(c_flat):
        c_ = c_flat.reshape(T, n_batch, n_sc)
        return forward_numpy(C, c_, x_init, u_lower, u_upper, F)

    def f_F(F_flat):
        F_ = F_flat.reshape(T - 1, n_batch, n_state, n_sc)
        return forward_numpy(C, c, x_init, u_lower, u_upper, F_)

    def f_x_init(x_init):
        x_init = x_init.reshape(1, -1)
        return forward_numpy(C, c, x_init, u_lower, u_upper, F)

    u = forward_numpy(C, c, x_init, u_lower, u_upper, F)

    # Make sure the solution is partially on the boundary: at least one
    # control is clipped at a bound and at least one is strictly inside.
    assert np.any(u == u_lower.reshape(-1)) or np.any(u == u_upper.reshape(-1))
    assert np.any((u != u_lower.reshape(-1)) & (u != u_upper.reshape(-1)))

    du_dc_fd = nd.Jacobian(f_c)(c.reshape(-1))
    du_dF_fd = nd.Jacobian(f_F)(F.reshape(-1))
    du_dxinit_fd = nd.Jacobian(f_x_init)(x_init[0])

    _C, _c, _x_init, _u_lower, _u_upper, F = [
        Variable(torch.Tensor(x).double(), requires_grad=True)
        if x is not None else None
        for x in [C, c, x_init, u_lower, u_upper, F]
    ]

    u_init = None
    x_lqr, u_lqr, objs_lqr = mpc.MPC(
        n_state,
        n_ctrl,
        T,
        _u_lower,
        _u_upper,
        u_init,
        lqr_iter=20,
        verbose=1,
    )(_x_init, QuadCost(_C, _c), LinDx(F))
    u_lqr_flat = u_lqr.view(-1)

    du_dC = []
    du_dc = []
    du_dF = []
    du_dx_init = []
    for i in range(len(u_lqr_flat)):
        dCi = grad(u_lqr_flat[i], [_C], retain_graph=True)[0].view(-1)
        dci = grad(u_lqr_flat[i], [_c], retain_graph=True)[0].view(-1)
        dF = grad(u_lqr_flat[i], [F], retain_graph=True)[0].view(-1)
        dx_init = grad(u_lqr_flat[i], [_x_init], retain_graph=True)[0].view(-1)
        du_dC.append(dCi)
        du_dc.append(dci)
        du_dF.append(dF)
        du_dx_init.append(dx_init)
    du_dC = torch.stack(du_dC).data.numpy()
    du_dc = torch.stack(du_dc).data.numpy()
    du_dF = torch.stack(du_dF).data.numpy()
    du_dx_init = torch.stack(du_dx_init).data.numpy()

    npt.assert_allclose(du_dc_fd, du_dc, atol=1e-4)
    npt.assert_allclose(du_dF, du_dF_fd, atol=1e-4)
    npt.assert_allclose(du_dx_init, du_dxinit_fd, atol=1e-4)