def test_lqr_backward_cost_affine_dynamics_module_constrained():
    npr.seed(0)
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 1, 2, 2, 2
    hidden_sizes = [10]
    n_sc = n_state + n_ctrl

    C = 10.*npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10.*npr.randn(T, n_batch, n_sc).astype(np.float64)

    x_init = npr.randn(n_batch, n_state).astype(np.float64)
    # beta = 0.5
    beta = 2.0
    u_lower = -beta*np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_upper = beta*np.ones((T, n_batch, n_ctrl)).astype(np.float64)

    _C, _c, _x_init, _u_lower, _u_upper = [
        Variable(torch.Tensor(x).double(), requires_grad=True)
        if x is not None else None
        for x in [C, c, x_init, u_lower, u_upper]
    ]

    F = Variable(
        torch.randn(1, 1, n_state, n_sc).repeat(T-1, 1, 1, 1).double(),
        requires_grad=True)
    dynamics = AffineDynamics(F[0, 0, :, :n_state], F[0, 0, :, n_state:])

    # Solve once with the explicit linear dynamics LinDx(F).
    u_init = None
    x_lqr, u_lqr, objs_lqr = mpc.MPC(
        n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
        lqr_iter=20,
        verbose=1,
    )(_x_init, QuadCost(_C, _c), LinDx(F))
    u_lqr_flat = u_lqr.view(-1)
    du_dF = []
    for i in range(len(u_lqr_flat)):
        dF = grad(u_lqr_flat[i], [F], create_graph=True)[0].view(-1)
        du_dF.append(dF)
    du_dF = torch.stack(du_dF).data.numpy()

    # Solve again with the equivalent AffineDynamics module.
    u_init = None
    x_lqr, u_lqr, objs_lqr = mpc.MPC(
        n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
        lqr_iter=20,
        verbose=1,
    )(_x_init, QuadCost(_C, _c), dynamics)
    u_lqr_flat = u_lqr.view(-1)
    du_dF_ = []
    for i in range(len(u_lqr_flat)):
        dF = grad(u_lqr_flat[i], [F], create_graph=True)[0].view(-1)
        du_dF_.append(dF)
    du_dF_ = torch.stack(du_dF_).data.numpy()

    # Both formulations should give the same gradient of u* w.r.t. F.
    npt.assert_allclose(du_dF, du_dF_, atol=1e-4)
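# The test above assumes an AffineDynamics module equivalent to LinDx(F).
# A minimal sketch of such a module under the x_{t+1} = A x_t + B u_t
# convention implied by the test; the class actually used by the suite may
# differ in details (e.g. handling of unbatched inputs).
import torch
from torch import nn

class AffineDynamics(nn.Module):
    def __init__(self, A, B):
        super().__init__()
        self.A = A  # n_state x n_state
        self.B = B  # n_state x n_ctrl

    def forward(self, x, u):
        # Batched linear step: x_{t+1} = x A^T + u B^T.
        return x.mm(self.A.t()) + u.mm(self.B.t())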
def get_loss(x_init, _A, _B):
    lqr_iter = 2

    # Expert trajectory: solve the MPC problem with the true dynamics.
    F = torch.cat((expert['A'], expert['B']), dim=1) \
        .unsqueeze(0).unsqueeze(0).repeat(args.T, n_batch, 1, 1)
    x_true, u_true, objs_true = mpc.MPC(
        n_state, n_ctrl, args.T,
        u_lower=u_lower, u_upper=u_upper, u_init=u_init,
        lqr_iter=lqr_iter,
        verbose=-1,
        exit_unconverged=False,
        detach_unconverged=False,
        n_batch=n_batch,
    )(x_init, QuadCost(expert['Q'], expert['p']), LinDx(F))

    # Predicted trajectory: same cost, but with the learned dynamics (_A, _B).
    F = torch.cat((_A, _B), dim=1) \
        .unsqueeze(0).unsqueeze(0).repeat(args.T, n_batch, 1, 1)
    x_pred, u_pred, objs_pred = mpc.MPC(
        n_state, n_ctrl, args.T,
        u_lower=u_lower, u_upper=u_upper, u_init=u_init,
        lqr_iter=lqr_iter,
        verbose=-1,
        exit_unconverged=False,
        detach_unconverged=False,
        n_batch=n_batch,
    )(x_init, QuadCost(expert['Q'], expert['p']), LinDx(F))

    # Imitation loss: mean squared error between the two trajectories.
    traj_loss = torch.mean((u_true - u_pred)**2) + \
        torch.mean((x_true - x_pred)**2)
    return traj_loss
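# A hedged sketch of how get_loss could drive system identification by
# gradient descent; the _A/_B initialization and the optimizer settings are
# illustrative assumptions, not taken from the original script.
_A = torch.randn(n_state, n_state, requires_grad=True)
_B = torch.randn(n_state, n_ctrl, requires_grad=True)
opt = torch.optim.Adam([_A, _B], lr=1e-2)
for it in range(100):
    x_init = torch.randn(n_batch, n_state)
    loss = get_loss(x_init, _A, _B)
    opt.zero_grad()
    loss.backward()
    opt.step()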
def forward_numpy(C, c, x_init, u_lower, u_upper, F):
    _C, _c, _x_init, _u_lower, _u_upper, F = [
        Variable(torch.Tensor(x).double()) if x is not None else None
        for x in [C, c, x_init, u_lower, u_upper, F]
    ]

    u_init = None
    x_lqr, u_lqr, objs_lqr = mpc.MPC(
        n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
        lqr_iter=40,
        verbose=1,
        exit_unconverged=True,
        backprop=False,
        max_linesearch_iter=2,
    )(_x_init, QuadCost(_C, _c), LinDx(F))
    return util.get_data_maybe(u_lqr.view(-1)).numpy()
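# forward_numpy is built for finite differencing (backprop=False, numpy in
# and out). A sketch of a derivative check with numdifftools, assuming the
# array shapes used elsewhere in this file:
import numdifftools as nd

def f_c(c_flat):
    c_ = c_flat.reshape(T, n_batch, n_sc)
    return forward_numpy(C, c_, x_init, u_lower, u_upper, F)

du_dc_fd = nd.Jacobian(f_c)(c.reshape(-1))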
def forward(self, x_init, C, c, d):
    # Affine disturbance term of the dynamics: f_t = Bd_hat @ d_t.
    ft = torch.mm(self.Bd_hat, d).transpose(0, 1)  # T-1 x n_state
    ft = ft.unsqueeze(1)  # T-1 x 1 x n_state

    x_pred, u_pred, _ = mpc.MPC(
        n_state=n_state, n_ctrl=n_ctrl, T=T,
        u_lower=self.u_lower, u_upper=self.u_upper,
        lqr_iter=20,
        verbose=0,
        exit_unconverged=False,
    )(x_init.double(),
      QuadCost(C.double(), c.double()),
      LinDx(self.F_hat.repeat(T - 1, 1, 1, 1), ft))
    # Return the one-step state prediction and the first planned control.
    return x_pred[1, 0, :], u_pred[0, 0, :]
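# A sketch of running this controller in a receding-horizon loop: only the
# first planned control is applied, then the problem is re-solved from the
# new state. controller, plant_step, x0, and n_steps are hypothetical
# stand-ins for the surrounding code.
x = x0
for t in range(n_steps):
    x_next_pred, u0 = controller.forward(x, C, c, d)
    x = plant_step(x, u0)  # apply u0 to the real system, observe next state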
def forward(self, x_init, ft, C, c, current=True, n_iters=20):
    T, n_batch, n_dist = ft.shape
    # Select either the current model or the frozen (old) copy.
    if current:
        F_hat = self.F_hat
        Bd_hat = self.Bd_hat
    else:
        F_hat = self.F_hat_old
        Bd_hat = self.Bd_hat_old

    x_lqr, u_lqr, objs_lqr = mpc.MPC(
        n_state=self.n_state, n_ctrl=self.n_ctrl, T=self.T,
        u_lower=self.u_lower.repeat(self.T, n_batch, 1),
        u_upper=self.u_upper.repeat(self.T, n_batch, 1),
        lqr_iter=n_iters,
        backprop=True,
        verbose=0,
        exit_unconverged=False,
    )(x_init.double(),
      QuadCost(C.double(), c.double()),
      LinDx(F_hat.repeat(self.T - 1, n_batch, 1, 1), ft.double()))
    return x_lqr, u_lqr
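# current=False plans under a frozen copy of the model, a target-network-style
# pattern. A hedged sketch of refreshing that copy; the attribute names follow
# the method above, but the update cadence and the name policy are assumptions.
with torch.no_grad():
    policy.F_hat_old = policy.F_hat.clone()
    policy.Bd_hat_old = policy.Bd_hat.clone()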
def construct_MPC(A, B, ref, dt):
    n_batch, n_state, n_ctrl, T = 1, args.hidden_dim, 1, 5
    n_sc = n_state + n_ctrl

    # Quadratic tracking cost around the reference ref.
    goal_weights = torch.ones(args.hidden_dim)
    ctrl_penalty = 0.1 * torch.ones(n_ctrl)
    q = torch.cat((goal_weights, ctrl_penalty))
    px = -torch.sqrt(goal_weights) * ref
    p = torch.cat((px[0], torch.zeros(n_ctrl)))
    Q = torch.diag(q).unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1)
    p = p.unsqueeze(0).repeat(T, n_batch, 1)

    # Discretized linear dynamics: x_{t+1} = (I + dt*A) x_t + dt*B u_t.
    F = torch.FloatTensor(
        np.concatenate([np.eye(args.hidden_dim) + dt*A, dt*B], axis=1))
    F = F.unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1)
    f = torch.zeros([5, 1, 3])

    u_lower = -torch.ones(T, n_batch, n_ctrl) * 2
    u_upper = torch.ones(T, n_batch, n_ctrl) * 2

    cost = QuadCost(Q, p)
    dynamic = LinDx(F)
    mpc_model = mpc.MPC(
        n_state=n_state,
        n_ctrl=n_ctrl,
        n_batch=n_batch,
        backprop=False,
        T=T,
        u_lower=u_lower,
        u_upper=u_upper,
        lqr_iter=10,
        verbose=0,
        exit_unconverged=False,
        eps=1e-2,
    )
    return mpc_model, cost, dynamic
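# A sketch of consuming construct_MPC's return values; x0 is an assumed
# initial state of shape (n_batch, n_state).
mpc_model, cost, dynamic = construct_MPC(A, B, ref, dt)
x0 = torch.randn(1, args.hidden_dim)
x_pred, u_pred, objs = mpc_model(x0, cost, dynamic)
u0 = u_pred[0, 0]  # first control of the planned horizon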
q = torch.cat((goal_weights, ctrl_penalty))
px = -torch.sqrt(goal_weights) * ref
p = torch.cat((px, torch.zeros((n_batch, n_ctrl))), dim=1)
Q = torch.diag(q).unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1)
p = p.unsqueeze(0).repeat(T, 1, 1)

F = torch.FloatTensor(np.concatenate(
    [np.eye(args.hidden_dim) + env.env.dt*A, env.env.dt*B], axis=1))
F = F.unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1)
f = torch.zeros([5, 1, 3])

u_lower = -torch.ones(T, n_batch, n_ctrl) * 2
u_upper = torch.ones(T, n_batch, n_ctrl) * 2
cost = QuadCost(Q, p)
dynamic = LinDx(F)
u_init = None

for k in range(5):
    state = env.reset()
    state = model.transform_state(state)
    for i in range(100):
        env.render()
        state = torch.FloatTensor(state.copy().reshape((1, -1)))
        y = model.encoder(state).detach()
        act = -np.dot(K, (y - ref).T)
        if i % 5 == 0:
            # Re-plan with MPC every five steps. The remaining arguments are
            # assumed to match construct_MPC above.
            mpc_model = mpc.MPC(
                n_state=n_state,
                n_ctrl=n_ctrl,
                n_batch=n_batch,
                backprop=False,
                T=T,
                u_lower=u_lower,
                u_upper=u_upper,
                lqr_iter=10,
                verbose=0,
                exit_unconverged=False,
                eps=1e-2,
            )
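# A hedged sketch of how the freshly planned MPC action could replace the
# fixed-gain action inside the loop above; the u_pred indexing and the
# env.step signature are assumptions based on the surrounding code.
x_pred, u_pred, _ = mpc_model(y, cost, dynamic)
act = u_pred[0, 0].detach().numpy()
state, reward, done, info = env.step(act)
state = model.transform_state(state)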
def test_memory():
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 2, 3, 4, 5
    n_sc = n_state + n_ctrl

    # Randomly initialize a PSD quadratic cost and linear dynamics.
    C = torch.randn(T*n_batch, n_sc, n_sc)
    C = torch.bmm(C, C.transpose(1, 2)).view(T, n_batch, n_sc, n_sc)
    c = torch.randn(T, n_batch, n_sc)

    alpha = 0.2
    R = (torch.eye(n_state) + alpha*torch.randn(n_state, n_state)).repeat(
        T, n_batch, 1, 1)
    S = torch.randn(T, n_batch, n_state, n_ctrl)
    F = torch.cat((R, S), dim=3)

    # The initial state.
    x_init = torch.randn(n_batch, n_state)

    # The upper and lower control bounds.
    u_lower = -torch.rand(T, n_batch, n_ctrl)
    u_upper = torch.rand(T, n_batch, n_ctrl)

    process = psutil.Process(os.getpid())

    # gc.collect()
    # start_mem = process.memory_info().rss
    # _lqr = LQRStep(
    #     n_state=n_state,
    #     n_ctrl=n_ctrl,
    #     T=T,
    #     u_lower=u_lower,
    #     u_upper=u_upper,
    #     u_zero_I=u_zero_I,
    #     true_cost=cost,
    #     true_dynamics=dynamics,
    #     delta_u=delta_u,
    #     delta_space=True,
    #     # current_x=x,
    #     # current_u=u,
    # )
    # e = Variable(torch.Tensor())
    # x, u = _lqr(x_init, C, c, F, f if f is not None else e)
    # gc.collect()
    # mem_used = process.memory_info().rss - start_mem
    # print(mem_used)
    # assert mem_used == 0

    gc.collect()
    start_mem = process.memory_info().rss
    _mpc = mpc.MPC(
        n_state=n_state,
        n_ctrl=n_ctrl,
        T=T,
        u_lower=u_lower,
        u_upper=u_upper,
        lqr_iter=20,
        verbose=1,
        backprop=False,
        exit_unconverged=False,
    )
    _mpc(x_init, QuadCost(C, c), LinDx(F))
    del _mpc
    gc.collect()

    # With backprop=False, solving should not leave any allocations behind.
    mem_used = process.memory_info().rss - start_mem
    print(mem_used)
    assert mem_used == 0
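# The RSS-delta pattern above generalizes to any suspected leak; a minimal
# sketch using the same psutil/gc machinery, where fn is any zero-argument
# callable exercising the code under test:
def rss_delta(fn):
    gc.collect()
    proc = psutil.Process(os.getpid())
    before = proc.memory_info().rss
    fn()
    gc.collect()
    return proc.memory_info().rss - before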
def test_lqr_backward_cost_linear_dynamics_constrained():
    npr.seed(0)
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 1, 2, 2, 3
    hidden_sizes = [10, 10]
    n_sc = n_state + n_ctrl

    C = 10.*npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10.*npr.randn(T, n_batch, n_sc).astype(np.float64)

    x_init = npr.randn(n_batch, n_state).astype(np.float64)
    beta = 0.5
    u_lower = -beta*np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_upper = beta*np.ones((T, n_batch, n_ctrl)).astype(np.float64)

    F = npr.randn(T-1, n_batch, n_state, n_sc)

    def forward_numpy(C, c, x_init, u_lower, u_upper, F):
        _C, _c, _x_init, _u_lower, _u_upper, F = [
            Variable(torch.Tensor(x).double()) if x is not None else None
            for x in [C, c, x_init, u_lower, u_upper, F]
        ]

        u_init = None
        x_lqr, u_lqr, objs_lqr = mpc.MPC(
            n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
            lqr_iter=40,
            verbose=1,
            exit_unconverged=True,
            backprop=False,
            max_linesearch_iter=2,
        )(_x_init, QuadCost(_C, _c), LinDx(F))
        return util.get_data_maybe(u_lqr.view(-1)).numpy()

    def f_c(c_flat):
        c_ = c_flat.reshape(T, n_batch, n_sc)
        return forward_numpy(C, c_, x_init, u_lower, u_upper, F)

    def f_F(F_flat):
        F_ = F_flat.reshape(T-1, n_batch, n_state, n_sc)
        return forward_numpy(C, c, x_init, u_lower, u_upper, F_)

    def f_x_init(x_init):
        x_init = x_init.reshape(1, -1)
        return forward_numpy(C, c, x_init, u_lower, u_upper, F)

    u = forward_numpy(C, c, x_init, u_lower, u_upper, F)

    # Make sure the solution is strictly partially on the boundary.
    assert np.any(u == u_lower.reshape(-1)) or np.any(u == u_upper.reshape(-1))
    assert np.any((u != u_lower.reshape(-1)) & (u != u_upper.reshape(-1)))

    # Finite-difference Jacobians of u* w.r.t. c, F, and x_init.
    du_dc_fd = nd.Jacobian(f_c)(c.reshape(-1))
    du_dF_fd = nd.Jacobian(f_F)(F.reshape(-1))
    du_dxinit_fd = nd.Jacobian(f_x_init)(x_init[0])

    _C, _c, _x_init, _u_lower, _u_upper, F = [
        Variable(torch.Tensor(x).double(), requires_grad=True)
        if x is not None else None
        for x in [C, c, x_init, u_lower, u_upper, F]
    ]

    u_init = None
    x_lqr, u_lqr, objs_lqr = mpc.MPC(
        n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
        lqr_iter=20,
        verbose=1,
    )(_x_init, QuadCost(_C, _c), LinDx(F))
    u_lqr_flat = u_lqr.view(-1)

    # Analytic Jacobians via autograd, one output entry at a time.
    du_dC = []
    du_dc = []
    du_dF = []
    du_dx_init = []
    for i in range(len(u_lqr_flat)):
        dCi = grad(u_lqr_flat[i], [_C], retain_graph=True)[0].view(-1)
        dci = grad(u_lqr_flat[i], [_c], retain_graph=True)[0].view(-1)
        dF = grad(u_lqr_flat[i], [F], retain_graph=True)[0].view(-1)
        dx_init = grad(u_lqr_flat[i], [_x_init], retain_graph=True)[0].view(-1)
        du_dC.append(dCi)
        du_dc.append(dci)
        du_dF.append(dF)
        du_dx_init.append(dx_init)
    du_dC = torch.stack(du_dC).data.numpy()
    du_dc = torch.stack(du_dc).data.numpy()
    du_dF = torch.stack(du_dF).data.numpy()
    du_dx_init = torch.stack(du_dx_init).data.numpy()

    npt.assert_allclose(du_dc_fd, du_dc, atol=1e-4)
    npt.assert_allclose(du_dF, du_dF_fd, atol=1e-4)
    npt.assert_allclose(du_dx_init, du_dxinit_fd, atol=1e-4)
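# The test collects du_dC but never compares it against finite differences.
# A hedged sketch of the analogous check, following the f_c/f_F pattern;
# perturbing raw entries of C breaks its symmetry, so symmetrizing inside
# the perturbed map is an assumption about how to keep the check well-posed.
def f_C(C_flat):
    C_ = C_flat.reshape(T, n_batch, n_sc, n_sc)
    C_ = 0.5*(C_ + C_.transpose(0, 1, 3, 2))  # keep the cost symmetric
    return forward_numpy(C_, c, x_init, u_lower, u_upper, F)

du_dC_fd = nd.Jacobian(f_C)(C.reshape(-1))
npt.assert_allclose(du_dC, du_dC_fd, atol=1e-4)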