def test_lqr_slew_rate():
    """Check how ``slew_rate_penalty`` changes an unbounded MPC solution.

    One seeded random problem (quadratic cost, affine dynamics) is solved
    three times:

    * without a slew-rate penalty (baseline);
    * with ``slew_rate_penalty=1e-6``, whose states and controls must match
      the baseline to ``atol=1e-3``;
    * with ``slew_rate_penalty=1.``, whose objective must be strictly larger
      than the baseline in every entry and whose controls must have a
      smaller norm of differences between consecutive slices along the
      leading axis of ``u``.
    """
    n_batch = 2
    n_state, n_ctrl = 3, 4
    n_sc = n_state + n_ctrl
    T = 5
    alpha = 0.2

    # The order of the random draws is part of the seeded fixture.
    torch.manual_seed(1)
    C = torch.randn(T, n_batch, n_sc, n_sc)
    C = C.transpose(2, 3).matmul(C)  # C^T C is symmetric PSD
    c = torch.randn(T, n_batch, n_sc)
    x_init = torch.randn(n_batch, n_state)
    R = torch.eye(n_state) + alpha * torch.randn(n_state, n_state)
    S = torch.randn(n_state, n_ctrl)
    f = torch.randn(n_state)
    C, c, x_init, R, S, f = map(Variable, (C, c, x_init, R, S, f))

    dynamics = AffineDynamics(R, S, f)

    def solve(**extra):
        """Run one MPC solve; ``extra`` is forwarded to ``mpc.MPC``."""
        controller = mpc.MPC(
            n_state, n_ctrl, T,
            u_lower=None, u_upper=None, u_init=None,
            lqr_iter=10,
            backprop=False,
            verbose=1,
            exit_unconverged=False,
            eps=1e-4,
            **extra
        )
        return controller(x_init, QuadCost(C, c), dynamics)

    x, u, objs = solve()

    # The solution should be the same when the slew rate approaches 0.
    x_slew_eps, u_slew_eps, _ = solve(slew_rate_penalty=1e-6)
    npt.assert_allclose(x.data.numpy(), x_slew_eps.data.numpy(), atol=1e-3)
    npt.assert_allclose(u.data.numpy(), u_slew_eps.data.numpy(), atol=1e-3)

    _, u_slew, objs_slew = solve(slew_rate_penalty=1.)
    # np.all replaces np.alltrue, which no longer exists in NumPy 2.0.
    assert np.all((objs < objs_slew).numpy())

    d = torch.norm(u[:-1] - u[1:]).item()
    d_slew = torch.norm(u_slew[:-1] - u_slew[1:]).item()
    assert d_slew < d
def test_lqr_linear_unbounded():
    """Compare ``mpc.MPC`` with the ``lqr_cp`` reference when bounds are inactive.

    A seeded random problem with a quadratic cost and time-invariant affine
    dynamics is solved by ``lqr_cp`` (no bounds) for batch element 0.  The
    stacked ``(x, u)`` trajectory of batch element 0 from ``mpc.MPC`` must
    match it to ``rtol=1e-3``, once with +/-1e4 control bounds and once with
    no bounds at all.
    """
    npr.seed(1)

    n_batch = 2
    n_state, n_ctrl = 3, 4
    n_sc = n_state + n_ctrl
    T = 5

    # Random quadratic cost; C^T C keeps each stage Hessian PSD.
    C = npr.randn(T, n_batch, n_sc, n_sc)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = npr.randn(T, n_batch, n_sc)

    # One (R, S, f) triple tiled over the time and batch axes.
    alpha = 0.2
    R = np.tile(
        np.eye(n_state) + alpha * np.random.randn(n_state, n_state),
        (T, n_batch, 1, 1))
    S = np.tile(np.random.randn(n_state, n_ctrl), (T, n_batch, 1, 1))
    F = np.concatenate((R, S), axis=3)
    f = np.tile(npr.randn(n_state), (T, n_batch, 1))
    x_init = npr.randn(n_batch, n_state)

    # Bounds wide enough that the tests below expect the unbounded solution.
    u_lower = -1e4 * np.ones((T, n_batch, n_ctrl))
    u_upper = 1e4 * np.ones((T, n_batch, n_ctrl))

    # Reference trajectory for batch element 0; transposed so that it lines
    # up with ``tau_lqr[:, 0]`` in the comparisons below.
    tau_cp, _ = lqr_cp(
        C[:, 0], c[:, 0], F[:, 0], f[:, 0], x_init[0],
        T, n_state, n_ctrl, None, None
    )
    tau_cp = tau_cp.T

    def as_double(arr):
        """Wrap a NumPy array as a double-precision ``Variable``."""
        return Variable(torch.Tensor(arr).double())

    C, c, R, S, f, x_init, u_lower, u_upper = map(
        as_double, (C, c, R, S, f, x_init, u_lower, u_upper))

    dynamics = AffineDynamics(R[0, 0], S[0, 0], f[0, 0])

    def solve_and_compare(controller):
        """Solve with ``controller`` and compare batch element 0 to ``tau_cp``."""
        x_lqr, u_lqr, _ = controller(x_init, QuadCost(C, c), dynamics)
        tau_lqr = util.get_data_maybe(torch.cat((x_lqr, u_lqr), 2))
        npt.assert_allclose(tau_cp, tau_lqr[:, 0].numpy(), rtol=1e-3)

    # Very loose box constraints.
    solve_and_compare(mpc.MPC(
        n_state, n_ctrl, T, u_lower, u_upper, None,
        lqr_iter=10,
        backprop=False,
        verbose=1,
        exit_unconverged=True,
    ))

    # No box constraints.
    solve_and_compare(mpc.MPC(
        n_state, n_ctrl, T, None, None, None,
        lqr_iter=10,
        backprop=False,
        exit_unconverged=False,
    ))
def test_lqr_backward_cost_affine_dynamics_module_constrained():
    """Check that ``LinDx(F)`` and ``AffineDynamics`` give the same du/dF.

    The same box-constrained problem is solved twice, once with the dynamics
    expressed as ``LinDx(F)`` and once as an ``AffineDynamics`` module built
    from slices of the same ``F``.  The Jacobians of the flattened controls
    with respect to ``F`` must agree to ``atol=1e-4``.
    """
    npr.seed(0)
    torch.manual_seed(0)

    n_batch, n_state, n_ctrl, T = 1, 2, 2, 2
    n_sc = n_state + n_ctrl

    C = 10. * npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10. * npr.randn(T, n_batch, n_sc).astype(np.float64)
    x_init = npr.randn(n_batch, n_state).astype(np.float64)

    beta = 2.0
    u_lower = -beta * np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_upper = beta * np.ones((T, n_batch, n_ctrl)).astype(np.float64)

    _C, _c, _x_init, _u_lower, _u_upper = [
        Variable(torch.Tensor(arr).double(), requires_grad=True)
        for arr in (C, c, x_init, u_lower, u_upper)
    ]

    # A single random [A B] block repeated over the T-1 transitions.
    F = Variable(
        torch.randn(1, 1, n_state, n_sc).repeat(T - 1, 1, 1, 1).double(),
        requires_grad=True)
    dynamics = AffineDynamics(F[0, 0, :, :n_state], F[0, 0, :, n_state:])

    def control_jacobian_wrt_F(dyn):
        """Solve with dynamics ``dyn`` and return d(flattened u)/d(flattened F)."""
        _, u_lqr, _ = mpc.MPC(
            n_state, n_ctrl, T, _u_lower, _u_upper, None,
            lqr_iter=20,
            verbose=1,
        )(_x_init, QuadCost(_C, _c), dyn)
        # create_graph=True keeps the graph alive across the per-entry calls.
        rows = [
            grad(u_i, [F], create_graph=True)[0].view(-1)
            for u_i in u_lqr.view(-1)
        ]
        return torch.stack(rows).data.numpy()

    du_dF = control_jacobian_wrt_F(LinDx(F))
    du_dF_ = control_jacobian_wrt_F(dynamics)
    npt.assert_allclose(du_dF, du_dF_, atol=1e-4)
def forward_numpy(C, c, x_init, u_lower, u_upper, fc0b):
    """Solve the MPC problem from NumPy inputs and return the flat controls.

    Each argument is wrapped as a double-precision ``Variable`` with
    ``requires_grad=True`` (``None`` is passed through).  ``fc0b`` is written
    in place into the bias of ``dynamics.fcs[0]`` before solving.

    ``dynamics``, ``n_state``, ``n_ctrl`` and ``T`` are not parameters; they
    are read from the surrounding scope.

    Returns:
        The solved controls flattened to 1-D, as a NumPy array.
    """
    def wrap(arr):
        if arr is None:
            return None
        return Variable(torch.Tensor(arr).double(), requires_grad=True)

    quad, lin, x0, lower, upper, bias = map(
        wrap, (C, c, x_init, u_lower, u_upper, fc0b))

    # Overwrite the first layer's bias so the solve sees the supplied value.
    dynamics.fcs[0].bias.data[:] = bias.data

    controller = mpc.MPC(
        n_state, n_ctrl, T, lower, upper, None,
        lqr_iter=40,
        verbose=-1,
        exit_unconverged=True,
        backprop=False,
        max_linesearch_iter=1,
        slew_rate_penalty=1.0,
    )
    _, controls, _ = controller(x0, QuadCost(quad, lin), dynamics)
    return util.get_data_maybe(controls.view(-1)).numpy()
def test_lqr_linear_bounded_delta():
    """Check that ``delta_u`` limits the controls after a single LQR iteration.

    A seeded random box-constrained problem is solved with ``lqr_iter=1`` and
    ``delta_u=0.1``; the largest absolute control returned must not exceed
    ``delta_u``.
    """
    npr.seed(1)

    n_batch = 2
    n_state, n_ctrl, T = 3, 4, 5
    n_sc = n_state + n_ctrl

    C = npr.randn(T, n_batch, n_sc, n_sc)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = npr.randn(T, n_batch, n_sc)

    alpha = 0.2
    R = np.tile(
        np.eye(n_state) + alpha * np.random.randn(n_state, n_state),
        (T, n_batch, 1, 1))
    S = 0.01 * np.tile(np.random.randn(n_state, n_ctrl), (T, n_batch, 1, 1))
    F = np.concatenate((R, S), axis=3)
    f = np.tile(npr.randn(n_state), (T, n_batch, 1))
    x_init = npr.randn(n_batch, n_state)

    u_lower = -npr.random((T, n_batch, n_ctrl))
    u_upper = npr.random((T, n_batch, n_ctrl))

    # NOTE(review): the reference solve is still executed, but its result is
    # not compared with anything in this test -- confirm whether a comparison
    # was intended or the call can be removed.
    lqr_cp(
        C[:, 0], c[:, 0], F[:, 0], f[:, 0], x_init[0],
        T, n_state, n_ctrl, u_lower[:, 0], u_upper[:, 0],
    )

    def as_double(arr):
        """Wrap a NumPy array as a double-precision ``Variable``."""
        return Variable(torch.Tensor(arr).double())

    C, c, R, S, f, x_init, u_lower, u_upper = map(
        as_double, (C, c, R, S, f, x_init, u_lower, u_upper))

    dynamics = AffineDynamics(R[0, 0], S[0, 0], f[0, 0])

    delta_u = 0.1
    _, u_lqr, _ = mpc.MPC(
        n_state, n_ctrl, T, u_lower, u_upper,
        lqr_iter=1,
        verbose=1,
        delta_u=delta_u,
        backprop=False,
        exit_unconverged=False,
    )(x_init, QuadCost(C, c), dynamics)

    u_lqr = util.get_data_maybe(u_lqr)
    assert torch.abs(u_lqr).max() <= delta_u
def get_loss(x_init, _A, _B):
    """Return the trajectory loss between the expert and candidate dynamics.

    Two MPC problems are solved from ``x_init`` with the same cost
    (``expert['Q']``, ``expert['p']``): one with the expert's ``A``/``B`` and
    one with ``_A``/``_B``.  The loss is the mean squared control difference
    plus the mean squared state difference.

    ``expert``, ``args``, ``n_state``, ``n_ctrl``, ``n_batch``, ``u_lower``,
    ``u_upper`` and ``u_init`` are read from the surrounding scope.
    """
    n_lqr_iter = 2
    horizon = args.T

    def rollout(A, B):
        """Solve MPC for the dynamics matrix ``[A B]``; return ``(x, u)``."""
        dyn = torch.cat((A, B), dim=1)
        dyn = dyn.unsqueeze(0).unsqueeze(0).repeat(horizon, n_batch, 1, 1)
        controller = mpc.MPC(
            n_state, n_ctrl, horizon,
            u_lower=u_lower, u_upper=u_upper, u_init=u_init,
            lqr_iter=n_lqr_iter,
            verbose=-1,
            exit_unconverged=False,
            detach_unconverged=False,
            n_batch=n_batch,
        )
        states, controls, _ = controller(
            x_init, QuadCost(expert['Q'], expert['p']), LinDx(dyn))
        return states, controls

    x_true, u_true = rollout(expert['A'], expert['B'])
    x_pred, u_pred = rollout(_A, _B)

    ctrl_err = torch.mean((u_true - u_pred)**2)
    state_err = torch.mean((x_true - x_pred)**2)
    return ctrl_err + state_err
def forward_numpy(C, c, x_init, u_lower, u_upper, F):
    """Solve the linear-dynamics MPC problem from NumPy inputs.

    Each argument is wrapped as a double-precision ``Variable`` (``None`` is
    passed through) and the problem is solved with ``LinDx(F)`` dynamics and
    ``backprop=False``.

    ``n_state``, ``n_ctrl`` and ``T`` are read from the surrounding scope.

    Returns:
        The solved controls flattened to 1-D, as a NumPy array.
    """
    def wrap(arr):
        if arr is None:
            return None
        return Variable(torch.Tensor(arr).double())

    quad, lin, x0, lower, upper, dyn = map(
        wrap, (C, c, x_init, u_lower, u_upper, F))

    controller = mpc.MPC(
        n_state, n_ctrl, T, lower, upper, None,
        lqr_iter=40,
        verbose=1,
        exit_unconverged=True,
        backprop=False,
        max_linesearch_iter=2,
    )
    _, controls, _ = controller(x0, QuadCost(quad, lin), LinDx(dyn))
    return util.get_data_maybe(controls.view(-1)).numpy()
def forward(self, x_init, C, c, d):
    """Solve one MPC problem and return the next state and first control.

    The problem uses ``self.F_hat`` repeated over ``T - 1`` steps as the
    dynamics matrix, ``QuadCost(C, c)`` as the cost and ``self.u_lower`` /
    ``self.u_upper`` as control bounds.  ``n_state``, ``n_ctrl`` and ``T``
    are read from the surrounding scope, not from ``self``.

    Returns:
        ``(x_pred[1, 0, :], u_pred[0, 0, :])``.
    """
    # Shapes as annotated by the original author: T-1 x n_state after the
    # transpose, T-1 x 1 x n_state after unsqueeze.
    # NOTE(review): this disturbance term is computed but never used; LinDx
    # below receives None as its second argument. Confirm whether it was
    # meant to be passed instead.
    disturbance = torch.mm(self.Bd_hat, d).transpose(0, 1)
    disturbance = disturbance.unsqueeze(1)

    controller = mpc.MPC(
        n_state=n_state,
        n_ctrl=n_ctrl,
        T=T,
        u_lower=self.u_lower,
        u_upper=self.u_upper,
        lqr_iter=20,
        verbose=0,
        exit_unconverged=False,
    )
    x0 = x_init.double()
    stage_cost = QuadCost(C.double(), c.double())
    dynamics = LinDx(self.F_hat.repeat(T - 1, 1, 1, 1), None)

    x_pred, u_pred, _ = controller(x0, stage_cost, dynamics)
    return x_pred[1, 0, :], u_pred[0, 0, :]
def mpc(self, dx, xinit, q, p, u_init=None, eps_override=None,
        lqr_iter_override=None):
    """Solve a box-constrained MPC problem with a diagonal quadratic cost.

    Args:
        dx: Dynamics object handed to the solver.
        xinit: Initial states; ``xinit.shape[0]`` is used as the batch size.
        q: 1-D weights placed on the diagonal of the cost matrix.
        p: Linear cost term, repeated over the horizon and the batch.
        u_init: Optional initial control sequence forwarded to ``mpc.MPC``.
        eps_override: Replaces ``self.true_dx.mpc_eps`` when truthy
            (``None`` and ``0`` both fall back to the default).
        lqr_iter_override: Replaces ``self.lqr_iter`` when truthy.

    Returns:
        ``(x_mpc, u_mpc)``; the objective values returned by the solver are
        discarded.
    """
    n_batch = xinit.shape[0]
    true_dx = self.true_dx
    horizon = self.mpc_T

    # Same cost at every time step and for every batch element.
    Q = torch.diag(q).unsqueeze(0).unsqueeze(0).repeat(
        horizon, n_batch, 1, 1)
    p = p.unsqueeze(0).repeat(horizon, n_batch, 1)

    # Falsy overrides (None, 0) deliberately select the configured defaults.
    eps = eps_override or true_dx.mpc_eps
    lqr_iter = lqr_iter_override or self.lqr_iter

    controller = mpc.MPC(
        true_dx.n_state, true_dx.n_ctrl, horizon,
        u_lower=true_dx.lower, u_upper=true_dx.upper, u_init=u_init,
        lqr_iter=lqr_iter,
        verbose=0,
        exit_unconverged=False,
        detach_unconverged=True,
        linesearch_decay=true_dx.linesearch_decay,
        max_linesearch_iter=true_dx.max_linesearch_iter,
        grad_method=self.grad_method,
        eps=eps,
    )
    x_mpc, u_mpc, _ = controller(xinit, QuadCost(Q, p), dx)
    return x_mpc, u_mpc
def forward(self, x_init, ft, C, c, current=True, n_iters=20):
    """Solve a batched MPC problem with the current or previous dynamics.

    Args:
        x_init: Initial states; cast to double before solving.
        ft: 3-D tensor passed (as double) as the second argument of
            ``LinDx``; its second dimension is used as the batch size.
        C: Quadratic cost term; cast to double.
        c: Linear cost term; cast to double.
        current: When truthy use ``self.F_hat``, otherwise ``self.F_hat_old``.
        n_iters: Value passed to the solver as ``lqr_iter``.

    Returns:
        ``(x_lqr, u_lqr)`` as returned by the solver; the objective values
        are discarded.
    """
    # Unpacking three names keeps the requirement that ``ft`` is 3-D.
    _, n_batch, _ = ft.shape

    F_hat = self.F_hat if current else self.F_hat_old

    controller = mpc.MPC(
        n_state=self.n_state,
        n_ctrl=self.n_ctrl,
        T=self.T,
        u_lower=self.u_lower.repeat(self.T, n_batch, 1),
        u_upper=self.u_upper.repeat(self.T, n_batch, 1),
        lqr_iter=n_iters,
        backprop=True,
        verbose=0,
        exit_unconverged=False,
    )
    x_lqr, u_lqr, _ = controller(
        x_init.double(),
        QuadCost(C.double(), c.double()),
        LinDx(F_hat.repeat(self.T - 1, n_batch, 1, 1), ft.double()),
    )
    return x_lqr, u_lqr
def construct_MPC(A, B, ref, dt):
    """Build an MPC solver, quadratic cost and linear dynamics for one system.

    The state dimension is ``args.hidden_dim`` (read from the surrounding
    scope); there is one control input, a horizon of 5 steps and a batch
    size of 1.

    Args:
        A: State matrix; used as ``I + dt * A`` in the dynamics.
        B: Input matrix; used as ``dt * B`` in the dynamics.
        ref: Reference used for the linear cost term; only ``ref``'s first
            row after broadcasting (``px[0]``) is used.
        dt: Step size used in the two expressions above.

    Returns:
        ``(mpc_model, cost, dynamic)``: the ``mpc.MPC`` instance, the
        ``QuadCost`` and the ``LinDx`` object.
    """
    n_batch, n_state, n_ctrl, T = 1, args.hidden_dim, 1, 5

    # Unit weight on every state, 0.1 on the control.
    goal_weights = torch.ones(n_state)
    ctrl_penalty = 0.1 * torch.ones(n_ctrl)
    q = torch.cat((goal_weights, ctrl_penalty))

    # Linear term pulls the state towards ``ref``; zero for the control.
    px = -torch.sqrt(goal_weights) * ref
    p = torch.cat((px[0], torch.zeros(n_ctrl)))

    Q = torch.diag(q).unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1)
    p = p.unsqueeze(0).repeat(T, n_batch, 1)

    # Dynamics matrix [I + dt*A, dt*B], repeated over time and batch.
    F = torch.FloatTensor(
        np.concatenate([np.eye(n_state) + dt * A, dt * B], axis=1))
    F = F.unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1)

    # Controls are limited to [-2, 2].
    u_lower = -torch.ones(T, n_batch, n_ctrl) * 2
    u_upper = torch.ones(T, n_batch, n_ctrl) * 2

    cost = QuadCost(Q, p)
    dynamic = LinDx(F)

    mpc_model = mpc.MPC(
        n_state=n_state,
        n_ctrl=n_ctrl,
        n_batch=n_batch,
        backprop=False,
        T=T,
        u_lower=u_lower,
        u_upper=u_upper,
        lqr_iter=10,
        verbose=0,
        exit_unconverged=False,
        eps=1e-2,
    )
    return mpc_model, cost, dynamic
ctrl_penalty = 0.01 * torch.ones(n_ctrl) q = torch.cat((goal_weights, ctrl_penalty)) px = -torch.sqrt(goal_weights) * ref p = torch.cat((px, torch.zeros((n_batch, n_ctrl))), dim = 1) Q = torch.diag(q).unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1) p = p.unsqueeze(0).repeat(T, 1, 1) F = torch.FloatTensor(np.concatenate([np.eye(args.hidden_dim) + env.env.dt*A, env.env.dt * B], axis = 1)) F = F.unsqueeze(0).unsqueeze(0).repeat(T, n_batch, 1, 1) f = torch.zeros([5, 1, 3]) u_lower = -torch.ones(T, n_batch, n_ctrl) *2 u_upper = torch.ones(T, n_batch, n_ctrl) * 2 cost = QuadCost(Q, p) dynamic = LinDx(F) u_init = None for k in range(5): state = env.reset() state = model.transform_state(state) for i in range(100): env.render() state = torch.FloatTensor(state.copy().reshape((1, -1))) y = model.encoder(state).detach() act = -np.dot(K, (y-ref).T) if i % 5 == 0: mpc_model = mpc.MPC( n_state = n_state,
def test_memory():
    """Assert that one MPC solve leaves the process RSS unchanged.

    A seeded random box-constrained problem is built first.  RSS is then
    sampled after a garbage collection, an ``mpc.MPC`` solver is created,
    run once and deleted, and RSS is sampled again after another collection.
    """
    torch.manual_seed(0)

    n_batch, n_state, n_ctrl, T = 2, 3, 4, 5
    n_sc = n_state + n_ctrl

    # Random PSD quadratic cost.
    C = torch.randn(T * n_batch, n_sc, n_sc)
    C = torch.bmm(C, C.transpose(1, 2)).view(T, n_batch, n_sc, n_sc)
    c = torch.randn(T, n_batch, n_sc)

    # Random linear dynamics [R S].
    alpha = 0.2
    R = torch.eye(n_state) + alpha * torch.randn(n_state, n_state)
    R = R.repeat(T, n_batch, 1, 1)
    S = torch.randn(T, n_batch, n_state, n_ctrl)
    F = torch.cat((R, S), dim=3)

    # Initial state and control bounds.
    x_init = torch.randn(n_batch, n_state)
    u_lower = -torch.rand(T, n_batch, n_ctrl)
    u_upper = torch.rand(T, n_batch, n_ctrl)

    proc = psutil.Process(os.getpid())

    gc.collect()
    rss_before = proc.memory_info().rss

    # The solver result is intentionally not bound to a name.
    solver = mpc.MPC(
        n_state=n_state,
        n_ctrl=n_ctrl,
        T=T,
        u_lower=u_lower,
        u_upper=u_upper,
        lqr_iter=20,
        verbose=1,
        backprop=False,
        exit_unconverged=False,
    )
    solver(x_init, QuadCost(C, c), LinDx(F))
    del solver

    gc.collect()
    mem_used = proc.memory_info().rss - rss_before
    print(mem_used)
    # NOTE(review): exact equality on an RSS delta may be sensitive to the
    # allocator and platform -- confirm this is stable where the suite runs.
    assert mem_used == 0
def test_lqr_backward_cost_nn_dynamics_module_constrained_slew():
    """Compare analytic MPC gradients with finite differences (NN dynamics).

    A box-constrained problem with ``slew_rate_penalty=1.0`` and an
    ``NNDynamics`` model is solved.  The Jacobians of the flattened controls
    with respect to the linear cost ``c`` and to the first layer's bias are
    computed twice: numerically with ``nd.Jacobian`` around ``forward_numpy``
    and via ``grad`` through a solve using ``GradMethods.ANALYTIC``.  Both
    pairs must agree to ``atol=1e-3``.
    """
    npr.seed(0)
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 1, 2, 2, 2
    hidden_sizes = [10, 10]
    n_sc = n_state + n_ctrl

    # Random quadratic cost; C^T C keeps each stage Hessian PSD.
    C = 10. * npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10. * npr.randn(T, n_batch, n_sc).astype(np.float64)

    x_init = npr.randn(n_batch, n_state).astype(np.float64)
    beta = 1.
    u_lower = -beta * np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_upper = beta * np.ones((T, n_batch, n_ctrl)).astype(np.float64)

    dynamics = NNDynamics(
        n_state, n_ctrl, hidden_sizes, activation='sigmoid').double()
    # NumPy copy of the initial first-layer bias; used as the finite-difference
    # evaluation point and to restore the module afterwards.
    fc0b = dynamics.fcs[0].bias.view(-1).data.numpy().copy()

    def forward_numpy(C, c, x_init, u_lower, u_upper, fc0b):
        # NumPy-in / NumPy-out solve used by the finite-difference Jacobians.
        _C, _c, _x_init, _u_lower, _u_upper, fc0b = [
            Variable(torch.Tensor(x).double(), requires_grad=True)
            if x is not None else None
            for x in [C, c, x_init, u_lower, u_upper, fc0b]
        ]

        # Writes into the shared ``dynamics`` module in place, so every call
        # leaves the module holding the bias it was last given.
        dynamics.fcs[0].bias.data[:] = fc0b.data
        # dynamics.A.data[:] = fc0b.view(n_state, n_state).data
        u_init = None
        x_lqr, u_lqr, objs_lqr = mpc.MPC(
            n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
            lqr_iter=40,
            verbose=-1,
            exit_unconverged=True,
            backprop=False,
            max_linesearch_iter=1,
            slew_rate_penalty=1.0,
        )(_x_init, QuadCost(_C, _c), dynamics)
        return util.get_data_maybe(u_lqr.view(-1)).numpy()

    def f_c(c_flat):
        # Controls as a function of the flattened linear cost term.
        c_ = c_flat.reshape(T, n_batch, n_sc)
        return forward_numpy(C, c_, x_init, u_lower, u_upper, fc0b)

    def f_fc0b(fc0b):
        # Controls as a function of the first-layer bias (the parameter
        # shadows the outer ``fc0b``).
        return forward_numpy(C, c, x_init, u_lower, u_upper, fc0b)

    u = forward_numpy(C, c, x_init, u_lower, u_upper, fc0b)

    # Make sure the solution is strictly partially on the boundary.
    assert np.any(u == u_lower.reshape(-1)) or np.any(u == u_upper.reshape(-1))
    assert np.any((u != u_lower.reshape(-1)) & (u != u_upper.reshape(-1)))

    # Finite-difference Jacobians (``nd`` is presumably numdifftools -- verify
    # against the file's imports).
    du_dc_fd = nd.Jacobian(f_c)(c.reshape(-1))
    du_dfc0b_fd = nd.Jacobian(f_fc0b)(fc0b.reshape(-1))

    # Restore the original bias: the finite-difference evaluations above
    # overwrote it with perturbed values.
    dynamics.fcs[0].bias.data = torch.DoubleTensor(fc0b).clone()

    _C, _c, _x_init, _u_lower, _u_upper, fc0b = [
        Variable(torch.Tensor(x).double(), requires_grad=True)
        if x is not None else None
        for x in [C, c, x_init, u_lower, u_upper, fc0b]
    ]

    u_init = None
    x_lqr, u_lqr, objs_lqr = mpc.MPC(
        n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
        lqr_iter=20,
        verbose=-1,
        max_linesearch_iter=1,
        grad_method=GradMethods.ANALYTIC,
        slew_rate_penalty=1.0,
    )(_x_init, QuadCost(_C, _c), dynamics)
    u_lqr_flat = u_lqr.view(-1)

    # One Jacobian row per control entry; retain_graph=True keeps the graph
    # alive for the following grad calls.
    du_dC = []
    du_dc = []
    du_dfc0b = []
    for i in range(len(u_lqr_flat)):
        dCi = grad(u_lqr_flat[i], [_C], retain_graph=True)[0].contiguous().view(-1)
        dci = grad(u_lqr_flat[i], [_c], retain_graph=True)[0].contiguous().view(-1)
        # Differentiated with respect to the module parameter itself, not the
        # ``fc0b`` Variable created above.
        dfc0b = grad(u_lqr_flat[i], [dynamics.fcs[0].bias],
                     retain_graph=True)[0].view(-1)
        du_dC.append(dCi)
        du_dc.append(dci)
        du_dfc0b.append(dfc0b)
    # du_dC is assembled but not compared against anything below.
    du_dC = torch.stack(du_dC).data.numpy()
    du_dc = torch.stack(du_dc).data.numpy()
    du_dfc0b = torch.stack(du_dfc0b).data.numpy()

    npt.assert_allclose(du_dc_fd, du_dc, atol=1e-3)
    npt.assert_allclose(du_dfc0b_fd, du_dfc0b, atol=1e-3)
dx.n_state, dx.n_ctrl, mpc_T, u_init=u_init, u_lower=dx.lower, u_upper=dx.upper, lqr_iter=500, verbose=0, exit_unconverged=False, detach_unconverged=False, linesearch_decay=dx.linesearch_decay, max_linesearch_iter=dx.max_linesearch_iter, grad_method=GradMethods.AUTO_DIFF, eps=dx.mpc_eps, n_batch=1, ).cuda()(x, QuadCost(Q, p), dx) # ).cuda()(x, AcrobotStateCost(goal_state.unsqueeze(0), coef_c=1e-2), dx) x1, y1, x2, y2 = dx.model.visualize_point( state.detach().cpu().numpy()[0]) print(nominal_objs) plt.plot([0] + [x1] + [x2], [0] + [y1] + [y2], color='gray') if (x2 - xg)**2 + (y2 - yg)**2 < 0.2: break next_action = nominal_actions[0] u_init = torch.cat( (nominal_actions[1:], torch.zeros(1, n_batch, dx.n_ctrl)), dim=0) u_init[-2] = u_init[-3] with torch.no_grad(): state = dx(state, next_action) # print(state)
def test_lqr_backward_cost_linear_dynamics_constrained():
    """Compare MPC gradients with finite differences (linear dynamics).

    A box-constrained problem with ``LinDx(F)`` dynamics is solved.  The
    Jacobians of the flattened controls with respect to the linear cost
    ``c``, the dynamics ``F`` and the initial state are computed numerically
    with ``nd.Jacobian`` around ``forward_numpy`` and via ``grad`` through a
    differentiable solve.  Each pair must agree to ``atol=1e-4``.
    """
    npr.seed(0)
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 1, 2, 2, 3
    hidden_sizes = [10, 10]  # not used in this test
    n_sc = n_state + n_ctrl

    # Random quadratic cost; C^T C keeps each stage Hessian PSD.
    C = 10. * npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10. * npr.randn(T, n_batch, n_sc).astype(np.float64)

    x_init = npr.randn(n_batch, n_state).astype(np.float64)
    beta = 0.5
    u_lower = -beta * np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_upper = beta * np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    F = npr.randn(T - 1, n_batch, n_state, n_sc)

    def forward_numpy(C, c, x_init, u_lower, u_upper, F):
        # NumPy-in / NumPy-out solve used by the finite-difference Jacobians.
        _C, _c, _x_init, _u_lower, _u_upper, F = [
            Variable(torch.Tensor(x).double())
            if x is not None else None
            for x in [C, c, x_init, u_lower, u_upper, F]
        ]

        u_init = None
        x_lqr, u_lqr, objs_lqr = mpc.MPC(
            n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
            lqr_iter=40,
            verbose=1,
            exit_unconverged=True,
            backprop=False,
            max_linesearch_iter=2,
        )(_x_init, QuadCost(_C, _c), LinDx(F))
        return util.get_data_maybe(u_lqr.view(-1)).numpy()

    # The closures below read C, c, x_init, u_lower, u_upper and F from the
    # enclosing scope at call time; they are only called before those names
    # are rebound to Variables further down.
    def f_c(c_flat):
        # Controls as a function of the flattened linear cost term.
        c_ = c_flat.reshape(T, n_batch, n_sc)
        return forward_numpy(C, c_, x_init, u_lower, u_upper, F)

    def f_F(F_flat):
        # Controls as a function of the flattened dynamics.
        F_ = F_flat.reshape(T - 1, n_batch, n_state, n_sc)
        return forward_numpy(C, c, x_init, u_lower, u_upper, F_)

    def f_x_init(x_init):
        # Controls as a function of the initial state of the single batch
        # element (the parameter shadows the outer ``x_init``).
        x_init = x_init.reshape(1, -1)
        return forward_numpy(C, c, x_init, u_lower, u_upper, F)

    u = forward_numpy(C, c, x_init, u_lower, u_upper, F)

    # Make sure the solution is strictly partially on the boundary.
    assert np.any(u == u_lower.reshape(-1)) or np.any(u == u_upper.reshape(-1))
    assert np.any((u != u_lower.reshape(-1)) & (u != u_upper.reshape(-1)))

    # Finite-difference Jacobians (``nd`` is presumably numdifftools -- verify
    # against the file's imports).
    du_dc_fd = nd.Jacobian(f_c)(c.reshape(-1))
    du_dF_fd = nd.Jacobian(f_F)(F.reshape(-1))
    du_dxinit_fd = nd.Jacobian(f_x_init)(x_init[0])

    # From here on ``F`` refers to a Variable, no longer the NumPy array.
    _C, _c, _x_init, _u_lower, _u_upper, F = [
        Variable(torch.Tensor(x).double(), requires_grad=True)
        if x is not None else None
        for x in [C, c, x_init, u_lower, u_upper, F]
    ]

    u_init = None
    x_lqr, u_lqr, objs_lqr = mpc.MPC(
        n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
        lqr_iter=20,
        verbose=1,
    )(_x_init, QuadCost(_C, _c), LinDx(F))
    u_lqr_flat = u_lqr.view(-1)

    # One Jacobian row per control entry; retain_graph=True keeps the graph
    # alive for the following grad calls.
    du_dC = []
    du_dc = []
    du_dF = []
    du_dx_init = []
    for i in range(len(u_lqr_flat)):
        dCi = grad(u_lqr_flat[i], [_C], retain_graph=True)[0].view(-1)
        dci = grad(u_lqr_flat[i], [_c], retain_graph=True)[0].view(-1)
        dF = grad(u_lqr_flat[i], [F], retain_graph=True)[0].view(-1)
        dx_init = grad(u_lqr_flat[i], [_x_init], retain_graph=True)[0].view(-1)
        du_dC.append(dCi)
        du_dc.append(dci)
        du_dF.append(dF)
        du_dx_init.append(dx_init)
    # du_dC is assembled but not compared against anything below.
    du_dC = torch.stack(du_dC).data.numpy()
    du_dc = torch.stack(du_dc).data.numpy()
    du_dF = torch.stack(du_dF).data.numpy()
    du_dx_init = torch.stack(du_dx_init).data.numpy()

    npt.assert_allclose(du_dc_fd, du_dc, atol=1e-4)
    npt.assert_allclose(du_dF, du_dF_fd, atol=1e-4)
    npt.assert_allclose(du_dx_init, du_dxinit_fd, atol=1e-4)
dx.n_state, # Number of states dx.n_ctrl, # Number of control inputs mpc_T, # MPC prediction horizon in number of timesteps u_init=u_init, # Initial guess for inputs u_lower=dx.lower, # Lower limit on inputs u_upper=dx.upper, # Upper limit on inputs lqr_iter=100, # Number of iterations per LQR solution step verbose=0, # Verbosity, 0 is just warnings. 1 will give more info exit_unconverged=False, detach_unconverged=False, backprop=True, linesearch_decay=dx.linesearch_decay, max_linesearch_iter=dx.max_linesearch_iter, grad_method=GradMethods.AUTO_DIFF, # FINITE_DIFF, eps=1e-3, )(x, QuadCost(Q, p), dx) # Save the first of the nominal actions determined by the MPC solution to use as # the real next control input next_action = nominal_actions[0] # Update the initial control input to include the current input as the first in # the sequence, then zero the rest # TODO: 02/09/19 - JEV - Would be better to use the previous solution as the # initial guess here? The mpc.MPC function also has a prev_ctrl argument to explore. u_init = torch.cat( (nominal_actions[1:], torch.zeros(1, n_batch, dx.n_ctrl).to(device)), dim=0) u_init[-2] = u_init[-3]
def mpc(s1, a, T, ball_vel=BALL_VEL, lambda_a=1e-3, centering=False,
        paddle_vel=None, vert_cost=False, verbose=1):
    """Run one MPC step on the Breakout simulator and return the controls.

    Args:
        s1: 1-D state tensor; ``s1[1]`` is read as the paddle's y position
            and ``s1[-2]`` as the ball's y position.
        a: Initial action sequence; ``len(a[0])`` is the action dimension and
            ``get_tensor(a)`` is used as the solver's ``u_init``.
        T: Planning horizon (number of time steps).
        ball_vel: Ball velocity; forwarded to ``make_world`` and its second
            component decides which tracking branch is used.
        lambda_a: Weight on the diagonal action-cost entries.
        centering: Enables the tracking cost in the ``else`` branch below
            when the ball's y position exceeds 1000.
        paddle_vel: Indexed as ``paddle_vel[0]`` / ``paddle_vel[1]`` inside
            the dynamics.
        vert_cost: When truthy, puts a weight of 10 on state index ``ns - 2``.
        verbose: Forwarded to the solver.

    Returns:
        The solved controls with axis 1 squeezed out, as a NumPy array.
    """
    ns = len(s1)
    na = len(a[0])

    class BreakoutDynamics(nn.Module):
        # Wraps the simulator so the solver can call it as a dynamics module.
        def __init__(self):
            super().__init__()

        def forward(self, x, u):
            x_dim = x.ndimension()
            if x_dim == 1:
                x = x.unsqueeze(0)
            # Only the first row of x and u is simulated.
            params = from_vector(x[0])
            world, ball, paddle, blocks, paddle_idx = make_world(
                *params, ball_vel)
            # NOTE(review): with the default paddle_vel=None the indexing
            # below raises TypeError -- callers apparently must pass
            # paddle_vel explicitly; confirm.
            next_x = simulate_breakout(u[0], world, ball, paddle, blocks,
                                       paddle_idx,
                                       paddle_vel_x=paddle_vel[0],
                                       paddle_vel_y=paddle_vel[1])
            return next_x

    dynamics = BreakoutDynamics()
    if CUDA:
        dynamics = dynamics.cuda()

    u_lower, u_upper = -ACTION_VAL, ACTION_VAL
    x_init = s1.clone()
    u_init = get_tensor(a).clone()
    if CUDA:
        u_init = u_init.cuda()

    ball_vel_y = ball_vel[1]
    ball_pos_y = s1[-2]
    paddle_pos_y = s1[1]

    # Base cost: zero weight on the state, lambda_a on every action entry,
    # repeated for each of the T time steps.
    s_Q = torch.zeros(ns)
    Q = torch.cat([s_Q, torch.ones(na) * lambda_a
                   ]).type_as(s1).diag().unsqueeze(0).repeat(T, 1, 1)
    if vert_cost:
        Q[:, ns - 2, ns - 2] = 10

    # Simple X tracking
    # The four entries written below form the quadratic (x[0] - x[ns-3])**2.
    # Presumably index 0 is the paddle's x and ns - 3 the ball's x -- verify
    # against the state layout.
    # NOTE(review): the source was whitespace-collapsed; the ``else`` is read
    # as pairing with ``if ball_vel_y > 0`` -- confirm against the original.
    if ball_vel_y > 0:
        frames_to_paddle = (paddle_pos_y - ball_pos_y) / ball_vel_y / DT
        for t in range(T):
            # Track only on steps up to the estimated arrival frame.
            if int(frames_to_paddle) + 1 >= t:
                Q[t, ns - 3, ns - 3] = 1
                Q[t, ns - 3, 0] = -1
                Q[t, 0, 0] = 1
                Q[t, 0, ns - 3] = -1
    else:
        if centering and ball_pos_y > 1000:
            Q[:, ns - 3, ns - 3] = 1
            Q[:, ns - 3, 0] = -1
            Q[:, 0, 0] = 1
            Q[:, 0, ns - 3] = -1

    # Zero linear cost term.
    p = torch.zeros(ns + na).type_as(Q)
    p = p.unsqueeze(0).repeat(T, 1)

    # Insert a batch axis of size 1.
    Q = Q.unsqueeze(1)
    p = p.unsqueeze(1)
    x_init = x_init.unsqueeze(0)

    solver = MPC(
        ns,
        na,
        T=T,
        # x_init=x_init,
        u_init=u_init,
        u_lower=u_lower,
        u_upper=u_upper,
        verbose=verbose,
        delta_u=10 * ACTION_VAL,
        lqr_iter=1,
        grad_method=GradMethods.AUTO_DIFF,
        n_batch=1,
        max_linesearch_iter=1,
        exit_unconverged=False,
        backprop=False,
    )
    if CUDA:
        solver = solver.cuda()

    cost = QuadCost(Q, p)
    x, u, objs = solver(x_init, cost, dynamics)

    # Drop the batch axis and move to the CPU before converting to NumPy.
    u = u.squeeze(1)
    if CUDA:
        u = u.cpu()
    return u.data.numpy()