import numpy as np
import numpy.random as npr
import numpy.testing as npt
import numdifftools as nd

import torch
from torch.autograd import Variable, grad

from mpc import mpc, util
from mpc.mpc import QuadCost, LinDx, GradMethods
from mpc.dynamics import NNDynamics


def test_lqr_backward_cost_nn_dynamics_module_constrained():
    npr.seed(0)
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 1, 2, 2, 2
    hidden_sizes = [10, 10]
    n_sc = n_state + n_ctrl

    # Random positive semi-definite quadratic cost.
    C = 10.*npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10.*npr.randn(T, n_batch, n_sc).astype(np.float64)

    x_init = npr.randn(n_batch, n_state).astype(np.float64)
    beta = 1.
    u_lower = -beta*np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_upper = beta*np.ones((T, n_batch, n_ctrl)).astype(np.float64)

    dynamics = NNDynamics(
        n_state, n_ctrl, hidden_sizes, activation='sigmoid').double()
    fc0b = dynamics.fcs[0].bias.view(-1).data.numpy().copy()

    def forward_numpy(C, c, x_init, u_lower, u_upper, fc0b):
        _C, _c, _x_init, _u_lower, _u_upper, _fc0b = [
            Variable(torch.Tensor(x).double()) if x is not None else None
            for x in [C, c, x_init, u_lower, u_upper, fc0b]
        ]

        dynamics.fcs[0].bias.data[:] = _fc0b.data
        # dynamics.A.data[:] = _fc0b.view(n_state, n_state).data
        u_init = None
        x_lqr, u_lqr, objs_lqr = mpc.MPC(
            n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
            lqr_iter=40,
            verbose=-1,
            exit_unconverged=True,
            backprop=False,
            max_linesearch_iter=1,
        )(_x_init, QuadCost(_C, _c), dynamics)
        return util.get_data_maybe(u_lqr.view(-1)).numpy()

    def f_c(c_flat):
        c_ = c_flat.reshape(T, n_batch, n_sc)
        return forward_numpy(C, c_, x_init, u_lower, u_upper, fc0b)

    def f_fc0b(fc0b):
        return forward_numpy(C, c, x_init, u_lower, u_upper, fc0b)

    u = forward_numpy(C, c, x_init, u_lower, u_upper, fc0b)

    # Make sure the solution is strictly partially on the boundary.
    assert np.any(u == u_lower.reshape(-1)) or np.any(u == u_upper.reshape(-1))
    assert np.any((u != u_lower.reshape(-1)) & (u != u_upper.reshape(-1)))

    # Finite-difference Jacobians of the controls w.r.t. the linear cost
    # term and the first-layer bias of the dynamics network.
    du_dc_fd = nd.Jacobian(f_c)(c.reshape(-1))
    du_dfc0b_fd = nd.Jacobian(f_fc0b)(fc0b.reshape(-1))

    dynamics.fcs[0].bias.data = torch.DoubleTensor(fc0b).clone()
    _C, _c, _x_init, _u_lower, _u_upper, _fc0b = [
        Variable(torch.Tensor(x).double(), requires_grad=True)
        if x is not None else None
        for x in [C, c, x_init, u_lower, u_upper, fc0b]
    ]

    u_init = None
    x_lqr, u_lqr, objs_lqr = mpc.MPC(
        n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
        lqr_iter=20,
        verbose=-1,
        max_linesearch_iter=1,
        grad_method=GradMethods.ANALYTIC,
    )(_x_init, QuadCost(_C, _c), dynamics)
    u_lqr_flat = u_lqr.view(-1)

    # Analytic Jacobians, assembled one output at a time with autograd.
    du_dC, du_dc, du_dfc0b = [], [], []
    for i in range(len(u_lqr_flat)):
        dCi = grad(u_lqr_flat[i], [_C], retain_graph=True)[0].view(-1)
        dci = grad(u_lqr_flat[i], [_c], retain_graph=True)[0].view(-1)
        dfc0b = grad(u_lqr_flat[i], [dynamics.fcs[0].bias],
                     retain_graph=True)[0].view(-1)
        du_dC.append(dCi)
        du_dc.append(dci)
        du_dfc0b.append(dfc0b)
    du_dC = torch.stack(du_dC).data.numpy()
    du_dc = torch.stack(du_dc).data.numpy()
    du_dfc0b = torch.stack(du_dfc0b).data.numpy()

    npt.assert_allclose(du_dc_fd, du_dc, atol=1e-3)
    npt.assert_allclose(du_dfc0b_fd, du_dfc0b, atol=1e-3)
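# The check above hinges on numdifftools' finite-difference Jacobian. As a
# minimal, self-contained sketch of that idiom (plain NumPy, no MPC; the
# function and evaluation point are made up for illustration):
def _nd_jacobian_demo():
    f = lambda z: np.array([z[0]**2, z[0]*z[1]])  # maps R^2 -> R^2
    J = nd.Jacobian(f)(np.array([1., 2.]))        # numerical Jacobian at (1, 2)
    # Analytic Jacobian is [[2*z0, 0], [z1, z0]] = [[2, 0], [2, 1]] here.
    assert np.allclose(J, np.array([[2., 0.], [2., 1.]]), atol=1e-6)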
# Quadratic cost for the MPC problem. goal_weights, goal_state, ctrl_penalty,
# nx, nu, env, logger, render, and the run constants (TIMESTEPS, N_BATCH,
# LQR_ITER, ACTION_LOW, ACTION_HIGH, run_iter) are defined earlier in the
# script; the fragment originally began mid-way through this torch.cat.
q = torch.cat((
    goal_weights,
    ctrl_penalty * torch.ones(nu)
))  # nx + nu
px = -torch.sqrt(goal_weights) * goal_state
p = torch.cat((px, torch.zeros(nu)))
Q = torch.diag(q).repeat(TIMESTEPS, N_BATCH, 1, 1)  # T x B x nx+nu x nx+nu
p = p.repeat(TIMESTEPS, N_BATCH, 1)
cost = mpc.QuadCost(Q, p)  # T x B x nx+nu (linear component of cost)

# run MPC
total_reward = 0
for i in range(run_iter):
    state = env.state.copy()
    state = torch.tensor(state).view(1, -1).float()
    command_start = time.perf_counter()
    # recreate controller using updated u_init (kind of wasteful right?)
    ctrl = mpc.MPC(nx, nu, TIMESTEPS,
                   u_lower=ACTION_LOW, u_upper=ACTION_HIGH,
                   lqr_iter=LQR_ITER,
                   exit_unconverged=False, eps=1e-2,
                   n_batch=N_BATCH, backprop=False, verbose=0, u_init=u_init,
                   grad_method=mpc.GradMethods.AUTO_DIFF)

    # compute action based on current state, dynamics, and cost
    # nominal_states, nominal_actions, nominal_objs = ctrl(state, cost, PendulumDynamics())
    nominal_states, nominal_actions, nominal_objs = ctrl(
        state, cost, MountainCarDynamics())
    action = nominal_actions[0]  # take first planned action
    u_init = torch.cat((nominal_actions[1:], torch.zeros(1, N_BATCH, nu)), dim=0)

    elapsed = time.perf_counter() - command_start
    s, r, _, _ = env.step(action.detach().numpy())
    total_reward += r
    logger.debug("action taken: %.4f cost received: %.4f time taken: %.5fs",
                 action, -r, elapsed)
    if render:
        env.render()
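# `MountainCarDynamics` is defined elsewhere in the script. As a rough,
# hypothetical sketch of what a differentiable module for it could look like
# (constants follow Gym's MountainCarContinuous; the wall-collision velocity
# reset is omitted to keep the update smooth for autodiff -- treat this as
# illustrative, not the exact module used above):
class MountainCarDynamicsSketch(torch.nn.Module):
    def forward(self, state, action):
        # state: B x 2 (position, velocity); action: B x 1 (force).
        position, velocity = state[:, 0], state[:, 1]
        force = torch.clamp(action[:, 0], -1.0, 1.0)
        velocity = velocity + 0.0015 * force - 0.0025 * torch.cos(3.0 * position)
        velocity = torch.clamp(velocity, -0.07, 0.07)
        position = torch.clamp(position + velocity, -1.2, 0.6)
        return torch.stack((position, velocity), dim=1)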
def test_lqr_backward_cost_linear_dynamics_constrained():
    npr.seed(0)
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 1, 2, 2, 3
    n_sc = n_state + n_ctrl

    C = 10.*npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10.*npr.randn(T, n_batch, n_sc).astype(np.float64)

    x_init = npr.randn(n_batch, n_state).astype(np.float64)
    beta = 0.5
    u_lower = -beta*np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_upper = beta*np.ones((T, n_batch, n_ctrl)).astype(np.float64)

    F = npr.randn(T-1, n_batch, n_state, n_sc)
    f = npr.randn(T-1, n_batch, n_state)

    def forward_numpy(C, c, x_init, u_lower, u_upper, F, f):
        _C, _c, _x_init, _u_lower, _u_upper, _F, _f = [
            Variable(torch.Tensor(x).double()) if x is not None else None
            for x in [C, c, x_init, u_lower, u_upper, F, f]
        ]

        u_init = None
        x_lqr, u_lqr, objs_lqr = mpc.MPC(
            n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
            lqr_iter=40,
            verbose=1,
            exit_unconverged=True,
            backprop=False,
            max_linesearch_iter=2,
        )(_x_init, QuadCost(_C, _c), LinDx(_F, _f))
        return util.get_data_maybe(u_lqr.view(-1)).numpy()

    def f_c(c_flat):
        c_ = c_flat.reshape(T, n_batch, n_sc)
        return forward_numpy(C, c_, x_init, u_lower, u_upper, F, f)

    def f_F(F_flat):
        F_ = F_flat.reshape(T-1, n_batch, n_state, n_sc)
        return forward_numpy(C, c, x_init, u_lower, u_upper, F_, f)

    def f_f(f_flat):
        f_ = f_flat.reshape(T-1, n_batch, n_state)
        return forward_numpy(C, c, x_init, u_lower, u_upper, F, f_)

    def f_x_init(x_init):
        x_init = x_init.reshape(1, -1)
        return forward_numpy(C, c, x_init, u_lower, u_upper, F, f)

    u = forward_numpy(C, c, x_init, u_lower, u_upper, F, f)

    # Make sure the solution is strictly partially on the boundary.
    assert np.any(u == u_lower.reshape(-1)) or np.any(u == u_upper.reshape(-1))
    assert np.any((u != u_lower.reshape(-1)) & (u != u_upper.reshape(-1)))

    du_dc_fd = nd.Jacobian(f_c)(c.reshape(-1))
    du_dF_fd = nd.Jacobian(f_F)(F.reshape(-1))
    du_df_fd = nd.Jacobian(f_f)(f.reshape(-1))
    du_dxinit_fd = nd.Jacobian(f_x_init)(x_init[0])

    _C, _c, _x_init, _u_lower, _u_upper, _F, _f = [
        Variable(torch.Tensor(x).double(), requires_grad=True)
        if x is not None else None
        for x in [C, c, x_init, u_lower, u_upper, F, f]
    ]

    u_init = None
    x_lqr, u_lqr, objs_lqr = mpc.MPC(
        n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
        lqr_iter=20,
        verbose=1,
    )(_x_init, QuadCost(_C, _c), LinDx(_F, _f))
    u_lqr_flat = u_lqr.view(-1)

    du_dC, du_dc, du_dF, du_df, du_dx_init = [], [], [], [], []
    for i in range(len(u_lqr_flat)):
        dCi = grad(u_lqr_flat[i], [_C], retain_graph=True)[0].view(-1)
        dci = grad(u_lqr_flat[i], [_c], retain_graph=True)[0].view(-1)
        dF = grad(u_lqr_flat[i], [_F], retain_graph=True)[0].view(-1)
        df = grad(u_lqr_flat[i], [_f], retain_graph=True)[0].view(-1)
        dx_init = grad(u_lqr_flat[i], [_x_init], retain_graph=True)[0].view(-1)
        du_dC.append(dCi)
        du_dc.append(dci)
        du_dF.append(dF)
        du_df.append(df)
        du_dx_init.append(dx_init)
    du_dC = torch.stack(du_dC).data.numpy()
    du_dc = torch.stack(du_dc).data.numpy()
    du_dF = torch.stack(du_dF).data.numpy()
    du_df = torch.stack(du_df).data.numpy()
    du_dx_init = torch.stack(du_dx_init).data.numpy()

    npt.assert_allclose(du_dc_fd, du_dc, atol=1e-4)
    npt.assert_allclose(du_dF, du_dF_fd, atol=1e-4)
    npt.assert_allclose(du_df, du_df_fd, atol=1e-4)
    npt.assert_allclose(du_dx_init, du_dxinit_fd, atol=1e-4)
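# The per-output autograd loop above assembles a full Jacobian one row at a
# time. A hypothetical helper capturing the same pattern (the name is ours;
# on newer PyTorch, torch.autograd.functional.jacobian can replace it):
def _stack_jacobian(outputs_flat, inp):
    rows = [grad(outputs_flat[i], [inp], retain_graph=True)[0].view(-1)
            for i in range(len(outputs_flat))]
    return torch.stack(rows).data.numpy()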
# Imports for the experiment script below. `create_experiment` (which builds
# the true Q, p, A, B for the synthetic LQR task) is defined earlier in the
# same file.
import argparse
import json
import os
import shutil

import setproctitle

import torch.optim as optim
from torch.nn.parameter import Parameter


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_state', type=int, default=3)
    parser.add_argument('--n_ctrl', type=int, default=3)
    parser.add_argument('--T', type=int, default=5)
    parser.add_argument('--save', type=str)
    parser.add_argument('--work', type=str, default='work')
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--seed', type=int, default=0)
    args = parser.parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    t = '.'.join([
        "{}={}".format(x, getattr(args, x))
        for x in ['n_state', 'n_ctrl', 'T']
    ])
    setproctitle.setproctitle('bamos.' + t + '.{}'.format(args.seed))
    if args.save is None:
        args.save = os.path.join(args.work, t, str(args.seed))

    if os.path.exists(args.save):
        shutil.rmtree(args.save)
    os.makedirs(args.save, exist_ok=True)

    meta_file = os.path.join(args.save, 'meta.json')
    meta = create_experiment(args.n_state, args.n_ctrl, args.T)
    with open(meta_file, 'w') as f:
        json.dump(meta, f, indent=4)

    true_model = {}
    for k in ['Q', 'p', 'A', 'B']:
        v = torch.Tensor(np.array(meta[k])).double()
        if args.cuda:
            v = v.cuda()
        v = Variable(v)
        meta[k] = v
        true_model[k] = v

    n_state, n_ctrl, alpha = args.n_state, args.n_ctrl, meta['alpha']

    npr.seed(1)  # Intentionally 1 instead of args.seed so these are the same.
    A_model = np.eye(n_state) + alpha * npr.randn(n_state, n_state)
    B_model = npr.randn(n_state, n_ctrl)

    dtype = true_model['Q'].data.type()
    A_model = Parameter(torch.Tensor(A_model).type(dtype))
    B_model = Parameter(torch.Tensor(B_model).type(dtype))

    # u_lower, u_upper = -100., 100.
    u_lower, u_upper = -1., 1.
    u_init = None

    optimizer = optim.RMSprop((A_model, B_model), lr=1e-2)

    torch.manual_seed(args.seed)

    fname = os.path.join(args.save, 'losses.csv')
    loss_f = open(fname, 'w')
    loss_f.write('im_loss,mse\n')
    loss_f.flush()

    n_batch = 64
    for i in range(5000):
        x_init = Variable(1. * torch.randn(n_batch, n_state).type(dtype))
        optimizer.zero_grad()

        try:
            # Expert trajectories from the true model.
            F_true = torch.cat((true_model['A'], true_model['B']), dim=1) \
                .unsqueeze(0).unsqueeze(0).repeat(args.T, n_batch, 1, 1)
            x_true, u_true, objs_true = mpc.MPC(
                n_state, n_ctrl, args.T,
                u_lower=u_lower, u_upper=u_upper, u_init=u_init,
                lqr_iter=100,
                verbose=-1,
                exit_unconverged=False,
                detach_unconverged=False,
                n_batch=n_batch,
            )(x_init, QuadCost(true_model['Q'], true_model['p']),
              LinDx(F_true))

            # Trajectories from the current model estimate.
            F_pred = torch.cat((A_model, B_model), dim=1) \
                .unsqueeze(0).unsqueeze(0).repeat(args.T, n_batch, 1, 1)
            x_pred, u_pred, objs_pred = mpc.MPC(
                n_state, n_ctrl, args.T,
                u_lower=u_lower, u_upper=u_upper, u_init=u_init,
                lqr_iter=100,
                verbose=-1,
                exit_unconverged=False,
                detach_unconverged=False,
                n_batch=n_batch,
            )(x_init, QuadCost(true_model['Q'], true_model['p']),
              LinDx(F_pred))

            traj_loss = torch.mean((u_true - u_pred)**2)
            # traj_loss = torch.mean((x_true - x_pred)**2)
            traj_loss.backward()
            optimizer.step()

            # import ipdb; ipdb.set_trace()
            model_loss = torch.mean((A_model - true_model['A'])**2) + \
                torch.mean((B_model - true_model['B'])**2)

            loss_f.write('{},{}\n'.format(traj_loss.item(), model_loss.item()))
            loss_f.flush()

            plot_interval = 100
            if i % plot_interval == 0:
                os.system('./plot.py "{}" &'.format(args.save))
                print(A_model, true_model['A'])
            print('{:04d}: traj_loss: {:.4f} model_loss: {:.4f}'.format(
                i, traj_loss.item(), model_loss.item()))
        except KeyboardInterrupt:
            raise
        except Exception as e:
            # print(e)
            raise


if __name__ == '__main__':
    main()
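# Usage sketch (flag names come from the argparse setup above; the script
# filename is illustrative, not the repository's actual name):
#
#   python lqr_imitation.py --n_state 3 --n_ctrl 3 --T 5 --seed 0
#
# Losses stream to <save>/losses.csv (header: im_loss,mse) and ./plot.py is
# re-invoked on the save directory every 100 iterations.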