def test_cartpole_integrator(self):
    ########### PyTorch system ###############
    pyTorchEnv = CartPoleModel(initRandom=False)
    param_truth = pyTorchEnv.theta.detach().numpy()
    # set the variance to 0 to have a deterministic environment
    pyTorchEnv.set_param_values(
        torch.from_numpy(np.concatenate([param_truth, np.zeros(2)])).float())

    ########### casadi system ################
    casadiEnv = CartpoleModelCasadi()
    states, states_d, controls, params = casadiEnv.buildDynamicalSystem()
    euler_func = euler_integration(states, states_d, controls, pyTorchEnv.tau,
                                   integrator_stepsize=1, angular_idx=[2])
    step = ca.Function("step", [states, controls, params], [euler_func])

    ############ Simulating the system ##########
    for traj in range(10):  # simulate 10 different trajectories in total
        timesteps = 500  # simulate the system for 500 timesteps

        # u_inputs = ca.SX.sym("u_outputs", timesteps)
        #
        # init_z_state = ca.SX(x0)
        # z_states = [init_z_state]
        # for i in range(control_steps):
        #     current_z = step(z_states[-1], u_inputs[i], param_truth)
        #     z_states.append(current_z)

        # simulate one trajectory
        sim_one_traj = step.mapaccum("all_steps", timesteps)

        actions = ca.DM(np.random.rand(timesteps))
        policy = ReplayControlPolicy(pyTorchEnv.spec, np.array(actions))
        policy.normalized_input = [False, False, False, False]
        policy.normalized_output = [False]
        path = rollout_torch(pyTorchEnv, policy, timesteps,
                             terminate_only_max_path=True)

        x0 = ca.DM(path["observations"][0, :])
        sim_states = sim_one_traj(x0, actions,
                                  ca.repmat(param_truth, 1, timesteps))

        np.testing.assert_allclose(np.array(sim_states.T),
                                   path["next_observations"],
                                   rtol=1e-2, atol=1e-4)
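# Illustration (not part of the original test): step.mapaccum("all_steps", timesteps)
# above builds a function that applies `step` repeatedly, feeding each returned state
# back in as the next initial state and stacking the per-step states column-wise --
# the vectorized equivalent of the commented-out manual loop. A minimal, self-contained
# toy example of the same pattern (hypothetical scalar dynamics, not the cartpole model):

import casadi as ca
import numpy as np

x = ca.SX.sym("x")
u = ca.SX.sym("u")
toy_step = ca.Function("toy_step", [x, u], [x + 0.1 * u])  # toy Euler-style update
roll = toy_step.mapaccum("roll", 5)          # apply toy_step 5 times, accumulating x
xs = roll(0.0, ca.DM([[1, 1, 1, 1, 1]]))     # initial state plus a 1x5 row of controls
print(np.array(xs))                          # -> [[0.1 0.2 0.3 0.4 0.5]]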
                    default=1000, help='Max length of rollout')
parser.add_argument('--speedup', type=float, default=1, help='Speedup')
parser.add_argument('--show_discretized', type=str2bool, default=True,
                    help='Visualize discretized env')
parser.add_argument('--policy_path', type=str)
args = parser.parse_args()

with tf.Session() as sess:
    data = joblib.load(args.file)
    imitation_env = data['imitationModel']
    imitation_env.load_state_dict(torch.load(args.file[:-4] + "_model.pkl"))

    if args.policy_path is None:
        policy = data['policy']
    else:
        policy_data = joblib.load(args.policy_path)
        policy = policy_data['policy']

    for param in imitation_env.parameters():
        print("params", param)

    while True:
        path = rollout_torch(imitation_env, policy,
                             max_path_length=args.max_path_length,
                             animated=True, speedup=args.speedup)
        if not query_yes_no('Continue simulation?'):
            break
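# Note (assumption, not shown in this excerpt): the script relies on a str2bool
# argparse type for --show_discretized and a query_yes_no prompt for the interactive
# loop. Minimal sketches of what such helpers commonly look like:

import argparse


def str2bool(value):
    """Parse common textual booleans for argparse flags (hypothetical helper)."""
    if isinstance(value, bool):
        return value
    if value.lower() in ("yes", "true", "t", "1"):
        return True
    if value.lower() in ("no", "false", "f", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")


def query_yes_no(question):
    """Prompt until the user answers yes or no; return True for yes (hypothetical helper)."""
    while True:
        answer = input(question + " [y/n] ").strip().lower()
        if answer in ("y", "yes"):
            return True
        if answer in ("n", "no"):
            return False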