Code example #1
    def test_cartpole_integrator(self):

        ########### PyTorch system ###############

        pyTorchEnv = CartPoleModel(initRandom=False)
        param_truth = pyTorchEnv.theta.detach().numpy()
        # set the variance to 0 to make the environment deterministic
        pyTorchEnv.set_param_values(
            torch.from_numpy(np.concatenate([param_truth,
                                             np.zeros(2)])).float())

        ########### CasADi system ################

        casadiEnv = CartpoleModelCasadi()

        states, states_d, controls, params = casadiEnv.buildDynamicalSystem()

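        # discretize the continuous dynamics with one explicit Euler step,
        # using the PyTorch env's timestep tau; index 2 is an angular state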
        euler_func = euler_integration(states,
                                       states_d,
                                       controls,
                                       pyTorchEnv.tau,
                                       integrator_stepsize=1,
                                       angular_idx=[2])

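        # wrap the single Euler update as a CasADi Function:
        # (state, control, params) -> next state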
        step = ca.Function("step", [states, controls, params], [euler_func])

        ############ Simulating the system ##########

        for traj in range(10):  # simulate 10 different trajectories in total
            timesteps = 500  # simulate the system for 500 timesteps

            # simulate one full trajectory: mapaccum chains timesteps copies of
            # step, threading each output state back in as the next input state
            sim_one_traj = step.mapaccum("all_steps", timesteps)
            actions = ca.DM(np.random.rand(timesteps))

            policy = ReplayControlPolicy(pyTorchEnv.spec, np.array(actions))
            policy.normalized_input = [False, False, False, False]
            policy.normalized_output = [False]
            path = rollout_torch(pyTorchEnv,
                                 policy,
                                 timesteps,
                                 terminate_only_max_path=True)

            x0 = ca.DM(path["observations"][0, :])
            sim_states = sim_one_traj(x0, actions,
                                      ca.repmat(param_truth, 1, timesteps))

            np.testing.assert_allclose(np.array(sim_states.T),
                                       path["next_observations"],
                                       rtol=1e-2,
                                       atol=1e-4)
Code example #2
    parser.add_argument('--max_path_length',
                        type=int,
                        default=1000,
                        help='Max length of rollout')
    parser.add_argument('--speedup', type=float, default=1, help='Speedup')
    parser.add_argument('--show_discretized',
                        type=str2bool,
                        default=True,
                        help='Visualize discretized env')
    parser.add_argument('--policy_path', type=str)
    args = parser.parse_args()

    with tf.Session() as sess:
        data = joblib.load(args.file)
        imitation_env = data['imitationModel']
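        # restore the imitation model's trained weights, which are saved
        # next to the snapshot with a "_model.pkl" suffix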
        imitation_env.load_state_dict(torch.load(args.file[:-4] +
                                                 "_model.pkl"))
        if args.policy_path is None:
            policy = data['policy']
        else:
            policy_data = joblib.load(args.policy_path)
            policy = policy_data['policy']
        for param in imitation_env.parameters():
            print("params", param)
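        # replay the policy in the imitated environment until the user quits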
        while True:
            path = rollout_torch(imitation_env,
                                 policy,
                                 max_path_length=args.max_path_length,
                                 animated=True,
                                 speedup=args.speedup)
            if not query_yes_no('Continue simulation?'):
                break
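
The --show_discretized flag is parsed with a str2bool helper that argparse does not provide and that is not defined in this excerpt; a typical implementation (an assumption, not the original code) looks like:

import argparse

def str2bool(v):
    # accept common textual spellings of booleans on the command line
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')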