Beispiel #1
0
    def test_fitted_value_iteration_pendulum(self):
        plant = PendulumPlant()
        simulator = Simulator(plant)

        def quadratic_regulator_cost(context):
            x = context.get_continuous_state_vector().CopyToVector()
            x[0] = x[0] - math.pi
            u = plant.EvalVectorInput(context, 0).CopyToVector()
            return x.dot(x) + u.dot(u)

        # Note: intentionally under-sampled to keep the problem small
        qbins = np.linspace(0., 2.*math.pi, 11)
        qdotbins = np.linspace(-10., 10., 11)
        state_grid = [set(qbins), set(qdotbins)]

        input_limit = 2.
        input_mesh = [set(np.linspace(-input_limit, input_limit, 5))]
        timestep = 0.01

        num_callbacks = [0]

        def callback(iteration, mesh, cost_to_go, policy):
            # Drawing is slow, don't draw every frame.
            num_callbacks[0] += 1

        options = DynamicProgrammingOptions()
        options.convergence_tol = 1.
        options.periodic_boundary_conditions = [
            PeriodicBoundaryCondition(0, 0., 2.*math.pi)
        ]
        options.visualization_callback = callback
        options.input_port_index = InputPortSelection.kUseFirstInputIfItExists
        options.assume_non_continuous_states_are_fixed = False

        policy, cost_to_go = FittedValueIteration(simulator,
                                                  quadratic_regulator_cost,
                                                  state_grid, input_mesh,
                                                  timestep, options)

        self.assertGreater(num_callbacks[0], 0)
Beispiel #2
0
def VI(u_cost=180.**2):
    tree = RigidBodyTree("/opt/underactuated/src/cartpole/cartpole.urdf",
                         FloatingBaseType.kFixed)
    plant = RigidBodyPlant(tree)
    simulator = Simulator(plant)
    options = DynamicProgrammingOptions()

    def min_time_cost(context):
        x = context.get_continuous_state_vector().CopyToVector()
        u = plant.EvalVectorInput(context, 0).CopyToVector()
        x[1] = x[1] - math.pi
        if x.dot(x) < .1: # seeks get x to (math.pi, 0., 0., 0.)
            return 0.
        return 1. + 2*x.dot(x)/(10**2+math.pi**2+10**2+math.pi**2) + u.dot(u)/(u_cost)

    def quadratic_regulator_cost(context):
        x = context.get_continuous_state_vector().CopyToVector()
        x[1] = x[1] - math.pi
        u = plant.EvalVectorInput(context, 0).CopyToVector()
        return 2*x.dot(x)/(10**2+math.pi**2+10**2+math.pi**2) + u.dot(u)/(u_cost)

    if (True):
        cost_function = min_time_cost
        input_limit = 360.
        options.convergence_tol = 0.001
        state_steps = 19
        input_steps = 19
    else:
        cost_function = quadratic_regulator_cost
        input_limit = 250.
        options.convergence_tol = 0.01
        state_steps = 19
        input_steps = 19

    ####### SETTINGS ####### My cartpole linspaces are off??????
    # State: (x, theta, x_dot, theta_dot)
    # Previous Best... (min. time) (3)
    xbins = np.linspace(-10., 10., state_steps)
    thetabins = np.hstack((np.linspace(0., math.pi-0.2, 8), np.linspace(math.pi-0.2, math.pi+0.2, 11), np.linspace(math.pi+0.2, 8, 2*math.pi)))
    xdotbins = np.linspace(-10., 10., state_steps)
    thetadotbins = np.linspace(-10., 10., state_steps)
    timestep = 0.01

    # Test 1 (4)
    xbins = np.linspace(-10., 10., state_steps)
    thetabins = np.hstack((np.linspace(0., math.pi-0.12, 8), np.linspace(math.pi-0.12, math.pi+0.12, 11), np.linspace(math.pi+0.12, 8, 2*math.pi)))
    xdotbins = np.linspace(-10., 10., state_steps+2)
    thetadotbins = np.hstack((np.linspace(-10., -1.5, 9), np.linspace(-1.5, 1.5, 11), np.linspace(1.5, 10., 9)))
    # timestep = 0.001 <- wasn't active...

    # Test 2 - Test 1 was worse? WOW I HAD A BUG! - in my last np.linspace  (5) SWEET!!!
    xbins = np.linspace(-10., 10., state_steps)
    thetabins = np.hstack((np.linspace(0., math.pi-0.2, 10), np.linspace(math.pi-0.2, math.pi+0.2, 9), np.linspace(math.pi+0.2, 2*math.pi, 10)))
    xdotbins = np.linspace(-10., 10., state_steps+2)
    thetadotbins = np.linspace(-10., 10., state_steps)
    timestep = 0.01
    input_limit = 1000. # test_stabilize_top7 for the higher input_limit version


    options.periodic_boundary_conditions = [
        PeriodicBoundaryCondition(1, 0., 2.*math.pi),
    ]
    state_grid = [set(xbins), set(thetabins), set(xdotbins), set(thetadotbins)]
    input_grid = [set(np.linspace(-input_limit, input_limit, input_steps))] # Input: x force

    print("VI with u_cost={} beginning".format(u_cost))
    policy, cost_to_go = FittedValueIteration(simulator, cost_function,
                                              state_grid, input_grid,
                                              timestep, options)
    print("VI with u_cost={} completed!".format(u_cost))

    save_policy("u_cost={:.0f}_torque_limit={:.0f}".format(u_cost, input_limit), policy, cost_to_go, state_grid)
    return policy, cost_to_go

if (True):
    cost_function = min_time_cost
    input_limit = 3.
    options.convergence_tol = 0.001
else:
    cost_function = quadratic_regulator_cost
    input_limit = 3.
    options.convergence_tol = 0.1

qbins = np.linspace(0., 2. * math.pi, 51)
qdotbins = np.linspace(-10., 10., 51)
state_grid = [set(qbins), set(qdotbins)]
options.periodic_boundary_conditions = [
    PeriodicBoundaryCondition(0, 0., 2. * math.pi),
]
input_grid = [set(np.linspace(-input_limit, input_limit, 9))]
timestep = 0.01

[Q, Qdot] = np.meshgrid(qbins, qdotbins)
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.set_xlabel("theta")
ax.set_ylabel("thetadot")
ax.set_title("Cost-to-Go")

fig2 = plt.figure()
ax2 = fig2.gca(projection='3d')
ax2.set_xlabel("q")
ax2.set_ylabel("qdot")