Esempio n. 1
0
    def test_fitted_value_iteration_pendulum(self):
        plant = PendulumPlant()
        simulator = Simulator(plant)

        def quadratic_regulator_cost(context):
            x = context.get_continuous_state_vector().CopyToVector()
            x[0] = x[0] - math.pi
            u = plant.EvalVectorInput(context, 0).CopyToVector()
            return x.dot(x) + u.dot(u)

        qbins = np.linspace(0., 2. * math.pi, 51)
        qdotbins = np.linspace(-10., 10., 41)
        state_grid = [set(qbins), set(qdotbins)]

        input_limit = 2.
        input_mesh = [set(np.linspace(-input_limit, input_limit, 9))]
        timestep = 0.01

        [Q, Qdot] = np.meshgrid(qbins, qdotbins)
        fig = plt.figure()
        ax = fig.gca(projection='3d')

        def draw(iteration, mesh, cost_to_go, policy):
            # Drawing is slow, don't draw every frame.
            if iteration % 20 != 0:
                return
            plt.title("iteration " + str(iteration))
            J = np.reshape(cost_to_go, Q.shape)
            surf = ax.plot_surface(Q,
                                   Qdot,
                                   J,
                                   rstride=1,
                                   cstride=1,
                                   cmap=color_map.jet)
            plt.pause(0.00001)
            surf.remove()

        options = DynamicProgrammingOptions()
        options.convergence_tol = 1.
        options.state_indices_with_periodic_boundary_conditions = {0}
        options.visualization_callback = draw

        policy, cost_to_go = FittedValueIteration(simulator,
                                                  quadratic_regulator_cost,
                                                  state_grid, input_mesh,
                                                  timestep, options)
Esempio n. 2
0
    def test_fitted_value_iteration_pendulum(self):
        plant = PendulumPlant()
        simulator = Simulator(plant)

        def quadratic_regulator_cost(context):
            x = context.get_continuous_state_vector().CopyToVector()
            x[0] = x[0] - math.pi
            u = plant.EvalVectorInput(context, 0).CopyToVector()
            return x.dot(x) + u.dot(u)

        qbins = np.linspace(0., 2.*math.pi, 51)
        qdotbins = np.linspace(-10., 10., 41)
        state_grid = [set(qbins), set(qdotbins)]

        input_limit = 2.
        input_mesh = [set(np.linspace(-input_limit, input_limit, 9))]
        timestep = 0.01

        [Q, Qdot] = np.meshgrid(qbins, qdotbins)
        fig = plt.figure()
        ax = fig.gca(projection='3d')

        def draw(iteration, mesh, cost_to_go, policy):
            # Drawing is slow, don't draw every frame.
            if iteration % 20 != 0:
                return
            plt.title("iteration " + str(iteration))
            J = np.reshape(cost_to_go, Q.shape)
            surf = ax.plot_surface(Q, Qdot, J, rstride=1, cstride=1,
                                   cmap=color_map.jet)
            plt.pause(0.00001)
            surf.remove()

        options = DynamicProgrammingOptions()
        options.convergence_tol = 1.
        options.state_indices_with_periodic_boundary_conditions = {0}
        options.visualization_callback = draw

        policy, cost_to_go = FittedValueIteration(simulator,
                                                  quadratic_regulator_cost,
                                                  state_grid, input_mesh,
                                                  timestep, options)
Esempio n. 3
0
    def test_fitted_value_iteration_pendulum(self):
        plant = PendulumPlant()
        simulator = Simulator(plant)

        def quadratic_regulator_cost(context):
            x = context.get_continuous_state_vector().CopyToVector()
            x[0] = x[0] - math.pi
            u = plant.EvalVectorInput(context, 0).CopyToVector()
            return x.dot(x) + u.dot(u)

        # Note: intentionally under-sampled to keep the problem small
        qbins = np.linspace(0., 2.*math.pi, 11)
        qdotbins = np.linspace(-10., 10., 11)
        state_grid = [set(qbins), set(qdotbins)]

        input_limit = 2.
        input_mesh = [set(np.linspace(-input_limit, input_limit, 5))]
        timestep = 0.01

        num_callbacks = [0]

        def callback(iteration, mesh, cost_to_go, policy):
            # Drawing is slow, don't draw every frame.
            num_callbacks[0] += 1

        options = DynamicProgrammingOptions()
        options.convergence_tol = 1.
        options.periodic_boundary_conditions = [
            PeriodicBoundaryCondition(0, 0., 2.*math.pi)
        ]
        options.visualization_callback = callback
        options.input_port_index = InputPortSelection.kUseFirstInputIfItExists
        options.assume_non_continuous_states_are_fixed = False

        policy, cost_to_go = FittedValueIteration(simulator,
                                                  quadratic_regulator_cost,
                                                  state_grid, input_mesh,
                                                  timestep, options)

        self.assertGreater(num_callbacks[0], 0)
Esempio n. 4
0
    def test_fitted_value_iteration_pendulum(self):
        plant = PendulumPlant()
        simulator = Simulator(plant)

        def quadratic_regulator_cost(context):
            x = context.get_continuous_state_vector().CopyToVector()
            x[0] = x[0] - math.pi
            u = plant.EvalVectorInput(context, 0).CopyToVector()
            return x.dot(x) + u.dot(u)

        # Note: intentionally under-sampled to keep the problem small
        qbins = np.linspace(0., 2.*math.pi, 11)
        qdotbins = np.linspace(-10., 10., 11)
        state_grid = [set(qbins), set(qdotbins)]

        input_limit = 2.
        input_mesh = [set(np.linspace(-input_limit, input_limit, 5))]
        timestep = 0.01

        num_callbacks = [0]

        def callback(iteration, mesh, cost_to_go, policy):
            # Drawing is slow, don't draw every frame.
            num_callbacks[0] += 1

        options = DynamicProgrammingOptions()
        options.convergence_tol = 1.
        options.periodic_boundary_conditions = [
            PeriodicBoundaryCondition(0, 0., 2.*math.pi)
        ]
        options.visualization_callback = callback

        policy, cost_to_go = FittedValueIteration(simulator,
                                                  quadratic_regulator_cost,
                                                  state_grid, input_mesh,
                                                  timestep, options)

        self.assertGreater(num_callbacks[0], 0)
Esempio n. 5
0
def min_time_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    if x.dot(x) < .05:
        return 0.
    return 1.


def quadratic_regulator_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    u = plant.EvalVectorInput(context, 0).CopyToVector()
    return x.dot(x) + 20 * u.dot(u)


if (True):
    cost_function = min_time_cost
    options.convergence_tol = 0.001
else:
    cost_function = quadratic_regulator_cost
    options.convergence_tol = 0.1

qbins = np.linspace(-3., 3., 31)
qdotbins = np.linspace(-3., 3., 51)
state_grid = [set(qbins), set(qdotbins)]

input_limit = 1.
input_grid = [set(np.linspace(-input_limit, input_limit, 9))]
timestep = 0.01

[Q, Qdot] = np.meshgrid(qbins, qdotbins)

fig = plt.figure()
Esempio n. 6
0
def VI(u_cost=180.**2):
    tree = RigidBodyTree("/opt/underactuated/src/cartpole/cartpole.urdf",
                         FloatingBaseType.kFixed)
    plant = RigidBodyPlant(tree)
    simulator = Simulator(plant)
    options = DynamicProgrammingOptions()

    def min_time_cost(context):
        x = context.get_continuous_state_vector().CopyToVector()
        u = plant.EvalVectorInput(context, 0).CopyToVector()
        x[1] = x[1] - math.pi
        if x.dot(x) < .1: # seeks get x to (math.pi, 0., 0., 0.)
            return 0.
        return 1. + 2*x.dot(x)/(10**2+math.pi**2+10**2+math.pi**2) + u.dot(u)/(u_cost)

    def quadratic_regulator_cost(context):
        x = context.get_continuous_state_vector().CopyToVector()
        x[1] = x[1] - math.pi
        u = plant.EvalVectorInput(context, 0).CopyToVector()
        return 2*x.dot(x)/(10**2+math.pi**2+10**2+math.pi**2) + u.dot(u)/(u_cost)

    if (True):
        cost_function = min_time_cost
        input_limit = 360.
        options.convergence_tol = 0.001
        state_steps = 19
        input_steps = 19
    else:
        cost_function = quadratic_regulator_cost
        input_limit = 250.
        options.convergence_tol = 0.01
        state_steps = 19
        input_steps = 19

    ####### SETTINGS ####### My cartpole linspaces are off??????
    # State: (x, theta, x_dot, theta_dot)
    # Previous Best... (min. time) (3)
    xbins = np.linspace(-10., 10., state_steps)
    thetabins = np.hstack((np.linspace(0., math.pi-0.2, 8), np.linspace(math.pi-0.2, math.pi+0.2, 11), np.linspace(math.pi+0.2, 8, 2*math.pi)))
    xdotbins = np.linspace(-10., 10., state_steps)
    thetadotbins = np.linspace(-10., 10., state_steps)
    timestep = 0.01

    # Test 1 (4)
    xbins = np.linspace(-10., 10., state_steps)
    thetabins = np.hstack((np.linspace(0., math.pi-0.12, 8), np.linspace(math.pi-0.12, math.pi+0.12, 11), np.linspace(math.pi+0.12, 8, 2*math.pi)))
    xdotbins = np.linspace(-10., 10., state_steps+2)
    thetadotbins = np.hstack((np.linspace(-10., -1.5, 9), np.linspace(-1.5, 1.5, 11), np.linspace(1.5, 10., 9)))
    # timestep = 0.001 <- wasn't active...

    # Test 2 - Test 1 was worse? WOW I HAD A BUG! - in my last np.linspace  (5) SWEET!!!
    xbins = np.linspace(-10., 10., state_steps)
    thetabins = np.hstack((np.linspace(0., math.pi-0.2, 10), np.linspace(math.pi-0.2, math.pi+0.2, 9), np.linspace(math.pi+0.2, 2*math.pi, 10)))
    xdotbins = np.linspace(-10., 10., state_steps+2)
    thetadotbins = np.linspace(-10., 10., state_steps)
    timestep = 0.01
    input_limit = 1000. # test_stabilize_top7 for the higher input_limit version


    options.periodic_boundary_conditions = [
        PeriodicBoundaryCondition(1, 0., 2.*math.pi),
    ]
    state_grid = [set(xbins), set(thetabins), set(xdotbins), set(thetadotbins)]
    input_grid = [set(np.linspace(-input_limit, input_limit, input_steps))] # Input: x force

    print("VI with u_cost={} beginning".format(u_cost))
    policy, cost_to_go = FittedValueIteration(simulator, cost_function,
                                              state_grid, input_grid,
                                              timestep, options)
    print("VI with u_cost={} completed!".format(u_cost))

    save_policy("u_cost={:.0f}_torque_limit={:.0f}".format(u_cost, input_limit), policy, cost_to_go, state_grid)
    return policy, cost_to_go
    if x.dot(x) < .05:
        return 0.
    return 1.


def quadratic_regulator_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    x[0] = x[0] - math.pi
    u = plant.EvalVectorInput(context, 0).CopyToVector()
    return 2*x.dot(x) + u.dot(u)


if (True):
    cost_function = min_time_cost
    input_limit = 3.
    options.convergence_tol = 0.001
else:
    cost_function = quadratic_regulator_cost
    input_limit = 3.
    options.convergence_tol = 0.1

qbins = np.linspace(0., 2. * math.pi, 51)
qdotbins = np.linspace(-10., 10., 51)
state_grid = [set(qbins), set(qdotbins)]
options.periodic_boundary_conditions = [
    PeriodicBoundaryCondition(0, 0., 2.*math.pi),
]
input_grid = [set(np.linspace(-input_limit, input_limit, 9))]
timestep = 0.01

[Q, Qdot] = np.meshgrid(qbins, qdotbins)