Esempio n. 1
0
    def test_linear_programming_approximate_dynamic_programming(self):
        integrator = Integrator(1)
        simulator = Simulator(integrator)

        # minimum time cost function (1 for all non-zero states).
        def cost_function(context):
            x = context.get_continuous_state_vector().CopyToVector()
            if (math.fabs(x[0]) > 0.1):
                return 1.
            else:
                return 0.

        def cost_to_go_function(state, parameters):
            return parameters[0] * math.fabs(state[0])

        state_samples = np.array([[-4., -3., -2., -1., 0., 1., 2., 3., 4.]])
        input_samples = np.array([[-1., 0., 1.]])

        timestep = 1.0
        options = DynamicProgrammingOptions()
        options.discount_factor = 1.

        J = LinearProgrammingApproximateDynamicProgramming(
            simulator, cost_function, cost_to_go_function, 1,
            state_samples, input_samples, timestep, options)

        self.assertAlmostEqual(J[0], 1., delta=1e-6)
Esempio n. 2
0
    def test_linear_programming_approximate_dynamic_programming(self):
        integrator = Integrator(1)
        simulator = Simulator(integrator)

        # minimum time cost function (1 for all non-zero states).
        def cost_function(context):
            x = context.get_continuous_state_vector().CopyToVector()
            if (math.fabs(x[0]) > 0.1):
                return 1.
            else:
                return 0.

        def cost_to_go_function(state, parameters):
            return parameters[0] * math.fabs(state[0])

        state_samples = np.array([[-4., -3., -2., -1., 0., 1., 2., 3., 4.]])
        input_samples = np.array([[-1., 0., 1.]])

        timestep = 1.0
        options = DynamicProgrammingOptions()
        options.discount_factor = 1.

        J = LinearProgrammingApproximateDynamicProgramming(
            simulator, cost_function, cost_to_go_function, 1,
            state_samples, input_samples, timestep, options)

        self.assertAlmostEqual(J[0], 1., delta=1e-6)
Esempio n. 3
0
    def test_fitted_value_iteration_pendulum(self):
        plant = PendulumPlant()
        simulator = Simulator(plant)

        def quadratic_regulator_cost(context):
            x = context.get_continuous_state_vector().CopyToVector()
            x[0] = x[0] - math.pi
            u = plant.EvalVectorInput(context, 0).CopyToVector()
            return x.dot(x) + u.dot(u)

        qbins = np.linspace(0., 2. * math.pi, 51)
        qdotbins = np.linspace(-10., 10., 41)
        state_grid = [set(qbins), set(qdotbins)]

        input_limit = 2.
        input_mesh = [set(np.linspace(-input_limit, input_limit, 9))]
        timestep = 0.01

        [Q, Qdot] = np.meshgrid(qbins, qdotbins)
        fig = plt.figure()
        ax = fig.gca(projection='3d')

        def draw(iteration, mesh, cost_to_go, policy):
            # Drawing is slow, don't draw every frame.
            if iteration % 20 != 0:
                return
            plt.title("iteration " + str(iteration))
            J = np.reshape(cost_to_go, Q.shape)
            surf = ax.plot_surface(Q,
                                   Qdot,
                                   J,
                                   rstride=1,
                                   cstride=1,
                                   cmap=color_map.jet)
            plt.pause(0.00001)
            surf.remove()

        options = DynamicProgrammingOptions()
        options.convergence_tol = 1.
        options.state_indices_with_periodic_boundary_conditions = {0}
        options.visualization_callback = draw

        policy, cost_to_go = FittedValueIteration(simulator,
                                                  quadratic_regulator_cost,
                                                  state_grid, input_mesh,
                                                  timestep, options)
Esempio n. 4
0
    def test_fitted_value_iteration_pendulum(self):
        plant = PendulumPlant()
        simulator = Simulator(plant)

        def quadratic_regulator_cost(context):
            x = context.get_continuous_state_vector().CopyToVector()
            x[0] = x[0] - math.pi
            u = plant.EvalVectorInput(context, 0).CopyToVector()
            return x.dot(x) + u.dot(u)

        qbins = np.linspace(0., 2.*math.pi, 51)
        qdotbins = np.linspace(-10., 10., 41)
        state_grid = [set(qbins), set(qdotbins)]

        input_limit = 2.
        input_mesh = [set(np.linspace(-input_limit, input_limit, 9))]
        timestep = 0.01

        [Q, Qdot] = np.meshgrid(qbins, qdotbins)
        fig = plt.figure()
        ax = fig.gca(projection='3d')

        def draw(iteration, mesh, cost_to_go, policy):
            # Drawing is slow, don't draw every frame.
            if iteration % 20 != 0:
                return
            plt.title("iteration " + str(iteration))
            J = np.reshape(cost_to_go, Q.shape)
            surf = ax.plot_surface(Q, Qdot, J, rstride=1, cstride=1,
                                   cmap=color_map.jet)
            plt.pause(0.00001)
            surf.remove()

        options = DynamicProgrammingOptions()
        options.convergence_tol = 1.
        options.state_indices_with_periodic_boundary_conditions = {0}
        options.visualization_callback = draw

        policy, cost_to_go = FittedValueIteration(simulator,
                                                  quadratic_regulator_cost,
                                                  state_grid, input_mesh,
                                                  timestep, options)
Esempio n. 5
0
    def test_fitted_value_iteration_pendulum(self):
        plant = PendulumPlant()
        simulator = Simulator(plant)

        def quadratic_regulator_cost(context):
            x = context.get_continuous_state_vector().CopyToVector()
            x[0] = x[0] - math.pi
            u = plant.EvalVectorInput(context, 0).CopyToVector()
            return x.dot(x) + u.dot(u)

        # Note: intentionally under-sampled to keep the problem small
        qbins = np.linspace(0., 2.*math.pi, 11)
        qdotbins = np.linspace(-10., 10., 11)
        state_grid = [set(qbins), set(qdotbins)]

        input_limit = 2.
        input_mesh = [set(np.linspace(-input_limit, input_limit, 5))]
        timestep = 0.01

        num_callbacks = [0]

        def callback(iteration, mesh, cost_to_go, policy):
            # Drawing is slow, don't draw every frame.
            num_callbacks[0] += 1

        options = DynamicProgrammingOptions()
        options.convergence_tol = 1.
        options.periodic_boundary_conditions = [
            PeriodicBoundaryCondition(0, 0., 2.*math.pi)
        ]
        options.visualization_callback = callback

        policy, cost_to_go = FittedValueIteration(simulator,
                                                  quadratic_regulator_cost,
                                                  state_grid, input_mesh,
                                                  timestep, options)

        self.assertGreater(num_callbacks[0], 0)
Esempio n. 6
0
    def test_fitted_value_iteration_pendulum(self):
        plant = PendulumPlant()
        simulator = Simulator(plant)

        def quadratic_regulator_cost(context):
            x = context.get_continuous_state_vector().CopyToVector()
            x[0] = x[0] - math.pi
            u = plant.EvalVectorInput(context, 0).CopyToVector()
            return x.dot(x) + u.dot(u)

        # Note: intentionally under-sampled to keep the problem small
        qbins = np.linspace(0., 2. * math.pi, 11)
        qdotbins = np.linspace(-10., 10., 11)
        state_grid = [set(qbins), set(qdotbins)]

        input_limit = 2.
        input_mesh = [set(np.linspace(-input_limit, input_limit, 5))]
        timestep = 0.01

        num_callbacks = [0]

        def callback(iteration, mesh, cost_to_go, policy):
            # Drawing is slow, don't draw every frame.
            num_callbacks[0] += 1

        options = DynamicProgrammingOptions()
        options.convergence_tol = 1.
        options.periodic_boundary_conditions = [
            PeriodicBoundaryCondition(0, 0., 2. * math.pi)
        ]
        options.visualization_callback = callback

        policy, cost_to_go = FittedValueIteration(simulator,
                                                  quadratic_regulator_cost,
                                                  state_grid, input_mesh,
                                                  timestep, options)

        self.assertGreater(num_callbacks[0], 0)
Esempio n. 7
0
        VectorSystem.__init__(self, 1, 2, direct_feedthrough=False)
        self.DeclareContinuousState(2)

    # qqdot(t) = u(t)
    def DoCalcVectorTimeDerivatives(self, context, u, x, xdot):
        xdot[0] = x[1]
        xdot[1] = u

    # y(t) = x(t)
    def DoCalcVectorOutput(self, context, u, x, y):
        y[:] = x


plant = DoubleIntegrator()
simulator = Simulator(plant)
options = DynamicProgrammingOptions()


def min_time_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    if x.dot(x) < .05:
        return 0.
    return 1.


def quadratic_regulator_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    u = plant.EvalVectorInput(context, 0).CopyToVector()
    return x.dot(x) + 20 * u.dot(u)

Esempio n. 8
0
        self._DeclareContinuousState(2)

    # qddot(t) = u(t)
    def _DoCalcVectorTimeDerivatives(self, context, u, x, xdot):
        xdot[0] = x[1]
        xdot[1] = u

    # y(t) = x(t)
    def _DoCalcVectorOutput(self, context, u, x, y):
        y[:] = x


# Set up a simulation of this system.
plant = DoubleIntegrator()
simulator = Simulator(plant)
options = DynamicProgrammingOptions()


# This function evaluates a running cost
# that penalizes distance from the origin,
# as well as control effort.
def quadratic_regulator_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    u = plant.EvalVectorInput(context, 0).CopyToVector()
    return 2 * x.dot(x) + 0 * u.dot(u)


# This function evaluates a minimum time
# running cost, given a context (which contains
# information about the current system state).
def min_time_cost(context):
Esempio n. 9
0
def VI(u_cost=180.**2):
    tree = RigidBodyTree("/opt/underactuated/src/cartpole/cartpole.urdf",
                         FloatingBaseType.kFixed)
    plant = RigidBodyPlant(tree)
    simulator = Simulator(plant)
    options = DynamicProgrammingOptions()

    def min_time_cost(context):
        x = context.get_continuous_state_vector().CopyToVector()
        u = plant.EvalVectorInput(context, 0).CopyToVector()
        x[1] = x[1] - math.pi
        if x.dot(x) < .1: # seeks get x to (math.pi, 0., 0., 0.)
            return 0.
        return 1. + 2*x.dot(x)/(10**2+math.pi**2+10**2+math.pi**2) + u.dot(u)/(u_cost)

    def quadratic_regulator_cost(context):
        x = context.get_continuous_state_vector().CopyToVector()
        x[1] = x[1] - math.pi
        u = plant.EvalVectorInput(context, 0).CopyToVector()
        return 2*x.dot(x)/(10**2+math.pi**2+10**2+math.pi**2) + u.dot(u)/(u_cost)

    if (True):
        cost_function = min_time_cost
        input_limit = 360.
        options.convergence_tol = 0.001
        state_steps = 19
        input_steps = 19
    else:
        cost_function = quadratic_regulator_cost
        input_limit = 250.
        options.convergence_tol = 0.01
        state_steps = 19
        input_steps = 19

    ####### SETTINGS ####### My cartpole linspaces are off??????
    # State: (x, theta, x_dot, theta_dot)
    # Previous Best... (min. time) (3)
    xbins = np.linspace(-10., 10., state_steps)
    thetabins = np.hstack((np.linspace(0., math.pi-0.2, 8), np.linspace(math.pi-0.2, math.pi+0.2, 11), np.linspace(math.pi+0.2, 8, 2*math.pi)))
    xdotbins = np.linspace(-10., 10., state_steps)
    thetadotbins = np.linspace(-10., 10., state_steps)
    timestep = 0.01

    # Test 1 (4)
    xbins = np.linspace(-10., 10., state_steps)
    thetabins = np.hstack((np.linspace(0., math.pi-0.12, 8), np.linspace(math.pi-0.12, math.pi+0.12, 11), np.linspace(math.pi+0.12, 8, 2*math.pi)))
    xdotbins = np.linspace(-10., 10., state_steps+2)
    thetadotbins = np.hstack((np.linspace(-10., -1.5, 9), np.linspace(-1.5, 1.5, 11), np.linspace(1.5, 10., 9)))
    # timestep = 0.001 <- wasn't active...

    # Test 2 - Test 1 was worse? WOW I HAD A BUG! - in my last np.linspace  (5) SWEET!!!
    xbins = np.linspace(-10., 10., state_steps)
    thetabins = np.hstack((np.linspace(0., math.pi-0.2, 10), np.linspace(math.pi-0.2, math.pi+0.2, 9), np.linspace(math.pi+0.2, 2*math.pi, 10)))
    xdotbins = np.linspace(-10., 10., state_steps+2)
    thetadotbins = np.linspace(-10., 10., state_steps)
    timestep = 0.01
    input_limit = 1000. # test_stabilize_top7 for the higher input_limit version


    options.periodic_boundary_conditions = [
        PeriodicBoundaryCondition(1, 0., 2.*math.pi),
    ]
    state_grid = [set(xbins), set(thetabins), set(xdotbins), set(thetadotbins)]
    input_grid = [set(np.linspace(-input_limit, input_limit, input_steps))] # Input: x force

    print("VI with u_cost={} beginning".format(u_cost))
    policy, cost_to_go = FittedValueIteration(simulator, cost_function,
                                              state_grid, input_grid,
                                              timestep, options)
    print("VI with u_cost={} completed!".format(u_cost))

    save_policy("u_cost={:.0f}_torque_limit={:.0f}".format(u_cost, input_limit), policy, cost_to_go, state_grid)
    return policy, cost_to_go
Esempio n. 10
0
import math
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

from pydrake.all import (DiagramBuilder, SignalLogger, Simulator, VectorSystem)
from pydrake.examples.pendulum import PendulumPlant
from pydrake.systems.controllers import (DynamicProgrammingOptions,
                                         FittedValueIteration,
                                         PeriodicBoundaryCondition)
from visualizer import PendulumVisualizer

plant = PendulumPlant()
simulator = Simulator(plant)
options = DynamicProgrammingOptions()


def min_time_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    x[0] = x[0] - math.pi
    if x.dot(x) < .05:
        return 0.
    return 1.


def quadratic_regulator_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    x[0] = x[0] - math.pi
    u = plant.EvalVectorInput(context, 0).CopyToVector()
    return 2 * x.dot(x) + u.dot(u)
Esempio n. 11
0
        VectorSystem.__init__(self, 1, 1)
        self._DeclareContinuousState(2)

    # qqdot(t) = u(t)
    def _DoCalcVectorTimeDerivatives(self, context, u, x, xdot):
        xdot[0] = x[1]
        xdot[1] = u

    # y(t) = x(t)
    def _DoCalcVectorOutput(self, context, u, x, y):
        y[:] = x


plant = DoubleIntegrator()
simulator = Simulator(plant)
options = DynamicProgrammingOptions()


def min_time_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    if x.dot(x) < .05:
        return 0.
    return 1.


def quadratic_regulator_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    u = plant.EvalVectorInput(context, 0).CopyToVector()
    return 2*x.dot(x) + 10*u.dot(u)

import math
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

from pydrake.all import (DiagramBuilder, Simulator, VectorSystem)
from pydrake.examples.pendulum import PendulumPlant
from pydrake.systems.controllers import (DynamicProgrammingOptions,
                                         FittedValueIteration)
from underactuated import PendulumVisualizer

plant = PendulumPlant()
simulator = Simulator(plant)
options = DynamicProgrammingOptions()


def min_time_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    x[0] = x[0] - math.pi
    if x.dot(x) < .05:
        return 0.
    return 1.


def quadratic_regulator_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    x[0] = x[0] - math.pi
    u = plant.EvalVectorInput(context, 0).CopyToVector()
    return 2 * x.dot(x) + u.dot(u)
Esempio n. 13
0
import math
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

from pydrake.all import (DiagramBuilder, SignalLogger, Simulator, VectorSystem)
from pydrake.examples.pendulum import PendulumPlant
from pydrake.systems.controllers import (
    DynamicProgrammingOptions, FittedValueIteration, PeriodicBoundaryCondition)
from visualizer import PendulumVisualizer


plant = PendulumPlant()
simulator = Simulator(plant)
options = DynamicProgrammingOptions()


def min_time_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    x[0] = x[0] - math.pi
    if x.dot(x) < .05:
        return 0.
    return 1.


def quadratic_regulator_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    x[0] = x[0] - math.pi
    u = plant.EvalVectorInput(context, 0).CopyToVector()
    return 2*x.dot(x) + u.dot(u)
Esempio n. 14
0
        VectorSystem.__init__(self, 1, 2, direct_feedthrough=False)
        self.DeclareContinuousState(2)

    # qqdot(t) = u(t)
    def DoCalcVectorTimeDerivatives(self, context, u, x, xdot):
        xdot[0] = x[1]
        xdot[1] = u

    # y(t) = x(t)
    def DoCalcVectorOutput(self, context, u, x, y):
        y[:] = x


plant = DoubleIntegrator()
simulator = Simulator(plant)
options = DynamicProgrammingOptions()


def min_time_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    if x.dot(x) < .05:
        return 0.
    return 1.


def quadratic_regulator_cost(context):
    x = context.get_continuous_state_vector().CopyToVector()
    u = plant.EvalVectorInput(context, 0).CopyToVector()
    return x.dot(x) + 20*u.dot(u)