def test_linear_programming_approximate_dynamic_programming(self): integrator = Integrator(1) simulator = Simulator(integrator) # minimum time cost function (1 for all non-zero states). def cost_function(context): x = context.get_continuous_state_vector().CopyToVector() if (math.fabs(x[0]) > 0.1): return 1. else: return 0. def cost_to_go_function(state, parameters): return parameters[0] * math.fabs(state[0]) state_samples = np.array([[-4., -3., -2., -1., 0., 1., 2., 3., 4.]]) input_samples = np.array([[-1., 0., 1.]]) timestep = 1.0 options = DynamicProgrammingOptions() options.discount_factor = 1. J = LinearProgrammingApproximateDynamicProgramming( simulator, cost_function, cost_to_go_function, 1, state_samples, input_samples, timestep, options) self.assertAlmostEqual(J[0], 1., delta=1e-6)
def test_linear_programming_approximate_dynamic_programming(self): integrator = Integrator(1) simulator = Simulator(integrator) # minimum time cost function (1 for all non-zero states). def cost_function(context): x = context.get_continuous_state_vector().CopyToVector() if (math.fabs(x[0]) > 0.1): return 1. else: return 0. def cost_to_go_function(state, parameters): return parameters[0] * math.fabs(state[0]) state_samples = np.array([[-4., -3., -2., -1., 0., 1., 2., 3., 4.]]) input_samples = np.array([[-1., 0., 1.]]) timestep = 1.0 options = DynamicProgrammingOptions() options.discount_factor = 1. J = LinearProgrammingApproximateDynamicProgramming( simulator, cost_function, cost_to_go_function, 1, state_samples, input_samples, timestep, options) self.assertAlmostEqual(J[0], 1., delta=1e-6)
def test_fitted_value_iteration_pendulum(self): plant = PendulumPlant() simulator = Simulator(plant) def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() x[0] = x[0] - math.pi u = plant.EvalVectorInput(context, 0).CopyToVector() return x.dot(x) + u.dot(u) qbins = np.linspace(0., 2. * math.pi, 51) qdotbins = np.linspace(-10., 10., 41) state_grid = [set(qbins), set(qdotbins)] input_limit = 2. input_mesh = [set(np.linspace(-input_limit, input_limit, 9))] timestep = 0.01 [Q, Qdot] = np.meshgrid(qbins, qdotbins) fig = plt.figure() ax = fig.gca(projection='3d') def draw(iteration, mesh, cost_to_go, policy): # Drawing is slow, don't draw every frame. if iteration % 20 != 0: return plt.title("iteration " + str(iteration)) J = np.reshape(cost_to_go, Q.shape) surf = ax.plot_surface(Q, Qdot, J, rstride=1, cstride=1, cmap=color_map.jet) plt.pause(0.00001) surf.remove() options = DynamicProgrammingOptions() options.convergence_tol = 1. options.state_indices_with_periodic_boundary_conditions = {0} options.visualization_callback = draw policy, cost_to_go = FittedValueIteration(simulator, quadratic_regulator_cost, state_grid, input_mesh, timestep, options)
def test_fitted_value_iteration_pendulum(self): plant = PendulumPlant() simulator = Simulator(plant) def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() x[0] = x[0] - math.pi u = plant.EvalVectorInput(context, 0).CopyToVector() return x.dot(x) + u.dot(u) qbins = np.linspace(0., 2.*math.pi, 51) qdotbins = np.linspace(-10., 10., 41) state_grid = [set(qbins), set(qdotbins)] input_limit = 2. input_mesh = [set(np.linspace(-input_limit, input_limit, 9))] timestep = 0.01 [Q, Qdot] = np.meshgrid(qbins, qdotbins) fig = plt.figure() ax = fig.gca(projection='3d') def draw(iteration, mesh, cost_to_go, policy): # Drawing is slow, don't draw every frame. if iteration % 20 != 0: return plt.title("iteration " + str(iteration)) J = np.reshape(cost_to_go, Q.shape) surf = ax.plot_surface(Q, Qdot, J, rstride=1, cstride=1, cmap=color_map.jet) plt.pause(0.00001) surf.remove() options = DynamicProgrammingOptions() options.convergence_tol = 1. options.state_indices_with_periodic_boundary_conditions = {0} options.visualization_callback = draw policy, cost_to_go = FittedValueIteration(simulator, quadratic_regulator_cost, state_grid, input_mesh, timestep, options)
def test_fitted_value_iteration_pendulum(self): plant = PendulumPlant() simulator = Simulator(plant) def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() x[0] = x[0] - math.pi u = plant.EvalVectorInput(context, 0).CopyToVector() return x.dot(x) + u.dot(u) # Note: intentionally under-sampled to keep the problem small qbins = np.linspace(0., 2.*math.pi, 11) qdotbins = np.linspace(-10., 10., 11) state_grid = [set(qbins), set(qdotbins)] input_limit = 2. input_mesh = [set(np.linspace(-input_limit, input_limit, 5))] timestep = 0.01 num_callbacks = [0] def callback(iteration, mesh, cost_to_go, policy): # Drawing is slow, don't draw every frame. num_callbacks[0] += 1 options = DynamicProgrammingOptions() options.convergence_tol = 1. options.periodic_boundary_conditions = [ PeriodicBoundaryCondition(0, 0., 2.*math.pi) ] options.visualization_callback = callback policy, cost_to_go = FittedValueIteration(simulator, quadratic_regulator_cost, state_grid, input_mesh, timestep, options) self.assertGreater(num_callbacks[0], 0)
def test_fitted_value_iteration_pendulum(self): plant = PendulumPlant() simulator = Simulator(plant) def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() x[0] = x[0] - math.pi u = plant.EvalVectorInput(context, 0).CopyToVector() return x.dot(x) + u.dot(u) # Note: intentionally under-sampled to keep the problem small qbins = np.linspace(0., 2. * math.pi, 11) qdotbins = np.linspace(-10., 10., 11) state_grid = [set(qbins), set(qdotbins)] input_limit = 2. input_mesh = [set(np.linspace(-input_limit, input_limit, 5))] timestep = 0.01 num_callbacks = [0] def callback(iteration, mesh, cost_to_go, policy): # Drawing is slow, don't draw every frame. num_callbacks[0] += 1 options = DynamicProgrammingOptions() options.convergence_tol = 1. options.periodic_boundary_conditions = [ PeriodicBoundaryCondition(0, 0., 2. * math.pi) ] options.visualization_callback = callback policy, cost_to_go = FittedValueIteration(simulator, quadratic_regulator_cost, state_grid, input_mesh, timestep, options) self.assertGreater(num_callbacks[0], 0)
VectorSystem.__init__(self, 1, 2, direct_feedthrough=False) self.DeclareContinuousState(2) # qqdot(t) = u(t) def DoCalcVectorTimeDerivatives(self, context, u, x, xdot): xdot[0] = x[1] xdot[1] = u # y(t) = x(t) def DoCalcVectorOutput(self, context, u, x, y): y[:] = x plant = DoubleIntegrator() simulator = Simulator(plant) options = DynamicProgrammingOptions() def min_time_cost(context): x = context.get_continuous_state_vector().CopyToVector() if x.dot(x) < .05: return 0. return 1. def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() u = plant.EvalVectorInput(context, 0).CopyToVector() return x.dot(x) + 20 * u.dot(u)
self._DeclareContinuousState(2) # qddot(t) = u(t) def _DoCalcVectorTimeDerivatives(self, context, u, x, xdot): xdot[0] = x[1] xdot[1] = u # y(t) = x(t) def _DoCalcVectorOutput(self, context, u, x, y): y[:] = x # Set up a simulation of this system. plant = DoubleIntegrator() simulator = Simulator(plant) options = DynamicProgrammingOptions() # This function evaluates a running cost # that penalizes distance from the origin, # as well as control effort. def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() u = plant.EvalVectorInput(context, 0).CopyToVector() return 2 * x.dot(x) + 0 * u.dot(u) # This function evaluates a minimum time # running cost, given a context (which contains # information about the current system state). def min_time_cost(context):
def VI(u_cost=180.**2): tree = RigidBodyTree("/opt/underactuated/src/cartpole/cartpole.urdf", FloatingBaseType.kFixed) plant = RigidBodyPlant(tree) simulator = Simulator(plant) options = DynamicProgrammingOptions() def min_time_cost(context): x = context.get_continuous_state_vector().CopyToVector() u = plant.EvalVectorInput(context, 0).CopyToVector() x[1] = x[1] - math.pi if x.dot(x) < .1: # seeks get x to (math.pi, 0., 0., 0.) return 0. return 1. + 2*x.dot(x)/(10**2+math.pi**2+10**2+math.pi**2) + u.dot(u)/(u_cost) def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() x[1] = x[1] - math.pi u = plant.EvalVectorInput(context, 0).CopyToVector() return 2*x.dot(x)/(10**2+math.pi**2+10**2+math.pi**2) + u.dot(u)/(u_cost) if (True): cost_function = min_time_cost input_limit = 360. options.convergence_tol = 0.001 state_steps = 19 input_steps = 19 else: cost_function = quadratic_regulator_cost input_limit = 250. options.convergence_tol = 0.01 state_steps = 19 input_steps = 19 ####### SETTINGS ####### My cartpole linspaces are off?????? # State: (x, theta, x_dot, theta_dot) # Previous Best... (min. time) (3) xbins = np.linspace(-10., 10., state_steps) thetabins = np.hstack((np.linspace(0., math.pi-0.2, 8), np.linspace(math.pi-0.2, math.pi+0.2, 11), np.linspace(math.pi+0.2, 8, 2*math.pi))) xdotbins = np.linspace(-10., 10., state_steps) thetadotbins = np.linspace(-10., 10., state_steps) timestep = 0.01 # Test 1 (4) xbins = np.linspace(-10., 10., state_steps) thetabins = np.hstack((np.linspace(0., math.pi-0.12, 8), np.linspace(math.pi-0.12, math.pi+0.12, 11), np.linspace(math.pi+0.12, 8, 2*math.pi))) xdotbins = np.linspace(-10., 10., state_steps+2) thetadotbins = np.hstack((np.linspace(-10., -1.5, 9), np.linspace(-1.5, 1.5, 11), np.linspace(1.5, 10., 9))) # timestep = 0.001 <- wasn't active... # Test 2 - Test 1 was worse? WOW I HAD A BUG! - in my last np.linspace (5) SWEET!!! xbins = np.linspace(-10., 10., state_steps) thetabins = np.hstack((np.linspace(0., math.pi-0.2, 10), np.linspace(math.pi-0.2, math.pi+0.2, 9), np.linspace(math.pi+0.2, 2*math.pi, 10))) xdotbins = np.linspace(-10., 10., state_steps+2) thetadotbins = np.linspace(-10., 10., state_steps) timestep = 0.01 input_limit = 1000. # test_stabilize_top7 for the higher input_limit version options.periodic_boundary_conditions = [ PeriodicBoundaryCondition(1, 0., 2.*math.pi), ] state_grid = [set(xbins), set(thetabins), set(xdotbins), set(thetadotbins)] input_grid = [set(np.linspace(-input_limit, input_limit, input_steps))] # Input: x force print("VI with u_cost={} beginning".format(u_cost)) policy, cost_to_go = FittedValueIteration(simulator, cost_function, state_grid, input_grid, timestep, options) print("VI with u_cost={} completed!".format(u_cost)) save_policy("u_cost={:.0f}_torque_limit={:.0f}".format(u_cost, input_limit), policy, cost_to_go, state_grid) return policy, cost_to_go
import math import numpy as np import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D from matplotlib import cm from pydrake.all import (DiagramBuilder, SignalLogger, Simulator, VectorSystem) from pydrake.examples.pendulum import PendulumPlant from pydrake.systems.controllers import (DynamicProgrammingOptions, FittedValueIteration, PeriodicBoundaryCondition) from visualizer import PendulumVisualizer plant = PendulumPlant() simulator = Simulator(plant) options = DynamicProgrammingOptions() def min_time_cost(context): x = context.get_continuous_state_vector().CopyToVector() x[0] = x[0] - math.pi if x.dot(x) < .05: return 0. return 1. def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() x[0] = x[0] - math.pi u = plant.EvalVectorInput(context, 0).CopyToVector() return 2 * x.dot(x) + u.dot(u)
VectorSystem.__init__(self, 1, 1) self._DeclareContinuousState(2) # qqdot(t) = u(t) def _DoCalcVectorTimeDerivatives(self, context, u, x, xdot): xdot[0] = x[1] xdot[1] = u # y(t) = x(t) def _DoCalcVectorOutput(self, context, u, x, y): y[:] = x plant = DoubleIntegrator() simulator = Simulator(plant) options = DynamicProgrammingOptions() def min_time_cost(context): x = context.get_continuous_state_vector().CopyToVector() if x.dot(x) < .05: return 0. return 1. def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() u = plant.EvalVectorInput(context, 0).CopyToVector() return 2*x.dot(x) + 10*u.dot(u)
import math import numpy as np import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D from matplotlib import cm from pydrake.all import (DiagramBuilder, Simulator, VectorSystem) from pydrake.examples.pendulum import PendulumPlant from pydrake.systems.controllers import (DynamicProgrammingOptions, FittedValueIteration) from underactuated import PendulumVisualizer plant = PendulumPlant() simulator = Simulator(plant) options = DynamicProgrammingOptions() def min_time_cost(context): x = context.get_continuous_state_vector().CopyToVector() x[0] = x[0] - math.pi if x.dot(x) < .05: return 0. return 1. def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() x[0] = x[0] - math.pi u = plant.EvalVectorInput(context, 0).CopyToVector() return 2 * x.dot(x) + u.dot(u)
import math import numpy as np import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D from matplotlib import cm from pydrake.all import (DiagramBuilder, SignalLogger, Simulator, VectorSystem) from pydrake.examples.pendulum import PendulumPlant from pydrake.systems.controllers import ( DynamicProgrammingOptions, FittedValueIteration, PeriodicBoundaryCondition) from visualizer import PendulumVisualizer plant = PendulumPlant() simulator = Simulator(plant) options = DynamicProgrammingOptions() def min_time_cost(context): x = context.get_continuous_state_vector().CopyToVector() x[0] = x[0] - math.pi if x.dot(x) < .05: return 0. return 1. def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() x[0] = x[0] - math.pi u = plant.EvalVectorInput(context, 0).CopyToVector() return 2*x.dot(x) + u.dot(u)
VectorSystem.__init__(self, 1, 2, direct_feedthrough=False) self.DeclareContinuousState(2) # qqdot(t) = u(t) def DoCalcVectorTimeDerivatives(self, context, u, x, xdot): xdot[0] = x[1] xdot[1] = u # y(t) = x(t) def DoCalcVectorOutput(self, context, u, x, y): y[:] = x plant = DoubleIntegrator() simulator = Simulator(plant) options = DynamicProgrammingOptions() def min_time_cost(context): x = context.get_continuous_state_vector().CopyToVector() if x.dot(x) < .05: return 0. return 1. def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() u = plant.EvalVectorInput(context, 0).CopyToVector() return x.dot(x) + 20*u.dot(u)