def test_discrete_states():
    """Sanity-check the discretization `disc` on every discrete state.

    For each state index this checks that it survives a round trip through
    the continuous representation, that every state it reaches is a valid
    index, and that steering, accelerating, braking and (at top speed)
    coasting each land on a different state that is listed as reachable.
    """
    nstates = disc.nstates()
    print('There are %d states' % nstates)

    for si in xrange(disc.nstates()):
        s = disc.to_continuous(si)

        # integer -> continuous -> integer must be the identity
        ti = disc.to_integer(s)
        assert ti == si, (
            'rediscritized state #%d is #%d. continuous is %s' % (si, ti, s))

        # everything reachable from here must be a legal state index
        R = disc.reachable_states(si)
        for sj in R:
            assert 0 <= sj < nstates, (
                'state %d reaches invalid state %d' % (si, sj))

        def successor(control):
            "discrete state obtained by applying `control` to s"
            return disc.to_integer(sim.apply_control(s, control)['val'])

        # quantized coordinates of this state
        ix, iy, _ialpha, ispeed, itheta = disc.integer_to_qcoords(si)

        # steering: only checked when the wheel is not already at its limit
        si_left = successor((0., disc.max_dtheta))
        if itheta < disc.ntheta - 1:
            assert si_left != si and si_left in R, 'left turn is not reachable'

        si_right = successor((0., -disc.max_dtheta))
        if itheta > 0:
            assert si_right != si and si_right in R, 'right turn is not reachable'

        # acceleration: only checked when the speed is not already at its limit
        si_faster = successor((disc.max_ddx, 0.))
        if ispeed < disc.nspeed - 1:
            assert si_faster != si and si_faster in R, "can't go faster"

        si_slower = successor((-disc.max_ddx, 0.))
        if ispeed > 0:
            assert si_slower != si and si_slower in R, "can't go slower"

        # at top speed, away from the grid border, coasting must change state
        # NOTE(review): iy is bounded by disc.nx, not a y-extent; this looks
        # like a copy-paste slip -- confirm before changing.
        if (ispeed == disc.nspeed - 1
                and 0 < ix < disc.nx - 1
                and 0 < iy < disc.nx - 1):
            si_next = successor((0., 0.))
            assert si_next != si and si_next in R, "no way to move"

    print('OK')
def test_discrete_states():
    """Sanity-check the discretization `disc` on every discrete state.

    For each state index this checks that it survives a round trip through
    the continuous representation, that every state it reaches is a valid
    index, and that steering, accelerating, braking and (at top speed)
    coasting each land on a different state that is listed as reachable.
    """
    nstates = disc.nstates()
    print('There are %d states' % nstates)

    for si in xrange(disc.nstates()):
        s = disc.to_continuous(si)

        # integer -> continuous -> integer must be the identity
        ti = disc.to_integer(s)
        assert ti == si, (
            'rediscritized state #%d is #%d. continuous is %s' % (si, ti, s))

        # everything reachable from here must be a legal state index
        R = disc.reachable_states(si)
        for sj in R:
            assert 0 <= sj < nstates, (
                'state %d reaches invalid state %d' % (si, sj))

        def successor(control):
            "discrete state obtained by applying `control` to s"
            return disc.to_integer(sim.apply_control(s, control)['val'])

        # quantized coordinates of this state
        ix, iy, _ialpha, ispeed, itheta = disc.integer_to_qcoords(si)

        # steering: only checked when the wheel is not already at its limit
        si_left = successor((0., disc.max_dtheta))
        if itheta < disc.ntheta - 1:
            assert si_left != si and si_left in R, 'left turn is not reachable'

        si_right = successor((0., -disc.max_dtheta))
        if itheta > 0:
            assert si_right != si and si_right in R, 'right turn is not reachable'

        # acceleration: only checked when the speed is not already at its limit
        si_faster = successor((disc.max_ddx, 0.))
        if ispeed < disc.nspeed - 1:
            assert si_faster != si and si_faster in R, "can't go faster"

        si_slower = successor((-disc.max_ddx, 0.))
        if ispeed > 0:
            assert si_slower != si and si_slower in R, "can't go slower"

        # at top speed, away from the grid border, coasting must change state
        # NOTE(review): iy is bounded by disc.nx, not a y-extent; this looks
        # like a copy-paste slip -- confirm before changing.
        if (ispeed == disc.nspeed - 1
                and 0 < ix < disc.nx - 1
                and 0 < iy < disc.nx - 1):
            si_next = successor((0., 0.))
            assert si_next != si and si_next in R, "no way to move"

    print('OK')
def test_greedy_controller():
    """Drive the greedy controller for 80 steps and display the trajectory.

    Starts from a fixed state on a path produced by sim.genpath(), records
    the visited states and the one-step cost at each chosen control, and
    hands both to sim.show_results.
    """
    path = sim.genpath()
    s = (.5, -.7, pi / 2, .04, -pi / 4)

    # control limits and cost weights
    max_dtheta, max_theta, max_ddx = pi / 20, pi / 4, 0.01
    target_speed, lambda_speed = .1, 10.

    states, costs = [], []
    for _ in xrange(80):
        states.append(s)

        # cost of a control applied at the state recorded for this step;
        # reads states[-1] so it keeps referring to the pre-update state
        def step_cost(u):
            return one_step_cost(u, path, states[-1], target_speed,
                                 lambda_speed)

        u = greedy_controller(step_cost, s, max_dtheta, max_theta, max_ddx)
        costs.append(step_cost(u)[0])
        s = sim.apply_control(s, u)['val']

        # NOTE(review): assumed to run on every iteration -- confirm against
        # the intended layout of the loop body.
        sim.deriv_check(step_cost, u, 1e-2)

    sim.show_results(path, states, costs, animated=0.1)
def test_transition_to_action1():
    """Plan discretely, recover the controls, and replay them continuously.

    Draws the discrete plan with sim.show_results and overlays, in red, the
    trajectory obtained by applying the recovered controls one after another
    starting from the initial state.
    """
    # a random problem
    path = sim.genpath()
    s0 = (.5, -.7, pi / 2, .04, pi / 4)
    max_dtheta = pi / 20
    max_ddx = 0.01

    def cost(s):
        "squared path value at the first two components of s"
        xy = reshape(s[:2], (1, 2))
        return path(xy)['val'] ** 2

    # solve it as a discrete problem
    S, Ls, _ = discrete.continuous_plan(15, cost, 0.8, s0)

    # recover the control behind each planned transition and apply it to the
    # most recent replayed state
    Scont = [s0]
    for s_prev, s_next in zip(S, S[1:]):
        u = transition_to_action(s_prev, s_next, sim.apply_control,
                                 max_dtheta, max_ddx)
        Scont.append(sim.apply_control(Scont[-1], u)['val'])

    # discrete trajectory
    sim.show_results(path, S, Ls, animated=0)

    # replayed trajectory on the same figure
    axes = P.figure(0).add_subplot(1, 1, 1)
    fade = linspace(0.1, .5, len(S)) ** 2
    sim.animate_car(axes, Scont, remove_car=False, sleep=0., alphas=fade,
                    color='r')
    P.draw()
def test_greedy_controller():
    """Drive the greedy controller for 80 steps and display the trajectory.

    Starts from a fixed state on a path produced by sim.genpath(), records
    the visited states and the one-step cost at each chosen control, and
    hands both to sim.show_results.
    """
    path = sim.genpath()
    s = (.5, -.7, pi / 2, .04, -pi / 4)

    # control limits and cost weights
    max_dtheta, max_theta, max_ddx = pi / 20, pi / 4, 0.01
    target_speed, lambda_speed = .1, 10.

    states, costs = [], []
    for _ in xrange(80):
        states.append(s)

        # cost of a control applied at the state recorded for this step;
        # reads states[-1] so it keeps referring to the pre-update state
        def step_cost(u):
            return one_step_cost(u, path, states[-1], target_speed,
                                 lambda_speed)

        u = greedy_controller(step_cost, s, max_dtheta, max_theta, max_ddx)
        costs.append(step_cost(u)[0])
        s = sim.apply_control(s, u)['val']

        # NOTE(review): assumed to run on every iteration -- confirm against
        # the intended layout of the loop body.
        sim.deriv_check(step_cost, u, 1e-2)

    sim.show_results(path, states, costs, animated=0.1)
def test_transition_to_action1():
    """Plan discretely, recover the controls, and replay them continuously.

    Draws the discrete plan with sim.show_results and overlays, in red, the
    trajectory obtained by applying the recovered controls one after another
    starting from the initial state.
    """
    # a random problem
    path = sim.genpath()
    s0 = (.5, -.7, pi / 2, .04, pi / 4)
    max_dtheta = pi / 20
    max_ddx = 0.01

    def cost(s):
        "squared path value at the first two components of s"
        xy = reshape(s[:2], (1, 2))
        return path(xy)['val'] ** 2

    # solve it as a discrete problem
    S, Ls, _ = discrete.continuous_plan(15, cost, 0.8, s0)

    # recover the control behind each planned transition and apply it to the
    # most recent replayed state
    Scont = [s0]
    for s_prev, s_next in zip(S, S[1:]):
        u = transition_to_action(s_prev, s_next, sim.apply_control,
                                 max_dtheta, max_ddx)
        Scont.append(sim.apply_control(Scont[-1], u)['val'])

    # discrete trajectory
    sim.show_results(path, S, Ls, animated=0)

    # replayed trajectory on the same figure
    axes = P.figure(0).add_subplot(1, 1, 1)
    fade = linspace(0.1, .5, len(S)) ** 2
    sim.animate_car(axes, Scont, remove_car=False, sleep=0., alphas=fade,
                    color='r')
    P.draw()
def test_transition_to_action():
    """Check that transition_to_action recovers a randomly chosen control.

    Five times over: draw a control, apply it to a fixed state, ask
    transition_to_action for the control that explains the transition, and
    require every component to match to within 1% relative error.
    """
    s0 = (.5, -.7, pi / 2, .04, -pi / 4)
    max_dtheta = pi / 20
    max_ddx = 0.01

    for _ in xrange(5):
        # draw a random control (acceleration first, then steering change)
        ddx = (random.rand() - 0.5) * max_ddx
        dtheta = (random.rand() - 0.5) * max_dtheta
        u = array((ddx, dtheta))
        s1 = sim.apply_control(s0, u)['val']

        # recover it from the observed transition
        u_ = transition_to_action(s0, s1, sim.apply_control, max_dtheta,
                                  max_ddx)
        rel_err = abs(u_ - u) / abs(u)
        assert all(rel_err < 1e-2), (
            'Recovered control %s is not %s' % (u_, u))

    print('OK')
def one_step_cost(u, path, s0, target_speed, lambda_speed):
    """Cost of the state reached from s0 under control u, and its gradient.

    Evaluates the scalar function

       L(u) = c(s(u)) = p(x(u)) + lambda_speed * (speed(u)-target_speed)^2

    where s(u) comes from sim.apply_control(s0, u) and c from state_cost.

    Returns the pair (L, dL), with dL the derivative of L with respect to u
    obtained from the chain rule dc/ds * ds/du.
    """
    # s(u) together with ds/du
    step = sim.apply_control(s0, u, derivs={'du'})

    # c(s(u)) together with dc/ds
    cost = state_cost(path, target_speed, lambda_speed, step['val'],
                      derivs=['ds'])

    return cost['val'], dot(cost['ds'], step['du'])
def reachable_states(self, si):
    """Return the set of discrete states reachable from discrete state si.

    Every control on the nddx-by-ndtheta grid spanning
    [-max_ddx, max_ddx] x [-max_dtheta, max_dtheta] is applied to the
    continuous version of si and the result is re-discretized. The set is
    stored in self.reachable[si] and reused on later calls.
    """
    cached = self.reachable[si]
    if cached is not None:
        return cached

    s = self.to_continuous(si)
    res = set()
    for ddx in linspace(-self.max_ddx, self.max_ddx, self.nddx):
        for dtheta in linspace(-self.max_dtheta, self.max_dtheta,
                               self.ndtheta):
            s_next = sim.apply_control(s, (ddx, dtheta))['val']
            res.add(self.to_integer(s_next))

    # remember the answer for next time
    self.reachable[si] = res
    return res
def reachable_states(self, si):
    """Return the set of discrete states reachable from discrete state si.

    Every control on the nddx-by-ndtheta grid spanning
    [-max_ddx, max_ddx] x [-max_dtheta, max_dtheta] is applied to the
    continuous version of si and the result is re-discretized. The set is
    stored in self.reachable[si] and reused on later calls.
    """
    cached = self.reachable[si]
    if cached is not None:
        return cached

    s = self.to_continuous(si)
    res = set()
    for ddx in linspace(-self.max_ddx, self.max_ddx, self.nddx):
        for dtheta in linspace(-self.max_dtheta, self.max_dtheta,
                               self.ndtheta):
            s_next = sim.apply_control(s, (ddx, dtheta))['val']
            res.add(self.to_integer(s_next))

    # remember the answer for next time
    self.reachable[si] = res
    return res
def one_step_cost(u, path, s0, target_speed, lambda_speed):
    """Cost of the state reached from s0 under control u, and its gradient.

    Evaluates the scalar function

       L(u) = c(s(u)) = p(x(u)) + lambda_speed * (speed(u)-target_speed)^2

    where s(u) comes from sim.apply_control(s0, u) and c from state_cost.

    Returns the pair (L, dL), with dL the derivative of L with respect to u
    obtained from the chain rule dc/ds * ds/du.
    """
    # s(u) together with ds/du
    step = sim.apply_control(s0, u, derivs={'du'})

    # c(s(u)) together with dc/ds
    cost = state_cost(path, target_speed, lambda_speed, step['val'],
                      derivs=['ds'])

    return cost['val'], dot(cost['ds'], step['du'])
def test_transition_to_action():
    """Check that transition_to_action recovers a randomly chosen control.

    Five times over: draw a control, apply it to a fixed state, ask
    transition_to_action for the control that explains the transition, and
    require every component to match to within 1% relative error.
    """
    s0 = (.5, -.7, pi / 2, .04, -pi / 4)
    max_dtheta = pi / 20
    max_ddx = 0.01

    for _ in xrange(5):
        # draw a random control (acceleration first, then steering change)
        ddx = (random.rand() - 0.5) * max_ddx
        dtheta = (random.rand() - 0.5) * max_dtheta
        u = array((ddx, dtheta))
        s1 = sim.apply_control(s0, u)['val']

        # recover it from the observed transition
        u_ = transition_to_action(s0, s1, sim.apply_control, max_dtheta,
                                  max_ddx)
        rel_err = abs(u_ - u) / abs(u)
        assert all(rel_err < 1e-2), (
            'Recovered control %s is not %s' % (u_, u))

    print('OK')
def reachable(T, s0, sT, cost, dynamics, max_dtheta, max_theta, max_ddx,
              max_line_search=30, show_results=lambda *a:None):
    """Find control signals u_1...u_{T-1}, u_t=(ddx_t,dtheta_t) and the
    ensuing states that minimize

       L(s_0,...,s_{T-1})

    subject to

       s_t = f(s_{t-1}, u_t)
       | dtheta_t | <= max_dtheta
       | ddx_t | <= max_ddx
       | s_t[4] | <= max_theta     (enforced inside the QP only)

    Exactly one of sT or cost must be None. If sT is set, then

       L = || s_{T-1} - sT ||^2

    otherwise, it is

       L = sum_{t=0}^{T-1} cost(s_t)

    Solves this as a Sequential Quadratic program by approximating L by a
    quadratic and f by an affine function about the previous iterate, then
    line-searching along the resulting control update.

    Parameters:
      T: number of states in the trajectory, s_0 included.
      s0: initial state; converted with array(). The QP declares states of
        size 5 and controls of size 2.
      sT: target for the final state, or None.
      cost: callable or None. Called as cost(s)['val'] and as
        cost(s, derivs={'ds','ds2'}), whose result is indexed with 'val',
        'ds' and 'ds2'.
      dynamics: callable invoked as dynamics(s, u, {'du','ds'}); its result
        is indexed with 'val', 'ds' and 'du'. Used only to linearize f.
      max_dtheta, max_theta, max_ddx: bounds listed above.
      max_line_search: budget of line-search trials for the whole call; the
        counter is never reset between outer iterations.
      show_results: callback invoked once per outer iteration with the
        current states, the per-state costs (None when cost is None) and a
        progress message.

    Returns:
      (Sv, Uv): the last accepted list of states and list of controls.
      Uv[0] is always None.

    NOTE(review): the line search rolls states forward with
    sim.apply_control rather than with `dynamics` -- confirm callers always
    pass the same function.
    NOTE(review): CX is presumably cvxpy -- confirm against the file's
    imports.
    """
    s0 = array(s0)
    if sT is not None:
        sT = array(sT)

    # exactly one of the two objective definitions must be supplied
    assert (cost is None) != (sT is None), 'only one of cost or sT may be specified'

    # initial iterates and objective terms
    Sv = [s0] * T
    # NOTE(review): this initial Uv has T+1 entries, while every accepted
    # iterate below has T entries ([None] plus T-1 controls).
    Uv = [None] + [zeros(2)]*T
    L = None
    if cost:
        L = [cost(s0)['val']] * T

    last_obj = None      # last objective value attained
    step = 1.            # last line search step size
    iters = 0
    n_line_searches = 0  # cumulative over all outer iterations

    while True:
        show_results(Sv, L, '%d, %d line searches so far. step size %g'%(iters, n_line_searches, step))
        iters += 1

        # variables, objective, and constraints of the quadratic problem
        S = [None] * T
        U = [None] * T   # U[0] stays None; controls are indexed from 1
        S[0] = CX.Parameter(5, name='s0')
        S[0].value = s0
        constraints = []
        if cost:
            objective = zeros(1)

        # define the QP
        for t in xrange(1,T):
            # f(u_t, s_{t-1}) and its derivatives at the previous iterate
            f = dynamics(Sv[t-1], Uv[t], {'du','ds'})
            dfds = vstack(f['ds'])
            dfdu = vstack(f['du'])

            # define u_t and s_t
            U[t] = CX.Variable(2, name='u%d'%t)
            S[t] = CX.Variable(5, name='s%d'%t)

            # constraints:
            #   s_t = linearized f(s_t-1, u_t) about previous iterate
            #   and bounds on s_t and u_t
            constraints += [
                S[t] == f['val'] + dfds*(S[t-1]-Sv[t-1]) + dfdu*(U[t]-Uv[t]),
                CX.abs(U[t][0]) <= max_ddx,
                CX.abs(U[t][1]) <= max_dtheta,
                CX.abs(S[t][4]) <= max_theta ]

            if cost:
                # accumulate a second-order model of cost about Sv[t];
                # make_psd is applied to the Hessian before quad_form
                c = cost(Sv[t], derivs={'ds','ds2'})
                c['ds2'] = make_psd(c['ds2'])
                objective += c['val'] + (S[t]-Sv[t]).T*c['ds'] + 0.5*CX.quad_form(S[t]-Sv[t], c['ds2'])

        if sT is not None:
            # objective is || s_{T-1} - sT ||^2
            objective = CX.square(CX.norm(S[T-1] - sT))

        # solve for S and U
        p = CX.Problem(CX.Minimize(objective), constraints)
        r = p.solve(solver=CX.CVXOPT, verbose=False)
        assert isfinite(r)

        # line search on U, from Uv along U-Uv
        line_search_failed = True
        while n_line_searches < max_line_search:
            n_line_searches += 1

            # compute and apply the controls along the step
            Us = []
            Svs = [s0]
            # zip stops at the shorter input, U[1:], i.e. T-1 controls
            for u,u0 in zip(U[1:],Uv[1:]):
                # a step along the search direction
                us = u0 + step * (ravel(u.value)-u0)
                # make it feasible
                us[0] = clip(us[0], -max_ddx, max_ddx)
                us[1] = clip(us[1], -max_dtheta, max_dtheta)
                Us.append(us)
                # apply controls
                Svs.append( sim.apply_control(Svs[-1], us)['val'] )

            # true (non-linearized) objective of the rolled-out trajectory
            if cost:
                L = [ cost(s)['val'] for s in Svs ]
                obj = sum(L)
            else:
                obj = sum((Svs[-1]-sT)**2)

            if last_obj is None or obj < last_obj:
                step *= 1.1   # lengthen the step for the next round
                line_search_failed = False
                # improvement found; stop searching
                break
            else:
                step *= 0.7   # shorten the step and try again

        if line_search_failed:
            # no improving step within the budget: treat as converged and
            # throw away this iterate
            break
        else:
            # accept the iterate
            Sv = Svs
            Uv = [None] + Us
            last_obj = obj

    return Sv,Uv
def reachable(T,
              s0,
              sT,
              cost,
              dynamics,
              max_dtheta,
              max_theta,
              max_ddx,
              max_line_search=30,
              show_results=lambda *a: None):
    """Find control signals u_1...u_{T-1}, u_t=(ddx_t,dtheta_t) and the
    ensuing states that minimize

       L(s_0,...,s_{T-1})

    subject to

       s_t = f(s_{t-1}, u_t)
       | dtheta_t | <= max_dtheta
       | ddx_t | <= max_ddx
       | s_t[4] | <= max_theta     (enforced inside the QP only)

    Exactly one of sT or cost must be None. If sT is set, then

       L = || s_{T-1} - sT ||^2

    otherwise, it is

       L = sum_{t=0}^{T-1} cost(s_t)

    Solves this as a Sequential Quadratic program by approximating L by a
    quadratic and f by an affine function about the previous iterate, then
    line-searching along the resulting control update.

    Parameters:
      T: number of states in the trajectory, s_0 included.
      s0: initial state; converted with array(). The QP declares states of
        size 5 and controls of size 2.
      sT: target for the final state, or None.
      cost: callable or None. Called as cost(s)['val'] and as
        cost(s, derivs={'ds', 'ds2'}), whose result is indexed with 'val',
        'ds' and 'ds2'.
      dynamics: callable invoked as dynamics(s, u, {'du', 'ds'}); its result
        is indexed with 'val', 'ds' and 'du'. Used only to linearize f.
      max_dtheta, max_theta, max_ddx: bounds listed above.
      max_line_search: budget of line-search trials for the whole call; the
        counter is never reset between outer iterations.
      show_results: callback invoked once per outer iteration with the
        current states, the per-state costs (None when cost is None) and a
        progress message.

    Returns:
      (Sv, Uv): the last accepted list of states and list of controls.
      Uv[0] is always None.

    NOTE(review): the line search rolls states forward with
    sim.apply_control rather than with `dynamics` -- confirm callers always
    pass the same function.
    NOTE(review): CX is presumably cvxpy -- confirm against the file's
    imports.
    """
    s0 = array(s0)
    if sT is not None:
        sT = array(sT)

    # exactly one of the two objective definitions must be supplied
    assert (cost is None) != (sT is None), 'only one of cost or sT may be specified'

    # initial iterates and objective terms
    Sv = [s0] * T
    # NOTE(review): this initial Uv has T+1 entries, while every accepted
    # iterate below has T entries ([None] plus T-1 controls).
    Uv = [None] + [zeros(2)] * T
    L = None
    if cost:
        L = [cost(s0)['val']] * T

    last_obj = None  # last objective value attained
    step = 1.  # last line search step size
    iters = 0
    n_line_searches = 0  # cumulative over all outer iterations

    while True:
        show_results(
            Sv, L, '%d, %d line searches so far. step size %g' %
            (iters, n_line_searches, step))
        iters += 1

        # variables, objective, and constraints of the quadratic problem
        S = [None] * T
        U = [None] * T  # U[0] stays None; controls are indexed from 1
        S[0] = CX.Parameter(5, name='s0')
        S[0].value = s0
        constraints = []
        if cost:
            objective = zeros(1)

        # define the QP
        for t in xrange(1, T):
            # f(u_t, s_{t-1}) and its derivatives at the previous iterate
            f = dynamics(Sv[t - 1], Uv[t], {'du', 'ds'})
            dfds = vstack(f['ds'])
            dfdu = vstack(f['du'])

            # define u_t and s_t
            U[t] = CX.Variable(2, name='u%d' % t)
            S[t] = CX.Variable(5, name='s%d' % t)

            # constraints:
            #   s_t = linearized f(s_t-1, u_t) about previous iterate
            #   and bounds on s_t and u_t
            constraints += [
                S[t] == f['val'] + dfds * (S[t - 1] - Sv[t - 1]) + dfdu *
                (U[t] - Uv[t]),
                CX.abs(U[t][0]) <= max_ddx,
                CX.abs(U[t][1]) <= max_dtheta,
                CX.abs(S[t][4]) <= max_theta
            ]

            if cost:
                # accumulate a second-order model of cost about Sv[t];
                # make_psd is applied to the Hessian before quad_form
                c = cost(Sv[t], derivs={'ds', 'ds2'})
                c['ds2'] = make_psd(c['ds2'])
                objective += c['val'] + (
                    S[t] - Sv[t]).T * c['ds'] + 0.5 * CX.quad_form(
                        S[t] - Sv[t], c['ds2'])

        if sT is not None:
            # objective is || s_{T-1} - sT ||^2
            objective = CX.square(CX.norm(S[T - 1] - sT))

        # solve for S and U
        p = CX.Problem(CX.Minimize(objective), constraints)
        r = p.solve(solver=CX.CVXOPT, verbose=False)
        assert isfinite(r)

        # line search on U, from Uv along U-Uv
        line_search_failed = True
        while n_line_searches < max_line_search:
            n_line_searches += 1

            # compute and apply the controls along the step
            Us = []
            Svs = [s0]
            # zip stops at the shorter input, U[1:], i.e. T-1 controls
            for u, u0 in zip(U[1:], Uv[1:]):
                # a step along the search direction
                us = u0 + step * (ravel(u.value) - u0)
                # make it feasible
                us[0] = clip(us[0], -max_ddx, max_ddx)
                us[1] = clip(us[1], -max_dtheta, max_dtheta)
                Us.append(us)
                # apply controls
                Svs.append(sim.apply_control(Svs[-1], us)['val'])

            # true (non-linearized) objective of the rolled-out trajectory
            if cost:
                L = [cost(s)['val'] for s in Svs]
                obj = sum(L)
            else:
                obj = sum((Svs[-1] - sT)**2)

            if last_obj is None or obj < last_obj:
                step *= 1.1  # lengthen the step for the next round
                line_search_failed = False
                # improvement found; stop searching
                break
            else:
                step *= 0.7  # shorten the step and try again

        if line_search_failed:
            # no improving step within the budget: treat as converged and
            # throw away this iterate
            break
        else:
            # accept the iterate
            Sv = Svs
            Uv = [None] + Us
            last_obj = obj

    return Sv, Uv