Esempio n. 1
0
def test_discrete_states():
    """Exhaustively sanity-check the discretization held in `disc`.

    For every discrete state this verifies that:
      * converting to continuous form and back yields the same index;
      * every state listed as reachable is a valid state index;
      * steering left/right and speeding up/slowing down each land in a
        different state that is listed as reachable, unless the state is
        already at the matching end of the theta/speed grid;
      * at top speed, away from the position border, coasting with a zero
        control also lands in a different reachable state.
    """
    nstates = disc.nstates()
    print('There are %d states' % nstates)

    def index_after(state, control):
        # discrete index of the state the simulator produces for `control`
        return disc.to_integer(sim.apply_control(state, control)['val'])

    for si in xrange(disc.nstates()):
        s = disc.to_continuous(si)

        # round trip must come back to the index we started from
        ti = disc.to_integer(s)
        assert ti == si, \
            'rediscritized state #%d is #%d. continuous is %s' % (si, ti, s)

        R = disc.reachable_states(si)

        # every advertised successor has to be a legal state index
        for sj in R:
            assert 0 <= sj < nstates, \
                'state %d reaches invalid state %d' % (si, sj)

        # quantized coordinate of each state component
        ix, iy, ialpha, ispeed, itheta = disc.integer_to_qcoords(si)

        # steering: only checked when the wheel is not already at its limit
        si_left = index_after(s, (0., disc.max_dtheta))
        if itheta < disc.ntheta - 1:
            assert si_left != si and si_left in R, 'left turn is not reachable'

        si_right = index_after(s, (0., -disc.max_dtheta))
        if itheta > 0:
            assert si_right != si and si_right in R, \
                'right turn is not reachable'

        # speed: only checked when the speed is not already at its limit
        si_faster = index_after(s, (disc.max_ddx, 0.))
        if ispeed < disc.nspeed - 1:
            assert si_faster != si and si_faster in R, "can't go faster"

        si_slower = index_after(s, (-disc.max_ddx, 0.))
        if ispeed > 0:
            assert si_slower != si and si_slower in R, "can't go slower"

        # at top speed and off the border, a zero control must move the car
        top_speed = ispeed == disc.nspeed - 1
        inside_x = 0 < ix < disc.nx - 1
        # NOTE(review): the y bound is compared against disc.nx, exactly as
        # in the original; confirm whether disc.ny was intended.
        inside_y = 0 < iy < disc.nx - 1
        if top_speed and inside_x and inside_y:
            si_next = index_after(s, (0., 0.))
            assert si_next != si and si_next in R, "no way to move"

    print('OK')
Esempio n. 2
0
def test_discrete_states():
  """Walk over every discrete state of `disc` and check its consistency.

  Checks the integer -> continuous -> integer round trip, that reachable
  states are valid indices, and that steering, accelerating, braking and
  (at top speed, away from the border) coasting each lead to a different
  state that appears in the reachable set.
  """
  nstates = disc.nstates()
  print('There are %d states'%nstates)

  def land(state, ddx, dtheta):
    # apply the control (ddx, dtheta) and discretize the resulting state
    return disc.to_integer(sim.apply_control(state, (ddx, dtheta))['val'])

  for si in xrange(disc.nstates()):
    s = disc.to_continuous(si)

    # the continuous form has to discretize back to the same integer
    ti = disc.to_integer(s)
    assert ti == si, \
      'rediscritized state #%d is #%d. continuous is %s'%(si,ti,s)

    R = disc.reachable_states(si)

    # reachable states must all be valid indices
    for sj in R:
      in_range = 0 <= sj < nstates
      assert in_range, 'state %d reaches invalid state %d'%(si,sj)

    # per-component quantized coordinates
    ix,iy,ialpha,ispeed,itheta = disc.integer_to_qcoords(si)

    # turning the wheel, unless theta already sits on the matching bound
    si_left = land(s, 0., disc.max_dtheta)
    if itheta < disc.ntheta-1:
      assert si_left != si and si_left in R, 'left turn is not reachable'

    si_right = land(s, 0., -disc.max_dtheta)
    if itheta > 0:
      assert si_right != si and si_right in R, 'right turn is not reachable'

    # changing speed, unless speed already sits on the matching bound
    si_faster = land(s, disc.max_ddx, 0.)
    if ispeed < disc.nspeed-1:
      assert si_faster != si and si_faster in R, "can't go faster"

    si_slower = land(s, -disc.max_ddx, 0.)
    if ispeed > 0:
      assert si_slower != si and si_slower in R, "can't go slower"

    # The highest speed should cause the car to move when off the border.
    # NOTE(review): iy is bounded by disc.nx here, as in the original;
    # confirm whether disc.ny was meant.
    fastest = ispeed == disc.nspeed-1
    interior = (0 < ix < disc.nx-1) and (0 < iy < disc.nx-1)
    if fastest and interior:
      si_next = land(s, 0., 0.)
      assert si_next != si and si_next in R, "no way to move"

  print('OK')
Esempio n. 3
0
def test_greedy_controller():
  """Run the greedy controller for 80 steps on a generated path and plot it.

  Each step records the current state, lets `greedy_controller` pick a
  control for the one-step cost from that state, records the cost of that
  control, advances the state, and hands the cost function and control to
  `sim.deriv_check`.  The visited states and costs are then passed to
  `sim.show_results`.
  """
  path = sim.genpath()

  # start state, control limits and cost weights
  s = (.5, -.7, pi/2, .04, -pi/4)
  max_dtheta, max_theta, max_ddx = pi/20, pi/4, 0.01
  target_speed, lambda_speed = .1, 10.

  S, Ls = [], []
  for step in xrange(80):
    S.append(s)

    def L(u):
      # one-step cost of u, taken from the state recorded for this step
      return one_step_cost(u, path, S[-1], target_speed, lambda_speed)

    u = greedy_controller(L, s, max_dtheta, max_theta, max_ddx)
    Ls.append(L(u)[0])
    s = sim.apply_control(s, u)['val']

    # L still refers to the pre-update state, since S has not grown yet
    sim.deriv_check(L, u, 1e-2)

  sim.show_results(path, S, Ls, animated=0.1)
Esempio n. 4
0
def test_transition_to_action1():
  """Plan a discrete trajectory, then replay it with recovered controls.

  A plan is obtained from `discrete.continuous_plan` using the squared path
  value at the state's first two components as cost.  For each consecutive
  pair of planned states a control is recovered with `transition_to_action`
  and applied to the last state of a separate continuous rollout.  Both
  trajectories are then drawn.
  """
  # a random problem
  path = sim.genpath()
  s0 = (.5, -.7, pi/2, .04, +pi/4)
  max_dtheta = pi/20
  max_ddx = 0.01

  # solve it as discrete
  def cost(s):
    position = reshape(s[:2], (1,2))
    return path(position)['val']**2
  S, Ls, _unused = discrete.continuous_plan(15, cost, 0.8, s0)

  # recover the control behind each planned transition and roll it out
  Scont = [s0]
  for s_from, s_to in zip(S, S[1:]):
    u = transition_to_action(s_from, s_to, sim.apply_control,
                             max_dtheta, max_ddx)
    Scont.append(sim.apply_control(Scont[-1], u)['val'])

  # show discrete trajectory
  sim.show_results(path, S, Ls, animated=0)

  # overlay the continuous trajectory in red, one alpha per state
  axes = P.figure(0).add_subplot(1,1,1)
  fade = linspace(0.1,.5,len(S))**2
  sim.animate_car(axes, Scont, remove_car=False, sleep=0.,
                  alphas=fade, color='r')
  P.draw()
Esempio n. 5
0
def test_greedy_controller():
    """Simulate 80 greedy control steps along a generated path and show them.

    At every step the current state is recorded, `greedy_controller` chooses
    a control for the one-step cost evaluated from that state, the cost of
    the chosen control is stored, the state is advanced with
    `sim.apply_control`, and the cost function and control are passed to
    `sim.deriv_check`.
    """
    path = sim.genpath()

    s = (.5, -.7, pi / 2, .04, -pi / 4)

    # limits handed to the controller
    max_dtheta = pi / 20
    max_theta = pi / 4
    max_ddx = 0.01

    # parameters of the one-step cost
    target_speed = .1
    lambda_speed = 10.

    S = []
    Ls = []
    for iteration in xrange(80):
        S.append(s)

        def L(u):
            # evaluated from the most recently recorded state
            return one_step_cost(u, path, S[-1], target_speed, lambda_speed)

        u = greedy_controller(L, s, max_dtheta, max_theta, max_ddx)
        step_cost = L(u)[0]
        Ls.append(step_cost)

        s = sim.apply_control(s, u)['val']

        # runs unconditionally, as in the original `if True:` guard
        sim.deriv_check(L, u, 1e-2)

    sim.show_results(path, S, Ls, animated=0.1)
Esempio n. 6
0
def test_transition_to_action1():
    """Replay a discrete plan through controls recovered per transition.

    Builds a plan with `discrete.continuous_plan`, recovers a control for
    each pair of consecutive planned states via `transition_to_action`,
    applies those controls to a continuous rollout that starts at the same
    initial state, and draws both the plan and the rollout.
    """
    # a random problem
    path = sim.genpath()
    s0 = (.5, -.7, pi / 2, .04, +pi / 4)
    max_dtheta = pi / 20
    max_ddx = 0.01

    def cost(s):
        # squared path value at the first two components of the state
        xy = reshape(s[:2], (1, 2))
        return path(xy)['val']**2

    # solve it as discrete
    S, Ls, _ignored = discrete.continuous_plan(15, cost, 0.8, s0)

    # recover control actions and apply them to the continuous rollout
    Scont = [s0]
    for before, after in zip(S, S[1:]):
        u = transition_to_action(before, after, sim.apply_control,
                                 max_dtheta, max_ddx)
        s1hat = sim.apply_control(Scont[-1], u)['val']
        Scont.append(s1hat)

    # show discrete trajectory
    sim.show_results(path, S, Ls, animated=0)

    # show continuous trajectory on top of it
    subplot = P.figure(0).add_subplot(1, 1, 1)
    alphas = linspace(0.1, .5, len(S))**2
    sim.animate_car(subplot, Scont, remove_car=False, sleep=0.,
                    alphas=alphas, color='r')
    P.draw()
Esempio n. 7
0
def test_transition_to_action():
  """Check that `transition_to_action` recovers a randomly drawn control.

  Five times: draw a control with components in +-max_ddx/2 and
  +-max_dtheta/2, apply it to a fixed start state, recover the control from
  the (start, result) pair, and require each component to match within a
  relative error of 1e-2.
  """
  s0 = (.5, -.7, pi/2, .04, -pi/4)
  max_dtheta = pi/20
  max_ddx = 0.01

  for trial in xrange(5):
    # draw a random control (acceleration first, then steering change)
    ddx = (random.rand()-0.5)*max_ddx
    dtheta = (random.rand()-0.5)*max_dtheta
    u = array((ddx, dtheta))
    s1 = sim.apply_control(s0,u)['val']

    # recover it from the observed transition
    u_ = transition_to_action(s0, s1, sim.apply_control, max_dtheta, max_ddx)

    relative_error = abs(u_-u)/abs(u)
    assert all(relative_error<1e-2), 'Recovered control %s is not %s'%(u_,u)

  print('OK')
Esempio n. 8
0
def one_step_cost(u, path, s0, target_speed, lambda_speed):
  """
  Evaluate the scalar function L(u) = c(s(u)) and its derivative wrt u.

  s(u) is the state produced by sim.apply_control(s0, u) and c is
  state_cost(path, target_speed, lambda_speed, .).  According to the
  original notes c(s) = p(x) + lambda_speed * (speed - target_speed)^2;
  state_cost is not visible here, so treat that formula as unverified.

  Returns the pair (L, dL), where dL = dc/ds * ds/du by the chain rule.
  """
  # s(u) together with ds/du
  transition = sim.apply_control(s0, u, derivs={'du'})
  # c(s(u)) together with dc/ds
  evaluated = state_cost(path, target_speed, lambda_speed, transition['val'],
                         derivs=['ds'])
  return evaluated['val'], dot(evaluated['ds'], transition['du'])
Esempio n. 9
0
    def reachable_states(self, si):
        """Return the set of discrete states reachable from discrete state si.

        Results are memoized in self.reachable[si]; an entry of None means
        "not computed yet".  On a miss, every (ddx, dtheta) pair from the
        linspace grids over +-max_ddx (nddx points) and +-max_dtheta
        (ndtheta points) is applied to the continuous form of si, and the
        resulting states are discretized and collected.
        """
        cached = self.reachable[si]
        if cached is not None:
            return cached

        s = self.to_continuous(si)
        res = set()
        for ddx in linspace(-self.max_ddx, self.max_ddx, self.nddx):
            for dtheta in linspace(-self.max_dtheta, self.max_dtheta,
                                   self.ndtheta):
                landed = sim.apply_control(s, (ddx, dtheta))['val']
                res.add(self.to_integer(landed))

        # remember the answer for the next call
        self.reachable[si] = res
        return res
Esempio n. 10
0
  def reachable_states(self, si):
    """Set of discrete states that one control step can reach from si.

    The answer is cached in self.reachable[si] (None marks a missing entry).
    When missing, it is built by applying each combination of ddx and dtheta
    from the linspace grids bounded by max_ddx / max_dtheta to the continuous
    form of si and discretizing the outcome.
    """
    known = self.reachable[si]
    if known is None:
      s = self.to_continuous(si)
      known = {
        self.to_integer(sim.apply_control(s, (ddx, dtheta))['val'])
        for ddx in linspace(-self.max_ddx, self.max_ddx, self.nddx)
        for dtheta in linspace(-self.max_dtheta, self.max_dtheta, self.ndtheta)
      }
      # store it so later calls skip the simulation
      self.reachable[si] = known
    return known
Esempio n. 11
0
def one_step_cost(u, path, s0, target_speed, lambda_speed):
    """Evaluate the scalar one-step cost L(u) = c(s(u)) and dL/du.

    s(u) is the state returned by sim.apply_control(s0, u); c is state_cost
    evaluated with the given path, target_speed and lambda_speed.  The
    original notes describe c as
        p(x) + lambda_speed * (speed - target_speed)^2
    but state_cost is not visible here, so that formula is unverified.

    Returns a tuple (L, dL) with dL = dc/ds * ds/du.
    """
    # next state and its Jacobian with respect to the control
    nxt = sim.apply_control(s0, u, derivs={'du'})
    # cost at that state and its gradient with respect to the state
    c = state_cost(path, target_speed, lambda_speed, nxt['val'], derivs=['ds'])
    value = c['val']
    # chain rule
    gradient = dot(c['ds'], nxt['du'])
    return value, gradient
Esempio n. 12
0
def test_transition_to_action():
    """Verify `transition_to_action` inverts `sim.apply_control`.

    Repeats five times: sample a control, apply it to a fixed start state,
    recover the control from the observed transition, and assert that every
    component agrees with the sampled one to a relative error below 1e-2.
    """
    s0 = (.5, -.7, pi / 2, .04, -pi / 4)
    max_dtheta = pi / 20
    max_ddx = 0.01

    for attempt in xrange(5):
        # apply a random control
        sampled_ddx = (random.rand() - 0.5) * max_ddx
        sampled_dtheta = (random.rand() - 0.5) * max_dtheta
        u = array((sampled_ddx, sampled_dtheta))
        s1 = sim.apply_control(s0, u)['val']

        # recover it
        u_ = transition_to_action(s0, s1, sim.apply_control, max_dtheta,
                                  max_ddx)

        close_enough = abs(u_ - u) / abs(u) < 1e-2
        assert all(close_enough), 'Recovered control %s is not %s' % (u_, u)

    print('OK')
Esempio n. 13
0
def reachable(T, s0, sT, cost, dynamics,  max_dtheta, max_theta, max_ddx,
              max_line_search=30, show_results=lambda *a:None):
  """Find control signals u_1...u_{T-1}, u_t=(ddx_t,dtheta_t) and the
  ensuing states s_1...s_{T-1} that

        minimize   L(s_0,...,s_{T-1})
      subject to   s_t = f(s_{t-1}, u_t)
                   | dtheta_t | <= max_dtheta
                   | ddx_t | <= max_ddx
                   | s_t[4] | <= max_theta   (imposed inside the QP only)

  Exactly one of sT or cost must be None.  If sT is set, then
        L = || s_{T-1} - sT ||^2
  otherwise, it is
        L = sum_{t=0}^{T-1}  cost(s_t)

  Solves this as a Sequential Quadratic program: each outer iteration
  approximates L by a quadratic and f by an affine function about the
  current iterate, solves the resulting QP with CX, and then line-searches
  on the controls from the current iterate towards the QP solution.

  Arguments:
    T: number of states in the trajectory, s_0 included.
    s0: initial state; converted with array().
    sT: target for the final state, or None.
    cost: None, or a callable cost(s, derivs=...) returning a dict with a
      'val' entry and, when requested, 'ds' and 'ds2' entries.
    dynamics: callable dynamics(s, u, derivs) returning a dict with 'val',
      'ds' and 'du' entries; used to linearize f.
    max_dtheta, max_theta, max_ddx: the bounds listed above.
    max_line_search: budget of line search trials for the whole call; the
      trial counter is not reset between outer iterations.
    show_results: callback invoked as show_results(Sv, L, message) at the
      start of every outer iteration.

  Returns:
    (Sv, Uv): the last accepted states [s_0, ..., s_{T-1}] and controls
    [None, u_1, ..., u_{T-1}].  If no iterate was ever accepted these are
    the initial guesses (T copies of s0, and None followed by T zero
    controls).
  """
  s0 = array(s0)
  if sT is not None:
    sT = array(sT)
  assert (cost is None) != (sT is None), 'only one of cost or sT may be specified'

  # initial iterates and objective terms
  # (Uv[0] is a placeholder so that Uv[t] lines up with u_t)
  Sv = [s0] * T
  Uv = [None] + [zeros(2)]*T
  L = None
  if cost:
    L = [cost(s0)['val']] * T

  last_obj = None   # objective value of the last accepted iterate
  step = 1.        # line search step size, carried across outer iterations
  iters = 0
  n_line_searches = 0   # total line search trials so far (never reset)
  while True:
    show_results(Sv, L, '%d, %d line searches so far. step size %g'%(iters,
                                                                     n_line_searches,
                                                                     step))
    iters += 1

    # variables, objective, and constraints of the quadratic problem
    S = [None] * T
    U = [None] * T
    S[0] = CX.Parameter(5, name='s0')
    S[0].value = s0
    constraints = []
    if cost:
      objective = zeros(1)

    # define the QP
    for t in xrange(1,T):
      # f(s_{t-1}, u_t) and its derivatives, evaluated at the current iterate
      f = dynamics(Sv[t-1], Uv[t], {'du','ds'})
      dfds = vstack(f['ds'])
      dfdu = vstack(f['du'])

      # define u_t and s_t
      U[t] = CX.Variable(2, name='u%d'%t)
      S[t] = CX.Variable(5, name='s%d'%t)

      # constraints:
      #     s_t = linearized f(s_t-1, u_t) about previous iterate
      #     and bounds on u_t and on component 4 of s_t
      constraints += [
        S[t] == f['val'] + dfds*(S[t-1]-Sv[t-1]) + dfdu*(U[t]-Uv[t]),
        CX.abs(U[t][0]) <= max_ddx,
        CX.abs(U[t][1]) <= max_dtheta,
        CX.abs(S[t][4]) <= max_theta ]

      if cost:
        # accumulate the second-order expansion of cost about Sv[t];
        # the Hessian is passed through make_psd before entering quad_form
        c = cost(Sv[t], derivs={'ds','ds2'})
        c['ds2'] = make_psd(c['ds2'])
        objective += c['val'] + (S[t]-Sv[t]).T*c['ds'] + 0.5*CX.quad_form(S[t]-Sv[t],
                                                                          c['ds2'])

    if sT is not None:
      # objective is || s_{T-1} - sT ||^2, the squared distance of the final state
      objective = CX.square(CX.norm(S[T-1] - sT))


    # solve for S and U
    p = CX.Problem(CX.Minimize(objective), constraints)
    r = p.solve(solver=CX.CVXOPT, verbose=False)
    assert isfinite(r)

    # line search on U, from Uv along U-Uv
    line_search_failed = True
    while n_line_searches < max_line_search:
      n_line_searches += 1

      # compute and apply the controls along the step
      Us = []
      Svs = [s0]
      for u,u0 in zip(U[1:],Uv[1:]):
        # a step along the search direction
        us = u0 + step * (ravel(u.value)-u0)
        # make it feasible
        us[0] = clip(us[0], -max_ddx, max_ddx)
        us[1] = clip(us[1], -max_dtheta, max_dtheta)
        Us.append(us)
        # apply controls
        # NOTE(review): the rollout calls sim.apply_control rather than the
        # `dynamics` argument; confirm the two are meant to be the same.
        Svs.append( sim.apply_control(Svs[-1], us)['val'] )

      # true objective of the rolled-out trajectory: summed cost over all
      # states, or squared distance of the last state to sT
      if cost:
        L = [ cost(s)['val'] for s in Svs ]
        obj = sum(L)
      else:
        obj = sum((Svs[-1]-sT)**2)

      if last_obj is None or obj < last_obj:
        step *= 1.1                 # lengthen the step for the next round
        line_search_failed = False  # found an improving step
        break
      else:
        step *= 0.7                 # shorten the step and try again

    if line_search_failed:          # no improving step found: stop here
      break                         # throw away this iterate
    else:
      # accept the iterate
      Sv = Svs
      Uv = [None] + Us
      last_obj = obj

  return Sv,Uv
Esempio n. 14
0
def reachable(T,
              s0,
              sT,
              cost,
              dynamics,
              max_dtheta,
              max_theta,
              max_ddx,
              max_line_search=30,
              show_results=lambda *a: None):
    """Find control signals u_1...u_{T-1}, u_t=(ddx_t,dtheta_t) and the
    ensuing states s_1...s_{T-1} that

          minimize   L(s_0,...,s_{T-1})
        subject to   s_t = f(s_{t-1}, u_t)
                     | dtheta_t | <= max_dtheta
                     | ddx_t | <= max_ddx
                     | s_t[4] | <= max_theta   (imposed inside the QP only)

    Exactly one of sT or cost must be None.  If sT is set, then
          L = || s_{T-1} - sT ||^2
    otherwise, it is
          L = sum_{t=0}^{T-1}  cost(s_t)

    Solves this as a Sequential Quadratic program: each outer iteration
    approximates L by a quadratic and f by an affine function about the
    current iterate, solves the resulting QP with CX, and then line-searches
    on the controls from the current iterate towards the QP solution.

    Arguments:
      T: number of states in the trajectory, s_0 included.
      s0: initial state; converted with array().
      sT: target for the final state, or None.
      cost: None, or a callable cost(s, derivs=...) returning a dict with a
        'val' entry and, when requested, 'ds' and 'ds2' entries.
      dynamics: callable dynamics(s, u, derivs) returning a dict with 'val',
        'ds' and 'du' entries; used to linearize f.
      max_dtheta, max_theta, max_ddx: the bounds listed above.
      max_line_search: budget of line search trials for the whole call; the
        trial counter is not reset between outer iterations.
      show_results: callback invoked as show_results(Sv, L, message) at the
        start of every outer iteration.

    Returns:
      (Sv, Uv): the last accepted states [s_0, ..., s_{T-1}] and controls
      [None, u_1, ..., u_{T-1}].  If no iterate was ever accepted these are
      the initial guesses (T copies of s0, and None followed by T zero
      controls).
    """
    s0 = array(s0)
    if sT is not None:
        sT = array(sT)
    assert (cost is None) != (sT is
                              None), 'only one of cost or sT may be specified'

    # initial iterates and objective terms
    # (Uv[0] is a placeholder so that Uv[t] lines up with u_t)
    Sv = [s0] * T
    Uv = [None] + [zeros(2)] * T
    L = None
    if cost:
        L = [cost(s0)['val']] * T

    last_obj = None  # objective value of the last accepted iterate
    step = 1.  # line search step size, carried across outer iterations
    iters = 0
    n_line_searches = 0  # total line search trials so far (never reset)
    while True:
        show_results(
            Sv, L, '%d, %d line searches so far. step size %g' %
            (iters, n_line_searches, step))
        iters += 1

        # variables, objective, and constraints of the quadratic problem
        S = [None] * T
        U = [None] * T
        S[0] = CX.Parameter(5, name='s0')
        S[0].value = s0
        constraints = []
        if cost:
            objective = zeros(1)

        # define the QP
        for t in xrange(1, T):
            # f(s_{t-1}, u_t) and its derivatives, evaluated at the current
            # iterate
            f = dynamics(Sv[t - 1], Uv[t], {'du', 'ds'})
            dfds = vstack(f['ds'])
            dfdu = vstack(f['du'])

            # define u_t and s_t
            U[t] = CX.Variable(2, name='u%d' % t)
            S[t] = CX.Variable(5, name='s%d' % t)

            # constraints:
            #     s_t = linearized f(s_t-1, u_t) about previous iterate
            #     and bounds on u_t and on component 4 of s_t
            constraints += [
                S[t] == f['val'] + dfds * (S[t - 1] - Sv[t - 1]) + dfdu *
                (U[t] - Uv[t]),
                CX.abs(U[t][0]) <= max_ddx,
                CX.abs(U[t][1]) <= max_dtheta,
                CX.abs(S[t][4]) <= max_theta
            ]

            if cost:
                # accumulate the second-order expansion of cost about Sv[t];
                # the Hessian is passed through make_psd before entering
                # quad_form
                c = cost(Sv[t], derivs={'ds', 'ds2'})
                c['ds2'] = make_psd(c['ds2'])
                objective += c['val'] + (
                    S[t] - Sv[t]).T * c['ds'] + 0.5 * CX.quad_form(
                        S[t] - Sv[t], c['ds2'])

        if sT is not None:
            # objective is || s_{T-1} - sT ||^2, the squared distance of the
            # final state
            objective = CX.square(CX.norm(S[T - 1] - sT))

        # solve for S and U
        p = CX.Problem(CX.Minimize(objective), constraints)
        r = p.solve(solver=CX.CVXOPT, verbose=False)
        assert isfinite(r)

        # line search on U, from Uv along U-Uv
        line_search_failed = True
        while n_line_searches < max_line_search:
            n_line_searches += 1

            # compute and apply the controls along the step
            Us = []
            Svs = [s0]
            for u, u0 in zip(U[1:], Uv[1:]):
                # a step along the search direction
                us = u0 + step * (ravel(u.value) - u0)
                # make it feasible
                us[0] = clip(us[0], -max_ddx, max_ddx)
                us[1] = clip(us[1], -max_dtheta, max_dtheta)
                Us.append(us)
                # apply controls
                # NOTE(review): the rollout calls sim.apply_control rather
                # than the `dynamics` argument; confirm the two are meant to
                # be the same.
                Svs.append(sim.apply_control(Svs[-1], us)['val'])

            # true objective of the rolled-out trajectory: summed cost over
            # all states, or squared distance of the last state to sT
            if cost:
                L = [cost(s)['val'] for s in Svs]
                obj = sum(L)
            else:
                obj = sum((Svs[-1] - sT)**2)

            if last_obj is None or obj < last_obj:
                step *= 1.1  # lengthen the step for the next round
                line_search_failed = False  # found an improving step
                break
            else:
                step *= 0.7  # shorten the step and try again

        if line_search_failed:  # no improving step found: stop here
            break  # throw away this iterate
        else:
            # accept the iterate
            Sv = Svs
            Uv = [None] + Us
            last_obj = obj

    return Sv, Uv