Esempio n. 1
0
  def __call__( self, start, goal, features, theta, speed ):
    """Compute the cost map and run the planner between two states.

    start and goal are each mapped through self.convert.from_world2 before
    planning.  The cost map is produced by
    self.compute_costs( features, theta ), bracketed by momo.tick / momo.tack
    under the label "FB Costs".  speed is accepted but not used here.

    Returns the tuple ( forward, backward, costs ), where forward and
    backward are the two values returned by one call to self.planner.
    """
    to_grid = self.convert.from_world2
    grid_start = to_grid( start )
    grid_goal = to_grid( goal )

    momo.tick( "FB Costs" )
    cost_map = self.compute_costs( features, theta )
    momo.tack( "FB Costs" )

    # One planner call yields both results.
    passes = self.planner( cost_map, grid_start, grid_goal )
    forward, backward = passes
    return forward, backward, cost_map
Esempio n. 2
0
def compute_expectations(
  states, frames, w, h,
  convert, compute_costs, planner, compute_features, accum
):
  """Compute the expected feature vector for one trajectory segment.

  The average of np.linalg.norm( s[2:] ) over all states is used both to
  build the features (together with frames[0]) and as the last argument to
  planner.  planner is called with the first and last state, the features
  and the weights w; accum is then called with the planner results, the
  features, the converted first state and h.

  compute_costs is accepted but not used in this function.

  Returns ( mu_expected, cummulated, costs ): mu_expected is the second value
  returned by accum summed over its first three axes, cummulated is the first
  value returned by accum, and costs comes from planner.
  """
  speeds = [np.linalg.norm( s[2:] ) for s in states]
  avg_velocity = np.sum( speeds, 0 ) / len( speeds )
  features = compute_features( avg_velocity, frames[0] )

  momo.tick( "Forward-backward" )
  planned = planner( states[0], states[-1], features, w, avg_velocity )
  forward, backward, costs = planned
  momo.tack( "Forward-backward" )

  momo.tick( "Accum" )
  grid_start = convert.from_world2( states[0] )
  cummulated, w_features = accum( forward, backward, costs, features, grid_start, h )
  momo.tack( "Accum" )

  # Collapse the three leading axes one at a time, in the same order as
  # three consecutive axis-0 sums.
  mu_expected = w_features
  for _ in range( 3 ):
    mu_expected = np.sum( mu_expected, axis = 0 )

  return mu_expected, cummulated, costs
Esempio n. 3
0
def compute_observed( feature_module, convert, states, frames, radius ):
  """Return cumulative observed features and the converted path.

  For every index i, feature_module.compute_feature( states[i], frames[i],
  radius ) is evaluated and the previous cumulative value is added to it in
  place, so result[i] holds the running sum of the features for states
  0..i.

  Parameters:
    feature_module -- object providing compute_feature( state, frame, radius ).
    convert        -- object providing from_world2( state ).
    states, frames -- parallel sequences; frames is indexed with the same
                      index as states.
    radius         -- forwarded unchanged to compute_feature.

  Returns ( result, grid_path ): result is the list of running sums and
  grid_path is [convert.from_world2( s ) for s in states].

  The timing sections "Compute observed", "Discretize" and "Compute" are
  bracketed with momo.tick / momo.tack.
  """
  momo.tick( "Compute observed" )

  momo.tick( "Discretize" )
  grid_path = [convert.from_world2( s ) for s in states]
  momo.tack( "Discretize" )

  momo.tick( "Compute" )
  result = []
  for i, state in enumerate( states ):
    feature = feature_module.compute_feature( state, frames[i], radius )
    if result:
      # Running sum: fold the previous cumulative value into this one.
      feature += result[-1]
    result.append( feature )
  momo.tack( "Compute" )

  momo.tack( "Compute observed" )
  return result, grid_path
Esempio n. 4
0
def learn( feature_module, convert, frame_data, ids, radius, h ):
  feature_length = feature_module.FEATURE_LENGTH

  compute_costs = feature_module.compute_costs( convert )
  planner = momo.irl.planning.forward_backward( convert, compute_costs )
  compute_features = feature_module.compute_features( convert, radius )
  accum = compute_cummulated()

  observed_integral = {}
  grid_paths = {}
  for o_id in ids:
    states = frame_data[o_id]["states"]
    frames = frame_data[o_id]["frames"]
    obs, path = compute_observed( feature_module, convert, states, frames, radius )
    observed_integral[o_id] = obs
    grid_paths[o_id] = path

  # Initialize weight vector
  w  = ( np.ones( feature_length ) * 1.0 / feature_length ).astype( np.float64 )

  gamma = 0.5
  decay = 0.99
  min_w = None
  min_e = 1E6

  print "Gamma", gamma
  print "Decay", decay

  for times in xrange( 100 ):

    momo.tick( "Step" )
    sum_obs = np.zeros( feature_length, np.float64 )
    sum_exp = np.zeros( feature_length, np.float64 )

    for o_id in ids:
      states = frame_data[o_id]["states"]
      frames = frame_data[o_id]["frames"]
      l = len( states )
      for i in xrange( max( l - h, 1 ) ):
        momo.tick( "Compute Expectations" )
        expected, cummulated, costs =\
          momo.learning.max_ent.compute_expectations( 
            states[i:], frames[i:], w, h,
            convert, compute_costs, planner, compute_features, accum
          )
        momo.tack( "Compute Expectations" )
        observed = observed_integral[o_id][min( i + h, l - 1 )] * 1
        if i > 0:
          observed -= observed_integral[o_id][i - 1]
        sum_obs += observed
        sum_exp += expected

        if np.any( np.isnan( expected ) ):
          continue
        if np.sum( observed ) != 0 and np.sum( expected ) != 0:
          gradient = observed / np.sum( observed ) - expected / np.sum( expected )
        else:
          gradient = observed * 0.
        error = np.linalg.norm( gradient )
        #momo.plot.gradient_descent_step( cummulated, costs, grid_paths[o_id], error )
    if np.sum( sum_obs ) != 0 and np.sum( sum_exp ) != 0:
      gradient = sum_obs / np.sum( sum_obs ) - sum_exp / np.sum( sum_exp )
    error = np.linalg.norm( gradient )
    if error < min_e:
      min_e = error
      min_w = w
    print sum_obs, sum_exp
    print sum_obs / np.sum( sum_obs ), sum_exp / np.sum( sum_exp )
    print times, error
    if error < 0.05:
      break
    for i in xrange( feature_length ):
      w[i] *= exp( -gamma * decay ** times * gradient[i] )
      #w[i] *= exp( -gamma * gradient[i] )
    w /= np.sum( w )
    print "w", w
    momo.tack( "Step" )
    print "\n".join( momo.stats( "Step" ) )

  print min_e
  print "W", min_w
  return min_w
Esempio n. 5
0
    def __call__(self, costs, start, goal):
        """Run the forward and backward passes over a cost array on the device.

        Two float64 arrays shaped like costs are created, one seeded with 1 at
        the start cell and one seeded with 1 at the goal cell, together with
        int32 masks marking those cells.  The kernels self.flow.forwardPass
        and self.flow.updatePass are then enqueued alternately
        sum(costs.shape) times, each waiting on the event of the previous
        launch, and the contents of the first forward / backward buffers are
        copied back to the host.

        Parameters:
            costs: array with at least three axes; costs.shape is used as the
                global work size, costs.shape[2] is passed as width and
                costs.shape[1] as height.  Must not contain negative values.
            start, goal: objects with ``tolist()``; the reversed list is used
                as the index of the seeded cell.

        Returns:
            (forward, backward): the two float64 host arrays after the copy.

        Raises:
            RuntimeError: if any entry of costs is negative.
        """
        if (costs < 0).any():
            # Was `runtime_error`, which is not a Python builtin name.
            raise RuntimeError("The cost matrix cannot have negative values")
        mf = cl.mem_flags

        width = costs.shape[2]
        height = costs.shape[1]

        # Coordinates arrive in the reverse of the array's axis order.
        start_cell = tuple(reversed(start.tolist()))
        goal_cell = tuple(reversed(goal.tolist()))

        forward = np.zeros(costs.shape, dtype=np.float64)
        f_masks = np.zeros(costs.shape, dtype=np.int32)
        f_masks[start_cell] = 1
        forward[start_cell] = 1

        backward = np.zeros(costs.shape, dtype=np.float64)
        b_masks = np.zeros(costs.shape, dtype=np.int32)
        b_masks[goal_cell] = 1
        backward[goal_cell] = 1

        cost_buffer = cl.Buffer(self.context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=costs.astype(np.float64))

        # Each direction gets a read-only mask and two read-write buffers
        # that both start from the same seeded host array.
        f_mask_buffer = cl.Buffer(self.context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=f_masks)
        f1_buffer = cl.Buffer(self.context, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=forward)
        f2_buffer = cl.Buffer(self.context, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=forward)

        b_mask_buffer = cl.Buffer(self.context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_masks)
        b1_buffer = cl.Buffer(self.context, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=backward)
        b2_buffer = cl.Buffer(self.context, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=backward)

        # Event list that chains every launch to the one before it.
        wait = []

        for i in xrange(sum(costs.shape)):
            momo.tick("first + second")
            momo.tick("first")
            e1 = self.flow.forwardPass(
                self.queue,
                costs.shape,
                None,
                np.int32(width),
                np.int32(height),
                self.idirection_buffer,
                cost_buffer,
                f_mask_buffer,
                b_mask_buffer,
                f1_buffer,
                f2_buffer,
                b1_buffer,
                b2_buffer,
                wait_for=wait,
            )
            wait = [e1]
            momo.tack("first")
            momo.tick("second")
            e2 = self.flow.updatePass(
                self.queue,
                costs.shape,
                None,
                np.int32(width),
                np.int32(height),
                f1_buffer,
                f2_buffer,
                b1_buffer,
                b2_buffer,
                wait_for=wait,
            )
            wait = [e2]
            momo.tack("second")
            momo.tack("first + second")

        # Block until the last enqueued launch has finished.
        momo.tick("wait")
        e2.wait()
        momo.tack("wait")

        momo.tick("copy")
        cl.enqueue_copy(self.queue, forward, f1_buffer)
        cl.enqueue_copy(self.queue, backward, b1_buffer)
        momo.tack("copy")
        return forward, backward