def __call__(self, start, goal, features, theta, speed):
    """Compute a cost map and run the planner between two world positions.

    Both endpoints are converted with ``self.convert.from_world2`` before
    being handed to ``self.planner``.  The cost computation is timed under
    the ``"FB Costs"`` label.

    Args:
        start: Start position, as accepted by ``self.convert.from_world2``.
        goal: Goal position, as accepted by ``self.convert.from_world2``.
        features: Feature data forwarded to ``self.compute_costs``.
        theta: Weights forwarded to ``self.compute_costs``.
        speed: Not used by this method; kept for interface compatibility.

    Returns:
        A ``(forward, backward, costs)`` tuple: the two values returned by
        ``self.planner`` followed by the cost map they were computed from.
    """
    grid_start = self.convert.from_world2(start)
    grid_goal = self.convert.from_world2(goal)

    momo.tick("FB Costs")
    cost_map = self.compute_costs(features, theta)
    momo.tack("FB Costs")

    # A single planner call yields both passes.
    forward_pass, backward_pass = self.planner(cost_map, grid_start, grid_goal)
    return forward_pass, backward_pass, cost_map
def compute_expectations(states, frames, w, h, convert, compute_costs,
                         planner, compute_features, accum):
    """Return the expected feature sums for one trajectory segment.

    Args:
        states: Sequence of state vectors; components from index 2 onward
            are used to compute a speed (presumably the velocity part).
        frames: Sequence of frames; only ``frames[0]`` is used.
        w: Weight vector forwarded to ``planner``.
        h: Horizon forwarded to ``accum``.
        convert: Object providing ``from_world2``.
        compute_costs: Not used by this function; kept for interface
            compatibility.
        planner: Callable ``(start, goal, features, w, speed)`` returning
            ``(forward, backward, costs)``.
        compute_features: Callable ``(speed, frame)`` returning features.
        accum: Callable returning ``(cummulated, w_features)``.

    Returns:
        ``(mu_expected, cummulated, costs)`` where ``mu_expected`` is
        ``w_features`` reduced three times along its leading axis.
    """
    speeds = [np.linalg.norm(state[2:]) for state in states]
    avg_velocity = np.sum(speeds, 0) / len(speeds)
    features = compute_features(avg_velocity, frames[0])

    momo.tick("Forward-backward")
    forward, backward, costs = planner(
        states[0], states[-1], features, w, avg_velocity
    )
    momo.tack("Forward-backward")

    momo.tick("Accum")
    origin = convert.from_world2(states[0])
    cummulated, w_features = accum(forward, backward, costs, features, origin, h)
    momo.tack("Accum")

    # Collapse the three leading axes one at a time, in the same order as
    # three successive axis-0 sums.
    mu_expected = w_features
    for _ in range(3):
        mu_expected = np.sum(mu_expected, axis=0)

    return mu_expected, cummulated, costs
def compute_observed(feature_module, convert, states, frames, radius):
    """Build the running sum of observed features along a trajectory.

    Args:
        feature_module: Object providing
            ``compute_feature(state, frame, radius)``.
        convert: Object providing ``from_world2``.
        states: Sequence of states, one per time step.
        frames: Sequence of frames, indexed in step with ``states``.
        radius: Forwarded unchanged to ``compute_feature``.

    Returns:
        ``(result, grid_path)`` where ``result[i]`` is the sum of the
        features of ``states[0]`` through ``states[i]`` and ``grid_path`` is
        every state converted with ``convert.from_world2``.

    Raises:
        IndexError: If ``frames`` has fewer entries than ``states``.
    """
    momo.tick("Compute observed")

    momo.tick("Discretize")
    grid_path = [convert.from_world2(s) for s in states]
    momo.tack("Discretize")

    momo.tick("Compute")
    result = []
    for i, state in enumerate(states):
        feature = feature_module.compute_feature(state, frames[i], radius)
        if result:
            # Accumulate into the freshly computed feature so each entry
            # holds the total up to and including this step.
            feature += result[-1]
        result.append(feature)
    momo.tack("Compute")

    momo.tack("Compute observed")
    return result, grid_path
def learn(feature_module, convert, frame_data, ids, radius, h):
    """Learn feature weights by matching observed and expected feature sums.

    Each step accumulates, over every trajectory and every window start,
    the observed feature sums and the expected feature sums under the
    current weights.  The gradient is the difference of the two normalised
    sums, and the weights are updated multiplicatively with a decaying
    step size and then renormalised to sum to one.  At most 100 steps are
    run; the loop stops early once the gradient norm drops below 0.05.

    Args:
        feature_module: Provides ``FEATURE_LENGTH``, ``compute_costs``,
            ``compute_features`` and ``compute_feature``.
        convert: Coordinate converter forwarded to the helpers.
        frame_data: Mapping ``id -> {"states": ..., "frames": ...}``.
        ids: Iterable of keys of ``frame_data`` to learn from.
        radius: Forwarded to the feature computations.
        h: Window length (in steps) used for each expectation.

    Returns:
        The weight vector that produced the smallest gradient norm, or
        ``None`` if no step produced a comparable (non-NaN) error.
    """
    feature_length = feature_module.FEATURE_LENGTH

    compute_costs = feature_module.compute_costs(convert)
    planner = momo.irl.planning.forward_backward(convert, compute_costs)
    compute_features = feature_module.compute_features(convert, radius)
    accum = compute_cummulated()

    # Running sums of observed features per trajectory, so the observed
    # total of any window is a difference of two entries.
    observed_integral = {}
    for o_id in ids:
        states = frame_data[o_id]["states"]
        frames = frame_data[o_id]["frames"]
        observed_integral[o_id], _ = compute_observed(
            feature_module, convert, states, frames, radius
        )

    # Initialize weight vector (uniform).
    w = (np.ones(feature_length) * 1.0 / feature_length).astype(np.float64)
    gamma = 0.5
    decay = 0.99
    min_w = None
    min_e = 1E6

    # Single-argument print calls emit the same text under Python 2 and 3.
    print("Gamma %s" % gamma)
    print("Decay %s" % decay)

    for times in range(100):
        momo.tick("Step")
        sum_obs = np.zeros(feature_length, np.float64)
        sum_exp = np.zeros(feature_length, np.float64)

        for o_id in ids:
            states = frame_data[o_id]["states"]
            frames = frame_data[o_id]["frames"]
            integral = observed_integral[o_id]
            l = len(states)
            for i in range(max(l - h, 1)):
                momo.tick("Compute Expectations")
                expected, _, _ = momo.learning.max_ent.compute_expectations(
                    states[i:], frames[i:], w, h,
                    convert, compute_costs, planner, compute_features, accum
                )
                momo.tack("Compute Expectations")

                # Skip invalid samples *before* accumulating; otherwise a
                # single NaN would poison the sums for the whole step.
                if np.any(np.isnan(expected)):
                    continue

                # "* 1" makes a copy so the in-place subtraction below does
                # not modify the stored running sums.
                observed = integral[min(i + h, l - 1)] * 1
                if i > 0:
                    observed -= integral[i - 1]

                sum_obs += observed
                sum_exp += expected

        total_obs = np.sum(sum_obs)
        total_exp = np.sum(sum_exp)
        if total_obs != 0 and total_exp != 0:
            gradient = sum_obs / total_obs - sum_exp / total_exp
        else:
            # Nothing usable was accumulated: treat as a zero gradient
            # instead of relying on a stale or undefined value.
            gradient = np.zeros(feature_length, np.float64)
        error = np.linalg.norm(gradient)

        if error < min_e:
            min_e = error
            # Copy: w is updated in place below, so keeping a reference
            # would make min_w silently track the latest weights.
            min_w = w.copy()

        print("%s %s" % (sum_obs, sum_exp))
        print("%s %s" % (sum_obs / total_obs, sum_exp / total_exp))
        print("%s %s" % (times, error))

        if error < 0.05:
            # Stop the step timer before leaving the loop.
            momo.tack("Step")
            break

        # Multiplicative update with a step size that decays per step.
        for i in range(feature_length):
            w[i] *= exp(-gamma * decay ** times * gradient[i])
        w /= np.sum(w)
        print("w %s" % (w,))
        momo.tack("Step")

    print("\n".join(momo.stats("Step")))
    print(min_e)
    print("W %s" % (min_w,))
    return min_w
def __call__(self, costs, start, goal):
    """Run the forward and backward passes over a cost grid with OpenCL.

    The forward grid is seeded with 1 at ``start`` and the backward grid
    with 1 at ``goal``.  The ``forwardPass`` and ``updatePass`` kernels are
    then run alternately ``sum(costs.shape)`` times, each launch waiting on
    the previous one, and the results are read back from the first
    forward and backward buffers.

    Args:
        costs: Non-negative cost array.  ``costs.shape[2]`` and
            ``costs.shape[1]`` are passed to the kernels as width and
            height (assumes a 3-D array -- TODO confirm against callers).
        start: Array-like with ``tolist()``; its components, reversed, are
            used as the index of the start cell.
        goal: Same convention as ``start``, for the goal cell.

    Returns:
        ``(forward, backward)`` float64 arrays with the shape of ``costs``.

    Raises:
        RuntimeError: If any entry of ``costs`` is negative.
    """
    if (costs < 0).any():
        raise RuntimeError("The cost matrix cannot have negative values")

    mf = cl.mem_flags
    width = costs.shape[2]
    height = costs.shape[1]

    # Coordinates arrive in the opposite order to the array axes.
    start_index = tuple(reversed(start.tolist()))
    goal_index = tuple(reversed(goal.tolist()))

    forward = np.zeros(costs.shape, dtype=np.float64)
    f_masks = np.zeros(costs.shape, dtype=np.int32)
    f_masks[start_index] = 1
    forward[start_index] = 1

    backward = np.zeros(costs.shape, dtype=np.float64)
    b_masks = np.zeros(costs.shape, dtype=np.int32)
    b_masks[goal_index] = 1
    backward[goal_index] = 1

    read_only = mf.READ_ONLY | mf.COPY_HOST_PTR
    read_write = mf.READ_WRITE | mf.COPY_HOST_PTR

    cost_buffer = cl.Buffer(
        self.context, read_only, hostbuf=costs.astype(np.float64)
    )
    f_mask_buffer = cl.Buffer(self.context, read_only, hostbuf=f_masks)
    f1_buffer = cl.Buffer(self.context, read_write, hostbuf=forward)
    f2_buffer = cl.Buffer(self.context, read_write, hostbuf=forward)
    b_mask_buffer = cl.Buffer(self.context, read_only, hostbuf=b_masks)
    b1_buffer = cl.Buffer(self.context, read_write, hostbuf=backward)
    b2_buffer = cl.Buffer(self.context, read_write, hostbuf=backward)

    # Each kernel launch waits on the event of the previous launch.
    wait = []
    for _ in range(sum(costs.shape)):
        momo.tick("first + second")

        momo.tick("first")
        e1 = self.flow.forwardPass(
            self.queue,
            costs.shape,
            None,
            np.int32(width),
            np.int32(height),
            self.idirection_buffer,
            cost_buffer,
            f_mask_buffer,
            b_mask_buffer,
            f1_buffer,
            f2_buffer,
            b1_buffer,
            b2_buffer,
            wait_for=wait,
        )
        wait = [e1]
        momo.tack("first")

        momo.tick("second")
        e2 = self.flow.updatePass(
            self.queue,
            costs.shape,
            None,
            np.int32(width),
            np.int32(height),
            f1_buffer,
            f2_buffer,
            b1_buffer,
            b2_buffer,
            wait_for=wait,
        )
        wait = [e2]
        momo.tack("second")

        momo.tack("first + second")

    momo.tick("wait")
    # Wait on the last launched kernel; nothing to wait for if the loop
    # never ran.
    for event in wait:
        event.wait()
    momo.tack("wait")

    momo.tick("copy")
    cl.enqueue_copy(self.queue, forward, f1_buffer)
    cl.enqueue_copy(self.queue, backward, b1_buffer)
    momo.tack("copy")

    return forward, backward