def eval(self, sample):
    """
    Evaluate cost function and derivatives on a sample.

    Each configured data type contributes one of two costs:

    * If its config contains an 'average' entry, the data is reshaped
      to (T,) + average, averaged over axis 1, and the cost is the dot
      product of that average with the weights 'wp'. The first entry
      of 'average' is used as the number of sensors.
    * Otherwise the distance to 'target_state' is passed to
      evall1l2term with time-ramped weights.

    Args:
        sample: A single sample. T, dU, dX, get() and
            agent.pack_data_x() are used.
    Returns:
        The tuple (l, lx, lu, lxx, luu, lux). The control-related
        arrays lu, luu and lux are never written and stay zero.
    """
    T = sample.T
    Du = sample.dU
    Dx = sample.dX

    final_l = np.zeros(T)
    final_lu = np.zeros((T, Du))
    final_lx = np.zeros((T, Dx))
    final_luu = np.zeros((T, Du, Du))
    final_lxx = np.zeros((T, Dx, Dx))
    final_lux = np.zeros((T, Du, Dx))

    for data_type in self._hyperparams['data_types']:
        config = self._hyperparams['data_types'][data_type]
        wp = config['wp']
        x = sample.get(data_type)

        if 'average' in config:
            # tuple() lets the hyperparameters spell the shape as a list
            # as well as a tuple; (T,) + [..] would raise TypeError.
            avg_shape = tuple(config['average'])
            num_sensor = avg_shape[0]
            x = np.mean(x.reshape((T,) + avg_shape), axis=1)
            _, dim_sensor = x.shape

            # Linear cost on the averaged reading; the gradient spreads
            # the weights evenly over the sensors and the Hessian is 0.
            weights = np.array(wp)
            l = x.dot(weights.T)
            ls = np.tile(weights, [T, num_sensor]) / num_sensor
            lss = np.zeros(
                (T, dim_sensor * num_sensor, dim_sensor * num_sensor))
        else:
            tgt = config['target_state']
            _, dim_sensor = x.shape

            wpm = get_ramp_multiplier(
                self._hyperparams['ramp_option'], T,
                wp_final_multiplier=self._hyperparams['wp_final_multiplier'])
            wp = wp * np.expand_dims(wpm, axis=-1)

            # Compute state penalty.
            dist = x - tgt

            # Evaluate penalty term. The Jacobian of dist w.r.t. the
            # data is the identity and its second derivative is zero.
            l, ls, lss = evall1l2term(
                wp, dist,
                np.tile(np.eye(dim_sensor), [T, 1, 1]),
                np.zeros((T, dim_sensor, dim_sensor, dim_sensor)),
                self._hyperparams['l1'], self._hyperparams['l2'],
                self._hyperparams['alpha'])

        final_l += l

        # The agent receives the full-state arrays together with the
        # per-data-type derivatives and the data type name(s).
        sample.agent.pack_data_x(final_lx, ls, data_types=[data_type])
        sample.agent.pack_data_x(final_lxx, lss,
                                 data_types=[data_type, data_type])

    return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
def eval(self, sample):
    """
    Evaluate the state cost and its derivatives on a sample.

    For every configured data type the distance to 'target_state' is
    formed, columns 0-7 and 9-11 of that distance are zeroed, and the
    result is passed to evall1l2term with time-ramped weights.

    Args:
        sample: A single sample. T, dU, dX, get() and
            agent.pack_data_x() are used.
    Returns:
        The tuple (l, lx, lu, lxx, luu, lux). The control-related
        arrays lu, luu and lux are never written and stay zero.
    """
    hyper = self._hyperparams
    T, Du, Dx = sample.T, sample.dU, sample.dX

    # State-related outputs, filled in by the loop below.
    final_l = np.zeros(T)
    final_lx = np.zeros((T, Dx))
    final_lxx = np.zeros((T, Dx, Dx))
    # Control-related outputs; returned untouched.
    final_lu = np.zeros((T, Du))
    final_luu = np.zeros((T, Du, Du))
    final_lux = np.zeros((T, Du, Dx))

    # Column ranges of the distance that are excluded from the penalty.
    # NOTE(review): hard-coded; presumably tied to one specific state
    # layout - confirm before reusing with another agent.
    ignored_columns = (slice(None, 8), slice(9, 12))

    for data_type in hyper['data_types']:
        config = hyper['data_types'][data_type]
        weights = config['wp']
        target = config['target_state']
        state = sample.get(data_type)
        _, dim_sensor = state.shape

        ramp = get_ramp_multiplier(
            hyper['ramp_option'], T,
            wp_final_multiplier=hyper['wp_final_multiplier'])
        weights = weights * np.expand_dims(ramp, axis=-1)

        # Distance to the target with the ignored columns masked out.
        dist = state - target
        for columns in ignored_columns:
            dist[:, columns] = 0.

        # Identity Jacobian and zero second derivative of dist.
        jacobian = np.tile(np.eye(dim_sensor), [T, 1, 1])
        hessian = np.zeros((T, dim_sensor, dim_sensor, dim_sensor))
        cost, cost_x, cost_xx = evall1l2term(
            weights, dist, jacobian, hessian,
            hyper['l1'], hyper['l2'], hyper['alpha'])

        final_l += cost
        sample.agent.pack_data_x(final_lx, cost_x, data_types=[data_type])
        sample.agent.pack_data_x(final_lxx, cost_xx,
                                 data_types=[data_type, data_type])

    return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
def eval(self, sample):
    """
    Evaluate the state cost and its derivatives on a sample.

    For every configured data type, the difference between the sampled
    data and 'target_state' is passed to evall1l2term with time-ramped
    weights; the resulting costs are summed over data types.

    Args:
        sample: A single sample. T, dU, dX, get() and
            agent.pack_data_x() are used.
    Returns:
        The tuple (l, lx, lu, lxx, luu, lux). The control-related
        arrays lu, luu and lux are never written and stay zero.
    """
    params = self._hyperparams
    T, Du, Dx = sample.T, sample.dU, sample.dX

    # Outputs, allocated in the order they are returned.
    final_l = np.zeros(T)
    final_lx = np.zeros((T, Dx))
    final_lu = np.zeros((T, Du))
    final_lxx = np.zeros((T, Dx, Dx))
    final_luu = np.zeros((T, Du, Du))
    final_lux = np.zeros((T, Du, Dx))

    for data_type in params['data_types']:
        config = params['data_types'][data_type]
        weights, target = config['wp'], config['target_state']
        state = sample.get(data_type)
        _, dim_sensor = state.shape

        # Scale the per-dimension weights by the per-time-step ramp.
        ramp = get_ramp_multiplier(
            params['ramp_option'], T,
            wp_final_multiplier=params['wp_final_multiplier']
        )
        weights = weights * np.expand_dims(ramp, axis=-1)

        # The distance is the data minus a constant, so its Jacobian is
        # the identity and its second derivative vanishes.
        jacobian = np.tile(np.eye(dim_sensor), [T, 1, 1])
        hessian = np.zeros((T, dim_sensor, dim_sensor, dim_sensor))
        cost, cost_x, cost_xx = evall1l2term(
            weights, state - target, jacobian, hessian,
            params['l1'], params['l2'], params['alpha']
        )

        final_l += cost
        sample.agent.pack_data_x(final_lx, cost_x, data_types=[data_type])
        sample.agent.pack_data_x(final_lxx, cost_xx,
                                 data_types=[data_type, data_type])

    return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
def eval_mu(self, mu, T, Du, Dx):
    """
    Evaluate the state cost on a trajectory given as an array.

    For every configured data type, the first six columns of mu are
    compared against that data type's 'target_state' and the resulting
    cost is added to the total.

    Args:
        mu: 2-D array; only columns 0-5 are read.
        T: Number of time steps; sizes the outputs and the weight ramp.
        Du: Control dimension, used only to size the outputs.
        Dx: State dimension, used only to size the outputs.
    Returns:
        The tuple (l, lx, lu, lxx, luu, lux). Only l is accumulated;
        every derivative array is returned as zeros.
    """
    params = self._hyperparams

    final_l = np.zeros(T)
    # NOTE(review): the derivative arrays below are allocated but never
    # filled, although evall1l2term returns derivatives - confirm that
    # callers only rely on the cost values.
    final_lx = np.zeros((T, Dx))
    final_lu = np.zeros((T, Du))
    final_lxx = np.zeros((T, Dx, Dx))
    final_luu = np.zeros((T, Du, Du))
    final_lux = np.zeros((T, Du, Dx))

    for data_type in params['data_types']:
        config = params['data_types'][data_type]
        weights = config['wp']
        target = config['target_state']

        # NOTE(review): the same six columns are used for every data
        # type; presumably a single data type is configured - confirm.
        state = mu[:, 0:6]
        _, dim_sensor = state.shape

        ramp = get_ramp_multiplier(
            params['ramp_option'], T,
            wp_final_multiplier=params['wp_final_multiplier'])
        weights = weights * np.expand_dims(ramp, axis=-1)

        # Identity Jacobian and zero second derivative of the distance.
        jacobian = np.tile(np.eye(dim_sensor), [T, 1, 1])
        hessian = np.zeros((T, dim_sensor, dim_sensor, dim_sensor))
        cost, _, _ = evall1l2term(
            weights, state - target, jacobian, hessian,
            params['l1'], params['l2'], params['alpha'])

        final_l += cost

    return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
def eval(self, sample):
    """
    Evaluate the waypoint cost and its derivatives on a sample.

    The horizon is cut into consecutive segments that end at
    ceil(T * waypoint_time[i]). Segment i is penalised against
    config[i]['target_state'] with weights config[i]['wp'], ramped
    over the length of that segment only.

    Args:
        sample: A single sample. T, dU, dX and get() are used.
    Returns:
        The tuple (l, lx, lu, lxx, luu, lux). The control-related
        arrays lu, luu and lux are never written and stay zero.
    """
    T, dU, dX = sample.T, sample.dU, sample.dX
    hyper = self._hyperparams

    # Discretize waypoint time steps, e.g. [8., 20.].
    waypoint_step = np.ceil(T * hyper['waypoint_time'])

    # A single ramp option is expanded to one entry per waypoint. The
    # expanded list is stored back into the hyperparameters.
    if not isinstance(hyper['ramp_option'], list):
        hyper['ramp_option'] = [hyper['ramp_option']] * len(waypoint_step)

    final_l = np.zeros(T)
    final_lx = np.zeros((T, dX))
    final_lxx = np.zeros((T, dX, dX))
    final_lu = np.zeros((T, dU))
    final_luu = np.zeros((T, dU, dU))
    final_lux = np.zeros((T, dU, dX))

    for data_type in hyper['data_types']:
        config = hyper['data_types'][data_type]
        start = 0
        for i, step in enumerate(waypoint_step):
            stop = int(step)
            length = int(step - start)
            segment = slice(start, stop)

            weights = config[i]['wp']
            target = config[i]['target_state']
            state = sample.get(data_type)
            _, dim_sensor = state.shape

            ramp = get_ramp_multiplier(hyper['ramp_option'][i], length)
            weights = weights * np.expand_dims(ramp, axis=-1)

            # Distance to this waypoint over its own time window.
            dist = state[segment, :] - target

            # Identity Jacobian and zero second derivative of dist.
            jacobian = np.tile(np.eye(dim_sensor), [length, 1, 1])
            hessian = np.zeros(
                (length, dim_sensor, dim_sensor, dim_sensor))
            cost, cost_x, cost_xx = evall1l2term(
                weights, dist, jacobian, hessian,
                hyper['l1'], hyper['l2'], hyper['alpha'])

            # NOTE(review): results are assigned rather than added, so a
            # later data type overwrites an earlier one, and the
            # derivatives always land in the first seven state columns
            # regardless of data_type - confirm both are intended.
            final_l[segment] = cost
            final_lx[segment, 0:7] = cost_x
            final_lxx[segment, 0:7, 0:7] = cost_xx

            start = stop

    return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
def eval(self, sample):
    """
    Evaluate the state cost and its derivatives on a sample.

    For every configured data type, the difference between the sampled
    data and a target is passed to evall1l2term with time-ramped
    weights. When the data type's config carries a non-empty
    'map_size', 'target_state' is first shifted by half the map size
    and its second component is flipped; otherwise 'target_state' is
    used unchanged.

    Args:
        sample: A single sample. T, dU, dX, get() and
            agent.pack_data_x() are used.
    Returns:
        The tuple (l, lx, lu, lxx, luu, lux). The control-related
        arrays lu, luu and lux are never written and stay zero.
    """
    T = sample.T
    Du = sample.dU
    Dx = sample.dX

    final_l = np.zeros(T)
    final_lu = np.zeros((T, Du))
    final_lx = np.zeros((T, Dx))
    final_luu = np.zeros((T, Du, Du))
    final_lxx = np.zeros((T, Dx, Dx))
    final_lux = np.zeros((T, Du, Dx))

    for data_type in self._hyperparams['data_types']:
        config = self._hyperparams['data_types'][data_type]
        wp = config['wp']
        tgt = config['target_state']

        # Data types without a map must fall back to the raw target, so
        # a missing 'map_size' key is treated like an empty one instead
        # of raising KeyError.
        map_size = config.get('map_size')
        if map_size:
            # The target is given relative to the map's corner; re-express
            # it relative to the map centre with the second axis flipped.
            # NOTE(review): presumably map_size is (height, width) and
            # target_state is (x, y, angle) in map coordinates - confirm.
            tgt = [
                tgt[0] - map_size[1] / 2,
                map_size[0] / 2 - tgt[1],
                tgt[2],
            ]

        x = sample.get(data_type)
        _, dim_sensor = x.shape

        wpm = get_ramp_multiplier(
            self._hyperparams['ramp_option'], T,
            wp_final_multiplier=self._hyperparams['wp_final_multiplier']
        )
        wp = wp * np.expand_dims(wpm, axis=-1)

        # Compute state penalty.
        dist = x - tgt

        # Evaluate penalty term. The Jacobian of dist w.r.t. the data is
        # the identity and its second derivative is zero.
        l, ls, lss = evall1l2term(
            wp, dist,
            np.tile(np.eye(dim_sensor), [T, 1, 1]),
            np.zeros((T, dim_sensor, dim_sensor, dim_sensor)),
            self._hyperparams['l1'], self._hyperparams['l2'],
            self._hyperparams['alpha']
        )

        final_l += l
        sample.agent.pack_data_x(final_lx, ls, data_types=[data_type])
        sample.agent.pack_data_x(final_lxx, lss,
                                 data_types=[data_type, data_type])

    return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
def eval(self, sample):
    """
    Evaluate the collision cost and its derivatives on a sample.

    For every time step, the axis-aligned bounding box of a
    BUS_LENGTH x BUS_WIDTH rectangle centred at (x[0], x[1]) with
    heading x[2] is computed. The four corners of that box are looked
    up in config['map_state'], and the sum of the four cells minus
    4 * ROAD is used as the "distance" for the first two state
    dimensions. That distance is then passed to evall1l2term with
    time-ramped weights.

    Args:
        sample: A single sample. T, dU, dX, get() and
            agent.pack_data_x() are used. Each row of the sampled data
            must have exactly three entries.
    Returns:
        The tuple (l, lx, lu, lxx, luu, lux). The control-related
        arrays lu, luu and lux are never written and stay zero.
    """
    T = sample.T
    Du = sample.dU
    Dx = sample.dX

    final_l = np.zeros(T)
    final_lu = np.zeros((T, Du))
    final_lx = np.zeros((T, Dx))
    final_luu = np.zeros((T, Du, Du))
    final_lxx = np.zeros((T, Dx, Dx))
    final_lux = np.zeros((T, Du, Dx))

    half_len = BUS_LENGTH / 2
    half_wid = BUS_WIDTH / 2

    for data_type in self._hyperparams['data_types']:
        config = self._hyperparams['data_types'][data_type]
        wp = config['wp']
        x = sample.get(data_type)
        _, dim_sensor = x.shape

        wpm = get_ramp_multiplier(
            self._hyperparams['ramp_option'], T,
            wp_final_multiplier=self._hyperparams['wp_final_multiplier'])
        wp = wp * np.expand_dims(wpm, axis=-1)

        map_size = config['map_size']
        map_state = config['map_state']

        # Offsets that move the centred coordinates of x onto map
        # indices. They must be integers: a float such as
        # map_size[1] / 2 is not accepted as an array index.
        col_offset = int(map_size[1] // 2)
        row_offset = int(map_size[0] // 2)

        dist = np.zeros(x.shape)
        for i, (center_x, center_y, heading) in enumerate(x):
            cos_h = np.cos(heading)
            sin_h = np.sin(heading)

            # Half-extent of the rotated rectangle along each axis. This
            # equals the min/max over all four sign combinations of
            # +-half_len and +-half_wid.
            reach_x = abs(half_len * cos_h) + abs(half_wid * sin_h)
            reach_y = abs(half_len * sin_h) + abs(half_wid * cos_h)

            # int() truncates toward zero.
            col_lo = int(center_x - reach_x) + col_offset
            col_hi = int(center_x + reach_x) + col_offset
            # Rows are counted in the opposite direction to the second
            # state coordinate, hence the subtraction.
            row_lo = row_offset - int(center_y - reach_y)
            row_hi = row_offset - int(center_y + reach_y)

            # Simplify the overlap to the four corners of the bounding
            # box; the value is zero when all four cells equal ROAD.
            # NOTE(review): indices are not range-checked; a box that
            # leaves the map raises IndexError or, for negative
            # indices, wraps around - confirm the agent keeps the bus
            # inside the map.
            overlap = (map_state[row_lo, col_lo] + map_state[row_hi, col_lo]
                       + map_state[row_lo, col_hi]
                       + map_state[row_hi, col_hi] - 4 * ROAD)
            dist[i] = [overlap, overlap, 0]

        # Evaluate penalty term. dist is treated as if its Jacobian
        # w.r.t. the data were the identity, with zero second derivative.
        l, ls, lss = evall1l2term(
            wp, dist,
            np.tile(np.eye(dim_sensor), [T, 1, 1]),
            np.zeros((T, dim_sensor, dim_sensor, dim_sensor)),
            self._hyperparams['l1'], self._hyperparams['l2'],
            self._hyperparams['alpha'])

        final_l += l
        sample.agent.pack_data_x(final_lx, ls, data_types=[data_type])
        sample.agent.pack_data_x(final_lxx, lss,
                                 data_types=[data_type, data_type])

    return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux