def eval(self, sample): """ Evaluate cost function and derivatives on a sample. Args: sample: A single sample """ T, dU, dX = sample.T, sample.dU, sample.dX orig_A, orig_b = self._hyperparams['A'], self._hyperparams['b'] # Discretize waypoint time steps. waypoint_step = np.ceil(T * self._hyperparams['waypoint_time']) A = np.zeros((T, dX + dU, dX + dU)) b = np.zeros((T, dX + dU)) if not isinstance(self._hyperparams['ramp_option'], list): self._hyperparams['ramp_option'] = [ self._hyperparams['ramp_option'] for _ in waypoint_step ] # Set up time-varying matrix and vector. start = 0 for i in range(len(waypoint_step)): wpm = get_ramp_multiplier(self._hyperparams['ramp_option'][i], waypoint_step[i] - start) for t in range(start, int(waypoint_step[i])): A[t, :, :] = wpm[t - start] * orig_A[i, :, :] b[t, :] = wpm[t - start] * orig_b[i, :] start = int(waypoint_step[i]) # Evaluate distance function. XU = np.concatenate([sample.get_X(), sample.get_U()], axis=1) dist = np.zeros((T, dX + dU)) for t in range(T): dist[t, :] = A[t, :, :].dot(XU[t, :]) + b[t, :] ix, iu = slice(dX), slice(dX, dX + dU) # Compute the loss. Al, Aly, Alyy = self._evalloss(dist) Alx = Aly[:, ix] Alu = Aly[:, iu] Alxx = Alyy[:, ix, ix] Aluu = Alyy[:, iu, iu] Alux = Alyy[:, ix, iu] # Compute state derivatives and value. Loss is the same. l = Al # First derivative given by A'*Aly. # Second derivative given by A'*Alyy*A. lx, lu = np.zeros((T, dX)), np.zeros((T, dU)) lxx, luu = np.zeros((T, dX, dX)), np.zeros((T, dU, dU)) lux = np.zeros((T, dU, dX)) for t in range(T): lx[t, :] = A[t, ix, ix].T.dot(Alx[t, :]) lu[t, :] = A[t, iu, iu].T.dot(Alu[t, :]) lxx[t, :, :] = A[t, ix, ix].T.dot(Alxx[t, :, :]).dot(A[t, ix, ix]) luu[t, :, :] = A[t, iu, iu].T.dot(Aluu[t, :, :]).dot(A[t, iu, iu]) lux[t, :, :] = A[t, ix, iu].T.dot(Alux[t, :, :]).dot(A[t, iu, ix]) return l, lx, lu, lxx, luu, lux
def eval(self, sample): """ Evaluate cost function and derivatives on a sample. Args: sample: A single sample """ T, dU, dX = sample.T, sample.dU, sample.dX orig_A, orig_b = self._hyperparams['A'], self._hyperparams['b'] # Discretize waypoint time steps. waypoint_step = np.ceil(T * self._hyperparams['waypoint_time']) A = np.zeros((T, dX+dU, dX+dU)) b = np.zeros((T, dX+dU)) if not isinstance(self._hyperparams['ramp_option'], list): self._hyperparams['ramp_option'] = [ self._hyperparams['ramp_option'] for _ in waypoint_step ] # Set up time-varying matrix and vector. start = 0 for i in range(len(waypoint_step)): wpm = get_ramp_multiplier(self._hyperparams['ramp_option'][i], waypoint_step[i] - start) for t in range(start, int(waypoint_step[i])): A[t, :, :] = wpm[t-start] * orig_A[i, :, :] b[t, :] = wpm[t-start] * orig_b[i, :] start = int(waypoint_step[i]) # Evaluate distance function. XU = np.concatenate([sample.get_X(), sample.get_U()], axis=1) dist = np.zeros((T, dX+dU)) for t in range(T): dist[t, :] = A[t, :, :].dot(XU[t, :]) + b[t, :] ix, iu = slice(dX), slice(dX, dX+dU) # Compute the loss. Al, Aly, Alyy = self._evalloss(dist) Alx = Aly[:, ix] Alu = Aly[:, iu] Alxx = Alyy[:, ix, ix] Aluu = Alyy[:, iu, iu] Alux = Alyy[:, ix, iu] # Compute state derivatives and value. Loss is the same. l = Al # First derivative given by A'*Aly. # Second derivative given by A'*Alyy*A. lx, lu = np.zeros((T, dX)), np.zeros((T, dU)) lxx, luu = np.zeros((T, dX, dX)), np.zeros((T, dU, dU)) lux = np.zeros((T, dU, dX)) for t in range(T): lx[t, :] = A[t, ix, ix].T.dot(Alx[t, :]) lu[t, :] = A[t, iu, iu].T.dot(Alu[t, :]) lxx[t, :, :] = A[t, ix, ix].T.dot(Alxx[t, :, :]).dot(A[t, ix, ix]) luu[t, :, :] = A[t, iu, iu].T.dot(Aluu[t, :, :]).dot(A[t, iu, iu]) lux[t, :, :] = A[t, ix, iu].T.dot(Alux[t, :, :]).dot(A[t, iu, ix]) return l, lx, lu, lxx, luu, lux
def eval(self, sample): """ Evaluate cost function and derivatives on a sample. Args: sample: A single sample """ T = sample.T Du = sample.dU Dx = sample.dX final_l = np.zeros(T) final_lu = np.zeros((T, Du)) final_lx = np.zeros((T, Dx)) final_luu = np.zeros((T, Du, Du)) final_lxx = np.zeros((T, Dx, Dx)) final_lux = np.zeros((T, Du, Dx)) for data_type in self._hyperparams['data_types']: config = self._hyperparams['data_types'][data_type] wp = config['wp'] tgt = config['target_state'] x = sample.get(data_type) if 'average' in config: x = np.mean(x.reshape((T, ) + config['average']), axis=1) _, dim_sensor = x.shape num_sensor = config['average'][0] l = x.dot(np.array(wp).T) ls = np.tile(np.array(wp), [T, num_sensor]) / num_sensor lss = np.zeros( (T, dim_sensor * num_sensor, dim_sensor * num_sensor)) else: _, dim_sensor = x.shape wpm = get_ramp_multiplier(self._hyperparams['ramp_option'], T, wp_final_multiplier=self. _hyperparams['wp_final_multiplier']) wp = wp * np.expand_dims(wpm, axis=-1) # Compute state penalty. #print(x.shape) dist = x - tgt # Evaluate penalty term. l, ls, lss = evall1l2term( wp, dist, np.tile(np.eye(dim_sensor), [T, 1, 1]), np.zeros((T, dim_sensor, dim_sensor, dim_sensor)), self._hyperparams['l1'], self._hyperparams['l2'], self._hyperparams['alpha']) final_l += l sample.agent.pack_data_x(final_lx, ls, data_types=[data_type]) sample.agent.pack_data_x(final_lxx, lss, data_types=[data_type, data_type]) return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
def eval(self, sample, obj_val_only = False): """ Evaluate cost function and derivatives on a sample. Args: sample: A single sample """ T = sample.T Du = sample.dU Dx = sample.dX cur_fcn = sample.agent.fcns[self.cur_cond_idx]['fcn_obj'] final_l = np.zeros(T) if not obj_val_only: final_lu = np.zeros((T, Du)) final_lx = np.zeros((T, Dx)) final_luu = np.zeros((T, Du, Du)) final_lxx = np.zeros((T, Dx, Dx)) final_lux = np.zeros((T, Du, Dx)) x = sample.get(CUR_LOC) _, dim = x.shape # Time step-specific weights wpm = get_ramp_multiplier( self._hyperparams['ramp_option'], T, wp_final_multiplier=self._hyperparams['wp_final_multiplier'], wp_custom=self._hyperparams['wp_custom'] if 'wp_custom' in self._hyperparams else None ) if not obj_val_only: ls = np.empty((T, dim)) lss = np.empty((T, dim, dim)) cur_fcn.new_sample(batch_size="all") # Get noiseless gradient for t in range(T): final_l[t] = cur_fcn.evaluate(x[t,:][:,None]) if not obj_val_only: ls[t,:] = cur_fcn.grad(x[t,:][:,None])[:,0] lss[t,:,:] = cur_fcn.hess(x[t,:][:,None]) final_l = final_l * wpm if not obj_val_only: ls = ls * wpm[:,None] lss = lss * wpm[:,None,None] # Equivalent to final_lx[:,sensor_start_idx:sensor_end_idx] = ls sample.agent.pack_data_x(final_lx, ls, data_types=[CUR_LOC]) # Equivalent to final_lxx[:,sensor_start_idx:sensor_end_idx,sensor_start_idx:sensor_end_idx] = lss sample.agent.pack_data_x(final_lxx, lss, data_types=[CUR_LOC, CUR_LOC]) if obj_val_only: return (final_l,) else: return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
def eval(self, sample): """ Evaluate cost function and derivatives on a sample. Args: sample: A single sample """ T = sample.T dX = sample.dX dU = sample.dU # Initialize terms. l = np.zeros(T) lu = np.zeros((T, dU)) lx = np.zeros((T, dX)) luu = np.zeros((T, dU, dU)) lxx = np.zeros((T, dX, dX)) lux = np.zeros((T, dU, dX)) wp = self._hyperparams['wp'] obs = sample.get(self._hyperparams['obstacle_type']) x = sample.get(self._hyperparams['position_type']) pot = sample.get(self._hyperparams['potential_type']) wpm = get_ramp_multiplier( self._hyperparams['ramp_option'], T, wp_final_multiplier=self._hyperparams['wp_final_multiplier'] ) wp = wp * np.expand_dims(wpm, axis=-1) # Compute state penalty. obs_dist = x - obs sigma_square = self._hyperparams['sigma_square'] p_collision = np.exp(- np.sum(obs_dist ** 2, axis=1) / sigma_square) l_collision = p_collision * self._hyperparams['wp_col'] * wpm pot_prev = np.zeros_like(pot[:, 0]) pot_prev[0] = 0 # Penalty for first pot_prev[1:] = pot[:-1,0] p_survivability = 1 - p_collision l_progress = p_survivability * (pot[:, 0] - pot_prev) * self._hyperparams['wp_nf'] * wpm l = l_collision + l_progress # Evaluate penalty term. # l, ls, lss = evalhinglel2loss( # wp, dist, self._hyperparams['d_safe'], self._hyperparams['l2'], # ) # # # Add to current terms. # sample.agent.pack_data_x(lx, ls, data_types=[data_type]) # sample.agent.pack_data_x(lxx, lss, # data_types=[data_type, data_type]) return l, lx, lu, lxx, luu, lux
def eval(self, sample): """ Evaluate forward kinematics (end-effector penalties) cost. Temporary note: This implements the 'joint' penalty type from the matlab code, with the velocity/velocity diff/etc. penalties removed. (use CostState instead) Args: sample: A single sample. """ T = sample.T dX = sample.dX dU = sample.dU wpm = get_ramp_multiplier( self._hyperparams['ramp_option'], T, wp_final_multiplier=self._hyperparams['wp_final_multiplier'] ) wp = self._hyperparams['wp'] * np.expand_dims(wpm, axis=-1) # Initialize terms. l = np.zeros(T) lu = np.zeros((T, dU)) lx = np.zeros((T, dX)) luu = np.zeros((T, dU, dU)) lxx = np.zeros((T, dX, dX)) lux = np.zeros((T, dU, dX)) # Choose target. tgt = self._hyperparams['target_end_effector'] pt = sample.get(END_EFFECTOR_POINTS) dist = pt - tgt # TODO - These should be partially zeros so we're not double # counting. # (see pts_jacobian_only in matlab costinfos code) jx = sample.get(END_EFFECTOR_POINT_JACOBIANS) print "tgt: ", tgt.shape, \ "pt: ", pt.shape, \ "jx: ", jx.shape # Evaluate penalty term. Use estimated Jacobians and no higher # order terms. jxx_zeros = np.zeros((T, dist.shape[1], jx.shape[2], jx.shape[2])) # evall1l2term or evallogl2term l, ls, lss = self._hyperparams['evalnorm']( wp, dist, jx, jxx_zeros, self._hyperparams['l1'], self._hyperparams['l2'], self._hyperparams['alpha'] ) # Add to current terms. sample.agent.pack_data_x(lx, ls, data_types=[JOINT_ANGLES]) sample.agent.pack_data_x(lxx, lss, data_types=[JOINT_ANGLES, JOINT_ANGLES]) return l, lx, lu, lxx, luu, lux
def eval(self, sample): """ Evaluate cost function and derivatives on a sample. Args: sample: A single sample """ T = sample.T Du = sample.dU Dx = sample.dX final_l = np.zeros(T) final_lu = np.zeros((T, Du)) final_lx = np.zeros((T, Dx)) final_luu = np.zeros((T, Du, Du)) final_lxx = np.zeros((T, Dx, Dx)) final_lux = np.zeros((T, Du, Dx)) for data_type in self._hyperparams['data_types']: config = self._hyperparams['data_types'][data_type] wp = config['wp'] tgt = config['target_state'] x = sample.get(data_type) _, dim_sensor = x.shape wpm = get_ramp_multiplier( self._hyperparams['ramp_option'], T, wp_final_multiplier=self._hyperparams['wp_final_multiplier']) wp = wp * np.expand_dims(wpm, axis=-1) # Compute state penalty # dist = x[8] - tgt[2] dist = x - tgt dist[:, :8] = 0. dist[:, 9:12] = 0 # print('dist', dist) # Evaluate penalty term l, ls, lss = evall1l2term( wp, dist, np.tile(np.eye(dim_sensor), [T, 1, 1]), np.zeros((T, dim_sensor, dim_sensor, dim_sensor)), self._hyperparams['l1'], self._hyperparams['l2'], self._hyperparams['alpha']) final_l += l sample.agent.pack_data_x(final_lx, ls, data_types=[data_type]) sample.agent.pack_data_x(final_lxx, lss, data_types=[data_type, data_type]) return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
def eval(self, sample): """ Evaluate forward kinematics (end-effector penalties) cost. Temporary note: This implements the 'joint' penalty type from the matlab code, with the velocity/velocity diff/etc. penalties removed. (use CostState instead) Args: sample: A single sample. """ T = sample.T dX = sample.dX dU = sample.dU wpm = get_ramp_multiplier( self._hyperparams['ramp_option'], T, wp_final_multiplier=self._hyperparams['wp_final_multiplier'] ) wp = self._hyperparams['wp'] * np.expand_dims(wpm, axis=-1) # Initialize terms. l = np.zeros(T) lu = np.zeros((T, dU)) lx = np.zeros((T, dX)) luu = np.zeros((T, dU, dU)) lxx = np.zeros((T, dX, dX)) lux = np.zeros((T, dU, dX)) # Choose target. tgt = self._hyperparams['target_end_effector'] pt = sample.get(END_EFFECTOR_POINTS) dist = pt - tgt # TODO - These should be partially zeros so we're not double # counting. # (see pts_jacobian_only in matlab costinfos code) jx = sample.get(END_EFFECTOR_POINT_JACOBIANS) # Evaluate penalty term. Use estimated Jacobians and no higher # order terms. jxx_zeros = np.zeros((T, dist.shape[1], jx.shape[2], jx.shape[2])) l, ls, lss = self._hyperparams['evalnorm']( wp, dist, jx, jxx_zeros, self._hyperparams['l1'], self._hyperparams['l2'], self._hyperparams['alpha'] ) # Add to current terms. sample.agent.pack_data_x(lx, ls, data_types=[JOINT_ANGLES]) sample.agent.pack_data_x(lxx, lss, data_types=[JOINT_ANGLES, JOINT_ANGLES]) return l, lx, lu, lxx, luu, lux
def eval(self, sample): """ Evaluate cost function and derivatives on a sample. Args: sample: A single sample """ T = sample.T Du = sample.dU Dx = sample.dX final_l = np.zeros(T) final_lu = np.zeros((T, Du)) final_lx = np.zeros((T, Dx)) final_luu = np.zeros((T, Du, Du)) final_lxx = np.zeros((T, Dx, Dx)) final_lux = np.zeros((T, Du, Dx)) for data_type in self._hyperparams['data_types']: config = self._hyperparams['data_types'][data_type] wp = config['wp'] tgt = config['target_state'] x = sample.get(data_type) _, dim_sensor = x.shape wpm = get_ramp_multiplier( self._hyperparams['ramp_option'], T, wp_final_multiplier=self._hyperparams['wp_final_multiplier'] ) wp = wp * np.expand_dims(wpm, axis=-1) # Compute state penalty. dist = x - tgt # Evaluate penalty term. l, ls, lss = evall1l2term( wp, dist, np.tile(np.eye(dim_sensor), [T, 1, 1]), np.zeros((T, dim_sensor, dim_sensor, dim_sensor)), self._hyperparams['l1'], self._hyperparams['l2'], self._hyperparams['alpha'] ) final_l += l sample.agent.pack_data_x(final_lx, ls, data_types=[data_type]) sample.agent.pack_data_x(final_lxx, lss, data_types=[data_type, data_type]) return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
def eval(self, sample): """ Evaluate cost function and derivatives on a sample. Args: sample: A single sample """ T = sample.T dX = sample.dX dU = sample.dU # Initialize terms. l = np.zeros(T) lu = np.zeros((T, dU)) lx = np.zeros((T, dX)) luu = np.zeros((T, dU, dU)) lxx = np.zeros((T, dX, dX)) lux = np.zeros((T, dU, dX)) data_type = self._hyperparams['position_type'] wp = self._hyperparams['wp'] obs = sample.get(self._hyperparams['obstacle_type']) x = sample.get(data_type) wpm = get_ramp_multiplier( self._hyperparams['ramp_option'], T, wp_final_multiplier=self._hyperparams['wp_final_multiplier']) wp = wp * np.expand_dims(wpm, axis=-1) # Compute state penalty. dist = x - obs # Evaluate penalty term. l, ls, lss = evalhinglel2loss( wp, dist, self._hyperparams['d_safe'], self._hyperparams['l2'], ) # Add to current terms. sample.agent.pack_data_x(lx, ls, data_types=[data_type]) sample.agent.pack_data_x(lxx, lss, data_types=[data_type, data_type]) return l, lx, lu, lxx, luu, lux
def eval(self, sample): """ Evaluate cost function and derivatives on a sample. Args: sample: A single sample """ T = sample.T Du = sample.dU Dx = sample.dX final_l = np.zeros(T) final_lu = np.zeros((T, Du)) final_lx = np.zeros((T, Dx)) final_luu = np.zeros((T, Du, Du)) final_lxx = np.zeros((T, Dx, Dx)) final_lux = np.zeros((T, Du, Dx)) tt_total = 0 for data_type in self._hyperparams['data_types']: config = self._hyperparams['data_types'][data_type] wp = config['wp'] tgt = config['target_state'] max_distance = config['max_distance'] outside_cost = config['outside_cost'] inside_cost = config['inside_cost'] x = sample.get(data_type) _, dim_sensor = x.shape wpm = get_ramp_multiplier( self._hyperparams['ramp_option'], T, wp_final_multiplier=self._hyperparams['wp_final_multiplier'] ) wp = wp * np.expand_dims(wpm, axis=-1) # Compute binary region penalty. dist = np.abs(x - tgt) for t in xrange(T): # If at least one of the coordinates is outside of # the region assign outside_cost, otherwise inside_cost. if np.sum(dist[t]) > max_distance: final_l[t] += outside_cost else: final_l[t] += inside_cost return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
def eval(self, sample): T = sample.T dX = sample.dX dU = sample.dU wpm = get_ramp_multiplier( self._hyperparams['ramp_option'], T, wp_final_multiplier=self._hyperparams['wp_final_multiplier'] ) wp = self._hyperparams['wp'] * np.expand_dims(wpm, axis=-1) l = np.zeros(T) lu = np.zeros((T, dU)) lx = np.zeros((T, dX)) luu = np.zeros((T, dU, dU)) lxx = np.zeros((T, dX, dX)) lux = np.zeros((T, dU, dX)) pt = sample.get(END_EFFECTOR_POINTS) # The following is hard-coded: determines position of gripper and block # and computes the distance between them pt_ee_l = pt[:, 0:3] pt_ee_r = pt[:, 3:6] pt_ee_avg = 0.5 * (pt_ee_r + pt_ee_l) pt_block = pt[:, 6:9] dist = pt_ee_avg - pt_block wp = np.ones((T,3)) jx = sample.get(END_EFFECTOR_POINT_JACOBIANS) # Also hard-coded is this Jacobian calculation jx_1 = 0.5 * (jx[:, 0:3, :] + jx[:, 3:6, :]) - jx[:, 6:9, :] jxx_zeros = np.zeros((T, dist.shape[1], jx.shape[2], jx.shape[2])) l, ls, lss = self._hyperparams['evalnorm']( wp, dist, jx_1, jxx_zeros, self._hyperparams['l1'], self._hyperparams['l2'], self._hyperparams['alpha'] ) sample.agent.pack_data_x(lx, ls, data_types=[JOINT_ANGLES]) sample.agent.pack_data_x(lxx, lss, data_types=[JOINT_ANGLES, JOINT_ANGLES]) return l, lx, lu, lxx, luu, lux
def eval_mu(self, mu, T, Du, Dx): """ Evaluate cost function and derivatives on a sample. Args: sample: A single sample """ final_l = np.zeros(T) final_lu = np.zeros((T, Du)) final_lx = np.zeros((T, Dx)) final_luu = np.zeros((T, Du, Du)) final_lxx = np.zeros((T, Dx, Dx)) final_lux = np.zeros((T, Du, Dx)) for data_type in self._hyperparams['data_types']: config = self._hyperparams['data_types'][data_type] wp = config['wp'] tgt = config['target_state'] x = mu[:, 0:6] _, dim_sensor = x.shape wpm = get_ramp_multiplier( self._hyperparams['ramp_option'], T, wp_final_multiplier=self._hyperparams['wp_final_multiplier']) wp = wp * np.expand_dims(wpm, axis=-1) # Compute state penalty. dist = x - tgt # Evaluate penalty term. l, ls, lss = evall1l2term( wp, dist, np.tile(np.eye(dim_sensor), [T, 1, 1]), np.zeros((T, dim_sensor, dim_sensor, dim_sensor)), self._hyperparams['l1'], self._hyperparams['l2'], self._hyperparams['alpha']) final_l += l return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
def eval(self, sample): """ Evaluate cost function and derivatives on a sample. Args: sample: A single sample """ T, dU, dX = sample.T, sample.dU, sample.dX # Discretize waypoint time steps. waypoint_step = np.ceil( T * self._hyperparams['waypoint_time']) # ex. [8., 20.] if not isinstance(self._hyperparams['ramp_option'], list): self._hyperparams['ramp_option'] = [ self._hyperparams['ramp_option'] for _ in waypoint_step ] final_l = np.zeros(T) final_lu = np.zeros((T, dU)) final_lx = np.zeros((T, dX)) final_luu = np.zeros((T, dU, dU)) final_lxx = np.zeros((T, dX, dX)) final_lux = np.zeros((T, dU, dX)) for data_type in self._hyperparams['data_types']: config = self._hyperparams['data_types'][data_type] start = 0 for i in range(len(waypoint_step)): wp = config[i]['wp'] tgt = config[i]['target_state'] x = sample.get(data_type) ##print "\noriginal x:\n", x _, dim_sensor = x.shape wpm = get_ramp_multiplier(self._hyperparams['ramp_option'][i], int(waypoint_step[i] - start)) wp = wp * np.expand_dims(wpm, axis=-1) x = x[start:int(waypoint_step[i]), :] ##print "modified x:\n", x ##print "target:\n", tgt # Compute state penalty. dist = x - tgt ##print "dist:\n", dist # Evaluate penalty term. l, ls, lss = evall1l2term( wp, dist, np.tile(np.eye(dim_sensor), [int(waypoint_step[i] - start), 1, 1]), np.zeros( (int(waypoint_step[i] - start), dim_sensor, dim_sensor, dim_sensor)), self._hyperparams['l1'], self._hyperparams['l2'], self._hyperparams['alpha']) final_l[start:int(waypoint_step[i])] = l final_lx[start:int(waypoint_step[i]), 0:7] = ls final_lxx[start:int(waypoint_step[i]), 0:7, 0:7] = lss start = int(waypoint_step[i]) ##print "Exit on cost lin wp ksu" ##exit() #return l, lx, lu, lxx, luu, lux return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
def eval(self, sample): """ Evaluate cost function and derivatives on a sample. Args: sample: A single sample """ T = sample.T Du = sample.dU Dx = sample.dX final_l = np.zeros(T) final_lu = np.zeros((T, Du)) final_lx = np.zeros((T, Dx)) final_luu = np.zeros((T, Du, Du)) final_lxx = np.zeros((T, Dx, Dx)) final_lux = np.zeros((T, Du, Dx)) for data_type in self._hyperparams['data_types']: config = self._hyperparams['data_types'][data_type] wp = config['wp'] tgt = config['target_state'] # print("tgt for cost_state evaluation", tgt) # Modified by RH map_size = config['map_size'] # TODO # if agent is not bus, tgt = config['target_state'] # if agent is bus, update the target_state for each round of gpsMain.run() if map_size: # it's AgentBus target_state = config['target_state'] tgt = [target_state[0]-map_size[1]/2, map_size[0]/2-target_state[1], target_state[2]] x = sample.get(data_type) _, dim_sensor = x.shape wpm = get_ramp_multiplier( self._hyperparams['ramp_option'], T, wp_final_multiplier=self._hyperparams['wp_final_multiplier'] ) wp = wp * np.expand_dims(wpm, axis=-1) # Compute state penalty. dist = x - tgt # Evaluate penalty term. l, ls, lss = evall1l2term( wp, dist, np.tile(np.eye(dim_sensor), [T, 1, 1]), np.zeros((T, dim_sensor, dim_sensor, dim_sensor)), self._hyperparams['l1'], self._hyperparams['l2'], self._hyperparams['alpha'] ) final_l += l sample.agent.pack_data_x(final_lx, ls, data_types=[data_type]) sample.agent.pack_data_x(final_lxx, lss, data_types=[data_type, data_type]) # print("new tgt", tgt) # print("cost_state", final_l) return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
def eval(self, sample): """ Evaluate cost function and derivatives on a sample. Args: sample: A single sample """ T = sample.T Du = sample.dU Dx = sample.dX final_l = np.zeros(T) final_lu = np.zeros((T, Du)) final_lx = np.zeros((T, Dx)) final_luu = np.zeros((T, Du, Du)) final_lxx = np.zeros((T, Dx, Dx)) final_lux = np.zeros((T, Du, Dx)) for data_type in self._hyperparams['data_types']: # print("data_type", data_type) config = self._hyperparams['data_types'][data_type] wp = config['wp'] x = sample.get(data_type) _, dim_sensor = x.shape # print("x in cost_collision", x.shape) # print(x) wpm = get_ramp_multiplier( self._hyperparams['ramp_option'], T, wp_final_multiplier=self._hyperparams['wp_final_multiplier']) wp = wp * np.expand_dims(wpm, axis=-1) # Compute state penalty. # TODO # create a state map with all polygons represented as 1 # draw a corresponding box to represent the bus, and then use the dot multiplication of overlapping to indicate collision loss map_size = config['map_size'] map_state = config['map_state'] # print("map_size for collision", map_size) target_state = config['target_state'] # print(target_state) # print("is target on road", map_state[ int(target_state[1]), int(target_state[0])]) target_state = [ target_state[0] - map_size[1] / 2, map_size[0] / 2 - target_state[1], target_state[2] ] # print("target for cost_collision") # print(target_state) dist = np.zeros(x.shape) for i in range(len(x)): # find the rectangle bus, given the center position x1 = int(BUS_LENGTH / 2 * np.cos(x[i][2]) + BUS_WIDTH / 2 * np.sin(x[i][2]) + x[i][0]) x2 = int(BUS_LENGTH / 2 * np.cos(x[i][2]) - BUS_WIDTH / 2 * np.sin(x[i][2]) + x[i][0]) x3 = int(-BUS_LENGTH / 2 * np.cos(x[i][2]) + BUS_WIDTH / 2 * np.sin(x[i][2]) + x[i][0]) x4 = int(-BUS_LENGTH / 2 * np.cos(x[i][2]) - BUS_WIDTH / 2 * np.sin(x[i][2]) + x[i][0]) y1 = int(BUS_LENGTH / 2 * np.sin(x[i][2]) + BUS_WIDTH / 2 * np.cos(x[i][2]) + x[i][1]) y2 = int(BUS_LENGTH / 2 * np.sin(x[i][2]) - BUS_WIDTH / 2 * np.cos(x[i][2]) + x[i][1]) y3 = int(-BUS_LENGTH / 2 * np.sin(x[i][2]) + BUS_WIDTH / 2 * np.cos(x[i][2]) + x[i][1]) y4 = int(-BUS_LENGTH / 2 * np.sin(x[i][2]) - BUS_WIDTH / 2 * np.cos(x[i][2]) + x[i][1]) xmin = min(x1, x2, x3, x4) xmax = max(x1, x2, x3, x4) ymin = min(y1, y2, y3, y4) ymax = max(y1, y2, y3, y4) # simplify the overlapping as the sum of four endpoints of a bounding box # print(xmin, xmax, ymin, ymax) # which are based on box2D coords xmin = xmin + map_size[1] / 2 xmax = xmax + map_size[1] / 2 ymin = map_size[0] / 2 - ymin ymax = map_size[0] / 2 - ymax # print(xmin, xmax, ymin, ymax) # print(map_state[xmin, ymin], map_state[xmin, ymax], map_state[xmax, ymin], map_state[xmax, ymax] ) dist_temp = map_state[ymin, xmin] + map_state[ ymax, xmin] + map_state[ymin, xmax] + map_state[ymax, xmax] - 4 * ROAD # print("dist", dist_temp) dist[i] = [dist_temp, dist_temp, 0] # Evaluate penalty term. l, ls, lss = evall1l2term( wp, dist, np.tile(np.eye(dim_sensor), [T, 1, 1]), np.zeros((T, dim_sensor, dim_sensor, dim_sensor)), self._hyperparams['l1'], self._hyperparams['l2'], self._hyperparams['alpha']) final_l += l sample.agent.pack_data_x(final_lx, ls, data_types=[data_type]) sample.agent.pack_data_x(final_lxx, lss, data_types=[data_type, data_type]) # print("return collision_cost") # print(final_l, final_lx, final_lu, final_lxx, final_luu, final_lux) # print("dsit_temp", dist_temp) # print("cost_collisoion", final_l[0]) return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
def eval(self, sample): """ Evaluate forward kinematics (end-effector penalties) cost. Temporary note: This implements the 'joint' penalty type from the matlab code, with the velocity/velocity diff/etc. penalties removed. (use CostState instead) Args: sample: A single sample. """ T = sample.T dX = sample.dX dU = sample.dU wpm = get_ramp_multiplier( self._hyperparams['ramp_option'], T, wp_final_multiplier=self._hyperparams['wp_final_multiplier']) wp = self._hyperparams['wp'] * np.expand_dims(wpm, axis=-1) # Initialize terms. l = np.zeros(T) lu = np.zeros((T, dU)) lx = np.zeros((T, dX)) luu = np.zeros((T, dU, dU)) lxx = np.zeros((T, dX, dX)) lux = np.zeros((T, dU, dX)) # Choose target. tgt = self._hyperparams['target_end_effector'] pt = sample.get(END_EFFECTOR_POINTS) dist_shape = pt.shape if tgt.ndim > 1: #multi-goal, expand pt in goal dimension and tgt in time dimension for broadcast pt = np.expand_dims(pt, axis=0) tgt = np.expand_dims(tgt, axis=1) dist = pt - tgt #goals*time*dX goal_idx = np.argmin(np.sum(np.power(dist, 2), axis=-1), axis=0) dist = dist[goal_idx, range(T)] else: dist = pt - tgt assert dist.shape == dist_shape # TODO - These should be partially zeros so we're not double # counting. # (see pts_jacobian_only in matlab costinfos code) jx = sample.get(END_EFFECTOR_POINT_JACOBIANS) # Evaluate penalty term. Use estimated Jacobians and no higher # order terms. jxx_zeros = np.zeros((T, dist.shape[1], jx.shape[2], jx.shape[2])) l, ls, lss = self._hyperparams['evalnorm'](wp, dist, jx, jxx_zeros, self._hyperparams['l1'], self._hyperparams['l2'], self._hyperparams['alpha']) # Add to current terms. sample.agent.pack_data_x(lx, ls, data_types=[JOINT_ANGLES]) sample.agent.pack_data_x(lxx, lss, data_types=[JOINT_ANGLES, JOINT_ANGLES]) return l, lx, lu, lxx, luu, lux