def _set_map(self): if not self.map_inited: print( 'Gym env - Sim map not initialized, call env.init_map() to init map.' ) map_request_proto = sim_requests_pb2.SimRequest() map_request_proto.type = 1 map_request_proto.map_request.map.extend( (1. - self.map_img / 255.).flatten().tolist()) map_request_proto.map_request.origin_x = self.origin[0] map_request_proto.map_request.origin_y = self.origin[1] map_request_proto.map_request.map_resolution = self.map_resolution map_request_proto.map_request.free_threshold = self.free_thresh map_request_proto.map_request.map_height = self.map_height map_request_proto.map_request.map_width = self.map_width map_request_string = map_request_proto.SerializeToString() self.socket.send(map_request_string) sim_response_string = self.socket.recv() sim_response_proto = sim_requests_pb2.SimResponse() sim_response_proto.ParseFromString(sim_response_string) set_map_result = sim_response_proto.map_result.result if set_map_result == 1: print('Gym env - Set map failed, exiting...') sys.exit()
def _set_map(self): """ Sets the map for the simulator instance """ if not self.map_inited: print('Gym env - Sim map not initialized, call env.init_map() to init map.') # create and fill in protobuf map_request_proto = sim_requests_pb2.SimRequest() map_request_proto.type = 1 map_request_proto.map_request.map.extend((1. - self.map_img/255.).flatten().tolist()) map_request_proto.map_request.origin_x = self.origin[0] map_request_proto.map_request.origin_y = self.origin[1] map_request_proto.map_request.map_resolution = self.map_resolution # TODO: double check if this value is valid map_request_proto.map_request.free_threshold = self.free_thresh map_request_proto.map_request.map_height = self.map_height map_request_proto.map_request.map_width = self.map_width # serialization map_request_string = map_request_proto.SerializeToString() # send set map request # print('Gym env - Sending set map request...') self.socket.send(map_request_string) # print('Gym env - Map request sent.') # receive response from sim instance sim_response_string = self.socket.recv() # parse map response proto sim_response_proto = sim_requests_pb2.SimResponse() sim_response_proto.ParseFromString(sim_response_string) # get results set_map_result = sim_response_proto.map_result.result if set_map_result == 1: print('Gym env - Set map failed, exiting...') sys.exit()
def update_params(self, mu, h_cg, l_r, cs_f, cs_r, I_z, mass, exe_path, double_finish=False): self.params = [mu, h_cg, l_r, cs_f, cs_r, I_z, mass] self.params_set = True if self.sim_p is None: self._start_executable(exe_path) self._set_map() self.double_finish = double_finish update_param_proto = sim_requests_pb2.SimRequest() update_param_proto.type = 3 update_param_proto.update_request.mu = mu update_param_proto.update_request.h_cg = h_cg update_param_proto.update_request.l_r = l_r update_param_proto.update_request.cs_f = cs_f update_param_proto.update_request.cs_r = cs_r update_param_proto.update_request.I_z = I_z update_param_proto.update_request.mass = mass update_param_string = update_param_proto.SerializeToString() self.socket.send(update_param_string) update_response_string = self.socket.recv() update_response_proto = sim_requests_pb2.SimResponse() update_response_proto.ParseFromString(update_response_string) if update_response_proto.update_resp.result: print('Gym env - Update param failed') return None
def update_params(self, mu, h_cg, l_r, cs_f, cs_r, I_z, mass, exe_path): self.params = [mu, h_cg, l_r, cs_f, cs_r, I_z, mass] self.params_set = True if self.sim_p is None: # print('starting ex and setting map') self._start_executable(exe_path) self._set_map() # print('before creating proto') # create update proto update_param_proto = sim_requests_pb2.SimRequest() update_param_proto.type = 3 update_param_proto.update_request.mu = mu update_param_proto.update_request.h_cg = h_cg update_param_proto.update_request.l_r = l_r update_param_proto.update_request.cs_f = cs_f update_param_proto.update_request.cs_r = cs_r update_param_proto.update_request.I_z = I_z update_param_proto.update_request.mass = mass # serialize reset proto update_param_string = update_param_proto.SerializeToString() # print('proto serialized') # send update param request self.socket.send(update_param_string) # print('Gym env - Update param request sent.') # receive response update_response_string = self.socket.recv() update_response_proto = sim_requests_pb2.SimResponse() update_response_proto.ParseFromString(update_response_string) if update_response_proto.update_resp.result: print('Gym env - Update param failed') return None
def reset(self, poses=None): self.current_time = 0.0 self.in_collision = False self.collision_angles = None self.num_toggles = 0 self.near_start = True self.near_starts = np.array([True] * self.num_agents) self.toggle_list = np.zeros((self.num_agents, )) if poses: pose_x = poses['x'] pose_y = poses['y'] pose_theta = poses['theta'] self.start_x = pose_x[0] self.start_y = pose_y[0] self.start_theta = pose_theta[0] self.start_xs = np.array(pose_x) self.start_ys = np.array(pose_y) self.start_thetas = np.array(pose_theta) self.start_rot = np.array( [[np.cos(-self.start_theta), -np.sin(-self.start_theta)], [np.sin(-self.start_theta), np.cos(-self.start_theta)]]) reset_request_proto = sim_requests_pb2.SimRequest() reset_request_proto.type = 4 reset_request_proto.reset_bypose_request.num_cars = self.num_agents reset_request_proto.reset_bypose_request.ego_idx = 0 reset_request_proto.reset_bypose_request.car_x.extend(pose_x) reset_request_proto.reset_bypose_request.car_y.extend(pose_y) reset_request_proto.reset_bypose_request.car_theta.extend( pose_theta) reset_request_string = reset_request_proto.SerializeToString() self.socket.send(reset_request_string) else: self.start_x = 0.0 self.start_y = 0.0 self.start_theta = 0.0 self.start_rot = np.array( [[np.cos(-self.start_theta), -np.sin(-self.start_theta)], [np.sin(-self.start_theta), np.cos(-self.start_theta)]]) reset_request_proto = sim_requests_pb2.SimRequest() reset_request_proto.type = 2 reset_request_proto.reset_request.num_cars = self.num_agents reset_request_proto.reset_request.ego_idx = 0 reset_request_string = reset_request_proto.SerializeToString() self.socket.send(reset_request_string) reset_response_string = self.socket.recv() reset_response_proto = sim_requests_pb2.SimResponse() reset_response_proto.ParseFromString(reset_response_string) if reset_response_proto.reset_resp.result: print('Gym env - Reset failed') return None vels = [0.0] * self.num_agents angs = [0.0] * self.num_agents action = {'ego_idx': self.ego_idx, 'speed': vels, 'steer': angs} obs, reward, done, info = self.step(action) return obs, reward, done, info
def update_params(self, mu, h_cg, l_r, cs_f, cs_r, I_z, mass, exe_path, double_finish=False): # if not self.sim_p is None: # print('Gym env - Sim server exists, killing...') # self.socket.send(b'dead') # self.sim_p.kill() # os.kill(self.sim_p.pid, signal.SIGINT) # self.sim_p = None # print('in update params') self.params = [mu, h_cg, l_r, cs_f, cs_r, I_z, mass] self.params_set = True if self.sim_p is None: # print('starting ex and setting map') self._start_executable(exe_path) self._set_map() self.double_finish = double_finish # print('before creating proto') # create update proto update_param_proto = sim_requests_pb2.SimRequest() update_param_proto.type = 3 update_param_proto.update_request.mu = mu update_param_proto.update_request.h_cg = h_cg update_param_proto.update_request.l_r = l_r update_param_proto.update_request.cs_f = cs_f update_param_proto.update_request.cs_r = cs_r update_param_proto.update_request.I_z = I_z update_param_proto.update_request.mass = mass # serialize reset proto update_param_string = update_param_proto.SerializeToString() # print('proto serialized') # send update param request self.socket.send(update_param_string) # print('Gym env - Update param request sent.') # receive response update_response_string = self.socket.recv() update_response_proto = sim_requests_pb2.SimResponse() update_response_proto.ParseFromString(update_response_string) if update_response_proto.update_resp.result: print('Gym env - Update param failed') return None
def step(self, action): # can't step if params not set if not self.params_set: print( 'ERROR - Gym Env - Params not set, call update params before stepping.' ) sys.exit() # action is a list of steering angles + command velocities # also a ego car index # action should a DICT with {'ego_idx': int, 'speed':[], 'steer':[]} step_request_proto = sim_requests_pb2.SimRequest() step_request_proto.type = 0 step_request_proto.step_request.ego_idx = action['ego_idx'] step_request_proto.step_request.requested_vel.extend(action['speed']) step_request_proto.step_request.requested_ang.extend(action['steer']) # serialization step_request_string = step_request_proto.SerializeToString() # send step request self.socket.send(step_request_string) # receive response from sim instance sim_response_string = self.socket.recv() # print('Gym env - Received response for step request.') # parse map response proto sim_response_proto = sim_requests_pb2.SimResponse() sim_response_proto.ParseFromString(sim_response_string) # get results # make sure we have the right type of response response_type = sim_response_proto.type # TODO: also check for stepping fail if not response_type == 0: print('Gym env - Wrong response type for stepping, exiting...') sys.exit() observations_proto = sim_response_proto.sim_obs # make sure the ego idx matches if not observations_proto.ego_idx == action['ego_idx']: print('Gym env - Ego index mismatch, exiting...') sys.exit() # get observations carobs_list = observations_proto.observations # construct observation dict # Observation DICT, assume indices consistent: {'ego_idx':int, 'scans':[[]], 'poses_x':[], 'poses_y':[], 'poses_theta':[], 'linear_vels_x':[], 'linear_vels_y':[], 'ang_vels_z':[], 'collisions':[], 'collision_angles':[]} obs = { 'ego_idx': observations_proto.ego_idx, 'scans': [], 'poses_x': [], 'poses_y': [], 'poses_theta': [], 'linear_vels_x': [], 'linear_vels_y': [], 'ang_vels_z': [], 'collisions': [], 'collision_angles': [], 'lap_times': [], 'lap_counts': [] } for car_obs in carobs_list: obs['scans'].append(car_obs.scan) obs['poses_x'].append(car_obs.pose_x) obs['poses_y'].append(car_obs.pose_y) if abs(car_obs.theta) < np.pi: obs['poses_theta'].append(car_obs.theta) else: obs['poses_theta'].append(-((2 * np.pi) - car_obs.theta)) obs['linear_vels_x'].append(car_obs.linear_vel_x) obs['linear_vels_y'].append(car_obs.linear_vel_y) obs['ang_vels_z'].append(car_obs.ang_vel_z) obs['collisions'].append(car_obs.collision) obs['collision_angles'].append(car_obs.collision_angle) obs['lap_times'] = self.lap_times obs['lap_counts'] = self.lap_counts # TODO: do we need step reward? reward = self.timestep # update accumulated time in env self.current_time = self.current_time + self.timestep # TODO: donezo should be done in simulator? could be done here as well self._update_state(obs) if self.double_finish: done, temp = self._check_done() info = {'checkpoint_done': temp} else: done = self._check_done() info = {} # TODO: return obs, reward, done, info return obs, reward, done, info
def step(self, action): if not self.params_set: print( 'ERROR - Gym Env - Params not set, call update params before stepping.' ) sys.exit() step_request_proto = sim_requests_pb2.SimRequest() step_request_proto.type = 0 step_request_proto.step_request.ego_idx = action['ego_idx'] step_request_proto.step_request.requested_vel.extend(action['speed']) step_request_proto.step_request.requested_ang.extend(action['steer']) step_request_string = step_request_proto.SerializeToString() self.socket.send(step_request_string) sim_response_string = self.socket.recv() sim_response_proto = sim_requests_pb2.SimResponse() sim_response_proto.ParseFromString(sim_response_string) response_type = sim_response_proto.type if not response_type == 0: print('Gym env - Wrong response type for stepping, exiting...') sys.exit() observations_proto = sim_response_proto.sim_obs if not observations_proto.ego_idx == action['ego_idx']: print('Gym env - Ego index mismatch, exiting...') sys.exit() carobs_list = observations_proto.observations obs = { 'ego_idx': observations_proto.ego_idx, 'scans': [], 'poses_x': [], 'poses_y': [], 'poses_theta': [], 'linear_vels_x': [], 'linear_vels_y': [], 'ang_vels_z': [], 'collisions': [], 'collision_angles': [], 'min_dists': [] } for car_obs in carobs_list: obs['scans'].append(car_obs.scan) obs['poses_x'].append(car_obs.pose_x) obs['poses_y'].append(car_obs.pose_y) if abs(car_obs.theta) < np.pi: obs['poses_theta'].append(car_obs.theta) else: obs['poses_theta'].append(-((2 * np.pi) - car_obs.theta)) obs['linear_vels_x'].append(car_obs.linear_vel_x) obs['linear_vels_y'].append(car_obs.linear_vel_y) obs['ang_vels_z'].append(car_obs.ang_vel_z) obs['collisions'].append(car_obs.collision) obs['collision_angles'].append(car_obs.collision_angle) reward = self.timestep self.current_time = self.current_time + self.timestep self._update_state(obs) if self.double_finish: done, temp = self._check_done() info = {'checkpoint_done': temp} else: done = self._check_done() info = {} return obs, reward, done, info
def reset(self, poses=None): self.current_time = 0.0 self.in_collision = False self.collision_angles = None self.num_toggles = 0 self.near_start = True self.lap_check = [False, False] self.actual_s = [None, None] self.actual_d = [None, None] self.near_starts = np.array([True] * self.num_agents) self.toggle_list = np.zeros((self.num_agents, )) if poses: pose_x = poses['x'] pose_y = poses['y'] pose_theta = poses['theta'] self.start_x = pose_x[0] self.start_y = pose_y[0] self.start_theta = pose_theta[0] self.start_xs = np.array(pose_x) self.start_ys = np.array(pose_y) self.start_thetas = np.array(pose_theta) self.start_rot = np.array( [[np.cos(-self.start_theta), -np.sin(-self.start_theta)], [np.sin(-self.start_theta), np.cos(-self.start_theta)]]) # create reset by pose proto reset_request_proto = sim_requests_pb2.SimRequest() reset_request_proto.type = 4 reset_request_proto.reset_bypose_request.num_cars = self.num_agents reset_request_proto.reset_bypose_request.ego_idx = 0 reset_request_proto.reset_bypose_request.car_x.extend(pose_x) reset_request_proto.reset_bypose_request.car_y.extend(pose_y) reset_request_proto.reset_bypose_request.car_theta.extend( pose_theta) reset_request_string = reset_request_proto.SerializeToString() self.socket.send(reset_request_string) else: # create reset proto self.start_x = 0.0 self.start_y = 0.0 self.start_theta = 0.0 self.start_rot = np.array( [[np.cos(-self.start_theta), -np.sin(-self.start_theta)], [np.sin(-self.start_theta), np.cos(-self.start_theta)]]) reset_request_proto = sim_requests_pb2.SimRequest() reset_request_proto.type = 2 reset_request_proto.reset_request.num_cars = self.num_agents reset_request_proto.reset_request.ego_idx = 0 # serialize reset proto reset_request_string = reset_request_proto.SerializeToString() # send reset proto string self.socket.send(reset_request_string) # receive response from sim reset_response_string = self.socket.recv() reset_response_proto = sim_requests_pb2.SimResponse() reset_response_proto.ParseFromString(reset_response_string) if reset_response_proto.reset_resp.result: print('Gym env - Reset failed') # TODO: failure handling return None # TODO: return with gym convention, one step? vels = [0.0] * self.num_agents angs = [0.0] * self.num_agents action = {'ego_idx': self.ego_idx, 'speed': vels, 'steer': angs} # print('Gym env - Reset done') obs, reward, done, info = self.step(action) # print('Gym env - step done for reset') return obs, reward, done, info
def step(self, action): # can't step if params not set if not self.params_set: print( 'ERROR - Gym Env - Params not set, call update params before stepping.' ) sys.exit() # action is a list of steering angles + command velocities # also a ego car index # action should a DICT with {'ego_idx': int, 'speed':[], 'steer':[]} step_request_proto = sim_requests_pb2.SimRequest() step_request_proto.type = 0 step_request_proto.step_request.ego_idx = 0 step_request_proto.step_request.requested_vel.extend(action[0]) step_request_proto.step_request.requested_ang.extend(action[1]) # serialization step_request_string = step_request_proto.SerializeToString() # send step request self.socket.send(step_request_string) # receive response from sim instance sim_response_string = self.socket.recv() # print('Gym env - Received response for step request.') # parse map response proto sim_response_proto = sim_requests_pb2.SimResponse() sim_response_proto.ParseFromString(sim_response_string) # get results # make sure we have the right type of response response_type = sim_response_proto.type # TODO: also check for stepping fail if not response_type == 0: print('Gym env - Wrong response type for stepping, exiting...') sys.exit() observations_proto = sim_response_proto.sim_obs # make sure the ego idx matches if not observations_proto.ego_idx == 0: print('Gym env - Ego index mismatch, exiting...') sys.exit() # get observations carobs_list = observations_proto.observations # construct observation dict # Observation DICT, assume indices consistent: {'ego_idx':int, 'scans':[[]], 'poses_x':[], 'poses_y':[], 'poses_theta':[], 'linear_vels_x':[], 'linear_vels_y':[], 'ang_vels_z':[], 'collisions':[], 'collision_angles':[]} simulation_input = { 'ego_idx': observations_proto.ego_idx, 'scans': [], 'poses_x': [], 'poses_y': [], 'poses_theta': [], 'linear_vels_x': [], 'linear_vels_y': [], 'ang_vels_z': [], 'collisions': [], 'collision_angles': [], 'lap_times': [], 'lap_counts': [] } for car_obs in carobs_list: simulation_input['scans'].append(car_obs.scan) simulation_input['poses_x'].append(car_obs.pose_x) simulation_input['poses_y'].append(car_obs.pose_y) if abs(car_obs.theta) < np.pi: simulation_input['poses_theta'].append(car_obs.theta) else: simulation_input['poses_theta'].append(-( (2 * np.pi) - car_obs.theta)) simulation_input['linear_vels_x'].append(car_obs.linear_vel_x) simulation_input['linear_vels_y'].append(car_obs.linear_vel_y) simulation_input['ang_vels_z'].append(car_obs.ang_vel_z) simulation_input['collisions'].append(car_obs.collision) simulation_input['collision_angles'].append( car_obs.collision_angle) simulation_input['lap_times'] = self.lap_times simulation_input['lap_counts'] = self.lap_counts # update accumulated time in env self.current_time = self.current_time + self.timestep self._update_state(simulation_input) obs1 = np.concatenate([ np.asarray(simulation_input['scans'][0]), np.asarray(simulation_input['scans'][1]), np.asarray(simulation_input['poses_x']), np.asarray(simulation_input['poses_y']), np.asarray(simulation_input['poses_theta']), np.asarray(simulation_input['linear_vels_x']), np.asarray(simulation_input['linear_vels_y']), np.asarray(simulation_input['ang_vels_z']) ]) obs2 = np.concatenate([ np.asarray(simulation_input['scans'][1]), np.asarray(simulation_input['scans'][0]), np.asarray( np.asarray([ simulation_input['poses_x'][1], simulation_input['poses_x'][0] ])), np.asarray( np.asarray([ simulation_input['poses_y'][1], simulation_input['poses_y'][0] ])), np.asarray( np.asarray([ simulation_input['poses_theta'][1], simulation_input['poses_theta'][0] ])), np.asarray( np.asarray([ simulation_input['linear_vels_x'][1], simulation_input['linear_vels_x'][0] ])), np.asarray( np.asarray([ simulation_input['linear_vels_y'][1], simulation_input['linear_vels_y'][0] ])), np.asarray( np.asarray([ simulation_input['ang_vels_z'][1], simulation_input['ang_vels_z'][0] ])) ]) reward1 = sqrt( simulation_input['linear_vels_x'][0] * simulation_input['linear_vels_x'][0] + simulation_input['linear_vels_y'][0] * simulation_input['linear_vels_y'][0]) \ - simulation_input['collisions'][1] * 10 - fabs(action[1][0]) * 0.1 reward2 = sqrt( simulation_input['linear_vels_x'][1] * simulation_input['linear_vels_x'][1] + simulation_input['linear_vels_y'][1] * simulation_input['linear_vels_y'][1]) \ - simulation_input['collisions'][1] * 10 - fabs(action[1][1]) * 0.1 done = False if simulation_input['collisions'][0]: reward1 = -10 done = True if simulation_input['collisions'][1]: reward2 = -10 done = True obs = np.array([obs1, obs2]) reward = np.array([reward1, reward2]) if done: print(reward, simulation_input['lap_times'], simulation_input['lap_counts']) done = np.array([done, done]) info = np.array([{}, {}]) return obs, reward, done, info