def run_step(self, debug=True):
    """
    Execute one step of local planning which involves running the
    longitudinal and lateral PID controllers to follow the waypoints trajectory.

    :param debug: boolean flag to activate waypoints debugging
    :return:
    """
    # not enough waypoints in the horizon? => add more!
    if not self._global_plan and len(self._waypoints_queue) < int(
            self._waypoints_queue.maxlen * 0.5):
        self._compute_next_waypoints(k=100)

    if len(self._waypoints_queue) == 0:
        control = carla.VehicleControl()
        control.steer = 0.0
        control.throttle = 0.0
        control.brake = 1.0
        control.hand_brake = False
        control.manual_gear_shift = False
        return control

    # Buffering the waypoints
    if not self._waypoint_buffer:
        for i in range(self._buffer_size):
            if self._waypoints_queue:
                self._waypoint_buffer.append(
                    self._waypoints_queue.popleft())
            else:
                break

    # current vehicle waypoint
    self._current_waypoint = self._map.get_waypoint(
        self._vehicle.get_location())
    # target waypoint
    self.target_waypoint, self._target_road_option = self._waypoint_buffer[0]
    # move using PID controllers
    control = self._vehicle_controller.run_step(self._target_speed,
                                                self.target_waypoint)

    # purge the queue of obsolete waypoints
    vehicle_transform = self._vehicle.get_transform()
    max_index = -1
    for i, (waypoint, _) in enumerate(self._waypoint_buffer):
        if distance_vehicle(waypoint, vehicle_transform) < self._min_distance:
            max_index = i
    if max_index >= 0:
        for i in range(max_index + 1):
            self._waypoint_buffer.popleft()

    if debug:
        draw_waypoints(self._vehicle.get_world(),
                       [self.target_waypoint],
                       self._vehicle.get_location().z + 1.0)

    return control
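
# Usage sketch (assumption, not part of the original file): how an agent control
# loop might consume run_step(). `local_planner`, `vehicle`, and `world` are
# hypothetical handles to an instantiated planner, a carla.Vehicle, and a
# carla.World; apply_control() and tick() are standard CARLA APIs.
def drive_one_episode(local_planner, vehicle, world, max_steps=1000):
    for _ in range(max_steps):
        control = local_planner.run_step(debug=False)  # carla.VehicleControl
        vehicle.apply_control(control)                 # send the PID output to the actor
        world.tick()                                   # advance one frame (synchronous mode)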
def get_target_waypoint(self, debug=False):
    """
    Compute and return the current target waypoint without issuing any control.

    :param debug: boolean flag to activate waypoints debugging
    :return: the target waypoint, or None if the waypoint queue is empty
    """
    # not enough waypoints in the horizon? => add more!
    if not self._global_plan and len(self._waypoints_queue) < int(
            self._waypoints_queue.maxlen * 0.5):
        self._compute_next_waypoints(k=100)

    if len(self._waypoints_queue) == 0:
        return None

    # Buffering the waypoints
    if not self._waypoint_buffer:
        for i in range(self._buffer_size):
            if self._waypoints_queue:
                self._waypoint_buffer.append(
                    self._waypoints_queue.popleft())
            else:
                break

    # current vehicle waypoint
    self._current_waypoint = self._map.get_waypoint(
        self._vehicle.get_location())
    # target waypoint
    if debug:
        print('_waypoints_queue, _waypoint_buffer',
              len(self._waypoints_queue), len(self._waypoint_buffer))
    self.target_waypoint, self._target_road_option = self._waypoint_buffer[0]

    # purge the queue of obsolete waypoints
    vehicle_transform = self._vehicle.get_transform()
    max_index = -1
    for i, (waypoint, _) in enumerate(self._waypoint_buffer):
        if distance_vehicle(waypoint, vehicle_transform) < self._min_distance:
            max_index = i
    if max_index >= 0:
        for i in range(max_index + 1):
            self._waypoint_buffer.popleft()

    if debug:
        draw_waypoints(self._vehicle.get_world(),
                       [self.target_waypoint],
                       self._vehicle.get_location().z + 1.0)

    return self.target_waypoint
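
# Illustrative sketch (assumption): turning the waypoint returned by
# get_target_waypoint() into longitudinal/lateral errors in the vehicle frame,
# e.g. as inputs to an external controller or an RL state. `planner` and
# `vehicle` are hypothetical handles; the CARLA attributes used
# (waypoint.transform.location, get_transform().rotation.yaw) are standard.
import math

def waypoint_error(planner, vehicle):
    wp = planner.get_target_waypoint()
    if wp is None:
        return None
    loc = vehicle.get_location()
    dx = wp.transform.location.x - loc.x
    dy = wp.transform.location.y - loc.y
    yaw = math.radians(vehicle.get_transform().rotation.yaw)
    # project the displacement into the vehicle frame: lon = forward, lat = left/right
    lon = math.cos(yaw) * dx + math.sin(yaw) * dy
    lat = -math.sin(yaw) * dx + math.cos(yaw) * dy
    return lon, lat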
def main():
    ego_PPO = PPO(state_dim, action_dim, args.Alearning_rate, args.Clearning_rate,
                  args.gamma, args.update_iteration, 0.2, True, action_std_init=1)
    npc_PPO = PPO(state_dim, action_dim, args.Alearning_rate, args.Clearning_rate,
                  args.gamma, args.update_iteration, 0.2, True, action_std_init=1)
    client, world, blueprint_library = create_envs.connection()
    if args.log:
        main_writer = SummaryWriter(directory + '/' + args.mode + str(random.randint(0, 1000)))
    count = 0
    try:
        if args.load or args.mode == 'test':
            ego_PPO.load(directory + 'ego.pkl')
            npc_PPO.load(directory + 'npc.pkl')

        for i in range(args.max_episode):
            ego_total_reward = 0
            npc_total_reward = 0
            print('------------%dth time learning begins-----------' % i)
            ego_list, npc_list, obstacle_list, sensor_list = create_envs.Create_actors(world, blueprint_library)
            egosen_list = sensor_list[0]
            npcsen_list = sensor_list[1]

            if args.synchronous_mode:
                world.tick()          # client-driven tick
            else:
                world.wait_for_tick() # server-driven tick

            ego_transform = ego_list[0].get_transform()
            npc_transform = npc_list[0].get_transform()

            if i == 0:  # global routes, generated once
                ego_start_location = ego_transform.location
                ego_end_location = ego_transform.location + carla.Location(x=135)
                ego_route = create_envs.route_positions_generate(ego_start_location, ego_end_location)
                misc.draw_waypoints(world, ego_route)

                npc_start_location = npc_transform.location
                npc_end_location = npc_transform.location + carla.Location(x=135)
                npc_route = create_envs.route_positions_generate(npc_start_location, npc_end_location)
                misc.draw_waypoints(world, npc_route)

            ego_step, npc_step = 1, 1
            ego_state, _, _, npc_state, _, _ = create_envs.get_vehicle_step(
                ego_list[0], npc_list[0], egosen_list, npcsen_list,
                ego_route[ego_step], npc_route[npc_step], obstacle_list[0], 0)

            for t in range(args.max_length_of_trajectory):
                # --------- action selection ----------
                # if args.mode == 'test':
                #     ego_action = ego_PPO.select_best_action(ego_state, npc_state)
                #     npc_action = npc_PPO.select_best_action(npc_state, ego_state)
                # else:
                ego_action = ego_PPO.select_action(ego_state, npc_state)
                npc_action = npc_PPO.select_action(npc_state, ego_state)
                create_envs.set_vehicle_control(ego_list[0], npc_list[0], ego_action, npc_action, args.c_tau, t)

                # --------- interact with the environment and collect feedback ---------
                frames = 1  # number of simulation frames per control step
                if args.synchronous_mode:
                    for _ in range(frames):
                        world.tick()          # client-driven tick
                else:
                    world.wait_for_tick()     # server-driven tick

                # print('step:', ego_step)
                ego_next_state, ego_reward, ego_done, npc_next_state, npc_reward, npc_done = create_envs.get_vehicle_step(
                    ego_list[0], npc_list[0], egosen_list, npcsen_list,
                    ego_route[ego_step], npc_route[npc_step], obstacle_list[0], t)

                if ego_next_state[0] < 0.2:
                    ego_step += 1
                if npc_next_state[0] < 0.2:
                    npc_step += 1
                # print('state: ', ego_next_state)

                # store transition data
                count += 1
                ego_state = ego_next_state
                npc_state = npc_next_state
                # print('ego_state: ', ego_state, 'npc_state: ', npc_state)
                ego_total_reward += ego_reward
                npc_total_reward += npc_reward
                ego_PPO.buffer.rewards.append(ego_reward)
                npc_PPO.buffer.rewards.append(npc_reward)

                if args.mode == 'test':
                    if ego_done and npc_done:
                        break
                if args.mode == 'train':
                    if ego_done or npc_done:  # termination condition
                        ego_PPO.buffer.is_terminals.append(True)
                        npc_PPO.buffer.is_terminals.append(True)
                        break
                    else:
                        ego_PPO.buffer.is_terminals.append(False)
                        npc_PPO.buffer.is_terminals.append(False)

            ego_total_reward /= t + 1
            npc_total_reward /= t + 1
            print("Episode: {} step: {} ego_Total_Reward: {:0.3f} npc_Total_Reward: {:0.3f}".format(
                i + 1, t, ego_total_reward, npc_total_reward))

            if args.log:
                main_writer.add_scalar('reward/ego_reward', ego_total_reward, global_step=i)
                main_writer.add_scalar('reward/npc_reward', npc_total_reward, global_step=i)
                main_writer.add_scalar('step/step', t + 1, global_step=i)
                main_writer.add_scalar('step/ego_step', ego_step, global_step=i)
                main_writer.add_scalar('step/npc_step', npc_step, global_step=i)
                main_writer.add_scalar('offset/ego_offsety', ego_state[1], global_step=i)
                main_writer.add_scalar('offset/npc_offsety', npc_state[1], global_step=i)

            if args.mode == 'train':
                if i > 0 and (count + 1) >= args.update_interval:
                    ego_loss = ego_PPO.update(npc_PPO)
                    print('ego_updated')
                    npc_loss = npc_PPO.update(ego_PPO)
                    print('npc_updated')
                    main_writer.add_scalar('loss/ego_loss', ego_loss, global_step=i)
                    main_writer.add_scalar('loss/npc_loss', npc_loss, global_step=i)
                    ego_PPO.clear()
                    npc_PPO.clear()
                    count = 0
                if i > 0 and (i + 1) % args.log_interval == 0:
                    ego_PPO.save(directory + 'ego.pkl')
                    npc_PPO.save(directory + 'npc.pkl')
                    print('Network Saved')

            # destroy this episode's sensors and actors before the next episode
            for x in sensor_list[0]:
                if x.sensor.is_alive:
                    x.sensor.destroy()
            for x in sensor_list[1]:
                if x.sensor.is_alive:
                    x.sensor.destroy()
            for x in ego_list:
                # if x.is_alive:
                client.apply_batch([carla.command.DestroyActor(x)])
            for x in npc_list:
                # if x.is_alive:
                client.apply_batch([carla.command.DestroyActor(x)])
            for x in obstacle_list:
                # if x.is_alive:
                client.apply_batch([carla.command.DestroyActor(x)])
            # del ego_list, npc_list, obstacle_list, sensor_list, egosen_list, npcsen_list
            print('Reset')

    finally:
        # clean up the environment
        print('Start Cleaning Envs')
        settings = world.get_settings()
        settings.synchronous_mode = False
        settings.fixed_delta_seconds = None
        world.apply_settings(settings)
        for x in sensor_list[0]:
            if x.sensor.is_alive:
                x.sensor.destroy()
        for x in sensor_list[1]:
            if x.sensor.is_alive:
                x.sensor.destroy()
        for x in ego_list:
            # if x.is_alive:
            client.apply_batch([carla.command.DestroyActor(x)])
        for x in npc_list:
            # if x.is_alive:
            client.apply_batch([carla.command.DestroyActor(x)])
        for x in obstacle_list:
            # if x.is_alive:
            client.apply_batch([carla.command.DestroyActor(x)])
        print('all clean, simulation done!')
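
# Illustrative sketch (assumption): an argparse block matching the `args` fields
# referenced in main(). The option names follow the attributes actually used above;
# the defaults are placeholders for illustration, not values from the original project.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--mode', default='train', choices=['train', 'test'])
parser.add_argument('--load', action='store_true')                      # resume from saved .pkl weights
parser.add_argument('--log', action='store_true')                       # enable TensorBoard logging
parser.add_argument('--synchronous_mode', action='store_true')          # client-driven ticks
parser.add_argument('--Alearning_rate', type=float, default=3e-4)       # actor learning rate
parser.add_argument('--Clearning_rate', type=float, default=1e-3)       # critic learning rate
parser.add_argument('--gamma', type=float, default=0.99)                # discount factor
parser.add_argument('--update_iteration', type=int, default=10)         # PPO epochs per update
parser.add_argument('--update_interval', type=int, default=1000)        # env steps between PPO updates
parser.add_argument('--max_episode', type=int, default=1000)
parser.add_argument('--max_length_of_trajectory', type=int, default=500)
parser.add_argument('--c_tau', type=float, default=1.0)                 # control smoothing coefficient
parser.add_argument('--log_interval', type=int, default=50)             # episodes between checkpoints
args = parser.parse_args()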