Example #1
    def run_step(self, debug=True):
        """
        Execute one step of local planning which involves running the longitudinal and lateral PID controllers to
        follow the waypoints trajectory.

        :param debug: boolean flag to activate waypoints debugging
        :return: control command (carla.VehicleControl) to be applied
        """

        # not enough waypoints in the horizon? => add more!
        if not self._global_plan and len(self._waypoints_queue) < int(
                self._waypoints_queue.maxlen * 0.5):
            self._compute_next_waypoints(k=100)

        if len(self._waypoints_queue) == 0:
            control = carla.VehicleControl()
            control.steer = 0.0
            control.throttle = 0.0
            control.brake = 1.0
            control.hand_brake = False
            control.manual_gear_shift = False

            return control

        #   Buffering the waypoints
        if not self._waypoint_buffer:
            for i in range(self._buffer_size):
                if self._waypoints_queue:
                    self._waypoint_buffer.append(
                        self._waypoints_queue.popleft())
                else:
                    break

        # current vehicle waypoint
        self._current_waypoint = self._map.get_waypoint(
            self._vehicle.get_location())
        # target waypoint
        self.target_waypoint, self._target_road_option = self._waypoint_buffer[
            0]
        # move using PID controllers
        control = self._vehicle_controller.run_step(self._target_speed,
                                                    self.target_waypoint)

        # purge the queue of obsolete waypoints
        vehicle_transform = self._vehicle.get_transform()
        max_index = -1

        for i, (waypoint, _) in enumerate(self._waypoint_buffer):
            if distance_vehicle(waypoint,
                                vehicle_transform) < self._min_distance:
                max_index = i
        if max_index >= 0:
            for i in range(max_index + 1):
                self._waypoint_buffer.popleft()

        if debug:
            draw_waypoints(self._vehicle.get_world(), [self.target_waypoint],
                           self._vehicle.get_location().z + 1.0)

        return control
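
Usage sketch (not part of the original example): the method above matches the run_step() of a CARLA-style LocalPlanner, so it could be driven from a plain control loop as below. The import path, constructor signature, blueprint id and spawn point are assumptions and vary between CARLA versions.

# Minimal driving loop, assuming a CARLA server on localhost:2000 and the
# PythonAPI agents package on the path; LocalPlanner(vehicle) is the assumed
# constructor (some versions also accept an opt_dict).
import carla
from agents.navigation.local_planner import LocalPlanner

client = carla.Client('localhost', 2000)
client.set_timeout(10.0)
world = client.get_world()

blueprint = world.get_blueprint_library().find('vehicle.tesla.model3')
spawn_point = world.get_map().get_spawn_points()[0]
vehicle = world.spawn_actor(blueprint, spawn_point)

planner = LocalPlanner(vehicle)
try:
    for _ in range(200):
        control = planner.run_step(debug=False)  # PID control toward the next waypoint
        vehicle.apply_control(control)
        world.wait_for_tick()                    # asynchronous mode: wait for the server tick
finally:
    vehicle.destroy()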
Example #2
    def get_target_waypoint(self, debug=False):

        # not enough waypoints in the horizon? => add more!
        if not self._global_plan and len(self._waypoints_queue) < int(
                self._waypoints_queue.maxlen * 0.5):
            self._compute_next_waypoints(k=100)

        if len(self._waypoints_queue) == 0:
            return None

        #   Buffering the waypoints
        if not self._waypoint_buffer:
            for i in range(self._buffer_size):
                if self._waypoints_queue:
                    self._waypoint_buffer.append(
                        self._waypoints_queue.popleft())
                else:
                    break

        # current vehicle waypoint
        self._current_waypoint = self._map.get_waypoint(
            self._vehicle.get_location())
        # target waypoint
        print('_waypoints_queue, _waypoint_buffer', len(self._waypoints_queue),
              len(self._waypoint_buffer))
        self.target_waypoint, self._target_road_option = self._waypoint_buffer[
            0]

        # purge the queue of obsolete waypoints
        vehicle_transform = self._vehicle.get_transform()
        max_index = -1

        for i, (waypoint, _) in enumerate(self._waypoint_buffer):
            if distance_vehicle(waypoint,
                                vehicle_transform) < self._min_distance:
                max_index = i
        if max_index >= 0:
            for i in range(max_index + 1):
                self._waypoint_buffer.popleft()

        if debug:
            draw_waypoints(self._vehicle.get_world(), [self.target_waypoint],
                           self._vehicle.get_location().z + 1.0)

        return self.target_waypoint
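
Consumption sketch (not part of the original example): this variant only returns the target waypoint and leaves the control law to the caller. One assumed way to use it is a simple proportional steering rule on the heading error, shown below; the planner object, gain and throttle values are illustrative.

import math
import carla

def steer_towards(vehicle, waypoint, gain=0.5):
    """Return a VehicleControl steering the vehicle toward `waypoint`."""
    transform = vehicle.get_transform()
    loc = transform.location
    target = waypoint.transform.location
    # Heading of the vector vehicle -> waypoint, in degrees.
    desired_yaw = math.degrees(math.atan2(target.y - loc.y, target.x - loc.x))
    # Signed heading error wrapped to [-180, 180] degrees.
    error = (desired_yaw - transform.rotation.yaw + 180.0) % 360.0 - 180.0
    control = carla.VehicleControl()
    control.throttle = 0.5
    control.steer = max(-1.0, min(1.0, gain * error / 90.0))
    return control

# Hypothetical usage, with `planner` being the object defining get_target_waypoint():
# wp = planner.get_target_waypoint(debug=False)
# if wp is not None:
#     vehicle.apply_control(steer_towards(vehicle, wp))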
Example #3
def main():
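    # Two PPO agents (ego and npc) are trained or evaluated jointly in CARLA:
    # each policy conditions on both its own state and the other agent's state,
    # and both vehicles follow pre-generated routes 135 m ahead along x.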
    ego_PPO = PPO(state_dim, action_dim, args.Alearning_rate, args.Clearning_rate, args.gamma, args.update_iteration, 0.2, True, action_std_init=1)
    npc_PPO = PPO(state_dim, action_dim, args.Alearning_rate, args.Clearning_rate, args.gamma, args.update_iteration, 0.2, True, action_std_init=1)
    client, world, blueprint_library = create_envs.connection()
    if args.log == True:
        main_writer = SummaryWriter(directory + '/' + args.mode + str(random.randint(0,1000)))
    count = 0
    try:
        if args.load or args.mode == 'test': 
            ego_PPO.load(directory + 'ego.pkl')
            npc_PPO.load(directory + 'npc.pkl')

        for i in range(args.max_episode):
            ego_total_reward = 0
            npc_total_reward = 0
            print('------------ Episode %d: learning begins -----------' % i)
            ego_list,npc_list,obstacle_list,sensor_list = create_envs.Create_actors(world,blueprint_library)
            egosen_list = sensor_list[0]
            npcsen_list = sensor_list[1]
            if args.synchronous_mode:
                world.tick() # client drives the tick (synchronous mode)
            else:
                world.wait_for_tick() # server drives the tick (asynchronous mode)
            ego_transform = ego_list[0].get_transform()
            npc_transform = npc_list[0].get_transform()

            if i == 0:
                # global route (generated once, on the first episode)
                ego_start_location = ego_transform.location
                ego_end_location = ego_transform.location + carla.Location(x=135)
                ego_route = create_envs.route_positions_generate(ego_start_location,ego_end_location)
                misc.draw_waypoints(world,ego_route)

                npc_start_location = npc_transform.location
                npc_end_location = npc_transform.location + carla.Location(x=135)
                npc_route = create_envs.route_positions_generate(npc_start_location,npc_end_location)
                misc.draw_waypoints(world,npc_route)

            ego_step, npc_step = 1, 1
            ego_state,_,_,npc_state,_,_ = create_envs.get_vehicle_step(ego_list[0], npc_list[0], egosen_list, npcsen_list, ego_route[ego_step], npc_route[npc_step], obstacle_list[0], 0)
            
            for t in range(args.max_length_of_trajectory):
                #--------- action selection ----------
                # if args.mode == 'test':
                #     ego_action = ego_PPO.select_best_action(ego_state,npc_state)
                #     npc_action = npc_PPO.select_best_action(npc_state,ego_state)
                # else:
                ego_action = ego_PPO.select_action(ego_state,npc_state)
                npc_action = npc_PPO.select_action(npc_state,ego_state)
                
                create_envs.set_vehicle_control(ego_list[0], npc_list[0], ego_action, npc_action, args.c_tau, t)
                #--------- apply actions and get environment feedback ---------
                frames = 1 # number of simulation ticks per control step
                if args.synchronous_mode:
                    for _ in range(frames):
                        world.tick() # client drives the tick (synchronous mode)
                else:
                    world.wait_for_tick() # server drives the tick (asynchronous mode)
                # print('step:',ego_step)

                     
                ego_next_state,ego_reward,ego_done,npc_next_state,npc_reward,npc_done = create_envs.get_vehicle_step(ego_list[0], npc_list[0], egosen_list, npcsen_list, ego_route[ego_step], npc_route[npc_step], obstacle_list[0], t)
                
                # advance along the route once the first state component
                # (presumably a normalized distance to the current route point)
                # drops below 0.2
                if ego_next_state[0] < 0.2:
                    ego_step += 1
                if npc_next_state[0] < 0.2:
                    npc_step += 1
                # print('state: ', ego_next_state)
                # store transition data
                
                count += 1
                ego_state = ego_next_state
                npc_state = npc_next_state
                # print('ego_state: ', ego_state, 'npc_state: ', npc_state)
                ego_total_reward += ego_reward
                npc_total_reward += npc_reward
                ego_PPO.buffer.rewards.append(ego_reward)
                npc_PPO.buffer.rewards.append(npc_reward)

                if args.mode == 'test':
                    if ego_done and npc_done:
                        break

                if args.mode == 'train':
                    if ego_done or npc_done: # termination condition
                        ego_PPO.buffer.is_terminals.append(True)
                        npc_PPO.buffer.is_terminals.append(True)
                        break
                    else:
                        ego_PPO.buffer.is_terminals.append(False)
                        npc_PPO.buffer.is_terminals.append(False)


            # average the accumulated reward over the number of steps taken
            ego_total_reward /= t+1
            npc_total_reward /= t+1
                

            print("Episode: {} step: {} ego_Total_Reward: {:0.3f} npc_Total_Reward: {:0.3f}".format(i+1, t, ego_total_reward, npc_total_reward))
            
            if args.log == True:  
                main_writer.add_scalar('reward/ego_reward', ego_total_reward, global_step=i)
                main_writer.add_scalar('reward/npc_reward', npc_total_reward, global_step=i)
                main_writer.add_scalar('step/step', t+1, global_step=i)
                main_writer.add_scalar('step/ego_step', ego_step, global_step=i)
                main_writer.add_scalar('step/npc_step', npc_step, global_step=i)
                main_writer.add_scalar('offset/ego_offsety', ego_state[1], global_step=i)
                main_writer.add_scalar('offset/npc_offsety', npc_state[1], global_step=i)  
                if args.mode == 'train':
                    if i > 0 and (count+1) >= args.update_interval:
                        ego_loss = ego_PPO.update(npc_PPO)
                        print('ego_updated')
                        npc_loss = npc_PPO.update(ego_PPO)
                        print('npc_updated')
                        main_writer.add_scalar('loss/ego_loss', ego_loss, global_step=i)
                        main_writer.add_scalar('loss/npc_loss', npc_loss, global_step=i)
                        ego_PPO.clear()
                        npc_PPO.clear()
                        count = 0
                    
                    if i > 0 and (i+1) % args.log_interval == 0:
                        ego_PPO.save(directory + 'ego.pkl')
                        npc_PPO.save(directory + 'npc.pkl')
                        print('Network Saved')

            for x in sensor_list[0]:
                if x.sensor.is_alive:
                    x.sensor.destroy()
            for x in sensor_list[1]:
                if x.sensor.is_alive:
                    x.sensor.destroy()            
            for x in ego_list:
                # if x.is_alive:
                client.apply_batch([carla.command.DestroyActor(x)])
            for x in npc_list:
                # if x.is_alive:
                client.apply_batch([carla.command.DestroyActor(x)])
            for x in obstacle_list:
                # if x.is_alive:
                client.apply_batch([carla.command.DestroyActor(x)])
            # del ego_list, npc_list, obstacle_list, sensor_list, egosen_list, npcsen_list
            print('Reset')

    finally:
        # clean up the environment
        print('Start Cleaning Envs')
        settings = world.get_settings()
        settings.synchronous_mode = False
        settings.fixed_delta_seconds = None
        world.apply_settings(settings)
        for x in sensor_list[0]:
            if x.sensor.is_alive:
                x.sensor.destroy()
        for x in sensor_list[1]:
            if x.sensor.is_alive:
                x.sensor.destroy()
        for x in ego_list:
            # if x.is_alive:
            client.apply_batch([carla.command.DestroyActor(x)])
        for x in npc_list:
            # if x.is_alive:
            client.apply_batch([carla.command.DestroyActor(x)])
        for x in obstacle_list:
            # if x.is_alive:
            client.apply_batch([carla.command.DestroyActor(x)])
        print('all clean, simulation done!')
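
main() relies on module-level names defined elsewhere in the script (the args namespace, the directory checkpoint path, state_dim, action_dim, the PPO class and the create_envs/misc helpers). A plausible argparse reconstruction, inferred only from the fields referenced above, is sketched below; every default value is an assumption, not taken from the original repository.

# Illustrative reconstruction of the `args` used by main(); field names come
# from the code above, all defaults are assumptions.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--mode', default='train', choices=['train', 'test'])
parser.add_argument('--log', action='store_true')               # enable TensorBoard logging
parser.add_argument('--load', action='store_true')              # resume from saved checkpoints
parser.add_argument('--synchronous_mode', action='store_true')  # client-driven ticks
parser.add_argument('--max_episode', type=int, default=1000)
parser.add_argument('--max_length_of_trajectory', type=int, default=300)
parser.add_argument('--Alearning_rate', type=float, default=3e-4)   # actor learning rate
parser.add_argument('--Clearning_rate', type=float, default=1e-3)   # critic learning rate
parser.add_argument('--gamma', type=float, default=0.99)
parser.add_argument('--update_iteration', type=int, default=10)
parser.add_argument('--update_interval', type=int, default=2048)    # steps between PPO updates
parser.add_argument('--log_interval', type=int, default=50)         # episodes between checkpoints
parser.add_argument('--c_tau', type=float, default=1.0)             # control smoothing factor (assumed meaning)
args = parser.parse_args()

directory = './checkpoints/'  # assumed checkpoint path; the original value is not shown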