Example #1
def play():
  client = carla.Client(settings.CONNECTION_IP, settings.CONNECTION_PORT)
  client.set_timeout(20.0)

  # Create controllers
  trafic_control = TraficControlThread(client)
  weather_control = WeatherControlThread(client)
  trafic_control.start()
  weather_control.start()
  logger.info("Controllers started")

  predicter = ModelHandler(settings.MODEL_NAME, target_weights_path=MODEL_WEIGHTS, train=False)
  agent = Agent(999999, client, False)

  try:
    while True:
      step = 1

      state = agent.spawn()

      while True:
        start_step_time = time.time()

        # Greedy policy: always take the action with the highest predicted Q-value
        action = int(np.argmax(predicter.get_qs(state)))
        new_state, _, done = agent.step(action)
        state = new_state

        if done:
          agent.clear_agent()
          break

        # Throttle the loop to roughly settings.FPS_COMPENSATION steps per second:
        # time_diff1 keeps the whole episode on schedule, time_diff2 paces this single step.
        time_diff1 = agent.episode_start + step / settings.FPS_COMPENSATION - time.time()
        time_diff2 = start_step_time + 1 / settings.FPS_COMPENSATION - time.time()
        if time_diff1 > 0:
          time.sleep(min(0.125, time_diff1))
        elif time_diff2 > 0:
          time.sleep(min(0.125, time_diff2))

        step += 1  # advance the step counter so the episode schedule above stays meaningful
  except KeyboardInterrupt:
    logger.info("Exiting playing - Keyboard interrupt")
  except Exception:
    logger.exception("Playing failed")
  finally:
    trafic_control.terminate = True
    weather_control.terminate = True
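
The two time_diff values above keep the loop near settings.FPS_COMPENSATION steps per second: time_diff1 tracks the episode-wide schedule, time_diff2 paces the individual step, and each sleep is capped at 0.125 s so the loop stays responsive. A minimal standalone sketch of the same pacing idea (the pace_step name and the 20.0 default rate are illustrative, not part of the original project):

import time

def pace_step(episode_start: float, step: int, step_start: float,
              target_fps: float = 20.0) -> None:
    """Sleep just long enough to keep a control loop near target_fps steps per second."""
    now = time.time()
    # Slack until the episode-wide schedule says step number `step` should be finished.
    episode_slack = episode_start + step / target_fps - now
    # Slack until this single step has used up its 1 / target_fps time budget.
    step_slack = step_start + 1 / target_fps - now
    if episode_slack > 0:
        time.sleep(min(0.125, episode_slack))
    elif step_slack > 0:
        time.sleep(min(0.125, step_slack))
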
Example #2
class Trainer(Thread):
    def __init__(self, client, identifier, epsilon, get_qs_callback,
                 update_replay_memory_callback):
        super().__init__()
        self.daemon = True
        self.client = client

        self.terminate = False
        self.fail_flag = False
        self.halt = False

        self.get_qs = get_qs_callback
        self.update_replay_memory = update_replay_memory_callback
        self.identifier = identifier

        self.agent = Agent(identifier, self.client, True)

        self.action = None
        self.episode = 0
        self.epsilon = epsilon
        self.scores_history = deque(maxlen=settings.LOG_EVERY)
        self.score_record = None
        self.steps_per_second = deque(maxlen=settings.LOG_EVERY)

        self.actions_statistic = deque(
            maxlen=int(settings.LOG_EVERY * settings.SECONDS_PER_EXPISODE *
                       settings.FPS_COMPENSATION))

    def get_action(self, action: int):
        num_of_logged_actions = len(self.actions_statistic)
        if num_of_logged_actions <= 0: return 0
        return self.actions_statistic.count(action) / num_of_logged_actions

    def get_steps_per_second(self):
        if len(self.steps_per_second) > 0:
            return sum(self.steps_per_second) / len(self.steps_per_second)
        return 0

    def get_preview_data(self):
        if self.agent.prev_camera is not None and self.agent.initialized:
            return cv2.cvtColor(self.agent.prev_camera, cv2.COLOR_RGB2BGR)
        return np.zeros((settings.PREVIEW_CAMERA_IMAGE_DIMENSIONS[1],
                         settings.PREVIEW_CAMERA_IMAGE_DIMENSIONS[0],
                         settings.PREVIEW_CAMERA_IMAGE_DIMENSIONS[2]))

    def get_mean_score(self):
        if len(self.scores_history) > 0:
            return sum(self.scores_history) / len(self.scores_history)
        return 0

    def get_episode(self):
        return self.episode

    def run(self) -> None:
        logger.info(f"Trainer {self.identifier} started")

        while not self.terminate:
            if self.halt:
                time.sleep(0.1)
                continue

            reward = None
            episode_reward = 0
            step = 1

            try:
                state = self.agent.spawn()
                self.fail_flag = False
            except Exception:
                logger.exception(f"Trainer {self.identifier} - Failed to spawn agent")
                self.fail_flag = True
                break

            episode_data_memory = deque()

            while not self.fail_flag:
                start_step_time = time.time()

                # Epsilon-greedy: exploit the learned Q-values unless a random draw
                # falls below epsilon, in which case explore with a random action.
                if self.epsilon is None or np.random.random() > self.epsilon:
                    self.action = int(np.argmax(self.get_qs(state)))
                    self.actions_statistic.append(self.action)
                else:
                    self.action = random.choice(list(settings.ACTIONS.keys()))

                try:
                    new_state, reward, done = self.agent.step(self.action)
                except Exception:
                    logger.error(
                        f"Trainer {self.identifier} - Failed to make step")
                    self.fail_flag = True
                    break

                episode_data_memory.append(
                    (state, self.action, reward, new_state, done))
                state = new_state

                episode_reward += reward

                if done:
                    self.agent.clear_agent()
                    self.action = None
                    break

                time_diff1 = self.agent.episode_start + step / settings.FPS_COMPENSATION - time.time()
                time_diff2 = start_step_time + 1 / settings.FPS_COMPENSATION - time.time()
                if time_diff1 > 0:
                    time.sleep(min(0.125, time_diff1))
                elif time_diff2 > 0:
                    time.sleep(min(0.125, time_diff2))

                step += 1

            # Skip bookkeeping if the episode never completed a single step
            if reward is None or not self.agent.episode_start: continue

            episode_time = time.time() - self.agent.episode_start
            if episode_time == 0: episode_time = 1e-9  # guard against division by zero
            average_steps_per_second = step / episode_time

            self.steps_per_second.append(average_steps_per_second)

            # Rescale every reward except the final (terminal) one by the ratio of the
            # target step rate to the achieved step rate, so slower episodes that produced
            # fewer steps are not under-rewarded.
            reward_factor = settings.FPS_COMPENSATION / average_steps_per_second
            episode_reward_weighted = (
                (episode_reward - reward) * reward_factor +
                reward) * settings.EPISODE_REWARD_MULTIPLIER

            if episode_time > settings.MINIMUM_EPISODE_LENGTH:
                self.update_replay_memory(episode_data_memory)
                self.scores_history.append(episode_reward_weighted)
                self.episode += 1

            del episode_data_memory

        logger.info(f"Trainer {self.identifier} stopped")