def evaluate_agent(self, agent, steps=1000):
    """
    Runs the world loop for a single agent
    (see the usage example in the comments after ``if __name__ == "__main__"``);
    whether a picture is drawn is controlled by ``self.visual``.

    :param agent: SimpleCarAgent
    :param steps: number of loop iterations
    :return: the agent's mean reward per step and the number of circles driven
    """
    agent.evaluate_mode = True
    self.set_agents([agent])
    rewards = []
    if self.visual:
        scale = self._prepare_visualization()
    for _ in range(steps):
        vision = self.vision_for(agent)
        action = agent.choose_action(vision)
        next_agent_state, collision = self.physics.move(
            self.agent_states[agent], action)
        # progress around the track center, measured in full circles
        self.circles[agent] += angle(self.agent_states[agent].position,
                                     next_agent_state.position) / (2 * pi)
        self.agent_states[agent] = next_agent_state
        rewards.append(self.reward(next_agent_state, collision, vision))
        agent.receive_feedback(rewards[-1])
        if self.visual:
            self.visualize(scale)
            if self._update_display() == pygame.QUIT:
                break
            # sleep(0.05)
    return np.mean(rewards), self.circles[agent]
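# A minimal usage sketch for evaluate_agent, assuming the SimpleCarWorld /
# SimpleCarAgent classes and a map generator from this repo (the constructor
# signature and helper names here are assumptions, not verified against the
# actual entry point):
#
#   if __name__ == "__main__":
#       world = SimpleCarWorld(1, generate_map(), SimplePhysics, SimpleCarAgent)
#       agent = SimpleCarAgent()
#       mean_reward, circles = world.evaluate_agent(agent, steps=1000)
#       print("mean reward per step: %.3f, circles driven: %.2f"
#             % (mean_reward, circles))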
def visualize(self, scale):
    """
    Draws the picture. This and all the "private" methods
    (those whose names start with _) are optional to study.
    """
    for i, agent in enumerate(self.agents):
        state = self.agent_states[agent]
        surface = self._agent_surfaces[i]
        rays_lengths = self.vision_for(agent)[-agent.rays:]
        self._agent_images[i] = [
            self._draw_ladar(rays_lengths, state, scale),
            self._get_agent_image(surface, state, scale)
        ]
    if len(self.agents) == 1:
        a = self.agents[0]
        if a.step > 0:
            draw_text("Reward: %.3f" % a.reward_history[-1],
                      self._info_surface, scale, self.size,
                      text_color=white, bg_color=black)
            draw_text("Step: %d Avg reward: %.3f" % (a.step, a.avg_reward),
                      self._info_surface, scale, self.size,
                      text_color=white, bg_color=black,
                      tlpoint=(self._info_surface.get_width() - 790, 10))
            steer, acc = a.chosen_actions_history[-1]
            state = self.agent_states[a]
            draw_text("Action: steer.: %.2f, accel: %.2f" % (steer, acc),
                      self._info_surface, scale, self.size,
                      text_color=white, bg_color=black,
                      tlpoint=(self._info_surface.get_width() - 500, 10))
            draw_text("Inputs: |v|=%.2f, sin(angle): %.2f, circle: %.2f"
                      % (abs(state.velocity),
                         np.sin(angle(-state.position, state.heading)),
                         self.circles[a]),
                      self._info_surface, scale, self.size,
                      text_color=white, bg_color=black,
                      tlpoint=(self._info_surface.get_width() - 500, 50))
    return pygame.surfarray.array3d(self._agent_surfaces[0])
def vision_for(self, agent):
    """
    Builds the world view for the given agent.

    :param agent: the car we are looking from
    :return: a list of the car's speed magnitude, the signed angle between
             the car's heading and the direction to the track center, and
             `agent.rays` distances to the nearest track walls
             (run the visualization and it becomes clear)
    """
    state = self.agent_states[agent]
    vision = [
        abs(state.velocity),
        np.sin(angle(-state.position, state.heading))
    ]
    extras = len(vision)
    # `agent.rays` rays are spread uniformly over the half-plane in front
    # of the car, from -pi/2 to +pi/2 relative to its heading
    delta = pi / (agent.rays - 1)
    start = rotate(state.heading, -pi / 2)
    sectors = len(self.map)
    for i in range(agent.rays):
        # define ray direction
        ray = rotate(start, i * delta)
        # find the ray's nearest intersection with the track walls
        vision.append(np.inf)
        for j in range(sectors):
            inner_wall = self.map[j - 1][0], self.map[j][0]
            outer_wall = self.map[j - 1][1], self.map[j][1]
            for wall in (inner_wall, outer_wall):
                intersect = intersect_ray_with_segment((state.position, ray), wall)
                distance = (abs(intersect - state.position)
                            if intersect is not None else np.inf)
                if distance < vision[-1]:
                    vision[-1] = distance
        assert vision[-1] < np.inf, \
            "Something went wrong: {}, {}".format(str(state), str(agent.chosen_actions_history[-1]))
    assert len(vision) == agent.rays + extras, \
        "Something went wrong: {}, {}".format(str(state), str(agent.chosen_actions_history[-1]))
    return vision
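# Sketch of the ray-fan geometry used in vision_for (uses the same rotate()
# helper as above; the numbers are just an illustration). With rays = 5 we get
# delta = pi / 4, so the ray directions relative to the heading are
# -pi/2, -pi/4, 0, +pi/4, +pi/2: a uniform fan over the half-plane in front
# of the car.
#
#   rays = 5
#   delta = pi / (rays - 1)                 # pi / 4
#   start = rotate(state.heading, -pi / 2)  # leftmost ray
#   directions = [rotate(start, i * delta) for i in range(rays)]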
def transition(self):
    """
    Main loop logic: for every agent, compute the agent's view of the
    world, let the agent choose an action, advance the state and process
    the world's reaction to the chosen action.
    """
    for a in self.agents:
        vision = self.vision_for(a)
        action = a.choose_action(vision)
        next_agent_state, collision = self.physics.move(
            self.agent_states[a], action)
        # progress around the track center, measured in full circles
        self.circles[a] += angle(self.agent_states[a].position,
                                 next_agent_state.position) / (2 * pi)
        self.agent_states[a] = next_agent_state
        a.receive_feedback(self.reward(next_agent_state, collision, vision))
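# transition() is one tick of the world; a hedged sketch of the outer loop
# that would drive it, mirroring the structure of evaluate_agent above
# (run() is an assumed name, not necessarily the one used in this class):
#
#   def run(self, steps):
#       scale = self._prepare_visualization() if self.visual else None
#       for _ in range(steps):
#           self.transition()
#           if self.visual:
#               self.visualize(scale)
#               if self._update_display() == pygame.QUIT:
#                   break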
def eval_reward(self, state, collision):
    """
    The "default" reward, used in evaluate mode.
    Convenient so that you do not have to revert your own changes to the
    reward function in order to evaluate the result.
    """
    # a > 0 means the car is moving in the right direction around the track
    a = -np.sin(angle(-state.position, state.heading))
    heading_reward = 1 if a > 0.1 else (a if a > 0 else 0)
    heading_penalty = a if a <= 0 else 0
    idle_penalty = (0 if abs(state.velocity) > self.MIN_SPEED
                    else -self.IDLENESS_PENALTY)
    speeding_penalty = (0 if abs(state.velocity) < self.MAX_SPEED
                        else -self.SPEEDING_PENALTY * abs(state.velocity))
    collision_penalty = -max(abs(state.velocity), 0.1) * int(collision) * self.COLLISION_PENALTY
    return (heading_reward * self.HEADING_REWARD
            + heading_penalty * self.WRONG_HEADING_PENALTY
            + collision_penalty + idle_penalty + speeding_penalty)
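# Worked example of how the terms above combine (the values are illustrative,
# the actual class constants may differ): take a = 0.05, |velocity| = 0.2,
# MIN_SPEED = 0.1, MAX_SPEED = 10 and no collision. Then:
#
#   heading_reward    = 0.05  (0 < a <= 0.1, so the reward equals a itself)
#   heading_penalty   = 0     (a > 0)
#   idle_penalty      = 0     (|v| = 0.2 > MIN_SPEED)
#   speeding_penalty  = 0     (|v| = 0.2 < MAX_SPEED)
#   collision_penalty = 0     (collision is False)
#
# so the total reward is 0.05 * HEADING_REWARD.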
def step(self, steering, acceleration):
    """
    Applies the (steering, acceleration) action to every agent for one time
    step and returns a gym-style tuple (observation, reward, done, info)
    for the last agent processed.
    """
    action = Action(steering, acceleration)
    for a in self.agents:
        next_agent_state, collision = self.physics.move(
            self.agent_states[a], action)
        progress = angle(self.agent_states[a].position,
                         next_agent_state.position) / (2 * pi)
        self.circles[a] += progress
        self.agent_states[a] = next_agent_state
        vision = self.vision_for(a)
        reward = self.reward(collision, progress)
        a.sensor_data_history.append(vision)
        a.chosen_actions_history.append(action)
        a.reward_history.append(reward)
        a.step += 1
        # running average of the reward with an exponentially decaying weight
        q = .001 if a.step > 1000 else 1. / float(a.step)
        a.avg_reward = (1. - q) * a.avg_reward + q * reward
        a.sum_reward += reward
        done = False
        if a.step == self.steps:
            done = True
            a.step = 0
    return np.array(vision), reward, done, {'collision': collision}
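# step() mirrors the classic gym interface (observation, reward, done, info);
# a minimal interaction sketch with a random policy (the action ranges below
# are assumptions, not taken from this repo):
#
#   obs = np.array(world.vision_for(world.agents[0]))
#   done = False
#   while not done:
#       steering = np.random.uniform(-1., 1.)
#       acceleration = np.random.uniform(-0.5, 0.5)
#       obs, reward, done, info = world.step(steering, acceleration)
#       if info['collision']:
#           pass  # e.g. count crashes or end the episode early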