def vision_for(self, agent):
    """
    Build the observation ("vision") vector for one agent.

    Rays are cast from the agent's position, starting at its heading rotated
    by -pi/2 and stepping by pi / (agent.rays - 1) per ray. For every ray the
    distance to the closest intersected segment is recorded; segments are the
    inner and outer track walls from `self.map` and the edges of every
    polygon in `self.obs`.

    :param agent: the car we are looking from; it must have `rays >= 2`,
        because the angular step divides by `agent.rays - 1`
    :return: list of `[speed, sine, d_0, ..., d_{rays-1}]`, where `speed` is
        `abs(state.velocity)`, `sine` is the sine of the directed angle
        between the direction to the center (`-state.position`) and the
        car's heading, and `d_i` is the distance along the i-th ray to the
        nearest wall or obstacle edge
    :raises AssertionError: if some ray hits no segment at all, or if the
        resulting list has an unexpected length
    """
    state = self.agent_states[agent]
    origin = state.position
    vision = [
        abs(state.velocity),
        np.sin(angle(-origin, state.heading)),
    ]
    extras = len(vision)

    delta = pi / (agent.rays - 1)
    start = rotate(state.heading, -pi / 2)

    # The set of segments does not depend on the ray, so collect it once.
    # Index j - 1 wraps to the last element for j == 0, which closes the loop
    # of track sectors and of each obstacle polygon.
    segments = []
    for j in range(len(self.map)):
        segments.append((self.map[j - 1][0], self.map[j][0]))  # inner wall
        segments.append((self.map[j - 1][1], self.map[j][1]))  # outer wall
    for obstacle in self.obs:
        for j in range(len(obstacle)):
            segments.append((obstacle[j - 1], obstacle[j]))

    for i in range(agent.rays):
        # Direction of the i-th ray.
        ray = rotate(start, i * delta)

        # np.inf (not the `np.infty` alias, which NumPy 2.0 removed) marks
        # "nothing hit yet".
        nearest = np.inf
        for segment in segments:
            hit = intersect_ray_with_segment((origin, ray), segment)
            distance = abs(hit - origin) if hit is not None else np.inf
            if distance < nearest:
                nearest = distance
        vision.append(nearest)

        # Every ray is expected to hit at least one segment.
        assert vision[-1] < np.inf, \
            "Something went wrong: {}, {}".format(str(state), str(agent.chosen_actions_history[-1]))

    assert len(vision) == agent.rays + extras, \
        "Something went wrong: {}, {}".format(str(state), str(agent.chosen_actions_history[-1]))
    return vision
def reward(self, state, collision):
    """
    Compute the reward of an agent that is in state `state`.

    This function can (and sometimes should!) be changed in order to teach
    the network exactly the behaviour you want from it.

    The reward is the sum of five terms:

    * heading score: `HEADING_REWARD * tanh(-2 * a)`, where `a` is the sine
      of the directed angle between the direction to the center
      (`-state.position`) and the heading;
    * idleness penalty: applied when the speed is not above `MIN_SPEED`;
    * speeding penalty: proportional to the speed, applied when the speed is
      not below `MAX_SPEED`;
    * collision penalty: applied when `collision` is truthy, scaled by
      `max(speed, 0.1)`;
    * distance penalty: applied when there was no collision, scaled by
      `exp(-dist)`, where `dist` is the distance along the heading to the
      nearest track wall.

    NOTE: only the track walls from `self.map` are used for `dist`;
    obstacles in `self.obs` are not considered here.

    :param state: current state of the agent
    :param collision: whether a collision with a wall happened on the
        previous step
    :return reward: the agent's reward (possibly negative)
    """
    speed = abs(state.velocity)

    a = np.sin(angle(-state.position, state.heading))
    heading_score = self.HEADING_REWARD * np.tanh(-2 * a)
    idle_penalty = 0 if speed > self.MIN_SPEED else -self.IDLENESS_PENALTY
    speeding_penalty = 0 if speed < self.MAX_SPEED else -self.SPEEDING_PENALTY * speed
    collision_penalty = -max(speed, 0.1) * int(collision) * self.COLLISION_PENALTY

    # Distance to the nearest track wall straight ahead of the car.
    # np.inf (not the `np.infty` alias, which NumPy 2.0 removed) means that
    # no wall is hit.
    dist = np.inf
    sectors = len(self.map)
    for j in range(sectors):
        # Index j - 1 wraps to the last sector for j == 0, closing the track.
        inner_wall = self.map[j - 1][0], self.map[j][0]
        outer_wall = self.map[j - 1][1], self.map[j][1]

        for wall in (inner_wall, outer_wall):
            hit = intersect_ray_with_segment((state.position, state.heading), wall)
            hit_dist = abs(hit - state.position) if hit is not None else np.inf
            if hit_dist < dist:
                dist = hit_dist

    dist_penalty = 0 if collision else -max(speed, 1) * self.COLLISION_PENALTY * np.exp(-dist)

    return heading_score + collision_penalty \
        + idle_penalty + speeding_penalty + dist_penalty
def vision_for(self, agent):
    """
    Build the observation ("vision") vector for one agent.

    The agent casts `agent.rays` rays from its position. The first ray is the
    heading rotated by -pi/2 and each following ray is rotated further by
    pi / (agent.rays - 1). Each ray contributes the distance to the closest
    segment it intersects, taken over the inner and outer track walls from
    `self.map` and the edges of the polygons in `self.obs`.

    :param agent: the car we are looking from; it must have `rays >= 2`,
        because the angular step divides by `agent.rays - 1`
    :return: list made of the car's speed (`abs(state.velocity)`), the sine
        of the directed angle between the direction to the center
        (`-state.position`) and the car's heading, followed by `agent.rays`
        distances to the nearest walls or obstacle edges
    :raises AssertionError: if a ray intersects nothing, or if the length of
        the result is not `agent.rays` plus the two leading entries
    """
    state = self.agent_states[agent]
    position = state.position
    vision = [abs(state.velocity), np.sin(angle(-position, state.heading))]
    extras = len(vision)

    delta = pi / (agent.rays - 1)
    start = rotate(state.heading, -pi / 2)

    # Segments are the same for every ray, so gather them before the loop.
    # Negative index -1 (for j == 0) links the last point back to the first.
    sectors = len(self.map)
    walls = []
    for j in range(sectors):
        walls.append((self.map[j - 1][0], self.map[j][0]))  # inner wall
        walls.append((self.map[j - 1][1], self.map[j][1]))  # outer wall
    for obstacle in self.obs:
        walls.extend((obstacle[j - 1], obstacle[j]) for j in range(len(obstacle)))

    def distance_along(ray, wall):
        # Distance from the car to where `ray` meets `wall`, or infinity when
        # they do not intersect. np.inf replaces the `np.infty` alias that
        # NumPy 2.0 removed.
        point = intersect_ray_with_segment((position, ray), wall)
        return abs(point - position) if point is not None else np.inf

    for i in range(agent.rays):
        # Direction of the i-th ray.
        ray = rotate(start, i * delta)

        # Seeding min() with inf keeps "no hit" representable and performs the
        # same strict less-than comparisons as an explicit running minimum.
        closest = np.inf
        for wall in walls:
            closest = min(closest, distance_along(ray, wall))
        vision.append(closest)

        assert vision[-1] < np.inf, \
            "Something went wrong: {}, {}".format(str(state), str(agent.chosen_actions_history[-1]))

    assert len(vision) == agent.rays + extras, \
        "Something went wrong: {}, {}".format(str(state), str(agent.chosen_actions_history[-1]))
    return vision