Exemple #1
0
    def vision_for(self, agent):
        """
        Build the observation ("vision") vector for a single agent.

        :param agent: the car we are looking from; ``agent.rays`` is the number
            of rays to cast. It must be at least 2, because the angular step is
            ``pi / (agent.rays - 1)`` (a value of 1 raises ZeroDivisionError).
        :return: a list made of the magnitude of the car's velocity, the sine
            of the directed angle between the direction to the centre
            (``-position``) and the car's heading, followed by ``agent.rays``
            distances to the nearest track wall or obstacle edge, one per ray.
        :raises AssertionError: if some ray hits neither a wall nor an obstacle.
        """
        state = self.agent_states[agent]
        origin = state.position
        vision = [
            abs(state.velocity),
            np.sin(angle(-origin, state.heading)),
        ]
        extras = len(vision)

        # Rays are produced by rotating the heading by -pi/2 and then by
        # successive multiples of `delta`, up to a total extra rotation of pi.
        delta = pi / (agent.rays - 1)
        start = rotate(state.heading, -pi / 2)

        # The set of segments a ray can hit does not depend on the ray, so it
        # is collected once. Index `j - 1` wraps to the last element for
        # j == 0, which closes each polyline into a loop.
        segments = []
        for j in range(len(self.map)):
            segments.append((self.map[j - 1][0], self.map[j][0]))  # inner wall
            segments.append((self.map[j - 1][1], self.map[j][1]))  # outer wall
        for obstacle in self.obs:
            for j in range(len(obstacle)):
                segments.append((obstacle[j - 1], obstacle[j]))

        for i in range(agent.rays):
            ray = rotate(start, i * delta)

            # Keep the smallest distance over all segments the ray crosses.
            # `np.inf` is used because the `np.infty` alias no longer exists
            # in NumPy 2.0 and later.
            nearest = np.inf
            for segment in segments:
                hit = intersect_ray_with_segment((origin, ray), segment)
                if hit is None:
                    continue
                distance = abs(hit - origin)
                if distance < nearest:
                    nearest = distance
            vision.append(nearest)

            # A ray that hits nothing means the geometry is not what this
            # method expects (presumably the agent left the closed track).
            assert vision[-1] < np.inf, \
                "Something went wrong: {}, {}".format(str(state), str(agent.chosen_actions_history[-1]))
        assert len(vision) == agent.rays + extras, \
            "Something went wrong: {}, {}".format(str(state), str(agent.chosen_actions_history[-1]))
        return vision
    def reward(self, state, collision):
        """
        Compute the reward of an agent that is in ``state``.

        This function can (and sometimes should) be changed to teach the
        network exactly the behaviour you want from it.

        :param state: current agent state; ``position``, ``heading`` and
            ``velocity`` are read
        :param collision: whether a collision with a wall happened on the
            previous step
        :return: the agent's reward (possibly negative): the sum of the
            heading score and the collision, idleness, speeding and
            wall-proximity penalties
        """
        speed = abs(state.velocity)

        a = np.sin(angle(-state.position, state.heading))
        heading_score = self.HEADING_REWARD * np.tanh(-2 * a)
        idle_penalty = 0 if speed > self.MIN_SPEED else -self.IDLENESS_PENALTY
        speeding_penalty = 0 if speed < self.MAX_SPEED else -self.SPEEDING_PENALTY * speed
        collision_penalty = -max(speed, 0.1) * int(collision) * self.COLLISION_PENALTY

        # Distance from the agent to the nearest track wall straight ahead
        # (along `state.heading`). Only `self.map` walls are considered here.
        # `np.inf` is used because the `np.infty` alias no longer exists in
        # NumPy 2.0 and later.
        dist = np.inf
        for j in range(len(self.map)):
            # Index `j - 1` wraps to the last element for j == 0.
            inner_wall = self.map[j - 1][0], self.map[j][0]
            outer_wall = self.map[j - 1][1], self.map[j][1]
            for wall in (inner_wall, outer_wall):
                hit = intersect_ray_with_segment((state.position, state.heading), wall)
                if hit is None:
                    continue
                distance = abs(hit - state.position)
                if distance < dist:
                    dist = distance

        # The proximity penalty decays exponentially with the distance ahead
        # and is skipped on a collision step, which has its own penalty.
        dist_penalty = 0 if collision else -max(speed, 1) * self.COLLISION_PENALTY * np.exp(-dist)
        return heading_score + collision_penalty \
               + idle_penalty + speeding_penalty + dist_penalty
Exemple #3
0
    def vision_for(self, agent):
        """
        Build the observation ("vision") vector for a single agent.

        :param agent: the car we are looking from; ``agent.rays`` is the number
            of rays to cast. It must be at least 2, because the angular step is
            ``pi / (agent.rays - 1)`` (a value of 1 raises ZeroDivisionError).
        :return: a list made of the magnitude of the car's velocity, the sine
            of the directed angle between the direction to the centre
            (``-position``) and the car's heading, followed by ``agent.rays``
            distances to the nearest track wall or obstacle edge, one per ray.
        :raises AssertionError: if some ray hits neither a wall nor an obstacle.
        """
        state = self.agent_states[agent]
        position = state.position
        vision = [abs(state.velocity), np.sin(angle(-position, state.heading))]
        extras = len(vision)

        # Rays are produced by rotating the heading by -pi/2 and then by
        # successive multiples of `delta`, up to a total extra rotation of pi.
        delta = pi / (agent.rays - 1)
        start = rotate(state.heading, -pi / 2)

        # Gather every candidate segment once; it is the same for all rays.
        # Index `j - 1` wraps to the last element for j == 0, which closes
        # each polyline into a loop.
        walls = []
        for j in range(len(self.map)):
            walls.append((self.map[j - 1][0], self.map[j][0]))  # inner wall
            walls.append((self.map[j - 1][1], self.map[j][1]))  # outer wall
        for obstacle in self.obs:
            for j in range(len(obstacle)):
                walls.append((obstacle[j - 1], obstacle[j]))

        for i in range(agent.rays):
            ray = rotate(start, i * delta)

            # Smallest distance over all segments this ray crosses. `np.inf`
            # replaces `np.infty`, an alias that no longer exists in
            # NumPy 2.0 and later.
            closest = np.inf
            for wall in walls:
                point = intersect_ray_with_segment((position, ray), wall)
                if point is None:
                    continue
                distance = abs(point - position)
                if distance < closest:
                    closest = distance
            vision.append(closest)

            # A ray that hits nothing means the geometry is not what this
            # method expects (presumably the agent left the closed track).
            assert vision[-1] < np.inf, \
                "Something went wrong: {}, {}".format(str(state), str(agent.chosen_actions_history[-1]))
        assert len(vision) == agent.rays + extras, \
            "Something went wrong: {}, {}".format(str(state), str(agent.chosen_actions_history[-1]))
        return vision