Example 1
    def __init__(self, discrete = False):

        # Draw canvas
        static_elements = [
            ['Grass'],
            ['TwoLaneRoad', -50, +50, -5, +5, 0.5],
        ]
        agents = [
            ['Ego', lambda: -45 + np.random.rand() * 10, -2.5, 0.0, wmath.Direction2D(mode = '+x')],
        ]
        canvas = graphics.Canvas(600, 600,
            static_elements, agents,
            ox = 300, oy = 300, scale = 600/100)
        
        # Zero pad by 3 features to create 7 + 3 + 3 = 13 features for continual learning
        super().__init__(canvas, zero_pad = 3, discrete = discrete)

        # Reward structure
        # Reward of -1.0 every step, plus a penalty for deviating from the middle of the lane
        # On success, +250 (= 100 + expected_finish_steps): the +150 roughly offsets the
        # per-step -1 accumulated over an episode expected to finish within about 150 steps
        expected_finish_steps = 150
        existence_punishment = -1.0
        d = {
            "mid_lane": -2.5,
            "deviation_mid_lane": lambda p, t: abs(p['mid_lane']-p['ego'].f['y']),
            "moving_horizontally": lambda p, t: -5 <= p['ego'].f['y'] <= 5
        }
        p = {
            "stopped": lambda p, t: t > 0 and p['ego'].f['v'] == 0,
            "out_of_bounds": lambda p, t: (not p['ego'].any_regions()) or (not p['moving_horizontally']),
            "near_goal": lambda p, t: p['ego'].Lp(45, -2.5) <= 2.5,
        }
        r = [
            ["true", lambda p, t: -p['deviation_mid_lane']/5 + existence_punishment, 'satisfaction'],
        ]
        t = [
            ['out_of_bounds', -100, 'satisfaction'],
            ["stopped", -100, 'satisfaction'],
        ]
        s = [
            ['near_goal', 100 + expected_finish_steps, 'satisfaction'],
        ]
        self.reward_structure(d, p, r, t, s, round_to = 3)

        # Specify state
        ego_fn = lambda agent, rs: utilities.combine_dicts(agent.f.get_dict(), rs._p.get_dict())
        other_fn = lambda agent, rs: {}
        self.specify_state(ego_fn, other_fn)

        # Finally
        self.make_ready()
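
The `reward_structure(d, p, r, t, s)` call above takes derived values (`d`), boolean predicates (`p`), per-step reward rules (`r`), terminal failure rules (`t`), and terminal success rules (`s`). The sketch below is not the framework's implementation; it only illustrates, under the assumption that each rule is a `(condition, reward, mode)` triple evaluated against a shared lookup of agents, derived values, and predicates, how such a structure could be scored once per step. `evaluate_reward_structure` and `ctx` are hypothetical names, and compound conditions such as those in the later examples would additionally need parsing.

def evaluate_reward_structure(d, p, r, t, s, ctx, t_step):
    # Derived values: constants are stored as-is, callables are evaluated on the context.
    for name, value in d.items():
        ctx[name] = value(ctx, t_step) if callable(value) else value
    # Predicates: boolean functions of the same context.
    for name, fn in p.items():
        ctx[name] = bool(fn(ctx, t_step))

    reward, done, success = 0.0, False, False
    for condition, value, _mode in r:      # per-step shaping rewards
        if condition == "true" or ctx.get(condition, False):
            reward += value(ctx, t_step) if callable(value) else value
    for condition, value, _mode in t:      # failure terminations
        if ctx.get(condition, False):
            reward, done = reward + value, True
    for condition, value, _mode in s:      # success terminations
        if ctx.get(condition, False):
            reward, done, success = reward + value, True, True
    return reward, done, success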
Example 2
 def closest_agent_forward(self, list_of_agents):
     # Return the nearest agent that lies ahead of this agent, together with
     # its Euclidean distance: {'d': distance, 'o': agent}.
     my_direction = self.direction
     ret_agent = None
     min_displacement = np.inf
     for agent in list_of_agents:
         displacement_unit_vector = wmath.Direction2D(
             [agent.f['x'] - self.f['x'], agent.f['y'] - self.f['y']])
         displacement = np.sqrt((agent.f['x'] - self.f['x'])**2 +
                                (agent.f['y'] - self.f['y'])**2)
         # Accept the candidate only if it is closer than the best so far and
         # in front of us (positive dot product with the direction of travel).
         if displacement < min_displacement and displacement_unit_vector.dot(
                 my_direction) > 0:
             min_displacement = displacement
             ret_agent = agent
     return {'d': min_displacement, 'o': ret_agent}
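
The forward test in `closest_agent_forward` is just the sign of a dot product: an agent counts as "forward" when the displacement towards it points into the same half-plane as the direction of travel. A standalone illustration with plain NumPy follows; the helper `is_forward` is hypothetical and does not depend on `wmath` or `graphics`.

import numpy as np

def is_forward(pos, heading, other_pos):
    # An object is "forward" iff the displacement towards it has a positive
    # dot product with the (unit) heading vector.
    displacement = np.asarray(other_pos, dtype=float) - np.asarray(pos, dtype=float)
    return float(np.dot(displacement, heading)) > 0.0

ego_pos, ego_heading = (0.0, -2.5), (1.0, 0.0)           # facing +x, as in mode='+x'
print(is_forward(ego_pos, ego_heading, (10.0, -2.5)))    # True  -> ahead of the ego
print(is_forward(ego_pos, ego_heading, (-10.0, -2.5)))   # False -> behind the ego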
Example 3
 def closest_intersection_forward(self):
     # Return the nearest intersection ahead of this agent: {'d': distance, 'o': intersection}.
     assert (self.direction.mode in ['+x', '-x', '+y', '-y'])
     rx1, rx2, ry1, ry2 = self.lane_boundaries()  # clipping boundaries
     ret_intersection = None
     min_displacement = np.inf
     my_direction = self.direction
     for intersection in self.canvas.intersections:
         # Clip to the lane to compute the displacement,
         # but return the complete intersection.
         clipped_intersection = intersection.clip(rx1, rx2, ry1, ry2)
         if clipped_intersection.empty(): continue
         centerx, centery = clipped_intersection.center
         displacement_unit_vector = wmath.Direction2D(
             [centerx - self.f['x'], centery - self.f['y']])
         displacement = np.sqrt((centerx - self.f['x'])**2 +
                                (centery - self.f['y'])**2)
         # Keep it only if it is closer and in front of us.
         if displacement < min_displacement and displacement_unit_vector.dot(
                 my_direction) > 0:
             min_displacement = displacement
             ret_intersection = intersection
     return {'d': min_displacement, 'o': ret_intersection}
Example 4
 def closest_stop_region_forward(self):
     # Return the nearest stop region ahead of this agent: {'d': distance, 'o': clipped stop region}.
     assert (self.direction.mode in ['+x', '-x', '+y', '-y'])
     rx1, rx2, ry1, ry2 = self.lane_boundaries()  # clipping boundaries
     ret_stopregion = None
     min_displacement = np.inf
     my_direction = self.direction
     # Stop regions are indexed by the axis of travel.
     if 'x' in self.direction.mode: stopregions = self.canvas.stopx
     if 'y' in self.direction.mode: stopregions = self.canvas.stopy
     for stopregion in stopregions:
         stopregion = stopregion.clip(rx1, rx2, ry1, ry2)
         if stopregion.empty(): continue
         centerx, centery = stopregion.center
         displacement_unit_vector = wmath.Direction2D(
             [centerx - self.f['x'], centery - self.f['y']])
         displacement = np.sqrt((centerx - self.f['x'])**2 +
                                (centery - self.f['y'])**2)
         # Keep it only if it is closer and in front of us.
         if displacement < min_displacement and displacement_unit_vector.dot(
                 my_direction) > 0:
             min_displacement = displacement
             ret_stopregion = stopregion
     return {'d': min_displacement, 'o': ret_stopregion}
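
Examples 3 and 4 repeat the same clip-then-measure loop, differing only in the candidate list and in whether the full object or the clipped one is returned. A possible refactoring is sketched below under the assumption that intersections and stop regions share the `clip`/`empty`/`center` interface used above; `closest_forward` is a hypothetical helper, not part of the library.

def closest_forward(self, candidates, return_clipped=False):
    # Hypothetical shared helper for closest_intersection_forward /
    # closest_stop_region_forward: find the nearest candidate ahead of the agent.
    rx1, rx2, ry1, ry2 = self.lane_boundaries()
    best = {'d': np.inf, 'o': None}
    for candidate in candidates:
        clipped = candidate.clip(rx1, rx2, ry1, ry2)
        if clipped.empty():
            continue
        cx, cy = clipped.center
        unit = wmath.Direction2D([cx - self.f['x'], cy - self.f['y']])
        dist = np.sqrt((cx - self.f['x'])**2 + (cy - self.f['y'])**2)
        if dist < best['d'] and unit.dot(self.direction) > 0:
            best = {'d': dist, 'o': clipped if return_clipped else candidate}
    return best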
Example 5
static_elements = [
    ['Grass'],
    ['TwoLaneRoad', -50, -5, -5, +5, 0.5],
    ['TwoLaneRoad', +5, +50, -5, +5, 0.5],
    ['TwoLaneRoad', -5, +5, -50, -5, 0.5],
    ['TwoLaneRoad', -5, +5, +5, +50, 0.5],
    ['Intersection', -5, +5, -5, +5],
    ['StopRegionX', -10, -5, -5, 0],
    ['StopRegionX', +5, +10, 0, +5],
    ['StopRegionY', -5, 0, +5, +10],
    ['StopRegionY', 0, +5, -10, -5],
]
agents = [
    ['Ego', -45, -2.5, 0.0,
     wmath.Direction2D(mode='+x')],
    ['Veh', -15, -2.5, 0.0,
     wmath.Direction2D(mode='+x')],
    ['Veh', +45, +2.5, 0.0,
     wmath.Direction2D(mode='-x')],
    ['Veh', +15, +2.5, 0.0,
     wmath.Direction2D(mode='-x')],
    ['Veh', +2.5, -45, 0.0,
     wmath.Direction2D(mode='+y')],
    ['Veh', +2.5, -15, 0.0,
     wmath.Direction2D(mode='+y')],
    ['Veh', -2.5, +45, 0.0,
     wmath.Direction2D(mode='-y')],
    ['Veh', -2.5, +15, 0.0,
     wmath.Direction2D(mode='-y')],
]
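
All of these scenarios place world coordinates in roughly [-50, +50] on a 600x600 canvas via `graphics.Canvas(600, 600, ..., ox=300, oy=300, scale=600/100)`: the world origin sits at pixel (300, 300) and one world unit spans 6 pixels. A sketch of the implied world-to-pixel mapping follows; `world_to_pixel` is a hypothetical helper, and whether the canvas y-axis is flipped is an assumption not confirmed by the snippets.

def world_to_pixel(x, y, ox=300, oy=300, scale=600 / 100):
    # ox/oy give the pixel of world (0, 0); scale is pixels per world unit.
    # The y-flip assumes screen coordinates grow downwards.
    return ox + scale * x, oy - scale * y

print(world_to_pixel(0, 0))       # (300.0, 300.0) -> canvas centre
print(world_to_pixel(-45, -2.5))  # (30.0, 315.0)  -> the Ego spawn above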
Example 6
    def __init__(self, discrete=False):

        # Draw canvas
        static_elements = [
            ['Grass'],
            ['TwoLaneRoad', -50, +50, -5, +5, 0.5],
        ]
        agents = [
            [
                'Ego', lambda: -45 + np.random.rand() * 10, -2.5, 0.0,
                wmath.Direction2D(mode='+x')
            ],
            [
                'Veh', lambda: -20 + np.random.rand() * 5, -2.5, 0.0,
                wmath.Direction2D(mode='+x')
            ],
            [
                'Veh', lambda: -5 + np.random.rand() * 5, -2.5, 0.0,
                wmath.Direction2D(mode='+x')
            ],
            [
                'Veh', lambda: +20 + np.random.rand() * 5, +2.5, 0.0,
                wmath.Direction2D(mode='+x')
            ],
        ]
        canvas = graphics.Canvas(600,
                                 600,
                                 static_elements,
                                 agents,
                                 ox=300,
                                 oy=300,
                                 scale=600 / 100)
        default_policy = lambda c: [0, 0]
        super().__init__(canvas, default_policy, discrete=discrete)

        # Reward structure
        # Shortened the names of predicates
        d = {
            "mid_lane": -2.5,
            "car1": lambda p, t: p['v'][1],
            "car2": lambda p, t: p['v'][2],
            "car3": lambda p, t: p['v'][3],
            "deviation_mid_lane": lambda p, t: abs(p['mid_lane'] - p['ego'].f['y']),
            "moving_horizontally": lambda p, t: -5 <= p['ego'].f['y'] <= 5,
        }
        p = {
            "stopped": lambda p, t: t > 0 and p['ego'].f['v'] == 0,
            "out_of_bounds": lambda p, t: (not p['ego'].any_regions()) or
                                          (not p['moving_horizontally']),
            "past_a": lambda p, t: p['ego'].f['x'] > p['car1'].f['x'] + 10,
            "past_b": lambda p, t: p['ego'].f['x'] > p['car2'].f['x'] + 10,
            "past_c": lambda p, t: p['ego'].f['x'] > p['car3'].f['x'] + 5,
            "crash_a": lambda p, t: p['ego'].Lp(p['car1'].f['x'], p['car1'].f['y']) <= 3.0,
            "crash_b": lambda p, t: p['ego'].Lp(p['car2'].f['x'], p['car2'].f['y']) <= 3.0,
            "crash_c": lambda p, t: p['ego'].Lp(p['car3'].f['x'], p['car3'].f['y']) <= 3.0,
        }
        # Reward of -1.0 every step, plus small bonuses for getting past cars a and b
        # On success, +250 (= 100 + expected_finish_steps): the +150 roughly offsets the
        # per-step -1 accumulated over an episode expected to finish within about 150 steps
        expected_finish_steps = 150
        existence_punishment = -1.0
        past_a_reward = +0.5
        past_b_reward = +0.75
        r = [
            ["true", existence_punishment, 'satisfaction'],
            ['past_a and (not past_b)', past_a_reward, 'satisfaction'],
            ['past_b', past_b_reward, 'satisfaction'],
        ]
        t = [
            ["past_a and out_of_bounds", +50, 'satisfaction'],
            ["past_b and out_of_bounds", +25, 'satisfaction'],
            ["out_of_bounds", -100, 'satisfaction'],
            ["stopped", -100, 'satisfaction'],
            ["crash_a", -100, 'satisfaction'],
            ["crash_b", -100, 'satisfaction'],
            ["crash_c", -100, 'satisfaction'],
        ]
        s = [
            ['past_c', 100 + expected_finish_steps, 'satisfaction'],
        ]
        self.reward_structure(d, p, r, t, s, round_to=3)

        # Specify state
        # Expose the agent features and predicates, excluding car3's predicates
        # ('past_c', 'crash_c') from the observation.
        ego_fn = lambda agent, rs: utilities.combine_dicts(
            agent.f.get_dict(),
            {k: v for k, v in rs._p.get_dict().items() if k not in ['past_c', 'crash_c']})
        other_fn = lambda agent, rs: {}
        self.specify_state(ego_fn, other_fn)

        # Finally
        self.make_ready()
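
Unlike Example 1, the rules here use compound conditions such as `'past_a and (not past_b)'`. How the framework parses these is not shown in the snippet; one minimal reading, assuming they are ordinary boolean expressions over predicate names, is sketched below (`evaluate_condition` is a hypothetical helper).

def evaluate_condition(condition, predicate_values):
    # Treat the condition string as a boolean expression over predicate names,
    # evaluated with builtins disabled; 'true' always holds.
    if condition == 'true':
        return True
    return bool(eval(condition, {'__builtins__': {}}, dict(predicate_values)))

print(evaluate_condition('past_a and (not past_b)', {'past_a': True, 'past_b': False}))  # True
print(evaluate_condition('past_b', {'past_a': True, 'past_b': False}))                   # False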
Example 7
    def __init__(self, discrete=False):

        # Draw canvas
        static_elements = [
            ['Grass'],
            ['TwoLaneRoad', -50, +50, -5, +5, 0.5],
        ]
        agents = [
            [
                'Ego', lambda: -45 + np.random.rand() * 10, -2.5, 0.0,
                wmath.Direction2D(mode='+x')
            ],
            [
                'Veh', lambda: -15 + np.random.rand() * 10, +2.5, 0.0,
                wmath.Direction2D(mode='+x')
            ],
        ]
        canvas = graphics.Canvas(600,
                                 600,
                                 static_elements,
                                 agents,
                                 ox=300,
                                 oy=300,
                                 scale=600 / 100)
        default_policy = lambda c: [0, 0]

        # Zero pad by 2 features to create 7 + 4 + 2 = 13 features for continual learning
        # super().__init__(canvas, default_policy, zero_pad = 2, discrete = discrete)
        super().__init__(canvas, default_policy, zero_pad=0, discrete=discrete)

        # Reward structure
        expected_finish_steps = 150
        existence_punishment = -1.0
        d = {
            "mid_lane": -2.5,
            "car": lambda p, t: p['v'][1],
            "deviation_mid_lane": lambda p, t: abs(p['mid_lane'] - p['ego'].f['y']),
            "moving_horizontally": lambda p, t: -5 <= p['ego'].f['y'] <= 5
        }
        p = {
            "stopped": lambda p, t: t > 0 and p['ego'].f['v'] == 0,
            "out_of_bounds": lambda p, t: (not p['ego'].any_regions()) or
                                          (not p['moving_horizontally']),
            "near_goal": lambda p, t: p['ego'].Lp(45, -2.5) <= 2.5,
            "collided": lambda p, t: p['ego'].Lp(p['car'].f['x'], p['car'].f['y']) <= 3.0,
        }
        r = [
            ["true", existence_punishment, 'satisfaction'],
        ]
        t = [
            ['out_of_bounds', -100, 'satisfaction'],
            ["stopped", -100, 'satisfaction'],
            ["collided", -100, 'satisfaction'],
        ]
        s = [
            ['near_goal', 100 + expected_finish_steps, 'satisfaction'],
        ]
        self.reward_structure(d, p, r, t, s, round_to=3)

        # Specify state
        # ego_fn (unused in this example) would expose the agent features plus every predicate;
        # ego_fn2 below is used instead to keep a fixed-size state across tasks.
        ego_fn = lambda agent, rs: utilities.combine_dicts(
            agent.f.get_dict(), rs._p.get_dict())

        # Fixed-size state: rename the task-specific predicates and add constant -1
        # placeholder features so the state width matches the other continual-learning tasks.
        def ego_fn2(agent, rs):
            d1 = agent.f.get_dict()
            d2 = rs._p.get_dict()
            return utilities.combine_dicts(
                d1, {
                    "stopped": d2["stopped"],
                    "out_of_bounds": d2["out_of_bounds"],
                    "near_goal_osco": d2["near_goal"],
                    "collided_osco": d2["collided"],
                    "past_car_osc": -1,
                    "collided_osc": -1,
                })

        other_fn = lambda agent, rs: {}
        self.specify_state(ego_fn2, other_fn)

        # Finally
        self.make_ready()
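
The point of `ego_fn2` is to keep the state vector the same width across tasks: this task's predicates are renamed, and constant `-1` placeholders stand in for predicates that only exist elsewhere in the continual-learning sequence. The sketch below uses a stand-in for `utilities.combine_dicts` (the real helper may behave differently) to show that the merged state always exposes the same keys.

def combine_dicts(*dicts):
    # Stand-in for utilities.combine_dicts: later keys override earlier ones.
    merged = {}
    for d in dicts:
        merged.update(d)
    return merged

agent_features = {'x': -44.1, 'y': -2.5, 'v': 0.0}   # illustrative subset of agent.f
predicate_features = {
    'stopped': 0, 'out_of_bounds': 0,
    'near_goal_osco': 0, 'collided_osco': 0,
    'past_car_osc': -1, 'collided_osc': -1,          # placeholders for another task
}
state = combine_dicts(agent_features, predicate_features)
print(len(state), sorted(state))                      # the key set is fixed across tasks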