Example #1
    def __init__(self, discrete=False):

        # Draw canvas
        static_elements = [
            ['Grass'],
            ['TwoLaneRoad', -50, +50, -5, +5, 0.5],
        ]
        agents = [
            ['Ego', lambda: -45 + np.random.rand() * 10, -2.5, 0.0, wmath.Direction2D(mode='+x')],
        ]
        canvas = graphics.Canvas(600, 600,
            static_elements, agents,
            ox=300, oy=300, scale=600 / 100)
        
        # Zero pad by 3 features to create 7 + 3 + 3 = 13 features for continual learning
        super().__init__(canvas, zero_pad=3, discrete=discrete)

        # Reward structure
        # Reward of -1.0 every step.
        # On success, +(100 + expected_finish_steps) = +250, so an episode that
        # finishes within the expected 150 steps still nets about +100.
        expected_finish_steps = 150
        existence_punishment = -1.0
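        # Reward-structure pieces, as used throughout these examples:
        #   d -- named constants and derived quantities,
        #   p -- boolean predicates over the current state,
        #   r -- per-step reward terms ([expression, reward, mode]),
        #   t -- terminal (failure) conditions and their penalties,
        #   s -- success conditions and their bonuses.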
        d = {
            "mid_lane": -2.5,
            "deviation_mid_lane": lambda p, t: abs(p['mid_lane']-p['ego'].f['y']),
            "moving_horizontally": lambda p, t: -5 <= p['ego'].f['y'] <= 5
        }
        p = {
            "stopped": lambda p, t: t > 0 and p['ego'].f['v'] == 0,
            "out_of_bounds": lambda p, t: (not p['ego'].any_regions()) or (not p['moving_horizontally']),
            "near_goal": lambda p, t: p['ego'].Lp(45, -2.5) <= 2.5,
        }
        r = [
            ["true", lambda p, t: -p['deviation_mid_lane']/5 + existence_punishment, 'satisfaction'],
        ]
        t = [
            ['out_of_bounds', -100, 'satisfaction'],
            ["stopped", -100, 'satisfaction'],
        ]
        s = [
            ['near_goal', 100 + expected_finish_steps, 'satisfaction'],
        ]
        self.reward_structure(d, p, r, t, s, round_to = 3)

        # Specify state
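        # ego_fn builds the ego's observation by merging its per-agent features
        # (agent.f) with the reward structure's predicate values (rs._p); other
        # agents contribute nothing to the state in this example.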
        ego_fn = lambda agent, rs: utilities.combine_dicts(agent.f.get_dict(), rs._p.get_dict())
        other_fn = lambda agent, rs: {}
        self.specify_state(ego_fn, other_fn)

        # Finally
        self.make_ready()
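
A minimal usage sketch for an environment defined this way; the wrapper class name (StraightRoadEnv) and the Gym-style reset()/step() loop are assumptions for illustration, not part of the example above.

# Hypothetical usage -- the class name and the reset()/step() API are assumed.
env = StraightRoadEnv(discrete=True)
state = env.reset()                      # assumed reset method
done = False
while not done:
    action = [0.0, 0.0]                  # placeholder 2-D action
    state, reward, done, info = env.step(action)   # assumed Gym-style step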
Example #2
    def __init__(self, discrete=False):

        # Draw canvas
        static_elements = [
            ['Grass'],
            ['TwoLaneRoad', -50, +50, -5, +5, 0.5],
        ]
        agents = [
            [
                'Ego', lambda: -45 + np.random.rand() * 10, -2.5, 0.0,
                wmath.Direction2D(mode='+x')
            ],
            [
                'Veh', lambda: -20 + np.random.rand() * 5, -2.5, 0.0,
                wmath.Direction2D(mode='+x')
            ],
            [
                'Veh', lambda: -5 + np.random.rand() * 5, -2.5, 0.0,
                wmath.Direction2D(mode='+x')
            ],
            [
                'Veh', lambda: +20 + np.random.rand() * 5, +2.5, 0.0,
                wmath.Direction2D(mode='+x')
            ],
        ]
        canvas = graphics.Canvas(600,
                                 600,
                                 static_elements,
                                 agents,
                                 ox=300,
                                 oy=300,
                                 scale=600 / 100)
        default_policy = lambda c: [0, 0]
        super().__init__(canvas, default_policy, discrete=discrete)

        # Reward structure
        # Shortened the names of predicates
        d = {
            "mid_lane": -2.5,
            "car1": lambda p, t: p['v'][1],
            "car2": lambda p, t: p['v'][2],
            "car3": lambda p, t: p['v'][3],
            "deviation_mid_lane":
            lambda p, t: abs(p['mid_lane'] - p['ego'].f['y']),
            "moving_horizontally": lambda p, t: -5 <= p['ego'].f['y'] <= 5,
        }
        p = {
            "stopped": lambda p, t: t > 0 and p['ego'].f['v'] == 0,
            "out_of_bounds": lambda p, t: (not p['ego'].any_regions()) or (not p['moving_horizontally']),
            "past_a": lambda p, t: p['ego'].f['x'] > p['car1'].f['x'] + 10,
            "past_b": lambda p, t: p['ego'].f['x'] > p['car2'].f['x'] + 10,
            "past_c": lambda p, t: p['ego'].f['x'] > p['car3'].f['x'] + 5,
            "crash_a": lambda p, t: p['ego'].Lp(p['car1'].f['x'], p['car1'].f['y']) <= 3.0,
            "crash_b": lambda p, t: p['ego'].Lp(p['car2'].f['x'], p['car2'].f['y']) <= 3.0,
            "crash_c": lambda p, t: p['ego'].Lp(p['car3'].f['x'], p['car3'].f['y']) <= 3.0,
        }
        # Reward of -1.0 every step.
        # On success, +(100 + expected_finish_steps) = +250, so an episode that
        # finishes within the expected 150 steps still nets about +100.
        expected_finish_steps = 150
        existence_punishment = -1.0
        past_a_reward = +0.5
        past_b_reward = +0.75
        r = [
            ["true", existence_punishment, 'satisfaction'],
            ['past_a and (not past_b)', past_a_reward, 'satisfaction'],
            ['past_b', past_b_reward, 'satisfaction'],
        ]
        t = [
            ["past_a and out_of_bounds", +50, 'satisfaction'],
            ["past_b and out_of_bounds", +25, 'satisfaction'],
            ["out_of_bounds", -100, 'satisfaction'],
            ["stopped", -100, 'satisfaction'],
            ["crash_a", -100, 'satisfaction'],
            ["crash_b", -100, 'satisfaction'],
            ["crash_c", -100, 'satisfaction'],
        ]
        s = [
            ['past_c', 100 + expected_finish_steps, 'satisfaction'],
        ]
        self.reward_structure(d, p, r, t, s, round_to=3)

        # Specify state
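        # Same pattern as the previous example, except the 'past_c' and 'crash_c'
        # predicates are excluded from the ego's observation.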
        ego_fn = lambda agent, rs: utilities.combine_dicts(
            agent.f.get_dict(),
            {k: v for k, v in rs._p.get_dict().items() if k not in ['past_c', 'crash_c']})
        other_fn = lambda agent, rs: {}
        self.specify_state(ego_fn, other_fn)

        # Finally
        self.make_ready()
Example #3
    ['Veh', +15, +2.5, 0.0,
     wmath.Direction2D(mode='-x')],
    ['Veh', +2.5, -45, 0.0,
     wmath.Direction2D(mode='+y')],
    ['Veh', +2.5, -15, 0.0,
     wmath.Direction2D(mode='+y')],
    ['Veh', -2.5, +45, 0.0,
     wmath.Direction2D(mode='-y')],
    ['Veh', -2.5, +15, 0.0,
     wmath.Direction2D(mode='-y')],
]

canvas = graphics.Canvas(600,
                         600,
                         static_elements,
                         agents,
                         ox=300,
                         oy=300,
                         scale=600 / 100)
default_policy = lambda agent: agent.aggressive_driving()
env = design.Environment(canvas, default_policy)
ego = env.agents[env.ego_id]

# Empty reward structure
env.reward_structure({}, {}, [], [], [], round_to=3)

# Empty state
ego_fn = lambda agent, rs: {}
other_fn = lambda agent, rs: {}
env.specify_state(ego_fn, other_fn)
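
# The reward structure and state above are left empty, so behaviour comes
# entirely from default_policy. If a reward signal were wanted instead of the
# empty one, the same d/p/r/t/s pattern from the other examples would apply;
# the values below are illustrative placeholders, not taken from the source.
d = {"mid_lane": -2.5,
     "deviation_mid_lane": lambda p, t: abs(p['mid_lane'] - p['ego'].f['y'])}
p = {"stopped": lambda p, t: t > 0 and p['ego'].f['v'] == 0}
r = [["true", -1.0, 'satisfaction']]       # small per-step penalty
t = [["stopped", -100, 'satisfaction']]    # punish and terminate if the ego stops
s = []                                     # no explicit success condition in this sketch
env.reward_structure(d, p, r, t, s, round_to=3)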
Example #4
    def __init__(self, discrete=False):

        # Draw canvas
        static_elements = [
            ['Grass'],
            ['TwoLaneRoad', -50, +50, -5, +5, 0.5],
        ]
        agents = [
            [
                'Ego', lambda: -45 + np.random.rand() * 10, -2.5, 0.0,
                wmath.Direction2D(mode='+x')
            ],
            [
                'Veh', lambda: -15 + np.random.rand() * 10, +2.5, 0.0,
                wmath.Direction2D(mode='+x')
            ],
        ]
        canvas = graphics.Canvas(600,
                                 600,
                                 static_elements,
                                 agents,
                                 ox=300,
                                 oy=300,
                                 scale=600 / 100)
        default_policy = lambda c: [0, 0]

        # Target is 13 features for continual learning: 7 base + 4 predicates + 2 padding.
        # zero_pad=0 is used here because ego_fn2 below adds the two placeholder features
        # itself; the commented-out call is the zero_pad variant.
        # super().__init__(canvas, default_policy, zero_pad=2, discrete=discrete)
        super().__init__(canvas, default_policy, zero_pad=0, discrete=discrete)

        # Reward structure
        expected_finish_steps = 150
        existence_punishment = -1.0
        d = {
            "mid_lane": -2.5,
            "car": lambda p, t: p['v'][1],
            "deviation_mid_lane":
            lambda p, t: abs(p['mid_lane'] - p['ego'].f['y']),
            "moving_horizontally": lambda p, t: -5 <= p['ego'].f['y'] <= 5
        }
        p = {
            "stopped": lambda p, t: t > 0 and p['ego'].f['v'] == 0,
            "out_of_bounds": lambda p, t: (not p['ego'].any_regions()) or (not p['moving_horizontally']),
            "near_goal": lambda p, t: p['ego'].Lp(45, -2.5) <= 2.5,
            "collided": lambda p, t: p['ego'].Lp(p['car'].f['x'], p['car'].f['y']) <= 3.0,
        }
        r = [
            ["true", existence_punishment, 'satisfaction'],
        ]
        t = [
            ['out_of_bounds', -100, 'satisfaction'],
            ["stopped", -100, 'satisfaction'],
            ["collided", -100, 'satisfaction'],
        ]
        s = [
            ['near_goal', 100 + expected_finish_steps, 'satisfaction'],
        ]
        self.reward_structure(d, p, r, t, s, round_to=3)

        # Specify state
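        # Two candidate state functions follow: ego_fn exposes every predicate,
        # while ego_fn2 (the one actually passed to specify_state) renames some
        # predicates and appends two placeholder features fixed at -1.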
        ego_fn = lambda agent, rs: utilities.combine_dicts(
            agent.f.get_dict(), rs._p.get_dict())

        def ego_fn2(agent, rs):
            d1 = agent.f.get_dict()
            d2 = rs._p.get_dict()
            return utilities.combine_dicts(
                d1, {
                    "stopped": d2["stopped"],
                    "out_of_bounds": d2["out_of_bounds"],
                    "near_goal_osco": d2["near_goal"],
                    "collided_osco": d2["collided"],
                    "past_car_osc": -1,
                    "collided_osc": -1,
                })

        other_fn = lambda agent, rs: {}
        self.specify_state(ego_fn2, other_fn)

        # Finally
        self.make_ready()