def test_does_not_allow_zero_velocity():
    """A [0, 0] action from the first start location still moves the car.

    With the patched ``rnd.rand`` returning 1, the resulting state must be
    ``(row - 1, col + 1, 1, 1)`` relative to the position before the step.
    """
    with patch('rlbox.env.racetrack.rnd') as fake_rnd:
        fake_rnd.rand.return_value = 1

        track = RaceTrack()
        track.pos = list(track.start_loc[0])
        # Snapshot the position so the expectation does not alias track.pos.
        before = track.pos.copy()

        observed, _reward = track.step([0, 0])

        expected = (before[0] - 1, before[1] + 1, 1, 1)
        assert observed == expected
def test_returns_to_start_position_if_out():
    """Stepping with [1, 1] from ``start_loc[5]`` resets the car.

    Expects the velocity part of the state to be ``(0, 0)`` and the car's
    current location to map to ``track.START`` afterwards.
    """
    with patch('rlbox.env.racetrack.rnd') as fake_rnd:
        fake_rnd.rand.return_value = 1

        track = RaceTrack()
        track.pos = list(track.start_loc[5])

        observed, _reward = track.step([1, 1])

        velocity = observed[2:]
        assert velocity == (0, 0)
        assert track.loc(track.pos) == track.START
def test_moves_car_with_given_velocity():
    """A [1, 0] action from the initial position moves the car one row up.

    With the patched ``rnd.rand`` returning 1, the state must become
    ``(row - 1, col, 1, 0)`` and the step reward must be -1.
    """
    with patch('rlbox.env.racetrack.rnd') as fake_rnd:
        fake_rnd.rand.return_value = 1

        track = RaceTrack()
        # Copy the initial position before the step changes the track.
        origin = track.pos.copy()

        observed, reward = track.step([1, 0])

        expected = (origin[0] - 1, origin[1], 1, 0)
        assert observed == expected
        assert reward == -1
def test_does_not_allow_velocity_out_of_bound():
    """Accelerating by [1, 1] from velocity [4, 4] leaves velocity at (4, 4)."""
    with patch('rlbox.env.racetrack.rnd') as fake_rnd:
        fake_rnd.rand.return_value = 1

        track = RaceTrack()
        track.pos = list(track.start_loc[0])
        track.vel = [4, 4]

        observed, _reward = track.step([1, 1])

        velocity = observed[2:]
        assert velocity == (4, 4)
def test_fails_on_accelerate_with_probability():
    """With ``rnd.rand`` patched to 0, a [1, 1] action leaves velocity as is.

    The car is placed one row above its initial position with velocity
    [1, 0]; after the step the state must be ``(row - 1, col, 1, 0)`` and
    the reward -1.
    """
    with patch('rlbox.env.racetrack.rnd') as fake_rnd:
        fake_rnd.rand.return_value = 0

        track = RaceTrack()
        row = track.pos[0] - 1
        col = track.pos[1]
        track.pos = [row, col]
        track.vel = [1, 0]

        observed, reward = track.step([1, 1])

        expected = (row - 1, col, 1, 0)
        assert observed == expected
        assert reward == -1
def test_is_done_when_on_end_location():
    """A fresh track is not done; it is done after the step set up below.

    The car is placed at [2, 14] with velocity [1, 3] and stepped with a
    [0, 1] action while ``rnd.rand`` is patched to return 0.
    """
    # The track is deliberately created before the patch is active.
    track = RaceTrack()
    assert not track.done()

    with patch('rlbox.env.racetrack.rnd') as fake_rnd:
        fake_rnd.rand.return_value = 0

        track.pos = [2, 14]
        track.vel = [1, 3]
        track.step([0, 1])

        assert track.done()
def worker(runs, env_cfg, task, param):
    """Run one agent on a fresh ``RaceTrack`` and dump the recorded episode.

    Args:
        runs: Value stored under the ``'runs'`` entry of the storage key.
        env_cfg: Keyword arguments for ``RaceTrack``; also merged into the
            storage key.
        task: Indexable pair; ``task[0]`` selects the entry of
            ``AGENT_PROGRAM`` and ``task[1]`` holds its keyword arguments.
        param: Mapping providing the ``'run'`` value passed to
            ``SharedMem.dump``.
    """
    alg_name = task[0]
    alg_params = task[1]

    environment = RaceTrack(**env_cfg)
    program = AGENT_PROGRAM[alg_name](
        environment.act_spec, environment.obs_spec, **alg_params)
    agent = core.Agent(
        program,
        lambda env: (env.obs, -1),
        lambda action, env: env.step(action))
    core.Run(agent, environment).start()

    # Later entries override earlier ones, same as the chained ** merges.
    key = {**env_cfg, 'runs': runs, 'alg': alg_name, **alg_params}

    states, actions, rewards = environment.episode()
    run = param['run']

    # Dump order is states, rewards, actions.
    for label, payload in (('states', states),
                           ('rewards', rewards),
                           ('actions', actions)):
        SharedMem.dump(Testbed.key_for(label, **key), run, payload)
def simulate():
    """Animate a ``RandomAgent`` driving a fresh ``RaceTrack`` in a window."""
    figure = plt.figure()
    track = RaceTrack()
    image = plt.imshow(
        track.print(),
        origin='lower',
        interpolation='none',
        animated=True,
    )
    plt.gca().invert_yaxis()
    agent = RandomAgent(track.act_spec, track.obs_spec)

    def advance(_frame):
        """Step the track once unless it is done, then redraw the image."""
        finished = track.done()
        if not finished:
            action = agent(None, None)
            track.step(action)
        image.set_array(track.print())
        # blit=True expects an iterable of the artists that were updated.
        return (image,)

    # Keep a reference to the animation so it is not garbage collected
    # while the window is open.
    ani = animation.FuncAnimation(figure, advance, interval=50, blit=True)
    plt.show()
def plot(self):
    """Load or compute a policy, then animate it on five fresh tracks.

    Reads the recorded states, actions and rewards for the first entry of
    ``self.exe`` from the store. If the store already holds a policy under
    the matching key it is reused; otherwise one is computed with
    ``off_policy_monte_carlo`` and written back to the store. The policy is
    then played in five consecutive matplotlib animations.
    """
    store = Store(self.summary)
    # Key parameters: environment config, run count, and the algorithm
    # name plus arguments of the first entry in self.exe.
    params = {**self.env, **{'runs': self.runs},
              **{'alg': self.exe[0][0]}, **self.exe[0][1]}
    st = store[self.key_for('states', **params)]
    act = store[self.key_for('actions', **params)]
    rew = store[self.key_for('rewards', **params)]
    environment = RaceTrack(**self.env)
    if self.key_for('policy', **params) in store:
        # NOTE(review): the [:] presumably materializes the stored policy
        # before the store is closed below — confirm against Store.
        pi = store[self.key_for('policy', **params)][:]
    else:
        agent_prog = RandomAgent(environment.act_spec, environment.obs_spec)
        pi = off_policy_monte_carlo(
            environment.act_spec, environment.obs_spec,
            agent_prog.policy(), stream(st, act, rew))
        store.replace(self.key_for('policy', **params), pi)
    store.close()
    # The lambdas read `environment` from the enclosing scope at call time,
    # so they see whichever RaceTrack the loop below has bound most recently.
    agent = core.Agent(
        RandomAgent(environment.act_spec, environment.obs_spec, pi),
        lambda env: (environment.STATE_IDX[env.obs], -1),
        lambda action, env: env.step(environment.ACT_SPACE[action]))

    def step(_):
        # `environment` and `im` are rebound on every loop iteration below;
        # this closure always acts on the current pair.
        if not environment.done():
            agent.run(environment)
        im.set_array(environment.print())
        # blit=True expects an iterable of the updated artists.
        return im,

    for _ in range(0, 5):
        fig = plt.figure()
        environment = RaceTrack(**self.env)
        im = plt.imshow(environment.print(), origin='lower',
                        interpolation='none', animated=True)
        plt.gca().invert_yaxis()
        # Keep a reference so the animation is not garbage collected
        # while the figure is shown.
        ani = animation.FuncAnimation(fig, step, interval=100, blit=True)
        plt.show()
def test_prepares_action_space():
    """The first action spec has shape (2,) with bounds -1 and 1."""
    spec = RaceTrack().act_spec[0]

    assert spec.shape == (2, )
    assert spec.lo == -1
    assert spec.hi == 1
def test_initializes_with_zero_velocity():
    """A freshly constructed track reports velocity [0, 0]."""
    initial_velocity = RaceTrack().vel
    assert initial_velocity == [0, 0]
def test_initializes_on_start_location():
    """The initial position indexes a ``RaceTrack.START`` cell in ``TRACK``."""
    track = RaceTrack()
    row = track.pos[0]
    col = track.pos[1]

    cell = TRACK[row, col]
    assert cell == RaceTrack.START
def test_prepares_observation_space():
    """The observation spec of a fresh track has four entries."""
    obs_spec = RaceTrack().obs_spec
    assert len(obs_spec) == 4