def test_interpretation_plot_q_dqn_returns():
    """Smoke test: briefly train a DQN on the maze env, then plot Q-value heatmaps."""
    data = MDPDataBunch.from_env('maze-random-5x5-v0', render='human')
    learner = AgentLearner(data, DQN(data))
    learner.fit(5)
    # Interpret episode 2 of the recorded run as a heatmapped plot.
    interpretation = AgentInterpretationAlpha(learner)
    interpretation.plot_heatmapped_episode(2)
def test_MarkovDecisionProcessDataBunch_init_no_valid():
    """Create an MDPDataBunch with no validation dataloader and iterate it.

    Checks that the train dataloader has the expected length, that the valid
    dataloader stays ``None``, and that actions set on the dataset are visible
    through the dataloader's dataset reference.
    """
    error_msg = 'state space is %s but should be %s'
    error_msg2 = 'the datasets in the dataloader seem to be different from the data bunches datasets...'
    max_steps = 50

    # Create 2 itemlists
    train_list = MDPDataset(gym.make('CartPole-v1'), max_steps=max_steps)
    env_databunch = MDPDataBunch.create(train_list, num_workers=0)
    env_databunch.valid_dl = None
    epochs = 3

    assert max_steps == len(train_list)
    assert max_steps == len(env_databunch.train_dl)
    assert env_databunch.valid_dl is None

    for epoch in range(epochs):
        print(f'epoch {epoch}')
        for element in env_databunch.train_dl:
            env_databunch.train_ds.actions = env_databunch.train_ds.env.action_space.sample()
            current_s, actual_s = element.shape[1], train_list.env.observation_space.shape[0]
            print(f'state {element} action {env_databunch.train_dl.dl.dataset.actions}')
            assert current_s == actual_s, error_msg % (current_s, actual_s)
            # FIX: np.equal returns an ndarray; a bare `assert` on a
            # multi-element array raises ValueError ("truth value ... is
            # ambiguous") instead of testing anything. Reduce with np.all,
            # matching the sum-vs-size pattern the sibling tests use.
            assert np.all(np.equal(env_databunch.train_dl.dl.dataset.actions,
                                   env_databunch.train_ds.actions)), error_msg2
def test_individual_env():
    """Iterate one specific env's train and valid dataloaders for one epoch.

    Verifies the dataloader lengths match ``max_steps`` and that actions set
    on each dataset are the same objects seen through the matching dataloader.
    """
    msg = 'the datasets in the dataloader seem to be different from the data bunches datasets...'
    max_steps = 50
    env = 'CarRacing-v0'
    print(f'Testing {env}')

    mdp_databunch = MDPDataBunch.from_env(env, max_steps=max_steps, num_workers=0)
    epochs = 1
    assert max_steps == len(mdp_databunch.train_dl)
    assert max_steps == len(mdp_databunch.valid_dl)

    for epoch in range(epochs):
        for _ in mdp_databunch.train_dl:
            mdp_databunch.train_ds.actions = mdp_databunch.train_ds.get_random_action()
            # print(f'state {element.shape} action {mdp_databunch.train_dl.dl.dataset.actions}')
            assert np.sum(np.equal(mdp_databunch.train_dl.dl.dataset.actions,
                                   mdp_databunch.train_ds.actions)) == \
                np.size(mdp_databunch.train_ds.actions), msg

        for _ in mdp_databunch.valid_dl:
            mdp_databunch.valid_ds.actions = mdp_databunch.valid_ds.get_random_action()
            # print(f'state {element.shape} action {mdp_databunch.valid_dl.dl.dataset.actions}')
            # FIX: copy-paste bug — this loop previously re-asserted the
            # train dataset/dataloader; it must check the valid pair it
            # just assigned to.
            assert np.sum(np.equal(mdp_databunch.valid_dl.dl.dataset.actions,
                                   mdp_databunch.valid_ds.actions)) == \
                np.size(mdp_databunch.valid_ds.actions), msg
def test_interpretation_plot_sequence():
    """Drive the learner callback lifecycle manually and plot after each step."""
    data = MDPDataBunch.from_env('maze-random-5x5-v0', render='human', max_steps=1000)
    model = DQN(data)
    learn = AgentLearner(data, model)
    epochs = 20

    callbacks = learn.model.callbacks  # type: Collection[LearnerCallback]
    for cb in callbacks:
        cb.on_train_begin(learn=learn, n_epochs=epochs)

    for epoch in range(epochs):
        for cb in callbacks:
            cb.on_epoch_begin(epoch=epoch)
        learn.model.train()

        counter = 0
        for element in learn.data.train_dl:
            learn.data.train_ds.actions = learn.predict(element)
            for cb in callbacks:
                cb.on_step_end(learn=learn)
            counter += 1
            # if counter % 100 == 0:# or counter == 0:
            interp = AgentInterpretationAlpha(learn, ds_type=DatasetType.Train)
            interp.plot_heatmapped_episode(epoch)

        for cb in callbacks:
            cb.on_epoch_end()

    for cb in callbacks:
        cb.on_train_end()
def test_interpretation_heatmap():
    """Train/eval each epoch through the callback lifecycle, plotting a heatmap per epoch."""
    data = MDPDataBunch.from_env('maze-random-5x5-v0', render='human')
    model = DQN(data)
    learn = AgentLearner(data, model)
    epochs = 10

    callbacks = learn.model.callbacks  # type: Collection[LearnerCallback]
    for cb in callbacks:
        cb.on_train_begin(learn=learn, n_epochs=epochs)

    for epoch in range(epochs):
        for cb in callbacks:
            cb.on_epoch_begin(epoch=epoch)
        learn.model.train()

        for element in learn.data.train_dl:
            learn.data.train_ds.actions = learn.predict(element)
            for cb in callbacks:
                cb.on_step_end(learn=learn)
        for cb in callbacks:
            cb.on_epoch_end()

        # For now we are going to avoid executing learner_callbacks here.
        learn.model.eval()
        for element in learn.data.valid_dl:
            learn.data.valid_ds.actions = learn.predict(element)

        if epoch % 1 == 0:
            interp = AgentInterpretationAlpha(learn)
            interp.plot_heatmapped_episode(epoch)

    for cb in callbacks:
        cb.on_train_end()
def test_fit_function_dqn():
    """Smoke test: DQN with prioritized experience replay trains via learn.fit."""
    data = MDPDataBunch.from_env('maze-random-5x5-v0', render='human', max_steps=1000)
    learner = AgentLearner(data, DQN(data, memory=PriorityExperienceReplay(1000)))
    learner.fit(5)
def test_priority_experience_replay():
    """Smoke test: fixed-target DQN backed by prioritized replay trains without error."""
    data = MDPDataBunch.from_env('maze-random-5x5-v0', render='human', max_steps=1000)
    agent = FixedTargetDQN(data, memory=PriorityExperienceReplay(1000))
    AgentLearner(data, agent).fit(5)
def test_double_dueling_dqn_model_maze():
    """Smoke test: double dueling DQN runs a short fit on the maze env."""
    data = MDPDataBunch.from_env('maze-random-5x5-v0', render='human', max_steps=1000)
    agent = DoubleDuelingDQN(data)
    AgentLearner(data, agent).fit(5)
def test_basic_dqn_model_maze():
    """Smoke test: vanilla DQN runs a short fit on the maze env."""
    data = MDPDataBunch.from_env('maze-random-5x5-v0', render='human', max_steps=200)
    agent = DQN(data)
    AgentLearner(data, agent).fit(5)
def test_fixed_target_dqn_model_maze():
    """Smoke test: fixed-target DQN runs a short fit on the maze env."""
    print('\n')
    data = MDPDataBunch.from_env('maze-random-5x5-v0', render='human', max_steps=1000)
    agent = FixedTargetDQN(data)
    AgentLearner(data, agent).fit(5)
def test_interpretation_plot_q_ddpg_returns():
    """Smoke test: briefly train DDPG on Pendulum, then plot Q-value heatmaps."""
    data = MDPDataBunch.from_env('Pendulum-v0', render='human')
    # data = MDPDataBunch.from_env('MountainCarContinuous-v0', render='human')
    learner = AgentLearner(data, DDPG(data, batch=8))
    learner.fit(5)
    interpretation = AgentInterpretationAlpha(learner)
    interpretation.plot_heatmapped_episode(2)
def test_epsilon():
    """Smoke test: fixed-target DQN with epsilon exploration, train set only."""
    data = MDPDataBunch.from_env('maze-random-5x5-v0', render='human',
                                 max_steps=100, add_valid=False)
    agent = FixedTargetDQN(data, batch_size=64, max_episodes=100, copy_over_frequency=4)
    AgentLearner(data, agent).fit(20)
def test_envs_all(env):
    """Parameterized check that any supported env builds a working MDPDataBunch.

    Skips (returns early) when the bunch cannot be built (e.g. Mujoco envs),
    otherwise validates dataloader lengths, action/state space access, and
    that actions set on each dataset are visible through its dataloader.
    """
    msg = 'the datasets in the dataloader seem to be different from the data bunches datasets...'
    max_steps = 50
    print(f'Testing {env}')

    mdp_databunch = MDPDataBunch.from_env(env, max_steps=max_steps, num_workers=0)
    if mdp_databunch is None:
        print(
            f'Env {env} is probably Mujoco... Add imports if you want and try on your own. Don\'t like '
            f'proprietary engines like this. If you have any issues, feel free to make a PR!'
        )
        return

    epochs = 1
    assert max_steps == len(mdp_databunch.train_dl)
    assert max_steps == len(mdp_databunch.valid_dl)

    a_s, s_s = mdp_databunch.get_action_state_size()
    assert a_s is not None
    assert s_s is not None
    a_s.sample()
    s_s.sample()

    for epoch in range(epochs):
        for _ in mdp_databunch.train_dl:
            mdp_databunch.train_ds.actions = mdp_databunch.train_ds.get_random_action()
            # print(f'state {element} action {mdp_databunch.train_dl.dl.dataset.actions}')
            assert np.sum(np.equal(mdp_databunch.train_dl.dl.dataset.actions,
                                   mdp_databunch.train_ds.actions)) == \
                np.size(mdp_databunch.train_ds.actions), msg

        for _ in mdp_databunch.valid_dl:
            mdp_databunch.valid_ds.actions = mdp_databunch.valid_ds.get_random_action()
            # print(f'state {element} action {mdp_databunch.valid_dl.dl.dataset.actions}')
            # FIX: copy-paste bug — this loop previously re-asserted the
            # train dataset/dataloader; it must check the valid pair it
            # just assigned to.
            assert np.sum(np.equal(mdp_databunch.valid_dl.dl.dataset.actions,
                                   mdp_databunch.valid_ds.actions)) == \
                np.size(mdp_databunch.valid_ds.actions), msg
def test_ddpg():
    """Long-running smoke test: DDPG on Pendulum for 450 fit iterations."""
    data = MDPDataBunch.from_env('Pendulum-v0', render='human')
    # data = MDPDataBunch.from_env('MountainCarContinuous-v0', render='human')
    agent = DDPG(data, batch=8)
    AgentLearner(data, agent).fit(450)
def test_fit_function_ddpg():
    """Smoke test: DDPG with prioritized replay trains via learn.fit."""
    data = MDPDataBunch.from_env('Pendulum-v0', render='human', max_steps=1000)
    agent = DDPG(data, memory=PriorityExperienceReplay(1000))
    AgentLearner(data, agent).fit(5)