def test_compute_action_mask(action_mask: ActionMaskEnv):
    """Check ``compute_action_mask`` against two stubbed legal-action lists.

    ``state_cache.getLegalActions`` is replaced by a mock in each case, so the
    expected mask is determined solely by the stubbed return value.
    """
    # Stub returns no legal actions: the mask is expected to be all ones.
    with mock.patch.object(
        action_mask.state_cache, "getLegalActions", return_value=[]
    ):
        assert action_mask.compute_action_mask() == [1, 1, 1, 1]

    # Stub returns only "East": the mask is expected to set index 2 alone.
    with mock.patch.object(
        action_mask.state_cache, "getLegalActions", return_value=["East"]
    ):
        assert action_mask.compute_action_mask() == [0, 0, 1, 0]
def test_step(action_mask: ActionMaskEnv):
    """Take three consecutive steps and verify what each one returns.

    Covers the observation shape, the reward, the done flag, the Pacman
    direction reported by ``state_cache`` and the returned action-mask dict.
    """
    # Step 1 (action 2): check observation shape, reward and done flag.
    observation, reward, finished, _ = action_mask.step(2)
    assert observation.shape == (47, 15, 6)
    assert reward == 10
    assert not finished

    # Step 2 (action 0): check reward, direction and returned mask.
    _, reward, _, info = action_mask.step(0)
    assert reward == 10
    assert action_mask.state_cache.getPacmanDirection() == 'East'
    assert info == {'action_mask': [0, 1, 1, 1]}

    # Step 3 (action 3): check reward, direction and returned mask.
    _, reward, _, info = action_mask.step(3)
    assert reward == 0
    assert action_mask.state_cache.getPacmanDirection() == 'West'
    assert info == {'action_mask': [0, 0, 1, 1]}
tensorboard_folder = './tensorboard/Pacman/action_mask/' model_folder = './models/Pacman/action_mask/' if not os.path.isdir(tensorboard_folder): os.makedirs(tensorboard_folder) if not os.path.isdir(model_folder): os.makedirs(model_folder) policy = '' model_tag = '' if len(sys.argv) > 1: policy = sys.argv[1] model_tag = '_' + sys.argv[1] if __name__ == '__main__': env = SubprocVecEnv([lambda: ActionMaskEnv() for i in range(4)]) env = VecFrameStack(env, 3) model = ACKTR(get_policy(policy), env, n_steps=100, verbose=0,vf_fisher_coef=0.5 , tensorboard_log=tensorboard_folder, kfac_update=10, n_cpu_tf_sess=2, async_eigen_decomp=False) model.learn(total_timesteps=100000000, tb_log_name='ACKTR_A2C' + model_tag) model.save(model_folder + "ACKTR_A2C" + model_tag) del model model = ACKTR.load(model_folder + "ACKTR_A2C" + model_tag) done = False states = None action_masks = [] obs = env.reset() while not done:
def action_mask():
    """Return a new ``ActionMaskEnv`` constructed with ``'test_map'``."""
    env = ActionMaskEnv('test_map')
    return env
tensorboard_folder = './tensorboard/Pacman/action_mask/' model_folder = './models/Pacman/base/' if not os.path.isdir(tensorboard_folder): os.makedirs(tensorboard_folder) if not os.path.isdir(model_folder): os.makedirs(model_folder) policy = '' model_tag = '' if len(sys.argv) > 1: policy = sys.argv[1] model_tag = '_' + sys.argv[1] if __name__ == '__main__': env = DummyVecEnv([lambda: ActionMaskEnv() for i in range(4)]) env = VecFrameStack(env, 3) model = PPO2.load(model_folder + "PPO2" + model_tag) done = [False, False, False, False] states = None action_masks = [] obs = env.reset() while not done[0]: action, states = model.predict(obs, states, action_mask=action_masks) obs, _, done, infos = env.step(action) env.render() action_masks.clear() for info in infos: