# Shared imports assumed by these snippets: BotBowlEnv, EnvConf and the wrappers
# live in botbowl.ai.env; A2C_Reward, PPCGWrapper, a2c_scripted_actions and
# model_filename come from the accompanying A2C example module.
import itertools
from multiprocessing import Pipe, Process
from random import randint
from typing import Callable, Optional

import numpy as np
import pytest
import torch
from torch.autograd import Variable

import botbowl
from botbowl import Action, ActionChoice, ActionType, Agent, Game
from botbowl.ai.bots.random_bot import RandomBot  # import path may vary by botbowl version
from botbowl.ai.env import BotBowlEnv, EnvConf, RewardWrapper, ScriptedActionWrapper


def run_game():
    env = BotBowlEnv()
    steps = 0
    random_bot = RandomBot(name='')
    for _ in range(2):
        env.reset()
        game = env.game
        while not game.state.game_over:
            game.step(random_bot.act(game))
            steps += 1
    return steps
def run_env():
    env = BotBowlEnv()
    steps = 0
    for _ in range(2):
        done = False
        _, _, mask = env.reset()
        while not done:
            steps += 1
            aa = np.where(mask > 0.0)[0]
            action_idx = np.random.choice(aa, 1)[0]
            (_, _, mask), reward, done, _ = env.step(action_idx)
    return steps
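# The reset/step contract used throughout these snippets: reset() returns a
# (spatial_obs, non_spatial_obs, action_mask) tuple, and step() takes a flat
# action index and returns (obs_tuple, reward, done, info). A minimal sketch;
# demo_env_contract is an illustrative name, not part of the original code:
def demo_env_contract():
    env = BotBowlEnv()
    spatial_obs, non_spatial_obs, mask = env.reset()
    action_idx = np.random.choice(np.where(mask)[0])  # sample any legal action
    (spatial_obs, non_spatial_obs, mask), reward, done, info = env.step(action_idx)
    env.close()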
def test_compute_action():
    env = BotBowlEnv()
    for action_type in itertools.chain(env.env_conf.positional_action_types,
                                       env.env_conf.simple_action_types):
        if type(action_type) is botbowl.Formation:
            continue
        sq = None
        if action_type in env.env_conf.positional_action_types:
            sq = botbowl.Square(x=randint(0, env.width - 1), y=randint(0, env.height - 1))
        action = botbowl.Action(action_type, position=sq)
        same_action = env._compute_action(env._compute_action_idx(action))[0]
        assert action.action_type == same_action.action_type, \
            f"Wrong type: {action} != {same_action}"
        assert action.position == same_action.position, \
            f"Wrong position: {action} != {same_action}"
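# A single-action round trip through the same index mapping that
# test_compute_action sweeps exhaustively; MOVE and the target square are
# arbitrary picks, and demo_action_roundtrip is an illustrative name:
def demo_action_roundtrip():
    env = BotBowlEnv()
    action = botbowl.Action(botbowl.ActionType.MOVE, position=botbowl.Square(3, 4))
    idx = env._compute_action_idx(action)
    recovered = env._compute_action(idx)[0]
    assert recovered.action_type == action.action_type
    assert recovered.position == action.position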
def make_env():
    # env_conf and ppcg are expected to be defined in the enclosing scope.
    env = BotBowlEnv(env_conf)
    if ppcg:
        env = PPCGWrapper(env)
    env = ScriptedActionWrapper(env, scripted_func=a2c_scripted_actions)
    env = RewardWrapper(env, home_reward_func=A2C_Reward())
    return env
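# Possible wiring for make_env: it reads env_conf and ppcg from the enclosing
# scope, so a caller might set them first. size=11 and ppcg=False are
# illustrative values, not the example's defaults:
env_conf = EnvConf(size=11)
ppcg = False
env = make_env()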
def test_multiple_gyms():
    nenvs = 2
    ps = []
    remotes = []
    for _ in range(nenvs):
        env = BotBowlEnv()
        remote, work_remote = Pipe()
        p = Process(target=worker, args=(work_remote, remote, env), daemon=True)
        p.start()
        work_remote.close()  # the child keeps its end; close the parent's copy
        ps.append(p)
        remotes.append(remote)

    for _ in range(20):
        for remote in remotes:
            remote.send('step')
        for remote in remotes:
            obs, reward, done, info = remote.recv()
            assert reward is not None
            assert obs is not None

    for remote, p in zip(remotes, ps):
        remote.send('close')
        p.join()
def worker(remote, parent_remote, env: BotBowlEnv):
    parent_remote.close()
    seed = env._seed
    rnd = np.random.RandomState(seed)
    steps = 0
    _, _, mask = env.reset()
    while True:
        command = remote.recv()
        if command == 'step':
            aa = np.where(mask > 0.0)[0]
            action_idx = rnd.choice(aa, 1)[0]
            obs, reward, done, info = env.step(action_idx)
            mask = obs[2]
            steps += 1
            if done:
                obs = env.reset()
                mask = obs[2]  # refresh the mask for the new episode
            remote.send((obs, reward, done, info))
        elif command == 'reset':
            obs = env.reset()
            mask = obs[2]
            done = False
            remote.send(obs)
        elif command == 'close':
            env.close()
            break
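# A minimal single-process driver for worker(), assuming the same Pipe/Process
# setup as test_multiple_gyms above; it only illustrates the message protocol,
# and demo_worker_protocol is an illustrative name:
def demo_worker_protocol():
    env = BotBowlEnv()
    remote, work_remote = Pipe()
    p = Process(target=worker, args=(work_remote, remote, env), daemon=True)
    p.start()
    work_remote.close()
    remote.send('reset')
    spatial_obs, non_spatial_obs, mask = remote.recv()
    remote.send('step')
    obs, reward, done, info = remote.recv()
    remote.send('close')
    p.join()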
@pytest.mark.parametrize("pathfinding", [True, False])  # assumed: pytest supplies the flag
def test_observation_ranges(pathfinding):
    def find_first_index(array_: np.ndarray, value_: float):
        indices = (array_ == value_).nonzero()
        return [x[0] for x in indices]

    env = BotBowlEnv(EnvConf(pathfinding=pathfinding))

    for _ in range(2):
        done = False
        spatial_obs, non_spatial_obs, mask = env.reset()

        while not done:
            # Spatial observations are within [0, 1]
            for layer, array in zip(env.env_conf.layers, spatial_obs):
                layer_name = layer.name()
                max_val = np.max(array)
                min_val = np.min(array)
                assert max_val <= 1.0, \
                    f"['{layer_name}'][{find_first_index(array, max_val)}] is too high ({max_val})"
                assert min_val >= 0.0, \
                    f"['{layer_name}'][{find_first_index(array, min_val)}] is too low ({min_val})"

            # Non-spatial observations are within [0, 1]
            max_val = np.max(non_spatial_obs)
            min_val = np.min(non_spatial_obs)
            assert min_val >= 0.0, \
                f"non_spatial_obs[{find_first_index(non_spatial_obs, min_val)}] is too low ({min_val})"
            assert max_val <= 1.0, \
                f"non_spatial_obs[{find_first_index(non_spatial_obs, max_val)}] is too high ({max_val})"

            aa = np.where(mask)[0]
            action_idx = np.random.choice(aa, 1)[0]
            (spatial_obs, non_spatial_obs, mask), reward, done, _ = env.step(action_idx)

    env.close()
def test_reward_and_scripted_wrapper():
    reward_func = A2C_Reward()

    def scripted_func(game) -> Optional[botbowl.Action]:
        available_action_types = [action_choice.action_type
                                  for action_choice in game.get_available_actions()]
        if len(available_action_types) == 1 \
                and len(game.get_available_actions()[0].positions) == 0 \
                and len(game.get_available_actions()[0].players) == 0:
            return botbowl.Action(available_action_types[0])
        if botbowl.ActionType.END_PLAYER_TURN in available_action_types and randint(1, 5) == 2:
            return botbowl.Action(botbowl.ActionType.END_PLAYER_TURN)
        return None

    env = BotBowlEnv(EnvConf(size=1))
    env = ScriptedActionWrapper(env, scripted_func)
    env = RewardWrapper(env, home_reward_func=reward_func)

    rewards = []
    own_tds = []
    opp_tds = []
    for _ in range(10):
        _, _, mask = env.reset()
        done = False
        ep_reward = 0.0
        while not done:
            aa = np.where(mask)[0]
            action_idx = np.random.choice(aa, 1)[0]
            (_, _, mask), reward, done, _ = env.step(action_idx)
            ep_reward += reward
        rewards.append(ep_reward)
        own_tds.append(env.game.state.home_team.state.score)
        opp_tds.append(env.game.state.away_team.state.score)
class A2CAgent(Agent):
    env: BotBowlEnv

    def __init__(self, name,
                 env_conf: EnvConf,
                 exclude_pathfinding_moves,
                 scripted_func: Callable[[Game], Optional[Action]] = None,
                 filename=model_filename):
        super().__init__(name)
        self.env = BotBowlEnv(env_conf)
        self.exclude_pathfinding_moves = exclude_pathfinding_moves
        self.scripted_func = scripted_func
        self.action_queue = []

        # MODEL
        self.policy = torch.load(filename)
        self.policy.eval()
        self.end_setup = False

    def new_game(self, game, team):
        pass

    def _filter_actions(self):
        """
        Remove pathfinding-assisted non-adjacent or block move actions if pathfinding is disabled.
        """
        actions = []
        for action_choice in self.env.game.state.available_actions:
            if action_choice.action_type == ActionType.MOVE:
                positions, rolls = [], []
                for i in range(len(action_choice.positions)):
                    position = action_choice.positions[i]
                    roll = action_choice.paths[i].rolls[0]
                    # Only include positions where there are no players
                    if self.env.game.get_player_at(position) is None:
                        positions.append(position)
                        rolls.append(roll)
                actions.append(ActionChoice(ActionType.MOVE, team=action_choice.team,
                                            positions=positions, rolls=rolls))
            else:
                actions.append(action_choice)
        self.env.game.state.available_actions = actions

    @staticmethod
    def _update_obs(array: np.ndarray):
        return torch.unsqueeze(torch.from_numpy(array.copy()), dim=0)

    def act(self, game):
        if len(self.action_queue) > 0:
            return self.action_queue.pop(0)

        if self.scripted_func is not None:
            scripted_action = self.scripted_func(game)
            if scripted_action is not None:
                return scripted_action

        self.env.game = game

        # Filter out pathfinding-assisted move actions
        if self.exclude_pathfinding_moves and self.env.game.config.pathfinding_enabled:
            self._filter_actions()

        spatial_obs, non_spatial_obs, action_mask = map(A2CAgent._update_obs, self.env.get_state())
        non_spatial_obs = torch.unsqueeze(non_spatial_obs, dim=0)

        _, actions = self.policy.act(Variable(spatial_obs.float()),
                                     Variable(non_spatial_obs.float()),
                                     Variable(action_mask))

        action_idx = actions[0]
        action_objects = self.env._compute_action(action_idx)
        self.action_queue = action_objects
        return self.action_queue.pop(0)

    def end_game(self, game):
        pass
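# A possible way to field the agent, assuming botbowl's bot registry and the
# standard game-setup helpers from its tutorials; the bot name, config name and
# team files are illustrative, and MyA2CBot/demo_play_one_game are not part of
# the original code:
class MyA2CBot(A2CAgent):
    def __init__(self, name):
        super().__init__(name, env_conf=EnvConf(size=11), exclude_pathfinding_moves=True)


def demo_play_one_game():
    botbowl.register_bot('my-a2c-bot', MyA2CBot)
    config = botbowl.load_config("bot-bowl")
    ruleset = botbowl.load_rule_set(config.ruleset)
    arena = botbowl.load_arena(config.arena)
    home = botbowl.load_team_by_filename("human", ruleset)
    away = botbowl.load_team_by_filename("human", ruleset)
    home_agent = botbowl.make_bot('my-a2c-bot')
    away_agent = botbowl.make_bot('random')
    game = botbowl.Game(1, home, away, home_agent, away_agent, config,
                        arena=arena, ruleset=ruleset)
    game.init()  # runs the game to completion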