Example #1
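Plays two complete games by stepping the underlying Game object directly with a RandomBot, bypassing the gym-style step() interface, and returns the total number of actions taken.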
def run_game():
    env = BotBowlEnv()
    steps = 0
    random_bot = RandomBot(name='')

    for _ in range(2):
        env.reset()
        game = env.game
        while not game.state.game_over:
            game.step(random_bot.act(game))
            steps += 1
    return steps
Example #2
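The gym-style counterpart of Example #1: two episodes are played by repeatedly sampling a random legal action index from the action mask returned with each observation.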
def run_env():
    env = BotBowlEnv()
    steps = 0
    for _ in range(2):
        done = False
        _, _, mask = env.reset()

        while not done:
            steps += 1
            aa = np.where(mask > 0.0)[0]
            action_idx = np.random.choice(aa, 1)[0]
            (_, _, mask), reward, done, _ = env.step(action_idx)

    return steps
Example #3
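A round-trip test: every simple and positional action type is converted to an action index and back, checking that _compute_action() is the inverse of _compute_action_idx() (positional actions are paired with a random square).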
def test_compute_action():
    env = BotBowlEnv()

    for action_type in itertools.chain(env.env_conf.positional_action_types, env.env_conf.simple_action_types):
        if type(action_type) is botbowl.Formation:
            continue

        sq = None
        if action_type in env.env_conf.positional_action_types:
            sq = botbowl.Square(x=randint(0, env.width-1), y=randint(0, env.height-1))

        action = botbowl.Action(action_type, position=sq)
        same_action = env._compute_action(env._compute_action_idx(action))[0]
        assert action.action_type == same_action.action_type, f"Wrong type: {action} != {same_action}"
        assert action.position == same_action.position, f"Wrong position: {action} != {same_action}"
Example #4
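A factory that stacks the environment wrappers used in the A2C example; env_conf, ppcg, a2c_scripted_actions and A2C_Reward are free variables resolved from the enclosing module (see the sketch after the function).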
def make_env():
    env = BotBowlEnv(env_conf)
    if ppcg:
        env = PPCGWrapper(env)
    env = ScriptedActionWrapper(env, scripted_func=a2c_scripted_actions)
    env = RewardWrapper(env, home_reward_func=A2C_Reward())
    return env
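make_env() closes over names defined elsewhere in the A2C example code. Below is a minimal sketch of how those free variables might be bound; the concrete values are illustrative assumptions, not part of the original example:

from botbowl.ai.env import EnvConf

env_conf = EnvConf(size=11)  # assumed board configuration; the tests below also use size=1
ppcg = False                 # set True to additionally wrap the env in PPCGWrapper
# a2c_scripted_actions and A2C_Reward come from the surrounding A2C example code

env = make_env()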
Example #5
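Runs two environments in parallel worker processes (the worker function is shown in Example #6), drives each over a Pipe for 20 random steps, and then shuts the workers down cleanly.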
def test_multiple_gyms():
    nenvs = 2
    ps = []
    remotes = []
    for _ in range(nenvs):
        env = BotBowlEnv()
        remote, work_remote = Pipe()
        p = Process(target=worker, args=(work_remote, remote, env), daemon=True)
        p.start()
        work_remote.close()

        ps.append(p)
        remotes.append(remote)

    for i in range(20):
        for remote in remotes:
            remote.send('step')
        for remote in remotes:
            obs, reward, done, info = remote.recv()
            assert reward is not None
            assert obs is not None

    for remote, p in zip(remotes, ps):
        remote.send('close')
        p.join()
Example #6
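The worker loop behind Example #5: it owns a single BotBowlEnv, samples a random legal action on each 'step' command, and automatically resets the environment when an episode ends.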
def worker(remote, parent_remote, env: BotBowlEnv):
    parent_remote.close()
    seed = env._seed
    rnd = np.random.RandomState(seed)
    steps = 0
    _, _, mask = env.reset()
    while True:
        command = remote.recv()
        if command == 'step':
            aa = np.where(mask > 0.0)[0]
            action_idx = rnd.choice(aa, 1)[0]
            obs, reward, done, info = env.step(action_idx)
            mask = obs[2]
            steps += 1
            if done:
                obs = env.reset()
                mask = obs[2]  # refresh the mask after reset so the next step does not sample from a stale mask
            remote.send((obs, reward, done, info))
        elif command == 'reset':
            obs = env.reset()
            mask = obs[2]
            done = False
            remote.send(obs)
        elif command == 'close':
            env.close()
            break
Example #7
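Verifies that every spatial layer and every non-spatial feature stays within [0, 1] over two randomly played episodes, for the given pathfinding setting.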
def test_observation_ranges(pathfinding):
    def find_first_index(array_: np.ndarray, value_: float):
        indices = (array_ == value_).nonzero()
        return [x[0] for x in indices]

    env = BotBowlEnv(EnvConf(pathfinding=pathfinding))

    for _ in range(2):
        done = False
        spatial_obs, non_spatial_obs, mask = env.reset()

        while not done:

            # Spatial observations are within [0, 1]
            for layer, array in zip(env.env_conf.layers, spatial_obs):
                layer_name = layer.name()

                max_val = np.max(array)
                min_val = np.min(array)

                assert max_val <= 1.0, \
                    f"['{layer_name}'][{find_first_index(array, max_val)}] is too high ({max_val})"

                assert min_val >= 0.0, \
                    f"['{layer_name}'][{find_first_index(array, min_val)}] is too low ({min_val})"

            max_val = np.max(non_spatial_obs)
            min_val = np.min(non_spatial_obs)

            assert min_val >= 0.0, \
                f"non_spatial_obs[{find_first_index(non_spatial_obs, min_val)}] is too low ({min_val})"

            assert max_val <= 1.0, \
                f"non_spatial_obs[{find_first_index(non_spatial_obs, max_val)}] is too high ({max_val})"

            aa = np.where(mask)[0]
            action_idx = np.random.choice(aa, 1)[0]
            (spatial_obs, non_spatial_obs, mask), reward, done, _ = env.step(action_idx)

    env.close()
Example #8
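Combines ScriptedActionWrapper and RewardWrapper on the smallest board (EnvConf(size=1)): the scripted function auto-resolves forced single actions and occasionally ends the player's turn, while per-episode rewards and touchdown counts are accumulated.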
def test_reward_and_scripted_wrapper():

    reward_func = A2C_Reward()

    def scripted_func(game) -> Optional[botbowl.Action]:
        available_actions = game.get_available_actions()
        available_action_types = [action_choice.action_type for action_choice in available_actions]

        # If exactly one action type is available and it needs neither a position nor a player, take it
        if len(available_action_types) == 1 and \
                len(available_actions[0].positions) == 0 and \
                len(available_actions[0].players) == 0:
            return botbowl.Action(available_action_types[0])

        if botbowl.ActionType.END_PLAYER_TURN in available_action_types and randint(1, 5) == 2:
            return botbowl.Action(botbowl.ActionType.END_PLAYER_TURN)

        return None

    env = BotBowlEnv(EnvConf(size=1))
    env = ScriptedActionWrapper(env, scripted_func)
    env = RewardWrapper(env, home_reward_func=reward_func)

    rewards = []
    own_tds = []
    opp_tds = []

    for _ in range(10):
        _, _, mask = env.reset()
        done = False
        ep_reward = 0.0

        while not done:
            aa = np.where(mask)[0]
            action_idx = np.random.choice(aa, 1)[0]
            (_, _, mask), reward, done, _ = env.step(action_idx)
            ep_reward += reward

        rewards.append(ep_reward)
        own_tds.append(env.game.state.home_team.state.score)
        opp_tds.append(env.game.state.away_team.state.score)
Example #9
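A complete bot built on top of BotBowlEnv: a trained A2C policy selects action indices, which env._compute_action() translates back into game actions, with optional filtering of pathfinding-assisted moves.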
class A2CAgent(Agent):
    env: BotBowlEnv

    def __init__(self,
                 name,
                 env_conf: EnvConf,
                 exclude_pathfinding_moves,
                 scripted_func: Callable[[Game], Optional[Action]] = None,
                 filename=model_filename):
        super().__init__(name)
        self.env = BotBowlEnv(env_conf)
        self.exclude_pathfinding_moves = exclude_pathfinding_moves

        self.scripted_func = scripted_func
        self.action_queue = []

        # MODEL
        self.policy = torch.load(filename)
        self.policy.eval()
        self.end_setup = False

    def new_game(self, game, team):
        pass

    def _filter_actions(self):
        """
        Remove pathfinding-assisted non-adjacent and block move actions; called when pathfinding is enabled but the agent is configured to exclude such moves.
        """
        actions = []
        for action_choice in self.env.game.state.available_actions:
            if action_choice.action_type == ActionType.MOVE:
                positions, rolls = [], []
                for i in range(len(action_choice.positions)):
                    position = action_choice.positions[i]
                    roll = action_choice.paths[i].rolls[0]
                    # Only include positions that are not occupied by a player
                    if self.env.game.get_player_at(position) is None:
                        positions.append(position)
                        rolls.append(roll)
                actions.append(
                    ActionChoice(ActionType.MOVE,
                                 team=action_choice.team,
                                 positions=positions,
                                 rolls=rolls))
            else:
                actions.append(action_choice)
        self.env.game.state.available_actions = actions

    @staticmethod
    def _update_obs(array: np.ndarray):
        """Copy a numpy array into a torch tensor with a leading batch dimension."""
        return torch.unsqueeze(torch.from_numpy(array.copy()), dim=0)

    def act(self, game):
        if len(self.action_queue) > 0:
            return self.action_queue.pop(0)

        if self.scripted_func is not None:
            scripted_action = self.scripted_func(game)
            if scripted_action is not None:
                return scripted_action

        self.env.game = game

        # Filter out pathfinding-assisted move actions
        if self.exclude_pathfinding_moves and self.env.game.config.pathfinding_enabled:
            self._filter_actions()

        spatial_obs, non_spatial_obs, action_mask = map(
            A2CAgent._update_obs, self.env.get_state())
        non_spatial_obs = torch.unsqueeze(non_spatial_obs, dim=0)

        _, actions = self.policy.act(Variable(spatial_obs.float()),
                                     Variable(non_spatial_obs.float()),
                                     Variable(action_mask))

        action_idx = actions[0]
        action_objects = self.env._compute_action(action_idx)

        self.action_queue = action_objects
        return self.action_queue.pop(0)

    def end_game(self, game):
        pass
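For reference, a minimal instantiation sketch of the agent above; the bot name, configuration, and model path are hypothetical placeholders, not values from the original code:

from botbowl.ai.env import EnvConf

agent = A2CAgent(name='a2c-bot',                 # hypothetical name
                 env_conf=EnvConf(size=11),      # assumed board configuration
                 exclude_pathfinding_moves=True,
                 filename='models/a2c_model.pt') # hypothetical model path
action = agent.act(game)                         # game: an in-progress botbowl Game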