コード例 #1
0
def test_random(height):
    """Play up to 100 random moves per player and check the final result.

    ``tsu_rules`` is switched on only for a field height of 13. The result
    returned by the last ``game.step`` call must be -1, 0 or 1.
    """
    use_tsu_rules = height == 13
    params = {
        "tsu_rules": use_tsu_rules,
        "height": height,
        "width": 5,
        "num_colors": 4,
        "num_deals": 2,
        "target_score": 70,
        "step_bonus": 10,
        "all_clear_bonus": 70 * 5 * 5,
    }
    game = Game(state_params=params)
    for _ in range(100):
        # One independently drawn action per player, in player order.
        actions = [random.choice(player.actions) for player in game.players]
        print(actions)
        outcome = game.step(actions)
        result, garbage, done = outcome
        game.render()
        if done:
            break
    assert result in (-1, 0, 1)
コード例 #2
0
def test_mirror():
    """Feed both players the same random action and check they stay in sync.

    After every step no garbage may be reported, the value 3 must not appear
    in the first player's field, and both fields must be identical. The last
    step's result must be 0.
    """
    game = Game(state_params={
        "height": 8,
        "width": 4,
        "num_colors": 3,
        "num_deals": 2,
        "target_score": 1,
    })
    for _ in range(100):
        move = random.choice(game.players[0].actions)
        result, garbage, done = game.step([move] * 2)
        game.render()
        own_stack = game.players[0].field.to_list()
        assert not garbage
        assert 3 not in own_stack
        assert own_stack == game.players[1].field.to_list()
        if done:
            break
    assert result == 0
コード例 #3
0
ファイル: versus.py プロジェクト: Bolloknoon/gym_puyopuyo
    def __init__(self, opponent, state_params, garbage_clue_weight=0):
        """Build the versus game and derive the Gym spaces from player one.

        Args:
            opponent: stored as ``self.opponent``.
            state_params: forwarded to ``Game(state_params=...)``.
            garbage_clue_weight: stored as ``self.garbage_clue_weight``.
        """
        self.opponent = opponent
        self.state = Game(state_params=state_params)
        self.garbage_clue_weight = garbage_clue_weight

        self.reward_range = (-1, 1)

        player = self.state.players[0]

        # Upper bound on steps: one per cell, halved without tsu rules.
        step_cap = player.height * player.width
        if not player.tsu_rules:
            step_cap = step_cap // 2
        score_cap = player.max_score + step_cap * player.step_bonus

        deals_shape = (player.num_colors, player.num_deals, 2)
        field_shape = (player.num_layers, player.height, player.width)
        player_space = spaces.Dict({
            "deals": spaces.Box(0, 1, deals_shape, dtype=np.int8),
            "field": spaces.Box(0, 1, field_shape, dtype=np.int8),
            "chain_number": spaces.Discrete(player.max_chain),
            "pending_score": spaces.Discrete(score_cap),
            "pending_garbage": spaces.Discrete(
                score_cap // player.target_score),
            "all_clear": spaces.Discrete(2),
        })

        # Both players share the same per-player observation layout.
        self.observation_space = spaces.Tuple((player_space, player_space))
        self.action_space = spaces.Discrete(len(player.actions))
        self.player = player
        self.seed()

        # Rendering state, one slot per player.
        self.viewer = None
        self.anim_states = [None, None]
        self.last_actions = [None, None]
コード例 #4
0
def test_single_garbage(height):
    """Step through a fixed deal sequence and check when 4 reaches player two.

    The value 4 must be absent from the second player's field after the third
    step and present after the fourth (presumably 4 marks garbage -- confirm
    against the field encoding). The sixth step must return result 0 with
    ``done`` False.
    """
    small_field = height == 8
    params = {
        "height": height,
        "width": 8,
        "num_colors": 4,
        "deals": [(0, 0), (0, 0), (1, 1), (1, 2), (2, 3), (3, 3)],
        "target_score": 1 if small_field else 70,
        "step_bonus": 0 if small_field else 20,
    }

    game = Game(state_params=params)

    def play(first, second):
        # Advance one step with both players' actions, then render.
        outcome = game.step([first, second])
        game.render()
        return outcome

    play((0, 0), (1, 1))
    play((0, 0), (7, 1))
    play((2, 0), (7, 1))
    assert 4 not in game.players[1].field.to_list()
    play((2, 0), (0, 1))
    assert 4 in game.players[1].field.to_list()
    play((2, 0), (2, 1))
    result, garbage, done = play((2, 0), (0, 1))
    assert result == 0
    assert done is False
コード例 #5
0
ファイル: versus.py プロジェクト: Bolloknoon/gym_puyopuyo
class PuyoPuyoVersusEnv(gym.Env):
    """
    Puyo Puyo environment. Versus mode.

    The agent's action drives the first player of a ``Game``; the second
    player's action is obtained from the ``opponent`` callable on every step.
    """

    # When True, "human" rendering is redirected to console output.
    TESTING = False

    metadata = {"render.modes": ["human", "ansi"]}

    def __init__(self, opponent, state_params, garbage_clue_weight=0):
        """Create the game and derive the Gym spaces from the first player.

        Args:
            opponent: callable invoked in ``step`` with a cloned game state
                whose player order is reversed; its return value is used as
                an index into the player's action list.
            state_params: forwarded to ``Game(state_params=...)``.
            garbage_clue_weight: factor applied to the garbage value returned
                by the game step before it is added to the reward.
        """
        self.opponent = opponent
        self.state = Game(state_params=state_params)
        self.garbage_clue_weight = garbage_clue_weight

        self.reward_range = (-1, 1)

        player = self.state.players[0]
        # Upper bound on steps: one per cell, halved without tsu rules.
        max_steps = player.height * player.width
        if not player.tsu_rules:
            max_steps //= 2
        max_score = player.max_score + max_steps * player.step_bonus
        player_space = spaces.Dict({
            "deals":
            spaces.Box(0,
                       1, (player.num_colors, player.num_deals, 2),
                       dtype=np.int8),
            "field":
            spaces.Box(0,
                       1, (player.num_layers, player.height, player.width),
                       dtype=np.int8),
            "chain_number":
            spaces.Discrete(player.max_chain),
            "pending_score":
            spaces.Discrete(max_score),
            "pending_garbage":
            spaces.Discrete(max_score // player.target_score),
            "all_clear":
            spaces.Discrete(2),
        })
        # Both players share the same per-player observation layout.
        self.observation_space = spaces.Tuple((player_space, player_space))
        self.action_space = spaces.Discrete(len(player.actions))
        self.player = player
        self.seed()

        # Rendering state, one slot per player.
        self.viewer = None
        self.anim_states = [None, None]
        self.last_actions = [None, None]

    def seed(self, seed=None):
        """Seed the underlying game; return its result wrapped in a list."""
        return [self.state.seed(seed)]

    def reset(self):
        """Reset the game and return the encoded initial observation."""
        self.state.reset()
        return self.state.encode()

    def close(self):
        """Close the viewer, if any, and forget it.

        The reference is cleared so that a later ``render("human")`` creates
        a fresh viewer instead of drawing on a closed one.
        """
        if self.viewer:
            self.viewer.close()
            self.viewer = None

    def render(self, mode="console"):
        """Render the current game state.

        Args:
            mode: ``"human"`` draws both players in an ``ImageViewer`` and
                pauses for half a second; ``"ansi"`` renders into a
                ``StringIO``; any other value renders to ``sys.stdout``.

        Returns:
            The ``StringIO`` buffer for ``"ansi"``, otherwise ``None``.
        """
        if self.TESTING and mode == "human":
            mode = "console"

        if mode == "human":
            # Imported lazily so non-graphical modes need no rendering stack.
            from time import sleep
            from gym_puyopuyo.rendering import ImageViewer, AnimationState

            for i, player in enumerate(self.state.players):
                if self.anim_states[i]:
                    # TODO: Intra step frames
                    # self.anim_states[i].state.deals[1:] = player.deals[:-1]
                    self.anim_states[i].state.deals[:] = player.deals[:-1]
                    self.anim_states[i].state.field.data[:] = player.field.data
                else:
                    self.anim_states[i] = AnimationState(player.clone())
                if self.last_actions[i] is not None:
                    # TODO: Intra step frames
                    # self.anim_states[i].state.play_deal(*player.actions[self.last_actions[i]])
                    self.anim_states[i].infer_entities()

            if not self.viewer:
                # Players are laid out side by side, 5 units apart.
                self.viewer = ImageViewer(
                    width=(self.anim_states[0].width + 5) *
                    len(self.state.players) - 1,
                    height=self.anim_states[0].height)

            # TODO: Synchronous gravity resolution
            # persistent_frames = [None, None]
            # for frames in zip_longest(*(state.resolve() for state in self.anim_states)):
            #     self.viewer.begin_flip()
            #     for i, frame in enumerate(frames):
            #         if frame:
            #             persistent_frames[i] = frame
            #         self.viewer.render_state(
            #             persistent_frames[i],
            #             x_offset=i * (frame.width + 5),
            #             flip=False
            #         )
            #     self.viewer.end_flip()
            #     sleep(0.05)

            self.viewer.begin_flip()
            for i, frame in enumerate(self.anim_states):
                self.viewer.render_state(frame,
                                         x_offset=i * (frame.width + 5),
                                         flip=False)
            self.viewer.end_flip()
            sleep(0.5)

            return

        outfile = StringIO() if mode == "ansi" else sys.stdout
        self.state.render(outfile)
        if mode == "ansi":
            return outfile

    def step(self, action):
        """Advance the game by one move for both players.

        Args:
            action: index into ``self.player.actions`` for the agent.

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``info`` is
            ``{"state": self.state}`` and ``reward`` is the game's reward
            plus ``garbage_clue_weight`` times the reported garbage.
        """
        self.last_actions[0] = action
        # The opponent sees a copy of the game with the player order swapped.
        root = self.get_root()
        root.players = root.players[::-1]
        opponent_action = self.opponent(root)
        self.last_actions[1] = opponent_action
        acts = self.player.actions
        reward, garbage, done = self.state.step(
            [acts[action], acts[opponent_action]])
        reward += self.garbage_clue_weight * garbage
        observation = self.state.encode()
        return observation, reward, done, {"state": self.state}

    def get_action_mask(self):
        """Return the action mask reported by the agent's player."""
        return self.player.get_action_mask()

    def get_root(self):
        """Return a clone of the current game state."""
        return self.state.clone()