def test_random(height):
    """Play up to 100 turns of random actions and check the last result code.

    Each turn every player in the game gets an action drawn uniformly from
    its own ``actions`` list. The loop stops early once the game reports it
    is done. ``tsu_rules`` is enabled only for ``height == 13``.
    """
    settings = dict(
        tsu_rules=(height == 13),
        height=height,
        width=5,
        num_colors=4,
        num_deals=2,
        target_score=70,
        step_bonus=10,
        all_clear_bonus=70 * 5 * 5,
    )
    game = Game(state_params=settings)

    for _ in range(100):
        actions = []
        for player in game.players:
            actions.append(random.choice(player.actions))
        print(actions)

        result, _garbage, finished = game.step(actions)
        game.render()
        if finished:
            break

    # Whatever happened, the final result must be one of the three codes.
    assert result in (-1, 0, 1)
def test_mirror():
    """Feed both players the same random action and check they stay in sync.

    After every step no garbage may be reported, the value 3 must not appear
    in the first player's field (presumably the garbage cell value when
    ``num_colors`` is 3 -- TODO confirm), and both fields must be identical.
    The final result must be 0.
    """
    settings = dict(
        height=8,
        width=4,
        num_colors=3,
        num_deals=2,
        target_score=1,
    )
    game = Game(state_params=settings)

    for _ in range(100):
        shared_action = random.choice(game.players[0].actions)
        result, garbage, finished = game.step([shared_action, shared_action])
        game.render()

        first_field = game.players[0].field.to_list()
        assert not garbage
        assert 3 not in first_field
        assert first_field == game.players[1].field.to_list()

        if finished:
            break

    assert result == 0
def __init__(self, opponent, state_params, garbage_clue_weight=0):
    """Create the game state and derive the observation and action spaces.

    ``opponent`` and ``garbage_clue_weight`` are stored unchanged;
    ``state_params`` is forwarded to ``Game(state_params=...)``. All space
    sizes are taken from the first player of the freshly created game.
    """
    self.opponent = opponent
    self.state = Game(state_params=state_params)
    self.garbage_clue_weight = garbage_clue_weight
    self.reward_range = (-1, 1)

    first_player = self.state.players[0]

    # Step count used for the score bound: height * width, halved when
    # tsu_rules is off.
    cell_count = first_player.height * first_player.width
    step_limit = cell_count if first_player.tsu_rules else cell_count // 2
    score_limit = first_player.max_score + step_limit * first_player.step_bonus

    per_player = spaces.Dict({
        "deals": spaces.Box(
            0, 1,
            (first_player.num_colors, first_player.num_deals, 2),
            dtype=np.int8,
        ),
        "field": spaces.Box(
            0, 1,
            (first_player.num_layers, first_player.height, first_player.width),
            dtype=np.int8,
        ),
        "chain_number": spaces.Discrete(first_player.max_chain),
        "pending_score": spaces.Discrete(score_limit),
        "pending_garbage": spaces.Discrete(score_limit // first_player.target_score),
        "all_clear": spaces.Discrete(2),
    })

    # The same per-player space describes both sides of the observation.
    self.observation_space = spaces.Tuple((per_player,) * 2)
    self.action_space = spaces.Discrete(len(first_player.actions))
    self.player = first_player

    self.seed()

    # Rendering bookkeeping: viewer is created lazily by render().
    self.viewer = None
    self.anim_states = [None, None]
    self.last_actions = [None, None]
def test_single_garbage(height):
    """Run a scripted six-move match and check when the value 4 reaches player 2.

    The value 4 must be absent from the second player's field after the third
    move and present after the fourth (presumably the garbage cell value when
    ``num_colors`` is 4 -- TODO confirm). The last move must return result 0
    with ``done`` exactly ``False``.
    """
    if height == 8:
        target_score, step_bonus = 1, 0
    else:
        target_score, step_bonus = 70, 20

    settings = {
        "height": height,
        "width": 8,
        "num_colors": 4,
        "deals": [(0, 0), (0, 0), (1, 1), (1, 2), (2, 3), (3, 3)],
        "target_score": target_score,
        "step_bonus": step_bonus,
    }
    game = Game(state_params=settings)

    def play(first_action, second_action):
        # Advance one turn for both players, render, and hand back step()'s result.
        outcome = game.step([first_action, second_action])
        game.render()
        return outcome

    play((0, 0), (1, 1))
    play((0, 0), (7, 1))
    play((2, 0), (7, 1))
    assert 4 not in game.players[1].field.to_list()

    play((2, 0), (0, 1))
    assert 4 in game.players[1].field.to_list()

    play((2, 0), (2, 1))
    result, _garbage, done = play((2, 0), (0, 1))
    assert result == 0
    assert done is False
class PuyoPuyoVersusEnv(gym.Env):
    """
    Puyo Puyo environment. Versus mode.

    The agent plays ``state.players[0]``. On every ``step`` the other side's
    move is obtained by calling ``opponent`` with a clone of the game state
    whose player order has been reversed; it must return an index into the
    player's ``actions`` list.
    """

    # When True, render(mode="human") is redirected to console output.
    TESTING = False

    metadata = {"render.modes": ["human", "ansi"]}

    def __init__(self, opponent, state_params, garbage_clue_weight=0):
        """Create the game state and derive the observation and action spaces.

        Args:
            opponent: Callable invoked in ``step`` with a cloned, player-reversed
                game state; returns the opponent's action index.
            state_params: Forwarded to ``Game(state_params=...)``.
            garbage_clue_weight: Factor applied to the garbage value returned
                by the game's ``step`` before it is added to the reward.
        """
        self.opponent = opponent
        self.state = Game(state_params=state_params)
        self.garbage_clue_weight = garbage_clue_weight

        self.reward_range = (-1, 1)

        player = self.state.players[0]
        # Step count used for the score bound: height * width, halved when
        # tsu_rules is off.
        max_steps = player.height * player.width
        if not player.tsu_rules:
            max_steps //= 2
        max_score = player.max_score + max_steps * player.step_bonus
        player_space = spaces.Dict({
            "deals": spaces.Box(0, 1, (player.num_colors, player.num_deals, 2), dtype=np.int8),
            "field": spaces.Box(0, 1, (player.num_layers, player.height, player.width), dtype=np.int8),
            "chain_number": spaces.Discrete(player.max_chain),
            "pending_score": spaces.Discrete(max_score),
            "pending_garbage": spaces.Discrete(max_score // player.target_score),
            "all_clear": spaces.Discrete(2),
        })
        # The same per-player space describes both sides of the observation.
        self.observation_space = spaces.Tuple((player_space, player_space))
        self.action_space = spaces.Discrete(len(player.actions))
        self.player = player
        self.seed()

        # Rendering bookkeeping: the viewer is created lazily by render().
        self.viewer = None
        self.anim_states = [None, None]
        self.last_actions = [None, None]

    def seed(self, seed=None):
        """Seed the underlying game state and return its result in a one-element list."""
        return [self.state.seed(seed)]

    def reset(self):
        """Reset the game state and return its encoded observation."""
        self.state.reset()
        return self.state.encode()

    def close(self):
        """Close the image viewer, if one was created.

        The reference is cleared afterwards so that calling ``close`` again is
        a no-op and a later ``render("human")`` builds a fresh viewer instead
        of drawing on a closed one.
        """
        if self.viewer:
            self.viewer.close()
            self.viewer = None

    def render(self, mode="console"):
        """Render the current game state.

        Args:
            mode: ``"human"`` draws both players in an ``ImageViewer`` window
                and then sleeps for half a second; ``"ansi"`` renders into a
                ``StringIO``; any other value renders to ``sys.stdout``.

        Returns:
            The ``StringIO`` buffer for ``"ansi"``; ``None`` otherwise.
        """
        if self.TESTING and mode == "human":
            mode = "console"

        if mode == "human":
            # Imported lazily so the rendering module is only needed for this mode.
            from time import sleep
            from gym_puyopuyo.rendering import ImageViewer, AnimationState

            for i, player in enumerate(self.state.players):
                if self.anim_states[i]:
                    # TODO: Intra step frames
                    # self.anim_states[i].state.deals[1:] = player.deals[:-1]
                    self.anim_states[i].state.deals[:] = player.deals[:-1]
                    self.anim_states[i].state.field.data[:] = player.field.data
                else:
                    self.anim_states[i] = AnimationState(player.clone())

                if self.last_actions[i] is not None:
                    # TODO: Intra step frames
                    # self.anim_states[i].state.play_deal(*player.actions[self.last_actions[i]])
                    # NOTE(review): play_deal is kept disabled here because the
                    # field is copied from the already-stepped player above --
                    # confirm this is the intended state.
                    self.anim_states[i].infer_entities()

            if not self.viewer:
                self.viewer = ImageViewer(
                    width=(self.anim_states[0].width + 5) * len(self.state.players) - 1,
                    height=self.anim_states[0].height)

            # TODO: Synchronous gravity resolution
            # persistent_frames = [None, None]
            # for frames in zip_longest(*(state.resolve() for state in self.anim_states)):
            #     self.viewer.begin_flip()
            #     for i, frame in enumerate(frames):
            #         if frame:
            #             persistent_frames[i] = frame
            #         self.viewer.render_state(
            #             persistent_frames[i],
            #             x_offset=i * (frame.width + 5),
            #             flip=False
            #         )
            #     self.viewer.end_flip()
            #     sleep(0.05)

            # Draw every player's board side by side in a single flip.
            self.viewer.begin_flip()
            for i, frame in enumerate(self.anim_states):
                self.viewer.render_state(frame, x_offset=i * (frame.width + 5), flip=False)
            self.viewer.end_flip()
            sleep(0.5)

            return

        outfile = StringIO() if mode == "ansi" else sys.stdout
        self.state.render(outfile)
        if mode == "ansi":
            return outfile

    def step(self, action):
        """Play one turn for the agent and the opponent.

        Args:
            action: Index into ``self.player.actions`` for the agent's move.

        Returns:
            ``(observation, reward, done, info)`` where ``reward`` is the game
            result plus ``garbage_clue_weight * garbage`` and ``info`` is
            ``{"state": self.state}``.
        """
        self.last_actions[0] = action
        # The opponent decides on a cloned state with the player order
        # reversed, so it sees itself as the first player.
        root = self.get_root()
        root.players = root.players[::-1]
        opponent_action = self.opponent(root)
        self.last_actions[1] = opponent_action
        acts = self.player.actions
        reward, garbage, done = self.state.step([acts[action], acts[opponent_action]])
        reward += self.garbage_clue_weight * garbage
        observation = self.state.encode()
        return observation, reward, done, {"state": self.state}

    def get_action_mask(self):
        """Return the action mask of the agent's player."""
        return self.player.get_action_mask()

    def get_root(self):
        """Return a clone of the current game state."""
        return self.state.clone()