def play_game(agent1: Agent, agent2: Agent, output: t.Optional[t.IO] = None):
    """Run a two-agent game to completion and return the winner.

    The agents alternate turns, starting with ``agent1``. When ``output`` is
    given, the formatted gamestate is written to it before every turn, along
    with each non-human action and the final result.

    A human agent is asked again until ``take_action_mut`` accepts its
    action; an invalid action from any other agent is reported through
    ``utils.assert_never``.

    Returns:
        The non-None value reported by ``is_over`` for the finished game.
    """
    gamestate = init_game()

    for agent in itertools.cycle((agent1, agent2)):
        if output:
            print(fmt.format_gamestate(gamestate), file=output)
            print("\n", file=output)

        winner = is_over(gamestate)
        if winner is not None:
            if output:
                print(f"Game Over. Winner {winner}", file=output)
            return winner

        if agent.agent_type != "human":
            action = agent.get_action(gamestate)
            if output:
                print(action, file=output)
            if take_action_mut(gamestate, action) is None:
                utils.assert_never(
                    f"Non human agent returned an invalid action {action}")
            continue

        # Humans get to retry: keep prompting until the move is accepted.
        while take_action_mut(gamestate, agent.get_action(gamestate)) is None:
            print("Invalid action")
def get_action(gamestate: G) -> A:
    """Return the action chosen by ``rules.get_random_action`` for gamestate.

    A ``None`` result is treated as a programming error and reported through
    ``utils.assert_never``.
    """
    random_action = rules.get_random_action(gamestate)
    if random_action is not None:
        return random_action
    utils.assert_never(
        "Random action is None even though game is not over"
    )
    return random_action
def minimax(config: types.MinimaxConfig[G, A, P], gamestate: G, depth=3) -> t.Tuple[float, t.Optional[A]]:
    """Evaluate ``gamestate`` from the point of view of the player to move.

    Only the setup and the terminal-state case are visible in this block:
    when ``config.is_over`` reports a result, the function returns
    ``(value, None)`` where value is +inf for a win by the player to move,
    -inf for a win by the opponent and 0 otherwise.

    NOTE(review): ``depth``, ``heuristic``, ``get_all_actions`` and the
    action callbacks are bound but not used in the visible code -- the
    non-terminal search presumably follows; confirm against the full file.

    Raises:
        NotImplementedError: if ``config.action`` is an
            ``ImmutableActionConfig``.
    """
    # Both players are derived from the gamestate through the config.
    this_player = config.get_player(gamestate)
    opponent = config.other_player(this_player)
    # Only the mutable (take/undo) action style is supported.
    if isinstance(config.action, types.MutableActionConfig):
        take_action_mut, undo_action = (
            config.action.take_action_mut,
            config.action.undo_action,
        )
    elif isinstance(config.action, types.ImmutableActionConfig):
        raise NotImplementedError(
            "minimax not implemented for immutable action config")
    else:
        utils.assert_never(f"Unknown action config type {type(config.action)}")
    # Pull the remaining callbacks out of the config into locals.
    get_all_actions, is_over, heuristic = (
        config.get_all_actions,
        config.is_over,
        config.heuristic,
    )
    # Terminal state: score it exactly, there is no action to recommend.
    if (winner := is_over(gamestate)) is not None:
        value = (
            float("+inf") if winner == this_player
            else float("-inf") if winner == opponent
            else 0  # draw
        )
        return value, None
def broadcast_walk_logs(
    walk_logs: t.List[eng_types.WalkLog],
    r: redis.Redis,
    gamestate_id: int,
    engineserver_id: int,
):
    """Queue walk-log items on the gamestate's stream and send them in one go.

    "new-node" and "walk-result" items are written (tagged with
    ``engineserver_id``) via ``utils.write_stream`` on a pipeline obtained
    from ``r``; "take-action" items are skipped; any other event type is
    reported through ``utils.assert_never``. The pipeline is executed once
    after all logs have been processed.
    """
    forwarded_events = ("new-node", "walk-result")
    stream = f"gamestate-{gamestate_id}"

    with r.pipeline() as pipe:
        for item in (entry for walk_log in walk_logs for entry in walk_log):
            if item["event-type"] in forwarded_events:
                payload = {
                    "engineserver_id": engineserver_id,
                    "item": item,
                }
                utils.write_stream(stream, pipe, payload)
            elif item["event-type"] != "take-action":
                utils.assert_never(
                    f"unkown event-type {item['event-type']}")
        pipe.execute()
def decode_action(encoded: str) -> types.Action:
    """Rebuild an action object from its JSON encoding.

    The JSON object's "type" field selects between ``types.PlayerAction``
    (built from the "action" field) and ``types.EnvironmentAction`` (whose
    "action" field unpacks into placement and val). Any other type is
    reported through ``utils.assert_never``.
    """
    data = json.loads(encoded)

    if data["type"] == "player-action":
        return types.PlayerAction(action=data["action"])

    if data["type"] == "environment-action":
        placement, val = data["action"]
        return types.EnvironmentAction(placement=placement, val=val)

    utils.assert_never(f"Unexpected action type {data['type']}")
def play_game(
    agents: t.List[Agent], game_type: str, output: t.Optional[t.IO] = None
):
    """Play one game of ``game_type`` with the first agent in ``agents``.

    Turns belonging to the "environment" player are filled in with
    ``rules.get_random_action``; all other turns are taken by the agent. A
    human agent is asked again until its action is accepted, while an invalid
    action from any other agent is reported through ``utils.assert_never``.
    When ``output`` is given, the gamestate, the agent's actions and the
    final score are written to it.
    """
    # TODO: generalize this so you can have more than one agent
    agent = agents[0]
    rules = common.load_rules(game_type)
    gamestate = rules.init_game()

    while True:
        # Show the current position.
        if output:
            print(rules.format_gamestate(gamestate), file=output)
            print("\n", file=output)

        # Stop once the rules report the game as finished.
        if rules.is_over(gamestate) is not None:
            score = rules.get_final_score(gamestate)
            if output:
                print(f"Game Over. Score: {score}", file=output)
            return

        # Environment turn: apply a random action.
        if gamestate.player == "environment":
            random_action = rules.get_random_action(gamestate)
            if random_action is None:
                utils.assert_never(
                    "Random action is None even though game is not over"
                )
            else:
                rules.take_action_mut(gamestate, random_action)
            continue

        # Human turn: retry until the action is accepted.
        if agent.agent_type == "human":
            while rules.take_action_mut(
                    gamestate, agent.get_action(gamestate)) is None:
                print("Invalid action")
            continue

        # Computer turn: a rejected action is a bug.
        action = agent.get_action(gamestate)
        if output:
            print(action, file=output)
        if rules.take_action_mut(gamestate, action) is None:
            utils.assert_never(
                f"Non human agent returned an invalid action {action}"
            )
def get_agent(
    agent_type: AgentType, game_type: str, mcts_budget=1, n_engine_servers=2
) -> Agent:
    """Build the agent of kind ``agent_type`` for the game ``game_type``.

    Args:
        agent_type: one of "random", "human", "mcts-local" or
            "mcts-distributed"; anything else is reported through
            ``utils.assert_never``.
        game_type: passed to ``common.load_rules`` to obtain the rules.
        mcts_budget: second argument to ``Engine.ponder`` for "mcts-local",
            and the ``timeout`` of the ``EngineServerFarmClient`` for
            "mcts-distributed".
        n_engine_servers: forwarded to ``EngineServerFarmClient``.
    """
    rules = common.load_rules(game_type)
    config = eng_types.MctsConfig(
        take_action_mut=rules.take_action_mut,
        get_all_actions=rules.get_all_actions,
        is_over=rules.is_over,
        get_final_score=rules.get_final_score,
        players=rules.get_players(),
        encode_action=rules.encode_action,
        decode_action=rules.decode_action,
    )

    if agent_type == "random":

        def random_policy(gamestate: G) -> A:
            random_action = rules.get_random_action(gamestate)
            if random_action is None:
                utils.assert_never(
                    "Random action is None even though game is not over"
                )
            return random_action

        return Agent(agent_type=agent_type, get_action=random_policy)

    if agent_type == "human":
        return Agent(
            get_action=rules.get_action_from_human, agent_type=agent_type
        )

    if agent_type == "mcts-local":

        def mcts_policy(gamestate: G) -> A:
            # A fresh engine is built for every decision.
            _, action = Engine(config).ponder(gamestate, mcts_budget)
            return action

        return Agent(get_action=mcts_policy, agent_type=agent_type)

    if agent_type == "mcts-distributed":
        return EngineServerFarmClient(
            agent_type, game_type, n_engine_servers, timeout=mcts_budget
        )

    utils.assert_never(f"Invalid agent type {agent_type}")
def get_action(player_idx: int, gamestate: types.GameState) -> types.Action:
    """Interactively ask the player at ``player_idx`` for their next action.

    The player first picks an action kind (1-3) and is then prompted for the
    details that kind needs: a center index for drawing from the center, or a
    hand index plus two "a,b" placements for playing a card.
    """
    print(f"Player {player_idx}")

    # NOTE(review): literal kept exactly as it appears in the source; it may
    # originally have spanned several lines -- confirm.
    menu = textwrap.dedent(""" 1. Draw card from center 2. Draw card from deck 3. Play card """)
    action = int(
        utils.get_and_validate_input(menu, lambda n: 1 <= int(n) <= 3))

    if action == 2:
        # draw card from deck: nothing else to ask
        return types.DrawDeckAction()

    if action == 1:
        # draw card from center
        center_size = len(gamestate.center)
        chosen = utils.get_and_validate_input(
            "Which Card: ", lambda n: 0 <= int(n) < center_size)
        return types.DrawCenterCardAction(center_index=int(chosen))

    if action == 3:
        # play card
        handsize = len(gamestate.players[player_idx].hand)
        chosen = utils.get_and_validate_input(
            "Which Card: ", lambda n: 0 <= int(n) < handsize)
        hand_index = int(chosen)

        def transform(inp: str) -> t.Tuple[int, int]:
            # "a, b" -> (a, b)
            first, second = [int(part.strip()) for part in inp.split(",")]
            return first, second

        placement1 = utils.get_and_transform_input("Placement 1: ", transform)
        placement2 = utils.get_and_transform_input("Placement 2: ", transform)
        return types.PlayCardAction(
            hand_index=hand_index,
            placement1=placement1,
            placement2=placement2,
        )

    utils.assert_never(f"Invalid action {action}")
def consume_walk_log(self, walk_log: types.WalkLog):
    """Replay a walk log into this object's tree.

    "new-node" items create the node unless its id is already present in
    ``self.tree.nodes``; "walk-result" items back up the score vector from
    the referenced node and bump ``self.n_walks_consumed``. Any other event
    type is reported through ``utils.assert_never``.
    """
    for item in walk_log:
        if item["event-type"] == "walk-result":
            self._backup(self.tree.nodes[item["node_id"]], item["score_vec"])
            self.n_walks_consumed += 1
        elif item["event-type"] == "new-node":
            # Nodes already known to the tree are left untouched.
            if item["id"] not in self.tree.nodes:
                self._new_node(
                    id=item["id"],
                    parent_id=item["parent_id"],
                    action=item["action"],
                )
        else:
            utils.assert_never(
                f"Unknown walk_log event-type {item['event-type']}"
            )
def encode_action(action: types.Action) -> str:
    """Serialize an action to a JSON string with sorted keys.

    Player actions are encoded with type "player-action" and their
    ``action`` attribute; environment actions with type "environment-action"
    and the pair (placement, val). Any other object is reported through
    ``utils.assert_never``.
    """
    if isinstance(action, types.PlayerAction):
        payload = {
            "type": "player-action",
            "action": action.action,
        }
        return json.dumps(payload, sort_keys=True)

    if isinstance(action, types.EnvironmentAction):
        payload = {
            "type": "environment-action",
            "action": (action.placement, action.val),
        }
        return json.dumps(payload, sort_keys=True)

    utils.assert_never(f"Unexpected action type {action}")
def format_gamestate(gamestate: types.GameState) -> str:
    """Render the gamestate as text.

    The result starts with a "Player: ...<TAB>num_moves: ..." header line,
    followed by one line per board row where each cell is drawn as "X", "O"
    or "-" (for an empty, ``None`` cell). Any other cell value is reported
    through ``utils.assert_never``.

    Returns:
        The complete rendering; nothing is written to stdout.
    """
    buffer = io.StringIO()
    # The header must go into the buffer as well, otherwise it leaks to
    # stdout and is missing from the returned text.
    print(
        f"Player: {gamestate.player}\tnum_moves: {gamestate.num_moves}",
        file=buffer,
    )
    for row in gamestate.board:
        for x in row:
            ch = (
                "X" if x == "X"
                else "O" if x == "O"
                else "-" if x is None
                else utils.assert_never(f"Unkonwn Space {x}")
            )
            print(ch, end="", file=buffer)
        print("", file=buffer)
    return buffer.getvalue()
def init_game(nplayers: int) -> types.GameState:
    """Create the starting gamestate for ``nplayers`` players.

    The deck is a shuffled copy of ``types.ALL_CARDS``. Each player is dealt
    two cards and gets an empty 4x4 board, then four cards (each with a coin
    count of 0) are placed in the center. Every color pile starts at 18 for
    up to two players, 24 for three and 28 for four; other player counts are
    reported through ``assert_never``.
    """
    # Resolve the pile size first so an unsupported player count fails
    # before any cards are dealt.
    pile_size = (18 if nplayers <= 2 else
                 24 if nplayers == 3 else
                 28 if nplayers == 4 else
                 assert_never(f"Invalid Number of Players {nplayers}"))

    deck = types.ALL_CARDS[:]
    random.shuffle(deck)

    def empty_board():
        # Every cell gets its own BoardStack. Reusing one instance for all
        # cells (even through copy.deepcopy, which preserves aliasing inside
        # the copied structure) would make an in-place height change on one
        # cell show up on every other untouched cell of that board.
        return [
            [types.BoardStack(height=0, color=None) for _ in range(4)]
            for _ in range(4)
        ]

    players = [
        types.PlayerState(
            hand=[deck.pop(), deck.pop()],
            board=empty_board(),
            score=0,
        )
        for _ in range(nplayers)
    ]

    return types.GameState(
        players=players,
        player=0,
        history=[],
        center=[(deck.pop(), 0) for _ in range(4)],
        deck=deck,
        color_piles={
            color: pile_size
            for color in [
                types.Color.red,
                types.Color.yellow,
                types.Color.purple,
                types.Color.green,
            ]
        },
    )
def take_action_mut(gamestate: types.GameState,
                    action: types.Action) -> t.Optional[types.GameState]:
    """Drop a mark into a column, mutating ``gamestate`` in place.

    ``action`` unpacks into ``(col, mark)``. The mark lands in the lowest
    empty cell of the column (highest row index), the move counter is
    incremented and the player to move flips between "X" and "O".

    Returns:
        The same (mutated) gamestate, or ``None`` -- leaving the gamestate
        untouched -- when the column does not exist or is already full.
    """
    board = gamestate.board
    col, mark = action

    # Reject columns outside the board instead of raising IndexError (too
    # large) or silently wrapping around to another column (negative).
    if not 0 <= col < len(board[0]):
        return None

    next_available_row = next(
        (i for i in range(BOARD_HEIGHT - 1, -1, -1) if board[i][col] is None),
        None,
    )
    if next_available_row is None:
        # column is full
        return None

    board[next_available_row][col] = mark
    gamestate.num_moves += 1
    gamestate.player = (
        "X" if gamestate.player == "O" else
        "O" if gamestate.player == "X" else
        utils.assert_never(f"Unknown board player {gamestate.player}"))
    return gamestate
# Output destination: "--file PATH" writes the game log to PATH, where the
# default "-" is treated as stdout below; "--no-file" disables output.
# NOTE(review): mut_grp is presumably a mutually exclusive argparse group;
# its creation is not visible in this chunk -- confirm.
mut_grp.add_argument(
    "--file",
    default="-",
    type=str,
)
mut_grp.add_argument("--no-file", action="store_true")
# Integer seed forwarded to the game runner.
parser.add_argument(
    "--seed",
    type=int,
    default=0,
)
# Work with the parsed arguments as a plain dict.
args = vars(parser.parse_args())
filepath, nofile, seed = (
    args.get("file"),
    args.get("no_file"),
    args.get("seed"),
)
if nofile:
    # No output at all.
    play_random_computer_vs_random_computer(seed=seed, output=None)
elif filepath == "-":
    # "-" means: log to stdout.
    play_random_computer_vs_random_computer(seed=seed, output=sys.stdout)
else:
    # Defensive check: "--file" has a default, so None is not expected here.
    if filepath is None:
        utils.assert_never(
            "Argparse allowed file=None even though nofile is not given")
    # "w+" truncates/creates the file; it is closed when the game finishes.
    with open(filepath, "w+") as f:
        play_random_computer_vs_random_computer(seed=seed, output=f)
def _find_open_cells(lines, board, this_player, opponent):
    """Return ``(mine, theirs)``: the empty cells that complete a line.

    A line is "open" for a side when it contains exactly one empty cell and
    every other cell holds that side's mark; the empty cell is collected.
    """
    mine, theirs = set(), set()
    for line in lines:
        singleton = None
        num_nones = 0
        num_this_player = 0
        num_opponent = 0
        for (r, c) in line:
            val = board[r][c]
            if val is None:
                num_nones += 1
                singleton = (r, c)
            elif val == this_player:
                num_this_player += 1
            elif val == opponent:
                num_opponent += 1
            else:
                raise Exception(f"unexpected value in board {val}")
        if num_nones != 1:
            continue
        # With one empty cell, "no opposing marks" means all the remaining
        # cells belong to a single side, whatever the line length is.
        if num_opponent == 0:
            mine.add(singleton)
        elif num_this_player == 0:
            theirs.add(singleton)
    return mine, theirs


def heuristic(gamestate: types.GameState) -> float:
    """Score the position for the player to move.

    Combines, through ``utils.sigmoid``: the difference in open triples, a
    bias towards the middle columns for the player's own marks, and the
    difference in open quads (weighted 3 when they pass the parity filter
    below, 2 otherwise).

    Previously the opponent's open-line conditions required more marks than
    a line can hold next to its empty cell, so opponent threats were never
    counted (and the triple conditions were copied from the quad ones); both
    sides are now detected with the same rule, see ``_find_open_cells``.
    """
    this_player = gamestate.player
    opponent = other_player(this_player)
    board = gamestate.board

    ############################ Count open quads ###########################
    open_quads_this_player, open_quads_opponent = _find_open_cells(
        ALL_QUADS, board, this_player, opponent)

    ############################ Count open trips ###########################
    open_trips_this_player, open_trips_opponent = _find_open_cells(
        ALL_TRIPLES, board, this_player, opponent)
    num_open_trips = len(open_trips_this_player) - len(open_trips_opponent)

    # see https://www.youtube.com/watch?v=YqqcNjQMX18&ab_channel=KeithGalli
    my_parity = (1 if gamestate.player == "X" else
                 0 if gamestate.player == "O" else
                 utils.assert_never(f"Unexpected player {gamestate.player}"))
    open_quads_on_my_parity = [
        x for x in open_quads_this_player if (x[0] % 2) == my_parity
    ]
    # NOTE(review): the opponent's list is filtered with *my* parity as
    # well -- confirm this is intended rather than the opposite parity.
    open_quads_on_opponet_parity = [
        x for x in open_quads_opponent if (x[0] % 2) == my_parity
    ]
    # NOTE(review): "not any(... not in ...)" keeps a cell only if every
    # cell below it in the column is in the other side's list (trivially
    # true for the bottom row) -- confirm against the intended parity rule.
    open_quads_on_my_parity = [
        (r, c) for (r, c) in open_quads_on_my_parity
        if not any((r2, c) not in open_quads_on_opponet_parity
                   for r2 in range(BOARD_HEIGHT - 1, r, -1))
    ]
    open_quads_on_opponet_parity = [
        (r, c) for (r, c) in open_quads_on_opponet_parity
        if not any((r2, c) not in open_quads_on_my_parity
                   for r2 in range(BOARD_HEIGHT - 1, r, -1))
    ]

    # Own marks are worth more the closer they are to column 3.
    middle_bias = (sum(
        (3 - abs(3 - c))
        for (r, c) in ALL_COORDS if board[r][c] == this_player) / 3.0)

    num_open_quads_on_my_parity = len(open_quads_on_my_parity)
    num_open_quads_on_opponent_parity = len(open_quads_on_opponet_parity)
    remaining_open_quads_this_player = (len(open_quads_this_player) -
                                        num_open_quads_on_my_parity)
    remaining_open_quads_opponent = (len(open_quads_opponent) -
                                     num_open_quads_on_opponent_parity)

    return utils.sigmoid(
        num_open_trips + middle_bias +
        3 * (num_open_quads_on_my_parity - num_open_quads_on_opponent_parity) +
        2 * (remaining_open_quads_this_player - remaining_open_quads_opponent))
def get_action(gamestate: c4types.GameState) -> c4types.Action:
    """Return the action chosen by ``get_random_action`` for gamestate.

    A ``None`` result is treated as a programming error and reported through
    ``utils.assert_never``.
    """
    random_action = get_random_action(gamestate)
    if random_action is not None:
        return random_action
    utils.assert_never(
        "Random action is None even though game is not over")
    return random_action
def get_agent(agent_type: AgentType) -> Agent:
    """Build the agent of kind ``agent_type``.

    Supported kinds: "random", "minimax" (searched with depth=4), "human"
    (prompts for a column 0-6 on stdin) and the MCTS variants "mcts-noh",
    "mcts-basich" and "mcts-previsith", which differ only in the heuristic
    settings given to ``types.MctsConfig``. Anything else is reported
    through ``utils.assert_never``.
    """
    mcts_budget = 2

    if agent_type == "random":

        def random_policy(gamestate: c4types.GameState) -> c4types.Action:
            random_action = get_random_action(gamestate)
            if random_action is None:
                utils.assert_never(
                    "Random action is None even though game is not over")
            return random_action

        return Agent(agent_type=agent_type, get_action=random_policy)

    if agent_type == "minimax":
        config = types.MinimaxConfig(
            action=types.MutableActionConfig(
                take_action_mut=take_action_mut,
                undo_action=undo_action,
            ),
            get_all_actions=get_all_actions,
            is_over=is_over,
            heuristic=heuristic,
            get_player=lambda gs: gs.player,
            other_player=other_player,
        )

        def minimax_policy(gs: c4types.GameState) -> c4types.Action:
            _, action = minimax(config, gs, depth=4)
            return action

        return Agent(agent_type=agent_type, get_action=minimax_policy)

    if agent_type == "human":

        def human_policy(gamestate: c4types.GameState) -> c4types.Action:
            # Ask until the answer is an integer inside the column range.
            while True:
                inp = input(
                    f"Choose column (0-6) for player {gamestate.player}: ")
                try:
                    inp = int(inp)
                except ValueError:
                    print("Input must be an int")
                else:
                    if 0 <= inp < 7:
                        return (inp, gamestate.player)
                    print("Input must be in range 0-6")

        return Agent(get_action=human_policy, agent_type=agent_type)

    # The MCTS flavours share everything except their heuristic settings.
    mcts_heuristic_settings = {
        "mcts-noh": {},
        "mcts-basich": {"heuristic_type": "basic", "heuristic": heuristic},
        "mcts-previsith": {
            "heuristic_type": "pre-visit",
            "heuristic": heuristic,
        },
    }
    if agent_type in mcts_heuristic_settings:
        config = types.MctsConfig(
            take_action_mut=take_action_mut,
            undo_action=undo_action,
            get_all_actions=get_all_actions,
            is_over=is_over,
            get_final_score=get_final_score,
            players=["X", "O"],
            budget=mcts_budget,
            encode_action=encode_action,
            **mcts_heuristic_settings[agent_type],
        )

        def mcts_policy(gamestate: c4types.GameState) -> c4types.Action:
            # A fresh engine is built for every decision.
            eng = Engine(config)
            _, action = eng.ponder(gamestate, nseconds=2)
            return action

        return Agent(get_action=mcts_policy, agent_type=agent_type)

    utils.assert_never(f"Invalid agent type {agent_type}")
def score_play_action(
    resulting_state: types.GameState,
    action: types.PlayCardAction,
    card_played: types.Card,
) -> int:
    """Score a played card by dispatching on the type of its card face.

    The scoring player is taken to be the one *before*
    ``resulting_state.player`` (modulo the number of players), i.e. this
    expects the turn to have been advanced already. An unrecognised card
    face is reported through ``utils.assert_never``.
    """
    player_idx = (resulting_state.player - 1) % len(resulting_state.players)

    # Checked in this order; the first matching face type wins.
    scorers = (
        (types.HighestSurround, score_highest_surround),
        (types.Square, score_square),
        (types.Stack, score_stack),
        (types.ThreeDiag, score_three_diag),
        (types.ThreeL, score_three_L),
        (types.ThreeOrthog, score_three_orthog),
        (types.TwoDiag, score_two_diag),
        (types.TwoOrthog, score_two_orthog),
    )
    for face_type, scorer in scorers:
        if isinstance(card_played.card_face, face_type):
            return scorer(
                resulting_state,
                action,
                player_idx,
                card_played.card_face,
                card_played,
            )

    utils.assert_never(f"unknown card face {type(card_played.card_face)}")
def other_player(player: types.Player) -> types.Player:
    """Return "X" for "O" and "O" for "X".

    Any other value is reported through ``utils.assert_never``.
    """
    if player == "O":
        return "X"
    if player == "X":
        return "O"
    return utils.assert_never(f"Unexpected player {player}")
def format_card(card: types.Card) -> str:
    """Render a card as text: its title, an icon and its two colors.

    The title and icon depend on the type of ``card.card_face``; the last
    line shows the letters for ``card.color1`` and ``card.color2``. An
    unrecognised card face is reported through ``utils.assert_never``.

    NOTE(review): the icon literals are kept exactly as they appear in the
    source; they may originally have spanned several lines -- confirm.
    """
    # Placeholders, overwritten by whichever branch matches.
    icon, title = "UNSET", "UNSET"
    if isinstance(card.card_face, types.HighestSurround):
        # Center color in upper case, surrounding color in lower case.
        c, s = (
            COLOR_TO_LETTER[card.card_face.center].upper(),
            COLOR_TO_LETTER[card.card_face.surrounder].lower(),
        )
        icon = textwrap.dedent(f""" {s}{s}{s} {s}{c}{s} {s}{s}{s} """).strip()
        title = "HighestSurround"
    elif isinstance(card.card_face, types.Square):
        c = COLOR_TO_LETTER[card.card_face.color]
        icon = textwrap.dedent(f""" {c}{c} {c}{c} """).strip()
        title = "Square"
    elif isinstance(card.card_face, types.Stack):
        # Color letter followed by the stack height.
        c = COLOR_TO_LETTER[card.card_face.color]
        h = card.card_face.height
        icon = textwrap.dedent(f""" {c}{h} """).strip()
        title = "Stack"
    elif isinstance(card.card_face, types.ThreeDiag):
        c = COLOR_TO_LETTER[card.card_face.color]
        icon = textwrap.dedent(f""" {c} {c} {c} """).strip()
        title = "ThreeDiag"
    elif isinstance(card.card_face, types.ThreeL):
        c = COLOR_TO_LETTER[card.card_face.color]
        icon = textwrap.dedent(f""" {c} {c}{c} """).strip()
        title = "ThreeL"
    elif isinstance(card.card_face, types.ThreeOrthog):
        c = COLOR_TO_LETTER[card.card_face.color]
        icon = f"{c}{c}{c}"
        title = "ThreeOrthog"
    elif isinstance(card.card_face, types.TwoDiag):
        # Two stacks, each shown as color letter + height.
        c1 = COLOR_TO_LETTER[card.card_face.stack1.color]
        c2 = COLOR_TO_LETTER[card.card_face.stack2.color]
        h1 = card.card_face.stack1.height
        h2 = card.card_face.stack2.height
        icon = textwrap.dedent(f""" {c1}{h1} {c2}{h2} """).strip()
        title = "TwoDiag"
    elif isinstance(card.card_face, types.TwoOrthog):
        c1 = COLOR_TO_LETTER[card.card_face.stack1.color]
        c2 = COLOR_TO_LETTER[card.card_face.stack2.color]
        h1 = card.card_face.stack1.height
        h2 = card.card_face.stack2.height
        icon = f"{c1}{h1} {c2}{h2}"
        title = "TwoOrthog"
    else:
        utils.assert_never(f"Invalid icon type {type(card.card_face)}")
    # c1/c2 are rebound here to the card's own two colors (not the face's).
    c1, c2 = COLOR_TO_LETTER[card.color1], COLOR_TO_LETTER[card.color2]
    card_text = "\n".join([title, icon, f"{c1} {c2}"])
    return card_text
def get_random_action(gamestate: types.GameState) -> t.Optional[types.Action]:
    """Pick a random action for the player to move, or ``None`` if stuck.

    An action kind is chosen uniformly among those currently possible
    (draw from center, draw from deck, play a card) and then filled in with
    random details. The gamestate is not modified.

    Returns:
        A ``DrawCenterCardAction``, ``DrawDeckAction`` or ``PlayCardAction``,
        or ``None`` when no action kind is possible.
    """
    player_idx = gamestate.player
    player = gamestate.players[player_idx]

    # can you do a draw card action
    possible_action_types: t.List[t.Type[types.Action]] = []
    if gamestate.center != [] and len(player.hand) < MAX_HAND_SIZE:
        possible_action_types.append(types.DrawCenterCardAction)

    # can you do a draw deck action
    if (gamestate.deck != [] and player.score > 0
            and len(player.hand) < MAX_HAND_SIZE):
        possible_action_types.append(types.DrawDeckAction)

    # can you do a play card action: the color piles must still hold the
    # pieces the card needs (two of one color, or one of each)
    hand_idx_choices = [
        idx for (idx, card) in enumerate(player.hand)
        if (card.color1 == card.color2
            and gamestate.color_piles[card.color1] >= 2) or
        (card.color1 != card.color2 and gamestate.color_piles[card.color1] >= 1
         and gamestate.color_piles[card.color2] >= 1)
    ]
    if player.hand != [] and hand_idx_choices != []:
        # TODO: maybe put a check in here to make sure there's a place to put
        # the card (e.g. haven't filled up the board). Should never occur in a
        # real game bc you'll run out of deck before you fill up the board
        possible_action_types.append(types.PlayCardAction)

    if possible_action_types == []:
        return None

    # Now take an action
    action_type = random.choice(possible_action_types)
    if action_type == types.DrawCenterCardAction:
        # Uniform over the valid indices 0..len-1. (randint's upper bound is
        # inclusive, so "randint(0, len) - 1" could also produce -1.)
        return types.DrawCenterCardAction(
            random.randrange(len(gamestate.center)))
    elif action_type == types.DrawDeckAction:
        return types.DrawDeckAction()
    elif action_type == types.PlayCardAction:
        cells = list(itertools.product(range(BOARD_SIZE), range(BOARD_SIZE)))
        placement1 = random.choice([
            (x, y) for (x, y) in cells
            if player.board[x][y].height < MAX_STACK_HEIGHT
        ])
        # The second piece may land on the same cell, so count the first
        # piece towards that cell's height -- without touching the board,
        # which keeps the gamestate intact even if a choice below raises.
        placement2 = random.choice([
            (x, y) for (x, y) in cells
            if (player.board[x][y].height +
                (1 if (x, y) == placement1 else 0)) < MAX_STACK_HEIGHT
        ])
        return types.PlayCardAction(
            hand_index=random.choice(hand_idx_choices),
            placement1=placement1,
            placement2=placement2,
        )
    else:
        utils.assert_never("invalid action type in get_random_action")
def take_action_mut(
    state: types.GameState,
    action: types.Action,
) -> t.Optional[types.GameState]:
    """Apply ``action`` for the player to move, mutating ``state`` in place.

    * ``DrawCenterCardAction``: move the chosen center card (and its coins)
      to the player, then refill the center from the deck if possible.
    * ``PlayCardAction``: remove the card from the hand, raise the two
      placement stacks, take the pieces from the color piles and add the
      card's score.
    * ``DrawDeckAction``: pay one point to draw the top deck card; the coin
      goes onto the center card with the fewest coins.

    Afterwards the turn passes to the next player.

    Returns:
        The same (mutated) state, or ``None`` if the action is rejected. A
        rejected ``DrawDeckAction`` leaves the state untouched (all of its
        preconditions are checked before anything is changed).
    """
    player = state.players[state.player]

    if isinstance(action, types.DrawCenterCardAction):
        if action.center_index >= len(state.center):
            return None
        (drawn_card, score) = state.center.pop(action.center_index)
        player.hand.append(drawn_card)
        player.score += score
        if len(state.deck) > 0:
            state.center.append((state.deck.pop(), 0))

    elif isinstance(action, types.PlayCardAction):
        if action.hand_index >= len(player.hand):
            return None
        card = player.hand.pop(action.hand_index)
        x1, y1 = action.placement1
        x2, y2 = action.placement2
        player.board[x1][y1] = types.BoardStack(
            player.board[x1][y1].height + 1,
            card.color1,
        )
        player.board[x2][y2] = types.BoardStack(
            player.board[x2][y2].height + 1,
            card.color2,
        )
        state.color_piles[card.color1] -= 1
        state.color_piles[card.color2] -= 1
        # NOTE(review): state.player has not been advanced yet at this
        # point; confirm score_play_action resolves the acting player
        # correctly from this state.
        player.score += score_play_action(state, action, card)

    elif isinstance(action, types.DrawDeckAction):
        # Validate everything up front: previously the card was popped from
        # the deck (and even added to the hand) before the hand-size and
        # score checks, so a rejected action still changed the state.
        if (len(state.deck) == 0 or len(player.hand) >= MAX_HAND_SIZE
                or player.score == 0):
            return None
        player.hand.append(state.deck.pop())

        # put a coin on an arbitrary center card that has the lowest number of
        # coins on it . TODO: fix it so the player has a choice of what card to
        # put it on
        player.score -= 1
        if state.center:
            # if there are cards in the center, add 1 coin to the card with the
            # smallest card stack. In practice there will always be cards in
            # the center. This check only exists so hypothesis tests can make
            # empty centers and not blow up
            idx, card, score = min(
                ((idx, card, score)
                 for (idx, (card, score)) in enumerate(state.center)),
                key=lambda x: x[2],
            )
            state.center[idx] = (card, score + 1)
    else:
        utils.assert_never(f"unknown action type {type(action)}")

    state.player = (state.player + 1) % len(state.players)
    if not _is_gamestate_valid(state):
        return None
    return state