def __init__(self, players):
    """
    Prepare a game for the given participants.

    players - List of player objects : Array

    A fresh Board is created, the list is stored as-is (not copied) and
    its length is cached in ``no_players``.
    """
    self.board = Board()
    self.players = players
    player_total = len(players)
    self.no_players = player_total
def __init__(self, p0: PlayerI, p1: PlayerI, run=True, mem: ReplayMem = None, reward: RewardI = None):
    """
    Store both players and the optional training helpers, then
    optionally start a game right away.

    p0, p1 - The two participants : PlayerI
    run    - When true, ``self.play()`` is called before returning : Boolean
    mem    - Optional replay memory : ReplayMem or None
    reward - Optional reward callable; must be given whenever ``mem``
             is given (checked with an assert) : RewardI or None
    """
    self.p0, self.p1 = p0, p1
    self.mem, self.reward = mem, reward
    self.board = Board()
    self.winner = None
    # A replay memory is useless without a reward to label transitions.
    assert mem is None or reward is not None
    if run:
        self.play()
def get_move(self, board: Board):
    """
    Ask the human player for a move on stdin until an integer is typed.

    board - Current board; it is printed so the player can see the
            position, and its ``player_map`` is used to show which
            side this player is : Board

    Returns the entered integer. Legality of the move is NOT checked
    here, so the caller is responsible for validating it.
    """
    print("Its your turn. You are player {}".format(
        board.player_map[self.playerID]))
    print(board)
    while True:
        print("Whats your move? (move is an int in [0, 8]).")
        move_string = input("->")
        try:
            return int(move_string)
        except ValueError:
            # Not an integer: prompt again. Only the parse error is
            # swallowed so that unrelated failures are not hidden.
            continue
def get_move(self, board: Board, eps=0.0):
    """
    Choose a move epsilon-greedily from the network's per-move scores.

    board - Current board; queried for its legal moves : Board
    eps   - A random legal move is returned whenever ``random() < eps``

    Every call increments ``self.moves``; every random pick also
    increments ``self.rand``.

    Returns None when there are no legal moves, otherwise one of the
    legal moves (the one with the highest score in the greedy case).
    """
    self.moves += 1
    legal_moves = board.legal_moves()
    if len(legal_moves) == 0:
        return None
    if random() < eps:
        self.rand += 1
        return sample(legal_moves, 1)[0]

    best_q = float("-inf")
    best_move = None
    fallback_move = None
    for index, move in enumerate(legal_moves):
        if index == 0:
            fallback_move = move
        encoded = TTTQEncoding()(move, board, None, self.playerID)
        # Presumably the net returns a (1, 1)-shaped result - TODO confirm.
        q_val = self.net(encoded)[0][0]
        if q_val > best_q:
            best_q = q_val
            best_move = move

    # If no score compared greater than -inf (e.g. all NaN), still return
    # a legal move instead of None, so callers that retry until they get
    # a legal move cannot loop forever.
    return fallback_move if best_move is None else best_move
def get_move(self, board: Board, eps=0):
    """
    Choose a move epsilon-greedily by scoring the board after each move.

    board - Current board; it is never modified, each candidate move is
            applied to a deep copy : Board
    eps   - A random legal move is returned whenever ``random() < eps``

    Returns None when there are no legal moves, otherwise one of the
    legal moves (the one whose resulting board scores highest in the
    greedy case).
    """
    legal_moves = board.legal_moves()
    if len(legal_moves) == 0:
        return None
    if random() < eps:
        return sample(legal_moves, 1)[0]

    best_v = float("-inf")
    best_move = None
    fallback_move = None
    for index, move in enumerate(legal_moves):
        if index == 0:
            fallback_move = move
        # Simulate the move on a copy so the real board stays untouched.
        sim_board = deepcopy(board)
        sim_board.do(move, self.playerID)
        encoded = TTTVEncoding()(None, sim_board, None, self.playerID)
        # Presumably the net returns a (1, 1)-shaped result - TODO confirm.
        v_val = self.net(encoded)[0][0]
        if v_val > best_v:
            best_v = v_val
            best_move = move

    # If no score compared greater than -inf (e.g. all NaN), still return
    # a legal move instead of None, so callers that retry until they get
    # a legal move cannot loop forever.
    return fallback_move if best_move is None else best_move
class Game:
    """
    Game loop for two players sharing one Board.

    Moves are asserted to lie in ``range(9)``. Besides plain play, the
    class can record transitions into a replay memory (see ``learn``).
    """

    def __init__(self, p0: PlayerI, p1: PlayerI, run=True, mem: ReplayMem = None, reward: RewardI = None):
        """
        p0, p1 - The two participants; p0 always moves first : PlayerI
        run    - When true, ``play()`` is called before returning : Boolean
        mem    - Optional replay memory used by ``learn`` : ReplayMem or None
        reward - Reward callable; required whenever ``mem`` is given
                 (checked with an assert) : RewardI or None
        """
        self.p0 = p0
        self.p1 = p1
        self.mem = mem
        self.reward = reward
        self.board = Board()
        self.winner = None
        if mem is not None:
            assert reward is not None
        if run:
            self.play()

    def getMove(self, player: PlayerI, eps=0.0):
        """
        Ask ``player`` for moves until it supplies a legal one.

        ``eps`` is only forwarded to NetPlayerI instances. After every
        illegal answer the player ID and the board are printed and the
        player is asked again.
        """
        while True:
            if isinstance(player, NetPlayerI):
                move = player.get_move(board=self.board, eps=eps)
            else:
                move = player.get_move(board=self.board)
            if self.board.is_legal(move, player.playerID):
                return move
            print(player.playerID, self.board)

    def get_and_do_move(self, player: PlayerI, eps=0.0):
        """Obtain a legal move from ``player``, apply it and return it."""
        move = self.getMove(player, eps)
        assert move in range(9)
        self.board.do(move, player.playerID)
        return move

    def learn(self, eps=0.1):
        """
        Play one game and store transitions of NetPlayerI participants
        in ``self.mem``.

        eps - Exploration rate forwarded to the players' move selection

        Returns the number of moves played. ``self.winner`` is set to
        the winning player, or stays None when nobody completed a row.

        NOTE(review): unlike ``play``, the players' ``end`` method is
        not called here - confirm that this is intended.
        """
        assert self.mem is not None
        p0_is_net = isinstance(self.p0, NetPlayerI)
        p1_is_net = isinstance(self.p1, NetPlayerI)
        last_state_p0 = None
        last_state_p1 = None
        self.winner = None
        self.board.clear()
        moves = 0
        while not self.board.is_terminal(self.p0.playerID):
            last_state_p0 = self._learn_turn(
                self.p0, p0_is_net, last_state_p0, eps)
            moves += 1
            if self.board.is_terminal(self.p1.playerID):
                break
            last_state_p1 = self._learn_turn(
                self.p1, p1_is_net, last_state_p1, eps)
            moves += 1
        self._credit_winner()
        return moves

    def play(self, wait_and_show=False, max_moves=None):
        """
        Play one game without recording anything.

        wait_and_show - When true, print the board after every move and
                        wait for the user to press enter : Boolean
        max_moves     - Accepted for compatibility; currently unused

        Afterwards ``self.winner`` is the winning player (or None) and
        both players' ``end`` method has been called with the board.
        """
        self.board.clear()
        self.winner = None
        while not self.board.is_terminal(self.p0.playerID):
            self._play_turn(self.p0, wait_and_show)
            if self.board.is_terminal(self.p1.playerID):
                break
            self._play_turn(self.p1, wait_and_show)
        self._credit_winner()
        self.p0.end(self.board)
        self.p1.end(self.board)

    def _learn_turn(self, player, record, board_prev, eps):
        """
        Let ``player`` move; if ``record`` is set and the player already
        has a previous snapshot, store the transition from that snapshot
        to the new board. Returns a fresh snapshot of the board.
        """
        move = self.get_and_do_move(player, eps)
        if board_prev is not None and record:
            board_post = deepcopy(self.board)
            r = self.reward(board_prev, board_post, None, player.playerID)
            self.mem.add(board_prev, None, move, r, board_post, None,
                         player.playerID)
        # Separate copy so the stored transition and the next "previous"
        # state never share one object.
        return deepcopy(self.board)

    def _play_turn(self, player, wait_and_show):
        """Let ``player`` move and optionally show the board and pause."""
        self.get_and_do_move(player)
        if wait_and_show:
            print(self.board)
            input("continue?")

    def _credit_winner(self):
        """Notify and remember the player owning a completed row, if any."""
        if len(self.board.get_rows(self.p0.playerID)) > 0:
            self.p0.win()
            self.winner = self.p0
        elif len(self.board.get_rows(self.p1.playerID)) > 0:
            self.p1.win()
            self.winner = self.p1
class Game:
    """
    Class to run the game, setup UI, take input
    """

    def __init__(self, players):
        """
        players - List of player objects : Array
        """
        self.board = Board()
        self.players = players
        self.no_players = len(players)

    def run(self, repeats=1, quiet=False):
        """
        Run the game

        repeats - Number of times to repeat the game : Integer > 0
        quiet - Should the board be displayed : Boolean

        Only the per-turn prompt and the board are suppressed by
        ``quiet``; game headers and results are always printed.
        ``self.players`` is sorted in place by wins (descending) once
        all games are finished.
        """
        print("Welcome to tic tac toe")

        for i in range(1, repeats + 1):
            print("Game %i" % i)

            # Random starting player
            next_player = randint(0, self.no_players - 1)

            while self.board.finished is None:
                current = self.players[next_player]
                if not quiet:
                    print("Player %s's turn" % current.name)

                # Get player input and make the move
                move = current.get_move(self.board)
                self.board.move(move, current.symbol)

                if not quiet:
                    self.board.print_board()

                # Go to next player in the list (wraps around)
                next_player = (next_player + 1) % self.no_players

            # Credit winner: the first player whose symbol matches the
            # board's final state, if any.
            winner = next(
                (player for player in self.players
                 if player.symbol == self.board.finished),
                None,
            )
            if winner is not None:
                winner.wins += 1
                print("Player %s has won!" % winner.name)
            else:
                print("No one has won!")

            self.board.reset()

        # Sort players by wins
        self.players.sort(key=lambda x: x.wins, reverse=True)

        print("Finished")
        # Guard against an empty player list so the summary never raises.
        if self.players and self.players[0].wins > 0:
            print("Overall - Player %s has won!" % self.players[0].name)
            print("Leaderboard: ")
            for rank, player in enumerate(self.players, start=1):
                print(" %i %s : %i" % (rank, player.name, player.wins))
        else:
            print("There were no winners today...")
def get_move(self, board: Board):
    """
    Pick one of this player's legal moves at random.

    board - Board that is asked for the moves legal for ``self.playerID``

    Returns the chosen move, or None when no legal move exists.
    """
    candidates = board.legal_moves(self.playerID)
    if len(candidates) > 0:
        # Draw a single element and unwrap it from the one-item list.
        (chosen,) = sample(candidates, 1)
        return chosen
    return None