def turn(self):
    letter, count = self.lettersAreNormal
    if count > 0 and self.words.EMPTY not in self.letters:
        return State.ChangeLetters(letter, count)
    elif count < 0:
        return State.LettersChange
    match0 = self.words.getMatches(0)
    try:
        word, pos, scores = max(self.wordsGenerator(), key=lambda x: x[2])
    except ValueError:
        pass  # TODO
    try:
        wordWithCon, posOfWWC, ConWord, posOfC, scoresWC = \
            self.board.BestConnectedWord(letters)
def negamax(state, alpha, beta, depth, θ):
    if state.terminal_test():
        return state.utility()
    if depth == 0:
        return H(Φ(state), θ)
    v = -INF
    for a in state.actions():
        child = state.result(a)
        # game state must be flipped so the child is evaluated from the point
        # of view of the player about to move; the alpha/beta bounds are
        # negated and swapped accordingly
        v = max(v, -negamax(State(-1 * child.board), -beta, -alpha,
                            depth - 1, θ))
        if v >= beta:
            return v
        alpha = max(alpha, v)
    return v
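# Hedged usage sketch (assumption: best_action is not in the original files):
# how negamax is typically driven from the root to choose a move, mirroring
# the action-selection loops in the training functions below.
def best_action(state, depth, θ):
    best, alpha, beta = None, -INF, INF
    for a in state.actions():
        child = state.result(a)
        # flip the board so the child is scored from the mover's perspective
        value = -negamax(State(-1 * child.board), -beta, -alpha, depth - 1, θ)
        if value > alpha:
            alpha, best = value, a
    return best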
def turn0(self):
    letter, count = self.lettersAreNormal
    if count > 0 and self.words.EMPTY not in self.letters:
        return State.ChangeLetters(letter, count)
    elif count < 0:
        return State.LettersChange
    try:
        word = min(self.words.getMatches(0),
                   key=lambda x: self.words.scores(x))
        column = self.board.findBestInCenter(word)
        self.board.place(word, self.board.centerVertical, column)
        self.removeLetters(word)
        return State.SuccessfulTurn
    except ValueError:
        return State.LettersChange
def turn(self):
    self.currentPlayer.letters += self.bag.get(
        7 - len(self.currentPlayer.letters))
    state = self._turn()
    if state > 0:
        letter, count = State.Decode(state)
        self.removeLetters(letter * count)
        return True
    elif state == State.LettersChange:
        self.swapLetters()
        self.fail()
    elif state == State.SuccessfulTurn:
        self._fails = 0
        self.swapTurn()
    else:  # State = Pass Turn
        self.fail()
        self.nextPlayer()
def play(θo, θm, θe, depth=TRAIN_DEPTH):
    OPN, MID, END = 0, 1, 2
    state = State()
    first = np.random.choice([0, 1])
    random_turns = 0  #np.random.choice([0] + [2]*2 + [4]*4 + [8]*8 + 16*[16] + 32*[32])
    while (not state.terminal_test()):
        print(f'Turn number {state.turn}')
        print_board(state.board)
        print()
        if (state.turn + first) % 2:
            if state.board[state.board > 0].sum() == 12:
                θ = θo
            elif state.board[state.board > 0].sum() > 5:
                θ = θm
            else:
                θ = θe
            state.history[state] += 1
            if state.turn < random_turns:
                num_actions = len(state.actions(False))
                state = state.result(
                    state.actions(False)[np.random.choice(
                        [i for i in range(num_actions)])])
            else:
                searched_states = []
                V = minimax(State(state.board), depth, θ, searched_states)
                Δθ = np.zeros(num_features)
                for s, vs, hs, features, d in searched_states:
                    #𝛿 = V(s) - H(features, θ)
                    𝛿 = vs - hs
                    Δθ += α * 𝛿 * features * λ**(depth - d)
                for i in range(num_features):
                    if Δθ[i] > MAX_CHANGE:
                        Δθ[i] = MAX_CHANGE
                    elif Δθ[i] < -MAX_CHANGE:
                        Δθ[i] = -MAX_CHANGE
                θ += Δθ
                actions = []
                actions2 = []
                for a in state.actions():
                    child = state.result(a)
                    actions.append((-negamax(State(-1 * child.board),
                                             -INF, INF, depth - 1, θ), a))
                state = state.result(max(actions)[1])
        else:
            print(actions_with_indices(translate_actions(state.actions())))
            i = int(input())
            state = state.result(state.actions()[i])
        state.board *= -1
        state.turn += 1
    print(state)
    print('Game over!')
    return θo, θm, θe
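# Hedged sketch (not part of the original files): the weight update inside
# play(), factored into a helper for clarity. `searched_states` is assumed to
# hold (state, search_value, heuristic_value, feature_vector, depth) tuples,
# as in the loop above; λ discounts nodes found further from the root and the
# per-component clip keeps any single update small.
import numpy as np

def treestrap_update(θ, searched_states, depth, α, λ, max_change):
    Δθ = np.zeros(len(θ))
    for s, vs, hs, features, d in searched_states:
        δ = vs - hs                                  # search value minus heuristic
        Δθ += α * δ * features * λ ** (depth - d)
    return θ + np.clip(Δθ, -max_change, max_change)  # element-wise clip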
def tree_strap_train(θo, θd, θm, θe, depth=TRAIN_DEPTH):
    state = State()
    #memoised_features = {} if MULTI else None
    memoised_features = {}
    random_turns = np.random.choice([0] * 0 + [2] * 0 + [6] * 2 + [8] * 4 +
                                    [16] * 4 + [32] * 8)
    # See if each player will use book
    X_use_book = np.random.choice([0, 0, 0, 1])
    O_use_book = np.random.choice([0, 0, 0, 1])
    while (not state.training_terminal_test()):
        print(f'Turn number {state.turn}')
        print(state)
        print()
        if state.stage[0] == OPN:
            θ = θo
        elif state.stage[0] == DEV:
            θ = θd
        elif state.stage[0] == MID:
            θ = θm
        else:
            θ = θe
        #depth = 2*TRAIN_DEPTH
        if ((state.turn % 2 and X_use_book) or
                (not state.turn % 2 and O_use_book)) and \
                (str(state.board) in opening_book):
            state = state.result(tuple(opening_book[str(state.board)]))
        elif state.turn < random_turns:
            num_actions = len(state.actions(False))
            state = state.result(
                state.actions(False)[np.random.choice(
                    [i for i in range(num_actions)])])
        else:
            if MULTI:
                searched_states = set()
                V = speedy_minimax(state, depth, θ, searched_states,
                                   first=True,
                                   memoised_states=memoised_features)[0]
            elif not AB_TRAIN:
                searched_states = []
                V = negamax(state, -10 * INF, 10 * INF, depth, θ,
                            memoised_features)
            if AB_TRAIN:
                searched_states = []
                alpha_beta_train(state, θ, searched_states, TRAIN_DEPTH,
                                 memoised_features)
                ab_weight_updates(searched_states, θ, depth, α, λ, MAX_CHANGE)
            else:
                Δθ = np.zeros(num_features)
                #for s, vs, hs, features, d in searched_states:
                #    # updates should only happen for states that match the player to play
                #    if not d % 2:
                #        features = np.frombuffer(features)
                #        #𝛿 = V(s) - H(features, θ)
                #        𝛿 = vs - hs
                #        Δθ += α*𝛿*features*λ**(depth-d)
                if V != 0:
                    features = Φ(state, memoised_features)
                    h = H(features, θ)
                    𝛿 = V - h
                    Δθ += α * 𝛿 * features
                for i in range(num_features):
                    if Δθ[i] > MAX_CHANGE:
                        Δθ[i] = MAX_CHANGE
                    elif Δθ[i] < -MAX_CHANGE:
                        Δθ[i] = -MAX_CHANGE
                θ += Δθ
            best_action = None
            alpha, beta, v = -4 * INF, 4 * INF, -4 * INF
            for a in state.actions():
                child = state.result(a)
                nmax = -negamax(child, -beta, -alpha, depth - 1, θ,
                                memoised_features)
                if nmax > alpha:
                    alpha = nmax
                    best_action = a
            state = state.result(best_action)
            print(alpha)
    print('Terminal State:')
    print(state)
    memoised_features = None
    gc.collect()
    return θo, θd, θm, θe
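# Hedged aside (assumption: OPN/DEV/MID are the stage constants referenced in
# tree_strap_train): the stage-to-weights dispatch above could equivalently be
# written as a lookup, with the end-game weights as the fallback.
stage_weights = {OPN: θo, DEV: θd, MID: θm}
θ = stage_weights.get(state.stage[0], θe)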
from features import Φ, ALL_STACKS, RINGS, H
from ab_treestrap_train import alpha_beta_train, ab_weight_updates
from opening import opening_book
#from weight import weight1
# note multiprocessing was used for training only, not for play
from multiprocessing import Pool, Manager
import gc

MULTI = False
PROCESSES = 8
AB_TRAIN = True
TRAIN_DEPTH = 4
num_features = len(Φ(State(), {}))
α = 0.000001
λ = 0.5
MAX_CHANGE = 0.01


def tree_strap_train(θo, θd, θm, θe, depth=TRAIN_DEPTH):
    state = State()
    #memoised_features = {} if MULTI else None
    memoised_features = {}
    random_turns = np.random.choice([0] * 0 + [2] * 0 + [6] * 2 + [8] * 4 +
                                    [16] * 4 + [32] * 8)
    # See if each player will use book
    X_use_book = np.random.choice([0, 0, 0, 1])
def Φ(state, memoized_states={}):
    # the mutable default argument doubles as a persistent feature cache
    if state in memoized_states:
        return memoized_states[state]
    X, O = 1, 0
    board = state.board
    opp_b = State(-1 * board).board
    X_stacks = [(x, y) for x, y in ALL if board[x][y] > 0]
    O_stacks = [(x, y) for x, y in ALL if board[x][y] < 0]
    X_stacks_by_size = [[(x, y) for x, y in X_stacks
                         if board[x][y] == stack_size]
                        for stack_size in range(1, 13)]
    O_stacks_by_size = [[(x, y) for x, y in O_stacks
                         if board[x][y] == -stack_size]
                        for stack_size in range(1, 13)]

    def largest_connected_cluster(player):
        ''' largest connected cluster in terms of number of stacks '''
        NORM = 12
        player_stacks = X_stacks.copy() if player == X else O_stacks.copy()
        colour = 1 if player == X else -1
        adj = [[-1, -1], [-1, 0], [-1, 1], [0, -1],
               [0, 1], [1, -1], [1, 0], [1, 1]]
        largest_connected_cluster = 0
        num_stacks = len(player_stacks)
        while len(player_stacks) > 0:
            cur_piece = player_stacks[0]
            x, y = cur_piece
            are_adj = set()
            checked_adj = set()
            are_adj.add((x, y))
            while len(are_adj) > len(checked_adj):
                x, y = are_adj.difference(checked_adj).pop()
                for d in adj:
                    dx, dy = x + d[0], y + d[1]
                    if 0 <= dx < 8 and 0 <= dy < 8:
                        if board[dx][dy] * colour > 0:
                            are_adj.add((dx, dy))
                checked_adj.add((x, y))
                player_stacks.remove((x, y))
            if len(are_adj) > largest_connected_cluster:
                largest_connected_cluster = len(are_adj)
            if largest_connected_cluster >= num_stacks / 2:
                return largest_connected_cluster / NORM
        return largest_connected_cluster / NORM

    def largest_almost_connected_cluster_stacks(player, num_pieces=False):
        ''' number of stacks (opt pieces) in an extended cluster
        (vulnerable to one opposing stack in the right spot) '''
        NORM = 12
        player_stacks = X_stacks.copy() if player == X else O_stacks.copy()
        max_lost = 0
        adj = [[-1, -1], [-1, 0], [-1, 1], [0, -1],
               [0, 1], [1, -1], [1, 0], [1, 1]]
        # empty squares adjacent to one of the player's stacks
        check_spots = {
            (x + dx, y + dy)
            for x, y in player_stacks for dx, dy in adj
            if 0 <= x + dx <= 7 and 0 <= y + dy <= 7
            and (x + dx, y + dy) not in player_stacks
        }
        if num_pieces:
            starting_pieces = pieces(player)
        else:
            starting_stacks = stacks(player)
        for x, y in check_spots:
            result = state.result(('BOOM', (x, y)))
            if num_pieces:
                lost = starting_pieces - pieces(player, result.board)
            else:
                lost = starting_stacks - stacks(player, board=result.board)
            if lost > max_lost:
                max_lost = lost
        return max_lost / NORM

    def largest_almost_connected_cluster_pieces(player, num_pieces=False):
        ''' number of pieces in an extended cluster
        (vulnerable to one opposing stack in the right spot) '''
        # other function is already normalised correctly
        NORM = 1
        return largest_almost_connected_cluster_stacks(player, True) / NORM

    def piece_centrality(player, ring):
        ''' Overall centrality in terms of where pieces are '''
        NORM = 12
        # player_stacks is a list of (x, y) of each stack
        player_stacks = X_stacks if player == X else O_stacks
        colour = 1 if player == X else -1
        count = 0
        for pos in player_stacks:
            if pos in ring:
                count += board[pos[0]][pos[1]]
        return count * colour / NORM

    def stack_centrality(player, ring):
        ''' Overall centrality in terms of where stacks are
        (ignores that bigger stacks have more pieces) '''
        NORM = 12
        # player_stacks is a list of (x, y) of each stack
        player_stacks = X_stacks if player == X else O_stacks
        count = 0
        for pos in player_stacks:
            if pos in ring:
                count += 1
        return count / NORM

    def spacing(player):
        NORM = 1
        pass

    def mobility(player):
        ''' How many different squares the player can move onto '''
        NORM = 1
        player_stacks = X_stacks if player == X else O_stacks
        colour = 1 if player == X else -1
        contribution = 0
        for x, y in player_stacks:
            stack_size = abs(board[x][y])
            num_spots = 0
            for dx in range(1, stack_size + 1):
                if 0 <= x + dx < 8 and ((board[x + dx][y] == 0) or
                                        (board[x + dx][y] * colour > 0)):
                    num_spots += 1
                if 0 <= x - dx < 8 and ((board[x - dx][y] == 0) or
                                        (board[x - dx][y] * colour > 0)):
                    num_spots += 1
            for dy in range(1, stack_size + 1):
                if 0 <= y + dy < 8 and ((board[x][y + dy] == 0) or
                                        (board[x][y + dy] * colour > 0)):
                    num_spots += 1
                if 0 <= y - dy < 8 and ((board[x][y - dy] == 0) or
                                        (board[x][y - dy] * colour > 0)):
                    num_spots += 1
            # contribution += num spots that piece can move / num_spaces it could move if free
            contribution += num_spots / (4 * stack_size)
        return contribution / len(player_stacks) / NORM

    def control(player):
        ''' Blowing up all pieces now, how many squares are touched '''
        NORM = 1
        player_stacks = X_stacks if player == X else O_stacks

    def control2(player):
        ''' Moving and then blowing up, how many squares are touched '''
        NORM = 1
        pass

    def best_trade(player):
        ''' piece advantage of best trade '''
        NORM = 11
        pass

    def av_cluster_size(player):
        NORM = 1
        pass

    def pieces(player, board=board):
        ''' Returns the number of pieces on a board for the current player.
        Defaults to the board of the current state, can pass in a different
        board. '''
        NORM = 12
        if player == X:
            return board[board > 0].sum() / NORM
        return -board[board < 0].sum() / NORM

    def stacks(player, board=board):
        ''' Takes a player and returns the number of stacks that player has INT
        Defaults to the board of the current state, can pass in a different
        board. '''
        NORM = 12
        if player == X:
            return (board > 0).sum() / NORM
        return (board < 0).sum() / NORM

    def actions(player):
        ''' Returns the number of actions the player has INT '''
        NORM = 130
        if player == X:
            return len(State(board).actions()) / NORM
        return len(State(opp_b).actions()) / NORM

    def connectivity(player):
        NORM = 8
        player_stacks = X_stacks if player == X else O_stacks
        colour = 1 if player == X else -1
        adj = [[-1, -1], [-1, 0], [-1, 1], [0, -1],
               [0, 1], [1, -1], [1, 0], [1, 1]]
        count = 0
        s = set()
        for x, y in player_stacks:
            for d in adj:
                dx, dy = x + d[0], y + d[1]
                if 0 <= dx < 8 and 0 <= dy < 8:
                    if board[dx][dy] * colour > 0:
                        s.add((dx, dy))
        return len(s) / NORM

    def threat(player):
        NORM = 8
        player_stacks = X_stacks if player == X else O_stacks
        colour = 1 if player == X else -1
        adj = [[-1, -1], [-1, 0], [-1, 1], [0, -1],
               [0, 1], [1, -1], [1, 0], [1, 1]]
        count = 0
        s = set()
        for x, y in player_stacks:
            for d in adj:
                dx, dy = x + d[0], y + d[1]
                if 0 <= dx < 8 and 0 <= dy < 8:
                    if board[dx][dy] * colour < 0:
                        s.add((dx, dy))
        return len(s) / NORM

    def column_piece_count(player, column):
        ''' How many pieces are in the column '''
        NORM = 24
        col = []
        for row in range(8):
            col.append(board[row][column])
        col = np.array(col)
        if player == X:
            return col[col > 0].sum() / NORM
        return -col[col < 0].sum() / NORM

    def column_stack_count(player, column):
        ''' How many stacks of certain size are in the column '''
        NORM = 8
        col = []
        for row in range(8):
            col.append(board[row][column])
        col = np.array(col)
        if player == X:
            return (col > 0).sum() / NORM
        return (col < 0).sum() / NORM

    def av_stack_size(player):
        NORM = 12
        return pieces(player) / stacks(player) / NORM

    # Distance to opponent
    # Measure of closeness or spread
    # Board position
    # Closeness to centre
    f1s = [
        largest_connected_cluster,
        #largest_almost_connected_cluster_stacks,
        largest_almost_connected_cluster_pieces,
        mobility,
        pieces,
        stacks,
        actions,
        connectivity,
        threat,
        av_stack_size
    ]
    f2s = [piece_centrality, stack_centrality]
    f3s = [column_piece_count, column_stack_count]
    features = ([f(player) for f in f1s for player in [X, O]] +
                [f(player, ring) for f in f2s
                 for ring in RINGS for player in [X, O]] +
                [f(player, col) for f in f3s
                 for col in range(8) for player in [X, O]])
    diffs = []
    for i in range(0, len(features), 2):
        diffs.append(features[i] - features[i + 1])
    features = np.array(features + diffs)
    memoized_states[state] = features
    return features
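# Hedged usage note (assumption): how the vector returned by Φ is consumed
# during search, matching the depth-0 case of negamax above: a linear
# evaluation over per-player features plus the X-minus-O difference terms
# appended at the end of Φ.
features = Φ(state)        # cached in memoized_states on first computation
value = H(features, θ)     # dot product with the learned weights, clamped by H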
def actions(player):
    ''' Returns the number of actions the player has INT '''
    NORM = 130
    if player == X:
        return len(State(board).actions()) / NORM
    return len(State(opp_b).actions()) / NORM
from player import State, ALL, MOVE, INF
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict as dd
from features import Φ, ALL_STACKS, RINGS

TRAIN_DEPTH = 2
num_features = len(Φ(State()))


def H(features, θ):
    h = np.dot(features, θ)
    if h > 0.99 * INF:
        return 0.99 * INF
    if h < -0.99 * INF:
        return -0.99 * INF
    return h


α = 0.000001 * 3
λ = 0.5
MAX_CHANGE = 0.1


def tree_strap_train(θo, θm, θe, depth=TRAIN_DEPTH):
    OPN, MID, END = 0, 1, 2
    state = State()
    random_turns = np.random.choice([0] + [2] * 2 + [4] * 4 + [8] * 8 +
                                    16 * [16] + 32 * [32])
    while (not state.terminal_test()):
        print(f'Turn number {state.turn}')
        print(state)
        print()
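# Hedged demo (assumption: the numbers below are purely illustrative): H is a
# plain dot product, clamped so a heuristic estimate can never reach the
# terminal utilities ±INF returned by state.utility().
demo_features = np.array([1.0, 0.0])
demo_θ = np.array([2 * INF, 0.0])        # deliberately oversized weight
assert H(demo_features, demo_θ) == 0.99 * INF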
from collections import defaultdict as dd
from features import Φ, ALL_STACKS, RINGS, H
from ab_treestrap_train import alpha_beta_train, ab_weight_updates
from opening import opening_book
from weight import weight1
from multiprocessing import Pool, Manager
import gc

MULTI = False
PROCESSES = 8
AB_TRAIN = True
TRAIN_DEPTH = 4
num_features = len(Φ(State()))
α = 0.000001
λ = 0.5
MAX_CHANGE = 0.01


def tree_strap_train(θo, θd, θm, θe, depth=TRAIN_DEPTH):
    state = State()
    #memoised_features = {} if MULTI else None
    memoised_features = {}
    random_turns = np.random.choice([0] * 0 + [2] * 0 + [6] * 2 + [8] * 4 +
                                    [16] * 4 + [32] * 8)
    # See if each player will use book
    X_use_book = np.random.choice([0, 0, 0, 1])
def startGamePvC():
    board = []
    player_1 = Player(1)
    player_2 = Player(2)
    p2_nodes = 0
    alg_choice = -1
    valid_choice = False
    while not valid_choice:
        print("Please choose an algorithm that the computer should use:")
        print("1 - Mini-Max")
        print("2 - Alpha-Beta")
        alg_choice = int(input())
        if alg_choice == 1:
            print("\033[1;31;17mThe Computer\033[0m uses the Mini-Max Algorithm.")
            valid_choice = True
        elif alg_choice == 2:
            print("\033[1;34;17mThe Computer\033[0m uses the Alpha-Beta Algorithm.")
            valid_choice = True
        else:
            print("Invalid choice. Please type 1 or 2.")
            valid_choice = False

    #num_stones = randint(1, Constants.MAX_STONES)
    num_stones = Constants.NUM_STONES
    initGame(board, player_1, player_2, num_stones)
    printBoard(board)

    rand = randint(0, 1)
    if rand == 0:
        print("\033[1;31;17mThe player\033[0m makes the first move.")
    else:
        print("\033[1;34;17mThe Computer\033[0m makes the first move.")

    while rand == 0:
        if isOutOfMoves(board, Constants.P1_PITS):
            print("\033[1;31;17mThe player\033[0m is out of moves, skip to the Computer:")
        else:
            pit_choice = -1
            pit_valid = False
            while not pit_valid:
                print("\033[1;31;17mThe player\033[0m's turn. Please choose a pit.")
                pit_choice = int(input())
                if isValidPit(board, pit_choice, Constants.P1_PITS):
                    pit_valid = True
                else:
                    print("Invalid choice, please try again.")
            print("\033[1;31;17mThe player\033[0m chose to move the stones in pit #"
                  + str(pit_choice) + ":")
            move = State(None, 0, pit_choice,
                         board[pit_choice].getNumStones(), None)
            tempBoard = []
            for i in range(len(board)):
                tempBoard.append(board[i].getNumStones())
            newBoard = player_1.updateBoard(move, tempBoard)
            move.setBoard(newBoard)
            updateBoard(board, move)
            printBoard(board)
        if isOutOfMoves(board, Constants.P1_PITS) and isOutOfMoves(
                board, Constants.P2_PITS):
            break

        if isOutOfMoves(board, Constants.P2_PITS):
            print("\033[1;34;17mThe Computer\033[0m is out of moves, skip to the player:")
        else:
            print("\033[1;34;17mThe Computer\033[0m's turn.")
            if alg_choice == 1:
                move, node_count = player_2.minimax_decision(board)
            elif alg_choice == 2:
                move, node_count = player_2.alpha_beta_search(board)
            p2_nodes = p2_nodes + node_count
            print("\033[1;34;17mThe Computer\033[0m chose to move the stones in pit #"
                  + str(move.getPitIndex()) + ":")
            updateBoard(board, move)
            printBoard(board)
        if isOutOfMoves(board, Constants.P1_PITS) and isOutOfMoves(
                board, Constants.P2_PITS):
            break

    while rand == 1:
        if isOutOfMoves(board, Constants.P2_PITS):
            print("\033[1;34;17mThe Computer\033[0m is out of moves, skip to the player:")
        else:
            print("\033[1;34;17mThe Computer\033[0m's turn.")
            if alg_choice == 1:
                move, node_count = player_2.minimax_decision(board)
            elif alg_choice == 2:
                move, node_count = player_2.alpha_beta_search(board)
            p2_nodes = p2_nodes + node_count
            print("\033[1;34;17mThe Computer\033[0m chose to move the stones in pit #"
                  + str(move.getPitIndex()) + ":")
            updateBoard(board, move)
            printBoard(board)
        if isOutOfMoves(board, Constants.P1_PITS) and isOutOfMoves(
                board, Constants.P2_PITS):
            break

        if isOutOfMoves(board, Constants.P1_PITS):
            print("\033[1;31;17mThe player\033[0m is out of moves, skip to the Computer:")
        else:
            pit_choice = -1
            pit_valid = False
            while not pit_valid:
                print("\033[1;31;17mThe player\033[0m's turn. Please choose a pit.")
                pit_choice = int(input())
                if isValidPit(board, pit_choice, Constants.P1_PITS):
                    pit_valid = True
                else:
                    print("Invalid choice, please try again.")
            print("\033[1;31;17mThe player\033[0m chose to move the stones in pit #"
                  + str(pit_choice) + ":")
            move = State(None, 0, pit_choice,
                         board[pit_choice].getNumStones(), None)
            tempBoard = []
            for i in range(len(board)):
                tempBoard.append(board[i].getNumStones())
            newBoard = player_1.updateBoard(move, tempBoard)
            move.setBoard(newBoard)
            updateBoard(board, move)
            printBoard(board)
        if isOutOfMoves(board, Constants.P1_PITS) and isOutOfMoves(
                board, Constants.P2_PITS):
            break

    p1_score = (board[Constants.P1_POCKETS[0]].getNumStones()
                + board[Constants.P1_POCKETS[1]].getNumStones())
    p2_score = (board[Constants.P2_POCKETS[0]].getNumStones()
                + board[Constants.P2_POCKETS[1]].getNumStones())
    if p1_score > p2_score:
        print("\033[1;31;17mThe Player\033[0m Won!")
    elif p2_score > p1_score:
        print("\033[1;34;17mThe Computer\033[0m Won!")
    else:
        print("It's a tie!")
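# Hedged, generic sketch (assumption: this is NOT the Player class used above,
# whose minimax_decision / alpha_beta_search are defined elsewhere). It
# illustrates why the Alpha-Beta option expands fewer nodes than plain
# Mini-Max while returning the same value: siblings outside the current
# (alpha, beta) window are cut, which is what the p2_nodes counter measures.
def alpha_beta(state, depth, alpha, beta, maximizing, evaluate, children):
    """Return (value, nodes_expanded) for a generic two-player game tree."""
    nodes = 1
    succ = children(state)
    if depth == 0 or not succ:
        return evaluate(state), nodes
    if maximizing:
        value = float('-inf')
        for child in succ:
            v, n = alpha_beta(child, depth - 1, alpha, beta, False,
                              evaluate, children)
            nodes += n
            value = max(value, v)
            alpha = max(alpha, value)
            if alpha >= beta:      # remaining siblings cannot change the result
                break
        return value, nodes
    else:
        value = float('inf')
        for child in succ:
            v, n = alpha_beta(child, depth - 1, alpha, beta, True,
                              evaluate, children)
            nodes += n
            value = min(value, v)
            beta = min(beta, value)
            if alpha >= beta:
                break
        return value, nodes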