def play(self, board):
    """Ask the human player for a move via stdin and return it.

    Prints the legal moves, then loops until the typed move passes the
    range/validity check.  Typing -1 maps to action 2 * self.game.n
    (presumably a special action index -- TODO confirm against the game).
    """
    # display(board)
    valid = self.game.getValidMoves(board, 1)
    print('hp: possible moves: ', end="")
    for move, is_legal in enumerate(valid):
        if is_legal:
            print(move, end=' ')
    print('')
    while True:
        # Python 3.x
        raw = input()
        # Python 2.x
        # raw = raw_input()
        parsed = 0
        # every space-separated token must parse as int; the last one wins
        for token in raw.split(' '):
            parsed = int(token)
        choice = parsed if parsed != -1 else 2 * self.game.n
        if 0 <= choice <= self.game.n and valid[choice]:
            break
        print('hp: Invalid')
    select = choice
    # sanity-check the chosen move against a scratch board
    b = Board(6)
    b.pieces = np.copy(board)
    b.check_board(select, prefix="hp: ")
    return select
def play(self, board):
    """Choose a column from the oracle's per-column evaluation.

    Scans the six columns for the highest oracle score (127 marks an
    unusable column), breaking exact ties by coin flip, and remembers the
    runner-up.  With probability self.mistake_fraction the runner-up is
    played instead, provided it is at most self.mistake_max worse.
    """
    valid = self.game.getValidMoves(board, 1)  # kept for parity with the other players
    scratch = Board(6)
    scratch.pieces = np.copy(board)
    scores = scratch.oracle_eval_board(prefix="op: ")

    best, best_i = -127, 6
    runner_up, runner_up_i = best, best_i
    for col in range(6):
        if scores[col] == 127:  # 127 = oracle sentinel for "no score"
            continue
        # strictly better, or equally good and the coin flip says switch;
        # note random() is only consumed on an exact tie (short-circuit)
        if scores[col] > best or (scores[col] == best and random() < 0.5):
            runner_up, runner_up_i = best, best_i
            best, best_i = scores[col], col

    select = best_i
    if random() < self.mistake_fraction:
        # deliberately take the other choice, if it exists and is not absurd
        if runner_up != -127 and (best - runner_up) <= self.mistake_max:
            select = runner_up_i
    return select
def play(self, board):
    """Greedy one-ply player.

    Evaluates every legal action by the score of the position it leads
    to and returns one of the best-scoring actions, chosen uniformly at
    random when several tie.

    :param board: the current configuration of the board
    :return: a randomly chosen action among those with the best score

    Fixes over the previous version: local names no longer shadow the
    builtins ``list`` and ``max``; the full O(n log n) sort is replaced
    by an O(n) ``min`` scan (the candidate order, and hence the random
    choice, is unchanged); ``append`` replaces ``+= [..]``.
    """
    valids = self.game.getValidMoves(board, 1)
    # (negated_score, action) for every legal action; negation makes the
    # best score the minimum, matching the old sort-ascending convention
    candidates = []
    for a in range(self.game.getActionSize()):
        if valids[a] == 0:
            continue
        nextBoard, _ = self.game.getNextState(board, 1, a)
        score = self.game.getScore(nextBoard, 1)
        candidates.append((-score, a))
    best = min(neg for neg, _ in candidates)
    best_actions = [a for neg, a in candidates if neg == best]
    select = random.choice(best_actions)
    # sanity-check the chosen move against a scratch board
    b = Board(6)
    b.pieces = np.copy(board)
    b.check_board(select, prefix="gp: ")
    return select
def play(self, board):
    """Run a fresh MCTS with the globally configured simulation budget
    and return the action with the highest visit probability."""
    mcts_args = dotdict({'numMCTSSims': args.mcts, 'cpuct': args.cpuct})
    search = MCTS(self.game, self.n1, mcts_args)
    pi = search.getActionProb(board, temp=1)
    select = np.argmax(pi)
    # sanity-check the chosen move against a scratch board
    b = Board(6)
    b.pieces = np.copy(board)
    b.check_board(select, prefix="nn: ")
    return select
def play(self, board):
    """Select a move with fixed-depth alpha-beta minimax.

    :param board: the configuration of the board
    :return: the action component of the (action, score) pair produced
        by the alpha-beta search
    """
    result = self.minimax((board, -1), self.depth, 1, -infinity, +infinity)
    print("mp: minmax at depth " + str(self.depth))
    select = result[0]
    print("mp: select " + str(select))
    # sanity-check the chosen move against a scratch board
    b = Board(6)
    b.pieces = np.copy(board)
    b.check_board(select, prefix="mp: ")
    return select
def play(self, board):
    """Return a uniformly random legal action (by rejection sampling)."""
    valid = self.game.getValidMoves(board, 1)
    print('rp: possible moves: ', end="")
    for move, is_legal in enumerate(valid):
        if is_legal:
            print(move, end=' ')
    print('')
    # keep drawing until the draw lands on a legal action
    select = np.random.randint(self.game.getActionSize())
    while valid[select] != 1:
        select = np.random.randint(self.game.getActionSize())
    print('rp: select ' + str(select))
    # sanity-check the chosen move against a scratch board
    b = Board(6)
    b.pieces = np.copy(board)
    b.check_board(select, prefix="rp: ")
    return select
def executeEpisode(self):
    """
    Execute one episode of self-play, starting with player 1.

    As the game is played, each turn's canonical board (plus all its
    symmetries) is added as a training example.  After the game ends,
    the outcome r is used to assign a value to each example from the
    perspective of the player to move.  Uses temp=1 while
    episodeStep < tempThreshold, and temp=0 thereafter.  Games running
    past self.mcts.MAX_TREE_DEPTH moves are scored as a near-draw
    (r = 1e-4).

    Returns:
        trainExamples: a list of examples of the form (canonicalBoard, pi, v)
                       pi is the MCTS informed policy vector, v is +1 if
                       the player eventually won the game, else -1.

    Fixes over the previous version: the final position was appended
    TWICE -- once as a raw ndarray and once via .tolist(); only the
    .tolist() form is kept, matching every other example.  The legal-move
    list no longer shadows the ``moves`` counter, the fabricated final pi
    uses getActionSize() instead of a hard-coded length 7, and the
    vestigial ``if 1:`` wrapper and unused prevboard/prevplayer locals
    are gone.
    """
    trainExamples = []
    board = self.game.getInitBoard()
    self.curPlayer = 1
    episodeStep = 0
    moves = 0
    max_moves = self.mcts.MAX_TREE_DEPTH

    while True:
        episodeStep += 1
        canonicalBoard = self.game.getCanonicalForm(board, self.curPlayer)
        # TODO: look carefully into good settings for tempThreshold. Game dependent!
        # Don't want to be stuck in a loop at lower levels.
        temp = int(episodeStep < self.args.tempThreshold)

        pi = self.mcts.getActionProb(canonicalBoard, temp=temp)
        sym = self.game.getSymmetries(canonicalBoard, pi)
        for b, p in sym:
            trainExamples.append([b.tolist(), self.curPlayer, p, None])

        action = np.random.choice(len(pi), p=pi)
        board, self.curPlayer = self.game.getNextState(
            board, self.curPlayer, action)

        r = self.game.getGameEnded(board, self.curPlayer)
        moves += 1
        if moves > max_moves:
            # cap episode length; score the stuck game as a near-draw
            r = 1e-4

        if r != 0:
            # Also record the final position that finished the game.
            canonicalBoard = self.game.getCanonicalForm(board, self.curPlayer)
            b = Board(6)
            b.pieces = np.copy(canonicalBoard)
            # Fabricate a (fake but legal) uniform pi for the final
            # position instead of running MCTS on it.
            legal = b.get_legal_moves(self.curPlayer)
            pi = [0.0] * self.game.getActionSize()
            if len(legal) > 0:
                for i in legal:
                    pi[i] = 1 / len(legal)
            else:
                pi[-1] = 1  # no legal move: all weight on the last action
            trainExamples.append(
                [canonicalBoard.tolist(), self.curPlayer, pi, None])
            return [(x[0], x[2], r * ((-1) ** (x[1] != self.curPlayer)))
                    for x in trainExamples]