def testGetLowestLegalMoveSet(self): game = Game() #[-2,0,0,0,0,5, 0,3,0,0,0,-5, 5,0,0,0,-3,0, -5,0,0,0,0,2, 0,0,0,0] game.whiteDice1.faceUp = 1 game.whiteDice2.faceUp = 2 analyzer = BoardAnalyzer(game) lowestMoveSet = analyzer.getLowestLegalMoveSet() self.assertTrue(lowestMoveSet == ((5, 4), (4, 2)) or lowestMoveSet == ((5, 3), (3, 2))) game.board.tokens = [ -2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5, 4, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 2, 1, 0, 0, 0 ] lowestMoveSet = analyzer.getLowestLegalMoveSet() self.assertTrue(lowestMoveSet == ((Board.whiteBarPoint, 23), (5, 3)) or lowestMoveSet == ((Board.whiteBarPoint, 22), (5, 4))) game.board.tokens = [ -2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5, 3, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 2, 2, 0, 0, 0 ] lowestMoveSet = analyzer.getLowestLegalMoveSet() self.assertTrue(lowestMoveSet == ((Board.whiteBarPoint, 23), (Board.whiteBarPoint, 22)) or lowestMoveSet == ((Board.whiteBarPoint, 22), (Board.whiteBarPoint, 23)))
def testGetAllLegalMoveSets_isSameForWhiteAndForRed(self):
    """With equal dice, white's legal move sets must mirror red's once
    red's moves are translated into white coordinates."""
    game = Game()
    # Standard opening position.
    a = [
        -2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5,
        5, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 2,
        0, 0, 0, 0
    ]
    game.whiteDice1.faceUp = 1
    game.whiteDice2.faceUp = 2
    analyzer = BoardAnalyzer(game)
    # Sanity check flipBoard on a copy: flipping the symmetric opening
    # position is a no-op, and flipping twice is always the identity.
    aFlip = a.copy()
    aFlip = BoardAnalyzer.flipBoard(aFlip)
    self.assertEqual(aFlip, a)
    aFlip = BoardAnalyzer.flipBoard(aFlip)
    self.assertEqual(a, aFlip)
    # The initial board is symmetric, so the flip() step is not needed here:
    # red and white should have symmetric moves directly.
    allAMoveSets = analyzer.getAllLegalMoveSets()
    game.currentPlayer = game.red
    game.redDice1.faceUp = game.whiteDice1.faceUp
    game.redDice2.faceUp = game.whiteDice2.faceUp
    allBMoveSets = analyzer.getAllLegalMoveSets()
    allBWhiteMoveSets = self.translateMoveSetsToWhiteCoordinates(allBMoveSets)
    #self.humanReadablyPrintMoves(game, allAMoveSets, allBWhiteMoveSets)
    # Spot-check a few expected move pairs before comparing wholesale.
    self.assertTrue(((5, 4), (4, 2)) in allAMoveSets)
    self.assertTrue((((7, 6), (6, 4)) in allAMoveSets)
                    or (((7, 5), (5, 4)) in allAMoveSets)
                    or (((5, 4), (7, 5)) in allAMoveSets))
    self.assertEqual(len(allAMoveSets), len(allBMoveSets))
    self.assertEqual(allAMoveSets, allBWhiteMoveSets)
    # Two independently generated random boards should differ.
    a = randomPopulatedBoard()
    b = randomPopulatedBoard()
    #print(a)
    #print(b)
    # The assignment of a list as default parameter gives the same object.
    # The assignment of a list within a method creates a new object every time. Wow!
    self.assertNotEqual(a, b)
    a = randomPopulatedBoard()
    b = a.copy()
    b = BoardAnalyzer.flipBoard(b)
    self.assertNotEqual(
        a, b)  # a random board will most likely not be symmetric
    b = BoardAnalyzer.flipBoard(b)
    self.assertEqual(a, b)  # flipping back yields the original
def testFlipBoard(self):
    """flipBoard maps a board onto its mirror image: applying it to a
    symmetric board is a no-op, while an asymmetric board must change."""
    symmetricBoards = (
        # the standard opening position
        [
            -2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5,
            5, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 2,
            0, 0, 0, 0
        ],
        # all 15 tokens of each side stacked on mirrored points
        [
            -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15,
            0, 0, 0, 0
        ],
        # alternating singles plus mirrored bar/off counters
        [
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            1, -2, 2, -1
        ],
    )
    for board in symmetricBoards:
        self.assertEqual(board, BoardAnalyzer.flipBoard(board))
    asymmetricBoards = (
        # unbalanced token counts (-14 vs 15) break the symmetry
        [
            -14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15,
            0, 0, 0, 0
        ],
        # the extra -1 on point 1 has no counterpart on point 22
        [
            -14, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15,
            0, 0, 0, 0
        ],
    )
    for board in asymmetricBoards:
        self.assertNotEqual(board, BoardAnalyzer.flipBoard(board))
class NegamaxAi(AiBase):
    """Tic-tac-toe AI choosing moves by negamax search with alpha-beta pruning.

    Scores are expressed from the perspective of the player to move:
    PLAYER_O (+1) / PLAYER_X (-1) mark wins, NEUTRAL (0) marks a draw.
    """

    PLAYER_O = 1
    PLAYER_X = -1
    NEUTRAL = 0

    def get_move(self, board, player):
        """Return the best square for *player* on *board*."""
        self.board_analyzer = BoardAnalyzer()
        return self.best_move(board, player)

    def best_move(self, board, player):
        """Try every available square and keep the one whose child position
        scores lowest for the opponent (negamax convention)."""
        # BUG FIX: sys.maxint was removed in Python 3; sys.maxsize is the
        # portable equivalent (also available on Python 2.6+).
        alpha = -sys.maxsize
        beta = sys.maxsize
        available_squares = board.get_available_squares()
        best_score = sys.maxsize
        best_square = available_squares[0]
        for square in available_squares:
            new_board = deepcopy(board)
            new_board.put_mark_in_square(self.player_mark(player), square)
            # The child score is from the opponent's viewpoint, so the
            # minimum over children is our best move.
            score = self.negamax(new_board, -player, alpha, beta)
            if score < best_score:
                best_score = score
                best_square = square
        return best_square

    def negamax(self, board, player, alpha, beta):
        """Negamax with alpha-beta: value of *board* for *player* to move."""
        if self.board_analyzer.game_over(board):
            # Multiplying by *player* flips the absolute outcome into the
            # mover's perspective.
            return player * self.value_of_node()
        maximum = -sys.maxsize
        available_squares = board.get_available_squares()
        for square in available_squares:
            new_board = deepcopy(board)
            new_board.put_mark_in_square(self.player_mark(player), square)
            result = -self.negamax(new_board, -player, -beta, -alpha)
            if result > maximum:
                maximum = result
            if result > alpha:
                alpha = result
            if alpha >= beta:
                # Cut-off: the opponent will never allow this line.
                return alpha
        return maximum

    def player_mark(self, player):
        """Translate the internal +1/-1 player id into the game's mark."""
        return game.PLAYER_TWO if player == self.PLAYER_O else game.PLAYER_ONE

    def value_of_node(self):
        """Absolute value of a finished game, read from the analyzer."""
        if self.board_analyzer.winner == game.PLAYER_ONE:
            return self.PLAYER_X
        elif self.board_analyzer.winner == game.PLAYER_TWO:
            return self.PLAYER_O
        else:
            return self.NEUTRAL
def testGetAllLegalMoveSets_randomBoards(self):
    """Stress-test move generation on many randomly populated boards:
    plain random boards, boards with tokens on the bar (one each /
    0..15 each), and bearing-off positions."""
    game = Game()
    analyzer = BoardAnalyzer(game)
    scenarios = (
        # (board initializer, multiplier for the number of runs)
        (randomPopulatedBoard, 1),
        (randomPopulatedBoardBar, 20),
        (randomPopulatedBoardManyBar, 20),
        (randomPopulatedBoardBearingOff, 10),
    )
    for initializer, multiplier in scenarios:
        passedInBatch = 0
        for _ in range(number_of_tested_boards * multiplier):
            self.checkMovesForRandomBoard(game, initializer, analyzer)
            # Progress counter in batches of 50 (reporting prints removed).
            passedInBatch += 1
            if passedInBatch == 50:
                passedInBatch = 0
def checkMovesForRandomBoard(self, game, randomBoardInitializerFunction,
                             analyzer):
    """Generate a random board, then assert that red (playing the flipped
    board with the same dice) has exactly white's move sets once they are
    translated back into white coordinates."""
    game.currentPlayer = Game.white
    game.rollOwnDice()
    original = randomBoardInitializerFunction()
    game.board.tokens = original
    whiteMoveSets = analyzer.getAllLegalMoveSets()
    # Mirror the position and hand the same dice values to red.
    game.currentPlayer = Game.red
    game.redDice1.faceUp = game.whiteDice1.faceUp
    game.redDice2.faceUp = game.whiteDice2.faceUp
    mirrored = BoardAnalyzer.flipBoard(original.copy())
    game.board.tokens = mirrored
    redMoveSets = self.translateMoveSetsToWhiteCoordinates(
        analyzer.getAllLegalMoveSets())
    game.board.tokens = original  # restore the original for display.
    #self.humanReadablyPrintMoves(game, whiteMoveSets, redMoveSets)
    self.assertEqual(len(whiteMoveSets), len(redMoveSets))
    self.assertEqual(whiteMoveSets, redMoveSets)
def testGetStepsToGo(self):
    """getStepsToGo returns the current player's pip count: sum over all
    own tokens of (token count) * (distance to bear off)."""
    game = Game()
    analyzer = BoardAnalyzer(game)
    # Opening position [-2,0,0,0,0,5, 0,3,0,0,0,-5, 5,0,0,0,-3,0,
    # -5,0,0,0,0,2, 0,0,0,0] is symmetric: both sides have the same count.
    openingPips = 5 * 6 + 3 * 8 + 5 * 13 + 2 * 24
    self.assertEqual(analyzer.getStepsToGo(), openingPips)
    game.currentPlayer = Game.red
    self.assertEqual(analyzer.getStepsToGo(), openingPips)
    # Move one white token from point 23 off the board (index 26):
    # white's count drops by one 24-pip token, red's is unchanged.
    game.board.tokens = [
        -2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5,
        5, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 1,
        0, 0, 1, 0
    ]
    game.currentPlayer = Game.white
    self.assertEqual(analyzer.getStepsToGo(), 5 * 6 + 3 * 8 + 5 * 13 + 1 * 24)
    game.currentPlayer = Game.red
    self.assertEqual(analyzer.getStepsToGo(), openingPips)
def observe(self, board):
    """Convert the raw Board into the BoardAnalyzer the search needs."""
    analyzer = BoardAnalyzer()
    analyzer.analyze(board)
    self.board = analyzer
class MinimaxPlayer(object):
    """Gomoku player choosing moves by depth-limited minimax with
    alpha-beta pruning over a BoardAnalyzer wrapper of the game board."""

    def __init__(self, depth):
        # Search depth: how many plies to look ahead.
        self.DEPTH = depth
        # Coordinates of the most recently chosen move.
        self.x = -1
        self.y = -1
        # Evaluation score obtained for the last chosen move.
        self.eval = 0

    def observe(self, board):
        """Convert the raw Board into the BoardAnalyzer the search needs."""
        self.board = BoardAnalyzer()
        self.board.analyze(board)

    def obsesrve_finish(self, board, is_winner):
        # NOTE(review): the 'obsesrve' typo is kept because external
        # callers invoke this name; it is a no-op hook.
        pass

    def predict(self):
        """Return the (x, y) of the next move for the observed board."""
        # Opening move: just take the center of the board.
        if (self.board.get_turn() <= 1):
            self.x = self.board.get_size() // 2
            self.y = self.board.get_size() // 2
            return self.x, self.y
        alpha = -999999
        beta = 999999
        self.x, self.y, self.eval = self.minimax(self.board, self.DEPTH,
                                                 alpha, beta, True)
        return self.x, self.y

    def minimax(self, node, depth, alpha, beta, maximizing_player):
        """Alpha-beta minimax; returns (best_x, best_y, best_value)."""
        if depth == 0:
            node.evaluate()
            return -1, -1, node.eval
        # Maximizing Player
        if (maximizing_player):
            best_value = -99999999
            best_x = 0
            best_y = 0
            for i in range(node.get_size()):
                for j in range(node.get_size()):
                    # Branch only on empty squares near existing stones.
                    if node.get_value(i, j) == 0 and node.isNear(i, j):
                        next_node = copy.deepcopy(node)
                        if next_node.put_value(i, j):
                            # put_value signalled a terminal position:
                            # use its evaluation directly.
                            v = next_node.eval
                        else:
                            _, _, v = self.minimax(next_node, depth - 1,
                                                   alpha, beta, False)
                        best_value = self.max_node(best_value, v)
                        if best_value == v:
                            best_x = i
                            best_y = j
                        # Alpha-Beta pruning
                        alpha = self.max_node(alpha, best_value)
                        if beta < alpha:
                            return best_x, best_y, best_value
            return best_x, best_y, best_value
        # Minimizing Player
        else:
            best_value = 99999999
            best_x = 0
            best_y = 0
            for i in range(node.get_size()):
                for j in range(node.get_size()):
                    # Branch only on empty squares near existing stones.
                    if node.get_value(i, j) == 0 and node.isNear(i, j):
                        next_node = copy.deepcopy(node)
                        if next_node.put_value(i, j):
                            v = next_node.eval
                        else:
                            # BUG FIX: the child of a minimizing node is a
                            # maximizing node (the original passed False,
                            # so the sides never alternated below here).
                            _, _, v = self.minimax(next_node, depth - 1,
                                                   alpha, beta, True)
                        best_value = self.min_node(best_value, v)
                        if best_value == v:
                            best_x = i
                            best_y = j
                        # Alpha-Beta pruning
                        # BUG FIX: beta must shrink toward the minimum
                        # (the original used max_node, disabling pruning).
                        beta = self.min_node(beta, best_value)
                        if beta < alpha:
                            return best_x, best_y, best_value
            return best_x, best_y, best_value

    def max_node(self, a, b):
        """Return the larger of a and b."""
        if a > b:
            return a
        else:
            return b

    def min_node(self, a, b):
        """Return the smaller of a and b."""
        if a < b:
            return a
        else:
            return b
def get_move(self, board, player):
    """Create a fresh analyzer for this turn, then delegate the actual
    search to best_move."""
    analyzer = BoardAnalyzer()
    self.board_analyzer = analyzer
    return self.best_move(board, player)
def testThreadSum(self):
    """getThreadSum ("threat sum", presumably) weighs the probability of
    each exposed blot being hit with the pips the current player would
    lose if it were hit.  Expected values read as
    (number of hitting dice rolls) * (1/36) * (pips lost).

    NOTE(review): board indices 24..27 look like bar/borne-off counters
    (e.g. 14 at index 26 balances the single white blot) — confirm
    against Board before relying on that reading.
    """
    game = Game()
    analyzer = BoardAnalyzer(game)
    # White blot on point 1, red anchor on point 0: every roll containing
    # a 1 hits; being hit there costs white 23 pips.
    game.board.tokens = [
        -15, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 14, 0
    ]
    # -(1/6)*(1/6) because (1,1) will only hit the token once.
    self.assertAlmostEqual(analyzer.getThreadSum(),
                           (2 * (1 / 6) - (1 / 6) * (1 / 6)) * 23)
    game.board.tokens = [
        -1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, -14, 14, 0
    ]
    # hits from (1,1), (1,2), (3,*), (2,1), (*,3)
    # --> 14*(1/36) on position 21 (oh, I mean position 4, loss 21)
    self.assertAlmostEqual(analyzer.getThreadSum(), (14 * (1 / 36)) * 21)
    game.board.tokens = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 1,
        0, -14, 14, 0
    ]
    # hits from (1,1), (2,*), (*,2)
    # --> 12*(1/36) on position 1
    self.assertAlmostEqual(analyzer.getThreadSum(), (12 * (1 / 36)) * 1)
    # A red point (-2) instead of a red blot on 21 leaves the same threat.
    game.board.tokens = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 1,
        0, -13, 14, 0
    ]
    self.assertAlmostEqual(analyzer.getThreadSum(), (12 * (1 / 36)) * 1)
    game.board.tokens = [
        -1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 1,
        0, -13, 13, 0
    ]
    # hits from (1,1), (1,2), (3,*), (2,1), (*,3) and from (1,1), (2,*), (*,2)
    # --> 14*(1/36) on position 21 + 12*(1/36) on position 1
    # + interference: 0
    # The risk at the low position (point 1) for (1,1), (1,2) and (2,1) is neglectible
    # because there is a higher risk for these dice values losing token on point 21
    # Should be:
    # self.assertAlmostEqual(analyzer.getThreadSum(), 14*(1/36)*21 + 9*(1/36)*1 )
    self.assertAlmostEqual(analyzer.getThreadSum(),
                           14 * (1 / 36) * 21 + 12 * (1 / 36) * 1)
    game.board.tokens = [
        -1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, -1, 0, 1, 0, 0, 0, 0, 0,
        0, -13, 13, 0
    ]
    # hits from (1,1), (1,2), (3,*), (2,1), (*,3) and from (1,1), (2,*), (*,2)
    # --> 14*(1/36) on position 21 + 12*(1/36) on position 6
    # + interference: (6,6) at position 6
    self.assertAlmostEqual(analyzer.getThreadSum(),
                           14 * (1 / 36) * 21 + (12 + 1) * (1 / 36) * 6)
    # Blot 8 pips away: only indirect (combination) shots can hit.
    game.board.tokens = [
        -1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, -14, 14, 0
    ]
    # hits from (2,2), (4,4), (6,2), (2,6), (3,5), (5,3)
    # --> (1/36) on position 16
    self.assertAlmostEqual(analyzer.getThreadSum(), 6 * (1 / 36) * 16)
    game.board.tokens = [
        0, 0, -1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, -14, 14, 0
    ]
    # hits from (2,2), (6,*), (*,6), (1,5), (5,1), (2,4). (4,2), (3,3)
    # --> (1/36) on position 16
    self.assertAlmostEqual(analyzer.getThreadSum(), 17 * (1 / 36) * 16)
    game.board.tokens = [
        0, 0, 0, 0, -1, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, -14, 14, 0
    ]
    # hits from (1,1) (2,2), (4,*), (*,4), (1,3), (3,1)
    # --> (1/36) on position 16
    self.assertAlmostEqual(analyzer.getThreadSum(), 15 * (1 / 36) * 16)
    game.board.tokens = [
        -1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, -14, 14, 0
    ]
    # hits from (1,4), (4,1), (2,3), (3,2) (5,*), (*,5)
    # --> 15*(1/36) on position 19
    self.assertAlmostEqual(analyzer.getThreadSum(), (15 * (1 / 36)) * 19)
    # test with blocked paths (white points on 2 and 3 block red's combos)
    game.board.tokens = [
        -1, 0, 2, 2, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, -14, 12, 0
    ]
    # hits from (1,4), (4,1), (5,*), (*,5), but not from (2,3) and (3,2)
    # --> 13*(1/36) on position 19
    self.assertAlmostEqual(analyzer.getThreadSum(), (13 * (1 / 36)) * 19)
    # Test for red player: mirrored scenarios, losses counted from
    # red's direction of travel.
    game.currentPlayer = Game.red
    game.board.tokens = [
        -1, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, -14, 0, 0
    ]
    # -(1/6)*(1/6) because (1,1) will only hit the token once.
    # Hits from (1,*), (*,1): 11*(1/36)
    self.assertAlmostEqual(analyzer.getThreadSum(),
                           (2 * (1 / 6) - (1 / 6) * (1 / 6)) * 1)
    game.board.tokens = [
        -1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, -14, 14, 0
    ]
    # hits from (1,1), (1,2), (3,*), (2,1), (*,3)
    # --> 14*(1/36) on position 1
    self.assertAlmostEqual(analyzer.getThreadSum(), (14 * (1 / 36)) * 1)
    game.board.tokens = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 1,
        0, -14, 14, 0
    ]
    # hits from (1,1), (2,*), (*,2)
    # --> 12*(1/36) on position 22
    self.assertAlmostEqual(analyzer.getThreadSum(), (12 * (1 / 36)) * 22)
    # Red's blot is now a made point (-2): no red blot exposed at all.
    game.board.tokens = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 1,
        0, -13, 14, 0
    ]
    self.assertAlmostEqual(analyzer.getThreadSum(), 0)
    game.board.tokens = [
        -1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 1,
        0, -13, 13, 0
    ]
    # hits from (1,1), (1,2), (3,*), (2,1), (*,3) and from (1,1), (2,*), (*,2)
    # --> 14*(1/36) on position 24, loss 1 + 12*(1/36) on position 3, loss 22
    # + interference: 0
    self.assertAlmostEqual(analyzer.getThreadSum(),
                           14 * (1 / 36) * 1 + 12 * (1 / 36) * 22)
    game.board.tokens = [
        -1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, -1, 0, 1, 0, 0, 0, 0, 0,
        0, -13, 13, 0
    ]
    # hits from (1,1), (1,2), (3,*), (2,1), (*,3) and from (1,1), (2,*), (*,2)
    # --> 14*(1/36) on position 24, loss 1 + 12*(1/36) on position 8, loss 17
    # + interference: (6,6) at position 24, loss 1
    self.assertAlmostEqual(analyzer.getThreadSum(),
                           (14 + 1) * (1 / 36) * 1 + (12) * (1 / 36) * 17)
    game.board.tokens = [
        -1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, -14, 14, 0
    ]
    # hits from (2,2), (4,4), (6,2), (2,6), (3,5), (5,3)
    # --> (1/36) on position 24, loss 1
    self.assertAlmostEqual(analyzer.getThreadSum(), 6 * (1 / 36) * 1)
    game.board.tokens = [
        0, 0, -1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, -14, 14, 0
    ]
    # hits from (2,2), (6,*), (*,6), (1,5), (5,1), (2,4). (4,2), (3,3)
    # --> (1/36) on position 22, loss 3
    self.assertAlmostEqual(analyzer.getThreadSum(), 17 * (1 / 36) * 3)
    game.board.tokens = [
        0, 0, 0, 0, -1, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, -14, 14, 0
    ]
    # hits from (1,1) (2,2), (4,*), (*,4), (1,3), (3,1)
    # --> (1/36) on position 20, loss 5
    self.assertAlmostEqual(analyzer.getThreadSum(), 15 * (1 / 36) * 5)
    game.board.tokens = [
        -1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, -14, 14, 0
    ]
    # hits from (1,4), (4,1), (2,3), (3,2) (5,*), (*,5)
    # --> 15*(1/36) on position 24, loss 1
    self.assertAlmostEqual(analyzer.getThreadSum(), (15 * (1 / 36)) * 1)
    # test with blocked paths (red points on 2 and 3 block white's combos)
    game.board.tokens = [
        -1, 0, -2, -2, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, -14, 10, 0
    ]
    # hits from (1,4), (4,1), (5,*), (*,5), but not from (2,3) and (3,2)
    # --> 13*(1/36) on position 24, loss 1
    self.assertAlmostEqual(analyzer.getThreadSum(), (13 * (1 / 36)) * 1)
def testGetAllLegalMoveSets(self): game = Game() #[-2,0,0,0,0,5, 0,3,0,0,0,-5, 5,0,0,0,-3,0, -5,0,0,0,0,2, 0,0,0,0] game.whiteDice1.faceUp = 1 game.whiteDice2.faceUp = 2 analyzer = BoardAnalyzer(game) allMoveSets = analyzer.getAllLegalMoveSets() self.assertEqual(len(allMoveSets), 15) game.board.tokens = [ -2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5, 3, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 2, 2, 0, 0, 0 ] allMoveSets = analyzer.getAllLegalMoveSets() self.assertEqual(len(allMoveSets), 1) game.board.tokens = [ -2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5, 3, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 1, 3, 0, 0, 0 ] game.whiteDice1.faceUp = 1 game.whiteDice2.faceUp = 1 allMoveSets = analyzer.getAllLegalMoveSets() self.assertEqual(len(allMoveSets), 3) game.board.tokens = [ -2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5, 3, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 2, 2, 0, 0, 0 ] game.whiteDice1.faceUp = 1 game.whiteDice2.faceUp = 1 allMoveSets = analyzer.getAllLegalMoveSets() self.assertEqual(len(allMoveSets), 9) # Test red player game.currentPlayer = Game.red #[-2,0,0,0,0,5, 0,3,0,0,0,-5, 5,0,0,0,-3,0, -5,0,0,0,0,2, 0,0,0,0] game.redDice1.faceUp = 1 game.redDice2.faceUp = 2 analyzer = BoardAnalyzer(game) allMoveSets = analyzer.getAllLegalMoveSets() self.assertEqual(len(allMoveSets), 15) game.board.tokens = [ -2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5, 3, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 4, 0, 0, 0, -2 ] allMoveSets = analyzer.getAllLegalMoveSets() self.assertEqual(len(allMoveSets), 1) game.board.tokens = [ -2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5, 3, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 4, 0, 0, 0, -3 ] game.redDice1.faceUp = 1 game.redDice2.faceUp = 1 allMoveSets = analyzer.getAllLegalMoveSets() self.assertEqual(len(allMoveSets), 3) game.board.tokens = [ -2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5, 3, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 4, 0, 0, 0, -2 ] game.redDice1.faceUp = 1 game.redDice2.faceUp = 1 allMoveSets = analyzer.getAllLegalMoveSets() self.assertEqual(len(allMoveSets), 9)
def qlearning_with_minimax(board, player1, player2, memory, sess, saver,
                           epsilon, iteration):
    """Play one training episode: player1 (Q-learning agent, black, odd
    turns) against player2 (minimax player, white, even turns).

    Returns (winner, epsilon) where winner is 1 if the last stone placed
    was black and 2 otherwise, and epsilon is the (possibly decayed)
    exploration rate to carry into the next episode.

    NOTE(review): relies on module-level globals (STONE_PLAYER1, randf,
    epsilonMinimumValue, epsilonDiscount, output_layer, batchSize,
    nbActions, nbStates, X, Y, optimizer, cost) — confirm they are
    defined in this module.
    """
    player1.reset()
    err = 0
    total_score = 0
    gameOver = False
    currentPlayer = STONE_PLAYER1
    while (board.finished != True):
        #------------------------------------------------------------
        # Odd turns: Q-learning player - Black
        #------------------------------------------------------------
        if board.turn % 2 == 1:
            # Fold the opponent's last move into the agent's state.
            last_action = board.last_x * board.size + board.last_y
            player1.updateState(STONE_PLAYER1, last_action)
            # Epsilon-greedy action selection with decay.
            currentState = player1.getState()
            if (randf(0, 1) <= epsilon):
                action = player1.getActionRandom()
            else:
                action = player1.getAction(sess, currentState)
            if (epsilon > epsilonMinimumValue):
                epsilon = epsilon * epsilonDiscount
            nextState, reward, gameOver = player1.act(currentPlayer, action)
            # debug: print("Player1 ==> Turn : {0}, Color : BLACK".format(board.turn))
            # Mirror the action onto the shared game board.
            y = action // board.size
            x = action % board.size
            board.put_value(x, y)
            # Replace the raw reward with a heuristic board score.
            analyzer = BoardAnalyzer()
            reward = analyzer.get_score(board, 1)
            total_score += reward
            # Store the transition once.  BUG FIX: the original called
            # memory.remember() twice with identical arguments, which
            # double-weighted every sample in the replay memory.
            memory.remember(currentState, action, reward, nextState,
                            gameOver)
            # Train on a minibatch drawn from replay memory.
            inputs, targets = memory.getBatch(output_layer, batchSize,
                                              nbActions, nbStates, sess, X)
            _, loss = sess.run([optimizer, cost],
                               feed_dict={X: inputs, Y: targets})
            err = err + loss
        #------------------------------------------------------------
        # Even turns: minimax player - White
        #------------------------------------------------------------
        else:
            player2.observe(board)
            x, y = player2.predict()
            # debug: print("Player2 ==> Turn : {0}, Color : WHITE, Eval : {1}".format(board.turn, player2.eval))
            board.put_value(x, y)
            #board.draw()
    # The loop only exits once the game is finished; report the episode.
    if (board.finished == True):
        print("Episode : {0:5d}, End Turn : {1:3d}, Score : {2:0.1f}, Loss : {3:0.1f}".format(iteration, board.turn, total_score / board.turn, err))
    # The side that placed the final stone wins.
    if (board.turn % 2 == 1):
        winner = 1
    else:
        winner = 2
    return winner, epsilon