def reward(state):
    # finished() returns False while the game is still running, True on a
    # draw, and the winning Player when someone has won.
    b = TicTacToe(board=tuple_to_list2d(state))
    outcome = b.finished()
    if outcome is False:
        return RBASE      # non-terminal state
    if outcome is True:
        return RDRAW
    if outcome == Player.X:
        return RVICTORY
    return RLOSS          # Player.O won
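The reward constants and the tuple_to_list2d helper live elsewhere in the module and are not shown here; below is a minimal sketch of plausible definitions, assuming a flat 9-tuple state and a conventional win/draw/loss reward scheme (the exact values and layout are assumptions, not taken from the source):

# Assumed values -- the real constants are defined elsewhere in the module.
RVICTORY = 1.0
RDRAW = 0.5
RLOSS = -1.0
RBASE = 0.0   # reward for any non-terminal state

def tuple_to_list2d(state):
    """Assumed helper: unpack a flat 9-tuple into a 3x3 row-major list."""
    return [list(state[row * 3:(row + 1) * 3]) for row in range(3)]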
Example #2
else:
    print(f"{v} isn't a valid player, please use 'X' or 'O'")
    print("Using default...")
    player = Player.X

print("Player starting" if player == Player.X else "Player second")

# Use precalculated policy from file.
# If file not present, find the policy via value iteration.
try:
    print("Reading the AI policy")
    # The AI plays the opposite mark, so load the matching policy file.
    filename = "policy_O.pkl" if player == Player.X else "policy_X.pkl"
    with open(filename, "rb") as file:
        policy = pickle.load(file)
    ai = Agent(game, policy)
except (OSError, pickle.UnpicklingError):
    print("Failed to read the AI policy, calculating it now...")
    print("To precalculate and save the policy run valueiteration.py")
    ai = ReinforcementAgent(game, ~player)
print("Ready to play!!!")

# Expect exactly one command-line argument: the player's mark.
assert len(sys.argv) == 2

# TODO : Add possibility for player to play as O
while True:
    if not game.finished() and game.player is ~player:
        ai.play()
    handle_events(game)
    draw_board(game.board)
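The ~player expression in this example assumes a Player enum that defines __invert__ to flip between the two marks. The enum itself is not part of the snippet, so the following is only a sketch of the shape it presumably has (the member values are an assumption):

from enum import Enum

class Player(Enum):
    X = "X"
    O = "O"

    def __invert__(self):
        # ~Player.X is Player.O and vice versa, as used in the game loop.
        return Player.O if self is Player.X else Player.X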
Example #3
 def startGame(self, trainingGame=False, learningRate=0.1, decay=0.99):
     cross = 1
     circle = -1
     playerMoves = []
     playerBoard = []
     neuralMoves = []
     neuralBoard = []
     mark = circle
     game = TicTacToe()
     entry = ""
     while entry != "exit" and game.finished() == False:
         if entry == "print":
             self.print()
         else:
             game.print()
             if mark == cross:
                 # Network's turn: switch to circle and ask the net for a move.
                 mark = circle
                 board = game.getBoard(mark)
                 coord = self.action(board)
                 x = coord[0][0]
                 y = coord[0][1]
                 print(f"x: {x}, y: {y}")
                 # The net chose an occupied square: in a training game,
                 # punish the illegal pick and query it again; otherwise
                 # give up on this move.
                 while not game.addMark(mark, x, y):
                     if not trainingGame:
                         break
                     out = [1.0] * 9
                     value = self.coordinatesToIndex(x, y)
                     out[value] = 0.0
                     self.NN.trainWithOutput(learningRate, out)
                     coord = self.action(board)
                     x = coord[0][0]
                     y = coord[0][1]
                 if trainingGame:
                     value = self.coordinatesToIndex(x, y)
                     neuralMoves.append(value)
                     neuralBoard.append(board)
             else:
                 # Human's turn: switch to cross and read "x,y" (or a command).
                 mark = cross
                 entry = input()
                 if entry != "exit":
                     split = entry.split(',')
                     x = int(split[0])
                     y = int(split[1])
                     if game.addMark(mark, x, y) and trainingGame:
                         board = game.getBoard(mark)
                         value = self.coordinatesToIndex(x, y)
                         playerMoves.append(value)
                         playerBoard.append(board)
         print()
     if game.finished():
         game.print()
         winner = game.getWinner()
         print("Winner: " + str(winner))
         if trainingGame:
             # Replay the game backwards so the most recent moves get the
             # full learning rate and earlier moves a decayed one.
             neuralMoves.reverse()
             neuralBoard.reverse()
             playerMoves.reverse()
             playerBoard.reverse()
             if winner == cross:
                 # Human won: discourage the net's moves, reinforce the human's.
                 for i in range(len(neuralMoves)):
                     target = [1.0] * 9
                     target[neuralMoves[i]] = 0.0
                     self.NN.train(learningRate * (decay**i),
                                   neuralBoard[i], target)
                 for i in range(len(playerMoves)):
                     target = [0.0] * 9
                     target[playerMoves[i]] = 1.0
                     self.NN.train(learningRate * (decay**i),
                                   playerBoard[i], target)
             elif winner == circle:
                 # Net won: reinforce its moves, discourage the human's.
                 for i in range(len(neuralMoves)):
                     target = [0.0] * 9
                     target[neuralMoves[i]] = 1.0
                     self.NN.train(learningRate * (decay**i),
                                   neuralBoard[i], target)
                 for i in range(len(playerMoves)):
                     target = [1.0] * 9
                     target[playerMoves[i]] = 0.0
                     self.NN.train(learningRate * (decay**i),
                                   playerBoard[i], target)
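Both startGame and trainFor (Example #4 below) funnel moves through self.coordinatesToIndex, which is not shown in either snippet. A minimal sketch, assuming the ordering implied by the pos list in Example #4, where x varies fastest; if the network orders its nine outputs differently, the mapping would change accordingly:

def coordinatesToIndex(self, x, y):
    # Assumed mapping: output unit 0 is (0, 0), unit 1 is (1, 0), ...,
    # unit 8 is (2, 2) -- i.e. x varies fastest within each row.
    return y * 3 + x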
Example #4
 def trainFor(self, learningRate, decay, exploreRate, episodes):
     cross = 1
     circle = -1
     gameNum = 0
     for _ in range(episodes):
         self.totalGamesTrained += 1
         gameNum += 1
         print("Number " + str(gameNum))
         game = TicTacToe()
         crossMoves = []
         crossBoard = []
         circleMoves = []
         circleBoard = []
         mark = circle
         while not game.finished():
             #game.print()
             # Alternate marks; mark starts as circle before the loop,
             # so cross makes the first move of every game.
             if mark == cross:
                 mark = circle
             else:
                 mark = cross
             x = None
             y = None
             board = game.getBoard(mark)
             # Exploit: ask the network for its preferred move.
             if random.random() >= exploreRate:
                 coord = self.action(board)
                 x = coord[0][0]
                 y = coord[0][1]
                 # Punish illegal (occupied-square) picks, then re-query.
                 while not game.addMark(mark, x, y):
                     out = [1.0] * 9
                     value = self.coordinatesToIndex(x, y)
                     out[value] = 0.0
                     self.NN.trainWithOutput(learningRate, out)
                     coord = self.action(board)
                     x = coord[0][0]
                     y = coord[0][1]
             else:
                 # Explore: play a random square instead of asking the net.
                 pos = [[0, 0], [1, 0], [2, 0], [0, 1], [1, 1], [2, 1],
                        [0, 2], [1, 2], [2, 2]]
                 picked = random.choice(pos)
                 x = picked[0]
                 y = picked[1]
                 while not game.addMark(mark, x, y):
                     # Square already taken: drop it and draw again.
                     pos.remove(picked)
                     picked = random.choice(pos)
                     x = picked[0]
                     y = picked[1]
             # Record the move so the end-of-game update can score it.
             value = self.coordinatesToIndex(x, y)
             if mark == cross:
                 crossMoves.append(value)
                 crossBoard.append(board)
             else:
                 circleMoves.append(value)
                 circleBoard.append(board)
         #game.print()
         # Replay the finished game backwards: the most recent moves get
         # the full learning rate, earlier moves a decayed one.
         crossMoves.reverse()
         crossBoard.reverse()
         circleMoves.reverse()
         circleBoard.reverse()
         winner = game.getWinner()
         if winner == cross:
             # Cross won: reinforce its moves, discourage circle's.
             for i in range(len(crossMoves)):
                 target = [0.0] * 9
                 target[crossMoves[i]] = 1.0
                 self.NN.train(learningRate * (decay**i), crossBoard[i],
                               target)
             for i in range(len(circleMoves)):
                 target = [1.0] * 9
                 target[circleMoves[i]] = 0.0
                 self.NN.train(learningRate * (decay**i), circleBoard[i],
                               target)
         elif winner == circle:
             # Circle won: reinforce its moves, discourage cross's.
             for i in range(len(circleMoves)):
                 target = [0.0] * 9
                 target[circleMoves[i]] = 1.0
                 self.NN.train(learningRate * (decay**i), circleBoard[i],
                               target)
             for i in range(len(crossMoves)):
                 target = [1.0] * 9
                 target[crossMoves[i]] = 0.0
                 self.NN.train(learningRate * (decay**i), crossBoard[i],
                               target)
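A plausible way to drive trainFor is in stages with a shrinking exploration rate, so early games explore widely and later games mostly exploit the learned policy. The agent variable and the hyperparameter values here are illustrative assumptions, not part of the snippet:

# Hypothetical training schedule -- agent is an instance of the class above.
exploreRate = 0.9
for stage in range(5):
    agent.trainFor(learningRate=0.1, decay=0.99,
                   exploreRate=exploreRate, episodes=1000)
    exploreRate *= 0.5   # explore less as the policy improves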