def play(self):
    self.game.reset()
    while True:
        # Player 1: MCTS with 10,000 iterations per move.
        state = board.Game(self.game.clone())
        move = mcts.search(state, 10000)
        self.game.placePiece(move)
        self.game.displayBoard()
        if self.game.victor != 0:
            break
        # Disabled human-opponent path; re-enable to play against the AI:
        # self.makeHumanMove()
        # self.game.displayBoard()
        # if self.game.victor != 0:
        #     break
        print('------------------------------')
        # Player 2: MCTS with 20,000 iterations per move.
        state = board.Game(self.game.clone())
        move = mcts.search(state, 20000)
        self.game.placePiece(move)
        self.game.displayBoard()
        if self.game.victor != 0:
            break
        print('------------------------------')
    print('Player ' + str(self.game.victor) + ' wins!')
def search(rootState, iterations):
    rootNode = Node(state=rootState)

    for i in range(iterations):
        node = rootNode
        state = board.Game(rootState.clone(), rootState.turn)

        # Select: descend while the node is fully expanded and non-terminal.
        while node.untriedMoves == [] and node.childNodes != []:
            node = node.selectChild()
            state.placePiece(node.move)

        # Expand: if we can expand (i.e. the state/node is non-terminal),
        # pick an untried move, add a child for it, and descend.
        if node.untriedMoves != []:
            m = random.choice(node.untriedMoves)
            state.placePiece(m)
            node = node.addChild(m, state)

        # Rollout: play random moves until the game ends. This can often be
        # made orders of magnitude quicker with a state.GetRandomMove() function.
        while state.getMoves() != []:
            state.placePiece(random.choice(state.getMoves()))

        # Backpropagate from the expanded node back to the root, updating each
        # node with the terminal result from the POV of node.playerJustMoved.
        while node is not None:
            node.update(state.getResult(node.playerJustMoved))
            node = node.parentNode

    # Return the move that was most visited.
    return sorted(rootNode.childNodes, key=lambda c: c.visits)[-1].move
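# NOTE: search() relies on a Node class that is not shown in this file. The
# sketch below is a minimal stand-in consistent with the calls above
# (untriedMoves, childNodes, selectChild, addChild, update, parentNode,
# playerJustMoved, visits); the UCB1 constant and the way playerJustMoved is
# derived from the game state are assumptions, not the actual implementation.
import math


class Node:
    def __init__(self, move=None, parent=None, state=None):
        self.move = move                        # move that produced this node
        self.parentNode = parent
        self.childNodes = []
        self.untriedMoves = state.getMoves()    # legal moves not yet expanded
        self.playerJustMoved = 3 - state.turn   # assumes state.turn is the player to move
        self.wins = 0.0
        self.visits = 0

    def selectChild(self):
        # UCB1: average result (wins/visits) plus an exploration bonus.
        return max(self.childNodes,
                   key=lambda c: c.wins / c.visits
                   + math.sqrt(2 * math.log(self.visits) / c.visits))

    def addChild(self, move, state):
        child = Node(move=move, parent=self, state=state)
        self.untriedMoves.remove(move)
        self.childNodes.append(child)
        return child

    def update(self, result):
        self.visits += 1
        self.wins += result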
def main():
    RUN = True
    game = board.Game()
    # If you want the AI to be the first to go, uncomment the next line.
    # game.AI()
    game.getBoard()
    while RUN:
        x, y = getUserInput(game)
        game.addToBoard(x, y)
        game.getBoard()
        # Check for the result.
        result = game.checkState(game.board)
        if result is not None:
            game.printResult(result)
            RUN = False
def play(self, state, debug=False, player=1):
    # Column of the predict_proba output holding this player's win probability.
    ind = 1 if player == 1 else 0
    t_game = board.Game()
    p_states = []
    # Simulate dropping a piece in each of the 7 columns from the given state.
    for i in range(7):
        t_game.board = copy.deepcopy(state)
        t_game.turn = 1
        t_game.placePiece(i)
        p_states.append(copy.deepcopy(t_game.board.ravel()))
    p = self.nn.predict_proba(p_states)
    if debug:
        print(p_states, flush=True)
        print(p, flush=True)
    # Pick the column whose resulting position the network rates highest.
    bestPlay = (0, p[0][ind])
    for i in range(1, 7):
        if p[i][ind] > bestPlay[1]:
            bestPlay = (i, p[i][ind])
    return bestPlay[0]
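# NOTE: a hypothetical call site, assuming a Player whose self.nn has already
# been fitted and the board.Game API used elsewhere in this repo (reset,
# placePiece, board); the snippet is illustrative, not part of the project.
import board
import player

p = player.Player()     # assumed to load or train its network elsewhere
g = board.Game()
g.reset()
col = p.play(g.board)   # column choice for player 1
g.placePiece(col)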
import os
import time

import board
import minmax

m = minmax.minmax()
b = board.SimpleBoard()
g = board.Game()


def speed_test():
    # Compare the two board evaluators on the same position.
    start = time.perf_counter()
    m.evaluator(b, b.RED)
    end = time.perf_counter()
    print("Time Elapsed: " + str(end - start))

    start = time.perf_counter()
    m.evaluator2(b, b.RED)
    end = time.perf_counter()
    print("Time Elapsed2: " + str(end - start))
    # m.minimax(b, 4, b.RED, float('-inf'), float('inf'))


def even_fight():
    # Step the game 100 times, timing each move.
    for i in range(100):
        time.sleep(0.40)
        # os.system("clear")
        start = time.perf_counter()
        g.step()
        end = time.perf_counter()
        print("Time Elapsed: " + str(end - start))
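# NOTE: a minimal entry point for running this harness directly; an
# assumption, since the original file does not show how it is invoked.
if __name__ == '__main__':
    speed_test()
    even_fight()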
def __init__(self):
    self.player = player.Player()
    # self.player.qtrain(.45, .99)
    self.game = board.Game()
def main():
    # Network
    n = network.Network()

    # Grid geometry: cell size and the margin between cells.
    WIDTH = 20
    HEIGHT = 20
    MARGIN = 5
    y_grid = 12
    x_grid = 10

    # Colors
    BACKGROUND_COLOR = (94, 191, 226)  # Baby blue

    # Create the game, screen, board, and path.
    game = board.Game(WIDTH, HEIGHT, MARGIN, x_grid, y_grid)
    game.create_screen(500, 255)
    game.create_board()
    game.board_coloring()
    game.create_path()
    game.screen.fill(BACKGROUND_COLOR)

    # Players: player 1 starts.
    posicao = 0
    player_1 = player.Player('0', True)
    game.add_player(player_1)
    player_2 = player.Player('1', False)
    game.add_player(player_2)

    # Loop until the user clicks the close button.
    done = False

    # Used to manage how fast the screen updates.
    clock = pygame.time.Clock()

    # -------- Main Program Loop -----------
    while not done:
        clock.tick(60)

        if player_1.play:
            for event in pygame.event.get():  # User did something
                if event.type == pygame.QUIT:  # If user clicked close
                    done = True  # Flag that we are done so we exit this loop
                elif event.type == pygame.MOUSEBUTTONDOWN:
                    # On a click, roll the dice and advance player 1.
                    pos = pygame.mouse.get_pos()
                    for dice in game.dice:
                        if dice.click(pos):
                            resultado = game.dice_roll()
                            posicao = player_1.atualiza_posicao(resultado, game.path)
                            player_1.play = False
                            player_2.play = True
        elif player_2.play:
            for event in pygame.event.get():  # User did something
                if event.type == pygame.QUIT:  # If user clicked close
                    done = True  # Flag that we are done so we exit this loop
                elif event.type == pygame.MOUSEBUTTONDOWN:
                    # On a click, roll the dice and advance player 2.
                    pos = pygame.mouse.get_pos()
                    for dice in game.dice:
                        if dice.click(pos):
                            resultado = game.dice_roll()
                            posicao = player_2.atualiza_posicao(resultado, game.path)
                            player_1.play = True
                            player_2.play = False

        # Set the screen background.
        game.screen.fill(BACKGROUND_COLOR)

        # Draw the grid.
        for row in range(x_grid):
            for column in range(y_grid):
                casa = game.grid[row][column]
                pygame.draw.rect(game.screen,
                                 casa.color,
                                 [casa.x1, casa.y1, casa.x2, casa.y2])

        # Display the dice buttons.
        for dice in game.dice:
            dice.draw(game.screen)

        # Display the players.
        for player_ in game.player_list:
            player_.draw(game.screen)

        # Reaching the final square on the path ends the game.
        if posicao == 47:
            done = True

        # Update screen.
        pygame.display.update()

    # Be IDLE friendly. If you forget this line, the program will 'hang' on exit.
    time.sleep(2)
    pygame.quit()
def train(self, learnRate, discFact):
    # Train the basic Q-learning model first to give the network a warm start.
    self.qtrain(learnRate, discFact)
    game = board.Game()

    # Phase 1: self-play games guided by the Q-table up to MAX_DEPTH.
    dinc = 0
    for i in range(10000):
        currNode = self.root
        currState = '11111111111111'
        r = 0
        game.reset()
        depth = 0
        while r == 0:
            depth += 1
            r = -1
            while r == -1:
                # Player 1: Q-table policy while shallow, random afterwards.
                if depth < self.MAX_DEPTH:
                    m = self.qplay(currNode.boardState)
                else:
                    m = random.randint(0, 6)
                r = game.placePiece(m)
                newKey = currNode.findNext(m, 1)
                if r == -1:
                    self.nodeList[newKey] = Node(newKey, -10000000)
            if newKey not in self.nodeList:
                self.nodeList[newKey] = Node(newKey, 0)
            currNode = self.nodeList[newKey]
            if r != 1 and r != 2:
                # Player 2: random moves.
                r = 1
                while r == 1:
                    m = random.randint(0, 6)
                    r = -1 * game.placePiece(m)
                    currState = currNode.findNext(m, 2)
                tNode = Node(currState, 0)
        if i % 10000 == 0:
            dinc += 1
            print(i, flush=True)

    # Phase 2: generate games and fit the network on every state seen,
    # labelled with the final result.
    for i in range(10000):
        currNode = self.root
        seenStates = []
        r = 0
        game.reset()
        while r == 0:
            r = -1
            rando = False
            while r == -1:
                # After 5000 games, let the network pick the first attempt,
                # then fall back to random moves.
                if not rando and i > 5000:
                    m = self.play(game.board)
                    rando = True
                else:
                    m = random.randint(0, 6)
                r = game.placePiece(m)
            seenStates.append(game.board.ravel())
            if r != 1 and r != 2:
                r = 1
                rando = False
                while r == 1:
                    if not rando and i > 5000:
                        m = self.play(game.board, player=2)
                        rando = True
                    else:
                        m = random.randint(0, 6)
                    r = -1 * game.placePiece(m)
                if r == -2:
                    r = 2
                seenStates.append(game.board.ravel())
        self.nn.partial_fit(seenStates, [r] * len(seenStates), [-1, 1, 2])
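# NOTE: train() assumes self.nn supports partial_fit(X, y, classes) and
# predict_proba, i.e. an incremental scikit-learn classifier. A plausible
# setup (assumed, not confirmed by this repository; the layer sizes are
# illustrative):
from sklearn.neural_network import MLPClassifier

# The class list [-1, 1, 2] matches the partial_fit call above (game-outcome
# labels for the flattened board states).
nn = MLPClassifier(hidden_layer_sizes=(128, 64))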
def qtrain(self, learnRate, discFact):
    game = board.Game()

    # Phase 1: 500,000 fully random games, updating Q-values as running averages.
    for i in range(500000):
        currNode = self.root
        r = 0
        game.reset()
        while r == 0:
            pNode = currNode
            r = -1
            while r == -1:
                # Player 1 move: retry until legal; illegal moves are penalized.
                m = random.randint(0, 6)
                r = game.placePiece(m)
                newKey = currNode.findNext(m, 1)
                if r == -1:
                    self.nodeList[newKey] = Node(newKey, -10000000)
            if newKey not in self.nodeList:
                self.nodeList[newKey] = Node(newKey, 0)
            tNode = self.nodeList[newKey]
            if r != 1 and r != 2:
                # Player 2 move: same retry loop from the other side.
                r = 1
                while r == 1:
                    m = random.randint(0, 6)
                    r = -1 * game.placePiece(m)
                    newKey = tNode.findNext(m, 2)
                    if r == 1:
                        self.nodeList[newKey] = Node(newKey, -1000000)
                if newKey not in self.nodeList:
                    self.nodeList[newKey] = Node(newKey, 0)
                currNode = self.nodeList[newKey]
            reward = 0
            if r == 1:
                reward = 100
            elif r == -1:
                reward = -100
            bestChildVal = self.bestChild(tNode, 2)  # average reward from this position for player 2
            pNode.seen += 1
            pNode.qval = (pNode.seen - 1) / pNode.seen * pNode.qval + (
                -1 * reward + discFact * bestChildVal) / pNode.seen
            bestChildVal = self.bestChild(currNode, 1)  # average reward from this position for player 1
            tNode.seen += 1
            tNode.qval = (tNode.seen - 1) / tNode.seen * tNode.qval + (
                reward + discFact * bestChildVal) / tNode.seen
            # if pNode.qval != 0:
            #     print(pNode.qval)
            # if tNode.qval != 0:
            #     print(tNode.qval)
    print('Done with random training', flush=True)

    # Phase 2: 100,000 guided games; play the greedy Q-policy to a growing
    # depth (5 + dinc), then random, using a fixed learning rate.
    dinc = 0
    for i in range(100000):
        currNode = self.root
        currState = '11111111111111'
        r = 0
        game.reset()
        depth = 0
        while r == 0:
            pNode = currNode
            depth += 1
            r = -1
            while r == -1:
                if depth < 5 + dinc:
                    m = self.qplay(currNode.boardState)
                else:
                    m = random.randint(0, 6)
                r = game.placePiece(m)
                newKey = currNode.findNext(m, 1)
                if r == -1:
                    self.nodeList[newKey] = Node(newKey, -10000000)
            if newKey not in self.nodeList:
                self.nodeList[newKey] = Node(newKey, 0)
            tNode = self.nodeList[newKey]
            if r != 1 and r != 2:
                r = 1
                while r == 1:
                    if depth < 5 + dinc:
                        m = self.qplay(tNode.boardState)
                    else:
                        m = random.randint(0, 6)
                    r = -1 * game.placePiece(m)
                    newKey = tNode.findNext(m, 2)
                    if r == 1:
                        self.nodeList[newKey] = Node(newKey, -10000000)
                if newKey not in self.nodeList:
                    self.nodeList[newKey] = Node(newKey, 0)
                currNode = self.nodeList[newKey]
            reward = 0
            if r == 1:
                reward = 100
            elif r == -1:
                reward = -100
            bestChildVal = self.bestChild(tNode, 2)
            # Q-learning update for player 2.
            pNode.qval = (1 - learnRate) * pNode.qval + learnRate * (
                reward + discFact * bestChildVal)
            bestChildVal = self.bestChild(currNode, 1)
            # Q-learning update for player 1.
            tNode.qval = (1 - learnRate) * tNode.qval + learnRate * (
                reward + discFact * bestChildVal)
        if i % 10000 == 0:
            dinc += 1
            print(i, flush=True)
    print('done', flush=True)
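# NOTE: qtrain() uses two different update rules: the random phase maintains
# an exact running mean over all visits, while the guided phase uses a fixed
# learning rate, which weights recent games geometrically more. A toy
# comparison of the two (values invented; alpha = 0.45 echoes the commented
# qtrain(.45, .99) call elsewhere in this repo):
def running_mean(q, n, target):
    # After n updates, q is the exact mean of all targets seen so far.
    return (n - 1) / n * q + target / n


def fixed_rate(q, alpha, target):
    # Exponential moving average: older targets decay by (1 - alpha) per step.
    return (1 - alpha) * q + alpha * target


q_avg, q_ema = 0.0, 0.0
for n, target in enumerate([100, -100, 100], start=1):
    q_avg = running_mean(q_avg, n, target)
    q_ema = fixed_rate(q_ema, 0.45, target)
print(q_avg, q_ema)  # mean of all outcomes vs. recency-weighted estimate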
def train(self, learnRate, discFact):
    game = board.Game()

    # Phase 1: 1,000,000 random games with running-average Q updates.
    for i in range(1000000):
        currNode = self.root
        currState = '11111111111111'
        r = 0
        game.reset()
        while r == 0:
            r = -1
            while r == -1:
                m = random.randint(0, 6)
                r = game.placePiece(m)
                newKey = currNode.findNext(m, 1)
                if r == -1:
                    self.nodeList[newKey] = Node(newKey, -10000000)
            if newKey not in self.nodeList:
                self.nodeList[newKey] = Node(newKey, 0)
            currNode = self.nodeList[newKey]
            if r != 1 and r != 2:
                r = 1
                while r == 1:
                    m = random.randint(0, 6)
                    r = -1 * game.placePiece(m)
                    currState = currNode.findNext(m, 2)
                tNode = Node(currState, 0)
            reward = 0
            if r == 1:
                reward = 100
            elif r == -1:
                reward = -100
            # Best Q-value over tNode's children; unseen children count as 0.
            bestChildVal = None
            for n in range(7):
                nState = tNode.findNext(n, 1)
                if nState in self.nodeList:
                    if bestChildVal is None or self.nodeList[nState].qval > bestChildVal:
                        bestChildVal = self.nodeList[nState].qval
                else:
                    if bestChildVal is None or 0 > bestChildVal:
                        bestChildVal = 0
            # Running-average update.
            # currNode.qval = (1 - learnRate) * currNode.qval + learnRate * (reward + discFact * bestChildVal)
            currNode.seen += 1
            currNode.qval = (currNode.seen - 1) / currNode.seen * currNode.qval + (
                reward + discFact * bestChildVal) / currNode.seen
            currNode = tNode

    # Phase 2: 100,000 games guided by the learned policy for the first few
    # moves, using a fixed learning rate for the Q update.
    dinc = 0
    for i in range(100000):
        currNode = self.root
        currState = '11111111111111'
        r = 0
        game.reset()
        depth = 0
        while r == 0:
            depth += 1
            r = -1
            while r == -1:
                if depth < 5:
                    m = self.play(currNode.boardState)
                else:
                    m = random.randint(0, 6)
                r = game.placePiece(m)
                newKey = currNode.findNext(m, 1)
                if r == -1:
                    self.nodeList[newKey] = Node(newKey, -10000000)
            if newKey not in self.nodeList:
                self.nodeList[newKey] = Node(newKey, 0)
            currNode = self.nodeList[newKey]
            if r != 1 and r != 2:
                r = 1
                while r == 1:
                    m = random.randint(0, 6)
                    r = -1 * game.placePiece(m)
                    currState = currNode.findNext(m, 2)
                tNode = Node(currState, 0)
            reward = 0
            if r == 1:
                reward = 100
            elif r == -1:
                reward = -100
            # Best Q-value over tNode's children; unseen children count as 0.
            bestChildVal = None
            for n in range(7):
                nState = tNode.findNext(n, 1)
                if nState in self.nodeList:
                    if bestChildVal is None or self.nodeList[nState].qval > bestChildVal:
                        bestChildVal = self.nodeList[nState].qval
                else:
                    if bestChildVal is None or 0 > bestChildVal:
                        bestChildVal = 0
            # Fixed-learning-rate update (the running-average version is kept
            # below for reference).
            currNode.qval = (1 - learnRate) * currNode.qval + learnRate * (reward + discFact * bestChildVal)
            currNode.seen += 1
            # currNode.qval = (currNode.seen - 1) / currNode.seen * currNode.qval + (reward + discFact * bestChildVal) / currNode.seen
            currNode = tNode
        if i % 10000 == 0:
            dinc += 1
            print(i, flush=True)