Example #1
File: play.py  Project: Weves/Connect4AI
    def play(self):
        self.game.reset()

        while True:

            state = board.Game(self.game.clone())
            move = mcts.search(state, 10000)

            self.game.placePiece(move)
            self.game.displayBoard()
            if self.game.victor != 0:
                break
            '''
            self.makeHumanMove()
            self.game.displayBoard()
            if self.game.victor != 0:
                break
            '''

            print('------------------------------')

            state = board.Game(self.game.clone())
            move = mcts.search(state, 20000)

            self.game.placePiece(move)
            self.game.displayBoard()
            if self.game.victor != 0:
                break

            print('------------------------------')

        print('Player ' + str(self.game.victor) + ' wins!')
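The commented-out block swaps a human turn in for the second MCTS player; the makeHumanMove it references is not shown anywhere in this listing. A minimal sketch, assuming placePiece takes a 0-6 column index as it does in the other examples:

    def makeHumanMove(self):
        # Hypothetical sketch, not from the project: read a column from stdin
        # and drop a piece there. Assumes a 7-column Connect 4 board, matching
        # the random.randint(0, 6) calls in the training code below.
        move = int(input('Column (0-6): '))
        self.game.placePiece(move)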
Example #2
def search(rootState, iterations):

    rootNode = Node(state=rootState)

    for i in range(iterations):
        node = rootNode
        state = board.Game(rootState.clone(), rootState.turn)

        # Select
        while node.untriedMoves == [] and node.childNodes != []:  # node is fully expanded and non-terminal
            node = node.selectChild()
            state.placePiece(node.move)

        # Expand
        if node.untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)
            m = random.choice(node.untriedMoves)
            state.placePiece(m)
            node = node.addChild(m, state)  # add child and descend tree

        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while state.getMoves() != []:  # while state is non-terminal
            state.placePiece(random.choice(state.getMoves()))

        # Backpropagate
        while node is not None:  # backpropagate from the expanded node back to the root
            node.update(
                state.getResult(node.playerJustMoved)
            )  # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode

    # return the move that was most visited
    return sorted(rootNode.childNodes, key=lambda c: c.visits)[-1].move
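The Node class this search relies on is not shown. Below is a minimal sketch consistent with the calls above (selectChild, addChild, update, untriedMoves, parentNode, childNodes, playerJustMoved), using the standard UCT selection rule; the internals, including how playerJustMoved maps onto board.Game's turn attribute, are assumptions:

import math

class Node:
    # Minimal UCT node sketch; attribute names mirror their usage in search().
    def __init__(self, move=None, parent=None, state=None):
        self.move = move                       # move that led to this node
        self.parentNode = parent
        self.childNodes = []
        self.wins = 0.0
        self.visits = 0
        self.untriedMoves = state.getMoves()   # moves not yet expanded
        self.playerJustMoved = state.turn      # assumption: turn tracks the player who just moved

    def selectChild(self):
        # UCT: maximize wins/visits + sqrt(2 ln N / n) over the children.
        return max(self.childNodes,
                   key=lambda c: c.wins / c.visits +
                   math.sqrt(2 * math.log(self.visits) / c.visits))

    def addChild(self, move, state):
        # Add a child for `move` (state already has the move applied) and descend.
        child = Node(move=move, parent=self, state=state)
        self.untriedMoves.remove(move)
        self.childNodes.append(child)
        return child

    def update(self, result):
        self.visits += 1
        self.wins += result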
Example #3
File: main.py  Project: rohitbindal/Games
def main():

    RUN = True
    game = board.Game()
    # The AI makes the first move; comment out the next line for the human to go first.
    game.AI()
    game.getBoard()
    while RUN:

        x, y = getUserInput(game)
        game.addToBoard(x, y)
        game.getBoard()
        # Check for the result.
        result = game.checkState(game.board)
        if result is not None:
            game.printResult(result)
            RUN = False
Example #4
    def play(self, state, debug=False, player=1):

        ind = 1 if player == 1 else 0

        t_game = board.Game()
        p_states = []
        for i in range(7):
            t_game.board = copy.deepcopy(state)
            t_game.turn = 1
            t_game.placePiece(i)
            p_states.append(copy.deepcopy(t_game.board.ravel()))

        p = self.nn.predict_proba(p_states)
        if debug:
            print(p_states, flush=True)
            print(p, flush=True)

        bestPlay = (0, p[0][ind])
        for i in range(1, 7):
            if p[i][ind] > bestPlay[1]:
                bestPlay = (i, p[i][ind])

        return bestPlay[0]
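The selection loop is an argmax over the seven columns; an equivalent, more compact form (a stylistic alternative, not from the project):

        # same result: the column whose predicted probability for this player is highest
        best_col = max(range(7), key=lambda i: p[i][ind])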
Example #5
import minmax
import board
import time
import os
m = minmax.minmax()
b = board.SimpleBoard()
g = board.Game()


def speed_test():
    mapstart = time.perf_counter()
    m.evaluator(b, b.RED)
    mapend = time.perf_counter()
    print("Time Elapsed: " + str(mapend - mapstart))

    mapstarter = time.perf_counter()
    m.evaluator2(b, b.RED)
    mapender = time.perf_counter()
    print("Time Elapsed2: " + str(mapender - mapstarter))
    #m.minimax(b, 4, b.RED, float('-inf'), float('inf'))


def even_fight():
    for i in range(100):
        time.sleep(0.40)
        #os.system("clear")
        mapstart = time.perf_counter()
        g.step()
        mapend = time.perf_counter()
        print("Time Elapsed: " + str(mapend - mapstart))
Example #6
File: play.py  Project: Weves/Connect4AI
    def __init__(self):
        self.player = player.Player()
        #self.player.qtrain(.45, .99)
        self.game = board.Game()
Example #7
def main():
    
    # Network
    n = network.Network()
    
    # Game
    # This sets the WIDTH and HEIGHT of each grid location
    WIDTH = 20
    HEIGHT = 20
    
    # This sets the margin between each cell
    MARGIN = 5

    # Y grid
    y_grid = 12

    # X grid
    x_grid = 10

    # Colors
    BACKGROUND_COLOR = (94, 191, 226)  # baby blue
    
    # Creating Game
    game = board.Game(WIDTH, HEIGHT, MARGIN, x_grid, y_grid) 
    game.create_screen(500, 255)
    game.create_board()
    game.board_coloring()
    game.create_path()
    game.screen.fill(BACKGROUND_COLOR)

    # Player
    posicao = 0
    
    player_1 = player.Player('0', True)
    game.add_player(player_1)

    player_2 = player.Player('1', False)
    game.add_player(player_2)

    # Loop until the user clicks the close button.
    done = False
    
    # Used to manage how fast the screen updates
    clock = pygame.time.Clock()
    
    # -------- Main Program Loop -----------
    while not done:
        
        clock.tick(60)
        
                          
        if player_1.play:
            for event in pygame.event.get():  # User did something
                if event.type == pygame.QUIT:  # If user clicked close
                    done = True  # Flag that we are done so we exit this loop
                
                elif event.type == pygame.MOUSEBUTTONDOWN:
                
                    # User clicks the mouse. Get the position
                    pos = pygame.mouse.get_pos()
                    
                    for dice in game.dice:
                        if dice.click(pos):
                            resultado = game.dice_roll()
                            posicao = player_1.atualiza_posicao(resultado, game.path)
                            player_1.play = False
                            player_2.play = True
        
        elif player_2.play:
            for event in pygame.event.get():  # User did something
                if event.type == pygame.QUIT:  # If user clicked close
                    done = True  # Flag that we are done so we exit this loop
                
                elif event.type == pygame.MOUSEBUTTONDOWN:
                    
                    # User clicks the mouse. Get the position
                    pos = pygame.mouse.get_pos()
                    
                    for dice in game.dice:
                        if dice.click(pos):
                            resultado = game.dice_roll()
                            posicao = player_2.atualiza_posicao(resultado, game.path)
                            player_1.play = True
                            player_2.play = False
                        
        # Set the screen background
        game.screen.fill(BACKGROUND_COLOR)
        
        # Draw the grid
        for row in range(x_grid):
            for column in range(y_grid):
                casa = game.grid[row][column]
                pygame.draw.rect(game.screen,
                                casa.color,
                                [casa.x1,
                                casa.y1,
                                casa.x2,
                                casa.y2])
        
        # Displaying buttons
        for dice in game.dice:
            dice.draw(game.screen)
        
        # Displaying players
        for player_ in game.player_list:
            player_.draw(game.screen)
            if posicao == 47:
                done = True
        
        # Update screen
        pygame.display.update()
                
    # Be IDLE friendly. If you forget this line, the program will 'hang' on exit.
    time.sleep(2)
    pygame.quit()
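The two event-handling branches in the main loop differ only in which player rolls the dice and whose turn comes next. A hedged refactor sketch; handle_turn is hypothetical, but every pygame call and game/player attribute below is used as in the loop above:

def handle_turn(game, current, other):
    # Process one player's events: quit, or click a die to roll and advance.
    # Returns (done, new_position_or_None).
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            return True, None
        elif event.type == pygame.MOUSEBUTTONDOWN:
            pos = pygame.mouse.get_pos()
            for dice in game.dice:
                if dice.click(pos):
                    resultado = game.dice_roll()
                    posicao = current.atualiza_posicao(resultado, game.path)
                    current.play, other.play = False, True
                    return False, posicao
    return False, None

The main loop would then call handle_turn(game, player_1, player_2) or handle_turn(game, player_2, player_1) depending on whose turn it is, collapsing the duplicated branches into one.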
Example #8
    def train(self, learnRate, discFact):

        # train basic qlearning model to get nn started

        self.qtrain(learnRate, discFact)

        game = board.Game()

        for i in range(10000):
            currNode = self.root
            currState = '11111111111111'
            r = 0
            game.reset()
            depth = 0
            while r == 0:

                r = -1
                while r == -1:
                    if depth < self.MAX_DEPTH:
                        m = self.qplay(currNode.boardState)
                    else:
                        m = random.randint(0, 6)
                    r = game.placePiece(m)
                    newKey = currNode.findNext(m, 1)
                    if r == -1:
                        self.nodeList[newKey] = Node(newKey, -10000000)

                if newKey not in self.nodeList:
                    self.nodeList[newKey] = Node(newKey, 0)

                currNode = self.nodeList[newKey]

                if r != 1 and r != 2:
                    r = 1
                    while r == 1:
                        m = random.randint(0, 6)
                        r = -1 * game.placePiece(m)

                currState = currNode.findNext(m, 2)
                tNode = Node(currState, 0)
                currNode = tNode

            if i % 1000 == 0:
                print(i, flush=True)

        for i in range(10000):
            currNode = self.root
            seenStates = []
            r = 0
            game.reset()
            while r == 0:

                r = -1
                rando = False
                while r == -1:
                    if not rando and i > 5000:
                        m = self.play(game.board)
                        rando = True
                    else:
                        m = random.randint(0, 6)
                    r = game.placePiece(m)

                seenStates.append(game.board.ravel())

                if r != 1 and r != 2:
                    r = 1
                    rando = False
                    while r == 1:
                        if not rando and i > 5000:
                            m = self.play(game.board, player=2)
                            rando = True
                        else:
                            m = random.randint(0, 6)
                        r = -1 * game.placePiece(m)

                if r == -2:
                    r = 2

                seenStates.append(game.board.ravel())

            self.nn.partial_fit(seenStates, [r] * len(seenStates), [-1, 1, 2])
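The partial_fit(X, y, classes) call with classes [-1, 1, 2], together with the predict_proba call in Example #4, matches scikit-learn's incremental-learning API, so self.nn is presumably an sklearn-style classifier. A plausible setup sketch; the architecture is an assumption, not taken from the project:

from sklearn.neural_network import MLPClassifier

# Assumed setup: any sklearn classifier supporting partial_fit/predict_proba works.
# Hidden-layer sizes here are illustrative only.
nn = MLPClassifier(hidden_layer_sizes=(64, 64))
# The full class list must be supplied on the first partial_fit call:
# nn.partial_fit(seenStates, [r] * len(seenStates), classes=[-1, 1, 2])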
Example #9
    def qtrain(self, learnRate, discFact):

        game = board.Game()

        for i in range(500000):
            currNode = self.root
            r = 0
            game.reset()
            while r == 0:

                pNode = currNode

                r = -1
                while r == -1:
                    m = random.randint(0, 6)
                    r = game.placePiece(m)
                    newKey = currNode.findNext(m, 1)
                    if r == -1:
                        self.nodeList[newKey] = Node(newKey, -10000000)

                if newKey not in self.nodeList:
                    self.nodeList[newKey] = Node(newKey, 0)

                tNode = self.nodeList[newKey]

                if r != 1 and r != 2:
                    r = 1
                    while r == 1:
                        m = random.randint(0, 6)
                        r = -1 * game.placePiece(m)
                        newKey = tNode.findNext(m, 2)
                        if r == 1:
                            self.nodeList[newKey] = Node(newKey, -10000000)

                if newKey not in self.nodeList:
                    self.nodeList[newKey] = Node(newKey, 0)

                currNode = self.nodeList[newKey]

                reward = 0
                if r == 1:
                    reward = 100
                elif r == -1:
                    reward = -100

                bestChildVal = self.bestChild(tNode, 2)

                #average reward from this position for player 2
                pNode.seen += 1
                pNode.qval = (pNode.seen - 1) / pNode.seen * pNode.qval + (
                    -1 * reward + discFact * bestChildVal) / pNode.seen

                bestChildVal = self.bestChild(currNode, 1)

                #average reward from this position for player 1
                tNode.seen += 1
                tNode.qval = (tNode.seen - 1) / tNode.seen * tNode.qval + (
                    reward + discFact * bestChildVal) / tNode.seen

                #if pNode.qval != 0:
                #  print(pNode.qval)
                #if tNode.qval != 0:
                #  print(tNode.qval)

        print('Done with random training', flush=True)

        dinc = 0
        for i in range(100000):
            currNode = self.root
            currState = '11111111111111'
            r = 0
            game.reset()
            depth = 0
            while r == 0:

                pNode = currNode

                depth += 1

                r = -1
                while r == -1:
                    if depth < 5 + dinc:
                        m = self.qplay(currNode.boardState)
                    else:
                        m = random.randint(0, 6)
                    r = game.placePiece(m)
                    newKey = currNode.findNext(m, 1)
                    if r == -1:
                        self.nodeList[newKey] = Node(newKey, -10000000)

                if newKey not in self.nodeList:
                    self.nodeList[newKey] = Node(newKey, 0)

                tNode = self.nodeList[newKey]

                if r != 1 and r != 2:
                    r = 1
                    while r == 1:
                        if depth < 5 + dinc:
                            m = self.qplay(tNode.boardState)
                        else:
                            m = random.randint(0, 6)
                        r = -1 * game.placePiece(m)
                        newKey = tNode.findNext(m, 2)
                        if r == 1:
                            self.nodeList[newKey] = Node(newKey, -10000000)

                if newKey not in self.nodeList:
                    self.nodeList[newKey] = Node(newKey, 0)

                currNode = self.nodeList[newKey]

                reward = 0
                if r == 1:
                    reward = 100
                elif r == -1:
                    reward = -100

                bestChildVal = self.bestChild(tNode, 2)

                # updating qlearning val for player 2
                pNode.qval = (1 - learnRate) * pNode.qval + learnRate * (
                    reward + discFact * bestChildVal)

                bestChildVal = self.bestChild(currNode, 1)

                # updating qlearning val for player 1
                tNode.qval = (1 - learnRate) * tNode.qval + learnRate * (
                    reward + discFact * bestChildVal)

            if i % 10000 == 0:
                dinc += 1
                print(i, flush=True)

        print('done', flush=True)
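qtrain calls a bestChild(node, player) helper that is not shown; a sketch consistent with the inline best-child loop in Example #10 below, where unseen states count as 0 (the exact body is an assumption):

    def bestChild(self, node, player):
        # Best q-value over the seven possible replies by `player`,
        # treating states not yet in nodeList as having q-value 0.
        bestChildVal = None
        for n in range(7):
            nState = node.findNext(n, player)
            qval = self.nodeList[nState].qval if nState in self.nodeList else 0
            if bestChildVal is None or qval > bestChildVal:
                bestChildVal = qval
        return bestChildVal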
Example #10
File: qplayer.py  Project: Weves/Connect4AI
  def train(self, learnRate, discFact):
    game = board.Game()
    
    for i in range(1000000):
      currNode = self.root
      currState = '11111111111111'
      seenStates = []
      r = 0
      game.reset()
      while r == 0:

        r = -1
        while r == -1:
          m = random.randint(0,6)
          r = game.placePiece(m)
          newKey = currNode.findNext(m, 1)
          if r == -1:
            self.nodeList[newKey] = Node(newKey, -10000000)  

        if newKey not in self.nodeList:
          self.nodeList[newKey] = Node(newKey, 0)
    
        currNode = self.nodeList[newKey]

        if r != 1 and r != 2:
          r = 1
          while r == 1:
            m = random.randint(0,6)
            r = -1 * game.placePiece(m)

        currState = currNode.findNext(m, 2)
        tNode = Node(currState, 0)

        reward = 0
        if r == 1:
          reward = 100
        elif r == -1:
          reward = -100 

        bestChildVal = None
        for n in range(7):
          nState = tNode.findNext(n, 1)
          if nState in self.nodeList:
            if bestChildVal is None or self.nodeList[nState].qval > bestChildVal:
              bestChildVal = self.nodeList[nState].qval
          else:
            if bestChildVal is None or 0 > bestChildVal:
              bestChildVal = 0

#        currNode.qval = (1 - learnRate) * currNode.qval + learnRate * (reward + discFact * bestChildVal)
        currNode.seen += 1
        currNode.qval = (currNode.seen - 1) / currNode.seen * currNode.qval + (reward + discFact * bestChildVal) / currNode.seen

        currNode = tNode

    dinc = 0
    for i in range(100000):
      currNode = self.root
      currState = '11111111111111'
      r = 0
      game.reset()
      depth = 0
      while r == 0:
        
        depth += 1

        r = -1
        while r == -1:
          if depth < 5:
            m = self.play(currNode.boardState)
          else:
            m = random.randint(0, 6)
          r = game.placePiece(m)
          newKey = currNode.findNext(m, 1)
          if r == -1:
            self.nodeList[newKey] = Node(newKey, -10000000)  

        if newKey not in self.nodeList:
          self.nodeList[newKey] = Node(newKey, 0)
    
        currNode = self.nodeList[newKey]

        if r != 1 and r != 2:
          r = 1
          while r == 1:
            m = random.randint(0,6)
            r = -1 * game.placePiece(m)

        currState = currNode.findNext(m, 2)
        tNode = Node(currState, 0)

        reward = 0
        if r == 1:
          reward = 100
        elif r == -1:
          reward = -100 

        bestChildVal = None
        for n in range(7):
          nState = tNode.findNext(n, 1)
          if nState in self.nodeList:
            if bestChildVal is None or self.nodeList[nState].qval > bestChildVal:
              bestChildVal = self.nodeList[nState].qval
          else:
            if bestChildVal is None or 0 > bestChildVal:
              bestChildVal = 0

        currNode.qval = (1 - learnRate) * currNode.qval + learnRate * (reward + discFact * bestChildVal)
        currNode.seen += 1
        #currNode.qval = (currNode.seen - 1) / currNode.seen * currNode.qval + (reward + discFact * bestChildVal) / currNode.seen

        currNode = tNode

      if i % 10000 == 0:
        dinc += 1
        print(i, flush=True)
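The (seen - 1) / seen update in the first training phase is the standard incremental mean, q_n = q_{n-1} + (x_n - q_{n-1}) / n, which averages the targets without storing them. A quick sanity check:

# Both forms compute the running average of the targets x seen so far.
q, n = 0.0, 0
for x in [10.0, 20.0, 30.0]:
    n += 1
    q = (n - 1) / n * q + x / n   # form used in the code above
print(q)  # 20.0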