Ejemplo n.º 1
0
    def rollout(self):
        '''
         Monte Carlo simulation: play random moves from the current node's board
         until the game reaches an end.

         The player whose turn it is at this node (self.x) moves first, then the
         two sides alternate. The node's own board (self.s) is never modified;
         the simulation runs on a copy.

         Outputs:
             e: the result of the game as reported by TicTacToe.check
                (X player won: 1, tie: 0, lose: -1), an integer scalar.
        '''
        # one random player per side; `mover` always holds the side about to play
        mover, waiter = PlayerRandom(), PlayerRandom()
        s = np.copy(self.s)
        # any value of self.x other than 1 is treated as the O player (-1)
        x = 1 if self.x == 1 else -1
        e = TicTacToe.check(s)
        # np.any(s == 0) is a real "is there an empty cell" test; the previous
        # len(np.where(...)) > 0 was always true (np.where returns one index
        # array per dimension, so its length is the board's rank, not a count).
        while e is None and np.any(s == 0):
            r, c = mover.play(s, x)
            s[r][c] = x
            e = TicTacToe.check(s)
            # hand the turn to the other side
            x = -x
            mover, waiter = waiter, mover
        return e
Ejemplo n.º 2
0
    def rollout(self):
        '''
         Monte Carlo simulation: simulate a randomized game from the current node
         until it reaches an end of the game.

         The simulation runs on a copy of self.s, starting with the player in
         self.x and alternating sides after every move.

         Outputs:
             e: the result of the game as reported by TicTacToe.check
                (X player won: 1, tie: 0, lose: -1), an integer scalar.
        '''
        s = np.copy(self.s)
        x = self.x
        # a single random player is enough: the side to move is passed on each call
        player_random = PlayerRandom()
        e = TicTacToe.check(s)
        while e is None:
            r, c = player_random.play(s, x=x)
            s[r, c] = x
            x = -x
            # keep the result so it does not have to be recomputed after the loop
            e = TicTacToe.check(s)
        return e
Ejemplo n.º 3
0
def test_choose_optimal_move():
    '''(5 points) choose_optimal_move()'''
    #---------------------
    # Game: TicTacToe
    game = TicTacToe()
    player = MCTSPlayer()

    # each case: (board rows, player to move, tree-building iterations, expected move)
    tictactoe_cases = (
        ([[1, -1, 1],
          [0, 0, -1],
          [0, 1, -1]], 1, 100, (2, 0)),
        ([[1, -1, 1],
          [0, 1, -1],
          [0, 1, -1]], -1, 100, (2, 0)),
        ([[1, -1, 1],
          [0, 0, 0],
          [0, 0, 0]], -1, 200, (1, 1)),
    )
    for rows, turn, n_iter, (want_r, want_c) in tictactoe_cases:
        root = MCNode(np.array(rows), x=turn)
        root.build_tree(game, n_iter)
        r, c = player.choose_optimal_move(root)
        assert r == want_r
        assert c == want_c

    # The AI agent should be compatible with both games: TicTacToe and Othello.
    #---------------------
    # Game: Othello
    game = Othello()
    # empty 8x8 board with three discs (O, X, O) in the top row
    board = np.zeros((8, 8), dtype=int)
    board[0, 1:4] = [-1, 1, -1]
    snapshot = board.copy()
    root = MCNode(board, x=1)  # it's X player's turn
    root.build_tree(game, 100)
    # building the tree must leave the root board untouched
    assert np.allclose(board, snapshot)
    r, c = player.choose_optimal_move(root)
    assert r == 0
    assert c == 0
Ejemplo n.º 4
0
    def expand(self):
        '''
         Grow the tree by one level below this node.

         When the game stored in self.s is still running, one child Node is
         appended to self.c for every move returned by TicTacToe.avail_moves.
         Each child holds a copy of the board with the current player's mark
         placed, and the turn handed to the other player.
         When the game has already ended, nothing is added.
        '''
        if TicTacToe.check(self.s) is None:
            # mark of the player to move: 1 for X, -1 for anything else
            mark = 1 if self.x == 1 else -1
            for r, c in TicTacToe.avail_moves(self.s):
                board = np.copy(self.s)
                board[r, c] = mark
                self.c.append(Node(board, x=-self.x, parent=self))
Ejemplo n.º 5
0
def test_backprop():
    '''(5 points) backprop'''

    def expect(node, v, N):
        # check the accumulated value and the simulation count of one node
        assert node.v == v
        assert node.N == N

    #---------------------
    # Game: TicTacToe
    game = TicTacToe()

    board = np.array([[0, 0, 0],
                      [-1, 1, 1],
                      [0, 0, -1]])
    root = MCNode(board, x=1)  # X player's turn
    root.expand(game)  # one level of children under the root

    # simulation 1: second child selected, X player won
    child1 = root.c[1]
    child1.backprop(e=1)
    expect(child1, 1, 1)
    expect(root, 1, 1)

    # simulation 2: third child selected and expanded, its first child is
    # simulated and O player won
    child2 = root.c[2]
    child2.expand(game)
    grandchild0 = child2.c[0]
    grandchild0.backprop(e=-1)
    expect(grandchild0, -1, 1)
    expect(child2, -1, 1)
    expect(root, 0, 2)
    expect(child1, 1, 1)  # sibling branch is untouched
    grandchild1 = child2.c[1]
    expect(grandchild1, 0, 0)  # never simulated so far

    # simulation 3: second grandchild selected, the game is a tie
    grandchild1 = child2.c[1]
    grandchild1.backprop(e=0)
    expect(grandchild1, 0, 1)
    expect(child2, -1, 2)
    expect(root, 0, 3)
    expect(grandchild0, -1, 1)
    expect(child1, 1, 1)
Ejemplo n.º 6
0
    def expand(self):
        '''
         Expand the current tree node for one-level.
         Add one child node to self.c for each move returned by
         TicTacToe.avail_moves on the current board (self.s).

         Each child gets a copy of the board with the current player's mark
         (self.x) placed at the move, the opposite player to move, and this
         node as its parent. Nothing is added when the game has already ended.
        '''
        # if the game has already ended, return/exit without expanding the tree
        if TicTacToe.check(self.s) is not None:
            return

        # if the game has not ended yet, expand the current node with one child node for each valid move
        for i, j in TicTacToe.avail_moves(self.s):
            new_s = np.copy(self.s)
            new_s[i, j] = self.x
            self.c.append(Node(s=new_s, x=-self.x, parent=self))
Ejemplo n.º 7
0
def run_a_game(p):
    '''
        Run an interactive TicTacToe game in a pygame window.
        Mouse clicks are handled while it is the X player's turn (x == 1);
        the AI player p answers while it is the O player's turn (x == -1).
        Input:
            p: the AI player that you are playing with; it is asked for its
               move through p.choose_a_move(g, s)
        Controls:
            mouse click: place a mark on the clicked cell (if the move is valid)
            F key: restart the game
            ESC key / closing the window: quit the program (calls sys.exit)
        This function never returns normally; it loops until the program exits.
    '''
    win = init_screen()

    # initialize game state
    g = TicTacToe()
    s = g.initial_game_state()
    x = 1  # current turn (x player's turn)

    can_play = True  # becomes False once the game has ended, until restart
    pygame.display.update()

    # run the game
    while True:
        event = pygame.event.wait()
        # close the window
        if event.type == pygame.QUIT:
            pygame.quit()
            sys.exit()

        # Press Key
        if event.type == pygame.KEYDOWN:
            # press F button (restart game)
            if event.key == pygame.K_f:
                s = g.initial_game_state()
                draw_empty_board(win)
                can_play = True
                x = 1  # X player's turn
                pygame.display.update()
            # press ESC button (exit game)
            if event.key == pygame.K_ESCAPE:
                pygame.quit()
                sys.exit()

        # Click Mouse
        # NOTE: event types are plain ints and must be compared with ==;
        # an identity test (`is`) is not guaranteed to match equal ints.
        if event.type == pygame.MOUSEBUTTONDOWN and can_play and x == 1:
            # Human player's turn to choose a move
            # get mouse position
            (mouseX, mouseY) = pygame.mouse.get_pos()
            # convert to board grid (row,column)
            r, c = map_mouse_to_board(mouseX, mouseY)
            # if the move is valid
            if g.check_valid_move(s, r, c):
                # update game state
                g.apply_a_move(s, r, c)
                x = s.x
                # draw the board
                draw_board(win, s.b)
                # check if the game has ended already
                result = g.check_game(s)
                if result is not None:
                    draw_result(win, result)
                    can_play = False
                # post a user event so the loop wakes up for the computer's move
                pygame.event.post(pygame.event.Event(pygame.USEREVENT))

        if event.type == pygame.USEREVENT and x == -1 and can_play:  # computer's turn to choose a move
            r, c = p.choose_a_move(g, s)
            # the AI must return a valid move
            assert g.check_valid_move(s, r, c)
            # update game state
            g.apply_a_move(s, r, c)
            x = s.x
            # draw the board
            draw_board(win, s.b)
            # check if the game has ended already
            result = g.check_game(s)
            if result is not None:
                draw_result(win, result)
                can_play = False

        # update the UI display
        pygame.display.update()
Ejemplo n.º 8
0
def test_MCTS_choose_a_move():
    '''(10 points) MCTS choose_a_move

    Checks MCTSPlayer.choose_a_move on fixed TicTacToe positions, then plays
    complete games (through g.run_a_game) against RandomPlayer, MiniMaxPlayer
    and other MCTSPlayer instances, and finally repeats a game check on Othello.
    Several checks are statistical (sums over 10 games compared to a threshold).
    '''
    #---------------------
    # Game: TicTacToe
    g = TicTacToe()  # game

    p = MCTSPlayer()
    # O holds (0,1) and (0,2): the expected X move (0,0) completes/blocks the top row
    b = np.array([[0, -1, -1], [0, 1, 0], [0, 0, 0]])
    s = GameState(b, x=1)
    r, c = p.choose_a_move(g, s)
    assert r == 0
    assert c == 0

    # O holds (0,2) and (1,2): the expected X move (2,2) is the last cell of that column
    b = np.array([[0, 0, -1], [0, 1, -1], [0, 0, 0]])
    s = GameState(b, x=1)
    r, c = p.choose_a_move(g, s)
    assert r == 2
    assert c == 2

    # mirrored position with O to move: X holds (0,2) and (1,2), expected move (2,2)
    b = np.array([[0, 0, 1], [0, -1, 1], [0, 0, 0]])
    s = GameState(b, x=-1)
    r, c = p.choose_a_move(g, s)
    assert r == 2
    assert c == 2

    p1 = MCTSPlayer()
    p2 = RandomPlayer()
    p3 = MiniMaxPlayer()
    # NOTE(review): presumably the first player passed to run_a_game plays X
    # and the second plays O - confirm against run_a_game's definition.
    '''random vs MCTS'''
    # two empty cells left; the game result is expected to be 1 every time
    for i in range(10):
        b = np.array([[0, -1, 1], [-1, 1, -1], [0, -1, -1]])
        s = GameState(b, x=1)
        e = g.run_a_game(p1, p2, s)
        assert e == 1

    # two empty cells left; the game result is expected to be 0 (tie) every time
    for i in range(10):
        b = np.array([[0, -1, 1], [-1, 1, -1], [-1, 1, 0]])
        s = GameState(b, x=1)
        e = g.run_a_game(p1, p2, s)
        assert e == 0
    ''' Minimax vs MCTS '''

    for i in range(10):
        b = np.array([[0, 0, 1], [0, -1, 0], [1, -1, 0]])
        s = GameState(b, x=1)
        e = g.run_a_game(p1, p3, s)
        assert e == 0

    # w accumulates the game results over 10 games; the sum must exceed 1
    w = 0
    for i in range(10):
        b = np.array([[0, 0, 0], [0, 0, 0], [1, -1, 0]])
        s = GameState(b, x=1)
        e = g.run_a_game(p1, p3, s)
        w += e
    assert w > 1
    ''' MCTS vs MCTS '''
    # the same player object plays both sides; the summed result must stay within (-5, 5)
    w = 0
    for i in range(10):
        b = np.array([[0, 0, 0], [1, -1, 0], [0, 0, 0]])
        s = GameState(b, x=1)
        e = g.run_a_game(p1, p1, s)
        w += e
    assert np.abs(w) < 5
    ''' MCTS(n_iter=1) vs MCTS(n_iter=100) '''

    pm1 = MCTSPlayer(1)
    pm100 = MCTSPlayer(100)
    # the summed result over 10 games must be clearly one-sided (|w| > 4)
    w = 0
    for i in range(10):
        b = np.array([[0, 0, 0], [0, 0, 0], [1, -1, 0]])
        s = GameState(b, x=1)
        e = g.run_a_game(pm100, pm1, s)
        w += e
    assert np.abs(w) > 4

    #----------------------------------------------
    # The AI agent should be compatible with both games: TicTacToe and Othello.
    # now let's test on the game "Othello":

    #---------------------
    # Game: Othello
    g = Othello()  # game
    b = np.array([[0, -1, 1, -1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]])

    # each game starts from a fresh copy of b, so b itself is reused unchanged
    for i in range(10):
        s = GameState(b.copy(), x=1)
        e = g.run_a_game(p1, p2, s)
        assert e == 1
    ''' MCTS vs random'''
    # final game is started from b directly (no copy); b is not used afterwards
    s = GameState(b, x=1)
    e = g.run_a_game(p1, p2, s)
    assert e == 1
Ejemplo n.º 9
0
def test_build_tree():
    '''(5 points) build_tree

    Runs MCNode.build_tree for a growing number of iterations on a small
    TicTacToe position and checks the node statistics (N, v), the children
    lists and the parent links after each step; then builds a 1000-iteration
    tree on a larger position and checks visit counts along one line of play.
    '''

    #---------------------
    # Game: TicTacToe
    g = TicTacToe()  # game

    #--------------------------
    # two empty cells, (0,0) and (2,0), so the root is expected to get two children
    b = np.array([[0, 1, 1], [-1, 1, -1], [0, -1, 1]])
    s = GameState(b, x=-1)  # O player's turn
    n = MCNode(s)
    # run one iteration
    n.build_tree(g, 1)
    assert len(n.c) == 2
    assert n.N == 1
    assert n.v == 1

    for c in n.c:
        assert c.s.x == 1
        assert c.p == n
        assert c.c == []

    # exactly one child should have been simulated in the first iteration
    c = 0
    for x in n.c:
        if x.N > 0:
            c += 1
            assert x.v == 1
            assert x.N == 1
    assert c == 1

    # run another iteration
    n.build_tree(g, 1)
    assert len(n.c) == 2
    assert n.N == 2
    assert n.v == 2

    for c in n.c:
        assert c.s.x == 1
        assert c.p == n

    # both children visited once by now; exactly one of them has been expanded
    c = 0
    for x in n.c:
        assert x.v == 1
        assert x.N == 1
        if len(x.c) == 1:
            c += 1
    assert c == 1

    # run two more iterations
    n.build_tree(g, 2)
    assert n.N == 4
    assert n.v == 4

    for c in n.c:
        assert c.s.x == 1
        assert c.p == n

    # after four iterations each child has one grandchild; exactly one
    # grandchild is expected to have been visited twice
    count = 0
    for x in n.c:
        assert len(x.c) == 1
        assert x.v == 2
        assert x.N == 2
        c = x.c[0]
        assert c.s.x == -1
        assert c.p == x
        assert c.N == c.v
        assert c.N == 1 or c.N == 2
        if c.N == 2:
            count += 1
    assert count == 1

    #--------------------------
    b = np.array([[0, 1, 1], [0, -1, 0], [0, 0, 0]])
    s = GameState(b, x=-1)
    n = MCNode(s)
    n.build_tree(g, 1000)
    assert n.s.x == -1
    assert n.N == 1000

    # follow one line of play (boards b1..b4) down the tree; the visit count
    # threshold drops by 100 per level and |v| must stay below 50.
    # NOTE(review): c1, c2 and c3 are only bound when a matching child is found;
    # if none matches, the next loop fails with a NameError rather than an assert.
    b1 = np.array([[-1, 1, 1], [0, -1, 0], [0, 0, 0]])

    for x in n.c:
        if np.allclose(x.s.b, b1):
            assert x.s.x == 1
            assert x.p == n
            assert x.N > 800
            assert np.abs(x.v) < 50
            c1 = x

    b2 = np.array([[-1, 1, 1], [0, -1, 0], [0, 0, 1]])
    for x in c1.c:
        if np.allclose(x.s.b, b2):
            assert x.s.x == -1
            assert x.p == c1
            assert x.N > 700
            assert np.abs(x.v) < 50
            c2 = x

    b3 = np.array([[-1, 1, 1], [0, -1, -1], [0, 0, 1]])
    for x in c2.c:
        if np.allclose(x.s.b, b3):
            assert x.s.x == 1
            assert x.p == c2
            assert x.N > 600
            assert np.abs(x.v) < 50
            c3 = x

    b4 = np.array([[-1, 1, 1], [1, -1, -1], [0, 0, 1]])
    for x in c3.c:
        if np.allclose(x.s.b, b4):
            assert x.s.x == -1
            assert x.p == c3
            assert x.N > 500
            assert np.abs(x.v) < 50
Ejemplo n.º 10
0
def test_selection():
    '''(5 points) selection

    Checks MCNode.selection on an unexpanded root, on trees with one level of
    children whose statistics (v, N) are set by hand, and on a two-level tree
    where the expected leaf changes as visit counts are updated.
    '''
    #---------------------
    # Game: TicTacToe
    g = TicTacToe()  # game

    #----------------------
    # The root node is a leaf node
    # (the node is never expanded, so selection is expected to return the root itself)
    b = np.array([[0, -1, 1], [0, 0, 1], [0, -1, 1]])
    s = GameState(b, x=-1)  # O player's turn
    p = MCNode(s)
    c = p.selection()
    assert c == p

    #----------------------
    # tree with one level of children nodes
    b = np.array([[1, 1, -1], [0, -1, -1], [0, 1, 1]])
    s = GameState(b, x=-1)  # O player's turn
    p = MCNode(s)
    p.expand(g)  # expand the root node with one level of children nodes
    c1, c2 = p.c

    # hand-set statistics; they only serve to steer the selection
    p.v = -6
    p.N = 12
    c1.v = -1
    c1.N = 2
    c2.v = -5
    c2.N = 10

    c = p.selection()
    assert c == c1

    p.v = -10
    p.N = 20
    c1.v = -5
    c1.N = 10
    c2.v = -5
    c2.N = 10
    # select the child node with the highest UCB score
    c = p.selection()
    assert c == c1  # a tie in UCB score, use index as tie-breaker

    # A parent node with three children nodes
    b = np.array([[0, -1, -1], [0, 1, 1], [0, -1, 1]])
    s = GameState(b, x=1)  # X player's turn
    p = MCNode(s)
    p.expand(g)  # expand the root node with one level of children nodes
    c1, c2, c3 = p.c

    # the statistics below are updated incrementally: each step changes the
    # parent and one child, the other children keep their previous values
    p.v = 1
    p.N = 1
    c1.v = 1
    c1.N = 1
    c = p.selection()
    assert c == c2

    #----------------------
    p.v = 2
    p.N = 2
    c2.v = 1
    c2.N = 1
    c = p.selection()
    assert c == c3

    #----------------------
    p.v = 1
    p.N = 3
    c3.v = -1
    c3.N = 1
    c = p.selection()
    assert c == c1

    #----------------------
    p.v = 2
    p.N = 4
    c1.v = 2
    c1.N = 2
    c = p.selection()
    assert c == c2

    #----------------------
    # tree with two levels of children nodes
    b = np.array([[0, 0, -1], [-1, 1, 1], [0, 0, 0]])
    s = GameState(b, x=1)  # X player's turn
    p = MCNode(s)
    p.expand(g)  # expand the root node with one level of children nodes
    p.v = 0
    p.N = 5
    for c in p.c:
        c.v = 0
        c.N = 1
        c.expand(g)  # expand the second level children nodes

    # the board has 5 empty cells, so there are 5 children with 4 grandchildren
    # each; the expected leaf walks through the children in index order, one
    # grandchild index (j) at a time, as the visit counts are bumped by hand
    for j in range(4):
        for i in range(5):
            l = p.selection()
            assert l == p.c[i].c[j]
            p.c[i].c[j].N = 1
            p.c[i].N += 1
            p.N += 1

    # changing v on one child and one of its grandchildren redirects the selection
    p.c[1].v = 1
    p.c[1].c[2].v = -1
    l = p.selection()
    assert l == p.c[1].c[2]
Ejemplo n.º 11
0
def test_select_a_child():
    '''(5 points) select_a_child'''

    #---------------------
    # Game: TicTacToe
    game = TicTacToe()

    #----------------------
    # A parent node with two children nodes
    board = np.array([[1, 1, -1], [0, -1, -1], [0, 1, 1]])
    state = GameState(board, x=-1)  # O player's turn
    root = MCNode(state)
    root.expand(game)  # one level of children under the root
    first, second = root.c

    # Hand-picked statistics (only meaningful for this test); every case sets
    # (v, N) on the parent and on both children, then names the expected pick.
    two_child_cases = (
        ((-6, 12), (-1, 2), (-5, 10), first),
        ((-10, 20), (-5, 10), (-5, 10), first),  # UCB tie: lower index wins
        ((-6, 20), (-1, 10), (-5, 10), second),
    )
    for root_stats, first_stats, second_stats, expected in two_child_cases:
        root.v, root.N = root_stats
        first.v, first.N = first_stats
        second.v, second.N = second_stats
        # select the child node with the highest UCB score
        assert root.select_a_child() == expected

    #----------------------
    # A parent node with three children nodes
    board = np.array([[0, -1, -1], [0, 1, 1], [0, -1, 1]])
    state = GameState(board, x=1)  # X player's turn
    root = MCNode(state)
    root.expand(game)  # one level of children under the root
    first, second, third = root.c

    # Incremental updates: each step changes the parent and ONE child, all
    # other children keep whatever statistics they had before.
    three_child_steps = (
        ((1, 1), first, (1, 1), second),
        ((2, 2), second, (1, 1), third),
        ((1, 3), third, (-1, 1), first),
        ((2, 4), first, (2, 2), second),
    )
    for root_stats, touched, touched_stats, expected in three_child_steps:
        root.v, root.N = root_stats
        touched.v, touched.N = touched_stats
        assert root.select_a_child() == expected
Ejemplo n.º 12
0
def test_sample():
    '''(5 points) sample

    Calls MCNode.sample repeatedly on fixed TicTacToe and Othello positions and
    checks either the exact result (forced or already finished games) or the
    sum of results over many runs against a tolerance (random outcomes).
    Also checks that sampling leaves the node's board unchanged.
    '''
    #---------------------
    # Game: TicTacToe
    g = TicTacToe()  # game

    #------------------------
    b = np.array([[0, 1, 1], [0, -1, 1], [-1, 1, -1]])
    bc = np.array([[0, 1, 1], [0, -1, 1], [-1, 1, -1]])
    s = GameState(b, x=-1)  # "O" player's turn
    n = MCNode(s)
    # NOTE(review): this comparison runs before any call to sample, so at this
    # point it only verifies that constructing the node kept the board intact.
    assert np.allclose(n.s.b,
                       bc)  # the game state should not change after simulation
    v = 0
    for _ in range(100):
        e = n.sample(g)
        assert e == -1 or e == 1
        v += e
    assert np.abs(
        v) < 25  # the two results should have roughly the same chance

    #------------------------
    # one empty cell, (0,0); every simulation is expected to end with result 1
    b = np.array([[0, 1, 1], [-1, -1, 1], [-1, 1, -1]])
    s = GameState(b, x=1)  # "X" player's turn
    n = MCNode(s)
    for _ in range(100):
        e = n.sample(g)
        assert e == 1

    #------------------------
    # two empty cells, (0,0) and (0,2); every simulation is expected to end with result -1
    b = np.array([[0, 1, 0], [-1, -1, 1], [-1, 1, 1]])
    s = GameState(b, x=-1)  # "O" player's turn
    n = MCNode(s)
    for _ in range(100):
        e = n.sample(g)
        assert e == -1

    #------------------------
    b = np.array([[0, 1, 1], [0, -1, 1], [0, -1, -1]])

    s = GameState(b, x=1)  # "X" player's turn
    n = MCNode(s)
    v = 0
    for _ in range(100):
        e = n.sample(g)
        assert e == -1 or e == 1
        v += e
    assert np.abs(v) < 25  # X player has 1/2 chance to win and 1/2 to lose

    #------------------------
    # Terminal node, the game has already ended, the simulation result should always be the same.
    b = np.array([[-1, 0, 0], [1, -1, 1], [0, 1,
                                           -1]])  # terminal node: O player won
    s = GameState(b, x=1)  # "X" player's turn
    n = MCNode(s)
    for _ in range(100):
        assert n.sample(g) == -1

    b_ = np.array([[-1, 0, 0], [1, -1, 1], [0, 1, -1]])
    assert np.allclose(n.s.b,
                       b_)  # the game state should not change after simulation

    # full board with no empty cell: the result is expected to be a tie (0) every time
    b = np.array([[-1, -1, 1], [1, 1, -1], [-1, 1, 1]])
    s = GameState(b, x=1)  # "X" player's turn
    n = MCNode(s)
    for _ in range(100):
        assert n.sample(g) == 0

    #------------------------
    # nearly empty board (X in the centre, O to move): any of the three results
    # may occur; the sum over 1000 runs must land within 100 of 500
    b = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]])
    s = GameState(b, x=-1)  # "O" player's turn
    n = MCNode(s)
    v = 0
    for _ in range(1000):
        e = n.sample(g)
        assert e == -1 or e == 1 or e == 0
        v += e
    assert np.abs(v - 500) < 100

    #-----------------------------
    # The AI agent should be compatible with both games: TicTacToe and Othello.
    # now let's test on the game "Othello":

    #---------------------
    # Game: Othello
    g = Othello()  # game

    #------------------------
    # top row holds O, X, O at columns 2-4; every simulation is expected to end with result -1
    b = np.array([[0, 0, -1, 1, -1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]])
    s = GameState(b, x=1)  # "X" player's turn
    n = MCNode(s)
    for _ in range(10):
        e = n.sample(g)
        assert e == -1

    #------------------------
    # same pattern shifted to columns 1-3; here the results are expected to vary
    b = np.array([[0, -1, 1, -1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]])
    s = GameState(b, x=1)  # "X" player's turn
    n = MCNode(s)
    w = 0
    for _ in range(10):
        e = n.sample(g)
        w += e
    assert np.abs(
        w
    ) < 9  # the two results (1 and -1) should roughly have the same chance
Ejemplo n.º 13
0
def test_expand():
    '''(5 points) expand

    Checks MCNode.expand on TicTacToe and Othello positions: the number of
    children created, each child's board (s.b), player to move (s.x), move (m),
    parent link (p), empty children list (c) and zeroed statistics (v, N).
    For TicTacToe it also checks that the node returned by expand is a fresh
    MCNode that belongs to the parent's children list, and that the parent's
    own board is left unchanged.
    '''
    #---------------------
    # Game: TicTacToe
    g = TicTacToe()  # game

    # Current Node (root)
    # three empty cells, all in column 0
    b = np.array([[0, 1, -1], [0, -1, 1], [0, 1, -1]])
    s = GameState(b, x=1)  #it's X player's turn
    n = MCNode(s)
    # expand
    sc = n.expand(g)  # sc: the node returned by expand
    assert n.s.x == 1
    assert len(n.c) == 3

    assert type(sc) == MCNode
    assert sc.p == n
    assert sc.s.x == -1
    assert sc.p == n
    assert sc.c == []
    assert sc.v == 0
    assert sc.N == 0

    b_ = np.array([[0, 1, -1], [0, -1, 1], [0, 1, -1]])
    # the current game state should not change after expanding
    assert np.allclose(n.s.b, b_)
    for c in n.c:
        assert c.s.x == -1
        assert c.p == n
        assert c.c == []
        assert c.v == 0
        assert c.N == 0

    # Each "child node" section below searches the children for one expected
    # board; the flag c records whether it was found and x.m must be the move
    # that leads to that board.
    # child node A
    b = np.array([[1, 1, -1], [0, -1, 1], [0, 1, -1]])
    c = False
    for x in n.c:
        if np.allclose(x.s.b, b):
            c = True
            assert x.m == (0, 0)
    assert c

    # child node B
    b = np.array([[0, 1, -1], [1, -1, 1], [0, 1, -1]])
    c = False
    for x in n.c:
        if np.allclose(x.s.b, b):
            c = True
            assert x.m == (1, 0)
    assert c

    # child node C
    b = np.array([[0, 1, -1], [0, -1, 1], [1, 1, -1]])
    c = False
    for x in n.c:
        if np.allclose(x.s.b, b):
            c = True
            assert x.m == (2, 0)
    assert c

    # the selected child node should be in the children list
    c = False
    for x in n.c:
        if sc == x:
            c = True
    assert c

    #--------------------------

    # Current Node (root)
    # two empty cells, (1,0) and (2,0)
    b = np.array([[1, 1, -1], [0, -1, 1], [0, 1, -1]])
    s = GameState(b, x=-1)  #it's O player's turn
    n = MCNode(s)
    sc = n.expand(g)
    assert n.s.x == -1
    assert len(n.c) == 2
    assert type(sc) == MCNode
    assert sc.p == n
    assert sc.s.x == 1
    assert sc.p == n
    assert sc.c == []
    assert sc.v == 0
    assert sc.N == 0

    for c in n.c:
        assert c.s.x == 1
        assert c.p == n
        assert c.c == []
        assert c.v == 0
        assert c.N == 0

    # child node A
    b = np.array([[1, 1, -1], [-1, -1, 1], [0, 1, -1]])
    c = False
    for x in n.c:
        if np.allclose(x.s.b, b):
            c = True
            assert x.m == (1, 0)
    assert c

    # child node B
    b = np.array([[1, 1, -1], [0, -1, 1], [-1, 1, -1]])
    c = False
    for x in n.c:
        if np.allclose(x.s.b, b):
            c = True
            assert x.m == (2, 0)
    assert c

    # the selected child node should be in the children list
    c = False
    for x in n.c:
        if sc == x:
            c = True
    assert c

    #---------------------------
    # empty board: one child per cell, each child's board holds exactly one X mark
    s = GameState(np.zeros((3, 3)), x=1)
    n = MCNode(s)
    sc = n.expand(g)
    assert n.s.x == 1
    assert len(n.c) == 9
    a = False
    for c in n.c:
        assert c.s.x == -1
        assert c.p == n
        assert c.c == []
        assert np.sum(c.s.b) == 1
        assert c.v == 0
        assert c.N == 0
        if sc == c:
            a = True
    assert a  # the selected child node should be in the children list

    # The AI agent should be compatible with both games: TicTacToe and Othello.
    # now let's test on the game "Othello":

    #---------------------
    # Game: Othello
    g = Othello()  # game
    b = np.array([[0, -1, 1, -1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]])
    b_ = b.copy()
    s = GameState(b, x=1)  #it's X player's turn
    n = MCNode(s)
    # expand
    n.expand(g)
    assert len(n.c) == 2
    assert n.s.x == 1
    # the current game state should not change after expanding
    assert np.allclose(n.s.b, b_)
    # (in the Othello sections the per-child loop does not check N)
    for c in n.c:
        assert type(c) == MCNode
        assert c.p == n
        assert c.c == []
        assert c.v == 0

    # child node A
    b = np.array([[1, 1, 1, -1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]])

    c = False
    for x in n.c:
        if np.allclose(x.s.b, b):
            c = True
            assert x.m == (0, 0)
            assert x.s.x == 1  # it is still X player's turn because there is no valid move for O player
    assert c

    # child node B
    b = np.array([[0, -1, 1, 1, 1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]])

    c = False
    for x in n.c:
        if np.allclose(x.s.b, b):
            c = True
            assert x.m == (0, 4)
            assert x.s.x == -1
    assert c

    #---------------------
    b = np.array([[0, 1, -1, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]])
    b_ = b.copy()
    s = GameState(b, x=-1)  #it's O player's turn
    n = MCNode(s)
    # expand
    n.expand(g)
    print(n.c)  # debug output of the children list
    assert len(n.c) == 3
    assert n.s.x == -1
    # the current game state should not change after expanding
    assert np.allclose(n.s.b, b_)
    for c in n.c:
        assert type(c) == MCNode
        assert c.p == n
        assert c.c == []
        assert c.v == 0

    # child node A
    b = np.array([[-1, -1, -1, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]])

    c = False
    for x in n.c:
        if np.allclose(x.s.b, b):
            c = True
            assert x.m == (0, 0)
            assert x.s.x == -1  # no valid move for X player
    assert c

    # child node B
    b = np.array([[0, 1, -1, -1, -1, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]])

    c = False
    for x in n.c:
        if np.allclose(x.s.b, b):
            c = True
            assert x.m == (0, 4)
            assert x.s.x == 1
    assert c

    # child node C
    b = np.array([[0, 1, -1, 1, 0, 0, 0, 0], [0, 0, -1, 0, 0, 0, 0, 0],
                  [0, 0, -1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]])

    c = False
    for x in n.c:
        if np.allclose(x.s.b, b):
            c = True
            assert x.m == (2, 2)
            assert x.s.x == 1
    assert c