Ejemplo n.º 1
0
    def select(self, gamestate):
        """Run one selection/expansion pass of the tree search from ``gamestate``.

        If ``connectFour.checkWinner`` reports a finished game, the negated
        winner value is returned. Otherwise the valid column with the highest
        upper-confidence score is played; if the resulting position is already
        stored, the search recurses into it and the running mean q value and
        visit count of the chosen move are updated. If the position is new,
        a fresh statistics record is stored for it and a rollout is performed.

        Args:
            gamestate: board of the position to search from; it must already
                have an entry in the stored game states.
                NOTE(review): ``connectFour.play`` appears to modify the board
                it receives (alphabeta in this file pairs it with ``unplay``),
                so callers presumably pass a board they can afford to have
                changed -- confirm.

        Returns:
            The negated value obtained from the terminal check, the recursive
            call, or the rollout.
        """
        # checkWinner returns 2 while the game is still undecided; any other
        # value ends the descent.
        win = connectFour.checkWinner(gamestate)
        if win != 2:
            return -1 * win

        # Hashable key for this position, taken before any move is played.
        tupled_gamestate = tuple(map(tuple, gamestate))
        # Per-position record: [0] q values, [1] visit counts n, [2] priors p,
        # [3] player to move.
        stats = self.__gamestates[tupled_gamestate]
        q_values, visit_counts, priors = stats[0], stats[1], stats[2]

        # Shuffle the valid moves so that ties between equal scores are
        # broken randomly.
        valid_moves = [
            column for column in range(7)
            if connectFour.check_valid(gamestate, column)
        ]
        np.random.shuffle(valid_moves)

        # Pick the move with the highest upper confidence bound. The square
        # root of the total visit count is the same for every candidate, so
        # it is computed once.
        sqrt_total_visits = math.sqrt(np.sum(visit_counts))
        move = -1
        max_ucb = -1 * float("inf")
        for candidate in valid_moves:
            ucb = (q_values[candidate] * (1 - self.exploration_factor) +
                   self.exploration_factor * priors[candidate] *
                   sqrt_total_visits / (1 + visit_counts[candidate]))
            if max_ucb <= ucb:
                # Remember the best score so far; without this every
                # candidate would pass the comparison against -inf.
                max_ucb = ucb
                move = candidate

        # Generate the new state.
        new_state = connectFour.play(gamestate, stats[3], move)

        tupled_new_state = tuple(map(tuple, new_state))

        if tupled_new_state in self.__gamestates:
            # Known position: recurse, then fold the result into the running
            # mean q value of the chosen move and count the visit.
            win = self.select(new_state)
            parent = self.__gamestates[tupled_gamestate]
            parent[0][move] = (parent[0][move] * parent[1][move] +
                               win) / (parent[1][move] + 1)
            parent[1][move] += 1
            return -1 * win

        # New position: store a fresh record (zeroed q and n, network output
        # minus its last entry as p, opponent to move) and roll out once.
        result = self.neural_net.feedforward(gamestate)
        v_prime = result[:-1]
        # The rows have different lengths (the last entry is a scalar), so the
        # container has to be an object array; recent NumPy versions reject
        # ragged input unless dtype=object is given explicitly.
        new_stats = np.array(
            [[0.0] * 7, [0.0] * 7, v_prime, -1 * stats[3]], dtype=object)
        self.__gamestates[tupled_new_state] = new_stats
        # NOTE(review): the parent's q/n entries are not updated on this
        # first visit of a new child -- confirm that this is intended.
        win = self.rollout(gamestate, -1 * stats[3])
        return -1 * win
def pickMove(board, player, depth, network):
    """Choose a column for ``player`` by scoring every valid move with alphabeta.

    Each valid column is played on a deep copy of ``board`` and the resulting
    position is searched with ``alphabeta`` at ``depth - 1`` with the opponent
    to move. The first column with the highest score is returned.

    Args:
        board: current board; it is copied before each trial move.
        player: the side to choose a move for (its negation is passed on as
            the side to move next).
        depth: search depth budget; each trial move consumes one level.
        network: evaluator handed through to ``alphabeta``.

    Returns:
        The column index of the best-scoring valid move.

    Raises:
        ValueError: if no column is valid (``max`` of an empty list).
    """
    # Initial alpha/beta window handed to the search.
    window = 1234567 - 50

    candidates = [
        column for column in range(7)
        if connectFour.check_valid(board, column)
    ]

    scores = [
        alphabeta(connectFour.play(deepcopy(board), player, column),
                  depth - 1, -1 * window, window, player, -1 * player,
                  network)
        for column in candidates
    ]

    best_position = scores.index(max(scores))
    return candidates[best_position]
def alphabeta(node, depth, alpha, beta, player, currPlayer, network):
    """Score ``node`` for ``player`` with depth-limited alpha-beta search.

    Args:
        node: board to evaluate; moves are played on it and taken back with
            ``connectFour.unplay`` after each child has been searched.
        depth: remaining search depth; at ``depth <= 0`` the network is used.
        alpha: best score the maximizing side is already assured of.
        beta: best score the minimizing side is already assured of.
        player: the side the score is computed for.
        currPlayer: the side to move at ``node``.
        network: object whose ``feed_forward(node)`` result is indexed with
            0 when ``player`` is 1 and with 1 otherwise.

    Returns:
        For a finished game, the ``checkWinner`` value scaled by
        ``(1234567 + depth) * player``; at the depth limit, the network entry
        for ``player``; otherwise the max (``player`` to move) or min
        (opponent to move) over the searched children.
    """
    # checkWinner returns 2 while the game is undecided.
    outcome = connectFour.checkWinner(node)
    if outcome != 2:
        # Adding the remaining depth makes results reached earlier in the
        # search larger in magnitude.
        return outcome * (1234567 + depth) * player

    if depth <= 0:
        evaluation = network.feed_forward(node)
        return evaluation[0] if player == 1 else evaluation[1]

    columns = [
        column for column in range(7)
        if connectFour.check_valid(node, column)
    ]
    random.shuffle(columns)

    limit = 1234567 - 50
    opponent = -1 * currPlayer

    if player == currPlayer:
        # Maximizing level.
        best = -1 * limit
        for column in columns:
            child_score = alphabeta(
                connectFour.play(node, currPlayer, column), depth - 1, alpha,
                beta, player, opponent, network)
            best = max(best, child_score)
            connectFour.unplay(node, column)
            alpha = max(alpha, best)
            if beta <= alpha:
                break  # beta cutoff
        return best
    else:
        # Minimizing level.
        best = 1 * limit
        for column in columns:
            child_score = alphabeta(
                connectFour.play(node, currPlayer, column), depth - 1, alpha,
                beta, player, opponent, network)
            best = min(best, child_score)
            connectFour.unplay(node, column)
            beta = min(beta, best)
            if beta <= alpha:
                break  # alpha cutoff
        return best
Ejemplo n.º 4
0
            results = cnn.feedforward(np.array([board]))[:-1]
            connectFour.play(board, 1, np.where(results == max(results)))
            connectFour.print_board(board)
            # raw_input("press")
            print
            if not connectFour.checkWinner(board) == 2:
                break
            results = cnn.feedforward(np.array([board]))[:-1]
            connectFour.play(board, -1, np.where(results == max(results)))
            connectFour.print_board(board)
        print("WINNER:" + str(connectFour.checkWinner(board)))

# Interactive games, repeated forever: the human plays as 1 and the network
# (cnn) answers as -1. A new empty 6x7 board is created for every game.
while True:
    board = np.zeros((6, 7))
    connectFour.print_board(board)
    while connectFour.checkWinner(board) == 2:
        # input() returns text on Python 3, so convert it to a column index
        # and ask again when the entry is not a number.
        try:
            move = int(input("make a move: "))
        except ValueError:
            continue
        # Reject columns outside the board; a negative index would otherwise
        # silently address a column counted from the right.
        if not 0 <= move < board.shape[1]:
            continue
        if not connectFour.check_valid(board, move):
            continue
        #connectFour.play(board, 1, minimax.pickMove(board, 1, 3, net0))
        connectFour.play(board, 1, move)
        connectFour.print_board(board)
        # Blank separator line; a bare `print` does nothing on Python 3.
        print("")
        if not connectFour.checkWinner(board) == 2:
            break
        results = cnn.feedforward(np.array([board]))[:-1]
        # argmax yields exactly one column even when several entries tie for
        # the maximum.
        # NOTE(review): the chosen column is not checked with check_valid --
        # confirm whether the network can select a full column.
        connectFour.play(board, -1, int(np.argmax(results)))
        connectFour.print_board(board)
    print("WINNER:" + str(connectFour.checkWinner(board)))