Example #1
def player_vs_nnet(results_folder, player, games, chunk):
    g = Connect4Game()
    results = []
    chunk_number, files = chunk

    for file in files:
        nn = NNet(g)
        nn.load_checkpoint(results_folder, file)
        args = dotdict({'numMCTSSims': 25, 'cpuct': 5.0})
        mcts1 = MCTS(g, nn, args)
        n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

        arena = Arena.Arena(n1p, player, g, display=display)
        result = arena.playGames(games, verbose=False)
        results.append(list(result))

    return chunk_number, results
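
A minimal sketch of how this chunked evaluator might be driven; the folder name, chunk layout, and pool size below are illustrative assumptions, not part of the source:

import os
from multiprocessing import Pool

from connect4.Connect4Game import Connect4Game
from connect4.Connect4Players import RandomPlayer

results_folder = "./results/"  # hypothetical checkpoint directory
files = sorted(f for f in os.listdir(results_folder) if f.endswith(".pth.tar"))
chunks = [(i, files[i::4]) for i in range(4)]  # four interleaved chunks of checkpoints

opponent = RandomPlayer(Connect4Game()).play
with Pool(4) as pool:
    out = pool.starmap(player_vs_nnet,
                       [(results_folder, opponent, 10, chunk) for chunk in chunks])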
Example #2
def errorsTillOptimalPlay(player1, nGames=10):
    """

    :param player1: function that takes a board state as input and returns a move (see Players in Connect4Players)
    :param nGames: number of games to play
    :return: a list of move histories (one per game) and a matching list of dicts mapping move number to the suboptimal 1-based column the player chose
    """
    game = Connect4Game()
    player2 = EngineConnect4Player(game).play
    players = [player2, None, player1]

    moveHistoryList = []
    mistakesList = []

    for i in range(nGames):
        board = game.getInitBoard()
        moveHistory = []
        mistakes = {}
        curPlayer = 1
        move_number = 0

        while game.getGameEnded(board, curPlayer) == 0:

            action = players[curPlayer + 1](game.getCanonicalForm(board, curPlayer))
            valids = game.getValidMoves(game.getCanonicalForm(board, curPlayer), 1)

            assert valids[action] > 0, "invalid move " + str(action)

            moveHistory.append(action)

            # player1 moves first, so with perfect play the theoretical score
            # after each of its moves should stay -1 (a loss for the side to
            # move); anything else marks a mistake: record it and substitute
            # the engine's move instead.
            if curPlayer == 1 and getBoardScoreTheoretical("".join([str(x + 1) for x in moveHistory])) != -1:
                mistakes[move_number] = action + 1
                action = players[0](game.getCanonicalForm(board, curPlayer))
                moveHistory[-1] = action
            board, curPlayer = game.getNextState(board, curPlayer, action)

            move_number += 1

        moveHistoryList.append(moveHistory)
        mistakesList.append(mistakes)

    return moveHistoryList, mistakesList
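
A short usage sketch for the function above, assuming a player function built the same way as n1p in Example #7 (illustrative only):

histories, mistakes = errorsTillOptimalPlay(n1p, nGames=10)
for game_idx, m in enumerate(mistakes):
    print("game", game_idx, "moves:", len(histories[game_idx]), "mistakes:", len(m))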
Example #3
def generateMovesTestFile(folder, model):
    test_files = [
        "Test_L1_R1.txt", "Test_L1_R2.txt", "Test_L1_R3.txt", "Test_L2_R1.txt",
        "Test_L2_R2.txt", "Test_L3_R1.txt"
    ]

    g = Connect4Game()
    nn = NNet(g)
    nn.load_checkpoint(folder, model)
    args = dotdict({'numMCTSSims': 25, 'cpuct': 1})
    mcts1 = MCTS(g, nn, args)
    n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

    for test_file in test_files:
        moves = preform_moves(n1p, test_file)
        out_path = folder + model[:-8] + "_" + test_file  # model[:-8] strips the ".pth.tar" suffix
        print(out_path)
        with open(out_path, "w") as moves_file:
            moves_file.write(str(moves))
Example #4
def main():
    log.info('Loading %s...', Connect4Game.__name__)
    g = Connect4Game()

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        log.info('Loading checkpoint "%s/%s"...', *args.load_folder_file)
        nnet.load_checkpoint(args.load_folder_file[0],
                             args.load_folder_file[1])
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
Example #5
def getBoardScore(moves):
    """

    :param moves: moves performed so far, given either as a string of chars 1-7 or as a list of values 0-6
    :return: negamax score of the board
    """

    if os.name == 'nt':
        path = "C:\\Magistrsko_delo\\connect4\\bin\\board_evaluate.exe"
    else:
        path = "/home/dlusina/connect4/bin/board_evaluate"
    if not isinstance(moves, str):
        moves = "".join([str(move + 1) for move in moves])

    # The engine communicates the score through its exit code; exit codes are
    # unsigned, so values above 42 encode negative scores and must be
    # reinterpreted as signed 32-bit integers.
    process = run(path, stdout=PIPE, input=(moves + "\n").encode())
    score = process.returncode
    if score > 42:
        score = np.uint32(score).view('int32')

    board = get_board(moves)
    if Connect4Game().getGameEnded(board, -1):
        score *= -1  # we have to negate the score if the last move was a winning move

    return score
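
The score follows the usual negamax convention for Connect 4 solvers: positive means the side to move wins with perfect play, negative means it loses, and 0 is a draw. A small illustrative call (the move sequences are arbitrary):

print(getBoardScore("444"))      # string form, columns 1-7
print(getBoardScore([3, 3, 3]))  # the same position as a 0-based list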
Example #6
    def test_connect4_tensorflow(self):
        self.execute_game_test(Connect4Game(), Connect4TensorflowNNet)
Example #7
import Arena
from MCTS import MCTS
from connect4.Connect4Game import Connect4Game
from connect4.Connect4Players import *
from connect4.pytorch.NNet import NNetWrapper as NNet

import numpy as np
from utils import *
"""
use this script to play any two agents against each other, or play manually with
any agent.
"""

human_vs_cpu = True

g = Connect4Game(6, 7, 4)
#g = Connect4Game(5,5,3) # mini

# all players
rp = RandomPlayer(g).play
op = OneStepLookaheadConnect4Player(g).play
hp = HumanConnect4Player(g).play

if True:
    # nnet players
    n1 = NNet(g)
    n1.load_checkpoint('./temp/', 'best.pth.tar')
    args1 = dotdict({'numMCTSSims': 300, 'cpuct': 1.0})
    mcts1 = MCTS(g, n1, args1)
    n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))
    p1_name = 'az-1'
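
The snippet is truncated here. Judging from the sibling pit scripts (Examples #8 and #16), it presumably continues by choosing a second player and handing both to the Arena; a sketch under that assumption, not the original code:

if human_vs_cpu:
    player2 = hp
else:
    n2 = NNet(g)
    n2.load_checkpoint('./temp/', 'best.pth.tar')
    mcts2 = MCTS(g, n2, dotdict({'numMCTSSims': 300, 'cpuct': 1.0}))
    player2 = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))

from connect4.Connect4Game import display  # assumed; Example #15 imports display this way
arena = Arena.Arena(n1p, player2, g, display=display)
print(arena.playGames(2, verbose=True))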
Example #8
def main(argv):
    game_type = ''
    player_types = ['random', 'heuristic', 'minimax', 'alphazero', 'human']
    p1 = ''
    p2 = ''
    try:
        opts, args = getopt.getopt(argv, "hp:o:",
                                   ["help", "player=", "opponent="])
    except getopt.GetoptError:
        print(
            'pit.py -p <player type> -o <opponent type> (random, heuristic, minimax, alphazero, human)'
        )
        sys.exit(2)
    if len(opts) != 2:
        print(
            'pit.py -p <player type> -o <opponent type> (random, heuristic, minimax, alphazero, human)'
        )
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(
                'pit.py -p <player type> -o <opponent type> (random, heuristic, minimax, alphazero, human)'
            )
            sys.exit()
        elif opt in ('-p', '--player'):
            p1 = arg
        elif opt in ('-o', '--opponent'):
            p2 = arg
    if p1 not in player_types or p2 not in player_types:
        print('Invalid player types. Valid player types are:')
        print('\n'.join(player_types))
        sys.exit(2)

    args = dotdict({
        'checkpoint': '.connect4/temp/',
        'load_folder_file': ('connect4/dev/models/8x100x50', 'connect4/best.pth.tar'),
    })
    g = Connect4Game(6)
    p1_ind = pselect(p1)
    p2_ind = pselect(p2)
    print('playing ' + player_types[p1_ind] + ' against ' +
          player_types[p2_ind] + '...')
    # all players
    rp = RandomPlayer(g).play
    gp = OneStepLookaheadConnect4Player(g).play
    hp = HumanConnect4Player(g).play
    mp = MiniMaxConnect4Player(g).play

    # nnet players
    n1 = NNet(g)
    n1.load_checkpoint(folder=args.checkpoint, filename='best.pth.tar')
    args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
    mcts1 = MCTS(g, n1, args1)
    n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

    player_list = [rp, gp, mp, n1p, hp]  # indexed in the same order as player_types

    arena = Arena.Arena(player_list[p1_ind],
                        player_list[p2_ind],
                        g,
                        display=display)
    print(arena.playGames(10, verbose=True))
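
With the setup above, typical invocations look like this (assuming the checkpoint paths in args exist):

python pit.py -p alphazero -o random
python pit.py --player human --opponent minimax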
Example #9
    def test_connect4_keras(self):
        self.execute_game_test(Connect4Game(5), Connect4KerasNNet)
Example #10
    def executeEpisode(self):
        """
        This function executes one episode of self-play, starting with player 1.
        As the game is played, each turn is added as a training example to
        trainExamples. The game is played till the game ends. After the game
        ends, the outcome of the game is used to assign values to each example
        in trainExamples.

        It uses a temp=1 if episodeStep < tempThreshold, and thereafter
        uses temp=0.

        Returns:
            trainExamples: a list of examples of the form (canonicalBoard,pi,v)
                           pi is the MCTS informed policy vector, v is +1 if
                           the player eventually won the game, else -1.
        """
        trainExamples = []
        board = self.game.getInitBoard()
        self.curPlayer = 1
        episodeStep = 0
        moveHistory = []

        if "openings_prob" in self.args:
            use_opening = random.random() < self.args.openings_prob
        else:
            use_opening = False
        if use_opening:
            opening = opening_tree()

        while True:
            episodeStep += 1
            canonicalBoard = self.game.getCanonicalForm(board, self.curPlayer)
            temp = int(episodeStep < self.args.tempThreshold)

            pi = self.mcts.getActionProb(canonicalBoard, temp=temp)
            valids = self.game.getValidMoves(canonicalBoard, self.curPlayer)
            pi = pi * valids
            pi = pi / sum(pi)

            if not use_opening or episodeStep >= len(opening):
                if self.args.heuristic_type == 'combined':
                    fraction = self.args.heuristic_probability
                    h_prob = self.args.heuristic_function(canonicalBoard)
                    new_pi = (np.array(pi) * (1 - fraction) +
                              h_prob * fraction)
                    if self.args.change_probabilities:
                        pi = new_pi

                    action = np.random.choice(len(new_pi), p=new_pi)
                elif self.args.heuristic_type in ('normal', 'cooling'):
                    if self.args.heuristic_type == 'cooling':
                        # cool the heuristic probability linearly to 0 over the
                        # (at most) 42 moves of a Connect 4 game
                        prob = self.args.heuristic_probability * (1 - (episodeStep - 1) / 42)
                    else:
                        prob = self.args.heuristic_probability
                    if np.random.ranf(1)[0] > prob:
                        action = np.random.choice(len(pi), p=pi)
                    else:
                        new_pi = self.args.heuristic_function(canonicalBoard)
                        if self.args.change_probabilities:
                            pi = new_pi
                        action = np.random.choice(len(new_pi), p=new_pi)
                elif self.args.heuristic_type == 'cooling_iter':
                    fraction = max(0, ((50 - (self.args.curIter - 1)) / 50))
                    h_prob = heuristic2_prob(canonicalBoard)
                    new_pi = (np.array(pi) * (1 - fraction) +
                              h_prob * fraction)
                    if self.args.change_probabilities:
                        pi = new_pi
                    action = np.random.choice(len(new_pi), p=new_pi)

                elif self.args.heuristic_type == 'custom':
                    prob = self.args.probability_function(episodeStep)
                    if np.random.ranf(1)[0] > prob:
                        action = np.random.choice(len(pi), p=pi)
                    else:
                        action = self.args.heuristic_function(canonicalBoard)
                elif self.args.heuristic_type == 'perfect':
                    action = EngineConnect4Player(
                        Connect4Game()).play(canonicalBoard)
                elif self.args.heuristic_type == 'default':
                    action = np.random.choice(len(pi), p=pi)
                else:
                    raise NameError("Wrong heuristic type '" +
                                    self.args.heuristic_type + "'")

            else:
                action = opening[episodeStep - 1]
                # pi = np.array(7)
                # pi[action] = 1

            sym = self.game.getSymmetries(canonicalBoard, pi)
            for b, p in sym:
                if np.all(b == canonicalBoard):
                    trainExamples.append(
                        [b, self.curPlayer, p,
                         list(moveHistory), None])
                else:
                    trainExamples.append([
                        b, self.curPlayer, p, [6 - x for x in moveHistory],
                        None
                    ])

            board, self.curPlayer = self.game.getNextState(
                board, self.curPlayer, action)
            moveHistory.append(action)
            r = self.game.getGameEnded(board, self.curPlayer)

            if r != 0:
                if self.args.supervised:
                    result = []
                    for x in trainExamples:
                        r = getBoardScoreTheoretical(x[3])
                        result.append((x[0], x[2], r))

                        print(x[0], "Moves",
                              "".join([str(i + 1) for i in x[3]]),
                              "Theoretical value", r)

                    return result
                else:
                    if self.args.value_game_length:
                        # scale the target value with game length: ~1.17 for a
                        # win on move 1 down to ~0.01 for a win on move 42
                        r = 1.198 - 99 / 3500 * episodeStep

                    res = [[x[0], x[2], r * ((-1)**(x[1] != self.curPlayer))]
                           for x in trainExamples]

                    if use_opening:
                        score = 1
                        for i in range(len(opening)):
                            res[i][2] = score
                            score *= -1
                    return res
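
The value assignment in the default case hinges on r * ((-1)**(x[1] != self.curPlayer)): r is the result from the perspective of self.curPlayer (the player to move after the final move), so examples recorded by that player keep r and the opponent's examples get -r. A toy check of the sign rule, with illustrative values:

cur_player = -1  # player to move when the game ended
r = -1           # getGameEnded result from cur_player's perspective (they lost)
for recorded_player in (1, -1):
    v = r * ((-1) ** (recorded_player != cur_player))
    print(recorded_player, "->", v)  # prints: 1 -> 1 (the winner), -1 -> -1 (the loser)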
Example #11
if choice == "gobang":
    g = GobangGame(5, 4)
    n1 = NNet1(g)
    n1.load_checkpoint('./temp/', 'curent13temp_iter75_eps350_dim5.pth.tar')
    gamename = "gobang"
    display = display1
    hp = HumanGobangPlayer(g).play
if choice == "othello":
    g = OthelloGame(8)
    n1 = NNet2(g)
    n1.load_checkpoint('./temp/', 'curent14temp:iter14:eps200:dim8.pth.tar')
    gamename = "othello"
    display = display2
    hp = MinMaxOthelloPlayer(g, 3).play
if choice == "connect4":
    g = Connect4Game(5, 6)
    n1 = NNet3(g)
    n1.load_checkpoint('./temp/', 'best75_eps300_dim5.pth.tar')
    gamename = "connect4"
    display = display3
    hp = HumanConnect4Player(g).play

# all players
#rp = RandomPlayer(g).play
#gp = GreedyOthelloPlayer(g).play

# nnet players
args1 = dotdict({
    'numMCTSSims': 400,
    'cpuct': 1.5,
    'epsilon': 0,
Example #12
    'numEps': 100,              # Number of complete self-play games to simulate during a new iteration.
    'tempThreshold': 15,
    'updateThreshold': 0.6,     # During arena playoff, new neural net will be accepted if threshold or more of games are won.
    'maxlenOfQueue': 200000,    # Number of game examples to train the neural networks.
    'numMCTSSims': 25,          # Number of game moves for MCTS to simulate.
    'arenaCompare': 40,         # Number of games to play during arena play to determine if new net will be accepted.
    'cpuct': 1,
    'checkpoint': './temp/',
    'load_model': False,
    'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
    'numItersForTrainExamplesHistory': 20,
})

if __name__ == "__main__":
    g = Connect4Game(6, 7, 4, None)
    nnet = nn(g)

    if args.load_model:
        nnet.load_checkpoint(args.load_folder_file[0],
                             args.load_folder_file[1])

    c = Coach(g, nnet, args)
    if args.load_model:
        print("Load trainExamples from file")
        c.loadTrainExamples()
    c.learn()
Example #13
def experiment(game):
    np.random.seed(556)
    height, width, win_streak = game
    g = Connect4Game(height, width, win_streak)
    if game == (4, 5, 3):
        total_episodes, ep_step = n_episodes[0], 10000
    elif game == (5, 6, 4):
        total_episodes, ep_step = n_episodes[1], 20000
    else:
        total_episodes, ep_step = n_episodes[2], 28000
    ep_range = np.arange(0, total_episodes + ep_step, ep_step) + 1
    ep_range[0] = 0
    ep_range = ep_range.astype(int)
    for lr in lrs:
        for i in epsilon_config:
            print('Config: Game', game, 'lr', lr, 'epsilon', i)
            test_wr_list = []
            test_wr_list_op = []
            test_wr = []
            test_wr_op = []
            if i == 'f':
                # fixed exploration rate
                q_agent = QAgent(g, episodes=total_episodes, lr=lr,
                                 epsilon=0.2, dc=1, e_min=0.001, ep_arena=ep_step)
            else:
                # decaying exploration rate
                q_agent = QAgent(g, episodes=total_episodes, lr=lr,
                                 epsilon=1, dc=0.99, e_min=0.001, ep_arena=ep_step)
            rp = RandomPlayer(g).play
            op = OneStepLookaheadConnect4Player(g, verbose=False).play
            q_agent_play = q_agent.play
            start = time()
            for idx, episode in enumerate(ep_range):
                if episode == ep_range[-1]:
                    break
                if episode == 0:
                    lo, hi = 0, ep_range[idx + 1] - 1
                elif episode == ep_range[-2]:
                    lo, hi = episode - 1, total_episodes
                else:
                    lo, hi = episode - 1, ep_range[idx + 1] - 1
                print('Training for Episodes ', lo, ' to ', hi, '...', sep='')
                q_agent.train(cur_episode=episode)
                print('Training Finished.')
                print('Playing in Arena...')
                wins = 0
                temp = []
                for repet in range(reps):
                    arena_rp_op = Arena.Arena(q_agent_play,
                                              rp,
                                              g,
                                              display=display)
                    w, _, _ = arena_rp_op.playGames(n_games, verbose=False)
                    temp.append(w / n_games)
                    wins += w
                test_wr_list.append(temp)
                test_wr.append(wins / (reps * n_games))
                print('\n')

                wins_op = 0
                temp = []
                for repet in range(reps):
                    arena_rp_op = Arena.Arena(q_agent_play,
                                              op,
                                              g,
                                              display=display)
                    w_op, _, _ = arena_rp_op.playGames(n_games, verbose=False)
                    temp.append(w_op / n_games)
                    wins_op += w_op
                test_wr_list_op.append(temp)
                test_wr_op.append(wins_op / (reps * n_games))
                print('\n')

            end = time()
            training_time = np.array([end - start])
            suffix = str(game) + '_' + str(lr) + '_' + str(i)
            np.save('train_wr_connect4_' + suffix + '_rp', q_agent.total_wins)
            np.save('train_ep_connect4_' + suffix + '_rp', q_agent.total_eps)
            np.save('test_wr_connect4_' + suffix + '_rp', test_wr)
            np.save('test_wr_list_connect4_' + suffix + '_rp', test_wr_list)
            np.save('training_time_' + suffix + '_rp', training_time)

            np.save('test_wr_connect4_' + suffix + '_op', test_wr_op)
            np.save('test_wr_list_connect4_' + suffix + '_op', test_wr_list_op)
            print('\n')
    # all results are persisted to .npy files above


Example #14
print('Start Parallel')
global_start = time()
microsecs = np.array([
    10000, 50000, 100000, 250000, 500000, 750000, 1000000, 1500000, 2000000,
    3000000
])
games = [(4, 5, 3), (5, 6, 4), (6, 7, 4)]
players = ['rp', 'op']
for player in players:
    if player == 'rp':
        cs = [1, 4, 3]
    else:
        cs = [5, 2, 5]
    for i, c in zip(games, cs):
        c_best = c
        height = i[0]
        width = i[1]
        win_streak = i[2]
        global_start = time()
        g = Connect4Game(height, width, win_streak)
        data = Parallel(n_jobs=10)(delayed(experiment)(m) for m in microsecs)
        np.save(
            'mcts_best_connect4_results_' + player + '_' + str(height) +
            str(width) + str(win_streak), data)
        print('Game: ' + str(i) + ' against ' + player + ', Time: ' +
              str(time() - global_start))
Example #15
from MCTS import MCTS
from connect4.Connect4Game import Connect4Game, display
from connect4.Connect4Players import HumanConnect4Player
from connect4.tensorflows.NNet import NNetWrapper as NNet
from utils import dotdict
import numpy as np

if __name__ == '__main__':
    goingFirst = True
    folder = "H:\\alpha-zero-trained\\final\\h2\\mcts_visits_tanh\\default\\1\\"

    game = Connect4Game()
    nn = NNet(game)
    nn.load_checkpoint(folder, 'best.pth.tar')
    args = dotdict({'numMCTSSims': 25, 'cpuct': 1})
    mcts1 = MCTS(game, nn, args)
    AI = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

    human = HumanConnect4Player(game).play

    if goingFirst:
        players = [AI, None, human]
    else:
        players = [human, None, AI]

    curPlayer = 1
    board = game.getInitBoard()
    while game.getGameEnded(board, curPlayer) == 0:
        display(board, symbols=True)

        action = players[curPlayer + 1](game.getCanonicalForm(board, curPlayer))
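
The example is cut off inside the game loop. Following the identical loop in Example #2, the body presumably continues along these lines (a sketch, not the source):

        valids = game.getValidMoves(game.getCanonicalForm(board, curPlayer), 1)
        assert valids[action] > 0, "player returned an invalid move"
        board, curPlayer = game.getNextState(board, curPlayer, action)

    display(board, symbols=True)
    print("Game over. Result:", game.getGameEnded(board, 1))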
Example #16
from MCTS import MCTS
from connect4.Connect4Players import *
from Coach import Coach
from connect4.Connect4Game import Connect4Game
from connect4.tensorflow.NNet import NNetWrapper as NNet
from utils import dotdict

import numpy as np
"""
use this script to play any two agents against each other, or play manually with
any agent.
"""
human_vs_cpu = True

g = Connect4Game()

# all players
rp = RandomPlayer(g).play
gp = OneStepLookaheadConnect4Player(g).play
hp = HumanConnect4Player(g).play

# nnet players
n1 = NNet(g)
n1.load_checkpoint('./temp/', 'best.pth.tar')
args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts1 = MCTS(g, n1, args1)
n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

if human_vs_cpu:
    player2 = hp
Example #17
def experiment(m):
    # reconstructed from context: this fragment is the body of the function
    # mapped over 'microsecs' below via Parallel(...)(delayed(experiment)(m) ...)
    data = []
    for c in range(6):
        mcts = MCTSAgent(g, iters=100000000, c=c, rollout_iter=1, time=m).play
        if player == 'rp':
            opponent = RandomPlayer(g).play
        else:
            opponent = OneStepLookaheadConnect4Player(g, verbose=False).play
        arena_rp_hp = Arena.Arena(mcts, opponent, g, display=display)
        wins, loss, draw = arena_rp_hp.playGames(100, verbose=False)
        data.append([m, c, wins, loss, draw])
    return data


print('Start Parallel Simulation for Connect4: (4,5,3) (5,6,4) (6,7,4)')
global_start = time()
microsecs = np.array([
    10000, 50000, 100000, 250000, 500000, 750000, 1000000, 1500000, 2000000,
    3000000
])
games = [(4, 5, 3), (5, 6, 4), (6, 7, 4)]
players = ['rp', 'op']

for player in players:
    for i in games:
        global_start = time()
        g = Connect4Game(i[0], i[1], i[2])
        data = Parallel(n_jobs=10)(delayed(experiment)(m) for m in microsecs)
        np.save('connect4_results_' + player + '_' + str(i), data)
        print('Game: ' + str(i) + ', Opponent: ' + player + ', Time: ' +
              str(time() - global_start))