Example #1
def logCurrentCapabilities(game, iter_num, args):
    gpus = args.setGPU.split(',')
    os.environ["CUDA_VISIBLE_DEVICES"] = gpus[iter_num % len(gpus)]

    # improved nnet player
    n2 = nn(game)
    n2.load_checkpoint('./temp/', 'best.pth.tar')
    #args2 = dotdict({'numMCTSSims': args.numMCTSSims, 'cpuct':args.cpuct, 'multiGPU':True})
    mcts2 = MCTS(game, n2, args)
    n2p = lambda b, p: np.argmax(mcts2.getActionProb(b, p, temp=0))

    # Heuristic player:
    heuristic = Heuristic(game).random_play

    # Random Player:
    rp = RandomPlayer(game).play

    arena = Arena(n2p, heuristic, game, display=display)
    resultHeur = "{} {}".format(*arena.playGames(40, verbose=False)[:2])

    arena = Arena(n2p, rp, game, display=display)
    resultRand = "{} {}".format(*arena.playGames(40, verbose=False)[:2])

    MyLogger.info("Iter:{} Heuristic: {} Random: {}".format(
        iter_num, resultHeur, resultRand))
    print("Iter:{} Heuristic: {} Random: {}\n".format(iter_num, resultHeur,
                                                      resultRand))
Example #2
def play_games(game, args, processID, enemy):
    np.random.seed(processID)
    #set gpu
    gpus = args.setGPU.split(',')
    os.environ["CUDA_VISIBLE_DEVICES"] = gpus[processID % len(gpus)]

    # set GPU memory growth
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # Players:
    heuristic = Heuristic(game).random_play
    policy = PolicyPlayer(game).play
    rp = RandomPlayer(game).play

    if enemy == "heuristic": second_player = heuristic
    elif enemy == "rp": second_player = rp
    elif enemy == "n1p":
        # improved nnet player
        n1 = nn(game)
        n1.load_checkpoint('./temp/', 'best.pth.tar')
        mcts1 = MCTS(game, n1, args, lambdaHeur=args.lambdaHeur)
        n1p = lambda b, p: np.argmax(mcts1.getActionProb(b, p, temp=0))

        second_player = n1p
        arena = Arena(n1p, heuristic, game, display=display)
        return arena.playGames(args.numPerProcessAgainst, verbose=False)

    arena = Arena(policy, second_player, game, display=display)

    return arena.playGames(args.numPerProcessAgainst, verbose=False)
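
Note: the ConfigProto/Session calls in these snippets are TensorFlow 1.x API. Below is a minimal sketch of the same per-process GPU setup for TensorFlow 2.x; it assumes TF 2 is installed, and the helper name configure_gpu is ours, not the project's.

import os
import tensorflow as tf

def configure_gpu(process_id, gpu_string):
    # pin this process to a single visible GPU, mirroring the snippet above
    gpus = gpu_string.split(',')
    os.environ["CUDA_VISIBLE_DEVICES"] = gpus[process_id % len(gpus)]
    # TF2 replacement for ConfigProto(gpu_options.allow_growth=True) + Session
    for gpu in tf.config.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(gpu, True)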
Example #3
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenofQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            log.info(f'Starting Iter #{i} ...')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

                for _ in tqdm(range(self.args.numEps), desc="Self Play"):
                    self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                log.warning(
                    f"Removing the oldest entry in trainExamples. len(trainExamplesHistory) = {len(self.trainExamplesHistory)}")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            # self.loadTrainExamples()
            # trainExamples = self.trainExamplesHistory
            pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            log.info('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x, y: np.argmax(pmcts.getActionProb(x, y, temp=0, player=1)),
                          lambda x, y: np.argmax(nmcts.getActionProb(x, y, temp=0, player=-1)), self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:
                log.info('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            else:
                log.info('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
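
The docstring above refers to several hyperparameters (numIters, numEps, maxlenOfQueue, numItersForTrainExamplesHistory, updateThreshold, arenaCompare). For orientation, here is a purely illustrative sketch of the kind of dotdict such a learn() loop expects; the values are placeholders, not the original project's settings.

args = dotdict({
    'numIters': 100,                        # training iterations
    'numEps': 25,                           # self-play episodes per iteration
    'maxlenOfQueue': 200000,                # cap on examples kept per iteration
    'numItersForTrainExamplesHistory': 20,  # iterations of history to retain
    'updateThreshold': 0.6,                 # win fraction required to accept the new net
    'arenaCompare': 40,                     # games played when pitting new vs. old
    'checkpoint': './temp/',                # where temp.pth.tar / best.pth.tar are written
})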
Example #4
    def pitter(self):
        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                  filename='best1.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                  filename='best2.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                      self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        if pwins + nwins > 0 and float(nwins) / (
                pwins + nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='best.pth.tar')
        self.aws_s3_sync()
Example #5
def PitAgents(agent1, agent2, boardSize: int, gameCount: int):
    game = Game(boardSize)
    print('(P1 = {0}) vs. (P2 = {1})'.format(agent1.name, agent2.name))
    arena = Arena(agent1.playFunc, agent2.playFunc, game)
    p1wins, p2wins, draws = arena.playGames(gameCount)

    print('P1/P2 WINS : %d / %d ; DRAWS : %d' % (p1wins, p2wins, draws))
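
PitAgents only requires objects exposing .name and .playFunc. A hypothetical usage sketch with a thin wrapper around the player callables seen elsewhere in these examples; the NamedAgent class and the chosen values are ours, not the project's.

class NamedAgent:
    # hypothetical wrapper: PitAgents reads only .name and .playFunc
    def __init__(self, name, playFunc):
        self.name = name
        self.playFunc = playFunc

g = Game(6)
PitAgents(NamedAgent('Random A', RandomPlayer(g).play),
          NamedAgent('Random B', RandomPlayer(g).play),
          boardSize=6, gameCount=20)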
Example #6
    def pitting(self, previous_weights, current_weights, games_num):
        """Fighting between previous generation agent and current generation agent

        Args:
            previous_weights (numpy.array): weights of previous generation neural network
            current_weights (numpy.array): weights of current generation neural network
            game_num (int): game number of fighting 

        Returns:
            tuple of (game number of previous agent won, game number of current agent won, game number of draw)
        """
        # update weights of previous and current neural network
        self.previous_agent.set_weights(previous_weights)
        self.current_agent.set_weights(current_weights)

        # reset node state of MCTS
        self.previous_mcts = MCTS(self.game, self.previous_agent, self.args)
        self.current_mcts = MCTS(self.game, self.current_agent, self.args)

        arena = Arena(
            lambda x: np.argmax(self.previous_mcts.getActionProb(x, temp=0)),
            lambda x: np.argmax(self.current_mcts.getActionProb(x, temp=0)),
            self.game)
        previous_wins, current_wins, draws = arena.playGames(games_num)

        return (previous_wins, current_wins, draws)
    def optimize_and_evaluate(self):
        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)
    
        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        # if self.args.arenaCompare is large enough both nmcts and pmcts consume huge amount of RAM.
        # RAM consumed depends also on your game implementation, particularly on game.getActionSize
        # and size of game.stringRepresentation.
        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        # input("Arena finished, continue?\n")

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        if pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')    
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
        
        print("")
        print("Previous NN MCTS stats")
        pmcts.print_stats()                
        pmcts = None
        print("New NN MCTS stats")
        nmcts.print_stats()                
        nmcts = None
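
pitting() above returns raw win counts and leaves the promotion decision to the caller. A hedged sketch of applying the same updateThreshold rule used in the other examples; the worker, args, and weight variable names are assumed, not taken from the source.

previous_wins, current_wins, draws = worker.pitting(prev_weights, cur_weights,
                                                    args.arenaCompare)
decided = previous_wins + current_wins
if decided > 0 and current_wins / decided >= args.updateThreshold:
    best_weights = cur_weights   # accept the current generation
else:
    best_weights = prev_weights  # keep the previous generation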
Example #8
def callgreedy(num, q, args):
    '''

    :param num: number of games
    :param q: the queue that will store the results
    :param args: configs
    :return: doesn't return anything, the results are stored in q

    It uses the greedy agent to play the specified games
    '''

    from tictactoe.TicTacToeGame import TicTacToeGame as Game
    from tictactoe.tensorflow.NNet import NNetWrapper as nn
    verify = 0
    while verify == 0:
        try:
            g = Game(3)
            nnet = nn(g, 0.06)
            filenameCurrent = "currentforprocess:temp:iter" + str(args.numIters) + \
                              ":eps" + str(args.numEps) + ":dim" + str(g.n) + ".pth.tar"

            nnet.load_checkpoint(folder=args.checkpoint, filename=filenameCurrent)

            gp = returnplayer(args, "greedy", g)
            nmcts1 = MCTS(g, nnet, args)
            arenagreedy = Arena(lambda x: np.argmax(nmcts1.getActionProb(x, temp=0)), gp, g)
            pwins, nwins, drawwins = arenagreedy.playGames(num)
            q.put((pwins, nwins, drawwins))
            nmcts1.clear()
            verify = 1
        except:
            verify = 0
Example #9
def main():
    game = Connect4Game()
    config = Config()

    rp = RandomPlayer(game).play
    oslp = OneStepLookaheadPlayer(game).play
    hp = HumanConnect4Player(game).play
    mctsp = MCTSPlayer(game, config).play

    c4config = C4Config()
    nn = NNetWrapper(game, c4config)
    ckpt = ('./trained/connect4','connect4_best_34.pth.tar')
    nn.load_checkpoint(ckpt[0], ckpt[1])
    nnp = NNetPlayer(game, nn, c4config).play

    nn2 = NNetWrapper(game, c4config)
    ckpt2 = ('./trained/connect4','connect4_checkpoint_26.pth.tar')
    nn2.load_checkpoint(ckpt2[0], ckpt2[1])
    nnp2 = NNetPlayer(game, nn2, c4config).play


    arena = Arena(hp, nnp2, game, display=display)
    # arena = ArenaMP(nnp, nnp2, game, display=display)
    # arena.playGame(verbose=True)
    out = arena.playGames(50, verbose=True)
    print(out)
def test_play_games_cumulative_score():
    # getGameEnded yields a nonzero score on the first call of every game, so each
    # of the 10 games ends immediately; the scores (five 1s, then five 5s) are
    # accumulated per side rather than counted as wins.
    mock_player = mock.Mock()
    mock_game = mock.Mock()
    mock_game.getGameEnded.side_effect = [1, 1, 1, 1, 1, 5, 5, 5, 5, 5]
    arena = Arena(mock_player, mock_player, mock_game)
    p1_score, p2_score = arena.playGames(10, verbose=False)
    assert p1_score == 5   # 5 * 1
    assert p2_score == 25  # 5 * 5
Example #11
    def learn(self):
        for i in range(1, self.args.numIters+1):
            self.iter = i
            # bookkeeping
            print('------ITER ' + str(i) + '------')

            if self.args.use_pitting:
                # training new network, keeping a copy of the old one
                if os.path.exists(os.path.join(self.args.checkpoint,'best.pth.tar.index')):
                    self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
                else:
                    self.pnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
                pmcts_game = self.game.__class__()
                pmcts_game.getInitBoard()
                pmcts = MCTS(pmcts_game, self.pnet, self.args)
            

            if self.args.comment_training:
                self.learn_comment_iter()
            
            if not self.args.nn_args['is_train'] and not self.args.use_pitting:
                break

            if self.args.chess_training:
                self.learn_chess_iter()
           
            if self.args.use_self_play:
                self.learn_self_play_iter()
                self.mcts = MCTS(self.game, self.nnet, self.args)

            if self.args.save_model:
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                if self.args['comment_training']:
                    self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='new.pth.tar')
            
            if self.args.use_pitting:
                nmcts_game = self.game.__class__()
                nmcts_game.getInitBoard()
                nmcts = MCTS(nmcts_game, self.nnet, self.args)

                print('PITTING AGAINST PREVIOUS VERSION')
                arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                            lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
                pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

                print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            
                if pwins+nwins == 0 or float(nwins)/(pwins+nwins) < self.args.updateThreshold:
                    print('REJECTING NEW MODEL')
                else:
                    print('ACCEPTING NEW MODEL')
                    self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
                    if self.args.use_self_play: 
                        self.trainExampleSelfPlay = []
                        self.selfplaynum+=1
                nmcts = None
                pmcts = None
Example #12
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenofQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        trainExamples = deque([], maxlen=self.args.maxlenOfQueue)
        for i in range(self.args.numIters):
            # bookkeeping
            print('------ITER ' + str(i+1) + '------')
            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                self.mcts = MCTS(self.game, self.nnet, self.args)   # reset search tree
                trainExamples += self.executeEpisode()                

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix  = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(eps=eps+1, maxeps=self.args.numEps, et=eps_time.avg,
                                                                                                           total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            pnet = self.nnet.__class__(self.game)
            pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            pmcts = MCTS(self.game, pnet, self.args)
            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : ' + str(nwins) + '/' + str(pwins) + ' ; DRAWS : ' + str(draws))
            if pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet = pnet

            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='checkpoint_' + str(i) + '.pth.tar')
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')                
Example #13
def Async_Play(game, args, iter_num, bar):
    bar.suffix = "iter:{i}/{x} | Total: {total:} | ETA: {eta:}".format(
        i=iter_num + 1,
        x=args.numPlayGames,
        total=bar.elapsed_td,
        eta=bar.eta_td)
    bar.next()

    # set gpu
    if (args.multiGPU):
        if (iter_num % 2 == 0):
            os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        else:
            os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.setGPU

    # set gpu growth
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # create NN
    model1 = NNet(game)
    model2 = NNet(game)

    # try load weight
    try:
        model1.load_checkpoint(folder=args.model1Folder,
                               filename=args.model1FileName)
    except:
        print("load model1 fail")
        pass
    try:
        model2.load_checkpoint(folder=args.model2Folder,
                               filename=args.model2FileName)
    except:
        print("load model2 fail")
        pass

    # create MCTS
    mcts1 = MCTS(game, model1, args)
    mcts2 = MCTS(game, model2, args)

    # each process plays 2 games
    arena = Arena(lambda x: np.argmax(mcts1.getActionProb(x, temp=0)),
                  lambda x: np.argmax(mcts2.getActionProb(x, temp=0)), game)
    arena.displayBar = False
    oneWon, twoWon, draws = arena.playGames(2)
    return oneWon, twoWon, draws
Example #14
def AsyncAgainst(nnet, game, args, iter_num):

    os.environ["CUDA_VISIBLE_DEVICES"] = '3'

    minimax = minimaxAI(game,depth=7)

    local_args = dotdict({'numMCTSSims': 200, 'cpuct': 1.0})
    mcts = MCTS(game, nnet, local_args, eval=True)

    arena = Arena(lambda x: np.argmax(mcts.getActionProb(x, temp=0)),
                  minimax.get_move, game)
    arena.displayBar = False
    net_win, minimax_win, draws = arena.playGames(2)
    return net_win, minimax_win, draws
Example #15
def AsyncAgainst(game, args, iter_num, bar):
    # create separate seeds for each worker
    np.random.seed(iter_num)

    if args.displaybar:
        bar.suffix = "iter:{i}/{x} | Total: {total:} | ETA: {eta:}".format(
            i=iter_num + 1,
            x=args.numAgainstPlayProcess,
            total=bar.elapsed_td,
            eta=bar.eta_td)
        bar.next()

    #set gpu
    gpus = args.setGPU.split(',')
    os.environ["CUDA_VISIBLE_DEVICES"] = gpus[iter_num % len(gpus)]

    # set GPU memory growth
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    #create nn and load
    nnet = nn(game, args.displaybar)
    pnet = nn(game, args.displaybar)
    try:
        nnet.load_checkpoint(folder=args.checkpoint, filename='train.pth.tar')
    except:
        print("load train model fail")
        pass
    try:
        pnet.load_checkpoint(folder=args.checkpoint, filename='best.pth.tar')
    except:
        print("load old model fail")
        filepath = os.path.join(args.checkpoint, "best.pth.tar")
        pnet.save_checkpoint(folder=args.checkpoint, filename='best.pth.tar')
    pmcts = MCTS(game, pnet, args, args.lambdaHeur)
    nmcts = MCTS(game, nnet, args, args.lambdaHeur)

    arena = Arena(lambda b, p: np.argmax(
        pmcts.getActionProb(board=b, curPlayer=p, temp=1)),
                  lambda b, p: np.argmax(
                      nmcts.getActionProb(board=b, curPlayer=p, temp=1)),
                  game,
                  displaybar=args.displaybar)
    # each against-process plays numPerProcessAgainst games.
    pwins, nwins, draws = arena.playGames(args.numPerProcessAgainst)
    return pwins, nwins, draws
def AsyncAgainst(game, args, iter_num, bar):
    bar.suffix = "iter:{i}/{x} | Total: {total:} | ETA: {eta:}".format(
        i=iter_num + 1,
        x=args.numAgainstPlayProcess,
        total=bar.elapsed_td,
        eta=bar.eta_td)
    bar.next()

    #set gpu
    if (args.multiGPU):
        if (iter_num % 2 == 0):
            os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        else:
            os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.setGPU

    # set GPU memory growth
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    #create nn and load
    nnet = nn(game)
    pnet = nn(game)
    try:
        nnet.load_checkpoint(folder=args.checkpoint, filename='train.pth.tar')
    except:
        print("load train model fail")
        pass
    try:
        pnet.load_checkpoint(folder=args.checkpoint, filename='best.pth.tar')
    except:
        print("load old model fail")
        pass
    pmcts = MCTS(game, pnet, args)
    nmcts = MCTS(game, nnet, args)

    arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                  lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), game)
    arena.displayBar = True
    # each against-process plays numPerProcessAgainst games.
    pwins, nwins, draws = arena.playGames(args.numPerProcessAgainst)
    return pwins, nwins, draws
    def pitAgainstOpponents(self):
        print('COMPARING AGAINST DEFAULT PLAYERS')
        for player in self.opponents:
            print(f'PITTING AGAINST {player}')
            nmcts = MCTS(self.game, self.nnet, self.args)
            opponent = player(self.game)

            def bot(x):
                return opponent.play(x)

            def model(x):
                return np.argmax(nmcts.getActionProb(x, temp=0))

            arena = Arena(bot, model, self.game, render=self.args.render)
            fwins, swins, draws = arena.playGames(self.args.arenaCompare //
                                                  len(self.opponents))

            print('MODEL/PLAYER WINS : %d / %d ; DRAWS : %d' %
                  (swins, fwins, draws))
def AsyncAgainst(nnet, game, args, gameth):

    logging.debug("play self test game " + str(gameth))

    os.environ["CUDA_VISIBLE_DEVICES"] = args.setGPU

    # create nn and load
    minimax = minimaxAI(game)

    local_args = dotdict({'numMCTSSims': 100, 'cpuct': 1.0})
    # local_args.numMCTSSims = 100
    # local_args.cpuct = 1
    mcts = MCTS(game, nnet, local_args, eval=True)

    arena = Arena(lambda x: np.argmax(mcts.getActionProb(x, temp=0)),
                  minimax.get_move, game)
    arena.displayBar = False
    net_win, minimax_win, draws = arena.playGames(2)
    return net_win, minimax_win, draws
    def eval(self, iter):
        print('Evaluating against random play...')

        def mplay(board):
            mcts = MCTS(self.game, self.nnet, self.args)
            return np.argmax(mcts.getActionProb(board, temp=0))

        def rplay(board):
            a = np.random.randint(self.game.getActionSize())
            valids = self.game.getValidMoves(board, 1)
            while valids[a] != 1:
                a = np.random.randint(self.game.getActionSize())
            return a

        arena = Arena(mplay, rplay, self.game)
        mwins, rwins, draws = arena.playGames(20)
        self.evalResults.append((mwins / 20.0, iter))
        print('Saving eval results:')
        print(self.evalResults)
        self.saveEvalResults()
Example #20
def PitNetworks(gameCount: int):
    pred = OthelloPredictor(6, 'trainedModels/othello/pred_othello_087.pth',
                            100000)
    g_pred = Game(6, predictor=pred)
    g_regular = Game(6)
    nnet1 = nn(g_regular)
    nnet2 = nn(g_regular)

    #nnet1.load_checkpoint('AlphaZeroModels', 'predictor_87_ep93.pth.tar')
    nnet1.load_checkpoint('AlphaZeroModels', 'predictor_87_ep131.pth.tar')
    nnet2.load_checkpoint('AlphaZeroModels', 'pretrained_ep153.pth.tar')

    mcts1 = MCTS(g_regular, nnet1, args)
    mcts2 = MCTS(g_regular, nnet2, args)

    print('PITTING AGAINST PREVIOUS VERSION')
    arena = Arena(lambda x: np.argmax(mcts1.getActionProb(x, temp=0)),
                  lambda x: np.argmax(mcts2.getActionProb(x, temp=0)),
                  g_regular)
    p1wins, p2wins, draws = arena.playGames(gameCount)

    print('P1/P2 WINS : %d / %d ; DRAWS : %d' % (p1wins, p2wins, draws))
Example #21
    def pit(self, iteration, proc_num):
        self.pnet = self.nnet.__class__(self.game)  # the competitor network
        if iteration != 1:
            self.pnet.load_checkpoint(folder=self.args["checkpoint"],
                                      filename="checkpoint_%d.pth.tar" %
                                      (iteration - 1))

        nmcts = MCTS(self.game, self.nnet, self.args)
        pmcts = MCTS(self.game, self.pnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')

        arena = Arena(lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      self.game)
        nwins, pwins, draws = arena.playGames(
            self.args["arenaCompare"] // self.args["genFilesPerIteration"])
        # nwins = 1
        # pwins = 2
        # draws = 0

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        self.savePit(iteration, proc_num, nwins, pwins, draws)
Example #22
def async_against(game, args, iter_num):
    import tensorflow as tf
    #bar.suffix = "iter:{i}/{x} | Total: {total:} | ETA: {eta:}".format(i=iter_num + 1, x=args.arenaCompare,
    #                                                                   total=bar.elapsed_td, eta=bar.eta_td)
    #bar.next()
    # set gpu
    if args.multiGPU:
        if iter_num % 2 == 0:
            os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        else:
            os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.setGPU
    # set GPU memory growth
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    _ = tf.Session(config=config)
    # create nn and load
    nnet = nn(game)
    pnet = nn(game)
    try:
        nnet.load_checkpoint(folder=args.checkpoint, filename='train.pth.tar')
    except:
        print("load train model fail")
        pass
    try:
        pnet.load_checkpoint(folder=args.checkpoint, filename='best.pth.tar')
    except:
        print("load old model fail")
        pass
    pmcts = MCTS(game, pnet, args)
    nmcts = MCTS(game, nnet, args)
    arena = Arena(lambda x: np.argmax(pmcts.get_action_prob(x, temp=0)),
                  lambda x: np.argmax(nmcts.get_action_prob(x, temp=0)), game)
    arena.displayBar = False
    pwins, nwins, draws = arena.playGames(2)
    return pwins, nwins, draws
Example #23
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenofQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)
                trainStats = [0, 0, 0]
                for _, _, res in iterationTrainExamples:
                    trainStats[res] += 1
                print(trainStats)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))
            if pwins + nwins > 0 and float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
Example #24
        # total should be x2, because each process plays 2 games.
        'numPlayGames': 10,
        'numPlayPool': 5,  # num of processes pool.
        'model1Folder': '/workspace/CU_Makhos/models/',
        'model1FileName': 'best.pth.tar',
        'model2Folder': '/workspace/CU_Makhos/models/',
        'model2FileName': 'best.pth.tar',
    })

    g = ThaiCheckersGame()
    minimax = minimaxAI(game=g, depth=7).get_move
    # nnet players
    n1 = NNet(g, gpu_num=0)
    n1.load_checkpoint('models_minimax/', 'train_iter_268.pth.tar')
    args1 = dotdict({'numMCTSSims': 100, 'cpuct': 1.0})
    mcts1 = MCTS(g, n1, args1, eval=True, verbose=True)

    def n1p(x):
        return np.random.choice(32 * 32, p=mcts1.getActionProb(x, temp=0))

    n2 = NNet(g, gpu_num=0)
    n2.load_checkpoint('models_minimax/', 'train_iter_140.pth.tar')
    args2 = dotdict({'numMCTSSims': 100, 'cpuct': 1.0})
    mcts2 = MCTS(g, n2, args2, eval=True)

    def n2p(x):
        return np.random.choice(32 * 32, p=mcts2.getActionProb(x, temp=0))

    arena = Arena(n1p, n2p, g, display=display)
    print(arena.playGames(2, verbose=True))
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenofQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):
            time_begin_iter = time.time()
            # bookkeeping
            log.info(f'Starting Iter #{i} ...')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                for _ in tqdm(range(self.args.numEps), desc="Self Play"):
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                log.warning(
                    f"Removing the oldest entry in trainExamples. len(trainExamplesHistory) = {len(self.trainExamplesHistory)}"
                )
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            losses = self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            log.info('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins, draws, avg_moves = arena.playGames(
                self.args.arenaCompare)

            self.df_stats = self.log_to_file(
                file=self.log_file,
                args=self.args,
                it=i,
                trainExamples=trainExamples,
                time_begin_iter=time_begin_iter,
                nwins=nwins,
                df_stats=self.df_stats,
                nb_model_improv=self.nb_model_improv,
                avg_nb_moves=avg_moves,
                train_losses=losses)

            self.df_stats.to_feather(
                os.path.join(self.args.log_file_location,
                             f"{self.args.log_run_name}.feather"))
            log.info('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                     (nwins, pwins, draws))
            if pwins + nwins == 0 or float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                log.info('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                log.info('ACCEPTING NEW MODEL')
                self.nb_model_improv += 1
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
                if self.nb_model_improv % self.args.nb_of_new_model_for_random_player == 0:
                    game_simul = SantoriniGame(5, 4)
                    rp = RandomPlayer(game_simul).play
                    n_simul = NNet(game_simul, self.nn_args)
                    n_simul.load_checkpoint(folder=self.args.checkpoint,
                                            filename='best.pth.tar')
                    mcts_simul = MCTS(game_simul, n_simul, self.args)
                    n1_simul = lambda x: np.argmax(
                        mcts_simul.getActionProb(x, temp=0))
                    arena_simul = Arena(n1_simul,
                                        rp,
                                        game_simul,
                                        display=False)
                    nnwins, _, _, avg_nb_moves = arena_simul.playGames(
                        self.args.nb_of_game_agaisnt_random_player,
                        verbose=False)
                    self.df_stats = self.log_to_file(
                        file=self.log_file,
                        args=self.args,
                        it=i,
                        trainExamples=trainExamples,
                        time_begin_iter=time_begin_iter,
                        nwins=nwins,
                        df_stats=self.df_stats,
                        nb_model_improv=self.nb_model_improv,
                        nb_game_rdm=self.args.nb_of_game_agaisnt_random_player,
                        nnwins=nnwins,
                        only_random=True,
                        avg_nb_moves=avg_nb_moves)
Example #26
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenofQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters+1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i>1:
                saveSelfPlayTimeLog('------ITER ' + str(i) + '------')
                iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

                selfPlayStartTime = time.time()

                if self.multiprocessing:
                    pool = Pool(processes = self.cpu, maxtasksperchild = self.maxtasksperchild)
                    for eps in range(self.args.numEps):
                        self.mcts = MCTS(self.game, self.nnet, self.args)   # reset search tree
                        # re = pool.apply_async(selfPlay, args=(eps, self.game, self.args))
                        # iterationTrainExamples += re.get()
                        re = pool.starmap(selfPlay, [(str(eps), self.args)])
                        iterationTrainExamples += re[0]
                        gc.collect()

                    pool.close()
                    pool.join()
                else:
                    for eps in range(self.args.numEps):
                        selfStartTime = time.time()
                        
                        self.mcts = MCTS(self.game, self.nnet, self.args)   # reset search tree
                        re = self.executeEpisode()
                        iterationTrainExamples += re

                        print('Episode ',eps,' eps cost time = %.3f'%(time.time()-selfStartTime) ,' sec')
                        saveSelfPlayTimeLog('Episode ' + str(eps) + ' eps cost time = %.3f'%(time.time()-selfStartTime) + ' sec')

                print('SelfPlay total cost time = %.3f'%(time.time()-selfPlayStartTime),' sec')
                saveSelfPlayTimeLog('SelfPlay total cost time = %.3f'%(time.time()-selfPlayStartTime) + ' sec')
                
                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)
                
            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)  
            saveStart = time.time()
            self.saveTrainExamples(i-1)
            saveEnd = time.time()
            self.saveTimeLog(i,saveEnd-saveStart)
            
            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)
            
            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if pwins+nwins == 0 or float(nwins)/(pwins+nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')                
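
In the multiprocessing branch above, pool.starmap is called with a single task per loop iteration, which serializes the episodes. A hedged sketch of submitting all episodes in one call so the pool can actually run them in parallel; it assumes selfPlay and its (eps_id, args) signature from the snippet, and that both arguments are picklable.

with Pool(processes=self.cpu, maxtasksperchild=self.maxtasksperchild) as pool:
    results = pool.starmap(selfPlay,
                           [(str(eps), self.args) for eps in range(self.args.numEps)])
for re in results:
    iterationTrainExamples += re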
Example #27
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenofQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """
        #Generate a fixed sensing matrix if option is toggled to True.
        #1)A is fixed. Also set arena_game_args.sensing_matrix to be equal to that of coach.game_args so the arena uses the same sensing matrix. 
        #2)the folder which saves the fixed sensing matrix is empty
        if self.args['fixed_matrix'] == True:
            if self.args['load_existing_matrix'] == True:
                self.game_args.sensing_matrix = np.load(self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
                self.arena_game_args.sensing_matrix = np.load(self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
                
                #FOR TESTING-------------------------------------------------------
                #print(self.game_args.sensing_matrix)
                #END TESTING-------------------------------------------------------
                
            else: #if not loading an existing matrix in self.args['fixed_matrix_filepath'], then generate a new sensing matrix of given type self.args['matrix_type']
                self.game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type']) 
                self.arena_game_args.sensing_matrix = self.game_args.sensing_matrix
                #Save the fixed matrix
                self.game_args.save_Matrix(self.args['fixed_matrix_filepath'])
                
                #FOR TESTING-------------------------------------------------------
                #print(self.game_args.sensing_matrix)
                #END TESTING-------------------------------------------------------
            
        for i in range(1, self.args['numIters']+1):
            print('------ITER ' + str(i) + '------')
            # self.skipFirstSelfPlay defaults to False; it is set to True when training
            # examples are loaded from file, so the latest nn_model can be used together
            # with the latest set of TrainingExamples.
            if not self.skipFirstSelfPlay or i>1:
                iterationTrainExamples = deque([], maxlen=self.args['maxlenOfQueue'])
                #bookkeeping objects contained in pytorch_classification.utils
                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args['numEps'])
                end = time.time()
                #IMPORTANT PART OF THE CODE. GENERATE NEW A AND NEW y HERE. EACH SELF-PLAY GAME HAS DIFFERENT A AND y. 
                #-----------------------------------------------------
                for eps in range(self.args['numEps']):
                    #Initialize a new game by setting A, x, y, and then execute a single game of self play with self.executeEpisode()
                    if self.args['fixed_matrix'] == False: #repeatedly generate sensing matrices if we are not fixing the sensing matrix. 
                        self.game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type']) #generate a new sensing matrix
                    self.game_args.generateNewObsVec(self.args['x_type'], self.args['sparsity'])#generate a new observed vector y. This assumes a matrix has been loaded in self.game_args!!!
                    self.mcts = MCTS(self.game, self.nnet, self.args, self.game_args, self.skip_nnet)#create new search tree for each game we play
                    
                    #TESTING-------------------------
                    #print('The generated sparse vector x has sparsity: ' + str(self.game_args.game_iter))
                    #--------------------------------
                    
                    #TESTING--------------------------
                    #print('Starting self-play game iteration: ' + str(eps))
                    #start_game = time.time()
                    #--------------------------------
                    
                    iterationTrainExamples += self.executeEpisode() #Play a new game with newly generated y. iterationTrainExamples is a deque containing states each generated self play game
                    
                    #TESTING--------------------------
                    #end_game = time.time()
                    #print('Total time to play game ' + str(eps) + ' is: ' + str(end_game-start_game))
                    #-----------------------------------------------------
                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix  = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(eps=eps+1, maxeps=self.args['numEps'], et=eps_time.avg, total=bar.elapsed_td, eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history 
                #self.trainExamplesHistory is a list of deques, where each deque contains all the states from numEps number of self-play games
                self.trainExamplesHistory.append(iterationTrainExamples)
            
            #Jump to here on the first iteration if we loaded an existing file into self.trainExamplesHistory from method loadTrainExamples below.    
            if len(self.trainExamplesHistory) > self.args['numItersForTrainExamplesHistory']:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file by calling saveTrainExamples method
            # The examples were collected using the model from the previous iteration, so (i-1)  
            self.saveTrainExamples(i-1) #save examples to self.args['checkpoint'] folder with given iteration name of i-1
            
            # shuffle examples before training
            #trainExamples is the list form of trainExamplesHistory. Note that trainExamplesHistory is a list of deques,
            #where each deque contains training examples. trainExamples gets rid of the deque, and instead puts all training
            #samples in a single list, shuffled
            trainExamples = []
            for e in self.trainExamplesHistory: #Each e is a deque
                trainExamples.extend(e)
            shuffle(trainExamples)
            
            #The Arena--------------------------------------------------------
            if self.args['Arena'] == True:
                self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='temp') #copy old neural network into new one
                self.pnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
            
                #convert trainExamples into a format recognizable by Neural Network and train
                trainExamples = self.nnet.constructTraining(trainExamples)
                self.nnet.train(trainExamples[0], trainExamples[1])#Train the new neural network self.nnet. The weights are now updated
            
                #Pit the two neural networks self.pnet and self.nnet in the arena            
                print('PITTING AGAINST PREVIOUS VERSION')
            
                # Arena pits pnet against nnet; the Game_args A and y change constantly.
                # Next iteration, arena is rebound to a new object, so the old one is
                # freed once no other references to it remain.
                arena = Arena(self.pnet, self.nnet, self.game, self.args, self.arena_game_args)
                pwins, nwins, draws = arena.playGames()
            
                print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
                if pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args['updateThreshold']:
                    print('REJECTING NEW MODEL')
                    self.nnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
                else:#saves the weights(.h5) and model(.json) twice. Creates nnet_checkpoint(i-1)_model.json and nnet_checkpoint(i-1)_weights.h5, and rewrites best_model.json and best_weights.h5
                    print('ACCEPTING NEW MODEL')
                    self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='nnet_checkpoint' + str(i-1))
                    self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')
            #-----------------------------------------------------------------
            
            else: #If we do not activate Arena, then all we do is just train the network, rewrite best, and write a new file 'nnet_checkpoint' + str(i-1).  
                print('TRAINING NEW NEURAL NETWORK...')
                trainExamples = self.nnet.constructTraining(trainExamples)
                
                #FOR TESTING-----------------------------------------------------
                #print('trainExamples feature arrays: ' + str(trainExamples[0]))
                #print('trainExamples label arrays: ' + str(trainExamples[1]))
                #END TESTING-----------------------------------------------------
                    
                self.nnet.train(trainExamples[0], trainExamples[1], folder = self.args['network_checkpoint'], filename = 'trainHistDict' + str(i-1))    
                
                #FOR TESTING-----------------------------------------------------
                #weights = self.nnet.nnet.model.get_weights()
                #min_max = []
                #for layer_weights in weights:
                    #print('number of weights in current array in list (output as matrix size): ', layer_weights.shape)
                    #layer_weights_min = np.amin(layer_weights)
                    #layer_weights_max = np.amax(layer_weights)
                    #min_max.append([layer_weights_min, layer_weights_max])
                #print('')
                #print('The smallest and largest weights of each layer are: ')
                #for pair in min_max:
                    #print(pair)
                #print('')
                #END TESTING-----------------------------------------------------
                      
                self.nnet.save_checkpoint(folder = self.args['network_checkpoint'], filename='nnet_checkpoint' + str(i-1))
                self.nnet.save_checkpoint(folder = self.args['network_checkpoint'], filename = 'best')
Example #28
 def learn(self):
     #generate or load a matrix if fixed matrix set to True. We save a Game_args object in Coach in case A is fixed so when we
     #initialize multiple MCTS objects below, we do not have to store multiple copies of A. 
                 
     if self.args['fixed_matrix'] == True:
         if self.args['load_existing_matrix'] == True:
             self.game_args.sensing_matrix = np.load(self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
         else:
             self.game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type'])
             self.game_args.save_Matrix(self.args['fixed_matrix_filepath'])
     
     #keep track of learning time
     learning_start = time.time()
     
     #start training iterations
     for i in range(1, self.args['numIters']+1):
         print('------ITER ' + str(i) + '------')
         #If we are not loading a set of training data.... then:
         if not self.skipFirstSelfPlay or i>1:
             #1)Initialize empty deque for storing training data after every eps in the iteration has been processed
             iterationTrainExamples = deque([], maxlen=self.args['maxlenOfQueue'])
             
             #3)Start search. A single search consists of a synchronous search over ALL eps in the current batch.
             #Essentially the number of MCTS trees that must be maintained at once is equal to number of eps in current batch
             for j in range(self.args['num_batches']):
                 
                 iterationTrainExamples += self.playAllGames(self.args['eps_per_batch'])
         
             #Add the training samples generated in a single training iteration to self.trainExamplesHistory
             #This step is the last line included in "if not self.skipFirstSelfPlay or i>1:" block
             self.trainExamplesHistory.append(iterationTrainExamples)
         
         #Execution continues here once self-play is skipped (skipFirstSelfPlay is True and i == 1) or the block above completes.
         #The collected iterationTrainExamples are now used to retrain the neural network.
         if len(self.trainExamplesHistory) > self.args['numItersForTrainExamplesHistory']:
             print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
             self.trainExamplesHistory.pop(0)
         
         #save trainExamplesHistory 
         self.saveTrainExamples(i-1)
         
         #move all training samples from trainExamplesHistory to trainExamples for shuffling
         #shuffle trainExamples
         trainExamples = []
         for e in self.trainExamplesHistory: 
             trainExamples.extend(e)
         shuffle(trainExamples)
         
         #The Arena--------------------------------------------------------
         if self.args['Arena'] == True:
             self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='temp') #copy old neural network into new one
             self.pnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
         
             #convert trainExamples into a format recognizable by Neural Network and train
             trainExamples = self.nnet.constructTraining(trainExamples)
             self.nnet.train(trainExamples[0], trainExamples[1])#Train the new neural network self.nnet. The weights are now updated
         
             #Pit the two neural networks self.pnet and self.nnet in the arena            
             print('PITTING AGAINST PREVIOUS VERSION')
         
             arena = Arena(self.pnet, self.nnet, self.game, self.args, self.arena_game_args) #note that Arena will pit pnet with nnet, and Game_args A and y will change constantly. Note that next iteration, arena is a reference to a different object, so old object is deleted when there are no other references to it. 
             pwins, nwins, draws = arena.playGames()
         
             print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
             if pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args['updateThreshold']:
                 print('REJECTING NEW MODEL')
                 self.nnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
             else:#saves the weights(.h5) and model(.json) twice. Creates nnet_checkpoint(i-1)_model.json and nnet_checkpoint(i-1)_weights.h5, and rewrites best_model.json and best_weights.h5
                 print('ACCEPTING NEW MODEL')
                 self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='nnet_checkpoint' + str(i-1))
                 self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')
         #-----------------------------------------------------------------
         
         else: #If we do not activate Arena, then all we do is just train the network, rewrite best, and write a new file 'nnet_checkpoint' + str(i-1).  
             print('TRAINING NEW NEURAL NETWORK...')
             trainExamples = self.nnet.constructTraining(trainExamples)
                 
             self.nnet.train(trainExamples[0], trainExamples[1], folder = self.args['network_checkpoint'], filename = 'trainHistDict' + str(i-1))
                   
             self.nnet.save_checkpoint(folder = self.args['network_checkpoint'], filename='nnet_checkpoint' + str(i-1))
             self.nnet.save_checkpoint(folder = self.args['network_checkpoint'], filename = 'best')
     
     #Compute total time to run alphazero
     learning_end = time.time()
     print('----------TRAINING COMPLETE----------')
     print('Total training time: ', learning_end - learning_start)
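
The fixed-matrix handling at the top of Beispiel #28 is a load-or-generate pattern: reuse a saved sensing matrix A when one exists, otherwise draw a new one and persist it so that the many MCTS objects created later all share a single copy. A minimal standalone sketch of that pattern, using plain NumPy, assuming a Gaussian matrix type and the sensing_matrix.npy filename from the example (the repository's generateSensingMatrix and save_Matrix helpers are not reproduced here):

import os
import numpy as np

def load_or_generate_sensing_matrix(folder, m, n, rng=None):
    # Hypothetical helper, not part of the example above.
    # Reuse a saved matrix if present; otherwise sample A with i.i.d. N(0, 1/m) entries and save it.
    path = os.path.join(folder, 'sensing_matrix.npy')
    if os.path.exists(path):
        return np.load(path)
    rng = rng or np.random.default_rng()
    A = rng.normal(0.0, 1.0 / np.sqrt(m), size=(m, n))
    os.makedirs(folder, exist_ok=True)
    np.save(path, A)
    return A

Persisting A once and reloading it keeps every run, and every search tree within a run, working against the same measurement operator.
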
Beispiel #29
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenofQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)
    
                tracker = ParallelRuntimes(self.args.mcts_workers)
                bar = Bar('Self Play', max=self.args.numEps)
    
                # Multiprocess self-play
                processes = []
                work_queue = mp.Queue()
                done_queue = mp.Queue()

                print("[Master] Spawning Workers...")

                # Spawn workers
                for ep in range(self.args.mcts_workers):
                    tup = (work_queue, done_queue, ep)
                    proc = mp.Process(target=self.coach_worker, args=tup)
                    proc.start()

                    processes.append(proc)

                print("[Master] Adding work...")

                # Add work to queue
                for eps in range(self.args.numEps):
                    data = dict()
                    data["i"] = eps
                    data["game"] = copy.deepcopy(self.game)

                    work_queue.put(data)

                print("[Master] Waiting for results...")

                # Wait for results to come in
                for ep in range(self.args.numEps):
                    runtime, examples = done_queue.get()
                    
                    # Filter out a fraction of drawn games (args.filter_draw_rate)
                    to_add = False
                    draw_filter_rate = self.args.filter_draw_rate
                    if abs(examples[0][2]) != 1:
                        # drawn game: keep it only with probability (1 - draw_filter_rate)
                        if random.random() >= draw_filter_rate:
                            to_add = True
                    else:
                        to_add = True

                    if to_add:
                        iterationTrainExamples += examples

                    tracker.update(runtime)
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                                  eps=ep + 1, maxeps=self.args.numEps, et=tracker.avg(), total=bar.elapsed_td, 
                                  eta=tracker.eta(ep + 1, self.args.numEps))
                    bar.next()

                print("[Master] Killing workers...")

                # Kill workers
                for p in processes:
                    p.terminate()
                    p.join()

                print("[Master] iter={} adding {} examples".format(i, len(iterationTrainExamples)))
                self.trainExamplesHistory.append(iterationTrainExamples)

                bar.finish()

                
            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)  
            self.saveTrainExamples(i - 1)
            
            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')

            # normal network, don't use parallel code
            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            pmcts = MCTS(copy.deepcopy(self.game), self.pnet, self.args)
            
            self.nnet.train(trainExamples)

            nmcts = MCTS(copy.deepcopy(self.game), self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION (player1 = previous, player2 = new)')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), 
                          self.game, num_workers=self.args.mcts_workers)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')

                # Load so all nnets are updated accordingly
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
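
The self-play loop in Beispiel #29 is a textbook master/worker queue setup: spawn mcts_workers processes, push one work item per episode onto a work queue, block on a done queue until one result per episode has arrived, then terminate the workers (they loop forever, so they never exit on their own). A self-contained sketch of that pattern with placeholder payloads, not the repository's coach_worker:

import multiprocessing as mp

def worker(work_queue, done_queue, worker_id):
    # Placeholder worker: pull a job, do some "work", push a result.
    while True:
        job = work_queue.get()
        result = (worker_id, job["i"] * job["i"])
        done_queue.put(result)

if __name__ == '__main__':
    num_workers, num_jobs = 4, 16
    work_queue, done_queue = mp.Queue(), mp.Queue()

    procs = [mp.Process(target=worker, args=(work_queue, done_queue, w))
             for w in range(num_workers)]
    for p in procs:
        p.start()

    for i in range(num_jobs):
        work_queue.put({"i": i})

    results = [done_queue.get() for _ in range(num_jobs)]  # blocks until every job is done

    for p in procs:  # workers loop forever, so terminate them explicitly
        p.terminate()
        p.join()

    print(len(results), "results collected")

Terminating rather than joining live workers matches the example above; a gentler alternative is to enqueue one sentinel item per worker and let each worker return when it sees it.
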
Beispiel #30
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenofQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters+1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            print(str(self.game.innerN) + "x" + str(self.game.innerM))
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()
    
                for eps in range(self.args.numEps):
                    # self.mcts = MCTS(self.game, self.nnet, self.args)   # reset search tree
                    self.mcts = MCTS(self.nnet, self.args)   # reset search tree
                    iterationTrainExamples += self.executeEpisode()

    
                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix  = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(eps=eps+1, maxeps=self.args.numEps, et=eps_time.avg,
                                                                                                               total=bar.elapsed_td, eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history 
                self.trainExamplesHistory.append(iterationTrainExamples)
                
            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)  
            self.saveTrainExamples(i-1)
            
            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            tempfile =  'temp.pth.tar'
            bestfile =  'best.pth.tar'

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=tempfile)
            self.nnet.train(trainExamples)

            if self.arenaEnabled:
                self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)

                pmcts = MCTS(self.pnet, self.args)
                nmcts = MCTS(self.nnet, self.args)

                print('PITTING AGAINST PREVIOUS VERSION')
                # arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                #               lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
                arena = Arena(lambda x, y: pmcts.getActionProb(x, y, temp=0),
                           lambda x, y: nmcts.getActionProb(x, y, temp=0), self.game)
                pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

                print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
                if pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args.updateThreshold:
                    print('REJECTING NEW MODEL')
                    self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)
                else:
                    print('ACCEPTING NEW MODEL')
                    self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                    self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=bestfile)
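
All of the learn() variants above gate model acceptance the same way: the new network is rejected when there were decisive arena games and its share of those wins fell below updateThreshold; otherwise it is saved as a new numbered checkpoint and as 'best'. A hypothetical standalone predicate capturing that rule (not a function from the repository):

def accept_new_model(nwins, pwins, update_threshold):
    # Mirror of the gate used above: reject only when there were decisive games
    # and the new network's win rate among them fell below the threshold.
    decisive = nwins + pwins
    if decisive > 0 and float(nwins) / decisive < update_threshold:
        return False
    return True

# e.g. accept_new_model(24, 16, 0.6) -> True, accept_new_model(20, 20, 0.6) -> False

Draws are excluded from the win-rate denominator, so they neither promote nor demote the new network in these variants.
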
Beispiel #31
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenofQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in tqdm(range(1, self.args.numIters + 1), desc='Iteration'):
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                for eps in tqdm(range(self.args.numEps), desc='mcts.Episode'):
                    iterationTrainExamples += self.executeEpisode()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples, self.writer)
            self.writer.set_step(i - 1, "learning")
            nmcts = MCTS(self.game, self.nnet, self.args)

            print("PITTING AGAINST METRIC COMPONENTS")
            for metric_opponent in self.args.metric_opponents:
                arena = Arena(
                    lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                    metric_opponent(self.game).play, self.game)
                nwins, owins, draws = arena.playGames(
                    self.args.metricArenaCompare)
                print('%s WINS : %d / %d ; DRAWS : %d' %
                      (metric_opponent.__name__, nwins, owins, draws))
                if nwins + owins == 0: win_prct = 0
                else: win_prct = float(nwins) / (nwins + owins)
                self.writer.add_scalar(
                    '{}_win'.format(metric_opponent.__name__), win_prct)
                # Reset nmcts
                nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)
            if nwins + pwins == 0: win_prct = 0
            else: win_prct = float(nwins) / (nwins + pwins)
            self.writer.add_scalar('self_win', win_prct)

            # Calculate elo score for self play
            # flip results so they are from the new network's perspective (positive = new net wins)
            results = [-x for x in arena.get_results()]
            nelo, pelo = elo(self.elo, self.elo, results)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))
            if pwins + nwins == 0 or float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.elo = pelo
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.elo = nelo
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')

            self.writer.add_scalar('self_elo', self.elo)
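
Beispiel #31 additionally tracks an Elo rating through an elo(rating_a, rating_b, results) helper that is not shown in the snippet. As a rough sketch of what such an update typically does, here is the textbook Elo formula with an assumed K-factor of 32; the repository's implementation may differ:

def elo_update(rating_a, rating_b, results, k=32):
    # results: +1 = player A wins, 0 = draw, -1 = player A loses
    for r in results:
        expected_a = 1.0 / (1.0 + 10 ** ((rating_b - rating_a) / 400.0))
        score_a = (r + 1) / 2.0  # map -1/0/+1 to 0/0.5/1
        rating_a += k * (score_a - expected_a)
        rating_b += k * ((1.0 - score_a) - (1.0 - expected_a))
    return rating_a, rating_b

# e.g. elo_update(1500, 1500, [1, 1, 0, -1]) nudges the first rating slightly upward

Because the example rates the new network against the previous one at the same rating (elo(self.elo, self.elo, results)), a positive result list raises the accepted rating and a negative one lowers it.
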