예제 #1
0
def check_result_and_save_network(pwins, nwins, draws, game, args, iter_num):
    """Promote the freshly trained network if its arena score is high enough.

    The new network's score is ``(nwins + 0.5 * draws) / total_games``. If
    that score is below ``args.updateThreshold`` the new model is rejected
    and nothing is written. Otherwise 'train.pth.tar' is loaded from
    ``args.checkpoint`` and re-saved both as 'best.pth.tar' and as
    'checkpoint_<iter_num>.pth.tar'.

    When no games were played at all the score is undefined; the new model
    is accepted in that case.

    Args:
        pwins: Games won by the opponent of the new network (presumably the
            previous best network).
        nwins: Games won by the new network.
        draws: Number of drawn games (each counts as half a win).
        game: Game object handed to ``nn``.
        args: Settings; reads ``setGPU``, ``updateThreshold``, ``checkpoint``.
        iter_num: Iteration number used in the snapshot filename.
    """
    # set gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = args.setGPU

    # Guard on the same denominator that is used for the score, so that a
    # match consisting only of draws is still checked against the threshold.
    total_games = pwins + nwins + draws
    if total_games > 0:
        score = float(nwins + (0.5 * draws)) / total_games
        if score < args.updateThreshold:
            print('REJECTING NEW MODEL')
            return

    print('ACCEPTING NEW MODEL')
    net = nn(game)
    net.load_checkpoint(folder=args.checkpoint, filename='train.pth.tar')
    net.save_checkpoint(folder=args.checkpoint, filename='best.pth.tar')
    # Keep a per-iteration snapshot next to the rolling "best" file.
    net.save_checkpoint(folder=args.checkpoint,
                        filename='checkpoint_' + str(iter_num) + '.pth.tar')
예제 #2
0
def async_against(game, args, iter_num):
    """Play a short arena match between the best and the newly trained net.

    Two networks are created; one tries to load 'train.pth.tar' and the other
    'best.pth.tar' from ``args.checkpoint``. A failed load is reported and
    the network is used as constructed. Each network drives its own MCTS and
    the two play ``arena.playGames(2)``.

    Args:
        game: Game object handed to ``nn``, ``MCTS`` and ``Arena``.
        args: Settings; reads ``multiGPU``, ``setGPU``, ``checkpoint`` and is
            passed on to ``MCTS``.
        iter_num: Index of this job; with ``args.multiGPU`` its parity picks
            GPU "0" or "1".

    Returns:
        The ``(pwins, nwins, draws)`` triple produced by ``playGames``, where
        the 'best.pth.tar' network is the first player given to ``Arena`` and
        the 'train.pth.tar' network the second.
    """
    import tensorflow as tf

    # set gpu: alternate between two devices when multi-GPU is enabled
    if args.multiGPU:
        os.environ["CUDA_VISIBLE_DEVICES"] = "0" if iter_num % 2 == 0 else "1"
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.setGPU

    # set gpu memory grow; the session is created only for this side effect
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    _ = tf.Session(config=config)

    # create nn and load
    nnet = nn(game)
    pnet = nn(game)
    # Loading is best-effort, but only ordinary errors are tolerated so that
    # KeyboardInterrupt / SystemExit still stop the worker.
    try:
        nnet.load_checkpoint(folder=args.checkpoint, filename='train.pth.tar')
    except Exception:
        print("load train model fail")
    try:
        pnet.load_checkpoint(folder=args.checkpoint, filename='best.pth.tar')
    except Exception:
        print("load old model fail")

    pmcts = MCTS(game, pnet, args)
    nmcts = MCTS(game, nnet, args)
    # Both players pick the arg-max action of their search (temp=0).
    arena = Arena(lambda x: np.argmax(pmcts.get_action_prob(x, temp=0)),
                  lambda x: np.argmax(nmcts.get_action_prob(x, temp=0)), game)
    arena.displayBar = False
    pwins, nwins, draws = arena.playGames(2)
    return pwins, nwins, draws
예제 #3
0
def async_self_play(game, args, iter_num, iterr):
    """Play one self-play episode and return its training examples.

    A network is created and 'best.pth.tar' is loaded from
    ``args.checkpoint`` if possible (otherwise the network is used as
    constructed). Moves are sampled from the MCTS policy until the game ends.

    Args:
        game: Game object providing the board and rule methods used below.
        args: Settings; reads ``multiGPU``, ``setGPU``, ``checkpoint``,
            ``tempThreshold`` and is passed on to ``MCTS``.
        iter_num: Index of this job; with ``args.multiGPU`` its parity picks
            GPU "0" or "1".
        iterr: Unused here; kept for interface compatibility.

    Returns:
        A list of ``(board, pi, value)`` tuples, one per symmetry of every
        visited position. ``value`` is the final result ``r`` for positions
        whose player equals the player to move at the end, and ``-r``
        otherwise.
    """
    import tensorflow as tf

    # set gpu: alternate between two devices when multi-GPU is enabled
    if args.multiGPU:
        os.environ["CUDA_VISIBLE_DEVICES"] = "0" if iter_num % 2 == 0 else "1"
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.setGPU

    # set gpu memory grow; the session is created only for this side effect
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    _ = tf.Session(config=config)

    # create nn and load
    net = nn(game)
    mcts = MCTS(game, net, args)
    try:
        net.load_checkpoint(folder=args.checkpoint, filename='best.pth.tar')
    except Exception:
        # Best-effort: without a usable checkpoint, play with the network as
        # constructed. Only ordinary errors are tolerated so that
        # KeyboardInterrupt / SystemExit still stop the worker.
        pass

    train_examples = []
    board = game.getInitBoard()
    cur_player = 1
    episode_step = 0
    while True:
        episode_step += 1
        canonical_board = game.getCanonicalForm(board, cur_player)
        # temp is 1 for the first moves and 0 from step tempThreshold on.
        temp = int(episode_step < args.tempThreshold)
        pi = mcts.get_action_prob(canonical_board, temp=temp)
        # Record every symmetric variant of the position with its policy.
        for b, p in game.getSymmetries(canonical_board, pi):
            train_examples.append([b, cur_player, p, None])

        action = np.random.choice(len(pi), p=pi)
        board, cur_player = game.getNextState(board, cur_player, action)

        r = game.getGameEnded(board, cur_player)
        if r != 0:
            # r is reported for cur_player; flip the sign for the other side.
            return [(b, p, r if player == cur_player else -r)
                    for b, player, p, _ in train_examples]
예제 #4
0
def async_train_network(game, args, trainhistory):
    """Train a network on the accumulated example history and save it.

    Steps:
      1. Create a network and try to load 'best.pth.tar' from
         ``args.checkpoint`` (training starts from the network as
         constructed if that fails).
      2. Append every entry of the stored history file
         '<checkpoint>/trainhistory.pth.tar.examples' to ``trainhistory``.
      3. If the history is longer than
         ``args.numItersForTrainExamplesHistory``, drop its last entry.
      4. Write the history back to the same file.
      5. Train on the flattened history and save 'train.pth.tar'.

    Args:
        game: Game object handed to ``nn``.
        args: Settings; reads ``setGPU``, ``checkpoint`` and
            ``numItersForTrainExamplesHistory``.
        trainhistory: Mutable sequence of example batches; modified in place.
    """
    # set gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = args.setGPU

    # create network for training
    nnet = nn(game)
    try:
        nnet.load_checkpoint(folder=args.checkpoint, filename='best.pth.tar')
    except Exception:
        # Best-effort: only ordinary errors are tolerated so that
        # KeyboardInterrupt / SystemExit still stop the worker.
        pass

    # ---load history file---
    folder = args.checkpoint
    examples_file = os.path.join(folder, "trainhistory.pth.tar") + ".examples"
    if not os.path.isfile(examples_file):
        print(examples_file)
    else:
        print("File with trainExamples found. Read it.")
        # NOTE: unpickling can execute arbitrary code; this file must only
        # ever be one that this program wrote itself.
        with open(examples_file, "rb") as f:
            trainhistory.extend(Unpickler(f).load())

    # ---delete if over limit---
    # Stored entries are appended after the incoming ones, so the last
    # element is the oldest batch.
    if len(trainhistory) > args.numItersForTrainExamplesHistory:
        print("len(trainExamplesHistory) =", len(trainhistory),
              " => remove the oldest trainExamples")
        del trainhistory[len(trainhistory) - 1]

    # ---extend history---
    train_examples = [example for batch in trainhistory for example in batch]

    # ---save history---
    if not os.path.exists(folder):
        os.makedirs(folder)
    with open(examples_file, "wb+") as f:
        Pickler(f).dump(trainhistory)

    print('Train with {} examples'.format(len(train_examples)))
    nnet.train(train_examples)
    nnet.save_checkpoint(folder=args.checkpoint, filename='train.pth.tar')
예제 #5
0
    'numIters': 1000,
    'numEps': 100,
    'tempThreshold': 15,
    'updateThreshold': 0.6,
    'maxlenOfQueue': 200000,
    'numMCTSSims': 25,
    'arenaCompare': 40,
    'cpuct': 1,
    'checkpoint': './temp/',
    'load_model': True,
    'load_folder_file': ('./temp/', 'best.pth.tar'),
    'numItersForTrainExamplesHistory': 20,
})

# Build the game, network and coach, then let the network play one game,
# printing the action probabilities and the board after every move.
g = Game(6)
nnet = nn(g)

if args.load_model:
    # load_folder_file is a (folder, filename) pair.
    nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])

c = Coach(g, nnet, args)
board = g.getInitBoard()

player = 1

while g.getGameEnded(board, player) == 0:
    # A fresh MCTS is built for every move, so no search tree is reused.
    mcts = MCTS(g, nnet, args)
    # Search from the point of view of the player to move and apply the
    # chosen action for that same player, so the two sides alternate.
    action = mcts.get_action_prob(g.getCanonicalForm(board, player))
    print(action)
    board, player = g.getNextState(board, player, np.argmax(action))
    g.display(board)