Esempio n. 1
0
def arena_process(i):
    """Play an 8-game arena match between two consecutive saved models.

    The network loaded from ``model_auto_<i + 1>`` is seated as ``player2``.
    The network loaded from ``model_auto_<i>`` is seated as ``player1``; when
    ``i`` is 0 nothing is loaded for it, so ``player1`` uses a freshly
    constructed network.

    Args:
        i: Index of the older model; the newer model is ``i + 1``.

    Returns:
        Whatever ``Arena.play_games(8)`` returns.
    """
    game = Game(8)

    # Newer model (becomes player2).
    candidate_net = nn(game)
    candidate_net.load_model(filename=f"model_auto_{i + 1}")
    candidate_search = MCTS(game, candidate_net, args)

    # Older model (becomes player1); left as constructed when i == 0.
    previous_net = nn(game)
    if i != 0:
        previous_net.load_model(filename=f"model_auto_{i}")
    previous_search = MCTS(game, previous_net, args)

    def make_player(search):
        """Build a player that samples a move from ``search``'s policy."""

        def choose(board):
            probs = search.get_action_prob(board)
            # Sample an action index in proportion to the search probabilities.
            return np.random.choice(len(probs), p=probs)

        return choose

    match = Arena(
        player1=make_player(previous_search),
        player2=make_player(candidate_search),
        game=game,
        display=display,
    )
    return match.play_games(8)
Esempio n. 2
0
def mcts_test():
    """Run MCTS once on a hand-built 15x15 position and display the result.

    Four stones are placed for player ``1`` and four for player ``-1``; the
    search is then run from player 1's canonical view. The board and the
    resulting probabilities (reshaped to board dimensions) are displayed.
    """
    game = Game(15)
    board = Board(15)
    net = nn(game)

    search = MCTS(game, net, args)

    # Stones for player 1, placed in this exact order.
    for move in ((4, 1), (3, 2), (2, 3), (5, 0)):
        board.execute_move(move, 1)

    # Stones for player -1, placed in this exact order.
    for move in ((3, 3), (3, 4), (3, 5), (3, 6)):
        board.execute_move(move, -1)

    current_player = 1
    canonical = game.get_canonical_form(np.array(board.pieces), current_player)

    probs = search.get_action_prob(canonical)

    display(canonical)

    # The final entry of the probability vector is dropped before reshaping
    # (presumably a non-board action such as "pass" -- confirm against MCTS).
    side = len(canonical)
    board_probs = np.array(probs[:-1]).reshape((side, side))
    display_pi(board_probs)
Esempio n. 3
0
def generate_data(l):
    """Run one ``Coach.execute_episode()`` and append its result to disk.

    The episode result is pickled and appended to ``train_examples_4`` inside
    ``args.checkpoint``. Each call appends one pickle record to the file.

    Args:
        l: Lock shared by all worker processes writing to the file. It must
            support the context-manager protocol (as ``multiprocessing.Lock``
            and ``threading.Lock`` do).
    """
    g = Game(args.goBang_n)
    nnet = nn(g)

    c = Coach(g, nnet, args)
    train_example = c.execute_episode()

    # Only the file write is serialized; the episode above runs unlocked.
    with l:
        folder = args.checkpoint
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(folder, exist_ok=True)
        # Join folder and file name as separate components so the path is
        # correct even when args.checkpoint has no trailing separator.
        filename = os.path.join(folder, "train_examples_4")
        with open(filename, "ab+") as f:
            pickle.dump(train_example, f)
Esempio n. 4
0
def generate_data(l, model_iter):
    """Run one episode with a saved model and append its result to disk.

    Loads ``model_auto_<model_iter + 1>``, runs one
    ``Coach.execute_episode()`` with it, and appends the pickled result to
    ``train_examples_auto_<model_iter + 1>`` inside ``args.checkpoint``.

    Args:
        l: Lock shared by all worker processes writing to the file. It must
            support the context-manager protocol (as ``multiprocessing.Lock``
            and ``threading.Lock`` do).
        model_iter: Iteration index; the model and the output file both use
            ``model_iter + 1`` as their suffix.
    """
    suffix = str(model_iter + 1)

    g = Game(8)
    nnet = nn(g)
    nnet.load_model(filename="model_auto_" + suffix)

    c = Coach(g, nnet, args)
    train_example = c.execute_episode()

    # Only the file write is serialized; the episode above runs unlocked.
    with l:
        folder = args.checkpoint
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(folder, exist_ok=True)
        # Join folder and file name as separate components so the path is
        # correct even when args.checkpoint has no trailing separator.
        filename = os.path.join(folder, "train_examples_auto_" + suffix)
        with open(filename, "ab+") as f:
            pickle.dump(train_example, f)
Esempio n. 5
0
    'numIters': 10,
    'numEps': 10,
    'tempThreshold': 100,
    'updateThreshold': 0.6,
    'maxlenOfQueue': 2000000,
    'numMCTSSims': 1000,
    'arenaCompare': 64,
    'cpuct': 3,
    'checkpoint': './temp/',
    'load_model': False,
    'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
    'numItersForTrainExamplesHistory': 20,
}

# NOTE: this guard compares __name__ against "x", not "__main__", so the
# block does not run when the file is executed as a script. It would only
# run if the module were imported under the name "x" -- presumably a way of
# keeping the older Coach-based training entry point disabled.
if __name__ == "x":
    g = Game(15)
    nnet = nn(g)

    # Optionally load a checkpoint; args.load_folder_file is indexed as a
    # (folder, filename) pair.
    if args.load_model:
        nnet.load_checkpoint(args.load_folder_file[0],
                             args.load_folder_file[1])

    c = Coach(g, nnet, args)
    # When resuming from a checkpoint, also load the saved training examples.
    if args.load_model:
        print("Load trainExamples from file")
        c.load_train_examples()
    c.learn()

# Active entry point: builds an AutoRun for a size-15 game.
# NOTE(review): auto_run is created but nothing is called on it within the
# visible lines -- confirm whether the constructor starts the work itself.
if __name__ == '__main__':
    g = Game(15)
    auto_run = AutoRun(g, args)
Esempio n. 6
0
            lock = Lock()

            for iteration in range(200):
                jobs = []

                for _ in range(8):
                    p = Process(target=generate_data, args=(lock, i))
                    jobs.append(p)
                    p.start()

                for job in jobs:
                    job.join()

            trainExamples = merge_data(i)
            print(len(trainExamples))
            g = Game(8)
            nnet = nn(g)
            nnet.train(trainExamples)
            nnet.save_model(filename="model_auto_" + str(i + 2))
        else:
            break

        print(i, 'one model')

# NOTE: this guard compares __name__ against 'x', not '__main__', so the
# block does not run when the file is executed as a script.
if __name__ == 'x':
    # Run arena_process for i = 0..7 across a pool of 8 worker processes.
    with Pool(8) as p:
        result = p.map(arena_process, range(8))

        # Each result entry is indexed at [0] and [1]; these are summed
        # across all runs (presumably the win counts of player1 and player2
        # -- confirm against Arena.play_games).
        win_1 = sum([i[0] for i in result])
        win_2 = sum([i[1] for i in result])