Example #1
def arena_process(i):
    g = Game(8)

    nnet = nn(g)
    nnet.load_model(filename=("model_auto_" + str(i + 1)))
    nmcts = MCTS(g, nnet, args)

    pnet = nn(g)
    if i != 0:
        pnet.load_model(filename=("model_auto_" + str(i)))
    pmcts = MCTS(g, pnet, args)

    def player1(x):
        pi = pmcts.get_action_prob(x)
        # display_pi(np.array(pi[:-1]).reshape((len(x), len(x))))
        return np.random.choice(len(pi), p=pi)

    def player2(x):
        pi = nmcts.get_action_prob(x)
        return np.random.choice(len(pi), p=pi)

    arena = Arena(player1=player1,
                  player2=player2,
                  game=g,
                  display=display)
    return arena.play_games(8)
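
A brief usage note: arena_process takes only the model index i, which suggests it is meant to be dispatched to worker processes so that several evaluation matches can run in parallel. A minimal driver sketch, assuming the snippet above lives in an importable module; the module name arena_module and the result handling are illustrative assumptions, not taken from the source, and play_games is assumed to return a (player1_wins, player2_wins, draws) tuple as in the other examples:

from multiprocessing import Pool

from arena_module import arena_process  # hypothetical module containing the snippet above

if __name__ == '__main__':
    model_indices = [0, 1, 2, 3]  # iterations whose models should be evaluated
    with Pool(processes=4) as pool:
        # each worker builds its own networks/MCTS and plays 8 games
        results = pool.map(arena_process, model_indices)
    for i, (p1_wins, p2_wins, draws) in zip(model_indices, results):
        print('index %d -> previous wins %d, new wins %d, draws %d'
              % (i, p1_wins, p2_wins, draws))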
Example #2
    def arena_process(self, r, old_model_file, new_model_file, verbose=False):
        old_net = nn(self.game)
        if len(old_model_file) > 1:
            old_net.load_model(filename=old_model_file)
        else:
            # no previous checkpoint: keep the randomly initialized weights
            print('random state')
        old_mcts = MCTS(self.game, old_net, self.args)

        new_net = nn(self.game)
        new_net.load_model(filename=new_model_file)
        new_mcts = MCTS(self.game, new_net, self.args)

        def old_player(x):
            pi = old_mcts.get_action_prob(x, self.args['numMCTSSims'])
            # display_pi(np.array(pi[:-1]).reshape((len(x), len(x))))
            return np.random.choice(len(pi), p=pi)

        def new_player(x):
            pi = new_mcts.get_action_prob(x, int(self.args['numMCTSSims']))
            return np.random.choice(len(pi), p=pi)

        arena = Arena(player1=old_player,
                      player2=new_player,
                      game=self.game,
                      display=display)
        return arena.play_games(r, verbose=verbose)
Example #3
    def train(self):
        '''
        Run num_iters iterations of self-play, training, and arena evaluation.
        '''
        for i in range(1, self.args.num_iters + 1):
            print('----START ITERATION:' + str(i))

            # Play num_episodes games of self-play and accumulate experiences.
            for _ in range(self.args.num_episodes):
                self.mcts = MCTS(self.env_utils, self.new_model_system,
                                 self.args)  # reset MCTS
                self.experiences.extend(self.run_episode())

                if len(self.experiences) > self.args.max_experiences:
                    self.experiences = self.experiences[-self.args.max_experiences:]

            # Randomly sample a mini-batch for the neural network from the experiences.
            mini_batch = random.sample(
                self.experiences,
                min(len(self.experiences), self.args.mini_batch_size))

            # Save the current model, then load it back into cur_model_system.
            self.new_model_system.save_checkpoint(folder=self.args.checkpoint,
                                                  filename='temp.pth.tar')
            self.cur_model_system.load_checkpoint(folder=self.args.checkpoint,
                                                  filename='temp.pth.tar')
            cur_mcts = MCTS(self.env_utils, self.cur_model_system, self.args)

            self.new_model_system.fit(mini_batch)
            new_mcts = MCTS(self.env_utils, self.new_model_system, self.args)

            # Pit the models in the Arena to check whether the new model beats the current one.
            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(
                lambda x: np.argmax(cur_mcts.get_action_prob(x, temp=0)),
                lambda x: np.argmax(new_mcts.get_action_prob(x, temp=0)),
                self.env,
                display=False)
            cur_wins, new_wins, draws = arena.play_games(
                self.args.num_compares_arena)
            print('NEW/CUR WINS : %d / %d ; DRAWS : %d' %
                  (new_wins, cur_wins, draws))

            # Accept the new model if its win rate exceeds update_threshold; otherwise reject it.
            if 0 < cur_wins + new_wins and float(new_wins) / (
                    cur_wins + new_wins) < self.args.update_threshold:
                print('REJECTING NEW MODEL')
                self.new_model_system.load_checkpoint(
                    folder=self.args.checkpoint, filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.new_model_system.save_checkpoint(
                    folder=self.args.checkpoint,
                    filename=self.get_checkpoint_filename(i))
                self.new_model_system.save_checkpoint(
                    folder=self.args.checkpoint, filename='best.pth.tar')
Example #4
hp = HumanPlayer(env).play  # human player

rp = RandomPlayer(env).play  # random player
gp = GreedyPlayer(env, env_utils).play  # greedy (always picks the move that captures the most stones immediately)
agp = AntiGreedyPlayer(env, env_utils).play  # anti-greedy (the opposite of greedy)
cgp = CompositeGreedyPlayer(env, env_utils).play  # composite greedy (anti-greedy in the first half, greedy in the second half)

# Configuration of AlphaZero player 1
ms1 = ModelSystem(env)
ms1.load_checkpoint('temp', 'best.pth.tar')  # load the DNN
args1 = dotdict({'num_MCTS': 128, 'cpuct': 1.0, 'dirichlet_eps': 0.0})
mcts1 = MCTS(env_utils, ms1, args1)  # MCTS only references num_MCTS and dirichlet_eps.
azp1 = lambda x: np.argmax(mcts1.get_action_prob(x, temp=0))  # AlphaZero player object

# Configuration of AlphaZero player 2 (for pitting AlphaZero against AlphaZero)
ms2 = ModelSystem(env)
ms2.load_checkpoint('temp', 'best.pth.tar')  # load the DNN
args2 = dotdict({'num_MCTS': 100, 'cpuct': 1.0, 'dirichlet_eps': 0.0})
mcts2 = MCTS(env_utils, ms2, args2)  # MCTS only references num_MCTS and dirichlet_eps.
azp2 = lambda x: np.argmax(mcts2.get_action_prob(x, temp=0))  # AlphaZero player object

###############################################################################
# Match setup
arena = Arena(player1=azp1, player2=hp, env=env, display=None)

# Start the match
print(arena.play_games(2, verbose=True))
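
As the comment for player 2 notes, the same Arena call can pit the two AlphaZero players against each other instead of the human player; a short variation using only the objects defined above:

# AlphaZero vs. AlphaZero match (reuses azp1/azp2 from above)
arena_az = Arena(player1=azp1, player2=azp2, env=env, display=None)
print(arena_az.play_games(2, verbose=True))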
Example #5
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                # eps_time = AverageMeter()
                # bar = Bar('Self Play', max=self.args.numEps)
                # end = time.time()

                for eps in range(self.args.numEps):
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree
                    iterationTrainExamples += self.execute_episode()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.save_train_examples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(
                lambda x: np.argmax(pmcts.get_action_prob(x, temp=0)),
                lambda x: np.argmax(nmcts.get_action_prob(x, temp=0)),
                self.game)
            pwins, nwins, draws = arena.play_games(
                self.args.arenaCompare)  # not implemented

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))
            if pwins + nwins == 0 or float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.get_checkpoint_file(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
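
For reference, learn() reads the following fields from self.args (plus self.skipFirstSelfPlay, which is set elsewhere). A minimal configuration sketch using the dotdict helper seen in Example #4; the field names come from the code above, but the values are illustrative assumptions only:

args = dotdict({
    'numIters': 10,                          # outer training iterations
    'numEps': 25,                            # self-play episodes per iteration
    'maxlenOfQueue': 200000,                 # max training examples kept per iteration
    'numItersForTrainExamplesHistory': 20,   # how many iterations of examples to keep
    'arenaCompare': 40,                      # games played when pitting new vs. previous net
    'updateThreshold': 0.55,                 # win fraction required to accept the new model
    'checkpoint': './temp/',                 # folder for checkpoints and example backups
})

Since self.args is also passed to MCTS, the MCTS implementation may additionally expect fields such as numMCTSSims or cpuct, as seen in the other examples.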
Example #6
         ('CompositeGreedyPlayer', cgp, 0),
         ('mini-AlphaZero', azp1, 1)]


###############################################################################
num_trial = 100

results = np.zeros(shape=(len(players), len(players)))

for i in range(len(players)):
    for j in range(i + 1):
        arena = Arena(player1=players[i][1], player2=players[j][1], env=env, display=None)

        # If both players are deterministic, 2 games are enough.
        actual_num_trial = 2 if (players[i][2] == 0 and players[j][2] == 0) else num_trial

        one_win, two_win, draw = arena.play_games(actual_num_trial, verbose=False)
        print(players[i][0], ' vs ', players[j][0], ' : ', one_win / float(actual_num_trial), ' wins.')
        results[i, j] = one_win / float(actual_num_trial)
        

###############################################################################
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.DataFrame(results, index=[x[0] for x in players], columns=[x[0] for x in players])

plt.figure(figsize=(10, 10))
sns.heatmap(df, annot=True)
plt.show()