Example #1
    def match(self):
        s1, s2 = self.strategy_1, self.strategy_2
        print('player1:', s1.__class__.__name__)
        print('player2:', s2.__class__.__name__)

        # raw outcome counts (despite the name, these are not yet probabilities):
        # [0] s1 wins as black, [1] s2 wins as white, [2] draw,
        # [3] s1 wins as white, [4] s2 wins as black, [5] draw
        probs = np.zeros(6)
        games = 100
        for i in range(games):
            print('game pair:', i)
            # round 1: s1 plays black, s2 plays white
            s1.stand_for = Board.STONE_BLACK
            s2.stand_for = Board.STONE_WHITE
            g = Game(Board.rand_generate_a_position(), s1, s2)
            g.step_to_end()
            if g.winner == Board.STONE_BLACK:
                probs[0] += 1
            elif g.winner == Board.STONE_WHITE:
                probs[1] += 1
            else:
                probs[2] += 1

            # round 2: colors swapped, s1 plays white, s2 plays black
            s1.stand_for = Board.STONE_WHITE
            s2.stand_for = Board.STONE_BLACK
            g = Game(Board.rand_generate_a_position(), s1, s2)
            g.step_to_end()
            if g.winner == Board.STONE_WHITE:
                probs[3] += 1
            elif g.winner == Board.STONE_BLACK:
                probs[4] += 1
            else:
                probs[5] += 1

        print('total game pairs played:', games)
        print('outcome counts:', probs)
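The pairing loop above reduces to one repeated step: assign colors, play a game to the end, and inspect g.winner. A minimal sketch of that step, assuming two already-constructed strategy objects exposing the stand_for attribute used throughout these examples (the helper name play_one is illustrative, not part of the project):

def play_one(s1, s2, s1_color):
    # assign sides, play a full game from a fresh board, and classify the result
    s1.stand_for = s1_color
    s2.stand_for = Board.oppo(s1_color)
    g = Game(Board(), s1, s2)
    g.step_to_end()
    if g.winner == s1.stand_for:
        return 's1'
    if g.winner == s2.stand_for:
        return 's2'
    return 'draw'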
Example #2
    def measure_perf(self, s1, s2):
        old_epsilon1, old_is_learning1, old_stand_for1 = s1.epsilon, s1.is_learning, s1.stand_for
#         old_epsilon2, old_is_learning2, old_stand_for2 = s2.epsilon, s2.is_learning, s2.stand_for
        old_is_learning2, old_stand_for2 = s2.is_learning, s2.stand_for
        s1.epsilon, s1.is_learning, s1.stand_for = 0, False, Board.STONE_BLACK
#         s2.epsilon, s2.is_learning, s2.stand_for = 0, False, Board.STONE_WHITE
        s2.is_learning, s2.stand_for = False, Board.STONE_WHITE

        s3 = StrategyRand()

        # win/draw counters, normalized to rates at the end:
        # [0] s1 wins as black vs s2, [1] draw, [2] s1 wins as white vs s2, [3] draw,
        # [4] s1 wins as black vs random, [5] s1 wins as white vs random
        probs = [0, 0, 0, 0, 0, 0]
        games = 3  # evaluation rounds per pairing
        for i in range(games):
            # the learner s1 moves first (plays black)
            s1.stand_for = Board.STONE_BLACK
            s2.stand_for = Board.STONE_WHITE
            g = Game(Board(), s1, s2)
            g.step_to_end()
            if g.winner == Board.STONE_BLACK:
                probs[0] += 1
            elif g.winner == Board.STONE_EMPTY:
                probs[1] += 1

            # the learner s1 moves second (plays white)
            s1.stand_for = Board.STONE_WHITE
            s2.stand_for = Board.STONE_BLACK
            g = Game(Board(), s1, s2)
            g.step_to_end()
            if g.winner == Board.STONE_WHITE:
                probs[2] += 1
            elif g.winner == Board.STONE_EMPTY:
                probs[3] += 1

            # the learner s1 moves first (plays black) vs. a random opponent
            s1.stand_for = Board.STONE_BLACK
            s3.stand_for = Board.STONE_WHITE
            g = Game(Board(), s1, s3)
            g.step_to_end()
            if g.winner == Board.STONE_BLACK:
                probs[4] += 1

            # the learner s1 moves second (plays white) vs. a random opponent
            s1.stand_for = Board.STONE_WHITE
            s3.stand_for = Board.STONE_BLACK
            g = Game(Board(), s1, s3)
            g.step_to_end()
            if g.winner == Board.STONE_WHITE:
                probs[5] += 1

        probs = [n / games for n in probs]  # convert counts to rates
        print(probs)

        s1.epsilon, s1.is_learning, s1.stand_for = old_epsilon1, old_is_learning1, old_stand_for1
#         s2.epsilon, s2.is_learning, s2.stand_for = old_epsilon2, old_is_learning2, old_stand_for2
        s2.is_learning, s2.stand_for = old_is_learning2, old_stand_for2
        return probs
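The commented-out block in train1 (Example #6) hints at how this probe is meant to be used: call it every few episodes on the learning strategy and its opponent and collect the six rates for later plotting. A sketch of that schedule is below; track_performance, trainer, and the interval are illustrative, only measure_perf and its return shape come from the code above:

def track_performance(trainer, learner, opponent, episodes=10000, interval=500):
    # column 0 holds the episode index, columns 1..6 the six rates returned by measure_perf
    perf = [[] for _ in range(7)]
    for episode in range(episodes):
        # ... one training episode would run here ...
        if (episode + 1) % interval == 0:
            probs = trainer.measure_perf(learner, opponent)
            perf[0].append(episode)
            for idx, p in enumerate(probs):
                perf[idx + 1].append(p)
    return perf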
Example #3
    def reinforce(self):
        if len(self.oppo_pool) == 0:
            self.oppo_pool.append(
                StrategyDNN(is_train=False, is_revive=True, is_rl=False))

        s1 = StrategyDNN(is_train=False, is_revive=True, is_rl=True)
        s2 = random.choice(self.oppo_pool)

        stat = []
        win1, win2, draw = 0, 0, 0

        n_lose = 0
        iter_n = 100
        i = 0
        while True:
            print('iter:', i)

            for _ in range(1000):
                s1.stand_for = random.choice(
                    [Board.STONE_BLACK, Board.STONE_WHITE])
                s2.stand_for = Board.oppo(s1.stand_for)

                g = Game(Board.rand_generate_a_position(), s1, s2, observer=s1)
                g.step_to_end()
                win1 += 1 if g.winner == s1.stand_for else 0
                win2 += 1 if g.winner == s2.stand_for else 0
                draw += 1 if g.winner == Board.STONE_EMPTY else 0

#             if win1 > win2:
#                 s1_c = s1.mind_clone()
#                 self.oppo_pool.append(s1_c)
#                 s2 = random.choice(self.oppo_pool)
#                 n_lose = 0
#                 print('stronger, oppos:', len(self.oppo_pool))
#             elif win1 < win2:
#                 n_lose += 1
#
#             if n_lose >= 50:
#                 break

            if i % 1 == 0 or i + 1 == iter_n:  # 'i % 1 == 0' is always true, so stats are printed every iteration
                total = win1 + win2 + draw  # counters are cumulative; they are never reset between iterations
                win1_r = win1 / total
                win2_r = win2 / total
                draw_r = draw / total
                print("iter:%d, win: %.3f, loss: %.3f, tie: %.3f" %
                      (i, win1_r, win2_r, draw_r))
                stat.append([win1_r, win2_r, draw_r])

            i += 1

            if i > iter_n:  # runs iter_n + 1 iterations in total (i = 0 .. iter_n)
                break

        stat = np.array(stat)
        print('stat. shape:', stat.shape)
        np.savez('/home/splendor/fusor/stat.npz', stat=stat)  # stat is already an ndarray
        self.strategy_1 = self.strategy_2 = s1
Example #4
    def vs_human(self, which_side_human_play):
        strategy = self.which_one(Board.oppo(which_side_human_play))
        if strategy is None or isinstance(strategy, StrategyRand):
            strategy = self.which_one(which_side_human_play)
        if strategy is None:
            print('no opponent available')
            return

        # disable learning and pin the AI to the side opposite the human for the duration of the game
        old_is_learning, old_stand_for = strategy.is_learning, strategy.stand_for
        strategy.is_learning, strategy.stand_for = False, Board.oppo(which_side_human_play)

        s1 = strategy
        s2 = StrategyHuman()
        s2.stand_for = which_side_human_play

        self.game = Game(Board(), s1, s2, self.msg_queue)
        self.game.step_to_end()

        strategy.is_learning, strategy.stand_for = old_is_learning, old_stand_for  # restore training state
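A typical call, assuming the surrounding object already holds a trained strategy (the variable name trainer is illustrative):

# the human plays black; the learned strategy takes white with learning disabled
trainer.vs_human(Board.STONE_BLACK)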
Example #5
    def learn_from_2_teachers(self):
        s1 = StrategyMinMax()
        s1.stand_for = Board.STONE_BLACK
        self.strategy_1 = s1

        s2 = StrategyMinMax()
        s2.stand_for = Board.STONE_WHITE
        self.strategy_2 = s2

        # the Monte-Carlo strategy watches the teachers' games as an observer
        observer = StrategyMC()

        win1, win2, draw = 0, 0, 0
        step_counter, explo_counter = 0, 0
        begin = datetime.datetime.now()
        episodes = 10000
        for i in range(episodes):
            g = Game(Board(), s1, s2, observer=observer)
            g.step_to_end()
            win1 += 1 if g.winner == Board.STONE_BLACK else 0
            win2 += 1 if g.winner == Board.STONE_WHITE else 0
            draw += 1 if g.winner == Board.STONE_EMPTY else 0

            step_counter += g.step_counter
            explo_counter += g.exploration_counter
            print('training...%d' % i)

        total = win1 + win2 + draw
        print("black win: %f" % (win1 / total))
        print("white win: %f" % (win2 / total))
        print("draw: %f" % (draw / total))

        print('avg. steps[%f], avg. explos[%f]' %
              (step_counter / episodes, explo_counter / episodes))

        end = datetime.datetime.now()
        diff = end - begin
        print("time cost[%f]s, avg.[%f]s" %
              (diff.total_seconds(), diff.total_seconds() / episodes))

        observer.save('./brain1.npz')
Example #6
    def train1(self, s1, s2):
        '''Run one training session.

        Returns
        -------
        winner : Strategy
            the strategy that won more games
        win_ratio : float
            the winner's share of all games played
        '''

        max_explore_rate = 0.95

        win1, win2, draw = 0, 0, 0
        step_counter, explo_counter = 0, 0
        begin = datetime.datetime.now()
        episodes = 1
        samples = 100
        interval = episodes // samples  # 0 when episodes < samples; only used by the commented-out perf probe below
        perf = [[] for _ in range(7)]
        learner = s1 if s1.is_learning else s2
        oppo = self.which_one(Board.oppo(learner.stand_for))
        stat_win = []
#         past_me = learner.mind_clone()
        for i in range(episodes):
#             if (i + 1) % interval == 0:
# #                 print(np.allclose(s1.hidden_weights, past_me.hidden_weights))
#                 probs = self.measure_perf(learner, oppo)
#                 perf[0].append(i)
#                 for idx, x in enumerate(probs):
#                     perf[idx + 1].append(x)

            # exploration decays exponentially from max_explore_rate toward max_explore_rate * e**-5
            learner.epsilon = max_explore_rate * np.exp(-5 * i / episodes)  # * (1 if i < episodes//2 else 0.3)
            g = Game(Board(), s1, s2)
            g.step_to_end()
            win1 += 1 if g.winner == Board.STONE_BLACK else 0
            win2 += 1 if g.winner == Board.STONE_WHITE else 0
            draw += 1 if g.winner == Board.STONE_EMPTY else 0

            stat_win.append(win1 - win2 - draw)  # running score: wins minus losses and draws
#             rec.append(win1)
            step_counter += g.step_counter
            explo_counter += g.exploration_counter
#             print('steps[%d], explos[%d]' % (g.step_counter, g.exploration_counter))
            print('training...%d' % i)

        total = win1 + win2 + draw
        print("black win: %f" % (win1 / total))
        print("white win: %f" % (win2 / total))
        print("draw: %f" % (draw / total))

        print('avg. steps[%f], avg. explos[%f]' % (step_counter / episodes, explo_counter / episodes))

        end = datetime.datetime.now()
        diff = end - begin
        print("time cost[%f]s, avg.[%f]s" % (diff.total_seconds(), diff.total_seconds() / episodes))

        with open('stat-result-win.txt', 'w') as f:
            f.write(repr(stat_win))
#         print(perf)
#         self.draw_perf(perf)

#         np.set_printoptions(threshold=np.nan, formatter={'float_kind' : lambda x: "%.4f" % x})
#         with open('stat-result-net-train-errors.txt', 'w') as f:
#             f.write(repr(np.array(s1.errors)))

        winner = Board.STONE_BLACK if win1 >= win2 else Board.STONE_WHITE
        return self.which_one(winner), max(win1, win2) / total
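The schedule set on learner.epsilon above decays exponentially from max_explore_rate at the first episode to roughly max_explore_rate * e**-5 (about 0.0064 here) at the last. A quick standalone check of its shape, using plain NumPy and an illustrative episodes value (train1 above uses episodes = 1):

import numpy as np

max_explore_rate = 0.95
episodes = 10000  # illustrative only
for frac in (0.0, 0.25, 0.5, 0.75, 1.0):
    i = int(frac * episodes)
    eps = max_explore_rate * np.exp(-5 * i / episodes)
    print('%3.0f%% of training: epsilon = %.4f' % (frac * 100, eps))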
Example #7
    def reinforce(self, resume=True):
        self.oppo_pool = self.get_mindsets(RL_BRAIN_DIR, FILE_PREFIX)

        part_vars = True
        if resume and len(self.oppo_pool) != 0:
            file = tf.train.latest_checkpoint(RL_BRAIN_DIR)
            part_vars = False
        else:
            file = tf.train.latest_checkpoint(SL_BRAIN_DIR)
            part_vars = True
        s1 = StrategyDNN(is_train=False, is_revive=True, is_rl=True, from_file=file, part_vars=part_vars)
        print('I was born from', file)

        if len(self.oppo_pool) != 0:
            file = random.choice(self.oppo_pool)
            file = os.path.join(RL_BRAIN_DIR, file)
            part_vars = False
        else:
            file = tf.train.latest_checkpoint(SL_BRAIN_DIR)
            part_vars = True
        s2 = StrategyDNN(is_train=False, is_revive=True, is_rl=False, from_file=file, part_vars=part_vars)
        print('vs.', file)

        stat = []

#         n_lose = 0
        iter_n = 100
        for i in range(iter_n):
            print('iter:', i)
            win1, win2, draw = 0, 0, 0
            step_counter, explo_counter = 0, 0
            episodes = cfg.REINFORCE_PERIOD
            for _ in range(episodes):
                s1.stand_for = random.choice([Board.STONE_BLACK, Board.STONE_WHITE])
                s2.stand_for = Board.oppo(s1.stand_for)

                g = Game(Board.rand_generate_a_position(), s1, s2, observer=s1)
                g.step_to_end()
                win1 += 1 if g.winner == s1.stand_for else 0
                win2 += 1 if g.winner == s2.stand_for else 0
                draw += 1 if g.winner == Board.STONE_EMPTY else 0
#                 print('winner: {:d}, stand for: {:d}'.format(g.winner, s1.stand_for))
                s1.win_ratio = win1 / win2 if win2 != 0 else 1.
                step_counter += g.step_counter
                explo_counter += g.exploration_counter

            # once the learner clearly beats the current opponent, snapshot it into the opponent pool
            # and draw a fresh (possibly older) opponent from the pool
            if s1.win_ratio > 1.1:
                file = FILE_PREFIX + '-' + str(i)
                s1.mind_clone(os.path.join(RL_BRAIN_DIR, FILE_PREFIX), i)
                self.oppo_pool.append(file)
                file = random.choice(self.oppo_pool)
                file = os.path.join(RL_BRAIN_DIR, file)
                s2.close()
                s2 = StrategyDNN(is_train=False, is_revive=True, is_rl=False, from_file=file, part_vars=False)
                print('vs.', file)
#                 n_lose = 0
#             elif win1 < win2:
#                 n_lose += 1
#             if n_lose >= 50:
#                 break

            if i % 1 == 0 or i + 1 == iter_n:  # 'i % 1 == 0' is always true, so stats are printed every iteration
                total = win1 + win2 + draw
                win1_r = win1 / total
                win2_r = win2 / total
                draw_r = draw / total
                print("iter:%d, win: %.3f, lose: %.3f, draw: %.3f, t: %.3f" % (i, win1_r, win2_r, draw_r, s1.temperature))
                stat.append([win1_r, win2_r, draw_r])
                print('avg. steps[%f], avg. explos[%f]' % (step_counter / episodes, explo_counter / episodes))

            if i % 10 == 0 or i + 1 == iter_n:
                np.savez(STAT_FILE, stat=np.array(stat))

        print('rl done. you can try it.')
        self.strategy_1 = self.strategy_2 = s1