Beispiel #1
0
    def measure_perf(self, s1, s2):
        """Estimate win/draw rates of learner ``s1`` over a small tournament.

        Each round, ``s1`` plays ``s2`` twice (moving first as black, then
        second as white) and a fresh random strategy twice (first, then
        second).  Exploration and learning are switched off on ``s1``/``s2``
        for the duration and restored before returning.

        :param s1: learner strategy; must expose ``epsilon``,
            ``is_learning`` and ``stand_for``.
        :param s2: opponent strategy; must expose ``is_learning`` and
            ``stand_for`` (its ``epsilon`` is deliberately left untouched).
        :return: list of 6 floats, each normalized by the number of games:
            [s1 win as black vs s2, draw as black vs s2,
             s1 win as white vs s2, draw as white vs s2,
             s1 win as black vs random, s1 win as white vs random].
        """
        # Save state so the evaluation does not disturb ongoing training.
        old_epsilon1, old_is_learning1, old_stand_for1 = s1.epsilon, s1.is_learning, s1.stand_for
        old_is_learning2, old_stand_for2 = s2.is_learning, s2.stand_for
        # Greedy, non-learning play for a fair measurement.
        s1.epsilon, s1.is_learning, s1.stand_for = 0, False, Board.STONE_BLACK
        s2.is_learning, s2.stand_for = False, Board.STONE_WHITE

        s3 = StrategyRand()  # random baseline opponent

        def _play(opponent, s1_stone, opp_stone):
            # Play one game to completion and return the winning stone.
            s1.stand_for = s1_stone
            opponent.stand_for = opp_stone
            g = Game(Board(), s1, opponent)
            g.step_to_end()
            return g.winner

        counts = [0, 0, 0, 0, 0, 0]
        games = 3  # increase (e.g. 30) for a less noisy estimate
        for _ in range(games):
            # the learner s1 moves first (black) against s2
            winner = _play(s2, Board.STONE_BLACK, Board.STONE_WHITE)
            if winner == Board.STONE_BLACK:
                counts[0] += 1
            elif winner == Board.STONE_EMPTY:  # draw
                counts[1] += 1

            # the learner s1 moves second (white) against s2
            winner = _play(s2, Board.STONE_WHITE, Board.STONE_BLACK)
            if winner == Board.STONE_WHITE:
                counts[2] += 1
            elif winner == Board.STONE_EMPTY:  # draw
                counts[3] += 1

            # the learner s1 moves first vs. the random opponent
            if _play(s3, Board.STONE_BLACK, Board.STONE_WHITE) == Board.STONE_BLACK:
                counts[4] += 1

            # the learner s1 moves second vs. the random opponent
            if _play(s3, Board.STONE_WHITE, Board.STONE_BLACK) == Board.STONE_WHITE:
                counts[5] += 1

        probs = [c / games for c in counts]
        print(probs)

        # Restore the saved training state.
        s1.epsilon, s1.is_learning, s1.stand_for = old_epsilon1, old_is_learning1, old_stand_for1
        s2.is_learning, s2.stand_for = old_is_learning2, old_stand_for2
        return probs
Beispiel #2
0
    def measure_perf(self, s1, s2):
        """Run a short evaluation tournament for learner ``s1``.

        Over a fixed number of rounds, ``s1`` plays ``s2`` as black and as
        white, then a random strategy as black and as white.  Learning and
        exploration are disabled on ``s1``/``s2`` during measurement and
        restored afterwards.

        :param s1: learner with ``epsilon``, ``is_learning``, ``stand_for``.
        :param s2: opponent with ``is_learning`` and ``stand_for``
            (its ``epsilon`` is intentionally not touched).
        :return: six rates normalized by the round count:
            [win/draw as black vs s2, win/draw as white vs s2,
             win as black vs random, win as white vs random].
        """
        saved1 = (s1.epsilon, s1.is_learning, s1.stand_for)
        saved2 = (s2.is_learning, s2.stand_for)
        s1.epsilon, s1.is_learning, s1.stand_for = 0, False, Board.STONE_BLACK
        s2.is_learning, s2.stand_for = False, Board.STONE_WHITE

        rand_opponent = StrategyRand()

        # (opponent, stone s1 plays, win-counter index, draw-counter index)
        matchups = (
            (s2, Board.STONE_BLACK, 0, 1),
            (s2, Board.STONE_WHITE, 2, 3),
            (rand_opponent, Board.STONE_BLACK, 4, None),
            (rand_opponent, Board.STONE_WHITE, 5, None),
        )

        tallies = [0, 0, 0, 0, 0, 0]
        games = 3  # 30
        for _ in range(games):
            for opponent, my_stone, win_idx, draw_idx in matchups:
                other_stone = (Board.STONE_WHITE
                               if my_stone == Board.STONE_BLACK
                               else Board.STONE_BLACK)
                s1.stand_for = my_stone
                opponent.stand_for = other_stone
                game = Game(Board(), s1, opponent)
                game.step_to_end()
                if game.winner == my_stone:
                    tallies[win_idx] += 1
                elif draw_idx is not None and game.winner == Board.STONE_EMPTY:
                    tallies[draw_idx] += 1

        rates = [t / games for t in tallies]
        print(rates)

        s1.epsilon, s1.is_learning, s1.stand_for = saved1
        s2.is_learning, s2.stand_for = saved2
        return rates
Beispiel #3
0
    def init_both_sides(self):
        """Set up the two players: a DNN learner (black) and a random
        opponent (white).

        The learner is created once from the latest checkpoint under
        ``RL_BRAIN_DIR`` and cached on ``self.strategy_1``; subsequent calls
        reuse the cached instance.  The random opponent is recreated on
        every call and stored on ``self.strategy_2``.

        :return: tuple ``(s1, s2)`` — the learning strategy and its opponent.
        """
        if self.strategy_1 is None:
            # Resume from the most recent checkpoint.  part_vars=True
            # presumably restores only a subset of the network's variables —
            # confirm against StrategyDNN's implementation.
            file = tf.train.latest_checkpoint(RL_BRAIN_DIR)
            s1 = StrategyDNN(from_file=file, part_vars=True)
            self.strategy_1 = s1
        else:
            s1 = self.strategy_1

        s1.is_learning = True
        s1.stand_for = Board.STONE_BLACK

        # Non-learning random player as a cheap sparring partner.
        s2 = StrategyRand()
        s2.stand_for = Board.STONE_WHITE
        self.strategy_2 = s2

        return s1, s2
Beispiel #4
0
    def init_both_sides(self):
        """Set up the two players: a DNN learner (black) and a random
        opponent (white).

        ``self.strategy_1`` caches the learner across calls; the random
        opponent is recreated and stored on ``self.strategy_2`` each call.

        :return: tuple ``(s1, s2)`` — the learning strategy and its opponent.
        """
        # NOTE(review): removed unused local `feat` (it fed only
        # commented-out StrategyTD/StrategyANN experiments).
        if self.strategy_1 is None:
            s1 = StrategyDNN()
            self.strategy_1 = s1
        else:
            s1 = self.strategy_1

        s1.is_learning = True
        s1.stand_for = Board.STONE_BLACK

        # Non-learning random player as a cheap sparring partner.
        s2 = StrategyRand()
        s2.stand_for = Board.STONE_WHITE
        self.strategy_2 = s2

        return s1, s2