Example #1
import copy  # for deepcopy below; Board and StrategyDNN come from the project's own modules


def dispose_msg(msg, msg_queue):
    """Handle one protocol message (START:/MOVE:/WIN:/UNDO:/WHERE:/END:) and
    return the reply string, or None when no reply is needed."""
    # print('recv:', msg)

    global board
    global s1
    global first_query
    global who_first

    ans = None
    seq = msg.split(' ')
    if seq[0] == 'START:':
        board_size = int(seq[1])
        Board.set_board_size(board_size)
        board = Board()
        if s1 is None:
            s1 = StrategyDNN()
        first_query = True
        who_first = None
        ans = 'START: OK'
        if msg_queue is not None:
            msg_queue.put(('start', ))
        s1.absorb('?')
        s1.on_episode_start()
    elif seq[0] == 'MOVE:':
        assert len(seq) >= 4, 'protocol inconsistent'
        old_board = copy.deepcopy(board)
        x, y = int(seq[1]), int(seq[2])
        who = Board.STONE_BLACK if int(seq[3]) == 1 else Board.STONE_WHITE
        if who_first is None:
            who_first = who
            print('who first?', who_first)
        if board.is_legal(x, y):
            board.move(x, y, who)

        s1.swallow(who, old_board, board)
        if msg_queue is not None:
            msg_queue.put(('move', who, x * Board.BOARD_SIZE + y))
    elif seq[0] == 'WIN:':
        assert len(seq) == 3, 'protocol inconsistent'
        x, y = int(seq[1]), int(seq[2])
        who = board.get(x, y)
        print('player %d wins the game' % (who,))
    elif seq[0] == 'UNDO:':
        ans = 'UNDO: unsupported yet'
    elif seq[0] == 'WHERE:':
        if who_first is None:
            who_first = Board.STONE_BLACK
            print('who first?', who_first)
        if first_query:
            s1.stand_for = board.query_stand_for(who_first)
            print('i stand for:', s1.stand_for)
            first_query = False
        assert s1.stand_for is not None
        x, y = s1.preferred_move(board)
        ans = 'HERE: %d %d' % (x, y)
    elif seq[0] == 'END:':
        # s1.close()
        ans = 'END: OK'

    return ans
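
The handler is driven by a line-based protocol, one keyword per message. Below is a small driver sketch that is not part of the original project; demo_session and the message literals are illustrative, inferred from the branches above, and the module-level globals must exist before the first call.

# Illustrative driver: exercises dispose_msg with the message shapes the
# branches above expect (board size, then a black move, then a move query).
board, s1, first_query, who_first = None, None, True, None

def demo_session():
    for msg in ('START: 15', 'MOVE: 7 7 1', 'WHERE:', 'END:'):
        reply = dispose_msg(msg, msg_queue=None)  # no GUI queue attached
        print(msg, '->', reply)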
Example #2
    # Assumes module-level imports: random, numpy as np, plus the project's
    # Board, Game, and StrategyDNN classes.
    def reinforce(self):
        if len(self.oppo_pool) == 0:
            self.oppo_pool.append(
                StrategyDNN(is_train=False, is_revive=True, is_rl=False))

        s1 = StrategyDNN(is_train=False, is_revive=True, is_rl=True)
        s2 = random.choice(self.oppo_pool)

        stat = []
        win1, win2, draw = 0, 0, 0

        iter_n = 100
        for i in range(iter_n):
            print('iter:', i)

            for _ in range(1000):
                s1.stand_for = random.choice(
                    [Board.STONE_BLACK, Board.STONE_WHITE])
                s2.stand_for = Board.oppo(s1.stand_for)

                g = Game(Board.rand_generate_a_position(), s1, s2, observer=s1)
                g.step_to_end()
                win1 += 1 if g.winner == s1.stand_for else 0
                win2 += 1 if g.winner == s2.stand_for else 0
                draw += 1 if g.winner == Board.STONE_EMPTY else 0

            # (An earlier variant promoted s1 into the opponent pool whenever
            # it out-scored s2 and stopped after 50 straight losses;
            # Example #5 implements that promotion live.)

            if i % 1 == 0 or i + 1 == iter_n:  # % 1 logs every iteration; raise it to throttle
                total = win1 + win2 + draw
                win1_r = win1 / total
                win2_r = win2 / total
                draw_r = draw / total
                print("iter:%d, win: %.3f, loss: %.3f, tie: %.3f" %
                      (i, win1_r, win2_r, draw_r))
                stat.append([win1_r, win2_r, draw_r])

        stat = np.array(stat)
        print('stat. shape:', stat.shape)
        np.savez('/home/splendor/fusor/stat.npz', stat=stat)
        self.strategy_1 = self.strategy_2 = s1
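
Since reinforce() persists only the win/lose/draw curve, the saved archive can be reloaded for a quick look afterwards; a minimal sketch, reusing the path and the stat key from the code above.

import numpy as np

data = np.load('/home/splendor/fusor/stat.npz')
stat = data['stat']  # one [win, lose, draw] rate triple per logged iteration
print('final rates (win, lose, draw):', stat[-1])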
Example #3
    # Assumes module-level imports: tensorflow as tf, plus the project's
    # Board, StrategyDNN, and StrategyRand classes; RL_BRAIN_DIR points at
    # the RL checkpoint directory.
    def init_both_sides(self):
        # (An earlier variant built a StrategyTD(feat, feat * 2) with
        # feat = Board.BOARD_SIZE_SQ * 2 + 2 and hand-tuned alpha, beta,
        # lambdaa, and epsilon; kept only as a reference.)

        if self.strategy_1 is None:
            # s1 = StrategyMC()
            # s1 = StrategyANN(feat, feat * 2)
            file = tf.train.latest_checkpoint(RL_BRAIN_DIR)
            s1 = StrategyDNN(from_file=file, part_vars=True)
            # s1 = StrategyMCTS1()
            self.strategy_1 = s1
        else:
            s1 = self.strategy_1

        s1.is_learning = True
        s1.stand_for = Board.STONE_BLACK

        # (Earlier variants used a StrategyTD or StrategyMinMax opponent here.)
        s2 = StrategyRand()
        s2.stand_for = Board.STONE_WHITE
        self.strategy_2 = s2

        return s1, s2
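
The returned pair plugs directly into the Game API used in Examples #2 and #5; a minimal sketch, where trainer stands in for whatever object owns init_both_sides.

s1, s2 = trainer.init_both_sides()
g = Game(Board.rand_generate_a_position(), s1, s2, observer=s1)
g.step_to_end()
print('winner stone:', g.winner)  # Board.STONE_EMPTY means a draw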
Example #4
    # Assumes module-level imports of the project's Board, StrategyDNN, and
    # StrategyRand classes.
    def init_both_sides(self):
        feat = Board.BOARD_SIZE_SQ * 2 + 2

        # (An earlier variant built a StrategyTD(feat, feat * 2) here with
        # hand-tuned lambdaa and epsilon; feat above is kept for reference.)

        if self.strategy_1 is None:
#             s1 = StrategyMC()
#             s1 = StrategyANN(feat, feat * 2)
            s1 = StrategyDNN()
            self.strategy_1 = s1
        else:
            s1 = self.strategy_1

        s1.is_learning = True
        s1.stand_for = Board.STONE_BLACK

        # (Earlier variants used a StrategyTD or StrategyMinMax opponent here.)
        s2 = StrategyRand()
        s2.stand_for = Board.STONE_WHITE
        self.strategy_2 = s2

        return s1, s2
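
The only difference from Example #3 is how s1 is created when no strategy exists yet: there it revives a StrategyDNN from the latest RL checkpoint (from_file=..., part_vars=True), while here StrategyDNN() is built with its defaults; the StrategyRand opponent and the stone assignments are identical.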
Example #5
    # Assumes module-level imports: os, random, numpy as np, tensorflow as tf,
    # plus the project's Board, Game, StrategyDNN classes and the cfg module;
    # RL_BRAIN_DIR, SL_BRAIN_DIR, FILE_PREFIX, and STAT_FILE are constants.
    def reinforce(self, resume=True):
        self.oppo_pool = self.get_mindsets(RL_BRAIN_DIR, FILE_PREFIX)

        if resume and len(self.oppo_pool) != 0:
            file = tf.train.latest_checkpoint(RL_BRAIN_DIR)
            part_vars = False
        else:
            file = tf.train.latest_checkpoint(SL_BRAIN_DIR)
            part_vars = True
        s1 = StrategyDNN(is_train=False, is_revive=True, is_rl=True,
                         from_file=file, part_vars=part_vars)
        print('I was born from', file)

        if len(self.oppo_pool) != 0:
            file = random.choice(self.oppo_pool)
            file = os.path.join(RL_BRAIN_DIR, file)
            part_vars = False
        else:
            file = tf.train.latest_checkpoint(SL_BRAIN_DIR)
            part_vars = True
        s2 = StrategyDNN(is_train=False, is_revive=True, is_rl=False,
                         from_file=file, part_vars=part_vars)
        print('vs.', file)

        stat = []
        iter_n = 100
        for i in range(iter_n):
            print('iter:', i)
            win1, win2, draw = 0, 0, 0
            step_counter, explo_counter = 0, 0
            episodes = cfg.REINFORCE_PERIOD
            for _ in range(episodes):
                s1.stand_for = random.choice([Board.STONE_BLACK, Board.STONE_WHITE])
                s2.stand_for = Board.oppo(s1.stand_for)

                g = Game(Board.rand_generate_a_position(), s1, s2, observer=s1)
                g.step_to_end()
                win1 += 1 if g.winner == s1.stand_for else 0
                win2 += 1 if g.winner == s2.stand_for else 0
                draw += 1 if g.winner == Board.STONE_EMPTY else 0
#                 print('winner: {:d}, stand for: {:d}'.format(g.winner, s1.stand_for))
                s1.win_ratio = win1 / win2 if win2 != 0 else 1.
                step_counter += g.step_counter
                explo_counter += g.exploration_counter

            if s1.win_ratio > 1.1:
                file = FILE_PREFIX + '-' + str(i)
                s1.mind_clone(os.path.join(RL_BRAIN_DIR, FILE_PREFIX), i)
                self.oppo_pool.append(file)
                file = random.choice(self.oppo_pool)
                file = os.path.join(RL_BRAIN_DIR, file)
                s2.close()
                s2 = StrategyDNN(is_train=False, is_revive=True, is_rl=False,
                                 from_file=file, part_vars=False)
                print('vs.', file)
            # (An earlier variant counted consecutive losses and stopped the
            # whole run after 50; dropped here.)

            if i % 1 == 0 or i + 1 == iter_n:  # % 1 logs every iteration; raise it to throttle
                total = win1 + win2 + draw
                win1_r = win1 / total
                win2_r = win2 / total
                draw_r = draw / total
                print("iter:%d, win: %.3f, lose: %.3f, draw: %.3f, t: %.3f" % (i, win1_r, win2_r, draw_r, s1.temperature))
                stat.append([win1_r, win2_r, draw_r])
                print('avg. steps[%f], avg. explos[%f]' % (step_counter / episodes, explo_counter / episodes))

            if i % 10 == 0 or i + 1 == iter_n:
                np.savez(STAT_FILE, stat=np.array(stat))

        print('rl done. you can try it.')
        self.strategy_1 = self.strategy_2 = s1
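
reinforce() relies on get_mindsets to enumerate the opponent pool, but the helper is not among these examples. Below is a purely hypothetical sketch of the contract it appears to satisfy, returning the basenames of FILE_PREFIX-<i> checkpoints inside RL_BRAIN_DIR; the real implementation may differ.

import os

def get_mindsets(brain_dir, prefix):
    # Collect unique checkpoint basenames of the form '<prefix>-<i>' from
    # files such as '<prefix>-<i>.index' or '<prefix>-<i>.meta'.
    names = {f.split('.')[0] for f in os.listdir(brain_dir)
             if f.startswith(prefix + '-')}
    return sorted(names)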