Example #1
    def match(self):
        s1, s2 = self.strategy_1, self.strategy_2
        print('player1:', s1.__class__.__name__)
        print('player2:', s2.__class__.__name__)

        probs = np.zeros(6)
        games = 100  # 30
        for i in range(games):
            print(i)
            s1.stand_for = Board.STONE_BLACK
            s2.stand_for = Board.STONE_WHITE
            g = Game(Board.rand_generate_a_position(), s1, s2)
            g.step_to_end()
            if g.winner == Board.STONE_BLACK:
                probs[0] += 1
            elif g.winner == Board.STONE_WHITE:
                probs[1] += 1
            else:
                probs[2] += 1

            s1.stand_for = Board.STONE_WHITE
            s2.stand_for = Board.STONE_BLACK
            g = Game(Board.rand_generate_a_position(), s1, s2)
            g.step_to_end()
            if g.winner == Board.STONE_WHITE:
                probs[3] += 1
            elif g.winner == Board.STONE_BLACK:
                probs[4] += 1
            else:
                probs[5] += 1

        print('total play:', games)
        print(probs)
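For reference, the six counters printed above pair up by the colour s1 plays; this mapping is read straight off the branches in the loop:

    # probs[0]: s1 wins as black    probs[3]: s1 wins as white
    # probs[1]: s2 wins as white    probs[4]: s2 wins as black
    # probs[2]: draws, s1 as black  probs[5]: draws, s1 as white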
Example #2
    def self_play(self):
        # generate data with cur_best
        self.load_from_vat(self.cur_best_dir)

        for _ in range(N_GAMES_TRAIN):
            board = Board()
            assert (board.stones == Board.STONE_EMPTY).all()  # a fresh board must be completely empty
            memo_s = []
            memo_pi = []
            winner = Board.STONE_EMPTY
            step = 0
            whose_persp = board.whose_turn_now()
            cur_player = whose_persp
            while True:
                self._mcts.sim_many(board, N_SIMS)
                t = 1 if step < N_STEPS_EXPLORE else 1e-9
                step += 1
                pi, move = self._mcts.get_pi_and_best_move(t)
                memo_s.append(board)
                memo_pi.append(pi)
                new_board = copy.deepcopy(board)
                new_board.place_down(move, cur_player)
                over, winner, _ = new_board.is_over(board)
                if over:
                    break
                if self.resign(board, pi):
                    break
                cur_player = Board.oppo(cur_player)  # the other side moves on the next ply
                board = new_board

            if winner != Board.STONE_EMPTY:
                # +1 if the side that moved first from the start position won, else -1
                reward = 1 if winner == whose_persp else -1
                memo_z = [0] * len(memo_s)
                # recorded positions alternate between the two players starting with
                # whose_persp, so the outcome alternates in sign from the front
                memo_z[0::2] = [reward] * len(memo_z[0::2])
                memo_z[1::2] = [-reward] * len(memo_z[1::2])
                self.memo(memo_s, memo_pi, memo_z)
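A tiny standalone sketch of the alternating back-fill used at the end of self_play; the list length and winner are made up purely for illustration:

    reward = 1                                    # pretend the first mover won
    memo_z = [0] * 5                              # five recorded positions
    memo_z[0::2] = [reward] * len(memo_z[0::2])   # plies where the first mover acts
    memo_z[1::2] = [-reward] * len(memo_z[1::2])  # plies where the opponent acts
    print(memo_z)                                 # -> [1, -1, 1, -1, 1]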
Example #4
    def reinforce(self):
        if len(self.oppo_pool) == 0:
            self.oppo_pool.append(
                StrategyDNN(is_train=False, is_revive=True, is_rl=False))

        s1 = StrategyDNN(is_train=False, is_revive=True, is_rl=True)
        s2 = random.choice(self.oppo_pool)

        stat = []
        win1, win2, draw = 0, 0, 0

        n_lose = 0
        iter_n = 100
        i = 0
        while True:
            print('iter:', i)

            for _ in range(1000):
                s1.stand_for = random.choice(
                    [Board.STONE_BLACK, Board.STONE_WHITE])
                s2.stand_for = Board.oppo(s1.stand_for)

                g = Game(Board.rand_generate_a_position(), s1, s2, observer=s1)
                g.step_to_end()
                win1 += 1 if g.winner == s1.stand_for else 0
                win2 += 1 if g.winner == s2.stand_for else 0
                draw += 1 if g.winner == Board.STONE_EMPTY else 0

#             if win1 > win2:
#                 s1_c = s1.mind_clone()
#                 self.oppo_pool.append(s1_c)
#                 s2 = random.choice(self.oppo_pool)
#                 n_lose = 0
#                 print('stronger, oppos:', len(self.oppo_pool))
#             elif win1 < win2:
#                 n_lose += 1
#
#             if n_lose >= 50:
#                 break

            if i % 1 == 0 or i + 1 == iter_n:
                total = win1 + win2 + draw
                win1_r = win1 / total
                win2_r = win2 / total
                draw_r = draw / total
                print("iter:%d, win: %.3f, loss: %.3f, tie: %.3f" %
                      (i, win1_r, win2_r, draw_r))
                stat.append([win1_r, win2_r, draw_r])

            i += 1

            if i >= iter_n:
                break

        stat = np.array(stat)
        print('stat. shape:', stat.shape)
        np.savez('/home/splendor/fusor/stat.npz', stat=stat)  # stat is already an ndarray
        self.strategy_1 = self.strategy_2 = s1
Example #5
    def measure_perf(self, s1, s2):
        old_epsilon1, old_is_learning1, old_stand_for1 = s1.epsilon, s1.is_learning, s1.stand_for
#         old_epsilon2, old_is_learning2, old_stand_for2 = s2.epsilon, s2.is_learning, s2.stand_for
        old_is_learning2, old_stand_for2 = s2.is_learning, s2.stand_for
        s1.epsilon, s1.is_learning, s1.stand_for = 0, False, Board.STONE_BLACK
#         s2.epsilon, s2.is_learning, s2.stand_for = 0, False, Board.STONE_WHITE
        s2.is_learning, s2.stand_for = False, Board.STONE_WHITE

        s3 = StrategyRand()

        probs = [0, 0, 0, 0, 0, 0]
        games = 3  # 30
        for i in range(games):
            # the learner s1 moves first (plays black)
            s1.stand_for = Board.STONE_BLACK
            s2.stand_for = Board.STONE_WHITE
            g = Game(Board(), s1, s2)
            g.step_to_end()
            if g.winner == Board.STONE_BLACK:
                probs[0] += 1
            elif g.winner == Board.STONE_EMPTY:
                probs[1] += 1

            # the learner s1 moves second (plays white)
            s1.stand_for = Board.STONE_WHITE
            s2.stand_for = Board.STONE_BLACK
            g = Game(Board(), s1, s2)
            g.step_to_end()
            if g.winner == Board.STONE_WHITE:
                probs[2] += 1
            elif g.winner == Board.STONE_EMPTY:
                probs[3] += 1

            # the learner s1 moves first vs. the random opponent
            s1.stand_for = Board.STONE_BLACK
            s3.stand_for = Board.STONE_WHITE
            g = Game(Board(), s1, s3)
            g.step_to_end()
            if g.winner == Board.STONE_BLACK:
                probs[4] += 1

            # the learner s1 moves second vs. the random opponent
            s1.stand_for = Board.STONE_WHITE
            s3.stand_for = Board.STONE_BLACK
            g = Game(Board(), s1, s3)
            g.step_to_end()
            if g.winner == Board.STONE_WHITE:
                probs[5] += 1

        probs = [i / games for i in probs]
        print(probs)

        s1.epsilon, s1.is_learning, s1.stand_for = old_epsilon1, old_is_learning1, old_stand_for1
#         s2.epsilon, s2.is_learning, s2.stand_for = old_epsilon2, old_is_learning2, old_stand_for2
        s2.is_learning, s2.stand_for = old_is_learning2, old_stand_for2
        return probs
Example #6
def dispose_msg(msg, msg_queue):
    # print('recv:', msg)

    global board
    global s1
    global first_query
    global who_first

    ans = None
    seq = msg.split(' ')
    if seq[0] == 'START:':
        board_size = int(seq[1])
        Board.set_board_size(board_size)
        board = Board()
        if s1 is None:
            s1 = StrategyDNN()
        first_query = True
        who_first = None
        ans = 'START: OK'
        if msg_queue is not None:
            msg_queue.put(('start', ))
        # s1.absorb('?')
        s1.on_episode_start()
    elif seq[0] == 'MOVE:':
        assert len(seq) >= 4, 'protocol inconsistent'
        old_board = copy.deepcopy(board)
        x, y = int(seq[1]), int(seq[2])
        who = Board.STONE_BLACK if int(seq[3]) == 1 else Board.STONE_WHITE
        if who_first is None:
            who_first = who
            print('who first?', who_first)
        if board.is_legal(x, y):
            board.move(x, y, who)

        s1.swallow(who, old_board, board)
        if msg_queue is not None:
            msg_queue.put(('move', who, x * Board.BOARD_SIZE + y))
    elif seq[0] == 'WIN:':
        assert len(seq) == 3, 'protocol inconsistent'
        x, y = int(seq[1]), int(seq[2])
        who = board.get(x, y)
        print('player %d wins the game' % (who,))
    elif seq[0] == 'UNDO:':
        ans = 'UNDO: unsupported yet'
    elif seq[0] == 'WHERE:':
        if who_first is None:
            who_first = Board.STONE_BLACK
            print('who first?', who_first)
        if first_query:
            s1.stand_for = board.query_stand_for(who_first)
            print('i stand for:', s1.stand_for)
            first_query = False
        assert s1.stand_for is not None
        x, y = s1.preferred_move(board)
        ans = 'HERE: %d %d' % (x, y)
    elif seq[0] == 'END:':
        # s1.close()
        ans = 'END: OK'

    return ans
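A sketch of the line protocol dispose_msg handles; the keywords and reply shapes come from the branches above, while the board size and coordinates are invented for illustration:

    # 'START: 15'    -> 'START: OK'               # fresh 15x15 board, session reset
    # 'MOVE: 7 7 1'  -> None                      # a black stone at (7, 7) is recorded
    # 'WHERE:'       -> 'HERE: x y'               # the engine answers with its move
    # 'WIN: 7 7'     -> None                      # the winner owning that stone is printed
    # 'UNDO:'        -> 'UNDO: unsupported yet'
    # 'END:'         -> 'END: OK'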
Example #7
 def __init__(self):
     self.cur_board = Board()
     self.cur_player = self.cur_board.whose_turn_now()
     self.is_over = False
     self.winner = None
     self.history_states = []
     self.history_actions = []
     self.reward = 0.
     self.num_of_moves = 0
     self.rl_stand_for = Board.STONE_EMPTY
     self.first_rl_step = None
Example #8
    def inference_who_won(self):
        assert len(self.observation) > 0

        last = self.observation[-1]
        who, st1 = last[0], last[2]

        oppo = Board.oppo(who)
        oppo_will_win = Board.find_pattern_will_win(st1, oppo)
        if oppo_will_win:
            return oppo
        return Board.STONE_EMPTY
Example #10
    def preferred_board(self, old, moves, context):
        game = context
        self.searcher.board = old.stones.reshape((-1, Board.BOARD_SIZE)).tolist()
        DEPTH = 1
        score, row, col = self.searcher.search(game.whose_turn, DEPTH)
#         print('score%d, loc(%d, %d)'%(score, row, col))

        x = old.stones.copy()
        x[row * Board.BOARD_SIZE + col] = game.whose_turn
        b = Board()
        b.stones = x
        return b
Example #12
    def from_new_start_point(self, winner, s1, s2):
        '''
        Returns:
        ------------
        s1 : Strategy
            the learner
        s2 : Strategy
            the teacher
        '''
        if s1 == winner:
            s2 = s1.mind_clone()
        if s2 == winner:
            s1 = s2.mind_clone()

        # way 1: s1 follows the winner's stand-for
        s1.stand_for = winner.stand_for
        # way 2: s1 switches to the opposite of the winner's stand-for
#             s1.stand_for = Board.oppo(winner.stand_for)
        # way 3: s1 picks its stand-for at random
#             s1.stand_for = np.random.choice(np.array([Board.STONE_BLACK, Board.STONE_WHITE]))
        s2.stand_for = Board.oppo(s1.stand_for)

        s1.is_learning = True
        s2.is_learning = False
        return s1, s2
Example #13
    def step(self):
        moves, self.whose_turn, _ = Game.possible_moves(self.board)

        strat = self.strat1 if self.whose_turn == self.strat1.stand_for else self.strat2
        #         print('who', strat.stand_for)

        strat.update(self.board, None)

        new_board = strat.preferred_board(self.board, moves, self)
        # print('who%d play at %s' % (self.whose_turn,
        #                             str(divmod(Board.change(self.board, new_board), Board.BOARD_SIZE))))
        #         print(self.board.stones)
        if new_board.exploration:
            strat.setup()
            self.exploration_counter += 1

        self.over, self.winner, self.last_loc = new_board.is_over(self.board)

        if self.observer is not None:
            self.observer.swallow(self.whose_turn, self.board, new_board)

        if self.over:
            strat.update_at_end(self.board, new_board)
            opponent_strat = self.strat1 if self.whose_turn != self.strat1.stand_for else self.strat2
            opponent_strat.update_at_end(None, new_board)
            if self.observer is not None:
                self.observer.absorb(self.whose_turn)

        self.board = new_board

        if self.strat1 == self.strat2:
            self.strat1.stand_for = Board.oppo(self.strat1.stand_for)
Example #14
    def setup_brain(self):
        if self.policy1 is None:
            self.policy1 = Brain(self.transformer.get_input_shape,
                           self.transformer.placeholder_inputs,
                           self.transformer.model,
                           RLPolicy.SL_POLICY_DIR,
                           RLPolicy.SL_SUMMARY_DIR)
        assert self.policy1 is not None

        if self.policy2 is not None:
            self.policy2.close()
        self.policy2 = None  # random choice from oppo_pool

        policy_dir = RLPolicy.SL_POLICY_DIR
        summary_dir = RLPolicy.SL_SUMMARY_DIR
        if self.oppo_brain:
            rl_brain_id = random.choice(tuple(self.oppo_brain.keys()))
            print('the chosen oppo:', rl_brain_id)
            policy_dir = self.oppo_brain[rl_brain_id]
#             summary_dir = self.oppo_summary.get(rl_brain_id, RLPolicy.RL_SUMMARY_DIR_PREFIX + str(0))
#             summary_dir = os.path.join(RLPolicy.WORK_DIR, summary_dir)

        self.policy2 = Brain(self.transformer.get_input_shape,
                             self.transformer.placeholder_inputs,
                             self.transformer.model,
                             policy_dir,
                             summary_dir)

        assert self.policy2 is not None

        self.policy1_stand_for = random.choice([Board.STONE_BLACK, Board.STONE_WHITE])
        self.policy2_stand_for = Board.oppo(self.policy1_stand_for)
Example #17
    def vs_human(self, which_side_human_play):
        strategy = self.which_one(Board.oppo(which_side_human_play))
        if strategy is None or isinstance(strategy, StrategyRand):
            strategy = self.which_one(which_side_human_play)
        if strategy is None:
            print('no opponent available')
            return

        old_is_learning, old_stand_for = strategy.is_learning, strategy.stand_for
        strategy.is_learning, strategy.stand_for = False, Board.oppo(which_side_human_play)

        s1 = strategy
        s2 = StrategyHuman()
        s2.stand_for = which_side_human_play

        self.game = Game(Board(), s1, s2, self.msg_queue)
        self.game.step_to_end()

        strategy.is_learning, strategy.stand_for = old_is_learning, old_stand_for
Example #19
    def possible_moves(board):
        '''
        Returns:
        --------------
            boards : list of Board
                one candidate board per legal move
            who : stone colour
                whose turn it is to move
            locs : ndarray
                flat indices of the empty points
        '''
#         whose turn is it?
        who = board.whose_turn_now()

#         print("it is [%d]'s turn" % who)

        boards = []
        loc = np.where(board.stones == 0)
#         print(loc)
        for i in loc[0]:
            x = board.stones.copy()
            x[i] = who
            b = Board()
            b.stones = x
            boards.append(b)

#         print('possible moves[%d]' % len(boards))
        return boards, who, loc[0]
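A minimal usage sketch of possible_moves, assuming it is reachable as Game.possible_moves(board) the way the other examples call it; picking a candidate at random merely stands in for a real strategy:

    import random

    boards, who, locs = Game.possible_moves(Board())   # one candidate Board per empty point
    print('player %d has %d legal moves' % (who, len(locs)))
    next_board = random.choice(boards)                  # a strategy would score these instead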
Example #21
class Game(object):
    def __init__(self):
        self.cur_board = Board()
        self.cur_player = self.cur_board.whose_turn_now()
        self.is_over = False
        self.winner = None
        self.history_states = []
        self.history_actions = []
        self.reward = 0.
        self.num_of_moves = 0
        self.rl_stand_for = Board.STONE_EMPTY
        self.first_rl_step = None

    def move(self, loc):
        old_board = copy.deepcopy(self.cur_board)
        self.cur_board.move(loc[0], loc[1], self.cur_player)
        self.cur_player = Board.oppo(self.cur_player)
        self.is_over, self.winner, _ = self.cur_board.is_over(old_board)
        self.num_of_moves += 1

    def record_history(self, state, action):
        self.history_states.append(state)
        self.history_actions.append((self.cur_player, action))

    def remember_1st_rl_step(self, state):
        assert state is not None
        if self.first_rl_step is None:
            self.first_rl_step = (state, self.cur_player)

    def calc_reward(self, stand_for):
        assert self.is_over
        if self.winner == Board.STONE_EMPTY:
            self.reward = 0
        elif self.winner == stand_for:
            self.reward = 1
        else:
            self.reward = -1
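A short hypothetical episode using the Game wrapper above; the move location is invented, and the reward is scored from black's point of view:

    g = Game()
    g.record_history(g.cur_board, (7, 7))   # remember (state, action) before moving
    g.move((7, 7))                           # the current player plays at (7, 7)
    if g.is_over:
        g.calc_reward(Board.STONE_BLACK)     # reward becomes +1 / 0 / -1
        print(g.reward)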
Example #22
class FiveGame(TwoPlayerGame):
    def __init__(self):
        self.reset()
    
    
    def reset(self):
        TwoPlayerGame.reset(self)
        self.movesDone = 0
        self.b = Board()
    
    def isLegal(self, c, pos):
        return self.b.is_legal(pos[0], pos[1])
    
    def _fiveRow(self, c, pos):
        b = self.b.stones.reshape(-1, Board.BOARD_SIZE)
        # return the result; without it doMove could never detect a completed five
        return self.b.find_conn_5(b, pos[0], pos[1], c)
        
    def getLegals(self, c):
        loc = np.where(self.b.stones == 0)
        moves = [divmod(i, Board.BOARD_SIZE) for i in loc[0]]
        return moves
    
    def doMove(self, c, pos):
        """ The action is a (color, position) tuple for the next stone to place.
        Returns True if the move was legal. """
        if not self.isLegal(c, pos):
            return False
        # count only moves that actually land on the board
        self.movesDone += 1
        if self._fiveRow(c, pos):
            self.winner = c
            self.b.move(pos[0], pos[1], c)
            return True
        else:
            self.b.move(pos[0], pos[1], c)
            if self.movesDone == Board.BOARD_SIZE_SQ:
                self.winner = Board.STONE_EMPTY
            return True
        
    def playToTheEnd(self, p1, p2):
        """ alternate playing moves between players until the game is over. """
        assert p1.color == -p2.color
        i = 0
        p1.game = self
        p2.game = self
        players = [p1, p2]
        while not self.gameOver():
            p = players[i]
            self.performAction(p.getAction())
            i = (i + 1) % 2
Example #24
    def learn_from_2_teachers(self):
        s1 = StrategyMinMax()
        s1.stand_for = Board.STONE_BLACK
        self.strategy_1 = s1

        s2 = StrategyMinMax()
        s2.stand_for = Board.STONE_WHITE
        self.strategy_2 = s2

        observer = StrategyMC()

        win1, win2, draw = 0, 0, 0
        step_counter, explo_counter = 0, 0
        begin = datetime.datetime.now()
        episodes = 10000
        for i in range(episodes):
            g = Game(Board(), s1, s2, observer=observer)
            g.step_to_end()
            win1 += 1 if g.winner == Board.STONE_BLACK else 0
            win2 += 1 if g.winner == Board.STONE_WHITE else 0
            draw += 1 if g.winner == Board.STONE_EMPTY else 0

            step_counter += g.step_counter
            explo_counter += g.exploration_counter
            print('training...%d' % i)

        total = win1 + win2 + draw
        print("black win: %f" % (win1 / total))
        print("white win: %f" % (win2 / total))
        print("draw: %f" % (draw / total))

        print('avg. steps[%f], avg. explos[%f]' %
              (step_counter / episodes, explo_counter / episodes))

        end = datetime.datetime.now()
        diff = end - begin
        print("time cost[%f]s, avg.[%f]s" %
              (diff.total_seconds(), diff.total_seconds() / episodes))

        observer.save('./brain1.npz')
Example #25
    def train1(self, s1, s2):
        '''Run one training session.

        Returns:
        ------------
        winner : Strategy
            the winning strategy
        win_rate : float
            the winner's share of the games played
        '''

        max_explore_rate = 0.95

        win1, win2, draw = 0, 0, 0
        step_counter, explo_counter = 0, 0
        begin = datetime.datetime.now()
        episodes = 1
        samples = 100
        interval = episodes // samples
        perf = [[] for _ in range(7)]
        learner = s1 if s1.is_learning else s2
        oppo = self.which_one(Board.oppo(learner.stand_for))
        stat_win = []
#         past_me = learner.mind_clone()
        for i in range(episodes):
#             if (i + 1) % interval == 0:
# #                 print(np.allclose(s1.hidden_weights, past_me.hidden_weights))
#                 probs = self.measure_perf(learner, oppo)
#                 perf[0].append(i)
#                 for idx, x in enumerate(probs):
#                     perf[idx + 1].append(x)

            learner.epsilon = max_explore_rate * np.exp(-5 * i / episodes)  # * (1 if i < episodes//2 else 0.3) #
            g = Game(Board(), s1, s2)
            g.step_to_end()
            win1 += 1 if g.winner == Board.STONE_BLACK else 0
            win2 += 1 if g.winner == Board.STONE_WHITE else 0
            draw += 1 if g.winner == Board.STONE_EMPTY else 0

            stat_win.append(win1 - win2 - draw)
#             rec.append(win1)
            step_counter += g.step_counter
            explo_counter += g.exploration_counter
#             print('steps[%d], explos[%d]' % (g.step_counter, g.exploration_counter))
            print('training...%d' % i)

        total = win1 + win2 + draw
        print("black win: %f" % (win1 / total))
        print("white win: %f" % (win2 / total))
        print("draw: %f" % (draw / total))

        print('avg. steps[%f], avg. explos[%f]' % (step_counter / episodes, explo_counter / episodes))

        end = datetime.datetime.now()
        diff = end - begin
        print("time cost[%f]s, avg.[%f]s" % (diff.total_seconds(), diff.total_seconds() / episodes))

        with open('stat-result-win.txt', 'w') as f:
            f.write(repr(stat_win))
#         print(perf)
#         self.draw_perf(perf)

#         np.set_printoptions(threshold=np.nan, formatter={'float_kind' : lambda x: "%.4f" % x})
#         with open('stat-result-net-train-errors.txt', 'w') as f:
#             f.write(repr(np.array(s1.errors)))

        winner = Board.STONE_BLACK if win1 >= win2 else Board.STONE_WHITE
        return self.which_one(winner), max(win1, win2) / total
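The exploration schedule in train1 decays epsilon exponentially from max_explore_rate down to max_explore_rate * e^-5; a tiny standalone check with an illustrative episode count:

    import numpy as np

    max_explore_rate, episodes = 0.95, 1000
    for i in (0, episodes // 2, episodes - 1):
        print(i, max_explore_rate * np.exp(-5 * i / episodes))
    # roughly 0.95, 0.078, 0.0064: exploration has almost vanished by the end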
Example #26
                    self.mcts.update_with_move(best_move)
                    return m
        raise Exception('impossible')

    def _value_fn(self, board):
        state, _ = self.get_input_values(board.stones)
        v = self.brain.get_state_value(state)
        return v

    def _policy_fn(self, board):
        _, _, legal_moves = Game.possible_moves(board)
        state, _ = self.get_input_values(board.stones)
        probs = self.brain.get_move_probs(state)
        probs = probs[0, legal_moves]
        return list(zip(legal_moves, probs))

    def _rollout_fn(self, board, legal_moves):
        state, _ = self.get_input_values(board.stones)
        probs = self.brain.get_move_probs(state)
        return probs

    def get_input_values(self, stones):
        # callers pass board.stones, a flat array, so name the parameter accordingly
        state, _ = self.brain.adapt_state(stones)
        legal = (stones == Board.STONE_EMPTY)
        return state, legal

if __name__ == '__main__':
    mcts = StrategyMCTS1()
    board = Board()
    mcts.preferred_board(board, None, None)
Example #27
def test_sim_many():
    zero = AG0(input_fn, model_fn)
    zero.prepare()

    s0 = Board.rand_generate_a_position()
    zero._mcts.sim_many(s0, N_SIMS)
Example #28
 def move(self, loc):
     old_board = copy.deepcopy(self.cur_board)
     self.cur_board.move(loc[0], loc[1], self.cur_player)
     self.cur_player = Board.oppo(self.cur_player)
     self.is_over, self.winner, _ = self.cur_board.is_over(old_board)
     self.num_of_moves += 1
Example #29
 def reset(self):
     TwoPlayerGame.reset(self)
     self.movesDone = 0
     self.b = Board()
Example #30
    def reinforce(self, resume=True):
        self.oppo_pool = self.get_mindsets(RL_BRAIN_DIR, FILE_PREFIX)

        part_vars = True
        if resume and len(self.oppo_pool) != 0:
            file = tf.train.latest_checkpoint(RL_BRAIN_DIR)
            part_vars = False
        else:
            file = tf.train.latest_checkpoint(SL_BRAIN_DIR)
            part_vars = True
        s1 = StrategyDNN(is_train=False, is_revive=True, is_rl=True, from_file=file, part_vars=part_vars)
        print('I was born from', file)

        if len(self.oppo_pool) != 0:
            file = random.choice(self.oppo_pool)
            file = os.path.join(RL_BRAIN_DIR, file)
            part_vars = False
        else:
            file = tf.train.latest_checkpoint(SL_BRAIN_DIR)
            part_vars = True
        s2 = StrategyDNN(is_train=False, is_revive=True, is_rl=False, from_file=file, part_vars=part_vars)
        print('vs.', file)

        stat = []

#         n_lose = 0
        iter_n = 100
        for i in range(iter_n):
            print('iter:', i)
            win1, win2, draw = 0, 0, 0
            step_counter, explo_counter = 0, 0
            episodes = cfg.REINFORCE_PERIOD
            for _ in range(episodes):
                s1.stand_for = random.choice([Board.STONE_BLACK, Board.STONE_WHITE])
                s2.stand_for = Board.oppo(s1.stand_for)

                g = Game(Board.rand_generate_a_position(), s1, s2, observer=s1)
                g.step_to_end()
                win1 += 1 if g.winner == s1.stand_for else 0
                win2 += 1 if g.winner == s2.stand_for else 0
                draw += 1 if g.winner == Board.STONE_EMPTY else 0
#                 print('winner: {:d}, stand for: {:d}'.format(g.winner, s1.stand_for))
                s1.win_ratio = win1 / win2 if win2 != 0 else 1.
                step_counter += g.step_counter
                explo_counter += g.exploration_counter

            if s1.win_ratio > 1.1:
                file = FILE_PREFIX + '-' + str(i)
                s1.mind_clone(os.path.join(RL_BRAIN_DIR, FILE_PREFIX), i)
                self.oppo_pool.append(file)
                file = random.choice(self.oppo_pool)
                file = os.path.join(RL_BRAIN_DIR, file)
                s2.close()
                s2 = StrategyDNN(is_train=False, is_revive=True, is_rl=False, from_file=file, part_vars=False)
                print('vs.', file)
#                 n_lose = 0
#             elif win1 < win2:
#                 n_lose += 1
#             if n_lose >= 50:
#                 break

            if i % 1 == 0 or i + 1 == iter_n:
                total = win1 + win2 + draw
                win1_r = win1 / total
                win2_r = win2 / total
                draw_r = draw / total
                print("iter:%d, win: %.3f, lose: %.3f, draw: %.3f, t: %.3f" % (i, win1_r, win2_r, draw_r, s1.temperature))
                stat.append([win1_r, win2_r, draw_r])
                print('avg. steps[%f], avg. explos[%f]' % (step_counter / episodes, explo_counter / episodes))

            if i % 10 == 0 or i + 1 == iter_n:
                np.savez(STAT_FILE, stat=np.array(stat))

        print('rl done. you can try it.')
        self.strategy_1 = self.strategy_2 = s1