Esempio n. 1
0
def test_static_env():
    """Drive CChessEnv and the static environment through the same two plies.

    After each ply the observation of the object-based environment and the
    state string of the static environment are printed next to each other,
    followed by the board, a slice of the input planes and the legal moves of
    both, so the two implementations can be compared by eye.
    """
    from cchess_alphazero.environment.env import CChessEnv
    import cchess_alphazero.environment.static_env as senv
    from cchess_alphazero.environment.static_env import INIT_STATE
    from cchess_alphazero.environment.lookup_tables import flip_move

    env = CChessEnv()
    env.reset()

    def show_both(static_state):
        # One line per implementation, same labels for every comparison.
        print("env:  " + env.observation)
        print("senv: " + static_state)

    show_both(INIT_STATE)

    # First ply: the same move string is fed to both environments.
    opening = '0001'
    env.step(opening)
    state = senv.step(INIT_STATE, opening)
    print(senv.evaluate(state))
    show_both(state)

    # Second ply: the static environment receives the flipped move.
    reply = '7770'
    env.step(reply)
    state = senv.step(state, flip_move(reply))
    print(senv.evaluate(state))
    show_both(state)

    env.render()
    board = senv.state_to_board(state)
    for row in reversed(range(10)):
        print(board[row])

    # Compare planes 7..9 of both encodings.
    print("env: ")
    print(env.input_planes()[7:10])
    print("senv: ")
    print(senv.state_to_planes(state)[7:10])

    print(f"env:  {env.board.legal_moves()}")
    print(f"senv: {senv.get_legal_moves(state)}")
    env_moves = set(env.board.legal_moves())
    static_moves = set(senv.get_legal_moves(state))
    print(env_moves == static_moves)
Esempio n. 2
0
def test_onegreen():
    """Load a position from a onegreen-style string and replay two moves.

    The state is printed and rendered after initialisation and after each
    move; the second move is flipped before being applied.
    """
    import cchess_alphazero.environment.static_env as senv
    from cchess_alphazero.environment.lookup_tables import flip_move

    def dump(position):
        # Raw state string first, then the rendered board.
        print(position)
        senv.render(position)

    init = '9999299949999999249999869999999958999999519999999999999999997699'
    state = senv.init(init)
    dump(state)

    first = senv.parse_onegreen_move('8685')
    state = senv.step(state, first)
    dump(state)

    second = senv.parse_onegreen_move('7666')
    state = senv.step(state, flip_move(second))
    dump(state)
    def start_game(self, idx):
        """Play one evaluation game between two models and return its result.

        One pipe is borrowed from ``self.pipes_bt`` and one from
        ``self.pipes_ng``; each backs a ``CChessPlayer`` with its own search
        tree. The players alternate moves from ``senv.INIT_STATE`` until
        ``senv.done`` reports the game over, a player returns no action
        (treated as a resignation), or ``config.play.max_game_length`` full
        moves have been played (scored 0). Both pipes are put back before
        returning.

        Args:
            idx: Game index. Even: player1 (``pipes_bt``) is red and player2
                (``pipes_ng``) is black; odd: the colours are swapped.

        Returns:
            ``(value, turns)``: the game value after the two sign adjustments
            at the end of the function (side to move, then colour
            assignment), and the number of plies played.
        """
        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)

        self.player1 = CChessPlayer(self.config, search_tree=search_tree1, pipes=pipe1, 
                        debugging=False, enable_resign=True)
        self.player2 = CChessPlayer(self.config, search_tree=search_tree2, pipes=pipe2, 
                        debugging=False, enable_resign=True)

        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.debug(f"best model is red, ng is black")
        else:
            red = self.player2
            black = self.player1
            logger.debug(f"best model is black, ng is red")

        state = senv.INIT_STATE
        value = 0       # best model's value
        turns = 0       # even == red; odd == black
        game_over = False

        while not game_over:
            # Timing is only consumed by the optional per-move debug line below.
            start_time = time()
            if turns % 2 == 0:
                action, _ = red.action(state, turns)
            else:
                action, _ = black.action(state, turns)
            end_time = time()
            # logger.debug(f"pid = {self.pid}, idx = {idx}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}")
            if action is None:
                # No action means the side to move resigned. The original
                # referenced the undefined name ``turn`` here, which raised
                # NameError instead of logging.
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            
            state = senv.step(state, action)
            turns += 1

            if turns / 2 >= self.config.play.max_game_length:
                # Length cap reached: score the game as 0.
                game_over = True
                value = 0
            else:
                game_over, value, final_move = senv.done(state)

        self.player1.close()
        self.player2.close()

        # First flip depends on which side is to move when the game stops.
        if turns % 2 == 1:  # black turn
            value = -value

        # Second flip depends on the colour assignment chosen from idx.
        if idx % 2 == 1:
            value = -value

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns
Esempio n. 4
0
 def info_best_move(self, action, value, depth):
     """Print the final ``info`` line and the ``bestmove`` line for a search.

     ``value`` is negated when it is not red's turn, scaled by 1000 and
     reported as the score together with the elapsed time and an nps figure
     derived from ``depth``. The reply with the largest ``n`` in the search
     tree node of the resulting state, if any, is reported as the ponder move.
     Both lines are also sent to the debug log and stdout is flushed.
     """
     self.end_time = time()
     signed_value = value if self.is_red_turn else -value
     score = int(signed_value * 1000)
     duration = self.end_time - self.start_time
     nps = int(depth * 100 / duration) * 1000
     print(f"info depth {depth} score {score} time {int(duration * 1000)} nps {nps}")
     logger.debug(f"info depth {depth} score {score} time {int((self.end_time - self.start_time) * 1000)}")
     sys.stdout.flush()

     # Ponder move: the reply with the highest n after playing `action`.
     next_state = senv.step(self.state, action)
     ponder = None
     if next_state in self.search_tree:
         best_n = 0
         for candidate, stats in self.search_tree[next_state].a.items():
             if stats.n > best_n:
                 ponder, best_n = candidate, stats.n

     # The best move is flipped on black's turn, the ponder move on red's.
     own_move = action if self.is_red_turn else flip_move(action)
     action = senv.to_uci_move(own_move)
     output = f"bestmove {action}"
     if ponder:
         reply = flip_move(ponder) if self.is_red_turn else ponder
         ponder = senv.to_uci_move(reply)
         output += f" ponder {ponder}"
     print(output)
     logger.debug(output)
     sys.stdout.flush()
Esempio n. 5
0
def expanding_data(data, use_history=False):
    """Replay a stored game and turn it into training samples.

    ``data[0]`` is the starting state and every following item is indexed as
    ``item[0]`` (action) and ``item[1]`` (value). For each item a
    ``[state, policy, value]`` sample is recorded for the state *before* the
    action, then the action is applied with ``senv.step``.

    Args:
        data: Stored game as described above.
        use_history: When true, the interleaved ``state, action, state, ...``
            sequence is collected and forwarded; otherwise ``None`` is passed.

    Returns:
        The result of ``convert_to_trainging_data(samples, history)``, or
        ``None`` if building a policy raised (the error is logged).
    """
    state = data[0]
    history = [state] if use_history else None
    samples = []
    for item in data[1:]:
        action, value = item[0], item[1]
        try:
            policy = build_policy(action, flip=False)
        except Exception as e:
            logger.error(
                f"Expand data error {e}, item = {item}, data = {data}, state = {state}"
            )
            return None
        samples.append([state, policy, value])
        state = senv.step(state, action)
        if use_history:
            history.extend((action, state))

    return convert_to_trainging_data(samples, history)
Esempio n. 6
0
def test_ucci():
    """Exercise FEN export and UCCI move parsing on the static environment.

    Plays one move from the initial state, prints the FEN and the board, then
    parses the UCCI move ``b7b0``, flips it, applies it and prints the
    resulting board and FEN.
    """
    import cchess_alphazero.environment.static_env as senv
    from cchess_alphazero.environment.lookup_tables import flip_move

    state = senv.step(senv.INIT_STATE, '0001')
    print(senv.state_to_fen(state, 1))
    senv.render(state)

    parsed = senv.parse_ucci_move('b7b0')
    print(f'Parsed move {parsed}')
    flipped = flip_move(parsed)
    print(f'fliped move {flipped}')

    state = senv.step(state, flipped)
    senv.render(state)
    print(senv.state_to_fen(state, 2))
Esempio n. 7
0
def fixbug():
    """Re-check stored game files and rewrite those whose recorded value is off.

    For every file returned by ``get_game_data_filenames``:

    * the game is replayed from ``data[0]`` with ``senv.step`` while a copy is
      built in which every stored value is negated;
    * a negated value of 0 marks the game as a draw, which is counted and
      never rewritten;
    * otherwise the final state is evaluated with ``senv.done``; the sign is
      adjusted for a pending final move and the side to move, and an
      unfinished game is scored 1. If the result differs from the first
      stored value (``data[1][1]``), the file is overwritten with the
      negated copy.

    Files that cannot be read are reported and deleted. Progress is printed
    every 1000 files and a summary at the end.
    """
    import os
    from cchess_alphazero.config import Config
    from cchess_alphazero.lib.data_helper import get_game_data_filenames, read_game_data_from_file, write_game_data_to_file
    import cchess_alphazero.environment.static_env as senv
    c = Config('distribute')
    files = get_game_data_filenames(c.resource)
    cnt = 0
    fix = 0
    draw_cnt = 0
    for filename in files:
        try:
            data = read_game_data_from_file(filename)
        except Exception:
            # Only genuine read/parse failures may delete the file. A bare
            # ``except`` would also trap KeyboardInterrupt/SystemExit and
            # remove a healthy file when the run is interrupted.
            print(f"error: {filename}")
            os.remove(filename)
            continue
        state = data[0]
        real_data = [state]
        need_fix = True
        draw = False
        is_red_turn = True
        for item in data[1:]:
            action = item[0]
            value = -item[1]
            if value == 0:
                # Draws are left untouched.
                need_fix = False
                draw = True
                draw_cnt += 1
                break
            state = senv.step(state, action)
            is_red_turn = not is_red_turn
            real_data.append([action, value])
        if not draw:
            game_over, v, final_move = senv.done(state)
            if final_move:
                # A pending final move means one more ply: flip value and side.
                v = -v
                is_red_turn = not is_red_turn
            if not is_red_turn:
                v = -v
            if not game_over:
                v = 1
            # print(game_over, v, final_move, state)
            # The file is fine when the recomputed result matches the first
            # stored value.
            need_fix = v != data[1][1]
        if need_fix:
            write_game_data_to_file(filename, real_data)
            # print(filename)
            fix += 1
        cnt += 1
        if cnt % 1000 == 0:
            print(cnt, fix, draw_cnt)
    print(f"all {cnt}, fix {fix}, draw {draw_cnt}")
Esempio n. 8
0
 def cmd_position(self):
     '''
     position {fen <fenstring> | startpos } [moves <move1> .... <moven>]

     Set self.state, self.history, self.is_red_turn and self.turns from
     self.args, then replay any listed moves. Does nothing while the engine
     is not ready. A FEN that fails to parse is logged and the command is
     abandoned.
     '''
     if not self.is_ready:
         return
     args = self.args
     first_move_at = None  # index in args of the first move token, if any
     if len(args) == 0:
         # Bare "position": start from the initial position.
         self.state = senv.INIT_STATE
         self.is_red_turn = True
         self.history = [self.state]
         self.turns = 0
     elif args[0] == 'fen':
         # init with fen string
         fen = args[1]
         try:
             self.state = senv.fen_to_state(fen)
         except Exception as e:
             logger.error(f"cmd position error! cmd = {self.args}, {e}")
             return
         self.history = [self.state]
         black_to_move = args[2] == 'b'
         if black_to_move:
             # The state is flipped when black is to move.
             self.state = senv.fliped_state(self.state)
         self.is_red_turn = not black_to_move
         # args[6] is converted to a ply count; black adds one ply.
         self.turns = (int(args[6]) - 1) * 2 + (1 if black_to_move else 0)
         if len(args) > 7 and args[7] == 'moves':
             first_move_at = 8
     elif args[0] == 'startpos':
         self.state = senv.INIT_STATE
         self.is_red_turn = True
         self.history = [self.state]
         self.turns = 0
         if len(args) > 1 and args[1] == 'moves':
             first_move_at = 2
     elif args[0] == 'moves':
         # Continue from the current position.
         first_move_at = 1
     logger.debug(f"state = {self.state}")
     # senv.render(self.state)
     # execute moves
     if first_move_at is not None:
         for token in args[first_move_at:]:
             action = senv.parse_ucci_move(token)
             if not self.is_red_turn:
                 action = flip_move(action)
             self.history.append(action)
             self.state = senv.step(self.state, action)
             self.is_red_turn = not self.is_red_turn
             self.turns += 1
             self.history.append(self.state)
         logger.debug(f"state = {self.state}")
Esempio n. 9
0
    def MCTS_search(self, state, history=None, is_root_node=False) -> float:
        """
        Monte Carlo Tree Search

        Walk down the tree from ``state`` until the descent stops for one of
        these reasons:

        * the game is over: ``update_tree`` is submitted with the final value;
        * the state is not in the tree yet: it is initialised, marked as
          waiting and handed to ``expand_and_evaluate``;
        * the state already occurs earlier in ``history`` (a loop):
          ``update_tree`` is submitted with value 0;
        * the node is still waiting for its evaluation: ``history`` is queued
          on ``node.visit``.

        Otherwise an action is selected, virtual loss is applied to it and the
        descent continues with the successor state.

        Args:
            state: State to start the descent from.
            history: Alternating ``state, action, state, ...`` path so far.
                The list is extended in place. ``None`` (the default) starts a
                fresh list on every call; the previous ``[]`` default was one
                shared list that kept growing across calls.
            is_root_node: Forwarded to ``select_action_q_and_u``.

        Note:
            Despite the annotation, no path returns a value; results are
            delivered through ``update_tree`` / ``expand_and_evaluate``.
        """
        if history is None:
            history = []

        while True:
            # logger.debug(f"start MCTS, state = {state}, history = {history}")
            game_over, v, _ = senv.done(state)
            if game_over:
                self.executor.submit(self.update_tree, None, v, history)
                break

            with self.node_lock[state]:
                if state not in self.tree:
                    # Expand and Evaluate
                    self.tree[state].sum_n = 1
                    self.tree[state].legal_moves = senv.get_legal_moves(state)
                    self.tree[state].waiting = True
                    # logger.debug(f"expand_and_evaluate {state}, sum_n = {self.tree[state].sum_n}, history = {history}")
                    self.expand_and_evaluate(state, history)
                    break

                if state in history[:-1]:  # loop -> backed up with value 0
                    # logger.debug(f"loop -> loss, state = {state}, history = {history[:-1]}")
                    self.executor.submit(self.update_tree, None, 0, history)
                    break

                # Select
                node = self.tree[state]
                if node.waiting:
                    # Evaluation still pending: park this path on the node.
                    node.visit.append(history)
                    # logger.debug(f"wait for prediction state = {state}")
                    break

                sel_action = self.select_action_q_and_u(state, is_root_node)

                virtual_loss = self.config.play.virtual_loss
                self.tree[state].sum_n += 1
                # logger.debug(f"node = {state}, sum_n = {node.sum_n}")

                # Apply virtual loss to the selected edge.
                action_state = self.tree[state].a[sel_action]
                action_state.n += virtual_loss
                action_state.w -= virtual_loss
                action_state.q = action_state.w / action_state.n

                # logger.debug(f"apply virtual_loss = {virtual_loss}, as.n = {action_state.n}, w = {action_state.w}, q = {action_state.q}")

                # Cache the successor state on the edge the first time.
                if action_state.next is None:
                    action_state.next = senv.step(state, sel_action)
                # logger.debug(f"step action {sel_action}, next = {action_state.next}")

            history.append(sel_action)
            state = action_state.next
            history.append(state)
Esempio n. 10
0
    def MCTS_search(self, state, history=None, is_root_node=False) -> float:
        """
        Monte Carlo Tree Search

        Descend from ``state`` one selected action at a time. The descent
        ends when the game is over (final value submitted to
        ``update_tree``), when an unseen state is reached (initialised and
        passed to ``expand_and_evaluate``), when the state repeats an earlier
        entry of ``history`` (value 0 submitted to ``update_tree``), or when
        the node is still waiting for its evaluation (``history`` is queued
        on ``node.visit``).

        Args:
            state: State to start the descent from.
            history: Alternating ``state, action, state, ...`` path so far,
                extended in place. Defaults to ``None``, meaning a new empty
                list per call; a literal ``[]`` default would be shared
                between calls and accumulate entries.
            is_root_node: Forwarded to ``select_action_q_and_u``.

        Note:
            The function never returns a value even though it is annotated
            ``-> float``; results travel through ``update_tree`` and
            ``expand_and_evaluate``.
        """
        if history is None:
            history = []

        while True:
            # logger.debug(f"start MCTS, state = {state}, history = {history}")
            game_over, v, _ = senv.done(state)
            if game_over:
                self.executor.submit(self.update_tree, None, v, history)
                break

            with self.node_lock[state]:
                if state not in self.tree:
                    # Expand and Evaluate
                    self.tree[state].sum_n = 1
                    self.tree[state].legal_moves = senv.get_legal_moves(state)
                    self.tree[state].waiting = True
                    # logger.debug(f"expand_and_evaluate {state}, sum_n = {self.tree[state].sum_n}, history = {history}")
                    self.expand_and_evaluate(state, history)
                    break

                if state in history[:-1]: # loop -> backed up with value 0
                    # logger.debug(f"loop -> loss, state = {state}, history = {history[:-1]}")
                    self.executor.submit(self.update_tree, None, 0, history)
                    break

                # Select
                node = self.tree[state]
                if node.waiting:
                    # Evaluation still pending: park this path on the node.
                    node.visit.append(history)
                    # logger.debug(f"wait for prediction state = {state}")
                    break

                sel_action = self.select_action_q_and_u(state, is_root_node)

                virtual_loss = self.config.play.virtual_loss
                self.tree[state].sum_n += 1
                # logger.debug(f"node = {state}, sum_n = {node.sum_n}")

                # Apply virtual loss to the selected edge.
                action_state = self.tree[state].a[sel_action]
                action_state.n += virtual_loss
                action_state.w -= virtual_loss
                action_state.q = action_state.w / action_state.n

                # logger.debug(f"apply virtual_loss = {virtual_loss}, as.n = {action_state.n}, w = {action_state.w}, q = {action_state.q}")

                # Cache the successor state on the edge the first time.
                if action_state.next is None:
                    action_state.next = senv.step(state, sel_action)
                # logger.debug(f"step action {sel_action}, next = {action_state.next}")

            history.append(sel_action)
            state = action_state.next
            history.append(state)
Esempio n. 11
0
def test_check_and_catch():
    """Render a position before and after one move and query will_check_or_catch.

    The position is loaded from a FEN string, move ``4454`` is applied, the
    result is rendered as-is and flipped, and finally
    ``senv.will_check_or_catch`` is printed for the original position and
    that move.
    """
    import cchess_alphazero.environment.static_env as senv

    fen = 'rnba1cbnr/1a7/1c7/p1p3p1p/2p5k/2P1R4/P1P3P1P/1C5C1/9/RNBAKABN1 r'
    start = senv.fen_to_state(fen)
    senv.render(start)
    print()

    action = '4454'
    after = senv.step(start, action)
    senv.render(after)

    flipped = senv.fliped_state(after)
    print()
    senv.render(flipped)

    print(senv.will_check_or_catch(start, action))
Esempio n. 12
0
 def print_depth_info(self, state, turns, start_time, value, no_act):
     '''
     info depth xx pv xxx

     Build a principal variation of at most 20 moves by repeatedly following
     the move with the largest ``n`` from ``state`` through ``self.tree``,
     then print and log one ``info`` line with depth, score, elapsed time,
     the variation and an nps figure.

     state:      state the variation starts from
     turns:      ply counter of ``state``; odd plies have their moves flipped
                 before conversion to UCI notation
     start_time: timestamp the elapsed time is measured from
     value:      fallback value for the score; replaced by the entry in
                 ``self.debug`` for the last state of the variation if present
     no_act:     moves that must not be chosen at the root (may be empty/None)
     '''
     depth = self.done_tasks // 100
     end_time = time()
     pv = ""
     i = 0
     # `no_act` only restricts the first move of the variation. The flag has
     # to live outside the loop: when it was re-initialised on every
     # iteration, `root = False` below had no effect and the filter was
     # applied at every depth.
     root = True
     while i < 20:
         node = self.tree[state]
         bestmove = None
         n = 0
         if len(node.a) == 0:
             break
         for mov, action_state in node.a.items():
             if action_state.n >= n:
                 if root and no_act and mov in no_act:
                     continue
                 n = action_state.n
                 bestmove = mov
         if bestmove is None:
             logger.error(
                 f"state = {state}, turns = {turns}, no_act = {no_act}, root = {root}, len(as) = {len(node.a)}"
             )
             break
         state = senv.step(state, bestmove)
         root = False
         if turns % 2 == 1:
             bestmove = flip_move(bestmove)
         bestmove = senv.to_uci_move(bestmove)
         pv += " " + bestmove
         i += 1
         turns += 1
     if state in self.debug:
         # Prefer the value recorded for the end of the variation, with the
         # sign flipped when that ply's parity differs from self.side.
         _, value = self.debug[state]
         if turns % 2 != self.side:
             value = -value
     score = int(value * 1000)
     duration = end_time - start_time
     nps = int(depth * 100 / duration) * 1000
     output = f"info depth {depth} score {score} time {int(duration * 1000)} pv" + pv + f" nps {nps}"
     print(output)
     logger.debug(output)
     sys.stdout.flush()
Esempio n. 13
0
    def start_game(self, idx, search_tree):
        """Play one self-play game and optionally store it as training data.

        A pipe set is borrowed from ``self.cur_pipes`` and returned at the
        end. A single ``CChessPlayer`` plays both sides from
        ``senv.INIT_STATE``. The game ends when the player returns no action
        (value -1), ``senv.new_step`` raises (value 0), 120 consecutive plies
        pass without ``no_eat`` being false or the maximum game length is
        reached (value 0), ``senv.done`` reports the game over, neither side
        has attacking pieces (value 0), or an idle repetition is counted three
        times (value 0).

        Args:
            idx: Game index; used for the search-tree reset schedule and
                passed to ``save_play_data``.
            search_tree: Search tree to reuse; replaced by a fresh one when
                sharing is disabled or ``idx`` is a multiple of
                ``reset_mtcs_info_per_game``.

        Returns:
            ``(v, turns, state, store)``: the final value after sign
            adjustment, the number of plies, the final state and whether the
            game was saved.
        """
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        # NOTE(review): resignation is enabled when the random draw EXCEEDS
        # enable_resign_rate, i.e. with probability 1 - rate; confirm intended.
        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config,
                                   search_tree=search_tree,
                                   pipes=pipes,
                                   enable_resign=enable_resign,
                                   debugging=False,
                                   use_history=self.use_history)

        state = senv.INIT_STATE
        # history alternates state, action, state, ... starting with the
        # initial state.
        history = [state]
        # policys = []
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        final_move = None
        no_eat_count = 0  # consecutive plies for which new_step reported no_eat
        check = False
        no_act = []  # moves passed to the player as forbidden for the next ply
        increase_temp = False

        while not game_over:
            start_time = time()
            action, policy = self.player.action(state,
                                                turns,
                                                no_act,
                                                increase_temp=increase_temp)
            end_time = time()
            if action is None:
                # No action from the player is treated as a resignation.
                logger.error(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            # if self.config.opts.log_move:
            #     logger.info(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            # logger.info(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            history.append(action)
            # policys.append(policy)
            try:
                state, no_eat = senv.new_step(state, action)
            except Exception as e:
                # A failing step ends the game with value 0.
                logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
                game_over = True
                value = 0
                break
            turns += 1
            if no_eat:
                no_eat_count += 1
            else:
                no_eat_count = 0
            history.append(state)

            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                # Too long without progress, or length cap reached: value 0.
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)
                if not game_over:
                    if not senv.has_attack_chessman(state):
                        # Logged message: neither side has attacking pieces,
                        # so the game is scored as a draw.
                        logger.error(f"双方无进攻子力,作和。state = {state}")
                        game_over = True
                        value = 0
                increase_temp = False
                no_act = []
                # Repetition handling: only when the game continues, the
                # `check` flag is false and the state occurred before.
                if not game_over and not check and state in history[:-1]:
                    free_move = defaultdict(int)
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            # history[i + 1] is the move that was played from
                            # the earlier occurrence of this state.
                            if senv.will_check_or_catch(state, history[i + 1]):
                                no_act.append(history[i + 1])
                            elif not senv.be_catched(state, history[i + 1]):
                                increase_temp = True
                                free_move[state] += 1
                                if free_move[state] >= 3:
                                    # Treat as a draw.
                                    game_over = True
                                    value = 0
                                    logger.error("闲着循环三次,作和棋处理")
                                    break

        if final_move:
            # Append the final move reported by senv.done as one more ply and
            # flip the value accordingly.
            # policy = self.build_policy(final_move, False)
            history.append(final_move)
            # policys.append(policy)
            state = senv.step(state, final_move)
            turns += 1
            value = -value
            history.append(state)

        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        # Games shorter than 10 plies are stored only when random() > 0.9.
        if turns < 10:
            if random() > 0.9:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            # Stored layout: initial state, then one [action, value] pair per
            # ply with the value sign alternating.
            data = [history[0]]
            for i in range(turns):
                k = i * 2
                data.append([history[k + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store
Esempio n. 14
0
def self_play_buffer(config, cur, use_history=False) -> (tuple, list):
    """Play one self-play game and return its result and move record.

    A pipe is borrowed from ``cur`` and appended back before returning. A
    single ``CChessPlayer`` with a fresh search tree plays both sides from
    ``senv.INIT_STATE``. The game ends when the player returns no action
    (value -1), ``senv.new_step`` raises (value 0), 120 consecutive plies pass
    with ``no_eat`` true or the maximum game length is reached (value 0),
    ``senv.done`` reports the game over, neither side has attacking pieces
    (value 0), or an idle repetition is counted three times (value 0).

    Args:
        config: Configuration object; ``config.play`` supplies
            ``enable_resign_rate`` and ``max_game_length``.
        cur: Container of pipes supporting ``pop`` and ``append``.
        use_history: Forwarded to ``CChessPlayer``.

    Returns:
        ``((turns, v), data)`` where ``v`` is the final value after sign
        adjustment and ``data`` is the initial state followed by one
        ``[action, value]`` pair per ply with alternating value sign.
    """
    pipe = cur.pop() # borrow

    # NOTE(review): resignation is enabled when the random draw EXCEEDS
    # enable_resign_rate, i.e. with probability 1 - rate; confirm intended.
    if random() > config.play.enable_resign_rate:
        enable_resign = True
    else:
        enable_resign = False

    player = CChessPlayer(config, search_tree=defaultdict(VisitState), pipes=pipe, 
                            enable_resign=enable_resign, debugging=False, use_history=use_history)

    state = senv.INIT_STATE
    # history alternates state, action, state, ... starting with the initial
    # state.
    history = [state]
    # policys = []
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0  # consecutive plies for which new_step reported no_eat
    check = False
    no_act = None  # forbidden moves for the next ply; None until first reset
    increase_temp = False

    while not game_over:
        start_time = time()
        action, policy = player.action(state, turns, no_act, increase_temp=increase_temp)
        end_time = time()
        if action is None:
            # No action from the player is treated as a resignation.
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        # Progress line: round number, side to move, move and elapsed time.
        print(f"博弈中: 回合{turns / 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s")
        # policys.append(policy)
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            # A failing step ends the game with value 0.
            logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
            game_over = True
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)

        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            # Too long without progress, or length cap reached: value 0.
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state, need_check=True)
            no_act = []
            increase_temp = False
            if not game_over:
                if not senv.has_attack_chessman(state):
                    # Logged message: neither side has attacking pieces, so
                    # the game is scored as a draw.
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
            # Repetition handling: only when the game continues, the `check`
            # flag is false and the state occurred before.
            if not game_over and not check and state in history[:-1]:
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        # history[i + 1] is the move that was played from the
                        # earlier occurrence of this state.
                        if senv.will_check_or_catch(state, history[i+1]):
                            no_act.append(history[i + 1])
                        elif not senv.be_catched(state, history[i+1]):
                            increase_temp = True
                            free_move[state] += 1
                            if free_move[state] >= 3:
                                # Treat as a draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break

    if final_move:
        # Append the final move reported by senv.done as one more ply and flip
        # the value accordingly.
        # policy = build_policy(final_move, False)
        history.append(final_move)
        # policys.append(policy)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)

    player.close()
    del player
    gc.collect()

    if turns % 2 == 1:  # black turn
        value = -value
    
    v = value
    # Record layout: initial state, then one [action, value] pair per ply with
    # the value sign alternating.
    data = [history[0]]
    for i in range(turns):
        k = i * 2
        data.append([history[k + 1], value])
        value = -value

    cur.append(pipe)
    return (turns, v), data
Esempio n. 15
0
    def start_game(self, idx):
        """Play one evaluation game between the base model and the unchecked model.

        After a random sleep the number of playouts is set to a random
        multiple of 100 between 800 and 1200. One pipe is borrowed from
        ``self.pipes_bt`` and one from ``self.pipes_ng`` (both are put back
        before returning) and a ``CChessPlayer`` is built on each. The game
        ends when a player returns no action (value -1), 120 consecutive plies
        pass with ``no_eat`` true or the maximum game length is reached
        (value 0), ``senv.done`` reports the game over, neither side has
        attacking pieces (value 0), or an idle repetition is counted three
        times (value 0).

        Args:
            idx: Game index. Even: player1 (``pipes_bt``) is red; odd:
                player2 (``pipes_ng``) is red.

        Returns:
            ``(value, turns, data)``: the final value after sign adjustment,
            the number of plies, and a record made of the two model digests
            (red first), the initial state and one ``[action, value]`` pair
            per ply with alternating value sign.
        """
        sleep(random())
        playouts = randint(8, 12) * 100
        self.config.play.simulation_num_per_move = playouts
        logger.info(f"Set playouts = {self.config.play.simulation_num_per_move}")

        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)

        self.player1 = CChessPlayer(self.config, search_tree=search_tree1, pipes=pipe1, 
                        debugging=False, enable_resign=False, use_history=self.hist_base)
        self.player2 = CChessPlayer(self.config, search_tree=search_tree2, pipes=pipe2, 
                        debugging=False, enable_resign=False, use_history=self.hist_ng)

        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.info(f"进程id = {self.pid} 基准模型执红,待评测模型执黑")
        else:
            red = self.player2
            black = self.player1
            logger.info(f"进程id = {self.pid} 待评测模型执红,基准模型执黑")

        state = senv.INIT_STATE
        # history alternates state, action, state, ... starting with the
        # initial state.
        history = [state]
        value = 0       # best model's value
        turns = 0       # even == red; odd == black
        game_over = False
        # Must exist before the loop: leaving the loop on the first iteration
        # (resignation) would otherwise hit an unbound name at
        # `if final_move:` below.
        final_move = None
        no_eat_count = 0
        check = False
        increase_temp = False
        no_act = []

        while not game_over:
            start_time = time()
            if turns % 2 == 0:
                action, _ = red.action(state, turns, no_act=no_act, increase_temp=increase_temp)
            else:
                action, _ = black.action(state, turns, no_act=no_act, increase_temp=increase_temp)
            end_time = time()
            if self.config.opts.log_move:
                logger.debug(f"进程id = {self.pid}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}")
            if action is None:
                # No action from the player is treated as a resignation.
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            history.append(action)
            state, no_eat = senv.new_step(state, action)
            turns += 1
            if no_eat:
                no_eat_count += 1
            else:
                no_eat_count = 0
            history.append(state)

            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                # Too long without progress, or length cap reached: value 0.
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(state, need_check=True)
                no_act = []
                increase_temp = False
                if not game_over:
                    if not senv.has_attack_chessman(state):
                        # Logged message: neither side has attacking pieces,
                        # so the game is scored as a draw.
                        logger.info(f"双方无进攻子力,作和。state = {state}")
                        game_over = True
                        value = 0
                # Repetition handling: only when the game continues, the
                # `check` flag is false and the state occurred before.
                if not game_over and not check and state in history[:-1]:
                    free_move = defaultdict(int)
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            # history[i + 1] is the move that was played from
                            # the earlier occurrence of this state.
                            if senv.will_check_or_catch(state, history[i+1]):
                                no_act.append(history[i + 1])
                            elif not senv.be_catched(state, history[i+1]):
                                increase_temp = True
                                free_move[state] += 1
                                if free_move[state] >= 3:
                                    # Treat as a draw.
                                    game_over = True
                                    value = 0
                                    logger.info("闲着循环三次,作和棋处理")
                                    break

        if final_move:
            # Append the final move reported by senv.done as one more ply and
            # flip the value accordingly.
            history.append(final_move)
            state = senv.step(state, final_move)
            turns += 1
            value = - value
            history.append(state)

        # The record starts with the digests of the red and black models.
        data = []
        if idx % 2 == 0:
            data = [self.data['base']['digest'], self.data['unchecked']['digest']]
        else:
            data = [self.data['unchecked']['digest'], self.data['base']['digest']]
        self.player1.close()
        self.player2.close()

        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        data.append(history[0])
        for i in range(turns):
            k = i * 2
            data.append([history[k + 1], v])
            v = -v

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns, data
    def start_game(self, idx, search_tree):
        """Play one game between the network player and a UCCI move source.

        A pipe set is borrowed from ``self.cur_pipes`` and returned at the
        end. On the network player's plies the move comes from
        ``self.player.action``; on the other plies the state is converted to
        FEN and the move comes from ``self.get_ucci_move``. The game ends when
        either source returns no move (value -1), building a policy for the
        UCCI move fails (value 0), the maximum game length is reached (value
        from ``senv.evaluate``) or ``senv.done`` reports the game over.

        Args:
            idx: Game index. Even: the network player is red; odd: black.
                Also used for the search-tree reset schedule and passed to
                ``save_play_data``.
            search_tree: Search tree to reuse; replaced by a fresh one when
                sharing is disabled or ``idx`` is a multiple of
                ``reset_mtcs_info_per_game``.

        Returns:
            ``(v, turns, state, search_tree, store)``: final value after sign
            adjustment, number of plies, final state, the search tree in use
            and whether the game was saved.
        """
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        # NOTE(review): resignation is enabled when the random draw EXCEEDS
        # enable_resign_rate, i.e. with probability 1 - rate; confirm intended.
        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes, enable_resign=enable_resign, debugging=False)

        state = senv.INIT_STATE
        # history alternates state, action, state, ...; policys holds one
        # policy per ply.
        history = [state]
        policys = [] 
        value = 0
        turns = 0       # even == red; odd == black
        game_over = False
        is_alpha_red = True if idx % 2 == 0 else False
        final_move = None

        while not game_over:
            if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red and turns % 2 == 1):
                # Network player's ply. If this state occurred before, every
                # move previously played from it is passed as forbidden.
                no_act = None
                if state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            no_act.append(history[i + 1])
                action, policy = self.player.action(state, turns, no_act)
                if action is None:
                    logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
            else:
                # Other side's ply: the move comes from get_ucci_move.
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                # The move is flipped on odd (black) plies before use.
                if turns % 2 == 1:
                    action = flip_move(action)
                try:
                    policy = self.build_policy(action, False)
                except Exception as e:
                    logger.error(f"Build policy error {e}, action = {action}, state = {state}, fen = {fen}")
                    value = 0
                    break
            history.append(action)
            policys.append(policy)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 >= self.config.play.max_game_length:
                # Length cap reached: use the static evaluation as the value.
                game_over = True
                value = senv.evaluate(state)
            else:
                game_over, value, final_move = senv.done(state)

        if final_move:
            # NOTE(review): unlike the ply loop above, this neither increments
            # `turns` nor negates `value`, so the final move is not part of
            # the stored samples below; confirm this is intended.
            policy = self.build_policy(final_move, False)
            history.append(final_move)
            policys.append(policy)
            state = senv.step(state, final_move)
            history.append(state)

        self.player.close()
        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        # Games with value 0 or at most 10 plies are stored only when
        # random() > 0.7.
        if v == 0 or turns <= 10:
            if random() > 0.7:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            # One [state, policy, value] sample per ply; history[k] is the
            # state before ply i and the value sign alternates.
            data = []
            for i in range(turns):
                k = i * 2
                data.append([history[k], policys[i], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, search_tree, store
Esempio n. 17
0
def self_play_buffer(config, pipes_bt, pipes_ng, idx, res_data, hist_base,
                     hist_ng) -> (tuple, list):
    """Play one game between the base model and the model under evaluation.

    Args:
        config: Project configuration. ``config.play.simulation_num_per_move``
            is overwritten with a random playout count (800-1200, in steps of
            100) and ``config.play.max_game_length`` caps the game length.
        pipes_bt: Pool of pipes for the base model; one is popped for the
            game and appended back before returning.
        pipes_ng: Pool of pipes for the model under evaluation, borrowed the
            same way.
        idx: Game index. Even: the base model plays red; odd: it plays black.
        res_data: Mapping providing ``['base']['digest']`` and
            ``['unchecked']['digest']``, both copied into the game record.
        hist_base: Forwarded to the base player as ``use_history``.
        hist_ng: Forwarded to the evaluated player as ``use_history``.

    Returns:
        ``((turns, v, idx), data)``. ``v`` is the final value after the
        odd-turn sign flip (presumably red's point of view -- TODO confirm
        against ``senv.done``). ``data`` is
        ``[red_digest, black_digest, initial_state, [move, value], ...]``
        with the sign of ``value`` alternating from one move to the next.
    """
    # Random start delay of up to one second (presumably to stagger workers).
    sleep(random())
    playouts = randint(8, 12) * 100
    config.play.simulation_num_per_move = playouts
    logger.info(f"Set playouts = {config.play.simulation_num_per_move}")

    pipe1 = pipes_bt.pop()  # borrow
    pipe2 = pipes_ng.pop()

    player1 = CChessPlayer(config,
                           search_tree=defaultdict(VisitState),
                           pipes=pipe1,
                           enable_resign=False,
                           debugging=False,
                           use_history=hist_base)
    player2 = CChessPlayer(config,
                           search_tree=defaultdict(VisitState),
                           pipes=pipe2,
                           enable_resign=False,
                           debugging=False,
                           use_history=hist_ng)

    # even: bst = red, ng = black; odd: bst = black, ng = red
    if idx % 2 == 0:
        red = player1
        black = player2
        print("基准模型执红,待评测模型执黑")
    else:
        red = player2
        black = player1
        print("待评测模型执红,基准模型执黑")

    state = senv.INIT_STATE
    # history alternates state, move, state, move, ..., state.
    history = [state]
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0
    check = False
    increase_temp = False
    no_act = []

    while not game_over:
        start_time = time()
        mover = red if turns % 2 == 0 else black
        action, _ = mover.action(state,
                                 turns,
                                 no_act=no_act,
                                 increase_temp=increase_temp)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        # Integer division so the round number prints as 1, 1, 2, 2, ...
        # instead of 1.0, 1.5, 2.0, ...
        print(
            f"博弈中: 回合{turns // 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s"
        )
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            # Log the offending move. The previous message formatted an
            # undefined name here, which raised NameError and hid the real
            # error instead of ending the game as a draw.
            logger.error(f"{e}, no_act = {no_act}, action = {action}")
            game_over = True
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)

        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            # Too many consecutive moves flagged no_eat, or the game is too
            # long: draw.
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state,
                                                            need_check=True)
            no_act = []
            increase_temp = False
            if not game_over:
                if not senv.has_attack_chessman(state):
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
            if not game_over and not check and state in history[:-1]:
                # The position has occurred before: inspect every move that
                # was previously played from it.
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        if senv.will_check_or_catch(state, history[i + 1]):
                            # Forbid repeating this move on the next search.
                            no_act.append(history[i + 1])
                        elif not senv.be_catched(state, history[i + 1]):
                            increase_temp = True
                            free_move[state] += 1
                            if free_move[state] >= 3:
                                # Treat as a draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break

    if final_move:
        # Play out the finishing move reported by senv.done and flip the value
        # to match the extra ply.
        history.append(final_move)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)

    # The record starts with the digests of the red and black models.
    if idx % 2 == 0:
        data = [res_data['base']['digest'], res_data['unchecked']['digest']]
    else:
        data = [res_data['unchecked']['digest'], res_data['base']['digest']]
    player1.close()
    player2.close()
    del player1, player2
    gc.collect()

    if turns % 2 == 1:  # black turn
        value = -value

    v = value
    data.append(history[0])
    for i in range(turns):
        # Moves sit at the odd indices of history.
        data.append([history[2 * i + 1], value])
        value = -value

    pipes_bt.append(pipe1)
    pipes_ng.append(pipe2)
    return (turns, v, idx), data
Esempio n. 18
0
def self_play_buffer(config, cur) -> (tuple, list):
    """Play one self-play game and return its result with training samples.

    Args:
        config: Project configuration; ``config.play.enable_resign_rate`` and
            ``config.play.max_game_length`` are read.
        cur: Pool of pipes; one is popped for the game and appended back
            before returning.

    Returns:
        ``((turns, v), data)`` where ``v`` is the final value after the
        odd-turn sign flip and ``data`` holds one
        ``[state, policy, value]`` entry per move played in the main loop,
        the sign of ``value`` alternating from entry to entry.
    """
    pipe = cur.pop()  # borrow

    enable_resign = random() > config.play.enable_resign_rate

    player = CChessPlayer(config,
                          search_tree=defaultdict(VisitState),
                          pipes=pipe,
                          enable_resign=enable_resign,
                          debugging=False)

    state = senv.INIT_STATE
    # history alternates state, move, state, move, ..., state.
    history = [state]
    policys = []
    value = 0
    turns = 0
    game_over = False
    final_move = None

    while not game_over:
        # If this position occurred before, collect every move that was
        # played from it and pass the list to the player as ``no_act``.
        no_act = None
        if state in history[:-1]:
            no_act = [
                nxt for prev, nxt in zip(history, history[1:])
                if prev == state
            ]
        start_time = time()  # timing values are currently unused
        action, policy = player.action(state, turns, no_act)
        end_time = time()
        if action is None:
            logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        policys.append(policy)
        history.append(action)
        state = senv.step(state, action)
        turns += 1
        history.append(state)

        if turns / 2 < config.play.max_game_length:
            game_over, value, final_move = senv.done(state)
        else:
            # Length cap reached: score the position instead.
            game_over = True
            value = senv.evaluate(state)

    if final_move:
        final_policy = build_policy(final_move, False)
        history.append(final_move)
        policys.append(final_policy)
        state = senv.step(state, final_move)
        history.append(state)

    player.close()

    if turns % 2 == 1:  # black turn
        value = -value

    v = value
    data = []
    for ply in range(turns):
        # States sit at the even indices of history.
        data.append([history[2 * ply], policys[ply], value])
        value = -value

    cur.append(pipe)
    return (turns, v), data
    def start_game(self, idx):
        """Play one evaluation game between the two models and score it.

        A pipe is borrowed from ``self.pipes_bt`` and ``self.pipes_ng`` for
        the duration of the game and appended back before returning.

        Args:
            idx: Game index. Even: player1 (the "best" model, per the log
                messages) plays red; odd: it plays black.

        Returns:
            ``(value, turns)``: the game value after the side-to-move and
            colour sign flips (i.e. expressed for player1) and the number of
            moves played.
        """
        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)

        self.player1 = CChessPlayer(self.config,
                                    search_tree=search_tree1,
                                    pipes=pipe1,
                                    debugging=False,
                                    enable_resign=True)
        self.player2 = CChessPlayer(self.config,
                                    search_tree=search_tree2,
                                    pipes=pipe2,
                                    debugging=False,
                                    enable_resign=True)

        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.debug("best model is red, ng is black")
        else:
            red = self.player2
            black = self.player1
            logger.debug("best model is black, ng is red")

        state = senv.INIT_STATE
        value = 0  # best model's value
        turns = 0  # even == red; odd == black
        game_over = False

        while not game_over:
            mover = red if turns % 2 == 0 else black
            action, _ = mover.action(state, turns)
            if action is None:
                # This message used to format an undefined name, so a
                # resignation raised NameError instead of ending the game.
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break

            state = senv.step(state, action)
            turns += 1

            if turns / 2 >= self.config.play.max_game_length:
                # Length cap reached: draw.
                game_over = True
                value = 0
            else:
                game_over, value, _ = senv.done(state)

        self.player1.close()
        self.player2.close()

        if turns % 2 == 1:  # black turn
            value = -value

        if idx % 2 == 1:  # return player1's value
            value = -value

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns
Esempio n. 20
0
    def MCTS_search(self,
                    state,
                    history=None,
                    is_root_node=False,
                    real_hist=None) -> float:
        """
        Monte Carlo Tree Search: descend from ``state`` until a stop condition.

        Each loop iteration does one of the following:

        * game over at ``state``: schedule ``update_tree`` with twice the
          value reported by ``senv.done`` and stop;
        * ``state`` not in ``self.tree``: initialise the node, call
          ``expand_and_evaluate`` and stop;
        * ``state`` already occurs earlier in ``history``: schedule
          ``update_tree`` with -1, 1 or 0 depending on
          ``senv.will_check_or_catch`` / ``senv.be_catched`` for the move
          previously played from it, and stop;
        * the node is still waiting for its evaluation: record ``history``
          in ``node.visit`` and stop;
        * otherwise: select an action, apply the virtual loss to it and step
          to the next state.

        Args:
            state: Position to start searching from.
            history: Alternating state/move path leading to ``state``. It is
                extended in place while descending. Defaults to a fresh list
                for each call.
            is_root_node: Forwarded to ``select_action_q_and_u``; together
                with ``real_hist`` it selects the three-argument form of
                ``expand_and_evaluate``.
            real_hist: Extra history passed to ``expand_and_evaluate`` when
                ``is_root_node`` is true.

        NOTE(review): despite the ``float`` annotation, every path leaves the
        loop with ``break`` and nothing is returned explicitly.
        """
        if history is None:
            # A literal ``[]`` default would be one list shared (and grown by
            # the appends below) across every call that omits the argument.
            history = []
        while True:
            # logger.debug(f"start MCTS, state = {state}, history = {history}")
            game_over, v, _ = senv.done(state)
            if game_over:
                v = v * 2
                self.executor.submit(self.update_tree, None, v, history)
                break

            with self.node_lock[state]:
                if state not in self.tree:
                    # Expand and Evaluate
                    # (indexing creates the node; assumes self.tree behaves
                    # like a defaultdict -- TODO confirm)
                    self.tree[state].sum_n = 1
                    self.tree[state].legal_moves = senv.get_legal_moves(state)
                    self.tree[state].waiting = True
                    # logger.debug(f"expand_and_evaluate {state}, sum_n = {self.tree[state].sum_n}, history = {history}")
                    if is_root_node and real_hist:
                        self.expand_and_evaluate(state, history, real_hist)
                    else:
                        self.expand_and_evaluate(state, history)
                    break

                if state in history[:-1]:  # loop
                    # Only the first earlier occurrence decides the outcome.
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            if senv.will_check_or_catch(state, history[i + 1]):
                                self.executor.submit(self.update_tree, None,
                                                     -1, history)
                            elif senv.be_catched(state, history[i + 1]):
                                self.executor.submit(self.update_tree, None, 1,
                                                     history)
                            else:
                                # logger.debug(f"loop -> loss, state = {state}, history = {history[:-1]}")
                                self.executor.submit(self.update_tree, None, 0,
                                                     history)
                            break
                    break

                # Select
                node = self.tree[state]
                if node.waiting:
                    node.visit.append(history)
                    # logger.debug(f"wait for prediction state = {state}")
                    break

                sel_action = self.select_action_q_and_u(state, is_root_node)

                virtual_loss = self.config.play.virtual_loss
                self.tree[state].sum_n += 1
                # logger.debug(f"node = {state}, sum_n = {node.sum_n}")

                # Apply the virtual loss to the chosen edge: more visits,
                # less accumulated value.
                action_state = self.tree[state].a[sel_action]
                action_state.n += virtual_loss
                action_state.w -= virtual_loss
                action_state.q = action_state.w / action_state.n

                # logger.debug(f"apply virtual_loss = {virtual_loss}, as.n = {action_state.n}, w = {action_state.w}, q = {action_state.q}")

                # if action_state.next is None:
                history.append(sel_action)
                state = senv.step(state, sel_action)
                history.append(state)
Esempio n. 21
0
    def start_game(self, idx, search_tree):
        """Run one self-play game and optionally persist its samples.

        Args:
            idx: Game index; used for the search-tree reset schedule and
                passed to ``save_play_data``.
            search_tree: Search tree to reuse. It is replaced by a fresh one
                unless tree sharing is enabled in the config and ``idx`` is
                not a multiple of ``reset_mtcs_info_per_game``.

        Returns:
            ``(v, turns, state, search_tree, store)``: the final value after
            the odd-turn sign flip, the number of moves played in the main
            loop, the last state, the tree that was used, and whether the
            game was saved.
        """
        pipes = self.cur_pipes.pop()

        keep_tree = (self.config.play.share_mtcs_info_in_self_play
                     and idx % self.config.play.reset_mtcs_info_per_game != 0)
        if not keep_tree:
            search_tree = defaultdict(VisitState)

        enable_resign = random() > self.config.play.enable_resign_rate

        self.player = CChessPlayer(self.config,
                                   search_tree=search_tree,
                                   pipes=pipes,
                                   enable_resign=enable_resign,
                                   debugging=False)

        state = senv.INIT_STATE
        # history alternates state, move, state, move, ..., state.
        history = [state]
        policys = []
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        final_move = None

        while not game_over:
            # If this position occurred before, collect every move that was
            # played from it and pass the list to the player as ``no_act``.
            no_act = None
            if state in history[:-1]:
                no_act = [
                    history[pos + 1]
                    for pos, past in enumerate(history[:-1])
                    if past == state
                ]
            start_time = time()  # timing values are currently unused
            action, policy = self.player.action(state, turns, no_act)
            end_time = time()
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            history.append(action)
            policys.append(policy)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 < self.config.play.max_game_length:
                game_over, value, final_move = senv.done(state)
            else:
                # Length cap reached: score the position instead.
                game_over = True
                value = senv.evaluate(state)

        if final_move:
            final_policy = self.build_policy(final_move, False)
            history.append(final_move)
            policys.append(final_policy)
            state = senv.step(state, final_move)
            history.append(state)

        self.player.close()
        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        # Decisive games are always kept; a zero result is kept only when a
        # random draw exceeds 0.5.
        store = True if v != 0 else random() > 0.5

        if store:
            data = []
            for ply in range(turns):
                # States sit at the even indices of history.
                data.append([history[2 * ply], policys[ply], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, search_tree, store
    def start_game(self, idx, search_tree):
        """Play a single self-play game; save its samples when selected.

        Args:
            idx: Game index; drives the search-tree reset schedule and is
                handed to ``save_play_data``.
            search_tree: Tree carried over from earlier games. A new one is
                created when sharing is disabled in the config or ``idx`` is
                a multiple of ``reset_mtcs_info_per_game``.

        Returns:
            ``(v, turns, state, search_tree, store)``: final value after the
            odd-turn sign flip, number of moves played in the main loop,
            last state, the tree used, and whether the game was stored.
        """
        pipes = self.cur_pipes.pop()

        sharing = self.config.play.share_mtcs_info_in_self_play
        if not sharing or idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        enable_resign = bool(random() > self.config.play.enable_resign_rate)

        self.player = CChessPlayer(
            self.config,
            search_tree=search_tree,
            pipes=pipes,
            enable_resign=enable_resign,
            debugging=False,
        )

        state = senv.INIT_STATE
        # history alternates state, move, state, move, ..., state.
        history = [state]
        policys = []
        value = 0
        turns = 0       # even == red; odd == black
        game_over = False
        final_move = None

        while not game_over:
            # Moves already played from an earlier occurrence of this
            # position are passed to the player as ``no_act``.
            if state in history[:-1]:
                no_act = [
                    nxt for prev, nxt in zip(history, history[1:])
                    if prev == state
                ]
            else:
                no_act = None
            start_time = time()  # timing values are currently unused
            action, policy = self.player.action(state, turns, no_act)
            end_time = time()
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            history.append(action)
            policys.append(policy)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            length_capped = turns / 2 >= self.config.play.max_game_length
            if length_capped:
                game_over = True
                value = senv.evaluate(state)
            else:
                game_over, value, final_move = senv.done(state)

        if final_move:
            last_policy = self.build_policy(final_move, False)
            history.append(final_move)
            policys.append(last_policy)
            state = senv.step(state, final_move)
            history.append(state)

        self.player.close()
        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        # A zero result is stored only when a random draw exceeds 0.5.
        if v == 0:
            store = random() > 0.5
        else:
            store = True

        if store:
            data = []
            for ply in range(turns):
                # States sit at the even indices of history.
                data.append([history[ply * 2], policys[ply], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, search_tree, store
Esempio n. 23
0
    def start_game(self, idx, search_tree):
        """Play one game between the network player and a UCCI move source.

        On even ``idx`` the network player (``self.player``) moves on even
        turns (red); on odd ``idx`` it moves on odd turns. The other side's
        moves come from ``self.get_ucci_move`` and are passed through
        ``flip_move`` on odd turns.

        Args:
            idx: Game index; selects the network player's colour, drives the
                search-tree reset schedule and is passed to
                ``save_play_data``.
            search_tree: Tree to reuse; replaced by a fresh one when sharing
                is disabled in the config or ``idx`` is a multiple of
                ``reset_mtcs_info_per_game``. It is deleted locally at the
                end.

        Returns:
            ``(v, turns, state, store)``: final value after the sign flips,
            number of moves (including a finishing move reported by
            ``senv.done``), last state, and whether the game was saved.
        """
        pipes = self.cur_pipes.pop()

        keep_tree = (self.config.play.share_mtcs_info_in_self_play
                     and idx % self.config.play.reset_mtcs_info_per_game != 0)
        if not keep_tree:
            search_tree = defaultdict(VisitState)

        enable_resign = random() > self.config.play.enable_resign_rate

        self.player = CChessPlayer(self.config,
                                   search_tree=search_tree,
                                   pipes=pipes,
                                   enable_resign=enable_resign,
                                   debugging=False)

        state = senv.INIT_STATE
        # history alternates state, move, state, move, ..., state.
        history = [state]
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        is_alpha_red = idx % 2 == 0
        final_move = None
        check = False

        while not game_over:
            red_to_move = turns % 2 == 0
            network_to_move = is_alpha_red == red_to_move
            if network_to_move:
                # Unless senv.done set the check flag, moves already played
                # from an earlier occurrence of this position are passed to
                # the player as ``no_act``.
                no_act = None
                if not check and state in history[:-1]:
                    no_act = [
                        nxt for prev, nxt in zip(history, history[1:])
                        if prev == state
                    ]
                action, _ = self.player.action(state, turns, no_act)
            else:
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
            if action is None:
                logger.debug(
                    f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            if not network_to_move and not red_to_move:
                action = flip_move(action)
            history.append(action)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 < self.config.play.max_game_length:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)
            else:
                # Length cap reached: draw.
                game_over = True
                value = 0

        if final_move:
            # Play out the finishing move and flip the value to match the
            # extra ply.
            history.append(final_move)
            state = senv.step(state, final_move)
            history.append(state)
            turns += 1
            value = -value

        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        # Games of at most ten moves are kept only when a random draw
        # exceeds 0.7.
        store = random() > 0.7 if turns <= 10 else True

        if store:
            data = [history[0]]
            for ply in range(turns):
                # Moves sit at the odd indices of history.
                data.append([history[2 * ply + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store