def self_play_buffer(config, pipes_bt, pipes_ng, idx,
                     res_data) -> (tuple, list):
    sleep(random())
    playouts = randint(8, 12) * 100
    config.play.simulation_num_per_move = playouts
    logger.info(f"Set playouts = {config.play.simulation_num_per_move}")

    pipe1 = pipes_bt.pop()  # borrow
    pipe2 = pipes_ng.pop()

    player1 = CChessPlayer(config,
                           search_tree=defaultdict(VisitState),
                           pipes=pipe1,
                           enable_resign=False,
                           debugging=False)
    player2 = CChessPlayer(config,
                           search_tree=defaultdict(VisitState),
                           pipes=pipe2,
                           enable_resign=False,
                           debugging=False)

    # even: bst = red, ng = black; odd: bst = black, ng = red
    if idx % 2 == 0:
        red = player1
        black = player2
        print(f"基准模型执红,待评测模型执黑")
    else:
        red = player2
        black = player1
        print(f"待评测模型执红,基准模型执黑")

    state = senv.INIT_STATE
    history = [state]
    # policys = []
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0
    check = False

    while not game_over:
        no_act = None
        if not check and state in history[:-1]:
            no_act = []
            free_move = defaultdict(int)
            for i in range(len(history) - 1):
                if history[i] == state:
                    # 如果走了下一步是将军或捉:禁止走那步
                    if senv.will_check_or_catch(state, history[i + 1]):
                        no_act.append(history[i + 1])
                    # 否则当作闲着处理
                    else:
                        free_move[state] += 1
                        if free_move[state] >= 2:
                            # 作和棋处理
                            game_over = True
                            value = 0
                            logger.info("闲着循环三次,作和棋处理")
                            break
        if game_over:
            break
        start_time = time()
        if turns % 2 == 0:
            action, _ = red.action(state, turns, no_act=no_act)
        else:
            action, _ = black.action(state, turns, no_act=no_act)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        print(
            f"博弈中: 回合{turns / 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s"
        )
        # policys.append(policy)
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)

        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state,
                                                            need_check=True)
            if not game_over:
                if not senv.has_attack_chessman(state):
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0

    if final_move:
        history.append(final_move)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)

    data = []
    if idx % 2 == 0:
        data = [res_data['base']['digest'], res_data['unchecked']['digest']]
    else:
        data = [res_data['unchecked']['digest'], res_data['base']['digest']]
    player1.close()
    player2.close()
    del player1, player2
    gc.collect()

    if turns % 2 == 1:  # balck turn
        value = -value

    v = value
    data.append(history[0])
    for i in range(turns):
        k = i * 2
        data.append([history[k + 1], value])
        value = -value

    pipes_bt.append(pipe1)
    pipes_ng.append(pipe2)
    return (turns, v, idx), data
    def start_game(self, idx, search_tree):
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config,
                                   search_tree=search_tree,
                                   pipes=pipes,
                                   enable_resign=enable_resign,
                                   debugging=False)

        state = senv.INIT_STATE
        history = [state]
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        is_alpha_red = True if idx % 2 == 0 else False
        final_move = None
        check = False

        while not game_over:
            if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red
                                                     and turns % 2 == 1):
                no_act = None
                if not check and state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            no_act.append(history[i + 1])
                action, _ = self.player.action(state, turns, no_act)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
            else:
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                if turns % 2 == 1:
                    action = flip_move(action)
            history.append(action)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)

        if final_move:
            history.append(final_move)
            state = senv.step(state, final_move)
            history.append(state)
            turns += 1
            value = -value

        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        if turns % 2 == 1:  # balck turn
            value = -value

        v = value
        if turns <= 10:
            if random() > 0.7:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            data = [history[0]]
            for i in range(turns):
                k = i * 2
                data.append([history[k + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store
Beispiel #3
0
    def start_game(self, idx, search_tree):
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes, enable_resign=enable_resign, debugging=False)

        state = senv.INIT_STATE
        history = [state]
        # policys = [] 
        value = 0
        turns = 0       # even == red; odd == black
        game_over = False
        final_move = None
        no_eat_count = 0
        check = False

        while not game_over:
            no_act = None
            if not check and state in history[:-1]:
                no_act = []
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        # 如果走了下一步是将军或捉:禁止走那步
                        if senv.will_check_or_catch(state, history[i+1]):
                            no_act.append(history[i + 1])
                        # 否则当作闲着处理
                        else:
                            free_move[state] += 1
                            if free_move[state] >= 2:
                                # 作和棋处理
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break
            if game_over:
                break
            start_time = time()
            action, policy = self.player.action(state, turns, no_act)
            end_time = time()
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            if self.config.opts.log_move:
                logger.info(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            # logger.info(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            # for move, action_state in self.player.search_results.items():
            #     if action_state[0] >= 20:
            #         logger.info(f"move: {move}, prob: {action_state[0]}, Q_value: {action_state[1]:.2f}, Prior: {action_state[2]:.3f}")
            # self.player.search_results = {}
            history.append(action)
            # policys.append(policy)
            try:
                state, no_eat = senv.new_step(state, action)
            except Exception as e:
                logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
                game_over = True
                value = 0
                break
            turns += 1
            if no_eat:
                no_eat_count += 1
            else:
                no_eat_count = 0
            history.append(state)

            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(state, need_check=True)
                if not game_over:
                    if not senv.has_attack_chessman(state):
                        logger.info(f"双方无进攻子力,作和。state = {state}")
                        game_over = True
                        value = 0

        if final_move:
            # policy = self.build_policy(final_move, False)
            history.append(final_move)
            # policys.append(policy)
            state = senv.step(state, final_move)
            turns += 1
            value = -value
            history.append(state)

        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        if turns % 2 == 1:  # balck turn
            value = -value

        v = value
        if turns < 10:
            if random() > 0.9:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            data = [history[0]]
            for i in range(turns):
                k = i * 2
                data.append([history[k + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store
Beispiel #4
0
    def start_game(self, idx):
        sleep(random())
        playouts = randint(8, 12) * 100
        self.config.play.simulation_num_per_move = playouts
        logger.info(
            f"Set playouts = {self.config.play.simulation_num_per_move}")

        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)

        self.player1 = CChessPlayer(self.config,
                                    search_tree=search_tree1,
                                    pipes=pipe1,
                                    debugging=False,
                                    enable_resign=False,
                                    use_history=self.hist_base)
        self.player2 = CChessPlayer(self.config,
                                    search_tree=search_tree2,
                                    pipes=pipe2,
                                    debugging=False,
                                    enable_resign=False,
                                    use_history=self.hist_ng)

        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.info(f"进程id = {self.pid} 基准模型执红,待评测模型执黑")
        else:
            red = self.player2
            black = self.player1
            logger.info(f"进程id = {self.pid} 待评测模型执红,基准模型执黑")

        state = senv.INIT_STATE
        history = [state]
        value = 0  # best model's value
        turns = 0  # even == red; odd == black
        game_over = False
        no_eat_count = 0
        check = False

        while not game_over:
            start_time = time()
            no_act = None
            if not check and state in history[:-1]:
                no_act = []
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        # 如果走了下一步是将军或捉:禁止走那步
                        if senv.will_check_or_catch(state, history[i + 1]):
                            no_act.append(history[i + 1])
                        # 否则当作闲着处理
                        else:
                            free_move[state] += 1
                            if free_move[state] >= 2:
                                # 作和棋处理
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break
            if game_over:
                break
            if turns % 2 == 0:
                action, _ = red.action(state, turns, no_act=no_act)
            else:
                action, _ = black.action(state, turns, no_act=no_act)
            end_time = time()
            if self.config.opts.log_move:
                logger.debug(
                    f"进程id = {self.pid}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}"
                )
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            history.append(action)
            state, no_eat = senv.new_step(state, action)
            turns += 1
            if no_eat:
                no_eat_count += 1
            else:
                no_eat_count = 0
            history.append(state)

            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)
                if not game_over:
                    if not senv.has_attack_chessman(state):
                        logger.info(f"双方无进攻子力,作和。state = {state}")
                        game_over = True
                        value = 0

        if final_move:
            history.append(final_move)
            state = senv.step(state, final_move)
            turns += 1
            value = -value
            history.append(state)

        data = []
        if idx % 2 == 0:
            data = [
                self.data['base']['digest'], self.data['unchecked']['digest']
            ]
        else:
            data = [
                self.data['unchecked']['digest'], self.data['base']['digest']
            ]
        self.player1.close()
        self.player2.close()

        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        data.append(history[0])
        for i in range(turns):
            k = i * 2
            data.append([history[k + 1], v])
            v = -v

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns, data
def self_play_buffer(config, cur) -> (tuple, list):
    pipe = cur.pop()  # borrow

    if random() > config.play.enable_resign_rate:
        enable_resign = True
    else:
        enable_resign = False

    player = CChessPlayer(config,
                          search_tree=defaultdict(VisitState),
                          pipes=pipe,
                          enable_resign=enable_resign,
                          debugging=False)

    state = senv.INIT_STATE
    history = [state]
    # policys = []
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0
    check = False

    while not game_over:
        no_act = None
        if not check and state in history[:-1]:
            no_act = []
            free_move = defaultdict(int)
            for i in range(len(history) - 1):
                if history[i] == state:
                    # 如果走了下一步是将军或捉:禁止走那步
                    if senv.will_check_or_catch(state, history[i + 1]):
                        no_act.append(history[i + 1])
                    # 否则当作闲着处理
                    else:
                        free_move[state] += 1
                        if free_move[state] >= 2:
                            # 作和棋处理
                            game_over = True
                            value = 0
                            logger.info("闲着循环三次,作和棋处理")
                            break
        if game_over:
            break
        start_time = time()
        action, policy = player.action(state, turns, no_act)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        print(
            f"博弈中: 回合{turns / 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s"
        )
        # policys.append(policy)
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
            game_over = True
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)

        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state,
                                                            need_check=True)
            if not game_over:
                if not senv.has_attack_chessman(state):
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0

    if final_move:
        # policy = build_policy(final_move, False)
        history.append(final_move)
        # policys.append(policy)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)

    player.close()
    del player
    gc.collect()

    if turns % 2 == 1:  # balck turn
        value = -value

    v = value
    data = [history[0]]
    for i in range(turns):
        k = i * 2
        data.append([history[k + 1], value])
        value = -value

    cur.append(pipe)
    return (turns, v), data
Beispiel #6
0
    def start(self, human_first=True):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config,
                               search_tree=defaultdict(VisitState),
                               pipes=self.pipe,
                               enable_resign=True,
                               debugging=True)
        self.human_move_first = human_first

        pygame.init()
        screen, board_background, widget_background = self.init_screen()
        framerate = pygame.time.Clock()

        current_chessman = None
        if human_first:
            self.env.board.calc_chessmans_moving_list()

        ai_worker = Thread(target=self.ai_move, name="ai_worker")
        ai_worker.daemon = True
        ai_worker.start()

        while not self.env.board.is_end():
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    self.env.board.print_record()
                    self.ai.close(wait=False)
                    game_id = datetime.now().strftime("%Y%m%d-%H%M%S")
                    path = os.path.join(
                        self.config.resource.play_record_dir,
                        self.config.resource.play_record_filename_tmpl %
                        game_id)
                    self.env.board.save_record(path)
                    sys.exit()
                elif event.type == VIDEORESIZE:
                    pass
                elif event.type == MOUSEBUTTONDOWN:
                    if human_first == self.env.red_to_move:
                        pressed_array = pygame.mouse.get_pressed()
                        for index in range(len(pressed_array)):
                            if index == 0 and pressed_array[index]:
                                mouse_x, mouse_y = pygame.mouse.get_pos()
                                col_num, row_num = self.translate_hit_area(
                                    mouse_x, mouse_y, self.chessman_w,
                                    self.chessman_h)
                                chessman_sprite = self.select_sprite_from_group(
                                    self.chessmans, col_num, row_num)
                                if current_chessman is None and chessman_sprite != None:
                                    if chessman_sprite.chessman.is_red == self.env.red_to_move:
                                        current_chessman = chessman_sprite
                                        chessman_sprite.is_selected = True
                                elif current_chessman != None and chessman_sprite != None:
                                    if chessman_sprite.chessman.is_red == self.env.red_to_move:
                                        current_chessman.is_selected = False
                                        current_chessman = chessman_sprite
                                        chessman_sprite.is_selected = True
                                    else:
                                        move = str(current_chessman.chessman.col_num) + str(
                                            current_chessman.chessman.row_num) + \
                                               str(col_num) + str(row_num)
                                        success = current_chessman.move(
                                            col_num, row_num, self.chessman_w,
                                            self.chessman_h)
                                        self.history.append(move)
                                        if success:
                                            self.chessmans.remove(
                                                chessman_sprite)
                                            chessman_sprite.kill()
                                            current_chessman.is_selected = False
                                            current_chessman = None
                                            self.history.append(
                                                self.env.get_state())
                                elif current_chessman != None and chessman_sprite is None:
                                    move = str(current_chessman.chessman.col_num) + str(
                                        current_chessman.chessman.row_num) + \
                                           str(col_num) + str(row_num)
                                    success = current_chessman.move(
                                        col_num, row_num, self.chessman_w,
                                        self.chessman_h)
                                    self.history.append(move)
                                    if success:
                                        current_chessman.is_selected = False
                                        current_chessman = None
                                        self.history.append(
                                            self.env.get_state())

            self.draw_widget(screen, widget_background)
            framerate.tick(20)
            # clear/erase the last drawn sprites
            self.chessmans.clear(screen, board_background)

            # update all the sprites
            self.chessmans.update()
            self.chessmans.draw(screen)
            pygame.display.update()

        self.ai.close(wait=False)
        logger.info(f"Winner is {self.env.board.winner} !!!")
        self.env.board.print_record()
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S")
        path = os.path.join(
            self.config.resource.play_record_dir,
            self.config.resource.play_record_filename_tmpl % game_id)
        self.env.board.save_record(path)
        sleep(3)
    def start_game(self, idx):
        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)

        self.player1 = CChessPlayer(self.config,
                                    search_tree=search_tree1,
                                    pipes=pipe1,
                                    debugging=False,
                                    enable_resign=True)
        self.player2 = CChessPlayer(self.config,
                                    search_tree=search_tree2,
                                    pipes=pipe2,
                                    debugging=False,
                                    enable_resign=True)

        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.debug(f"best model is red, ng is black")
        else:
            red = self.player2
            black = self.player1
            logger.debug(f"best model is black, ng is red")

        state = senv.INIT_STATE
        value = 0  # best model's value
        turns = 0  # even == red; odd == black
        game_over = False

        while not game_over:
            start_time = time()
            if turns % 2 == 0:
                action, _ = red.action(state, turns)
            else:
                action, _ = black.action(state, turns)
            end_time = time()
            # logger.debug(f"pid = {self.pid}, idx = {idx}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}")
            if action is None:
                logger.debug(f"{turn % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break

            state = senv.step(state, action)
            turns += 1

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move = senv.done(state)

        self.player1.close()
        self.player2.close()

        if turns % 2 == 1:  # black turn
            value = -value

        if idx % 2 == 1:
            value = -value

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns