Example #1
def test_static_env():
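    # Parity check between the class-based CChessEnv and the functional
    # static_env (senv): both are stepped through the same moves and compared.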
    from cchess_alphazero.environment.env import CChessEnv
    import cchess_alphazero.environment.static_env as senv
    from cchess_alphazero.environment.static_env import INIT_STATE
    from cchess_alphazero.environment.lookup_tables import flip_move
    env = CChessEnv()
    env.reset()
    print("env:  " + env.observation)
    print("senv: " + INIT_STATE)
    state = INIT_STATE
    env.step('0001')
    state = senv.step(state, '0001')
    print(senv.evaluate(state))
    print("env:  " + env.observation)
    print("senv: " + state)
    env.step('7770')
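    # env takes moves in absolute coordinates, while senv expects moves from
    # the side to move's own perspective, so black's reply is flipped first.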
    state = senv.step(state, flip_move('7770'))
    print(senv.evaluate(state))
    print("env:  " + env.observation)
    print("senv: " + state)
    env.render()
    board = senv.state_to_board(state)
    for i in range(9, -1, -1):
        print(board[i])
    print("env: ")
    print(env.input_planes()[0+7:3+7])
    print("senv: ")
    print(senv.state_to_planes(state)[0+7:3+7])
    print(f"env:  {env.board.legal_moves()}" )
    print(f"senv: {senv.get_legal_moves(state)}")
    print(set(env.board.legal_moves()) == set(senv.get_legal_moves(state)))
Example #2
    def load_game(self, init, move_list, winner, idx, title, url):
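        # Replay a recorded game move by move and collect [observation, policy]
        # training samples for each side; values are appended afterwards
        # (`senv`, `logger` and `Winner` come from the surrounding module).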
        turns = 0
        env = CChessEnv(self.config).reset(init)
        red_moves = []
        black_moves = []
        moves = [move_list[i:i + 4] for i in range(0, len(move_list), 4)]

        for move in moves:
            action = senv.parse_onegreen_move(move)
            try:
                if turns % 2 == 0:
                    red_moves.append([
                        env.observation,
                        self.build_policy(action, flip=False)
                    ])
                else:
                    black_moves.append([
                        env.observation,
                        self.build_policy(action, flip=True)
                    ])
                env.step(action)
            except Exception:
                logger.error(
                    f"Invalid Action: idx = {idx}, action = {action}, turns = {turns}, moves = {moves}, "
                    f"winner = {winner}, init = {init}, title: {title}, url: {url}"
                )
                return
            turns += 1

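        # Game result from red's perspective: +1 red win, -1 black win; a draw
        # falls back to a static evaluation of the final position.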
        if winner == Winner.red:
            red_win = 1
        elif winner == Winner.black:
            red_win = -1
        else:
            red_win = senv.evaluate(env.get_state())
            if not env.red_to_move:
                red_win = -red_win

        for move in red_moves:
            move += [red_win]
        for move in black_moves:
            move += [-red_win]

        data = []
        for i in range(len(red_moves)):
            data.append(red_moves[i])
            if i < len(black_moves):
                data.append(black_moves[i])
        self.buffer += data
        return red_win
Example #4
    def start_game(self, idx, search_tree):
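        # Self-play one game with MCTS: record a (state, policy) pair each ply,
        # then attach the final outcome as the value target afterwards.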
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        enable_resign = random() > self.config.play.enable_resign_rate

        self.player = CChessPlayer(self.config,
                                   search_tree=search_tree,
                                   pipes=pipes,
                                   enable_resign=enable_resign,
                                   debugging=False)

        state = senv.INIT_STATE
        history = [state]
        policys = []
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        final_move = None

        while not game_over:
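            # If this position occurred earlier in the game, forbid the moves
            # already tried from it to avoid repetition loops.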
            no_act = None
            if state in history[:-1]:
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            start_time = time()
            action, policy = self.player.action(state, turns, no_act)
            end_time = time()
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            # logger.debug(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            # for move, action_state in self.player.search_results.items():
            #     if action_state[0] >= 20:
            #         logger.info(f"move: {move}, prob: {action_state[0]}, Q_value: {action_state[1]:.2f}, Prior: {action_state[2]:.3f}")
            # self.player.search_results = {}
            history.append(action)
            policys.append(policy)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = senv.evaluate(state)
            else:
                game_over, value, final_move = senv.done(state)

        if final_move:
            policy = self.build_policy(final_move, False)
            history.append(final_move)
            policys.append(policy)
            state = senv.step(state, final_move)
            history.append(state)

        self.player.close()
        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        if v == 0:
            store = random() > 0.5  # store only about half of the drawn games
        else:
            store = True

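        # `history` alternates [state, action, state, ...], so history[i * 2]
        # is the position before move i; the value target flips sign each ply.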
        if store:
            data = []
            for i in range(turns):
                k = i * 2
                data.append([history[k], policys[i], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, search_tree, store
Example #6
def self_play_buffer(config, cur) -> (tuple, list):
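    # One self-play game as a standalone function: borrows a model pipe from
    # `cur`, plays to completion, and returns ((turns, value), play_data).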
    pipe = cur.pop()  # borrow

    enable_resign = random() > config.play.enable_resign_rate

    player = CChessPlayer(config,
                          search_tree=defaultdict(VisitState),
                          pipes=pipe,
                          enable_resign=enable_resign,
                          debugging=False)

    state = senv.INIT_STATE
    history = [state]
    policys = []
    value = 0
    turns = 0
    game_over = False
    final_move = None

    while not game_over:
        no_act = None
        if state in history[:-1]:
            no_act = []
            for i in range(len(history) - 1):
                if history[i] == state:
                    no_act.append(history[i + 1])
        start_time = time()
        action, policy = player.action(state, turns, no_act)
        end_time = time()
        if action is None:
            logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        # logger.debug(f"Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
        policys.append(policy)
        history.append(action)
        state = senv.step(state, action)
        turns += 1
        history.append(state)

        if turns / 2 >= config.play.max_game_length:
            game_over = True
            value = senv.evaluate(state)
        else:
            game_over, value, final_move = senv.done(state)

    if final_move:
        policy = build_policy(final_move, False)
        history.append(final_move)
        policys.append(policy)
        state = senv.step(state, final_move)
        history.append(state)

    player.close()

    if turns % 2 == 1:  # black turn
        value = -value

    v = value
    data = []
    for i in range(turns):
        k = i * 2
        data.append([history[k], policys[i], value])
        value = -value

    cur.append(pipe)
    return (turns, v), data
Example #7
    def start_game(self, idx, search_tree):
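        # Evaluation-style variant: AlphaZero plays one color and a UCCI engine
        # plays the other, alternating colors with the game index.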
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        enable_resign = random() > self.config.play.enable_resign_rate

        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes, enable_resign=enable_resign, debugging=False)

        state = senv.INIT_STATE
        history = [state]
        policys = [] 
        value = 0
        turns = 0       # even == red; odd == black
        game_over = False
        is_alpha_red = (idx % 2 == 0)
        final_move = None

        while not game_over:
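            # AlphaZero moves on its color's turns; otherwise the UCCI engine
            # picks the move.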
            if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red and turns % 2 == 1):
                no_act = None
                if state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            no_act.append(history[i + 1])
                action, policy = self.player.action(state, turns, no_act)
                if action is None:
                    logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
            else:
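                # Query the UCCI engine with the current position as FEN; on
                # black's turn the returned move is flipped back into senv's
                # red-perspective coordinates.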
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                if turns % 2 == 1:
                    action = flip_move(action)
                try:
                    policy = self.build_policy(action, False)
                except Exception as e:
                    logger.error(f"Build policy error {e}, action = {action}, state = {state}, fen = {fen}")
                    value = 0
                    break
            history.append(action)
            policys.append(policy)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = senv.evaluate(state)
            else:
                game_over, value, final_move = senv.done(state)

        if final_move:
            policy = self.build_policy(final_move, False)
            history.append(final_move)
            policys.append(policy)
            state = senv.step(state, final_move)
            history.append(state)

        self.player.close()
        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        if v == 0 or turns <= 10:
            store = random() > 0.7  # subsample draws and very short games
        else:
            store = True

        if store:
            data = []
            for i in range(turns):
                k = i * 2
                data.append([history[k], policys[i], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, search_tree, store