Code example #1
def convert_to_cheating_data(data):
    """
    :param data: format is SelfPlayWorker.buffer
    :return:
    """
    state_list = []
    policy_list = []
    value_list = []
    for state_fen, policy, value in data:

        state_planes = canon_input_planes(state_fen)

        if is_black_turn(state_fen):
            policy = Config.flip_policy(policy)

        move_number = int(state_fen.split(' ')[5])
        # reduces the noise of the opening... plz train faster
        value_certainty = min(5, move_number) / 5
        sl_value = value * value_certainty + testeval(state_fen, False) * (1 - value_certainty)

        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(sl_value)

    return (np.asarray(state_list, dtype=np.float32),
            np.asarray(policy_list, dtype=np.float32),
            np.asarray(value_list, dtype=np.float32))
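For reference, here is a minimal sketch (not the project's actual implementation) of what a policy flip such as Config.flip_policy accomplishes: every UCI move label is remapped to its vertically mirrored counterpart, so a position with black to move can be presented to the network from the canonical white-to-move perspective. The helpers mirror_uci, build_flip_index and flip_policy below are hypothetical names introduced for illustration.

import numpy as np


def mirror_uci(move):
    # Mirror a UCI move across the horizontal axis, e.g. 'e2e4' -> 'e7e5'.
    def flip_square(sq):
        return sq[0] + str(9 - int(sq[1]))
    return flip_square(move[:2]) + flip_square(move[2:4]) + move[4:]


def build_flip_index(labels):
    # For every label index, find the index of its mirrored label.
    position = {m: i for i, m in enumerate(labels)}
    return np.array([position[mirror_uci(m)] for m in labels])


def flip_policy(policy, flip_index):
    # Reorder the policy vector so entry i now describes the mirrored move.
    return np.asarray(policy)[flip_index]


# Toy label set; the real label table covers 1968 moves.
labels = ['e2e4', 'e7e5', 'g1f3', 'g8f6']
flip_index = build_flip_index(labels)
policy = np.array([0.7, 0.1, 0.15, 0.05], dtype=np.float32)
print(flip_policy(policy, flip_index))  # probability mass now sits on the mirrored moves

Applying the flip twice returns the original vector, which is exactly the invariant the commented-out assertion in code example #5 checks.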
Code example #2
def convert_to_cheating_data(data):
    """
    :param data: format is SelfPlayWorker.buffer
    :return:
    """
    state_list = []
    policy_list = []
    value_list = []
    env = ChessEnv().reset()
    for state_fen, policy, value in data:
        move_number = int(state_fen.split(' ')[5])
        # f2 = maybe_flip_fen(maybe_flip_fen(state_fen,True),True)
        # assert state_fen == f2
        next_move = env.deltamove(state_fen)
        if next_move is None:  # new game!
            assert state_fen == chess.STARTING_FEN
            env.reset()
        else:
            env.step(next_move, False)

        state_planes = env.canonical_input_planes()
        # assert env.check_current_planes(state_planes)

        side_to_move = state_fen.split(" ")[1]
        if side_to_move == 'b':
            #assert np.sum(policy) == 0
            policy = Config.flip_policy(policy)
        else:
            #assert abs(np.sum(policy) - 1) < 1e-8
            pass

        # if np.sum(policy) != 0:
        #     policy /= np.sum(policy)

        #assert abs(np.sum(policy) - 1) < 1e-8

        assert len(policy) == 1968
        assert state_planes.dtype == np.float32

        # reduces the noise of the opening... plz train faster
        value_certainty = min(15, move_number) / 15
        SL_value = value * value_certainty + env.testeval() * (1 - value_certainty)

        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(SL_value)

    return np.array(state_list, dtype=np.float32), np.array(
        policy_list, dtype=np.float32), np.array(value_list, dtype=np.float32)
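The value_certainty blend above can be illustrated with toy numbers (the inputs below are assumed, not taken from the project): early in the game the final outcome value is a noisy training target, so it is mixed with a static evaluation of the position, and from move 15 onwards the game outcome is used at full weight. blend_value is a hypothetical helper name.

def blend_value(value, static_eval, move_number, ramp=15):
    # Linearly ramp trust in the game outcome over the first `ramp` moves.
    certainty = min(ramp, move_number) / ramp
    return value * certainty + static_eval * (1 - certainty)


print(blend_value(value=1.0, static_eval=0.1, move_number=3))   # ~0.28, mostly static eval
print(blend_value(value=1.0, static_eval=0.1, move_number=30))  # 1.0, full game outcome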
Code example #3
    def expand_and_evaluate(self, env) -> (np.ndarray, float):
        """ expand new leaf, this is called only once per state
        this is called with state locked
        insert P(a|s), return leaf_v
        """
        state_planes = env.canonical_input_planes()

        leaf_p, leaf_v = self.predict(state_planes)
        # these are canonical policy and value (i.e. side to move is "white")

        if not env.white_to_move:
            leaf_p = Config.flip_policy(leaf_p)  # get it back to python-chess form

        return leaf_p, leaf_v
Code example #4
    def calc_policy(self, env):
        """calc π(a|s0)
        :return:
        """
        state = state_key(env)
        my_visitstats = self.tree[state]
        policy = np.zeros(self.labels_n)
        for action, a_s in my_visitstats.a.items():
            policy[self.move_lookup[action]] = a_s.n

        policy /= np.sum(policy)

        if not env.white_to_move:
            policy = Config.flip_policy(policy)
        return policy
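A minimal sketch of the same computation with toy numbers (the move_lookup table and visit counts below are made up; the real label table has 1968 entries): MCTS visit counts are scattered into a dense vector indexed by move label and then normalized to obtain π(a|s0).

import numpy as np

move_lookup = {'e2e4': 0, 'd2d4': 1, 'g1f3': 2}    # toy move -> index table
visit_counts = {'e2e4': 120, 'd2d4': 60, 'g1f3': 20}

policy = np.zeros(len(move_lookup), dtype=np.float32)
for move, n in visit_counts.items():
    policy[move_lookup[move]] = n
policy /= policy.sum()
print(policy)  # [0.6 0.3 0.1]

Dividing by the total visit count makes the entries sum to 1, matching the commented-out assertion abs(np.sum(policy) - 1) < 1e-8 in code example #2.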
Code example #5
    def expand_and_evaluate(self, env) -> (np.ndarray, float):
        """ expand new leaf, this is called only once per state
        this is called with state locked
        insert P(a|s), return leaf_v
        """
        state_planes = env.canonical_input_planes()

        leaf_p, leaf_v = self.predict(state_planes)
        # these are canonical policy and value (i.e. side to move is "white")

        if env.board.turn == chess.BLACK:
            leaf_p = Config.flip_policy(leaf_p)  # get it back to python-chess form
        #np.testing.assert_array_equal(Config.flip_policy(Config.flip_policy(leaf_p)), leaf_p)

        return leaf_p, leaf_v
Code example #6
    def expand_and_evaluate(self, env) -> (np.ndarray, float):
        """ expand new leaf, this is called only once per state
        this is called with state locked
        insert P(a|s), return leaf_v

        This gets a prediction for the policy and value of the state within the given env
        :return (float, float): the policy and value predictions for this state
        """
        state_planes = env.canonical_input_planes()

        leaf_p, leaf_v = self.predict(state_planes)
        # these are canonical policy and value (i.e. side to move is "white")

        if not env.white_to_move:
            leaf_p = Config.flip_policy(leaf_p) # get it back to python-chess form

        return leaf_p, leaf_v
Code example #7
    def expand_and_evaluate(self, env) -> (np.ndarray, float):
        """expand new leaf

        this is called with state locked
        insert P(a|s), return leaf_v

        :param ChessEnv env:
        :return: leaf_v
        """
        if self.play_config.tablebase_access and env.board.num_pieces() <= 5:
            return self.tablebase_and_evaluate(env)

        state = env.board.gather_features(self.config.model.t_history)
        leaf_p, leaf_v = self.predict(state)

        if env.board.turn == chess.BLACK:
            leaf_p = Config.flip_policy(leaf_p)

        return leaf_p, leaf_v
Code example #8
def load_data_from_file(filename, t_history):
    # Might be necessary to catch an exception here: if the play data file isn't
    # completely written yet, reading it will fail (e.g. with a "missing delimiter" error).
    data = read_game_data_from_file(filename)

    state_list = []
    policy_list = []
    value_list = []

    board = MyBoard(None)
    board.fullmove_number = 1000  # an arbitrary large value.

    for state, policy, value in data:
        board.push_fen(state)
        state = board.gather_features(t_history)
        if board.turn == chess.BLACK:
            policy = Config.flip_policy(policy)

        state_list.append(state)
        policy_list.append(policy)
        value_list.append(value)

    return state_list, policy_list, value_list
Code example #9
File: optimize.py  Project: Skyorca/chess-alpha-zero
def convert_to_cheating_data(data):
    """
    :param data: format is SelfPlayWorker.buffer
    :return:
    """
    state_list = []
    policy_list = []
    value_list = []
    for state_fen, policy, value in data:

        state_planes = canon_input_planes(state_fen)

        if is_black_turn(state_fen):
            policy = Config.flip_policy(policy)

        move_number = int(state_fen.split(' ')[5])
        value_certainty = min(5, move_number)/5 # reduces the noise of the opening... plz train faster
        sl_value = value*value_certainty + testeval(state_fen, False)*(1-value_certainty)

        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(sl_value)

    return np.asarray(state_list, dtype=np.float32), np.asarray(policy_list, dtype=np.float32), np.asarray(value_list, dtype=np.float32)