Code Example #1
File: test.py  Project: xtxiatian200/cczero2
def test_static_env():
    from cchess_alphazero.environment.env import CChessEnv
    import cchess_alphazero.environment.static_env as senv
    from cchess_alphazero.environment.static_env import INIT_STATE
    from cchess_alphazero.environment.lookup_tables import flip_move
    # Play the same two moves in the full environment (CChessEnv) and in the
    # static string-state environment (senv), printing both after each step.
    env = CChessEnv()
    env.reset()
    print("env:  " + env.observation)
    print("senv: " + INIT_STATE)
    state = INIT_STATE
    env.step('0001')
    state = senv.step(state, '0001')
    print(senv.evaluate(state))
    print("env:  " + env.observation)
    print("senv: " + state)
    env.step('7770')
    # Black's move is mirrored with flip_move before it is applied to the
    # static state.
    state = senv.step(state, flip_move('7770'))
    print(senv.evaluate(state))
    print("env:  " + env.observation)
    print("senv: " + state)
    env.render()
    board = senv.state_to_board(state)
    for i in range(9, -1, -1):
        print(board[i])
    # Compare the network input planes and the legal-move sets of the two
    # environments; the final line should print True if they agree.
    print("env: ")
    print(env.input_planes()[0+7:3+7])
    print("senv: ")
    print(senv.state_to_planes(state)[0+7:3+7])
    print(f"env:  {env.board.legal_moves()}")
    print(f"senv: {senv.get_legal_moves(state)}")
    print(set(env.board.legal_moves()) == set(senv.get_legal_moves(state)))
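For reference, the static environment can also be driven on its own. A minimal sketch, using only the calls exercised in the test above (assuming the cchess_alphazero package is importable):

import cchess_alphazero.environment.static_env as senv
from cchess_alphazero.environment.static_env import INIT_STATE
from cchess_alphazero.environment.lookup_tables import flip_move

state = INIT_STATE
state = senv.step(state, '0001')             # red's opening move
state = senv.step(state, flip_move('7770'))  # black's reply, mirrored first
print(senv.evaluate(state))                  # static evaluation of the position
print(senv.get_legal_moves(state))           # legal moves in the resulting state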
Code Example #2
File: player.py  Project: xtxiatian200/cczero2
 def expand_and_evaluate(self, state, history):
     '''
     Evaluate the state; return its policy and value as computed by the neural network.
     '''
     state_planes = senv.state_to_planes(state)
     with self.q_lock:
         # Queue this position's planes, together with its history, for
         # network evaluation.
         self.buffer_planes.append(state_planes)
         self.buffer_history.append(history)
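The method only enqueues work: planes and the matching search history are appended under q_lock, and the network is presumably run elsewhere over the buffered batch. A minimal consumer sketch under that assumption; predict_batch, the player/model arguments, and the model's predict() output format are illustrative names, not the project's actual API:

import numpy as np

def predict_batch(player, model):
    # Drain the buffers filled by expand_and_evaluate under the same lock,
    # then run one batched forward pass (assumed (policy, value) output).
    with player.q_lock:
        planes = np.asarray(player.buffer_planes, dtype=np.float32)
        histories = list(player.buffer_history)
        player.buffer_planes.clear()
        player.buffer_history.clear()
    if not histories:
        return []
    policies, values = model.predict(planes)
    return list(zip(histories, policies, values))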
Code Example #3
File: optimize.py  Project: xtxiatian200/cczero2
# Imports required by this snippet.
import numpy as np
import cchess_alphazero.environment.static_env as senv


def convert_to_trainging_data(data):
    state_list = []
    policy_list = []
    value_list = []

    for state, policy, value in data:
        # Convert each recorded position into the network's input planes.
        state_planes = senv.state_to_planes(state)
        sl_value = value

        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(sl_value)

    return np.asarray(state_list, dtype=np.float32), \
           np.asarray(policy_list, dtype=np.float32), \
           np.asarray(value_list, dtype=np.float32)
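A quick usage sketch; the single sample triple and the policy-vector length below are made up for illustration (in the project, data comes from recorded games):

import numpy as np
from cchess_alphazero.environment.static_env import INIT_STATE

# Hypothetical batch of one (state, policy, value) triple; the policy length
# (2086) is an assumption about the number of move labels.
sample = [(INIT_STATE, np.zeros(2086, dtype=np.float32), 0.0)]
states, policies, values = convert_to_trainging_data(sample)
print(states.shape, policies.shape, values.shape)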
Code Example #4
 def expand_and_evaluate(self, state, history, real_hist=None):
     '''
     Evaluate the state; return its policy and value as computed by the neural network.
     '''
     if self.use_history:
         if real_hist:
             # logger.debug(f"real history = {real_hist}")
             state_planes = senv.state_history_to_planes(state, real_hist)
         else:
             # logger.debug(f"history = {history}")
             state_planes = senv.state_history_to_planes(state, history)
     else:
         state_planes = senv.state_to_planes(state)
     with self.q_lock:
         self.buffer_planes.append(state_planes)
         self.buffer_history.append(history)
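Compared with Example #2, this variant can build the network input from the position history: when use_history is set it calls senv.state_history_to_planes, preferring the real game history (real_hist) over the search history when one is supplied, and it falls back to the single-state planes otherwise.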
Code Example #5
File: optimize.py  Project: bynoud/ccguide
# Imports required by this snippet.
import numpy as np
import cchess_alphazero.environment.static_env as senv


def convert_to_trainging_data(data, history):
    state_list = []
    policy_list = []
    value_list = []
    i = 0

    for state, policy, value in data:
        if history is None:
            state_planes = senv.state_to_planes(state)
        else:
            # Each position i only sees the history prefix up to its own ply
            # (2 * i + 1 entries), never anything played later in the game.
            state_planes = senv.state_history_to_planes(
                state, history[0:i * 2 + 1])
        sl_value = value

        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(sl_value)
        i += 1

    return np.asarray(state_list, dtype=np.float32), \
           np.asarray(policy_list, dtype=np.float32), \
           np.asarray(value_list, dtype=np.float32)
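Unlike the cczero2 version in Example #3, this bynoud/ccguide variant threads a shared history list through the conversion and slices it per position as shown above; passing history=None restores the single-state behaviour.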