def test_static_env():
    from cchess_alphazero.environment.env import CChessEnv
    import cchess_alphazero.environment.static_env as senv
    from cchess_alphazero.environment.static_env import INIT_STATE
    from cchess_alphazero.environment.lookup_tables import flip_move

    env = CChessEnv()
    env.reset()
    print("env: " + env.observation)
    print("senv: " + INIT_STATE)
    state = INIT_STATE

    # Play the same move in both environments and compare the resulting states
    env.step('0001')
    state = senv.step(state, '0001')
    print(senv.evaluate(state))
    print("env: " + env.observation)
    print("senv: " + state)

    # Black replies; the static env flips the board each ply, so the
    # move must be flipped before it is applied
    env.step('7770')
    state = senv.step(state, flip_move('7770'))
    print(senv.evaluate(state))
    print("env: " + env.observation)
    print("senv: " + state)

    env.render()
    board = senv.state_to_board(state)
    # Print the board from the top rank down
    for i in range(9, -1, -1):
        print(board[i])

    # Compare a slice of the input planes and the legal-move sets;
    # the final print should output True if both environments agree
    print("env: ")
    print(env.input_planes()[0+7:3+7])
    print("senv: ")
    print(senv.state_to_planes(state)[0+7:3+7])
    print(f"env: {env.board.legal_moves()}")
    print(f"senv: {senv.get_legal_moves(state)}")
    print(set(env.board.legal_moves()) == set(senv.get_legal_moves(state)))
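# Minimal runner for the comparison test above (a sketch; the project may
# invoke its tests differently):
if __name__ == '__main__':
    test_static_env()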
def expand_and_evaluate(self, state, history):
    '''
    Evaluate the state; its policy and value are computed by the neural network.
    '''
    state_planes = senv.state_to_planes(state)
    with self.q_lock:
        # Queue the position; a consumer batches these for the network
        self.buffer_planes.append(state_planes)
        self.buffer_history.append(history)
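# A hedged sketch (not the project's actual code) of the consumer side that
# would drain the buffers filled above: swap the buffers out under the lock,
# run one batched network call, and pair each result with its history key.
# The names `flush_buffers`, `self.model`, `predict_batch`, and `self.results`
# are assumptions.
def flush_buffers(self):
    with self.q_lock:
        planes, self.buffer_planes = self.buffer_planes, []
        histories, self.buffer_history = self.buffer_history, []
    if not planes:
        return
    policies, values = self.model.predict_batch(planes)  # hypothetical API
    for history, policy, value in zip(histories, policies, values):
        # Hand the (policy, value) pair back to whoever queued this position
        self.results[tuple(history)] = (policy, value)   # hypothetical store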
import numpy as np
import cchess_alphazero.environment.static_env as senv

def convert_to_trainging_data(data):
    state_list = []
    policy_list = []
    value_list = []
    for state, policy, value in data:
        # Encode the board state as network input planes
        state_planes = senv.state_to_planes(state)
        sl_value = value
        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(sl_value)
    return np.asarray(state_list, dtype=np.float32), \
           np.asarray(policy_list, dtype=np.float32), \
           np.asarray(value_list, dtype=np.float32)
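# A minimal usage sketch with hypothetical sample data: each record is a
# (state, policy, value) tuple. NUM_LABELS stands in for the project's
# move-label count; the uniform policy and the value 1.0 are placeholders,
# not real training data.
NUM_LABELS = 2086  # assumption: replace with the project's actual label count
sample = [(senv.INIT_STATE, [1.0 / NUM_LABELS] * NUM_LABELS, 1.0)]
states, policies, values = convert_to_trainging_data(sample)
print(states.shape, policies.shape, values.shape)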
def expand_and_evaluate(self, state, history, real_hist=None):
    '''
    Evaluate the state; its policy and value are computed by the neural network.
    '''
    # Encode with move history when enabled, otherwise fall back to
    # single-state planes
    if self.use_history:
        if real_hist:
            # logger.debug(f"real history = {real_hist}")
            state_planes = senv.state_history_to_planes(state, real_hist)
        else:
            # logger.debug(f"history = {history}")
            state_planes = senv.state_history_to_planes(state, history)
    else:
        state_planes = senv.state_to_planes(state)
    with self.q_lock:
        self.buffer_planes.append(state_planes)
        self.buffer_history.append(history)
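# A hedged note on the inputs above: judging from the slicing in
# convert_to_trainging_data below, `history` is assumed to alternate state and
# move strings, [state0, move0, state1, move1, ...], with `real_hist` carrying
# the actual game line when the search reached `state` by transposition.
# A minimal shape check under that assumption (and assuming the encoder
# accepts a history shorter than its window):
state = senv.INIT_STATE
planes = senv.state_history_to_planes(state, [state])
print(planes.shape)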
def convert_to_trainging_data(data, history):
    state_list = []
    policy_list = []
    value_list = []
    i = 0
    for state, policy, value in data:
        if history is None:
            state_planes = senv.state_to_planes(state)
        else:
            # history alternates state and move strings; the slice ends at
            # the i-th state, so each sample sees only its own past
            state_planes = senv.state_history_to_planes(state,
                                                        history[0:i * 2 + 1])
        sl_value = value
        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(sl_value)
        i += 1
    return np.asarray(state_list, dtype=np.float32), \
           np.asarray(policy_list, dtype=np.float32), \
           np.asarray(value_list, dtype=np.float32)
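# A hedged end-to-end sketch of the history-aware variant, under the same
# assumed [state, move, state, ...] history layout; the move '0001' and the
# outcome values are placeholders, and NUM_LABELS is the assumed label count.
NUM_LABELS = 2086  # assumption: replace with the project's actual label count
state0 = senv.INIT_STATE
state1 = senv.step(state0, '0001')
history = [state0, '0001', state1]
data = [(state0, [1.0 / NUM_LABELS] * NUM_LABELS, 1.0),
        (state1, [1.0 / NUM_LABELS] * NUM_LABELS, -1.0)]
states, policies, values = convert_to_trainging_data(data, history)
print(states.shape, policies.shape, values.shape)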