Example #1
def test_static_env():
    from cchess_alphazero.environment.env import CChessEnv
    import cchess_alphazero.environment.static_env as senv
    from cchess_alphazero.environment.static_env import INIT_STATE
    from cchess_alphazero.environment.lookup_tables import flip_move
    env = CChessEnv()
    env.reset()
    print("env:  " + env.observation)
    print("senv: " + INIT_STATE)
    state = INIT_STATE
    # first move: the same move string drives both the stateful env and the static env
    env.step('0001')
    state = senv.step(state, '0001')
    print(senv.evaluate(state))
    print("env:  " + env.observation)
    print("senv: " + state)
    # second move: the static env expects the move expressed in its flipped frame
    env.step('7770')
    state = senv.step(state, flip_move('7770'))
    print(senv.evaluate(state))
    print("env:  " + env.observation)
    print("senv: " + state)
    env.render()
    # rebuild a board from the static state and print it, row 9 down to row 0
    board = senv.state_to_board(state)
    for i in range(9, -1, -1):
        print(board[i])
    print("env: ")
    print(env.input_planes()[0 + 7:3 + 7])
    print("senv: ")
    print(senv.state_to_planes(state)[0 + 7:3 + 7])
    print(f"env:  {env.board.legal_moves()}")
    print(f"senv: {senv.get_legal_moves(state)}")
    print(set(env.board.legal_moves()) == set(senv.get_legal_moves(state)))
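
In test_static_env above, red's move '0001' is applied to both environments unchanged, but black's reply '7770' is passed through flip_move before senv.step, because the static environment keeps the position in the side-to-move's frame. The helper below is a minimal sketch of such a 180-degree flip for the 4-digit move strings; the (column, row) digit order and the 9x10 board limits are assumptions about the project's convention, and flip_move_sketch is not the project's lookup_tables.flip_move.

def flip_move_sketch(move: str) -> str:
    # Illustrative only: mirror a 4-digit move across the centre of a 9x10 board,
    # assuming digits alternate (column, row) with columns 0-8 and rows 0-9.
    flipped = []
    for i, digit in enumerate(move):
        limit = 8 if i % 2 == 0 else 9
        flipped.append(str(limit - int(digit)))
    return ''.join(flipped)

# Under these assumptions: flip_move_sketch('7770') == '1219'
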
def test_env():
    from cchess_alphazero.environment.env import CChessEnv
    env = CChessEnv()
    env.reset()
    print(env.observation)
    env.step('0001')
    print(env.observation)
    env.step('7770')
    print(env.observation)
    env.render()
    print(env.input_planes()[0 + 7:3 + 7])
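
test_env drives the stateful environment with the same 4-character move strings. The sketch below shows one way such a string could be unpacked into from/to squares; reading the digits as (column, row) pairs is an assumption about the encoding, and parse_move_sketch is not part of the project.

def parse_move_sketch(move: str):
    # Illustrative only: split a 4-digit move string into
    # (from_col, from_row) and (to_col, to_row) under the assumed encoding.
    return (int(move[0]), int(move[1])), (int(move[2]), int(move[3]))

# Under this assumption: parse_move_sketch('0001') == ((0, 0), (0, 1))
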
def test_light_env():
    from cchess_alphazero.environment.env import CChessEnv
    from cchess_alphazero.config import Config
    c = Config('mini')
    env = CChessEnv(c)
    env.reset()
    print(env.observation)
    env.step('0001')
    print(env.observation)
    env.step('7770')
    print(env.observation)
    env.render()
    print(env.input_planes()[0 + 7:3 + 7])
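
A hypothetical runner for the three tests above; the __main__ guard and the call order are not part of the original example.

if __name__ == '__main__':
    # Run the example tests in sequence; each one prints the observation
    # string before and after the two opening moves.
    test_env()
    test_light_env()
    test_static_env()
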
# Excerpt of a player method: evaluate a state by querying the model API over a pipe.
# (Requires `import numpy as np` and the CChessEnv import shown above.)
def expand_and_evaluate(self, env: CChessEnv) -> (np.ndarray, float):
    '''
    Evaluate the state and return the policy and value computed by the neural network.
    '''
    state_planes = env.input_planes()
    # communicate with the model API over a pipe taken from the shared pool
    pipe = self.pipe_pool.pop()
    pipe.send(state_planes)
    leaf_p, leaf_v = pipe.recv()
    self.pipe_pool.append(pipe)
    # leaf_p, leaf_v = (np.random.random(len(ActionLabelsRed)), 1)
    # the policy and value are canonical (the side to move is treated as "red"),
    # so the caller may need to flip them
    return leaf_p, leaf_v
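
expand_and_evaluate only defines the client side of the pipe protocol: it sends the input planes and expects a (policy, value) pair back. Below is a minimal sketch of a loop that would serve that protocol; serve_pipe, predict_fn, and the single-request (unbatched) handling are assumptions, not the project's actual prediction worker.

from multiprocessing.connection import Connection

def serve_pipe(pipe: Connection, predict_fn) -> None:
    # Hypothetical server loop for the protocol used above:
    # receive state planes, reply with (policy, value).
    while True:
        state_planes = pipe.recv()                 # ndarray sent by the player
        policy, value = predict_fn(state_planes)   # e.g. a network forward pass
        pipe.send((policy, value))                 # matches leaf_p, leaf_v = pipe.recv()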