Esempio n. 1
0
    def __init__(self, board_size=BOARD_SIZE, color=BLACK, board=None):
        '''
        Set up the game state for the given player color.

        Args:
            board_size: size of board
            color: color of current player; must be BLACK or WHITE
            board: current board; when None, a new board of ``board_size``
                is created with pachi_py.CreateBoard

        Raises:
            AssertionError: if ``color`` is neither BLACK nor WHITE
        '''
        assert color in [BLACK, WHITE], 'Invalid player color'

        # Test against None explicitly: a caller-supplied board must never be
        # discarded just because the object happens to evaluate as falsy.
        if board is not None:
            self.board = board
        else:
            self.board = pachi_py.CreateBoard(board_size)

        # The two most recent actions, both initialised to -1
        # (presumably "no move yet" -- TODO confirm against the step logic).
        self.last_action = -1
        self.last_action_2 = -1

        self.color = color
        self.board_size = board_size

        self.game_over = False
        self.winner = None
        # board_size**2 board points plus one extra action
        # (presumably "pass" -- TODO confirm).
        self.action_space = spaces.Discrete(board_size**2 + 1)

        # Runs after the attributes above are set and before current_player.
        self._new_state_checks()

        # current_player is 1 for BLACK and -1 for WHITE.
        if color == BLACK:
            self.current_player = 1
        else:
            self.current_player = -1
Esempio n. 2
0
    def reset(self):
        """Replace the board with an empty one and return the formatted state.

        Returns:
            Whatever ``_format_state`` builds from ``self.history``,
            ``self.player_color`` and ``self.board_size``.
        """
        size = self.board_size
        self.board = pachi_py.CreateBoard(size)

        # The resignation flag is always False at reset time.
        resigned = False
        self.done = self.board.is_terminal or resigned

        return _format_state(self.history, self.player_color, size)
Esempio n. 3
0
    def __init__(self,
                 board_size,
                 board=None,
                 player=-1,
                 done=False,
                 last_passed=False,
                 history=None,
                 move_num=0):
        """Store the game state, creating defaults for anything not supplied.

        Args:
            board_size: Side length of the board.
            board: Existing board to use; a new one is created with
                pachi_py.CreateBoard when None.
            player: Current player, either -1 or 1.
            done: Stored as ``self.done``.
            last_passed: Stored as ``self.last_passed``.
            history: Existing history list; defaults to seven ``None`` slots.
            move_num: Stored as ``self.move_num``.

        Raises:
            AssertionError: if ``player`` is not -1 or 1.
        """
        self.board_size = board_size

        # The two action indices that follow the board_size**2 board points.
        point_count = board_size**2
        self.pass_action = point_count
        self.resign_action = point_count + 1

        assert player in [-1, 1]

        self.curr_player = player
        self.done = done
        self.last_passed = last_passed
        self.move_num = move_num

        self.history = [None] * 7 if history is None else history
        self.board = (pachi_py.CreateBoard(board_size)
                      if board is None else board)
Esempio n. 4
0
def make_random_board(size):
    """Return a board of the given size after 50 randomly chosen moves.

    The colors alternate, starting with black. Each move is picked with
    np.random.choice from the coordinates returned by get_legal_coords
    for the color to move.
    """
    board = pachi_py.CreateBoard(size)
    mover = pachi_py.BLACK
    remaining = 50
    while remaining > 0:
        candidates = board.get_legal_coords(mover)
        board = board.play(np.random.choice(candidates), mover)
        mover = pachi_py.stone_other(mover)
        remaining -= 1
    return board
Esempio n. 5
0
def test_illegal_move():
    """Playing on an already occupied coordinate must raise IllegalMove."""
    # White takes coordinate 14 on a 9x9 board ...
    occupied = pachi_py.CreateBoard(9).play(14, pachi_py.WHITE)

    # ... so black playing the same coordinate has to be rejected.
    raised = False
    try:
        occupied.play(14, pachi_py.BLACK)
    except pachi_py.IllegalMove:
        raised = True
    assert raised, 'IllegalMove exception should have been raised'
Esempio n. 6
0
    def __init__(self, player_color, observation_type, illegal_move_mode,
                 board_size, komi):
        """
        Validate the configuration and build the observation/action spaces.

        Args:
            player_color: Stone color for the agent. Either 'black' or 'white'
            observation_type: State encoding; only 'image3c' is accepted
            illegal_move_mode: What to do when the agent makes an illegal move. Choices: 'raise' or 'lose'
            board_size: Side length of the board; must be an int >= 1
            komi: Stored unchanged as ``self.komi``

        Raises:
            AssertionError: for an invalid board_size, observation_type or
                illegal_move_mode
            error.Error: for an unknown player_color or an unsupported
                observation type
        """
        size_is_valid = isinstance(board_size, int) and board_size >= 1
        assert size_is_valid, 'Invalid board size: {}'.format(board_size)
        self.board_size = board_size
        self.komi = komi
        # Seeding happens only after board_size and komi are stored.
        self._seed()

        stone_by_name = {
            'black': pachi_py.BLACK,
            'white': pachi_py.WHITE,
        }
        try:
            self.player_color = stone_by_name[player_color]
        except KeyError:
            message = "player_color must be 'black' or 'white', not {}".format(
                player_color)
            raise error.Error(message)

        assert observation_type in ['image3c']
        self.observation_type = observation_type

        assert illegal_move_mode in ['lose', 'raise']
        self.illegal_move_mode = illegal_move_mode

        # Only reachable when assertions are disabled (python -O).
        if self.observation_type != 'image3c':
            raise error.Error('Unsupported observation type: {}'.format(
                self.observation_type))

        # Observations lie in [0, 1] and have the shape of an encoded board.
        obs_shape = pachi_py.CreateBoard(self.board_size).encode().shape
        lower, upper = np.zeros(obs_shape), np.ones(obs_shape)
        self.observation_space = spaces.Box(lower, upper)
        # One action for each board position, pass, and resign
        self.action_space = spaces.Discrete(self.board_size**2 + 2)

        # No game is running until a reset fills these in.
        self.state = None
        self.done = True

        # Pass tracking and three-element one-hot vectors, one per point state.
        self.last_player_passed = False
        self.BLACK = np.array([1, 0, 0])
        self.WHITE = np.array([0, 1, 0])
        self.EMPTY = np.array([0, 0, 1])
Esempio n. 7
0
    def reset(self):
        """Start a new game on an empty board.

        The new state is a GoState built from the empty board and
        pachi_py.BLACK. No opponent is re-initialised or played here.

        Returns:
            The encoded board of the new state.
        """
        empty_board = pachi_py.CreateBoard(self.board_size)
        self.state = GoState(empty_board, pachi_py.BLACK)

        # A new game starts with no pending pass.
        self.last_player_passed = False

        current_board = self.state.board
        self.done = current_board.is_terminal
        return current_board.encode()
Esempio n. 8
0
File: go.py Progetto: tabzraz/gym
    def _seed(self, seed=None):
        """Seed the numpy RNG and pachi, then rebuild both spaces.

        Args:
            seed: Passed to ``seeding.np_random``; None is allowed.

        Returns:
            list: ``[seed1, seed2]`` where seed1 comes from
            ``seeding.np_random`` and seed2 is the derived value given to
            ``pachi_py.pachi_srand``.
        """
        rng, seed1 = seeding.np_random(seed)
        self.np_random = rng

        # Derive a second seed for pachi, reduced modulo 2**32.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**32
        pachi_py.pachi_srand(seed2)

        # Both spaces are recreated so that they share the new RNG.
        obs_shape = pachi_py.CreateBoard(self.board_size).encode().shape
        self.observation_space = spaces.Box(
            np.zeros(obs_shape), np.ones(obs_shape), np_random=self.np_random)

        # One action for each board position, pass, and resign
        action_count = self.board_size**2 + 2
        self.action_space = spaces.Discrete(
            action_count, np_random=self.np_random)

        return [seed1, seed2]
Esempio n. 9
0
File: go.py Progetto: aforr/gym
    def _reset(self):
        """Start a new game and hand the move to the agent.

        Returns:
            The encoded board once it is the agent's turn.

        Raises:
            AssertionError: if the state's color is still not the agent's
                color after the opponent's optional opening move.
        """
        new_board = pachi_py.CreateBoard(self.board_size)
        self.state = GoState(new_board, pachi_py.BLACK)

        # A pachi engine is attached to a game via internal data in a board,
        # so a new game needs the opponent to be re-initialised as well.
        self._reset_opponent(self.state.board)

        # The opponent moves first when the opening color is not the agent's.
        opponent_to_move = self.state.color != self.player_color
        if opponent_to_move:
            self.state = self._exec_opponent_play(self.state, None, None)
        assert self.state.color == self.player_color

        current_board = self.state.board
        self.done = current_board.is_terminal
        return current_board.encode()
Esempio n. 10
0
 def reset(self):
     """Reset the game: agent plays black, zeroed history, empty board.

     Returns:
         The state built by ``_format_state`` from the new history, the
         player color and the board size (also stored as ``self.state``).
     """
     colormap = dict(black=pachi_py.BLACK, white=pachi_py.WHITE)
     self.player_color = colormap['black']

     # Two independent zero arrays, each holding HISTORY + 1 planes.
     plane_shape = (HISTORY + 1, self.board_size, self.board_size)
     self.history = [np.zeros(plane_shape) for _ in range(2)]

     self.board = pachi_py.CreateBoard(self.board_size)
     self.state = _format_state(
         self.history, self.player_color, self.board_size)

     # The resignation flag is always False at reset time.
     resigned = False
     self.done = self.board.is_terminal or resigned
     return self.state
Esempio n. 11
0
File: go.py Progetto: rdspring1/gym
    def __init__(self, player_color, opponent, observation_type,
                 illegal_move_mode, board_size):
        '''
        Validate the configuration, build the spaces and reset the game.

        Args:
            player_color: Stone color for the agent. Either 'black' or 'white'
            opponent: An opponent policy
            observation_type: State encoding; only 'image3c' is supported
            illegal_move_mode: What to do when the agent makes an illegal move. Choices: 'raise' or 'lose'
            board_size: Side length of the board; must be an int >= 1

        Raises:
            AssertionError: for an invalid board_size, observation_type or
                illegal_move_mode
            error.Error: for an unknown player_color or an unsupported
                observation type
        '''
        size_is_valid = isinstance(board_size, int) and board_size >= 1
        assert size_is_valid, 'Invalid board size: {}'.format(board_size)
        self.board_size = board_size

        stone_by_name = {
            'black': pachi_py.BLACK,
            'white': pachi_py.WHITE,
        }
        try:
            self.player_color = stone_by_name[player_color]
        except KeyError:
            message = "player_color must be 'black' or 'white', not {}".format(
                player_color)
            raise error.Error(message)

        # opponent_policy starts unset; the opponent argument is stored as is.
        self.opponent_policy = None
        self.opponent = opponent

        assert observation_type in ['image3c']
        self.observation_type = observation_type

        assert illegal_move_mode in ['lose', 'raise']
        self.illegal_move_mode = illegal_move_mode

        # One action for each board position, pass, and resign
        self.action_space = spaces.Discrete(self.board_size**2 + 2)

        # Guard clause; only reachable when assertions are disabled.
        if self.observation_type != 'image3c':
            raise error.Error('Unsupported observation type: {}'.format(
                self.observation_type))

        # Observations lie in [0, 1] and have the shape of an encoded board.
        obs_shape = pachi_py.CreateBoard(self.board_size).encode().shape
        self.observation_space = spaces.Box(np.zeros(obs_shape),
                                            np.ones(obs_shape))

        self.reset()
Esempio n. 12
0
def _play(black_policy_fn, white_policy_fn, board_size=19):
    '''
    Play one game between two policies and record every transition.

    Args:
        black_policy_fn, white_policy_fn: functions that map
            (current GoState, previous GoState, previous action) to a move
            coord (int); the previous state and action are None on the
            first call
        board_size: side length of the board the game starts on

    Returns:
        A list of (state, action, next_state) tuples in play order.
    '''
    transitions = []

    last_state = last_action = None
    state = GoState(pachi_py.CreateBoard(board_size), BLACK)

    while not state.board.is_terminal:
        # Pick the policy that owns the color of the current state.
        if state.color == BLACK:
            policy_fn = black_policy_fn
        else:
            policy_fn = white_policy_fn

        action = policy_fn(state, last_state, last_action)
        successor = state.act(action)
        transitions.append((state, action, successor))

        last_state, last_action = state, action
        state = successor

    return transitions
Esempio n. 13
0
    def _reset(self):
        """Start a new game and hand the move to the agent.

        Note: this deliberately leaves ``moves_log`` alone.

        Returns:
            The encoded board once it is the agent's turn.

        Raises:
            AssertionError: if the state's color is still not the agent's
                color after the opponent's optional opening move.
        """
        # Don't touch moves_log!
        new_board = pachi_py.CreateBoard(self.board_size)
        self.state = CustomGoState(new_board, pachi_py.BLACK)

        # A pachi engine is attached to a game via internal data in a board,
        # so a new game needs the opponent to be re-initialised as well.
        self._reset_opponent(self.state.board)

        # The opponent moves first when the opening color is not the agent's;
        # its move may already be a resignation.
        resigned = False
        if self.state.color != self.player_color:
            outcome = self._exec_opponent_play(self.state, None, None)
            self.state, resigned = outcome

        # We should be back to the agent color
        assert self.state.color == self.player_color

        self.done = self.state.board.is_terminal or resigned
        return self.state.board.encode()
Esempio n. 14
0
    def get(self):
        """
        Get new move from PACHI given board matrix and stone color.
        ---
        tags:
            -   move
        consumes:
            - application/json
        produces:
            - application/json 
        
                                           
        parameters:        
            -   in: body
                name: body                 
                schema:
                    properties:
                        board_format:
                            description: format of the board, matrix or ij_history
                        board:
                            description: Board data in two dimentional array
                            type: array
                            items:
                                type: array
                                items: integer
                        board_size:
                            description: board size
                        stone_color:
                            description: Stone color for the generated move. 2:Black or 1:White
                            type: integer
                        return_board:
                            description: Return the board in matrix. 0:False or 1:True.
                            type: integer                  
                example:
                    |-
                        {"stone_color": 1,"board": [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],"return_board": 1}
                            
                        
        """    
        # NOTE(review): the docstring above is an API spec (YAML after the
        # '---' marker) and is intentionally left untouched.
        try:
            req_data = request.get_json(force=True)
            print(req_data)
        except Exception:
            # Catch Exception rather than everything, so that SystemExit and
            # KeyboardInterrupt still propagate.
            # NOTE(review): 400 would be the conventional status for a bad
            # request body; 500 is kept so existing clients see no change.
            return make_response(jsonify({"error":"Invalid payload"}), 500)
        if not isinstance(req_data, dict):
            # Valid JSON that is not an object (list, null, number, ...) has
            # no .get(); report it as an invalid payload instead of crashing.
            return make_response(jsonify({"error":"Invalid payload"}), 500)

        board_format = req_data.get('board_format', 'matrix')
        board = req_data.get('board',[[]])
        stone_color = req_data.get('stone_color',0)
        return_board = req_data.get('return_board',False)

        # The board is always 19x19; a 'board_size' field in the request is
        # not read here.
        pachi_board = pachi_py.CreateBoard(19)
        if board_format == 'matrix':
            self.make_board_matrix(pachi_board, board)
        elif board_format == 'ij_history':
            # In this format the color is compared as a string:
            # 'Black' maps to 2, anything else to 1.
            stone_color = 2 if stone_color=='Black' else 1
            self.make_board_ij_history(pachi_board, board)
        else:
            return make_response(jsonify({"error":"Invalid payload"}), 500)
        _pe=pachi_py.PyPachiEngine(pachi_board,'uct','threads=8,playout=light,maximize_score')

        # Ask the engine for a move and apply it to the engine's own board.
        next_move = _pe.genmove(stone_color,'')
        _pe.curr_board.play_inplace(next_move, stone_color)
        print(_pe.curr_board.coord_to_ij(next_move))
        resp = jsonify({
            'move_str':_pe.curr_board.coord_to_str(next_move),
            'move_ij':_pe.curr_board.coord_to_ij(next_move),
            'stone_color':stone_color,
            'board':_pe.curr_board.encode()[0].tolist() if return_board else None})
        # Drop the local reference to the engine before responding.
        del _pe

        return make_response(resp,200)
Esempio n. 15
0
from gym import envs

# Script setup: build a 5x5 Go environment, load a saved policy and create a
# pachi engine on an empty 5x5 board.

# File the saved policy is loaded from.
PATH = "./model.pt"

# 5x5 'gym_go:go-v0' environment using the 'heuristic' reward method.
go_env = gym.make('gym_go:go-v0', size=5, reward_method='heuristic')

state = go_env.reset()  # Reset environment and record the starting state

# Training is disabled here; the saved model below is used instead.
# Train 5x5 model
# Policy_grad(100, 10, 0.001, 0.99, go_env, False, True)

# Load trained model
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
policy = torch.load(PATH)

# Empty 5x5 pachi board for the engine created below.
b = pachi_py.CreateBoard(5)

# Pachi engine on board `b`, created with b'uct' and an empty argument string.
engine = pachi_py.PyPachiEngine(b, b'uct', b'')
# Disabled experiments with engine-generated moves, kept for reference:
# pachi_move = engine.genmove(pachi_py.WHITE, b'0')
# b.play_inplace(pachi_move, pachi_py.WHITE)
# b.play_inplace(9, pachi_py.BLACK)
# engine.notify(9, pachi_py.BLACK)
#
# pachi_move = engine.genmove(pachi_py.WHITE, b'2')

# print(pachi_move)
# # # print(b.coord_to_ij(8))
# b.play_inplace(9, pachi_py.BLACK)
# b.play_inplace(pachi_move, pachi_py.WHITE)
# Initialised to False; presumably a loop flag for code that follows -- TODO confirm.
done = False
Esempio n. 16
0
def test_board_sizes():
    """An empty board offers size**2 + 1 legal coordinates to either color."""
    for size in (9, 19):
        board = pachi_py.CreateBoard(size)
        expected_count = size * size + 1
        for color in (pachi_py.BLACK, pachi_py.WHITE):
            legal_coords = board.get_legal_coords(color)
            assert len(legal_coords) == expected_count, \
                'Starting board should have size**2 + 1 legal moves'