Example #1
    def getNextState(self, game_state, player, action):
        # If `player` takes `action` on `game_state`, return the next
        # (state, player). The action must be a valid move; None means pass.
        if isinstance(action, (tuple, list, np.ndarray)):
            assert 0 <= action[0] < self.size
            assert 0 <= action[1] < self.size
            action = self.size * action[0] + action[1]
        elif action is None:
            action = self.size**2

        next_state = gogame.next_state(game_state, action, canonical=False)

        if next_state[govars.TURN_CHNL][0][0]:
            p = 1
        else:
            p = -1
        return next_state, p
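
The only non-obvious part of getNextState is the action encoding: a (row, col) pair is flattened to size * row + col, and None (a pass) maps to the extra index size * size. A minimal standalone sketch of that encoding (encode_action and the 9x9 size are illustrative, not part of the code above):

import numpy as np

def encode_action(action, size=9):
    # (row, col) tuples/lists/arrays are flattened to one index;
    # None means "pass" and maps to the extra index size * size.
    if isinstance(action, (tuple, list, np.ndarray)):
        row, col = int(action[0]), int(action[1])
        assert 0 <= row < size and 0 <= col < size
        return size * row + col
    if action is None:
        return size * size
    return int(action)  # already a flat index

assert encode_action((2, 3)) == 21
assert encode_action(None) == 81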
Example #2
    def step(self, action):
        '''
        Assumes the correct player is making a move. Black goes first.
        return observation, reward, done, info
        '''
        assert not self.done
        if isinstance(action, (tuple, list, np.ndarray)):
            assert 0 <= action[0] < self.size
            assert 0 <= action[1] < self.size
            action = self.size * action[0] + action[1]
        elif action is None:
            action = self.size**2

        self.state_ = gogame.next_state(self.state_, action, canonical=False)
        self.done = gogame.game_ended(self.state_)
        return np.copy(self.state_), self.reward(), self.done, self.info()
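
As the docstring says, step() expects the sides to alternate automatically, black first. A hedged usage sketch, assuming a GoEnv-style class that wraps the step() above and exposes its state as state_; the constructor and import path are assumptions, and gogame.valid_moves is used as in the other examples on this page:

import numpy as np
from gym_go import gogame  # import path assumed from the GymGo package

env = GoEnv(size=5)  # hypothetical constructor for the class shown above
done, reward = False, 0
while not done:
    # valid_moves() returns a 0/1 mask over size*size + 1 actions (last = pass)
    mask = gogame.valid_moves(env.state_)
    action = int(np.random.choice(np.flatnonzero(mask)))
    _, reward, done, info = env.step(action)
print('final reward:', reward)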
Example #3
    def run(self, model, state, to_play):

        root = Node(0, to_play)

        # EXPAND root
        action_probs, value = model.predict(state)
        valid_moves = gogame.valid_moves(state)
        action_probs = action_probs * valid_moves  # mask invalid moves
        action_probs /= np.sum(action_probs)
        root.expand(state, to_play, action_probs)

        for _ in range(self.args['num_simulations']):
            node = root
            search_path = [node]

            # SELECT
            while node.expanded():
                action, node = node.select_child()
                search_path.append(node)

            parent = search_path[-2]
            state = parent.state
            # Now we're at a leaf node and we would like to expand
            # Players always play from their own perspective
            next_state = gogame.next_state(state, action, canonical=True)

            # The value of the new state from the perspective of the other player
            value = gogame.winning(next_state) if gogame.game_ended(
                next_state) else None
            if value is None:
                # If the game has not ended:
                # EXPAND
                action_probs, value = model.predict(next_state)
                valid_moves = gogame.valid_moves(next_state)
                action_probs = action_probs * valid_moves  # mask invalid moves
                action_probs /= np.sum(action_probs)
                node.expand(next_state, parent.to_play * -1, action_probs)

            self.backpropagate(search_path, value, parent.to_play * -1)

        return root
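
run() is a standard one-expansion-per-simulation MCTS: select down to a leaf with select_child(), expand it with the network's masked priors, then back the value up along search_path. A rough usage sketch, assuming the MCTS/Node classes above; game, model and the args keys follow Example #4 and are otherwise placeholders:

from gym_go import gogame  # import path assumed from the GymGo package

args = {'num_simulations': 50, 'boardSize': 5}
mcts = MCTS(game, model, args)  # `game` and `model` defined elsewhere

state = gogame.init_state(args['boardSize'])
root = mcts.run(model, gogame.canonical_form(state), to_play=1)

# Greedy move choice: pick the child with the highest visit count.
visit_counts = {a: child.visit_count for a, child in root.children.items()}
action = max(visit_counts, key=visit_counts.get)
state = gogame.next_state(state, action, canonical=False)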
Example #4
    def execute_episode(self):

        train_examples = []
        current_player = 1
        state = gogame.init_state(self.args['boardSize'])

        while True:
            canonical_board = gogame.canonical_form(state)

            self.mcts = MCTS(self.game, self.model, self.args)
            root = self.mcts.run(self.model, canonical_board, to_play=1)

            action_probs = [
                0 for _ in range((self.args['boardSize'] *
                                  self.args['boardSize']) + 1)
            ]
            for k, v in root.children.items():
                action_probs[k] = v.visit_count

            action_probs = action_probs / np.sum(action_probs)
            train_examples.append(
                (canonical_board, current_player, action_probs))

            action = root.select_action(temperature=1)
            state = gogame.next_state(state, action, canonical=False)
            current_player = -current_player
            reward = gogame.winning(
                state) * current_player if gogame.game_ended(state) else None

            if reward is not None:
                ret = []
                for hist_state, hist_current_player, hist_action_probs in train_examples:
                    # [Board, currentPlayer, actionProbabilities, Reward]
                    tfBoard = np.array(
                        [hist_state[0], hist_state[1],
                         hist_state[3]]).transpose().tolist()
                    ret.append(
                        (tfBoard, hist_action_probs, reward *
                         ((-1)**(hist_current_player != current_player))))
                return ret
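
The one subtle step above is the reward sign: each stored example is labelled reward * (-1)**(hist_current_player != current_player), so the winner's positions get +reward and the loser's get -reward. A tiny standalone check of that arithmetic (the values are illustrative):

current_player = -1   # player to move when the game ended
reward = 1            # gogame.winning(...) * current_player, say white won

for hist_current_player in (1, -1):
    signed = reward * ((-1) ** (hist_current_player != current_player))
    print(hist_current_player, signed)
# 1 -1    -> the loser's examples are labelled -1
# -1 1    -> the winner's examples are labelled +1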
Example #5
    def test_batch_canonical_form(self):
        states = gogame.batch_init_state(2, 7)
        states[0] = gogame.next_state(states[0], 0)

        self.assertEqual(states[0, govars.BLACK].sum(), 1)
        self.assertEqual(states[0, govars.WHITE].sum(), 0)

        states = gogame.batch_canonical_form(states)

        self.assertEqual(states[0, govars.BLACK].sum(), 0)
        self.assertEqual(states[0, govars.WHITE].sum(), 1)

        self.assertEqual(states[1, govars.BLACK].sum(), 0)
        self.assertEqual(states[1, govars.WHITE].sum(), 0)

        for i in range(2):
            self.assertEqual(gogame.turn(states[i]), govars.BLACK)

        canon_again = gogame.batch_canonical_form(states)

        self.assertTrue((canon_again == states).all())
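
Assuming the single-state gogame.canonical_form used in Example #4 behaves like the batch version tested here, the same invariants can be checked on one state: after black's opening move it is white to play, so the canonical form swaps the stone channels, makes it black's turn, and applying it again changes nothing. A short sketch mirroring the test above:

from gym_go import gogame, govars  # import path assumed from the GymGo package

state = gogame.next_state(gogame.init_state(7), 0)  # black plays at index 0
canon = gogame.canonical_form(state)

assert canon[govars.BLACK].sum() == 0 and canon[govars.WHITE].sum() == 1
assert gogame.turn(canon) == govars.BLACK
assert (gogame.canonical_form(canon) == canon).all()  # idempotent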
Example #6
    def step(self, action):
        '''
        Assumes the correct player is making a move. Black goes first.
        return observation, reward, done, info
        '''
        assert not self.done
        if isinstance(action, (tuple, list, np.ndarray)):
            assert 0 <= action[0] < self.size
            assert 0 <= action[1] < self.size
            action = self.size * action[0] + action[1]
        elif action is None:
            action = self.size**2

        self.state_ = gogame.next_state(self.state_, action, canonical=False)
        self.done = gogame.game_ended(self.state_)
        """
        past_states_with_player stacks the previous 8 board states together
        with the player to play:
        1. the current player's stones (1 for each stone, 0 everywhere else),
           X_t (NOT the player to play),
        2. the opponent's stones, Y_t,
        3. the previous 7 timesteps (X_{t-1}, Y_{t-1}, ..., X_{t-7}, Y_{t-7}),
        4. the player to play, C (a 19x19 array of 1s for black, 0s for white).
        """
        self.past_states_with_player = np.concatenate(
            (self.state_[self.turn() ^ 1].reshape(
                (1, 19, 19)), self.state_[self.turn()].reshape((1, 19, 19)),
             self.past_states_with_player[:14], self.state_[2].reshape(
                 (1, 19, 19)) ^ 1),
            axis=0)

        return (np.copy(self.state_), np.copy(self.past_states_with_player)), \
            self.reward(), self.done, self.info()
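
The block string above describes an AlphaGo Zero style input: the mover's stones, the opponent's stones, the previous 7 pairs of planes, and one colour plane, 17 planes of 19x19 in total. A minimal, environment-independent sketch of that stacking (stack_features and its arguments are illustrative, not part of the class above):

import numpy as np

def stack_features(history, to_play, size=19, timesteps=8):
    # history: list of (own_stones, opp_stones) pairs, newest first,
    # each a (size, size) 0/1 array; to_play: 1 for black, 0 for white.
    planes = []
    for own, opp in history[:timesteps]:
        planes.extend([own, opp])
    while len(planes) < 2 * timesteps:            # pad early-game history
        planes.append(np.zeros((size, size)))
    planes.append(np.full((size, size), float(to_play)))  # colour plane C
    return np.stack(planes)                       # (2 * timesteps + 1, size, size)

empty = np.zeros((19, 19))
assert stack_features([(empty, empty)], to_play=1).shape == (17, 19, 19)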