Code example
class LearningNoughtsAndCrossesPlayer(Player):
    """
    A learning computer player for noughts and crosses. Uses a neural net to
    estimate the probability of winning from any state (when the opponent is
    about to play).
    """
    strategies = ["win", "block"]
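    # The seven non-identity symmetries of the 3x3 board as index
    # permutations: rotations by 90, 180 and 270 degrees, then reflections
    # about the horizontal, vertical, main-diagonal and anti-diagonal axes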
    symmetry_maps = np.array([[2,5,8,1,4,7,0,3,6],
                              [8,7,6,5,4,3,2,1,0],
                              [6,3,0,7,4,1,8,5,2],
                              [6,7,8,3,4,5,0,1,2],
                              [2,1,0,5,4,3,8,7,6],
                              [0,3,6,1,4,7,2,5,8],
                              [8,5,2,7,4,1,6,3,0]])

    def __init__(self, name):
        """
        Create the player.
        """
        self.name = name
        self.learning = True
        self.input_scale = 16.0
        self.selectivity = 0.0
        self.neural_net = BoardgameNeuralNet(num_inputs=9,
                                             num_hidden_layers=1,
                                             num_hidden_units=[250],
                                             step_size=3E-1,
                                             regulariser=3E-2)
                                             #momentum=0.0,
                                             #dropout_rate=0)

    def move(self, board):
        """
        Obtain a move.
        """
        legal_moves = board.permitted_moves
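        # One row per legal move; columns hold the log-probabilities of the
        # three outcomes [draw, +1 wins, -1 wins]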
        log_prob = np.zeros((len(legal_moves),3))
        options = {st: [] for st in self.strategies}

        # Loop through the opponent's possible moves
        for mv in legal_moves:
            bd = board.copy()
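            # Flip the turn on the copy so the opponent moves next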
            bd.turn = -bd.turn
            bd.move(mv)

            # If the opponent's move here would end the game (a win for them
            # or a draw), the square needs blocking
            if bd.over:
                options['block'].append(mv)

        # Loop through our own possible moves
        for mm, mv in enumerate(legal_moves):
            bd = board.copy()
            bd.move(mv)

            # If our move ends the game (a win or a draw), record it as an
            # immediate 'win' option
            if bd.over:
                options['win'].append(mv)

            # Estimate probability of winning
            state = bd.state.flatten()[np.newaxis,:]
            log_prob[mm,:] = self.neural_net.predict(state/self.input_scale)
            
            #print("Log-probability of 0/+1/-1 victory if I make move {} "
            #      "is {}/{}/{}.".format(state, *prob[mm,:]))

        # Decide which option to take
        move = None
        for st in self.strategies:
            if options[st]:
                move = np.random.choice(options[st])
                break

        if move is None:
            # Calculate the expected return (+1 for a win, -1 for a loss, 0
            # for a draw). board.turn is +1 or -1, so it selects the matching
            # outcome column directly: +1 picks column 1 and -1 picks the
            # last column via Python's negative indexing.
            expct_return = np.exp(log_prob[:,board.turn]) \
                               - np.exp(log_prob[:,-board.turn])

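            # While learning, explore a uniformly random move with
            # probability self.selectivity; otherwise exploit the estimates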
            if (self.learning and (np.random.rand() < self.selectivity)):
                move = np.random.choice(legal_moves)
            else:
                move = legal_moves[np.argmax(expct_return)]
                
            #select_prob = np.exp(expct_return/self.selectivity)
            #select_prob /= np.sum(select_prob)
            #move = np.random.choice(legal_moves, p=select_prob)

        # Apply the chosen move and store the resulting state for learning
        # later
        board.move(move)
        self._game_history.append(board.state.flatten())

        return move

    def learn(self, winner):
        """
        Update net.
        """
        if self.learning:
            # Parse the game history to make training data
            states = np.array(self._game_history)
            states = self.symmetric_equivalents(states)
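            # One outcome label per stored (and augmented) state: the winner
            # (0, +1 or -1)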
            outputs = winner*np.ones(states.shape[0], dtype=int)

            # Update the net
            self.neural_net.update(states/self.input_scale, outputs)

    def notify(self, event, info):
        """
        Act on an event notification from the game.
        """
        if (event == "begin"):
            self._game_history = []
        elif (event == "finish"):
            self.learn(info)
        else:
            pass

    def symmetric_equivalents(self, states):
        """
        Add symmetrically identical states to an array of game states.
        """
        for state in states.copy():
            symmetries = self.symmetries(state)
            for sym in symmetries:
                if not np.any((states==sym).all(axis=1)):
                    states = np.vstack((states, sym))
        return states

    def symmetries(self, state):
        """
        Make a list of all the states obtainable by reflecting or rotating
        a base state.
        """
        return [state[mapping] for mapping in self.symmetry_maps]
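
As a sanity check on symmetry_maps, the short standalone snippet below (illustrative only, not part of the original class) labels each board cell 0-8 and asserts that every permutation row reproduces the corresponding numpy rotation or reflection:

import numpy as np

# Copied from LearningNoughtsAndCrossesPlayer.symmetry_maps above
symmetry_maps = np.array([[2,5,8,1,4,7,0,3,6],
                          [8,7,6,5,4,3,2,1,0],
                          [6,3,0,7,4,1,8,5,2],
                          [6,7,8,3,4,5,0,1,2],
                          [2,1,0,5,4,3,8,7,6],
                          [0,3,6,1,4,7,2,5,8],
                          [8,5,2,7,4,1,6,3,0]])

state = np.arange(9)                 # label the cells 0..8
grid = state.reshape(3, 3)
expected = [np.rot90(grid, 1),       # rotate 90 degrees
            np.rot90(grid, 2),       # rotate 180 degrees
            np.rot90(grid, 3),       # rotate 270 degrees
            np.flipud(grid),         # reflect top-to-bottom
            np.fliplr(grid),         # reflect left-to-right
            grid.T,                  # reflect about the main diagonal
            np.rot90(grid, 2).T]     # reflect about the anti-diagonal
for mapping, target in zip(symmetry_maps, expected):
    assert np.array_equal(state[mapping].reshape(3, 3), target)
print("All seven symmetry maps verified.")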
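
BoardgameNeuralNet itself is not listed here. For readers who want to run the player end to end, the sketch below is a minimal stand-in inferred only from the three calls the player makes: the constructor, predict() returning per-state log-probabilities over [draw, +1 wins, -1 wins], and update() taking states and outcome labels. It is plain softmax regression, so the hidden-layer arguments are accepted but ignored; the name TinyBoardgameNet and everything inside it are assumptions, not the original implementation.

import numpy as np

class TinyBoardgameNet:
    """Hypothetical stand-in for BoardgameNeuralNet: softmax regression."""

    def __init__(self, num_inputs=9, num_hidden_layers=None,
                 num_hidden_units=None, step_size=3E-1, regulariser=3E-2):
        # Hidden-layer arguments are accepted for call compatibility but
        # ignored: this stand-in is linear
        self.W = np.zeros((num_inputs, 3))
        self.b = np.zeros(3)
        self.step_size = step_size
        self.regulariser = regulariser

    def predict(self, states):
        # One row of class log-probabilities per input state
        logits = states @ self.W + self.b
        logits -= logits.max(axis=1, keepdims=True)   # numerical stability
        return logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))

    def update(self, states, outputs):
        # Single gradient step on the cross-entropy of observed outcomes;
        # labels in {0, +1, -1} index the class columns directly, with -1
        # hitting the last column via negative indexing
        probs = np.exp(self.predict(states))
        targets = np.zeros_like(probs)
        targets[np.arange(len(outputs)), outputs] = 1.0
        grad = probs - targets
        self.W -= self.step_size * (states.T @ grad / len(outputs)
                                    + self.regulariser * self.W)
        self.b -= self.step_size * grad.mean(axis=0)

# Quick smoke test with made-up states and outcomes
net = TinyBoardgameNet()
states = np.random.choice([-1, 0, 1], size=(4, 9)) / 16.0
net.update(states, np.array([1, -1, 0, 1]))
print(net.predict(states))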