def __init__(self, name):
    """
    Create the player.

    NOTE(review): this top-level ``__init__`` looks like a stray duplicate
    of the method of the same name inside
    ``LearningNoughtsAndCrossesPlayer`` -- confirm whether it is needed.

    Parameters:
        name: Display name for this player.
    """
    self.name = name
    self.learning = True        # record games and train the net after each one
    self.input_scale = 16.0     # board states are divided by this before the net sees them
    self.selectivity = 0.0      # chance of an exploratory random move while learning
    # Small fully-connected net scoring 9-cell board states.
    net_config = dict(num_inputs=9,
                      num_hidden_layers=1,
                      num_hidden_units=[250],
                      step_size=3E-1,
                      regulariser=3E-2)
    self.neural_net = BoardgameNeuralNet(**net_config)
class LearningNoughtsAndCrossesPlayer(Player):
    """
    A learning computer player for noughts and crosses.

    Uses a neural net to estimate the probability of winning from any
    state (when the opponent is about to play). Game states are recorded
    during play and, when the game finishes, fed back to the net labelled
    with the eventual outcome.
    """

    # Forced-move strategies, checked in priority order: take an
    # immediately game-ending move first, otherwise block a move that
    # would end the game for the opponent.
    strategies = ["win", "block"]

    # Each row is a permutation of the 9 board cells giving one of the 7
    # non-identity symmetries of the square (rotations and reflections),
    # used to multiply up training data in symmetric_equivalents().
    symmetry_maps = np.array([[2,5,8,1,4,7,0,3,6],
                              [8,7,6,5,4,3,2,1,0],
                              [6,3,0,7,4,1,8,5,2],
                              [6,7,8,3,4,5,0,1,2],
                              [2,1,0,5,4,3,8,7,6],
                              [0,3,6,1,4,7,2,5,8],
                              [8,5,2,7,4,1,6,3,0]])

    def __init__(self, name):
        """
        Create the player.

        Parameters:
            name: Display name for this player.
        """
        self.name = name
        # When True, states are recorded during play and the net is
        # trained at the end of each game (see learn()/notify()).
        self.learning = True
        # Divisor applied to raw board states before they enter the net.
        self.input_scale = 16.0
        # Probability of picking a random exploratory move while learning
        # (0.0 here, so the best-scoring move is always taken).
        self.selectivity = 0.0
        # Net maps a flattened 9-cell state to log-probabilities of the
        # three outcomes (draw / +1 wins / -1 wins).
        self.neural_net = BoardgameNeuralNet(num_inputs=9,
                                             num_hidden_layers=1,
                                             num_hidden_units=[250],
                                             step_size=3E-1,
                                             regulariser=3E-2)
                                             #momentum=0.0,
                                             #dropout_rate=0)

    def move(self, board):
        """
        Obtain a move.

        Forced wins (or draws) are taken immediately; otherwise forced
        blocks; otherwise the move with the best expected return under
        the neural net's outcome estimates is chosen.

        NOTE(review): this method applies the chosen move to the caller's
        ``board`` (``board.move(move)``) before returning it, and appends
        the resulting state to ``self._game_history`` -- confirm the game
        framework expects the player to mutate the board itself.

        Parameters:
            board: Current game state; must provide ``permitted_moves``,
                   ``copy()``, ``move()``, ``turn``, ``over`` and ``state``.

        Returns:
            The selected move (an element of ``board.permitted_moves``).
        """
        legal_moves = board.permitted_moves
        # One row per candidate move; columns are the net's log-probability
        # of each of the three outcomes after that move.
        log_prob = np.zeros((len(legal_moves),3))
        options = dict()
        for st in self.strategies:
            options[st] = []

        # Loop through the opponents possible moves
        for mv in legal_moves:
            bd = board.copy()
            # Temporarily hand the turn to the opponent to see what they
            # could do with this square. Assumes turn is +1/-1 -- TODO confirm.
            bd.turn = -bd.turn
            bd.move(mv)

            # See if they won (or if it was a draw): such a square must
            # be occupied by us to deny it to them.
            if bd.over:
                options['block'].append(mv)

        # Loop through possible moves
        for mm in range(len(legal_moves)):
            mv = legal_moves[mm]
            bd = board.copy()
            bd.move(mv)

            # See if we won (or it was a draw)
            if bd.over:
                options['win'].append(mv)

            # Estimate probability of winning from the resulting state
            # (scaled down before entering the net).
            state = bd.state.flatten()[np.newaxis,:]
            log_prob[mm,:] = self.neural_net.predict(state/self.input_scale)
            #print("Log-probability of 0/+1/-1 victory if I make move {} "
            #      "is {}/{}/{}.".format(state, *prob[mm,:]))

        # Decide which option to take: first non-empty forced-move list
        # wins, with ties broken at random.
        move = None
        for st in self.strategies:
            if options[st]:
                move = np.random.choice(options[st])
                break
        if move is None:
            # Calculated expected return (+1 for win, -1 for loss, 0 for
            # draw). board.turn is presumed +/-1, so column indices
            # ``board.turn`` and ``-board.turn`` pick out our win and
            # loss probabilities respectively (negative index wraps).
            expct_return = np.exp(log_prob[:,board.turn]) \
                           - np.exp(log_prob[:,-board.turn])
            # Occasionally explore at random while learning; with
            # selectivity == 0 this branch never fires.
            if (self.learning and (np.random.rand() < self.selectivity)):
                move = np.random.choice(legal_moves)
            else:
                move = legal_moves[np.argmax(expct_return)]
                #select_prob = np.exp(expct_return/self.selectivity)
                #select_prob /= np.sum(select_prob)
                #move = np.random.choice(legal_moves, p=select_prob)

        # Store the board for learning later
        board.move(move)
        self._game_history.append(board.state.flatten())

        return move

    def learn(self, winner):
        """
        Update net.

        Trains the net on every state visited this game (plus all their
        symmetric equivalents), each labelled with the final outcome.

        Parameters:
            winner: Game outcome label; presumably 0/+1/-1 to match the
                    net's three outputs -- TODO confirm against caller.
        """
        if self.learning:
            # Parse the game history to make training data
            states = np.array(self._game_history)
            states = self.symmetric_equivalents(states)
            # Every state in the game gets the same outcome label.
            outputs = winner*np.ones(states.shape[0], dtype=int)

            # Update the net
            self.neural_net.update(states/self.input_scale, outputs)

    def notify(self, event, info):
        """
        Act on an event notification from the game.

        "begin" resets the per-game state history; "finish" triggers
        learning, with ``info`` carrying the winner. Other events are
        ignored.
        """
        if (event == "begin"):
            self._game_history = []
        elif (event == "finish"):
            self.learn(info)
        else:
            pass

    def symmetric_equivalents(self, states):
        """
        Add symmetrically identical states to an array of game states.

        Parameters:
            states: 2-D array, one flattened 9-cell state per row.

        Returns:
            A new array containing the original rows plus any symmetric
            variants not already present (duplicates are skipped).
        """
        # Iterate over a snapshot so rows appended below are not revisited.
        for state in states.copy():
            symmetries = self.symmetries(state)
            for sym in symmetries:
                # Only append variants not already present row-for-row.
                if not np.any((states==sym).all(axis=1)):
                    states = np.vstack((states, sym))
        return states

    def symmetries(self, state):
        """
        Make a list of all the states obtainable by reflecting or
        rotating a base state.

        Parameters:
            state: Flattened 9-cell board state.

        Returns:
            List of the 7 non-identity symmetric variants of ``state``.
        """
        syms = []
        for ii in range(7):
            # Fancy-index the cells through one symmetry permutation.
            syms.append(state[self.symmetry_maps[ii,:]])
        return syms