def __update_state(self, new_state):
    """Commit ``new_state`` as the current board if the transition is legal.

    A transition is accepted only when at least one cell differs from the
    current board AND every differing cell was previously holding the EMPTY
    code (i.e. no occupied cell is overwritten).

    Returns True when the board was updated, False when the move was
    rejected and the board left untouched.
    """
    diff_mask = self.state != new_state
    if not np.any(diff_mask):
        # Nothing changed -- not a valid move.
        return False
    # Previous values at every changed position must all be EMPTY.
    previous_values = self.state[diff_mask]
    if not np.all(previous_values == utils.get_code_by_char(EMPTY)):
        return False
    self.state = new_state
    return True
def __predict_winning_turn(self, state):
    """Pick the empty cell whose simulated move maximizes the predicted
    probability of winning, according to the pre-trained classifier.

    Parameters:
        state: the current board (2-D numpy array of cell codes).

    Returns:
        (i, j) row/column indices of the chosen empty cell.

    Raises:
        TypeError: when no classifier has been loaded (``self.clf is None``).
    """
    if self.clf is None:
        raise TypeError
    self._say('..using a pre-trained neural network.')
    empty_cells = self.__get_empty_cells(state)
    self.probs = np.zeros(state.shape)
    # Column of predict_proba that corresponds to the "win" class (1).
    # This is invariant across turns, so look it up once outside the loop.
    win_col = np.where(self.clf.classes_ == 1)[0][0]
    for i, j in np.nditer(empty_cells):
        new_state = self._update_state(state, i, j)
        # Define a feature vector: the board before and after the candidate
        # move, normalized so this player's char is always coded as 1:
        X = utils.get_code_by_char(self.char) * \
            np.hstack((state, new_state)).flatten()
        # Predict a probability of win for the current turn:
        turn_probs = self.clf.predict_proba([X]).flatten()
        self.probs[i, j] = turn_probs[win_col]
    # Choose the empty cell with the maximal probability of win.
    # BUGFIX: the previous implementation unpacked
    # np.argwhere(probs == probs.max()).flatten() into (i, j), which raised
    # ValueError whenever two or more cells tied for the maximum, and could
    # match *occupied* cells when all probabilities were 0 (probs is
    # zero-initialized). Restricting argmax to the empty-cell index arrays
    # fixes both issues and deterministically picks the first best cell.
    rows, cols = empty_cells
    best = np.argmax(self.probs[rows, cols])
    return rows[best], cols[best]
def _update_state(self, state, i, j):
    """Return a copy of ``state`` with this player's mark placed at (i, j).

    The input board is never mutated; the caller receives a fresh array
    whose cell (i, j) holds the numeric code of this player's char.
    """
    result = state.copy()
    result[i, j] = utils.get_code_by_char(self.char)
    return result
def __clean_state(self):
    """Return a fresh DIM x DIM board with every cell set to the EMPTY code."""
    # np.full is the direct equivalent of building a flat list of
    # DIM*DIM identical codes and reshaping it.
    return np.full((DIM, DIM), utils.get_code_by_char(EMPTY))
def __is_draw(self):
    """Return True when no empty cells remain on the current board."""
    empty_code = utils.get_code_by_char(EMPTY)
    # "no cell is empty" <=> "every cell is non-empty" (De Morgan).
    return bool(np.all(self.state != empty_code))
def play(self, num: int = 1):
    """Play ``num`` games, recording per-turn states and each game's winner.

    For every game the players are shuffled, the board is reset, and
    turns alternate until a win, a draw (board full), or a player aborts
    by returning None from ``turn()``. Per-turn normalized states are
    accumulated in ``self.states[i]`` keyed by player name and turn index;
    ``self.states[i]['winner']`` stays None on a draw. An aborted game is
    removed from ``self.states`` and stops the whole session.

    NOTE(review): if ``num == 0`` the summary below references ``i``
    before assignment (NameError) — confirm callers always pass num >= 1.
    """
    t_start = time.time()
    # NOTE(review): shadows the builtin `quit`; here it means "a player
    # aborted the session".
    quit = False
    for i in range(1, num + 1):
        # Randomize who moves first this game.
        np.random.shuffle(self.players)
        self.state = self.__clean_state()
        self.states[i] = {
            self.players[0].name: {},
            self.players[1].name: {},
            'winner': None
        }
        j = 1  # turn index
        while j <= DIM*DIM:
            player_cur = self.players[0]  # current player
            new_state = player_cur.turn(self.state, i, j)
            if new_state is None:
                # Player gave up / requested to stop the session.
                quit = True
                break
            # Save a generalized game state of the current player which
            # consists of game states before and after the player's turn:
            player_cur_state = np.hstack((self.state, new_state))
            if not self.__update_state(new_state):
                # couldn't update state, repeat turn
                # (note: j is NOT incremented and players are NOT swapped,
                # so the same player retries the same turn index)
                continue
            # Normalize the game state of the current player before storing
            # it so as the player's char is always coded identically with
            # '1' regardless of its type ('x' or 'o'):
            self.states[i][player_cur.name][j] = \
                utils.get_code_by_char(player_cur.char) * player_cur_state
            if self.__is_win():
                self.states[i]['winner'] = player_cur.name
                break
            if self.__is_draw():
                break
            self.players = self.players[::-1]  # swap players
            j = j + 1
        self.__vprint('--- Game {0} over, {1} turns, winner: '
                      '{2}'.format(i, j, self.states[i]['winner']))
        if quit:
            # Drop the unfinished game and stop playing further games.
            if i in self.states:
                del self.states[i]
            break
    # Sum up wins grouped by players' names:
    # (draws are tallied under the key None, since 'winner' stays None)
    winners = {}
    for game in self.states:
        name = self.states[game]['winner']
        winners[name] = winners.get(name, 0) + 1
    t_elapsed = time.time() - t_start
    t_min, t_sec = divmod(t_elapsed, 60)
    self.__vprint('--- Played {i} games. Winners: {winners}. {t_min:.0f} '
                  'min {t_sec:.2f} sec'.format(i=i, winners=winners,
                                               t_min=t_min, t_sec=t_sec))
    if self.record_states:
        self.__store_states_to_database()
def __get_empty_cells(self, state):
    """Return the (rows, cols) index arrays of every empty cell in ``state``."""
    # np.nonzero(cond) is the documented equivalent of single-argument
    # np.where(cond).
    return np.nonzero(state == utils.get_code_by_char(EMPTY))