def update_history(self, ttt: TicTacToe):
    """Record the current board state together with its reward for this player.

    Appends a ``(state, reward)`` tuple to ``self.history``, where ``state``
    is whatever ``ttt.get_state()`` returns and ``reward`` is:

    * ``0``  - the game is over and ended in a draw,
    * ``1``  - the game is over and ``ttt.winner`` equals ``self.symbol``,
    * ``-1`` - every other situation.

    Args:
        ttt: Game whose state is recorded. Must provide ``game_over()``,
            ``is_draw()``, ``get_state()`` and a ``winner`` attribute.
    """
    # Query the end-of-game flag once; both reward branches depend on it.
    finished = ttt.game_over()

    if finished and ttt.is_draw():
        reward = 0
    elif finished and ttt.winner == self.symbol:
        reward = 1
    else:
        # NOTE(review): a game that is still in progress also lands here and
        # is scored -1, same as a loss. Presumably this method is only called
        # once the game has ended - confirm against the caller.
        reward = -1

    self.history.append((ttt.get_state(), reward))
def make_move(self, ttt: TicTacToe):
    """Place this player's symbol on the board, choosing epsilon-greedily.

    With probability ``self.epsilon`` random coordinates are tried until the
    board accepts one (exploration). Otherwise the model is asked for a value
    per field and the free field with the highest value is played
    (exploitation). Fields whose entry in ``ttt.get_state()`` equals ``0``
    are treated as free.

    Args:
        ttt: Game to move in. Must provide ``make_move(x, y, symbol)``
            returning a truthy value on success, ``game_over()`` and
            ``get_state()``.

    Raises:
        RuntimeError: On the model-driven path, if no free field with a
            usable prediction exists or the board rejects the chosen field.
    """
    # Explore: draw random coordinates until the board accepts a move.
    if np.random.rand() < self.epsilon:
        while True:
            x, y = np.random.randint(3, size=2)
            # The game_over() check stops the loop from spinning forever when
            # the board can no longer accept any move.
            if ttt.make_move(x, y, self.symbol) or ttt.game_over():
                return

    # Exploit: let the network rate every field of the current position.
    state = ttt.get_state()
    one_hot = self.one_hot_encoded(state, ttt)
    values = self.model.predict(np.asarray([one_hot]))[0]

    # Pick the first free field with the strictly highest predicted value.
    # Starting from -inf (instead of a magic floor) keeps arbitrarily low
    # predictions eligible.
    best_field = None
    best_value = float("-inf")
    for index, cell in enumerate(state):
        if cell == 0 and values[index] > best_value:
            best_value = values[index]
            best_field = index

    if best_field is None:
        # Fail before touching the board rather than sending a bogus
        # coordinate derived from a "not found" sentinel.
        raise RuntimeError("no free field with a usable prediction to play")

    # The flat field index maps to x = index % 3, y = index // 3.
    x, y = best_field % 3, best_field // 3
    if not ttt.make_move(x, y, self.symbol):
        raise RuntimeError(f"board rejected the predicted move at ({x}, {y})")