def get_move(self, board: Board, eps=0.0):
    """Choose a move epsilon-greedily using the scores produced by ``self.net``.

    Every call increments ``self.moves``; taking the random (exploration)
    branch additionally increments ``self.rand``.

    Args:
        board: Current position; must provide ``legal_moves()``.
        eps: Threshold compared against ``random()``. When the drawn number
            is below it, a random legal move is returned instead of the
            highest-scoring one.

    Returns:
        The selected move, or ``None`` when the board has no legal moves.
    """
    self.moves += 1
    legal_moves = board.legal_moves()
    if len(legal_moves) == 0:
        return None

    # Exploration: skip the network entirely and pick a random legal move.
    if random() < eps:
        self.rand += 1
        return sample(legal_moves, 1)[0]

    # The encoder does not depend on the candidate move, so build it once
    # instead of once per loop iteration.
    encode = TTTQEncoding()

    # Start from negative infinity so that any real-valued score, however
    # small, replaces the initial value (a finite magic number would not
    # guarantee that).
    best_q = float("-inf")
    best_move = None
    for move in legal_moves:
        q_val = self.net(encode(move, board, None, self.playerID))[0][0]
        # Strict comparison: on ties the earliest legal move is kept.
        if q_val > best_q:
            best_q = q_val
            best_move = move
    return best_move
def get_move(self, board: Board, eps=0):
    """Choose a move epsilon-greedily by scoring the position after each move.

    Each legal move is played on a copy of ``board``; the resulting position
    is encoded and passed to ``self.net``, and the move whose resulting
    position scores highest is returned.

    Args:
        board: Current position; must provide ``legal_moves()`` and be
            deep-copyable, with the copy supporting ``do(move, player_id)``.
        eps: Threshold compared against ``random()``. When the drawn number
            is below it, a random legal move is returned instead of the
            highest-scoring one.

    Returns:
        The selected move, or ``None`` when the board has no legal moves.
    """
    legal_moves = board.legal_moves()
    if len(legal_moves) == 0:
        return None

    # Exploration: skip the network entirely and pick a random legal move.
    if random() < eps:
        return sample(legal_moves, 1)[0]

    # The encoder does not depend on the candidate move, so build it once
    # instead of once per loop iteration.
    encode = TTTVEncoding()

    # Start from negative infinity so that any real-valued score, however
    # small, replaces the initial value (a finite magic number would not
    # guarantee that).
    best_v = float("-inf")
    best_move = None
    for move in legal_moves:
        # Simulate the move on a copy so the caller's board stays untouched.
        sim_board = deepcopy(board)
        sim_board.do(move, self.playerID)
        v_val = self.net(encode(None, sim_board, None, self.playerID))[0][0]
        # Strict comparison: on ties the earliest legal move is kept.
        if v_val > best_v:
            best_v = v_val
            best_move = move
    return best_move
def get_move(self, board: Board):
    """Return one randomly drawn legal move for this player.

    The candidate moves come from ``board.legal_moves(self.playerID)``.

    Returns:
        A single move drawn with ``sample``, or ``None`` when the board
        reports no legal moves for this player.
    """
    candidates = board.legal_moves(self.playerID)
    if len(candidates) > 0:
        # sample() hands back a one-element list; unpack its only item.
        (picked,) = sample(candidates, 1)
        return picked
    return None