def __init__(self, rng: np.random.RandomState, arms: List[Arm], n_jobs: int,
             backend: Optional[str],
             lp: Union[_EpsilonGreedy, _Linear, _Random, _Softmax, _ThompsonSampling, _UCB1],
             n_clusters: Num, is_minibatch: bool):
    """Initialize the clustered policy.

    Sets up the clusterer, one deep-copied learning policy per cluster
    (sharing the caller's rng and arms), empty training-data slots, and
    NaN arm expectations.
    """
    super().__init__(rng, arms, n_jobs, backend)

    self.n_clusters = n_clusters

    # Seed the clusterer from the first word of the rng's state vector so
    # clustering is reproducible for a given rng.
    cluster_seed = rng.get_state()[1][0]
    clusterer = MiniBatchKMeans if is_minibatch else KMeans
    self.kmeans = clusterer(n_clusters, random_state=cluster_seed)

    # One learning policy per cluster: deep-copy every parameter of lp,
    # but point each copy back at the original rng and arms objects.
    self.lp_list = [deepcopy(lp) for _ in range(self.n_clusters)]
    for policy in self.lp_list:
        policy.rng = rng
        policy.arms = arms

    # Training history; populated later (outside this constructor).
    self.decisions = None
    self.rewards = None
    self.contexts = None

    # Arm expectations start as NaN.
    # When there are neighbors, expectations of the underlying learning
    # policy are used; when there are none, NaN expectations are returned.
    reset(self.arm_to_expectation, np.nan)
def temp_seed(rng: np.random.RandomState, seed: Optional[Union[int, Tuple[int, ...]]]):
    """A context manager for temporarily adjusting the random seed.

    With ``seed=None`` the rng is left untouched. Otherwise the rng is
    re-seeded for the duration of the block and its original state is
    restored afterwards, even if the block raises.

    NOTE(review): written as a generator — assumes a ``@contextmanager``
    decorator is applied at the definition site (not visible in this chunk).
    """
    if seed is None:
        # Nothing to adjust; just hand control back to the caller.
        yield
        return

    saved_state = rng.get_state()
    rng.seed(seed)
    try:
        yield
    finally:
        # Always restore the original state, even on exceptions.
        rng.set_state(saved_state)
def serialize_random_state(random_state: np.random.RandomState) -> Tuple[int, List, int, int, int]:
    """Break a RandomState's internal state into serialization-friendly parts.

    The second element of ``get_state()`` is a numpy array of key words; it
    is converted to a plain Python list so the tuple can be serialized.
    """
    algo, keys, pos, has_gauss, cached_gaussian = random_state.get_state()
    return algo, keys.tolist(), pos, has_gauss, cached_gaussian