Example #1
0
    def __init__(self, rng: np.random.RandomState, arms: List[Arm],
                 n_jobs: int, backend: Optional[str],
                 lp: Union[_EpsilonGreedy, _Linear, _Random, _Softmax,
                           _ThompsonSampling,
                           _UCB1], n_clusters: Num, is_minibatch: bool):
        """Set up per-cluster learning policies and the kmeans clusterer.

        One deep copy of ``lp`` is kept per cluster; each copy shares the
        original ``rng`` and ``arms`` objects rather than copies of them.
        """
        super().__init__(rng, arms, n_jobs, backend)

        self.n_clusters = n_clusters

        # Derive a deterministic integer seed for sklearn from the rng's
        # Mersenne Twister key array (first element of the state tuple's keys).
        seed = rng.get_state()[1][0]
        kmeans_cls = MiniBatchKMeans if is_minibatch else KMeans
        self.kmeans = kmeans_cls(n_clusters, random_state=seed)

        # Deep copy all lp parameters per cluster, but point each copy back
        # at the shared originals of rng and arms.
        self.lp_list = []
        for _ in range(self.n_clusters):
            policy = deepcopy(lp)
            policy.rng = rng
            policy.arms = arms
            self.lp_list.append(policy)

        # Training history is populated later by fit/partial_fit.
        self.decisions = None
        self.rewards = None
        self.contexts = None

        # nan signals "no neighbors"; with neighbors, the underlying
        # learning policy's expectations are used instead.
        reset(self.arm_to_expectation, np.nan)
from contextlib import contextmanager  # required by the decorator below


@contextmanager
def temp_seed(rng: np.random.RandomState,
              seed: Optional[Union[int, Tuple[int, ...]]]):
    """A context manager for temporarily adjusting the random seed.

    If ``seed`` is None the rng is left untouched and the block runs as-is.
    Otherwise the rng's current state is snapshotted, ``seed`` is applied
    for the duration of the ``with`` block, and the original state is
    restored afterwards — even if the block raises.
    """
    if seed is None:
        # No reseeding requested: behave as a no-op context manager.
        try:
            yield
        finally:
            pass
    else:
        # Snapshot the full generator state so it can be restored exactly.
        state = rng.get_state()
        rng.seed(seed)
        try:
            yield
        finally:
            rng.set_state(state)
Example #3
0
def serialize_random_state(random_state: np.random.RandomState) -> Tuple[str, List, int, int, float]:
    """Flatten a RandomState's internal state into JSON-serializable parts.

    ``get_state()`` returns ``(algorithm_name, key_array, pos, has_gauss,
    cached_gaussian)``; the numpy key array is converted to a plain list so
    every element of the returned tuple is a builtin type.

    Note: the first element is the algorithm name (e.g. ``'MT19937'``), a
    str, and the last is a float — the previous ``Tuple[int, ...]``
    annotation was incorrect.
    """
    name, keys, pos, has_gauss, cached_gaussian = random_state.get_state()
    return name, keys.tolist(), pos, has_gauss, cached_gaussian