Example no. 1
    def __init__(self, rng: np.random.RandomState, arms: List[Arm],
                 n_jobs: int, backend: Optional[str],
                 lp: Union[_EpsilonGreedy, _Linear, _Random, _Softmax,
                           _ThompsonSampling,
                           _UCB1], n_clusters: Num, is_minibatch: bool):
        super().__init__(rng, arms, n_jobs, backend)

        self.n_clusters = n_clusters

        if is_minibatch:
            self.kmeans = MiniBatchKMeans(n_clusters,
                                          random_state=rng.get_state()[1][0])
        else:
            self.kmeans = KMeans(n_clusters,
                                 random_state=rng.get_state()[1][0])

        # Create one learning policy per cluster
        # Deep copy the lp object for each cluster, but rebind rng and arms to the shared originals
        self.lp_list = [deepcopy(lp) for _ in range(self.n_clusters)]
        for c in range(self.n_clusters):
            self.lp_list[c].rng = rng
            self.lp_list[c].arms = arms

        self.decisions = None
        self.rewards = None
        self.contexts = None

        # Initialize the arm expectations to nan
        # When there are neighbors, the expectations of the underlying learning policy are used
        # When there are no neighbors, nan expectations are returned
        reset(self.arm_to_expectation, np.nan)
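
A minimal, self-contained sketch of the copy-then-rebind pattern used above (illustrative names only, not library code): each cluster gets its own deep copy of the learning policy so per-cluster state stays independent, while rng and arms are pointed back to the shared originals.

from copy import deepcopy
import numpy as np

class TinyPolicy:
    def __init__(self, rng, arms):
        self.rng = rng
        self.arms = arms
        self.counts = {arm: 0 for arm in arms}  # per-policy state that must not be shared

rng = np.random.RandomState(7)
arms = ["a", "b"]
lp = TinyPolicy(rng, arms)

lp_list = [deepcopy(lp) for _ in range(3)]
for policy in lp_list:
    policy.rng = rng    # share the single RNG stream
    policy.arms = arms  # share the single arm list

lp_list[0].counts["a"] += 1
assert lp_list[1].counts["a"] == 0           # cluster state stays independent
assert all(p.rng is rng for p in lp_list)    # the RNG is shared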
Example no. 2
    def _normalize_expectations(self):
        # TODO: this would not work for negative rewards!
        total = sum(self.arm_to_expectation.values())
        if total == 0:
            # set equal probabilities
            reset(self.arm_to_expectation, 1.0 / len(self.arms))
        else:
            for k, v in self.arm_to_expectation.items():
                self.arm_to_expectation[k] = v / total
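
A quick illustration (hypothetical values) of what the normalization above does, and why the TODO flags negative rewards: a negative expectation can cancel a positive one, driving the total toward zero and making the ratios meaningless.

exp = {"a": 2.0, "b": 1.0, "c": 1.0}
total = sum(exp.values())
print({k: v / total for k, v in exp.items()})  # {'a': 0.5, 'b': 0.25, 'c': 0.25}

exp = {"a": 2.0, "b": -2.0}
print(sum(exp.values()))  # 0.0 -> falls into the "equal probabilities" branch
                          # even though the arms are clearly not equivalent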
Example no. 3
    def fit(self, decisions: np.ndarray, rewards: np.ndarray, contexts: np.ndarray = None) -> NoReturn:

        # If rewards are non-binary, convert them
        rewards = self._get_binary_rewards(decisions, rewards)

        # Reset the success and failure counters to 1 (the Beta distribution is undefined for 0)
        reset(self.arm_to_success_count, 1)
        reset(self.arm_to_fail_count, 1)

        # Calculate fit
        self._parallel_fit(decisions, rewards)
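
Why the counters are reset to 1 rather than 0: Beta(alpha, beta) requires both parameters to be positive, and Beta(1, 1) is the uniform prior. A standalone illustration (not library code):

import numpy as np

rng = np.random.RandomState(0)
rng.beta(1, 1)           # valid: uniform prior before any observations
rng.beta(1 + 5, 1 + 2)   # e.g. after 5 successes and 2 failures for an arm
try:
    rng.beta(0, 0)       # invalid: the distribution is undefined for 0
except ValueError as e:
    print(e)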
Example no. 4
    def __init__(self, rng: np.random.RandomState, arms: List[Arm],
                 n_jobs: int, backend: str,
                 lp: Union[_EpsilonGreedy, _Linear, _Random, _Softmax,
                           _ThompsonSampling, _UCB1], metric: str):
        super().__init__(rng, arms, n_jobs, backend)
        self.lp = lp
        self.metric = metric

        self.decisions = None
        self.rewards = None
        self.contexts = None

        # Initialize the arm expectations to nan
        # When there are neighbors, the expectations of the underlying learning policy are used
        # When there are no neighbors, nan expectations are returned
        reset(self.arm_to_expectation, np.nan)
Example no. 5
    def __init__(self,
                 rng: _BaseRNG,
                 arms: List[Arm],
                 n_jobs: int,
                 backend: Optional[str],
                 lp: Union[_EpsilonGreedy, _Linear, _Popularity, _Random,
                           _Softmax, _ThompsonSampling, _UCB1],
                 metric: str,
                 no_nhood_prob_of_arm: Optional[List] = None):
        super().__init__(rng, arms, n_jobs, backend)
        self.lp = lp
        self.metric = metric
        self.no_nhood_prob_of_arm = no_nhood_prob_of_arm

        self.decisions = None
        self.rewards = None
        self.contexts = None

        # Initialize the arm expectations to nan
        # When there are neighbors, the expectations of the underlying learning policy are used
        # When there are no neighbors, nan expectations are returned
        reset(self.arm_to_expectation, np.nan)
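
A hedged sketch of the fallback that no_nhood_prob_of_arm presumably controls: when a context has no neighbors, the nan expectations cannot be compared, so an arm would be drawn at random, weighted by no_nhood_prob_of_arm if it was provided (the function name and exact behavior here are assumptions for illustration).

import numpy as np

def pick_when_no_neighbors(rng, arms, no_nhood_prob_of_arm=None):
    if no_nhood_prob_of_arm is None:
        return arms[rng.randint(len(arms))]                       # uniform fallback
    return arms[rng.choice(len(arms), p=no_nhood_prob_of_arm)]    # weighted fallback

rng = np.random.RandomState(1)
print(pick_when_no_neighbors(rng, ["a", "b", "c"], [0.1, 0.1, 0.8]))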
Example no. 6
    def fit(self, decisions: np.ndarray, rewards: np.ndarray, contexts: np.ndarray = None) -> NoReturn:

        # Reset the sum, count, and expectations to zero
        reset(self.arm_to_sum, 0)
        reset(self.arm_to_count, 0)
        reset(self.arm_to_expectation, 0)

        self._parallel_fit(decisions, rewards, contexts)
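
The reset(...) helper used throughout these snippets presumably just overwrites every value of the dictionary in place; a minimal equivalent would be:

def reset(dictionary, value):
    for key in dictionary:
        dictionary[key] = value

arm_to_sum = {"a": 3.0, "b": 7.5}
reset(arm_to_sum, 0)
print(arm_to_sum)  # {'a': 0, 'b': 0}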
Example no. 7
    def fit(self,
            decisions: np.ndarray,
            rewards: np.ndarray,
            contexts: np.ndarray = None) -> NoReturn:

        # Reset the sum, count, mean, and expectations to zero
        reset(self.arm_to_sum, 0)
        reset(self.arm_to_count, 0)
        reset(self.arm_to_mean, 0)
        reset(self.arm_to_expectation, 0)

        # Total number of decisions
        self.total_count = len(decisions)

        # Calculate fit
        self._parallel_fit(decisions, rewards)
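
This fit tracks total_count, which is what a UCB1-style index needs. A sketch of that index using the standard UCB1 formula (not necessarily the exact expression the library uses):

import math

def ucb1_index(arm_mean, arm_count, total_count):
    if arm_count == 0:
        return float("inf")  # unexplored arms are tried first
    return arm_mean + math.sqrt(2 * math.log(total_count) / arm_count)

print(ucb1_index(arm_mean=0.4, arm_count=10, total_count=100))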
Example no. 8
    def fit(self,
            decisions: np.ndarray,
            rewards: np.ndarray,
            contexts: np.ndarray = None) -> NoReturn:

        # Reset the sum, count, and mean to zero
        reset(self.arm_to_sum, 0)
        reset(self.arm_to_count, 0)
        reset(self.arm_to_mean, 0)

        # Calculate fit
        self._parallel_fit(decisions, rewards)
        self._expectation_operation()
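
Here arm_to_mean is filled by the parallel fit, and _expectation_operation then derives the expectations from those means. For a Softmax-style policy that derivation would look roughly like the following sketch (tau as the temperature is an assumption for illustration, not the library's exact code):

import math

def softmax_expectations(arm_to_mean, tau=1.0):
    max_mean = max(arm_to_mean.values())  # subtract the max for numerical stability
    scaled = {arm: math.exp((mean - max_mean) / tau) for arm, mean in arm_to_mean.items()}
    total = sum(scaled.values())
    return {arm: s / total for arm, s in scaled.items()}

print(softmax_expectations({"a": 1.0, "b": 0.5, "c": 0.0}, tau=0.5))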