def thompson_sampling(T: int, mu, seed=None, prior_SF=None, **_kwargs):
    """Run Bernoulli Thompson Sampling against arms whose means are known.

    Args:
        T: Number of rounds to play.
        mu: Sequence of per-arm success probabilities; its length fixes the
            number of arms.
        seed: Passed to ``seeded`` while the per-round uniform draws that
            decide the rewards are generated.
        prior_SF: Optional ``(S, F)`` pair of per-arm success / failure
            counts used as the starting state.  The counts are copied, so
            the caller's sequences are never modified.
        **_kwargs: Accepted and ignored.

    Returns:
        A tuple ``(arms_selected, rewards)``: an int array of length ``T``
        with the index of the arm played in each round, and a float array of
        length ``T`` with the 0/1 reward obtained in each round.
    """
    n_arms = len(mu)
    if prior_SF is None:
        S = np.zeros(n_arms)
        F = np.zeros(n_arms)
    else:
        # Work on private copies: the counters are updated in place below and
        # must not leak back into the caller's prior.
        prior_S, prior_F = prior_SF
        S = np.array(prior_S, dtype=float)
        F = np.array(prior_F, dtype=float)

    arms_selected = np.zeros(T, dtype=int)
    rewards = np.zeros(T)

    # NOTE(review): only the reward draws are taken inside seeded(); confirm
    # that the sampling loop below is meant to stay outside of it.
    with seeded(seed):
        random_numbers = np.random.rand(T)

    for t in range(T):
        # One posterior draw per arm from Beta(S + 1, F + 1).
        theta = [beta(s + 1, f + 1) for s, f in zip(S, F)]
        arm_x = rand_argmax(theta)
        reward_y = int(random_numbers[t] <= mu[arm_x])
        arms_selected[t] = arm_x
        rewards[t] = reward_y
        if reward_y == 1:
            S[arm_x] += 1
        else:
            F[arm_x] += 1
    return arms_selected, rewards
def classify(self, instance):
    """Produce a score for ``instance`` and record the thresholded outcome.

    ``instance`` is unpacked as a 3-tuple whose first element is the label;
    the remaining two elements are not used here.  When
    ``bernoulli(self.epsilon)`` is truthy the label is negated before it is
    used to produce the score.  In ``simple`` mode the score is that
    (possibly negated) label as a float; otherwise it is a ``beta`` draw
    parameterised by ``aIn``/``bIn`` for a truthy label and ``aOut``/``bOut``
    for a falsy one.

    The original label together with ``int(score > .5)`` is passed to
    ``self.scores.add`` and the raw score is returned.
    """
    label, _p1, _p2 = instance
    effective_label = (not label) if bernoulli(self.epsilon) else label

    if self.simple:
        score = float(effective_label)
    elif effective_label:
        score = beta(self.aIn, self.bIn)
    else:
        score = beta(self.aOut, self.bOut)

    predicted = int(score > .5)
    self.scores.add(label, predicted)
    return score
def beta(a, b, shape=None):
    """Return beta-distributed random numbers.

    ``beta(a, b)`` returns a single draw; ``beta(a, b, [n, m, ...])`` returns
    an array of draws with the given shape.

    Args:
        a: First parameter, forwarded to ``mt.beta``.
        b: Second parameter, forwarded to ``mt.beta``.
        shape: Output shape.  ``None`` (the default) or an empty list
            requests a single draw; anything else is forwarded unchanged as
            the size argument of ``mt.beta``.

    Returns:
        Whatever ``mt.beta(a, b, shape)`` returns for the resolved shape.
    """
    # An empty list is still accepted for callers written against the old
    # ``shape=[]`` default; the isinstance check avoids comparing an array
    # with a list.
    if isinstance(shape, list) and not shape:
        shape = None
    return mt.beta(a, b, shape)
def test_beta_small_parameters(self):
    """Regression test for gh-5851: tiny beta parameters must not give NaN."""
    # With very small a and b, roundoff errors could cause a 0 / 0 and hence
    # NaN samples; draw a fixed-seed batch and check none are present.
    random.seed(1234567890)
    samples = random.beta(0.0001, 0.0001, size=100)
    contains_nan = np.isnan(samples).any()
    assert_(not contains_nan, 'Nans in random.beta')