def getReward(self, arm, reward):
    """Record a reward for ``arm`` and refresh the budget and per-arm statistics.

    After delegating to ``UCB.getReward``, the reward is added to
    ``self.budget``, the running mean ``self.estmeans[arm]`` is updated
    incrementally, and ``self.successes[arm]`` is incremented when the
    reward is strictly positive.
    """
    UCB.getReward(self, arm, reward)
    self.budget += reward

    # Incremental mean: new_mean = (old_mean * (n - 1) + reward) / n.
    # NOTE(review): relies on self.pulls[arm] already counting this pull,
    # presumably done by UCB.getReward above -- confirm.
    n_pulls = self.pulls[arm]
    previous_total = self.estmeans[arm] * (n_pulls - 1)
    self.estmeans[arm] = (previous_total + reward) / n_pulls

    if reward > 0:
        self.successes[arm] += 1
def __init__(self, nbArms, inibudget=10.0, safebudget=1.0, lower=-1.0, amplitude=2.0):
    """Initialise both bases: ``UCB`` and ``SafeAlg``.

    ``lower`` and ``amplitude`` are forwarded to ``UCB.__init__``.

    NOTE(review): ``inibudget`` and ``safebudget`` are accepted here but are
    not passed to either base initialiser, so any value given by the caller
    is ignored by this method -- confirm whether ``SafeAlg.__init__`` should
    receive them.
    """
    UCB.__init__(
        self,
        nbArms,
        lower=lower,
        amplitude=amplitude,
    )
    SafeAlg.__init__(self, nbArms)
def __init__(self, nbArms, inibudget=10.0, min_r=-1.0, max_r=+1.0, lower=0.0, amplitude=1.0):
    """Initialise the three bases in order: ``UCB``, ``Budgeted``, ``BernoulliEstimator``.

    - ``lower`` / ``amplitude`` are forwarded to ``UCB.__init__``.
    - ``inibudget`` / ``min_r`` / ``max_r`` are forwarded to ``Budgeted.__init__``.
    - ``nbArms`` is given to both ``UCB.__init__`` and ``BernoulliEstimator.__init__``.
    """
    UCB.__init__(
        self,
        nbArms,
        lower=lower,
        amplitude=amplitude,
    )
    Budgeted.__init__(
        self,
        inibudget=inibudget,
        min_r=min_r,
        max_r=max_r,
    )
    BernoulliEstimator.__init__(self, nbArms)
def getReward(self, arm, reward):
    """Record a reward and update the signed running mean for ``arm``.

    After delegating to ``UCB.getReward``, rewards ``>= 0`` update
    ``positive_count`` / ``positive_mean``; all other rewards update
    ``negative_count`` / ``negative_mean``. Each mean is maintained
    incrementally from its own counter.
    """
    UCB.getReward(self, arm, reward)

    # Pick the pair of per-arm containers matching the sign of the reward
    # (zero is counted on the positive side).
    if reward >= 0:
        counts, means = self.positive_count, self.positive_mean
    else:
        counts, means = self.negative_count, self.negative_mean

    counts[arm] += 1
    n_obs = counts[arm]
    # Incremental mean: new_mean = (old_mean * (n - 1) + reward) / n.
    means[arm] = (means[arm] * (n_obs - 1) + reward) / n_obs
def __init__(self, nbArms, inibudget=10.0, safebudget=1.0, min_r=-1.0, max_r=+1.0, lower=0.0, amplitude=1.0):
    """Initialise both bases: ``UCB`` and ``SafeAlg``.

    - ``lower`` / ``amplitude`` are forwarded to ``UCB.__init__``.
    - ``inibudget`` / ``min_r`` / ``max_r`` / ``safebudget`` are forwarded
      to ``SafeAlg.__init__``.
    """
    UCB.__init__(
        self,
        nbArms,
        lower=lower,
        amplitude=amplitude,
    )
    SafeAlg.__init__(
        self,
        nbArms,
        inibudget=inibudget,
        min_r=min_r,
        max_r=max_r,
        safebudget=safebudget,
    )
""" Example of use of SMPyBandits. See https://SMPyBandits.GitHub.io/API.html for more details!""" import numpy as np np.random.seed(0) # for reproducibility from SMPyBandits.Arms import Bernoulli arms = [Bernoulli(0.1), Bernoulli(0.9)] from SMPyBandits.Environment import MAB my_MAB_problem = MAB(arms) nbArms = my_MAB_problem.nbArms # 2 arms ! from SMPyBandits.Policies import UCB my_UCB_algo = UCB(nbArms) my_UCB_algo.startGame() # reset internal memory horizon = 1000 for t in range(horizon): # simulation loop chosen_arm = my_UCB_algo.choice() observed_reward = my_MAB_problem.draw(chosen_arm) my_UCB_algo.getReward(chosen_arm, observed_reward) cumulated_reward = sum(my_UCB_algo.rewards) # random! number_of_plays = sum(my_UCB_algo.pulls) # horizon = 1000 mean_reward = cumulated_reward / number_of_plays print("The UCB algorithm obtains here a mean reward =", mean_reward)
def startGame(self):
    """Reset the policy for a new run.

    Delegates to ``UCB.startGame``, restores ``self.budget`` to
    ``self.inibudget`` and zeroes ``self.estmeans`` and ``self.successes``
    in place.
    """
    UCB.startGame(self)

    # Budget goes back to its initial value.
    self.budget = self.inibudget

    # Per-arm statistics are cleared in place (the containers are reused).
    self.estmeans.fill(0.0)
    self.successes.fill(0)
def startGame(self):
    """Reset the policy by calling ``startGame`` on each base, in order:
    ``UCB``, ``Budgeted``, ``BernoulliEstimator``.
    """
    for base in (UCB, Budgeted, BernoulliEstimator):
        base.startGame(self)
def getReward(self, arm, reward):
    """Record a reward for ``arm`` via ``UCB.getReward`` and add it to ``self.budget``."""
    UCB.getReward(self, arm, reward)
    # The budget accumulates every observed reward.
    self.budget += reward
def __init__(self, nbArms, inibudget=10.0, lower=-1.0, amplitude=2.0):
    """Initialise the UCB base and the budget / per-arm estimator state.

    - ``lower`` / ``amplitude`` are forwarded to ``UCB.__init__``.
    - ``inibudget`` is stored as ``self.inibudget`` and is also the starting
      value of ``self.budget``.
    - ``self.estmeans`` (float zeros) and ``self.successes`` (int zeros) are
      allocated with one entry per arm.
    """
    UCB.__init__(self, nbArms, lower=lower, amplitude=amplitude)

    # Budget bookkeeping: remember the initial value and start from it.
    self.inibudget = self.budget = inibudget

    # Per-arm statistics.
    self.estmeans = np.zeros(nbArms)
    self.successes = np.zeros(nbArms, dtype='int')
def getReward(self, arm, reward):
    """Forward the observed ``reward`` for ``arm`` to ``UCB.getReward`` and
    then to ``SafeAlg.getReward``.
    """
    for base in (UCB, SafeAlg):
        base.getReward(self, arm, reward)
def choice(self):
    """Return the arm to play.

    ``SafeAlg.choice`` is consulted first; when it returns ``None`` the
    decision falls back to ``UCB.choice``.
    """
    safe_arm = SafeAlg.choice(self)
    if safe_arm is not None:
        return safe_arm
    return UCB.choice(self)
def startGame(self):
    """Reset the policy by calling ``startGame`` on ``UCB`` and then on ``SafeAlg``."""
    for base in (UCB, SafeAlg):
        base.startGame(self)
def getReward(self, arm, reward):
    """Forward the observed reward to every base, in order.

    ``UCB.getReward`` and ``BernoulliEstimator.getReward`` receive both the
    arm and the reward; ``Budgeted.getReward`` receives only the reward.
    """
    UCB.getReward(self, arm, reward)
    # Budgeted is called without the arm index.
    Budgeted.getReward(self, reward)
    BernoulliEstimator.getReward(self, arm, reward)
def startGame(self):
    """Reset the policy and the per-arm reward samples.

    After ``UCB.startGame``, ``self.reward_samples`` becomes a list with one
    independent array per arm, each initially holding the single value 0.0.
    """
    UCB.startGame(self)

    fresh_samples = []
    for _ in range(self.nbArms):
        # A new array per arm so that arms never share storage.
        fresh_samples.append(np.array([0.0]))
    self.reward_samples = fresh_samples
def startGame(self):
    """Reset the policy and the per-arm positive/negative reward statistics.

    After ``UCB.startGame``, four arrays of length ``self.nbArms`` are
    (re)allocated, all filled with zeros:

    - ``positive_mean`` / ``negative_mean``: float arrays,
    - ``positive_count`` / ``negative_count``: int arrays.

    The previous code called ``nb.repeat(value, n, dtype=...)``: ``nb`` is
    not the alias used for numpy elsewhere, and ``numpy.repeat`` takes no
    ``dtype`` keyword, so the call could not succeed. ``np.zeros`` produces
    the intended zero-filled arrays with an explicit dtype.
    """
    # Local import so this method does not depend on a module-level alias.
    import numpy as np

    UCB.startGame(self)

    n_arms = self.nbArms
    self.positive_mean = np.zeros(n_arms, dtype='float')
    self.negative_mean = np.zeros(n_arms, dtype='float')
    self.positive_count = np.zeros(n_arms, dtype='int')
    self.negative_count = np.zeros(n_arms, dtype='int')
def getReward(self, arm, reward):
    """Record a reward for ``arm`` and keep its sample array sorted.

    After delegating to ``UCB.getReward``, the reward is appended to
    ``self.reward_samples[arm]`` and the result is stored back sorted in
    ascending order (a new array is created on every call).
    """
    UCB.getReward(self, arm, reward)

    extended = np.append(self.reward_samples[arm], [reward])
    self.reward_samples[arm] = np.sort(extended)