def simple_regret(bandit, best):
    """Return the regret of each recommended arm in *best*.

    An arm's value is ``bandit.rewards[arm] * bandit.probabilities[arm]``
    (presumably its expected reward -- confirm against the bandit class).
    The regret of an arm is the largest such value over all
    ``bandit.get_num_arms()`` arms minus the arm's own value.

    Args:
        bandit: Object exposing ``rewards`` and ``probabilities`` (indexable
            by arm) and a ``get_num_arms()`` method.
        best: Sequence of arm indices, one per recommendation.

    Returns:
        A new list with the same length as *best*; entry ``i`` is the
        regret of arm ``best[i]``. Empty when *best* is empty.

    Raises:
        ValueError: If the bandit reports zero arms (``max`` of nothing).
    """
    rewards = bandit.rewards
    probabilities = bandit.probabilities

    # Value of the best arm; every regret is measured against it.
    max_expected = max(
        rewards[arm] * probabilities[arm]
        for arm in range(bandit.get_num_arms())
    )

    return [max_expected - (rewards[arm] * probabilities[arm]) for arm in best]
def cumulative_regret(bandit, pulls):
    """Return the running total of regret over a sequence of arm pulls.

    An arm's value is ``bandit.rewards[arm] * bandit.probabilities[arm]``
    (presumably its expected reward -- confirm against the bandit class).
    The regret of one pull is the largest such value over all
    ``bandit.get_num_arms()`` arms minus the pulled arm's value.

    Args:
        bandit: Object exposing ``rewards`` and ``probabilities`` (indexable
            by arm) and a ``get_num_arms()`` method.
        pulls: Sequence of arm indices in the order they were pulled.

    Returns:
        A new list with the same length as *pulls*; entry ``i`` is the sum
        of the regrets of pulls ``0..i``. Empty when *pulls* is empty.

    Raises:
        ValueError: If the bandit reports zero arms (``max`` of nothing).
    """
    # Function-scope import keeps this block self-contained.
    from itertools import accumulate

    rewards = bandit.rewards
    probabilities = bandit.probabilities

    # Value of the best arm; every regret is measured against it.
    max_expected = max(
        rewards[arm] * probabilities[arm]
        for arm in range(bandit.get_num_arms())
    )

    per_pull = (
        max_expected - (rewards[arm] * probabilities[arm]) for arm in pulls
    )
    # accumulate adds left to right, i.e. total[i] = total[i - 1] + regret[i],
    # with total[0] being the first regret itself.
    return list(accumulate(per_pull))