Esempio n. 1
0
def simple_regret(bandit, best):
    """Return the per-step regret of each recommended arm.

    The reference value is the largest ``rewards[a] * probabilities[a]``
    product over arms ``0 .. bandit.get_num_arms() - 1``.

    Args:
        bandit: Object exposing indexable ``rewards`` and ``probabilities``
            and a ``get_num_arms()`` method.
        best: Sequence of arm indices, one per step.

    Returns:
        A list the same length as ``best`` whose i-th entry is the reference
        value minus the reward-probability product of arm ``best[i]``.
    """

    def expected_reward(arm):
        return bandit.rewards[arm] * bandit.probabilities[arm]

    optimum = max([expected_reward(arm) for arm in range(bandit.get_num_arms())])
    return [optimum - expected_reward(arm) for arm in best]
Esempio n. 2
0
def cumulative_regret(bandit, pulls):
    """Return the running total of regret over a sequence of pulls.

    The reference value is the largest ``rewards[a] * probabilities[a]``
    product over arms ``0 .. bandit.get_num_arms() - 1``.  Each pull
    contributes the reference value minus the product of the pulled arm.

    Args:
        bandit: Object exposing indexable ``rewards`` and ``probabilities``
            and a ``get_num_arms()`` method.
        pulls: Sequence of arm indices in the order they were pulled.

    Returns:
        A list the same length as ``pulls`` whose i-th entry is the sum of
        the contributions of pulls ``0 .. i``.
    """

    def expected_reward(arm):
        return bandit.rewards[arm] * bandit.probabilities[arm]

    optimum = max([expected_reward(arm) for arm in range(bandit.get_num_arms())])

    totals = []
    for arm in pulls:
        gap = optimum - expected_reward(arm)
        # The first entry is the gap itself; later entries extend the
        # previous running total.
        totals.append(totals[-1] + gap if totals else gap)
    return totals