Beispiel #1
0
def construct_non_parametric_MAB():
    """Return a list of five arms for a non-parametric bandit problem.

    The arms are, in order: ``ArmBernoulli(0.30)``, ``ArmBeta(0.5, 0.5)``,
    ``ArmBeta(1., 3.)``, ``ArmExp(1.)`` and an ``ArmFinite`` built from two
    four-element arrays.  Every arm except the last one receives its own
    seed drawn from ``np.random.randint(1, 312414)``; the ``ArmFinite`` arm
    is created without an explicit ``random_state``.
    """
    def draw_seed():
        # One fresh draw from the global NumPy RNG per seeded arm.
        return np.random.randint(1, 312414)

    bernoulli_arm = arms.ArmBernoulli(0.30, random_state=draw_seed())
    beta_arm_a = arms.ArmBeta(0.5, 0.5, random_state=draw_seed())
    beta_arm_b = arms.ArmBeta(1., 3., random_state=draw_seed())
    exp_arm = arms.ArmExp(1., random_state=draw_seed())
    finite_arm = arms.ArmFinite(
        np.array([0., 0.1, 0.5, 0.8]),
        np.array([0.2, 0.3, 0.4, 0.1]),
    )
    return [bernoulli_arm, beta_arm_a, beta_arm_b, exp_arm, finite_arm]
# Constant multiplying log(t) in the oracle curve: the sum, over the entries
# of list_pbis, of (p_star - p) / kl(p, p_star).
C = np.sum((p_star - list_pbis) / kl(list_pbis, p_star))
oracle = C * np.log(list_t)


# Draw the three regret curves stored in R together with the oracle curve
# on a freshly cleared figure 3.
plt.figure(3)
plt.clf()
for curve, curve_label in (
        (R[0], 'Expected regret of UCB1'),
        (R[1], 'Expected regret of TS'),
        (R[2], 'Eps_Greedy'),
        (oracle, 'Oracle'),
):
    plt.plot(list_t, curve, label=curve_label)
plt.legend()


## Question 1: build a four-arm bandit problem.
# Every arm is seeded with its own draw from the global NumPy RNG.
seed_low, seed_high = 1, 312414
arm1 = arms.ArmBernoulli(
    0.30, random_state=np.random.randint(seed_low, seed_high))
arm2 = arms.ArmBeta(
    0.20, 0.30, random_state=np.random.randint(seed_low, seed_high))
arm3 = arms.ArmExp(
    0.25, random_state=np.random.randint(seed_low, seed_high))
arm4 = arms.ArmFinite(
    np.array([0.3, 0.5, 0.2]),
    np.array([0.5, 0.1, 0.4]),
    random_state=np.random.randint(seed_low, seed_high))

MAB = [arm1, arm2, arm3, arm4]


# Bandit routine run for T rounds over the arms in MAB; judging by its name
# it is a Thompson-Sampling variant for non-binary rewards.
# NOTE(review): only the set-up and the head of the main loop are visible
# here -- the remainder of the body is missing from this excerpt.
def TS_non_binarity(T,MAB):
    nb_arms = len(MAB)
    # Two zero-initialised buffers with one slot per round.
    rew, draw = np.zeros(T), np.zeros(T)
    N = np.zeros(nb_arms) # number of draws of each arm up to time t
    S = np.zeros_like(N) # sum of rewards gathered from each arm up to time t
    # Per-arm vector of zeros; how it is used is not visible in this excerpt.
    tau = np.zeros(nb_arms)
    for t in range(T):
        for a in range(nb_arms):
            # Special case for an arm that has never been drawn so far.
            if N[a] == 0:
    # Plot the cumulative-regret curves on figure 1, with one x value per
    # round 1..T.
    plt.figure(1)
    x = np.arange(1, T + 1)
    plt.plot(x, reg1, label='UCB')
    plt.plot(x, reg2, label='Thompson')
    # plt.plot(x, reg3, label='Best arm')
    plt.plot(x, oracle, label='Oracle')
    # Explicit legend entries, listed in the same order as the curves above.
    plt.legend(['UCB', 'Thompson', 'Oracle'])
    plt.xlabel('Rounds')
    plt.ylabel('Cumulative Regret')
    # plt.title('First problem')

    # Display the figure.
    plt.show()
# (Expected) regret curve for UCB and Thompson Sampling

# Second bandit problem, built from Beta and Exp arms.
npm_1 = arms.ArmBeta(0.7, 0.6)
npm_2 = arms.ArmBeta(0.5, 0.6)
npm_3 = arms.ArmExp(0.7)
npm_4 = arms.ArmExp(0.35)
# NOTE(review): npm_4 is constructed but not included in NPM -- confirm that
# leaving it out of the problem is intentional.
NPM = [npm_1, npm_2, npm_3]

# The regret is measured against the arm with the largest mean.
means = [el.mean for el in NPM]
mu_max = np.max(means)

# Round indices 1..T, shared by the regret formulas and the x axis.
x = np.arange(1, T + 1)

# Regret curve = mu_max * t minus the cumulative sum of the rewards returned
# by avg_bandit_game (called with runs=100) for each strategy.
rew4, draws4 = avg_bandit_game(NPM, T, strategy='ucb1', runs=100)
reg4 = mu_max * x - np.cumsum(rew4)
rew5, draws5 = avg_bandit_game(NPM, T, strategy='thompson', runs=100)
reg5 = mu_max * x - np.cumsum(rew5)

plt.figure(1)
plt.plot(x, reg4, label='UCB')
# Label spelling fixed ('Thomson' -> 'Thompson') to match the strategy name.
plt.plot(x, reg5, label='Thompson')
def main():
    """Compare EXP3, EXP3.P and EXP3-IX on a loss sequence with a switch.

    Two 15-arm environments are built that differ only in their tenth arm
    (``ArmBernoulli(0.50 + delta)`` in ``MAB1`` versus
    ``ArmBernoulli(0.50 - 4 * delta)`` in ``MAB2``).  A single loss sequence
    is produced by ``produce_loss_sequence`` with ``env_change=True`` and
    ``change_time = T / 2``.  For each value of ``eta`` every algorithm is
    run ``repeat`` times on that sequence; the regret against the best
    single arm is recorded over the whole horizon and over the first half.
    The mean and standard deviation of both regrets are finally plotted
    against ``eta`` in two side-by-side panels.
    """
    # Build your own bandit problem
    random_state = np.random.randint(1, 312414)
    delta = 0.1

    # Bernoulli loss arms.  Arms 1-8 all use parameter 0.50; note that every
    # Bernoulli arm here is given the same ``random_state`` value.
    shared_head = [
        arms.ArmBernoulli(0.50, random_state=random_state) for _ in range(8)
    ]
    shared_head.append(
        arms.ArmBernoulli(0.50 - delta, random_state=random_state))
    arm10_1 = arms.ArmBernoulli(0.50 + delta, random_state=random_state)
    arm10_2 = arms.ArmBernoulli(0.50 - 4 * delta, random_state=random_state)

    # Piecewise and Beta arms, shared by both environments.
    shared_tail = [
        arms.ArmPieceConstant(mean=0.5, delta=0.2, fre=500, random_state=0),
        arms.ArmPieceIncrease(
            lower=0, upper=1, delta=0.1, prob=0.001, random_state=0),
        arms.ArmPieceDecrease(
            lower=0, upper=1, delta=0.1, prob=0.001, random_state=0),
        arms.ArmBeta(a=2, b=2, random_state=0),
        arms.ArmBeta(a=0.5, b=0.5, random_state=0),
    ]

    # The two environments hold the very same arm objects except for the
    # tenth position.
    MAB1 = shared_head + [arm10_1] + shared_tail
    MAB2 = shared_head + [arm10_2] + shared_tail

    # T is kept as a float, so positions derived from it go through int().
    T = 1e4
    K = len(MAB1)
    change_time = int(T / 2)

    loss_sequence = produce_loss_sequence(env=MAB1,
                                          T=T,
                                          env_change=True,
                                          new_env=MAB2,
                                          change_time=change_time)

    # Smallest cumulative loss of any single arm: over every round, and
    # over the rounds before the change point only.
    single_global_best = np.min(np.sum(loss_sequence, axis=0))
    single_global_best_2 = np.min(
        np.sum(loss_sequence[:change_time], axis=0))

    etas = [10**exponent for exponent in np.linspace(-2.5, 0, 8)]
    repeat = 50
    # These two do not depend on eta, so they are set once up front.
    gamma = 0.005
    beta = gamma / K

    def record(bucket, loss):
        # bucket[0] collects full-horizon regrets, bucket[1] the regrets
        # accumulated up to the change point.
        bucket[0].append(np.sum(loss) - single_global_best)
        bucket[1].append(np.sum(loss[:change_time]) - single_global_best_2)

    algos = ['EXP3', 'EXP3.P', 'EXP3-IX']
    regrets = {name: [] for name in algos}
    for eta in etas:
        per_eta = {name: [[], []] for name in algos}
        for _ in range(repeat):
            # Call order (EXP3.P, EXP3, EXP3-IX) is kept fixed within each
            # repetition.
            _, loss = EXP3_P(loss_sequence=loss_sequence,
                             eta=eta,
                             gamma=gamma,
                             beta=beta,
                             T=T)
            record(per_eta['EXP3.P'], loss)

            _, loss = EXP3(loss_sequence=loss_sequence,
                           eta=eta,
                           gamma=gamma,
                           T=T)
            record(per_eta['EXP3'], loss)

            _, loss = EXP3_IX(loss_sequence=loss_sequence,
                              eta=eta,
                              gamma=gamma,
                              T=T)
            record(per_eta['EXP3-IX'], loss)
        for name in algos:
            regrets[name].append(per_eta[name])

    # Each stacked array is indexed (eta, horizon, repetition); reducing
    # over the repetitions and transposing yields rows indexed by horizon
    # (0 = full run, 1 = first half) and columns indexed by eta.
    summaries = []
    for name in algos:
        stacked = np.array(regrets[name])
        summaries.append((name,
                          np.mean(stacked, axis=2).T,
                          np.std(stacked, axis=2).T))

    # Two subplots: regret at T/2 on the left, regret at T on the right.
    _fig, (ax1, ax2) = plt.subplots(1, 2, sharey=False, figsize=(14, 6))
    panels = (
        (ax1, 1, r'Regret at $T/2$'),
        (ax2, 0, r'Regret at $T$'),
    )
    for axis, row, y_label in panels:
        for name, mean, std in summaries:
            axis.errorbar(etas,
                          mean[row],
                          yerr=std[row],
                          fmt='-o',
                          label=name)
        axis.set_xscale('log')
        axis.set_xlabel(r'$\eta$ multiplier', fontsize=14)
        axis.set_ylabel(y_label, fontsize=14)
        axis.legend()
    plt.show()