def construct_non_parametric_MAB():
    arm1 = arms.ArmBernoulli(0.30, random_state=np.random.randint(1, 312414))
    arm2 = arms.ArmBeta(0.5, 0.5, random_state=np.random.randint(1, 312414))
    arm3 = arms.ArmBeta(1., 3., random_state=np.random.randint(1, 312414))
    arm4 = arms.ArmExp(1., random_state=np.random.randint(1, 312414))
    arm5 = arms.ArmFinite(np.array([0., 0.1, 0.5, 0.8]),
                          np.array([0.2, 0.3, 0.4, 0.1]))
    return [arm1, arm2, arm3, arm4, arm5]
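# Quick sanity check on the returned bandit (each arm object exposes a .mean
# attribute, as the snippets further below rely on):
MAB = construct_non_parametric_MAB()
means = [arm.mean for arm in MAB]
print("arm means:", means, "-> best arm:", int(np.argmax(means)))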
def __init__(self, lambdas=None, n_arms=None):
    Bandit.__init__(self)
    if lambdas is None:
        if n_arms is None:
            self.n_arms = 2
        else:
            self.n_arms = n_arms
        self.lambdas = np.linspace(1, self.n_arms, self.n_arms)
    else:
        self.lambdas = lambdas
        self.n_arms = len(lambdas)
    self.arms = [arms.ArmExp(lambd) for lambd in self.lambdas]
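# Minimal usage sketch (the class name ExpBandit is hypothetical -- this
# constructor could belong to any Bandit subclass built from exponential arms):
b1 = ExpBandit(lambdas=[0.4, 0.5, 1.0])  # explicit rates: three exponential arms
b2 = ExpBandit(n_arms=4)                 # rates default to np.linspace(1, 4, 4)
b3 = ExpBandit()                         # default: two arms with rates 1 and 2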
regret_ucb /= n_samples
regret_ts /= n_samples
regret_general_ts /= n_samples
# regret_naive /= n_samples

opt = pd.Series(np.linspace(1, time_horizon, time_horizon)) * p_star
regret_ucb += opt
regret_ts += opt
regret_general_ts += opt
# regret_naive += opt

# start at t = 1 to avoid log(0) in the oracle curve
regret_oracle = pd.Series([bandit.complexity() * np.log(t)
                           for t in range(1, time_horizon + 1)])

fig = plt.figure()
regret_ucb.plot(label='UCB regret')
regret_ts.plot(label='Bernoulli Thompson Sampling regret')
regret_general_ts.plot(label='General Thompson Sampling regret')
# regret_naive.plot(label='Naive algorithm regret')
regret_oracle.plot(label='Oracle regret')
plt.legend(loc=4)
plt.title('Regret curves')
fig.savefig(figtitle + ".png")

start = time.time()
bandit = bandits.Bandit([arms.ArmExp(.4), arms.ArmExp(.5),
                         arms.ArmBernoulli(.8), arms.ArmBernoulli(.9)])
q2(bandit)
print(time.time() - start)
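# The oracle curve above relies on bandit.complexity(), i.e. the Lai-Robbins
# constant C(MAB) = sum over suboptimal arms a of (mu* - mu_a) / KL(mu_a, mu*).
# A minimal sketch for Bernoulli-like means (hedged: the repo's complexity()
# may use a different divergence for the non-Bernoulli arms):
def kl_bernoulli(p, q, eps=1e-12):
    """KL divergence between Bernoulli(p) and Bernoulli(q)."""
    p = min(max(p, eps), 1 - eps)
    q = min(max(q, eps), 1 - eps)
    return p * np.log(p / q) + (1 - p) * np.log((1 - p) / (1 - q))

def lai_robbins_constant(means):
    mu_star = max(means)
    return sum((mu_star - mu) / kl_bernoulli(mu, mu_star)
               for mu in means if mu < mu_star)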
oracle = C * np.log(list_t)

plt.figure(3)
plt.clf()
plt.plot(list_t, R[0], label='Expected regret of UCB1')
plt.plot(list_t, R[1], label='Expected regret of TS')
plt.plot(list_t, R[2], label='Expected regret of epsilon-greedy')
plt.plot(list_t, oracle, label='Oracle')
# display the legend
plt.legend()

## Question 1:
arm1 = arms.ArmBernoulli(0.30, random_state=np.random.randint(1, 312414))
arm2 = arms.ArmBeta(0.20, 0.30, random_state=np.random.randint(1, 312414))
arm3 = arms.ArmExp(0.25, random_state=np.random.randint(1, 312414))
arm4 = arms.ArmFinite(np.array([0.3, 0.5, 0.2]), np.array([0.5, 0.1, 0.4]),
                      random_state=np.random.randint(1, 312414))
MAB = [arm1, arm2, arm3, arm4]

def TS_non_binarity(T, MAB):
    """Thompson Sampling for non-binary rewards in [0, 1]."""
    nb_arms = len(MAB)
    rew, draw = np.zeros(T), np.zeros(T)
    N = np.zeros(nb_arms)    # number of draws of each arm up to time t
    S = np.zeros_like(N)     # sum of rewards gathered by each arm up to time t
    tau = np.zeros(nb_arms)  # posterior samples, one per arm
    for t in range(T):
        for a in range(nb_arms):
            if N[a] == 0:
                tau[a] = np.random.rand()
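            # --- The original snippet is truncated here. A hedged sketch of how
            # the round typically continues (the Agrawal-Goyal binarization trick
            # for rewards in [0, 1]; assumes each arm exposes a sample() method):
            else:
                # sample from the Beta posterior of arm a
                tau[a] = np.random.beta(S[a] + 1, N[a] - S[a] + 1)
        a_t = int(np.argmax(tau))      # play the arm with the largest sample
        reward = MAB[a_t].sample()     # real-valued reward in [0, 1]
        # binarize: count a success with probability equal to the reward
        S[a_t] += float(np.random.rand() < reward)
        N[a_t] += 1
        rew[t], draw[t] = reward, a_t
    return rew, draw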
x = np.arange(1, T + 1)
plt.plot(x, reg1, label='UCB')
plt.plot(x, reg2, label='Thompson')
# plt.plot(x, reg3, label='Best arm')
plt.plot(x, oracle, label='Oracle')
plt.legend(['UCB', 'Thompson', 'Oracle'])
plt.xlabel('Rounds')
plt.ylabel('Cumulative Regret')
# plt.title('First problem')
plt.show()

# (Expected) regret curve for UCB and Thompson Sampling
npm_1 = arms.ArmBeta(0.7, 0.6)
npm_2 = arms.ArmBeta(0.5, 0.6)
npm_3 = arms.ArmExp(0.7)
npm_4 = arms.ArmExp(0.35)  # defined but not included in NPM below
NPM = [npm_1, npm_2, npm_3]
means = [el.mean for el in NPM]
mu_max = np.max(means)

rew4, draws4 = avg_bandit_game(NPM, T, strategy='ucb1', runs=100)
reg4 = mu_max * np.arange(1, T + 1) - np.cumsum(rew4)
rew5, draws5 = avg_bandit_game(NPM, T, strategy='thompson', runs=100)
reg5 = mu_max * np.arange(1, T + 1) - np.cumsum(rew5)

plt.figure(1)
x = np.arange(1, T + 1)
plt.plot(x, reg4, label='UCB')
plt.plot(x, reg5, label='Thompson')
plt.legend(['UCB', 'Thompson'])
plt.xlabel('Rounds')
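# avg_bandit_game is used above but not defined in this section. A plausible
# minimal sketch (bandit_game is a hypothetical single-run helper returning the
# reward and draw sequences of one simulation; hedged, not the original code):
def avg_bandit_game(MAB, T, strategy='ucb1', runs=100):
    """Average the reward/draw sequences of `strategy` over independent runs."""
    rew, draws = np.zeros(T), np.zeros(T)
    for _ in range(runs):
        r, d = bandit_game(MAB, T, strategy=strategy)  # hypothetical helper
        rew += r
        draws += d
    return rew / runs, draws / runs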
ax1.plot(np.arange(1, T + 1),
         mu_max1 * np.arange(1, T + 1) - np.cumsum(r1),
         label='rho=' + str(rho))
ax1.set_title('Cumulative regrets of UCB1 (MAB1)')
ax2.set_title('Cumulative regrets of UCB1 (MAB2)')
ax2.plot(np.arange(1, T + 1),
         # NB: this uses mu_max1; if MAB2's optimal mean differs, its regret
         # should be computed with that value instead
         mu_max1 * np.arange(1, T + 1) - np.cumsum(r2),
         label='rho=' + str(rho))
plt.legend()
plt.show()

##################### Question 2 - Implementation #########################
# (Expected) regret curve for UCB and Thompson Sampling
arm1 = arms.ArmBernoulli(0.50, random_state=np.random.randint(1, 312414))
arm2 = arms.ArmBeta(0.3, 0.45, random_state=np.random.randint(1, 312414))
arm3 = arms.ArmExp(0.20, random_state=np.random.randint(1, 312414))
arm4 = arms.ArmExp(0.10, random_state=np.random.randint(1, 312414))
arm5 = arms.ArmBernoulli(0.1, random_state=np.random.randint(1, 312414))
arm6 = arms.ArmFinite(X=np.array([0.1, 0.3, 0.7, 0.8]),
                      P=np.array([0.2, 0.4, 0.1, 0.3]),
                      random_state=np.random.randint(1, 312414))
MAB = [arm1, arm2, arm3, arm4, arm5, arm6]

print("Means of the diversified MAB arms (in order):")
for a in MAB:
    print(a.mean)

# bandit: set of arms
nb_arms = len(MAB)
means = [el.mean for el in MAB]
mu_max = np.max(means)
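# For reference, a minimal UCB1 sketch matching the rho-parameterized curves
# above (standard index: empirical mean + rho * sqrt(log(t) / (2 * N_a)); it
# assumes each arm exposes a sample() method -- a hedged sketch, not
# necessarily the implementation used elsewhere in this repo):
def UCB1(T, MAB, rho=1.0):
    nb_arms = len(MAB)
    rew, draw = np.zeros(T), np.zeros(T)
    N = np.zeros(nb_arms)  # number of draws per arm
    S = np.zeros(nb_arms)  # cumulated reward per arm
    for t in range(T):
        if t < nb_arms:
            a_t = t  # initialization: pull each arm once
        else:
            a_t = int(np.argmax(S / N + rho * np.sqrt(np.log(t) / (2 * N))))
        reward = MAB[a_t].sample()
        N[a_t] += 1
        S[a_t] += reward
        rew[t], draw[t] = reward, a_t
    return rew, draw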