def construct_non_parametric_MAB():
    arm1 = arms.ArmBernoulli(0.30, random_state=np.random.randint(1, 312414))
    arm2 = arms.ArmBeta(0.5, 0.5, random_state=np.random.randint(1, 312414))
    arm3 = arms.ArmBeta(1., 3., random_state=np.random.randint(1, 312414))
    arm4 = arms.ArmExp(1., random_state=np.random.randint(1, 312414))
    arm5 = arms.ArmFinite(np.array([0., 0.1, 0.5, 0.8]),
                          np.array([0.2, 0.3, 0.4, 0.1]))
    return [arm1, arm2, arm3, arm4, arm5]
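# Quick sanity check on the returned bandit (each arm object exposes a .mean
# attribute, as the snippets further below rely on):
MAB = construct_non_parametric_MAB()
means = [arm.mean for arm in MAB]
print("arm means:", means, "-> best arm:", int(np.argmax(means)))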
def __init__(self, lambdas=None, n_arms=None):
    Bandit.__init__(self)
    if lambdas is None:
        if n_arms is None:
            self.n_arms = 2
        else:
            self.n_arms = n_arms
        self.lambdas = np.linspace(1, self.n_arms, self.n_arms)
    else:
        self.lambdas = lambdas
        self.n_arms = len(lambdas)
    self.arms = [arms.ArmExp(lambd) for lambd in self.lambdas]
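# Minimal usage sketch (the class name ExpBandit is hypothetical -- this
# constructor could belong to any Bandit subclass built from exponential arms):
b1 = ExpBandit(lambdas=[0.4, 0.5, 1.0])  # explicit rates: three exponential arms
b2 = ExpBandit(n_arms=4)                 # rates default to np.linspace(1, 4, 4)
b3 = ExpBandit()                         # default: two arms with rates 1 and 2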
regret_ucb /= n_samples
regret_ts /= n_samples
regret_general_ts /= n_samples
# regret_naive /= n_samples

opt = pd.Series(np.linspace(1, time_horizon, time_horizon)) * p_star
regret_ucb += opt
regret_ts += opt
regret_general_ts += opt
# regret_naive += opt

# start at t = 1 to avoid log(0) in the oracle curve
regret_oracle = pd.Series([bandit.complexity() * np.log(t)
                           for t in range(1, time_horizon + 1)])

fig = plt.figure()
regret_ucb.plot(label='UCB regret')
regret_ts.plot(label='Bernoulli Thompson Sampling regret')
regret_general_ts.plot(label='General Thompson Sampling regret')
# regret_naive.plot(label='Naive algorithm regret')
regret_oracle.plot(label='Oracle regret')
plt.legend(loc=4)
plt.title('Regret curves')
fig.savefig(figtitle + ".png")

start = time.time()
bandit = bandits.Bandit([arms.ArmExp(.4), arms.ArmExp(.5),
                         arms.ArmBernoulli(.8), arms.ArmBernoulli(.9)])
q2(bandit)
print(time.time() - start)
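# The oracle curve above relies on bandit.complexity(), i.e. the Lai-Robbins
# constant C(MAB) = sum over suboptimal arms a of (mu* - mu_a) / KL(mu_a, mu*).
# A minimal sketch for Bernoulli-like means (hedged: the repo's complexity()
# may use a different divergence for the non-Bernoulli arms):
def kl_bernoulli(p, q, eps=1e-12):
    """KL divergence between Bernoulli(p) and Bernoulli(q)."""
    p = min(max(p, eps), 1 - eps)
    q = min(max(q, eps), 1 - eps)
    return p * np.log(p / q) + (1 - p) * np.log((1 - p) / (1 - q))

def lai_robbins_constant(means):
    mu_star = max(means)
    return sum((mu_star - mu) / kl_bernoulli(mu, mu_star)
               for mu in means if mu < mu_star)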
oracle = C * np.log(list_t)

plt.figure(3)
plt.clf()
plt.plot(list_t, R[0], label='Expected regret of UCB1')
plt.plot(list_t, R[1], label='Expected regret of TS')
plt.plot(list_t, R[2], label='Expected regret of epsilon-greedy')
plt.plot(list_t, oracle, label='Oracle')
# display the legend
plt.legend()

## Question 1:
arm1 = arms.ArmBernoulli(0.30, random_state=np.random.randint(1, 312414))
arm2 = arms.ArmBeta(0.20, 0.30, random_state=np.random.randint(1, 312414))
arm3 = arms.ArmExp(0.25, random_state=np.random.randint(1, 312414))
arm4 = arms.ArmFinite(np.array([0.3, 0.5, 0.2]), np.array([0.5, 0.1, 0.4]),
                      random_state=np.random.randint(1, 312414))
MAB = [arm1, arm2, arm3, arm4]

def TS_non_binarity(T, MAB):
    """Thompson Sampling for non-binary rewards in [0, 1]."""
    nb_arms = len(MAB)
    rew, draw = np.zeros(T), np.zeros(T)
    N = np.zeros(nb_arms)    # number of draws of each arm up to time t
    S = np.zeros_like(N)     # sum of rewards gathered by each arm up to time t
    tau = np.zeros(nb_arms)  # posterior samples, one per arm
    for t in range(T):
        for a in range(nb_arms):
            if N[a] == 0:
                tau[a] = np.random.rand()
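            # --- The original snippet is truncated here. A hedged sketch of how
            # the round typically continues (the Agrawal-Goyal binarization trick
            # for rewards in [0, 1]; assumes each arm exposes a sample() method):
            else:
                # sample from the Beta posterior of arm a
                tau[a] = np.random.beta(S[a] + 1, N[a] - S[a] + 1)
        a_t = int(np.argmax(tau))      # play the arm with the largest sample
        reward = MAB[a_t].sample()     # real-valued reward in [0, 1]
        # binarize: count a success with probability equal to the reward
        S[a_t] += float(np.random.rand() < reward)
        N[a_t] += 1
        rew[t], draw[t] = reward, a_t
    return rew, draw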
x = np.arange(1, T + 1)
plt.plot(x, reg1, label='UCB')
plt.plot(x, reg2, label='Thompson')
# plt.plot(x, reg3, label='Best arm')
plt.plot(x, oracle, label='Oracle')
plt.legend(['UCB', 'Thompson', 'Oracle'])
plt.xlabel('Rounds')
plt.ylabel('Cumulative Regret')
# plt.title('First problem')
plt.show()

# (Expected) regret curve for UCB and Thompson Sampling
npm_1 = arms.ArmBeta(0.7, 0.6)
npm_2 = arms.ArmBeta(0.5, 0.6)
npm_3 = arms.ArmExp(0.7)
npm_4 = arms.ArmExp(0.35)  # defined but not included in NPM below
NPM = [npm_1, npm_2, npm_3]
means = [el.mean for el in NPM]
mu_max = np.max(means)

rew4, draws4 = avg_bandit_game(NPM, T, strategy='ucb1', runs=100)
reg4 = mu_max * np.arange(1, T + 1) - np.cumsum(rew4)
rew5, draws5 = avg_bandit_game(NPM, T, strategy='thompson', runs=100)
reg5 = mu_max * np.arange(1, T + 1) - np.cumsum(rew5)

plt.figure(1)
x = np.arange(1, T + 1)
plt.plot(x, reg4, label='UCB')
plt.plot(x, reg5, label='Thompson')
plt.legend(['UCB', 'Thompson'])
plt.xlabel('Rounds')
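# avg_bandit_game is used above but not defined in this section. A plausible
# minimal sketch (bandit_game is a hypothetical single-run helper returning the
# reward and draw sequences of one simulation; hedged, not the original code):
def avg_bandit_game(MAB, T, strategy='ucb1', runs=100):
    """Average the reward/draw sequences of `strategy` over independent runs."""
    rew, draws = np.zeros(T), np.zeros(T)
    for _ in range(runs):
        r, d = bandit_game(MAB, T, strategy=strategy)  # hypothetical helper
        rew += r
        draws += d
    return rew / runs, draws / runs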
ax1.plot(np.arange(1, T + 1),
         mu_max1 * np.arange(1, T + 1) - np.cumsum(r1),
         label='rho=' + str(rho))
ax1.set_title('Cumulative regrets of UCB1 (MAB1)')
ax2.set_title('Cumulative regrets of UCB1 (MAB2)')
ax2.plot(np.arange(1, T + 1),
         # NB: this uses mu_max1; if MAB2's optimal mean differs, its regret
         # should be computed with that value instead
         mu_max1 * np.arange(1, T + 1) - np.cumsum(r2),
         label='rho=' + str(rho))
plt.legend()
plt.show()

##################### Question 2 - Implementation #########################
# (Expected) regret curve for UCB and Thompson Sampling
arm1 = arms.ArmBernoulli(0.50, random_state=np.random.randint(1, 312414))
arm2 = arms.ArmBeta(0.3, 0.45, random_state=np.random.randint(1, 312414))
arm3 = arms.ArmExp(0.20, random_state=np.random.randint(1, 312414))
arm4 = arms.ArmExp(0.10, random_state=np.random.randint(1, 312414))
arm5 = arms.ArmBernoulli(0.1, random_state=np.random.randint(1, 312414))
arm6 = arms.ArmFinite(X=np.array([0.1, 0.3, 0.7, 0.8]),
                      P=np.array([0.2, 0.4, 0.1, 0.3]),
                      random_state=np.random.randint(1, 312414))
MAB = [arm1, arm2, arm3, arm4, arm5, arm6]

print("Means of the diversified MAB arms (in order):")
for a in MAB:
    print(a.mean)

# bandit: set of arms
nb_arms = len(MAB)
means = [el.mean for el in MAB]
mu_max = np.max(means)
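# For reference, a minimal UCB1 sketch matching the rho-parameterized curves
# above (standard index: empirical mean + rho * sqrt(log(t) / (2 * N_a)); it
# assumes each arm exposes a sample() method -- a hedged sketch, not
# necessarily the implementation used elsewhere in this repo):
def UCB1(T, MAB, rho=1.0):
    nb_arms = len(MAB)
    rew, draw = np.zeros(T), np.zeros(T)
    N = np.zeros(nb_arms)  # number of draws per arm
    S = np.zeros(nb_arms)  # cumulated reward per arm
    for t in range(T):
        if t < nb_arms:
            a_t = t  # initialization: pull each arm once
        else:
            a_t = int(np.argmax(S / N + rho * np.sqrt(np.log(t) / (2 * N))))
        reward = MAB[a_t].sample()
        N[a_t] += 1
        S[a_t] += reward
        rew[t], draw[t] = reward, a_t
    return rew, draw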