Python ArmFinite 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: arms

메소드/함수: ArmFinite

hotexamples.com의 예제 수: 4

Python ArmFinite - 4개의 예제를 찾았습니다. 아래는 오픈 소스 프로젝트에서 추출한 Python arms.ArmFinite의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: bandits.py 프로젝트: GeoffNN/BanditsGameTheoryRL

 def __init__(self, parameter_list=None):
     """Create one ``arms.ArmFinite`` per ``(x, p)`` pair.

     Args:
         parameter_list: Iterable of 2-item pairs; each pair is unpacked
             and passed positionally to ``arms.ArmFinite``. When ``None``,
             the single default pair ``(0, 1)`` is used.

     Attributes set:
         parameter_list: The resolved list of pairs (default applied).
         arms: The constructed ``ArmFinite`` objects, in input order.
         n_arms: Number of constructed arms.
     """
     if parameter_list is None:
         parameter_list = [(0, 1)]
     self.parameter_list = parameter_list
     # Iterate over the resolved list, not the raw argument: the raw
     # argument may be None, which would raise TypeError on iteration.
     self.arms = [arms.ArmFinite(x, p) for x, p in self.parameter_list]
     self.n_arms = len(self.arms)

예제 #2

파일 보기

def construct_non_parametric_MAB():
    """Return a list of five arms of mixed families.

    The list holds, in order: one Bernoulli arm, two Beta arms, one Exp arm
    and one Finite arm. The first four receive a ``random_state`` drawn from
    ``np.random.randint(1, 312414)``; the Finite arm is built without one.
    """
    def draw_seed():
        # One fresh integer per arm, consumed in list order.
        return np.random.randint(1, 312414)

    return [
        arms.ArmBernoulli(0.30, random_state=draw_seed()),
        arms.ArmBeta(0.5, 0.5, random_state=draw_seed()),
        arms.ArmBeta(1., 3., random_state=draw_seed()),
        arms.ArmExp(1., random_state=draw_seed()),
        # NOTE(review): the two arrays are presumably the support values and
        # their probabilities -- confirm against the arms module.
        arms.ArmFinite(np.array([0., 0.1, 0.5, 0.8]),
                       np.array([0.2, 0.3, 0.4, 0.1])),
    ]

예제 #3

파일 보기

파일: mainTP2_SMAB.py 프로젝트: VincentPlassier/Reinforcement-Learning-MVA


# Figure 3: overlay the first three rows of R and the oracle curve on the
# shared x-axis list_t, then show the legend.
plt.figure(3)
plt.clf()
_curve_labels = ('Expected regret of UCB1', 'Expected regret of TS', 'Eps_Greedy')
for _row, _curve_label in enumerate(_curve_labels):
    plt.plot(list_t, R[_row], label=_curve_label)
plt.plot(list_t, oracle, label='Oracle')
plt.legend()


## Question 1:
# Build a bandit whose arms come from four different reward families.
# Every arm receives its own random_state, drawn from the global NumPy RNG
# via np.random.randint(1, 312414).
arm1 = arms.ArmBernoulli(0.30, random_state=np.random.randint(1, 312414))
arm2 = arms.ArmBeta(0.20, 0.30, random_state=np.random.randint(1, 312414))
arm3 = arms.ArmExp(0.25, random_state=np.random.randint(1, 312414))
# NOTE(review): the two arrays are presumably the support values and their
# probabilities (the second sums to 1.0) -- confirm against the arms module.
arm4 = arms.ArmFinite(np.array([0.3,0.5,0.2]), np.array([0.5,0.1,0.4]), random_state=np.random.randint(1, 312414))

# The bandit is represented as a plain list of arm objects.
MAB = [arm1, arm2, arm3, arm4]


def TS_non_binarity(T,MAB):
    nb_arms = len(MAB)
    rew, draw = np.zeros(T), np.zeros(T)
    N = np.zeros(nb_arms) # number of draws of arms up to time t
    S = np.zeros_like(N) # sum of rewards gathered up to time t
    tau = np.zeros(nb_arms)
    for t in range(T):
        for a in range(nb_arms):
            if N[a] == 0:
                tau[a] = np.random.rand()
            else:

예제 #4

파일 보기

파일: mainTP2_SMAB.py 프로젝트: AmineKheldouni/Reinforcement-Learning-Overview

    # Plot mu_max1 * t - cumsum(r2) against t = 1..T, labelled with the
    # current rho value.
    # NOTE(review): this looks like a cumulative-regret curve (best mean
    # times t minus cumulative reward) -- confirm against the definitions of
    # mu_max1 and r2, which are outside this excerpt.
    ax2.plot(np.arange(1, T + 1),
             mu_max1 * np.arange(1, T + 1) - np.cumsum(r2),
             label='rho=' + str(rho))
# Add the legend and display the figure(s).
plt.legend()
plt.show()

##################### Question 2 - Implementation #########################

# (Expected) regret curve for UCB and Thompson Sampling
# Six arms from four different reward families; every arm receives its own
# random_state drawn from the global NumPy RNG via
# np.random.randint(1, 312414).
arm1 = arms.ArmBernoulli(0.50, random_state=np.random.randint(1, 312414))
arm2 = arms.ArmBeta(0.3, 0.45, random_state=np.random.randint(1, 312414))
arm3 = arms.ArmExp(0.20, random_state=np.random.randint(1, 312414))
arm4 = arms.ArmExp(0.10, random_state=np.random.randint(1, 312414))
arm5 = arms.ArmBernoulli(0.1, random_state=np.random.randint(1, 312414))
# NOTE(review): X and P are presumably the support values and their
# probabilities (P sums to 1.0) -- confirm against the arms module.
arm6 = arms.ArmFinite(X=np.array([0.1, 0.3, 0.7, 0.8]),
                      P=np.array([0.2, 0.4, 0.1, 0.3]),
                      random_state=np.random.randint(1, 312414))

# The bandit is represented as a plain list of arm objects.
MAB = [arm1, arm2, arm3, arm4, arm5, arm6]
print("Means of diversified MAB arms (respectively)")
# Print each arm's `mean` attribute, in the same order as MAB.
for a in MAB:
    print(a.mean)
# bandit : set of arms
nb_arms = len(MAB)
# Collect the per-arm means and keep the largest one.
means = [el.mean for el in MAB]
mu_max = np.max(means)


def TSnonbinary(T, MAB, N=50):
    draws = np.zeros((N, T))
    rew = np.zeros((N, T))