def expected_performance(x):
    """Example of how the expected performance can be computed.

    Parameters
    ----------
    x : sequence of float
        Parameter vector: x[0] = alpha, x[1] = beta (perceptual model),
        x[2] = theta (response model).

    Returns
    -------
    float
        Expected performance averaged over all environments and blocks.
    """
    pm_pars = {'alpha': x[0], 'beta': x[1]}  # perceptual model parameters
    rm_pars = {'theta': x[2]}                # response model parameters

    T = 100       # number of trials
    n_b = 2       # number of bandits
    rho = 0.01    # switch probability of the arm-reward contingencies
    n_env = 100   # number of environments
    n_blocks = 1  # number of experimental blocks
    # BUG FIX: d_r was read below but never defined in this function, so a
    # stand-alone call raised NameError. The response-model dimensionality
    # equals the number of bandits, matching the convention used elsewhere
    # in this file (d_r = n_b).
    d_r = n_b

    ep = 0  # expected performance accumulator
    for n in range(n_env):
        # generate n_env multi-armed bandit environments
        env = MultiArmedBandit(T, rho=rho, n_b=n_b)
        pm = RescorlaWagner(env, n_b)
        for m in range(n_blocks):
            # in each environment repeat the experiment n_blocks times
            rm = SoftMaxResponses([], pm, d_r)
            rm.get_responses(pm_pars, rm_pars)
            # for each block accumulate the expected performance
            ep += env.expected_performance()

    return ep / (n_env * n_blocks)
def expected_performance(x):
    """Example of how the expected performance can be computed."""
    # Split the candidate parameter vector into model-specific dicts.
    pm_pars = dict(alpha=x[0], beta=x[1])  # perceptual model parameters
    rm_pars = dict(theta=x[2])             # response model parameters

    T = 100          # trials per block
    n_b = 2          # number of bandit arms
    rho = 0.01       # probability that the arm-reward contingencies switch
    n_env = 100      # how many environments to average over
    n_blocks = 1     # experimental blocks per environment

    total = 0  # running sum of per-block expected performance
    for _ in range(n_env):
        # A fresh multi-armed bandit environment for every repetition.
        env = MultiArmedBandit(T, rho=rho, n_b=n_b)
        pm = RescorlaWagner(env, n_b)
        for _ in range(n_blocks):
            # Simulate responses, then score this block.
            # NOTE(review): d_r is looked up in an enclosing/module scope —
            # verify it is defined before this function is called.
            rm = SoftMaxResponses([], pm, d_r)
            rm.get_responses(pm_pars, rm_pars)
            total += env.expected_performance()

    # Average over every simulated block.
    return total / (n_env * n_blocks)
def main():
    """Fit the Rescorla-Wagner learning rate by minimising free energy.

    First on a single environment, then jointly over multiple experimental
    blocks; plots observations, hidden states and posterior beliefs.
    """
    from environments import MultiArmedBandit
    import seaborn as sns
    sns.set(style="white", palette="muted", color_codes=True)

    T = 100  # number of trials
    env = MultiArmedBandit(T)
    pm = RescorlaWagner(env, env.d_x)
    obs = env.get_observations()
    hst = env.get_hidden_states()

    # use isres for optimisation of one dimensional functions
    from optmethods import isres
    bounds = {'ub': np.array([1.]), 'lb': np.array([0.])}
    f_opt, x_opt, res = isres(pm.get_free_energy, 1, 1e-6, 1e-8, bounds,
                              np.array([0.5]))
    print(f_opt, x_opt, res)

    # plot observations, hidden states and the fitted posterior beliefs
    post = pm.get_beliefs(alpha=x_opt)
    ax = obs.plot(y=r'$o_t$', style='go')
    ax = hst.plot(y=r'$p_t$', style='k--', ax=ax)
    ax = post.plot(y=r'$\mu_t$', style='r-', ax=ax)
    ax.legend(numpoints=1)

    # optimize perceptual surprise over multiple experimental blocks
    def total_fe(x, n_pars, blocks):
        # Sum the free energy of one parameter vector over all blocks.
        fe = 0
        for b in blocks:
            pm = RescorlaWagner(b, b.d_x)
            fe += pm.get_free_energy(x)
        return fe

    n = 100  # number of experimental blocks
    T = 100
    # BUG FIX: `[MultiArmedBandit(T)] * n` replicated a single environment
    # object n times, so every "block" was the very same experiment and the
    # multi-block optimisation degenerated to n copies of one block.  Build
    # n independent environments instead.
    exp_blocks = [MultiArmedBandit(T) for _ in range(n)]
    fe = lambda x, p: total_fe(x, p, exp_blocks)
    f_opt, x_opt, res = isres(fe, 1, 1e-6, 1e-8, bounds, np.array([0.5]))
    print(f_opt / n, x_opt, res)  # report the per-block free energy

    # re-plot with the jointly fitted learning rate
    post = pm.get_beliefs(alpha=x_opt)
    ax = obs.plot(y=r'$o_t$', style='go')
    ax = hst.plot(y=r'$p_t$', style='k--', ax=ax)
    ax = post.plot(y=r'$\mu_t$', style='r-', ax=ax)
    ax.legend(numpoints=1)
def main():
    """Fit the Rescorla-Wagner learning rate by free-energy minimisation,
    first on a single bandit environment and then across many blocks."""
    from environments import MultiArmedBandit
    import seaborn as sns

    sns.set(style="white", palette="muted", color_codes=True)

    T = 100
    env = MultiArmedBandit(T)
    pm = RescorlaWagner(env, env.d_x)
    obs = env.get_observations()
    hst = env.get_hidden_states()

    # ISRES handles this one-dimensional optimisation problem.
    from optmethods import isres
    bounds = {'ub': np.array([1.]), 'lb': np.array([0.])}
    f_opt, x_opt, res = isres(
        pm.get_free_energy, 1, 1e-6, 1e-8, bounds, np.array([0.5])
    )
    print(f_opt, x_opt, res)

    # Overlay observations, hidden states and fitted posterior beliefs.
    post = pm.get_beliefs(alpha=x_opt)
    axis = obs.plot(y=r'$o_t$', style='go')
    axis = hst.plot(y=r'$p_t$', style='k--', ax=axis)
    axis = post.plot(y=r'$\mu_t$', style='r-', ax=axis)
    axis.legend(numpoints=1)

    # Optimise perceptual surprise jointly over several experimental blocks.
    def total_fe(x, n_pars, blocks):
        # Accumulate free energy of one parameter vector across all blocks.
        return sum(
            RescorlaWagner(block, block.d_x).get_free_energy(x)
            for block in blocks
        )

    n = 100
    T = 100
    # NOTE(review): list repetition stores 100 references to ONE environment
    # object, so every "block" is the same experiment — confirm this is
    # intentional.
    exp_blocks = [MultiArmedBandit(T)] * 100

    def fe(x, p):
        return total_fe(x, p, exp_blocks)

    f_opt, x_opt, res = isres(fe, 1, 1e-6, 1e-8, bounds, np.array([0.5]))
    print(f_opt / n, x_opt, res)

    # Re-plot using the jointly fitted learning rate.
    post = pm.get_beliefs(alpha=x_opt)
    axis = obs.plot(y=r'$o_t$', style='go')
    axis = hst.plot(y=r'$p_t$', style='k--', ax=axis)
    axis = post.plot(y=r'$\mu_t$', style='r-', ax=axis)
    axis.legend(numpoints=1)
def main():
    """Demo: evaluate expected performance of chosen parameters, simulate
    behavior with them, then recover the parameters by MLE."""
    import time
    from environments import MultiArmedBandit
    from pmodels import RescorlaWagner
    from inference import MLEInference

    def expected_performance(x):
        """Example of how the expected performance can be computed."""
        pm_pars = {'alpha': x[0], 'beta': x[1]}  # perceptual model parameters
        rm_pars = {'theta': x[2]}                # response model parameters
        T = 100       # number of trials
        n_b = 2       # number of bandits
        rho = 0.01    # switch probability of the arm-reward contingencies
        n_env = 100   # number of environments
        n_blocks = 1  # number of experimental blocks
        ep = 0        # expected performance accumulator
        for n in range(n_env):
            # generate n_env multi-armed bandit environments
            env = MultiArmedBandit(T, rho=rho, n_b=n_b)
            pm = RescorlaWagner(env, n_b)
            for m in range(n_blocks):
                # in each environment repeat the experiment n_blocks times.
                # NOTE(review): d_r resolves to the enclosing main()'s d_r
                # (= n_b), which is assigned before this closure is called.
                rm = SoftMaxResponses([], pm, d_r)
                rm.get_responses(pm_pars, rm_pars)
                # for each block compute the expected performance
                ep += env.expected_performance()
        return ep / (n_env * n_blocks)

    T = 100     # number of trials
    n_b = 2     # number of bandits
    rho = 0.01  # switch probability of the arm-reward contingencies
    d_r = n_b   # response-model dimensionality (one per bandit)

    ###########################################################################
    # Let's try to find the set of parameters that lead to the highest
    # performance.  This takes a lot of time, as it converges very slowly
    # because of noisy estimates.  Leave it commented out and use the x_opt
    # values provided below.
    # from optmethods import cmaes
    # n_p = 3
    # bounds = {'ub': np.array([1., 1., 100.]), 'lb': np.zeros(3)}
    # f_opt, x_opt, res_msg = cmaes(expected_performance, n_p, 1e-2, 1e-4,
    #                               bounds, np.zeros(n_p), verb_disp=10)
    # print(f_opt, x_opt, res_msg)

    # the following values give a reasonably high expected performance
    x_opt = np.array([0.125, 0.1, 10])
    ###########################################################################

    # time a single evaluation of the expected performance
    t_start = time.time()
    print(expected_performance(x_opt), time.time() - t_start)

    #### MLE estimate of the parameter values #################################
    # we first simulate the behavior with the chosen "true" parameters
    env = MultiArmedBandit(T, rho=rho, n_b=n_b)
    d_b = n_b
    pm = RescorlaWagner(env, d_b)
    d_r = n_b
    rm = SoftMaxResponses([], pm, d_r)
    pm_pars = {'alpha': x_opt[0], 'beta': x_opt[1]}  # parameters of the perceptual model
    rm_pars = {'theta': x_opt[2]}                    # parameters of the response model
    rm.get_responses(pm_pars, rm_pars)

    # fresh model instances for inference over the simulated responses
    pm_inference = RescorlaWagner(env, d_b)
    rm_inference = SoftMaxResponses([], pm_inference, d_r)
    opts = {'np': 3, 'verb_disp': 100}
    mle = MLEInference(opts=opts)
    m_mle, s_mle, f_mle = mle.infer_posterior(rm_inference)
    # map the unconstrained estimates back to parameter space:
    # sigmoid for the first components, exp for the last one
    # (presumably alpha/beta and theta — matches pm_pars/rm_pars above)
    p_mle = [1 / (1 + np.exp(-m_mle[:-1])), np.exp(m_mle[-1])]
    print(f_mle, p_mle, np.diag(s_mle))