def expected_performance(x):
    """Example of how the expected performance can be computed.

    Parameters
    ----------
    x : sequence of float
        Parameter vector: x[0] = alpha, x[1] = beta (perceptual model),
        x[2] = theta (response model).

    Returns
    -------
    float
        Expected performance averaged over all environments and blocks.
    """
    pm_pars = {'alpha': x[0], 'beta': x[1]}  # perceptual model parameters
    rm_pars = {'theta': x[2]}                # response model parameters

    T = 100       # number of trials
    n_b = 2       # number of bandits
    rho = 0.01    # switch probability of the arm-reward contingencies
    n_env = 100   # number of environments
    n_blocks = 1  # number of experimental blocks
    # BUG FIX: d_r was read below but never defined in this function, so a
    # stand-alone call raised NameError. The response-model dimensionality
    # equals the number of bandits, matching the convention used elsewhere
    # in this file (d_r = n_b).
    d_r = n_b

    ep = 0  # expected performance accumulator
    for n in range(n_env):
        # generate n_env multi-armed bandit environments
        env = MultiArmedBandit(T, rho=rho, n_b=n_b)
        pm = RescorlaWagner(env, n_b)
        for m in range(n_blocks):
            # in each environment repeat the experiment n_blocks times
            rm = SoftMaxResponses([], pm, d_r)
            rm.get_responses(pm_pars, rm_pars)
            # for each block accumulate the expected performance
            ep += env.expected_performance()

    return ep / (n_env * n_blocks)
def expected_performance(x):
    """Example of how the expected performance can be computed."""
    # Split the candidate parameter vector into model-specific dicts.
    pm_pars = dict(alpha=x[0], beta=x[1])  # perceptual model parameters
    rm_pars = dict(theta=x[2])             # response model parameters

    T = 100          # trials per block
    n_b = 2          # number of bandit arms
    rho = 0.01       # probability that the arm-reward contingencies switch
    n_env = 100      # how many environments to average over
    n_blocks = 1     # experimental blocks per environment

    total = 0  # running sum of per-block expected performance
    for _ in range(n_env):
        # A fresh multi-armed bandit environment for every repetition.
        env = MultiArmedBandit(T, rho=rho, n_b=n_b)
        pm = RescorlaWagner(env, n_b)
        for _ in range(n_blocks):
            # Simulate responses, then score this block.
            # NOTE(review): d_r is looked up in an enclosing/module scope —
            # verify it is defined before this function is called.
            rm = SoftMaxResponses([], pm, d_r)
            rm.get_responses(pm_pars, rm_pars)
            total += env.expected_performance()

    # Average over every simulated block.
    return total / (n_env * n_blocks)
def main():
    """Fit the Rescorla-Wagner learning rate by minimising free energy.

    First on a single environment, then jointly over multiple experimental
    blocks; plots observations, hidden states and posterior beliefs.
    """
    from environments import MultiArmedBandit
    import seaborn as sns
    sns.set(style="white", palette="muted", color_codes=True)

    T = 100  # number of trials
    env = MultiArmedBandit(T)
    pm = RescorlaWagner(env, env.d_x)
    obs = env.get_observations()
    hst = env.get_hidden_states()

    # use isres for optimisation of one dimensional functions
    from optmethods import isres
    bounds = {'ub': np.array([1.]), 'lb': np.array([0.])}
    f_opt, x_opt, res = isres(pm.get_free_energy, 1, 1e-6, 1e-8, bounds,
                              np.array([0.5]))
    print(f_opt, x_opt, res)

    # plot observations, hidden states and the fitted posterior beliefs
    post = pm.get_beliefs(alpha=x_opt)
    ax = obs.plot(y=r'$o_t$', style='go')
    ax = hst.plot(y=r'$p_t$', style='k--', ax=ax)
    ax = post.plot(y=r'$\mu_t$', style='r-', ax=ax)
    ax.legend(numpoints=1)

    # optimize perceptual surprise over multiple experimental blocks
    def total_fe(x, n_pars, blocks):
        # Sum the free energy of one parameter vector over all blocks.
        fe = 0
        for b in blocks:
            pm = RescorlaWagner(b, b.d_x)
            fe += pm.get_free_energy(x)
        return fe

    n = 100  # number of experimental blocks
    T = 100
    # BUG FIX: `[MultiArmedBandit(T)] * n` replicated a single environment
    # object n times, so every "block" was the very same experiment and the
    # multi-block optimisation degenerated to n copies of one block.  Build
    # n independent environments instead.
    exp_blocks = [MultiArmedBandit(T) for _ in range(n)]
    fe = lambda x, p: total_fe(x, p, exp_blocks)
    f_opt, x_opt, res = isres(fe, 1, 1e-6, 1e-8, bounds, np.array([0.5]))
    print(f_opt / n, x_opt, res)  # report the per-block free energy

    # re-plot with the jointly fitted learning rate
    post = pm.get_beliefs(alpha=x_opt)
    ax = obs.plot(y=r'$o_t$', style='go')
    ax = hst.plot(y=r'$p_t$', style='k--', ax=ax)
    ax = post.plot(y=r'$\mu_t$', style='r-', ax=ax)
    ax.legend(numpoints=1)
def main():
    """Fit the Rescorla-Wagner learning rate by free-energy minimisation,
    first on a single bandit environment and then across many blocks."""
    from environments import MultiArmedBandit
    import seaborn as sns

    sns.set(style="white", palette="muted", color_codes=True)

    T = 100
    env = MultiArmedBandit(T)
    pm = RescorlaWagner(env, env.d_x)
    obs = env.get_observations()
    hst = env.get_hidden_states()

    # ISRES handles this one-dimensional optimisation problem.
    from optmethods import isres
    bounds = {'ub': np.array([1.]), 'lb': np.array([0.])}
    f_opt, x_opt, res = isres(
        pm.get_free_energy, 1, 1e-6, 1e-8, bounds, np.array([0.5])
    )
    print(f_opt, x_opt, res)

    # Overlay observations, hidden states and fitted posterior beliefs.
    post = pm.get_beliefs(alpha=x_opt)
    axis = obs.plot(y=r'$o_t$', style='go')
    axis = hst.plot(y=r'$p_t$', style='k--', ax=axis)
    axis = post.plot(y=r'$\mu_t$', style='r-', ax=axis)
    axis.legend(numpoints=1)

    # Optimise perceptual surprise jointly over several experimental blocks.
    def total_fe(x, n_pars, blocks):
        # Accumulate free energy of one parameter vector across all blocks.
        return sum(
            RescorlaWagner(block, block.d_x).get_free_energy(x)
            for block in blocks
        )

    n = 100
    T = 100
    # NOTE(review): list repetition stores 100 references to ONE environment
    # object, so every "block" is the same experiment — confirm this is
    # intentional.
    exp_blocks = [MultiArmedBandit(T)] * 100

    def fe(x, p):
        return total_fe(x, p, exp_blocks)

    f_opt, x_opt, res = isres(fe, 1, 1e-6, 1e-8, bounds, np.array([0.5]))
    print(f_opt / n, x_opt, res)

    # Re-plot using the jointly fitted learning rate.
    post = pm.get_beliefs(alpha=x_opt)
    axis = obs.plot(y=r'$o_t$', style='go')
    axis = hst.plot(y=r'$p_t$', style='k--', ax=axis)
    axis = post.plot(y=r'$\mu_t$', style='r-', ax=axis)
    axis.legend(numpoints=1)
def main():
    """Demo: evaluate expected performance of chosen parameters, simulate
    behavior with them, then recover the parameters by MLE."""
    import time
    from environments import MultiArmedBandit
    from pmodels import RescorlaWagner
    from inference import MLEInference

    def expected_performance(x):
        """Example of how the expected performance can be computed."""
        pm_pars = {'alpha': x[0], 'beta': x[1]}  # perceptual model parameters
        rm_pars = {'theta': x[2]}                # response model parameters
        T = 100       # number of trials
        n_b = 2       # number of bandits
        rho = 0.01    # switch probability of the arm-reward contingencies
        n_env = 100   # number of environments
        n_blocks = 1  # number of experimental blocks
        ep = 0        # expected performance accumulator
        for n in range(n_env):
            # generate n_env multi-armed bandit environments
            env = MultiArmedBandit(T, rho=rho, n_b=n_b)
            pm = RescorlaWagner(env, n_b)
            for m in range(n_blocks):
                # in each environment repeat the experiment n_blocks times.
                # NOTE(review): d_r resolves to the enclosing main()'s d_r
                # (= n_b), which is assigned before this closure is called.
                rm = SoftMaxResponses([], pm, d_r)
                rm.get_responses(pm_pars, rm_pars)
                # for each block compute the expected performance
                ep += env.expected_performance()
        return ep / (n_env * n_blocks)

    T = 100     # number of trials
    n_b = 2     # number of bandits
    rho = 0.01  # switch probability of the arm-reward contingencies
    d_r = n_b   # response-model dimensionality (one per bandit)

    ###########################################################################
    # Let's try to find the set of parameters that lead to the highest
    # performance.  This takes a lot of time, as it converges very slowly
    # because of noisy estimates.  Leave it commented out and use the x_opt
    # values provided below.
    # from optmethods import cmaes
    # n_p = 3
    # bounds = {'ub': np.array([1., 1., 100.]), 'lb': np.zeros(3)}
    # f_opt, x_opt, res_msg = cmaes(expected_performance, n_p, 1e-2, 1e-4,
    #                               bounds, np.zeros(n_p), verb_disp=10)
    # print(f_opt, x_opt, res_msg)

    # the following values give a reasonably high expected performance
    x_opt = np.array([0.125, 0.1, 10])
    ###########################################################################

    # time a single evaluation of the expected performance
    t_start = time.time()
    print(expected_performance(x_opt), time.time() - t_start)

    #### MLE estimate of the parameter values #################################
    # we first simulate the behavior with the chosen "true" parameters
    env = MultiArmedBandit(T, rho=rho, n_b=n_b)
    d_b = n_b
    pm = RescorlaWagner(env, d_b)
    d_r = n_b
    rm = SoftMaxResponses([], pm, d_r)
    pm_pars = {'alpha': x_opt[0], 'beta': x_opt[1]}  # parameters of the perceptual model
    rm_pars = {'theta': x_opt[2]}                    # parameters of the response model
    rm.get_responses(pm_pars, rm_pars)

    # fresh model instances for inference over the simulated responses
    pm_inference = RescorlaWagner(env, d_b)
    rm_inference = SoftMaxResponses([], pm_inference, d_r)
    opts = {'np': 3, 'verb_disp': 100}
    mle = MLEInference(opts=opts)
    m_mle, s_mle, f_mle = mle.infer_posterior(rm_inference)
    # map the unconstrained estimates back to parameter space:
    # sigmoid for the first components, exp for the last one
    # (presumably alpha/beta and theta — matches pm_pars/rm_pars above)
    p_mle = [1 / (1 + np.exp(-m_mle[:-1])), np.exp(m_mle[-1])]
    print(f_mle, p_mle, np.diag(s_mle))