def simulate(self, seed, scheme='TS_HGLM'):
    """Run one simulated display-ad allocation experiment.

    Each of ``num_period`` periods spends ``self.Budget // num_period``
    impressions, split evenly over the ``self.J`` websites; ``scheme``
    decides which of the ``self.K`` arms (ads) each impression goes to.
    True conversion probabilities come from ``self.mu_true[j, k]``.
    Per-period results are written to ``result/<scheme>_seed_<seed>.txt``.

    Parameters
    ----------
    seed : int
        Used in the output-file name; additionally seeds numpy for the
        'TS_HGLM' scheme (other schemes do not reseed, as before).
    scheme : str
        One of 'TS_HGLM', 'Balanced', 'Perfect', 'Test_rollout_Unpooled',
        'Test_rollout_Pooled', 'Greedy_Pooled', 'Greedy_Unpooled',
        'Epsilon_Greedy_Pooled_10', 'Epsilon_Greedy_Unpooled_10',
        'UCB1_Pooled', 'UCB1_Unpooled', 'UCB1_Tuned_Pooled',
        'UCB1_Tuned_Unpooled', 'Gittins_Pooled', 'Gittins_Unpooled'.

    Returns
    -------
    float
        Average conversions per period (0 for the unimplemented Gittins
        schemes; None for an unknown scheme name, as before).

    Notes
    -----
    Fixes relative to the original implementation:
    * Python-3 syntax: ``print(..., file=output)`` replaces the Python-2
      ``print >>`` statement, and floor division ``//`` replaces ``/`` for
      all budget arithmetic (``range`` requires integers).
    * The output file is managed by a ``with`` block instead of leaking.
    * Rollout counters use ``np.zeros`` — the original ``np.empty`` arrays
      were incremented on top of uninitialized memory.
    * ``self.d`` replaces the inconsistent ``Scheme.d``; the builtins
      ``max``/``min`` are no longer shadowed.
    The heavily duplicated per-period loops are factored into local
    helpers; each scheme's sequence of RNG draws is unchanged.
    """
    # --- budgets shared by every scheme -----------------------------------
    num_period = self.simulation_time // self.period_length
    budget_for_each_period = self.Budget // num_period
    budget_for_each_website = budget_for_each_period // self.J
    budget_for_each_arm = budget_for_each_website // self.K
    conversion = np.zeros(num_period)  # conversions observed in each period
    # Offset used when ranking arms by empirical rate: the denominator term
    # avoids division by zero, and the numerator term gives never-shown arms
    # an optimistic rate of ~1 so every arm gets tried at least once.
    EPS = 0.00001

    with open('result/' + scheme + '_seed_' + str(seed) + '.txt',
              'w') as output:

        def report(period, counter, *prefix):
            # Log this period's conversions plus the running average so far.
            print(*prefix, counter, np.average(conversion[:period + 1]),
                  file=output)

        def run_periods(start, stop, pick_arm, record=None):
            # Simulate periods [start, stop): each website j shows
            # budget_for_each_website ads to pick_arm(j, period);
            # record(j, arm, converted), when given, updates the scheme's
            # statistics after every impression.
            for period in range(start, stop):
                counter = 0
                for j in range(self.J):
                    for _ in range(budget_for_each_website):
                        arm = pick_arm(j, period)
                        converted = np.random.random() < self.mu_true[j, arm]
                        counter += converted
                        if record is not None:
                            record(j, arm, converted)
                conversion[period] = counter
                report(period, counter)

        def run_balanced_periods(start, stop, record=None):
            # Like run_periods, but splits each website's budget evenly over
            # all K arms ('Balanced' and the test-rollout exploration phase).
            for period in range(start, stop):
                counter = 0
                for j in range(self.J):
                    for k in range(self.K):
                        for _ in range(budget_for_each_arm):
                            converted = (np.random.random()
                                         < self.mu_true[j, k])
                            counter += converted
                            if record is not None:
                                record(j, k, converted)
                conversion[period] = counter
                report(period, counter)

        if scheme == 'TS_HGLM':
            # Thompson sampling with a hierarchical generalized linear model
            # (partial pooling across websites).
            np.random.seed(seed)
            prior_beta_bar = np.random.normal(-1, 1, self.d)
            tmp = np.random.rand(self.d, self.d)
            prior_Sigma = np.dot(tmp, tmp.T)  # random PSD covariance prior
            print(prior_beta_bar, file=output)
            output.flush()
            X = np.empty([self.Budget, self.d])  # every impression's features
            y = np.empty([self.Budget])          # ...and its 0/1 outcome
            for period in range(num_period):
                counter = 0
                for j in range(self.J):
                    for b in range(budget_for_each_website):
                        # NOTE(review): a fresh beta_j is sampled for every
                        # candidate arm, not once per impression as the old
                        # comment claimed; kept as-is to preserve behavior.
                        beta_j = np.random.multivariate_normal(
                            prior_beta_bar, prior_Sigma)
                        to_select = 0
                        best_value = 1 / (1 + np.exp(
                            -np.dot(self.x[0], beta_j.T)))
                        for i in range(1, self.K):
                            beta_j = np.random.multivariate_normal(
                                prior_beta_bar, prior_Sigma)
                            new_value = 1 / (1 + np.exp(
                                -np.dot(self.x[i], beta_j)))
                            if new_value > best_value:
                                to_select, best_value = i, new_value
                        row = (period * budget_for_each_period
                               + j * budget_for_each_website + b)
                        X[row] = self.x[to_select]  # record every impression
                        if np.random.random() < self.mu_true[j, to_select]:
                            y[row] = 1
                            counter += 1
                        else:
                            y[row] = 0
                # Refit on everything seen so far; the posterior becomes the
                # prior for the next period.
                n_seen = (period + 1) * budget_for_each_period
                prior_beta_bar, prior_Sigma = bl.fit_bayes_logistic(
                    y[:n_seen], X[:n_seen], prior_beta_bar, prior_Sigma)
                conversion[period] = counter
                report(period, counter, prior_beta_bar)
                output.flush()
            return np.average(conversion)

        elif scheme == 'Balanced':
            # Pure balanced allocation: equal budget for every arm.
            run_balanced_periods(0, num_period)
            return np.average(conversion)

        elif scheme == 'Perfect':
            # Oracle policy: always show each website's truly best arm.
            best_arm = np.argmax(self.mu_true, axis=1)
            run_periods(0, num_period, lambda j, period: best_arm[j])
            return np.average(conversion)

        elif scheme == 'Test_rollout_Unpooled':
            tau = num_period // 5  # length of the exploration phase
            # np.zeros, not np.empty: accumulated with += below.
            acc = np.zeros([self.J, self.K])

            def record(j, k, converted):
                acc[j, k] += converted

            run_balanced_periods(0, tau, record)                 # explore
            best_arm = np.argmax(acc, axis=1)
            run_periods(tau, num_period,
                        lambda j, period: best_arm[j])           # exploit
            return np.average(conversion)

        elif scheme == 'Test_rollout_Pooled':
            tau = num_period // 5  # length of the exploration phase
            acc = np.zeros(self.K)  # conversion counts pooled over websites

            def record(j, k, converted):
                acc[k] += converted

            run_balanced_periods(0, tau, record)                 # explore
            best_arm = np.argmax(acc)
            run_periods(tau, num_period,
                        lambda j, period: best_arm)              # exploit
            return np.average(conversion)

        elif scheme == 'Greedy_Pooled':
            numer = np.zeros(self.K)  # conversions per arm (pooled)
            denom = np.zeros(self.K)  # impressions per arm (pooled)

            def pick(j, period):
                return np.argmax((numer + EPS) / (denom + EPS))

            def record(j, arm, converted):
                numer[arm] += converted
                denom[arm] += 1

            run_periods(0, num_period, pick, record)
            return np.average(conversion)

        elif scheme == 'Greedy_Unpooled':
            numer = np.zeros([self.J, self.K])  # conversions per (site, arm)
            denom = np.zeros([self.J, self.K])  # impressions per (site, arm)

            def pick(j, period):
                return np.argmax((numer[j] + EPS) / (denom[j] + EPS))

            def record(j, arm, converted):
                numer[j, arm] += converted
                denom[j, arm] += 1

            run_periods(0, num_period, pick, record)
            return np.average(conversion)

        elif scheme == 'Epsilon_Greedy_Pooled_10':
            numer = np.zeros(self.K)
            denom = np.zeros(self.K)

            def pick(j, period):
                arm = np.argmax((numer + EPS) / (denom + EPS))
                if np.random.random() < 0.1:  # 10% uniform exploration
                    arm = np.random.randint(0, self.K)
                return arm

            def record(j, arm, converted):
                numer[arm] += converted
                denom[arm] += 1

            run_periods(0, num_period, pick, record)
            return np.average(conversion)

        elif scheme == 'Epsilon_Greedy_Unpooled_10':
            numer = np.zeros([self.J, self.K])
            denom = np.zeros([self.J, self.K])

            def pick(j, period):
                arm = np.argmax((numer[j] + EPS) / (denom[j] + EPS))
                if np.random.random() < 0.1:  # 10% uniform exploration
                    arm = np.random.randint(0, self.K)
                return arm

            def record(j, arm, converted):
                numer[j, arm] += converted
                denom[j, arm] += 1

            run_periods(0, num_period, pick, record)
            return np.average(conversion)

        elif scheme == 'UCB1_Pooled':
            numer = np.zeros(self.K)  # number of conversions
            denom = np.zeros(self.K)  # number of impressions

            def greedy(j, period):
                return np.argmax((numer + EPS) / (denom + EPS))

            def ucb(j, period):
                # NOTE(review): as in the original, the exploration bonus is
                # scaled by conversions+1 (not impressions) — kept as-is.
                bonus = np.sqrt(2 * np.log(period * budget_for_each_period)
                                / (numer + 1))
                return np.argmax((numer + EPS) / (denom + EPS) + bonus)

            def record(j, arm, converted):
                numer[arm] += converted
                denom[arm] += 1

            run_periods(0, 1, greedy, record)  # one greedy warm-up period
            run_periods(1, num_period, ucb, record)
            return np.average(conversion)

        elif scheme == 'UCB1_Unpooled':
            numer = np.zeros([self.J, self.K])
            denom = np.zeros([self.J, self.K])

            def greedy(j, period):
                return np.argmax((numer[j] + EPS) / (denom[j] + EPS))

            def ucb(j, period):
                bonus = np.sqrt(2 * np.log(period * budget_for_each_website)
                                / (numer[j] + 1))
                return np.argmax((numer[j] + EPS) / (denom[j] + EPS) + bonus)

            def record(j, arm, converted):
                numer[j, arm] += converted
                denom[j, arm] += 1

            run_periods(0, 1, greedy, record)  # one greedy warm-up period
            run_periods(1, num_period, ucb, record)
            return np.average(conversion)

        elif scheme == 'UCB1_Tuned_Pooled':
            numer = np.zeros(self.K)  # number of conversions
            denom = np.zeros(self.K)  # number of impressions

            def greedy(j, period):
                return np.argmax((numer + EPS) / (denom + EPS))

            def ucb_tuned(j, period):
                # NOTE(review): unseen arms give 0/0 -> nan here, exactly as
                # in the original computation.
                rate = numer / denom
                v_kt = rate * (1 - rate) + np.sqrt(
                    2 * np.log(period * budget_for_each_period) / (numer + 1))
                capped = np.minimum(v_kt, 0.25)
                return np.argmax((numer + EPS) / (denom + EPS) + np.sqrt(
                    capped * np.log(period * budget_for_each_period)
                    / (numer + 1)))

            def record(j, arm, converted):
                numer[arm] += converted
                denom[arm] += 1

            run_periods(0, 1, greedy, record)  # one greedy warm-up period
            run_periods(1, num_period, ucb_tuned, record)
            return np.average(conversion)

        elif scheme == 'UCB1_Tuned_Unpooled':
            numer = np.zeros([self.J, self.K])  # number of conversions
            denom = np.zeros([self.J, self.K])  # number of impressions

            def greedy(j, period):
                return np.argmax((numer[j] + EPS) / (denom[j] + EPS))

            def ucb_tuned(j, period):
                rate = numer[j] / denom[j]
                v_kt = rate * (1 - rate) + np.sqrt(
                    2 * np.log(period * budget_for_each_website)
                    / (numer[j] + 1))
                capped = np.minimum(v_kt, 0.25)
                return np.argmax((numer[j] + EPS) / (denom[j] + EPS)
                                 + np.sqrt(capped
                                           * np.log(period
                                                    * budget_for_each_website)
                                           / (numer[j] + 1)))

            def record(j, arm, converted):
                numer[j, arm] += converted
                denom[j, arm] += 1

            run_periods(0, 1, greedy, record)  # one greedy warm-up period
            run_periods(1, num_period, ucb_tuned, record)
            return np.average(conversion)

        elif scheme in ('Gittins_Pooled', 'Gittins_Unpooled'):
            # TODO: Gittins-index policies are not implemented yet.
            return 0
N1, ]) # bias term for _ in np.arange(5): X1[:, _ + 6] = (X1[:, _ + 1] * X1[:, _ + 6] ) # this is where impose the correlation # ------------------------------------------------------------------------------ # make a parameter vector w_true = np.random.uniform(-0.5, 0.5, p) w_true[0] = -1 # bias parameter # ------------------------------------------------------------------------------ # make some binary responses mu = bl.logistic_prob(X1, w_true) y1 = np.empty([N1]) for _ in np.arange(N1): y1[_] = np.random.binomial(1, mu[_]) # to get going, set a prior parameter of zeros, and a diagonal hessian w_prior = np.zeros(p) H_prior = np.diag(np.ones(p)) * 0.001 #---------------------------------------------------------------------------------------- # Do a bayesian fit with this random sample # The default uses a full Hessian matrix and a Newton's conjugate gradient solver w_posterior, H_posterior = bl.fit_bayes_logistic(y1, X1, w_prior, H_prior) logistic_prob = bl.logistic_prob(X1, w_posterior) print(r2_score(mu, logistic_prob)) print(roc_auc_score(y1, logistic_prob))
def simulate(self, seed, scheme='TS_HGLM'):
    """Run one simulated display-ad allocation experiment.

    Each of ``num_period`` periods spends ``self.Budget // num_period``
    impressions, split evenly over the ``self.J`` websites; ``scheme``
    decides which of the ``self.K`` arms (ads) each impression goes to.
    True conversion probabilities come from ``self.mu_true[j, k]``.
    Per-period results are written to ``result/<scheme>_seed_<seed>.txt``.

    Parameters
    ----------
    seed : int
        Used in the output-file name; additionally seeds numpy for the
        'TS_HGLM' scheme (other schemes do not reseed, as before).
    scheme : str
        One of 'TS_HGLM', 'Balanced', 'Perfect', 'Test_rollout_Unpooled',
        'Test_rollout_Pooled', 'Greedy_Pooled', 'Greedy_Unpooled',
        'Epsilon_Greedy_Pooled_10', 'Epsilon_Greedy_Unpooled_10',
        'UCB1_Pooled', 'UCB1_Unpooled', 'UCB1_Tuned_Pooled',
        'UCB1_Tuned_Unpooled', 'Gittins_Pooled', 'Gittins_Unpooled'.

    Returns
    -------
    float
        Average conversions per period (0 for the unimplemented Gittins
        schemes; None for an unknown scheme name, as before).

    Notes
    -----
    Fixes relative to the original implementation:
    * Python-3 syntax: ``print(..., file=output)`` replaces the Python-2
      ``print >>`` statement, and floor division ``//`` replaces ``/`` for
      all budget arithmetic (``range`` requires integers).
    * The output file is managed by a ``with`` block instead of leaking.
    * Rollout counters use ``np.zeros`` — the original ``np.empty`` arrays
      were incremented on top of uninitialized memory.
    * ``self.d`` replaces the inconsistent ``Scheme.d``; the builtins
      ``max``/``min`` are no longer shadowed.
    The heavily duplicated per-period loops are factored into local
    helpers; each scheme's sequence of RNG draws is unchanged.
    """
    # --- budgets shared by every scheme -----------------------------------
    num_period = self.simulation_time // self.period_length
    budget_for_each_period = self.Budget // num_period
    budget_for_each_website = budget_for_each_period // self.J
    budget_for_each_arm = budget_for_each_website // self.K
    conversion = np.zeros(num_period)  # conversions observed in each period
    # Offset used when ranking arms by empirical rate: the denominator term
    # avoids division by zero, and the numerator term gives never-shown arms
    # an optimistic rate of ~1 so every arm gets tried at least once.
    EPS = 0.00001

    with open('result/' + scheme + '_seed_' + str(seed) + '.txt',
              'w') as output:

        def report(period, counter, *prefix):
            # Log this period's conversions plus the running average so far.
            print(*prefix, counter, np.average(conversion[:period + 1]),
                  file=output)

        def run_periods(start, stop, pick_arm, record=None):
            # Simulate periods [start, stop): each website j shows
            # budget_for_each_website ads to pick_arm(j, period);
            # record(j, arm, converted), when given, updates the scheme's
            # statistics after every impression.
            for period in range(start, stop):
                counter = 0
                for j in range(self.J):
                    for _ in range(budget_for_each_website):
                        arm = pick_arm(j, period)
                        converted = np.random.random() < self.mu_true[j, arm]
                        counter += converted
                        if record is not None:
                            record(j, arm, converted)
                conversion[period] = counter
                report(period, counter)

        def run_balanced_periods(start, stop, record=None):
            # Like run_periods, but splits each website's budget evenly over
            # all K arms ('Balanced' and the test-rollout exploration phase).
            for period in range(start, stop):
                counter = 0
                for j in range(self.J):
                    for k in range(self.K):
                        for _ in range(budget_for_each_arm):
                            converted = (np.random.random()
                                         < self.mu_true[j, k])
                            counter += converted
                            if record is not None:
                                record(j, k, converted)
                conversion[period] = counter
                report(period, counter)

        if scheme == 'TS_HGLM':
            # Thompson sampling with a hierarchical generalized linear model
            # (partial pooling across websites).
            np.random.seed(seed)
            prior_beta_bar = np.random.normal(-1, 1, self.d)
            tmp = np.random.rand(self.d, self.d)
            prior_Sigma = np.dot(tmp, tmp.T)  # random PSD covariance prior
            print(prior_beta_bar, file=output)
            output.flush()
            X = np.empty([self.Budget, self.d])  # every impression's features
            y = np.empty([self.Budget])          # ...and its 0/1 outcome
            for period in range(num_period):
                counter = 0
                for j in range(self.J):
                    for b in range(budget_for_each_website):
                        # NOTE(review): a fresh beta_j is sampled for every
                        # candidate arm, not once per impression as the old
                        # comment claimed; kept as-is to preserve behavior.
                        beta_j = np.random.multivariate_normal(
                            prior_beta_bar, prior_Sigma)
                        to_select = 0
                        best_value = 1 / (1 + np.exp(
                            -np.dot(self.x[0], beta_j.T)))
                        for i in range(1, self.K):
                            beta_j = np.random.multivariate_normal(
                                prior_beta_bar, prior_Sigma)
                            new_value = 1 / (1 + np.exp(
                                -np.dot(self.x[i], beta_j)))
                            if new_value > best_value:
                                to_select, best_value = i, new_value
                        row = (period * budget_for_each_period
                               + j * budget_for_each_website + b)
                        X[row] = self.x[to_select]  # record every impression
                        if np.random.random() < self.mu_true[j, to_select]:
                            y[row] = 1
                            counter += 1
                        else:
                            y[row] = 0
                # Refit on everything seen so far; the posterior becomes the
                # prior for the next period.
                n_seen = (period + 1) * budget_for_each_period
                prior_beta_bar, prior_Sigma = bl.fit_bayes_logistic(
                    y[:n_seen], X[:n_seen], prior_beta_bar, prior_Sigma)
                conversion[period] = counter
                report(period, counter, prior_beta_bar)
                output.flush()
            return np.average(conversion)

        elif scheme == 'Balanced':
            # Pure balanced allocation: equal budget for every arm.
            run_balanced_periods(0, num_period)
            return np.average(conversion)

        elif scheme == 'Perfect':
            # Oracle policy: always show each website's truly best arm.
            best_arm = np.argmax(self.mu_true, axis=1)
            run_periods(0, num_period, lambda j, period: best_arm[j])
            return np.average(conversion)

        elif scheme == 'Test_rollout_Unpooled':
            tau = num_period // 5  # length of the exploration phase
            # np.zeros, not np.empty: accumulated with += below.
            acc = np.zeros([self.J, self.K])

            def record(j, k, converted):
                acc[j, k] += converted

            run_balanced_periods(0, tau, record)                 # explore
            best_arm = np.argmax(acc, axis=1)
            run_periods(tau, num_period,
                        lambda j, period: best_arm[j])           # exploit
            return np.average(conversion)

        elif scheme == 'Test_rollout_Pooled':
            tau = num_period // 5  # length of the exploration phase
            acc = np.zeros(self.K)  # conversion counts pooled over websites

            def record(j, k, converted):
                acc[k] += converted

            run_balanced_periods(0, tau, record)                 # explore
            best_arm = np.argmax(acc)
            run_periods(tau, num_period,
                        lambda j, period: best_arm)              # exploit
            return np.average(conversion)

        elif scheme == 'Greedy_Pooled':
            numer = np.zeros(self.K)  # conversions per arm (pooled)
            denom = np.zeros(self.K)  # impressions per arm (pooled)

            def pick(j, period):
                return np.argmax((numer + EPS) / (denom + EPS))

            def record(j, arm, converted):
                numer[arm] += converted
                denom[arm] += 1

            run_periods(0, num_period, pick, record)
            return np.average(conversion)

        elif scheme == 'Greedy_Unpooled':
            numer = np.zeros([self.J, self.K])  # conversions per (site, arm)
            denom = np.zeros([self.J, self.K])  # impressions per (site, arm)

            def pick(j, period):
                return np.argmax((numer[j] + EPS) / (denom[j] + EPS))

            def record(j, arm, converted):
                numer[j, arm] += converted
                denom[j, arm] += 1

            run_periods(0, num_period, pick, record)
            return np.average(conversion)

        elif scheme == 'Epsilon_Greedy_Pooled_10':
            numer = np.zeros(self.K)
            denom = np.zeros(self.K)

            def pick(j, period):
                arm = np.argmax((numer + EPS) / (denom + EPS))
                if np.random.random() < 0.1:  # 10% uniform exploration
                    arm = np.random.randint(0, self.K)
                return arm

            def record(j, arm, converted):
                numer[arm] += converted
                denom[arm] += 1

            run_periods(0, num_period, pick, record)
            return np.average(conversion)

        elif scheme == 'Epsilon_Greedy_Unpooled_10':
            numer = np.zeros([self.J, self.K])
            denom = np.zeros([self.J, self.K])

            def pick(j, period):
                arm = np.argmax((numer[j] + EPS) / (denom[j] + EPS))
                if np.random.random() < 0.1:  # 10% uniform exploration
                    arm = np.random.randint(0, self.K)
                return arm

            def record(j, arm, converted):
                numer[j, arm] += converted
                denom[j, arm] += 1

            run_periods(0, num_period, pick, record)
            return np.average(conversion)

        elif scheme == 'UCB1_Pooled':
            numer = np.zeros(self.K)  # number of conversions
            denom = np.zeros(self.K)  # number of impressions

            def greedy(j, period):
                return np.argmax((numer + EPS) / (denom + EPS))

            def ucb(j, period):
                # NOTE(review): as in the original, the exploration bonus is
                # scaled by conversions+1 (not impressions) — kept as-is.
                bonus = np.sqrt(2 * np.log(period * budget_for_each_period)
                                / (numer + 1))
                return np.argmax((numer + EPS) / (denom + EPS) + bonus)

            def record(j, arm, converted):
                numer[arm] += converted
                denom[arm] += 1

            run_periods(0, 1, greedy, record)  # one greedy warm-up period
            run_periods(1, num_period, ucb, record)
            return np.average(conversion)

        elif scheme == 'UCB1_Unpooled':
            numer = np.zeros([self.J, self.K])
            denom = np.zeros([self.J, self.K])

            def greedy(j, period):
                return np.argmax((numer[j] + EPS) / (denom[j] + EPS))

            def ucb(j, period):
                bonus = np.sqrt(2 * np.log(period * budget_for_each_website)
                                / (numer[j] + 1))
                return np.argmax((numer[j] + EPS) / (denom[j] + EPS) + bonus)

            def record(j, arm, converted):
                numer[j, arm] += converted
                denom[j, arm] += 1

            run_periods(0, 1, greedy, record)  # one greedy warm-up period
            run_periods(1, num_period, ucb, record)
            return np.average(conversion)

        elif scheme == 'UCB1_Tuned_Pooled':
            numer = np.zeros(self.K)  # number of conversions
            denom = np.zeros(self.K)  # number of impressions

            def greedy(j, period):
                return np.argmax((numer + EPS) / (denom + EPS))

            def ucb_tuned(j, period):
                # NOTE(review): unseen arms give 0/0 -> nan here, exactly as
                # in the original computation.
                rate = numer / denom
                v_kt = rate * (1 - rate) + np.sqrt(
                    2 * np.log(period * budget_for_each_period) / (numer + 1))
                capped = np.minimum(v_kt, 0.25)
                return np.argmax((numer + EPS) / (denom + EPS) + np.sqrt(
                    capped * np.log(period * budget_for_each_period)
                    / (numer + 1)))

            def record(j, arm, converted):
                numer[arm] += converted
                denom[arm] += 1

            run_periods(0, 1, greedy, record)  # one greedy warm-up period
            run_periods(1, num_period, ucb_tuned, record)
            return np.average(conversion)

        elif scheme == 'UCB1_Tuned_Unpooled':
            numer = np.zeros([self.J, self.K])  # number of conversions
            denom = np.zeros([self.J, self.K])  # number of impressions

            def greedy(j, period):
                return np.argmax((numer[j] + EPS) / (denom[j] + EPS))

            def ucb_tuned(j, period):
                rate = numer[j] / denom[j]
                v_kt = rate * (1 - rate) + np.sqrt(
                    2 * np.log(period * budget_for_each_website)
                    / (numer[j] + 1))
                capped = np.minimum(v_kt, 0.25)
                return np.argmax((numer[j] + EPS) / (denom[j] + EPS)
                                 + np.sqrt(capped
                                           * np.log(period
                                                    * budget_for_each_website)
                                           / (numer[j] + 1)))

            def record(j, arm, converted):
                numer[j, arm] += converted
                denom[j, arm] += 1

            run_periods(0, 1, greedy, record)  # one greedy warm-up period
            run_periods(1, num_period, ucb_tuned, record)
            return np.average(conversion)

        elif scheme in ('Gittins_Pooled', 'Gittins_Unpooled'):
            # TODO: Gittins-index policies are not implemented yet.
            return 0
## bayesian logistic regressions # Set the number of data points and variables N = new_nn_train_array.shape[0] p = new_nn_train_array.shape[1] # randomly permute the data r = np.random.permutation(N) X = new_nn_train_array[r,:] y = new_nn_label_array[r] idx = np.arange(int(N/10)) w_prior = np.zeros(p) H_prior = np.diag(np.ones(p))*0.001 w_posterior, H_posterior = bl.fit_bayes_logistic(y[idx], X[idx, :], w_prior, H_prior) # Now make this posterior our new prior w_prior = copy.copy(w_posterior) H_prior = copy.copy(H_posterior) # get the logistic and moderated logistic probabilities test_p = np.array([x]) bayes_prob_lin = bl.bayes_logistic_prob(test_p,w_posterior,H_posterior) if bayes_prob_lin[0] > 0.50: pred_br = label_1 else: pred_br = label_0 if pred_br == t:
plt.title("Log-Unnormalised Posterior") j2 = np.argmax(log_joint) wb = W[j2][:] plt.scatter(wb[0], wb[1], c='red', s=100) plt.grid() pml.savefig("logreg_laplace_unnormalised_posterior.pdf", dpi=300) #Plotting the Laplace approximation to posterior plt.figure(3) #https://bayes-logistic.readthedocs.io/en/latest/usage.html #Visit the website above to access the source code of bayes_logistic library #parameter info : bayes_logistic.fit_bayes_logistic(y, X, wprior, H, weights=None, solver='Newton-CG', bounds=None, maxiter=100) wfit, hfit = bayes_logistic.fit_bayes_logistic(t.reshape((N * D)), X, np.zeros(D), ((np.identity(D)) * 1 / alpha), weights=None, solver='Newton-CG', bounds=None, maxiter=100) co = np.linalg.inv(hfit) #wfit represents the posterior parameters (MAP estimate) #hfit represents the posterior Hessian (Hessian of negative log posterior evaluated at MAP parameters) log_laplace_posterior = np.log(multivariate_normal.pdf(W, mean=wfit, cov=co)) plt.contour(xx, yy, -1 * log_laplace_posterior.reshape((n, n)), 30) plt.scatter(wb[0], wb[1], c='red', s=100) plt.title("Laplace Approximation to Posterior") plt.grid() pml.savefig("logreg_laplace_posterior.pdf", dpi=300) #Plotting the predictive distribution for logistic regression plt.figure(5)
def main():
    """Laplace-approximation demo for 2-D Bayesian logistic regression.

    Generates two Gaussian classes of ``N`` points each, visualises the
    log-likelihood and unnormalised log-posterior over a 2-D weight grid,
    fits a Laplace (Gaussian) approximation to the posterior via the
    ``bayes_logistic`` package, and plots the plug-in (MAP), Monte-Carlo,
    and numerically approximated predictive distributions.  Each figure is
    saved as a PNG and finally displayed with ``plt.show()``.
    """
    np.random.seed(135)  # reproducible data, fit, and posterior samples

    # ---- Create data: two Gaussian classes of N points each in 2-D ----
    N = 30
    D = 2
    mu1 = np.hstack((np.ones((N, 1)), 5 * np.ones((N, 1))))
    mu2 = np.hstack((-5 * np.ones((N, 1)), np.ones((N, 1))))
    class1_std = 1
    class2_std = 1.1
    X_1 = np.add(class1_std * np.random.randn(N, 2), mu1)
    X_2 = np.add(2 * class2_std * np.random.randn(N, 2), mu2)
    X = np.vstack((X_1, X_2))
    # Labels: N ones (class 1) followed by N zeros (class 2).
    t = np.vstack((np.ones((N, 1)), np.zeros((N, 1))))

    # ---- Plot the raw data ----
    x_1, y_1 = X[np.where(t == 1)[0]].T
    x_2, y_2 = X[np.where(t == 0)[0]].T
    plt.figure(0)
    plt.scatter(x_1, y_1, c='red', s=20, marker='o')
    plt.scatter(x_2, y_2, c='blue', s=20, marker='o')

    # ---- Decision boundaries for a few hand-picked weight vectors ----
    alpha = 100  # prior variance scale for the weights
    Range = 8
    step = 0.1
    xx, yy = np.meshgrid(np.arange(-Range, Range, step),
                         np.arange(-Range, Range, step))
    # The grid is square, so one side length suffices.  (The original
    # `[n, n] = xx.shape` bound the same name twice; the second silently won.)
    n = xx.shape[0]
    W = np.hstack((xx.reshape((n * n, 1)), yy.reshape((n * n, 1))))
    Xgrid = W  # the weight grid doubles as an input grid for predictions
    ws = np.array([[3, 1], [4, 2], [5, 3], [7, 3]])
    col = ['black', 'red', 'green', 'blue']
    for ii in range(ws.shape[0]):
        w = ws[ii][:]
        pred = 1.0 / (1 + np.exp(np.dot(-Xgrid, w)))  # sigmoid(Xgrid @ w)
        plt.contour(xx, yy, pred.reshape((n, n)), 1, colors=col[ii])
    plt.title("data")
    plt.savefig("logregLaplaceGirolamiDemo_data.png", dpi=300)

    # ---- Log prior / log likelihood / log joint over the weight grid ----
    Xt = np.transpose(X)
    f = np.dot(W, Xt)  # logits for every (grid weight, data point) pair
    log_prior = np.log(multivariate_normal.pdf(W, cov=(np.identity(D)) * alpha))
    # Bernoulli log-likelihood: sum_i [ t_i * f_i - log(1 + exp(f_i)) ]
    log_like = np.dot(np.dot(W, Xt), t) - np.sum(np.log(1 + np.exp(f)),
                                                 1).reshape((n * n, 1))
    log_joint = log_like.reshape((n * n, 1)) + log_prior.reshape((n * n, 1))

    # ---- Log-likelihood surface ----
    plt.figure(1)
    plt.contour(xx, yy, -1 * log_like.reshape((n, n)), 30)
    plt.title("Log-Likelihood")
    # Mark the hand-picked weight vectors on the surface.
    for ii in range(0, ws.shape[0]):
        w = np.transpose(ws[ii, :])
        plt.annotate(str(ii + 1), xy=(w[0], w[1]), color=col[ii])
    j = np.argmax(log_like)
    wmle = W[j, :]  # grid-search maximum-likelihood weights
    slope = wmle[1] / wmle[0]
    plt.plot([0, 7.9], [0, 7.9 * slope])  # line through origin along wmle
    plt.grid()
    plt.savefig("logregLaplaceGirolamiDemo_LogLikelihood.png", dpi=300)

    # ---- Unnormalised log-posterior surface ----
    plt.figure(2)
    plt.contour(xx, yy, -1 * log_joint.reshape((n, n)), 30)
    plt.title("Log-Unnormalised Posterior")
    j2 = np.argmax(log_joint)
    wb = W[j2][:]  # grid-search MAP estimate
    plt.scatter(wb[0], wb[1], c='red', s=100)
    plt.grid()
    plt.savefig("logregLaplaceGirolamiDemo_LogUnnormalisedPosterior.png",
                dpi=300)

    # ---- Laplace approximation to the posterior ----
    plt.figure(3)
    # https://bayes-logistic.readthedocs.io/en/latest/usage.html
    # fit_bayes_logistic(y, X, wprior, H, ...) returns the MAP weights (wfit)
    # and the Hessian of the negative log posterior at the MAP (hfit).
    # NOTE: the label vector has 2*N entries (two classes of N points).  The
    # original passed t.reshape((N * D)), which only worked because D == 2.
    wfit, hfit = bayes_logistic.fit_bayes_logistic(
        t.reshape(2 * N), X, np.zeros(D), ((np.identity(D)) * 1 / alpha),
        weights=None, solver='Newton-CG', bounds=None, maxiter=100)
    co = np.linalg.inv(hfit)  # Laplace posterior covariance = inverse Hessian
    log_laplace_posterior = np.log(
        multivariate_normal.pdf(W, mean=wfit, cov=co))
    plt.contour(xx, yy, -1 * log_laplace_posterior.reshape((n, n)), 30)
    plt.scatter(wb[0], wb[1], c='red', s=100)
    plt.title("Laplace Approximation to Posterior")
    plt.grid()
    plt.savefig(
        "logregLaplaceGirolamiDemo_LaplaceApproximationtoPosterior.png",
        dpi=300)

    # ---- Plug-in predictive distribution p(y=1|x, wMAP) ----
    plt.figure(5)
    pred = 1.0 / (1 + np.exp(np.dot(-Xgrid, wfit)))
    plt.contour(xx, yy, pred.reshape((n, n)), 30)
    x_1, y_1 = X[np.where(t == 1)[0]].T
    x_2, y_2 = X[np.where(t == 0)[0]].T
    plt.scatter(x_1, y_1, c='red', s=20, marker='o')
    plt.scatter(x_2, y_2, c='blue', s=40, marker='o')
    plt.title("p(y=1|x, wMAP)")
    plt.savefig("logregLaplaceGirolamiDemo_preddistlogit.png", dpi=300)

    # ---- Decision boundaries for posterior samples of w ----
    plt.figure(6)
    plt.scatter(x_1, y_1, c='red', s=20, marker='o')
    plt.scatter(x_2, y_2, c='blue', s=20, marker='o')
    predm = np.zeros((n * n, 1))
    s = 100  # number of posterior samples
    for i in range(s):
        wsamp = np.random.multivariate_normal(mean=wfit, cov=co)
        pred = 1.0 / (1 + np.exp(np.dot(-Xgrid, wsamp)))
        predm = np.add(predm, pred.reshape((n * n, 1)))  # accumulate for MC avg
        # The 0.5 level set of the sampled predictive is its decision boundary.
        plt.contour(xx, yy, pred.reshape((n, n)), np.array([0.5]))
    plt.title("decision boundary for sampled w")
    plt.savefig("logregLaplaceGirolamiDemo_decisionboundarysampledw.png",
                dpi=300)

    # ---- Monte-Carlo average of the sampled predictives ----
    plt.figure(7)
    predm = predm / s
    plt.contour(xx, yy, predm.reshape((n, n)), 30)
    plt.scatter(x_1, y_1, c='red', s=20, marker='o')
    plt.scatter(x_2, y_2, c='blue', s=20, marker='o')
    plt.title("MC approx of p(y=1|x)")
    plt.savefig("logregLaplaceGirolamiDemo_MonteCarloApprox.png", dpi=300)

    # ---- Numerical (moderated) approximation of the predictive ----
    plt.figure(8)
    plt.scatter(x_1, y_1, c='red', s=20, marker='o')
    plt.scatter(x_2, y_2, c='blue', s=20, marker='o')
    # bayes_logistic_prob integrates over the Gaussian posterior (wfit, hfit)
    # rather than plugging in the MAP point estimate.
    pr = bayes_logistic.bayes_logistic_prob(Xgrid, wfit, hfit)
    plt.contour(xx, yy, pr.reshape((n, n)), 30)
    plt.title("numerical approx of p(y=1|x)")
    plt.savefig("logregLaplaceGirolamiDemo_logitprob.png", dpi=300)
    plt.show()