def handle(self):
    # Per-subject-line email stats: sends and observed open rate.
    data = {
        111: {'sends': 1000, 'open_rate': 0.11},
        112: {'sends': 1000, 'open_rate': 0.13},
        113: {'sends': 1000, 'open_rate': 0.16},
    }
    algorithm = UCB.UCB([], [])
    # Build the arms dict expected by the bandit: estimated value and pull count per arm.
    arms = {}
    for subject, subjectData in data.items():
        if subject not in arms:
            arms[subject] = {}
        arms[subject]['value'] = subjectData['open_rate']
        arms[subject]['count'] = subjectData['sends']
    result = self.MabAlgorithm(algorithm, arms)
    print(result)
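# The UCB.UCB class used above is not shown in this snippet. For reference, a
# hypothetical UCB1-style index it might compute from each arm's 'value'
# (empirical mean) and 'count' (pulls) is sketched below; the helper name and
# its c parameter are assumptions, not part of the original code.
import math

def ucb_index(value, count, total_pulls, c=2.0):
    """Upper confidence bound for one arm: empirical mean plus an exploration bonus."""
    if count == 0:
        return float('inf')  # force at least one pull of every arm
    return value + c * math.sqrt(math.log(total_pulls) / count)

# Example usage against the arms built in handle():
#   total = sum(a['count'] for a in arms.values())
#   best = max(arms, key=lambda s: ucb_index(arms[s]['value'], arms[s]['count'], total))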
def runUCB(nbmanchots, run, iterations, k):
    # For each run, create a fresh set of bandits ("manchots") and record the UCB gains.
    nbiterations3 = []
    gains3 = []
    for i in range(run):
        tabMachines = creerManchots(nbmanchots)
        nbiterations3.append(i + 1)
        gains3.append(ucb.UCB(iterations, tabMachines, k))
    print(gains3)
    return nbiterations3, gains3
def main():
    algo = sys.argv[1]
    print("Sweeping c values for", algo)
    assert algo in algos
    filename = algo + "_grid_search.csv"
    csv_file = open(filename, mode='w')
    csv_writer = csv.writer(csv_file, delimiter=',')
    csv_writer.writerow(["n", "k", "c", "avg_cum_regret"])
    if algo == "uniform":
        c_vals = c_vals_unif
    elif algo == "eps_greedy":
        c_vals = c_vals_eps
    elif algo == "UCB":
        c_vals = c_vals_ucb  # not defined in the original snippet; assumed to exist at module level like c_vals_unif / c_vals_eps
    results_dict = {}
    for n in n_range:
        for k in k_range:
            for c in c_vals:
                print("N:", n, "K:", k, "C:", c)
                cum_regrets = []
                for i in range(num_instances):
                    bandit = NSW_Bandit(n, k)
                    mu_mat = load_i_instance_nk(n, k, i)
                    bandit.set_mu_matrix(mu_mat)
                    if algo == "uniform":
                        uniform = Uniform(bandit, c, T, num_sims)
                        mean_regrets, _ = uniform.run()
                    elif algo == "eps_greedy":
                        eps_greedy = epsilon_greedy(bandit, c, T=T, num_sims=num_sims)
                        _, _, mean_regrets, _ = eps_greedy.run()
                    elif algo == "UCB":
                        ucb = UCB(bandit, c, T, num_sims)
                        mean_regrets, _ = ucb.run()
                    cum_regrets.append(np.sum(mean_regrets))
                results_dict[(n, k, c)] = np.mean(cum_regrets)
                csv_writer.writerow([str(n), str(k), str(c), results_dict[(n, k, c)]])
    csv_file.close()
    print("Finished!")
def testUCB(runs, pulls, stationary, c, alpha):
    # Initialize bandit
    bandit = Bandit(10, 0, 1)
    # Initialize all of the algorithms you wish to test
    algorithms = []
    # UCB algorithm
    ucb = UCB(bandit, c, alpha)
    algorithms.append(ucb)
    # Run the tests
    results = testAlgorithms(bandit, algorithms, runs, pulls, stationary)
    # Analyze results
    plotAlgorithmOptimalActions(results=results, algorithm=ucb, stationary=stationary, c=c)
    return results
def compareUCB(runs, pulls, c, alpha):
    # Initialize bandit
    bandit = Bandit(10, 0, 1)
    # Initialize algorithms
    algorithms = []
    # UCB algorithm
    ucb = UCB(bandit, c, alpha)
    algorithms.append(ucb)
    # Run tests in both stationary and non-stationary settings
    resultsStationary = testAlgorithms(bandit, algorithms, runs, pulls, True)
    resultsNonStationary = testAlgorithms(bandit, algorithms, runs, pulls, False)
    # Analyze results
    optimalActionsStationary = resultsStationary['optimalActions'][ucb]
    optimalActionsNonStationary = resultsNonStationary['optimalActions'][ucb]
    fig = plt.figure(1)
    fig.suptitle('Bandit', fontsize=14, fontweight='bold')
    ax = fig.add_subplot(211)
    ax.set_title("UCB (stationary vs. non-stationary)")
    ax.set_xlabel('Step/Pull')
    ax.set_ylabel('Average reward')
    ax.plot(optimalActionsStationary)
    ax.plot(optimalActionsNonStationary)
    plt.show()
    return (optimalActionsStationary, optimalActionsNonStationary)
def mab(feeds, train, test, filepath, risk_pref, dlt, dfb, pulls, presence):
    # print(i[0], model.threshold_)
    # import magpie_rpp_modules as magpie
    '''
    run = magpie.Model()
    run.train(filepath, train, feeds, 'train', risk_profile)
    ip_mod = joblib.load(filepath + "models/" + train + ".ip.iforest.sav")
    wifi_mod = joblib.load(filepath + "models/" + train + ".wifi.iforest.sav")
    zb_mod = joblib.load(filepath + "models/" + train + ".zigbee.iforest.sav")
    rf_mod = joblib.load(filepath + "models/" + train + ".rf.iforest.sav")
    audio_mod = joblib.load(filepath + "models/" + train + ".audio.iforest.sav")
    '''
    # test_data = test
    # print("MAB dataset sample: " + str(test_data))
    # test_data = str(test) + str(sample)
    results = []
    for i in presence:
        if i == 1:
            train = "1.train"  # DO NOT FORGET TO CHANGE FOR EACH TRAINING
            test_data = ['1.7', '1.10train']  # add .0 if using train, only 1.x if for test
        else:
            train = "0.train"  # DO NOT FORGET TO CHANGE FOR EACH TRAINING
            test_data = ['0.7', '0.10train']  # add .0 if using train, only 1.x if for test
        for j in dlt:
            for k in dfb:
                # Each arm pairs the five feed detectors with one anomaly threshold.
                action_set = [
                    [['ip', j, 0.003], ['wifi', j, 0.003], ['zigbee', j, 0.003], ['rf', j, 0.003], ['audio', j, 0.003], ['amds'], [k]],
                    [['ip', j, 0.005], ['wifi', j, 0.005], ['zigbee', j, 0.005], ['rf', j, 0.005], ['audio', j, 0.005], ['amds'], [k]],
                    [['ip', j, 0.007], ['wifi', j, 0.007], ['zigbee', j, 0.007], ['rf', j, 0.007], ['audio', j, 0.007], ['amds'], [k]],
                    [['ip', j, 0.01], ['wifi', j, 0.01], ['zigbee', j, 0.01], ['rf', j, 0.01], ['audio', j, 0.01], ['amds'], [k]],
                    [['ip', j, 0.03], ['wifi', j, 0.03], ['zigbee', j, 0.03], ['rf', j, 0.03], ['audio', j, 0.03], ['amds'], [k]],
                    [['ip', j, 0.05], ['wifi', j, 0.05], ['zigbee', j, 0.05], ['rf', j, 0.05], ['audio', j, 0.05], ['amds'], [k]],
                    [['ip', j, 0.07], ['wifi', j, 0.07], ['zigbee', j, 0.07], ['rf', j, 0.07], ['audio', j, 0.07], ['amds'], [k]],
                    [['ip', j, 0.1], ['wifi', j, 0.1], ['zigbee', j, 0.1], ['rf', j, 0.1], ['audio', j, 0.1], ['amds'], [k]],
                ]
                action_space = len(action_set)
                # N = action_space * 1000
                # N = action_space
                print('Number of RPP Arms in Bandit: ' + str(action_space))
                # bandit = Bandit()
                stationary = False
                # pulls = N * 100
                # pulls = 1000
                print("Number of Pulls: " + str(pulls))
                runs = 1
                algorithm = []
                alpha = 0.1
                c = 2
                ucb = UCB(action_space, c, alpha)
                algorithm.append(ucb)
                Q, learning_results = nonstationary2.testAlgorithms(
                    action_set, filepath, test_data, algorithm, runs, pulls,
                    stationary, train, feeds, 'train', risk_pref, j)
                print("Q after learning: " + str(Q))
                arm_id = Q.index(max(Q))
                print(Q.index(max(Q)))
                print(action_set[Q.index(max(Q))])
                presence_conf = i
                dlt_conf = j   # j sweeps dlt
                dfb_conf = k   # k sweeps dfb
                mab_result = [presence_conf, dfb_conf, dlt_conf, arm_id]
                results.append(mab_result)  # send to results list for printing
                plot_performance(learning_results, algorithm=ucb, j=j, k=k)
    print("Results: ")
    print(results)
    return results
def testAllAlgorithms():
    """ Step 1: Initialize the environment """
    # Initialize bandit
    bandit = Bandit(10, 0, 1)
    stationary = False
    # TODO - Enter the actual number of pulls and runs we want to do for testing.
    pulls = 200000
    runs = 100

    """ Step 2: Initialize all of the algorithms you wish to test """
    algorithms = []

    # Epsilon Greedy algorithms
    # TODO - Enter the actual epsilons we want to test
    # epsilons = [1/128, 1/64, 1/32, 1/16, 1/8, 1/4]
    epsilons = [1 / 256]
    greedyAlgorithms = []
    alpha = 0.1
    for epsilon in epsilons:
        epsilonGreedy = EpsilonGreedy(bandit, alpha, epsilon)
        algorithms.append(epsilonGreedy)
        greedyAlgorithms.append(epsilonGreedy)

    # Optimistic greedy
    # TODO - Enter the actual initial values we want to test
    # initialValues = [1/4, 1/2, 1, 2, 4]
    initialValues = [6, 8, 10]
    optimisticAlgorithms = []
    alpha = 0.1
    for initialValue in initialValues:
        optimisticGreedy = OptimisticGreedy(bandit, initialValue, alpha)
        algorithms.append(optimisticGreedy)
        optimisticAlgorithms.append(optimisticGreedy)

    # UCB
    alpha = 0.1
    # TODO - Enter the actual c values we want to test
    # cValues = [1/16, 1/4, 1/2, 1, 2, 4]
    cValues = [6, 8]
    ucbAlgorithms = []
    for c in cValues:
        ucb = UCB(bandit, c, alpha)
        algorithms.append(ucb)
        ucbAlgorithms.append(ucb)

    # Gradient
    # TODO - Enter the actual alpha values we want to test
    alphas = [1 / 32, 1 / 16, 1 / 8, 1 / 4, 1 / 2, 1, 2]
    gradientAlgorithms = []
    """
    for alpha in alphas:
        gradient = Gradient(bandit, alpha)
        algorithms.append(gradient)
        gradientAlgorithms.append(gradient)
    """

    """ Step 4: Run the tests """
    results = testAlgorithms(bandit, algorithms, runs, pulls, stationary)

    """ Step 5: Analyze the results """
    rewardsDictionaryOfAllAlgorithms = results['rewards']
    optimalActionsDictionaryOfAllAlgorithms = results['optimalActions']

    for algorithm in greedyAlgorithms:
        rewards = rewardsDictionaryOfAllAlgorithms[algorithm]
        lastRewards = rewards[len(rewards) // 2:]
        avgReward = np.sum(lastRewards) / (pulls / 2)  # average over the last half of the pulls
        print(algorithm.name + ", epsilon: " + str(algorithm.eps) + ", Average Reward: " + str(avgReward))
        # TODO - this should be plotted as a data point on a plot, connected to the other greedy points

    for algorithm in optimisticAlgorithms:
        rewards = rewardsDictionaryOfAllAlgorithms[algorithm]
        lastRewards = rewards[len(rewards) // 2:]
        avgReward = np.sum(lastRewards) / (pulls / 2)
        print(algorithm.name + ", initial values: " + str(algorithm.initialValues) + ", Average Reward: " + str(avgReward))
        # TODO - this should be plotted as a data point on a plot, connected to the other optimistic points

    for algorithm in ucbAlgorithms:
        rewards = rewardsDictionaryOfAllAlgorithms[algorithm]
        lastRewards = rewards[len(rewards) // 2:]
        avgReward = np.sum(lastRewards) / (pulls / 2)
        print(algorithm.name + ", c: " + str(algorithm.c) + ", Average Reward: " + str(avgReward))
        # TODO - this should be plotted as a data point on a plot, connected to the other UCB points

    for algorithm in gradientAlgorithms:
        rewards = rewardsDictionaryOfAllAlgorithms[algorithm]
        lastRewards = rewards[len(rewards) // 2:]
        avgReward = np.sum(lastRewards) / (pulls / 2)
        print(algorithm.name + ", alpha: " + str(algorithm.alpha) + ", Average Reward: " + str(avgReward))
# Greedy-Optimist agent
optimist = Optimist_greedy(timesteps)
for k in range(5, 21):
    bandit = K_Bandit(k)
    optimist.initAgent(k)
    optimist.initOptimist(k, opt=10)  # <-- this is done every time because this code is not very good
    for t in range(1, timesteps + 1):
        action = optimist.chooseAction()
        reward = bandit.play(action)
        optimist.updateTimestep(t - 1, reward)
        optimist.updateAction(reward, action)

# Upper Confidence Bound agent
ucb = UCB(timesteps, c=1)
for k in range(5, 21):
    bandit = K_Bandit(k)
    ucb.initAgent(k)
    ucb.correctActionCnt()
    for t in range(1, timesteps + 1):  # We do 1000 actions for each bandit
        action = ucb.chooseAction(t)  # <-- dependent on time-step 't'
        reward = bandit.play(action)
        ucb.updateTimestep(t - 1, reward)
        ucb.updateAction(reward, action)

# Proportional Exploration (apparently that is a very bad idea!)
prop_e = Proportional_exploration(timesteps, k)
# Proportional Exploration assigns probabilities to actions that are proportional
# to their expected payoffs.
for k in range(5, 21):
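    # The original snippet is cut off here. A minimal sketch of the loop body,
    # assuming Proportional_exploration exposes the same initAgent/chooseAction/
    # updateTimestep/updateAction interface as the agents above (this interface is
    # an assumption; the actual class is not shown in this snippet):
    bandit = K_Bandit(k)
    prop_e.initAgent(k)
    for t in range(1, timesteps + 1):
        action = prop_e.chooseAction()
        reward = bandit.play(action)
        prop_e.updateTimestep(t - 1, reward)
        prop_e.updateAction(reward, action)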
def simulate(self, c, boltz, ucb):
    # tempTruth = [np.random.normal(self.truth_100[i], scale=8) for i in range(100)]
    rewards = [[0 for j in range(82)] for k in range(4)]
    # Create prior mean
    pMean = [np.random.uniform(3, 18) for i in range(100)]
    priorMeans = [pMean for _ in range(4)]
    # Create prior covariance matrix
    self.fillCov()
    priorCov = [self.covariance.copy() for i in range(4)]
    # Generate the truth from the prior
    tempTruth = np.random.multivariate_normal(pMean, self.covariance)
    # Epsilon-greedy policy initialization
    constant = c
    # Boltzmann policy initialization
    theta_b = boltz
    # UCB initialization
    theta_u = ucb
    # KG policy initialization
    precision = [1 / 22.5 for i in range(100)]
    num_selected = [1 for i in range(100)]

    def drawObservations(self, lineupChoice):
        return (tempTruth[lineupChoice] + np.random.normal(0, scale=np.sqrt(22.5)))

    for i in range(0, 82):
        choices = [0 for j in range(4)]
        # Get choices from all the policies and put them in a list
        choices[0] = eg.EpsilonGreedy(priorMeans[0], constant, i)
        choices[1] = b.Boltzmann(priorMeans[1], theta_b, i)
        choices[2], num_selected = UCB.UCB(priorMeans[2], theta_u, i, num_selected)
        choices[3] = KGCB.kgcb(priorMeans[3], precision, priorCov[3], i)
        # print('EGreedy choice {}, Boltzmann choice {}, UCB {}, KG {}'.format(choices[0], choices[1], choices[2], choices[3]))
        results = [drawObservations(self, j) for j in choices]
        for j in range(4):
            rewards[j][i] = results[j]

        # Updating equations for the Normal-Normal model with covariance:
        # observe the outcome of the decision, w_k = mu_k + Z * sigmaW_k, where
        # sigmaW is the standard deviation of the error for each observation,
        # then update the chosen arm's prior mean and covariance.
        for j in range(4):
            w_k = results[j]
            cov_m = np.asarray(priorCov[j])
            x = choices[j]
            addscalar = (w_k - priorMeans[j][x]) / (1 / precision[x] + cov_m[x][x])
            # cov_m_x is the x-th column of the covariance matrix cov_m
            cov_m_x = np.array([row[x] for row in cov_m])
            priorMeans[j] = np.add(priorMeans[j], np.multiply(addscalar, cov_m_x))
            cov_m = np.subtract(cov_m, np.divide(np.outer(cov_m_x, cov_m_x), 1 / precision[x] + cov_m[x][x]))
            priorCov[j] = cov_m

    return rewards