Example #1
    def handle(self):
        # Observed engagement per subject-line ID: send volume and open rate.
        data = {
            111: {
                'sends': 1000,
                'open_rate': 0.11
            },
            112: {
                'sends': 1000,
                'open_rate': 0.13
            },
            113: {
                'sends': 1000,
                'open_rate': 0.16
            }
        }

        algorithm = UCB.UCB([], [])
        # One arm per subject line: 'value' is the observed open rate,
        # 'count' the number of sends so far.
        arms = {}
        for subject, subjectData in data.items():
            arms[subject] = {
                'value': subjectData['open_rate'],
                'count': subjectData['sends'],
            }

        result = self.MabAlgorithm(algorithm, arms)
        print(result)
Example #2
def runUCB(nbmanchots, run, iterations, k):
    # nbiterations3 and gains3 are module-level lists (defined elsewhere in the
    # original module) that accumulate the run index and UCB gains across runs.
    for i in range(run):
        tabMachines = creerManchots(nbmanchots)  # create the slot machines (arms)
        nbiterations3.append(i + 1)
        gains3.append(ucb.UCB(iterations, tabMachines, k))
    print(gains3)
    return nbiterations3, gains3
Example #3
def main():
    algo = sys.argv[1]
    print("Sweeping c values in for ", algo)
    assert algo in algos
    filename = algo + "_grid_search.csv"

    csv_file = open(filename, mode='w')
    csv_writer = csv.writer(csv_file, delimiter=',')
    csv_writer.writerow(["n", "k", "c", "avg_cum_regret"])

    if algo == "uniform":
        c_vals = c_vals_unif
    elif algo == "eps_greedy":
        c_vals = c_vals_eps

    results_dict = {}
    for n in n_range:
        for k in k_range:
            for c in c_vals:
                print("N:", n, "K:", k, "C:", c)
                cum_regrets = []
                for i in range(num_instances):
                    bandit = NSW_Bandit(n, k)
                    mu_mat = load_i_instance_nk(n, k, i)
                    bandit.set_mu_matrix(mu_mat)
                    if algo == "uniform":
                        uniform = Uniform(bandit, c, T, num_sims)
                        mean_regrets, _ = uniform.run()
                    elif algo == "eps_greedy":
                        eps_greedy = epsilon_greedy(bandit,
                                                    c,
                                                    T=T,
                                                    num_sims=num_sims)
                        _, _, mean_regrets, _ = eps_greedy.run()
                    elif algo == "UCB":
                        ucb = UCB(bandit, c, T, num_sims)
                        mean_regrets, _ = ucb.run()
                    cum_regrets.append(np.sum(mean_regrets))
                results_dict[(n, k, c)] = np.mean(cum_regrets)
                csv_writer.writerow([n, k, c, results_dict[(n, k, c)]])

    print("Finished!")
Example #4
def testUCB(runs, pulls, stationary, c, alpha):

    #Initialize bandit
    bandit = Bandit(10, 0, 1)

    #Initialize all of the algorithms you wish to test
    algorithms = []

    #UCB algorithm
    ucb = UCB(bandit, c, alpha)
    algorithms.append(ucb)

    #Run the tests
    results = testAlgorithms(bandit, algorithms, runs, pulls, stationary)

    #Analyze results
    plotAlgorithmOptimalActions(results=results,
                                algorithm=ucb,
                                stationary=stationary,
                                c=c)
    return results
Example #5
def select_action(Gsys):
    """
    Select an action according to the algorithm specified by Gsys.algo.
    
    Input:
      Gsys:  the game system object.  
      
    Output:
      a:   an action/arm, an integer in [K]. 
    """

    if Gsys.algo == "Thompson Sampling":
        a = TS.select_action(Gsys)
    elif Gsys.algo == "UCB":
        a = ucb.select_action(Gsys)
    elif Gsys.algo == "Particle Filter":
        a = PF.select_action(Gsys)
    else:
        raise ValueError("Unknown algorithm: " + str(Gsys.algo))

    return a
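The ucb module dispatched to above is not shown on this page; purely for illustration, a minimal UCB1-style select_action could look like the sketch below (the arrays counts and means are hypothetical stand-ins for whatever Gsys.state stores, not names taken from these examples):

import numpy as np

def select_action_ucb1(counts, means, c=2.0):
    # counts[k]: number of pulls of arm k so far; means[k]: empirical mean reward of arm k.
    counts = np.asarray(counts, dtype=float)
    means = np.asarray(means, dtype=float)
    # Try every arm at least once before using the confidence bound.
    untried = np.where(counts == 0)[0]
    if untried.size > 0:
        return int(untried[0])
    t = counts.sum()
    # UCB index: empirical mean plus an exploration bonus that shrinks with more pulls.
    return int(np.argmax(means + c * np.sqrt(np.log(t) / counts)))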
Example #6
def compareUCB(runs, pulls, c, alpha):

    #Initialize bandit
    bandit = Bandit(10, 0, 1)

    #Initialize algorithms
    algorithms = []

    #UCB algorithm
    ucb = UCB(bandit, c, alpha)
    algorithms.append(ucb)

    #Run tests
    resultsStationary = testAlgorithms(bandit, algorithms, runs, pulls, True)
    resultsNonStationary = testAlgorithms(bandit, algorithms, runs, pulls,
                                          False)

    #Analyze results
    optimalActionsDictionaryStationary = resultsStationary['optimalActions']
    optimalActionsStationary = optimalActionsDictionaryStationary[ucb]

    optimalActionsDictionaryNonStationary = resultsNonStationary[
        'optimalActions']
    optimalActionsNonStationary = optimalActionsDictionaryNonStationary[ucb]

    fig = plt.figure(1)
    fig.suptitle('Bandit', fontsize=14, fontweight='bold')
    ax = fig.add_subplot(211)
    titleLabel = "UCB (stationary vs. non)"
    ax.set_title(titleLabel)
    ax.set_xlabel('Step/Pull')
    ax.set_ylabel('Optimal actions')

    ax.plot(optimalActionsStationary, label='stationary')
    ax.plot(optimalActionsNonStationary, label='non-stationary')
    ax.legend()
    plt.show()

    return (optimalActionsStationary, optimalActionsNonStationary)
Example #7
    def __init__(self, K, T, Npar, algo):

        # number of arms
        self.K = K

        # time horizon
        self.T = T

        # number of particles
        self.Npar = Npar

        # algorithm
        self.algo = algo

        # The true theta vector of length K
        self.theta_true = np.zeros(K)

        # The best action
        self.best_action = 0

        # The state variables
        if algo == "Thompson Sampling":
            self.state = TS.State(K)
        elif algo == "UCB":
            self.state = ucb.State(K)
        elif algo == "Particle Filter":
            self.state = PF.State(K, Npar)
        else:
            raise ValueError("Unknown algorithm: " + str(algo))

        # History
        ## self.X = np.zeros((T, d))  # the context history
        self.A = np.zeros(T)  # the action history
        self.OBS = np.zeros(T)  # the observation history
        self.rews = np.zeros(T)  # the reward history
        self.regs = np.zeros(T)  # The regret history
Example #8
def mab(feeds, train, test, filepath, risk_pref, dlt, dfb, pulls, presence):

    # print(i[0], model.threshold_)
    # import magpie_rpp_modules as magpie
    '''
    run = magpie.Model()
    run.train(filepath, train, feeds, 'train', risk_profile)

    ip_mod = joblib.load(filepath + "models/" + train + ".ip.iforest.sav")
    wifi_mod = joblib.load(filepath + "models/" + train + ".wifi.iforest.sav")
    zb_mod = joblib.load(filepath + "models/" + train + ".zigbee.iforest.sav")
    rf_mod = joblib.load(filepath + "models/" + train + ".rf.iforest.sav")
    audio_mod = joblib.load(filepath + "models/" + train + ".audio.iforest.sav")
    '''

    #test_data = test
    # print("MAB dataset sample: " + str(test_data))
    # test_data = str(test) + str(sample)

    results = []
    for i in presence:
        if i == 1:
            train = "1.train"  # DO NOT FORGET TO CHANGE FOR EACH TRAINING
            test_data = ['1.7',
                         '1.10train']  # add .0 if using train; only add 1.x for test
        else:
            train = "0.train"  # DO NOT FORGET TO CHANGE FOR EACH TRAINING
            test_data = ['0.7',
                         '0.10train']  # add .0 if using train; only add 1.x for test
        for j in dlt:
            for k in dfb:
                # Eight candidate arms, identical except for the per-feed third parameter.
                action_set = [
                    [['ip', j, t], ['wifi', j, t], ['zigbee', j, t],
                     ['rf', j, t], ['audio', j, t], ['amds'], [k]]
                    for t in (0.003, 0.005, 0.007, 0.01, 0.03, 0.05, 0.07, 0.1)
                ]

                action_space = int(len(action_set))
                # N = action_space * 1000
                # N = action_space
                print('Number of RPP Arms in Bandit: ' + str(action_space))
                # bandit = Bandit()
                stationary = False
                # pulls = N*100
                # pulls = 1000
                print("Number of Pulls: " + str(pulls))
                runs = 1
                algorithm = []
                alpha = 0.1
                c = 2
                ucb = UCB(action_space, c, alpha)
                algorithm.append(ucb)

                Q, learning_results = nonstationary2.testAlgorithms(
                    action_set, filepath, test_data, algorithm, runs, pulls,
                    stationary, train, feeds, 'train', risk_pref, j)

                print("Q after learning: " + str(Q))
                arm_id = Q.index(max(Q))
                print(arm_id)
                print(action_set[arm_id])
                presence_conf = i
                dlt_conf = j
                dfb_conf = k

                mab_result = [presence_conf, dlt_conf, dfb_conf, arm_id]
                results.append(mab_result)
                ### send to list for print
                plot_performance(learning_results, algorithm=ucb, j=j, k=k)

    print("Results: ")
    print(results)
    return results
Example #9
def testAllAlgorithms():
    """
    Step 1: Initialize the environment
    """
    #Initialize bandit
    bandit = Bandit(10, 0, 1)
    stationary = False

    #TODO - Enter the actual number of pulls and runs we want to do for testing.
    pulls = 200000
    runs = 100
    """
    Step 2: Initialize all of the algorithms you wish to test
    """
    algorithms = []

    #Epsilon Greedy algorithms

    #TODO - Enter the actual epsilons we want to test
    #epsilons = [1/128, 1/64, 1/32, 1/16, 1/8, 1/4]
    epsilons = [1 / 256]
    greedyAlgorithms = []
    alpha = 0.1
    for epsilon in epsilons:
        epsilonGreedy = EpsilonGreedy(bandit, alpha, epsilon)
        algorithms.append(epsilonGreedy)
        greedyAlgorithms.append(epsilonGreedy)

    #Optimistic greedy
    #TODO - Enter the actual initial values we want to test
    #initialValues = [1/4, 1/2, 1, 2, 4]
    initialValues = [6, 8, 10]
    optimisticAlgorithms = []
    alpha = 0.1
    for initialValue in initialValues:
        optimisticGreedy = OptimisticGreedy(bandit, initialValue, alpha)
        algorithms.append(optimisticGreedy)
        optimisticAlgorithms.append(optimisticGreedy)

    #UCB
    alpha = 0.1
    #TODO - Enter the actual c values we want to test
    #cValues = [1/16, 1/4, 1/2, 1, 2, 4]
    cValues = [6, 8]
    ucbAlgorithms = []
    for c in cValues:
        ucb = UCB(bandit, c, alpha)
        algorithms.append(ucb)
        ucbAlgorithms.append(ucb)

    #Gradient
    #TODO - Enter the actual alpha values we want to test
    alphas = [1 / 32, 1 / 16, 1 / 8, 1 / 4, 1 / 2, 1, 2]
    gradientAlgorithms = []
    """
    for alpha in alphas:
        gradient = Gradient(bandit, alpha)
        algorithms.append(gradient)
        gradientAlgorithms.append(gradient)
    """
    """
    Step 4: Run the tests
    """
    results = testAlgorithms(bandit, algorithms, runs, pulls, stationary)
    """
    Step 5: Analyze the results
    """
    rewardsDictionaryOfAllAlgorithms = results['rewards']
    optimalActionsDictionaryOfAllAlgorithms = results['optimalActions']

    for algorithm in greedyAlgorithms:
        rewards = rewardsDictionaryOfAllAlgorithms[algorithm]
        # Mean reward over the second half of the pulls.
        lastRewards = rewards[len(rewards) // 2:]
        avgReward = np.mean(lastRewards)
        print(algorithm.name + ", epsilon: " + str(algorithm.eps) +
              ", Average Reward: " + str(avgReward))
        #TODO - this should be plotted as a data point on a plot, connected to the other greedy points

    for algorithm in optimisticAlgorithms:
        rewards = rewardsDictionaryOfAllAlgorithms[algorithm]
        # Mean reward over the second half of the pulls.
        lastRewards = rewards[len(rewards) // 2:]
        avgReward = np.mean(lastRewards)
        print(algorithm.name + ", initial Values: " +
              str(algorithm.initialValues) + ", Average Reward: " +
              str(avgReward))
        #TODO - this should be plotted as a data point on a plot, connected to the other optimistic points

    for algorithm in ucbAlgorithms:
        rewards = rewardsDictionaryOfAllAlgorithms[algorithm]
        # Mean reward over the second half of the pulls.
        lastRewards = rewards[len(rewards) // 2:]
        avgReward = np.mean(lastRewards)
        print(algorithm.name + ", c: " + str(algorithm.c) +
              ", Average Reward: " + str(avgReward))
        #TODO - this should be plotted as a data point on a plot, connected to the other ucb points

    for algorithm in gradientAlgorithms:
        rewards = rewardsDictionaryOfAllAlgorithms[algorithm]
        # Mean reward over the second half of the pulls.
        lastRewards = rewards[len(rewards) // 2:]
        avgReward = np.mean(lastRewards)
        print(algorithm.name + ", alpha: " + str(algorithm.alpha) +
              ", Average Reward: " + str(avgReward))
Example #10
# Greedy-Optimist agent
optimist = Optimist_greedy(timesteps)
for k in range(5, 21):
    bandit = K_Bandit(k)
    optimist.initAgent(k)
    optimist.initOptimist(
        k, opt=10
    )  #<-- this is done every time because this code is not very good
    for t in range(1, timesteps + 1):
        action = optimist.chooseAction()
        reward = bandit.play(action)
        optimist.updateTimestep(t - 1, reward)
        optimist.updateAction(reward, action)

# Upper Confidence Bound agent
ucb = UCB(timesteps, c=1)
for k in range(5, 21):
    bandit = K_Bandit(k)
    ucb.initAgent(k)
    ucb.correctActionCnt()
    for t in range(1, timesteps + 1):  # We do 1000 actions for each bandit
        action = ucb.chooseAction(t)  #<-- dependent on time-step 't'
        reward = bandit.play(action)
        ucb.updateTimestep(t - 1, reward)
        ucb.updateAction(reward, action)

# Proportional Exploration (apparently that is a very bad idea!)
prop_e = Proportional_exploration(timesteps, k)
#Proportional Exploration assigns probabilities to actions that are proportional
#to their expected payoffs (a minimal sketch of that rule follows this snippet).
for k in range(5, 21):
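A minimal, hypothetical sketch of such a proportional selection rule, assuming only a NumPy array Q of non-negative estimated action values (neither Q nor proportional_choice appears in the snippets above):

import numpy as np

def proportional_choice(Q):
    # Sample an action index with probability proportional to its estimated payoff.
    probs = np.asarray(Q, dtype=float)
    probs = probs / probs.sum()
    return int(np.random.choice(len(probs), p=probs))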
Example #11
    def simulate(self, c, boltz, ucb):
        #tempTruth = [np.random.normal(self.truth_100[i], scale = 8) for i in range(100)]
        rewards = [[0 for j in range(82)] for k in range(4)]
        # Create prior mean
        pMean = [np.random.uniform(3, 18) for i in range(100)]
        priorMeans = [pMean for _ in range(4)]
        # Create prior covariance matrix
        self.fillCov()
        priorCov = [self.covariance.copy() for i in range(4)]

        #generate the truth from prior
        tempTruth = np.random.multivariate_normal(pMean, self.covariance)

        #EG policy initializes
        constant = c
        #Boltzmann policy initialize
        theta_b = boltz
        #UCB initializes
        theta_u = ucb
        #KG policy initialize
        precision = [1 / 22.5 for i in range(100)]
        num_selected = [1 for i in range(100)]

        def drawObservations(self, lineupChoice):
            return (tempTruth[lineupChoice] +
                    np.random.normal(0, scale=np.sqrt(22.5)))

        for i in range(0, 82):
            choices = [0 for j in range(4)]
            #get choices for all the policies and put in a list
            choices[0] = eg.EpsilonGreedy(priorMeans[0], constant, i)
            choices[1] = b.Boltzmann(priorMeans[1], theta_b, i)
            choices[2], num_selected = UCB.UCB(priorMeans[2], theta_u, i,
                                               num_selected)
            choices[3] = KGCB.kgcb(priorMeans[3], precision, priorCov[3], i)
            #print('EGreedy choice {}, Boltzmann choice{}, UCB {}, KG {}'.format(choices[0], choices[1], choices[2], choices[3]))

            results = [drawObservations(self, j) for j in choices]

            for j in range(4):

                rewards[j][i] = results[j]

            ## THIS STUFF IS FOR UPDATING EQUATIONS

            # max_value is the best estimated value of the KG
            # x is the argument that produces max_value

            # observe the outcome of the decision
            # w_k=mu_k+Z*SigmaW_k where SigmaW is standard deviation of the
            # error for each observation
            for j in range(4):
                w_k = results[j]
                cov_m = np.asarray(priorCov[j])
                x = choices[j]
                # updating equations for Normal-Normal model with covariance
                addscalar = (w_k - priorMeans[j][x]) / (1 / precision[x] +
                                                        cov_m[x][x])
                # cov_m_x is the x-th column of the covariance matrix cov_m
                cov_m_x = np.array([row[x] for row in cov_m])
                priorMeans[j] = np.add(priorMeans[j],
                                       np.multiply(addscalar, cov_m_x))
                cov_m = np.subtract(
                    cov_m,
                    np.divide(np.outer(cov_m_x, cov_m_x),
                              1 / precision[x] + cov_m[x][x]))
                priorCov[j] = cov_m
        return (rewards)
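For reference, the inner update loop above implements the standard Normal-Normal (known observation noise) Bayesian update with correlated beliefs. In the code's notation, with observation-noise variance \(\lambda_x = 1/\mathrm{precision}[x]\) and \(\Sigma e_x\) the x-th column of the covariance matrix, each observed reward \(w\) for the chosen lineup \(x\) updates the beliefs as

\[
\mu^{\text{new}} = \mu + \frac{w - \mu_x}{\lambda_x + \Sigma_{xx}}\,\Sigma e_x,
\qquad
\Sigma^{\text{new}} = \Sigma - \frac{(\Sigma e_x)(\Sigma e_x)^{\top}}{\lambda_x + \Sigma_{xx}},
\]

which is what the addscalar, cov_m_x, and np.subtract lines compute.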