def history(means, algorithm, seed, horizon, epsilon=0.02):
    """Return the cumulative-regret history from T = 0 to T = horizon.

    Parameters
    ----------
    means : np.ndarray
        True expected reward of each arm.
    algorithm : str
        One of 'epsilon-greedy', 'ucb', 'kl-ucb', 'thompson-sampling',
        'thompson-sampling-with-hint'.
    seed : int
        Seed for numpy's global RNG, so a run is reproducible.
    horizon : int
        Total number of pulls T.
    epsilon : float, optional
        Exploration rate; used only by 'epsilon-greedy'.

    Returns
    -------
    np.ndarray of shape (horizon + 1,)
        regret[t] = t * max(means) - cumulative reward after t pulls
        (regret[0] is 0 by construction).

    Raises
    ------
    ValueError
        If `algorithm` is not one of the supported names.
    """
    np.random.seed(seed)
    bandit = BanditInstance(means)
    # Highest true mean (a reward value, not an arm index).
    optimal_mean = np.amax(means)
    n_arms = means.shape[0]

    if algorithm == 'epsilon-greedy':
        strategy = EpsilonGreedy(n_arms, epsilon)
    elif algorithm == 'ucb':
        strategy = UCB(n_arms)
    elif algorithm == 'kl-ucb':
        strategy = KLUCB(n_arms)
    elif algorithm == 'thompson-sampling':
        strategy = ThompsonSampling(n_arms)
    elif algorithm == 'thompson-sampling-with-hint':
        strategy = ThompsonSamplingWithHint(n_arms, np.sort(means))
    else:
        # Previously an unrecognized name left `strategy = 0` and the loop
        # below crashed with an opaque AttributeError; fail fast instead.
        raise ValueError("unknown algorithm: %r" % (algorithm,))

    reward_sum = 0
    regret = np.zeros(horizon + 1)
    for t in range(1, horizon + 1):
        arm = strategy.getArm()
        reward = bandit.pull(arm)
        strategy.getReward(arm, reward)
        reward_sum += reward
        regret[t] = t * optimal_mean - reward_sum

    return regret
# Beispiel #2

# Experiment 1: regret as a function of m in the parallel bandit setting.
experiment = Experiment(1)
experiment.log_code()

# Problem size and simulation budget.
N = 50
epsilon = .3
simulations = 10000
T = 400

# Algorithms compared in this experiment.
algorithms = [
    GeneralCausal(truncate='None'),
    ParallelCausal(),
    SuccessiveRejects(),
    AlphaUCB(2),
    ThompsonSampling(),
]

m_vals = range(2, N, 2)

regret, models = regret_vs_m(
    algorithms, m_vals, N, T, epsilon, simulations=simulations)

experiment.plot_regret(
    regret, m_vals, "m", algorithms, legend_loc="lower right")
# Beispiel #3
# NOTE(review): the following `return` is an orphaned fragment of a function
# whose `def` line is not part of this snippet; it is preserved here as a
# comment so the surrounding file can parse.
#     return m_vals, regret, models
# Experiment 4: regret as a function of m in the general confounded setting.
experiment = Experiment(4)
experiment.log_code()

# Problem size and simulation budget.
N = 50
N1_vals = range(1, N, 3)
pz = .4
q = (0.00001, 0.00001, .4, .65)
epsilon = .3
simulations = 10000
T = 400

# Algorithms compared in this experiment.
algorithms = [
    SuccessiveRejects(),
    GeneralCausal(),
    AlphaUCB(2),
    ThompsonSampling(),
]

epsilon = .3  # re-assigned to the same value as above, kept for parity
pY = ParallelConfounded.pY_epsilon_best(q, pz, epsilon)

m_vals, regret, models = regret_vs_m_general(
    algorithms, N1_vals, N, T, pz, pY, q, epsilon,
    simulations=simulations)

experiment.plot_regret(
    regret, m_vals, "m", algorithms,
    legend_loc="lower right", legend_extra=[ParallelCausal])