def compare_all(runs=2000, time=1000):
    """Compare all algorithms."""
    labels = ['epsilon-greedy', 'gradient bandit', 'UCB', 'optimistic initialization']
    generators = [lambda epsilon: Bandit(epsilon=epsilon, sample_averages=True),
                  lambda alpha: Bandit(gradient=True, step_size=alpha, gradient_baseline=True),
                  lambda coef: Bandit(epsilon=0, UCB_param=coef, sample_averages=True),
                  lambda initial: Bandit(epsilon=0, optimistic_init=initial, step_size=0.1)]
    # the builtin float replaces np.float, which recent NumPy releases removed
    parameters = [np.arange(-7, -1, dtype=float),
                  np.arange(-5, 2, dtype=float),
                  np.arange(-4, 3, dtype=float),
                  np.arange(-2, 3, dtype=float)]

    bandits = []
    for generator, parameter in zip(generators, parameters):
        for param in parameter:
            bandits.append(generator(pow(2, param)))

    _, average_rewards = simulate(bandits, runs, time)
    rewards = np.mean(average_rewards, axis=1)

    i = 0
    for label, parameter in zip(labels, parameters):
        l = len(parameter)
        plt.plot(parameter, rewards[i:i + l], label=label)
        i += l
    plt.xlabel('Parameter (2^x)')
    plt.ylabel('Average reward')
    plt.legend()

    plt.savefig(os.path.join(SAVING_PATH, "figure_2_6_compare_all.png"))
    plt.close()
def run_experiment(m1, m2, m3, eps, N, experiment_name):
    bandits = [Bandit(m1), Bandit(m2), Bandit(m3)]

    data = np.empty(N)
    for i in range(N):
        # epsilon greedy
        p = np.random.random()
        if p < eps:
            j = np.random.choice(3)
        else:
            j = np.argmax([b.mean for b in bandits])
        x = bandits[j].pull()
        bandits[j].update(x)

        # for the plot
        data[i] = x
    cumulative_average = np.cumsum(data) / (np.arange(N) + 1)

    # # plot moving average ctr
    # plt.plot(cumulative_average)
    # plt.plot(np.ones(N) * m1)
    # plt.plot(np.ones(N) * m2)
    # plt.plot(np.ones(N) * m3)
    # plt.xscale('log')
    # plt.show()

    for i, b in enumerate(bandits):
        print(f'{experiment_name}, bandit {i + 1} mean: {b.mean} win: {np.sum(data)}')

    return cumulative_average
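# The Bandit class used above is not shown in this snippet. A minimal sketch consistent
# with the calls it receives (pull(), update(x), the .mean estimate, and the pull count
# .N used by the UCB variant further down) is given below; the unit-variance Gaussian
# reward model and the class name are assumptions, not the project's implementation.
import numpy as np

class BanditSketch:
    def __init__(self, true_mean):
        self.true_mean = true_mean
        self.mean = 0.0   # running sample-average estimate of the arm's value
        self.N = 0        # number of pulls so far

    def pull(self):
        return np.random.randn() + self.true_mean

    def update(self, x):
        self.N += 1
        self.mean = (1 - 1.0 / self.N) * self.mean + (1.0 / self.N) * x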
def gradient(runs=2000, time=1000):
    """Test the k-armed bandit with gradient.

    Args:
        runs (int): test each bandit with the # of runs.
        time (int): the # of the time-steps in each run.
    """
    bandits = []
    bandits.append(Bandit(gradient=True, step_size=0.1, gradient_baseline=True, true_reward=4))
    bandits.append(Bandit(gradient=True, step_size=0.1, gradient_baseline=False, true_reward=4))
    bandits.append(Bandit(gradient=True, step_size=0.4, gradient_baseline=True, true_reward=4))
    bandits.append(Bandit(gradient=True, step_size=0.4, gradient_baseline=False, true_reward=4))
    print("===== %s =====" % ("Gradient"))
    best_action_counts, _ = simulate(bandits, runs, time)

    labels = ['alpha = 0.1, with baseline',
              'alpha = 0.1, without baseline',
              'alpha = 0.4, with baseline',
              'alpha = 0.4, without baseline']

    for i in range(0, len(bandits)):
        plt.plot(best_action_counts[i], label=labels[i])
    plt.xlabel('Steps')
    plt.ylabel('% Optimal action')
    plt.legend()

    plt.savefig(os.path.join(SAVING_PATH, "figure_2_5_gradient.png"))
    plt.close()
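# For reference, a minimal self-contained sketch of the gradient-bandit update that
# options such as gradient=True / gradient_baseline=True typically enable: preferences
# H are pushed by H += alpha * (R - baseline) * (one_hot - pi), with pi the softmax of H.
# The function name, reward model, and defaults below are illustrative assumptions,
# not the Bandit class used above.
import numpy as np

def gradient_bandit_sketch(true_means, steps=1000, alpha=0.1, use_baseline=True, seed=0):
    rng = np.random.default_rng(seed)
    k = len(true_means)
    H = np.zeros(k)        # action preferences
    avg_reward = 0.0       # running reward baseline
    for t in range(1, steps + 1):
        pi = np.exp(H - H.max())
        pi /= pi.sum()                      # softmax policy over preferences
        a = rng.choice(k, p=pi)
        r = rng.normal(true_means[a], 1.0)
        if use_baseline:
            avg_reward += (r - avg_reward) / t
        one_hot = np.zeros(k)
        one_hot[a] = 1.0
        H += alpha * (r - avg_reward) * (one_hot - pi)
    return H

# e.g. gradient_bandit_sketch([4.0, 4.5, 5.0]) will typically favour the last arm.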
def run(self, name, agent_name, **agent_parameters):
    """
    :param name: Name of the experiment
    :param agent_name: Name of the agent
    :param agent_parameters: Parameters of the agent
    """
    rewards = np.zeros(self.pulls)
    optimal_actions = np.zeros(self.pulls)

    for _ in tqdm(range(self.experiments)):
        bandit = Bandit(self.pulls, self.actions, agent_name, **agent_parameters)
        reward, optimal_action = bandit.experiment()
        rewards += reward
        optimal_actions += optimal_action

    # average over experiments (the builtin float replaces the removed np.float)
    rewards /= float(self.experiments)
    optimal_actions /= float(self.experiments)

    self.values[name] = {}
    self.values[name]['rewards'] = rewards
    self.values[name]['optimal_actions'] = optimal_actions
def bandit():
    bandit = Bandit()
    game.init_bandit(bandit)
    player = game.get_player()
    return render_template('bandit.html',
                           credits_demanded=bandit.get_credits_demanded(),
                           fskills=player.get_fighter_skills(),
                           pskills=player.get_pilotskills())
def experiment2():
    params = [{"time_horizon": 500, "number_of_arms": 5},
              {"time_horizon": 5000, "number_of_arms": 5},
              {"time_horizon": 500, "number_of_arms": 10},
              {"time_horizon": 5000, "number_of_arms": 10},
              {"time_horizon": 500, "number_of_arms": 20},
              {"time_horizon": 5000, "number_of_arms": 20}]
    for i in range(len(params)):
        results = []
        time_horizon = params[i]["time_horizon"]
        number_of_arms = params[i]["number_of_arms"]

        agent1 = agentFactory("random", time_horizon, number_of_arms)

        # decaying exploration schedule, epsilon_t = min(1, (K * log t / t)^(1/3)),
        # so that epsilon stays a valid probability and shrinks over time
        epsilons = []
        for j in range(time_horizon):
            t = j + 1
            epsilons.append(min(1.0, math.pow(number_of_arms * math.log(t) / t, 1 / 3)))
        agent2 = agentFactory("epsilon-greedy", time_horizon, number_of_arms, epsilons)

        agent3 = agentFactory("explore-then-exploit", time_horizon, number_of_arms, time_horizon / 100)
        agent4 = agentFactory("ucb1", time_horizon, number_of_arms)
        agent5 = agentFactory("successive-elimination", time_horizon, number_of_arms)

        bandit = Bandit(time_horizon, number_of_arms, agent1)
        results.append(mc_simulate(n_sim, bandit))
        bandit = Bandit(time_horizon, number_of_arms, agent2)
        results.append(mc_simulate(n_sim, bandit))
        bandit = Bandit(time_horizon, number_of_arms, agent3)
        results.append(mc_simulate(n_sim, bandit, "N=T/100"))
        bandit = Bandit(time_horizon, number_of_arms, agent4)
        results.append(mc_simulate(n_sim, bandit))
        bandit = Bandit(time_horizon, number_of_arms, agent5)
        results.append(mc_simulate(n_sim, bandit))

        plot(results, time_horizon, params[i])
def get_market_data():
    global username
    if request.method == "GET":
        band1 = Bandit()
        trader1 = Trader()
        police1 = Police()
        cost1 = Cost()
        market_inventory = regions.child(currRegion).child('inventory').get()
        ship_inventory = users.child(username).child('ship').child('inventory').get()
        difficulty = users.child(username).child('difficulty').get()
        ship_cargo = users.child(username).child('ship').child('cargo').get()
        credit_val = users.child(username).child('credit').get()
        ship_health = users.child(username).child('ship').child('health').get()
        pilot_skill = users.child(username).child('skills').child('pilot').get()
        engineer = users.child(username).child('skills').child('engineer').get()
        fighter_skill = users.child(username).child('skills').child('fighter').get()
        merchant_skill = users.child(username).child('skills').child('merchant').get()
        fuel = users.child(username).child('ship').child('fuel').get()
        fuelcost = cost1.calculate_fuel(difficulty, credit_val)
        demand = band1.calculate_demand(difficulty, credit_val)
        repair = cost1.calculate_repair(difficulty, engineer, credit_val)
        price = trader1.item_to_sell(difficulty, credit_val)
        qty = trader1.qty
        item = trader1.item
        stolen = police1.stolen_item(ship_inventory)
        to_return = {
            'username': username,
            'currRegion': currRegion,
            'market_inventory': market_inventory,
            'ship_inventory': ship_inventory,
            'cargo': ship_cargo,
            'credit': credit_val,
            'health': ship_health,
            'demand': demand,
            'qty': qty,
            'item': item,
            'price': price,
            'eng': engineer,
            'stolen': stolen,
            'difficulty': difficulty,
            'pilot': pilot_skill,
            'fighter': fighter_skill,
            'fuel': fuel,
            'fuelcost': fuelcost,
            'merch': merchant_skill,
            'repair': repair
        }
        return to_return
    return None
def __init__(self, predict_market, num_bids, trials, label='Multi-Armed Prediction Market Bandit'):
    self.predict_market = predict_market
    self.n_arms = predict_market.arms
    self.agents = predict_market.agents
    self.data = predict_market.dataframe
    self.num_bids = num_bids
    self.label = label
    self.bandit = Bandit(self.n_arms, self.data)
    self.trials = trials
    self.scores = None
    self.optimal = None
def __init__(self, models, n, a, task=None):
    self.models = models
    self.n = n
    self.a = a
    self.num_arms = models.shape[1]
    self.num_models = models.shape[0]
    self.counts = np.zeros([self.num_arms])
    self.means = np.zeros([self.num_arms])
    if task is None:
        self.task = np.random.choice(self.num_models)
    else:
        self.task = task
    self.bandit = Bandit(self.models[self.task])
def run(algorithm, non_stationary=False):
    """Runs an algorithm with the specified parameters.

    algorithm: Instance of an algorithm from `algorithms.py`.
    non_stationary: Whether to run the stationary or non-stationary test bench.
    """
    with open('config.yml') as cfile:
        config = y.safe_load(cfile)['run']

    runs, steps = config['runs'], config['steps']
    avg_rewards, optim_action_percent = np.zeros(steps), np.zeros(steps)

    for run in range(runs):
        bandit = Bandit(non_stationary)
        print(f'Run number {run + 1}.')

        # One-run rewards
        or_rewards = []
        # One-run actions
        or_actions = []

        optim_action = np.argmax([bandit.q_star(a) for a in range(10)])
        # One-run optimal actions (tracked per step only in the non-stationary case)
        or_optim_actions = [] if non_stationary else optim_action

        for step in range(1, steps + 1):
            action = algorithm.act(step)
            reward = bandit(action)

            if non_stationary:
                optim_action = np.argmax([bandit.q_star(a) for a in range(10)])

            algorithm.update(action, reward, step)

            or_rewards.append(reward)
            or_actions.append(action)
            if non_stationary:
                or_optim_actions.append(optim_action)

        avg_rewards += or_rewards

        if non_stationary:
            a, o = np.array(or_actions), np.array(or_optim_actions)
        else:
            a, o = np.array(or_actions), or_optim_actions
        optim_action_percent += (a == o)

        algorithm.reset()

    avg_rewards /= runs
    optim_action_percent = (optim_action_percent / runs) * 100.0

    return avg_rewards, optim_action_percent
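# The keys read above imply a config.yml with a top-level `run` section holding `runs`
# and `steps`. A minimal sketch that writes such a file; the values 2000 and 1000 are
# illustrative assumptions, not taken from the project.
import yaml

example_config = {'run': {'runs': 2000, 'steps': 1000}}
with open('config.yml', 'w') as cfile:
    yaml.safe_dump(example_config, cfile)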
def create_bandit(means, variances):
    def reward_fn(mu, variance):
        stddev = np.sqrt(variance)
        return lambda: np.random.normal(mu, stddev)

    arms = list(map(lambda a: reward_fn(*a), zip(means, variances)))
    return Bandit(arms)
def epsilon_greedy(runs=2000, time=1000):
    """Test the k-armed bandit with the policy of epsilon greedy.

    Args:
        runs (int): test each bandit with the # of runs.
        time (int): the # of the time-steps in each run.
    """
    epsilons = [0, 0.01, 0.1, 0.2]
    bandits = [Bandit(epsilon=eps, sample_averages=True) for eps in epsilons]
    print("===== {} =====".format("Epsilon greedy"))
    best_action_counts, rewards = simulate(bandits, runs, time)

    plt.figure(figsize=(10, 20))

    plt.subplot(2, 1, 1)
    for eps, reward in zip(epsilons, rewards):
        plt.plot(reward, label="epsilon = %.02f" % (eps))
    plt.xlabel('steps')
    plt.ylabel('average reward')
    plt.legend()

    plt.subplot(2, 1, 2)
    for eps, counts in zip(epsilons, best_action_counts):
        plt.plot(counts, label="epsilon = %.02f" % (eps))
    plt.xlabel('steps')
    plt.ylabel('% optimal action')
    plt.legend()

    plt.savefig(os.path.join(SAVING_PATH, "figure_2_2_epsilon_greedy.png"))
    plt.close()
def run_greedy(number_bandits, epsilon, iterations):
    bandits = [Bandit(i + 1, 0) for i in range(number_bandits)]
    current_best = bandits[0]
    data = np.empty(iterations)
    print(f'Starting with bandit {current_best.true_mean}.')

    for i in range(iterations):
        explore_exploit = np.random.rand()
        bandit = current_best
        # explore
        if explore_exploit < epsilon:
            selection = np.random.randint(0, number_bandits)
            # print(f'Machine {selection} selected.')
            bandit = bandits[selection]
        # exploit
        value = bandit.pull()
        bandit.update(value)
        data[i] = value

        # update
        if current_best.current_mean < bandit.current_mean:
            print(f'Updated to bandit {bandit.true_mean}')
            current_best = bandit

    print(f'Chose bandit {current_best.true_mean}')

    cumulative_average = np.cumsum(data) / (np.arange(iterations) + 1)
    plt.plot(cumulative_average)
    for i in range(number_bandits):
        plt.plot(np.ones(iterations) * (i + 1))
    plt.xscale('log')
    plt.show()

    return cumulative_average
def plot_figures(k, n_bandits, n_steps, eps_list, weight_fn=sample_average,
                 random_walk=False, y_bounds=[0, 1.5], Q_1=0, show=True,
                 method='epsilon-greedy', extra_label='', title=None, percentage=False):
    avg_rew_per_eps = [np.zeros(n_steps) for _ in range(len(eps_list))]
    avg_rew_in_perc = [np.zeros(n_steps) for _ in range(len(eps_list))]
    for i in range(n_bandits):
        print(i)
        bandit_pb = Bandit(k)
        for j, eps in enumerate(eps_list):
            _, per, avg_rew, _ = a_simple_bandit_algorithm(
                bandit_pb, n_iterations=n_steps, eps=eps, weight_fn=weight_fn,
                random_walk=random_walk, Q_1=Q_1, method=method)
            avg_rew_per_eps[j] += avg_rew
            avg_rew_in_perc[j] += per
    to_plot = avg_rew_in_perc if percentage else avg_rew_per_eps
    bounds = [0, 100] if percentage else y_bounds
    plot_average(to_plot, eps_list, n_bandits, bounds, show, extra_label, title, percentage)
def fig_2_5(n_bandits=2000, n_steps=1000, k=10, alpha_list=[0.1, 0.4]):
    d = {}
    for baseline in [False, True]:
        for alpha in alpha_list:
            d[(baseline, alpha)] = np.zeros(n_steps)

    for n in range(n_bandits):
        print(n)
        bandit = Bandit(k, mean=4)
        for baseline in [False, True]:
            for alpha in alpha_list:
                result_arr, _ = gradient_bandit(bandit, n_steps=n_steps, alpha=alpha, baseline=baseline)
                d[(baseline, alpha)] += result_arr

    def label(baseline, alpha):
        return ("with" if baseline else "without") + f" baseline, alpha={alpha}"

    for key, avg_rew in d.items():
        plt.plot((avg_rew / n_bandits) * 100, label=label(key[0], key[1]))
    axes = plt.gca()
    axes.set_ylim([0, 100])
    plt.xlabel("Steps")
    plt.ylabel("Optimal Action %")
    plt.title("Figure 2.5")
    plt.legend()
    plt.show()
def step(self, bandit: Bandit, epsilon=0):
    """Select a lever with epsilon-greedy action selection, pull it, and update the estimate.

    :param bandit: the Bandit whose levers are pulled
    :param epsilon: probability of choosing a random lever instead of a greedy one
    :return: the chosen lever and the reward it produced
    """
    # flip a coin to decide between a greedy action and a random one
    flip_coin = self.rng.rand()
    if flip_coin < epsilon:
        # random action
        chosen_lever = self.rng.randint(self.n_arms)
    else:
        # find all the best levers and then sample randomly from them
        best_value = max(self.Q)
        best_levers = [lever for lever in range(self.n_arms) if self.Q[lever] == best_value]
        chosen_lever = best_levers[self.rng.randint(len(best_levers))]

    reward = bandit.pull(chosen_lever)
    self.N[chosen_lever] += 1
    # update the estimated expected reward for the chosen lever (incremental sample average)
    self.Q[chosen_lever] = self.Q[chosen_lever] + (1 / self.N[chosen_lever]) * (reward - self.Q[chosen_lever])

    return chosen_lever, reward
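# The update above is the incremental form of the sample average,
# Q_{n+1} = Q_n + (1/n) * (R_n - Q_n), which avoids storing all past rewards.
# A small self-contained check of that identity (illustrative, independent of the
# classes above):
import numpy as np

rewards = np.random.default_rng(0).normal(1.0, 1.0, size=100)
q = 0.0
for n, r in enumerate(rewards, start=1):
    q += (r - q) / n                      # incremental update
assert np.isclose(q, rewards.mean())      # identical to the batch sample average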
def captions_bandit():
    data = captions_data

    def reward_fn(percentage):
        return lambda: np.random.choice((0, 0.5, 1), 1, p=percentage)[0]

    arms = list(map(reward_fn, data))
    return Bandit(arms)
def experiment1():
    params = [{"time_horizon": 1000, "number_of_arms": 5},
              {"time_horizon": 10000, "number_of_arms": 5},
              {"time_horizon": 1000, "number_of_arms": 10},
              {"time_horizon": 10000, "number_of_arms": 10},
              {"time_horizon": 1000, "number_of_arms": 20},
              {"time_horizon": 10000, "number_of_arms": 20}]
    for i in range(len(params)):
        results = []
        time_horizon = params[i]["time_horizon"]
        number_of_arms = params[i]["number_of_arms"]

        agent1 = agentFactory("explore-then-exploit", time_horizon, number_of_arms, 5)
        agent2 = agentFactory("explore-then-exploit", time_horizon, number_of_arms, time_horizon / 10)
        agent3 = agentFactory("explore-then-exploit", time_horizon, number_of_arms, time_horizon / 100)

        bandit = Bandit(time_horizon, number_of_arms, agent1)
        results.append(mc_simulate(n_sim, bandit, "N=5"))
        bandit = Bandit(time_horizon, number_of_arms, agent2)
        results.append(mc_simulate(n_sim, bandit, "N=T/10"))
        bandit = Bandit(time_horizon, number_of_arms, agent3)
        results.append(mc_simulate(n_sim, bandit, "N=T/100"))

        plot(results, time_horizon, params[i])
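# For context, explore-then-exploit (compared above for N = 5, T/10, and T/100) pulls
# every arm N times, then commits to the empirically best arm for the remaining steps.
# A minimal self-contained sketch with Gaussian arms; the function name and reward
# model are illustrative assumptions, not the agentFactory/Bandit classes used above.
import numpy as np

def explore_then_exploit_sketch(true_means, time_horizon, n_explore, seed=0):
    rng = np.random.default_rng(seed)
    k = len(true_means)

    def pull(a):
        return rng.normal(true_means[a], 1.0)

    rewards = []
    # exploration phase: each arm pulled n_explore times
    estimates = np.zeros(k)
    for a in range(k):
        samples = [pull(a) for _ in range(n_explore)]
        estimates[a] = np.mean(samples)
        rewards.extend(samples)
    # exploitation phase: commit to the empirically best arm
    best = int(np.argmax(estimates))
    for _ in range(time_horizon - k * n_explore):
        rewards.append(pull(best))
    return np.sum(rewards), best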
def polynomial_bandit(n, variance):
    means = polynomial_means(n)
    stddev = np.sqrt(variance)

    def reward_fn(mu):
        return lambda: np.random.normal(mu, stddev)

    arms = list(map(reward_fn, means))
    return Bandit(arms)
def main(): """ Constants """ k: int = 20 epsilon: float = 0.01 init_val: int = 10 c: float = 1.0 max_time: int = 1000 rounds: int = 1000 policy: Policy = EpsilonGreedyPolicy(epsilon) agent = Agent(k, policy) bandits = [Bandit() for _ in range(k)] def play_round(): """ Simulates one round of the game. """ # get the next action action = agent.choose_action() # get a reward from the bandit reward = bandits[action].get_reward() # play the action agent.play_action(action, reward) return reward def reset(): agent.reset() for bandit in bandits: bandit.reset() optimal_bandit = np.argmax([bandit.get_reward() for bandit in bandits]) def print_bandits(): for i, bandit in enumerate(bandits): print('Bandit {} reward={}'.format(i, bandit.get_reward())) def experiment(): scores = np.zeros(max_time, dtype=float) for _ in range(rounds): for t in range(max_time): scores[t] += play_round() reset() return scores / rounds def plot(label): print_bandits() scores = experiment() time = range(max_time) plt.title(label + " for k = " + str(k)) plt.ylim([0.0, 2.0]) plt.xlabel('Steps') plt.ylabel('Avg. Reward') plt.scatter(x=time, y=scores, s=0.5) plt.show() plot(policy.__str__())
def sparse_bandit(n_arms, variance):
    means = sparse_means(n_arms)
    stddev = np.sqrt(variance)

    def reward_fn(mu):
        return lambda: np.random.normal(mu, stddev)

    arms = list(map(reward_fn, means))
    return Bandit(arms)
def poisson_exp_bandit():
    n = n_arms()
    means_ = poisson_exp_means()

    def reward_fn(lambda_):
        return lambda: np.random.poisson(lambda_)

    arms = list(map(reward_fn, means_))
    return Bandit(arms)
def captions_bandit(n):
    categories = np.array([0, 0.5, 1])
    data = captions_data()[:n]

    def reward_fn(percentage):
        return lambda: np.random.choice(categories, 1, p=percentage)[0]

    arms = list(map(reward_fn, data))
    return Bandit(arms)
class UCB():
    def __init__(self, models, n, alpha_ucb, task=None):
        self.models = models
        self.n = n
        self.alpha = alpha_ucb
        self.num_arms = models.shape[1]
        self.num_models = models.shape[0]
        self.counts = np.zeros([self.num_arms])
        self.means = np.zeros([self.num_arms])
        if task is None:
            self.task = np.random.choice(self.num_models)
        else:
            self.task = task
        self.bandit = Bandit(self.models[self.task])

    def select_arm_ucb(self, t):
        for a in range(self.num_arms):
            if self.counts[a] == 0:
                return a
        ucb_values = self.means + np.sqrt((self.alpha * np.log(t)) / self.counts)
        return np.argmax(ucb_values)

    def update_arm(self, action, reward):
        self.counts[action] += 1
        n = self.counts[action]
        value = self.means[action]
        self.means[action] = ((n - 1) / n) * value + (1. / n) * reward

    def run(self, T_list):
        regrets = []
        counts = np.zeros([max(T_list), self.num_arms])
        for t in range(self.n):
            action = self.select_arm_ucb(t)
            reward = self.bandit.pull_arm(action)
            self.update_arm(action, reward)
            if (t + 1) in T_list:
                regret = self.bandit.calculate_regret(self.counts)  # keep track of regrets
                regrets.append(regret)
            counts[t, :] = self.counts
        return regrets, np.asarray(counts)
def upper_confidence_bound(runs=2000, time=1000):
    """Test the k-armed bandit with UCB.

    Args:
        runs (int): test each bandit with the # of runs.
        time (int): the # of the time-steps in each run.
    """
    bandits = []
    bandits.append(Bandit(epsilon=0, UCB_param=2, sample_averages=True))
    bandits.append(Bandit(epsilon=0.1, sample_averages=True))
    _, average_rewards = simulate(bandits, runs, time)

    plt.plot(average_rewards[0], label='UCB c = 2')
    plt.plot(average_rewards[1], label='epsilon greedy epsilon = 0.1')
    plt.xlabel('Steps')
    plt.ylabel('Average reward')
    plt.legend()

    plt.savefig(os.path.join(SAVING_PATH, "figure_2_4_UCB.png"))
    plt.close()
def run_experiment(m1, m2, m3, N, experiment_name):
    bandits = [Bandit(m1), Bandit(m2), Bandit(m3)]

    data = np.empty(N)
    for i in range(N):
        j = np.argmax([ucb(b.mean, i + 1, b.N) for b in bandits])
        x = bandits[j].pull()
        bandits[j].update(x)

        # for the plot
        data[i] = x
    cumulative_average = np.cumsum(data) / (np.arange(N) + 1)

    for i, b in enumerate(bandits):
        print(f'{experiment_name}, bandit {i + 1} mean: {b.mean} win: {np.sum(data)}')

    return cumulative_average
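# The ucb helper called above is not shown in this snippet. A common UCB1-style bound
# consistent with its call signature (mean estimate, total pulls so far, pulls of this
# arm) is sketched below; this is an assumed form, not the project's definition, and
# the small constant only guards against division by zero before an arm has been pulled.
import numpy as np

def ucb(mean, n, n_j):
    return mean + np.sqrt(2 * np.log(n) / (n_j + 1e-6))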
def init_bandits(self, holdout=True):
    """Specify some way to split up the indices?"""
    df_len = len(self.data_sim.df.index)
    num_divide = self.n_bandits + 1 if holdout else self.n_bandits
    increment = int(df_len / num_divide)
    for i in range(self.n_bandits):
        indices = [i * increment, (i + 1) * increment]
        self.bandits.append(Bandit(self.n_arms, self.data_sim.df, data_indices=indices))
    if holdout:
        self.df_holdout = self.data_sim.df[df_len - increment + 1:]
        self.trials = increment if self.trials is None else self.trials
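# A worked check of the index arithmetic above with illustrative sizes (not project
# data): 1000 rows, 3 bandits, and a holdout share give an increment of 250 and the
# three contiguous index ranges below, leaving the final rows as holdout.
df_len, n_bandits = 1000, 3
increment = int(df_len / (n_bandits + 1))   # holdout reserves one extra share
splits = [[i * increment, (i + 1) * increment] for i in range(n_bandits)]
assert splits == [[0, 250], [250, 500], [500, 750]]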
def createBanditInstancesAndSimulate(params, n_mc_sim):
    n_sim = n_mc_sim
    for i in range(len(params)):
        results = []
        time_horizon = params[i]['time_horizon']
        number_of_arms = params[i]['number_of_arms']
        number_of_exploration_per_arm = params[i]['number_of_exploration_per_arm']

        exp_agent = ExploreThenExploit(time_horizon, number_of_arms, number_of_exploration_per_arm)
        epsilon_greedy_constant_half_epsilonAgent = EpsilonGreedy(time_horizon, number_of_arms, [1 / 2] * time_horizon)
        epsilon_greedy_constant_epsilonAgent = EpsilonGreedy(
            time_horizon, number_of_arms,
            [number_of_exploration_per_arm * number_of_arms / time_horizon] * time_horizon)
        ubc_agent = UBC1Agent(time_horizon, number_of_arms)
        se_agent = SuccessiveEliminationAgent(time_horizon, number_of_arms)
        random_agent = Agent()

        bandit = Bandit(time_horizon, number_of_arms, random_agent)
        results.append(mc_simulate(n_sim, bandit))
        bandit = Bandit(time_horizon, number_of_arms, exp_agent)
        results.append(mc_simulate(n_sim, bandit))
        bandit = Bandit(time_horizon, number_of_arms, epsilon_greedy_constant_epsilonAgent)
        results.append(mc_simulate(n_sim, bandit, "constant-epsilon=rate-of-explore-exploit"))
        bandit = Bandit(time_horizon, number_of_arms, epsilon_greedy_constant_half_epsilonAgent)
        results.append(mc_simulate(n_sim, bandit, "constant-epsilon=0.5"))
        bandit = Bandit(time_horizon, number_of_arms, ubc_agent)
        results.append(mc_simulate(n_sim, bandit))
        bandit = Bandit(time_horizon, number_of_arms, se_agent)
        results.append(mc_simulate(n_sim, bandit))

        plot(results, time_horizon, params[i])
class Training(TrainingBase):
    def __init__(self):
        super(Training, self).__init__()
        self.steps = options.get('environment/steps', 1000)
        self.bandit = Bandit()

    def episode(self, number):
        # get first action from agent
        action = self.agent.update(reward=None, state=[])
        # update agent with state and reward
        for step in range(self.steps):
            reward = self.bandit.pull(action)
            action = self.agent.update(reward=reward, state=[])
            log.info('step: %s, action: %s' % (step, action))
def param_study(n_bandits=2000, n_steps=1000, title='Figure 2.6', fn='fig2_6',
                nonstat=False, print_freq=10, start_timestep=np.inf):
    results = {(method, hyper): 0
               for (method, hyperparams) in HYPERPARMS.items()
               for hyper in hyperparams}
    y_label = (f"Average Reward over last {n_steps-start_timestep} steps" if nonstat
               else f"Average Reward over first {n_steps} steps")
    for t in range(1, n_bandits + 1):
        print(f"{t}/{n_bandits}")
        bandit = Bandit()
        for method, hyperparams in HYPERPARMS.items():
            for hyper in hyperparams:
                results[(method, hyper)] += apply_method(bandit, n_steps, method, hyper,
                                                         nonstat, start_timestep)[-1]
                bandit.reset()  # need to reset q values after random walk
        if (t % print_freq == 0):
            plot_current(n_steps, results, t, title, fn, y_label)
def BanditExperiment(debug=True,
                     replications=100,  # Posen & Levinthal used 25,000
                     arms=10,
                     turns=500,
                     payoff_fxn=[betadist_payoff],
                     turbulence_fxn=[randomshock],
                     strategy_fxn=[softmax_strategy],
                     turbulence=[0],
                     belief_fxn=[belief_with_latency_and_memory],
                     strategy=[0.5],
                     latency=[0],
                     initial_learning=0,
                     memory=[500],
                     experiment_name=""
                     ):
    """
    This function should set up a set of Bandit simulations with given parameters
    and run them as an experiment.  Its output will be a table of data, each row
    containing the variable inputs and the outcomes (averaged over replications
    within each experimental condition).

    Arguments except 'debug', 'experiment_name', 'replications', 'arms', and 'turns'
    are lists of values to experiment with.  For each combination of values, the
    simulation is repeated 'replications' times.  So if you want to test the
    difference between turbulence of 0 and 0.1, set turbulence=[0,0.1] and the
    experiment will be set up.
    """

    programstart = datetime.datetime.now()

    _experiment_name = experiment_name if experiment_name else programstart.strftime('%Y%m%d-%H%M%S')
    _logfile = open(("output/"+_experiment_name+"-log.txt"), 'w')
    _datafile = open(("output/"+_experiment_name+"-data.csv"), 'w')
    _datafile.write("EXPERIMENT,REPLICATION,PAYOFF_FXN,TURBULENCE_FXN,STRATEGY_FXN,BELIEF_FXN,TURBULENCE,STRATEGY,LATENCY,MEMORY,SCORE,KNOWLEDGE,OPINION,PROBEXPLORE\n")  # CSV header row
    _summaryfile = open(("output/"+_experiment_name+"-summary.csv"), 'w')
    _summaryfile.write("EXPERIMENT,PAYOFF_FXN,TURBULENCE_FXN,STRATEGY_FXN,BELIEF_FXN,TURBULENCE,STRATEGY,LATENCY,MEMORY,MEAN_SCORE,MEAN_KNOWLEDGE,MEAN_OPINION,MEAN_PROBEXPLORE\n")  # CSV header row

    def log(message):
        _logfile.write(message)
        if debug:
            print(message)  # only print to screen if user wants it wordy for debugging purposes

    _numexps = len(payoff_fxn)*len(turbulence_fxn)*len(strategy_fxn)*len(turbulence)*len(belief_fxn)*len(strategy)*len(latency)*len(memory)
    log("Planning "+str(_numexps)+" experiments with "+str(replications)+
        " replications x "+str(turns)+" turns each.\nI.e., a total of "+
        str(_numexps*replications*turns)+" turns of processing.\n\n")
    _currentexp = 0  # which experiment are we on currently?

    # Loop through all experimental conditions and run simulations:
    for pf in payoff_fxn:
        for tf in turbulence_fxn:
            for sf in strategy_fxn:
                for tb in turbulence:
                    for bf in belief_fxn:
                        for st in strategy:
                            for lt in latency:
                                for mm in memory:
                                    # Run several replications of the simulation within one experimental condition:

                                    # hold the data from each replication (to be averaged later)
                                    finalscores = []
                                    finalknowledges = []
                                    finalopinions = []
                                    finalprobexplores = []

                                    _currentexp += 1
                                    log("Starting experiment " + str(_currentexp) + " of " + str(_numexps) + " with:\n payoff_fxn="+str(pf)+
                                        "\n turbulence_fxn="+str(tf)+
                                        "\n strategy_fxn="+str(sf)+
                                        "\n belief_fxn="+str(bf)+
                                        "\n turbulence="+str(tb)+
                                        "\n strategy="+str(st)+
                                        "\n latency="+str(lt)+
                                        "\n memory="+str(mm)+"\n")
                                    expstart = datetime.datetime.now()

                                    for i in range(replications):
                                        # Do one replication (of many) within an experimental condition:
                                        b = Bandit(arms=arms, turns=turns, payoff_fxn=pf,
                                                   turbulence_fxn=tf, strategy_fxn=sf,
                                                   turbulence=tb, belief_fxn=bf, strategy=st,
                                                   latency=lt, initial_learning=initial_learning,
                                                   memory=mm)
                                        b.simulate()
                                        finalscores.append(b.score())
                                        finalknowledges.append(b.knowledge())
                                        finalopinions.append(b.opinion())
                                        finalprobexplores.append(b.probexplore())

                                        # log the data
                                        _datafile.write('{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                                            _currentexp,  # experiment number
                                            (i+1),  # replication number
                                            str(pf), str(tf), str(sf), str(bf),
                                            str(tb), str(st), str(lt), str(mm),
                                            b.score(), b.knowledge(), b.opinion(), b.probexplore()
                                        ))
                                        #log("simulation "+str(i+1)+" of "+str(replications)+" took "+str(b._simtime))

                                    # Take average results from all replications (within one experimental condition)
                                    # and output them to a 'summary' data file.
                                    _summaryfile.write('{},{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                                        _currentexp,
                                        str(pf), str(tf), str(sf), str(bf),
                                        str(tb), str(st), str(lt), str(mm),
                                        sum(finalscores)/replications,
                                        sum(finalknowledges)/replications,
                                        sum(finalopinions)/replications,
                                        sum(finalprobexplores)/replications
                                    ))

                                    log("FINISHED in "+str(datetime.datetime.now()-expstart)+"\n\n")
                                    # Loop goes to the next experimental condition.

    log("All experiments completed in " + str(datetime.datetime.now() - programstart))
    _logfile.close()
    _datafile.close()
    _summaryfile.close()
def main():
    bandit = Bandit()
    bandit.setupBandit(10)
    for i in range(1000):
        greedyPlay(bandit, 500)
def run(self):
    programstart = datetime.datetime.now()

    _experiment_name = self.experiment_name if self.experiment_name else programstart.strftime('%Y%m%d-%H%M%S')
    _logfile = open(("output/"+_experiment_name+"-log.txt"), 'w', buffering=1)
    _datafile = open(("output/"+_experiment_name+"-data.csv"), 'w', buffering=1)
    _datafile.write("EXPERIMENT,REPLICATION,ARMS,TURNS,PAYOFF_FXN,TURBULENCE_FXN,STRATEGY_FXN,BELIEF_FXN,TURBULENCE,STRATEGY,LATENCY,INITIAL_LEARNING,MEMORY,SCORE,KNOWLEDGE,OPINION,PROBEXPLORE\n")  # CSV header row
    _summaryfile = open(("output/"+_experiment_name+"-summary.csv"), 'w', buffering=1)
    _summaryfile.write("EXPERIMENT,ARMS,TURNS,PAYOFF_FXN,TURBULENCE_FXN,STRATEGY_FXN,BELIEF_FXN,TURBULENCE,STRATEGY,LATENCY,INITIAL_LEARNING,MEMORY,MEAN_SCORE,MEAN_KNOWLEDGE,MEAN_OPINION,MEAN_PROBEXPLORE\n")  # CSV header row
    if self.timeseries:
        _timeseriesfile = open(("output/"+_experiment_name+"-timeseries.csv"), 'w', buffering=1)

    def log(message):
        _logfile.write(message)
        if self.debug:
            print(message)  # only print to screen if user wants it wordy for debugging purposes

    _numexps_without_turns = len(self.arms)*len(self.payoff_fxn)*len(self.turbulence_fxn)*len(self.strategy_fxn)*len(self.turbulence)*len(self.belief_fxn)*len(self.strategy)*len(self.latency)*len(self.memory)
    _numturns = sum([t*_numexps_without_turns for t in self.turns])
    _numexps = _numexps_without_turns*len(self.turns)
    log("Planning "+str(_numexps)+" experiments with "+str(self.replications)+
        " replications x "+" or ".join([str(t) for t in self.turns])+" turns each.\nI.e., a total of "+
        str(self.replications*_numturns)+" turns of processing.\n\n")
    _currentexp = 0  # which experiment are we on currently?

    # Loop through all experimental conditions and run simulations:
    for ar in self.arms:
        for tu in self.turns:
            #for il in self.initial_learning:
            for pf in self.payoff_fxn:
                for tf in self.turbulence_fxn:
                    for sf in self.strategy_fxn:
                        for tb in self.turbulence:
                            for bf in self.belief_fxn:
                                for st in self.strategy:
                                    for lt in self.latency:
                                        for mm in self.memory:
                                            # Run several replications of the simulation within one experimental condition:

                                            # hold the data from each replication (to be averaged later)
                                            finalscores = []
                                            finalknowledges = []
                                            finalopinions = []
                                            finalprobexplores = []

                                            _currentexp += 1
                                            il = lt  # this should guarantee that latency conditions don't wait until turn lt+1 to start learning; they have some initial learning, it's just out of date
                                            log("Starting experiment " + str(_currentexp) + " of " + str(_numexps) + " with:"+
                                                "\n arms="+str(ar)+
                                                "\n turns="+str(tu)+
                                                "\n payoff_fxn="+str(pf)+
                                                "\n turbulence_fxn="+str(tf)+
                                                "\n strategy_fxn="+str(sf)+
                                                "\n belief_fxn="+str(bf)+
                                                "\n turbulence="+str(tb)+
                                                "\n strategy="+str(st)+
                                                "\n latency="+str(lt)+
                                                "\n initial_learning="+str(il)+
                                                "\n memory="+str(mm)+"\n")
                                            expstart = datetime.datetime.now()

                                            for i in range(self.replications):
                                                # Do one replication (of many) within an experimental condition:
                                                b = Bandit(arms=ar, turns=tu, payoff_fxn=pf,
                                                           turbulence_fxn=tf, strategy_fxn=sf,
                                                           turbulence=tb, belief_fxn=bf, strategy=st,
                                                           latency=lt, initial_learning=il,
                                                           memory=mm)
                                                b.simulate()
                                                finalscores.append(b.score())
                                                finalknowledges.append(b.knowledge())
                                                finalopinions.append(b.opinion())
                                                finalprobexplores.append(b.probexplore())

                                                # log the data
                                                _datafile.write('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                                                    _currentexp,  # experiment number
                                                    (i+1),  # replication number
                                                    str(ar), str(tu), str(pf), str(tf), str(sf), str(bf),
                                                    str(tb), str(st), str(lt), str(il), str(mm),
                                                    b.score(), b.knowledge(), b.opinion(), b.probexplore()
                                                ))
                                                #log("simulation "+str(i+1)+" of "+str(replications)+" took "+str(b._simtime))

                                                if self.timeseries:
                                                    _timeseriesfile.write(','.join([str(s) for s in b.allscores()])+'\n')

                                            # Take average results from all replications (within one experimental condition)
                                            # and output them to a 'summary' data file.
                                            _summaryfile.write('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                                                _currentexp,
                                                str(ar), str(tu), str(pf), str(tf), str(sf), str(bf),
                                                str(tb), str(st), str(lt), str(il), str(mm),
                                                sum(finalscores)/self.replications,
                                                sum(finalknowledges)/self.replications,
                                                sum(finalopinions)/self.replications,
                                                sum(finalprobexplores)/self.replications
                                            ))

                                            log("FINISHED in "+str(datetime.datetime.now()-expstart)+"\n\n")
                                            # Loop goes to the next experimental condition.

    log("All experiments completed in " + str(datetime.datetime.now() - programstart))
    _logfile.close()
    _datafile.close()
    _summaryfile.close()