# Note: this script assumes the surrounding imports (os, pickle, numpy as np,
# scipy.stats as stats, matplotlib.colors as colors) and the bandit classes and
# plotting helpers referenced below are available from the rest of the repository.
def main(A, t_max, M, N_max, R, exec_type, theta):
    ############################### MAIN CONFIG ###############################
    print(
        '{}-armed Bernoulli bandit with optimal, TS and all arm sampling policies with {} MC samples for {} time-instants and {} realizations'
        .format(A, M, t_max, R))

    # Directory configuration
    dir_string = '../results/{}/A={}/t_max={}/R={}/M={}/N_max={}/theta={}'.format(
        os.path.basename(__file__).split('.')[0], A, t_max, R, M, N_max,
        str.replace(str.strip(np.array_str(theta.flatten()), ' []'), ' ', '_'))
    os.makedirs(dir_string, exist_ok=True)

    ########## Bernoulli Bandit configuration ##########
    # No context
    context = None

    # Reward function and prior
    reward_function = {
        'type': 'bernoulli',
        'dist': stats.bernoulli,
        'theta': theta
    }
    reward_prior = {
        'dist': stats.beta,
        'alpha': np.ones((A, 1)),
        'beta': np.ones((A, 1))
    }

    ############################### BANDITS ###############################
    ### Monte Carlo integration types
    MC_types = ['MC_rewards', 'MC_expectedRewards', 'MC_arms']

    # Bandits to evaluate as a list
    bandits = []
    bandits_labels = []

    ### Optimal bandit
    bandits.append(OptimalBandit(A, reward_function))
    bandits_labels.append('Optimal Bandit')

    ### Thompson sampling: when sampling with static n=1
    # MC samples
    for m in np.array([1, M]):
        # MC types
        for MC_type in MC_types:
            thompsonSampling = {
                'sampling_type': 'static',
                'arm_N_samples': 1,
                'M': m,
                'MC_type': MC_type
            }
            bandits.append(
                BayesianBanditSampling(A, reward_function, reward_prior,
                                       thompsonSampling))
            bandits_labels.append('TS, {}, M={}'.format(
                MC_type, thompsonSampling['M']))

    ### Inverse Pfa sampling
    # MC types
    for MC_type in MC_types:
        # Truncated Gaussian with log10(1/Pfa)
        invPfaSampling = {
            'sampling_type': 'infPfa',
            'Pfa': 'tGaussian',
            'f(1/Pfa)': np.log10,
            'M': M,
            'N_max': N_max,
            'MC_type': MC_type
        }
        bandits.append(
            BayesianBanditSampling(A, reward_function, reward_prior,
                                   invPfaSampling))
        bandits_labels.append('tGaussian log10(1/Pfa), {}, M={}'.format(
            MC_type, invPfaSampling['M']))

    # MC types
    for MC_type in MC_types:
        # Markov with log(1/Pfa)
        invPfaSampling = {
            'sampling_type': 'infPfa',
            'Pfa': 'Markov',
            'f(1/Pfa)': np.log,
            'M': M,
            'N_max': N_max,
            'MC_type': MC_type
        }
        bandits.append(
            BayesianBanditSampling(A, reward_function, reward_prior,
                                   invPfaSampling))
        bandits_labels.append('Markov log(1/Pfa), {}, M={}'.format(
            MC_type, invPfaSampling['M']))

    # MC types
    for MC_type in MC_types:
        # Chebyshev with log(1/Pfa)
        invPfaSampling = {
            'sampling_type': 'infPfa',
            'Pfa': 'Chebyshev',
            'f(1/Pfa)': np.log,
            'M': M,
            'N_max': N_max,
            'MC_type': MC_type
        }
        bandits.append(
            BayesianBanditSampling(A, reward_function, reward_prior,
                                   invPfaSampling))
        bandits_labels.append('Chebyshev log(1/Pfa), {}, M={}'.format(
            MC_type, invPfaSampling['M']))

    ### BANDIT EXECUTION
    # Execute each bandit
    for (n, bandit) in enumerate(bandits):
        bandit.execute_realizations(R, t_max, context, exec_type)

    # Save bandits info
    with open(dir_string + '/bandits.pickle', 'wb') as f:
        pickle.dump(bandits, f)
    with open(dir_string + '/bandits_labels.pickle', 'wb') as f:
        pickle.dump(bandits_labels, f)

    ############################### PLOTTING ###############################
    ## Plotting arrangements (in general)
    bandits_colors = [
        colors.cnames['black'], colors.cnames['skyblue'],
        colors.cnames['cyan'], colors.cnames['blue'],
        colors.cnames['palegreen'], colors.cnames['lime'],
        colors.cnames['green'], colors.cnames['yellow'],
        colors.cnames['orange'], colors.cnames['red'],
        colors.cnames['purple'], colors.cnames['fuchsia'],
        colors.cnames['pink'], colors.cnames['saddlebrown'],
        colors.cnames['chocolate'], colors.cnames['burlywood']
    ]

    # Plotting directories
    dir_plots = dir_string + '/plots'
    os.makedirs(dir_plots, exist_ok=True)

    # Plotting time: all
    t_plot = t_max

    # Plot regret
    plot_std = False
    bandits_plot_regret(bandits, bandits_colors, bandits_labels, t_plot,
                        plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_regret(bandits, bandits_colors, bandits_labels, t_plot,
                        plot_std, plot_save=dir_plots)

    # Plot cumregret
    plot_std = False
    bandits_plot_cumregret(bandits, bandits_colors, bandits_labels, t_plot,
                           plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_cumregret(bandits, bandits_colors, bandits_labels, t_plot,
                           plot_std, plot_save=dir_plots)

    # Plot rewards expected
    plot_std = True
    bandits_plot_rewards_expected(bandits, bandits_colors, bandits_labels,
                                  t_plot, plot_std, plot_save=dir_plots)

    # Plot actions
    plot_std = False
    bandits_plot_actions(bandits, bandits_colors, bandits_labels, t_plot,
                         plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_actions(bandits, bandits_colors, bandits_labels, t_plot,
                         plot_std, plot_save=dir_plots)

    # Plot correct actions
    plot_std = False
    bandits_plot_actions_correct(bandits, bandits_colors, bandits_labels,
                                 t_plot, plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_actions_correct(bandits, bandits_colors, bandits_labels,
                                 t_plot, plot_std, plot_save=dir_plots)

    ## Sampling bandits
    # Plot action predictive density
    plot_std = True
    bandits_plot_arm_density(bandits, bandits_colors, bandits_labels, t_plot,
                             plot_std, plot_save=dir_plots)
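# A hedged sketch (not taken from the original script) of how the Bernoulli main() above
# might be invoked. The argument values and the 'sequential' exec_type string are
# illustrative assumptions only; exec_type is simply forwarded to execute_realizations().
def _example_bernoulli_run():
    # Two arms with success probabilities 0.2 and 0.8, one per row (theta is flattened
    # into the results path above)
    theta = np.array([[0.2], [0.8]])
    main(A=2, t_max=500, M=1000, N_max=25, R=2,
         exec_type='sequential', theta=theta)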
def main(A, t_max, M, N_max, R, exec_type, theta, sigma, d_context, type_context):
    ############################### MAIN CONFIG ###############################
    print(
        '{}-armed Contextual Linear Gaussian bandit with optimal, TS and sampling policies with {} MC samples for {} time-instants and {} realizations'
        .format(A, M, t_max, R))

    # Directory configuration
    dir_string = '../results/{}/A={}/t_max={}/R={}/M={}/N_max={}/d_context={}/type_context={}/theta={}/sigma={}'.format(
        os.path.basename(__file__).split('.')[0], A, t_max, R, M, N_max,
        d_context, type_context,
        str.replace(str.strip(np.array_str(theta.flatten()), ' []'), ' ', '_'),
        str.replace(str.strip(np.array_str(sigma.flatten()), ' []'), ' ', '_'))
    os.makedirs(dir_string, exist_ok=True)

    ########## Contextual Bandit configuration ##########
    # Context
    if type_context == 'static':
        # Static context
        context = np.ones((d_context, t_max))
    elif type_context == 'randn':
        # Dynamic context: standard Gaussian
        context = np.random.randn(d_context, t_max)
    elif type_context == 'rand':
        # Dynamic context: uniform
        context = np.random.rand(d_context, t_max)
    else:
        # Unknown context
        raise ValueError('Invalid context type={}'.format(type_context))

    # Reward function
    reward_function = {
        'type': 'linear_gaussian',
        'dist': stats.norm,
        'theta': theta,
        'sigma': sigma
    }

    # Reward prior
    Sigmas = np.zeros((A, d_context, d_context))
    for a in np.arange(A):
        Sigmas[a, :, :] = np.eye(d_context)
    reward_prior = {
        'dist': 'NIG',
        'alpha': np.ones((A, 1)),
        'beta': np.ones((A, 1)),
        'theta': np.ones((A, d_context)),
        'Sigma': Sigmas
    }

    ############################### BANDITS ###############################
    ### Monte Carlo integration types
    MC_types = ['MC_rewards', 'MC_expectedRewards', 'MC_arms']

    # Bandits to evaluate as a list
    bandits = []
    bandits_labels = []

    ### Optimal bandit
    bandits.append(OptimalBandit(A, reward_function))
    bandits_labels.append('Optimal Bandit')

    ### Thompson sampling: when sampling with static n=1
    thompsonSampling = {'sampling_type': 'static', 'arm_N_samples': 1}
    # MC samples
    for thompsonSampling['M'] in np.array([1, M]):
        # MC types
        for MC_type in MC_types:
            thompsonSampling['MC_type'] = MC_type
            bandits.append(
                BayesianBanditSampling(A, reward_function, reward_prior,
                                       thompsonSampling))
            bandits_labels.append('TS, {}, M={}'.format(
                MC_type, thompsonSampling['M']))

    ### Inverse Pfa sampling
    # Truncated Gaussian with log10(1/Pfa)
    invPfaSampling = {
        'sampling_type': 'infPfa',
        'Pfa': 'tGaussian',
        'f(1/Pfa)': np.log10,
        'M': M,
        'N_max': N_max
    }
    # MC types
    for MC_type in MC_types:
        invPfaSampling['MC_type'] = MC_type
        bandits.append(
            BayesianBanditSampling(A, reward_function, reward_prior,
                                   invPfaSampling))
        bandits_labels.append('tGaussian: log10(1/Pfa), {}, M={}'.format(
            MC_type, invPfaSampling['M']))

    # Markov with log(1/Pfa)
    invPfaSampling = {
        'sampling_type': 'infPfa',
        'Pfa': 'Markov',
        'f(1/Pfa)': np.log,
        'M': M,
        'N_max': N_max
    }
    # MC types
    for MC_type in MC_types:
        invPfaSampling['MC_type'] = MC_type
        bandits.append(
            BayesianBanditSampling(A, reward_function, reward_prior,
                                   invPfaSampling))
        bandits_labels.append('Markov: log(1/Pfa), {}, M={}'.format(
            MC_type, invPfaSampling['M']))

    # Chebyshev with log(1/Pfa)
    invPfaSampling = {
        'sampling_type': 'infPfa',
        'Pfa': 'Chebyshev',
        'f(1/Pfa)': np.log,
        'M': M,
        'N_max': N_max
    }
    # MC types
    for MC_type in MC_types:
        invPfaSampling['MC_type'] = MC_type
        bandits.append(
            BayesianBanditSampling(A, reward_function, reward_prior,
                                   invPfaSampling))
        bandits_labels.append('Chebyshev: log(1/Pfa), {}, M={}'.format(
            MC_type, invPfaSampling['M']))
    ### BANDIT EXECUTION
    # Execute each bandit
    for (n, bandit) in enumerate(bandits):
        bandit.execute_realizations(R, t_max, context, exec_type)

    # Save bandits info
    with open(dir_string + '/bandits.pickle', 'wb') as f:
        pickle.dump(bandits, f)
    with open(dir_string + '/bandits_labels.pickle', 'wb') as f:
        pickle.dump(bandits_labels, f)

    ############################### PLOTTING ###############################
    ## Plotting arrangements (in general)
    bandits_colors = [
        colors.cnames['black'], colors.cnames['skyblue'],
        colors.cnames['cyan'], colors.cnames['blue'],
        colors.cnames['palegreen'], colors.cnames['lime'],
        colors.cnames['green'], colors.cnames['yellow'],
        colors.cnames['orange'], colors.cnames['red'],
        colors.cnames['purple'], colors.cnames['fuchsia'],
        colors.cnames['pink'], colors.cnames['saddlebrown'],
        colors.cnames['chocolate'], colors.cnames['burlywood']
    ]

    # Plotting directories
    dir_plots = dir_string + '/plots'
    os.makedirs(dir_plots, exist_ok=True)

    # Plotting time: all
    t_plot = t_max

    # Plot regret
    plot_std = False
    bandits_plot_regret(bandits, bandits_colors, bandits_labels, t_plot,
                        plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_regret(bandits, bandits_colors, bandits_labels, t_plot,
                        plot_std, plot_save=dir_plots)

    # Plot cumregret
    plot_std = False
    bandits_plot_cumregret(bandits, bandits_colors, bandits_labels, t_plot,
                           plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_cumregret(bandits, bandits_colors, bandits_labels, t_plot,
                           plot_std, plot_save=dir_plots)

    # Plot rewards expected
    plot_std = True
    bandits_plot_rewards_expected(bandits, bandits_colors, bandits_labels,
                                  t_plot, plot_std, plot_save=dir_plots)

    # Plot action predictive density
    plot_std = True
    bandits_plot_arm_density(bandits, bandits_colors, bandits_labels, t_plot,
                             plot_std, plot_save=dir_plots)

    # Plot actions
    plot_std = False
    bandits_plot_actions(bandits, bandits_colors, bandits_labels, t_plot,
                         plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_actions(bandits, bandits_colors, bandits_labels, t_plot,
                         plot_std, plot_save=dir_plots)

    # Plot correct actions
    plot_std = False
    bandits_plot_actions_correct(bandits, bandits_colors, bandits_labels,
                                 t_plot, plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_actions_correct(bandits, bandits_colors, bandits_labels,
                                 t_plot, plot_std, plot_save=dir_plots)
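# A hedged sketch (not from the original script) of an invocation of the contextual linear
# Gaussian main() above. The shapes chosen for theta (per-arm regression weights over the
# context) and sigma (per-arm noise scale) are assumptions inferred from how they are used
# and flattened into the results path; the exec_type string is again an illustrative placeholder.
def _example_linear_gaussian_run():
    A, d_context = 2, 2
    theta = np.array([[1.0, 1.0],
                      [2.0, 2.0]])           # assumed shape (A, d_context)
    sigma = np.array([[1.0], [1.0]])         # assumed shape (A, 1)
    main(A=A, t_max=500, M=1000, N_max=25, R=2, exec_type='sequential',
         theta=theta, sigma=sigma, d_context=d_context, type_context='rand')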
def main(A, K, t_max, R, exec_type, pi, theta, sigma, d_context, type_context, prior_K, mixture_expectations):
    ############################### MAIN CONFIG ###############################
    print(
        '{}-armed Contextual Linear Gaussian mixture bandit with optimal and sampling policies with Variational inference for {} time-instants and {} realizations'
        .format(A, t_max, R))

    # Directory configuration
    dir_string = '../results/{}/A={}/t_max={}/R={}/d_context={}/type_context={}/pi={}/theta={}/sigma={}/prior_K={}/{}'.format(
        os.path.basename(__file__).split('.')[0], A, t_max, R, d_context,
        type_context,
        str.replace(str.strip(np.array_str(pi.flatten()), ' []'), ' ', '_'),
        str.replace(str.strip(np.array_str(theta.flatten()), ' []'), ' ', '_'),
        str.replace(str.strip(np.array_str(sigma.flatten()), ' []'), ' ', '_'),
        str.replace(str.strip(np.array_str(prior_K.flatten()), ' []'), ' ', '_'),
        '_'.join(mixture_expectations))
    os.makedirs(dir_string, exist_ok=True)

    ########## Contextual Bandit configuration ##########
    # Context
    if type_context == 'static':
        # Static context
        context = np.ones((d_context, t_max))
    elif type_context == 'randn':
        # Dynamic context: standard Gaussian
        context = np.random.randn(d_context, t_max)
    elif type_context == 'rand':
        # Dynamic context: uniform
        context = np.random.rand(d_context, t_max)
    else:
        # Unknown context
        raise ValueError('Invalid context type={}'.format(type_context))

    # Reward function
    reward_function = {
        'type': 'linear_gaussian_mixture',
        'dist': stats.norm,
        'pi': pi,
        'theta': theta,
        'sigma': sigma
    }

    ########## Inference
    # Variational parameters
    variational_max_iter = 100
    variational_lb_eps = 0.0001

    # Plotting
    variational_plot_save = 'show'
    variational_plot_save = None
    if variational_plot_save != None and variational_plot_save != 'show':
        # Plotting directories
        variational_plots = dir_string + '/variational_plots'
        os.makedirs(variational_plots, exist_ok=True)

    ########## Priors
    gamma = 0.1
    alpha = 1.
    beta = 1.
    sigma = 1.
    ############################### BANDITS ###############################
    # Bandits to evaluate as a list
    bandits = []
    bandits_labels = []

    ### Optimal bandit
    bandits.append(OptimalBandit(A, reward_function))
    bandits_labels.append('Optimal Bandit')

    ### Thompson sampling: when sampling with static n=1 and no Monte Carlo
    thompsonSampling = {
        'sampling_type': 'static',
        'arm_N_samples': 1,
        'M': 1,
        'MC_type': 'MC_arms'
    }

    # Mixture expectation
    for mixture_expectation in mixture_expectations:
        thompsonSampling['mixture_expectation'] = mixture_expectation
        # Different mixture priors
        for this_K in prior_K:
            # New dirs
            if variational_plot_save != None and variational_plot_save != 'show':
                os.makedirs(variational_plots + '/prior_K{}'.format(this_K),
                            exist_ok=True)
                variational_plot_save = variational_plots + '/prior_K{}'.format(
                    this_K)

            # Hyperparameters
            # Dirichlet for mixture weights
            prior_gamma = gamma * np.ones((A, this_K))
            # NIG for linear Gaussians
            prior_alpha = alpha * np.ones((A, this_K))
            prior_beta = beta * np.ones((A, this_K))

            # Initial thetas
            prior_theta = np.ones((A, this_K, d_context))
            # Different initial thetas
            for k in np.arange(this_K):
                prior_theta[:, k, :] = k

            prior_Sigma = np.zeros((A, this_K, d_context, d_context))
            # Initial covariances: uncorrelated
            for a in np.arange(A):
                for k in np.arange(this_K):
                    prior_Sigma[a, k, :, :] = sigma * np.eye(d_context)

            # Variational
            # Reward prior as dictionary: plotting Variational lower bound
            reward_prior = {
                'type': 'linear_gaussian_mixture',
                'dist': 'NIG',
                'K': this_K,
                'gamma': prior_gamma,
                'alpha': prior_alpha,
                'beta': prior_beta,
                'theta': prior_theta,
                'Sigma': prior_Sigma,
                'variational_max_iter': variational_max_iter,
                'variational_lb_eps': variational_lb_eps,
                'variational_plot_save': variational_plot_save
            }

            # Instantiate bandit
            bandits.append(
                VariationalBanditSampling(A, reward_function, reward_prior,
                                          thompsonSampling))
            bandits_labels.append('VTS, prior_K={}, {}'.format(
                this_K, mixture_expectation))

    ### BANDIT EXECUTION
    # Execute each bandit
    for (n, bandit) in enumerate(bandits):
        bandit.execute_realizations(R, t_max, context, exec_type)

    # Save bandits info
    with open(dir_string + '/bandits.pickle', 'wb') as f:
        pickle.dump(bandits, f)
    with open(dir_string + '/bandits_labels.pickle', 'wb') as f:
        pickle.dump(bandits_labels, f)

    ############################### PLOTTING ###############################
    ## Plotting arrangements (in general)
    bandits_colors = [
        colors.cnames['black'], colors.cnames['skyblue'],
        colors.cnames['cyan'], colors.cnames['blue'],
        colors.cnames['palegreen'], colors.cnames['lime'],
        colors.cnames['green'], colors.cnames['yellow'],
        colors.cnames['orange'], colors.cnames['red'],
        colors.cnames['purple'], colors.cnames['fuchsia'],
        colors.cnames['pink'], colors.cnames['saddlebrown'],
        colors.cnames['chocolate'], colors.cnames['burlywood']
    ]

    # Plotting directories
    dir_plots = dir_string + '/plots'
    os.makedirs(dir_plots, exist_ok=True)

    # Plotting time: all
    t_plot = t_max

    # Plot regret
    plot_std = False
    bandits_plot_regret(bandits, bandits_colors, bandits_labels, t_plot,
                        plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_regret(bandits, bandits_colors, bandits_labels, t_plot,
                        plot_std, plot_save=dir_plots)

    # Plot cumregret
    plot_std = False
    bandits_plot_cumregret(bandits, bandits_colors, bandits_labels, t_plot,
                           plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_cumregret(bandits, bandits_colors, bandits_labels, t_plot,
                           plot_std, plot_save=dir_plots)

    # Plot rewards expected
    plot_std = True
    bandits_plot_rewards_expected(bandits, bandits_colors, bandits_labels,
                                  t_plot, plot_std, plot_save=dir_plots)

    # Plot action predictive density
    plot_std = True
    bandits_plot_arm_density(bandits, bandits_colors, bandits_labels, t_plot,
                             plot_std, plot_save=dir_plots)

    # Plot actions
    plot_std = False
    bandits_plot_actions(bandits, bandits_colors, bandits_labels, t_plot,
                         plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_actions(bandits, bandits_colors, bandits_labels, t_plot,
                         plot_std, plot_save=dir_plots)

    # Plot correct actions
    plot_std = False
    bandits_plot_actions_correct(bandits, bandits_colors, bandits_labels,
                                 t_plot, plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_actions_correct(bandits, bandits_colors, bandits_labels,
                                 t_plot, plot_std, plot_save=dir_plots)
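# A hedged sketch (not from the original script) of an invocation of the variational mixture
# main() above. The shapes of pi, theta and sigma, the prior_K candidates and the
# 'pi_expected' mixture-expectation name are illustrative assumptions; the accepted
# mixture_expectation strings are defined by the sampling classes elsewhere in the repository.
def _example_variational_mixture_run():
    A, K, d_context = 2, 2, 2
    pi = np.array([[0.5, 0.5],
                   [0.3, 0.7]])               # assumed per-arm mixture weights, shape (A, K)
    theta = np.ones((A, K, d_context))        # assumed per-arm, per-component weights
    sigma = np.ones((A, K))                   # assumed per-arm, per-component noise scales
    main(A=A, K=K, t_max=500, R=2, exec_type='sequential',
         pi=pi, theta=theta, sigma=sigma, d_context=d_context,
         type_context='rand', prior_K=np.array([1, 2]),
         mixture_expectations=['pi_expected'])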
def main(A, K, t_max, R, exec_type, pi, theta, sigma, d_context, type_context, prior_K, mixture_expectations):
    ############################### MAIN CONFIG ###############################
    print(
        '{}-armed Contextual Linear Gaussian mixture bandit with optimal and sampling policies with MCMC inference for {} time-instants and {} realizations'
        .format(A, t_max, R))

    # Directory configuration
    dir_string = '../results/{}/A={}/t_max={}/R={}/d_context={}/type_context={}/pi={}/theta={}/sigma={}/prior_K={}/{}'.format(
        os.path.basename(__file__).split('.')[0], A, t_max, R, d_context,
        type_context,
        str.replace(str.strip(np.array_str(pi.flatten()), ' []'), ' ', '_'),
        str.replace(str.strip(np.array_str(theta.flatten()), ' []'), ' ', '_'),
        str.replace(str.strip(np.array_str(sigma.flatten()), ' []'), ' ', '_'),
        str.replace(str.strip(np.array_str(prior_K.flatten()), ' []'), ' ', '_'),
        '_'.join(mixture_expectations))
    os.makedirs(dir_string, exist_ok=True)

    ########## Contextual Bandit configuration ##########
    # Context
    if type_context == 'static':
        # Static context
        context = np.ones((d_context, t_max))
    elif type_context == 'randn':
        # Dynamic context: standard Gaussian
        context = np.random.randn(d_context, t_max)
    elif type_context == 'rand':
        # Dynamic context: uniform
        context = np.random.rand(d_context, t_max)
    else:
        # Unknown context
        raise ValueError('Invalid context type={}'.format(type_context))

    # Reward function
    reward_function = {
        'type': 'linear_gaussian_mixture',
        'dist': stats.norm,
        'pi': pi,
        'theta': theta,
        'sigma': sigma
    }

    ########## Inference
    # MCMC (Gibbs) parameters
    gibbs_max_iter = 4
    gibbs_loglik_eps = 0.01

    # Plotting
    gibbs_plot_save = 'show'
    gibbs_plot_save = None
    if gibbs_plot_save != None and gibbs_plot_save != 'show':
        # Plotting directories
        gibbs_plots = dir_string + '/gibbs_plots'
        os.makedirs(gibbs_plots, exist_ok=True)

    ########## Priors
    gamma = 0.1
    alpha = 1.
    beta = 1.
    sigma = 1.
    # Pitman-Yor discount parameter (d=0 recovers the Dirichlet process); the assert
    # enforces the standard constraints 0 <= d < 1 and concentration gamma > -d
    pitman_yor_d = 0
    assert (0 <= pitman_yor_d) and (pitman_yor_d < 1) and (gamma > -pitman_yor_d)

    ############################### BANDITS ###############################
    # Bandits to evaluate as a list
    bandits = []
    bandits_labels = []

    ### Optimal bandit
    bandits.append(OptimalBandit(A, reward_function))
    bandits_labels.append('Optimal Bandit')

    ### Thompson sampling: when sampling with static n=1 and no Monte Carlo
    thompsonSampling = {
        'sampling_type': 'static',
        'arm_N_samples': 1,
        'M': 1,
        'MC_type': 'MC_arms'
    }

    # Mixture expectation
    for mixture_expectation in mixture_expectations:
        thompsonSampling['mixture_expectation'] = mixture_expectation
        # Different mixture priors
        for this_K in prior_K:
            # New dirs
            if gibbs_plot_save != None and gibbs_plot_save != 'show':
                os.makedirs(gibbs_plots + '/prior_K{}'.format(this_K),
                            exist_ok=True)
                gibbs_plot_save = gibbs_plots + '/prior_K{}'.format(this_K)

            # Hyperparameters
            # Dirichlet for mixture weights
            prior_gamma = gamma * np.ones((A, this_K))
            # NIG for linear Gaussians
            prior_alpha = alpha * np.ones((A, this_K))
            prior_beta = beta * np.ones((A, this_K))

            # Initial thetas
            prior_theta = np.ones((A, this_K, d_context))
            # Different initial thetas
            for k in np.arange(this_K):
                prior_theta[:, k, :] = k

            prior_Sigma = np.zeros((A, this_K, d_context, d_context))
            # Initial covariances: uncorrelated
            for a in np.arange(A):
                for k in np.arange(this_K):
                    prior_Sigma[a, k, :, :] = sigma * np.eye(d_context)

            # MCMC
            # Reward prior as dictionary
            reward_prior = {
                'type': 'linear_gaussian_mixture',
                'dist': 'NIG',
                'K': this_K,
                'gamma': prior_gamma,
                'alpha': prior_alpha,
                'beta': prior_beta,
                'theta': prior_theta,
                'Sigma': prior_Sigma,
                'gibbs_max_iter': gibbs_max_iter,
                'gibbs_loglik_eps': gibbs_loglik_eps,
                'gibbs_plot_save': gibbs_plot_save
            }

            # Instantiate bandit
            bandits.append(
                MCMCBanditSampling(A, reward_function, reward_prior,
                                   thompsonSampling))
            bandits_labels.append('Gibbs-TS, prior_K={}, {}'.format(
                this_K, mixture_expectation))

        # Nonparametric mixture priors
        if gibbs_plot_save != None and gibbs_plot_save != 'show':
            os.makedirs(gibbs_plots + '/nonparametric', exist_ok=True)
            gibbs_plot_save = gibbs_plots + '/nonparametric'

        # Hyperparameters
        # Concentration parameter
        prior_d = pitman_yor_d * np.ones(A)
        prior_gamma = gamma * np.ones(A)
        # NIG for linear Gaussians
        prior_alpha = alpha * np.ones(A)
        prior_beta = beta * np.ones(A)

        # Initial thetas
        prior_theta = np.ones((A, d_context))
        prior_Sigma = np.zeros((A, d_context, d_context))
        # Initial covariances: uncorrelated
        for a in np.arange(A):
            prior_Sigma[a, :, :] = sigma * np.eye(d_context)

        # Reward prior as dictionary
        reward_prior = {
            'type': 'linear_gaussian_mixture',
            'dist': 'NIG',
            'K': 'nonparametric',
            'd': prior_d,
            'gamma': prior_gamma,
            'alpha': prior_alpha,
            'beta': prior_beta,
            'theta': prior_theta,
            'Sigma': prior_Sigma,
            'gibbs_max_iter': gibbs_max_iter,
            'gibbs_loglik_eps': gibbs_loglik_eps,
            'gibbs_plot_save': gibbs_plot_save
        }

        # Instantiate bandit
        bandits.append(
            MCMCBanditSampling(A, reward_function, reward_prior,
                               thompsonSampling))
        bandits_labels.append('Gibbs-TS, nonparametric, {}'.format(
            mixture_expectation))

    ### BANDIT EXECUTION
    # Execute each bandit
    for (n, bandit) in enumerate(bandits):
        bandit.execute_realizations(R, t_max, context, exec_type)

    # Save bandits info
    with open(dir_string + '/bandits_{}.pickle'.format(np.random.randn()),
              'wb') as f:
        pickle.dump(bandits, f)
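# A hedged sketch showing how the pickled results written above could be loaded back for
# offline analysis. Because this MCMC script suffixes the pickle name with a random number,
# the exact filename is discovered with glob here; this helper and its name are illustrative
# assumptions, not part of the original code.
def _example_load_saved_bandits(dir_string):
    import glob
    import pickle
    # Pick the most recent matching pickle under the results directory
    saved = sorted(glob.glob(dir_string + '/bandits*.pickle'))
    with open(saved[-1], 'rb') as f:
        bandits = pickle.load(f)
    return bandits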