# Note: this script assumes the surrounding imports (os, pickle, numpy as np,
# scipy.stats as stats, matplotlib.colors as colors) and the bandit classes and
# plotting helpers referenced below are available from the rest of the repository.
def main(A, t_max, M, N_max, R, exec_type, theta):
    ############################### MAIN CONFIG ###############################
    print(
        '{}-armed Bernoulli bandit with optimal, TS and all arm sampling policies with {} MC samples for {} time-instants and {} realizations'
        .format(A, M, t_max, R))

    # Directory configuration
    dir_string = '../results/{}/A={}/t_max={}/R={}/M={}/N_max={}/theta={}'.format(
        os.path.basename(__file__).split('.')[0], A, t_max, R, M, N_max,
        str.replace(str.strip(np.array_str(theta.flatten()), ' []'), ' ', '_'))
    os.makedirs(dir_string, exist_ok=True)

    ########## Bernoulli Bandit configuration ##########
    # No context
    context = None

    # Reward function and prior
    reward_function = {
        'type': 'bernoulli',
        'dist': stats.bernoulli,
        'theta': theta
    }
    reward_prior = {
        'dist': stats.beta,
        'alpha': np.ones((A, 1)),
        'beta': np.ones((A, 1))
    }

    ############################### BANDITS ###############################
    ### Monte Carlo integration types
    MC_types = ['MC_rewards', 'MC_expectedRewards', 'MC_arms']

    # Bandits to evaluate as a list
    bandits = []
    bandits_labels = []

    ### Optimal bandit
    bandits.append(OptimalBandit(A, reward_function))
    bandits_labels.append('Optimal Bandit')

    ### Thompson sampling: when sampling with static n=1
    # MC samples
    for m in np.array([1, M]):
        # MC types
        for MC_type in MC_types:
            thompsonSampling = {
                'sampling_type': 'static',
                'arm_N_samples': 1,
                'M': m,
                'MC_type': MC_type
            }
            bandits.append(
                BayesianBanditSampling(A, reward_function, reward_prior,
                                       thompsonSampling))
            bandits_labels.append('TS, {}, M={}'.format(
                MC_type, thompsonSampling['M']))

    ### Inverse Pfa sampling
    # MC types
    for MC_type in MC_types:
        # Truncated Gaussian with log10(1/Pfa)
        invPfaSampling = {
            'sampling_type': 'infPfa',
            'Pfa': 'tGaussian',
            'f(1/Pfa)': np.log10,
            'M': M,
            'N_max': N_max,
            'MC_type': MC_type
        }
        bandits.append(
            BayesianBanditSampling(A, reward_function, reward_prior,
                                   invPfaSampling))
        bandits_labels.append('tGaussian log10(1/Pfa), {}, M={}'.format(
            MC_type, invPfaSampling['M']))

    # MC types
    for MC_type in MC_types:
        # Markov with log(1/Pfa)
        invPfaSampling = {
            'sampling_type': 'infPfa',
            'Pfa': 'Markov',
            'f(1/Pfa)': np.log,
            'M': M,
            'N_max': N_max,
            'MC_type': MC_type
        }
        bandits.append(
            BayesianBanditSampling(A, reward_function, reward_prior,
                                   invPfaSampling))
        bandits_labels.append('Markov log(1/Pfa), {}, M={}'.format(
            MC_type, invPfaSampling['M']))

    # MC types
    for MC_type in MC_types:
        # Chebyshev with log(1/Pfa)
        invPfaSampling = {
            'sampling_type': 'infPfa',
            'Pfa': 'Chebyshev',
            'f(1/Pfa)': np.log,
            'M': M,
            'N_max': N_max,
            'MC_type': MC_type
        }
        bandits.append(
            BayesianBanditSampling(A, reward_function, reward_prior,
                                   invPfaSampling))
        bandits_labels.append('Chebyshev log(1/Pfa), {}, M={}'.format(
            MC_type, invPfaSampling['M']))

    ### BANDIT EXECUTION
    # Execute each bandit
    for (n, bandit) in enumerate(bandits):
        bandit.execute_realizations(R, t_max, context, exec_type)

    # Save bandits info
    with open(dir_string + '/bandits.pickle', 'wb') as f:
        pickle.dump(bandits, f)
    with open(dir_string + '/bandits_labels.pickle', 'wb') as f:
        pickle.dump(bandits_labels, f)

    ############################### PLOTTING ###############################
    ## Plotting arrangements (in general)
    bandits_colors = [
        colors.cnames['black'], colors.cnames['skyblue'],
        colors.cnames['cyan'], colors.cnames['blue'],
        colors.cnames['palegreen'], colors.cnames['lime'],
        colors.cnames['green'], colors.cnames['yellow'],
        colors.cnames['orange'], colors.cnames['red'],
        colors.cnames['purple'], colors.cnames['fuchsia'],
        colors.cnames['pink'], colors.cnames['saddlebrown'],
        colors.cnames['chocolate'], colors.cnames['burlywood']
    ]

    # Plotting directories
    dir_plots = dir_string + '/plots'
    os.makedirs(dir_plots, exist_ok=True)

    # Plotting time: all
    t_plot = t_max

    # Plot regret
    plot_std = False
    bandits_plot_regret(bandits, bandits_colors, bandits_labels, t_plot,
                        plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_regret(bandits, bandits_colors, bandits_labels, t_plot,
                        plot_std, plot_save=dir_plots)

    # Plot cumregret
    plot_std = False
    bandits_plot_cumregret(bandits, bandits_colors, bandits_labels, t_plot,
                           plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_cumregret(bandits, bandits_colors, bandits_labels, t_plot,
                           plot_std, plot_save=dir_plots)

    # Plot rewards expected
    plot_std = True
    bandits_plot_rewards_expected(bandits, bandits_colors, bandits_labels,
                                  t_plot, plot_std, plot_save=dir_plots)

    # Plot actions
    plot_std = False
    bandits_plot_actions(bandits, bandits_colors, bandits_labels, t_plot,
                         plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_actions(bandits, bandits_colors, bandits_labels, t_plot,
                         plot_std, plot_save=dir_plots)

    # Plot correct actions
    plot_std = False
    bandits_plot_actions_correct(bandits, bandits_colors, bandits_labels,
                                 t_plot, plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_actions_correct(bandits, bandits_colors, bandits_labels,
                                 t_plot, plot_std, plot_save=dir_plots)

    ## Sampling bandits
    # Plot action predictive density
    plot_std = True
    bandits_plot_arm_density(bandits, bandits_colors, bandits_labels, t_plot,
                             plot_std, plot_save=dir_plots)
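# A hedged sketch (not taken from the original script) of how the Bernoulli main() above
# might be invoked. The argument values and the 'sequential' exec_type string are
# illustrative assumptions only; exec_type is simply forwarded to execute_realizations().
def _example_bernoulli_run():
    # Two arms with success probabilities 0.2 and 0.8, one per row (theta is flattened
    # into the results path above)
    theta = np.array([[0.2], [0.8]])
    main(A=2, t_max=500, M=1000, N_max=25, R=2,
         exec_type='sequential', theta=theta)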
def main(A, t_max, M, N_max, R, exec_type, theta, sigma, d_context, type_context):
    ############################### MAIN CONFIG ###############################
    print(
        '{}-armed Contextual Linear Gaussian bandit with optimal, TS and sampling policies with {} MC samples for {} time-instants and {} realizations'
        .format(A, M, t_max, R))

    # Directory configuration
    dir_string = '../results/{}/A={}/t_max={}/R={}/M={}/N_max={}/d_context={}/type_context={}/theta={}/sigma={}'.format(
        os.path.basename(__file__).split('.')[0], A, t_max, R, M, N_max,
        d_context, type_context,
        str.replace(str.strip(np.array_str(theta.flatten()), ' []'), ' ', '_'),
        str.replace(str.strip(np.array_str(sigma.flatten()), ' []'), ' ', '_'))
    os.makedirs(dir_string, exist_ok=True)

    ########## Contextual Bandit configuration ##########
    # Context
    if type_context == 'static':
        # Static context
        context = np.ones((d_context, t_max))
    elif type_context == 'randn':
        # Dynamic context: standard Gaussian
        context = np.random.randn(d_context, t_max)
    elif type_context == 'rand':
        # Dynamic context: uniform
        context = np.random.rand(d_context, t_max)
    else:
        # Unknown context
        raise ValueError('Invalid context type={}'.format(type_context))

    # Reward function
    reward_function = {
        'type': 'linear_gaussian',
        'dist': stats.norm,
        'theta': theta,
        'sigma': sigma
    }

    # Reward prior
    Sigmas = np.zeros((A, d_context, d_context))
    for a in np.arange(A):
        Sigmas[a, :, :] = np.eye(d_context)
    reward_prior = {
        'dist': 'NIG',
        'alpha': np.ones((A, 1)),
        'beta': np.ones((A, 1)),
        'theta': np.ones((A, d_context)),
        'Sigma': Sigmas
    }

    ############################### BANDITS ###############################
    ### Monte Carlo integration types
    MC_types = ['MC_rewards', 'MC_expectedRewards', 'MC_arms']

    # Bandits to evaluate as a list
    bandits = []
    bandits_labels = []

    ### Optimal bandit
    bandits.append(OptimalBandit(A, reward_function))
    bandits_labels.append('Optimal Bandit')

    ### Thompson sampling: when sampling with static n=1
    thompsonSampling = {'sampling_type': 'static', 'arm_N_samples': 1}
    # MC samples
    for thompsonSampling['M'] in np.array([1, M]):
        # MC types
        for MC_type in MC_types:
            thompsonSampling['MC_type'] = MC_type
            bandits.append(
                BayesianBanditSampling(A, reward_function, reward_prior,
                                       thompsonSampling))
            bandits_labels.append('TS, {}, M={}'.format(
                MC_type, thompsonSampling['M']))

    ### Inverse Pfa sampling
    # Truncated Gaussian with log10(1/Pfa)
    invPfaSampling = {
        'sampling_type': 'infPfa',
        'Pfa': 'tGaussian',
        'f(1/Pfa)': np.log10,
        'M': M,
        'N_max': N_max
    }
    # MC types
    for MC_type in MC_types:
        invPfaSampling['MC_type'] = MC_type
        bandits.append(
            BayesianBanditSampling(A, reward_function, reward_prior,
                                   invPfaSampling))
        bandits_labels.append('tGaussian: log10(1/Pfa), {}, M={}'.format(
            MC_type, invPfaSampling['M']))

    # Markov with log(1/Pfa)
    invPfaSampling = {
        'sampling_type': 'infPfa',
        'Pfa': 'Markov',
        'f(1/Pfa)': np.log,
        'M': M,
        'N_max': N_max
    }
    # MC types
    for MC_type in MC_types:
        invPfaSampling['MC_type'] = MC_type
        bandits.append(
            BayesianBanditSampling(A, reward_function, reward_prior,
                                   invPfaSampling))
        bandits_labels.append('Markov: log(1/Pfa), {}, M={}'.format(
            MC_type, invPfaSampling['M']))

    # Chebyshev with log(1/Pfa)
    invPfaSampling = {
        'sampling_type': 'infPfa',
        'Pfa': 'Chebyshev',
        'f(1/Pfa)': np.log,
        'M': M,
        'N_max': N_max
    }
    # MC types
    for MC_type in MC_types:
        invPfaSampling['MC_type'] = MC_type
        bandits.append(
            BayesianBanditSampling(A, reward_function, reward_prior,
                                   invPfaSampling))
        bandits_labels.append('Chebyshev: log(1/Pfa), {}, M={}'.format(
            MC_type, invPfaSampling['M']))
    ### BANDIT EXECUTION
    # Execute each bandit
    for (n, bandit) in enumerate(bandits):
        bandit.execute_realizations(R, t_max, context, exec_type)

    # Save bandits info
    with open(dir_string + '/bandits.pickle', 'wb') as f:
        pickle.dump(bandits, f)
    with open(dir_string + '/bandits_labels.pickle', 'wb') as f:
        pickle.dump(bandits_labels, f)

    ############################### PLOTTING ###############################
    ## Plotting arrangements (in general)
    bandits_colors = [
        colors.cnames['black'], colors.cnames['skyblue'],
        colors.cnames['cyan'], colors.cnames['blue'],
        colors.cnames['palegreen'], colors.cnames['lime'],
        colors.cnames['green'], colors.cnames['yellow'],
        colors.cnames['orange'], colors.cnames['red'],
        colors.cnames['purple'], colors.cnames['fuchsia'],
        colors.cnames['pink'], colors.cnames['saddlebrown'],
        colors.cnames['chocolate'], colors.cnames['burlywood']
    ]

    # Plotting directories
    dir_plots = dir_string + '/plots'
    os.makedirs(dir_plots, exist_ok=True)

    # Plotting time: all
    t_plot = t_max

    # Plot regret
    plot_std = False
    bandits_plot_regret(bandits, bandits_colors, bandits_labels, t_plot,
                        plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_regret(bandits, bandits_colors, bandits_labels, t_plot,
                        plot_std, plot_save=dir_plots)

    # Plot cumregret
    plot_std = False
    bandits_plot_cumregret(bandits, bandits_colors, bandits_labels, t_plot,
                           plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_cumregret(bandits, bandits_colors, bandits_labels, t_plot,
                           plot_std, plot_save=dir_plots)

    # Plot rewards expected
    plot_std = True
    bandits_plot_rewards_expected(bandits, bandits_colors, bandits_labels,
                                  t_plot, plot_std, plot_save=dir_plots)

    # Plot action predictive density
    plot_std = True
    bandits_plot_arm_density(bandits, bandits_colors, bandits_labels, t_plot,
                             plot_std, plot_save=dir_plots)

    # Plot actions
    plot_std = False
    bandits_plot_actions(bandits, bandits_colors, bandits_labels, t_plot,
                         plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_actions(bandits, bandits_colors, bandits_labels, t_plot,
                         plot_std, plot_save=dir_plots)

    # Plot correct actions
    plot_std = False
    bandits_plot_actions_correct(bandits, bandits_colors, bandits_labels,
                                 t_plot, plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_actions_correct(bandits, bandits_colors, bandits_labels,
                                 t_plot, plot_std, plot_save=dir_plots)
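# A hedged sketch (not from the original script) of an invocation of the contextual linear
# Gaussian main() above. The shapes chosen for theta (per-arm regression weights over the
# context) and sigma (per-arm noise scale) are assumptions inferred from how they are used
# and flattened into the results path; the exec_type string is again an illustrative placeholder.
def _example_linear_gaussian_run():
    A, d_context = 2, 2
    theta = np.array([[1.0, 1.0],
                      [2.0, 2.0]])           # assumed shape (A, d_context)
    sigma = np.array([[1.0], [1.0]])         # assumed shape (A, 1)
    main(A=A, t_max=500, M=1000, N_max=25, R=2, exec_type='sequential',
         theta=theta, sigma=sigma, d_context=d_context, type_context='rand')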
def main(A, K, t_max, R, exec_type, pi, theta, sigma, d_context, type_context, prior_K, mixture_expectations):
    ############################### MAIN CONFIG ###############################
    print(
        '{}-armed Contextual Linear Gaussian mixture bandit with optimal and sampling policies with Variational inference for {} time-instants and {} realizations'
        .format(A, t_max, R))

    # Directory configuration
    dir_string = '../results/{}/A={}/t_max={}/R={}/d_context={}/type_context={}/pi={}/theta={}/sigma={}/prior_K={}/{}'.format(
        os.path.basename(__file__).split('.')[0], A, t_max, R, d_context,
        type_context,
        str.replace(str.strip(np.array_str(pi.flatten()), ' []'), ' ', '_'),
        str.replace(str.strip(np.array_str(theta.flatten()), ' []'), ' ', '_'),
        str.replace(str.strip(np.array_str(sigma.flatten()), ' []'), ' ', '_'),
        str.replace(str.strip(np.array_str(prior_K.flatten()), ' []'), ' ', '_'),
        '_'.join(mixture_expectations))
    os.makedirs(dir_string, exist_ok=True)

    ########## Contextual Bandit configuration ##########
    # Context
    if type_context == 'static':
        # Static context
        context = np.ones((d_context, t_max))
    elif type_context == 'randn':
        # Dynamic context: standard Gaussian
        context = np.random.randn(d_context, t_max)
    elif type_context == 'rand':
        # Dynamic context: uniform
        context = np.random.rand(d_context, t_max)
    else:
        # Unknown context
        raise ValueError('Invalid context type={}'.format(type_context))

    # Reward function
    reward_function = {
        'type': 'linear_gaussian_mixture',
        'dist': stats.norm,
        'pi': pi,
        'theta': theta,
        'sigma': sigma
    }

    ########## Inference
    # Variational parameters
    variational_max_iter = 100
    variational_lb_eps = 0.0001

    # Plotting
    variational_plot_save = 'show'
    variational_plot_save = None
    if variational_plot_save != None and variational_plot_save != 'show':
        # Plotting directories
        variational_plots = dir_string + '/variational_plots'
        os.makedirs(variational_plots, exist_ok=True)

    ########## Priors
    gamma = 0.1
    alpha = 1.
    beta = 1.
    sigma = 1.
    ############################### BANDITS ###############################
    # Bandits to evaluate as a list
    bandits = []
    bandits_labels = []

    ### Optimal bandit
    bandits.append(OptimalBandit(A, reward_function))
    bandits_labels.append('Optimal Bandit')

    ### Thompson sampling: when sampling with static n=1 and no Monte Carlo
    thompsonSampling = {
        'sampling_type': 'static',
        'arm_N_samples': 1,
        'M': 1,
        'MC_type': 'MC_arms'
    }

    # Mixture expectation
    for mixture_expectation in mixture_expectations:
        thompsonSampling['mixture_expectation'] = mixture_expectation
        # Different mixture priors
        for this_K in prior_K:
            # New dirs
            if variational_plot_save != None and variational_plot_save != 'show':
                os.makedirs(variational_plots + '/prior_K{}'.format(this_K),
                            exist_ok=True)
                variational_plot_save = variational_plots + '/prior_K{}'.format(
                    this_K)

            # Hyperparameters
            # Dirichlet for mixture weights
            prior_gamma = gamma * np.ones((A, this_K))
            # NIG for linear Gaussians
            prior_alpha = alpha * np.ones((A, this_K))
            prior_beta = beta * np.ones((A, this_K))

            # Initial thetas
            prior_theta = np.ones((A, this_K, d_context))
            # Different initial thetas
            for k in np.arange(this_K):
                prior_theta[:, k, :] = k

            prior_Sigma = np.zeros((A, this_K, d_context, d_context))
            # Initial covariances: uncorrelated
            for a in np.arange(A):
                for k in np.arange(this_K):
                    prior_Sigma[a, k, :, :] = sigma * np.eye(d_context)

            # Variational
            # Reward prior as dictionary: plotting Variational lower bound
            reward_prior = {
                'type': 'linear_gaussian_mixture',
                'dist': 'NIG',
                'K': this_K,
                'gamma': prior_gamma,
                'alpha': prior_alpha,
                'beta': prior_beta,
                'theta': prior_theta,
                'Sigma': prior_Sigma,
                'variational_max_iter': variational_max_iter,
                'variational_lb_eps': variational_lb_eps,
                'variational_plot_save': variational_plot_save
            }

            # Instantiate bandit
            bandits.append(
                VariationalBanditSampling(A, reward_function, reward_prior,
                                          thompsonSampling))
            bandits_labels.append('VTS, prior_K={}, {}'.format(
                this_K, mixture_expectation))

    ### BANDIT EXECUTION
    # Execute each bandit
    for (n, bandit) in enumerate(bandits):
        bandit.execute_realizations(R, t_max, context, exec_type)

    # Save bandits info
    with open(dir_string + '/bandits.pickle', 'wb') as f:
        pickle.dump(bandits, f)
    with open(dir_string + '/bandits_labels.pickle', 'wb') as f:
        pickle.dump(bandits_labels, f)

    ############################### PLOTTING ###############################
    ## Plotting arrangements (in general)
    bandits_colors = [
        colors.cnames['black'], colors.cnames['skyblue'],
        colors.cnames['cyan'], colors.cnames['blue'],
        colors.cnames['palegreen'], colors.cnames['lime'],
        colors.cnames['green'], colors.cnames['yellow'],
        colors.cnames['orange'], colors.cnames['red'],
        colors.cnames['purple'], colors.cnames['fuchsia'],
        colors.cnames['pink'], colors.cnames['saddlebrown'],
        colors.cnames['chocolate'], colors.cnames['burlywood']
    ]

    # Plotting directories
    dir_plots = dir_string + '/plots'
    os.makedirs(dir_plots, exist_ok=True)

    # Plotting time: all
    t_plot = t_max

    # Plot regret
    plot_std = False
    bandits_plot_regret(bandits, bandits_colors, bandits_labels, t_plot,
                        plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_regret(bandits, bandits_colors, bandits_labels, t_plot,
                        plot_std, plot_save=dir_plots)

    # Plot cumregret
    plot_std = False
    bandits_plot_cumregret(bandits, bandits_colors, bandits_labels, t_plot,
                           plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_cumregret(bandits, bandits_colors, bandits_labels, t_plot,
                           plot_std, plot_save=dir_plots)

    # Plot rewards expected
    plot_std = True
    bandits_plot_rewards_expected(bandits, bandits_colors, bandits_labels,
                                  t_plot, plot_std, plot_save=dir_plots)

    # Plot action predictive density
    plot_std = True
    bandits_plot_arm_density(bandits, bandits_colors, bandits_labels, t_plot,
                             plot_std, plot_save=dir_plots)

    # Plot actions
    plot_std = False
    bandits_plot_actions(bandits, bandits_colors, bandits_labels, t_plot,
                         plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_actions(bandits, bandits_colors, bandits_labels, t_plot,
                         plot_std, plot_save=dir_plots)

    # Plot correct actions
    plot_std = False
    bandits_plot_actions_correct(bandits, bandits_colors, bandits_labels,
                                 t_plot, plot_std, plot_save=dir_plots)
    plot_std = True
    bandits_plot_actions_correct(bandits, bandits_colors, bandits_labels,
                                 t_plot, plot_std, plot_save=dir_plots)
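# A hedged sketch (not from the original script) of an invocation of the variational mixture
# main() above. The shapes of pi, theta and sigma, the prior_K candidates and the
# 'pi_expected' mixture-expectation name are illustrative assumptions; the accepted
# mixture_expectation strings are defined by the sampling classes elsewhere in the repository.
def _example_variational_mixture_run():
    A, K, d_context = 2, 2, 2
    pi = np.array([[0.5, 0.5],
                   [0.3, 0.7]])               # assumed per-arm mixture weights, shape (A, K)
    theta = np.ones((A, K, d_context))        # assumed per-arm, per-component weights
    sigma = np.ones((A, K))                   # assumed per-arm, per-component noise scales
    main(A=A, K=K, t_max=500, R=2, exec_type='sequential',
         pi=pi, theta=theta, sigma=sigma, d_context=d_context,
         type_context='rand', prior_K=np.array([1, 2]),
         mixture_expectations=['pi_expected'])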
def main(A, K, t_max, R, exec_type, pi, theta, sigma, d_context, type_context, prior_K, mixture_expectations):
    ############################### MAIN CONFIG ###############################
    print(
        '{}-armed Contextual Linear Gaussian mixture bandit with optimal and sampling policies with MCMC inference for {} time-instants and {} realizations'
        .format(A, t_max, R))

    # Directory configuration
    dir_string = '../results/{}/A={}/t_max={}/R={}/d_context={}/type_context={}/pi={}/theta={}/sigma={}/prior_K={}/{}'.format(
        os.path.basename(__file__).split('.')[0], A, t_max, R, d_context,
        type_context,
        str.replace(str.strip(np.array_str(pi.flatten()), ' []'), ' ', '_'),
        str.replace(str.strip(np.array_str(theta.flatten()), ' []'), ' ', '_'),
        str.replace(str.strip(np.array_str(sigma.flatten()), ' []'), ' ', '_'),
        str.replace(str.strip(np.array_str(prior_K.flatten()), ' []'), ' ', '_'),
        '_'.join(mixture_expectations))
    os.makedirs(dir_string, exist_ok=True)

    ########## Contextual Bandit configuration ##########
    # Context
    if type_context == 'static':
        # Static context
        context = np.ones((d_context, t_max))
    elif type_context == 'randn':
        # Dynamic context: standard Gaussian
        context = np.random.randn(d_context, t_max)
    elif type_context == 'rand':
        # Dynamic context: uniform
        context = np.random.rand(d_context, t_max)
    else:
        # Unknown context
        raise ValueError('Invalid context type={}'.format(type_context))

    # Reward function
    reward_function = {
        'type': 'linear_gaussian_mixture',
        'dist': stats.norm,
        'pi': pi,
        'theta': theta,
        'sigma': sigma
    }

    ########## Inference
    # MCMC (Gibbs) parameters
    gibbs_max_iter = 4
    gibbs_loglik_eps = 0.01

    # Plotting
    gibbs_plot_save = 'show'
    gibbs_plot_save = None
    if gibbs_plot_save != None and gibbs_plot_save != 'show':
        # Plotting directories
        gibbs_plots = dir_string + '/gibbs_plots'
        os.makedirs(gibbs_plots, exist_ok=True)

    ########## Priors
    gamma = 0.1
    alpha = 1.
    beta = 1.
    sigma = 1.
    # Pitman-Yor discount parameter (d=0 recovers the Dirichlet process); the assert
    # enforces the standard constraints 0 <= d < 1 and concentration gamma > -d
    pitman_yor_d = 0
    assert (0 <= pitman_yor_d) and (pitman_yor_d < 1) and (gamma > -pitman_yor_d)

    ############################### BANDITS ###############################
    # Bandits to evaluate as a list
    bandits = []
    bandits_labels = []

    ### Optimal bandit
    bandits.append(OptimalBandit(A, reward_function))
    bandits_labels.append('Optimal Bandit')

    ### Thompson sampling: when sampling with static n=1 and no Monte Carlo
    thompsonSampling = {
        'sampling_type': 'static',
        'arm_N_samples': 1,
        'M': 1,
        'MC_type': 'MC_arms'
    }

    # Mixture expectation
    for mixture_expectation in mixture_expectations:
        thompsonSampling['mixture_expectation'] = mixture_expectation
        # Different mixture priors
        for this_K in prior_K:
            # New dirs
            if gibbs_plot_save != None and gibbs_plot_save != 'show':
                os.makedirs(gibbs_plots + '/prior_K{}'.format(this_K),
                            exist_ok=True)
                gibbs_plot_save = gibbs_plots + '/prior_K{}'.format(this_K)

            # Hyperparameters
            # Dirichlet for mixture weights
            prior_gamma = gamma * np.ones((A, this_K))
            # NIG for linear Gaussians
            prior_alpha = alpha * np.ones((A, this_K))
            prior_beta = beta * np.ones((A, this_K))

            # Initial thetas
            prior_theta = np.ones((A, this_K, d_context))
            # Different initial thetas
            for k in np.arange(this_K):
                prior_theta[:, k, :] = k

            prior_Sigma = np.zeros((A, this_K, d_context, d_context))
            # Initial covariances: uncorrelated
            for a in np.arange(A):
                for k in np.arange(this_K):
                    prior_Sigma[a, k, :, :] = sigma * np.eye(d_context)

            # MCMC
            # Reward prior as dictionary
            reward_prior = {
                'type': 'linear_gaussian_mixture',
                'dist': 'NIG',
                'K': this_K,
                'gamma': prior_gamma,
                'alpha': prior_alpha,
                'beta': prior_beta,
                'theta': prior_theta,
                'Sigma': prior_Sigma,
                'gibbs_max_iter': gibbs_max_iter,
                'gibbs_loglik_eps': gibbs_loglik_eps,
                'gibbs_plot_save': gibbs_plot_save
            }

            # Instantiate bandit
            bandits.append(
                MCMCBanditSampling(A, reward_function, reward_prior,
                                   thompsonSampling))
            bandits_labels.append('Gibbs-TS, prior_K={}, {}'.format(
                this_K, mixture_expectation))

        # Nonparametric mixture priors
        if gibbs_plot_save != None and gibbs_plot_save != 'show':
            os.makedirs(gibbs_plots + '/nonparametric', exist_ok=True)
            gibbs_plot_save = gibbs_plots + '/nonparametric'

        # Hyperparameters
        # Concentration parameter
        prior_d = pitman_yor_d * np.ones(A)
        prior_gamma = gamma * np.ones(A)
        # NIG for linear Gaussians
        prior_alpha = alpha * np.ones(A)
        prior_beta = beta * np.ones(A)

        # Initial thetas
        prior_theta = np.ones((A, d_context))
        prior_Sigma = np.zeros((A, d_context, d_context))
        # Initial covariances: uncorrelated
        for a in np.arange(A):
            prior_Sigma[a, :, :] = sigma * np.eye(d_context)

        # Reward prior as dictionary
        reward_prior = {
            'type': 'linear_gaussian_mixture',
            'dist': 'NIG',
            'K': 'nonparametric',
            'd': prior_d,
            'gamma': prior_gamma,
            'alpha': prior_alpha,
            'beta': prior_beta,
            'theta': prior_theta,
            'Sigma': prior_Sigma,
            'gibbs_max_iter': gibbs_max_iter,
            'gibbs_loglik_eps': gibbs_loglik_eps,
            'gibbs_plot_save': gibbs_plot_save
        }

        # Instantiate bandit
        bandits.append(
            MCMCBanditSampling(A, reward_function, reward_prior,
                               thompsonSampling))
        bandits_labels.append('Gibbs-TS, nonparametric, {}'.format(
            mixture_expectation))

    ### BANDIT EXECUTION
    # Execute each bandit
    for (n, bandit) in enumerate(bandits):
        bandit.execute_realizations(R, t_max, context, exec_type)

    # Save bandits info
    with open(dir_string + '/bandits_{}.pickle'.format(np.random.randn()),
              'wb') as f:
        pickle.dump(bandits, f)
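# A hedged sketch showing how the pickled results written above could be loaded back for
# offline analysis. Because this MCMC script suffixes the pickle name with a random number,
# the exact filename is discovered with glob here; this helper and its name are illustrative
# assumptions, not part of the original code.
def _example_load_saved_bandits(dir_string):
    import glob
    import pickle
    # Pick the most recent matching pickle under the results directory
    saved = sorted(glob.glob(dir_string + '/bandits*.pickle'))
    with open(saved[-1], 'rb') as f:
        bandits = pickle.load(f)
    return bandits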