def get_config():
  """Builds the sanity-check config for correlated-bridge posterior sampling.

  Compares Langevin, exact, Gibbs and bootstrap Thompson-sampling agents on
  the same correlated binomial bridge environment.
  """
  name = 'graph_correlated_sanity'
  n_stages = 20
  mu0 = -0.5
  sigma0 = 1
  sigma_tilde = 1
  step_count = 100
  step_size = 0.01
  # Agents and environment share the same bridge hyperparameters.
  bridge_params = (n_stages, mu0, sigma0, sigma_tilde)
  agents = collections.OrderedDict()
  agents['Langevin TS'] = functools.partial(
      CorrelatedBBLangevin, *bridge_params, step_count, step_size)
  agents['TS'] = functools.partial(CorrelatedBBTS, *bridge_params)
  agents['Gibbs TS'] = functools.partial(GibbsCorrelatedBB, *bridge_params)
  agents['bootstrap TS'] = functools.partial(
      BootstrapCorrelatedBB, *bridge_params)
  environments = collections.OrderedDict()
  environments['env'] = functools.partial(
      CorrelatedBinomialBridge, *bridge_params)
  experiments = collections.OrderedDict([(name, ExperimentNoAction)])
  n_steps = 500
  n_seeds = 1000
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Builds the config contrasting a well-specified TS prior with a wrong one.

  The 'correct' agent is seeded with an informative prior; the misspecified
  agent uses the class default. Both face the same stationary bandit drawn
  from the true prior.
  """
  name = 'finite_misspecified'
  n_arm = 3
  true_prior_success = [1, 1, 1]
  informative_prior_failure = [100, 100, 100]
  true_prior_failure = [50, 100, 200]

  def _correct_ts_init(n_arm):
    # Ad-hoc helper: this experiment is hard-coded for exactly three arms.
    assert n_arm == 3
    agent = FiniteBernoulliBanditTS(n_arm)
    agent.set_prior(true_prior_success, informative_prior_failure)
    return agent

  def _env_init(n_arm):
    # gamma=0.0 presumably disables the drift, leaving a stationary bandit
    # that still accepts an explicit prior — TODO confirm against the class.
    environment = DriftingFiniteArmedBernoulliBandit(n_arm, gamma=0.0)
    environment.set_prior(true_prior_success, true_prior_failure)
    return environment

  agents = collections.OrderedDict()
  agents['correct_ts'] = functools.partial(_correct_ts_init, n_arm)
  agents['misspecified_ts'] = functools.partial(FiniteBernoulliBanditTS, n_arm)
  environments = collections.OrderedDict(
      [('env', functools.partial(_env_init, n_arm))])
  experiments = collections.OrderedDict([(name, ExperimentWithMean)])
  n_steps = 1000
  n_seeds = 10000
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Builds the config for the bootstrap dynamic-pricing experiment."""
  name = 'dynamic_pricing'
  num_products = 5
  scale = 1
  noise_var = 10
  p_max = 1
  # Agent and environment take identical positional parameters.
  shared_args = (num_products, scale, noise_var, p_max)
  agents = collections.OrderedDict(
      [('bsPricing', functools.partial(BootstrapDynamicPricing, *shared_args))])
  environments = collections.OrderedDict(
      [('env', functools.partial(DynamicPricing, *shared_args))])
  experiments = collections.OrderedDict([(name, ExperimentNoAction)])
  n_steps = 80
  n_seeds = 2000
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Builds the config comparing bootstrap vs. Laplace on a binary-reward bridge."""
  name = 'graph_indep_binary'
  n_stages = 20
  shape = 2
  scale = 0.5
  tol = 0.1
  alpha = 0.2
  beta = 0.5
  # Both agents share the full parameter tuple; the environment only needs
  # the first three entries.
  agent_args = (n_stages, shape, scale, tol, alpha, beta)
  agents = collections.OrderedDict()
  agents['Bootstrap'] = functools.partial(
      BootstrapIndependentBBWithBinaryReward, *agent_args)
  agents['Laplace'] = functools.partial(
      LaplaceIndependentBBWithBinaryReward, *agent_args)
  environments = collections.OrderedDict([
      ('env', functools.partial(IndependentBinomialBridgeWithBinaryReward,
                                n_stages, shape, scale))])
  experiments = collections.OrderedDict([(name, ExperimentNoAction)])
  n_steps = 500
  n_seeds = 1000
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Builds the config contrasting coherent vs. misspecified TS on a bridge.

  The coherent agent models the correlation in the environment; the
  misspecified agent assumes independent edges.
  """
  name = 'graph_correlated'
  n_stages = 20
  mu0 = -0.5
  sigma0 = 1
  sigma_tilde = 1
  bridge_args = (n_stages, mu0, sigma0, sigma_tilde)
  agents = collections.OrderedDict()
  agents['coherent TS'] = functools.partial(CorrelatedBBTS, *bridge_args)
  agents['misspecified TS'] = functools.partial(IndependentBBTS, *bridge_args)
  environments = collections.OrderedDict(
      [('env', functools.partial(CorrelatedBinomialBridge, *bridge_args))])
  experiments = collections.OrderedDict([(name, ExperimentNoAction)])
  n_steps = 500
  n_seeds = 1000
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Builds the config sweeping the number of concurrent TS agents."""
  name = 'graph_indep_concurrent'
  n_stages = 20
  mu0 = -0.5
  sigma0 = 1
  sigma_tilde = 1
  num_agents = [1, 10, 20, 50, 100]
  # One agent entry per pool size, keyed 'K = <size>'.
  agents = collections.OrderedDict(
      ('K = ' + str(pool_size),
       functools.partial(IndependentBBMultipleTS, n_stages, mu0, sigma0,
                         sigma_tilde, pool_size))
      for pool_size in num_agents)
  environments = collections.OrderedDict([
      ('env', functools.partial(MultiAgentCorrelatedBinomialBridge, n_stages,
                                mu0, sigma0, sigma_tilde))])
  experiments = collections.OrderedDict([(name, ExperimentMultipleAgents)])
  n_steps = 100
  n_seeds = 1000
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Builds the sanity-check config for approximate TS on random bandits."""
  name = 'finite_simple_sanity'
  n_arm = 3
  step_size = 0.01
  step_count = 100
  agents = collections.OrderedDict()
  agents['Laplace TS'] = functools.partial(FiniteBernoulliBanditLaplace, n_arm)
  agents['Langevin TS'] = functools.partial(
      FiniteBernoulliBanditLangevin, n_arm, step_count, step_size)
  agents['bootstrap TS'] = functools.partial(
      FiniteBernoulliBanditBootstrap, n_arm)
  agents['TS'] = functools.partial(FiniteBernoulliBanditTS, n_arm)
  # Each environment gets its own random draw of success probabilities.
  n_env = 100
  environments = collections.OrderedDict(
      (env, functools.partial(FiniteArmedBernoulliBandit,
                              np.random.rand(n_arm)))
      for env in range(n_env))
  experiments = collections.OrderedDict([(name, BaseExperiment)])
  n_steps = 1000
  n_seeds = 100
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Generates the config for the experiment."""
  name = 'logistic'
  num_articles = 3
  dim = 7
  theta_mean = 0
  theta_std = 1
  epsilon1 = 0.01
  epsilon2 = 0.05
  batch_size = 50
  step_count = 200
  step_size = 1 / 200
  alpha = 0.2
  beta = 0.5
  tol = 0.0001
  # NOTE(review): several agents below are commented out (greedy and the two
  # epsilon-greedy variants); only the sampling-based agents are active.
  agents = collections.OrderedDict([
      #('greedy',
      # functools.partial(GreedyLogisticBandit,
      #                   num_articles, dim, theta_mean, theta_std, epsilon1,
      #                   alpha,beta,tol)),
      ('OSAGA-LD TS',
       functools.partial(OSAGALDTSLogisticBandit,
                         num_articles, dim, theta_mean, theta_std, epsilon1,
                         alpha, beta, tol, batch_size, step_count, step_size)),
      ('SAGA-LD TS',
       functools.partial(SAGALDTSLogisticBandit,
                         num_articles, dim, theta_mean, theta_std, epsilon1,
                         alpha, beta, tol, batch_size, step_count, step_size)),
      ('Langevin TS',
       functools.partial(LangevinTSLogisticBandit,
                         num_articles, dim, theta_mean, theta_std, epsilon1,
                         alpha, beta, tol, batch_size, step_count, step_size)),
      #(str(epsilon1)+'-greedy',
      # functools.partial(EpsilonGreedyLogisticBandit,
      #                   num_articles, dim, theta_mean, theta_std, epsilon1,
      #                   alpha,beta,tol)),
      #(str(epsilon2)+'-greedy',
      # functools.partial(EpsilonGreedyLogisticBandit,
      #                   num_articles, dim, theta_mean, theta_std, epsilon2,
      #                   alpha,beta,tol)),
      ('Laplace TS',
       functools.partial(LaplaceTSLogisticBandit,
                         num_articles, dim, theta_mean, theta_std, epsilon1,
                         alpha, beta, tol))
  ])
  # NOTE(review): the environment is deliberately(?) given None, None in the
  # positions where theta_mean, theta_std would go (see the trailing comment
  # retained from the original) — presumably LogisticBandit falls back to
  # internal defaults when these are None. Confirm against the class before
  # relying on the prior matching the agents'.
  environments = collections.OrderedDict([
      ('env', functools.partial(LogisticBandit, num_articles, dim, None, None))
  ]  #theta_mean, theta_std))]
  )
  experiments = collections.OrderedDict([(name, ExperimentNoAction)])
  n_steps = 5000
  n_seeds = 10000
  config = Config(name, agents, environments, experiments, n_steps, n_seeds)
  return config
def get_config():
  """Builds the cascade config comparing TS with two UCB1 optimism settings."""
  name = 'cascade'
  num_items = 50
  num_positions = 10
  true_a0 = 1
  true_b0 = 40
  best_optimism = 0.1

  def _ts_init(num_items, num_positions):
    return CascadingBanditTS(num_items, num_positions, a0=true_a0, b0=true_b0)

  def _ucb1_init(num_items, num_positions):
    return CascadingBanditUCB1(
        num_items, num_positions, a0=true_a0, b0=true_b0, optimism=1)

  def _ucb_best_init(num_items, num_positions):
    # UCB1 with a hand-tuned, less optimistic exploration bonus.
    return CascadingBanditUCB1(
        num_items, num_positions, a0=true_a0, b0=true_b0,
        optimism=best_optimism)

  agents = collections.OrderedDict()
  agents['ts'] = functools.partial(_ts_init, num_items, num_positions)
  agents['ucb1'] = functools.partial(_ucb1_init, num_items, num_positions)
  agents['ucb-best'] = functools.partial(
      _ucb_best_init, num_items, num_positions)
  environments = collections.OrderedDict([
      ('env', functools.partial(CascadingBandit, num_items, num_positions,
                                true_a0, true_b0))])
  # Very large experiment so don't log as frequently to keep file sensible.
  experiments = collections.OrderedDict(
      [(name, functools.partial(ExperimentNoAction, rec_freq=10))])
  n_steps = 5000
  n_seeds = 1000
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Builds the config comparing TS with an epsilon-greedy sweep on a bridge."""
  name = 'graph_indep'
  n_stages = 20
  mu0 = -0.5
  sigma0 = 1
  sigma_tilde = 1
  agents = collections.OrderedDict()
  agents['ts'] = functools.partial(
      IndependentBBTS, n_stages, mu0, sigma0, sigma_tilde)
  # Pure greedy corresponds to epsilon=0; the others sweep exploration rates.
  for label, eps in [('greedy', 0.0), ('0.01-greedy', 0.01),
                     ('0.05-greedy', 0.05), ('0.1-greedy', 0.1)]:
    agents[label] = functools.partial(
        IndependentBBEpsilonGreedy, n_stages, mu0, sigma0, sigma_tilde,
        epsilon=eps)
  environments = collections.OrderedDict([
      ('env', functools.partial(IndependentBinomialBridge, n_stages, mu0,
                                sigma0, sigma_tilde))])
  experiments = collections.OrderedDict([(name, ExperimentNoAction)])
  n_steps = 500
  n_seeds = 1000
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Builds the config for the news-recommendation experiment.

  Compares greedy, epsilon-greedy, Langevin-TS and Laplace-TS agents on
  the same news-recommendation environment.
  """
  name = 'news_recommendation'
  num_articles = 3
  dim = 7
  theta_mean = 0
  theta_std = 1
  epsilon1 = 0.01
  epsilon2 = 0.05
  batch_size = 50
  step_count = 200
  step_size = 1 / 200
  alpha = 0.2
  beta = 0.5
  tol = 0.0001
  # All agents share the base problem parameters.
  base = (num_articles, dim, theta_mean, theta_std)
  agents = collections.OrderedDict()
  agents['greedy'] = functools.partial(
      GreedyNewsRecommendation, *base, epsilon1, alpha, beta, tol)
  agents['Langevin TS'] = functools.partial(
      LangevinTSNewsRecommendation, *base, epsilon1, alpha, beta, tol,
      batch_size, step_count, step_size)
  agents[str(epsilon1) + '-greedy'] = functools.partial(
      EpsilonGreedyNewsRecommendation, *base, epsilon1, alpha, beta, tol)
  agents[str(epsilon2) + '-greedy'] = functools.partial(
      EpsilonGreedyNewsRecommendation, *base, epsilon2, alpha, beta, tol)
  agents['Laplace TS'] = functools.partial(
      LaplaceTSNewsRecommendation, *base, epsilon1, alpha, beta, tol)
  environments = collections.OrderedDict(
      [('env', functools.partial(NewsRecommendation, *base))])
  experiments = collections.OrderedDict([(name, ExperimentNoAction)])
  n_steps = 5000
  n_seeds = 10000
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Generates the config for the experiment."""
  name = 'graph_indep_binary_new'
  n_stages = 20
  shape = 2
  scale = 0.5
  tol = 0.001
  alpha = 0.2
  beta = 0.5
  # Langevin settings — only referenced by the commented-out agent set below.
  langevin_batch_size = 100
  langevin_step_count = 200
  langevin_step_size = 0.0005
  epsilon = 0
  # NOTE(review): the key says 'Langevin TS' but the agent actually
  # constructed is EpsilonGreedyIndependentBBWithBinaryReward with epsilon=0
  # (i.e. pure greedy). The real Langevin/bootstrap/Laplace agents are in the
  # commented-out block below — confirm whether the label or the agent class
  # is the intended one before interpreting results.
  agents = collections.OrderedDict(
      [('Langevin TS',
        functools.partial(EpsilonGreedyIndependentBBWithBinaryReward,
                          n_stages, epsilon, shape, scale, tol, alpha, beta))])
  # agents = collections.OrderedDict(
  #     [('Langevin TS',
  #       functools.partial(StochasticLangevinMCMCIndependentBBWithBinaryReward,
  #                         n_stages, shape, scale, tol, alpha, beta,
  #                         langevin_batch_size, langevin_step_count,
  #                         langevin_step_size)),
  #      ('bootstrap TS',
  #       functools.partial(BootstrapIndependentBBWithBinaryReward,
  #                         n_stages, shape, scale, tol, alpha, beta)),
  #      ('Laplace TS',
  #       functools.partial(LaplaceIndependentBBWithBinaryReward,
  #                         n_stages, shape, scale, tol, alpha, beta))]
  # )
  environments = collections.OrderedDict(
      [('env',
        functools.partial(IndependentBinomialBridgeWithBinaryReward,
                          n_stages, shape, scale))]
  )
  experiments = collections.OrderedDict(
      [(name, ExperimentNoAction)]
  )
  n_steps = 500
  n_seeds = 1000
  config = Config(name, agents, environments, experiments, n_steps, n_seeds)
  return config
def get_config():
  """Builds the config comparing stationary vs. drift-aware TS agents."""
  name = 'finite_drift'
  n_arm = 3
  agents = collections.OrderedDict()
  agents['stationary_ts'] = functools.partial(FiniteBernoulliBanditTS, n_arm)
  agents['nonstationary_ts'] = functools.partial(
      DriftingFiniteBernoulliBanditTS, n_arm)
  environments = collections.OrderedDict(
      [('env', functools.partial(DriftingFiniteArmedBernoulliBandit, n_arm))])
  experiments = collections.OrderedDict([(name, BaseExperiment)])
  n_steps = 1000
  n_seeds = 10000
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Builds the basic greedy-vs-TS config on a fixed three-armed bandit."""
  name = 'finite_simple'
  n_arm = 3
  # Fixed success probabilities for the three arms.
  probs = [0.7, 0.8, 0.9]
  agents = collections.OrderedDict()
  agents['greedy'] = functools.partial(
      FiniteBernoulliBanditEpsilonGreedy, n_arm)
  agents['ts'] = functools.partial(FiniteBernoulliBanditTS, n_arm)
  environments = collections.OrderedDict(
      [('env', functools.partial(FiniteArmedBernoulliBandit, probs))])
  experiments = collections.OrderedDict([(name, BaseExperiment)])
  n_steps = 1000
  n_seeds = 10000
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Builds the config for the product-assortment experiment."""
  name = 'product_assortment'
  num_products = 6
  prior_mean = 0
  prior_var_diagonal = 1
  prior_var_off_diagonal = 0.2
  noise_var = 0.04
  profits = np.array([1 / 6] * 6)
  epsilon = 0.07
  k = 9
  # The environment takes the problem parameters; agents additionally take
  # the exploration parameters epsilon and k.
  env_args = (num_products, prior_mean, prior_var_diagonal,
              prior_var_off_diagonal, noise_var, profits)
  agent_args = env_args + (epsilon, k)
  agents = collections.OrderedDict()
  agents['TS'] = functools.partial(TSAssortment, *agent_args)
  agents['greedy'] = functools.partial(GreedyAssortment, *agent_args)
  agents[str(epsilon) + '-greedy'] = functools.partial(
      EpsilonGreedyAssortment, *agent_args)
  # Annealing schedule, keyed as 'k/(k+t)-greedy'.
  agents[str(k) + '/(' + str(k) + '+t)-greedy'] = functools.partial(
      AnnealingEpsilonGreedyAssortment, *agent_args)
  environments = collections.OrderedDict(
      [('env', functools.partial(ProductAssortment, *env_args))])
  experiments = collections.OrderedDict([(name, ExperimentNoAction)])
  n_steps = 500
  n_seeds = 20000
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Builds the cascade config: correct/misspecified TS vs. UCB baselines."""
  name = 'cascade'
  num_items = 50
  num_positions = 10
  true_a0 = 1
  true_b0 = 10

  def _correct_ts_init(num_items, num_positions):
    # TS seeded with the environment's true Beta prior.
    return CascadingBanditTS(num_items, num_positions, a0=true_a0, b0=true_b0)

  agents = collections.OrderedDict()
  agents['correct_ts'] = functools.partial(
      _correct_ts_init, num_items, num_positions)
  agents['misspecified_ts'] = functools.partial(
      CascadingBanditTS, num_items, num_positions)
  agents['ucb1'] = functools.partial(
      CascadingBanditUCB1, num_items, num_positions)
  agents['kl_ucb'] = functools.partial(
      CascadingBanditKLUCB, num_items, num_positions)
  environments = collections.OrderedDict([
      ('env', functools.partial(CascadingBandit, num_items, num_positions,
                                true_a0, true_b0))])
  # Very large experiment so don't log as frequently to keep file sensible.
  experiments = collections.OrderedDict(
      [(name, functools.partial(ExperimentNoAction, rec_freq=10))])
  n_steps = 5000
  n_seeds = 10000
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Builds the greedy-vs-TS config over many randomly-drawn bandits."""
  name = 'finite_simple_rand'
  n_arm = 3
  agents = collections.OrderedDict()
  agents['greedy'] = functools.partial(
      FiniteBernoulliBanditEpsilonGreedy, n_arm)
  agents['ts'] = functools.partial(FiniteBernoulliBanditTS, n_arm)
  # One environment per independent draw of random arm probabilities.
  n_env = 100
  environments = collections.OrderedDict(
      (env, functools.partial(FiniteArmedBernoulliBandit,
                              np.random.rand(n_arm)))
      for env in range(n_env))
  experiments = collections.OrderedDict([(name, BaseExperiment)])
  n_steps = 1000
  n_seeds = 100
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Builds the config pitting BBQ selective-sampling agents against REINFORCE."""
  name = 'rl_bbq'
  # Sample a 100-dim theta with only the first two coordinates non-zero and
  # unit l2 norm.
  unit_circle_angle = np.random.uniform(0, 2 * np.pi)
  true_theta = [np.cos(unit_circle_angle), np.sin(unit_circle_angle)] + [0] * 98
  kappa_1 = 0.4
  kappa_2 = 0.5
  # argparse.Namespace used as a lightweight attribute container for the
  # policy-gradient agent's settings.
  args = argparse.Namespace(
      n_feat=len(true_theta),
      optim='sgd',
      learn_rate=1e-2,
      momentum=0.9,  # only for SGD
      gamma=1,  # discount factor
      sample_cost=2,  # 0<=cost
      in_dim=100,  # input dim of policy
      n_act=2)  # num actions
  agents = collections.OrderedDict()
  agents['bbq_k4'] = functools.partial(SelectiveSampleBBQ, args.n_feat, kappa_1)
  agents['bbq_k5'] = functools.partial(SelectiveSampleBBQ, args.n_feat, kappa_2)
  agents['reinf'] = functools.partial(PolicyGradientREINFORCE, PolicyNN, args)
  environments = collections.OrderedDict([
      ('env', functools.partial(ContextualBanditFunctionalContext, uniform_iid,
                                true_theta, linear_classifier))])
  experiments = collections.OrderedDict([(name, BaseExperiment)])
  n_steps = 1000
  n_seeds = 10000
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Builds the BBQ selective-sampling config with a fixed 2-dim theta."""
  name = 'bbq'
  true_theta = [1.0, 0.5]
  kappa_1 = 0.25
  kappa_2 = 0.3
  n_feat = len(true_theta)
  agents = collections.OrderedDict()
  agents['bbq_k25'] = functools.partial(SelectiveSampleBBQ, n_feat, kappa_1)
  agents['bbq_k3'] = functools.partial(SelectiveSampleBBQ, n_feat, kappa_2)
  # Contexts are drawn iid from a normal distribution.
  environments = collections.OrderedDict([
      ('env', functools.partial(ContextualBanditFunctionalContext, normal_iid,
                                true_theta, linear_classifier))])
  experiments = collections.OrderedDict([(name, BaseExperiment)])
  n_steps = 1000
  n_seeds = 10000
  return Config(name, agents, environments, experiments, n_steps, n_seeds)
def get_config():
  """Generates the config for the ensemble neural-network experiment.

  Compares epsilon-greedy, annealing epsilon-greedy, ensemble sampling and
  dropout agents on a two-layer neural-network bandit. Construction is
  unusual: agents need the environment's sampled actions, so agents and the
  environment are wrapped in deferred constructors that a custom experiment
  constructor assembles per seed.
  """
  name = 'ensemble_nn'
  input_dim = 100
  hidden_dim = 50
  num_actions = 100
  prior_var = 1.
  noise_var = 100.
  # We have to do something weird since ensemble_nn requires construction of
  # the environment before the agent in order to specify the actions.
  agents = collections.OrderedDict([
      ('epsilon=0.01',
       lambda: functools.partial(EpsilonGreedy, epsilon_param=0.01)),
      ('epsilon=0.05',
       lambda: functools.partial(EpsilonGreedy, epsilon_param=0.05)),
      ('epsilon=0.1',
       lambda: functools.partial(EpsilonGreedy, epsilon_param=0.1)),
      ('epsilon=0.2',
       lambda: functools.partial(EpsilonGreedy, epsilon_param=0.2)),
      ('epsilon=0.3',
       lambda: functools.partial(EpsilonGreedy, epsilon_param=0.3)),
      ('epsilon=10/(10+t)',
       lambda: functools.partial(EpsilonAnnealing, epsilon_param=10.)),
      ('epsilon=20/(20+t)',
       lambda: functools.partial(EpsilonAnnealing, epsilon_param=20.)),
      ('epsilon=30/(30+t)',
       lambda: functools.partial(EpsilonAnnealing, epsilon_param=30.)),
      ('epsilon=40/(40+t)',
       lambda: functools.partial(EpsilonAnnealing, epsilon_param=40.)),
      ('epsilon=50/(50+t)',
       lambda: functools.partial(EpsilonAnnealing, epsilon_param=50.)),
      # BUG FIX: this entry was labelled 'ensemble=1' while constructing
      # EnsembleSampling with num_models=3. The sweep is log-spaced
      # (3, 10, 30, 100, 300), so the label is corrected to match the agent
      # actually built.
      ('ensemble=3',
       lambda: functools.partial(EnsembleSampling, num_models=3)),
      ('ensemble=10',
       lambda: functools.partial(EnsembleSampling, num_models=10)),
      ('ensemble=30',
       lambda: functools.partial(EnsembleSampling, num_models=30)),
      ('ensemble=100',
       lambda: functools.partial(EnsembleSampling, num_models=100)),
      ('ensemble=300',
       lambda: functools.partial(EnsembleSampling, num_models=300)),
      ('dropout=0.1',
       lambda: functools.partial(TwoLayerNNDropout, drop_prob=0.1)),
      ('dropout=0.25',
       lambda: functools.partial(TwoLayerNNDropout, drop_prob=0.25)),
      ('dropout=0.5',
       lambda: functools.partial(TwoLayerNNDropout, drop_prob=0.5)),
      ('dropout=0.75',
       lambda: functools.partial(TwoLayerNNDropout, drop_prob=0.75)),
      ('dropout=0.9',
       lambda: functools.partial(TwoLayerNNDropout, drop_prob=0.9))
  ])

  # Similarly we do not actually evaluate the environment since we need a
  # custom experiment function.
  def _custom_partial_nn():
    # Deferred environment constructor: returns a partial so the seed can be
    # bound later inside _env_constructor.
    f = functools.partial(TwoLayerNNBandit, input_dim, hidden_dim, num_actions,
                          prior_var, noise_var)
    return f

  environments = collections.OrderedDict([('env', _custom_partial_nn)])
  n_steps = 1000
  n_seeds = 1000

  def _env_constructor(agent_lambda, env_lambda, n_steps, seed, unique_id):
    """Constructor for neural network experiments.

    This is more involved than other configs since the construction of the
    actors requires specification of the environment. We could/should improve
    on this with a code refactor, but leaving it now since it's working.

    NOTE(review): agent_lambda/env_lambda here are assumed to be the already
    unwrapped partials (the runner appears to call the outer lambdas/
    constructors first) — confirm against the experiment runner.
    """
    environment = env_lambda(seed=seed)
    actions = environment.get_actions()
    agent = agent_lambda(input_dim, hidden_dim, actions, n_steps, prior_var,
                         noise_var)
    experiment = ExperimentNoAction(
        agent, environment, n_steps, seed, unique_id=unique_id)
    return experiment

  experiments = collections.OrderedDict([(name, _env_constructor)])
  config = Config(name, agents, environments, experiments, n_steps, n_seeds)
  return config
def get_config():
  """Generates the config for the experiment."""
  # Alternative experiment names kept for reference:
  # name = 'rl_reinf_ac_bbq'
  name = 'rl_reinf_ac_feat_bbq_uniform'
  # name = 'rl_reinf_ac_bbq_uniform'
  # name = 'rl_reinf_ac_bbq_adult'
  # true_theta = [1.0, 0.5]
  # kappa_1 = 1.0
  # kappa_2 = 0.9999
  # sample 100-dim theta with first two non-zero and l2-norm theta=1
  unit_circle_angle = np.random.uniform(0, 2 * np.pi)
  true_theta = [np.cos(unit_circle_angle), np.sin(unit_circle_angle)] + [0] * 98
  # infile = '../data/adult.csv'
  kappa_1 = 0.08
  kappa_2 = 0.1
  # kappa_1 = 0.12
  # kappa_2 = 0.1
  # argparse.Namespace used as a plain attribute container, per
  # https://stackoverflow.com/questions/16878315/what-is-the-right-way-to-treat-python-argparse-namespace-as-a-dictionary
  # NOTE(review): args_1 (REINFORCE) and args_2 (actor-critic) are currently
  # identical in every field; they are kept separate so the two agents can be
  # tuned independently.
  args_1 = argparse.Namespace()
  args_1.n_feat = len(true_theta)
  args_1.optim = 'sgd'
  # args_1.learn_rate = 2e-4
  args_1.learn_rate = 1e-4
  args_1.momentum = 0.9  # only for SGD
  args_1.gamma = 1  # discount factor
  args_1.sample_cost = 0.9  # 0<=cost
  # args_1.sample_cost = 1  # 0<=cost
  args_1.in_dim = len(true_theta) + 1  # input dim of policy
  args_1.n_act = 2  # num actions
  args_2 = argparse.Namespace()
  args_2.n_feat = len(true_theta)
  args_2.optim = 'sgd'  #'adam'
  # args_2.learn_rate = 2e-4
  args_2.learn_rate = 1e-4
  args_2.momentum = 0.9  # only for SGD
  args_2.gamma = 1  # discount factor
  args_2.sample_cost = 0.9  # 0 <= cost
  # args_2.sample_cost = 1  # 0 <= cost
  args_2.in_dim = len(true_theta) + 1  # input dim of policy
  args_2.n_act = 2  # num actions
  agents = collections.OrderedDict([
      ('bbq_k08',
       functools.partial(SelectiveSampleBBQ, args_1.n_feat, kappa_1)),
      ('bbq_k1',
       functools.partial(SelectiveSampleBBQ, args_1.n_feat, kappa_2)),
      ('reinf',
       functools.partial(PolicyGradientREINFORCE, PolicyNN, args_1)),
      ('ac',
       functools.partial(PolicyGradientActorCritic, PolicyNNActorCritic,
                         args_2))
  ])
  # Active environment uses uniform iid contexts; normal-iid and data-file
  # variants are kept commented for reference.
  environments = collections.OrderedDict(
      # [('env', functools.partial(ContextualBanditFunctionalContext,
      #                            normal_iid, true_theta, linear_classifier))]
      [('env', functools.partial(ContextualBanditFunctionalContext,
                                 uniform_iid, true_theta, linear_classifier))]
      # [('env', functools.partial(ContextualBanditDataFileContext, infile))]
  )
  experiments = collections.OrderedDict([(name, BaseExperiment)])
  n_steps = 500
  n_seeds = 10000
  config = Config(name, agents, environments, experiments, n_steps, n_seeds)
  return config