def run_trial(trial_idx, delta, algo_names):
  """Runs a trial of wheel bandit problem instance for a set of algorithms."""
  # Note: FLAGS, num_actions, context_dim and the modules used below are
  # assumed to be imported/defined at module level in the surrounding file.
  filename = os.path.join(FLAGS.datasetdir,
                          str(delta) + '_' + str(trial_idx) + '.npz')
  with gfile.GFile(filename, 'r') as f:
    # Pre-generated wheel bandit data for this (delta, trial) pair.
    sampled_vals = np.load(f)
    dataset = sampled_vals['dataset']
    opt_rewards = sampled_vals['opt_rewards']

  x_hidden_size = 100
  x_encoder_sizes = [x_hidden_size] * 2

  # Build one bandit algorithm object per requested name.
  algos = []
  for algo_name in algo_names:
    if algo_name == 'uniform':
      hparams = contrib_training.HParams(num_actions=num_actions)
      algos.append(uniform_sampling.UniformSampling(algo_name, hparams))
    elif algo_name == 'neurolinear':
      hparams = contrib_training.HParams(num_actions=num_actions,
                                         context_dim=context_dim,
                                         init_scale=0.3,
                                         activation=tf.nn.relu,
                                         output_activation=tf.nn.relu,
                                         layer_sizes=x_encoder_sizes,
                                         batch_size=512,
                                         activate_decay=True,
                                         initial_lr=0.1,
                                         max_grad_norm=5.0,
                                         show_training=False,
                                         freq_summary=1000,
                                         buffer_s=-1,
                                         initial_pulls=2,
                                         reset_lr=True,
                                         lr_decay_rate=0.5,
                                         training_freq=1,
                                         training_freq_network=20,
                                         training_epochs=50,
                                         a0=12,
                                         b0=30,
                                         lambda_prior=23)
      algos.append(
          neural_linear_sampling.NeuralLinearPosteriorSampling(
              algo_name, hparams))
    elif algo_name == 'multitaskgp':
      hparams_gp = contrib_training.HParams(
          num_actions=num_actions,
          num_outputs=num_actions,
          context_dim=context_dim,
          reset_lr=False,
          learn_embeddings=True,
          max_num_points=1000,
          show_training=False,
          freq_summary=1000,
          batch_size=512,
          keep_fixed_after_max_obs=True,
          training_freq=20,
          initial_pulls=2,
          training_epochs=50,
          lr=0.01,
          buffer_s=-1,
          initial_lr=0.001,
          lr_decay_rate=0.0,
          optimizer='RMS',
          task_latent_dim=5,
          activate_decay=False)
      algos.append(
          posterior_bnn_sampling.PosteriorBNNSampling(
              algo_name, hparams_gp, 'GP'))
    elif algo_name[:3] == 'snp' or algo_name[:3] == 'anp':
      # Neural process variants ('snp_*' / 'anp_*') restore a pretrained
      # model from FLAGS.modeldir.
      hidden_size = 64
      latent_units = 32
      global_latent_net_sizes = [hidden_size] * 2 + [2 * latent_units]
      local_latent_net_sizes = [hidden_size] * 3 + [2]
      x_y_encoder_sizes = [hidden_size] * 3
      heteroskedastic_net_sizes = None
      mean_att_type = attention.laplace_attention
      scale_att_type_1 = attention.laplace_attention
      scale_att_type_2 = attention.laplace_attention
      att_type = 'multihead'
      att_heads = 8
      data_uncertainty = False
      is_anp = False

      config = algo_name.split('_')
      mfile = FLAGS.prefix + config[1] + '_' + config[2] + FLAGS.suffix
      if algo_name[:3] == 'anp':
        mfile = 'anp_' + mfile
        local_latent_net_sizes = [hidden_size] * 3 + [2 * 5]
        is_anp = True
      mpath = os.path.join(FLAGS.modeldir, mfile)

      hparams = contrib_training.HParams(
          num_actions=num_actions,
          context_dim=context_dim,
          init_scale=0.3,
          activation=tf.nn.relu,
          output_activation=tf.nn.relu,
          x_encoder_sizes=x_encoder_sizes,
          x_y_encoder_sizes=x_y_encoder_sizes,
          global_latent_net_sizes=global_latent_net_sizes,
          local_latent_net_sizes=local_latent_net_sizes,
          heteroskedastic_net_sizes=heteroskedastic_net_sizes,
          att_type=att_type,
          att_heads=att_heads,
          mean_att_type=mean_att_type,
          scale_att_type_1=scale_att_type_1,
          scale_att_type_2=scale_att_type_2,
          data_uncertainty=data_uncertainty,
          batch_size=512,
          activate_decay=True,
          initial_lr=0.1,
          max_grad_norm=5.0,
          show_training=False,
          freq_summary=1000,
          buffer_s=-1,
          initial_pulls=2,
          reset_lr=True,
          lr_decay_rate=0.5,
          training_freq=10,
          training_freq_network=20,
          training_epochs=50,
          uncertainty_type='attentive_freeform',
          local_variational=True,
          model_path=mpath,
          is_anp=is_anp)

      if config[1] == 'prior':
        hparams.set_hparam('local_variational', False)
      if config[2] == 'gp':
        hparams.set_hparam('uncertainty_type', 'attentive_gp')

      algos.append(
          offline_contextual_bandits.OfflineContextualBandits(
              algo_name, hparams))

  # Run every algorithm over the same offline dataset and time the sweep.
  t_init = time.time()
  _, h_rewards = contextual_bandit.run_contextual_bandit(
      context_dim,
      num_actions,
      dataset,
      algos,
      num_contexts=FLAGS.num_contexts)  # pytype: disable=wrong-keyword-args
  t_final = time.time()

  return h_rewards, t_final - t_init, opt_rewards[:FLAGS.num_contexts]
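# Illustrative decoding of an 'snp'/'anp' algorithm name in run_trial above
# (the flag values here are assumptions for the example, not taken from the
# original script): with FLAGS.prefix = 'snp_' and FLAGS.suffix = '.ckpt',
#   algo_name = 'snp_prior_gp'  ->  config = ['snp', 'prior', 'gp']
#   mfile     = 'snp_' + 'prior' + '_' + 'gp' + '.ckpt' = 'snp_prior_gp.ckpt'
# and the two set_hparam calls then disable the local variational path
# (config[1] == 'prior') and switch uncertainty_type to 'attentive_gp'
# (config[2] == 'gp').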
# Second variant of run_trial (used by the 'gnp' model runner); it matches the
# version above except for the neural-process branch and its use of
# offline_contextual_bandits_gnp.
def run_trial(trial_idx, delta, algo_names):
  """Runs a trial of wheel bandit problem instance for a set of algorithms."""
  filename = os.path.join(FLAGS.datasetdir,
                          str(delta) + '_' + str(trial_idx) + '.npz')
  with gfile.GFile(filename, 'r') as f:
    sampled_vals = np.load(f)
    dataset = sampled_vals['dataset']
    opt_rewards = sampled_vals['opt_rewards']

  x_hidden_size = 100
  x_encoder_sizes = [x_hidden_size] * 2

  algos = []
  for algo_name in algo_names:
    if algo_name == 'uniform':
      hparams = contrib_training.HParams(num_actions=num_actions)
      algos.append(uniform_sampling.UniformSampling(algo_name, hparams))
    elif algo_name == 'neurolinear':
      hparams = contrib_training.HParams(num_actions=num_actions,
                                         context_dim=context_dim,
                                         init_scale=0.3,
                                         activation=tf.nn.relu,
                                         output_activation=tf.nn.relu,
                                         layer_sizes=x_encoder_sizes,
                                         batch_size=512,
                                         activate_decay=True,
                                         initial_lr=0.1,
                                         max_grad_norm=5.0,
                                         show_training=False,
                                         freq_summary=1000,
                                         buffer_s=-1,
                                         initial_pulls=2,
                                         reset_lr=True,
                                         lr_decay_rate=0.5,
                                         training_freq=1,
                                         training_freq_network=20,
                                         training_epochs=50,
                                         a0=12,
                                         b0=30,
                                         lambda_prior=23)
      algos.append(
          neural_linear_sampling.NeuralLinearPosteriorSampling(
              algo_name, hparams))
    elif algo_name == 'multitaskgp':
      hparams_gp = contrib_training.HParams(
          num_actions=num_actions,
          num_outputs=num_actions,
          context_dim=context_dim,
          reset_lr=False,
          learn_embeddings=True,
          max_num_points=1000,
          show_training=False,
          freq_summary=1000,
          batch_size=512,
          keep_fixed_after_max_obs=True,
          training_freq=20,
          initial_pulls=2,
          training_epochs=50,
          lr=0.01,
          buffer_s=-1,
          initial_lr=0.001,
          lr_decay_rate=0.0,
          optimizer='RMS',
          task_latent_dim=5,
          activate_decay=False)
      algos.append(
          posterior_bnn_sampling.PosteriorBNNSampling(
              algo_name, hparams_gp, 'GP'))
    elif algo_name[:3] == 'gnp':
      hidden_size = 64
      x_encoder_net_sizes = None
      decoder_net_sizes = [hidden_size] * 3 + [2 * num_actions]
      heteroskedastic_net_sizes = None
      att_type = 'multihead'
      att_heads = 8
      data_uncertainty = False
      config = algo_name.split('_')
      model_type = config[1]

      if algo_name[:len('gnp_anp_beta_')] == 'gnp_anp_beta_':
        # Beta and temperature are parsed from fields 3 and 5 of the
        # underscore-separated algorithm name.
        mfile = algo_name + FLAGS.suffix
        x_y_encoder_net_sizes = [hidden_size] * 3
        global_latent_net_sizes = [hidden_size] * 2
        local_latent_net_sizes = None
        beta = float(config[3])
        temperature = float(config[5])
      else:
        mfile = FLAGS.prefix + config[1] + FLAGS.suffix
        if model_type == 'cnp':
          x_y_encoder_net_sizes = [hidden_size] * 4
          global_latent_net_sizes = None
          local_latent_net_sizes = None
        elif model_type == 'np':
          x_y_encoder_net_sizes = [hidden_size] * 2
          global_latent_net_sizes = [hidden_size] * 2
          local_latent_net_sizes = None
        elif model_type == 'anp':
          x_y_encoder_net_sizes = [hidden_size] * 2
          global_latent_net_sizes = [hidden_size] * 2
          local_latent_net_sizes = None
        elif model_type == 'acnp':
          x_y_encoder_net_sizes = [hidden_size] * 4
          global_latent_net_sizes = None
          local_latent_net_sizes = None
        elif model_type == 'acns':
          x_y_encoder_net_sizes = [hidden_size] * 2
          global_latent_net_sizes = [hidden_size] * 2
          local_latent_net_sizes = [hidden_size] * 2
        # Defaults for models whose name does not encode beta/temperature.
        beta = 1.
        temperature = 1.

      mpath = os.path.join(FLAGS.modeldir, mfile)
      hparams = contrib_training.HParams(
          num_actions=num_actions,
          context_dim=context_dim,
          init_scale=0.3,
          activation=tf.nn.relu,
          output_activation=tf.nn.relu,
          x_encoder_net_sizes=x_encoder_net_sizes,
          x_y_encoder_net_sizes=x_y_encoder_net_sizes,
          global_latent_net_sizes=global_latent_net_sizes,
          local_latent_net_sizes=local_latent_net_sizes,
          decoder_net_sizes=decoder_net_sizes,
          heteroskedastic_net_sizes=heteroskedastic_net_sizes,
          att_type=att_type,
          att_heads=att_heads,
          model_type=model_type,
          data_uncertainty=data_uncertainty,
          beta=beta,
          temperature=temperature,
          model_path=mpath,
          batch_size=512,
          activate_decay=True,
          initial_lr=0.1,
          max_grad_norm=5.0,
          show_training=False,
          freq_summary=1000,
          buffer_s=-1,
          initial_pulls=2,
          reset_lr=True,
          lr_decay_rate=0.5,
          training_freq=10,
          training_freq_network=20,
          training_epochs=50)

      algos.append(
          offline_contextual_bandits_gnp.OfflineContextualBandits(
              algo_name, hparams))

  t_init = time.time()
  _, h_rewards = contextual_bandit.run_contextual_bandit(
      context_dim,
      num_actions,
      dataset,
      algos,
      num_contexts=FLAGS.num_contexts)
  t_final = time.time()

  return h_rewards, t_final - t_init, opt_rewards[:FLAGS.num_contexts]
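# Hypothetical driver sketch (not part of the original files): one way the
# outputs of run_trial could be aggregated into per-algorithm cumulative
# regret. The helper name, loop bounds, and printing below are illustrative
# assumptions, not the repo's actual main().
def run_all_trials(delta, algo_names, num_trials):
  """Runs several trials and stacks per-algorithm cumulative regret."""
  regrets = []
  for trial_idx in range(num_trials):
    # h_rewards: (num_contexts, num_algos); opt_rewards: (num_contexts,).
    h_rewards, elapsed, opt_rewards = run_trial(trial_idx, delta, algo_names)
    h_rewards = np.asarray(h_rewards)
    regrets.append(opt_rewards.sum() - h_rewards.sum(axis=0))
    print('Trial %d (delta=%s) took %.1fs.' % (trial_idx, delta, elapsed))
  return np.stack(regrets)  # shape: (num_trials, num_algos)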