# Example 1
def main(_):
  """Run a suite of contextual bandit algorithms on one sampled dataset.

  Samples ``num_contexts`` contexts for ``data_type``, runs every algorithm
  in ``algos`` over the same data via ``run_contextual_bandit``, then prints
  a comparison of accumulated rewards against the optimal policy.
  """

  # Problem parameters
  num_contexts = 2000

  # Data type in {linear, sparse_linear, mushroom, financial, jester,
  #                 statlog, adult, covertype, census, wheel}
  data_type = 'mushroom'

  # Create dataset
  sampled_vals = sample_data(data_type, num_contexts)
  dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

  def nn_hparams(**overrides):
    """Return HParams with the shared neural-net defaults; overrides win.

    All NN-based bandits below share the same architecture/optimization
    defaults and differ only in algorithm-specific knobs. A fresh dict (and
    a fresh layer_sizes list) is built on every call, so the resulting
    HParams objects share no mutable state.
    """
    params = dict(num_actions=num_actions,
                  context_dim=context_dim,
                  init_scale=0.3,
                  activation=tf.nn.relu,
                  layer_sizes=[50],
                  batch_size=512,
                  activate_decay=True,
                  initial_lr=0.1,
                  max_grad_norm=5.0,
                  show_training=False,
                  freq_summary=1000,
                  buffer_s=-1,
                  initial_pulls=2,
                  training_epochs=100)
    params.update(overrides)
    return tf.contrib.training.HParams(**params)

  # Define hyperparameters and algorithms
  hparams = tf.contrib.training.HParams(num_actions=num_actions)

  # Bayesian linear regression posterior sampling (no neural net).
  hparams_linear = tf.contrib.training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               a0=6,
                                               b0=6,
                                               lambda_prior=0.25,
                                               initial_pulls=2)

  hparams_rms = nn_hparams(optimizer='RMS',
                           reset_lr=True,
                           lr_decay_rate=0.5,
                           training_freq=50,
                           p=0.95,
                           q=3)

  hparams_dropout = nn_hparams(optimizer='RMS',
                               reset_lr=True,
                               lr_decay_rate=0.5,
                               training_freq=50,
                               use_dropout=True,
                               keep_prob=0.80)

  hparams_bbb = nn_hparams(optimizer='RMS',
                           use_sigma_exp_transform=True,
                           cleared_times_trained=10,
                           initial_training_steps=100,
                           noise_sigma=0.1,
                           reset_lr=False,
                           training_freq=50)

  hparams_nlinear = nn_hparams(reset_lr=True,
                               lr_decay_rate=0.5,
                               training_freq=1,
                               training_freq_network=50,
                               a0=6,
                               b0=6,
                               lambda_prior=0.25)

  hparams_nlinear2 = nn_hparams(reset_lr=True,
                                lr_decay_rate=0.5,
                                training_freq=10,
                                training_freq_network=50,
                                a0=6,
                                b0=6,
                                lambda_prior=0.25)

  hparams_pnoise = nn_hparams(optimizer='RMS',
                              reset_lr=True,
                              lr_decay_rate=0.5,
                              training_freq=50,
                              noise_std=0.05,
                              eps=0.1,
                              d_samples=300)

  hparams_alpha_div = nn_hparams(optimizer='RMS',
                                 use_sigma_exp_transform=True,
                                 cleared_times_trained=10,
                                 initial_training_steps=100,
                                 noise_sigma=0.1,
                                 reset_lr=False,
                                 training_freq=50,
                                 alpha=1.0,
                                 k=20,
                                 prior_variance=0.1)

  # The multitask-GP bandit uses a different set of defaults, so it is
  # specified explicitly rather than via nn_hparams.
  hparams_gp = tf.contrib.training.HParams(num_actions=num_actions,
                                           num_outputs=num_actions,
                                           context_dim=context_dim,
                                           reset_lr=False,
                                           learn_embeddings=True,
                                           max_num_points=1000,
                                           show_training=False,
                                           freq_summary=1000,
                                           batch_size=512,
                                           keep_fixed_after_max_obs=True,
                                           training_freq=50,
                                           initial_pulls=2,
                                           training_epochs=100,
                                           lr=0.01,
                                           buffer_s=-1,
                                           initial_lr=0.001,
                                           lr_decay_rate=0.0,
                                           optimizer='RMS',
                                           task_latent_dim=5,
                                           activate_decay=False)

  algos = [
      UniformSampling('Uniform Sampling', hparams),
      UniformSampling('Uniform Sampling 2', hparams),
      FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
      FixedPolicySampling('fixed2', [0.25, 0.75], hparams),
      PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
      PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
      PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
      NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
      NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
      LinearFullPosteriorSampling('LinFullPost', hparams_linear),
      BootstrappedBNNSampling('BootRMS', hparams_rms),
      ParameterNoiseSampling('ParamNoise', hparams_pnoise),
      PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
      PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
  ]

  # Run contextual bandit problem
  t_init = time.time()
  results = run_contextual_bandit(context_dim, num_actions, dataset, algos)
  _, h_rewards = results

  # Display results
  display_results(algos, opt_rewards, opt_actions, h_rewards, t_init, data_type)
def main(argv):
    """Serve a single contextual-bandit policy over a local IPC connection.

    Accepts one connection on ``localhost:opts.ipc_port``, instantiates the
    policy selected by ``opts.algorithm``, then loops: read a context from
    the remote bandit, choose an action, receive the reward, and update the
    policy. Stops when the remote side sends a ``None`` context or reward.

    Raises:
        Exception: if ``opts.algorithm`` names no known algorithm.
    """
    opts = get_options()
    print("Parameters: {}".format(opts))
    address = ('localhost', opts.ipc_port)  # family is deduced to be 'AF_INET'
    listener = Listener(address, authkey=b'bandit')
    conn = listener.accept()
    multiprocessing.current_process().authkey = b'bandit'
    print('connection accepted from', listener.last_accepted)

    # Create contextual bandit
    bandit = IPCBandit(conn)

    def nn_hparams(**overrides):
        """Return HParams with the shared neural-net defaults; overrides win.

        All NN-based policies below share the same architecture/optimization
        defaults and differ only in algorithm-specific knobs. A fresh dict
        (and a fresh layer_sizes list) is built per call, so the resulting
        HParams objects share no mutable state.
        """
        params = dict(num_actions=bandit.num_actions,
                      context_dim=bandit.context_dim,
                      init_scale=0.3,
                      activation=tf.nn.relu,
                      layer_sizes=[50],
                      batch_size=512,
                      activate_decay=True,
                      initial_lr=0.1,
                      max_grad_norm=5.0,
                      show_training=False,
                      freq_summary=1000,
                      buffer_s=-1,
                      initial_pulls=2,
                      training_epochs=100)
        params.update(overrides)
        return tf.contrib.training.HParams(**params)

    if opts.algorithm == "uniform":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions)
        policy = UniformSampling('Uniform Sampling', policy_parameters)

    elif opts.algorithm == "linear":
        # Bayesian linear regression posterior sampling (no neural net).
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                        context_dim=bandit.context_dim,
                                                        a0=6,
                                                        b0=6,
                                                        lambda_prior=0.25,
                                                        initial_pulls=2)
        policy = LinearFullPosteriorSampling('LinFullPost', policy_parameters)

    elif opts.algorithm == "rms":
        policy_parameters = nn_hparams(optimizer='RMS',
                                       reset_lr=True,
                                       lr_decay_rate=0.5,
                                       training_freq=50,
                                       p=0.95,
                                       q=3)
        policy = PosteriorBNNSampling('RMS', policy_parameters, 'RMSProp')

    elif opts.algorithm == "bootrms":
        # Same hyperparameters as "rms"; only the sampling wrapper differs.
        policy_parameters = nn_hparams(optimizer='RMS',
                                       reset_lr=True,
                                       lr_decay_rate=0.5,
                                       training_freq=50,
                                       p=0.95,
                                       q=3)
        policy = BootstrappedBNNSampling('BootRMS', policy_parameters)

    elif opts.algorithm == "dropout":
        policy_parameters = nn_hparams(optimizer='RMS',
                                       reset_lr=True,
                                       lr_decay_rate=0.5,
                                       training_freq=50,
                                       use_dropout=True,
                                       keep_prob=0.80)
        policy = PosteriorBNNSampling('Dropout', policy_parameters, 'RMSProp')

    elif opts.algorithm == "bbb":
        policy_parameters = nn_hparams(optimizer='RMS',
                                       use_sigma_exp_transform=True,
                                       cleared_times_trained=10,
                                       initial_training_steps=100,
                                       noise_sigma=0.1,
                                       reset_lr=False,
                                       training_freq=50)
        policy = PosteriorBNNSampling('BBB', policy_parameters, 'Variational')

    elif opts.algorithm == "neurallinear":
        policy_parameters = nn_hparams(reset_lr=True,
                                       lr_decay_rate=0.5,
                                       training_freq=1,
                                       training_freq_network=50,
                                       a0=6,
                                       b0=6,
                                       lambda_prior=0.25)
        policy = NeuralLinearPosteriorSampling('NeuralLinear', policy_parameters)

    elif opts.algorithm == "neurallinear2":
        # Like "neurallinear" but updates the linear layer every 10 steps.
        policy_parameters = nn_hparams(reset_lr=True,
                                       lr_decay_rate=0.5,
                                       training_freq=10,
                                       training_freq_network=50,
                                       a0=6,
                                       b0=6,
                                       lambda_prior=0.25)
        policy = NeuralLinearPosteriorSampling('NeuralLinear2', policy_parameters)

    elif opts.algorithm == "pnoise":
        policy_parameters = nn_hparams(optimizer='RMS',
                                       reset_lr=True,
                                       lr_decay_rate=0.5,
                                       training_freq=50,
                                       noise_std=0.05,
                                       eps=0.1,
                                       d_samples=300)
        policy = ParameterNoiseSampling('ParamNoise', policy_parameters)

    elif opts.algorithm == "alpha_div":
        policy_parameters = nn_hparams(optimizer='RMS',
                                       use_sigma_exp_transform=True,
                                       cleared_times_trained=10,
                                       initial_training_steps=100,
                                       noise_sigma=0.1,
                                       reset_lr=False,
                                       training_freq=50,
                                       alpha=1.0,
                                       k=20,
                                       prior_variance=0.1)
        policy = PosteriorBNNSampling('BBAlphaDiv', policy_parameters, 'AlphaDiv')

    elif opts.algorithm == "gp":
        # The multitask-GP policy uses a different set of defaults, so it is
        # specified explicitly rather than via nn_hparams.
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                        num_outputs=bandit.num_actions,
                                                        context_dim=bandit.context_dim,
                                                        reset_lr=False,
                                                        learn_embeddings=True,
                                                        max_num_points=1000,
                                                        show_training=False,
                                                        freq_summary=1000,
                                                        batch_size=512,
                                                        keep_fixed_after_max_obs=True,
                                                        training_freq=50,
                                                        initial_pulls=2,
                                                        training_epochs=100,
                                                        lr=0.01,
                                                        buffer_s=-1,
                                                        initial_lr=0.001,
                                                        lr_decay_rate=0.0,
                                                        optimizer='RMS',
                                                        task_latent_dim=5,
                                                        activate_decay=False)
        policy = PosteriorBNNSampling('MultitaskGP', policy_parameters, 'GP')

    else:
        raise Exception("Misspecified bandit algorithm.")

    print(policy)
    # Run the contextual bandit process: a None context or reward means the
    # remote side is done.
    while True:
        context = bandit.context()
        if context is None:
            break
        action = policy.action(context)
        reward = bandit.pull(action)
        if reward is None:
            break

        policy.update(context, action, reward)

    conn.close()
    listener.close()
# Example 3
def main(_):
    """Benchmark a suite of contextual-bandit algorithms on a synthetic dataset.

    Builds a 2-action, 2-dimensional-context "job_bank" dataset from the
    context_bandit_* helpers, instantiates each bandit algorithm with its
    hyperparameters, runs them all with run_contextual_bandit, and prints a
    summary of the results.
    """
    # --- Create dataset -----------------------------------------------------
    data_type = "job_bank"
    num_contexts = 2000
    num_actions = 2
    context_dim = 2
    # Each dataset row is [context (context_dim values), reward per action
    # (num_actions values)].  NOTE: np.float / np.int were removed in
    # NumPy 1.24 — the Python builtins are the documented replacement.
    dataset = np.empty((num_contexts, 4), dtype=float)
    opt_actions = np.empty(num_contexts, dtype=int)
    opt_rewards = np.empty(num_contexts, dtype=float)
    for i in range(num_contexts):  # renamed from `iter` (shadowed a builtin)
        ctx = context_bandit_gen_context()
        all_probs = [context_bandit_prob(ctx, a) for a in range(num_actions)]
        optimal = np.argmax(all_probs)
        rewards = [context_bandit_reward(ctx, a) for a in range(num_actions)]
        dataset[i, :] = np.array(ctx.tolist() + rewards)
        opt_actions[i] = optimal
        # The recorded optimal "reward" is the success probability of the
        # best arm (its expected reward), not a sampled reward.
        opt_rewards[i] = all_probs[optimal]

    # --- Hyperparameters ----------------------------------------------------
    hparams = HParams(num_actions=num_actions)

    hparams_linear = HParams(num_actions=num_actions,
                             context_dim=context_dim,
                             a0=6,
                             b0=6,
                             lambda_prior=0.25,
                             initial_pulls=2)

    def _nn_defaults():
        """Fresh copy of the settings shared by all neural-net algorithms.

        Returned as a new dict (with a new layer_sizes list) on every call so
        the HParams objects do not share mutable state.
        """
        return dict(num_actions=num_actions,
                    context_dim=context_dim,
                    init_scale=0.3,
                    activation=tf.nn.relu,
                    layer_sizes=[50],
                    batch_size=512,
                    activate_decay=True,
                    initial_lr=0.1,
                    max_grad_norm=5.0,
                    show_training=False,
                    freq_summary=1000,
                    buffer_s=-1,
                    initial_pulls=2,
                    verbose=False)

    hparams_rms = HParams(optimizer='RMS',
                          reset_lr=True,
                          lr_decay_rate=0.5,
                          training_freq=50,
                          training_epochs=100,
                          p=0.95,
                          q=3,
                          **_nn_defaults())

    hparams_dropout = HParams(optimizer='RMS',
                              reset_lr=True,
                              lr_decay_rate=0.5,
                              training_freq=50,
                              training_epochs=100,
                              use_dropout=True,
                              keep_prob=0.80,
                              **_nn_defaults())

    hparams_bbb = HParams(optimizer='RMS',
                          use_sigma_exp_transform=True,
                          cleared_times_trained=10,
                          initial_training_steps=100,
                          noise_sigma=0.1,
                          reset_lr=False,
                          training_freq=50,
                          training_epochs=100,
                          **_nn_defaults())

    hparams_nlinear = HParams(reset_lr=True,
                              lr_decay_rate=0.5,
                              training_freq=1,
                              training_freq_network=50,
                              training_epochs=100,
                              a0=6,
                              b0=6,
                              lambda_prior=0.25,
                              **_nn_defaults())

    # Same as NeuralLinear but with a less frequent linear-posterior update
    # (training_freq=10 instead of 1).
    hparams_nlinear2 = HParams(reset_lr=True,
                               lr_decay_rate=0.5,
                               training_freq=10,
                               training_freq_network=50,
                               training_epochs=100,
                               a0=6,
                               b0=6,
                               lambda_prior=0.25,
                               **_nn_defaults())

    hparams_pnoise = HParams(optimizer='RMS',
                             reset_lr=True,
                             lr_decay_rate=0.5,
                             training_freq=50,
                             training_epochs=100,
                             noise_std=0.05,
                             eps=0.1,
                             d_samples=300,
                             **_nn_defaults())

    hparams_alpha_div = HParams(optimizer='RMS',
                                use_sigma_exp_transform=True,
                                cleared_times_trained=10,
                                initial_training_steps=100,
                                noise_sigma=0.1,
                                reset_lr=False,
                                training_freq=50,
                                training_epochs=100,
                                alpha=1.0,
                                k=20,
                                prior_variance=0.1,
                                **_nn_defaults())

    # The GP model shares almost nothing with the NN defaults (different
    # learning rates, no layer sizes, ...), so it is specified in full.
    hparams_gp = HParams(num_actions=num_actions,
                         num_outputs=num_actions,
                         context_dim=context_dim,
                         reset_lr=False,
                         learn_embeddings=True,
                         max_num_points=1000,
                         show_training=False,
                         freq_summary=1000,
                         batch_size=512,
                         keep_fixed_after_max_obs=True,
                         training_freq=50,
                         initial_pulls=2,
                         training_epochs=100,
                         lr=0.01,
                         buffer_s=-1,
                         initial_lr=0.001,
                         lr_decay_rate=0.0,
                         optimizer='RMS',
                         task_latent_dim=5,
                         activate_decay=False,
                         verbose=False)

    # --- Algorithms under comparison ----------------------------------------
    algos = [
        UniformSampling('Uniform Sampling', hparams),
        FixedPolicySampling('Fixed 1', [0.75, 0.25], hparams),
        FixedPolicySampling('Fixed 2', [0.25, 0.75], hparams),
        PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
        PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
        PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
        NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
        NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
        LinearFullPosteriorSampling('LinFullPost', hparams_linear),
        BootstrappedBNNSampling('BootRMS', hparams_rms),
        ParameterNoiseSampling('ParamNoise', hparams_pnoise),
        PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
        PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
    ]

    # Run every algorithm over the dataset and report regret/timing.
    _, h_rewards, times = run_contextual_bandit(context_dim, num_actions,
                                                dataset, algos)

    display_results(algos, opt_rewards, opt_actions, h_rewards, times,
                    data_type)
Esempio n. 4
0
# Hyperparameters for the linear epsilon-greedy bandit.
hparams_lineps = tf.contrib.training.HParams(num_actions=num_actions,
                                             context_dim=context_dim,
                                             lam=0.1,
                                             eps=0.05)


# Factory functions ("prototypes") that build a fresh policy instance per
# call.  Written as plain `def`s rather than lambda assignments (PEP 8 E731):
# behavior is identical — the referenced hparams globals are still looked up
# at call time — but the functions carry real names in tracebacks and repr.
def random_proto():
    return UniformSampling('Uniform Sampling', hparams)


def neural_greedy_proto():
    return PosteriorBNNSampling('NeuralGreedy', hparams_rms, 'RMSProp')


def neural_greedy_proto_bootstrapped():
    return PosteriorBNNSampling('NeuralGreedy_artificial_data',
                                hparams_rms_bootstrapped, 'RMSProp')


def bootstrap_proto():
    return BootstrappedBNNSampling('BootRMS', hparams_rmsb)


def bootstrap_proto_bootstrapped():
    return BootstrappedBNNSampling('BootRMS_artificial_data',
                                   hparams_rmsb_bootstrapped)


def noise_proto():
    return ParameterNoiseSampling('ParamNoise', hparams_pnoise)


def noise_proto_bootstrapped():
    return ParameterNoiseSampling('ParamNoise_artificial_data',
                                  hparams_pnoise_bootstrapped)


def dropout_proto():
    return PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp')


def dropout_proto_bootstrapped():
    return PosteriorBNNSampling('Dropout_artificial_data',
                                hparams_dropout_bootstrapped, 'RMSProp')


def linThompson_proto():
    return LinearFullPosteriorSampling('linThompson', hparams_linear)


def linUCB_proto():
    return LinUCB('linUCB', hparams_linucb)


def linEps_proto():
    return LinEpsilon('LinEpsilon', hparams_lineps)


def neuralLinUCB_proto():
    return NeuralLinUCB('NeuralLinUCB', hparams_neural_linucb, 'RMSProp')