Exemple #1
0
    def run_experiments(self, iterations=10):
        """Repeat the bandit experiment and store reward/regret curves.

        Every repetition draws a fresh dataset from ``self.dataset_proto()``
        and fresh algorithm instances from ``self.algo_protos``, runs
        ``run_contextual_bandit`` on them and records, per algorithm, the
        cumulative reward and the cumulative regret against the optimal
        rewards returned with the dataset.

        Args:
            iterations: Number of independent repetitions to run.

        Side effects:
            Sets ``self.cum_rew`` and ``self.cum_reg``; both are arrays of
            shape ``(self.nb_contexts, len(self.algo_protos), iterations)``.
            Progress messages are printed to stdout.
        """
        cum_rew = np.zeros(
            (self.nb_contexts, len(self.algo_protos), iterations))
        cum_reg = np.zeros(cum_rew.shape)

        # Named `run` rather than `iter` so the builtin is not shadowed.
        for run in range(iterations):
            print(str(run + 1), '/', str(iterations))

            dataset, opt_linear = self.dataset_proto()
            print('dataset created')
            # Only the optimal rewards are needed; optimal actions are unused.
            opt_rewards, _ = opt_linear

            algos = [algo_proto() for algo_proto in self.algo_protos]
            print('algo ready')

            # The action history is not needed for the reward/regret curves.
            _, h_rewards = run_contextual_bandit(
                self.context_dim, self.num_actions, dataset, algos)

            cum_rew[:, :, run] = np.cumsum(h_rewards, axis=0)
            # Regret: cumulative optimal reward, broadcast across the
            # algorithm axis, minus the cumulative reward actually obtained.
            optimal_curve = np.cumsum(opt_rewards)[:, np.newaxis]
            cum_reg[:, :, run] = optimal_curve - cum_rew[:, :, run]

        self.cum_rew = cum_rew
        self.cum_reg = cum_reg
def main():
    """Run a NeuralLinear agent on the mushroom bandit and save its rewards.

    Samples 2000 mushroom contexts, runs one NeuralLinear posterior-sampling
    agent through ``run_contextual_bandit``, writes the reward history to
    ``mushroom_rewards.npy`` and reports the outcome via ``display_results``.
    """
    data_type = 'mushroom'
    num_contexts = 2000
    context_dim = 117
    num_actions = 2

    # NOTE(review): `file_name` is not bound inside this function; presumably
    # it is a module-level global -- confirm.
    dataset, opt_mushroom = sample_mushroom_data(file_name, num_contexts)
    opt_rewards, opt_actions = opt_mushroom

    # Settings for the single NeuralLinear agent.
    nlinear_settings = dict(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        layer_sizes=[50],
        batch_size=512,
        activate_decay=True,
        initial_lr=0.005,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq=1,
        training_freq_network=50,
        training_epochs=100,
        a0=6,
        b0=6,
        lambda_prior=0.25,
        keep_prob=1.0,
        global_step=50,
    )
    hp_nlinear = HyperParams(**nlinear_settings)
    agents = [NeuralLinearPosteriorSampling('NeuralLinear', hp_nlinear)]

    # Timestamp taken just before the run; passed on to display_results.
    started_at = time.time()

    print(context_dim, num_actions)
    _, h_rewards = run_contextual_bandit(
        context_dim, num_actions, dataset, agents)
    np.save("mushroom_rewards.npy", h_rewards)

    display_results(agents, opt_rewards, opt_actions, h_rewards, started_at,
                    data_type)
def Run(context_dim, num_actions, dataset, algos, opt_rewards, opt_actions, data_type):
    """Run one bandit experiment, display it and return per-algorithm totals.

    Args:
        context_dim: Forwarded to ``run_contextual_bandit``.
        num_actions: Forwarded to ``run_contextual_bandit``.
        dataset: Forwarded to ``run_contextual_bandit``.
        algos: Algorithms to evaluate; one result is returned per entry.
        opt_rewards: Forwarded to ``display_results``.
        opt_actions: Forwarded to ``display_results``.
        data_type: Forwarded to ``display_results``.

    Returns:
        A list whose ``j``-th entry is the sum of column ``j`` of the reward
        history returned by ``run_contextual_bandit``.
    """
    started_at = time.time()
    _, h_rewards = run_contextual_bandit(context_dim, num_actions, dataset, algos)

    display_results(algos, opt_rewards, opt_actions, h_rewards, started_at, data_type)

    # One total per algorithm, in the same order as `algos`.
    return [np.sum(h_rewards[:, column]) for column, _ in enumerate(algos)]
def main():
    """Grid-search NeuralLinear learning-rate schedules on the MNIST bandit.

    Builds the dataset from VAE features, then for every combination of
    initial learning rate, base learning rate and schedule mode trains a
    fresh NeuralLinear agent and saves its reward history to
    ``<mode>results<idx>.npy``, where ``idx`` is the position of the
    combination in the grid.

    The previous version left the nested loops unfinished (a ``for`` without
    colon or body, and a loop variable that rebound the list it iterated),
    so the module could not even be parsed.
    """
    from itertools import product

    # Dataset: VAE features stacked with the per-action rewards.
    vae_data = get_vae_features()
    features, rewards, _ = construct_dataset_from_features(vae_data)
    dataset = np.hstack((features, rewards))

    context_dim = features.shape[1]
    num_actions = 10

    init_lrs = [0.001, 0.0025, 0.005, 0.01]
    base_lrs = [0.0005, 0.001]
    modes = ["triangular", "triangular2", "exp_range"]
    # Candidate batch sizes ([32, 128, 512]) and layer sizes
    # ([[50, 50], [100, 100], [100]]) were listed but never iterated; they
    # stay fixed at the values used in the HyperParams call below.

    # `product` iterates `modes` fastest, matching the intended nesting order.
    grid = product(init_lrs, base_lrs, modes)
    for idx, (init_lr, base_lr, mode) in enumerate(grid):
        # NOTE(review): wiring `init_lr`/`base_lr` into `initial_lr`/`base_lr`
        # follows how run_trial passes these keywords -- confirm this is the
        # intended search space.
        hp_nlinear = HyperParams(num_actions=num_actions,
                                 context_dim=context_dim,
                                 init_scale=0.3,
                                 layer_sizes=[50, 50],
                                 batch_size=32,
                                 activate_decay=True,
                                 initial_lr=init_lr,
                                 base_lr=base_lr,
                                 max_grad_norm=5.0,
                                 show_training=False,
                                 freq_summary=1000,
                                 buffer_s=-1,
                                 initial_pulls=2,
                                 reset_lr=True,
                                 lr_decay_rate=0.5,
                                 training_freq=1,
                                 training_freq_network=50,
                                 training_epochs=100,
                                 a0=6,
                                 b0=6,
                                 lambda_prior=0.25,
                                 keep_prob=1.0,
                                 global_step=1,
                                 mode=mode)

        algos = [NeuralLinearPosteriorSampling('NeuralLinear', hp_nlinear)]

        # run contextual bandit experiment
        print(context_dim, num_actions)
        _, h_rewards = run_contextual_bandit(context_dim, num_actions,
                                             dataset, algos)
        # One file per combination so later runs do not overwrite earlier ones.
        np.save(mode + "results" + str(idx) + ".npy", h_rewards)
def run_trial(process):
    """Run the hyperparameter combinations assigned to one worker.

    Combinations are striped across workers: worker ``process`` handles every
    entry of ``combos`` whose index is congruent to ``process`` modulo
    ``num_processes``. For each such entry a NeuralLinear agent is trained
    and its reward history is saved to ``<mode>results<idx>.npy``.

    ``combos``, ``num_processes``, ``num_actions``, ``context_dim``, ``mode``
    and ``dataset`` are not defined here; they are read from the enclosing
    (module) scope.

    Args:
        process: Index of this worker.
    """
    for idx, combo in enumerate(combos):
        if idx % num_processes != process:
            # This combination belongs to another worker.
            continue

        # Interpolate explicitly: print() does not apply logging-style
        # arguments, so the old call emitted the raw format string.
        print('running combo %d: %s' % (idx, combo))

        # hyperparams
        hp_nlinear = HyperParams(num_actions=num_actions,
                                 context_dim=context_dim,
                                 init_scale=0.3,
                                 layer_sizes=combo["layer_size"],
                                 batch_size=combo["batch_size"],
                                 activate_decay=True,
                                 initial_lr=combo["init_lr"],
                                 base_lr=combo["base_lr"],
                                 max_grad_norm=5.0,
                                 show_training=False,
                                 freq_summary=1000,
                                 buffer_s=-1,
                                 initial_pulls=2,
                                 reset_lr=True,
                                 lr_decay_rate=0.5,
                                 training_freq=1,
                                 training_freq_network=combo["training_freq"],
                                 training_epochs=100,
                                 a0=6,
                                 b0=6,
                                 lambda_prior=0.25,
                                 keep_prob=1.0,
                                 global_step=1,
                                 mode=mode)
        algos = [NeuralLinearPosteriorSampling('NeuralLinear', hp_nlinear)]

        # run contextual bandit experiment
        print(context_dim, num_actions)
        _, rewards = run_contextual_bandit(context_dim, num_actions,
                                           dataset, algos)
        np.save(mode + "results" + str(idx) + ".npy", rewards)
Exemple #6
0
def main(_):
  """Benchmark the full set of bandit algorithms on the mushroom dataset.

  Samples 2000 contexts, builds one hyperparameter set per algorithm
  family, runs all algorithms through ``run_contextual_bandit`` and reports
  the outcome with ``display_results``. The positional argument is ignored.
  """
  make_hparams = tf.contrib.training.HParams

  # Data type in {linear, sparse_linear, mushroom, financial, jester,
  #                 statlog, adult, covertype, census, wheel}
  data_type = 'mushroom'
  num_contexts = 2000

  # Create dataset
  dataset, opt_rewards, opt_actions, num_actions, context_dim = sample_data(
      data_type, num_contexts)

  def network_settings(**overrides):
    """Return a fresh copy of the shared network settings plus `overrides`."""
    settings = dict(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=[50],
        batch_size=512,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        training_epochs=100,
    )
    settings.update(overrides)
    return settings

  # Baselines that only need the number of actions.
  hparams = make_hparams(num_actions=num_actions)

  hparams_linear = make_hparams(
      num_actions=num_actions,
      context_dim=context_dim,
      a0=6,
      b0=6,
      lambda_prior=0.25,
      initial_pulls=2)

  hparams_rms = make_hparams(**network_settings(
      optimizer='RMS',
      reset_lr=True,
      lr_decay_rate=0.5,
      training_freq=50,
      p=0.95,
      q=3))

  hparams_dropout = make_hparams(**network_settings(
      optimizer='RMS',
      reset_lr=True,
      lr_decay_rate=0.5,
      training_freq=50,
      use_dropout=True,
      keep_prob=0.80))

  hparams_bbb = make_hparams(**network_settings(
      optimizer='RMS',
      use_sigma_exp_transform=True,
      cleared_times_trained=10,
      initial_training_steps=100,
      noise_sigma=0.1,
      reset_lr=False,
      training_freq=50))

  # The two NeuralLinear variants differ only in `training_freq`.
  hparams_nlinear = make_hparams(**network_settings(
      reset_lr=True,
      lr_decay_rate=0.5,
      training_freq=1,
      training_freq_network=50,
      a0=6,
      b0=6,
      lambda_prior=0.25))

  hparams_nlinear2 = make_hparams(**network_settings(
      reset_lr=True,
      lr_decay_rate=0.5,
      training_freq=10,
      training_freq_network=50,
      a0=6,
      b0=6,
      lambda_prior=0.25))

  hparams_pnoise = make_hparams(**network_settings(
      optimizer='RMS',
      reset_lr=True,
      lr_decay_rate=0.5,
      training_freq=50,
      noise_std=0.05,
      eps=0.1,
      d_samples=300))

  hparams_alpha_div = make_hparams(**network_settings(
      optimizer='RMS',
      use_sigma_exp_transform=True,
      cleared_times_trained=10,
      initial_training_steps=100,
      noise_sigma=0.1,
      reset_lr=False,
      training_freq=50,
      alpha=1.0,
      k=20,
      prior_variance=0.1))

  # The GP model has its own settings and does not use the shared ones.
  hparams_gp = make_hparams(
      num_actions=num_actions,
      num_outputs=num_actions,
      context_dim=context_dim,
      reset_lr=False,
      learn_embeddings=True,
      max_num_points=1000,
      show_training=False,
      freq_summary=1000,
      batch_size=512,
      keep_fixed_after_max_obs=True,
      training_freq=50,
      initial_pulls=2,
      training_epochs=100,
      lr=0.01,
      buffer_s=-1,
      initial_lr=0.001,
      lr_decay_rate=0.0,
      optimizer='RMS',
      task_latent_dim=5,
      activate_decay=False)

  # Construction order is kept: it fixes the column order of the results.
  algos = [
      UniformSampling('Uniform Sampling', hparams),
      UniformSampling('Uniform Sampling 2', hparams),
      FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
      FixedPolicySampling('fixed2', [0.25, 0.75], hparams),
      PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
      PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
      PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
      NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
      NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
      LinearFullPosteriorSampling('LinFullPost', hparams_linear),
      BootstrappedBNNSampling('BootRMS', hparams_rms),
      ParameterNoiseSampling('ParamNoise', hparams_pnoise),
      PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
      PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
  ]

  # Run contextual bandit problem
  started_at = time.time()
  _, h_rewards = run_contextual_bandit(
      context_dim, num_actions, dataset, algos)

  # Display results
  display_results(
      algos, opt_rewards, opt_actions, h_rewards, started_at, data_type)
Exemple #7
0
def main(_):
    """Compare fSVGD against baseline agents on the bandit chosen by FLAGS.

    Seeds NumPy and TensorFlow from ``FLAGS.seed``, appends a
    month-day-hour-minute stamp to ``FLAGS.logdir``, samples
    ``FLAGS.num_context`` contexts of the ``FLAGS.bandit`` problem, runs the
    selected algorithms through ``run_contextual_bandit`` and reports the
    outcome with ``display_results``. The positional argument is ignored.
    """
    make_hparams = tf.contrib.training.HParams

    np.random.seed(FLAGS.seed)
    tf.set_random_seed(FLAGS.seed)

    # Give every launch its own log directory.
    now = datetime.datetime.now()
    stamp = '-'.join(
        str(part) for part in (now.month, now.day, now.hour, now.minute))
    FLAGS.logdir = os.path.join(FLAGS.logdir, stamp)

    # Problem parameters.
    # Data type in {linear, sparse_linear, mushroom, financial, jester,
    #                 statlog, adult, covertype, census, wheel}
    num_contexts = FLAGS.num_context
    data_type = FLAGS.bandit

    # Create dataset
    dataset, opt_rewards, opt_actions, num_actions, context_dim = sample_data(
        data_type, num_contexts)

    # Hidden layer widths arrive as a comma-separated flag.
    layer_sizes = list(map(int, FLAGS.layers.split(',')))

    def rms_settings(**overrides):
        """Return the settings shared by the RMS-optimised networks."""
        settings = dict(
            num_actions=num_actions,
            context_dim=context_dim,
            init_scale=0.3,
            activation=tf.nn.relu,
            layer_sizes=layer_sizes,
            batch_size=512,
            activate_decay=True,
            initial_lr=0.1,
            max_grad_norm=5.0,
            freq_summary=2000,
            buffer_s=-1,
            initial_pulls=2,
            optimizer='RMS',
            training_freq=50,
            training_epochs=100,
        )
        settings.update(overrides)
        return settings

    # Define hyperparameters and algorithms
    hparams = make_hparams(num_actions=num_actions)

    hparams_rms = make_hparams(**rms_settings(
        show_training=False,
        reset_lr=True,
        lr_decay_rate=0.5,
        p=0.95,
        q=20))

    # Built but not used below: no algorithm in `algos` takes it.
    hparams_dropout = make_hparams(**rms_settings(
        show_training=False,
        reset_lr=True,
        lr_decay_rate=0.5,
        use_dropout=True,
        keep_prob=0.80))

    hparams_bbb = make_hparams(**rms_settings(
        show_training=True,
        use_sigma_exp_transform=True,
        cleared_times_trained=20,
        initial_training_steps=2000,
        noise_sigma=0.1,
        reset_lr=False))

    hparams_gp = make_hparams(
        num_actions=num_actions,
        num_outputs=num_actions,
        context_dim=context_dim,
        reset_lr=False,
        learn_embeddings=True,
        max_num_points=1000,
        show_training=False,
        freq_summary=2000,
        batch_size=512,
        keep_fixed_after_max_obs=True,
        training_freq=50,
        initial_pulls=2,
        training_epochs=100,
        lr=0.01,
        buffer_s=-1,
        initial_lr=0.001,
        lr_decay_rate=0.0,
        optimizer='RMS',
        task_latent_dim=5,
        activate_decay=False)

    # fSVGD takes several of its settings from command-line flags and sets
    # no activation, init_scale or max_grad_norm.
    fsvgd_settings = dict(
        num_actions=num_actions,
        context_dim=context_dim,
        layer_sizes=layer_sizes,
        batch_size=512,
        activate_decay=False,
        initial_lr=0.1,
        lr=FLAGS.lr,
        n_mm_sample=4,
        mm_n_particles=40,
        mm_jitter=FLAGS.mm_jitter,
        show_training=True,
        freq_summary=2000,
        buffer_s=-1,
        initial_pulls=2,
        optimizer='Adam',
        use_sigma_exp_transform=True,
        cleared_times_trained=20,
        initial_training_steps=2000,
        noise_sigma=0.1,
        reset_lr=False,
        training_freq=50,
        training_epochs=100,
        n_particles=20,
        interp_batch_size=FLAGS.interp_batch_size,
        prior_variance=FLAGS.prior_variance,
    )
    hparams_fsvgd = make_hparams(**fsvgd_settings)

    # Only this subset of algorithms is evaluated here.
    algos = [
        UniformSampling('Uniform Sampling', hparams),
        PosteriorBNNSampling('fSVGD', hparams_fsvgd, 'SVGD'),
        PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
        BootstrappedBNNSampling('BootRMS', hparams_rms),
        PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
    ]

    # Run contextual bandit problem
    started_at = time.time()
    _, h_rewards = run_contextual_bandit(
        context_dim, num_actions, dataset, algos, opt_rewards)

    # Display results
    display_results(
        algos, opt_rewards, opt_actions, h_rewards, started_at, data_type)
Exemple #8
0
def main(_):
    """Run repeated contextual-bandit simulations on the 'adult' data.

    For each of ``nb_simulations`` repetitions a fresh set of algorithms is
    built and run through ``run_contextual_bandit`` on one sampled dataset.
    Per-algorithm cumulative regrets are handed to ``save_plot`` and the total
    rewards (plus the optimal total under the key ``'opt_reward'``) are stored
    with ``np.save`` in ``dict_dir``.

    Args:
        _: Ignored positional argument (presumably the argv list supplied by
            the TensorFlow app runner -- confirm against the caller).
    """
    import os  # local import: only needed to prepare the output directories

    # Problem parameters
    num_contexts = 20000
    nb_simulations = 2
    l_sizes = [50, 50]
    plt_dir = "plots/"
    dict_dir = "dicts/"

    # Data type in {linear, sparse_linear, mushroom, financial, jester,
    #                 statlog, adult, covertype, census, wheel}
    data_type = 'adult'

    # Create the output directories before the (long) simulations start:
    # np.save does not create missing directories, so a missing 'dicts/' would
    # otherwise only surface after all the work is done. plt_dir is presumably
    # where save_plot writes -- creating it is harmless either way.
    for out_dir in (plt_dir, dict_dir):
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    # Create dataset
    sampled_vals = sample_data(data_type, num_contexts)
    dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

    def network_hparams(**overrides):
        """Return HParams with the shared network settings plus overrides."""
        settings = dict(num_actions=num_actions,
                        context_dim=context_dim,
                        init_scale=0.3,
                        activation=tf.nn.relu,
                        layer_sizes=l_sizes,
                        batch_size=512,
                        activate_decay=True,
                        initial_lr=0.1,
                        max_grad_norm=5.0,
                        show_training=False,
                        freq_summary=1000,
                        buffer_s=-1,
                        initial_pulls=2)
        settings.update(overrides)
        return tf.contrib.training.HParams(**settings)

    def neural_linear_hparams(**overrides):
        """Return network HParams with the neural-linear settings added."""
        settings = dict(reset_lr=True,
                        lr_decay_rate=0.5,
                        training_freq=1,
                        training_freq_network=50,
                        training_epochs=100,
                        a0=6,
                        b0=6,
                        lambda_prior=0.25)
        settings.update(overrides)
        return network_hparams(**settings)

    def finite_memory_hparams(mu_prior_flag, sigma_prior_flag):
        """Return neural-linear HParams for the finite-memory variants."""
        return neural_linear_hparams(batch_size=64,
                                     lambda_prior=1,
                                     mem=num_actions * 100,
                                     mu_prior_flag=mu_prior_flag,
                                     sigma_prior_flag=sigma_prior_flag)

    # Define hyperparameters and algorithms.
    # Some sets below are only referenced by the disabled entries in
    # build_algos(); they are kept so those entries can be re-enabled.
    hparams = tf.contrib.training.HParams(num_actions=num_actions)

    hparams_linear = tf.contrib.training.HParams(num_actions=num_actions,
                                                 context_dim=context_dim,
                                                 a0=6,
                                                 b0=6,
                                                 lambda_prior=0.25,
                                                 initial_pulls=2)

    hparams_dropout = network_hparams(optimizer='RMS',
                                      reset_lr=True,
                                      lr_decay_rate=0.5,
                                      training_freq=50,
                                      training_epochs=100,
                                      use_dropout=True,
                                      keep_prob=0.80)

    hparams_bbb = network_hparams(optimizer='RMS',
                                  use_sigma_exp_transform=True,
                                  cleared_times_trained=10,
                                  initial_training_steps=100,
                                  noise_sigma=0.1,
                                  reset_lr=False,
                                  training_freq=50,
                                  training_epochs=100)

    hparams_nlinear = neural_linear_hparams()
    hparams_nlinear2 = neural_linear_hparams(training_freq=10)

    hparams_nlinear_finite_memory = finite_memory_hparams(
        mu_prior_flag=1, sigma_prior_flag=1)
    hparams_nlinear_finite_memory_no_prior = finite_memory_hparams(
        mu_prior_flag=0, sigma_prior_flag=0)
    hparams_nlinear_finite_memory_no_sig_prior = finite_memory_hparams(
        mu_prior_flag=1, sigma_prior_flag=0)

    hparams_ucb = network_hparams(reset_lr=True,
                                  lr_decay_rate=0.5,
                                  optimizer='RMS',
                                  training_freq=1,
                                  training_freq_network=50,
                                  training_epochs=100,
                                  lambda_prior=0.25,
                                  delta=0.01,
                                  lamb=0.01,
                                  mu=1,
                                  S=1)

    def build_algos():
        """Return newly constructed algorithm instances, in a fixed order."""
        # Single source of truth for the algorithm line-up: the column index j
        # of h_rewards below relies on this order.
        return [
            # UniformSampling('Uniform Sampling', hparams),
            # FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
            PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
            PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
            NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
            # NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
            LinearFullPosteriorSampling('LinFullPost', hparams_linear),
            NeuralLinearPosteriorSamplingFiniteMemory(
                'NeuralLinearFiniteMemory', hparams_nlinear_finite_memory),
            NeuralLinearPosteriorSamplingFiniteMemory(
                'NeuralLinearFiniteMemory_noP',
                hparams_nlinear_finite_memory_no_prior),
            # NeuralLinearPosteriorSamplingFiniteMemory(
            #     'NeuralLinearFiniteMemory_noSigP',
            #     hparams_nlinear_finite_memory_no_sig_prior),
            # NeuralUCBSampling('NeuralUCB', hparams_ucb)
        ]

    # Build once up front to learn the algorithm names used as result keys.
    algos = build_algos()

    regrets = {}
    rewards = {}
    for a in algos:
        regrets[a.name] = np.zeros((nb_simulations, num_contexts))
        rewards[a.name] = np.zeros(nb_simulations)
    rewards['opt_reward'] = np.zeros(nb_simulations)

    for k in range(nb_simulations):

        # Every simulation starts from newly constructed algorithms.
        algos = build_algos()

        # Run contextual bandit problem
        t_init = time.time()
        results = run_contextual_bandit(context_dim, num_actions, dataset,
                                        algos)
        _, h_rewards = results

        # Display results
        display_results(algos, opt_rewards, opt_actions, h_rewards, t_init,
                        data_type)

        # Column j of h_rewards is indexed by the position of the algorithm
        # in `algos`.
        for j, a in enumerate(algos):
            regrets[a.name][k, :] = np.cumsum(opt_rewards - h_rewards[:, j])
            rewards[a.name][k] = np.sum(h_rewards[:, j])
        rewards['opt_reward'][k] = np.sum(opt_rewards)

    save_plot(algos, regrets, data_type, num_contexts, plt_dir)
    np.save(dict_dir + 'dict_' + data_type + '.npy', rewards)
Exemple #9
0
def run_iter():
    """Run one contextual-bandit experiment on ``FLAGS.dataset``.

    Samples ``num_contexts`` contexts (``num_contexts`` and ``FLAGS`` are read
    from the enclosing module scope), builds the algorithm line-up, runs it
    through ``run_contextual_bandit`` and displays the results.

    Returns:
        ``opt_rewards`` reshaped to a single column minus ``h_rewards``.
    """
    # Data type in {linear, sparse_linear, mushroom, financial, jester,
    #                 statlog, adult, covertype, census, wheel}
    data_type = FLAGS.dataset

    # Create dataset
    (dataset, opt_rewards, opt_actions, num_actions,
     context_dim) = sample_data(data_type, num_contexts)

    make_hparams = tf.contrib.training.HParams

    def bnn_hparams(**extra):
        """Return HParams holding the shared network settings plus `extra`."""
        settings = {
            'num_actions': num_actions,
            'context_dim': context_dim,
            'init_scale': 0.3,
            'activation': tf.nn.relu,
            'layer_sizes': [50],
            'batch_size': 512,
            'activate_decay': True,
            'initial_lr': 0.1,
            'max_grad_norm': 5.0,
            'show_training': False,
            'freq_summary': 1000,
            'buffer_s': -1,
            'initial_pulls': 2,
        }
        settings.update(extra)
        return make_hparams(**settings)

    def latent_hparams(**extra):
        """Return HParams holding the shared latent-model settings plus `extra`."""
        settings = {
            'num_actions': num_actions,
            'num_contexts': num_contexts,
            'context_dim': context_dim,
            'activation': tf.nn.relu,
            'latent_dim': 50,
            'batch_size': 512,
            'show_training': False,
            'lr_decay': False,
            'freq_summary': 10000,
            'buffer_s': -1,
            'initial_pulls': 2,
            'training_freq': 20,
            'training_epochs': 40,
            'lambda_prior': 0.25,
            'show_loss': False,
            'recon': 1.0,
            'glnoise': False,
        }
        settings.update(extra)
        return make_hparams(**settings)

    # Define hyperparameters and algorithms
    hparams = make_hparams(num_actions=num_actions)

    hparams_linear = make_hparams(num_actions=num_actions,
                                  context_dim=context_dim,
                                  a0=6,
                                  b0=6,
                                  lambda_prior=0.25,
                                  initial_pulls=2)

    # Not referenced by the algorithm list below.
    hparams_rms = bnn_hparams(optimizer='RMS',
                              reset_lr=True,
                              lr_decay_rate=0.5,
                              training_freq=50,
                              training_epochs=100,
                              p=0.95,
                              q=3)

    hparams_bbb = bnn_hparams(optimizer='RMS',
                              use_sigma_exp_transform=True,
                              cleared_times_trained=10,
                              initial_training_steps=100,
                              noise_sigma=0.1,
                              reset_lr=False,
                              training_freq=50,
                              training_epochs=100)

    hparams_nlinear = bnn_hparams(reset_lr=True,
                                  lr_decay_rate=0.5,
                                  training_freq=1,
                                  training_freq_network=50,
                                  training_epochs=100,
                                  a0=6,
                                  b0=6,
                                  lambda_prior=0.25)

    hparams_luga = latent_hparams(initial_lr=2e-4,
                                  kl=1.0,
                                  psigma=1.0)

    hparams_sivi1 = latent_hparams(initial_lr=1e-3,
                                   verbose=False,
                                   kl=1.0,
                                   two_decoder=False,
                                   psigma=1.25)

    hparams_lusi_abl_km = latent_hparams(initial_lr=1e-3,
                                         verbose=False,
                                         km=1,
                                         onez=0,
                                         two_decoder=False,
                                         psigma=1.25)

    hparams_luga_abl_km = latent_hparams(initial_lr=2e-4,
                                         km=1,
                                         onez=0,
                                         psigma=1.0)

    algos = [
        UniformSampling('Uniform Sampling', hparams),  # 1
        PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),  # 2
        NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),  # 3
        LinearFullPosteriorSampling('LinFullPost', hparams_linear),  # 4
        PiposteriorBNNSampling('DGF', hparams_bbb, 'DGF'),  # 5
        VariationalSampling_v4('LU_Gaussian', hparams_luga),  # 6
        # A smaller learning rate like 3e-4 or 1e-4 will work better on the
        # 'mushroom' dataset for LU_SIVI and LU_Gaussian
        VariationalSamplingSivi_dgf_v7("LU_SIVI", hparams_sivi1),  # 7
        # For Ablation Study
        VariationalSampling_abl('LU_Gaussian_Ablation_multi_z_1m',
                                hparams_luga_abl_km),
        VariationalSamplingSivi_dgf_abl("LU_SIVI_Ablation_multi_z_1m",
                                        hparams_lusi_abl_km),
    ]

    start_time = time.time()
    _, h_rewards = run_contextual_bandit(context_dim, num_actions, dataset,
                                         algos)

    display_results(algos, opt_rewards, opt_actions, h_rewards, start_time,
                    data_type)

    # Column-shaped optimal rewards so the subtraction pairs them with every
    # column of h_rewards.
    return opt_rewards.reshape([-1, 1]) - h_rewards
def main(_):
  """Run the repeated NeuralLinear comparison on the 'financial' dataset.

  One dataset is sampled up front. The contextual-bandit problem is then
  run `Nruns` times, each time with freshly constructed algorithms, and the
  total reward of every algorithm in every run is collected in `res`
  (one list per algorithm). The collected totals are finally passed to
  `display_final_results`.

  Args:
    _: Unused.
  """

  # Problem parameters
  num_contexts = 3500
  tfn = 200      # forwarded as `training_freq_network` below
  MEMSIZE = 700  # forwarded as `mem` to the finite-memory configurations
  # Data type in {linear, sparse_linear, mushroom, financial, jester,
  #                 statlog, adult, covertype, census, wheel}
  data_type = 'financial'

  # Create dataset
  sampled_vals = sample_data(data_type, num_contexts)
  dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

  # ---------------------------------------------------------------------
  # Hyperparameter builders. Every network-based configuration shares the
  # same first block of settings, so they are written once here; the
  # per-configuration values are passed as keyword overrides.
  # ---------------------------------------------------------------------
  def nn_hparams(**extra):
    """Return HParams holding the shared network settings plus `extra`."""
    settings = dict(num_actions=num_actions,
                    context_dim=context_dim,
                    init_scale=0.3,
                    activation=tf.nn.relu,
                    layer_sizes=[50],  # new list per call, never shared
                    batch_size=512,
                    activate_decay=True,
                    initial_lr=0.1,
                    max_grad_norm=5.0,
                    show_training=False,
                    freq_summary=1000,
                    buffer_s=-1,
                    initial_pulls=2)
    settings.update(extra)
    return tf.contrib.training.HParams(**settings)

  def rms_hparams(**extra):
    """Shared settings of the RMS, dropout and parameter-noise configs."""
    return nn_hparams(optimizer='RMS',
                      reset_lr=True,
                      lr_decay_rate=0.5,
                      training_freq=50,
                      training_epochs=100,
                      **extra)

  def variational_hparams(**extra):
    """Shared settings of the BBB and alpha-divergence configs."""
    return nn_hparams(optimizer='RMS',
                      use_sigma_exp_transform=True,
                      cleared_times_trained=10,
                      initial_training_steps=100,
                      noise_sigma=0.1,
                      reset_lr=False,
                      training_freq=50,
                      training_epochs=100,
                      **extra)

  def nlinear_hparams(training_freq, lambda_prior, **extra):
    """Shared settings of all NeuralLinear configs."""
    return nn_hparams(reset_lr=True,
                      lr_decay_rate=0.5,
                      training_freq=training_freq,
                      training_freq_network=tfn,
                      training_epochs=100,
                      a0=6,
                      b0=6,
                      lambda_prior=lambda_prior,
                      **extra)

  def finite_memory_hparams(training_freq, mu_prior_flag, sigma_prior_flag):
    """NeuralLinear settings plus the finite-memory specific values."""
    return nlinear_hparams(training_freq,
                           1,  # lambda_prior stays the integer 1 here
                           mem=MEMSIZE,
                           mu_prior_flag=mu_prior_flag,
                           sigma_prior_flag=sigma_prior_flag)

  # Define hyperparameters and algorithms
  hparams = tf.contrib.training.HParams(num_actions=num_actions)

  hparams_linear = tf.contrib.training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               a0=6,
                                               b0=6,
                                               lambda_prior=0.25,
                                               initial_pulls=2)

  # The next three are only referenced by the commented-out entries of the
  # algorithm list; they are kept so those entries can be re-enabled.
  hparams_rms = rms_hparams(p=0.95, q=3)
  hparams_dropout = rms_hparams(use_dropout=True, keep_prob=0.80)
  hparams_bbb = variational_hparams()

  # NeuralLinear: the "2" variants differ only in training_freq (10 vs 1).
  hparams_nlinear = nlinear_hparams(1, 0.25)
  hparams_nlinear2 = nlinear_hparams(10, 0.25)

  # Finite-memory variants: (training_freq, mu_prior_flag, sigma_prior_flag)
  hparams_nlinear_finite_memory = finite_memory_hparams(1, 1, 1)
  hparams_nlinear_finite_memory2 = finite_memory_hparams(10, 1, 1)
  hparams_nlinear_finite_memory_no_prior = finite_memory_hparams(1, 0, 0)
  hparams_nlinear_finite_memory2_no_prior = finite_memory_hparams(10, 0, 0)
  hparams_nlinear_finite_memory_no_sig_prior = finite_memory_hparams(1, 1, 0)
  hparams_nlinear_finite_memory2_no_sig_prior = finite_memory_hparams(10, 1, 0)

  # Also only referenced by commented-out entries of the algorithm list.
  hparams_pnoise = rms_hparams(noise_std=0.05, eps=0.1, d_samples=300)
  hparams_alpha_div = variational_hparams(alpha=1.0,
                                          k=20,
                                          prior_variance=0.1)

  hparams_gp = tf.contrib.training.HParams(num_actions=num_actions,
                                           num_outputs=num_actions,
                                           context_dim=context_dim,
                                           reset_lr=False,
                                           learn_embeddings=True,
                                           max_num_points=1000,
                                           show_training=False,
                                           freq_summary=1000,
                                           batch_size=512,
                                           keep_fixed_after_max_obs=True,
                                           training_freq=50,
                                           initial_pulls=2,
                                           training_epochs=100,
                                           lr=0.01,
                                           buffer_s=-1,
                                           initial_lr=0.001,
                                           lr_decay_rate=0.0,
                                           optimizer='RMS',
                                           task_latent_dim=5,
                                           activate_decay=False)

  # One zero-argument factory per algorithm. Fresh instances are built from
  # these at the start of every run, and the result table below is sized
  # from this list, so adding or removing an entry needs no other change.
  algo_factories = [
      lambda: UniformSampling('Uniform Sampling', hparams),
      # lambda: UniformSampling('Uniform Sampling 2', hparams),
      # lambda: FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
      # lambda: FixedPolicySampling('fixed2', [0.25, 0.75], hparams),
      # lambda: PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
      # lambda: PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
      # lambda: PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
      lambda: NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
      lambda: NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
      lambda: NeuralLinearPosteriorSamplingFiniteMemory(
          'NeuralLinearFiniteMemory', hparams_nlinear_finite_memory),
      lambda: NeuralLinearPosteriorSamplingFiniteMemory(
          'NeuralLinearFiniteMemory2', hparams_nlinear_finite_memory2),
      lambda: NeuralLinearPosteriorSamplingFiniteMemory(
          'NeuralLinearFiniteMemory_noP',
          hparams_nlinear_finite_memory_no_prior),
      lambda: NeuralLinearPosteriorSamplingFiniteMemory(
          'NeuralLinearFiniteMemory2_noP',
          hparams_nlinear_finite_memory2_no_prior),
      lambda: NeuralLinearPosteriorSamplingFiniteMemory(
          'NeuralLinearFiniteMemory_noSigP',
          hparams_nlinear_finite_memory_no_sig_prior),
      lambda: NeuralLinearPosteriorSamplingFiniteMemory(
          'NeuralLinearFiniteMemory2_noSigP',
          hparams_nlinear_finite_memory2_no_sig_prior),
      lambda: LinearFullPosteriorSampling('LinFullPost', hparams_linear),
      # lambda: BootstrappedBNNSampling('BootRMS', hparams_rms),
      # lambda: ParameterNoiseSampling('ParamNoise', hparams_pnoise),
      # lambda: PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div,
      #                              'AlphaDiv'),
      # lambda: PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
  ]

  Nruns = 50
  par = 0  # 0 selects the sequential branch below
  # res[j] collects the per-run total reward of algorithm j.
  res = [[] for _ in algo_factories]

  for i in range(Nruns):
    print(i)
    algos = [make_algo() for make_algo in algo_factories]

    if par == 0:
      # Run contextual bandit problem
      t_init = time.time()
      results = run_contextual_bandit(context_dim, num_actions, dataset,
                                      algos)
      _, h_rewards = results

      # Display results
      display_results(algos, opt_rewards, opt_actions, h_rewards, t_init,
                      data_type)

      # Append results: column j of h_rewards belongs to algorithm j.
      for j in range(len(algos)):
        res[j].append(np.sum(h_rewards[:, j]))
    else:
      # NOTE(review): unreachable while `par` is 0, and it relies on
      # `Parallel`, `delayed`, `Run` and `num_cores` being provided by the
      # surrounding module. It starts Nruns jobs on *every* outer iteration
      # and indexes `res` by the position of the job result (`res[j]`,
      # `rr[j]`), which overruns `res` as soon as there are more job
      # results than algorithms. Confirm what `Run` returns and fix the
      # aggregation before enabling this path.
      par_res = Parallel(n_jobs=num_cores)(
          delayed(Run)(context_dim, num_actions, dataset, algos, opt_rewards,
                       opt_actions, data_type) for _ in range(Nruns))
      for j, rr in enumerate(par_res):
        res[j].append(rr[j])

    # Drop the references to this run's algorithms before the next run
    # builds new ones; the last run's list is kept for the final report.
    if i < (Nruns - 1):
      algos = None

  display_final_results(algos, opt_rewards, res, data_type)
Exemple #11
0
def main(argv):
    """Run the repeated NeuralLinear comparison on the 'statlog' dataset.

    One dataset is sampled up front. The contextual-bandit problem is then
    run `Nruns` times with freshly constructed algorithms; each algorithm's
    total reward per run is printed and collected in `rewards` (one list
    per algorithm), which is finally passed to `display_final_results`.

    Args:
        argv: Unused.
    """

    # Problem parameters
    num_contexts = 4000
    tfn = 400      # forwarded as `training_freq_network`
    tfe = tfn * 2  # forwarded as `training_epochs`
    data_type = 'statlog'
    l_sizes = [50]
    outdir = "./"

    # Create dataset
    sampled_vals = sample_data(data_type, num_contexts)
    (dataset, opt_rewards, opt_actions, num_actions, context_dim,
     vocab_processor) = sampled_vals

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    def network_hparams(**extra):
        """Return HParams with the shared network settings plus `extra`."""
        settings = dict(num_actions=num_actions,
                        context_dim=context_dim,
                        init_scale=0.3,
                        activation=tf.nn.relu,
                        # Same list object for every configuration, exactly
                        # as when each call was written out in full.
                        layer_sizes=l_sizes,
                        batch_size=64,
                        activate_decay=True,
                        initial_lr=0.1,
                        max_grad_norm=5.0,
                        show_training=False,
                        freq_summary=1000,
                        buffer_s=-1,
                        initial_pulls=2,
                        reset_lr=True,
                        lr_decay_rate=0.5,
                        training_freq=1,
                        training_freq_network=tfn,
                        training_epochs=tfe)
        settings.update(extra)
        return tf.contrib.training.HParams(**settings)

    def finite_memory_hparams(mu_prior_flag, sigma_prior_flag):
        """Network settings plus the finite-memory specific values."""
        return network_hparams(a0=6,
                               b0=6,
                               lambda_prior=1,
                               mem=num_actions * 100,
                               mu_prior_flag=mu_prior_flag,
                               sigma_prior_flag=sigma_prior_flag)

    # Define hyperparameters and algorithms.
    # `hparams`, `hparams_txt` and `hparams_epsilon` are not referenced by
    # the algorithm list below; they are kept as ready-made configurations.
    hparams = tf.contrib.training.HParams(num_actions=num_actions)

    hparams_linear = tf.contrib.training.HParams(num_actions=num_actions,
                                                 context_dim=context_dim,
                                                 a0=6,
                                                 b0=6,
                                                 lambda_prior=0.25,
                                                 initial_pulls=2)

    hparams_txt = tf.contrib.training.HParams(num_actions=num_actions,
                                              context_dim=context_dim,
                                              init_scale=0.3,
                                              batch_size=64,
                                              initial_lr=0.1,
                                              show_training=False,
                                              freq_summary=1000,
                                              buffer_s=-1,
                                              initial_pulls=2,
                                              reset_lr=True,
                                              training_freq=1,
                                              training_freq_network=tfn,
                                              training_epochs=tfe,
                                              a0=6,
                                              b0=6,
                                              lambda_prior=0.25)

    hparams_nlinear = network_hparams(a0=6, b0=6, lambda_prior=0.25)
    hparams_epsilon = network_hparams(epsilon=0.1)

    # Finite-memory variants: (mu_prior_flag, sigma_prior_flag)
    hparams_nlinear_finite_memory = finite_memory_hparams(1, 1)
    hparams_nlinear_finite_memory_no_prior = finite_memory_hparams(0, 0)
    hparams_nlinear_finite_memory_no_sig_prior = finite_memory_hparams(1, 0)

    # One zero-argument factory per algorithm; fresh instances are built
    # from these for every run and `rewards` is sized from this list.
    algo_factories = [
        lambda: NeuralLinearPosteriorSampling('NeuralLinear',
                                              hparams_nlinear),
        lambda: NeuralLinearPosteriorSamplingFiniteMemory(
            'NeuralLinearFiniteMemory', hparams_nlinear_finite_memory),
        lambda: NeuralLinearPosteriorSamplingFiniteMemory(
            'NeuralLinearFiniteMemory_noP',
            hparams_nlinear_finite_memory_no_prior),
        lambda: NeuralLinearPosteriorSamplingFiniteMemory(
            'NeuralLinearFiniteMemory_noSigP',
            hparams_nlinear_finite_memory_no_sig_prior),
        lambda: LinearFullPosteriorSampling('LinFullPost', hparams_linear),
    ]

    Nruns = 10
    # rewards[j] collects the per-run total reward of algorithm j.
    rewards = [[] for _ in algo_factories]

    for i_run in range(Nruns):
        algos = [make_algo() for make_algo in algo_factories]
        results = run_contextual_bandit(context_dim, num_actions, dataset,
                                        algos)
        _, h_rewards = results

        # Column j of h_rewards belongs to algorithm j.
        for j in range(len(algos)):
            run_total = np.sum(h_rewards[:, j])
            print(run_total)
            rewards[j].append(run_total)

        # Drop the references to this run's algorithms before the next run
        # builds new ones; the last run's list is kept for the final report.
        if i_run < (Nruns - 1):
            algos = None

    display_final_results(algos, opt_rewards, opt_actions, rewards, data_type)
def main(_):
  """Compare mixup training variants of NeuralLinear on a contextual bandit.

  The experiment is repeated `num_repetitions` times. Each repetition:

  1. samples a fresh training set (`num_contexts`) and test set
     (`num_test_contexts`) for `data_type`;
  2. builds four identically configured NeuralLinear learners and trains one
     with each of `run_mixup_contextual_bandit`,
     `run_random_mixup_contextual_bandit`,
     `run_contrast_mixup_contextual_bandit` and `run_contextual_bandit`;
  3. runs a `UniformSampling` baseline on the same training set;
  4. raises the learners' update periods and replays the four learners on the
     test set with the plain `run_contextual_bandit` loop.

  Total reward, regret and relative regret are accumulated per setting and
  their means are printed at the end.

  Args:
    _: Unused positional argument (presumably argv supplied by the app
      runner -- confirm against the entry point).
  """

  # Problem parameters
  num_contexts = 2000
  num_test_contexts = 200
  num_repetitions = 5
  # Data type in {linear, sparse_linear, mushroom, financial, jester,
  #                 statlog, adult, covertype, census, wheel}
  data_type = 'mushroom'

  # Initial draw. Only num_actions / context_dim are needed from it (to build
  # the hyperparameters); the data itself is re-sampled in every repetition.
  sampled_vals = sample_data(data_type, num_contexts)
  dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

  sampled_vals_t = sample_data(data_type, num_test_contexts)
  dataset_test, opt_rewards_t, opt_actions_t, _, _ = sampled_vals_t

  # Define hyperparameters and algorithms
  hparams = tf.contrib.training.HParams(num_actions=num_actions)

  hparams_linear = tf.contrib.training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               a0=6,
                                               b0=6,
                                               lambda_prior=0.25,
                                               initial_pulls=2)

  hparams_rms = tf.contrib.training.HParams(num_actions=num_actions,
                                            context_dim=context_dim,
                                            init_scale=0.3,
                                            activation=tf.nn.relu,
                                            layer_sizes=[50],
                                            batch_size=512,
                                            activate_decay=True,
                                            initial_lr=0.1,
                                            max_grad_norm=5.0,
                                            show_training=False,
                                            freq_summary=1000,
                                            buffer_s=-1,
                                            initial_pulls=2,
                                            optimizer='RMS',
                                            reset_lr=True,
                                            lr_decay_rate=0.5,
                                            training_freq=50,
                                            training_epochs=100,
                                            p=0.95,
                                            q=3)

  hparams_dropout = tf.contrib.training.HParams(num_actions=num_actions,
                                                context_dim=context_dim,
                                                init_scale=0.3,
                                                activation=tf.nn.relu,
                                                layer_sizes=[50],
                                                batch_size=512,
                                                activate_decay=True,
                                                initial_lr=0.1,
                                                max_grad_norm=5.0,
                                                show_training=False,
                                                freq_summary=1000,
                                                buffer_s=-1,
                                                initial_pulls=2,
                                                optimizer='RMS',
                                                reset_lr=True,
                                                lr_decay_rate=0.5,
                                                training_freq=50,
                                                training_epochs=100,
                                                use_dropout=True,
                                                keep_prob=0.80)

  hparams_bbb = tf.contrib.training.HParams(num_actions=num_actions,
                                            context_dim=context_dim,
                                            init_scale=0.3,
                                            activation=tf.nn.relu,
                                            layer_sizes=[50],
                                            batch_size=512,
                                            activate_decay=True,
                                            initial_lr=0.1,
                                            max_grad_norm=5.0,
                                            show_training=False,
                                            freq_summary=1000,
                                            buffer_s=-1,
                                            initial_pulls=2,
                                            optimizer='RMS',
                                            use_sigma_exp_transform=True,
                                            cleared_times_trained=10,
                                            initial_training_steps=100,
                                            noise_sigma=0.1,
                                            reset_lr=False,
                                            training_freq=50,
                                            training_epochs=100)

  hparams_nlinear = tf.contrib.training.HParams(num_actions=num_actions,
                                                context_dim=context_dim,
                                                init_scale=0.3,
                                                activation=tf.nn.relu,
                                                layer_sizes=[50],
                                                batch_size=512,
                                                activate_decay=True,
                                                initial_lr=0.1,
                                                max_grad_norm=5.0,
                                                show_training=False,
                                                freq_summary=1000,
                                                buffer_s=-1,
                                                initial_pulls=2,
                                                reset_lr=True,
                                                lr_decay_rate=0.5,
                                                training_freq=1,
                                                training_freq_network=50,
                                                training_epochs=100,
                                                a0=6,
                                                b0=6,
                                                lambda_prior=0.25)

  hparams_nlinear2 = tf.contrib.training.HParams(num_actions=num_actions,
                                                 context_dim=context_dim,
                                                 init_scale=0.3,
                                                 activation=tf.nn.relu,
                                                 layer_sizes=[50],
                                                 batch_size=512,
                                                 activate_decay=True,
                                                 initial_lr=0.1,
                                                 max_grad_norm=5.0,
                                                 show_training=False,
                                                 freq_summary=1000,
                                                 buffer_s=-1,
                                                 initial_pulls=2,
                                                 reset_lr=True,
                                                 lr_decay_rate=0.5,
                                                 training_freq=10,
                                                 training_freq_network=50,
                                                 training_epochs=100,
                                                 a0=6,
                                                 b0=6,
                                                 lambda_prior=0.25)

  hparams_pnoise = tf.contrib.training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               init_scale=0.3,
                                               activation=tf.nn.relu,
                                               layer_sizes=[50],
                                               batch_size=512,
                                               activate_decay=True,
                                               initial_lr=0.1,
                                               max_grad_norm=5.0,
                                               show_training=False,
                                               freq_summary=1000,
                                               buffer_s=-1,
                                               initial_pulls=2,
                                               optimizer='RMS',
                                               reset_lr=True,
                                               lr_decay_rate=0.5,
                                               training_freq=50,
                                               training_epochs=100,
                                               noise_std=0.05,
                                               eps=0.1,
                                               d_samples=300,
                                              )

  hparams_alpha_div = tf.contrib.training.HParams(num_actions=num_actions,
                                                  context_dim=context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=[50],
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=1000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  optimizer='RMS',
                                                  use_sigma_exp_transform=True,
                                                  cleared_times_trained=10,
                                                  initial_training_steps=100,
                                                  noise_sigma=0.1,
                                                  reset_lr=False,
                                                  training_freq=50,
                                                  training_epochs=100,
                                                  alpha=1.0,
                                                  k=20,
                                                  prior_variance=0.1)

  hparams_gp = tf.contrib.training.HParams(num_actions=num_actions,
                                           num_outputs=num_actions,
                                           context_dim=context_dim,
                                           reset_lr=False,
                                           learn_embeddings=True,
                                           max_num_points=1000,
                                           show_training=False,
                                           freq_summary=1000,
                                           batch_size=512,
                                           keep_fixed_after_max_obs=True,
                                           training_freq=50,
                                           initial_pulls=2,
                                           training_epochs=100,
                                           lr=0.01,
                                           buffer_s=-1,
                                           initial_lr=0.001,
                                           lr_decay_rate=0.0,
                                           optimizer='RMS',
                                           task_latent_dim=5,
                                           activate_decay=False)

  # NOTE: this list is not consumed by the experiment loop below, which builds
  # its own learners. It is kept so the construction side effects of the
  # original script are unchanged.
  algos = [
      # UniformSampling('Uniform Sampling', hparams),
      # UniformSampling('Uniform Sampling 2', hparams),
      # FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
      # FixedPolicySampling('fixed2', [0.25, 0.75], hparams),
      # PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
      # PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
      # PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
      # NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
      NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
      # LinearFullPosteriorSampling('LinFullPost', hparams_linear),
      # BootstrappedBNNSampling('BootRMS', hparams_rms),
      # ParameterNoiseSampling('ParamNoise', hparams_pnoise),
      # PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
      # PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
  ]

  # One entry per NeuralLinear learner:
  #   (label used in log_algos_my, label shown by display_results,
  #    training loop used for that learner).
  neural_settings = [
      ("old mix", "mix", run_mixup_contextual_bandit),
      ("random mix", "mix random", run_random_mixup_contextual_bandit),
      ("contrast mix", "contrast mix", run_contrast_mixup_contextual_bandit),
      ("orig", "orig", run_contextual_bandit),
  ]
  uniform_slot = len(neural_settings)  # result slot of the uniform baseline
  num_slots = uniform_slot + 1

  # Run contextual bandit problem
  t_init = time.time()
  log_algos_my = []                                    # [label, total reward]
  log_algos_avg = [[] for i in range(num_slots)]       # train relative regret
  reg_algos_avg = [[] for i in range(num_slots)]       # train absolute regret
  log_algos_avg_t = [[] for i in range(num_slots)]     # test relative regret
  algos_avg = [[] for i in range(num_slots)]           # train total reward
  algos_avg_t = [[] for i in range(num_slots)]         # test total reward

  def record_train(slot, label, h_rewards, optimal):
    """Store per-algorithm training totals and regrets for one run."""
    best = np.sum(optimal)
    # Iterate over the columns actually returned by the run instead of an
    # unrelated algorithm list, so the bookkeeping cannot index out of range.
    for j in range(h_rewards.shape[1]):
      total = np.sum(h_rewards[:, j])
      log_algos_my.append([label, total])
      algos_avg[slot].append(total)
      log_algos_avg[slot].append((best - total) / best)
      reg_algos_avg[slot].append(best - total)

  def record_test(slot, label, h_rewards, optimal):
    """Store per-algorithm test totals and relative regrets for one run."""
    best = np.sum(optimal)
    for j in range(h_rewards.shape[1]):
      total = np.sum(h_rewards[:, j])
      log_algos_my.append([label, total])
      algos_avg_t[slot].append(total)
      log_algos_avg_t[slot].append((best - total) / best)

  for rep in range(num_repetitions):
    sampled_vals = sample_data(data_type, num_contexts)
    dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

    sampled_vals_t = sample_data(data_type, num_test_contexts)
    dataset_test, opt_rewards_t, opt_actions_t, _, _ = sampled_vals_t

    print("starting new chance")
    # Fresh, identically configured learner (wrapped in a one-element list,
    # as the run loops expect a list of algorithms) for every setting.
    learners = [
        [NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2)]
        for i in range(len(neural_settings))
    ]
    for learner in learners:
      learner[0].update_freq_lr = 10
      learner[0].update_freq_nn = 50
    print("al1", learners[0][0].hparams.training_freq, "training")

    # ---- training phase ----
    for slot, (log_label, shown_label, runner) in enumerate(neural_settings):
      results = runner(context_dim, num_actions, dataset, learners[slot])
      _, h_rewards, learners[slot] = results
      record_train(slot, log_label, h_rewards, opt_rewards)
      display_results(learners[slot], opt_rewards, opt_actions, h_rewards,
                      t_init, data_type, shown_label, rep)

    print(log_algos_my, "my")

    # Uniform-sampling baseline on the same training data.
    ual = [UniformSampling('Uniform Sampling', hparams)]
    results = run_contextual_bandit(context_dim, num_actions, dataset, ual)
    _, h_rewards, ual = results
    display_results(ual, opt_rewards, opt_actions, h_rewards, t_init,
                    data_type, "uniform", rep)
    record_train(uniform_slot, "ual", h_rewards, opt_rewards)
    print(log_algos_my, "my")

    # ---- testing phase ----
    print("starting test, switching off training")
    # Update periods are raised to 10000; presumably this keeps the learners
    # from retraining during the short test run -- confirm against the
    # learner's update logic.
    for learner in learners:
      learner[0].update_freq_lr = 10000
      learner[0].update_freq_nn = 10000
    print("al1", learners[0][0].hparams.training_freq, "test")

    for slot, (log_label, shown_label, _runner) in enumerate(neural_settings):
      # Every learner is evaluated with the plain bandit loop on the test set.
      results = run_contextual_bandit(context_dim, num_actions, dataset_test,
                                      learners[slot])
      _, h_rewards, learners[slot] = results
      record_test(slot, log_label, h_rewards, opt_rewards_t)
      # Report against the test-set optimum: h_rewards here comes from
      # dataset_test, so the training-set optimum would be misleading.
      display_results(learners[slot], opt_rewards_t, opt_actions_t, h_rewards,
                      t_init, data_type, shown_label, rep)

    print(log_algos_my, "my")

  for i, ex in enumerate(['orig mix', 'random mix', 'contrast mix', 'orig', 'uniform']):
    print("TRAINNN", ex, " ", np.mean(log_algos_avg[i]), np.mean(algos_avg[i]),  np.mean(reg_algos_avg[i]))
  for i, ex in enumerate(['orig mix', 'random mix', 'contrast mix', 'orig']):
    print("TESTTT", ex, " ", np.mean(log_algos_avg_t[i]), np.mean(algos_avg_t[i]))
Exemple #13
0
def main(_):

    # Problem parameters
    num_contexts = 40000

    # parameters of finite
    tfn = 400
    tfe = tfn * 2
    data_type = 'statlog'
    l_sizes = [50, 50]
    outdir = "./"

    # Data type in {linear, sparse_linear, mushroom, financial, jester,
    #                 statlog, adult, covertype, census, wheel}
    data_type = 'moon'
    nExperiment = 2
    # Create dataset
    sampled_vals = sample_data(data_type, num_contexts)
    dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

    # Define hyperparameters and algorithms
    hparams = tf.contrib.training.HParams(num_actions=num_actions)

    hparams_linear = tf.contrib.training.HParams(num_actions=num_actions,
                                                 context_dim=context_dim,
                                                 a0=6,
                                                 b0=6,
                                                 lambda_prior=0.25,
                                                 initial_pulls=2)

    hparams_rms = tf.contrib.training.HParams(num_actions=num_actions,
                                              context_dim=context_dim,
                                              init_scale=0.3,
                                              activation=tf.nn.relu,
                                              layer_sizes=[50],
                                              batch_size=512,
                                              activate_decay=True,
                                              initial_lr=0.1,
                                              max_grad_norm=5.0,
                                              show_training=False,
                                              freq_summary=1000,
                                              buffer_s=-1,
                                              initial_pulls=2,
                                              optimizer='RMS',
                                              reset_lr=True,
                                              lr_decay_rate=0.5,
                                              training_freq=50,
                                              training_epochs=100,
                                              p=0.95,
                                              q=3)

    hparams_dropout = tf.contrib.training.HParams(num_actions=num_actions,
                                                  context_dim=context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=[50, 50],
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=1000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  optimizer='RMS',
                                                  reset_lr=True,
                                                  lr_decay_rate=0.5,
                                                  training_freq=50,
                                                  training_epochs=100,
                                                  use_dropout=True,
                                                  keep_prob=0.80)

    hparams_bbb = tf.contrib.training.HParams(num_actions=num_actions,
                                              context_dim=context_dim,
                                              init_scale=0.3,
                                              activation=tf.nn.relu,
                                              layer_sizes=[50, 50],
                                              batch_size=512,
                                              activate_decay=True,
                                              initial_lr=0.1,
                                              max_grad_norm=5.0,
                                              show_training=False,
                                              freq_summary=1000,
                                              buffer_s=-1,
                                              initial_pulls=2,
                                              optimizer='RMS',
                                              use_sigma_exp_transform=True,
                                              cleared_times_trained=10,
                                              initial_training_steps=100,
                                              noise_sigma=0.1,
                                              reset_lr=False,
                                              training_freq=50,
                                              training_epochs=100)

    hparams_nlinear = tf.contrib.training.HParams(num_actions=num_actions,
                                                  context_dim=context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=[50, 50],
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=1000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  reset_lr=True,
                                                  lr_decay_rate=0.5,
                                                  training_freq=1,
                                                  training_freq_network=50,
                                                  training_epochs=100,
                                                  a0=6,
                                                  b0=6,
                                                  lambda_prior=0.25)

    hparams_nlinear2 = tf.contrib.training.HParams(num_actions=num_actions,
                                                   context_dim=context_dim,
                                                   init_scale=0.3,
                                                   activation=tf.nn.relu,
                                                   layer_sizes=[50],
                                                   batch_size=512,
                                                   activate_decay=True,
                                                   initial_lr=0.1,
                                                   max_grad_norm=5.0,
                                                   show_training=False,
                                                   freq_summary=1000,
                                                   buffer_s=-1,
                                                   initial_pulls=2,
                                                   reset_lr=True,
                                                   lr_decay_rate=0.5,
                                                   training_freq=10,
                                                   training_freq_network=50,
                                                   training_epochs=100,
                                                   a0=6,
                                                   b0=6,
                                                   lambda_prior=0.25)

    hparams_nlinear_finite_memory = tf.contrib.training.HParams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=l_sizes,
        batch_size=64,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq=1,
        training_freq_network=50,
        training_epochs=100,
        a0=6,
        b0=6,
        lambda_prior=1,
        mem=num_actions * 100,
        mu_prior_flag=1,
        sigma_prior_flag=1)

    hparams_nlinear_finite_memory_no_prior = tf.contrib.training.HParams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=l_sizes,
        batch_size=64,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq=1,
        training_freq_network=50,
        training_epochs=100,
        a0=6,
        b0=6,
        lambda_prior=1,
        mem=num_actions * 100,
        mu_prior_flag=0,
        sigma_prior_flag=0)

    hparams_nlinear_finite_memory_no_sig_prior = tf.contrib.training.HParams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=l_sizes,
        batch_size=64,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq=1,
        training_freq_network=50,
        training_epochs=100,
        a0=6,
        b0=6,
        lambda_prior=1,
        mem=num_actions * 100,
        mu_prior_flag=1,
        sigma_prior_flag=0)

    hparams_pnoise = tf.contrib.training.HParams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=[50],
        batch_size=512,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        optimizer='RMS',
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq=50,
        training_epochs=100,
        noise_std=0.05,
        eps=0.1,
        d_samples=300,
    )

    hparams_alpha_div = tf.contrib.training.HParams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=[50],
        batch_size=512,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        optimizer='RMS',
        use_sigma_exp_transform=True,
        cleared_times_trained=10,
        initial_training_steps=100,
        noise_sigma=0.1,
        reset_lr=False,
        training_freq=50,
        training_epochs=100,
        alpha=1.0,
        k=20,
        prior_variance=0.1)

    # Hyperparameter set for the multitask-GP agent. Its only consumer in the
    # `algos` list below is the commented-out
    # PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP') entry, so as
    # written it is built but not used by any active algorithm.
    # num_outputs is set to the same value as num_actions.
    # NOTE(review): both `lr` (0.01) and `initial_lr` (0.001) are supplied with
    # different values -- confirm which one the consumer actually reads.
    hparams_gp = tf.contrib.training.HParams(num_actions=num_actions,
                                             num_outputs=num_actions,
                                             context_dim=context_dim,
                                             reset_lr=False,
                                             learn_embeddings=True,
                                             max_num_points=1000,
                                             show_training=False,
                                             freq_summary=1000,
                                             batch_size=512,
                                             keep_fixed_after_max_obs=True,
                                             training_freq=50,
                                             initial_pulls=2,
                                             training_epochs=100,
                                             lr=0.01,
                                             buffer_s=-1,
                                             initial_lr=0.001,
                                             lr_decay_rate=0.0,
                                             optimizer='RMS',
                                             task_latent_dim=5,
                                             activate_decay=False)
    # Hyperparameter set passed to NeuralGreedy('NeuralGreedy', hparams_greedy)
    # in the `algos` list below; that entry is active, so this set is actually
    # used by the experiment loop.
    # The keyword/value pairs are identical to those of hparams_ucb; only the
    # variable name differs.
    # num_actions and context_dim are taken from the surrounding scope.
    hparams_greedy = tf.contrib.training.HParams(num_actions=num_actions,
                                                 context_dim=context_dim,
                                                 init_scale=0.3,
                                                 activation=tf.nn.relu,
                                                 layer_sizes=[50],
                                                 batch_size=512,
                                                 activate_decay=True,
                                                 initial_lr=0.1,
                                                 max_grad_norm=5.0,
                                                 show_training=False,
                                                 freq_summary=1000,
                                                 buffer_s=-1,
                                                 initial_pulls=2,
                                                 reset_lr=True,
                                                 lr_decay_rate=0.5,
                                                 optimizer='RMS',
                                                 training_freq=50,
                                                 training_freq_network=50,
                                                 training_epochs=100,
                                                 lambda_prior=0.25,
                                                 delta=0.01,
                                                 lamb=0.01,
                                                 mu=1,
                                                 S=1)
    # Hyperparameter set for the NeuralUCB agent. Its only consumer in the
    # `algos` list below is the commented-out
    # NeuralUCBSampling('NeuralUCB', hparams_ucb) entry, so as written it is
    # built but not used by any active algorithm.
    # The keyword/value pairs are identical to those of hparams_greedy.
    # num_actions and context_dim are taken from the surrounding scope.
    hparams_ucb = tf.contrib.training.HParams(num_actions=num_actions,
                                              context_dim=context_dim,
                                              init_scale=0.3,
                                              activation=tf.nn.relu,
                                              layer_sizes=[50],
                                              batch_size=512,
                                              activate_decay=True,
                                              initial_lr=0.1,
                                              max_grad_norm=5.0,
                                              show_training=False,
                                              freq_summary=1000,
                                              buffer_s=-1,
                                              initial_pulls=2,
                                              reset_lr=True,
                                              lr_decay_rate=0.5,
                                              optimizer='RMS',
                                              training_freq=50,
                                              training_freq_network=50,
                                              training_epochs=100,
                                              lambda_prior=0.25,
                                              delta=0.01,
                                              lamb=0.01,
                                              mu=1,
                                              S=1)

    # Run contextual bandit problem
    # t_init is captured once, before the loop, and the same value is passed to
    # display_results on every iteration.
    # NOTE(review): if display_results reports elapsed time from t_init, later
    # iterations will include the time of all earlier ones -- confirm intent.
    t_init = time.time()
    # nExperiment is defined outside this block. Each pass rebuilds the
    # algorithm objects and runs them on the same `dataset`, which is not
    # reassigned inside the loop.
    for i in range(nExperiment):

        # Only the uncommented constructors are instantiated:
        # LinearFullPosteriorSampling and NeuralGreedy. The commented-out
        # entries are kept as alternatives that can be switched back on.
        algos = [
            #UniformSampling('Uniform Sampling', hparams),
            #UniformSampling('Uniform Sampling 2', hparams),
            #FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
            #FixedPolicySampling('fixed2', [0.25, 0.75], hparams),
            #PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
            #PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
            #PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
            #NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
            #NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
            LinearFullPosteriorSampling('LinFullPost', hparams_linear),
            #BootstrappedBNNSampling('BootRMS', hparams_rms),
            #NeuralLinearPosteriorSamplingFiniteMemory('NeuralLinearFiniteMemory', hparams_nlinear_finite_memory),
            #NeuralLinearPosteriorSamplingFiniteMemory('NeuralLinearFiniteMemory_noP', hparams_nlinear_finite_memory_no_prior),
            #NeuralLinearPosteriorSamplingFiniteMemory('NeuralLinearFiniteMemory_noSigP', hparams_nlinear_finite_memory_no_sig_prior)
            #ParameterNoiseSampling('ParamNoise', hparams_pnoise),
            #PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
            #PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),hparams_ucb
            #NeuralUCBSampling('NeuralUCB', hparams_ucb)
            NeuralGreedy('NeuralGreedy', hparams_greedy)
        ]

        results = run_contextual_bandit(context_dim, num_actions, dataset,
                                        algos)
        # Only the second element of the returned pair is used here.
        _, h_rewards = results
        # Write one CSV per iteration for columns 0 and 1 of h_rewards; the
        # iteration index i is embedded in the file name.
        # Presumably column k corresponds to algos[k] (column 0 -> LinFullPost,
        # column 1 -> NeuralGreedy) -- TODO confirm against
        # run_contextual_bandit.
        # NOTE(review): the hard-coded column indices require at least two
        # active algorithms, and the "resultMoon" prefix does not match the
        # name of any algorithm in the list above -- verify the naming.
        np.savetxt("resultLin" + str(i) + ".csv",
                   h_rewards[:, 0],
                   delimiter=',')
        np.savetxt("resultMoon" + str(i) + ".csv",
                   h_rewards[:, 1],
                   delimiter=',')
        # Display results
        display_results(algos, opt_rewards, opt_actions, h_rewards, t_init,
                        data_type)