def run_trial(trial_idx, delta, algo_names):
    """Runs a trial of wheel bandit problem instance for a set of algorithms."""

    filename = os.path.join(FLAGS.datasetdir,
                            str(delta) + '_' + str(trial_idx) + '.npz')
    with gfile.GFile(filename, 'rb') as f:  # binary mode so np.load can read the .npz archive
        sampled_vals = np.load(f)
        dataset = sampled_vals['dataset']
        opt_rewards = sampled_vals['opt_rewards']

    x_hidden_size = 100
    x_encoder_sizes = [x_hidden_size] * 2

    algos = []
    for algo_name in algo_names:
        if algo_name == 'uniform':
            hparams = contrib_training.HParams(num_actions=num_actions)
            algos.append(uniform_sampling.UniformSampling(algo_name, hparams))
        elif algo_name == 'neurolinear':
            hparams = contrib_training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               init_scale=0.3,
                                               activation=tf.nn.relu,
                                               output_activation=tf.nn.relu,
                                               layer_sizes=x_encoder_sizes,
                                               batch_size=512,
                                               activate_decay=True,
                                               initial_lr=0.1,
                                               max_grad_norm=5.0,
                                               show_training=False,
                                               freq_summary=1000,
                                               buffer_s=-1,
                                               initial_pulls=2,
                                               reset_lr=True,
                                               lr_decay_rate=0.5,
                                               training_freq=1,
                                               training_freq_network=20,
                                               training_epochs=50,
                                               a0=12,
                                               b0=30,
                                               lambda_prior=23)
            algos.append(
                neural_linear_sampling.NeuralLinearPosteriorSampling(
                    algo_name, hparams))
        elif algo_name == 'multitaskgp':
            hparams_gp = contrib_training.HParams(
                num_actions=num_actions,
                num_outputs=num_actions,
                context_dim=context_dim,
                reset_lr=False,
                learn_embeddings=True,
                max_num_points=1000,
                show_training=False,
                freq_summary=1000,
                batch_size=512,
                keep_fixed_after_max_obs=True,
                training_freq=20,
                initial_pulls=2,
                training_epochs=50,
                lr=0.01,
                buffer_s=-1,
                initial_lr=0.001,
                lr_decay_rate=0.0,
                optimizer='RMS',
                task_latent_dim=5,
                activate_decay=False)
            algos.append(
                posterior_bnn_sampling.PosteriorBNNSampling(
                    algo_name, hparams_gp, 'GP'))
        elif algo_name[:3] == 'snp' or algo_name[:3] == 'anp':
            hidden_size = 64
            latent_units = 32
            global_latent_net_sizes = [hidden_size] * 2 + [2 * latent_units]
            local_latent_net_sizes = [hidden_size] * 3 + [2]
            x_y_encoder_sizes = [hidden_size] * 3
            heteroskedastic_net_sizes = None
            mean_att_type = attention.laplace_attention
            scale_att_type_1 = attention.laplace_attention
            scale_att_type_2 = attention.laplace_attention
            att_type = 'multihead'
            att_heads = 8
            data_uncertainty = False
            is_anp = False

            config = algo_name.split('_')
            mfile = FLAGS.prefix + config[1] + '_' + config[2] + FLAGS.suffix
            if algo_name[:3] == 'anp':
                mfile = 'anp_' + mfile
                local_latent_net_sizes = [hidden_size] * 3 + [2 * 5]
                is_anp = True
            mpath = os.path.join(FLAGS.modeldir, mfile)

            hparams = contrib_training.HParams(
                num_actions=num_actions,
                context_dim=context_dim,
                init_scale=0.3,
                activation=tf.nn.relu,
                output_activation=tf.nn.relu,
                x_encoder_sizes=x_encoder_sizes,
                x_y_encoder_sizes=x_y_encoder_sizes,
                global_latent_net_sizes=global_latent_net_sizes,
                local_latent_net_sizes=local_latent_net_sizes,
                heteroskedastic_net_sizes=heteroskedastic_net_sizes,
                att_type=att_type,
                att_heads=att_heads,
                mean_att_type=mean_att_type,
                scale_att_type_1=scale_att_type_1,
                scale_att_type_2=scale_att_type_2,
                data_uncertainty=data_uncertainty,
                batch_size=512,
                activate_decay=True,
                initial_lr=0.1,
                max_grad_norm=5.0,
                show_training=False,
                freq_summary=1000,
                buffer_s=-1,
                initial_pulls=2,
                reset_lr=True,
                lr_decay_rate=0.5,
                training_freq=10,
                training_freq_network=20,
                training_epochs=50,
                uncertainty_type='attentive_freeform',
                local_variational=True,
                model_path=mpath,
                is_anp=is_anp)

            if config[1] == 'prior':
                hparams.set_hparam('local_variational', False)

            if config[2] == 'gp':
                hparams.set_hparam('uncertainty_type', 'attentive_gp')

            algos.append(
                offline_contextual_bandits.OfflineContextualBandits(
                    algo_name, hparams))

    t_init = time.time()
    _, h_rewards = contextual_bandit.run_contextual_bandit(
        context_dim,
        num_actions,
        dataset,
        algos,
        num_contexts=FLAGS.num_contexts)  # pytype: disable=wrong-keyword-args
    t_final = time.time()

    return h_rewards, t_final - t_init, opt_rewards[:FLAGS.num_contexts]
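# --- Illustrative usage sketch (not part of the original script) ---
# A minimal driver showing how run_trial above might be looped over several
# trials for a fixed delta, and how per-step regret could be derived from its
# outputs. FLAGS.num_trials and the shapes assumed for h_rewards
# (num_contexts x num_algos) and opt_rewards (num_contexts,) are assumptions
# made for this sketch only.
def run_all_trials_sketch(delta, algo_names):
    per_trial_regret = []
    for trial_idx in range(FLAGS.num_trials):
        h_rewards, elapsed, opt_rewards = run_trial(trial_idx, delta, algo_names)
        # Per-step regret is the optimal reward minus the reward obtained.
        per_trial_regret.append(opt_rewards[:, None] - h_rewards)
        print('trial %d for delta=%s finished in %.1fs' % (trial_idx, delta, elapsed))
    return np.stack(per_trial_regret)  # (num_trials, num_contexts, num_algos)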
Example 2
def run_trial(trial_idx, delta, algo_names):
    """Runs a trial of wheel bandit problem instance for a set of algorithms."""

    all_algo_names = '_'.join(algo_names)
    runfile = str(delta) + '_' + str(trial_idx) + '_' + all_algo_names + '.pkl'
    savefile = os.path.join(FLAGS.savedir, runfile)
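    # Reuse cached results if this (delta, trial, algorithm set) run was already saved.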
    if gfile.Exists(savefile):
        print('File exists...terminating')
        print(savefile)
        with gfile.Open(savefile, 'rb') as infile:
            saved_state = pickle.load(infile, encoding='latin-1')
        return saved_state['h_rewards'], saved_state['time']

    filename = os.path.join(FLAGS.datasetdir,
                            str(delta) + '_' + str(trial_idx) + '.npz')
    with gfile.GFile(filename, 'rb') as f:  # binary mode so np.load can read the .npz archive
        sampled_vals = np.load(f)
        dataset = sampled_vals['dataset']

    x_hidden_size = 100
    x_encoder_sizes = [x_hidden_size] * 2

    algos = []
    ckptfile = None
    save_once = False
    for algo_name in algo_names:
        if algo_name == 'uniform':
            hparams = contrib_training.HParams(num_actions=num_actions)
            algos.append(uniform_sampling.UniformSampling(algo_name, hparams))
        elif algo_name == 'neurolinear':
            hparams = contrib_training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               init_scale=0.3,
                                               activation=tf.nn.relu,
                                               output_activation=tf.nn.relu,
                                               layer_sizes=x_encoder_sizes,
                                               batch_size=512,
                                               activate_decay=True,
                                               initial_lr=0.1,
                                               max_grad_norm=5.0,
                                               show_training=False,
                                               freq_summary=1000,
                                               buffer_s=-1,
                                               initial_pulls=2,
                                               reset_lr=True,
                                               lr_decay_rate=0.5,
                                               training_freq=1,
                                               training_freq_network=20,
                                               training_epochs=50,
                                               a0=12,
                                               b0=30,
                                               lambda_prior=23)
            algos.append(
                neural_linear_sampling.NeuralLinearPosteriorSampling(
                    algo_name, hparams))
        elif algo_name == 'multitaskgp':
            hparams_gp = contrib_training.HParams(
                num_actions=num_actions,
                num_outputs=num_actions,
                context_dim=context_dim,
                reset_lr=False,
                learn_embeddings=True,
                max_num_points=1000,
                show_training=False,
                freq_summary=1000,
                batch_size=512,
                keep_fixed_after_max_obs=True,
                training_freq=20,
                initial_pulls=2,
                training_epochs=50,
                lr=0.01,
                buffer_s=-1,
                initial_lr=0.001,
                lr_decay_rate=0.0,
                optimizer='RMS',
                task_latent_dim=5,
                activate_decay=False)
            algos.append(
                posterior_bnn_sampling.PosteriorBNNSampling(
                    algo_name, hparams_gp, 'GP'))
        elif algo_name[:3] == 'gnp':
            hidden_size = 64
            x_encoder_net_sizes = None
            decoder_net_sizes = [hidden_size] * 3 + [2 * num_actions]
            heteroskedastic_net_sizes = None
            att_type = 'multihead'
            att_heads = 8
            data_uncertainty = False

            config = algo_name.split('_')
            model_type = config[1]

            if algo_name.startswith('gnp_anp_beta_'):
                mfile = algo_name + FLAGS.suffix
                x_y_encoder_net_sizes = [hidden_size] * 3
                global_latent_net_sizes = [hidden_size] * 2
                local_latent_net_sizes = None
                beta = float(config[3])
                temperature = float(config[5])
            else:
                mfile = FLAGS.prefix + config[1] + FLAGS.suffix

                if model_type == 'cnp':
                    x_y_encoder_net_sizes = [hidden_size] * 4
                    global_latent_net_sizes = None
                    local_latent_net_sizes = None
                elif model_type == 'np':
                    x_y_encoder_net_sizes = [hidden_size] * 2
                    global_latent_net_sizes = [hidden_size] * 2
                    local_latent_net_sizes = None
                elif model_type == 'anp':
                    x_y_encoder_net_sizes = [hidden_size] * 2
                    global_latent_net_sizes = [hidden_size] * 2
                    local_latent_net_sizes = None
                elif model_type == 'acnp':
                    x_y_encoder_net_sizes = [hidden_size] * 4
                    global_latent_net_sizes = None
                    local_latent_net_sizes = None
                elif model_type == 'acns':
                    x_y_encoder_net_sizes = [hidden_size] * 2
                    global_latent_net_sizes = [hidden_size] * 2
                    local_latent_net_sizes = [hidden_size] * 2

                beta = 1.
                temperature = 1.

            mpath = os.path.join(FLAGS.modeldir, mfile)

            hparams = contrib_training.HParams(
                num_actions=num_actions,
                context_dim=context_dim,
                init_scale=0.3,
                activation=tf.nn.relu,
                output_activation=tf.nn.relu,
                x_encoder_net_sizes=x_encoder_net_sizes,
                x_y_encoder_net_sizes=x_y_encoder_net_sizes,
                global_latent_net_sizes=global_latent_net_sizes,
                local_latent_net_sizes=local_latent_net_sizes,
                decoder_net_sizes=decoder_net_sizes,
                heteroskedastic_net_sizes=heteroskedastic_net_sizes,
                att_type=att_type,
                att_heads=att_heads,
                model_type=model_type,
                data_uncertainty=data_uncertainty,
                beta=beta,
                temperature=temperature,
                model_path=mpath,
                batch_size=512,
                activate_decay=True,
                initial_lr=0.1,
                max_grad_norm=5.0,
                show_training=False,
                freq_summary=1000,
                buffer_s=-1,
                initial_pulls=2,
                reset_lr=True,
                lr_decay_rate=0.5,
                training_freq=10,
                training_freq_network=20,
                training_epochs=50)
            algos.append(
                offline_contextual_bandits_gnp.OfflineContextualBandits(
                    algo_name, hparams))
            ckptfile = os.path.join(FLAGS.ckptdir, runfile)
            if gfile.Exists(ckptfile):
                save_once = True

    t_init = time.time()
    print('started')
    print([algo.name for algo in algos])
    _, h_rewards = contextual_bandit.run_contextual_bandit_new(
        context_dim,
        num_actions,
        dataset,
        algos,
        save_once=save_once,
        pkl_file=ckptfile)
    t_final = time.time()

    savedict = {'h_rewards': h_rewards, 'time': t_final - t_init}
    with gfile.Open(savefile, 'wb') as outfile:
        pickle.dump(savedict, outfile)
    return h_rewards, t_final - t_init
Example 3
def run_trial(trial_idx, delta, algo_names):
    """Runs a trial of wheel bandit problem instance for a set of algorithms."""

    all_algo_names = '_'.join(algo_names)
    runfile = str(delta) + '_' + str(trial_idx) + '_' + all_algo_names + '.pkl'
    savefile = os.path.join(FLAGS.savedir, runfile)
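    # Reuse cached results if this (delta, trial, algorithm set) run was already saved.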
    if gfile.Exists(savefile):
        print('File exists...terminating')
        with gfile.Open(savefile, 'rb') as infile:
            saved_state = pickle.load(infile, encoding='latin-1')
        return saved_state['h_rewards'], saved_state['time']

    filename = os.path.join(FLAGS.datasetdir,
                            str(delta) + '_' + str(trial_idx) + '.npz')
    with gfile.GFile(filename, 'rb') as f:  # binary mode so np.load can read the .npz archive
        sampled_vals = np.load(f)
        dataset = sampled_vals['dataset']

    x_hidden_size = 100
    x_encoder_sizes = [x_hidden_size] * 2

    algos = []
    ckptfile = None
    save_once = False
    for algo_name in algo_names:
        if algo_name == 'uniform':
            hparams = contrib_training.HParams(num_actions=num_actions)
            algos.append(uniform_sampling.UniformSampling(algo_name, hparams))
        elif algo_name == 'neurolinear':
            hparams = contrib_training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               init_scale=0.3,
                                               activation=tf.nn.relu,
                                               output_activation=tf.nn.relu,
                                               layer_sizes=x_encoder_sizes,
                                               batch_size=512,
                                               activate_decay=True,
                                               initial_lr=0.1,
                                               max_grad_norm=5.0,
                                               show_training=False,
                                               freq_summary=1000,
                                               buffer_s=-1,
                                               initial_pulls=2,
                                               reset_lr=True,
                                               lr_decay_rate=0.5,
                                               training_freq=1,
                                               training_freq_network=20,
                                               training_epochs=50,
                                               a0=12,
                                               b0=30,
                                               lambda_prior=23)
            algos.append(
                neural_linear_sampling.NeuralLinearPosteriorSampling(
                    algo_name, hparams))
        elif algo_name == 'multitaskgp':
            hparams_gp = contrib_training.HParams(
                num_actions=num_actions,
                num_outputs=num_actions,
                context_dim=context_dim,
                reset_lr=False,
                learn_embeddings=True,
                max_num_points=1000,
                show_training=False,
                freq_summary=1000,
                batch_size=512,
                keep_fixed_after_max_obs=True,
                training_freq=20,
                initial_pulls=2,
                training_epochs=50,
                lr=0.01,
                buffer_s=-1,
                initial_lr=0.001,
                lr_decay_rate=0.0,
                optimizer='RMS',
                task_latent_dim=5,
                activate_decay=False)
            algos.append(
                posterior_bnn_sampling.PosteriorBNNSampling(
                    algo_name, hparams_gp, 'GP'))
        elif algo_name[:3] == 'snp' or algo_name[:3] == 'anp':
            hidden_size = 64
            latent_units = 32
            global_latent_net_sizes = [hidden_size] * 2 + [2 * latent_units]
            if algo_name[:3] == 'snp':
                local_latent_net_sizes = [hidden_size] * 3 + [2]
            else:
                local_latent_net_sizes = [hidden_size] * 3 + [2 * 5]
            x_y_encoder_sizes = [hidden_size] * 3
            heteroskedastic_net_sizes = None
            mean_att_type = attention.laplace_attention
            scale_att_type_1 = attention.laplace_attention
            scale_att_type_2 = attention.laplace_attention
            att_type = 'multihead'
            att_heads = 8
            data_uncertainty = False
            is_anp = algo_name[:3] == 'anp'

            hparams = contrib_training.HParams(
                num_actions=num_actions,
                context_dim=context_dim,
                init_scale=0.3,
                activation=tf.nn.relu,
                output_activation=tf.nn.relu,
                x_encoder_sizes=x_encoder_sizes,
                x_y_encoder_sizes=x_y_encoder_sizes,
                global_latent_net_sizes=global_latent_net_sizes,
                local_latent_net_sizes=local_latent_net_sizes,
                heteroskedastic_net_sizes=heteroskedastic_net_sizes,
                att_type=att_type,
                att_heads=att_heads,
                mean_att_type=mean_att_type,
                scale_att_type_1=scale_att_type_1,
                scale_att_type_2=scale_att_type_2,
                data_uncertainty=data_uncertainty,
                batch_size=512,
                activate_decay=True,
                initial_lr=0.1,
                max_grad_norm=5.0,
                show_training=False,
                freq_summary=1000,
                buffer_s=-1,
                initial_pulls=2,
                reset_lr=True,
                lr_decay_rate=0.5,
                training_freq=10,
                training_freq_network=20,
                training_epochs=50,
                uncertainty_type='attentive_freeform',
                local_variational=True,
                model_path=None,
                is_anp=is_anp)

            config = algo_name.split('_')
            if config[1] == 'prior':
                hparams.set_hparam('local_variational', False)

            if config[2] == 'gp':
                hparams.set_hparam('uncertainty_type', 'attentive_gp')

            if config[3] == 'warmstart' or config[3] == 'offline':
                mfile = FLAGS.prefix + config[1] + '_' + config[2] + FLAGS.suffix
                if algo_name[:3] == 'anp':
                    mfile = 'anp_' + mfile
                mpath = os.path.join(FLAGS.modeldir, mfile)
                hparams.set_hparam('model_path', mpath)

            if config[3] == 'online' or config[3] == 'warmstart':
                algos.append(
                    online_contextual_bandits.OnlineContextualBandits(
                        algo_name, hparams))
            else:
                algos.append(
                    offline_contextual_bandits.OfflineContextualBandits(
                        algo_name, hparams))
                ckptfile = os.path.join(FLAGS.ckptdir, runfile)
                if gfile.Exists(ckptfile):
                    save_once = True

    t_init = time.time()
    print('started')
    _, h_rewards = run_contextual_bandit(dataset,
                                         algos,
                                         save_once=save_once,
                                         pkl_file=ckptfile)
    t_final = time.time()

    savedict = {'h_rewards': h_rewards, 'time': t_final - t_init}
    with gfile.Open(savefile, 'wb') as outfile:
        pickle.dump(savedict, outfile)
    return h_rewards, t_final - t_init
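# --- Illustrative aggregation sketch (not part of the original script) ---
# Reads back the per-trial pickle files written by run_trial above and stacks
# the saved rewards per delta. The particular deltas and trial count below are
# placeholders for this sketch only; the filename convention mirrors the one
# used when saving.
def load_saved_rewards_sketch(algo_names, deltas=(0.5, 0.7), num_trials=5):
    all_rewards = {}
    for delta in deltas:
        per_trial = []
        for trial_idx in range(num_trials):
            runfile = str(delta) + '_' + str(trial_idx) + '_' + '_'.join(algo_names) + '.pkl'
            with gfile.Open(os.path.join(FLAGS.savedir, runfile), 'rb') as infile:
                saved_state = pickle.load(infile, encoding='latin-1')
            per_trial.append(saved_state['h_rewards'])
        all_rewards[delta] = np.stack(per_trial)  # (num_trials, num_contexts, num_algos)
    return all_rewards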