Beispiel #1
0
    def __init__(self, hparams):
        """Store the hyperparameters and build the model graph and session.

        A dedicated `tf.Graph` and session are created for this instance,
        float64 placeholders are declared inside a variable scope named
        after the model, `build_model` is invoked to add the model ops, and
        finally all global variables are initialized.

        Args:
          hparams: Hyperparameter container. The attributes read here are
            `context_dim`, `num_outputs`, `keep_fixed_after_max_obs`,
            `show_training`, `freq_summary`, `task_latent_dim`,
            `max_num_points` and `learn_embeddings`.
        """
        model_name = "MultiTaskGP"
        self.bnn_model_name = model_name
        self.name = model_name
        self.hparams = hparams

        # Input / output dimensions.
        self.n_in = hparams.context_dim
        self.n_out = hparams.num_outputs

        # Size of the latent vector associated with each task.
        self.task_latent_dim = hparams.task_latent_dim

        # Upper bound on the number of observations that are included.
        self.max_num_points = hparams.max_num_points
        self.keep_fixed_after_max_obs = hparams.keep_fixed_after_max_obs

        # Any falsy setting is normalized to the literal False.
        self.learn_embeddings = hparams.learn_embeddings or False

        # Logging controls.
        self._show_training = hparams.show_training
        self._freq_summary = hparams.freq_summary

        # Every instance owns its own graph and a session bound to it.
        self.graph = tf.Graph()
        with self.graph.as_default():
            from bandits.algorithms import utils
            self.sess = utils.create_session()

            context_shape = [None, self.n_in]
            output_shape = [None, self.n_out]

            with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
                # Placeholders are declared in a fixed order so that their
                # auto-generated op names remain stable.
                self.n = tf.placeholder(tf.float64, shape=[])
                self.x = tf.placeholder(tf.float64, shape=context_shape)
                self.x_in = tf.placeholder(tf.float64, shape=context_shape)
                self.y = tf.placeholder(tf.float64, shape=output_shape)
                self.weights = tf.placeholder(tf.float64, shape=output_shape)

                self.build_model()

            init_op = tf.global_variables_initializer()
            self.sess.run(init_op)
Beispiel #2
0
    def initialize_model(self):
        """Create the graph, session, placeholders and BNN ops for the model.

        Builds a fresh `tf.Graph` with its own session, declares float32
        placeholders for contexts (`x`), targets (`y`), per-output weights
        (`w`) and the dataset size, and passes them with the hyperparameters
        to `nnet.build_bnn`. The prediction tensor `y_pred` is taken from one
        uniformly chosen entry along the first axis of the returned
        predictions. A summary op and a graph writer are set up, then all
        global variables are initialized.
        """
        self.graph = tf.Graph()
        with self.graph.as_default():
            from bandits.algorithms import utils
            self.sess = utils.create_session()

            output_shape = [None, self.n_out]
            self.x = tf.placeholder(
                tf.float32, shape=[None, self.n_in], name='x')
            self.y = tf.placeholder(tf.float32, shape=output_shape, name='y')
            self.weights = tf.placeholder(
                tf.float32, shape=output_shape, name='w')
            self.data_size = tf.placeholder(
                tf.float32, shape=(), name='data_size')

            bnn = nnet.build_bnn(
                self.x, self.y, self.weights, self.data_size, self.hparams)
            self.infer_op, all_preds, self.bnn_locals = bnn
            self.log_likelihood = self.bnn_locals['log_likelihood']
            self.global_step = self.bnn_locals['global_step']

            # NOTE: it is unclear whether predictions should be sampled from
            # the joint or from the marginals; the var and bootstrap methods
            # in this lib used the joint, which is what is done below. The
            # per-output (marginal) alternative would be:
            #   idc = tf.random_uniform(
            #       [self.n_out], 0, self.hparams.n_particles, dtype=tf.int32)
            #   self.y_pred = tf.concat(
            #       [all_preds[idc[i], :, i:i+1] for i in range(self.n_out)],
            #       axis=-1)
            particle_idx = tf.random_uniform(
                shape=[],
                minval=0,
                maxval=self.hparams.n_particles,
                dtype=tf.int32)
            self.y_pred = all_preds[particle_idx]

            self.summary_op = tf.summary.merge_all()
            graph_logdir = '{}/graph_{}'.format(FLAGS.logdir, self.name)
            self.summary_writer = tf.summary.FileWriter(
                graph_logdir, self.sess.graph)

            init_op = tf.global_variables_initializer()
            self.sess.run(init_op)
  def build_graph(self):
    """Sets up the graph, its session, the input placeholders and the model.

    The float32 placeholders created here are:
      n: scalar holding the size of the dataset.
      x: contexts, one row per example with `n_in` columns.
      y: observed reward for each action, `n_out` columns.
      weights: one-hot encoding of the selected action for each context,
        i.e. it marks the only possibly non-zero element in each row of y.

    After the placeholders exist, `build_model` adds the model ops and all
    global variables are initialized in the new session.
    """
    graph = tf.Graph()
    self.graph = graph

    with graph.as_default():
      from bandits.algorithms import utils
      self.sess = utils.create_session()

      reward_shape = [None, self.n_out]

      self.n = tf.placeholder(tf.float32, shape=[])
      self.x = tf.placeholder(tf.float32, shape=[None, self.n_in])
      self.y = tf.placeholder(tf.float32, shape=reward_shape)
      self.weights = tf.placeholder(tf.float32, shape=reward_shape)

      self.build_model()

      init_op = tf.global_variables_initializer()
      self.sess.run(init_op)
Beispiel #4
0
    def build_model(self):
        """Build the NN graph, its loss, and the training ops.

        The loss targets partial-feedback (bandit) settings: the squared
        error is multiplied element-wise by `weights`, so only the observed
        outcome contributes to the gradient. The method also sets up the
        learning rate, the summaries, gradient clipping and the optimizer,
        and finishes by calling `initialize_graph`.
        """
        hp = self.hparams

        # Each instance keeps its own graph and a session bound to it.
        self.graph = tf.Graph()

        with self.graph.as_default():
            from bandits.algorithms import utils
            self.sess = utils.create_session()

            with tf.name_scope(self.name):
                self.global_step = tf.train.get_or_create_global_step()

                context_shape = [None, hp.context_dim]
                action_shape = [None, hp.num_actions]

                # Context fed to the network.
                self.x = tf.placeholder(
                    tf.float32,
                    shape=context_shape,
                    name="{}_x".format(self.name))

                # Reward vector.
                self.y = tf.placeholder(
                    tf.float32,
                    shape=action_shape,
                    name="{}_y".format(self.name))

                # Weights: 1 for the selected action, 0 otherwise.
                self.weights = tf.placeholder(
                    tf.float32,
                    shape=action_shape,
                    name="{}_w".format(self.name))

                self.nn, self.y_pred = self.forward_pass()

                # Masked squared error, summed and scaled by the batch size.
                self.loss = tf.squared_difference(self.y_pred, self.y)
                self.weighted_loss = tf.multiply(self.weights, self.loss)
                total_loss = tf.reduce_sum(self.weighted_loss)
                self.cost = total_loss / hp.batch_size

                # Learning rate: fixed variable, or inverse-time decay.
                if not hp.activate_decay:
                    self.lr = tf.Variable(hp.initial_lr, trainable=False)
                else:
                    self.lr = tf.train.inverse_time_decay(
                        hp.initial_lr, self.global_step, 1,
                        hp.lr_decay_rate)

                # TensorBoard metrics and graph writer.
                self.create_summaries()
                graph_logdir = "{}/graph_{}".format(FLAGS.logdir, self.name)
                self.summary_writer = tf.summary.FileWriter(
                    graph_logdir, self.sess.graph)

                # Clip gradients by global norm before applying them.
                trainable_vars = tf.trainable_variables()
                raw_grads = tf.gradients(self.cost, trainable_vars)
                clipped_grads, _ = tf.clip_by_global_norm(
                    raw_grads, hp.max_grad_norm)

                self.optimizer = self.select_optimizer()
                self.train_op = self.optimizer.apply_gradients(
                    zip(clipped_grads, trainable_vars),
                    global_step=self.global_step)

                self.init = tf.global_variables_initializer()

                self.initialize_graph()