def _build_sync_op(self):
    """Create the ops that copy 'Online' weights into the 'Target' network.

    A non-trainable integer counter is created alongside the copy ops; it is
    incremented by one of the returned ops and exported as the
    'Learning/SyncCount' scalar summary.

    Returns:
      A list of ops. The first increments the sync counter; the remaining
      ones each assign one trainable variable of the 'Online' scope to the
      variable at the same position in the 'Target' scope.
    """
    counter = tf.Variable(0, trainable=False)
    bump_counter = tf.assign_add(counter, 1)

    online_vars = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='Online')
    target_vars = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='Target')

    # Variables are paired purely by their position in each collection.
    copy_ops = [
        target_var.assign(online_var, use_locking=True)
        for online_var, target_var in zip(online_vars, target_vars)
    ]

    tf.summary.scalar('Learning/SyncCount', counter)
    return [bump_counter] + copy_ops
def build_graph(self):
    """Build the last-layer classifier graph and restore it from a checkpoint.

    Creates a fresh `tf.Graph` and a session, defines a single dense layer on
    top of the `x` placeholder trained with softmax cross-entropy, sets up a
    plain gradient-descent train op with gradient histograms, and finally
    restores the layer's variables from `self.model_dir`.

    Attributes set on `self`: `g`, `sess`, `x`, `y`, `z`, `loss`,
    `output_probs`, `ll_vars`, `ll_vars_concat`, `saver`, `gd_opt`,
    `train_op`, `all_summaries` and `writer`.
    """
    graph = tf.Graph()
    self.g = graph
    with graph.as_default():
        # The session is created first so that it is bound to this graph.
        self.sess = tf.Session()

        # Inputs: features and (one-hot / probability) labels.
        self.x = tf.placeholder(
            shape=[None, self.dim_input], dtype=tf.float32)
        self.y = tf.placeholder(
            shape=[None, self.num_classes], dtype=tf.float32)

        # The model is a single dense layer producing the logits.
        with tf.variable_scope('last_layer'):
            self.z = tf.layers.dense(inputs=self.x, units=self.num_classes)

        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=self.y, logits=self.z)
        self.loss = tf.reduce_mean(cross_entropy)
        self.output_probs = tf.nn.softmax(self.z)

        # Trainable variables of the graph; the first two are stacked into a
        # single matrix (second one appended as an extra row) for summaries.
        self.ll_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        stacked_parts = [
            self.ll_vars[0],
            tf.expand_dims(self.ll_vars[1], axis=0),
        ]
        self.ll_vars_concat = tf.concat(stacked_parts, 0)
        _variable_summaries(self.ll_vars_concat)

        # Saver used to store last-layer weights while running bootstrap.
        self.saver = tf.train.Saver(var_list=self.ll_vars)

        # Plain SGD on the cross-entropy loss.
        self.gd_opt = tf.train.GradientDescentOptimizer(self.step_size)
        grads_and_vars = self.gd_opt.compute_gradients(self.loss)
        self.train_op = self.gd_opt.apply_gradients(grads_and_vars)

        for grad, var in grads_and_vars:
            if grad is None:
                continue
            # Replace the last ':' of the variable name (e.g. 'kernel:0')
            # with '_' to build the summary tag.
            colon = var.name.rindex(':')
            tag = var.name[:colon] + '_' + var.name[colon + 1:]
            tf.summary.histogram(tag + '/grad_hist_boot_sgd', grad)

        # Merge every summary defined so far and open the event writer.
        self.all_summaries = tf.summary.merge_all()
        self.writer = tf.summary.FileWriter(
            os.path.join(self.working_dir, 'logs'), graph=graph)

        # Load the last-layer weights from the checkpoint.
        restorer = tf.train.Saver(var_list=self.ll_vars)
        print('Loading the network...')
        restorer.restore(self.sess, self.model_dir)
        print('Graph successfully loaded.')
def _build_sync_op(self):
    """Create the ops that copy 'Online' weights into the 'Target' network.

    Returns:
      ops: A list with one assign op per pair of trainable variables, taken
        position-by-position from the 'Online' and 'Target' scopes.
    """
    online_vars = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='Online')
    target_vars = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='Target')
    # Each target variable is overwritten with its online counterpart.
    return [
        target_var.assign(online_var, use_locking=True)
        for online_var, target_var in zip(online_vars, target_vars)
    ]
def get_train_op(self, global_step):
    """Return the op that performs one optimizer step on the training loss.

    Args:
      global_step: Step variable handed to the optimizer's `minimize` call.

    Returns:
      The op produced by `self.optimizer.minimize` for
      `self.losses['train']`, created under a control dependency on every op
      in the UPDATE_OPS collection (which is where batch-norm updates are
      registered).
    """
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(pending_updates):
        return self.optimizer.minimize(
            self.losses['train'], global_step=global_step)
def compute_loss(self, onehot_labels, predictions):
    """Return the MSE between labels and predictions plus regularization.

    Args:
      onehot_labels: A `tf.Tensor` containing the class labels.
      predictions: A `tf.Tensor` containing the class predictions.

    Returns:
      A `tf.Tensor`: `tf.losses.mean_squared_error(onehot_labels,
      predictions)` plus the sum of everything in the
      REGULARIZATION_LOSSES collection.
    """
    data_term = tf.losses.mean_squared_error(onehot_labels, predictions)
    penalty_terms = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    return data_term + tf.reduce_sum(penalty_terms)
def train_q(dataset,
            policy,
            optimizer=None,
            pack_transition_fn=None,
            q_graph_fn=None,
            log_dir=None,
            master='',
            task=0,
            training_steps=None,
            max_training_steps=100000,
            reuse=False,
            init_checkpoint=None,
            update_target_every_n_steps=50,
            log_every_n_steps=None,
            save_checkpoint_steps=500,
            save_summaries_steps=500):
    """Self-contained learning loop for offline Q-learning.

    Code inspired by OpenAI Baselines' deepq.build_train. This function is
    compatible with discrete Q-learning graphs, continuous Q learning graphs,
    and SARSA.

    Args:
      dataset: tf.data.Dataset providing transitions.
      policy: Instance of TFDQNPolicy class that provides functor for building
        the critic function.
      optimizer: Optional instance of an optimizer. If not specified, creates
        an AdamOptimizer using the default constructor.
      pack_transition_fn: Optional function that performs additional
        processing of the transition. This is a convenience method for ad-hoc
        manipulation of transition data passed to the learning function after
        parsing.
      q_graph_fn: Required. Function called as `q_graph_fn(q_func,
        transition)` that returns `(loss, all_summaries)`, i.e. the training
        objective w.r.t. critic outputs and the summary op to save.
      log_dir: Where to save model checkpoints and tensorboard summaries.
      master: Optional address of master worker. Specify this when doing
        distributed training.
      task: Optional worker task for distributed training. Defaults to solo
        master task on a single machine (task 0 is the chief).
      training_steps: Optional. If set, the loop returns early as soon as the
        observed global step is a multiple of `training_steps`.
        `max_training_steps` still applies.
      max_training_steps: Maximum number of training iterations; enforced
        with a `StopAtStepHook`. If falsy, no stop hook is installed.
      reuse: If True, reuse existing variables for all declared variables by
        this function.
      init_checkpoint: Optional checkpoint to restore prior to training. If
        not provided, variables are initialized by the training session.
      update_target_every_n_steps: How many global steps (training) between
        copying the Q network weights (scope='q_func') to target network
        (scope='target_q_func'). `None` disables the copy.
      log_every_n_steps: How many global steps between logging loss tensors.
        `None` disables both loss logging and the step counter.
      save_checkpoint_steps: How many global steps between saving TF
        variables to a checkpoint file.
      save_summaries_steps: How many global steps between saving TF
        summaries. `None` disables summary saving.

    Returns:
      A tuple `(np_step, done)`: `np_step` is the last `global_step` value
      fetched by the loop (0 if the loop never ran) and `done` is True when
      `np_step` has reached `max_training_steps`. `done` is False when
      `max_training_steps` is None.

    Raises:
      ValueError: If `q_graph_fn` is not provided.
    """
    # Fail fast with a clear message instead of a "NoneType is not callable"
    # error after part of the graph has already been built.
    if q_graph_fn is None:
        raise ValueError(
            'q_graph_fn must be provided to build the training objective.')

    data_iterator = dataset.make_one_shot_iterator()
    transition = data_iterator.get_next()
    if pack_transition_fn:
        transition = pack_transition_fn(transition)

    if optimizer is None:
        optimizer = tf.train.AdamOptimizer()

    q_func = policy.get_q_func(is_training=True, reuse=reuse)
    loss, all_summaries = q_graph_fn(q_func, transition)

    q_func_vars = contrib_framework.get_trainable_variables(scope='q_func')
    target_q_func_vars = contrib_framework.get_trainable_variables(
        scope='target_q_func')
    global_step = tf.train.get_or_create_global_step()

    # Only optimize q_func and update its batchnorm params.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope='q_func')
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(
            loss, global_step=global_step, var_list=q_func_vars)

    chief_hooks = []
    hooks = []

    # Save summaries periodically.
    if save_summaries_steps is not None:
        chief_hooks.append(
            tf.train.SummarySaverHook(
                save_steps=save_summaries_steps,
                output_dir=log_dir,
                summary_op=all_summaries))

    # Stop once the global step reaches max_training_steps.
    if max_training_steps:
        hooks.append(tf.train.StopAtStepHook(last_step=max_training_steps))

    # Report if loss tensor is NaN.
    hooks.append(tf.train.NanTensorHook(loss))

    if log_every_n_steps is not None:
        tensor_dict = {'global_step': global_step, 'train loss': loss}
        chief_hooks.append(
            tf.train.LoggingTensorHook(
                tensor_dict, every_n_iter=log_every_n_steps))
        # Measure how fast we are training per sec and save to summary.
        chief_hooks.append(
            tf.train.StepCounterHook(
                every_n_steps=log_every_n_steps, output_dir=log_dir))

    # If target network exists, periodically update target Q network with
    # new weights (frozen target network). This is done by abusing a
    # LoggingTensorHook: fetching the dummy constant forces the assigns to
    # run because of the control dependency.
    if target_q_func_vars and update_target_every_n_steps is not None:
        # Variables are paired after sorting both lists by name.
        sorted_online = sorted(q_func_vars, key=lambda v: v.name)
        sorted_target = sorted(target_q_func_vars, key=lambda v: v.name)
        assign_ops = [
            var_t.assign(var)
            for var, var_t in zip(sorted_online, sorted_target)
        ]
        update_target_expr = tf.group(*assign_ops)
        with tf.control_dependencies([update_target_expr]):
            update_target = tf.constant(0)
        chief_hooks.append(
            tf.train.LoggingTensorHook(
                {'update_target': update_target},
                every_n_iter=update_target_every_n_steps))

    # Save checkpoints periodically, save all of them.
    saver = tf.train.Saver(max_to_keep=None)
    chief_hooks.append(
        tf.train.CheckpointSaverHook(
            log_dir,
            save_steps=save_checkpoint_steps,
            saver=saver,
            checkpoint_basename='model.ckpt'))

    # Save our experiment params to checkpoint dir.
    chief_hooks.append(
        gin.tf.GinConfigSaverHook(log_dir, summarize_config=True))

    session_config = tf.ConfigProto(log_device_placement=False)

    init_fn = None
    if init_checkpoint:
        assign_fn = contrib_framework.assign_from_checkpoint_fn(
            init_checkpoint, contrib_framework.get_model_variables())

        def init_fn(unused_scaffold, sess):
            # Scaffold calls init_fn(scaffold, session); only the session is
            # needed to run the checkpoint assignment.
            assign_fn(sess)

    scaffold = tf.train.Scaffold(saver=saver, init_fn=init_fn)
    with tf.train.MonitoredTrainingSession(
        master=master,
        is_chief=(task == 0),
        config=session_config,
        checkpoint_dir=log_dir,
        scaffold=scaffold,
        hooks=hooks,
        chief_only_hooks=chief_hooks) as sess:
        np_step = 0
        while not sess.should_stop():
            np_step, _ = sess.run([global_step, train_op])
            if training_steps and np_step % training_steps == 0:
                break

    # Guard against max_training_steps=None, for which the comparison below
    # would raise a TypeError.
    done = max_training_steps is not None and np_step >= max_training_steps
    return np_step, done
def build_graph(self):
    """Builds the preconditioned last-layer graph and restores its weights.

    A new `tf.Graph` and session are created. The last layer is a linear
    map whose kernel and bias are stacked into one matrix and left-multiplied
    by `self.sigma_half_inv` before being applied to the input. Depending on
    `self.sampler`, the train op is either noisy gradient descent
    ('sgld' / 'lmc') or plain gradient descent ('sgd'); for any other value
    no `train_op` is defined by this method. Finally the layer variables are
    restored from `self.model_dir`.

    Attributes set on `self`: `g`, `sess`, `x`, `y`, `z`, `loss`,
    `bayesian_loss`, `output_probs`, `ll_vars`, `ll_vars_concat`, `saver`,
    `gd_opt`, `train_op` (sampler permitting), `all_summaries`, `writer`.
    """
    # define graph
    self.g = tf.Graph()
    with self.g.as_default():
        # create and store a new session for the graph
        self.sess = tf.Session()
        # define placeholders: features and labels
        self.x = tf.placeholder(shape=[None, self.dim_input], dtype=tf.float32)
        self.y = tf.placeholder(shape=[None, self.num_classes], dtype=tf.float32)
        # linear layer (WX + b), built by hand so that the parameters can be
        # preconditioned before use
        with tf.variable_scope('last_layer/dense') as scope:
            weights = tf.get_variable('kernel', [self.dim_input, self.num_classes], dtype=tf.float32)
            biases = tf.get_variable('bias', [self.num_classes], dtype=tf.float32)
            # Stack the bias as an extra last row below the kernel.
            wb = tf.concat([weights, tf.expand_dims(biases, axis=0)], 0)
            # Precondition the stacked parameters.
            # NOTE(review): presumably sigma_half_inv is square with
            # dim_input + 1 rows - confirm against where it is set.
            wb_renorm = tf.matmul(self.sigma_half_inv, wb)
            # Split the preconditioned matrix back into kernel and bias.
            weights_renorm = wb_renorm[:self.dim_input, :]
            biases_renorm = wb_renorm[-1, :]
            self.z = tf.add(tf.matmul(self.x, weights_renorm), biases_renorm, name=scope.name)
        # Gaussian prior (currently disabled)
        # prior = tf.nn.l2_loss(weights) + tf.nn.l2_loss(biases)
        # Non normalized loss, because of the preconditioning: the mean
        # cross-entropy is scaled by self.n
        self.loss = self.n * tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.y, logits=self.z))
        # Bayesian loss; identical to self.loss while the prior is disabled
        self.bayesian_loss = self.loss # + prior
        self.output_probs = tf.nn.softmax(self.z)
        # Variables of the last layer
        self.ll_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        # First two trainable variables stacked into one matrix (the second
        # one as an extra row); used only for the summaries below.
        self.ll_vars_concat = tf.concat([self.ll_vars[0], tf.expand_dims(self.ll_vars[1], axis=0)], 0)
        # Summary
        _variable_summaries(self.ll_vars_concat)
        # saving the weights of last layer when running SGLD/SGD/MCMC
        # algorithm; keeps up to self.num_samples checkpoints
        self.saver = tf.train.Saver(var_list=self.ll_vars, max_to_keep=self.num_samples)
        self.gd_opt = tf.train.GradientDescentOptimizer(self.step_size)
        # SGLD optimizer for the last layer
        if self.sampler in ['sgld', 'lmc']:
            grads_vars = self.gd_opt.compute_gradients(self.bayesian_loss)
            grads_vars_sgld = []
            for g, v in grads_vars:
                if g is not None:
                    # Summary tag: the variable name with its last ':'
                    # replaced by '_'.
                    s = list(v.name)
                    s[v.name.rindex(':')] = '_'
                    # Adding Gaussian noise to the gradient, scaled by
                    # sqrt(2 / step_size)
                    gaussian_noise = (np.sqrt(2. / self.step_size) * tf.random_normal(tf.shape(g)))
                    g_sgld = g + gaussian_noise
                    tf.summary.histogram(''.join(s) + '/grad_hist_mcmc', g)
                    tf.summary.histogram(''.join(s) + '/gaussian_noise_hist_mcmc', gaussian_noise)
                    tf.summary.histogram(''.join(s) + '/grad_total_hist_mcmc', g_sgld)
                    grads_vars_sgld.append((g_sgld, v))
            self.train_op = self.gd_opt.apply_gradients(grads_vars_sgld)
        # SGD optimizer for the last layer (no injected noise)
        if self.sampler == 'sgd':
            grads_vars_sgd = self.gd_opt.compute_gradients(self.loss)
            self.train_op = self.gd_opt.apply_gradients(grads_vars_sgd)
            for g, v in grads_vars_sgd:
                if g is not None:
                    s = list(v.name)
                    s[v.name.rindex(':')] = '_'
                    tf.summary.histogram(''.join(s) + '/grad_hist_sgd', g)
        # Merge all the summaries and write them out
        self.all_summaries = tf.summary.merge_all()
        location = os.path.join(self.working_dir, 'logs')
        self.writer = tf.summary.FileWriter(location, graph=self.g)
        # Separate saver used only for the initial restore.
        saver_network = tf.train.Saver(var_list=self.ll_vars)
        print('loading the network ...')
        # Restores from checkpoint
        saver_network.restore(self.sess, self.model_dir)
        print('Graph successfully loaded.')
def compute_regularizer(self, onehot_labels, predictions):
    """Return the summed regularization losses; both arguments are ignored.

    Args:
      onehot_labels: Unused.
      predictions: Unused.

    Returns:
      A `tf.Tensor` holding the sum of the REGULARIZATION_LOSSES collection.
    """
    del onehot_labels, predictions  # Part of the interface only.
    penalty_terms = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    return tf.reduce_sum(penalty_terms)
def build_graph(self):
    """Build the last-layer graph with an L2 prior and restore its weights.

    Creates a fresh `tf.Graph` and session, a single dense layer trained
    with softmax cross-entropy, and a "Bayesian" loss equal to
    `self.n * self.loss` plus an L2 penalty (scale 1.0) on the layer
    variables. Depending on `self.sampler` the train op is noisy gradient
    descent ('sgld' / 'lmc', step `self.step_size / self.n`) or plain
    gradient descent ('sgd'); other values leave `train_op` undefined here.
    The layer variables are then restored from `self.model_dir`.

    Attributes set on `self`: `g`, `sess`, `x`, `y`, `z`, `loss`,
    `output_probs`, `ll_vars`, `ll_vars_concat`, `bayesian_loss`, `saver`,
    `train_op` (sampler permitting), `all_summaries` and `writer`.
    """
    graph = tf.Graph()
    self.g = graph
    with graph.as_default():
        # The session is created first so that it is bound to this graph.
        self.sess = tf.Session()

        # Inputs: features and labels.
        self.x = tf.placeholder(
            shape=[None, self.dim_input], dtype=tf.float32)
        self.y = tf.placeholder(
            shape=[None, self.num_classes], dtype=tf.float32)

        # The model is a single dense layer producing the logits.
        with tf.variable_scope('last_layer'):
            self.z = tf.layers.dense(inputs=self.x, units=self.num_classes)

        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=self.y, logits=self.z)
        self.loss = tf.reduce_mean(cross_entropy)
        self.output_probs = tf.nn.softmax(self.z)

        # Trainable variables of the graph; the first two are stacked into a
        # single matrix (second one appended as an extra row) for summaries.
        self.ll_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        stacked_parts = [
            self.ll_vars[0],
            tf.expand_dims(self.ll_vars[1], axis=0),
        ]
        self.ll_vars_concat = tf.concat(stacked_parts, 0)
        _variable_summaries(self.ll_vars_concat)

        # L2 regularization acting as a unit Gaussian prior on the layer.
        l2_penalty_fn = tf.contrib.layers.l2_regularizer(1.0)
        prior_term = tf.contrib.layers.apply_regularization(
            l2_penalty_fn, self.ll_vars)
        self.bayesian_loss = self.n * self.loss + prior_term

        # Saver for the weight samples produced by SGLD/SGD/MCMC.
        self.saver = tf.train.Saver(
            var_list=self.ll_vars, max_to_keep=self.num_samples)

        # Langevin-style sampler: gradient descent on noisy gradients.
        if self.sampler in ('sgld', 'lmc'):
            langevin_step = self.step_size / self.n
            langevin_opt = tf.train.GradientDescentOptimizer(langevin_step)
            noisy_grads_and_vars = []
            for grad, var in langevin_opt.compute_gradients(
                    self.bayesian_loss):
                if grad is None:
                    continue
                # Summary tag: variable name with its last ':' turned into
                # '_'.
                colon = var.name.rindex(':')
                tag = var.name[:colon] + '_' + var.name[colon + 1:]
                # Gaussian noise scaled by sqrt(2 / step), added to the
                # gradient.
                noise = (np.sqrt(2. / langevin_step)
                         * tf.random_normal(tf.shape(grad)))
                noisy_grad = grad + noise
                # Histograms are reported divided by self.n.
                tf.summary.histogram(
                    tag + '/grad_hist_mcmc', grad / self.n)
                tf.summary.histogram(
                    tag + '/gaussian_noise_hist_mcmc', noise / self.n)
                tf.summary.histogram(
                    tag + '/grad_total_hist_mcmc', noisy_grad / self.n)
                noisy_grads_and_vars.append((noisy_grad, var))
            self.train_op = langevin_opt.apply_gradients(
                noisy_grads_and_vars)

        # Plain SGD on the (unscaled) cross-entropy loss.
        if self.sampler == 'sgd':
            sgd_opt = tf.train.GradientDescentOptimizer(self.step_size)
            sgd_grads_and_vars = sgd_opt.compute_gradients(self.loss)
            self.train_op = sgd_opt.apply_gradients(sgd_grads_and_vars)
            for grad, var in sgd_grads_and_vars:
                if grad is None:
                    continue
                colon = var.name.rindex(':')
                tag = var.name[:colon] + '_' + var.name[colon + 1:]
                tf.summary.histogram(tag + '/grad_hist_sgd', grad)

        # Merge every summary defined so far and open the event writer.
        self.all_summaries = tf.summary.merge_all()
        self.writer = tf.summary.FileWriter(
            os.path.join(self.working_dir, 'logs'), graph=graph)

        # Variables are not initialized here; they are loaded from the
        # checkpoint instead.
        restorer = tf.train.Saver(var_list=self.ll_vars)
        print('loading the network ...')
        restorer.restore(self.sess, self.model_dir)
        print('Graph successfully loaded.')
def compute_regularizer(self):
    """Return the data-independent regularizer.

    Returns:
      A `tf.Tensor` holding the sum of the REGULARIZATION_LOSSES collection.
    """
    penalty_terms = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    return tf.reduce_sum(penalty_terms)
def get_train_op(loss,
                 learning_rate=0.001,
                 lr_decay_steps=10000,
                 lr_decay_rate=0.98,
                 gradient_clip_norm=3.0,
                 use_tpu=True,
                 variables=None):
    """Get training operation with gradient clipping and learning rate decay.

    Distilled from tf.contrib.layers.optimize_loss().

    Args:
      loss: Scalar tensor of the loss function.
      learning_rate: Scalar initial learning rate.
      lr_decay_steps: Exponential decay timescale.
      lr_decay_rate: Exponential decay magnitude.
      gradient_clip_norm: Global norm by which to scale gradients. Any int or
        float enables clipping (ints are converted to float); any other
        value, e.g. `None`, disables it.
      use_tpu: Use tpu for training (wraps the optimizer in a
        `CrossShardOptimizer`).
      variables: List of variables to optimize. tf.trainable_variables() if
        None.

    Returns:
      train_op: Tensor with the value of `loss` whose evaluation runs one
        iteration of training.
    """
    global_step = tf.train.get_or_create_global_step()

    with tf.variable_scope('training', values=[loss, global_step]):
        # Make sure update ops run before computing loss. Duplicates are
        # dropped while keeping collection order, so that the resulting
        # control dependencies are the same from run to run.
        update_ops = list(
            dict.fromkeys(tf.get_collection(tf.GraphKeys.UPDATE_OPS)))
        with tf.control_dependencies(update_ops):
            loss = tf.identity(loss)

        # Learning rate variable, with staircase exponential decay.
        lr = tf.get_variable(
            'learning_rate', [],
            trainable=False,
            initializer=tf.constant_initializer(learning_rate))
        lr = tf.train.exponential_decay(
            lr,
            global_step,
            decay_steps=lr_decay_steps,
            decay_rate=lr_decay_rate,
            staircase=True)

        # Optimizer.
        opt = tf.train.AdamOptimizer(lr)
        if use_tpu:
            opt = tf.tpu.CrossShardOptimizer(opt)

        # All trainable variables, if specific variables are not specified.
        if variables is None:
            variables = tf.trainable_variables()

        # Compute gradients.
        gradients = opt.compute_gradients(
            loss, variables, colocate_gradients_with_ops=False)

        # Optionally clip gradients by global norm. Integer norms are
        # accepted too; a float-only check would silently skip clipping for
        # e.g. `gradient_clip_norm=3`. bool is excluded because it is a
        # subclass of int.
        clip_requested = (
            isinstance(gradient_clip_norm, (int, float))
            and not isinstance(gradient_clip_norm, bool))
        if clip_requested:
            gradients = _clip_gradients_by_norm(
                gradients, float(gradient_clip_norm))

        # Create gradient updates.
        grad_updates = opt.apply_gradients(
            gradients, global_step=global_step, name='train')

        # Ensure the train_op computes grad_updates.
        with tf.control_dependencies([grad_updates]):
            train_op = tf.identity(loss)

        return train_op