def dns_grad_op(loss, optimizer: tf.train.Optimizer, variables=None, global_step=None):
    """
    Create an operation that updates the weights by gradient descent.

    In DNS, the weights are updated according to their derivative with
    respect to the masked values, but the update is applied to the
    non-masked values, so that zeroed-out weights may still change and,
    in particular, be spliced back in if necessary.

    Parameters
    ----------
    loss: A `tf.Tensor` representing the loss.
    optimizer: The optimizer to use.
    variables: The variables for which to create the gradient operation.
    global_step: An optional global step to increment.

    Returns
    -------
    train_op: A TensorFlow op that, when run, updates the variables
        according to the gradient.
    """
    if variables is None:
        variables = tf.trainable_variables()

    replaced = {}
    wrt_variables = []
    num_replaced = 0
    for v in variables:
        # Look for variables having shadow values.
        mvs = tf.get_collection(MASKED_WEIGHT_COLLECTION, v.op.name)
        if len(mvs) == 0:
            wrt_variables.append(v)
        elif len(mvs) == 1:
            num_replaced += 1
            wrt_variables.append(mvs[0])
            replaced[mvs[0]] = v
        else:
            raise ValueError('More than one masked weight for a given variable.')

    tf.logging.info('Replaced {0} variables for Dynamic Network Surgery'.format(num_replaced))

    # Differentiate with respect to the masked tensors, then map each
    # gradient back onto the underlying (non-masked) variable.
    grads_and_vars = optimizer.compute_gradients(loss, wrt_variables)
    grads_and_vars = [(g, replaced.get(v, v)) for g, v in grads_and_vars]
    train_op = optimizer.apply_gradients(grads_and_vars, global_step, 'dns_grad_op')
    return train_op
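# A minimal sketch (layer name, shapes, and the collection's value are
# illustrative, not from the source; MASKED_WEIGHT_COLLECTION is defined by
# the surrounding module, and a placeholder value is used here only to keep
# the sketch self-contained) of the wiring dns_grad_op expects: the kernel
# `w` is the variable that receives updates, while `w * mask` is what the
# forward pass uses and what the loss is differentiated against. The masked
# tensor is registered under a name prefixed by `w.op.name`, which is how
# the scoped tf.get_collection lookup above finds it.
MASKED_WEIGHT_COLLECTION = 'masked_weights'

def masked_dense(x, units):
    in_units = x.shape[-1].value
    w = tf.get_variable('kernel', shape=(in_units, units))
    # The binary mask is not trainable; DNS splices entries in and out
    # separately from the gradient step.
    mask = tf.get_variable('mask', shape=(in_units, units),
                           initializer=tf.ones_initializer(),
                           trainable=False)
    with tf.name_scope(w.op.name + '/'):
        masked_w = tf.multiply(w, mask, name='masked')
    tf.add_to_collection(MASKED_WEIGHT_COLLECTION, masked_w)
    return tf.matmul(x, masked_w)

# Example usage: gradients flow through `masked_w`, updates land on `w`.
# x = tf.placeholder(tf.float32, shape=(None, 16))
# loss = tf.reduce_mean(tf.square(masked_dense(x, 4)))
# train_op = dns_grad_op(loss, tf.train.MomentumOptimizer(0.1, 0.9),
#                        global_step=tf.train.get_or_create_global_step())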
def get_gradient_op(tensors: MDPTensors,
                    objective_initial_scales: SRLObjectives,
                    optimizer: tf.train.Optimizer,
                    gradient_clip: Optional[float],
                    **kwargs):
    objectives: SRLObjectives = SRLObjectives(
        value_function=ValueFunction(
            tensors, objective_initial_scales.value_function, **kwargs),
        reward_prediction=RewardPrediction(
            tensors, objective_initial_scales.reward_prediction, **kwargs),
        auto_encoding=AutoEncodingPrediction(
            tensors, objective_initial_scales.auto_encoding, **kwargs),
        forward_dynamics=ForwardDynamicsPrediction(
            tensors, objective_initial_scales.forward_dynamics, **kwargs),
        inverse_dynamics=InverseDynamicsPrediction(
            tensors, objective_initial_scales.inverse_dynamics, **kwargs),
        slowness=SlownessLoss(tensors, objective_initial_scales.slowness, **kwargs),
        diversity=DiversityLoss(tensors, objective_initial_scales.diversity, **kwargs),
    )
    # Only objectives that exist and have a positive scale contribute.
    active_objectives = [
        o for o in objectives
        if o is not None and backend.get_value(o.scale) > 0
    ]
    total_loss = backend.mean(
        backend.stack([o.loss for o in active_objectives]))

    if gradient_clip is not None:
        # Clip each gradient tensor individually by its own norm.
        gradients = optimizer.compute_gradients(total_loss)
        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                gradients[i] = (tf.clip_by_norm(grad, gradient_clip), var)
        return optimizer.apply_gradients(gradients)
    else:
        return optimizer.minimize(total_loss)
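# Minimal self-contained illustration (everything below is constructed for
# this example, not taken from the source) of the per-tensor clipping used
# above: tf.clip_by_norm rescales each gradient independently, unlike
# tf.clip_by_global_norm, which rescales all gradients by a shared factor.
import tensorflow as tf

x = tf.get_variable('x', shape=(), initializer=tf.constant_initializer(3.0))
loss = tf.square(x)  # d(loss)/dx = 2x, i.e. 6.0 at x == 3.0
opt = tf.train.GradientDescentOptimizer(learning_rate=0.1)
grads_and_vars = opt.compute_gradients(loss)
clipped = [(tf.clip_by_norm(g, 1.0), v)
           for g, v in grads_and_vars if g is not None]
train_op = opt.apply_gradients(clipped)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)
    print(sess.run(x))  # 2.9: the gradient 6.0 was clipped to norm 1.0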
def __init__(
    self,
    obs_spec: specs.Array,
    action_spec: specs.DiscreteArray,
    network: snt.RNNCore,
    optimizer: tf.train.Optimizer,
    sequence_length: int,
    td_lambda: float,
    agent_discount: float,
    seed: int,
):
    """A recurrent actor-critic agent."""
    del action_spec  # unused
    tf.set_random_seed(seed)
    self._sequence_length = sequence_length
    self._num_transitions_in_buffer = 0

    # Create the policy ops.
    obs = tf.placeholder(shape=(1,) + obs_spec.shape, dtype=obs_spec.dtype)
    mask = tf.placeholder(shape=(1,), dtype=tf.float32)
    state = self._placeholders_like(network.initial_state(batch_size=1))
    (online_logits, _), next_state = network((obs, mask), state)
    action = tf.squeeze(tf.multinomial(online_logits, 1, output_dtype=tf.int32))

    # Create placeholders and numpy arrays for learning from trajectories.
    shapes = [obs_spec.shape, (), (), (), ()]
    dtypes = [obs_spec.dtype, np.int32, np.float32, np.float32, np.float32]

    placeholders = [
        tf.placeholder(shape=(self._sequence_length, 1) + shape, dtype=dtype)
        for shape, dtype in zip(shapes, dtypes)]
    observations, actions, rewards, discounts, masks = placeholders

    # Build actor and critic losses.
    (logits, values), final_state = tf.nn.dynamic_rnn(
        network, (observations, tf.expand_dims(masks, -1)),
        initial_state=state, dtype=tf.float32, time_major=True)
    (_, bootstrap_value), _ = network((obs, mask), final_state)
    values, bootstrap_value = tree.map_structure(
        lambda t: tf.squeeze(t, axis=-1), (values, bootstrap_value))
    critic_loss, (advantages, _) = td_lambda_loss(
        state_values=values,
        rewards=rewards,
        pcontinues=agent_discount * discounts,
        bootstrap_value=bootstrap_value,
        lambda_=td_lambda)
    actor_loss = discrete_policy_gradient_loss(logits, actions, advantages)

    # Updates.
    grads_and_vars = optimizer.compute_gradients(actor_loss + critic_loss)
    grads, _ = tf.clip_by_global_norm([g for g, _ in grads_and_vars], 5.)
    grads_and_vars = [(g, pair[1]) for g, pair in zip(grads, grads_and_vars)]
    train_op = optimizer.apply_gradients(grads_and_vars)

    # Create TF session and callables.
    session = tf.Session()
    self._reset_fn = session.make_callable(
        network.initial_state(batch_size=1))
    self._policy_fn = session.make_callable(
        [action, next_state], [obs, mask, state])
    self._update_fn = session.make_callable(
        [train_op, final_state], placeholders + [obs, mask, state])
    session.run(tf.global_variables_initializer())

    # Initialize numpy buffers.
    self.state = self._reset_fn()
    self.update_init_state = self._reset_fn()
    self.arrays = [
        np.zeros(shape=(self._sequence_length, 1) + shape, dtype=dtype)
        for shape, dtype in zip(shapes, dtypes)]
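# Hedged sketch (the method name `select_action` and the mask convention of
# 1.0 while the episode continues are assumptions, not from the source) of
# how the callables built above would be driven from an environment loop:
# `_policy_fn` maps (observation, mask, recurrent state) to a sampled action
# and the next recurrent state, which is carried in `self.state`.
def select_action(self, observation, episode_continues=True):
    obs = observation[None, ...]  # add the leading batch dimension of 1
    mask = np.array([1.0 if episode_continues else 0.0], dtype=np.float32)
    action, self.state = self._policy_fn(obs, mask, self.state)
    return int(action)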
def compute_adam_gradients(self, adam: tf.train.Optimizer, loss, variables):
    from tensorflow.python.training.optimizer import Optimizer
    from tensorflow.python.eager import context
    from tensorflow.python.framework import dtypes
    from tensorflow.python.framework import ops
    from tensorflow.python.ops import control_flow_ops
    from tensorflow.python.ops import variable_scope
    from tensorflow.python.ops import variables as tf_variables
    from tensorflow.python.training import distribute as distribute_lib
    from tensorflow.python.training import distribution_strategy_context
    from tensorflow.python.util import nest

    def compute_gradients(optimizer, loss, var_list=None,
                          gate_gradients=Optimizer.GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          grad_loss=None):
        if callable(loss):
            from tensorflow.python.eager import backprop
            with backprop.GradientTape() as tape:
                if var_list is not None:
                    tape.watch(var_list)
                loss_value = loss()

                # Scale loss if using a "mean" loss reduction and multiple
                # towers. Have to be careful to call
                # distribute_lib.get_loss_reduction() *after* loss() is
                # evaluated, so we know what loss reduction it uses.
                # TODO(josh11b): Test that we handle weight decay in a
                # reasonable way.
                if (distribute_lib.get_loss_reduction() ==
                        variable_scope.VariableAggregation.MEAN):
                    num_towers = distribution_strategy_context \
                        .get_distribution_strategy().num_towers
                    if num_towers > 1:
                        loss_value *= (1. / num_towers)

            if var_list is None:
                var_list = tape.watched_variables()
            # TODO(jhseu): Figure out why GradientTape's gradients don't
            # require loss to be executed.
            with ops.control_dependencies([loss_value]):
                grads = tape.gradient(loss_value, var_list, grad_loss)
            return list(zip(grads, var_list))

        # Non-callable/Tensor loss case.
        if context.executing_eagerly():
            raise RuntimeError(
                "`loss` passed to Optimizer.compute_gradients should "
                "be a function when eager execution is enabled.")

        # Scale loss if using a "mean" loss reduction and multiple towers.
        if (distribute_lib.get_loss_reduction() ==
                variable_scope.VariableAggregation.MEAN):
            num_towers = distribution_strategy_context \
                .get_distribution_strategy().num_towers
            if num_towers > 1:
                loss *= (1. / num_towers)

        if gate_gradients not in [
                Optimizer.GATE_NONE, Optimizer.GATE_OP, Optimizer.GATE_GRAPH
        ]:
            raise ValueError(
                "gate_gradients must be one of: Optimizer.GATE_NONE, "
                "Optimizer.GATE_OP, Optimizer.GATE_GRAPH. Not %s" % gate_gradients)
        optimizer._assert_valid_dtypes([loss])
        if grad_loss is not None:
            optimizer._assert_valid_dtypes([grad_loss])
        if var_list is None:
            # The enclosing `variables` argument shadows the
            # tensorflow.python.ops.variables module used by the upstream
            # TensorFlow source, hence the `tf_variables` alias above.
            var_list = (tf_variables.trainable_variables() +
                        ops.get_collection(
                            ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
        else:
            var_list = nest.flatten(var_list)
        # pylint: disable=protected-access
        var_list += ops.get_collection(ops.GraphKeys._STREAMING_MODEL_PORTS)
        # pylint: enable=protected-access
        from tensorflow.python.training.optimizer import _get_processor
        processors = [_get_processor(v) for v in var_list]
        if not var_list:
            raise ValueError("No variables to optimize.")
        var_refs = [p.target() for p in processors]

        # Original gradients computation:
        # grads = tf.gradients(
        #     loss, var_refs, grad_ys=grad_loss,
        #     gate_gradients=(gate_gradients == Optimizer.GATE_OP),
        #     aggregation_method=aggregation_method,
        #     colocate_gradients_with_ops=colocate_gradients_with_ops)

        # Using gradient checkpointing instead.
        from memory_saving_gradients import gradients
        # Outputs of the different networks; unused while checkpoints='speed'.
        tensors_to_checkpoint = self.get_tensors_to_checkpoint()
        # Just specifying 'memory' as the checkpoints parameter fails.
        grads = gradients(
            loss, var_refs, grad_ys=grad_loss,
            gate_gradients=(gate_gradients == Optimizer.GATE_OP),
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops,
            checkpoints='speed')

        if gate_gradients == Optimizer.GATE_GRAPH:
            grads = control_flow_ops.tuple(grads)
        grads_and_vars = list(zip(grads, var_list))
        optimizer._assert_valid_dtypes([
            v for g, v in grads_and_vars
            if g is not None and v.dtype != dtypes.resource
        ])
        return grads_and_vars

    # compute_gradients is copied from tf.train.Optimizer so the gradients
    # call can be changed; switch between the two lines below to toggle it.
    # computed_gradients = compute_gradients(adam, loss, var_list=variables)
    computed_gradients = adam.compute_gradients(loss, var_list=variables)  # original gradients
    return computed_gradients
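# Note on the design choice above: compute_gradients is copied wholesale only
# so that the tf.gradients call can be swapped for the checkpointed version.
# Per the openai/gradient-checkpointing README, a one-line monkey-patch
# achieves the same substitution globally (affecting every gradient
# computation in the process); a sketch under that assumption:
import tensorflow as tf
import memory_saving_gradients

# Route all tf.gradients calls through the 'speed' checkpointing strategy.
tf.__dict__['gradients'] = memory_saving_gradients.gradients_speed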