def testCustomGetter(self):
  custom_getter = snt.custom_getters.Context(snt.custom_getters.stop_gradient)
  module = snt.nets.ConvNet2D(output_channels=self.output_channels,
                              kernel_shapes=self.kernel_shapes,
                              rates=self.rates,
                              strides=self.strides,
                              paddings=self.paddings,
                              custom_getter=custom_getter)

  input_shape = [10, 100, 100, 3]
  input_to_net = tf.random_normal(dtype=tf.float32, shape=input_shape)

  if tf.executing_eagerly():
    with tf.GradientTape() as tape0:
      out0 = module(input_to_net)
    with tf.GradientTape() as tape1:
      with custom_getter:
        out1 = module(input_to_net)
    all_vars = tf.trainable_variables()
    out0_grads = tape0.gradient(out0, all_vars)
    out1_grads = tape1.gradient(out1, all_vars)
  else:
    out0 = module(input_to_net)
    with custom_getter:
      out1 = module(input_to_net)
    all_vars = tf.trainable_variables()
    out0_grads = tf.gradients(out0, all_vars)
    out1_grads = tf.gradients(out1, all_vars)

  for grad in out0_grads:
    self.assertNotEqual(None, grad)
  self.assertEqual([None] * len(out1_grads), out1_grads)
def train_van_step(x_real, y_real):
  gen.train()
  dis.train()
  enc.train()

  if n_dim > 0:
    padding = tf.zeros((y_real.shape[0], n_dim))
    y_real_pad = tf.concat((y_real, padding), axis=-1)
  else:
    y_real_pad = y_real

  # Alternate discriminator step and generator step
  with tf.GradientTape(persistent=False) as tape:
    # Generate
    z_fake = datasets.paired_randn(batch_size, z_dim, masks)
    z_fake = z_fake + y_real_pad
    x_fake = gen(z_fake)

    # Discriminate
    logits_fake = dis(x_fake, y_real)

    gen_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits_fake,
                                                labels=targets_real))

  gen_grads = tape.gradient(gen_loss, gen.trainable_variables)
  gen_opt.apply_gradients(zip(gen_grads, gen.trainable_variables))

  with tf.GradientTape(persistent=True) as tape:
    # Generate
    z_fake = datasets.paired_randn(batch_size, z_dim, masks)
    z_fake = z_fake + y_real_pad
    x_fake = tf.stop_gradient(gen(z_fake))

    # Discriminate
    x = tf.concat((x_real, x_fake), 0)
    y = tf.concat((y_real, y_real), 0)
    logits = dis(x, y)

    # Encode
    p_z = enc(x_fake)

    dis_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=targets))
    # Encoder ignores nuisance parameters (if they exist)
    enc_loss = -tf.reduce_mean(p_z.log_prob(z_fake[:, :s_dim]))

  dis_grads = tape.gradient(dis_loss, dis.trainable_variables)
  enc_grads = tape.gradient(enc_loss, enc.trainable_variables)

  dis_opt.apply_gradients(zip(dis_grads, dis.trainable_variables))
  enc_opt.apply_gradients(zip(enc_grads, enc.trainable_variables))

  return dict(gen_loss=gen_loss, dis_loss=dis_loss, enc_loss=enc_loss)
def test_largest_and_smallest_eigenvalue_estimation_correct(self):
  tf.compat.v1.random.set_random_seed(0)
  x_shape = (10, 5)
  y_shape = (10, 1)
  conv_dims = []
  conv_sizes = []
  dense_sizes = [5]
  n_classes = 3

  model = classifier.CNN(conv_dims, conv_sizes, dense_sizes, n_classes)
  itr = dataset_utils.get_supervised_batch_noise_iterator(x_shape, y_shape)
  loss_fn = ci.make_loss_fn(model, 1.)
  grad_fn = ci.make_grad_fn(model)
  map_grad_fn = ci.make_map_grad_fn(model)

  x, y = itr.next()
  _, _ = model.get_loss(x, y)

  loss_fn = ci.make_loss_fn(model, None)
  grad_fn = ci.make_grad_fn(model)
  map_grad_fn = ci.make_map_grad_fn(model)

  with tf.GradientTape(persistent=True) as tape:
    # First estimate the Hessian using training data from itr.
    with tf.GradientTape() as tape_inner:
      loss = tf.reduce_mean(loss_fn(x, y))
    grads = grad_fn(loss, tape_inner)
    concat_grads = tf.concat([tf.reshape(w, [-1, 1]) for w in grads], 0)
  hessian_mapped = map_grad_fn(concat_grads, tape)

  # hessian_mapped is a list of n_params x model-shaped tensors;
  # we should just be able to flat_concat it.
  hessian = tensor_utils.flat_concat(hessian_mapped)
  eigs, _ = tf.linalg.eigh(hessian)
  largest_ev, smallest_ev = eigs[-1], eigs[0]

  # We don't know what these eigenvalues should be, but just test that
  # the functions don't crash.
  est_largest_ev = eigenvalues.estimate_largest_ev(
      model, 1000, itr, loss_fn, grad_fn, map_grad_fn, burnin=100)
  est_smallest_ev = eigenvalues.estimate_smallest_ev(
      largest_ev, model, 1000, itr, loss_fn, grad_fn, map_grad_fn, burnin=100)

  self.assertAllClose(largest_ev, est_largest_ev, 0.5)
  self.assertAllClose(smallest_ev, est_smallest_ev, 0.5)
def train_gen_step(x1_real, x2_real, y_real):
  gen.train()
  dis.train()
  enc.train()

  # Alternate discriminator step and generator step
  with tf.GradientTape(persistent=True) as tape:
    # Generate
    z1, z2, y_fake = datasets.paired_randn(batch_size, z_dim, masks)
    x1_fake = tf.stop_gradient(gen(z1))
    x2_fake = tf.stop_gradient(gen(z2))

    # Discriminate
    x1 = tf.concat((x1_real, x1_fake), 0)
    x2 = tf.concat((x2_real, x2_fake), 0)
    y = tf.concat((y_real, y_fake), 0)
    logits = dis(x1, x2, y)

    # Encode
    p_z = enc(x1_fake)

    dis_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=targets))
    # Encoder ignores nuisance parameters (if they exist)
    enc_loss = -tf.reduce_mean(p_z.log_prob(z1[:, :s_dim]))

  dis_grads = tape.gradient(dis_loss, dis.trainable_variables)
  enc_grads = tape.gradient(enc_loss, enc.trainable_variables)

  dis_opt.apply_gradients(zip(dis_grads, dis.trainable_variables))
  enc_opt.apply_gradients(zip(enc_grads, enc.trainable_variables))

  with tf.GradientTape(persistent=False) as tape:
    # Generate
    z1, z2, y_fake = datasets.paired_randn(batch_size, z_dim, masks)
    x1_fake = gen(z1)
    x2_fake = gen(z2)

    # Discriminate
    logits_fake = dis(x1_fake, x2_fake, y_fake)

    gen_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits_fake,
                                                labels=targets_real))

  gen_grads = tape.gradient(gen_loss, gen.trainable_variables)
  gen_opt.apply_gradients(zip(gen_grads, gen.trainable_variables))

  return dict(gen_loss=gen_loss, dis_loss=dis_loss, enc_loss=enc_loss)
def nll_gnp_step_bandits(model, data, optimizer_config):
  """Applies gradient updates and returns appropriate metrics.

  Args:
    model: An instance of SNP Regressor.
    data: A 6-tuple consisting of context_x, context_y, target_x, target_y,
      unseen_target_y (i.e., targets for target_x - context_x), and
      unseen_target_a.
    optimizer_config: A dictionary with two keys: an 'optimizer' object and a
      'max_grad_norm' for clipping gradients.

  Returns:
    nll_term: Negative log-likelihood of model for unseen targets.
    local_kl: KL loss for latent variables of unseen targets.
    global_kl: KL loss for global latent variable.
  """
  (context_x, context_y, target_x, target_y, unseen_target_y,
   unseen_target_a) = data
  num_context = tf.shape(context_x)[1]

  with tf.GradientTape() as tape:
    prediction = model(context_x, context_y, target_x, target_y)
    unseen_predictions = prediction[:, num_context:]
    nll_term = nll(unseen_target_y, unseen_predictions, unseen_target_a)
    local_kl = tf.reduce_mean(
        tf.reduce_sum(model.losses[-1][:, num_context:], axis=[1, 2]))
    global_kl = tf.reduce_mean(tf.reduce_sum(model.losses[-2], axis=-1))
    loss = nll_term + local_kl + global_kl

  gradients = tape.gradient(loss, model.trainable_variables)
  max_grad_norm = optimizer_config['max_grad_norm']
  optimizer = optimizer_config['optimizer']
  clipped_gradients, _ = tf.clip_by_global_norm(gradients, max_grad_norm)
  optimizer.apply_gradients(zip(clipped_gradients, model.trainable_variables))
  return nll_term, local_kl, global_kl
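# A minimal, self-contained sketch of the clip-then-apply pattern used by
# nll_gnp_step_bandits above. The Keras model and the optimizer_config values
# here are illustrative assumptions, not the SNP regressor or its real
# settings.
import tensorflow as tf


def _clipped_update_sketch():
  model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
  optimizer_config = {'optimizer': tf.keras.optimizers.Adam(1e-3),
                      'max_grad_norm': 10.0}

  x = tf.random.normal([8, 4])
  y = tf.random.normal([8, 1])
  with tf.GradientTape() as tape:
    loss = tf.reduce_mean(tf.square(model(x) - y))
  gradients = tape.gradient(loss, model.trainable_variables)
  # Rescale the whole gradient list so its global norm is at most max_grad_norm.
  clipped_gradients, _ = tf.clip_by_global_norm(
      gradients, optimizer_config['max_grad_norm'])
  optimizer_config['optimizer'].apply_gradients(
      zip(clipped_gradients, model.trainable_variables))
  return loss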
def gradient():
  w = tf.Variable([[1.0]])
  with tf.GradientTape() as tape:
    loss = w * w
  grad = tape.gradient(loss, w)
  # For loss = w * w, the gradient is 2 * w, so this prints [[2.]].
  print(grad)
  return grad
def update(self, training_examples):
  state_features = np.vstack([f for (f, _, _) in training_examples])
  value_targets = np.vstack([v for (_, v, _) in training_examples])
  policy_targets = np.vstack([p for (_, _, p) in training_examples])

  with self.device:
    with tf.GradientTape() as tape:
      values, policy_logits = self.model(state_features, training=True)
      loss_value = tf.losses.mean_squared_error(
          values, tf.stop_gradient(value_targets))
      loss_policy = tf.nn.softmax_cross_entropy_with_logits_v2(
          logits=policy_logits, labels=tf.stop_gradient(policy_targets))
      loss_policy = tf.reduce_mean(loss_policy)
      loss_l2 = 0
      for weights in self.model.trainable_variables:
        loss_l2 += self.l2_regularization * tf.nn.l2_loss(weights)
      loss = loss_policy + loss_value + loss_l2

    grads = tape.gradient(loss, self.model.trainable_variables)
    self.optimizer.apply_gradients(
        zip(grads, self.model.trainable_variables),
        global_step=tf.train.get_or_create_global_step())

  self.value_and_prior.cache_clear()
  return LossValues(total=float(loss), policy=float(loss_policy),
                    value=float(loss_value), l2=float(loss_l2))
def train(model):
  with tf.GradientTape() as t:
    t.watch(model._x)
    current_funct_ = system_functions(model)
  grad_x = t.gradient(current_funct_, model._x)
  # Move x along the gradient of the system function, scaled by the
  # learning rate, then refresh the model's energy.
  model._x = model._x + model._learning_rate * grad_x
  model.Energy_mutable()
def train(self, inputs, targets, learning_rate):
  # GradientTape runs eagerly, so no tf.Session is needed for this update.
  with tf.GradientTape() as calc_gradient:
    current_loss = self.loss(model(inputs), targets)
  dW, db = calc_gradient.gradient(current_loss, [model.W, model.b])
  model.W.assign_sub(learning_rate * dW)
  model.b.assign_sub(learning_rate * db)
def grad(dresult, variables=None):
  # `f`, `args`, `kwargs`, `scope`, and `bf16` are free variables captured
  # from the enclosing custom-gradient wrapper.
  with tf.GradientTape() as t:
    t.watch(args)
    if variables is not None:
      t.watch(variables)
    # we need to outsmart XLA here to force a control dependency
    zero_with_control_dependency = tf.reduce_mean(dresult[0] * 1e-30)
    new_args = []
    for a in args:
      if a.dtype.is_floating:
        new_args.append(a + tf.cast(zero_with_control_dependency, a.dtype))
      else:
        new_args.append(a)
    with tf.control_dependencies([dresult]):
      if bf16:
        with tf.tpu.bfloat16_scope():
          with tf.variable_scope(scope, reuse=True):
            result = f(*new_args, **kwargs)
      else:
        with tf.variable_scope(scope, reuse=True):
          result = f(*new_args, **kwargs)
  kw_vars = []
  if variables is not None:
    kw_vars = list(variables)
  grads = t.gradient(result, list(new_args) + kw_vars,
                     output_gradients=[dresult])
  return grads[:len(new_args)], grads[len(new_args):]
def testCustomGetterTranspose(self):
  """Tests passing a custom getter to the transpose method."""
  conv2d = snt.nets.ConvNet2D(output_channels=self.output_channels,
                              kernel_shapes=self.kernel_shapes,
                              strides=self.strides,
                              paddings=self.paddings)
  input_shape = [10, 100, 100, 3]
  output_of_conv2d = conv2d(tf.zeros(dtype=tf.float32, shape=input_shape))

  # We'll be able to check if the custom_getter was used by checking for
  # gradients.
  conv2d_transpose = conv2d.transpose(
      custom_getter=snt.custom_getters.stop_gradient)

  if tf.executing_eagerly():
    with tf.GradientTape() as tape:
      output_of_transpose = conv2d_transpose(output_of_conv2d)
    conv2d_transpose_vars = conv2d_transpose.get_variables()
    self.assertTrue(len(conv2d_transpose_vars))
    for tensor in tape.gradient(output_of_transpose, conv2d_transpose_vars):
      self.assertIsNone(tensor)
  else:
    output_of_transpose = conv2d_transpose(output_of_conv2d)
    conv2d_transpose_vars = conv2d_transpose.get_variables()
    self.assertTrue(len(conv2d_transpose_vars))
    for tensor in tf.gradients(output_of_transpose, conv2d_transpose_vars):
      self.assertIsNone(tensor)
def train(model, data, batch_size, step_size=1.0, threshold=2.0,
          random_shuffle_size=None, autoencoder_loss=None):
  """Train NeuRD `model` on `data`."""
  if random_shuffle_size is None:
    random_shuffle_size = 10 * batch_size
  data = data.shuffle(random_shuffle_size)
  data = data.batch(batch_size)
  data = data.repeat(1)

  for x, regrets in data:
    with tf.GradientTape() as tape:
      output = model(x, training=True)
      logits = output[:, :1]
      logits = logits - tf.reduce_mean(logits, keepdims=True)

      regrets = tf.stop_gradient(
          thresholded(logits, regrets, threshold=threshold))
      utility = tf.reduce_mean(logits * regrets)
      if autoencoder_loss is not None:
        utility = utility - autoencoder_loss(x, output[:, 1:])

    grad = tape.gradient(utility, model.trainable_variables)

    for i, var in enumerate(model.trainable_variables):
      var.assign_add(step_size * grad[i])
def step_model(count):
  for _ in range(count):
    with tf.GradientTape() as tape:
      nll = model(tf.zeros(1, 1), tf.zeros(1, 1))
    gradients = tape.gradient(nll, model.trainable_variables)
    grad_est.apply_gradients(zip(gradients, model.trainable_variables))
def _update_critic_ddpg(self, obs, action, next_obs, reward, mask):
  """Updates parameters of ddpg critic given samples from the batch.

  Args:
    obs: A tfe.Variable with a batch of observations.
    action: A tfe.Variable with a batch of actions.
    next_obs: A tfe.Variable with a batch of next observations.
    reward: A tfe.Variable with a batch of rewards.
    mask: A tfe.Variable with a batch of masks.
  """
  if self.use_absorbing_state:
    # Starting from the goal state we can execute only non-actions.
    a_mask = tf.maximum(0, mask)
    q_next = self.critic_target(next_obs,
                                self.actor_target(next_obs) * a_mask)
    q_target = reward + self.discount * q_next
  else:
    # Without an absorbing state we assign rewards of 0.
    q_next = self.critic_target(next_obs, self.actor_target(next_obs))
    q_target = reward + self.discount * mask * q_next

  with tf.GradientTape() as tape:
    q_pred = self.critic(obs, action)
    critic_loss = tf.losses.mean_squared_error(q_target, q_pred)

  grads = tape.gradient(critic_loss, self.critic.variables)
  self.critic_optimizer.apply_gradients(zip(grads, self.critic.variables),
                                        global_step=self.critic_step)

  with contrib_summary.record_summaries_every_n_global_steps(
      100, self.critic_step):
    contrib_summary.scalar('critic/loss', critic_loss, step=self.critic_step)
def _compute_gradients(self, actions, discounted_rewards, weights=None,
                       sequence_length=None, loss_str='train',
                       use_entropy_regularization=True, **kwargs):
  """Implement the policy gradient in TF."""
  if sequence_length is not None:
    seq_mask = tf.sequence_mask(sequence_length, dtype=tf.float32)
  else:
    seq_mask = None

  with tf.GradientTape(watch_accessed_variables=False) as tape:
    tape.watch(self.trainable_variables)
    # Returns 0.0 if critic is not being used
    value_loss = self._compute_value_loss(
        discounted_rewards, seq_mask=seq_mask, **kwargs)
    policy_loss = self._compute_policy_loss(
        discounted_rewards, actions, seq_mask=seq_mask, weights=weights,
        use_entropy_regularization=use_entropy_regularization, **kwargs)
    loss = tf.reduce_mean(policy_loss + value_loss)

  if self.log_summaries and (self._counter % self.log_every == 0):
    contrib_summary.scalar('{}_loss'.format(loss_str), loss)
  return tape.gradient(loss, self.trainable_variables)
def train_step(inputs):
  """Training StepFn."""
  images, labels = inputs
  with tf.GradientTape() as tape:
    predictions = model(images, training=True)

    # Loss calculations.
    #
    # Part 1: Prediction loss.
    prediction_loss = tf.keras.losses.sparse_categorical_crossentropy(
        labels, predictions)
    loss1 = tf.reduce_mean(prediction_loss)
    # Part 2: Model weights regularization.
    loss2 = tf.reduce_sum(model.losses)

    # Scale the loss, given the TPUStrategy will reduce-sum all gradients.
    loss = loss1 + loss2
    scaled_loss = loss / strategy.num_replicas_in_sync

  grads = tape.gradient(scaled_loss, model.trainable_variables)
  update_vars = optimizer.apply_gradients(
      zip(grads, model.trainable_variables))
  update_loss = training_loss.update_state(loss)
  update_accuracy = training_accuracy.update_state(labels, predictions)
  with tf.control_dependencies([update_vars, update_loss, update_accuracy]):
    return tf.identity(loss)
def testBaseline(self, cls, num_microbatches, expected_answer):
  with tf.GradientTape(persistent=True) as gradient_tape:
    var0 = tf.Variable([1.0, 2.0])
    data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])

    dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
    dp_sum_query = privacy_ledger.QueryWithLedger(
        dp_sum_query, 1e6, num_microbatches / 1e6)

    opt = cls(dp_sum_query, num_microbatches=num_microbatches,
              learning_rate=2.0)

    self.evaluate(tf.global_variables_initializer())
    # Fetch params to validate initial values
    self.assertAllClose([1.0, 2.0], self.evaluate(var0))

    # Expected gradient is sum of differences divided by number of
    # microbatches.
    grads_and_vars = opt.compute_gradients(
        lambda: self._loss_fn(var0, data0), [var0],
        gradient_tape=gradient_tape)
    self.assertAllCloseAccordingToType(expected_answer, grads_and_vars[0][0])
def update(self, train_inputs: Sequence[TrainInput]):
  """Run an update step."""
  batch = TrainInput.stack(train_inputs)

  with self._device:
    with tf.GradientTape() as tape:
      values, policy_logits = self._keras_model(
          batch.observation, training=True)
      loss_value = tf.losses.mean_squared_error(
          values, tf.stop_gradient(batch.value))
      loss_policy = tf.nn.softmax_cross_entropy_with_logits_v2(
          logits=policy_logits, labels=tf.stop_gradient(batch.policy))
      loss_policy = tf.reduce_mean(loss_policy)
      loss_l2 = 0
      for weights in self._keras_model.trainable_variables:
        loss_l2 += self._l2_regularization * tf.nn.l2_loss(weights)
      loss = loss_policy + loss_value + loss_l2

    grads = tape.gradient(loss, self._keras_model.trainable_variables)
    self._optimizer.apply_gradients(
        zip(grads, self._keras_model.trainable_variables),
        global_step=tf.train.get_or_create_global_step())

  return Losses(policy=float(loss_policy), value=float(loss_value),
                l2=float(loss_l2))
def gradient_v2(self, x_test, input_idx=None, output_idx=0):
  '''BROKEN VERSION, EXPERIMENTING WITH GRADIENT TAPE.'''
  print('calculating gradient...')
  grs = []
  for p in range(0, self.__n_trees):
    print('TREE {}:'.format(p))
    x = self.__sess[p].graph.get_tensor_by_name("input:0")
    pred = self.__sess[p].graph.get_tensor_by_name("prediction:0")
    unscale = self.__yscale.scale_[output_idx]
    # if input_idx==None:
    #   gr = tf.gradients(pred[:, output_idx], x)
    # else:
    #   gr = tf.gradients(pred[:, output_idx], x[:, input_idx])
    keep_prob = self.__sess[p].graph.get_tensor_by_name("keep_prob:0")
    x_test1 = self.__xscale.transform(x_test)
    with tf.GradientTape() as tape:
      tape.watch(x)
      # Running the session returns a NumPy array, so the tape cannot trace
      # the computation back to `x`; this is why the version is broken.
      f = self.__yscale.inverse_transform(
          self.__sess[p].run(pred, feed_dict={
              x: x_test, keep_prob: self.__dropout_keep_prob}))
    grad_f = tape.gradient(f, x)
    val = grad_f[0, 0]
    # if input_idx==None:
    #   val = val * self.__xscale.scale_[0] / unscale
    # else:
    #   val = val * self.__xscale.scale_[input_idx] / unscale
    grs.append(val)
  return grs
def inner_recompute_grad(*dresult):
  """Nested custom gradient function for computing grads in reverse and forward mode autodiff."""
  # Gradient calculation for reverse mode autodiff.
  variables = grad_kwargs.get("variables")
  with tf.GradientTape() as t:
    id_args = tf.nest.map_structure(tf.identity, args)
    t.watch(id_args)
    if variables is not None:
      t.watch(variables)
    with tf.control_dependencies(dresult):
      with tf.variable_scope(current_var_scope):
        result = f(*id_args, **kwargs)
  kw_vars = []
  if variables is not None:
    kw_vars = list(variables)
  grads = t.gradient(
      result,
      list(id_args) + kw_vars,
      output_gradients=dresult,
      unconnected_gradients=tf.UnconnectedGradients.ZERO)

  def transpose(*t_args, **t_kwargs):
    """Gradient function calculation for forward mode autodiff."""
    # Just throw an error since gradients / activations are not stored on
    # tape for recompute.
    raise NotImplementedError(
        "recompute_grad tried to transpose grad of {}. "
        "Consider not using recompute_grad in forward mode "
        "autodiff".format(f.__name__))

  return (grads[:len(id_args)], grads[len(id_args):]), transpose
def _update_actor(self, obs, mask):
  """Updates parameters of the actor given samples from the batch.

  Args:
    obs: A tfe.Variable with a batch of observations.
    mask: A tfe.Variable with a batch of masks.
  """
  with tf.GradientTape() as tape:
    if self.use_td3:
      q_pred, _ = self.critic(obs, self.actor(obs))
    else:
      q_pred = self.critic(obs, self.actor(obs))
    if self.use_absorbing_state:
      # Don't update the actor for absorbing states,
      # and skip the update if all states are absorbing.
      a_mask = 1.0 - tf.maximum(0, -mask)
      if tf.reduce_sum(a_mask) < 1e-8:
        return
      actor_loss = -tf.reduce_sum(q_pred * a_mask) / tf.reduce_sum(a_mask)
    else:
      actor_loss = -tf.reduce_mean(q_pred)

  grads = tape.gradient(actor_loss, self.actor.variables)
  # Clipping makes training more stable.
  grads, _ = tf.clip_by_global_norm(grads, 40.0)
  self.actor_optimizer.apply_gradients(zip(grads, self.actor.variables),
                                       global_step=self.actor_step)

  with contrib_summary.record_summaries_every_n_global_steps(
      100, self.actor_step):
    contrib_summary.scalar('actor/loss', actor_loss, step=self.actor_step)
def eager_train_step(detection_model,
                     features,
                     labels,
                     unpad_groundtruth_tensors,
                     optimizer,
                     learning_rate,
                     add_regularization_loss=True,
                     clip_gradients_value=None,
                     global_step=None,
                     num_replicas=1.0):
  is_training = True
  detection_model._is_training = is_training
  tf.keras.backend.set_learning_phase(is_training)

  labels = model_lib.unstack_batch(
      labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

  with tf.GradientTape() as tape:
    losses_dict, _ = _compute_losses_and_predictions_dicts(
        detection_model, features, labels, add_regularization_loss)

    total_loss = losses_dict["Loss/total_loss"]
    total_loss = tf.math.divide(
        total_loss, tf.constant(num_replicas, dtype=tf.float32))
    losses_dict["Loss/normalized_total_loss"] = total_loss

  for loss_type in losses_dict:
    tf.compat.v2.summary.scalar(
        loss_type, losses_dict[loss_type], step=global_step)

  trainable_variables = detection_model.trainable_variables
  gradients = tape.gradient(total_loss, trainable_variables)
  if clip_gradients_value:
    gradients, _ = tf.clip_by_global_norm(gradients, clip_gradients_value)
  optimizer.apply_gradients(zip(gradients, trainable_variables))

  return total_loss
def hvp(v, iterator, loss_fn, grad_fn, map_grad_fn, n_samples=1):
  """Multiply the Hessian of clf at inputs (x, y) by vector v.

  Args:
    v (tensor): the vector in the HVP.
    iterator (Iterator): iterator for samples for HVP estimation.
    loss_fn (function): a function which returns a vector of losses.
    grad_fn (function): a function which takes the gradient of a scalar loss.
    map_grad_fn (function): a function which takes the gradient of each
      element of a vector of losses.
    n_samples (int, optional): number of minibatches to sample when
      estimating the Hessian.

  Returns:
    hessian_vector_val (tensor): the HVP of clf's Hessian with v.
  """
  # tf.GradientTape tracks the operations you take while inside it, in order
  # to later auto-differentiate through those operations to get gradients.
  with tf.GradientTape(persistent=True) as tape2:
    # We need two gradient tapes to calculate second derivatives.
    with tf.GradientTape() as tape:
      loss = 0.
      for _ in range(n_samples):
        x_sample, y_sample = iterator.next()
        loss += tf.reduce_mean(loss_fn(x_sample, y_sample))

    # Outside the tape, we can get the aggregated loss gradient across the
    # batch. This is the standard usage of GradientTape.
    grads = grad_fn(loss, tape)

    # For each weight matrix, we now get the product of the vector v with
    # the gradient, and then sum over the weights to get a total gradient
    # per element in x.
    vlist = []
    for g, u in zip(grads, v):
      g = tf.expand_dims(g, 0)
      prod = tf.multiply(g, u)
      vec = tf.reduce_sum(prod, axis=range(1, prod.shape.rank))
      vlist.append(vec)
    vgrads = tf.add_n(vlist)

    # We now take the gradient of the gradient-vector product. This gives us
    # the Hessian-vector product. Note that we take this gradient inside
    # the tape - this allows us to get the HVP value for each element of x.
    hessian_vector_val = map_grad_fn(vgrads, tape2)
  return hessian_vector_val
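# A minimal, self-contained sketch of the nested-tape Hessian-vector-product
# idea that hvp() implements above. The toy loss and variable names here are
# illustrative assumptions, not part of the surrounding codebase.
import tensorflow as tf


def _hvp_sketch():
  w = tf.Variable([1.0, 2.0])
  v = tf.constant([0.5, -0.5])
  with tf.GradientTape() as outer_tape:
    with tf.GradientTape() as inner_tape:
      loss = tf.reduce_sum(w ** 3)          # toy scalar loss
    grads = inner_tape.gradient(loss, w)     # dL/dw = 3 * w**2
    grad_dot_v = tf.reduce_sum(grads * v)    # <dL/dw, v>
  # d/dw <dL/dw, v> = H v; here H = diag(6 * w), so the result is [3.0, -6.0].
  return outer_tape.gradient(grad_dot_v, w)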
def _optimize_step(self, batch):
  with tf.GradientTape() as tape:
    loss, info = self._build_loss(batch)
  trainable_vars = self._get_vars()
  grads = tape.gradient(loss, trainable_vars)
  grads_and_vars = tuple(zip(grads, trainable_vars))
  self._optimizer.apply_gradients(grads_and_vars)
  return info
def _run(feats):
  with tf.GradientTape(persistent=True) as tape:
    tape.watch(feats)
    eager_class_out, eager_box_out = model(feats, True)
  class_grads, box_grads = tf.nest.map_structure(
      lambda output: tape.gradient(output, feats),
      [eager_class_out, eager_box_out])
  return eager_class_out, eager_box_out, class_grads, box_grads
def train_step():
  with tf.GradientTape() as tape:
    loss_val = loss()
  vals = tape.watched_variables()
  grads = tape.gradient(loss_val, vals)
  grads_and_vals = list(zip(grads, vals))
  opt.apply_gradients(grads_and_vals)
  return loss_val
def grad_loss_fn(states, actions, children_layer, next_states, q_labels,
                 last_batch_action_assignment, num_children, args_dict):
  """Defines gradient of bellman loss plus consistency penalization."""
  with tf.GradientTape() as tape:
    loss_value, bellman_loss, regularized_loss, q_average = loss_fn(
        next_states, states, actions, children_layer,
        last_batch_action_assignment, q_labels[:, num_children], args_dict)
  return (tape.gradient(loss_value, children_layer.variables),
          loss_value.numpy(), bellman_loss, regularized_loss, q_average)
def add_gradients_penalty(x, model, model_train_mode):
  """WGAN-GP gradient penalty.

  See
  https://colab.research.google.com/github/timsainb/tensorflow2-generative-models/blob/master/3.0-WGAN-GP-fashion-mnist.ipynb#scrollTo=Wyipg-4oSYb1
  """
  with tf.GradientTape() as t:
    t.watch(x)
    hidden = model(x, is_training=model_train_mode)
  gradients = t.gradient(hidden, x)
  dx = tf.sqrt(tf.reduce_sum(gradients**2, axis=[1, 2, 3]))
  d_regularizer = tf.reduce_mean((dx - 1.0)**2)
  return d_regularizer
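# A hedged sketch of how a penalty like add_gradients_penalty is typically
# folded into a WGAN-GP critic loss. `critic`, `gp_weight`, and the
# interpolation between real and fake samples are standard WGAN-GP choices,
# assumed here for illustration; they are not taken from this codebase.
import tensorflow as tf


def critic_loss_with_gp(critic, x_real, x_fake, gp_weight=10.0):
  # Interpolate between real and fake images (NHWC batches assumed).
  eps = tf.random.uniform([tf.shape(x_real)[0], 1, 1, 1], 0.0, 1.0)
  x_interp = eps * x_real + (1.0 - eps) * x_fake
  with tf.GradientTape() as t:
    t.watch(x_interp)
    critic_interp = critic(x_interp)
  grads = t.gradient(critic_interp, x_interp)
  grad_norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
  gradient_penalty = tf.reduce_mean((grad_norm - 1.0) ** 2)
  # Wasserstein critic loss plus the weighted penalty term.
  wgan_loss = tf.reduce_mean(critic(x_fake)) - tf.reduce_mean(critic(x_real))
  return wgan_loss + gp_weight * gradient_penalty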
def _optimize_ae(self, batch):
  vars_ = self._ae_vars
  with tf.GradientTape(watch_accessed_variables=False) as tape:
    tape.watch(vars_)
    loss, info = self._build_ae_loss(batch)
  grads = tape.gradient(loss, vars_)
  grads_and_vars = tuple(zip(grads, vars_))
  self._ae_optimizer.apply_gradients(grads_and_vars)
  return info
def testResourceVariables(self):
  v1 = tf.Variable([1., 2.], use_resource=True)
  v2 = tf.Variable([3., 4.], use_resource=True)
  with tf.GradientTape() as tape:
    tape.watch([v1, v2])
    loss = tf.reduce_sum(tf.gather(params=v1, indices=[0]) + v2)
  v1_grad, v2_grad = tape.gradient(loss, [v1, v2])
  multistep_opt = multistep_optimizer.MultistepAdamOptimizer(0.1)
  multistep_opt.apply_gradients(((v1_grad, v1), (v2_grad, v2)))