def validate_model_independence(self, labels, log_probs, task_parameters):
  """Partition gradients into those assumed active and inactive."""
  num_task_parameters = len(task_parameters)
  # pylint: disable=g-complex-comprehension
  on_gradients = [[
      tf.norm(tensor=on_gradient) for on_gradient in on_gradients
  ] for on_gradients in [
      tf.gradients(
          ys=tf.gather(log_probs, tf.compat.v1.where(tf.equal(labels, i))),
          xs=task_parameters[i * num_task_parameters:(i + 1) *
                             num_task_parameters]) for i in range(1)
  ]]
  off_gradients = [[
      tf.norm(tensor=off_gradient) for off_gradient in off_gradients
  ] for off_gradients in [
      tf.gradients(
          ys=tf.gather(log_probs, tf.compat.v1.where(tf.equal(labels, i))),
          xs=task_parameters[i * num_task_parameters:(i + 1) *
                             num_task_parameters]) for i in range(1)
  ]]
  # pylint: enable=g-complex-comprehension
  return (list(itertools.chain.from_iterable(on_gradients)),
          list(itertools.chain.from_iterable(off_gradients)))
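# Hedged usage sketch for validate_model_independence (not from the original
# codebase). Assumptions: the module imports itertools and
# tensorflow.compat.v1 as tf and runs in graph mode; `self` is unused in the
# body above, so None is passed for it; the synthetic labels, log_probs, and
# task_parameters below are made up for illustration.
def _sketch_validate_model_independence():
  w = tf.Variable(tf.ones([3]), name='w_sketch')
  b = tf.Variable(tf.ones([3]), name='b_sketch')
  task_parameters = [w, b]
  labels = tf.constant([0, 0, 1])
  # A toy log-probability tensor that depends on both task parameters.
  log_probs = tf.reduce_sum(w * b) * tf.ones([3])
  on_norms, off_norms = validate_model_independence(
      None, labels, log_probs, task_parameters)
  return on_norms, off_norms  # lists of scalar gradient-norm tensors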
def gradient_descent_step(loss,
                          variables,
                          stop_grads,
                          allow_grads_to_batch_norm_vars,
                          learning_rate,
                          get_update_ops=True):
  """Returns the updated vars after one step of gradient descent."""
  grads = tf.gradients(loss, variables)

  if stop_grads:
    grads = [tf.stop_gradient(dv) for dv in grads]

  def _apply_grads(variables, grads):
    """Applies gradients using SGD on a list of variables."""
    v_new, update_ops = [], []
    for (v, dv) in zip(variables, grads):
      if (not allow_grads_to_batch_norm_vars and
          ('offset' in v.name or 'scale' in v.name)):
        updated_value = v  # no update.
      else:
        updated_value = v - learning_rate * dv  # gradient descent update.
        if get_update_ops:
          update_ops.append(tf.assign(v, updated_value))
      v_new.append(updated_value)
    return v_new, update_ops

  updated_vars, update_ops = _apply_grads(variables, grads)
  return {'updated_vars': updated_vars, 'update_ops': update_ops}
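# Hedged usage sketch for gradient_descent_step (not from the original code).
# Assumptions: tensorflow.compat.v1 is imported as tf (tf.assign is v1-only)
# and the code runs in graph mode; the toy variable and quadratic loss below
# are made up for illustration.
def _sketch_gradient_descent_step():
  v = tf.Variable([1.0, -2.0], name='weights')
  loss = tf.reduce_sum(tf.square(v))
  result = gradient_descent_step(
      loss=loss,
      variables=[v],
      stop_grads=True,  # treat the step as first-order
      allow_grads_to_batch_norm_vars=False,  # 'offset'/'scale' vars stay fixed
      learning_rate=0.1,
      get_update_ops=True)
  # result['updated_vars'] holds v - learning_rate * dv;
  # result['update_ops'] assigns that value back to v when run.
  return result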
def _reg(cls, batch_size, d, x, x_fake, beta=1e-1):
  """Gradient penalty term for the critic `d`, evaluated at interpolates."""
  # Randomly interpolate between real samples `x` and generated samples `x_fake`.
  alpha = tf.random_uniform(shape=[batch_size, 1], minval=0., maxval=1.)
  interpolates = alpha * x + (1 - alpha) * x_fake
  int_d = d(interpolates)
  # Per-example norm of the critic's gradient at the interpolated points.
  gradients = tf.gradients(int_d, [interpolates])[0]
  slopes = tf.sqrt(
      tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
  # Penalize deviations of the gradient norm from 1 (WGAN-GP style).
  return beta * tf.reduce_mean((slopes - 1)**2)
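# Hedged usage sketch for _reg (not from the original code). Since `cls` is
# unused in the body above, None is passed for it here; the stand-in critic
# and random batches are made up, and tensorflow.compat.v1 is assumed to be
# imported as tf (tf.random_uniform is v1-only).
def _sketch_gradient_penalty():
  batch_size, dim = 8, 4
  critic = lambda h: tf.reduce_sum(tf.square(h), axis=1, keepdims=True)
  x_real = tf.random_uniform([batch_size, dim])
  x_fake = tf.random_uniform([batch_size, dim])
  # Scalar penalty to be added to the critic/discriminator loss.
  return _reg(None, batch_size, critic, x_real, x_fake, beta=1e-1)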
def loop_body(idx, qq_grad, qv_grad, sk_grad, sv_grad):
  """Compute gradients for a single query."""
  qq = query_queries[idx:idx + 1]
  qv = query_values[idx:idx + 1]
  x = self._get_dist(qq, qv, support_keys_id, support_values_id, labels)
  grads = tf.gradients(
      x, [qq, qv, support_keys_id, support_values_id],
      grad_ys=dy[:, idx:idx + 1])
  # Per-query gradients are accumulated by concatenation; gradients w.r.t. the
  # shared support tensors are summed across queries.
  qq_grad = tf.concat([qq_grad, grads[0]], axis=0)
  qv_grad = tf.concat([qv_grad, grads[1]], axis=0)
  sk_grad += grads[2]
  sv_grad += grads[3]
  return (idx + 1, qq_grad, qv_grad, sk_grad, sv_grad)
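# Hedged sketch of the pattern loop_body implements (not the original call
# site: loop_body closes over query_queries, query_values, dy, labels, the
# support tensors, and self._get_dist). This self-contained analogue shows the
# same idea, accumulating per-row gradients with tf.while_loop, using a
# made-up quadratic score in place of self._get_dist; graph mode is assumed.
def _sketch_per_query_gradients(queries, weights):
  """queries: [n, d] float tensor; weights: [d] float tensor."""
  n = tf.shape(queries)[0]

  def body(idx, acc):
    q = queries[idx:idx + 1]            # one query, shape [1, d]
    score = tf.reduce_sum(q * weights)  # stand-in for self._get_dist
    grad = tf.gradients(score, [q])[0]  # gradient w.r.t. this single query
    return idx + 1, tf.concat([acc, grad], axis=0)

  _, per_query_grads = tf.while_loop(
      cond=lambda idx, _: idx < n,
      body=body,
      loop_vars=(tf.constant(0), queries[:0]),  # empty [0, d] accumulator
      shape_invariants=(tf.TensorShape([]), tf.TensorShape([None, None])))
  # Example: _sketch_per_query_gradients(tf.ones([5, 3]), tf.ones([3]))
  return per_query_grads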
def optimizer_update(iterate_collection, iteration_idx, objective_fn, update_fn,
                     get_params_fn, first_order, clip_grad_norm):
  """Returns the next iterate in the optimization of objective_fn wrt variables.

  Args:
    iterate_collection: A (potentially structured) container of tf.Tensors
      corresponding to the state of the current iterate.
    iteration_idx: An int Tensor; the iteration number.
    objective_fn: Callable that takes in variables and produces the value of
      the objective function.
    update_fn: Callable that takes in the gradient of the objective function
      and the current iterate and produces the next iterate.
    get_params_fn: Callable that takes in an iterate and returns the variables
      (parameters) with respect to which the objective is differentiated.
    first_order: If True, prevent the computation of higher order gradients.
    clip_grad_norm: If not None, gradient dimensions are independently clipped
      to lie in the interval [-clip_grad_norm, clip_grad_norm].

  Returns:
    A list with the next iterate, one entry per element of iterate_collection.
  """
  variables = [get_params_fn(iterate) for iterate in iterate_collection]

  if tf.executing_eagerly():
    with tf.GradientTape(persistent=True) as g:
      g.watch(variables)
      loss = objective_fn(variables, iteration_idx)
    grads = g.gradient(loss, variables)
  else:
    loss = objective_fn(variables, iteration_idx)
    grads = tf.gradients(ys=loss, xs=variables)

  if clip_grad_norm:
    grads = [
        tf.clip_by_value(grad, -1 * clip_grad_norm, clip_grad_norm)
        for grad in grads
    ]

  if first_order:
    grads = [tf.stop_gradient(dv) for dv in grads]

  return [
      update_fn(i=iteration_idx, grad=dv, state=s)
      for (s, dv) in zip(iterate_collection, grads)
  ]
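# Hedged usage sketch for optimizer_update (not from the original code). The
# iterate structure, SGD-style update_fn, and quadratic objective below are
# made up; the call works in graph mode and, via the GradientTape branch, in
# eager mode as well.
def _sketch_optimizer_update():
  iterates = [{'param': tf.constant([1.0, -2.0])}]

  def objective_fn(variables, unused_iteration_idx):
    return tf.add_n([tf.reduce_sum(tf.square(v)) for v in variables])

  def update_fn(i, grad, state):
    del i  # iteration index unused in this plain SGD sketch
    return {'param': state['param'] - 0.1 * grad}

  return optimizer_update(
      iterate_collection=iterates,
      iteration_idx=tf.constant(0),
      objective_fn=objective_fn,
      update_fn=update_fn,
      get_params_fn=lambda iterate: iterate['param'],
      first_order=True,  # stop higher-order gradients through the update
      clip_grad_norm=None)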
def clip_grads(loss, params, clip=20.):
  """Computes gradients of `loss` w.r.t. `params`, clipped by global norm."""
  grads = tf.gradients(ys=loss, xs=params)
  clipped_grads, norm = tf.clip_by_global_norm(grads, clip)
  gvs = list(zip(clipped_grads, params))  # (gradient, variable) pairs.
  return gvs, norm
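# Hedged usage sketch for clip_grads (not from the original code). Assumes
# graph mode with tensorflow.compat.v1 imported as tf; the toy parameters,
# loss, and optimizer below are made up for illustration.
def _sketch_clip_grads():
  params = [tf.Variable([3.0, 4.0]), tf.Variable([-5.0])]
  loss = tf.add_n([tf.reduce_sum(tf.square(p)) for p in params])
  gvs, global_norm = clip_grads(loss, params, clip=1.0)
  # The (gradient, variable) pairs plug directly into a v1 optimizer.
  train_op = tf.train.GradientDescentOptimizer(0.1).apply_gradients(gvs)
  return train_op, global_norm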