Beispiel #1
0
    def validate_model_independence(self, labels, log_probs, task_parameters):
        """Return gradient norms for the assumed-active and -inactive sets.

        Only task index 0 is visited. The entries of `log_probs` selected by
        `labels == 0` are differentiated with respect to
        `task_parameters[0:len(task_parameters)]`, and the norm of every
        resulting gradient is taken.

        NOTE(review): the "on" and "off" results are produced by the very
        same computation here, so both lists describe the same gradients.
        Confirm whether "off" was meant to use a different parameter slice.

        Args:
            labels: Tensor compared element-wise against the task index.
            log_probs: Tensor from which the matching entries are gathered.
            task_parameters: Sliceable sequence of tensors to differentiate
                with respect to.

        Returns:
            A pair `(on_norms, off_norms)`, each a flat list of gradient-norm
            tensors.
        """
        params_per_task = len(task_parameters)

        def _per_task_gradient_norms():
            """Builds the gradient ops per task, then the norm of each one."""
            gradients_by_task = []
            for task in range(1):
                # Pick out the log-probabilities whose label equals `task`.
                selected = tf.gather(
                    log_probs, tf.compat.v1.where(tf.equal(labels, task)))
                start = task * params_per_task
                stop = (task + 1) * params_per_task
                gradients_by_task.append(
                    tf.gradients(ys=selected, xs=task_parameters[start:stop]))
            return [[tf.norm(tensor=gradient) for gradient in task_gradients]
                    for task_gradients in gradients_by_task]

        on_norms = _per_task_gradient_norms()
        off_norms = _per_task_gradient_norms()

        flat_on = list(itertools.chain.from_iterable(on_norms))
        flat_off = list(itertools.chain.from_iterable(off_norms))
        return (flat_on, flat_off)
Beispiel #2
0
def gradient_descent_step(loss,
                          variables,
                          stop_grads,
                          allow_grads_to_batch_norm_vars,
                          learning_rate,
                          get_update_ops=True):
  """Takes one SGD step on `variables` and reports the resulting values.

  Args:
    loss: Tensor to differentiate.
    variables: Sequence of variables the loss is differentiated against; each
      must expose a `name` attribute when batch-norm filtering is active.
    stop_grads: If truthy, every gradient is wrapped in `tf.stop_gradient`.
    allow_grads_to_batch_norm_vars: If falsy, variables whose name contains
      'offset' or 'scale' are passed through unchanged.
    learning_rate: Multiplier applied to each gradient before subtraction.
    get_update_ops: If truthy, a `tf.assign` op is created for every variable
      that actually receives an update.

  Returns:
    A dict with 'updated_vars' (the post-step value per variable, in input
    order) and 'update_ops' (the assign ops, possibly empty).
  """
  gradients = tf.gradients(loss, variables)
  if stop_grads:
    gradients = [tf.stop_gradient(gradient) for gradient in gradients]

  new_values = []
  assign_ops = []
  for var, gradient in zip(variables, gradients):
    # The name is only inspected when batch-norm updates are disallowed.
    skip_update = (not allow_grads_to_batch_norm_vars and
                   ('offset' in var.name or 'scale' in var.name))
    if skip_update:
      new_values.append(var)
      continue

    stepped = var - learning_rate * gradient
    if get_update_ops:
      assign_ops.append(tf.assign(var, stepped))
    new_values.append(stepped)

  return {'updated_vars': new_values, 'update_ops': assign_ops}
Beispiel #3
0
    def _reg(cls, batch_size, d, x, x_fake, beta=1e-1):
        """Gradient-penalty term evaluated on random blends of `x` and `x_fake`.

        A uniform weight in [0, 1) of shape `[batch_size, 1]` mixes `x` with
        `x_fake`; `d` is applied to the mixture and differentiated with
        respect to it. The penalty is `beta` times the mean squared deviation
        from 1 of the gradient's L2 norm taken over axis 1.

        Args:
            batch_size: Leading dimension of the random mixing weights.
            d: Callable applied to the blended input.
            x: First input tensor of the blend.
            x_fake: Second input tensor of the blend.
            beta: Scale applied to the penalty.

        Returns:
            The scaled penalty tensor.
        """
        mix = tf.random_uniform(shape=[batch_size, 1], minval=0., maxval=1.)
        blended = mix * x + (1 - mix) * x_fake
        critic_out = d(blended)
        (blend_grad,) = tf.gradients(critic_out, [blended])

        squared_norm = tf.reduce_sum(
            tf.square(blend_grad), reduction_indices=[1])
        slopes = tf.sqrt(squared_norm)
        penalty = tf.reduce_mean((slopes - 1)**2)
        return beta * penalty
 def loop_body(idx, qq_grad, qv_grad, sk_grad, sv_grad):
   """Adds the gradient contribution of the query at position `idx`.

   The single-row slices of `query_queries` and `query_values` at `idx` are
   fed to `self._get_dist`; the result is differentiated with respect to both
   slices and both support tensors, seeded with column `idx` of `dy`. Query
   gradients are appended along axis 0, support gradients are summed.

   Returns:
     The tuple `(idx + 1, qq_grad, qv_grad, sk_grad, sv_grad)` with the
     accumulators updated.
   """
   next_idx = idx + 1
   query_q = query_queries[idx:next_idx]
   query_v = query_values[idx:next_idx]
   dist = self._get_dist(query_q, query_v, support_keys_id,
                         support_values_id, labels)
   d_query_q, d_query_v, d_support_k, d_support_v = tf.gradients(
       dist, [query_q, query_v, support_keys_id, support_values_id],
       grad_ys=dy[:, idx:next_idx])
   qq_grad = tf.concat([qq_grad, d_query_q], axis=0)
   qv_grad = tf.concat([qv_grad, d_query_v], axis=0)
   sk_grad += d_support_k
   sv_grad += d_support_v
   return (next_idx, qq_grad, qv_grad, sk_grad, sv_grad)
Beispiel #5
0
def optimizer_update(iterate_collection, iteration_idx, objective_fn,
                     update_fn, get_params_fn, first_order, clip_grad_norm):
    """Advances every iterate by one update step on `objective_fn`.

    Args:
        iterate_collection: Iterable of per-variable optimizer states; it is
            traversed once to extract parameters and once more to apply the
            updates.
        iteration_idx: The iteration number, forwarded to `objective_fn` and
            `update_fn`.
        objective_fn: Callable invoked as `objective_fn(variables,
            iteration_idx)` that returns the value to differentiate.
        update_fn: Callable invoked as `update_fn(i=..., grad=..., state=...)`
            that returns the next iterate for one state.
        get_params_fn: Callable that maps a single iterate to the parameters
            the objective is differentiated against.
        first_order: If truthy, gradients are wrapped in `tf.stop_gradient`.
        clip_grad_norm: If truthy, every gradient is clipped element-wise to
            `[-clip_grad_norm, clip_grad_norm]`; falsy values skip clipping.

    Returns:
        A list holding the result of `update_fn` for each (iterate, gradient)
        pair, in iteration order.
    """
    params = [get_params_fn(state) for state in iterate_collection]

    if not tf.executing_eagerly():
        objective = objective_fn(params, iteration_idx)
        gradients = tf.gradients(ys=objective, xs=params)
    else:
        # Eager mode has no graph to differentiate, so record on a tape.
        with tf.GradientTape(persistent=True) as tape:
            tape.watch(params)
            objective = objective_fn(params, iteration_idx)
        gradients = tape.gradient(objective, params)

    if clip_grad_norm:
        lower_bound = -1 * clip_grad_norm
        gradients = [
            tf.clip_by_value(gradient, lower_bound, clip_grad_norm)
            for gradient in gradients
        ]

    if first_order:
        gradients = [tf.stop_gradient(gradient) for gradient in gradients]

    next_iterates = []
    for state, gradient in zip(iterate_collection, gradients):
        next_iterates.append(
            update_fn(i=iteration_idx, grad=gradient, state=state))
    return next_iterates
Beispiel #6
0
def clip_grads(loss, params, clip=20.):
    """Differentiate `loss` and clip the gradients by their global norm.

    Args:
        loss: Tensor to differentiate.
        params: Sequence of tensors or variables to differentiate against.
        clip: Clipping threshold handed to `tf.clip_by_global_norm`.

    Returns:
        A pair `(gvs, norm)`: `gvs` is a list of `(clipped_gradient, param)`
        tuples in the order of `params`, and `norm` is the second value
        returned by `tf.clip_by_global_norm`.
    """
    raw_grads = tf.gradients(ys=loss, xs=params)
    clipped, global_norm = tf.clip_by_global_norm(raw_grads, clip)
    return list(zip(clipped, params)), global_norm