예제 #1
0
 def testProjectToBallLInfTensorList(self):
   """Projects a list of tensors with the INFINITY norm and radius 2.5.

   The expected values keep every entry whose magnitude is at most 2.5 and
   replace larger entries with +/-2.5.
   """
   radius = 2.5
   inputs = [
       tf.constant([[1.0, -3.0], [-2.0, 4.0]]),
       tf.constant([[0.0], [-5.0]]),
   ]
   projection = utils.project_to_ball(inputs, radius,
                                      configs.NormType.INFINITY)
   actual = self.evaluate(projection)
   wanted = [[[1.0, -2.5], [-2.0, 2.5]], [[0.0], [-2.5]]]
   self.assertAllClose(actual, wanted)
 def testProjectToBallL2SingleTensor(self):
     """Projects a bare tensor (no list or dict wrapper) with the L2 norm."""
     radius = 1.0
     rows = tf.constant([[3.0, -4.0], [-0.7, 2.4]])
     actual = self.evaluate(
         utils.project_to_ball(rows, radius, configs.NormType.L2))
     # Row norms are sqrt(9 + 16) = 5.0 and sqrt(0.49 + 5.76) = 2.5, so the
     # expected per-row scales are 1.0 / 5.0 = 0.2 and 1.0 / 2.5 = 0.4.
     wanted = [[0.6, -0.8], [-0.28, 0.96]]
     self.assertAllClose(actual, wanted)
    def testProjectToBallL2MultipleFeatures(self, radius, factor1, factor2):
        """Checks L2 projection of a two-feature dict against per-sample factors.

        Args:
          radius: Radius passed to `utils.project_to_ball`.
          factor1: Scale expected on the first sample of both features.
          factor2: Scale expected on the second sample of both features.
        """
        # Squared entries of f1 sum to 25 + 9 + 49 + 169 = 252 for the first
        # sample and to 1 for the second sample.
        f1 = tf.constant([[[[0.0, 3.0, -4.0], [1.0, 2.0, -2.0]],
                           [[2.0, 3.0, 6.0], [3.0, 4.0, 12.0]]],
                          [[[1.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
                           [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]]])
        # Squared entries of f2 sum to 25 + 144 = 169 for the first sample and
        # to 0 for the second sample.
        f2 = tf.constant([[[3.0, 4.0], [12.0, 0.0]], [[0.0, 0.0], [0.0, 0.0]]])
        features = {'f1': f1, 'f2': f2}
        projected = self.evaluate(
            utils.project_to_ball(features, radius, configs.NormType.L2))

        # Build every expectation first, then assert in the same order:
        # f1 sample 1, f1 sample 2, f2 sample 1, f2 sample 2.
        sample_factors = (factor1, factor2)
        checks = []
        for name in ('f1', 'f2'):
            for sample, factor in enumerate(sample_factors):
                checks.append((name, sample, features[name][sample] * factor))

        for name, sample, wanted in checks:
            self.assertAllEqual(projected[name][sample], wanted)
예제 #4
0
 def testProjectToBallL2TensorList(self):
   """Projects a list of tensors with the L2 norm and radius 0.9.

   The expected values scale both tensors of a sample by the same factor,
   derived from the sample's squared entries summed over both tensors.
   """
   radius = 0.9
   tensors = [
       tf.constant([[1.0, -2.0], [-8.0, 1.0]]),
       tf.constant([[2.0], [-4.0]]),
   ]
   projected_tensors = self.evaluate(
       utils.project_to_ball(tensors, radius, configs.NormType.L2))
   # Sample 1: sqrt(1 + 4 + 4) = 3.0; sample 2: sqrt(64 + 1 + 16) = 9.0.
   # norm: [3.0, 9.0], scale: [0.9 / 3.0, 0.9 / 9.0] = [0.3, 0.1]
   expected_results = [[[0.3, -0.6], [-0.8, 0.1]], [[0.6], [-0.4]]]
   self.assertAllClose(projected_tensors, expected_results)
예제 #5
0
 def testProjectToBallL2(self, eps, first_factor, second_factor):
   """Checks L2 projection of a one-feature dict against per-row factors.

   Args:
     eps: Radius passed to `utils.project_to_ball`.
     first_factor: Scale expected on the first row of 'f1'.
     second_factor: Scale expected on the second row of 'f1'.
   """
   # Row L2 norms are sqrt(1 + 4 + 4) = 3 and sqrt(4 + 100 + 121) = 15.
   feature = tf.constant([[1.0, -2.0, 2.0], [2.0, 10.0, 11.0]])
   inputs = {'f1': feature}
   projected = self.evaluate(
       utils.project_to_ball(inputs, eps, configs.NormType.L2))
   # A [2, 1] column broadcasts one factor across each row.
   row_scales = tf.constant([[first_factor], [second_factor]])
   wanted = inputs['f1'] * row_scales
   self.assertAllEqual(projected['f1'], wanted)
 def testProjectToBallLInfMultipleFeatures(self):
     """Projects a two-feature dict with the INFINITY norm and radius 1.5.

     The expected values leave entries within [-1.5, 1.5] untouched and replace
     the others with +/-1.5, independently for each feature.
     """
     features = {
         'f1': tf.constant([[1.0, 2.0, -4.0], [-1.0, 3.0, 5.0]]),
         'f2': tf.constant([[1.0, 6.0], [2.0, 4.0]]),
     }
     actual = self.evaluate(
         utils.project_to_ball(features, 1.5, configs.NormType.INFINITY))
     wanted = {
         'f1': tf.constant([[1.0, 1.5, -1.5], [-1.0, 1.5, 1.5]]),
         'f2': tf.constant([[1.0, 1.5], [1.5, 1.5]]),
     }
     for name in ('f1', 'f2'):
         self.assertAllEqual(actual[name], wanted[name])
예제 #7
0
  def _apply_gradients(self, dense_features, keyed_grads,
                       dense_original_features):
    """Takes one gradient step and returns the constrained perturbed features.

    The gradients are masked per feature, turned into directions by
    `utils.maximize_within_unit_norm`, scaled by `adv_step_size`, and expressed
    as an offset from `dense_original_features`. When `pgd_epsilon` is set the
    offsets are passed through `utils.project_to_ball`. Finally the offsets are
    added to the original features and `_apply_feature_constraints` is applied.

    Args:
      dense_features: A dictionary of tensors at which gradients are computed.
      keyed_grads: A dictionary of tensors representing the gradients.
      dense_original_features: A dictionary of tensors which the epsilon
        constraint (`adv_config.pgd_epsilon`) should reference. This should have
        the same structure as `dense_features`.

    Returns:
      A dictionary with one entry per key of `dense_original_features`. Keys
      that received no gradient map to the original tensor unchanged.
    """
    masked = {}
    for name, gradient in keyed_grads.items():
      masked[name] = utils.apply_feature_mask(
          gradient, self.feature_masks.get(name, None))
    directions = utils.maximize_within_unit_norm(
        masked, self._adv_config.adv_grad_norm)

    # Offset of the stepped point from the *original* input, so that the
    # epsilon ball below is centered on the original input.
    offsets = {}
    for name, direction in directions.items():
      stepped = (
          direction * self._adv_config.adv_step_size + dense_features[name])
      offsets[name] = stepped - dense_original_features[name]

    if self._adv_config.pgd_epsilon is not None:
      offsets = utils.project_to_ball(offsets,
                                      self._adv_config.pgd_epsilon,
                                      self._adv_config.adv_grad_norm)

    # Apply feature constraints specified in the config.
    result = {}
    for name, original in dense_original_features.items():
      if name in offsets:
        result[name] = _apply_feature_constraints(
            original + tf.stop_gradient(offsets[name]),
            self.feature_min.get(name, None),
            self.feature_max.get(name, None))
      else:
        # No gradient was available for this feature; pass it through.
        result[name] = original
    return result
  def _compute_perturbations(self, dense_features, keyed_grads,
                             dense_original_features):
    """Derives per-feature perturbations from gradients.

    The gradients are masked per feature, turned into directions by
    `utils.maximize_within_unit_norm`, scaled by `adv_step_size`, and expressed
    as an offset from `dense_original_features`. When `pgd_epsilon` is set the
    offsets are passed through `utils.project_to_ball`.

    Args:
      dense_features: A dictionary of tensors at which gradients are computed.
      keyed_grads: A dictionary of tensors representing the gradients.
      dense_original_features: A dictionary of tensors which the epsilon
        constraint (`adv_config.pgd_epsilon`) should reference. This should have
        the same structure as `dense_features`.

    Returns:
      A dictionary of tensors, keyed like the directions computed from
      `keyed_grads`, representing the perturbations to be applied on
      `dense_original_features`.
    """
    masked = {}
    for name, gradient in keyed_grads.items():
      masked[name] = utils.apply_feature_mask(
          gradient, self.feature_masks.get(name, None))
    directions = utils.maximize_within_unit_norm(
        masked, self._adv_config.adv_grad_norm)

    # Offset of the stepped point from the *original* input, so that the
    # epsilon ball below is centered on the original input.
    offsets = {}
    for name, direction in directions.items():
      stepped = (
          direction * self._adv_config.adv_step_size + dense_features[name])
      offsets[name] = stepped - dense_original_features[name]

    if self._adv_config.pgd_epsilon is None:
      return offsets
    return utils.project_to_ball(offsets,
                                 self._adv_config.pgd_epsilon,
                                 self._adv_config.adv_grad_norm)
  def gen_neighbor(self, input_features, pgd_labels=None):
    """Generates adversarial neighbors and the corresponding weights.

    This function perturbs only *dense* tensors to generate adversarial
    neighbors. No perturbation will be applied on sparse tensors (e.g., string
    or int). Therefore, in the generated adversarial neighbors, the values of
    these sparse tensors will be kept the same as the input_features. In other
    words, if input_features is a dictionary mapping feature names to tensors,
    the dense features will be perturbed and the values of sparse features will
    remain the same.

    Args:
      input_features: a dense (float32) tensor, a list of dense tensors, or a
        dictionary of feature names and dense tensors. The shape of the
        tensor(s) should be either:
        (a) pointwise samples: [batch_size, feat_len], or
        (b) sequence samples: [batch_size, seq_len, feat_len]
      pgd_labels: the labels corresponding to each input. This should have shape
        `[batch_size, 1]`. This is required for PGD-generated adversaries, and
        unused otherwise. It is only read when recomputing the loss between
        iterations, i.e. when more than one iteration is configured.

    Returns:
      adv_neighbor: the perturbed example, with the same shape and structure as
        input_features
      adv_weight: a dense (float32) tensor with shape of [batch_size, 1],
        representing the weight for each neighbor

    Raises:
      ValueError: if some of the `input_features` cannot be perturbed due to
        (a) it is a `tf.SparseTensor`,
        (b) it has a non-differentiable type like string or integer, or
        (c) it is not involved in loss computation.
        This error is suppressed if `raise_invalid_gradient` is set to False
        (which is the default).
    """
    loss = self._labeled_loss
    gradient_tape = self._gradient_tape

    # Composes both features and feature_masks to dictionaries, so that the
    # feature_masks can be looked up by key.
    features = self._compose_as_dict(input_features)
    dense_original_features, sparse_original_features = self._split_dict(
        features, lambda feature: isinstance(feature, tf.Tensor))
    feature_masks = self._compose_as_dict(self._adv_config.feature_mask)
    feature_min = self._compose_as_dict(self._adv_config.clip_value_min)
    feature_max = self._compose_as_dict(self._adv_config.clip_value_max)
    if sparse_original_features:
      sparse_keys = str(sparse_original_features.keys())
      if self._raise_invalid_gradient:
        raise ValueError('Cannot perturb non-Tensor input: ' + sparse_keys)
      logging.log_first_n(logging.WARNING,
                          'Cannot perturb non-Tensor input: %s', 1, sparse_keys)
    dense_features = dense_original_features
    for step in range(self._adv_config.iterations):
      keyed_grads = self._compute_gradient(loss, dense_features, gradient_tape)
      masked_grads = {
          key: utils.apply_feature_mask(grad, feature_masks.get(key, None))
          for key, grad in keyed_grads.items()
      }

      unit_perturbations = utils.maximize_within_unit_norm(
          masked_grads, self._adv_config.adv_grad_norm)
      # The lambda parameter is deliberately not named like the loop variable.
      perturbations = tf.nest.map_structure(
          lambda direction: direction * self._adv_config.adv_step_size,
          unit_perturbations)
      # Clip perturbations into epsilon ball here. Note that this ball is
      # centered around the original input point.
      # Only include features for which perturbation occurred. There is
      # nothing to project for features without perturbations.
      diff = {
          key: dense_features[key] + perturb - dense_original_features[key]
          for key, perturb in perturbations.items()
      }
      if self._adv_config.epsilon is not None:
        bounded_diff = utils.project_to_ball(diff, self._adv_config.epsilon,
                                             self._adv_config.adv_grad_norm)
      else:
        bounded_diff = diff
      # Backfill the rest of the dense features.
      for key, feature in dense_features.items():
        if key not in bounded_diff:
          bounded_diff[key] = feature - dense_original_features[key]
      adv_neighbor = dict(sparse_original_features)
      for key, feature in dense_original_features.items():
        # Features without a perturbation are passed through the constraints
        # unchanged; only perturbed ones get the bounded offset added.
        if key in perturbations:
          candidate = feature + bounded_diff[key]
        else:
          candidate = feature
        adv_neighbor[key] = tf.stop_gradient(
            _apply_feature_constraints(
                candidate,
                feature_min.get(key, None), feature_max.get(key, None)))

      # Update for the next iteration.
      if step < self._adv_config.iterations - 1:
        inputs_t = self._decompose_as(input_features, adv_neighbor)
        # Compute the new loss to calculate gradients with.
        features = self._compose_as_dict(inputs_t)
        dense_features, _ = self._split_dict(
            features, lambda feature: isinstance(feature, tf.Tensor))
        if gradient_tape is not None:
          with gradient_tape:
            # Gradient calculated against dense features only.
            gradient_tape.watch(dense_features)
            loss = self._pgd_loss_fn(pgd_labels, self._pgd_model_fn(inputs_t))
        else:
          loss = self._pgd_loss_fn(pgd_labels, self._pgd_model_fn(inputs_t))

    # Converts the perturbed examples back to their original structure.
    adv_neighbor = self._decompose_as(input_features, adv_neighbor)

    batch_size = tf.shape(list(features.values())[0])[0]
    adv_weight = tf.ones([batch_size, 1])

    return adv_neighbor, adv_weight
 def testProjectToBallL2WithZero(self):
     """Expects an all-zero [2, 3] feature to come back as all zeros (L2)."""
     zeros = tf.constant(0.0, shape=[2, 3])
     projected = self.evaluate(
         utils.project_to_ball({'f1': zeros}, 0.5, configs.NormType.L2))
     wanted = tf.constant(0.0, shape=[2, 3])
     self.assertAllEqual(projected['f1'], wanted)
 def testProjectToBallL1(self):
     """Expects projecting with `NormType.L1` to raise NotImplementedError."""
     values = tf.constant([[1.0, 2.0, -4.0]])
     # Both building and evaluating the projection stay inside the context
     # manager, so the error may surface at either point.
     with self.assertRaises(NotImplementedError):
         projection = utils.project_to_ball(values, 0.2, configs.NormType.L1)
         self.evaluate(projection)
 def testProjectToBallLInfSingleTensor(self):
     """Projects a bare tensor with the INFINITY norm and radius 2.5.

     The expected values replace -3.0 and 4.0 with -2.5 and 2.5 and leave the
     remaining entries untouched.
     """
     radius = 2.5
     values = tf.constant([[1.0, -3.0], [-2.0, 4.0]])
     projection = utils.project_to_ball(values, radius,
                                        configs.NormType.INFINITY)
     actual = self.evaluate(projection)
     wanted = [[1.0, -2.5], [-2.0, 2.5]]
     self.assertAllClose(actual, wanted)
 def testProjectToBallLInf(self, input_tensor, eps, expected_tensor):
     """Checks INFINITY-norm projection of a one-feature dict.

     Args:
       input_tensor: Values wrapped in `tf.constant` and projected as 'f1'.
       eps: Radius passed to `utils.project_to_ball`.
       expected_tensor: Values the projected 'f1' must equal exactly.
     """
     wrapped = {'f1': tf.constant(input_tensor)}
     projection = utils.project_to_ball(wrapped, eps,
                                        configs.NormType.INFINITY)
     actual = self.evaluate(projection)
     self.assertAllEqual(actual['f1'], tf.constant(expected_tensor))