def testInvalidStructureRaisesError(self):
    """Checks structure validation in input_utils.build_min_diff_dataset.

    Matching nested structures of Datasets are accepted; a nested value that
    is not a Dataset, or mismatched key sets between the two arguments, must
    raise ValueError with a message naming the offending argument.
    """
    # Input dataset, content doesn't matter.
    inputs = tf.data.Dataset.from_tensor_slices(
        (self.original_x, self.original_y)).batch(5)

    nested_inputs = {"a": inputs, "b": inputs}
    # Invalid: "b" maps to a list wrapping a Dataset, not a Dataset itself.
    bad_nested_inputs = {"a": inputs, "b": [inputs]}

    # No errors raised for valid nested structures.
    _ = input_utils.build_min_diff_dataset(
        sensitive_group_dataset=nested_inputs,
        nonsensitive_group_dataset=nested_inputs)

    # Assert raises error for invalid sensitive_group_dataset structure.
    with self.assertRaisesRegex(
        ValueError, "sensitive_group_dataset.*unnested"
        ".*only elements of type.*Dataset.*Given"):
      _ = input_utils.build_min_diff_dataset(bad_nested_inputs, nested_inputs)

    # Assert raises error for invalid nonsensitive_group_dataset structure.
    with self.assertRaisesRegex(
        ValueError, "nonsensitive_group_dataset.*unnested.*only elements of "
        "type.*Dataset.*Given"):
      _ = input_utils.build_min_diff_dataset(nested_inputs, bad_nested_inputs)

    # Assert raises error for different sensitive and nonsensitive structures.
    # Same structure kind (dict) but a different key set ("c" vs "b").
    different_nested_inputs = {"a": inputs, "c": inputs}
    with self.assertRaisesRegex(
        ValueError, "sensitive_group_dataset.*"
        "nonsensitive_group_dataset.*do "
        "not have the same structure(.|\n)*don't have the same set of keys"):
      _ = input_utils.build_min_diff_dataset(nested_inputs,
                                             different_nested_inputs)
# Ejemplo n.º 2 (scraped-snippet separator; "0" below it was a vote count)
# -- kept as comments so the file remains syntactically valid Python.
    def testBuildFromSingleDatasets(self):
        """Builds a min_diff dataset from two unnested Datasets and checks
        every batch's x, membership, and weight components.
        """
        sensitive_batch_size = 3
        sensitive_dataset = tf.data.Dataset.from_tensor_slices(
            (self.sensitive_x, None,
             self.sensitive_w)).batch(sensitive_batch_size)

        nonsensitive_batch_size = 1
        nonsensitive_dataset = tf.data.Dataset.from_tensor_slices(
            (self.nonsensitive_x, None,
             self.nonsensitive_w)).batch(nonsensitive_batch_size)

        dataset = input_utils.build_min_diff_dataset(sensitive_dataset,
                                                     nonsensitive_dataset)

        # The resulting dataset repeats infinitely, so the loop must be
        # bounded or it never terminates. Take the first 20 batches, which
        # corresponds to 2 full epochs of the nonsensitive dataset.
        for batch_ind, min_diff_batch in enumerate(dataset.take(20)):
            # Assert min_diff batch properly formed.
            min_diff_x, min_diff_membership, min_diff_w = min_diff_batch

            self.assertAllClose(
                min_diff_x,
                _get_min_diff_batch(self.sensitive_x, self.nonsensitive_x,
                                    sensitive_batch_size,
                                    nonsensitive_batch_size, batch_ind))
            self.assertAllClose(
                min_diff_membership,
                _get_min_diff_membership_batch(sensitive_batch_size,
                                               nonsensitive_batch_size))
            self.assertAllClose(
                min_diff_w,
                _get_min_diff_batch(self.sensitive_w, self.nonsensitive_w,
                                    sensitive_batch_size,
                                    nonsensitive_batch_size, batch_ind))
  def testBuildFromSingleDatasets(self):
    """A min_diff dataset built from two unnested datasets yields the
    expected x, membership, and weight batches.
    """
    sensitive_batch_size = 3
    nonsensitive_batch_size = 1

    sensitive_dataset = tf.data.Dataset.from_tensor_slices(
        (self.sensitive_x, None, self.sensitive_w)).batch(sensitive_batch_size)
    nonsensitive_dataset = tf.data.Dataset.from_tensor_slices(
        (self.nonsensitive_x, None,
         self.nonsensitive_w)).batch(nonsensitive_batch_size)

    dataset = input_utils.build_min_diff_dataset(sensitive_dataset,
                                                 nonsensitive_dataset)

    # The resulting dataset will repeat infinitely so we only take the first 20
    # batches which corresponds to 2 full epochs of the nonsensitive dataset.
    for ind, batch in enumerate(dataset.take(20)):
      # Each min_diff batch should unpack into (x, membership, weights).
      batch_x, batch_membership, batch_w = batch

      expected_x = _get_min_diff_batch(self.sensitive_x, self.nonsensitive_x,
                                       sensitive_batch_size,
                                       nonsensitive_batch_size, ind)
      expected_membership = _get_min_diff_membership_batch(
          sensitive_batch_size, nonsensitive_batch_size)
      expected_w = _get_min_diff_batch(self.sensitive_w, self.nonsensitive_w,
                                       sensitive_batch_size,
                                       nonsensitive_batch_size, ind)

      self.assertAllClose(batch_x, expected_x)
      self.assertAllClose(batch_membership, expected_membership)
      self.assertAllClose(batch_w, expected_w)
# Ejemplo n.º 4 (scraped-snippet separator; "0" below it was a vote count)
# -- kept as comments so the file remains syntactically valid Python.
    def testDifferentWeightsShapeRaisesError(self):
        """Weights of mismatched rank between the two datasets raise
        ValueError.
        """
        sensitive_batch_size = 3
        nonsensitive_batch_size = 2

        # Give the sensitive weights an extra axis so their rank differs
        # from the nonsensitive weights' rank.
        reshaped_w = self.sensitive_w[:, tf.newaxis, :]
        sensitive_dataset = tf.data.Dataset.from_tensor_slices(
            (self.sensitive_x, None, reshaped_w)).batch(sensitive_batch_size)

        nonsensitive_dataset = tf.data.Dataset.from_tensor_slices(
            (self.nonsensitive_x, None,
             self.nonsensitive_w)).batch(nonsensitive_batch_size)

        with self.assertRaisesRegex(ValueError, "must be rank.*but is rank"):
            _ = input_utils.build_min_diff_dataset(sensitive_dataset,
                                                   nonsensitive_dataset)
  def testBuildFromDictsOfDatasets(self):
    """Builds a min_diff dataset from dicts of datasets and checks that the
    per-key batches match the single-dataset construction.

    Each key ("k1", "k2") pairs a sensitive and a nonsensitive dataset with
    its own batch size; the result must mirror the dict structure.
    """
    sensitive_batch_sizes = [3, 5]
    sensitive_dataset = {
        key: tf.data.Dataset.from_tensor_slices(
            (self.sensitive_x, None, self.sensitive_w)).batch(batch_size)
        for key, batch_size in zip(["k1", "k2"], sensitive_batch_sizes)
    }

    nonsensitive_batch_sizes = [1, 2]
    nonsensitive_dataset = {
        key: tf.data.Dataset.from_tensor_slices(
            (self.nonsensitive_x, None, self.nonsensitive_w)).batch(batch_size)
        for key, batch_size in zip(["k1", "k2"], nonsensitive_batch_sizes)
    }

    dataset = input_utils.build_min_diff_dataset(sensitive_dataset,
                                                 nonsensitive_dataset)

    # The resulting dataset will repeat infinitely so we only take the first 20
    # batches which corresponds to at least 2 full epochs of the nonsensitive
    # dataset.
    for batch_ind, min_diff_batches in enumerate(dataset.take(20)):
      min_diff_keys = sorted(min_diff_batches.keys())
      # Assert min_diff_batches has the right structure (i.e. set of keys).
      self.assertAllEqual(min_diff_keys, ["k1", "k2"])

      # Iterate keys in sorted order so they line up with the batch-size
      # lists above.
      min_diff_batches = [min_diff_batches[key] for key in min_diff_keys]
      for sensitive_batch_size, nonsensitive_batch_size, min_diff_batch in zip(
          sensitive_batch_sizes, nonsensitive_batch_sizes, min_diff_batches):
        # Assert min_diff batch properly formed.
        min_diff_x, min_diff_membership, min_diff_w = min_diff_batch

        self.assertAllClose(
            min_diff_x,
            _get_min_diff_batch(self.sensitive_x, self.nonsensitive_x,
                                sensitive_batch_size, nonsensitive_batch_size,
                                batch_ind))
        self.assertAllClose(
            min_diff_membership,
            _get_min_diff_membership_batch(sensitive_batch_size,
                                           nonsensitive_batch_size))
        self.assertAllClose(
            min_diff_w,
            _get_min_diff_batch(self.sensitive_w, self.nonsensitive_w,
                                sensitive_batch_size, nonsensitive_batch_size,
                                batch_ind))
# Ejemplo n.º 6 (scraped-snippet separator; "0" below it was a vote count)
# -- kept as comments so the file remains syntactically valid Python.
    def testBuildFromDictsOfDatasets(self):
        """Builds a min_diff dataset from dicts of datasets and checks that
        the per-key batches match the single-dataset construction.
        """
        sensitive_batch_sizes = [3, 5]
        sensitive_dataset = {
            key: tf.data.Dataset.from_tensor_slices(
                (self.sensitive_x, None, self.sensitive_w)).batch(batch_size)
            for key, batch_size in zip(["k1", "k2"], sensitive_batch_sizes)
        }

        nonsensitive_batch_sizes = [1, 2]
        nonsensitive_dataset = {
            key: tf.data.Dataset.from_tensor_slices(
                (self.nonsensitive_x, None,
                 self.nonsensitive_w)).batch(batch_size)
            for key, batch_size in zip(["k1", "k2"], nonsensitive_batch_sizes)
        }

        dataset = input_utils.build_min_diff_dataset(sensitive_dataset,
                                                     nonsensitive_dataset)

        # The resulting dataset repeats infinitely, so the loop must be
        # bounded or it never terminates. Take the first 20 batches, which
        # corresponds to at least 2 full epochs of the nonsensitive datasets.
        for batch_ind, min_diff_batches in enumerate(dataset.take(20)):
            min_diff_keys = sorted(min_diff_batches.keys())
            # Assert min_diff_batches has the right structure (i.e. set of keys).
            self.assertAllEqual(min_diff_keys, ["k1", "k2"])

            # Iterate keys in sorted order so they line up with the
            # batch-size lists above.
            min_diff_batches = [min_diff_batches[key] for key in min_diff_keys]
            for sensitive_batch_size, nonsensitive_batch_size, min_diff_batch in zip(
                    sensitive_batch_sizes, nonsensitive_batch_sizes,
                    min_diff_batches):
                # Assert min_diff batch properly formed.
                min_diff_x, min_diff_membership, min_diff_w = min_diff_batch

                self.assertAllClose(
                    min_diff_x,
                    _get_min_diff_batch(self.sensitive_x, self.nonsensitive_x,
                                        sensitive_batch_size,
                                        nonsensitive_batch_size, batch_ind))
                self.assertAllClose(
                    min_diff_membership,
                    _get_min_diff_membership_batch(sensitive_batch_size,
                                                   nonsensitive_batch_size))
                self.assertAllClose(
                    min_diff_w,
                    _get_min_diff_batch(self.sensitive_w, self.nonsensitive_w,
                                        sensitive_batch_size,
                                        nonsensitive_batch_size, batch_ind))
  def testWithBothMinDiffWeightsNone(self):
    """When neither input dataset carries weights, the min_diff sample
    weights are None.
    """
    sensitive_batch_size = 3
    nonsensitive_batch_size = 2

    sensitive_dataset = tf.data.Dataset.from_tensor_slices(
        (self.sensitive_x, None, None)).batch(sensitive_batch_size)
    nonsensitive_dataset = tf.data.Dataset.from_tensor_slices(
        (self.nonsensitive_x, None, None)).batch(nonsensitive_batch_size)

    dataset = input_utils.build_min_diff_dataset(sensitive_dataset,
                                                 nonsensitive_dataset)

    # The resulting dataset will repeat infinitely so we only take the first 10
    # batches which corresponds to 2 full epochs of the nonsensitive dataset.
    for batch in dataset.take(10):
      # Only the weight component is under test here.
      _, _, weights = tf.keras.utils.unpack_x_y_sample_weight(batch)
      self.assertIsNone(weights)
# Ejemplo n.º 8 (scraped-snippet separator; "0" below it was a vote count)
# -- kept as comments so the file remains syntactically valid Python.
    def testWithOnlySensitiveWeightsNone(self):
        """When only the sensitive dataset lacks weights, they default to
        1.0 in the merged min_diff weight batch.
        """
        sensitive_batch_size = 3
        sensitive_dataset = tf.data.Dataset.from_tensor_slices(
            (self.sensitive_x, None, None)).batch(sensitive_batch_size)

        nonsensitive_batch_size = 2
        nonsensitive_dataset = tf.data.Dataset.from_tensor_slices(
            (self.nonsensitive_x, None,
             self.nonsensitive_w)).batch(nonsensitive_batch_size)

        dataset = input_utils.build_min_diff_dataset(sensitive_dataset,
                                                     nonsensitive_dataset)

        # The resulting dataset repeats infinitely, so the loop must be
        # bounded or it never terminates. Take the first 10 batches, which
        # corresponds to 2 full epochs of the nonsensitive dataset.
        for batch_ind, min_diff_batch in enumerate(dataset.take(10)):
            # Skip all min_diff_data assertions except for weight.
            _, _, min_diff_w = tf.keras.utils.unpack_x_y_sample_weight(
                min_diff_batch)
            # Missing sensitive weights should have been filled with ones.
            self.assertAllClose(
                min_diff_w,
                _get_min_diff_batch(tf.fill([sensitive_batch_size, 1], 1.0),
                                    self.nonsensitive_w, sensitive_batch_size,
                                    nonsensitive_batch_size, batch_ind))
# Ejemplo n.º 9 (scraped-snippet separator; "0" below it was a vote count)
# -- kept as comments so the file remains syntactically valid Python.
    def testWithVariableSizeSparseTensors(self):
        """Sparse features with different dense shapes in the two datasets
        still merge; the combined batch keeps the larger dense shape.
        """
        sensitive_batch_size = 3
        sensitive_dataset = tf.data.Dataset.from_tensor_slices(
            (self.sensitive_x, self.sensitive_w,
             None)).batch(sensitive_batch_size)

        nonsensitive_batch_size = 2
        # Shallow copy so the shared fixture is not mutated for other tests.
        nonsensitive_x = copy.copy(self.nonsensitive_x)
        # Modify so that f2_sparse has a different dense shape in non_sensitive
        # than in sensitive.
        nonsensitive_x["f2_sparse"] = tf.sparse.reset_shape(
            nonsensitive_x["f2_sparse"], [10, 5])
        nonsensitive_dataset = tf.data.Dataset.from_tensor_slices(
            (nonsensitive_x, None,
             self.nonsensitive_w)).batch(nonsensitive_batch_size)

        dataset = input_utils.build_min_diff_dataset(sensitive_dataset,
                                                     nonsensitive_dataset)
        # The dataset repeats infinitely, so bound the iteration.
        for _, min_diff_batch in enumerate(dataset.take(10)):
            min_diff_x, _, _ = tf.keras.utils.unpack_x_y_sample_weight(
                min_diff_batch)
            # The merged sparse feature should carry the widened second dim.
            self.assertEqual(min_diff_x["f2_sparse"].dense_shape[1], 5)
  def testWithOnlyNonsensitiveWeightsNone(self):
    """When only the nonsensitive dataset lacks weights, they default to
    1.0 in the merged min_diff weight batch.
    """
    sensitive_batch_size = 3
    nonsensitive_batch_size = 2

    sensitive_dataset = tf.data.Dataset.from_tensor_slices(
        (self.sensitive_x, None, self.sensitive_w)).batch(sensitive_batch_size)
    nonsensitive_dataset = tf.data.Dataset.from_tensor_slices(
        (self.nonsensitive_x, None, None)).batch(nonsensitive_batch_size)

    dataset = input_utils.build_min_diff_dataset(sensitive_dataset,
                                                 nonsensitive_dataset)

    # The resulting dataset will repeat infinitely so we only take the first 10
    # batches which corresponds to 2 full epochs of the nonsensitive dataset.
    for ind, batch in enumerate(dataset.take(10)):
      # Only the weight component is under test here.
      _, _, weights = tf.keras.utils.unpack_x_y_sample_weight(batch)
      # Missing nonsensitive weights should have been filled with ones.
      filled_nonsensitive_w = tf.fill([nonsensitive_batch_size, 1], 1.0)
      self.assertAllClose(
          weights,
          _get_min_diff_batch(self.sensitive_w, filled_nonsensitive_w,
                              sensitive_batch_size, nonsensitive_batch_size,
                              ind))
# Ejemplo n.º 11 (scraped-snippet separator; "0" below it was a vote count)
# -- kept as comments so the file remains syntactically valid Python.
    def testPackDictsOfDatasets(self):
        """Packs an original dataset with a dict-structured min_diff dataset
        and checks both the preserved original batches and the per-key
        min_diff batches.
        """
        original_batch_size = 5
        original_dataset = tf.data.Dataset.from_tensor_slices(
            (self.original_x, self.original_y,
             self.original_w)).batch(original_batch_size)

        sensitive_batch_sizes = [3, 5]
        sensitive_dataset = {
            key: tf.data.Dataset.from_tensor_slices(
                (self.sensitive_x, None, self.sensitive_w)).batch(batch_size)
            for key, batch_size in zip(["k1", "k2"], sensitive_batch_sizes)
        }

        nonsensitive_batch_sizes = [1, 2]
        nonsensitive_dataset = {
            key: tf.data.Dataset.from_tensor_slices(
                (self.nonsensitive_x, None,
                 self.nonsensitive_w)).batch(batch_size)
            for key, batch_size in zip(["k1", "k2"], nonsensitive_batch_sizes)
        }

        dataset = input_utils.pack_min_diff_data(
            original_dataset,
            min_diff_dataset=input_utils.build_min_diff_dataset(
                sensitive_dataset, nonsensitive_dataset))

        # NOTE(review): no .take(...) bound here, unlike the bare
        # build_min_diff_dataset tests -- presumably pack_min_diff_data yields
        # one batch per (finite) original batch, so the loop terminates;
        # confirm against pack_min_diff_data's implementation.
        for batch_ind, (packed_inputs, y, w) in enumerate(dataset):
            self.assertIsInstance(packed_inputs,
                                  input_utils.MinDiffPackedInputs)

            # Assert original batch is conserved
            self.assertAllClose(
                packed_inputs.original_inputs,
                _get_batch(self.original_x, original_batch_size, batch_ind))
            self.assertAllClose(
                y, _get_batch(self.original_y, original_batch_size, batch_ind))
            self.assertAllClose(
                w, _get_batch(self.original_w, original_batch_size, batch_ind))

            min_diff_keys = sorted(packed_inputs.min_diff_data.keys())
            # Assert min_diff_batches has the right structure (i.e. set of keys).
            self.assertAllEqual(min_diff_keys, ["k1", "k2"])

            # Iterate keys in sorted order so they line up with the
            # batch-size lists above.
            min_diff_batches = [
                packed_inputs.min_diff_data[key] for key in min_diff_keys
            ]
            for sensitive_batch_size, nonsensitive_batch_size, min_diff_batch in zip(
                    sensitive_batch_sizes, nonsensitive_batch_sizes,
                    min_diff_batches):

                # Assert min_diff batch properly formed.
                min_diff_x, min_diff_membership, min_diff_w = min_diff_batch

                self.assertAllClose(
                    min_diff_x,
                    _get_min_diff_batch(self.sensitive_x, self.nonsensitive_x,
                                        sensitive_batch_size,
                                        nonsensitive_batch_size, batch_ind))
                self.assertAllClose(
                    min_diff_membership,
                    _get_min_diff_membership_batch(sensitive_batch_size,
                                                   nonsensitive_batch_size))
                self.assertAllClose(
                    min_diff_w,
                    _get_min_diff_batch(self.sensitive_w, self.nonsensitive_w,
                                        sensitive_batch_size,
                                        nonsensitive_batch_size, batch_ind))