def test_max_words_per_user_validation(self):
    """`build_iblt_computation` rejects non-positive `max_words_per_user`."""
    for invalid_value in (0, -1):
        with self.assertRaisesRegex(ValueError, 'max_words_per_user'):
            iblt_tff.build_iblt_computation(max_words_per_user=invalid_value)
    # A positive bound and the "no bound" sentinel both build without error.
    iblt_tff.build_iblt_computation(max_words_per_user=1)
    iblt_tff.build_iblt_computation(max_words_per_user=None)
 def test_max_heavy_hitters_validation(self):
     """`build_iblt_computation` rejects non-positive `max_heavy_hitters`."""
     for invalid_value in (0, -1):
         with self.assertRaisesRegex(ValueError, 'max_heavy_hitters'):
             iblt_tff.build_iblt_computation(max_heavy_hitters=invalid_value)
     # A positive cap and the "return everything" sentinel are both accepted.
     iblt_tff.build_iblt_computation(max_heavy_hitters=1)
     iblt_tff.build_iblt_computation(max_heavy_hitters=None)
 def test_default_construction(self):
     """A default-built computation maps client string datasets to ServerOutput."""
     iblt_computation = iblt_tff.build_iblt_computation()
     self.assertIsInstance(iblt_computation, computation_base.Computation)

     # Reusable variable-length vector types appearing in the signature.
     string_vector = computation_types.TensorType(shape=[None], dtype=tf.string)
     int64_vector = computation_types.TensorType(shape=[None], dtype=tf.int64)

     # Input: a CLIENTS-placed sequence of string batches.
     expected_parameter = computation_types.at_clients(
         computation_types.SequenceType(string_vector))
     # Output: a SERVER-placed ServerOutput record.
     expected_result = computation_types.at_server(
         iblt_tff.ServerOutput(
             clients=tf.int32,
             heavy_hitters=string_vector,
             heavy_hitters_unique_counts=int64_vector,
             heavy_hitters_counts=int64_vector,
             num_not_decoded=tf.int64,
             round_timestamp=tf.int64,
         ))

     type_test_utils.assert_types_identical(
         iblt_computation.type_signature,
         computation_types.FunctionType(
             parameter=expected_parameter, result=expected_result))
def _execute_computation(
    data: List[List[str]],
    *,
    batch_size: int = 1,
    capacity: int = 1000,
    max_string_length: int = 10,
    repetitions: int = 3,
    seed: int = 0,
    max_heavy_hitters: Optional[int] = None,
    max_words_per_user: Optional[int] = None,
    k_anonymity: int = 1,
    secure_sum_bitwidth: Optional[int] = None,
    multi_contribution: bool = True,
    string_postprocessor: Optional[Callable[[tf.Tensor], tf.Tensor]] = None
    # Fixed: the original annotation (`Dict[str, tf.Tensor]`) did not match the
    # actual return, which is a 3-tuple. Kept as a string annotation so no new
    # `typing` import is required at definition time.
) -> 'tuple[dict[str, tuple[tf.Tensor, tf.Tensor]], tf.Tensor, tf.Tensor]':
    """Executes one round of IBLT computation over the given datasets.

    Args:
      data: A reference to all ClientData on device.
      batch_size: The number of elements in each batch of the dataset. Defaults
        to `1`, means the input dataset is processed by
        `tf.data.Dataset.batch(1)`.
      capacity: Capacity of the underlying IBLT. Defaults to `1000`.
      max_string_length: Maximum length (in bytes) of an item in the IBLT.
        Multi-byte characters in the string will be truncated on byte (not
        character) boundaries. Defaults to `10`.
      repetitions: The number of repetitions in IBLT data structure (must be >=
        3). Defaults to `3`.
      seed: An integer seed for hash functions. Defaults to `0`.
      max_heavy_hitters: The maximum number of items to return. If the decoded
        results have more than this number of items, will order decreasingly by
        the estimated counts and return the top max_heavy_hitters items. Default
        max_heavy_hitters == `None`, which means to return all the heavy hitters
        in the result.
      max_words_per_user: If set, bounds the number of contributions any user
        can make to the total counts in the iblt. If not `None`, must be a
        positive integer. Defaults to `None`.
      k_anonymity: Sets the number of users required for an element's count to
        be visible. Defaults to `1`.
      secure_sum_bitwidth: The bitwidth used for secure sum. The default value
        is `None`, which disables secure sum. If not `None`, must be in the
        range `[1,62]`. See `tff.federated_secure_sum_bitwidth`.
      multi_contribution: Whether each client is allowed to contribute multiple
        counts or only a count of one for each unique word. Defaults to `True`.
      string_postprocessor: A callable function that is run after strings are
        decoded from the IBLT in order to postprocess them. It should accept a
        single string tensor and output a single string tensor of the same
        shape. If `None`, no postprocessing is done.

    Returns:
      A tuple `(iteration_results, num_not_decoded, round_timestamp)` where
      `iteration_results` maps each decoded heavy-hitter string to its
      `(unique_count, count)` pair, `num_not_decoded` is the number of IBLT
      entries that were not decoded, and `round_timestamp` is the server-side
      round timestamp.
    """
    one_round_computation = iblt_tff.build_iblt_computation(
        capacity=capacity,
        max_string_length=max_string_length,
        repetitions=repetitions,
        seed=seed,
        max_heavy_hitters=max_heavy_hitters,
        max_words_per_user=max_words_per_user,
        k_anonymity=k_anonymity,
        secure_sum_bitwidth=secure_sum_bitwidth,
        batch_size=batch_size,
        multi_contribution=multi_contribution,
        string_postprocessor=string_postprocessor)
    datasets = _iblt_test_data_sampler(data, batch_size)

    output = one_round_computation(datasets)

    # Heavy-hitter strings come back as bytes; decode them (dropping any
    # invalid byte sequences, which can arise from byte-boundary truncation).
    heavy_hitters = [
        word.decode('utf-8', 'ignore') for word in output.heavy_hitters
    ]

    # Pair each decoded string with its (unique_count, count) statistics.
    iteration_results = dict(
        zip(heavy_hitters,
            zip(output.heavy_hitters_unique_counts,
                output.heavy_hitters_counts)))

    return iteration_results, output.num_not_decoded, output.round_timestamp
 def test_multi_contribution_validation(self):
     """Both boolean settings of `multi_contribution` are accepted."""
     for allowed in (True, False):
         iblt_tff.build_iblt_computation(multi_contribution=allowed)
 def test_batch_size_validation(self):
     """`build_iblt_computation` rejects non-positive `batch_size` values."""
     for invalid_value in (0, -1):
         with self.assertRaisesRegex(ValueError, 'batch_size'):
             iblt_tff.build_iblt_computation(batch_size=invalid_value)
     # The smallest valid batch size builds without error.
     iblt_tff.build_iblt_computation(batch_size=1)
 def test_secure_sum_bitwidth_validation(self):
     """`secure_sum_bitwidth` must be `None` or an integer in `[1, 62]`."""
     for invalid_value in (-1, 0, 63, 64):
         with self.assertRaisesRegex(ValueError, 'secure_sum_bitwidth'):
             iblt_tff.build_iblt_computation(secure_sum_bitwidth=invalid_value)
     # `None` disables secure sum; 1 and 62 are the range endpoints.
     for valid_value in (None, 1, 62):
         iblt_tff.build_iblt_computation(secure_sum_bitwidth=valid_value)
 def test_k_anonymity_validation(self):
     """`build_iblt_computation` rejects non-positive `k_anonymity` values."""
     for invalid_value in (0, -1):
         with self.assertRaisesRegex(ValueError, 'k_anonymity'):
             iblt_tff.build_iblt_computation(k_anonymity=invalid_value)
     # The minimum meaningful threshold builds without error.
     iblt_tff.build_iblt_computation(k_anonymity=1)
 def test_repetitions_validation(self):
     """`repetitions` values below 3 are rejected; 3 is the minimum accepted."""
     for invalid_value in (0, 2):
         with self.assertRaisesRegex(ValueError, 'repetitions'):
             iblt_tff.build_iblt_computation(repetitions=invalid_value)
     iblt_tff.build_iblt_computation(repetitions=3)
# Example #10
 def test_max_string_length_validation(self):
     """`build_iblt_computation` rejects non-positive `max_string_length`."""
     for invalid_value in (0, -1):
         with self.assertRaisesRegex(ValueError, 'max_string_length'):
             iblt_tff.build_iblt_computation(max_string_length=invalid_value)
     # The minimum valid length builds without error.
     iblt_tff.build_iblt_computation(max_string_length=1)