Example #1
  def test_type_properties(self, value_type):
    factory = stochastic_discretization.StochasticDiscretizationFactory(
        step_size=0.1,
        inner_agg_factory=_measurement_aggregator,
        distortion_aggregation_factory=mean.UnweightedMeanFactory())
    value_type = computation_types.to_type(value_type)
    quantize_type = type_conversions.structure_from_tensor_type_tree(
        lambda x: (tf.int32, x.shape), value_type)
    process = factory.create(value_type)
    self.assertIsInstance(process, aggregation_process.AggregationProcess)

    server_state_type = computation_types.StructType([('step_size', tf.float32),
                                                      ('inner_agg_process', ())
                                                     ])
    server_state_type = computation_types.at_server(server_state_type)
    expected_initialize_type = computation_types.FunctionType(
        parameter=None, result=server_state_type)
    type_test_utils.assert_types_equivalent(process.initialize.type_signature,
                                            expected_initialize_type)

    expected_measurements_type = computation_types.StructType([
        ('stochastic_discretization', quantize_type), ('distortion', tf.float32)
    ])
    expected_measurements_type = computation_types.at_server(
        expected_measurements_type)
    expected_next_type = computation_types.FunctionType(
        parameter=collections.OrderedDict(
            state=server_state_type,
            value=computation_types.at_clients(value_type)),
        result=measured_process.MeasuredProcessOutput(
            state=server_state_type,
            result=computation_types.at_server(value_type),
            measurements=expected_measurements_type))
    type_test_utils.assert_types_equivalent(process.next.type_signature,
                                            expected_next_type)
Example #2
 def test_structure(self):
     struct_type = computation_types.StructType([('a', tf.int32),
                                                 (None, tf.int32)])
     return_incr = self.get_incrementing_function()
     result = type_conversions.structure_from_tensor_type_tree(
         return_incr, struct_type)
     self.assertEqual(result, structure.Struct([('a', 0), (None, 1)]))
Example #3
def _get_accumulator_type(member_type):
  """Constructs a `tff.Type` for the accumulator in sample aggregation.

  Args:
    member_type: A `tff.Type` representing the member components of the
      federated type.

  Returns:
    The `tff.StructType` associated with the accumulator. The tuple contains
    two parts, `accumulators` and `rands`, that are parallel lists (e.g. the
    i-th index in one corresponds to the i-th index in the other). These two
    lists are used to sample from the accumulators with equal probability.
  """

  def add_unknown_first_dim(tensor_type):
    return computation_types.TensorType(tensor_type.dtype,
                                        [None] + tensor_type.shape.dims)

  accumulator_type = type_conversions.structure_from_tensor_type_tree(
      add_unknown_first_dim, member_type)
  return computation_types.to_type(
      _Samples(
          accumulators=accumulator_type,
          rands=computation_types.TensorType(tf.float32, shape=[None]),
      ))
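For concreteness, a minimal sketch of the accumulator type this produces for a simple member type, assuming `_get_accumulator_type` above is in scope (the member type here is hypothetical):

import tensorflow as tf
import tensorflow_federated as tff

# A hypothetical member type: a float32 vector with 3 elements.
member_type = tff.types.TensorType(tf.float32, [3])
accumulator_type = _get_accumulator_type(member_type)
# Each leaf gains an unknown-size leading dimension for collecting samples,
# so the result is (in compact TFF type notation):
#   <accumulators=float32[?,3], rands=float32[?]>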
Example #4
    def test_single_tensor(self):
        def expect_tfint32_return_5(tensor_type):
            self.assert_types_identical(tensor_type,
                                        computation_types.TensorType(tf.int32))
            return 5

        result = type_conversions.structure_from_tensor_type_tree(
            expect_tfint32_return_5, tf.int32)
        self.assertEqual(result, 5)
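Together, these tests pin down the contract: `structure_from_tensor_type_tree` invokes the given function once per `TensorType` leaf and rebuilds the surrounding Python structure around the results. A minimal sketch of a typical use, assuming the helper is also exposed publicly as `tff.types.structure_from_tensor_type_tree`:

import collections
import tensorflow as tf
import tensorflow_federated as tff

# A hypothetical type tree: an OrderedDict with two scalar tensor leaves.
metrics_type = tff.to_type(
    collections.OrderedDict(count=tf.int32, mean=tf.float32))
specs = tff.types.structure_from_tensor_type_tree(
    lambda t: tf.TensorSpec(t.shape, t.dtype), metrics_type)
# specs == OrderedDict(count=tf.TensorSpec([], tf.int32),
#                      mean=tf.TensorSpec([], tf.float32))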
Example #5
    def initialize():
        # Allow fixed seeds, otherwise set a sentinel that signals a seed should be
        # generated upon the first `accumulate` call of the `federated_aggregate`.
        if seed is None:
            real_seed = tf.convert_to_tensor(SEED_SENTINEL, dtype=tf.int64)
        elif tf.is_tensor(seed):
            if seed.dtype != tf.int64:
                real_seed = tf.cast(seed, dtype=tf.int64)
            else:
                real_seed = seed
        else:
            real_seed = tf.convert_to_tensor(seed, dtype=tf.int64)

        def zero_for_tensor_type(t: computation_types.TensorType):
            """Adds an extra first dimension to create a tensor that collects samples.

            The first dimension will have size `0` for the algebraic zero,
            resulting in an "empty" tensor. Samples will be concatenated onto
            it as they fill the reservoir.

            Args:
              t: A `tff.TensorType` to build a sampling zero value for.

            Returns:
              A tensor whose rank is one larger than before, and whose first
              dimension is zero.

            Raises:
              TypeError: If `t` is not a `tff.TensorType`.
            """
            if not t.is_tensor():
                raise TypeError(
                    f'Cannot create zero for non-`TensorType`: {type(t)}')
            return tf.zeros([0] + t.shape, dtype=t.dtype)

        try:
            initial_samples = type_conversions.structure_from_tensor_type_tree(
                zero_for_tensor_type, sample_value_type)
        except ValueError as e:
            raise TypeError(
                'Cannot build initial reservoir for structure that has '
                'types other than StructWithPythonType or TensorType, '
                f'got {sample_value_type!r}.') from e
        return collections.OrderedDict(random_seed=tf.fill(dims=(2, ),
                                                           value=real_seed),
                                       random_values=tf.zeros([0], tf.int32),
                                       samples=initial_samples)
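To make the sampling-zero shape concrete, here is a small pure-TF sketch (the leaf shape is hypothetical) of how a reservoir built from such zeros grows:

import tensorflow as tf

# Zero for a float32[3] leaf: shape [0, 3], an "empty" collection of samples.
reservoir = tf.zeros([0, 3], tf.float32)
sample = tf.random.normal([3])
# Accepted samples are concatenated along the new leading dimension.
reservoir = tf.concat([reservoir, sample[tf.newaxis]], axis=0)  # shape [1, 3]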
Example #6
    def create(self, value_type):
        # Checks `value_type` and computes the client data dimension.
        if (value_type.is_struct_with_python()
                and type_analysis.is_structure_of_tensors(value_type)):
            num_elements_struct = type_conversions.structure_from_tensor_type_tree(
                lambda x: x.shape.num_elements(), value_type)
            self._client_dim = sum(tf.nest.flatten(num_elements_struct))
        elif value_type.is_tensor():
            self._client_dim = value_type.shape.num_elements()
        else:
            raise TypeError(
                'Expected `value_type` to be `TensorType` or '
                '`StructWithPythonType` containing only `TensorType`. '
                f'Found type: {repr(value_type)}')
        # Checks that all values are integers or floats.
        if not (type_analysis.is_structure_of_floats(value_type)
                or type_analysis.is_structure_of_integers(value_type)):
            raise TypeError(
                'Component dtypes of `value_type` must all be integers '
                f'or floats. Found {repr(value_type)}.')

        ddp_agg_process = self._build_aggregation_factory().create(value_type)
        init_fn = ddp_agg_process.initialize

        @federated_computation.federated_computation(
            init_fn.type_signature.result,
            computation_types.at_clients(value_type))
        def next_fn(state, value):
            agg_output = ddp_agg_process.next(state, value)
            new_measurements = self._derive_measurements(
                agg_output.state, agg_output.measurements)
            new_state = agg_output.state
            if self._auto_l2_clip:
                new_state = self._autotune_component_states(agg_output.state)

            return measured_process.MeasuredProcessOutput(
                state=new_state,
                result=agg_output.result,
                measurements=new_measurements)

        return aggregation_process.AggregationProcess(init_fn, next_fn)
Example #7
    def create(
        self, value_type: factory.ValueType
    ) -> aggregation_process.AggregationProcess:
        # Checks `value_type` and computes the client data dimension.
        if (value_type.is_struct()
                and type_analysis.is_structure_of_tensors(value_type)):
            num_elements_struct = type_conversions.structure_from_tensor_type_tree(
                lambda x: x.shape.num_elements(), value_type)
            client_dim = sum(tf.nest.flatten(num_elements_struct))
        elif value_type.is_tensor():
            client_dim = value_type.shape.num_elements()
        else:
            raise TypeError('Expected `value_type` to be `TensorType` or '
                            '`StructType` containing only `TensorType`. '
                            f'Found type: {repr(value_type)}')
        # Checks that all values are integers.
        if not type_analysis.is_structure_of_integers(value_type):
            raise TypeError(
                'Component dtypes of `value_type` must all be integers. '
                f'Found {repr(value_type)}.')
        # Checks that we have enough elements to estimate standard deviation.
        if self._estimate_stddev:
            if client_dim <= 1:
                raise ValueError(
                    'The stddev estimation procedure expects more than '
                    '1 element from `value_type`. Found `value_type` of '
                    f'{value_type} with {client_dim} elements.')
            elif client_dim <= 100:
                warnings.warn(
                    f'`value_type` has only {client_dim} elements. The '
                    'estimated standard deviation may be noisy. Consider '
                    'setting `estimate_stddev` to True only if the input '
                    'tensor/structure has more than 100 elements.')

        inner_agg_process = self._inner_agg_factory.create(value_type)
        init_fn = inner_agg_process.initialize
        next_fn = self._create_next_fn(inner_agg_process.next,
                                       init_fn.type_signature.result,
                                       value_type)
        return aggregation_process.AggregationProcess(init_fn, next_fn)
Example #8
def _unique_dtypes_in_structure(
        type_spec: computation_types.Type) -> Set[tf.DType]:
    """Returns a set of unique dtypes in `type_spec`.

  Args:
    type_spec: A `computation_types.Type`.

  Returns:
    A `set` containing unique dtypes found in `type_spec`.
  """
    py_typecheck.check_type(type_spec, computation_types.Type)
    if type_spec.is_tensor():
        py_typecheck.check_type(type_spec.dtype, tf.DType)
        return set([type_spec.dtype])
    elif type_spec.is_struct():
        return set(
            tf.nest.flatten(
                type_conversions.structure_from_tensor_type_tree(
                    lambda t: t.dtype, type_spec)))
    elif type_spec.is_federated():
        return _unique_dtypes_in_structure(type_spec.member)
    else:
        return set()
Example #9
 def test_nested_python_type(self):
     return_incr = self.get_incrementing_function()
     result = type_conversions.structure_from_tensor_type_tree(
         return_incr, [tf.int32, (tf.string, tf.int32)])
     self.assertEqual(result, [0, (1, 2)])
Example #10
def from_keras_model(
    keras_model: tf.keras.Model,
    loss: Loss,
    input_spec,
    loss_weights: Optional[List[float]] = None,
    metrics: Optional[List[tf.keras.metrics.Metric]] = None
) -> model_lib.Model:
    """Builds a `tff.learning.Model` from a `tf.keras.Model`.

  The `tff.learning.Model` returned by this function uses `keras_model` for
  its forward pass and autodifferentiation steps.

  Notice that since TFF couples the `tf.keras.Model` and `loss`,
  TFF needs a slightly different notion of "fully specified type" than
  pure Keras does. That is, the model `M` takes inputs of type `x` and
  produces predictions of type `p`; the loss function `L` takes inputs of type
  `<p, y>` (where `y` is the ground truth label type) and produces a scalar.
  Therefore in order to fully specify the type signatures for computations in
  which the generated `tff.learning.Model` will appear, TFF needs the type `y`
  in addition to the type `x`.

  Note: This function does not currently accept subclassed `tf.keras.Models`,
  as it makes assumptions about presence of certain attributes which are
  guaranteed to exist through the functional or Sequential API but are
  not necessarily present for subclassed models.

  Args:
    keras_model: A `tf.keras.Model` object that is not compiled.
    loss: A single `tf.keras.losses.Loss` or a list of losses-per-output. If a
      single loss is provided, then all model output (as well as all prediction
      information) is passed to the loss; this includes situations of multiple
      model outputs and/or predictions. If multiple losses are provided as a
      list, then each loss is expected to correspond to a model output; the
      model will attempt to minimize the sum of all individual losses
      (optionally weighted using the `loss_weights` argument).
    input_spec: A structure of `tf.TensorSpec`s or `tff.Type` specifying the
      type of arguments the model expects. If `input_spec` is a `tff.Type`, its
      leaf nodes must be `TensorType`s. Note that `input_spec` must be a
      compound structure of two elements, specifying both the data fed into the
      model (x) to generate predictions as well as the expected type of the
      ground truth (y). If provided as a list, it must be in the order [x, y].
      If provided as a dictionary, the keys must explicitly be named `'x'` and
      `'y'`.
    loss_weights: (Optional) A list of Python floats used to weight the loss
      contribution of each model output (when providing a list of losses for the
      `loss` argument).
    metrics: (Optional) a list of `tf.keras.metrics.Metric` objects.

  Returns:
    A `tff.learning.Model` object.

  Raises:
    TypeError: If `keras_model` is not an instance of `tf.keras.Model`, if
      `loss` is not an instance of `tf.keras.losses.Loss` nor a list of
      instances of `tf.keras.losses.Loss`, if `input_spec` is a `tff.Type` but
      the leaf nodes are not `tff.TensorType`s, if `loss_weight` is provided but
      is not a list of floats, or if `metrics` is provided but is not a list of
      instances of `tf.keras.metrics.Metric`.
    ValueError: If `keras_model` was compiled, if `loss` is a list of unequal
      length to the number of outputs of `keras_model`, if `loss_weights` is
      specified but `loss` is not a list, if `input_spec` does not contain
      exactly two elements, or if `input_spec` is a dictionary and does not
      contain keys `'x'` and `'y'`.
  """
    # Validate `keras_model`
    py_typecheck.check_type(keras_model, tf.keras.Model)
    if keras_model._is_compiled:  # pylint: disable=protected-access
        raise ValueError('`keras_model` must not be compiled')

    # Validate and normalize `loss` and `loss_weights`
    if not isinstance(loss, list):
        py_typecheck.check_type(loss, tf.keras.losses.Loss)
        if loss_weights is not None:
            raise ValueError(
                '`loss_weights` cannot be used if `loss` is not a list.')
        loss = [loss]
        loss_weights = [1.0]
    else:
        if len(loss) != len(keras_model.outputs):
            raise ValueError(
                'If a loss list is provided, `keras_model` must have '
                'equal number of outputs to the losses.\nloss: {}\nof '
                'length: {}.\noutputs: {}\nof length: {}.'.format(
                    loss, len(loss), keras_model.outputs,
                    len(keras_model.outputs)))
        for loss_fn in loss:
            py_typecheck.check_type(loss_fn, tf.keras.losses.Loss)

        if loss_weights is None:
            loss_weights = [1.0] * len(loss)
        else:
            if len(loss) != len(loss_weights):
                raise ValueError(
                    '`keras_model` must have equal number of losses and loss_weights.'
                    '\nloss: {}\nof length: {}.'
                    '\nloss_weights: {}\nof length: {}.'.format(
                        loss, len(loss), loss_weights, len(loss_weights)))
            for loss_weight in loss_weights:
                py_typecheck.check_type(loss_weight, float)

    if len(input_spec) != 2:
        raise ValueError(
            'The top-level structure in `input_spec` must contain '
            'exactly two top-level elements, as it must specify type '
            'information for both inputs to and predictions from the '
            'model. You passed input spec {}.'.format(input_spec))
    if isinstance(input_spec, computation_types.Type):
        if not type_analysis.is_structure_of_tensors(input_spec):
            raise TypeError(
                'Expected a `tff.Type` with all the leaf nodes being '
                '`tff.TensorType`s, found an input spec {}.'.format(
                    input_spec))
        input_spec = type_conversions.structure_from_tensor_type_tree(
            lambda tensor_type: tf.TensorSpec(tensor_type.shape,
                                              tensor_type.dtype), input_spec)
    else:
        tf.nest.map_structure(
            lambda s: py_typecheck.check_type(s, tf.TensorSpec,
                                              'input spec member'), input_spec)
    if isinstance(input_spec, collections.abc.Mapping):
        if 'x' not in input_spec:
            raise ValueError(
                'The `input_spec` is a collections.abc.Mapping (e.g., a dict), so it '
                'must contain an entry with key `\'x\'`, representing the input(s) '
                'to the Keras model.')
        if 'y' not in input_spec:
            raise ValueError(
                'The `input_spec` is a collections.abc.Mapping (e.g., a dict), so it '
                'must contain an entry with key `\'y\'`, representing the label(s) '
                'to be used in the Keras loss(es).')

    if metrics is None:
        metrics = []
    else:
        py_typecheck.check_type(metrics, list)
        for metric in metrics:
            py_typecheck.check_type(metric, tf.keras.metrics.Metric)

    return _KerasModel(keras_model,
                       input_spec=input_spec,
                       loss_fns=loss,
                       loss_weights=loss_weights,
                       metrics=metrics)
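A minimal usage sketch; the model, shapes, and names below are hypothetical, and the function is assumed to be exposed publicly as `tff.learning.from_keras_model`:

import collections
import tensorflow as tf
import tensorflow_federated as tff

def model_fn():
  # An uncompiled single-output regression model.
  keras_model = tf.keras.Sequential(
      [tf.keras.layers.Dense(1, input_shape=(2,))])
  # `input_spec` must describe both the model input (x) and the labels (y).
  input_spec = collections.OrderedDict(
      x=tf.TensorSpec(shape=[None, 2], dtype=tf.float32),
      y=tf.TensorSpec(shape=[None, 1], dtype=tf.float32))
  return tff.learning.from_keras_model(
      keras_model,
      loss=tf.keras.losses.MeanSquaredError(),
      input_spec=input_spec,
      metrics=[tf.keras.metrics.MeanAbsoluteError()])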
Example #11
    def create(self, value_type):
        # Validate input args and value_type and parse out the TF dtypes.
        if value_type.is_tensor():
            tf_dtype = value_type.dtype
        elif (value_type.is_struct_with_python()
              and type_analysis.is_structure_of_tensors(value_type)):
            if self._prior_norm_bound:
                raise TypeError(
                    'If `prior_norm_bound` is specified, `value_type` must '
                    f'be `TensorType`. Found type: {repr(value_type)}.')
            tf_dtype = type_conversions.structure_from_tensor_type_tree(
                lambda x: x.dtype, value_type)
        else:
            raise TypeError(
                'Expected `value_type` to be `TensorType` or '
                '`StructWithPythonType` containing only `TensorType`. '
                f'Found type: {repr(value_type)}')

        # Check that all values are floats.
        if not type_analysis.is_structure_of_floats(value_type):
            raise TypeError(
                'Component dtypes of `value_type` must all be floats. '
                f'Found {repr(value_type)}.')

        discretize_fn = _build_discretize_fn(value_type, self._stochastic,
                                             self._beta)

        @tensorflow_computation.tf_computation(
            discretize_fn.type_signature.result, tf.float32)
        def undiscretize_fn(value, scale_factor):
            return _undiscretize_struct(value, scale_factor, tf_dtype)

        inner_value_type = discretize_fn.type_signature.result
        inner_agg_process = self._inner_agg_factory.create(inner_value_type)

        @federated_computation.federated_computation()
        def init_fn():
            state = collections.OrderedDict(
                scale_factor=intrinsics.federated_value(
                    self._scale_factor, placements.SERVER),
                prior_norm_bound=intrinsics.federated_value(
                    self._prior_norm_bound, placements.SERVER),
                inner_agg_process=inner_agg_process.initialize())
            return intrinsics.federated_zip(state)

        @federated_computation.federated_computation(
            init_fn.type_signature.result,
            computation_types.at_clients(value_type))
        def next_fn(state, value):
            server_scale_factor = state['scale_factor']
            client_scale_factor = intrinsics.federated_broadcast(
                server_scale_factor)
            server_prior_norm_bound = state['prior_norm_bound']
            prior_norm_bound = intrinsics.federated_broadcast(
                server_prior_norm_bound)

            discretized_value = intrinsics.federated_map(
                discretize_fn, (value, client_scale_factor, prior_norm_bound))

            inner_state = state['inner_agg_process']
            inner_agg_output = inner_agg_process.next(inner_state,
                                                      discretized_value)

            undiscretized_agg_value = intrinsics.federated_map(
                undiscretize_fn,
                (inner_agg_output.result, server_scale_factor))

            new_state = collections.OrderedDict(
                scale_factor=server_scale_factor,
                prior_norm_bound=server_prior_norm_bound,
                inner_agg_process=inner_agg_output.state)
            measurements = collections.OrderedDict(
                discretize=inner_agg_output.measurements)

            return measured_process.MeasuredProcessOutput(
                state=intrinsics.federated_zip(new_state),
                result=undiscretized_agg_value,
                measurements=intrinsics.federated_zip(measurements))

        return aggregation_process.AggregationProcess(init_fn, next_fn)
Example #12
 def zeros_fn():
   return type_conversions.structure_from_tensor_type_tree(
       validate_and_fill, member_type)
Example #13
def create_default_secure_sum_quantization_ranges(
    local_unfinalized_metrics_type: computation_types.StructWithPythonType,
    lower_bound: Union[int, float] = DEFAULT_SECURE_LOWER_BOUND,
    upper_bound: Union[int, float] = DEFAULT_SECURE_UPPER_BOUND
) -> MetricValueRangeDict:
    """Create a nested structure of quantization ranges for secure sum encoding.

  Args:
    local_unfinalized_metrics_type: The `tff.Type` structure to generate default
      secure sum quantization ranges from. Must be a `tff.Type` tree containing
      only `tff.TensorType` and `tff.StructType`. Each `tff.TensorType` must be
      of floating point or integer dtype.
    lower_bound: An optional integer or floating point lower bound for the
      secure sum quantization range. Values smaller than this will be clipped to
      this value. By default is `0`. If a `float`, any `tff.TensorType` in
      `local_unfinalized_metrics_type` with an integer dtype will use
      `math.ceil(lower_bound)` as a bound.
    upper_bound: An optional integer or floating point upper bound for the
      secure sum quantization range. Values larger than this will be clipped to
      this value. By default is `2^20 - 1` (~1 million). If a `float`, any
      `tff.TensorType` in `local_unfinalized_metrics_type` with an integer dtype
      will use `math.floor(upper_bound)` as a bound.

  Returns:
    A nested structure matching the structure of
    `local_unfinalized_metrics_type` where each `tf.TensorType` has been
    replaced with a 2-tuple of lower bound and upper bound, where the tuple
    elements are `float` for floating dtypes, and `int` for integer dtypes.

  Raises:
    UnquantizableDTypeError: If A `tff.TensorType` in
      `local_unfinalized_metrics_type` has a non-float or non-integer dtype.
    ValueError: If an integer dtype in `local_unfinalized_metrics_type` will
      have a zero range (e.g. `math.floor(upper_bound) - math.ceil(lower_bound)
      < 1`).
  """
    py_typecheck.check_type(upper_bound, (int, float))
    py_typecheck.check_type(lower_bound, (int, float))
    if lower_bound >= upper_bound:
        raise ValueError('`upper_bound` must be greater than `lower_bound`.')
    integer_range_width = math.floor(upper_bound) - math.ceil(lower_bound)

    def create_default_range(
            type_spec: computation_types.TensorType) -> MetricValueRange:
        if type_spec.dtype.is_floating:
            return float(lower_bound), float(upper_bound)
        elif type_spec.dtype.is_integer:
            if integer_range_width < 1:
                raise ValueError(
                    'Encountered an integer tensor in the type, but quantization range '
                    f'[{lower_bound}, {upper_bound}] is not wide enough to quantize '
                    f'any integers (becomes [{int(lower_bound)}, {int(upper_bound)}]).'
                )
            return math.ceil(lower_bound), math.floor(upper_bound)
        else:
            raise UnquantizableDTypeError(
                'Do not know how to create a default range for dtype '
                f'{type_spec.dtype}. Only floating or integer types are supported.'
            )

    return type_conversions.structure_from_tensor_type_tree(
        create_default_range, local_unfinalized_metrics_type)
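A small sketch of the resulting ranges, assuming the function above is in scope (the metric names are hypothetical):

import collections
import tensorflow as tf
import tensorflow_federated as tff

metrics_type = tff.to_type(
    collections.OrderedDict(num_examples=tf.int32, loss_sum=tf.float32))
ranges = create_default_secure_sum_quantization_ranges(
    metrics_type, lower_bound=0.5, upper_bound=100.5)
# Float leaves keep the float bounds; integer leaves get ceil/floor of them:
#   ranges == OrderedDict(num_examples=(1, 100), loss_sum=(0.5, 100.5))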
Example #14
 def accumulator_type_fn():
   """Gets the type for the accumulators."""
   accumulator_zeros = type_conversions.structure_from_tensor_type_tree(
       zeros_for_tensor_type, member_type)
   return _Samples(accumulator_zeros, tf.zeros([0], tf.float32))
Example #15
def _deserialize_dataset_from_graph_def(serialized_graph_def: bytes,
                                        element_type: computation_types.Type):
    """Deserializes a serialized `tf.compat.v1.GraphDef` to a `tf.data.Dataset`.

  Args:
    serialized_graph_def: `bytes` object produced by
      `tensorflow_serialization.serialize_dataset`
    element_type: a `tff.Type` object representing the type structure of the
      elements yielded from the dataset.

  Returns:
    A `tf.data.Dataset` instance.
  """
    py_typecheck.check_type(element_type, computation_types.Type)
    type_analysis.check_tensorflow_compatible_type(element_type)

    def transform_to_tff_known_type(
        type_spec: computation_types.Type
    ) -> Tuple[computation_types.Type, bool]:
        """Transforms `StructType` to `StructWithPythonType`."""
        if type_spec.is_struct() and not type_spec.is_struct_with_python():
            field_is_named = tuple(
                name is not None
                for name, _ in structure.iter_elements(type_spec))
            has_names = any(field_is_named)
            is_all_named = all(field_is_named)
            if is_all_named:
                return computation_types.StructWithPythonType(
                    elements=structure.iter_elements(type_spec),
                    container_type=collections.OrderedDict), True
            elif not has_names:
                return computation_types.StructWithPythonType(
                    elements=structure.iter_elements(type_spec),
                    container_type=tuple), True
            else:
                raise TypeError(
                    'Cannot represent TFF type in TF because it contains '
                    f'partially named structures. Type: {type_spec}')
        return type_spec, False

    if element_type.is_struct():
        # TF doesn't support `structure.Struct` types, so we must transform the
        # `StructType` into a `StructWithPythonType` for use as the
        # `tf.data.Dataset.element_spec` later.
        tf_compatible_type, _ = type_transformations.transform_type_postorder(
            element_type, transform_to_tff_known_type)
    else:
        # We've already checked that this is a struct or tensor type, so we
        # know this is a `TensorType` here and can use it as-is.
        tf_compatible_type = element_type

    def type_to_tensorspec(t: computation_types.TensorType) -> tf.TensorSpec:
        return tf.TensorSpec(shape=t.shape, dtype=t.dtype)

    element_spec = type_conversions.structure_from_tensor_type_tree(
        type_to_tensorspec, tf_compatible_type)
    ds = tf.data.experimental.from_variant(
        tf.raw_ops.DatasetFromGraph(graph_def=serialized_graph_def),
        structure=element_spec)
    # If a serialized dataset had elements that were nested structures of
    # tensors (e.g. `dict`, `OrderedDict`), the deserialized dataset will
    # return `dict`, `tuple`, or `namedtuple` (losing `collections.OrderedDict`
    # in the conversion).
    #
    # Since the dataset will only be used inside TFF, we wrap the dictionary
    # coming from TF in an `OrderedDict` when necessary (a type that both TF
    # and TFF understand), using the field order from the TFF type recorded
    # during serialization.
    return tensorflow_utils.coerce_dataset_elements_to_tff_type_spec(
        ds, tf_compatible_type)
Example #16
  def create(
      self,
      value_type: factory.ValueType) -> aggregation_process.AggregationProcess:
    # Validate input args and value_type and parse out the TF dtypes.
    if value_type.is_tensor():
      tf_dtype = value_type.dtype
    elif (value_type.is_struct_with_python() and
          type_analysis.is_structure_of_tensors(value_type)):
      tf_dtype = type_conversions.structure_from_tensor_type_tree(
          lambda x: x.dtype, value_type)
    else:
      raise TypeError('Expected `value_type` to be `TensorType` or '
                      '`StructWithPythonType` containing only `TensorType`. '
                      f'Found type: {repr(value_type)}')

    # Check that all values are floats.
    if not type_analysis.is_structure_of_floats(value_type):
      raise TypeError('Component dtypes of `value_type` must all be floats. '
                      f'Found {repr(value_type)}.')

    if self._distortion_aggregation_factory is not None:
      distortion_aggregation_process = self._distortion_aggregation_factory.create(
          computation_types.to_type(tf.float32))

    @tensorflow_computation.tf_computation(value_type, tf.float32)
    def discretize_fn(value, step_size):
      return _discretize_struct(value, step_size)

    @tensorflow_computation.tf_computation(discretize_fn.type_signature.result,
                                           tf.float32)
    def undiscretize_fn(value, step_size):
      return _undiscretize_struct(value, step_size, tf_dtype)

    @tensorflow_computation.tf_computation(value_type, tf.float32)
    def distortion_measurement_fn(value, step_size):
      reconstructed_value = undiscretize_fn(
          discretize_fn(value, step_size), step_size)
      err = tf.nest.map_structure(tf.subtract, reconstructed_value, value)
      squared_err = tf.nest.map_structure(tf.square, err)
      flat_squared_errs = [
          tf.cast(tf.reshape(t, [-1]), tf.float32)
          for t in tf.nest.flatten(squared_err)
      ]
      all_squared_errs = tf.concat(flat_squared_errs, axis=0)
      mean_squared_err = tf.reduce_mean(all_squared_errs)
      return mean_squared_err

    inner_agg_process = self._inner_agg_factory.create(
        discretize_fn.type_signature.result)

    @federated_computation.federated_computation()
    def init_fn():
      state = collections.OrderedDict(
          step_size=intrinsics.federated_value(self._step_size,
                                               placements.SERVER),
          inner_agg_process=inner_agg_process.initialize())
      return intrinsics.federated_zip(state)

    @federated_computation.federated_computation(
        init_fn.type_signature.result, computation_types.at_clients(value_type))
    def next_fn(state, value):
      server_step_size = state['step_size']
      client_step_size = intrinsics.federated_broadcast(server_step_size)

      discretized_value = intrinsics.federated_map(discretize_fn,
                                                   (value, client_step_size))

      inner_state = state['inner_agg_process']
      inner_agg_output = inner_agg_process.next(inner_state, discretized_value)

      undiscretized_agg_value = intrinsics.federated_map(
          undiscretize_fn, (inner_agg_output.result, server_step_size))

      new_state = collections.OrderedDict(
          step_size=server_step_size, inner_agg_process=inner_agg_output.state)
      measurements = collections.OrderedDict(
          deterministic_discretization=inner_agg_output.measurements)

      if self._distortion_aggregation_factory is not None:
        distortions = intrinsics.federated_map(distortion_measurement_fn,
                                               (value, client_step_size))
        aggregate_distortion = distortion_aggregation_process.next(
            distortion_aggregation_process.initialize(), distortions).result
        measurements['distortion'] = aggregate_distortion

      return measured_process.MeasuredProcessOutput(
          state=intrinsics.federated_zip(new_state),
          result=undiscretized_agg_value,
          measurements=intrinsics.federated_zip(measurements))

    return aggregation_process.AggregationProcess(init_fn, next_fn)
Example #17
 def zeros_fn():
   return type_conversions.structure_from_tensor_type_tree(
       lambda t: tf.zeros(shape=t.shape, dtype=t.dtype), member_types)
Example #18
 def test_weird_result_elements(self):
     result = type_conversions.structure_from_tensor_type_tree(
         lambda _: set(), [tf.int32, (tf.string, tf.int32)])
     self.assertEqual(result, [set(), (set(), set())])
Example #19
def secure_sum_then_finalize(
    metric_finalizers: model_lib.MetricFinalizersType,
    local_unfinalized_metrics_type: computation_types.StructWithPythonType,
    metric_value_ranges: Optional[MetricValueRangeDict] = None
) -> computation_base.Computation:
    """Creates a TFF computation that aggregates metrics using secure summation.

  The returned federated TFF computation has the following type signature:

  ```
  (local_unfinalized_metrics@CLIENTS ->
   <aggregated_metrics@SERVER, secure_sum_measurements@SERVER>)
  ```

  where the input is given by
  `tff.learning.Model.report_local_unfinalized_metrics()` at `CLIENTS`, and the
  first output (`aggregated_metrics`) is computed by first securely summing the
  unfinalized metrics from `CLIENTS`, followed by applying the finalizers at
  `SERVER`. The second output (`secure_sum_measurements`) is an `OrderedDict`
  that maps from `factory_key`s to the secure summation measurements (e.g. the
  number of clients whose values were clipped; see
  `tff.aggregators.SecureSumFactory` for details). A `factory_key` is uniquely
  defined by three scalars: lower bound, upper bound, and tensor dtype (denoted
  as a datatype enum). Metric values of the same `factory_key` are grouped and
  aggregated together (and hence, the `secure_sum_measurements` are also
  computed at a group level).

  Since secure summation works in fixed-point arithmetic space, floating point
  numbers must be encoded using integer quantization. By default, each tensor
  in `local_unfinalized_metrics_type` will be clipped to `[0, 2**20 - 1]` and
  encoded to integers inside `tff.aggregators.SecureSumFactory`. Callers can
  change this range by setting `metric_value_ranges`, which may be a partial
  tree matching the structure of `local_unfinalized_metrics_type`.

  Example partial value range specification:

  >>> finalizers = ...
  >>> metrics_type = tff.to_type(collections.OrderedDict(
      a=tff.types.TensorType(tf.int32),
      b=tff.types.TensorType(tf.float32),
      c=[tff.types.TensorType(tf.float32), tff.types.TensorType(tf.float32)]))
  >>> value_ranges = collections.OrderedDict(
      b=(0.0, 1.0),
      c=[None, (0.0, 1.0)])
  >>> aggregator = tff.learning.metrics.secure_sum_then_finalize(
      finalizers, metrics_type, value_ranges)

  This sets the range of `b` and of the *second* tensor of `c`; the first
  tensor of `c` and the `a` tensor use the default range.

  Args:
    metric_finalizers: An `OrderedDict` of `string` metric names to finalizer
      functions returned by `tff.learning.Model.metric_finalizers()`. It should
      have the same keys (i.e., metric names) as the `OrderedDict` returned by
      `tff.learning.Model.report_local_unfinalized_metrics()`. A finalizer is a
      callable (typically `tf.function` or `tff.tf_computation` decorated
      function) that takes in a metric's unfinalized values, and returns the
      finalized values.
    local_unfinalized_metrics_type: A `tff.types.StructWithPythonType` (with
      `OrderedDict` as the Python container) of a client's local unfinalized
      metrics. Let `local_unfinalized_metrics` be the output of
      `tff.learning.Model.report_local_unfinalized_metrics()`. Its type can be
      obtained by `tff.framework.type_from_tensors(local_unfinalized_metrics)`.
    metric_value_ranges: A `collections.OrderedDict` that matches the structure
      of `local_unfinalized_metrics_type` (a value for each
      `tff.types.TensorType` in the type tree). Each leaf in the tree should
      have a 2-tuple that defines the range of expected values for that variable
      in the metric. If the entire structure is `None`, a default range of
      `[0.0, 2.0**20 - 1]` will be applied to all variables. Each leaf may also
      be `None`, which will also get the default range, allowing partial user
      specification. At runtime, values that fall outside the ranges specified
      at the leaves will be clipped to within the range.

  Returns:
    A federated TFF computation that securely sums the unfinalized metrics from
    `CLIENTS`, and applies the corresponding finalizers at `SERVER`.

  Raises:
    TypeError: If the inputs are of the wrong types.
    ValueError: If the keys (i.e., metric names) in `metric_finalizers` are not
      the same as those expected by `local_unfinalized_metrics_type`.
  """
    check_metric_finalizers(metric_finalizers)
    check_local_unfinalzied_metrics_type(local_unfinalized_metrics_type)
    check_finalizers_matches_unfinalized_metrics(
        metric_finalizers, local_unfinalized_metrics_type)

    default_metric_value_ranges = create_default_secure_sum_quantization_ranges(
        local_unfinalized_metrics_type)
    if metric_value_ranges is None:
        metric_value_ranges = default_metric_value_ranges

    # Walk the incoming `metric_value_ranges` and `default_metric_value_ranges`
    # and fill in any missing ranges using the defaults.
    def fill_missing_values_with_defaults(default_values, user_values):
        if isinstance(default_values, collections.abc.Mapping):
            if user_values is None:
                user_values = {}
            return type(default_values)(
                (key,
                 fill_missing_values_with_defaults(default_value,
                                                   user_values.get(key)))
                for key, default_value in default_values.items())
        elif isinstance(default_values, list):
            if user_values is None:
                user_values = [None] * len(default_values)
            return [
                fill_missing_values_with_defaults(default_value,
                                                  user_values[idx])
                for idx, default_value in enumerate(default_values)
            ]
        elif user_values is None:
            return _MetricRange(*default_values)
        else:
            _check_range(user_values)
            return _MetricRange(*user_values)

    try:
        metric_value_ranges = fill_missing_values_with_defaults(
            default_metric_value_ranges, metric_value_ranges)
    except TypeError as e:
        raise TypeError('Failed to create encoding value range from: '
                        f'{metric_value_ranges}') from e

    # Create an aggregator factory for each unique value range, rather than each
    # leaf tensor (which could introduce a lot of duplication).
    aggregator_factories = {
        value_range: secure.SecureSumFactory(value_range.upper,
                                             value_range.lower)
        for value_range in set(tree.flatten(metric_value_ranges))
    }
    # Construct a python container of `tff.TensorType` so we can traverse it in
    # parallel with the value ranges during AggregationProcess construction.
    # Otherwise we would have a `tff.Type` on one side and a Python container
    # on the other, which are difficult to traverse in parallel.
    structure_of_tensor_types = type_conversions.structure_from_tensor_type_tree(
        lambda t: t, local_unfinalized_metrics_type)

    # We will construct groups of tensors with the same dtype and quantization
    # value range so that we can construct fewer aggregations-of-structures,
    # rather than a large structure-of-aggregations. Without this, the TFF
    # compiler pipeline results in large slow downs (see b/218312198).
    factory_key_by_path = collections.OrderedDict()
    value_range_by_factory_key = collections.OrderedDict()
    path_list_by_factory_key = collections.defaultdict(list)
    # Maintain a flattened list of paths. This is useful to flatten the aggregated
    # values, which will then be used by `tf.nest.pack_sequence_as`.
    flattened_path_list = []
    for (path, tensor_spec), (_, value_range) in zip(
            tree.flatten_with_path(structure_of_tensor_types),
            tree.flatten_with_path(metric_value_ranges)):
        factory_key = _create_factory_key(value_range.lower, value_range.upper,
                                          tensor_spec.dtype)
        factory_key_by_path[path] = factory_key
        value_range_by_factory_key[factory_key] = value_range
        path_list_by_factory_key[factory_key].append(path)
        flattened_path_list.append(path)

    @tensorflow_computation.tf_computation(local_unfinalized_metrics_type)
    def group_value_by_factory_key(local_unfinalized_metrics):
        """Groups client local metrics into a map of `factory_key` to value list."""
        # We cannot use `collections.defaultdict(list)` here because its result is
        # incompatible with `structure_from_tensor_type_tree`.
        value_list_by_factory_key = collections.OrderedDict()
        for path, value in tree.flatten_with_path(local_unfinalized_metrics):
            factory_key = factory_key_by_path[path]
            if factory_key in value_list_by_factory_key:
                value_list_by_factory_key[factory_key].append(value)
            else:
                value_list_by_factory_key[factory_key] = [value]
        return value_list_by_factory_key

    def flatten_grouped_values(value_list_by_factory_key):
        """Flatten the values in the same order as in `flattened_path_list`."""
        value_by_path = collections.OrderedDict()
        for factory_key in value_list_by_factory_key:
            path_list = path_list_by_factory_key[factory_key]
            value_list = value_list_by_factory_key[factory_key]
            for path, value in zip(path_list, value_list):
                value_by_path[path] = value
        flattened_value_list = [
            value_by_path[path] for path in flattened_path_list
        ]
        return flattened_value_list

    # Create an aggregation process for each factory key.
    aggregation_process_by_factory_key = collections.OrderedDict()
    # Construct a python container of `tff.TensorType` so we can traverse it and
    # create aggregation processes from the factories.
    tensor_type_list_by_factory_key = (
        type_conversions.structure_from_tensor_type_tree(
            lambda t: t, group_value_by_factory_key.type_signature.result))
    for (factory_key,
         tensor_type_list) in tensor_type_list_by_factory_key.items():
        value_range = value_range_by_factory_key[factory_key]
        aggregation_process_by_factory_key[
            factory_key] = aggregator_factories.get(value_range).create(
                computation_types.to_type(tensor_type_list))

    @federated_computation.federated_computation(
        computation_types.at_clients(local_unfinalized_metrics_type))
    def aggregator_computation(client_local_unfinalized_metrics):
        unused_state = intrinsics.federated_value((), placements.SERVER)

        client_local_grouped_unfinalized_metrics = intrinsics.federated_map(
            group_value_by_factory_key, client_local_unfinalized_metrics)
        metrics_aggregation_output = collections.OrderedDict()
        for factory_key, process in aggregation_process_by_factory_key.items():
            metrics_aggregation_output[factory_key] = process.next(
                unused_state,
                client_local_grouped_unfinalized_metrics[factory_key])

        metrics_aggregation_output = intrinsics.federated_zip(
            metrics_aggregation_output)

        @tensorflow_computation.tf_computation(
            metrics_aggregation_output.type_signature.member)
        def finalizer_computation(grouped_aggregation_output):

            # One minor downside of grouping the aggregation processes is that the
            # SecAgg measurements (e.g., clipped_count) are computed at a group level
            # (a group means all metric values belonging to the same `factory_key`).
            secure_sum_measurements = collections.OrderedDict(
                (factory_key, output.measurements)
                for factory_key, output in grouped_aggregation_output.items())
            finalized_metrics = collections.OrderedDict(
                secure_sum_measurements=secure_sum_measurements)
            grouped_unfinalized_metrics = collections.OrderedDict(
                (factory_key, output.result)
                for factory_key, output in grouped_aggregation_output.items())
            flattened_unfinalized_metrics_list = flatten_grouped_values(
                grouped_unfinalized_metrics)
            unfinalized_metrics = tf.nest.pack_sequence_as(
                structure_of_tensor_types, flattened_unfinalized_metrics_list)
            for metric_name, metric_finalizer in metric_finalizers.items():
                finalized_metrics[metric_name] = metric_finalizer(
                    unfinalized_metrics[metric_name])
            return finalized_metrics

        return intrinsics.federated_map(finalizer_computation,
                                        metrics_aggregation_output)

    return aggregator_computation
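A minimal wiring sketch, assuming `model` is an existing `tff.learning.Model` (the docstring above notes the metrics type can be derived with `tff.framework.type_from_tensors`):

import tensorflow_federated as tff

# Hypothetical: `model` is a `tff.learning.Model` built elsewhere.
aggregator = secure_sum_then_finalize(
    metric_finalizers=model.metric_finalizers(),
    local_unfinalized_metrics_type=tff.framework.type_from_tensors(
        model.report_local_unfinalized_metrics()))
# `aggregator` takes unfinalized metrics at CLIENTS and returns finalized
# metrics plus `secure_sum_measurements` at SERVER.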
Example #20
  def create(
      self,
      value_type: factory.ValueType) -> aggregation_process.AggregationProcess:
    _check_value_type(value_type)
    value_specs = type_conversions.structure_from_tensor_type_tree(
        lambda x: tf.TensorSpec(x.shape, x.dtype), value_type)
    seeds_per_round = self._num_repeats * len(structure.flatten(value_type))
    next_global_seed_fn = _build_next_global_seed_fn(stride=seeds_per_round)

    @tensorflow_computation.tf_computation(value_type, SEED_TFF_TYPE)
    def client_transform(value, global_seed):

      @tf.function
      def transform(tensor, seed):
        for _ in range(self._num_repeats):
          tensor = tf.reshape(tensor, [2, -1])
          tensor = tf.complex(real=tensor[0], imag=tensor[1])
          tensor *= sample_cis(tf.shape(tensor), seed, inverse=False)
          tensor = tf.signal.fft(tensor)
          tensor = tf.concat(
              [tf.math.real(tensor), tf.math.imag(tensor)], axis=0)
          tensor /= tf.cast(tf.sqrt(tf.size(tensor) / 2), OUTPUT_TF_DTYPE)
          seed += 1
        return tensor

      value = _flatten_and_pad_zeros_even(value)
      seeds = _unique_seeds_for_struct(
          value, global_seed, stride=self._num_repeats)
      return tf.nest.map_structure(transform, value, seeds)

    inner_agg_process = self._inner_agg_factory.create(
        client_transform.type_signature.result)

    @tensorflow_computation.tf_computation(
        client_transform.type_signature.result, SEED_TFF_TYPE)
    def server_transform(value, global_seed):

      @tf.function
      def transform(tensor, seed):
        seed += self._num_repeats - 1
        for _ in range(self._num_repeats):
          tensor *= tf.sqrt(tf.size(tensor, out_type=tensor.dtype) / 2.0)
          tensor = tf.reshape(tensor, [2, -1])
          tensor = tf.complex(real=tensor[0], imag=tensor[1])
          tensor = tf.signal.ifft(tensor)
          tensor *= sample_cis(tf.shape(tensor), seed, inverse=True)
          tensor = tf.concat(
              [tf.math.real(tensor), tf.math.imag(tensor)], axis=0)
          seed -= 1
        return tensor

      seeds = _unique_seeds_for_struct(
          value, global_seed, stride=self._num_repeats)
      value = tf.nest.map_structure(transform, value, seeds)
      return tf.nest.map_structure(_slice_and_reshape_to_template_spec, value,
                                   value_specs)

    @federated_computation.federated_computation()
    def init_fn():
      inner_state = inner_agg_process.initialize()
      my_state = intrinsics.federated_eval(
          tensorflow_computation.tf_computation(_init_global_seed),
          placements.SERVER)
      return intrinsics.federated_zip((inner_state, my_state))

    @federated_computation.federated_computation(
        init_fn.type_signature.result, computation_types.at_clients(value_type))
    def next_fn(state, value):
      next_fn_impl = _build_next_fn(client_transform, inner_agg_process,
                                    server_transform, next_global_seed_fn,
                                    'dft')
      return next_fn_impl(state, value)

    return aggregation_process.AggregationProcess(init_fn, next_fn)
Example #21
 def create_all_zero_state():
   return type_conversions.structure_from_tensor_type_tree(
       lambda t: tf.zeros(shape=t.shape, dtype=t.dtype),
       local_unfinalized_metrics_type)
Example #22
 def create_value():
     return type_conversions.structure_from_tensor_type_tree(
         lambda t: tf.zeros(dtype=t.dtype, shape=t.shape),
         type_spec.member if type_spec.is_federated() else type_spec)
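The same zero-initialization idiom works against the public API; a minimal sketch, assuming the public alias `tff.types.structure_from_tensor_type_tree`:

import tensorflow as tf
import tensorflow_federated as tff

# A hypothetical nested type: a list holding a scalar and a tuple of scalars.
type_spec = tff.to_type([tf.int32, (tf.float32, tf.int32)])
zeros = tff.types.structure_from_tensor_type_tree(
    lambda t: tf.zeros(shape=t.shape, dtype=t.dtype), type_spec)
# zeros == [tf.zeros([], tf.int32),
#           (tf.zeros([], tf.float32), tf.zeros([], tf.int32))]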