def test_type_properties(self, value_type):
  """Verifies the `initialize` and `next` type signatures of the process.

  Args:
    value_type: Anything accepted by `computation_types.to_type`; the type of
      the value handed to the factory's `create`.
  """
  discretization_factory = (
      stochastic_discretization.StochasticDiscretizationFactory(
          step_size=0.1,
          inner_agg_factory=_measurement_aggregator,
          distortion_aggregation_factory=mean.UnweightedMeanFactory()))
  value_type = computation_types.to_type(value_type)
  # Same structure and shapes as `value_type`, with every leaf dtype int32.
  int32_value_type = type_conversions.structure_from_tensor_type_tree(
      lambda leaf: (tf.int32, leaf.shape), value_type)

  process = discretization_factory.create(value_type)
  self.assertIsInstance(process, aggregation_process.AggregationProcess)

  # Expected state: a float32 step size and an empty inner state, at SERVER.
  state_at_server = computation_types.at_server(
      computation_types.StructType([
          ('step_size', tf.float32),
          ('inner_agg_process', ()),
      ]))
  type_test_utils.assert_types_equivalent(
      process.initialize.type_signature,
      computation_types.FunctionType(parameter=None, result=state_at_server))

  measurements_at_server = computation_types.at_server(
      computation_types.StructType([
          ('stochastic_discretization', int32_value_type),
          ('distortion', tf.float32),
      ]))
  next_parameter = collections.OrderedDict(
      state=state_at_server,
      value=computation_types.at_clients(value_type))
  next_result = measured_process.MeasuredProcessOutput(
      state=state_at_server,
      result=computation_types.at_server(value_type),
      measurements=measurements_at_server)
  type_test_utils.assert_types_equivalent(
      process.next.type_signature,
      computation_types.FunctionType(
          parameter=next_parameter, result=next_result))
def test_structure(self):
  """A struct type with a named and an unnamed field yields a `Struct`."""
  fields = [('a', tf.int32), (None, tf.int32)]
  struct_type = computation_types.StructType(fields)
  actual = type_conversions.structure_from_tensor_type_tree(
      self.get_incrementing_function(), struct_type)
  expected = structure.Struct([('a', 0), (None, 1)])
  self.assertEqual(actual, expected)
def _get_accumulator_type(member_type):
  """Builds the `tff.Type` of the accumulator used in sample aggregation.

  Args:
    member_type: A `tff.Type` describing the member components of the
      federated type.

  Returns:
    The `tff.StructType` of the accumulator. It has two parts, `accumulators`
    and `rands`, which are parallel lists: the i-th entry of one corresponds
    to the i-th entry of the other. The pair is used to sample from the
    accumulators with equal probability.
  """

  def _prepend_unknown_dim(leaf_type):
    # The new leading dimension is left unknown (`None`).
    widened_dims = [None] + leaf_type.shape.dims
    return computation_types.TensorType(leaf_type.dtype, widened_dims)

  samples = _Samples(
      accumulators=type_conversions.structure_from_tensor_type_tree(
          _prepend_unknown_dim, member_type),
      rands=computation_types.TensorType(tf.float32, shape=[None]),
  )
  return computation_types.to_type(samples)
def test_single_tensor(self):
  """A bare tensor type is passed to the function; its result is returned."""

  def check_leaf_and_return_5(leaf_type):
    # The callback must receive the int32 `TensorType` itself.
    self.assert_types_identical(
        leaf_type, computation_types.TensorType(tf.int32))
    return 5

  actual = type_conversions.structure_from_tensor_type_tree(
      check_leaf_and_return_5, tf.int32)
  self.assertEqual(actual, 5)
def initialize():
  """Builds the initial (empty) reservoir state.

  Reads `seed`, `SEED_SENTINEL` and `sample_value_type` from the enclosing
  scope.

  Returns:
    A `collections.OrderedDict` with three entries:
      * `random_seed`: a shape-`(2,)` tensor filled with the int64 seed.
      * `random_values`: an empty (`[0]`) int32 tensor.
      * `samples`: a structure matching `sample_value_type` where each tensor
        leaf is replaced by a zero tensor with an extra leading dimension of
        size `0`.

  Raises:
    TypeError: If the initial samples cannot be built from
      `sample_value_type`.
  """
  # Allow fixed seeds, otherwise set a sentinel that signals a seed should be
  # generated upon the first `accumulate` call of the `federated_aggregate`.
  if seed is None:
    real_seed = tf.convert_to_tensor(SEED_SENTINEL, dtype=tf.int64)
  elif tf.is_tensor(seed):
    # Every tensor seed must bind `real_seed`: an int64 tensor is used as-is
    # and any other dtype is cast. Previously an int64 tensor seed left
    # `real_seed` unassigned.
    if seed.dtype != tf.int64:
      real_seed = tf.cast(seed, dtype=tf.int64)
    else:
      real_seed = seed
  else:
    real_seed = tf.convert_to_tensor(seed, dtype=tf.int64)

  def zero_for_tensor_type(t: computation_types.TensorType):
    """Add an extra first dimension to create a tensor that collects samples.

    The first dimension will have size `0` for the algebraic zero, resulting
    in an "empty" tensor. This will be concatenated as samples fill the
    reservoir.

    Args:
      t: A `tff.TensorType` to build a sampling zero value for.

    Returns:
      A tensor whose rank is one larger than before, and whose first dimension
      is zero.

    Raises:
      `TypeError` if `t` is not a `tff.TensorType`.
    """
    if not t.is_tensor():
      raise TypeError(
          f'Cannot create zero for non TesnorType: {type(t)}')
    return tf.zeros([0] + t.shape, dtype=t.dtype)

  try:
    initial_samples = type_conversions.structure_from_tensor_type_tree(
        zero_for_tensor_type, sample_value_type)
  except ValueError as e:
    # Re-raise as `TypeError` so callers see a type problem with the value.
    raise TypeError(
        'Cannot build initial reservoir for structure that has '
        'types other than StructWithPythonType or TensorType, '
        f'got {sample_value_type!r}.') from e
  return collections.OrderedDict(
      random_seed=tf.fill(dims=(2,), value=real_seed),
      random_values=tf.zeros([0], tf.int32),
      samples=initial_samples)
def create(self, value_type):
  """Creates the aggregation process for `value_type`.

  As a side effect, stores the total number of elements in `value_type` on
  `self._client_dim`.

  Args:
    value_type: A `TensorType`, or a `StructWithPythonType` containing only
      `TensorType`s, whose component dtypes are all integers or all floats.

  Returns:
    An `aggregation_process.AggregationProcess` whose `initialize` is that of
    the process built by `self._build_aggregation_factory()`.

  Raises:
    TypeError: If `value_type` has an unsupported structure or dtypes.
  """
  # Validate the structure of `value_type` and record the client dimension.
  is_tensor_struct = (
      value_type.is_struct_with_python()
      and type_analysis.is_structure_of_tensors(value_type))
  if is_tensor_struct:
    per_leaf_counts = type_conversions.structure_from_tensor_type_tree(
        lambda leaf: leaf.shape.num_elements(), value_type)
    self._client_dim = sum(tf.nest.flatten(per_leaf_counts))
  elif value_type.is_tensor():
    self._client_dim = value_type.shape.num_elements()
  else:
    raise TypeError(
        'Expected `value_type` to be `TensorType` or '
        '`StructWithPythonType` containing only `TensorType`. '
        f'Found type: {repr(value_type)}')

  # Only integer or floating point components are accepted.
  if (not type_analysis.is_structure_of_floats(value_type) and
      not type_analysis.is_structure_of_integers(value_type)):
    raise TypeError(
        'Component dtypes of `value_type` must all be integers '
        f'or floats. Found {repr(value_type)}.')

  ddp_agg_process = self._build_aggregation_factory().create(value_type)
  init_fn = ddp_agg_process.initialize

  @federated_computation.federated_computation(
      init_fn.type_signature.result,
      computation_types.at_clients(value_type))
  def next_fn(state, value):
    inner_output = ddp_agg_process.next(state, value)
    derived_measurements = self._derive_measurements(
        inner_output.state, inner_output.measurements)
    # The component states are re-tuned only when auto L2 clipping is on.
    if self._auto_l2_clip:
      next_state = self._autotune_component_states(inner_output.state)
    else:
      next_state = inner_output.state
    return measured_process.MeasuredProcessOutput(
        state=next_state,
        result=inner_output.result,
        measurements=derived_measurements)

  return aggregation_process.AggregationProcess(init_fn, next_fn)
def create(
    self, value_type: factory.ValueType
) -> aggregation_process.AggregationProcess:
  """Creates the aggregation process for an integer `value_type`.

  Args:
    value_type: A `TensorType`, or a `StructType` containing only
      `TensorType`s, whose component dtypes are all integers.

  Returns:
    An `aggregation_process.AggregationProcess` that reuses the inner
    process's `initialize` and the `next` built by `self._create_next_fn`.

  Raises:
    TypeError: If `value_type` has an unsupported structure or non-integer
      dtypes.
    ValueError: If stddev estimation is enabled and `value_type` has at most
      one element.
  """
  # Validate the structure of `value_type` and count its elements.
  is_tensor_struct = (
      value_type.is_struct()
      and type_analysis.is_structure_of_tensors(value_type))
  if is_tensor_struct:
    per_leaf_counts = type_conversions.structure_from_tensor_type_tree(
        lambda leaf: leaf.shape.num_elements(), value_type)
    client_dim = sum(tf.nest.flatten(per_leaf_counts))
  elif value_type.is_tensor():
    client_dim = value_type.shape.num_elements()
  else:
    raise TypeError('Expected `value_type` to be `TensorType` or '
                    '`StructType` containing only `TensorType`. '
                    f'Found type: {repr(value_type)}')

  # Only integer components are accepted.
  if not type_analysis.is_structure_of_integers(value_type):
    raise TypeError(
        'Component dtypes of `value_type` must all be integers. '
        f'Found {repr(value_type)}.')

  # A standard deviation estimate needs more than one element, and is flagged
  # as potentially noisy at 100 elements or fewer.
  if self._estimate_stddev:
    if client_dim <= 1:
      raise ValueError(
          'The stddev estimation procedure expects more than '
          '1 element from `value_type`. Found `value_type` of '
          f'{value_type} with {client_dim} elements.')
    if client_dim <= 100:
      warnings.warn(
          f'`value_type` has only {client_dim} elements. The '
          'estimated standard deviation may be noisy. Consider '
          'setting `estimate_stddev` to True only if the input '
          'tensor/structure have more than 100 elements.')

  inner_agg_process = self._inner_agg_factory.create(value_type)
  init_fn = inner_agg_process.initialize
  state_type = init_fn.type_signature.result
  next_fn = self._create_next_fn(
      inner_agg_process.next, state_type, value_type)
  return aggregation_process.AggregationProcess(init_fn, next_fn)
def _unique_dtypes_in_structure(
    type_spec: computation_types.Type) -> Set[tf.DType]:
  """Collects the distinct dtypes that appear in `type_spec`.

  Args:
    type_spec: A `computation_types.Type`.

  Returns:
    A `set` of the unique dtypes found in `type_spec`. Federated types are
    unwrapped to their member; types that are neither tensor, struct nor
    federated yield an empty set.
  """
  py_typecheck.check_type(type_spec, computation_types.Type)
  if type_spec.is_tensor():
    py_typecheck.check_type(type_spec.dtype, tf.DType)
    return {type_spec.dtype}
  if type_spec.is_struct():
    dtype_tree = type_conversions.structure_from_tensor_type_tree(
        lambda leaf: leaf.dtype, type_spec)
    return set(tf.nest.flatten(dtype_tree))
  if type_spec.is_federated():
    return _unique_dtypes_in_structure(type_spec.member)
  return set()
def test_nested_python_type(self):
  """Nested Python containers of dtypes keep their container structure."""
  nested_spec = [tf.int32, (tf.string, tf.int32)]
  actual = type_conversions.structure_from_tensor_type_tree(
      self.get_incrementing_function(), nested_spec)
  self.assertEqual(actual, [0, (1, 2)])
def from_keras_model(
    keras_model: tf.keras.Model,
    loss: Loss,
    input_spec,
    loss_weights: Optional[List[float]] = None,
    metrics: Optional[List[tf.keras.metrics.Metric]] = None
) -> model_lib.Model:
  """Builds a `tff.learning.Model` from a `tf.keras.Model`.

  The returned `tff.learning.Model` uses `keras_model` for its forward pass
  and autodifferentiation steps.

  Because TFF couples the `tf.keras.Model` with `loss`, it needs a slightly
  different notion of "fully specified type" than pure Keras. The model `M`
  takes inputs of type `x` and produces predictions of type `p`; the loss
  function `L` takes inputs of type `<p, y>` (where `y` is the ground truth
  label type) and produces a scalar. To fully specify the type signatures of
  computations in which the generated `tff.learning.Model` appears, TFF
  therefore needs the type `y` in addition to the type `x`.

  Note: This function does not currently accept subclassed `tf.keras.Models`,
  as it makes assumptions about the presence of certain attributes which are
  guaranteed to exist through the functional or Sequential API but are not
  necessarily present for subclassed models.

  Args:
    keras_model: A `tf.keras.Model` object that is not compiled.
    loss: A single `tf.keras.losses.Loss` or a list of losses-per-output. With
      a single loss, all model output (as well as all prediction information)
      is passed to the loss; this includes situations of multiple model
      outputs and/or predictions. With a list of losses, each loss is expected
      to correspond to a model output; the model will attempt to minimize the
      sum of all individual losses (optionally weighted using the
      `loss_weights` argument).
    input_spec: A structure of `tf.TensorSpec`s or `tff.Type` specifying the
      type of arguments the model expects. If `input_spec` is a `tff.Type`,
      its leaf nodes must be `TensorType`s. Note that `input_spec` must be a
      compound structure of two elements, specifying both the data fed into
      the model (x) to generate predictions as well as the expected type of
      the ground truth (y). If provided as a list, it must be in the order
      [x, y]. If provided as a dictionary, the keys must explicitly be named
      `'x'` and `'y'`.
    loss_weights: (Optional) A list of Python floats used to weight the loss
      contribution of each model output (when providing a list of losses for
      the `loss` argument).
    metrics: (Optional) a list of `tf.keras.metrics.Metric` objects.

  Returns:
    A `tff.learning.Model` object.

  Raises:
    TypeError: If `keras_model` is not an instance of `tf.keras.Model`, if
      `loss` is not an instance of `tf.keras.losses.Loss` nor a list of
      instances of `tf.keras.losses.Loss`, if `input_spec` is a `tff.Type` but
      the leaf nodes are not `tff.TensorType`s, if `loss_weights` is provided
      but is not a list of floats, or if `metrics` is provided but is not a
      list of instances of `tf.keras.metrics.Metric`.
    ValueError: If `keras_model` was compiled, if `loss` is a list of unequal
      length to the number of outputs of `keras_model`, if `loss_weights` is
      specified but `loss` is not a list, if `input_spec` does not contain
      exactly two elements, or if `input_spec` is a dictionary and does not
      contain keys `'x'` and `'y'`.
  """
  # --- `keras_model` -------------------------------------------------------
  py_typecheck.check_type(keras_model, tf.keras.Model)
  if keras_model._is_compiled:  # pylint: disable=protected-access
    raise ValueError('`keras_model` must not be compiled')

  # --- `loss` / `loss_weights`: normalize both to equal-length lists --------
  if isinstance(loss, list):
    if len(loss) != len(keras_model.outputs):
      raise ValueError(
          'If a loss list is provided, `keras_model` must have '
          'equal number of outputs to the losses.\nloss: {}\nof '
          'length: {}.\noutputs: {}\nof length: {}.'.format(
              loss, len(loss), keras_model.outputs,
              len(keras_model.outputs)))
    for single_loss in loss:
      py_typecheck.check_type(single_loss, tf.keras.losses.Loss)

    if loss_weights is None:
      loss_weights = [1.0] * len(loss)
    elif len(loss) != len(loss_weights):
      raise ValueError(
          '`keras_model` must have equal number of losses and loss_weights.'
          '\nloss: {}\nof length: {}.'
          '\nloss_weights: {}\nof length: {}.'.format(
              loss, len(loss), loss_weights, len(loss_weights)))
    else:
      for weight in loss_weights:
        py_typecheck.check_type(weight, float)
  else:
    py_typecheck.check_type(loss, tf.keras.losses.Loss)
    if loss_weights is not None:
      raise ValueError(
          '`loss_weights` cannot be used if `loss` is not a list.')
    loss = [loss]
    loss_weights = [1.0]

  # --- `input_spec` ---------------------------------------------------------
  if len(input_spec) != 2:
    raise ValueError(
        'The top-level structure in `input_spec` must contain '
        'exactly two top-level elements, as it must specify type '
        'information for both inputs to and predictions from the '
        'model. You passed input spec {}.'.format(input_spec))

  if isinstance(input_spec, computation_types.Type):
    if not type_analysis.is_structure_of_tensors(input_spec):
      raise TypeError(
          'Expected a `tff.Type` with all the leaf nodes being '
          '`tff.TensorType`s, found an input spec {}.'.format(input_spec))

    def _to_tensor_spec(tensor_type):
      return tf.TensorSpec(tensor_type.shape, tensor_type.dtype)

    # From here on `input_spec` is a structure of `tf.TensorSpec`s.
    input_spec = type_conversions.structure_from_tensor_type_tree(
        _to_tensor_spec, input_spec)
  else:

    def _check_is_tensor_spec(member):
      return py_typecheck.check_type(
          member, tf.TensorSpec, 'input spec member')

    tf.nest.map_structure(_check_is_tensor_spec, input_spec)

  if isinstance(input_spec, collections.abc.Mapping):
    if 'x' not in input_spec:
      raise ValueError(
          'The `input_spec` is a collections.abc.Mapping (e.g., a dict), so it '
          'must contain an entry with key `\'x\'`, representing the input(s) '
          'to the Keras model.')
    if 'y' not in input_spec:
      raise ValueError(
          'The `input_spec` is a collections.abc.Mapping (e.g., a dict), so it '
          'must contain an entry with key `\'y\'`, representing the label(s) '
          'to be used in the Keras loss(es).')

  # --- `metrics` ------------------------------------------------------------
  if metrics is None:
    metrics = []
  else:
    py_typecheck.check_type(metrics, list)
    for single_metric in metrics:
      py_typecheck.check_type(single_metric, tf.keras.metrics.Metric)

  return _KerasModel(
      keras_model,
      input_spec=input_spec,
      loss_fns=loss,
      loss_weights=loss_weights,
      metrics=metrics)
def create(self, value_type):
  """Creates an aggregation process that discretizes values before summing.

  Client values are mapped through a discretization function, aggregated by
  the process created from `self._inner_agg_factory`, and the aggregate is
  mapped back through `_undiscretize_struct` at the server.

  Args:
    value_type: A `TensorType`, or a `StructWithPythonType` containing only
      `TensorType`s, whose component dtypes are all floats. A struct is only
      accepted when `self._prior_norm_bound` is falsy.

  Returns:
    An `aggregation_process.AggregationProcess` whose state holds
    `scale_factor`, `prior_norm_bound` and the inner process state, and whose
    measurements hold the inner measurements under the key `discretize`.

  Raises:
    TypeError: If `value_type` has an unsupported structure or non-float
      dtypes, or is a struct while `prior_norm_bound` is specified.
  """
  # Validate input args and value_type and parse out the TF dtypes.
  if value_type.is_tensor():
    tf_dtype = value_type.dtype
  elif (value_type.is_struct_with_python() and
        type_analysis.is_structure_of_tensors(value_type)):
    if self._prior_norm_bound:
      raise TypeError(
          'If `prior_norm_bound` is specified, `value_type` must '
          f'be `TensorType`. Found type: {repr(value_type)}.')
    # For structs, `tf_dtype` is a structure of dtypes matching `value_type`.
    tf_dtype = type_conversions.structure_from_tensor_type_tree(
        lambda x: x.dtype, value_type)
  else:
    raise TypeError(
        'Expected `value_type` to be `TensorType` or '
        '`StructWithPythonType` containing only `TensorType`. '
        f'Found type: {repr(value_type)}')

  # Check that all values are floats.
  if not type_analysis.is_structure_of_floats(value_type):
    raise TypeError(
        'Component dtypes of `value_type` must all be floats. '
        f'Found {repr(value_type)}.')

  discretize_fn = _build_discretize_fn(value_type, self._stochastic,
                                       self._beta)

  # Takes the discretized type plus a float32 scale factor and restores the
  # original dtype(s) recorded in `tf_dtype`.
  @tensorflow_computation.tf_computation(
      discretize_fn.type_signature.result, tf.float32)
  def undiscretize_fn(value, scale_factor):
    return _undiscretize_struct(value, scale_factor, tf_dtype)

  # The inner aggregator operates on the discretized type, not `value_type`.
  inner_value_type = discretize_fn.type_signature.result
  inner_agg_process = self._inner_agg_factory.create(inner_value_type)

  @federated_computation.federated_computation()
  def init_fn():
    # The configured constants are placed at SERVER next to the inner state.
    state = collections.OrderedDict(
        scale_factor=intrinsics.federated_value(
            self._scale_factor, placements.SERVER),
        prior_norm_bound=intrinsics.federated_value(
            self._prior_norm_bound, placements.SERVER),
        inner_agg_process=inner_agg_process.initialize())
    return intrinsics.federated_zip(state)

  @federated_computation.federated_computation(
      init_fn.type_signature.result,
      computation_types.at_clients(value_type))
  def next_fn(state, value):
    # Clients need both constants to discretize, so they are broadcast.
    server_scale_factor = state['scale_factor']
    client_scale_factor = intrinsics.federated_broadcast(
        server_scale_factor)
    server_prior_norm_bound = state['prior_norm_bound']
    prior_norm_bound = intrinsics.federated_broadcast(
        server_prior_norm_bound)

    discretized_value = intrinsics.federated_map(
        discretize_fn, (value, client_scale_factor, prior_norm_bound))

    inner_state = state['inner_agg_process']
    inner_agg_output = inner_agg_process.next(inner_state, discretized_value)

    # The aggregate is undiscretized at the server with the server-side copy
    # of the scale factor.
    undiscretized_agg_value = intrinsics.federated_map(
        undiscretize_fn, (inner_agg_output.result, server_scale_factor))

    # `scale_factor` and `prior_norm_bound` are carried over unchanged; only
    # the inner state is replaced.
    new_state = collections.OrderedDict(
        scale_factor=server_scale_factor,
        prior_norm_bound=server_prior_norm_bound,
        inner_agg_process=inner_agg_output.state)
    measurements = collections.OrderedDict(
        discretize=inner_agg_output.measurements)

    return measured_process.MeasuredProcessOutput(
        state=intrinsics.federated_zip(new_state),
        result=undiscretized_agg_value,
        measurements=intrinsics.federated_zip(measurements))

  return aggregation_process.AggregationProcess(init_fn, next_fn)
def zeros_fn():
  """Applies `validate_and_fill` to every tensor type leaf of `member_type`.

  Both names are read from the enclosing scope.

  Returns:
    The structure produced by
    `type_conversions.structure_from_tensor_type_tree`.
  """
  filled_structure = type_conversions.structure_from_tensor_type_tree(
      validate_and_fill, member_type)
  return filled_structure
def create_default_secure_sum_quantization_ranges(
    local_unfinalized_metrics_type: computation_types.StructWithPythonType,
    lower_bound: Union[int, float] = DEFAULT_SECURE_LOWER_BOUND,
    upper_bound: Union[int, float] = DEFAULT_SECURE_UPPER_BOUND
) -> MetricValueRangeDict:
  """Create a nested structure of quantization ranges for secure sum encoding.

  Args:
    local_unfinalized_metrics_type: The `tff.Type` structure to generate
      default secure sum quantization ranges from. Must be a `tff.Type` tree
      containing only `tff.TensorType` and `tff.StructType`. Each
      `tff.TensorType` must be of floating point or integer dtype.
    lower_bound: An optional integer or floating point lower bound for the
      secure sum quantization range. Values smaller than this will be clipped
      to this value. Defaults to `DEFAULT_SECURE_LOWER_BOUND`. If a `float`,
      any `tff.TensorType` in `local_unfinalized_metrics_type` with an integer
      dtype will use `math.ceil(lower_bound)` as a bound.
    upper_bound: An optional integer or floating point upper bound for the
      secure sum quantization range. Values larger than this will be clipped
      to this value. Defaults to `DEFAULT_SECURE_UPPER_BOUND`. If a `float`,
      any `tff.TensorType` in `local_unfinalized_metrics_type` with an integer
      dtype will use `math.floor(upper_bound)` as a bound.

  Returns:
    A nested structure matching the structure of
    `local_unfinalized_metrics_type` where each `tff.TensorType` has been
    replaced with a 2-tuple of lower bound and upper bound, where the tuple
    elements are `float` for floating dtypes, and `int` for integer dtypes.

  Raises:
    UnquantizableDTypeError: If a `tff.TensorType` in
      `local_unfinalized_metrics_type` has a non-float or non-integer dtype.
    ValueError: If `lower_bound` is not strictly less than `upper_bound`, or
      if an integer dtype in `local_unfinalized_metrics_type` would have a
      zero range (i.e.
      `math.floor(upper_bound) - math.ceil(lower_bound) < 1`).
  """
  py_typecheck.check_type(upper_bound, (int, float))
  py_typecheck.check_type(lower_bound, (int, float))
  if lower_bound >= upper_bound:
    raise ValueError('`upper_bound` must be greater than `lower_bound`.')

  # Integer tensors can only use the integers that lie inside the requested
  # range, so the bounds are rounded inwards (not truncated towards zero).
  integer_lower_bound = math.ceil(lower_bound)
  integer_upper_bound = math.floor(upper_bound)
  integer_range_width = integer_upper_bound - integer_lower_bound

  def create_default_range(
      type_spec: computation_types.TensorType) -> MetricValueRange:
    """Returns the `(lower, upper)` range for a single tensor type."""
    if type_spec.dtype.is_floating:
      return float(lower_bound), float(upper_bound)
    elif type_spec.dtype.is_integer:
      if integer_range_width < 1:
        # Report the bounds that would actually be used for integers;
        # `int()` truncation would misreport e.g. a lower bound of 0.5 as 0.
        raise ValueError(
            'Encounter an integer tensor in the type, but quantization range '
            f'[{lower_bound}, {upper_bound}] is not wide enough to quantize '
            f'any integers (becomes [{integer_lower_bound}, '
            f'{integer_upper_bound}]).')
      return integer_lower_bound, integer_upper_bound
    else:
      raise UnquantizableDTypeError(
          'Do not know how to create a default range for dtype '
          f'{type_spec.dtype}. Only floating or integer types are supported.'
      )

  return type_conversions.structure_from_tensor_type_tree(
      create_default_range, local_unfinalized_metrics_type)
def accumlator_type_fn():
  """Builds the `_Samples` value whose type is the accumulator type.

  Maps `zeros_for_tensor_type` over the tensor type leaves of `member_type`
  (both read from the enclosing scope) and pairs the result with an empty
  float32 tensor.
  """
  return _Samples(
      type_conversions.structure_from_tensor_type_tree(
          zeros_for_tensor_type, member_type),
      tf.zeros([0], tf.float32))
def _deserialize_dataset_from_graph_def(serialized_graph_def: bytes,
                                        element_type: computation_types.Type):
  """Rebuilds a `tf.data.Dataset` from a serialized `tf.compat.v1.GraphDef`.

  Args:
    serialized_graph_def: `bytes` object produced by
      `tensorflow_serialization.serialize_dataset`
    element_type: a `tff.Type` object representing the type structure of the
      elements yielded from the dataset.

  Returns:
    A `tf.data.Dataset` instance.
  """
  py_typecheck.check_type(element_type, computation_types.Type)
  type_analysis.check_tensorflow_compatible_type(element_type)

  def transform_to_tff_known_type(
      type_spec: computation_types.Type
  ) -> Tuple[computation_types.Type, bool]:
    """Replaces a plain `StructType` with a `StructWithPythonType`."""
    if not type_spec.is_struct() or type_spec.is_struct_with_python():
      # Nothing to do: not a struct, or it already carries a container.
      return type_spec, False
    named_flags = tuple(
        field_name is not None
        for field_name, _ in structure.iter_elements(type_spec))
    if all(named_flags):
      container = collections.OrderedDict
    elif not any(named_flags):
      container = tuple
    else:
      raise TypeError(
          'Cannot represent TFF type in TF because it contains '
          f'partially named structures. Type: {type_spec}')
    with_container = computation_types.StructWithPythonType(
        elements=structure.iter_elements(type_spec),
        container_type=container)
    return with_container, True

  if element_type.is_struct():
    # TF cannot use `structure.Struct`, so every `StructType` is swapped for a
    # `StructWithPythonType` that can later serve as the
    # `tf.data.Dataset.element_spec`.
    tf_compatible_type, _ = type_transformations.transform_type_postorder(
        element_type, transform_to_tff_known_type)
  else:
    # The compatibility check above leaves only structs or tensors, so this
    # is a `TensorType` and can be used unchanged.
    tf_compatible_type = element_type

  def _leaf_to_tensor_spec(
      leaf: computation_types.TensorType) -> tf.TensorSpec:
    return tf.TensorSpec(shape=leaf.shape, dtype=leaf.dtype)

  element_spec = type_conversions.structure_from_tensor_type_tree(
      _leaf_to_tensor_spec, tf_compatible_type)
  variant = tf.raw_ops.DatasetFromGraph(graph_def=serialized_graph_def)
  ds = tf.data.experimental.from_variant(variant, structure=element_spec)
  # When the serialized dataset had elements that were nested structures of
  # tensors (e.g. `dict`, `OrderedDict`), the deserialized dataset yields
  # `dict`, `tuple`, or `namedtuple`; `collections.OrderedDict` is lost in the
  # conversion.
  #
  # The dataset is only used inside TFF, so the dictionary coming from TF is
  # wrapped in an `OrderedDict` when necessary (a type both TF and TFF
  # understand), using the field order recorded in the TFF type at
  # serialization time.
  return tensorflow_utils.coerce_dataset_elements_to_tff_type_spec(
      ds, tf_compatible_type)
def create(
    self,
    value_type: factory.ValueType) -> aggregation_process.AggregationProcess:
  """Creates an aggregation process that discretizes values by a step size.

  Client values are mapped through `_discretize_struct`, aggregated by the
  process created from `self._inner_agg_factory`, and the aggregate is mapped
  back through `_undiscretize_struct` at the server. When
  `self._distortion_aggregation_factory` is set, the per-client mean squared
  discretization error is also aggregated and reported as a measurement.

  Args:
    value_type: A `TensorType`, or a `StructWithPythonType` containing only
      `TensorType`s, whose component dtypes are all floats.

  Returns:
    An `aggregation_process.AggregationProcess` whose state holds `step_size`
    and the inner process state, and whose measurements hold the inner
    measurements under `deterministic_discretization` plus, optionally,
    `distortion`.

  Raises:
    TypeError: If `value_type` has an unsupported structure or non-float
      dtypes.
  """
  # Validate input args and value_type and parse out the TF dtypes.
  if value_type.is_tensor():
    tf_dtype = value_type.dtype
  elif (value_type.is_struct_with_python() and
        type_analysis.is_structure_of_tensors(value_type)):
    # For structs, `tf_dtype` is a structure of dtypes matching `value_type`.
    tf_dtype = type_conversions.structure_from_tensor_type_tree(
        lambda x: x.dtype, value_type)
  else:
    raise TypeError('Expected `value_type` to be `TensorType` or '
                    '`StructWithPythonType` containing only `TensorType`. '
                    f'Found type: {repr(value_type)}')

  # Check that all values are floats.
  if not type_analysis.is_structure_of_floats(value_type):
    raise TypeError('Component dtypes of `value_type` must all be floats. '
                    f'Found {repr(value_type)}.')

  # The distortion is a single float32 scalar per client.
  if self._distortion_aggregation_factory is not None:
    distortion_aggregation_process = self._distortion_aggregation_factory.create(
        computation_types.to_type(tf.float32))

  @tensorflow_computation.tf_computation(value_type, tf.float32)
  def discretize_fn(value, step_size):
    return _discretize_struct(value, step_size)

  # Takes the discretized type plus a float32 step size and restores the
  # original dtype(s) recorded in `tf_dtype`.
  @tensorflow_computation.tf_computation(discretize_fn.type_signature.result,
                                         tf.float32)
  def undiscretize_fn(value, step_size):
    return _undiscretize_struct(value, step_size, tf_dtype)

  @tensorflow_computation.tf_computation(value_type, tf.float32)
  def distortion_measurement_fn(value, step_size):
    # Round-trip the value and compute the mean squared error over all of its
    # elements, flattened across the whole structure and cast to float32.
    reconstructed_value = undiscretize_fn(
        discretize_fn(value, step_size), step_size)
    err = tf.nest.map_structure(tf.subtract, reconstructed_value, value)
    squared_err = tf.nest.map_structure(tf.square, err)
    flat_squared_errs = [
        tf.cast(tf.reshape(t, [-1]), tf.float32)
        for t in tf.nest.flatten(squared_err)
    ]
    all_squared_errs = tf.concat(flat_squared_errs, axis=0)
    mean_squared_err = tf.reduce_mean(all_squared_errs)
    return mean_squared_err

  # The inner aggregator operates on the discretized type, not `value_type`.
  inner_agg_process = self._inner_agg_factory.create(
      discretize_fn.type_signature.result)

  @federated_computation.federated_computation()
  def init_fn():
    state = collections.OrderedDict(
        step_size=intrinsics.federated_value(self._step_size,
                                             placements.SERVER),
        inner_agg_process=inner_agg_process.initialize())
    return intrinsics.federated_zip(state)

  @federated_computation.federated_computation(
      init_fn.type_signature.result,
      computation_types.at_clients(value_type))
  def next_fn(state, value):
    # Clients need the step size to discretize, so it is broadcast.
    server_step_size = state['step_size']
    client_step_size = intrinsics.federated_broadcast(server_step_size)

    discretized_value = intrinsics.federated_map(discretize_fn,
                                                 (value, client_step_size))

    inner_state = state['inner_agg_process']
    inner_agg_output = inner_agg_process.next(inner_state, discretized_value)

    undiscretized_agg_value = intrinsics.federated_map(
        undiscretize_fn, (inner_agg_output.result, server_step_size))

    # `step_size` is carried over unchanged; only the inner state is replaced.
    new_state = collections.OrderedDict(
        step_size=server_step_size,
        inner_agg_process=inner_agg_output.state)
    measurements = collections.OrderedDict(
        deterministic_discretization=inner_agg_output.measurements)

    if self._distortion_aggregation_factory is not None:
      distortions = intrinsics.federated_map(distortion_measurement_fn,
                                             (value, client_step_size))
      # The distortion aggregator is re-initialized on every call; its state
      # is not stored in `new_state`.
      aggregate_distortion = distortion_aggregation_process.next(
          distortion_aggregation_process.initialize(), distortions).result
      measurements['distortion'] = aggregate_distortion

    return measured_process.MeasuredProcessOutput(
        state=intrinsics.federated_zip(new_state),
        result=undiscretized_agg_value,
        measurements=intrinsics.federated_zip(measurements))

  return aggregation_process.AggregationProcess(init_fn, next_fn)
def zeros_fn():
  """Builds a zero tensor for every tensor type leaf of `member_types`.

  `member_types` is read from the enclosing scope; each zero tensor takes the
  shape and dtype of its leaf.
  """

  def _zeros_like_leaf(leaf_type):
    return tf.zeros(shape=leaf_type.shape, dtype=leaf_type.dtype)

  return type_conversions.structure_from_tensor_type_tree(
      _zeros_like_leaf, member_types)
def test_weird_result_elements(self):
  """Arbitrary Python objects (here empty sets) are allowed as leaf results."""
  nested_spec = [tf.int32, (tf.string, tf.int32)]
  actual = type_conversions.structure_from_tensor_type_tree(
      lambda _: set(), nested_spec)
  expected = [set(), (set(), set())]
  self.assertEqual(actual, expected)
def secure_sum_then_finalize(
    metric_finalizers: model_lib.MetricFinalizersType,
    local_unfinalized_metrics_type: computation_types.StructWithPythonType,
    metric_value_ranges: Optional[MetricValueRangeDict] = None
) -> computation_base.Computation:
    """Creates a TFF computation that aggregates metrics using secure summation.

    The returned federated TFF computation has the following type signature:

    ```
    (local_unfinalized_metrics@CLIENTS -> aggregated_metrics@SERVER)
    ```

    where the input is given by
    `tff.learning.Model.report_local_unfinalized_metrics()` at `CLIENTS`, and
    the output is an `OrderedDict` at `SERVER` holding:

    *   one entry per metric name, computed by first securely summing the
        unfinalized metrics from `CLIENTS` and then applying the matching
        finalizer at `SERVER`; and
    *   a `secure_sum_measurements` entry, an `OrderedDict` that maps from
        `factory_key`s to the secure summation measurements (e.g. the number
        of clients that got clipped; see `tff.aggregators.SecureSumFactory`
        for details).

    A `factory_key` is uniquely defined by three scalars: lower bound, upper
    bound, and tensor dtype (denoted as datatype enum). Metric values of the
    same `factory_key` are grouped and aggregated together (and hence, the
    `secure_sum_measurements` are also computed at a group level).

    Since secure summation works in fixed-point arithmetic space, floating
    point numbers must be encoded using integer quantization. By default,
    each tensor in `local_unfinalized_metrics_type` will be clipped to
    `[0, 2**20 - 1]` and encoded to integers inside
    `tff.aggregators.SecureSumFactory`. Callers can change this range by
    setting `metric_value_ranges`, which may be a partial tree matching the
    structure of `local_unfinalized_metrics_type`.

    Example partial value range specification:

    >>> finalizers = ...
    >>> metrics_type = tff.to_type(collections.OrderedDict(
    ...     a=tff.types.TensorType(tf.int32),
    ...     b=tff.types.TensorType(tf.float32),
    ...     c=[tff.types.TensorType(tf.float32),
    ...        tff.types.TensorType(tf.float32)]))
    >>> value_ranges = collections.OrderedDict(
    ...     b=(0.0, 1.0),
    ...     c=[None, (0.0, 1.0)])
    >>> aggregator = tff.learning.metrics.secure_sum_then_finalize(
    ...     finalizers, metrics_type, value_ranges)

    This sets the range of the `b` tensor and of the *second* tensor of `c`
    in the dictionary, and uses the default range for the first tensor of `c`
    and for the `a` tensor.

    Args:
      metric_finalizers: An `OrderedDict` of `string` metric names to
        finalizer functions returned by
        `tff.learning.Model.metric_finalizers()`. It should have the same keys
        (i.e., metric names) as the `OrderedDict` returned by
        `tff.learning.Model.report_local_unfinalized_metrics()`. A finalizer
        is a callable (typically a `tf.function` or `tff.tf_computation`
        decorated function) that takes in a metric's unfinalized values, and
        returns the finalized values.
      local_unfinalized_metrics_type: A `tff.types.StructWithPythonType` (with
        `OrderedDict` as the Python container) of a client's local unfinalized
        metrics. Let `local_unfinalized_metrics` be the output of
        `tff.learning.Model.report_local_unfinalized_metrics()`. Its type can
        be obtained by
        `tff.framework.type_from_tensors(local_unfinalized_metrics)`.
      metric_value_ranges: A `collections.OrderedDict` that matches the
        structure of `local_unfinalized_metrics_type` (a value for each
        `tff.types.TensorType` in the type tree). Each leaf in the tree should
        have a 2-tuple that defines the range of expected values for that
        variable in the metric. If the entire structure is `None`, a default
        range of `[0.0, 2.0**20 - 1]` will be applied to all variables. Each
        leaf may also be `None`, which will also get the default range,
        allowing partial user specialization. At runtime, values that fall
        outside the range specified at a leaf are clipped to within that
        range.

    Returns:
      A federated TFF computation that securely sums the unfinalized metrics
      from `CLIENTS`, and applies the corresponding finalizers at `SERVER`.

    Raises:
      TypeError: If the inputs are of the wrong types.
      ValueError: If the keys (i.e., metric names) in `metric_finalizers` are
        not the same as those expected by `local_unfinalized_metrics_type`.
    """
    check_metric_finalizers(metric_finalizers)
    check_local_unfinalzied_metrics_type(local_unfinalized_metrics_type)
    check_finalizers_matches_unfinalized_metrics(
        metric_finalizers, local_unfinalized_metrics_type)

    default_metric_value_ranges = (
        create_default_secure_sum_quantization_ranges(
            local_unfinalized_metrics_type))
    if metric_value_ranges is None:
        metric_value_ranges = default_metric_value_ranges

    # Walk the incoming `metric_value_ranges` and `default_metric_value_ranges`
    # and fill in any missing ranges using the defaults.
    def fill_missing_values_with_defaults(default_values, user_values):
        if isinstance(default_values, collections.abc.Mapping):
            if user_values is None:
                user_values = {}
            # Rebuild with the defaults' container type so the result keeps
            # the structure (and key order) of the defaults.
            return type(default_values)(
                (key,
                 fill_missing_values_with_defaults(default_value,
                                                   user_values.get(key)))
                for key, default_value in default_values.items())
        elif isinstance(default_values, list):
            if user_values is None:
                user_values = [None] * len(default_values)
            return [
                fill_missing_values_with_defaults(default_value,
                                                  user_values[idx])
                for idx, default_value in enumerate(default_values)
            ]
        elif user_values is None:
            return _MetricRange(*default_values)
        else:
            _check_range(user_values)
            return _MetricRange(*user_values)

    try:
        metric_value_ranges = fill_missing_values_with_defaults(
            default_metric_value_ranges, metric_value_ranges)
    except TypeError as e:
        raise TypeError('Failed to create encoding value range from: '
                        f'{metric_value_ranges}') from e

    # Create an aggregator factory for each unique value range, rather than
    # each leaf tensor (which could introduce a lot of duplication).
    aggregator_factories = {
        value_range: secure.SecureSumFactory(value_range.upper,
                                             value_range.lower)
        for value_range in set(tree.flatten(metric_value_ranges))
    }

    # Construct a python container of `tff.TensorType` so we can traverse it
    # in parallel with the value ranges during AggregationProcess
    # construction. Otherwise we have a `tff.Type` but `metric_value_ranges`
    # is a Python container which are difficult to traverse in parallel.
    structure_of_tensor_types = (
        type_conversions.structure_from_tensor_type_tree(
            lambda t: t, local_unfinalized_metrics_type))

    # We will construct groups of tensors with the same dtype and
    # quantization value range so that we can construct fewer
    # aggregations-of-structures, rather than a large
    # structure-of-aggregations. Without this, the TFF compiler pipeline
    # results in large slow downs (see b/218312198).
    factory_key_by_path = collections.OrderedDict()
    value_range_by_factory_key = collections.OrderedDict()
    path_list_by_factory_key = collections.defaultdict(list)
    # Maintain a flattened list of paths. This is useful to flatten the
    # aggregated values, which will then be used by
    # `tf.nest.pack_sequence_as`.
    flattened_path_list = []
    for (path, tensor_spec), (_, value_range) in zip(
            tree.flatten_with_path(structure_of_tensor_types),
            tree.flatten_with_path(metric_value_ranges)):
        factory_key = _create_factory_key(value_range.lower,
                                          value_range.upper,
                                          tensor_spec.dtype)
        factory_key_by_path[path] = factory_key
        value_range_by_factory_key[factory_key] = value_range
        path_list_by_factory_key[factory_key].append(path)
        flattened_path_list.append(path)

    @tensorflow_computation.tf_computation(local_unfinalized_metrics_type)
    def group_value_by_factory_key(local_unfinalized_metrics):
        """Groups client local metrics into a map of `factory_key` to value list."""
        # We cannot use `collections.defaultdict(list)` here because its
        # result is incompatible with `structure_from_tensor_type_tree`;
        # `setdefault` gives the same grouping on a plain `OrderedDict`.
        value_list_by_factory_key = collections.OrderedDict()
        for path, value in tree.flatten_with_path(local_unfinalized_metrics):
            factory_key = factory_key_by_path[path]
            value_list_by_factory_key.setdefault(factory_key,
                                                 []).append(value)
        return value_list_by_factory_key

    def flatten_grouped_values(value_list_by_factory_key):
        """Flatten the values in the same order as in `flattened_path_list`."""
        value_by_path = collections.OrderedDict()
        for factory_key, value_list in value_list_by_factory_key.items():
            path_list = path_list_by_factory_key[factory_key]
            # Paths and values were appended in lockstep per factory key, so
            # zipping them restores the path -> value association.
            for path, value in zip(path_list, value_list):
                value_by_path[path] = value
        return [value_by_path[path] for path in flattened_path_list]

    # Create an aggregation process for each factory key.
    aggregation_process_by_factory_key = collections.OrderedDict()
    # Construct a python container of `tff.TensorType` so we can traverse it
    # and create aggregation processes from the factories.
    tensor_type_list_by_factory_key = (
        type_conversions.structure_from_tensor_type_tree(
            lambda t: t, group_value_by_factory_key.type_signature.result))
    for factory_key, tensor_type_list in (
            tensor_type_list_by_factory_key.items()):
        value_range = value_range_by_factory_key[factory_key]
        # Index rather than `.get()`: every value range has a factory by
        # construction, and a missing one should fail loudly as a `KeyError`
        # rather than as an attribute error on `None`.
        aggregation_process_by_factory_key[factory_key] = (
            aggregator_factories[value_range].create(
                computation_types.to_type(tensor_type_list)))

    @federated_computation.federated_computation(
        computation_types.at_clients(local_unfinalized_metrics_type))
    def aggregator_computation(client_local_unfinalized_metrics):
        # An empty state is passed to every secure-sum process below.
        unused_state = intrinsics.federated_value((), placements.SERVER)

        client_local_grouped_unfinalized_metrics = intrinsics.federated_map(
            group_value_by_factory_key, client_local_unfinalized_metrics)

        metrics_aggregation_output = collections.OrderedDict()
        for factory_key, process in (
                aggregation_process_by_factory_key.items()):
            metrics_aggregation_output[factory_key] = process.next(
                unused_state,
                client_local_grouped_unfinalized_metrics[factory_key])

        metrics_aggregation_output = intrinsics.federated_zip(
            metrics_aggregation_output)

        @tensorflow_computation.tf_computation(
            metrics_aggregation_output.type_signature.member)
        def finalizer_computation(grouped_aggregation_output):
            # One minor downside of grouping the aggregation processes is
            # that the SecAgg measurements (e.g., clipped_count) are computed
            # at a group level (a group means all metric values belonging to
            # the same `factory_key`).
            secure_sum_measurements = collections.OrderedDict(
                (factory_key, output.measurements)
                for factory_key, output in grouped_aggregation_output.items())
            finalized_metrics = collections.OrderedDict(
                secure_sum_measurements=secure_sum_measurements)
            grouped_unfinalized_metrics = collections.OrderedDict(
                (factory_key, output.result)
                for factory_key, output in grouped_aggregation_output.items())
            flattened_unfinalized_metrics_list = flatten_grouped_values(
                grouped_unfinalized_metrics)
            # Restore the caller's original metric structure before
            # finalizing.
            unfinalized_metrics = tf.nest.pack_sequence_as(
                structure_of_tensor_types,
                flattened_unfinalized_metrics_list)
            for metric_name, metric_finalizer in metric_finalizers.items():
                finalized_metrics[metric_name] = metric_finalizer(
                    unfinalized_metrics[metric_name])
            return finalized_metrics

        return intrinsics.federated_map(finalizer_computation,
                                        metrics_aggregation_output)

    return aggregator_computation
def create(
    self, value_type: factory.ValueType
) -> aggregation_process.AggregationProcess:
    """Creates the aggregation process wrapping the inner one with a DFT.

    Clients flatten and pad their values, then apply `self._num_repeats`
    rounds of: reshape to two rows, combine the rows into a complex vector,
    multiply by `sample_cis(...)`, take the FFT, and concatenate the real and
    imaginary parts, scaled down by `sqrt(size / 2)`. The server applies the
    rounds in reverse (seeds walked backwards, inverse FFT, `inverse=True`
    samples) to the inner aggregate, then slices and reshapes the result back
    to the specs of `value_type`.

    Args:
      value_type: The type of the client values; validated by
        `_check_value_type`.

    Returns:
      An `AggregationProcess` whose state is the pair of the inner process
      state and the global seed.
    """
    _check_value_type(value_type)

    def _as_tensor_spec(tensor_type):
        return tf.TensorSpec(tensor_type.shape, tensor_type.dtype)

    # Template used by the server to restore the original shapes.
    value_specs = type_conversions.structure_from_tensor_type_tree(
        _as_tensor_spec, value_type)

    # Every tensor consumes one seed per repeat in each round.
    num_tensors = len(structure.flatten(value_type))
    next_global_seed_fn = _build_next_global_seed_fn(
        stride=self._num_repeats * num_tensors)

    @tensorflow_computation.tf_computation(value_type, SEED_TFF_TYPE)
    def client_transform(value, global_seed):

        @tf.function
        def transform(tensor, seed):
            for _ in range(self._num_repeats):
                # Rows 0 and 1 become the real and imaginary parts.
                halves = tf.reshape(tensor, [2, -1])
                spectrum = tf.complex(real=halves[0], imag=halves[1])
                spectrum = spectrum * sample_cis(
                    tf.shape(spectrum), seed, inverse=False)
                spectrum = tf.signal.fft(spectrum)
                tensor = tf.concat(
                    [tf.math.real(spectrum), tf.math.imag(spectrum)], axis=0)
                scale = tf.cast(tf.sqrt(tf.size(tensor) / 2), OUTPUT_TF_DTYPE)
                tensor = tensor / scale
                seed = seed + 1
            return tensor

        padded = _flatten_and_pad_zeros_even(value)
        per_tensor_seeds = _unique_seeds_for_struct(
            padded, global_seed, stride=self._num_repeats)
        return tf.nest.map_structure(transform, padded, per_tensor_seeds)

    transformed_type = client_transform.type_signature.result
    inner_agg_process = self._inner_agg_factory.create(transformed_type)

    @tensorflow_computation.tf_computation(transformed_type, SEED_TFF_TYPE)
    def server_transform(value, global_seed):

        @tf.function
        def transform(tensor, seed):
            # Start from the last seed the client used and walk backwards.
            seed = seed + (self._num_repeats - 1)
            for _ in range(self._num_repeats):
                scale = tf.sqrt(
                    tf.size(tensor, out_type=tensor.dtype) / 2.0)
                tensor = tensor * scale
                halves = tf.reshape(tensor, [2, -1])
                spectrum = tf.complex(real=halves[0], imag=halves[1])
                spectrum = tf.signal.ifft(spectrum)
                spectrum = spectrum * sample_cis(
                    tf.shape(spectrum), seed, inverse=True)
                tensor = tf.concat(
                    [tf.math.real(spectrum), tf.math.imag(spectrum)], axis=0)
                seed = seed - 1
            return tensor

        per_tensor_seeds = _unique_seeds_for_struct(
            value, global_seed, stride=self._num_repeats)
        restored = tf.nest.map_structure(transform, value, per_tensor_seeds)
        return tf.nest.map_structure(
            _slice_and_reshape_to_template_spec, restored, value_specs)

    @federated_computation.federated_computation()
    def init_fn():
        inner_state = inner_agg_process.initialize()
        seed_state = intrinsics.federated_eval(
            tensorflow_computation.tf_computation(_init_global_seed),
            placements.SERVER)
        return intrinsics.federated_zip((inner_state, seed_state))

    @federated_computation.federated_computation(
        init_fn.type_signature.result,
        computation_types.at_clients(value_type))
    def next_fn(state, value):
        impl = _build_next_fn(client_transform, inner_agg_process,
                              server_transform, next_global_seed_fn, 'dft')
        return impl(state, value)

    return aggregation_process.AggregationProcess(init_fn, next_fn)
def create_all_zero_state():
    """Returns zero tensors shaped like `local_unfinalized_metrics_type`."""

    def _zeros_for(tensor_type):
        # One zero tensor per leaf, with the leaf's own shape and dtype.
        return tf.zeros(shape=tensor_type.shape, dtype=tensor_type.dtype)

    return type_conversions.structure_from_tensor_type_tree(
        _zeros_for, local_unfinalized_metrics_type)
def create_value():
    """Returns zero tensors for `type_spec`, or its member type if federated."""
    # A federated type is unwrapped to its member type first.
    if type_spec.is_federated():
        tensor_tree_type = type_spec.member
    else:
        tensor_tree_type = type_spec

    def _zeros_for(tensor_type):
        return tf.zeros(dtype=tensor_type.dtype, shape=tensor_type.shape)

    return type_conversions.structure_from_tensor_type_tree(
        _zeros_for, tensor_tree_type)