def testPartitionedVariableFailures(self): with ops.Graph().as_default(): with self.assertRaisesRegexp(ValueError, "empty"): variables.PartitionedVariable( name="fail", shape=2, dtype=dtypes.int32, variable_list=[], partitions=[]) with self.assertRaisesRegexp(ValueError, "must have a save_slice_info"): v0 = variables.Variable([0]) partitions = [1] variables.PartitionedVariable( name="two_vars", shape=[1], dtype=v0.dtype, variable_list=[v0], partitions=partitions) with self.assertRaisesRegexp(ValueError, "full shapes must match"): v0 = variables.Variable([0]) v1 = variables.Variable([1]) v0._set_save_slice_info( variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1])) v1._set_save_slice_info( variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1])) partitions = [2] variables.PartitionedVariable( name="two_vars", shape=[3], dtype=v0.dtype, variable_list=[v1, v0], partitions=partitions) with self.assertRaisesRegexp(ValueError, "must be positive"): v0 = variables.Variable([0]) v0._set_save_slice_info( variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1])) partitions = [0] variables.PartitionedVariable( name="two_vars", shape=[2], dtype=v0.dtype, variable_list=[v0], partitions=partitions)
def testPartitionedVariable(self): with ops.Graph().as_default(): v0 = variables.Variable([0]) v1 = variables.Variable([1]) v0._set_save_slice_info( variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1])) v1._set_save_slice_info( variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1])) partitions = [2] # Pass variable_list as [v1, v0] to ensure they are properly # re-sorted to [v0, v1] based on their slice info offsets. partitioned_variable = variables.PartitionedVariable( name="two_vars", shape=[2], dtype=v0.dtype, variable_list=[v1, v0], partitions=partitions) concatenated = ops.convert_to_tensor(partitioned_variable) num_partitions = len(partitioned_variable) iterated_partitions = list(partitioned_variable) self.assertEqual(2, num_partitions) self.assertEqual([v0, v1], iterated_partitions) self.assertEqual([2], partitioned_variable.get_shape()) self.assertEqual([2], partitioned_variable.shape) self.assertEqual([2], concatenated.get_shape()) self.assertEqual([2], concatenated.shape)
def testPartitionedVariableAssignments(self): with ops.Graph().as_default(), self.cached_session() as sess: v0 = variables.Variable(initial_value=[0.0]) v1 = variables.Variable(initial_value=[1.0]) v0._set_save_slice_info( variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1])) v1._set_save_slice_info( variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1])) partitions = [2] # Pass variable_list as [v1, v0] to ensure they are properly # re-sorted to [v0, v1] based on their slice info offsets. partitioned_variable = variables.PartitionedVariable( name="two_vars", shape=[2], dtype=v0.dtype, variable_list=[v0, v1], partitions=partitions) deltas_a = constant_op.constant([1.0, 2.0]) deltas_b = constant_op.constant([3.0, 4.0]) ones = array_ops.ones([2]) plus_delta = partitioned_variable.assign_add(deltas_a) minus_delta = partitioned_variable.assign_sub(deltas_b) assign_ones = partitioned_variable.assign(ones) variables.global_variables_initializer().run() self.assertEqual([1.0], plus_delta[0].eval()) self.assertEqual([1.0], v0.eval()) self.assertEqual([3.0], plus_delta[1].eval()) self.assertEqual([3.0], v1.eval()) self.assertEqual([-2.0], minus_delta[0].eval()) self.assertEqual([-2.0], v0.eval()) self.assertEqual([-1.0], minus_delta[1].eval()) self.assertEqual([-1.0], v1.eval()) self.assertEqual([1.0], assign_ones[0].eval()) self.assertEqual([1.0], v0.eval()) self.assertEqual([1.0], assign_ones[1].eval()) self.assertEqual([1.0], v1.eval())
def testPartitionedVariableAssignments(self): with ops.Graph().as_default(), self.cached_session(): v0 = variables.Variable(initial_value=[0.0]) v1 = variables.Variable(initial_value=[1.0]) v2 = variables.Variable(initial_value=[20.0]) v3 = variables.Variable(initial_value=[30.0]) v0._set_save_slice_info( variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1])) v1._set_save_slice_info( variables.Variable.SaveSliceInfo(v1.name, [2], [1], [1])) v2._set_save_slice_info( variables.Variable.SaveSliceInfo(v2.name, [2], [0], [1])) v3._set_save_slice_info( variables.Variable.SaveSliceInfo(v3.name, [2], [1], [1])) partitions = [2] # Pass variable_list as [v1, v0] to ensure they are properly # re-sorted to [v0, v1] based on their slice info offsets. pv_0 = variables.PartitionedVariable(name="two_vars", shape=[2], dtype=v0.dtype, variable_list=[v0, v1], partitions=partitions) pv_1 = variables.PartitionedVariable(name="two_vars", shape=[2], dtype=v0.dtype, variable_list=[v2, v3], partitions=partitions) deltas_a = constant_op.constant([1.0, 2.0]) deltas_b = constant_op.constant([3.0, 4.0]) ones = array_ops.ones([2]) plus_delta = pv_0.assign_add(deltas_a) minus_delta = pv_0.assign_sub(deltas_b) assign_ones = pv_0.assign(ones) c_0 = constant_op.constant([2.0]) c_1 = constant_op.constant([3.0]) assign_list = pv_1.assign([c_0, c_1]) assign_part_value = pv_1.assign_add(assign_ones) assign_part_var = pv_1.assign_sub(pv_0) self.evaluate(variables.global_variables_initializer()) self.assertEqual([1.0], self.evaluate(plus_delta[0])) self.assertEqual([1.0], self.evaluate(v0)) self.assertEqual([3.0], self.evaluate(plus_delta[1])) self.assertEqual([3.0], self.evaluate(v1)) self.assertEqual([-2.0], self.evaluate(minus_delta[0])) self.assertEqual([-2.0], self.evaluate(v0)) self.assertEqual([-1.0], self.evaluate(minus_delta[1])) self.assertEqual([-1.0], self.evaluate(v1)) self.assertEqual([1.0], self.evaluate(assign_ones[0])) self.assertEqual([1.0], self.evaluate(v0)) self.assertEqual([1.0], self.evaluate(assign_ones[1])) self.assertEqual([1.0], self.evaluate(v1)) self.assertEqual([2.0], self.evaluate(assign_list[0])) self.assertEqual([2.0], self.evaluate(v2)) self.assertEqual([3.0], self.evaluate(assign_list[1])) self.assertEqual([3.0], self.evaluate(v3)) self.assertEqual([3.0], self.evaluate(assign_part_value[0])) self.assertEqual([3.0], self.evaluate(v2)) self.assertEqual([4.0], self.evaluate(assign_part_value[1])) self.assertEqual([4.0], self.evaluate(v3)) self.assertEqual([2.0], self.evaluate(assign_part_var[0])) self.assertEqual([2.0], self.evaluate(v2)) self.assertEqual([3.0], self.evaluate(assign_part_var[1])) self.assertEqual([3.0], self.evaluate(v3))
def _get_partitioned_variable(self, var_store, name, partitioner, shape=None, dtype=dtypes.float32, ktype=dtypes.int64, initializer=None, regularizer=None, reuse=None, trainable=None, collections=None, caching_device=None, validate_shape=True, constraint=None, synchronization=VariableSynchronization.AUTO, aggregation=VariableAggregation.NONE): """Gets or creates a sharded variable list with these parameters. The `partitioner` must be a callable that accepts a fully defined `TensorShape` and returns a sequence of integers (the `partitions`). These integers describe how to partition the given sharded `Variable` along the given dimension. That is, `partitions[1] = 3` means split the `Variable` into 3 shards along dimension 1. Currently, sharding along only one axis is supported. If the list of variables with the given name (prefix) is already stored, we return the stored variables. Otherwise, we create a new one. Set `reuse` to `True` when you only want to reuse existing Variables. Set `reuse` to `False` when you only want to create new Variables. Set `reuse` to None (the default) or tf.compat.v1.AUTO_REUSE when you want variables to be created if they don't exist or returned if they do. If initializer is `None` (the default), the default initializer passed in the constructor is used. If that one is `None` too, we use a new `glorot_uniform_initializer`. If initializer is a Tensor, we use it as a value and derive the shape from the initializer. If the initializer is a callable, then it will be called for each shard. Otherwise the initializer should match the shape of the entire sharded Variable, and it will be sliced accordingly for each shard. Some useful partitioners are available. See, e.g., `variable_axis_size_partitioner` and `min_max_variable_partitioner`. Args: name: the name of the new or existing sharded variable. partitioner: Optional callable that accepts a fully defined `TensorShape` and `dtype` of the Variable to be created, and returns a list of partitions for each axis (currently only one axis can be partitioned). shape: shape of the new or existing sharded variable. dtype: type of the new or existing sharded variable (defaults to `DT_FLOAT`). initializer: initializer for the sharded variable. regularizer: a (Tensor -> Tensor or None) function; the result of applying it on a newly created variable will be added to the collection GraphKeys.REGULARIZATION_LOSSES and can be used for regularization. reuse: a Boolean, None, or tf.AUTO_REUSE. Controls reuse or creation of variables. trainable: If `True` also add the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). collections: List of graph collections keys to add the Variable to. Defaults to `[GraphKeys.GLOBAL_VARIABLES]` (see `tf.Variable`). caching_device: Optional device string or function describing where the Variable should be cached for reading. Defaults to the Variable's device. If not `None`, caches on another device. Typical use is to cache on the device where the Ops using the Variable reside, to deduplicate copying through `Switch` and other conditional statements. validate_shape: If False, allows the variable to be initialized with a value of unknown shape. If True, the default, the shape of initial_value must be known. constraint: An optional projection function to be applied to the variable after being updated by an `Optimizer` (e.g. used to implement norm constraints or value constraints for layer weights). The function must take as input the unprojected Tensor representing the value of the variable and return the Tensor for the projected value (which must have the same shape). Constraints are not safe to use when doing asynchronous distributed training. synchronization: Indicates when a distributed a variable will be aggregated. Accepted values are constants defined in the class `tf.VariableSynchronization`. By default the synchronization is set to `AUTO` and the current `DistributionStrategy` chooses when to synchronize. aggregation: Indicates how a distributed variable will be aggregated. Accepted values are constants defined in the class `tf.VariableAggregation`. Returns: A `PartitionedVariable` object. Raises: ValueError: when creating a new variable and shape is not declared, when reusing a variable and specifying a conflicting shape, when violating reuse during variable creation, or if an existing sharded variable exists for the given name but with different sharding. """ initializing_from_value = initializer is not None and isinstance( initializer, ops.Tensor) if name in var_store._vars: raise ValueError( "A partitioner was provided, but an unpartitioned version of the " "variable was found: %s. Perhaps a variable of the same name was " "already created without partitioning?" % name) shape = tensor_shape.as_shape(shape) if initializing_from_value: shape = shape.merge_with(initializer.get_shape()) shape_t = tensor_shape.as_shape([sys.maxsize]).concatenate(shape) fd_partition_num = partitioner(shape=shape_t, dtype=dtype)[0] shape = tensor_shape.as_shape([fd_partition_num]).concatenate(shape) partitions = None if not reuse or partitioner: partitions = _call_partitioner(partitioner, shape, dtype) if name in var_store._partitioned_vars: if reuse is False: raise ValueError( "Partitioned variable with name %s already exists. Did you mean to " "set reuse=True or reuse=tf.AUTO_REUSE in VarScope?" % name) existing_var = var_store._partitioned_vars[name] if not shape.is_compatible_with(existing_var.get_shape()): raise ValueError( "Trying to reuse partitioned variable %s, but specified shape %s " "and found shape %s." % (name, shape, existing_var.get_shape())) if not dtype.is_compatible_with(existing_var.dtype): raise ValueError( "Trying to reuse partitioned variable %s, but specified dtype %s " "and found dtype %s." % (name, dtype.name, existing_var.dtype.name)) # pylint: disable=protected-access if (partitions is not None and existing_var._get_partitions() != partitions): raise ValueError( "Trying to reuse partitioned variable %s, but specified partitions " "%s and found partitions %s." % (name, partitions, existing_var._get_partitions())) # pylint: enable=protected-access return existing_var if reuse is True: raise ValueError( "PartitionedVariable %s does not exist, or was not " "created with tf.get_variable(). Did you mean to set " "reuse=False or reuse=tf.AUTO_REUSE in VarScope?" % name) slice_dim, num_slices = _get_slice_dim_and_num_slices(partitions) if "%s/part_0" % name in var_store._vars: if "%s/part_%d" % (name, num_slices - 1) not in self._vars: raise ValueError( "Partitioner returned a different partitioning than what was " "already found. Partitioner returned %d shards, and shard " "%s/part_0 was found, but %s/part_%d was not." % (num_slices, name, name, num_slices - 1)) if "%s/part_%d" % (name, num_slices) in self._vars: raise ValueError( "Partitioner returned a different partitioning than what was " "already found. Partitioner returned %d shards, and shard " "%s/part_0 was found, but so was the extra shard %s/part_%d." % (num_slices, name, name, num_slices)) vs = [] for i, (var_offset, var_shape) in enumerate( _iter_slices(shape.as_list(), num_slices, slice_dim)): partition_info = _PartitionInfo(full_shape=shape.as_list(), var_offset=var_offset) var_full_name = "%s/part_%d" % (name, i) with ops.name_scope(var_full_name + "/PartitionedInitializer", skip_on_eager=False): # Create the tensor to initialize the variable with default value. if initializer is None: init, initializing_from_value = self._get_default_initializer( name=name, shape=shape, dtype=dtype) if initializing_from_value: init_shape = None else: init_shape = var_shape elif callable(initializer): init = initializer init_shape = var_shape elif isinstance(initializer, ops.Tensor): init = array_ops.slice(initializer, var_offset, var_shape) # Use the dtype of the given tensor. dtype = init.dtype.base_dtype init_shape = None else: init = ops.convert_to_tensor(initializer, dtype=dtype) init = array_ops.slice(init, var_offset, var_shape) init_shape = None init_shape = shape.as_list()[1:] with ops.name_scope(None): var = self._get_single_variable( name=var_full_name, var_store=var_store, shape=init_shape, dtype=dtype, ktype=ktype, initializer=init, partition_info=partition_info, regularizer=regularizer, reuse=reuse, trainable=trainable, collections=collections, caching_device=caching_device, validate_shape=validate_shape, constraint=constraint, synchronization=synchronization, aggregation=aggregation) # pylint: disable=protected-access var._set_save_slice_info( variables.Variable.SaveSliceInfo(name, shape.as_list(), var_offset, var_shape)) vs.append(var) # pylint: enable=protected-access partitioned_var = variables.PartitionedVariable(name=name, shape=shape, dtype=dtype, variable_list=vs, partitions=partitions) if not context.executing_eagerly() or var_store._store_eager_variables: var_store._partitioned_vars[name] = partitioned_var return partitioned_var