  def testPartitionedVariableFailures(self):
    with ops.Graph().as_default():
      with self.assertRaisesRegexp(ValueError, "empty"):
        variables.PartitionedVariable(
            name="fail",
            shape=2,
            dtype=dtypes.int32,
            variable_list=[],
            partitions=[])

      with self.assertRaisesRegexp(ValueError, "must have a save_slice_info"):
        v0 = variables.Variable([0])
        partitions = [1]
        variables.PartitionedVariable(
            name="two_vars",
            shape=[1],
            dtype=v0.dtype,
            variable_list=[v0],
            partitions=partitions)

      with self.assertRaisesRegexp(ValueError, "full shapes must match"):
        v0 = variables.Variable([0])
        v1 = variables.Variable([1])
        v0._set_save_slice_info(
            variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1]))
        v1._set_save_slice_info(
            variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1]))
        partitions = [2]

        variables.PartitionedVariable(
            name="two_vars",
            shape=[3],
            dtype=v0.dtype,
            variable_list=[v1, v0],
            partitions=partitions)

      with self.assertRaisesRegexp(ValueError, "must be positive"):
        v0 = variables.Variable([0])
        v0._set_save_slice_info(
            variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1]))
        partitions = [0]

        variables.PartitionedVariable(
            name="two_vars",
            shape=[2],
            dtype=v0.dtype,
            variable_list=[v0],
            partitions=partitions)

  def testPartitionedVariable(self):
    with ops.Graph().as_default():
      v0 = variables.Variable([0])
      v1 = variables.Variable([1])
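      # SaveSliceInfo takes (full_name, full_shape, var_offset, var_shape);
      # both slices share the same full name and the same full shape [2].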
      v0._set_save_slice_info(
          variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1]))
      v1._set_save_slice_info(
          variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1]))
      partitions = [2]

      # Pass variable_list as [v1, v0] to ensure they are properly
      # re-sorted to [v0, v1] based on their slice info offsets.
      partitioned_variable = variables.PartitionedVariable(
          name="two_vars",
          shape=[2],
          dtype=v0.dtype,
          variable_list=[v1, v0],
          partitions=partitions)

      concatenated = ops.convert_to_tensor(partitioned_variable)
      num_partitions = len(partitioned_variable)
      iterated_partitions = list(partitioned_variable)
      self.assertEqual(2, num_partitions)
      self.assertEqual([v0, v1], iterated_partitions)
      self.assertEqual([2], partitioned_variable.get_shape())
      self.assertEqual([2], partitioned_variable.shape)
      self.assertEqual([2], concatenated.get_shape())
      self.assertEqual([2], concatenated.shape)

# Example 3

    def testPartitionedVariableAssignments(self):
        with ops.Graph().as_default(), self.cached_session():
            v0 = variables.Variable(initial_value=[0.0])
            v1 = variables.Variable(initial_value=[1.0])
            v0._set_save_slice_info(
                variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1]))
            v1._set_save_slice_info(
                variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1]))
            partitions = [2]

            # variable_list is passed in offset order ([v0, v1]); the
            # PartitionedVariable also sorts its partitions by slice offset.
            partitioned_variable = variables.PartitionedVariable(
                name="two_vars",
                shape=[2],
                dtype=v0.dtype,
                variable_list=[v0, v1],
                partitions=partitions)

            deltas_a = constant_op.constant([1.0, 2.0])
            deltas_b = constant_op.constant([3.0, 4.0])
            ones = array_ops.ones([2])
            plus_delta = partitioned_variable.assign_add(deltas_a)
            minus_delta = partitioned_variable.assign_sub(deltas_b)
            assign_ones = partitioned_variable.assign(ones)
            variables.global_variables_initializer().run()

            self.assertEqual([1.0], plus_delta[0].eval())
            self.assertEqual([1.0], v0.eval())
            self.assertEqual([3.0], plus_delta[1].eval())
            self.assertEqual([3.0], v1.eval())

            self.assertEqual([-2.0], minus_delta[0].eval())
            self.assertEqual([-2.0], v0.eval())
            self.assertEqual([-1.0], minus_delta[1].eval())
            self.assertEqual([-1.0], v1.eval())

            self.assertEqual([1.0], assign_ones[0].eval())
            self.assertEqual([1.0], v0.eval())
            self.assertEqual([1.0], assign_ones[1].eval())
            self.assertEqual([1.0], v1.eval())

# Example 4

    def testPartitionedVariableAssignments(self):
        with ops.Graph().as_default(), self.cached_session():
            v0 = variables.Variable(initial_value=[0.0])
            v1 = variables.Variable(initial_value=[1.0])
            v2 = variables.Variable(initial_value=[20.0])
            v3 = variables.Variable(initial_value=[30.0])
            v0._set_save_slice_info(
                variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1]))
            v1._set_save_slice_info(
                variables.Variable.SaveSliceInfo(v1.name, [2], [1], [1]))
            v2._set_save_slice_info(
                variables.Variable.SaveSliceInfo(v2.name, [2], [0], [1]))
            v3._set_save_slice_info(
                variables.Variable.SaveSliceInfo(v3.name, [2], [1], [1]))

            partitions = [2]

            # Both variable lists are passed in offset order; the
            # PartitionedVariable also sorts its partitions by slice offset.
            pv_0 = variables.PartitionedVariable(name="two_vars",
                                                 shape=[2],
                                                 dtype=v0.dtype,
                                                 variable_list=[v0, v1],
                                                 partitions=partitions)

            pv_1 = variables.PartitionedVariable(name="two_vars",
                                                 shape=[2],
                                                 dtype=v0.dtype,
                                                 variable_list=[v2, v3],
                                                 partitions=partitions)

            deltas_a = constant_op.constant([1.0, 2.0])
            deltas_b = constant_op.constant([3.0, 4.0])
            ones = array_ops.ones([2])
            plus_delta = pv_0.assign_add(deltas_a)
            minus_delta = pv_0.assign_sub(deltas_b)
            assign_ones = pv_0.assign(ones)

            c_0 = constant_op.constant([2.0])
            c_1 = constant_op.constant([3.0])
            assign_list = pv_1.assign([c_0, c_1])
            assign_part_value = pv_1.assign_add(assign_ones)
            assign_part_var = pv_1.assign_sub(pv_0)
            self.evaluate(variables.global_variables_initializer())

            self.assertEqual([1.0], self.evaluate(plus_delta[0]))
            self.assertEqual([1.0], self.evaluate(v0))
            self.assertEqual([3.0], self.evaluate(plus_delta[1]))
            self.assertEqual([3.0], self.evaluate(v1))

            self.assertEqual([-2.0], self.evaluate(minus_delta[0]))
            self.assertEqual([-2.0], self.evaluate(v0))
            self.assertEqual([-1.0], self.evaluate(minus_delta[1]))
            self.assertEqual([-1.0], self.evaluate(v1))

            self.assertEqual([1.0], self.evaluate(assign_ones[0]))
            self.assertEqual([1.0], self.evaluate(v0))
            self.assertEqual([1.0], self.evaluate(assign_ones[1]))
            self.assertEqual([1.0], self.evaluate(v1))

            self.assertEqual([2.0], self.evaluate(assign_list[0]))
            self.assertEqual([2.0], self.evaluate(v2))
            self.assertEqual([3.0], self.evaluate(assign_list[1]))
            self.assertEqual([3.0], self.evaluate(v3))

            self.assertEqual([3.0], self.evaluate(assign_part_value[0]))
            self.assertEqual([3.0], self.evaluate(v2))
            self.assertEqual([4.0], self.evaluate(assign_part_value[1]))
            self.assertEqual([4.0], self.evaluate(v3))

            self.assertEqual([2.0], self.evaluate(assign_part_var[0]))
            self.assertEqual([2.0], self.evaluate(v2))
            self.assertEqual([3.0], self.evaluate(assign_part_var[1]))
            self.assertEqual([3.0], self.evaluate(v3))

# Example 5

    def _get_partitioned_variable(self,
                                  var_store,
                                  name,
                                  partitioner,
                                  shape=None,
                                  dtype=dtypes.float32,
                                  ktype=dtypes.int64,
                                  initializer=None,
                                  regularizer=None,
                                  reuse=None,
                                  trainable=None,
                                  collections=None,
                                  caching_device=None,
                                  validate_shape=True,
                                  constraint=None,
                                  synchronization=VariableSynchronization.AUTO,
                                  aggregation=VariableAggregation.NONE):
        """Gets or creates a sharded variable list with these parameters.

    The `partitioner` must be a callable that accepts a fully defined
    `TensorShape` and returns a sequence of integers (the `partitions`).
    These integers describe how to partition the given sharded `Variable`
    along the given dimension.  That is, `partitions[1] = 3` means split
    the `Variable` into 3 shards along dimension 1.  Currently, sharding along
    only one axis is supported.

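    For example, a partitioner that always splits the first axis into four
    shards (an illustrative sketch, not one of the built-in partitioners)
    could look like:

        def four_way_partitioner(shape, dtype):
          del dtype  # unused in this sketch
          return [4] + [1] * (shape.ndims - 1)
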
    If the list of variables with the given name (prefix) is already stored,
    we return the stored variables. Otherwise, we create a new one.

    Set `reuse` to `True` when you only want to reuse existing Variables.
    Set `reuse` to `False` when you only want to create new Variables.
    Set `reuse` to None (the default) or tf.compat.v1.AUTO_REUSE when you want
    variables to be created if they don't exist or returned if they do.

    If initializer is `None` (the default), the default initializer passed in
    the constructor is used. If that one is `None` too, we use a new
    `glorot_uniform_initializer`. If initializer is a Tensor, we use
    it as a value and derive the shape from the initializer.

    If the initializer is a callable, then it will be called for each
    shard.  Otherwise the initializer should match the shape of the entire
    sharded Variable, and it will be sliced accordingly for each shard.
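
    For example (illustrative only): for a `[4, 3]` variable partitioned into
    two shards along axis 0, `init_ops.ones_initializer()` would be called
    once per `[2, 3]` shard, whereas a `[4, 3]` constant Tensor initializer
    would be sliced into two `[2, 3]` pieces, one per shard.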

    Some useful partitioners are available.  See, e.g.,
    `variable_axis_size_partitioner` and `min_max_variable_partitioner`.
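
    For instance (illustrative), `variable_axis_size_partitioner(
    max_shard_bytes=1 << 20)` returns a partitioner callable of the form
    described above.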

    Args:
      var_store: the variable store that tracks previously created variables
        and partitioned variables; used here for reuse checks and bookkeeping.
      name: the name of the new or existing sharded variable.
      partitioner: Optional callable that accepts a fully defined `TensorShape`
        and `dtype` of the Variable to be created, and returns a list of
        partitions for each axis (currently only one axis can be partitioned).
      shape: shape of the new or existing sharded variable.
      dtype: type of the new or existing sharded variable (defaults to
        `DT_FLOAT`).
      ktype: key dtype passed through to each per-shard variable (defaults to
        `DT_INT64`).
      initializer: initializer for the sharded variable.
      regularizer: a (Tensor -> Tensor or None) function; the result of applying
        it on a newly created variable will be added to the collection
        GraphKeys.REGULARIZATION_LOSSES and can be used for regularization.
      reuse: a Boolean, None, or tf.AUTO_REUSE. Controls reuse or creation of
        variables.
      trainable: If `True` also add the variable to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      collections: List of graph collections keys to add the Variable to.
        Defaults to `[GraphKeys.GLOBAL_VARIABLES]` (see `tf.Variable`).
      caching_device: Optional device string or function describing where the
        Variable should be cached for reading.  Defaults to the Variable's
        device.  If not `None`, caches on another device.  Typical use is to
        cache on the device where the Ops using the Variable reside, to
        deduplicate copying through `Switch` and other conditional statements.
      validate_shape: If False, allows the variable to be initialized with a
        value of unknown shape. If True, the default, the shape of initial_value
        must be known.
      constraint: An optional projection function to be applied to the variable
        after being updated by an `Optimizer` (e.g. used to implement norm
        constraints or value constraints for layer weights). The function must
        take as input the unprojected Tensor representing the value of the
        variable and return the Tensor for the projected value (which must have
        the same shape). Constraints are not safe to use when doing asynchronous
        distributed training.
      synchronization: Indicates when a distributed variable will be
        aggregated. Accepted values are constants defined in the class
        `tf.VariableSynchronization`. By default the synchronization is set to
        `AUTO` and the current `DistributionStrategy` chooses when to
        synchronize.
      aggregation: Indicates how a distributed variable will be aggregated.
        Accepted values are constants defined in the class
        `tf.VariableAggregation`.

    Returns:
      A `PartitionedVariable` object.

    Raises:
      ValueError: when creating a new variable and shape is not declared,
        when reusing a variable and specifying a conflicting shape,
        when violating reuse during variable creation, or if an existing
        sharded variable exists for the given name but with different sharding.
    """
        initializing_from_value = initializer is not None and isinstance(
            initializer, ops.Tensor)
        if name in var_store._vars:
            raise ValueError(
                "A partitioner was provided, but an unpartitioned version of the "
                "variable was found: %s.  Perhaps a variable of the same name was "
                "already created without partitioning?" % name)

        shape = tensor_shape.as_shape(shape)
        if initializing_from_value:
            shape = shape.merge_with(initializer.get_shape())

        shape_t = tensor_shape.as_shape([sys.maxsize]).concatenate(shape)
        fd_partition_num = partitioner(shape=shape_t, dtype=dtype)[0]
        shape = tensor_shape.as_shape([fd_partition_num]).concatenate(shape)
        partitions = None
        if not reuse or partitioner:
            partitions = _call_partitioner(partitioner, shape, dtype)

        if name in var_store._partitioned_vars:
            if reuse is False:
                raise ValueError(
                    "Partitioned variable with name %s already exists. Did you mean to "
                    "set reuse=True or reuse=tf.AUTO_REUSE in VarScope?" %
                    name)

            existing_var = var_store._partitioned_vars[name]
            if not shape.is_compatible_with(existing_var.get_shape()):
                raise ValueError(
                    "Trying to reuse partitioned variable %s, but specified shape %s "
                    "and found shape %s." %
                    (name, shape, existing_var.get_shape()))
            if not dtype.is_compatible_with(existing_var.dtype):
                raise ValueError(
                    "Trying to reuse partitioned variable %s, but specified dtype %s "
                    "and found dtype %s." %
                    (name, dtype.name, existing_var.dtype.name))

            # pylint: disable=protected-access
            if (partitions is not None
                    and existing_var._get_partitions() != partitions):
                raise ValueError(
                    "Trying to reuse partitioned variable %s, but specified partitions "
                    "%s and found partitions %s." %
                    (name, partitions, existing_var._get_partitions()))
            # pylint: enable=protected-access

            return existing_var

        if reuse is True:
            raise ValueError(
                "PartitionedVariable %s does not exist, or was not "
                "created with tf.get_variable(). Did you mean to set "
                "reuse=False or reuse=tf.AUTO_REUSE in VarScope?" % name)

        slice_dim, num_slices = _get_slice_dim_and_num_slices(partitions)

        if "%s/part_0" % name in var_store._vars:
            if "%s/part_%d" % (name, num_slices - 1) not in self._vars:
                raise ValueError(
                    "Partitioner returned a different partitioning than what was "
                    "already found.  Partitioner returned %d shards, and shard "
                    "%s/part_0 was found, but %s/part_%d was not." %
                    (num_slices, name, name, num_slices - 1))
            if "%s/part_%d" % (name, num_slices) in self._vars:
                raise ValueError(
                    "Partitioner returned a different partitioning than what was "
                    "already found.  Partitioner returned %d shards, and shard "
                    "%s/part_0 was found, but so was the extra shard %s/part_%d."
                    % (num_slices, name, name, num_slices))

        vs = []
        for i, (var_offset, var_shape) in enumerate(
                _iter_slices(shape.as_list(), num_slices, slice_dim)):
            partition_info = _PartitionInfo(full_shape=shape.as_list(),
                                            var_offset=var_offset)
            var_full_name = "%s/part_%d" % (name, i)
            with ops.name_scope(var_full_name + "/PartitionedInitializer",
                                skip_on_eager=False):
                # Create the tensor to initialize the variable with default value.
                if initializer is None:
                    init, initializing_from_value = self._get_default_initializer(
                        name=name, shape=shape, dtype=dtype)
                    if initializing_from_value:
                        init_shape = None
                    else:
                        init_shape = var_shape
                elif callable(initializer):
                    init = initializer
                    init_shape = var_shape
                elif isinstance(initializer, ops.Tensor):
                    init = array_ops.slice(initializer, var_offset, var_shape)
                    # Use the dtype of the given tensor.
                    dtype = init.dtype.base_dtype
                    init_shape = None
                else:
                    init = ops.convert_to_tensor(initializer, dtype=dtype)
                    init = array_ops.slice(init, var_offset, var_shape)
                    init_shape = None
            # In this code path every shard is created with the shape minus the
            # leading partition dimension prepended above, overriding the
            # per-branch init_shape computed in the preceding block.
            init_shape = shape.as_list()[1:]
            with ops.name_scope(None):
                var = self._get_single_variable(
                    name=var_full_name,
                    var_store=var_store,
                    shape=init_shape,
                    dtype=dtype,
                    ktype=ktype,
                    initializer=init,
                    partition_info=partition_info,
                    regularizer=regularizer,
                    reuse=reuse,
                    trainable=trainable,
                    collections=collections,
                    caching_device=caching_device,
                    validate_shape=validate_shape,
                    constraint=constraint,
                    synchronization=synchronization,
                    aggregation=aggregation)

            # pylint: disable=protected-access
            var._set_save_slice_info(
                variables.Variable.SaveSliceInfo(name, shape.as_list(),
                                                 var_offset, var_shape))
            vs.append(var)
            # pylint: enable=protected-access

        partitioned_var = variables.PartitionedVariable(name=name,
                                                        shape=shape,
                                                        dtype=dtype,
                                                        variable_list=vs,
                                                        partitions=partitions)
        if not context.executing_eagerly() or var_store._store_eager_variables:
            var_store._partitioned_vars[name] = partitioned_var
        return partitioned_var
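
# A separate, self-contained usage sketch (an assumption, not part of the code
# above): partitioned variables are normally obtained through the public
# tf.compat.v1 API rather than this private helper. The partitioner below is
# one of the built-ins mentioned in the docstring; run in graph mode.
import tensorflow.compat.v1 as tf

with tf.Graph().as_default():
  partitioner = tf.min_max_variable_partitioner(
      max_partitions=4, min_slice_size=16 << 10)
  with tf.variable_scope("scope", partitioner=partitioner):
    # get_variable() calls the partitioner with the full shape and dtype and
    # creates one "<name>/part_i" variable per shard.
    weights = tf.get_variable(
        "weights",
        shape=[1000, 64],
        initializer=tf.truncated_normal_initializer(stddev=0.1))
  # `weights` is a PartitionedVariable; iterating it yields the per-shard
  # variables, as in the tests above.
  shard_names = [v.name for v in weights]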