def testMinimalRun(self):
        x = basic.TrainableVariable(shape=(),
                                    initializers={'w':
                                                  tf.ones_initializer()})()
        x2 = x**2.0
        min_value = 0.5
        constr = optimization_constraints.OptimizationConstraints().add(
            x > min_value)

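        # Connecting the constraints module via `constr()` below attaches it to
        # the graph; once connected, adding further constraints raises a
        # ValueError, which the assertions below verify.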
        self.assertFalse(constr._is_connected)
        loss = moving_average.MovingAverage()(x2 + tf.random.normal(
            (), stddev=1.0)) + constr()

        self.assertTrue(constr._is_connected)
        with self.assertRaisesRegexp(ValueError,
                                     'Cannot add further constraints'):
            constr.add(x > min_value)
        with self.assertRaisesRegexp(ValueError,
                                     'Cannot add further constraints'):
            constr.add_geq(x, min_value)
        with self.assertRaisesRegexp(ValueError,
                                     'Cannot add further constraints'):
            constr.add_leq(min_value, x)

        opt = tf.train.AdamOptimizer(1e-2, beta1=0.0)
        update = opt.minimize(loss)
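        # Re-read x2 only after the optimizer update has run, so the fetched
        # value reflects the post-update variable.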
        with tf.control_dependencies([update]):
            x2 = tf.identity(x2)

        with tf.train.MonitoredSession() as sess:
            for _ in range(500):
                v, _ = sess.run([x2, update])
        self.assertAllClose(v, min_value**2)
def trainable_initial_state(batch_size,
                            state_size,
                            dtype,
                            initializers=None,
                            regularizers=None,
                            name=None):
    """Creates an initial state consisting of trainable variables.

  The trainable variables are created with the same shapes as the elements of
  `state_size` and are tiled to produce an initial state.

  Args:
    batch_size: An int, or scalar int32 Tensor representing the batch size.
    state_size: A `TensorShape` or nested tuple of `TensorShape`s to use for the
        shape of the trainable variables.
    dtype: The data type used to create the variables and thus initial state.
    initializers: An optional container of the same structure as `state_size`
        containing initializers for the variables.
    regularizers: An optional container of the same structure as `state_size`
        containing regularizers for the variables.
    name: optional string used to prefix the initial state variable names.

  Returns:
    A `Tensor` or nested tuple of `Tensor`s with the same size and structure
    as `state_size`, where each `Tensor` is a tiled trainable `Variable`.

  Raises:
    ValueError: if the user passes initializers that are not functions.
    ValueError: if the user passes regularizers that are not functions.
  """
    flat_state_size = nest.flatten(state_size)

    if not initializers:
        flat_initializer = tuple(tf.zeros_initializer()
                                 for _ in flat_state_size)
    else:
        nest.assert_same_structure(initializers, state_size)
        flat_initializer = nest.flatten(initializers)
        if not all([callable(init) for init in flat_initializer]):
            raise ValueError(
                "Not all the passed initializers are callable objects.")

    if not regularizers:
        flat_regularizer = tuple({} for _ in flat_state_size)
    else:
        nest.assert_same_structure(regularizers, state_size)
        flat_regularizer = nest.flatten(regularizers)
        if not all([callable(regularizer)
                    for regularizer in flat_regularizer]):
            raise ValueError(
                "Not all the passed regularizers are callable objects.")

    # Produce names for the variables. In the case of a tuple or nested tuple,
    # this is just a sequence of numbers, but for a flat `namedtuple`, we use
    # the field names. NOTE: this could be extended to nested `namedtuple`s,
    # but for now that's extra complexity that's not used anywhere.
    name_prefix = name or "initial_state"
    try:
        name_suffixes = [
            state_size._fields[i] for i in range(len(flat_state_size))
        ]
    except (AttributeError, IndexError):
        name_suffixes = range(len(flat_state_size))

    flat_initial_state = []

    for name_suffix, size, init, regularizer in zip(name_suffixes,
                                                    flat_state_size,
                                                    flat_initializer,
                                                    flat_regularizer):
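        # Create the variable with a leading singleton batch dimension; it is
        # tiled up to `batch_size` below so every example in the batch shares
        # the same trainable initial state.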
        shape_with_batch_dim = [1] + tf.TensorShape(size).as_list()

        variable_name = "{}_{}".format(name_prefix, name_suffix)
        initial_state_module = basic.TrainableVariable(
            shape_with_batch_dim,
            dtype=dtype,
            initializers={"w": init},
            regularizers={"w": regularizer},
            name=variable_name)
        initial_state_variable = initial_state_module()

        tiled_name = "state_{}_tiled".format(name_suffix)

        initial_state_variable_dims = initial_state_variable.get_shape().ndims
        tile_dims = [batch_size] + [1] * (initial_state_variable_dims - 1)
        flat_initial_state.append(
            tf.tile(initial_state_variable, tile_dims, name=tiled_name))

    return nest.pack_sequence_as(structure=state_size,
                                 flat_sequence=flat_initial_state)
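
# Example usage (a sketch, not part of the original module): build a trainable
# initial state for an RNN whose state is a single rank-1 vector of size 16.
# `_example_trainable_initial_state` is a hypothetical helper; it assumes `tf`
# is the TF1 API already imported by this module.
def _example_trainable_initial_state(batch_size):
    return trainable_initial_state(
        batch_size=batch_size,
        state_size=tf.TensorShape([16]),
        dtype=tf.float32,
        name="example_initial_state")
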
def get_lagrange_multiplier(shape=(),
                            rate=1.0,
                            initializer=1.0,
                            maximize=True,
                            valid_range=None,
                            name='lagrange_multiplier'):
    """Lagrange multiplier factory.

  This factory returns ops that help set up constrained optimization problems
  in TensorFlow. Given a constraint function op (either scalar or
  vector-valued), use this function to instantiate a Lagrange multiplier op,
  then dot product the two and add the result to the loss that is being
  optimized over. There is no need to instantiate a second optimizer to solve
  the min-max problem, as the Lagrange multiplier op is set up to manipulate
  its own gradients so that a single optimizer can be used to update all the
  variables correctly.

  Args:
    shape: Lagrange multipliers can be used with both scalar and vector
        constraint functions; when using vector constraints use the shape kwarg
        to pass in shape information and instantiate variables of the correct
        shape.
    rate: Scalar used to scale the magnitude of the gradients of the Lagrange
        multipliers, defaulting to 1.0. Values smaller than 1 make the Lagrange
        multiplier updates slower than those of the model's parameters.
    initializer: Initializer for the Lagrange multipliers. Note that
        when using inequality constraints the initial value of the multiplier
        will be transformed via the parametrization function.
    maximize: Boolean, True if we want to maximize the loss w.r.t. the Lagrange
        multipliers, False otherwise.
    valid_range: Tuple or list of values used to clip the value of the
        (possibly reparametrized) Lagrange multipliers.
    name: Name of the Lagrange multiplier op.

  Returns:
    An op to be inserted into the graph by multiplying it with a constraint op
        and adding the resulting op to a loss. The Lagrange multiplier
        gradients are modified so that, by calling minimize on the loss, the
        optimizer will actually minimize w.r.t. the model's parameters and
        maximize w.r.t. the Lagrange multipliers, hence enforcing the
        constraints.

  Raises:
    ValueError: If the Lagrange multiplier is set to enforce an equality
        constraint and a parametrization function is also provided.
  """
    initializer = initializer or np.ones(shape=shape)
    if isinstance(initializer, (numbers.Number, np.ndarray, list, tuple)):
        initializer = tf.constant_initializer(initializer)
    initializer = _LagrangeMultiplierInitializer(initializer)

    lambda_var = basic.TrainableVariable(name=name,
                                         shape=shape,
                                         initializers={'w': initializer})()
    tf.add_to_collection(LAGRANGE_MULTIPLIERS, lambda_var)

    lag_multiplier = _parametrize(lambda_var, rate=rate)
    lag_multiplier.set_shape(shape)
    if valid_range:
        lag_multiplier = _constrain_to_range(lag_multiplier, *valid_range)

    return lag_multiplier if maximize else -lag_multiplier
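
# Example usage (a sketch, not part of the original module): the pattern
# described in the docstring above. `model_loss` is the task loss and
# `constraint` is an op that is positive while the constraint is violated;
# both are assumed to be built elsewhere in the caller's TF1 graph, and
# `_example_constrained_loss` is a hypothetical helper.
def _example_constrained_loss(model_loss, constraint):
    # The multiplier manipulates its own gradients, so minimizing the returned
    # loss with a single optimizer effectively maximizes it w.r.t. the
    # multiplier and enforces the constraint.
    lag_mult = get_lagrange_multiplier(shape=(), rate=1.0, maximize=True)
    return model_loss + lag_mult * constraint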