def log_joint_fn(*param_vals):
        """Generated log-density function."""

        # Sum the log_prob values from parameter priors.
        param_lp = sum([
            param.prior.log_prob(param_val)
            for (param, param_val) in zip(self.parameters, param_vals)
        ])

        # Build a linear Gaussian state space model and evaluate the marginal
        # log_prob on observations.
        lgssm = self.make_state_space_model(
            param_vals=param_vals, num_timesteps=num_timesteps)
        observation_lp = lgssm.log_prob(observed_time_series)

        # Sum over likelihoods from iid observations. Without this sum,
        # adding `param_lp + observation_lp` would broadcast the param priors
        # over the sample shape, which incorrectly multi-counts the param
        # priors.
        sample_ndims = tf.maximum(0,
                                  tf.rank(observation_lp) - tf.rank(param_lp))
        observation_lp = tf.reduce_sum(
            observation_lp, axis=tf.range(sample_ndims))

        return param_lp + observation_lp
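A minimal sketch (with assumed shapes) of the broadcasting pitfall the reduction above guards against: when `observation_lp` carries a sample dimension that the scalar `param_lp` lacks, adding before reducing counts the prior once per sample.

import tensorflow as tf

param_lp = tf.constant(-1.)                    # scalar prior log-prob
observation_lp = tf.constant([-2., -3., -4.])  # one log-prob per iid sample
naive = tf.reduce_sum(param_lp + observation_lp)    # prior counted 3 times: -12
correct = param_lp + tf.reduce_sum(observation_lp)  # prior counted once: -10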
Example #2
    def testSparseShape(self):
        with self.test_session():
            sp_value = tf.SparseTensorValue(
                indices=((0, 1), (1, 0)), values=(42, 24), shape=(2, 2))
            self.assertAllEqual((2, 2), tf.shape(sp_value).eval())
            self.assertEqual(4, tf.size(sp_value).eval())
            self.assertEqual(2, tf.rank(sp_value).eval())

            sp = tf.SparseTensor.from_value(sp_value)
            self.assertAllEqual((2, 2), tf.shape(sp).eval())
            self.assertEqual(4, tf.size(sp).eval())
            self.assertEqual(2, tf.rank(sp).eval())
Example #3
  def testDenseShape(self):
    with self.test_session():
      t_value = [[0, 42], [24, 0]]
      self.assertAllEqual((2, 2), tf.shape(t_value).eval())
      self.assertEqual(4, tf.size(t_value).eval())
      self.assertEqual(2, tf.rank(t_value).eval())

      t = tf.constant(t_value)
      self.assertAllEqual((2, 2), tf.shape(t).eval())
      self.assertEqual(4, tf.size(t).eval())
      self.assertEqual(2, tf.rank(t).eval())
Example #4
 def new_target_log_prob(*transformed_state_parts):
   """Log prob of the transformed state."""
   # TODO(b/72831017): Use `tf.identity` to disable caching (since HMC takes
   # gradient with respect to input).
   transformed_state_parts = [
       tf.identity(sp) for sp in transformed_state_parts
   ]
   tlp = target_log_prob_fn(
       *self._forward_transform(transformed_state_parts))
   event_ndims = [
       tf.rank(sp) - tf.rank(tlp) for sp in transformed_state_parts
   ]
   return tlp + self._forward_log_det_jacobian(
       transformed_state_parts=transformed_state_parts,
       event_ndims=event_ndims)
Example #5
def _tensor_product(t1, t2):
  """Computes the tensor product of two tensors.

  If the rank of `t1` is `q` and the rank of `t2` is `r`, the result `z` is
  of rank `q+r` with shape `t1.shape + t2.shape`. The components of `z` are:

  ```None
    z[i1, i2, .., iq, j1, j2, .., jr] = t1[i1, .., iq] * t2[j1, .., jr]
  ```

  If both inputs are of rank 1, then the tensor product is equivalent to outer
  product of vectors.

  Note that tensor product is not commutative in general.

  Args:
    t1: A `tf.Tensor` of any dtype and non zero rank.
    t2: A `tf.Tensor` of same dtype as `t1` and non zero rank.

  Returns:
    product: A `Tensor` of rank `q + r` with shape `t1.shape + t2.shape`,
      whose components are given by the formula above.
  """
  t1_shape = tf.shape(t1)
  padding = tf.ones([tf.rank(t2)], dtype=t1_shape.dtype)
  padded_shape = tf.concat([t1_shape, padding], axis=0)
  t1_padded = tf.reshape(t1, padded_shape)
  return t1_padded * t2
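A usage sketch under the documented contract: a rank-2 `t1` combined with a rank-1 `t2` yields a rank-3 result.

t1 = tf.reshape(tf.range(6.), [2, 3])  # rank 2, shape [2, 3]
t2 = tf.range(4.)                      # rank 1, shape [4]
z = _tensor_product(t1, t2)            # rank 3, shape [2, 3, 4]
# z[i, j, k] == t1[i, j] * t2[k]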
Example #6
 def _expand_is_accepted_like(x):
   """Helper to expand `is_accepted` like the shape of some input arg."""
   with tf.name_scope('expand_is_accepted_like'):
     expand_shape = tf.concat([
         tf.shape(is_accepted),
         tf.ones([tf.rank(x) - tf.rank(is_accepted)],
                 dtype=tf.int32),
     ], axis=0)
     multiples = tf.concat([
         tf.ones([tf.rank(is_accepted)], dtype=tf.int32),
         tf.shape(x)[tf.rank(is_accepted):],
     ], axis=0)
     m = tf.tile(tf.reshape(is_accepted, expand_shape),
                 multiples)
     m.set_shape(m.shape.merge_with(x.shape))
     return m
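A shape trace with hypothetical inputs, following the concats above: take `is_accepted` of shape [3] (one flag per chain) and a state part `x` of shape [3, 5].

# expand_shape = concat([[3], [1]]) -> [3, 1]
# multiples    = concat([[1], [5]]) -> [1, 5]
# tf.tile(tf.reshape(is_accepted, [3, 1]), [1, 5]) -> boolean mask of shape [3, 5]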
Example #7
  def _inverse_log_det_jacobian(self, y, **kwargs):
    y = tf.convert_to_tensor(y, name="y")
    ildj = tf.cast(0., dtype=y.dtype.base_dtype)

    if not self.bijectors:
      return ildj

    event_ndims = self._maybe_get_static_event_ndims(
        self.inverse_min_event_ndims)

    if _use_static_shape(y, event_ndims):
      event_shape = y.shape[y.shape.ndims - event_ndims:]
    else:
      event_shape = tf.shape(y)[tf.rank(y) - event_ndims:]

    for b in self.bijectors:
      ildj += b.inverse_log_det_jacobian(
          y, event_ndims=event_ndims, **kwargs.get(b.name, {}))

      if _use_static_shape(y, event_ndims):
        event_shape = b.inverse_event_shape(event_shape)
        event_ndims = self._maybe_get_static_event_ndims(
            event_shape.ndims)
      else:
        event_shape = b.inverse_event_shape_tensor(event_shape)
        event_ndims = tf.size(event_shape)
        event_ndims_ = self._maybe_get_static_event_ndims(event_ndims)
        if event_ndims_ is not None:
          event_ndims = event_ndims_

      y = b.inverse(y, **kwargs.get(b.name, {}))
    return ildj
Example #8
  def _forward_log_det_jacobian(self, x, **kwargs):
    x = tf.convert_to_tensor(x, name="x")

    fldj = tf.cast(0., dtype=x.dtype.base_dtype)

    if not self.bijectors:
      return fldj

    event_ndims = self._maybe_get_static_event_ndims(
        self.forward_min_event_ndims)

    if _use_static_shape(x, event_ndims):
      event_shape = x.shape[x.shape.ndims - event_ndims:]
    else:
      event_shape = tf.shape(x)[tf.rank(x) - event_ndims:]

    for b in reversed(self.bijectors):
      fldj += b.forward_log_det_jacobian(
          x, event_ndims=event_ndims, **kwargs.get(b.name, {}))
      if _use_static_shape(x, event_ndims):
        event_shape = b.forward_event_shape(event_shape)
        event_ndims = self._maybe_get_static_event_ndims(event_shape.ndims)
      else:
        event_shape = b.forward_event_shape_tensor(event_shape)
        event_ndims = tf.size(event_shape)
        event_ndims_ = self._maybe_get_static_event_ndims(event_ndims)
        if event_ndims_ is not None:
          event_ndims = event_ndims_

      x = b.forward(x, **kwargs.get(b.name, {}))

    return fldj
Example #9
  def _expand_sample_shape_to_vector(self, x, name):
    """Helper to `sample` which ensures input is 1D."""
    x_static_val = tf.contrib.util.constant_value(x)
    if x_static_val is None:
      prod = tf.reduce_prod(x)
    else:
      prod = np.prod(x_static_val, dtype=x.dtype.as_numpy_dtype())

    ndims = x.shape.ndims  # != sample_ndims
    if ndims is None:
      # Maybe expand_dims.
      ndims = tf.rank(x)
      expanded_shape = util.pick_vector(
          tf.equal(ndims, 0),
          np.array([1], dtype=np.int32), tf.shape(x))
      x = tf.reshape(x, expanded_shape)
    elif ndims == 0:
      # Definitely expand_dims.
      if x_static_val is not None:
        x = tf.convert_to_tensor(
            np.array([x_static_val], dtype=x.dtype.as_numpy_dtype()),
            name=name)
      else:
        x = tf.reshape(x, [1])
    elif ndims != 1:
      raise ValueError("Input is neither scalar nor vector.")

    return x, prod
Example #10
 def _compareRank(self, x, use_gpu=False):
   np_ans = np.asarray(np.ndim(x))
   with self.test_session(use_gpu=use_gpu):
     tf_ans = tf.rank(x)
     result = tf_ans.eval()
   self.assertAllEqual(np_ans, result)
   self.assertShapeEqual(np_ans, tf_ans)
Example #11
  def _make_columnar(self, x):
    """Ensures non-scalar input has at least one column.

    Example:
      If `x = [1, 2, 3]` then the output is `[[1, 2, 3]]` (a leading unit
      dimension is added, matching the dynamic-shape branch below).

      If `x = [[1, 2, 3], [4, 5, 6]]` then the output is unchanged.

      If `x = 1` then the output is unchanged.

    Args:
      x: `Tensor`.

    Returns:
      columnar_x: `Tensor` with at least two dimensions.
    """
    if x.shape.ndims is not None:
      if x.shape.ndims == 1:
        x = x[tf.newaxis, :]
      return x
    shape = tf.shape(x)
    maybe_expanded_shape = tf.concat([
        shape[:-1],
        distribution_util.pick_vector(
            tf.equal(tf.rank(x), 1), [1], np.array([], dtype=np.int32)),
        shape[-1:],
    ], 0)
    return tf.reshape(x, maybe_expanded_shape)
Example #12
def _do_maximum_mean(samples, envelope, high, name=None):
  """Common code between maximum_mean and minimum_mean."""
  with tf.name_scope(name, "do_maximum_mean", [samples, envelope, high]):
    dtype = dtype_util.common_dtype([samples, envelope, high], tf.float32)
    samples = tf.convert_to_tensor(samples, name="samples", dtype=dtype)
    envelope = tf.convert_to_tensor(envelope, name="envelope", dtype=dtype)
    high = tf.convert_to_tensor(high, name="high", dtype=dtype)
    n = tf.rank(samples)
    # Move the batch dimension of `samples` to the rightmost position,
    # where the _batch_sort_vector function wants it.
    perm = tf.concat([tf.range(1, n), [0]], axis=0)
    samples = tf.transpose(samples, perm)

    samples = _batch_sort_vector(samples)

    # The maximum mean is given by taking `envelope`-worth of
    # probability from the smallest samples and moving it to the
    # maximum value.  This amounts to:
    # - ignoring the smallest k samples, where `k/n < envelope`
    # - taking a `1/n - (envelope - k/n)` part of the index k sample
    # - taking all the other samples
    # - and adding `envelope * high` at the end.
    # The following is a vectorized and batched way of computing this.
    # `max_mean_contrib` is a mask implementing the previous.
    batch_size = tf.shape(samples)[-1]
    batch_size = tf.cast(batch_size, dtype=dtype)
    step = 1. / batch_size
    cum_steps = step * tf.range(1, batch_size + 1, dtype=dtype)
    max_mean_contrib = tf.clip_by_value(
        cum_steps - envelope[..., tf.newaxis],
        clip_value_min=0.,
        clip_value_max=step)
    return tf.reduce_sum(samples * max_mean_contrib, axis=-1) + envelope * high
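A worked instance of the mask with assumed values: four samples, `envelope = 0.25`, `high = 10.`.

# n = 4, step = 0.25, cum_steps = [0.25, 0.5, 0.75, 1.0]
# max_mean_contrib = clip(cum_steps - 0.25, 0., 0.25) = [0., 0.25, 0.25, 0.25]
# With sorted samples [1., 2., 3., 4.]:
#   sum(samples * max_mean_contrib) + envelope * high = 2.25 + 2.5 = 4.75
# i.e. the smallest sample is dropped and envelope-worth of mass moves to `high`.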
Example #13
 def _pad_sample_dims(self, x):
   with tf.name_scope("pad_sample_dims", values=[x]):
     ndims = x.shape.ndims if x.shape.ndims is not None else tf.rank(x)
     shape = tf.shape(x)
     d = ndims - self._event_ndims
     x = tf.reshape(x, shape=tf.concat([shape[:d], [1], shape[d:]], axis=0))
     return x
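A shape sketch, assuming `self._event_ndims == 1`: for `x` of shape [S, B, E], `d = ndims - 1 = 2`, so the reshape splices a singleton axis between the batch and event dimensions.

# [S, B, E] -> [S, B, 1, E]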
Example #14
def _transpose_batch_time(x):
    """Transpose the batch and time dimensions of a Tensor.

    Retains as much of the static shape information as possible.

    Args:
        x: A tensor of rank 2 or higher.

    Returns: x transposed along the first two dimensions.

    Raises:
        ValueError: if `x` is rank 1 or lower.
    """
    x_static_shape = x.get_shape()
    if x_static_shape.ndims is not None and x_static_shape.ndims < 2:
        raise ValueError(
            "Expected input tensor %s to have rank at least 2, but saw shape: %s" %
            (x, x_static_shape))
    x_rank = tf.rank(x)
    x_t = tf.transpose(
        x, tf.concat(
            ([1, 0], tf.range(2, x_rank)), axis=0))
    x_t.set_shape(
        tf.TensorShape([
            x_static_shape[1].value, x_static_shape[0].value
        ]).concatenate(x_static_shape[2:]))
    return x_t
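Usage sketch with assumed dimensions:

x = tf.zeros([32, 10, 8])       # [batch, time, features]
x_t = _transpose_batch_time(x)  # shape [10, 32, 8], with static shape preserved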
Example #15
 def _transpose(self, x, perm):
   sample_batch_ndims = tf.rank(x) - self.rightmost_transposed_ndims
   perm = tf.concat([
       tf.range(sample_batch_ndims),
       sample_batch_ndims + perm,
   ], axis=0)
   return tf.transpose(x, perm)
Example #16
 def _compareRankSparse(self, x_np, use_gpu=False):
   np_ans = np.asarray(np.ndim(x_np))
   x_tf, unused_nnz = _sparsify(x_np)
   with self.test_session(use_gpu=use_gpu):
     tf_ans = tf.rank(x_tf)
     result = tf_ans.eval()
   self.assertAllEqual(np_ans, result)
   self.assertShapeEqual(np_ans, tf_ans)
Example #17
 def _maybe_rotate_dims(self, x, rotate_right=False):
   """Helper which rolls left event_dims left or right event_dims right."""
   needs_rotation_const = tf.contrib.util.constant_value(self._needs_rotation)
   if needs_rotation_const is not None and not needs_rotation_const:
     return x
   ndims = tf.rank(x)
   n = (ndims - self._rotate_ndims) if rotate_right else self._rotate_ndims
   return tf.transpose(
       x, _concat_vectors(tf.range(n, ndims), tf.range(0, n)))
Example #18
def _squeeze(x, axis):
  """A version of squeeze that works with dynamic axis."""
  x = tf.convert_to_tensor(x, name='x')
  if axis is None:
    return tf.squeeze(x, axis=None)
  axis = tf.convert_to_tensor(axis, name='axis', dtype=tf.int32)
  axis += tf.zeros([1], dtype=axis.dtype)  # Make axis at least 1d.
  keep_axis, _ = tf.setdiff1d(tf.range(0, tf.rank(x)), axis)
  return tf.reshape(x, tf.gather(tf.shape(x), keep_axis))
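A usage sketch with a dynamic axis, which plain `tf.squeeze` cannot accept (its `axis` must be a static Python value):

x = tf.zeros([2, 1, 3])
axis = tf.placeholder(tf.int32, shape=[])  # axis known only at run time
y = _squeeze(x, axis)                      # feeding axis=1 yields shape [2, 3]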
Example #19
	def step(self, x, c=None, g=None, softmax=False):
		"""Forward step

		Args:
			x: Tensor of shape [batch_size, channels, time_length], One-hot encoded audio signal.
			c: Tensor of shape [batch_size, cin_channels, time_length], Local conditioning features.
			g: Tensor of shape [batch_size, gin_channels, 1] or Ids of shape [batch_size, 1], 
				Global conditioning features.
				Note: set hparams.use_speaker_embedding to False to disable the embedding layer and
				use external one-hot encoded features.
			softmax: Boolean, Whether to apply softmax.

		Returns:
			a Tensor of shape [batch_size, out_channels, time_length]
		"""
		#[batch_size, channels, time_length] -> [batch_size, time_length, channels]
		batch_size = tf.shape(x)[0]
		time_length = tf.shape(x)[-1]

		if g is not None:
			if self.embed_speakers is not None:
				#[batch_size, 1] ==> [batch_size, 1, gin_channels]
				g = self.embed_speakers(tf.reshape(g, [batch_size, -1]))
				#[batch_size, gin_channels, 1]
				with tf.control_dependencies([tf.assert_equal(tf.rank(g), 3)]):
					g = tf.transpose(g, [0, 2, 1])

		#Expand global conditioning features to all time steps
		g_bct = _expand_global_features(batch_size, time_length, g, data_format='BCT')

		if c is not None and self.upsample_conv is not None:
			#[batch_size, 1, cin_channels, time_length]
			c = tf.expand_dims(c, axis=1)
			for transposed_conv in self.upsample_conv:
				c = transposed_conv(c)

			#[batch_size, cin_channels, time_length]
			c = tf.squeeze(c, [1])
			with tf.control_dependencies([tf.assert_equal(tf.shape(c)[-1], tf.shape(x)[-1])]):
				c = tf.identity(c, name='control_c_and_x_shape')

		#Feed data to network
		x = self.first_conv(x)
		skips = None
		for conv in self.conv_layers:
			x, h = conv(x, c, g_bct)
			if skips is None:
				skips = h
			else:
				skips = skips + h
		x = skips

		for conv in self.last_conv_layers:
			x = conv(x)

		return tf.nn.softmax(x, axis=1) if softmax else x
Example #20
def pad_to_bounding_box(image, offset_height, offset_width, target_height,
                        target_width, pad_value):
  """Pads the given image with the given pad_value.

  Works like tf.image.pad_to_bounding_box, except it can pad the image
  with any given arbitrary pad value and also handle images whose sizes are not
  known during graph construction.

  Args:
    image: 3-D tensor with shape [height, width, channels]
    offset_height: Number of rows of padding to add on top.
    offset_width: Number of columns of padding to add on the left.
    target_height: Height of output image.
    target_width: Width of output image.
    pad_value: Value to pad the image tensor with.

  Returns:
    3-D tensor of shape [target_height, target_width, channels].

  Raises:
    ValueError: If the shape of image is incompatible with the offset_* or
    target_* arguments.
  """
  image_rank = tf.rank(image)
  image_rank_assert = tf.Assert(
      tf.equal(image_rank, 3),
      ['Wrong image tensor rank [Expected] [Actual]',
       3, image_rank])
  with tf.control_dependencies([image_rank_assert]):
    image -= pad_value
  image_shape = tf.shape(image)
  height, width = image_shape[0], image_shape[1]
  target_width_assert = tf.Assert(
      tf.greater_equal(
          target_width, width),
      ['target_width must be >= width'])
  target_height_assert = tf.Assert(
      tf.greater_equal(target_height, height),
      ['target_height must be >= height'])
  with tf.control_dependencies([target_width_assert]):
    after_padding_width = target_width - offset_width - width
  with tf.control_dependencies([target_height_assert]):
    after_padding_height = target_height - offset_height - height
  offset_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(after_padding_width, 0),
          tf.greater_equal(after_padding_height, 0)),
      ['target size not possible with the given target offsets'])

  height_params = tf.stack([offset_height, after_padding_height])
  width_params = tf.stack([offset_width, after_padding_width])
  channel_params = tf.stack([0, 0])
  with tf.control_dependencies([offset_assert]):
    paddings = tf.stack([height_params, width_params, channel_params])
  padded = tf.pad(image, paddings)
  return padded + pad_value
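Usage sketch with assumed values: pad a 2x2 image to 4x4 with pad value 128 and a (1, 1) offset.

image = tf.fill([2, 2, 3], 255)
padded = pad_to_bounding_box(image, offset_height=1, offset_width=1,
                             target_height=4, target_width=4, pad_value=128)
# padded has shape [4, 4, 3]; the border equals 128, the interior keeps 255.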
Example #21
  def __init__(self,
               logits=None,
               probs=None,
               dtype=tf.int32,
               validate_args=False,
               allow_nan_stats=True,
               name="OneHotCategorical"):
    """Initialize OneHotCategorical distributions using class log-probabilities.

    Args:
      logits: An N-D `Tensor`, `N >= 1`, representing the log probabilities of a
        set of Categorical distributions. The first `N - 1` dimensions index
        into a batch of independent distributions and the last dimension
        represents a vector of logits for each class. Only one of `logits` or
        `probs` should be passed in.
      probs: An N-D `Tensor`, `N >= 1`, representing the probabilities of a set
        of Categorical distributions. The first `N - 1` dimensions index into a
        batch of independent distributions and the last dimension represents a
        vector of probabilities for each class. Only one of `logits` or `probs`
        should be passed in.
      dtype: The type of the event samples (default: int32).
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
    parameters = dict(locals())
    with tf.name_scope(name, values=[logits, probs]) as name:
      self._logits, self._probs = distribution_util.get_logits_and_probs(
          name=name, logits=logits, probs=probs, validate_args=validate_args,
          multidimensional=True)

      logits_shape_static = self._logits.shape.with_rank_at_least(1)
      if logits_shape_static.ndims is not None:
        self._batch_rank = tf.convert_to_tensor(
            logits_shape_static.ndims - 1, dtype=tf.int32, name="batch_rank")
      else:
        with tf.name_scope(name="batch_rank"):
          self._batch_rank = tf.rank(self._logits) - 1

      with tf.name_scope(name="event_size"):
        self._event_size = tf.shape(self._logits)[-1]

    super(OneHotCategorical, self).__init__(
        dtype=dtype,
        reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        parameters=parameters,
        graph_parents=[self._logits, self._probs],
        name=name)
Example #22
def sum_rightmost_ndims_preserving_shape(x, ndims):
  """Return `Tensor` with right-most ndims summed.

  Args:
    x: the `Tensor` whose right-most `ndims` dimensions to sum
    ndims: number of right-most dimensions to sum.

  Returns:
    A `Tensor` resulting from calling `reduce_sum` on the `ndims` right-most
    dimensions. If the shape of `x` is statically known, the result will also
    have statically known shape. Otherwise, the resulting shape will only be
    known at runtime.
  """
  x = tf.convert_to_tensor(x)
  if x.shape.ndims is not None:
    axes = tf.range(x.shape.ndims - ndims, x.shape.ndims)
  else:
    axes = tf.range(tf.rank(x) - ndims, tf.rank(x))
  return tf.reduce_sum(x, axis=axes)
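Usage sketch:

x = tf.zeros([2, 3, 4])
y = sum_rightmost_ndims_preserving_shape(x, ndims=2)
# y has shape [2], known statically because x's shape is fully defined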
Example #23
 def make_response_likelihood(self, w, x):
   w_shape = tf.pad(
       tf.shape(w),
       paddings=[[tf.where(tf.rank(w) > 1, 0, 1), 0]],
       constant_values=1)
   y_shape = tf.concat([tf.shape(w)[:-1], [tf.shape(x)[-1]]], axis=0)
   w_expand = tf.reshape(w, w_shape)
   return tfd.Normal(
       loc=tf.reshape(tf.matmul(w_expand, x), y_shape),
       scale=np.array(1, w.dtype.as_numpy_dtype))  # [n]
Example #24
 def check(t):
   samples_batch_shape = tf.shape(samples)[1:]
   broadcasted_batch_shape = tf.broadcast_dynamic_shape(
       samples_batch_shape, tf.shape(t))
   # This rank check ensures that I don't get a wrong answer from the
   # _shapes_ broadcasting against each other.
   samples_batch_ndims = tf.size(samples_batch_shape)
   ge = tf.assert_greater_equal(samples_batch_ndims, tf.rank(t))
   eq = tf.assert_equal(samples_batch_shape, broadcasted_batch_shape)
   return ge, eq
Example #25
File: utils.py Project: siddk/lang2program
def assert_broadcastable(low_tensor, high_tensor):
    low_shape = tf.shape(low_tensor)
    high_shape = tf.shape(high_tensor)

    low_rank = tf.rank(low_tensor)

    # assert that shapes are compatible
    high_shape_prefix = tf.slice(high_shape, [0], [low_rank])
    assert_op = tf.assert_equal(high_shape_prefix, low_shape, name="assert_shape_prefix")
    return assert_op
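Usage sketch: run the returned assert op as a control dependency before relying on the broadcast.

low = tf.zeros([3, 4])
high = tf.zeros([3, 4, 5])
with tf.control_dependencies([assert_broadcastable(low, high)]):
    result = high * tf.expand_dims(low, -1)  # broadcast [3, 4, 1] against [3, 4, 5]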
Example #26
def _effective_sample_size_single_state(states, filter_beyond_lag,
                                        filter_threshold):
  """ESS computation for one single Tensor argument."""

  with tf.name_scope(
      'effective_sample_size_single_state',
      values=[states, filter_beyond_lag, filter_threshold]):

    states = tf.convert_to_tensor(states, name='states')
    dt = states.dtype

    # filter_beyond_lag == None ==> auto_corr is the full sequence.
    auto_corr = stats.auto_correlation(
        states, axis=0, max_lags=filter_beyond_lag)
    if filter_threshold is not None:
      filter_threshold = tf.convert_to_tensor(
          filter_threshold, dtype=dt, name='filter_threshold')
      # Get a binary mask to zero out values of auto_corr below the threshold.
      #   mask[i, ...] = 1 if auto_corr[j, ...] > threshold for all j <= i,
      #   mask[i, ...] = 0, otherwise.
      # So, along dimension zero, the mask will look like [1, 1, ..., 0, 0,...]
      # Building step by step,
      #   Assume auto_corr = [1, 0.5, 0.0, 0.3], and filter_threshold = 0.2.
      # Step 1:  mask = [False, False, True, False]
      mask = auto_corr < filter_threshold
      # Step 2:  mask = [0, 0, 1, 1]
      mask = tf.cast(mask, dtype=dt)
      # Step 3:  mask = [0, 0, 1, 2]
      mask = tf.cumsum(mask, axis=0)
      # Step 4:  mask = [1, 1, 0, 0]
      mask = tf.maximum(1. - mask, 0.)
      auto_corr *= mask

    # With R[k] := auto_corr[k, ...],
    # ESS = N / {1 + 2 * Sum_{k=1}^N (N - k) / N * R[k]}
    #     = N / {-1 + 2 * Sum_{k=0}^N (N - k) / N * R[k]} (since R[0] = 1)
    #     approx N / {-1 + 2 * Sum_{k=0}^M (N - k) / N * R[k]}
    # where M is the filter_beyond_lag truncation point chosen above.

    # Get the factor (N - k) / N, and give it shape [M, 1,...,1], having total
    # ndims the same as auto_corr
    n = _axis_size(states, axis=0)
    k = tf.range(0., _axis_size(auto_corr, axis=0))
    nk_factor = (n - k) / n
    if auto_corr.shape.ndims is not None:
      new_shape = [-1] + [1] * (auto_corr.shape.ndims - 1)
    else:
      new_shape = tf.concat(
          ([-1],
           tf.ones([tf.rank(auto_corr) - 1], dtype=tf.int32)),
          axis=0)
    nk_factor = tf.reshape(nk_factor, new_shape)

    return n / (-1 + 2 * tf.reduce_sum(nk_factor * auto_corr, axis=0))
Example #27
def _get_best_effort_ndims(x,
                           expect_ndims=None,
                           expect_ndims_at_least=None,
                           expect_ndims_no_more_than=None):
  """Get static ndims if possible.  Fallback on `tf.rank(x)`."""
  ndims_static = _get_static_ndims(
      x,
      expect_ndims=expect_ndims,
      expect_ndims_at_least=expect_ndims_at_least,
      expect_ndims_no_more_than=expect_ndims_no_more_than)
  if ndims_static is not None:
    return ndims_static
  return tf.rank(x)
Example #28
def _sample_mvn(mean, cov, cov_structure=None, num_samples=None):
    """
    Returns a sample from a D-dimensional Multivariate Normal distribution
    :param mean: N x D
    :param cov: N x D or N x D x D
    :param cov_structure: "diag" or "full"
    - "diag": cov holds the diagonal elements of the covariance matrix
    - "full": cov holds the full covariance matrix (without jitter)
    :return: sample from the MVN of shape N x D
    """
    mean_shape = tf.shape(mean)
    cov_shape = tf.shape(cov)
    N, D = mean_shape[0], mean_shape[1]
    S = num_samples if num_samples is not None else 1
    # assert shape(cov) == (N, D) or (N, D, D)
    with tf.control_dependencies([
            tf.Assert(tf.equal(cov_shape[0], N) & tf.reduce_all(tf.equal(cov_shape[1:], D)),
                      data=[mean_shape, cov_shape])
            ]):

        if cov_structure == "diag":
            with tf.control_dependencies([tf.assert_equal(tf.rank(mean), tf.rank(cov))]):
                eps = tf.random_normal([S, N, D], dtype=settings.float_type)  # S x N x D
                samples = mean + tf.sqrt(cov) * eps  # S x N x D
        elif cov_structure == "full":
            with tf.control_dependencies([tf.assert_equal(tf.rank(mean) + 1, tf.rank(cov))]):
                jittermat = settings.numerics.jitter_level * \
                            tf.eye(D, batch_shape=[N], dtype=settings.float_type)  # N x D x D
                eps = tf.random_normal([N, D, S], dtype=settings.float_type)  # N x D x S
                chol = tf.cholesky(cov + jittermat)  # N x D x D
                samples = mean[..., None] + tf.matmul(chol, eps)  # N x D x S
                samples = tf.transpose(samples, [2, 0, 1])  # S x N x D
        else:
            raise NotImplementedError  # pragma: no cover

        if num_samples is None:
            return samples[0]  # N x D
        return samples  # S x N x D
Example #29
def softmax(x, axis, name=None):
  """Equivalent to tf.nn.softmax but works around b/70297725."""
  with tf.name_scope(name, "softmax", [x, axis]):
    x = tf.convert_to_tensor(x, name="x")
    ndims = (
        x.shape.ndims
        if x.shape.ndims is not None else tf.rank(x, name="ndims"))
    axis = tf.convert_to_tensor(axis, dtype=tf.int32, name="axis")
    axis_ = tf.contrib.util.constant_value(axis)
    if axis_ is not None:
      axis = np.int(ndims + axis_ if axis_ < 0 else axis_)
    else:
      axis = tf.where(axis < 0, ndims + axis, axis)
  return tf.nn.softmax(x, axis=axis)
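Usage sketch: a negative `axis` is resolved against the (possibly dynamic) rank before delegating to `tf.nn.softmax`.

logits = tf.random_normal([2, 3, 4])
probs = softmax(logits, axis=-2)  # resolves to axis=1, the dimension of size 3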
Example #30
 def testTranspose(self):
   with self.test_session(use_gpu=False) as sess:
     np.random.seed(1618)
     shapes = [np.random.randint(1, 10, size=rank) for rank in range(1, 6)]
     for shape in shapes:
       for dtype in [np.int32, np.int64, np.float32, np.float64]:
         dn_input = np.random.randn(*shape).astype(dtype)
         rank = tf.rank(dn_input).eval()
         perm = np.random.choice(rank, rank, False)
         sp_input, unused_a_nnz = _sparsify(dn_input)
         sp_trans = tf.sparse_transpose(sp_input, perm=perm)
         dn_trans = tf.sparse_tensor_to_dense(sp_trans).eval()
         expected_trans = tf.transpose(dn_input, perm=perm).eval()
         self.assertAllEqual(dn_trans, expected_trans)
Example #31
#The science (and art) of creating tensors
scalar_val = tf.Variable(123, dtype=tf.int16)
floating_val = tf.Variable(123.456, dtype=tf.float32)
string_val = tf.Variable("hello everyone. Nice to learn tensorflow!", dtype=tf.string)

#Let us display the values (print) these tensors
print(scalar_val)
print(floating_val)
print(string_val)

#The idea behind shape and rank of tensors
#Shape: Describes the dimension of the tensor (total elements contained along each dimension)
scalar_val_shape = tf.shape(scalar_val)
print(scalar_val_shape)

floating_val_shape = tf.shape(floating_val)
print(floating_val_shape)

#Now, if we use e.g. lists/nested lists instead of just a "single" scalar value
list_tensor1 = tf.Variable([1, 3, 5, 6], dtype=tf.int16)
print(list_tensor1)
print(tf.shape(list_tensor1))

list_tensor2 = tf.Variable([[1, 2, 3], [4, 5, 6]], dtype=tf.int16)
print(list_tensor2)
print(tf.shape(list_tensor2))

#How about the rank? In simple terms, it describes the level of nesting within the tensor.
print(tf.rank(list_tensor1))
print(tf.rank(list_tensor2))
Example #32
def triplet_semihard_loss(embeddings, labels, margin=1.0):
    """Computes the triplet loss with semi-hard negative mining.

  The loss encourages the positive distance (between a pair of embeddings with
  the same label) to be smaller than the minimum negative distance among
  negatives that are at least greater than the positive distance plus the
  margin constant (the semi-hard negatives) in the mini-batch. If no such
  negative exists, the largest negative distance is used instead.
  See: https://arxiv.org/abs/1503.03832.

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      multiclass integer labels.
    embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
      be l2 normalized.
    margin: Float, margin term in the loss definition.

  Returns:
    triplet_loss: tf.float32 scalar.
  """
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    #pdist_matrix = cdist(embeddings, embeddings, metric=metric)

    lshape = array_ops.shape(labels)
    assert lshape.shape == 1
    labels = array_ops.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pdist_matrix = pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    # Compute the mask.
    # Is there any element with a different label that is farther than me? If yes, a semi-hard negative exists.
    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.greater(
            pdist_matrix_tile,
            array_ops.reshape(array_ops.transpose(pdist_matrix), [-1, 1])))

    mask_final = array_ops.reshape(
        math_ops.greater(
            math_ops.reduce_sum(tf.cast(mask, dtype=dtypes.float32),
                                1,
                                keepdims=True), 0.0), [batch_size, batch_size])
    mask_final = array_ops.transpose(mask_final)

    adjacency_not = tf.cast(adjacency_not, dtype=dtypes.float32)

    mask = tf.cast(mask, dtype=dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = array_ops.reshape(
        masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = array_ops.tile(
        masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])

    semi_hard_negatives = array_ops.where(mask_final, negatives_outside,
                                          negatives_inside)

    if isinstance(margin, numbers.Real):
        # diff = tf.maximum(diff + margin, 0.0)
        loss_mat = pdist_matrix - semi_hard_negatives + margin
    elif margin == 'soft':
        # diff = tf.nn.softplus(diff)
        loss_mat = pdist_matrix - semi_hard_negatives
    elif margin.lower() == 'none':
        pass
    else:
        raise NotImplementedError(
            'The margin {} is not implemented in batch_hard'.format(margin))

    mask_positives = tf.cast(adjacency, dtype=dtypes.float32) - array_ops.diag(
        array_ops.ones([batch_size]))

    if isinstance(margin, numbers.Real):
        print('Margin is real')
        triplet_loss_result = math_ops.maximum(
            tf.boolean_mask(loss_mat, tf.cast(mask_positives, tf.bool)), 0.0)
        assert_op = tf.Assert(tf.equal(tf.rank(triplet_loss_result), 1),
                              ['Rank of image must be equal to 1.'])
        with tf.control_dependencies([assert_op]):
            triplet_loss = triplet_loss_result
    elif margin == 'soft':
        triplet_loss_result = tf.nn.softplus(
            tf.boolean_mask(loss_mat, tf.cast(mask_positives, tf.bool)))
        assert_op = tf.Assert(tf.equal(tf.rank(triplet_loss_result), 1),
                              ['Rank of image must be equal to 1.'])
        with tf.control_dependencies([assert_op]):
            triplet_loss = triplet_loss_result
    elif margin.lower() == 'none':
        pass
    else:
        raise NotImplementedError(
            'The margin {} is not implemented in batch_hard'.format(margin))

    return triplet_loss
Example #33
 def rank(self):
     return tf.rank(self._quantity)
Example #34
    def call(self, inputs, state):
        char_inputs = inputs[0]  # shape = [batch_size, input_dimension]
        state_inputs = inputs[
            1]  # shape = [batch_size, max_num_of_lexicon words, lexicon_state_dimension]

        # check whether the last dimension of state_inputs are all zero.
        # check_state_0 should be in the shape of [batch_size, max_num_of_lexicon words]
        check_state_0 = tf.reduce_sum(state_inputs, axis=-1)
        # check_state_1 should be in the shape of [batch_size]
        check_state_1 = tf.reduce_sum(check_state_0, axis=-1)

        # Find the indices that match lexicon words, and process only that part to avoid wasting compute on entries with no lexicon match.
        # state_inputs_indices_for_lexicon should be in the shape of [batch_size, 2]
        state_inputs_indices_for_lexicon = tf.where(
            tf.not_equal(check_state_0, 0))

        # Find the indices with no lexicon words, to avoid wasting compute on entries with no lexicon match.
        # tf.where(tf.equal(check_state_1, 0)) should be in the shape of [batch_size, 1]
        # state_inputs_indices_for_not_lexicon should be in the shape of [batch_size]
        state_inputs_indices_for_not_lexicon = tf.squeeze(
            tf.where(tf.equal(check_state_1, 0)))

        # Select cell states for entries without lexicon matches; scalar data has rank 0 and needs dimension expansion.
        # in case `[i]` is squeezed to scalar `i`, change it back to 1-dimension tensor `[i]` by `tf.expand_dims()`
        # otherwise, `[]` and `[i, j]` will remain as-is after tf.squeeze() and further conversion on it
        state_inputs_indices_for_not_lexicon = tf.cond(
            pred=tf.equal(tf.rank(state_inputs_indices_for_not_lexicon), 0),
            true_fn=lambda: tf.expand_dims(
                state_inputs_indices_for_not_lexicon, axis=0),
            false_fn=lambda: state_inputs_indices_for_not_lexicon)

        # Character indices that have lexicon matches
        # char_inputs_indices_for_lexicon should be in the shape of [batch_size, 1]
        char_inputs_indices_for_lexicon = tf.where(
            tf.not_equal(check_state_1, 0))

        # Character indices without lexicon matches
        # char_inputs_indices_for_not_lexicon should be in the shape of [batch_size, 1]
        char_inputs_indices_for_not_lexicon = tf.where(
            tf.equal(check_state_1, 0))

        if self._state_is_tuple:
            c, h = state
        else:
            c, h = tf.split(value=state, num_or_size_splits=2, axis=1)

        # tf.concat([char_inputs, h], 1) should be in the shape of
        # [batch_size, char_embedding_size + state_dimension]
        # h should be in the shape of [batch_size, state_dimension]
        # self._kernel should be in the shape of [char_embedding_size + state_dimension, X]
        # gate_inputs should be in the shape of [batch_size, 4 * state_dimension]
        gate_inputs = tf.matmul(tf.concat([char_inputs, h], 1), self._kernel)
        gate_inputs = tf.nn.bias_add(gate_inputs, self._bias)

        i, j, f, o = tf.split(value=gate_inputs, num_or_size_splits=4, axis=1)

        new_c_without_lexicon = self._new_c_without_lexicon(
            i=i,
            f=f,
            j=j,
            c=c,
            indices_tensor=state_inputs_indices_for_not_lexicon)
        new_c = tf.scatter_nd_update(
            self._char_state_tensor,
            indices=char_inputs_indices_for_not_lexicon,
            updates=new_c_without_lexicon)

        new_c = tf.cond(tf.not_equal(
            tf.shape(state_inputs_indices_for_not_lexicon)[-1],
            tf.shape(state_inputs)[0]),
                        true_fn=lambda: self._if_not_empty_lexicon_state(
                            i, j, char_inputs, state_inputs,
                            char_inputs_indices_for_lexicon,
                            state_inputs_indices_for_lexicon, new_c),
                        false_fn=lambda: new_c)

        # Compute the output hidden state
        new_h = tf.multiply(self._activation(new_c), tf.nn.sigmoid(o))

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = tf.concat([new_c, new_h], 1)

        return new_h, new_state
Example #35
def swap_time_and_batch_axes(inputs):
    """Swaps time and batch axis (the first two axis)."""
    transposed_axes = tf.concat([[1, 0], tf.range(2, tf.rank(inputs))], axis=0)
    return tf.transpose(inputs, transposed_axes)
Example #36
  def _log_prob(self, x):
    if self.cholesky_input_output_matrices:
      x_sqrt = x
    else:
      # Complexity: O(nbk^3)
      x_sqrt = tf.cholesky(x)

    batch_shape = self.batch_shape_tensor()
    event_shape = self.event_shape_tensor()
    ndims = tf.rank(x_sqrt)
    # sample_ndims = ndims - batch_ndims - event_ndims
    sample_ndims = ndims - tf.shape(batch_shape)[0] - 2
    sample_shape = tf.strided_slice(tf.shape(x_sqrt), [0], [sample_ndims])

    # We need to be able to pre-multiply each matrix by its corresponding
    # batch scale matrix. Since a Distribution Tensor supports multiple
    # samples per batch, this means we need to reshape the input matrix `x`
    # so that the first b dimensions are batch dimensions and the last two
    # are of shape [dimension, dimensions*number_of_samples]. Doing these
    # gymnastics allows us to do a batch_solve.
    #
    # After we're done with sqrt_solve (the batch operation) we need to undo
    # this reshaping so what we're left with is a Tensor partitionable by
    # sample, batch, event dimensions.

    # Complexity: O(nbk**2) since transpose must access every element.
    scale_sqrt_inv_x_sqrt = x_sqrt
    perm = tf.concat([tf.range(sample_ndims, ndims),
                      tf.range(0, sample_ndims)], 0)
    scale_sqrt_inv_x_sqrt = tf.transpose(scale_sqrt_inv_x_sqrt, perm)
    shape = tf.concat((batch_shape,
                       (tf.cast(self.dimension, dtype=tf.int32), -1)), 0)
    scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)

    # Complexity: O(nbM*k) where M is the complexity of the operator solving
    # a vector system. E.g., for LinearOperatorDiag, each solve is O(k), so
    # this complexity is O(nbk**2). For LinearOperatorLowerTriangular,
    # each solve is O(k**2) so this step has complexity O(nbk^3).
    scale_sqrt_inv_x_sqrt = self.scale_operator.solve(
        scale_sqrt_inv_x_sqrt)

    # Undo make batch-op ready.
    # Complexity: O(nbk**2)
    shape = tf.concat([batch_shape, event_shape, sample_shape], 0)
    scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)
    perm = tf.concat([
        tf.range(ndims - sample_ndims, ndims),
        tf.range(0, ndims - sample_ndims)
    ], 0)
    scale_sqrt_inv_x_sqrt = tf.transpose(scale_sqrt_inv_x_sqrt, perm)

    # Write V = SS', X = LL'. Then:
    # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
    #              = tr[inv(S) L L' inv(S)']
    #              = tr[(inv(S) L) (inv(S) L)']
    #              = sum_{ik} (inv(S) L)_{ik}**2
    # The second equality follows from the cyclic permutation property.
    # Complexity: O(nbk**2)
    trace_scale_inv_x = tf.reduce_sum(
        tf.square(scale_sqrt_inv_x_sqrt), axis=[-2, -1])

    # Complexity: O(nbk)
    half_log_det_x = tf.reduce_sum(
        tf.log(tf.matrix_diag_part(x_sqrt)), axis=[-1])

    # Complexity: O(nbk**2)
    log_prob = ((self.df - self.dimension - 1.) * half_log_det_x -
                0.5 * trace_scale_inv_x -
                self.log_normalization())

    # Set shape hints.
    # Try to merge what we know from the input then what we know from the
    # parameters of this distribution.
    if x.get_shape().ndims is not None:
      log_prob.set_shape(x.get_shape()[:-2])
    if (log_prob.get_shape().ndims is not None and
        self.batch_shape.ndims is not None and
        self.batch_shape.ndims > 0):
      log_prob.get_shape()[-self.batch_shape.ndims:].merge_with(
          self.batch_shape)

    return log_prob
Example #37
tf.exp(a)  # e raised to the power a
tf.log(a)  # natural logarithm of a (base e)
tf.maximum(a, b)  # element-wise maximum
tf.minimum(a, b)  # element-wise minimum
tf.cos(a)  # cosine



# Data type conversions
e = tf.constant("abcde")
tf.string_to_number(e)  # convert a string to a number
tf.to_double(a)
tf.cast(a, tf.int32)  # cast to another dtype; casting to int truncates, e.g. 1.8 -> 1, 2.2 -> 2

# Shape operations
tf.shape()  # return the shape of a tensor
tf.size()  # return the number of elements
tf.rank()  # return the rank of a tensor
tf.reshape()  # change the shape of a tensor
tf.expand_dims()  # insert a dimension of size 1 into a tensor

# Slicing and joining
tf.slice()  # slice a tensor
tf.split()  # split a tensor along a dimension
tf.concat()  # concatenate tensors along a dimension
tf.pack()  # pack a list of rank-R tensors into one rank-(R+1) tensor (renamed tf.stack in TF 1.0)
tf.reverse()  # reverse a tensor along a dimension
tf.transpose()  # permute the dimensions of a tensor
tf.gather()  # gather the slices at the given indices
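A short graph-mode sketch exercising a few of the shape operations listed above:

a = tf.constant([[1., 2., 3.], [4., 5., 6.]])
with tf.Session() as sess:
    print(sess.run(tf.shape(a)))            # [2 3]
    print(sess.run(tf.size(a)))             # 6
    print(sess.run(tf.rank(a)))             # 2
    print(sess.run(tf.reshape(a, [3, 2])))  # [[1. 2.] [3. 4.] [5. 6.]]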

Example #38
File: wavenet.py Project: yunzqq/LPCTron
    def incremental(self, initial_input, c=None, g=None,
        time_length=100, test_inputs=None,
        softmax=True, quantize=True, log_scale_min=-7.0):
        """Inceremental forward step

        Inputs of shape [batch_size, channels, time_length] are reshaped to [batch_size, time_length, channels]
        Input of each time step is of shape [batch_size, 1, channels]

        Args:
            initial_input: Tensor of shape [batch_size, channels, 1], initial recurrence input.
            c: Tensor of shape [batch_size, cin_channels, time_length], Local conditioning features
            g: Tensor of shape [batch_size, gin_channels, time_length] or [batch_size, gin_channels, 1]
                global conditioning features
            time_length: int, number of time steps to generate.
            test_inputs: Tensor, teacher forcing inputs (debug)
            softmax: Boolean, whether to apply softmax activation
            quantize: Whether to quantize softmax output before feeding to
                next time step input
            log_scale_min: float, log scale minimum value.

        Returns:
            Tensor of shape [batch_size, channels, time_length] or [batch_size, channels, 1]
                Generated one_hot encoded samples
        """
        self.clear_queue()
        batch_size = 1

        #Note: should reshape to [batch_size, time_length, channels]
        #not [batch_size, channels, time_length]
        if test_inputs is not None:
            if self.scalar_input:
                #Compare static shapes: a dynamic tf.shape() tensor can't drive a Python `if`
                if test_inputs.shape[1] == 1:
                    test_inputs = tf.transpose(test_inputs, [0, 2, 1])
            else:
                if test_inputs.shape[1] == self._hparams.out_channels:
                    test_inputs = tf.transpose(test_inputs, [0, 2, 1])

            batch_size = tf.shape(test_inputs)[0]
            if time_length is None:
                time_length = tf.shape(test_inputs)[1]
            else:
                time_length = tf.maximum(time_length, tf.shape(test_inputs)[1])

        #Global conditioning
        if g is not None:
            if self.embed_speakers is not None:
                g = self.embed_speakers(tf.reshape(g, [batch_size, -1]))
                #[batch_size, channels, 1]
                with tf.control_dependencies([tf.assert_equal(tf.rank(g), 3)]):
                    g = tf.transpose(g, [0, 2, 1])

        self.g_btc = _expand_global_features(batch_size, time_length, g, data_format='BTC')

        #Local conditioning
        if c is not None and self.upsample_conv is not None:
            #[batch_size, 1, channels, time_length]
            c = tf.expand_dims(c, axis=1)
            for upsample_conv in self.upsample_conv:
                c = upsample_conv(c)
            #[batch_size, channels, time_length]
            c = tf.squeeze(c, [1])
            tf.assert_equal(tf.shape(c)[-1], time_length)

        self.c = tf.cond(tf.logical_and(c is not None, tf.equal(tf.shape(c)[-1], time_length)),
            lambda: tf.transpose(c, [0, 2, 1]),
            lambda: c)

        #Initialize loop variables
        if initial_input.shape[1] == self._hparams.out_channels:
            initial_input = tf.transpose(initial_input, [0, 2, 1])

        initial_time = tf.constant(0, dtype=tf.int32)
        if test_inputs is not None:
            initial_input = tf.expand_dims(test_inputs[:, 0, :], axis=1)
        initial_outputs_ta = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
        initial_loss_outputs_ta = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)

        def condition(time, unused_outputs_ta, unused_current_input, unused_loss_outputs_ta):
            return tf.less(time, time_length)

        def body(time, outputs_ta, current_input, loss_outputs_ta):
            #conditioning features for single time step
            ct = None if self.c is None else tf.expand_dims(self.c[:, time, :], axis=1)
            gt = None if self.g_btc is None else tf.expand_dims(self.g_btc[:, time, :], axis=1)

            x = self.first_conv.incremental_step(current_input)
            skips = None
            for conv in self.conv_layers:
                x, h = conv.incremental_step(x, ct, gt)
                skips = h if skips is None else (skips + h)
            x = skips
            for conv in self.last_conv_layers:
                try:
                    x = conv.incremental_step(x)
                except AttributeError: #When calling Relu activation
                    x = conv(x)

            #Save x for eval loss computation
            loss_outputs_ta = loss_outputs_ta.write(time, tf.squeeze(x, [1])) #squeeze time_length dimension (=1)

            #Generate next input by sampling
            if self.scalar_input:
                x = sample_from_discretized_mix_logistic(
                    tf.reshape(x, [batch_size, -1, 1]), log_scale_min=log_scale_min)
            else:
                x = tf.nn.softmax(tf.reshape(x, [batch_size, -1]), axis=1) if softmax \
                    else tf.reshape(x, [batch_size, -1])
                if quantize:
                    x = tf.reshape(x, [batch_size, -1])
                    sample = tf.multinomial(tf.reshape(x, [batch_size, -1]), 1)[0] #Pick a sample using x as probability
                    x = tf.one_hot(sample, depth=self._hparams.quantize_channels)

            outputs_ta = outputs_ta.write(time, x)
            time = time + 1
            #output = x (maybe next input)
            if test_inputs is not None:
                next_input = tf.expand_dims(test_inputs[:, time, :], axis=1)
            else:
                if is_mulaw_quantize(self._hparams.input_type):
                    next_input = tf.expand_dims(x, axis=1) #Expand on the time dimension
                else:
                    next_input = tf.expand_dims(x, axis=-1) #Expand on the channels dimension

            return (time, outputs_ta, next_input, loss_outputs_ta)

        res = tf.while_loop(
            condition,
            body,
            loop_vars=[
                initial_time, initial_outputs_ta, initial_input, initial_loss_outputs_ta
            ],
            parallel_iterations=32,
            swap_memory=self._hparams.wavenet_swap_with_cpu)

        outputs_ta = res[1]
        #[time_length, batch_size, channels]
        outputs = outputs_ta.stack()

        #Save eval prediction for eval loss computation
        eval_outputs = res[-1].stack()

        if is_mulaw_quantize(self._hparams.input_type):
            self.y_hat_eval = tf.transpose(eval_outputs, [1, 0, 2])
        else:
            self.y_hat_eval = tf.transpose(eval_outputs, [1, 2, 0])

        #[batch_size, channels, time_length]
        self.clear_queue()
        return tf.transpose(outputs, [1, 2, 0])
Example #39
def model_eval(sess,
               x,
               y,
               predictions,
               X_test=None,
               Y_test=None,
               feed=None,
               args=None):
    """
    Compute the accuracy of a TF model on some data
    :param sess: TF session to use when training the graph
    :param x: input placeholder
    :param y: output placeholder (for labels)
    :param predictions: model output predictions
    :param X_test: numpy array with training inputs
    :param Y_test: numpy array with training outputs
    :param feed: An optional dictionary that is appended to the feeding
             dictionary before the session runs. Can be used to feed
             the learning phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
                 Should contain `batch_size`
    :return: a float with the accuracy value
    """
    args = _ArgsWrapper(args or {})

    assert args.batch_size, "Batch size was not given in args dict"
    if X_test is None or Y_test is None:
        raise ValueError("X_test argument and Y_test argument "
                         "must be supplied.")

    # Define accuracy symbolically
    if LooseVersion(tf.__version__) >= LooseVersion('1.0.0'):
        correct_preds = tf.equal(tf.argmax(y, axis=-1),
                                 tf.argmax(predictions, axis=-1))
    else:
        correct_preds = tf.equal(
            tf.argmax(y, axis=tf.rank(y) - 1),
            tf.argmax(predictions, axis=tf.rank(predictions) - 1))

    # Init result var
    accuracy = 0.0

    with sess.as_default():
        # Compute number of batches
        nb_batches = int(math.ceil(float(len(X_test)) / args.batch_size))
        assert nb_batches * args.batch_size >= len(X_test)

        X_cur = np.zeros((args.batch_size, ) + X_test.shape[1:],
                         dtype=X_test.dtype)
        Y_cur = np.zeros((args.batch_size, ) + Y_test.shape[1:],
                         dtype=Y_test.dtype)
        for batch in range(nb_batches):
            if batch % 100 == 0 and batch > 0:
                _logger.debug("Batch " + str(batch))

            # Must not use the `batch_indices` function here, because it
            # repeats some examples.
            # It's acceptable to repeat during training, but not eval.
            start = batch * args.batch_size
            end = min(len(X_test), start + args.batch_size)

            # The last batch may be smaller than all others. This should not
            # affect the accuracy disproportionately.
            cur_batch_size = end - start
            X_cur[:cur_batch_size] = X_test[start:end]
            Y_cur[:cur_batch_size] = Y_test[start:end]
            feed_dict = {x: X_cur, y: Y_cur}
            if feed is not None:
                feed_dict.update(feed)
            cur_corr_preds = correct_preds.eval(feed_dict=feed_dict)

            accuracy += cur_corr_preds[:cur_batch_size].sum()

        assert end >= len(X_test)

        # Divide by number of examples to get final value
        accuracy /= len(X_test)

    return accuracy
Example #40
 def padding_list(length_diff, arg):
     if axis == 1:
         return [[[0, 0], [0, length_diff]],
                 tf.zeros([tf.rank(arg) - 2, 2], dtype=tf.int32)]
     return [[[0, 0], [0, 0], [0, length_diff]],
             tf.zeros([tf.rank(arg) - 3, 2], dtype=tf.int32)]
Example #41
def _compute_log_acceptance_correction(current_state_parts,
                                       proposed_state_parts,
                                       current_volatility_parts,
                                       proposed_volatility_parts,
                                       current_drift_parts,
                                       proposed_drift_parts,
                                       step_size_parts,
                                       independent_chain_ndims,
                                       name=None):
    r"""Helper to `kernel` which computes the log acceptance-correction.

  Computes `log_acceptance_correction` as described in the `MetropolisHastings`
  class. The proposal density is normal. More specifically,

   ```none
  q(proposed_state | current_state) \sim N(current_state + current_drift,
  step_size * current_volatility**2)

  q(current_state | proposed_state) \sim N(proposed_state + proposed_drift,
  step_size * proposed_volatility**2)
  ```

  The `log_acceptance_correction` is then

  ```none
  log_acceptance_correction = log q(current_state | proposed_state)
                            - log q(proposed_state | current_state)
  ```

  Args:
    current_state_parts: Python `list` of `Tensor`s representing the value(s) of
      the current state of the chain.
    proposed_state_parts:  Python `list` of `Tensor`s representing the value(s)
      of the proposed state of the chain. Must broadcast with the shape of
      `current_state_parts`.
    current_volatility_parts: Python `list` of `Tensor`s representing the value
      of `volatility_fn(*current_volatility_parts)`. Must broadcast with the
      shape of `current_state_parts`.
    proposed_volatility_parts: Python `list` of `Tensor`s representing the value
      of `volatility_fn(*proposed_volatility_parts)`. Must broadcast with the
      shape of `current_state_parts`
    current_drift_parts: Python `list` of `Tensor`s representing value of the
      drift `_get_drift(*current_state_parts, ..)`. Must broadcast with the
      shape of `current_state_parts`.
    proposed_drift_parts: Python `list` of `Tensor`s representing value of the
      drift `_get_drift(*proposed_drift_parts, ..)`. Must broadcast with the
      shape of `current_state_parts`.
    step_size_parts: Python `list` of `Tensor`s representing the step size for
      Euler-Maruyama method. Must broadcast with the shape of
      `current_state_parts`.
    independent_chain_ndims: Scalar `int` `Tensor` representing the number of
      leftmost `Tensor` dimensions which index independent chains.
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., 'compute_log_acceptance_correction').

  Returns:
    log_acceptance_correction: `Tensor` representing the `log`
      acceptance-correction.  (See docstring for mathematical definition.)
  """

  with tf.name_scope(name, 'compute_log_acceptance_correction', [
      current_state_parts, proposed_state_parts,
      current_volatility_parts, proposed_volatility_parts,
      current_drift_parts, proposed_drift_parts, step_size_parts,
      independent_chain_ndims]):

    proposed_log_density_parts = []
    dual_log_density_parts = []

    for [current_state,
         proposed_state,
         current_volatility,
         proposed_volatility,
         current_drift,
         proposed_drift,
         step_size] in zip(current_state_parts,
                           proposed_state_parts,
                           current_volatility_parts,
                           proposed_volatility_parts,
                           current_drift_parts,
                           proposed_drift_parts,
                           step_size_parts):
      axis = tf.range(independent_chain_ndims, tf.rank(current_state))

      state_diff = proposed_state - current_state

      current_volatility *= tf.sqrt(step_size)

      proposed_energy = (state_diff - current_drift) / current_volatility

      proposed_volatility *= tf.sqrt(step_size)
      # Compute part of `q(proposed_state | current_state)`
      proposed_energy = tf.reduce_sum(
          mcmc_util.safe_sum(
              [tf.log(current_volatility), 0.5 * (proposed_energy**2)]),
          axis=axis)
      proposed_log_density_parts.append(-proposed_energy)

      # Compute part of `q(current_state | proposed_state)`
      dual_energy = (state_diff + proposed_drift) / proposed_volatility
      dual_energy = tf.reduce_sum(
          mcmc_util.safe_sum(
              [tf.log(proposed_volatility), 0.5 * (dual_energy**2)]),
          axis=axis)
      dual_log_density_parts.append(-dual_energy)

    # Compute `q(proposed_state | current_state)`
    proposed_log_density_reduce = tf.reduce_sum(
        tf.stack(proposed_log_density_parts, axis=-1), axis=-1)
    # Compute `q(current_state | proposed_state)`
    dual_log_density_reduce = tf.reduce_sum(
        tf.stack(dual_log_density_parts, axis=-1), axis=-1)

    return mcmc_util.safe_sum(
        [dual_log_density_reduce, -proposed_log_density_reduce])
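
A minimal standalone sketch of the `tf.range(independent_chain_ndims, tf.rank(...))`
pattern used above (illustrative shapes only, not part of the original module): the
sum runs over the event dimensions while the leading chain dimension is kept.

import tensorflow as tf

x = tf.ones([4, 3, 2])            # [num_chains, event_dim0, event_dim1]
axis = tf.range(1, tf.rank(x))    # with independent_chain_ndims = 1 -> [1, 2]
per_chain = tf.reduce_sum(x, axis=axis)
with tf.Session() as sess:
    print(sess.run(per_chain))    # [6. 6. 6. 6.]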
Example #42
def random_crop(image_list, crop_height, crop_width):
  """Crops the given list of images.

  The function applies the same crop to each image in the list. This can be
  effectively applied when there are multiple image inputs of the same
  dimension such as:

    image, depths, normals = random_crop([image, depths, normals], 120, 150)

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the new height.
    crop_width: the new width.

  Returns:
    the image_list with cropped images.

  Raises:
    ValueError: if there are multiple image inputs provided with different size
      or the images are smaller than the crop dimensions.
  """
  if not image_list:
    raise ValueError('Empty image_list.')

  # Compute the rank assertions.
  rank_assertions = []
  for i in range(len(image_list)):
    image_rank = tf.rank(image_list[i])
    rank_assert = tf.Assert(
        tf.equal(image_rank, 3),
        ['Wrong rank for tensor %s [expected] [actual]',
         image_list[i].name, 3, image_rank])
    rank_assertions.append(rank_assert)

  with tf.control_dependencies([rank_assertions[0]]):
    image_shape = tf.shape(image_list[0])
  image_height = image_shape[0]
  image_width = image_shape[1]
  crop_size_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(image_height, crop_height),
          tf.greater_equal(image_width, crop_width)),
      ['Crop size greater than the image size.'])

  asserts = [rank_assertions[0], crop_size_assert]

  for i in range(1, len(image_list)):
    image = image_list[i]
    asserts.append(rank_assertions[i])
    with tf.control_dependencies([rank_assertions[i]]):
      shape = tf.shape(image)
    height = shape[0]
    width = shape[1]

    height_assert = tf.Assert(
        tf.equal(height, image_height),
        ['Wrong height for tensor %s [expected][actual]',
         image.name, height, image_height])
    width_assert = tf.Assert(
        tf.equal(width, image_width),
        ['Wrong width for tensor %s [expected][actual]',
         image.name, width, image_width])
    asserts.extend([height_assert, width_assert])

  # Create a random bounding box.
  #
  # Use tf.random.uniform and not numpy.random.rand as doing the former would
  # generate random numbers at graph eval time, unlike the latter which
  # generates random numbers at graph definition time.
  with tf.control_dependencies(asserts):
    max_offset_height = tf.reshape(image_height - crop_height + 1, [])
    max_offset_width = tf.reshape(image_width - crop_width + 1, [])
  offset_height = tf.random.uniform(
      [], maxval=max_offset_height, dtype=tf.int32)
  offset_width = tf.random.uniform(
      [], maxval=max_offset_width, dtype=tf.int32)

  return [_crop(image, offset_height, offset_width,
                crop_height, crop_width) for image in image_list]
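
The `_crop` helper called above is defined elsewhere in the source file. A minimal
sketch of what it plausibly does, assuming rank-3 [height, width, channels] images
(the real helper may include additional assertions):

def _crop(image, offset_height, offset_width, crop_height, crop_width):
  # Slice a [crop_height, crop_width] window; -1 keeps every channel.
  return tf.slice(image,
                  [offset_height, offset_width, 0],
                  [crop_height, crop_width, -1])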
Example #43
# Working with and understanding NumPy in TensorFlow.
# NumPy is a convenient array library that integrates well with TensorFlow;
# here we look at arrays in both.
import tensorflow as tf
import numpy as np

sess = tf.Session()

zeroD = np.array(40, dtype=np.int32)
sess.run(tf.rank(zeroD))  # 0: a scalar has rank 0

# Get the shape of the NumPy array.
sess.run(tf.shape(zeroD))  # []: a scalar has an empty shape

oneDArray = np.array([3.2, 5.6, 2.5, 4.5, 6.8], dtype=np.float32)
sess.run(tf.rank(oneDArray))   # 1: the number of dimensions
sess.run(tf.shape(oneDArray))  # [5]

# Two- to N-dimensional arrays follow the same steps.
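# For concreteness, the 2-D case follows the same pattern:
twoDArray = np.array([[1, 2], [3, 4]], dtype=np.int32)
sess.run(tf.rank(twoDArray))   # 2
sess.run(tf.shape(twoDArray))  # [2 2]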
 
Example #44
    def step(self, x, c=None, g=None, softmax=False):
        """Forward step

		Args:
			x: Tensor of shape [batch_size, channels, time_length], One-hot encoded audio signal.
			c: Tensor of shape [batch_size, cin_channels, time_length], Local conditioning features.
			g: Tensor of shape [batch_size, gin_channels, 1] or Ids of shape [batch_size, 1],
				Global conditioning features.
				Note: set hparams.use_speaker_embedding to False to disable embedding layer and
				use extrnal One-hot encoded features.
			softmax: Boolean, Whether to apply softmax.

		Returns:
			a Tensor of shape [batch_size, out_channels, time_length]
		"""
        #[batch_size, channels, time_length] -> [batch_size, time_length, channels]
        batch_size = tf.shape(x)[0]
        time_length = tf.shape(x)[-1]

        if g is not None:
            if self.embed_speakers is not None:
                #[batch_size, 1] ==> [batch_size, 1, gin_channels]
                g = self.embed_speakers(tf.reshape(g, [batch_size, -1]))
                #[batch_size, gin_channels, 1]
                with tf.control_dependencies([tf.assert_equal(tf.rank(g), 3)]):
                    g = tf.transpose(g, [0, 2, 1])

        #Expand global conditioning features to all time steps
        g_bct = _expand_global_features(batch_size,
                                        time_length,
                                        g,
                                        data_format='BCT')

        if c is not None and self.upsample_conv is not None:
            #[batch_size, 1, cin_channels, time_length]
            c = tf.expand_dims(c, axis=1)
            for transposed_conv in self.upsample_conv:
                c = transposed_conv(c)

            #[batch_size, cin_channels, time_length]
            c = tf.squeeze(c, [1])
            with tf.control_dependencies(
                [tf.assert_equal(tf.shape(c)[-1],
                                 tf.shape(x)[-1])]):
                c = tf.identity(c, name='control_c_and_x_shape')

        #Feed data to network
        x = self.first_conv(x)
        skips = None
        for conv in self.conv_layers:
            x, h = conv(x, c, g_bct)
            if skips is None:
                skips = h
            else:
                skips = skips + h
                skips = skips * np.sqrt(0.5)
        x = skips

        for conv in self.last_conv_layers:
            x = conv(x)

        return tf.nn.softmax(x, axis=1) if softmax else x
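
`_expand_global_features` is defined elsewhere in this source. A minimal sketch of
the behavior the call above relies on, assuming `g` arrives as
[batch_size, gin_channels, 1] (names and the `data_format` flag are taken from the
call site; the real helper may differ):

def _expand_global_features(batch_size, time_length, g, data_format='BCT'):
    # No global conditioning.
    if g is None:
        return None
    if data_format == 'BCT':
        # [batch_size, gin_channels, 1] -> [batch_size, gin_channels, time_length]
        return tf.tile(g, [1, 1, time_length])
    # 'BTC' layout: [batch_size, time_length, gin_channels]
    return tf.tile(tf.transpose(g, [0, 2, 1]), [1, time_length, 1])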
Example #45
    def call(self, x):
        '''x:(batch_size,h,w,c)'''
        if not self.trainable:
            return x
        else:
            if tf.math.equal(tf.rank(x),4):
                x_shape = tf.shape(x)
                x_size = x_shape[1:3]
                x_size_f = tf.cast(x_size, tf.float32)
                # Compute the block size
                x_block_size_f = tf.constant((self.block_size, self.block_size), tf.float32)
                # x_block_size_f = x_size_f * self.block_size
                # x_block_size_f = tf.math.maximum(x_block_size_f, 1)
                x_block_size = tf.cast(x_block_size_f, tf.int32)
                # Compute the number of blocks from dist_prob
                x_block_num = (x_size_f[0] * x_size_f[1]) * self.dist_prob / (x_block_size_f[0] * x_block_size_f[1])
                # Probability that a block center appears in the valid interior region
                x_block_rate = x_block_num / ((x_size_f[0] - x_block_size_f[0] + 1) * (x_size_f[1] - x_block_size_f[1] + 1))
                # tf.print('x_block_rate:', x_block_rate)
                # Generate the block regions according to that probability
                x_block_center = tf.random.uniform((x_shape[0], x_size[0] - x_block_size[0] + 1, x_size[1] - x_block_size[1] + 1, x_shape[3]), dtype=tf.float32)
                x_block_padding_t = x_block_size[0] // 2
                x_block_padding_b = x_size_f[0] - tf.cast(x_block_padding_t, tf.float32) - (x_size_f[0] - x_block_size_f[0] + 1.0)
                x_block_padding_b = tf.cast(x_block_padding_b, tf.int32)
                x_block_padding_l = x_block_size[1] // 2
                x_block_padding_r = x_size_f[1] - tf.cast(x_block_padding_l, tf.float32) - (x_size_f[1] - x_block_size_f[1] + 1.0)
                x_block_padding_r = tf.cast(x_block_padding_r, tf.int32)
                x_block_padding = tf.pad(x_block_center,[[0, 0],[x_block_padding_t, x_block_padding_b],[x_block_padding_l, x_block_padding_r],[0, 0]])
                x_block = tf.cast(x_block_padding<x_block_rate, tf.float32)
                x_block = tf.nn.max_pool2d(x_block, ksize=[self.block_size, self.block_size], strides=[1, 1], padding='SAME')
                # Fraction of elements that are blocked
                # x_block_percent_ones = tf.reduce_sum(x_block) / tf.reduce_prod(tf.cast(tf.shape(x_block), tf.float32))
                # tf.print('x_block_percent_ones:', x_block_percent_ones, tf.shape(x_block))
                # Feature superposition (per-pixel magnitude map)
                x_abs = tf.abs(x)
                x_sum = tf.math.reduce_sum(x_abs, axis=-1, keepdims=True)
                x_max = tf.math.reduce_max(x_sum, axis=(1, 2), keepdims=True)
                x_max_c = tf.math.reduce_max(x_abs, axis=(1, 2), keepdims=True)
                x_sum_c = tf.math.reduce_sum(x_max_c, axis=-1, keepdims=True)
                x_v = x_sum / x_sum_c
                # tf.print('x_v:', tf.shape(x_v), tf.math.reduce_min(x_v), tf.math.reduce_max(x_v))
                # Feature variance
                # x_variance = tf.math.reduce_variance(x_sum, axis=(1, 2), keepdims=True)
                # tf.print('x_variance:', tf.shape(x_variance), tf.math.reduce_min(x_variance), tf.math.reduce_max(x_variance))
                # Superimpose the perturbation
                x_max = tf.reduce_max(x, axis=(1,2), keepdims=True)
                x_min = tf.reduce_min(x, axis=(1,2), keepdims=True)
                x_block_random = tf.random.uniform(x_shape, dtype=x.dtype) * (x_max - x_min) + x_min
                x_block_random = x_block_random * (self.alpha * x_v + 0.3) + x * (1.0 - self.alpha * x_v - 0.3)
                x = x * (1-x_block) + x_block_random * x_block

                # if not (self.weight_behind is None) and not(len(self.weight_behind)==0):
                #     wtsize=tf.shape(self.weight_behind[0])[0]
                #     weight_max=tf.math.reduce_max(self.weight_behind[0], axis=-2, keepdims=True)
                #     sig = tf.ones(tf.shape(weight_max),dtype=weight_max.dtype)
                #     sig_mask = tf.cast(tf.random.uniform(tf.shape(weight_max),dtype=sig.dtype)<0.5,dtype=tf.float32)
                #     sig = sig * (1 - sig_mask) - sig_mask
                #     weight_max = weight_max * sig 
                #     weight_mean = tf.math.reduce_mean(weight_max, axis=(0,1), keepdims=True)
                #     if wtsize==1:
                #         weight_mean=0.1*weight_mean
                #     #print(weight_mean)
                # mean=tf.math.reduce_mean(x)
                # var=tf.math.reduce_variance(x)

                # if not (self.weight_behind is None) and not(len(self.weight_behind)==0):
                #     dist=self.alpha*weight_mean*(var**0.5)*tf.random.normal(tf.shape(x), dtype=x.dtype)
                # else:
                #     dist=self.alpha*0.01*(var**0.5)*tf.random.normal(tf.shape(x), dtype=x.dtype)

                # x=x*x_block
                # dist=dist*(1-x_block)
                # x=x+dist
                # x=x/x_block_percent_ones
                return x
            else:
                return x
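
The core masking trick above, shown in isolation: block centers are sampled only
where a full block fits inside the map, padded back to the feature-map size, then
grown into full blocks by max-pooling. A standalone sketch with fixed illustrative
shapes:

import tensorflow as tf

block_size, rate = 3, 0.1
h = w = 8
# Centers may only appear where a whole block fits inside the map.
valid = tf.random.uniform([1, h - block_size + 1, w - block_size + 1, 1])
pad = block_size // 2
centers = tf.pad(tf.cast(valid < rate, tf.float32),
                 [[0, 0], [pad, pad], [pad, pad], [0, 0]])
# Max-pooling with a block_size window grows each center into a full block.
mask = tf.nn.max_pool2d(centers, ksize=block_size, strides=1, padding='SAME')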
Example #46
    def network(self):

        # Input Placeholder

        self.brain.addInput(shape=[
            None, self.NUM_FRAMES, self.obsv_shape[0], self.obsv_shape[1]
        ],
                            name='Observation')

        # Reshape Input to CNN (B,T,D1,D2)->(B*T,D1,D2,1)

        self.obs = self.brain.tensor('Observation')
        self.obs = tf.expand_dims(self.obs, axis=tf.rank(self.obs))
        self.obs = tf.reshape(self.obs, [
            tf.shape(self.obs)[0] * self.NUM_FRAMES, self.obsv_shape[0],
            self.obsv_shape[1], 1
        ])
        self.brain.addInput(tensor=self.obs, name='InputCNN')

        # Convolutional Layers

        self.brain.setLayerDefaults(type=tb.layers.conv2d,
                                    activation=tb.activs.relu,
                                    pooling=2,
                                    weight_stddev=0.01,
                                    bias_stddev=0.01)

        self.brain.addLayer(out_channels=32,
                            ksize=8,
                            strides=4,
                            input='InputCNN')
        self.brain.addLayer(out_channels=64, ksize=4, strides=2)
        self.brain.addLayer(out_channels=64, ksize=3, strides=1)

        #  Flatten + Fully Connected

        self.brain.addLayer(type=tb.layers.flatten, name='Flatten')

        self.brain.addLayer(type=tb.layers.fully,
                            out_channels=256,
                            activation=tb.activs.elu,
                            name='OutputFully')

        # Reshape OutputFully to RNN (B*T,C)->(B,T,C)

        self.outfully = tf.reshape(self.brain.tensor('OutputFully'),
                                   [-1, self.NUM_FRAMES, 256])

        self.brain.addInput(tensor=self.outfully, name='InputRNN')

        # RNN Layers

        self.brain.addLayer(input='InputRNN',
                            type=tb.layers.rnn,
                            cell_type='LSTM',
                            num_cells=2,
                            out_channels=256,
                            name='RNN')

        # Fully Connected Layers

        self.brain.setLayerDefaults(type=tb.layers.fully,
                                    weight_stddev=0.01,
                                    bias_stddev=0.01)

        self.brain.addLayer(out_channels=self.num_actions,
                            activation=tb.activs.softmax,
                            name='Output')
Example #47
def cross_entropy_loss(pred, gt):  # last dim is one_hot
    # `loge` in the original source is the natural log; tf.log computes ln directly.
    return -tf.reduce_mean(tf.reduce_sum(tf.log(pred) * gt, axis=tf.rank(pred) - 1))
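
A quick sanity check with illustrative values, assuming `pred` already holds
probabilities:

pred = tf.constant([[0.9, 0.1], [0.2, 0.8]])
gt = tf.constant([[1.0, 0.0], [0.0, 1.0]])
loss = cross_entropy_loss(pred, gt)  # mean of -log(0.9) and -log(0.8)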
Example #48
    def call(self, inputs, cps=None, isFirstLayer=False):

        if isFirstLayer:
            tf.print("computed cp for the last input", cps[-1])
            bOnA = inputs - self.firstLayerTA0 - cps / (self.firstLayerK1M *
                                                        self.E0)
            tf.print("computed bOna for the last input", bOnA[-1])
            tf.print("second parts",
                     (4 * inputs * cps / (self.firstLayerK1M * self.E0))[-1])
            tf.assert_rank(
                self.firstLayerK1M * self.E0 * self.firstLayerTA0 / cps,
                tf.rank(inputs))
            olderInput = tf.where(
                tf.equal(
                    self.firstLayerK1M * self.E0 * self.firstLayerTA0 / cps,
                    0), inputs,
                0.5 * (bOnA + (tf.pow(bOnA, 2) + 4 * inputs * cps /
                               (self.firstLayerK1M * self.E0))**0.5))
            tf.print("solution for the last inputs of first layer: ",
                     olderInput[-1])
            olderInput = self.firstLayerk2 * self.firstLayerK1M / self.firstLayerkdT * self.firstLayerTA0 * self.E0 / cps * olderInput / (
                1 + self.firstLayerK1M * self.E0 / cps * olderInput)
        else:
            olderInput = inputs

        #For batch computation, k1M * tf.transpose(inputs) doesn't work; instead we
        #append a size-1 axis and rely on elementwise * with broadcasting.
        #Above we used rank-1 vectors; here the inputs stay as rank-2 batches.
        cpsExpand = tf.expand_dims(cps, -1)
        tf.assert_rank(cpsExpand, 3)
        tf.assert_rank(cps, 2)
        olderInputExpand = tf.expand_dims(olderInput, -1)
        tf.assert_rank(olderInputExpand, 3)
        olderInputMidExpand = tf.expand_dims(olderInput, 1)

        Cactivs = tf.where(
            self.mask > 0, self.Cactiv /
            (1 + self.k1M * self.E0 * olderInputExpand / cpsExpand), 0)
        Cinhibs = tf.where(
            self.mask < 0, self.Cinhib /
            (1 + self.k3M * self.E0 * olderInputExpand / cpsExpand), 0)
        tf.assert_rank(Cactivs, 3)
        tf.assert_rank(Cinhibs, 3)
        Inhib = tf.squeeze(tf.matmul(olderInputMidExpand, Cinhibs),
                           axis=1) / self.kdT
        x_eq_clipped = tf.squeeze(tf.matmul(olderInputMidExpand, Cactivs),
                                  axis=1) / (self.kdI * cps + Inhib / cps)

        #unclipped version:
        Cactivs_unclipped = tf.where(
            self.kernel > 0, self.Cactiv * self.kernel /
            (1 + self.k1M * self.E0 * olderInputExpand / cpsExpand), 0)
        Cinhibs_unclipped = tf.where(
            self.kernel < 0, (-1) * self.Cinhib * self.kernel /
            (1 + self.k3M * self.E0 * olderInputExpand / cpsExpand), 0)
        tf.assert_rank(Cactivs_unclipped, 3)
        tf.assert_rank(Cinhibs_unclipped, 3)
        #CAREFUL: Cactivs now carries the batch size and is rank 3: [None, input_dims, output_dims].
        #We therefore need the inputs as [None, 1, input_dims] for the matmul, then squeeze the result.
        Inhib_unclipped = tf.squeeze(tf.matmul(olderInputMidExpand,
                                               Cinhibs_unclipped),
                                     axis=1) / self.kdT
        x_eq_unclipped = tf.squeeze(
            tf.matmul(olderInputMidExpand, Cactivs_unclipped),
            axis=1) / (self.kdI * cps + Inhib_unclipped / cps)
        tf.assert_rank(tf.squeeze(tf.matmul(olderInputMidExpand,
                                            Cinhibs_unclipped),
                                  axis=1),
                       2,
                       message="compute not good dims")

        tf.print("solution from an unknown layer:", x_eq_clipped[-1])

        outputs = tf.stop_gradient(x_eq_clipped -
                                   x_eq_unclipped) + x_eq_unclipped
        tf.assert_rank(outputs, 2, message="outputs not good dims")
        return outputs
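
The `tf.stop_gradient(x_eq_clipped - x_eq_unclipped) + x_eq_unclipped` line above is
the standard straight-through pattern: the forward value is the clipped solution,
while gradients flow through the unclipped one. A minimal standalone illustration:

import tensorflow as tf

x = tf.constant(2.0)
soft = 1.3 * x                            # differentiable surrogate
hard = tf.round(soft)                     # non-differentiable forward value
y = tf.stop_gradient(hard - soft) + soft  # y == hard, dy/dx == 1.3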
Example #49
def _remove_last_channel(y_probs):
    permuted_probs = K.permute_dimensions(
        y_probs, (tf.rank(y_probs) - 1, *K.arange(tf.rank(y_probs) - 1)))
    timbre_probs = K.permute_dimensions(
        permuted_probs[:-1], (*K.arange(tf.rank(permuted_probs) - 1) + 1, 0))
    return timbre_probs
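
Assuming the intent is simply to drop the final entry of the last axis, the two
rank-dependent permutations are equivalent to a plain slice (the function name
below is purely illustrative):

def _remove_last_channel_sliced(y_probs):
    return y_probs[..., :-1]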
Example #50
def batchwise_reduce_sum(x):
    return tf.reduce_sum(input_tensor=x, axis=tf.range(1, tf.rank(x)))
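
For example, applied to a [batch, height, width] tensor this sums over every axis
except the first:

x = tf.ones([4, 2, 3])
per_example = batchwise_reduce_sum(x)  # shape [4], each entry 6.0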
Example #51
File: ops.py  Project: sammcgrail/tfdeploy
    def test_Rank(self):
        t = tf.rank(self.random(3, 3))
        self.check(t)
Example #52
File: wavenet.py  Project: yunzqq/LPCTron
    def initialize(self, y, c, g, input_lengths, x=None, synthesis_length=None):
        '''Initialize wavenet graph for train, eval and test cases.
        '''
        hparams = self._hparams
        self.is_training = x is not None
        self.is_evaluating = not self.is_training and y is not None
        #Set all convolutions to corresponding mode
        self.set_mode(self.is_training)

        log('Initializing Wavenet model.  Dimensions (? = dynamic shape): ')
        log('  Train mode:                {}'.format(self.is_training))
        log('  Eval mode:                 {}'.format(self.is_evaluating))
        log('  Synthesis mode:            {}'.format(not (self.is_training or self.is_evaluating)))
        with tf.variable_scope('inference') as scope:
            #Training
            if self.is_training:
                batch_size = tf.shape(x)[0]
                #[batch_size, time_length, 1]
                self.mask = self.get_mask(input_lengths, maxlen=tf.shape(x)[-1]) #To be used in loss computation
                #[batch_size, channels, time_length]
                y_hat = self.step(x, c, g, softmax=False) #softmax is automatically computed inside softmax_cross_entropy if needed

                if is_mulaw_quantize(hparams.input_type):
                    #[batch_size, time_length, channels]
                    self.y_hat_q = tf.transpose(y_hat, [0, 2, 1])

                self.y_hat = y_hat
                self.y = y
                self.input_lengths = input_lengths

                #Graph extension for log saving
                #[batch_size, time_length]
                shape_control = (batch_size, tf.shape(x)[-1], 1)
                with tf.control_dependencies([tf.assert_equal(tf.shape(y), shape_control)]):
                    y_log = tf.squeeze(y, [-1])
                    if is_mulaw_quantize(hparams.input_type):
                        self.y = y_log

                #y_hat may carry a trailing singleton dimension (rank 4);
                #squeeze it before reshaping to [batch_size, out_channels, time_length].
                y_hat_log = tf.cond(tf.equal(tf.rank(y_hat), 4),
                    lambda: tf.squeeze(y_hat, [-1]),
                    lambda: y_hat)
                y_hat_log = tf.reshape(y_hat_log, [batch_size, hparams.out_channels, -1])

                if is_mulaw_quantize(hparams.input_type):
                    #[batch_size, time_length]
                    y_hat_log = tf.reduce_max(tf.nn.softmax(y_hat_log, axis=1), 1)

                    y_hat_log = util.inv_mulaw_quantize(y_hat_log, hparams.quantize_channels)
                    y_log = util.inv_mulaw_quantize(y_log, hparams.quantize_channels)

                else:
                    #[batch_size, time_length]
                    y_hat_log = sample_from_discretized_mix_logistic(
                        y_hat_log, log_scale_min=hparams.log_scale_min)

                    if is_mulaw(hparams.input_type):
                        y_hat_log = util.inv_mulaw(y_hat_log, hparams.quantize_channels)
                        y_log = util.inv_mulaw(y_log, hparams.quantize_channels)

                self.y_hat_log = y_hat_log
                self.y_log = y_log
                
                log('  inputs:                    {}'.format(x.shape))
                if self.local_conditioning_enabled():
                    log('  local_condition:           {}'.format(c.shape))
                if self.has_speaker_embedding():
                    log('  global_condition:          {}'.format(g.shape))
                log('  targets:                   {}'.format(y_log.shape))
                log('  outputs:                   {}'.format(y_hat_log.shape))


            #evaluating
            elif self.is_evaluating: 
                #[time_length, ]
                idx = 0
                length = input_lengths[idx]
                y_target = tf.reshape(y[idx], [-1])[:length]

                if c is not None:
                    c = tf.expand_dims(c[idx, :, :length], axis=0)
                    with tf.control_dependencies([tf.assert_equal(tf.rank(c), 3)]):
                        c = tf.identity(c, name='eval_assert_c_rank_op')
                if g is not None:
                    g = g[idx]

                #Start silence frame
                if is_mulaw_quantize(hparams.input_type):
                    initial_value = mulaw_quantize(0, hparams.quantize_channels)
                elif is_mulaw(hparams.input_type):
                    initial_value = mulaw(0.0, hparams.quantize_channels)
                else:
                    initial_value = 0.0

                #[channels, ]
                if is_mulaw_quantize(hparams.input_type):
                    initial_input = tf.one_hot(indices=initial_value, depth=hparams.quantize_channels, dtype=tf.float32)
                    initial_input = tf.reshape(initial_input, [1, 1, hparams.quantize_channels])
                else:
                    initial_input = tf.ones([1, 1, 1], tf.float32) * initial_value

                #Fast eval
                y_hat = self.incremental(initial_input, c=c, g=g, time_length=length,
                    softmax=True, quantize=True, log_scale_min=hparams.log_scale_min)

                #Save targets and length for eval loss computation
                if is_mulaw_quantize(hparams.input_type):
                    self.y_eval = tf.reshape(y[idx], [1, -1])[:, :length]
                else:
                    self.y_eval = tf.expand_dims(y[idx], axis=0)[:, :length, :]
                self.eval_length = length

                if is_mulaw_quantize(hparams.input_type):
                    y_hat = tf.reshape(tf.reduce_max(y_hat, axis=1), [-1])
                    y_hat = inv_mulaw_quantize(y_hat, hparams.quantize_channels)
                    y_target = inv_mulaw_quantize(y_target, hparams.quantize_channels)
                elif is_mulaw(hparams.input_type):
                    y_hat = inv_mulaw(tf.reshape(y_hat, [-1]), hparams.quantize_channels)
                    y_target = inv_mulaw(y_target, hparams.quantize_channels)
                else:
                    y_hat = tf.reshape(y_hat, [-1])

                self.y_hat = y_hat
                self.y_target = y_target

                if self.local_conditioning_enabled():
                    log('  local_condition:           {}'.format(c.shape))
                if self.has_speaker_embedding():
                    log('  global_condition:          {}'.format(g.shape))
                log('  targets:                   {}'.format(y_target.shape))
                log('  outputs:                   {}'.format(y_hat.shape))

            #synthesizing
            else:
                if c is None:
                    assert synthesis_length is not None
                else:
                    #[batch_size, local_condition_time, local_condition_dimension(num_mels)]
                    message = ('Expected 3 dimension shape [batch_size(1), time_length, {}] for local condition features but found {}'.format(
                            hparams.cin_channels, c.shape))
                    with tf.control_dependencies([tf.assert_equal(tf.rank(c), 3, message=message)]):
                        c = tf.identity(c, name='synthesis_assert_c_rank_op')

                    Tc = tf.shape(c)[1]
                    upsample_factor = audio.get_hop_size(self._hparams)

                    #Overwrite length with respect to local condition features
                    synthesis_length = Tc * upsample_factor

                    #[batch_size, local_condition_dimension, local_condition_time]
                    #time_length will be corrected using the upsample network
                    c = tf.transpose(c, [0, 2, 1])

                #Start silence frame
                if is_mulaw_quantize(hparams.input_type):
                    initial_value = mulaw_quantize(0, hparams.quantize_channels)
                elif is_mulaw(hparams.input_type):
                    initial_value = mulaw(0.0, hparams.quantize_channels)
                else:
                    initial_value = 0.0

                if is_mulaw_quantize(hparams.input_type):
                    assert initial_value >= 0 and initial_value < hparams.quantize_channels
                    initial_input = tf.one_hot(indices=initial_value, depth=hparams.quantize_channels, dtype=tf.float32)
                    initial_input = tf.reshape(initial_input, [1, 1, hparams.quantize_channels])
                else:
                    initial_input = tf.ones([1, 1, 1], tf.float32) * initial_value

                y_hat = self.incremental(initial_input, c=c, g=g, time_length=synthesis_length,
                    softmax=True, quantize=True, log_scale_min=hparams.log_scale_min)

                if is_mulaw_quantize(hparams.input_type):
                    y_hat = tf.reshape(tf.reduce_max(y_hat, axis=1), [-1])
                    y_hat = util.inv_mulaw_quantize(y_hat, hparams.quantize_channels)
                elif is_mulaw(hparams.input_type):
                    y_hat = util.inv_mulaw(tf.reshape(y_hat, [-1]), hparams.quantize_channels)
                else:
                    y_hat = tf.reshape(y_hat, [-1])

                self.y_hat = y_hat

                if self.local_conditioning_enabled():
                    log('  local_condition:            {}'.format(c.shape))
                if self.has_speaker_embedding():
                    log('  global_condition:           {}'.format(g.shape))
                log('  outputs:                    {}'.format(y_hat.shape))

        self.variables = tf.trainable_variables()
        self.ema = tf.train.ExponentialMovingAverage(decay=hparams.wavenet_ema_decay)
Example #53
import tensorflow as tf

a = tf.constant(5.0, name="constant_a")
b = tf.constant(6.0, name="constant_b")
c = tf.constant(64.0, name="constant_c")
m = tf.constant([[1, 2], [3, 4]], name="matrix")

square_val = tf.square(a, name="square_op")
sqrt_val = tf.sqrt(c, name="sqrt_op")
pow_val = tf.pow(a, b, name="power_op")

add_all_val = tf.add_n([square_val, sqrt_val, pow_val], name="add_all")
session = tf.Session()

print("square of a ", session.run(a), " >> ", session.run(square_val))
print("square root of b ", session.run(b), " >> ", session.run(sqrt_val))
print("a to the power of b  >> ", session.run(pow_val))
print("add all the above values >> ", session.run(add_all_val))

print("rank of add all value is >> ",
      session.run(tf.rank(add_all_val)))  # scalar has dimension 0
print("rank of matrix m is >> ",
      session.run(tf.rank(m)))  # number of dimensions

writer = tf.summary.FileWriter('./graph_output2', session.graph)

writer.close()
session.close()
Example #54
import tensorflow as tf

print(tf.__version__)

a = tf.constant(6.5, name='const_a')
b = tf.constant(3.4, name='const_b')
c = tf.constant(3.0, name='const_c')
d = tf.constant(100.2, name='const_d')

sq_a = tf.square(a, name='square_a')
b_pow_c = tf.pow(b, c, name='b_pow_c')
sqrt_d = tf.sqrt(d, name='sqrt_d')

tf.rank(sqrt_d)  # only builds a rank op; sess.run(tf.rank(sqrt_d)) would return 0

final_sum = tf.add_n([sq_a, b_pow_c, sqrt_d], name='final_sum')

sess = tf.Session()
print(sess.run(final_sum))

writer = tf.summary.FileWriter('./m2_example2', sess.graph)
writer.close()
sess.close()
Example #55
def cw(x,
       y=None,
       eps=2.5,
       ord_='inf',
       T=512,
       optimizer=tf.train.AdamOptimizer(learning_rate=0.1),
       alpha=0.9,
       min_prob=0.,
       clip=(0.0, 1.0)):
    xshape = x.get_shape().as_list()
    noise = tf.get_variable('noise',
                            xshape,
                            tf.float32,
                            initializer=tf.initializers.zeros)
    x = x + tf.random_uniform(tf.shape(x), minval=-1e-2, maxval=1e-2) * \
        create_perlin_noise(seed=None, color=False, batch_size=configs['batch_size'], image_size=28, normalize=True,
                            precalc_fade=None)
    # scale input to (0, 1)
    x_scaled = (x - clip[0]) / (clip[1] - clip[0])

    # change to sigmoid-space, clip to avoid overflow.
    z = tf.clip_by_value(x_scaled, 1e-8, 1 - 1e-8)
    xinv = tf.log(z / (1 - z))

    # add noise in sigmoid-space and map back to input domain
    xadv = tf.sigmoid(xinv + T * noise)
    xadv = xadv * (clip[1] - clip[0]) + clip[0]

    logits = []
    ybars = []
    for i in rand_index:
        logit, ybar = models[i](xadv)
        logits.append(logit)
        ybars.append(ybar)
    ybar = tf.add_n(ybars)
    logits = tf.add_n(logits)
    ydim = ybar.get_shape().as_list()[1]

    if y is not None:
        y_temp = tf.cond(tf.equal(tf.rank(y), 0),
                         lambda: tf.fill([xshape[0]], y),
                         lambda: tf.identity(y))
    else:
        # we set target to the least-likely label
        y_temp = tf.argmin(ybar, axis=1, output_type=tf.int32)

    mask = tf.one_hot(y_temp, ydim, on_value=0.0, off_value=float('inf'))
    yt = tf.reduce_max(logits - mask, axis=1)
    yo = tf.reduce_max(logits, axis=1)

    # encourage to classify to a wrong category
    loss0 = tf.nn.relu(yo - yt + min_prob)

    axis = list(range(1, len(xshape)))
    ord_ = float(ord_)

    # make sure the adversarial images are visually close
    if 2 == ord_:
        # CW-L2: the original paper uses the reduce_sum version.  The two
        # implementations do not differ much.

        # loss1 = tf.reduce_sum(tf.square(xadv-x), axis=axis)
        loss1 = tf.reduce_mean(tf.square(xadv - x))
    else:
        # CW-Linf
        tau0 = tf.fill([xshape[0]] + [1] * len(axis), clip[1])
        tau = tf.get_variable('cw8-noise-upperbound',
                              dtype=tf.float32,
                              initializer=tau0,
                              trainable=False)
        diff = xadv - x - tau

        # if all values are smaller than the upper bound value tau, we reduce
        # this value via tau*0.9 to make sure L-inf does not get stuck.
        tau = alpha * tf.to_float(tf.reduce_all(diff < 0, axis=axis))
        loss1 = tf.nn.relu(tf.reduce_sum(diff, axis=axis))

    loss = eps * loss0 + loss1
    train_op = optimizer.minimize(loss, var_list=[noise])

    # We may need to update tau after each iteration.  Refer to the CW-Linf
    # section in the original paper.
    if 2 != ord_:
        train_op = tf.group(train_op, tau)

    return train_op, xadv, noise
Example #56
def main(_):

    pp.pprint(flags.FLAGS.__flags)

    # Load the class order
    order = []
    with open('cifar-100_%s.txt' % FLAGS.order_file) as file_in:
        for line in file_in.readlines():
            order.append(int(line))
    order = np.array(order)

    assert FLAGS.mode == 'wgan-gp'

    import cifar100
    NUM_CLASSES = 100  # number of classes
    NUM_TRAIN_SAMPLES_PER_CLASS = 500  # number of training samples per class
    NUM_TEST_SAMPLES_PER_CLASS = 100  # number of test samples per class
    train_images, train_labels, train_one_hot_labels, \
        test_images, test_labels, test_one_hot_labels, \
        raw_images_train, raw_images_test, pixel_mean = cifar100.load_data(order, mean_subtraction=True)

    # Number of all training samples
    NUM_TRAIN_SAMPLES_TOTAL = NUM_CLASSES * NUM_TRAIN_SAMPLES_PER_CLASS
    NUM_TEST_SAMPLES_TOTAL = NUM_CLASSES * NUM_TEST_SAMPLES_PER_CLASS

    def build_cnn(inputs, is_training):
        train_or_test = {True: 'train', False: 'test'}
        if FLAGS.network_arch == 'lenet':
            logits, end_points = utils_lenet.lenet(inputs, num_classes=NUM_CLASSES, is_training=is_training,
                                                   use_dropout=FLAGS.use_dropout,
                                                   scope=('LeNet-'+train_or_test[is_training]))
        elif FLAGS.network_arch == 'resnet':
            logits, end_points = utils_resnet.ResNet(inputs, train_or_test[is_training], num_outputs=NUM_CLASSES,
                                                     alpha=0.0, n=FLAGS.num_resblocks,
                                                     scope=('ResNet-'+train_or_test[is_training]))
        elif FLAGS.network_arch == 'nin':
            logits, end_points = utils_nin.nin(inputs, is_training=is_training, num_classes=NUM_CLASSES,
                                               scope=('NIN-' + train_or_test[is_training]))
        else:
            raise Exception('Invalid network architecture')
        return logits, end_points

    '''
    Define variables
    '''
    if not FLAGS.only_gen_no_cls:

        # Save all intermediate result in the result_folder
        method_name = '_'.join(os.path.basename(__file__).split('.')[0].split('_')[2:])
        method_name += '_gen_%d_and_select' % FLAGS.gen_how_many if FLAGS.gen_more_and_select else ''
        method_name += '_balanced' if FLAGS.balanced else ''
        method_name += '_auto-%.1f-%.1f' % (FLAGS.auto_param1, FLAGS.auto_param2) \
            if FLAGS.auto_choose_num_exemplars else ('_%d' % FLAGS.num_exemplars_per_class if not FLAGS.memory_constrained else '')
        method_name += '_%s' % FLAGS.exemplar_select_criterion
        method_name += '_%.1f-%.1f' % (FLAGS.proto_weight, FLAGS.gen_weight)
        method_name += '_cache_%d' % FLAGS.cache_size_per_class if FLAGS.use_cache_for_gen_samples else ''
        method_name += '_icarl_%d' % FLAGS.memory_upperbound if FLAGS.memory_constrained else ''
        method_name += '_reorder' if FLAGS.reorder_exemplars else ''
        method_name += '' if FLAGS.label_smoothing == 1. else '_smoothing_%.1f' % FLAGS.label_smoothing

        cls_func = '' if FLAGS.use_softmax else '_sigmoid'
        result_folder = os.path.join(FLAGS.result_dir, 'cifar-100_' + FLAGS.order_file,
                                     'nb_cl_' + str(FLAGS.nb_cl),
                                     'non_truncated' if FLAGS.no_truncate else 'truncated',
                                     FLAGS.network_arch + ('_%d' % FLAGS.num_resblocks if FLAGS.network_arch == 'resnet' else '') + cls_func + '_init_' + FLAGS.init_strategy,
                                     'weight_decay_' + str(FLAGS.weight_decay),
                                     'base_lr_' + str(FLAGS.base_lr),
                                     'adam_lr_' + str(FLAGS.adam_lr))
        if FLAGS.gan_finetune and 'gan' in method_name:
            result_folder = os.path.join(result_folder,
                                         method_name + '_finetune_' + FLAGS.pretrained_model_sub_dir.replace('/', '_'))
        else:
            result_folder = os.path.join(result_folder,
                                         method_name)

        # Add a "_run-i" suffix to the folder name if the folder exists
        if os.path.exists(result_folder):
            temp_i = 2
            while True:
                result_folder_mod = result_folder + '_run-' + str(temp_i)
                if not os.path.exists(result_folder_mod):
                    result_folder = result_folder_mod
                    break
                temp_i += 1
        os.makedirs(result_folder)
        print('Result folder: %s' % result_folder)

        graph_cls = tf.Graph()
        with graph_cls.as_default():
            '''
            Define variables
            '''
            batch_images = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
            batch = tf.Variable(0, trainable=False, name='LeNet-train/iteration')
            learning_rate = tf.placeholder(tf.float32, shape=[])

            '''
            Network output mask
            '''
            mask_output = tf.placeholder(tf.bool, shape=[NUM_CLASSES])

            '''
            Old and new ground truth
            '''
            one_hot_labels_truncated = tf.placeholder(tf.float32, shape=[None, None])

            '''
            Define the training network
            '''
            train_logits, _ = build_cnn(batch_images, True)
            train_masked_logits = tf.gather(train_logits, tf.squeeze(tf.where(mask_output)), axis=1)
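            # Note: tf.gather over the squeezed tf.where indices collapses the
            # class axis when exactly one class is unmasked (rank drops to 1);
            # the tf.cond below restores the 2-D [batch, num_masked] shape.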
            train_masked_logits = tf.cond(tf.equal(tf.rank(train_masked_logits), 1),
                                          lambda: tf.expand_dims(train_masked_logits, 1),
                                          lambda: train_masked_logits)
            train_pred = tf.argmax(train_masked_logits, 1)
            train_ground_truth = tf.argmax(one_hot_labels_truncated, 1)
            correct_prediction = tf.equal(train_pred, train_ground_truth)
            train_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            train_batch_weights = tf.placeholder(tf.float32, shape=[None])
            reg_weights = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            regularization_loss = FLAGS.weight_decay * tf.add_n(reg_weights)

            '''
            More Settings
            '''
            if FLAGS.use_softmax:
                empirical_loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels_truncated,
                                                                 logits=train_masked_logits,
                                                                 weights=train_batch_weights)
            else:
                empirical_loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=one_hot_labels_truncated,
                                                                 logits=train_masked_logits,
                                                                 weights=train_batch_weights)

            loss = empirical_loss + regularization_loss
            if FLAGS.use_momentum:
                opt = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum).minimize(loss, global_step=batch)
            else:
                opt = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=batch)

            '''
            Define the testing network
            '''
            test_logits, _ = build_cnn(batch_images, False)
            test_masked_logits = tf.gather(test_logits, tf.squeeze(tf.where(mask_output)), axis=1)
            test_masked_logits = tf.cond(tf.equal(tf.rank(test_masked_logits), 1),
                                         lambda: tf.expand_dims(test_masked_logits, 1),
                                         lambda: test_masked_logits)
            test_masked_prob = tf.nn.softmax(test_masked_logits)
            test_pred = tf.argmax(test_masked_logits, 1)
            test_accuracy = tf.placeholder(tf.float32)

            '''
            Copy network (define the copying op)
            '''
            if FLAGS.network_arch == 'resnet':
                all_variables = tf.get_collection(tf.GraphKeys.WEIGHTS)
            else:
                all_variables = tf.trainable_variables()
            copy_ops = [all_variables[ix + len(all_variables) // 2].assign(var.value()) for ix, var in
                        enumerate(all_variables[0:len(all_variables) // 2])]

            '''
            Init certain layers when new classes added
            '''
            init_ops = tf.no_op()
            if FLAGS.init_strategy == 'all':
                init_ops = tf.global_variables_initializer()
            elif FLAGS.init_strategy == 'last':
                if FLAGS.network_arch == 'lenet':
                    init_vars = [var for var in tf.global_variables() if 'fc4' in var.name and 'train' in var.name]
                elif FLAGS.network_arch == 'resnet':
                    init_vars = [var for var in tf.global_variables() if 'fc' in var.name and 'train' in var.name]
                elif FLAGS.network_arch == 'nin':
                    init_vars = [var for var in tf.global_variables() if 'ccp6' in var.name and 'train' in var.name]
                init_ops = tf.initialize_variables(init_vars)

            '''
            Create session
            '''
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            sess = tf.Session(config=config, graph=graph_cls)
            sess.run(tf.global_variables_initializer())

            saver = tf.train.Saver()

        '''
        Summary
        '''
        train_loss_summary = tf.summary.scalar('train_loss', loss)
        train_acc_summary = tf.summary.scalar('train_accuracy', train_accuracy)
        test_acc_summary = tf.summary.scalar('test_accuracy', test_accuracy)

        summary_dir = os.path.join(result_folder, 'summary')
        if not os.path.exists(summary_dir):
            os.makedirs(summary_dir)
        train_summary_writer = tf.summary.FileWriter(os.path.join(summary_dir, 'train'), sess.graph)
        test_summary_writer = tf.summary.FileWriter(os.path.join(summary_dir, 'test'))

        iteration = 0

        '''
        Declaration of other vars
        '''
        # Average accuracy on seen classes
        aver_acc_over_time = dict()
        aver_acc_per_class_over_time = dict()
        conf_mat_over_time = dict()

        # Network mask
        mask_output_val = np.zeros([NUM_CLASSES], dtype=bool)

        '''
        Cache(accelerate)
        '''
        cache_dir = os.path.join(result_folder, 'cache')
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)

        '''
        Exemplars(for ablation study and other purposes)
        '''
        exemplars_dir = os.path.join(result_folder, 'exemplars')
        if not os.path.exists(exemplars_dir):
            os.makedirs(exemplars_dir)

    '''
    Train generative model(DC-GAN)
    '''
    run_config = tf.ConfigProto()
    run_config.gpu_options.allow_growth = True
    graph_gen = tf.Graph()
    sess_wgan = tf.Session(config=run_config, graph=graph_gen)

    wgan_obj = GAN(sess_wgan, graph_gen,
                   dataset_name='cifar-100',
                   mode=FLAGS.mode,
                   batch_size=FLAGS.batch_size,
                   dim=FLAGS.dim,
                   output_dim=FLAGS.output_dim,
                   lambda_param=FLAGS.lambda_param,
                   critic_iters=FLAGS.critic_iters,
                   iters=FLAGS.iters,
                   result_dir=FLAGS.result_dir_wgan,
                   checkpoint_interval=FLAGS.gan_save_interval,
                   adam_lr=FLAGS.adam_lr,
                   adam_beta1=FLAGS.adam_beta1,
                   adam_beta2=FLAGS.adam_beta2,
                   finetune=FLAGS.gan_finetune,
                   finetune_from=FLAGS.gan_finetune_from,
                   pretrained_model_base_dir=FLAGS.pretrained_model_base_dir,
                   pretrained_model_sub_dir=FLAGS.pretrained_model_sub_dir)

    exemplars = []

    '''
    Class Incremental Learning
    '''
    print('Starting from category ' + str(FLAGS.from_class_idx + 1) + ' to ' + str(FLAGS.to_class_idx + 1))
    print('Adding %d categories every time' % FLAGS.nb_cl)
    assert (FLAGS.from_class_idx % FLAGS.nb_cl == 0)
    for category_idx in range(FLAGS.from_class_idx, FLAGS.to_class_idx + 1, FLAGS.nb_cl):

        to_category_idx = category_idx + FLAGS.nb_cl - 1
        if FLAGS.nb_cl == 1:
            print('Adding Category ' + str(category_idx + 1))
        else:
            print('Adding Category %d-%d' % (category_idx + 1, to_category_idx + 1))

        for category_idx_in_group in range(category_idx, to_category_idx + 1):
            # Training set(current category)
            train_indices_gan = [idx for idx in range(NUM_TRAIN_SAMPLES_TOTAL) if train_labels[idx] == category_idx_in_group]
            test_indices_cur_cls_gan = [idx for idx in range(NUM_TEST_SAMPLES_TOTAL) if test_labels[idx] == category_idx_in_group]

            train_x_gan = raw_images_train[train_indices_gan, :]
            test_x_cur_cls_gan = raw_images_test[test_indices_cur_cls_gan, :]

            '''
            Train generative model(W-GAN)
            '''
            real_class_idx = order[category_idx_in_group]
            if wgan_obj.check_model(real_class_idx):
                print(" [*] Model of Class %d exists. Skip the training process" % (real_class_idx + 1))
            else:
                print(" [*] Model of Class %d does not exist. Start the training process" % (real_class_idx + 1))
                wgan_obj.train(train_x_gan, test_x_cur_cls_gan, real_class_idx)

        '''
        Train classification model
        '''
        # Skip classifier training entirely when only the generative models are requested
        if not FLAGS.only_gen_no_cls:

            if FLAGS.no_truncate:
                mask_output_val[:] = True
            else:
                mask_output_val[:to_category_idx + 1] = True

            if to_category_idx > 0:

                # init certain layers
                sess.run(init_ops)

                '''
                Generate samples of old classes
                '''
                if FLAGS.balanced:

                    if len(exemplars) == 0:
                        num_real_samples_needed = NUM_TRAIN_SAMPLES_PER_CLASS
                    else:
                        num_real_samples_needed = len(exemplars[0])
                    num_gen_samples_x_needed = NUM_TRAIN_SAMPLES_PER_CLASS - num_real_samples_needed

                    # init
                    train_x = raw_images_train[[], :]
                    if FLAGS.no_truncate:
                        train_y_truncated = train_one_hot_labels[[], :]
                    else:
                        train_y_truncated = train_one_hot_labels[[], :to_category_idx + 1]
                    train_weights_val = np.zeros([0])

                    for category_idx_in_group in range(category_idx, to_category_idx + 1):

                        train_indices = [idx for idx in range(NUM_TRAIN_SAMPLES_TOTAL) if
                                         train_labels[idx] == category_idx_in_group]

                        train_indices = np.random.choice(train_indices, num_real_samples_needed, replace=False)
                        train_x = np.concatenate((train_x, raw_images_train[train_indices]))
                        if FLAGS.no_truncate:
                            train_y_truncated = np.concatenate((train_y_truncated,
                                                                train_one_hot_labels[train_indices, :]))
                        else:
                            train_y_truncated = np.concatenate((train_y_truncated,
                                                                train_one_hot_labels[train_indices, :to_category_idx + 1]))
                        train_weights_val = np.concatenate((train_weights_val, np.ones(len(train_indices))))

                        # Load old class model
                        if num_gen_samples_x_needed > 0:

                            real_class_idx = order[category_idx_in_group]
                            if not wgan_obj.load(real_class_idx)[0]:
                                raise Exception("[!] Train a model first, then run test mode")
                            if FLAGS.gen_more_and_select:
                                gen_samples_x_more, _, _ = wgan_obj.test(FLAGS.gen_how_many)
                                gen_samples_x_more_real = cifar100.convert_images(gen_samples_x_more,
                                                                                  pixel_mean=pixel_mean)
                                gen_samples_prob = sess.run(test_masked_prob,
                                                            feed_dict={batch_images: gen_samples_x_more_real,
                                                                       mask_output: mask_output_val})
                                gen_samples_scores_cur_cls = gen_samples_prob[:, category_idx_in_group]
                                top_k_indices = np.argsort(-gen_samples_scores_cur_cls)[:num_gen_samples_x_needed]
                                gen_samples_x = gen_samples_x_more[top_k_indices]
                            else:
                                gen_samples_x, _, _ = wgan_obj.test(num_gen_samples_x_needed)

                            # import wgan.tflib.save_images
                            # wgan.tflib.save_images.save_images(gen_samples_x[:128].reshape((128, 3, 32, 32)),
                            #                                    'test.jpg')
                            train_x = np.concatenate((train_x, gen_samples_x))
                            train_weights_val = np.concatenate((train_weights_val,
                                                                np.ones(len(gen_samples_x)) * FLAGS.gen_weight))

                            if FLAGS.no_truncate:
                                gen_samples_y = np.ones((num_gen_samples_x_needed, NUM_CLASSES)) * (
                                        (1 - FLAGS.label_smoothing) / (NUM_CLASSES - 1))
                            else:
                                gen_samples_y = np.ones((num_gen_samples_x_needed, to_category_idx + 1)) * (
                                        (1 - FLAGS.label_smoothing) / to_category_idx)
                            gen_samples_y[:, category_idx_in_group] = np.ones(
                                (num_gen_samples_x_needed)) * FLAGS.label_smoothing
                            train_y_truncated = np.concatenate((train_y_truncated, gen_samples_y))

                else:
                    train_indices = [idx for idx in range(NUM_TRAIN_SAMPLES_TOTAL) if category_idx <= train_labels[idx] <= to_category_idx]
                    train_x = raw_images_train[train_indices]
                    if FLAGS.no_truncate:
                        train_y_truncated = train_one_hot_labels[train_indices, :]
                    else:
                        train_y_truncated = train_one_hot_labels[train_indices, :to_category_idx + 1]
                    train_weights_val = np.ones(len(train_x))

                for old_category_idx in range(0, category_idx):

                    # Load old class model
                    num_gen_samples_x_needed = NUM_TRAIN_SAMPLES_PER_CLASS - len(exemplars[old_category_idx])
                    if num_gen_samples_x_needed > 0:

                        # if FLAGS.use_cache_for_gen_samples:
                        #     cache_file = os.path.join(cache_dir, 'class_%d.npy' % (old_category_idx + 1))
                        #     if os.path.exists(cache_file):
                        #         gen_samples_x = np.load(cache_file)
                        #     else:
                        #         if not wgan_obj.load(old_category_idx)[0]:
                        #             raise Exception("[!] Train a model first, then run test mode")
                        #         gen_samples_x, _, _ = wgan_obj.test(FLAGS.cache_size_per_class)
                        #         np.save(cache_file, gen_samples_x)
                        #
                        #     gen_samples_x_idx = np.random.choice(len(gen_samples_x),
                        #                                          num_gen_samples_x_needed,
                        #                                          replace=False)
                        #     gen_samples_x = gen_samples_x[gen_samples_x_idx]
                        # else:
                        #     if not wgan_obj.load(old_category_idx)[0]:
                        #         raise Exception("[!] Train a model first, then run test mode")
                        #     gen_samples_x, _, _ = wgan_obj.test(num_gen_samples_x_needed)

                        real_class_idx = order[old_category_idx]
                        if not wgan_obj.load(real_class_idx)[0]:
                            raise Exception("[!] Train a model first, then run test mode")
                        if FLAGS.gen_more_and_select:
                            gen_samples_x_more, _, _ = wgan_obj.test(FLAGS.gen_how_many)
                            gen_samples_x_more_real = cifar100.convert_images(gen_samples_x_more, pixel_mean=pixel_mean)
                            gen_samples_prob = sess.run(test_masked_prob,
                                                        feed_dict={batch_images: gen_samples_x_more_real,
                                                                   mask_output: mask_output_val})
                            gen_samples_scores_cur_cls = gen_samples_prob[:, old_category_idx]
                            top_k_indices = np.argsort(-gen_samples_scores_cur_cls)[:num_gen_samples_x_needed]
                            gen_samples_x = gen_samples_x_more[top_k_indices]
                        else:
                            gen_samples_x, _, _ = wgan_obj.test(num_gen_samples_x_needed)

                        # import wgan.tflib.save_images
                        # wgan.tflib.save_images.save_images(gen_samples_x[:128].reshape((128, 3, 32, 32)),
                        #                                    'test.jpg')
                        train_x = np.concatenate((train_x, gen_samples_x, exemplars[old_category_idx]))
                        train_weights_val = np.concatenate((train_weights_val,
                                                            np.ones(len(gen_samples_x)) * FLAGS.gen_weight,
                                                            np.ones(len(exemplars[old_category_idx])) * FLAGS.proto_weight))
                    elif num_gen_samples_x_needed == 0:
                        train_x = np.concatenate((train_x, exemplars[old_category_idx]))
                        train_weights_val = np.concatenate((train_weights_val,
                                                            np.ones(len(exemplars[old_category_idx])) * FLAGS.proto_weight))
                    # if FLAGS.no_truncate:
                    #     gen_samples_y = np.zeros((NUM_TRAIN_SAMPLES_PER_CLASS, NUM_CLASSES))
                    # else:
                    #     gen_samples_y = np.zeros((NUM_TRAIN_SAMPLES_PER_CLASS, to_category_idx+1))
                    # gen_samples_y[:, old_category_idx] = np.ones((NUM_TRAIN_SAMPLES_PER_CLASS))

                    if FLAGS.no_truncate:
                        gen_samples_y = np.ones((NUM_TRAIN_SAMPLES_PER_CLASS, NUM_CLASSES)) * (
                            (1 - FLAGS.label_smoothing) / (NUM_CLASSES - 1))
                    else:
                        gen_samples_y = np.ones((NUM_TRAIN_SAMPLES_PER_CLASS, to_category_idx+1)) * (
                            (1 - FLAGS.label_smoothing) / to_category_idx)
                    gen_samples_y[:, old_category_idx] = np.ones((NUM_TRAIN_SAMPLES_PER_CLASS)) * FLAGS.label_smoothing

                    train_y_truncated = np.concatenate((train_y_truncated, gen_samples_y))

                # Training set
                # Convert the raw images from the data-files to floating-points.
                train_x = cifar100.convert_images(train_x, pixel_mean=pixel_mean)

                # Testing set
                test_indices = [idx for idx in range(len(test_labels)) if test_labels[idx] <= to_category_idx]
                test_x = test_images[test_indices]
                test_y = test_labels[test_indices]

                # Shuffle the indices and create mini-batches
                batch_indices_perm = []

                epoch_idx = 0
                lr = FLAGS.base_lr

                '''
                Training with mixed data
                '''
                while True:
                    # Generate mini-batch
                    if len(batch_indices_perm) == 0:
                        if epoch_idx >= FLAGS.epochs_per_category:
                            break
                        if epoch_idx in lr_strat:
                            lr /= FLAGS.lr_factor
                            print("NEW LEARNING RATE: %f" % lr)
                        epoch_idx = epoch_idx + 1

                        # print('Epoch %d' % epoch_idx)

                        shuffled_indices = list(range(train_x.shape[0]))
                        np.random.shuffle(shuffled_indices)
                        for i in range(0, len(shuffled_indices), FLAGS.train_batch_size):
                            batch_indices_perm.append(shuffled_indices[i:i + FLAGS.train_batch_size])
                        batch_indices_perm.reverse()

                    popped_batch_idx = batch_indices_perm.pop()

                    # Use the random index to select random images and labels.
                    train_weights_batch_val = train_weights_val[popped_batch_idx]
                    train_x_batch = train_x[popped_batch_idx, :, :, :]
                    train_y_batch = [train_y_truncated[k] for k in popped_batch_idx]

                    # Train
                    train_loss_summary_str, train_acc_summary_str, train_accuracy_val, \
                        train_loss_val, train_empirical_loss_val, train_reg_loss_val, _ = sess.run(
                            [train_loss_summary, train_acc_summary, train_accuracy, loss, empirical_loss,
                                regularization_loss, opt], feed_dict={batch_images: train_x_batch,
                                                                      one_hot_labels_truncated: train_y_batch,
                                                                      mask_output: mask_output_val,
                                                                      learning_rate: lr,
                                                                      train_batch_weights: train_weights_batch_val})

                    # Test
                    if iteration % FLAGS.test_interval == 0:
                        sess.run(copy_ops)

                        # Divide and conquer: to avoid allocating too much GPU memory
                        test_pred_val = []
                        for i in range(0, len(test_x), FLAGS.test_batch_size):
                            test_x_batch = test_x[i:i + FLAGS.test_batch_size]
                            test_pred_val_batch = sess.run(test_pred, feed_dict={batch_images: test_x_batch,
                                                                                 mask_output: mask_output_val})
                            test_pred_val.extend(test_pred_val_batch)

                        test_accuracy_val = 1. * np.sum(np.equal(test_pred_val, test_y)) / (len(test_pred_val))
                        test_per_class_accuracy_val = np.diag(confusion_matrix(test_y, test_pred_val))

                        test_acc_summary_str = sess.run(test_acc_summary, feed_dict={test_accuracy: test_accuracy_val})

                        test_summary_writer.add_summary(test_acc_summary_str, iteration)

                        print("TEST: step %d, lr %.4f, accuracy %g" % (iteration, lr, test_accuracy_val))
                        print("PER CLASS ACCURACY: " + " | ".join(str(o) + '%' for o in test_per_class_accuracy_val))

                    # Print the training logs
                    if iteration % FLAGS.display_interval == 0:
                        train_summary_writer.add_summary(train_loss_summary_str, iteration)
                        train_summary_writer.add_summary(train_acc_summary_str, iteration)
                        print("TRAIN: epoch %d, step %d, lr %.4f, accuracy %g, loss %g, empirical %g, reg %g" % (
                            epoch_idx, iteration, lr, train_accuracy_val, train_loss_val,
                            train_empirical_loss_val, train_reg_loss_val))

                    iteration = iteration + 1

                '''
                Final test (before the next class is added)
                '''
                sess.run(copy_ops)
                # Divide and conquer: to avoid allocating too much GPU memory
                test_pred_val = []
                for i in range(0, len(test_x), FLAGS.test_batch_size):
                    test_x_batch = test_x[i:i + FLAGS.test_batch_size]
                    test_pred_val_batch = sess.run(test_pred, feed_dict={batch_images: test_x_batch,
                                                                         mask_output: mask_output_val})
                    test_pred_val.extend(test_pred_val_batch)

                test_accuracy_val = 1. * np.sum(np.equal(test_pred_val, test_y)) / (len(test_pred_val))
                conf_mat = confusion_matrix(test_y, test_pred_val)
                test_per_class_accuracy_val = np.diag(conf_mat)

                # Record and save the cumulative accuracy
                aver_acc_over_time[to_category_idx] = test_accuracy_val
                aver_acc_per_class_over_time[to_category_idx] = test_per_class_accuracy_val
                conf_mat_over_time[to_category_idx] = conf_mat

                dump_obj = dict()
                dump_obj['flags'] = flags.FLAGS.__flags
                dump_obj['aver_acc_over_time'] = aver_acc_over_time
                dump_obj['aver_acc_per_class_over_time'] = aver_acc_per_class_over_time
                dump_obj['conf_mat_over_time'] = conf_mat_over_time

                np_file_result = os.path.join(result_folder, 'acc_over_time.pkl')
                with open(np_file_result, 'wb') as file:
                    pickle.dump(dump_obj, file)

                visualize_result.vis(np_file_result)

            # reorder the exemplars
            if FLAGS.reorder_exemplars:
                for old_category_idx in range(category_idx):

                    sess.run(copy_ops)
                    # Divide and conquer: to avoid allocating too much GPU memory
                    train_prob_cur_cls_exemplars_val = sess.run(test_masked_prob,
                                                                feed_dict={batch_images: cifar100.convert_images(
                                                                           exemplars[old_category_idx]),
                                                                           mask_output: mask_output_val})
                    train_prob_cur_cls_exemplars_val = train_prob_cur_cls_exemplars_val[:, old_category_idx]
                    reorder_indices = np.argsort(-train_prob_cur_cls_exemplars_val)
                    exemplars[old_category_idx] = exemplars[old_category_idx][reorder_indices]

            # select the exemplars
            for category_idx_in_group in range(category_idx, to_category_idx + 1):
                train_indices_cur_cls = [idx for idx in range(NUM_TRAIN_SAMPLES_TOTAL) if train_labels[idx] == category_idx_in_group]
                train_x_cur_cls = raw_images_train[train_indices_cur_cls]
                train_x_cur_cls_normalized = cifar100.convert_images(train_x_cur_cls, pixel_mean=pixel_mean)
                sess.run(copy_ops)
                # Divide and conquer: to avoid allocating too much GPU memory
                train_prob_cur_cls_val = sess.run(test_masked_prob, feed_dict={batch_images: train_x_cur_cls_normalized,
                                                                               mask_output: mask_output_val})
                train_prob_cur_cls_val = train_prob_cur_cls_val[:, category_idx_in_group]

                # use iCaRL-like memory mechanism to save exemplars or not
                if FLAGS.memory_constrained:

                    if FLAGS.auto_choose_num_exemplars:  # auto or fixed number of exemplars
                        # check if we can save all new samples as exemplars
                        if NUM_TRAIN_SAMPLES_PER_CLASS > FLAGS.memory_upperbound - sum([len(exemplars[i]) for i in range(len(exemplars))]):
                            # load inception scores of all classes
                            save_exemplars_ratios = []
                            for i in range(category_idx_in_group + 1):
                                real_class_idx = order[i]
                                inception_score = wgan_obj.load_inception_score(real_class_idx)
                                save_exemplars_ratio = FLAGS.auto_param1 - FLAGS.auto_param2 * inception_score
                                save_exemplars_ratios.append(save_exemplars_ratio)

                            save_exemplars_ratios = np.array(save_exemplars_ratios)
                            keep_exemplars_num = np.floor(save_exemplars_ratios * FLAGS.memory_upperbound
                                                             / sum(save_exemplars_ratios)).astype(int)
                            for old_category_idx in range(category_idx_in_group):
                                exemplars[old_category_idx] = exemplars[old_category_idx][:keep_exemplars_num[old_category_idx]]
                            num_exemplars_cur_cls = keep_exemplars_num[-1]
                        else:
                            num_exemplars_cur_cls = NUM_TRAIN_SAMPLES_PER_CLASS

                    else:
                        num_exemplars_per_cls = int(FLAGS.memory_upperbound // (category_idx_in_group + 1))
                        num_exemplars_per_cls = min(num_exemplars_per_cls, NUM_TRAIN_SAMPLES_PER_CLASS)
                        # remove redundant elements in the memory for previous classes
                        if category_idx_in_group > 0 and len(exemplars[0]) > num_exemplars_per_cls:
                            for old_category_idx in range(category_idx_in_group):
                                exemplars[old_category_idx] = exemplars[old_category_idx][:num_exemplars_per_cls]

                        # decide how many new exemplars to store for the current class
                        num_exemplars_cur_cls = num_exemplars_per_cls
                        print(' [*] Store %d exemplars for each class' % num_exemplars_cur_cls)

                else:
                    if FLAGS.auto_choose_num_exemplars: # auto or fixed number of exemplars
                        real_class_idx = order[category_idx_in_group]
                        inception_score = wgan_obj.load_inception_score(real_class_idx)
                        num_exemplars_cur_cls = int(np.floor(FLAGS.auto_param1 - FLAGS.auto_param2 * inception_score))
                        print(' [*] Inception score %f, store %d exemplars' % (inception_score, num_exemplars_cur_cls))
                    else:
                        num_exemplars_cur_cls = FLAGS.num_exemplars_per_class

                selected_indices = np.array(range(len(train_prob_cur_cls_val)))
                if FLAGS.exemplar_select_criterion == 'high':
                    selected_indices = train_prob_cur_cls_val.argsort()[:-(num_exemplars_cur_cls+1):-1]    # highest-probability samples
                elif FLAGS.exemplar_select_criterion == 'low':
                    selected_indices = train_prob_cur_cls_val.argsort()[:num_exemplars_cur_cls]    # lowest-probability samples
                elif FLAGS.exemplar_select_criterion == 'random':
                    random_idx = list(range(len(train_prob_cur_cls_val)))
                    np.random.shuffle(random_idx)
                    selected_indices = random_idx[:num_exemplars_cur_cls]

                exemplars.append(train_x_cur_cls[selected_indices])

                np_file_exemplars = os.path.join(exemplars_dir, 'exemplars_%d' % (category_idx_in_group+1))
                np.save(np_file_exemplars, exemplars)

    # Save the final model
    if not FLAGS.only_gen_no_cls:
        checkpoint_dir = os.path.join(result_folder, 'checkpoints')
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver.save(sess, os.path.join(checkpoint_dir, 'model.ckpt'))
        sess.close()
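# Illustrative aside (not part of the original script): the smoothed targets
# built above put `FLAGS.label_smoothing` mass on the true class and spread
# the remaining mass uniformly over the other classes, so each row is still a
# valid distribution. A minimal NumPy sketch with hypothetical values
# (smoothing factor 0.9, 5 classes):
import numpy as np

num_classes = 5
smoothing = 0.9          # stands in for FLAGS.label_smoothing
true_class = 2

smoothed = np.ones(num_classes) * ((1. - smoothing) / (num_classes - 1))
smoothed[true_class] = smoothing
assert np.isclose(smoothed.sum(), 1.0)  # 0.9 + 4 * 0.025 == 1.0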
def minimize_one_step(gradient_unregularized_loss,
                      hessian_unregularized_loss_outer,
                      hessian_unregularized_loss_middle,
                      x_start,
                      tolerance,
                      l1_regularizer,
                      l2_regularizer=None,
                      maximum_full_sweeps=1,
                      learning_rate=None,
                      name=None):
    """One step of (the outer loop of) the minimization algorithm.

  This function returns a new value of `x`, equal to `x_start + x_update`.  The
  increment `x_update in R^n` is computed by a coordinate descent method, that
  is, by a loop in which each iteration updates exactly one coordinate of
  `x_update`.  (Some updates may leave the value of the coordinate unchanged.)

  The particular update method used is to apply an L1-based proximity operator,
  "soft threshold", whose fixed point `x_update_fix` is the desired minimum

  ```none
  x_update_fix = argmin{
      Loss(x_start + x_update')
        + l1_regularizer * ||x_start + x_update'||_1
        + l2_regularizer * ||x_start + x_update'||_2**2
      : x_update' }
  ```

  where in each iteration `x_update'` is constrained to have at most one nonzero
  coordinate.

  This update method preserves sparsity, i.e., tends to find sparse solutions if
  `x_start` is sparse.  Additionally, the choice of step size is based on
  curvature (Hessian), which significantly speeds up convergence.

  This algorithm assumes that `Loss` is convex, at least in a region surrounding
  the optimum.  (If `l2_regularizer > 0`, then only weak convexity is needed.)

  Args:
    gradient_unregularized_loss: (Batch of) `Tensor` with the same shape and
      dtype as `x_start` representing the gradient, evaluated at `x_start`, of
      the unregularized loss function (denoted `Loss` above).  (In all current
      use cases, `Loss` is the negative log likelihood.)
    hessian_unregularized_loss_outer: (Batch of) `Tensor` or `SparseTensor`
      having the same dtype as `x_start`, and shape `[N, n]` where `x_start` has
      shape `[n]`, satisfying the property
      `Transpose(hessian_unregularized_loss_outer)
      @ diag(hessian_unregularized_loss_middle)
      @ hessian_unregularized_loss_outer
      = (approximation of) Hessian matrix of Loss, evaluated at x_start`.
    hessian_unregularized_loss_middle: (Batch of) vector-shaped `Tensor` having
      the same dtype as `x_start`, and shape `[N]` where
      `hessian_unregularized_loss_outer` has shape `[N, n]`, satisfying the
      property
      `Transpose(hessian_unregularized_loss_outer)
      @ diag(hessian_unregularized_loss_middle)
      @ hessian_unregularized_loss_outer
      = (approximation of) Hessian matrix of Loss, evaluated at x_start`.
    x_start: (Batch of) vector-shaped, `float` `Tensor` representing the current
      value of the argument to the Loss function.
    tolerance: scalar, `float` `Tensor` representing the convergence threshold.
      The optimization step will terminate early, returning its current value of
      `x_start + x_update`, once the following condition is met:
      `||x_update_end - x_update_start||_2 / (1 + ||x_start||_2)
      < sqrt(tolerance)`,
      where `x_update_end` is the value of `x_update` at the end of a sweep and
      `x_update_start` is the value of `x_update` at the beginning of that
      sweep.
    l1_regularizer: scalar, `float` `Tensor` representing the weight of the L1
      regularization term (see equation above).  If L1 regularization is not
      required, then `tfp.glm.fit_one_step` is preferable.
    l2_regularizer: scalar, `float` `Tensor` representing the weight of the L2
      regularization term (see equation above).
      Default value: `None` (i.e., no L2 regularization).
    maximum_full_sweeps: Python integer specifying maximum number of sweeps to
      run.  A "sweep" consists of an iteration of coordinate descent on each
      coordinate. After this many sweeps, the algorithm will terminate even if
      convergence has not been reached.
      Default value: `1`.
    learning_rate: scalar, `float` `Tensor` representing a multiplicative factor
      used to dampen the proximal gradient descent steps.
      Default value: `None` (i.e., factor is conceptually `1`).
    name: Python string representing the name of the TensorFlow operation.
      The default name is `"minimize_one_step"`.

  Returns:
    x: (Batch of) `Tensor` having the same shape and dtype as `x_start`,
      representing the updated value of `x`, that is, `x_start + x_update`.
    is_converged: scalar, `bool` `Tensor` indicating whether convergence
      occurred across all batches within the specified number of sweeps.
    iter: scalar, `int` `Tensor` representing the actual number of coordinate
      updates made (before achieving convergence).  Since each sweep consists of
      `tf.size(x_start)` iterations, the maximum number of updates is
      `maximum_full_sweeps * tf.size(x_start)`.

  #### References

  [1]: Jerome Friedman, Trevor Hastie and Rob Tibshirani. Regularization Paths
       for Generalized Linear Models via Coordinate Descent. _Journal of
       Statistical Software_, 33(1), 2010.
       https://www.jstatsoft.org/article/view/v033i01/v33i01.pdf

  [2]: Guo-Xun Yuan, Chia-Hua Ho and Chih-Jen Lin. An Improved GLMNET for
       L1-regularized Logistic Regression. _Journal of Machine Learning
       Research_, 13, 2012.
       http://www.jmlr.org/papers/volume13/yuan12a/yuan12a.pdf
  """
    graph_deps = [
        gradient_unregularized_loss,
        hessian_unregularized_loss_outer,
        hessian_unregularized_loss_middle,
        x_start,
        l1_regularizer,
        l2_regularizer,
        maximum_full_sweeps,
        tolerance,
        learning_rate,
    ]
    with tf.name_scope(name, 'minimize_one_step', graph_deps):
        x_shape = _get_shape(x_start)
        batch_shape = x_shape[:-1]
        dims = x_shape[-1]

        def _hessian_diag_elt_with_l2(coord):  # pylint: disable=missing-docstring
            # Returns the (coord, coord) entry of
            #
            #   Hessian(UnregularizedLoss(x) + l2_regularizer * ||x||_2**2)
            #
            # evaluated at x = x_start.
            inner_square = tf.reduce_sum(
                input_tensor=_sparse_or_dense_matmul_onehot(
                    hessian_unregularized_loss_outer, coord)**2,
                axis=-1)
            unregularized_component = (
                hessian_unregularized_loss_middle[..., coord] * inner_square)
            l2_component = _mul_or_none(2., l2_regularizer)
            return _add_ignoring_nones(unregularized_component, l2_component)

        grad_loss_with_l2 = _add_ignoring_nones(
            gradient_unregularized_loss,
            _mul_or_none(2., l2_regularizer, x_start))

        # We define `x_update_diff_norm_sq_convergence_threshold` such that the
        # convergence condition
        #     ||x_update_end - x_update_start||_2 / (1 + ||x_start||_2)
        #     < sqrt(tolerance)
        # is equivalent to
        #     ||x_update_end - x_update_start||_2**2
        #     < x_update_diff_norm_sq_convergence_threshold.
        x_update_diff_norm_sq_convergence_threshold = (
            tolerance * (1. + tf.norm(tensor=x_start, ord=2, axis=-1))**2)
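        # Worked example (illustrative): with tolerance = 1e-4 and
        # ||x_start||_2 = 3, this threshold is 1e-4 * (1 + 3)**2 = 1.6e-3, so
        # a sweep counts as converged once the squared change in x_update over
        # that sweep falls below 1.6e-3.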

        # Reshape update vectors so that the coordinate sweeps happen along the
        # first dimension. This is so that we can use tensor_scatter_update to make
        # sparse updates along the first axis without copying the Tensor.
        # TODO(b/118789120): Switch to something like tf.tensor_scatter_nd_add if
        # or when it exists.
        update_shape = tf.concat([[dims], batch_shape], axis=-1)

        def _loop_cond(iter_, x_update_diff_norm_sq, x_update,
                       hess_matmul_x_update):
            del x_update
            del hess_matmul_x_update
            sweep_complete = (iter_ > 0) & tf.equal(iter_ % dims, 0)
            small_delta = (x_update_diff_norm_sq <
                           x_update_diff_norm_sq_convergence_threshold)
            converged = sweep_complete & small_delta
            allowed_more_iterations = iter_ < maximum_full_sweeps * dims
            return allowed_more_iterations & tf.reduce_any(
                input_tensor=~converged)

        def _loop_body(  # pylint: disable=missing-docstring
                iter_, x_update_diff_norm_sq, x_update, hess_matmul_x_update):
            # Inner loop of the minimizer.
            #
            # This loop updates a single coordinate of x_update.  Ideally, an
            # iteration of this loop would set
            #
            #   x_update[j] += argmin{ LocalLoss(x_update + z*e_j) : z in R }
            #
            # where
            #
            #   LocalLoss(x_update')
            #     = LocalLossSmoothComponent(x_update')
            #         + l1_regularizer * (||x_start + x_update'||_1 -
            #                             ||x_start + x_update||_1)
            #    := (UnregularizedLoss(x_start + x_update') -
            #        UnregularizedLoss(x_start + x_update)
            #         + l2_regularizer * (||x_start + x_update'||_2**2 -
            #                             ||x_start + x_update||_2**2)
            #         + l1_regularizer * (||x_start + x_update'||_1 -
            #                             ||x_start + x_update||_1)
            #
            # In this algorithm we approximate the above argmin using
            # (univariate) proximal gradient descent:
            #
            # (*)  x_update[j] = prox_{t * l1_regularizer * L1}(
            #                 x_update[j] -
            #                 t * d/dz|z=0 UnivariateLocalLossSmoothComponent(z))
            #
            # where
            #
            #   UnivariateLocalLossSmoothComponent(z)
            #       := LocalLossSmoothComponent(x_update + z*e_j)
            #
            # and we approximate
            #
            #       d/dz UnivariateLocalLossSmoothComponent(z)
            #     = grad LocalLossSmoothComponent(x_update))[j]
            #    ~= (grad LossSmoothComponent(x_start)
            #         + x_update matmul HessianOfLossSmoothComponent(x_start))[j].
            #
            # To choose the parameter t, we squint and pretend that the inner term of
            # (*) is a Newton update as if we were using Newton's method to minimize
            # UnivariateLocalLossSmoothComponent.  That is, we choose t such that
            #
            #   -t * d/dz ULLSC = -learning_rate * (d/dz ULLSC) / (d^2/dz^2 ULLSC)
            #
            # at z=0.  Hence
            #
            #   t = learning_rate / (d^2/dz^2|z=0 ULLSC)
            #     = learning_rate / HessianOfLossSmoothComponent(
            #                           x_start + x_update)[j,j]
            #    ~= learning_rate / HessianOfLossSmoothComponent(
            #                           x_start)[j,j]
            #
            # The above approximation is equivalent to assuming that
            # HessianOfUnregularizedLoss is constant, i.e., ignoring third-order
            # effects.
            #
            # Note that because LossSmoothComponent is (assumed to be) convex, t is
            # positive.

            # In above notation, coord = j.
            coord = iter_ % dims
            # x_update_diff_norm_sq := ||x_update_end - x_update_start||_2**2,
            # computed incrementally, where x_update_end and x_update_start are as
            # defined in the convergence criteria.  Accordingly, we reset
            # x_update_diff_norm_sq to zero at the beginning of each sweep.
            x_update_diff_norm_sq = tf.where(
                tf.equal(coord, 0), tf.zeros_like(x_update_diff_norm_sq),
                x_update_diff_norm_sq)

            # Recall that x_update and hess_matmul_x_update have the rightmost
            # dimension transposed to the leftmost dimension.
            w_old = x_start[..., coord] + x_update[coord, ...]
            # This is the coordinatewise Newton update if no L1 regularization.
            # In above notation, newton_step = -t * (approximation of d/dz|z=0 ULLSC).
            second_deriv = _hessian_diag_elt_with_l2(coord)
            newton_step = -_mul_ignoring_nones(  # pylint: disable=invalid-unary-operand-type
                learning_rate, grad_loss_with_l2[..., coord] +
                hess_matmul_x_update[coord, ...]) / second_deriv

            # Applying the soft-threshold operator accounts for L1 regularization.
            # In above notation, delta =
            #     prox_{t*l1_regularizer*L1}(w_old + newton_step) - w_old.
            delta = (soft_threshold(
                w_old + newton_step,
                _mul_ignoring_nones(learning_rate, l1_regularizer) /
                second_deriv) - w_old)

            def _do_update(x_update_diff_norm_sq, x_update,
                           hess_matmul_x_update):  # pylint: disable=missing-docstring
                hessian_column_with_l2 = sparse_or_dense_matvecmul(
                    hessian_unregularized_loss_outer,
                    hessian_unregularized_loss_middle *
                    _sparse_or_dense_matmul_onehot(
                        hessian_unregularized_loss_outer, coord),
                    adjoint_a=True)

                if l2_regularizer is not None:
                    hessian_column_with_l2 += _one_hot_like(
                        hessian_column_with_l2,
                        coord,
                        on_value=2. * l2_regularizer)

                # Move the batch dimensions of `hessian_column_with_l2` to rightmost in
                # order to conform to `hess_matmul_x_update`.
                n = tf.rank(hessian_column_with_l2)
                perm = tf.roll(tf.range(n), shift=1, axis=0)
                hessian_column_with_l2 = tf.transpose(a=hessian_column_with_l2,
                                                      perm=perm)

                # Update the entire batch at `coord` even if `delta` may be 0 at some
                # batch coordinates. In those cases, adding `delta` is a no-op.
                x_update = tf.tensor_scatter_add(x_update, [[coord]], [delta])

                with tf.control_dependencies([x_update]):
                    x_update_diff_norm_sq_ = x_update_diff_norm_sq + delta**2
                    hess_matmul_x_update_ = (hess_matmul_x_update +
                                             delta * hessian_column_with_l2)

                    # Hint that loop vars retain the same shape.
                    x_update_diff_norm_sq_.set_shape(
                        x_update_diff_norm_sq_.shape.merge_with(
                            x_update_diff_norm_sq.shape))
                    hess_matmul_x_update_.set_shape(
                        hess_matmul_x_update_.shape.merge_with(
                            hess_matmul_x_update.shape))

                    return [
                        x_update_diff_norm_sq_, x_update, hess_matmul_x_update_
                    ]

            inputs_to_update = [
                x_update_diff_norm_sq, x_update, hess_matmul_x_update
            ]
            return [iter_ + 1] + tf.contrib.framework.smart_cond(
                # Note on why checking delta (a difference of floats) for equality to
                # zero is ok:
                #
                # First of all, x - x == 0 in floating point -- see
                # https://stackoverflow.com/a/2686671
                #
                # Delta will conceptually equal zero when one of the following holds:
                # (i)   |w_old + newton_step| <= threshold and w_old == 0
                # (ii)  |w_old + newton_step| > threshold and
                #       w_old + newton_step - sign(w_old + newton_step) * threshold
                #          == w_old
                #
                # In case (i) comparing delta to zero is fine.
                #
                # In case (ii), newton_step conceptually equals
                #     sign(w_old + newton_step) * threshold.
                # Also remember
                #     threshold = -newton_step / (approximation of d/dz|z=0 ULLSC).
                # So (ii) happens when
                #     (approximation of d/dz|z=0 ULLSC) == -sign(w_old + newton_step).
                # If we did not require LossSmoothComponent to be strictly convex,
                # then this could actually happen a non-negligible amount of the time,
                # e.g. if the loss function is piecewise linear and one of the pieces
                # has slope 1.  But since LossSmoothComponent is strictly convex, (ii)
                # should not systematically happen.
                tf.reduce_all(input_tensor=tf.equal(delta, 0.)),
                lambda: inputs_to_update,
                lambda: _do_update(*inputs_to_update))

        base_dtype = x_start.dtype.base_dtype
        iter_, x_update_diff_norm_sq, x_update, _ = tf.while_loop(
            cond=_loop_cond,
            body=_loop_body,
            loop_vars=[
                tf.zeros([], dtype=np.int32, name='iter'),
                tf.zeros(batch_shape,
                         dtype=base_dtype,
                         name='x_update_diff_norm_sq'),
                tf.zeros(update_shape, dtype=base_dtype, name='x_update'),
                tf.zeros(update_shape,
                         dtype=base_dtype,
                         name='hess_matmul_x_update'),
            ])

        # Convert back x_update to the shape of x_start by transposing the leftmost
        # dimension to the rightmost.
        n = tf.rank(x_update)
        perm = tf.roll(tf.range(n), shift=-1, axis=0)
        x_update = tf.transpose(a=x_update, perm=perm)

        converged = tf.reduce_all(input_tensor=x_update_diff_norm_sq <
                                  x_update_diff_norm_sq_convergence_threshold)
        return x_start + x_update, converged, iter_ / dims
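# Illustrative aside: `soft_threshold`, used in the coordinate update above,
# is the proximity operator of the L1 norm. Assuming the standard definition
# prox_{t*L1}(x) = sign(x) * max(|x| - t, 0) (the helper itself is not shown
# in this snippet), a minimal TensorFlow sketch:
import tensorflow as tf

def soft_threshold_sketch(x, threshold):
    # Shrink each element of `x` toward zero by `threshold`, clamping at zero;
    # entries with |x| <= threshold become exactly 0, which is what preserves
    # sparsity in the coordinate-descent update.
    return tf.sign(x) * tf.maximum(tf.abs(x) - threshold, 0.)

# Example: soft_threshold_sketch(tf.constant([-1.0, -0.2, 0.3, 2.0]), 0.5)
# evaluates to [-0.5, 0.0, 0.0, 1.5].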
예제 #58
    def __init__(self,
                 temperature,
                 logits=None,
                 probs=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="ExpRelaxedOneHotCategorical"):
        """Initialize ExpRelaxedOneHotCategorical using class log-probabilities.

    Args:
      temperature: A 0-D `Tensor` representing the temperature
        of a set of ExpRelaxedCategorical distributions. The temperature should
        be positive.
      logits: An N-D `Tensor`, `N >= 1`, representing the log probabilities
        of a set of ExpRelaxedCategorical distributions. The first
        `N - 1` dimensions index into a batch of independent distributions and
        the last dimension represents a vector of logits for each class. Only
        one of `logits` or `probs` should be passed in.
      probs: An N-D `Tensor`, `N >= 1`, representing the probabilities
        of a set of ExpRelaxedCategorical distributions. The first
        `N - 1` dimensions index into a batch of independent distributions and
        the last dimension represents a vector of probabilities for each
        class. Only one of `logits` or `probs` should be passed in.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
        parameters = dict(locals())
        with tf.compat.v1.name_scope(name, values=[logits, probs,
                                                   temperature]) as name:

            dtype = dtype_util.common_dtype([logits, probs, temperature],
                                            tf.float32)
            self._logits, self._probs = distribution_util.get_logits_and_probs(
                name=name,
                logits=logits,
                probs=probs,
                validate_args=validate_args,
                multidimensional=True,
                dtype=dtype)

            with tf.control_dependencies(
                [tf.compat.v1.assert_positive(temperature
                                              )] if validate_args else []):
                self._temperature = tf.convert_to_tensor(value=temperature,
                                                         name="temperature",
                                                         dtype=dtype)
                self._temperature_2d = tf.reshape(self._temperature, [-1, 1],
                                                  name="temperature_2d")

            logits_shape_static = self._logits.shape.with_rank_at_least(1)
            if logits_shape_static.ndims is not None:
                self._batch_rank = tf.convert_to_tensor(
                    value=logits_shape_static.ndims - 1,
                    dtype=tf.int32,
                    name="batch_rank")
            else:
                with tf.compat.v1.name_scope(name="batch_rank"):
                    self._batch_rank = tf.rank(self._logits) - 1

            with tf.compat.v1.name_scope(name="event_size"):
                self._event_size = tf.shape(input=self._logits)[-1]

        super(ExpRelaxedOneHotCategorical, self).__init__(
            dtype=dtype,
            reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
            validate_args=validate_args,
            allow_nan_stats=allow_nan_stats,
            parameters=parameters,
            graph_parents=[self._logits, self._probs, self._temperature],
            name=name)
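# Illustrative aside: a minimal usage sketch for the constructor above,
# assuming the class is exposed as
# tfp.distributions.ExpRelaxedOneHotCategorical. Samples live in log-simplex
# space, so exponentiating a sample yields a relaxed one-hot vector.
import tensorflow as tf
import tensorflow_probability as tfp

dist = tfp.distributions.ExpRelaxedOneHotCategorical(
    temperature=0.5,                  # lower temperature -> closer to one-hot
    logits=[-0.2, 1.0, 0.3])          # one distribution over 3 classes
log_sample = dist.sample()            # shape [3], log-probabilities
relaxed_one_hot = tf.exp(log_sample)  # non-negative, sums to 1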
예제 #59
def pad_to_bounding_box(image, offset_height, offset_width, target_height,
                        target_width, pad_value):
  """Pads the given image with the given pad_value.

  Works like tf.image.pad_to_bounding_box, except that it can pad the image
  with an arbitrary pad value and can handle images whose sizes are not
  known during graph construction.

  Args:
    image: 3-D tensor with shape [height, width, channels]
    offset_height: Number of rows of padding to add on top.
    offset_width: Number of columns of padding to add on the left.
    target_height: Height of output image.
    target_width: Width of output image.
    pad_value: Value to pad the image tensor with.

  Returns:
    3-D tensor of shape [target_height, target_width, channels].

  Raises:
    ValueError: If the shape of image is incompatible with the offset_* or
    target_* arguments.
  """
  with tf.name_scope(None, 'pad_to_bounding_box', [image]):
    image = tf.convert_to_tensor(image, name='image')
    original_dtype = image.dtype
    if original_dtype != tf.float32 and original_dtype != tf.float64:
      # If image dtype is not float, we convert it to int32 to avoid overflow.
      image = tf.cast(image, tf.int32)
    image_rank_assert = tf.Assert(
        tf.logical_or(
            tf.equal(tf.rank(image), 3),
            tf.equal(tf.rank(image), 4)),
        ['Wrong image tensor rank.'])
    with tf.control_dependencies([image_rank_assert]):
      image -= pad_value
    image_shape = image.get_shape()
    is_batch = True
    if image_shape.ndims == 3:
      is_batch = False
      image = tf.expand_dims(image, 0)
    elif image_shape.ndims is None:
      is_batch = False
      image = tf.expand_dims(image, 0)
      image.set_shape([None] * 4)
    elif image.get_shape().ndims != 4:
      raise ValueError('Input image must have either 3 or 4 dimensions.')
    _, height, width, _ = _image_dimensions(image, rank=4)
    target_width_assert = tf.Assert(
        tf.greater_equal(
            target_width, width),
        ['target_width must be >= width'])
    target_height_assert = tf.Assert(
        tf.greater_equal(target_height, height),
        ['target_height must be >= height'])
    with tf.control_dependencies([target_width_assert]):
      after_padding_width = target_width - offset_width - width
    with tf.control_dependencies([target_height_assert]):
      after_padding_height = target_height - offset_height - height
    offset_assert = tf.Assert(
        tf.logical_and(
            tf.greater_equal(after_padding_width, 0),
            tf.greater_equal(after_padding_height, 0)),
        ['target size not possible with the given target offsets'])
    batch_params = tf.stack([0, 0])
    height_params = tf.stack([offset_height, after_padding_height])
    width_params = tf.stack([offset_width, after_padding_width])
    channel_params = tf.stack([0, 0])
    with tf.control_dependencies([offset_assert]):
      paddings = tf.stack([batch_params, height_params, width_params,
                           channel_params])
    padded = tf.pad(image, paddings)
    if not is_batch:
      padded = tf.squeeze(padded, axis=[0])
    outputs = padded + pad_value
    if outputs.dtype != original_dtype:
      outputs = tf.cast(outputs, original_dtype)
    return outputs
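# Illustrative aside: a minimal usage sketch for `pad_to_bounding_box` above,
# with a dummy image. Subtracting `pad_value` before tf.pad (which always
# pads with zeros) and adding it back afterwards is what emulates padding
# with an arbitrary value.
import tensorflow as tf

image = tf.fill([4, 6, 3], 7)                 # dummy 4x6 image, 3 channels
padded = pad_to_bounding_box(image,
                             offset_height=2, offset_width=1,
                             target_height=10, target_width=8,
                             pad_value=255)
# `padded` has shape [10, 8, 3]; the border holds 255 and the interior 7.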
예제 #60
def cutout(
    images: TensorLike,
    mask_size: TensorLike,
    offset: TensorLike = (0, 0),
    constant_values: Number = 0,
    data_format: str = "channels_last",
) -> tf.Tensor:
    """Apply cutout (https://arxiv.org/abs/1708.04552) to images.

    This operation applies a `(mask_height x mask_width)` mask of zeros to
    a location within `images` specified by the offset.
    The pixel values filled in will be of the value `constant_values`.
    The location where the mask is applied is given by `offset`, either
    shared across the whole batch or specified per image.

    Args:
      images: A tensor of shape `(batch_size, height, width, channels)`
        (NHWC), `(batch_size, channels, height, width)` (NCHW).
      mask_size: Specifies the size of the zero mask that is applied to the
        images. The mask will be of size `(mask_height x mask_width)`.
        Note: `mask_size` should be divisible by 2.
      offset: A tuple of `(height, width)` specifying the mask center, or a
        tensor of shape `(batch_size, 2)` with one center per image.
      constant_values: The pixel value to fill in where the cutout mask is
        applied.
      data_format: A string, one of `channels_last` (default) or `channels_first`.
        The ordering of the dimensions in the inputs.
        `channels_last` corresponds to inputs with shape
        `(batch_size, ..., channels)` while `channels_first` corresponds to
        inputs with shape `(batch_size, channels, ...)`.
    Returns:
      An image Tensor.
    Raises:
      InvalidArgumentError: if `mask_size` is not divisible by 2.
    """
    if data_format == "channels_first":
        warnings.warn(
            "Addons will support only channel-last image operations in the future."
            "The argument `data_format` will be removed in Addons `0.12`",
            DeprecationWarning,
        )

    with tf.name_scope("cutout"):
        origin_shape = images.shape
        offset = tf.convert_to_tensor(offset)
        mask_size, data_format, image_height, image_width = _norm_params(
            images, mask_size, data_format)
        mask_size = mask_size // 2

        if tf.rank(offset) == 1:
            offset = tf.expand_dims(offset, 0)
        cutout_center_heights = offset[:, 0]
        cutout_center_widths = offset[:, 1]

        lower_pads = tf.maximum(0, cutout_center_heights - mask_size[0])
        upper_pads = tf.maximum(
            0, image_height - cutout_center_heights - mask_size[0])
        left_pads = tf.maximum(0, cutout_center_widths - mask_size[1])
        right_pads = tf.maximum(
            0, image_width - cutout_center_widths - mask_size[1])

        cutout_shape = tf.transpose(
            [
                image_height - (lower_pads + upper_pads),
                image_width - (left_pads + right_pads),
            ],
            [1, 0],
        )
        masks = tf.TensorArray(images.dtype, 0, dynamic_size=True)
        for i in tf.range(tf.shape(cutout_shape)[0]):
            padding_dims = [
                [lower_pads[i], upper_pads[i]],
                [left_pads[i], right_pads[i]],
            ]
            mask = tf.pad(
                tf.zeros(cutout_shape[i], dtype=images.dtype),
                padding_dims,
                constant_values=1,
            )
            masks = masks.write(i, mask)

        if data_format == "channels_last":
            mask_4d = tf.expand_dims(masks.stack(), -1)
            mask = tf.tile(mask_4d, [1, 1, 1, tf.shape(images)[-1]])
        else:
            mask_4d = tf.expand_dims(masks.stack(), 1)
            mask = tf.tile(mask_4d, [1, tf.shape(images)[1], 1, 1])
        images = tf.where(
            mask == 0,
            tf.ones_like(images, dtype=images.dtype) * constant_values,
            images,
        )
        images.set_shape(origin_shape)
        return images
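# Illustrative aside: a minimal usage sketch for `cutout` above (it mirrors
# tfa.image.cutout from TensorFlow Addons; the `_norm_params` helper it calls
# is not shown in this snippet). `mask_size` is the full side length of the
# zeroed square and should be even.
import tensorflow as tf

images = tf.random.uniform([2, 32, 32, 3])  # NHWC batch of two images
cut = cutout(images,
             mask_size=(8, 8),              # zero out an 8x8 square
             offset=(16, 16),               # mask center, shared by the batch
             constant_values=0)
# `cut` keeps shape [2, 32, 32, 3]; an 8x8 patch centered at (16, 16) in each
# image is replaced by zeros.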