Exemple #1
0
    def transform(self, x, compute_y=True, compute_log_det=True, name=None):
        """
        Transform `x` into `y`, and compute the log-determinant of `f` at `x`
        (i.e., :math:`\\log \\det \\frac{\\partial f(x)}{\\partial x}`).

        Args:
            x (Tensor): The samples of `x`.
            compute_y (bool): Whether or not to compute :math:`y = f(x)`?
                Default :obj:`True`.
            compute_log_det (bool): Whether or not to compute the
                log-determinant?  Default :obj:`True`.
            name (str): If specified, will use this name as the TensorFlow
                operational name scope.

        Returns:
            (tf.Tensor, tf.Tensor): `y` and the (maybe summed) log-determinant.
                The items in the returned tuple might be :obj:`None`
                if corresponding `compute_?` argument is set to :obj:`False`.

        Raises:
            ValueError: If both `compute_y` and `compute_log_det` are set
                to :obj:`False`.
        """
        # there is nothing to compute if both outputs are disabled
        if not compute_y and not compute_log_det:
            raise ValueError('At least one of `compute_y` and '
                             '`compute_log_det` should be True.')

        x = tf.convert_to_tensor(x)
        # build the flow lazily, using the first `x` that is seen
        if not self._has_built:
            self.build(x)

        # `validate` returns the (possibly converted) tensor to be used
        x = self._x_input_spec.validate('x', x)

        with tf.name_scope(name,
                           default_name=get_default_scope_name(
                               'transform', self),
                           values=[x]):
            # the actual computation is delegated to `_transform`
            y, log_det = self._transform(x, compute_y, compute_log_det)

            if compute_log_det:
                # check that the shape of `log_det` agrees with `x` once the
                # last `x_value_ndims` dimensions have been taken as the value
                with assert_deps([
                        assert_log_det_shape_matches_input(
                            log_det=log_det,
                            input=x,
                            value_ndims=self.x_value_ndims)
                ]) as asserted:
                    # presumably `tf.identity` attaches the assertions to
                    # `log_det` as control dependencies -- TODO confirm
                    if asserted:  # pragma: no cover
                        log_det = tf.identity(log_det)

            # `y` / `log_det` are only post-processed when they are not None
            if y is not None:
                maybe_add_histogram(y, 'y')
                y = maybe_check_numerics(y, 'y')

            if log_det is not None:
                maybe_add_histogram(log_det, 'log_det')
                log_det = maybe_check_numerics(log_det, 'log_det')

            return y, log_det
Exemple #2
0
    def sample(self,
               n_samples=None,
               group_ndims=0,
               is_reparameterized=None,
               compute_density=None,
               name=None):
        """
        Draw samples from this distribution via the inverse logistic CDF.

        Args:
            n_samples: If not :obj:`None`, it is prepended to the batch shape
                as the leading dimension of the samples.  May be a tensor.
            group_ndims: Passed through to the resulting `StochasticTensor`.
            is_reparameterized (bool or None): If :obj:`None`, falls back to
                `self.is_reparameterized`.  If the effective value is false,
                the gradient through the samples is stopped.
            compute_density: If true, `compute_density_immediately` is
                called on the result before it is returned.
            name (str): The TensorFlow name scope.  Default is
                `DiscretizedLogistic.sample`.

        Returns:
            StochasticTensor: The samples, wrapped as a stochastic tensor.
        """
        self._validate_sample_is_reparameterized_arg(is_reparameterized)
        if is_reparameterized is None:
            is_reparameterized = self.is_reparameterized

        with tf.name_scope(name, default_name='DiscretizedLogistic.sample'):
            # work out both the dynamic and the static shape of the samples
            dynamic_shape = self.batch_shape
            static_shape = self.get_batch_shape()
            if n_samples is not None:
                dynamic_shape = tf.concat([[n_samples], dynamic_shape], 0)
                # the leading dimension is statically unknown if `n_samples`
                # is itself a tensor
                leading_dim = None if is_tensor_object(n_samples) \
                    else n_samples
                static_shape = tf.TensorShape([leading_dim]).concatenate(
                    static_shape)

            # uniform noise, kept `epsilon` away from both 0 and 1
            u = tf.random_uniform(
                shape=dynamic_shape,
                minval=self._epsilon,
                maxval=1. - self._epsilon,
                dtype=self._param_dtype
            )
            u.set_shape(static_shape)

            # logit(u), i.e., the inverse CDF of the standard logistic
            logit_u = tf.log(u) - tf.log(1. - u)
            inverse_logistic_cdf = maybe_check_numerics(
                logit_u, 'inverse_logistic_cdf')

            # shift and scale the standard logistic noise
            scale = tf.exp(self.log_scale, name='scale')
            scale = maybe_check_numerics(scale, 'scale')
            sample = self.mean + scale * inverse_logistic_cdf
            if self.discretize_sample:
                sample = self._discretize(sample)
            sample = convert_to_tensor_and_cast(
                maybe_check_numerics(sample, 'sample'), self.dtype)

            # block the gradient for non-reparameterized samples
            if not is_reparameterized:
                sample = tf.stop_gradient(sample)

            result = StochasticTensor(
                distribution=self,
                tensor=sample,
                n_samples=n_samples,
                group_ndims=group_ndims,
                is_reparameterized=is_reparameterized
            )

            # optionally compute the density right away
            if compute_density:
                compute_density_immediately(result)

            return result
Exemple #3
0
 def _check_tensor(self, tensor, name):
     """
     Pass `tensor` through `maybe_add_histogram` and `maybe_check_numerics`,
     labelled as ``<class name>.<name>``.

     Args:
         tensor: The tensor to be processed.
         name: The suffix of the label; the class name of `self` is used
             as the prefix.

     Returns:
         The value returned by `maybe_check_numerics`.
     """
     label = '{}.{}'.format(self.__class__.__name__, name)
     maybe_add_histogram(tensor, label)
     checked = maybe_check_numerics(tensor, label)
     return checked
Exemple #4
0
def deconv2d(input,
             out_channels,
             kernel_size,
             strides=(1, 1),
             padding='same',
             channels_last=True,
             output_shape=None,
             activation_fn=None,
             normalizer_fn=None,
             weight_norm=False,
             gated=False,
             gate_sigmoid_bias=2.,
             kernel=None,
             kernel_initializer=None,
             kernel_regularizer=None,
             kernel_constraint=None,
             use_bias=None,
             bias=None,
             bias_initializer=tf.zeros_initializer(),
             bias_regularizer=None,
             bias_constraint=None,
             trainable=True,
             name=None,
             scope=None):
    """
    2D deconvolutional layer.

    Args:
        input (Tensor): The input tensor, at least 4-d.
        out_channels (int): The channel numbers of the deconvolution output.
        kernel_size (int or (int, int)): Kernel size over spatial dimensions.
        strides (int or (int, int)): Strides over spatial dimensions.
        padding: One of {"valid", "same"}, case in-sensitive.
        channels_last (bool): Whether or not the channel axis is the last
            axis in `input`? (i.e., the data format is "NHWC")
        output_shape: If specified, use this as the spatial shape
            ``(height, width)`` of the deconvolution output.  It may be a
            pair of values, or a 1-d tensor with exactly 2 elements.
            Otherwise compute the size of each dimension by::

                output_size = input_size * strides
                if padding == 'valid':
                    output_size += max(kernel_size - strides, 0)

        activation_fn: The activation function.
        normalizer_fn: The normalizer function.
        weight_norm (bool or (tf.Tensor) -> tf.Tensor)):
            If :obj:`True`, apply :func:`~tfsnippet.layers.weight_norm` on
            `kernel`.  `use_scale` will be :obj:`True` if `normalizer_fn`
            is not specified, and :obj:`False` otherwise.  The axis reduction
            will be determined by the layer.

            If it is a callable function, then it will be used to normalize
            the `kernel` instead of :func:`~tfsnippet.layers.weight_norm`.
            The user must ensure the axis reduction is correct by themselves.
        gated (bool): Whether or not to use gate on output?
            `output = activation_fn(output) * sigmoid(gate)`.
        gate_sigmoid_bias (Tensor): The bias added to `gate` before applying
            the `sigmoid` activation.
        kernel (Tensor): Instead of creating a new variable, use this tensor.
        kernel_initializer: The initializer for `kernel`.
            Would be ``default_kernel_initializer(...)`` if not specified.
        kernel_regularizer: The regularizer for `kernel`.
        kernel_constraint: The constraint for `kernel`.
        use_bias (bool or None): Whether or not to use `bias`?
            If :obj:`True`, will always use bias.
            If :obj:`None`, will use bias only if `normalizer_fn` is not given.
            If :obj:`False`, will never use bias.
            Default is :obj:`None`.
        bias (Tensor): Instead of creating a new variable, use this tensor.
        bias_initializer: The initializer for `bias`.
        bias_regularizer: The regularizer for `bias`.
        bias_constraint: The constraint for `bias`.
        trainable (bool): Whether or not the parameters are trainable?
        name (str): The default name of the variable scope, used when
            `scope` is not specified.  Falls back to ``"deconv2d"``.
        scope (str): The name of the variable scope.

    Returns:
        tf.Tensor: The output tensor.
    """
    input, in_channels, data_format = \
        validate_conv2d_input(input, channels_last)
    out_channels = validate_positive_int_arg('out_channels', out_channels)
    dtype = input.dtype.base_dtype
    if gated:
        # one half of the channels is the output, the other half is the gate
        out_channels *= 2

    # check functional arguments
    padding = validate_enum_arg('padding',
                                str(padding).upper(), ['VALID', 'SAME'])
    strides = validate_conv2d_strides_tuple('strides', strides, channels_last)

    weight_norm_fn = validate_weight_norm_arg(weight_norm,
                                              axis=-1,
                                              use_scale=normalizer_fn is None)
    if use_bias is None:
        use_bias = normalizer_fn is None

    # get the specification of outputs and parameters
    kernel_size = validate_conv2d_size_tuple('kernel_size', kernel_size)
    # NOTE: the kernel of a transposed convolution is laid out as
    # (h, w, out_channels, in_channels)
    kernel_shape = kernel_size + (out_channels, in_channels)
    bias_shape = (out_channels, )

    given_h, given_w = None, None
    given_output_shape = output_shape

    if is_tensor_object(given_output_shape):
        given_output_shape = tf.convert_to_tensor(given_output_shape)
    elif given_output_shape is not None:
        given_h, given_w = given_output_shape

    # validate the parameters
    if kernel is not None:
        kernel_spec = ParamSpec(shape=kernel_shape, dtype=dtype)
        kernel = kernel_spec.validate('kernel', kernel)
    if kernel_initializer is None:
        kernel_initializer = default_kernel_initializer(weight_norm)
    if bias is not None:
        bias_spec = ParamSpec(shape=bias_shape, dtype=dtype)
        bias = bias_spec.validate('bias', bias)

    # the main part of the deconv2d layer
    with tf.variable_scope(scope, default_name=name or 'deconv2d'):
        with tf.name_scope('output_shape'):
            # detect the input shape and axis arrangements
            input_shape = get_static_shape(input)
            if channels_last:
                c_axis, h_axis, w_axis = -1, -3, -2
            else:
                c_axis, h_axis, w_axis = -3, -2, -1

            output_shape = [None, None, None, None]
            output_shape[c_axis] = out_channels
            if given_output_shape is None:
                if input_shape[h_axis] is not None:
                    output_shape[h_axis] = get_deconv_output_length(
                        input_shape[h_axis], kernel_shape[0], strides[h_axis],
                        padding)
                if input_shape[w_axis] is not None:
                    output_shape[w_axis] = get_deconv_output_length(
                        input_shape[w_axis], kernel_shape[1], strides[w_axis],
                        padding)
            else:
                if not is_tensor_object(given_output_shape):
                    output_shape[h_axis] = given_h
                    output_shape[w_axis] = given_w

            # infer the batch shape in 4-d
            batch_shape = input_shape[:-3]
            if None not in batch_shape:
                output_shape[0] = int(np.prod(batch_shape))

            # now the static output shape is ready
            output_static_shape = tf.TensorShape(output_shape)

            # prepare for the dynamic batch shape
            if output_shape[0] is None:
                output_shape[0] = tf.reduce_prod(get_shape(input)[:-3])

            # prepare for the dynamic spatial dimensions
            if output_shape[h_axis] is None or output_shape[w_axis] is None:
                if given_output_shape is None:
                    input_shape = get_shape(input)
                    if output_shape[h_axis] is None:
                        output_shape[h_axis] = get_deconv_output_length(
                            input_shape[h_axis], kernel_shape[0],
                            strides[h_axis], padding)
                    if output_shape[w_axis] is None:
                        output_shape[w_axis] = get_deconv_output_length(
                            input_shape[w_axis], kernel_shape[1],
                            strides[w_axis], padding)
                else:
                    assert (is_tensor_object(given_output_shape))
                    with assert_deps([
                            assert_rank(given_output_shape, 1),
                            assert_scalar_equal(tf.size(given_output_shape), 2)
                    ]):
                        output_shape[h_axis] = given_output_shape[0]
                        output_shape[w_axis] = given_output_shape[1]

            # compose the final dynamic shape
            if any(is_tensor_object(s) for s in output_shape):
                output_shape = tf.stack(output_shape)
            else:
                output_shape = tuple(output_shape)

        # create the variables
        if kernel is None:
            kernel = model_variable('kernel',
                                    shape=kernel_shape,
                                    dtype=dtype,
                                    initializer=kernel_initializer,
                                    regularizer=kernel_regularizer,
                                    constraint=kernel_constraint,
                                    trainable=trainable)

        if weight_norm_fn is not None:
            kernel = weight_norm_fn(kernel)

        maybe_add_histogram(kernel, 'kernel')
        kernel = maybe_check_numerics(kernel, 'kernel')

        if use_bias and bias is None:
            # the bias must share the dtype of `input` (as is required of a
            # user specified `bias`), otherwise `tf.nn.bias_add` would fail
            # for inputs which are not of the default variable dtype
            bias = model_variable('bias',
                                  shape=bias_shape,
                                  dtype=dtype,
                                  initializer=bias_initializer,
                                  regularizer=bias_regularizer,
                                  constraint=bias_constraint,
                                  trainable=trainable)
            maybe_add_histogram(bias, 'bias')
            bias = maybe_check_numerics(bias, 'bias')

        # flatten to 4d
        output, s1, s2 = flatten_to_ndims(input, 4)

        # do convolution or deconvolution
        output = tf.nn.conv2d_transpose(value=output,
                                        filter=kernel,
                                        output_shape=output_shape,
                                        strides=strides,
                                        padding=padding,
                                        data_format=data_format)
        if output_static_shape is not None:
            output.set_shape(output_static_shape)

        # add bias
        if use_bias:
            output = tf.nn.bias_add(output, bias, data_format=data_format)

        # apply the normalization function if specified
        if normalizer_fn is not None:
            output = normalizer_fn(output)

        # split into halves if gated
        if gated:
            output, gate = tf.split(output, 2, axis=c_axis)

        # apply the activation function if specified
        if activation_fn is not None:
            output = activation_fn(output)

        # apply the gate if required
        if gated:
            output = output * tf.sigmoid(gate + gate_sigmoid_bias, name='gate')

        # unflatten back to original shape
        output = unflatten_from_ndims(output, s1, s2)

        maybe_add_histogram(output, 'output')
        output = maybe_check_numerics(output, 'output')

    return output
Exemple #5
0
def conv2d(input,
           out_channels,
           kernel_size,
           strides=(1, 1),
           dilations=1,
           padding='same',
           channels_last=True,
           activation_fn=None,
           normalizer_fn=None,
           weight_norm=False,
           gated=False,
           gate_sigmoid_bias=2.,
           kernel=None,
           kernel_mask=None,
           kernel_initializer=None,
           kernel_regularizer=None,
           kernel_constraint=None,
           use_bias=None,
           bias=None,
           bias_initializer=tf.zeros_initializer(),
           bias_regularizer=None,
           bias_constraint=None,
           trainable=True,
           name=None,
           scope=None):
    """
    2D convolutional layer.

    Args:
        input (Tensor): The input tensor, at least 4-d.
        out_channels (int): The channel numbers of the output.
        kernel_size (int or (int, int)): Kernel size over spatial dimensions.
        strides (int or (int, int)): Strides over spatial dimensions.
        dilations (int): The dilation factor over spatial dimensions.
        padding: One of {"valid", "same"}, case in-sensitive.
        channels_last (bool): Whether or not the channel axis is the last
            axis in `input`? (i.e., the data format is "NHWC")
        activation_fn: The activation function.
        normalizer_fn: The normalizer function.
        weight_norm (bool or (tf.Tensor) -> tf.Tensor)):
            If :obj:`True`, apply :func:`~tfsnippet.layers.weight_norm` on
            `kernel`.  `use_scale` will be :obj:`True` if `normalizer_fn`
            is not specified, and :obj:`False` otherwise.  The axis reduction
            will be determined by the layer.

            If it is a callable function, then it will be used to normalize
            the `kernel` instead of :func:`~tfsnippet.layers.weight_norm`.
            The user must ensure the axis reduction is correct by themselves.
        gated (bool): Whether or not to use gate on output?
            `output = activation_fn(output) * sigmoid(gate)`.
        gate_sigmoid_bias (Tensor): The bias added to `gate` before applying
            the `sigmoid` activation.
        kernel (Tensor): Instead of creating a new variable, use this tensor.
        kernel_mask (Tensor): If specified, multiply this mask onto `kernel`,
            i.e., the actual kernel to use will be `kernel * kernel_mask`.
        kernel_initializer: The initializer for `kernel`.
            Would be ``default_kernel_initializer(...)`` if not specified.
        kernel_regularizer: The regularizer for `kernel`.
        kernel_constraint: The constraint for `kernel`.
        use_bias (bool or None): Whether or not to use `bias`?
            If :obj:`True`, will always use bias.
            If :obj:`None`, will use bias only if `normalizer_fn` is not given.
            If :obj:`False`, will never use bias.
            Default is :obj:`None`.
        bias (Tensor): Instead of creating a new variable, use this tensor.
        bias_initializer: The initializer for `bias`.
        bias_regularizer: The regularizer for `bias`.
        bias_constraint: The constraint for `bias`.
        trainable (bool): Whether or not the parameters are trainable?
        name (str): The default name of the variable scope, used when
            `scope` is not specified.  Falls back to ``"conv2d"``.
        scope (str): The name of the variable scope.

    Returns:
        tf.Tensor: The output tensor.

    Raises:
        ValueError: If `dilations` > 1 is combined with
            `channels_last` == False, or with any stride > 1.
    """
    input, in_channels, data_format = \
        validate_conv2d_input(input, channels_last)
    out_channels = validate_positive_int_arg('out_channels', out_channels)
    dtype = input.dtype.base_dtype
    if gated:
        # one half of the channels is the output, the other half is the gate
        out_channels *= 2

    # check functional arguments
    padding = validate_enum_arg('padding',
                                str(padding).upper(), ['VALID', 'SAME'])
    # `original_strides` keeps the 2-element spatial strides, which is
    # required by the 1x1 convolution shortcut below
    original_strides = validate_conv2d_size_tuple('strides', strides)
    strides = validate_conv2d_strides_tuple('strides', original_strides,
                                            channels_last)
    dilations = validate_positive_int_arg('dilations', dilations)

    if dilations > 1 and not channels_last:
        raise ValueError('`channels_last` == False is incompatible with '
                         '`dilations` > 1.')

    if any(i > 1 for i in strides) and dilations > 1:
        raise ValueError('`strides` > 1 is incompatible with `dilations` > 1.')

    weight_norm_fn = validate_weight_norm_arg(weight_norm,
                                              axis=-1,
                                              use_scale=normalizer_fn is None)
    if use_bias is None:
        use_bias = normalizer_fn is None

    # get the specification of outputs and parameters
    kernel_size = validate_conv2d_size_tuple('kernel_size', kernel_size)
    kernel_shape = kernel_size + (in_channels, out_channels)
    bias_shape = (out_channels, )

    # validate the parameters
    if kernel is not None:
        kernel_spec = ParamSpec(shape=kernel_shape, dtype=dtype)
        kernel = kernel_spec.validate('kernel', kernel)
    if kernel_mask is not None:
        kernel_mask_spec = InputSpec(dtype=dtype)
        kernel_mask = kernel_mask_spec.validate('kernel_mask', kernel_mask)
    if kernel_initializer is None:
        kernel_initializer = default_kernel_initializer(weight_norm)
    if bias is not None:
        bias_spec = ParamSpec(shape=bias_shape, dtype=dtype)
        bias = bias_spec.validate('bias', bias)

    # the main part of the conv2d layer
    with tf.variable_scope(scope, default_name=name or 'conv2d'):
        c_axis = -1 if channels_last else -3

        # create the variables
        if kernel is None:
            kernel = model_variable('kernel',
                                    shape=kernel_shape,
                                    dtype=dtype,
                                    initializer=kernel_initializer,
                                    regularizer=kernel_regularizer,
                                    constraint=kernel_constraint,
                                    trainable=trainable)

        if weight_norm_fn is not None:
            kernel = weight_norm_fn(kernel)
        if kernel_mask is not None:
            kernel = kernel * kernel_mask

        maybe_add_histogram(kernel, 'kernel')
        kernel = maybe_check_numerics(kernel, 'kernel')

        if use_bias and bias is None:
            # the bias must share the dtype of `input` (as is required of a
            # user specified `bias`), otherwise `tf.nn.bias_add` would fail
            # for inputs which are not of the default variable dtype
            bias = model_variable('bias',
                                  shape=bias_shape,
                                  dtype=dtype,
                                  initializer=bias_initializer,
                                  regularizer=bias_regularizer,
                                  constraint=bias_constraint,
                                  trainable=trainable)
            maybe_add_histogram(bias, 'bias')
            bias = maybe_check_numerics(bias, 'bias')

        # special optimization: use dense instead of 1x1 conv if possible
        if dilations == 1 and kernel_size == (1, 1) and channels_last:
            with tf.name_scope('conv2d_1x1'):
                # drop the two unit spatial dimensions of the kernel
                conv2d_1x1_kernel = tf.reshape(kernel,
                                               kernel_shape[2:],
                                               name='conv2d_1x1_kernel')
                # a strided 1x1 convolution only reads every `stride`-th pixel
                output = input[
                    ..., ::original_strides[0], ::original_strides[1], :]

                # flatten to 2d
                output, s1, s2 = flatten_to_ndims(output, 2)
                output = tf.matmul(output, conv2d_1x1_kernel)

        else:
            # flatten to 4d
            output, s1, s2 = flatten_to_ndims(input, 4)

            # do convolution
            if dilations > 1:
                output = tf.nn.atrous_conv2d(value=output,
                                             filters=kernel,
                                             rate=dilations,
                                             padding=padding)
            else:
                output = tf.nn.conv2d(input=output,
                                      filter=kernel,
                                      strides=strides,
                                      padding=padding,
                                      data_format=data_format,
                                      dilations=[1] * 4)

        # add bias
        if use_bias:
            output = tf.nn.bias_add(output, bias, data_format=data_format)

        # apply the normalization function if specified
        if normalizer_fn is not None:
            output = normalizer_fn(output)

        # split into halves if gated
        if gated:
            output, gate = tf.split(output, 2, axis=c_axis)

        # apply the activation function if specified
        if activation_fn is not None:
            output = activation_fn(output)

        # apply the gate if required
        if gated:
            output = output * tf.sigmoid(gate + gate_sigmoid_bias, name='gate')

        # unflatten back to original shape
        output = unflatten_from_ndims(output, s1, s2)

        maybe_add_histogram(output, 'output')
        output = maybe_check_numerics(output, 'output')
    return output
Exemple #6
0
    def _transform(self, x, compute_y, compute_log_det):
        """
        Compute ``y = (x + bias) * scale`` and / or its log-determinant.

        If the parameters have not been initialized yet, `bias` and
        `pre_scale` are first assigned from the moments of `x`, computed
        over all the axes of `x` which are not listed in `self.axis`.

        Args:
            x (Tensor): The input tensor.
            compute_y (bool): Whether or not to compute `y`?
            compute_log_det (bool): Whether or not to compute the
                log-determinant?

        Returns:
            (tf.Tensor, tf.Tensor): `y` and the log-determinant, each of
                which is :obj:`None` if it was not requested.

        Raises:
            ValueError: If the parameters are not initialized, and `x` has
                the same number of dimensions as the aligned variable shape.
        """
        # check the argument
        x_dtype = x.dtype.base_dtype
        x_shape = get_static_shape(x)
        x_rank = len(x_shape)
        assert (-x_rank <= -self.value_ndims <= min(self.axis))
        # the (negative) axes of `x` which carry no parameter, in order
        reduce_axis = tuple(
            a for a in range(-x_rank, 0) if a not in self.axis)

        # prepare for the parameters
        if self._initialized:
            bias = self._bias
            pre_scale = self._pre_scale
        else:
            if x_rank == len(self._var_shape_aligned):
                raise ValueError('Initializing ActNorm requires multiple '
                                 '`x` samples, thus `x` must have at least '
                                 'one more dimension than the variable shape: '
                                 'x {} vs variable shape {}.'.format(
                                     x, self._var_shape_aligned))

            with tf.name_scope('initialization'):
                x_mean, x_var = tf.nn.moments(x, reduce_axis)
                x_mean = tf.reshape(x_mean, self._var_shape)
                x_var = tf.reshape(x_var, self._var_shape)
                x_var = maybe_check_numerics(
                    x_var, 'numeric issues in computed x_var')

                # center the data
                bias = self._bias.assign(-x_mean)

                # choose `pre_scale` according to the variance of the data
                if self._scale_type == 'exp':
                    # pre_scale = -0.5 * log(var)
                    neg_half = -tf.constant(.5, dtype=x_dtype)
                    init_value = neg_half * tf.log(
                        tf.maximum(x_var, self._epsilon))
                    check_message = 'numeric issues in initializing log_scale'
                else:
                    assert (self._scale_type == 'linear')
                    # pre_scale = 1 / sqrt(var)
                    one = tf.constant(1., dtype=x_dtype)
                    init_value = one / tf.sqrt(
                        tf.maximum(x_var, self._epsilon))
                    check_message = 'numeric issues in initializing scale'

                pre_scale = maybe_check_numerics(
                    self._pre_scale.assign(init_value), check_message)
            self._initialized = True

        # align the shape of variables, and create the scale object
        bias = tf.reshape(bias, self._var_shape_aligned)
        pre_scale = tf.reshape(pre_scale, self._var_shape_aligned)

        if self._scale_type == 'exp':
            scale = ExpScale(pre_scale, self._epsilon)
        else:
            assert (self._scale_type == 'linear')
            scale = LinearScale(pre_scale, self._epsilon)

        # compute y
        y = (x + bias) * scale if compute_y else None

        # compute log_det
        log_det = None
        if compute_log_det:
            with tf.name_scope('log_det'):
                log_det = scale.log_scale()
                n_inner = min(self.value_ndims, len(self._var_shape_aligned))
                n_outer = self.value_ndims - n_inner

                # reduce the last `min(value_ndims, len(var_shape))` dimensions
                if n_inner > 0:
                    log_det = tf.reduce_sum(
                        log_det, axis=list(range(-n_inner, 0)))

                    # Some of these axes carry no parameter (they are among
                    # `reduce_axis`), so that summing over them did not count
                    # their elements.  Compensate for this by multiplying
                    # `log_det` with the number of such elements in `x`.
                    inner_reduce_axis = tuple(
                        a for a in reduce_axis if a >= -n_inner)
                    inner_sizes = get_dimensions_size(x, inner_reduce_axis)
                    if isinstance(inner_sizes, tuple):
                        # the sizes are statically known
                        factor = np.prod(inner_sizes, dtype=np.float32)
                    else:
                        factor = tf.cast(tf.reduce_prod(inner_sizes),
                                         dtype=log_det.dtype)
                    log_det = log_det * factor

                # we need to broadcast `log_det` to match the shape of `x`
                log_det = broadcast_log_det_against_input(
                    log_det, x, value_ndims=n_inner)

                # reduce the remaining dimensions
                if n_outer > 0:
                    log_det = tf.reduce_sum(
                        log_det, axis=list(range(-n_outer, 0)))

        return y, log_det
Exemple #7
0
    def log_prob(self, given, group_ndims=0, name=None):
        """
        Compute the log-probability of `given` under this distribution.

        The probability of a bin is the difference of the logistic CDF at
        the upper and the lower boundary of the bin centered at `given`.

        Args:
            given: The values, will be converted into a tensor.
            group_ndims: Passed to `reduce_group_ndims`, along with
                `tf.reduce_sum`, to reduce the computed log-probability.
            name (str): The TensorFlow name scope.  Default is
                `DiscretizedLogistic.log_prob`.

        Returns:
            The log-probability tensor, after the `group_ndims` reduction.
        """
        given = tf.convert_to_tensor(given)

        # `name` must be the first argument of `tf.name_scope`, otherwise
        # the name specified by the caller would be ignored
        with tf.name_scope(name,
                           default_name='DiscretizedLogistic.log_prob',
                           values=[given]):
            if self.discretize_given:
                given = self._discretize(given)

            # inv_scale = 1. / exp(log_scale)
            inv_scale = maybe_check_numerics(
                tf.exp(-self.log_scale, name='inv_scale'), 'inv_scale')
            # half_bin = bin_size / 2
            half_bin = self.bin_size * .5
            # delta = bin_size / scale, half_delta = delta / 2
            half_delta = half_bin * inv_scale

            # x_mid = (x - mean) / scale
            x_mid = (given - self.mean) * inv_scale

            # x_low = (x - mean - bin_size * 0.5) / scale
            x_low = x_mid - half_delta
            # x_high = (x - mean + bin_size * 0.5) / scale
            x_high = x_mid + half_delta

            cdf_low = tf.sigmoid(x_low, name='cdf_low')
            cdf_high = tf.sigmoid(x_high, name='cdf_high')
            cdf_delta = cdf_high - cdf_low

            # the middle bins cases:
            #   log(sigmoid(x_high) - sigmoid(x_low))
            #
            # `cdf_delta` is clipped from below by `epsilon`, in order to
            # keep the logarithm finite.
            #
            # NOTE: an alternative form for a very small `cdf_delta`, as in
            # PixelCNN++, would be
            #   x_mid + log(bin_size) - log_scale - 2 * softplus(x_mid)
            # It is not used here.
            middle_bins_pdf = tf.log(tf.maximum(cdf_delta, self._epsilon))

            log_prob = maybe_check_numerics(middle_bins_pdf, 'middle_bins_pdf')

            if self.biased_edges and self.min_val is not None:
                # broadcasted given, shape == x_mid
                broadcast_given = broadcast_to_shape(given, get_shape(x_low))

                # the left-edge bin case
                #   log(sigmoid(x_high) - sigmoid(-infinity))
                left_edge = self.min_val + half_bin
                left_edge_pdf = maybe_check_numerics(-tf.nn.softplus(-x_high),
                                                     'left_edge_pdf')
                log_prob = tf.where(tf.less(broadcast_given, left_edge),
                                    left_edge_pdf, log_prob)

                # the right-edge bin case
                #   log(sigmoid(infinity) - sigmoid(x_low))
                right_edge = self.max_val - half_bin
                right_edge_pdf = maybe_check_numerics(-tf.nn.softplus(x_low),
                                                      'right_edge_pdf')
                log_prob = tf.where(
                    tf.greater_equal(broadcast_given, right_edge),
                    right_edge_pdf, log_prob)

            # now reduce the group_ndims
            log_prob = reduce_group_ndims(tf.reduce_sum, log_prob, group_ndims)

        return log_prob
Exemple #8
0
def weight_norm(kernel,
                axis,
                use_scale=True,
                scale=None,
                scale_initializer=None,
                scale_regularizer=None,
                scale_constraint=None,
                trainable=True,
                epsilon=1e-12,
                name=None,
                scope=None):
    """
    Apply the weight normalization of (Salimans & Kingma, 2016) to `kernel`.

    In rough terms, the returned kernel is::

        scale * kernel / tf.sqrt(
            tf.reduce_sum(kernel ** 2, axis=<dimensions not in `axis`>,
                          keepdims=True)
        )

    Data-dependent initialization of `scale` is not supported here.  When
    that is required, turn off `scale` and combine :func:`weight_norm` with
    :func:`~tfsnippet.layers.act_norm`.

    Args:
        kernel: Tensor, the weight `w` to be normalized.
        axis (int or tuple[int]): The dimensions of `kernel` that are kept.
            The L2 norm is taken over every dimension *not* listed here.
            Must not be empty.
        use_scale (bool): Whether or not to multiply the normalized kernel
            by `scale`.  Default :obj:`True`.
        scale (Tensor): Use this tensor as `scale` instead of creating a new
            variable.  Must be left as :obj:`None` if `use_scale` is False.
        scale_initializer: The initializer for `scale`.  When not specified,
            a ones initializer of the kernel's dtype is used.
        scale_regularizer: The regularizer for `scale`.
        scale_constraint: The constraint for `scale`.
        trainable (bool): Whether or not the created variable is trainable?
        epsilon: Small float number to avoid dividing by zero.
        name (str): Default name of the variable scope, used when `scope`
            is not specified.  Falls back to ``'weight_norm'``.
        scope: Passed as the first argument to :func:`tf.variable_scope`.

    Returns:
        tf.Tensor: The normalized kernel, multiplied by `scale` if
            `use_scale` is True.

    Raises:
        ValueError: If `use_scale` is False but `scale` is specified, or
            if `axis` is empty.
    """
    # argument sanity checks come first, before any graph op is created
    if not use_scale and scale is not None:
        raise ValueError('`use_scale` is False but `scale` is specified.')
    kept_dims = validate_int_tuple_arg('axis', axis)
    if not kept_dims:
        raise ValueError('`axis` cannot be empty.')

    # gather the static facts about the kernel
    kernel = tf.convert_to_tensor(kernel)
    static_shape = get_static_shape(kernel)
    base_dtype = kernel.dtype.base_dtype
    scale_spec = ParamSpec(static_shape, dtype=base_dtype)

    if scale_initializer is None:
        scale_initializer = tf.ones_initializer(dtype=base_dtype)
    if scale is not None:
        scale = scale_spec.validate('scale', scale)

    # the norm is computed over every dimension that is not in `axis`
    ndims = len(static_shape)
    kept_dims = resolve_negative_axis(ndims, kept_dims)
    norm_dims = tuple(d for d in range(ndims) if d not in kept_dims)

    with tf.variable_scope(scope, default_name=name or 'weight_norm'):
        # project the kernel onto the unit sphere along `norm_dims`
        unit_kernel = tf.nn.l2_normalize(
            kernel, axis=norm_dims, epsilon=epsilon)
        unit_kernel = maybe_check_numerics(
            unit_kernel, 'weight-normalized kernel')

        # without scaling, the unit-norm kernel is already the answer
        if not use_scale:
            return unit_kernel

        # only create the scaling variable if the caller did not supply one
        if scale is None:
            scale = maybe_check_numerics(
                model_variable('scale',
                               shape=static_shape,
                               dtype=base_dtype,
                               initializer=scale_initializer,
                               regularizer=scale_regularizer,
                               constraint=scale_constraint,
                               trainable=trainable),
                'scale')

        return unit_kernel * scale
Exemple #9
0
    def inverse_transform(self,
                          y,
                          compute_x=True,
                          compute_log_det=True,
                          name=None):
        """
        Transform `y` into `x`, and compute the log-determinant of `f^{-1}` at
        `y` (i.e.,
        :math:`\\log \\det \\frac{\\partial f^{-1}(y)}{\\partial y}`).

        Unlike :meth:`transform`, this method never builds the flow itself:
        the flow must already have been built before it is called.

        Args:
            y (Tensor): The samples of `y`.
            compute_x (bool): Whether or not to compute :math:`x = f^{-1}(y)`?
                Default :obj:`True`.
            compute_log_det (bool): Whether or not to compute the
                log-determinant?  Default :obj:`True`.
            name (str): If specified, will use this name as the TensorFlow
                operational name scope.

        Returns:
            (tf.Tensor, tf.Tensor): `x` and the (maybe summed) log-determinant.
                The items in the returned tuple might be :obj:`None`
                if corresponding `compute_?` argument is set to :obj:`False`.

        Raises:
            RuntimeError: If the flow is not explicitly invertible.
            ValueError: If both `compute_x` and `compute_log_det` are set
                to :obj:`False`.
            RuntimeError: If the flow has not been built yet.
        """
        # the argument and state checks run before any TensorFlow op is made
        if not self.explicitly_invertible:
            raise RuntimeError(
                'The flow is not explicitly invertible: {!r}'.format(self))
        if not compute_x and not compute_log_det:
            raise ValueError('At least one of `compute_x` and '
                             '`compute_log_det` should be True.')
        if not self._has_built:
            raise RuntimeError('`inverse_transform` cannot be called before '
                               'the flow has been built; it can be built by '
                               'calling `build`, `apply` or `transform`: '
                               '{!r}'.format(self))

        y = tf.convert_to_tensor(y)
        y = self._y_input_spec.validate('y', y)

        with tf.name_scope(name,
                           default_name=get_default_scope_name(
                               'inverse_transform', self),
                           values=[y]):
            x, log_det = self._inverse_transform(y, compute_x, compute_log_det)

            if compute_log_det:
                # check the shape of `log_det` against `y`; when an assertion
                # was actually added, route `log_det` through an identity op
                # created under that dependency.
                with assert_deps([
                        assert_log_det_shape_matches_input(
                            log_det=log_det,
                            input=y,
                            value_ndims=self.y_value_ndims)
                ]) as asserted:
                    if asserted:  # pragma: no cover
                        log_det = tf.identity(log_det)

            # either output may be None when its `compute_?` flag is False
            if x is not None:
                maybe_add_histogram(x, 'x')
                x = maybe_check_numerics(x, 'x')

            if log_det is not None:
                maybe_add_histogram(log_det, 'log_det')
                log_det = maybe_check_numerics(log_det, 'log_det')

            return x, log_det
Exemple #10
0
    def log_prob(self, given, group_ndims=0, name=None):
        """
        Compute the log-probability of `given` under this discretized
        logistic distribution.

        For each element, the probability is the logistic mass of the bin of
        width `bin_size` centered at `given`, i.e.,
        ``sigmoid(x_high) - sigmoid(x_low)``, where `x_low` and `x_high` are
        the standardized bin boundaries.  When this difference is not greater
        than `self._epsilon`, an alternative form (as in PixelCNN++) is used
        instead.  If biased edges are enabled and `min_val` / `max_val` is
        not :obj:`None`, the left-most / right-most bin absorbs all the mass
        towards negative / positive infinity.

        Args:
            given (Tensor): The samples to be evaluated.
            group_ndims: Forwarded to `reduce_group_ndims` together with
                :func:`tf.reduce_sum`.  Presumably the number of trailing
                dimensions to be summed up -- confirm against that helper.
                Default 0.
            name (str): If specified, will use this name as the TensorFlow
                operational name scope.  Otherwise
                ``'DiscretizedLogistic.log_prob'`` is used.

        Returns:
            tf.Tensor: The log-probability, after the `group_ndims`
                reduction.
        """
        given = tf.convert_to_tensor(given)

        # honor the caller-specified `name`; fall back to the fixed default
        with tf.name_scope(name,
                           default_name='DiscretizedLogistic.log_prob',
                           values=[given]):
            # inv_scale = 1. / scale
            inv_scale = maybe_check_numerics(
                tf.exp(-self.log_scale, name='inv_scale'), 'inv_scale')
            # half_bin = bin_size / 2
            half_bin = self._bin_size * .5
            # delta = bin_size / scale, half_delta = delta / 2
            half_delta = half_bin * inv_scale
            # log(delta) = log(bin_size) - log(scale)
            log_delta = tf.log(self._bin_size) - self.log_scale

            # standardized bin center and bin boundaries
            x_mid = (given - self.mean) * inv_scale
            x_low = x_mid - half_delta
            x_high = x_mid + half_delta

            cdf_low = tf.sigmoid(x_low, name='cdf_low')
            cdf_high = tf.sigmoid(x_high, name='cdf_high')

            # the middle bins cases:
            #   log(sigmoid(x_high) - sigmoid(x_low))
            # but in extreme cases where `sigmoid(x_high) - sigmoid(x_low)`
            # is very small, we use an alternative form, as in PixelCNN++.
            cdf_delta = cdf_high - cdf_low
            middle_bins_pdf = tf.where(
                cdf_delta > self._epsilon,
                # to avoid NaNs pollute the select statement, we have to use
                # `maximum(cdf_delta, 1e-12)`
                tf.log(tf.maximum(cdf_delta, 1e-12)),
                # the alternative form.  basically it can be derived by using
                # the mean value theorem for integration.
                x_mid + log_delta - 2. * tf.nn.softplus(x_mid)
            )
            log_prob = maybe_check_numerics(middle_bins_pdf, 'middle_bins_pdf')

            # broadcasted given, shape == x_mid
            broadcast_given = broadcast_to_shape(given, get_shape(x_mid))

            # the left-edge bin case
            #   log(sigmoid(x_high) - sigmoid(-infinity))
            if self._biased_edges and self.min_val is not None:
                left_edge = self._min_val + half_bin
                left_edge_pdf = maybe_check_numerics(
                    -tf.nn.softplus(-x_high), 'left_edge_pdf')
                log_prob = tf.where(
                    broadcast_given < left_edge, left_edge_pdf, log_prob)

            # the right-edge bin case
            #   log(sigmoid(infinity) - sigmoid(x_low))
            if self._biased_edges and self.max_val is not None:
                right_edge = self._max_val - half_bin
                right_edge_pdf = maybe_check_numerics(
                    -tf.nn.softplus(x_low), 'right_edge_pdf')
                log_prob = tf.where(
                    broadcast_given >= right_edge, right_edge_pdf, log_prob)

            # now reduce the group_ndims
            log_prob = reduce_group_ndims(tf.reduce_sum, log_prob, group_ndims)

        return log_prob
Exemple #11
0
 def check_tensor(tensor, name):
     """
     Pass `tensor` to `maybe_add_histogram` under `name`, then return the
     result of `maybe_check_numerics` applied to the same tensor and name.
     """
     maybe_add_histogram(tensor, name)
     checked = maybe_check_numerics(tensor, name)
     return checked