def transform(self, x, compute_y=True, compute_log_det=True, name=None):
    """
    Transform `x` into `y`, and compute the log-determinant of `f` at `x`
    (i.e., :math:`\\log \\det \\frac{\\partial f(x)}{\\partial x}`).

    Args:
        x (Tensor): The samples of `x`.
        compute_y (bool): Whether or not to compute :math:`y = f(x)`?
            Default :obj:`True`.
        compute_log_det (bool): Whether or not to compute the
            log-determinant?  Default :obj:`True`.
        name (str): If specified, will use this name as the TensorFlow
            operational name scope.

    Returns:
        (tf.Tensor, tf.Tensor): `y` and the (maybe summed) log-determinant.
            The items in the returned tuple might be :obj:`None`
            if the corresponding `compute_?` argument is set to :obj:`False`.

    Raises:
        ValueError: If both `compute_y` and `compute_log_det` are set
            to :obj:`False`.
    """
    if not compute_y and not compute_log_det:
        raise ValueError('At least one of `compute_y` and '
                         '`compute_log_det` should be True.')

    x = tf.convert_to_tensor(x)
    if not self._has_built:
        self.build(x)
    x = self._x_input_spec.validate('x', x)

    with tf.name_scope(
            name,
            default_name=get_default_scope_name('transform', self),
            values=[x]):
        y, log_det = self._transform(x, compute_y, compute_log_det)

        if compute_log_det:
            with assert_deps([
                    assert_log_det_shape_matches_input(
                        log_det=log_det,
                        input=x,
                        value_ndims=self.x_value_ndims)]) as asserted:
                if asserted:  # pragma: no cover
                    log_det = tf.identity(log_det)

        if y is not None:
            maybe_add_histogram(y, 'y')
            y = maybe_check_numerics(y, 'y')

        if log_det is not None:
            maybe_add_histogram(log_det, 'log_det')
            log_det = maybe_check_numerics(log_det, 'log_det')

        return y, log_det
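# Usage sketch (hypothetical `flow` instance and input tensor `x`; not taken
# from the library's docs): either output can be requested selectively, and
# requesting neither raises a ValueError.
#
#     y, log_det = flow.transform(x)                    # both outputs
#     y, _ = flow.transform(x, compute_log_det=False)   # only y = f(x)
#     _, log_det = flow.transform(x, compute_y=False)   # only the log-det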
def sample(self, n_samples=None, group_ndims=0, is_reparameterized=None,
           compute_density=None, name=None):
    self._validate_sample_is_reparameterized_arg(is_reparameterized)
    if is_reparameterized is None:
        is_reparameterized = self.is_reparameterized

    with tf.name_scope(name, default_name='DiscretizedLogistic.sample'):
        # sample from the uniform distribution
        sample_shape = self.batch_shape
        static_sample_shape = self.get_batch_shape()
        if n_samples is not None:
            sample_shape = tf.concat([[n_samples], sample_shape], 0)
            static_sample_shape = tf.TensorShape(
                [None if is_tensor_object(n_samples) else n_samples]). \
                concatenate(static_sample_shape)

        u = tf.random_uniform(
            shape=sample_shape, minval=self._epsilon,
            maxval=1. - self._epsilon, dtype=self._param_dtype)
        u.set_shape(static_sample_shape)

        # inverse CDF of the logistic distribution
        inverse_logistic_cdf = maybe_check_numerics(
            tf.log(u) - tf.log(1. - u), 'inverse_logistic_cdf')

        # obtain the actual sample
        scale = maybe_check_numerics(
            tf.exp(self.log_scale, name='scale'), 'scale')
        sample = self.mean + scale * inverse_logistic_cdf
        if self.discretize_sample:
            sample = self._discretize(sample)
        sample = maybe_check_numerics(sample, 'sample')
        sample = convert_to_tensor_and_cast(sample, self.dtype)

        if not is_reparameterized:
            sample = tf.stop_gradient(sample)

        t = StochasticTensor(
            distribution=self,
            tensor=sample,
            n_samples=n_samples,
            group_ndims=group_ndims,
            is_reparameterized=is_reparameterized
        )

        # compute the density of the samples if required
        if compute_density:
            compute_density_immediately(t)

        return t
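# A minimal standalone sketch (plain TF 1.x; scalar `mean` / `log_scale`
# are illustrative names, not the class attributes) of the inverse-CDF trick
# used above: the logistic CDF is sigmoid((x - mean) / scale), so applying
# its inverse to u ~ Uniform(0, 1) gives mean + scale * (log(u) - log(1 - u)).
import tensorflow as tf

mean, log_scale = 0., 0.  # the standard logistic distribution
u = tf.random_uniform([10000], minval=1e-7, maxval=1. - 1e-7)
samples = mean + tf.exp(log_scale) * (tf.log(u) - tf.log(1. - u))
# E[samples] ~= mean, Var[samples] ~= (pi ** 2 / 3) * scale ** 2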
def _check_tensor(self, tensor, name):
    tensor_name = '{}.{}'.format(self.__class__.__name__, name)
    maybe_add_histogram(tensor, tensor_name)
    return maybe_check_numerics(tensor, tensor_name)
def deconv2d(input,
             out_channels,
             kernel_size,
             strides=(1, 1),
             padding='same',
             channels_last=True,
             output_shape=None,
             activation_fn=None,
             normalizer_fn=None,
             weight_norm=False,
             gated=False,
             gate_sigmoid_bias=2.,
             kernel=None,
             kernel_initializer=None,
             kernel_regularizer=None,
             kernel_constraint=None,
             use_bias=None,
             bias=None,
             bias_initializer=tf.zeros_initializer(),
             bias_regularizer=None,
             bias_constraint=None,
             trainable=True,
             name=None,
             scope=None):
    """
    2D deconvolutional layer.

    Args:
        input (Tensor): The input tensor, at least 4-d.
        out_channels (int): The channel numbers of the deconvolution output.
        kernel_size (int or (int, int)): Kernel size over spatial dimensions.
        strides (int or (int, int)): Strides over spatial dimensions.
        padding: One of {"valid", "same"}, case-insensitive.
        channels_last (bool): Whether or not the channel axis is the last
            axis in `input`? (i.e., the data format is "NHWC")
        output_shape: If specified, use this as the shape of the
            deconvolution output; otherwise compute the size of each
            dimension by::

                output_size = input_size * strides
                if padding == 'valid':
                    output_size += max(kernel_size - strides, 0)

        activation_fn: The activation function.
        normalizer_fn: The normalizer function.
        weight_norm (bool or (tf.Tensor) -> tf.Tensor): If :obj:`True`,
            apply :func:`~tfsnippet.layers.weight_norm` on `kernel`.
            `use_scale` will be :obj:`True` if `normalizer_fn` is not
            specified, and :obj:`False` otherwise.  The axis reduction
            will be determined by the layer.

            If it is a callable function, it will be used to normalize the
            `kernel` instead of :func:`~tfsnippet.layers.weight_norm`.
            The user must then ensure the reduction axes are correct.
        gated (bool): Whether or not to use gate on output?
            `output = activation_fn(output) * sigmoid(gate)`.
        gate_sigmoid_bias (Tensor): The bias added to `gate` before applying
            the `sigmoid` activation.
        kernel (Tensor): Instead of creating a new variable, use this tensor.
        kernel_initializer: The initializer for `kernel`.  Would be
            ``default_kernel_initializer(...)`` if not specified.
        kernel_regularizer: The regularizer for `kernel`.
        kernel_constraint: The constraint for `kernel`.
        use_bias (bool or None): Whether or not to use `bias`?  If
            :obj:`True`, will always use bias.  If :obj:`None`, will use
            bias only if `normalizer_fn` is not given.  If :obj:`False`,
            will never use bias.  Default is :obj:`None`.
        bias (Tensor): Instead of creating a new variable, use this tensor.
        bias_initializer: The initializer for `bias`.
        bias_regularizer: The regularizer for `bias`.
        bias_constraint: The constraint for `bias`.
        trainable (bool): Whether or not the parameters are trainable?

    Returns:
        tf.Tensor: The output tensor.
""" input, in_channels, data_format = \ validate_conv2d_input(input, channels_last) out_channels = validate_positive_int_arg('out_channels', out_channels) dtype = input.dtype.base_dtype if gated: out_channels *= 2 # check functional arguments padding = validate_enum_arg('padding', str(padding).upper(), ['VALID', 'SAME']) strides = validate_conv2d_strides_tuple('strides', strides, channels_last) weight_norm_fn = validate_weight_norm_arg(weight_norm, axis=-1, use_scale=normalizer_fn is None) if use_bias is None: use_bias = normalizer_fn is None # get the specification of outputs and parameters kernel_size = validate_conv2d_size_tuple('kernel_size', kernel_size) kernel_shape = kernel_size + (out_channels, in_channels) bias_shape = (out_channels, ) given_h, given_w = None, None given_output_shape = output_shape if is_tensor_object(given_output_shape): given_output_shape = tf.convert_to_tensor(given_output_shape) elif given_output_shape is not None: given_h, given_w = given_output_shape # validate the parameters if kernel is not None: kernel_spec = ParamSpec(shape=kernel_shape, dtype=dtype) kernel = kernel_spec.validate('kernel', kernel) if kernel_initializer is None: kernel_initializer = default_kernel_initializer(weight_norm) if bias is not None: bias_spec = ParamSpec(shape=bias_shape, dtype=dtype) bias = bias_spec.validate('bias', bias) # the main part of the conv2d layer with tf.variable_scope(scope, default_name=name or 'deconv2d'): with tf.name_scope('output_shape'): # detect the input shape and axis arrangements input_shape = get_static_shape(input) if channels_last: c_axis, h_axis, w_axis = -1, -3, -2 else: c_axis, h_axis, w_axis = -3, -2, -1 output_shape = [None, None, None, None] output_shape[c_axis] = out_channels if given_output_shape is None: if input_shape[h_axis] is not None: output_shape[h_axis] = get_deconv_output_length( input_shape[h_axis], kernel_shape[0], strides[h_axis], padding) if input_shape[w_axis] is not None: output_shape[w_axis] = get_deconv_output_length( input_shape[w_axis], kernel_shape[1], strides[w_axis], padding) else: if not is_tensor_object(given_output_shape): output_shape[h_axis] = given_h output_shape[w_axis] = given_w # infer the batch shape in 4-d batch_shape = input_shape[:-3] if None not in batch_shape: output_shape[0] = int(np.prod(batch_shape)) # now the static output shape is ready output_static_shape = tf.TensorShape(output_shape) # prepare for the dynamic batch shape if output_shape[0] is None: output_shape[0] = tf.reduce_prod(get_shape(input)[:-3]) # prepare for the dynamic spatial dimensions if output_shape[h_axis] is None or output_shape[w_axis] is None: if given_output_shape is None: input_shape = get_shape(input) if output_shape[h_axis] is None: output_shape[h_axis] = get_deconv_output_length( input_shape[h_axis], kernel_shape[0], strides[h_axis], padding) if output_shape[w_axis] is None: output_shape[w_axis] = get_deconv_output_length( input_shape[w_axis], kernel_shape[1], strides[w_axis], padding) else: assert (is_tensor_object(given_output_shape)) with assert_deps([ assert_rank(given_output_shape, 1), assert_scalar_equal(tf.size(given_output_shape), 2) ]): output_shape[h_axis] = given_output_shape[0] output_shape[w_axis] = given_output_shape[1] # compose the final dynamic shape if any(is_tensor_object(s) for s in output_shape): output_shape = tf.stack(output_shape) else: output_shape = tuple(output_shape) # create the variables if kernel is None: kernel = model_variable('kernel', shape=kernel_shape, dtype=dtype, 
initializer=kernel_initializer, regularizer=kernel_regularizer, constraint=kernel_constraint, trainable=trainable) if weight_norm_fn is not None: kernel = weight_norm_fn(kernel) maybe_add_histogram(kernel, 'kernel') kernel = maybe_check_numerics(kernel, 'kernel') if use_bias and bias is None: bias = model_variable('bias', shape=bias_shape, initializer=bias_initializer, regularizer=bias_regularizer, constraint=bias_constraint, trainable=trainable) maybe_add_histogram(bias, 'bias') bias = maybe_check_numerics(bias, 'bias') # flatten to 4d output, s1, s2 = flatten_to_ndims(input, 4) # do convolution or deconvolution output = tf.nn.conv2d_transpose(value=output, filter=kernel, output_shape=output_shape, strides=strides, padding=padding, data_format=data_format) if output_static_shape is not None: output.set_shape(output_static_shape) # add bias if use_bias: output = tf.nn.bias_add(output, bias, data_format=data_format) # apply the normalization function if specified if normalizer_fn is not None: output = normalizer_fn(output) # split into halves if gated if gated: output, gate = tf.split(output, 2, axis=c_axis) # apply the activation function if specified if activation_fn is not None: output = activation_fn(output) # apply the gate if required if gated: output = output * tf.sigmoid(gate + gate_sigmoid_bias, name='gate') # unflatten back to original shape output = unflatten_from_ndims(output, s1, s2) maybe_add_histogram(output, 'output') output = maybe_check_numerics(output, 'output') return output
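# Worked example of the output-size rule quoted in the `output_shape`
# docstring above (a sketch; the library's own `get_deconv_output_length`
# may differ in details such as argument order):
def _deconv_output_length_sketch(input_size, kernel_size, strides, padding):
    output_size = input_size * strides
    if padding == 'VALID':
        output_size += max(kernel_size - strides, 0)
    return output_size

# e.g. input_size=7, kernel_size=3, strides=2:
#   'SAME'  -> 7 * 2 = 14;  'VALID' -> 14 + max(3 - 2, 0) = 15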
def conv2d(input,
           out_channels,
           kernel_size,
           strides=(1, 1),
           dilations=1,
           padding='same',
           channels_last=True,
           activation_fn=None,
           normalizer_fn=None,
           weight_norm=False,
           gated=False,
           gate_sigmoid_bias=2.,
           kernel=None,
           kernel_mask=None,
           kernel_initializer=None,
           kernel_regularizer=None,
           kernel_constraint=None,
           use_bias=None,
           bias=None,
           bias_initializer=tf.zeros_initializer(),
           bias_regularizer=None,
           bias_constraint=None,
           trainable=True,
           name=None,
           scope=None):
    """
    2D convolutional layer.

    Args:
        input (Tensor): The input tensor, at least 4-d.
        out_channels (int): The channel numbers of the output.
        kernel_size (int or (int, int)): Kernel size over spatial dimensions.
        strides (int or (int, int)): Strides over spatial dimensions.
        dilations (int): The dilation factor over spatial dimensions.
        padding: One of {"valid", "same"}, case-insensitive.
        channels_last (bool): Whether or not the channel axis is the last
            axis in `input`? (i.e., the data format is "NHWC")
        activation_fn: The activation function.
        normalizer_fn: The normalizer function.
        weight_norm (bool or (tf.Tensor) -> tf.Tensor): If :obj:`True`,
            apply :func:`~tfsnippet.layers.weight_norm` on `kernel`.
            `use_scale` will be :obj:`True` if `normalizer_fn` is not
            specified, and :obj:`False` otherwise.  The axis reduction
            will be determined by the layer.

            If it is a callable function, it will be used to normalize the
            `kernel` instead of :func:`~tfsnippet.layers.weight_norm`.
            The user must then ensure the reduction axes are correct.
        gated (bool): Whether or not to use gate on output?
            `output = activation_fn(output) * sigmoid(gate)`.
        gate_sigmoid_bias (Tensor): The bias added to `gate` before applying
            the `sigmoid` activation.
        kernel (Tensor): Instead of creating a new variable, use this tensor.
        kernel_mask (Tensor): If specified, multiply this mask onto `kernel`,
            i.e., the actual kernel to use will be `kernel * kernel_mask`.
        kernel_initializer: The initializer for `kernel`.  Would be
            ``default_kernel_initializer(...)`` if not specified.
        kernel_regularizer: The regularizer for `kernel`.
        kernel_constraint: The constraint for `kernel`.
        use_bias (bool or None): Whether or not to use `bias`?  If
            :obj:`True`, will always use bias.  If :obj:`None`, will use
            bias only if `normalizer_fn` is not given.  If :obj:`False`,
            will never use bias.  Default is :obj:`None`.
        bias (Tensor): Instead of creating a new variable, use this tensor.
        bias_initializer: The initializer for `bias`.
        bias_regularizer: The regularizer for `bias`.
        bias_constraint: The constraint for `bias`.
        trainable (bool): Whether or not the parameters are trainable?

    Returns:
        tf.Tensor: The output tensor.
""" input, in_channels, data_format = \ validate_conv2d_input(input, channels_last) out_channels = validate_positive_int_arg('out_channels', out_channels) dtype = input.dtype.base_dtype if gated: out_channels *= 2 # check functional arguments padding = validate_enum_arg('padding', str(padding).upper(), ['VALID', 'SAME']) original_strides = validate_conv2d_size_tuple('strides', strides) strides = validate_conv2d_strides_tuple('strides', original_strides, channels_last) dilations = validate_positive_int_arg('dilations', dilations) if dilations > 1 and not channels_last: raise ValueError('`channels_last` == False is incompatible with ' '`dilations` > 1.') if any(i > 1 for i in strides) and dilations > 1: raise ValueError('`strides` > 1 is incompatible with `dilations` > 1.') weight_norm_fn = validate_weight_norm_arg(weight_norm, axis=-1, use_scale=normalizer_fn is None) if use_bias is None: use_bias = normalizer_fn is None # get the specification of outputs and parameters kernel_size = validate_conv2d_size_tuple('kernel_size', kernel_size) kernel_shape = kernel_size + (in_channels, out_channels) bias_shape = (out_channels, ) # validate the parameters if kernel is not None: kernel_spec = ParamSpec(shape=kernel_shape, dtype=dtype) kernel = kernel_spec.validate('kernel', kernel) if kernel_mask is not None: kernel_mask_spec = InputSpec(dtype=dtype) kernel_mask = kernel_mask_spec.validate('kernel_mask', kernel_mask) if kernel_initializer is None: kernel_initializer = default_kernel_initializer(weight_norm) if bias is not None: bias_spec = ParamSpec(shape=bias_shape, dtype=dtype) bias = bias_spec.validate('bias', bias) # the main part of the conv2d layer with tf.variable_scope(scope, default_name=name or 'conv2d'): c_axis = -1 if channels_last else -3 # create the variables if kernel is None: kernel = model_variable('kernel', shape=kernel_shape, dtype=dtype, initializer=kernel_initializer, regularizer=kernel_regularizer, constraint=kernel_constraint, trainable=trainable) if weight_norm_fn is not None: kernel = weight_norm_fn(kernel) if kernel_mask is not None: kernel = kernel * kernel_mask maybe_add_histogram(kernel, 'kernel') kernel = maybe_check_numerics(kernel, 'kernel') if use_bias and bias is None: bias = model_variable('bias', shape=bias_shape, initializer=bias_initializer, regularizer=bias_regularizer, constraint=bias_constraint, trainable=trainable) maybe_add_histogram(bias, 'bias') bias = maybe_check_numerics(bias, 'bias') # special optimization: use dense instead of 1x1 conv if possible if dilations == 1 and kernel_size == (1, 1) and channels_last: with tf.name_scope('conv2d_1x1'): conv2d_1x1_kernel = tf.reshape(kernel, kernel_shape[2:], name='conv2d_1x1_kernel') output = input[ ..., ::original_strides[0], ::original_strides[1], :] # flatten to 2d output, s1, s2 = flatten_to_ndims(output, 2) output = tf.matmul(output, conv2d_1x1_kernel) else: # flatten to 4d output, s1, s2 = flatten_to_ndims(input, 4) # do convolution if dilations > 1: output = tf.nn.atrous_conv2d(value=output, filters=kernel, rate=dilations, padding=padding) else: output = tf.nn.conv2d(input=output, filter=kernel, strides=strides, padding=padding, data_format=data_format, dilations=[1] * 4) # add bias if use_bias: output = tf.nn.bias_add(output, bias, data_format=data_format) # apply the normalization function if specified if normalizer_fn is not None: output = normalizer_fn(output) # split into halves if gated if gated: output, gate = tf.split(output, 2, axis=c_axis) # apply the activation function if specified if 
activation_fn is not None: output = activation_fn(output) # apply the gate if required if gated: output = output * tf.sigmoid(gate + gate_sigmoid_bias, name='gate') # unflatten back to original shape output = unflatten_from_ndims(output, s1, s2) maybe_add_histogram(output, 'output') output = maybe_check_numerics(output, 'output') return output
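# The 1x1 special case above relies on a 1x1 convolution being a
# position-wise dense layer.  A standalone sketch of the equivalence
# (all names local to this example; plain TF 1.x):
import numpy as np
import tensorflow as tf

x = tf.constant(np.random.randn(2, 5, 5, 3), dtype=tf.float32)
w = tf.constant(np.random.randn(1, 1, 3, 4), dtype=tf.float32)
conv_out = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
dense_out = tf.reshape(
    tf.matmul(tf.reshape(x, [-1, 3]), tf.reshape(w, [3, 4])),
    [2, 5, 5, 4])
# `conv_out` and `dense_out` agree up to floating-point error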
def _transform(self, x, compute_y, compute_log_det):
    # check the argument
    dtype = x.dtype.base_dtype
    shape = get_static_shape(x)
    assert (-len(shape) <= -self.value_ndims <= min(self.axis))
    reduce_axis = tuple(
        sorted(set(range(-len(shape), 0)).difference(self.axis)))

    # prepare for the parameters
    if not self._initialized:
        if len(shape) == len(self._var_shape_aligned):
            raise ValueError('Initializing ActNorm requires multiple '
                             '`x` samples, thus `x` must have at least '
                             'one more dimension than the variable shape: '
                             'x {} vs variable shape {}.'.
                             format(x, self._var_shape_aligned))

        with tf.name_scope('initialization'):
            x_mean, x_var = tf.nn.moments(x, reduce_axis)
            x_mean = tf.reshape(x_mean, self._var_shape)
            x_var = maybe_check_numerics(
                tf.reshape(x_var, self._var_shape),
                'numeric issues in computed x_var')

            bias = self._bias.assign(-x_mean)
            if self._scale_type == 'exp':
                pre_scale = self._pre_scale.assign(
                    -tf.constant(.5, dtype=dtype) *
                    tf.log(tf.maximum(x_var, self._epsilon)))
                pre_scale = maybe_check_numerics(
                    pre_scale, 'numeric issues in initializing log_scale')
            else:
                assert (self._scale_type == 'linear')
                pre_scale = self._pre_scale.assign(
                    tf.constant(1., dtype=dtype) /
                    tf.sqrt(tf.maximum(x_var, self._epsilon)))
                pre_scale = maybe_check_numerics(
                    pre_scale, 'numeric issues in initializing scale')

        self._initialized = True
    else:
        bias = self._bias
        pre_scale = self._pre_scale

    # align the shape of variables, and create the scale object
    bias = tf.reshape(bias, self._var_shape_aligned)
    pre_scale = tf.reshape(pre_scale, self._var_shape_aligned)

    if self._scale_type == 'exp':
        scale = ExpScale(pre_scale, self._epsilon)
    else:
        assert (self._scale_type == 'linear')
        scale = LinearScale(pre_scale, self._epsilon)

    # compute y
    y = None
    if compute_y:
        y = (x + bias) * scale

    # compute log_det
    log_det = None
    if compute_log_det:
        with tf.name_scope('log_det'):
            log_det = scale.log_scale()
            reduce_ndims1 = min(self.value_ndims,
                                len(self._var_shape_aligned))
            reduce_ndims2 = self.value_ndims - reduce_ndims1

            # reduce the last `min(value_ndims, len(var_shape))` dimensions
            if reduce_ndims1 > 0:
                log_det = tf.reduce_sum(
                    log_det, axis=list(range(-reduce_ndims1, 0)))

                # The following axes have been averaged out during
                # computation, and will be directly summed up without
                # getting broadcasted.  Thus we need to multiply the
                # log_det by the count of reduced elements.
                reduce_axis1 = tuple(
                    filter(lambda a: (a >= -reduce_ndims1), reduce_axis))
                reduce_shape1 = get_dimensions_size(x, reduce_axis1)
                if isinstance(reduce_shape1, tuple):
                    log_det *= np.prod(reduce_shape1, dtype=np.float32)
                else:
                    log_det *= tf.cast(tf.reduce_prod(reduce_shape1),
                                       dtype=log_det.dtype)

            # we need to broadcast `log_det` to match the shape of `x`
            log_det = broadcast_log_det_against_input(
                log_det, x, value_ndims=reduce_ndims1)

            # reduce the remaining dimensions
            if reduce_ndims2 > 0:
                log_det = tf.reduce_sum(
                    log_det, axis=list(range(-reduce_ndims2, 0)))

    return y, log_det
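# The data-dependent initialization above sets `bias = -mean(x)` and
# `scale = 1 / std(x)` over the reduced axes, so the first batch seen is
# mapped to roughly zero mean and unit variance.  A numpy sketch of the
# 'linear' scale type (names local to this example):
import numpy as np

x = np.random.randn(64, 8) * 3.0 + 5.0        # batch of 64, feature axis -1
bias = -x.mean(axis=0)
scale = 1.0 / np.sqrt(np.maximum(x.var(axis=0), 1e-6))
y = (x + bias) * scale
# y.mean(axis=0) ~= 0 and y.std(axis=0) ~= 1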
def log_prob(self, given, group_ndims=0, name=None):
    given = tf.convert_to_tensor(given)

    with tf.name_scope(name,
                       default_name='DiscretizedLogistic.log_prob',
                       values=[given]):
        if self.discretize_given:
            given = self._discretize(given)

        # inv_scale = 1. / exp(log_scale)
        inv_scale = maybe_check_numerics(
            tf.exp(-self.log_scale, name='inv_scale'), 'inv_scale')
        # half_bin = bin_size / 2
        half_bin = self.bin_size * .5
        # delta = bin_size / scale, half_delta = delta / 2
        half_delta = half_bin * inv_scale

        # x_mid = (x - mean) / scale
        x_mid = (given - self.mean) * inv_scale
        # x_low = (x - mean - bin_size * 0.5) / scale
        x_low = x_mid - half_delta
        # x_high = (x - mean + bin_size * 0.5) / scale
        x_high = x_mid + half_delta

        cdf_low = tf.sigmoid(x_low, name='cdf_low')
        cdf_high = tf.sigmoid(x_high, name='cdf_high')
        cdf_delta = cdf_high - cdf_low

        # the middle bins case:
        #   log(sigmoid(x_high) - sigmoid(x_low))
        middle_bins_pdf = tf.log(tf.maximum(cdf_delta, self._epsilon))
        log_prob = maybe_check_numerics(middle_bins_pdf, 'middle_bins_pdf')

        if self.biased_edges and self.min_val is not None:
            # broadcast `given` to the shape of `x_low`
            broadcast_given = broadcast_to_shape(given, get_shape(x_low))

            # the left-edge bin case:
            #   log(sigmoid(x_high) - sigmoid(-infinity))
            left_edge = self.min_val + half_bin
            left_edge_pdf = maybe_check_numerics(
                -tf.nn.softplus(-x_high), 'left_edge_pdf')
            log_prob = tf.where(tf.less(broadcast_given, left_edge),
                                left_edge_pdf, log_prob)

            # the right-edge bin case:
            #   log(sigmoid(infinity) - sigmoid(x_low))
            right_edge = self.max_val - half_bin
            right_edge_pdf = maybe_check_numerics(
                -tf.nn.softplus(x_low), 'right_edge_pdf')
            log_prob = tf.where(
                tf.greater_equal(broadcast_given, right_edge),
                right_edge_pdf, log_prob)

        # now reduce the group_ndims
        log_prob = reduce_group_ndims(tf.reduce_sum, log_prob, group_ndims)

        return log_prob
def weight_norm(kernel,
                axis,
                use_scale=True,
                scale=None,
                scale_initializer=None,
                scale_regularizer=None,
                scale_constraint=None,
                trainable=True,
                epsilon=1e-12,
                name=None,
                scope=None):
    """
    Weight normalization proposed by (Salimans & Kingma, 2016).

    Roughly speaking, the weight normalization is defined as::

        kernel = scale * kernel / tf.sqrt(
            tf.reduce_sum(kernel ** 2,
                          axis=<dimensions not in `axis`>,
                          keepdims=True)
        )

    This function does not support data-dependent initialization for `scale`.
    If you do need this feature, you have to turn off `scale`, and use
    :func:`~tfsnippet.layers.act_norm` along with :func:`weight_norm`.

    Args:
        kernel: Tensor, the weight `w` to be normalized.
        axis (int or tuple[int]): The axis (or axes) to keep; the norm is
            computed over all the other dimensions.
        use_scale (bool): Whether or not to use `scale`.  Default :obj:`True`.
        scale (Tensor): Instead of creating a new variable, use this tensor.
        scale_initializer: The initializer for `scale`.
        scale_regularizer: The regularizer for `scale`.
        scale_constraint: The constraint for `scale`.
        trainable (bool): Whether or not the variables are trainable?
        epsilon: Small float number to avoid dividing by zero.
    """
    # check the parameters
    if not use_scale and scale is not None:
        raise ValueError('`use_scale` is False but `scale` is specified.')
    axis = validate_int_tuple_arg('axis', axis)
    if not axis:
        raise ValueError('`axis` cannot be empty.')

    kernel = tf.convert_to_tensor(kernel)
    kernel_shape = get_static_shape(kernel)
    dtype = kernel.dtype.base_dtype
    var_spec = ParamSpec(kernel_shape, dtype=dtype)

    if scale_initializer is None:
        scale_initializer = tf.ones_initializer(dtype=dtype)
    if scale is not None:
        scale = var_spec.validate('scale', scale)

    # any dimension not specified in `axis` should be averaged out
    axis = resolve_negative_axis(len(kernel_shape), axis)
    reduce_axis = tuple(a for a in range(len(kernel_shape)) if a not in axis)

    with tf.variable_scope(scope, default_name=name or 'weight_norm'):
        # normalize the kernel
        kernel = maybe_check_numerics(
            tf.nn.l2_normalize(kernel, axis=reduce_axis, epsilon=epsilon),
            'weight-normalized kernel')

        # create the scaling variable
        if use_scale:
            if scale is None:
                scale = model_variable(
                    'scale',
                    shape=kernel_shape,
                    dtype=dtype,
                    initializer=scale_initializer,
                    regularizer=scale_regularizer,
                    constraint=scale_constraint,
                    trainable=trainable
                )
            scale = maybe_check_numerics(scale, 'scale')
            kernel = kernel * scale

        # now return the normalized weight
        return kernel
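# A numpy sketch of the computation above, assuming a 2-d kernel and
# `axis=(1,)` (so axis 0 is the reduced dimension; names local to this
# example): each slice along the kept axis is rescaled to unit L2 norm,
# then multiplied by the learnable `scale`.
import numpy as np

kernel = np.random.randn(3, 4)
scale = np.ones((3, 4))                       # same shape as `kernel` above
normalized = kernel / np.sqrt(
    np.maximum((kernel ** 2).sum(axis=0, keepdims=True), 1e-12))
kernel_wn = normalized * scale
# np.linalg.norm(normalized[:, j]) ~= 1 for every column j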
def inverse_transform(self, y, compute_x=True, compute_log_det=True,
                      name=None):
    """
    Transform `y` into `x`, and compute the log-determinant of `f^{-1}` at
    `y` (i.e., :math:`\\log \\det \\frac{\\partial f^{-1}(y)}{\\partial y}`).

    Args:
        y (Tensor): The samples of `y`.
        compute_x (bool): Whether or not to compute :math:`x = f^{-1}(y)`?
            Default :obj:`True`.
        compute_log_det (bool): Whether or not to compute the
            log-determinant?  Default :obj:`True`.
        name (str): If specified, will use this name as the TensorFlow
            operational name scope.

    Returns:
        (tf.Tensor, tf.Tensor): `x` and the (maybe summed) log-determinant.
            The items in the returned tuple might be :obj:`None`
            if the corresponding `compute_?` argument is set to :obj:`False`.

    Raises:
        ValueError: If both `compute_x` and `compute_log_det` are set
            to :obj:`False`.
        RuntimeError: If the flow is not explicitly invertible, or has
            not been built yet.
    """
    if not self.explicitly_invertible:
        raise RuntimeError(
            'The flow is not explicitly invertible: {!r}'.format(self))
    if not compute_x and not compute_log_det:
        raise ValueError('At least one of `compute_x` and '
                         '`compute_log_det` should be True.')
    if not self._has_built:
        raise RuntimeError('`inverse_transform` cannot be called before '
                           'the flow has been built; it can be built by '
                           'calling `build`, `apply` or `transform`: '
                           '{!r}'.format(self))

    y = tf.convert_to_tensor(y)
    y = self._y_input_spec.validate('y', y)

    with tf.name_scope(
            name,
            default_name=get_default_scope_name('inverse_transform', self),
            values=[y]):
        x, log_det = self._inverse_transform(y, compute_x, compute_log_det)

        if compute_log_det:
            with assert_deps([
                    assert_log_det_shape_matches_input(
                        log_det=log_det,
                        input=y,
                        value_ndims=self.y_value_ndims)]) as asserted:
                if asserted:  # pragma: no cover
                    log_det = tf.identity(log_det)

        if x is not None:
            maybe_add_histogram(x, 'x')
            x = maybe_check_numerics(x, 'x')

        if log_det is not None:
            maybe_add_histogram(log_det, 'log_det')
            log_det = maybe_check_numerics(log_det, 'log_det')

        return x, log_det
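# For an invertible flow the forward and inverse log-determinants cancel:
# log det(df(x)/dx) + log det(df^{-1}(y)/dy) = 0 at y = f(x).  A round-trip
# sketch (hypothetical `flow` and `x`, for illustration only):
#
#     y, log_det = flow.transform(x)
#     x2, inv_log_det = flow.inverse_transform(y)
#     # x2 ~= x, and log_det + inv_log_det ~= 0 up to numerical error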
def log_prob(self, given, group_ndims=0, name=None):
    given = tf.convert_to_tensor(given)

    with tf.name_scope(name,
                       default_name='DiscretizedLogistic.log_prob',
                       values=[given]):
        # inv_scale = 1. / scale
        inv_scale = maybe_check_numerics(
            tf.exp(-self.log_scale, name='inv_scale'), 'inv_scale')
        # half_bin = bin_size / 2
        half_bin = self._bin_size * .5
        # delta = bin_size / scale, half_delta = delta / 2
        half_delta = half_bin * inv_scale
        # log(delta) = log(bin_size) - log(scale)
        log_delta = tf.log(self._bin_size) - self.log_scale

        x_mid = (given - self.mean) * inv_scale
        x_low = x_mid - half_delta
        x_high = x_mid + half_delta

        cdf_low = tf.sigmoid(x_low, name='cdf_low')
        cdf_high = tf.sigmoid(x_high, name='cdf_high')

        # the middle bins case:
        #   log(sigmoid(x_high) - sigmoid(x_low))
        # but in extreme cases where `sigmoid(x_high) - sigmoid(x_low)`
        # is very small, we use an alternative form, as in PixelCNN++.
        cdf_delta = cdf_high - cdf_low
        middle_bins_pdf = tf.where(
            cdf_delta > self._epsilon,
            # to avoid NaNs polluting the select statement, we have to use
            # `maximum(cdf_delta, 1e-12)`
            tf.log(tf.maximum(cdf_delta, 1e-12)),
            # the alternative form; basically it can be derived by using
            # the mean value theorem for integration.
            x_mid + log_delta - 2. * tf.nn.softplus(x_mid)
        )
        log_prob = maybe_check_numerics(middle_bins_pdf, 'middle_bins_pdf')

        # broadcast `given` to the shape of `x_mid`
        broadcast_given = broadcast_to_shape(given, get_shape(x_mid))

        # the left-edge bin case:
        #   log(sigmoid(x_high) - sigmoid(-infinity))
        if self._biased_edges and self.min_val is not None:
            left_edge = self._min_val + half_bin
            left_edge_pdf = maybe_check_numerics(
                -tf.nn.softplus(-x_high), 'left_edge_pdf')
            log_prob = tf.where(
                broadcast_given < left_edge, left_edge_pdf, log_prob)

        # the right-edge bin case:
        #   log(sigmoid(infinity) - sigmoid(x_low))
        if self._biased_edges and self.max_val is not None:
            right_edge = self._max_val - half_bin
            right_edge_pdf = maybe_check_numerics(
                -tf.nn.softplus(x_low), 'right_edge_pdf')
            log_prob = tf.where(
                broadcast_given >= right_edge, right_edge_pdf, log_prob)

        # now reduce the group_ndims
        log_prob = reduce_group_ndims(tf.reduce_sum, log_prob, group_ndims)

        return log_prob
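# A scalar numpy sketch of the two branches above (names local to this
# example).  The exact branch integrates the logistic density over one bin;
# the fallback approximates that integral by `bin_size * pdf(x)` (mean value
# theorem), using log(bin_size * pdf(x)) = log_delta + x_mid
# - 2 * softplus(x_mid):
import numpy as np

def _sigmoid(t):
    return 1. / (1. + np.exp(-t))

mean, log_scale, bin_size, x = 0.0, -3.0, 1. / 127.5, 0.5
inv_scale = np.exp(-log_scale)
x_mid = (x - mean) * inv_scale
half_delta = 0.5 * bin_size * inv_scale
exact = np.log(_sigmoid(x_mid + half_delta) - _sigmoid(x_mid - half_delta))
approx = x_mid + np.log(bin_size) - log_scale - 2. * np.log1p(np.exp(x_mid))
# `exact` and `approx` agree closely whenever the bin is narrow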
def check_tensor(tensor, name):
    maybe_add_histogram(tensor, name)
    return maybe_check_numerics(tensor, name)