def _build(self, input=None):
    """
    Derive the input specifications and create the `bias` and scale
    variables, according to the static shape and dtype of `input`.

    Args:
        input: The input; it is converted to a tensor, and its static
            shape and base dtype decide the specs and the variables.
    """
    # inspect the input
    input = tf.convert_to_tensor(input)
    static_shape = get_static_shape(input)
    dtype = input.dtype.base_dtype

    # These facts should have been checked in `BaseFlow.build`.
    assert static_shape is not None
    assert len(static_shape) >= self.value_ndims

    # Keep the static size of every dimension listed in `self.axis`, leave
    # `None` for the others, and drop everything before the left-most axis.
    # NOTE(review): the slice below presumably relies on `self.axis`
    # holding negative indices -- confirm against the class.
    left_most_axis = min(self.axis)
    dims = [None] * len(static_shape)
    for ax in self.axis:
        dims[ax] = static_shape[ax]
    tail_dims = dims[left_most_axis:]
    assert tail_dims
    assert self.value_ndims >= len(tail_dims)

    # `x` and `y` share one input spec: any leading dimensions, then
    # wildcards up to `value_ndims`, then the (partially) fixed tail.
    n_wildcards = self.value_ndims - len(tail_dims)
    spec_shape = ('...',) + ('?',) * n_wildcards + tuple(tail_dims)
    shared_spec = InputSpec(shape=spec_shape, dtype=dtype)
    self._y_input_spec = shared_spec
    self._x_input_spec = shared_spec

    # The variables only own the necessary dimensions, such that we can
    # switch freely between `channels_last = True` (in which case
    # `input.shape = (..., *,)`) and `channels_last = False` (in which case
    # `input.shape = (..., *, 1, 1)`).
    self._var_shape = tuple(d for d in tail_dims if d is not None)

    # The aligned shape has a `1` wherever the size is not fixed, so that
    # the variables can be reshaped into it right before computing
    # `scale * input + bias`.
    self._var_shape_aligned = tuple(d if d else 1 for d in tail_dims)
    self._var_spec = ParamSpec(self._var_shape)

    # validate the input against the derived spec
    self._x_input_spec.validate('input', input)

    # build the variables
    self._bias = model_variable(
        'bias',
        dtype=dtype,
        shape=self._var_shape,
        regularizer=self._bias_regularizer,
        constraint=self._bias_constraint,
        trainable=self._trainable
    )

    # the pre-scale variable is named 'log_scale' for the 'exp' scale
    # type, and 'scale' otherwise, each with its own regularizer and
    # constraint
    if self._scale_type == 'exp':
        pre_scale_name = 'log_scale'
        pre_scale_regularizer = self._log_scale_regularizer
        pre_scale_constraint = self._log_scale_constraint
    else:
        pre_scale_name = 'scale'
        pre_scale_regularizer = self._scale_regularizer
        pre_scale_constraint = self._scale_constraint

    self._pre_scale = model_variable(
        pre_scale_name,
        dtype=dtype,
        shape=self._var_shape,
        regularizer=pre_scale_regularizer,
        constraint=pre_scale_constraint,
        trainable=self._trainable
    )
def _build(self, input=None):
    """
    Create the variables `w`, `b` and `u`, and derive `u_hat` from them.

    Args:
        input: The input tensor.  Its base dtype is used for all the
            variables, and its static size at `self.axis` is used as
            the number of units.
    """
    dtype = input.dtype.base_dtype
    n_units = get_static_shape(input)[self.axis]

    def new_variable(var_name, var_shape, initializer, regularizer):
        # all the three variables share the dtype and the trainable flag
        return model_variable(
            var_name,
            shape=var_shape,
            dtype=dtype,
            initializer=initializer,
            regularizer=regularizer,
            trainable=self._trainable
        )

    w = new_variable('w', [1, n_units],
                     self._w_initializer, self._w_regularizer)
    b = new_variable('b', [1],
                     self._b_initializer, self._b_regularizer)
    u = new_variable('u', [1, n_units],
                     self._u_initializer, self._u_regularizer)

    # w_dot_u.shape == [1, 1], the product of [1, n_units] and [n_units, 1]
    w_dot_u = tf.matmul(w, u, transpose_b=True)

    # u_hat = u + (-1 + softplus(w . u) - w . u) * w / sum(w ** 2)
    # NOTE(review): this looks like the reparameterization of `u` used by
    # planar normalizing flows -- confirm against the enclosing class.
    shift = (-1 + tf.nn.softplus(w_dot_u) - w_dot_u) * w
    w_square_sum = tf.reduce_sum(tf.square(w))
    u_hat = u + shift / w_square_sum  # shape == [1, n_units]

    self._w = w
    self._b = b
    self._u = u
    self._u_hat = u_hat
def _build(self, input=None):
    """
    Create the non-trainable `permutation` variable and its inverse.

    Args:
        input: The input tensor.  Its static size at `self.axis` is
            the number of features to be permuted.
    """
    n_features = get_static_shape(input)[self.axis]
    self._n_features = n_features

    # start from the identity order, and shuffle it in-place with the
    # random state owned by this object
    shuffled_indices = np.arange(n_features, dtype=np.int32)
    self._random_state.shuffle(shuffled_indices)

    # the shuffled order is stored in a variable, initialized from the
    # numpy array
    self._permutation = model_variable(
        'permutation',
        dtype=tf.int32,
        initializer=shuffled_indices,
        trainable=False
    )
    self._inv_permutation = tf.invert_permutation(self._permutation)
def deconv2d(input,
             out_channels,
             kernel_size,
             strides=(1, 1),
             padding='same',
             channels_last=True,
             output_shape=None,
             activation_fn=None,
             normalizer_fn=None,
             weight_norm=False,
             gated=False,
             gate_sigmoid_bias=2.,
             kernel=None,
             kernel_initializer=None,
             kernel_regularizer=None,
             kernel_constraint=None,
             use_bias=None,
             bias=None,
             bias_initializer=tf.zeros_initializer(),
             bias_regularizer=None,
             bias_constraint=None,
             trainable=True,
             name=None,
             scope=None):
    """
    2D deconvolutional layer.

    Args:
        input (Tensor): The input tensor, at least 4-d.
        out_channels (int): The channel numbers of the deconvolution output.
        kernel_size (int or (int, int)): Kernel size over spatial dimensions.
        strides (int or (int, int)): Strides over spatial dimensions.
        padding: One of {"valid", "same"}, case insensitive.
        channels_last (bool): Whether or not the channel axis is the last
            axis in `input`? (i.e., the data format is "NHWC")
        output_shape: If specified, use this as the shape of the
            deconvolution output; otherwise compute the size of each
            dimension by::

                output_size = input_size * strides
                if padding == 'valid':
                    output_size += max(kernel_size - strides, 0)

            It may be a pair `(height, width)`, or a 1-d tensor having
            exactly 2 elements.
        activation_fn: The activation function.
        normalizer_fn: The normalizer function.
        weight_norm (bool or (tf.Tensor) -> tf.Tensor)):
            If :obj:`True`, apply :func:`~tfsnippet.layers.weight_norm` on
            `kernel`.  `use_scale` will be :obj:`True` if `normalizer_fn`
            is not specified, and :obj:`False` otherwise.  The axis
            reduction will be determined by the layer.

            If it is a callable function, then it will be used to normalize
            the `kernel` instead of :func:`~tfsnippet.layers.weight_norm`.
            The user must ensure the axis reduction is correct by
            themselves.
        gated (bool): Whether or not to use gate on output?
            `output = activation_fn(output) * sigmoid(gate)`.
        gate_sigmoid_bias (Tensor): The bias added to `gate` before
            applying the `sigmoid` activation.
        kernel (Tensor): Instead of creating a new variable, use this
            tensor.
        kernel_initializer: The initializer for `kernel`.
            Would be ``default_kernel_initializer(...)`` if not specified.
        kernel_regularizer: The regularizer for `kernel`.
        kernel_constraint: The constraint for `kernel`.
        use_bias (bool or None): Whether or not to use `bias`?
            If :obj:`True`, will always use bias.
            If :obj:`None`, will use bias only if `normalizer_fn` is not
            given.  If :obj:`False`, will never use bias.
            Default is :obj:`None`.
        bias (Tensor): Instead of creating a new variable, use this tensor.
        bias_initializer: The initializer for `bias`.
        bias_regularizer: The regularizer for `bias`.
        bias_constraint: The constraint for `bias`.
        trainable (bool): Whether or not the parameters are trainable?
        name (str): Default name of the variable scope of this layer,
            used only if `scope` is not specified.
            Would be "deconv2d" if not specified.
        scope (str): The name of the variable scope of this layer.

    Returns:
        tf.Tensor: The output tensor.
    """
    input, in_channels, data_format = \
        validate_conv2d_input(input, channels_last)
    out_channels = validate_positive_int_arg('out_channels', out_channels)
    dtype = input.dtype.base_dtype
    if gated:
        # one half of the channels is the output, the other is the gate
        out_channels *= 2

    # check functional arguments
    padding = validate_enum_arg(
        'padding', str(padding).upper(), ['VALID', 'SAME'])
    strides = validate_conv2d_strides_tuple('strides', strides, channels_last)

    weight_norm_fn = validate_weight_norm_arg(
        weight_norm, axis=-1, use_scale=normalizer_fn is None)
    if use_bias is None:
        use_bias = normalizer_fn is None

    # get the specification of outputs and parameters
    kernel_size = validate_conv2d_size_tuple('kernel_size', kernel_size)
    kernel_shape = kernel_size + (out_channels, in_channels)
    bias_shape = (out_channels,)

    # a dynamic `output_shape` is kept as a tensor, while a static one is
    # unpacked into the height and the width
    given_h, given_w = None, None
    given_output_shape = output_shape

    if is_tensor_object(given_output_shape):
        given_output_shape = tf.convert_to_tensor(given_output_shape)
    elif given_output_shape is not None:
        given_h, given_w = given_output_shape

    # validate the parameters
    if kernel is not None:
        kernel_spec = ParamSpec(shape=kernel_shape, dtype=dtype)
        kernel = kernel_spec.validate('kernel', kernel)
    if kernel_initializer is None:
        kernel_initializer = default_kernel_initializer(weight_norm)
    if bias is not None:
        bias_spec = ParamSpec(shape=bias_shape, dtype=dtype)
        bias = bias_spec.validate('bias', bias)

    # the main part of the deconv2d layer
    with tf.variable_scope(scope, default_name=name or 'deconv2d'):
        with tf.name_scope('output_shape'):
            # detect the input shape and axis arrangements
            input_shape = get_static_shape(input)
            if channels_last:
                c_axis, h_axis, w_axis = -1, -3, -2
            else:
                c_axis, h_axis, w_axis = -3, -2, -1

            output_shape = [None, None, None, None]
            output_shape[c_axis] = out_channels
            if given_output_shape is None:
                if input_shape[h_axis] is not None:
                    output_shape[h_axis] = get_deconv_output_length(
                        input_shape[h_axis], kernel_shape[0],
                        strides[h_axis], padding
                    )
                if input_shape[w_axis] is not None:
                    output_shape[w_axis] = get_deconv_output_length(
                        input_shape[w_axis], kernel_shape[1],
                        strides[w_axis], padding
                    )
            else:
                if not is_tensor_object(given_output_shape):
                    output_shape[h_axis] = given_h
                    output_shape[w_axis] = given_w

            # infer the batch shape in 4-d
            batch_shape = input_shape[:-3]
            if None not in batch_shape:
                output_shape[0] = int(np.prod(batch_shape))

            # now the static output shape is ready
            output_static_shape = tf.TensorShape(output_shape)

            # prepare for the dynamic batch shape
            if output_shape[0] is None:
                output_shape[0] = tf.reduce_prod(get_shape(input)[:-3])

            # prepare for the dynamic spatial dimensions
            if output_shape[h_axis] is None or output_shape[w_axis] is None:
                if given_output_shape is None:
                    input_shape = get_shape(input)
                    if output_shape[h_axis] is None:
                        output_shape[h_axis] = get_deconv_output_length(
                            input_shape[h_axis], kernel_shape[0],
                            strides[h_axis], padding
                        )
                    if output_shape[w_axis] is None:
                        output_shape[w_axis] = get_deconv_output_length(
                            input_shape[w_axis], kernel_shape[1],
                            strides[w_axis], padding
                        )
                else:
                    assert (is_tensor_object(given_output_shape))
                    with assert_deps([
                                assert_rank(given_output_shape, 1),
                                assert_scalar_equal(
                                    tf.size(given_output_shape), 2)
                            ]):
                        output_shape[h_axis] = given_output_shape[0]
                        output_shape[w_axis] = given_output_shape[1]

            # compose the final dynamic shape
            if any(is_tensor_object(s) for s in output_shape):
                output_shape = tf.stack(output_shape)
            else:
                output_shape = tuple(output_shape)

        # create the variables
        if kernel is None:
            kernel = model_variable(
                'kernel',
                shape=kernel_shape,
                dtype=dtype,
                initializer=kernel_initializer,
                regularizer=kernel_regularizer,
                constraint=kernel_constraint,
                trainable=trainable
            )

        if weight_norm_fn is not None:
            kernel = weight_norm_fn(kernel)

        maybe_add_histogram(kernel, 'kernel')
        kernel = maybe_check_numerics(kernel, 'kernel')

        if use_bias and bias is None:
            # `bias` must have the dtype of `input`, just like `kernel`
            # and the `ParamSpec` which validates a user specified `bias`;
            # otherwise `tf.nn.bias_add` would receive mismatched dtypes
            # for any input whose dtype is not the default variable dtype.
            bias = model_variable(
                'bias',
                shape=bias_shape,
                dtype=dtype,
                initializer=bias_initializer,
                regularizer=bias_regularizer,
                constraint=bias_constraint,
                trainable=trainable
            )
            maybe_add_histogram(bias, 'bias')
            bias = maybe_check_numerics(bias, 'bias')

        # flatten to 4d
        output, s1, s2 = flatten_to_ndims(input, 4)

        # do convolution or deconvolution
        output = tf.nn.conv2d_transpose(
            value=output,
            filter=kernel,
            output_shape=output_shape,
            strides=strides,
            padding=padding,
            data_format=data_format
        )
        # attach the static shape, which is lost if `output_shape` is dynamic
        output.set_shape(output_static_shape)

        # add bias
        if use_bias:
            output = tf.nn.bias_add(output, bias, data_format=data_format)

        # apply the normalization function if specified
        if normalizer_fn is not None:
            output = normalizer_fn(output)

        # split into halves if gated
        if gated:
            output, gate = tf.split(output, 2, axis=c_axis)

        # apply the activation function if specified
        if activation_fn is not None:
            output = activation_fn(output)

        # apply the gate if required
        if gated:
            output = output * tf.sigmoid(gate + gate_sigmoid_bias, name='gate')

        # unflatten back to original shape
        output = unflatten_from_ndims(output, s1, s2)

        maybe_add_histogram(output, 'output')
        output = maybe_check_numerics(output, 'output')

    return output
def conv2d(input,
           out_channels,
           kernel_size,
           strides=(1, 1),
           dilations=1,
           padding='same',
           channels_last=True,
           activation_fn=None,
           normalizer_fn=None,
           weight_norm=False,
           gated=False,
           gate_sigmoid_bias=2.,
           kernel=None,
           kernel_mask=None,
           kernel_initializer=None,
           kernel_regularizer=None,
           kernel_constraint=None,
           use_bias=None,
           bias=None,
           bias_initializer=tf.zeros_initializer(),
           bias_regularizer=None,
           bias_constraint=None,
           trainable=True,
           name=None,
           scope=None):
    """
    2D convolutional layer.

    Args:
        input (Tensor): The input tensor, at least 4-d.
        out_channels (int): The channel numbers of the output.
        kernel_size (int or (int, int)): Kernel size over spatial dimensions.
        strides (int or (int, int)): Strides over spatial dimensions.
        dilations (int): The dilation factor over spatial dimensions.
        padding: One of {"valid", "same"}, case insensitive.
        channels_last (bool): Whether or not the channel axis is the last
            axis in `input`? (i.e., the data format is "NHWC")
        activation_fn: The activation function.
        normalizer_fn: The normalizer function.
        weight_norm (bool or (tf.Tensor) -> tf.Tensor)):
            If :obj:`True`, apply :func:`~tfsnippet.layers.weight_norm` on
            `kernel`.  `use_scale` will be :obj:`True` if `normalizer_fn`
            is not specified, and :obj:`False` otherwise.  The axis
            reduction will be determined by the layer.

            If it is a callable function, then it will be used to normalize
            the `kernel` instead of :func:`~tfsnippet.layers.weight_norm`.
            The user must ensure the axis reduction is correct by
            themselves.
        gated (bool): Whether or not to use gate on output?
            `output = activation_fn(output) * sigmoid(gate)`.
        gate_sigmoid_bias (Tensor): The bias added to `gate` before
            applying the `sigmoid` activation.
        kernel (Tensor): Instead of creating a new variable, use this
            tensor.
        kernel_mask (Tensor): If specified, multiply this mask onto
            `kernel`, i.e., the actual kernel to use will be
            `kernel * kernel_mask`.
        kernel_initializer: The initializer for `kernel`.
            Would be ``default_kernel_initializer(...)`` if not specified.
        kernel_regularizer: The regularizer for `kernel`.
        kernel_constraint: The constraint for `kernel`.
        use_bias (bool or None): Whether or not to use `bias`?
            If :obj:`True`, will always use bias.
            If :obj:`None`, will use bias only if `normalizer_fn` is not
            given.  If :obj:`False`, will never use bias.
            Default is :obj:`None`.
        bias (Tensor): Instead of creating a new variable, use this tensor.
        bias_initializer: The initializer for `bias`.
        bias_regularizer: The regularizer for `bias`.
        bias_constraint: The constraint for `bias`.
        trainable (bool): Whether or not the parameters are trainable?
        name (str): Default name of the variable scope of this layer,
            used only if `scope` is not specified.
            Would be "conv2d" if not specified.
        scope (str): The name of the variable scope of this layer.

    Returns:
        tf.Tensor: The output tensor.

    Raises:
        ValueError: If `dilations` > 1 while `channels_last` is False,
            or while any of the `strides` is greater than 1.
    """
    input, in_channels, data_format = \
        validate_conv2d_input(input, channels_last)
    out_channels = validate_positive_int_arg('out_channels', out_channels)
    dtype = input.dtype.base_dtype
    if gated:
        # one half of the channels is the output, the other is the gate
        out_channels *= 2

    # check functional arguments
    padding = validate_enum_arg(
        'padding', str(padding).upper(), ['VALID', 'SAME'])
    # the 2-element strides are kept for slicing in the 1x1 fast path
    original_strides = validate_conv2d_size_tuple('strides', strides)
    strides = validate_conv2d_strides_tuple(
        'strides', original_strides, channels_last)
    dilations = validate_positive_int_arg('dilations', dilations)

    if dilations > 1 and not channels_last:
        raise ValueError('`channels_last` == False is incompatible with '
                         '`dilations` > 1.')

    if any(i > 1 for i in strides) and dilations > 1:
        raise ValueError('`strides` > 1 is incompatible with `dilations` > 1.')

    weight_norm_fn = validate_weight_norm_arg(
        weight_norm, axis=-1, use_scale=normalizer_fn is None)
    if use_bias is None:
        use_bias = normalizer_fn is None

    # get the specification of outputs and parameters
    kernel_size = validate_conv2d_size_tuple('kernel_size', kernel_size)
    kernel_shape = kernel_size + (in_channels, out_channels)
    bias_shape = (out_channels,)

    # validate the parameters
    if kernel is not None:
        kernel_spec = ParamSpec(shape=kernel_shape, dtype=dtype)
        kernel = kernel_spec.validate('kernel', kernel)
    if kernel_mask is not None:
        kernel_mask_spec = InputSpec(dtype=dtype)
        kernel_mask = kernel_mask_spec.validate('kernel_mask', kernel_mask)
    if kernel_initializer is None:
        kernel_initializer = default_kernel_initializer(weight_norm)
    if bias is not None:
        bias_spec = ParamSpec(shape=bias_shape, dtype=dtype)
        bias = bias_spec.validate('bias', bias)

    # the main part of the conv2d layer
    with tf.variable_scope(scope, default_name=name or 'conv2d'):
        c_axis = -1 if channels_last else -3

        # create the variables
        if kernel is None:
            kernel = model_variable(
                'kernel',
                shape=kernel_shape,
                dtype=dtype,
                initializer=kernel_initializer,
                regularizer=kernel_regularizer,
                constraint=kernel_constraint,
                trainable=trainable
            )

        if weight_norm_fn is not None:
            kernel = weight_norm_fn(kernel)
        if kernel_mask is not None:
            kernel = kernel * kernel_mask

        maybe_add_histogram(kernel, 'kernel')
        kernel = maybe_check_numerics(kernel, 'kernel')

        if use_bias and bias is None:
            # `bias` must have the dtype of `input`, just like `kernel`
            # and the `ParamSpec` which validates a user specified `bias`;
            # otherwise `tf.nn.bias_add` would receive mismatched dtypes
            # for any input whose dtype is not the default variable dtype.
            bias = model_variable(
                'bias',
                shape=bias_shape,
                dtype=dtype,
                initializer=bias_initializer,
                regularizer=bias_regularizer,
                constraint=bias_constraint,
                trainable=trainable
            )
            maybe_add_histogram(bias, 'bias')
            bias = maybe_check_numerics(bias, 'bias')

        # special optimization: use dense instead of 1x1 conv if possible
        if dilations == 1 and kernel_size == (1, 1) and channels_last:
            with tf.name_scope('conv2d_1x1'):
                conv2d_1x1_kernel = tf.reshape(
                    kernel, kernel_shape[2:], name='conv2d_1x1_kernel')
                # a strided 1x1 convolution only reads every
                # `stride`-th pixel along the height and the width
                output = input[
                    ...,
                    ::original_strides[0],
                    ::original_strides[1],
                    :
                ]
                # flatten to 2d
                output, s1, s2 = flatten_to_ndims(output, 2)
                output = tf.matmul(output, conv2d_1x1_kernel)

        else:
            # flatten to 4d
            output, s1, s2 = flatten_to_ndims(input, 4)

            # do convolution
            if dilations > 1:
                output = tf.nn.atrous_conv2d(
                    value=output,
                    filters=kernel,
                    rate=dilations,
                    padding=padding
                )
            else:
                output = tf.nn.conv2d(
                    input=output,
                    filter=kernel,
                    strides=strides,
                    padding=padding,
                    data_format=data_format,
                    dilations=[1] * 4
                )

        # add bias
        if use_bias:
            output = tf.nn.bias_add(output, bias, data_format=data_format)

        # apply the normalization function if specified
        if normalizer_fn is not None:
            output = normalizer_fn(output)

        # split into halves if gated
        if gated:
            output, gate = tf.split(output, 2, axis=c_axis)

        # apply the activation function if specified
        if activation_fn is not None:
            output = activation_fn(output)

        # apply the gate if required
        if gated:
            output = output * tf.sigmoid(gate + gate_sigmoid_bias, name='gate')

        # unflatten back to original shape
        output = unflatten_from_ndims(output, s1, s2)

        maybe_add_histogram(output, 'output')
        output = maybe_check_numerics(output, 'output')

    return output
def weight_norm(kernel,
                axis,
                use_scale=True,
                scale=None,
                scale_initializer=None,
                scale_regularizer=None,
                scale_constraint=None,
                trainable=True,
                epsilon=1e-12,
                name=None,
                scope=None):
    """
    Weight normalization proposed by (Salimans & Kingma, 2016).

    The kernel is L2-normalized over all the dimensions NOT listed in
    `axis`, and then optionally multiplied by `scale`.  Roughly speaking::

        kernel = scale * kernel / tf.sqrt(
            tf.reduce_sum(kernel ** 2, axis=<dimensions not in `axis`>,
                          keepdims=True)
        )

    This function does not support data-dependent initialization for
    `scale`.  If you do need this feature, you have to turn off `scale`,
    and use :func:`~tfsnippet.layers.act_norm` along with
    :func:`weight_norm`.

    Args:
        kernel: Tensor, the weight `w` to be normalized.
        axis (int or tuple[int]): The axis to apply weight normalization.
            See above description to know what `axis` exactly is.
        use_scale (bool): Whether or not to use `scale`.
            Default :obj:`True`.
        scale (Tensor): Instead of creating a new variable, use this
            tensor.
        scale_initializer: The initializer for `scale`.
            Would be ``tf.ones_initializer(...)`` if not specified.
        scale_regularizer: The regularizer for `scale`.
        scale_constraint: The constraint for `scale`.
        trainable (bool): Whether or not the variables are trainable?
        epsilon: Small float number to avoid dividing by zero.
        name (str): Default name of the variable scope, used only if
            `scope` is not specified.  Would be "weight_norm" if not
            specified.
        scope (str): The name of the variable scope.

    Returns:
        tf.Tensor: The normalized (and scaled, if `use_scale`) kernel.

    Raises:
        ValueError: If `scale` is specified but `use_scale` is False,
            or if `axis` is empty.
    """
    # check the parameters
    if scale is not None and not use_scale:
        raise ValueError('`use_scale` is False but `scale` is specified.')
    axis = validate_int_tuple_arg('axis', axis)
    if not axis:
        raise ValueError('`axis` cannot be empty.')

    kernel = tf.convert_to_tensor(kernel)
    dtype = kernel.dtype.base_dtype
    kernel_shape = get_static_shape(kernel)

    # NOTE(review): `scale` takes the full static shape of `kernel`, not
    # only the dimensions listed in `axis` -- confirm this is intended.
    scale_spec = ParamSpec(kernel_shape, dtype=dtype)
    if scale is not None:
        scale = scale_spec.validate('scale', scale)
    if scale_initializer is None:
        scale_initializer = tf.ones_initializer(dtype=dtype)

    # any dimension not specified in `axis` should be reduced when
    # computing the norm
    n_dims = len(kernel_shape)
    kept_axis = resolve_negative_axis(n_dims, axis)
    reduce_axis = tuple(i for i in range(n_dims) if i not in kept_axis)

    with tf.variable_scope(scope, default_name=name or 'weight_norm'):
        # normalize the kernel
        normalized = tf.nn.l2_normalize(
            kernel, axis=reduce_axis, epsilon=epsilon)
        normalized = maybe_check_numerics(
            normalized, 'weight-normalized kernel')

        # without the scale, the normalized kernel is the final result
        if not use_scale:
            return normalized

        # create the scaling variable, unless one is given by the caller
        if scale is None:
            scale = model_variable(
                'scale',
                shape=kernel_shape,
                dtype=dtype,
                initializer=scale_initializer,
                regularizer=scale_regularizer,
                constraint=scale_constraint,
                trainable=trainable
            )
        scale = maybe_check_numerics(scale, 'scale')

        # now return the normalized and scaled weight
        return normalized * scale