def testLayerNorm(self):
  """layer_norm should preserve the input's shape."""
  inputs = np.random.rand(5, 7, 11)
  with self.test_session() as session:
    normalized = common_layers.layer_norm(
        tf.constant(inputs, dtype=tf.float32), 11)
    session.run(tf.global_variables_initializer())
    result = session.run(normalized)
    self.assertEqual(result.shape, (5, 7, 11))
def residual_block(x, hparams):
  """A stack of convolution blocks with residual connection."""
  kernel = (hparams.kernel_height, hparams.kernel_width)
  # Three undilated conv layers sharing the same kernel size.
  conv_specs = [((1, 1), kernel) for _ in xrange(3)]
  conv_out = common_layers.subseparable_conv_block(
      x, hparams.hidden_size, conv_specs,
      padding="SAME", separability=0, name="residual_block")
  normalized = common_layers.layer_norm(
      x + conv_out, hparams.hidden_size, name="lnorm")
  # tf.nn.dropout takes a keep probability, hence 1.0 - dropout.
  return tf.nn.dropout(normalized, 1.0 - hparams.dropout)
def residual_module(x, hparams, train, n, sep):
  """A stack of convolution blocks with residual connection."""
  kernel = (hparams.kernel_height, hparams.kernel_width)
  # n undilated conv layers with identical kernels.
  conv_specs = [((1, 1), kernel) for _ in xrange(n)]
  with tf.variable_scope("residual_module%d_sep%d" % (n, sep)):
    conv_out = common_layers.subseparable_conv_block(
        x, hparams.hidden_size, conv_specs,
        padding="SAME", separability=sep, name="block")
    normalized = common_layers.layer_norm(
        x + conv_out, hparams.hidden_size, name="lnorm")
    # Dropout is disabled at eval time: the rate is scaled by the
    # (float-cast) train flag, so keep_prob is 1.0 when train is 0.
    return tf.nn.dropout(normalized,
                         1.0 - hparams.dropout * tf.to_float(train))
def get_norm(hparams):
  """Get the normalizer function selected by hparams.normalizer_fn.

  Args:
    hparams: hyperparameters; reads `normalizer_fn` and (for "layer")
      `hidden_size`.

  Returns:
    A callable `(x, name) -> tensor` applying the chosen normalization.

  Raises:
    ValueError: if hparams.normalizer_fn is not one of "layer", "batch",
      "noam", "none".
  """
  if hparams.normalizer_fn == "layer":
    return lambda x, name: common_layers.layer_norm(  # pylint: disable=g-long-lambda
        x, hparams.hidden_size, name=name)
  if hparams.normalizer_fn == "batch":
    return tf.layers.batch_normalization
  if hparams.normalizer_fn == "noam":
    return common_layers.noam_norm
  if hparams.normalizer_fn == "none":
    return lambda x, name: x
  # Fixed: the original implicit string concatenation was missing a space
  # between "'batch'," and "'noam'", garbling the error message.
  raise ValueError(
      "Parameter normalizer_fn must be one of: 'layer', 'batch', "
      "'noam', 'none'.")
def residual_dilated_conv(x, repeat, padding, name, hparams):
  """A stack of convolution blocks with residual connections.

  Args:
    x: input tensor.
    repeat: number of residual conv/norm/dropout blocks to stack.
    padding: padding mode for conv_block (e.g. "SAME").
    name: variable-scope name.
    hparams: hyperparameters; reads kernel_height, kernel_width,
      num_hidden_layers, hidden_size, dropout.

  Returns:
    The transformed tensor.
  """
  with tf.variable_scope(name):
    k = (hparams.kernel_height, hparams.kernel_width)
    # Dilation doubles at each layer: 1, 2, 4, ...
    dilations_and_kernels = [((2**i, 1), k)
                             for i in xrange(hparams.num_hidden_layers)]
    for i in xrange(repeat):
      with tf.variable_scope("repeat_%d" % i):
        y = common_layers.conv_block(
            x, hparams.hidden_size, dilations_and_kernels,
            padding=padding, name="residual_conv")
        x = common_layers.layer_norm(x + y, hparams.hidden_size,
                                     name="lnorm")
        # Bug fix: tf.nn.dropout's second argument is keep_prob, so the
        # dropout rate must be subtracted from 1.0 — the original passed
        # hparams.dropout directly, inverting the behavior relative to
        # every other residual block in this file.
        x = tf.nn.dropout(x, 1.0 - hparams.dropout)
    return x
def residual_fn(x, y):
  """Residual connection: layer-normalize x plus dropped-out y."""
  # hparams is captured from the enclosing scope.
  dropped = tf.nn.dropout(y, 1.0 - hparams.residual_dropout)
  return common_layers.layer_norm(x + dropped)
def layernorm_module(x, hparams):
  """Apply layer normalization to x."""
  normalized = common_layers.layer_norm(
      x, hparams.hidden_size, name="layer_norm")
  return normalized
def norm_module(x, hparams, train):
  """Apply layer normalization to x (train flag is ignored)."""
  del train  # Unused; kept for interface parity with other modules.
  result = common_layers.layer_norm(x, hparams.hidden_size,
                                    name="norm_module")
  return result
def add_and_normalize(x, y):
  """Layer-normalize the sum of x and y."""
  # hidden_size is captured from the enclosing scope.
  combined = x + y
  return common_layers.layer_norm(combined, hidden_size, name="moe_norm")
def residual_fn3(x, y, z, hparams):
  """Layer-normalize x plus dropped-out y and z."""
  keep_prob = 1.0 - hparams.dropout  # tf.nn.dropout takes keep_prob.
  y_dropped = tf.nn.dropout(y, keep_prob)
  z_dropped = tf.nn.dropout(z, keep_prob)
  return common_layers.layer_norm(x + y_dropped + z_dropped)