def __init__(self,
             attention_dim,
             input_dim,
             position_encoding_weight=1.,
             position_rate=1.,
             reduction_factor=1,
             has_bias=False,
             bias_dim=0,
             keep_prob=1.):
    """Attention block that projects query/key/value into ``attention_dim``.

    Args:
        attention_dim (int): dimension of the attention space.
        input_dim (int): feature dimension of q, k and v inputs.
        position_encoding_weight (float): scale applied to positional encoding.
        position_rate (float): rate used to derive the default omega.
        reduction_factor (int): frame reduction factor of the decoder.
        has_bias (bool): multispeaker case — adds speaker-conditioned
            position scales and a trainable omega.
        bias_dim (int): dimension of the speaker embedding (only used when
            ``has_bias`` is True).
        keep_prob (float): dropout keep probability.
    """
    super(AttentionBlock, self).__init__()

    # positional encoding: default angular rate shared by query and key
    omega_default = position_rate / reduction_factor
    self.omega_default = omega_default

    # multispeaker case
    if has_bias:
        pos_std = np.sqrt(1.0 / bias_dim)
        pos_init = I.NormalInitializer(loc=0., scale=pos_std)
        self.q_pos_affine = dg.Linear(bias_dim, 1, param_attr=pos_init)
        self.k_pos_affine = dg.Linear(bias_dim, 1, param_attr=pos_init)
        # NOTE(review): trainable omega grouped with the multispeaker
        # branch — confirm against the reference implementation.
        self.omega_initial = self.create_parameter(
            shape=[1], attr=I.ConstantInitializer(value=omega_default))

    # q, k and v share the same feature dimension, so initializing the
    # q and k projections from one matrix yields a better initial attention.
    shared_weight = np.random.normal(
        size=(input_dim, attention_dim), scale=np.sqrt(1. / input_dim))
    shared_init = I.NumpyArrayInitializer(shared_weight.astype(np.float32))

    # three affine transformations project q, k, v into attention_dim
    self.q_affine = weight_norm(
        dg.Linear(input_dim, attention_dim, param_attr=shared_init), dim=-1)
    self.k_affine = weight_norm(
        dg.Linear(input_dim, attention_dim, param_attr=shared_init), dim=-1)

    v_init = I.NormalInitializer(loc=0., scale=np.sqrt(1.0 / input_dim))
    self.v_affine = weight_norm(
        dg.Linear(input_dim, attention_dim, param_attr=v_init), dim=-1)

    out_init = I.NormalInitializer(loc=0., scale=np.sqrt(1.0 / attention_dim))
    self.out_affine = weight_norm(
        dg.Linear(attention_dim, input_dim, param_attr=out_init), dim=-1)

    self.keep_prob = keep_prob
    self.has_bias = has_bias
    self.bias_dim = bias_dim
    self.attention_dim = attention_dim
    self.position_encoding_weight = position_encoding_weight
def __init__(self,
             in_channels,
             reduction_factor,
             prenet_sizes,
             layers,
             kernel_size,
             attention_dim,
             position_encoding_weight=1.,
             omega=1.,
             has_bias=False,
             bias_dim=0,
             keep_prob=1.):
    """Decoder: prenet, then stacked causal convolutions with multihop
    attention, then an affine projection to ``reduction_factor`` mel frames.

    Args:
        in_channels (int): mel spectrogram dimension.
        reduction_factor (int): number of frames predicted per step.
        prenet_sizes (list[int]): output sizes of the prenet affine layers.
        layers (int): number of (causal conv, attention) pairs.
        kernel_size (int): causal convolution kernel size.
        attention_dim (int): attention space dimension.
        position_encoding_weight (float): positional encoding scale.
        omega (float): position rate for the attention blocks.
        has_bias (bool): multispeaker case — speaker-conditioned layers.
        bias_dim (int): speaker embedding dimension.
        keep_prob (float): dropout keep probability.
    """
    super(Decoder, self).__init__()

    # prenet — mind the difference of AffineBlock2 and AffineBlock1;
    # dropout is disabled only on the first prenet layer
    self.prenet = dg.LayerList()
    c_in = in_channels
    for idx, c_out in enumerate(prenet_sizes):
        self.prenet.append(
            AffineBlock2(
                c_in,
                c_out,
                has_bias,
                bias_dim,
                dropout=(idx != 0),
                keep_prob=keep_prob))
        c_in = c_out

    # causal convolutions + multihop attention
    decoder_dim = prenet_sizes[-1]
    self.causal_convs = dg.LayerList()
    self.attention_blocks = dg.LayerList()
    for _ in range(layers):
        self.causal_convs.append(
            ConvBlock(decoder_dim, kernel_size, True, has_bias, bias_dim,
                      keep_prob))
        self.attention_blocks.append(
            AttentionBlock(attention_dim, decoder_dim,
                           position_encoding_weight, omega, reduction_factor,
                           has_bias, bias_dim, keep_prob))

    # output mel spectrogram: r * mel_dim values per decoder step
    output_dim = reduction_factor * in_channels
    out_init = I.NormalInitializer(loc=0., scale=np.sqrt(1.0 / decoder_dim))
    self.out_affine = weight_norm(
        dg.Linear(decoder_dim, output_dim, param_attr=out_init), dim=-1)
    if has_bias:
        self.out_sp_affine = dg.Linear(bias_dim, output_dim)

    self.has_bias = has_bias
    self.kernel_size = kernel_size
    self.in_channels = in_channels
    self.decoder_dim = decoder_dim
    self.reduction_factor = reduction_factor
    self.out_channels = output_dim
def __init__(self, in_channel, out_channel, has_bias=False, bias_dim=0):
    """Weight-normalized affine layer, optionally speaker-conditioned.

    Args:
        in_channel (int): input feature size.
        out_channel (int): output feature size.
        has_bias (bool): multispeaker case — adds a speaker projection.
        bias_dim (int): speaker embedding dimension (used when ``has_bias``).
    """
    super(AffineBlock1, self).__init__()

    weight_init = I.NormalInitializer(loc=0., scale=np.sqrt(1.0 / in_channel))
    self.affine = weight_norm(
        dg.Linear(in_channel, out_channel, param_attr=weight_init), dim=-1)
    if has_bias:
        self.bias_affine = dg.Linear(bias_dim, out_channel)

    self.has_bias = has_bias
    self.bias_dim = bias_dim
def test_normal_initializer_default_value(self):
    """Test the normal initializer with default value
    """
    program = framework.Program()
    block = program.global_block()
    block.create_parameter(
        dtype="float32",
        shape=[5, 10],
        lod_level=0,
        name="param",
        initializer=initializer.NormalInitializer())

    # exactly one init op is appended, and its attributes carry the
    # initializer defaults: mean 0, std 1, seed 0
    self.assertEqual(len(block.ops), 1)
    op = block.ops[0]
    self.assertEqual(op.type, 'gaussian_random')
    self.assertAlmostEqual(op.attr('mean'), 0.0, delta=DELTA)
    self.assertAlmostEqual(op.attr('std'), 1.0, delta=DELTA)
    self.assertEqual(op.attr('seed'), 0)
def test_normal_initializer(self):
    """Test normal initializer with supplied attributes
    """
    program = framework.Program()
    block = program.global_block()
    block.create_parameter(
        dtype="float32",
        shape=[5, 10],
        lod_level=0,
        name="param",
        initializer=initializer.NormalInitializer(2.3, 1.9, 123))

    # the single appended op must echo the supplied mean/std/seed
    self.assertEqual(len(block.ops), 1)
    op = block.ops[0]
    self.assertEqual(op.type, 'gaussian_random')
    self.assertAlmostEqual(op.attr('mean'), 2.3, delta=DELTA)
    self.assertAlmostEqual(op.attr('std'), 1.9, delta=DELTA)
    self.assertEqual(op.attr('seed'), 123)
def test_normal_initializer(self, dtype="float32"):
    """Test normal initializer with supplied attributes
    """
    program = framework.Program()
    block = program.global_block()
    # create the same parameter twice; the op count below checks that
    # re-creation does not duplicate initialization ops
    for _ in range(2):
        block.create_parameter(
            dtype=dtype,
            shape=[5, 10],
            lod_level=0,
            name="param",
            initializer=initializer.NormalInitializer(2.3, 1.9, 123))

    # half-precision dtypes need an extra cast op after the random op
    num_ops = 2 if dtype in ("float16", "uint16") else 1
    self.assertEqual(len(block.ops), num_ops)
    op = block.ops[0]
    self.assertEqual(op.type, 'gaussian_random')
    self.assertAlmostEqual(op.attr('mean'), 2.3, delta=DELTA)
    self.assertAlmostEqual(op.attr('std'), 1.9, delta=DELTA)
    self.assertEqual(op.attr('seed'), 123)
    return block
def __init__(self,
             layers,
             in_channels,
             postnet_dim,
             kernel_size,
             out_channels,
             upsample_factor,
             has_bias=False,
             bias_dim=0,
             keep_prob=1.):
    """PostNet: affine pre-projection, a stack of conv blocks, and an
    affine output projection.

    Args:
        layers (int): number of convolution blocks.
        in_channels (int): input feature size.
        postnet_dim (int): hidden feature size of the conv stack.
        kernel_size (int): convolution kernel size.
        out_channels (int): output feature size.
        upsample_factor (int): stored for use by the forward pass.
        has_bias (bool): multispeaker case — speaker-conditioned layers.
        bias_dim (int): speaker embedding dimension.
        keep_prob (float): dropout keep probability.
    """
    super(PostNet, self).__init__()

    self.pre_affine = AffineBlock1(in_channels, postnet_dim, has_bias,
                                   bias_dim)
    # non-causal conv blocks (causal=False)
    self.convs = dg.LayerList([
        ConvBlock(postnet_dim, kernel_size, False, has_bias, bias_dim,
                  keep_prob) for _ in range(layers)
    ])

    post_init = I.NormalInitializer(loc=0., scale=np.sqrt(1.0 / postnet_dim))
    self.post_affine = weight_norm(
        dg.Linear(postnet_dim, out_channels, param_attr=post_init), dim=-1)

    self.upsample_factor = upsample_factor
def __init__(self,
             in_channel,
             kernel_size,
             causal=False,
             has_bias=False,
             bias_dim=None,
             keep_prob=1.):
    """Weight-normalized 1D convolution block with a doubled channel
    output (for a gated activation applied elsewhere).

    Args:
        in_channel (int): input (and gated output) channel count.
        kernel_size (int): convolution kernel size.
        causal (bool): use "valid" padding for causal convolution,
            otherwise "same".
        has_bias (bool): multispeaker case — adds a speaker projection.
        bias_dim (int): speaker embedding dimension.
            NOTE(review): defaults to None here but 0 in the sibling
            blocks — confirm intended.
        keep_prob (float): dropout keep probability (also scales the
            weight init std).
    """
    super(ConvBlock, self).__init__()
    self.causal = causal
    self.keep_prob = keep_prob
    self.in_channel = in_channel
    self.has_bias = has_bias

    # init std compensates for dropout and fan-in
    conv_init = I.NormalInitializer(
        loc=0., scale=np.sqrt(4 * keep_prob / (kernel_size * in_channel)))
    self.conv = weight_norm(
        Conv1D(
            in_channel,
            2 * in_channel,
            (kernel_size, ),
            padding="valid" if causal else "same",
            data_format="NTC",
            param_attr=conv_init))
    if has_bias:
        self.bias_affine = dg.Linear(bias_dim, 2 * in_channel)