def __init__(self,
             vocab_size,
             embed_dim,
             hidden_size,
             latent_size,
             num_layers,
             init_scale=0.1,
             dec_dropout=0.):
    """LSTM decoder conditioned on a latent code.

    Embeds target tokens, unrolls a latent-aware LSTM cell via ``nn.RNN``
    and projects hidden states back to vocabulary logits.
    """
    super(LSTMDecoder, self).__init__()
    self.num_layers = num_layers
    self.embed_dim = embed_dim
    self.hidden_size = hidden_size
    self.latent_size = latent_size

    def _uniform():
        # Every trainable weight here uses U(-init_scale, init_scale).
        return paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale))

    self.trg_embedder = nn.Embedding(vocab_size, embed_dim,
                                     weight_attr=_uniform())
    self.output_fc = nn.Linear(hidden_size, vocab_size,
                               weight_attr=_uniform())
    # A zero rate is stored as None so the cell can skip dropout entirely.
    self.dropout = nn.Dropout(dec_dropout) if dec_dropout > 0.0 else None
    self.lstm = nn.RNN(
        LSTMDecoderCell(self.num_layers, self.embed_dim, self.hidden_size,
                        self.latent_size, self.dropout))
def __init__(self,
             embed_dim,
             hidden_size,
             latent_size,
             vocab_size,
             num_layers=1,
             init_scale=0.1,
             PAD_ID=0,
             enc_dropout=0.,
             dec_dropout=0.):
    """VAE sequence-to-sequence model: LSTM encoder and decoder connected
    through a latent code of size ``latent_size``.
    """
    super(VAESeq2SeqModel, self).__init__()
    self.PAD_ID = PAD_ID
    self.latent_size = latent_size
    self.vocab_size = vocab_size
    self.num_layers = num_layers
    self.hidden_size = hidden_size

    def _uniform():
        return paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale))

    self.encoder = LSTMEncoder(vocab_size, embed_dim, hidden_size,
                               num_layers, init_scale, enc_dropout)
    self.decoder = LSTMDecoder(vocab_size, embed_dim, hidden_size,
                               latent_size, num_layers, init_scale,
                               dec_dropout)
    # 2*hidden -> 2*latent: presumably produces the latent mean and
    # log-variance — confirm against the forward pass.
    self.distributed_fc = nn.Linear(hidden_size * 2,
                                    latent_size * 2,
                                    weight_attr=_uniform())
    # latent -> 2*hidden*num_layers: expands a latent sample into initial
    # decoder states for every layer.
    self.fc = nn.Linear(latent_size,
                        2 * hidden_size * num_layers,
                        weight_attr=_uniform())
def __init__(self, hidden_size, bias=False, init_scale=0.1):
    """Attention layer holding two linear projections used by the attention
    mechanism: one over the query/input and one over the concatenated
    [context; query] features (see forward for how they are applied).
    """
    super(AttentionLayer, self).__init__()

    def _uniform():
        return paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale))

    # hidden -> hidden
    self.input_proj = nn.Linear(hidden_size,
                                hidden_size,
                                weight_attr=_uniform(),
                                bias_attr=bias)
    # [hidden ; hidden] -> hidden
    self.output_proj = nn.Linear(hidden_size + hidden_size,
                                 hidden_size,
                                 weight_attr=_uniform(),
                                 bias_attr=bias)
def __init__(self):
    """Single-layer bidirectional LSTM encoder; all sizes come from `config`."""
    super(Encoder, self).__init__()
    # Embedding weights drawn from N(0, config.trunc_norm_init_std).
    embed_init = paddle.ParamAttr(
        initializer=I.Normal(std=config.trunc_norm_init_std))
    self.embedding = nn.Embedding(config.vocab_size,
                                  config.emb_dim,
                                  weight_attr=embed_init)
    # Input-hidden weights ~ U(-rand_unif_init_mag, +rand_unif_init_mag);
    # input-hidden biases start at zero.
    lstm_weight_init = paddle.ParamAttr(
        initializer=I.Uniform(low=-config.rand_unif_init_mag,
                              high=config.rand_unif_init_mag))
    lstm_bias_init = paddle.ParamAttr(initializer=I.Constant(value=0.0))
    self.lstm = nn.LSTM(config.emb_dim,
                        config.hidden_dim,
                        num_layers=1,
                        direction='bidirect',
                        weight_ih_attr=lstm_weight_init,
                        bias_ih_attr=lstm_bias_init)
    # Bias-free projection of the bidirectional features (2 * hidden_dim).
    self.W_h = nn.Linear(config.hidden_dim * 2,
                         config.hidden_dim * 2,
                         bias_attr=False)
def test_uniform_initializer_default_value(self,
                                           dtype="float32",
                                           seed=0,
                                           min_value=-1.0,
                                           max_vlaue=1.0):
    """Check that the default Uniform() initializer emits one
    ``uniform_random`` op with min=-1.0, max=1.0 and the program's seed.

    NOTE(review): ``max_vlaue`` is a typo for ``max_value``; it is kept to
    avoid breaking callers that pass it by keyword.
    """
    paddle.enable_static()
    prog = framework.Program()
    prog.random_seed = seed
    main_block = prog.global_block()
    # Creating the same named parameter twice must not duplicate init ops.
    for _ in range(2):
        main_block.create_parameter(dtype=dtype,
                                    shape=[5, 10],
                                    lod_level=0,
                                    name="param",
                                    initializer=initializer.Uniform())
    # float16 adds a second op (presumably a cast after the fp32 init).
    expected_ops = 2 if dtype == "float16" else 1
    self.assertEqual(len(main_block.ops), expected_ops)
    init_op = main_block.ops[0]
    self.assertEqual(init_op.type, 'uniform_random')
    self.assertAlmostEqual(init_op.attr('min'), min_value, delta=DELTA)
    self.assertAlmostEqual(init_op.attr('max'), max_vlaue, delta=DELTA)
    self.assertEqual(init_op.attr('seed'), seed)
    paddle.disable_static()
    return main_block
def test_uniform_initializer_two_op(self,
                                    dtype="float32",
                                    seed=123,
                                    min_value=-4.2,
                                    max_vlaue=0.0):
    """Check a Uniform initializer with explicit min/max attributes.

    NOTE(review): ``max_vlaue`` is a typo for ``max_value`` and is never
    read in the body (the max attribute is ``float(i)`` and the assertion
    uses the literal 0.0); kept to preserve the call signature.
    """
    paddle.enable_static()
    prog = framework.Program()
    prog.random_seed = seed
    main_block = prog.global_block()
    # Two creations of the same named parameter, with max = float(i).
    for i in range(2):
        main_block.create_parameter(dtype=dtype,
                                    shape=[5, 10],
                                    lod_level=0,
                                    name="param",
                                    initializer=initializer.Uniform(
                                        min_value, float(i)))
    # float16 adds a second op (presumably a cast after the fp32 init).
    expected_ops = 2 if dtype == "float16" else 1
    self.assertEqual(len(main_block.ops), expected_ops)
    init_op0 = main_block.ops[0]
    self.assertEqual(init_op0.type, 'uniform_random')
    self.assertAlmostEqual(init_op0.attr('min'), min_value, delta=DELTA)
    self.assertAlmostEqual(init_op0.attr('max'), 0.0, delta=DELTA)
    self.assertEqual(init_op0.attr("seed"), seed)
    paddle.disable_static()
    return main_block
def __init__(self):
    """Attentional LSTM decoder; sizes and switches come from `config`."""
    super(Decoder, self).__init__()
    self.attention_network = Attention()

    def _trunc_normal():
        return paddle.ParamAttr(
            initializer=I.Normal(std=config.trunc_norm_init_std))

    # Decoder input embedding.
    self.embedding = nn.Embedding(config.vocab_size,
                                  config.emb_dim,
                                  weight_attr=_trunc_normal())
    # [context (2*hidden) ; embedding] -> embedding; presumably mixes the
    # previous context vector into the current input — confirm in forward().
    self.x_context = nn.Linear(config.hidden_dim * 2 + config.emb_dim,
                               config.emb_dim)
    self.lstm = nn.LSTM(
        config.emb_dim,
        config.hidden_dim,
        num_layers=1,
        direction='forward',
        weight_ih_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-config.rand_unif_init_mag,
                                  high=config.rand_unif_init_mag)),
        bias_ih_attr=paddle.ParamAttr(initializer=I.Constant(value=0.0)))
    if config.pointer_gen:
        # Scalar generation probability for the pointer-generator mix.
        self.p_gen_linear = nn.Linear(
            config.hidden_dim * 4 + config.emb_dim, 1)
    # Two-stage output projection down to vocabulary logits.
    self.out1 = nn.Linear(config.hidden_dim * 3, config.hidden_dim)
    self.out2 = nn.Linear(config.hidden_dim,
                          config.vocab_size,
                          weight_attr=_trunc_normal())
def __init__(self, char_vocab_size, char_embed_dim, projection_dim,
             num_highways, cnn_filters, max_characters_per_token):
    """Character-CNN encoder layer for ELMo.

    Embeds characters, applies a bank of width-varied Conv2D + MaxPool
    filters, optionally a Highway network, and optionally a linear
    projection when the total filter count differs from projection_dim.

    Args:
        char_vocab_size: size of the character vocabulary.
        char_embed_dim: character embedding dimension.
        projection_dim: target output dimension of the optional projection.
        num_highways: number of highway layers (0 disables the highway).
        cnn_filters: sequence of (kernel_width, num_filters) pairs.
        max_characters_per_token: fixed number of characters per token.
    """
    super(ELMoCharacterEncoderLayer, self).__init__()
    self._use_highway = (num_highways > 0)
    # Total number of CNN output channels across all filter widths.
    self._n_filters = sum(f[1] for f in cnn_filters)
    # Project only when the concatenated filter size differs from the target.
    self._use_proj = (self._n_filters != projection_dim)
    paramAttr = paddle.ParamAttr(initializer=I.Uniform(low=-1.0, high=1.0))
    self._char_embedding_layer = nn.Embedding(
        num_embeddings=char_vocab_size,
        embedding_dim=char_embed_dim,
        weight_attr=paramAttr)
    # Zero the embedding row for index 0 (presumably the padding
    # character — confirm against the character vocabulary).
    self._char_embedding_layer.weight[0, :] = 0
    self._convolution_layers = []
    for i, (width, num) in enumerate(cnn_filters):
        paramAttr = paddle.ParamAttr(
            initializer=I.Uniform(low=-0.05, high=0.05))
        conv2d = nn.Conv2D(in_channels=char_embed_dim,
                           out_channels=num,
                           kernel_size=(1, width),
                           padding='Valid',
                           data_format='NHWC',
                           weight_attr=paramAttr)
        # Pool over every valid position, yielding one value per filter.
        max_pool = nn.MaxPool2D(kernel_size=(1, max_characters_per_token -
                                             width + 1),
                                stride=(1, 1),
                                padding='Valid',
                                data_format='NHWC')
        # Register as sublayers so their parameters are tracked; the plain
        # list below only keeps (width, conv, pool) for the forward pass.
        self.add_sublayer('cnn_layer_{}'.format(i), conv2d)
        self.add_sublayer('maxpool_layer_{}'.format(i), max_pool)
        self._convolution_layers.append([width, conv2d, max_pool])
    self._relu = nn.ReLU()
    if self._use_highway:
        self._highway_layer = Highway(self._n_filters, num_highways)
    if self._use_proj:
        # Scaled-normal init: std = 1 / sqrt(fan_in).
        paramAttr = paddle.ParamAttr(initializer=I.Normal(
            mean=0.0, std=1.0 / np.sqrt(self._n_filters)))
        self._linear_layer = nn.Linear(self._n_filters,
                                       projection_dim,
                                       weight_attr=paramAttr)
def __init__(self,
             embed_dim,
             hidden_size,
             vocab_size,
             output_dim,
             vocab_path,
             padding_idx=0,
             num_layers=1,
             dropout_prob=0.0,
             init_scale=0.1,
             embedding_name=None):
    """Bidirectional-LSTM sentence encoder with an MLP head.

    When ``embedding_name`` is given, a pretrained TokenEmbedding
    (extended with ``vocab_path``) replaces the randomly initialized
    embedding and ``embed_dim`` is overridden by its dimension.
    """
    super(BiLSTM, self).__init__()
    if embedding_name is None:
        self.embedder = nn.Embedding(vocab_size, embed_dim, padding_idx)
    else:
        self.embedder = TokenEmbedding(embedding_name,
                                       extended_vocab_path=vocab_path,
                                       keep_extended_vocab_only=True)
        embed_dim = self.embedder.embedding_dim
    self.lstm = nn.LSTM(embed_dim,
                        hidden_size,
                        num_layers,
                        'bidirectional',
                        dropout=dropout_prob)

    def _uniform():
        return paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale))

    # 2*hidden: forward + backward LSTM features.
    self.fc = nn.Linear(hidden_size * 2, hidden_size, weight_attr=_uniform())
    # 8*hidden: presumably a pairwise feature concatenation — confirm in
    # forward().
    self.fc_1 = nn.Linear(hidden_size * 8, hidden_size,
                          weight_attr=_uniform())
    self.output_layer = nn.Linear(hidden_size, output_dim,
                                  weight_attr=_uniform())
def __init__(self, channels, cond_channels, kernel_size, dilations):
    """Residual block of a conditional dilated 2D convolutional network.

    Builds a causal-height / same-width dilated input convolution, a 1x1
    condition projection and a 1x1 output projection, all weight-normalized
    and initialized from U(-std, std) with std = sqrt(1 / fan_in).

    Args:
        channels: number of residual channels.
        cond_channels: number of conditioning-feature channels.
        kernel_size: (kh, kw) kernel of the dilated convolution.
        dilations: (dh, dw) dilation of the dilated convolution.
    """
    super(ResidualBlock, self).__init__()
    # input conv
    # BUGFIX: was `sqrt(1 / channels * np.prod(kernel_size))`, which by
    # operator precedence computes sqrt(prod(kernel) / channels).  The
    # fan-in of this conv is channels * prod(kernel_size), matching the
    # sqrt(1 / fan_in) pattern of the two 1x1 projections below and of
    # UpsampleNet in this file.
    std = math.sqrt(1 / (channels * np.prod(kernel_size)))
    init = I.Uniform(-std, std)
    # Receptive field per axis: 1 + (k - 1) * d.
    receptive_field = [
        1 + (k - 1) * d for (k, d) in zip(kernel_size, dilations)
    ]
    rh, rw = receptive_field
    paddings = [rh - 1, 0, rw // 2, (rw - 1) // 2]  # causal & same
    conv = nn.Conv2D(
        channels,
        2 * channels,
        kernel_size,
        padding=paddings,
        dilation=dilations,
        weight_attr=init,
        bias_attr=init)
    self.conv = nn.utils.weight_norm(conv)
    self.rh = rh
    self.rw = rw
    self.dilations = dilations

    # condition projection (1x1 conv, fan_in = cond_channels)
    std = math.sqrt(1 / cond_channels)
    init = I.Uniform(-std, std)
    condition_proj = nn.Conv2D(
        cond_channels, 2 * channels, (1, 1), weight_attr=init,
        bias_attr=init)
    self.condition_proj = nn.utils.weight_norm(condition_proj)

    # parametric residual & skip connection (1x1 conv, fan_in = channels)
    std = math.sqrt(1 / channels)
    init = I.Uniform(-std, std)
    out_proj = nn.Conv2D(
        channels, 2 * channels, (1, 1), weight_attr=init, bias_attr=init)
    self.out_proj = nn.utils.weight_norm(out_proj)
def __init__(self,
             vocab_size,
             hidden_size,
             batch_size,
             num_layers=1,
             init_scale=0.1,
             dropout=0.0):
    """RNN language model: embedding -> multi-layer LSTM -> vocab projection."""
    super(RnnLm, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.init_scale = init_scale
    self.batch_size = batch_size
    # Set up the recurrent states (method defined elsewhere on the class).
    self.reset_states()

    def _uniform():
        # Shared U(-init_scale, init_scale) initialization.
        return paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale))

    self.embedder = nn.Embedding(vocab_size, hidden_size,
                                 weight_attr=_uniform())
    self.lstm = nn.LSTM(input_size=hidden_size,
                        hidden_size=hidden_size,
                        num_layers=num_layers,
                        dropout=dropout,
                        weight_ih_attr=_uniform(),
                        weight_hh_attr=_uniform())
    self.fc = nn.Linear(hidden_size,
                        vocab_size,
                        weight_attr=_uniform(),
                        bias_attr=_uniform())
    self.dropout = nn.Dropout(p=dropout)
def __init__(self,
             embed_dim,
             hidden_size,
             vocab_size,
             output_dim,
             padding_idx=0,
             num_layers=1,
             dropout_prob=0.0,
             init_scale=0.1,
             embed_weight=None):
    """Bidirectional-LSTM sentence encoder with an MLP head.

    Args:
        embed_weight: optional pretrained embedding matrix copied into the
            embedding table; when None the random initialization is kept.
    """
    super(BiLSTM, self).__init__()
    self.embedder = nn.Embedding(vocab_size, embed_dim, padding_idx)
    # FIX: the original used a conditional *expression* as a statement
    # (`...set_value(embed_weight) if embed_weight is not None else None`)
    # purely for its side effect — a plain `if` states the intent.
    if embed_weight is not None:
        self.embedder.weight.set_value(embed_weight)
    self.lstm = nn.LSTM(embed_dim,
                        hidden_size,
                        num_layers,
                        'bidirectional',
                        dropout=dropout_prob)

    def _uniform():
        return paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale))

    # 2*hidden: forward + backward LSTM features.
    self.fc = nn.Linear(hidden_size * 2, hidden_size, weight_attr=_uniform())
    # 8*hidden: presumably a pairwise feature concatenation — confirm in
    # forward().
    self.fc_1 = nn.Linear(hidden_size * 8, hidden_size,
                          weight_attr=_uniform())
    self.output_layer = nn.Linear(hidden_size, output_dim,
                                  weight_attr=_uniform())
def __init__(self,
             vocab_size,
             embed_dim,
             hidden_size,
             num_layers,
             dropout_prob=0.,
             init_scale=0.1):
    """Attentional seq2seq decoder: embedding -> attention LSTM cell ->
    bias-free vocabulary projection.
    """
    super(Seq2SeqDecoder, self).__init__()

    def _uniform():
        return paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale))

    self.embedder = nn.Embedding(vocab_size, embed_dim,
                                 weight_attr=_uniform())
    # Decoder cell (with attention) unrolled forward in batch-major order.
    self.lstm_attention = nn.RNN(Seq2SeqDecoderCell(num_layers, embed_dim,
                                                    hidden_size,
                                                    dropout_prob),
                                 is_reverse=False,
                                 time_major=False)
    # No bias: logits come straight from the hidden-state projection.
    self.output_layer = nn.Linear(hidden_size,
                                  vocab_size,
                                  weight_attr=_uniform(),
                                  bias_attr=False)
def __init__(self, n_layers, channels, mel_bands, kernel_size, n_group):
    """One flow step: input projection -> residual net -> zero-initialized
    output projection.
    """
    super(Flow, self).__init__()
    # input projection: weight-normalized 1x1 conv, U(-1, 1) init
    input_proj = nn.Conv2D(1,
                           channels, (1, 1),
                           weight_attr=I.Uniform(-1., 1.),
                           bias_attr=I.Uniform(-1., 1.))
    self.input_proj = nn.utils.weight_norm(input_proj)
    # residual net; the dilation schedule is looked up by group size
    self.resnet = ResidualNet(n_layers, channels, mel_bands, kernel_size,
                              self.dilations_dict[n_group])
    # output projection: starts at all zeros (presumably so the flow is
    # initially close to an identity transform — confirm in forward())
    self.output_proj = nn.Conv2D(channels,
                                 2, (1, 1),
                                 weight_attr=I.Constant(0.),
                                 bias_attr=I.Constant(0.))
    # specs
    self.n_group = n_group
def __init__(self, upsample_factors):
    """Stack of weight-normalized transposed convs, one per upsample factor."""
    super(UpsampleNet, self).__init__()
    for factor in upsample_factors:
        # U(-b, b) with b = sqrt(1 / fan_in); the (3, 2*factor) kernel has
        # a single input channel, so fan_in = 3 * 2 * factor.
        bound = math.sqrt(1 / (3 * 2 * factor))
        init = I.Uniform(-bound, bound)
        deconv = nn.Conv2DTranspose(1,
                                    1, (3, 2 * factor),
                                    padding=(1, factor // 2),
                                    stride=(1, factor),
                                    weight_attr=init,
                                    bias_attr=init)
        self.append(nn.utils.weight_norm(deconv))
    # overall and per-stage upsampling factors
    self.upsample_factor = np.prod(upsample_factors)
    self.upsample_factors = upsample_factors
def __init__(self,
             vocab_size,
             embed_dim,
             hidden_size,
             num_layers,
             dropout_prob=0.,
             init_scale=0.1):
    """Unidirectional LSTM encoder over token embeddings."""
    super(Seq2SeqEncoder, self).__init__()
    self.embedder = nn.Embedding(
        vocab_size,
        embed_dim,
        weight_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale)))
    # Inter-layer dropout only makes sense for stacked LSTMs.
    lstm_dropout = dropout_prob if num_layers > 1 else 0.
    self.lstm = nn.LSTM(input_size=embed_dim,
                        hidden_size=hidden_size,
                        num_layers=num_layers,
                        direction="forward",
                        dropout=lstm_dropout)
def __init__(self,
             vocab_size,
             embed_dim,
             hidden_size,
             num_layers,
             init_scale=0.1,
             enc_dropout=0.):
    """LSTM encoder: embedding -> (possibly stacked) LSTM, with optional
    output dropout.

    Args:
        vocab_size: size of the source vocabulary.
        embed_dim: embedding dimension.
        hidden_size: LSTM hidden size.
        num_layers: number of stacked LSTM layers.
        init_scale: bound of the uniform embedding initialization.
        enc_dropout: dropout rate (inter-layer and on the outputs).
    """
    super(LSTMEncoder, self).__init__()
    self.src_embedder = nn.Embedding(
        vocab_size,
        embed_dim,
        weight_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale)))
    # FIX: pass inter-layer dropout only for stacked LSTMs — it is a no-op
    # for a single layer and triggers a framework warning; this matches the
    # Seq2SeqEncoder pattern used elsewhere in this file.
    self.lstm = nn.LSTM(input_size=embed_dim,
                        hidden_size=hidden_size,
                        num_layers=num_layers,
                        dropout=enc_dropout if num_layers > 1 else 0.)
    # A zero rate is stored as None so callers can skip dropout entirely.
    if enc_dropout > 0.0:
        self.dropout = nn.Dropout(enc_dropout)
    else:
        self.dropout = None
def __init__(self, frontend: parakeet.frontend.Phonetics, d_encoder: int, d_decoder: int, d_mel: int, n_heads: int, d_ffn: int, encoder_layers: int, decoder_layers: int, d_prenet: int, d_postnet: int, postnet_layers: int, postnet_kernel_size: int, max_reduction_factor: int, decoder_prenet_dropout: float, dropout: float): super(TransformerTTS, self).__init__() # text frontend (text normalization and g2p) self.frontend = frontend # encoder self.encoder_prenet = nn.Embedding( frontend.vocab_size, d_encoder, padding_idx=frontend.vocab.padding_index, weight_attr=I.Uniform(-0.05, 0.05)) # position encoding matrix may be extended later self.encoder_pe = pe.sinusoid_positional_encoding(0, 1000, d_encoder) self.encoder_pe_scalar = self.create_parameter( [1], attr=I.Constant(1.)) self.encoder = TransformerEncoder(d_encoder, n_heads, d_ffn, encoder_layers, dropout) # decoder self.decoder_prenet = MLPPreNet(d_mel, d_prenet, d_decoder, dropout) self.decoder_pe = pe.sinusoid_positional_encoding(0, 1000, d_decoder) self.decoder_pe_scalar = self.create_parameter( [1], attr=I.Constant(1.)) self.decoder = TransformerDecoder( d_decoder, n_heads, d_ffn, decoder_layers, dropout, d_encoder=d_encoder) self.final_proj = nn.Linear(d_decoder, max_reduction_factor * d_mel) self.decoder_postnet = CNNPostNet(d_mel, d_postnet, d_mel, postnet_kernel_size, postnet_layers) self.stop_conditioner = nn.Linear(d_mel, 3) # specs self.padding_idx = frontend.vocab.padding_index self.d_encoder = d_encoder self.d_decoder = d_decoder self.d_mel = d_mel self.max_r = max_reduction_factor self.dropout = dropout self.decoder_prenet_dropout = decoder_prenet_dropout # start and end: though it is only used in predict # it can also be used in training dtype = paddle.get_default_dtype() self.start_vec = paddle.full([1, d_mel], 0.5, dtype=dtype) self.end_vec = paddle.full([1, d_mel], -0.5, dtype=dtype) self.stop_prob_index = 2 # mutables self.r = max_reduction_factor # set it every call self.drop_n_heads = 0
net.add_sublayer('linear', nn.Linear(num_inputs, 1)) # Method 3: # from collections import OrderedDict # net = nn.Sequential(OrderedDict([ # ('linear', nn.Linear(num_inputs, 1)) # ])) print(net) for param in net.parameters(): print(param) # 3.3.4 初始化模型参数 # 设置全局参数初始化 fluid.set_global_initializer(initializer.Uniform(), initializer.Constant()) # 3.3.5 定义损失函数 loss = nn.MSELoss() # 3.3.6 定义优化算法 optimizer = optim.SGD(learning_rate=0.03, parameters=net.parameters()) print(optimizer) # 设置不同自网络的学习率(待修改) # optimizer = optim.SGD([ # {'params': net._sub_layers1.paramaters()}, # {'params': net._sub_layers2.paramaters(), 'lr': 0.01} # ], learning_rate=0.03) # for param_group in optimizer.param_groups: