Example #1
    def __init__(self, hparams):
        super(Tacotron2, self).__init__()
        self.mask_padding = hparams.mask_padding
        self.fp16_run = hparams.fp16_run
        self.n_mel_channels = hparams.n_mel_channels
        self.n_frames_per_step = hparams.n_frames_per_step
        self.embedding = nn.Embedding(hparams.n_symbols,
                                      hparams.symbols_embedding_dim)
        # Xavier-style uniform init for the symbol embedding table
        std = sqrt(2.0 / (hparams.n_symbols + hparams.symbols_embedding_dim))
        val = sqrt(3.0) * std  # uniform bounds for std
        self.embedding.weight.data.uniform_(-val, val)
        self.encoder = Encoder(hparams)
        self.decoder = Decoder(hparams)
        self.postnet = Postnet(hparams)
        if hparams.with_gst:
            self.gst = GST(hparams)
        self.speaker_embedding = nn.Embedding(hparams.n_speakers,
                                              hparams.speaker_embedding_dim)

        if hparams.pretrained_speaker:
            # Load the speaker-name -> index mapping, then collect one
            # pretrained 512-dim embedding per speaker from the *.npy files.
            with open('data/VCTK/speaker-dict.json') as f:
                speakers = json.load(f)
            embed_paths = glob.glob(
                os.path.join(hparams.pretrained_speaker_path, '*.npy'))
            embeds = np.zeros((hparams.n_speakers, 1, 512))
            for embed_path in embed_paths:
                # File names are expected to look like '...spker_embed-<id>.npy'
                speaker_id = embed_path.split('spker_embed-')[1][:-4]
                embed = np.load(embed_path)
                embeds[speakers[speaker_id], :, :] = embed
            embeddings = torch.tensor(embeds, dtype=torch.float32)
            self.register_buffer('pretrained_speaker', embeddings)
            # Project the fixed 512-dim embeddings to speaker_embedding_dim
            self.speaker_linear = nn.Sequential(
                nn.Linear(512, hparams.speaker_embedding_dim), nn.ReLU())
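
The constructor only reads attributes off the `hparams` object, so any namespace-like container works. The sketch below is an illustrative assumption, not taken from the original repository: the concrete values are placeholders, and `Encoder`, `Decoder`, `Postnet`, and `GST` will read additional hyperparameters of their own when they are constructed.

from types import SimpleNamespace

# Hypothetical hyperparameter values; only the fields read directly by
# __init__ above are listed, and every value is a placeholder.
hparams = SimpleNamespace(
    mask_padding=True,
    fp16_run=False,
    n_mel_channels=80,
    n_frames_per_step=1,
    n_symbols=148,
    symbols_embedding_dim=512,
    n_speakers=109,
    speaker_embedding_dim=64,
    with_gst=True,
    pretrained_speaker=False,     # keep False to skip the .npy loading branch
    pretrained_speaker_path='',   # only read when pretrained_speaker is True
)

model = Tacotron2(hparams)  # assumes Tacotron2 and its submodules are importable
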
Example #2
    def __init__(self, hparams):
        super(Tacotron2, self).__init__()
        self.mask_padding = hparams.mask_padding
        self.fp16_run = hparams.fp16_run
        self.n_mel_channels = hparams.n_mel_channels
        self.n_frames_per_step = hparams.n_frames_per_step
        self.embedding = nn.Embedding(hparams.n_symbols,
                                      hparams.symbols_embedding_dim)
        std = sqrt(2.0 / (hparams.n_symbols + hparams.symbols_embedding_dim))
        val = sqrt(3.0) * std  # uniform bounds for std
        self.embedding.weight.data.uniform_(-val, val)
        self.encoder = Encoder(hparams)
        self.decoder = Decoder(hparams)
        self.postnet = Postnet(hparams)
        self.gst = GST(hparams)
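
Example #2 is a slimmer variant of the same constructor: it always builds the GST module and omits the speaker-embedding table and the pretrained-speaker loading branch, so the instantiation sketch after Example #1 applies here without the `with_gst`, speaker, and pretrained-speaker fields (though `GST(hparams)` will still read its own hyperparameters).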