Exemple #1
0
    def __init__(self, input_dim=600+9, output_dim=1*3, dropout_prob=0.):
        """Store the model settings and assemble the recurrent layer stack.

        Args:
            input_dim: Number of input features of the first `nn.Linear`.
            output_dim: Number of output features of the final `nn.Linear`.
            dropout_prob: Probability given to every `nn.Dropout` in the stack.
        """
        super(F0_RNN, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.dropout_prob = dropout_prob

        # Input projection: linear -> sigmoid -> dropout.
        modules = [
            nn.Linear(self.input_dim, 256),
            nn.Sigmoid(),
            nn.Dropout(p=dropout_prob),
        ]

        # Three wrapped GRUs, each followed by its own dropout. Only the first
        # one consumes the 256-wide projection; the other two are 64 -> 64.
        for gru_in_features in (256, 64, 64):
            gru = nn.GRU(gru_in_features, 64, batch_first=True)
            modules.append(utils.RecurrentCuDNNWrapper(gru))
            modules.append(nn.Dropout(p=dropout_prob))

        # Output head: linear -> sigmoid -> dropout -> linear.
        modules.extend([
            nn.Linear(64, 64),
            nn.Sigmoid(),
            nn.Dropout(p=dropout_prob),
            nn.Linear(64, self.output_dim),
        ])

        self.recurrent_layers = utils.SequentialWithRecurrent(*modules)

        # Register an LF0Distortion metric as 'LF0_RMSE_Hz' under the 'all' key.
        self.metrics.add_metrics('all', LF0_RMSE_Hz=LF0Distortion())
Exemple #2
0
    def __init__(self,
                 z_dim=16,
                 kld_weight=1.,
                 conditioning_dim=600 + 9,
                 output_dim=1 * 3,
                 dropout_prob=0.,
                 latent=None):
        """Store the VAE settings and construct its encoder and decoder.

        Args:
            z_dim: Forwarded to the parent constructor; `self.z_dim` is then
                read back when sizing the encoder and decoder.
            kld_weight: Forwarded to the parent constructor.
            conditioning_dim: Passed to `_Encoder`; added to `self.z_dim` to
                give the input width of the decoder's first `nn.Linear`.
            output_dim: Passed to `_Encoder`; output width of the decoder's
                final `nn.Linear`.
            dropout_prob: Probability given to every `nn.Dropout` created here,
                and passed to `_Encoder`.
            latent: Stored on the instance as `self.latent`; not otherwise used
                by this constructor.
        """
        super(VAE, self).__init__(z_dim=z_dim, kld_weight=kld_weight)
        self.conditioning_dim = conditioning_dim
        self.output_dim = output_dim
        self.dropout_prob = dropout_prob
        self.latent = latent

        self.encoder_layer = _Encoder(
            self.conditioning_dim, self.output_dim, dropout_prob, self.z_dim)

        def gru_stage(in_features):
            # One recurrent stage: a wrapped 64-unit GRU followed by dropout.
            recurrent = nn.GRU(in_features, 64, batch_first=True)
            return [
                utils.RecurrentCuDNNWrapper(recurrent),
                nn.Dropout(p=dropout_prob),
            ]

        # Modules are created strictly in the order they appear in the stack.
        decoder_modules = [
            nn.Linear(self.conditioning_dim + self.z_dim, 256),
            nn.Sigmoid(),
            nn.Dropout(p=dropout_prob),
        ]
        decoder_modules += gru_stage(256)
        decoder_modules += gru_stage(64)
        decoder_modules += gru_stage(64)
        decoder_modules += [
            nn.Linear(64, 64),
            nn.Sigmoid(),
            nn.Dropout(p=dropout_prob),
            nn.Linear(64, self.output_dim),
        ]
        self.decoder_layer = utils.SequentialWithRecurrent(*decoder_modules)

        # Register an LF0Distortion metric as 'LF0_RMSE_Hz' under the 'all' key.
        self.metrics.add_metrics('all', LF0_RMSE_Hz=LF0Distortion())
    def __init__(self, z_dim=16, conditioning_dim=70+9, output_dim=1*3, dropout_prob=0.,
                 phone_set_file='data/unilex_phoneset.txt'):
        """Initialises auto-encoder parameters and settings.

        The encoder and decoder share one layer layout and differ only in
        their input and output widths, so both are produced by a single local
        builder.

        Args:
            z_dim: Output width of the encoder's final `nn.Linear`; also the
                size handed to the `embeddings` history metric.
            conditioning_dim: Added to `z_dim` to give the input width of the
                decoder's first `nn.Linear`.
            output_dim: Input width of the encoder and output width of the
                decoder.
            dropout_prob: Probability given to every `nn.Dropout` created here.
            phone_set_file: Stored on the instance as `self.phone_set_file`;
                not otherwise used by this constructor.
        """
        super(F0_AE, self).__init__()
        self.z_dim = z_dim
        self.conditioning_dim = conditioning_dim
        self.output_dim = output_dim
        self.dropout_prob = dropout_prob
        self.phone_set_file = phone_set_file

        def build_stack(in_features, out_features):
            # linear -> sigmoid -> dropout -> 3 x (wrapped GRU -> dropout)
            # -> linear -> sigmoid -> dropout -> linear.
            # Modules are created in stack order, so parameter initialisation
            # happens in the same sequence as a fully written-out definition.
            modules = [
                nn.Linear(in_features, 256),
                nn.Sigmoid(),
                nn.Dropout(p=dropout_prob),
            ]
            for gru_in_features in (256, 64, 64):
                modules.append(utils.RecurrentCuDNNWrapper(
                    nn.GRU(gru_in_features, 64, batch_first=True)))
                modules.append(nn.Dropout(p=dropout_prob))
            modules.extend([
                nn.Linear(64, 64),
                nn.Sigmoid(),
                nn.Dropout(p=dropout_prob),
                nn.Linear(64, out_features),
            ])
            return utils.SequentialWithRecurrent(*modules)

        # The encoder is built before the decoder.
        self.encoder_layer = build_stack(self.output_dim, self.z_dim)
        self.decoder_layer = build_stack(
            self.conditioning_dim + self.z_dim, self.output_dim)

        # All metrics are registered under the 'all' key; every history-style
        # metric is created with hidden=True.
        self.metrics.add_metrics('all',
                                 LF0_RMSE_Hz=LF0Distortion(),
                                 embeddings=TensorHistory(self.z_dim, hidden=True),
                                 name=History(hidden=True),
                                 n_segments=TensorHistory(1, dtype=torch.long, hidden=True),
                                 segment_mean_F0=TensorHistory(1, hidden=True))
Exemple #4
0
    def __init__(self,
                 z_dim=16,
                 kld_weight=1.,
                 conditioning_dim=70 + 9,
                 output_dim=1 * 3,
                 dropout_prob=0.,
                 n_components=32,
                 pseudo_inputs_seq_lens=50,
                 pseudo_inputs_mean=0.05,
                 pseudo_inputs_std=0.01,
                 phone_set_file='data/unilex_phoneset.txt'):
        """Store the model settings and construct the encoder and decoder.

        Args:
            z_dim: Forwarded to the parent constructor; `self.z_dim` is then
                read back when sizing the encoder and decoder.
            kld_weight: Forwarded to the parent constructor.
            conditioning_dim: Added to `self.z_dim` to give the input width of
                the decoder's first `nn.Linear`.
            output_dim: Input width of the encoder, output width of the
                decoder, and the `input_dim` given to the parent constructor.
            dropout_prob: Probability given to every `nn.Dropout` created here.
            n_components: Forwarded to the parent constructor. Replaced by the
                second value returned from `_make_seq_lens` when
                `pseudo_inputs_seq_lens` is a dict.
            pseudo_inputs_seq_lens: Forwarded to the parent constructor. A dict
                is first converted with `self._make_seq_lens`.
            pseudo_inputs_mean: Forwarded to the parent constructor.
            pseudo_inputs_std: Forwarded to the parent constructor.
            phone_set_file: Stored on the instance as `self.phone_set_file`;
                not otherwise used by this constructor.
        """
        # NOTE(review): this runs before the parent constructor, so
        # `_make_seq_lens` presumably must not rely on state set up there --
        # confirm against its definition.
        if isinstance(pseudo_inputs_seq_lens, dict):
            converted = self._make_seq_lens(pseudo_inputs_seq_lens)
            pseudo_inputs_seq_lens, n_components = converted

        parent_kwargs = dict(
            z_dim=z_dim,
            kld_weight=kld_weight,
            input_dim=output_dim,
            n_components=n_components,
            pseudo_inputs_seq_lens=pseudo_inputs_seq_lens,
            pseudo_inputs_mean=pseudo_inputs_mean,
            pseudo_inputs_std=pseudo_inputs_std,
        )
        super(F0_VAMP_VAE, self).__init__(**parent_kwargs)

        self.conditioning_dim = conditioning_dim
        self.output_dim = output_dim
        self.dropout_prob = dropout_prob
        self.phone_set_file = phone_set_file

        def hidden_trunk(in_features):
            # linear -> sigmoid -> dropout -> 3 x (wrapped GRU -> dropout)
            # -> linear -> sigmoid -> dropout, created in stack order.
            trunk = [
                nn.Linear(in_features, 256),
                nn.Sigmoid(),
                nn.Dropout(p=dropout_prob),
            ]
            for gru_in_features in (256, 64, 64):
                trunk.append(utils.RecurrentCuDNNWrapper(
                    nn.GRU(gru_in_features, 64, batch_first=True)))
                trunk.append(nn.Dropout(p=dropout_prob))
            trunk += [
                nn.Linear(64, 64),
                nn.Sigmoid(),
                nn.Dropout(p=dropout_prob),
            ]
            return trunk

        # Encoder: the 64-wide trunk output is handed to a VAELayer together
        # with the latent size.
        encoder_trunk = utils.SequentialWithRecurrent(
            *hidden_trunk(self.output_dim))
        self.encoder_layer = vae.VAELayer(encoder_trunk, 64, self.z_dim)

        # Decoder: same trunk layout, plus a final projection to output_dim.
        decoder_modules = hidden_trunk(self.conditioning_dim + self.z_dim)
        decoder_modules.append(nn.Linear(64, self.output_dim))
        self.decoder_layer = utils.SequentialWithRecurrent(*decoder_modules)

        # All metrics are registered under the 'all' key; every history-style
        # metric is created with hidden=True.
        self.metrics.add_metrics(
            'all',
            LF0_RMSE_Hz=LF0Distortion(),
            embeddings=TensorHistory(self.z_dim, hidden=True),
            name=History(hidden=True),
            n_segments=TensorHistory(1, dtype=torch.long, hidden=True),
            segment_mean_F0=TensorHistory(1, hidden=True))