コード例 #1
0
ファイル: model.py プロジェクト: BertSam/GRPA-berthie
    def __init__(self, frame_size, n_frame_samples, n_rnn, dim, learn_h0,
                 weight_norm):
        """Create the sub-layers: two 1x1 input convolutions, a GRU stack
        and a learned upsampling layer.

        Args:
            frame_size: kernel size handed to ``nn.LearnedUpsampling1d``;
                also stored on the instance.
            n_frame_samples: number of input channels of both 1x1
                convolutions; also stored on the instance.
            n_rnn: number of GRU layers and first dimension of ``h0``.
            dim: output channels of the convolutions, GRU input/hidden
                size and channel count of the upsampling layer.
            learn_h0: when true ``h0`` is a trainable parameter, otherwise
                it is registered as a buffer.
            weight_norm: when true, wrap both convolutions and the
                upsampling transposed convolution in weight normalisation.
        """
        super().__init__()

        self.frame_size = frame_size
        self.n_frame_samples = n_frame_samples
        self.dim = dim

        # Zero initial hidden state, either trainable or a fixed buffer.
        initial_state = torch.zeros(n_rnn, dim)
        if not learn_h0:
            self.register_buffer('h0', torch.autograd.Variable(initial_state))
        else:
            self.h0 = torch.nn.Parameter(initial_state)

        def build_expand_conv():
            # 1x1 convolution n_frame_samples -> dim with Kaiming-uniform
            # weights and zero bias, optionally weight-normalised.
            conv = torch.nn.Conv1d(in_channels=n_frame_samples,
                                   out_channels=dim,
                                   kernel_size=1)
            init.kaiming_uniform(conv.weight)
            init.constant(conv.bias, 0)
            if weight_norm:
                conv = torch.nn.utils.weight_norm(conv)
            return conv

        self.input_expand = build_expand_conv()

        # Attempt at including conditioning -- BGF (20-06-08)
        self.input_conditioning = build_expand_conv()

        self.rnn = torch.nn.GRU(input_size=dim,
                                hidden_size=dim,
                                num_layers=n_rnn,
                                batch_first=True)

        # Per layer: (weight name, bias name, one initialiser per chunk of
        # the concatenated weight matrix). Biases are zeroed.
        gru_init_plan = (
            ('weight_ih_l{}', 'bias_ih_l{}',
             (nn.lecun_uniform, nn.lecun_uniform, nn.lecun_uniform)),
            ('weight_hh_l{}', 'bias_hh_l{}',
             (nn.lecun_uniform, nn.lecun_uniform, init.orthogonal)),
        )
        for layer in range(n_rnn):
            for weight_name, bias_name, chunk_inits in gru_init_plan:
                nn.concat_init(getattr(self.rnn, weight_name.format(layer)),
                               list(chunk_inits))
                init.constant(getattr(self.rnn, bias_name.format(layer)), 0)

        self.upsampling = nn.LearnedUpsampling1d(in_channels=dim,
                                                 out_channels=dim,
                                                 kernel_size=frame_size)
        # Symmetric uniform range +/- sqrt(6 / dim) for the upsampling
        # weights; bias starts at zero.
        bound = np.sqrt(6 / dim)
        init.uniform(self.upsampling.conv_t.weight, -bound, bound)
        init.constant(self.upsampling.bias, 0)
        if weight_norm:
            self.upsampling.conv_t = torch.nn.utils.weight_norm(
                self.upsampling.conv_t)
コード例 #2
0
    def __init__(self,
                 frame_size,
                 n_frame_samples,
                 n_rnn,
                 dim,
                 learn_h0,
                 weight_norm,
                 skip_connection=False,
                 num_classes=0,
                 embedding_dim=256):
        """Create a GRU stack followed by a linear output projection.

        Args:
            frame_size: contributes to the GRU input size and to the width
                of the output projection; stored on the instance.
            n_frame_samples: stored on the instance (not otherwise used
                in this constructor).
            n_rnn: number of GRU layers and first dimension of ``h0``.
            dim: GRU hidden size and input width of the projection.
            learn_h0: when true ``h0`` is a trainable parameter, otherwise
                it is registered as a buffer.
            weight_norm: when true, wrap the output projection in weight
                normalisation.
            skip_connection: stored on the instance.
            num_classes: added to ``frame_size`` to form the GRU input
                size; stored on the instance.
            embedding_dim: stored on the instance.
        """
        super().__init__()

        self.frame_size = frame_size
        self.n_frame_samples = n_frame_samples
        self.dim = dim
        self.skip_connection = skip_connection
        self.num_classes = num_classes
        self.embedding_dim = embedding_dim

        # Zero initial hidden state, either trainable or a fixed buffer.
        initial_state = torch.zeros(n_rnn, dim)
        if not learn_h0:
            self.register_buffer('h0', torch.autograd.Variable(initial_state))
        else:
            self.h0 = torch.nn.Parameter(initial_state)

        # NOTE: a previous variant of this constructor (formerly kept here
        # as commented-out code) used a 1x1 `input_expand` convolution in
        # front of a dim -> dim GRU and `nn.LearnedUpsampling1d` after it.
        # This version sizes the GRU input as frame_size + num_classes and
        # replaces the upsampling with the linear layer `rnns_out` below.
        gru_input_size = self.frame_size + self.num_classes
        self.rnn = torch.nn.GRU(input_size=gru_input_size,
                                hidden_size=dim,
                                num_layers=n_rnn,
                                batch_first=True)

        # Per layer: (weight name, bias name, one initialiser per chunk of
        # the concatenated weight matrix). Biases are zeroed.
        gru_init_plan = (
            ('weight_ih_l{}', 'bias_ih_l{}',
             (nn.lecun_uniform, nn.lecun_uniform, nn.lecun_uniform)),
            ('weight_hh_l{}', 'bias_hh_l{}',
             (nn.lecun_uniform, nn.lecun_uniform, init.orthogonal_)),
        )
        for layer in range(n_rnn):
            for weight_name, bias_name, chunk_inits in gru_init_plan:
                nn.concat_init(getattr(self.rnn, weight_name.format(layer)),
                               list(chunk_inits))
                init.constant_(getattr(self.rnn, bias_name.format(layer)), 0)

        # Linear projection dim -> frame_size * dim.
        projection = torch.nn.Linear(self.dim, self.frame_size * self.dim)
        self.rnns_out = projection
        if weight_norm:
            self.rnns_out = torch.nn.utils.weight_norm(self.rnns_out)
コード例 #3
0
    def __init__(self, frame_size, n_frame_samples, n_rnn, dim, learn_h0,
                 is_cond, cond_dim, spk_dim, w_norm, qrnn):
        """Create the input/conditioning convolutions, the GRU stack and
        the learned upsampling layer.

        Args:
            frame_size: kernel size handed to ``nn.LearnedUpsampling1d``;
                also stored on the instance.
            n_frame_samples: input channels of ``input_expand``; also
                stored on the instance.
            n_rnn: number of GRU layers and first dimension of ``h0``.
            dim: output channels of every 1x1 convolution, GRU
                input/hidden size and channel count of the upsampling.
            learn_h0: when true ``h0`` is a trainable parameter, otherwise
                it is registered as a buffer.
            is_cond: when true, build ``cond_expand``, ``spk_embedding``
                and ``spk_expand``; otherwise all three are set to None.
            cond_dim: input channels of ``cond_expand``.
            spk_dim: both the number of entries and the embedding size of
                ``spk_embedding``, and the input channels of
                ``spk_expand``.
            w_norm: when true, apply weight normalisation to every
                convolution created here, including the upsampling
                transposed convolution. Stored as ``self.weight_norm``.
            qrnn: stored on the instance. It currently does not change
                the recurrent layer: a GRU is built either way.
        """
        super().__init__()

        self.frame_size = frame_size
        self.n_frame_samples = n_frame_samples
        self.dim = dim
        self.cond_dim = cond_dim
        self.spk_dim = spk_dim
        self.weight_norm = w_norm
        self.qrnn = qrnn

        # Zero initial hidden state, either trainable or a fixed buffer.
        h0 = torch.zeros(n_rnn, dim)
        if learn_h0:
            self.h0 = torch.nn.Parameter(h0)
        else:
            self.register_buffer('h0', torch.autograd.Variable(h0))

        self.input_expand = torch.nn.Conv1d(in_channels=n_frame_samples,
                                            out_channels=dim,
                                            kernel_size=1)
        if is_cond:
            # Acoustic conditioners expansion
            self.cond_expand = torch.nn.Conv1d(in_channels=cond_dim,
                                               out_channels=dim,
                                               kernel_size=1)

            # Initialize the 1x1 convolution for the acoustic conditioners
            init.kaiming_uniform(self.cond_expand.weight)
            init.constant(self.cond_expand.bias, 0)

            # Speaker embedding
            self.spk_embedding = torch.nn.Embedding(self.spk_dim, self.spk_dim)

            self.spk_expand = torch.nn.Conv1d(in_channels=self.spk_dim,
                                              out_channels=dim,
                                              kernel_size=1)

            # Initialize the 1x1 convolution for the speaker expansion
            init.kaiming_uniform(self.spk_expand.weight)
            init.constant(self.spk_expand.bias, 0)

            # Apply weight normalization if chosen
            if self.weight_norm:
                self.cond_expand = weight_norm(self.cond_expand, name='weight')
                self.spk_expand = weight_norm(self.spk_expand, name='weight')

        else:
            self.cond_expand = None
            self.spk_expand = None
            self.spk_embedding = None

        # input_expand is initialised after the conditioning layers so the
        # sequence of random draws matches the previous implementation.
        init.kaiming_uniform(self.input_expand.weight)
        init.constant(self.input_expand.bias, 0)

        if self.weight_norm:
            self.input_expand = weight_norm(self.input_expand, name='weight')

        # Both settings of `self.qrnn` used to build exactly this GRU (the
        # QRNN alternative was commented out), so a single construction
        # covers them.
        self.rnn = torch.nn.GRU(input_size=dim,
                                hidden_size=dim,
                                num_layers=n_rnn,
                                batch_first=True)
        for i in range(n_rnn):
            # One initialiser per chunk of the concatenated weight matrix;
            # biases start at zero.
            nn.concat_init(
                getattr(self.rnn, 'weight_ih_l{}'.format(i)),
                [nn.lecun_uniform, nn.lecun_uniform, nn.lecun_uniform])
            init.constant(getattr(self.rnn, 'bias_ih_l{}'.format(i)), 0)

            nn.concat_init(
                getattr(self.rnn, 'weight_hh_l{}'.format(i)),
                [nn.lecun_uniform, nn.lecun_uniform, init.orthogonal])
            init.constant(getattr(self.rnn, 'bias_hh_l{}'.format(i)), 0)

        self.upsampling = nn.LearnedUpsampling1d(in_channels=dim,
                                                 out_channels=dim,
                                                 kernel_size=frame_size)
        init.uniform(self.upsampling.conv_t.weight, -np.sqrt(6 / dim),
                     np.sqrt(6 / dim))
        init.constant(self.upsampling.bias, 0)

        # Use the stored flag here: the bare name `weight_norm` is the
        # imported function, which is always truthy, so testing it applied
        # weight normalisation even when w_norm was False.
        # NOTE(review): with w_norm=False, conv_t is no longer wrapped, so
        # checkpoints saved by the old code in that configuration may hold
        # weight-norm parameter names for conv_t -- confirm before loading.
        if self.weight_norm:
            self.upsampling.conv_t = weight_norm(self.upsampling.conv_t,
                                                 name='weight')