def __init__(self, frame_size, n_frame_samples, n_rnn, dim, learn_h0,
             weight_norm):
    """Set up the input projections, the GRU stack and the upsampler.

    Args:
        frame_size: Kernel size passed to ``nn.LearnedUpsampling1d``.
        n_frame_samples: Number of input channels of both 1x1 input
            convolutions.
        n_rnn: Number of GRU layers; also the first dimension of the
            initial hidden state ``h0``.
        dim: Channel/hidden width shared by the convolutions, the GRU
            and the upsampler.
        learn_h0: When true, ``h0`` is stored as a ``Parameter``;
            otherwise it is registered as a buffer.
        weight_norm: When true, ``torch.nn.utils.weight_norm`` wraps the
            two input convolutions and the upsampler's ``conv_t``.
    """
    super().__init__()

    self.frame_size = frame_size
    self.n_frame_samples = n_frame_samples
    self.dim = dim

    def make_input_conv():
        # 1x1 convolution (n_frame_samples -> dim) with Kaiming-uniform
        # weights and zero bias, optionally weight-normalized.
        conv = torch.nn.Conv1d(
            in_channels=n_frame_samples,
            out_channels=dim,
            kernel_size=1,
        )
        init.kaiming_uniform(conv.weight)
        init.constant(conv.bias, 0)
        return torch.nn.utils.weight_norm(conv) if weight_norm else conv

    # Zero initial hidden state, one row per GRU layer.
    initial_hidden = torch.zeros(n_rnn, dim)
    if not learn_h0:
        self.register_buffer('h0', torch.autograd.Variable(initial_hidden))
    else:
        self.h0 = torch.nn.Parameter(initial_hidden)

    self.input_expand = make_input_conv()

    # Tentative inclusion of the BGF conditioning (20-06-08).
    self.input_conditioning = make_input_conv()

    self.rnn = torch.nn.GRU(
        input_size=dim,
        hidden_size=dim,
        num_layers=n_rnn,
        batch_first=True,
    )
    lecun = nn.lecun_uniform
    for layer in range(n_rnn):
        input_weight = getattr(self.rnn, 'weight_ih_l{}'.format(layer))
        input_bias = getattr(self.rnn, 'bias_ih_l{}'.format(layer))
        hidden_weight = getattr(self.rnn, 'weight_hh_l{}'.format(layer))
        hidden_bias = getattr(self.rnn, 'bias_hh_l{}'.format(layer))

        # Each weight is handed to nn.concat_init with three
        # initializers (presumably one per GRU gate block -- confirm
        # against nn.concat_init); biases start at zero.
        nn.concat_init(input_weight, [lecun, lecun, lecun])
        init.constant(input_bias, 0)
        nn.concat_init(hidden_weight, [lecun, lecun, init.orthogonal])
        init.constant(hidden_bias, 0)

    self.upsampling = nn.LearnedUpsampling1d(
        in_channels=dim,
        out_channels=dim,
        kernel_size=frame_size,
    )
    # Symmetric uniform range of half-width sqrt(6 / dim).
    bound = np.sqrt(6 / dim)
    init.uniform(self.upsampling.conv_t.weight, -bound, bound)
    init.constant(self.upsampling.bias, 0)
    if weight_norm:
        self.upsampling.conv_t = torch.nn.utils.weight_norm(
            self.upsampling.conv_t)
def __init__(self, frame_size, n_frame_samples, n_rnn, dim, learn_h0,
             is_cond, cond_dim, spk_dim, w_norm, qrnn):
    """Set up the frame-level tier with optional conditioning inputs.

    Args:
        frame_size: Kernel size passed to ``nn.LearnedUpsampling1d``.
        n_frame_samples: Number of input channels of the 1x1 input
            convolution.
        n_rnn: Number of recurrent layers; also the first dimension of
            the initial hidden state ``h0``.
        dim: Channel/hidden width shared by the projections, the
            recurrent stack and the upsampler.
        learn_h0: When true, ``h0`` is stored as a ``Parameter``;
            otherwise it is registered as a buffer.
        is_cond: When true, the acoustic-conditioner projection, the
            speaker embedding and the speaker projection are created;
            otherwise the three attributes are set to ``None``.
        cond_dim: Number of input channels of the conditioner
            projection.
        spk_dim: Used both as the number of rows and as the embedding
            size of the speaker embedding, and as the number of input
            channels of the speaker projection.
        w_norm: When true, weight normalization is applied to every 1x1
            projection and to the upsampler's ``conv_t``.
        qrnn: Stored on the instance. The QRNN backend is currently
            disabled, so a GRU is built either way.
    """
    super().__init__()

    self.frame_size = frame_size
    self.n_frame_samples = n_frame_samples
    self.dim = dim
    self.cond_dim = cond_dim
    self.spk_dim = spk_dim
    self.weight_norm = w_norm
    self.qrnn = qrnn

    # Zero initial hidden state, one row per recurrent layer.
    h0 = torch.zeros(n_rnn, dim)
    if learn_h0:
        self.h0 = torch.nn.Parameter(h0)
    else:
        self.register_buffer('h0', torch.autograd.Variable(h0))

    self.input_expand = torch.nn.Conv1d(
        in_channels=n_frame_samples,
        out_channels=dim,
        kernel_size=1,
    )

    if is_cond:
        # Acoustic conditioners expansion (1x1 convolution).
        self.cond_expand = torch.nn.Conv1d(
            in_channels=cond_dim,
            out_channels=dim,
            kernel_size=1,
        )
        init.kaiming_uniform(self.cond_expand.weight)
        init.constant(self.cond_expand.bias, 0)

        # Speaker embedding followed by its own 1x1 expansion.
        self.spk_embedding = torch.nn.Embedding(self.spk_dim, self.spk_dim)
        self.spk_expand = torch.nn.Conv1d(
            in_channels=self.spk_dim,
            out_channels=dim,
            kernel_size=1,
        )
        init.kaiming_uniform(self.spk_expand.weight)
        init.constant(self.spk_expand.bias, 0)

        # Apply weight normalization if chosen.
        if self.weight_norm:
            self.cond_expand = weight_norm(self.cond_expand, name='weight')
            self.spk_expand = weight_norm(self.spk_expand, name='weight')
    else:
        self.cond_expand = None
        self.spk_expand = None
        self.spk_embedding = None

    # The input projection is initialized after the conditioning layers
    # so the sequence of random draws stays unchanged for a fixed seed.
    init.kaiming_uniform(self.input_expand.weight)
    init.constant(self.input_expand.bias, 0)
    if self.weight_norm:
        self.input_expand = weight_norm(self.input_expand, name='weight')

    if self.qrnn:
        # The QRNN backend (QRNN(input_size=dim, hidden_size=dim,
        # num_layers=n_rnn)) is disabled; a GRU is used as a stand-in.
        self.rnn = torch.nn.GRU(
            input_size=dim,
            hidden_size=dim,
            num_layers=n_rnn,
            batch_first=True,
        )
    else:
        self.rnn = torch.nn.GRU(
            input_size=dim,
            hidden_size=dim,
            num_layers=n_rnn,
            batch_first=True,
        )
        # NOTE(review): the custom initialization is kept on the
        # non-QRNN path only, since it addresses GRU-specific parameter
        # names; confirm that the stand-in GRU above should keep
        # PyTorch's default initialization.
        for i in range(n_rnn):
            # Each weight is handed to nn.concat_init with three
            # initializers (presumably one per GRU gate block --
            # confirm against nn.concat_init); biases start at zero.
            nn.concat_init(
                getattr(self.rnn, 'weight_ih_l{}'.format(i)),
                [nn.lecun_uniform, nn.lecun_uniform, nn.lecun_uniform])
            init.constant(getattr(self.rnn, 'bias_ih_l{}'.format(i)), 0)

            nn.concat_init(
                getattr(self.rnn, 'weight_hh_l{}'.format(i)),
                [nn.lecun_uniform, nn.lecun_uniform, init.orthogonal])
            init.constant(getattr(self.rnn, 'bias_hh_l{}'.format(i)), 0)

    self.upsampling = nn.LearnedUpsampling1d(
        in_channels=dim,
        out_channels=dim,
        kernel_size=frame_size,
    )
    init.uniform(
        self.upsampling.conv_t.weight, -np.sqrt(6 / dim), np.sqrt(6 / dim))
    init.constant(self.upsampling.bias, 0)
    # Test the stored flag, not the module-level ``weight_norm``
    # function: the function object is always truthy, which would apply
    # weight normalization here even when ``w_norm`` is false.
    if self.weight_norm:
        self.upsampling.conv_t = weight_norm(
            self.upsampling.conv_t, name='weight')