def __init__(self, mel_channel):
    super(Generator, self).__init__()
    self.mel_channel = mel_channel

    self.generator = nn.Sequential(
        nn.ReflectionPad1d(3),
        nn.utils.weight_norm(nn.Conv1d(mel_channel, 512, kernel_size=7, stride=1)),

        nn.LeakyReLU(0.2),
        nn.utils.weight_norm(nn.ConvTranspose1d(512, 256, kernel_size=16, stride=8, padding=4)),
        ResStack(256),

        nn.LeakyReLU(0.2),
        nn.utils.weight_norm(nn.ConvTranspose1d(256, 128, kernel_size=16, stride=8, padding=4)),
        ResStack(128),

        nn.LeakyReLU(0.2),
        nn.utils.weight_norm(nn.ConvTranspose1d(128, 64, kernel_size=4, stride=2, padding=1)),
        ResStack(64),

        nn.LeakyReLU(0.2),
        nn.utils.weight_norm(nn.ConvTranspose1d(64, 32, kernel_size=4, stride=2, padding=1)),
        ResStack(32),

        nn.LeakyReLU(0.2),
        nn.ReflectionPad1d(3),
        nn.utils.weight_norm(nn.Conv1d(32, 1, kernel_size=7, stride=1)),
        nn.Tanh(),
    )
def __init__(self):
    super().__init__()
    input_size = 512
    upsample = [8, 8, 2, 2]

    layers = [nn.ReflectionPad1d(3), WMConv1d(80, input_size, 7)]
    for i, s in enumerate(upsample):
        input_size //= 2
        layers += [
            nn.LeakyReLU(0.3),
            WMConvTranspose1d(input_size * 2, input_size, s * 2, s, padding=s // 2 + s % 2),
            ResStack(input_size)
        ]
    layers += [
        nn.LeakyReLU(0.3),
        nn.ReflectionPad1d(3),
        WMConv1d(input_size, 1, 7, 1),
        nn.Tanh()
    ]
    self.model = nn.Sequential(*layers)
def __init__(self, classes, input_acc_size, input_nc=1, output_nc=1, ngf=64, n_blocks=4):
    super(Generator, self).__init__()
    self.classes = classes
    self.input_acc_size = input_acc_size

    # label embedding
    model = [nn.Embedding(classes, input_acc_size)]
    self.embed_model = nn.Sequential(*model)

    # concatenated
    model = [
        nn.ReflectionPad1d(1),
        nn.Conv1d(input_nc, ngf, kernel_size=8, padding=0, bias=True),
        nn.InstanceNorm1d(ngf),
        nn.ReLU(True)
    ]

    n_downsampling = 2
    for i in range(n_downsampling):
        mult = 2**i
        model += [
            nn.Conv1d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1, bias=True),
            nn.InstanceNorm1d(ngf * mult * 2),
            nn.ReLU(True)
        ]

    mult = 2**n_downsampling
    for i in range(n_blocks):
        model += [ResnetBlock(ngf * mult)]

    for i in range(n_downsampling):
        mult = 2**(n_downsampling - i)
        model += [
            nn.ConvTranspose1d(ngf * mult, int(ngf * mult / 2), kernel_size=4, stride=2,
                               padding=1, output_padding=1, bias=True),
            nn.InstanceNorm1d(int(ngf * mult / 2)),
            nn.ReLU(True)
        ]

    model += [nn.ReflectionPad1d(3)]
    model += [nn.Conv1d(ngf, output_nc, kernel_size=8, padding=1)]
    model += [nn.Tanh()]
    self.model = nn.Sequential(*model)
def __init__(self):
    super().__init__()
    down_sampling = 4
    channels = [64, 256, 1024, 1024]
    prev_channel = 16

    self.blocks = nn.ModuleList([
        nn.Sequential(nn.ReflectionPad1d(7), WMConv1d(1, prev_channel, 15), nn.LeakyReLU(0.3))
    ])
    for i, channel in enumerate(channels):
        self.blocks.extend([
            nn.Sequential(
                nn.ReflectionPad1d(20),
                WMConv1d(prev_channel, channel, 41, 4, groups=4**(i + 1)),
                nn.LeakyReLU(0.3))
        ])
        prev_channel = channel
    self.blocks.extend([
        nn.Sequential(nn.ReflectionPad1d(2), WMConv1d(prev_channel, 1024, 5, 1), nn.LeakyReLU(0.3)),
        nn.Sequential(nn.ReflectionPad1d(1), WMConv1d(1024, 1, 3, 1))
    ])
def __init__(self, ic=1, oc=1, norm_type='instancenorm', use_sn=False):
    super(ResBlock, self).__init__()
    self.ic = ic
    self.oc = oc
    self.norm_type = norm_type
    self.use_sn = use_sn

    self.relu = nn.ReLU(inplace=True)
    self.reflection_pad1 = nn.ReflectionPad1d(15)
    self.reflection_pad2 = nn.ReflectionPad1d(15)
    self.conv1 = nn.Conv1d(ic, oc, 31, 1, 0, bias=False)
    self.conv2 = nn.Conv1d(oc, oc, 31, 1, 0, bias=False)

    if self.use_sn:
        self.conv1 = SpectralNorm(self.conv1)
        self.conv2 = SpectralNorm(self.conv2)

    if self.norm_type == 'batchnorm':
        self.bn1 = nn.BatchNorm1d(oc)
        self.bn2 = nn.BatchNorm1d(oc)
    elif self.norm_type == 'instancenorm':
        self.bn1 = nn.InstanceNorm1d(oc)
        self.bn2 = nn.InstanceNorm1d(oc)
def __init__(self, input_channel=80, hu=512, ku=[16, 16, 4, 4], kr=[3, 7, 11], Dr=[1, 3, 5]):
    super(Generator, self).__init__()
    self.input = nn.Sequential(
        nn.ReflectionPad1d(3),
        nn.utils.weight_norm(nn.Conv1d(input_channel, hu, kernel_size=7))
    )

    generator = []
    for k in ku:
        inp = hu
        out = int(inp / 2)
        generator += [
            nn.LeakyReLU(0.2),
            nn.utils.weight_norm(nn.ConvTranspose1d(inp, out, k, k // 2)),
            MRF(kr, out, Dr)
        ]
        hu = out
    self.generator = nn.Sequential(*generator)

    self.output = nn.Sequential(
        nn.LeakyReLU(0.2),
        nn.ReflectionPad1d(3),
        nn.utils.weight_norm(nn.Conv1d(hu, 1, kernel_size=7, stride=1)),
        nn.Tanh()
    )
def __init__(
    self,
    in_channels=80,
    out_channels=1,
    proj_kernel=7,
    base_channels=512,
    upsample_factors=(8, 8, 2, 2),
    res_kernel=3,
    num_res_blocks=3,
):
    super().__init__()

    # assert model parameters
    assert (proj_kernel - 1) % 2 == 0, " [!] proj_kernel should be an odd number."

    # setup additional model parameters
    base_padding = (proj_kernel - 1) // 2
    act_slope = 0.2
    self.inference_padding = 2

    # initial layer
    layers = []
    layers += [
        nn.ReflectionPad1d(base_padding),
        weight_norm(nn.Conv1d(in_channels, base_channels, kernel_size=proj_kernel, stride=1, bias=True)),
    ]

    # upsampling layers and residual stacks
    for idx, upsample_factor in enumerate(upsample_factors):
        layer_in_channels = base_channels // (2 ** idx)
        layer_out_channels = base_channels // (2 ** (idx + 1))
        layer_filter_size = upsample_factor * 2
        layer_stride = upsample_factor
        layer_output_padding = upsample_factor % 2
        layer_padding = upsample_factor // 2 + layer_output_padding
        layers += [
            nn.LeakyReLU(act_slope),
            weight_norm(
                nn.ConvTranspose1d(
                    layer_in_channels,
                    layer_out_channels,
                    layer_filter_size,
                    stride=layer_stride,
                    padding=layer_padding,
                    output_padding=layer_output_padding,
                    bias=True,
                )
            ),
            ResidualStack(channels=layer_out_channels, num_res_blocks=num_res_blocks, kernel_size=res_kernel),
        ]

    layers += [nn.LeakyReLU(act_slope)]

    # final layer
    layers += [
        nn.ReflectionPad1d(base_padding),
        weight_norm(nn.Conv1d(layer_out_channels, out_channels, proj_kernel, stride=1, bias=True)),
        nn.Tanh(),
    ]
    self.layers = nn.Sequential(*layers)
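# Standalone sanity-check sketch (mine, not part of the snippets above; it assumes only
# torch and made-up tensor sizes). It reproduces the two length-preserving tricks these
# generators rely on: ReflectionPad1d((k - 1) // 2) followed by Conv1d(..., padding=0)
# keeps the time axis unchanged, and ConvTranspose1d(kernel=2r, stride=r,
# padding=r // 2 + r % 2, output_padding=r % 2) upsamples it by exactly r.
import torch
import torch.nn as nn

x = torch.randn(1, 80, 100)  # (batch, mel channels, frames)

proj_kernel = 7
proj = nn.Sequential(
    nn.ReflectionPad1d((proj_kernel - 1) // 2),
    nn.Conv1d(80, 32, kernel_size=proj_kernel),
)
assert proj(x).shape[-1] == x.shape[-1]  # time length preserved

for r in (8, 4, 2):
    up = nn.ConvTranspose1d(32, 16, kernel_size=r * 2, stride=r,
                            padding=r // 2 + r % 2, output_padding=r % 2)
    assert up(proj(x)).shape[-1] == x.shape[-1] * r  # upsampled by exactly r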
def __init__(self, channels, kernel_size=8, global_pool=None, convpool=None, compress=False, batchnorm=False):
    super(Encoder, self).__init__()

    model = []
    acti = nn.LeakyReLU(0.2)

    nr_layer = len(channels) - 2 if compress else len(channels) - 1

    for i in range(nr_layer):
        if convpool is None:
            pad = (kernel_size - 1) // 2
            model.append(nn.ReflectionPad1d(pad))
            model.append(nn.Conv1d(channels[i], channels[i + 1], kernel_size=kernel_size, stride=2))
            if batchnorm:
                model.append(nn.BatchNorm1d(channels[i + 1]))
            model.append(acti)
        else:  # body & view
            pad = (kernel_size - 1) // 2
            model.append(nn.ReflectionPad1d(pad))
            model.append(nn.Conv1d(channels[i], channels[i + 1], kernel_size=kernel_size, stride=1))
            if batchnorm:
                model.append(nn.BatchNorm1d(channels[i + 1]))
            model.append(acti)
            model.append(convpool(kernel_size=2, stride=2))  # nn.MaxPool1d

    self.global_pool = global_pool
    self.compress = compress

    self.model = nn.Sequential(*model)

    if self.compress:
        self.conv1x1 = nn.Conv1d(channels[-2], channels[-1], kernel_size=1)

    self.last_conv = nn.Conv1d(channels[-1], channels[-1], kernel_size=1, bias=False)
def __init__(self, channels, padding=3, kernel_size=8, conv_stride=2, conv_pool=None):
    super(ConvEncoder, self).__init__()
    self.in_channels = channels[0]

    model = []
    acti = nn.LeakyReLU(0.2)

    nr_layer = len(channels) - 1

    for i in range(nr_layer):
        if conv_pool is None:
            model.append(nn.ReflectionPad1d(padding))
            model.append(nn.Conv1d(channels[i], channels[i + 1], kernel_size=kernel_size, stride=conv_stride))
            model.append(acti)
        else:
            model.append(nn.ReflectionPad1d(padding))
            model.append(nn.Conv1d(channels[i], channels[i + 1], kernel_size=kernel_size, stride=conv_stride))
            model.append(acti)
            model.append(conv_pool(kernel_size=2, stride=2))

    self.model = nn.Sequential(*model)
def __init__(self, mel_dim):
    super().__init__()
    factor = [8, 8, 2, 2]

    layers = [
        nn.ReflectionPad1d(3),  # 3+80+3 = 86
        weight_norm(nn.Conv1d(mel_dim, 512, kernel_size=7)),
    ]

    input_size = 512
    for f in factor:
        layers += [
            encoder_sequential(input_size,
                               input_size // 2,
                               kernel_size=f * 2,
                               stride=f,
                               padding=f // 2 + f % 2)
        ]
        input_size //= 2

        for d in range(3):
            layers += [residual_stack(input_size, 3**d)]

    layers += [
        nn.LeakyReLU(0.2),
        nn.ReflectionPad1d(3),
        weight_norm(nn.Conv1d(32, 1, kernel_size=7)),
        nn.Tanh(),
    ]

    self.generator = nn.Sequential(*layers)
def __init__(self, input_channels, output_channels, kernel_size, stride,
             drop_out_prob=-1.0, dilation=1, bn=True, activation_use=True):
    super(Conv1dBlock, self).__init__()
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.drop_out_prob = drop_out_prob
    self.dilation = dilation
    self.activation_use = activation_use
    self.padding = kernel_size[0]

    # Padding calculation (TensorFlow-style "SAME" padding)
    input_rows = input_channels
    filter_rows = kernel_size[0]
    out_rows = (input_rows + stride - 1) // stride
    self.padding_rows = max(0, (out_rows - 1) * stride + (filter_rows - 1) * dilation + 1 - input_rows)

    if self.padding_rows > 0:
        if self.padding_rows % 2 == 0:
            self.paddingAdded = nn.ReflectionPad1d(self.padding_rows // 2)
        else:
            self.paddingAdded = nn.ReflectionPad1d((self.padding_rows // 2, (self.padding_rows + 1) // 2))
    else:
        self.paddingAdded = nn.Identity()

    self.conv1 = nn.Conv1d(in_channels=input_channels, out_channels=output_channels,
                           kernel_size=kernel_size, stride=stride, padding=0, dilation=dilation)
    self.batch_norm = nn.BatchNorm1d(num_features=output_channels, momentum=0.9, eps=0.001) if bn else nn.Identity()
    self.drop_out = nn.Dropout(drop_out_prob) if self.drop_out_prob != -1 else nn.Identity()
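# Hedged sketch (mine, not from the snippet above; it applies the same "SAME"-padding
# arithmetic to the time axis with made-up sizes). An odd total pad is split
# asymmetrically via ReflectionPad1d((left, right)) so the strided conv yields
# ceil(length / stride) output steps. Assumes only torch.
import torch
import torch.nn as nn

length, k, stride, dilation = 100, 11, 2, 1
out_len = (length + stride - 1) // stride                              # ceil(length / stride)
total_pad = max(0, (out_len - 1) * stride + (k - 1) * dilation + 1 - length)
pad = nn.ReflectionPad1d((total_pad // 2, (total_pad + 1) // 2))
conv = nn.Conv1d(4, 8, kernel_size=k, stride=stride, dilation=dilation)

x = torch.randn(1, 4, length)
assert conv(pad(x)).shape[-1] == out_len  # output length == ceil(length / stride)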
def __init__(self, mel_channel, n_residual_layers, ratios=[8, 8, 4], mult=256, out_band=1):
    super(Generator, self).__init__()
    self.mel_channel = mel_channel

    generator = [
        nn.ReflectionPad1d(3),
        nn.utils.weight_norm(
            nn.Conv1d(mel_channel, mult * 2, kernel_size=7, stride=1)),
    ]

    # Upsample to raw audio scale
    for _, r in enumerate(ratios):
        generator += [Upsample(mult, r)]

        for j in range(n_residual_layers):
            generator += [ResStack(mult, dilation=3**j)]

        mult //= 2

    generator += [
        nn.LeakyReLU(0.2),
        nn.ReflectionPad1d(3),
        nn.utils.weight_norm(
            nn.Conv1d(mult * 2, out_band, kernel_size=7, stride=1)),
        nn.Tanh(),
    ]

    self.generator = nn.Sequential(*generator)
    self.apply(weights_init)
def __init__(self):
    super().__init__()
    self.Conv_1 = nn.Sequential(
        nn.ReflectionPad1d(7),
        weight_norm(nn.Conv1d(1, 16, kernel_size=15)),
        nn.LeakyReLU(0.2))
    self.Conv_2 = nn.Sequential(
        weight_norm(nn.Conv1d(16, 64, kernel_size=41, stride=4, padding=20, groups=4)),
        nn.LeakyReLU(0.2))
    self.Conv_3 = nn.Sequential(
        weight_norm(nn.Conv1d(64, 256, kernel_size=41, stride=4, padding=20, groups=16)),
        nn.LeakyReLU(0.2))
    self.Conv_4 = nn.Sequential(
        weight_norm(nn.Conv1d(256, 1024, kernel_size=41, stride=4, padding=20, groups=64)),
        residual_stack(1024, 3),
        residual_stack(1024, 9),
        nn.LeakyReLU(0.2))
    self.ConvTrans_4 = nn.Sequential(
        weight_norm(nn.ConvTranspose1d(1024, 256, kernel_size=16, stride=4, padding=6)),
        residual_stack(256, 3),
        residual_stack(256, 9),
        nn.LeakyReLU(0.2))
    self.ConvTrans_3 = nn.Sequential(
        weight_norm(nn.ConvTranspose1d(256, 64, kernel_size=16, stride=4, padding=6)),
        residual_stack(64, 3),
        residual_stack(64, 9),
        nn.LeakyReLU(0.2))
    self.ConvTrans_2 = nn.Sequential(
        weight_norm(nn.ConvTranspose1d(64, 16, kernel_size=16, stride=4, padding=6)),
        residual_stack(16, 3),
        residual_stack(16, 9),
        nn.LeakyReLU(0.2))
    self.ConvTrans_1 = nn.Sequential(
        nn.ReflectionPad1d(3),
        weight_norm(nn.Conv1d(16, 1, kernel_size=7)),
        nn.Tanh())
def __init__(self, config, base_width, n_labels):
    self.stat_dim = 1500
    super().__init__()
    in_dim = config['input_dim']
    self.tdnn_fr1 = nn.Sequential(
        nn.Conv1d(in_dim, base_width, stride=1, dilation=1, kernel_size=5),
        nn.BatchNorm1d(base_width),
        nn.ReLU(True),
    )
    self.tdnn_fr2 = nn.Sequential(
        nn.Conv1d(base_width, base_width, stride=1, dilation=3, kernel_size=3),
        nn.BatchNorm1d(base_width),
        nn.ReLU(True),
        nn.Conv1d(base_width, base_width, stride=1, dilation=3, kernel_size=3),
        nn.BatchNorm1d(base_width),
        nn.ReflectionPad1d(6)
    )
    self.tdnn_fr3 = nn.Sequential(
        nn.Conv1d(base_width, base_width, stride=1, dilation=3, kernel_size=3),
        nn.BatchNorm1d(base_width),
        nn.ReLU(True),
        nn.Conv1d(base_width, base_width, stride=1, dilation=3, kernel_size=3),
        nn.BatchNorm1d(base_width),
        nn.ReflectionPad1d(6)
    )
    self.connect_conv = nn.Conv1d(base_width, self.stat_dim, kernel_size=1)
    self.tdnn_fr4 = nn.Sequential(
        nn.Conv1d(base_width, base_width, stride=1, dilation=3, kernel_size=3),
        nn.BatchNorm1d(base_width),
        nn.ReLU(True),
        nn.Conv1d(base_width, self.stat_dim, stride=1, dilation=1, kernel_size=1),
        nn.BatchNorm1d(self.stat_dim),
        nn.ReflectionPad1d(3)
    )
    self.tdnn_uttr = nn.Sequential(
        st_pool_layer(),  # ST_pool = 1
        nn.Linear(self.stat_dim * 2, base_width),
        nn.BatchNorm1d(base_width),
        nn.ReLU(True),
        # xvector = 4
    )
    self.classifier = nn.Sequential(
        nn.Linear(base_width, base_width),
        nn.BatchNorm1d(base_width),
        nn.Linear(base_width, n_labels)
    )
    self._initialize_weights()
def __init__(self, input_size, ngf, n_residual_layers, ratios=[8, 8, 2, 2]):
    super().__init__()
    # ratios = [8, 8, 2, 2]
    self.hop_length = np.prod(ratios)
    mult = int(2 ** len(ratios))

    model = [
        nn.ReflectionPad1d(3),
        WNConv1d(input_size, mult * ngf, kernel_size=7, padding=0),
    ]

    # Upsample to raw audio scale
    for i, r in enumerate(ratios):
        if i == 0:
            model += [
                nn.LeakyReLU(0.2),
                BGRU(mult * ngf),
                WNConvTranspose1d(
                    mult * ngf,
                    mult * ngf // 2,
                    kernel_size=r * 2,
                    stride=r,
                    padding=r // 2 + r % 2,
                    output_padding=r % 2,
                ),
            ]
        else:
            model += [
                nn.LeakyReLU(0.2),
                WNConvTranspose1d(
                    mult * ngf,
                    mult * ngf // 2,
                    kernel_size=r * 2,
                    stride=r,
                    padding=r // 2 + r % 2,
                    output_padding=r % 2,
                ),
            ]

        for j in range(n_residual_layers):
            model += [ResnetBlock(mult * ngf // 2, dilation=3 ** j)]

        mult //= 2

    model += [
        nn.LeakyReLU(0.2),
        nn.ReflectionPad1d(3),
        # BGRU(ngf),
        WNConv1d(ngf, 1, kernel_size=7, padding=0),
        nn.Tanh(),
    ]

    self.model = nn.Sequential(*model)
    self.apply(weights_init)
def __init__(self, input_size, ngf, n_residual_layers):
    # In the original paper, input_size == n_mel_channels.
    # ngf is a model hyperparameter: the final number of feature maps in the Generator, 32 in the paper.
    super().__init__()
    # Originally ratios = [8, 8, 2, 2]: 4 stages of upsampling, 8x, 8x, 2x, 2x --> 256x
    # (the hop_length used when computing the mel spectrogram).
    ratios = [8, 8, 4, 2, 2]  # Note(houwx): Modify to 8x, 8x, 4x, 2x, 2x = 256 x 4 here
    self.hop_length = np.prod(ratios)
    mult = int(2**len(ratios))  # 32 with the 5 ratios above

    model = [
        # Padding on left & right: (N, n_mel, T_mel) --> (N, n_mel, T_mel + 3 left + 3 right)
        nn.ReflectionPad1d(3),
        # (N, n_mel=80, T_mel + 6) --> (N, mult * ngf, T_mel)
        WNConv1d(in_channels=input_size, out_channels=mult * ngf, kernel_size=7, padding=0),
    ]

    # Upsample to raw audio scale
    for i, r in enumerate(ratios):
        model += [
            nn.LeakyReLU(0.2),
            WNConvTranspose1d(
                mult * ngf,
                mult * ngf // 2,
                kernel_size=r * 2,       # [16, 16, 8, 4, 4]
                stride=r,                # [8, 8, 4, 2, 2]
                padding=r // 2 + r % 2,  # [4, 4, 2, 1, 1]
                output_padding=r % 2,    # all 0s (every ratio is even)
            ),
            # First upsample as example:
            # (N, mult * ngf, T_mel) --> (N, mult * ngf // 2, (T_mel - 1) * r - r + r * 2 - 1 + 1 = T_mel * r)
        ]

        for j in range(n_residual_layers):  # [0, 1, 2]
            # No change in shape: (N, mult * ngf // 2, T_mel * r) --> (N, mult * ngf // 2, T_mel * r)
            model += [ResnetBlock(mult * ngf // 2, dilation=3**j)]

        mult //= 2

    # After all upsampling stages: (N, ngf, T_mel * prod(ratios))
    model += [
        nn.LeakyReLU(0.2),
        nn.ReflectionPad1d(3),  # (N, ngf, T) --> (N, ngf, T + 3 + 3)
        WNConv1d(ngf, 1, kernel_size=7, padding=0),  # (N, ngf, T + 6) --> (N, 1, T)
        nn.Tanh(),
    ]

    self.model = nn.Sequential(*model)
    self.apply(weights_init)
def __init__(self, in_features):
    super(ResidualBlock, self).__init__()

    conv_block = [nn.ReflectionPad1d(1),
                  nn.Conv1d(in_features, in_features, 3),
                  nn.InstanceNorm1d(in_features),  # 1d norm to match the Conv1d feature maps
                  nn.ReLU(inplace=True),
                  nn.ReflectionPad1d(1),
                  nn.Conv1d(in_features, in_features, 3),
                  nn.InstanceNorm1d(in_features)]

    self.conv_block = nn.Sequential(*conv_block)
def __init__(
    self,
    in_dim,
    hidden_dim,
    out_dim,
    num_layers=4,
    # NOTE: you must carefully set the following parameters
    in_lf0_idx=300,
    in_lf0_min=5.3936276,
    in_lf0_max=6.491111,
    out_lf0_idx=180,
    out_lf0_mean=5.953093881972361,
    out_lf0_scale=0.23435173188961034,
    init_type="none",
    use_mdn=False,
    num_gaussians=8,
    dim_wise=False,
):
    super().__init__()
    self.in_lf0_idx = in_lf0_idx
    self.in_lf0_min = in_lf0_min
    self.in_lf0_max = in_lf0_max
    self.out_lf0_idx = out_lf0_idx
    self.out_lf0_mean = out_lf0_mean
    self.out_lf0_scale = out_lf0_scale
    self.use_mdn = use_mdn

    model = [
        nn.ReflectionPad1d(3),
        WNConv1d(in_dim, hidden_dim, kernel_size=7, padding=0),
    ]
    for n in range(num_layers):
        model.append(ResnetBlock(hidden_dim, dilation=2 ** n))

    last_conv_out_dim = hidden_dim if use_mdn else out_dim
    model += [
        nn.LeakyReLU(0.2),
        nn.ReflectionPad1d(3),
        WNConv1d(hidden_dim, last_conv_out_dim, kernel_size=7, padding=0),
    ]
    self.model = nn.Sequential(*model)

    if self.use_mdn:
        self.mdn_layer = MDNLayer(
            in_dim=hidden_dim,
            out_dim=out_dim,
            num_gaussians=num_gaussians,
            dim_wise=dim_wise,
        )
    else:
        self.mdn_layer = None

    init_weights(self, init_type)
def __init__(self):
    super(JCU_Discriminator, self).__init__()
    self.mel_conv = nn.Sequential(
        nn.ReflectionPad1d(3),
        nn.utils.weight_norm(nn.Conv1d(80, 128, kernel_size=2, stride=1)),
        nn.LeakyReLU(0.2, True),
    )

    x_conv = [
        nn.ReflectionPad1d(7),
        nn.utils.weight_norm(nn.Conv1d(1, 16, kernel_size=7, stride=1)),
        nn.LeakyReLU(0.2, True),
    ]
    x_conv += [
        nn.utils.weight_norm(
            nn.Conv1d(16, 64, kernel_size=41, stride=4, padding=4 * 5, groups=16 // 4)),
        nn.LeakyReLU(0.2),
    ]
    x_conv += [
        nn.utils.weight_norm(
            nn.Conv1d(64, 128, kernel_size=21, stride=2, padding=2 * 5, groups=64 // 4)),
        nn.LeakyReLU(0.2),
    ]
    self.x_conv = nn.Sequential(*x_conv)

    self.mel_conv2 = nn.Sequential(
        nn.utils.weight_norm(nn.Conv1d(128, 128, kernel_size=5, stride=1, padding=2)),
        nn.LeakyReLU(0.2, True),
    )
    self.mel_conv3 = nn.utils.weight_norm(
        nn.Conv1d(128, 1, kernel_size=3, stride=1, padding=1))
    self.x_conv2 = nn.Sequential(
        nn.utils.weight_norm(nn.Conv1d(128, 128, kernel_size=5, stride=1, padding=2)),
        nn.LeakyReLU(0.2, True),
    )
    self.x_conv3 = nn.utils.weight_norm(
        nn.Conv1d(128, 1, kernel_size=3, stride=1, padding=1))
def __init__(
    self,
    in_dim,
    ff_hidden_dim=2048,
    conv_hidden_dim=1024,
    lstm_hidden_dim=256,
    out_dim=199,
    dropout=0.0,
    num_lstm_layers=2,
    bidirectional=True,
    init_type="none",
):
    super().__init__()
    self.ff = nn.Sequential(
        nn.Linear(in_dim, ff_hidden_dim),
        nn.ReLU(),
        nn.Linear(ff_hidden_dim, ff_hidden_dim),
        nn.ReLU(),
        nn.Linear(ff_hidden_dim, ff_hidden_dim),
        nn.ReLU(),
    )

    self.conv = nn.Sequential(
        nn.ReflectionPad1d(3),
        nn.Conv1d(ff_hidden_dim, conv_hidden_dim, kernel_size=7, padding=0),
        nn.BatchNorm1d(conv_hidden_dim),
        nn.ReLU(),
        nn.ReflectionPad1d(3),
        nn.Conv1d(conv_hidden_dim, conv_hidden_dim, kernel_size=7, padding=0),
        nn.BatchNorm1d(conv_hidden_dim),
        nn.ReLU(),
        nn.ReflectionPad1d(3),
        nn.Conv1d(conv_hidden_dim, conv_hidden_dim, kernel_size=7, padding=0),
        nn.BatchNorm1d(conv_hidden_dim),
        nn.ReLU(),
    )

    num_direction = 2 if bidirectional else 1
    self.lstm = nn.LSTM(
        conv_hidden_dim,
        lstm_hidden_dim,
        num_lstm_layers,
        bidirectional=bidirectional,  # follow the flag so last_in_dim below stays consistent
        batch_first=True,
        dropout=dropout,
    )

    last_in_dim = num_direction * lstm_hidden_dim
    self.fc = nn.Linear(last_in_dim, out_dim)

    init_weights(self, init_type)
def matchDimensions1D(trunk, mask):
    """
    Finds the difference in dimensions between two matrices and pads the smaller one
    to allow for matrix operations.
    """
    difference = findDifferenceOneAxis(trunk, mask, 1)
    left, right = findPaddingBothSidesOneAxis(np.absolute(difference))

    if difference > 0:
        pad = nn.ReflectionPad1d((left, right))
        mask = pad(mask)
        return trunk, mask
    elif difference < 0:
        pad = nn.ReflectionPad1d((left, right))
        trunk = pad(trunk)
        return trunk, mask

    return trunk, mask
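# Hedged sketch (mine; the findDifferenceOneAxis / findPaddingBothSidesOneAxis helpers
# above are repo-specific, so this re-derives the same idea with plain torch and
# made-up shapes): pad the shorter of two (N, C, T) tensors on both sides with
# ReflectionPad1d((left, right)) until the time axes match.
import torch
import torch.nn as nn

trunk = torch.randn(1, 4, 37)
mask = torch.randn(1, 4, 32)

difference = trunk.shape[-1] - mask.shape[-1]  # > 0 means mask is shorter
left = abs(difference) // 2
right = abs(difference) - left
if difference > 0:
    mask = nn.ReflectionPad1d((left, right))(mask)
elif difference < 0:
    trunk = nn.ReflectionPad1d((left, right))(trunk)
assert trunk.shape[-1] == mask.shape[-1]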
def __init__(self, in_dim, hidden_dim, out_dim, num_layers=4, dropout=0.0):
    super().__init__()
    model = [
        nn.ReflectionPad1d(3),
        WNConv1d(in_dim, hidden_dim, kernel_size=7, padding=0),
    ]
    for n in range(num_layers):
        model.append(ResnetBlock(hidden_dim, dilation=2**n))
    model += [
        nn.LeakyReLU(0.2),
        nn.ReflectionPad1d(3),
        WNConv1d(hidden_dim, out_dim, kernel_size=7, padding=0),
    ]
    self.model = nn.Sequential(*model)
def __init__(self, kernel, channel, padding, dilations=[1, 3, 5]):
    super().__init__()
    resstack = []
    for dilation in dilations:
        resstack += [
            nn.LeakyReLU(0.2),
            nn.ReflectionPad1d(dilation),
            nn.utils.weight_norm(nn.Conv1d(channel, channel, kernel_size=kernel, dilation=dilation)),
            nn.LeakyReLU(0.2),
            nn.ReflectionPad1d(padding),
            nn.utils.weight_norm(nn.Conv1d(channel, channel, kernel_size=1)),
        ]
    self.resstack = nn.Sequential(*resstack)

    self.shortcut = nn.utils.weight_norm(nn.Conv1d(channel, channel, kernel_size=1))
def __init__(
    self,
    c_in: int,
    c_h: int,
    c_out: int,
    kernel_size: int,
    bank_size: int,
    bank_scale: int,
    c_bank: int,
    n_conv_blocks: int,
    n_dense_blocks: int,
    subsample: List[int],
    act: str,
    dropout_rate: float,
):
    super(SpeakerEncoder, self).__init__()
    self.c_in = c_in
    self.c_h = c_h
    self.c_out = c_out
    self.kernel_size = kernel_size
    self.n_conv_blocks = n_conv_blocks
    self.n_dense_blocks = n_dense_blocks
    self.subsample = subsample
    self.act = get_act(act)
    self.conv_bank = ConvBank(c_in, c_bank, bank_size, bank_scale, act)
    in_channels = c_bank * (bank_size // bank_scale) + c_in
    self.in_conv_layer = nn.Conv1d(in_channels, c_h, kernel_size=1)
    self.first_conv_layers = nn.ModuleList([
        nn.Sequential(
            nn.ReflectionPad1d((kernel_size // 2, kernel_size // 2 - 1 + kernel_size % 2)),
            nn.Conv1d(c_h, c_h, kernel_size=kernel_size),
        ) for _ in range(n_conv_blocks)
    ])
    self.second_conv_layers = nn.ModuleList([
        nn.Sequential(
            nn.ReflectionPad1d((kernel_size // 2, kernel_size // 2 - 1 + kernel_size % 2)),
            nn.Conv1d(c_h, c_h, kernel_size=kernel_size, stride=sub),
        ) for sub, _ in zip(subsample, range(n_conv_blocks))
    ])
    self.pooling_layer = nn.AdaptiveAvgPool1d(1)
    self.first_dense_layers = nn.ModuleList(
        [nn.Linear(c_h, c_h) for _ in range(n_dense_blocks)])
    self.second_dense_layers = nn.ModuleList(
        [nn.Linear(c_h, c_h) for _ in range(n_dense_blocks)])
    self.output_layer = nn.Linear(c_h, c_out)
    self.dropout_layer = nn.Dropout(p=dropout_rate)
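# Hedged sketch (mine, not from the repo above; sizes are made up, only torch and math
# are assumed): checks that the asymmetric pad (k // 2, k // 2 - 1 + k % 2) used in the
# conv blocks adds exactly k - 1 samples, so a Conv1d with stride 1 preserves the time
# axis and stride `sub` yields ceil(L / sub) steps, for even and odd kernel sizes alike.
import math
import torch
import torch.nn as nn

L, c_h = 101, 16
x = torch.randn(1, c_h, L)
for k in (4, 5):  # even and odd kernel sizes
    pad = nn.ReflectionPad1d((k // 2, k // 2 - 1 + k % 2))
    assert pad(x).shape[-1] == L + k - 1
    for sub in (1, 2):
        conv = nn.Conv1d(c_h, c_h, kernel_size=k, stride=sub)
        assert conv(pad(x)).shape[-1] == math.ceil(L / sub)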
def __init__(
    self,
    in_dim,
    hidden_dim,
    out_dim,
    num_layers=4,
    init_type="none",
    use_mdn=False,
    num_gaussians=8,
    dim_wise=False,
    **kwargs,
):
    super().__init__()
    self.use_mdn = use_mdn

    if "dropout" in kwargs:
        warn(
            "dropout argument in Conv1dResnet is deprecated"
            " and will be removed in future versions"
        )

    model = [
        nn.ReflectionPad1d(3),
        WNConv1d(in_dim, hidden_dim, kernel_size=7, padding=0),
    ]
    for n in range(num_layers):
        model.append(ResnetBlock(hidden_dim, dilation=2 ** n))

    last_conv_out_dim = hidden_dim if use_mdn else out_dim
    model += [
        nn.LeakyReLU(0.2),
        nn.ReflectionPad1d(3),
        WNConv1d(hidden_dim, last_conv_out_dim, kernel_size=7, padding=0),
    ]
    self.model = nn.Sequential(*model)

    if self.use_mdn:
        self.mdn_layer = MDNLayer(
            in_dim=hidden_dim,
            out_dim=out_dim,
            num_gaussians=num_gaussians,
            dim_wise=dim_wise,
        )
    else:
        self.mdn_layer = None

    init_weights(self, init_type)
def _conv(self, in_c: int, out_c: int, kernel_sz: Optional[Tuple[int]] = None,
          bias: bool = False, seq: bool = True, stride: Tuple[int] = None):
    ksz = kernel_sz or self.kernel_sz
    bias = False if self.norm != 'none' and not bias else True
    stride = stride or tuple([1 for _ in ksz])
    if not self.separable or all([ks == 1 for ks in ksz]):
        layers = [nn.Conv3d(in_c, out_c, ksz, bias=bias, stride=stride)] if self.dim == 3 else \
                 [nn.Conv2d(in_c, out_c, ksz, bias=bias, stride=stride)] if self.dim == 2 else \
                 [nn.Conv1d(in_c, out_c, ksz, bias=bias, stride=stride)]
    else:
        layers = [SeparableConv3d(in_c, out_c, ksz, bias=bias, stride=stride)] if self.dim == 3 else \
                 [SeparableConv2d(in_c, out_c, ksz, bias=bias, stride=stride)] if self.dim == 2 else \
                 [SeparableConv1d(in_c, out_c, ksz, bias=bias, stride=stride)]
    if any([ks > 1 for ks in ksz]):
        rp = tuple([ks // 2 for p in zip(reversed(ksz), reversed(ksz)) for ks in p])
        layers = [nn.ReplicationPad3d(rp)] + layers if self.dim == 3 else \
                 [nn.ReflectionPad2d(rp)] + layers if self.dim == 2 else \
                 [nn.ReflectionPad1d(rp)] + layers
    if seq and len(layers) > 1:
        c = nn.Sequential(*layers)
    else:
        c = layers if len(layers) > 1 else layers[0]
    return c
def __init__(self, inchannel, outchannel, uppool, filter_size):
    super(skip_connection_de, self).__init__()
    self.inchannel = inchannel
    self.outchannel = outchannel
    self.uppool = uppool
    self.filter_size = filter_size

    if uppool:
        # input size -> input size * 2
        self.pipeline = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='linear'),
            nn.ReflectionPad1d(filter_size // 2),
            nn.Conv1d(inchannel, outchannel, self.filter_size, 1, padding=0, bias=False),
            nn.BatchNorm1d(outchannel),
            nn.LeakyReLU())
    else:
        # input size unchanged
        self.pipeline = nn.Sequential(
            nn.Conv1d(inchannel, outchannel, self.filter_size, 1, filter_size // 2, bias=False),
            nn.BatchNorm1d(outchannel),
            nn.LeakyReLU(),
        )
def __init__(self, channels, num_res_blocks, kernel_size):
    super(ResidualStack, self).__init__()

    assert (kernel_size - 1) % 2 == 0, " [!] kernel_size has to be odd."
    base_padding = (kernel_size - 1) // 2

    self.blocks = nn.ModuleList()
    for idx in range(num_res_blocks):
        layer_kernel_size = kernel_size
        layer_dilation = layer_kernel_size**idx
        layer_padding = base_padding * layer_dilation
        self.blocks += [
            nn.Sequential(
                nn.LeakyReLU(0.2),
                nn.ReflectionPad1d(layer_padding),
                weight_norm(
                    nn.Conv1d(channels, channels, kernel_size=kernel_size, dilation=layer_dilation, bias=True)),
                nn.LeakyReLU(0.2),
                weight_norm(
                    nn.Conv1d(channels, channels, kernel_size=1, bias=True)),
            )
        ]

    self.shortcuts = nn.ModuleList([
        weight_norm(nn.Conv1d(channels, channels, kernel_size=1, bias=True))
        for i in range(num_res_blocks)
    ])
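# Hedged sketch (mine; assumes only torch and toy sizes): shows why
# ReflectionPad1d(base_padding * dilation) keeps the time axis fixed for the dilated
# convs in the residual stack above. A kernel-k conv with dilation d spans
# d * (k - 1) + 1 samples, so padding d * (k - 1) // 2 per side restores the original
# length when k is odd.
import torch
import torch.nn as nn

channels, kernel_size = 8, 3
base_padding = (kernel_size - 1) // 2
x = torch.randn(1, channels, 64)
for idx in range(3):
    dilation = kernel_size ** idx  # 1, 3, 9
    block = nn.Sequential(
        nn.ReflectionPad1d(base_padding * dilation),
        nn.Conv1d(channels, channels, kernel_size=kernel_size, dilation=dilation),
    )
    assert block(x).shape == x.shape  # residual branch keeps (N, C, T)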
def __init__(self, ni, no, ks, stride, pad=None, pad_type='Zero', output_pad=0,
             use_bn=True, use_sn=False, norm_type='batchnorm', activation_type='leakyrelu'):
    super(DeConvBlock, self).__init__()
    self.use_bn = use_bn
    self.use_sn = use_sn
    self.norm_type = norm_type
    self.pad_type = pad_type

    if pad is None:
        pad = ks // 2 // stride

    if self.pad_type == 'Zero':
        self.deconv = nn.ConvTranspose1d(ni, no, ks, stride, pad, output_padding=output_pad, bias=False)
    elif self.pad_type == 'Reflection':
        self.deconv = nn.ConvTranspose1d(ni, no, ks, stride, 0, output_padding=output_pad, bias=False)
        self.reflection = nn.ReflectionPad1d(pad)

    if self.use_bn:
        if self.norm_type == 'batchnorm':
            self.bn = nn.BatchNorm1d(no)
        elif self.norm_type == 'instancenorm':
            self.bn = nn.InstanceNorm1d(no)

    if self.use_sn:
        self.deconv = SpectralNorm(self.deconv)

    if activation_type == 'relu':
        self.act = nn.ReLU(inplace=True)
    elif activation_type == 'leakyrelu':
        self.act = nn.LeakyReLU(0.2, inplace=True)
    elif activation_type == 'elu':
        self.act = nn.ELU(inplace=True)
    elif activation_type == 'selu':
        self.act = nn.SELU(inplace=True)
    elif activation_type is None:
        self.act = Nothing()
def forward(self, x):
    x = broadcast_dim(x)
    if self.center:
        if self.pad_mode == 'constant':
            padding = nn.ConstantPad1d(self.kernal_width // 2, 0)
        elif self.pad_mode == 'reflect':
            padding = nn.ReflectionPad1d(self.kernal_width // 2)

        x = padding(x)

    # STFT
    fourier_real = conv1d(x, self.wcos, stride=self.hop_length)
    fourier_imag = conv1d(x, self.wsin, stride=self.hop_length)

    # CQT
    CQT_real, CQT_imag = complex_mul(
        (self.cqt_kernels_real, self.cqt_kernels_imag),
        (fourier_real, fourier_imag))

    # Getting CQT Amplitude
    CQT = torch.sqrt(CQT_real.pow(2) + CQT_imag.pow(2))

    if self.norm:
        return CQT / self.kernal_width * torch.sqrt(self.lenghts.view(-1, 1))
    else:
        return CQT * torch.sqrt(self.lenghts.view(-1, 1))
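# Hedged sketch (mine, not the library's kernels; the random `kernels` tensor is a
# stand-in for the STFT basis held in self.wcos / self.wsin, and an even kernel width
# is assumed): it reproduces the centre-padding step above, where reflect-padding by
# width // 2 before the strided conv gives floor(L / hop) + 1 frames.
import torch
import torch.nn as nn
import torch.nn.functional as F

L, hop, width = 16000, 512, 1024
x = torch.randn(1, 1, L)
kernels = torch.randn(8, 1, width)             # stand-in for the windowed sin/cos basis
x_padded = nn.ReflectionPad1d(width // 2)(x)   # centre padding, as in the 'reflect' branch
frames = F.conv1d(x_padded, kernels, stride=hop).shape[-1]
assert frames == L // hop + 1  # centre-padded framing yields floor(L / hop) + 1 frames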