def __init__(self, in_channels, out_channels, factor, dilations):
    """Create the sub-modules of the upsampling block.

    Three groups of layers are registered: the main branch of the first
    stage, the residual branch of the first stage, and the main branch of
    the second stage. How they are combined is decided by ``forward``.

    Args:
        in_channels: Input channel count of the first convolution of each
            first-stage branch.
        out_channels: Output channel count of those convolutions and the
            channel count of every modulation block.
        factor: Scale factor given to both ``InterpolationBlock`` layers.
        dilations: Indexable holding four dilation values. Index 0 is used
            by the upsampling convolution, index 1 by the first modulation
            block, indices 2 and 3 by the second-stage modulation blocks.
    """
    super().__init__()

    # First stage, main branch: activation -> interpolation -> dilated
    # convolution, stored next to its modulation block.
    upsampling_layers = torch.nn.Sequential(
        torch.nn.LeakyReLU(0.2),
        InterpolationBlock(
            scale_factor=factor,
            mode='linear',
            align_corners=False,
        ),
        Conv1dWithInitialization(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=1,
            padding=dilations[0],
            dilation=dilations[0],
        ),
    )
    first_modulation = BasicModulationBlock(out_channels, dilation=dilations[1])
    self.first_block_main_branch = torch.nn.ModuleDict({
        'upsampling': upsampling_layers,
        'modulation': first_modulation,
    })

    # First stage, residual branch: 1x1 convolution to match the channel
    # count, then the same interpolation as the main branch.
    channel_projection = Conv1dWithInitialization(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=1,
        stride=1,
    )
    residual_interpolation = InterpolationBlock(
        scale_factor=factor,
        mode='linear',
        align_corners=False,
    )
    self.first_block_residual_branch = torch.nn.Sequential(
        channel_projection,
        residual_interpolation,
    )

    # Second stage: two modulation blocks keyed 'modulation_0' and
    # 'modulation_1', using the third and fourth dilation values.
    second_stage = {}
    for idx in range(2):
        second_stage[f'modulation_{idx}'] = BasicModulationBlock(
            out_channels, dilation=dilations[2 + idx]
        )
    self.second_block_main_branch = torch.nn.ModuleDict(second_stage)
def __init__(self, in_channels, out_channels, input_dscaled_by):
    """Create the layers of the feature-wise linear modulation module.

    Args:
        in_channels: Channel count consumed by every convolution here and
            passed to ``PositionalEncoding``.
        out_channels: Channel count produced by the scale and shift
            convolutions.
        input_dscaled_by: Part of the constructor signature; this
            constructor does not read it.
    """
    super().__init__()

    def make_conv(n_out):
        # All three convolutions share kernel size 3, stride 1, padding 1
        # and the same input width; only the output width differs.
        return Conv1dWithInitialization(
            in_channels=in_channels,
            out_channels=n_out,
            kernel_size=3,
            stride=1,
            padding=1,
        )

    # Channel-preserving convolution followed by a leaky ReLU.
    self.signal_conv = torch.nn.Sequential(
        make_conv(in_channels),
        torch.nn.LeakyReLU(0.2),
    )
    self.positional_encoding = PositionalEncoding(in_channels)

    # Two separate heads, one per modulation statistic.
    self.scale_conv = make_conv(out_channels)
    self.shift_conv = make_conv(out_channels)
def __init__(self, in_channels, out_channels, dilation):
    """Create a leaky ReLU and a dilated kernel-3 convolution.

    Args:
        in_channels: Input channel count of the convolution.
        out_channels: Output channel count of the convolution.
        dilation: Dilation of the convolution; the same value is used as
            its padding.
    """
    super().__init__()
    self.leaky_relu = torch.nn.LeakyReLU(0.2)

    # Padding is tied to the dilation.
    # NOTE(review): presumably this keeps the sequence length unchanged for
    # kernel size 3 -- confirm against Conv1dWithInitialization.
    conv_settings = dict(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=3,
        stride=1,
        padding=dilation,
        dilation=dilation,
    )
    self.convolution = Conv1dWithInitialization(**conv_settings)
def __init__(self, n_channels, dilation):
    """Create the affine step, activation and convolution of the block.

    Args:
        n_channels: Channel count used for both the input and the output
            of the convolution.
        dilation: Dilation of the convolution; the same value is used as
            its padding.
    """
    super().__init__()

    # The affine module is constructed without arguments.
    self.featurewise_affine = FeatureWiseAffine()
    self.leaky_relu = torch.nn.LeakyReLU(0.2)

    # Channel-preserving kernel-3 convolution with padding tied to dilation.
    conv_settings = dict(
        in_channels=n_channels,
        out_channels=n_channels,
        kernel_size=3,
        stride=1,
        padding=dilation,
        dilation=dilation,
    )
    self.convolution = Conv1dWithInitialization(**conv_settings)
def __init__(self, in_channels, out_channels, factor, dilations):
    """Create the main and residual branches of the downsampling block.

    Args:
        in_channels: Channel count entering the block.
        out_channels: Channel count produced by every convolution block
            and by the residual projection.
        factor: Scale factor given to both ``InterpolationBlock`` layers,
            which are created with ``downsample=True``.
        dilations: Sized iterable of dilation values; one
            ``ConvolutionBlock`` is created per entry.
    """
    super().__init__()

    # Only the first convolution block sees ``in_channels``; every later
    # one consumes the ``out_channels`` emitted by its predecessor.
    stage_inputs = [in_channels] + [out_channels] * (len(dilations) - 1)

    # Main branch: interpolation first, then the chain of dilated
    # convolution blocks.
    main_layers = [
        InterpolationBlock(
            scale_factor=factor,
            mode='linear',
            align_corners=False,
            downsample=True,
        )
    ]
    for stage_in, dilation in zip(stage_inputs, dilations):
        main_layers.append(ConvolutionBlock(stage_in, out_channels, dilation))
    self.main_branch = torch.nn.Sequential(*main_layers)

    # Residual branch: 1x1 convolution to match the channel count, then
    # the same interpolation as the main branch.
    channel_projection = Conv1dWithInitialization(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=1,
        stride=1,
    )
    residual_interpolation = InterpolationBlock(
        scale_factor=factor,
        mode='linear',
        align_corners=False,
        downsample=True,
    )
    self.residual_branch = torch.nn.Sequential(
        channel_projection,
        residual_interpolation,
    )
def __init__(self, config):
    """Build the upsampling stream, the downsampling stream and the FiLMs.

    Args:
        config: Object exposing ``data_config.n_mels`` and a
            ``model_config`` with the attributes
            ``upsampling_preconv_out_channels``,
            ``upsampling_out_channels``, ``upsampling_dilations``,
            ``downsampling_preconv_out_channels``,
            ``downsampling_out_channels``, ``downsampling_dilations`` and
            ``factors``. The channel lists and ``factors`` are
            concatenated with Python lists, so they must be lists.
    """
    super().__init__()
    data_config = config.data_config
    model_config = config.model_config

    # Building upsampling branch (mels -> signal)
    self.ublock_preconv = Conv1dWithInitialization(
        in_channels=data_config.n_mels,
        out_channels=model_config.upsampling_preconv_out_channels,
        kernel_size=3,
        stride=1,
        padding=1,
    )
    # Each block consumes what the previous one (or the preconv) emits.
    upsampling_in_sizes = (
        [model_config.upsampling_preconv_out_channels]
        + model_config.upsampling_out_channels[:-1]
    )
    self.ublocks = torch.nn.ModuleList([
        UBlock(
            in_channels=in_size,
            out_channels=out_size,
            factor=factor,
            dilations=dilations,
        )
        for in_size, out_size, factor, dilations in zip(
            upsampling_in_sizes,
            model_config.upsampling_out_channels,
            model_config.factors,
            model_config.upsampling_dilations,
        )
    ])
    self.ublock_postconv = Conv1dWithInitialization(
        in_channels=model_config.upsampling_out_channels[-1],
        out_channels=1,
        kernel_size=3,
        stride=1,
        padding=1,
    )

    # Building downsampling branch (starting from signal)
    self.dblock_preconv = Conv1dWithInitialization(
        in_channels=1,
        out_channels=model_config.downsampling_preconv_out_channels,
        kernel_size=5,
        stride=1,
        padding=2,
    )
    downsampling_in_sizes = (
        [model_config.downsampling_preconv_out_channels]
        + model_config.downsampling_out_channels[:-1]
    )
    # The downsampling blocks use the factors in reverse order, skipping
    # the first one.
    reversed_factors = model_config.factors[1:][::-1]
    self.dblocks = torch.nn.ModuleList([
        DBlock(
            in_channels=in_size,
            out_channels=out_size,
            factor=factor,
            dilations=dilations,
        )
        for in_size, out_size, factor, dilations in zip(
            downsampling_in_sizes,
            model_config.downsampling_out_channels,
            reversed_factors,
            model_config.downsampling_dilations,
        )
    ])

    # Building FiLM connections (in order of downscaling stream)
    # The first FiLM is paired with factor 1 (no downscaling applied yet),
    # so its input width follows the downsampling preconv setting rather
    # than a hard-coded 32.
    # NOTE(review): assumes forward() feeds dblock_preconv's output to
    # films[0] -- confirm.
    film_in_sizes = (
        [model_config.downsampling_preconv_out_channels]
        + model_config.downsampling_out_channels
    )
    film_out_sizes = model_config.upsampling_out_channels[::-1]
    film_factors = [1] + reversed_factors
    self.films = torch.nn.ModuleList([
        FiLM(
            in_channels=in_size,
            out_channels=out_size,
            # Cumulative downscaling seen by this FiLM's input, for proper
            # positional encodings initialization. np.prod replaces
            # np.product, which was removed in NumPy 2.0.
            input_dscaled_by=np.prod(film_factors[:i + 1]),
        )
        for i, (in_size, out_size) in enumerate(
            zip(film_in_sizes, film_out_sizes)
        )
    ])