import torch
import torch.nn as nn

# ConvLayer, Resample1d and crop are defined elsewhere in this repo.


class UpsamplingBlock(nn.Module):
    def __init__(self, n_inputs, n_shortcut, n_outputs, kernel_size, stride, depth, conv_type, res):
        super(UpsamplingBlock, self).__init__()
        assert stride > 1

        # CONV 1 for UPSAMPLING
        if res == "fixed":
            self.upconv = Resample1d(n_inputs, 15, stride, transpose=True)
        else:
            self.upconv = ConvLayer(n_inputs, n_inputs, kernel_size, stride, conv_type, transpose=True)

        self.pre_shortcut_convs = nn.ModuleList(
            [ConvLayer(n_inputs, n_outputs, kernel_size, 1, conv_type)] +
            [ConvLayer(n_outputs, n_outputs, kernel_size, 1, conv_type)
             for _ in range(depth - 1)])

        # CONVS to combine high- with low-level information (from shortcut)
        self.post_shortcut_convs = nn.ModuleList(
            [ConvLayer(n_outputs + n_shortcut, n_outputs, kernel_size, 1, conv_type)] +
            [ConvLayer(n_outputs, n_outputs, kernel_size, 1, conv_type)
             for _ in range(depth - 1)])

    def forward(self, x, shortcut):
        # UPSAMPLE HIGH-LEVEL FEATURES
        upsampled = self.upconv(x)
        for conv in self.pre_shortcut_convs:
            upsampled = conv(upsampled)

        # Prepare shortcut connection: crop it to the upsampled length
        combined = crop(shortcut, upsampled)

        # Combine high- and low-level features
        for conv in self.post_shortcut_convs:
            combined = conv(torch.cat([combined, crop(upsampled, combined)], dim=1))
        return combined

    def get_output_size(self, input_size):
        curr_size = self.upconv.get_output_size(input_size)

        # Upsampling convs
        for conv in self.pre_shortcut_convs:
            curr_size = conv.get_output_size(curr_size)

        # Combining convolutions
        for conv in self.post_shortcut_convs:
            curr_size = conv.get_output_size(curr_size)
        return curr_size
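# The skip connection above relies on crop(): with unpadded ("valid")
# convolutions, the encoder shortcut is longer in time than the upsampled
# decoder features, so it is centre-cropped before concatenation along the
# channel axis. A self-contained sketch of that mechanism follows; it uses
# plain nn.Conv1d instead of this repo's ConvLayer, and centre_crop below is
# a hypothetical stand-in written to mirror what crop() does here.

def centre_crop(x, target):
    # Symmetrically trim x (B, C, T_x) to target's length T_t; odd kernels
    # and valid convolutions keep the surplus T_x - T_t even.
    diff = x.shape[-1] - target.shape[-1]
    assert diff >= 0 and diff % 2 == 0
    return x if diff == 0 else x[:, :, diff // 2:-(diff // 2)]

upsample = nn.ConvTranspose1d(32, 32, kernel_size=5, stride=2)  # stand-in upconv
merge = nn.Conv1d(32 + 16, 16, kernel_size=5)                   # post-shortcut conv

x = torch.randn(1, 32, 61)           # coarse decoder features
shortcut = torch.randn(1, 16, 135)   # encoder shortcut, longer in time

upsampled = upsample(x)                                # (1, 32, 125)
combined = centre_crop(shortcut, upsampled)            # (1, 16, 125)
out = merge(torch.cat([combined, upsampled], dim=1))   # (1, 16, 121)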
class DownsamplingBlock(nn.Module):
    def __init__(self, n_inputs, n_shortcut, n_outputs, kernel_size, stride, depth, conv_type, res):
        super(DownsamplingBlock, self).__init__()
        assert stride > 1
        self.kernel_size = kernel_size
        self.stride = stride

        # CONV 1
        self.pre_shortcut_convs = nn.ModuleList(
            [ConvLayer(n_inputs, n_shortcut, kernel_size, 1, conv_type)] +
            [ConvLayer(n_shortcut, n_shortcut, kernel_size, 1, conv_type)
             for _ in range(depth - 1)])

        self.post_shortcut_convs = nn.ModuleList(
            [ConvLayer(n_shortcut, n_outputs, kernel_size, 1, conv_type)] +
            [ConvLayer(n_outputs, n_outputs, kernel_size, 1, conv_type)
             for _ in range(depth - 1)])

        # CONV 2 with decimation
        if res == "fixed":
            # Resampling with fixed-size sinc lowpass filter
            self.downconv = Resample1d(n_outputs, 15, stride)
        else:
            self.downconv = ConvLayer(n_outputs, n_outputs, kernel_size, stride, conv_type)

    def forward(self, x):
        # PREPARING SHORTCUT FEATURES
        shortcut = x
        for conv in self.pre_shortcut_convs:
            shortcut = conv(shortcut)

        # PREPARING FOR DOWNSAMPLING
        out = shortcut
        for conv in self.post_shortcut_convs:
            out = conv(out)

        # DOWNSAMPLING
        out = self.downconv(out)
        return out, shortcut

    def get_input_size(self, output_size):
        curr_size = self.downconv.get_input_size(output_size)
        for conv in reversed(self.post_shortcut_convs):
            curr_size = conv.get_input_size(curr_size)
        for conv in reversed(self.pre_shortcut_convs):
            curr_size = conv.get_input_size(curr_size)
        return curr_size
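# get_input_size() exists because every valid convolution and every stride-s
# resampling step constrains which signal lengths are feasible: an unpadded
# conv with kernel k and stride s maps in -> (in - k) // s + 1 samples and
# needs (in - k) divisible by s, so the model must work backwards from the
# desired output length to a legal input length. A sketch of that arithmetic
# (helper names are illustrative, not this repo's API):

def conv_output_size(in_size, k, s):
    assert (in_size - k) % s == 0        # length must divide cleanly
    return (in_size - k) // s + 1

def conv_input_size(out_size, k, s):
    return (out_size - 1) * s + k        # exact inverse of the above

# One DownsamplingBlock with depth=1, kernel_size=5, stride=2:
size = 237
size = conv_output_size(size, 5, 1)      # pre-shortcut conv  -> 233
size = conv_output_size(size, 5, 1)      # post-shortcut conv -> 229
size = conv_output_size(size, 5, 2)      # strided downconv   -> 113
assert conv_input_size(113, 5, 2) == 229  # the backward step of the search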
class UpBlock(nn.Module):
    # Decoder block used by the multi-resolution Waveunet variant below: the
    # same two conv stacks as UpsamplingBlock, but without the transposed
    # upconv, so the temporal resolution is left unchanged. stride is only
    # sanity-checked and res is unused in this block.
    def __init__(self, n_inputs, n_shortcut, n_outputs, kernel_size, stride, depth, conv_type, res):
        super(UpBlock, self).__init__()
        assert stride > 1

        self.pre_shortcut_convs = nn.ModuleList(
            [ConvLayer(n_inputs, n_outputs, kernel_size, 1, conv_type)] +
            [ConvLayer(n_outputs, n_outputs, kernel_size, 1, conv_type)
             for _ in range(depth - 1)])

        # CONVS to combine high- with low-level information (from shortcut)
        self.post_shortcut_convs = nn.ModuleList(
            [ConvLayer(n_outputs + n_shortcut, n_outputs, kernel_size, 1, conv_type)] +
            [ConvLayer(n_outputs, n_outputs, kernel_size, 1, conv_type)
             for _ in range(depth - 1)])
class DownBlock(nn.Module):
    # Encoder counterpart of UpBlock: the same two conv stacks as
    # DownsamplingBlock, but without the strided downconv, so no decimation
    # takes place. stride is only sanity-checked and res is unused here.
    def __init__(self, n_inputs, n_shortcut, n_outputs, kernel_size, stride, depth, conv_type, res):
        super(DownBlock, self).__init__()
        assert stride > 1
        self.kernel_size = kernel_size
        self.stride = stride

        # CONV 1
        self.pre_shortcut_convs = nn.ModuleList(
            [ConvLayer(n_inputs, n_shortcut, kernel_size, 1, conv_type)] +
            [ConvLayer(n_shortcut, n_shortcut, kernel_size, 1, conv_type)
             for _ in range(depth - 1)])

        self.post_shortcut_convs = nn.ModuleList(
            [ConvLayer(n_shortcut, n_outputs, kernel_size, 1, conv_type)] +
            [ConvLayer(n_outputs, n_outputs, kernel_size, 1, conv_type)
             for _ in range(depth - 1)])
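# Forward passes for UpBlock and DownBlock are not included in this excerpt.
# The self-contained sketch below illustrates the one structural difference
# from the resampling blocks: both conv stacks trim kernel_size - 1 samples
# per layer, but only the resampling block changes the sampling rate. Plain
# Conv1d stands in for ConvLayer; all sizes are illustrative.

convs = nn.Sequential(nn.Conv1d(8, 8, 5), nn.Conv1d(8, 8, 5))
downconv = nn.Conv1d(8, 8, 5, stride=2)

x = torch.randn(1, 8, 237)
h = convs(x)       # DownBlock-style stacks: (1, 8, 229), rate unchanged
y = downconv(h)    # only DownsamplingBlock adds this step: (1, 8, 113)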
class Waveunet(nn.Module):
    # Variant with a configurable number of resampling stages: the first
    # `levels` encoder/decoder stages resample by `strides`, the remaining
    # stages (DownBlock/UpBlock) keep the temporal resolution.
    def __init__(self, num_inputs, num_channels, num_outputs, levels,
                 encoder_kernel_size, decoder_kernel_size, target_output_size,
                 conv_type, res, depth=1, strides=2):
        super(Waveunet, self).__init__()

        self.num_levels = len(num_channels) - 1
        self.strides = strides
        self.encoder_kernel_size = encoder_kernel_size
        self.decoder_kernel_size = decoder_kernel_size
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.depth = depth
        self.levels = levels

        # Only odd filter kernels allowed
        assert encoder_kernel_size % 2 == 1
        assert decoder_kernel_size % 2 == 1

        # This variant always builds a single model
        module = nn.Module()
        module.downsampling_blocks = nn.ModuleList()
        module.upsampling_blocks = nn.ModuleList()
        module.up_blocks = nn.ModuleList()
        module.down_blocks = nn.ModuleList()

        for i in range(self.num_levels):
            in_ch = num_inputs if i == 0 else num_channels[i]
            if i < self.levels:
                module.downsampling_blocks.append(
                    DownsamplingBlock(in_ch, num_channels[i], num_channels[i + 1],
                                      self.encoder_kernel_size, strides, depth,
                                      conv_type, res))
            else:
                module.down_blocks.append(
                    DownBlock(in_ch, num_channels[i], num_channels[i + 1],
                              self.encoder_kernel_size, strides, depth,
                              conv_type, res))

        for i in range(self.num_levels):
            if i < self.num_levels - self.levels:
                module.up_blocks.append(
                    UpBlock(num_channels[-1 - i], num_channels[-2 - i],
                            num_channels[-2 - i], self.decoder_kernel_size,
                            strides, depth, conv_type, res))
            else:
                module.upsampling_blocks.append(
                    UpsamplingBlock(num_channels[-1 - i], num_channels[-2 - i],
                                    num_channels[-2 - i], self.decoder_kernel_size,
                                    strides, depth, conv_type, res))

        module.bottlenecks = nn.ModuleList(
            [ConvLayer(num_channels[-1], num_channels[-1],
                       self.encoder_kernel_size, 1, conv_type)
             for _ in range(depth)])

        # Output conv
        module.output_conv = nn.Conv1d(num_channels[0], num_outputs, 1)

        self.waveunets = module

        self.set_output_size(target_output_size)
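# Illustrative construction of this variant (argument values are assumptions,
# and set_output_size must be defined elsewhere in the class for this to run):
num_channels = [32, 64, 128, 256, 512]            # 4 levels of feature widths
model = Waveunet(num_inputs=1, num_channels=num_channels, num_outputs=1,
                 levels=2,                        # first 2 stages resample
                 encoder_kernel_size=15, decoder_kernel_size=5,
                 target_output_size=16384, conv_type="gn", res="fixed",
                 depth=1, strides=2)
# The encoder then holds 2 DownsamplingBlocks followed by 2 DownBlocks, and
# the decoder mirrors that with 2 UpBlocks followed by 2 UpsamplingBlocks.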
class Waveunet(nn.Module):
    # Original multi-source variant: every encoder stage resamples, and the
    # model either shares one network across all sources or builds one
    # network per instrument.
    def __init__(self, num_inputs, num_channels, num_outputs, instruments,
                 kernel_size, target_output_size, conv_type, res,
                 separate=False, depth=1, strides=2):
        super(Waveunet, self).__init__()

        self.num_levels = len(num_channels)
        self.strides = strides
        self.kernel_size = kernel_size
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.depth = depth
        self.instruments = instruments
        self.separate = separate

        # Only odd filter kernels allowed
        assert kernel_size % 2 == 1

        self.waveunets = nn.ModuleDict()

        # Create one model per source if each source gets its own network,
        # otherwise a single shared model (model_list = ["ALL"])
        model_list = instruments if separate else ["ALL"]
        for instrument in model_list:
            module = nn.Module()

            module.downsampling_blocks = nn.ModuleList()
            module.upsampling_blocks = nn.ModuleList()

            for i in range(self.num_levels - 1):
                in_ch = num_inputs if i == 0 else num_channels[i]
                module.downsampling_blocks.append(
                    DownsamplingBlock(in_ch, num_channels[i], num_channels[i + 1],
                                      kernel_size, strides, depth, conv_type, res))

            for i in range(self.num_levels - 1):
                module.upsampling_blocks.append(
                    UpsamplingBlock(num_channels[-1 - i], num_channels[-2 - i],
                                    num_channels[-2 - i], kernel_size, strides,
                                    depth, conv_type, res))

            module.bottlenecks = nn.ModuleList(
                [ConvLayer(num_channels[-1], num_channels[-1], kernel_size, 1, conv_type)
                 for _ in range(depth)])

            # Output conv: a shared model predicts all sources at once
            outputs = num_outputs if separate else num_outputs * len(instruments)
            module.output_conv = nn.Conv1d(num_channels[0], outputs, 1)

            self.waveunets[instrument] = module

        self.set_output_size(target_output_size)
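# Illustrative construction of the multi-source variant (argument values are
# assumptions; set_output_size and forward are defined elsewhere):
instruments = ["bass", "drums", "other", "vocals"]
model = Waveunet(num_inputs=2,                    # stereo mixture in
                 num_channels=[32, 64, 128, 256, 512],
                 num_outputs=2,                   # stereo waveform per source
                 instruments=instruments,
                 kernel_size=5,
                 target_output_size=88200,        # ~2 s at 44.1 kHz
                 conv_type="gn", res="fixed",
                 separate=False)                  # one shared network...
# ...whose output conv emits num_outputs * len(instruments) = 8 channels;
# separate=True would instead build one full Wave-U-Net per instrument.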