def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, output_padding=0, groups=1, bias=True, dilation=1,
             padding_mode='zeros'):
    super(ComplexConvTranspose1d, self).__init__()
    self.conv_tran_r = ConvTranspose1d(in_channels, out_channels, kernel_size,
                                       stride, padding, output_padding,
                                       groups, bias, dilation, padding_mode)
    self.conv_tran_i = ConvTranspose1d(in_channels, out_channels, kernel_size,
                                       stride, padding, output_padding,
                                       groups, bias, dilation, padding_mode)
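# ComplexConvTranspose1d above only defines __init__; a minimal sketch of
# the usual complex-valued forward (an assumption following the common
# complexPyTorch-style convention, not necessarily this implementation):
def forward(self, input_r, input_i):
    # (a + jb)(w_r + j w_i) = (a w_r - b w_i) + j(a w_i + b w_r)
    real = self.conv_tran_r(input_r) - self.conv_tran_i(input_i)
    imag = self.conv_tran_r(input_i) + self.conv_tran_i(input_r)
    return real, imag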
def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 1,
        padding: int = 0,
        output_padding: int = 0,
        dilation: int = 1,
        groups: int = 1,
        bias: bool = True,
        weight_quant: Optional[WeightQuantType] = Int8WeightPerTensorFloat,
        bias_quant: Optional[BiasQuantType] = None,
        input_quant: Optional[ActQuantType] = None,
        output_quant: Optional[ActQuantType] = None,
        return_quant_tensor: bool = False,
        **kwargs) -> None:
    ConvTranspose1d.__init__(
        self,
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        output_padding=output_padding,
        dilation=dilation,
        groups=groups,
        bias=bias)
    QuantWBIOL.__init__(
        self,
        weight_quant=weight_quant,
        bias_quant=bias_quant,
        input_quant=input_quant,
        output_quant=output_quant,
        return_quant_tensor=return_quant_tensor,
        **kwargs)
    self._output_size = None
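# Minimal usage sketch, assuming the __init__ above belongs to Brevitas's
# QuantConvTranspose1d (which its signature matches); arguments come
# straight from that signature, with the int8 per-tensor weight quantizer
# as the default shown there:
import torch
from brevitas.nn import QuantConvTranspose1d

qconv = QuantConvTranspose1d(4, 8, kernel_size=4, stride=2, padding=1,
                             return_quant_tensor=False)
y = qconv(torch.randn(1, 4, 16))   # (1, 8, 32): length doubled by stride=2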
def __init__(self, h):
    super(Generator, self).__init__()
    self.h = h
    self.num_kernels = len(h.resblock_kernel_sizes)
    self.num_upsamples = len(h.upsample_rates)
    self.conv_pre = weight_norm(
        Conv1d(80, h.upsample_initial_channel, 7, 1, padding=3))
    resblock = ResBlock1 if h.resblock == '1' else ResBlock2

    self.ups = nn.ModuleList()
    for i, (u, k) in enumerate(zip(h.upsample_rates, h.upsample_kernel_sizes)):
        self.ups.append(
            weight_norm(
                ConvTranspose1d(h.upsample_initial_channel // (2**i),
                                h.upsample_initial_channel // (2**(i + 1)),
                                k, u, padding=(k - u) // 2)))

    self.resblocks = nn.ModuleList()
    for i in range(len(self.ups)):
        ch = h.upsample_initial_channel // (2**(i + 1))
        for j, (k, d) in enumerate(
                zip(h.resblock_kernel_sizes, h.resblock_dilation_sizes)):
            self.resblocks.append(resblock(h, ch, k, d))

    self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))
    self.ups.apply(init_weights)
    self.conv_post.apply(init_weights)
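# Why padding=(k - u) // 2 in the upsampling stack: a transposed conv gives
#   L_out = (L_in - 1) * stride - 2 * padding + kernel_size + output_padding
# (dilation = 1), so with stride=u, kernel_size=k and padding=(k - u) // 2
# for even k - u,
#   L_out = (L_in - 1) * u - (k - u) + k = L_in * u,
# i.e. each layer upsamples by exactly its rate. A standalone check
# (illustrative values, not from the original config):
import torch
from torch.nn import ConvTranspose1d

for u, k in [(8, 16), (2, 4)]:
    up = ConvTranspose1d(16, 8, k, u, padding=(k - u) // 2)
    assert up(torch.randn(1, 16, 100)).shape[-1] == 100 * u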
def __init__(self, cfg):
    super(Generator, self).__init__()
    self.num_kernels = len(cfg["resblock_kernel_sizes"])
    self.num_upsamples = len(cfg["upsample_rates"])
    self.conv_pre = weight_norm(
        Conv1d(80, cfg["upsample_initial_channel"], 7, 1, padding=3))

    self.ups = nn.ModuleList()
    for i, (u, k) in enumerate(
            zip(cfg["upsample_rates"], cfg["upsample_kernel_sizes"])):
        self.ups.append(
            weight_norm(
                ConvTranspose1d(
                    cfg["upsample_initial_channel"] // (2**i),
                    cfg["upsample_initial_channel"] // (2**(i + 1)),
                    k,
                    u,
                    padding=(k - u) // 2,
                )))

    self.resblocks = nn.ModuleList()
    for i in range(len(self.ups)):
        ch = cfg["upsample_initial_channel"] // (2**(i + 1))
        for k, d in zip(cfg["resblock_kernel_sizes"],
                        cfg["resblock_dilation_sizes"]):
            self.resblocks.append(ResBlock(ch, k, d))

    self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))
    self.ups.apply(init_weights)
    self.conv_post.apply(init_weights)
def __init__(self, h, c_out=1):
    super(HifiGanGenerator, self).__init__()
    self.h = h
    self.num_kernels = len(h['resblock_kernel_sizes'])
    self.num_upsamples = len(h['upsample_rates'])
    self.conv_pre = weight_norm(
        Conv1d(80, h['upsample_initial_channel'], 7, 1, padding=3))
    resblock = ResBlock1 if h['resblock'] == '1' else ResBlock2

    self.ups = nn.ModuleList()
    for i, (u, k) in enumerate(
            zip(h['upsample_rates'], h['upsample_kernel_sizes'])):
        c_cur = h['upsample_initial_channel'] // (2**(i + 1))
        self.ups.append(
            weight_norm(
                ConvTranspose1d(c_cur * 2, c_cur, k, u,
                                padding=(k - u) // 2)))

    self.resblocks = nn.ModuleList()
    for i in range(len(self.ups)):
        ch = h['upsample_initial_channel'] // (2**(i + 1))
        for j, (k, d) in enumerate(
                zip(h['resblock_kernel_sizes'],
                    h['resblock_dilation_sizes'])):
            self.resblocks.append(resblock(h, ch, k, d))

    self.conv_post = weight_norm(Conv1d(ch, c_out, 7, 1, padding=3))
    self.ups.apply(init_weights)
    self.conv_post.apply(init_weights)
def __init__(
    self,
    in_channels,
    out_channels,
    resblock_type,
    resblock_dilation_sizes,
    resblock_kernel_sizes,
    upsample_kernel_sizes,
    upsample_initial_channel,
    upsample_factors,
    inference_padding=5,
):
    r"""HiFiGAN Generator with Multi-Receptive Field Fusion (MRF)

    Network:
        x -> lrelu -> upsampling_layer -> resblock1_k1x1 -> z1 -> + -> z_sum / #resblocks -> lrelu -> conv_post_7x1 -> tanh -> o
                                             ..          -> zI ---|
                                          resblockN_kNx1 -> zN ---'

    Args:
        in_channels (int): number of input tensor channels.
        out_channels (int): number of output tensor channels.
        resblock_type (str): type of the `ResBlock`. '1' or '2'.
        resblock_dilation_sizes (List[List[int]]): list of dilation values in each layer of a `ResBlock`.
        resblock_kernel_sizes (List[int]): list of kernel sizes for each `ResBlock`.
        upsample_kernel_sizes (List[int]): list of kernel sizes for each transposed convolution.
        upsample_initial_channel (int): number of channels for the first upsampling layer. This is divided by 2
            for each consecutive upsampling layer.
        upsample_factors (List[int]): upsampling factors (stride) for each upsampling layer.
        inference_padding (int): constant padding applied to the input at inference time. Defaults to 5.
    """
    super().__init__()
    self.inference_padding = inference_padding
    self.num_kernels = len(resblock_kernel_sizes)
    self.num_upsamples = len(upsample_factors)
    # initial upsampling layers
    self.conv_pre = weight_norm(Conv1d(in_channels, upsample_initial_channel, 7, 1, padding=3))
    resblock = ResBlock1 if resblock_type == "1" else ResBlock2
    # upsampling layers
    self.ups = nn.ModuleList()
    for i, (u, k) in enumerate(zip(upsample_factors, upsample_kernel_sizes)):
        self.ups.append(
            weight_norm(
                ConvTranspose1d(
                    upsample_initial_channel // (2 ** i),
                    upsample_initial_channel // (2 ** (i + 1)),
                    k,
                    u,
                    padding=(k - u) // 2,
                )
            )
        )
    # MRF blocks
    self.resblocks = nn.ModuleList()
    for i in range(len(self.ups)):
        ch = upsample_initial_channel // (2 ** (i + 1))
        for _, (k, d) in enumerate(zip(resblock_kernel_sizes, resblock_dilation_sizes)):
            self.resblocks.append(resblock(ch, k, d))
    # post convolution layer
    self.conv_post = weight_norm(Conv1d(ch, out_channels, 7, 1, padding=3))
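# The docstring above describes the MRF fusion; a minimal sketch of the
# matching forward pass, assumed from the standard HiFi-GAN recipe rather
# than copied from this class (F is torch.nn.functional, LRELU_SLOPE the
# usual leaky-ReLU slope constant):
def forward(self, x):
    o = self.conv_pre(x)
    for i in range(self.num_upsamples):
        o = F.leaky_relu(o, LRELU_SLOPE)
        o = self.ups[i](o)                 # upsample by factor u_i
        z_sum = None
        for j in range(self.num_kernels):  # fuse the parallel resblocks
            z = self.resblocks[i * self.num_kernels + j](o)
            z_sum = z if z_sum is None else z_sum + z
        o = z_sum / self.num_kernels       # z_sum / #resblocks
    o = F.leaky_relu(o)
    o = self.conv_post(o)
    return torch.tanh(o)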
def __init__(self):
    super(Generator, self).__init__()
    self.conv_pre = weight_norm(Conv1d(80, 512, 7, 1, padding=3))
    self.ups = nn.ModuleList([
        weight_norm(ConvTranspose1d(512, 256, 16, 8, padding=4)),
        weight_norm(ConvTranspose1d(256, 128, 16, 8, padding=4)),
        weight_norm(ConvTranspose1d(128, 64, 4, 2, padding=1)),
        weight_norm(ConvTranspose1d(64, 32, 4, 2, padding=1))
    ])
    self.resblocks = nn.ModuleList([
        ResBlock(256, 256),
        ResBlock(128, 128),
        ResBlock(64, 64),
        ResBlock(32, 32)
    ])
    self.conv_post = weight_norm(Conv1d(32, 1, 7, 1, padding=3))
def __init__(self, n):
    super(Decoder, self).__init__()
    # kernel_size == stride gives exact 5x and 3x upsampling (15x overall)
    self.cnn2 = ConvTranspose1d(1024, 1024, kernel_size=5, stride=5)
    self.cnn3 = ConvTranspose1d(1024, 1024, kernel_size=3, stride=3)
    self.cnn4 = Conv1d(1024, 1024, kernel_size=3, padding=1)
    self.cnn5 = Conv1d(1024, 1024, kernel_size=3, padding=1)
    self.cnn6 = Conv1d(1024, 1024, kernel_size=3, padding=1)
    self.cnn7 = Conv1d(1024, n, kernel_size=1)

    self.cnn2_g = ConvTranspose1d(1024, 1024, kernel_size=5, stride=5)
    self.cnn3_g = ConvTranspose1d(1024, 1024, kernel_size=3, stride=3)
    self.cnn4_g = Conv1d(1024, 1024, kernel_size=3, padding=1)
    self.cnn5_g = Conv1d(1024, 1024, kernel_size=3, padding=1)
    self.cnn6_g = Conv1d(1024, 1024, kernel_size=3, padding=1)
    self.cnn7_g = Conv1d(1024, n, kernel_size=1)

    self.bn2 = BatchNorm1d(1024)
    self.bn3 = BatchNorm1d(1024)
    self.bn4 = BatchNorm1d(1024)
    self.bn5 = BatchNorm1d(1024)
    self.bn6 = BatchNorm1d(1024)
    self.bn7 = BatchNorm1d(1024)
def __init__(self,
             resblock_kernel_sizes=[3, 7, 11],
             upsample_rates=[10, 6],
             upsample_initial_channel=256,
             resblock_type="1",
             upsample_kernel_sizes=[20, 12],
             resblock_dilation_sizes=[[1, 3, 5], [1, 3, 5], [1, 3, 5]],
             transposedconv=True,
             bias=True):
    super(MultiBandHiFiGANGenerator, self).__init__()
    self.num_kernels = len(resblock_kernel_sizes)
    self.num_upsamples = len(upsample_rates)
    self.conv_pre = Conv1d(80, upsample_initial_channel, 7, 1,
                           padding=3, bias=bias)
    resblock = ResBlock1 if resblock_type == '1' else ResBlock2

    self.ups = nn.ModuleList()
    for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
        if transposedconv:
            up = ConvTranspose1d(upsample_initial_channel // (2**i),
                                 upsample_initial_channel // (2**(i + 1)),
                                 k,
                                 u,
                                 padding=(u // 2 + u % 2),
                                 output_padding=u % 2,
                                 bias=bias)
        else:
            up = UpsampleLayer(upsample_initial_channel // (2**i),
                               upsample_initial_channel // (2**(i + 1)),
                               upsample_rate=u,
                               kernel_size=k,
                               stride=1,
                               padding=k // 2,
                               bias=bias)
        self.ups.append(up)

    self.resblocks = nn.ModuleList()
    for i in range(len(self.ups)):
        ch = upsample_initial_channel // (2**(i + 1))
        for j, (k, d) in enumerate(
                zip(resblock_kernel_sizes, resblock_dilation_sizes)):
            self.resblocks.append(resblock(ch, k, d, bias=bias))

    self.conv_post = Conv1d(ch, 4, 7, 1, padding=3, bias=bias)  # 4 band
    self.pqmf = PQMF()  # 4 band

    # apply weight norm
    self.apply_weight_norm()
    # reset parameters
    self.reset_parameters()
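# The transposed-conv branch above uses padding = u//2 + u%2 together with
# output_padding = u % 2; for the k = 2u kernels in the defaults this also
# yields an exact u-times upsampling, and unlike (k - u) // 2 it stays
# exact for odd rates (where (k - u) // 2 leaves the output one sample
# long). A standalone check with illustrative channel sizes:
import torch
from torch.nn import ConvTranspose1d

for u in [10, 6, 3]:   # even and odd upsample rates
    k = 2 * u
    up = ConvTranspose1d(8, 4, k, u,
                         padding=(u // 2 + u % 2),
                         output_padding=u % 2)
    assert up(torch.randn(1, 8, 50)).shape[-1] == 50 * u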
def __init__(self,
             in_channels: int,
             out_channels: int,
             kernel_size: Union[int, Tuple[int]],
             stride: Union[int, Tuple[int]] = 1,
             padding: Union[int, Tuple[int]] = 0,
             output_padding: Union[int, Tuple[int]] = 0,
             dilation: Union[int, Tuple[int]] = 1,
             groups: int = 1,
             bias: bool = True,
             weight_quant: Union[WeightQuantProxyProtocol,
                                 Type[Injector]] = DefaultWeightQI,
             bias_quant: Union[BiasQuantProxyProtocol,
                               Type[Injector]] = DefaultBiasQI,
             input_quant: Union[ActQuantProxyProtocol,
                                Type[Injector]] = None,
             output_quant: Union[ActQuantProxyProtocol,
                                 Type[Injector]] = None,
             return_quant_tensor: bool = False,
             **kwargs) -> None:
    ConvTranspose1d.__init__(self,
                             in_channels=in_channels,
                             out_channels=out_channels,
                             kernel_size=kernel_size,
                             stride=stride,
                             padding=padding,
                             output_padding=output_padding,
                             dilation=dilation,
                             groups=groups,
                             bias=bias)
    QuantWBIOL.__init__(self,
                        weight=self.weight,
                        bias=self.bias,
                        weight_quant=weight_quant,
                        bias_quant=bias_quant,
                        input_quant=input_quant,
                        output_quant=output_quant,
                        return_quant_tensor=return_quant_tensor,
                        **kwargs)
    self._output_size = None
def __init__(
    self,
    resblock,
    upsample_rates,
    upsample_kernel_sizes,
    upsample_initial_channel,
    resblock_kernel_sizes,
    resblock_dilation_sizes,
    initial_input_size=80,
    apply_weight_init_conv_pre=False,
):
    super().__init__()
    self.num_kernels = len(resblock_kernel_sizes)
    self.num_upsamples = len(upsample_rates)
    self.conv_pre = weight_norm(
        Conv1d(initial_input_size, upsample_initial_channel, 7, 1, padding=3))
    self.lrelu_slope = LRELU_SLOPE
    resblock = ResBlock1 if resblock == 1 else ResBlock2

    self.ups = nn.ModuleList()
    for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
        self.ups.append(
            weight_norm(
                ConvTranspose1d(
                    upsample_initial_channel // (2**i),
                    upsample_initial_channel // (2**(i + 1)),
                    k,
                    u,
                    padding=(k - u) // 2,
                )))

    self.resblocks = nn.ModuleList()
    for i in range(len(self.ups)):
        resblock_list = nn.ModuleList()
        ch = upsample_initial_channel // (2**(i + 1))
        for j, (k, d) in enumerate(
                zip(resblock_kernel_sizes, resblock_dilation_sizes)):
            resblock_list.append(resblock(ch, k, d))
        self.resblocks.append(resblock_list)

    self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))
    self.ups.apply(init_weights)
    self.conv_post.apply(init_weights)
    if apply_weight_init_conv_pre:
        self.conv_pre.apply(init_weights)
def test_conv_transpose1d(batch, length, in_channels, out_channels,
                          kernel_size, stride, padding, output_padding,
                          dilation, groups, bias, padding_mode):
    x = torch.randn(batch, in_channels, length,
                    requires_grad=True, device=device)
    conv = ConvTranspose1d(in_channels, out_channels, kernel_size, stride,
                           padding, output_padding, groups, bias, dilation,
                           padding_mode).to(device)
    fft_conv = FFTConvTranspose1d(in_channels, out_channels, kernel_size,
                                  stride, padding, output_padding, groups,
                                  bias, dilation, padding_mode).to(device)
    fft_conv.load_state_dict(conv.state_dict())

    y1 = conv(x)
    y2 = fft_conv(x)
    assert torch.allclose(y1, y2, atol=1e-5,
                          rtol=1e-5), torch.abs(y1 - y2).max().item()
    y2.sum().backward()
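# The argument list suggests a pytest-parametrized harness; a minimal
# illustrative driver (the grid below is assumed, not the original one,
# and it reuses the module-level device / FFTConvTranspose1d from above):
import pytest

@pytest.mark.parametrize("kernel_size,stride,padding,output_padding",
                         [(3, 1, 0, 0), (16, 8, 4, 0), (5, 3, 2, 1)])
def test_conv_transpose1d_small(kernel_size, stride, padding, output_padding):
    test_conv_transpose1d(batch=2, length=64, in_channels=4, out_channels=6,
                          kernel_size=kernel_size, stride=stride,
                          padding=padding, output_padding=output_padding,
                          dilation=1, groups=1, bias=True,
                          padding_mode='zeros')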
def __init__(
    self,
    upsample_in_channel: int,
    upsample_out_channel: int,
    upsample_kernel_size: int,
    upsample_rates: int,
    resblock_kernel_sizes: List[int],
    resblock_dilation_sizes: List[List[int]]
):
    super(UpSampler, self).__init__()
    self.up = weight_norm(
        ConvTranspose1d(
            upsample_in_channel,
            upsample_out_channel,
            upsample_kernel_size,
            upsample_rates,
            padding=(upsample_kernel_size - upsample_rates) // 2,
        )
    )
    self.up.apply(init_weights)
    self.res_0 = ResBlock(
        channels=upsample_out_channel,
        kernel_size=resblock_kernel_sizes[0],
        dilation=resblock_dilation_sizes[0]
    )
    self.res_1 = ResBlock(
        channels=upsample_out_channel,
        kernel_size=resblock_kernel_sizes[1],
        dilation=resblock_dilation_sizes[1]
    )
    self.res_2 = ResBlock(
        channels=upsample_out_channel,
        kernel_size=resblock_kernel_sizes[2],
        dilation=resblock_dilation_sizes[2]
    )
    self.num_kernels = len(resblock_kernel_sizes)
def __init__(self,
             initial_channel,
             resblock,
             resblock_kernel_sizes,
             resblock_dilation_sizes,
             upsample_rates,
             upsample_initial_channel,
             upsample_kernel_sizes,
             gin_channels=0):
    super(Generator, self).__init__()
    self.num_kernels = len(resblock_kernel_sizes)
    self.num_upsamples = len(upsample_rates)
    self.conv_pre = Conv1d(initial_channel, upsample_initial_channel,
                           7, 1, padding=3)
    resblock = modules.ResBlock1 if resblock == '1' else modules.ResBlock2

    self.ups = nn.ModuleList()
    for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
        self.ups.append(
            weight_norm(
                ConvTranspose1d(upsample_initial_channel // (2**i),
                                upsample_initial_channel // (2**(i + 1)),
                                k, u, padding=(k - u) // 2)))

    self.resblocks = nn.ModuleList()
    for i in range(len(self.ups)):
        ch = upsample_initial_channel // (2**(i + 1))
        for j, (k, d) in enumerate(
                zip(resblock_kernel_sizes, resblock_dilation_sizes)):
            self.resblocks.append(resblock(ch, k, d))

    self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False)
    self.ups.apply(init_weights)

    if gin_channels != 0:
        self.cond = nn.Conv1d(gin_channels, upsample_initial_channel, 1)
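# The `cond` 1x1 convolution above projects a global (e.g. speaker)
# embedding into the generator's channel space; it is normally applied
# additively right after conv_pre. A sketch of that conditioning step,
# assumed from the standard VITS-style forward rather than copied from
# this class:
def forward(self, x, g=None):
    x = self.conv_pre(x)
    if g is not None:
        x = x + self.cond(g)   # g: (batch, gin_channels, 1) global embedding
    # ... upsampling / resblock fusion / conv_post as in the generators above
    return x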
def __init__(self,
             in_channels: int,
             out_channels: int,
             kernel_size: Union[int, Tuple[int]],
             stride: Union[int, Tuple[int]] = 1,
             padding: Union[int, Tuple[int]] = 0,
             output_padding: Union[int, Tuple[int]] = 0,
             padding_type: PaddingType = PaddingType.STANDARD,
             dilation: Union[int, Tuple[int]] = 1,
             groups: int = 1,
             bias: bool = True,
             bias_quant_type: QuantType = QuantType.FP,
             bias_narrow_range: bool = False,
             bias_bit_width: int = None,
             weight_quant_override: WeightQuantProxy = None,
             weight_quant_type: QuantType = QuantType.FP,
             weight_narrow_range: bool = False,
             weight_scaling_override: Optional[Module] = None,
             weight_bit_width_impl_override: Union[BitWidthParameter,
                                                   BitWidthConst] = None,
             weight_bit_width_impl_type: BitWidthImplType = BitWidthImplType.CONST,
             weight_restrict_bit_width_type: RestrictValueType = RestrictValueType.INT,
             weight_bit_width: int = 32,
             weight_min_overall_bit_width: Optional[int] = 2,
             weight_max_overall_bit_width: Optional[int] = None,
             weight_scaling_impl_type: ScalingImplType = ScalingImplType.STATS,
             weight_scaling_const: Optional[float] = None,
             weight_scaling_stats_op: StatsOp = StatsOp.MAX,
             weight_scaling_per_output_channel: bool = False,
             weight_ternary_threshold: float = 0.5,
             weight_restrict_scaling_type: RestrictValueType = RestrictValueType.LOG_FP,
             weight_scaling_stats_sigma: float = 3.0,
             weight_scaling_min_val: float = SCALING_MIN_VAL,
             weight_override_pretrained_bit_width: bool = False,
             compute_output_scale: bool = False,
             compute_output_bit_width: bool = False,
             return_quant_tensor: bool = False,
             deterministic: bool = False) -> None:
    QuantLayer.__init__(self,
                        compute_output_scale=compute_output_scale,
                        compute_output_bit_width=compute_output_bit_width,
                        return_quant_tensor=return_quant_tensor)
    ConvTranspose1d.__init__(self,
                             in_channels=in_channels,
                             out_channels=out_channels,
                             kernel_size=kernel_size,
                             stride=stride,
                             padding=padding,
                             output_padding=output_padding,
                             dilation=dilation,
                             groups=groups,
                             bias=bias)
    if weight_quant_type == QuantType.FP and compute_output_bit_width:
        raise Exception(
            "Computing output bit width requires enabling quantization")
    if bias_quant_type != QuantType.FP and not (compute_output_scale and
                                                compute_output_bit_width):
        raise Exception(
            "Quantizing bias requires to compute output scale and output bit width")

    if torch.backends.cudnn.benchmark:
        torch.backends.cudnn.deterministic = deterministic

    # self.per_elem_ops = 2 * self.kernel_size[0] * (in_channels // groups)
    # TODO: implement op_count
    self.padding_type = padding_type
    self.weight_reg = WeightReg()

    if weight_quant_override is not None:
        self.weight_quant = weight_quant_override
        self.weight_quant.add_tracked_parameter(self.weight)
    else:
        weight_scaling_stats_input_concat_dim = 1
        if weight_scaling_per_output_channel:
            weight_stats_input_view_shape_impl = StatsInputViewShapeImpl.OVER_OUTPUT_CHANNELS
            weight_scaling_shape = self.per_output_channel_broadcastable_shape
            weight_scaling_stats_reduce_dim = 1
        else:
            weight_stats_input_view_shape_impl = StatsInputViewShapeImpl.OVER_TENSOR
            weight_scaling_shape = SCALING_SCALAR_SHAPE
            weight_scaling_stats_reduce_dim = None
        if weight_scaling_stats_op == StatsOp.MAX_AVE:
            weight_stats_input_view_shape_impl = StatsInputViewShapeImpl.OVER_OUTPUT_CHANNELS
            weight_scaling_stats_reduce_dim = 1
        self.weight_quant = WeightQuantProxy(
            bit_width=weight_bit_width,
            quant_type=weight_quant_type,
            narrow_range=weight_narrow_range,
            scaling_override=weight_scaling_override,
            restrict_scaling_type=weight_restrict_scaling_type,
            scaling_const=weight_scaling_const,
            scaling_stats_op=weight_scaling_stats_op,
            scaling_impl_type=weight_scaling_impl_type,
            scaling_stats_reduce_dim=weight_scaling_stats_reduce_dim,
            scaling_shape=weight_scaling_shape,
            bit_width_impl_type=weight_bit_width_impl_type,
            bit_width_impl_override=weight_bit_width_impl_override,
            restrict_bit_width_type=weight_restrict_bit_width_type,
            min_overall_bit_width=weight_min_overall_bit_width,
            max_overall_bit_width=weight_max_overall_bit_width,
            tracked_parameter_list_init=self.weight,
            ternary_threshold=weight_ternary_threshold,
            scaling_stats_input_view_shape_impl=weight_stats_input_view_shape_impl,
            scaling_stats_input_concat_dim=weight_scaling_stats_input_concat_dim,
            scaling_stats_sigma=weight_scaling_stats_sigma,
            scaling_min_val=weight_scaling_min_val,
            override_pretrained_bit_width=weight_override_pretrained_bit_width)
    self.bias_quant = BiasQuantProxy(quant_type=bias_quant_type,
                                     bit_width=bias_bit_width,
                                     narrow_range=bias_narrow_range)
def __init__(self,
             in_channels,
             out_channels,
             pool,
             inner_size,
             activation,
             repeat=5,
             kernel_size=1,
             stride=1,
             dilation=1,
             dropout=0.0,
             residual=False,
             separable=False):
    super(BlockX, self).__init__()
    self.use_res = residual
    self.conv = ModuleList()
    _in_channels = in_channels
    # kernel_size, stride and dilation are expected as sequences:
    # they are indexed with [0] below despite the scalar defaults
    padding = self.get_padding(kernel_size[0], stride[0], dilation[0])

    # pooled entry: strided Conv1d downsamples by `pool`,
    # then a depthwise conv
    self.conv.extend([
        SF(),
        Conv1d(_in_channels, inner_size * 2, kernel_size=pool,
               stride=pool, padding=0, bias=False),
        Conv1d(inner_size * 2, inner_size * 2, kernel_size,
               padding=padding, bias=False, groups=inner_size * 2),
        BatchNorm1d(inner_size * 2, eps=1e-3, momentum=0.1),
    ])
    self.conv.extend(self.get_activation(activation, dropout))
    _in_channels = inner_size

    # add the first n - 1 convolutions + activation
    for _ in range(repeat - 2):
        self.conv.extend(
            self.get_tcs(_in_channels,
                         inner_size,
                         kernel_size=kernel_size,
                         stride=stride,
                         dilation=dilation,
                         padding=padding,
                         separable=separable))
        self.conv.extend(self.get_activation(activation, dropout))
        _in_channels = inner_size

    # add the last conv and batch norm; ConvTranspose1d with
    # kernel_size == stride == pool undoes the initial pooling
    self.conv.extend([
        Conv1d(inner_size, inner_size, kernel_size,
               padding=padding, bias=False, groups=inner_size),
        ConvTranspose1d(inner_size, out_channels * 2, kernel_size=pool,
                        stride=pool, padding=0, bias=False),
        BatchNorm1d(out_channels * 2, eps=1e-3, momentum=0.1),
    ])

    # add the residual connection
    if self.use_res:
        self.residual = Sequential(*self.get_tcs(in_channels, out_channels))

    # add the activation and dropout
    self.activation = Sequential(*self.get_activation(activation, dropout))
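# The strided Conv1d / ConvTranspose1d pair in BlockX acts as learned
# pooling and un-pooling: with kernel_size == stride == pool and no
# padding, the convolution divides the length by `pool` and the transposed
# convolution multiplies it back. A standalone sketch (illustrative sizes,
# not from the original module):
import torch
from torch.nn import Conv1d, ConvTranspose1d

pool = 4
down = Conv1d(16, 32, kernel_size=pool, stride=pool, padding=0, bias=False)
up = ConvTranspose1d(32, 16, kernel_size=pool, stride=pool, padding=0,
                     bias=False)
x = torch.randn(1, 16, 128)   # length divisible by pool
h = down(x)                   # (1, 32, 32): length // pool
y = up(h)                     # (1, 16, 128): length restored
assert y.shape[-1] == x.shape[-1]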