Example 1
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 downsample=False,
                 blur_kernel=[1, 3, 3, 1],
                 bias=True,
                 act_cfg=dict(type='fused_bias'),
                 fp16_enabled=False,
                 conv_clamp=256.):
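        # Convolution block with optional blur + stride-2 downsampling.
        # The assembled `layers` are handed to super().__init__ at the
        # end, with an optional fused bias + LeakyReLU appended first.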

        self.fp16_enabled = fp16_enabled
        self.conv_clamp = float(conv_clamp)
        layers = []

        if downsample:
            factor = 2
            p = (len(blur_kernel) - factor) + (kernel_size - 1)
            pad0 = (p + 1) // 2
            pad1 = p // 2

            layers.append(Blur(blur_kernel, pad=(pad0, pad1)))

            stride = 2
            self.padding = 0
        else:
            stride = 1
            self.padding = kernel_size // 2

        self.with_fused_bias = act_cfg is not None and act_cfg.get(
            'type') == 'fused_bias'
        if self.with_fused_bias:
            conv_act_cfg = None
        else:
            conv_act_cfg = act_cfg
        layers.append(
            EqualizedLRConvModule(in_channels,
                                  out_channels,
                                  kernel_size,
                                  padding=self.padding,
                                  stride=stride,
                                  bias=bias and not self.with_fused_bias,
                                  norm_cfg=None,
                                  act_cfg=conv_act_cfg,
                                  equalized_lr_cfg=dict(mode='fan_in',
                                                        gain=1.)))
        if self.with_fused_bias:
            layers.append(_FusedBiasLeakyReLU(out_channels))

        super(ConvDownLayer, self).__init__(*layers)
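
The downsampling branch pads asymmetrically so that the blur followed by the
stride-2 convolution halves the spatial size exactly. A minimal sketch of that
arithmetic, evaluated by hand for the defaults above (kernel_size=3,
blur_kernel=[1, 3, 3, 1]):

# Padding arithmetic from the downsample branch, defaults assumed.
kernel_size = 3
blur_kernel = [1, 3, 3, 1]
factor = 2

p = (len(blur_kernel) - factor) + (kernel_size - 1)  # (4 - 2) + 2 = 4
pad0 = (p + 1) // 2  # 2
pad1 = p // 2        # 2

# Blur pads by (pad0, pad1); the following stride-2 conv uses
# padding=0, so H_out = H_in // 2 for even H_in.
assert (pad0, pad1) == (2, 2)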
Example 2
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 style_channels,
                 padding=1,
                 initial=False,
                 blur_kernel=[1, 2, 1],
                 upsample=False,
                 fused=False):
        """Convolutional style blocks composing of noise injector, AdaIN module
        and convolution layers.

        Args:
            in_channels (int): The channel number of the input tensor.
            out_channels (int): The channel number of the output tensor.
            kernel_size (int): The kernel size of convolution layers.
            style_channels (int): The number of channels for style code.
            padding (int, optional): Padding of convolution layers.
                Defaults to 1.
            initial (bool, optional): Whether this is the first StyleConv of
                StyleGAN's generator. Defaults to False.
            blur_kernel (list, optional): The blur kernel.
                Defaults to [1, 2, 1].
            upsample (bool, optional): Whether to perform upsampling.
                Defaults to False.
            fused (bool, optional): Whether to use fused upsampling
                convolution. Defaults to False.
        """
        super().__init__()

        if initial:
            self.conv1 = ConstantInput(in_channels)
        else:
            if upsample:
                if fused:
                    self.conv1 = nn.Sequential(
                        EqualizedLRConvUpModule(
                            in_channels,
                            out_channels,
                            kernel_size,
                            padding=padding,
                            act_cfg=dict(type='LeakyReLU',
                                         negative_slope=0.2)),
                        Blur(blur_kernel, pad=(1, 1)),
                    )
                else:
                    self.conv1 = nn.Sequential(
                        nn.Upsample(scale_factor=2, mode='nearest'),
                        EqualizedLRConvModule(
                            in_channels,
                            out_channels,
                            kernel_size,
                            padding=padding,
                            act_cfg=None),
                        Blur(blur_kernel, pad=(1, 1)))
            else:
                self.conv1 = EqualizedLRConvModule(
                    in_channels,
                    out_channels,
                    kernel_size,
                    padding=padding,
                    act_cfg=None)

        self.noise_injector1 = NoiseInjection()
        self.activate1 = nn.LeakyReLU(0.2)
        self.adain1 = AdaptiveInstanceNorm(out_channels, style_channels)

        self.conv2 = EqualizedLRConvModule(
            out_channels,
            out_channels,
            kernel_size,
            padding=padding,
            act_cfg=None)
        self.noise_injector2 = NoiseInjection()
        self.activate2 = nn.LeakyReLU(0.2)
        self.adain2 = AdaptiveInstanceNorm(out_channels, style_channels)
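
A hedged construction sketch (hypothetical channel sizes, assuming StyleConv
and its submodules are importable) showing how the two branches of the
constructor above differ:

# Hypothetical usage; this only exercises the constructor shown above.
initial_block = StyleConv(512, 512, kernel_size=3, style_channels=512,
                          initial=True)
# initial_block.conv1 is a ConstantInput emitting the learned 4x4 tensor.

up_block = StyleConv(512, 256, kernel_size=3, style_channels=512,
                     upsample=True, fused=False)
# up_block.conv1 is nn.Sequential(Upsample, EqualizedLRConvModule, Blur).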
    def __init__(self,
                 out_size,
                 style_channels,
                 num_mlps=8,
                 blur_kernel=[1, 2, 1],
                 lr_mlp=0.01,
                 default_style_mode='mix',
                 eval_style_mode='single',
                 mix_prob=0.9):
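        # Style-based generator: PixelNorm plus an MLP maps latent codes
        # to style vectors, which drive a pyramid of upsampling StyleConv
        # blocks, each paired with a to_rgb head.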
        super().__init__()
        self.out_size = out_size
        self.style_channels = style_channels
        self.num_mlps = num_mlps
        self.lr_mlp = lr_mlp
        self._default_style_mode = default_style_mode
        self.default_style_mode = default_style_mode
        self.eval_style_mode = eval_style_mode
        self.mix_prob = mix_prob

        # define style mapping layers
        mapping_layers = [PixelNorm()]

        for _ in range(num_mlps):
            mapping_layers.append(
                EqualLinearActModule(style_channels,
                                     style_channels,
                                     equalized_lr_cfg=dict(lr_mul=lr_mlp,
                                                           gain=1.),
                                     act_cfg=dict(type='LeakyReLU',
                                                  negative_slope=0.2)))

        self.style_mapping = nn.Sequential(*mapping_layers)

        self.channels = {
            4: 512,
            8: 512,
            16: 512,
            32: 512,
            64: 256,
            128: 128,
            256: 64,
            512: 32,
            1024: 16,
        }

        # generator backbone (4x4 --> higher resolutions)
        self.log_size = int(math.log2(self.out_size))

        self.convs = nn.ModuleList()
        self.to_rgbs = nn.ModuleList()

        in_channels_ = self.channels[4]

        for i in range(2, self.log_size + 1):
            out_channels_ = self.channels[2**i]
            self.convs.append(
                StyleConv(in_channels_,
                          out_channels_,
                          3,
                          style_channels,
                          initial=(i == 2),
                          upsample=True,
                          fused=True))
            self.to_rgbs.append(
                EqualizedLRConvModule(out_channels_, 3, 1, act_cfg=None))

            in_channels_ = out_channels_

        self.num_latents = self.log_size * 2 - 2
        self.num_injected_noises = self.num_latents

        # register buffer for injected noises
        for layer_idx in range(self.num_injected_noises):
            res = (layer_idx + 4) // 2
            shape = [1, 1, 2**res, 2**res]
            self.register_buffer(f'injected_noise_{layer_idx}',
                                 torch.randn(*shape))
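
The latent and noise bookkeeping follows directly from out_size. A small
sketch of that arithmetic for out_size=1024 (values derived from the
expressions above):

import math

out_size = 1024
log_size = int(math.log2(out_size))  # 10
num_convs = log_size - 1             # StyleConv blocks for i = 2..10 -> 9
num_latents = log_size * 2 - 2       # 18
num_injected_noises = num_latents    # one noise buffer per conv per block

# Noise buffer resolutions: two per resolution, starting at 4x4.
shapes = [(1, 1, 2**((idx + 4) // 2), 2**((idx + 4) // 2))
          for idx in range(num_injected_noises)]
assert shapes[0] == (1, 1, 4, 4)
assert shapes[-1] == (1, 1, 1024, 1024)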
    def __init__(self,
                 in_size,
                 blur_kernel=[1, 2, 1],
                 mbstd_cfg=dict(group_size=4)):
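        # Progressive discriminator: per-resolution from_rgb entries feed
        # conv + blur + stride-2 stacks down to 4x4, where a minibatch
        # stddev channel is appended before the final convs and linear head.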
        super().__init__()

        self.with_mbstd = mbstd_cfg is not None
        channels = {
            4: 512,
            8: 512,
            16: 512,
            32: 512,
            64: 256,
            128: 128,
            256: 64,
            512: 32,
            1024: 16,
        }

        log_size = int(math.log2(in_size))
        self.log_size = log_size
        in_channels = channels[in_size]

        self.convs = nn.ModuleList()
        self.from_rgb = nn.ModuleList()

        for i in range(log_size, 2, -1):
            out_channel = channels[2**(i - 1)]
            self.from_rgb.append(
                EqualizedLRConvModule(3,
                                      in_channels,
                                      kernel_size=3,
                                      padding=1,
                                      act_cfg=dict(type='LeakyReLU',
                                                   negative_slope=0.2)))
            self.convs.append(
                nn.Sequential(
                    EqualizedLRConvModule(in_channels,
                                          out_channel,
                                          kernel_size=3,
                                          padding=1,
                                          bias=True,
                                          norm_cfg=None,
                                          act_cfg=dict(type='LeakyReLU',
                                                       negative_slope=0.2)),
                    Blur(blur_kernel, pad=(1, 1)),
                    EqualizedLRConvDownModule(out_channel,
                                              out_channel,
                                              kernel_size=3,
                                              stride=2,
                                              padding=1,
                                              act_cfg=None),
                    nn.LeakyReLU(negative_slope=0.2, inplace=True)))

            in_channels = out_channel

        self.from_rgb.append(
            EqualizedLRConvModule(3,
                                  in_channels,
                                  kernel_size=3,
                                  padding=1,
                                  act_cfg=dict(type='LeakyReLU',
                                               negative_slope=0.2)))
        self.convs.append(
            nn.Sequential(
                EqualizedLRConvModule(in_channels + 1,
                                      512,
                                      kernel_size=3,
                                      padding=1,
                                      bias=True,
                                      norm_cfg=None,
                                      act_cfg=dict(type='LeakyReLU',
                                                   negative_slope=0.2)),
                EqualizedLRConvModule(512,
                                      512,
                                      kernel_size=4,
                                      padding=0,
                                      bias=True,
                                      norm_cfg=None,
                                      act_cfg=None),
            ))

        if self.with_mbstd:
            self.mbstd_layer = MiniBatchStddevLayer(**mbstd_cfg)

        self.final_linear = nn.Sequential(EqualLinearActModule(channels[4], 1))

        self.n_layer = len(self.convs)
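
For reference, a sketch of the resolution-to-channel schedule the main loop
above produces (purely illustrative, derived from the channels table, here
for in_size=256):

import math

channels = {4: 512, 8: 512, 16: 512, 32: 512, 64: 256,
            128: 128, 256: 64, 512: 32, 1024: 16}

in_size = 256
log_size = int(math.log2(in_size))  # 8

in_ch = channels[in_size]
for i in range(log_size, 2, -1):
    out_ch = channels[2**(i - 1)]
    print(f'{2**i}x{2**i}: {in_ch} -> {out_ch} channels')
    in_ch = out_ch
# 256x256: 64 -> 128 ... 8x8: 512 -> 512; the final 4x4 stage then takes
# in_ch + 1 channels (minibatch stddev) into the 512-channel convolutions.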