Example #1
    def __init__(self, G_ch=64, dim_z=128, bottom_width=4, resolution=128,
                 G_kernel_size=3, G_attn='64', n_classes=1000,
                 num_G_SVs=1, num_G_SV_itrs=1,
                 G_shared=True, shared_dim=0, hier=False,
                 cross_replica=False, mybn=False,
                 G_activation=nn.ReLU(inplace=False),
                 G_lr=5e-5, G_B1=0.0, G_B2=0.999, adam_eps=1e-8,
                 BN_eps=1e-5, SN_eps=1e-12, G_mixed_precision=False, G_fp16=False,
                 G_init='ortho', skip_init=False, no_optim=False,
                 G_param='SN', norm_style='bn',
                 **kwargs):
        super(Generator, self).__init__()
        # Channel width multiplier
        self.ch = G_ch
        # Dimensionality of the latent space
        self.dim_z = dim_z
        # The initial spatial dimensions
        self.bottom_width = bottom_width
        # Resolution of the output
        self.resolution = resolution
        # Kernel size?
        self.kernel_size = G_kernel_size
        # Attention?
        self.attention = G_attn
        # number of classes, for use in categorical conditional generation
        self.n_classes = n_classes
        # Use shared embeddings?
        self.G_shared = G_shared
        # Dimensionality of the shared embedding? Unused if not using G_shared
        self.shared_dim = shared_dim if shared_dim > 0 else dim_z
        # Hierarchical latent space?
        self.hier = hier
        # Cross replica batchnorm?
        self.cross_replica = cross_replica
        # Use my batchnorm?
        self.mybn = mybn
        # nonlinearity for residual blocks
        self.activation = G_activation
        # Initialization style
        self.init = G_init
        # Parameterization style
        self.G_param = G_param
        # Normalization style
        self.norm_style = norm_style
        # Epsilon for BatchNorm?
        self.BN_eps = BN_eps
        # Epsilon for Spectral Norm?
        self.SN_eps = SN_eps
        # fp16?
        self.fp16 = G_fp16
        # Architecture dict
        self.arch = G_arch(self.ch, self.attention)[resolution]

        # If using hierarchical latents, adjust z
        if self.hier:
            # Number of places z slots into
            self.num_slots = len(self.arch['in_channels']) + 1
            self.z_chunk_size = (self.dim_z // self.num_slots)
            # Recalculate latent dimensionality for even splitting into chunks
            self.dim_z = self.z_chunk_size * self.num_slots
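            # For example, with the stock 128x128 arch (5 G blocks) and
            # dim_z=128: num_slots = 6, z_chunk_size = 128 // 6 = 21, and
            # dim_z is rounded down to 21 * 6 = 126.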
        else:
            self.num_slots = 1
            self.z_chunk_size = 0

        # Which convs, batchnorms, and linear layers to use
        if self.G_param == 'SN':
            self.which_conv = functools.partial(layers.SNConv2d,
                                kernel_size=3, padding=1,
                                num_svs=num_G_SVs, num_itrs=num_G_SV_itrs,
                                eps=self.SN_eps)
            self.which_linear = functools.partial(layers.SNLinear,
                                num_svs=num_G_SVs, num_itrs=num_G_SV_itrs,
                                eps=self.SN_eps)
        else:
            self.which_conv = functools.partial(nn.Conv2d, kernel_size=3, padding=1)
            self.which_linear = nn.Linear

        # We use a non-spectral-normed embedding here regardless;
        # For some reason applying SN to G's embedding seems to randomly cripple G
        self.which_embedding = nn.Embedding
        bn_linear = (functools.partial(self.which_linear, bias=False) if self.G_shared
                     else self.which_embedding)
        self.which_bn = functools.partial(layers.ccbn,
                              which_linear=bn_linear,
                              cross_replica=self.cross_replica,
                              mybn=self.mybn,
                              input_size=(self.shared_dim + self.z_chunk_size if self.G_shared
                                          else self.n_classes),
                              norm_style=self.norm_style,
                              eps=self.BN_eps)


        # Prepare model
        # If not using shared embeddings, self.shared is just a passthrough
        self.shared = (self.which_embedding(n_classes, self.shared_dim) if G_shared
                        else layers.identity())
        # First linear layer
        self.linear = self.which_linear(self.dim_z // self.num_slots,
                                        self.arch['in_channels'][0] * (self.bottom_width **2))

        # self.blocks is a doubly-nested list of modules: the outer loop is
        # over blocks at a given resolution (resblocks and/or self-attention),
        # while the inner loop is over the modules within a given block
        self.blocks = []
        for index in range(len(self.arch['out_channels'])):
            self.blocks += [[layers.GBlock(in_channels=self.arch['in_channels'][index],
                                   out_channels=self.arch['out_channels'][index],
                                   which_conv=self.which_conv,
                                   which_bn=self.which_bn,
                                   activation=self.activation,
                                   upsample=(functools.partial(F.interpolate, scale_factor=2)
                                             if self.arch['upsample'][index] else None))]]
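            # Track the range of block indices built so far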
            self.end_layer = len(self.blocks)
            self.start_layer = 0

            # If attention on this block, attach it to the end
            if self.arch['attention'][self.arch['resolution'][index]]:
                print('Adding attention layer in G at resolution %d' % self.arch['resolution'][index])
                self.blocks[-1] += [layers.Attention(self.arch['out_channels'][index], self.which_conv)]

        # Turn self.blocks into a ModuleList so that it's all properly registered.
        self.blocks = nn.ModuleList([nn.ModuleList(block) for block in self.blocks])

        # output layer: batchnorm-relu-conv.
        # Consider using a non-spectral conv here
        self.output_layer = nn.Sequential(layers.bn(self.arch['out_channels'][-1],
                                                    cross_replica=self.cross_replica,
                                                    mybn=self.mybn),
                                        self.activation,
                                        self.which_conv(self.arch['out_channels'][-1], 3))

        # Initialize weights. Optionally skip init for testing.
        if not skip_init:
            self.init_weights()

        # Set up optimizer
        # If this is an EMA copy, no need for an optim, so just return now
        if no_optim:
            return
        self.lr, self.B1, self.B2, self.adam_eps = G_lr, G_B1, G_B2, adam_eps
        if G_mixed_precision:
            print('Using fp16 adam in G...')
            import utils
            self.optim = utils.Adam16(params=self.parameters(), lr=self.lr,
                                 betas=(self.B1, self.B2), weight_decay=0,
                                 eps=self.adam_eps)
        else:
            self.optim = optim.Adam(params=self.parameters(), lr=self.lr,
                                 betas=(self.B1, self.B2), weight_decay=0,
                                 eps=self.adam_eps)
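
A minimal usage sketch for this constructor (not part of the original example). It assumes the surrounding BigGAN-style repository is importable, i.e. a module named BigGAN that defines this Generator together with the layers, utils, and G_arch helpers it relies on; the module name and hyperparameter values below are assumptions.

from BigGAN import Generator  # assumed module name; adjust to the actual repo layout

# skip_init / no_optim keep construction lightweight for quick inspection
G = Generator(G_ch=64, dim_z=120, resolution=128, hier=True,
              G_shared=True, shared_dim=128,
              skip_init=True, no_optim=True)

# With hier=True, dim_z is rounded down to a multiple of num_slots
print(G.dim_z, G.num_slots, G.z_chunk_size)
# One nn.ModuleList of modules per resolution stage
print(len(G.blocks))
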
Example #2
    def __init__(self,
                 G_ch=64,
                 dim_z=128,
                 bottom_width=4,
                 resolution=128,
                 G_kernel_size=3,
                 G_attn='64',
                 n_classes=1000,
                 num_G_SVs=1,
                 num_G_SV_itrs=1,
                 G_shared=True,
                 shared_dim=0,
                 hier=False,
                 cross_replica=False,
                 mybn=False,
                 G_activation=nn.ReLU(inplace=False),
                 G_lr=5e-5,
                 G_B1=0.0,
                 G_B2=0.999,
                 adam_eps=1e-8,
                 BN_eps=1e-5,
                 SN_eps=1e-12,
                 G_mixed_precision=False,
                 G_fp16=False,
                 G_init='ortho',
                 skip_init=False,
                 no_optim=False,
                 G_param='SN',
                 norm_style='bn',
                 add_blur=False,
                 add_noise=False,
                 add_style=False,
                 style_mlp=6,
                 attn_style='nl',
                 no_conditional=False,
                 sched_version='default',
                 num_epochs=500,
                 arch=None,
                 skip_z=False,
                 use_dog_cnt=False,
                 dim_dog_cnt_z=32,
                 mix_style=False,
                 **kwargs):
        super(Generator, self).__init__()
        # Channel width multiplier
        self.ch = G_ch
        # Dimensionality of the latent space
        self.dim_z = dim_z
        # The initial spatial dimensions
        self.bottom_width = bottom_width
        # Resolution of the output
        self.resolution = resolution
        # Kernel size?
        self.kernel_size = G_kernel_size
        # Attention?
        self.attention = G_attn
        # number of classes, for use in categorical conditional generation
        self.n_classes = n_classes
        # Use shared embeddings?
        self.G_shared = G_shared
        # Dimensionality of the shared embedding? Unused if not using G_shared
        self.shared_dim = shared_dim if shared_dim > 0 else dim_z
        # Hierarchical latent space?
        self.hier = hier
        # Cross replica batchnorm?
        self.cross_replica = cross_replica
        # Use my batchnorm?
        self.mybn = mybn
        # nonlinearity for residual blocks
        self.activation = G_activation
        # Initialization style
        self.init = G_init
        # Parameterization style
        self.G_param = G_param
        # Normalization style
        self.norm_style = norm_style
        # Optional extras: blur, noise injection, style MLP, skip-z,
        # dog-count conditioning, and style mixing
        self.add_blur = add_blur
        self.add_noise = add_noise
        self.add_style = add_style
        self.skip_z = skip_z
        self.use_dog_cnt = use_dog_cnt
        self.dim_dog_cnt_z = dim_dog_cnt_z
        self.mix_style = mix_style

        # Epsilon for BatchNorm?
        self.BN_eps = BN_eps
        # Epsilon for Spectral Norm?
        self.SN_eps = SN_eps
        # fp16?
        self.fp16 = G_fp16
        # Architecture dict
        if arch is None:
            arch = f'{resolution}'
        self.arch = G_arch(self.ch, self.attention)[arch]

        # If using hierarchical latents, adjust z
        if self.hier:
            # Number of places z slots into
            self.num_slots = len(self.arch['in_channels']) + 1
            self.z_chunk_size = (self.dim_z // self.num_slots)
            # Recalculate latent dimensionality for even splitting into chunks
            self.dim_z = self.z_chunk_size * self.num_slots
        else:
            self.num_slots = 1
            self.z_chunk_size = 0

        # Which convs, batchnorms, and linear layers to use
        if self.G_param == 'SN':
            self.which_conv = functools.partial(layers.SNConv2d,
                                                kernel_size=3,
                                                padding=1,
                                                num_svs=num_G_SVs,
                                                num_itrs=num_G_SV_itrs,
                                                eps=self.SN_eps)
            self.which_linear = functools.partial(layers.SNLinear,
                                                  num_svs=num_G_SVs,
                                                  num_itrs=num_G_SV_itrs,
                                                  eps=self.SN_eps)
        else:
            self.which_conv = functools.partial(nn.Conv2d,
                                                kernel_size=3,
                                                padding=1)
            self.which_linear = nn.Linear

        if attn_style == 'cbam':
            self.which_attn = layers.CBAM
        else:
            self.which_attn = layers.Attention

        # We use a non-spectral-normed embedding here regardless;
        # For some reason applying SN to G's embedding seems to randomly cripple G
        self.which_embedding = nn.Embedding
        bn_linear = (functools.partial(self.which_linear, bias=False)
                     if self.G_shared else self.which_embedding)
        input_size = self.shared_dim + self.z_chunk_size if self.G_shared else self.n_classes
        if self.G_shared and use_dog_cnt:
            input_size += dim_dog_cnt_z
        self.which_bn = functools.partial(
            layers.ccbn,
            which_linear=bn_linear,
            cross_replica=self.cross_replica,
            mybn=self.mybn,
            input_size=input_size,
            norm_style=self.norm_style,
            eps=self.BN_eps,
            style_linear=self.which_linear,
            dim_z=self.dim_z,
            no_conditional=no_conditional,
            skip_z=self.skip_z,
            use_dog_cnt=use_dog_cnt,
            g_shared=G_shared,
        )

        # Prepare model
        # If not using shared embeddings, self.shared is just a passthrough
        self.shared = (self.which_embedding(n_classes, self.shared_dim)
                       if G_shared else layers.identity())

        self.dog_cnt_shared = (self.which_embedding(4, self.dim_dog_cnt_z)
                               if G_shared else layers.identity())
        # First linear layer
        self.linear = self.which_linear(
            self.dim_z // self.num_slots,
            self.arch['in_channels'][0] * (self.bottom_width**2))

        # self.blocks is a doubly-nested list of modules: the outer loop is
        # over blocks at a given resolution (resblocks and/or self-attention),
        # while the inner loop is over the modules within a given block
        self.blocks = []
        for index in range(len(self.arch['out_channels'])):
            self.blocks += [[
                layers.GBlock(
                    in_channels=self.arch['in_channels'][index],
                    out_channels=self.arch['out_channels'][index],
                    which_conv=self.which_conv,
                    which_bn=self.which_bn,
                    activation=self.activation,
                    upsample=(functools.partial(F.interpolate, scale_factor=2)
                              if self.arch['upsample'][index] else None),
                    add_blur=add_blur,
                    add_noise=add_noise,
                )
            ]]

            # If attention on this block, attach it to the end
            if self.arch['attention'][self.arch['resolution'][index]]:
                print('Adding attention layer in G at resolution %d' %
                      self.arch['resolution'][index])
                self.blocks[-1] += [
                    self.which_attn(self.arch['out_channels'][index],
                                    self.which_conv)
                ]

        # Turn self.blocks into a ModuleList so that it's all properly registered.
        self.blocks = nn.ModuleList(
            [nn.ModuleList(block) for block in self.blocks])

        # output layer: batchnorm-relu-conv.
        # Consider using a non-spectral conv here
        self.output_layer = nn.Sequential(
            layers.bn(self.arch['out_channels'][-1],
                      cross_replica=self.cross_replica,
                      mybn=self.mybn), self.activation,
            self.which_conv(self.arch['out_channels'][-1], 3))

        if self.add_style:
            # layers = [PixelNorm()]
            style_layers = []
            for i in range(style_mlp):
                style_layers.append(
                    layers.StyleLayer(self.dim_z, self.which_linear,
                                      self.activation))

            self.style = nn.Sequential(*style_layers)

        # Initialize weights. Optionally skip init for testing.
        if not skip_init:
            self.init_weights()

        # Set up optimizer
        # If this is an EMA copy, no need for an optim, so just return now
        if no_optim:
            return
        self.lr, self.B1, self.B2, self.adam_eps = G_lr, G_B1, G_B2, adam_eps
        if G_mixed_precision:
            print('Using fp16 adam in G...')
            import utils
            self.optim = utils.Adam16(params=self.parameters(),
                                      lr=self.lr,
                                      betas=(self.B1, self.B2),
                                      weight_decay=0,
                                      eps=self.adam_eps,
                                      amsgrad=kwargs['amsgrad'])
        else:
            self.optim = optim.Adam(params=self.parameters(),
                                    lr=self.lr,
                                    betas=(self.B1, self.B2),
                                    weight_decay=0,
                                    eps=self.adam_eps,
                                    amsgrad=kwargs['amsgrad'])

        # LR scheduling, left here for forward compatibility
        # self.lr_sched = {'itr' : 0}# if self.progressive else {}
        # self.j = 0

        if sched_version == 'default':
            self.lr_sched = None
        elif sched_version == 'cal_v0':
            self.lr_sched = optim.lr_scheduler.CosineAnnealingLR(
                self.optim,
                T_max=num_epochs,
                eta_min=self.lr / 2,
                last_epoch=-1)
        elif sched_version == 'cal_v1':
            self.lr_sched = optim.lr_scheduler.CosineAnnealingLR(
                self.optim,
                T_max=num_epochs,
                eta_min=self.lr / 4,
                last_epoch=-1)
        elif sched_version == 'cawr_v0':
            self.lr_sched = optim.lr_scheduler.CosineAnnealingWarmRestarts(
                self.optim, T_0=10, T_mult=2, eta_min=self.lr / 2)
        elif sched_version == 'cawr_v1':
            self.lr_sched = optim.lr_scheduler.CosineAnnealingWarmRestarts(
                self.optim, T_0=25, T_mult=2, eta_min=self.lr / 4)
        else:
            self.lr_sched = None
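
A similar sketch for this variant (again, the module name and values are assumptions, not taken from the example). Two details worth noting: the optimizer branch reads kwargs['amsgrad'], so that key must be passed whenever no_optim is False, and the optional LR scheduler is only created for the recognized sched_version strings.

from model import Generator  # assumed module name for this fork

# amsgrad must be supplied because the optimizer setup indexes kwargs['amsgrad']
G = Generator(resolution=128, add_style=True, style_mlp=6,
              sched_version='cal_v0', num_epochs=500,
              skip_init=True, amsgrad=False)

# Training-loop fragment: step the optional scheduler once per epoch
for epoch in range(500):
    ...  # forward/backward and G.optim.step() would go here
    if G.lr_sched is not None:
        G.lr_sched.step()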