Exemple #1
0
    def init_normal_sampler(self, mult):
        """Build the per-group posterior/prior parameter generators.

        For every latent group (iterated coarsest-to-finest scale), creates:
          - an encoder conv producing 2 * num_latent_per_group channels (mu, log-sigma),
          - `num_flows` autoregressive NF cells refining the posterior,
          - a decoder ELU+1x1-conv sampler, except for the very first group,
            whose prior is a fixed standard Normal.
        `mult` is divided by CHANNEL_MULT after each scale to track the
        channel width of the towers. The kv/query lists are returned empty.
        """
        enc_sampler = nn.ModuleList()
        dec_sampler = nn.ModuleList()
        nf_cells = nn.ModuleList()
        enc_kv, dec_kv, query = nn.ModuleList(), nn.ModuleList(), nn.ModuleList()
        for scale in range(self.num_latent_scales):
            num_groups = self.groups_per_scale[self.num_latent_scales - scale - 1]
            for group in range(num_groups):
                # Encoder-side mu/sigma generator for this group.
                enc_ch = int(self.num_channels_enc * mult)
                enc_sampler.append(
                    Conv2D(enc_ch, 2 * self.num_latent_per_group,
                           kernel_size=3, padding=1, bias=True))
                # Normalizing-flow cells attached to this group.
                for _ in range(self.num_flows):
                    arch = self.arch_instance['ar_nn']
                    flow_in = int(self.num_channels_enc * mult)
                    flow_ftr = 8 * self.num_latent_per_group  # use 8x features
                    nf_cells.append(
                        PairedCellAR(self.num_latent_per_group, flow_in, flow_ftr, arch))
                # Decoder-side sampler; the first group keeps a fixed standard Normal.
                if scale != 0 or group != 0:
                    dec_ch = int(self.num_channels_dec * mult)
                    dec_sampler.append(nn.Sequential(
                        nn.ELU(),
                        Conv2D(dec_ch, 2 * self.num_latent_per_group,
                               kernel_size=1, padding=0, bias=True)))

            mult = mult / CHANNEL_MULT

        return enc_sampler, dec_sampler, nf_cells, enc_kv, dec_kv, query
Exemple #2
0
 def init_encoder0(self, mult):
     """Return the ELU -> 1x1 Conv2D -> ELU head applied at the top of the encoder.

     Channel count is num_channels_enc scaled by `mult` (truncated to int);
     input and output widths are equal.
     """
     channels = int(self.num_channels_enc * mult)
     layers = [
         nn.ELU(),
         Conv2D(channels, channels, kernel_size=1, bias=True),
         nn.ELU(),
     ]
     return nn.Sequential(*layers)
Exemple #3
0
 def init_image_conditional(self, mult):
     """Map final decoder features to per-pixel output-distribution parameters.

     Output channels: 1 for mnist/omniglot; otherwise 2*3 when a single
     mixture component is used, else 10 per mixture component.
     """
     in_ch = int(self.num_channels_dec * mult)
     if self.dataset in {'mnist', 'omniglot'}:
         out_ch = 1
     elif self.num_mix_output == 1:
         out_ch = 2 * 3
     else:
         out_ch = 10 * self.num_mix_output
     head = nn.Sequential(
         nn.ELU(),
         Conv2D(in_ch, out_ch, 3, padding=1, bias=True))
     return head
Exemple #4
0
 def init_image_conditional(self, mult):
     """Map final decoder features to per-pixel output-distribution parameters.

     mnist gets a single output channel; every other dataset gets
     10 channels per mixture component.
     """
     in_ch = int(self.num_channels_dec * mult)
     if self.dataset == 'mnist':
         out_ch = 1
     else:
         out_ch = 10 * self.num_mix_output
     return nn.Sequential(
         nn.ELU(),
         Conv2D(in_ch, out_ch, 3, padding=1, bias=True))
Exemple #5
0
 def init_stem(self):
     """Build the stem: a 3x3 conv lifting the input image to encoder width.

     mnist input is single-channel; other datasets use self.in_channels.
     """
     if self.dataset == 'mnist':
         in_ch = 1
     else:
         in_ch = self.in_channels
     return Conv2D(in_ch, self.num_channels_enc, 3, padding=1, bias=True)
Exemple #6
0
    def __init__(self, args, writer, arch_instance):
        """Construct the hierarchical VAE: stem, encoder/decoder towers,
        per-group samplers, NF cells, and the image-conditional output head.

        Args:
            args: parsed experiment config (dataset, channel counts, group
                layout, flow count, etc. — fields read below).
            writer: summary writer kept on the instance for logging.
            arch_instance: dict of cell architecture specs used by the builders.

        NOTE: construction order matters — `mult` is threaded through the
        builder calls to track channel width, and module registration order
        determines the `named_modules()` scan at the bottom.
        """
        super(AutoEncoder, self).__init__()
        self.writer = writer
        self.arch_instance = arch_instance
        self.dataset = args.dataset
        # mnist outputs are cropped back to the original resolution.
        self.crop_output = self.dataset == 'mnist'
        self.use_se = args.use_se          # squeeze-and-excitation in cells
        self.res_dist = args.res_dist      # residual parameterization of q(z|x)
        self.num_bits = args.num_x_bits    # input quantization bits

        self.num_latent_scales = args.num_latent_scales  # number of spatial scales that latent layers will reside
        self.num_groups_per_scale = args.num_groups_per_scale  # number of groups of latent vars. per scale
        self.num_latent_per_group = args.num_latent_per_group  # number of latent vars. per group
        # Per-scale group counts (possibly adaptive, bounded below by min_groups_per_scale).
        self.groups_per_scale = groups_per_scale(
            self.num_latent_scales,
            self.num_groups_per_scale,
            args.ada_groups,
            minimum_groups=args.min_groups_per_scale)

        # A single scale with a single group degenerates to a plain VAE.
        self.vanilla_vae = self.num_latent_scales == 1 and self.num_groups_per_scale == 1

        # encoder parameteres
        self.num_channels_enc = args.num_channels_enc
        self.num_channels_dec = args.num_channels_dec
        self.num_preprocess_blocks = args.num_preprocess_blocks  # block is defined as series of Normal followed by Down
        self.num_preprocess_cells = args.num_preprocess_cells  # number of cells per block
        self.num_cell_per_cond_enc = args.num_cell_per_cond_enc  # number of cell for each conditional in encoder

        # decoder parameters
        # self.num_channels_dec = args.num_channels_dec
        self.num_postprocess_blocks = args.num_postprocess_blocks
        self.num_postprocess_cells = args.num_postprocess_cells
        self.num_cell_per_cond_dec = args.num_cell_per_cond_dec  # number of cell for each conditional in decoder

        # general cell parameters
        self.input_size = get_input_size(self.dataset, args)
        # decoder param
        self.num_mix_output = 10  # mixture components in the output distribution

        # used for generative purpose
        # Channel and spatial scaling accumulated by preprocessing + scale downsampling.
        c_scaling = CHANNEL_MULT**(self.num_preprocess_blocks +
                                   self.num_latent_scales - 1)
        spatial_scaling = 2**(self.num_preprocess_blocks +
                              self.num_latent_scales - 1)
        # Learned feature map that seeds the top of the decoder when sampling.
        prior_ftr0_size = (int(c_scaling * self.num_channels_dec),
                           self.input_size // spatial_scaling,
                           self.input_size // spatial_scaling)
        self.prior_ftr0 = nn.Parameter(torch.rand(size=prior_ftr0_size),
                                       requires_grad=True)
        # Shape of the topmost latent z0 (C, H, W).
        self.z0_size = [
            self.num_latent_per_group, self.input_size // spatial_scaling,
            self.input_size // spatial_scaling
        ]

        # NOTE(review): init_stem() runs here, but self.in_channels is only
        # assigned at the very end of __init__ — any stem variant reading
        # self.in_channels would see it unset. Confirm ordering.
        self.stem = self.init_stem()
        self.pre_process, mult = self.init_pre_process(mult=1)

        if self.vanilla_vae:
            self.enc_tower = []
        else:
            self.enc_tower, mult = self.init_encoder_tower(mult)

        self.with_nf = args.num_nf > 0
        self.num_flows = args.num_nf

        self.enc0 = self.init_encoder0(mult)
        self.enc_sampler, self.dec_sampler, self.nf_cells, self.enc_kv, self.dec_kv, self.query = \
            self.init_normal_sampler(mult)

        if self.vanilla_vae:
            self.dec_tower = []
            # NOTE(review): mult may be a float here; mult * num_channels_enc is
            # passed as a channel count without int() — confirm intended.
            self.stem_decoder = Conv2D(self.num_latent_per_group,
                                       mult * self.num_channels_enc, (1, 1),
                                       bias=True)
        else:
            self.dec_tower, mult = self.init_decoder_tower(mult)

        self.post_process, mult = self.init_post_process(mult)

        self.image_conditional = self.init_image_conditional(mult)

        # collect all norm params in Conv2D and gamma param in batchnorm
        self.all_log_norm = []
        self.all_conv_layers = []
        self.all_bn_layers = []
        for n, layer in self.named_modules():
            # if isinstance(layer, Conv2D) and '_ops' in n:   # only chose those in cell
            if isinstance(layer, Conv2D) or isinstance(layer, ARConv2d):
                self.all_log_norm.append(layer.log_weight_norm)
                self.all_conv_layers.append(layer)
            if isinstance(layer, nn.BatchNorm2d) or isinstance(layer, nn.SyncBatchNorm) or \
                    isinstance(layer, SyncBatchNormSwish):
                self.all_bn_layers.append(layer)

        print('len log norm:', len(self.all_log_norm))
        print('len bn:', len(self.all_bn_layers))
        # left/right singular vectors used for SR
        self.sr_u = {}
        self.sr_v = {}
        self.num_power_iter = 4  # power iterations per spectral-reg. step

        self.in_channels = args.in_channels
Exemple #7
0
 def init_stem(self):
     """Build the stem: a 3x3 conv lifting the input image to encoder width.

     mnist and omniglot are single-channel; every other dataset is RGB.
     """
     if self.dataset in {'mnist', 'omniglot'}:
         in_ch = 1
     else:
         in_ch = 3
     return Conv2D(in_ch, self.num_channels_enc, 3, padding=1, bias=True)