def init_normal_sampler(self, mult):
    """Build the per-group distribution-parameter generators for encoder and decoder.

    Walks latent scales from the deepest (smallest spatial) one outward, creating for
    every latent group: a 3x3 conv producing (mu, log-sigma) from encoder features, the
    optional normalizing-flow cells, and (except for the very first group) an
    ELU + 1x1-conv sampler for the decoder side.  `mult` is the running channel
    multiplier; it is divided by CHANNEL_MULT once per scale.
    Returns (enc_sampler, dec_sampler, nf_cells, enc_kv, dec_kv, query);
    the last three ModuleLists are returned empty here.
    """
    enc_sampler, dec_sampler, nf_cells = nn.ModuleList(), nn.ModuleList(), nn.ModuleList()
    enc_kv, dec_kv, query = nn.ModuleList(), nn.ModuleList(), nn.ModuleList()
    for s in range(self.num_latent_scales):
        # iterate scales deepest-first, hence the reversed index into groups_per_scale
        for g in range(self.groups_per_scale[self.num_latent_scales - s - 1]):
            # build mu, sigma generator for encoder (2x latents: mean and log-std)
            num_c = int(self.num_channels_enc * mult)
            cell = Conv2D(num_c, 2 * self.num_latent_per_group, kernel_size=3, padding=1, bias=True)
            enc_sampler.append(cell)
            # build NF (one PairedCellAR per flow step for this group)
            for n in range(self.num_flows):
                arch = self.arch_instance['ar_nn']
                num_c1 = int(self.num_channels_enc * mult)
                num_c2 = 8 * self.num_latent_per_group  # use 8x features
                nf_cells.append(PairedCellAR(self.num_latent_per_group, num_c1, num_c2, arch))
            if not (s == 0 and g == 0):  # for the first group, we use a fixed standard Normal.
                num_c = int(self.num_channels_dec * mult)
                cell = nn.Sequential(
                    nn.ELU(),
                    Conv2D(num_c, 2 * self.num_latent_per_group, kernel_size=1, padding=0, bias=True))
                dec_sampler.append(cell)
        # halve (by CHANNEL_MULT) the channel multiplier when moving one scale up
        mult = mult / CHANNEL_MULT
    return enc_sampler, dec_sampler, nf_cells, enc_kv, dec_kv, query
def init_encoder0(self, mult):
    """Build the small ELU-Conv1x1-ELU head applied to the final encoder feature map.

    `mult` scales the base encoder channel count; channel width is unchanged by the head.
    """
    channels = int(self.num_channels_enc * mult)
    return nn.Sequential(
        nn.ELU(),
        Conv2D(channels, channels, kernel_size=1, bias=True),
        nn.ELU(),
    )
def init_image_conditional(self, mult):
    """Build the output head mapping decoder features to image-likelihood parameters.

    Output channel count depends on the dataset: 1 channel for the grayscale
    datasets (mnist/omniglot), otherwise 2 parameters x 3 color channels when a
    single mixture component is used, or 10 parameters per mixture component
    (presumably a discretized logistic mixture -- confirm against the loss).
    """
    in_ch = int(self.num_channels_dec * mult)
    if self.dataset in {'mnist', 'omniglot'}:
        out_ch = 1
    elif self.num_mix_output == 1:
        out_ch = 2 * 3
    else:
        out_ch = 10 * self.num_mix_output
    return nn.Sequential(nn.ELU(), Conv2D(in_ch, out_ch, 3, padding=1, bias=True))
def init_image_conditional(self, mult):
    """Build the output head mapping decoder features to image-likelihood parameters.

    NOTE(review): this is a second definition of `init_image_conditional` in the
    same class -- the later definition shadows the earlier one. This variant
    special-cases only 'mnist' (not 'omniglot') and never emits the
    single-mixture 2*3 layout; reconcile the two.
    """
    in_ch = int(self.num_channels_dec * mult)
    if self.dataset == 'mnist':
        out_ch = 1
    else:
        out_ch = 10 * self.num_mix_output
    return nn.Sequential(nn.ELU(), Conv2D(in_ch, out_ch, 3, padding=1, bias=True))
def init_stem(self):
    """Build the first 3x3 conv mapping raw image channels to the encoder width.

    NOTE(review): the non-mnist branch reads `self.in_channels`, so that attribute
    must be assigned before this method is called in __init__ -- verify ordering.
    """
    if self.dataset == 'mnist':
        in_ch = 1
    else:
        in_ch = self.in_channels
    return Conv2D(in_ch, self.num_channels_enc, 3, padding=1, bias=True)
def __init__(self, args, writer, arch_instance):
    """Assemble the hierarchical VAE: stem, pre-process, encoder tower, samplers,
    decoder tower, post-process, and the image-conditional output head.

    Args:
        args: parsed hyper-parameter namespace (channel counts, scales, groups, ...).
        writer: summary writer kept for logging.
        arch_instance: dict of cell architecture specs (e.g. 'ar_nn' for the flows).
    """
    super(AutoEncoder, self).__init__()
    self.writer = writer
    self.arch_instance = arch_instance
    self.dataset = args.dataset
    self.crop_output = self.dataset == 'mnist'
    self.use_se = args.use_se
    self.res_dist = args.res_dist
    self.num_bits = args.num_x_bits
    # BUGFIX: assign in_channels up front. It was originally the very last
    # statement of __init__, but init_stem() (called below) reads
    # self.in_channels in its non-MNIST branch, raising AttributeError.
    self.in_channels = args.in_channels

    self.num_latent_scales = args.num_latent_scales        # number of spatial scales that latent layers will reside
    self.num_groups_per_scale = args.num_groups_per_scale  # number of groups of latent vars. per scale
    self.num_latent_per_group = args.num_latent_per_group  # number of latent vars. per group
    self.groups_per_scale = groups_per_scale(self.num_latent_scales, self.num_groups_per_scale, args.ada_groups,
                                             minimum_groups=args.min_groups_per_scale)

    # a single scale with a single group degenerates to a plain (vanilla) VAE
    self.vanilla_vae = self.num_latent_scales == 1 and self.num_groups_per_scale == 1

    # encoder parameters
    self.num_channels_enc = args.num_channels_enc
    self.num_channels_dec = args.num_channels_dec
    self.num_preprocess_blocks = args.num_preprocess_blocks  # block is defined as series of Normal followed by Down
    self.num_preprocess_cells = args.num_preprocess_cells    # number of cells per block
    self.num_cell_per_cond_enc = args.num_cell_per_cond_enc  # number of cells for each conditional in encoder

    # decoder parameters
    self.num_postprocess_blocks = args.num_postprocess_blocks
    self.num_postprocess_cells = args.num_postprocess_cells
    self.num_cell_per_cond_dec = args.num_cell_per_cond_dec  # number of cells for each conditional in decoder

    # general cell parameters
    self.input_size = get_input_size(self.dataset, args)

    # decoder param
    self.num_mix_output = 10  # used for generative purpose

    # overall channel / spatial scaling accumulated over preprocess blocks and scales
    c_scaling = CHANNEL_MULT ** (self.num_preprocess_blocks + self.num_latent_scales - 1)
    spatial_scaling = 2 ** (self.num_preprocess_blocks + self.num_latent_scales - 1)
    prior_ftr0_size = (int(c_scaling * self.num_channels_dec), self.input_size // spatial_scaling,
                       self.input_size // spatial_scaling)
    # learned feature map that seeds the top of the decoder
    self.prior_ftr0 = nn.Parameter(torch.rand(size=prior_ftr0_size), requires_grad=True)
    self.z0_size = [self.num_latent_per_group, self.input_size // spatial_scaling,
                    self.input_size // spatial_scaling]

    self.stem = self.init_stem()
    self.pre_process, mult = self.init_pre_process(mult=1)

    if self.vanilla_vae:
        self.enc_tower = []
    else:
        self.enc_tower, mult = self.init_encoder_tower(mult)

    self.with_nf = args.num_nf > 0
    self.num_flows = args.num_nf

    self.enc0 = self.init_encoder0(mult)
    self.enc_sampler, self.dec_sampler, self.nf_cells, self.enc_kv, self.dec_kv, self.query = \
        self.init_normal_sampler(mult)

    if self.vanilla_vae:
        self.dec_tower = []
        # CONSISTENCY FIX: wrap in int() like every other channel computation in
        # this class -- mult may be a non-integer after init_pre_process.
        self.stem_decoder = Conv2D(self.num_latent_per_group, int(mult * self.num_channels_enc), (1, 1),
                                   bias=True)
    else:
        self.dec_tower, mult = self.init_decoder_tower(mult)

    self.post_process, mult = self.init_post_process(mult)

    self.image_conditional = self.init_image_conditional(mult)

    # collect all norm params in Conv2D and gamma param in batchnorm
    self.all_log_norm = []
    self.all_conv_layers = []
    self.all_bn_layers = []
    for n, layer in self.named_modules():
        if isinstance(layer, (Conv2D, ARConv2d)):
            self.all_log_norm.append(layer.log_weight_norm)
            self.all_conv_layers.append(layer)
        if isinstance(layer, (nn.BatchNorm2d, nn.SyncBatchNorm, SyncBatchNormSwish)):
            self.all_bn_layers.append(layer)

    print('len log norm:', len(self.all_log_norm))
    print('len bn:', len(self.all_bn_layers))
    # left/right singular vectors used for SR (spectral regularization)
    self.sr_u = {}
    self.sr_v = {}
    self.num_power_iter = 4
def init_stem(self):
    """Build the first 3x3 conv mapping raw image channels to the encoder width.

    NOTE(review): this is a second definition of `init_stem` in the file; unlike
    the other variant it hard-codes 3 input channels for color datasets and also
    treats 'omniglot' as grayscale -- reconcile the two.
    """
    grayscale = self.dataset in {'mnist', 'omniglot'}
    in_ch = 1 if grayscale else 3
    return Conv2D(in_ch, self.num_channels_enc, 3, padding=1, bias=True)