def __init__(self, code_dim=100, n_class=100, SN=True, size=64):
    super().__init__()

    # Linear map from the latent code to the initial 4x4x512 feature map.
    if SN:
        self.lin_code = spectral_init(nn.Linear(code_dim, 4 * 4 * 512))
    else:
        self.lin_code = nn.Linear(code_dim, 4 * 4 * 512)

    # Three class-conditional upsampling conv blocks: 512 -> 256 -> 128 -> 64 channels.
    self.conv = nn.ModuleList([ConvBlock(512, 256, n_class=n_class, SN=SN),
                               ConvBlock(256, 128, n_class=n_class, SN=SN),
                               ConvBlock(128, 64, n_class=n_class, SN=SN)])
    self.bn = nn.BatchNorm2d(64)
    self.shared = layers.identity()
    self.shared_d = layers.identity()
    self.dim_z = code_dim

    # Final 3x3 conv mapping the 64-channel features to RGB.
    if SN:
        self.colorize = spectral_init(nn.Conv2d(64, 3, [3, 3], padding=1))
    else:
        self.colorize = nn.Conv2d(64, 3, [3, 3], padding=1)

    self.optim = optim.Adam(params=self.parameters(), lr=1e-4,
                            betas=(0.0, 0.999), weight_decay=0, eps=1e-8)
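# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original file): `spectral_init` is called
# above but not defined in this excerpt. A common implementation -- assumed
# here, not verified against this repo -- initializes the layer's weights and
# then wraps the module with PyTorch's built-in spectral normalization:

import torch.nn as nn
from torch.nn.utils import spectral_norm

def spectral_init(module, gain=1.0):
    """Initialize a layer's weights, then apply spectral normalization."""
    nn.init.xavier_uniform_(module.weight, gain=gain)
    if module.bias is not None:
        nn.init.zeros_(module.bias)
    return spectral_norm(module)
# ---------------------------------------------------------------------------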
def __init__(self, G_ch=64, G_depth=2, dim_z=128, bottom_width=4,
             resolution=128, G_kernel_size=3, G_attn='64', n_classes=1000,
             num_G_SVs=1, num_G_SV_itrs=1, G_shared=True,
             shared_dim=0, hier=False, cross_replica=False, mybn=False,
             G_activation=nn.ReLU(inplace=False),
             G_lr=5e-5, G_B1=0.0, G_B2=0.999, adam_eps=1e-8,
             BN_eps=1e-5, SN_eps=1e-12, G_mixed_precision=False, G_fp16=False,
             G_init='ortho', skip_init=False, no_optim=False,
             G_param='SN', norm_style='bn', **kwargs):
    super(Generator, self).__init__()
    # Channel width multiplier
    self.ch = G_ch
    # Number of resblocks per stage
    self.G_depth = G_depth
    # Dimensionality of the latent space
    self.dim_z = dim_z
    # The initial spatial dimensions
    self.bottom_width = bottom_width
    # Resolution of the output
    self.resolution = resolution
    # Kernel size?
    self.kernel_size = G_kernel_size
    # Attention?
    self.attention = G_attn
    # Number of classes, for use in categorical conditional generation
    self.n_classes = n_classes
    # Use shared embeddings?
    self.G_shared = G_shared
    # Dimensionality of the shared embedding? Unused if not using G_shared
    self.shared_dim = shared_dim if shared_dim > 0 else dim_z
    # Hierarchical latent space?
    self.hier = hier
    # Cross replica batchnorm?
    self.cross_replica = cross_replica
    # Use my batchnorm?
    self.mybn = mybn
    # Nonlinearity for residual blocks
    self.activation = G_activation
    # Initialization style
    self.init = G_init
    # Parameterization style
    self.G_param = G_param
    # Normalization style
    self.norm_style = norm_style
    # Epsilon for BatchNorm?
    self.BN_eps = BN_eps
    # Epsilon for Spectral Norm?
    self.SN_eps = SN_eps
    # fp16?
    self.fp16 = G_fp16
    # Architecture dict
    self.arch = G_arch(self.ch, self.attention)[resolution]

    # Which convs, batchnorms, and linear layers to use
    if self.G_param == 'SN':
        self.which_conv = functools.partial(layers.SNConv2d,
                                            kernel_size=3, padding=1,
                                            num_svs=num_G_SVs,
                                            num_itrs=num_G_SV_itrs,
                                            eps=self.SN_eps)
        self.which_linear = functools.partial(layers.SNLinear,
                                              num_svs=num_G_SVs,
                                              num_itrs=num_G_SV_itrs,
                                              eps=self.SN_eps)
    else:
        self.which_conv = functools.partial(nn.Conv2d, kernel_size=3, padding=1)
        self.which_linear = nn.Linear

    # We use a non-spectral-normed embedding here regardless;
    # for some reason applying SN to G's embedding seems to randomly cripple G.
    self.which_embedding = nn.Embedding
    bn_linear = (functools.partial(self.which_linear, bias=False)
                 if self.G_shared else self.which_embedding)
    self.which_bn = functools.partial(layers.ccbn,
                                      which_linear=bn_linear,
                                      cross_replica=self.cross_replica,
                                      mybn=self.mybn,
                                      input_size=(self.shared_dim + self.dim_z
                                                  if self.G_shared
                                                  else self.n_classes),
                                      norm_style=self.norm_style,
                                      eps=self.BN_eps)

    # Prepare model.
    # If not using shared embeddings, self.shared is just a passthrough.
    self.shared = (self.which_embedding(n_classes, self.shared_dim)
                   if G_shared else layers.identity())
    # First linear layer
    self.linear = self.which_linear(self.dim_z + self.shared_dim,
                                    self.arch['in_channels'][0] * (self.bottom_width ** 2))

    # self.blocks is a doubly-nested list of modules: the outer loop is over
    # blocks at a given resolution (resblocks and/or self-attention), while
    # the inner loop is over the modules within a given block.
    self.blocks = []
    for index in range(len(self.arch['out_channels'])):
        self.blocks += [[GBlock(in_channels=self.arch['in_channels'][index],
                                out_channels=(self.arch['in_channels'][index]
                                              if g_index == 0
                                              else self.arch['out_channels'][index]),
                                which_conv=self.which_conv,
                                which_bn=self.which_bn,
                                activation=self.activation,
                                upsample=(functools.partial(F.interpolate, scale_factor=2)
                                          if self.arch['upsample'][index]
                                          and g_index == (self.G_depth - 1) else None))]
                        for g_index in range(self.G_depth)]

        # If attention on this block, attach it to the end.
        if self.arch['attention'][self.arch['resolution'][index]]:
            print('Adding attention layer in G at resolution %d' %
                  self.arch['resolution'][index])
            self.blocks[-1] += [layers.Attention(self.arch['out_channels'][index],
                                                 self.which_conv)]

    # Turn self.blocks into a ModuleList so that it's all properly registered.
    self.blocks = nn.ModuleList([nn.ModuleList(block) for block in self.blocks])

    # Output layer: batchnorm-relu-conv.
    # Consider using a non-spectral conv here.
    self.output_layer = nn.Sequential(layers.bn(self.arch['out_channels'][-1],
                                                cross_replica=self.cross_replica,
                                                mybn=self.mybn),
                                      self.activation,
                                      self.which_conv(self.arch['out_channels'][-1], 3))

    # Initialize weights. Optionally skip init for testing.
    if not skip_init:
        self.init_weights()

    # Set up optimizer.
    # If this is an EMA copy, no need for an optim, so just return now.
    if no_optim:
        return
    self.lr, self.B1, self.B2, self.adam_eps = G_lr, G_B1, G_B2, adam_eps
    if G_mixed_precision:
        print('Using fp16 adam in G...')
        import utils
        self.optim = utils.Adam16(params=self.parameters(), lr=self.lr,
                                  betas=(self.B1, self.B2), weight_decay=0,
                                  eps=self.adam_eps)
    else:
        self.optim = optim.Adam(params=self.parameters(), lr=self.lr,
                                betas=(self.B1, self.B2), weight_decay=0,
                                eps=self.adam_eps)
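# ---------------------------------------------------------------------------
# Hedged toy sketch (not part of the original file): the doubly-nested
# ModuleList built above is typically traversed stage by stage in forward(),
# with each inner list holding the resblocks (and optional attention) for one
# resolution. The modules below are stand-ins, not the real GBlock/Attention.

import torch
import torch.nn as nn

demo_blocks = nn.ModuleList([
    nn.ModuleList([nn.Linear(8, 8), nn.Linear(8, 8)]),  # stage 0: G_depth=2 resblocks
    nn.ModuleList([nn.Linear(8, 8)]),                   # stage 1: one resblock
])
h = torch.randn(2, 8)
for blocklist in demo_blocks:   # outer loop: one entry per resolution stage
    for block in blocklist:     # inner loop: resblocks and/or self-attention
        h = block(h)
print(h.shape)                  # torch.Size([2, 8])
# ---------------------------------------------------------------------------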
def __init__(self, G_ch=64, dim_z=128, bottom_width=4, resolution=128,
             G_kernel_size=3, G_attn='64', n_classes=1000,
             num_G_SVs=1, num_G_SV_itrs=1, G_shared=True,
             shared_dim=0, hier=False, cross_replica=False, mybn=False,
             G_activation=nn.ReLU(inplace=False),
             G_lr=5e-5, G_B1=0.0, G_B2=0.999, adam_eps=1e-8,
             BN_eps=1e-5, SN_eps=1e-12, G_mixed_precision=False, G_fp16=False,
             G_init='ortho', skip_init=False, no_optim=False,
             G_param='SN', norm_style='bn',
             add_blur=False, add_noise=False, add_style=False, style_mlp=6,
             attn_style='nl', no_conditional=False, sched_version='default',
             num_epochs=500, arch=None, skip_z=False,
             use_dog_cnt=False, dim_dog_cnt_z=32, mix_style=False,
             **kwargs):
    super(Generator, self).__init__()
    # Channel width multiplier
    self.ch = G_ch
    # Dimensionality of the latent space
    self.dim_z = dim_z
    # The initial spatial dimensions
    self.bottom_width = bottom_width
    # Resolution of the output
    self.resolution = resolution
    # Kernel size?
    self.kernel_size = G_kernel_size
    # Attention?
    self.attention = G_attn
    # Number of classes, for use in categorical conditional generation
    self.n_classes = n_classes
    # Use shared embeddings?
    self.G_shared = G_shared
    # Dimensionality of the shared embedding? Unused if not using G_shared
    self.shared_dim = shared_dim if shared_dim > 0 else dim_z
    # Hierarchical latent space?
    self.hier = hier
    # Cross replica batchnorm?
    self.cross_replica = cross_replica
    # Use my batchnorm?
    self.mybn = mybn
    # Nonlinearity for residual blocks
    self.activation = G_activation
    # Initialization style
    self.init = G_init
    # Parameterization style
    self.G_param = G_param
    # Normalization style
    self.norm_style = norm_style
    # Extra architecture options (blur, per-pixel noise, style MLP, etc.)
    self.add_blur = add_blur
    self.add_noise = add_noise
    self.add_style = add_style
    self.skip_z = skip_z
    self.use_dog_cnt = use_dog_cnt
    self.dim_dog_cnt_z = dim_dog_cnt_z
    self.mix_style = mix_style
    # Epsilon for BatchNorm?
    self.BN_eps = BN_eps
    # Epsilon for Spectral Norm?
    self.SN_eps = SN_eps
    # fp16?
    self.fp16 = G_fp16
    # Architecture dict
    if arch is None:
        arch = f'{resolution}'
    self.arch = G_arch(self.ch, self.attention)[arch]

    # If using hierarchical latents, adjust z
    if self.hier:
        # Number of places z slots into
        self.num_slots = len(self.arch['in_channels']) + 1
        self.z_chunk_size = (self.dim_z // self.num_slots)
        # Recalculate latent dimensionality for even splitting into chunks
        self.dim_z = self.z_chunk_size * self.num_slots
    else:
        self.num_slots = 1
        self.z_chunk_size = 0

    # Which convs, batchnorms, and linear layers to use
    if self.G_param == 'SN':
        self.which_conv = functools.partial(layers.SNConv2d,
                                            kernel_size=3, padding=1,
                                            num_svs=num_G_SVs,
                                            num_itrs=num_G_SV_itrs,
                                            eps=self.SN_eps)
        self.which_linear = functools.partial(layers.SNLinear,
                                              num_svs=num_G_SVs,
                                              num_itrs=num_G_SV_itrs,
                                              eps=self.SN_eps)
    else:
        self.which_conv = functools.partial(nn.Conv2d, kernel_size=3, padding=1)
        self.which_linear = nn.Linear

    if attn_style == 'cbam':
        self.which_attn = layers.CBAM
    else:
        self.which_attn = layers.Attention

    # We use a non-spectral-normed embedding here regardless;
    # for some reason applying SN to G's embedding seems to randomly cripple G.
    self.which_embedding = nn.Embedding
    bn_linear = (functools.partial(self.which_linear, bias=False)
                 if self.G_shared else self.which_embedding)
    input_size = (self.shared_dim + self.z_chunk_size
                  if self.G_shared else self.n_classes)
    if self.G_shared and use_dog_cnt:
        input_size += dim_dog_cnt_z
    self.which_bn = functools.partial(
        layers.ccbn,
        which_linear=bn_linear,
        cross_replica=self.cross_replica,
        mybn=self.mybn,
        input_size=input_size,
        norm_style=self.norm_style,
        eps=self.BN_eps,
        style_linear=self.which_linear,
        dim_z=self.dim_z,
        no_conditional=no_conditional,
        skip_z=self.skip_z,
        use_dog_cnt=use_dog_cnt,
        g_shared=G_shared,
    )

    # Prepare model.
    # If not using shared embeddings, self.shared is just a passthrough.
    self.shared = (self.which_embedding(n_classes, self.shared_dim)
                   if G_shared else layers.identity())
    self.dog_cnt_shared = (self.which_embedding(4, self.dim_dog_cnt_z)
                           if G_shared else layers.identity())
    # First linear layer
    self.linear = self.which_linear(
        self.dim_z // self.num_slots,
        self.arch['in_channels'][0] * (self.bottom_width ** 2))

    # self.blocks is a doubly-nested list of modules: the outer loop is over
    # blocks at a given resolution (resblocks and/or self-attention), while
    # the inner loop is over the modules within a given block.
    self.blocks = []
    for index in range(len(self.arch['out_channels'])):
        self.blocks += [[
            layers.GBlock(
                in_channels=self.arch['in_channels'][index],
                out_channels=self.arch['out_channels'][index],
                which_conv=self.which_conv,
                which_bn=self.which_bn,
                activation=self.activation,
                upsample=(functools.partial(F.interpolate, scale_factor=2)
                          if self.arch['upsample'][index] else None),
                add_blur=add_blur,
                add_noise=add_noise,
            )
        ]]
        # If attention on this block, attach it to the end.
        if self.arch['attention'][self.arch['resolution'][index]]:
            print('Adding attention layer in G at resolution %d' %
                  self.arch['resolution'][index])
            self.blocks[-1] += [
                self.which_attn(self.arch['out_channels'][index],
                                self.which_conv)
            ]

    # Turn self.blocks into a ModuleList so that it's all properly registered.
    self.blocks = nn.ModuleList(
        [nn.ModuleList(block) for block in self.blocks])

    # Output layer: batchnorm-relu-conv.
    # Consider using a non-spectral conv here.
    self.output_layer = nn.Sequential(
        layers.bn(self.arch['out_channels'][-1],
                  cross_replica=self.cross_replica,
                  mybn=self.mybn),
        self.activation,
        self.which_conv(self.arch['out_channels'][-1], 3))

    if self.add_style:
        # layers = [PixelNorm()]
        style_layers = []
        for i in range(style_mlp):
            style_layers.append(
                layers.StyleLayer(self.dim_z, self.which_linear,
                                  self.activation))
        self.style = nn.Sequential(*style_layers)

    # Initialize weights. Optionally skip init for testing.
    if not skip_init:
        self.init_weights()

    # Set up optimizer.
    # If this is an EMA copy, no need for an optim, so just return now.
    if no_optim:
        return
    self.lr, self.B1, self.B2, self.adam_eps = G_lr, G_B1, G_B2, adam_eps
    if G_mixed_precision:
        print('Using fp16 adam in G...')
        import utils
        self.optim = utils.Adam16(params=self.parameters(), lr=self.lr,
                                  betas=(self.B1, self.B2), weight_decay=0,
                                  eps=self.adam_eps,
                                  amsgrad=kwargs['amsgrad'])
    else:
        self.optim = optim.Adam(params=self.parameters(), lr=self.lr,
                                betas=(self.B1, self.B2), weight_decay=0,
                                eps=self.adam_eps,
                                amsgrad=kwargs['amsgrad'])

    # LR scheduling, left here for forward compatibility
    # self.lr_sched = {'itr' : 0}  # if self.progressive else {}
    # self.j = 0
    if sched_version == 'default':
        self.lr_sched = None
    elif sched_version == 'cal_v0':
        self.lr_sched = optim.lr_scheduler.CosineAnnealingLR(
            self.optim, T_max=num_epochs, eta_min=self.lr / 2, last_epoch=-1)
    elif sched_version == 'cal_v1':
        self.lr_sched = optim.lr_scheduler.CosineAnnealingLR(
            self.optim, T_max=num_epochs, eta_min=self.lr / 4, last_epoch=-1)
    elif sched_version == 'cawr_v0':
        self.lr_sched = optim.lr_scheduler.CosineAnnealingWarmRestarts(
            self.optim, T_0=10, T_mult=2, eta_min=self.lr / 2)
    elif sched_version == 'cawr_v1':
        self.lr_sched = optim.lr_scheduler.CosineAnnealingWarmRestarts(
            self.optim, T_0=25, T_mult=2, eta_min=self.lr / 4)
    else:
        self.lr_sched = None
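# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original file): how the optional lr scheduler
# selected above would usually be driven. The training loop is not shown in
# this file; the one-scheduler-step-per-epoch pattern below is an assumption
# based on the standard CosineAnnealingLR contract, with dummy parameters.

import torch
import torch.optim as optim

demo_params = [torch.nn.Parameter(torch.zeros(1))]
demo_opt = optim.Adam(demo_params, lr=5e-5, betas=(0.0, 0.999))
demo_sched = optim.lr_scheduler.CosineAnnealingLR(demo_opt, T_max=500,
                                                  eta_min=5e-5 / 2)
for epoch in range(3):
    demo_opt.step()     # one (dummy) optimization pass per epoch
    demo_sched.step()   # anneal the learning rate once per epoch
    print(epoch, demo_sched.get_last_lr())
# ---------------------------------------------------------------------------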
def create_models(self):
    cfg = self.cfg

    if self.mode == 'test':
        print('Creating test models....')
    else:
        print('Creating train models....')

    ########################
    ###### Parameters ######
    ########################
    nb_anchors = cfg.nb_anchors
    pool_size = cfg.pool_size
    nb_object_classes = cfg.nb_object_classes
    nb_hoi_classes = cfg.nb_hoi_classes

    print(' Obj. classes:', nb_object_classes)
    print(' HOI classes:', nb_hoi_classes)

    ########################
    ######## Inputs ########
    ########################
    # RPN #
    img_input = keras.layers.Input(shape=(None, None, 3),
                                   name='input_image')

    # DET #
    nb_detection_rois = cfg.nb_detection_rois if self.mode == 'train' else None
    img_det_input = keras.layers.Input(shape=(None, None, 3),
                                       name='input_image')
    roi_input = keras.layers.Input(shape=(nb_detection_rois, 5),
                                   name='input_roi')

    # HOI #
    nb_hoi_rois = cfg.nb_hoi_rois if self.mode == 'train' else None
    img_hoi_input = keras.layers.Input(shape=(None, None, 3),
                                       name='input_image')
    human_fast_input = keras.layers.Input(shape=(nb_hoi_rois, 5),
                                          name="input_human")
    object_fast_input = keras.layers.Input(shape=(nb_hoi_rois, 5),
                                           name="input_object")
    interaction_fast_input = keras.layers.Input(shape=(nb_hoi_rois,
                                                       cfg.winShape[0],
                                                       cfg.winShape[1], 2),
                                                name="input_interaction")

    human_img_input = keras.layers.Input(shape=(227, 227, 3),
                                         name="input_human_img")
    object_img_input = keras.layers.Input(shape=(227, 227, 3),
                                          name="input_object_img")
    interaction_slow_input = keras.layers.Input(shape=(cfg.winShape[0],
                                                       cfg.winShape[1], 2),
                                                name="input_interaction")
    human_slow_input = keras.layers.Input(shape=(5,), name="input_human")
    object_slow_input = keras.layers.Input(shape=(5,), name="input_object")

    # SHARED #
    features_input = keras.layers.Input(shape=(None, None, 512),
                                        name="input_features")

    ########################
    ######### RPN ##########
    ########################
    if self.do_rpn:
        print(' Creating RPN model...')
        output_features = models.VGG16_buildin(cfg)(img_input)
        self.nb_models += 1

        rpn_inputs = [img_input]
        rpn_features = layers.rpn(cfg)([output_features])

        x_class = keras.layers.Conv2D(
            filters=nb_anchors,
            kernel_size=(1, 1),
            activation='sigmoid',
            kernel_initializer=keras.initializers.RandomNormal(stddev=0.01),
            kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            bias_regularizer=keras.regularizers.l2(cfg.weight_decay),
            name='rpn_out_class')(rpn_features)

        x_deltas = keras.layers.Conv2D(
            filters=nb_anchors * 4,
            kernel_size=(1, 1),
            activation='linear',
            kernel_initializer=keras.initializers.RandomNormal(stddev=0.01),
            kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            bias_regularizer=keras.regularizers.l2(cfg.weight_decay),
            name='rpn_out_regress')(rpn_features)

        if self.mode == 'test' and cfg.use_shared_cnn:
            rpn_outputs = [x_class, x_deltas, output_features]
        else:
            rpn_outputs = [x_class, x_deltas]

        self.model_rpn = keras.models.Model(inputs=rpn_inputs,
                                            outputs=rpn_outputs)
        self.model_rpn.name = 'rpn'

        # Only train from conv3_1
        print(' Freezing first few layers...')
        nb_freeze_layers = 17 if cfg.do_finetune else cfg.nb_freeze_layers
        print(' Freeze up to', nb_freeze_layers)
        for i, layer in enumerate(self.model_rpn.layers):
            layer.trainable = False
            if i == nb_freeze_layers:
                break

    ########################
    ###### Detection #######
    ########################
    if self.do_det:
        print(' Creating DET model...')
        self.nb_models += 1

        if self.mode == 'test' and cfg.use_shared_cnn:
            print(' -using shared CNN')
            output_features_det = features_input
            detection_inputs = [features_input, roi_input]
        else:
            output_features_det = models.VGG16_buildin(cfg)(img_det_input)
            detection_inputs = [img_det_input, roi_input]

        object_rois = layers.RoiPoolingConv(
            pool_size=pool_size,
            batch_size=cfg.nb_detection_rois)([output_features_det, roi_input])

        object_features = layers.fullyConnected(
            cfg, stream='det', use_dropout=True)([object_rois])

        object_scores = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=nb_object_classes,
                activation='softmax',
                kernel_initializer=keras.initializers.RandomNormal(stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                bias_regularizer=keras.regularizers.l2(cfg.weight_decay)),
            name="det_out_class" if not cfg.do_finetune
                 else "det_fineout_class")(object_features)

        object_deltas = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=4 * (nb_object_classes - 1),
                activation="linear",
                kernel_initializer=keras.initializers.RandomNormal(stddev=0.001),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                bias_regularizer=keras.regularizers.l2(cfg.weight_decay)),
            name="det_out_regress" if not cfg.do_finetune
                 else "det_fineout_regress")(object_features)

        detection_outputs = [object_scores, object_deltas]

        self.model_det = keras.models.Model(inputs=detection_inputs,
                                            outputs=detection_outputs)
        self.model_det.name = 'det'

        # Only train from conv3_1
        nb_freeze_layers = 17 if cfg.do_finetune else cfg.nb_freeze_layers
        for i, layer in enumerate(self.model_det.layers):
            layer.trainable = False
            if i == nb_freeze_layers:
                break

    ########################
    ######### HOI ##########
    ########################
    if self.do_hoi and cfg.do_fast_hoi:
        print(' Creating fast HOI model...')
        self.nb_models += 1

        if self.mode == 'test' and cfg.use_shared_cnn:
            print(' -using shared CNN')
            output_features_hoi = features_input
            hoi_inputs = [features_input, human_fast_input,
                          object_fast_input, interaction_fast_input]
        else:
            if cfg.backbone == 'vgg':
                output_features_hoi = models.VGG16_buildin(cfg)(img_hoi_input)
            else:
                output_features_hoi = models.AlexNet_buildin(cfg)(img_hoi_input)
            hoi_inputs = [img_hoi_input, human_fast_input,
                          object_fast_input, interaction_fast_input]

        ## HUMAN ##
        hoi_human_rois = layers.RoiPoolingConv(
            pool_size=pool_size,
            batch_size=cfg.nb_hoi_rois,
            mode=self.mode)([output_features_hoi, human_fast_input])

        hoi_human_features = layers.fullyConnected(
            cfg, stream='human')([hoi_human_rois])

        hoi_human_scores = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=1 * nb_hoi_classes,
                activation=None,
                kernel_initializer=keras.initializers.RandomNormal(stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            ),
            name="scores4human" if not cfg.do_finetune
                 else "scores4human_finetune")(hoi_human_features)

        ## OBJECT ##
        hoi_object_rois = layers.RoiPoolingConv(
            pool_size=pool_size,
            batch_size=cfg.nb_hoi_rois,
            mode=self.mode)([output_features_hoi, object_fast_input])

        hoi_object_features = layers.fullyConnected(
            cfg, stream='object')([hoi_object_rois])

        hoi_object_scores = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=1 * nb_hoi_classes,
                activation=None,
                kernel_initializer=keras.initializers.RandomNormal(stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            ),
            name="scores4object" if not cfg.do_finetune
                 else "scores4object_finetune")(hoi_object_features)

        ## INTERACTION ##
        hoi_pattern_features = layers.pairwiseStream(cfg=cfg)(
            [interaction_fast_input])

        hoi_pattern_scores = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=1 * nb_hoi_classes,
                activation=None,
                kernel_initializer=keras.initializers.RandomNormal(stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            ),
            name="scores4pattern" if not cfg.do_finetune
                 else "scores4pattern_finetune")(hoi_pattern_features)

        ## FINAL ##
        hoi_score = keras.layers.Add()(
            [hoi_human_scores, hoi_object_scores, hoi_pattern_scores])

        hoi_final_score = keras.layers.Activation(
            "softmax" if cfg.do_categorical_hoi else 'sigmoid',
            name="hoi_out_class" if not cfg.do_finetune
                 else "hoi_fineout_class")(hoi_score)

        human_fast_input = layers.identity(cfg)([human_fast_input])
        object_fast_input = layers.identity(cfg)([object_fast_input])

        if self.mode == 'test':
            hoi_outputs = [hoi_final_score, human_fast_input,
                           object_fast_input]
        else:
            hoi_outputs = [hoi_final_score]

        self.model_hoi = keras.models.Model(inputs=hoi_inputs,
                                            outputs=hoi_outputs)
        self.model_hoi.name = 'hoi'

    if self.do_hoi and not cfg.do_fast_hoi:
        print(' Creating slow HOI model...')
        self.nb_models += 1

        if cfg.backbone == 'vgg':
            hoi_human_features = models.VGG16_buildin(cfg)(human_img_input)
            hoi_object_features = models.VGG16_buildin(cfg)(object_img_input)
        else:
            hoi_human_features = models.AlexNet_buildin(cfg)(human_img_input)
            hoi_object_features = models.AlexNet_buildin(cfg)(object_img_input)

        hoi_inputs = [human_img_input, object_img_input,
                      interaction_slow_input, human_slow_input,
                      object_slow_input]

        ## HUMAN ##
        hoi_human_scores = keras.layers.Dense(
            units=1 * nb_hoi_classes,
            activation=None,
            kernel_initializer=keras.initializers.RandomNormal(stddev=0.01),
            kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            name="scores4human")(hoi_human_features)

        ## OBJECT ##
        hoi_object_scores = keras.layers.Dense(
            units=1 * nb_hoi_classes,
            activation=None,
            kernel_initializer=keras.initializers.RandomNormal(stddev=0.01),
            kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            name="scores4object")(hoi_object_features)

        ## INTERACTION ##
        interaction_input = layers.intct_expansion(cfg)(
            [interaction_slow_input])
        hoi_pattern_features = layers.pairwiseStream(cfg=cfg)(
            [interaction_input])
        hoi_pattern_scores = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=1 * nb_hoi_classes,
                activation=None,
                kernel_initializer=keras.initializers.RandomNormal(stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            ),
            name='scores4pattern')(hoi_pattern_features)
        hoi_pattern_scores = layers.intct_reduction(cfg)(
            [hoi_pattern_scores])

        ## FINAL ##
        hoi_score = keras.layers.Add()(
            [hoi_human_scores, hoi_object_scores, hoi_pattern_scores])
        hoi_final_score = keras.layers.Activation(
            "sigmoid", name="hoi_out_class")(hoi_score)

        human_slow_input = layers.identity(cfg)([human_slow_input])
        object_slow_input = layers.identity(cfg)([object_slow_input])

        if self.mode == 'test':
            hoi_outputs = [hoi_final_score, human_slow_input,
                           object_slow_input]
        else:
            hoi_outputs = [hoi_final_score]

        self.model_hoi = keras.models.Model(inputs=hoi_inputs,
                                            outputs=hoi_outputs)
        self.model_hoi.name = 'hoi'
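# ---------------------------------------------------------------------------
# Hedged toy sketch (not part of the original file): the HOI head above fuses
# three logit streams -- human, object, and interaction pattern -- by
# element-wise addition before a single sigmoid/softmax, so all streams are
# trained jointly through one loss. Shapes and the class count below are
# illustrative only; this assumes the classic standalone Keras 2 API used in
# this file.

import keras

demo_nb_hoi_classes = 600  # illustrative; the real value comes from cfg
h_logits = keras.layers.Input(shape=(demo_nb_hoi_classes,), name='human_logits')
o_logits = keras.layers.Input(shape=(demo_nb_hoi_classes,), name='object_logits')
p_logits = keras.layers.Input(shape=(demo_nb_hoi_classes,), name='pattern_logits')
fused = keras.layers.Add()([h_logits, o_logits, p_logits])
fused_out = keras.layers.Activation('sigmoid', name='hoi_out_class')(fused)
demo_fusion = keras.models.Model(inputs=[h_logits, o_logits, p_logits],
                                 outputs=fused_out)
# ---------------------------------------------------------------------------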
def __init__(self, G_ch=64, dim_z=128, bottom_width=4, resolution=128,
             G_kernel_size=3, G_attn='64', n_classes=1000,
             num_G_SVs=1, num_G_SV_itrs=1, G_shared=True,
             shared_dim=0, hier=False, cross_replica=False, mybn=False,
             G_activation=nn.ReLU(inplace=False),
             G_lr=5e-5, G_B1=0.0, G_B2=0.999, adam_eps=1e-8,
             BN_eps=1e-5, SN_eps=1e-12, G_mixed_precision=False, G_fp16=False,
             G_init='ortho', skip_init=False, no_optim=False,
             G_param='SN', norm_style='bn', **kwargs):
    """
    These parameters are all defined in utils, where they are collected via
    the argument parser and vars(); this walks through what the model
    actually looks like.

    G_ch: the generator's channel-width multiplier, default 64. It determines
    the whole architecture; with ch = 64 the 128-resolution entry resolves to:

        ch = 64
        arch[128] = {'in_channels':  [ch * item for item in [16, 16, 8, 4, 2]],
                     'out_channels': [ch * item for item in [16, 8, 4, 2, 1]],
                     'upsample': [True] * 5,
                     'resolution': [8, 16, 32, 64, 128],
                     'attention': {2**i: (2**i in [int(item) for item in
                                                   attention.split('_')])
                                   for i in range(3, 8)}}

    dim_z: dimensionality of the noise vector, default 128.
    """
    super(Generator, self).__init__()
    # Channel width multiplier
    self.ch = G_ch
    # Dimensionality of the latent space
    self.dim_z = dim_z
    # The initial spatial dimensions
    ## TODO: not yet clear what this is mainly for
    self.bottom_width = bottom_width
    # Resolution of the output
    ## Selects which architecture entry to use
    self.resolution = resolution
    # Kernel size?
    ## TODO: not passed in from outside, and never used either
    self.kernel_size = G_kernel_size
    # Attention?
    ## Only an intermediary: it goes straight into the self.arch lookup and
    ## is finally parsed when the attention structure is built
    self.attention = G_attn
    # Number of classes, for use in categorical conditional generation
    self.n_classes = n_classes
    # Use shared embeddings?
    ## Default: False
    self.G_shared = G_shared
    # Dimensionality of the shared embedding? Unused if not using G_shared
    self.shared_dim = shared_dim if shared_dim > 0 else dim_z
    # Hierarchical latent space?
    self.hier = hier
    # Cross replica batchnorm?
    self.cross_replica = cross_replica
    # Use my batchnorm?
    self.mybn = mybn
    # Nonlinearity for residual blocks
    self.activation = G_activation
    # Initialization style
    self.init = G_init
    # Parameterization style
    self.G_param = G_param
    # Normalization style
    self.norm_style = norm_style
    # Epsilon for BatchNorm?
    self.BN_eps = BN_eps
    # Epsilon for Spectral Norm?
    ## https://zhuanlan.zhihu.com/p/68081406
    self.SN_eps = SN_eps
    # fp16?
    self.fp16 = G_fp16
    # Architecture dict
    self.arch = G_arch(self.ch, self.attention)[resolution]

    # If using hierarchical latents, adjust z
    if self.hier:
        # Number of places z slots into
        self.num_slots = len(self.arch['in_channels']) + 1
        self.z_chunk_size = (self.dim_z // self.num_slots)
        # Recalculate latent dimensionality for even splitting into chunks
        self.dim_z = self.z_chunk_size * self.num_slots
    else:
        self.num_slots = 1
        self.z_chunk_size = 0

    # Which convs, batchnorms, and linear layers to use
    if self.G_param == 'SN':
        self.which_conv = functools.partial(layers.SNConv2d,
                                            kernel_size=3, padding=1,
                                            num_svs=num_G_SVs,
                                            num_itrs=num_G_SV_itrs,
                                            eps=self.SN_eps)
        self.which_linear = functools.partial(layers.SNLinear,
                                              num_svs=num_G_SVs,
                                              num_itrs=num_G_SV_itrs,
                                              eps=self.SN_eps)
    else:
        self.which_conv = functools.partial(nn.Conv2d, kernel_size=3, padding=1)
        self.which_linear = nn.Linear

    # We use a non-spectral-normed embedding here regardless;
    # for some reason applying SN to G's embedding seems to randomly cripple G.
    ## *** fluid.dygraph.Embedding == nn.Embedding
    self.which_embedding = nn.Embedding
    bn_linear = (functools.partial(self.which_linear, bias=False)
                 if self.G_shared else self.which_embedding)
    self.which_bn = functools.partial(layers.ccbn,
                                      which_linear=bn_linear,
                                      cross_replica=self.cross_replica,
                                      mybn=self.mybn,
                                      input_size=(self.shared_dim + self.z_chunk_size
                                                  if self.G_shared
                                                  else self.n_classes),
                                      norm_style=self.norm_style,
                                      eps=self.BN_eps)

    # Prepare model.
    # If not using shared embeddings, self.shared is just a passthrough.
    self.shared = (self.which_embedding(n_classes, self.shared_dim)
                   if G_shared else layers.identity())
    # First linear layer
    self.linear = self.which_linear(self.dim_z // self.num_slots,
                                    self.arch['in_channels'][0] * (self.bottom_width ** 2))

    # self.blocks is a doubly-nested list of modules: the outer loop is over
    # blocks at a given resolution (resblocks and/or self-attention), while
    # the inner loop is over the modules within a given block.
    self.blocks = []
    for index in range(len(self.arch['out_channels'])):
        self.blocks += [[layers.GBlock(in_channels=self.arch['in_channels'][index],
                                       out_channels=self.arch['out_channels'][index],
                                       which_conv=self.which_conv,
                                       which_bn=self.which_bn,
                                       activation=self.activation,
                                       upsample=(functools.partial(F.interpolate,
                                                                   scale_factor=2)
                                                 if self.arch['upsample'][index]
                                                 else None))]]
        # If attention on this block, attach it to the end.
        if self.arch['attention'][self.arch['resolution'][index]]:
            print('Adding attention layer in G at resolution %d' %
                  self.arch['resolution'][index])
            self.blocks[-1] += [layers.Attention(self.arch['out_channels'][index],
                                                 self.which_conv)]

    # Turn self.blocks into a ModuleList so that it's all properly registered.
    self.blocks = nn.ModuleList([nn.ModuleList(block) for block in self.blocks])

    # Output layer: batchnorm-relu-conv.
    # Consider using a non-spectral conv here.
    self.output_layer = nn.Sequential(layers.bn(self.arch['out_channels'][-1],
                                                cross_replica=self.cross_replica,
                                                mybn=self.mybn),
                                      self.activation,
                                      self.which_conv(self.arch['out_channels'][-1], 3))

    # Initialize weights. Optionally skip init for testing.
    if not skip_init:
        self.init_weights()

    # Set up optimizer.
    # If this is an EMA copy, no need for an optim, so just return now.
    if no_optim:
        return
    self.lr, self.B1, self.B2, self.adam_eps = G_lr, G_B1, G_B2, adam_eps
    if G_mixed_precision:
        print('Using fp16 adam in G...')
        import utils
        self.optim = utils.Adam16(params=self.parameters(), lr=self.lr,
                                  betas=(self.B1, self.B2), weight_decay=0,
                                  eps=self.adam_eps)
    else:
        self.optim = optim.Adam(params=self.parameters(), lr=self.lr,
                                betas=(self.B1, self.B2), weight_decay=0,
                                eps=self.adam_eps)
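# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original file) of the hierarchical-latent
# bookkeeping above: with hier=True, z is split into num_slots equal chunks,
# one fed to the first linear layer and one concatenated with the class
# embedding at each block's conditional batchnorm. This mirrors BigGAN's
# forward pass, which is not shown in this excerpt; numbers are illustrative.

import torch

demo_dim_z = 128
demo_num_slots = 6                               # e.g. 5 stages + 1 input slot
demo_chunk = demo_dim_z // demo_num_slots        # 21
demo_dim_z = demo_chunk * demo_num_slots         # 126, recomputed for an even split
z = torch.randn(4, demo_dim_z)
zs = torch.split(z, demo_chunk, dim=1)
print(len(zs), zs[0].shape)                      # 6 torch.Size([4, 21])
# ---------------------------------------------------------------------------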