예제 #1
0
    def __init__(self, code_dim=100, n_class=100, SN=True,size=64):
        """Assemble the generator's layers and attach an Adam optimizer.

        Args:
            code_dim: Width of the input code fed to the first linear layer;
                also stored as ``self.dim_z``.
            n_class: Forwarded to every ``ConvBlock`` as ``n_class``.
            SN: When it compares equal to ``True``, the first linear layer and
                the final conv are passed through ``spectral_init``. The value
                is also forwarded unchanged to every ``ConvBlock``.
            size: Accepted but not used by this constructor.
        """
        super().__init__()

        # Explicit equality is kept on purpose: only values that compare equal
        # to True switch the spectral_init wrapping on.
        use_sn = SN == True

        def _maybe_sn(module):
            # Route the module through spectral_init only when SN is enabled.
            return spectral_init(module) if use_sn else module

        # Projects the code to 4 * 4 * 512 features.
        self.lin_code = _maybe_sn(nn.Linear(code_dim, 4 * 4 * 512))

        # Channel ladder 512 -> 256 -> 128 -> 64, one ConvBlock per step.
        widths = (512, 256, 128, 64)
        self.conv = nn.ModuleList([
            ConvBlock(w_in, w_out, n_class=n_class, SN=SN)
            for w_in, w_out in zip(widths[:-1], widths[1:])
        ])

        self.bn = nn.BatchNorm2d(64)
        self.shared = layers.identity()
        self.shared_d = layers.identity()
        self.dim_z = code_dim

        # Final 3x3 conv maps the 64 feature channels to 3 output channels.
        self.colorize = _maybe_sn(nn.Conv2d(64, 3, [3, 3], padding=1))

        # The optimizer is created last so that it sees every parameter
        # registered above.
        adam_options = dict(lr=1e-4, betas=(0.00, 0.999), weight_decay=0,
                            eps=1e-8)
        self.optim = optim.Adam(params=self.parameters(), **adam_options)
예제 #2
0
  def __init__(self, G_ch=64, G_depth=2, dim_z=128, bottom_width=4, resolution=128,
               G_kernel_size=3, G_attn='64', n_classes=1000,
               num_G_SVs=1, num_G_SV_itrs=1,
               G_shared=True, shared_dim=0, hier=False,
               cross_replica=False, mybn=False,
               G_activation=nn.ReLU(inplace=False),
               G_lr=5e-5, G_B1=0.0, G_B2=0.999, adam_eps=1e-8,
               BN_eps=1e-5, SN_eps=1e-12, G_mixed_precision=False, G_fp16=False,
               G_init='ortho', skip_init=False, no_optim=False,
               G_param='SN', norm_style='bn',
               **kwargs):
    """Build the generator: config, layer factories, blocks and optimizer.

    Args:
      G_ch: Channel width multiplier handed to ``G_arch``.
      G_depth: Number of ``GBlock``s created per architecture stage.
      dim_z: Dimensionality of the latent vector.
      bottom_width: Initial spatial size; the first linear layer emits
        ``in_channels[0] * bottom_width ** 2`` features.
      resolution: Key used to pick the architecture dict from ``G_arch``.
      G_kernel_size: Stored as ``self.kernel_size``; the conv factories
        built here use a fixed ``kernel_size=3``.
      G_attn: Attention spec handed to ``G_arch``.
      n_classes: Number of classes for the class embedding.
      num_G_SVs, num_G_SV_itrs: Forwarded to the SN layers as ``num_svs``
        and ``num_itrs``.
      G_shared: Use a shared class embedding; otherwise ``self.shared`` is
        ``layers.identity()``.
      shared_dim: Embedding width; falls back to ``dim_z`` when not positive.
      hier: Stored as ``self.hier``; not otherwise read in this constructor.
      cross_replica, mybn: Forwarded to the batchnorm layers.
      G_activation: Nonlinearity module shared by the blocks and output layer.
      G_lr, G_B1, G_B2, adam_eps: Adam hyper-parameters.
      BN_eps, SN_eps: Epsilons for batchnorm / spectral norm.
      G_mixed_precision: Use ``utils.Adam16`` instead of ``optim.Adam``.
      G_fp16: Stored as ``self.fp16``.
      G_init: Stored as ``self.init``.
      skip_init: Skip the ``self.init_weights()`` call.
      no_optim: Return before creating an optimizer.
      G_param: ``'SN'`` selects the spectral-norm conv/linear layers.
      norm_style: Forwarded to ``layers.ccbn``.
      **kwargs: Accepted and ignored.
    """
    super(Generator, self).__init__()

    # ---- Configuration, stored as given ----
    self.ch = G_ch                      # channel width multiplier
    self.G_depth = G_depth              # resblocks per stage
    self.dim_z = dim_z                  # latent dimensionality
    self.bottom_width = bottom_width    # initial spatial dimensions
    self.resolution = resolution        # output resolution
    self.kernel_size = G_kernel_size
    self.attention = G_attn
    self.n_classes = n_classes          # for class-conditional generation
    self.G_shared = G_shared            # shared embeddings on/off
    # Only meaningful with G_shared; a non-positive value means "same as dim_z".
    self.shared_dim = shared_dim if shared_dim > 0 else dim_z
    self.hier = hier
    self.cross_replica = cross_replica
    self.mybn = mybn
    self.activation = G_activation      # nonlinearity for residual blocks
    self.init = G_init
    self.G_param = G_param
    self.norm_style = norm_style
    self.BN_eps = BN_eps
    self.SN_eps = SN_eps
    self.fp16 = G_fp16
    # Architecture dict for the requested resolution
    self.arch = G_arch(self.ch, self.attention)[resolution]

    # ---- Layer factories ----
    if self.G_param == 'SN':
      # Both spectral-norm factories share the same power-iteration settings.
      sn_options = dict(num_svs=num_G_SVs, num_itrs=num_G_SV_itrs,
                        eps=self.SN_eps)
      self.which_conv = functools.partial(layers.SNConv2d,
                                          kernel_size=3, padding=1,
                                          **sn_options)
      self.which_linear = functools.partial(layers.SNLinear, **sn_options)
    else:
      self.which_conv = functools.partial(nn.Conv2d, kernel_size=3, padding=1)
      self.which_linear = nn.Linear

    # The embedding is deliberately never spectral-normed: applying SN to G's
    # embedding was observed to randomly cripple G.
    self.which_embedding = nn.Embedding
    if self.G_shared:
      bn_linear = functools.partial(self.which_linear, bias=False)
      bn_input_size = self.shared_dim + self.dim_z
    else:
      bn_linear = self.which_embedding
      bn_input_size = self.n_classes
    self.which_bn = functools.partial(layers.ccbn,
                                      which_linear=bn_linear,
                                      cross_replica=self.cross_replica,
                                      mybn=self.mybn,
                                      input_size=bn_input_size,
                                      norm_style=self.norm_style,
                                      eps=self.BN_eps)

    # ---- Model ----
    # Without shared embeddings, self.shared is just a passthrough.
    if G_shared:
      self.shared = self.which_embedding(n_classes, self.shared_dim)
    else:
      self.shared = layers.identity()

    # First linear layer
    first_width = self.arch['in_channels'][0] * (self.bottom_width ** 2)
    self.linear = self.which_linear(self.dim_z + self.shared_dim, first_width)

    # Doubly-nested list of modules. Every GBlock gets its own inner list, so
    # each stage contributes G_depth single-element lists; an attention layer,
    # when requested, joins the inner list of the stage's last GBlock.
    nested_blocks = []
    for index in range(len(self.arch['out_channels'])):
      for g_index in range(self.G_depth):
        stage_in = self.arch['in_channels'][index]
        # The first block of a stage keeps the channel count; later blocks
        # move to the stage's output width.
        if g_index == 0:
          block_out = self.arch['in_channels'][index]
        else:
          block_out = self.arch['out_channels'][index]
        # Only the last block of an upsampling stage actually upsamples.
        if self.arch['upsample'][index] and g_index == (self.G_depth - 1):
          upsample = functools.partial(F.interpolate, scale_factor=2)
        else:
          upsample = None
        nested_blocks.append([GBlock(in_channels=stage_in,
                                     out_channels=block_out,
                                     which_conv=self.which_conv,
                                     which_bn=self.which_bn,
                                     activation=self.activation,
                                     upsample=upsample)])

      # If attention on this block, attach it to the end
      stage_resolution = self.arch['resolution'][index]
      if self.arch['attention'][stage_resolution]:
        print('Adding attention layer in G at resolution %d' % self.arch['resolution'][index])
        nested_blocks[-1].append(
          layers.Attention(self.arch['out_channels'][index], self.which_conv))

    # Wrap in ModuleLists so that everything is properly registered.
    self.blocks = nn.ModuleList([nn.ModuleList(group) for group in nested_blocks])

    # Output layer: batchnorm-relu-conv.
    # Consider using a non-spectral conv here
    final_width = self.arch['out_channels'][-1]
    output_bn = layers.bn(final_width,
                          cross_replica=self.cross_replica,
                          mybn=self.mybn)
    output_conv = self.which_conv(final_width, 3)
    self.output_layer = nn.Sequential(output_bn, self.activation, output_conv)

    # Initialize weights. Optionally skip init for testing.
    if not skip_init:
      self.init_weights()

    # ---- Optimizer ----
    # An EMA copy needs no optimizer, so stop here.
    if no_optim:
      return
    self.lr = G_lr
    self.B1 = G_B1
    self.B2 = G_B2
    self.adam_eps = adam_eps
    if G_mixed_precision:
      print('Using fp16 adam in G...')
      import utils
      adam_cls = utils.Adam16
    else:
      adam_cls = optim.Adam
    self.optim = adam_cls(params=self.parameters(), lr=self.lr,
                          betas=(self.B1, self.B2), weight_decay=0,
                          eps=self.adam_eps)
예제 #3
0
    def __init__(self,
                 G_ch=64,
                 dim_z=128,
                 bottom_width=4,
                 resolution=128,
                 G_kernel_size=3,
                 G_attn='64',
                 n_classes=1000,
                 num_G_SVs=1,
                 num_G_SV_itrs=1,
                 G_shared=True,
                 shared_dim=0,
                 hier=False,
                 cross_replica=False,
                 mybn=False,
                 G_activation=nn.ReLU(inplace=False),
                 G_lr=5e-5,
                 G_B1=0.0,
                 G_B2=0.999,
                 adam_eps=1e-8,
                 BN_eps=1e-5,
                 SN_eps=1e-12,
                 G_mixed_precision=False,
                 G_fp16=False,
                 G_init='ortho',
                 skip_init=False,
                 no_optim=False,
                 G_param='SN',
                 norm_style='bn',
                 add_blur=False,
                 add_noise=False,
                 add_style=False,
                 style_mlp=6,
                 attn_style='nl',
                 no_conditional=False,
                 sched_version='default',
                 num_epochs=500,
                 arch=None,
                 skip_z=False,
                 use_dog_cnt=False,
                 dim_dog_cnt_z=32,
                 mix_style=False,
                 **kwargs):
        """Build the generator: config, layer factories, blocks, optimizer.

        Args:
            G_ch: Channel width multiplier handed to ``G_arch``.
            dim_z: Latent dimensionality. With ``hier`` it is rounded down to
                a multiple of the number of slots.
            bottom_width: Initial spatial size; the first linear layer emits
                ``in_channels[0] * bottom_width ** 2`` features.
            resolution: Output resolution; also the default architecture key.
            G_kernel_size: Stored as ``self.kernel_size``; the conv factories
                built here use a fixed ``kernel_size=3``.
            G_attn: Attention spec handed to ``G_arch``.
            n_classes: Number of classes for the class embedding.
            num_G_SVs, num_G_SV_itrs: Forwarded to the SN layers as
                ``num_svs`` / ``num_itrs``.
            G_shared: Use shared embeddings; otherwise ``self.shared`` and
                ``self.dog_cnt_shared`` are ``layers.identity()``.
            shared_dim: Embedding width; falls back to ``dim_z`` when not
                positive.
            hier: Split ``z`` into ``len(in_channels) + 1`` equal chunks.
            cross_replica, mybn: Forwarded to the batchnorm layers.
            G_activation: Nonlinearity module shared by blocks, the output
                layer and the style layers.
            G_lr, G_B1, G_B2, adam_eps: Adam hyper-parameters.
            BN_eps, SN_eps: Epsilons for batchnorm / spectral norm.
            G_mixed_precision: Use ``utils.Adam16`` instead of ``optim.Adam``.
            G_fp16: Stored as ``self.fp16``.
            G_init: Stored as ``self.init``.
            skip_init: Skip the ``self.init_weights()`` call.
            no_optim: Return before creating optimizer and scheduler.
            G_param: ``'SN'`` selects the spectral-norm conv/linear layers.
            norm_style: Forwarded to ``layers.ccbn``.
            add_blur, add_noise: Stored and forwarded to each
                ``layers.GBlock``.
            add_style: When true, build ``self.style`` from ``style_mlp``
                ``layers.StyleLayer`` modules.
            style_mlp: Number of style layers.
            attn_style: ``'cbam'`` selects ``layers.CBAM``; anything else
                selects ``layers.Attention``.
            no_conditional, skip_z: Forwarded to ``layers.ccbn``.
            sched_version: One of ``'cal_v0'``, ``'cal_v1'``, ``'cawr_v0'``,
                ``'cawr_v1'``; any other value leaves ``self.lr_sched`` as
                ``None``.
            num_epochs: ``T_max`` for the cosine-annealing schedulers.
            arch: Architecture key; defaults to ``str(resolution)``.
            use_dog_cnt: Widen the conditional-BN input by ``dim_dog_cnt_z``
                (only when ``G_shared``); also forwarded to ``layers.ccbn``.
            dim_dog_cnt_z: Width of the ``dog_cnt_shared`` embedding.
            mix_style: Stored as ``self.mix_style``.
            **kwargs: Only ``amsgrad`` is read (optional, default ``False``);
                it is forwarded to the optimizer.
        """
        super(Generator, self).__init__()
        # Channel width mulitplier
        self.ch = G_ch
        # Dimensionality of the latent space
        self.dim_z = dim_z
        # The initial spatial dimensions
        self.bottom_width = bottom_width
        # Resolution of the output
        self.resolution = resolution
        # Kernel size?
        self.kernel_size = G_kernel_size
        # Attention?
        self.attention = G_attn
        # number of classes, for use in categorical conditional generation
        self.n_classes = n_classes
        # Use shared embeddings?
        self.G_shared = G_shared
        # Dimensionality of the shared embedding? Unused if not using G_shared
        self.shared_dim = shared_dim if shared_dim > 0 else dim_z
        # Hierarchical latent space?
        self.hier = hier
        # Cross replica batchnorm?
        self.cross_replica = cross_replica
        # Use my batchnorm?
        self.mybn = mybn
        # nonlinearity for residual blocks
        self.activation = G_activation
        # Initialization style
        self.init = G_init
        # Parameterization style
        self.G_param = G_param
        # Normalization style
        self.norm_style = norm_style
        # Optional extras: blur / noise / style, z skipping, dog-count
        # conditioning and style mixing flags
        self.add_blur = add_blur
        self.add_noise = add_noise
        self.add_style = add_style
        self.skip_z = skip_z
        self.use_dog_cnt = use_dog_cnt
        self.dim_dog_cnt_z = dim_dog_cnt_z
        self.mix_style = mix_style

        # Epsilon for BatchNorm?
        self.BN_eps = BN_eps
        # Epsilon for Spectral Norm?
        self.SN_eps = SN_eps
        # fp16?
        self.fp16 = G_fp16
        # Architecture dict; keyed by the resolution string unless overridden
        if arch is None:
            arch = f'{resolution}'
        self.arch = G_arch(self.ch, self.attention)[arch]

        # If using hierarchical latents, adjust z
        if self.hier:
            # Number of places z slots into
            self.num_slots = len(self.arch['in_channels']) + 1
            self.z_chunk_size = (self.dim_z // self.num_slots)
            # Recalculate latent dimensionality for even splitting into chunks
            self.dim_z = self.z_chunk_size * self.num_slots
        else:
            self.num_slots = 1
            self.z_chunk_size = 0

        # Which convs, batchnorms, and linear layers to use
        if self.G_param == 'SN':
            self.which_conv = functools.partial(layers.SNConv2d,
                                                kernel_size=3,
                                                padding=1,
                                                num_svs=num_G_SVs,
                                                num_itrs=num_G_SV_itrs,
                                                eps=self.SN_eps)
            self.which_linear = functools.partial(layers.SNLinear,
                                                  num_svs=num_G_SVs,
                                                  num_itrs=num_G_SV_itrs,
                                                  eps=self.SN_eps)
        else:
            self.which_conv = functools.partial(nn.Conv2d,
                                                kernel_size=3,
                                                padding=1)
            self.which_linear = nn.Linear

        if attn_style == 'cbam':
            self.which_attn = layers.CBAM
        else:
            self.which_attn = layers.Attention

        # We use a non-spectral-normed embedding here regardless;
        # For some reason applying SN to G's embedding seems to randomly cripple G
        self.which_embedding = nn.Embedding
        bn_linear = (functools.partial(self.which_linear, bias=False)
                     if self.G_shared else self.which_embedding)
        # Conditional-BN input: shared embedding plus one z chunk (plus the
        # dog-count embedding when enabled), or the class count otherwise.
        input_size = (self.shared_dim + self.z_chunk_size
                      if self.G_shared else self.n_classes)
        if self.G_shared and use_dog_cnt:
            input_size += dim_dog_cnt_z
        self.which_bn = functools.partial(
            layers.ccbn,
            which_linear=bn_linear,
            cross_replica=self.cross_replica,
            mybn=self.mybn,
            input_size=input_size,
            norm_style=self.norm_style,
            eps=self.BN_eps,
            style_linear=self.which_linear,
            dim_z=self.dim_z,
            no_conditional=no_conditional,
            skip_z=self.skip_z,
            use_dog_cnt=use_dog_cnt,
            g_shared=G_shared,
        )

        # Prepare model
        # If not using shared embeddings, self.shared is just a passthrough
        self.shared = (self.which_embedding(n_classes, self.shared_dim)
                       if G_shared else layers.identity())

        # Embedding with 4 entries of width dim_dog_cnt_z; created whenever
        # G_shared is set, independent of use_dog_cnt.
        self.dog_cnt_shared = (self.which_embedding(4, self.dim_dog_cnt_z)
                               if G_shared else layers.identity())
        # First linear layer
        self.linear = self.which_linear(
            self.dim_z // self.num_slots,
            self.arch['in_channels'][0] * (self.bottom_width**2))

        # self.blocks is a doubly-nested list of modules, the outer loop intended
        # to be over blocks at a given resolution (resblocks and/or self-attention)
        # while the inner loop is over a given block
        self.blocks = []
        for index in range(len(self.arch['out_channels'])):
            self.blocks += [[
                layers.GBlock(
                    in_channels=self.arch['in_channels'][index],
                    out_channels=self.arch['out_channels'][index],
                    which_conv=self.which_conv,
                    which_bn=self.which_bn,
                    activation=self.activation,
                    upsample=(functools.partial(F.interpolate, scale_factor=2)
                              if self.arch['upsample'][index] else None),
                    add_blur=add_blur,
                    add_noise=add_noise,
                )
            ]]

            # If attention on this block, attach it to the end
            if self.arch['attention'][self.arch['resolution'][index]]:
                print('Adding attention layer in G at resolution %d' %
                      self.arch['resolution'][index])
                self.blocks[-1] += [
                    self.which_attn(self.arch['out_channels'][index],
                                    self.which_conv)
                ]

        # Turn self.blocks into a ModuleList so that it's all properly registered.
        self.blocks = nn.ModuleList(
            [nn.ModuleList(block) for block in self.blocks])

        # output layer: batchnorm-relu-conv.
        # Consider using a non-spectral conv here
        self.output_layer = nn.Sequential(
            layers.bn(self.arch['out_channels'][-1],
                      cross_replica=self.cross_replica,
                      mybn=self.mybn), self.activation,
            self.which_conv(self.arch['out_channels'][-1], 3))

        if self.add_style:
            # Stack of style_mlp StyleLayers applied in sequence.
            style_layers = []
            for _ in range(style_mlp):
                style_layers.append(
                    layers.StyleLayer(self.dim_z, self.which_linear,
                                      self.activation))

            self.style = nn.Sequential(*style_layers)

        # Initialize weights. Optionally skip init for testing.
        if not skip_init:
            self.init_weights()

        # Set up optimizer
        # If this is an EMA copy, no need for an optim, so just return now
        if no_optim:
            return
        self.lr, self.B1, self.B2, self.adam_eps = G_lr, G_B1, G_B2, adam_eps
        # `amsgrad` is optional: fall back to False (torch.optim.Adam's own
        # default) instead of raising KeyError when the caller omits it.
        amsgrad = kwargs.get('amsgrad', False)
        if G_mixed_precision:
            print('Using fp16 adam in G...')
            import utils
            self.optim = utils.Adam16(params=self.parameters(),
                                      lr=self.lr,
                                      betas=(self.B1, self.B2),
                                      weight_decay=0,
                                      eps=self.adam_eps,
                                      amsgrad=amsgrad)
        else:
            self.optim = optim.Adam(params=self.parameters(),
                                    lr=self.lr,
                                    betas=(self.B1, self.B2),
                                    weight_decay=0,
                                    eps=self.adam_eps,
                                    amsgrad=amsgrad)

        # LR scheduling. 'default' and any unrecognised value mean
        # "no scheduler".
        self.lr_sched = None
        if sched_version == 'cal_v0':
            self.lr_sched = optim.lr_scheduler.CosineAnnealingLR(
                self.optim,
                T_max=num_epochs,
                eta_min=self.lr / 2,
                last_epoch=-1)
        elif sched_version == 'cal_v1':
            self.lr_sched = optim.lr_scheduler.CosineAnnealingLR(
                self.optim,
                T_max=num_epochs,
                eta_min=self.lr / 4,
                last_epoch=-1)
        elif sched_version == 'cawr_v0':
            self.lr_sched = optim.lr_scheduler.CosineAnnealingWarmRestarts(
                self.optim, T_0=10, T_mult=2, eta_min=self.lr / 2)
        elif sched_version == 'cawr_v1':
            self.lr_sched = optim.lr_scheduler.CosineAnnealingWarmRestarts(
                self.optim, T_0=25, T_mult=2, eta_min=self.lr / 4)
예제 #4
0
파일: methods.py 프로젝트: aag147/rcnn
    def create_models(self):
        """Build the Keras models that are enabled on this instance.

        Depending on ``self.do_rpn``, ``self.do_det``, ``self.do_hoi`` and
        ``cfg.do_fast_hoi`` this creates ``self.model_rpn``,
        ``self.model_det`` and/or ``self.model_hoi``, and increments
        ``self.nb_models`` once for every model built. Nothing is returned.

        Mode-dependent behaviour (``self.mode``):
          * The ROI dimension of the DET / fast-HOI inputs is fixed to the
            configured count only in ``'train'`` mode; otherwise it is
            ``None``.
          * In ``'test'`` mode with ``cfg.use_shared_cnn`` the RPN model also
            outputs the backbone features, and the DET / fast-HOI models take
            ``features_input`` instead of an image.
          * In ``'test'`` mode the HOI models additionally output the
            human/object box inputs (passed through ``layers.identity``).
        """
        cfg = self.cfg

        if self.mode == 'test':
            print('Creating test models....')
        else:
            print('Creating train models....')

        ########################
        ###### Parameters ######
        ########################
        nb_anchors = cfg.nb_anchors
        pool_size = cfg.pool_size
        nb_object_classes = cfg.nb_object_classes
        nb_hoi_classes = cfg.nb_hoi_classes
        print('   Obj. classes:', nb_object_classes)
        print('   HOI classes:', nb_hoi_classes)

        ########################
        ######## Inputs ########
        ########################
        # NOTE: every Input below is created up front, whether or not the
        # model that consumes it ends up being built.

        # RPN #
        img_input = keras.layers.Input(shape=(None, None, 3),
                                       name='input_image')

        # DET #
        nb_detection_rois = cfg.nb_detection_rois if self.mode == 'train' else None
        img_det_input = keras.layers.Input(shape=(None, None, 3),
                                           name='input_image')
        roi_input = keras.layers.Input(shape=(nb_detection_rois, 5),
                                       name='input_roi')

        # HOI #
        # Fast HOI inputs: one image plus per-ROI human/object boxes and
        # interaction windows.
        nb_hoi_rois = cfg.nb_hoi_rois if self.mode == 'train' else None
        img_hoi_input = keras.layers.Input(shape=(None, None, 3),
                                           name='input_image')
        human_fast_input = keras.layers.Input(shape=(nb_hoi_rois, 5),
                                              name="input_human")
        object_fast_input = keras.layers.Input(shape=(nb_hoi_rois, 5),
                                               name="input_object")
        interaction_fast_input = keras.layers.Input(shape=(nb_hoi_rois,
                                                           cfg.winShape[0],
                                                           cfg.winShape[1], 2),
                                                    name="input_interaction")

        # Slow HOI inputs: fixed-size 227x227 human/object images and a single
        # interaction window per sample.
        human_img_input = keras.layers.Input(shape=(227, 227, 3),
                                             name="input_human_img")

        object_img_input = keras.layers.Input(shape=(227, 227, 3),
                                              name="input_object_img")
        interaction_slow_input = keras.layers.Input(shape=(cfg.winShape[0],
                                                           cfg.winShape[1], 2),
                                                    name="input_interaction")
        human_slow_input = keras.layers.Input(shape=(5, ), name="input_human")
        object_slow_input = keras.layers.Input(shape=(5, ),
                                               name="input_object")

        # SHARED #
        # Feature map input used instead of an image when the CNN is shared
        # in test mode.
        features_input = keras.layers.Input(shape=(None, None, 512),
                                            name="input_features")

        ########################
        ######### RPN ##########
        ########################
        if self.do_rpn:
            print('   Creating RPN model...')
            output_features = models.VGG16_buildin(cfg)(img_input)
            self.nb_models += 1

            rpn_inputs = [img_input]

            rpn_features = layers.rpn(cfg)([output_features])

            # One sigmoid score per anchor.
            x_class = keras.layers.Conv2D(
                filters=nb_anchors,
                kernel_size=(1, 1),
                activation='sigmoid',
                kernel_initializer=keras.initializers.RandomNormal(
                    stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                bias_regularizer=keras.regularizers.l2(cfg.weight_decay),
                name='rpn_out_class')(rpn_features)

            # Four linear regression outputs per anchor.
            x_deltas = keras.layers.Conv2D(
                filters=nb_anchors * 4,
                kernel_size=(1, 1),
                activation='linear',
                kernel_initializer=keras.initializers.RandomNormal(
                    stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                bias_regularizer=keras.regularizers.l2(cfg.weight_decay),
                name='rpn_out_regress')(rpn_features)

            # With a shared CNN at test time the backbone features are
            # exposed as a third output.
            if self.mode == 'test' and cfg.use_shared_cnn:
                rpn_outputs = [x_class, x_deltas, output_features]
            else:
                rpn_outputs = [x_class, x_deltas]

            self.model_rpn = keras.models.Model(inputs=rpn_inputs,
                                                outputs=rpn_outputs)
            self.model_rpn.name = 'rpn'

            # Only train from conv3_1
            print('   Freezing first few layers...')

            nb_freeze_layers = 17 if cfg.do_finetune else cfg.nb_freeze_layers
            print('   Freeze up to', nb_freeze_layers)
            # Freezes layer indices 0..nb_freeze_layers inclusive: the layer
            # at index nb_freeze_layers is frozen before the loop breaks.
            for i, layer in enumerate(self.model_rpn.layers):
                layer.trainable = False
                if i == nb_freeze_layers:
                    break

        ########################
        ###### Detection #######
        ########################
        if self.do_det:
            print('   Creating DET model...')

            self.nb_models += 1

            if self.mode == 'test' and cfg.use_shared_cnn:
                print('   -using shared CNN')
                output_features_det = features_input
                detection_inputs = [features_input, roi_input]
            else:
                output_features_det = models.VGG16_buildin(cfg)(img_det_input)
                detection_inputs = [img_det_input, roi_input]

            object_rois = layers.RoiPoolingConv(
                pool_size=pool_size, batch_size=cfg.nb_detection_rois)(
                    [output_features_det, roi_input])

            object_features = layers.fullyConnected(
                cfg, stream='det', use_dropout=True)([object_rois])

            # Output layers get a different name when fine-tuning.
            object_scores = keras.layers.TimeDistributed(
                keras.layers.Dense(
                    units=nb_object_classes,
                    activation='softmax',
                    kernel_initializer=keras.initializers.RandomNormal(
                        stddev=0.01),
                    kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                    bias_regularizer=keras.regularizers.l2(cfg.weight_decay)),
                name="det_out_class" if not cfg.do_finetune else
                "det_fineout_class")(object_features)

            # Four regression outputs for every class but one.
            object_deltas = keras.layers.TimeDistributed(
                keras.layers.Dense(
                    units=4 * (nb_object_classes - 1),
                    activation="linear",
                    kernel_initializer=keras.initializers.RandomNormal(
                        stddev=0.001),
                    kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                    bias_regularizer=keras.regularizers.l2(cfg.weight_decay)),
                name="det_out_regress" if not cfg.do_finetune else
                "det_fineout_regress")(object_features)

            detection_outputs = [object_scores, object_deltas]

            self.model_det = keras.models.Model(inputs=detection_inputs,
                                                outputs=detection_outputs)
            self.model_det.name = 'det'

            # Only train from conv3_1
            nb_freeze_layers = 17 if cfg.do_finetune else cfg.nb_freeze_layers
            # Same inclusive freezing rule as for the RPN model above.
            for i, layer in enumerate(self.model_det.layers):
                layer.trainable = False
                if i == nb_freeze_layers:
                    break

        ########################
        ######### HOI ##########
        ########################
        if self.do_hoi and cfg.do_fast_hoi:
            print('   Creating fast HOI model...')
            self.nb_models += 1

            if self.mode == 'test' and cfg.use_shared_cnn:
                print('   -using shared CNN')
                output_features_hoi = features_input
                hoi_inputs = [
                    features_input, human_fast_input, object_fast_input,
                    interaction_fast_input
                ]
            else:
                # Any backbone value other than 'vgg' selects AlexNet.
                if cfg.backbone == 'vgg':
                    output_features_hoi = models.VGG16_buildin(cfg)(
                        img_hoi_input)
                else:
                    output_features_hoi = models.AlexNet_buildin(cfg)(
                        img_hoi_input)
                hoi_inputs = [
                    img_hoi_input, human_fast_input, object_fast_input,
                    interaction_fast_input
                ]

            ## HUMAN ##
            hoi_human_rois = layers.RoiPoolingConv(
                pool_size=pool_size,
                batch_size=cfg.nb_hoi_rois,
                mode=self.mode)([output_features_hoi, human_fast_input])

            hoi_human_features = layers.fullyConnected(
                cfg, stream='human')([hoi_human_rois])

            # Raw (no activation) per-class scores; the three streams are
            # summed before the final activation.
            hoi_human_scores = keras.layers.TimeDistributed(
                keras.layers.Dense(
                    units=1 * nb_hoi_classes,
                    activation=None,
                    kernel_initializer=keras.initializers.RandomNormal(
                        stddev=0.01),
                    kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                ),
                name="scores4human" if not cfg.do_finetune else
                "scores4human_finetune")(hoi_human_features)

            ## OBJECT ##
            hoi_object_rois = layers.RoiPoolingConv(
                pool_size=pool_size,
                batch_size=cfg.nb_hoi_rois,
                mode=self.mode)([output_features_hoi, object_fast_input])

            hoi_object_features = layers.fullyConnected(
                cfg, stream='object')([hoi_object_rois])

            hoi_object_scores = keras.layers.TimeDistributed(
                keras.layers.Dense(
                    units=1 * nb_hoi_classes,
                    activation=None,
                    kernel_initializer=keras.initializers.RandomNormal(
                        stddev=0.01),
                    kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                ),
                name="scores4object" if not cfg.do_finetune else
                "scores4object_finetune")(hoi_object_features)

            ## INTERACTION ##
            hoi_pattern_features = layers.pairwiseStream(cfg=cfg)(
                [interaction_fast_input])
            hoi_pattern_scores = keras.layers.TimeDistributed(
                keras.layers.Dense(
                    units=1 * nb_hoi_classes,
                    activation=None,
                    kernel_initializer=keras.initializers.RandomNormal(
                        stddev=0.01),
                    kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                ),
                name="scores4pattern" if not cfg.do_finetune else
                "scores4pattern_finetune")(hoi_pattern_features)

            ## FINAL ##
            hoi_score = keras.layers.Add()(
                [hoi_human_scores, hoi_object_scores, hoi_pattern_scores])

            # Softmax for categorical HOI, sigmoid otherwise.
            hoi_final_score = keras.layers.Activation(
                "softmax" if cfg.do_categorical_hoi else 'sigmoid',
                name="hoi_out_class"
                if not cfg.do_finetune else "hoi_fineout_class")(hoi_score)

            # Rebind the box inputs to identity-layer outputs so they can be
            # emitted as model outputs in test mode. hoi_inputs above still
            # holds the original Input tensors.
            human_fast_input = layers.identity(cfg)([human_fast_input])
            object_fast_input = layers.identity(cfg)([object_fast_input])

            if self.mode == 'test':
                hoi_outputs = [
                    hoi_final_score, human_fast_input, object_fast_input
                ]
            else:
                hoi_outputs = [hoi_final_score]

            self.model_hoi = keras.models.Model(inputs=hoi_inputs,
                                                outputs=hoi_outputs)
            self.model_hoi.name = 'hoi'

        if self.do_hoi and not cfg.do_fast_hoi:
            print('   Creating slow HOI model...')
            self.nb_models += 1

            # Separate backbone instances for the human and the object image.
            if cfg.backbone == 'vgg':
                hoi_human_features = models.VGG16_buildin(cfg)(human_img_input)
                hoi_object_features = models.VGG16_buildin(cfg)(
                    object_img_input)
            else:
                hoi_human_features = models.AlexNet_buildin(cfg)(
                    human_img_input)
                hoi_object_features = models.AlexNet_buildin(cfg)(
                    object_img_input)

            hoi_inputs = [
                human_img_input, object_img_input, interaction_slow_input,
                human_slow_input, object_slow_input
            ]

            ## HUMAN ##
            hoi_human_scores = keras.layers.Dense(
                units=1 * nb_hoi_classes,
                activation=None,
                kernel_initializer=keras.initializers.RandomNormal(
                    stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                name="scores4human")(hoi_human_features)

            ## OBJECT ##
            hoi_object_scores = keras.layers.Dense(
                units=1 * nb_hoi_classes,
                activation=None,
                kernel_initializer=keras.initializers.RandomNormal(
                    stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                name="scores4object")(hoi_object_features)

            ## INTERACTION ##
            # NOTE(review): intct_expansion / intct_reduction presumably add
            # and remove the extra axis that pairwiseStream and
            # TimeDistributed expect - confirm against layers.
            interaction_input = layers.intct_expansion(cfg)(
                [interaction_slow_input])

            hoi_pattern_features = layers.pairwiseStream(cfg=cfg)(
                [interaction_input])
            hoi_pattern_scores = keras.layers.TimeDistributed(
                keras.layers.Dense(
                    units=1 * nb_hoi_classes,
                    activation=None,
                    kernel_initializer=keras.initializers.RandomNormal(
                        stddev=0.01),
                    kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                ),
                name='scores4pattern')(hoi_pattern_features)

            hoi_pattern_scores = layers.intct_reduction(cfg)(
                [hoi_pattern_scores])

            ## FINAL ##
            hoi_score = keras.layers.Add()(
                [hoi_human_scores, hoi_object_scores, hoi_pattern_scores])

            # The slow model always uses sigmoid, regardless of
            # cfg.do_categorical_hoi.
            hoi_final_score = keras.layers.Activation(
                "sigmoid", name="hoi_out_class")(hoi_score)

            # Rebind the box inputs to identity-layer outputs so they can be
            # emitted as model outputs in test mode.
            human_slow_input = layers.identity(cfg)([human_slow_input])
            object_slow_input = layers.identity(cfg)([object_slow_input])

            if self.mode == 'test':
                hoi_outputs = [
                    hoi_final_score, human_slow_input, object_slow_input
                ]
            else:
                hoi_outputs = [hoi_final_score]

            self.model_hoi = keras.models.Model(inputs=hoi_inputs,
                                                outputs=hoi_outputs)
            self.model_hoi.name = 'hoi'
예제 #5
0
  def __init__(self, G_ch=64, dim_z=128, bottom_width=4, resolution=128,
               G_kernel_size=3, G_attn='64', n_classes=1000,
               num_G_SVs=1, num_G_SV_itrs=1,
               G_shared=True, shared_dim=0, hier=False,
               cross_replica=False, mybn=False,
               G_activation=nn.ReLU(inplace=False),
               G_lr=5e-5, G_B1=0.0, G_B2=0.999, adam_eps=1e-8,
               BN_eps=1e-5, SN_eps=1e-12, G_mixed_precision=False, G_fp16=False,
               G_init='ortho', skip_init=False, no_optim=False,
               G_param='SN', norm_style='bn',
               **kwargs):
    """Build the generator's layers and, unless disabled, its Adam optimizer.

    Original author's notes (translated): these arguments are defined in
    ``utils`` and arrive packed via the argument parser and ``vars``.
    ``G_ch`` is the channel width multiplier; ``G_arch`` (defined elsewhere)
    expands it into a per-resolution layout, e.g. for ``ch = 64``::

        arch[128] = {'in_channels' :  [ch * item for item in [16, 16, 8, 4, 2]],
                     'out_channels' : [ch * item for item in [16, 8, 4, 2, 1]],
                     'upsample' : [True] * 5,
                     'resolution' : [8, 16, 32, 64, 128],
                     'attention' : {2**i: (2**i in [int(item) for item in attention.split('_')])
                                    for i in range(3,8)}}

    Args:
      G_ch: Channel width multiplier forwarded to ``G_arch``.
      dim_z: Dimensionality of the latent noise. When ``hier`` is true it is
        rounded down to a multiple of the number of z slots.
      bottom_width: Initial spatial size; the first linear layer emits
        ``in_channels[0] * bottom_width ** 2`` features.
      resolution: Key used to pick the architecture entry from ``G_arch``.
      G_kernel_size: Stored as ``self.kernel_size``; the convolutions built
        here hard-code ``kernel_size=3`` and do not read it.
      G_attn: Attention specification forwarded to ``G_arch``.
      n_classes: Number of classes for conditional generation.
      num_G_SVs: ``num_svs`` forwarded to the spectral-norm layers.
      num_G_SV_itrs: ``num_itrs`` forwarded to the spectral-norm layers.
      G_shared: If true, ``self.shared`` is an ``nn.Embedding`` and the
        conditional batchnorm uses a bias-free linear layer; otherwise
        ``self.shared`` is ``layers.identity()`` and the batchnorm uses an
        embedding directly.
      shared_dim: Width of the shared embedding; ``dim_z`` is used when this
        is not positive.
      hier: Whether z is split into chunks (one per block plus one).
      cross_replica: Forwarded to ``layers.ccbn`` and ``layers.bn``.
      mybn: Forwarded to ``layers.ccbn`` and ``layers.bn``.
      G_activation: Nonlinearity module passed to every ``GBlock`` and used
        in the output layer.
      G_lr: Adam learning rate.
      G_B1: Adam beta1.
      G_B2: Adam beta2.
      adam_eps: Adam epsilon.
      BN_eps: Epsilon forwarded to ``layers.ccbn``.
      SN_eps: Epsilon forwarded to the spectral-norm layers.
      G_mixed_precision: If true, use ``utils.Adam16`` instead of
        ``optim.Adam``.
      G_fp16: Stored as ``self.fp16``.
      G_init: Stored as ``self.init`` (presumably consumed by
        ``init_weights`` -- confirm against that method).
      skip_init: If true, ``self.init_weights()`` is not called.
      no_optim: If true, return before any optimizer state is created
        (intended for EMA copies).
      G_param: ``'SN'`` selects spectral-normalised conv/linear layers; any
        other value selects plain ``nn.Conv2d`` / ``nn.Linear``.
      norm_style: Forwarded to ``layers.ccbn``.
      **kwargs: Accepted and ignored.
    """
    super(Generator, self).__init__()
    # Channel width multiplier
    self.ch = G_ch
    # Dimensionality of the latent space
    self.dim_z = dim_z
    # The initial spatial dimensions; sizes the output of self.linear below.
    self.bottom_width = bottom_width
    # Resolution of the output; selects which architecture entry is used.
    self.resolution = resolution
    # Kernel size?
    # TODO: stored only; the convs below hard-code kernel_size=3.
    self.kernel_size = G_kernel_size
    # Attention?
    # Only a pass-through: handed to G_arch, which resolves it into the
    # per-resolution 'attention' table read in the block loop below.
    self.attention = G_attn
    # number of classes, for use in categorical conditional generation
    self.n_classes = n_classes
    # Use shared embeddings? (defaults to True in this signature)
    self.G_shared = G_shared
    # Dimensionality of the shared embedding? Unused if not using G_shared
    self.shared_dim = shared_dim if shared_dim > 0 else dim_z
    # Hierarchical latent space?
    self.hier = hier
    # Cross replica batchnorm?
    self.cross_replica = cross_replica
    # Use my batchnorm?
    self.mybn = mybn
    # nonlinearity for residual blocks
    self.activation = G_activation
    # Initialization style
    self.init = G_init
    # Parameterization style
    self.G_param = G_param
    # Normalization style
    self.norm_style = norm_style
    # Epsilon for BatchNorm?
    self.BN_eps = BN_eps
    # Epsilon for Spectral Norm?
    # Background reading: https://zhuanlan.zhihu.com/p/68081406
    self.SN_eps = SN_eps
    # fp16?
    self.fp16 = G_fp16
    # Architecture dict
    self.arch = G_arch(self.ch, self.attention)[resolution]

    # If using hierarchical latents, adjust z
    if self.hier:
      # Number of places z slots into
      self.num_slots = len(self.arch['in_channels']) + 1
      self.z_chunk_size = (self.dim_z // self.num_slots)
      # Recalculate latent dimensionality for even splitting into chunks
      self.dim_z = self.z_chunk_size * self.num_slots
    else:
      self.num_slots = 1
      self.z_chunk_size = 0

    # Which convs, batchnorms, and linear layers to use
    if self.G_param == 'SN':
      self.which_conv = functools.partial(layers.SNConv2d,
                          kernel_size=3, padding=1,
                          num_svs=num_G_SVs, num_itrs=num_G_SV_itrs,
                          eps=self.SN_eps)
      self.which_linear = functools.partial(layers.SNLinear,
                          num_svs=num_G_SVs, num_itrs=num_G_SV_itrs,
                          eps=self.SN_eps)
    else:
      self.which_conv = functools.partial(nn.Conv2d, kernel_size=3, padding=1)
      self.which_linear = nn.Linear

    # We use a non-spectral-normed embedding here regardless;
    # For some reason applying SN to G's embedding seems to randomly cripple G
    # (Porting note: fluid.dygraph.Embedding == nn.Embedding)
    self.which_embedding = nn.Embedding
    bn_linear = (functools.partial(self.which_linear, bias=False) if self.G_shared
                 else self.which_embedding)

    self.which_bn = functools.partial(layers.ccbn,
                          which_linear=bn_linear,
                          cross_replica=self.cross_replica,
                          mybn=self.mybn,
                          input_size=(self.shared_dim + self.z_chunk_size if self.G_shared
                                      else self.n_classes),
                          norm_style=self.norm_style,
                          eps=self.BN_eps)

    # Prepare model
    # If not using shared embeddings, self.shared is just a passthrough
    self.shared = (self.which_embedding(n_classes, self.shared_dim) if G_shared
                   else layers.identity())
    # First linear layer
    self.linear = self.which_linear(self.dim_z // self.num_slots,
                                    self.arch['in_channels'][0] * (self.bottom_width ** 2))

    # blocks is a doubly-nested list of modules, the outer loop intended
    # to be over blocks at a given resolution (resblocks and/or self-attention)
    # while the inner loop is over a given block
    blocks = []
    for index, out_channels in enumerate(self.arch['out_channels']):
      upsample = (functools.partial(F.interpolate, scale_factor=2)
                  if self.arch['upsample'][index] else None)
      stage = [layers.GBlock(in_channels=self.arch['in_channels'][index],
                             out_channels=out_channels,
                             which_conv=self.which_conv,
                             which_bn=self.which_bn,
                             activation=self.activation,
                             upsample=upsample)]

      # If attention on this block, attach it to the end
      stage_resolution = self.arch['resolution'][index]
      if self.arch['attention'][stage_resolution]:
        print('Adding attention layer in G at resolution %d' % stage_resolution)
        stage.append(layers.Attention(out_channels, self.which_conv))
      blocks.append(stage)

    # Turn the nested list into a ModuleList so that it's all properly registered.
    self.blocks = nn.ModuleList([nn.ModuleList(stage) for stage in blocks])

    # output layer: batchnorm-relu-conv.
    # Consider using a non-spectral conv here
    self.output_layer = nn.Sequential(layers.bn(self.arch['out_channels'][-1],
                                                cross_replica=self.cross_replica,
                                                mybn=self.mybn),
                                      self.activation,
                                      self.which_conv(self.arch['out_channels'][-1], 3))

    # Initialize weights. Optionally skip init for testing.
    if not skip_init:
      self.init_weights()

    # Set up optimizer
    # If this is an EMA copy, no need for an optim, so just return now
    # (self.lr / self.B1 / self.B2 / self.adam_eps are left unset in that case).
    if no_optim:
      return
    self.lr, self.B1, self.B2, self.adam_eps = G_lr, G_B1, G_B2, adam_eps
    if G_mixed_precision:
      print('Using fp16 adam in G...')
      # Imported lazily so the project-local utils module is only needed
      # when the fp16 optimizer is actually requested.
      import utils
      self.optim = utils.Adam16(params=self.parameters(), lr=self.lr,
                           betas=(self.B1, self.B2), weight_decay=0,
                           eps=self.adam_eps)
    else:
      self.optim = optim.Adam(params=self.parameters(), lr=self.lr,
                           betas=(self.B1, self.B2), weight_decay=0,
                           eps=self.adam_eps)