    def __init__(self, cfg, in_channels):
        super(FPNDetNetFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION

        pooler = Pooler(cfg.MODEL.ROI_BOX_HEAD)

        input_size = in_channels * resolution**2
        self.use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN
        self.pooler = pooler
        self.fc6 = nn.Conv2d(
            in_channels,
            in_channels * 4,
            kernel_size=resolution,
            stride=resolution,
            padding=0,
            bias=False if self.use_gn else True,
        )
        if self.use_gn:
            self.gn6 = group_norm(in_channels * 4)
        self.fc7 = nn.Conv2d(
            in_channels * 4,
            in_channels * 4,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False if self.use_gn else True,
        )
        if self.use_gn:
            self.gn7 = group_norm(in_channels * 4)
        self.out_channels = in_channels * 4
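Every example on this page calls a group_norm helper rather than nn.GroupNorm directly. A minimal sketch of such a helper, assuming maskrcnn-benchmark's fixed-group convention (32 groups is that project's default; a real fork may read the count from cfg.MODEL.GROUP_NORM):

import torch.nn as nn

def group_norm(out_channels, num_groups=32, eps=1e-5, affine=True):
    # GroupNorm statistics are computed per sample over channel groups,
    # so they are independent of batch size, which is the usual reason
    # these detection heads pick it over (frozen) BatchNorm.
    # Assumes out_channels is divisible by num_groups.
    return nn.GroupNorm(num_groups, out_channels, eps=eps, affine=affine)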
Example #2
    def __init__(
        self,
        cfg,
        in_channels,
        num_backbone_stages,
        top_blocks,
    ):
        """
        Arguments:
            cfg: global configuration node
            in_channels (int): number of channels of each FPN feature map
            num_backbone_stages (int): number of bottom-up augmentation
                stages to build
            top_blocks (nn.Module or None): if provided, an extra operation will
                be performed on the output of the last (smallest resolution)
                FPN output, and the result will extend the result list
        """
        super(Bottom2UP, self).__init__()
        # Original FPN top-down construction, kept for reference:
        # self.inner_blocks = []
        # self.layer_blocks = []
        # for idx, in_channels in enumerate(in_channels_list, 1):  # start index is 1
        #     inner_block = "fpn_inner{}".format(idx)  # named by index: fpn_inner1, ..., fpn_inner4
        #     layer_block = "fpn_layer{}".format(idx)  # named by index: fpn_layer1, ..., fpn_layer4
        #
        #     if in_channels == 0:
        #         continue
        #     inner_block_module = conv_block(in_channels, out_channels, 1)  # 1x1 conv whose main job is mapping the channel count to out_channels
        #     layer_block_module = conv_block(out_channels, out_channels, 3,
        #                                     1)  # 3x3 conv on the fused result to suppress upsampling aliasing
        #     self.add_module(inner_block, inner_block_module)
        #     self.add_module(layer_block, layer_block_module)
        #     self.inner_blocks.append(inner_block)
        #     self.layer_blocks.append(layer_block)
        # self.top_blocks = top_blocks  # keep top_blocks as an FPN member; it decides whether the last output gets an extra op (max pooling here)
        self.panet_buttomup_conv1_modules = nn.ModuleList()
        self.panet_buttomup_conv2_modules = nn.ModuleList()
        for i in range(num_backbone_stages):
            if cfg.MODEL.FPN.PANET.USE_GN:
                self.panet_buttomup_conv1_modules.append(
                    nn.Sequential(
                        nn.Conv2d(in_channels, in_channels, 3, 2, 1,
                                  bias=True),  # stride-2 downsampling
                        group_norm(in_channels),
                        nn.ReLU(inplace=True)))
                self.panet_buttomup_conv2_modules.append(
                    nn.Sequential(
                        nn.Conv2d(in_channels, in_channels, 3, 1, 1,
                                  bias=True),  # applied after the element-wise addition
                        group_norm(in_channels),
                        nn.ReLU(inplace=True)))
            else:
                self.panet_buttomup_conv1_modules.append(
                    nn.Conv2d(in_channels, in_channels, 3, 2, 1))
                self.panet_buttomup_conv2_modules.append(
                    nn.Conv2d(in_channels, in_channels, 3, 1, 1))
        self.top_blocks = top_blocks
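A hedged sketch of the forward pass this __init__ supports (the real forward() is not part of the excerpt): each stage downsamples the running feature with conv1, adds the next FPN level, then refines with conv2, PANet-style. The name fpn_features, its ordering (highest resolution first), and its length (num_backbone_stages + 1) are assumptions.

    def forward(self, fpn_features):  # hypothetical; not shown in the excerpt above
        results = [fpn_features[0]]
        for i, (down, fuse) in enumerate(
                zip(self.panet_buttomup_conv1_modules,
                    self.panet_buttomup_conv2_modules)):
            # stride-2 conv on the running map, fused with the next FPN level
            x = down(results[-1]) + fpn_features[i + 1]
            results.append(fuse(x))
        if self.top_blocks is not None:
            results.extend(self.top_blocks(results[-1]))
        return tuple(results)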
Example #3
    def __init__(self,
                 channels,
                 group=1,
                 kernel=3,
                 dilation=(1, 4, 8, 12),
                 shuffle=False,
                 deform=None):
        super(OneDynamicWeightsCat11, self).__init__()
        in_channel = channels // 4
        self.scale1 = nn.Sequential(
            nn.Conv2d(channels, in_channel, 1, padding=0, bias=False),
            group_norm(in_channel), nn.ReLU(inplace=True))

        if deform == 'deform':
            self.cata = nn.Conv2d(in_channel,
                                  group * kernel * kernel + 18,
                                  3,
                                  padding=dilation[0],
                                  dilation=dilation[0],
                                  bias=False)
            self.unfold1 = DeformUnfold(kernel_size=(3, 3),
                                        padding=dilation[0],
                                        dilation=dilation[0])

        else:
            self.cata = nn.Conv2d(in_channel,
                                  group * kernel * kernel,
                                  3,
                                  padding=dilation[0],
                                  dilation=dilation[0],
                                  bias=False)
            self.unfold1 = nn.Unfold(kernel_size=(3, 3),
                                     padding=dilation[0],
                                     dilation=dilation[0])

        self.softmax = nn.Softmax(dim=-1)

        self.shuffle = shuffle
        self.deform = deform
        self.group = group
        self.K = kernel * kernel

        self.scale2 = nn.Sequential(
            nn.Conv2d(in_channel * 2, in_channel, 1, padding=0, bias=True),
            group_norm(in_channel), nn.ReLU(inplace=True))

        self.scale3 = nn.Sequential(
            nn.Conv2d(in_channel, channels, 1, padding=0, bias=True),
            group_norm(channels), nn.ReLU(inplace=True))
Example #4
    def __init__(self, plane):
        super(GCNwithNonlocal, self).__init__()
        inter_plane = plane // 2
        self.node_k = nn.Conv2d(plane, inter_plane, kernel_size=1)
        self.node_v = nn.Conv2d(plane, inter_plane, kernel_size=1)
        self.node_q = nn.Conv2d(plane, inter_plane, kernel_size=1)

        self.conv_wg = nn.Conv1d(inter_plane,
                                 inter_plane,
                                 kernel_size=1,
                                 bias=False)
        self.bn_wg = group_norm(inter_plane)
        self.softmax = nn.Softmax(dim=2)

        self.out = nn.Sequential(nn.Conv2d(inter_plane, plane, kernel_size=1),
                                 group_norm(plane))
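This is the non-local/graph-convolution pattern seen in DGCNet-style code. A hedged reconstruction of the usual forward() (the reshapes and the residual fusion are assumptions; assumes import torch):

    def forward(self, x):  # hypothetical reconstruction; forward() is not in the excerpt
        b, c, h, w = x.size()
        node_k = self.node_k(x).view(b, -1, h * w).permute(0, 2, 1)  # (b, hw, c')
        node_q = self.node_q(x).view(b, -1, h * w)                   # (b, c', hw)
        node_v = self.node_v(x).view(b, -1, h * w).permute(0, 2, 1)  # (b, hw, c')
        av = self.softmax(torch.bmm(node_q, node_v))                 # (b, c', c') affinity
        av = torch.bmm(node_k, av).transpose(1, 2).contiguous()      # (b, c', hw)
        av = self.bn_wg(self.conv_wg(av)).view(b, -1, h, w)
        return torch.relu(self.out(av) + x)                          # residual fusion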
Example #5
    def __init__(self, channels):
        super(deformMP, self).__init__()
        in_channel = channels // 4
        self.scale1 = nn.Sequential(
            nn.Conv2d(channels, in_channel, 1, padding=0, bias=False),
            group_norm(in_channel), nn.ReLU(inplace=True))

        self.off_conva = nn.Conv2d(in_channel, 18, 3, padding=1, bias=False)
        self.kernel_conva = DeformConv(in_channel,
                                       in_channel,
                                       kernel_size=3,
                                       padding=1,
                                       bias=False)

        self.scale3 = nn.Sequential(
            nn.Conv2d(in_channel, channels, 1, padding=0, bias=False),
            group_norm(channels), nn.ReLU(inplace=True))
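A hedged forward sketch for this module: off_conva predicts the 18 offset channels (2 offsets per tap of a 3x3 kernel) that drive the DeformConv, whose forward takes (input, offset) in the common DCN implementations. The exact wiring is an assumption:

    def forward(self, x):  # hypothetical; forward() is not shown above
        y = self.scale1(x)                # reduce to channels // 4
        offset = self.off_conva(y)        # 18 = 2 * 3 * 3 offset channels
        y = self.kernel_conva(y, offset)  # deformable 3x3 conv guided by offsets
        return self.scale3(y)             # restore to `channels`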
Example #6
    def __init__(self, cfg, in_channels):
        super(FPNXconv1fcFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(output_size=(resolution, resolution),
                        scales=scales,
                        sampling_ratio=sampling_ratio,
                        cfg=cfg)
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN
        conv_head_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_HEAD_DIM
        num_stacked_convs = cfg.MODEL.ROI_BOX_HEAD.NUM_STACKED_CONVS
        dilation = cfg.MODEL.ROI_BOX_HEAD.DILATION

        use_ws = cfg.MODEL.USE_WS

        xconvs = []
        for ix in range(num_stacked_convs):
            if use_ws:
                xconvs.append(
                    Conv2dWS(in_channels,
                             conv_head_dim,
                             kernel_size=3,
                             stride=1,
                             padding=dilation,
                             dilation=dilation,
                             bias=False if use_gn else True))
            else:
                xconvs.append(
                    nn.Conv2d(in_channels,
                              conv_head_dim,
                              kernel_size=3,
                              stride=1,
                              padding=dilation,
                              dilation=dilation,
                              bias=False if use_gn else True))
            in_channels = conv_head_dim
            if use_gn:
                xconvs.append(group_norm(in_channels))
            xconvs.append(nn.ReLU(inplace=True))

        self.add_module("xconvs", nn.Sequential(*xconvs))
        for modules in [
                self.xconvs,
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    if not use_gn:
                        torch.nn.init.constant_(l.bias, 0)

        input_size = conv_head_dim * resolution**2
        representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM
        self.fc6 = make_fc(input_size, representation_size, use_gn=False)
        self.out_channels = representation_size
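make_fc here is the maskrcnn-benchmark-style helper. A minimal sketch, assuming that project's Kaiming-uniform initialization convention:

def make_fc(dim_in, hidden_dim, use_gn=False):
    # When group norm follows, the linear layer drops its bias.
    if use_gn:
        fc = nn.Linear(dim_in, hidden_dim, bias=False)
        nn.init.kaiming_uniform_(fc.weight, a=1)
        return nn.Sequential(fc, group_norm(hidden_dim))
    fc = nn.Linear(dim_in, hidden_dim)
    nn.init.kaiming_uniform_(fc.weight, a=1)
    nn.init.constant_(fc.bias, 0)
    return fc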
Example #7
def conv3x3(in_channels, out_channels, module_name, postfix, stride=1, groups=1, kernel_size=3, padding=1):
    """3x3 convolution with padding"""
    return [
        (f'{module_name}_{postfix}/conv',
         nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False)),
        (f'{module_name}_{postfix}/norm',
            group_norm(out_channels) if _GN else FrozenBatchNorm2d(out_channels)
        ),
        (f'{module_name}_{postfix}/relu', nn.ReLU(inplace=True))
    ]
Example #8
    def __init__(self, channels):
        super(ConvMP, self).__init__()
        in_channel = channels // 4
        self.scale1 = nn.Sequential(
            nn.Conv2d(channels, in_channel, 1, padding=0, bias=False),
            group_norm(in_channel), nn.ReLU(inplace=True))

        self.cata = nn.Conv2d(in_channel,
                              in_channel,
                              3,
                              padding=1,
                              groups=in_channel)

        self.scale2 = nn.Sequential(
            nn.Conv2d(in_channel * 2, in_channel, 1, padding=0, bias=False),
            group_norm(in_channel), nn.ReLU(inplace=True))

        self.scale3 = nn.Sequential(
            nn.Conv2d(in_channel, channels, 1, padding=0, bias=False),
            group_norm(channels), nn.ReLU(inplace=True))
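scale2 expects in_channel * 2 inputs, which suggests the depthwise cata output is concatenated with its own input before fusion. A hedged sketch (assumes import torch):

    def forward(self, x):  # hypothetical reconstruction
        y = self.scale1(x)                         # (b, c/4, h, w)
        z = self.cata(y)                           # depthwise 3x3 message passing
        y = self.scale2(torch.cat([y, z], dim=1))  # fuse original + aggregated
        return self.scale3(y)                      # restore to c channels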
Example #9
def DFConv3x3(in_channels, out_channels, module_name, postfix, stride=1, groups=1, kernel_size=3, 
                with_modulated_dcn=None, deformable_groups=None):
    """3x3 convolution with padding"""
    return [
        (f'{module_name}_{postfix}/conv',
         DFConv2d(in_channels, out_channels, with_modulated_dcn=with_modulated_dcn,
            kernel_size=kernel_size, stride=stride, groups=groups, 
            deformable_groups=deformable_groups, bias=False)),
        (f'{module_name}_{postfix}/norm',
            group_norm(out_channels) if _GN else FrozenBatchNorm2d(out_channels)
        ),
        (f'{module_name}_{postfix}/relu', nn.ReLU(inplace=True))
    ]
Example #10
def conv1x1(in_channels,
            out_channels,
            module_name,
            postfix,
            stride=1,
            groups=1,
            kernel_size=1,
            padding=0):
    """1x1 convolution with padding"""
    return [
        ('{}_{}/conv'.format(module_name, postfix),
         nn.Conv2d(in_channels,
                   out_channels,
                   kernel_size=kernel_size,
                   stride=stride,
                   padding=padding,
                   groups=groups,
                   bias=False)),
        ('{}_{}/norm'.format(module_name, postfix),
         group_norm(out_channels) if _GN else FrozenBatchNorm2d(out_channels)),
        ('{}_{}/relu'.format(module_name, postfix), nn.ReLU(inplace=True))
    ]
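The conv3x3/DFConv3x3/conv1x1 helpers above return (name, module) pairs rather than bare modules, so callers can assemble an OrderedDict-backed nn.Sequential with readable layer names, VoVNet-style. A usage sketch (the 'OSA2_1' names and channel sizes are illustrative, and the module-level _GN flag is assumed to be set):

from collections import OrderedDict
import torch.nn as nn

# Lists of pairs concatenate, then become named submodules.
stage = nn.Sequential(OrderedDict(
    conv3x3(64, 128, 'OSA2_1', '0') +
    conv1x1(128, 256, 'OSA2_1', 'concat')
))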
Example #11
    def __init__(self, cfg):
        super(FPNXconv1fcFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN
        use_gw = cfg.MODEL.ROI_BOX_HEAD.USE_GW

        in_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS
        conv_head_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_HEAD_DIM
        num_stacked_convs = cfg.MODEL.ROI_BOX_HEAD.NUM_STACKED_CONVS
        dilation = cfg.MODEL.ROI_BOX_HEAD.DILATION

        if cfg.MODEL.DECONV.LAYERWISE_NORM:
            norm_type = cfg.MODEL.DECONV.BOX_NORM_TYPE
        else:
            norm_type = 'none'
            if cfg.MODEL.DECONV.BOX_NORM_TYPE == 'layernorm':
                self.box_norm = LayerNorm(eps=cfg.MODEL.DECONV.EPS)

        xconvs = []
        for ix in range(num_stacked_convs):
            if cfg.MODEL.ROI_BOX_HEAD.USE_DECONV:
                xconvs.append(
                    Deconv(in_channels,
                           conv_head_dim,
                           kernel_size=3,
                           stride=1,
                           padding=dilation,
                           dilation=dilation,
                           bias=True,
                           block=cfg.MODEL.DECONV.BLOCK,
                           sampling_stride=cfg.MODEL.DECONV.STRIDE,
                           sync=cfg.MODEL.DECONV.SYNC,
                           norm_type=norm_type))
                in_channels = conv_head_dim
            else:
                xconvs.append(
                    nn.Conv2d(in_channels,
                              conv_head_dim,
                              kernel_size=3,
                              stride=1,
                              padding=dilation,
                              dilation=dilation,
                              bias=False if (use_gn or use_gw) else True))
                in_channels = conv_head_dim
                if use_gn or use_gw:
                    xconvs.append(group_norm(in_channels))

            xconvs.append(nn.ReLU(inplace=True))

        self.add_module("xconvs", nn.Sequential(*xconvs))
        for modules in [
                self.xconvs,
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d) or isinstance(l, Deconv):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    if not (use_gn or use_gw):
                        torch.nn.init.constant_(l.bias, 0)

        input_size = conv_head_dim * resolution**2
        representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM

        block = 0
        use_delinear = cfg.MODEL.ROI_BOX_HEAD.USE_DECONV
        if use_delinear:
            block = cfg.MODEL.DECONV.BLOCK_FC  #check here

        self.fc6 = make_fc(input_size,
                           representation_size,
                           use_gn=False,
                           use_gw=False,
                           use_delinear=use_delinear,
                           block=block,
                           sync=cfg.MODEL.DECONV.SYNC,
                           norm_type=norm_type)
Example #12
    def __init__(self, planes, ratio=4):
        super(GloReLocalModule, self).__init__()

        self.phi = nn.Conv2d(planes,
                             planes // ratio * 2,
                             kernel_size=1,
                             bias=False)
        self.bn_phi = group_norm(planes // ratio * 2)
        self.theta = nn.Conv2d(planes,
                               planes // ratio,
                               kernel_size=1,
                               bias=False)
        self.bn_theta = group_norm(planes // ratio)

        #  Interaction Space
        #  Adjacency Matrix: (-)A_g
        self.conv_adj = nn.Conv1d(planes // ratio,
                                  planes // ratio,
                                  kernel_size=1,
                                  bias=False)
        self.bn_adj = group_norm(planes // ratio)

        #  State Update Function: W_g
        self.conv_wg = nn.Conv1d(planes // ratio * 2,
                                 planes // ratio * 2,
                                 kernel_size=1,
                                 bias=False)
        self.bn_wg = group_norm(planes // ratio * 2)

        #  last fc
        self.conv3 = nn.Conv2d(planes // ratio * 2,
                               planes,
                               kernel_size=1,
                               bias=False)
        self.bn3 = group_norm(planes)

        self.local = nn.Sequential(
            nn.Conv2d(planes,
                      planes,
                      3,
                      groups=planes,
                      stride=2,
                      padding=1,
                      bias=False), group_norm(planes),
            nn.Conv2d(planes,
                      planes,
                      3,
                      groups=planes,
                      stride=2,
                      padding=1,
                      bias=False), group_norm(planes))

        self.gcn_local_attention = GCNwithNonlocal(planes)

        self.sigmoid_spatial = nn.Sigmoid()

        self.final = nn.Sequential(
            nn.Conv2d(planes * 2, planes, kernel_size=1, bias=False),
            group_norm(planes))

        self.relu = nn.ReLU(inplace=True)
Example #13
    def __init__(self, cfg, in_channels):
        """
        Arguments:
            num_classes (int): number of output classes
            input_size (int): number of channels of the input once it's flattened
            representation_size (int): size of the intermediate representation
        """
        super(MaskRCNNPANETFeatureExtractor, self).__init__()
        self.cfg = cfg
        resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION  # 14
        scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES  # (0.25, 0.125, 0.0625, 0.03125)
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO  # 2
        pooler = AdaptivePooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        input_size = in_channels
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS  # (256, 256, 256, 256)
        dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION

        next_feature = input_size
        self.blocks = []
        # for layer_idx, layer_features in enumerate(layers, 1):
        #     layer_name = "mask_fcn{}".format(layer_idx)
        #     module = make_conv3x3(
        #         next_feature, layer_features,
        #         dilation=dilation, stride=1, use_gn=use_gn
        #     )  # dilated convolution is used here
        #     self.add_module(layer_name, module)
        #     next_feature = layer_features
        #     self.blocks.append(layer_name)
        self.add_module("mask_fcn1_1",
                        make_conv3x3(next_feature, layers[0], dilation=dilation, stride=1, use_gn=use_gn))
        self.add_module("mask_fcn1_2",
                        make_conv3x3(next_feature, layers[0], dilation=dilation, stride=1, use_gn=use_gn))
        self.add_module("mask_fcn1_3",
                        make_conv3x3(next_feature, layers[0], dilation=dilation, stride=1, use_gn=use_gn))
        self.add_module("mask_fcn1_4",
                        make_conv3x3(next_feature, layers[0], dilation=dilation, stride=1, use_gn=use_gn))
        next_feature = layers[0]
        for layer_idx, layer_features in enumerate(layers[1:], 2):
            layer_name = "mask_fcn{}".format(layer_idx)
            module = make_conv3x3(
                next_feature, layer_features,
                dilation=dilation, stride=1, use_gn=use_gn
            )  # dilated convolution is used here
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        # TODO: modules for separating foreground from background; their weights must be initialized!
        conv4 = nn.Conv2d(layers[2], layers[2], 3, 1, padding=1 * dilation, dilation=dilation, bias=False)
        nn.init.kaiming_normal_(
            conv4.weight, mode="fan_out", nonlinearity="relu"
        )
        self.mask_conv4_fc = nn.Sequential(
            conv4,
            group_norm(layers[2]),
            nn.ReLU(inplace=True))
        # --------------------------------------------------------------------------------------------------------#
        conv5 = nn.Conv2d(layers[2], int(layers[2] / 2), 3, 1, padding=1 * dilation, dilation=dilation, bias=False)
        nn.init.kaiming_normal_(
            conv5.weight, mode="fan_out", nonlinearity="relu"
        )
        self.mask_conv5_fc = nn.Sequential(
            conv5,
            group_norm(int(layers[2] / 2)),
            nn.ReLU(inplace=True))
        # self.mask_conv5_fc = nn.Sequential(
        #     nn.Conv2d(layers[2], int(layers[2] / 2), 3, 1, padding=1 * dilation, dilation=dilation, bias=False),
        #     group_norm(int(layers[2] / 2)),
        #     nn.ReLU(inplace=True))
        # nn.init.kaiming_normal_(
        #     self.mask_conv5_fc.weight, mode="fan_out", nonlinearity="relu"
        # )
        #---------------------------------------------------------------------------------------------------------#
        fc = nn.Linear(int(layers[2] / 2) * cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION ** 2,
                       (2 * cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION) ** 2, bias=True)
        nn.init.kaiming_normal_(
            fc.weight, mode="fan_out", nonlinearity="relu"
        )
        self.mask_fc = nn.Sequential(
            fc,
            nn.ReLU(inplace=True))
        # self.mask_fc = nn.Sequential(
        #     nn.Linear(int(layers[2] / 2) * cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION ** 2,
        #               (2 * cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION) ** 2, bias=True),
        #     nn.ReLU(inplace=True))
        # nn.init.kaiming_normal_(
        #     self.mask_fc.weight, mode="fan_out", nonlinearity="relu"
        # )

        self.out_channels = layer_features
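The conv4_fc/conv5_fc/fc chain above is PANet's fully connected fusion branch for the mask head. A hedged fragment of how a forward() typically combines it with the FCN branch (the reshape to a single class-agnostic map and the fusion by addition are assumptions based on the PANet design):

        # hypothetical tail of forward(), after x has passed mask_fcn1..4:
        x_ff = self.mask_conv4_fc(x)
        x_ff = self.mask_conv5_fc(x_ff)
        x_ff = self.mask_fc(x_ff.flatten(start_dim=1))  # (N, (2R)**2)
        side = 2 * self.cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
        x_ff = x_ff.view(-1, 1, side, side)  # one class-agnostic map per RoI
        # the FCN branch is upsampled to `side` and the two maps are added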
Example #14
    def __init__(self, cfg, in_channels):
        super(FPNXconv1fc_panet_FeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(output_size=(resolution, resolution),
                        scales=scales,
                        sampling_ratio=sampling_ratio,
                        panet=True)
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN
        conv_head_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_HEAD_DIM
        num_stacked_convs = cfg.MODEL.ROI_BOX_HEAD.NUM_STACKED_CONVS
        dilation = cfg.MODEL.ROI_BOX_HEAD.DILATION

        xconvs = []
        for ix in range(num_stacked_convs):
            xconvs.append(
                nn.Conv2d(in_channels,
                          conv_head_dim,
                          kernel_size=3,
                          stride=1,
                          padding=dilation,
                          dilation=dilation,
                          bias=False if use_gn else True))
            in_channels = conv_head_dim
            if use_gn:
                xconvs.append(group_norm(in_channels))
            xconvs.append(nn.ReLU(inplace=True))

        self.add_module("xconvs", nn.Sequential(*xconvs))
        # temporary: use a hard-coded number of levels
        num_levels = 4
        self.conv1_head = nn.ModuleList()
        for i in range(num_levels):
            self.conv1_head.append(
                nn.Sequential(
                    *((nn.Conv2d(in_channels,
                                 conv_head_dim,
                                 kernel_size=3,
                                 stride=1,
                                 padding=dilation,
                                 dilation=dilation,
                                 bias=False if use_gn else True), ) +
                      ((group_norm(in_channels), ) if use_gn else ()) +
                      (nn.ReLU(inplace=True), ))))

        #HACK: use MSRA INIT
        for modules in [
                self.xconvs,
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.kaiming_normal_(l.weight,
                                                  mode="fan_out",
                                                  nonlinearity="relu")
                    # torch.nn.init.normal_(l.weight, std=0.01)
                    if not use_gn:
                        torch.nn.init.constant_(l.bias, 0)
        for modules in [
                self.conv1_head,
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.kaiming_normal_(l.weight,
                                                  mode="fan_out",
                                                  nonlinearity="relu")
                    if not use_gn:
                        torch.nn.init.constant_(l.bias, 0)

        input_size = conv_head_dim * resolution**2
        representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM
        self.fc6 = make_fc(input_size, representation_size, use_gn=False)
        self.out_channels = representation_size
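For orientation, a hedged shape walk-through of this head with the common maskrcnn-benchmark defaults (POOLER_RESOLUTION=7, CONV_HEAD_DIM=256, MLP_HEAD_DIM=1024); extractor is an assumed instance of the class above:

import torch
import torch.nn.functional as F

x = torch.randn(8, 256, 7, 7)   # 8 pooled RoIs, as produced by self.pooler
x = extractor.xconvs(x)         # (8, 256, 7, 7) after the stacked 3x3 convs
x = x.view(x.size(0), -1)       # (8, 256 * 7 * 7) = (8, 12544)
x = F.relu(extractor.fc6(x))    # (8, 1024) box-head representation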