Beispiel #1
0
    def __init__(self, cfg):
        """Create the mask predictor layers and, optionally, an overlap branch.

        Args:
            cfg: Project configuration node. This constructor reads
                ``MODEL.ROI_BOX_HEAD.NUM_CLASSES``,
                ``MODEL.ROI_MASK_HEAD.CONV_LAYERS`` (last entry),
                ``MODEL.ROI_MASK_HEAD.OVERLAP``, ``MODEL.ROI_HEADS.USE_FPN``
                and, only when FPN is off, ``MODEL.RESNETS.RES2_OUT_CHANNELS``.
        """
        super(MaskRCNNC4Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        mask_head_cfg = cfg.MODEL.ROI_MASK_HEAD
        dim_reduced = mask_head_cfg.CONV_LAYERS[-1]

        # With FPN the input width is the last mask-head conv width; without
        # it the width is the res2 width scaled by 2**(stage_index - 1) for
        # stage 4.
        num_inputs = dim_reduced
        if not cfg.MODEL.ROI_HEADS.USE_FPN:
            res_stage = 4
            res2_width = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
            num_inputs = res2_width * 2 ** (res_stage - 1)

        self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)

        # Optional second branch with a single output channel.
        if mask_head_cfg.OVERLAP:
            self.conv5_overlap = ConvTranspose2d(
                num_inputs, dim_reduced, 2, 2, 0)
            self.overlap_fcn_logits = Conv2d(dim_reduced, 1, 1, 1, 0)

        # Caffe2 uses MSRAFill, which corresponds to kaiming_normal_ in
        # PyTorch; biases are zeroed.
        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                nn.init.kaiming_normal_(
                    tensor, mode="fan_out", nonlinearity="relu")
 def __init__(self, cfg):
     """Create the convolution stack of the RoIAlign mask feature extractor.

     Three 3x3 convolutions are always created. The tail depends on
     ``cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL``:

     * ``== 1``: a transposed convolution ``conv5_mask`` plus a
       single-channel ``mask_fcn_logits`` convolution.
     * otherwise: ``mask_fcn_logits`` is ``None`` and ``conv5_mask`` is a
       3x3 convolution with 16 output channels.

     Args:
         cfg: Project configuration node; only
             ``MODEL.RELATION_MASK.EXTRACTOR_CHANNEL`` is read here.
     """
     super(RoiAlignMaskFeatureExtractor, self).__init__()
     # NOTE(review): 257 is hard-coded; presumably 256 feature channels plus
     # one extra input channel -- confirm against the caller.
     input_channels = 257
     self.mask_fcn1 = Conv2d(input_channels, 256, 3, 1, 1)
     self.mask_fcn2 = Conv2d(256, 256, 3, 1, 1)
     self.mask_fcn3 = Conv2d(256, 256, 3, 1, 1)

     single_channel = cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL == 1
     if single_channel:
         self.conv5_mask = ConvTranspose2d(256, 256, 2, 2, 0)
         self.mask_fcn_logits = Conv2d(256, 1, 3, 1, 1)
     else:
         self.mask_fcn_logits = None
         self.conv5_mask = Conv2d(256, 16, 3, 1, 1)

     # Initialise every created layer in creation order: Kaiming-normal
     # weights (fan_out, relu) and zero biases.
     layers_to_init = [
         self.mask_fcn1, self.mask_fcn2, self.mask_fcn3, self.conv5_mask
     ]
     if single_channel:
         layers_to_init.append(self.mask_fcn_logits)
     for layer in layers_to_init:
         nn.init.kaiming_normal_(layer.weight,
                                 mode="fan_out",
                                 nonlinearity="relu")
         nn.init.constant_(layer.bias, 0)
Beispiel #3
0
    def __init__(self, cfg, in_channels):
        """Create the mask predictor with an optional dual-modal upsampler.

        Args:
            cfg: Project configuration node. Reads
                ``MODEL.ROI_BOX_HEAD.NUM_CLASSES`` and, from
                ``MODEL.ROI_MASK_HEAD``, ``CONV_LAYERS`` (last entry),
                ``DUAL_MODAL`` and ``USE_GN``. The node itself is kept on
                ``self.cfg``.
            in_channels: Number of channels of the incoming feature map.
        """
        super(MaskRCNNC4Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        head_cfg = cfg.MODEL.ROI_MASK_HEAD
        dim_reduced = head_cfg.CONV_LAYERS[-1]
        self.dual_modal = head_cfg.DUAL_MODAL
        self.use_gn = head_cfg.USE_GN

        if not self.dual_modal:
            self.conv5_mask = ConvTranspose2d(in_channels, dim_reduced, 2, 2, 0)
        else:
            # A 3x3 conv producing 4 * dim_reduced channels, created together
            # with a PixelShuffle(2) module.
            self.conv5_mask = Conv2d(in_channels, dim_reduced * 4, 3, padding=1)
            self.pixel_shuffle = nn.PixelShuffle(2)

        self.mask_fcn_final = Conv2d(dim_reduced, num_classes, 1, 1, 0)
        self.cfg = cfg

        # Caffe2 uses MSRAFill, which corresponds to kaiming_normal_ in
        # PyTorch; biases are zeroed.
        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                nn.init.kaiming_normal_(
                    tensor, mode="fan_out", nonlinearity="relu")
    def __init__(self, cfg):
        """Create the transposed-conv layer and optional sequence predictor.

        No mask-logits layer is created by this constructor.

        Args:
            cfg: Project configuration node. Reads
                ``MODEL.ROI_MASK_HEAD.CONV_LAYERS`` (last entry),
                ``MODEL.ROI_HEADS.USE_FPN``, ``SEQUENCE.SEQ_ON`` and, depending
                on the FPN flag, either ``MODEL.ROI_MASK_HEAD.MIX_OPTION`` or
                ``MODEL.RESNETS.RES2_OUT_CHANNELS``.
        """
        super(SeqRCNNC4Predictor, self).__init__()
        dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

        if not cfg.MODEL.ROI_HEADS.USE_FPN:
            # res2 width scaled by 2**(stage_index - 1) for stage 4.
            res_stage = 4
            res2_width = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
            num_inputs = res2_width * 2 ** (res_stage - 1)
        else:
            # The input width depends on the configured mix option.
            mix_option = cfg.MODEL.ROI_MASK_HEAD.MIX_OPTION
            if mix_option == 'CAT':
                num_inputs = dim_reduced + 1
            elif mix_option in ('MIX', 'ATTENTION_CHANNEL'):
                num_inputs = dim_reduced * 2
            else:
                num_inputs = dim_reduced

        self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
        if cfg.SEQUENCE.SEQ_ON:
            self.seq = make_roi_seq_predictor(cfg, dim_reduced)

        # Caffe2 uses MSRAFill, which corresponds to kaiming_normal_ in
        # PyTorch; biases are zeroed. This also covers the parameters of
        # ``self.seq`` when it exists.
        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                nn.init.kaiming_normal_(
                    tensor, mode="fan_out", nonlinearity="relu")
Beispiel #5
0
    def __init__(self, num_inputs=256, dim_reduced=256, num_conv=0, no_transform1=False, first_kernel=3,
                 no_relu=False, use_leaky_relu=False):
        """Create two transform blocks, each followed by a transposed convolution.

        Layer order: ``transform1`` -> ``deconv1`` -> ``transform2`` ->
        ``deconv2``. ``deconv2`` maps back to ``num_inputs`` channels.

        Args:
            num_inputs: Channel count fed to ``transform1`` and produced by
                ``deconv2``.
            dim_reduced: Channel count used between the blocks.
            num_conv: Forwarded to ``self.build_transform``.
            no_transform1: When true, ``transform1`` is an ``EmptyBlock``
                instead of a built transform.
            first_kernel: Stored on ``self.first_kernel``.
            no_relu: Stored on ``self.no_relu``.
            use_leaky_relu: Stored on ``self.use_leaky_relu``.
        """
        super(DeConvUpSampler, self).__init__()
        # Stored before build_transform is called; presumably build_transform
        # reads these flags -- confirm against its definition.
        self.first_kernel = first_kernel
        self.no_relu = no_relu
        self.use_leaky_relu = use_leaky_relu
        if no_transform1:
            self.transform1 = EmptyBlock()
        else:
            self.transform1 = self.build_transform(num_inputs, dim_reduced, dim_reduced, num_conv)
        self.deconv1 = ConvTranspose2d(dim_reduced, dim_reduced, 2, 2, 0)
        self.transform2 = self.build_transform(dim_reduced, dim_reduced, dim_reduced, num_conv)
        self.deconv2 = ConvTranspose2d(dim_reduced, num_inputs, 2, 2, 0)

        module_groups = [
            self.transform1.modules(),
            self.transform2.modules(),
            [self.deconv1, self.deconv2],
        ]
        for module_group in module_groups:
            for layer in module_group:
                if not isinstance(layer, (nn.Conv2d, nn.ConvTranspose2d)):
                    continue
                nn.init.kaiming_normal_(layer.weight, mode="fan_out", nonlinearity="relu")
                # A conv created with bias=False has ``bias is None``;
                # skip it instead of crashing inside nn.init.constant_.
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)
    def __init__(self, cfg):
        """Create the mask predictor from plain or ``Deconv``-based layers.

        Args:
            cfg: Project configuration node. Reads
                ``MODEL.ROI_BOX_HEAD.NUM_CLASSES``,
                ``MODEL.ROI_MASK_HEAD.CONV_LAYERS`` (last entry),
                ``MODEL.ROI_MASK_HEAD.USE_DECONV``, ``MODEL.ROI_HEADS.USE_FPN``,
                ``MODEL.RESNETS.RES2_OUT_CHANNELS`` (only without FPN) and,
                when ``USE_DECONV`` is set, ``MODEL.DECONV.{BLOCK,
                LAYERWISE_NORM, MASK_NORM_TYPE, EPS, STRIDE, SYNC}``.
        """
        super(MaskRCNNC4Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

        # With FPN the input width is the last mask-head conv width; without
        # it the width is the res2 width scaled by 2**(stage_index - 1) for
        # stage 4.
        num_inputs = dim_reduced
        if not cfg.MODEL.ROI_HEADS.USE_FPN:
            res_stage = 4
            res2_width = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
            num_inputs = res2_width * 2 ** (res_stage - 1)

        if not cfg.MODEL.ROI_MASK_HEAD.USE_DECONV:
            self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
            self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)
        else:
            deconv_cfg = cfg.MODEL.DECONV
            block = deconv_cfg.BLOCK

            # Layer-wise norm passes the configured norm type into each
            # layer. Otherwise the layers get 'none', and a separate
            # ``mask_norm`` module is created only for 'layernorm'.
            norm_type = 'none'
            if deconv_cfg.LAYERWISE_NORM:
                norm_type = deconv_cfg.MASK_NORM_TYPE
            elif deconv_cfg.MASK_NORM_TYPE == 'layernorm':
                self.mask_norm = LayerNorm(eps=deconv_cfg.EPS)

            shared_kwargs = dict(
                block=block,
                sampling_stride=deconv_cfg.STRIDE,
                sync=deconv_cfg.SYNC,
                norm_type=norm_type,
            )
            self.conv5_mask = DeconvTransposed(
                num_inputs, dim_reduced, 2, 2, 0, **shared_kwargs)
            self.mask_fcn_logits = Deconv(
                dim_reduced, num_classes, 1, 1, 0, **shared_kwargs)

        # Caffe2 uses MSRAFill, which corresponds to kaiming_normal_ in
        # PyTorch; biases are zeroed.
        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                nn.init.kaiming_normal_(
                    tensor, mode="fan_out", nonlinearity="relu")
Beispiel #7
0
    def __init__(self, cfg, predictor):
        """Set up the sub-modules and bookkeeping of the mask refinement net.

        Args:
            cfg: Project configuration node. A clone is stored on ``self.cfg``.
                Reads ``MODEL.RELATION_MASK.{EXTRACTOR_CHANNEL, IOU_COOR, TYPE,
                SAME_PREDICTOR}`` and ``MODEL.ROI_BOX_HEAD.NUM_CLASSES``.
            predictor: Stored on ``self.predictor`` only when
                ``MODEL.RELATION_MASK.SAME_PREDICTOR`` is truthy; otherwise it
                is not used by this constructor.
        """
        super(MaskRelationRefineNet, self).__init__()
        self.cfg = cfg.clone()
        # NOTE(review): ``hide_dim`` is computed but never read again in this
        # method.
        hide_dim = (
            784, ) if cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL == 1 else (int(
                14 * 14 * 16), )
        # 784 for a single extractor channel, otherwise 14 * 14 * 16 = 3136.
        self.output_channel = 784 if cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL == 1 else int(
            14 * 14 * 16)
        # 28 for a single extractor channel, otherwise 14.
        self.relation_hw = 14 if cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL != 1 else 28
        self.appearance_feature_extractor = make_relation_mask_feature_extractor(
            cfg)
        self.prepare_sort_by_cluster = False
        self.num_center_per_class = 1
        # NOTE(review): ``self.geo_feature_dim`` is read here but is not
        # assigned earlier in this method; presumably it is a class-level
        # attribute -- confirm, otherwise this raises AttributeError whenever
        # IOU_COOR is truthy.
        if self.cfg.MODEL.RELATION_MASK.IOU_COOR == True and self.geo_feature_dim == 4:
            self.geo_feature_dim = 5
        # Larger dimensions are scaled by 5/4 (a value of exactly 5 is left
        # unchanged).
        if cfg.MODEL.RELATION_MASK.IOU_COOR and self.geo_feature_dim > 5:
            self.geo_feature_dim = int(self.geo_feature_dim / 4 * 5)
        self.boxcoder = BoxCoder(weights=(10., 10., 5., 5.))
        # self.class_agnostic = cfg.MODEL.RELATION_NMS.CLASS_AGNOSTIC
        # Number of classes minus one (presumably excluding background --
        # confirm).
        self.fg_class = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES - 1
        # in_channel = int(16 * 14 * 14) if cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL!=1 else ()
        # 1x1 convolution from EXTRACTOR_CHANNEL channels to 3 channels.
        self.classifier = nn.Conv2d(cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL,
                                    3, 1)
        # self.classifier = nn.Linear(128, int(cfg.MODEL.ROI_MASK_HEAD.RESOLUTION * cfg.MODEL.ROI_MASK_HEAD.RESOLUTION), bias=True)

        # NOTE(review): this 16-channel layer is replaced further down
        # whenever SAME_PREDICTOR is falsy.
        if cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL != 1:
            self.deconv_1 = ConvTranspose2d(16, 16, 2, 2, 0)

        # self.detections_per_img = cfg.MODEL.ROI_HEADS.DETECTIONS_PER_IMG
        self.iter = 0
        # ``relation_module`` is only assigned for the 'CAM' and 'CIAM' types;
        # for any other TYPE value the attribute is not set here.
        if self.cfg.MODEL.RELATION_MASK.TYPE == 'CAM':
            self.relation_module = CAM_Module(128)
        elif self.cfg.MODEL.RELATION_MASK.TYPE == 'CIAM':
            self.relation_module = CIAM_Module(cfg)
        if self.cfg.MODEL.RELATION_MASK.SAME_PREDICTOR:
            # Use the predictor passed in by the caller.
            self.predictor = predictor
        else:
            # Build dedicated layers; these overwrite the ``deconv_1`` and
            # ``classifier`` modules created above.
            self.deconv_1 = ConvTranspose2d(
                cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL,
                cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL, 2, 2, 0)
            self.classifier = nn.Conv2d(
                cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL, 3, 1, 1, 0)
    def __init__(self, cfg):
        """Create the pooler and the stack of mask-head convolution layers.

        Layers are registered as ``mask_fcn1``, ``mask_fcn2``, ... and their
        names are collected in ``self.blocks``. When attention is enabled an
        extra ``mask_attn2`` module is registered just before ``mask_fcn2``.

        Args:
            cfg: Project configuration node. Reads, from
                ``MODEL.ROI_MASK_HEAD``: ``POOLER_RESOLUTION``,
                ``POOLER_SCALES``, ``POOLER_SAMPLING_RATIO``, ``CONV_LAYERS``,
                ``ATTN`` and ``RESOLUTION``; and
                ``MODEL.BACKBONE.OUT_CHANNELS``.
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        head_cfg = cfg.MODEL.ROI_MASK_HEAD
        resolution = head_cfg.POOLER_RESOLUTION
        self.pooler = Pooler(
            output_size=(resolution, resolution),
            scales=head_cfg.POOLER_SCALES,
            sampling_ratio=head_cfg.POOLER_SAMPLING_RATIO,
        )
        conv_widths = head_cfg.CONV_LAYERS
        in_width = cfg.MODEL.BACKBONE.OUT_CHANNELS
        self.blocks = []
        # Attention is enabled by any non-empty ATTN setting.
        self.use_attn = head_cfg.ATTN != ""

        # Upsampling is used only when the mask resolution is exactly four
        # times the pooler resolution.
        use_upsample = (head_cfg.RESOLUTION / resolution) == 4.0

        for layer_idx, out_width in enumerate(conv_widths, 1):
            # The third layer becomes a transposed convolution when
            # upsampling; every other layer is a 3x3 convolution.
            if use_upsample and layer_idx == 3:
                module = ConvTranspose2d(in_width, out_width, 2, 2, 0)
            else:
                module = Conv2d(in_width, out_width, 3, 1, 1)

            # Caffe2 uses MSRAFill, which corresponds to kaiming_normal_ in
            # PyTorch.
            nn.init.kaiming_normal_(
                module.weight, mode="fan_out", nonlinearity="relu")
            nn.init.constant_(module.bias, 0)

            if self.use_attn and layer_idx == 2:
                attn_size = (out_width, resolution, resolution)
                self.add_module("mask_attn{}".format(layer_idx),
                                RoIAttnModule(cfg, attn_size))

            layer_name = "mask_fcn{}".format(layer_idx)
            self.add_module(layer_name, module)
            self.blocks.append(layer_name)
            in_width = out_width
Beispiel #9
0
    def __init__(self, cfg, in_channels):
        """Create the transposed-conv layer and the per-class logits layer.

        Args:
            cfg: Project configuration node. Reads
                ``MODEL.ROI_BOX_HEAD.NUM_CLASSES`` and the last entry of
                ``MODEL.ROI_MASK_HEAD.CONV_LAYERS``.
            in_channels: Number of channels of the incoming feature map.
        """
        super(MaskRCNNC4Predictor, self).__init__()
        n_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        hidden_width = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

        self.conv5_mask = ConvTranspose2d(in_channels, hidden_width, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(hidden_width, n_classes, 1, 1, 0)

        # Zero the biases and draw weights with kaiming_normal_.
        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                nn.init.kaiming_normal_(
                    tensor, mode="fan_out", nonlinearity="relu")
    def __init__(self, cfg):
        """Create three 3x3 convs, a transposed conv and a 1-channel logits conv.

        Args:
            cfg: Accepted for interface compatibility; not read by this
                constructor.
        """
        # Run the base-class initialiser before assigning any layer. Assuming
        # the enclosing class derives from torch.nn.Module (its header is not
        # visible here), sub-modules cannot be assigned before
        # Module.__init__() has run.
        super().__init__()
        input_channels = 256
        self.mask_fcn1 = Conv2d(input_channels, 256, 3, 1, 1)
        self.mask_fcn2 = Conv2d(256, 256, 3, 1, 1)
        self.mask_fcn3 = Conv2d(256, 256, 3, 1, 1)
        self.conv5_mask = ConvTranspose2d(256, 256, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(256, 1, 1, 1, 0)

        # Only initialise layers that are actually created above. The
        # previous list also named ``self.mask_fcn4``, which is never
        # defined, so construction always failed with AttributeError.
        created_layers = (
            self.mask_fcn1,
            self.mask_fcn2,
            self.mask_fcn3,
            self.conv5_mask,
            self.mask_fcn_logits,
        )
        for layer in created_layers:
            nn.init.kaiming_normal_(layer.weight,
                                    mode="fan_out",
                                    nonlinearity="relu")
            nn.init.constant_(layer.bias, 0)
 def __init__(self, cfg, in_channels):
     """Create the transposed-conv layer and an optional linear mask layer.

     No mask-logits convolution is created by this constructor.

     Args:
         cfg: Project configuration node. Reads, from
             ``MODEL.ROI_MASK_HEAD``, the last entry of ``CONV_LAYERS`` and
             the ``USE_MLPMASK`` flag.
         in_channels: Number of channels of the incoming feature map.
     """
     super(MaskXRCNNC4Predictor, self).__init__()
     head_cfg = cfg.MODEL.ROI_MASK_HEAD
     hidden_width = head_cfg.CONV_LAYERS[-1]
     self.USE_MLPMASK = head_cfg.USE_MLPMASK
     self.conv5_mask = ConvTranspose2d(in_channels, hidden_width, 2, 2, 0)
     if self.USE_MLPMASK:
         # NOTE(review): sizes are hard-coded (256 * 28 * 28 -> 28 * 28)
         # rather than derived from the configuration.
         self.MLP_mask = nn.Linear(256 * 28 * 28, 28 * 28)

     # Caffe2 uses MSRAFill, which corresponds to kaiming_normal_ in
     # PyTorch; biases are zeroed.
     for param_name, tensor in self.named_parameters():
         if "bias" in param_name:
             nn.init.constant_(tensor, 0)
             continue
         if "weight" in param_name:
             nn.init.kaiming_normal_(
                 tensor, mode="fan_out", nonlinearity="relu")
    def __init__(self, cfg):
        """Create the boundary predictor layers.

        Every layer is initialised right after it is created, with
        Kaiming-normal weights (fan_out, relu) and a zero bias.

        Args:
            cfg: Project configuration node. Reads, from
                ``MODEL.ROI_BOUNDARY_HEAD``, the last entry of ``CONV_LAYERS``
                and ``RESOLUTION``; plus ``MODEL.ROI_HEADS.USE_FPN`` and, only
                without FPN, ``MODEL.RESNETS.RES2_OUT_CHANNELS``.
        """
        super(BOUNDARYRCNNC4Predictor, self).__init__()

        def msra_init(layer):
            # Initialise one layer in place and hand it back for assignment.
            nn.init.kaiming_normal_(
                layer.weight, mode='fan_out', nonlinearity='relu')
            nn.init.constant_(layer.bias, 0)
            return layer

        boundary_cfg = cfg.MODEL.ROI_BOUNDARY_HEAD
        dim_reduced = boundary_cfg.CONV_LAYERS[-1]
        self.resol = boundary_cfg.RESOLUTION

        # With FPN the input width is the last head conv width; without it
        # the width is the res2 width scaled by 2**(stage_index - 1) for
        # stage 4.
        num_inputs = dim_reduced
        if not cfg.MODEL.ROI_HEADS.USE_FPN:
            res_stage = 4
            res2_width = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
            num_inputs = res2_width * 2 ** (res_stage - 1)

        self.bo_input_xy = msra_init(Conv2d(num_inputs, num_inputs, 1, 1, 0))
        self.conv5_bo_xy = msra_init(
            ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0))
        self.bo_input_1_1 = msra_init(Conv2d(dim_reduced, dim_reduced, 1, 1, 0))
        self.bo_input_2_1 = msra_init(Conv2d(dim_reduced, dim_reduced, 1, 1, 0))

        # Single-channel outputs; kernel and padding tuples are (H, W).
        self.conv5_bo_x = msra_init(
            Conv2d(dim_reduced, 1, (3, 1), 1, (1, 0)))
        self.conv5_bo_y = msra_init(
            Conv2d(dim_reduced, 1, (1, 3), 1, (0, 1)))
        self.up_scale = 2
Beispiel #13
0
    def __init__(self, cfg, in_channels):
        """Create the transposed-conv layer and the per-class logits layer.

        The input width is always ``in_channels``; no configuration flag
        overrides it in this constructor.

        Args:
            cfg: Project configuration node. Reads
                ``MODEL.ROI_BOX_HEAD.NUM_CLASSES`` and the last entry of
                ``MODEL.ROI_MASK_HEAD.CONV_LAYERS``.
            in_channels: Number of channels of the incoming feature map.
        """
        super(MaskRCNNC4Predictor, self).__init__()
        n_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        hidden_width = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

        self.conv5_mask = ConvTranspose2d(in_channels, hidden_width, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(hidden_width, n_classes, 1, 1, 0)

        # Caffe2 uses MSRAFill, which corresponds to kaiming_normal_ in
        # PyTorch; biases are zeroed.
        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                nn.init.kaiming_normal_(
                    tensor, mode="fan_out", nonlinearity="relu")
    def __init__(self, cfg, in_channels):
        """Create the mask predictor layers plus two adaptive max-pool modules.

        Args:
            cfg: Project configuration node. Reads
                ``MODEL.ROI_BOX_HEAD.NUM_CLASSES`` and the last entry of
                ``MODEL.ROI_MASK_HEAD.CONV_LAYERS``.
            in_channels: Number of channels of the incoming feature map.
        """
        super(MaskRCNNC4Predictor, self).__init__()
        n_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        hidden_width = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

        self.conv5_mask = ConvTranspose2d(in_channels, hidden_width, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(hidden_width, n_classes, 1, 1, 0)

        # Caffe2 uses MSRAFill, which corresponds to kaiming_normal_ in
        # PyTorch; biases are zeroed.
        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                nn.init.kaiming_normal_(
                    tensor, mode="fan_out", nonlinearity="relu")

        self.n_class = n_classes
        # Adaptive pools with fixed output sizes (28, 1) and (1, 28).
        self.maxpool_col = nn.AdaptiveMaxPool2d((28, 1))
        self.maxpool_row = nn.AdaptiveMaxPool2d((1, 28))
Beispiel #15
0
    def __init__(self, cfg, in_channels):
        """Create the transposed-conv layer and the per-class logits layer.

        Args:
            cfg: Project configuration node. Reads
                ``MODEL.ROI_BOX_HEAD.NUM_CLASSES`` (81 per the original
                author's note) and the last entry of
                ``MODEL.ROI_MASK_HEAD.CONV_LAYERS`` (256 per the same note).
            in_channels: Number of channels of the incoming feature map
                (256 per the original author's note).
        """
        super(MaskRCNNC4Predictor, self).__init__()
        n_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        hidden_width = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

        # Transposed convolution; per the original author's note it
        # upsamples by a factor of two (14 --> 28).
        deconv_args = dict(in_channels=in_channels,
                           out_channels=hidden_width,
                           kernel_size=2,
                           stride=2,
                           padding=0)
        self.conv5_mask = ConvTranspose2d(**deconv_args)
        # 1x1 convolution.
        self.mask_fcn_logits = Conv2d(hidden_width, n_classes, 1, 1, 0)

        # Zero the biases and draw weights with kaiming_normal_.
        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                nn.init.kaiming_normal_(
                    tensor, mode="fan_out", nonlinearity="relu")
Beispiel #16
0
    def __init__(self, cfg, in_channels):
        """Create the conv stack and two grouped transposed convs.

        Layout: four (3x3 Conv2d -> GroupNorm(32) -> ReLU) stages in
        ``self.convs``, then ``updeconv1_`` -> ``norm1`` -> ``updeconv2_``.
        Both transposed convolutions use ``groups=num_keypoints``.

        Args:
            cfg: Project configuration node; only
                ``MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES`` is read, as the number
                of keypoints.
            in_channels: Number of channels of the incoming feature map.
        """
        super(KeypointRCNNPredictor, self).__init__()
        self.in_channels = in_channels
        self.num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES
        self.num_convs = 4
        self.point_feat_channels = 32
        # Each keypoint gets its own group of feature channels.
        self.conv_out_channels = self.point_feat_channels * self.num_keypoints

        conv_kernel_size = 3
        conv_padding = (conv_kernel_size - 1) // 2
        deconv_kernel_size = 4
        deconv_padding = (deconv_kernel_size - 2) // 2

        # Only the first stage consumes the raw input width.
        stages = []
        for stage_idx in range(self.num_convs):
            if stage_idx == 0:
                stage_in = self.in_channels
            else:
                stage_in = self.conv_out_channels
            stages.append(
                nn.Sequential(
                    Conv2d(stage_in, self.conv_out_channels,
                           conv_kernel_size, 1, conv_padding),
                    GroupNorm(32, self.conv_out_channels),
                    nn.ReLU(inplace=True)))
        self.convs = nn.Sequential(*stages)

        self.updeconv1_ = ConvTranspose2d(self.conv_out_channels,
                                          self.conv_out_channels,
                                          kernel_size=deconv_kernel_size,
                                          stride=2,
                                          padding=deconv_padding,
                                          groups=self.num_keypoints)
        self.norm1 = GroupNorm(self.num_keypoints, self.conv_out_channels)

        # One output channel per keypoint.
        self.updeconv2_ = ConvTranspose2d(self.conv_out_channels,
                                          self.num_keypoints,
                                          kernel_size=deconv_kernel_size,
                                          stride=2,
                                          padding=deconv_padding,
                                          groups=self.num_keypoints)

        # First pass: Conv2d / Linear layers get default kaiming_normal_
        # weights and zero biases.
        for module in self.modules():
            if isinstance(module, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(module.weight.data)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
        # Second pass: transposed convs get small normal weights (std 0.001)
        # and zero biases.
        for module in self.modules():
            if isinstance(module, nn.ConvTranspose2d):
                nn.init.normal_(module.weight.data, std=0.001)
                if module.bias is not None:
                    module.bias.data.zero_()
        # Finally override the last layer's bias with -log(0.99 / 0.01),
        # roughly -4.6.
        nn.init.constant_(self.updeconv2_.bias, -np.log(0.99 / 0.01))
Beispiel #17
0
    def __init__(self, cfg):
        """Create the key-edge predictor layers and the match-type branch.

        Every layer is initialised right after it is created, with
        Kaiming-normal weights (fan_out, relu) and a zero bias.

        Args:
            cfg: Project configuration node. Reads
                ``MODEL.ROI_BOX_HEAD.NUM_CLASSES``, ``MODEL.ROI_HEADS.USE_FPN``,
                ``MODEL.RESNETS.RES2_OUT_CHANNELS`` (only without FPN) and,
                from ``MODEL.ROI_KE_HEAD``: ``CONV_LAYERS`` (last entry),
                ``RESOLUTION``, ``NUM_KES`` and ``NUM_MATCHTYPE``.

        Raises:
            AssertionError: If ``MODEL.ROI_KE_HEAD.NUM_KES`` is odd.
        """
        super(KERCNNC4Predictor, self).__init__()

        def msra_init(layer):
            # Initialise one layer in place and hand it back for assignment.
            nn.init.kaiming_normal_(layer.weight,
                                    mode='fan_out',
                                    nonlinearity='relu')
            nn.init.constant_(layer.bias, 0)
            return layer

        # NOTE(review): read from the config but not used below.
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        ke_cfg = cfg.MODEL.ROI_KE_HEAD
        dim_reduced = ke_cfg.CONV_LAYERS[-1]

        self.resol = ke_cfg.RESOLUTION

        # With FPN the input width is the last head conv width; without it
        # the width is the res2 width scaled by 2**(stage_index - 1) for
        # stage 4.
        num_inputs = dim_reduced
        if not cfg.MODEL.ROI_HEADS.USE_FPN:
            res_stage = 4
            res2_width = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
            num_inputs = res2_width * 2 ** (res_stage - 1)

        num_kes = ke_cfg.NUM_KES
        assert (num_kes % 2 == 0), 'require plural but got {}'.format(
            str(num_kes))
        # Half the key-edge count plus two output channels per axis.
        num_pred = int(num_kes / 2 + 2)

        self.ke_input_xy = msra_init(Conv2d(num_inputs, num_inputs, 1, 1, 0))
        self.conv5_ke_xy = msra_init(
            ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0))

        # Kernel tuples are (H, W): one spans the full resolution in H, the
        # other in W.
        self.conv5_ke_x_shrink = msra_init(
            Conv2d(dim_reduced, num_pred, (self.resol, 1), 1, 0))
        self.conv5_ke_y_shrink = msra_init(
            Conv2d(dim_reduced, num_pred, (1, self.resol), 1, 0))

        # Match-type branch.
        self.cat_trans = msra_init(Conv2d(dim_reduced, num_kes, 1, 1, 0))
        half_resol = int(self.resol / 2)
        self.mty = msra_init(
            Conv2d(num_kes, ke_cfg.NUM_MATCHTYPE, (half_resol, half_resol),
                   1, 0))

        self.up_scale = 2