Example #1
def panoptic_upsampler_block(in_channels, out_channels, expansion):

    modules = []

    if expansion == 0:
        modules.append(
            make_conv3x3(in_channels,
                         out_channels,
                         dilation=1,
                         stride=1,
                         use_gn=True,
                         use_relu=True,
                         kaiming_init=True))  # no upsample

    for i in range(expansion):
        modules.append(
            make_conv3x3(in_channels if i == 0 else out_channels,
                         out_channels,
                         dilation=1,
                         stride=1,
                         use_gn=True,
                         use_relu=True,
                         kaiming_init=True))
        modules.append(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False))

    return nn.Sequential(*modules)
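These snippets all lean on a make_conv3x3 helper in the style of maskrcnn-benchmark's make_layers module. A minimal sketch of such a helper, covering only the keyword arguments used in the examples (dilation, stride, use_gn, use_relu, kaiming_init) and making the usual assumptions about defaults, could look like this; any particular fork may differ in signature or extra options:

import torch
import torch.nn as nn


def group_norm(out_channels, num_groups=32):
    # assumed default: 32 groups, as in maskrcnn-benchmark
    return nn.GroupNorm(num_groups, out_channels)


def make_conv3x3(in_channels, out_channels, dilation=1, stride=1,
                 use_gn=False, use_relu=False, kaiming_init=True):
    # 3x3 conv whose padding matches the dilation, so spatial size is preserved at stride 1
    conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride,
                     padding=dilation, dilation=dilation, bias=not use_gn)
    if kaiming_init:
        nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
    else:
        nn.init.normal_(conv.weight, std=0.01)
    if not use_gn:
        nn.init.constant_(conv.bias, 0)
    modules = [conv]
    if use_gn:
        modules.append(group_norm(out_channels))
    if use_relu:
        modules.append(nn.ReLU(inplace=True))
    return nn.Sequential(*modules) if len(modules) > 1 else conv

With a helper like that in scope, the block above can be exercised directly; the sizes here are only an illustration:

block = panoptic_upsampler_block(in_channels=256, out_channels=128, expansion=2)
y = block(torch.randn(1, 256, 32, 32))
print(y.shape)  # two conv + upsample stages: torch.Size([1, 128, 128, 128])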
Example #2
    def __init__(self, cfg, in_channels, out_channels, mode="bilinear"):
        super(TwoConvUpsampleStage, self).__init__()

        self.mode = mode
        self.conv1 = make_conv3x3(in_channels,
                                  out_channels,
                                  use_gn=cfg.MODEL.SEMANTIC.USE_GN,
                                  use_relu=True)
        self.conv2 = make_conv3x3(in_channels,
                                  out_channels,
                                  use_gn=cfg.MODEL.SEMANTIC.USE_GN,
                                  use_relu=True)
    def __init__(self, cfg):
        super(MaskIoUFeatureExtractor, self).__init__()

        input_channels = 260  # default 257
        use_gn = cfg.MODEL.MASKIOU_USE_GN

        self.maskiou_fcn1 = make_conv3x3(input_channels, 256, use_gn=use_gn)
        self.maskiou_fcn2 = make_conv3x3(256, 256, use_gn=use_gn)
        self.maskiou_fcn3 = make_conv3x3(256, 256, use_gn=use_gn)
        self.maskiou_fcn4 = make_conv3x3(256, 256, stride=2, use_gn=use_gn)
        self.maskiou_fc1 = make_fc(256 * 7 * 7, 1024, use_gn=use_gn)
        self.maskiou_fc2 = make_fc(1024, 1024, use_gn=use_gn)
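The 256 * 7 * 7 input size of maskiou_fc1 implicitly assumes 14x14 feature maps entering the head: the three stride-1 convs preserve that resolution and the stride-2 maskiou_fcn4 halves it to 7x7. A quick shape check under that assumption, with extractor standing in for a (hypothetical) instance of this class and the ReLUs between layers omitted:

import torch

x = torch.randn(2, 260, 14, 14)                 # assumed input: pooled RoI features plus extra channels
x = extractor.maskiou_fcn1(x)                   # (2, 256, 14, 14)
x = extractor.maskiou_fcn2(x)                   # (2, 256, 14, 14)
x = extractor.maskiou_fcn3(x)                   # (2, 256, 14, 14)
x = extractor.maskiou_fcn4(x)                   # stride 2 -> (2, 256, 7, 7)
x = extractor.maskiou_fc1(torch.flatten(x, 1))  # 256 * 7 * 7 = 12544 -> 1024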
Example #4
    def __init__(self, cfg, in_channels):
        """
        Arguments:
            cfg: the model configuration node
            in_channels (int): number of channels of the input feature maps
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        input_size = in_channels
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
        dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION

        next_feature = input_size
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "mask_fcn{}".format(layer_idx)
            module = make_conv3x3(next_feature,
                                  layer_features,
                                  dilation=dilation,
                                  stride=1,
                                  use_gn=use_gn)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        self.out_channels = layer_features
        dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

        self.conv4_fc = make_conv3x3(self.out_channels,
                                     dim_reduced,
                                     use_gn=use_gn)
        self.conv5_fc = make_conv3x3(dim_reduced,
                                     int(dim_reduced / 2),
                                     use_gn=use_gn)
        self.fc_final = make_fc(
            int(dim_reduced / 2) *
            (cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION)**2,
            cfg.MODEL.ROI_MASK_HEAD.RESOLUTION**2)
Example #5
    def __init__(
        self,
        in_channels,
        refine_level=2,
        refine_type='none',
        use_gn=False,
        freeze=False,
    ):
        super(BFP, self).__init__()
        assert refine_type in ['none', 'conv', 'non_local']

        self.in_channels = in_channels
        self.refine_level = refine_level
        self.refine_type = refine_type
        assert 0 <= self.refine_level

        if self.refine_type == 'conv':
            self.refine = make_conv3x3(self.in_channels,
                                       self.in_channels,
                                       use_gn=use_gn,
                                       use_relu=True,
                                       kaiming_init=True)
        elif self.refine_type == 'non_local':
            self.refine = NonLocal2D(
                self.in_channels,
                reduction=1,
                use_scale=False,
                use_gn=use_gn,
            )
        else:
            self.refine = None

        self.freeze = freeze
        if self.freeze:
            dfs_freeze(self, requires_grad=False)
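This matches the balanced feature pyramid (BFP) idea from Libra R-CNN: every level is resized to the resolution of the refine_level feature, the levels are averaged, the average is optionally refined by the conv or non-local block built above, and the result is added back to each input level. A hedged sketch of such a forward pass, not necessarily this repository's exact code:

import torch.nn.functional as F

def bfp_forward(self, inputs):
    # gather: bring every level to the spatial size of the refine_level feature
    gather_size = inputs[self.refine_level].size()[2:]
    feats = []
    for i, x in enumerate(inputs):
        if i < self.refine_level:
            feats.append(F.adaptive_max_pool2d(x, output_size=gather_size))
        else:
            feats.append(F.interpolate(x, size=gather_size, mode='nearest'))
    bsf = sum(feats) / len(feats)      # balance: average the gathered levels

    if self.refine is not None:        # refine: conv or non-local, as configured
        bsf = self.refine(bsf)

    # scatter: resize the refined map back and add it residually to each level
    outs = []
    for i, x in enumerate(inputs):
        out_size = x.size()[2:]
        if i < self.refine_level:
            residual = F.interpolate(bsf, size=out_size, mode='nearest')
        else:
            residual = F.adaptive_max_pool2d(bsf, output_size=out_size)
        outs.append(x + residual)
    return tuple(outs)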
Example #6
    def __init__(self, cfg, in_channels):
        super(MaskIoUFeatureExtractor, self).__init__()

        layers = cfg.MODEL.ROI_MASKIOU_HEAD.CONV_LAYERS
        resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION // 2
        input_features = in_channels + 1
        fc_input_size = layers[0] * resolution * resolution

        self.blocks = []
        stride = 1
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "maskiou_fcn{}".format(layer_idx)
            if layer_idx == len(layers):
                stride = 2
            module = make_conv3x3(input_features,
                                  layer_features,
                                  stride=stride)
            self.add_module(layer_name, module)
            input_features = layer_features
            self.blocks.append(layer_name)

        self.maskiou_fc1 = nn.Linear(fc_input_size, 1024)
        self.maskiou_fc2 = nn.Linear(1024, 1024)

        for l in [self.maskiou_fc1, self.maskiou_fc2]:
            nn.init.kaiming_uniform_(l.weight, a=1)
            nn.init.constant_(l.bias, 0)
    def __init__(self, cfg, in_channels):
        """
        Arguments:
            cfg: the model configuration node
            in_channels (int): number of channels of the input feature maps
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        pooler = Pooler(cfg.MODEL.ROI_MASK_HEAD)

        input_size = in_channels
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
        dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION

        next_feature = input_size
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "mask_fcn{}".format(layer_idx)
            module = make_conv3x3(next_feature,
                                  layer_features,
                                  dilation=dilation,
                                  stride=1,
                                  use_gn=use_gn)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        self.out_channels = layer_features
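In maskrcnn-benchmark, the forward pass that accompanies this kind of extractor pools the proposals and then runs each registered conv with a ReLU in between, roughly:

import torch.nn.functional as F

def forward(self, x, proposals):
    x = self.pooler(x, proposals)
    for layer_name in self.blocks:
        x = F.relu(getattr(self, layer_name)(x))
    return x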
Example #8
    def __init__(self,
                 in_channels,
                 reduction=2,
                 use_scale=True,
                 use_gn=True,
                 mode='embedded_gaussian'):
        super(NonLocal2D, self).__init__()

        self.in_channels = in_channels
        self.reduction = reduction
        self.use_scale = use_scale
        self.inter_channels = in_channels // reduction
        self.mode = mode
        assert mode in ['embedded_gaussian', 'dot_product']

        self.g = make_conv1x1(self.in_channels,
                              self.inter_channels,
                              kaiming_init=False)

        self.theta = make_conv1x1(self.in_channels,
                                  self.inter_channels,
                                  kaiming_init=False)

        self.phi = make_conv1x1(self.in_channels,
                                self.inter_channels,
                                kaiming_init=False)

        self.conv_out = make_conv3x3(self.inter_channels,
                                     self.in_channels,
                                     use_gn=use_gn,
                                     kaiming_init=True)
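The 1x1 projections above feed the standard non-local block: theta and phi form a pairwise affinity map over all spatial positions, g provides the values, and conv_out projects the aggregated result back to in_channels before a residual addition. A sketch of that forward under the usual embedded-Gaussian formulation (not necessarily this repository's exact code):

import torch

def non_local_forward(self, x):
    n, _, h, w = x.shape
    g_x = self.g(x).view(n, self.inter_channels, -1).permute(0, 2, 1)          # (n, h*w, c')
    theta_x = self.theta(x).view(n, self.inter_channels, -1).permute(0, 2, 1)  # (n, h*w, c')
    phi_x = self.phi(x).view(n, self.inter_channels, -1)                       # (n, c', h*w)

    pairwise = torch.matmul(theta_x, phi_x)                    # (n, h*w, h*w) affinities
    if self.mode == 'embedded_gaussian':
        if self.use_scale:
            pairwise = pairwise / self.inter_channels ** 0.5
        pairwise = pairwise.softmax(dim=-1)
    else:  # 'dot_product'
        pairwise = pairwise / pairwise.shape[-1]

    y = torch.matmul(pairwise, g_x)                            # (n, h*w, c')
    y = y.permute(0, 2, 1).reshape(n, self.inter_channels, h, w)
    return x + self.conv_out(y)                                # residual connection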
    def __init__(self, cfg, in_channels):
        super(KeypointRCNNFeatureExtractor, self).__init__()

        use_gn = cfg.MODEL.ROI_KEYPOINT_HEAD.USE_GN

        use_contextual_pooler = False
        if use_contextual_pooler:
            pooler = make_contextual_pooler(cfg, 'ROI_KEYPOINT_HEAD')
        else:
            pooler = make_pooler(cfg, 'ROI_KEYPOINT_HEAD')
        self.pooler = pooler

        input_features = in_channels
        layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS
        resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION
        next_feature = input_features
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "kp_fcn{}".format(layer_idx)
            module = make_conv3x3(
                next_feature, layer_features,
                dilation=1, stride=1, use_gn=use_gn, use_relu=True, kaiming_init=True
            )
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        self.out_channels = layer_features
        if cfg.MODEL.ROI_KEYPOINT_HEAD.ATTENTION_ON:
            self.regional_attention = RegionalAttention(cfg, in_channels, self.pooler, resolution)
        else:
            self.regional_attention = None
Example #10
    def __init__(self, cfg):
        super(EMMPredictor, self).__init__()

        if cfg.MODEL.BACKBONE.CONV_BODY.startswith("DLA"):
            in_channels = cfg.MODEL.DLA.BACKBONE_OUT_CHANNELS
        elif cfg.MODEL.BACKBONE.CONV_BODY.startswith("R-"):
            in_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
        else:
            in_channels = 128

        self.cls_tower = make_conv3x3(in_channels=in_channels, out_channels=in_channels,
                                      use_gn=True, use_relu=True, kaiming_init=False)
        self.reg_tower = make_conv3x3(in_channels=in_channels, out_channels=in_channels,
                                      use_gn=True, use_relu=True, kaiming_init=False)
        self.cls = make_conv3x3(in_channels=in_channels, out_channels=2, kaiming_init=False)
        self.center = make_conv3x3(in_channels=in_channels, out_channels=1, kaiming_init=False)
        self.reg = make_conv3x3(in_channels=in_channels, out_channels=4, kaiming_init=False)
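One plausible wiring for this predictor, in the FCOS style (whether centerness hangs off the classification or the regression tower varies between implementations, so treat this as an assumption):

def emm_forward(self, x):
    cls_feat = self.cls_tower(x)
    reg_feat = self.reg_tower(x)
    cls_logits = self.cls(cls_feat)     # (n, 2, h, w) match / no-match scores
    centerness = self.center(cls_feat)  # (n, 1, h, w)
    bbox_reg = self.reg(reg_feat)       # (n, 4, h, w) box regression targets
    return cls_logits, centerness, bbox_reg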
    def __init__(self, cfg):
        """
        Arguments:
            cfg: the model configuration node
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
        use_gw = cfg.MODEL.ROI_MASK_HEAD.USE_GW

        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
        dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION
        use_deconv = cfg.MODEL.ROI_MASK_HEAD.USE_DECONV
        block = cfg.MODEL.DECONV.BLOCK

        if use_deconv:
            use_gn = False
            use_gw = False

        next_feature = input_size
        self.blocks = []

        if cfg.MODEL.DECONV.LAYERWISE_NORM:
            norm_type = cfg.MODEL.DECONV.MASK_NORM_TYPE
        else:
            norm_type = 'none'
            if cfg.MODEL.DECONV.MASK_NORM_TYPE == 'layernorm':
                self.mask_norm = LayerNorm(eps=cfg.MODEL.DECONV.EPS)

        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "mask_fcn{}".format(layer_idx)
            module = make_conv3x3(next_feature,
                                  layer_features,
                                  dilation=dilation,
                                  stride=1,
                                  use_gn=use_gn,
                                  use_gw=use_gw,
                                  use_deconv=use_deconv,
                                  block=block,
                                  sampling_stride=cfg.MODEL.DECONV.STRIDE,
                                  sync=cfg.MODEL.DECONV.SYNC,
                                  norm_type=norm_type)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
Example #12
    def __init__(self, cfg, in_channels):
        super(MaskDecoder, self).__init__()
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
        scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
        resolutions = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION  #[32, 16, 8, 4, 4]
        poolers = []
        for idx, resolution in enumerate(resolutions):
            poolers.append(
                Pooler(
                    output_size=(resolution, resolution),
                    scales=[scales[idx]],
                    sampling_ratio=sampling_ratio,
                ))
        self.poolers = poolers

        inner_blocks = []
        for idx in range(len(scales)):
            inner_block = make_conv1x1(in_channels,
                                       128,
                                       dilation=1,
                                       use_gn=1,
                                       use_relu=1)
            block_name = 'inner_maskdecoder_{}'.format(idx + 1)
            self.add_module(block_name, inner_block)
            inner_blocks.append(block_name)

        conv_blocks = []
        for idx in range(len(scales) - 1):
            if idx < len(scales) - 1:
                conv_block = make_conv3x3(128, 128, use_gn=1, use_relu=1)
            else:
                conv_block = nn.Sequential(
                    make_conv3x3(128, 128, use_gn=1, use_relu=1),
                    make_conv3x3(128, 128, use_gn=1, use_relu=1),
                )
            block_name = 'conv_maskdecoder_{}'.format(idx + 1)
            self.add_module(block_name, conv_block)
            conv_blocks.append(block_name)

        self.inner_blocks = inner_blocks
        self.conv_blocks = conv_blocks
        self.out_channels = 128
Example #13
 def __init__(self, cfg, in_channels):
     super(SegmentationBranch, self).__init__()
     self.in_channels = in_channels
     self.seg_fcn1 = make_conv1x1(in_channels,
                                  in_channels,
                                  use_relu=0,
                                  kaiming_init=False)
     self.seg_fcn2 = make_conv3x3(in_channels,
                                  in_channels,
                                  use_gn=1,
                                  use_relu=1)
     self.seg_fcn3 = make_conv3x3(in_channels,
                                  in_channels,
                                  use_gn=1,
                                  use_relu=1)
     self.seg_fcn4 = make_conv3x3(in_channels,
                                  in_channels,
                                  use_gn=1,
                                  use_relu=1)
     self.predict = make_conv1x1(in_channels, 1, kaiming_init=False)
    def __init__(self, cfg, in_channels):
        super(MaskIoUFeatureExtractor, self).__init__()

        input_channels = in_channels + 1  # cat features and mask single channel
        use_gn = cfg.MODEL.ROI_MASKIOU_HEAD.USE_GN
        representation_size = cfg.MODEL.ROI_MASKIOU_HEAD.MLP_HEAD_DIM

        resolution_key = "RESOLUTION"
        pooler_resolution_key = "POOLER_RESOLUTION"

        resolution = cfg.MODEL.ROI_MASK_HEAD[resolution_key]
        input_pooler_resolution = cfg.MODEL.ROI_MASK_HEAD[
            pooler_resolution_key]

        self.max_pool2d = lambda x: x
        if resolution == input_pooler_resolution * 2:
            self.max_pool2d = torch.nn.MaxPool2d(kernel_size=2, stride=2)
            resolution = resolution // 2  # after max pooling 2x2
        elif resolution != input_pooler_resolution:
            raise NotImplementedError(
                "Only supports %s == %s or %s == 2x%s. Received %d vs %d instead"
                % (resolution_key, pooler_resolution_key, resolution_key,
                   pooler_resolution_key, resolution, input_pooler_resolution))

        layers = cfg.MODEL.ROI_MASKIOU_HEAD.CONV_LAYERS
        # stride=1 for each layer, and stride=2 for last layer
        strides = [1 for l in layers]
        strides[-1] = 2

        next_feature = input_channels
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers):
            layer_name = "maskiou_fcn{}".format(layer_idx + 1)
            stride = strides[layer_idx]
            module = make_conv3x3(next_feature,
                                  layer_features,
                                  stride=stride,
                                  dilation=1,
                                  use_gn=use_gn)
            self.add_module(layer_name, module)
            self.blocks.append(layer_name)

            next_feature = layer_features
            if stride == 2:
                resolution = resolution // 2

        self.maskiou_fc1 = make_fc(next_feature * resolution**2,
                                   representation_size,
                                   use_gn=False)
        self.maskiou_fc2 = make_fc(representation_size,
                                   representation_size,
                                   use_gn=False)
        self.out_channels = representation_size
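With typical settings (RESOLUTION = 28, POOLER_RESOLUTION = 14, CONV_LAYERS = (256, 256, 256, 256)) the arithmetic works out as follows: the 2x2 max pool brings the 28x28 input down to 14x14, the stride-1 convs keep it there, the final stride-2 conv halves it to 7x7, and maskiou_fc1 therefore receives 256 * 7 * 7 = 12544 features.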
Example #15
    def __init__(self, cfg, in_channels, extract_type="avg"):
        """
        Arguments:
            cfg: the model configuration node
            in_channels (int): number of channels of the input feature maps
            extract_type (str): feature extraction mode, "avg" or "corr"
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        input_size = in_channels
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
        dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION
        self.extract_type = extract_type
        next_feature = input_size
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "mask_fcn{}".format(layer_idx)
            if self.extract_type == "corr" and layer_idx == 1:
                input_feature = resolution**2
            else:
                input_feature = next_feature
            module = make_conv3x3(input_feature,
                                  layer_features,
                                  dilation=dilation,
                                  stride=1,
                                  use_gn=use_gn)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        self.out_channels = layer_features
        if extract_type == "corr":
            self.feature_l2_norm = FeatureL2Norm()
            self.feature_correlation = FeatureCorrelation()
Example #16
    def __init__(self, cfg, in_channels):
        """
        Arguments:
            cfg: the model configuration node
            in_channels (int): number of channels of the input feature maps
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION[0]
        scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        pooler_mask = Pooler(
            output_size=(resolution, resolution),
            scales=(1., ),
            sampling_ratio=sampling_ratio,
        )
        input_size = in_channels
        self.pooler = pooler
        self.pooler_mask = pooler_mask

        use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
        dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION

        next_feature = input_size
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "mask_fcn{}".format(layer_idx)
            flag = cfg.MODEL.SEG_ON_ADD_CHANEL and (layer_idx == 1)
            module = make_conv3x3(next_feature + flag,
                                  layer_features,
                                  dilation=dilation,
                                  stride=1,
                                  use_gn=use_gn)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        self.out_channels = layer_features
Example #17
    def __init__(self, cfg, in_channels):
        """
        Arguments:
            cfg: the model configuration node
            in_channels (int): number of channels of the input feature maps
        """
        super(MaskRCNNFPNSpatialAttentionFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
        lvl_map_func = cfg.MODEL.ROI_MASK_HEAD.LEVEL_MAP_FUNCTION
        self.maskiou = cfg.MODEL.MASKIOU_ON
        pooler = Pooler(output_size=(resolution, resolution),
                        scales=scales,
                        sampling_ratio=sampling_ratio,
                        lvl_map_func=lvl_map_func)
        input_size = in_channels
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
        dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION

        #spatial attention module
        self.spatialAtt = SpatialAttention()
        self.num_pooler = len(scales)

        next_feature = input_size
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "mask_fcn{}".format(layer_idx)
            module = make_conv3x3(next_feature,
                                  layer_features,
                                  dilation=dilation,
                                  stride=1,
                                  use_gn=use_gn)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)

        self.out_channels = layer_features
Example #18
    def __init__(self, cfg):
        """
        Arguments:
            cfg: the model configuration node
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
        pooler = PyramidRROIAlign(
            output_size=(resolution, resolution),
            scales=scales,
        )
        input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
        dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION

        self.word_margin = cfg.MODEL.ROI_REC_HEAD.BOXES_MARGIN
        self.det_margin = cfg.MODEL.RRPN.GT_BOX_MARGIN

        self.rescale = self.word_margin / self.det_margin

        next_feature = input_size
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "mask_fcn{}".format(layer_idx)
            module = make_conv3x3(next_feature,
                                  layer_features,
                                  dilation=dilation,
                                  stride=1,
                                  use_gn=use_gn)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
    def __init__(self, cfg):
        """
        Arguments:
            cfg: the model configuration node
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
        dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION

        next_feature = input_size
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "mask_fcn{}".format(layer_idx)
            # module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1)
            # # Caffe2 implementation uses MSRAFill, which in fact
            # # corresponds to kaiming_normal_ in PyTorch
            # nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
            # nn.init.constant_(module.bias, 0)
            module = make_conv3x3(next_feature, layer_features, 
                dilation=dilation, stride=1, use_gn=use_gn
            )
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
Example #20
    def __init__(self, cfg, in_channels):
        """
        Arguments:
            cfg: the model configuration node
            in_channels (int): number of channels of the input feature maps
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION  # ex. 14
        scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES  # ex. (0.25, 0.125, 0.0625, 0.03125)
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO  # ex. 2
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        input_size = in_channels  # backbone input channels
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN  # false
        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS  #  (256, 256, 256, 256)
        dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION  # 1

        next_feature = input_size
        self.blocks = []
        # enumerate(..., start=1)
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "mask_fcn{}".format(layer_idx)
            module = make_conv3x3(next_feature,
                                  layer_features,
                                  dilation=dilation,
                                  stride=1,
                                  use_gn=use_gn)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        self.out_channels = layer_features  # 256
Example #21
    def __init__(self, cfg, in_channels):
        """
        Arguments:
            cfg: the model configuration node
            in_channels (int): number of channels of the input feature maps
        """
        super(BOUNDARYRCNNFPNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_BOUNDARY_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_BOUNDARY_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_BOUNDARY_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
            deformable=cfg.MODEL.ROI_BOUNDARY_HEAD.DEFORMABLE_POOLING
            # deformable = True
        )
        input_size = in_channels
        self.pooler = pooler

        layers = cfg.MODEL.ROI_BOUNDARY_HEAD.CONV_LAYERS
        use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
        dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION

        next_feature = input_size
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "boundary_fcn{}".format(layer_idx)
            module = make_conv3x3(next_feature,
                                  layer_features,
                                  dilation=dilation,
                                  stride=1,
                                  use_gn=use_gn)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
Example #22
 def __init__(self,
              output_size,
              scales,
              sampling_ratio,
              in_channels=512,
              cat_all_levels=False):
     """
     Arguments:
         output_size (list[tuple[int]] or list[int]): output size for the pooled region
         scales (list[float]): scales for each Pooler
         sampling_ratio (int): sampling ratio for ROIAlign
     """
     super(Pooler, self).__init__()
     poolers = []
     for scale in scales:
         poolers.append(
             ROIAlign(output_size,
                      spatial_scale=scale,
                      sampling_ratio=sampling_ratio))
     self.poolers = nn.ModuleList(poolers)
     self.output_size = output_size
     self.cat_all_levels = cat_all_levels
     # get the levels in the feature map by leveraging the fact that the network always
     # downsamples by a factor of 2 at each level.
     lvl_min = -torch.log2(torch.tensor(scales[0],
                                        dtype=torch.float32)).item()
     lvl_max = -torch.log2(torch.tensor(scales[-1],
                                        dtype=torch.float32)).item()
     self.map_levels = LevelMapper(lvl_min, lvl_max)
     # reduce the channels
     if self.cat_all_levels:
         self.reduce_channel = make_conv3x3(in_channels * len(self.poolers),
                                            in_channels,
                                            dilation=1,
                                            stride=1,
                                            use_relu=True)
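As a concrete instance of the level arithmetic: with the usual FPN scales (0.25, 0.125, 0.0625, 0.03125), lvl_min = -log2(0.25) = 2 and lvl_max = -log2(0.03125) = 5, so the LevelMapper assigns each RoI to one of pyramid levels 2 through 5 according to its size; with cat_all_levels=True the pooled maps from all four levels are presumably concatenated instead and reduced back to in_channels by the 3x3 conv.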
    def __init__(self, cfg, in_channels):
        super(C52MLPFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES
        use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN
        sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        assert len(scales) == 1
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        self.conv = make_conv3x3(in_channels,
                                 256,
                                 use_gn=use_gn,
                                 use_relu=True)
        in_channels = 256
        representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM
        self.pooler = pooler
        input_size = in_channels * resolution**2
        self.fc6 = make_fc(input_size, representation_size, use_gn)
        self.fc7 = make_fc(representation_size, representation_size, use_gn)
        self.out_channels = representation_size
Example #24
    def __init__(self,
                 cfg,
                 in_channels_list,
                 in_channels_scale,
                 out_channels,
                 one_by_one_in_channels,
                 mode="bilinear"):
        super(FPNBasedSemanticSegmentationHead, self).__init__()

        self.mode = mode
        self.upsampling_blocks = []
        self.number_upsamples_per = []

        priming = cfg.MODEL.RPN.USE_SEMANTIC_FEATURES or cfg.MODEL.ROI_HEADS.USE_SEMANTIC_FEATURES
        # skip the possible "top" features?
        target_scale = cfg.MODEL.SEMANTIC.COMBINE_AT_SCALE

        for idx, in_channels in enumerate(in_channels_list):
            upsampler_name = "upsample_scale{0}".format(idx)
            in_channels = in_channels_list[idx]
            scale = in_channels_scale[idx]

            number_upsamples = int(np.log2(target_scale / scale))
            self.number_upsamples_per.append(number_upsamples)
            if number_upsamples == 0:
                # paper is not quite clear what happens here. my guess is the usual but no upsampling.
                upsampler = make_conv3x3(in_channels,
                                         out_channels,
                                         use_gn=cfg.MODEL.SEMANTIC.USE_GN,
                                         use_relu=True)
                # upsample1 = make_dfconv3x3(
                #     in_channels, out_channels, use_gn=False, use_relu=True)
                # upsample2 = make_dfconv3x3(
                #     out_channels, out_channels, use_gn=False, use_relu=True)
                # upsampler = nn.Sequential(*[upsample1, upsample2])
            else:
                upsampler = SetOfUpsamplingStages(cfg,
                                                  in_channels,
                                                  out_channels,
                                                  count=number_upsamples,
                                                  mode=self.mode)
                #upsampler = StraightUpsamplingStages(cfg, in_channels, out_channels, count=number_upsamples, mode=self.mode)

            self.add_module(upsampler_name, upsampler)
            self.upsampling_blocks.append(upsampler)

        if not cfg.MODEL.RPN.USE_SEMANTIC_FEATURES:
            # unsure if there should be a ReLU here.
            make_1x1_conv = conv_with_kaiming_uniform(use_gn=False,
                                                      use_relu=True)
            self.conv = make_1x1_conv(one_by_one_in_channels,
                                      out_channels,
                                      kernel_size=1,
                                      stride=1)

        make_project = conv_with_kaiming_uniform(use_gn=False, use_relu=False)
        # add VOID + THING vs VOID + THINGS + STUFF
        number_classes = (1 + cfg.MODEL.SEMANTIC.NUM_CLASSES +
                          1 if cfg.MODEL.SEMANTIC.COLLAPSE_THING_ONTOLOGY else
                          cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES +
                          cfg.MODEL.SEMANTIC.NUM_CLASSES)
        self.project = make_project(out_channels,
                                    number_classes,
                                    kernel_size=1,
                                    stride=1)
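For concreteness, assuming FPN feature scales of 1/32, 1/16, 1/8 and 1/4 with COMBINE_AT_SCALE = 1/4: number_upsamples = log2(target_scale / scale) evaluates to 3, 2, 1 and 0 for the four levels, so only the finest level takes the plain make_conv3x3 branch while the others get a SetOfUpsamplingStages with that many stages.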
Example #25
    def __init__(self, cfg, in_channels):
        """
        Arguments:
            cfg: the model configuration node
            in_channels (int): number of channels of the input feature maps
        """
        super(MaskRCNNPANETFeatureExtractor, self).__init__()
        self.cfg = cfg
        resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION  # 14
        scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES  # (0.25, 0.125, 0.0625, 0.03125)
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO  # 2
        pooler = AdaptivePooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        input_size = in_channels
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS  # (256, 256, 256, 256)
        dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION

        next_feature = input_size
        self.blocks = []
        # for layer_idx, layer_features in enumerate(layers, 1):
        #     layer_name = "mask_fcn{}".format(layer_idx)
        #     module = make_conv3x3(
        #         next_feature, layer_features,
        #         dilation=dilation, stride=1, use_gn=use_gn
        #     )  # dilated convolution is used here
        #     self.add_module(layer_name, module)
        #     next_feature = layer_features
        #     self.blocks.append(layer_name)
        self.add_module("mask_fcn1_1",
                        make_conv3x3(next_feature, layers[0], dilation=dilation, stride=1, use_gn=use_gn))
        self.add_module("mask_fcn1_2",
                        make_conv3x3(next_feature, layers[0], dilation=dilation, stride=1, use_gn=use_gn))
        self.add_module("mask_fcn1_3",
                        make_conv3x3(next_feature, layers[0], dilation=dilation, stride=1, use_gn=use_gn))
        self.add_module("mask_fcn1_4",
                        make_conv3x3(next_feature, layers[0], dilation=dilation, stride=1, use_gn=use_gn))
        next_feature = layers[0]
        for layer_idx, layer_features in enumerate(layers[1:], 2):
            layer_name = "mask_fcn{}".format(layer_idx)
            module = make_conv3x3(
                next_feature, layer_features,
                dilation=dilation, stride=1, use_gn=use_gn
            )  # dilated convolution is used here
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        # TODO: modules for separating foreground from background; their weights need to be initialized!!!
        conv4 = nn.Conv2d(layers[2], layers[2], 3, 1, padding=1 * dilation, dilation=dilation, bias=False)
        nn.init.kaiming_normal_(
            conv4.weight, mode="fan_out", nonlinearity="relu"
        )
        self.mask_conv4_fc = nn.Sequential(
            conv4,
            group_norm(layers[2]),
            nn.ReLU(inplace=True))
        # --------------------------------------------------------------------------------------------------------#
        conv5 = nn.Conv2d(layers[2], int(layers[2] / 2), 3, 1, padding=1 * dilation, dilation=dilation, bias=False)
        nn.init.kaiming_normal_(
            conv5.weight, mode="fan_out", nonlinearity="relu"
        )
        self.mask_conv5_fc = nn.Sequential(
            conv5,
            group_norm(int(layers[2] / 2)),
            nn.ReLU(inplace=True))
        # self.mask_conv5_fc = nn.Sequential(
        #     nn.Conv2d(layers[2], int(layers[2] / 2), 3, 1, padding=1 * dilation, dilation=dilation, bias=False),
        #     group_norm(int(layers[2] / 2)),
        #     nn.ReLU(inplace=True))
        # nn.init.kaiming_normal_(
        #     self.mask_conv5_fc.weight, mode="fan_out", nonlinearity="relu"
        # )
        #---------------------------------------------------------------------------------------------------------#
        fc = nn.Linear(int(layers[2] / 2) * cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION ** 2,
                       (2 * cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION) ** 2, bias=True)
        nn.init.kaiming_normal_(
            fc.weight, mode="fan_out", nonlinearity="relu"
        )
        self.mask_fc = nn.Sequential(
            fc,
            nn.ReLU(inplace=True))
        # self.mask_fc = nn.Sequential(
        #     nn.Linear(int(layers[2] / 2) * cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION ** 2,
        #               (2 * cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION) ** 2, bias=True),
        #     nn.ReLU(inplace=True))
        # nn.init.kaiming_normal_(
        #     self.mask_fc.weight, mode="fan_out", nonlinearity="relu"
        # )

        self.out_channels = layer_features
    def __init__(self, cfg, in_channels):
        """
        Arguments:
            cfg: the model configuration node
            in_channels (int): number of channels of the input feature maps
        """
        super(MaskRCNNFPN_adp_ff_FeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO

        pooler = Pooler(output_size=(resolution, resolution),
                        scales=scales,
                        sampling_ratio=sampling_ratio,
                        panet=True)
        input_size = in_channels
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
        dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION

        next_feature = input_size
        layer_features = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[0]

        # the first 2 conv layers are shared and remain the same, but the paper says 3 is best
        module_list = []
        for i in range(2):
            module_list.extend([
                make_conv3x3(next_feature,
                             layer_features,
                             dilation=dilation,
                             stride=1,
                             use_gn=use_gn,
                             use_relu=True)
            ])
            next_feature = layer_features
        self.conv_fcn = nn.Sequential(*module_list)

        # this is for adaptive feature pooling
        self.mask_conv1 = nn.ModuleList()
        # num_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
        num_levels = 4
        for i in range(num_levels):
            self.mask_conv1.append(
                make_conv3x3(next_feature,
                             layer_features,
                             dilation=dilation,
                             stride=1,
                             use_gn=use_gn,
                             use_relu=True), )

        self.mask_conv4 = make_conv3x3(next_feature,
                                       layer_features,
                                       dilation=dilation,
                                       stride=1,
                                       use_gn=use_gn,
                                       use_relu=True)

        self.mask_conv4_fc = make_conv3x3(next_feature,
                                          layer_features,
                                          dilation=dilation,
                                          stride=1,
                                          use_gn=use_gn,
                                          use_relu=True)

        self.mask_conv5_fc = make_conv3x3(next_feature,
                                          int(layer_features / 2),
                                          dilation=dilation,
                                          stride=1,
                                          use_gn=use_gn,
                                          use_relu=True)

        self.mask_fc = nn.Sequential(
            nn.Linear(int(layer_features / 2) * (resolution)**2,
                      cfg.MODEL.ROI_MASK_HEAD.RESOLUTION**2,
                      bias=True), nn.ReLU(inplace=True))

        # upsample layer
        self.upconv = nn.ConvTranspose2d(layer_features, layer_features, 2, 2,
                                         0)
        self.out_channels = layer_features
        # init_weights
        # make_conv3x3 has already done the init; the default kaiming init corresponds to MSRAFill in PANet.
        self.apply(self._init_weights)
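This last extractor is PANet-style mask prediction with fully connected fusion: per-level RoI features are fused, passed through the shared convs, and then split into a conv branch (mask_conv4 followed by the 2x upconv) and an fc branch (mask_conv4_fc, mask_conv5_fc, mask_fc) whose flat output is reshaped into a class-agnostic spatial map and added to the conv branch. A hedged sketch of how such a forward could be wired, assuming a 14x14 pooler resolution and a 28x28 mask resolution (not the repository's exact code; where the fusion happens relative to the shared convs is also an assumption):

import torch

def panet_mask_forward(self, roi_feats_per_level):
    # roi_feats_per_level: assumed list of per-level pooled RoI features, each (n, C, 14, 14)
    fused = None
    for level_feats, conv1 in zip(roi_feats_per_level, self.mask_conv1):
        level_feats = conv1(level_feats)
        fused = level_feats if fused is None else torch.max(fused, level_feats)

    x = self.conv_fcn(fused)                              # shared convs

    conv_path = self.upconv(self.mask_conv4(x))           # (n, 256, 28, 28) after 2x deconv

    fc_path = self.mask_conv5_fc(self.mask_conv4_fc(x))   # (n, 128, 14, 14)
    fc_path = self.mask_fc(fc_path.flatten(start_dim=1))  # (n, 28 * 28)
    fc_path = fc_path.view(x.size(0), 1, 28, 28)          # class-agnostic spatial mask

    return conv_path + fc_path                            # handed to the mask predictor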