Esempio n. 1
0
    def __init__(self, dim_in, spatial_scale):
        """Build the RoI pooler and the stack of 3x3 conv blocks of the head.

        Arguments:
            dim_in (sequence): channel counts of the incoming features; only
                the last entry is used as the input width of the first conv.
            spatial_scale: forwarded unchanged to ``Pooler`` as ``scales``.

        After construction ``self.blocks`` holds the registered sub-module
        names ("mask_fcn1", "mask_fcn2", ...) in order, and ``self.dim_out``
        is ``conv_dim`` when at least one conv is stacked, else ``dim_in[-1]``.
        """
        super(roi_convx_head, self).__init__()
        self.dim_in = dim_in[-1]

        method = cfg.MRCNN.ROI_XFORM_METHOD
        resolution = cfg.MRCNN.ROI_XFORM_RESOLUTION
        sampling_ratio = cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO
        self.pooler = Pooler(
            method=method,
            output_size=resolution,
            scales=spatial_scale,
            sampling_ratio=sampling_ratio,
        )

        use_lite = cfg.MRCNN.CONVX_HEAD.USE_LITE
        use_bn = cfg.MRCNN.CONVX_HEAD.USE_BN
        use_gn = cfg.MRCNN.CONVX_HEAD.USE_GN
        conv_dim = cfg.MRCNN.CONVX_HEAD.CONV_DIM
        num_stacked_convs = cfg.MRCNN.CONVX_HEAD.NUM_STACKED_CONVS
        dilation = cfg.MRCNN.CONVX_HEAD.DILATION

        # Each conv is registered under its own name; the ordered names are
        # kept in self.blocks.
        self.blocks = []
        for layer_idx in range(num_stacked_convs):
            layer_name = "mask_fcn{}".format(layer_idx + 1)
            module = make_conv(self.dim_in,
                               conv_dim,
                               kernel=3,
                               stride=1,
                               dilation=dilation,
                               use_dwconv=use_lite,
                               use_bn=use_bn,
                               use_gn=use_gn,
                               suffix_1x1=use_lite)
            self.add_module(layer_name, module)
            # Every conv after the first consumes conv_dim channels.
            self.dim_in = conv_dim
            self.blocks.append(layer_name)
        self.dim_out = self.dim_in

        if cfg.MRCNN.CONVX_HEAD.USE_WS:
            # NOTE(review): rebinding ``self`` only affects this local name.
            # The instance the caller receives is converted only if the
            # helper works in place -- confirm against its implementation.
            self = convert_conv2convws_model(self)

        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity="relu")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                # This branch used to be nested inside the conv branch and
                # could never run. Norm layers built with affine=False have
                # no weight/bias, hence the None guards.
                if m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
Esempio n. 2
0
    def __init__(self, dim_in, spatial_scale, norm='bn'):
        """Build a RoI pooler followed by a single ResNet stage (``layer4``).

        Arguments:
            dim_in (sequence): channel counts of the incoming features; the
                last entry becomes ``self.dim_in`` / ``self.inplanes``.
            spatial_scale: forwarded unchanged to ``Pooler`` as ``scales``.
            norm: stored on ``self.norm``.
        """
        super().__init__()
        self.dim_in = dim_in[-1]

        resnet_cfg = cfg.BACKBONE.RESNET
        roi_cfg = cfg.FAST_RCNN

        # Pick the residual block type from the backbone configuration.
        if resnet_cfg.USE_ALIGN:
            block = res.AlignedBottleneck
        elif resnet_cfg.BOTTLENECK:
            block = res.Bottleneck  # not use the original Bottleneck module
        else:
            block = res.BasicBlock

        # Attributes set ahead of the _make_layer call below.
        self.expansion = block.expansion
        self.stride_3x3 = resnet_cfg.STRIDE_3X3
        self.avg_down = resnet_cfg.AVG_DOWN
        self.norm = norm
        self.radix = resnet_cfg.RADIX
        self.base_width = resnet_cfg.WIDTH
        self.ctx_ratio = resnet_cfg.CTX_RATIO

        resolution = roi_cfg.ROI_XFORM_RESOLUTION
        self.pooler = Pooler(
            method=roi_cfg.ROI_XFORM_METHOD,
            output_size=resolution,
            scales=spatial_scale,
            sampling_ratio=roi_cfg.ROI_XFORM_SAMPLING_RATIO,
        )

        self.inplanes = self.dim_in
        # Stride grows with the pooled resolution, in multiples of 7.
        self.layer4 = self._make_layer(
            block,
            512,
            resnet_cfg.LAYERS[3],
            min(resolution) // 7,
            dilation=resnet_cfg.C5_DILATION,
            conv=resnet_cfg.STAGE_WITH_CONV[3],
            context=resnet_cfg.STAGE_WITH_CONTEXT[3],
        )
        self.dim_out = self.stage_out_dim[-1]

        # Drop the attributes this head does not use (presumably created by
        # the parent constructor -- confirm against the base class).
        for unused in ('conv1', 'bn1', 'relu', 'maxpool', 'layer1',
                       'layer2', 'layer3', 'avgpool', 'fc'):
            delattr(self, unused)
        self._init_weights()
Esempio n. 3
0
    def __init__(self, dim_in, spatial_scale):
        """Build the conv tower, the two feature-transition lists and the pooler.

        Arguments:
            dim_in (sequence): channel counts of the incoming features; the
                last entry is the input width of the first conv.
            spatial_scale: forwarded unchanged to ``Pooler`` as ``scales``.

        ``self.dim_out`` is a one-element list holding ``conv_out_channels``.
        """
        super(RoIOPLDHead, self).__init__()
        opld_cfg = cfg.OPLD
        self.num_points = opld_cfg.NUM_POINTS
        self.roi_feat_size = opld_cfg.ROI_FEAT_SIZE

        self.num_convs = opld_cfg.ROI_HEAD.NUM_CONVS
        self.point_feat_channels = opld_cfg.ROI_HEAD.POINT_FEAT_CHANNELS
        self.neighbor_points = opld_cfg.ROI_HEAD.NEIGHBOR_POINTS

        self.conv_out_channels = self.point_feat_channels * self.num_points
        self.class_agnostic = False
        self.dim_in = dim_in[-1]

        self.whole_map_size = self.roi_feat_size * 4

        # Tower of (3x3 conv -> GroupNorm -> ReLU); only the first conv
        # reads dim_in channels, the rest read conv_out_channels.
        kernel_size = 3
        same_padding = (kernel_size - 1) // 2
        tower = []
        channels = self.dim_in
        for _ in range(self.num_convs):
            tower.append(
                nn.Sequential(
                    nn.Conv2d(channels,
                              self.conv_out_channels,
                              kernel_size=kernel_size,
                              stride=1,
                              padding=same_padding),
                    nn.GroupNorm(32, self.conv_out_channels, eps=1e-5),
                    nn.ReLU(inplace=True),
                ))
            channels = self.conv_out_channels
        self.convs = nn.Sequential(*tower)

        # first-order feature transition
        self.forder_trans = self._build_trans(nn.ModuleList())
        # second-order feature transition
        self.sorder_trans = self._build_trans(nn.ModuleList())

        self.pooler = Pooler(
            method=opld_cfg.ROI_XFORM_METHOD,
            output_size=opld_cfg.ROI_XFORM_RESOLUTION,
            scales=spatial_scale,
            sampling_ratio=opld_cfg.ROI_XFORM_SAMPLING_RATIO,
        )
        self.dim_out = [self.conv_out_channels]
Esempio n. 4
0
    def __init__(self, dim_in, spatial_scale):
        """Build the RoI pooler, a stack of 3x3 convs and one FC layer.

        Arguments:
            dim_in (sequence): channel counts of the incoming features; the
                last entry is the input width of the first conv.
            spatial_scale: forwarded unchanged to ``Pooler`` as ``scales``.

        ``self.dim_out`` is the configured MLP width.
        """
        super().__init__()
        self.dim_in = dim_in[-1]

        roi_cfg = cfg.FAST_RCNN
        head_cfg = roi_cfg.CONVFC_HEAD

        resolution = roi_cfg.ROI_XFORM_RESOLUTION
        self.pooler = Pooler(
            method=roi_cfg.ROI_XFORM_METHOD,
            output_size=resolution,
            scales=spatial_scale,
            sampling_ratio=roi_cfg.ROI_XFORM_SAMPLING_RATIO,
        )

        lite = head_cfg.USE_LITE
        conv_dim = head_cfg.CONV_DIM
        conv_options = dict(
            kernel=3,
            stride=1,
            dilation=head_cfg.DILATION,
            use_dwconv=lite,
            use_bn=head_cfg.USE_BN,
            use_gn=head_cfg.USE_GN,
            suffix_1x1=lite,
            use_relu=True,
        )

        # Only the first conv reads dim_in channels; later ones read conv_dim.
        stacked = []
        channels = self.dim_in
        for _ in range(head_cfg.NUM_STACKED_CONVS):
            stacked.append(make_conv(channels, conv_dim, **conv_options))
            channels = conv_dim
        self.dim_in = channels
        self.xconvs = nn.Sequential(*stacked)

        # The FC layer takes the flattened (channels x height x width) map.
        flat_features = self.dim_in * resolution[0] * resolution[1]
        mlp_dim = head_cfg.MLP_DIM
        self.fc6 = make_fc(flat_features, mlp_dim, use_bn=False, use_gn=False)
        self.dim_out = mlp_dim

        if head_cfg.USE_WS:
            # The return value is not kept: rebinding ``self`` as the last
            # statement of __init__ would not change the instance the caller
            # receives. NOTE(review): this relies on the helper converting
            # the module in place -- confirm.
            convert_conv2convws_model(self)
Esempio n. 5
0
    def __init__(self, dim_in, spatial_scale):
        """Build the RoI pooler and the stack of 3x3 conv blocks of the head.

        Arguments:
            dim_in (sequence): channel counts of the incoming features; the
                last entry is the input width of the first conv.
            spatial_scale: forwarded unchanged to ``Pooler`` as ``scales``.

        ``self.blocks`` lists the registered sub-module names ("UV_fcn1",
        "UV_fcn2", ...) in order; ``self.dim_out`` mirrors the final
        ``self.dim_in``.
        """
        super(roi_convx_head, self).__init__()
        self.dim_in = dim_in[-1]

        self.pooler = Pooler(
            method=cfg.UVRCNN.ROI_XFORM_METHOD,
            output_size=cfg.UVRCNN.ROI_XFORM_RESOLUTION,
            scales=spatial_scale,
            sampling_ratio=cfg.UVRCNN.ROI_XFORM_SAMPLING_RATIO,
        )

        head_cfg = cfg.UVRCNN.CONVX_HEAD
        lite = head_cfg.USE_LITE
        conv_dim = head_cfg.CONV_DIM
        conv_options = dict(
            kernel=3,
            stride=1,
            dilation=head_cfg.DILATION,
            use_dwconv=lite,
            use_bn=head_cfg.USE_BN,
            use_gn=head_cfg.USE_GN,
            suffix_1x1=lite,
        )

        # Register one named conv per stacked layer, numbering from 1.
        self.blocks = []
        channels = self.dim_in
        for number in range(1, head_cfg.NUM_STACKED_CONVS + 1):
            name = "UV_fcn{}".format(number)
            self.add_module(name, make_conv(channels, conv_dim, **conv_options))
            self.blocks.append(name)
            # Every conv after the first consumes conv_dim channels.
            channels = conv_dim
        self.dim_in = channels
        self.dim_out = channels
Esempio n. 6
0
    def __init__(self, dim_in, spatial_scale):
        """Build the RoI pooler and two fully connected layers.

        Arguments:
            dim_in (sequence): channel counts of the incoming features; the
                last entry is used to size the first FC layer.
            spatial_scale: forwarded unchanged to ``Pooler`` as ``scales``.

        ``self.dim_out`` is the configured MLP width.
        """
        super().__init__()
        self.dim_in = dim_in[-1]

        roi_cfg = cfg.FAST_RCNN
        head_cfg = roi_cfg.MLP_HEAD

        resolution = roi_cfg.ROI_XFORM_RESOLUTION
        roi_pooler = Pooler(
            method=roi_cfg.ROI_XFORM_METHOD,
            output_size=resolution,
            scales=spatial_scale,
            sampling_ratio=roi_cfg.ROI_XFORM_SAMPLING_RATIO,
        )

        # fc6 takes the flattened (channels x height x width) pooled map.
        flat_features = self.dim_in * resolution[0] * resolution[1]
        hidden = head_cfg.MLP_DIM
        with_bn = head_cfg.USE_BN
        with_gn = head_cfg.USE_GN

        self.pooler = roi_pooler
        self.fc6 = make_fc(flat_features, hidden, with_bn, with_gn)
        self.fc7 = make_fc(hidden, hidden, with_bn, with_gn)
        self.dim_out = hidden

        if head_cfg.USE_WS:
            # The return value is not kept: rebinding ``self`` as the last
            # statement of __init__ would not change the instance the caller
            # receives. NOTE(review): this relies on the helper converting
            # the module in place -- confirm.
            convert_conv2convws_model(self)
Esempio n. 7
0
    def __init__(self, dim_in, spatial_scale):
        """Build the RoI pooler and the conv / ASPPv3 / conv head.

        Arguments:
            dim_in (sequence): channel counts of the incoming features; the
                last entry is the input width of the first layer.
            spatial_scale: forwarded unchanged to ``Pooler`` as ``scales``.

        Sub-modules created, in order: ``conv_before_asppv3`` (``None`` when
        no leading convs are configured), ``asppv3`` (a 1x1 branch plus one
        3x3 branch per configured dilation), ``im_pool`` (global average pool
        followed by a 1x1 conv), ``feat`` (1x1 conv, optionally followed by a
        ``NonLocal2d`` block) and ``conv_after_asppv3``. ``self.dim_out`` is
        the final ``self.dim_in`` (``conv_dim``).
        """
        super(roi_gce_head, self).__init__()
        self.dim_in = dim_in[-1]

        # The pooling method is read from cfg.HRCNN, like every other option
        # in this constructor; it was previously taken from cfg.PRCNN.
        method = cfg.HRCNN.ROI_XFORM_METHOD
        resolution = cfg.HRCNN.ROI_XFORM_RESOLUTION
        sampling_ratio = cfg.HRCNN.ROI_XFORM_SAMPLING_RATIO
        self.pooler = Pooler(
            method=method,
            output_size=resolution,
            scales=spatial_scale,
            sampling_ratio=sampling_ratio,
        )

        use_nl = cfg.HRCNN.GCE_HEAD.USE_NL
        use_bn = cfg.HRCNN.GCE_HEAD.USE_BN
        use_gn = cfg.HRCNN.GCE_HEAD.USE_GN
        conv_dim = cfg.HRCNN.GCE_HEAD.CONV_DIM
        asppv3_dim = cfg.HRCNN.GCE_HEAD.ASPPV3_DIM
        num_convs_before_asppv3 = cfg.HRCNN.GCE_HEAD.NUM_CONVS_BEFORE_ASPPV3
        asppv3_dilation = cfg.HRCNN.GCE_HEAD.ASPPV3_DILATION
        num_convs_after_asppv3 = cfg.HRCNN.GCE_HEAD.NUM_CONVS_AFTER_ASPPV3

        # convx before asppv3 module
        before_asppv3_list = []
        for _ in range(num_convs_before_asppv3):
            before_asppv3_list.append(
                make_conv(self.dim_in,
                          conv_dim,
                          kernel=3,
                          stride=1,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True))
            self.dim_in = conv_dim
        self.conv_before_asppv3 = (nn.Sequential(*before_asppv3_list)
                                   if before_asppv3_list else None)

        # asppv3 module: a 1x1 branch, then one 3x3 branch per dilation rate
        asppv3_branches = [
            make_conv(self.dim_in,
                      asppv3_dim,
                      kernel=1,
                      use_bn=use_bn,
                      use_gn=use_gn,
                      use_relu=True)
        ]
        for dilation in asppv3_dilation:
            asppv3_branches.append(
                make_conv(self.dim_in,
                          asppv3_dim,
                          kernel=3,
                          dilation=dilation,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True))
        self.asppv3 = nn.ModuleList(asppv3_branches)
        self.im_pool = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            make_conv(self.dim_in,
                      asppv3_dim,
                      kernel=1,
                      use_bn=use_bn,
                      use_gn=use_gn,
                      use_relu=True))
        # Channel budget of all branches together: the 1x1 branch, one branch
        # per dilation and the image-pool branch (presumably concatenated in
        # forward -- confirm).
        self.dim_in = (len(asppv3_dilation) + 2) * asppv3_dim

        # 1x1 conv reducing the combined branches back to conv_dim channels
        feat_list = [
            make_conv(self.dim_in,
                      conv_dim,
                      kernel=1,
                      use_bn=use_bn,
                      use_gn=use_gn,
                      use_relu=True)
        ]
        if use_nl:
            feat_list.append(
                NonLocal2d(conv_dim,
                           int(conv_dim * cfg.HRCNN.GCE_HEAD.NL_RATIO),
                           conv_dim,
                           use_gn=True))
        self.feat = nn.Sequential(*feat_list)
        self.dim_in = conv_dim

        # convx after asppv3 module
        assert num_convs_after_asppv3 >= 1
        after_asppv3_list = []
        for _ in range(num_convs_after_asppv3):
            after_asppv3_list.append(
                make_conv(self.dim_in,
                          conv_dim,
                          kernel=3,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True))
            self.dim_in = conv_dim
        # The None fallback is only reachable when the assert above is
        # stripped (python -O).
        self.conv_after_asppv3 = (nn.Sequential(*after_asppv3_list)
                                  if after_asppv3_list else None)
        self.dim_out = self.dim_in
Esempio n. 8
0
    def __init__(self, dim_in, spatial_scale, norm='bn'):
        """Build a ResNet stage, a 1x1 reduction conv, a pooler and two FCs.

        Arguments:
            dim_in (sequence): channel counts of the incoming features; the
                last entry becomes ``self.inplanes`` for ``layer4``.
            spatial_scale: forwarded unchanged to ``Pooler`` as ``scales``.
            norm: stored on ``self.norm``.

        ``self.dim_out`` is the configured MLP width.
        """
        super().__init__()
        self.dim_in = dim_in[-1]

        resnet_cfg = cfg.BACKBONE.RESNET
        roi_cfg = cfg.FAST_RCNN

        # Pick the residual block type from the backbone configuration.
        if resnet_cfg.USE_ALIGN:
            block = res.AlignedBottleneck
        elif resnet_cfg.BOTTLENECK:
            block = res.Bottleneck  # not use the original Bottleneck module
        else:
            block = res.BasicBlock

        # Attributes set ahead of the _make_layer call below.
        self.expansion = block.expansion
        self.stride_3x3 = resnet_cfg.STRIDE_3X3
        self.avg_down = resnet_cfg.AVG_DOWN
        self.norm = norm
        self.base_width = resnet_cfg.WIDTH
        self.ctx_ratio = resnet_cfg.CTX_RATIO

        c5_dilation = resnet_cfg.C5_DILATION
        # Stride 2 only when the stage is not dilated.
        if c5_dilation == 1:
            c5_stride = 2
        else:
            c5_stride = 1

        self.inplanes = self.dim_in
        self.layer4 = self._make_layer(
            block,
            512,
            resnet_cfg.LAYERS[3],
            c5_stride,
            dilation=c5_dilation,
            conv=resnet_cfg.STAGE_WITH_CONV[3],
            context=resnet_cfg.STAGE_WITH_CONTEXT[3],
        )

        # 1x1 conv + ReLU reducing the stage output to 256 channels.
        reduce_conv = nn.Conv2d(512 * self.expansion,
                                256,
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=True)
        self.conv_new = nn.Sequential(reduce_conv, nn.ReLU(inplace=True))
        self.dim_in = 256

        resolution = roi_cfg.ROI_XFORM_RESOLUTION
        self.pooler = Pooler(
            method=roi_cfg.ROI_XFORM_METHOD,
            output_size=resolution,
            scales=spatial_scale,
            sampling_ratio=roi_cfg.ROI_XFORM_SAMPLING_RATIO,
        )

        # fc1 takes the flattened (channels x height x width) pooled map.
        flat_features = self.dim_in * resolution[0] * resolution[1]
        hidden = roi_cfg.MLP_HEAD.MLP_DIM
        self.fc1 = nn.Linear(flat_features, hidden)
        self.fc2 = nn.Linear(hidden, hidden)
        self.dim_out = hidden

        # Drop the attributes this head does not use (presumably created by
        # the parent constructor -- confirm against the base class).
        for unused in ('conv1', 'bn1', 'relu', 'maxpool', 'layer1',
                       'layer2', 'layer3', 'avgpool', 'fc'):
            delattr(self, unused)
        self._init_weights()