Example 1: HRFPN __init__
    def __init__(self, dim_in, spatial_scale):
        super().__init__()
        self.dim_in = sum(dim_in)
        self.spatial_scale = spatial_scale

        hrfpn_dim = cfg.FPN.HRFPN.DIM  # 256
        use_lite = cfg.FPN.HRFPN.USE_LITE
        use_bn = cfg.FPN.HRFPN.USE_BN
        use_gn = cfg.FPN.HRFPN.USE_GN
        if cfg.FPN.HRFPN.POOLING_TYPE == 'AVG':
            self.pooling = F.avg_pool2d
        else:
            self.pooling = F.max_pool2d
        self.num_extra_pooling = cfg.FPN.HRFPN.NUM_EXTRA_POOLING  # 1
        self.num_output = len(dim_in) + self.num_extra_pooling  # 5

        self.reduction_conv = make_conv(self.dim_in,
                                        hrfpn_dim,
                                        kernel=1,
                                        use_bn=use_bn,
                                        use_gn=use_gn)
        self.dim_in = hrfpn_dim

        self.fpn_conv = nn.ModuleList()
        for i in range(self.num_output):
            self.fpn_conv.append(
                make_conv(self.dim_in,
                          hrfpn_dim,
                          kernel=3,
                          use_dwconv=use_lite,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          suffix_1x1=use_lite))
            self.dim_in = hrfpn_dim

        if self.num_extra_pooling:
            self.spatial_scale.append(self.spatial_scale[-1] * 0.5)
        self.dim_out = [self.dim_in for _ in range(self.num_output)]
        self._init_weights()
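
The forward pass is not part of this snippet. As a rough illustration only, the aggregation that the attributes above suggest (upsample every branch to the finest resolution, concatenate, reduce with the 1x1 conv, then pool progressively before each 3x3 output conv) might be sketched as a standalone function like the one below; all names are hypothetical and the bilinear upsampling and average pooling choices are assumptions, not the actual implementation.

import torch
import torch.nn.functional as F

def hrfpn_style_forward(feats, reduction_conv, fpn_convs, num_extra_pooling=1):
    """Hypothetical HRFPN-style aggregation (illustration only).

    feats: multi-resolution feature maps, finest resolution first.
    reduction_conv: the 1x1 conv taking the concatenated channels.
    fpn_convs: one 3x3 conv per output level.
    """
    target_size = feats[0].shape[2:]
    # Upsample every branch to the finest resolution and concatenate.
    upsampled = [feats[0]] + [
        F.interpolate(f, size=target_size, mode='bilinear', align_corners=False)
        for f in feats[1:]
    ]
    x = reduction_conv(torch.cat(upsampled, dim=1))

    # One output per level: progressively stronger pooling before each 3x3 conv.
    outs = []
    for i in range(len(feats) + num_extra_pooling):
        pooled = x if i == 0 else F.avg_pool2d(x, kernel_size=2 ** i, stride=2 ** i)
        outs.append(fpn_convs[i](pooled))
    return outs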
Example 2: Fast R-CNN CONVFC_HEAD __init__
    def __init__(self, dim_in, spatial_scale, stage=1):
        super().__init__()
        self.dim_in = dim_in[-1]

        method = cfg.FAST_RCNN.ROI_XFORM_METHOD
        resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
        sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO
        pooler = Pooler(
            method=method,
            output_size=resolution,
            scales=spatial_scale,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler

        use_lite = cfg.FAST_RCNN.CONVFC_HEAD.USE_LITE
        use_bn = cfg.FAST_RCNN.CONVFC_HEAD.USE_BN
        use_gn = cfg.FAST_RCNN.CONVFC_HEAD.USE_GN
        conv_dim = cfg.FAST_RCNN.CONVFC_HEAD.CONV_DIM
        num_stacked_convs = cfg.FAST_RCNN.CONVFC_HEAD.NUM_STACKED_CONVS
        dilation = cfg.FAST_RCNN.CONVFC_HEAD.DILATION

        xconvs = []
        for ix in range(num_stacked_convs):
            xconvs.append(
                make_conv(self.dim_in,
                          conv_dim,
                          kernel=3,
                          stride=1,
                          dilation=dilation,
                          use_dwconv=use_lite,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          suffix_1x1=use_lite,
                          use_relu=True))
            self.dim_in = conv_dim
        self.add_module("xconvs", nn.Sequential(*xconvs))

        input_size = self.dim_in * resolution[0] * resolution[1]
        mlp_dim = cfg.FAST_RCNN.CONVFC_HEAD.MLP_DIM
        self.fc6 = make_fc(input_size, mlp_dim, use_bn=False, use_gn=False)
        self.dim_out = mlp_dim
        self.stage = stage

        if cfg.FAST_RCNN.CONVFC_HEAD.USE_WS:
            self = convert_conv2convws_model(self)
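
The matching forward is not shown. Judging from the attributes built above (pooler, xconvs, fc6), it presumably pools the proposals, runs the stacked convolutions, flattens, and applies the fully connected layer; a hypothetical sketch, not the actual code:

import torch.nn.functional as F

def convfc_head_forward(self, features, proposals):
    """Hypothetical forward for the head above (illustration only)."""
    x = self.pooler(features, proposals)   # RoI features: [R, C, res, res]
    x = self.xconvs(x)                     # stacked 3x3 convs (with ReLU)
    x = x.flatten(start_dim=1)             # [R, C * res * res]
    x = F.relu(self.fc6(x), inplace=True)  # mlp_dim-dimensional box features
    return x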
Example 3: MergingCell __init__
    def __init__(self,
                 dim_in=256,
                 with_conv=True,
                 use_lite=False,
                 use_bn=False,
                 use_gn=False):
        super(MergingCell, self).__init__()
        self.dim_in = dim_in
        self.with_conv = with_conv
        if self.with_conv:
            self.conv_out = nn.Sequential(
                nn.ReLU(inplace=True),
                make_conv(self.dim_in,
                          self.dim_in,
                          kernel=3,
                          use_dwconv=use_lite,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=False,
                          suffix_1x1=use_lite))
        self.dim_out = self.dim_in
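
MergingCell itself only defines the optional output convolution; how two inputs are merged is left to subclasses. A hypothetical sum-style subclass, purely illustrative and not the real SumCell, could look like this:

import torch.nn.functional as F

class SumCellSketch(MergingCell):
    """Hypothetical subclass: resize the second input to match the first,
    add them, and apply the optional 3x3 conv from MergingCell."""

    def forward(self, x1, x2):
        if x2.shape[2:] != x1.shape[2:]:
            x2 = F.interpolate(x2, size=x1.shape[2:], mode='nearest')
        out = x1 + x2
        if self.with_conv:
            out = self.conv_out(out)
        return out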
Example 4: roi_convx_head __init__ (HRCNN)
    def __init__(self, dim_in, spatial_scale):
        super(roi_convx_head, self).__init__()
        self.dim_in = dim_in[-1]

        resolution = cfg.HRCNN.ROI_XFORM_RESOLUTION
        sampling_ratio = cfg.HRCNN.ROI_XFORM_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=spatial_scale,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler

        use_lite = cfg.HRCNN.CONVX_HEAD.USE_LITE
        use_bn = cfg.HRCNN.CONVX_HEAD.USE_BN
        use_gn = cfg.HRCNN.CONVX_HEAD.USE_GN
        conv_dim = cfg.HRCNN.CONVX_HEAD.CONV_DIM
        num_stacked_convs = cfg.HRCNN.CONVX_HEAD.NUM_STACKED_CONVS
        dilation = cfg.HRCNN.CONVX_HEAD.DILATION

        self.blocks = []
        for layer_idx in range(num_stacked_convs):
            layer_name = "hier_fcn{}".format(layer_idx + 1)
            module = make_conv(self.dim_in,
                               conv_dim,
                               kernel=3,
                               stride=1,
                               dilation=dilation,
                               use_dwconv=use_lite,
                               use_bn=use_bn,
                               use_gn=use_gn,
                               suffix_1x1=use_lite)
            self.add_module(layer_name, module)
            self.dim_in = conv_dim
            self.blocks.append(layer_name)
        self.dim_out = self.dim_in
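
The forward pass is omitted here. Since the convolutions are registered by name and collected in self.blocks, a plausible, hypothetical forward would pool the proposals and apply each named layer in turn:

import torch.nn.functional as F

def convx_head_forward(self, features, proposals):
    """Hypothetical forward for the head above (illustration only)."""
    x = self.pooler(features, proposals)
    for layer_name in self.blocks:
        x = getattr(self, layer_name)(x)
        # Activation assumed here; make_conv's use_relu default is not shown.
        x = F.relu(x, inplace=True)
    return x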
Example 5: Hier_output __init__ (HRCNN)
    def __init__(self, dim_in):
        super(Hier_output, self).__init__()

        num_classes = cfg.HRCNN.NUM_CLASSES
        num_convs = cfg.HRCNN.OUTPUT_NUM_CONVS
        conv_dim = cfg.HRCNN.OUTPUT_CONV_DIM
        use_lite = cfg.HRCNN.OUTPUT_USE_LITE
        use_bn = cfg.HRCNN.OUTPUT_USE_BN
        use_gn = cfg.HRCNN.OUTPUT_USE_GN
        use_dcn = cfg.HRCNN.OUTPUT_USE_DCN

        cls_tower = []
        bbox_tower = []
        for i in range(num_convs):
            conv_type = 'deform' if use_dcn and i == num_convs - 1 else 'normal'
            cls_tower.append(
                make_conv(dim_in,
                          conv_dim,
                          kernel=3,
                          stride=1,
                          dilation=1,
                          use_dwconv=use_lite,
                          conv_type=conv_type,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True,
                          kaiming_init=False,
                          suffix_1x1=use_lite))
            bbox_tower.append(
                make_conv(dim_in,
                          conv_dim,
                          kernel=3,
                          stride=1,
                          dilation=1,
                          use_dwconv=use_lite,
                          conv_type=conv_type,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True,
                          kaiming_init=False,
                          suffix_1x1=use_lite))
            dim_in = conv_dim

        self.add_module('cls_tower', nn.Sequential(*cls_tower))
        self.add_module('bbox_tower', nn.Sequential(*bbox_tower))
        self.cls_logits = nn.Conv2d(conv_dim,
                                    num_classes,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)
        self.bbox_pred = nn.Conv2d(conv_dim,
                                   4,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
        self.centerness = nn.Conv2d(conv_dim,
                                    1,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)

        # Initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # initialize the bias for focal loss
        prior_prob = cfg.HRCNN.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_logits.bias, bias_value)
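
For reference, this is the standard focal-loss prior initialization: if PRIOR_PROB is the common choice of 0.01 (its configured value is not shown here), the bias becomes -log(0.99 / 0.01) ≈ -4.6, so the classification logits start out predicting foreground with a probability of roughly 0.01.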
Example 6: FCOSHead __init__
    def __init__(self, dim_in):
        """
        Arguments:
            in_channels (int): number of channels of the input feature
        """
        super(FCOSHead, self).__init__()
        self.dim_in = dim_in[-1]

        # TODO: Implement the sigmoid version first.
        num_classes = cfg.MODEL.NUM_CLASSES - 1
        use_lite = cfg.FCOS.USE_LITE
        use_bn = cfg.FCOS.USE_BN
        use_gn = cfg.FCOS.USE_GN
        use_dcn = cfg.FCOS.USE_DCN
        dense_points = cfg.FCOS.DENSE_POINTS

        self.fpn_strides = cfg.FCOS.FPN_STRIDES
        self.norm_reg_targets = cfg.FCOS.NORM_REG_TARGETS
        self.centerness_on_reg = cfg.FCOS.CENTERNESS_ON_REG

        cls_tower = []
        bbox_tower = []
        for i in range(cfg.FCOS.NUM_CONVS):
            conv_type = 'deform' if use_dcn and i == cfg.FCOS.NUM_CONVS - 1 else 'normal'
            cls_tower.append(
                make_conv(self.dim_in,
                          self.dim_in,
                          kernel=3,
                          stride=1,
                          dilation=1,
                          use_dwconv=use_lite,
                          conv_type=conv_type,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True,
                          kaiming_init=False,
                          suffix_1x1=use_lite))
            bbox_tower.append(
                make_conv(self.dim_in,
                          self.dim_in,
                          kernel=3,
                          stride=1,
                          dilation=1,
                          use_dwconv=use_lite,
                          conv_type=conv_type,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True,
                          kaiming_init=False,
                          suffix_1x1=use_lite))

        self.add_module('cls_tower', nn.Sequential(*cls_tower))
        self.add_module('bbox_tower', nn.Sequential(*bbox_tower))
        self.cls_logits = nn.Conv2d(self.dim_in,
                                    num_classes * dense_points,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)
        self.bbox_pred = nn.Conv2d(self.dim_in,
                                   4 * dense_points,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
        self.centerness = nn.Conv2d(self.dim_in,
                                    1 * dense_points,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)
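
The forward is not included in this snippet. A simplified, hypothetical version (omitting the per-level scaling that full FCOS heads typically apply to the regression outputs) would run both towers on every FPN level and collect the three outputs:

import torch

def fcos_head_forward(self, features):
    """Hypothetical, simplified forward for the FCOS head above."""
    logits, bbox_reg, centerness = [], [], []
    for feature in features:
        cls_feat = self.cls_tower(feature)
        box_feat = self.bbox_tower(feature)
        logits.append(self.cls_logits(cls_feat))
        # The flag decides which tower feeds the centerness branch.
        ctr_feat = box_feat if self.centerness_on_reg else cls_feat
        centerness.append(self.centerness(ctr_feat))
        # exp keeps the predicted distances positive (per-level scales omitted).
        bbox_reg.append(torch.exp(self.bbox_pred(box_feat)))
    return logits, bbox_reg, centerness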
Example 7: FCOSHeadLite __init__
    def __init__(self, dim_in):
        """
        Arguments:
            in_channels (int): number of channels of the input feature
        """
        super(FCOSHeadLite, self).__init__()
        self.dim_in = dim_in[-1]

        num_classes = cfg.MODEL.NUM_CLASSES - 1
        use_lite = cfg.FCOS.FCOSLITE_HEAD.USE_LITE
        use_bn = cfg.FCOS.FCOSLITE_HEAD.USE_BN
        use_gn = cfg.FCOS.FCOSLITE_HEAD.USE_GN
        tower_conv_kernel = cfg.FCOS.FCOSLITE_HEAD.TOWER_CONV_KERNEL
        last_conv_kernel = cfg.FCOS.FCOSLITE_HEAD.LAST_CONV_KERNEL
        dense_points = cfg.FCOS.DENSE_POINTS

        self.fpn_strides = cfg.FCOS.FPN_STRIDES
        self.norm_reg_targets = cfg.FCOS.NORM_REG_TARGETS
        self.centerness_on_reg = cfg.FCOS.CENTERNESS_ON_REG

        base_tower = []
        for i in range(1):
            base_tower.append(
                make_conv(self.dim_in,
                          self.dim_in,
                          kernel=1,
                          stride=1,
                          dilation=1,
                          use_dwconv=False,
                          conv_type='normal',
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True,
                          kaiming_init=False,
                          suffix_1x1=False))

        cls_tower = []
        for i in range(cfg.FCOS.FCOSLITE_HEAD.CLS_NUM_CONVS):
            cls_tower.append(
                make_conv(self.dim_in,
                          self.dim_in,
                          kernel=tower_conv_kernel,
                          stride=1,
                          dilation=1,
                          use_dwconv=use_lite,
                          conv_type='normal',
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True,
                          kaiming_init=False,
                          suffix_1x1=use_lite))

        bbox_tower = []
        for i in range(cfg.FCOS.FCOSLITE_HEAD.BBOX_NUM_CONVS):
            bbox_tower.append(
                make_conv(self.dim_in,
                          self.dim_in,
                          kernel=tower_conv_kernel,
                          stride=1,
                          dilation=1,
                          use_dwconv=use_lite,
                          conv_type='normal',
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True,
                          kaiming_init=False,
                          suffix_1x1=use_lite))

        self.add_module('base_tower', nn.Sequential(*base_tower))
        self.add_module('cls_tower', nn.Sequential(*cls_tower))
        self.add_module('bbox_tower', nn.Sequential(*bbox_tower))
        self.cls_logits = nn.Conv2d(self.dim_in,
                                    num_classes * dense_points,
                                    kernel_size=1,
                                    stride=1,
                                    padding=0)
        self.bbox_pred = nn.Conv2d(self.dim_in,
                                   4 * dense_points,
                                   kernel_size=last_conv_kernel,
                                   stride=1,
                                   padding=last_conv_kernel // 2)
        self.centerness = nn.Conv2d(self.dim_in,
                                    1 * dense_points,
                                    kernel_size=last_conv_kernel,
                                    stride=1,
                                    padding=last_conv_kernel // 2)
Example 8: BiFPN __init__
    def __init__(self, dim_in, spatial_scale):
        super().__init__()
        self.dim_in = dim_in[-1]  # 2048
        self.spatial_scale = spatial_scale

        self.num_stack = cfg.FPN.BIFPN.NUM_STACK
        bifpn_dim = cfg.FPN.BIFPN.DIM
        self.eps = cfg.FPN.BIFPN.EPS
        use_lite = cfg.FPN.BIFPN.USE_LITE
        use_bn = cfg.FPN.BIFPN.USE_BN
        use_gn = cfg.FPN.BIFPN.USE_GN
        min_level, max_level = get_min_max_levels()  # 3, 7
        self.num_backbone_stages = len(dim_in) - (
            min_level - cfg.FPN.LOWEST_BACKBONE_LVL
        )  # 3 (cfg.FPN.LOWEST_BACKBONE_LVL=2)

        # bifpn module
        self.bifpn_in = nn.ModuleList()
        for i in range(self.num_backbone_stages):
            px_in = make_conv(dim_in[-1 - i],
                              bifpn_dim,
                              kernel=1,
                              use_bn=use_bn,
                              use_gn=use_gn)
            self.bifpn_in.append(px_in)
        self.dim_in = bifpn_dim

        # add bifpn connections
        self.bifpn_stages = nn.ModuleList()
        for _ in range(self.num_stack):
            stage = nn.ModuleDict()

            # fusion weights
            stage['p6_td_fusion'] = Fusion2D()
            stage['p5_td_fusion'] = Fusion2D()
            stage['p4_td_fusion'] = Fusion2D()
            stage['p3_out_fusion'] = Fusion2D()
            stage['p4_out_fusion'] = Fusion3D()
            stage['p5_out_fusion'] = Fusion3D()
            stage['p6_out_fusion'] = Fusion3D()
            stage['p7_out_fusion'] = Fusion2D()

            # top-down connect
            for lvl in (6, 5, 4):
                stage['p{}_td_conv'.format(lvl)] = make_conv(
                    bifpn_dim,
                    bifpn_dim,
                    kernel=3,
                    use_dwconv=use_lite,
                    use_bn=use_bn,
                    use_gn=use_gn,
                    use_relu=use_bn or use_gn,
                    suffix_1x1=use_lite)

            # output
            for lvl in (3, 4, 5, 6, 7):
                stage['p{}_out_conv'.format(lvl)] = make_conv(
                    bifpn_dim,
                    bifpn_dim,
                    kernel=3,
                    use_dwconv=use_lite,
                    use_bn=use_bn,
                    use_gn=use_gn,
                    use_relu=use_bn or use_gn,
                    suffix_1x1=use_lite)
            self.bifpn_stages.append(stage)

        self.extra_levels = max_level - cfg.FPN.HIGHEST_BACKBONE_LVL  # 2
        for _ in range(self.extra_levels):
            self.spatial_scale.append(self.spatial_scale[-1] * 0.5)

        self.spatial_scale = self.spatial_scale[min_level - 2:]
        self.dim_out = [self.dim_in for _ in range(max_level - min_level + 1)]

        self._init_weights()
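
Fusion2D and Fusion3D are not defined in this snippet. Assuming they follow the EfficientDet-style fast normalized fusion that the eps attribute hints at, a two-input version might look like the sketch below; this is an illustration, not the actual module:

import torch
import torch.nn as nn
import torch.nn.functional as F

class Fusion2DSketch(nn.Module):
    """Hypothetical 2-input fast normalized fusion:
    out = (w1 * x1 + w2 * x2) / (w1 + w2 + eps) with learnable weights."""

    def __init__(self, eps=1e-4):
        super().__init__()
        self.weights = nn.Parameter(torch.ones(2))
        self.eps = eps

    def forward(self, x1, x2):
        w = F.relu(self.weights)  # keep the fusion weights non-negative
        return (w[0] * x1 + w[1] * x2) / (w.sum() + self.eps)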
Example 9: NASFPN __init__
    def __init__(self, dim_in, spatial_scale):
        super().__init__()
        self.dim_in = dim_in[-1]  # 2048
        self.spatial_scale = spatial_scale

        self.num_stack = cfg.FPN.NASFPN.NUM_STACK  # 7
        nasfpn_dim = cfg.FPN.NASFPN.DIM  # 256
        use_lite = cfg.FPN.NASFPN.USE_LITE
        use_bn = cfg.FPN.NASFPN.USE_BN
        use_gn = cfg.FPN.NASFPN.USE_GN
        min_level, max_level = get_min_max_levels()  # 3, 7
        self.num_backbone_stages = len(dim_in) - (
            min_level - cfg.FPN.LOWEST_BACKBONE_LVL
        )  # 3 (cfg.FPN.LOWEST_BACKBONE_LVL=2)

        # nasfpn module
        self.nasfpn_in = nn.ModuleList()
        for i in range(self.num_backbone_stages):
            px_in = make_conv(dim_in[-1 - i],
                              nasfpn_dim,
                              kernel=1,
                              use_bn=use_bn,
                              use_gn=use_gn)
            self.nasfpn_in.append(px_in)
        self.dim_in = nasfpn_dim

        # add nasfpn connections
        self.nasfpn_stages = nn.ModuleList()
        for _ in range(self.num_stack):
            stage = nn.ModuleDict()
            # gp(p6, p4) -> p4_1
            stage['gp_64_4'] = GPCell(nasfpn_dim,
                                      use_lite=use_lite,
                                      use_bn=use_bn,
                                      use_gn=use_gn)
            # sum(p4_1, p4) -> p4_2
            stage['sum_44_4'] = SumCell(nasfpn_dim,
                                        use_lite=use_lite,
                                        use_bn=use_bn,
                                        use_gn=use_gn)
            # sum(p4_2, p3) -> p3_out
            stage['sum_43_3'] = SumCell(nasfpn_dim,
                                        use_lite=use_lite,
                                        use_bn=use_bn,
                                        use_gn=use_gn)
            # sum(p3_out, p4_2) -> p4_out
            stage['sum_43_4'] = SumCell(nasfpn_dim,
                                        use_lite=use_lite,
                                        use_bn=use_bn,
                                        use_gn=use_gn)
            # sum(p5, gp(p4_out, p3_out)) -> p5_out
            stage['gp_43_5'] = GPCell(with_conv=False)
            stage['sum_55_5'] = SumCell(nasfpn_dim,
                                        use_lite=use_lite,
                                        use_bn=use_bn,
                                        use_gn=use_gn)
            # sum(p7, gp(p5_out, p4_2)) -> p7_out
            stage['gp_54_7'] = GPCell(with_conv=False)
            stage['sum_77_7'] = SumCell(nasfpn_dim,
                                        use_lite=use_lite,
                                        use_bn=use_bn,
                                        use_gn=use_gn)
            # gp(p7_out, p5_out) -> p6_out
            stage['gp_75_6'] = GPCell(nasfpn_dim,
                                      use_lite=use_lite,
                                      use_bn=use_bn,
                                      use_gn=use_gn)
            self.nasfpn_stages.append(stage)

        self.extra_levels = max_level - cfg.FPN.HIGHEST_BACKBONE_LVL  # 2
        for _ in range(self.extra_levels):
            self.spatial_scale.append(self.spatial_scale[-1] * 0.5)

        self.spatial_scale = self.spatial_scale[min_level - 2:]
        self.dim_out = [self.dim_in for _ in range(max_level - min_level + 1)]

        self._init_weights()
Example 10: FPN __init__
    def __init__(self, dim_in, spatial_scale):
        super().__init__()
        self.dim_in = dim_in[-1]  # 2048
        self.spatial_scale = spatial_scale

        fpn_dim = cfg.FPN.DIM  # 256
        use_lite = cfg.FPN.USE_LITE
        use_bn = cfg.FPN.USE_BN
        use_gn = cfg.FPN.USE_GN
        min_level, max_level = get_min_max_levels()  # 2, 6
        self.num_backbone_stages = len(dim_in) - (
            min_level - cfg.FPN.LOWEST_BACKBONE_LVL
        )  # 4 (cfg.FPN.LOWEST_BACKBONE_LVL=2)

        # P5 in
        self.p5_in = make_conv(self.dim_in,
                               fpn_dim,
                               kernel=1,
                               use_bn=use_bn,
                               use_gn=use_gn)

        # P5 out
        self.p5_out = make_conv(fpn_dim,
                                fpn_dim,
                                kernel=3,
                                use_dwconv=use_lite,
                                use_bn=use_bn,
                                use_gn=use_gn,
                                suffix_1x1=use_lite)

        # fpn module
        self.fpn_in = []
        self.fpn_out = []
        for i in range(self.num_backbone_stages - 1):  # skip the top layer
            px_in = make_conv(dim_in[-i - 2],
                              fpn_dim,
                              kernel=1,
                              use_bn=use_bn,
                              use_gn=use_gn)  # from P4 to P2
            px_out = make_conv(fpn_dim,
                               fpn_dim,
                               kernel=3,
                               use_dwconv=use_lite,
                               use_bn=use_bn,
                               use_gn=use_gn,
                               suffix_1x1=use_lite)
            self.fpn_in.append(px_in)
            self.fpn_out.append(px_out)
        self.fpn_in = nn.ModuleList(self.fpn_in)  # [P4, P3, P2]
        self.fpn_out = nn.ModuleList(self.fpn_out)
        self.dim_in = fpn_dim

        # P6. Original FPN P6 level implementation from CVPR'17 FPN paper.
        if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == cfg.FPN.HIGHEST_BACKBONE_LVL + 1:
            self.maxpool_p6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0)
            self.spatial_scale.append(self.spatial_scale[-1] * 0.5)

        # Coarser FPN levels introduced for RetinaNet
        if cfg.FPN.EXTRA_CONV_LEVELS and max_level > cfg.FPN.HIGHEST_BACKBONE_LVL:
            self.extra_pyramid_modules = nn.ModuleList()
            if cfg.FPN.USE_C5:
                self.dim_in = dim_in[-1]
            for i in range(cfg.FPN.HIGHEST_BACKBONE_LVL + 1, max_level + 1):
                self.extra_pyramid_modules.append(
                    make_conv(self.dim_in,
                              fpn_dim,
                              kernel=3,
                              stride=2,
                              use_dwconv=use_lite,
                              use_bn=use_bn,
                              use_gn=use_gn,
                              suffix_1x1=use_lite))
                self.dim_in = fpn_dim
                self.spatial_scale.append(self.spatial_scale[-1] * 0.5)

        # self.spatial_scale.reverse()  # [1/64, 1/32, 1/16, 1/8, 1/4]
        # self.dim_out = [self.dim_in]
        num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
        # Retain only the spatial scales that will be used for RoI heads. `self.spatial_scale`
        # may include extra scales that are used for RPN proposals, but not for RoI heads.
        self.spatial_scale = self.spatial_scale[:num_roi_levels]
        self.dim_out = [self.dim_in for _ in range(num_roi_levels)]

        if cfg.FPN.USE_WS:
            self = convert_conv2convws_model(self)

        self._init_weights()
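
The top-down pathway itself is not shown. Based on the modules created above (p5_in/p5_out, fpn_in/fpn_out, and the optional max-pooled P6), a hypothetical forward could be sketched as follows; the extra RetinaNet levels from extra_pyramid_modules are omitted for brevity:

import torch.nn.functional as F

def fpn_style_forward(self, conv_body_blobs):
    """Hypothetical top-down pathway for the FPN above (illustration only).
    conv_body_blobs: backbone features ordered fine to coarse, e.g. [C2..C5]."""
    # Start at the coarsest level with its lateral 1x1 projection.
    lateral = self.p5_in(conv_body_blobs[-1])
    outputs = [self.p5_out(lateral)]
    # Walk down: upsample, add the next lateral projection, then 3x3 smooth.
    for i in range(self.num_backbone_stages - 1):
        lateral = self.fpn_in[i](conv_body_blobs[-i - 2]) + \
            F.interpolate(lateral, scale_factor=2, mode='nearest')
        outputs.insert(0, self.fpn_out[i](lateral))
    # Optional extra coarse level (the stride-2 max pool registered as P6).
    if hasattr(self, 'maxpool_p6'):
        outputs.append(self.maxpool_p6(outputs[-1]))
    return outputs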
Example 11: roi_grid_head __init__ (Grid R-CNN)
    def __init__(self, dim_in, spatial_scale, stage):
        super(roi_grid_head, self).__init__()
        self.grid_points = cfg.GRID_RCNN.GRID_POINTS if not cfg.GRID_RCNN.CASCADE_MAPPING_ON else \
            cfg.GRID_RCNN.CASCADE_MAPPING_OPTION.GRID_NUM[stage]
        self.roi_feat_size = cfg.GRID_RCNN.ROI_FEAT_SIZE

        self.num_convs = cfg.GRID_RCNN.GRID_HEAD.NUM_CONVS
        self.point_feat_channels = cfg.GRID_RCNN.GRID_HEAD.POINT_FEAT_CHANNELS

        self.conv_out_channels = self.point_feat_channels * self.grid_points
        self.class_agnostic = False
        self.dim_in = dim_in[-1]

        assert self.grid_points >= 4
        self.grid_size = int(np.sqrt(self.grid_points))
        if self.grid_size * self.grid_size != self.grid_points:
            raise ValueError('grid_points must be a square number')

        # the predicted heatmap is half of whole_map_size
        if not isinstance(self.roi_feat_size, int):
            raise ValueError('Only square RoIs are supported in Grid R-CNN')
        self.whole_map_size = self.roi_feat_size * 4

        self.convs = []
        conv_kernel_size = 3
        for i in range(self.num_convs):
            in_channels = (self.dim_in if i == 0 else self.conv_out_channels)
            stride = 2 if i == 0 else 1
            padding = (conv_kernel_size - 1) // 2
            self.convs.append(
                nn.Sequential(
                    nn.Conv2d(in_channels,
                              self.conv_out_channels,
                              kernel_size=conv_kernel_size,
                              stride=stride,
                              padding=padding),
                    nn.GroupNorm(4 * self.grid_points,
                                 self.conv_out_channels,
                                 eps=1e-5),
                    nn.ReLU(inplace=True)))
        self.convs = nn.Sequential(*self.convs)

        # find the 4-neighbor of each grid point
        self.neighbor_points = self._get_neighbors()
        # total edges in the grid
        self.num_edges = sum([len(p) for p in self.neighbor_points])

        if cfg.GRID_RCNN.FUSED_ON:
            self.forder_trans = self._build_trans(
                nn.ModuleList())  # first-order feature transition
            self.sorder_trans = self._build_trans(
                nn.ModuleList())  # second-order feature transition

        method = cfg.GRID_RCNN.ROI_XFORM_METHOD
        resolution = cfg.GRID_RCNN.ROI_XFORM_RESOLUTION_GRID
        sampling_ratio = cfg.GRID_RCNN.ROI_XFORM_SAMPLING_RATIO
        if cfg.GRID_RCNN.FINEST_LEVEL_ROI:
            spatial_scale = [spatial_scale[0]]
        pooler = Pooler(
            method=method,
            output_size=resolution,
            scales=spatial_scale,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler
        self.dim_out = dim_in

        if cfg.GRID_RCNN.OFFSET_ON:
            self.offset_conv = make_conv(self.dim_in, 64, kernel=3, stride=2)
            self.offset_fc = make_fc(64 * 7 * 7, 4 * self.grid_points)
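
_get_neighbors() is not shown. For a grid_size x grid_size layout it presumably returns, for each grid point, the indices of its 4-connected neighbours; a standalone, hypothetical version:

def get_grid_neighbors(grid_size):
    """Hypothetical stand-in for _get_neighbors(): for each of the
    grid_size x grid_size points, the indices of its 4-connected neighbours."""
    neighbors = []
    for i in range(grid_size ** 2):
        r, c = divmod(i, grid_size)
        adj = []
        if r > 0:
            adj.append(i - grid_size)  # point above
        if c > 0:
            adj.append(i - 1)          # point to the left
        if c < grid_size - 1:
            adj.append(i + 1)          # point to the right
        if r < grid_size - 1:
            adj.append(i + grid_size)  # point below
        neighbors.append(tuple(adj))
    return neighbors

Under this assumption, grid_points = 9 would give num_edges = 24 (each corner contributes 2, each edge midpoint 3, and the centre 4).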