Example #1
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        """
        Arguments:
            in_channels (int): number of channels of the input feature
        """
        super().__init__()
        # TODO: Implement the sigmoid version first.
        self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
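        # Three conv towers are configured below: classification, box regression,
        # and an optional shared tower; only the first two may use deformable convs.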
        head_configs = {
            "cls":
            (cfg.MODEL.FCOS.NUM_CLS_CONVS, cfg.MODEL.FCOS.USE_DEFORMABLE),
            "bbox":
            (cfg.MODEL.FCOS.NUM_BOX_CONVS, cfg.MODEL.FCOS.USE_DEFORMABLE),
            "share": (cfg.MODEL.FCOS.NUM_SHARE_CONVS, False)
        }
        norm = None if cfg.MODEL.FCOS.NORM == "none" else cfg.MODEL.FCOS.NORM
        self.num_levels = len(input_shape)

        in_channels = [s.channels for s in input_shape]
        assert len(set(in_channels)) == 1, \
            "Each level must have the same number of channels!"
        in_channels = in_channels[0]

        self.in_channels_to_top_module = in_channels

        for head in head_configs:
            tower = []
            num_convs, use_deformable = head_configs[head]
            for i in range(num_convs):
                if use_deformable and i == num_convs - 1:
                    conv_func = DFConv2d
                else:
                    conv_func = nn.Conv2d
                tower.append(
                    conv_func(in_channels,
                              in_channels,
                              kernel_size=3,
                              stride=1,
                              padding=1,
                              bias=True))
                if norm == "GN":
                    tower.append(nn.GroupNorm(32, in_channels))
                elif norm == "NaiveGN":
                    tower.append(NaiveGroupNorm(32, in_channels))
                elif norm == "BN":
                    tower.append(
                        ModuleListDial([
                            nn.BatchNorm2d(in_channels)
                            for _ in range(self.num_levels)
                        ]))
                elif norm == "SyncBN":
                    tower.append(
                        ModuleListDial([
                            NaiveSyncBatchNorm(in_channels)
                            for _ in range(self.num_levels)
                        ]))
                tower.append(nn.ReLU())
            self.add_module('{}_tower'.format(head), nn.Sequential(*tower))

        self.cls_logits = nn.Conv2d(in_channels,
                                    self.num_classes,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)
        self.bbox_pred = nn.Conv2d(in_channels,
                                   4,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
        self.ctrness = nn.Conv2d(in_channels,
                                 1,
                                 kernel_size=3,
                                 stride=1,
                                 padding=1)

        if cfg.MODEL.FCOS.USE_SCALE:
            self.scales = nn.ModuleList(
                [Scale(init_value=1.0) for _ in range(self.num_levels)])
        else:
            self.scales = None
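        # Optional learnable per-level scalars (typically multiplied onto the box
        # regression output in the forward pass).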

        for modules in [
                self.cls_tower, self.bbox_tower, self.share_tower,
                self.cls_logits, self.bbox_pred, self.ctrness
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)

        # initialize the bias for focal loss
        prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_logits.bias, bias_value)
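
The forward pass is not part of this example. The sketch below shows one plausible way the towers, prediction convs, and optional per-level scales built above could be wired together in an FCOS-style head; the exact routing (for instance, predicting centerness from the box tower) is an assumption rather than part of the original code.

    def forward(self, x):
        # x: list of FPN feature maps, one per level, ordered like self.fpn_strides
        logits, bbox_reg, ctrness = [], [], []
        for level, feature in enumerate(x):
            feature = self.share_tower(feature)
            cls_feat = self.cls_tower(feature)
            box_feat = self.bbox_tower(feature)

            logits.append(self.cls_logits(cls_feat))
            ctrness.append(self.ctrness(box_feat))
            reg = self.bbox_pred(box_feat)
            if self.scales is not None:
                # learnable per-level scalar on the raw regression output
                reg = self.scales[level](reg)
            # distances from a location to the box sides must be non-negative
            bbox_reg.append(torch.relu(reg))
        return logits, bbox_reg, ctrness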
Example #2
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        """
        Arguments:
            in_channels (int): number of channels of the input feature
        """
        super().__init__()
        self.num_classes = cfg.MODEL.DTInst.NUM_CLASSES
        self.fpn_strides = cfg.MODEL.DTInst.FPN_STRIDES
        self.num_codes = cfg.MODEL.DTInst.NUM_CODE
        self.use_gcn_in_mask = cfg.MODEL.DTInst.USE_GCN_IN_MASK
        self.gcn_kernel_size = cfg.MODEL.DTInst.GCN_KERNEL_SIZE
        self.mask_size = cfg.MODEL.DTInst.MASK_SIZE
        self.if_whiten = cfg.MODEL.DTInst.WHITEN

        head_configs = {
            "cls":
            (cfg.MODEL.DTInst.NUM_CLS_CONVS, cfg.MODEL.DTInst.USE_DEFORMABLE),
            "bbox":
            (cfg.MODEL.DTInst.NUM_BOX_CONVS, cfg.MODEL.DTInst.USE_DEFORMABLE),
            "share": (cfg.MODEL.DTInst.NUM_SHARE_CONVS,
                      cfg.MODEL.DTInst.USE_DEFORMABLE),
            "mask":
            (cfg.MODEL.DTInst.NUM_MASK_CONVS, cfg.MODEL.DTInst.USE_DEFORMABLE)
        }
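        # Unlike the FCOS head in Example #1, a fourth "mask" tower is built here,
        # and every tower may use deformable convolutions.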

        self.type_deformable = cfg.MODEL.DTInst.TYPE_DEFORMABLE
        self.last_deformable = cfg.MODEL.DTInst.LAST_DEFORMABLE
        norm = None if cfg.MODEL.DTInst.NORM == "none" else cfg.MODEL.DTInst.NORM

        in_channels = [s.channels for s in input_shape]
        assert len(set(in_channels)) == 1, \
            "Each level must have the same number of channels!"
        in_channels = in_channels[0]

        for head in head_configs:
            tower = []
            num_convs, use_deformable = head_configs[head]
            for i in range(num_convs):
                # conv type.
                if use_deformable:
                    if self.last_deformable:
                        if i == num_convs - 1:
                            conv_func = DFConv2d
                            type_func = self.type_deformable
                        else:
                            conv_func = nn.Conv2d
                            type_func = "Conv2d"
                    else:
                        conv_func = DFConv2d
                        type_func = self.type_deformable
                else:
                    conv_func = nn.Conv2d
                    type_func = "Conv2d"
                # conv operation.
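                # DCNv1 is the plain deformable conv, DCNv2 the modulated variant;
                # with_modulated_dcn toggles between them in DFConv2d.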
                if type_func == "DCNv1":
                    tower.append(
                        conv_func(in_channels,
                                  in_channels,
                                  kernel_size=3,
                                  stride=1,
                                  padding=1,
                                  bias=False,
                                  with_modulated_dcn=False))
                elif type_func == "DCNv2":
                    tower.append(
                        conv_func(in_channels,
                                  in_channels,
                                  kernel_size=3,
                                  stride=1,
                                  padding=1,
                                  bias=False,
                                  with_modulated_dcn=True))
                elif type_func == "Conv2d":
                    tower.append(
                        conv_func(in_channels,
                                  in_channels,
                                  kernel_size=3,
                                  stride=1,
                                  padding=1,
                                  bias=True))
                else:
                    raise NotImplementedError(
                        "Unsupported conv type: {}".format(type_func))
                # norm.
                if norm == "GN":
                    tower.append(nn.GroupNorm(32, in_channels))
                elif norm == "NaiveGN":
                    tower.append(NaiveGroupNorm(32, in_channels))
                # activation.
                tower.append(nn.ReLU())
            self.add_module('{}_tower'.format(head), nn.Sequential(*tower))

        self.cls_logits = nn.Conv2d(in_channels,
                                    self.num_classes,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)
        self.bbox_pred = nn.Conv2d(in_channels,
                                   4,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
        self.ctrness = nn.Conv2d(in_channels,
                                 1,
                                 kernel_size=3,
                                 stride=1,
                                 padding=1)

        # self.residual = nn.Sequential(
        #     nn.Conv2d(in_channels * 2 + self.mask_size ** 2, in_channels, kernel_size=3, stride=1, padding=1),
        #     nn.GroupNorm(32, in_channels),
        #     nn.ReLU(),
        #     nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1),
        #     nn.GroupNorm(32, in_channels),
        #     nn.ReLU(),
        #     nn.Conv2d(in_channels, self.mask_size ** 2, kernel_size=1, stride=1, padding=0),
        # )

        self.residual = nn.Sequential(
            nn.Conv2d(in_channels * 3,
                      in_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels,
                      in_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
        )

        # self.residual = nn.Sequential(
        #     nn.Conv2d(in_channels, in_channels * 2, kernel_size=3, stride=1, padding=1),
        #     nn.ReLU(),
        #     nn.Conv2d(in_channels * 2, in_channels, kernel_size=3, stride=1, padding=1),
        #     nn.ReLU(),
        # )

        self.code_transform = nn.Conv2d(in_channels,
                                        3,
                                        kernel_size=3,
                                        stride=1,
                                        padding=1)
        torch.nn.init.constant_(self.code_transform.bias[0], 1)
        torch.nn.init.constant_(self.code_transform.bias[1], 0)
        torch.nn.init.constant_(self.code_transform.bias[2], 0.75)

        if self.use_gcn_in_mask:
            self.mask_pred = GCN(in_channels,
                                 self.num_codes,
                                 k=self.gcn_kernel_size)
        else:
            self.mask_pred = nn.Conv2d(in_channels,
                                       self.num_codes,
                                       kernel_size=3,
                                       stride=1,
                                       padding=1)

        if cfg.MODEL.DTInst.USE_SCALE:
            self.scales = nn.ModuleList(
                [Scale(init_value=1.0) for _ in self.fpn_strides])
        else:
            self.scales = None

        for modules in [
                self.cls_tower, self.bbox_tower, self.share_tower,
                self.cls_logits, self.bbox_pred, self.ctrness, self.mask_tower,
                self.mask_pred, self.residual
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)

        # initialize the bias for focal loss
        prior_prob = cfg.MODEL.DTInst.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_logits.bias, bias_value)
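
Both examples end with the same RetinaNet-style initialization of the classification bias. A small standalone check of what that formula does, using prior_prob = 0.01 as an assumed (and commonly used) value:

import math

import torch

prior_prob = 0.01  # assumed here; the real value comes from cfg.MODEL.*.PRIOR_PROB
bias_value = -math.log((1 - prior_prob) / prior_prob)

# sigmoid(bias_value) recovers prior_prob, so every class logit starts out
# predicting roughly 1% foreground probability, which keeps the focal loss
# from being swamped by background locations at the start of training.
print(torch.sigmoid(torch.tensor(bias_value)).item())  # ~0.01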