Example #1
0
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.box_coder = layers.BoxCoder()

        self.stride_list = cfg.rpn_stride
        rpn_channel = cfg.rpn_channel
        self.in_features = cfg.rpn_in_features
        self.anchors_generator = layers.DefaultAnchorGenerator(
            cfg.anchor_base_size,
            cfg.anchor_scales,
            cfg.anchor_aspect_ratios,
            cfg.anchor_offset,
        )
        self.rpn_conv = M.Conv2d(256, rpn_channel, kernel_size=3, stride=1, padding=1)
        self.rpn_cls_score = M.Conv2d(
            rpn_channel, cfg.num_cell_anchors * 2,
            kernel_size=1, stride=1
        )
        self.rpn_bbox_offsets = M.Conv2d(
            rpn_channel, cfg.num_cell_anchors * 4,
            kernel_size=1, stride=1
        )

        for l in [self.rpn_conv, self.rpn_cls_score, self.rpn_bbox_offsets]:
            M.init.normal_(l.weight, std=0.01)
            M.init.fill_(l.bias, 0)
Example #2
0
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.box_coder = layers.BoxCoder(cfg.rcnn_reg_mean, cfg.rcnn_reg_std)

        # roi head
        self.in_features = cfg.rcnn_in_features
        self.stride = cfg.rcnn_stride
        self.pooling_method = cfg.pooling_method
        self.pooling_size = cfg.pooling_size

        self.fc1 = M.Linear(256 * self.pooling_size[0] * self.pooling_size[1],
                            1024)
        self.fc2 = M.Linear(1024, 1024)
        for l in [self.fc1, self.fc2]:
            M.init.normal_(l.weight, std=0.01)
            M.init.fill_(l.bias, 0)

        # box predictor
        self.pred_cls = M.Linear(1024, cfg.num_classes + 1)
        self.pred_delta = M.Linear(1024, cfg.num_classes * 4)
        M.init.normal_(self.pred_cls.weight, std=0.01)
        M.init.normal_(self.pred_delta.weight, std=0.001)
        for l in [self.pred_cls, self.pred_delta]:
            M.init.fill_(l.bias, 0)
Example #3
0
    def __init__(self, cfg, batch_size):
        super().__init__()
        self.cfg = cfg
        self.batch_size = batch_size

        self.anchor_gen = layers.DefaultAnchorGenerator(
            base_size=4,
            anchor_scales=self.cfg.anchor_scales,
            anchor_ratios=self.cfg.anchor_ratios,
        )
        self.box_coder = layers.BoxCoder(reg_mean=cfg.reg_mean, reg_std=cfg.reg_std)

        self.stride_list = np.array([8, 16, 32, 64, 128]).astype(np.float32)
        self.in_features = ["p3", "p4", "p5", "p6", "p7"]

        # ----------------------- build the backbone ------------------------ #
        bottom_up = resnet50(norm=layers.get_norm(self.cfg.resnet_norm))

        # ------------ freeze the weights of resnet stage1 and stage 2 ------ #
        if self.cfg.backbone_freeze_at >= 1:
            for p in bottom_up.conv1.parameters():
                p.requires_grad = False
        if self.cfg.backbone_freeze_at >= 2:
            for p in bottom_up.layer1.parameters():
                p.requires_grad = False

        # ----------------------- build the FPN ----------------------------- #
        in_channels_p6p7 = 2048
        out_channels = 256
        self.backbone = layers.FPN(
            bottom_up=bottom_up,
            in_features=["res3", "res4", "res5"],
            out_channels=out_channels,
            norm="",
            top_block=layers.LastLevelP6P7(in_channels_p6p7, out_channels),
        )

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]

        # ----------------------- build the RetinaNet Head ------------------ #
        self.head = layers.RetinaNetHead(cfg, feature_shapes)

        self.inputs = {
            "image": mge.tensor(
                np.random.random([2, 3, 224, 224]).astype(np.float32), dtype="float32",
            ),
            "im_info": mge.tensor(
                np.random.random([2, 5]).astype(np.float32), dtype="float32",
            ),
            "gt_boxes": mge.tensor(
                np.random.random([2, 100, 5]).astype(np.float32), dtype="float32",
            ),
        }
Example #4
0
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.box_coder = layers.BoxCoder(cfg.rpn_reg_mean, cfg.rpn_reg_std)

        # check anchor settings
        assert len(set(len(x) for x in cfg.anchor_scales)) == 1
        assert len(set(len(x) for x in cfg.anchor_ratios)) == 1
        self.num_cell_anchors = len(cfg.anchor_scales[0]) * len(
            cfg.anchor_ratios[0])

        self.stride_list = np.array(cfg.rpn_stride).astype(np.float32)
        rpn_channel = cfg.rpn_channel
        self.in_features = cfg.rpn_in_features

        self.anchor_generator = layers.AnchorBoxGenerator(
            anchor_scales=cfg.anchor_scales,
            anchor_ratios=cfg.anchor_ratios,
            strides=cfg.rpn_stride,
            offset=self.cfg.anchor_offset,
        )

        self.matcher = layers.Matcher(cfg.match_thresholds, cfg.match_labels,
                                      cfg.match_allow_low_quality)

        self.rpn_conv = M.Conv2d(256,
                                 rpn_channel,
                                 kernel_size=3,
                                 stride=1,
                                 padding=1)
        self.rpn_cls_score = M.Conv2d(rpn_channel,
                                      self.num_cell_anchors,
                                      kernel_size=1,
                                      stride=1)
        self.rpn_bbox_offsets = M.Conv2d(rpn_channel,
                                         self.num_cell_anchors * 4,
                                         kernel_size=1,
                                         stride=1)

        for l in [self.rpn_conv, self.rpn_cls_score, self.rpn_bbox_offsets]:
            M.init.normal_(l.weight, std=0.01)
            M.init.fill_(l.bias, 0)
Example #5
0
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg

        self.anchor_generator = layers.AnchorBoxGenerator(
            anchor_scales=self.cfg.anchor_scales,
            anchor_ratios=self.cfg.anchor_ratios,
            strides=self.cfg.stride,
            offset=self.cfg.anchor_offset,
        )
        self.box_coder = layers.BoxCoder(cfg.reg_mean, cfg.reg_std)

        self.in_features = cfg.in_features

        # ----------------------- build backbone ------------------------ #
        bottom_up = getattr(resnet, cfg.backbone)(
            norm=layers.get_norm(cfg.backbone_norm), pretrained=cfg.backbone_pretrained
        )
        del bottom_up.fc

        # ----------------------- build FPN ----------------------------- #
        self.backbone = layers.FPN(
            bottom_up=bottom_up,
            in_features=cfg.fpn_in_features,
            out_channels=cfg.fpn_out_channels,
            norm=cfg.fpn_norm,
            top_block=layers.LastLevelP6P7(
                cfg.fpn_top_in_channel, cfg.fpn_out_channels, cfg.fpn_top_in_feature
            ),
            strides=cfg.fpn_in_strides,
            channels=cfg.fpn_in_channels,
        )

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]

        # ----------------------- build RetinaNet Head ------------------ #
        self.head = layers.BoxHead(cfg, feature_shapes)

        self.matcher = layers.Matcher(
            cfg.match_thresholds, cfg.match_labels, cfg.match_allow_low_quality
        )
Example #6
0
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg

        self.anchor_generator = layers.AnchorBoxGenerator(
            anchor_scales=self.cfg.anchor_scales,
            anchor_ratios=self.cfg.anchor_ratios,
            strides=self.cfg.stride,
            offset=self.cfg.anchor_offset,
        )
        self.box_coder = layers.BoxCoder(cfg.reg_mean, cfg.reg_std)

        self.stride_list = np.array(cfg.stride, dtype=np.float32)
        self.in_features = cfg.in_features

        # ----------------------- build backbone ------------------------ #
        bottom_up = getattr(resnet, cfg.backbone)(
            norm=layers.get_norm(cfg.resnet_norm),
            pretrained=cfg.backbone_pretrained)
        del bottom_up.fc

        # ----------------------- build FPN ----------------------------- #
        in_channels_p6p7 = 2048
        out_channels = 256
        self.backbone = layers.FPN(
            bottom_up=bottom_up,
            in_features=["res3", "res4", "res5"],
            out_channels=out_channels,
            norm=cfg.fpn_norm,
            top_block=layers.LastLevelP6P7(in_channels_p6p7, out_channels),
        )

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]

        # ----------------------- build head ------------------ #
        self.head = layers.BoxHead(cfg, feature_shapes)