def __init__(self, cfg):
    """Create the box coder, anchor generator and convolution layers.

    Args:
        cfg: configuration object. This constructor reads ``rpn_stride``,
            ``rpn_channel``, ``rpn_in_features``, ``anchor_base_size``,
            ``anchor_scales``, ``anchor_aspect_ratios``, ``anchor_offset``
            and ``num_cell_anchors`` from it.
    """
    super().__init__()
    self.cfg = cfg
    self.box_coder = layers.BoxCoder()

    self.stride_list = cfg.rpn_stride
    hidden_channels = cfg.rpn_channel
    self.in_features = cfg.rpn_in_features

    self.anchors_generator = layers.DefaultAnchorGenerator(
        cfg.anchor_base_size,
        cfg.anchor_scales,
        cfg.anchor_aspect_ratios,
        cfg.anchor_offset,
    )

    # NOTE(review): the input width of 256 is hard-coded; presumably it
    # matches the channel count of the incoming feature maps -- confirm.
    self.rpn_conv = M.Conv2d(
        256, hidden_channels, kernel_size=3, stride=1, padding=1
    )

    # Two 1x1 convolutions on top of the shared 3x3 convolution: one emits
    # 2 values per cell anchor, the other emits 4 values per cell anchor.
    anchors_per_cell = cfg.num_cell_anchors
    self.rpn_cls_score = M.Conv2d(
        hidden_channels, anchors_per_cell * 2, kernel_size=1, stride=1
    )
    self.rpn_bbox_offsets = M.Conv2d(
        hidden_channels, anchors_per_cell * 4, kernel_size=1, stride=1
    )

    # Same initialisation for every convolution: N(0, 0.01) weights and
    # zero biases.
    for conv in (self.rpn_conv, self.rpn_cls_score, self.rpn_bbox_offsets):
        M.init.normal_(conv.weight, std=0.01)
        M.init.fill_(conv.bias, 0)
def __init__(self, cfg, batch_size):
    """Assemble the backbone, FPN, detection head and placeholder inputs.

    Args:
        cfg: configuration object. This constructor reads
            ``anchor_scales``, ``anchor_ratios``, ``reg_mean``, ``reg_std``,
            ``resnet_norm`` and ``backbone_freeze_at`` from it and also
            hands the whole object to ``layers.RetinaNetHead``.
        batch_size: stored on the instance as ``self.batch_size``; it is
            not otherwise used inside this constructor.
    """
    super().__init__()
    self.cfg = cfg
    self.batch_size = batch_size

    self.anchor_gen = layers.DefaultAnchorGenerator(
        base_size=4,
        anchor_scales=cfg.anchor_scales,
        anchor_ratios=cfg.anchor_ratios,
    )
    self.box_coder = layers.BoxCoder(reg_mean=cfg.reg_mean, reg_std=cfg.reg_std)

    self.stride_list = np.array([8, 16, 32, 64, 128], dtype=np.float32)
    self.in_features = ["p3", "p4", "p5", "p6", "p7"]

    # Backbone: ResNet-50 with the normalisation layer chosen by the config.
    bottom_up = resnet50(norm=layers.get_norm(cfg.resnet_norm))

    # Optionally freeze the earliest parts of the backbone: ``conv1`` when
    # backbone_freeze_at >= 1, and additionally ``layer1`` when it is >= 2.
    freeze_at = cfg.backbone_freeze_at
    for threshold, stage_name in ((1, "conv1"), (2, "layer1")):
        if freeze_at >= threshold:
            for param in getattr(bottom_up, stage_name).parameters():
                param.requires_grad = False

    # Feature pyramid on top of res3..res5, plus an extra P6/P7 top block.
    fpn_out_channels = 256
    p6p7_in_channels = 2048
    self.backbone = layers.FPN(
        bottom_up=bottom_up,
        in_features=["res3", "res4", "res5"],
        out_channels=fpn_out_channels,
        norm="",
        top_block=layers.LastLevelP6P7(p6p7_in_channels, fpn_out_channels),
    )

    # The head is built from the shapes of the pyramid levels it consumes.
    shapes_by_name = self.backbone.output_shape()
    head_input_shapes = [shapes_by_name[name] for name in self.in_features]
    self.head = layers.RetinaNetHead(cfg, head_input_shapes)

    def _random_tensor(shape):
        # float32 tensor of the given shape filled with uniform random values.
        values = np.random.random(shape).astype(np.float32)
        return mge.tensor(values, dtype="float32")

    # Placeholder inputs with fixed shapes, filled with random data.  The
    # creation order (image, im_info, gt_boxes) is kept so the sequence of
    # draws from NumPy's global RNG is unchanged.
    self.inputs = {
        "image": _random_tensor([2, 3, 224, 224]),
        "im_info": _random_tensor([2, 5]),
        "gt_boxes": _random_tensor([2, 100, 5]),
    }