Example 1
    def __init__(self,
                 features,
                 scales,
                 ratios,
                 classes,
                 roi_size,
                 train_patterns,
                 stride=32,
                 rpn_channel=1024,
                 num_sample=128,
                 pos_iou_thresh=0.5,
                 neg_iou_thresh_high=0.5,
                 neg_iou_thresh_low=0.0,
                 pos_ratio=0.25,
                 nms_thresh=0.3,
                 nms_topk=400,
                 post_nms=100):

        # call the parent constructor
        super(LHRCNN, self).__init__()
        self.stride = stride
        self._max_batch = 1  # currently only support batch size = 1
        self._max_roi = 100000  # maximum allowed ROIs
        self.num_class = len(classes)
        self._target_generator = {RCNNTargetGenerator(self.num_class)}
        self.k, _ = roi_size
        self.CT = 10

        #-------- RCNN params --------
        self.train_patterns = train_patterns
        self.nms_thresh = nms_thresh
        self.nms_topk = nms_topk
        self.post_nms = post_nms

        #-------nn init------------
        with self.name_scope():
            #------- light-head R-CNN settings -------
            self.rpn = RPN(rpn_channel, stride, scales=scales, ratios=ratios)
            self.sampler = RCNNTargetSampler(num_sample, pos_iou_thresh,
                                             neg_iou_thresh_high,
                                             neg_iou_thresh_low, pos_ratio)
            self.group_conv = Group_Conv(10 * self.k * self.k)
            #-------rcnn setting---------------
            self.box_to_center = BBoxCornerToCenter()
            self.box_decoder = NormalizedBoxCenterDecoder()
            self.cls_decoder = MultiPerClassDecoder(num_class=self.num_class + 1)
            self.features = features
            self.share = nn.Dense(1024,
                                  activation='relu',
                                  weight_initializer=mx.init.Normal(0.01))
            self.clf = nn.Dense(self.num_class + 1,
                                weight_initializer=mx.init.Normal(0.01))
            self.reg = nn.Dense(self.num_class * 4,
                                weight_initializer=mx.init.Normal(0.01))
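
A minimal standalone sketch of the channel arithmetic behind Group_Conv(10 * self.k * self.k): with a k x k position-sensitive RoI grid and 10 channels per bin (presumably the CT = 10 above), the light-head "thin" feature map needs 10 * k * k channels. The helper name and values below are illustrative only.

    # Hypothetical helper: channel count of the light-head "thin" feature map,
    # assuming a k x k position-sensitive grid with 10 channels per bin.
    def thin_feature_channels(roi_size, channels_per_bin=10):
        k, _ = roi_size  # square grid, mirroring `self.k, _ = roi_size` above
        return channels_per_bin * k * k

    print(thin_feature_channels((7, 7)))  # -> 490 channels for a 7x7 grid
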
Example 2
    def __init__(self, network, base_size, features, num_filters, sizes, ratios,
                 steps, classes, use_1x1_transition=True, use_bn=False, fpn_channel=64,
                 reduce_ratio=1.0, min_depth=128, global_pool=False, pretrained=False,
                 stds=(0.1, 0.1, 0.2, 0.2), nms_thresh=0.35, nms_topk=5000, post_nms=750,
                 ctx=mx.cpu(), **kwargs):
        super(SFD, self).__init__(**kwargs)
        if network is None:
            num_layers = len(steps)
        else:
            num_layers = len(features) + len(num_filters) + int(global_pool)
        assert isinstance(sizes, list), "Must provide sizes as list or list of list"
        assert isinstance(ratios, list), "Must provide ratios as list or list of list"
        if not isinstance(ratios[0], (tuple, list)):
            ratios = [ratios] * num_layers  # propagate to all layers if use same ratio
        assert num_layers == len(sizes) == len(ratios), \
            "Mismatched (number of layers) vs (sizes) vs (ratios): {}, {}, {}".format(
                num_layers, len(sizes), len(ratios))
        assert num_layers > 0, "SFD require at least one layer, suggest multiple."
        self._num_layers = num_layers
        self.classes = classes
        self.nms_thresh = nms_thresh
        self.nms_topk = nms_topk
        self.post_nms = post_nms
        self.base_size = base_size
        self.im_size = [base_size, base_size]

        with self.name_scope():
            if network is None:
                # use fine-grained manually designed block as features
                self.features = features(batch_norm=use_bn, pretrained=pretrained, ctx=ctx)
            else:
                self.features = FeatureExpander(
                    network=network, outputs=features, num_filters=num_filters,
                    use_1x1_transition=use_1x1_transition, fpn_channel=fpn_channel,
                    use_bn=use_bn, reduce_ratio=reduce_ratio, min_depth=min_depth,
                    global_pool=global_pool, pretrained=pretrained, ctx=ctx)
            self.class_predictors = nn.HybridSequential()
            self.box_predictors = nn.HybridSequential()
            self.anchor_generators = nn.HybridSequential()
            asz = [base_size // 4, base_size // 4]
            for i, s, r, st in zip(range(num_layers), sizes, ratios, steps):
                anchor_generator = SFDAnchorGenerator(i, self.im_size, s, r, st, asz)
                self.anchor_generators.add(anchor_generator)
                asz = [max(sz // 2, 16) for sz in asz]  # pre-compute larger than 16x16 anchor map
                num_anchors = anchor_generator.num_depth
                cls_num_channel = num_anchors * (len(self.classes) + 1)
                if i == 0:
                    self.class_predictors.add(ConvMOPredictor(cls_num_channel, 3))
                else:
                    self.class_predictors.add(ConvPredictor(cls_num_channel))
                self.box_predictors.add(ConvPredictor(num_anchors * 4))
            self.bbox_decoder = NormalizedBoxCenterDecoder(stds)
            self.cls_decoder = MultiPerClassDecoder(len(self.classes) + 1, thresh=0.01)
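
The ratio-propagation branch above can be exercised in isolation; a standalone sketch with made-up values:

    # A flat ratio list is wrapped and repeated so every layer gets the
    # same ratios; the length check afterwards then passes.
    num_layers = 6
    ratios = [1, 2, 0.5]  # flat list: shared by all layers
    if not isinstance(ratios[0], (tuple, list)):
        ratios = [ratios] * num_layers  # -> 6 copies of [1, 2, 0.5]
    assert len(ratios) == num_layers
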
Example 3
    def __init__(self,
                 stages,
                 sizes,
                 ratios,
                 steps,
                 dm_channels=256,
                 pm_channels=256,
                 sm_channels=32,
                 stds=(0.1, 0.1, 0.2, 0.2),
                 nms_thresh=0.45,
                 nms_topk=1000,
                 post_nms=400,
                 anchor_alloc_size=256,
                 ctx=mx.cpu(),
                 norm_layer=nn.BatchNorm,
                 norm_kwargs=None,
                 **kwargs):
        super(CLRS, self).__init__(**kwargs)

        self.nms_thresh = nms_thresh
        self.nms_topk = nms_topk
        self.post_nms = post_nms

        with self.name_scope():
            self.stages = nn.HybridSequential()
            for i in range(len(stages)):
                self.stages.add(stages[i])
            # extra layers
            self.extras = nn.HybridSequential()
            self.extras.add(self._extra_layer(256, 512))
            self.extras.add(self._extra_layer(128, 256))
            self.extras.add(self._extra_layer(128, 256, strides=1))
            self.extras.add(self._extra_layer(128, 256, strides=1))
            self.dms = nn.HybridSequential()
            for i in range(6):
                strides = 2 if i > 1 else 1
                ksize = 2 if strides == 2 else 3
                self.dms.add(
                    DM(dm_channels, ksize, strides=strides, pad=ksize - 2))
            self.pms = nn.HybridSequential()
            self.anchor_generators = nn.HybridSequential()
            asz = anchor_alloc_size
            for i, (s, r, st) in enumerate(zip(sizes, ratios, steps)):
                self.pms.add(PM(pm_channels, len(s)))
                anchor_generator = CLRSAnchorGenerator(i, (512, 512),
                                                       s,
                                                       r,
                                                       st,
                                                       alloc_size=(asz, asz))
                self.anchor_generators.add(anchor_generator)
                asz = max(asz // 2, 16)
            self.seg_pred = SegPred(sm_channels)
            self.bbox_decoder = NormalizedBoxCenterDecoder(stds)
            self.cls_decoder = MultiPerClassDecoder(4 + 1, thresh=0.01)
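
The anchor allocation map above shrinks by half per prediction layer with a floor of 16; a standalone replay of that schedule, starting from anchor_alloc_size=256:

    # Allocation-map sizes for six layers, halving per layer, clamped at 16,
    # as in the loop above: 256, 128, 64, 32, 16, 16.
    asz = 256
    for i in range(6):
        print(i, (asz, asz))
        asz = max(asz // 2, 16)
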
Example 4
    def __init__(self, features, top_features, classes, short, max_size, train_patterns=None,
                nms_thresh=0.3, nms_topk=400, post_nms=100, roi_mode='align', roi_size=(14, 14), stride=16, clip=None,
                rpn_channel=1024, base_size=16, scales=(8, 16, 32), ratios=(0.5, 1, 2), alloc_size=(128, 128), rpn_nms_thresh=0.5,
                rpn_train_pre_nms=12000, rpn_train_post_nms=2000, rpn_test_pre_nms=6000, rpn_test_post_nms=300, rpn_min_size=16,
                num_sample=128, pos_iou_thresh=0.5, pos_ratio=0.25):
        super(FasterRCNN, self).__init__()
        self.classes = classes
        self.num_classes = len(classes)
        self.short = short
        self.max_size = max_size
        self.train_patterns = train_patterns
        self.nms_thresh = nms_thresh
        self.nms_topk = nms_topk
        self.post_nms = post_nms

        self._max_batch = 1
        self._num_sample = num_sample
        self._rpn_test_post_nms = rpn_test_post_nms
        # return cls_target, box_target, box_mask
        self._target_generator = {RCNNTargetGenerator(self.num_classes)}

        self._roi_mode = roi_mode.lower()
        self._roi_size = roi_size
        self._stride = stride

        with self.name_scope():
            self.features = features
            self.top_features = top_features
            self.global_avg_pool = nn.GlobalAvgPool2D()
            self.class_predictor = nn.Dense(
                self.num_classes+1, weight_initializer=mx.init.Normal(0.01))
            self.box_predictor = nn.Dense(
                self.num_classes*4, weight_initializer=mx.init.Normal(0.01))

            # reconstruct valid labels
            self.cls_decoder = MultiPerClassDecoder(num_class=self.num_classes+1)
            # (xmin, ymin, xmax, ymax) -> (cx, cy, w, h)
            self.box_to_center = BBoxCornerToCenter()
            # reconstructed bounding boxes
            self.box_decoder = NormalizedBoxCenterDecoder(clip=clip)
            # region proposal network
            self.rpn = RPN(
                channels=rpn_channel, stride=stride, base_size=base_size,
                scales=scales, ratios=ratios, alloc_size=alloc_size,
                clip=clip, nms_thresh=rpn_nms_thresh, train_pre_nms=rpn_train_pre_nms,
                train_post_nms=rpn_train_post_nms, test_pre_nms=rpn_test_pre_nms, test_post_nms=rpn_test_post_nms, min_size=rpn_min_size)

            self.sampler = RCNNTargetSampler(
                num_image=self._max_batch, num_proposal=rpn_train_post_nms,
                num_sample=num_sample, pos_iou_thresh=pos_iou_thresh, pos_ratio=pos_ratio)
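
BBoxCornerToCenter rewrites corner boxes (xmin, ymin, xmax, ymax) as center boxes (cx, cy, w, h) before decoding; a plain-Python sketch of the same transform (the helper itself is hypothetical):

    # Corner (xmin, ymin, xmax, ymax) -> center (cx, cy, w, h), the
    # conversion BBoxCornerToCenter applies to proposals before decoding.
    def corner_to_center(box):
        xmin, ymin, xmax, ymax = box
        w, h = xmax - xmin, ymax - ymin
        return (xmin + w / 2, ymin + h / 2, w, h)

    print(corner_to_center((10, 20, 50, 80)))  # -> (30.0, 50.0, 40, 60)
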
Example 5
    def __init__(self,
                 features,
                 top_features,
                 classes,
                 box_features,
                 short,
                 max_size,
                 train_patterns,
                 nms_thresh,
                 nms_topk,
                 post_nms,
                 roi_mode,
                 roi_size,
                 strides,
                 clip,
                 force_nms=False,
                 **kwargs):
        super(RCNN, self).__init__(**kwargs)
        self.classes = classes
        self.num_class = len(classes)
        self.short = short
        self.max_size = max_size
        self.train_patterns = train_patterns
        self.nms_thresh = nms_thresh
        self.nms_topk = nms_topk
        self.post_nms = post_nms
        self.force_nms = force_nms

        assert self.num_class > 0, "Invalid number of classes: {}".format(self.num_class)
        self._roi_mode = roi_mode.lower()
        assert len(roi_size) == 2, "Require (h, w) as roi_size, given {}".format(roi_size)
        self._roi_size = roi_size
        self._strides = strides

        with self.name_scope():
            self.features = features
            self.top_features = top_features
            self.box_features = box_features
            self.class_predictor = nn.Dense(
                self.num_class + 1, weight_initializer=mx.init.Normal(0.01))
            self.box_predictor = nn.Dense(
                self.num_class * 4, weight_initializer=mx.init.Normal(0.001))
            self.cls_decoder = MultiPerClassDecoder(num_class=self.num_class + 1)
            self.box_decoder = NormalizedBoxCenterDecoder(clip=clip,
                                                          convert_anchor=True)
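
The two dense heads derive their output widths from the class count: one score per class plus background for classification, and four offsets per foreground class for regression. A quick check with an assumed 20-class setup:

    # Output widths of the RCNN heads for a hypothetical 20-class detector.
    num_class = 20
    cls_units = num_class + 1  # 21: one extra unit for background
    box_units = num_class * 4  # 80: (dx, dy, dw, dh) per foreground class
    print(cls_units, box_units)
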
Example 6
    def __init__(self, classes, use_1x1_transition=False, use_bn=True, reduce_ratio=1.0, min_depth=128,
                 stds=(0.1, 0.1, 0.2, 0.2), nms_thresh=0.45, nms_topk=400, post_nms=100,
                 anchor_alloc_size=128, ctx=mx.gpu(), norm_layer=nn.BatchNorm, **kwargs):
        super(JanetRes, self).__init__(**kwargs)
        self.classes = classes
        pretrained = False
        global_pool = False
        norm_kwargs = {}
        im_size = (300, 300)

        network = 'resnet101_v2'
        features = ['stage3_activation22', 'stage4_activation2']
        channels = [512, 512, 256, 256]
        sizes = [0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 0.9]
        ratios = [[1, 2, 1.4]] * 2 + [[1, 2, 0.8, 3, 0.8]] * 2 + [[1, 2, 1.5]] * 2
        steps = [40 / 300, 100 / 300, 120 / 300, 150 / 300, 180 / 300, 250 / 300]
        num_layers = len(features) + len(channels)

        sizes = list(zip(sizes[:-1], sizes[1:]))
        self._num_layers = num_layers
        self.nms_thresh = nms_thresh
        self.nms_topk = nms_topk
        self.post_nms = post_nms

        with self.name_scope():
            self.features = FeatureExpander(network=network, outputs=features, num_filters=channels,
                                            use_1x1_transition=use_1x1_transition,
                                            use_bn=use_bn, reduce_ratio=reduce_ratio, min_depth=min_depth,
                                            global_pool=global_pool, pretrained=pretrained, ctx=ctx,
                                            norm_layer=norm_layer, norm_kwargs=norm_kwargs)

            self.class_predictors = nn.HybridSequential()
            self.box_predictors = nn.HybridSequential()
            self.anchor_generators = nn.HybridSequential()
            asz = anchor_alloc_size
            for i, s, r, st in zip(range(num_layers), sizes, ratios, steps):
                anchor_generator = SSDAnchorGenerator(i, im_size, s, r, st, (asz, asz))
                self.anchor_generators.add(anchor_generator)
                asz = max(asz // 2, 16)                     # pre-compute larger than 16x16 anchor map
                num_anchors = anchor_generator.num_depth
                self.class_predictors.add(ConvPredictor(num_anchors * (len(self.classes) + 1)))
                self.box_predictors.add(ConvPredictor(num_anchors * 4))
            self.bbox_decoder = NormalizedBoxCenterDecoder(stds)
            self.cls_decoder = MultiPerClassDecoder(len(self.classes) + 1, thresh=0.01)
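
The `sizes = list(zip(sizes[:-1], sizes[1:]))` line turns the flat size ladder into per-layer (min, max) pairs; replayed standalone with the values above:

    # Pair consecutive entries of the size ladder into per-layer tuples.
    sizes = [0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 0.9]
    pairs = list(zip(sizes[:-1], sizes[1:]))
    print(pairs)  # [(0.2, 0.3), (0.3, 0.4), ..., (0.8, 0.9)] -> 6 layers
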
Example 7
    def __init__(self, classes, nms_thresh=0.45, nms_topk=400, post_nms=100, anchor_alloc_size=128,  **kwargs):
        super(JanetVgg, self).__init__(**kwargs)
        base_size = 300
        stds = (0.1, 0.1, 0.2, 0.2)

        self.features = JafeatVgg()
        sizes = [0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 0.9]
        ratios = [[1, 2, 1.4]] * 2 + [[1, 2, 0.8, 3, 0.8]] * 2 + [[1, 2, 1.5]] * 2
        steps = [40 / 300, 100 / 300, 120 / 300, 150 / 300, 180 / 300, 250 / 300]

        num_layers = len(ratios)
        assert len(sizes) == num_layers + 1
        sizes = list(zip(sizes[:-1], sizes[1:]))
        assert isinstance(ratios, list), "Must provide ratios as list or list of list"
        if not isinstance(ratios[0], (tuple, list)):
            ratios = [ratios] * num_layers  # propagate to all layers if a single ratio list is shared
        assert num_layers == len(sizes) == len(ratios), \
            "Mismatched (number of layers) vs (sizes) vs (ratios): {}, {}, {}".format(
                num_layers, len(sizes), len(ratios))
        assert num_layers > 0, "SSD require at least one layer, suggest multiple."
        self._num_layers = num_layers
        self.classes = classes
        self.nms_thresh = nms_thresh
        self.nms_topk = nms_topk
        self.post_nms = post_nms

        with self.name_scope():
            self.class_predictors = nn.HybridSequential()
            self.box_predictors = nn.HybridSequential()
            self.anchor_generators = nn.HybridSequential()
            asz = anchor_alloc_size
            im_size = (base_size, base_size)
            for i, s, r, st in zip(range(num_layers), sizes, ratios, steps):
                anchor_generator = SSDAnchorGenerator(i, im_size, s, r, st, (asz, asz))
                self.anchor_generators.add(anchor_generator)
                asz = max(asz // 2, 16)  # pre-compute larger than 16x16 anchor map
                num_anchors = anchor_generator.num_depth
                self.class_predictors.add(ConvPredictor(num_anchors * (len(self.classes) + 1)))
                self.box_predictors.add(ConvPredictor(num_anchors * 4))
            self.bbox_decoder = NormalizedBoxCenterDecoder(stds)
            self.cls_decoder = MultiPerClassDecoder(len(self.classes) + 1, thresh=0.01)
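
Assuming the GluonCV SSDAnchorGenerator convention, num_depth per position is len(sizes) + len(ratios) - 1: one anchor per ratio at the base size, plus one square anchor at the geometric mean of the size pair. A sketch under that assumption:

    # Assumed per-position anchor count for an SSD-style generator.
    def num_depth(size_pair, ratio_list):
        return len(size_pair) + len(ratio_list) - 1

    print(num_depth((0.2, 0.3), [1, 2, 1.4]))          # -> 4
    print(num_depth((0.4, 0.5), [1, 2, 0.8, 3, 0.8]))  # -> 6
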
Example 8
    def __init__(self,
                 network,
                 base_size,
                 features,
                 num_filters,
                 sizes,
                 ratios,
                 steps,
                 classes,
                 oriental=('u', 'd', 'l', 'r'),
                 use_1x1_transition=True,
                 use_bn=True,
                 reduce_ratio=1.0,
                 min_depth=128,
                 global_pool=False,
                 pretrained=False,
                 stds=(0.1, 0.1, 0.2, 0.2),
                 nms_thresh=0.45,
                 nms_topk=400,
                 post_nms=100,
                 anchor_alloc_size=128,
                 ctx=mx.cpu(),
                 norm_layer=nn.BatchNorm,
                 norm_kwargs=None,
                 **kwargs):
        super(SSD_ORIENTAL, self).__init__(**kwargs)
        if norm_kwargs is None:
            norm_kwargs = {}
        if network is None:
            num_layers = len(ratios)
        else:
            num_layers = len(features) + len(num_filters) + int(global_pool)
        assert len(sizes) == num_layers + 1
        sizes = list(zip(sizes[:-1], sizes[1:]))
        assert isinstance(ratios, list), "Must provide ratios as list or list of list"
        if not isinstance(ratios[0], (tuple, list)):
            ratios = [ratios] * num_layers  # propagate to all layers if a single ratio list is shared
        assert num_layers == len(sizes) == len(ratios), \
            "Mismatched (number of layers) vs (sizes) vs (ratios): {}, {}, {}".format(
                num_layers, len(sizes), len(ratios))
        assert num_layers > 0, "SSD require at least one layer, suggest multiple."
        self._num_layers = num_layers
        self.classes = classes

        self.oriental = oriental

        self.nms_thresh = nms_thresh
        self.nms_topk = nms_topk
        self.post_nms = post_nms

        with self.name_scope():
            if network is None:
                # use fine-grained manually designed block as features
                try:
                    self.features = features(pretrained=pretrained,
                                             ctx=ctx,
                                             norm_layer=norm_layer,
                                             norm_kwargs=norm_kwargs)
                except TypeError:
                    self.features = features(pretrained=pretrained, ctx=ctx)
            else:
                try:
                    self.features = FeatureExpander(
                        network=network,
                        outputs=features,
                        num_filters=num_filters,
                        use_1x1_transition=use_1x1_transition,
                        use_bn=use_bn,
                        reduce_ratio=reduce_ratio,
                        min_depth=min_depth,
                        global_pool=global_pool,
                        pretrained=pretrained,
                        ctx=ctx,
                        norm_layer=norm_layer,
                        norm_kwargs=norm_kwargs)
                except TypeError:
                    self.features = FeatureExpander(
                        network=network,
                        outputs=features,
                        num_filters=num_filters,
                        use_1x1_transition=use_1x1_transition,
                        use_bn=use_bn,
                        reduce_ratio=reduce_ratio,
                        min_depth=min_depth,
                        global_pool=global_pool,
                        pretrained=pretrained,
                        ctx=ctx)
            self.class_predictors = nn.HybridSequential()
            self.ori_predictors = nn.HybridSequential()
            self.box_predictors = nn.HybridSequential()
            self.anchor_generators = nn.HybridSequential()
            asz = anchor_alloc_size
            im_size = (base_size, base_size)
            for i, s, r, st in zip(range(num_layers), sizes, ratios, steps):
                anchor_generator = SSDAnchorGenerator(i, im_size, s, r, st,
                                                      (asz, asz))
                self.anchor_generators.add(anchor_generator)
                asz = max(asz // 2, 16)  # pre-compute larger than 16x16 anchor map
                num_anchors = anchor_generator.num_depth
                self.class_predictors.add(
                    ConvPredictor(num_anchors * (len(self.classes) + 1)))

                self.ori_predictors.add(
                    ConvPredictor(num_anchors * (len(self.oriental) + 1)))

                self.box_predictors.add(ConvPredictor(num_anchors * 4))
            self.bbox_decoder = NormalizedBoxCenterDecoder(stds)
            self.cls_decoder = MultiPerClassDecoder(len(self.classes) + 1,
                                                    thresh=0.01)

            self.ori_decoder = MultiPerClassDecoder(len(self.oriental) + 1,
                                                    thresh=0.01)
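
Per layer, the three heads above differ only in output channels; the arithmetic, with hypothetical anchor and class counts and the default four orientations:

    # Channel counts for one SSD_ORIENTAL layer (values are assumptions).
    num_anchors = 4
    num_classes = 20
    num_orientations = 4  # ('u', 'd', 'l', 'r')
    print(num_anchors * (num_classes + 1))       # class head: 84
    print(num_anchors * (num_orientations + 1))  # orientation head: 20
    print(num_anchors * 4)                       # box head: 16
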
Example 9
    def __init__(self,
                 base_size,
                 stages,
                 ratios,
                 scales,
                 steps,
                 classes,
                 fpn_channel=64,
                 fpn_repeat=3,
                 box_cls_repeat=3,
                 act_type='swish',
                 stds=(0.1, 0.1, 0.2, 0.2),
                 nms_thresh=0.45,
                 nms_topk=400,
                 post_nms=100,
                 anchor_alloc_size=128,
                 ctx=mx.cpu(),
                 norm_layer=nn.BatchNorm,
                 norm_kwargs=None,
                 **kwargs):

        super(EfficientDet, self).__init__(**kwargs)

        self.num_stages = len(steps)
        self.classes = classes
        self.nms_thresh = nms_thresh
        self.nms_topk = nms_topk
        self.post_nms = post_nms
        num_anchors = len(ratios) * len(scales)
        norm_kwargs = {} if norm_kwargs is None else norm_kwargs

        im_size = (base_size, base_size)
        asz = anchor_alloc_size
        with self.name_scope():
            self.stages = nn.HybridSequential()
            self.proj_convs = nn.HybridSequential()
            self.fpns = nn.HybridSequential()
            self.anchor_generators = nn.HybridSequential()
            for stage in stages:
                self.stages.add(stage)
            for i in range(self.num_stages):
                block = nn.HybridSequential()
                _add_conv(block,
                          channels=fpn_channel,
                          act_type=act_type,
                          norm_layer=norm_layer,
                          norm_kwargs=norm_kwargs)
                self.proj_convs.add(block)
                anchor_generator = AnchorGenerator(i, im_size, ratios, scales,
                                                   steps[i], (asz, asz))
                self.anchor_generators.add(anchor_generator)
                asz = max(asz // 2, 16)

            for i in range(fpn_repeat):
                self.fpns.add(
                    BiFPN(fpn_channel,
                          num_features=self.num_stages,
                          act_type=act_type,
                          norm_layer=norm_layer,
                          norm_kwargs=norm_kwargs))
            self.cls_net = OutputSubnet(fpn_channel,
                                        box_cls_repeat,
                                        self.num_classes + 1,
                                        num_anchors,
                                        act_type=act_type,
                                        norm_layer=norm_layer,
                                        norm_kwargs=norm_kwargs,
                                        prefix='class_net')
            self.box_net = OutputSubnet(fpn_channel,
                                        box_cls_repeat,
                                        4,
                                        num_anchors,
                                        act_type=act_type,
                                        norm_layer=norm_layer,
                                        norm_kwargs=norm_kwargs,
                                        prefix='box_net')
            self.bbox_decoder = NormalizedBoxCenterDecoder(stds)
            self.cls_decoder = MultiPerClassDecoder(self.num_classes + 1,
                                                    thresh=0.01)
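
Here num_anchors = len(ratios) * len(scales): every (ratio, scale) pair yields one anchor per position, shared across all pyramid levels. With illustrative RetinaNet-style inputs:

    # Per-position anchor count for the head above (inputs are assumptions).
    ratios = [0.5, 1.0, 2.0]
    scales = [2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]
    print(len(ratios) * len(scales))  # -> 9 anchors per spatial position
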
Example 10
    def __init__(self,
                 num_layers,
                 base_size,
                 sizes,
                 ratios,
                 steps,
                 classes,
                 stds=(0.1, 0.1, 0.2, 0.2),
                 nms_thresh=0.3,
                 nms_topk=10000,
                 post_nms=3000,
                 anchor_alloc_size=640,
                 is_multitask=False,
                 use_pose=False,
                 use_keypoints=False,
                 num_keypoints=1,
                 use_embedding=False,
                 embedding_dim=128,
                 return_intermediate_features=False,
                 **kwargs):
        super(SSDDetectorHead, self).__init__(**kwargs)

        self._num_layers = num_layers
        self.classes = classes
        self.nms_thresh = nms_thresh
        self.nms_topk = nms_topk
        self.post_nms = post_nms
        self._use_pose = use_pose
        if self._use_pose:
            self._is_multitask = True
        else:
            self._is_multitask = is_multitask

        self._use_keypoints = use_keypoints
        self._keypoint_size = num_keypoints * 2

        self._use_embedding = use_embedding
        self._embedding_dim = embedding_dim

        self._return_int_feat = return_intermediate_features

        with self.name_scope():
            self.class_predictors = nn.HybridSequential()
            self.box_predictors = nn.HybridSequential()
            self.anchor_generators = nn.HybridSequential()
            if self._is_multitask:
                self.landmark_predictors = nn.HybridSequential()
            if self._use_pose:
                self.pose_predictors = nn.HybridSequential()
            if self._use_keypoints:
                self.keypoint_predictors = nn.HybridSequential()
            if self._use_embedding:
                self.embedding_predictors = nn.HybridSequential()
            asz = anchor_alloc_size
            im_size = (base_size, base_size)
            for i, s, r, st in zip(range(num_layers), sizes, ratios, steps):
                anchor_generator = SSDAnchorGenerator(i, im_size, s, r, st,
                                                      (asz, asz))
                self.anchor_generators.add(anchor_generator)
                asz = max(asz // 2, 16)  # pre-compute larger than 16x16 anchor map
                num_anchors = anchor_generator.num_depth
                self.class_predictors.add(
                    ConvPredictor(num_anchors * (len(self.classes) + 1)))
                self.box_predictors.add(ConvPredictor(num_anchors * 4))
                if self._is_multitask:
                    self.landmark_predictors.add(
                        ConvPredictor(num_anchors * 10))
                if self._use_pose:
                    self.pose_predictors.add(ConvPredictor(num_anchors * 6))
                if self._use_keypoints:
                    self.keypoint_predictors.add(
                        ConvPredictor(num_anchors * self._keypoint_size))
                if self._use_embedding:
                    local_seq = nn.HybridSequential()
                    local_seq.add(
                        ConvPredictor(num_anchors * self._embedding_dim *
                                      len(self.classes)))
                    local_seq.add(
                        nn.BatchNorm(prefix='embedding_norm_{}_'.format(i)))
                    local_seq.add(nn.LeakyReLU(alpha=0.25))
                    local_seq.add(
                        nn.Conv2D(
                            num_anchors * self._embedding_dim *
                            len(self.classes), (1, 1),
                            weight_initializer=mx.init.Xavier(magnitude=2),
                            bias_initializer='zeros',
                            groups=num_anchors * len(self.classes)))
                    self.embedding_predictors.add(local_seq)

            self.bbox_decoder = NormalizedBoxCenterDecoder(stds)
            self.cls_decoder = MultiPerClassDecoder(len(self.classes) + 1,
                                                    thresh=0.01)
            if self._is_multitask:
                self.landmark_decoder = NormalizedLandmarkCenterDecoder(stds)
            if self._use_keypoints:
                self.keypoint_decoder = GeneralNormalizedKeyPointsDecoder(1)
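
The embedding branch's grouped 1x1 convolution keeps each (anchor, class) embedding in its own group; its channel/group arithmetic with assumed counts:

    # Grouped 1x1 conv bookkeeping for the embedding branch above
    # (anchor/class/dim values are assumptions).
    num_anchors, num_classes, embedding_dim = 4, 2, 128
    channels = num_anchors * embedding_dim * num_classes  # 1024 total
    groups = num_anchors * num_classes                    # 8 groups
    print(channels, groups, channels // groups)           # 1024 8 128
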
Example 11
    def __init__(self,
                 network,
                 base_size,
                 features,
                 num_filters,
                 sizes,
                 ratios,
                 steps,
                 classes,
                 use_1x1_transition=True,
                 use_bn=True,
                 reduce_ratio=1.0,
                 min_depth=128,
                 global_pool=False,
                 pretrained=False,
                 stds=(0.1, 0.1, 0.2, 0.2),
                 anchor_alloc_size=128,
                 nms_overlap_thresh=0.5,
                 nms_topk=200,
                 nms_valid_thresh=0.0,
                 post_nms=200,
                 norm_layer=GroupBatchNorm,
                 fuse_bn_relu=True,
                 fuse_bn_add_relu=True,
                 bn_fp16=False,
                 norm_kwargs=None,
                 predictors_kernel=(3, 3),
                 predictors_pad=(1, 1),
                 ctx=mx.cpu(),
                 layout='NCHW',
                 **kwargs):
        super(SSD, self).__init__(**kwargs)
        if norm_kwargs is None:
            norm_kwargs = {}
        if network is None:
            num_layers = len(ratios)
        else:
            num_layers = len(features) + len(num_filters) + int(global_pool)
        assert len(sizes) == num_layers + 1
        sizes = list(zip(sizes[:-1], sizes[1:]))
        assert isinstance(ratios, list), "Must provide ratios as list or list of list"
        if not isinstance(ratios[0], (tuple, list)):
            ratios = [ratios] * num_layers  # propagate to all layers if a single ratio list is shared
        assert num_layers == len(sizes) == len(ratios), \
            f"Mismatched (number of layers) vs (sizes) vs (ratios): {num_layers}, {len(sizes)}, {len(ratios)}."
        assert num_layers > 0, "SSD require at least one layer, suggest multiple."
        self._num_layers = num_layers
        self.classes = classes
        self.nms_overlap_thresh = nms_overlap_thresh
        self.nms_topk = nms_topk
        self.nms_valid_thresh = nms_valid_thresh
        self.post_nms = post_nms
        self.layout = layout
        self.reduce_ratio = reduce_ratio
        self._bn_fp16 = bn_fp16
        self._bn_group = norm_kwargs.get('bn_group', 1)

        logging.info(f'[SSD] network: {network}')
        logging.info(f'[SSD] norm layer: {norm_layer}')
        logging.info(f'[SSD] fuse bn relu: {fuse_bn_relu}')
        logging.info(f'[SSD] fuse bn add relu: {fuse_bn_add_relu}')
        logging.info(f'[SSD] bn group: {self._bn_group}')

        with self.name_scope():
            if network is None:
                # use fine-grained manually designed block as features
                self.features = features(pretrained=pretrained,
                                         ctx=ctx,
                                         norm_layer=norm_layer,
                                         fuse_bn_relu=fuse_bn_relu,
                                         fuse_bn_add_relu=fuse_bn_add_relu,
                                         bn_fp16=bn_fp16,
                                         norm_kwargs=norm_kwargs)
            else:
                self.features = FeatureExpander(
                    network=network,
                    outputs=features,
                    num_filters=num_filters,
                    use_1x1_transition=use_1x1_transition,
                    use_bn=use_bn,
                    reduce_ratio=reduce_ratio,
                    min_depth=min_depth,
                    global_pool=global_pool,
                    pretrained=pretrained,
                    ctx=ctx,
                    norm_layer=norm_layer,
                    fuse_bn_relu=fuse_bn_relu,
                    fuse_bn_add_relu=fuse_bn_add_relu,
                    bn_fp16=bn_fp16,
                    norm_kwargs=norm_kwargs,
                    layout=layout)

            # use a single ConvPredictor for conf and loc predictors (head fusion),
            # but they are treated as two different segments
            self.predictors = nn.HybridSequential()
            self.num_defaults = [4, 6, 6, 6, 4, 4]
            padding_channels_to = 8
            self.padding_amounts = []    # track padding so conf/loc can be sliced correctly
            self.predictor_offsets = []  # track offsets so conf/loc can be initialized correctly
            for nd in self.num_defaults:
                # keep track of beginning/ending offsets for all segments
                offsets = [0]
                n = nd * (self.num_classes + 1)  # output channels for conf predictors
                offsets.append(n)
                n = n + nd * 4  # output channels for both conf and loc predictors
                offsets.append(n)
                # padding if necessary
                padding_amt = 0
                # manually pad to get HMMA kernels for NHWC layout
                if (self.layout == 'NHWC') and (n % padding_channels_to):
                    padding_amt = padding_channels_to - (n % padding_channels_to)
                    n = n + padding_amt
                    if padding_amt:
                        offsets.append(n)
                self.predictors.add(
                    ConvPredictor(n,
                                  kernel=predictors_kernel,
                                  pad=predictors_pad,
                                  layout=layout))
                self.predictor_offsets.append(offsets)
                self.padding_amounts.append(padding_amt)

            self.bbox_decoder = NormalizedBoxCenterDecoder(stds)
            self.cls_decoder = MultiPerClassDecoder(self.num_classes + 1,
                                                    thresh=0)
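
The fused conf/loc offset bookkeeping above can be replayed standalone; a sketch for a single layer with nd = 4 defaults, an assumed 80-class setup, and NHWC layout:

    # Conf channels, then loc channels, then optional padding to a
    # multiple of 8 so NHWC predictors hit HMMA kernels.
    nd, num_classes, padding_channels_to = 4, 80, 8
    offsets = [0]
    n = nd * (num_classes + 1)      # conf: 4 * 81 = 324
    offsets.append(n)
    n += nd * 4                     # + loc: 324 + 16 = 340
    offsets.append(n)
    if n % padding_channels_to:     # pad 340 -> 344
        n += padding_channels_to - (n % padding_channels_to)
        offsets.append(n)
    print(offsets)                  # -> [0, 324, 340, 344]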