def get_config(is_train):
    """Assemble the experiment configuration for a detector with a mask head.

    Parameter namespaces are declared as plain classes whose attributes are
    handed to the model builders (``Backbone``, ``Neck``, ``RpnHead``,
    ``RoiExtractor``, ``BboxHead``, ``MaskHead``, ``BboxPostProcessor``,
    ``Detector``) and to the input transforms. Those builders, together with
    ``normalizer_factory``, ``add_anchor_to_arg`` and ``process_output``,
    must be provided by the enclosing module.

    Args:
        is_train: When true, build the training symbol and the training
            input pipeline; otherwise build the test symbol and the test
            input pipeline.

    Returns:
        The 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam,
        BboxParam, DatasetParam, ModelParam, OptimizeParam, TestParam,
        transform, data_name, label_name, metric_list)``.
    """

    class General:
        log_frequency = 10
        # Last component of this module's name; reused below to build the
        # checkpoint prefix.
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        batch_image = 2 if is_train else 1
        fp16 = False
        loader_worker = 8

    class KvstoreParam:
        kvstore = "nccl"
        batch_image = General.batch_image
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16 = General.fp16

    class NormalizeParam:
        # Single normalizer object shared by every component below.
        normalizer = normalizer_factory(type="gn")

    class BackboneParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        depth = 50

    class NeckParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class RpnParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        batch_image = General.batch_image
        nnvm_proposal = True
        # When false, the RPN targets are produced by the input pipeline
        # (see the PyramidAnchorTarget2D transform appended further down).
        nnvm_rpn_target = False

        class anchor_generate:
            scale = (8, )
            ratio = (0.5, 1.0, 2.0)
            stride = (4, 8, 16, 32, 64)
            image_anchor = 256
            max_side = 1400

        class anchor_assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0
            image_anchor = 256
            pos_fraction = 0.5

        class head:
            conv_channel = 256
            mean = (0, 0, 0, 0)
            std = (1, 1, 1, 1)

        class proposal:
            pre_nms_top_n = 2000 if is_train else 1000
            post_nms_top_n = 2000 if is_train else 1000
            nms_thr = 0.7
            min_bbox_side = 0

        class subsample_proposal:
            proposal_wo_gt = False
            image_roi = 512
            fg_fraction = 0.25
            fg_thr = 0.5
            bg_thr_hi = 0.5
            bg_thr_lo = 0.0

        class bbox_target:
            num_reg_class = 81
            class_agnostic = False
            weight = (1.0, 1.0, 1.0, 1.0)
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class BboxParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        num_class = 1 + 80
        image_roi = 512
        batch_image = General.batch_image

        class regress_target:
            class_agnostic = False
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class MaskParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        resolution = 28
        dim_reduced = 256
        # Derived from the proposal subsampling settings: 512 * 0.25 = 128.
        num_fg_roi = int(RpnParam.subsample_proposal.image_roi *
                         RpnParam.subsample_proposal.fg_fraction)

    class RoiParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        out_size = 7
        stride = (4, 8, 16, 32)
        roi_canonical_scale = 224
        roi_canonical_level = 4

    class MaskRoiParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        out_size = 14
        stride = (4, 8, 16, 32)
        roi_canonical_scale = 224
        roi_canonical_level = 4

    class DatasetParam:
        if is_train:
            image_set = ("coco_train2017", )
        else:
            image_set = ("coco_val2017", )

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            # Learning rate scales with the number of GPUs times batch_image.
            lr = 0.02 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image
            momentum = 0.9
            wd = 0.0001
            clip_gradient = None

        class schedule:
            # Multiplier applied to both the epoch count and the lr steps.
            mult = 2
            begin_epoch = 0
            end_epoch = 6 * mult
            # Step iterations are given for a total batch of 16 and rescaled
            # to the configured total batch.
            lr_iter = [
                60000 * mult * 16 //
                (len(KvstoreParam.gpus) * KvstoreParam.batch_image),
                80000 * mult * 16 //
                (len(KvstoreParam.gpus) * KvstoreParam.batch_image)
            ]

        class warmup:
            type = "gradual"
            lr = 0.01 / 8 * len(
                KvstoreParam.gpus) * KvstoreParam.batch_image / 3.0
            iter = 500

    class TestParam:
        min_det_score = 0.05
        max_det_per_image = 100

        process_roidb = lambda x: x
        # ``process_output`` inside the lambda is resolved outside this class
        # body (class scope is skipped for nested functions), so this calls
        # the enclosing module's ``process_output``, not itself.
        process_output = lambda x, y: process_output(x, y)

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.5

        class coco:
            annotation = "data/coco/annotations/instances_minival2014.json"

    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam, MaskParam)
    roi_extractor = RoiExtractor(RoiParam)
    mask_roi_extractor = RoiExtractor(MaskRoiParam)
    bbox_head = BboxHead(BboxParam)
    mask_head = MaskHead(BboxParam, MaskParam, MaskRoiParam)
    bbox_post_processer = BboxPostProcessor(TestParam)
    detector = Detector()
    # Only the symbol for the requested mode is built; the other is None.
    if is_train:
        train_sym = detector.get_train_symbol(backbone, neck, rpn_head,
                                              roi_extractor,
                                              mask_roi_extractor, bbox_head,
                                              mask_head)
        test_sym = None
    else:
        train_sym = None
        test_sym = detector.get_test_symbol(backbone, neck, rpn_head,
                                            roi_extractor, mask_roi_extractor,
                                            bbox_head, mask_head,
                                            bbox_post_processer)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym

        from_scratch = True
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            prefix = "pretrain_model/resnet%s_v1b" % BackboneParam.depth
            epoch = 0
            fixed_param = []

        def process_weight(sym, arg, aux):
            """Call ``add_anchor_to_arg`` once for every RPN anchor stride."""
            for stride in RpnParam.anchor_generate.stride:
                add_anchor_to_arg(sym, arg, aux,
                                  RpnParam.anchor_generate.max_side, stride,
                                  RpnParam.anchor_generate.scale,
                                  RpnParam.anchor_generate.ratio)

    # data processing
    class NormParam:
        mean = tuple(i * 255 for i in (0.485, 0.456, 0.406))  # RGB order
        std = tuple(i * 255 for i in (0.229, 0.224, 0.225))

    # data processing
    class ResizeParam:
        short = 800
        long = 1333

    class PadParam:
        short = 800
        long = 1333
        max_num_gt = 100
        max_len_gt_poly = 2500

    class AnchorTarget2DParam:
        def __init__(self):
            # ``generate`` is an instance so that stride/short/long live in
            # its instance __dict__ rather than on the class.
            self.generate = self._generate()

        class _generate:
            def __init__(self):
                self.stride = (4, 8, 16, 32, 64)
                self.short = (200, 100, 50, 25, 13)
                self.long = (334, 167, 84, 42, 21)

            # Reuse the RPN anchor settings so the two stay in sync.
            # ``scales`` has to be a sequence: ``(8, )`` is a one-element
            # tuple, whereas a bare ``(8)`` would be the plain int 8.
            scales = RpnParam.anchor_generate.scale
            aspects = RpnParam.anchor_generate.ratio

        class assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0

        class sample:
            image_anchor = 256
            pos_fraction = 0.5

    class RenameParam:
        mapping = dict(image="data")


    from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \
        ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \
        RenameRecord, Norm2DImage

    from models.maskrcnn.input import PreprocessGtPoly, EncodeGtPoly, \
        Resize2DImageBboxMask, Flip2DImageBboxMask, Pad2DImageBboxMask

    from models.FPN.input import PyramidAnchorTarget2D

    if is_train:
        transform = [
            ReadRoiRecord(None),
            Norm2DImage(NormParam),
            PreprocessGtPoly(),
            Resize2DImageBboxMask(ResizeParam),
            Flip2DImageBboxMask(),
            EncodeGtPoly(PadParam),
            Pad2DImageBboxMask(PadParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data"]
        label_name = ["im_info", "gt_bbox", "gt_poly"]
        # RPN targets come from the input pipeline unless the network
        # generates them itself.
        if not RpnParam.nnvm_rpn_target:
            transform.append(PyramidAnchorTarget2D(AnchorTarget2DParam()))
            label_name += ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"]
    else:
        transform = [
            ReadRoiRecord(None),
            Norm2DImage(NormParam),
            Resize2DImageBbox(ResizeParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    import core.detection_metric as metric
    from models.maskrcnn.metric import SigmoidCELossMetric

    rpn_acc_metric = metric.AccWithIgnore(
        "RpnAcc", ["rpn_cls_loss_output", "rpn_cls_label_blockgrad_output"],
        [])
    rpn_l1_metric = metric.L1(
        "RpnL1", ["rpn_reg_loss_output", "rpn_cls_label_blockgrad_output"], [])
    # for bbox, the label is generated in network so it is an output
    box_acc_metric = metric.AccWithIgnore(
        "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], [])
    box_l1_metric = metric.L1(
        "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], [])
    # NOTE(review): this metric is constructed but not added to metric_list
    # below -- confirm whether leaving it out is intentional.
    mask_cls_metric = SigmoidCELossMetric("MaskCE", ["mask_loss_output"], [])

    metric_list = [
        rpn_acc_metric,
        rpn_l1_metric,
        box_acc_metric,
        box_l1_metric,
    ]

    return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \
           ModelParam, OptimizeParam, TestParam, \
           transform, data_name, label_name, metric_list
# Example #2
# 0
def get_config(is_train):
    """Assemble the experiment configuration for a two-stage box detector.

    Parameter namespaces are declared as plain classes whose attributes are
    handed to the model builders (``Backbone``, ``Neck``, ``RpnHead``,
    ``RoiExtractor``, ``BboxHead``, ``Detector``) and to the input
    transforms. Those builders, together with ``normalizer_factory`` and
    ``add_anchor_to_arg``, must be provided by the enclosing module.

    Args:
        is_train: When true, build the training symbol and the training
            input pipeline; otherwise build the RPN-test and test symbols
            and the test input pipeline.

    Returns:
        The 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam,
        BboxParam, DatasetParam, ModelParam, OptimizeParam, TestParam,
        transform, data_name, label_name, metric_list)``.
    """

    class General:
        log_frequency = 10
        # Last component of this module's name; reused below to build the
        # checkpoint prefix.
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        batch_image = 2 if is_train else 1
        fp16 = True
        loader_worker = 24

    class KvstoreParam:
        kvstore = "nccl"
        batch_image = General.batch_image
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16 = General.fp16

    class NormalizeParam:
        normalizer = normalizer_factory(type="fixbn")

    class BackboneParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        depth = 50

    class NeckParam:
        fp16 = General.fp16
        # The neck gets its own normalizer instead of the shared one.
        normalizer = normalizer_factory(type="localbn")

    class RpnParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        batch_image = General.batch_image
        nnvm_proposal = True
        # When false, the RPN targets are produced by the input pipeline
        # (see the PyramidAnchorTarget2D transform appended further down).
        nnvm_rpn_target = False

        class anchor_generate:
            scale = (8, )
            ratio = (0.5, 1, 2)
            stride = (4, 8, 16, 32, 64)
            max_side = 1450

        class anchor_assign:
            allowed_border = 1000
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0
            image_anchor = 256
            pos_fraction = 0.5

        class head:
            conv_channel = 256
            mean = (0, 0, 0, 0)
            std = (1, 1, 1, 1)

        class proposal:
            pre_nms_top_n = 3000 if is_train else 2000
            post_nms_top_n = 2000 if is_train else 1000
            nms_thr = 0.7
            min_bbox_side = 2

        class subsample_proposal:
            proposal_wo_gt = False
            image_roi = 512
            fg_fraction = 0.5
            fg_thr = 0.5
            bg_thr_hi = 0.5
            bg_thr_lo = 0.0

        class bbox_target:
            num_reg_class = 2
            class_agnostic = False
            weight = (1.0, 1.0, 1.0, 1.0)
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class BboxParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        num_class = 1 + 1
        image_roi = 512
        batch_image = General.batch_image

        class regress_target:
            class_agnostic = False
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class RoiParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        out_size = 7
        stride = (4, 8, 16, 32)
        roi_canonical_scale = 224
        roi_canonical_level = 4

    class DatasetParam:
        if is_train:
            image_set = ("crowdhuman_train", )
        else:
            image_set = ("crowdhuman_val", )

    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    roi_extractor = RoiExtractor(RoiParam)
    bbox_head = BboxHead(BboxParam)
    detector = Detector()
    # Only the symbols for the requested mode are built; the rest are None.
    if is_train:
        train_sym = detector.get_train_symbol(backbone, neck, rpn_head,
                                              roi_extractor, bbox_head)
        rpn_test_sym = None
        test_sym = None
    else:
        train_sym = None
        rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head)
        test_sym = detector.get_test_symbol(backbone, neck, rpn_head,
                                            roi_extractor, bbox_head)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym
        rpn_test_symbol = rpn_test_sym

        from_scratch = False
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            prefix = "pretrain_model/resnet%s_v1b" % BackboneParam.depth
            epoch = 0
            fixed_param = [
                "conv0", "stage1", "bn_gamma", "bn_beta", "bn0", "bn1", "bn2",
                "bn3", "bn4"
            ]

        def process_weight(sym, arg, aux):
            """Call ``add_anchor_to_arg`` once for every RPN anchor stride."""
            for stride in RpnParam.anchor_generate.stride:
                add_anchor_to_arg(sym, arg, aux,
                                  RpnParam.anchor_generate.max_side, stride,
                                  RpnParam.anchor_generate.scale,
                                  RpnParam.anchor_generate.ratio)

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            # Learning rate scales with the number of GPUs times batch_image.
            lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image
            momentum = 0.9
            wd = 0.0001
            clip_gradient = None

        class schedule:
            begin_epoch = 0
            end_epoch = 10
            # Step iterations are given for a total batch of 16 and rescaled
            # to the configured total batch.
            lr_iter = [
                14960 * 16 //
                (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 17765 *
                16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image)
            ]

        class warmup:
            type = "gradual"
            lr = 0.01 / 8 * len(
                KvstoreParam.gpus) * KvstoreParam.batch_image / 3.0
            iter = 500

    class TestParam:
        min_det_score = 0.05
        max_det_per_image = 300

        # Both hooks return their first argument unchanged.
        process_roidb = lambda x: x
        process_output = lambda x, y: x

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.5

        class coco:
            # NOTE(review): absolute, machine-specific path -- confirm it
            # exists on the target host before evaluating.
            annotation = "/mnt/truenas/scratch/czh/data/crowdhuman/annotations/annotation_val.json"

    # data processing
    class NormParam:
        mean = tuple(i * 255 for i in (0.485, 0.456, 0.406))  # RGB order
        std = tuple(i * 255 for i in (0.229, 0.224, 0.225))

    # data processing
    class ResizeParam:
        short = 800
        long = 1400

    class PadParam:
        short = 800
        long = 1400
        max_num_gt = 500

    class AnchorTarget2DParam:
        def __init__(self):
            # ``generate`` is an instance so that stride/short/long live in
            # its instance __dict__ rather than on the class.
            self.generate = self._generate()

        class _generate:
            def __init__(self):
                self.stride = (4, 8, 16, 32, 64)
                self.short = (200, 100, 50, 25, 13)
                self.long = (350, 175, 88, 44, 22)

            # Reuse the RPN anchor settings so the two stay in sync.
            # ``scales`` has to be a sequence: ``(8, )`` is a one-element
            # tuple, whereas a bare ``(8)`` would be the plain int 8.
            scales = RpnParam.anchor_generate.scale
            aspects = RpnParam.anchor_generate.ratio

        class assign:
            allowed_border = 1000
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0

        class sample:
            image_anchor = 256
            pos_fraction = 0.5

    class RenameParam:
        mapping = dict(image="data")


    from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \
        ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \
        RenameRecord, Norm2DImage

    from models.crowdhuman.input import PyramidAnchorTarget2D

    if is_train:
        transform = [
            ReadRoiRecord(None),
            Norm2DImage(NormParam),
            Resize2DImageBbox(ResizeParam),
            Flip2DImageBbox(),
            Pad2DImageBbox(PadParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data"]
        label_name = ["gt_bbox", "im_info"]
        # RPN targets come from the input pipeline unless the network
        # generates them itself.
        if not RpnParam.nnvm_rpn_target:
            transform.append(PyramidAnchorTarget2D(AnchorTarget2DParam()))
            label_name += ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"]
    else:
        transform = [
            ReadRoiRecord(None),
            Norm2DImage(NormParam),
            Resize2DImageBbox(ResizeParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    import core.detection_metric as metric

    rpn_acc_metric = metric.AccWithIgnore(
        "RpnAcc", ["rpn_cls_loss_output", "rpn_cls_label_blockgrad_output"],
        [])
    rpn_l1_metric = metric.L1(
        "RpnL1", ["rpn_reg_loss_output", "rpn_cls_label_blockgrad_output"], [])

    emd_metric = metric.ScalarLoss("emd", [
        'cls_reg_loss_output',
    ], [])

    metric_list = [rpn_acc_metric, rpn_l1_metric, emd_metric]

    return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \
           ModelParam, OptimizeParam, TestParam, \
           transform, data_name, label_name, metric_list
def get_config(is_train):
    """Build the configuration tuple for a backbone + neck + RPN-head detector.

    The input transforms and the metric are taken from ``models.retinanet``.
    ``normalizer_factory``, ``Backbone``, ``Neck``, ``RpnHead`` and
    ``Detector`` must be provided by the enclosing module.

    Args:
        is_train: Selects the training symbol and pipeline when true, the
            test symbol and pipeline otherwise.

    Returns:
        ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``.
    """
    # Values that several parameter namespaces below have in common.
    images_per_step = 2 if is_train else 1
    use_fp16 = False
    anchor_scales = (4 * 2**0, 4 * 2**(1.0 / 3.0), 4 * 2**(2.0 / 3.0))
    anchor_ratios = (0.5, 1.0, 2.0)
    anchor_strides = (8, 16, 32, 64, 128)

    class General:
        log_frequency = 10
        # Trailing component of the module name.
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        batch_image = images_per_step
        fp16 = use_fp16

    class KvstoreParam:
        kvstore = "nccl"
        batch_image = images_per_step
        gpus = list(range(8))
        fp16 = use_fp16

    class NormalizeParam:
        normalizer = normalizer_factory(type="fixbn")

    # Every component shares the one normalizer created above.
    shared_norm = NormalizeParam.normalizer

    class BackboneParam:
        fp16 = use_fp16
        normalizer = shared_norm

    class NeckParam:
        fp16 = use_fp16
        normalizer = shared_norm

    class RpnParam:
        num_class = 1 + 80
        fp16 = use_fp16
        normalizer = shared_norm
        batch_image = images_per_step

        class anchor_generate:
            scale = anchor_scales
            ratio = anchor_ratios
            stride = anchor_strides
            image_anchor = None

        class head:
            conv_channel = 256
            mean = None
            std = None

        class proposal:
            pre_nms_top_n = 1000
            post_nms_top_n = None
            nms_thr = None
            min_bbox_side = None
            min_det_score = 0.05  # filter score in network

        # The two namespaces below are present but left entirely unset.
        class subsample_proposal:
            proposal_wo_gt = None
            image_roi = None
            fg_fraction = None
            fg_thr = None
            bg_thr_hi = None
            bg_thr_lo = None

        class bbox_target:
            num_reg_class = None
            class_agnostic = None
            weight = None
            mean = None
            std = None

        class focal_loss:
            alpha = 0.25
            gamma = 2.0

    class BboxParam:
        fp16 = use_fp16
        normalizer = shared_norm
        num_class = None
        image_roi = None
        batch_image = None

        class regress_target:
            class_agnostic = None
            mean = None
            std = None

    class RoiParam:
        fp16 = use_fp16
        normalizer = shared_norm
        out_size = None
        stride = None

    class DatasetParam:
        image_set = (("coco_train2014", "coco_valminusminival2014")
                     if is_train else ("coco_minival2014", ))

    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    detector = Detector()
    # Build only the symbol for the requested mode; the other stays None.
    if is_train:
        build_symbol = detector.get_train_symbol
    else:
        build_symbol = detector.get_test_symbol
    built_sym = build_symbol(backbone, neck, rpn_head)
    train_sym, test_sym = (built_sym, None) if is_train else (None, built_sym)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym

        from_scratch = False
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            prefix = "pretrain_model/resnet-v1-101"
            epoch = 0
            fixed_param = ["conv0", "stage1", "gamma", "beta"]

    num_gpu = len(KvstoreParam.gpus)
    total_batch = num_gpu * images_per_step
    # Reference rate is 0.005 for 8 GPUs, scaled by GPU count and batch size.
    scaled_lr = 0.005 / 8 * num_gpu * images_per_step

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            lr = scaled_lr
            momentum = 0.9
            wd = 0.0001
            clip_gradient = None

        class schedule:
            begin_epoch = 0
            end_epoch = 6
            # Steps are specified for a total batch of 16 and rescaled.
            lr_iter = [
                60000 * 16 // total_batch,
                80000 * 16 // total_batch,
            ]

        class warmup:
            type = "gradual"
            lr = scaled_lr / 3
            iter = 500

    class TestParam:
        min_det_score = 0  # filter appended boxes
        max_det_per_image = 100

        # Both hooks hand back their first argument untouched.
        process_roidb = lambda x: x
        process_output = lambda x, y: x

        class model:
            prefix = "experiments/%s/checkpoint" % General.name
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.5

        class coco:
            annotation = "data/coco/annotations/instances_minival2014.json"

    # data processing
    class NormParam:
        mean = (122.7717, 115.9465, 102.9801)  # RGB order
        std = (1.0, 1.0, 1.0)

    class ResizeParam:
        short = 800
        long = 1333

    class PadParam:
        short = 800
        long = 1333
        max_num_gt = 100

    class AnchorTarget2DParam:
        def __init__(self):
            self.generate = self._generate()

        class _generate:
            scales = anchor_scales
            aspects = anchor_ratios

            def __init__(self):
                # Per-level values, set on the instance rather than the class.
                self.stride = anchor_strides
                self.short = (100, 50, 25, 13, 7)
                self.long = (167, 84, 42, 21, 11)

        class assign:
            allowed_border = 9999
            pos_thr = 0.5
            neg_thr = 0.4
            min_pos_thr = 0.0

        class sample:
            image_anchor = None
            pos_fraction = None

    class RenameParam:
        mapping = {"image": "data"}


    from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \
        ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \
        RenameRecord
    from models.retinanet.input import PyramidAnchorTarget2D, Norm2DImage

    # Steps common to both modes come first; training-only steps are
    # inserted at the same positions as in a fully spelled-out list.
    transform = [
        ReadRoiRecord(None),
        Norm2DImage(NormParam),
        Resize2DImageBbox(ResizeParam),
    ]
    if is_train:
        transform.extend([Flip2DImageBbox(), Pad2DImageBbox(PadParam)])
    transform.append(ConvertImageFromHwcToChw())
    if is_train:
        transform.append(PyramidAnchorTarget2D(AnchorTarget2DParam()))
    transform.append(RenameRecord(RenameParam.mapping))

    if is_train:
        data_name = ["data"]
        label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"]
    else:
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    from models.retinanet import metric

    metric_list = [
        metric.FGAccMetric("FGAcc", ["cls_loss_output"], ["rpn_cls_label"]),
    ]

    return (General, KvstoreParam, RpnParam, RoiParam, BboxParam,
            DatasetParam, ModelParam, OptimizeParam, TestParam,
            transform, data_name, label_name, metric_list)
def get_config(is_train):
    """Build the configuration tuple for a backbone + neck + point-head detector.

    ``normalizer_factory``, ``Backbone``, ``Neck``, ``Head`` and ``Detector``
    must be provided by the enclosing module.

    Args:
        is_train: Selects the training symbol and pipeline when true, the
            test symbol and pipeline otherwise.

    Returns:
        ``(General, KvstoreParam, HeadParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``.
    """
    # Values that several parameter namespaces below have in common.
    images_per_step = 2 if is_train else 1
    use_fp16 = False

    class General:
        log_frequency = 10
        # Trailing component of the module name.
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        batch_image = images_per_step
        fp16 = use_fp16

    class KvstoreParam:
        kvstore = "nccl"
        batch_image = images_per_step
        gpus = list(range(8))
        fp16 = use_fp16

    class NormalizeParam:
        # alternative: normalizer_factory(type="syncbn", ndev=8, wd_mult=1.0)
        normalizer = normalizer_factory(type="gn")

    # Shared by everything except the backbone.
    shared_norm = NormalizeParam.normalizer

    class BackboneParam:
        fp16 = use_fp16
        # The backbone uses its own normalizer rather than the shared one.
        normalizer = normalizer_factory(type="fixbn")

    class NeckParam:
        fp16 = use_fp16
        normalizer = shared_norm

    class HeadParam:
        num_class = 1 + 80
        fp16 = use_fp16
        normalizer = shared_norm
        batch_image = images_per_step

        class point_generate:
            num_points = 9
            scale = 4
            stride = (8, 16, 32, 64, 128)
            # alternative: "minmax"
            transform = "moment"

        class head:
            conv_channel = 256
            point_conv_channel = 256
            mean = None
            std = None

        class proposal:
            pre_nms_top_n = 1000
            post_nms_top_n = None
            nms_thr = None
            min_bbox_side = None

        class point_target:
            target_scale = 4
            num_pos = 1

        class bbox_target:
            pos_iou_thr = 0.5
            neg_iou_thr = 0.5
            min_pos_iou = 0.0

        class focal_loss:
            alpha = 0.25
            gamma = 2.0

    class BboxParam:
        fp16 = use_fp16
        normalizer = shared_norm
        num_class = None
        image_roi = None
        batch_image = None

        class regress_target:
            class_agnostic = None
            mean = None
            std = None

    class RoiParam:
        fp16 = use_fp16
        normalizer = shared_norm
        out_size = None
        stride = None

    class DatasetParam:
        image_set = ("coco_train2017", ) if is_train else ("coco_val2017", )

    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    head = Head(HeadParam)
    detector = Detector()
    # Build only the symbol for the requested mode; the other stays None.
    if is_train:
        build_symbol = detector.get_train_symbol
    else:
        build_symbol = detector.get_test_symbol
    built_sym = build_symbol(backbone, neck, head)
    train_sym, test_sym = (built_sym, None) if is_train else (None, built_sym)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym

        from_scratch = False
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            prefix = "pretrain_model/resnet-v1-50"
            epoch = 0
            fixed_param = ["conv0", "stage1", "gamma", "beta"]
            excluded_param = ["gn"]

    num_gpu = len(KvstoreParam.gpus)
    total_batch = num_gpu * images_per_step
    # Reference rate is 0.005 for 8 GPUs, scaled by GPU count and batch size.
    scaled_lr = 0.005 / 8 * num_gpu * images_per_step

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            lr = scaled_lr
            momentum = 0.9
            wd = 0.0001
            clip_gradient = None

        class schedule:
            begin_epoch = 0
            end_epoch = 6
            # Steps are specified for a total batch of 16 and rescaled.
            lr_iter = [
                60000 * 16 // total_batch,
                80000 * 16 // total_batch,
            ]

        class warmup:
            type = "gradual"
            lr = scaled_lr / 3
            iter = 500

    class TestParam:
        min_det_score = 0.05  # filter appended boxes
        max_det_per_image = 100

        def process_roidb(x):
            """Return the roidb unchanged."""
            return x

        def process_output(x, y):
            """Return the first argument unchanged; the second is ignored."""
            return x

        class model:
            prefix = "experiments/%s/checkpoint" % General.name
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.5

        class coco:
            annotation = "data/coco/annotations/instances_minival2014.json"

    # data processing
    class NormParam:
        mean = (122.7717, 115.9465, 102.9801)  # RGB order
        std = (1.0, 1.0, 1.0)

    class ResizeParam:
        short = 800
        long = 1333

    class PadParam:
        short = 800
        long = 1333
        max_num_gt = 100

    class RenameParam:
        mapping = {"image": "data"}

    from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \
        ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \
        RenameRecord
    from models.retinanet.input import Norm2DImage

    # The two pipelines differ only by the flip step and the record names.
    transform = [
        ReadRoiRecord(None),
        Norm2DImage(NormParam),
        Resize2DImageBbox(ResizeParam),
    ]
    if is_train:
        transform.append(Flip2DImageBbox())
    transform.extend([
        Pad2DImageBbox(PadParam),
        ConvertImageFromHwcToChw(),
        RenameRecord(RenameParam.mapping),
    ])

    if is_train:
        data_name = ["data"]
        label_name = ["gt_bbox"]
    else:
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    from models.retinanet import metric as cls_metric
    import core.detection_metric as box_metric

    metric_list = [
        cls_metric.FGAccMetric(
            "FGAcc", ["cls_loss_output", "point_refine_labels_output"], []),
        box_metric.L1(
            "InitL1", ["pts_init_loss_output", "points_init_labels_output"],
            []),
        box_metric.L1(
            "RefineL1",
            ["pts_refine_loss_output", "point_refine_labels_output"], []),
    ]

    return (General, KvstoreParam, HeadParam, RoiParam, BboxParam,
            DatasetParam, ModelParam, OptimizeParam, TestParam,
            transform, data_name, label_name, metric_list)
# Example #5
# 0
def get_config(is_train):
    """Build every configuration object for this experiment.

    Settings are stored as class attributes on local classes.  The function
    also instantiates the network pieces (``Backbone``, ``Neck``, ``RpnHead``,
    ``Detector`` -- names resolved at module level, outside this function),
    builds the train or test symbol, and assembles the input transform
    pipeline and the training metric.

    Args:
        is_train: truthy selects the training variant (batch size, image
            sets, train symbol, augmenting transform, label names); falsy
            selects the test variant.

    Returns:
        A 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``.  Note that ``transform`` is a
        dict with ``"sample"`` and ``"batch"`` keys when training and a plain
        list when testing.
    """
    class General:
        log_frequency = 10
        # Text after the last "/" and then the last "." of the module name;
        # reused below to build the checkpoint prefix.
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        # Multiplied by len(KvstoreParam.gpus) below, so presumably a
        # per-GPU image count -- TODO confirm.
        batch_image = 3 if is_train else 1
        # Copied into every *Param class that has an fp16 attribute.
        fp16 = True

    class KvstoreParam:
        kvstore = "local"
        batch_image = General.batch_image
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16 = General.fp16

    class NormalizeParam:
        # ndev is hard-coded to 8, which equals len(KvstoreParam.gpus) above;
        # the two are not linked, so keep them in sync by hand.
        normalizer = normalizer_factory(type="syncbn", ndev=8, wd_mult=1.0)

    class BackboneParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        # Also selects the pretrained checkpoint name in ModelParam.pretrain.
        depth = 50

    class NeckParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        dim_reduced = 384
        num_stage = 3
        S0_kernel = 1

    class RpnParam:
        num_class = 1 + 80
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        batch_image = General.batch_image
        sync_loss = True

        class anchor_generate:
            # Same three scales / ratios / strides as AnchorTarget2DParam
            # further down; the values are duplicated, not shared.
            scale = (4 * 2**0, 4 * 2**(1.0 / 3.0), 4 * 2**(2.0 / 3.0))
            ratio = (0.5, 1.0, 2.0)
            stride = (8, 16, 32, 64, 128)
            image_anchor = None

        class head:
            conv_channel = 256
            mean = None
            std = None

        class proposal:
            pre_nms_top_n = 1000
            # NOTE(review): the None entries here are presumably unused by
            # this head -- confirm against RpnHead.
            post_nms_top_n = None
            nms_thr = None
            min_bbox_side = None
            min_det_score = 0.05  # filter score in network

        class focal_loss:
            alpha = 0.25
            gamma = 2.0

    # BboxParam / RoiParam are returned to the caller but are not passed to
    # any constructor in this function; most of their fields are None.
    class BboxParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        num_class = None
        image_roi = None
        batch_image = None

    class RoiParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        out_size = None
        stride = None

    class DatasetParam:
        if is_train:
            image_set = ("coco_train2017", "coco_val2017")
        else:
            image_set = ("coco_test-dev2017", )

    # Only the symbol for the requested mode is built; the other stays None.
    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    detector = Detector()
    if is_train:
        train_sym = detector.get_train_symbol(backbone, neck, rpn_head)
        test_sym = None
    else:
        train_sym = None
        test_sym = detector.get_test_symbol(backbone, neck, rpn_head)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym

        from_scratch = False
        random = True
        memonger = False
        memonger_until = "stage4_unit3_relu"

        class pretrain:
            # Resolves to "pretrain_model/resnet50_v1b" with depth = 50.
            prefix = "pretrain_model/resnet%s_v1b" % BackboneParam.depth
            epoch = 0
            fixed_param = ["conv0"]

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            # 0.01 / 8 per image, scaled by the total batch
            # (number of GPUs * batch_image).
            lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image
            momentum = 0.9
            wd = 0.0001
            clip_gradient = None

        class schedule:
            begin_epoch = 0
            end_epoch = 25
            # Two boundaries, written for a total batch of 16 and rescaled to
            # the actual total batch.  15272 is presumably the iterations per
            # epoch at batch 16 (boundaries at 15 and 20 epochs) -- TODO
            # confirm.
            lr_iter = [
                15272 * 15 * 16 //
                (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 15272 *
                20 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image)
            ]

        class warmup:
            type = "gradual"
            # One tenth of the per-image rate used in optimizer.lr.
            lr = 0.001 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image
            iter = 15272 * 1 * 16 // (len(KvstoreParam.gpus) *
                                      KvstoreParam.batch_image)

    class TestParam:
        min_det_score = 0  # filter appended boxes
        max_det_per_image = 100

        # Identity hooks: the roidb and the outputs are passed through as is.
        process_roidb = lambda x: x
        process_output = lambda x, y: x

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            # Evaluate the checkpoint written at the last training epoch.
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.5

        class coco:
            # NOTE(review): points at val2017 while the test image_set above
            # is test-dev2017 -- confirm this is intended.
            annotation = "data/coco/annotations/instances_val2017.json"

    # data processing
    class NormParam:
        mean = (123.688, 116.779, 103.939)  # RGB order
        std = (58.393, 57.12, 57.375)

    class ResizeParam:
        short = 1280
        long = 1280
        scale_min = 0.8
        scale_max = 1.2

    class PadParam:
        short = ResizeParam.short
        long = ResizeParam.long
        max_num_gt = 100

    # Unlike the other parameter holders this one is instantiated (see the
    # train transform below): ``generate`` is created per instance.
    class AnchorTarget2DParam:
        def __init__(self):
            self.generate = self._generate()

        class _generate:
            def __init__(self):
                # Each entry equals ResizeParam.short/long (1280) divided by
                # the matching stride: 1280 / 8 = 160, ..., 1280 / 128 = 10.
                self.short = (160, 80, 40, 20, 10)
                self.long = (160, 80, 40, 20, 10)
                self.stride = (8, 16, 32, 64, 128)

            # Class-level attributes, shared by all instances.
            scales = (4 * 2**0, 4 * 2**(1.0 / 3.0), 4 * 2**(2.0 / 3.0))
            aspects = (0.5, 1.0, 2.0)

        class assign:
            allowed_border = 9999
            pos_thr = 0.5
            neg_thr = 0.5
            min_pos_thr = 0.0

        class sample:
            image_anchor = None
            pos_fraction = None

    class RenameParam:
        # Record key "image" is renamed to "data", the name listed in
        # data_name below.
        mapping = dict(image="data")


    from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \
        ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \
        RenameRecord
    from models.NASFPN.input import RandResizeCrop2DImageBbox, ResizeCrop2DImageBbox
    from models.retinanet.input import PyramidAnchorTarget2D, Norm2DImage, \
        AverageFgCount

    if is_train:
        # Training uses a dict: "sample" steps and a separate "batch" step.
        transform = {
            "sample": [
                ReadRoiRecord(None),
                Norm2DImage(NormParam),
                RandResizeCrop2DImageBbox(ResizeParam),
                Flip2DImageBbox(),
                Pad2DImageBbox(PadParam),
                ConvertImageFromHwcToChw(),
                PyramidAnchorTarget2D(AnchorTarget2DParam()),
                RenameRecord(RenameParam.mapping)
            ],
            "batch": [AverageFgCount("rpn_fg_count")]
        }
        data_name = ["data"]
        label_name = [
            "rpn_cls_label", "rpn_fg_count", "rpn_reg_target", "rpn_reg_weight"
        ]
    else:
        # Testing uses a plain list: no random crop, flip or anchor targets.
        transform = [
            ReadRoiRecord(None),
            Norm2DImage(NormParam),
            ResizeCrop2DImageBbox(ResizeParam),
            Pad2DImageBbox(PadParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    from models.retinanet import metric

    rpn_acc_metric = metric.FGAccMetric("FGAcc", ["cls_loss_output"],
                                        ["rpn_cls_label"])

    metric_list = [rpn_acc_metric]

    return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \
           ModelParam, OptimizeParam, TestParam, \
           transform, data_name, label_name, metric_list
def get_config(is_train):
    """Build every configuration object for this experiment.

    Settings are stored as class attributes on local classes.  The function
    also instantiates the network pieces (``Backbone``, ``Neck``, ``RpnHead``,
    ``RoiExtractor``, ``BboxHead``, ``Detector`` -- names resolved at module
    level, outside this function), builds the symbols for the requested mode,
    and assembles the input transform pipeline and the training metrics.

    Args:
        is_train: truthy selects the training variant (batch size, image
            set, train symbol, flip + anchor-target transform, label names);
            falsy selects the test variant.

    Returns:
        A 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``.
    """
    class General:
        log_frequency = 10
        # Text after the last "/" and then the last "." of the module name;
        # reused below to build the checkpoint prefix.
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        # Multiplied by len(KvstoreParam.gpus) below, so presumably a
        # per-GPU image count -- TODO confirm.
        batch_image = 2 if is_train else 1
        fp16 = False
        # Shared by RpnParam.anchor_generate.max_side and ResizeParam.
        long_side = 1200
        short_side = 800


    class KvstoreParam:
        kvstore     = "local"
        batch_image = General.batch_image
        gpus        = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16        = General.fp16


    class NormalizeParam:
        # normalizer = normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus))
        normalizer = normalizer_factory(type="fixbn")


    class BackboneParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        # Also selects the pretrained checkpoint name in ModelParam.pretrain.
        depth = 50


    class NeckParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer


    class RpnParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        batch_image = General.batch_image
        use_symbolic_proposal = None

        class anchor_generate:
            # scale / ratio / stride / max_side are also handed to
            # add_anchor_to_arg via ModelParam.process_weight, and reused by
            # AnchorTarget2DParam.generate below.
            scale = (2, 4, 8, 16, 32)
            ratio = (0.5, 1.0, 2.0)
            stride = 16
            max_side = General.long_side
            image_anchor = 256

        class head:
            conv_channel = 512
            mean = (0, 0, 0, 0)
            std = (1, 1, 1, 1)

        class proposal:
            # Larger proposal budgets when training than when testing.
            pre_nms_top_n = 12000 if is_train else 6000
            post_nms_top_n = 2000 if is_train else 1000
            nms_thr = 0.7
            min_bbox_side = 0

        class subsample_proposal:
            proposal_wo_gt = False
            # Same value as BboxParam.image_roi; the two are not linked.
            image_roi = 512
            fg_fraction = 0.25
            fg_thr = 0.5
            bg_thr_hi = 0.5
            bg_thr_lo = 0.0

        class bbox_target:
            num_reg_class = 2
            class_agnostic = True
            weight = (1.0, 1.0, 1.0, 1.0)
            # Same mean / std as BboxParam.regress_target.
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)


    class BboxParam:
        fp16        = General.fp16
        normalizer  = NormalizeParam.normalizer
        num_class   = 1 + 80
        image_roi   = 512
        batch_image = General.batch_image

        class regress_target:
            class_agnostic = True
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)


    class RoiParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        out_size = 7
        # Same value as RpnParam.anchor_generate.stride; not linked.
        stride = 16


    class DatasetParam:
        if is_train:
            image_set = ("coco_train2017", )
        else:
            image_set = ("coco_minival2017", )

    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    roi_extractor = RoiExtractor(RoiParam)
    bbox_head = BboxHead(BboxParam)
    detector = Detector()
    # Training builds only the train symbol; testing builds both the
    # RPN-only test symbol and the full test symbol.
    if is_train:
        train_sym = detector.get_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head)
        rpn_test_sym = None
        test_sym = None
    else:
        train_sym = None
        rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head)
        test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head)


    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym
        rpn_test_symbol = rpn_test_sym

        from_scratch = False
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        # Hook taking (sym, arg, aux); forwards them to add_anchor_to_arg
        # together with the anchor settings from RpnParam.anchor_generate.
        process_weight = lambda sym, arg, aux: \
            add_anchor_to_arg(
                sym, arg, aux, RpnParam.anchor_generate.max_side,
                RpnParam.anchor_generate.stride,RpnParam.anchor_generate.scale,
                RpnParam.anchor_generate.ratio)

        class pretrain:
            # Resolves to "pretrain_model/resnet50_v1b" with depth = 50.
            prefix = "pretrain_model/resnet%s_v1b" % BackboneParam.depth
            epoch = 0
            fixed_param = ["conv0", "stage1", "gamma", "beta"]

        class QuantizeTrainingParam:
            # NOTE(review): presumably turns quantization-aware training
            # on/off; it is off here -- confirm against the consumer.
            quantize_flag = False
            # quantized_op = ("Convolution", "FullyConnected", "Deconvolution","Concat", "Pooling", "add_n", "elemwise_add")
            quantized_op = ("Convolution", "FullyConnected", "Deconvolution")

            # The weight and activation settings below differ only in
            # is_weight.
            class WeightQuantizeParam:
                delay_quant = 0
                ema_decay = 0.99
                grad_mode = "ste"
                is_weight = True
                is_weight_perchannel = False
                quant_mode = "minmax"

            class ActQuantizeParam:
                delay_quant = 0
                ema_decay = 0.99
                grad_mode = "ste"
                is_weight = False
                is_weight_perchannel = False
                quant_mode = "minmax"

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            # 0.01 / 8 per image, scaled by the total batch
            # (number of GPUs * batch_image).
            lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image
            momentum = 0.9
            wd = 0.0001
            clip_gradient = 35

        class schedule:
            begin_epoch = 0
            end_epoch = 6
            # Boundaries written for a total batch of 16 (60000 and 80000
            # iterations) and rescaled to the actual total batch.
            lr_iter = [60000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image),
                       80000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image)]

        class warmup:
            type = "gradual"
            lr = 0.0
            # 750 iterations at a total batch of 16, rescaled the same way.
            iter = 750 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image)


    class TestParam:
        min_det_score = 0.05
        max_det_per_image = 100

        # Identity hooks: the roidb and the outputs are passed through as is.
        process_roidb = lambda x: x
        process_output = lambda x, y: x

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            # Evaluate the checkpoint written at the last training epoch.
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.5

        class coco:
            # NOTE(review): minival2014 annotations while the test image_set
            # above is "coco_minival2017" -- confirm this is intended.
            annotation = "data/coco/annotations/instances_minival2014.json"

    # data processing
    class NormParam:
        # Per-channel constants given in the 0-1 range and scaled by 255.
        mean = tuple(i * 255 for i in (0.485, 0.456, 0.406)) # RGB order
        std = tuple(i * 255 for i in (0.229, 0.224, 0.225))


    class ResizeParam:
        short = General.short_side
        long = General.long_side


    class PadParam:
        short = ResizeParam.short
        long = ResizeParam.long
        max_num_gt = 100


    class AnchorTarget2DParam:
        class generate:
            # Resized side lengths floor-divided by the stride:
            # 800 // 16 = 50 and 1200 // 16 = 75.
            short = ResizeParam.short // RpnParam.anchor_generate.stride
            long = ResizeParam.long // RpnParam.anchor_generate.stride
            stride = RpnParam.anchor_generate.stride
            scales = RpnParam.anchor_generate.scale
            aspects = RpnParam.anchor_generate.ratio

        class assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0

        class sample:
            image_anchor = RpnParam.anchor_generate.image_anchor
            pos_fraction = 0.5


    class RenameParam:
        # Record key "image" is renamed to "data", the name listed in
        # data_name below.
        mapping = dict(image="data")


    from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \
        ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \
        RenameRecord, AnchorTarget2D, Norm2DImage, Pad2DImage

    if is_train:
        transform = [
            ReadRoiRecord(None),
            Norm2DImage(NormParam),
            Resize2DImageBbox(ResizeParam),
            Flip2DImageBbox(),
            Pad2DImageBbox(PadParam),
            ConvertImageFromHwcToChw(),
            AnchorTarget2D(AnchorTarget2DParam),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "gt_bbox"]
        label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"]
    else:
        # Test pipeline: no flip, no anchor targets, and Pad2DImage instead
        # of Pad2DImageBbox.
        transform = [
            ReadRoiRecord(None),
            Norm2DImage(NormParam),
            Resize2DImageBbox(ResizeParam),
            Pad2DImage(PadParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    import core.detection_metric as metric

    rpn_acc_metric = metric.AccWithIgnore(
        "RpnAcc",
        ["rpn_cls_loss_output"],
        ["rpn_cls_label"]
    )
    rpn_l1_metric = metric.L1(
        "RpnL1",
        ["rpn_reg_loss_output"],
        ["rpn_cls_label"]
    )
    # for bbox, the label is generated in network so it is an output
    box_acc_metric = metric.AccWithIgnore(
        "RcnnAcc",
        ["bbox_cls_loss_output", "bbox_label_blockgrad_output"],
        []
    )
    box_l1_metric = metric.L1(
        "RcnnL1",
        ["bbox_reg_loss_output", "bbox_label_blockgrad_output"],
        []
    )

    metric_list = [rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric]

    return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \
           ModelParam, OptimizeParam, TestParam, \
           transform, data_name, label_name, metric_list
def get_config(is_train):
    """Build every configuration object for this experiment.

    Settings are stored as class attributes on local classes.  The function
    also instantiates the network pieces (``Backbone``, ``Neck``, ``RpnHead``,
    ``RoiExtractor``, ``BboxHead``, ``Detector`` -- names resolved at module
    level, outside this function), builds the train or test symbol, and
    assembles the input transform pipeline and the training metrics.

    Testing is multi-scale: ``TestParam.process_roidb`` expands each roidb
    record into one record per (short, long) pair of ``TestScaleParam``.

    Args:
        is_train: truthy selects the training variant (batch size, image
            sets, train symbol, random-resize/crop transform, label names);
            falsy selects the test variant.

    Returns:
        A 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``.
    """
    class General:
        log_frequency = 10
        # Text after the last "/" and then the last "." of the module name;
        # reused below to build the checkpoint prefix.
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        # Multiplied by len(KvstoreParam.gpus) below, so presumably a
        # per-GPU image count -- TODO confirm.
        batch_image = 2 if is_train else 1
        fp16 = False

    class KvstoreParam:
        kvstore = "local"
        batch_image = General.batch_image
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16 = General.fp16

    class NormalizeParam:
        # normalizer = normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus))
        normalizer = normalizer_factory(type="fixbn")

    class BackboneParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class NeckParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class RpnParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        batch_image = General.batch_image

        class anchor_generate:
            # Same scales / ratios / stride / image_anchor as
            # AnchorTarget2DParam further down; the values are duplicated,
            # not shared.
            scale = (2, 4, 8, 16, 32)
            ratio = (0.5, 1.0, 2.0)
            stride = 16
            image_anchor = 256

        class head:
            conv_channel = 512
            mean = (0, 0, 0, 0)
            std = (1, 1, 1, 1)

        class proposal:
            # Larger proposal budgets when training than when testing.
            pre_nms_top_n = 12000 if is_train else 6000
            post_nms_top_n = 2000 if is_train else 1000
            nms_thr = 0.7
            min_bbox_side = 0

        class subsample_proposal:
            proposal_wo_gt = True
            # Same value as BboxParam.image_roi; the two are not linked.
            image_roi = 256
            fg_fraction = 0.25
            fg_thr = 0.5
            bg_thr_hi = 0.5
            bg_thr_lo = 0.0

        class bbox_target:
            num_reg_class = 2
            class_agnostic = True
            weight = (1.0, 1.0, 1.0, 1.0)
            # Same mean / std as BboxParam.regress_target.
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class BboxParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        num_class = 1 + 80
        image_roi = 256
        batch_image = General.batch_image

        class regress_target:
            class_agnostic = True
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class RoiParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        out_size = 7
        # Same value as RpnParam.anchor_generate.stride; not linked.
        stride = 16

    class DatasetParam:
        if is_train:
            image_set = ("coco_train2014", "coco_valminusminival2014")
        else:
            image_set = ("coco_minival2014", )

    # Only the symbol for the requested mode is built; the other stays None.
    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    roi_extractor = RoiExtractor(RoiParam)
    bbox_head = BboxHead(BboxParam)
    detector = Detector()
    if is_train:
        train_sym = detector.get_train_symbol(backbone, neck, rpn_head,
                                              roi_extractor, bbox_head)
        test_sym = None
    else:
        train_sym = None
        test_sym = detector.get_test_symbol(backbone, neck, rpn_head,
                                            roi_extractor, bbox_head)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym

        from_scratch = False
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            prefix = "pretrain_model/resnet-50"
            epoch = 0
            fixed_param = ["conv0", "stage1", "gamma", "beta"]

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            # 0.01 / 8 per image, scaled by the total batch
            # (number of GPUs * batch_image).
            lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image
            momentum = 0.9
            wd = 0.0001
            clip_gradient = 35

        class schedule:
            begin_epoch = 0
            end_epoch = 12
            # Boundaries written for a total batch of 16 (120000 and 160000
            # iterations) and rescaled to the actual total batch.
            lr_iter = [
                120000 * 16 //
                (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 160000 *
                16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image)
            ]

        class warmup:
            type = "gradual"
            lr = 0.0
            # 3000 iterations at a total batch of 16, rescaled the same way.
            iter = 3000 * 16 // (len(KvstoreParam.gpus) *
                                 KvstoreParam.batch_image)

    class TestScaleParam:
        # Paired element-wise: (600, 2000), (800, 2000), (1000, 2000),
        # (1200, 2000).
        short_ranges = [600, 800, 1000, 1200]
        long_ranges = [2000, 2000, 2000, 2000]

        @staticmethod
        def add_resize_info(roidb):
            """Return a new list with one copy of each record per test scale.

            Every record of ``roidb`` is copied (via its ``copy()`` method)
            once for each (short, long) pair, and the copy gets the keys
            ``"resize_long"`` and ``"resize_short"``.  The input records are
            not modified; output order is record-major, scale-minor.
            """
            ms_roidb = []
            for r_ in roidb:
                for short, long in zip(TestScaleParam.short_ranges,
                                       TestScaleParam.long_ranges):
                    r = r_.copy()
                    r["resize_long"] = long
                    r["resize_short"] = short
                    ms_roidb.append(r)

            return ms_roidb

    class TestParam:
        min_det_score = 0.001
        # NOTE(review): 0 here, unlike the 100 used by sibling configs;
        # presumably means "no cap" -- confirm against the consumer.
        max_det_per_image = 0

        # Expands the roidb to one entry per test scale (see TestScaleParam).
        process_roidb = TestScaleParam.add_resize_info
        # Identity hook: outputs are passed through as is.
        process_output = lambda x, y: x

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            # Evaluate the checkpoint written at the last training epoch.
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.5

        class coco:
            annotation = "data/coco/annotations/instances_minival2014.json"

    # data processing
    # ResizeParam is not referenced by the active transforms below (only by
    # a commented-out line in the test pipeline).
    class ResizeParam:
        short = 800
        long = 1200 if is_train else 2000

    class RandResizeParam:
        short = None  # generate on the fly
        long = None
        # Same candidate scales as TestScaleParam; duplicated, not shared.
        short_ranges = [600, 800, 1000, 1200]
        long_ranges = [2000, 2000, 2000, 2000]

    class RandCropParam:
        mode = "center"  # random or center
        short = 800
        long = 1200

    # Only the training pipeline uses PadParam.
    class PadParam:
        short = 800
        long = 1200 if is_train else 2000
        max_num_gt = 100

    class AnchorTarget2DParam:
        class generate:
            # Crop size (800 x 1200) floor-divided by the stride of 16.
            short = 800 // 16
            long = 1200 // 16
            stride = 16
            scales = (2, 4, 8, 16, 32)
            aspects = (0.5, 1.0, 2.0)

        class assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0

        class sample:
            image_anchor = 256
            pos_fraction = 0.5

    class RenameParam:
        # Record key "image" is renamed to "data", the name listed in
        # data_name below.
        mapping = dict(image="data")


    from core.detection_input import ReadRoiRecord, RandResize2DImageBbox, RandCrop2DImageBbox, \
        Resize2DImageBboxByRoidb, ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \
        RenameRecord, AnchorTarget2D

    if is_train:
        transform = [
            ReadRoiRecord(None),
            RandResize2DImageBbox(RandResizeParam),
            RandCrop2DImageBbox(RandCropParam),
            Flip2DImageBbox(),
            Pad2DImageBbox(PadParam),
            ConvertImageFromHwcToChw(),
            AnchorTarget2D(AnchorTarget2DParam),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "gt_bbox"]
        label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"]
    else:
        # Resize2DImageBboxByRoidb takes no parameter object here; the
        # per-record "resize_short"/"resize_long" keys are added by
        # TestScaleParam.add_resize_info (presumably what it reads -- TODO
        # confirm).  There is no padding step in the test pipeline.
        transform = [
            ReadRoiRecord(None),
            #Resize2DImageBbox(ResizeParam),
            Resize2DImageBboxByRoidb(),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    import core.detection_metric as metric

    rpn_acc_metric = metric.AccWithIgnore("RpnAcc", ["rpn_cls_loss_output"],
                                          ["rpn_cls_label"])
    rpn_l1_metric = metric.L1("RpnL1", ["rpn_reg_loss_output"],
                              ["rpn_cls_label"])
    # for bbox, the label is generated in network so it is an output
    box_acc_metric = metric.AccWithIgnore(
        "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], [])
    box_l1_metric = metric.L1(
        "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], [])

    metric_list = [
        rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric
    ]

    return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \
           ModelParam, OptimizeParam, TestParam, \
           transform, data_name, label_name, metric_list
Exemple #8
0
def get_config(is_train):
    """Build every configuration object for this multi-branch experiment.

    Settings are stored as class attributes on local classes.  The function
    also instantiates the network pieces (``Backbone``, ``Neck``, ``RpnHead``,
    ``RoiExtractor``, ``BboxHead``, ``Detector`` -- names resolved at module
    level, outside this function), builds the train or test symbol, and
    assembles the input transform pipeline and the training metrics.

    Testing is multi-scale: ``TestParam.process_roidb`` expands each roidb
    record into one record per (short, long) pair of ``TestScaleParam``.

    Args:
        is_train: truthy selects the training variant (batch size, image
            sets, train symbol, random-resize/crop transform, label names);
            falsy selects the test variant.

    Returns:
        A 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``.
    """
    class General:
        log_frequency = 20
        # Copied into BackboneParam and used for the pretrain prefix.
        depth = 101
        # Text after the last "/" and then the last "." of the module name;
        # reused below to build the checkpoint prefix.
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        # Multiplied by len(KvstoreParam.gpus) below, so presumably a
        # per-GPU image count -- TODO confirm.
        batch_image = 3 if is_train else 1
        fp16 = True

    class Trident:
        num_branch = 3
        train_scaleaware = True
        test_scaleaware = True
        branch_ids = range(num_branch)
        # One dilation and one valid range per branch (three of each).
        branch_dilates = [1, 2, 3]
        valid_ranges = [(0, 150), (50, 270), (150, -1)]
        valid_ranges_on_origin = False
        branch_bn_shared = False
        branch_conv_shared = True
        branch_deform = True

    class KvstoreParam:
        kvstore     = "local"
        batch_image = General.batch_image
        gpus        = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16        = General.fp16

    class NormalizeParam:
        normalizer = normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus))
        # normalizer = normalizer_factory(type="fixbn")

    class BackboneParam:
        fp16 = General.fp16
        depth = General.depth
        normalizer = NormalizeParam.normalizer
        num_branch = Trident.num_branch
        branch_ids = Trident.branch_ids
        branch_dilates = Trident.branch_dilates
        branch_bn_shared = Trident.branch_bn_shared
        branch_conv_shared = Trident.branch_conv_shared
        branch_deform = Trident.branch_deform

    class NeckParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer


    class RpnParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        # The heads see batch_image * num_branch entries per step.
        batch_image = General.batch_image * Trident.num_branch

        class anchor_generate:
            # Same scales / ratios / stride / image_anchor as
            # AnchorTarget2DParam further down; duplicated, not shared.
            scale = (2, 4, 8, 16, 32)
            ratio = (0.5, 1.0, 2.0)
            stride = 16
            image_anchor = 256

        class head:
            conv_channel = 512
            mean = (0, 0, 0, 0)
            std = (1, 1, 1, 1)

        class proposal:
            pre_nms_top_n = 12000 if is_train else 6000
            # Here training keeps fewer proposals after NMS than testing.
            post_nms_top_n = 500 if is_train else 1000
            nms_thr = 0.7
            min_bbox_side = 0

        class subsample_proposal:
            proposal_wo_gt = True
            # Same value as BboxParam.image_roi; the two are not linked.
            image_roi = 128
            fg_fraction = 0.5
            fg_thr = 0.5
            bg_thr_hi = 0.5
            bg_thr_lo = 0.0

        class bbox_target:
            num_reg_class = 2
            class_agnostic = True
            weight = (1.0, 1.0, 1.0, 1.0)
            # Same mean / std as BboxParam.regress_target.
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)


    class BboxParam:
        fp16        = General.fp16
        normalizer  = NormalizeParam.normalizer
        num_class   = 1 + 80
        image_roi   = 128
        batch_image = General.batch_image * Trident.num_branch

        class regress_target:
            class_agnostic = True
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)


    class RoiParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        out_size = 7
        # Same value as RpnParam.anchor_generate.stride; not linked.
        stride = 16


    class DatasetParam:
        if is_train:
            image_set = ("coco_train2014", "coco_valminusminival2014", "coco_minival2014")
        else:
            image_set = ("coco_test-dev2017", )
            # image_set = ("coco_minival2014", )

    # Only the symbol for the requested mode is built; the other stays None.
    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    roi_extractor = RoiExtractor(RoiParam)
    bbox_head = BboxHead(BboxParam)
    detector = Detector()
    if is_train:
        train_sym = detector.get_train_symbol(
            backbone, neck, rpn_head, roi_extractor, bbox_head,
            num_branch=Trident.num_branch, scaleaware=Trident.train_scaleaware)
        test_sym = None
    else:
        train_sym = None
        test_sym = detector.get_test_symbol(
            backbone, neck, rpn_head, roi_extractor, bbox_head, num_branch=Trident.num_branch)


    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym

        from_scratch = False
        random = True
        memonger = True
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            # Resolves to "pretrain_model/resnet-101" with depth = 101.
            prefix = "pretrain_model/resnet-%d" % General.depth
            epoch = 0
            fixed_param = []

        def process_weight(sym, arg_params, aux_params):
            """Seed per-branch parameters from their branch-less counterparts.

            For every argument / auxiliary-state name of ``sym`` that
            contains a ``_branch<digits>`` marker, the marker is stripped; if
            the stripped name exists in the corresponding dict, its value is
            assigned to the branch-specific name.  Both dicts are modified in
            place and nothing is returned.

            Args:
                sym: object exposing ``list_arguments()`` and
                    ``list_auxiliary_states()``.
                arg_params: mapping from argument name to value.
                aux_params: mapping from auxiliary-state name to value.
            """
            import re
            import logging

            logger = logging.getLogger()
            # Raw string: "\d" is an invalid escape in a normal literal and
            # triggers a warning on current Python versions.  The pattern is
            # compiled once because it is applied to every parameter name.
            branch_marker = re.compile(r'_branch\d+')

            # for trident non-shared initialization
            for k in sym.list_arguments():
                branch_name = branch_marker.sub('', k)
                if k != branch_name and branch_name in arg_params:
                    arg_params[k] = arg_params[branch_name]
                    logger.info('init arg {} with {}'.format(k, branch_name))

            for k in sym.list_auxiliary_states():
                branch_name = branch_marker.sub('', k)
                if k != branch_name and branch_name in aux_params:
                    aux_params[k] = aux_params[branch_name]
                    logger.info('init aux {} with {}'.format(k, branch_name))


    class OptimizeParam:
        class optimizer:
            type = "sgd"
            # 0.01 / 8 per image, scaled by the total batch
            # (number of GPUs * batch_image).
            lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image
            momentum = 0.9
            wd = 0.0001
            clip_gradient = 5

        class schedule:
            begin_epoch = 0
            end_epoch = 18
            # Boundaries written for a total batch of 16 (180000 and 240000
            # iterations) and rescaled to the actual total batch.
            lr_iter = [180000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image),
                       240000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image)]

        class warmup:
            type = "gradual"
            lr = 0.0
            # 3000 iterations at a total batch of 16, rescaled the same way.
            iter = 3000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image)


    class TestScaleParam:
        # Paired element-wise: (600, 2000), (800, 2000), (1000, 2000),
        # (1200, 2000).
        short_ranges = [600, 800, 1000, 1200]
        long_ranges = [2000, 2000, 2000, 2000]

        @staticmethod
        def add_resize_info(roidb):
            """Return a new list with one copy of each record per test scale.

            Every record of ``roidb`` is copied (via its ``copy()`` method)
            once for each (short, long) pair, and the copy gets the keys
            ``"resize_long"`` and ``"resize_short"``.  The input records are
            not modified; output order is record-major, scale-minor.
            """
            ms_roidb = []
            for r_ in roidb:
                for short, long in zip(TestScaleParam.short_ranges, TestScaleParam.long_ranges):
                    r = r_.copy()
                    r["resize_long"] = long
                    r["resize_short"] = short
                    ms_roidb.append(r)

            return ms_roidb


    class TestParam:
        min_det_score = 0.001
        max_det_per_image = 100

        # Expands the roidb to one entry per test scale (see TestScaleParam).
        process_roidb = TestScaleParam.add_resize_info
        if Trident.test_scaleaware:
            # Outputs are post-processed by process_branch_outputs with the
            # branch count and valid ranges; the second argument is ignored.
            process_output = lambda x, y: process_branch_outputs(
                x, Trident.num_branch, Trident.valid_ranges, Trident.valid_ranges_on_origin)
        else:
            process_output = lambda x, y: x

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            # Evaluate the checkpoint written at the last training epoch.
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            # Here ``type`` is a callable rather than a string name.
            from operator_py.nms import cython_soft_nms_wrapper
            type = cython_soft_nms_wrapper
            thr = 0.5

        class coco:
            # NOTE(review): minival2014 annotations while the test image_set
            # above is test-dev2017 -- confirm this is intended.
            annotation = "data/coco/annotations/instances_minival2014.json"

    # data processing
    # ResizeParam is not referenced by the transforms below.
    class ResizeParam:
        short = 800
        long = 1200 if is_train else 2000

    class RandResizeParam:
        short = None # generate on the fly
        long = None
        # Same candidate scales as TestScaleParam; duplicated, not shared.
        short_ranges = [600, 800, 1000, 1200]
        long_ranges = [2000, 2000, 2000, 2000]


    class RandCropParam:
        mode = "center" # random or center
        short = 800
        long = 1200

    # Only the training pipeline uses PadParam.
    class PadParam:
        short = 800
        long = 1200 if is_train else 2000
        max_num_gt = 100

    class ScaleRange:
        valid_ranges = Trident.valid_ranges
        cal_on_origin = Trident.valid_ranges_on_origin # True: valid_ranges on origin image scale / valid_ranges on resized image scale

    class AnchorTarget2DParam:
        class generate:
            # Crop size (800 x 1200) floor-divided by the stride of 16.
            short = 800 // 16
            long = 1200 // 16
            stride = 16
            scales = (2, 4, 8, 16, 32)
            aspects = (0.5, 1.0, 2.0)

        class assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0

        class sample:
            image_anchor = 256
            pos_fraction = 0.5

        class trident:
            invalid_anchor_threshd = 0.3


    class RenameParam:
        # Record key "image" is renamed to "data", the name listed in
        # data_name below.
        mapping = dict(image="data")


    from core.detection_input import ReadRoiRecord,  RandResize2DImageBbox, RandCrop2DImageBbox, Resize2DImageBboxByRoidb, \
        ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \
        RenameRecord
    from models.tridentnet.input import ScaleAwareRange, TridentAnchorTarget2D

    if is_train:
        transform = [
            ReadRoiRecord(None),
            RandResize2DImageBbox(RandResizeParam),
            RandCrop2DImageBbox(RandCropParam),
            Flip2DImageBbox(),
            Pad2DImageBbox(PadParam),
            ConvertImageFromHwcToChw(),
            ScaleAwareRange(ScaleRange),
            TridentAnchorTarget2D(AnchorTarget2DParam),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "gt_bbox"]
        # The extra input is only listed when scale-aware training is on.
        if Trident.train_scaleaware:
            data_name.append("valid_ranges")
        label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"]
    else:
        # Resize2DImageBboxByRoidb takes no parameter object here; the
        # per-record "resize_short"/"resize_long" keys are added by
        # TestScaleParam.add_resize_info (presumably what it reads -- TODO
        # confirm).  There is no padding step in the test pipeline.
        transform = [
            ReadRoiRecord(None),
            Resize2DImageBboxByRoidb(),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    import core.detection_metric as metric

    rpn_acc_metric = metric.AccWithIgnore(
        "RpnAcc",
        ["rpn_cls_loss_output"],
        ["rpn_cls_label"]
    )
    rpn_l1_metric = metric.L1(
        "RpnL1",
        ["rpn_reg_loss_output"],
        ["rpn_cls_label"]
    )
    # for bbox, the label is generated in network so it is an output
    box_acc_metric = metric.AccWithIgnore(
        "RcnnAcc",
        ["bbox_cls_loss_output", "bbox_label_blockgrad_output"],
        []
    )
    box_l1_metric = metric.L1(
        "RcnnL1",
        ["bbox_reg_loss_output", "bbox_label_blockgrad_output"],
        []
    )

    metric_list = [rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric]

    return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \
           ModelParam, OptimizeParam, TestParam, \
           transform, data_name, label_name, metric_list
Exemple #9
0
def get_config(is_train):
    """Build the configuration objects for this experiment.

    The nested classes are used as plain attribute namespaces. They are
    evaluated top to bottom, so later classes may read attributes of earlier
    ones (e.g. ``TestParam.model.epoch`` reads ``OptimizeParam.schedule``).
    ``normalizer_factory``, ``Backbone``, ``Neck``, ``RpnHead``, ``Detector``
    and ``add_anchor_to_arg`` are not defined in this function and must be
    provided by the enclosing module.

    Args:
        is_train: truthy selects the training variant (per-device batch of 2,
            the train image set, the train symbol and the training transform
            list); falsy selects the test variant.

    Returns:
        A 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``.
    """
    class General:
        log_frequency = 10
        # last "/"-separated piece of the module name, then its last dotted
        # component; also used below to build the checkpoint prefix
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        batch_image = 2 if is_train else 1
        fp16 = False

    class KvstoreParam:
        kvstore = "nccl"
        batch_image = General.batch_image
        # eight device ids; len(gpus) also scales the lr and lr_iter below
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16 = General.fp16

    class NormalizeParam:
        # one normalizer object shared by every component below
        normalizer = normalizer_factory(type="fixbn")

    class BackboneParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class NeckParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class RpnParam:
        # 1 + 80 = 81
        num_class = 1 + 80
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        batch_image = General.batch_image

        class anchor_generate:
            # three scales: 4 * 2^(0), 4 * 2^(1/3), 4 * 2^(2/3)
            scale = (4 * 2**0, 4 * 2**(1.0 / 3.0), 4 * 2**(2.0 / 3.0))
            ratio = (0.5, 1.0, 2.0)
            stride = (8, 16, 32, 64, 128)
            # forwarded to add_anchor_to_arg in ModelParam.process_weight
            max_side = 1440

        class anchor_assign:
            allowed_border = 9999
            bbox_thr = 0.6
            pre_anchor_top_n = 50

        class head:
            conv_channel = 256
            mean = (.0, .0, .0, .0)
            std = (0.1, 0.1, 0.2, 0.2)

        # Most fields of the following three namespaces are set to None;
        # presumably the head used by this config does not read them
        # (TODO confirm against RpnHead).
        class proposal:
            pre_nms_top_n = 1000
            post_nms_top_n = None
            nms_thr = None
            min_bbox_side = None

        class subsample_proposal:
            proposal_wo_gt = None
            image_roi = None
            fg_fraction = None
            fg_thr = None
            bg_thr_hi = None
            bg_thr_lo = None

        class bbox_target:
            num_reg_class = None
            class_agnostic = None
            weight = None
            mean = None
            std = None

        class focal_loss:
            alpha = 0.5
            gamma = 2.0

    # BboxParam and RoiParam are part of the returned tuple, but no bbox head
    # or RoI extractor is constructed in this function; apart from
    # fp16/normalizer their fields are None.
    class BboxParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        num_class = None
        image_roi = None
        batch_image = None

        class regress_target:
            class_agnostic = None
            mean = None
            std = None

    class RoiParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        out_size = None
        stride = None

    class DatasetParam:
        # the image set depends on the mode
        if is_train:
            image_set = ("coco_train2017", )
        else:
            image_set = ("coco_val2017", )

    # compose the detector from backbone, neck and head only
    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    detector = Detector()
    # only the symbol for the requested mode is built; the other stays None
    if is_train:
        train_sym = detector.get_train_symbol(backbone, neck, rpn_head)
        test_sym = None
    else:
        train_sym = None
        test_sym = detector.get_test_symbol(backbone, neck, rpn_head)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym

        from_scratch = False
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            prefix = "pretrain_model/resnet-v1-101"
            epoch = 0
            fixed_param = ["conv0", "stage1", "gamma", "beta"]

        # Defined without `self`: it is meant to be called through the class
        # as a plain function of (sym, arg, aux).
        def process_weight(sym, arg, aux):
            # one add_anchor_to_arg call per stride, always with the same
            # max_side, scale and ratio settings
            for stride in RpnParam.anchor_generate.stride:
                add_anchor_to_arg(sym, arg, aux,
                                  RpnParam.anchor_generate.max_side, stride,
                                  RpnParam.anchor_generate.scale,
                                  RpnParam.anchor_generate.ratio)

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            # scales linearly with the total batch size:
            # 0.005 per 8 images, i.e. 0.01 for 8 devices x 2 images
            lr = 0.005 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image
            momentum = 0.9
            wd = 0.0001
            clip_gradient = 35

        class schedule:
            begin_epoch = 0
            end_epoch = 6
            # 60000 and 80000 iterations at a total batch of 16, rescaled
            # (integer division) to the actual total batch size
            lr_iter = [
                60000 * 16 //
                (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 *
                16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image)
            ]

        class warmup:
            type = "gradual"
            # one third of the optimizer lr computed above
            lr = 0.005 / 8 * len(
                KvstoreParam.gpus) * KvstoreParam.batch_image / 3
            iter = 1000

    class TestParam:
        min_det_score = 0.05  # filter appended boxes
        max_det_per_image = 100

        # identity callback: returns its argument unchanged
        def process_roidb(x):
            return x  # noqa: E704

        # returns its first argument unchanged; the second one is ignored
        def process_output(x, y):
            return x  # noqa: E704

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            # defaults to the last epoch of the training schedule
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.5

        class coco:
            # NOTE(review): DatasetParam reads "coco_val2017" in test mode
            # while this file is named minival2014 -- confirm they match.
            annotation = "data/coco/annotations/instances_minival2014.json"

    # data processing
    class NormParam:
        mean = (122.7717, 115.9465, 102.9801)  # RGB order
        std = (1.0, 1.0, 1.0)

    class ResizeParam:
        short = 800
        long = 1333

    class PadParam:
        # same short/long values as ResizeParam
        short = 800
        long = 1333
        max_num_gt = 100

    class RenameParam:
        # passed to RenameRecord at the end of both transform lists
        mapping = dict(image="data")

    from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \
        ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \
        RenameRecord
    from models.retinanet.input import Norm2DImage

    if is_train:
        # training list: the test steps plus Flip2DImageBbox and
        # Pad2DImageBbox
        transform = [
            ReadRoiRecord(None),
            Norm2DImage(NormParam),
            Resize2DImageBbox(ResizeParam),
            Flip2DImageBbox(),
            Pad2DImageBbox(PadParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data"]
        label_name = ["gt_bbox", "im_info"]
    else:
        # test list: no flipping or padding, and no labels
        transform = [
            ReadRoiRecord(None),
            Norm2DImage(NormParam),
            Resize2DImageBbox(ResizeParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    import core.detection_metric as metric
    # two scalar metrics, each bound to one named network output and to no
    # label
    pos_loss = metric.ScalarLoss("PosLoss", ["positive_loss_output"], [])
    neg_loss = metric.ScalarLoss("NegLoss", ["negative_loss_output"], [])
    metric_list = [pos_loss, neg_loss]

    return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \
        ModelParam, OptimizeParam, TestParam, \
        transform, data_name, label_name, metric_list
def get_config(is_train):
    """Build the annotated configuration objects for this experiment.

    The nested classes are used as plain attribute namespaces and are
    evaluated top to bottom, so later classes may read earlier ones.
    ``normalizer_factory``, ``Backbone``, ``Neck``, ``RpnHead``,
    ``RoiExtractor``, ``BboxHead``, ``MaskHead``, ``BboxPostProcessor``,
    ``Detector``, ``add_anchor_to_arg`` and ``process_output`` are not defined
    in this function and must be provided by the enclosing module.

    Args:
        is_train: truthy selects the training variant (per-device batch of 2,
            train image sets, train symbol, training transform list); falsy
            selects the test variant.

    Returns:
        A 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``. ``MaskParam`` and
        ``MaskRoiParam`` are only consumed while building the heads and are
        not returned.
    """
    class General:
        # number of iterations between printing the metrics to stdout
        log_frequency = 10
        # the directory name for the experiment, the default is the name of config
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        # batch size per GPU
        batch_image = 2 if is_train else 1
        # use FP16 for weight and activation
        # recommended to toggle when you are training on Volta or later GPUs
        fp16 = False
        # number of threads used for the data loader
        # this term affects both the CPU utilization and the MEM usage
        # lower this if you are training on Desktop
        loader_worker = 8
        # switch on the built-in profiler to find the bottleneck of the network
        profile = False

    class KvstoreParam:
        # the type of communicator used to sync model parameters
        kvstore = "nccl"  # "local", "aggregated"
        batch_image = General.batch_image
        # GPUs to use
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16 = General.fp16

    class NormalizeParam:
        # the type of normalizer used for network
        # see also ModelParam.pretrain.fixed_param for the freeze of gamma/beta
        # NOTE: the four assignments below showcase the available options;
        # each call is evaluated, but only the LAST assignment ("gn") is the
        # value that the rest of this config reads.
        normalizer = normalizer_factory(type="fixbn")  # freeze bn stats
        normalizer = normalizer_factory(
            type="localbn")  # use bn stats in one GPU
        normalizer = normalizer_factory(
            type="syncbn",
            ndev=len(KvstoreParam.gpus))  # use bn stats across GPUs
        normalizer = normalizer_factory(type="gn")  # use GroupNorm

    class BackboneParam:
        # you can control the FP16 option and normalizer for each individual component
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        # some backbone components accept additional configs, like the depth for ResNet
        # depth is also interpolated into ModelParam.pretrain.prefix below
        depth = 50

    class NeckParam:
        # you can control the FP16 option and normalizer for each individual component
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class RpnParam:
        # you can control the FP16 option and normalizer for each individual component
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        batch_image = General.batch_image
        # use ONNX-compatible proposal operator instead of the one written in C++/CUDA
        nnvm_proposal = True
        # use in-network rpn target operator instead of the label generated by data loader
        # if your network is quite fast, the CPU might not feed the labels fast enough
        # else you can offload the rpn target generation to CPU to save GPU resources
        # when False, the training transform list below appends
        # PyramidAnchorTarget2D and the rpn_* label names
        nnvm_rpn_target = False

        # anchor grid generated are used in the rpn target assign and proposal decoding
        class anchor_generate:
            scale = (8, )
            ratio = (0.5, 1.0, 2.0)
            stride = (4, 8, 16, 32, 64)
            # number of anchors per image
            image_anchor = 256
            # to avoid generating the same anchor grid more than once
            # we cache an anchor grid in the arg_params
            # max_side specifies the max side of the resized input image
            # 3000 is a safe bet, increase it if necessary
            max_side = 1400

        # valid when use nnvm_rpn_target, controls the rpn target assign
        class anchor_assign:
            # number of pixels the anchor box could extend out of the image border
            allowed_border = 0
            # iou lower bound with groundtruth box for foreground anchor
            pos_thr = 0.7
            # iou upper bound with groundtruth box for background anchor
            neg_thr = 0.3
            # every groundtruth box will match the anchors overlaps most with it by default
            # increase the threshold to avoid matching low quality anchors
            min_pos_thr = 0.0
            # number of anchors per image
            image_anchor = 256
            # fraction of foreground anchors per image
            pos_fraction = 0.5

        # rpn head structure
        class head:
            # number of channels for the 3x3 conv in rpn head
            conv_channel = 256
            # mean and std for rpn regression target
            mean = (0, 0, 0, 0)
            std = (1, 1, 1, 1)

        # the proposal generation for RCNN
        class proposal:
            # number of top-scored proposals to take before NMS
            pre_nms_top_n = 2000 if is_train else 1000
            # number of top-scored proposals to take after NMS
            post_nms_top_n = 2000 if is_train else 1000
            # proposal NMS threshold
            nms_thr = 0.7
            # min proposal box to keep, 0 means keep all
            min_bbox_side = 0

        # the proposal sampling for RCNN during training
        class subsample_proposal:
            # controls whether gt boxes are added to the proposals
            # (exact polarity of the flag: TODO confirm against the sampler)
            proposal_wo_gt = False
            # number of proposals sampled per image during training
            image_roi = 512
            # the maximum fraction of foreground proposals
            fg_fraction = 0.25
            # iou lower bound with gt bbox for foreground proposals
            fg_thr = 0.5
            # iou upper bound with gt bbox for background proposals
            bg_thr_hi = 0.5
            # iou lower bound with gt bbox for background proposals
            # set to non-zero value could remove some trivial background proposals
            bg_thr_lo = 0.0

        # the target encoding for RCNN bbox head
        class bbox_target:
            # 1(background) + num_class
            # could be num_class if using sigmoid activation instead of softmax one
            num_reg_class = 1 + 80
            # share the regressor for all classes
            class_agnostic = False
            # the mean, std, and weight for bbox head regression target
            weight = (1.0, 1.0, 1.0, 1.0)
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class BboxParam:
        # you can control the FP16 option and normalizer for each individual component
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        # num_class may be different from RpnParam.bbox_target.num_reg_class
        # if the class_agnostic regressor is adopted
        num_class = 1 + 80
        image_roi = RpnParam.subsample_proposal.image_roi
        batch_image = General.batch_image

        # mirrors RpnParam.bbox_target so both stay in sync
        class regress_target:
            class_agnostic = RpnParam.bbox_target.class_agnostic
            mean = RpnParam.bbox_target.mean
            std = RpnParam.bbox_target.std

    class MaskParam:
        # you can control the FP16 option and normalizer for each individual component
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        # output resolution of mask head
        resolution = 28
        # number of channels for 3x3 convs in mask head
        dim_reduced = 256
        # mask head only trains on foreground proposals
        # so we discard all the background proposals to save computation
        # int(512 * 0.25) = 128 with the values above
        num_fg_roi = int(RpnParam.subsample_proposal.image_roi *
                         RpnParam.subsample_proposal.fg_fraction)

    class RoiParam:
        # you can control the FP16 option and normalizer for each individual component
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        # Each RoI is pooled into an out_size x out_size fixed-length representation
        out_size = 7
        # the total stride of the feature map to pool from
        stride = (4, 8, 16, 32)
        # FPN specific configs
        # objects of size in [224^2, 448^2) will be assigned to P4
        roi_canonical_scale = 224
        roi_canonical_level = 4

    # same as RoiParam except for the larger out_size; used for the second
    # RoiExtractor built below
    class MaskRoiParam:
        # you can control the FP16 option and normalizer for each individual component
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        # Each RoI is pooled into an out_size x out_size fixed-length representation
        out_size = 14
        # the total stride of the feature map to pool from
        stride = (4, 8, 16, 32)
        # FPN specific configs
        # objects of size in [224^2, 448^2) will be assigned to P4
        roi_canonical_scale = 224
        roi_canonical_level = 4

    class DatasetParam:
        # specify the roidbs to read for training/validation
        if is_train:
            # == coco_train2017
            image_set = ("coco_train2014", "coco_valminusminival2014")
        else:
            # == coco_val2017
            image_set = ("coco_minival2014", )

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            # learning rate will automatically adapt to different batch size
            # the base learning rate is 0.02 for 16 images
            lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image
            momentum = 0.9
            wd = 0.0001
            clip_gradient = None

        class schedule:
            # correspond to the 1x, 2x, ... training schedule
            mult = 2
            begin_epoch = 0
            end_epoch = 6 * mult
            lr_mode = "step"  # or "cosine"
            # lr step factor
            lr_factor = 0.1
            # lr step iterations
            # with mult = 2 the else-branch below is the one that runs
            if mult <= 1:
                lr_iter = [
                    60000 * mult * 16 //
                    (len(KvstoreParam.gpus) * KvstoreParam.batch_image),
                    80000 * mult * 16 //
                    (len(KvstoreParam.gpus) * KvstoreParam.batch_image)
                ]
            else:
                # follow the practice in arXiv:1811.08883
                # reduce the lr in the last 60k and 20k iterations
                # (expressed as negative iteration counts)
                lr_iter = [
                    -60000 * 16 //
                    (len(KvstoreParam.gpus) * KvstoreParam.batch_image),
                    -20000 * 16 //
                    (len(KvstoreParam.gpus) * KvstoreParam.batch_image)
                ]

        # follow the practice in arXiv:1706.02677
        class warmup:
            type = "gradual"
            # one third of the optimizer lr computed above
            lr = 0.01 / 8 * len(
                KvstoreParam.gpus) * KvstoreParam.batch_image / 3
            iter = 500

    class TestParam:
        # detection below min_det_score will be removed in the evaluation
        min_det_score = 0.05
        # only the top max_det_per_image detections will be evaluated
        max_det_per_image = 100

        # callback, useful in multi-scale testing
        process_roidb = lambda x: x
        # callback, useful in scale-aware post-processing
        # NOTE: this is not a recursive call. Names bound in a class body are
        # not visible from inside the lambda, and get_config itself never
        # binds `process_output`, so the name called here is looked up at
        # module level (defined outside this function).
        process_output = lambda x, y: process_output(x, y)

        # the model name and epoch used during test
        # by default the last checkpoint is employed
        # user can override this with --epoch N when invoking script
        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"  # or "softnms"
            thr = 0.5

        # we make use of the coco test toolchain
        # if no coco format annotation file is specified
        # test script will generate one on the fly from roidb
        class coco:
            annotation = "data/coco/annotations/instances_minival2014.json"

    # compose the components to form a detector
    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam, MaskParam)
    roi_extractor = RoiExtractor(RoiParam)
    mask_roi_extractor = RoiExtractor(MaskRoiParam)
    bbox_head = BboxHead(BboxParam)
    mask_head = MaskHead(BboxParam, MaskParam, MaskRoiParam)
    bbox_post_processer = BboxPostProcessor(TestParam)
    detector = Detector()
    # only the symbol for the requested mode is built; the other stays None
    if is_train:
        train_sym = detector.get_train_symbol(backbone, neck, rpn_head,
                                              roi_extractor,
                                              mask_roi_extractor, bbox_head,
                                              mask_head)
        test_sym = None
    else:
        train_sym = None
        # the test symbol additionally takes the bbox post processor
        test_sym = detector.get_test_symbol(backbone, neck, rpn_head,
                                            roi_extractor, mask_roi_extractor,
                                            bbox_head, mask_head,
                                            bbox_post_processer)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym

        # training model from scratch
        from_scratch = False
        # use random seed when initializing
        random = True
        # sublinear memory checkpointing
        memonger = False
        # checkpointing up to a layer
        # recomputing the early stage of a network is cheaper
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            # the model name and epoch used for initialization
            prefix = "pretrain_model/resnet%s_v1b" % BackboneParam.depth
            epoch = 0
            # any params partially match the fixed_param will be fixed
            # fixed params will not be updated
            fixed_param = ["conv0", "stage1", "gamma", "beta"]
            # any params partially match the excluded_param will not be fixed
            excluded_param = ["mask_fcn"]

        # callback, useful for adding cached anchor or complex initialization
        # defined without `self`: meant to be called through the class as a
        # plain function of (sym, arg, aux)
        def process_weight(sym, arg, aux):
            for stride in RpnParam.anchor_generate.stride:
                add_anchor_to_arg(sym, arg, aux,
                                  RpnParam.anchor_generate.max_side, stride,
                                  RpnParam.anchor_generate.scale,
                                  RpnParam.anchor_generate.ratio)

    # data processing
    class NormParam:
        # mean/std for input image, given on a 0-1 scale and multiplied by 255
        mean = tuple(i * 255 for i in (0.485, 0.456, 0.406))  # RGB order
        std = tuple(i * 255 for i in (0.229, 0.224, 0.225))

    # data processing
    class ResizeParam:
        # the input is resized to a short side not exceeding short
        # and a long side not exceeding long
        short = 800
        long = 1333

    # SimpleDet is written in MXNet symbolic API which features the fastest
    # execution while requires static input shape
    # All the inputs are padded to the maximum shape item on the dataset
    class PadParam:
        # the resized input is padded to short x long with 0 in bottom-right corner
        short = 800
        long = 1333

        max_num_gt = 100
        max_len_gt_poly = 2500

    # this controls the rpn target generation offloaded to CPU data loader
    # refer to RpnParam.anchor_generate for more infos
    # unlike the other namespaces, this one is instantiated before use
    # (see AnchorTarget2DParam() in the training transform list)
    class AnchorTarget2DParam:
        def __init__(self):
            # replace the nested class by an instance so that the per-instance
            # stride/short/long attributes exist
            self.generate = self._generate()

        class _generate:
            def __init__(self):
                self.stride = (4, 8, 16, 32, 64)
                # the shorts and longs have to be pre-computed since the
                # loader knows nothing of the network
                # the downsampled side can be calculated as ceil(side / 2)
                # starting from ceil(800 / 4) and ceil(1333 / 4)
                self.short = (200, 100, 50, 25, 13)
                self.long = (334, 167, 84, 42, 21)

            # same values as RpnParam.anchor_generate.scale / ratio
            scales = (8, )
            aspects = (0.5, 1.0, 2.0)

        # same values as RpnParam.anchor_assign
        class assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0

        class sample:
            image_anchor = 256
            pos_fraction = 0.5

    # align blobs name between loader and network
    class RenameParam:
        mapping = dict(image="data")


    from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \
        ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \
        RenameRecord, Norm2DImage

    from models.maskrcnn.input import PreprocessGtPoly, EncodeGtPoly, \
        Resize2DImageBboxMask, Flip2DImageBboxMask, Pad2DImageBboxMask

    from models.FPN.input import PyramidAnchorTarget2D

    # modular data augmentation design
    if is_train:
        transform = [
            ReadRoiRecord(None),
            Norm2DImage(NormParam),
            PreprocessGtPoly(),
            Resize2DImageBboxMask(ResizeParam),
            Flip2DImageBboxMask(),
            EncodeGtPoly(PadParam),
            Pad2DImageBboxMask(PadParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data"]
        label_name = ["im_info", "gt_bbox", "gt_poly"]
        # rpn targets come from the loader unless they are generated
        # in-network; the anchor-target step is appended after RenameRecord
        if not RpnParam.nnvm_rpn_target:
            transform.append(PyramidAnchorTarget2D(AnchorTarget2DParam()))
            label_name += ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"]
    else:
        # test list: plain bbox resize, no polygon handling, flip or padding
        transform = [
            ReadRoiRecord(None),
            Norm2DImage(NormParam),
            Resize2DImageBbox(ResizeParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    import core.detection_metric as metric
    from models.maskrcnn.metric import SigmoidCELossMetric
    from mxboard import SummaryWriter

    # summary writer logs metric to tensorboard for a better track of training
    # NOTE: created unconditionally, i.e. also when is_train is falsy
    sw = SummaryWriter(logdir="./tflogs", flush_secs=5)

    # every metric below reads only network outputs (label_names is empty)
    # and shares the same summary writer
    rpn_acc_metric = metric.AccWithIgnore(
        name="RpnAcc",
        output_names=["rpn_cls_loss_output", "rpn_cls_label_blockgrad_output"],
        label_names=[],
        summary=sw)
    rpn_l1_metric = metric.L1(
        name="RpnL1",
        output_names=["rpn_reg_loss_output", "rpn_cls_label_blockgrad_output"],
        label_names=[],
        summary=sw)
    box_acc_metric = metric.AccWithIgnore(
        name="RcnnAcc",
        output_names=["bbox_cls_loss_output", "bbox_label_blockgrad_output"],
        label_names=[],
        summary=sw)
    box_l1_metric = metric.L1(
        name="RcnnL1",
        output_names=["bbox_reg_loss_output", "bbox_label_blockgrad_output"],
        label_names=[],
        summary=sw)
    mask_cls_metric = SigmoidCELossMetric(name="MaskCE",
                                          output_names=["mask_loss_output"],
                                          label_names=[],
                                          summary=sw)

    metric_list = [
        rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric,
        mask_cls_metric
    ]

    return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \
           ModelParam, OptimizeParam, TestParam, \
           transform, data_name, label_name, metric_list
def get_config(is_train):
    """Build the configuration objects for this group-softmax experiment.

    The nested classes are used as plain attribute namespaces and are
    evaluated top to bottom, so later classes may read earlier ones.
    ``normalizer_factory``, ``Backbone``, ``Neck``, ``RpnHead``,
    ``RoiExtractor``, ``BboxHead``, ``Detector`` and ``np`` are not defined in
    this function and must be provided by the enclosing module.

    Args:
        is_train: truthy selects the training variant (per-device batch of 2,
            syncbn normalizer, train image sets, train symbol, training
            transform list); falsy selects the test variant (fixbn, test and
            rpn-test symbols).

    Returns:
        A 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``.
    """
    class General:
        # master switch: propagated to RpnParam, BboxParam and
        # AnchorTarget2DParam.generate, and gates the GroupRead transform
        use_groupsoftmax = True
        log_frequency = 20
        # last "/"-separated piece of the module name, then its last dotted
        # component; also used below to build the checkpoint prefix
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        batch_image = 2 if is_train else 1
        fp16 = True

    class KvstoreParam:
        kvstore = "local"
        batch_image = General.batch_image
        # eight device ids; len(gpus) also sets ndev for syncbn and scales
        # the lr, lr_iter and warmup iterations below
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16 = General.fp16

    class NormalizeParam:
        # syncbn across all listed devices when training, fixbn when testing
        if is_train:
            normalizer = normalizer_factory(type="syncbn",
                                            ndev=len(KvstoreParam.gpus))
        else:
            normalizer = normalizer_factory(type="fixbn")

    class BackboneParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class NeckParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class RpnParam:
        fp16 = General.fp16
        # the RPN head does not follow NormalizeParam: it always gets fixbn
        normalizer = normalizer_factory(
            type="fixbn")  # old model does not use BN in RPN head
        batch_image = General.batch_image
        use_groupsoftmax = General.use_groupsoftmax
        # reads the class attribute assigned on the previous line:
        # 3 classes with group softmax, 2 otherwise
        num_class = (1 + 2) if use_groupsoftmax else 2

        class anchor_generate:
            scale = (2, 4, 8, 16, 32)
            ratio = (0.5, 1.0, 2.0)
            # a single stride (not a tuple of per-level strides)
            stride = 16
            image_anchor = 256

        class head:
            conv_channel = 512
            mean = (0, 0, 0, 0)
            std = (1, 1, 1, 1)

        class proposal:
            pre_nms_top_n = 12000 if is_train else 6000
            post_nms_top_n = 2000 if is_train else 1000
            nms_thr = 0.7
            min_bbox_side = 0

        class subsample_proposal:
            proposal_wo_gt = True
            image_roi = 256
            fg_fraction = 0.25
            fg_thr = 0.5
            bg_thr_hi = 0.5
            bg_thr_lo = 0.0

        class bbox_target:
            num_reg_class = 2
            class_agnostic = True
            weight = (1.0, 1.0, 1.0, 1.0)
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class BboxParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        # 1 + 83 = 84; the id tables in GroupParam below run from 0 to 83
        num_class = 1 + 83
        # literal copies of RpnParam.subsample_proposal.image_roi and the
        # RpnParam.bbox_target values (not references; keep them in sync)
        image_roi = 256
        batch_image = General.batch_image
        use_groupsoftmax = General.use_groupsoftmax

        class regress_target:
            class_agnostic = True
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class RoiParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        out_size = 7
        stride = 16

    class DatasetParam:
        # training mixes two COCO sets with "cctsdb_train"; testing reads only
        # the COCO minival set
        if is_train:
            image_set = ("coco_train2014", "coco_valminusminival2014",
                         "cctsdb_train")
        else:
            image_set = ("coco_minival2014", )

    # compose the detector
    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    roi_extractor = RoiExtractor(RoiParam)
    bbox_head = BboxHead(BboxParam)
    detector = Detector()
    # training builds only the train symbol; testing builds both the full
    # test symbol and a separate rpn-only test symbol
    if is_train:
        train_sym = detector.get_train_symbol(backbone, neck, rpn_head,
                                              roi_extractor, bbox_head)
        rpn_test_sym = None
        test_sym = None
    else:
        train_sym = None
        rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head)
        test_sym = detector.get_test_symbol(backbone, neck, rpn_head,
                                            roi_extractor, bbox_head)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym
        rpn_test_symbol = rpn_test_sym

        from_scratch = False
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            prefix = "pretrain_model/resnet-101"
            epoch = 0
            # empty list: no parameter name patterns are listed as fixed
            fixed_param = []

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            # scales linearly with the total batch size:
            # 0.01 per 8 images, i.e. 0.02 for 8 devices x 2 images
            lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image
            momentum = 0.9
            wd = 0.0001
            clip_gradient = 5

        class schedule:
            begin_epoch = 0
            end_epoch = 6
            # 60000 and 80000 iterations at a total batch of 16, rescaled
            # (integer division) to the actual total batch size
            lr_iter = [
                60000 * 16 //
                (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 *
                16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image)
            ]

        class warmup:
            type = "gradual"
            # warmup starts from a learning rate of zero
            lr = 0.0
            # 3000 iterations at a total batch of 16, rescaled like lr_iter
            iter = 3000 * 16 // (len(KvstoreParam.gpus) *
                                 KvstoreParam.batch_image)

    class TestParam:
        min_det_score = 0.05
        max_det_per_image = 100

        # identity callback: returns its argument unchanged
        process_roidb = lambda x: x
        # returns its first argument unchanged; the second one is ignored
        process_output = lambda x, y: x

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            # defaults to the last epoch of the training schedule
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.5

        class coco:
            # NOTE(review): absolute, machine-specific path
            annotation = "/ws/data/opendata/coco/annotations/instances_minival2014.json"

    # data processing
    # Lookup tables handed to the GroupRead transform. Each table has one
    # entry per class id; presumably a non-zero entry marks a class that is
    # active for that dataset group -- TODO confirm against GroupRead.
    class GroupParam:
        # all box class ids, 0 through 83
        boxv0 = np.array([0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, \
                              31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, \
                              61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83], dtype=np.float32)
        # COCO benchmark: ids 0 through 80 kept, the last three entries zeroed
        boxv1 = np.array([0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, \
                              31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, \
                              61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 0,  0,  0 ], dtype=np.float32)
        # CCTSDB benchmark: everything zeroed except the last three entries
        # (81, 82, 83)
        boxv2 = np.array([0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  \
                              0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  \
                              0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  81, 82, 83], dtype=np.float32)

        # the same three-group layout for the 3-class rpn labels
        rpnv0 = np.array([0, 1, 2], dtype=np.float32)  # rpn 3 classes
        rpnv1 = np.array([0, 1, 0], dtype=np.float32)  # COCO benchmark
        rpnv2 = np.array([0, 0, 2], dtype=np.float32)  # CCTSDB benchmark

        # index 0 = all classes, 1 = COCO, 2 = CCTSDB in both lists
        rpn_groups = [rpnv0, rpnv1, rpnv2]
        box_groups = [boxv0, boxv1, boxv2]

    class ResizeParam:
        short = 800
        # a larger long side is allowed at test time
        long = 1200 if is_train else 2000

    class PadParam:
        # only used by the training transform list (Pad2DImageBbox)
        short = 800
        long = 1200
        max_num_gt = 100

    class AnchorTarget2DParam:
        class generate:
            # training image sides divided by the stride: 50 and 75
            short = 800 // 16
            long = 1200 // 16
            stride = 16
            # same values as RpnParam.anchor_generate.scale / ratio
            scales = (2, 4, 8, 16, 32)
            aspects = (0.5, 1.0, 2.0)
            use_groupsoftmax = General.use_groupsoftmax

        class assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0

        class sample:
            image_anchor = 256
            pos_fraction = 0.5

        # Defined without `self`: meant to be called through the class as a
        # plain function. Remaps class ids to rpn labels IN PLACE and returns
        # the same object. The order of the three masked assignments matters:
        #   1. ids above class_gap (80) are parked at the sentinel -1
        #   2. the remaining positive ids (1..80) become 1
        #   3. every negative value (the sentinel, and any negative value
        #      already present in the input) becomes 2
        # Zeros are left untouched.
        def gtclass2rpn(gtclass):
            class_gap = 80
            gtclass[gtclass > class_gap] = -1
            gtclass[gtclass > 0] = 1
            gtclass[gtclass < 0] = 2
            return gtclass

    class RenameParam:
        # passed to RenameRecord in both transform lists
        mapping = dict(image="data")


    from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \
        ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \
        RenameRecord, AnchorTarget2D, GroupRead

    if is_train:
        transform = [
            ReadRoiRecord(None),
            Resize2DImageBbox(ResizeParam),
            Flip2DImageBbox(),
            Pad2DImageBbox(PadParam),
            ConvertImageFromHwcToChw(),
            AnchorTarget2D(AnchorTarget2DParam),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "gt_bbox"]
        label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"]
        # group softmax needs two extra inputs; GroupRead is appended after
        # RenameRecord, so it runs last
        if General.use_groupsoftmax:
            data_name.append("rpn_group")
            data_name.append("box_group")
            transform.append(GroupRead(GroupParam))
    else:
        # test list: no flip, padding, anchor targets or group inputs
        transform = [
            ReadRoiRecord(None),
            Resize2DImageBbox(ResizeParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    import core.detection_metric as metric

    # rpn metrics pair a network output with the loader-provided
    # "rpn_cls_label"
    rpn_acc_metric = metric.AccWithIgnore("RpnAcc", ["rpn_cls_loss_output"],
                                          ["rpn_cls_label"])
    rpn_l1_metric = metric.L1("RpnL1", ["rpn_reg_loss_output"],
                              ["rpn_cls_label"])
    # for bbox, the label is generated in network so it is an output
    box_acc_metric = metric.AccWithIgnore(
        "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], [])
    box_l1_metric = metric.L1(
        "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], [])

    metric_list = [
        rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric
    ]

    return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \
           ModelParam, OptimizeParam, TestParam, \
           transform, data_name, label_name, metric_list
Exemple #12
0
def get_config(is_train):
    """Assemble the configuration bundle for the FCOS-style detector.

    Args:
        is_train: Truthy selects the training batch size, dataset split,
            symbol and input pipeline; falsy selects the test-time ones.

    Returns:
        The 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``.

    Side effects:
        Rebinds the module-level name ``throwout_param`` to
        ``FCOSFPNAssignParam``.
    """
    global throwout_param

    class General:
        log_frequency = 20
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        batch_image = 2 if is_train else 1
        fp16 = False
        loader_worker = 4
        loader_collector = 2
        profile = False

    class KvstoreParam:
        kvstore = "nccl"
        batch_image = General.batch_image
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16 = General.fp16

    class NormalizeParam:
        normalizer = normalizer_factory(type="fix")

    # Bbox/RoI stages carry no settings in this config; the classes exist
    # only so the returned tuple keeps its usual layout.
    class BboxParam:
        pass

    class RoiParam:
        pass

    class RpnParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        batch_image = General.batch_image

        class head:
            conv_channel = 256
            mean = (0, 0, 0, 0)
            std = (1, 1, 1, 1)

        class proposal:
            pre_nms_thresh = 0.05
            pre_nms_top_n = 1000
            post_nms_top_n = 1000
            fpn_box_max_n = 100
            nms_thr = 0.6
            min_bbox_side = 0

        class subsample_proposal:
            proposal_wo_gt = False
            image_roi = 512
            fg_fraction = 0.25
            fg_thr = 0.5
            bg_thr_hi = 0.5
            bg_thr_lo = 0.0

        class loss_setting:
            focal_loss_alpha = 0.25
            focal_loss_gamma = 2.0
            ignore_label = -1
            ignore_offset = -1

        class FCOSParam:
            num_classifier = 81 - 1  # COCO: 80 object + 1 background
            stride = (8, 16, 32, 64, 128)

    # ---- data processing ----
    class NormParam:
        mean = (122.7717, 115.9465, 102.9801)  # RGB order
        std = (1.0, 1.0, 1.0)

    class ResizeParam:
        short = 800
        long = 1333

    class PadParam:
        # same bounds as ResizeParam
        short = ResizeParam.short
        long = ResizeParam.long
        max_num_gt = 100

    class FCOSFPNAssignParam:
        stages = [
            [-1, 64],
            [64, 128],
            [128, 256],
            [256, 512],
            [512, INF],
        ]
        # stride / class count / ignore markers mirror the RpnParam values
        stride = RpnParam.FCOSParam.stride
        num_classifier = RpnParam.FCOSParam.num_classifier
        ignore_label = RpnParam.loss_setting.ignore_label
        ignore_offset = RpnParam.loss_setting.ignore_offset
        data_size = [PadParam.short, PadParam.long]

    class RenameParam:
        mapping = dict(image="data")

    class BackboneParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class NeckParam:
        fp16 = General.fp16
        normalizer = normalizer_factory(type="dummy")

    class DatasetParam:
        image_set = (
            ("coco_train2014", "coco_valminusminival2014")
            if is_train else ("coco_minival2014", )
        )

    # Throw out the param used as the custom op's input.
    # This assignment MUST stay in front of rpn_head = RpnHead(RpnParam).
    throwout_param = FCOSFPNAssignParam

    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    detector = Detector()

    # Only the symbol for the requested mode is built; the other stays None.
    train_sym = rpn_test_sym = None
    if is_train:
        train_sym = detector.get_train_symbol(backbone, neck, rpn_head)
    else:
        rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head)

    class ModelParam:
        train_symbol = train_sym
        # the RPN test symbol is used for both test entries
        test_symbol = rpn_test_sym
        rpn_test_symbol = rpn_test_sym

        from_scratch = False
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            prefix = "pretrain_model/resnet-101"
            epoch = 0
            fixed_param = ["conv0", "stage1", "gamma", "beta"]

    # Images per optimisation step across all GPUs; the lr and the decay
    # steps below are scaled relative to a reference batch of 16.
    total_batch = len(KvstoreParam.gpus) * KvstoreParam.batch_image
    base_lr = 0.01 / 16 * len(KvstoreParam.gpus) * KvstoreParam.batch_image

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            lr = base_lr
            momentum = 0.9
            wd = 0.0001
            clip_gradient = None

        class schedule:
            begin_epoch = 0
            end_epoch = 6
            lr_iter = [60000 * 16 // total_batch, 80000 * 16 // total_batch]

        class warmup:
            type = "gradual"
            lr = base_lr / 3.0
            iter = 500

    class TestParam:
        min_det_score = 0
        max_det_per_image = 100

        # identity hooks
        process_roidb = lambda x: x
        process_output = lambda x, y: x

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.6

        class coco:
            annotation = "data/coco/annotations/instances_minival2014.json"

    from core.detection_input import (
        ReadRoiRecord, Resize2DImageBbox, ConvertImageFromHwcToChw,
        Flip2DImageBbox, Pad2DImageBbox, RenameRecord, Norm2DImage,
    )

    # Steps shared by both modes come first; flip + pad are training-only.
    transform = [
        ReadRoiRecord(None),
        Norm2DImage(NormParam),
        Resize2DImageBbox(ResizeParam),
    ]
    if is_train:
        transform += [Flip2DImageBbox(), Pad2DImageBbox(PadParam)]
        data_name, label_name = ["data", "im_info"], ["gt_bbox"]
    else:
        data_name, label_name = ["data", "im_info", "im_id", "rec_id"], []
    transform += [ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping)]

    import models.FCOS.metric as metric

    # One LossMeter per consecutive pred_id range [i, i + 1), in this order.
    meter_names = ('centernessloss_meter', 'clsloss_meter', 'offsetloss_meter')
    metric_list = [
        metric.LossMeter(RpnParam.FCOSParam.stride,
                         pred_id_start=slot,
                         pred_id_end=slot + 1,
                         name=meter_name)
        for slot, meter_name in enumerate(meter_names)
    ]

    return (
        General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam,
        ModelParam, OptimizeParam, TestParam,
        transform, data_name, label_name, metric_list,
    )
Exemple #13
0
def get_config(is_train):
    """Assemble the configuration bundle for an RPN-only experiment.

    ``BboxParam`` and ``RoiParam`` are empty placeholders here, and only the
    RPN train / RPN test symbols are built (``test_symbol`` is always None).

    Args:
        is_train: Truthy selects the training batch size, dataset split,
            symbol and input pipeline; falsy selects the test-time ones.

    Returns:
        The 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``.
    """
    class General:
        log_frequency = 10
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        batch_image = 2 if is_train else 1
        fp16 = False

    class KvstoreParam:
        kvstore = "nccl"
        batch_image = General.batch_image
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16 = General.fp16

    class NormalizeParam:
        # Alternative kept for reference:
        #   normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus))
        normalizer = normalizer_factory(type="fixbn")

    class BackboneParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class NeckParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class RpnParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        batch_image = General.batch_image

        class anchor_generate:
            scale = (2, 4, 8, 16, 32)
            ratio = (0.5, 1.0, 2.0)
            stride = 16
            image_anchor = 256

        class head:
            conv_channel = 512
            mean = (0, 0, 0, 0)
            std = (1, 1, 1, 1)

        class proposal:
            pre_nms_top_n = 12000 if is_train else 6000
            post_nms_top_n = 2000 if is_train else 1000
            nms_thr = 0.7
            min_bbox_side = 0

    # Placeholders: this config has no bbox head or RoI extractor settings.
    class BboxParam:
        pass

    class RoiParam:
        pass

    class DatasetParam:
        image_set = (
            ("coco_train2014", "coco_valminusminival2014")
            if is_train else ("coco_minival2014", )
        )

    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    detector = Detector()

    # Build only the symbol for the requested mode; test_sym is never built.
    train_sym = rpn_test_sym = test_sym = None
    if is_train:
        train_sym = detector.get_train_symbol(backbone, neck, rpn_head)
    else:
        rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym
        rpn_test_symbol = rpn_test_sym

        from_scratch = False
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            prefix = "pretrain_model/resnet-50"
            epoch = 0
            fixed_param = ["conv0", "stage1", "gamma", "beta"]

    # Images per optimisation step across all GPUs.
    total_batch = len(KvstoreParam.gpus) * KvstoreParam.batch_image
    base_lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            lr = base_lr
            momentum = 0.9
            wd = 0.0001
            clip_gradient = 35

        class schedule:
            begin_epoch = 0
            end_epoch = 6
            # iteration counts are written for a batch of 16 and rescaled
            lr_iter = [60000 * 16 // total_batch, 80000 * 16 // total_batch]

        class warmup:
            type = "gradual"
            lr = 0.0
            iter = 3000 * 16 // total_batch

    class TestParam:
        min_det_score = 0.05
        max_det_per_image = 100

        # identity hooks
        process_roidb = lambda x: x
        process_output = lambda x, y: x

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.5

        class coco:
            annotation = "data/coco/annotations/instances_minival2014.json"

    # ---- data processing ----
    class ResizeParam:
        short = 800
        long = 1200 if is_train else 2000

    class PadParam:
        # same bounds as ResizeParam
        short = ResizeParam.short
        long = ResizeParam.long
        max_num_gt = 100

    class AnchorTarget2DParam:
        class generate:
            short = 800 // 16
            long = 1200 // 16
            stride = 16
            scales = (2, 4, 8, 16, 32)
            aspects = (0.5, 1.0, 2.0)

        class assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0

        class sample:
            image_anchor = 256
            pos_fraction = 0.5

    class RenameParam:
        mapping = dict(image="data")

    from core.detection_input import (
        ReadRoiRecord, Resize2DImageBbox, ConvertImageFromHwcToChw,
        Flip2DImageBbox, Pad2DImageBbox, RenameRecord, AnchorTarget2D,
    )

    if is_train:
        # training adds flip, pad and anchor-target steps
        transform = [
            ReadRoiRecord(None),
            Resize2DImageBbox(ResizeParam),
            Flip2DImageBbox(),
            Pad2DImageBbox(PadParam),
            ConvertImageFromHwcToChw(),
            AnchorTarget2D(AnchorTarget2DParam),
            RenameRecord(RenameParam.mapping),
        ]
        data_name, label_name = (
            ["data"],
            ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"],
        )
    else:
        transform = [
            ReadRoiRecord(None),
            Resize2DImageBbox(ResizeParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping),
        ]
        data_name, label_name = ["data", "im_info", "im_id", "rec_id"], []

    import core.detection_metric as metric

    # Both RPN metrics are fed the "rpn_cls_label" label.
    metric_list = [
        metric.AccWithIgnore("RpnAcc", ["rpn_cls_loss_output"],
                             ["rpn_cls_label"]),
        metric.L1("RpnL1", ["rpn_reg_loss_output"], ["rpn_cls_label"]),
    ]

    return (
        General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam,
        ModelParam, OptimizeParam, TestParam,
        transform, data_name, label_name, metric_list,
    )
def get_config(is_train):
    """Assemble the configuration bundle for an FPN detector trained from scratch.

    ``ModelParam.from_scratch`` is True and no pretrained prefix is set.

    Args:
        is_train: Truthy selects the training batch size, dataset split,
            symbol and input pipeline; falsy selects the test-time ones.

    Returns:
        The 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``.
    """
    class General:
        log_frequency = 10
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        batch_image = 8 if is_train else 1
        fp16 = True
        loader_worker = 8

    class KvstoreParam:
        kvstore = "nccl"
        batch_image = General.batch_image
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16 = General.fp16

    class NormalizeParam:
        normalizer = normalizer_factory(type="localbn", ndev=len(KvstoreParam.gpus))
        # Alternative kept for reference: normalizer_factory(type="gn")

    class BackboneParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class NeckParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class RpnParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        batch_image = General.batch_image
        nnvm_proposal = True
        nnvm_rpn_target = False

        class anchor_generate:
            scale = (4,)
            ratio = (0.5, 1.0, 2.0)
            stride = (4, 8, 16, 32, 64)
            image_anchor = 256
            max_side = 700

        class anchor_assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0
            image_anchor = 256
            pos_fraction = 0.5

        class head:
            conv_channel = 256
            mean = (0, 0, 0, 0)
            std = (1, 1, 1, 1)

        class proposal:
            pre_nms_top_n = 2000 if is_train else 1000
            post_nms_top_n = 2000 if is_train else 1000
            nms_thr = 0.7
            min_bbox_side = 0

        class subsample_proposal:
            proposal_wo_gt = False
            image_roi = 512
            fg_fraction = 0.25
            fg_thr = 0.5
            bg_thr_hi = 0.5
            bg_thr_lo = 0.0

        class bbox_target:
            num_reg_class = 81
            class_agnostic = False
            weight = (1.0, 1.0, 1.0, 1.0)
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class BboxParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        num_class = 1 + 80
        image_roi = 512
        batch_image = General.batch_image

        class regress_target:
            class_agnostic = False
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class RoiParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        out_size = 7
        stride = (4, 8, 16, 32)
        roi_canonical_scale = 224
        roi_canonical_level = 4

    class DatasetParam:
        if is_train:
            image_set = ("coco_train2014", "coco_valminusminival2014")
            total_image = 82783 + 35504
        else:
            image_set = ("coco_minival2014", )
            total_image = 5000

    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    roi_extractor = RoiExtractor(RoiParam)
    bbox_head = BboxHead(BboxParam)
    detector = Detector()

    # Build only the symbols for the requested mode; the rest stay None.
    train_sym = rpn_test_sym = test_sym = None
    if is_train:
        train_sym = detector.get_train_symbol(
            backbone, neck, rpn_head, roi_extractor, bbox_head)
    else:
        rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head)
        test_sym = detector.get_test_symbol(
            backbone, neck, rpn_head, roi_extractor, bbox_head)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym
        rpn_test_symbol = rpn_test_sym

        from_scratch = True
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            prefix = None
            epoch = 0
            fixed_param = []

        def process_weight(sym, arg, aux):
            # Calls add_anchor_to_arg once per RPN anchor stride.
            anchor_cfg = RpnParam.anchor_generate
            for level_stride in anchor_cfg.stride:
                add_anchor_to_arg(
                    sym, arg, aux, anchor_cfg.max_side,
                    level_stride, anchor_cfg.scale, anchor_cfg.ratio)

    # Images per optimisation step across all GPUs.
    total_batch = len(KvstoreParam.gpus) * KvstoreParam.batch_image
    base_lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            lr = base_lr
            momentum = 0.9
            wd = 1e-4
            clip_gradient = None

        class schedule:
            mult = 12
            begin_epoch = 0
            end_epoch = 6 * mult
            if mult <= 2:
                lr_iter = [
                    60000 * mult * 16 // total_batch,
                    80000 * mult * 16 // total_batch,
                ]
            else:
                # follow the setting in Rethinking ImageNet Pre-training
                # reduce the lr in the last 60k and 20k iterations
                # NOTE(review): the constants subtracted below are 70000 and
                # 30000, not 60000 and 20000 -- confirm which is intended.
                lr_iter = [
                    (DatasetParam.total_image * 2 // 16 * end_epoch - 70000)
                    * 16 // total_batch,
                    (DatasetParam.total_image * 2 // 16 * end_epoch - 30000)
                    * 16 // total_batch,
                ]

        class warmup:
            type = "gradual"
            lr = 0
            iter = 500

    class TestParam:
        min_det_score = 0.05
        max_det_per_image = 100

        # identity hooks
        process_roidb = lambda x: x
        process_output = lambda x, y: x

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.5

        class coco:
            annotation = "data/coco/annotations/instances_minival2014.json"

    # ---- data processing ----
    class NormParam:
        mean = tuple(i * 255 for i in (0.485, 0.456, 0.406))  # RGB order
        std = tuple(i * 255 for i in (0.229, 0.224, 0.225))

    class ResizeParam:
        short = 400
        long = 600

    class PadParam:
        # same bounds as ResizeParam
        short = ResizeParam.short
        long = ResizeParam.long
        max_num_gt = 100

    class AnchorTarget2DParam:
        def __init__(self):
            # `generate` is exposed as an instance of _generate, so the
            # per-level tuples set in its __init__ are instance attributes.
            self.generate = self._generate()

        class _generate:
            def __init__(self):
                self.stride = (4, 8, 16, 32, 64)
                self.short = (100, 50, 25, 13, 7)
                self.long = (150, 75, 38, 19, 10)
            # NOTE(review): "(4)" is the int 4, not a 1-tuple (compare
            # RpnParam.anchor_generate.scale) -- confirm the consumer
            # accepts a scalar here.
            scales = (4)
            aspects = (0.5, 1.0, 2.0)

        class assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0

        class sample:
            image_anchor = 256
            pos_fraction = 0.5

    class RenameParam:
        mapping = dict(image="data")

    from core.detection_input import (
        ReadRoiRecord, Resize2DImageBbox, ConvertImageFromHwcToChw,
        Flip2DImageBbox, Pad2DImageBbox, RenameRecord, Norm2DImage,
    )

    from models.FPN.input import PyramidAnchorTarget2D

    # Steps shared by both modes; only the flip is training-specific here
    # (padding is applied at test time as well).
    transform = [
        ReadRoiRecord(None),
        Norm2DImage(NormParam),
        Resize2DImageBbox(ResizeParam),
    ]
    if is_train:
        transform.append(Flip2DImageBbox())
    transform += [
        Pad2DImageBbox(PadParam),
        ConvertImageFromHwcToChw(),
        RenameRecord(RenameParam.mapping),
    ]

    if is_train:
        data_name = ["data"]
        label_name = ["gt_bbox", "im_info"]
        if not RpnParam.nnvm_rpn_target:
            # RPN targets come from the input pipeline in this case
            transform.append(PyramidAnchorTarget2D(AnchorTarget2DParam()))
            label_name += ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"]
    else:
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    import core.detection_metric as metric

    # Every metric reads its label from a network output (empty label list).
    rpn_label_out = "rpn_cls_label_blockgrad_output"
    box_label_out = "bbox_label_blockgrad_output"
    metric_list = [
        metric.AccWithIgnore("RpnAcc", ["rpn_cls_loss_output", rpn_label_out], []),
        metric.L1("RpnL1", ["rpn_reg_loss_output", rpn_label_out], []),
        # for bbox, the label is generated in network so it is an output
        metric.AccWithIgnore("RcnnAcc", ["bbox_cls_loss_output", box_label_out], []),
        metric.L1("RcnnL1", ["bbox_reg_loss_output", box_label_out], []),
    ]

    return (
        General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam,
        ModelParam, OptimizeParam, TestParam,
        transform, data_name, label_name, metric_list,
    )
def get_config(is_train):
    """Assemble the configuration bundle for an FPN detector with a pretrained backbone.

    The pretrained weights prefix is ``pretrain_model/resnet-v1-101``.

    Args:
        is_train: Truthy selects the training batch size, dataset split,
            symbol and input pipeline; falsy selects the test-time ones.

    Returns:
        The 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``.
    """
    class General:
        log_frequency = 10
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        batch_image = 2 if is_train else 1
        fp16 = False

    class KvstoreParam:
        kvstore = "nccl"
        batch_image = General.batch_image
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16 = General.fp16

    class NormalizeParam:
        normalizer = normalizer_factory(type="fixbn")

    class BackboneParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class NeckParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class RpnParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        batch_image = General.batch_image

        class anchor_generate:
            scale = (8, )
            ratio = (0.5, 1.0, 2.0)
            stride = (4, 8, 16, 32, 64)
            image_anchor = 256

        class head:
            conv_channel = 256
            mean = (0, 0, 0, 0)
            std = (1, 1, 1, 1)

        class proposal:
            pre_nms_top_n = 2000 if is_train else 1000
            post_nms_top_n = 2000 if is_train else 1000
            nms_thr = 0.7
            min_bbox_side = 0

        class subsample_proposal:
            proposal_wo_gt = False
            image_roi = 512
            fg_fraction = 0.25
            fg_thr = 0.5
            bg_thr_hi = 0.5
            bg_thr_lo = 0.0

        class bbox_target:
            num_reg_class = 81
            class_agnostic = False
            weight = (1.0, 1.0, 1.0, 1.0)
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class BboxParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        num_class = 1 + 80
        image_roi = 512
        batch_image = General.batch_image

        class regress_target:
            class_agnostic = False
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class RoiParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        out_size = 7
        stride = (4, 8, 16, 32)
        roi_canonical_scale = 224
        roi_canonical_level = 4

    class DatasetParam:
        image_set = (
            ("coco_train2014", "coco_valminusminival2014")
            if is_train else ("coco_minival2014", )
        )

    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    roi_extractor = RoiExtractor(RoiParam)
    bbox_head = BboxHead(BboxParam)
    detector = Detector()

    # Build only the symbols for the requested mode; the rest stay None.
    train_sym = rpn_test_sym = test_sym = None
    if is_train:
        train_sym = detector.get_train_symbol(
            backbone, neck, rpn_head, roi_extractor, bbox_head)
    else:
        rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head)
        test_sym = detector.get_test_symbol(
            backbone, neck, rpn_head, roi_extractor, bbox_head)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym
        rpn_test_symbol = rpn_test_sym

        from_scratch = False
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            prefix = "pretrain_model/resnet-v1-101"
            epoch = 0
            fixed_param = ["conv0", "stage1", "gamma", "beta"]

    # Images per optimisation step across all GPUs.
    total_batch = len(KvstoreParam.gpus) * KvstoreParam.batch_image
    base_lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            lr = base_lr
            momentum = 0.9
            wd = 0.0001
            clip_gradient = None

        class schedule:
            begin_epoch = 0
            end_epoch = 6
            # iteration counts are written for a batch of 16 and rescaled
            lr_iter = [60000 * 16 // total_batch, 80000 * 16 // total_batch]

        class warmup:
            type = "gradual"
            lr = base_lr / 3.0
            iter = 500

    class TestParam:
        min_det_score = 0.05
        max_det_per_image = 100

        # identity hooks
        process_roidb = lambda x: x
        process_output = lambda x, y: x

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.5

        class coco:
            annotation = "data/coco/annotations/instances_minival2014.json"

    # ---- data processing ----
    class NormParam:
        mean = (122.7717, 115.9465, 102.9801)  # RGB order
        std = (1.0, 1.0, 1.0)

    class ResizeParam:
        short = 800
        long = 1333

    class PadParam:
        # same bounds as ResizeParam
        short = ResizeParam.short
        long = ResizeParam.long
        max_num_gt = 100

    class AnchorTarget2DParam:
        def __init__(self):
            # `generate` is exposed as an instance of _generate, so the
            # per-level tuples set in its __init__ are instance attributes.
            self.generate = self._generate()

        class _generate:
            def __init__(self):
                self.stride = (4, 8, 16, 32, 64)
                self.short = (200, 100, 50, 25, 13)
                self.long = (334, 167, 84, 42, 21)

            # NOTE(review): "(8)" is the int 8, not a 1-tuple (compare
            # RpnParam.anchor_generate.scale) -- confirm the consumer
            # accepts a scalar here.
            scales = (8)
            aspects = (0.5, 1.0, 2.0)

        class assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0

        class sample:
            image_anchor = 256
            pos_fraction = 0.5

    class RenameParam:
        mapping = dict(image="data")

    from core.detection_input import (
        ReadRoiRecord, Resize2DImageBbox, ConvertImageFromHwcToChw,
        Flip2DImageBbox, Pad2DImageBbox, RenameRecord, Norm2DImage,
    )

    from models.FPN.input import PyramidAnchorTarget2D

    if is_train:
        # training adds flip, pad and pyramid anchor-target steps
        transform = [
            ReadRoiRecord(None),
            Norm2DImage(NormParam),
            Resize2DImageBbox(ResizeParam),
            Flip2DImageBbox(),
            Pad2DImageBbox(PadParam),
            ConvertImageFromHwcToChw(),
            PyramidAnchorTarget2D(AnchorTarget2DParam()),
            RenameRecord(RenameParam.mapping),
        ]
        data_name, label_name = (
            ["data", "im_info", "gt_bbox"],
            ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"],
        )
    else:
        transform = [
            ReadRoiRecord(None),
            Norm2DImage(NormParam),
            Resize2DImageBbox(ResizeParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping),
        ]
        data_name, label_name = ["data", "im_info", "im_id", "rec_id"], []

    import core.detection_metric as metric

    box_label_out = "bbox_label_blockgrad_output"
    metric_list = [
        # RPN metrics are fed the "rpn_cls_label" label
        metric.AccWithIgnore("RpnAcc", ["rpn_cls_loss_output"],
                             ["rpn_cls_label"]),
        metric.L1("RpnL1", ["rpn_reg_loss_output"], ["rpn_cls_label"]),
        # for bbox, the label is generated in network so it is an output
        metric.AccWithIgnore("RcnnAcc", ["bbox_cls_loss_output", box_label_out], []),
        metric.L1("RcnnL1", ["bbox_reg_loss_output", box_label_out], []),
    ]

    return (
        General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam,
        ModelParam, OptimizeParam, TestParam,
        transform, data_name, label_name, metric_list,
    )
Exemple #16
0
        class sample:
            image_anchor = 256
            pos_fraction = 0.5

    class RenameParam:
        mapping = dict(image="data")

    transform = [
        ReadRoiRecord(None),
        RandResizeCrop2DImageBbox(ResizeParam),
        Flip2DImageBbox(),
        Pad2DImageBbox(PadParam),
        ConvertImageFromHwcToChw(),
        AnchorTarget2D(AnchorTarget2DParam),
        RenameRecord(RenameParam.mapping)
    ]

    DEBUG = True

    with open("data/cache/coco_val2017.roidb", "rb") as fin:
        roidb = pkl.load(fin)
        roidb = [rec for rec in roidb if rec["gt_bbox"].shape[0] > 0]
        roidb = [roidb[i] for i in np.random.choice(len(roidb), 20, replace=False)]

        print(roidb[0])
        flipped_roidb = []
        for rec in roidb:
            new_rec = rec.copy()
            new_rec["flipped"] = True
            flipped_roidb.append(new_rec)
Exemple #17
0
def get_config(is_train):
    """Build the configuration bundle for this TridentNet experiment.

    Args:
        is_train: truthy selects the training variant of every setting that
            differs between training and inference (branch layout, proposal
            counts, image sizes, network symbols and input pipeline).

    Returns:
        A 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``.
    """

    class General:
        log_frequency = 10
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        batch_image = 1  # identical for training and inference
        fp16 = False

    class Trident:
        # Training uses three branches; inference keeps one branch
        # (id 1, dilation 2).
        if is_train:
            num_branch = 3
            branch_ids = range(num_branch)
            branch_dilates = [1, 2, 3]
            valid_ranges = [(0, -1), (0, -1), (0, -1)]
        else:
            num_branch = 1
            branch_ids = [1]
            branch_dilates = [2]
            valid_ranges = [(0, -1)]

        train_scaleaware = False
        test_scaleaware = False
        valid_ranges_on_origin = True
        branch_bn_shared = True
        branch_conv_shared = True
        branch_deform = False

        # Exactly one id and one valid range must exist per branch.
        assert len(branch_ids) == num_branch
        assert len(valid_ranges) == num_branch

    class KvstoreParam:
        kvstore = "local"
        batch_image = General.batch_image
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16 = General.fp16

    class NormalizeParam:
        # Alternative kept for reference:
        # normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus))
        normalizer = normalizer_factory(type="fixbn")

    class BackboneParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        depth = 101
        # Branch layout is mirrored from the Trident settings above.
        num_branch = Trident.num_branch
        branch_ids = Trident.branch_ids
        branch_dilates = Trident.branch_dilates
        branch_bn_shared = Trident.branch_bn_shared
        branch_conv_shared = Trident.branch_conv_shared
        branch_deform = Trident.branch_deform

    class NeckParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class RpnParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        # The per-device image count is multiplied by the branch count.
        batch_image = General.batch_image * Trident.num_branch

        class anchor_generate:
            scale = (2, 4, 8, 16, 32)
            ratio = (0.5, 1.0, 2.0)
            stride = 16
            image_anchor = 256

        class head:
            conv_channel = 512
            mean = (0, 0, 0, 0)
            std = (1, 1, 1, 1)

        class proposal:
            pre_nms_top_n = 12000 if is_train else 6000
            post_nms_top_n = 500 if is_train else 300
            nms_thr = 0.7
            min_bbox_side = 0

        class subsample_proposal:
            proposal_wo_gt = True
            image_roi = 128
            fg_fraction = 0.5
            fg_thr = 0.5
            bg_thr_hi = 0.5
            bg_thr_lo = 0.0

        class bbox_target:
            num_reg_class = 2
            class_agnostic = True
            weight = (1.0, 1.0, 1.0, 1.0)
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class BboxParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        num_class = 1 + 80
        image_roi = 128
        batch_image = General.batch_image * Trident.num_branch

        class regress_target:
            class_agnostic = True
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class RoiParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        out_size = 7
        stride = 16

    class DatasetParam:
        image_set = ("coco_train2017", ) if is_train else ("coco_val2017", )

    # Network components, instantiated in a fixed order.
    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    roi_extractor = RoiExtractor(RoiParam)
    bbox_head = BboxHead(BboxParam)
    detector = Detector()

    # Only the symbols needed for the selected mode are built; the rest
    # stay None.
    train_sym = rpn_test_sym = test_sym = None
    if is_train:
        train_sym = detector.get_train_symbol(
            backbone, neck, rpn_head, roi_extractor, bbox_head,
            num_branch=Trident.num_branch,
            scaleaware=Trident.train_scaleaware)
    else:
        rpn_test_sym = detector.get_rpn_test_symbol(
            backbone, neck, rpn_head, Trident.num_branch)
        test_sym = detector.get_test_symbol(
            backbone, neck, rpn_head, roi_extractor, bbox_head,
            num_branch=Trident.num_branch)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym
        rpn_test_symbol = rpn_test_sym

        from_scratch = False
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            prefix = "pretrain_model/resnet%s_v1b" % BackboneParam.depth
            epoch = 0
            fixed_param = ["conv0", "stage1", "gamma", "beta"]

    # Images processed per iteration over all devices; the iteration
    # milestones below are scaled by 16 / images_per_iter.
    images_per_iter = len(KvstoreParam.gpus) * KvstoreParam.batch_image

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image
            momentum = 0.9
            wd = 0.0001
            clip_gradient = 5

        class schedule:
            begin_epoch = 0
            end_epoch = 12
            lr_iter = [
                120000 * 16 // images_per_iter,
                160000 * 16 // images_per_iter,
            ]

        class warmup:
            type = "gradual"
            lr = 0.0
            iter = 3000 * 16 // images_per_iter

    class TestParam:
        min_det_score = 0.001
        max_det_per_image = 100

        process_roidb = lambda x: x
        # Branch outputs are post-processed only for scale-aware testing;
        # otherwise they pass through unchanged.
        if not Trident.test_scaleaware:
            process_output = lambda x, y: x
        else:
            process_output = lambda x, y: process_branch_outputs(
                x, Trident.num_branch, Trident.valid_ranges,
                Trident.valid_ranges_on_origin)

        process_rpn_output = lambda x, y: process_branch_rpn_outputs(
            x, Trident.num_branch)

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.5

        class coco:
            annotation = "data/coco/annotations/instances_minival2014.json"

    # ---- input pipeline parameters ----
    class NormParam:
        mean = tuple(255 * c for c in (0.485, 0.456, 0.406))  # RGB order
        std = tuple(255 * c for c in (0.229, 0.224, 0.225))

    # Image sides shared by resizing and padding; the long side differs
    # between training and inference.
    short_side = 800
    long_side = 1200 if is_train else 2000

    class ResizeParam:
        short = short_side
        long = long_side

    class PadParam:
        short = short_side
        long = long_side
        max_num_gt = 100

    class ScaleRange:
        valid_ranges = Trident.valid_ranges
        # True: valid_ranges refer to the original image scale;
        # False: they refer to the resized image scale.
        cal_on_origin = Trident.valid_ranges_on_origin

    class AnchorTarget2DParam:
        class generate:
            short = 800 // 16
            long = 1200 // 16
            stride = 16
            scales = (2, 4, 8, 16, 32)
            aspects = (0.5, 1.0, 2.0)

        class assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0

        class sample:
            image_anchor = 256
            pos_fraction = 0.5

        class trident:
            invalid_anchor_threshd = 0.3

    class RenameParam:
        mapping = {"image": "data"}

    from core.detection_input import (
        ReadRoiRecord, Resize2DImageBbox, ConvertImageFromHwcToChw,
        Flip2DImageBbox, Pad2DImageBbox, RenameRecord, Norm2DImage)
    from models.tridentnet.input import ScaleAwareRange, TridentAnchorTarget2D

    # Both modes start by reading, normalising and resizing each record.
    transform = [
        ReadRoiRecord(None),
        Norm2DImage(NormParam),
        Resize2DImageBbox(ResizeParam),
    ]
    if is_train:
        transform += [
            Flip2DImageBbox(),
            Pad2DImageBbox(PadParam),
            ConvertImageFromHwcToChw(),
            ScaleAwareRange(ScaleRange),
            TridentAnchorTarget2D(AnchorTarget2DParam),
            RenameRecord(RenameParam.mapping),
        ]
        data_name = ["data", "im_info", "gt_bbox"]
        if Trident.train_scaleaware:
            data_name.append("valid_ranges")
        label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"]
    else:
        transform += [
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping),
        ]
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    import core.detection_metric as metric

    metric_list = [
        metric.AccWithIgnore(
            "RpnAcc", ["rpn_cls_loss_output"], ["rpn_cls_label"]),
        metric.L1(
            "RpnL1", ["rpn_reg_loss_output"], ["rpn_cls_label"]),
        # for bbox, the label is generated in network so it is an output
        metric.AccWithIgnore(
            "RcnnAcc",
            ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], []),
        metric.L1(
            "RcnnL1",
            ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], []),
    ]

    return (General, KvstoreParam, RpnParam, RoiParam, BboxParam,
            DatasetParam, ModelParam, OptimizeParam, TestParam,
            transform, data_name, label_name, metric_list)
def get_config(is_train):
    """Build the configuration bundle for this three-stage box-head detector.

    The experiment wires one RPN head and three box heads (stages "1st",
    "2nd" and "3rd") onto a depth-50 backbone.

    Args:
        is_train: truthy selects the training variant of every setting that
            differs between training and inference (batch size, proposal
            counts, dataset, network symbols and input pipeline).

    Returns:
        A 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``.
    """

    class General:
        log_frequency = 10
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        batch_image = 2 if is_train else 1
        fp16 = False
        long_side = 1280
        short_side = 960

    class KvstoreParam:
        kvstore = "local"
        batch_image = General.batch_image
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16 = General.fp16

    class NormalizeParam:
        # Alternative kept for reference:
        # normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus))
        normalizer = normalizer_factory(type="fixbn")

    class BackboneParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        depth = 50

    class NeckParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class RpnParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        batch_image = General.batch_image
        nnvm_proposal = True
        nnvm_rpn_target = False
        use_symbolic_proposal = None

        class anchor_generate:
            scale = (8, )
            ratio = (0.5, 1.0, 2.0)
            stride = (4, 8, 16, 32, 64)
            image_anchor = 256
            max_side = 1280  # same value as General.long_side

        class anchor_assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.3
            image_anchor = 256
            pos_fraction = 0.5

        class head:
            conv_channel = 256
            mean = (0, 0, 0, 0)
            std = (1, 1, 1, 1)

        class proposal:
            pre_nms_top_n = 2000 if is_train else 1000
            post_nms_top_n = 2000 if is_train else 1000
            nms_thr = 0.7
            min_bbox_side = 0

        class subsample_proposal:
            proposal_wo_gt = False
            image_roi = 512
            fg_fraction = 0.25
            fg_thr = 0.5
            bg_thr_hi = 0.5
            bg_thr_lo = 0.5

        class bbox_target:
            num_reg_class = 2
            class_agnostic = True
            weight = (1.0, 1.0, 1.0, 1.0)
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    # ---- box heads: one parameter class per stage ----
    class BboxParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        num_class = 1 + 54
        image_roi = 512
        batch_image = General.batch_image
        stage = "1st"
        loss_weight = 1.0

        class regress_target:
            class_agnostic = True
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

        class subsample_proposal:
            proposal_wo_gt = False
            image_roi = 512
            fg_fraction = 0.25
            fg_thr = 0.6
            bg_thr_hi = 0.6
            bg_thr_lo = 0.6

        class bbox_target:
            num_reg_class = 2
            class_agnostic = True
            weight = (1.0, 1.0, 1.0, 1.0)
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class BboxParam2nd:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        num_class = 1 + 54
        image_roi = 512
        batch_image = General.batch_image
        stage = "2nd"
        loss_weight = 0.5

        class regress_target:
            class_agnostic = True
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.05, 0.05, 0.1, 0.1)

        class subsample_proposal:
            proposal_wo_gt = False
            image_roi = 512
            fg_fraction = 0.25
            fg_thr = 0.7
            bg_thr_hi = 0.7
            bg_thr_lo = 0.7

        class bbox_target:
            num_reg_class = 2
            class_agnostic = True
            weight = (1.0, 1.0, 1.0, 1.0)
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.05, 0.05, 0.1, 0.1)

    class BboxParam3rd:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        num_class = 1 + 54
        image_roi = 512
        batch_image = General.batch_image
        stage = "3rd"
        loss_weight = 0.25

        class regress_target:
            class_agnostic = True
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.033, 0.033, 0.067, 0.067)

        # Every subsample / target field of the final stage is None.
        class subsample_proposal:
            proposal_wo_gt = None
            image_roi = None
            fg_fraction = None
            fg_thr = None
            bg_thr_hi = None
            bg_thr_lo = None

        class bbox_target:
            num_reg_class = None
            class_agnostic = None
            weight = None
            mean = None
            std = None

    class RoiParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        out_size = 7
        stride = (4, 8, 16, 32)
        roi_canonical_scale = 224
        roi_canonical_level = 4

    class DatasetParam:
        image_set = ("vending_train", ) if is_train else ("coco_val2017", )

    # Network components, instantiated in a fixed order.
    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    roi_extractor = RoiExtractor(RoiParam)
    bbox_head = BboxHead(BboxParam)
    bbox_head_2nd = BboxHead(BboxParam2nd)
    bbox_head_3rd = BboxHead(BboxParam3rd)
    detector = Detector()

    # Only the symbols needed for the selected mode are built; the rest
    # stay None.
    train_sym = rpn_test_sym = test_sym = None
    if is_train:
        train_sym = detector.get_train_symbol(
            backbone, neck, rpn_head, roi_extractor,
            bbox_head, bbox_head_2nd, bbox_head_3rd)
    else:
        rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head)
        test_sym = detector.get_test_symbol(
            backbone, neck, rpn_head, roi_extractor,
            bbox_head, bbox_head_2nd, bbox_head_3rd)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym
        rpn_test_symbol = rpn_test_sym

        from_scratch = False
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            prefix = "pretrain_model/resnet-v1-50"
            epoch = 0
            fixed_param = ["conv0", "stage1", "gamma", "beta"]

        def process_weight(sym, arg, aux):
            """Call add_anchor_to_arg once for each RPN anchor stride."""
            anchor_cfg = RpnParam.anchor_generate
            for anchor_stride in anchor_cfg.stride:
                add_anchor_to_arg(sym, arg, aux, anchor_cfg.max_side,
                                  anchor_stride, anchor_cfg.scale,
                                  anchor_cfg.ratio)

        class QuantizeTrainingParam:
            quantize_flag = False
            # Wider op set kept for reference:
            # ("Convolution", "FullyConnected", "Deconvolution", "Concat",
            #  "Pooling", "add_n", "elemwise_add")
            quantized_op = ("Convolution", "FullyConnected", "Deconvolution")

            class WeightQuantizeParam:
                delay_quant = 0
                ema_decay = 0.99
                grad_mode = "ste"
                is_weight = True
                is_weight_perchannel = False
                quant_mode = "minmax"

            class ActQuantizeParam:
                delay_quant = 0
                ema_decay = 0.99
                grad_mode = "ste"
                is_weight = False
                is_weight_perchannel = False
                quant_mode = "minmax"

    # Images processed per iteration over all devices; the learning-rate
    # milestones below are scaled by 16 / images_per_iter.
    images_per_iter = len(KvstoreParam.gpus) * KvstoreParam.batch_image

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image
            momentum = 0.9
            wd = 0.0001
            clip_gradient = None

        class schedule:
            begin_epoch = 0
            end_epoch = 6
            lr_iter = [
                60000 * 16 // images_per_iter,
                80000 * 16 // images_per_iter,
            ]

        class warmup:
            type = "gradual"
            # Same expression as optimizer.lr, divided by 3.
            lr = (0.01 / 8 * len(KvstoreParam.gpus) *
                  KvstoreParam.batch_image / 3.0)
            iter = 500

    class TestParam:
        min_det_score = 0.05
        max_det_per_image = 100

        # Both hooks return their first argument unchanged.
        process_roidb = lambda x: x
        process_output = lambda x, y: x

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.6

        class coco:
            annotation = "data/coco/annotations/instances_val2017.json"

    # ---- input pipeline parameters ----
    class NormParam:
        mean = (123.675, 116.28, 103.53)  # RGB order
        std = (58.395, 57.12, 57.375)

    # The sides below repeat General.short_side / General.long_side.
    class ResizeParam:
        short = 960
        long = 1280

    class PadParam:
        short = 960
        long = 1280
        max_num_gt = 100

    class AnchorTarget2DParam:
        # Instantiated before use: __init__ attaches a _generate instance
        # plus the target mean / std.
        def __init__(self):
            self.generate = self._generate()
            self.mean = (0, 0, 0, 0)
            self.std = (1, 1, 1, 1)

        class _generate:
            scales = (8, )
            aspects = (0.5, 1.0, 2.0)

            def __init__(self):
                # short / long equal 960 // stride and 1280 // stride.
                self.stride = (4, 8, 16, 32, 64)
                self.short = (240, 120, 60, 30, 15)
                self.long = (320, 160, 80, 40, 20)

        class assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.3

        class sample:
            image_anchor = 256
            pos_fraction = 0.5

    class RenameParam:
        mapping = {"image": "data"}

    from core.detection_input import (
        ReadRoiRecord, Resize2DImageBbox, ConvertImageFromHwcToChw,
        Flip2DImageBbox, Pad2DImageBbox, RenameRecord, Norm2DImage)

    from models.FPN.input import PyramidAnchorTarget2D

    # Both modes start by reading, normalising and resizing each record.
    transform = [
        ReadRoiRecord(None),
        Norm2DImage(NormParam),
        Resize2DImageBbox(ResizeParam),
    ]
    if is_train:
        transform += [
            Flip2DImageBbox(),
            Pad2DImageBbox(PadParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping),
        ]
        data_name = ["data"]
        label_name = ["gt_bbox", "im_info"]
        # When RpnParam.nnvm_rpn_target is off, the anchor-target transform
        # is added to the pipeline and its outputs become labels.
        if not RpnParam.nnvm_rpn_target:
            transform.append(PyramidAnchorTarget2D(AnchorTarget2DParam()))
            label_name += ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"]
    else:
        transform += [
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping),
        ]
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    import core.detection_metric as metric

    metric_list = [
        metric.AccWithIgnore(
            "RpnAcc",
            ["rpn_cls_loss_output", "rpn_cls_label_blockgrad_output"], []),
        metric.L1(
            "RpnL1",
            ["rpn_reg_loss_output", "rpn_cls_label_blockgrad_output"], []),
        # for bbox, the label is generated in network so it is an output
        # stage1 metric
        metric.AccWithIgnore(
            "RcnnAcc_1st",
            ["bbox_cls_loss_1st_output", "bbox_label_blockgrad_1st_output"],
            []),
        metric.L1(
            "RcnnL1_1st",
            ["bbox_reg_loss_1st_output", "bbox_label_blockgrad_1st_output"],
            []),
        # stage2 metric
        metric.AccWithIgnore(
            "RcnnAcc_2nd",
            ["bbox_cls_loss_2nd_output", "bbox_label_blockgrad_2nd_output"],
            []),
        metric.L1(
            "RcnnL1_2nd",
            ["bbox_reg_loss_2nd_output", "bbox_label_blockgrad_2nd_output"],
            []),
        # stage3 metric
        metric.AccWithIgnore(
            "RcnnAcc_3rd",
            ["bbox_cls_loss_3rd_output", "bbox_label_blockgrad_3rd_output"],
            []),
        metric.L1(
            "RcnnL1_3rd",
            ["bbox_reg_loss_3rd_output", "bbox_label_blockgrad_3rd_output"],
            []),
    ]

    return (General, KvstoreParam, RpnParam, RoiParam, BboxParam,
            DatasetParam, ModelParam, OptimizeParam, TestParam,
            transform, data_name, label_name, metric_list)