Example #1
def get_onenet_config(num_classes=80):
    h = Config()

    input_size = (512, 512)
    h.detector = "OneNet"
    h.dtype = "float16"
    h.data_format = "channels_last"
    h.input_shape = (input_size[0], input_size[1], 3)
    h.num_classes = num_classes
    h.backbone = dict(backbone="ResNet18",
                      dropblock=None,
                      normalization=dict(normalization="batch_norm",
                                         momentum=0.997,
                                         epsilon=1e-4,
                                         trainable=False),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[2, 3, 4, 5],
                      frozen_stages=[
                          1,
                      ])

    h.neck = dict(neck="CenterNetDeconv",
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.997,
                                     epsilon=1e-4,
                                     trainable=False),
                  activation=dict(activation="relu"))
    h.head = dict(head="OneNetHead",
                  activation=dict(activation="relu"),
                  feat_dims=64,
                  dropblock=None,
                  num_classes=num_classes,
                  strides=4,
                  prior=0.01,
                  use_sigmoid=True,
                  assigner=dict(assigner="MinCostAssigner",
                                class_weight=2.,
                                l1_weight=2.,
                                iou_weight=5.,
                                iou_type="giou",
                                alpha=0.25,
                                gamma=2.),
                  label_loss=dict(loss="FocalLoss",
                                  alpha=0.25,
                                  gamma=2.,
                                  reduction="sum"),
                  bbox_loss=dict(loss="RegL1Loss", weight=1., reduction="sum"))

    h.weight_decay = 1e-4
    h.excluding_weight_names = ["predicted_box", "predicted_class"]
    h.train = dict(
        dataset=dict(dataset="COCODataset",
                     batch_size=4,
                     dataset_dir="/data/bail/COCO",
                     training=True,
                     augmentations=[
                         dict(augmentation="FlipLeftToRight", probability=0.5),
                         dict(augmentation="RandomDistortColor"),
                         dict(augmentation="Resize",
                              img_scale=(0.2, 2),
                              multiscale_mode="range",
                              keep_ratio=True),
                         dict(augmentation="RandCropOrPad",
                              size=input_size,  # input_size is already a (height, width) tuple
                              clip_box_base_center=False),
                     ],
                     num_samples=118287),
        pretrained_weights_path=
        "/data/bail/pretrained_weights/resnet50/resnet50.ckpt",
        optimizer=dict(optimizer="SGD", momentum=0.9),
        mixed_precision=dict(
            loss_scale=None
        ),  # Loss scale for mixed-precision training; None means dynamic loss scaling.
        gradient_clip_norm=10.0,
        scheduler=dict(train_epochs=24,
                       learning_rate_scheduler=dict(
                           scheduler="PiecewiseConstantDecay",
                           boundaries=[16, 22],
                           values=[0.02, 0.002, 0.0002]),
                       warmup=dict(warmup_learning_rate=0.001, steps=800)),
        checkpoint_dir="checkpoints/onenet",
        summary_dir="logs/onenet",
        log_every_n_steps=100,
        save_ckpt_steps=5000)
    h.val = dict(dataset=dict(dataset="COCODataset",
                              batch_size=4,
                              dataset_dir="/data/bail/COCO",
                              training=False,
                              augmentations=[
                                  dict(augmentation="Resize",
                                        img_scale=[(1333, input_size[0])],  # (long side, short side)
                                       keep_ratio=True),
                                  dict(augmentation="Pad", size_divisor=32)
                              ]),
                 samples=5000)
    h.test = dict(topk=100, score_threshold=0.3)

    return h
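The helper above only builds a nested configuration; the lines below are a minimal usage sketch, assuming `Config` is the attribute-style container used throughout this repo (attribute access for top-level fields, plain dicts below that). The override is illustrative, not part of the original code.

# Minimal usage sketch (assumption: Config exposes assigned fields as attributes
# and stores the nested values as plain dicts, as implied by the function above).
cfg = get_onenet_config(num_classes=80)
cfg.train["dataset"]["batch_size"] = 8   # illustrative override of a nested field
cfg.dtype = "float32"                    # e.g. turn off float16 training
print(cfg.detector, cfg.input_shape, cfg.head["feat_dims"])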
Example #2
def default_detection_configs(phi,
                              num_classes=90,
                              min_level=3,
                              max_level=7,
                              fpn_filters=64,
                              neck_repeats=3,
                              head_repeats=3,
                              anchor_scale=4,
                              num_scales=3,
                              batch_size=4,
                              image_size=512,
                              fpn_name="BiFPN",
                              fpn_input_dims=[80, 192, 320],
                              fusion_type="weighted_sum"):
    h = Config()

    h.detector = "EfficientDetD%d" % phi
    h.dtype = "float32"
    h.num_classes = num_classes
    h.backbone = dict(
        backbone="EfficientNetB%d" % phi,
        convolution="depthwise_conv2d",
        dropblock=None,
        #   dropblock=dict(keep_prob=None,
        #                  block_size=None)
        normalization=dict(normalization="batch_norm",
                           momentum=0.99,
                           epsilon=1e-3,
                           axis=-1,
                           trainable=False),
        activation=dict(activation="swish"),
        strides=[2, 1, 2, 2, 2, 1, 2, 1],
        dilation_rates=[1, 1, 1, 1, 1, 1, 1, 1],
        output_indices=[3, 4, 5],
        frozen_stages=[
            -1,
        ])
    h.neck = dict(neck=fpn_name,
                  input_size=image_size if isinstance(
                      image_size, (list, tuple)) else [image_size, image_size],
                  num_backbone_levels=3,
                  feat_dims=fpn_filters,
                  repeats=neck_repeats,
                  convolution="separable_conv2d",
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.99,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=False),
                  activation=dict(activation="swish"),
                  min_level=3,
                  max_level=7,
                  input_dims=fpn_input_dims,
                  pool_type=None,
                  apply_bn=True,
                  fusion_type=fusion_type)
    # anchors parameters
    strides = [2**l for l in range(min_level, max_level + 1)]
    scales = [[
        2**(i / num_scales) * s * anchor_scale for i in range(num_scales)
    ] for s in strides]
    aspect_ratios = [1., 0.5, 2.]
    num_anchors = len(scales[0]) * len(aspect_ratios)  # 3 scales x 3 aspect ratios = 9 per location

    h.anchors = dict(aspect_ratios=aspect_ratios,
                     strides=strides,
                     scales=scales,
                     num_anchors=num_anchors)
    h.head = dict(head="RetinaNetHead",
                  convolution="separable_conv2d",
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.99,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=False),
                  activation=dict(activation="swish"),
                  feat_dims=fpn_filters,
                  dropblock=None,
                  repeats=head_repeats,
                  min_level=min_level,
                  max_level=max_level,
                  use_sigmoid=True,
                  prior=0.01,
                  survival_prob=None,
                  data_format="channels_last",
                  bbox_encoder=dict(encoder="Box2Delta", weights=None),
                  bbox_decoder=dict(decoder="Delta2Box", weights=None),
                  assigner=dict(assigner="ATSSAssigner", topk=9),
                  sampler=dict(sampler="PseudoSampler"),
                  label_loss=dict(loss="FocalLoss",
                                  gamma=2.0,
                                  alpha=0.25,
                                  label_smoothing=0.01,
                                  weight=1.,
                                  from_logits=True,
                                  reduction="sum"),
                  bbox_loss=dict(loss="CIoULoss", weight=1., reduction="sum"))
    h.weight_decay = 4e-5
    h.train = dict(
        input_size=(image_size, image_size),
        dataset=dict(
            dataset="COCODataset",
            batch_size=batch_size,
            dataset_dir="/data/bail/COCO",
            training=True,
            augmentations=[
                dict(FlipLeftToRight=dict(probability=0.5)),
                dict(RandomDistortColor=dict(probability=1.)),
                dict(Resize=dict(size=(image_size, image_size),
                                 strides=32,
                                 min_scale=0.5,
                                 max_scale=2.0)),
                #  dict(ResizeV2=dict(short_side=800, long_side=1333, strides=64, min_scale=1.0, max_scale=1.0))
            ],
            num_samples=118287,
            num_classes=num_classes),
        pretrained_weights_path=
        "/data/bail/pretrained_weights/resnet50/resnet50.ckpt",
        optimizer=dict(optimizer="SGD", momentum=0.9),
        mixed_precision=dict(
            loss_scale=None
        ),  # Loss scale for mixed-precision training; None means dynamic loss scaling.
        gradient_clip_norm=.0,
        scheduler=dict(
            train_epochs=18,
            #  learning_rate_scheduler=dict(scheduler="PiecewiseConstantDecay",
            #                               boundaries=[8, 11],
            #                               values=[0.02, 0.002, 0.0002]),
            learning_rate_scheduler=dict(scheduler="CosineDecay",
                                         initial_learning_rate=0.02),
            warmup=dict(warmup_learning_rate=0.0001, steps=800)),
        checkpoint_dir="checkpoints/efficientdet-d%d" % phi,
        summary_dir="logs/efficientdet-d%d" % phi,
        log_every_n_steps=100,
        save_ckpt_steps=5000)
    h.val = dict(
        dataset=dict(
            dataset="COCODataset",
            batch_size=batch_size,
            dataset_dir="/data/bail/COCO",
            training=False,
            augmentations=[
                dict(Resize=dict(size=(image_size, image_size),
                                 strides=32,
                                 min_scale=1.0,
                                 max_scale=1.0))
                # dict(ResizeV2=dict(short_side=800, long_side=1333, strides=64, min_scale=1.0, max_scale=1.0))
            ]),
        input_size=(image_size, image_size),
        samples=5000)
    h.test = dict(
        nms="CombinedNonMaxSuppression",
        pre_nms_size=5000,  # keep the top-k highest-scoring detections before NMS
        post_nms_size=100,
        iou_threshold=0.6,
        score_threshold=0.1,
    )

    return h
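The anchor block in this example derives strides and scales arithmetically. The standalone check below reproduces that arithmetic with the default arguments (min_level=3, max_level=7, num_scales=3, anchor_scale=4) so the resulting per-level anchor sizes are easy to see.

# Standalone check of the anchor arithmetic used above (no framework required).
min_level, max_level, num_scales, anchor_scale = 3, 7, 3, 4
strides = [2 ** l for l in range(min_level, max_level + 1)]   # [8, 16, 32, 64, 128]
scales = [[2 ** (i / num_scales) * s * anchor_scale for i in range(num_scales)]
          for s in strides]
print([[round(v, 1) for v in per_level] for per_level in scales])
# [[32.0, 40.3, 50.8], [64.0, 80.6, 101.6], [128.0, 161.3, 203.2],
#  [256.0, 322.5, 406.4], [512.0, 645.1, 812.7]]
# With 3 aspect ratios this gives 3 * 3 = 9 anchors per location.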
Example #3
def get_yolof_config(num_classes=80):
    h = Config()
    
    data_format = "channels_last"
    input_size = (1024, 1024)
    h.detector = "YOLOF"
    h.dtype = "float16"
    h.data_format = data_format
    h.input_shape = (input_size[0], input_size[1], 3)
    h.num_classes = num_classes
    h.backbone = dict(backbone="ResNeXt101_64X4D",
                      dropblock=None, 
                      normalization=dict(normalization="batch_norm", momentum=0.997, epsilon=1e-4, trainable=False),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[5],
                      frozen_stages=[1, ])
    h.neck=dict(neck="DilatedEncoder", 
                filters=512,
                midfilters=128,
                dilation_rates=[2, 4, 6, 8],  # rates for the setting without dilation in the backbone's stage 5
                # dilation_rates=[4, 8, 12, 16],
                normalization=dict(normalization="batch_norm", momentum=0.9, epsilon=1e-3, axis=-1, trainable=True),
                activation=dict(activation="relu"),
                kernel_initializer="he_normal",
                data_format=data_format)
    h.anchors=dict(generator="AnchorGenerator",
                   aspect_ratios=[1.],
                   scales=[32, 64, 128, 256, 512],
                   strides=32, 
                #    scales=[16, 32, 64, 128, 256, 512],
                #    strides=16, 
                   num_anchors=5)
    h.head=dict(head="YOLOFHead",
                normalization=dict(normalization="batch_norm", momentum=0.9, epsilon=1e-3, axis=-1, trainable=True),
                activation=dict(activation="relu"),
                feat_dims=512,
                cls_num_convs=2,
                reg_num_convs=4,
                kernel_initializer="he_normal",
                use_sigmoid=True,
                prior=0.01,
                bbox_decoder=dict(decoder="Delta2Box", weights=[1., 1., 1., 1.]),
                bbox_encoder=dict(encoder="Box2Delta", weights=[1., 1., 1., 1.]),
                assigner=dict(assigner="UniformAssigner", match_times=8, pos_ignore_thresh=0.7, neg_ignore_thresh=0.15),
                sampler=dict(sampler="PseudoSampler"),
                label_loss=dict(loss="FocalLoss", alpha=0.25, gamma=2.0, weight=1., from_logits=True, reduction="sum"),
                bbox_loss=dict(loss="GIoULoss", weight=2., reduction="sum"))
   
    h.weight_decay = 1e-4
    h.excluding_weight_names = ["predicted_box", "predicted_class"]
    h.train=dict(dataset=dict(dataset="COCODataset",
                              batch_size=4,
                              dataset_dir="/data/bail/COCO",
                              training=True,
                              augmentations=[
                                  dict(augmentation="FlipLeftToRight", probability=0.5),
                                  dict(augmentation="RandomDistortColor"),
                                  dict(augmentation="Resize", img_scale=[(1333, 800)], keep_ratio=True),
                                  dict(augmentation="Pad", size_divisor=32)
                              ],
                              num_samples=118287),
                  pretrained_weights_path="/data/bail/pretrained_weights/resnet50/resnet50.ckpt",

                  optimizer=dict(optimizer="SGD", momentum=0.9),
                  mixed_precision=dict(loss_scale=None),  # Loss scale for mixed-precision training; None means dynamic loss scaling.
                  gradient_clip_norm=10.0,

                  scheduler=dict(train_epochs=24,
                                 learning_rate_scheduler=dict(scheduler="PiecewiseConstantDecay",
                                                              boundaries=[16, 22],
                                                              values=[0.02, 0.002, 0.0002]),
                                 warmup=dict(warmup_learning_rate=0.001, steps=800)),
                  checkpoint_dir="checkpoints/yolof",
                  summary_dir="logs/yolof",
                  log_every_n_steps=100,
                  save_ckpt_steps=5000)
    h.val=dict(dataset=dict(dataset="COCODataset", 
                            batch_size=4,  
                            dataset_dir="/data/bail/COCO", 
                            training=False, 
                            augmentations=[
                                dict(augmentation="Resize", img_scale=[(1333, 800)], keep_ratio=True),
                                dict(augmentation="Pad", size_divisor=32),
                            ]),
               samples=5000)
    h.test=dict(nms="CombinedNonMaxSuppression",
                pre_nms_size=2000,
                post_nms_size=100, 
                iou_threshold=0.5, 
                score_threshold=0.35)

    return h
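YOLOF detects from a single C5-level feature map, so the anchor block above places all five scales at one stride of 32. Below is a small sanity check, again assuming `Config` exposes the fields set above as attributes.

# Sanity check of the single-level anchor setup (assumption: Config gives
# attribute access to the fields assigned above; nested values are plain dicts).
cfg = get_yolof_config()
anchors = cfg.anchors
assert anchors["num_anchors"] == len(anchors["scales"]) * len(anchors["aspect_ratios"])
feat_h = cfg.input_shape[0] // anchors["strides"]
feat_w = cfg.input_shape[1] // anchors["strides"]
print(feat_h, feat_w, anchors["num_anchors"])   # 32 32 5 for a 1024x1024 input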
Example #4
def get_gfl_config(num_classes=80):
    h = Config()
    
    input_size = (1024, 1024)
    h.detector = "GFL"
    h.dtype = "float16"
    h.data_format = "channels_last"
    h.input_shape = (input_size[0], input_size[1], 3)
    h.num_classes = num_classes
    h.backbone = dict(backbone="ResNet101",
                      dropblock=None, 
                      normalization=dict(normalization="batch_norm", momentum=0.997, epsilon=1e-4, trainable=False),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[3, 4, 5],
                      frozen_stages=[1, ])
    h.neck=dict(neck="FPN", 
                feat_dims=256,
                min_level=3, 
                max_level=5,
                num_output_levels=5, 
                add_extra_convs=True,
                relu_before_extra_convs=True)
    h.anchors=dict(generator="AnchorGeneratorV2",
                   aspect_ratios=[1.], 
                   octave_base_scale=8,
                   scales_per_octave=1,
                   strides=[8, 16, 32, 64, 128], 
                   num_anchors=1)
    h.head=dict(head="GFLHead",
                normalization=dict(normalization="group_norm", groups=32),
                activation=dict(activation="relu"),
                feat_dims=256,
                dropblock=None,
                num_classes=num_classes,
                repeats=4,
                min_level=3,
                max_level=7,
                use_sigmoid=True,
                prior=0.01,
                reg_max=16,
                bbox_decoder=dict(decoder="Distance2Box", weights=None),
                bbox_encoder=dict(encoder="Box2Distance", weights=None),
                assigner=dict(assigner="ATSSAssigner", topk=9),
                sampler=dict(sampler="PseudoSampler"),
                label_loss=dict(loss="QualityFocalLoss", beta=2.0, weight=1., from_logits=True, reduction="sum"),
                bbox_loss=dict(loss="GIoULoss", weight=2., reduction="sum"),
                dfl_loss=dict(loss="DistributionFocalLoss", weight=.25, reduction="sum"))
   
    h.weight_decay = 1e-4
    h.excluding_weight_names = ["predicted_box", "predicted_class"]
    h.train=dict(dataset=dict(dataset="COCODataset",
                              batch_size=4,
                              dataset_dir="/data/bail/COCO",
                              training=True,
                              augmentations=[
                                  dict(augmentation="FlipLeftToRight", probability=0.5),
                                  dict(augmentation="RandomDistortColor"),
                                  dict(augmentation="Resize", img_scale=[(1333, 800)], keep_ratio=True),
                                  dict(augmentation="Pad", size_divisor=32),
                              ],
                              num_samples=118287),
                  pretrained_weights_path="/data/bail/pretrained_weights/resnet50/resnet50.ckpt",

                  optimizer=dict(optimizer="SGD", momentum=0.9),
                  mixed_precision=dict(loss_scale=None),  # Loss scale for mixed-precision training; None means dynamic loss scaling.
                  gradient_clip_norm=10.0,

                  scheduler=dict(train_epochs=24,
                                 learning_rate_scheduler=dict(scheduler="PiecewiseConstantDecay",
                                                              boundaries=[16, 22],
                                                              values=[0.02, 0.002, 0.0002]),
                                 warmup=dict(warmup_learning_rate=0.001, steps=800)),
                  checkpoint_dir="checkpoints/gfl",
                  summary_dir="logs/gfl",
                  log_every_n_steps=100,
                  save_ckpt_steps=5000)
    h.val=dict(dataset=dict(dataset="COCODataset", 
                            batch_size=4,  
                            dataset_dir="/data/bail/COCO", 
                            training=False, 
                            augmentations=[
                                dict(augmentation="Resize", img_scale=[(1333, 800)], keep_ratio=True),
                                dict(augmentation="Pad", size_divisor=32),
                            ]),
               samples=5000)
    h.test=dict(nms="CombinedNonMaxSuppression",
                pre_nms_size=5000,
                post_nms_size=100, 
                iou_threshold=0.6, 
                score_threshold=0.3)

    return h
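With reg_max=16, GFLHead predicts each box side as a distribution over 17 bins and decodes the expected value ("integral" regression). The NumPy sketch below shows the general form of that decoding as described in the GFL paper; it is an illustration, not necessarily the exact Distance2Box implementation referenced above.

import numpy as np

def dfl_decode(side_logits, stride, reg_max=16):
    # side_logits: (..., 4, reg_max + 1) raw scores for the l, t, r, b distances.
    probs = np.exp(side_logits - side_logits.max(axis=-1, keepdims=True))
    probs /= probs.sum(axis=-1, keepdims=True)          # softmax over the bins
    bins = np.arange(reg_max + 1, dtype=np.float32)
    return (probs * bins).sum(axis=-1) * stride         # expected distance in pixels

logits = np.random.randn(4, 17).astype(np.float32)      # one location, 4 sides
print(dfl_decode(logits, stride=8))                      # distances from the anchor point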
Example #5
def default_detection_configs(phi, 
                              min_level=3, 
                              max_level=7, 
                              fpn_filters=64,
                              neck_repeats=3,
                              head_repeats=3,
                              anchor_scale=4,
                              num_scales=3,
                              batch_size=4,
                              image_size=512,
                              fusion_type="weighted_sum"):
    h = Config()

    # model name
    h.detector = "efficientdet-d%d" % phi
    h.min_level = min_level
    h.max_level = max_level
    h.dtype = "float16"

    # backbone
    h.backbone = dict(backbone="efficientnet-b%d" % phi,
                      convolution="depthwise_conv2d",
                      dropblock=None,
                    #   dropblock=dict(keep_prob=None,
                    #                  block_size=None)
                      normalization=dict(normalization="batch_norm",
                                         momentum=0.99,
                                         epsilon=1e-3,
                                         axis=-1,
                                         trainable=False),
                      activation=dict(activation="swish"),
                      strides=[2, 1, 2, 2, 2, 1, 2, 1],
                      dilation_rates=[1, 1, 1, 1, 1, 1, 1, 1],
                      output_indices=[3, 4, 5],
                      frozen_stages=[-1])
    
    # neck
    h.neck = dict(neck="bifpn",
                  repeats=neck_repeats,
                  convolution="separable_conv2d",
                  dropblock=None,
                #   dropblock=dict(keep_prob=None,
                #                  block_size=None)
                  feat_dims=fpn_filters,
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.99,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=False),
                  activation=dict(activation="swish"),
                  add_extra_conv=False,  # whether to add an extra convolution in the neck
                  fusion_type=fusion_type, 
                  use_multiplication=False)
    
    # head
    h.head = dict(head="RetinaNetHead",
                  repeats=head_repeats,
                  convolution="separable_conv2d",
                  dropblock=None,
                #   dropblock=dict(keep_prob=None,
                #                  block_size=None)
                  feat_dims=fpn_filters,
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.99,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=False),
                  activation=dict(activation="swish"),
                  prior=0.01)
    
    # anchors parameters
    strides = [2 ** l for l in range(min_level, max_level + 1)]
    h.anchor = dict(aspect_ratios=[[1., 0.5, 2.]] * (max_level - min_level + 1),
                    scales=[
                        [2 ** (i / num_scales) * s * anchor_scale 
                        for i in range(num_scales)] for s in strides
                    ],
                    num_anchors=9)

    # assigner
    h.assigner = dict(assigner="max_iou_assigner",
                      pos_iou_thresh=0.5,
                      neg_iou_thresh=0.5)
    # sampler
    h.sampler = dict(sampler="pseudo_sampler")
    
    # loss
    h.use_sigmoid = True
    h.label_loss=dict(loss="focal_loss",
                      alpha=0.25,
                      gamma=1.5,
                      label_smoothing=0.,
                      weight=1.,
                      from_logits=True,
                      reduction="none")
    h.bbox_loss=dict(loss="smooth_l1_loss",
                     weight=50.,   # 50.
                     delta=.1,    # .1
                     reduction="none")
    # h.box_loss=dict(loss="giou_loss",
    #                 weight=10.,
    #                 reduction="none")
    h.weight_decay = 4e-5

    h.bbox_mean = None  # [0., 0., 0., 0.]
    h.bbox_std = None  # [0.1, 0.1, 0.2, 0.2]

    # dataset
    h.num_classes = 90
    h.skip_crowd_during_training = True
    h.dataset = "objects365"

    h.batch_size = batch_size
    h.input_size = [image_size, image_size]
    h.train_dataset_dir = "/home/bail/Data/data1/Dataset/Objects365/train"
    h.val_dataset_dir = "/home/bail/Data/data1/Dataset/Objects365/train"
    h.augmentation = [
        dict(ssd_crop=dict(patch_area_range=(0.3, 1.),
                           aspect_ratio_range=(0.5, 2.0),
                           min_overlaps=(0.1, 0.3, 0.5, 0.7, 0.9),
                           max_attempts=100,
                           probability=.5)),
        # dict(data_anchor_sampling=dict(anchor_scales=(16, 32, 64, 128, 256, 512),
        #                                overlap_threshold=0.7,
        #                                max_attempts=50,
        #                                probability=.5)),
        dict(flip_left_to_right=dict(probability=0.5)),
        dict(random_distort_color=dict(probability=1.)),
    ]

    # train
    h.pretrained_weights_path = "/home/bail/Workspace/pretrained_weights/efficientdet-d%d" % phi

    h.optimizer = dict(optimizer="sgd", momentum=0.9)
    h.lookahead = None

    h.train_steps = 240000
    h.learning_rate_scheduler = dict(scheduler="cosine", initial_learning_rate=0.002)
    h.warmup = dict(warmup_learning_rate=0.00001, steps=24000)
    h.checkpoint_dir = "checkpoints/efficientdet_d%d" % phi
    h.summary_dir = "logs/efficientdet_d%d" % phi

    h.gradient_clip_norm = .0

    h.log_every_n_steps = 500
    h.save_ckpt_steps = 10000
    h.val_every_n_steps = 4000

    h.postprocess = dict(pre_nms_size=5000,   # keep the top-k highest-scoring detections before NMS
                         post_nms_size=100,
                         iou_threshold=0.5,
                         score_threshold=0.2)
    
    return h
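A minimal usage sketch for this flat-layout variant of the config, again assuming `Config` is the attribute-style container implied above; the call uses the function defaults rather than the paper's full compound-scaling table.

# Illustrative call with the default compound coefficient (assumption: Config
# exposes assigned fields as attributes, matching their use in the function above).
cfg = default_detection_configs(phi=0)
print(cfg.detector)                        # efficientdet-d0
print(cfg.anchor["num_anchors"])           # 9 anchors per location (3 scales x 3 ratios)
print(cfg.postprocess["score_threshold"])  # 0.2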