Example #1
def get_yolov4_config(input_size=None,
                      num_classes=80, 
                      num_anchors=3,
                      depth_multiplier=1.,
                      width_multiplier=1.,
                      label_assignment="iou", 
                      name="YOLOv4"):
    h = Config()
    
    if input_size is None:
        input_size = default_input_sizes[name]
    h.detector = name
    h.dtype = "float16"
    h.data_format = "channels_last"
    h.num_classes = num_classes

    if name not in model:
        raise ValueError("%s not in %s" % (name, list(model.keys())))

    # the name-keyed tables override the depth/width multiplier arguments
    h.depth_multiplier = depth_multiplier_dict[name]
    h.width_multiplier = width_multiplier_dict[name]
    h.model = model[name]
    h.min_level = min_level_dict[name]
    h.max_level = max_level_dict[name]
    h.strides = strides_dict[name]
    h.anchors = anchors_dict[name]
    h.num_anchors = len(anchors_dict[name][0]) // 2
    h.input_size = input_size if isinstance(input_size, (tuple, list)) else (input_size, input_size)

    h.label_assignment = label_assignment
    h.anchor_threshold = 0.2
    h.gr = 1.
        
    h.bbox_loss = dict(loss="CIoULoss", weight=1., reduction="none")
    h.label_loss = dict(loss="BinaryCrossEntropy", weight=1., from_logits=True, reduction="none")  # weight = 0.631 when fine-tuning
    h.conf_loss = dict(loss="BinaryCrossEntropy", weight=1., from_logits=True, reduction="none")   # weight = 0.911 when fine-tuning
    h.balance = [1., 1., 1.]  # [4.0, 1.0, 0.4] for three levels, [4.0, 1.0, 0.4, 0.1] for four
    h.box_weight = 0.05  # 0.0296 when fine-tuning
    h.label_weight = .5  # 0.243 when fine-tuning
    h.conf_weight = 1.0  # 0.301 when fine-tuning
    
    h.weight_decay = 0.0005
    h.excluding_weight_names = ["predicted"]
    h.train=dict(dataset=dict(dataset="COCODataset",
                              batch_size=8,
                              dataset_dir="/data/bail/COCO",
                              training=True,
                              augmentations=[
                                  dict(augmentation="FlipLeftToRight", probability=0.5),
                                  dict(augmentation="RandomDistortColor"),
                                  dict(augmentation="Resize", img_scale=(0.2, 2), keep_ratio=True),
                                  dict(augmentation="Pad", size_divisor=32)
                              ],
                            #   mixup=dict(alpha=8.0, prob=0.5),
                              mosaic=dict(size=input_size, min_image_scale=0.25, prob=1.),
                              num_samples=118287),
                  pretrained_weights_path="/data/bail/pretrained_weights/darknet53-notop/darknet53.ckpt",
                  optimizer=dict(optimizer="SGD", momentum=0.937),
                  mixed_precision=dict(loss_scale=None),  # The loss scale in mixed precision training. If None, use dynamic.
                  gradient_clip_norm=.0,

                  scheduler=dict(train_epochs=480,
                                 #  learning_rate_scheduler=dict(scheduler="PiecewiseConstantDecay",
                                 #                               boundaries=[24, 32],
                                 #                               values=[0.012, 0.0012, 0.00012]),
                                 learning_rate_scheduler=dict(scheduler="CosineDecay", initial_learning_rate=0.012),
                                 warmup=dict(warmup_learning_rate=0.0012, steps=12000)),
                  checkpoint_dir="checkpoints/%s" % name,
                  summary_dir="logs/%s" % name,
                  log_every_n_steps=100,
                  save_ckpt_steps=10000)
    h.val=dict(dataset=dict(dataset="COCODataset", 
                            batch_size=8,  
                            dataset_dir="/data/bail/COCO", 
                            training=False, 
                            augmentations=[
                                dict(Resize=dict(size=h.input_size, strides=32, min_scale=1., max_scale=1.0))
                                # dict(ResizeV2=dict(short_side=800, long_side=1333, strides=64, min_scale=1.0, max_scale=1.0))
                            ]),
               samples=5000)
    # h.test=dict(nms="NonMaxSuppressionWithQuality",
    #             pre_nms_size=5000,
    #             post_nms_size=100, 
    #             iou_threshold=0.6, 
    #             score_threshold=0.5,
    #             sigma=0.5,
    #             nms_type="nms")
    h.test=dict(nms="CombinedNonMaxSuppression",
                pre_nms_size=2000,
                post_nms_size=100, 
                iou_threshold=0.6, 
                score_threshold=0.35)

    return h
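
All of these factories assume a Config object that starts empty and is filled by attribute assignment, plus module-level lookup tables (default_input_sizes, depth_multiplier_dict, model, ...) defined elsewhere in the repo. A minimal stand-in for Config, just enough to make the snippets runnable; the real class likely adds serialization helpers:

# Minimal sketch of the Config container these factories assume:
# an attribute-style namespace (an assumption, not the repo's actual class).
class Config:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

    def as_dict(self):
        # Recursively convert nested Config objects for inspection.
        return {k: v.as_dict() if isinstance(v, Config) else v
                for k, v in self.__dict__.items()}

    def __repr__(self):
        return "Config(%s)" % ", ".join(sorted(self.__dict__))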
Example #2
def get_onenet_config(num_classes=80):
    h = Config()

    input_size = (512, 512)
    h.detector = "OneNet"
    h.dtype = "float16"
    h.data_format = "channels_last"
    h.input_shape = (input_size[0], input_size[1], 3)
    h.num_classes = num_classes
    h.backbone = dict(backbone="ResNet18",
                      dropblock=None,
                      normalization=dict(normalization="batch_norm",
                                         momentum=0.997,
                                         epsilon=1e-4,
                                         trainable=False),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[2, 3, 4, 5],
                      frozen_stages=[
                          1,
                      ])

    h.neck = dict(neck="CenterNetDeconv",
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.997,
                                     epsilon=1e-4,
                                     trainable=False),
                  activation=dict(activation="relu"))
    h.head = dict(head="OneNetHead",
                  activation=dict(activation="relu"),
                  feat_dims=64,
                  dropblock=None,
                  num_classes=num_classes,
                  strides=4,
                  prior=0.01,
                  use_sigmoid=True,
                  assigner=dict(assigner="MinCostAssigner",
                                class_weight=2.,
                                l1_weight=2.,
                                iou_weight=5.,
                                iou_type="giou",
                                alpha=0.25,
                                gamma=2.),
                  label_loss=dict(loss="FocalLoss",
                                  alpha=0.25,
                                  gamma=2.,
                                  reduction="sum"),
                  bbox_loss=dict(loss="RegL1Loss", weight=1., reduction="sum"))

    h.weight_decay = 1e-4
    h.excluding_weight_names = ["predicted_box", "predicted_class"]
    h.train = dict(
        dataset=dict(dataset="COCODataset",
                     batch_size=4,
                     dataset_dir="/data/bail/COCO",
                     training=True,
                     augmentations=[
                         dict(augmentation="FlipLeftToRight", probability=0.5),
                         dict(augmentation="RandomDistortColor"),
                         dict(augmentation="Resize",
                              img_scale=(0.2, 2),
                              multiscale_mode="range",
                              keep_ratio=True),
                         dict(augmentation="RandCropOrPad",
                              size=(input_size, input_size),
                              clip_box_base_center=False),
                     ],
                     num_samples=118287),
        pretrained_weights_path=
        "/data/bail/pretrained_weights/resnet50/resnet50.ckpt",
        optimizer=dict(optimizer="SGD", momentum=0.9),
        mixed_precision=dict(
            loss_scale=None
        ),  # The loss scale in mixed precision training. If None, use dynamic.
        gradient_clip_norm=10.0,
        scheduler=dict(train_epochs=24,
                       learning_rate_scheduler=dict(
                           scheduler="PiecewiseConstantDecay",
                           boundaries=[16, 22],
                           values=[0.02, 0.002, 0.0002]),
                       warmup=dict(warmup_learning_rate=0.001, steps=800)),
        checkpoint_dir="checkpoints/onenet",
        summary_dir="logs/onenet",
        log_every_n_steps=100,
        save_ckpt_steps=5000)
    h.val = dict(dataset=dict(dataset="COCODataset",
                              batch_size=4,
                              dataset_dir="/data/bail/COCO",
                              training=False,
                              augmentations=[
                                  dict(augmentation="Resize",
                                       img_scale=[(1333, input_size)],
                                       keep_ratio=True),
                                  dict(augmentation="Pad", size_divisor=32)
                              ]),
                 samples=5000)
    h.test = dict(topk=100, score_threshold=0.3)

    return h
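
A typical consumer takes the defaults and overrides a few nested fields before training. A hedged usage sketch; the overridden values and the path below are placeholders, not values from the source:

# Hypothetical usage: fetch the defaults, then override nested fields.
cfg = get_onenet_config(num_classes=20)                  # e.g. a VOC-sized label set
cfg.train["dataset"]["batch_size"] = 8                   # nested sections are plain dicts
cfg.train["checkpoint_dir"] = "checkpoints/onenet_voc"   # placeholder path
print(cfg.detector, cfg.head["assigner"]["assigner"])    # OneNet MinCostAssigner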
Example #3
def get_gfl_config(num_classes=80):
    h = Config()
    
    input_size = (1024, 1024)
    h.detector = "GFL"
    h.dtype = "float16"
    h.data_format = "channels_last"
    h.input_shape = (input_size[0], input_size[1], 3)
    h.num_classes = num_classes
    h.backbone = dict(backbone="ResNet101",
                      dropblock=None, 
                      normalization=dict(normalization="batch_norm", momentum=0.997, epsilon=1e-4, trainable=False),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[3, 4, 5],
                      frozen_stages=[1, ])
    h.neck=dict(neck="FPN", 
                feat_dims=256,
                min_level=3, 
                max_level=5,
                num_output_levels=5, 
                add_extra_convs=True,
                relu_before_extra_convs=True)
    h.anchors=dict(generator="AnchorGeneratorV2",
                   aspect_ratios=[1.], 
                   octave_base_scale=8,
                   scales_per_octave=1,
                   strides=[8, 16, 32, 64, 128], 
                   num_anchors=1)
    h.head=dict(head="GFLHead",
                normalization=dict(normalization="group_norm", groups=32),
                activation=dict(activation="relu"),
                feat_dims=256,
                dropblock=None,
                num_classes=num_classes,
                repeats=4,
                min_level=3,
                max_level=7,
                use_sigmoid=True,
                prior=0.01,
                reg_max=16,
                bbox_decoder=dict(decoder="Distance2Box", weights=None),
                bbox_encoder=dict(encoder="Box2Distance", weights=None),
                assigner=dict(assigner="ATSSAssigner", topk=9),
                sampler=dict(sampler="PseudoSampler"),
                label_loss=dict(loss="QualityFocalLoss", beta=2.0, weight=1., from_logits=True, reduction="sum"),
                bbox_loss=dict(loss="GIoULoss", weight=2., reduction="sum"),
                dfl_loss=dict(loss="DistributionFocalLoss", weight=.25, reduction="sum"))
   
    h.weight_decay = 1e-4
    h.excluding_weight_names = ["predicted_box", "predicted_class"]
    h.train=dict(dataset=dict(dataset="COCODataset",
                              batch_size=4,
                              dataset_dir="/data/bail/COCO",
                              training=True,
                              augmentations=[
                                  dict(augmentation="FlipLeftToRight", probability=0.5),
                                  dict(augmentation="RandomDistortColor"),
                                  dict(augmentation="Resize", img_scale=[(1333, 800)], keep_ratio=True),
                                  dict(augmentation="Pad", size_divisor=32),
                              ],
                              num_samples=118287),
                  pretrained_weights_path="/data/bail/pretrained_weights/resnet50/resnet50.ckpt",

                  optimizer=dict(optimizer="SGD", momentum=0.9),
                  mixed_precision=dict(loss_scale=None),  # The loss scale in mixed precision training. If None, use dynamic.
                  gradient_clip_norm=10.0,

                  scheduler=dict(train_epochs=24,
                                 learning_rate_scheduler=dict(scheduler="PiecewiseConstantDecay",
                                                              boundaries=[16, 22],
                                                              values=[0.02, 0.002, 0.0002]),
                                 warmup=dict(warmup_learning_rate=0.001, steps=800)),
                  checkpoint_dir="checkpoints/gfl",
                  summary_dir="logs/gfl",
                  log_every_n_steps=100,
                  save_ckpt_steps=5000)
    h.val=dict(dataset=dict(dataset="COCODataset", 
                            batch_size=4,  
                            dataset_dir="/data/bail/COCO", 
                            training=False, 
                            augmentations=[
                                dict(augmentation="Resize", img_scale=[(1333, 800)], keep_ratio=True),
                                dict(augmentation="Pad", size_divisor=32),
                            ]),
               samples=5000)
    h.test=dict(nms="CombinedNonMaxSuppression",
                pre_nms_size=5000,
                post_nms_size=100, 
                iou_threshold=0.6, 
                score_threshold=0.3)

    return h
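
reg_max=16 together with DistributionFocalLoss means each box side is regressed as a discrete distribution over reg_max + 1 bins, and the predicted distance is its expectation; the repo's Distance2Box decoder then turns the four distances into a box. A small numpy sketch of that decoding step (illustrative only, not the repo's implementation):

import numpy as np

def dfl_expected_distance(logits, reg_max=16):
    """Decode one side's distance from its (reg_max + 1)-bin distribution."""
    probs = np.exp(logits - logits.max(-1, keepdims=True))
    probs /= probs.sum(-1, keepdims=True)                # softmax over bins
    bins = np.arange(reg_max + 1, dtype=np.float32)      # bin indices 0..reg_max
    return (probs * bins).sum(-1)                        # expectation, in stride units

# e.g. logits for the 4 sides (left, top, right, bottom) of one anchor point
logits = np.random.randn(4, 17).astype(np.float32)
print(dfl_expected_distance(logits))                     # 4 distances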
Example #4
def default_detection_configs(phi,
                              num_classes=90,
                              min_level=3,
                              max_level=7,
                              fpn_filters=64,
                              neck_repeats=3,
                              head_repeats=3,
                              anchor_scale=4,
                              num_scales=3,
                              batch_size=4,
                              image_size=512,
                              fpn_name="BiFPN",
                              fpn_input_dims=[80, 192, 320],
                              fusion_type="weighted_sum"):
    h = Config()

    h.detector = "EfficientDetD%d" % phi
    h.dtype = "float32"
    h.num_classes = num_classes
    h.backbone = dict(
        backbone="EfficientNetB%d" % phi,
        convolution="depthwise_conv2d",
        dropblock=None,
        #   dropblock=dict(keep_prob=None,
        #                  block_size=None)
        normalization=dict(normalization="batch_norm",
                           momentum=0.99,
                           epsilon=1e-3,
                           axis=-1,
                           trainable=False),
        activation=dict(activation="swish"),
        strides=[2, 1, 2, 2, 2, 1, 2, 1],
        dilation_rates=[1, 1, 1, 1, 1, 1, 1, 1],
        output_indices=[3, 4, 5],
        frozen_stages=[
            -1,
        ])
    h.neck = dict(neck=fpn_name,
                  input_size=image_size if isinstance(
                      image_size, (list, tuple)) else [image_size, image_size],
                  num_backbone_levels=3,
                  feat_dims=fpn_filters,
                  repeats=neck_repeats,
                  convolution="separable_conv2d",
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.99,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=False),
                  activation=dict(activation="swish"),
                  min_level=3,
                  max_level=7,
                  input_dims=fpn_input_dims,
                  pool_type=None,
                  apply_bn=True,
                  fusion_type=fusion_type)
    # anchor parameters
    strides = [2**l for l in range(min_level, max_level + 1)]
    scales = [[
        2**(i / num_scales) * s * anchor_scale for i in range(num_scales)
    ] for s in strides]
    aspect_ratios = [1., 0.5, 2.]
    num_anchors = len(scales[0]) * len(aspect_ratios)

    h.anchors = dict(aspect_ratios=aspect_ratios,
                     strides=strides,
                     scales=scales,
                     num_anchors=num_anchors)
    h.head = dict(head="RetinaNetHead",
                  convolution="separable_conv2d",
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.99,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=False),
                  activation=dict(activation="swish"),
                  feat_dims=fpn_filters,
                  dropblock=None,
                  repeats=head_repeats,
                  min_level=min_level,
                  max_level=max_level,
                  use_sigmoid=True,
                  prior=0.01,
                  survival_prob=None,
                  data_format="channels_last",
                  bbox_encoder=dict(encoder="Box2Delta", weights=None),
                  bbox_decoder=dict(decoder="Delta2Box", weights=None),
                  assigner=dict(assigner="ATSSAssigner", topk=9),
                  sampler=dict(sampler="PseudoSampler"),
                  label_loss=dict(loss="FocalLoss",
                                  gamma=2.0,
                                  alpha=0.25,
                                  label_smoothing=0.01,
                                  weight=1.,
                                  from_logits=True,
                                  reduction="sum"),
                  bbox_loss=dict(loss="CIoULoss", weight=1., reduction="sum"))
    h.weight_decay = 4e-5
    h.train = dict(
        input_size=(image_size, image_size),
        dataset=dict(
            dataset="COCODataset",
            batch_size=batch_size,
            dataset_dir="/data/bail/COCO",
            training=True,
            augmentations=[
                dict(FlipLeftToRight=dict(probability=0.5)),
                dict(RandomDistortColor=dict(probability=1.)),
                dict(Resize=dict(size=(image_size, image_size),
                                 strides=32,
                                 min_scale=0.5,
                                 max_scale=2.0)),
                #  dict(ResizeV2=dict(short_side=800, long_side=1333, strides=64, min_scale=1.0, max_scale=1.0))
            ],
            num_samples=118287,
            num_classes=num_classes),
        pretrained_weights_path=
        "/data/bail/pretrained_weights/resnet50/resnet50.ckpt",
        optimizer=dict(optimizer="SGD", momentum=0.9),
        mixed_precision=dict(
            loss_scale=None
        ),  # The loss scale in mixed precision training. If None, use dynamic.
        gradient_clip_norm=.0,
        scheduler=dict(
            train_epochs=18,
            #  learning_rate_scheduler=dict(scheduler="PiecewiseConstantDecay",
            #                               boundaries=[8, 11],
            #                               values=[0.02, 0.002, 0.0002]),
            learning_rate_scheduler=dict(scheduler="CosineDecay",
                                         initial_learning_rate=0.02),
            warmup=dict(warmup_learning_rate=0.0001, steps=800)),
        checkpoint_dir="checkpoints/efficientdet-d%d" % phi,
        summary_dir="logs/efficientdet-d%d" % phi,
        log_every_n_steps=100,
        save_ckpt_steps=5000)
    h.val = dict(
        dataset=dict(
            dataset="COCODataset",
            batch_size=batch_size,
            dataset_dir="/data/bail/COCO",
            training=False,
            augmentations=[
                dict(Resize=dict(size=(image_size, image_size),
                                 strides=32,
                                 min_scale=1.0,
                                 max_scale=1.0))
                # dict(ResizeV2=dict(short_side=800, long_side=1333, strides=64, min_scale=1.0, max_scale=1.0))
            ]),
        input_size=(image_size, image_size),
        samples=5000)
    h.test = dict(
        nms="CombinedNonMaxSuppression",
        pre_nms_size=5000,  # keep the top-k highest-scoring detections for NMS
        post_nms_size=100,
        iou_threshold=0.6,
        score_threshold=0.1,
    )

    return h
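
The anchor block above is pure arithmetic over pyramid levels; for the defaults (min_level=3, max_level=7, num_scales=3, anchor_scale=4) it can be checked in isolation:

# Standalone check of the anchor arithmetic used above.
min_level, max_level, num_scales, anchor_scale = 3, 7, 3, 4
strides = [2**l for l in range(min_level, max_level + 1)]   # [8, 16, 32, 64, 128]
scales = [[2**(i / num_scales) * s * anchor_scale
           for i in range(num_scales)] for s in strides]
print(strides)
print([round(x, 1) for x in scales[0]])  # level 3: [32.0, 40.3, 50.8]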
Example #5
def get_faster_rcnn_config(num_classes=80):
    h = Config()

    h.detector = "FasterRCNN"
    h.dtype = "float16"
    h.num_classes = num_classes
    h.backbone = dict(backbone="ResNet50V1D",
                      convolution="conv2d",
                      dropblock=None,
                      normalization=dict(normalization="batch_norm",
                                         momentum=0.997,
                                         epsilon=1e-4,
                                         trainable=False),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[2, 3, 4, 5],
                      frozen_stages=[
                          -1,
                      ])
    h.neck = dict(neck="FPN",
                  convolution="conv2d",
                  feat_dims=256,
                  input_dims=[256, 512, 1024, 2048],
                  min_level=2,
                  max_level=6,
                  add_extra_convs=False)
    h.anchors = dict(scales=[[32], [64], [128], [256], [512]],
                     aspect_ratios=[0.5, 1., 2.0],
                     strides=[4, 8, 16, 32, 64],
                     num_anchors=3)
    h.rpn_head = dict(head="RPNHead",
                      convolution="conv2d",
                      normalization=None,
                      activation=dict(activation="relu"),
                      feat_dims=256,
                      dropblock=None,
                      num_classes=2,
                      min_level=2,
                      max_level=6,
                      use_sigmoid=False,
                      bbox_encoder=dict(encoder="Box2Delta", weights=None),
                      bbox_decoder=dict(decoder="Delta2Box", weights=None),
                      assigner=dict(assigner="MaxIoUAssigner",
                                    pos_iou_thresh=0.7,
                                    neg_iou_thresh=0.3),
                      sampler=dict(sampler="RandomSampler",
                                   num_proposals=256,
                                   pos_fraction=0.5,
                                   add_gt_as_proposals=False),
                      label_loss=dict(loss="CrossEntropy",
                                      label_smoothing=0.01,
                                      weight=1.,
                                      from_logits=True,
                                      reduction="none"),
                      bbox_loss=dict(loss="SmoothL1Loss",
                                     delta=1. / 9.,
                                     weight=1.,
                                     reduction="none"))
    h.roi_head = dict(
        num_stages=3,
        roi_pooling=dict(roi_pooling="MultiLevelAlignedRoIPooling",
                         cropped_size=7,
                         strides=(4, 8, 16, 32, 64)),
        bbox_head=[
            dict(head="Shared2FCRCNNHead",
                 activation=dict(activation="relu"),
                 dropblock=None,
                 fc_dims=1024,
                 num_classes=num_classes,
                 bbox_encoder=dict(encoder="Box2Delta",
                                   weights=[10., 10., 5., 5.]),
                 bbox_decoder=dict(decoder="Delta2Box",
                                   weights=[10., 10., 5., 5.]),
                 assigner=dict(
                     assigner="MaxIoUAssigner",
                     pos_iou_thresh=0.5,
                     neg_iou_thresh=0.5,
                 ),
                 sampler=dict(sampler="RandomSampler",
                              num_proposals=512,
                              pos_fraction=0.25,
                              add_gt_as_proposals=True),
                 label_loss=dict(loss="CrossEntropy",
                                 label_smoothing=0.0,
                                 weight=1.,
                                 from_logits=True,
                                 reduction="none"),
                 bbox_loss=dict(loss="SmoothL1Loss",
                                delta=1.,
                                weight=1.,
                                reduction="none"),
                 use_sigmoid=False,
                 reg_class_agnostic=True),
            dict(head="Shared2FCRCNNHead",
                 activation=dict(activation="relu"),
                 dropblock=None,
                 fc_dims=1024,
                 num_classes=num_classes,
                 bbox_encoder=dict(encoder="Box2Delta",
                                   weights=[20., 20., 10., 10.]),
                 bbox_decoder=dict(decoder="Delta2Box",
                                   weights=[20., 20., 10., 10.]),
                 assigner=dict(
                     assigner="MaxIoUAssigner",
                     pos_iou_thresh=0.6,
                     neg_iou_thresh=0.6,
                 ),
                 sampler=dict(sampler="RandomSampler",
                              num_proposals=512,
                              pos_fraction=0.25,
                              add_gt_as_proposals=True),
                 label_loss=dict(loss="CrossEntropy",
                                 label_smoothing=0.01,
                                 weight=1.,
                                 from_logits=True,
                                 reduction="none"),
                 bbox_loss=dict(loss="SmoothL1Loss",
                                delta=1.,
                                weight=1.,
                                reduction="none"),
                 use_sigmoid=False,
                 reg_class_agnostic=True),
            dict(head="Shared2FCRCNNHead",
                 activation=dict(activation="relu"),
                 dropblock=None,
                 fc_dims=1024,
                 num_classes=num_classes,
                 bbox_encoder=dict(
                     encoder="Box2Delta",
                     weights=[1. / 0.033, 1. / 0.022, 1. / 0.067, 1. / 0.067]),
                 bbox_decoder=dict(
                     decoder="Delta2Box",
                     weights=[1. / 0.033, 1. / 0.022, 1. / 0.067, 1. / 0.067]),
                 assigner=dict(assigner="MaxIoUAssigner",
                               pos_iou_thresh=0.7,
                               neg_iou_thresh=0.7),
                 sampler=dict(sampler="RandomSampler",
                              num_proposals=512,
                              pos_fraction=0.25,
                              add_gt_as_proposals=True),
                 label_loss=dict(loss="CrossEntropy",
                                 label_smoothing=0.01,
                                 weight=1.,
                                 from_logits=True,
                                 reduction="none"),
                 bbox_loss=dict(loss="SmoothL1Loss",
                                delta=1.,
                                weight=1.,
                                reduction="none"),
                 use_sigmoid=False,
                 reg_class_agnostic=True)
        ])
    h.weight_decay = 1e-4
    h.train = dict(
        proposal_layer=dict(pre_nms_size=12000,
                            post_nms_size=2000,
                            max_total_size=2000,
                            iou_threshold=0.7,
                            min_size=0),
        input_size=(1024, 1024),
        dataset=dict(dataset="COCODataset",
                     batch_size=2,
                     dataset_dir="/data/bail/COCO",
                     training=True,
                     augmentations=[
                         dict(FlipLeftToRight=dict(probability=0.5)),
                         dict(RandomDistortColor=dict(probability=1.)),
                         dict(Resize=dict(size=(1024, 1024),
                                          strides=128,
                                          min_scale=1.0,
                                          max_scale=1.0)),
                     ],
                     num_samples=118287,
                     num_classes=num_classes),
        pretrained_weights_path="/data/bail/pretrained_weights/resnet50_v1d.h5",
        optimizer=dict(optimizer="SGD", momentum=0.9),
        mixed_precision=dict(
            loss_scale=None
        ),  # The loss scale in mixed precision training. If None, use dynamic.
        gradient_clip_norm=10.0,
        scheduler=dict(train_epochs=12,
                       learning_rate_scheduler=dict(
                           scheduler="PiecewiseConstantDecay",
                           boundaries=[8, 11],
                           values=[0.02, 0.002, 0.0002]),
                       warmup=dict(warmup_learning_rate=0.0001, steps=8000)),
        checkpoint_dir="checkpoints/faster_rcnn",
        summary_dir="logs/faster_rcnn",
        log_every_n_steps=100,
        save_ckpt_steps=5000)
    h.val = dict(dataset=dict(
        dataset="COCODataset",
        batch_size=2,
        dataset_dir="/data/bail/COCO",
        training=False,
        augmentations=[
            dict(Resize=dict(
                size=(1024, 1024), strides=128, min_scale=1.0, max_scale=1.0))
        ]),
                 input_size=(1024, 1024),
                 samples=5000,
                 val_every_n_steps=250)
    h.test = dict(
        proposal_layer=dict(pre_nms_size=6000,
                            post_nms_size=1000,
                            max_total_size=1000,
                            iou_threshold=0.7,
                            min_size=0),
        pre_nms_size=1000,  # keep the top-k highest-scoring detections for NMS
        post_nms_size=100,
        iou_threshold=0.5,
        score_threshold=0.05)

    return h
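
The Box2Delta/Delta2Box pair with weights=[10., 10., 5., 5.] matches the standard R-CNN box parameterization, with each delta scaled by its weight. A numpy sketch under that assumption (not the repo's own encoder classes), with a round-trip check:

import numpy as np

def box2delta(boxes, anchors, weights=(10., 10., 5., 5.)):
    """Encode target boxes against anchors; boxes are (x1, y1, x2, y2)."""
    wx, wy, ww, wh = weights
    aw, ah = anchors[:, 2] - anchors[:, 0], anchors[:, 3] - anchors[:, 1]
    ax, ay = anchors[:, 0] + 0.5 * aw, anchors[:, 1] + 0.5 * ah
    bw, bh = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1]
    bx, by = boxes[:, 0] + 0.5 * bw, boxes[:, 1] + 0.5 * bh
    return np.stack([wx * (bx - ax) / aw, wy * (by - ay) / ah,
                     ww * np.log(bw / aw), wh * np.log(bh / ah)], axis=1)

def delta2box(deltas, anchors, weights=(10., 10., 5., 5.)):
    """Invert box2delta."""
    wx, wy, ww, wh = weights
    aw, ah = anchors[:, 2] - anchors[:, 0], anchors[:, 3] - anchors[:, 1]
    ax, ay = anchors[:, 0] + 0.5 * aw, anchors[:, 1] + 0.5 * ah
    cx = deltas[:, 0] / wx * aw + ax
    cy = deltas[:, 1] / wy * ah + ay
    w = np.exp(deltas[:, 2] / ww) * aw
    h = np.exp(deltas[:, 3] / wh) * ah
    return np.stack([cx - 0.5 * w, cy - 0.5 * h,
                     cx + 0.5 * w, cy + 0.5 * h], axis=1)

anchors = np.array([[0., 0., 64., 64.]])
boxes = np.array([[8., 4., 72., 60.]])
np.testing.assert_allclose(delta2box(box2delta(boxes, anchors), anchors), boxes, atol=1e-5)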
Example #6
def get_yolof_config(num_classes=80):
    h = Config()
    
    data_format = "channels_last"
    input_size = (1024, 1024)
    h.detector = "YOLOF"
    h.dtype = "float16"
    h.data_format = data_format
    h.input_shape = (input_size[0], input_size[1], 3)
    h.num_classes = num_classes
    h.backbone = dict(backbone="ResNeXt101_64X4D",
                      dropblock=None, 
                      normalization=dict(normalization="batch_norm", momentum=0.997, epsilon=1e-4, trainable=False),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[5],
                      frozen_stages=[1, ])
    h.neck=dict(neck="DilatedEncoder", 
                filters=512,
                midfilters=128,
                dilation_rates=[2, 4, 6, 8],  # use when backbone stage 5 is not dilated (C5, stride 32)
                # dilation_rates=[4, 8, 12, 16],
                normalization=dict(normalization="batch_norm", momentum=0.9, epsilon=1e-3, axis=-1, trainable=True),
                activation=dict(activation="relu"),
                kernel_initializer="he_normal",
                data_format=data_format)
    h.anchors=dict(generator="AnchorGenerator",
                   aspect_ratios=[1.],
                   scales=[32, 64, 128, 256, 512],
                   strides=32, 
                #    scales=[16, 32, 64, 128, 256, 512],
                #    strides=16, 
                   num_anchors=5)
    h.head=dict(head="YOLOFHead",
                normalization=dict(normalization="batch_norm", momentum=0.9, epsilon=1e-3, axis=-1, trainable=True),
                activation=dict(activation="relu"),
                feat_dims=512,
                cls_num_convs=2,
                reg_num_convs=4,
                kernel_initializer="he_normal",
                use_sigmoid=True,
                prior=0.01,
                bbox_decoder=dict(decoder="Delta2Box", weights=[1., 1., 1., 1.]),
                bbox_encoder=dict(encoder="Box2Delta", weights=[1., 1., 1., 1.]),
                assigner=dict(assigner="UniformAssigner", match_times=8, pos_ignore_thresh=0.7, neg_ignore_thresh=0.15),
                sampler=dict(sampler="PseudoSampler"),
                label_loss=dict(loss="FocalLoss", alpha=0.25, gamma=2.0, weight=1., from_logits=True, reduction="sum"),
                bbox_loss=dict(loss="GIoULoss", weight=2., reduction="sum"))
   
    h.weight_decay = 1e-4
    h.excluding_weight_names = ["predicted_box", "predicted_class"]
    h.train=dict(dataset=dict(dataset="COCODataset",
                              batch_size=4,
                              dataset_dir="/data/bail/COCO",
                              training=True,
                              augmentations=[
                                  dict(augmentation="FlipLeftToRight", probability=0.5),
                                  dict(augmentation="RandomDistortColor"),
                                  dict(augmentation="Resize", img_scale=[(1333, 800)], keep_ratio=True),
                                  dict(augmentation="Pad", size_divisor=32)
                              ],
                              num_samples=118287),
                  pretrained_weights_path="/data/bail/pretrained_weights/resnet50/resnet50.ckpt",

                  optimizer=dict(optimizer="SGD", momentum=0.9),
                  mixed_precision=dict(loss_scale=None),  # The loss scale in mixed precision training. If None, use dynamic.
                  gradient_clip_norm=10.0,

                  scheduler=dict(train_epochs=24,
                                 learning_rate_scheduler=dict(scheduler="PiecewiseConstantDecay",
                                                              boundaries=[16, 22],
                                                              values=[0.02, 0.002, 0.0002]),
                                 warmup=dict(warmup_learning_rate=0.001, steps=800)),
                  checkpoint_dir="checkpoints/yolof",
                  summary_dir="logs/yolof",
                  log_every_n_steps=100,
                  save_ckpt_steps=5000)
    h.val=dict(dataset=dict(dataset="COCODataset", 
                            batch_size=4,  
                            dataset_dir="/data/bail/COCO", 
                            training=False, 
                            augmentations=[
                                dict(augmentation="Resize", img_scale=[(1333, 800)], keep_ratio=True),
                                dict(augmentation="Pad", size_divisor=32),
                            ]),
               samples=5000)
    h.test=dict(nms="CombinedNonMaxSuppression",
                pre_nms_size=2000,
                post_nms_size=100, 
                iou_threshold=0.5, 
                score_threshold=0.35)

    return h
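
With strides=32 and five square scales, YOLOF places all num_anchors=5 anchors on the single C5 grid. A sketch of such a single-level generator, assuming center-placed square anchors (the repo's AnchorGenerator may differ in ordering or offset conventions):

import numpy as np

def single_level_anchors(feat_h, feat_w, stride=32, scales=(32, 64, 128, 256, 512)):
    """Square anchors of every scale at each cell center of one feature map."""
    ys = (np.arange(feat_h) + 0.5) * stride
    xs = (np.arange(feat_w) + 0.5) * stride
    cx, cy = np.meshgrid(xs, ys)
    centers = np.stack([cx.ravel(), cy.ravel()], axis=1)      # (H*W, 2)
    anchors = []
    for s in scales:
        half = s / 2.
        anchors.append(np.concatenate([centers - half, centers + half], axis=1))
    return np.stack(anchors, axis=1)                          # (H*W, 5, 4), x1y1x2y2

print(single_level_anchors(2, 2).shape)  # (4, 5, 4)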
Example #7
def default_detection_configs(phi, 
                              min_level=3, 
                              max_level=7, 
                              fpn_filters=64,
                              neck_repeats=3,
                              head_repeats=3,
                              anchor_scale=4,
                              num_scales=3,
                              batch_size=4,
                              image_size=512,
                              fusion_type="weighted_sum"):
    h = Config()

    # model name
    h.detector = "efficientdet-d%d" % phi
    h.min_level = min_level
    h.max_level = max_level
    h.dtype = "float16"

    # backbone
    h.backbone = dict(backbone="efficientnet-b%d" % phi,
                      convolution="depthwise_conv2d",
                      dropblock=None,
                    #   dropblock=dict(keep_prob=None,
                    #                  block_size=None)
                      normalization=dict(normalization="batch_norm",
                                         momentum=0.99,
                                         epsilon=1e-3,
                                         axis=-1,
                                         trainable=False),
                      activation=dict(activation="swish"),
                      strides=[2, 1, 2, 2, 2, 1, 2, 1],
                      dilation_rates=[1, 1, 1, 1, 1, 1, 1, 1],
                      output_indices=[3, 4, 5],
                      frozen_stages=[-1])
    
    # neck
    h.neck = dict(neck="bifpn",
                  repeats=neck_repeats,
                  convolution="separable_conv2d",
                  dropblock=None,
                #   dropblock=dict(keep_prob=None,
                #                  block_size=None)
                  feat_dims=fpn_filters,
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.99,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=False),
                  activation=dict(activation="swish"),
                  add_extra_conv=False,  # Add extra convolution for neck
                  fusion_type=fusion_type, 
                  use_multiplication=False)
    
    # head
    h.head = dict(head="RetinaNetHead",
                  repeats=head_repeats,
                  convolution="separable_conv2d",
                  dropblock=None,
                #   dropblock=dict(keep_prob=None,
                #                  block_size=None)
                  feat_dims=fpn_filters,
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.99,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=False),
                  activation=dict(activation="swish"),
                  prior=0.01)
    
    # anchors parameters
    strides = [2 ** l for l in range(min_level, max_level + 1)]
    h.anchor = dict(aspect_ratios=[[1., 0.5, 2.]] * (max_level - min_level + 1),
                    scales=[
                        [2 ** (i / num_scales) * s * anchor_scale
                         for i in range(num_scales)] for s in strides
                    ],
                    num_anchors=num_scales * 3)  # num_scales scales x 3 aspect ratios

    # assigner
    h.assigner = dict(assigner="max_iou_assigner",
                      pos_iou_thresh=0.5,
                      neg_iou_thresh=0.5)
    # sampler
    h.sampler = dict(sampler="pseudo_sampler")
    
    # loss
    h.use_sigmoid = True
    h.label_loss=dict(loss="focal_loss",
                      alpha=0.25,
                      gamma=1.5,
                      label_smoothing=0.,
                      weight=1.,
                      from_logits=True,
                      reduction="none")
    h.bbox_loss=dict(loss="smooth_l1_loss",
                     weight=50.,   # 50.
                     delta=.1,    # .1
                     reduction="none")
    # h.box_loss=dict(loss="giou_loss",
    #                 weight=10.,
    #                 reduction="none")
    h.weight_decay = 4e-5

    h.bbox_mean = None  # [0., 0., 0., 0.]
    h.bbox_std = None  # [0.1, 0.1, 0.2, 0.2]

    # dataset
    h.num_classes = 90
    h.skip_crowd_during_training = True
    h.dataset = "objects365"

    h.batch_size = batch_size
    h.input_size = [image_size, image_size]
    h.train_dataset_dir = "/home/bail/Data/data1/Dataset/Objects365/train"
    h.val_dataset_dir = "/home/bail/Data/data1/Dataset/Objects365/train"
    h.augmentation = [
        dict(ssd_crop=dict(patch_area_range=(0.3, 1.),
                            aspect_ratio_range=(0.5, 2.0),
                            min_overlaps=(0.1, 0.3, 0.5, 0.7, 0.9),
                            max_attempts=100,
                            probability=.5)),
        # dict(data_anchor_sampling=dict(anchor_scales=(16, 32, 64, 128, 256, 512),
        #                                overlap_threshold=0.7,
        #                                max_attempts=50,
        #                                probability=.5)),
        dict(flip_left_to_right=dict(probability=0.5)),
        dict(random_distort_color=dict(probability=1.))
        ]

    # train
    h.pretrained_weights_path = "/home/bail/Workspace/pretrained_weights/efficientdet-d%d" % phi

    h.optimizer = dict(optimizer="sgd", momentum=0.9)
    h.lookahead = None

    h.train_steps = 240000
    h.learning_rate_scheduler = dict(scheduler="cosine", initial_learning_rate=0.002)
    h.warmup = dict(warmup_learning_rate=0.00001, steps=24000)
    h.checkpoint_dir = "checkpoints/efficientdet_d%d" % phi
    h.summary_dir = "logs/efficientdet_d%d" % phi

    h.gradient_clip_norm = .0

    h.log_every_n_steps = 500
    h.save_ckpt_steps = 10000
    h.val_every_n_steps = 4000

    h.postprocess = dict(pre_nms_size=5000,   # keep the top-k highest-scoring detections for NMS
                         post_nms_size=100,
                         iou_threshold=0.5,
                         score_threshold=0.2)
    
    return h
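
The scheduler fields (cosine decay from 0.002 over 240000 steps, linear warmup from 1e-5 over the first 24000) translate into a simple per-step rule. A sketch under that reading; the repo's exact warmup/decay interpolation may differ:

import math

def learning_rate(step, base_lr=0.002, total_steps=240000,
                  warmup_lr=0.00001, warmup_steps=24000):
    """Linear warmup into a cosine decay, as the config above suggests."""
    if step < warmup_steps:
        return warmup_lr + (base_lr - warmup_lr) * step / warmup_steps
    t = (step - warmup_steps) / (total_steps - warmup_steps)
    return 0.5 * base_lr * (1. + math.cos(math.pi * t))

for s in (0, 24000, 120000, 240000):
    print(s, round(learning_rate(s), 6))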
Example #8
def get_faster_rcnn_config(num_classes=80):
    h = Config()

    h.detector = "FasterRCNN"
    h.dtype = "float16"
    h.data_format = "channels_last"
    h.num_classes = num_classes
    h.backbone = dict(backbone="CaffeResNet50",
                      dropblock=None,
                      normalization=dict(normalization="batch_norm",
                                         momentum=0.997,
                                         epsilon=1e-4,
                                         trainable=True),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[2, 3, 4, 5],
                      frozen_stages=[
                          -1,
                      ])
    h.neck = dict(neck="FPN",
                  feat_dims=256,
                  min_level=2,
                  max_level=5,
                  num_output_levels=5,
                  add_extra_convs=False)
    h.anchors = dict(generator="AnchorGenerator",
                     scales=[[32], [64], [128], [256], [512]],
                     aspect_ratios=[0.5, 1., 2.0],
                     strides=[4, 8, 16, 32, 64],
                     num_anchors=3)
    h.rpn_head = dict(head="RPNHead",
                      normalization=None,
                      activation=dict(activation="relu"),
                      feat_dims=256,
                      dropblock=None,
                      num_classes=1,
                      min_level=2,
                      max_level=6,
                      use_sigmoid=True,
                      train_proposal=dict(pre_nms_size=2000,
                                          post_nms_size=1000,
                                          iou_threshold=0.7,
                                          min_box_size=0.),
                      test_proposal=dict(pre_nms_size=1000,
                                         post_nms_size=1000,
                                         iou_threshold=0.7,
                                         min_box_size=0.1),
                      bbox_encoder=dict(encoder="Box2Delta", weights=None),
                      bbox_decoder=dict(decoder="Delta2Box", weights=None),
                      assigner=dict(assigner="MaxIoUAssigner",
                                    pos_iou_thresh=0.7,
                                    neg_iou_thresh=0.3,
                                    min_pos_iou=0.3),
                      sampler=dict(sampler="RandomSampler",
                                   num_proposals=256,
                                   pos_fraction=0.5,
                                   add_gt_as_proposals=False),
                      label_loss=dict(loss="CrossEntropy",
                                      label_smoothing=0.0,
                                      weight=1.,
                                      from_logits=True,
                                      reduction="none"),
                      bbox_loss=dict(loss="SmoothL1Loss",
                                     delta=1. / 9.,
                                     weight=1.,
                                     reduction="none"))
    h.roi_head = dict(head="StandardRoIHead",
                      bbox_head=dict(roi_pooling=dict(
                          roi_pooling="MultiLevelAlignedRoIPooling",
                          pooled_size=7,
                          feat_dims=256),
                                     normalization=None,
                                     activation=dict(activation="relu"),
                                     dropblock=None,
                                     num_convs=0,
                                     conv_dims=256,
                                     num_fc=2,
                                     fc_dims=1024,
                                     feat_dims=256),
                      min_level=2,
                      max_level=5,
                      class_agnostic=False,
                      use_sigmoid=False,
                      bbox_encoder=dict(encoder="Box2Delta",
                                        weights=[10., 10., 5., 5.]),
                      bbox_decoder=dict(decoder="Delta2Box",
                                        weights=[10., 10., 5., 5.]),
                      assigner=dict(assigner="MaxIoUAssigner",
                                    pos_iou_thresh=0.5,
                                    neg_iou_thresh=0.5,
                                    min_pos_iou=0.5),
                      sampler=dict(sampler="RandomSampler",
                                   num_proposals=512,
                                   pos_fraction=0.25,
                                   add_gt_as_proposals=True),
                      label_loss=dict(loss="CrossEntropy",
                                      label_smoothing=0.0,
                                      weight=1.,
                                      from_logits=True,
                                      reduction="none"),
                      bbox_loss=dict(loss="SmoothL1Loss",
                                     delta=1.,
                                     weight=1.,
                                     reduction="none"),
                      reg_class_agnostic=True)
    h.weight_decay = 1e-4
    h.train = dict(
        dataset=dict(dataset="COCODataset",
                     batch_size=2,
                     dataset_dir="/data/bail/COCO",
                     training=True,
                     augmentations=[
                         dict(augmentation="FlipLeftToRight", probability=0.5),
                         dict(augmentation="RandomDistortColor"),
                         dict(augmentation="Resize",
                              img_scale=[(1333, 800)],
                              keep_ratio=True),
                         dict(augmentation="Pad", size_divisor=32),
                     ],
                     num_samples=118287,
                     num_classes=num_classes),
        pretrained_weights_path=
        "/data/bail/pretrained_weights/resnet50/resnet50.ckpt",
        optimizer=dict(optimizer="SGD", momentum=0.9),
        mixed_precision=dict(
            loss_scale=None
        ),  # The loss scale in mixed precision training. If None, use dynamic.
        gradient_clip_norm=10.0,
        scheduler=dict(train_epochs=12,
                       learning_rate_scheduler=dict(
                           scheduler="PiecewiseConstantDecay",
                           boundaries=[8, 11],
                           values=[0.01, 0.001, 0.0001]),
                       warmup=dict(warmup_learning_rate=0.00001, steps=2000)),
        checkpoint_dir="checkpoints/faster_rcnn",
        summary_dir="logs/faster_rcnn",
        log_every_n_steps=100,
        save_ckpt_steps=5000)
    h.val = dict(dataset=dict(dataset="COCODataset",
                              batch_size=1,
                              dataset_dir="/data/bail/COCO",
                              training=False,
                              augmentations=[
                                  dict(augmentation="Resize",
                                       img_scale=[(1333, 800)],
                                       keep_ratio=True),
                                  dict(augmentation="Pad", size_divisor=32),
                              ]),
                 input_size=(1024, 1024),
                 samples=5000)
    h.test = dict(nms="CombinedNonMaxSuppression",
                  pre_nms_size=5000,
                  post_nms_size=100,
                  iou_threshold=0.6,
                  score_threshold=0.5)

    return h
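
The boundaries=[8, 11] in the scheduler are epochs; with num_samples=118287 and batch_size=2 a trainer would convert them to step counts before handing them to a piecewise-constant schedule. A quick check of that conversion (assuming the trainer does exactly this integer arithmetic):

# Hypothetical epoch -> step conversion for the schedule above.
num_samples, batch_size = 118287, 2
steps_per_epoch = num_samples // batch_size          # 59143
boundaries_steps = [e * steps_per_epoch for e in (8, 11)]
print(steps_per_epoch, boundaries_steps)             # 59143 [473144, 650573]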
Example #9
def get_yolov5_config(num_classes=80, depth_multiple=.33, width_multiple=.50, name="yolov5s"):
    h = Config()
    
    h.detector = "YOLOv5"
    h.dtype = "float16"
    h.num_classes = num_classes
    h.depth_multiple = depth_multiple  # 0.33 0.67 1.0 1.33
    h.width_multiple = width_multiple  # 0.50 0.75 1.0 1.25
    h.model = [ 
        #        [from, number, module, args]
        # backbone
        [-1, 1, "Focus", dict(filters=64, kernel_size=3)],        # 0-P1/2
        [-1, 1, "Conv", dict(filters=128, kernel_size=3, strides=2)],
        [-1, 3, "BottleneckCSP", dict(filters=128)],  # 2-P3/8
        [-1, 1, "Conv", dict(filters=256, kernel_size=3, strides=2)],
        [-1, 9, "BottleneckCSP", dict(filters=256)],  # 4-P4/16
        [-1, 1, "Conv", dict(filters=512, kernel_size=3, strides=2)],
        [-1, 9, "BottleneckCSP", dict(filters=512)],  # 6-P5/32
        [-1, 1, "Conv", dict(filters=1024, kernel_size=3, strides=2)],
        [-1, 1, "SpatialPyramidPooling", dict(filters=1024, pool_sizes=[5, 9, 13])],
        [-1, 3, "BottleneckCSP", dict(filters=1024, shortcut=False)],  # 9 
        # head
        [-1, 1, "Conv", dict(filters=512, kernel_size=1, strides=1)],
        [-1, 1, "Upsample", dict(size=2, interpolation="nearest")],
        [[-1, 6], 1, "Concat", dict(axis=-1)],  # concat backbone P4
        [-1, 3, "BottleneckCSP", dict(filters=512, shortcut=False)],  # 13
        
        [-1, 1, "Conv", dict(filters=256, kernel_size=1, strides=1)],
        [-1, 1, "Upsample", dict(size=2, interpolation="nearest")],
        [[-1, 4], 1, "Concat", dict(axis=-1)],  # concat backbone P3
        [-1, 3, "BottleneckCSP", dict(filters=256, shortcut=False)],  # 17 (P3/8-small)
        
        [-1, 1, "Conv", dict(filters=256, kernel_size=3, strides=2)],
        [[-1, 14], 1, "Concat", dict(axis=-1)],  # concat head P4
        [-1, 3, "BottleneckCSP", dict(filters=512, shortcut=False)],  # 20 (P4/16-medium)
        
        [-1, 1, "Conv", dict(filters=512, kernel_size=3, strides=2)],
        [[-1, 10], 1, "Concat", dict(axis=-1)],  # concat head P5
        [-1, 3, "BottleneckCSP", dict(filters=1024, shortcut=False)],  # 23 (P5-large)
        
        [[17, 20, 23], 1, "Detect", dict(anchors="anchors", num_classes="num_classes")]
    ]
    h.min_level = 3
    h.max_level = 5
    h.strides = [8, 16, 32] 
    h.anchors = [[10, 13, 16, 30, 33, 23], 
                 [30, 61, 62, 45, 59, 119], 
                 [116, 90, 156, 198, 373, 326]]
    h.num_anchors = 3
    h.anchor_threshold = 4.  # 2.91 if finetuning else 4.0
    h.gr = 1.
        
    h.bbox_loss = dict(loss="CIoULoss", weight=1., reduction="none")
    h.label_loss = dict(loss="BinaryCrossEntropy", weight=1., from_logits=True, reduction="none")  # weight = 0.631 when fine-tuning
    h.conf_loss = dict(loss="BinaryCrossEntropy", weight=1., from_logits=True, reduction="none")   # weight = 0.911 when fine-tuning
    h.balance = [1., 1., 1.]  # [4.0, 1.0, 0.4] for three levels, [4.0, 1.0, 0.4, 0.1] for four
    h.box_weight = 0.05  # 0.0296 when fine-tuning
    h.label_weight = .5  # 0.243 when fine-tuning
    h.conf_weight = 1.0  # 0.301 when fine-tuning
    
    h.weight_decay = 0.0005
    h.train=dict(dataset=dict(dataset="COCODataset",
                              batch_size=8,
                              dataset_dir="/data/bail/COCO",
                              training=True,
                              augmentations=[
                                  dict(FlipLeftToRight=dict(probability=0.5)),
                                  dict(RandomDistortColor=dict(probability=1.)),
                                #   dict(Resize=dict(size=(640, 640), strides=32, min_scale=.5, max_scale=2.)),
                                  dict(ResizeV2=dict(short_side=640, long_side=1024, strides=32, min_scale=1.0, max_scale=1.0))
                                  ],
                            #   mixup=dict(alpha=8.0, prob=0.5),
                              mosaic=dict(size=640, min_image_scale=0.25, prob=1.),
                              num_samples=118287),
                  pretrained_weights_path="/data/bail/pretrained_weights/darknet53-notop/darknet53.ckpt",
                  optimizer=dict(optimizer="SGD", momentum=0.937),
                  mixed_precision=dict(loss_scale=None),  # The loss scale in mixed precision training. If None, use dynamic.
                  gradient_clip_norm=.0,

                  scheduler=dict(train_epochs=480,
                                 #  learning_rate_scheduler=dict(scheduler="PiecewiseConstantDecay",
                                 #                               boundaries=[24, 32],
                                 #                               values=[0.012, 0.0012, 0.00012]),
                                 learning_rate_scheduler=dict(scheduler="CosineDecay", initial_learning_rate=0.012),
                                 warmup=dict(warmup_learning_rate=0.0012, steps=12000)),
                  checkpoint_dir="checkpoints/%s" % name,
                  summary_dir="logs/%s" % name,
                  log_every_n_steps=100,
                  save_ckpt_steps=10000)
    h.val=dict(dataset=dict(dataset="COCODataset", 
                            batch_size=8,  
                            dataset_dir="/data/bail/COCO", 
                            training=False, 
                            augmentations=[
                                dict(Resize=dict(size=(640, 640), strides=32, min_scale=1., max_scale=1.0))
                                # dict(ResizeV2=dict(short_side=800, long_side=1333, strides=64, min_scale=1.0, max_scale=1.0))
                            ]),
               samples=5000)
    h.test=dict(nms="NonMaxSuppressionWithQuality",
                pre_nms_size=5000,
                post_nms_size=100, 
                iou_threshold=0.6, 
                score_threshold=0.05,
                sigma=0.5,
                nms_type="nms")
    # h.test=dict(nms="CombinedNonMaxSuppression",
    #             pre_nms_size=5000,
    #             post_nms_size=100, 
    #             iou_threshold=0.6, 
    #             score_threshold=0.05)

    return h
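
The depth_multiple/width_multiple pair follows the usual YOLOv5 scaling rule: block repeat counts are scaled by depth_multiple, and filter counts by width_multiple rounded up to a multiple of 8. A sketch of that rule as it would apply to the model table above; the rounding details mirror upstream YOLOv5 and are an assumption about this repo:

import math

def scale_depth(number, depth_multiple=0.33):
    """Scale a block's repeat count, keeping at least one repeat."""
    return max(round(number * depth_multiple), 1) if number > 1 else number

def scale_width(filters, width_multiple=0.50, divisor=8):
    """Scale channel counts up to the nearest multiple of `divisor`."""
    return int(math.ceil(filters * width_multiple / divisor) * divisor)

# e.g. the 9-repeat BottleneckCSP(512) row of yolov5s:
print(scale_depth(9), scale_width(512))   # 3 256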