Example #1
def get_onenet_config(num_classes=80):
    h = Config()

    input_size = (512, 512)
    h.detector = "OneNet"
    h.dtype = "float16"
    h.data_format = "channels_last"
    h.input_shape = (input_size[0], input_size[1], 3)
    h.num_classes = num_classes
    h.backbone = dict(backbone="ResNet18",
                      dropblock=None,
                      normalization=dict(normalization="batch_norm",
                                         momentum=0.997,
                                         epsilon=1e-4,
                                         trainable=False),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[2, 3, 4, 5],
                      frozen_stages=[
                          1,
                      ])

    h.neck = dict(neck="CenterNetDeconv",
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.997,
                                     epsilon=1e-4,
                                     trainable=False),
                  activation=dict(activation="relu"))
    h.head = dict(head="OneNetHead",
                  activation=dict(activation="relu"),
                  feat_dims=64,
                  dropblock=None,
                  num_classes=num_classes,
                  strides=4,
                  prior=0.01,
                  use_sigmoid=True,
                  assigner=dict(assigner="MinCostAssigner",
                                class_weight=2.,
                                l1_weight=2.,
                                iou_weight=5.,
                                iou_type="giou",
                                alpha=0.25,
                                gamma=2.),
                  label_loss=dict(loss="FocalLoss",
                                  alpha=0.25,
                                  gamma=2.,
                                  reduction="sum"),
                  bbox_loss=dict(loss="RegL1Loss", weight=1., reduction="sum"))

    h.weight_decay = 1e-4
    h.excluding_weight_names = ["predicted_box", "predicted_class"]
    h.train = dict(
        dataset=dict(dataset="COCODataset",
                     batch_size=4,
                     dataset_dir="/data/bail/COCO",
                     training=True,
                     augmentations=[
                         dict(augmentation="FlipLeftToRight", probability=0.5),
                         dict(augmentation="RandomDistortColor"),
                         dict(augmentation="Resize",
                              img_scale=(0.2, 2),
                              multiscale_mode="range",
                              keep_ratio=True),
                         dict(augmentation="RandCropOrPad",
                              size=input_size,  # input_size is already an (H, W) tuple
                              clip_box_base_center=False),
                     ],
                     num_samples=118287),
        pretrained_weights_path=
        "/data/bail/pretrained_weights/resnet50/resnet50.ckpt",
        optimizer=dict(optimizer="SGD", momentum=0.9),
        mixed_precision=dict(
            loss_scale=None
        ),  # The loss scale in mixed precision training. If None, use dynamic.
        gradient_clip_norm=10.0,
        scheduler=dict(train_epochs=24,
                       learning_rate_scheduler=dict(
                           scheduler="PiecewiseConstantDecay",
                           boundaries=[16, 22],
                           values=[0.02, 0.002, 0.0002]),
                       warmup=dict(warmup_learning_rate=0.001, steps=800)),
        checkpoint_dir="checkpoints/onenet",
        summary_dir="logs/onenet",
        log_every_n_steps=100,
        save_ckpt_steps=5000)
    h.val = dict(dataset=dict(dataset="COCODataset",
                              batch_size=4,
                              dataset_dir="/data/bail/COCO",
                              training=False,
                              augmentations=[
                                  dict(augmentation="Resize",
                                       img_scale=[(1333, input_size[0])],
                                       keep_ratio=True),
                                  dict(augmentation="Pad", size_divisor=32)
                              ]),
                 samples=5000)
    h.test = dict(topk=100, score_threshold=0.3)

    return h
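
All of these examples build their hyperparameters on a Config object that is imported from the surrounding project and not shown here. To run the functions standalone for inspection, an attribute-style stand-in is enough; the sketch below is a hypothetical minimal placeholder, not the project's actual Config class.

class Config:
    """Hypothetical stand-in: a plain attribute container.

    The project's real Config class may add validation, nested access helpers,
    or serialization; this sketch only supports the attribute assignments used
    in the config functions shown here.
    """

    def __repr__(self):
        return "\n".join("%s = %r" % (k, v) for k, v in vars(self).items())


# Usage: build a config and inspect a few fields.
cfg = get_onenet_config(num_classes=80)
print(cfg.detector)          # "OneNet"
print(cfg.input_shape)       # (512, 512, 3)
print(cfg.head["assigner"])  # the MinCostAssigner settings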
Example #2
def get_gfl_config(num_classes=80):
    h = Config()
    
    input_size = (1024, 1024)
    h.detector = "GFL"
    h.dtype = "float16"
    h.data_format = "channels_last"
    h.input_shape = (input_size[0], input_size[1], 3)
    h.num_classes = num_classes
    h.backbone = dict(backbone="ResNet101",
                      dropblock=None, 
                      normalization=dict(normalization="batch_norm", momentum=0.997, epsilon=1e-4, trainable=False),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[3, 4, 5],
                      frozen_stages=[1, ])
    h.neck=dict(neck="FPN", 
                feat_dims=256,
                min_level=3, 
                max_level=5,
                num_output_levels=5, 
                add_extra_convs=True,
                relu_before_extra_convs=True)
    h.anchors=dict(generator="AnchorGeneratorV2",
                   aspect_ratios=[1.], 
                   octave_base_scale=8,
                   scales_per_octave=1,
                   strides=[8, 16, 32, 64, 128], 
                   num_anchors=1)
    h.head=dict(head="GFLHead",
                normalization=dict(normalization="group_norm", groups=32),
                activation=dict(activation="relu"),
                feat_dims=256,
                dropblock=None,
                num_classes=num_classes,
                repeats=4,
                min_level=3,
                max_level=7,
                use_sigmoid=True,
                prior=0.01,
                reg_max=16,
                bbox_decoder=dict(decoder="Distance2Box", weights=None),
                bbox_encoder=dict(encoder="Box2Distance", weights=None),
                assigner=dict(assigner="ATSSAssigner", topk=9),
                sampler=dict(sampler="PseudoSampler"),
                label_loss=dict(loss="QualityFocalLoss", beta=2.0, weight=1., from_logits=True, reduction="sum"),
                bbox_loss=dict(loss="GIoULoss", weight=2., reduction="sum"),
                dfl_loss=dict(loss="DistributionFocalLoss", weight=.25, reduction="sum"))
   
    h.weight_decay = 1e-4
    h.excluding_weight_names = ["predicted_box", "predicted_class"]
    h.train=dict(dataset=dict(dataset="COCODataset",
                              batch_size=4,
                              dataset_dir="/data/bail/COCO",
                              training=True,
                              augmentations=[
                                  dict(augmentation="FlipLeftToRight", probability=0.5),
                                  dict(augmentation="RandomDistortColor"),
                                  dict(augmentation="Resize", img_scale=[(1333, 800)], keep_ratio=True),
                                  dict(augmentation="Pad", size_divisor=32),
                              ],
                              num_samples=118287),
                  pretrained_weights_path="/data/bail/pretrained_weights/resnet50/resnet50.ckpt",

                  optimizer=dict(optimizer="SGD", momentum=0.9),
                  mixed_precision=dict(loss_scale=None),  # The loss scale in mixed precision training. If None, use dynamic.
                  gradient_clip_norm=10.0,

                  scheduler=dict(train_epochs=24,
                                 learning_rate_scheduler=dict(scheduler="PiecewiseConstantDecay",
                                                              boundaries=[16, 22],
                                                              values=[0.02, 0.002, 0.0002]),
                                 warmup=dict(warmup_learning_rate=0.001, steps=800)),
                  checkpoint_dir="checkpoints/gfl",
                  summary_dir="logs/gfl",
                  log_every_n_steps=100,
                  save_ckpt_steps=5000)
    h.val=dict(dataset=dict(dataset="COCODataset", 
                            batch_size=4,  
                            dataset_dir="/data/bail/COCO", 
                            training=False, 
                            augmentations=[
                                dict(augmentation="Resize", img_scale=[(1333, 800)], keep_ratio=True),
                                dict(augmentation="Pad", size_divisor=32),
                            ]),
               samples=5000)
    h.test=dict(nms="CombinedNonMaxSuppression",
                pre_nms_size=5000,
                post_nms_size=100, 
                iou_threshold=0.6, 
                score_threshold=0.3)

    return h
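
A note on the scheduler block: train_epochs=24 together with boundaries=[16, 22] and three values strongly suggests that the PiecewiseConstantDecay boundaries are given in epochs and converted to optimizer steps by the training loop. Assuming a straightforward conversion from the configured dataset size and batch size (an assumption; the conversion itself is not part of this snippet), the effective schedule would be:

# Hypothetical conversion of epoch boundaries into step boundaries.
num_samples = 118287                                 # COCO train2017, as configured above
batch_size = 4
steps_per_epoch = num_samples // batch_size          # 29571 steps per epoch
boundaries_in_steps = [e * steps_per_epoch for e in [16, 22]]  # [473136, 650562]
# Learning rate: warms up from 0.001 over 800 steps, runs at 0.02 until
# epoch 16, drops to 0.002 until epoch 22, and finishes at 0.0002.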
Example #3
def get_yolov4_config(input_size=None,
                      num_classes=80, 
                      num_anchors=3,
                      depth_multiplier=1.,
                      width_multiplier=1.,
                      label_assignment="iou", 
                      name="YOLOv4"):
    h = Config()
    
    if input_size is None:
        input_size = default_input_sizes[name]
    h.detector = name
    h.dtype = "float16"
    h.data_format = "channels_last"
    h.num_classes = num_classes

    h.depth_multiplier = depth_multiplier_dict[name]
    h.width_multiplier = width_multiplier_dict[name]
    
    if name not in model:
        raise ValueError("%s not in %s" % (name, list(model.keys())))
    h.model = model[name]
    h.min_level = min_level_dict[name]
    h.max_level = max_level_dict[name]
    h.strides = strides_dict[name]
    h.anchors = anchors_dict[name]
    h.num_anchors = len(anchors_dict[name][0]) // 2
    h.input_size = input_size if isinstance(input_size, (tuple, list)) else (input_size, input_size)

    h.label_assignment = label_assignment
    h.anchor_threshold = 0.2
    h.gr = 1.
        
    h.bbox_loss = dict(loss="CIoULoss", weight=1., reduction="none")  
    h.label_loss = dict(loss="BinaryCrossEntropy", weight=1., from_logits=True, reduction="none")  # .631 if finetuning else weight = 1.0
    h.conf_loss = dict(loss="BinaryCrossEntropy", weight=1., from_logits=True, reduction="none")   # 0.911 if finetuning else weight = 1.
    h.balance = [1., 1., 1.] # [4.0, 1.0, 0.4]   # if num_level == 3 else [4.0, 1.0, 0.4, 0.1]
    h.box_weight = 0.05  # 0.0296 if finetune else 0.05
    h.label_weight = .5  # 0.243 if finetune else 0.5
    h.conf_weight = 1.0   # 0.301 if finetune else 1.0
    
    h.weight_decay = 0.0005
    h.excluding_weight_names = ["predicted"]
    h.train=dict(dataset=dict(dataset="COCODataset",
                              batch_size=8,
                              dataset_dir="/data/bail/COCO",
                              training=True,
                              augmentations=[
                                  dict(augmentation="FlipLeftToRight", probability=0.5),
                                  dict(augmentation="RandomDistortColor"),
                                  dict(augmentation="Resize", img_scale=(0.2, 2), keep_ratio=True),
                                  dict(augmentation="Pad", size_divisor=32)
                              ],
                            #   mixup=dict(alpha=8.0, prob=0.5),
                              mosaic=dict(size=input_size, min_image_scale=0.25, prob=1.),
                              num_samples=118287),
                  pretrained_weights_path="/data/bail/pretrained_weights/darknet53-notop/darknet53.ckpt",
                  optimizer=dict(optimizer="SGD", momentum=0.937),
                  mixed_precision=dict(loss_scale=None),  # The loss scale in mixed precision training. If None, use dynamic.
                  gradient_clip_norm=.0,

                  scheduler=dict(train_epochs=480,
                                 #  learning_rate_scheduler=dict(scheduler="PiecewiseConstantDecay",
                                 #                               boundaries=[24, 32],
                                 #                               values=[0.012, 0.0012, 0.00012]),
                                 learning_rate_scheduler=dict(scheduler="CosineDecay", initial_learning_rate=0.012),
                                 warmup=dict(warmup_learning_rate=0.0012, steps=12000)),
                  checkpoint_dir="checkpoints/%s" % name,
                  summary_dir="logs/%s" % name,
                  log_every_n_steps=100,
                  save_ckpt_steps=10000)
    h.val=dict(dataset=dict(dataset="COCODataset", 
                            batch_size=8,  
                            dataset_dir="/data/bail/COCO", 
                            training=False, 
                            augmentations=[
                                dict(Resize=dict(size=(input_size, input_size), strides=32, min_scale=1., max_scale=1.0))
                                # dict(ResizeV2=dict(short_side=800, long_side=1333, strides=64, min_scale=1.0, max_scale=1.0))
                            ]),
               samples=5000)
    # h.test=dict(nms="NonMaxSuppressionWithQuality",
    #             pre_nms_size=5000,
    #             post_nms_size=100, 
    #             iou_threshold=0.6, 
    #             score_threshold=0.5,
    #             sigma=0.5,
    #             nms_type="nms")
    h.test=dict(nms="CombinedNonMaxSuppression",
                pre_nms_size=2000,
                post_nms_size=100, 
                iou_threshold=0.6, 
                score_threshold=0.35)

    return h
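
get_yolov4_config relies on several module-level lookup tables (default_input_sizes, depth_multiplier_dict, width_multiplier_dict, model, min_level_dict, max_level_dict, strides_dict, anchors_dict) that are defined elsewhere in the file. Their expected shapes can be inferred from how they are indexed above; the sketch below fills in a single "YOLOv4" entry with commonly published YOLOv4 defaults purely for illustration, so the concrete numbers and the model spec are assumptions rather than the project's actual tables.

# Hypothetical shapes of the lookup tables this function indexes.
default_input_sizes = {"YOLOv4": 608}
depth_multiplier_dict = {"YOLOv4": 1.0}
width_multiplier_dict = {"YOLOv4": 1.0}
model = {"YOLOv4": dict(backbone="CSPDarkNet53", neck="PANet")}  # placeholder spec
min_level_dict = {"YOLOv4": 3}
max_level_dict = {"YOLOv4": 5}
strides_dict = {"YOLOv4": [8, 16, 32]}
# One flat [w1, h1, w2, h2, w3, h3] list per output level, so that
# num_anchors = len(anchors_dict[name][0]) // 2 == 3.
anchors_dict = {"YOLOv4": [[12, 16, 19, 36, 40, 28],
                           [36, 75, 76, 55, 72, 146],
                           [142, 110, 192, 243, 459, 401]]}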
Example #4
def get_yolof_config(num_classes=80):
    h = Config()
    
    data_format = "channels_last"
    input_size = (1024, 1024)
    h.detector = "YOLOF"
    h.dtype = "float16"
    h.data_format = data_format
    h.input_shape = (input_size[0], input_size[1], 3)
    h.num_classes = num_classes
    h.backbone = dict(backbone="ResNeXt101_64X4D",
                      dropblock=None, 
                      normalization=dict(normalization="batch_norm", momentum=0.997, epsilon=1e-4, trainable=False),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[5],
                      frozen_stages=[1, ])
    h.neck=dict(neck="DilatedEncoder", 
                filters=512,
                midfilters=128,
                dilation_rates=[2, 4, 6, 8],  # dilation not in stage5
                # dilation_rates=[4, 8, 12, 16],
                normalization=dict(normalization="batch_norm", momentum=0.9, epsilon=1e-3, axis=-1, trainable=True),
                activation=dict(activation="relu"),
                kernel_initializer="he_normal",
                data_format=data_format)
    h.anchors=dict(generator="AnchorGenerator",
                   aspect_ratios=[1.],
                   scales=[32, 64, 128, 256, 512],
                   strides=32, 
                #    scales=[16, 32, 64, 128, 256, 512],
                #    strides=16, 
                   num_anchors=5)
    h.head=dict(head="YOLOFHead",
                normalization=dict(normalization="batch_norm", momentum=0.9, epsilon=1e-3, axis=-1, trainable=True),
                activation=dict(activation="relu"),
                feat_dims=512,
                cls_num_convs=2,
                reg_num_convs=4,
                kernel_initializer="he_normal",
                use_sigmoid=True,
                prior=0.01,
                bbox_decoder=dict(decoder="Delta2Box", weights=[1., 1., 1., 1.]),
                bbox_encoder=dict(encoder="Box2Delta", weights=[1., 1., 1., 1.]),
                assigner=dict(assigner="UniformAssigner", match_times=8, pos_ignore_thresh=0.7, neg_ignore_thresh=0.15),
                sampler=dict(sampler="PseudoSampler"),
                label_loss=dict(loss="FocalLoss", alpha=0.25, gamma=2.0, weight=1., from_logits=True, reduction="sum"),
                bbox_loss=dict(loss="GIoULoss", weight=2., reduction="sum"))
   
    h.weight_decay = 1e-4
    h.excluding_weight_names = ["predicted_box", "predicted_class"]
    h.train=dict(dataset=dict(dataset="COCODataset",
                              batch_size=4,
                              dataset_dir="/data/bail/COCO",
                              training=True,
                              augmentations=[
                                  dict(augmentation="FlipLeftToRight", probability=0.5),
                                  dict(augmentation="RandomDistortColor"),
                                  dict(augmentation="Resize", img_scale=[(1333, 800)], keep_ratio=True),
                                  dict(augmentation="Pad", size_divisor=32)
                              ],
                              num_samples=118287),
                  pretrained_weights_path="/data/bail/pretrained_weights/resnet50/resnet50.ckpt",

                  optimizer=dict(optimizer="SGD", momentum=0.9),
                  mixed_precision=dict(loss_scale=None),  # The loss scale in mixed precision training. If None, use dynamic.
                  gradient_clip_norm=10.0,

                  scheduler=dict(train_epochs=24,
                                 learning_rate_scheduler=dict(scheduler="PiecewiseConstantDecay",
                                                              boundaries=[16, 22],
                                                              values=[0.02, 0.002, 0.0002]),
                                 warmup=dict(warmup_learning_rate=0.001, steps=800)),
                  checkpoint_dir="checkpoints/yolof",
                  summary_dir="logs/yolof",
                  log_every_n_steps=100,
                  save_ckpt_steps=5000)
    h.val=dict(dataset=dict(dataset="COCODataset", 
                            batch_size=4,  
                            dataset_dir="/data/bail/COCO", 
                            training=False, 
                            augmentations=[
                                dict(augmentation="Resize", img_scale=[(1333, 800)], keep_ratio=True),
                                dict(augmentation="Pad", size_divisor=32),
                            ]),
               samples=5000)
    h.test=dict(nms="CombinedNonMaxSuppression",
                pre_nms_size=2000,
                post_nms_size=100, 
                iou_threshold=0.5, 
                score_threshold=0.35)

    return h
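
Because every field on the returned object is either a plain attribute or a nested dict, a config can be adjusted after construction without editing the function. A short usage sketch for adapting the YOLOF example to a smaller custom dataset (the dataset path below is a placeholder):

cfg = get_yolof_config(num_classes=20)
cfg.train["dataset"]["dataset_dir"] = "/path/to/my/dataset"  # placeholder path
cfg.train["dataset"]["batch_size"] = 2
cfg.train["scheduler"]["train_epochs"] = 12
cfg.test["score_threshold"] = 0.5

# The single-level design stays consistent after such edits: the backbone
# exposes only C5 (output_indices=[5], stride 32), and the anchor generator
# places num_anchors=5 anchors per location, one for each entry in `scales`.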